/ Check-in [04907fba]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix another bug caused by NEAR/matchinfo/order=DESC interaction.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts3-prefix-search
Files: files | file ages | folders
SHA1: 04907fbadeb743c95cc9f3529e63ef388684799f
User & Date: dan 2011-06-14 09:00:27
Context
2011-06-14
11:32
Add a couple of extra tests. Closed-Leaf check-in: aefd46df user: dan tags: fts3-prefix-search
09:00
Fix another bug caused by NEAR/matchinfo/order=DESC interaction. check-in: 04907fba user: dan tags: fts3-prefix-search
07:22
Merge recent trunk changes into fts3-prefix-search branch. check-in: 135ce30f user: dan tags: fts3-prefix-search
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401

3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412

3413
3414
3415
3416
3417
3418

3419
3420
3421
3422
3423
3424
3425
  int nDoclist,                   /* Length of aDoclist in bytes */
  char **ppIter,                  /* IN/OUT: Iterator pointer */
  sqlite3_int64 *piDocid,         /* IN/OUT: Docid pointer */
  int *pnList,                    /* IN/OUT: List length pointer */
  u8 *pbEof                       /* OUT: End-of-file flag */
){
  char *p = *ppIter;
  int iMul = (bDescIdx ? -1 : 1);

  assert( nDoclist>0 );
  assert( *pbEof==0 );
  assert( p || *piDocid==0 );
  assert( !p || (p>aDoclist && p<&aDoclist[nDoclist]) );

  if( p==0 ){
    sqlite3_int64 iDocid = 0;
    char *pNext = 0;
    char *pDocid = aDoclist;
    char *pEnd = &aDoclist[nDoclist];


    pDocid += sqlite3Fts3GetVarint(pDocid, &iDocid);
    pNext = pDocid;
    fts3PoslistCopy(0, &pDocid);
    while( pDocid<pEnd ){
      sqlite3_int64 iDelta;
      pDocid += sqlite3Fts3GetVarint(pDocid, &iDelta);
      iDocid += (iMul * iDelta);
      pNext = pDocid;
      fts3PoslistCopy(0, &pDocid);
      while( pDocid<pEnd && *pDocid==0 ) pDocid++;

    }

    *pnList = pEnd - pNext;
    *ppIter = pNext;
    *piDocid = iDocid;
  }else{

    sqlite3_int64 iDelta;
    fts3GetReverseVarint(&p, aDoclist, &iDelta);
    *piDocid -= (iMul * iDelta);

    if( p==aDoclist ){
      *pbEof = 1;
    }else{







<











>

<
<
<







>






>







3383
3384
3385
3386
3387
3388
3389

3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402



3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
  int nDoclist,                   /* Length of aDoclist in bytes */
  char **ppIter,                  /* IN/OUT: Iterator pointer */
  sqlite3_int64 *piDocid,         /* IN/OUT: Docid pointer */
  int *pnList,                    /* IN/OUT: List length pointer */
  u8 *pbEof                       /* OUT: End-of-file flag */
){
  char *p = *ppIter;


  assert( nDoclist>0 );
  assert( *pbEof==0 );
  assert( p || *piDocid==0 );
  assert( !p || (p>aDoclist && p<&aDoclist[nDoclist]) );

  if( p==0 ){
    sqlite3_int64 iDocid = 0;
    char *pNext = 0;
    char *pDocid = aDoclist;
    char *pEnd = &aDoclist[nDoclist];
    int iMul = 1;




    while( pDocid<pEnd ){
      sqlite3_int64 iDelta;
      pDocid += sqlite3Fts3GetVarint(pDocid, &iDelta);
      iDocid += (iMul * iDelta);
      pNext = pDocid;
      fts3PoslistCopy(0, &pDocid);
      while( pDocid<pEnd && *pDocid==0 ) pDocid++;
      iMul = (bDescIdx ? -1 : 1);
    }

    *pnList = pEnd - pNext;
    *ppIter = pNext;
    *piDocid = iDocid;
  }else{
    int iMul = (bDescIdx ? -1 : 1);
    sqlite3_int64 iDelta;
    fts3GetReverseVarint(&p, aDoclist, &iDelta);
    *piDocid -= (iMul * iDelta);

    if( p==aDoclist ){
      *pbEof = 1;
    }else{

Changes to test/fts3auto.test.

35
36
37
38
39
40
41
42








43
44
45




















46
47
48
49
50
51
52
53
54
..
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
...
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
...
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
...
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
...
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
...
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
...
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445




























446
447
448
449
450
451
#
# $MATCHINFO may be any expression accepted by the FTS4 MATCH operator, 
# except that the "<column-name>:token" syntax is not supported. Tcl list
# commands are used to tokenize the expression. Any parentheses must appear
# either as separate list elements, or as the first (for opening) or last
# (for closing) character of a list element. For example, the expression
# "(a OR b)c" will not be parsed correctly, but "( a OR b) c" will.
#








set sqlite_fts3_enable_parentheses 1
proc do_fts3query_test {tn tbl expr} {





















  get_near_results $tbl $expr aMatchinfo
  set match $expr

  set matchinfo_asc [list]
  foreach docid [lsort -integer -incr [array names aMatchinfo]] {
    lappend matchinfo_asc $docid $aMatchinfo($docid)
  }
  set matchinfo_desc [list]
  foreach docid [lsort -integer -decr [array names aMatchinfo]] {
................................................................................
    if {[string match -nocase near/* $a]} { set a [string range $a 5 end] }
    lappend out $a
    lappend out $b
  }
  return $out
}

proc get_single_near_results {tbl expr arrayvar nullvar} {
  upvar $arrayvar aMatchinfo
  upvar $nullvar nullentry
  catch {array unset aMatchinfo}

  set expr [fix_near_expr $expr]

  # Calculate the expected results using [fts3_near_match]. The following
................................................................................
    foreach c $counts($key) {
      if {$c>0} { incr nDoc($iPhrase,$iCol) 1 }
      incr nHit($iPhrase,$iCol) $c
      incr iPhrase
    }
  }

  if {[info exists ::fts3_deferred] && [llength $expr]==1} {
    set phrase [lindex $expr 0]
    set rewritten [list]
    set partial 0
    foreach tok $phrase {
      if {[lsearch $::fts3_deferred $tok]>=0} {
        lappend rewritten *
      } else {
        lappend rewritten $tok
        set partial 1
      }
    }
    if {$partial==0} {
................................................................................
    } elseif {$rewritten != $phrase} {
      while {[lindex $rewritten end] == "*"} {
        set rewritten [lrange $rewritten 0 end-1]
      }
      while {[lindex $rewritten 0] == "*"} {
        set rewritten [lrange $rewritten 1 end]
      }
      get_single_near_results $tbl [list $rewritten] aRewrite nullentry
      foreach docid [array names hits] {
        set aMatchinfo($docid) $aRewrite($docid)
      }
      return
    }
  }

................................................................................
    if {$c == "("} {incr iBracket}
    if {$c == ")"} {incr iBracket -1}
  }

  return [expr ($iBracket==0 && $i==$nExpr)]
}

proc get_near_results {tbl expr arrayvar {nullvar ""}} {
  upvar $arrayvar aMatchinfo
  if {$nullvar != ""} { upvar $nullvar nullentry }

  set expr [string trim $expr]
  while { [matching_brackets $expr] } {
    set expr [string trim [string range $expr 1 end-1]]
  }
................................................................................
        if {$c == ")"} { incr iBracket -1 }
      }
    }
  }
  if {$iBracket!=0} { error "mismatched brackets in: $expr" }

  if {[info exists opidx]==0} {
    get_single_near_results $tbl $expr aMatchinfo nullentry
  } else {
    set eLeft  [lrange $expr 0 [expr $opidx-1]]
    set eRight [lrange $expr [expr $opidx+1] end]

    get_near_results $tbl $eLeft  aLeft  nullleft
    get_near_results $tbl $eRight aRight nullright

    switch -- [lindex $expr $opidx] {
      "NOT" {
        foreach hit [array names aLeft] {
          if {0==[info exists aRight($hit)]} {
            set aMatchinfo($hit) $aLeft($hit)
          }
................................................................................

#--------------------------------------------------------------------------
# Some test cases involving deferred tokens.
#
proc make_token_deferrable {tbl token} {
  set nRow [db one "SELECT count(*) FROM $tbl"]
  set pgsz [db one "PRAGMA page_size"]

  execsql "INSERT INTO $tbl ($tbl) VALUES('maxpending=100000000')"
  execsql BEGIN
  for {set i 0} {$i < ($nRow * $pgsz * 1.2)/100} {incr i} {
    set doc [string repeat "$token " 100]
    execsql "INSERT INTO $tbl VALUES(\$doc)"
  }
  execsql "INSERT INTO $tbl VALUES('aaaaaaa ${token}aaaaa')"
  execsql COMMIT
................................................................................
    INSERT INTO t1(docid, x) VALUES(4, 'a c e g i k');
    INSERT INTO t1(docid, x) VALUES(5, 'a d g j');
    INSERT INTO t1(docid, x) VALUES(6, 'c a b');
  }

  make_token_deferrable t1 c

  set ::fts3_deferred [list]
  foreach {tn2 expr} {
    1     {a OR c}
  } {
    do_fts3query_test 3.$tn.2.$tn2 t1 $expr
  }

  set ::fts3_deferred [list c]
  execsql { 
    UPDATE t1_segments 
    SET block = zeroblob(length(block)) 
    WHERE length(block)>10000 AND 0
  }
  foreach {tn2 expr} {
    1     {a NEAR c}
    2     {a AND c}
    3     {"a c"}
    4     {"c a"}
    5     {"a c" NEAR/1 g}
    6     {"a c" NEAR/0 g}
  } {
    do_fts3query_test 3.$tn.2.$tn2 t1 $expr




























  }
}

set sqlite_fts3_enable_parentheses $sfep
finish_test









>
>
>
>
>
>
>
>

|

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
<







 







|







 







|




|







 







|







 







|







 







|




|
|







 







<
<







 







<






<





|
|
|
|
|
|
|

|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>






35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74

75
76
77
78
79
80
81
...
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
...
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
...
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
...
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
...
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
...
413
414
415
416
417
418
419


420
421
422
423
424
425
426
...
442
443
444
445
446
447
448

449
450
451
452
453
454

455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
#
# $MATCHINFO may be any expression accepted by the FTS4 MATCH operator, 
# except that the "<column-name>:token" syntax is not supported. Tcl list
# commands are used to tokenize the expression. Any parentheses must appear
# either as separate list elements, or as the first (for opening) or last
# (for closing) character of a list element. For example, the expression
# "(a OR b)c" will not be parsed correctly, but "( a OR b) c" will.
#
# Available OPTIONS are:
#
#     -deferred TOKENLIST
#
# If the "-deferred" option is supplied, its argument is a list of tokens
# that are deferred by FTS, which causes the relevant matchinfo() stats to
# be an approximation.
#
set sqlite_fts3_enable_parentheses 1
proc do_fts3query_test {tn args} {

  set nArg [llength $args]
  if {$nArg < 2 || ($nArg % 2)} {
    set cmd do_fts3query_test
    error "wrong # args: should be \"$cmd ?-deferred LIST? TABLE MATCHEXPR\""
  }
  set tbl   [lindex $args [expr $nArg-2]]
  set match [lindex $args [expr $nArg-1]]
  set deferred [list]

  foreach {k v} [lrange $args 0 [expr $nArg-3]] {
    switch -- $k {
      -deferred {
        set deferred $v
      }
      default {
        error "bad option \"$k\": must be -deferred"
      }
    }
  }

  get_near_results $tbl $match $deferred aMatchinfo


  set matchinfo_asc [list]
  foreach docid [lsort -integer -incr [array names aMatchinfo]] {
    lappend matchinfo_asc $docid $aMatchinfo($docid)
  }
  set matchinfo_desc [list]
  foreach docid [lsort -integer -decr [array names aMatchinfo]] {
................................................................................
    if {[string match -nocase near/* $a]} { set a [string range $a 5 end] }
    lappend out $a
    lappend out $b
  }
  return $out
}

proc get_single_near_results {tbl expr deferred arrayvar nullvar} {
  upvar $arrayvar aMatchinfo
  upvar $nullvar nullentry
  catch {array unset aMatchinfo}

  set expr [fix_near_expr $expr]

  # Calculate the expected results using [fts3_near_match]. The following
................................................................................
    foreach c $counts($key) {
      if {$c>0} { incr nDoc($iPhrase,$iCol) 1 }
      incr nHit($iPhrase,$iCol) $c
      incr iPhrase
    }
  }

  if {[llength $deferred] && [llength $expr]==1} {
    set phrase [lindex $expr 0]
    set rewritten [list]
    set partial 0
    foreach tok $phrase {
      if {[lsearch $deferred $tok]>=0} {
        lappend rewritten *
      } else {
        lappend rewritten $tok
        set partial 1
      }
    }
    if {$partial==0} {
................................................................................
    } elseif {$rewritten != $phrase} {
      while {[lindex $rewritten end] == "*"} {
        set rewritten [lrange $rewritten 0 end-1]
      }
      while {[lindex $rewritten 0] == "*"} {
        set rewritten [lrange $rewritten 1 end]
      }
      get_single_near_results $tbl [list $rewritten] {} aRewrite nullentry
      foreach docid [array names hits] {
        set aMatchinfo($docid) $aRewrite($docid)
      }
      return
    }
  }

................................................................................
    if {$c == "("} {incr iBracket}
    if {$c == ")"} {incr iBracket -1}
  }

  return [expr ($iBracket==0 && $i==$nExpr)]
}

proc get_near_results {tbl expr deferred arrayvar {nullvar ""}} {
  upvar $arrayvar aMatchinfo
  if {$nullvar != ""} { upvar $nullvar nullentry }

  set expr [string trim $expr]
  while { [matching_brackets $expr] } {
    set expr [string trim [string range $expr 1 end-1]]
  }
................................................................................
        if {$c == ")"} { incr iBracket -1 }
      }
    }
  }
  if {$iBracket!=0} { error "mismatched brackets in: $expr" }

  if {[info exists opidx]==0} {
    get_single_near_results $tbl $expr $deferred aMatchinfo nullentry
  } else {
    set eLeft  [lrange $expr 0 [expr $opidx-1]]
    set eRight [lrange $expr [expr $opidx+1] end]

    get_near_results $tbl $eLeft  $deferred aLeft  nullleft
    get_near_results $tbl $eRight $deferred aRight nullright

    switch -- [lindex $expr $opidx] {
      "NOT" {
        foreach hit [array names aLeft] {
          if {0==[info exists aRight($hit)]} {
            set aMatchinfo($hit) $aLeft($hit)
          }
................................................................................

#--------------------------------------------------------------------------
# Some test cases involving deferred tokens.
#
proc make_token_deferrable {tbl token} {
  set nRow [db one "SELECT count(*) FROM $tbl"]
  set pgsz [db one "PRAGMA page_size"]


  execsql BEGIN
  for {set i 0} {$i < ($nRow * $pgsz * 1.2)/100} {incr i} {
    set doc [string repeat "$token " 100]
    execsql "INSERT INTO $tbl VALUES(\$doc)"
  }
  execsql "INSERT INTO $tbl VALUES('aaaaaaa ${token}aaaaa')"
  execsql COMMIT
................................................................................
    INSERT INTO t1(docid, x) VALUES(4, 'a c e g i k');
    INSERT INTO t1(docid, x) VALUES(5, 'a d g j');
    INSERT INTO t1(docid, x) VALUES(6, 'c a b');
  }

  make_token_deferrable t1 c


  foreach {tn2 expr} {
    1     {a OR c}
  } {
    do_fts3query_test 3.$tn.2.$tn2 t1 $expr
  }


  execsql { 
    UPDATE t1_segments 
    SET block = zeroblob(length(block)) 
    WHERE length(block)>10000 AND 0
  }
  foreach {tn2 expr def} {
    1     {a NEAR c}            {}
    2     {a AND c}             c
    3     {"a c"}               c
    4     {"c a"}               c
    5     {"a c" NEAR/1 g}      {}
    6     {"a c" NEAR/0 g}      {}
  } {
    do_fts3query_test 3.$tn.2.$tn2 -deferred $def t1 $expr
  }
}

#--------------------------------------------------------------------------
# Test cases 4.*: run AND and NEAR queries against a two-column FTS4 table,
# once created with the default options and once with order=DESC.
#
foreach {tn create} {
  1    "fts4(x, y)"
  2    "fts4(x, y, order=DESC)"
} {
  # Recreate t1 for each table definition. The DROP goes through catchsql,
  # presumably so that a missing t1 is not treated as a test error.
  catchsql { DROP TABLE t1 }
  execsql  "CREATE VIRTUAL TABLE t1 USING $create"

  # Populate three rows: the same five words entirely in column x, entirely
  # in column y, and split across the two columns.
  foreach {x y} {
    {one two five four five} {}
    {} {one two five four five}
    {one two} {five four five}
  } {
    execsql {INSERT INTO t1 VALUES($x, $y)}
  }

  # Each expression is checked by do_fts3query_test under the test name
  # 4.<table variant>.2.<expression number>.
  foreach {tn2 expr} {
    1     {one AND five}
    2     {one NEAR five}
    3     {one NEAR/1 five}
    4     {one NEAR/2 five}
    5     {one NEAR/3 five}
  } {
    do_fts3query_test 4.$tn.2.$tn2 t1 $expr
  }
}

set sqlite_fts3_enable_parentheses $sfep
finish_test

Changes to test/permutations.test.

179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
  fts3atoken.test fts3b.test fts3c.test fts3cov.test fts3d.test
  fts3defer.test fts3defer2.test fts3e.test fts3expr.test fts3expr2.test 
  fts3near.test fts3query.test fts3shared.test fts3snippet.test 
  fts3sort.test

  fts3fault.test fts3malloc.test fts3matchinfo.test

  fts3aux1.test fts3comp1.test
}


lappend ::testsuitelist xxx
#-------------------------------------------------------------------------
# Define the coverage related test suites:
#







|







179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
  fts3atoken.test fts3b.test fts3c.test fts3cov.test fts3d.test
  fts3defer.test fts3defer2.test fts3e.test fts3expr.test fts3expr2.test 
  fts3near.test fts3query.test fts3shared.test fts3snippet.test 
  fts3sort.test

  fts3fault.test fts3malloc.test fts3matchinfo.test

  fts3aux1.test fts3comp1.test fts3auto.test
}


lappend ::testsuitelist xxx
#-------------------------------------------------------------------------
# Define the coverage related test suites:
#