/ Check-in [63ac33c8]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:If a token within an FTS query is prefixed with a '^' character, it must be the first token in a column of data to match.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts4-content
Files: files | file ages | folders
SHA1: 63ac33c860eb32ce96699f06bf83121cec2ffaca
User & Date: dan 2011-10-18 19:39:41
Context
2011-10-19
09:40
Fix a problem in FTS to do with ^ tokens and the snippet() function. check-in: 2c03b24f user: dan tags: fts4-content
2011-10-18
19:39
If a token within an FTS query is prefixed with a '^' character, it must be the first token in a column of data to match. check-in: 63ac33c8 user: dan tags: fts4-content
12:49
Cherrypick patch [3126754c72] from the trunk into the content= branch. check-in: f9b5b217 user: dan tags: fts4-content
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

2343
2344
2345
2346
2347
2348
2349





























































2350
2351
2352
2353
2354
2355
2356
....
3513
3514
3515
3516
3517
3518
3519




3520
3521
3522
3523
3524
3525
3526
....
3717
3718
3719
3720
3721
3722
3723

3724
3725
3726
3727
3728
3729
3730
      fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
    }
  }

  *pnRight = p - aOut;
}































































/*
** Merge all doclists in the TermSelect.aaOutput[] array into a single
** doclist stored in TermSelect.aaOutput[0]. If successful, delete all
** other doclists (except the aaOutput[0] one) and return SQLITE_OK.
**
** If an OOM error occurs, return SQLITE_NOMEM. In this case it is
................................................................................
  Fts3Table *pTab,                /* FTS Table pointer */
  Fts3Phrase *p,                  /* Phrase to merge pList/nList into */
  int iToken,                     /* Token pList/nList corresponds to */
  char *pList,                    /* Pointer to doclist */
  int nList                       /* Number of bytes in pList */
){
  assert( iToken!=p->iDoclistToken );





  if( pList==0 ){
    sqlite3_free(p->doclist.aAll);
    p->doclist.aAll = 0;
    p->doclist.nAll = 0;
  }

................................................................................
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;

  if( pCsr->bDesc==pTab->bDescIdx 
   && bOptOk==1 
   && p->nToken==1 
   && pFirst->pSegcsr 
   && pFirst->pSegcsr->bLookup 

  ){
    /* Use the incremental approach. */
    int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn);
    rc = sqlite3Fts3MsrIncrStart(
        pTab, pFirst->pSegcsr, iCol, pFirst->z, pFirst->n);
    p->bIncr = 1;








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>
>
>
>







 







>







2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
....
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
....
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
      fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
    }
  }

  *pnRight = p - aOut;
}

/*
** When this function is called, pList points to a doclist containing position
** data, length *pnList bytes. This removes all entries from the doclist that
** do not correspond to the first token in a column and overwrites pList
** with the result. *pnList is set to the length of the new doclist before
** returning.
**
** If bDescDoclist is true, then both the input and output are in descending
** order. Otherwise, ascending.
*/
static void fts3DoclistFirstFilter(
  int bDescDoclist,               /* True if pList is a descending doclist */
  char *pList,                    /* Buffer containing doclist */
  int *pnList                     /* IN/OUT: Size of doclist */
){
  char *p = pList;
  char *pOut = pList;
  char *pEnd = &pList[*pnList];

  sqlite3_int64 iDoc;
  sqlite3_int64 iPrev;
  int bFirstOut = 0;

  fts3GetDeltaVarint3(&p, pEnd, 0, &iDoc);
  while( p ){
    int bWritten = 0;
    if( *p!=0x01 ){
      if( *p==0x02 ){
        fts3PutDeltaVarint3(&pOut, bDescDoclist, &iPrev, &bFirstOut, iDoc); 
        *pOut++ = 0x02;
        bWritten = 1;
      }
      fts3ColumnlistCopy(0, &p);
    }

    while( *p==0x01 ){
      sqlite3_int64 iCol;
      p++;
      p += sqlite3Fts3GetVarint(p, &iCol);
      if( *p==0x02 ){
        if( bWritten==0 ){
          fts3PutDeltaVarint3(&pOut, bDescDoclist, &iPrev, &bFirstOut, iDoc); 
          bWritten = 1;
        }
        pOut += sqlite3Fts3PutVarint(pOut, iCol);
        *pOut++ = 0x02;
      }
      fts3ColumnlistCopy(0, &p);
    }
    if( bWritten ){
      *pOut++ = 0x00;
    }

    assert( *p==0x00 );
    p++;
    fts3GetDeltaVarint3(&p, pEnd, bDescDoclist, &iDoc);
  }

  *pnList = (pOut - pList);
}


/*
** Merge all doclists in the TermSelect.aaOutput[] array into a single
** doclist stored in TermSelect.aaOutput[0]. If successful, delete all
** other doclists (except the aaOutput[0] one) and return SQLITE_OK.
**
** If an OOM error occurs, return SQLITE_NOMEM. In this case it is
................................................................................
  Fts3Table *pTab,                /* FTS Table pointer */
  Fts3Phrase *p,                  /* Phrase to merge pList/nList into */
  int iToken,                     /* Token pList/nList corresponds to */
  char *pList,                    /* Pointer to doclist */
  int nList                       /* Number of bytes in pList */
){
  assert( iToken!=p->iDoclistToken );

  if( p->aToken[iToken].bFirst ){
    fts3DoclistFirstFilter(pTab->bDescIdx, pList, &nList);
  }

  if( pList==0 ){
    sqlite3_free(p->doclist.aAll);
    p->doclist.aAll = 0;
    p->doclist.nAll = 0;
  }

................................................................................
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;

  if( pCsr->bDesc==pTab->bDescIdx 
   && bOptOk==1 
   && p->nToken==1 
   && pFirst->pSegcsr 
   && pFirst->pSegcsr->bLookup 
   && pFirst->bFirst==0
  ){
    /* Use the incremental approach. */
    int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn);
    rc = sqlite3Fts3MsrIncrStart(
        pTab, pFirst->pSegcsr, iCol, pFirst->z, pFirst->n);
    p->bIncr = 1;

Changes to ext/fts3/fts3Int.h.

306
307
308
309
310
311
312

313
314
315
316
317
318
319
** For a sequence of tokens contained in double-quotes (i.e. "one two three")
** nToken will be the number of tokens in the string.
*/
struct Fts3PhraseToken {
  char *z;                        /* Text of the token */
  int n;                          /* Number of bytes in buffer z */
  int isPrefix;                   /* True if token ends with a "*" character */


  /* Variables above this point are populated when the expression is
  ** parsed (by code in fts3_expr.c). Below this point the variables are
  ** used when evaluating the expression. */
  Fts3DeferredToken *pDeferred;   /* Deferred token object for this token */
  Fts3MultiSegReader *pSegcsr;    /* Segment-reader for this token */
};







>







306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
** For a sequence of tokens contained in double-quotes (i.e. "one two three")
** nToken will be the number of tokens in the string.
*/
struct Fts3PhraseToken {
  char *z;                        /* Text of the token */
  int n;                          /* Number of bytes in buffer z */
  int isPrefix;                   /* True if token ends with a "*" character */
  int bFirst;                     /* True if token must appear at position 0 */

  /* Variables above this point are populated when the expression is
  ** parsed (by code in fts3_expr.c). Below this point the variables are
  ** used when evaluating the expression. */
  Fts3DeferredToken *pDeferred;   /* Deferred token object for this token */
  Fts3MultiSegReader *pSegcsr;    /* Segment-reader for this token */
};

Changes to ext/fts3/fts3_expr.c.

176
177
178
179
180
181
182


183


184






185


186
187
188
189
190
191
192
...
277
278
279
280
281
282
283

284
285
286
287
288
289
290
        pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1];
        memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken);

        if( iEnd<n && z[iEnd]=='*' ){
          pRet->pPhrase->aToken[0].isPrefix = 1;
          iEnd++;
        }


        if( !sqlite3_fts3_enable_parentheses && iStart>0 && z[iStart-1]=='-' ){


          pParse->isNot = 1;






        }


      }
      nConsumed = iEnd;
    }

    pModule->xClose(pCursor);
  }
  
................................................................................
        memset(pToken, 0, sizeof(Fts3PhraseToken));

        memcpy(&zTemp[nTemp], zByte, nByte);
        nTemp += nByte;

        pToken->n = nByte;
        pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*');

        nToken = ii+1;
      }
    }

    pModule->xClose(pCursor);
    pCursor = 0;
  }







>
>
|
>
>
|
>
>
>
>
>
>
|
>
>







 







>







176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
...
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
        pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1];
        memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken);

        if( iEnd<n && z[iEnd]=='*' ){
          pRet->pPhrase->aToken[0].isPrefix = 1;
          iEnd++;
        }

        while( 1 ){
          if( !sqlite3_fts3_enable_parentheses 
           && iStart>0 && z[iStart-1]=='-' 
          ){
            pParse->isNot = 1;
            iStart--;
          }else if( iStart>0 && z[iStart-1]=='^' ){
            pRet->pPhrase->aToken[0].bFirst = 1;
            iStart--;
          }else{
            break;
          }
        }

      }
      nConsumed = iEnd;
    }

    pModule->xClose(pCursor);
  }
  
................................................................................
        memset(pToken, 0, sizeof(Fts3PhraseToken));

        memcpy(&zTemp[nTemp], zByte, nByte);
        nTemp += nByte;

        pToken->n = nByte;
        pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*');
        pToken->bFirst = (iBegin>0 && zInput[iBegin-1]=='^');
        nToken = ii+1;
      }
    }

    pModule->xClose(pCursor);
    pCursor = 0;
  }

Changes to ext/fts3/fts3_write.c.

3113
3114
3115
3116
3117
3118
3119

3120
3121
3122
3123
3124
3125
3126
        int iPos;                 /* Position of token in zText */
  
        pTC->pTokenizer = pT;
        rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos);
        for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
          Fts3PhraseToken *pPT = pDef->pToken;
          if( (pDef->iCol>=p->nColumn || pDef->iCol==i)

           && (pPT->n==nToken || (pPT->isPrefix && pPT->n<nToken))
           && (0==memcmp(zToken, pPT->z, pPT->n))
          ){
            fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc);
          }
        }
      }







>







3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
        int iPos;                 /* Position of token in zText */
  
        pTC->pTokenizer = pT;
        rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos);
        for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
          Fts3PhraseToken *pPT = pDef->pToken;
          if( (pDef->iCol>=p->nColumn || pDef->iCol==i)
           && (pPT->bFirst==0 || iPos==0)
           && (pPT->n==nToken || (pPT->isPrefix && pPT->n<nToken))
           && (0==memcmp(zToken, pPT->z, pPT->n))
          ){
            fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc);
          }
        }
      }

Changes to test/fts3defer.test.

422
423
424
425
426
427
428







429
430
431
432
433
434
435
    SELECT rowid FROM t1 WHERE t1 MATCH '"zm azavwm"'
  } {15 26 92 96}
  if {$fts3_simple_deferred_tokens_only==0} {
    do_select_test 6.2.3 {
      SELECT rowid FROM t1 WHERE t1 MATCH '"jk xduvfhk" OR "zm azavwm"'
    } {8 15 26 92 96}
  }







}

set testprefix fts3defer

do_execsql_test 3.1 {
  CREATE VIRTUAL TABLE x1 USING fts4(a, b);
  INSERT INTO x1 VALUES('a b c', 'd e f');







>
>
>
>
>
>
>







422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
    SELECT rowid FROM t1 WHERE t1 MATCH '"zm azavwm"'
  } {15 26 92 96}
  if {$fts3_simple_deferred_tokens_only==0} {
    do_select_test 6.2.3 {
      SELECT rowid FROM t1 WHERE t1 MATCH '"jk xduvfhk" OR "zm azavwm"'
    } {8 15 26 92 96}
  }

  do_select_test 7.1 {
    SELECT rowid FROM t1 WHERE t1 MATCH '^zm mjpavjuhw'
  } {56 62}
  do_select_test 7.2 {
    SELECT rowid FROM t1 WHERE t1 MATCH '^azavwm zm'
  } {43}
}

set testprefix fts3defer

do_execsql_test 3.1 {
  CREATE VIRTUAL TABLE x1 USING fts4(a, b);
  INSERT INTO x1 VALUES('a b c', 'd e f');

Added test/fts3first.test.































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# 2011 October 18
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************

set testdir [file dirname $argv0]
source $testdir/tester.tcl
source $testdir/malloc_common.tcl

ifcapable !fts3 {
  finish_test
  return
}

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE x1 USING FTS4(a, b, c);
  INSERT INTO x1(docid,a,b,c) VALUES(0, 'K H D S T', 'V M N Y K', 'S Z N Q S');
  INSERT INTO x1(docid,a,b,c) VALUES(1, 'K N J L W', 'S Z W J Q', 'D U W S E');
  INSERT INTO x1(docid,a,b,c) VALUES(2, 'B P M O I', 'R P H W S', 'R J L L E');
  INSERT INTO x1(docid,a,b,c) VALUES(3, 'U R Q M L', 'M J K A V', 'Q W J T J');
  INSERT INTO x1(docid,a,b,c) VALUES(4, 'N J C Y N', 'R U D X V', 'B O U A Q');
  INSERT INTO x1(docid,a,b,c) VALUES(5, 'Q L X L U', 'I F N X S', 'U Q A N Y');
  INSERT INTO x1(docid,a,b,c) VALUES(6, 'M R G U T', 'U V I Q P', 'X Y D L S');
  INSERT INTO x1(docid,a,b,c) VALUES(7, 'D Y P O I', 'X J P K R', 'V O T H V');
  INSERT INTO x1(docid,a,b,c) VALUES(8, 'R Y D L R', 'U U E S J', 'N W L M R');
  INSERT INTO x1(docid,a,b,c) VALUES(9, 'Z P F N P', 'W A X D U', 'V A E Q A');
  INSERT INTO x1(docid,a,b,c) VALUES(10, 'Q I A Q M', 'N D K H C', 'A H T Q Z');
  INSERT INTO x1(docid,a,b,c) VALUES(11, 'T E R Q B', 'C I B C B', 'F Z U W R');
  INSERT INTO x1(docid,a,b,c) VALUES(12, 'E S V U W', 'T P F W H', 'A M D J Q');
  INSERT INTO x1(docid,a,b,c) VALUES(13, 'X S B T Y', 'U D N D P', 'X Z Y G F');
  INSERT INTO x1(docid,a,b,c) VALUES(14, 'K H A B L', 'S R C C Z', 'D W E H J');
  INSERT INTO x1(docid,a,b,c) VALUES(15, 'C E U C C', 'W F M N M', 'T Z U X T');
  INSERT INTO x1(docid,a,b,c) VALUES(16, 'Q G C G H', 'H N N B H', 'B Q I H Y');
  INSERT INTO x1(docid,a,b,c) VALUES(17, 'Q T S K B', 'W B D Y N', 'V J P E C');
  INSERT INTO x1(docid,a,b,c) VALUES(18, 'A J M O Q', 'L G Y Y A', 'G N M R N');
  INSERT INTO x1(docid,a,b,c) VALUES(19, 'T R Y P Y', 'N V Y B X', 'L Z T N T');

  CREATE VIRTUAL TABLE x2 USING FTS4(a, b, c, order=DESC);
  INSERT INTO x2(docid, a, b, c) SELECT docid, a, b, c FROM x1;
}

foreach x {1 2} {
  foreach {tn match res} {
    1  "^K"              {0 1 14}
    2  "^S"              {0 1 14}
    3  "^W"              {9 15 17}
    4  "^J"              {}
    5  "^E"              {12}
    6  "V ^-E"           {0 3 4 6 7 9 17 19}
    7  "V -^E"           {0 3 4 6 7 9 17 19}
    8  "^-E V"           {0 3 4 6 7 9 17 19}
    9  "-^E V"           {0 3 4 6 7 9 17 19}
    10 "V"               {0 3 4 6 7 9 12 17 19}

    11 {"^K H"}          {0 14}
    12 {"K H"}           {0 10 14}
    13 {"K ^H"}          {}
  } {
    set rev [list]
    for {set ii [expr [llength $res]-1]} {$ii>=0} {incr ii -1} {
      lappend rev [lindex $res $ii]
    }
    do_execsql_test 1.$x.$tn.1 {SELECT docid FROM x1 WHERE x1 MATCH $match} $res
    do_execsql_test 1.$x.$tn.2 {SELECT docid FROM x2 WHERE x2 MATCH $match} $rev
  }

  do_execsql_test 1.$x.[expr $tn+1] { 
    INSERT INTO x1(x1) VALUES('optimize');
    INSERT INTO x2(x2) VALUES('optimize');
  } {}
}

finish_test

Changes to test/permutations.test.

181
182
183
184
185
186
187

188
189
190
191
192
193
194
  fts3near.test fts3query.test fts3shared.test fts3snippet.test 
  fts3sort.test
  fts3fault.test fts3malloc.test fts3matchinfo.test
  fts3aux1.test fts3comp1.test fts3auto.test
  fts4aa.test fts4content.test
  fts3conf.test fts3prefix.test fts3fault2.test fts3corrupt.test
  fts3corrupt2.test

}


lappend ::testsuitelist xxx
#-------------------------------------------------------------------------
# Define the coverage related test suites:
#







>







181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
  fts3near.test fts3query.test fts3shared.test fts3snippet.test 
  fts3sort.test
  fts3fault.test fts3malloc.test fts3matchinfo.test
  fts3aux1.test fts3comp1.test fts3auto.test
  fts4aa.test fts4content.test
  fts3conf.test fts3prefix.test fts3fault2.test fts3corrupt.test
  fts3corrupt2.test
  fts3first.test
}


lappend ::testsuitelist xxx
#-------------------------------------------------------------------------
# Define the coverage related test suites:
#