/ Check-in [63ac33c8]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: If a token within an FTS query is prefixed with a '^' character, it must be the first token in a column of data to match.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts4-content
Files: files | file ages | folders
SHA1: 63ac33c860eb32ce96699f06bf83121cec2ffaca
User & Date: dan 2011-10-18 19:39:41
Context
2011-10-19
09:40
Fix a problem in FTS to do with ^ tokens and the snippet() function. check-in: 2c03b24f user: dan tags: fts4-content
2011-10-18
19:39
If a token within an FTS query is prefixed with a '^' character, it must be the first token in a column of data to match. check-in: 63ac33c8 user: dan tags: fts4-content
12:49
Cherrypick patch [3126754c72] from the trunk into the content= branch. check-in: f9b5b217 user: dan tags: fts4-content
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

  2343   2343         fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
  2344   2344       }
  2345   2345     }
  2346   2346   
  2347   2347     *pnRight = p - aOut;
  2348   2348   }
  2349   2349   
         2350  +/*
         2351  +** When this function is called, pList points to a doclist containing position
         2352  +** data, length *pnList bytes. This removes all entries from the doclist that
         2353  +** do not correspond to the first token in a column and overwrites pList
         2354  +** with the result. *pnList is set to the length of the new doclist before
         2355  +** returning.
         2356  +**
         2357  +** If bDescDoclist is true, then both the input and output are in descending
         2358  +** order. Otherwise, ascending.
         2359  +*/
         2360  +static void fts3DoclistFirstFilter(
         2361  +  int bDescDoclist,               /* True if pList is a descending doclist */
         2362  +  char *pList,                    /* Buffer containing doclist */
         2363  +  int *pnList                     /* IN/OUT: Size of doclist */
         2364  +){
         2365  +  char *p = pList;
         2366  +  char *pOut = pList;
         2367  +  char *pEnd = &pList[*pnList];
         2368  +
         2369  +  sqlite3_int64 iDoc;
         2370  +  sqlite3_int64 iPrev;
         2371  +  int bFirstOut = 0;
         2372  +
         2373  +  fts3GetDeltaVarint3(&p, pEnd, 0, &iDoc);
         2374  +  while( p ){
         2375  +    int bWritten = 0;
         2376  +    if( *p!=0x01 ){
         2377  +      if( *p==0x02 ){
         2378  +        fts3PutDeltaVarint3(&pOut, bDescDoclist, &iPrev, &bFirstOut, iDoc); 
         2379  +        *pOut++ = 0x02;
         2380  +        bWritten = 1;
         2381  +      }
         2382  +      fts3ColumnlistCopy(0, &p);
         2383  +    }
         2384  +
         2385  +    while( *p==0x01 ){
         2386  +      sqlite3_int64 iCol;
         2387  +      p++;
         2388  +      p += sqlite3Fts3GetVarint(p, &iCol);
         2389  +      if( *p==0x02 ){
         2390  +        if( bWritten==0 ){
         2391  +          fts3PutDeltaVarint3(&pOut, bDescDoclist, &iPrev, &bFirstOut, iDoc); 
         2392  +          bWritten = 1;
         2393  +        }
         2394  +        pOut += sqlite3Fts3PutVarint(pOut, iCol);
         2395  +        *pOut++ = 0x02;
         2396  +      }
         2397  +      fts3ColumnlistCopy(0, &p);
         2398  +    }
         2399  +    if( bWritten ){
         2400  +      *pOut++ = 0x00;
         2401  +    }
         2402  +
         2403  +    assert( *p==0x00 );
         2404  +    p++;
         2405  +    fts3GetDeltaVarint3(&p, pEnd, bDescDoclist, &iDoc);
         2406  +  }
         2407  +
         2408  +  *pnList = (pOut - pList);
         2409  +}
         2410  +
  2350   2411   
  2351   2412   /*
  2352   2413   ** Merge all doclists in the TermSelect.aaOutput[] array into a single
  2353   2414   ** doclist stored in TermSelect.aaOutput[0]. If successful, delete all
  2354   2415   ** other doclists (except the aaOutput[0] one) and return SQLITE_OK.
  2355   2416   **
  2356   2417   ** If an OOM error occurs, return SQLITE_NOMEM. In this case it is
................................................................................
  3513   3574     Fts3Table *pTab,                /* FTS Table pointer */
  3514   3575     Fts3Phrase *p,                  /* Phrase to merge pList/nList into */
  3515   3576     int iToken,                     /* Token pList/nList corresponds to */
  3516   3577     char *pList,                    /* Pointer to doclist */
  3517   3578     int nList                       /* Number of bytes in pList */
  3518   3579   ){
  3519   3580     assert( iToken!=p->iDoclistToken );
         3581  +
         3582  +  if( p->aToken[iToken].bFirst ){
         3583  +    fts3DoclistFirstFilter(pTab->bDescIdx, pList, &nList);
         3584  +  }
  3520   3585   
  3521   3586     if( pList==0 ){
  3522   3587       sqlite3_free(p->doclist.aAll);
  3523   3588       p->doclist.aAll = 0;
  3524   3589       p->doclist.nAll = 0;
  3525   3590     }
  3526   3591   
................................................................................
  3717   3782     Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  3718   3783   
  3719   3784     if( pCsr->bDesc==pTab->bDescIdx 
  3720   3785      && bOptOk==1 
  3721   3786      && p->nToken==1 
  3722   3787      && pFirst->pSegcsr 
  3723   3788      && pFirst->pSegcsr->bLookup 
         3789  +   && pFirst->bFirst==0
  3724   3790     ){
  3725   3791       /* Use the incremental approach. */
  3726   3792       int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn);
  3727   3793       rc = sqlite3Fts3MsrIncrStart(
  3728   3794           pTab, pFirst->pSegcsr, iCol, pFirst->z, pFirst->n);
  3729   3795       p->bIncr = 1;
  3730   3796   

Changes to ext/fts3/fts3Int.h.

   306    306   ** For a sequence of tokens contained in double-quotes (i.e. "one two three")
   307    307   ** nToken will be the number of tokens in the string.
   308    308   */
   309    309   struct Fts3PhraseToken {
   310    310     char *z;                        /* Text of the token */
   311    311     int n;                          /* Number of bytes in buffer z */
   312    312     int isPrefix;                   /* True if token ends with a "*" character */
          313  +  int bFirst;                     /* True if token must appear at position 0 */
   313    314   
   314    315     /* Variables above this point are populated when the expression is
   315    316     ** parsed (by code in fts3_expr.c). Below this point the variables are
   316    317     ** used when evaluating the expression. */
   317    318     Fts3DeferredToken *pDeferred;   /* Deferred token object for this token */
   318    319     Fts3MultiSegReader *pSegcsr;    /* Segment-reader for this token */
   319    320   };

Changes to ext/fts3/fts3_expr.c.

   176    176           pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1];
   177    177           memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken);
   178    178   
   179    179           if( iEnd<n && z[iEnd]=='*' ){
   180    180             pRet->pPhrase->aToken[0].isPrefix = 1;
   181    181             iEnd++;
   182    182           }
   183         -        if( !sqlite3_fts3_enable_parentheses && iStart>0 && z[iStart-1]=='-' ){
   184         -          pParse->isNot = 1;
          183  +
          184  +        while( 1 ){
          185  +          if( !sqlite3_fts3_enable_parentheses 
          186  +           && iStart>0 && z[iStart-1]=='-' 
          187  +          ){
          188  +            pParse->isNot = 1;
          189  +            iStart--;
          190  +          }else if( iStart>0 && z[iStart-1]=='^' ){
          191  +            pRet->pPhrase->aToken[0].bFirst = 1;
          192  +            iStart--;
          193  +          }else{
          194  +            break;
          195  +          }
   185    196           }
          197  +
   186    198         }
   187    199         nConsumed = iEnd;
   188    200       }
   189    201   
   190    202       pModule->xClose(pCursor);
   191    203     }
   192    204     
................................................................................
   277    289           memset(pToken, 0, sizeof(Fts3PhraseToken));
   278    290   
   279    291           memcpy(&zTemp[nTemp], zByte, nByte);
   280    292           nTemp += nByte;
   281    293   
   282    294           pToken->n = nByte;
   283    295           pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*');
          296  +        pToken->bFirst = (iBegin>0 && zInput[iBegin-1]=='^');
   284    297           nToken = ii+1;
   285    298         }
   286    299       }
   287    300   
   288    301       pModule->xClose(pCursor);
   289    302       pCursor = 0;
   290    303     }

Changes to ext/fts3/fts3_write.c.

  3113   3113           int iPos;                 /* Position of token in zText */
  3114   3114     
  3115   3115           pTC->pTokenizer = pT;
  3116   3116           rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos);
  3117   3117           for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
  3118   3118             Fts3PhraseToken *pPT = pDef->pToken;
  3119   3119             if( (pDef->iCol>=p->nColumn || pDef->iCol==i)
         3120  +           && (pPT->bFirst==0 || iPos==0)
  3120   3121              && (pPT->n==nToken || (pPT->isPrefix && pPT->n<nToken))
  3121   3122              && (0==memcmp(zToken, pPT->z, pPT->n))
  3122   3123             ){
  3123   3124               fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc);
  3124   3125             }
  3125   3126           }
  3126   3127         }

Changes to test/fts3defer.test.

   422    422       SELECT rowid FROM t1 WHERE t1 MATCH '"zm azavwm"'
   423    423     } {15 26 92 96}
   424    424     if {$fts3_simple_deferred_tokens_only==0} {
   425    425       do_select_test 6.2.3 {
   426    426         SELECT rowid FROM t1 WHERE t1 MATCH '"jk xduvfhk" OR "zm azavwm"'
   427    427       } {8 15 26 92 96}
   428    428     }
          429  +
          430  +  do_select_test 7.1 {
          431  +    SELECT rowid FROM t1 WHERE t1 MATCH '^zm mjpavjuhw'
          432  +  } {56 62}
          433  +  do_select_test 7.2 {
          434  +    SELECT rowid FROM t1 WHERE t1 MATCH '^azavwm zm'
          435  +  } {43}
   429    436   }
   430    437   
   431    438   set testprefix fts3defer
   432    439   
   433    440   do_execsql_test 3.1 {
   434    441     CREATE VIRTUAL TABLE x1 USING fts4(a, b);
   435    442     INSERT INTO x1 VALUES('a b c', 'd e f');

Added test/fts3first.test.

            1  +# 2011 October 18
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#***********************************************************************
           11  +
           12  +set testdir [file dirname $argv0]
           13  +source $testdir/tester.tcl
           14  +source $testdir/malloc_common.tcl
           15  +
           16  +ifcapable !fts3 {
           17  +  finish_test
           18  +  return
           19  +}
           20  +
           21  +do_execsql_test 1.0 {
           22  +  CREATE VIRTUAL TABLE x1 USING FTS4(a, b, c);
           23  +  INSERT INTO x1(docid,a,b,c) VALUES(0, 'K H D S T', 'V M N Y K', 'S Z N Q S');
           24  +  INSERT INTO x1(docid,a,b,c) VALUES(1, 'K N J L W', 'S Z W J Q', 'D U W S E');
           25  +  INSERT INTO x1(docid,a,b,c) VALUES(2, 'B P M O I', 'R P H W S', 'R J L L E');
           26  +  INSERT INTO x1(docid,a,b,c) VALUES(3, 'U R Q M L', 'M J K A V', 'Q W J T J');
           27  +  INSERT INTO x1(docid,a,b,c) VALUES(4, 'N J C Y N', 'R U D X V', 'B O U A Q');
           28  +  INSERT INTO x1(docid,a,b,c) VALUES(5, 'Q L X L U', 'I F N X S', 'U Q A N Y');
           29  +  INSERT INTO x1(docid,a,b,c) VALUES(6, 'M R G U T', 'U V I Q P', 'X Y D L S');
           30  +  INSERT INTO x1(docid,a,b,c) VALUES(7, 'D Y P O I', 'X J P K R', 'V O T H V');
           31  +  INSERT INTO x1(docid,a,b,c) VALUES(8, 'R Y D L R', 'U U E S J', 'N W L M R');
           32  +  INSERT INTO x1(docid,a,b,c) VALUES(9, 'Z P F N P', 'W A X D U', 'V A E Q A');
           33  +  INSERT INTO x1(docid,a,b,c) VALUES(10, 'Q I A Q M', 'N D K H C', 'A H T Q Z');
           34  +  INSERT INTO x1(docid,a,b,c) VALUES(11, 'T E R Q B', 'C I B C B', 'F Z U W R');
           35  +  INSERT INTO x1(docid,a,b,c) VALUES(12, 'E S V U W', 'T P F W H', 'A M D J Q');
           36  +  INSERT INTO x1(docid,a,b,c) VALUES(13, 'X S B T Y', 'U D N D P', 'X Z Y G F');
           37  +  INSERT INTO x1(docid,a,b,c) VALUES(14, 'K H A B L', 'S R C C Z', 'D W E H J');
           38  +  INSERT INTO x1(docid,a,b,c) VALUES(15, 'C E U C C', 'W F M N M', 'T Z U X T');
           39  +  INSERT INTO x1(docid,a,b,c) VALUES(16, 'Q G C G H', 'H N N B H', 'B Q I H Y');
           40  +  INSERT INTO x1(docid,a,b,c) VALUES(17, 'Q T S K B', 'W B D Y N', 'V J P E C');
           41  +  INSERT INTO x1(docid,a,b,c) VALUES(18, 'A J M O Q', 'L G Y Y A', 'G N M R N');
           42  +  INSERT INTO x1(docid,a,b,c) VALUES(19, 'T R Y P Y', 'N V Y B X', 'L Z T N T');
           43  +
           44  +  CREATE VIRTUAL TABLE x2 USING FTS4(a, b, c, order=DESC);
           45  +  INSERT INTO x2(docid, a, b, c) SELECT docid, a, b, c FROM x1;
           46  +}
           47  +
           48  +foreach x {1 2} {
           49  +  foreach {tn match res} {
           50  +    1  "^K"              {0 1 14}
           51  +    2  "^S"              {0 1 14}
           52  +    3  "^W"              {9 15 17}
           53  +    4  "^J"              {}
           54  +    5  "^E"              {12}
           55  +    6  "V ^-E"           {0 3 4 6 7 9 17 19}
           56  +    7  "V -^E"           {0 3 4 6 7 9 17 19}
           57  +    8  "^-E V"           {0 3 4 6 7 9 17 19}
           58  +    9  "-^E V"           {0 3 4 6 7 9 17 19}
           59  +    10 "V"               {0 3 4 6 7 9 12 17 19}
           60  +
           61  +    11 {"^K H"}          {0 14}
           62  +    12 {"K H"}           {0 10 14}
           63  +    13 {"K ^H"}          {}
           64  +  } {
           65  +    set rev [list]
           66  +    for {set ii [expr [llength $res]-1]} {$ii>=0} {incr ii -1} {
           67  +      lappend rev [lindex $res $ii]
           68  +    }
           69  +    do_execsql_test 1.$x.$tn.1 {SELECT docid FROM x1 WHERE x1 MATCH $match} $res
           70  +    do_execsql_test 1.$x.$tn.2 {SELECT docid FROM x2 WHERE x2 MATCH $match} $rev
           71  +  }
           72  +
           73  +  do_execsql_test 1.$x.[expr $tn+1] { 
           74  +    INSERT INTO x1(x1) VALUES('optimize');
           75  +    INSERT INTO x2(x2) VALUES('optimize');
           76  +  } {}
           77  +}
           78  +
           79  +finish_test

Changes to test/permutations.test.

   181    181     fts3near.test fts3query.test fts3shared.test fts3snippet.test 
   182    182     fts3sort.test
   183    183     fts3fault.test fts3malloc.test fts3matchinfo.test
   184    184     fts3aux1.test fts3comp1.test fts3auto.test
   185    185     fts4aa.test fts4content.test
   186    186     fts3conf.test fts3prefix.test fts3fault2.test fts3corrupt.test
   187    187     fts3corrupt2.test
          188  +  fts3first.test
   188    189   }
   189    190   
   190    191   
   191    192   lappend ::testsuitelist xxx
   192    193   #-------------------------------------------------------------------------
   193    194   # Define the coverage related test suites:
   194    195   #