/ Check-in [1c20c1c2]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Further tweaks to improve fts5 prefix query performance.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 1c20c1c28b56411f106cf2f6961b3ad4b4d6f6c8
User & Date: dan 2015-10-12 19:12:29
Context
2015-10-12
22:20
Fix a couple harmless compiler warnings. check-in: 7f896a97 user: mistachkin tags: trunk
19:12
Further tweaks to improve fts5 prefix query performance. check-in: 1c20c1c2 user: dan tags: trunk
04:56
Change all references to 3.8.12 into 3.9.0. Comment changes only - no changes to code. check-in: 6f2858f6 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5Int.h.

   250    250   
   251    251   #define FTS5_POS2COLUMN(iPos) (int)(iPos >> 32)
   252    252   #define FTS5_POS2OFFSET(iPos) (int)(iPos & 0xFFFFFFFF)
   253    253   
   254    254   typedef struct Fts5PoslistReader Fts5PoslistReader;
   255    255   struct Fts5PoslistReader {
   256    256     /* Variables used only by sqlite3Fts5PoslistIterXXX() functions. */
   257         -  int iCol;                       /* If (iCol>=0), this column only */
   258    257     const u8 *a;                    /* Position list to iterate through */
   259    258     int n;                          /* Size of buffer at a[] in bytes */
   260    259     int i;                          /* Current offset in a[] */
   261    260   
   262    261     u8 bFlag;                       /* For client use (any custom purpose) */
   263    262   
   264    263     /* Output variables */
   265    264     u8 bEof;                        /* Set to true at EOF */
   266    265     i64 iPos;                       /* (iCol<<32) + iPos */
   267    266   };
   268    267   int sqlite3Fts5PoslistReaderInit(
   269         -  int iCol,                       /* If (iCol>=0), this column only */
   270    268     const u8 *a, int n,             /* Poslist buffer to iterate through */
   271    269     Fts5PoslistReader *pIter        /* Iterator object to initialize */
   272    270   );
   273    271   int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader*);
   274    272   
   275    273   typedef struct Fts5PoslistWriter Fts5PoslistWriter;
   276    274   struct Fts5PoslistWriter {
................................................................................
   343    341   ** The various operations on open token or token prefix iterators opened
   344    342   ** using sqlite3Fts5IndexQuery().
   345    343   */
   346    344   int sqlite3Fts5IterEof(Fts5IndexIter*);
   347    345   int sqlite3Fts5IterNext(Fts5IndexIter*);
   348    346   int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch);
   349    347   i64 sqlite3Fts5IterRowid(Fts5IndexIter*);
   350         -int sqlite3Fts5IterPoslist(Fts5IndexIter*, const u8 **pp, int *pn, i64 *pi);
          348  +int sqlite3Fts5IterPoslist(Fts5IndexIter*,Fts5Colset*, const u8**, int*, i64*);
   351    349   int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf);
   352    350   
   353    351   /*
   354    352   ** Close an iterator opened by sqlite3Fts5IndexQuery().
   355    353   */
   356    354   void sqlite3Fts5IterClose(Fts5IndexIter*);
   357    355   

Changes to ext/fts5/fts5_buffer.c.

   199    199   
   200    200   
   201    201   /*
   202    202   ** Advance the iterator object passed as the only argument. Return true
   203    203   ** if the iterator reaches EOF, or false otherwise.
   204    204   */
   205    205   int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader *pIter){
   206         -  if( sqlite3Fts5PoslistNext64(pIter->a, pIter->n, &pIter->i, &pIter->iPos) 
   207         -   || (pIter->iCol>=0 && (pIter->iPos >> 32) > pIter->iCol)
   208         -  ){
          206  +  if( sqlite3Fts5PoslistNext64(pIter->a, pIter->n, &pIter->i, &pIter->iPos) ){
   209    207       pIter->bEof = 1;
   210    208     }
   211    209     return pIter->bEof;
   212    210   }
   213    211   
   214    212   int sqlite3Fts5PoslistReaderInit(
   215         -  int iCol,                       /* If (iCol>=0), this column only */
   216    213     const u8 *a, int n,             /* Poslist buffer to iterate through */
   217    214     Fts5PoslistReader *pIter        /* Iterator object to initialize */
   218    215   ){
   219    216     memset(pIter, 0, sizeof(*pIter));
   220    217     pIter->a = a;
   221    218     pIter->n = n;
   222         -  pIter->iCol = iCol;
   223         -  do {
   224         -    sqlite3Fts5PoslistReaderNext(pIter);
   225         -  }while( pIter->bEof==0 && (pIter->iPos >> 32)<iCol );
          219  +  sqlite3Fts5PoslistReaderNext(pIter);
   226    220     return pIter->bEof;
   227    221   }
   228    222   
   229    223   int sqlite3Fts5PoslistWriterAppend(
   230    224     Fts5Buffer *pBuf, 
   231    225     Fts5PoslistWriter *pWriter,
   232    226     i64 iPos

Changes to ext/fts5/fts5_expr.c.

   305    305   }
   306    306   
   307    307   /*
   308    308   ** Argument pTerm must be a synonym iterator.
   309    309   */
   310    310   static int fts5ExprSynonymPoslist(
   311    311     Fts5ExprTerm *pTerm, 
          312  +  Fts5Colset *pColset,
   312    313     i64 iRowid,
   313    314     int *pbDel,                     /* OUT: Caller should sqlite3_free(*pa) */
   314    315     u8 **pa, int *pn
   315    316   ){
   316    317     Fts5PoslistReader aStatic[4];
   317    318     Fts5PoslistReader *aIter = aStatic;
   318    319     int nIter = 0;
................................................................................
   323    324     assert( pTerm->pSynonym );
   324    325     for(p=pTerm; p; p=p->pSynonym){
   325    326       Fts5IndexIter *pIter = p->pIter;
   326    327       if( sqlite3Fts5IterEof(pIter)==0 && sqlite3Fts5IterRowid(pIter)==iRowid ){
   327    328         const u8 *a;
   328    329         int n;
   329    330         i64 dummy;
   330         -      rc = sqlite3Fts5IterPoslist(pIter, &a, &n, &dummy);
          331  +      rc = sqlite3Fts5IterPoslist(pIter, pColset, &a, &n, &dummy);
   331    332         if( rc!=SQLITE_OK ) goto synonym_poslist_out;
   332    333         if( nIter==nAlloc ){
   333    334           int nByte = sizeof(Fts5PoslistReader) * nAlloc * 2;
   334    335           Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc(nByte);
   335    336           if( aNew==0 ){
   336    337             rc = SQLITE_NOMEM;
   337    338             goto synonym_poslist_out;
   338    339           }
   339    340           memcpy(aNew, aIter, sizeof(Fts5PoslistReader) * nIter);
   340    341           nAlloc = nAlloc*2;
   341    342           if( aIter!=aStatic ) sqlite3_free(aIter);
   342    343           aIter = aNew;
   343    344         }
   344         -      sqlite3Fts5PoslistReaderInit(-1, a, n, &aIter[nIter]);
          345  +      sqlite3Fts5PoslistReaderInit(a, n, &aIter[nIter]);
   345    346         assert( aIter[nIter].bEof==0 );
   346    347         nIter++;
   347    348       }
   348    349     }
   349    350   
   350    351     assert( *pbDel==0 );
   351    352     if( nIter==1 ){
................................................................................
   405    406     int *pbMatch                    /* OUT: Set to true if really a match */
   406    407   ){
   407    408     Fts5PoslistWriter writer = {0};
   408    409     Fts5PoslistReader aStatic[4];
   409    410     Fts5PoslistReader *aIter = aStatic;
   410    411     int i;
   411    412     int rc = SQLITE_OK;
   412         -  int iCol = -1;
   413    413     
   414         -  if( pColset && pColset->nCol==1 ){
   415         -    iCol = pColset->aiCol[0];
   416         -    pColset = 0;
   417         -  }
   418         -
   419    414     fts5BufferZero(&pPhrase->poslist);
   420    415   
   421    416     /* If the aStatic[] array is not large enough, allocate a large array
   422    417     ** using sqlite3_malloc(). This approach could be improved upon. */
   423    418     if( pPhrase->nTerm>(sizeof(aStatic) / sizeof(aStatic[0])) ){
   424    419       int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm;
   425    420       aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte);
................................................................................
   431    426     for(i=0; i<pPhrase->nTerm; i++){
   432    427       Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
   433    428       i64 dummy;
   434    429       int n = 0;
   435    430       int bFlag = 0;
   436    431       const u8 *a = 0;
   437    432       if( pTerm->pSynonym ){
   438         -      rc = fts5ExprSynonymPoslist(pTerm, pNode->iRowid, &bFlag, (u8**)&a, &n);
          433  +      rc = fts5ExprSynonymPoslist(
          434  +          pTerm, pColset, pNode->iRowid, &bFlag, (u8**)&a, &n
          435  +      );
   439    436       }else{
   440         -      rc = sqlite3Fts5IterPoslist(pTerm->pIter, &a, &n, &dummy);
          437  +      rc = sqlite3Fts5IterPoslist(pTerm->pIter, pColset, &a, &n, &dummy);
   441    438       }
   442    439       if( rc!=SQLITE_OK ) goto ismatch_out;
   443         -    sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]);
          440  +    sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]);
   444    441       aIter[i].bFlag = bFlag;
   445    442       if( aIter[i].bEof ) goto ismatch_out;
   446    443     }
   447    444   
   448    445     while( 1 ){
   449    446       int bMatch;
   450    447       i64 iPos = aIter[0].iPos;
................................................................................
   459    456               if( sqlite3Fts5PoslistReaderNext(pPos) ) goto ismatch_out;
   460    457             }
   461    458             if( pPos->iPos>iAdj ) iPos = pPos->iPos-i;
   462    459           }
   463    460         }
   464    461       }while( bMatch==0 );
   465    462   
   466         -    if( pColset==0 || fts5ExprColsetTest(pColset, FTS5_POS2COLUMN(iPos)) ){
   467         -      /* Append position iPos to the output */
   468         -      rc = sqlite3Fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos);
   469         -      if( rc!=SQLITE_OK ) goto ismatch_out;
   470         -    }
          463  +    /* Append position iPos to the output */
          464  +    rc = sqlite3Fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos);
          465  +    if( rc!=SQLITE_OK ) goto ismatch_out;
   471    466   
   472    467       for(i=0; i<pPhrase->nTerm; i++){
   473    468         if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out;
   474    469       }
   475    470     }
   476    471   
   477    472    ismatch_out:
................................................................................
   758    753       bEof = 1;
   759    754     }else{
   760    755       *piLast = fts5ExprSynonymRowid(pTerm, bDesc, &bEof);
   761    756     }
   762    757     return bEof;
   763    758   }
   764    759   
   765         -/*
   766         -** IN/OUT parameter (*pa) points to a position list n bytes in size. If
   767         -** the position list contains entries for column iCol, then (*pa) is set
   768         -** to point to the sub-position-list for that column and the number of
   769         -** bytes in it returned. Or, if the argument position list does not
   770         -** contain any entries for column iCol, return 0.
   771         -*/
   772         -static int fts5ExprExtractCol(
   773         -  const u8 **pa,                  /* IN/OUT: Pointer to poslist */
   774         -  int n,                          /* IN: Size of poslist in bytes */
   775         -  int iCol                        /* Column to extract from poslist */
   776         -){
   777         -  int iCurrent = 0;
   778         -  const u8 *p = *pa;
   779         -  const u8 *pEnd = &p[n];         /* One byte past end of position list */
   780         -  u8 prev = 0;
   781         -
   782         -  while( iCol!=iCurrent ){
   783         -    /* Advance pointer p until it points to pEnd or an 0x01 byte that is
   784         -    ** not part of a varint */
   785         -    while( (prev & 0x80) || *p!=0x01 ){
   786         -      prev = *p++;
   787         -      if( p==pEnd ) return 0;
   788         -    }
   789         -    *pa = p++;
   790         -    p += fts5GetVarint32(p, iCurrent);
   791         -  }
   792         -
   793         -  /* Advance pointer p until it points to pEnd or an 0x01 byte that is
   794         -  ** not part of a varint */
   795         -  assert( (prev & 0x80)==0 );
   796         -  while( p<pEnd && ((prev & 0x80) || *p!=0x01) ){
   797         -    prev = *p++;
   798         -  }
   799         -  return p - (*pa);
   800         -}
   801         -
   802         -static int fts5ExprExtractColset (
   803         -  Fts5Colset *pColset,            /* Colset to filter on */
   804         -  const u8 *pPos, int nPos,       /* Position list */
   805         -  Fts5Buffer *pBuf                /* Output buffer */
   806         -){
   807         -  int rc = SQLITE_OK;
   808         -  int i;
   809         -
   810         -  fts5BufferZero(pBuf);
   811         -  for(i=0; i<pColset->nCol; i++){
   812         -    const u8 *pSub = pPos;
   813         -    int nSub = fts5ExprExtractCol(&pSub, nPos, pColset->aiCol[i]);
   814         -    if( nSub ){
   815         -      fts5BufferAppendBlob(&rc, pBuf, nSub, pSub);
   816         -    }
   817         -  }
   818         -  return rc;
   819         -}
   820    760   
   821    761   static int fts5ExprNearTest(
   822    762     int *pRc,
   823    763     Fts5Expr *pExpr,                /* Expression that pNear is a part of */
   824    764     Fts5ExprNode *pNode             /* The "NEAR" node (FTS5_STRING) */
   825    765   ){
   826    766     Fts5ExprNearset *pNear = pNode->pNear;
................................................................................
   860    800     ** fts5_index.c iterator object. This is much faster than synthesizing 
   861    801     ** a new poslist the way we have to for more complicated phrase or NEAR
   862    802     ** expressions.  */
   863    803     Fts5ExprNearset *pNear = pNode->pNear;
   864    804     Fts5ExprPhrase *pPhrase = pNear->apPhrase[0];
   865    805     Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
   866    806     Fts5Colset *pColset = pNear->pColset;
   867         -  const u8 *pPos;
   868         -  int nPos;
   869    807     int rc;
   870    808   
   871    809     assert( pNode->eType==FTS5_TERM );
   872    810     assert( pNear->nPhrase==1 && pPhrase->nTerm==1 );
   873    811     assert( pPhrase->aTerm[0].pSynonym==0 );
   874    812   
   875         -  rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid);
   876         -
   877         -  /* If the term may match any column, then this must be a match. 
   878         -  ** Return immediately in this case. Otherwise, try to find the
   879         -  ** part of the poslist that corresponds to the required column.
   880         -  ** If it can be found, return. If it cannot, the next iteration
   881         -  ** of the loop will test the next rowid in the database for this
   882         -  ** term.  */
   883         -  if( pColset==0 ){
   884         -    assert( pPhrase->poslist.nSpace==0 );
   885         -    pPhrase->poslist.p = (u8*)pPos;
   886         -    pPhrase->poslist.n = nPos;
   887         -  }else if( pColset->nCol==1 ){
   888         -    assert( pPhrase->poslist.nSpace==0 );
   889         -    pPhrase->poslist.n = fts5ExprExtractCol(&pPos, nPos, pColset->aiCol[0]);
   890         -    pPhrase->poslist.p = (u8*)pPos;
   891         -  }else if( rc==SQLITE_OK ){
   892         -    rc = fts5ExprExtractColset(pColset, pPos, nPos, &pPhrase->poslist);
   893         -  }
   894         -
          813  +  rc = sqlite3Fts5IterPoslist(pIter, pColset, 
          814  +      (const u8**)&pPhrase->poslist.p, &pPhrase->poslist.n, &pNode->iRowid
          815  +  );
   895    816     pNode->bNomatch = (pPhrase->poslist.n==0);
   896    817     return rc;
   897    818   }
   898    819   
   899    820   /*
   900    821   ** All individual term iterators in pNear are guaranteed to be valid when
   901    822   ** this function is called. This function checks if all term iterators

Changes to ext/fts5/fts5_index.c.

   507    507   struct Fts5IndexIter {
   508    508     Fts5Index *pIndex;              /* Index that owns this iterator */
   509    509     Fts5Structure *pStruct;         /* Database structure for this iterator */
   510    510     Fts5Buffer poslist;             /* Buffer containing current poslist */
   511    511   
   512    512     int nSeg;                       /* Size of aSeg[] array */
   513    513     int bRev;                       /* True to iterate in reverse order */
   514         -  int bSkipEmpty;                 /* True to skip deleted entries */
   515         -  int bEof;                       /* True at EOF */
          514  +  u8 bSkipEmpty;                  /* True to skip deleted entries */
          515  +  u8 bEof;                        /* True at EOF */
          516  +  u8 bFiltered;                   /* True if column-filter already applied */
   516    517   
   517    518     i64 iSwitchRowid;               /* Firstest rowid of other than aFirst[1] */
   518    519     Fts5CResult *aFirst;            /* Current merge state (see above) */
   519    520     Fts5SegIter aSeg[1];            /* Array of segment iterators */
   520    521   };
   521    522   
   522    523   
................................................................................
  1453   1454   ** Argument p points to a buffer containing a varint to be interpreted as a
  1454   1455   ** position list size field. Read the varint and return the number of bytes
  1455   1456   ** read. Before returning, set *pnSz to the number of bytes in the position
  1456   1457   ** list, and *pbDel to true if the delete flag is set, or false otherwise.
  1457   1458   */
  1458   1459   static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
  1459   1460     int nSz;
  1460         -  int n = fts5GetVarint32(p, nSz);
         1461  +  int n = 0;
         1462  +  fts5FastGetVarint32(p, n, nSz);
  1461   1463     assert_nc( nSz>=0 );
  1462   1464     *pnSz = nSz/2;
  1463   1465     *pbDel = nSz & 0x0001;
  1464   1466     return n;
  1465   1467   }
  1466   1468   
  1467   1469   /*
................................................................................
  1474   1476   **
  1475   1477   ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the 
  1476   1478   ** position list content (if any).
  1477   1479   */
  1478   1480   static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
  1479   1481     if( p->rc==SQLITE_OK ){
  1480   1482       int iOff = pIter->iLeafOffset;  /* Offset to read at */
         1483  +    int nSz;
  1481   1484       ASSERT_SZLEAF_OK(pIter->pLeaf);
  1482         -    if( iOff>=pIter->pLeaf->szLeaf ){
  1483         -      p->rc = FTS5_CORRUPT;
  1484         -    }else{
  1485         -      const u8 *a = &pIter->pLeaf->p[iOff];
  1486         -      pIter->iLeafOffset += fts5GetPoslistSize(a, &pIter->nPos, &pIter->bDel);
  1487         -    }
         1485  +    fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz);
         1486  +    pIter->bDel = (nSz & 0x0001);
         1487  +    pIter->nPos = nSz>>1;
         1488  +    pIter->iLeafOffset = iOff;
  1488   1489     }
  1489   1490   }
  1490   1491   
  1491   1492   static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
  1492   1493     u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
  1493   1494     int iOff = pIter->iLeafOffset;
  1494   1495   
................................................................................
  2721   2722     Fts5IndexIter **ppOut           /* New object */
  2722   2723   ){
  2723   2724     Fts5IndexIter *pNew;
  2724   2725     pNew = fts5MultiIterAlloc(p, 2);
  2725   2726     if( pNew ){
  2726   2727       Fts5SegIter *pIter = &pNew->aSeg[1];
  2727   2728   
         2729  +    pNew->bFiltered = 1;
  2728   2730       pIter->flags = FTS5_SEGITER_ONETERM;
  2729   2731       if( pData->szLeaf>0 ){
  2730   2732         pIter->pLeaf = pData;
  2731   2733         pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid);
  2732   2734         pIter->iEndofDoclist = pData->nn;
  2733   2735         pNew->aFirst[1].iFirst = 1;
  2734   2736         if( bDesc ){
................................................................................
  3936   3938   static void fts5PoslistCallback(
  3937   3939     Fts5Index *p, 
  3938   3940     void *pContext, 
  3939   3941     const u8 *pChunk, int nChunk
  3940   3942   ){
  3941   3943     assert_nc( nChunk>=0 );
  3942   3944     if( nChunk>0 ){
  3943         -    fts5BufferAppendBlob(&p->rc, (Fts5Buffer*)pContext, nChunk, pChunk);
         3945  +    fts5BufferSafeAppendBlob((Fts5Buffer*)pContext, pChunk, nChunk);
  3944   3946     }
  3945   3947   }
  3946   3948   
  3947   3949   typedef struct PoslistCallbackCtx PoslistCallbackCtx;
  3948   3950   struct PoslistCallbackCtx {
  3949   3951     Fts5Buffer *pBuf;               /* Append to this buffer */
  3950   3952     Fts5Colset *pColset;            /* Restrict matches to this column */
................................................................................
  3976   3978       int iStart = 0;
  3977   3979   
  3978   3980       if( pCtx->eState==2 ){
  3979   3981         int iCol;
  3980   3982         fts5FastGetVarint32(pChunk, i, iCol);
  3981   3983         if( fts5IndexColsetTest(pCtx->pColset, iCol) ){
  3982   3984           pCtx->eState = 1;
  3983         -        fts5BufferAppendVarint(&p->rc, pCtx->pBuf, 1);
         3985  +        fts5BufferSafeAppendVarint(pCtx->pBuf, 1);
  3984   3986         }else{
  3985   3987           pCtx->eState = 0;
  3986   3988         }
  3987   3989       }
  3988   3990   
  3989   3991       do {
  3990   3992         while( i<nChunk && pChunk[i]!=0x01 ){
  3991   3993           while( pChunk[i] & 0x80 ) i++;
  3992   3994           i++;
  3993   3995         }
  3994   3996         if( pCtx->eState ){
  3995         -        fts5BufferAppendBlob(&p->rc, pCtx->pBuf, i-iStart, &pChunk[iStart]);
         3997  +        fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
  3996   3998         }
  3997   3999         if( i<nChunk ){
  3998   4000           int iCol;
  3999   4001           iStart = i;
  4000   4002           i++;
  4001   4003           if( i>=nChunk ){
  4002   4004             pCtx->eState = 2;
  4003   4005           }else{
  4004   4006             fts5FastGetVarint32(pChunk, i, iCol);
  4005   4007             pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol);
  4006   4008             if( pCtx->eState ){
  4007         -            fts5BufferAppendBlob(&p->rc, pCtx->pBuf, i-iStart, &pChunk[iStart]);
         4009  +            fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
  4008   4010               iStart = i;
  4009   4011             }
  4010   4012           }
  4011   4013         }
  4012   4014       }while( i<nChunk );
  4013   4015     }
  4014   4016   }
................................................................................
  4021   4023   */
  4022   4024   static void fts5SegiterPoslist(
  4023   4025     Fts5Index *p,
  4024   4026     Fts5SegIter *pSeg,
  4025   4027     Fts5Colset *pColset,
  4026   4028     Fts5Buffer *pBuf
  4027   4029   ){
  4028         -  if( pColset==0 ){
  4029         -    fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
  4030         -  }else{
  4031         -    PoslistCallbackCtx sCtx;
  4032         -    sCtx.pBuf = pBuf;
  4033         -    sCtx.pColset = pColset;
  4034         -    sCtx.eState = pColset ? fts5IndexColsetTest(pColset, 0) : 1;
  4035         -    assert( sCtx.eState==0 || sCtx.eState==1 );
  4036         -    fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
  4037         -  }
  4038         -}
         4030  +  if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos) ){
         4031  +    if( pColset==0 ){
         4032  +      fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
         4033  +    }else{
         4034  +      PoslistCallbackCtx sCtx;
         4035  +      sCtx.pBuf = pBuf;
         4036  +      sCtx.pColset = pColset;
         4037  +      sCtx.eState = pColset ? fts5IndexColsetTest(pColset, 0) : 1;
         4038  +      assert( sCtx.eState==0 || sCtx.eState==1 );
         4039  +      fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
         4040  +    }
         4041  +  }
         4042  +}
         4043  +
         4044  +/*
         4045  +** IN/OUT parameter (*pa) points to a position list n bytes in size. If
         4046  +** the position list contains entries for column iCol, then (*pa) is set
         4047  +** to point to the sub-position-list for that column and the number of
         4048  +** bytes in it returned. Or, if the argument position list does not
         4049  +** contain any entries for column iCol, return 0.
         4050  +*/
         4051  +static int fts5IndexExtractCol(
         4052  +  const u8 **pa,                  /* IN/OUT: Pointer to poslist */
         4053  +  int n,                          /* IN: Size of poslist in bytes */
         4054  +  int iCol                        /* Column to extract from poslist */
         4055  +){
         4056  +  int iCurrent = 0;               /* Anything before the first 0x01 is col 0 */
         4057  +  const u8 *p = *pa;
         4058  +  const u8 *pEnd = &p[n];         /* One byte past end of position list */
         4059  +  u8 prev = 0;
         4060  +
         4061  +  while( iCol!=iCurrent ){
         4062  +    /* Advance pointer p until it points to pEnd or an 0x01 byte that is
         4063  +    ** not part of a varint */
         4064  +    while( (prev & 0x80) || *p!=0x01 ){
         4065  +      prev = *p++;
         4066  +      if( p==pEnd ) return 0;
         4067  +    }
         4068  +    *pa = p++;
         4069  +    p += fts5GetVarint32(p, iCurrent);
         4070  +  }
         4071  +
         4072  +  /* Advance pointer p until it points to pEnd or an 0x01 byte that is
         4073  +  ** not part of a varint */
         4074  +  assert( (prev & 0x80)==0 );
         4075  +  while( p<pEnd && ((prev & 0x80) || *p!=0x01) ){
         4076  +    prev = *p++;
         4077  +  }
         4078  +  return p - (*pa);
         4079  +}
         4080  +
  4039   4081   
  4040   4082   /*
  4041   4083   ** Iterator pMulti currently points to a valid entry (not EOF). This
  4042         -** function appends a copy of the position-list of the entry pMulti 
  4043         -** currently points to to buffer pBuf.
         4084  +** function appends the following to buffer pBuf:
  4044   4085   **
  4045         -** If an error occurs, an error code is left in p->rc. It is assumed
  4046         -** no error has already occurred when this function is called.
         4086  +**   * The varint iDelta, and
         4087  +**   * the position list that currently points to, including the size field.
         4088  +**
         4089  +** If argument pColset is NULL, then the position list is filtered according
         4090  +** to pColset before being appended to the buffer. If this means there are
         4091  +** no entries in the position list, nothing is appended to the buffer (not
         4092  +** even iDelta).
         4093  +**
         4094  +** If an error occurs, an error code is left in p->rc. 
  4047   4095   */
  4048         -static int fts5MultiIterPoslist(
         4096  +static int fts5AppendPoslist(
  4049   4097     Fts5Index *p,
         4098  +  i64 iDelta,
  4050   4099     Fts5IndexIter *pMulti,
  4051   4100     Fts5Colset *pColset,
  4052   4101     Fts5Buffer *pBuf
  4053   4102   ){
  4054   4103     if( p->rc==SQLITE_OK ){
  4055         -    int iSz;
  4056         -    int iData;
  4057         -
  4058   4104       Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ];
  4059   4105       assert( fts5MultiIterEof(p, pMulti)==0 );
         4106  +    assert( pSeg->nPos>0 );
         4107  +    if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos+9+9) ){
         4108  +      int iSv1;
         4109  +      int iSv2;
         4110  +      int iData;
  4060   4111   
  4061         -    /* WRITEPOSLISTSIZE */
  4062         -    iSz = pBuf->n;
  4063         -    fts5BufferSafeAppendVarint(pBuf, pSeg->nPos*2);
  4064         -    iData = pBuf->n;
         4112  +      /* Append iDelta */
         4113  +      iSv1 = pBuf->n;
         4114  +      fts5BufferSafeAppendVarint(pBuf, iDelta);
  4065   4115   
  4066         -    fts5SegiterPoslist(p, pSeg, pColset, pBuf);
         4116  +      /* WRITEPOSLISTSIZE */
         4117  +      iSv2 = pBuf->n;
         4118  +      fts5BufferSafeAppendVarint(pBuf, pSeg->nPos*2);
         4119  +      iData = pBuf->n;
  4067   4120   
  4068         -    if( pColset ){
  4069         -      int nActual = pBuf->n - iData;
  4070         -      if( nActual!=pSeg->nPos ){
  4071         -        /* WRITEPOSLISTSIZE */
  4072         -        if( nActual==0 ){
  4073         -          return 1;
         4121  +      if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf 
         4122  +       && (pColset==0 || pColset->nCol==1)
         4123  +      ){
         4124  +        const u8 *pPos = &pSeg->pLeaf->p[pSeg->iLeafOffset];
         4125  +        int nPos;
         4126  +        if( pColset ){
         4127  +          nPos = fts5IndexExtractCol(&pPos, pSeg->nPos, pColset->aiCol[0]);
  4074   4128           }else{
  4075         -          int nReq = sqlite3Fts5GetVarintLen((u32)(nActual*2));
  4076         -          while( iSz<(iData-nReq) ){ pBuf->p[iSz++] = 0x80; }
  4077         -          sqlite3Fts5PutVarint(&pBuf->p[iSz], nActual*2);
         4129  +          nPos = pSeg->nPos;
         4130  +        }
         4131  +        fts5BufferSafeAppendBlob(pBuf, pPos, nPos);
         4132  +      }else{
         4133  +        fts5SegiterPoslist(p, pSeg, pColset, pBuf);
         4134  +      }
         4135  +
         4136  +      if( pColset ){
         4137  +        int nActual = pBuf->n - iData;
         4138  +        if( nActual!=pSeg->nPos ){
         4139  +          if( nActual==0 ){
         4140  +            pBuf->n = iSv1;
         4141  +            return 1;
         4142  +          }else{
         4143  +            int nReq = sqlite3Fts5GetVarintLen((u32)(nActual*2));
         4144  +            while( iSv2<(iData-nReq) ){ pBuf->p[iSv2++] = 0x80; }
         4145  +            sqlite3Fts5PutVarint(&pBuf->p[iSv2], nActual*2);
         4146  +          }
  4078   4147           }
  4079   4148         }
  4080   4149       }
  4081   4150     }
  4082   4151   
  4083   4152     return 0;
  4084   4153   }
................................................................................
  4278   4347               fts5MergePrefixLists(p, &doclist, &aBuf[i]);
  4279   4348               fts5BufferZero(&aBuf[i]);
  4280   4349             }
  4281   4350           }
  4282   4351           iLastRowid = 0;
  4283   4352         }
  4284   4353   
  4285         -      if( 0==sqlite3Fts5BufferGrow(&p->rc, &doclist, 9) ){
  4286         -        int iSave = doclist.n;
  4287         -        assert( doclist.n!=0 || iLastRowid==0 );
  4288         -        fts5BufferSafeAppendVarint(&doclist, iRowid - iLastRowid);
  4289         -        if( fts5MultiIterPoslist(p, p1, pColset, &doclist) ){
  4290         -          doclist.n = iSave;
  4291         -        }else{
  4292         -          iLastRowid = iRowid;
  4293         -        }
         4354  +      if( !fts5AppendPoslist(p, iRowid-iLastRowid, p1, pColset, &doclist) ){
         4355  +        iLastRowid = iRowid;
  4294   4356         }
  4295   4357       }
  4296   4358   
  4297   4359       for(i=0; i<nBuf; i++){
  4298   4360         if( p->rc==SQLITE_OK ){
  4299   4361           fts5MergePrefixLists(p, &doclist, &aBuf[i]);
  4300   4362         }
................................................................................
  4643   4705   const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIter, int *pn){
  4644   4706     int n;
  4645   4707     const char *z = (const char*)fts5MultiIterTerm(pIter, &n);
  4646   4708     *pn = n-1;
  4647   4709     return &z[1];
  4648   4710   }
  4649   4711   
         4712  +
         4713  +static int fts5IndexExtractColset (
         4714  +  Fts5Colset *pColset,            /* Colset to filter on */
         4715  +  const u8 *pPos, int nPos,       /* Position list */
         4716  +  Fts5Buffer *pBuf                /* Output buffer */
         4717  +){
         4718  +  int rc = SQLITE_OK;
         4719  +  int i;
         4720  +
         4721  +  fts5BufferZero(pBuf);
         4722  +  for(i=0; i<pColset->nCol; i++){
         4723  +    const u8 *pSub = pPos;
         4724  +    int nSub = fts5IndexExtractCol(&pSub, nPos, pColset->aiCol[i]);
         4725  +    if( nSub ){
         4726  +      fts5BufferAppendBlob(&rc, pBuf, nSub, pSub);
         4727  +    }
         4728  +  }
         4729  +  return rc;
         4730  +}
         4731  +
  4650   4732   
  4651   4733   /*
  4652   4734   ** Return a pointer to a buffer containing a copy of the position list for
  4653   4735   ** the current entry. Output variable *pn is set to the size of the buffer 
  4654   4736   ** in bytes before returning.
  4655   4737   **
  4656   4738   ** The returned position list does not include the "number of bytes" varint
  4657   4739   ** field that starts the position list on disk.
  4658   4740   */
  4659   4741   int sqlite3Fts5IterPoslist(
  4660   4742     Fts5IndexIter *pIter, 
         4743  +  Fts5Colset *pColset,            /* Column filter (or NULL) */
  4661   4744     const u8 **pp,                  /* OUT: Pointer to position-list data */
  4662   4745     int *pn,                        /* OUT: Size of position-list in bytes */
  4663   4746     i64 *piRowid                    /* OUT: Current rowid */
  4664   4747   ){
  4665   4748     Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
  4666   4749     assert( pIter->pIndex->rc==SQLITE_OK );
  4667   4750     *piRowid = pSeg->iRowid;
  4668         -  *pn = pSeg->nPos;
  4669         -  if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->szLeaf ){
  4670         -    *pp = &pSeg->pLeaf->p[pSeg->iLeafOffset];
         4751  +  if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
         4752  +    u8 *pPos = &pSeg->pLeaf->p[pSeg->iLeafOffset];
         4753  +    if( pColset==0 || pIter->bFiltered ){
         4754  +      *pn = pSeg->nPos;
         4755  +      *pp = pPos;
         4756  +    }else if( pColset->nCol==1 ){
         4757  +      *pp = pPos;
         4758  +      *pn = fts5IndexExtractCol(pp, pSeg->nPos, pColset->aiCol[0]);
         4759  +    }else{
         4760  +      fts5BufferZero(&pIter->poslist);
         4761  +      fts5IndexExtractColset(pColset, pPos, pSeg->nPos, &pIter->poslist);
         4762  +      *pp = pIter->poslist.p;
         4763  +      *pn = pIter->poslist.n;
         4764  +    }
  4671   4765     }else{
  4672   4766       fts5BufferZero(&pIter->poslist);
  4673         -    fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist);
         4767  +    fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
  4674   4768       *pp = pIter->poslist.p;
         4769  +    *pn = pIter->poslist.n;
  4675   4770     }
  4676   4771     return fts5IndexReturn(pIter->pIndex);
  4677   4772   }
  4678   4773   
  4679   4774   /*
  4680   4775   ** This function is similar to sqlite3Fts5IterPoslist(), except that it
  4681   4776   ** copies the position list into the buffer supplied as the second 
................................................................................
  4864   4959     int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIdxIter);
  4865   4960   
  4866   4961     while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){
  4867   4962       i64 dummy;
  4868   4963       const u8 *pPos;
  4869   4964       int nPos;
  4870   4965       i64 rowid = sqlite3Fts5IterRowid(pIdxIter);
  4871         -    rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos, &dummy);
         4966  +    rc = sqlite3Fts5IterPoslist(pIdxIter, 0, &pPos, &nPos, &dummy);
  4872   4967       if( rc==SQLITE_OK ){
  4873   4968         Fts5PoslistReader sReader;
  4874         -      for(sqlite3Fts5PoslistReaderInit(-1, pPos, nPos, &sReader);
         4969  +      for(sqlite3Fts5PoslistReaderInit(pPos, nPos, &sReader);
  4875   4970             sReader.bEof==0;
  4876   4971             sqlite3Fts5PoslistReaderNext(&sReader)
  4877   4972         ){
  4878   4973           int iCol = FTS5_POS2COLUMN(sReader.iPos);
  4879   4974           int iOff = FTS5_POS2OFFSET(sReader.iPos);
  4880   4975           cksum ^= fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
  4881   4976         }

Changes to ext/fts5/fts5_main.c.

  1639   1639       int nInst = 0;                /* Number instances seen so far */
  1640   1640       int i;
  1641   1641   
  1642   1642       /* Initialize all iterators */
  1643   1643       for(i=0; i<nIter; i++){
  1644   1644         const u8 *a;
  1645   1645         int n = fts5CsrPoslist(pCsr, i, &a);
  1646         -      sqlite3Fts5PoslistReaderInit(-1, a, n, &aIter[i]);
         1646  +      sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]);
  1647   1647       }
  1648   1648   
  1649   1649       while( 1 ){
  1650   1650         int *aInst;
  1651   1651         int iBest = -1;
  1652   1652         for(i=0; i<nIter; i++){
  1653   1653           if( (aIter[i].bEof==0) 

Changes to ext/fts5/fts5_vocab.c.

   345    345         assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW );
   346    346         while( rc==SQLITE_OK ){
   347    347           i64 dummy;
   348    348           const u8 *pPos; int nPos;   /* Position list */
   349    349           i64 iPos = 0;               /* 64-bit position read from poslist */
   350    350           int iOff = 0;               /* Current offset within position list */
   351    351   
   352         -        rc = sqlite3Fts5IterPoslist(pCsr->pIter, &pPos, &nPos, &dummy);
          352  +        rc = sqlite3Fts5IterPoslist(pCsr->pIter, 0, &pPos, &nPos, &dummy);
   353    353           if( rc==SQLITE_OK ){
   354    354             if( pTab->eType==FTS5_VOCAB_ROW ){
   355    355               while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
   356    356                 pCsr->aVal[1]++;
   357    357               }
   358    358               pCsr->aVal[0]++;
   359    359             }else{

Changes to ext/fts5/test/fts5simple.test.

   246    246     INSERT INTO t3 VALUES('bac aab bab', 'c bac c', 'acb aba abb'); -- 1
   247    247     INSERT INTO t3 VALUES('bab abc c', 'acb c abb', 'c aaa c');     -- 2
   248    248   }
   249    249   
   250    250   do_execsql_test 10.1 {
   251    251     SELECT rowid FROM t3('c: c*');
   252    252   } {2}
          253  +
          254  +do_execsql_test 10.2 {
          255  +  SELECT rowid FROM t3('b: acb');
          256  +} {2}
   253    257   
   254    258   #-------------------------------------------------------------------------
   255    259   # Test that character 0x1A is allowed in fts5 barewords.
   256    260   #
   257    261   do_test 11.0 {
   258    262     execsql "CREATE VIRTUAL TABLE t4 USING fts5(x, tokenize=\"ascii tokenchars '\x1A'\")"
   259    263     execsql "
................................................................................
   277    281   do_test 11.4 {
   278    282     catchsql "SELECT rowid FROM t4('d\x1B')"
   279    283   } {/fts5: syntax error/}
   280    284   do_test 11.5 {
   281    285     catchsql "SELECT rowid FROM t4('d\x19')"
   282    286   } {/fts5: syntax error/}
   283    287   
          288  +#-------------------------------------------------------------------------
          289  +#
          290  +do_test 12.1 {
          291  +  execsql {
          292  +    CREATE VIRTUAL TABLE xx USING fts5(x,y);
          293  +    BEGIN;
          294  +      INSERT INTO xx VALUES('1 2 3', 'a b c');
          295  +  }
          296  +} {}
          297  +
          298  +do_execsql_test 12.2 {
          299  +  SELECT rowid FROM xx('x:a');
          300  +  COMMIT;
          301  +} {}
   284    302   
   285    303   finish_test
   286    304