/ Check-in [ea543f08]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Allow multi-token phrases to load doclists from the database incrementally. This allows queries that feature such phrases to benefit from the "docid<?" optimization.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts4-docid-range-constraints
Files: files | file ages | folders
SHA1: ea543f081d93ed1bf66c21ce2108ec94e349f4c5
User & Date: dan 2013-10-01 20:02:32
Context
2013-10-01
20:10
Merge trunk changes with this branch. check-in: 65d9c6fa user: dan tags: fts4-docid-range-constraints
20:02
Allow multi-token phrases to load doclists from the database incrementally. This allows queries that feature such phrases to benefit from the "docid<?" optimization. check-in: ea543f08 user: dan tags: fts4-docid-range-constraints
2013-09-30
18:16
Merge trunk changes with this branch. check-in: e294a9c7 user: dan tags: fts4-docid-range-constraints
Changes
Hide Diffs Side-by-Side Diffs Show Whitespace Changes Patch

Changes to ext/fts3/fts3.c.

  4011   4011         sqlite3_free(aPoslist);
  4012   4012       }
  4013   4013     }
  4014   4014   
  4015   4015     return SQLITE_OK;
  4016   4016   }
  4017   4017   
         4018  +/*
         4019  +** Maximum number of tokens a phrase may have to be considered for the
         4020  +** incremental doclists strategy.
         4021  +*/
         4022  +#define MAX_INCR_PHRASE_TOKENS 4
         4023  +
  4018   4024   /*
  4019   4025   ** This function is called for each Fts3Phrase in a full-text query 
  4020   4026   ** expression to initialize the mechanism for returning rows. Once this
  4021   4027   ** function has been called successfully on an Fts3Phrase, it may be
  4022   4028   ** used with fts3EvalPhraseNext() to iterate through the matching docids.
  4023   4029   **
  4024   4030   ** If parameter bOptOk is true, then the phrase may (or may not) use the
  4025   4031   ** incremental loading strategy. Otherwise, the entire doclist is loaded into
  4026   4032   ** memory within this call.
  4027   4033   **
  4028   4034   ** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
  4029   4035   */
  4030   4036   static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){
  4031         -  int rc;                         /* Error code */
  4032         -  Fts3PhraseToken *pFirst = &p->aToken[0];
  4033   4037     Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
         4038  +  int rc = SQLITE_OK;             /* Error code */
         4039  +  int i;
  4034   4040   
  4035         -  if( pCsr->bDesc==pTab->bDescIdx 
  4036         -   && bOptOk==1 
  4037         -   && p->nToken==1 
  4038         -   && pFirst->pSegcsr 
  4039         -   && pFirst->pSegcsr->bLookup 
  4040         -   && pFirst->bFirst==0
  4041         -  ){
         4041  +  /* Determine if doclists may be loaded from disk incrementally. This is
         4042  +  ** possible if the bOptOk argument is true, the FTS doclists will be
         4043  +  ** scanned in forward order, and the phrase consists of 
         4044  +  ** MAX_INCR_PHRASE_TOKENS or fewer tokens, none of which are are "^first"
         4045  +  ** tokens or prefix tokens that cannot use a prefix-index.  */
         4046  +  int bIncrOk = (bOptOk 
         4047  +   && pCsr->bDesc==pTab->bDescIdx 
         4048  +   && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0
         4049  +  );
         4050  +  for(i=0; bIncrOk==1 && i<p->nToken; i++){
         4051  +    Fts3PhraseToken *pToken = &p->aToken[i];
         4052  +    if( pToken->bFirst || !pToken->pSegcsr || !pToken->pSegcsr->bLookup ){
         4053  +      bIncrOk = 0;
         4054  +    }
         4055  +  }
         4056  +
         4057  +  if( bIncrOk ){
  4042   4058       /* Use the incremental approach. */
  4043   4059       int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn);
  4044         -    rc = sqlite3Fts3MsrIncrStart(
  4045         -        pTab, pFirst->pSegcsr, iCol, pFirst->z, pFirst->n);
  4046         -    p->bIncr = 1;
  4047         -
         4060  +    for(i=0; rc==SQLITE_OK && i<p->nToken; i++){
         4061  +      Fts3PhraseToken *pTok = &p->aToken[i];
         4062  +      rc = sqlite3Fts3MsrIncrStart(pTab, pTok->pSegcsr, iCol, pTok->z, pTok->n);
         4063  +    }
  4048   4064     }else{
  4049   4065       /* Load the full doclist for the phrase into memory. */
  4050   4066       rc = fts3EvalPhraseLoad(pCsr, p);
  4051         -    p->bIncr = 0;
  4052   4067     }
         4068  +  p->bIncr = bIncrOk;
  4053   4069   
  4054   4070     assert( rc!=SQLITE_OK || p->nToken<1 || p->aToken[0].pSegcsr==0 || p->bIncr );
  4055   4071     return rc;
  4056   4072   }
  4057   4073   
  4058   4074   /*
  4059   4075   ** This function is used to iterate backwards (from the end to start) 
................................................................................
  4148   4164         p += sqlite3Fts3GetVarint(p, &iVar);
  4149   4165         *piDocid += ((bDescIdx ? -1 : 1) * iVar);
  4150   4166       }
  4151   4167     }
  4152   4168   
  4153   4169     *ppIter = p;
  4154   4170   }
         4171  +
         4172  +/*
         4173  +** Helper type used by fts3EvalIncrPhraseNext() and incrPhraseTokenNext().
         4174  +*/
         4175  +typedef struct TokenDoclist TokenDoclist;
         4176  +struct TokenDoclist {
         4177  +  sqlite3_int64 iDocid;
         4178  +  char *pList;
         4179  +  int nList;
         4180  +};
         4181  +
         4182  +/*
         4183  +** Token pToken is an incrementally loaded token that is part of a 
         4184  +** multi-token phrase. Advance it to the next matching document in the
         4185  +** database and populate output variable *p with the details of the new
         4186  +** entry. Or, if the iterator has reached EOF, set *pbEof to true.
         4187  +**
         4188  +** If an error occurs, return an SQLite error code. Otherwise, return 
         4189  +** SQLITE_OK.
         4190  +*/
         4191  +static int incrPhraseTokenNext(
         4192  +  Fts3Table *pTab,                /* Virtual table handle */
         4193  +  Fts3PhraseToken *pToken,        /* Advance the iterator for this token */
         4194  +  TokenDoclist *p,                /* OUT: Docid and doclist for new entry */
         4195  +  int *pbEof                      /* OUT: True if iterator is at EOF */
         4196  +){
         4197  +  int rc;
         4198  +  assert( pToken->pDeferred==0 );
         4199  +  rc = sqlite3Fts3MsrIncrNext(
         4200  +      pTab, pToken->pSegcsr, &p->iDocid, &p->pList, &p->nList
         4201  +  );
         4202  +  if( p->pList==0 ) *pbEof = 1;
         4203  +  return rc;
         4204  +}
         4205  +
         4206  +
         4207  +/*
         4208  +** The phrase iterator passed as the second argument uses the incremental
         4209  +** doclist strategy. Advance it to the next matching documnent in the
         4210  +** database. If an error occurs, return an SQLite error code. Otherwise, 
         4211  +** return SQLITE_OK.
         4212  +**
         4213  +** If there is no "next" entry and no error occurs, then *pbEof is set to
         4214  +** 1 before returning. Otherwise, if no error occurs and the iterator is
         4215  +** successfully advanced, *pbEof is set to 0.
         4216  +*/
         4217  +static int fts3EvalIncrPhraseNext(
         4218  +  Fts3Cursor *pCsr,               /* FTS Cursor handle */
         4219  +  Fts3Phrase *p,                  /* Phrase object to advance to next docid */
         4220  +  u8 *pbEof                       /* OUT: Set to 1 if EOF */
         4221  +){
         4222  +  int rc = SQLITE_OK;
         4223  +  Fts3Doclist *pDL = &p->doclist;
         4224  +  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
         4225  +  int bEof = 0;
         4226  +
         4227  +  assert( p->bIncr==1 );
         4228  +  assert( pDL->pNextDocid==0 );
         4229  +
         4230  +  if( p->nToken==1 ){
         4231  +    rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr, 
         4232  +        &pDL->iDocid, &pDL->pList, &pDL->nList
         4233  +    );
         4234  +    if( pDL->pList==0 ) bEof = 1;
         4235  +  }else{
         4236  +    int bDescDoclist = pCsr->bDesc;
         4237  +    struct TokenDoclist a[MAX_INCR_PHRASE_TOKENS];
         4238  +
         4239  +    assert( p->nToken<=MAX_INCR_PHRASE_TOKENS );
         4240  +
         4241  +    while( bEof==0 ){
         4242  +      sqlite3_int64 iMax;         /* Largest docid for all iterators */
         4243  +      int i;                      /* Used to iterate through tokens */
         4244  +
         4245  +      /* Advance the iterator for each token in the phrase once. */
         4246  +      for(i=0; rc==SQLITE_OK && i<p->nToken; i++){
         4247  +        rc = incrPhraseTokenNext(pTab, &p->aToken[i], &a[i], &bEof);
         4248  +        if( i==0 || DOCID_CMP(iMax, a[i].iDocid)<0 ){
         4249  +          iMax = a[i].iDocid;
         4250  +        }
         4251  +      }
         4252  +
         4253  +      /* Keep advancing iterators until they all point to the same document */
         4254  +      if( bEof==0 && rc==SQLITE_OK ){
         4255  +        for(i=0; i<p->nToken; i++){
         4256  +          while( DOCID_CMP(a[i].iDocid, iMax)<0 && rc==SQLITE_OK && bEof==0 ){
         4257  +            rc = incrPhraseTokenNext(pTab, &p->aToken[i], &a[i], &bEof);
         4258  +            if( DOCID_CMP(a[i].iDocid, iMax)>0 ){
         4259  +              iMax = a[i].iDocid;
         4260  +              i = 0;
         4261  +            }
         4262  +          }
         4263  +        }
         4264  +      }
         4265  +
         4266  +      /* Check if the current entries really are a phrase match */
         4267  +      if( bEof==0 ){
         4268  +        int nByte = a[p->nToken-1].nList;
         4269  +        char *aDoclist = sqlite3_malloc(nByte+1);
         4270  +        if( !aDoclist ) return SQLITE_NOMEM;
         4271  +        memcpy(aDoclist, a[p->nToken-1].pList, nByte+1);
         4272  +
         4273  +        int nList;
         4274  +        for(i=0; i<(p->nToken-1); i++){
         4275  +          char *pLeft = a[i].pList;
         4276  +          char *pRight = aDoclist;
         4277  +          char *pOut = aDoclist;
         4278  +          int nDist = p->nToken-1-i;
         4279  +          int res = fts3PoslistPhraseMerge(&pOut, nDist, 0, 1, &pLeft, &pRight);
         4280  +          if( res==0 ) break;
         4281  +          nList = (pOut - aDoclist);
         4282  +        }
         4283  +        if( i==(p->nToken-1) ){
         4284  +          pDL->iDocid = a[0].iDocid;
         4285  +          pDL->pList = aDoclist;
         4286  +          pDL->nList = nList;
         4287  +          pDL->bFreeList = 1;
         4288  +          break;
         4289  +        }
         4290  +        sqlite3_free(aDoclist);
         4291  +      }
         4292  +    }
         4293  +  }
         4294  +
         4295  +  *pbEof = bEof;
         4296  +  return rc;
         4297  +}
  4155   4298   
  4156   4299   /*
  4157   4300   ** Attempt to move the phrase iterator to point to the next matching docid. 
  4158   4301   ** If an error occurs, return an SQLite error code. Otherwise, return 
  4159   4302   ** SQLITE_OK.
  4160   4303   **
  4161   4304   ** If there is no "next" entry and no error occurs, then *pbEof is set to
................................................................................
  4168   4311     u8 *pbEof                       /* OUT: Set to 1 if EOF */
  4169   4312   ){
  4170   4313     int rc = SQLITE_OK;
  4171   4314     Fts3Doclist *pDL = &p->doclist;
  4172   4315     Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  4173   4316   
  4174   4317     if( p->bIncr ){
  4175         -    assert( p->nToken==1 );
  4176         -    assert( pDL->pNextDocid==0 );
  4177         -    rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr, 
  4178         -        &pDL->iDocid, &pDL->pList, &pDL->nList
  4179         -    );
  4180         -    if( rc==SQLITE_OK && !pDL->pList ){
  4181         -      *pbEof = 1;
  4182         -    }
         4318  +    rc = fts3EvalIncrPhraseNext(pCsr, p, pbEof);
  4183   4319     }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){
  4184   4320       sqlite3Fts3DoclistPrev(pTab->bDescIdx, pDL->aAll, pDL->nAll, 
  4185   4321           &pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof
  4186   4322       );
  4187   4323       pDL->pList = pDL->pNextDocid;
  4188   4324     }else{
  4189   4325       char *pIter;                            /* Used to iterate through aAll */
................................................................................
  4241   4377   **
  4242   4378   ** If an error occurs within this function, *pRc is set to an SQLite error
  4243   4379   ** code before returning.
  4244   4380   */
  4245   4381   static void fts3EvalStartReaders(
  4246   4382     Fts3Cursor *pCsr,               /* FTS Cursor handle */
  4247   4383     Fts3Expr *pExpr,                /* Expression to initialize phrases in */
  4248         -  int bOptOk,                     /* True to enable incremental loading */
  4249   4384     int *pRc                        /* IN/OUT: Error code */
  4250   4385   ){
  4251   4386     if( pExpr && SQLITE_OK==*pRc ){
  4252   4387       if( pExpr->eType==FTSQUERY_PHRASE ){
  4253   4388         int i;
  4254   4389         int nToken = pExpr->pPhrase->nToken;
  4255   4390         for(i=0; i<nToken; i++){
  4256   4391           if( pExpr->pPhrase->aToken[i].pDeferred==0 ) break;
  4257   4392         }
  4258   4393         pExpr->bDeferred = (i==nToken);
  4259         -      *pRc = fts3EvalPhraseStart(pCsr, bOptOk, pExpr->pPhrase);
         4394  +      *pRc = fts3EvalPhraseStart(pCsr, 1, pExpr->pPhrase);
  4260   4395       }else{
  4261         -      fts3EvalStartReaders(pCsr, pExpr->pLeft, bOptOk, pRc);
  4262         -      fts3EvalStartReaders(pCsr, pExpr->pRight, bOptOk, pRc);
         4396  +      fts3EvalStartReaders(pCsr, pExpr->pLeft, pRc);
         4397  +      fts3EvalStartReaders(pCsr, pExpr->pRight, pRc);
  4263   4398         pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred);
  4264   4399       }
  4265   4400     }
  4266   4401   }
  4267   4402   
  4268   4403   /*
  4269   4404   ** An array of the following structures is assembled as part of the process
................................................................................
  4577   4712         }
  4578   4713   
  4579   4714         sqlite3_free(aTC);
  4580   4715       }
  4581   4716     }
  4582   4717   #endif
  4583   4718   
  4584         -  fts3EvalStartReaders(pCsr, pCsr->pExpr, 1, &rc);
         4719  +  fts3EvalStartReaders(pCsr, pCsr->pExpr, &rc);
  4585   4720     return rc;
  4586   4721   }
  4587   4722   
  4588   4723   /*
  4589   4724   ** Invalidate the current position list for phrase pPhrase.
  4590   4725   */
  4591   4726   static void fts3EvalInvalidatePoslist(Fts3Phrase *pPhrase){
................................................................................
  5093   5228   ){
  5094   5229     if( pExpr && *pRc==SQLITE_OK ){
  5095   5230       Fts3Phrase *pPhrase = pExpr->pPhrase;
  5096   5231   
  5097   5232       if( pPhrase ){
  5098   5233         fts3EvalInvalidatePoslist(pPhrase);
  5099   5234         if( pPhrase->bIncr ){
  5100         -        assert( pPhrase->nToken==1 );
  5101         -        assert( pPhrase->aToken[0].pSegcsr );
  5102         -        sqlite3Fts3MsrIncrRestart(pPhrase->aToken[0].pSegcsr);
         5235  +        int i;
         5236  +        for(i=0; i<pPhrase->nToken; i++){
         5237  +          assert( pPhrase->aToken[i].pSegcsr );
         5238  +          sqlite3Fts3MsrIncrRestart(pPhrase->aToken[i].pSegcsr);
         5239  +        }
  5103   5240           *pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase);
  5104   5241         }
  5105         -
  5106   5242         pPhrase->doclist.pNextDocid = 0;
  5107   5243         pPhrase->doclist.iDocid = 0;
  5108   5244       }
  5109   5245   
  5110   5246       pExpr->iDocid = 0;
  5111   5247       pExpr->bEof = 0;
  5112   5248       pExpr->bStart = 0;