/ Check-in [8f939723]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Avoid loading doclists for infrequent terms that are part of phrases twice.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 8f939723f742329cedba8930f71dff42004f3d0d
User & Date: dan 2011-06-17 17:37:31
Context
2011-06-17
18:52
Fix a header dependency in nmake Makefile. check-in: 54492212 user: shaneh tags: trunk
17:37
Avoid loading doclists for infrequent terms that are part of phrases twice. check-in: 8f939723 user: dan tags: trunk
16:04
Add a missing declaration to fts3Int.h. check-in: 3bfd4466 user: dan tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

  3178   3178               pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr
  3179   3179           );
  3180   3180           if( rc!=SQLITE_OK ){
  3181   3181             *pRc = rc;
  3182   3182             return;
  3183   3183           }
  3184   3184         }
         3185  +      assert( pExpr->pPhrase->iDoclistToken==0 );
         3186  +      pExpr->pPhrase->iDoclistToken = -1;
  3185   3187       }else{
  3186   3188         *pnOr += (pExpr->eType==FTSQUERY_OR);
  3187   3189         fts3EvalAllocateReaders(pCsr, pExpr->pLeft, pnToken, pnOr, pRc);
  3188   3190         fts3EvalAllocateReaders(pCsr, pExpr->pRight, pnToken, pnOr, pRc);
  3189   3191       }
  3190   3192     }
  3191   3193   }
         3194  +
         3195  +static void fts3EvalPhraseMergeToken(
         3196  +  Fts3Table *pTab,
         3197  +  Fts3Phrase *p,
         3198  +  int iToken,
         3199  +  char *pList,
         3200  +  int nList
         3201  +){
         3202  +  assert( iToken!=p->iDoclistToken );
         3203  +
         3204  +  if( pList==0 ){
         3205  +    sqlite3_free(p->doclist.aAll);
         3206  +    p->doclist.aAll = 0;
         3207  +    p->doclist.nAll = 0;
         3208  +  }
         3209  +
         3210  +  else if( p->iDoclistToken<0 ){
         3211  +    p->doclist.aAll = pList;
         3212  +    p->doclist.nAll = nList;
         3213  +  }
         3214  +
         3215  +  else if( p->doclist.aAll==0 ){
         3216  +    sqlite3_free(pList);
         3217  +  }
         3218  +
         3219  +  else {
         3220  +    char *pLeft;
         3221  +    char *pRight;
         3222  +    int nLeft;
         3223  +    int nRight;
         3224  +    int nDiff;
         3225  +
         3226  +    if( p->iDoclistToken<iToken ){
         3227  +      pLeft = p->doclist.aAll;
         3228  +      nLeft = p->doclist.nAll;
         3229  +      pRight = pList;
         3230  +      nRight = nList;
         3231  +      nDiff = iToken - p->iDoclistToken;
         3232  +    }else{
         3233  +      pRight = p->doclist.aAll;
         3234  +      nRight = p->doclist.nAll;
         3235  +      pLeft = pList;
         3236  +      nLeft = nList;
         3237  +      nDiff = p->iDoclistToken - iToken;
         3238  +    }
         3239  +
         3240  +    fts3DoclistPhraseMerge(pTab->bDescIdx, nDiff, pLeft, nLeft, pRight,&nRight);
         3241  +    sqlite3_free(pLeft);
         3242  +    p->doclist.aAll = pRight;
         3243  +    p->doclist.nAll = nRight;
         3244  +  }
         3245  +
         3246  +  if( iToken>p->iDoclistToken ) p->iDoclistToken = iToken;
         3247  +}
  3192   3248   
  3193   3249   static int fts3EvalPhraseLoad(
  3194   3250     Fts3Cursor *pCsr, 
  3195   3251     Fts3Phrase *p
  3196   3252   ){
  3197   3253     Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  3198   3254     int iToken;
  3199   3255     int rc = SQLITE_OK;
  3200   3256   
  3201         -  char *aDoclist = 0;
  3202         -  int nDoclist = 0;
  3203         -  int iPrev = -1;
  3204         -
  3205   3257     for(iToken=0; rc==SQLITE_OK && iToken<p->nToken; iToken++){
  3206   3258       Fts3PhraseToken *pToken = &p->aToken[iToken];
  3207         -    assert( pToken->pSegcsr || pToken->pDeferred );
         3259  +    assert( pToken->pDeferred==0 || pToken->pSegcsr==0 );
  3208   3260   
  3209         -    if( pToken->pDeferred==0 ){
         3261  +    if( pToken->pSegcsr ){
  3210   3262         int nThis = 0;
  3211   3263         char *pThis = 0;
  3212   3264         rc = fts3TermSelect(pTab, pToken, p->iColumn, 1, &nThis, &pThis);
  3213   3265         if( rc==SQLITE_OK ){
  3214         -        if( pThis==0 ){
  3215         -          sqlite3_free(aDoclist);
  3216         -          aDoclist = 0;
  3217         -          nDoclist = 0;
  3218         -          break;
  3219         -        }else if( aDoclist==0 ){
  3220         -          aDoclist = pThis;
  3221         -          nDoclist = nThis;
  3222         -        }else{
  3223         -          assert( iPrev>=0 );
  3224         -          fts3DoclistPhraseMerge(pTab->bDescIdx,
  3225         -              iToken-iPrev, aDoclist, nDoclist, pThis, &nThis
  3226         -          );
  3227         -          sqlite3_free(aDoclist);
  3228         -          aDoclist = pThis;
  3229         -          nDoclist = nThis;
  3230         -        }
  3231         -        iPrev = iToken;
         3266  +        fts3EvalPhraseMergeToken(pTab, p, iToken, pThis, nThis);
  3232   3267         }
  3233   3268       }
         3269  +    assert( pToken->pSegcsr==0 );
  3234   3270     }
  3235   3271   
  3236         -  if( rc==SQLITE_OK ){
  3237         -    p->doclist.aAll = aDoclist;
  3238         -    p->doclist.nAll = nDoclist;
  3239         -  }else{
  3240         -    sqlite3_free(aDoclist);
  3241         -  }
  3242   3272     return rc;
  3243   3273   }
  3244   3274   
  3245   3275   static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){
  3246   3276     int iToken;
  3247   3277     int rc = SQLITE_OK;
  3248   3278   
  3249         -  int nMaxUndeferred = -1;
         3279  +  int nMaxUndeferred = pPhrase->iDoclistToken;
  3250   3280     char *aPoslist = 0;
  3251   3281     int nPoslist = 0;
  3252   3282     int iPrev = -1;
  3253   3283   
  3254   3284     assert( pPhrase->doclist.bFreeList==0 );
  3255   3285   
  3256   3286     for(iToken=0; rc==SQLITE_OK && iToken<pPhrase->nToken; iToken++){
................................................................................
  3287   3317             sqlite3_free(aPoslist);
  3288   3318             pPhrase->doclist.pList = 0;
  3289   3319             pPhrase->doclist.nList = 0;
  3290   3320             return SQLITE_OK;
  3291   3321           }
  3292   3322         }
  3293   3323         iPrev = iToken;
  3294         -    }else{
  3295         -      nMaxUndeferred = iToken;
  3296   3324       }
  3297   3325     }
  3298   3326   
  3299   3327     if( iPrev>=0 ){
  3300   3328       if( nMaxUndeferred<0 ){
  3301   3329         pPhrase->doclist.pList = aPoslist;
  3302   3330         pPhrase->doclist.nList = nPoslist;
................................................................................
  3347   3375   ** used with fts3EvalPhraseNext() to iterate through the matching docids.
  3348   3376   */
  3349   3377   static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){
  3350   3378     int rc;
  3351   3379     Fts3PhraseToken *pFirst = &p->aToken[0];
  3352   3380     Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  3353   3381   
  3354         -  assert( p->doclist.aAll==0 );
  3355         -  if( pCsr->bDesc==pTab->bDescIdx && bOptOk==1 && p->nToken==1 
  3356         -   && pFirst->pSegcsr && pFirst->pSegcsr->bLookup 
         3382  +  if( pCsr->bDesc==pTab->bDescIdx 
         3383  +   && bOptOk==1 
         3384  +   && p->nToken==1 
         3385  +   && pFirst->pSegcsr 
         3386  +   && pFirst->pSegcsr->bLookup 
  3357   3387     ){
  3358   3388       /* Use the incremental approach. */
  3359   3389       int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn);
  3360   3390       rc = sqlite3Fts3MsrIncrStart(
  3361   3391           pTab, pFirst->pSegcsr, iCol, pFirst->z, pFirst->n);
  3362   3392       p->bIncr = 1;
  3363   3393   
................................................................................
  3520   3550         fts3EvalStartReaders(pCsr, pExpr->pLeft, bOptOk, pRc);
  3521   3551         fts3EvalStartReaders(pCsr, pExpr->pRight, bOptOk, pRc);
  3522   3552         pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred);
  3523   3553       }
  3524   3554     }
  3525   3555   }
  3526   3556   
  3527         -
  3528   3557   typedef struct Fts3TokenAndCost Fts3TokenAndCost;
  3529   3558   struct Fts3TokenAndCost {
  3530         -  Fts3PhraseToken *pToken;
  3531         -  Fts3Expr *pRoot;
         3559  +  Fts3Phrase *pPhrase;            /* The phrase the token belongs to */
         3560  +  int iToken;                     /* Position of token in phrase */
         3561  +  Fts3PhraseToken *pToken;        /* The token itself */
         3562  +  Fts3Expr *pRoot; 
  3532   3563     int nOvfl;
  3533         -  int iCol;
         3564  +  int iCol;                       /* The column the token must match */
  3534   3565   };
  3535   3566   
  3536   3567   static void fts3EvalTokenCosts(
  3537   3568     Fts3Cursor *pCsr, 
  3538   3569     Fts3Expr *pRoot, 
  3539   3570     Fts3Expr *pExpr, 
  3540   3571     Fts3TokenAndCost **ppTC,
................................................................................
  3543   3574   ){
  3544   3575     if( *pRc==SQLITE_OK && pExpr ){
  3545   3576       if( pExpr->eType==FTSQUERY_PHRASE ){
  3546   3577         Fts3Phrase *pPhrase = pExpr->pPhrase;
  3547   3578         int i;
  3548   3579         for(i=0; *pRc==SQLITE_OK && i<pPhrase->nToken; i++){
  3549   3580           Fts3TokenAndCost *pTC = (*ppTC)++;
         3581  +        pTC->pPhrase = pPhrase;
         3582  +        pTC->iToken = i;
  3550   3583           pTC->pRoot = pRoot;
  3551   3584           pTC->pToken = &pPhrase->aToken[i];
  3552   3585           pTC->iCol = pPhrase->iColumn;
  3553   3586           *pRc = sqlite3Fts3MsrOvfl(pCsr, pTC->pToken->pSegcsr, &pTC->nOvfl);
  3554   3587         }
  3555   3588       }else if( pExpr->eType!=FTSQUERY_NOT ){
  3556   3589         if( pExpr->eType==FTSQUERY_OR ){
................................................................................
  3655   3688       assert( pTC );
  3656   3689   
  3657   3690       /* At this point pTC points to the cheapest remaining token. */
  3658   3691       if( ii==0 ){
  3659   3692         if( pTC->nOvfl ){
  3660   3693           nDocEst = (pTC->nOvfl * pTab->nPgsz + pTab->nPgsz) / 10;
  3661   3694         }else{
  3662         -        /* TODO: Fix this so that the doclist need not be read twice. */
  3663   3695           Fts3PhraseToken *pToken = pTC->pToken;
  3664   3696           int nList = 0;
  3665   3697           char *pList = 0;
  3666   3698           rc = fts3TermSelect(pTab, pToken, pTC->iCol, 1, &nList, &pList);
         3699  +        assert( rc==SQLITE_OK || pList==0 );
         3700  +
  3667   3701           if( rc==SQLITE_OK ){
  3668   3702             nDocEst = fts3DoclistCountDocids(1, pList, nList);
  3669         -        }
  3670         -        sqlite3_free(pList);
  3671         -        if( rc==SQLITE_OK ){
  3672         -          rc = sqlite3Fts3TermSegReaderCursor(pCsr, 
  3673         -              pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr
  3674         -          );
         3703  +          fts3EvalPhraseMergeToken(pTab, pTC->pPhrase, pTC->iToken,pList,nList);
  3675   3704           }
  3676   3705         }
  3677   3706       }else{
  3678   3707         if( pTC->nOvfl>=(nDocEst*nDocSize) ){
  3679   3708           Fts3PhraseToken *pToken = pTC->pToken;
  3680   3709           rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol);
  3681   3710           fts3SegReaderCursorFree(pToken->pSegcsr);

Changes to ext/fts3/fts3Int.h.

   302    302     char *z;                        /* Text of the token */
   303    303     int n;                          /* Number of bytes in buffer z */
   304    304     int isPrefix;                   /* True if token ends with a "*" character */
   305    305   
   306    306     /* Variables above this point are populated when the expression is
   307    307     ** parsed (by code in fts3_expr.c). Below this point the variables are
   308    308     ** used when evaluating the expression. */
   309         -  int bFulltext;                  /* True if full-text index was used */
   310    309     Fts3DeferredToken *pDeferred;   /* Deferred token object for this token */
   311    310     Fts3MultiSegReader *pSegcsr;    /* Segment-reader for this token */
   312    311   };
   313    312   
   314    313   struct Fts3Phrase {
   315    314     /* Cache of doclist for this phrase. */
   316    315     Fts3Doclist doclist;
   317    316     int bIncr;                 /* True if doclist is loaded incrementally */
          317  +  int iDoclistToken;
   318    318   
   319    319     /* Variables below this point are populated by fts3_expr.c when parsing 
   320    320     ** a MATCH expression. Everything above is part of the evaluation phase. 
   321    321     */
   322    322     int nToken;                /* Number of tokens in the phrase */
   323    323     int iColumn;               /* Index of column this phrase must match */
   324    324     Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */