/ Check-in [9d10a684]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Have NEAR queries use incremental merging. Fix issues surrounding the deferred token optimization.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts3-prefix-search
Files: files | file ages | folders
SHA1: 9d10a6846b12a9cc8fd4fdc3affd931a27218b5a
User & Date: dan 2011-06-07 18:35:45
Context
2011-06-08
18:39
Fix various issues to do with deferred tokens, NEAR expressions and matchinfo(). check-in: 3972a787 user: dan tags: fts3-prefix-search
2011-06-07
18:35
Have NEAR queries use incremental merging. Fix issues surrounding the deferred token optimization. check-in: 9d10a684 user: dan tags: fts3-prefix-search
2011-06-06
18:14
Merge the latest trunk changes into the fts3-prefix-search branch. check-in: 567dd843 user: drh tags: fts3-prefix-search
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

  1065   1065     }
  1066   1066     memset(p, 0, nByte);
  1067   1067     p->db = db;
  1068   1068     p->nColumn = nCol;
  1069   1069     p->nPendingData = 0;
  1070   1070     p->azColumn = (char **)&p[1];
  1071   1071     p->pTokenizer = pTokenizer;
  1072         -  p->nNodeSize = 1000;
  1073   1072     p->nMaxPendingData = FTS3_MAX_PENDING_DATA;
  1074   1073     p->bHasDocsize = (isFts4 && bNoDocsize==0);
  1075   1074     p->bHasStat = isFts4;
  1076   1075     p->bDescIdx = bDescIdx;
  1077   1076     TESTONLY( p->inTransaction = -1 );
  1078   1077     TESTONLY( p->mxSavepoint = -1 );
  1079   1078   
................................................................................
  1123   1122     }
  1124   1123   
  1125   1124     /* Figure out the page-size for the database. This is required in order to
  1126   1125     ** estimate the cost of loading large doclists from the database (see 
  1127   1126     ** function sqlite3Fts3SegReaderCost() for details).
  1128   1127     */
  1129   1128     fts3DatabasePageSize(&rc, p);
         1129  +  p->nNodeSize = p->nPgsz-35;
  1130   1130   
  1131   1131     /* Declare the table schema to SQLite. */
  1132   1132     fts3DeclareVtab(&rc, p);
  1133   1133   
  1134   1134   fts3_init_out:
  1135   1135     sqlite3_free(zPrefix);
  1136   1136     sqlite3_free(aFree);
................................................................................
  1858   1858     }
  1859   1859     *p++ = 0x00;
  1860   1860     *pp = p;
  1861   1861     return 1;
  1862   1862   }
  1863   1863   
  1864   1864   /*
  1865         -** Merge two position-lists as required by the NEAR operator.
         1865  +** Merge two position-lists as required by the NEAR operator. The argument
         1866  +** position lists correspond to the left and right phrases of an expression 
         1867  +** like:
         1868  +**
         1869  +**     "phrase 1" NEAR "phrase number 2"
         1870  +**
         1871  +** Position list *pp1 corresponds to the left-hand side of the NEAR 
         1872  +** expression and *pp2 to the right. As usual, the indexes in the position 
         1873  +** lists are the offsets of the last token in each phrase (tokens "1" and "2" 
         1874  +** in the example above).
         1875  +**
         1876  +** The output position list - written to *pp - is a copy of *pp2 with those
         1877  +** entries that are not sufficiently NEAR entries in *pp1 removed.
  1866   1878   */
  1867   1879   static int fts3PoslistNearMerge(
  1868   1880     char **pp,                      /* Output buffer */
  1869   1881     char *aTmp,                     /* Temporary buffer space */
  1870   1882     int nRight,                     /* Maximum difference in token positions */
  1871   1883     int nLeft,                      /* Maximum difference in token positions */
  1872   1884     char **pp1,                     /* IN/OUT: Left input list */
  1873   1885     char **pp2                      /* IN/OUT: Right input list */
  1874   1886   ){
  1875   1887     char *p1 = *pp1;
  1876   1888     char *p2 = *pp2;
  1877   1889   
  1878         -  if( !pp ){
  1879         -    if( fts3PoslistPhraseMerge(0, nRight, 0, 0, pp1, pp2) ) return 1;
  1880         -    *pp1 = p1;
  1881         -    *pp2 = p2;
  1882         -    return fts3PoslistPhraseMerge(0, nLeft, 0, 0, pp2, pp1);
  1883         -  }else{
  1884         -    char *pTmp1 = aTmp;
  1885         -    char *pTmp2;
  1886         -    char *aTmp2;
  1887         -    int res = 1;
  1888         -
  1889         -    fts3PoslistPhraseMerge(&pTmp1, nRight, 0, 0, pp1, pp2);
  1890         -    aTmp2 = pTmp2 = pTmp1;
  1891         -    *pp1 = p1;
  1892         -    *pp2 = p2;
  1893         -    fts3PoslistPhraseMerge(&pTmp2, nLeft, 1, 0, pp2, pp1);
  1894         -    if( pTmp1!=aTmp && pTmp2!=aTmp2 ){
  1895         -      fts3PoslistMerge(pp, &aTmp, &aTmp2);
  1896         -    }else if( pTmp1!=aTmp ){
  1897         -      fts3PoslistCopy(pp, &aTmp);
  1898         -    }else if( pTmp2!=aTmp2 ){
  1899         -      fts3PoslistCopy(pp, &aTmp2);
  1900         -    }else{
  1901         -      res = 0;
  1902         -    }
  1903         -
  1904         -    return res;
  1905         -  }
         1890  +  char *pTmp1 = aTmp;
         1891  +  char *pTmp2;
         1892  +  char *aTmp2;
         1893  +  int res = 1;
         1894  +
         1895  +  fts3PoslistPhraseMerge(&pTmp1, nRight, 0, 0, pp1, pp2);
         1896  +  aTmp2 = pTmp2 = pTmp1;
         1897  +  *pp1 = p1;
         1898  +  *pp2 = p2;
         1899  +  fts3PoslistPhraseMerge(&pTmp2, nLeft, 1, 0, pp2, pp1);
         1900  +  if( pTmp1!=aTmp && pTmp2!=aTmp2 ){
         1901  +    fts3PoslistMerge(pp, &aTmp, &aTmp2);
         1902  +  }else if( pTmp1!=aTmp ){
         1903  +    fts3PoslistCopy(pp, &aTmp);
         1904  +  }else if( pTmp2!=aTmp2 ){
         1905  +    fts3PoslistCopy(pp, &aTmp2);
         1906  +  }else{
         1907  +    res = 0;
         1908  +  }
         1909  +
         1910  +  return res;
  1906   1911   }
  1907   1912   
  1908   1913   /* 
  1909   1914   ** A pointer to an instance of this structure is used as the context 
  1910   1915   ** argument to sqlite3Fts3SegReaderIterate()
  1911   1916   */
  1912   1917   typedef struct TermSelect TermSelect;
................................................................................
  3240   3245   ** and merged incrementally. Otherwise, it has to be merged into an in-memory 
  3241   3246   ** doclist and then traversed.
  3242   3247   */
  3243   3248   static void fts3EvalAllocateReaders(
  3244   3249     Fts3Cursor *pCsr, 
  3245   3250     Fts3Expr *pExpr, 
  3246   3251     int *pnToken,                   /* OUT: Total number of tokens in phrase. */
         3252  +  int *pnOr,                      /* OUT: Total number of OR nodes in expr. */
  3247   3253     int *pRc
  3248   3254   ){
  3249   3255     if( pExpr && SQLITE_OK==*pRc ){
  3250   3256       if( pExpr->eType==FTSQUERY_PHRASE ){
  3251   3257         int i;
  3252   3258         int nToken = pExpr->pPhrase->nToken;
  3253   3259         *pnToken += nToken;
................................................................................
  3258   3264           );
  3259   3265           if( rc!=SQLITE_OK ){
  3260   3266             *pRc = rc;
  3261   3267             return;
  3262   3268           }
  3263   3269         }
  3264   3270       }else{
  3265         -      fts3EvalAllocateReaders(pCsr, pExpr->pLeft, pnToken, pRc);
  3266         -      fts3EvalAllocateReaders(pCsr, pExpr->pRight, pnToken, pRc);
         3271  +      *pnOr += (pExpr->eType==FTSQUERY_OR);
         3272  +      fts3EvalAllocateReaders(pCsr, pExpr->pLeft, pnToken, pnOr, pRc);
         3273  +      fts3EvalAllocateReaders(pCsr, pExpr->pRight, pnToken, pnOr, pRc);
  3267   3274       }
  3268   3275     }
  3269   3276   }
  3270   3277   
  3271   3278   static int fts3EvalPhraseLoad(
  3272   3279     Fts3Cursor *pCsr, 
  3273   3280     Fts3Phrase *p
................................................................................
  3541   3548   ){
  3542   3549     int rc = SQLITE_OK;
  3543   3550     Fts3Doclist *pDL = &p->doclist;
  3544   3551     Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  3545   3552   
  3546   3553     if( p->bIncr ){
  3547   3554       assert( p->nToken==1 );
         3555  +    assert( pDL->pNextDocid==0 );
  3548   3556       rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr, 
  3549   3557           &pDL->iDocid, &pDL->pList, &pDL->nList
  3550   3558       );
  3551   3559       if( rc==SQLITE_OK && !pDL->pList ){
  3552   3560         *pbEof = 1;
  3553   3561       }
  3554   3562     }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){
................................................................................
  3598   3606         int nToken = pExpr->pPhrase->nToken;
  3599   3607         for(i=0; i<nToken; i++){
  3600   3608           if( pExpr->pPhrase->aToken[i].pDeferred==0 ) break;
  3601   3609         }
  3602   3610         pExpr->bDeferred = (i==nToken);
  3603   3611         *pRc = fts3EvalPhraseStart(pCsr, bOptOk, pExpr->pPhrase);
  3604   3612       }else{
  3605         -      if( pExpr->eType==FTSQUERY_NEAR ){
  3606         -        bOptOk = 0;
  3607         -      }
  3608   3613         fts3EvalStartReaders(pCsr, pExpr->pLeft, bOptOk, pRc);
  3609   3614         fts3EvalStartReaders(pCsr, pExpr->pRight, bOptOk, pRc);
  3610   3615         pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred);
  3611   3616       }
  3612   3617     }
  3613   3618   }
  3614   3619   
................................................................................
  3696   3701       }
  3697   3702     }
  3698   3703   }
  3699   3704   
  3700   3705   typedef struct Fts3TokenAndCost Fts3TokenAndCost;
  3701   3706   struct Fts3TokenAndCost {
  3702   3707     Fts3PhraseToken *pToken;
         3708  +  Fts3Expr *pRoot;
  3703   3709     int nOvfl;
  3704   3710     int iCol;
  3705   3711   };
  3706   3712   
  3707   3713   static void fts3EvalTokenCosts(
  3708   3714     Fts3Cursor *pCsr, 
         3715  +  Fts3Expr *pRoot, 
  3709   3716     Fts3Expr *pExpr, 
  3710   3717     Fts3TokenAndCost **ppTC,
         3718  +  Fts3Expr ***ppOr,
  3711   3719     int *pRc
  3712   3720   ){
  3713   3721     if( *pRc==SQLITE_OK && pExpr ){
  3714   3722       if( pExpr->eType==FTSQUERY_PHRASE ){
  3715   3723         Fts3Phrase *pPhrase = pExpr->pPhrase;
  3716   3724         int i;
  3717   3725         for(i=0; *pRc==SQLITE_OK && i<pPhrase->nToken; i++){
  3718   3726           Fts3TokenAndCost *pTC = (*ppTC)++;
         3727  +        pTC->pRoot = pRoot;
  3719   3728           pTC->pToken = &pPhrase->aToken[i];
  3720   3729           pTC->iCol = pPhrase->iColumn;
  3721   3730           *pRc = sqlite3Fts3MsrOvfl(pCsr, pTC->pToken->pSegcsr, &pTC->nOvfl);
  3722   3731         }
  3723         -    }else if( pExpr->eType==FTSQUERY_AND ){
  3724         -      fts3EvalTokenCosts(pCsr, pExpr->pLeft, ppTC, pRc);
  3725         -      fts3EvalTokenCosts(pCsr, pExpr->pRight, ppTC, pRc);
         3732  +    }else if( pExpr->eType!=FTSQUERY_NOT ){
         3733  +      if( pExpr->eType==FTSQUERY_OR ){
         3734  +        pRoot = pExpr;
         3735  +        **ppOr = pExpr;
         3736  +        (*ppOr)++;
         3737  +      }
         3738  +      fts3EvalTokenCosts(pCsr, pRoot, pExpr->pLeft, ppTC, ppOr, pRc);
         3739  +      fts3EvalTokenCosts(pCsr, pRoot, pExpr->pRight, ppTC, ppOr, pRc);
  3726   3740       }
  3727   3741     }
  3728   3742   }
  3729   3743   
  3730   3744   static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){
  3731   3745     if( pCsr->nRowAvg==0 ){
  3732   3746       /* The average document size, which is required to calculate the cost
................................................................................
  3768   3782       rc = sqlite3_reset(pStmt);
  3769   3783       if( rc!=SQLITE_OK ) return rc;
  3770   3784     }
  3771   3785   
  3772   3786     *pnPage = pCsr->nRowAvg;
  3773   3787     return SQLITE_OK;
  3774   3788   }
         3789  +
         3790  +static int fts3EvalSelectDeferred(
         3791  +  Fts3Cursor *pCsr,
         3792  +  Fts3Expr *pRoot,
         3793  +  Fts3TokenAndCost *aTC,
         3794  +  int nTC
         3795  +){
         3796  +  int nDocSize = 0;
         3797  +  int nDocEst = 0;
         3798  +  int rc = SQLITE_OK;
         3799  +  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
         3800  +  int ii;
         3801  +
         3802  +  int nOvfl = 0;
         3803  +  int nTerm = 0;
         3804  +
         3805  +  for(ii=0; ii<nTC; ii++){
         3806  +    if( aTC[ii].pRoot==pRoot ){
         3807  +      nOvfl += aTC[ii].nOvfl;
         3808  +      nTerm++;
         3809  +    }
         3810  +  }
         3811  +  if( nOvfl==0 || nTerm<2 ) return SQLITE_OK;
         3812  +
         3813  +  rc = fts3EvalAverageDocsize(pCsr, &nDocSize);
         3814  +
         3815  +  for(ii=0; ii<nTerm && rc==SQLITE_OK; ii++){
         3816  +    int jj;
         3817  +    Fts3TokenAndCost *pTC = 0;
         3818  +
         3819  +    for(jj=0; jj<nTC; jj++){
         3820  +      if( aTC[jj].pToken && aTC[jj].pRoot==pRoot 
         3821  +       && (!pTC || aTC[jj].nOvfl<pTC->nOvfl) 
         3822  +      ){
         3823  +        pTC = &aTC[jj];
         3824  +      }
         3825  +    }
         3826  +    assert( pTC );
         3827  +
         3828  +    /* At this point pTC points to the cheapest remaining token. */
         3829  +    if( ii==0 ){
         3830  +      if( pTC->nOvfl ){
         3831  +        nDocEst = (pTC->nOvfl * pTab->nPgsz + pTab->nPgsz) / 10;
         3832  +      }else{
         3833  +        /* TODO: Fix this so that the doclist need not be read twice. */
         3834  +        Fts3PhraseToken *pToken = pTC->pToken;
         3835  +        int nList = 0;
         3836  +        char *pList = 0;
         3837  +        rc = fts3TermSelect(pTab, pToken, pTC->iCol, 1, &nList, &pList);
         3838  +        if( rc==SQLITE_OK ){
         3839  +          nDocEst = fts3DoclistCountDocids(1, pList, nList);
         3840  +        }
         3841  +        sqlite3_free(pList);
         3842  +        if( rc==SQLITE_OK ){
         3843  +          rc = sqlite3Fts3TermSegReaderCursor(pCsr, 
         3844  +              pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr
         3845  +          );
         3846  +        }
         3847  +      }
         3848  +    }else{
         3849  +      if( pTC->nOvfl>=(nDocEst*nDocSize) ){
         3850  +        Fts3PhraseToken *pToken = pTC->pToken;
         3851  +        rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol);
         3852  +        fts3SegReaderCursorFree(pToken->pSegcsr);
         3853  +        pToken->pSegcsr = 0;
         3854  +      }
         3855  +      nDocEst = 1 + (nDocEst/4);
         3856  +    }
         3857  +    pTC->pToken = 0;
         3858  +  }
         3859  +
         3860  +  return rc;
         3861  +}
  3775   3862   
  3776   3863   int sqlite3Fts3EvalStart(Fts3Cursor *pCsr, Fts3Expr *pExpr, int bOptOk){
  3777   3864     Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  3778   3865     int rc = SQLITE_OK;
  3779   3866     int nToken = 0;
         3867  +  int nOr = 0;
  3780   3868   
  3781   3869     /* Allocate a MultiSegReader for each token in the expression. */
  3782         -  fts3EvalAllocateReaders(pCsr, pExpr, &nToken, &rc);
         3870  +  fts3EvalAllocateReaders(pCsr, pExpr, &nToken, &nOr, &rc);
  3783   3871   
  3784   3872     /* Call fts3EvalPhraseStart() on all phrases in the expression. TODO:
  3785   3873     ** This call will eventually also be responsible for determining which
  3786   3874     ** tokens are 'deferred' until the document text is loaded into memory.
  3787   3875     **
  3788   3876     ** Each token in each phrase is dealt with using one of the following
  3789   3877     ** three strategies:
................................................................................
  3795   3883     **      sqlite3Fts3EvalNext() call.
  3796   3884     **
  3797   3885     **   3. Token doclist is never loaded. Instead, documents are loaded into
  3798   3886     **      memory and scanned for the token as part of the sqlite3Fts3EvalNext()
  3799   3887     **      call. This is known as a "deferred" token.
  3800   3888     */
  3801   3889   
  3802         -  /* If bOptOk is true, check if there are any tokens that can be 
  3803         -  ** deferred (strategy 3). */
         3890  +  /* If bOptOk is true, check if there are any tokens that should be deferred.
         3891  +  */
  3804   3892     if( rc==SQLITE_OK && bOptOk && nToken>1 && pTab->bHasStat ){
  3805   3893       Fts3TokenAndCost *aTC;
  3806         -    aTC = (Fts3TokenAndCost *)sqlite3_malloc(sizeof(Fts3TokenAndCost) * nToken);
         3894  +    Fts3Expr **apOr;
         3895  +    aTC = (Fts3TokenAndCost *)sqlite3_malloc(
         3896  +        sizeof(Fts3TokenAndCost) * nToken
         3897  +      + sizeof(Fts3Expr *) * nOr
         3898  +    );
         3899  +    apOr = (Fts3Expr **)&aTC[nToken];
         3900  +
  3807   3901       if( !aTC ){
  3808   3902         rc = SQLITE_NOMEM;
  3809   3903       }else{
  3810   3904         int ii;
  3811         -      int nDocEst = 0;
  3812   3905         int nDocSize;
  3813   3906         Fts3TokenAndCost *pTC = aTC;
         3907  +      Fts3Expr **ppOr = apOr;
  3814   3908   
  3815         -      rc = fts3EvalAverageDocsize(pCsr, &nDocSize);
  3816         -      fts3EvalTokenCosts(pCsr, pExpr, &pTC, &rc);
         3909  +      fts3EvalTokenCosts(pCsr, 0, pExpr, &pTC, &ppOr, &rc);
  3817   3910         nToken = pTC-aTC;
         3911  +      nOr = ppOr-apOr;
  3818   3912   
         3913  +      rc = fts3EvalSelectDeferred(pCsr, 0, aTC, nToken);
         3914  +      for(ii=0; rc==SQLITE_OK && ii<nOr; ii++){
         3915  +        rc = fts3EvalSelectDeferred(pCsr, apOr[ii], aTC, nToken);
         3916  +      }
         3917  +
         3918  +#if 0
  3819   3919         for(ii=0; rc==SQLITE_OK && ii<nToken; ii++){
  3820   3920           int jj;
  3821   3921           pTC = 0;
  3822   3922           for(jj=0; jj<nToken; jj++){
  3823   3923             if( aTC[jj].pToken && (!pTC || aTC[jj].nOvfl<pTC->nOvfl) ){
  3824   3924               pTC = &aTC[jj];
  3825   3925             }
  3826   3926           }
  3827   3927           assert( pTC );
         3928  +
  3828   3929   
  3829   3930           /* At this point pTC points to the cheapest remaining token. */
  3830   3931           if( ii==0 ){
  3831   3932             if( pTC->nOvfl ){
  3832   3933               nDocEst = (pTC->nOvfl * pTab->nPgsz + pTab->nPgsz) / 10;
  3833   3934             }else{
  3834   3935               /* TODO: Fix this so that the doclist need not be read twice. */
................................................................................
  3853   3954               fts3SegReaderCursorFree(pToken->pSegcsr);
  3854   3955               pToken->pSegcsr = 0;
  3855   3956             }
  3856   3957             nDocEst = 1 + (nDocEst/4);
  3857   3958           }
  3858   3959           pTC->pToken = 0;
  3859   3960         }
         3961  +#endif
         3962  +
  3860   3963         sqlite3_free(aTC);
  3861   3964       }
  3862   3965     }
  3863   3966   
  3864   3967     fts3EvalStartReaders(pCsr, pExpr, bOptOk, &rc);
  3865         -
  3866         -  /* Fix the results of NEAR expressions. */
  3867         -  fts3EvalNearTrim(pCsr, pExpr, &rc);
  3868         -
  3869   3968     return rc;
  3870   3969   }
  3871   3970   
  3872   3971   static void fts3EvalFreeDeferredDoclist(Fts3Phrase *pPhrase){
  3873   3972     if( pPhrase->doclist.bFreeList ){
  3874   3973       sqlite3_free(pPhrase->doclist.pList);
  3875   3974       pPhrase->doclist.pList = 0;
  3876   3975       pPhrase->doclist.nList = 0;
  3877   3976       pPhrase->doclist.bFreeList = 0;
  3878   3977     }
  3879   3978   }
         3979  +
         3980  +static int fts3EvalNearTrim2(
         3981  +  int nNear,
         3982  +  char *aTmp,                     /* Temporary space to use */
         3983  +  char **paPoslist,               /* IN/OUT: Position list */
         3984  +  int *pnToken,                   /* IN/OUT: Tokens in phrase of *paPoslist */
         3985  +  Fts3Phrase *pPhrase             /* The phrase object to trim the doclist of */
         3986  +){
         3987  +  int nParam1 = nNear + pPhrase->nToken;
         3988  +  int nParam2 = nNear + *pnToken;
         3989  +  char *p2; 
         3990  +  char *pOut; 
         3991  +  int res;
         3992  +
         3993  +  p2 = pOut = pPhrase->doclist.pList;
         3994  +  res = fts3PoslistNearMerge(
         3995  +    &pOut, aTmp, nParam1, nParam2, paPoslist, &p2
         3996  +  );
         3997  +  pPhrase->doclist.nList = pOut - pPhrase->doclist.pList;
         3998  +  *paPoslist = pPhrase->doclist.pList;
         3999  +  *pnToken = pPhrase->nToken;
         4000  +
         4001  +  return res;
         4002  +}
         4003  +
         4004  +static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){
         4005  +  int res = 1;
         4006  +
         4007  +  /* The following block runs if pExpr is the root of a NEAR query.
         4008  +  ** For example, the query:
         4009  +  **
         4010  +  **         "w" NEAR "x" NEAR "y" NEAR "z"
         4011  +  **
         4012  +  ** which is represented in tree form as:
         4013  +  **
         4014  +  **                               |
         4015  +  **                          +--NEAR--+      <-- root of NEAR query
         4016  +  **                          |        |
         4017  +  **                     +--NEAR--+   "z"
         4018  +  **                     |        |
         4019  +  **                +--NEAR--+   "y"
         4020  +  **                |        |
         4021  +  **               "w"      "x"
         4022  +  **
         4023  +  ** The right-hand child of a NEAR node is always a phrase. The 
         4024  +  ** left-hand child may be either a phrase or a NEAR node. There are
         4025  +  ** no exceptions to this.
         4026  +  */
         4027  +  if( *pRc==SQLITE_OK 
         4028  +   && pExpr->eType==FTSQUERY_NEAR 
         4029  +   && pExpr->bEof==0
         4030  +   && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR)
         4031  +  ){
         4032  +    Fts3Expr *p; 
         4033  +    int nTmp = 0;                 /* Bytes of temp space */
         4034  +    char *aTmp;                   /* Temp space for PoslistNearMerge() */
         4035  +
         4036  +    /* Allocate temporary working space. */
         4037  +    for(p=pExpr; p->pLeft; p=p->pLeft){
         4038  +      nTmp += p->pRight->pPhrase->doclist.nList;
         4039  +    }
         4040  +    nTmp += p->pPhrase->doclist.nList;
         4041  +    aTmp = sqlite3_malloc(nTmp*2);
         4042  +    if( !aTmp ){
         4043  +      *pRc = SQLITE_NOMEM;
         4044  +      res = 0;
         4045  +    }else{
         4046  +      char *aPoslist = p->pPhrase->doclist.pList;
         4047  +      int nToken = p->pPhrase->nToken;
         4048  +
         4049  +      for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){
         4050  +        Fts3Phrase *pPhrase = p->pRight->pPhrase;
         4051  +        int nNear = p->nNear;
         4052  +        res = fts3EvalNearTrim2(nNear, aTmp, &aPoslist, &nToken, pPhrase);
         4053  +      }
         4054  +  
         4055  +      aPoslist = pExpr->pRight->pPhrase->doclist.pList;
         4056  +      nToken = pExpr->pRight->pPhrase->nToken;
         4057  +      for(p=pExpr->pLeft; p && res; p=p->pLeft){
         4058  +        int nNear = p->pParent->nNear;
         4059  +        Fts3Phrase *pPhrase = (
         4060  +            p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase
         4061  +        );
         4062  +        res = fts3EvalNearTrim2(nNear, aTmp, &aPoslist, &nToken, pPhrase);
         4063  +      }
         4064  +    }
         4065  +
         4066  +    sqlite3_free(aTmp);
         4067  +  }
         4068  +
         4069  +  return res;
         4070  +}
  3880   4071   
  3881   4072   /*
  3882   4073   ** This macro is used by the fts3EvalNext() function. The two arguments are
  3883   4074   ** 64-bit docid values. If the current query is "ORDER BY docid ASC", then
  3884   4075   ** the macro returns (i1 - i2). Or if it is "ORDER BY docid DESC", then
  3885   4076   ** it returns (i2 - i1). This allows the same code to be used for merging
  3886   4077   ** doclists in ascending or descending order.
................................................................................
  3897   4088       pExpr->bStart = 1;
  3898   4089   
  3899   4090       switch( pExpr->eType ){
  3900   4091         case FTSQUERY_NEAR:
  3901   4092         case FTSQUERY_AND: {
  3902   4093           Fts3Expr *pLeft = pExpr->pLeft;
  3903   4094           Fts3Expr *pRight = pExpr->pRight;
  3904         -
  3905   4095           assert( !pLeft->bDeferred || !pRight->bDeferred );
  3906   4096           if( pLeft->bDeferred ){
  3907   4097             fts3EvalNext(pCsr, pRight, pRc);
  3908   4098             pExpr->iDocid = pRight->iDocid;
  3909   4099             pExpr->bEof = pRight->bEof;
  3910   4100           }else if( pRight->bDeferred ){
  3911   4101             fts3EvalNext(pCsr, pLeft, pRc);
................................................................................
  3920   4110               if( iDiff==0 ) break;
  3921   4111               if( iDiff<0 ){
  3922   4112                 fts3EvalNext(pCsr, pLeft, pRc);
  3923   4113               }else{
  3924   4114                 fts3EvalNext(pCsr, pRight, pRc);
  3925   4115               }
  3926   4116             }
  3927         -    
         4117  +
  3928   4118             pExpr->iDocid = pLeft->iDocid;
  3929   4119             pExpr->bEof = (pLeft->bEof || pRight->bEof);
  3930   4120           }
  3931   4121           break;
  3932   4122         }
  3933   4123     
  3934   4124         case FTSQUERY_OR: {
................................................................................
  3983   4173         }
  3984   4174   
  3985   4175         default: {
  3986   4176           Fts3Phrase *pPhrase = pExpr->pPhrase;
  3987   4177           fts3EvalFreeDeferredDoclist(pPhrase);
  3988   4178           *pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof);
  3989   4179           pExpr->iDocid = pPhrase->doclist.iDocid;
  3990         -#if 0
  3991         -        printf("token \"%.*s\" docid=%lld\n", 
  3992         -            pPhrase->aToken[0].n, pPhrase->aToken[0].z, pExpr->iDocid
  3993         -        );
  3994         -#endif
  3995         -        
  3996   4180           break;
  3997   4181         }
  3998   4182       }
  3999   4183     }
  4000   4184   }
  4001   4185   
  4002   4186   static int fts3EvalDeferredTest(Fts3Cursor *pCsr, Fts3Expr *pExpr, int *pRc){
  4003         -  int bHit = 0;
         4187  +  int bHit = 1;
  4004   4188     if( *pRc==SQLITE_OK ){
  4005   4189       switch( pExpr->eType ){
  4006   4190         case FTSQUERY_NEAR:
  4007   4191         case FTSQUERY_AND:
  4008   4192           bHit = (
  4009   4193               fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc)
  4010   4194            && fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc)
         4195  +         && fts3EvalNearTest(pExpr, pRc)
  4011   4196           );
         4197  +
         4198  +        /* If this is a NEAR node and the NEAR expression does not match
         4199  +        ** any rows, zero the doclist for all phrases involved in the NEAR.
         4200  +        ** This is because the snippet(), offsets() and matchinfo() functions
         4201  +        ** are not supposed to recognize any instances of phrases that are
         4202  +        ** part of unmatched NEAR queries. For example if this expression:
         4203  +        **
         4204  +        **    ... MATCH 'a OR (b NEAR c)'
         4205  +        **
         4206  +        ** is matched against a row containing:
         4207  +        **
         4208  +        **        'a b d e'
         4209  +        **
         4210  +        ** then any snippet() should ony highlight the "a" term, not the "b"
         4211  +        ** (as "b" is part of a non-matching NEAR clause).
         4212  +        */
         4213  +        if( pExpr->eType==FTSQUERY_NEAR && bHit==0 ){
         4214  +          Fts3Expr *p;
         4215  +          for(p=pExpr; p->pPhrase==0; p=p->pLeft){
         4216  +            p->pRight->pPhrase->doclist.pList = 0;
         4217  +          }
         4218  +          p->pPhrase->doclist.pList = 0;
         4219  +        }
         4220  +
  4012   4221           break;
  4013   4222   
  4014         -      case FTSQUERY_OR:
  4015         -        bHit = (
  4016         -            fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc)
  4017         -         || fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc)
  4018         -        );
         4223  +      case FTSQUERY_OR: {
         4224  +        int bHit1 = fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc);
         4225  +        int bHit2 = fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc);
         4226  +        bHit = bHit1 || bHit2;
  4019   4227           break;
         4228  +      }
  4020   4229   
  4021   4230         case FTSQUERY_NOT:
  4022   4231           bHit = (
  4023   4232               fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc)
  4024   4233            && !fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc)
  4025   4234           );
  4026   4235           break;
  4027   4236   
  4028   4237         default: {
  4029         -        Fts3Phrase *pPhrase = pExpr->pPhrase;
  4030         -        fts3EvalFreeDeferredDoclist(pPhrase);
  4031         -        *pRc = fts3EvalDeferredPhrase(pCsr, pPhrase);
  4032         -        bHit = (pPhrase->doclist.pList!=0);
  4033         -        pExpr->iDocid = pCsr->iPrevId;
         4238  +        if( pCsr->pDeferred ){
         4239  +          Fts3Phrase *pPhrase = pExpr->pPhrase;
         4240  +          fts3EvalFreeDeferredDoclist(pPhrase);
         4241  +          *pRc = fts3EvalDeferredPhrase(pCsr, pPhrase);
         4242  +          bHit = (pPhrase->doclist.pList!=0);
         4243  +          pExpr->iDocid = pCsr->iPrevId;
         4244  +        }else{
         4245  +          bHit = (pExpr->bEof==0 && pExpr->iDocid==pCsr->iPrevId);
         4246  +        }
  4034   4247           break;
  4035   4248         }
  4036   4249       }
  4037   4250     }
  4038   4251     return bHit;
  4039   4252   }
  4040   4253   
................................................................................
  4048   4261   **
  4049   4262   ** Or, if no error occurs and it seems the current row does match the FTS
  4050   4263   ** query, return 0.
  4051   4264   */
  4052   4265   static int fts3EvalLoadDeferred(Fts3Cursor *pCsr, int *pRc){
  4053   4266     int rc = *pRc;
  4054   4267     int bMiss = 0;
  4055         -  if( rc==SQLITE_OK && pCsr->pDeferred ){
  4056         -    rc = fts3CursorSeek(0, pCsr);
  4057         -    if( rc==SQLITE_OK ){
  4058         -      sqlite3Fts3FreeDeferredDoclists(pCsr);
  4059         -      rc = sqlite3Fts3CacheDeferredDoclists(pCsr);
         4268  +  if( rc==SQLITE_OK ){
         4269  +    if( pCsr->pDeferred ){
         4270  +      rc = fts3CursorSeek(0, pCsr);
         4271  +      if( rc==SQLITE_OK ){
         4272  +        rc = sqlite3Fts3CacheDeferredDoclists(pCsr);
         4273  +      }
  4060   4274       }
  4061   4275       bMiss = (0==fts3EvalDeferredTest(pCsr, pCsr->pExpr, &rc));
  4062   4276       sqlite3Fts3FreeDeferredDoclists(pCsr);
  4063   4277       *pRc = rc;
  4064   4278     }
  4065   4279     return (rc==SQLITE_OK && bMiss);
  4066   4280   }
................................................................................
  4145   4359       if( pExpr->bStart && !pExpr->bEof ){
  4146   4360         pExpr->bStart = 0;
  4147   4361         while( rc==SQLITE_OK && (pExpr->bStart==0 || pExpr->iDocid!=iDocid) ){
  4148   4362           fts3EvalNext(pCsr, pExpr, &rc);
  4149   4363           assert( !pExpr->bEof );
  4150   4364         }
  4151   4365       }
         4366  +  }
         4367  +
         4368  +  if( rc==SQLITE_OK 
         4369  +   && pExpr->pParent 
         4370  +   && pExpr->pParent->eType==FTSQUERY_NEAR 
         4371  +  ){
         4372  +
  4152   4373     }
  4153   4374   
  4154   4375     *pnList = pPhrase->doclist.nAll;
  4155   4376     *ppList = pPhrase->doclist.aAll;
  4156   4377     return rc;
  4157   4378   }
  4158   4379   

Changes to test/fts3defer2.test.

    44     44     INSERT INTO t1(t1) VALUES('optimize');
    45     45   }
    46     46   do_execsql_test 1.1.4 {
    47     47     SELECT count(*) FROM t1_segments WHERE length(block)>10000;
    48     48     UPDATE t1_segments SET block = zeroblob(length(block)) WHERE length(block)>10000;
    49     49   } {2}
    50     50   
           51  +do_execsql_test 1.2.0 {
           52  +  SELECT content FROM t1 WHERE t1 MATCH 'f (e a)';
           53  +} {{a b c d e f a x y}}
           54  +
    51     55   do_execsql_test 1.2.1 {
    52     56     SELECT content FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)';
    53     57   } {{a b c d e f a x y}}
    54     58   
           59  +breakpoint
    55     60   do_execsql_test 1.2.2 {
    56     61     SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1, 'pcxnal'))
    57     62     FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)';
    58     63   } [list                              \
    59     64      {a b c d [e] [f] [a] x y}         \
    60     65      {0 1 8 1 0 0 10 1 0 2 12 1}       \
    61         -   [list 3 1   1 1 1   1 8 8   1 8 8   8 5001 9]
           66  +   [list 3 1   1 1 1   1 1 1   1 8 8   8 5001 9]
    62     67   ]
    63     68   
    64     69   do_execsql_test 1.2.3 {
    65     70     SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1, 'pcxnal'))
    66     71     FROM t1 WHERE t1 MATCH 'f (e NEAR/3 a)';
    67     72   } [list                                 \
    68     73      {[a] b c d [e] [f] [a] x y}          \
    69     74      {0 2 0 1 0 1 8 1 0 0 10 1 0 2 12 1}  \
    70         -   [list 3 1   1 1 1   1 8 8   2 8 8   8 5001 9]
           75  +   [list 3 1   1 1 1   1 1 1   2 8 8   8 5001 9]
    71     76   ]
    72     77   
    73     78   do_execsql_test 1.3.1 { DROP TABLE t1 }
    74     79   
    75     80   #-----------------------------------------------------------------------------
    76     81   # Test cases fts3defer2-2.* focus specifically on the matchinfo function.
    77     82   # 
................................................................................
    95    100     do_execsql_test 2.2.$tn.1 {
    96    101       SELECT mit(matchinfo(t2, 'pcxnal')) FROM t2 WHERE t2 MATCH 'a b';
    97    102     } [list                                          \
    98    103       [list 2 1  1 54 54  1 3 3  54 372 8]        \
    99    104       [list 2 1  1 54 54  1 3 3  54 372 7]        \
   100    105     ]
   101    106   
   102         -  set sqlite_fts3_enable_parentheses 1
   103    107     do_execsql_test 2.2.$tn.2 {
          108  +    SELECT mit(matchinfo(t2, 'x')) FROM t2 WHERE t2 MATCH 'g z';
          109  +  } [list                                       \
          110  +    [list 1 2 2  1 54 54]                       \
          111  +  ]
          112  +
          113  +  set sqlite_fts3_enable_parentheses 1
          114  +  do_execsql_test 2.2.$tn.3 {
   104    115       SELECT mit(matchinfo(t2, 'x')) FROM t2 WHERE t2 MATCH 'g OR (g z)';
   105    116     } [list                                       \
   106    117       [list 1 2 2  1 2 2   1 54 54]               \
   107    118       [list 1 2 2  1 2 2   0 54 54]               \
   108    119     ]
   109    120     set sqlite_fts3_enable_parentheses 0
   110    121   }

Changes to test/fts3matchinfo.test.

    64     64     CREATE VIRTUAL TABLE t3 USING fts3(mtchinfo=fts3);
    65     65     INSERT INTO t3(mtchinfo) VALUES('Beside the lake, beneath the trees');
    66     66     SELECT mtchinfo FROM t3;
    67     67   } {{Beside the lake, beneath the trees}}
    68     68   
    69     69   do_execsql_test 3.2 {
    70     70     CREATE VIRTUAL TABLE xx USING FTS4;
           71  +}
           72  +do_execsql_test 3.3 {
    71     73     SELECT * FROM xx WHERE xx MATCH 'abc';
           74  +}
           75  +do_execsql_test 3.4 {
    72     76     SELECT * FROM xx WHERE xx MATCH 'a b c';
    73     77   }
    74     78   
    75     79   
    76     80   #--------------------------------------------------------------------------
    77     81   # Proc [do_matchinfo_test] is used to test the FTSX matchinfo() function.
    78     82   #