SQLite4
Check-in [0d5a640f1f]
Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fixes for snippet function and tests. Add API to determine the number of tokens in an FTS query phrase.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | matchinfo
Files: files | file ages | folders
SHA1: 0d5a640f1f63b335288d4b5cd8ab1fcb7c59a305
User & Date: dan 2013-01-09 17:16:24
Context
2013-01-09
18:09
Fix a few compiler warnings and test failures. Leaf check-in: 201233ee64 user: dan tags: matchinfo
17:16
Fixes for snippet function and tests. Add API to determine the number of tokens in an FTS query phrase. check-in: 0d5a640f1f user: dan tags: matchinfo
2013-01-08
20:35
Add tests and many fixes for snippet implementation. Some tests are still failing. check-in: a257d81d4b user: dan tags: matchinfo
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/fts5.c.

  2262   2262         int i;
  2263   2263         for(i=0; i<pCsr->pInfo->nCol; i++){
  2264   2264           sqlite4DbFree(db, pCsr->aMem[i].zMalloc);
  2265   2265         }
  2266   2266         sqlite4DbFree(db, pCsr->aMem);
  2267   2267       }
  2268   2268   
         2269  +    sqlite4KVCursorClose(pCsr->pCsr);
  2269   2270       fts5ExpressionFree(db, pCsr->pExpr);
  2270   2271       sqlite4DbFree(db, pCsr->pIter);
  2271   2272       sqlite4DbFree(db, pCsr->aKey);
  2272   2273       sqlite4DbFree(db, pCsr->anRow);
  2273   2274       sqlite4DbFree(db, pCsr);
  2274   2275     }
  2275   2276   }
................................................................................
  2683   2684     if( pCtx->pFts ){
  2684   2685       *pn = pCtx->pFts->pExpr->nPhrase;
  2685   2686     }else{
  2686   2687       rc = SQLITE4_MISUSE;
  2687   2688     }
  2688   2689     return rc;
  2689   2690   }
         2691  +
         2692  +int sqlite4_mi_phrase_token_count(sqlite4_context *pCtx, int iP, int *pn){
         2693  +  int rc = SQLITE4_OK;
         2694  +  if( pCtx->pFts ){
         2695  +    Fts5Expr *pExpr = pCtx->pFts->pExpr;
         2696  +    if( iP>pExpr->nPhrase || iP<0 ){
         2697  +      *pn = 0;
         2698  +    }else{
         2699  +      *pn = pExpr->apPhrase[iP]->nToken;
         2700  +    }
         2701  +  }else{
         2702  +    rc = SQLITE4_MISUSE;
         2703  +  }
         2704  +  return rc;
         2705  +}
  2690   2706   
  2691   2707   int sqlite4_mi_stream_count(sqlite4_context *pCtx, int *pn){
  2692   2708     int rc = SQLITE4_OK;
  2693   2709     Fts5Cursor *pCsr = pCtx->pFts;
  2694   2710     if( pCsr ){
  2695   2711       rc = fts5CsrLoadGlobal(pCtx->pFts);
  2696   2712       if( rc==SQLITE4_OK ) *pn = pCsr->pGlobal->nStream;
................................................................................
  3060   3076   
  3061   3077     if( pBest==0 ){
  3062   3078       pIter->iCurrent = -1;
  3063   3079     }else{
  3064   3080       pIter->iCurrent = pBest - pIter->aList;
  3065   3081     }
  3066   3082   }
         3083  +
         3084  +static void fts5InitExprIterator(
         3085  +  const u8 *aPk, 
         3086  +  int nPk, 
         3087  +  Fts5ExprNode *p,
         3088  +  Fts5MatchIter *pIter
         3089  +){
         3090  +  if( p ){
         3091  +    if( p->eType==TOKEN_PRIMITIVE ){
         3092  +      if( p->nPk==nPk && 0==memcmp(aPk, p->aPk, nPk) ){
         3093  +        int i;
         3094  +        for(i=0; i<p->pPhrase->nStr; i++){
         3095  +          Fts5Str *pStr = &p->pPhrase->aStr[i];
         3096  +          InstanceList *pList = &pIter->aList[pIter->iCurrent++];
         3097  +          fts5InstanceListInit(pStr->aList, pStr->nList, pList);
         3098  +          fts5InstanceListNext(pList);
         3099  +        }
         3100  +      }else{
         3101  +        memset(&pIter->aList[pIter->iCurrent], 0, sizeof(InstanceList));
         3102  +        pIter->iCurrent += p->pPhrase->nStr;
         3103  +      }
         3104  +    }
         3105  +    fts5InitExprIterator(aPk, nPk, p->pLeft, pIter);
         3106  +    fts5InitExprIterator(aPk, nPk, p->pRight, pIter);
         3107  +  }
         3108  +}
         3109  +
         3110  +static void fts5InitIterator(Fts5Cursor *pCsr){
         3111  +  Fts5MatchIter *pIter = pCsr->pIter;
         3112  +  Fts5ExprNode *pRoot = pCsr->pExpr->pRoot;
         3113  +
         3114  +  pIter->iCurrent = 0;
         3115  +  fts5InitExprIterator(pRoot->aPk, pRoot->nPk, pRoot, pIter);
         3116  +  pIter->iMatch = 0;
         3117  +  pIter->bValid = 1;
         3118  +  fts5IterSetCurrent(pIter, pCsr->pExpr->nPhrase);
         3119  +}
  3067   3120   
  3068   3121   int sqlite4_mi_match_detail(
  3069   3122     sqlite4_context *pCtx,          /* Context object passed to mi function */
  3070   3123     int iMatch,                     /* Index of match */
  3071   3124     int *piOff,                     /* OUT: Token offset of match */
  3072   3125     int *piC,                       /* OUT: Column number of match iMatch */
  3073   3126     int *piS,                       /* OUT: Stream number of match iMatch */
................................................................................
  3088   3141           pIter->aList = (InstanceList *)&pIter[1];
  3089   3142         }else{
  3090   3143           rc = SQLITE4_NOMEM;
  3091   3144         }
  3092   3145       }
  3093   3146   
  3094   3147       if( rc==SQLITE4_OK && (pIter->bValid==0 || iMatch<pIter->iMatch) ){
         3148  +      fts5InitIterator(pCsr);
         3149  +#if 0
  3095   3150         int i;
  3096   3151         for(i=0; i<pCsr->pExpr->nPhrase; i++){
  3097   3152           Fts5Str *pStr = pCsr->pExpr->apPhrase[i];
  3098   3153           fts5InstanceListInit(pStr->aList, pStr->nList, &pIter->aList[i]);
  3099   3154           fts5InstanceListNext(&pIter->aList[i]);
  3100   3155         }
  3101         -
  3102   3156         pIter->iMatch = 0;
  3103   3157         fts5IterSetCurrent(pIter, pCsr->pExpr->nPhrase);
         3158  +#endif
  3104   3159       }
  3105   3160   
  3106   3161       if( rc==SQLITE4_OK ){
  3107   3162         assert( pIter->iMatch<=iMatch );
  3108   3163         while( pIter->iCurrent>=0 && pIter->iMatch<iMatch ){
  3109   3164           fts5InstanceListNext(&pIter->aList[pIter->iCurrent]);
  3110   3165           fts5IterSetCurrent(pIter, pCsr->pExpr->nPhrase);

Changes to src/fts5func.c.

   322    322       rc = sqlite4_mi_match_detail(pCtx, i, &iOff, &iCol, &iStream, &iPhrase);
   323    323       if( rc==SQLITE4_OK ){
   324    324         u64 tmask = 0;
   325    325         u64 miss = 0;
   326    326         int iMask;
   327    327         int nShift; 
   328    328         int nScore = 0;
          329  +
          330  +      int nPTok;
          331  +      int iPTok;
   329    332   
   330    333         if( iColumn>=0 && iColumn!=iCol ) continue;
   331    334   
   332    335         allmask |= (1 << iPhrase);
   333    336   
   334    337         nShift = ((iPrevCol==iCol) ? (iOff-iPrev) : 100);
   335    338   
................................................................................
   336    339         for(iMask=0; iMask<nPhrase; iMask++){
   337    340           if( nShift<64){
   338    341             aMask[iMask] = aMask[iMask] >> nShift;
   339    342           }else{
   340    343             aMask[iMask] = 0;
   341    344           }
   342    345         }
   343         -      aMask[iPhrase] = aMask[iPhrase] | (1<<(nToken-1));
          346  +      sqlite4_mi_phrase_token_count(pCtx, iPhrase, &nPTok);
          347  +      for(iPTok=0; iPTok<nPTok; iPTok++){
          348  +        aMask[iPhrase] = aMask[iPhrase] | (1<<(nToken-1+iPTok));
          349  +      }
   344    350   
   345    351         for(iMask=0; iMask<nPhrase; iMask++){
   346    352           if( aMask[iMask] ){
   347    353             nScore += (((1 << iMask) & mask) ? 100 : 1);
   348    354           }else{
   349    355             miss |= (1 << iMask);
   350    356           }

Changes to src/sqlite.h.in.

  4429   4429   **
  4430   4430   ** sqlite4_mi_phrase_count():
  4431   4431   **   Set *pn to the number of phrases in the query.
  4432   4432   **
  4433   4433   ** sqlite4_mi_stream_count():
  4434   4434   **   Set *pn to the number of streams in the FTS index.
  4435   4435   **
         4436  +** sqlite4_mi_phrase_token_count():
         4437  +**   Set *pn to the number of tokens in phrase iP of the query.
         4438  +**
  4436   4439   ** sqlite4_mi_size():
  4437   4440   **   Set *pn to the number of tokens belonging to stream iS in the value 
  4438   4441   **   stored in column iC of the current row. 
  4439   4442   **
  4440   4443   **   Either or both of iS and iC may be negative. If iC is negative, then the
  4441   4444   **   output value is the total number of tokens for the specified stream (or
  4442   4445   **   streams) across all table columns. Similarly, if iS is negative, the 
................................................................................
  4483   4486   **   Set *ppVal to point to an sqlite4_value object containing the value
  4484   4487   **   read from column iCol of the current row. This object is valid until
  4485   4488   **   the function callback returns.
  4486   4489   */
  4487   4490   int sqlite4_mi_column_count(sqlite4_context *, int *pn);
  4488   4491   int sqlite4_mi_phrase_count(sqlite4_context *, int *pn);
  4489   4492   int sqlite4_mi_stream_count(sqlite4_context *, int *pn);
         4493  +int sqlite4_mi_phrase_token_count(sqlite4_context *, int iP, int *pn);
  4490   4494   
  4491   4495   int sqlite4_mi_total_size(sqlite4_context *, int iC, int iS, int *pn);
  4492   4496   int sqlite4_mi_total_rows(sqlite4_context *, int *pn);
  4493   4497   
  4494   4498   int sqlite4_mi_row_count(sqlite4_context *, int iC, int iS, int iP, int *pn);
  4495   4499   
  4496   4500   int sqlite4_mi_size(sqlite4_context *, int iC, int iS, int *pn);

Changes to test/fts5snippet.test.

   114    114   }]
   115    115   
   116    116   foreach {DO_MALLOC_TEST enc} {
   117    117     0 utf8
   118    118     1 utf8
   119    119     1 utf16
   120    120   } {
   121         -if {$DO_MALLOC_TEST} continue
          121  +if {$DO_MALLOC_TEST || $enc=="utf16"} continue
   122    122   
   123    123     db close
   124    124     forcedelete test.db
   125    125     sqlite4 db test.db
   126    126     sqlite4_db_config_lookaside db 0 0 0
   127    127     db eval "PRAGMA encoding = \"$enc\""
   128    128   
................................................................................
   208    208     }
   209    209     do_snippet_test $T.4.9 {four NEAR nineteen} 0 5 {
   210    210        ...eighteen {nineteen} twenty...three {four} five...
   211    211     }
   212    212     do_snippet_test $T.4.10 {four NEAR nineteen} 0 -5 {
   213    213        ...seventeen eighteen {nineteen} twenty one...two three {four} five six...
   214    214     }
   215         -  do_snippet_test $T.4.11 {four NOT (nineteen twentyone)} 0 5 {
          215  +  do_snippet_test $T.4.11 {four NOT (nineteen+twentyone)} 0 5 {
   216    216        ...two three {four} five six...
   217    217     } {
   218    218        ...two three {four} five six...
   219    219     }
   220    220     do_snippet_test $T.4.12 {four OR nineteen NEAR twentyone} 0 5 {
   221    221        ...two three {four} five six...
   222    222     } {