Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fixes for snippet function and tests. Add API to determine the number of tokens in an FTS query phrase.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | matchinfo
Files: files | file ages | folders
SHA1: 0d5a640f1f63b335288d4b5cd8ab1fcb7c59a305
User & Date: dan 2013-01-09 17:16:24.181
Context
2013-01-09
18:09
Fix a few compiler warnings and test failures. Leaf check-in: 201233ee64 user: dan tags: matchinfo
17:16
Fixes for snippet function and tests. Add API to determine the number of tokens in an FTS query phrase. check-in: 0d5a640f1f user: dan tags: matchinfo
2013-01-08
20:35
Add tests and many fixes for snippet implementation. Some tests are still failing. check-in: a257d81d4b user: dan tags: matchinfo
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/fts5.c.
2262
2263
2264
2265
2266
2267
2268

2269
2270
2271
2272
2273
2274
2275
      int i;
      for(i=0; i<pCsr->pInfo->nCol; i++){
        sqlite4DbFree(db, pCsr->aMem[i].zMalloc);
      }
      sqlite4DbFree(db, pCsr->aMem);
    }


    fts5ExpressionFree(db, pCsr->pExpr);
    sqlite4DbFree(db, pCsr->pIter);
    sqlite4DbFree(db, pCsr->aKey);
    sqlite4DbFree(db, pCsr->anRow);
    sqlite4DbFree(db, pCsr);
  }
}







>







2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
      int i;
      for(i=0; i<pCsr->pInfo->nCol; i++){
        sqlite4DbFree(db, pCsr->aMem[i].zMalloc);
      }
      sqlite4DbFree(db, pCsr->aMem);
    }

    sqlite4KVCursorClose(pCsr->pCsr);
    fts5ExpressionFree(db, pCsr->pExpr);
    sqlite4DbFree(db, pCsr->pIter);
    sqlite4DbFree(db, pCsr->aKey);
    sqlite4DbFree(db, pCsr->anRow);
    sqlite4DbFree(db, pCsr);
  }
}
2683
2684
2685
2686
2687
2688
2689















2690
2691
2692
2693
2694
2695
2696
  if( pCtx->pFts ){
    *pn = pCtx->pFts->pExpr->nPhrase;
  }else{
    rc = SQLITE4_MISUSE;
  }
  return rc;
}
















int sqlite4_mi_stream_count(sqlite4_context *pCtx, int *pn){
  int rc = SQLITE4_OK;
  Fts5Cursor *pCsr = pCtx->pFts;
  if( pCsr ){
    rc = fts5CsrLoadGlobal(pCtx->pFts);
    if( rc==SQLITE4_OK ) *pn = pCsr->pGlobal->nStream;







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
  if( pCtx->pFts ){
    *pn = pCtx->pFts->pExpr->nPhrase;
  }else{
    rc = SQLITE4_MISUSE;
  }
  return rc;
}

/*
** Set *pn to the number of tokens in phrase iP of the query associated
** with context pCtx.
**
** If iP is not a valid phrase index (negative, or greater than or equal
** to the number of phrases in the query), *pn is set to 0 and SQLITE4_OK
** is returned. If pCtx has no FTS cursor attached (pCtx->pFts is NULL),
** SQLITE4_MISUSE is returned and *pn is left unmodified.
*/
int sqlite4_mi_phrase_token_count(sqlite4_context *pCtx, int iP, int *pn){
  int rc = SQLITE4_OK;
  if( pCtx->pFts ){
    Fts5Expr *pExpr = pCtx->pFts->pExpr;
    /* Valid indexes are 0..nPhrase-1. Index nPhrase itself must be
    ** rejected, otherwise apPhrase[] is read one element past its end. */
    if( iP<0 || iP>=pExpr->nPhrase ){
      *pn = 0;
    }else{
      *pn = pExpr->apPhrase[iP]->nToken;
    }
  }else{
    rc = SQLITE4_MISUSE;
  }
  return rc;
}

int sqlite4_mi_stream_count(sqlite4_context *pCtx, int *pn){
  int rc = SQLITE4_OK;
  Fts5Cursor *pCsr = pCtx->pFts;
  if( pCsr ){
    rc = fts5CsrLoadGlobal(pCtx->pFts);
    if( rc==SQLITE4_OK ) *pn = pCsr->pGlobal->nStream;
3060
3061
3062
3063
3064
3065
3066





































3067
3068
3069
3070
3071
3072
3073

  if( pBest==0 ){
    pIter->iCurrent = -1;
  }else{
    pIter->iCurrent = pBest - pIter->aList;
  }
}






































int sqlite4_mi_match_detail(
  sqlite4_context *pCtx,          /* Context object passed to mi function */
  int iMatch,                     /* Index of match */
  int *piOff,                     /* OUT: Token offset of match */
  int *piC,                       /* OUT: Column number of match iMatch */
  int *piS,                       /* OUT: Stream number of match iMatch */







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126

  if( pBest==0 ){
    pIter->iCurrent = -1;
  }else{
    pIter->iCurrent = pBest - pIter->aList;
  }
}

/*
** Recursively walk the expression tree rooted at p, populating consecutive
** entries of pIter->aList[] starting at index pIter->iCurrent. On return,
** pIter->iCurrent has been advanced past every entry written by this call.
**
** Each TOKEN_PRIMITIVE node claims p->pPhrase->nStr entries:
**
**   * If the node's key (p->aPk/p->nPk) is byte-for-byte equal to the
**     key passed in (aPk/nPk), each entry is initialized from the
**     corresponding Fts5Str instance list and stepped once with
**     fts5InstanceListNext().
**
**   * Otherwise, all nStr entries are zeroed.
**     NOTE(review): presumably a zeroed InstanceList is treated as an
**     exhausted/empty list by the iteration code - confirm.
**
** The left and right children of p are then visited, in that order.
** A NULL p is a no-op.
*/
static void fts5InitExprIterator(
  const u8 *aPk, 
  int nPk, 
  Fts5ExprNode *p,
  Fts5MatchIter *pIter
){
  if( p ){
    if( p->eType==TOKEN_PRIMITIVE ){
      int nStr = p->pPhrase->nStr;
      if( p->nPk==nPk && 0==memcmp(aPk, p->aPk, nPk) ){
        int i;
        for(i=0; i<nStr; i++){
          Fts5Str *pStr = &p->pPhrase->aStr[i];
          InstanceList *pList = &pIter->aList[pIter->iCurrent++];
          fts5InstanceListInit(pStr->aList, pStr->nList, pList);
          fts5InstanceListNext(pList);
        }
      }else{
        /* Clear every slot belonging to this phrase, not just the first.
        ** iCurrent is advanced by nStr below, so any slot left untouched
        ** here would retain whatever it held from an earlier
        ** initialization of the iterator. */
        if( nStr>0 ){
          memset(&pIter->aList[pIter->iCurrent], 0,
                 sizeof(InstanceList) * nStr);
        }
        pIter->iCurrent += nStr;
      }
    }
    fts5InitExprIterator(aPk, nPk, p->pLeft, pIter);
    fts5InitExprIterator(aPk, nPk, p->pRight, pIter);
  }
}

/*
** (Re)initialize the match iterator belonging to cursor pCsr.
**
** The instance-list array is populated by walking the whole expression
** tree from its root, using the root node's own key (aPk/nPk) as the key
** that primitive nodes are compared against. The iterator is then marked
** valid, its match counter is reset to 0, and fts5IterSetCurrent() is
** called to select the current entry.
*/
static void fts5InitIterator(Fts5Cursor *pCsr){
  Fts5ExprNode *pTop = pCsr->pExpr->pRoot;
  Fts5MatchIter *pMatch = pCsr->pIter;

  /* While the tree is being walked, iCurrent serves as the index of the
  ** next aList[] slot to fill, so it must start at zero. */
  pMatch->iCurrent = 0;
  fts5InitExprIterator(pTop->aPk, pTop->nPk, pTop, pMatch);

  pMatch->bValid = 1;
  pMatch->iMatch = 0;
  fts5IterSetCurrent(pMatch, pCsr->pExpr->nPhrase);
}

int sqlite4_mi_match_detail(
  sqlite4_context *pCtx,          /* Context object passed to mi function */
  int iMatch,                     /* Index of match */
  int *piOff,                     /* OUT: Token offset of match */
  int *piC,                       /* OUT: Column number of match iMatch */
  int *piS,                       /* OUT: Stream number of match iMatch */
3088
3089
3090
3091
3092
3093
3094


3095
3096
3097
3098
3099
3100
3101
3102
3103

3104
3105
3106
3107
3108
3109
3110
        pIter->aList = (InstanceList *)&pIter[1];
      }else{
        rc = SQLITE4_NOMEM;
      }
    }

    if( rc==SQLITE4_OK && (pIter->bValid==0 || iMatch<pIter->iMatch) ){


      int i;
      for(i=0; i<pCsr->pExpr->nPhrase; i++){
        Fts5Str *pStr = pCsr->pExpr->apPhrase[i];
        fts5InstanceListInit(pStr->aList, pStr->nList, &pIter->aList[i]);
        fts5InstanceListNext(&pIter->aList[i]);
      }

      pIter->iMatch = 0;
      fts5IterSetCurrent(pIter, pCsr->pExpr->nPhrase);

    }

    if( rc==SQLITE4_OK ){
      assert( pIter->iMatch<=iMatch );
      while( pIter->iCurrent>=0 && pIter->iMatch<iMatch ){
        fts5InstanceListNext(&pIter->aList[pIter->iCurrent]);
        fts5IterSetCurrent(pIter, pCsr->pExpr->nPhrase);







>
>






<


>







3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155

3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
        pIter->aList = (InstanceList *)&pIter[1];
      }else{
        rc = SQLITE4_NOMEM;
      }
    }

    if( rc==SQLITE4_OK && (pIter->bValid==0 || iMatch<pIter->iMatch) ){
      fts5InitIterator(pCsr);
#if 0
      int i;
      for(i=0; i<pCsr->pExpr->nPhrase; i++){
        Fts5Str *pStr = pCsr->pExpr->apPhrase[i];
        fts5InstanceListInit(pStr->aList, pStr->nList, &pIter->aList[i]);
        fts5InstanceListNext(&pIter->aList[i]);
      }

      pIter->iMatch = 0;
      fts5IterSetCurrent(pIter, pCsr->pExpr->nPhrase);
#endif
    }

    if( rc==SQLITE4_OK ){
      assert( pIter->iMatch<=iMatch );
      while( pIter->iCurrent>=0 && pIter->iMatch<iMatch ){
        fts5InstanceListNext(&pIter->aList[pIter->iCurrent]);
        fts5IterSetCurrent(pIter, pCsr->pExpr->nPhrase);
Changes to src/fts5func.c.
322
323
324
325
326
327
328



329
330
331
332
333
334
335
336
337
338
339
340
341
342


343

344
345
346
347
348
349
350
    rc = sqlite4_mi_match_detail(pCtx, i, &iOff, &iCol, &iStream, &iPhrase);
    if( rc==SQLITE4_OK ){
      u64 tmask = 0;
      u64 miss = 0;
      int iMask;
      int nShift; 
      int nScore = 0;




      if( iColumn>=0 && iColumn!=iCol ) continue;

      allmask |= (1 << iPhrase);

      nShift = ((iPrevCol==iCol) ? (iOff-iPrev) : 100);

      for(iMask=0; iMask<nPhrase; iMask++){
        if( nShift<64){
          aMask[iMask] = aMask[iMask] >> nShift;
        }else{
          aMask[iMask] = 0;
        }
      }


      aMask[iPhrase] = aMask[iPhrase] | (1<<(nToken-1));


      for(iMask=0; iMask<nPhrase; iMask++){
        if( aMask[iMask] ){
          nScore += (((1 << iMask) & mask) ? 100 : 1);
        }else{
          miss |= (1 << iMask);
        }







>
>
>














>
>
|
>







322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
    rc = sqlite4_mi_match_detail(pCtx, i, &iOff, &iCol, &iStream, &iPhrase);
    if( rc==SQLITE4_OK ){
      u64 tmask = 0;
      u64 miss = 0;
      int iMask;
      int nShift; 
      int nScore = 0;

      int nPTok;
      int iPTok;

      if( iColumn>=0 && iColumn!=iCol ) continue;

      allmask |= (1 << iPhrase);

      nShift = ((iPrevCol==iCol) ? (iOff-iPrev) : 100);

      for(iMask=0; iMask<nPhrase; iMask++){
        if( nShift<64){
          aMask[iMask] = aMask[iMask] >> nShift;
        }else{
          aMask[iMask] = 0;
        }
      }
      sqlite4_mi_phrase_token_count(pCtx, iPhrase, &nPTok);
      for(iPTok=0; iPTok<nPTok; iPTok++){
        aMask[iPhrase] = aMask[iPhrase] | (1<<(nToken-1+iPTok));
      }

      for(iMask=0; iMask<nPhrase; iMask++){
        if( aMask[iMask] ){
          nScore += (((1 << iMask) & mask) ? 100 : 1);
        }else{
          miss |= (1 << iMask);
        }
Changes to src/sqlite.h.in.
4429
4430
4431
4432
4433
4434
4435



4436
4437
4438
4439
4440
4441
4442
**
** sqlite4_mi_phrase_count():
**   Set *pn to the number of phrases in the query.
**
** sqlite4_mi_stream_count():
**   Set *pn to the number of streams in the FTS index.
**



** sqlite4_mi_size():
**   Set *pn to the number of tokens belonging to stream iS in the value 
**   stored in column iC of the current row. 
**
**   Either or both of iS and iC may be negative. If iC is negative, then the
**   output value is the total number of tokens for the specified stream (or
**   streams) across all table columns. Similarly, if iS is negative, the 







>
>
>







4429
4430
4431
4432
4433
4434
4435
4436
4437
4438
4439
4440
4441
4442
4443
4444
4445
**
** sqlite4_mi_phrase_count():
**   Set *pn to the number of phrases in the query.
**
** sqlite4_mi_stream_count():
**   Set *pn to the number of streams in the FTS index.
**
** sqlite4_mi_phrase_token_count():
**   Set *pn to the number of tokens in phrase iP of the query.
**
** sqlite4_mi_size():
**   Set *pn to the number of tokens belonging to stream iS in the value 
**   stored in column iC of the current row. 
**
**   Either or both of iS and iC may be negative. If iC is negative, then the
**   output value is the total number of tokens for the specified stream (or
**   streams) across all table columns. Similarly, if iS is negative, the 
4483
4484
4485
4486
4487
4488
4489

4490
4491
4492
4493
4494
4495
4496
**   Set *ppVal to point to an sqlite4_value object containing the value
**   read from column iCol of the current row. This object is valid until
**   the function callback returns.
*/
int sqlite4_mi_column_count(sqlite4_context *, int *pn);
int sqlite4_mi_phrase_count(sqlite4_context *, int *pn);
int sqlite4_mi_stream_count(sqlite4_context *, int *pn);


int sqlite4_mi_total_size(sqlite4_context *, int iC, int iS, int *pn);
int sqlite4_mi_total_rows(sqlite4_context *, int *pn);

int sqlite4_mi_row_count(sqlite4_context *, int iC, int iS, int iP, int *pn);

int sqlite4_mi_size(sqlite4_context *, int iC, int iS, int *pn);







>







4486
4487
4488
4489
4490
4491
4492
4493
4494
4495
4496
4497
4498
4499
4500
**   Set *ppVal to point to an sqlite4_value object containing the value
**   read from column iCol of the current row. This object is valid until
**   the function callback returns.
*/
int sqlite4_mi_column_count(sqlite4_context *, int *pn);
int sqlite4_mi_phrase_count(sqlite4_context *, int *pn);
int sqlite4_mi_stream_count(sqlite4_context *, int *pn);
int sqlite4_mi_phrase_token_count(sqlite4_context *, int iP, int *pn);

int sqlite4_mi_total_size(sqlite4_context *, int iC, int iS, int *pn);
int sqlite4_mi_total_rows(sqlite4_context *, int *pn);

int sqlite4_mi_row_count(sqlite4_context *, int iC, int iS, int iP, int *pn);

int sqlite4_mi_size(sqlite4_context *, int iC, int iS, int *pn);
Changes to test/fts5snippet.test.
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
}]

foreach {DO_MALLOC_TEST enc} {
  0 utf8
  1 utf8
  1 utf16
} {
if {$DO_MALLOC_TEST} continue

  db close
  forcedelete test.db
  sqlite4 db test.db
  sqlite4_db_config_lookaside db 0 0 0
  db eval "PRAGMA encoding = \"$enc\""








|







114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
}]

foreach {DO_MALLOC_TEST enc} {
  0 utf8
  1 utf8
  1 utf16
} {
if {$DO_MALLOC_TEST || $enc=="utf16"} continue

  db close
  forcedelete test.db
  sqlite4 db test.db
  sqlite4_db_config_lookaside db 0 0 0
  db eval "PRAGMA encoding = \"$enc\""

208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
  }
  do_snippet_test $T.4.9 {four NEAR nineteen} 0 5 {
     ...eighteen {nineteen} twenty...three {four} five...
  }
  do_snippet_test $T.4.10 {four NEAR nineteen} 0 -5 {
     ...seventeen eighteen {nineteen} twenty one...two three {four} five six...
  }
  do_snippet_test $T.4.11 {four NOT (nineteen twentyone)} 0 5 {
     ...two three {four} five six...
  } {
     ...two three {four} five six...
  }
  do_snippet_test $T.4.12 {four OR nineteen NEAR twentyone} 0 5 {
     ...two three {four} five six...
  } {







|







208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
  }
  do_snippet_test $T.4.9 {four NEAR nineteen} 0 5 {
     ...eighteen {nineteen} twenty...three {four} five...
  }
  do_snippet_test $T.4.10 {four NEAR nineteen} 0 -5 {
     ...seventeen eighteen {nineteen} twenty one...two three {four} five six...
  }
  do_snippet_test $T.4.11 {four NOT (nineteen+twentyone)} 0 5 {
     ...two three {four} five six...
  } {
     ...two three {four} five six...
  }
  do_snippet_test $T.4.12 {four OR nineteen NEAR twentyone} 0 5 {
     ...two three {four} five six...
  } {