SQLite4
Check-in [a257d81d4b]
Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add tests and many fixes for snippet implementation. Some tests are still failing.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | matchinfo
Files: files | file ages | folders
SHA1: a257d81d4b850eced3971937f5ee39f2c4de4098
User & Date: dan 2013-01-08 20:35:12
Context
2013-01-09
17:16
Fixes for snippet function and tests. Add API to determine the number of tokens in an FTS query phrase. check-in: 0d5a640f1f user: dan tags: matchinfo
2013-01-08
20:35
Add tests and many fixes for snippet implementation. Some tests are still failing. check-in: a257d81d4b user: dan tags: matchinfo
11:45
Fix an fts5 problem to do with initializing the global size record. Also have the checksum routine ignore size records when calculating the index checksum. check-in: e7b52edf68 user: dan tags: matchinfo
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/fts5.c.

928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
....
1277
1278
1279
1280
1281
1282
1283
1284

1285
1286
1287
1288
1289
1290
1291
....
1354
1355
1356
1357
1358
1359
1360


1361
1362
1363
1364
1365
1366
1367
....
1419
1420
1421
1422
1423
1424
1425

1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
....
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
....
2250
2251
2252
2253
2254
2255
2256








2257
2258
2259
2260
2261
2262
2263
....
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
....
3092
3093
3094
3095
3096
3097
3098

3099
3100
3101
3102
3103
3104
3105
            rc = SQLITE4_NOMEM;
          }else{
            pNode->eType = TOKEN_PRIMITIVE;
            pNode->pPhrase = pPhrase;
            *pp = pNode;
          }
        }
        nStr++;
        break;
      }

      case TOKEN_AND:
      case TOKEN_OR:
      case TOKEN_NOT: {
        Fts5ExprNode **pp = aHier[nHier-1].ppNode;
................................................................................

  if( iStream>=p->nStream ){
    int nOld = p->nStream;
    int nNew = 4;
    while( nNew<=iStream ) nNew = nNew*2;
    p->aSz = (i64*)sqlite4DbReallocOrFree(db, p->aSz, nNew*p->nCol*sizeof(i64));
    if( p->aSz==0 ) goto tokenize_cb_out;
    memset(&p->aSz[p->nStream * p->nCol], 0, (nNew-nOld)*p->nCol*sizeof(i64));

  }
  p->aSz[iStream*p->nCol + p->iCol]++;

  pTerm = (TokenizeTerm *)sqlite4HashFind(&p->hash, zToken, nToken);
  if( pTerm==0 ){
    /* Size the initial allocation so that it fits in the lookaside buffer */
    int nAlloc = sizeof(TokenizeTerm) + nToken + 32;
................................................................................
      if( pnRow ){
        int nByte = sizeof(Fts5Size) + sizeof(i64) * pInfo->nCol * nMinStream;
        pSz = sqlite4DbMallocZero(db, nByte);
        if( pSz==0 ){
          rc = SQLITE4_NOMEM;
        }else{
          pSz->aSz = (i64 *)&pSz[1];


          *pnRow = 0;
          rc = SQLITE4_OK;
        }
      }else{
        rc = SQLITE4_CORRUPT_BKPT;
      }
    }else if( rc==SQLITE4_OK ){
................................................................................
  int iOff = 0;
  int iCol;

  if( nRow>=0 ){
    iOff += sqlite4PutVarint(&a[iOff], nRow);
  }
  iOff += sqlite4PutVarint(&a[iOff], pSz->nStream);

  for(iCol=0; iCol<pSz->nCol; iCol++){
    int i;
    for(i=0; i<pSz->nStream; i++){
      iOff += sqlite4PutVarint(&a[iOff], pSz->aSz[iCol*pSz->nCol+i]);
    }
  }

  return sqlite4KVStoreReplace(p, aKey, nKey, a, iOff);
}

static int fts5CsrLoadGlobal(Fts5Cursor *pCsr){
................................................................................
    nByte += nCol * sizeof(char *);
  }

  pInfo = sqlite4DbMallocZero(db, nByte);
  if( pInfo ){
    pInfo->iDb = sqlite4SchemaToIndex(db, pIdx->pSchema);
    pInfo->iRoot = pIdx->tnum;
    sqlite4FindPrimaryKey(pIdx->pTable, &pInfo->iTbl);
    pInfo->nCol = pIdx->pTable->nCol;
    fts5TokenizerCreate(pParse, pIdx->pFts, &pInfo->pTokenizer, &pInfo->p);

    if( pInfo->p==0 ){
      assert( pParse->nErr );
      sqlite4DbFree(db, pInfo);
      pInfo = 0;
................................................................................
*/
static int fts5OpenCursors(sqlite4 *db, Fts5Info *pInfo, Fts5Cursor *pCsr){
  return fts5OpenExprCursors(db, pInfo, pCsr->pExpr->pRoot);
}

void sqlite4Fts5Close(sqlite4 *db, Fts5Cursor *pCsr){
  if( pCsr ){








    fts5ExpressionFree(db, pCsr->pExpr);
    sqlite4DbFree(db, pCsr->pIter);
    sqlite4DbFree(db, pCsr->aKey);
    sqlite4DbFree(db, pCsr->anRow);
    sqlite4DbFree(db, pCsr);
  }
}
................................................................................

  if( iC<0 && iS<0 ){
    int nFin = pSz->nCol * pSz->nStream;
    for(i=0; i<nFin; i++) nToken += pSz->aSz[i];
  }else if( iC<0 ){
    for(i=0; i<pSz->nCol; i++) nToken += pSz->aSz[i*pSz->nStream + iS];
  }else if( iS<0 ){
    for(i=0; i<pSz->nStream; i++) nToken += pSz->aSz[pSz->nStream*iC + iS];
  }else if( iC<pSz->nCol && iS<pSz->nStream ){
    nToken = pSz->aSz[iC * pSz->nStream + iS];
  }

  return nToken;
}

................................................................................
    }

    if( rc==SQLITE4_OK ){
      assert( pIter->iMatch<=iMatch );
      while( pIter->iCurrent>=0 && pIter->iMatch<iMatch ){
        fts5InstanceListNext(&pIter->aList[pIter->iCurrent]);
        fts5IterSetCurrent(pIter, pCsr->pExpr->nPhrase);

      }
      if( pIter->iCurrent<0 ){
        rc = SQLITE4_NOTFOUND;
      }else{
        InstanceList *p = &pIter->aList[pIter->iCurrent];
        *piOff = p->iOff;
        *piC = p->iCol;







|







 







|
>







 







>
>







 







>



|







 







|







 







>
>
>
>
>
>
>
>







 







|







 







>







928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
....
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
....
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
....
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
....
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
....
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
....
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
....
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
            rc = SQLITE4_NOMEM;
          }else{
            pNode->eType = TOKEN_PRIMITIVE;
            pNode->pPhrase = pPhrase;
            *pp = pNode;
          }
        }
        nStr += pPhrase->nStr;
        break;
      }

      case TOKEN_AND:
      case TOKEN_OR:
      case TOKEN_NOT: {
        Fts5ExprNode **pp = aHier[nHier-1].ppNode;
................................................................................

  if( iStream>=p->nStream ){
    int nOld = p->nStream;
    int nNew = 4;
    while( nNew<=iStream ) nNew = nNew*2;
    p->aSz = (i64*)sqlite4DbReallocOrFree(db, p->aSz, nNew*p->nCol*sizeof(i64));
    if( p->aSz==0 ) goto tokenize_cb_out;
    memset(&p->aSz[nOld * p->nCol], 0, (nNew-nOld)*p->nCol*sizeof(i64));
    p->nStream = nNew;
  }
  p->aSz[iStream*p->nCol + p->iCol]++;

  pTerm = (TokenizeTerm *)sqlite4HashFind(&p->hash, zToken, nToken);
  if( pTerm==0 ){
    /* Size the initial allocation so that it fits in the lookaside buffer */
    int nAlloc = sizeof(TokenizeTerm) + nToken + 32;
................................................................................
      if( pnRow ){
        int nByte = sizeof(Fts5Size) + sizeof(i64) * pInfo->nCol * nMinStream;
        pSz = sqlite4DbMallocZero(db, nByte);
        if( pSz==0 ){
          rc = SQLITE4_NOMEM;
        }else{
          pSz->aSz = (i64 *)&pSz[1];
          pSz->nStream = nMinStream;
          pSz->nCol = pInfo->nCol;
          *pnRow = 0;
          rc = SQLITE4_OK;
        }
      }else{
        rc = SQLITE4_CORRUPT_BKPT;
      }
    }else if( rc==SQLITE4_OK ){
................................................................................
  int iOff = 0;
  int iCol;

  if( nRow>=0 ){
    iOff += sqlite4PutVarint(&a[iOff], nRow);
  }
  iOff += sqlite4PutVarint(&a[iOff], pSz->nStream);

  for(iCol=0; iCol<pSz->nCol; iCol++){
    int i;
    for(i=0; i<pSz->nStream; i++){
      iOff += sqlite4PutVarint(&a[iOff], pSz->aSz[i*pSz->nCol+iCol]);
    }
  }

  return sqlite4KVStoreReplace(p, aKey, nKey, a, iOff);
}

static int fts5CsrLoadGlobal(Fts5Cursor *pCsr){
................................................................................
    nByte += nCol * sizeof(char *);
  }

  pInfo = sqlite4DbMallocZero(db, nByte);
  if( pInfo ){
    pInfo->iDb = sqlite4SchemaToIndex(db, pIdx->pSchema);
    pInfo->iRoot = pIdx->tnum;
    pInfo->iTbl = sqlite4FindPrimaryKey(pIdx->pTable, 0)->tnum;
    pInfo->nCol = pIdx->pTable->nCol;
    fts5TokenizerCreate(pParse, pIdx->pFts, &pInfo->pTokenizer, &pInfo->p);

    if( pInfo->p==0 ){
      assert( pParse->nErr );
      sqlite4DbFree(db, pInfo);
      pInfo = 0;
................................................................................
*/
static int fts5OpenCursors(sqlite4 *db, Fts5Info *pInfo, Fts5Cursor *pCsr){
  /* Hand the root node of the cursor's expression to fts5OpenExprCursors()
  ** and return whatever code it reports. */
  return fts5OpenExprCursors(db, pInfo, pCsr->pExpr->pRoot);
}

/*
** Release all memory held by cursor pCsr, then the cursor object itself.
** Passing a NULL cursor is a harmless no-op.
*/
void sqlite4Fts5Close(sqlite4 *db, Fts5Cursor *pCsr){
  int iCol;

  if( pCsr==0 ) return;

  /* The aMem[] array has one entry per table column. Free the buffer
  ** attached to each entry before freeing the array. */
  if( pCsr->aMem!=0 ){
    for(iCol=0; iCol<pCsr->pInfo->nCol; iCol++){
      sqlite4DbFree(db, pCsr->aMem[iCol].zMalloc);
    }
    sqlite4DbFree(db, pCsr->aMem);
  }

  /* Expression tree, then the remaining cursor-owned allocations. */
  fts5ExpressionFree(db, pCsr->pExpr);
  sqlite4DbFree(db, pCsr->pIter);
  sqlite4DbFree(db, pCsr->aKey);
  sqlite4DbFree(db, pCsr->anRow);

  sqlite4DbFree(db, pCsr);
}
................................................................................

  if( iC<0 && iS<0 ){
    int nFin = pSz->nCol * pSz->nStream;
    for(i=0; i<nFin; i++) nToken += pSz->aSz[i];
  }else if( iC<0 ){
    for(i=0; i<pSz->nCol; i++) nToken += pSz->aSz[i*pSz->nStream + iS];
  }else if( iS<0 ){
    for(i=0; i<pSz->nStream; i++) nToken += pSz->aSz[pSz->nStream*iC + i];
  }else if( iC<pSz->nCol && iS<pSz->nStream ){
    nToken = pSz->aSz[iC * pSz->nStream + iS];
  }

  return nToken;
}

................................................................................
    }

    if( rc==SQLITE4_OK ){
      assert( pIter->iMatch<=iMatch );
      while( pIter->iCurrent>=0 && pIter->iMatch<iMatch ){
        fts5InstanceListNext(&pIter->aList[pIter->iCurrent]);
        fts5IterSetCurrent(pIter, pCsr->pExpr->nPhrase);
        pIter->iMatch++;
      }
      if( pIter->iCurrent<0 ){
        rc = SQLITE4_NOTFOUND;
      }else{
        InstanceList *p = &pIter->aList[pIter->iCurrent];
        *piOff = p->iOff;
        *piC = p->iCol;

Changes to src/fts5func.c.

8
9
10
11
12
13
14
15

16
17
18
19
20
21
22
23
24
25
26
27
28
29
...
147
148
149
150
151
152
153















154
155
156
157
158
159
160



161
162
163
164

165
166

167
168
169
170
171
172
173


174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
...
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221


222
223
224
225
226
227
228
229
230
231
232
233


234
235
236




237
238
239
240
241
242
243
244
245
246
247
248
249




250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265

266
267
268
269
270
271
272
273
274
275
276
277
278
279

280
281
282
283
284


285
286
287
288
289
290
291
292
293
294
295
296
297
298
299


300
301
302




303
304
305
306
307
308
309
310
311
312
313
314
315
316
317


318
319
320
321
322
323

324
325
326
327
328
329
330
331
332

333
334
335
336


337
338
339
340
341

342




343
344



345
346





















347

348

349
























350






351

352


353







354
355
356
357
358
359
360
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
*/

/*
** BM25 and BM25F references:

**
**   Stephen Robertson and Hugo Zaragoza: "The Probablistic Relevance
**   Framework: BM25 and Beyond", 2009.
**
**   http://xapian.org/docs/bm25.html
**
**   http://en.wikipedia.org/wiki/Okapi_BM25
*/

#include "sqliteInt.h"
#include <math.h>                 /* temporary: For log() */

static char fts5Tolower(char c){
  if( c>='A' && c<='Z' ) c = c + ('a' - 'A');
................................................................................

  if( rc==SQLITE4_OK ){
    sqlite4_result_double(pCtx, rank);
  }else{
    sqlite4_result_error_code(pCtx, rc);
  }
}
















typedef struct SnippetCtx SnippetCtx;
struct SnippetCtx {
  sqlite4 *db;                    /* Database handle */
  int nToken;                     /* Number of tokens in snippet */
  int iOff;                       /* First token in snippet */
  u64 mask;                       /* Snippet mask. Highlight these terms */




  char *zOut;                     /* Pointer to snippet text */
  int nOut;                       /* Size of zOut in bytes */
  int nAlloc;                     /* Bytes of space allocated at zOut */


  int iFrom;

  const char *zText;              /* Document to extract snippet from */

  int rc;                         /* Set to NOMEM if OOM is encountered */
};

static void fts5SnippetAppend(SnippetCtx *p, const char *z, int n){
  if( p->rc==SQLITE4_OK ){


    if( (p->nOut + n) > p->nAlloc ){
      int nNew = (p->nOut+n) * 2;

      p->zOut = sqlite4DbReallocOrFree(p->db, p->zOut, nNew);
      if( p->zOut==0 ){
        p->rc = SQLITE4_NOMEM;
        return;
      }
      p->nAlloc = sqlite4DbMallocSize(p->db, p->zOut);
    }

    memcpy(&p->zOut[p->nOut], z, n);
    p->nOut += n;
  }
}

static int fts5SnippetCb(
  void *pCtx, 
  int iStream, 
  int iOff, 
................................................................................
  const char *z, int n,
  int iSrc, int nSrc
){
  SnippetCtx *p = (SnippetCtx *)pCtx;

  if( iOff<p->iOff ){
    return 0;
  }else if( iOff>(p->iOff + p->nToken) ){
    fts5SnippetAppend(p, &p->zText[p->iFrom], iSrc - p->iFrom);
    fts5SnippetAppend(p, "...", 3);
    p->iFrom = -1;
    return 1;
  }else{
    int bHighlight;               /* True to highlight term */

    bHighlight = (p->mask & (1 << (p->iOff+p->nToken - iOff - 1))) ? 1 : 0;

    if( p->iFrom==0 && p->iOff!=0 ){
      p->iFrom = iSrc;
      fts5SnippetAppend(p, "...", 3);
    }

    if( bHighlight ){
      fts5SnippetAppend(p, &p->zText[p->iFrom], iSrc - p->iFrom);
      fts5SnippetAppend(p, "[", 1);
      fts5SnippetAppend(p, &p->zText[iSrc], nSrc);
      fts5SnippetAppend(p, "]", 1);
      p->iFrom = iSrc+nSrc;


    }
  }

  return 0;
}

static int fts5SnippetText(
  sqlite4_context *pCtx, 
  int iCol,
  int iOff,
  int nToken,
  u64 mask


){
  int rc;
  sqlite4_value *pVal = 0;





  rc = sqlite4_mi_column_value(pCtx, iCol, &pVal);
  if( rc==SQLITE4_OK ){
    SnippetCtx sCtx;
    int nText;

    nText = sqlite4_value_bytes(pVal);
    memset(&sCtx, 0, sizeof(sCtx));
    sCtx.zText = (const char *)sqlite4_value_text(pVal);
    sCtx.db = sqlite4_context_db_handle(pCtx);
    sCtx.nToken = nToken;
    sCtx.iOff = iOff;
    sCtx.mask = mask;





    sqlite4_mi_tokenize(pCtx, sCtx.zText, nText, &sCtx, fts5SnippetCb);
    if( sCtx.rc==SQLITE4_OK && sCtx.iFrom>0 ){
      fts5SnippetAppend(&sCtx, &sCtx.zText[sCtx.iFrom], nText - sCtx.iFrom);
    }
    rc = sCtx.rc;

    sqlite4_result_text(pCtx, sCtx.zOut, sCtx.nOut, SQLITE4_TRANSIENT);
    sqlite4DbFree(sCtx.db, sCtx.zOut);
  }

  return rc;
}

static int fts5BestSnippet(
  sqlite4_context *pCtx, 

  u64 mask,                       /* Mask of high-priority phrases */
  int nToken,
  int *piOff,
  int *piCol,
  u64 *pMask
){
  sqlite4 *db = sqlite4_context_db_handle(pCtx);
  int nPhrase;
  int rc = SQLITE4_OK;
  int i;
  int iPrev = 0;
  int iPrevCol = 0;
  u64 *aMask;
  u64 lmask = (((u64)1) << nToken) - 1;


  int iBestOff = 0;
  int iBestCol = 0;
  int nBest = 0;
  u64 bmask = 0;



  sqlite4_mi_phrase_count(pCtx, &nPhrase);
  aMask = sqlite4DbMallocZero(db, sizeof(u64) * nPhrase);
  if( !aMask ) return SQLITE4_NOMEM;

  /* Iterate through all matches for all phrases */
  for(i=0; rc==SQLITE4_OK; i++){
    int iOff;
    int iCol;
    int iStream;
    int iPhrase;
    u64 tmask = 0;

    rc = sqlite4_mi_match_detail(pCtx, i, &iOff, &iCol, &iStream, &iPhrase);
    if( rc==SQLITE4_OK ){


      int iMask;
      int nShift; 
      int nScore = 0;





      nShift = ((iPrevCol==iCol) ? (iOff-iPrev) : 100);

      for(iMask=0; iMask<nPhrase; iMask++){
        if( nShift<64){
          aMask[iMask] = aMask[iMask] << nShift;
        }else{
          aMask[iMask] = 0;
        }
      }
      aMask[iPhrase] = aMask[iMask] | 0x0001;

      for(iMask=0; iMask<nPhrase; iMask++){
        if( (aMask[iMask] & lmask) ){
          nScore += ((aMask[iMask] & mask) ? 100 : 1);


        }
        tmask = tmask | aMask[iMask];
      }

      if( nScore>nBest ){
        bmask = (tmask & lmask);

        nBest = nScore;
        iBestOff = iOff;
        iBestCol = iCol;
      }

      iPrev = iOff;
      iPrevCol = iCol;
    }
  }


  *piOff = iBestOff;
  *piCol = iBestCol;
  *pMask = bmask;



  sqlite4DbFree(db, aMask);
  return rc;
}


static void fts5Snippet(sqlite4_context *pCtx, int nArg, sqlite4_value **apArg){




  int nToken = 15;
  u64 hlmask = 0;



  u64 mask = 0;
  int iOff = 0;





















  int iCol = 0;

  int rc;


























  rc = fts5BestSnippet(pCtx, mask, nToken, &iOff, &iCol, &hlmask);






  if( rc==SQLITE4_OK ){

    rc = fts5SnippetText(pCtx, iCol, iOff, nToken, hlmask);


  }







  if( rc!=SQLITE4_OK ){
    sqlite4_result_error_code(pCtx, rc);
  }
}

static int fts5SimpleTokenize(
  void *pCtx, sqlite4_tokenizer *p,







|
>



<
<
<
<







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







>
>
>

<
<
<
>


>

<





>
>
|
|

|
|



|


|
|







 







|
|






|



|




|

|
|
>
>








|
|

|
>
>



>
>
>
>













>
>
>
>






<
<
<






|
>
|
<
|
|
<








|
>

|
|

<
>
>











<



>
>



>
>
>
>





|




|


|
|
>
>





|
>









>

|
|
<
>
>





>
|
>
>
>
>
|
<
>
>
>
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>

>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
|
>
|
>
>
|
>
>
>
>
>
>
>







8
9
10
11
12
13
14
15
16
17
18
19




20
21
22
23
24
25
26
...
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176



177
178
179
180
181

182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
...
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282



283
284
285
286
287
288
289
290
291

292
293

294
295
296
297
298
299
300
301
302
303
304
305
306
307

308
309
310
311
312
313
314
315
316
317
318
319
320

321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369

370
371
372
373
374
375
376
377
378
379
380
381
382
383

384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
*/

/*
** The BM25 and BM25F implementations in this file are based on information
** found in:
**
**   Stephen Robertson and Hugo Zaragoza: "The Probabilistic Relevance
**   Framework: BM25 and Beyond", 2009.




*/

#include "sqliteInt.h"
#include <math.h>                 /* temporary: For log() */

static char fts5Tolower(char c){
  if( c>='A' && c<='Z' ) c = c + ('a' - 'A');
................................................................................

  if( rc==SQLITE4_OK ){
    sqlite4_result_double(pCtx, rank);
  }else{
    sqlite4_result_error_code(pCtx, rc);
  }
}

typedef struct Snippet Snippet;
typedef struct SnippetText SnippetText;

/*
** Identifies one snippet: the column it is taken from, the offset of its
** first token, and which of its tokens are to be highlighted.
*/
struct Snippet {
  int iCol;                       /* Column the snippet text is read from */
  int iOff;                       /* Token offset of first token in snippet */
  u64 hlmask;                     /* Bit i set -> highlight token iOff+i */
};

/*
** Growable output buffer that snippet text is accumulated in.
*/
struct SnippetText {
  char *zOut;                     /* Pointer to snippet text */
  int nOut;                       /* Size of zOut in bytes */
  int nAlloc;                     /* Bytes of space allocated at zOut */
};

/*
** Context object passed to the tokenizer callback fts5SnippetCb() while
** the text of a single snippet is being extracted from a document.
*/
typedef struct SnippetCtx SnippetCtx;
struct SnippetCtx {
  sqlite4 *db;                    /* Database handle */
  int nToken;                     /* Number of tokens in snippet */
  int iOff;                       /* First token in snippet */
  u64 mask;                       /* Snippet mask. Highlight these terms */
  const char *zStart;             /* Text written before a highlighted term */
  const char *zEnd;               /* Text written after a highlighted term */
  const char *zEllipses;          /* Text used to mark omitted document text */




  SnippetText *pOut;              /* Buffer the snippet text is appended to */

  int iFrom;                      /* Byte offset in zText of next text to copy */
  int iTo;                        /* Byte offset in zText past last token seen */
  const char *zText;              /* Document to extract snippet from */

  int rc;                         /* Set to NOMEM if OOM is encountered */
};

/*
** Append n bytes of text from buffer z to the output buffer p->pOut,
** growing the buffer as required. If n is negative, z is treated as a
** nul-terminated string and its full length is appended.
**
** This is a no-op if an error has already been recorded in p->rc, if z
** is NULL, or if there is nothing to append. If an allocation fails,
** p->rc is set to SQLITE4_NOMEM and the output buffer is left empty.
*/
static void fts5SnippetAppend(SnippetCtx *p, const char *z, int n){
  SnippetText *pOut;

  if( p->rc!=SQLITE4_OK ) return;

  /* A NULL source (e.g. an SQL NULL argument) contributes no text. Checking
  ** here avoids calling strlen() or memcpy() on a NULL pointer. */
  if( z==0 ) return;
  if( n<0 ) n = (int)strlen(z);

  /* Nothing to copy. Returning early also avoids passing a NULL
  ** destination to memcpy() before the first allocation has been made. */
  if( n==0 ) return;

  pOut = p->pOut;
  if( (pOut->nOut + n) > pOut->nAlloc ){
    int nNew = (pOut->nOut+n) * 2;

    pOut->zOut = sqlite4DbReallocOrFree(p->db, pOut->zOut, nNew);
    if( pOut->zOut==0 ){
      /* zOut is now NULL. Reset the sizes so that the buffer never
      ** advertises bytes it does not have. */
      pOut->nOut = 0;
      pOut->nAlloc = 0;
      p->rc = SQLITE4_NOMEM;
      return;
    }
    pOut->nAlloc = sqlite4DbMallocSize(p->db, pOut->zOut);
  }

  memcpy(&pOut->zOut[pOut->nOut], z, n);
  pOut->nOut += n;
}

static int fts5SnippetCb(
  void *pCtx, 
  int iStream, 
  int iOff, 
................................................................................
  const char *z, int n,
  int iSrc, int nSrc
){
  SnippetCtx *p = (SnippetCtx *)pCtx;

  if( iOff<p->iOff ){
    return 0;
  }else if( iOff>=(p->iOff + p->nToken) ){
    fts5SnippetAppend(p, &p->zText[p->iFrom], p->iTo - p->iFrom);
    fts5SnippetAppend(p, "...", 3);
    p->iFrom = -1;
    return 1;
  }else{
    int bHighlight;               /* True to highlight term */

    bHighlight = (p->mask & (1 << (iOff-p->iOff)));

    if( p->iFrom==0 && p->iOff!=0 ){
      p->iFrom = iSrc;
      if( p->pOut->nOut==0 ) fts5SnippetAppend(p, p->zEllipses, -1);
    }

    if( bHighlight ){
      fts5SnippetAppend(p, &p->zText[p->iFrom], iSrc - p->iFrom);
      fts5SnippetAppend(p, p->zStart, -1);
      fts5SnippetAppend(p, &p->zText[iSrc], nSrc);
      fts5SnippetAppend(p, p->zEnd, -1);
      p->iTo = p->iFrom = iSrc+nSrc;
    }else{
      p->iTo = iSrc + nSrc;
    }
  }

  return 0;
}

/*
** Append the text of snippet pSnip to buffer pText. The value of the
** snippet's column is fetched and run through the tokenizer, with
** fts5SnippetCb() copying the nToken tokens starting at pSnip->iOff and
** wrapping those selected by pSnip->hlmask in zStart/zEnd.
**
** Returns the error code from sqlite4_mi_column_value() if the column
** value cannot be loaded, otherwise the code accumulated while appending
** text (SQLITE4_OK or SQLITE4_NOMEM).
*/
static int fts5SnippetText(
  sqlite4_context *pCtx, 
  Snippet *pSnip,
  SnippetText *pText,
  int nToken,
  const char *zStart,
  const char *zEnd,
  const char *zEllipses
){
  sqlite4_value *pColVal = 0;     /* Value of column pSnip->iCol */
  SnippetCtx ctx;                 /* Context passed to fts5SnippetCb() */
  int nDoc;                       /* Size of document text in bytes */
  int rc;

  rc = sqlite4_mi_column_value(pCtx, pSnip->iCol, &pColVal);
  if( rc!=SQLITE4_OK ) return rc;

  nDoc = sqlite4_value_bytes(pColVal);

  memset(&ctx, 0, sizeof(ctx));
  ctx.zText = (const char *)sqlite4_value_text(pColVal);
  ctx.db = sqlite4_context_db_handle(pCtx);
  ctx.nToken = nToken;
  ctx.iOff = pSnip->iOff;
  ctx.mask = pSnip->hlmask;
  ctx.zStart = zStart;
  ctx.zEnd = zEnd;
  ctx.zEllipses = zEllipses;
  ctx.pOut = pText;

  sqlite4_mi_tokenize(pCtx, ctx.zText, nDoc, &ctx, fts5SnippetCb);

  /* The callback sets iFrom to -1 once it has moved past the end of the
  ** snippet. If iFrom is still positive the document ended first, so copy
  ** whatever document text remains. */
  if( ctx.rc==SQLITE4_OK && ctx.iFrom>0 ){
    fts5SnippetAppend(&ctx, &ctx.zText[ctx.iFrom], nDoc - ctx.iFrom);
  }

  return ctx.rc;
}

/*
** Search the matches of the current row for the best snippet of nToken
** tokens and store it in *pSnip.
**
** One u64 mask is kept per phrase. When a match is visited, all masks are
** shifted right by the distance from the previous match and bit (nToken-1)
** is set in the mask of the matching phrase. A non-zero mask therefore
** means the phrase has a match inside the nToken-token window that ends
** at the current match. A window scores 100 for each phrase present that
** is also set in *pMask, and 1 for each other phrase present. The first
** window with the highest score wins.
**
** On return, *pMask holds those phrases from the input mask that were
** seen in the row (and column, if iColumn>=0) but are missing from the
** selected snippet. If there are no matches the snippet begins at token 0
** with an empty highlight mask.
**
** NOTE(review): nToken is assumed to be in the range 1..64, since the
** masks are 64 bits wide; callers must enforce this.
*/
static int fts5BestSnippet(
  sqlite4_context *pCtx,          /* Context snippet() was called in */
  int iColumn,                    /* In this column (-1 means any column) */
  u64 *pMask,                     /* IN/OUT: Mask of high-priority phrases */
  int nToken,                     /* Number of tokens in requested snippet */
  Snippet *pSnip                  /* Populate this object */
){
  sqlite4 *db = sqlite4_context_db_handle(pCtx);
  const u64 one = 1;              /* 64-bit 1, so that shifts are 64-bit */
  int nPhrase = 0;
  int rc;
  int i;
  int iPrev = 0;
  int iPrevCol = 0;
  u64 *aMask;
  u64 mask = *pMask;
  u64 allmask = 0;

  int iBestOff = nToken-1;
  int iBestCol = (iColumn >= 0 ? iColumn : 0);
  int nBest = 0;

  u64 hlmask = 0;                 /* Highlight mask associated with iBestOff */
  u64 missmask = 0;               /* Mask of missing terms in iBestOff snip. */

  rc = sqlite4_mi_phrase_count(pCtx, &nPhrase);
  if( rc!=SQLITE4_OK ) return rc;
  aMask = sqlite4DbMallocZero(db, sizeof(u64) * nPhrase);
  if( !aMask ) return SQLITE4_NOMEM;

  /* Iterate through all matches for all phrases */
  for(i=0; rc==SQLITE4_OK; i++){
    int iOff;
    int iCol;
    int iStream;
    int iPhrase;

    rc = sqlite4_mi_match_detail(pCtx, i, &iOff, &iCol, &iStream, &iPhrase);
    if( rc==SQLITE4_OK ){
      u64 tmask = 0;
      u64 miss = 0;
      int iMask;
      int nShift; 
      int nScore = 0;

      if( iColumn>=0 && iColumn!=iCol ) continue;

      /* Phrases beyond the 64th cannot be represented in the u64 phrase
      ** masks, so they are never flagged as seen or missing. */
      if( iPhrase<64 ) allmask |= (one << iPhrase);

      nShift = ((iPrevCol==iCol) ? (iOff-iPrev) : 100);

      /* Slide every phrase window forward by nShift tokens. A shift count
      ** outside 0..63 is undefined for a u64, so clear the mask instead. */
      for(iMask=0; iMask<nPhrase; iMask++){
        if( nShift>=0 && nShift<64 ){
          aMask[iMask] = aMask[iMask] >> nShift;
        }else{
          aMask[iMask] = 0;
        }
      }
      aMask[iPhrase] = aMask[iPhrase] | (one << (nToken-1));

      for(iMask=0; iMask<nPhrase; iMask++){
        if( aMask[iMask] ){
          nScore += ((iMask<64 && ((one << iMask) & mask)) ? 100 : 1);
        }else if( iMask<64 ){
          miss |= (one << iMask);
        }
        tmask = tmask | aMask[iMask];
      }

      if( nScore>nBest ){
        hlmask = tmask;
        missmask = miss;
        nBest = nScore;
        iBestOff = iOff;
        iBestCol = iCol;
      }

      iPrev = iOff;
      iPrevCol = iCol;
    }
  }
  /* Running out of matches is the normal way for the loop to end. */
  if( rc==SQLITE4_NOTFOUND ) rc = SQLITE4_OK;

  /* iBestOff is the last token of the winning window. Convert it to the
  ** offset of the first token. */
  pSnip->iOff = iBestOff-nToken+1;
  pSnip->iCol = iBestCol;
  pSnip->hlmask = hlmask;
  *pMask = mask & missmask & allmask;

  sqlite4DbFree(db, aMask);
  return rc;
}

/*
** Adjust the position of snippet pSnip within its column. The snippet is
** moved forward by half the number of tokens that precede its first
** highlighted token, subject to the window [iOff, iOff+nToken) not
** extending past the nSz tokens of the column and not starting before
** token 0. The highlight mask is shifted to match the new position.
**
** This is a no-op if the snippet has no highlighted tokens.
*/
static void fts5SnippetImprove(
  sqlite4_context *pCtx, 
  int nToken,                     /* Size of required snippet */
  int nSz,                        /* Total size of column in tokens */
  Snippet *pSnip
){
  const u64 one = 1;              /* 64-bit 1, so that shifts are 64-bit */
  int nLead;                      /* Tokens before first highlighted token */
  int nShift;                     /* Tokens to move the snippet forward by */

  u64 mask = pSnip->hlmask;
  int iOff = pSnip->iOff;

  if( mask==0 ) return;
  assert( mask & (one << (nToken-1)) );

  /* Count the un-highlighted leading tokens. As mask is non-zero and the
  ** test is done in 64 bits, this always stops with nLead<=63. */
  for(nLead=0; (mask & (one<<nLead))==0; nLead++);

  nShift = (nLead/2);
  if( iOff+nShift > nSz-nToken ) nShift = (nSz-nToken) - iOff;
  if( iOff+nShift < 0 ) nShift = -1 * iOff;

  iOff += nShift;
  if( nShift>=0 ){
    mask = mask >> nShift;
  }else{
    /* The snippet moved backwards. Shifting by a negative count is
    ** undefined, so shift the other way. */
    mask = mask << (-nShift);
  }

  pSnip->iOff = iOff;
  pSnip->hlmask = mask;
}

/*
** Implementation of the snippet() function. The optional arguments
** apArg[0..4] are, in order:
**
**   zStart     Text inserted before each highlighted term (default "<b>")
**   zEnd       Text inserted after each highlighted term (default "</b>")
**   zEllipses  Text marking omitted document text (default "...")
**   iCol       Column to extract snippets from, or negative for any column
**   nToken     Snippet size in tokens (default -15)
**
** If nToken is negative, each snippet contains -nToken tokens. If it is
** positive, it is the total size, divided (rounding up) between however
** many snippets are returned. Between one and four snippets are tried.
** The first attempt in which no high-priority phrase is left uncovered
** is used, or the four-snippet attempt otherwise.
**
** Following the FTS3 snippet() function, nToken is clamped to the range
** -64..+64 (the highlight masks are 64 bits wide), nToken==0 produces an
** empty string, and a NULL text argument is reported as SQLITE4_NOMEM.
*/
static void fts5Snippet(sqlite4_context *pCtx, int nArg, sqlite4_value **apArg){
  Snippet aSnip[4];
  int nSnip;
  int iCol = -1;
  int nToken = -15;
  int rc;
  int nPhrase;

  const char *zStart = "<b>";
  const char *zEnd = "</b>";
  const char *zEllipses = "...";

  if( nArg>0 ) zStart = (const char *)sqlite4_value_text(apArg[0]);
  if( nArg>1 ) zEnd = (const char *)sqlite4_value_text(apArg[1]);
  if( nArg>2 ) zEllipses = (const char *)sqlite4_value_text(apArg[2]);
  if( nArg>3 ) iCol = sqlite4_value_int(apArg[3]);
  if( nArg>4 ) nToken = sqlite4_value_int(apArg[4]);

  /* The text helpers pass these strings to strlen(), so reject NULL here. */
  if( zStart==0 || zEnd==0 || zEllipses==0 ){
    sqlite4_result_error_code(pCtx, SQLITE4_NOMEM);
    return;
  }

  /* A zero-token snippet is empty. Handling it here also keeps
  ** (nToken-1) from being used as a negative shift count later on. */
  if( nToken==0 ){
    sqlite4_result_text(pCtx, "", 0, SQLITE4_TRANSIENT);
    return;
  }
  if( nToken<-64 ) nToken = -64;
  if( nToken>64 ) nToken = 64;

  rc = sqlite4_mi_phrase_count(pCtx, &nPhrase);
  for(nSnip=1; rc==SQLITE4_OK && nSnip<5; nSnip++){
    int nTok;
    int i;
    u64 mask;

    /* Start with every phrase marked high-priority. Shifting a u64 by 64
    ** or more is undefined, so saturate instead. */
    if( nPhrase>=64 ){
      mask = ~(u64)0;
    }else{
      mask = ((u64)1 << nPhrase) - 1;
    }

    if( nToken<0 ){
      nTok = nToken * -1;
    }else{
      nTok = (nToken + (nSnip-1)) / nSnip;
    }

    /* Each call clears from mask the phrases that its snippet covers, so
    ** later snippets favour the phrases that are still missing. */
    memset(aSnip, 0, sizeof(aSnip));
    for(i=0; rc==SQLITE4_OK && i<nSnip; i++){
      rc = fts5BestSnippet(pCtx, iCol, &mask, nTok, &aSnip[i]);
    }
    if( mask==0 || nSnip==4 ){
      SnippetText text = {0, 0, 0};
      for(i=0; rc==SQLITE4_OK && i<nSnip; i++){
        int nSz;
        rc = sqlite4_mi_size(pCtx, aSnip[i].iCol, -1, &nSz);
        if( rc==SQLITE4_OK ){
          fts5SnippetImprove(pCtx, nTok, nSz, &aSnip[i]);
          rc = fts5SnippetText(
              pCtx, &aSnip[i], &text, nTok, zStart, zEnd, zEllipses
          );
        }
      }
      sqlite4_result_text(pCtx, text.zOut, text.nOut, SQLITE4_TRANSIENT);
      sqlite4DbFree(sqlite4_context_db_handle(pCtx), text.zOut);
      break;
    }
  }

  if( rc!=SQLITE4_OK ){
    sqlite4_result_error_code(pCtx, rc);
  }
}

static int fts5SimpleTokenize(
  void *pCtx, sqlite4_tokenizer *p,

Changes to test/fts5query1.test.

140
141
142
143
144
145
146
147
148

149







150

























151
152
do_execsql_test 8.0 {
  CREATE TABLE t8(a PRIMARY KEY, b, c);
  CREATE INDEX i8 ON t8 USING fts5();
  INSERT INTO t8 VALUES('one', 'a b c', 'a a a');
  INSERT INTO t8 VALUES('two', 'd e f', 'b b b');
}

do_execsql_test 8.1 {
  SELECT rank(t8) FROM t8 WHERE t8 MATCH 'b a'

}

































finish_test








|
|
>
|
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
do_execsql_test 8.0 {
  CREATE TABLE t8(a PRIMARY KEY, b, c);
  CREATE INDEX i8 ON t8 USING fts5();
  INSERT INTO t8 VALUES('one', 'a b c', 'a a a');
  INSERT INTO t8 VALUES('two', 'd e f', 'b b b');
}

#do_execsql_test 8.1 {
#  SELECT rank(t8) FROM t8 WHERE t8 MATCH 'b a'
#}

do_execsql_test 9.0 {
  CREATE TABLE t9(a PRIMARY KEY, b);
  CREATE INDEX i9 ON t9 USING fts5();
  INSERT INTO t9 VALUES('one', 
    'a b c d e f g h i j k l m n o p q r s t u v w x y z ' ||
    'a b c d e f g h i j k l m n o p q r s t u v w x y z'
  );
}

#do_execsql_test 9.1 {
#  SELECT snippet(t9) FROM t9 WHERE t9 MATCH 'b'
#} 

do_execsql_test 10.1 {
  CREATE TABLE ft(content);
  CREATE INDEX fti ON ft USING fts5();
}
do_execsql_test 10.2 {
  INSERT INTO ft VALUES('a b c d e');
  INSERT INTO ft VALUES('f g h i j');
}
do_execsql_test 10.3 { SELECT rowid FROM ft WHERE ft MATCH 'c' } {1}
do_execsql_test 10.4 { SELECT rowid FROM ft WHERE ft MATCH 'f' } {2}

breakpoint
do_execsql_test 10.5 {
  DELETE FROM ft;
  CREATE TABLE ft2(a, b, c);
  CREATE INDEX fti2 ON ft2 USING fts5();
  INSERT INTO ft2 VALUES('1 2 3 4 5', '6 7 8 9 10', '11 12 13 14 15');
  SELECT snippet(ft2, '[', ']', '...', -1, 3) FROM ft2 WHERE ft2 MATCH '5';
}

finish_test

Added test/fts5snippet.test.

















































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
# 2010 January 07
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
#
# The tests in this file check that the FTS3 auxiliary functions offsets(),
# snippet() and matchinfo() work. At time of writing, running this file
# provides full coverage of fts3_snippet.c.
#

# Derive the test directory from the path of the running script and load the
# common test harness from it.
set testdir [file dirname $argv0]
source $testdir/tester.tcl

# If SQLITE4_ENABLE_FTS3 is not defined, omit this file.
# NOTE(review): no capability check is visible here; the next line only
# sources the shared FTS helper script - confirm whether a guard is needed.
source $testdir/fts3_common.tcl

# Globals for the rest of the file. sqlite_fts3_enable_parentheses is set
# back to 0 at the end of the script (presumably it enables parentheses in
# full-text query expressions - confirm). DO_MALLOC_TEST is reassigned by the
# main foreach loop below.
set sqlite_fts3_enable_parentheses 1
set DO_MALLOC_TEST 0

# Return a copy of list $L rebuilt one element at a time, so that its string
# form is the canonical one. Two lists holding the same elements in the same
# order then compare equal under [string compare], however the originals
# were laid out (extra spaces, newlines, indentation).
#
proc normalize {L} {
  set canonical {}
  set count [llength $L]
  for {set idx 0} {$idx < $count} {incr idx} {
    lappend canonical [lindex $L $idx]
  }
  return $canonical
}

# Document text used by a few tests. Contains the English names of all
# integers from 1 to 300 inclusive, each written as a single lower-case word
# with no spaces or hyphens (e.g. "twentyone", "onehundredone"). The literal
# is passed through [normalize], so the line breaks and indentation below do
# not end up in the stored value.
#
set numbers [normalize {
  one two three four five six seven eight nine ten eleven twelve thirteen
  fourteen fifteen sixteen seventeen eighteen nineteen twenty twentyone
  twentytwo twentythree twentyfour twentyfive twentysix twentyseven
  twentyeight twentynine thirty thirtyone thirtytwo thirtythree thirtyfour
  thirtyfive thirtysix thirtyseven thirtyeight thirtynine forty fortyone
  fortytwo fortythree fortyfour fortyfive fortysix fortyseven fortyeight
  fortynine fifty fiftyone fiftytwo fiftythree fiftyfour fiftyfive fiftysix
  fiftyseven fiftyeight fiftynine sixty sixtyone sixtytwo sixtythree sixtyfour
  sixtyfive sixtysix sixtyseven sixtyeight sixtynine seventy seventyone
  seventytwo seventythree seventyfour seventyfive seventysix seventyseven
  seventyeight seventynine eighty eightyone eightytwo eightythree eightyfour
  eightyfive eightysix eightyseven eightyeight eightynine ninety ninetyone
  ninetytwo ninetythree ninetyfour ninetyfive ninetysix ninetyseven
  ninetyeight ninetynine onehundred onehundredone onehundredtwo
  onehundredthree onehundredfour onehundredfive onehundredsix onehundredseven
  onehundredeight onehundrednine onehundredten onehundredeleven
  onehundredtwelve onehundredthirteen onehundredfourteen onehundredfifteen
  onehundredsixteen onehundredseventeen onehundredeighteen onehundrednineteen
  onehundredtwenty onehundredtwentyone onehundredtwentytwo
  onehundredtwentythree onehundredtwentyfour onehundredtwentyfive
  onehundredtwentysix onehundredtwentyseven onehundredtwentyeight
  onehundredtwentynine onehundredthirty onehundredthirtyone
  onehundredthirtytwo onehundredthirtythree onehundredthirtyfour
  onehundredthirtyfive onehundredthirtysix onehundredthirtyseven
  onehundredthirtyeight onehundredthirtynine onehundredforty
  onehundredfortyone onehundredfortytwo onehundredfortythree
  onehundredfortyfour onehundredfortyfive onehundredfortysix
  onehundredfortyseven onehundredfortyeight onehundredfortynine
  onehundredfifty onehundredfiftyone onehundredfiftytwo onehundredfiftythree
  onehundredfiftyfour onehundredfiftyfive onehundredfiftysix
  onehundredfiftyseven onehundredfiftyeight onehundredfiftynine
  onehundredsixty onehundredsixtyone onehundredsixtytwo onehundredsixtythree
  onehundredsixtyfour onehundredsixtyfive onehundredsixtysix
  onehundredsixtyseven onehundredsixtyeight onehundredsixtynine
  onehundredseventy onehundredseventyone onehundredseventytwo
  onehundredseventythree onehundredseventyfour onehundredseventyfive
  onehundredseventysix onehundredseventyseven onehundredseventyeight
  onehundredseventynine onehundredeighty onehundredeightyone
  onehundredeightytwo onehundredeightythree onehundredeightyfour
  onehundredeightyfive onehundredeightysix onehundredeightyseven
  onehundredeightyeight onehundredeightynine onehundredninety
  onehundredninetyone onehundredninetytwo onehundredninetythree
  onehundredninetyfour onehundredninetyfive onehundredninetysix
  onehundredninetyseven onehundredninetyeight onehundredninetynine twohundred
  twohundredone twohundredtwo twohundredthree twohundredfour twohundredfive
  twohundredsix twohundredseven twohundredeight twohundrednine twohundredten
  twohundredeleven twohundredtwelve twohundredthirteen twohundredfourteen
  twohundredfifteen twohundredsixteen twohundredseventeen twohundredeighteen
  twohundrednineteen twohundredtwenty twohundredtwentyone twohundredtwentytwo
  twohundredtwentythree twohundredtwentyfour twohundredtwentyfive
  twohundredtwentysix twohundredtwentyseven twohundredtwentyeight
  twohundredtwentynine twohundredthirty twohundredthirtyone
  twohundredthirtytwo twohundredthirtythree twohundredthirtyfour
  twohundredthirtyfive twohundredthirtysix twohundredthirtyseven
  twohundredthirtyeight twohundredthirtynine twohundredforty
  twohundredfortyone twohundredfortytwo twohundredfortythree
  twohundredfortyfour twohundredfortyfive twohundredfortysix
  twohundredfortyseven twohundredfortyeight twohundredfortynine
  twohundredfifty twohundredfiftyone twohundredfiftytwo twohundredfiftythree
  twohundredfiftyfour twohundredfiftyfive twohundredfiftysix
  twohundredfiftyseven twohundredfiftyeight twohundredfiftynine
  twohundredsixty twohundredsixtyone twohundredsixtytwo twohundredsixtythree
  twohundredsixtyfour twohundredsixtyfive twohundredsixtysix
  twohundredsixtyseven twohundredsixtyeight twohundredsixtynine
  twohundredseventy twohundredseventyone twohundredseventytwo
  twohundredseventythree twohundredseventyfour twohundredseventyfive
  twohundredseventysix twohundredseventyseven twohundredseventyeight
  twohundredseventynine twohundredeighty twohundredeightyone
  twohundredeightytwo twohundredeightythree twohundredeightyfour
  twohundredeightyfive twohundredeightysix twohundredeightyseven
  twohundredeightyeight twohundredeightynine twohundredninety
  twohundredninetyone twohundredninetytwo twohundredninetythree
  twohundredninetyfour twohundredninetyfive twohundredninetysix
  twohundredninetyseven twohundredninetyeight twohundredninetynine
  threehundred
}]

# Main test loop. Each iteration is parameterised by a malloc-test flag and
# a database text encoding, and runs the whole snippet() test suite against a
# freshly created database.
#
foreach {DO_MALLOC_TEST enc} {
  0 utf8
  1 utf8
  1 utf16
} {
  # Iterations with the malloc-test flag set are skipped. With the list
  # above, only the first combination (0, utf8) actually runs.
  if {$DO_MALLOC_TEST} continue

  # Start from an empty database file using the requested encoding.
  db close
  forcedelete test.db
  sqlite4 db test.db
  sqlite4_db_config_lookaside db 0 0 0
  db eval "PRAGMA encoding = \"$enc\""

  # Set variable $T to the test name prefix for this iteration of the loop.
  # NOTE(review): the prefix still reads "fts3snippet" although every index
  # below is created USING fts5 - confirm whether it should be renamed.
  #
  set T "fts3snippet-$enc"

  ##########################################################################
  # Test the snippet function.
  #
  # Usage: do_snippet_test NAME EXPR ICOL NTOK ?RESULT ...?
  #
  # Run snippet(ft,'{','}','...',ICOL,NTOK) over the rows of table "ft" that
  # match the full-text expression EXPR, and hand the query to do_select_test
  # together with the list of RESULT arguments. Each RESULT is trimmed of
  # leading and trailing whitespace first, so expected values can be written
  # on their own indented lines.
  #
  # NOTE(review): $iCol, $nTok and $expr appear inside a braced script, so
  # they are presumably resolved by do_select_test or the db layer - confirm.
  #
  proc do_snippet_test {name expr iCol nTok args} {
    set res [list]
    foreach a $args { lappend res [string trim $a] }
    do_select_test $name {
      SELECT snippet(ft,'{','}','...',$iCol,$nTok) FROM ft WHERE ft MATCH $expr
    } $res
  }

  # Group 3: a single ten-token document; the 5-token snippet window is
  # checked for a match at each token position.
  do_test $T.3.1 {
    execsql {
      DROP TABLE IF EXISTS ft;
      CREATE TABLE ft(content);
      CREATE INDEX fti ON ft USING fts5();

      INSERT INTO ft VALUES('one two three four five six seven eight nine ten');
    }
  } {}
  do_snippet_test $T.3.2  one    0 5 "{one} two three four five..."
  do_snippet_test $T.3.3  two    0 5 "one {two} three four five..."
  do_snippet_test $T.3.4  three  0 5 "one two {three} four five..."
  do_snippet_test $T.3.5  four   0 5 "...two three {four} five six..."
  do_snippet_test $T.3.6  five   0 5 "...three four {five} six seven..."
  do_snippet_test $T.3.7  six    0 5 "...four five {six} seven eight..."
  do_snippet_test $T.3.8  seven  0 5 "...five six {seven} eight nine..."
  do_snippet_test $T.3.9  eight  0 5 "...six seven {eight} nine ten"
  do_snippet_test $T.3.10 nine   0 5 "...six seven eight {nine} ten"
  do_snippet_test $T.3.11 ten    0 5 "...six seven eight nine {ten}"

  # Group 4: add a second, 40-token document (one..twenty repeated twice) and
  # query with multi-term, NEAR, NOT and OR expressions. Tests that list two
  # expected values are those whose expression matches both rows of ft.
  do_test $T.4.1 {
    execsql {
      INSERT INTO ft VALUES(
           'one two three four five '
        || 'six seven eight nine ten '
        || 'eleven twelve thirteen fourteen fifteen '
        || 'sixteen seventeen eighteen nineteen twenty '
        || 'one two three four five '
        || 'six seven eight nine ten '
        || 'eleven twelve thirteen fourteen fifteen '
        || 'sixteen seventeen eighteen nineteen twenty'
      );
    }
  } {}

  do_snippet_test $T.4.2 {one nine} 0 5 {
     {one} two three...eight {nine} ten
  } {
     {one} two three...eight {nine} ten...
  }

  do_snippet_test $T.4.3 {one nine} 0 -5 {
     {one} two three four five...six seven eight {nine} ten
  } {
     {one} two three four five...seven eight {nine} ten eleven...
  }
  # Named 4.3b so that it does not share a test name with the case above;
  # a duplicated name would make a failure report ambiguous.
  do_snippet_test $T.4.3b {one nineteen} 0 -5 {
     ...eighteen {nineteen} twenty {one} two...
  }
  do_snippet_test $T.4.4 {two nineteen} 0 -5 {
     ...eighteen {nineteen} twenty one {two}...
  }
  do_snippet_test $T.4.5 {three nineteen} 0 -5 {
     ...{nineteen} twenty one two {three}...
  }

  do_snippet_test $T.4.6 {four nineteen} 0 -5 {
     ...two three {four} five six...seventeen eighteen {nineteen} twenty one...
  }
  do_snippet_test $T.4.7 {four NEAR nineteen} 0 -5 {
     ...seventeen eighteen {nineteen} twenty one...two three {four} five six...
  }

  do_snippet_test $T.4.8 {four nineteen} 0 5 {
     ...three {four} five...eighteen {nineteen} twenty...
  }
  do_snippet_test $T.4.9 {four NEAR nineteen} 0 5 {
     ...eighteen {nineteen} twenty...three {four} five...
  }
  do_snippet_test $T.4.10 {four NEAR nineteen} 0 -5 {
     ...seventeen eighteen {nineteen} twenty one...two three {four} five six...
  }
  do_snippet_test $T.4.11 {four NOT (nineteen twentyone)} 0 5 {
     ...two three {four} five six...
  } {
     ...two three {four} five six...
  }
  do_snippet_test $T.4.12 {four OR nineteen NEAR twentyone} 0 5 {
     ...two three {four} five six...
  } {
     ...two three {four} five six...
  }

  # Group 5: a three-column table, exercising the column argument (-1, 0, 1
  # and 2), first with all columns populated and then with column b set to
  # NULL.
  do_test $T.5.1 {
    execsql {
      DROP TABLE IF EXISTS ft;
      CREATE TABLE ft(a, b, c);
      CREATE INDEX fti ON ft USING fts5();
      INSERT INTO ft VALUES(
        'one two three four five', 
        'four five six seven eight', 
        'seven eight nine ten eleven'
      );
    }
  } {}

  do_snippet_test $T.5.2 {five} -1 3 {...three four {five}}
  do_snippet_test $T.5.3 {five}  0 3 {...three four {five}}
  do_snippet_test $T.5.4 {five}  1 3 {four {five} six...}
  do_snippet_test $T.5.5 {five}  2 3 {seven eight nine...}

  do_test $T.5.6 {
    execsql { UPDATE ft SET b = NULL }
  } {}

  do_snippet_test $T.5.7  {five} -1 3 {...three four {five}}
  do_snippet_test $T.5.8  {five}  0 3 {...three four {five}}
  do_snippet_test $T.5.9  {five}  1 3 {}
  do_snippet_test $T.5.10 {five}  2 3 {seven eight nine...}

  do_snippet_test $T.5.11 {one "seven eight nine"} -1 -3 {
    {one} two three...{seven} {eight} {nine}...
  }

  # Group 6: one long document (the 300 number names in $numbers) queried
  # with six terms spread across it.
  do_test $T.6.1 {
    execsql {
      DROP TABLE IF EXISTS ft;
      CREATE TABLE ft(x);
      CREATE INDEX fti ON ft USING fts5();
      INSERT INTO ft VALUES($numbers);
    }
  } {}
  do_snippet_test $T.6.2 {
    one fifty onehundred onehundredfifty twohundredfifty threehundred
  } -1 4 {
    {one}...{fifty}...{onehundred}...{onehundredfifty}...
  }
  do_snippet_test $T.6.3 {
    one fifty onehundred onehundredfifty twohundredfifty threehundred
  } -1 -4 {
    {one} two three four...fortyeight fortynine {fifty} fiftyone...ninetyeight ninetynine {onehundred} onehundredone...onehundredfortyeight onehundredfortynine {onehundredfifty} onehundredfiftyone...
  }

  # Group 7: 149 rows of the form "one<N commas>two" for N = 1..149. The
  # expected snippet for each row keeps the commas between the two
  # highlighted terms.
  do_test $T.7.1 {
    execsql {
      BEGIN;
        DROP TABLE IF EXISTS ft;
        CREATE TABLE ft(x);
        CREATE INDEX fti ON ft USING fts5();
    }
    set testresults [list]
    for {set i 1} {$i < 150} {incr i} {
      set commas [string repeat , $i]
      execsql {INSERT INTO ft VALUES('one' || $commas || 'two')}
      lappend testresults "{one}$commas{two}"
    }
    execsql COMMIT
  } {}
  eval [list do_snippet_test $T.7.2 {one two} -1 3] $testresults

}

# Set the parentheses flag, which was set to 1 near the top of this file,
# back to 0 and finish the test script.
set sqlite_fts3_enable_parentheses 0
finish_test