SQLite

Check-in [a2b1183d9e]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Modify snippets code to run more efficiently. And to avoid a bug relating to snippets based on full-text queries that contain duplicate terms.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: a2b1183d9e9898d06d623b342bbb552e85a9b3f6
User & Date: dan 2010-01-11 12:00:48.000
Context
2010-01-11
18:26
Add a few documentation evidence comments to the built-in function implementations. (check-in: 8bd0f8147d user: drh tags: trunk)
12:00
Modify snippets code to run more efficiently. And to avoid a bug relating to snippets based on full-text queries that contain duplicate terms. (check-in: a2b1183d9e user: dan tags: trunk)
2010-01-09
07:33
Fix handling of an OOM error in the fts3 offsets() function. Fix a couple of snippet related test cases in e_fts3.test. (check-in: 14dc46a74a user: dan tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts3/fts3.c.
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
*/
int sqlite3Fts3ExprLoadDoclist(Fts3Table *pTab, Fts3Expr *pExpr){
  return evalFts3Expr(pTab, pExpr, &pExpr->aDoclist, &pExpr->nDoclist, 1);
}

/*
** After ExprLoadDoclist() (see above) has been called, this function is
** used to iterate through the position lists that make up the doclist
** stored in pExpr->aDoclist.
*/
char *sqlite3Fts3FindPositions(
  Fts3Expr *pExpr,                /* Access this expressions doclist */
  sqlite3_int64 iDocid,           /* Docid associated with requested pos-list */
  int iCol                        /* Column of requested pos-list */
){







|







2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
*/
int sqlite3Fts3ExprLoadDoclist(Fts3Table *pTab, Fts3Expr *pExpr){
  return evalFts3Expr(pTab, pExpr, &pExpr->aDoclist, &pExpr->nDoclist, 1);
}

/*
** After ExprLoadDoclist() (see above) has been called, this function is
** used to iterate/search through the position lists that make up the doclist
** stored in pExpr->aDoclist.
*/
char *sqlite3Fts3FindPositions(
  Fts3Expr *pExpr,                /* Access this expressions doclist */
  sqlite3_int64 iDocid,           /* Docid associated with requested pos-list */
  int iCol                        /* Column of requested pos-list */
){
Changes to ext/fts3/fts3_snippet.c.
23
24
25
26
27
28
29





















30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#define SNIPPET_BUFFER_MASK   (SNIPPET_BUFFER_SIZE-1)

static void fts3GetDeltaPosition(char **pp, int *piPos){
  int iVal;
  *pp += sqlite3Fts3GetVarint32(*pp, &iVal);
  *piPos += (iVal-2);
}






















/*
** Iterate through all phrase nodes in an FTS3 query, except those that
** are part of a sub-tree that is the right-hand-side of a NOT operator.
** For each phrase node found, the supplied callback function is invoked.
**
** If the callback function returns anything other than SQLITE_OK, 
** the iteration is abandoned and the error code returned immediately.
** Otherwise, SQLITE_OK is returned after a callback has been made for
** all eligible phrase nodes.
*/
static int fts3ExprIterate(
  Fts3Expr *pExpr,                /* Expression to iterate phrases of */
  int (*x)(Fts3Expr *, void *),   /* Callback function to invoke for phrases */
  void *pCtx                      /* Second argument to pass to callback */
){
  int rc;
  int eType = pExpr->eType;
  if( eType!=FTSQUERY_PHRASE ){
    assert( pExpr->pLeft && pExpr->pRight );
    rc = fts3ExprIterate(pExpr->pLeft, x, pCtx);
    if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){
      rc = fts3ExprIterate(pExpr->pRight, x, pCtx);
    }
  }else{
    rc = x(pExpr, pCtx);
  }
  return rc;
}

typedef struct LoadDoclistCtx LoadDoclistCtx;
struct LoadDoclistCtx {
  Fts3Table *pTab;                /* FTS3 Table */
  int nPhrase;                    /* Number of phrases so far */
  int nToken;                     /* Number of tokens so far */







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>













|


|
<
<
<
|
<
<
<
<
<
<
<







23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67



68







69
70
71
72
73
74
75
#define SNIPPET_BUFFER_MASK   (SNIPPET_BUFFER_SIZE-1)

static void fts3GetDeltaPosition(char **pp, int *piPos){
  int iVal;
  *pp += sqlite3Fts3GetVarint32(*pp, &iVal);
  *piPos += (iVal-2);
}

static int fts3ExprIterate2(
  Fts3Expr *pExpr,                /* Expression to iterate phrases of */
  int *piPhrase,                  /* Pointer to phrase counter */
  int (*x)(Fts3Expr*,int,void*),  /* Callback function to invoke for phrases */
  void *pCtx                      /* Second argument to pass to callback */
){
  int rc;
  int eType = pExpr->eType;
  if( eType!=FTSQUERY_PHRASE ){
    assert( pExpr->pLeft && pExpr->pRight );
    rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx);
    if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){
      rc = fts3ExprIterate2(pExpr->pRight, piPhrase, x, pCtx);
    }
  }else{
    rc = x(pExpr, *piPhrase, pCtx);
    (*piPhrase)++;
  }
  return rc;
}

/*
** Iterate through all phrase nodes in an FTS3 query, except those that
** are part of a sub-tree that is the right-hand-side of a NOT operator.
** For each phrase node found, the supplied callback function is invoked.
**
** If the callback function returns anything other than SQLITE_OK, 
** the iteration is abandoned and the error code returned immediately.
** Otherwise, SQLITE_OK is returned after a callback has been made for
** all eligible phrase nodes.
*/
static int fts3ExprIterate(
  Fts3Expr *pExpr,                /* Expression to iterate phrases of */
  int (*x)(Fts3Expr*,int,void*),  /* Callback function to invoke for phrases */
  void *pCtx                      /* Second argument to pass to callback */
){
  int iPhrase = 0;



  return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx);







}

typedef struct LoadDoclistCtx LoadDoclistCtx;
struct LoadDoclistCtx {
  Fts3Table *pTab;                /* FTS3 Table */
  int nPhrase;                    /* Number of phrases so far */
  int nToken;                     /* Number of tokens so far */
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
    pExpr = pLeft;
    pParent = pExpr->pParent;
  }

  return rc;
}

static int fts3ExprLoadDoclistsCb1(Fts3Expr *pExpr, void *ctx){
  int rc = SQLITE_OK;
  LoadDoclistCtx *p = (LoadDoclistCtx *)ctx;

  p->nPhrase++;
  p->nToken += pExpr->pPhrase->nToken;

  if( pExpr->isLoaded==0 ){
    rc = sqlite3Fts3ExprLoadDoclist(p->pTab, pExpr);
    pExpr->isLoaded = 1;
    if( rc==SQLITE_OK ){
      rc = fts3ExprNearTrim(pExpr);
    }
  }

  return rc;
}

static int fts3ExprLoadDoclistsCb2(Fts3Expr *pExpr, void *ctx){
  if( pExpr->aDoclist ){
    pExpr->pCurrent = pExpr->aDoclist;
    pExpr->iCurrent = 0;
    pExpr->pCurrent += sqlite3Fts3GetVarint(pExpr->pCurrent, &pExpr->iCurrent);
  }
  return SQLITE_OK;
}







|

















|







102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
    pExpr = pLeft;
    pParent = pExpr->pParent;
  }

  return rc;
}

static int fts3ExprLoadDoclistsCb1(Fts3Expr *pExpr, int iPhrase, void *ctx){
  int rc = SQLITE_OK;
  LoadDoclistCtx *p = (LoadDoclistCtx *)ctx;

  p->nPhrase++;
  p->nToken += pExpr->pPhrase->nToken;

  if( pExpr->isLoaded==0 ){
    rc = sqlite3Fts3ExprLoadDoclist(p->pTab, pExpr);
    pExpr->isLoaded = 1;
    if( rc==SQLITE_OK ){
      rc = fts3ExprNearTrim(pExpr);
    }
  }

  return rc;
}

static int fts3ExprLoadDoclistsCb2(Fts3Expr *pExpr, int iPhrase, void *ctx){
  if( pExpr->aDoclist ){
    pExpr->pCurrent = pExpr->aDoclist;
    pExpr->iCurrent = 0;
    pExpr->pCurrent += sqlite3Fts3GetVarint(pExpr->pCurrent, &pExpr->iCurrent);
  }
  return SQLITE_OK;
}
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193


194

195
196
197
198
199
200























































































































201
202
203
204
205
206
207
208

209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291

292
293
294
295
296
297
298
299
300

301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326


327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359

360
361
362
363
364
365
366
367
368
369
370
371
372
373


374
375
376
377
378
379
380
381
382
383
384
385
386
387
388

389
390
391
392
393
394
395
  }
  if( pnPhrase ) *pnPhrase = sCtx.nPhrase;
  if( pnToken ) *pnToken = sCtx.nToken;
  return rc;
}

/*
** Each call to this function populates a chunk of a snippet-buffer 
** SNIPPET_BUFFER_CHUNK bytes in size.
**
** Return true if the end of the data has been reached (and all subsequent
** calls to fts3LoadSnippetBuffer() with the same arguments will be no-ops), 
** or false otherwise.
*/
static int fts3LoadSnippetBuffer(
  int iPos,                       /* Document token offset to load data for */
  u8 *aBuffer,                    /* Circular snippet buffer to populate */
  int nList,                      /* Number of position lists in appList */
  char **apList,                  /* IN/OUT: nList position list pointers */
  int *aiPrev                     /* IN/OUT: Previous positions read */
){
  int i;
  int nFin = 0;

  assert( (iPos&(SNIPPET_BUFFER_CHUNK-1))==0 );

  memset(&aBuffer[iPos&SNIPPET_BUFFER_MASK], 0, SNIPPET_BUFFER_CHUNK);

  for(i=0; i<nList; i++){
    int iPrev = aiPrev[i];
    char *pList = apList[i];

    if( iPrev<0 || !pList ){
      nFin++;
      continue;
    }

    while( iPrev<(iPos+SNIPPET_BUFFER_CHUNK) ){
      assert( iPrev>=iPos );
      aBuffer[iPrev&SNIPPET_BUFFER_MASK] = i+1;
      if( 0==((*pList)&0xFE) ){
        iPrev = -1;
        break;
      }else{
        fts3GetDeltaPosition(&pList, &iPrev); 
      }
    }

    aiPrev[i] = iPrev;
    apList[i] = pList;
  }

  return (nFin==nList);
}

typedef struct SnippetCtx SnippetCtx;
struct SnippetCtx {
  Fts3Cursor *pCsr;


  int iCol;

  int iPhrase;
  int *aiPrev;
  int *anToken;
  char **apList;
};
























































































































static int fts3SnippetFindPositions(Fts3Expr *pExpr, void *ctx){
  SnippetCtx *p = (SnippetCtx *)ctx;
  int iPhrase = p->iPhrase++;
  char *pCsr;

  p->anToken[iPhrase] = pExpr->pPhrase->nToken;
  pCsr = sqlite3Fts3FindPositions(pExpr, p->pCsr->iPrevId, p->iCol);


  if( pCsr ){
    int iVal;
    pCsr += sqlite3Fts3GetVarint32(pCsr, &iVal);
    p->apList[iPhrase] = pCsr;
    p->aiPrev[iPhrase] = iVal-2;
  }
  return SQLITE_OK;
}

static void fts3SnippetCnt(
  int iIdx, 
  int nSnippet, 
  int *anCnt, 
  u8 *aBuffer,
  int *anToken,
  u64 *pHlmask
){
  int iSub =  (iIdx-1)&SNIPPET_BUFFER_MASK;
  int iAdd =  (iIdx+nSnippet-1)&SNIPPET_BUFFER_MASK;

  u64 h = *pHlmask;

  anCnt[ aBuffer[iSub]  ]--;
  anCnt[ aBuffer[iAdd]  ]++;

  h = h >> 1;
  if( aBuffer[iAdd] ){
    int j;
    for(j=anToken[aBuffer[iAdd]-1]; j>=1; j--){
      h |= (u64)1 << (nSnippet-j);
    }
  }
  *pHlmask = h;
}

static int fts3SnippetScore(int n, int *anCnt, u64 covmask){
  int j;
  int iScore = 0;
  for(j=1; j<=n; j++){
    int nCnt = anCnt[j];
    iScore += nCnt;
    if( nCnt && 0==(covmask & ((u64)1 << (j-1))) ){
      iScore += 1000;
    }
  }
  return iScore;
}

static u64 fts3SnippetMask(int n, int *anCnt){
  int j;
  u64 mask = 0;

  if( n>64 ) n = 64;
  for(j=1; j<=n; j++){
    if( anCnt[j] ) mask |= ((u64)1)<<(j-1);
  }
  return mask;
}

typedef struct SnippetFragment SnippetFragment;
struct SnippetFragment {
  int iCol;                       /* Column snippet is extracted from */
  int iPos;                       /* Index of first token in snippet */
  u64 covered;                    /* Mask of query phrases covered */
  u64 hlmask;                     /* Mask of snippet terms to highlight */
};

static int fts3BestSnippet(
  int nSnippet,                   /* Desired snippet length */
  Fts3Cursor *pCsr,               /* Cursor to create snippet for */
  int iCol,                       /* Index of column to create snippet from */
  u64 mCovered,                   /* Mask of phrases already covered */
  u64 *pmSeen,                    /* IN/OUT: Mask of phrases seen */
  SnippetFragment *pFragment,     /* OUT: Best snippet found */
  int *piScore                    /* OUT: Score of snippet pFragment */
){
  int rc;                         /* Return Code */
  u8 aBuffer[SNIPPET_BUFFER_SIZE];/* Circular snippet buffer */
  int *aiPrev;                    /* Used by fts3LoadSnippetBuffer() */
  int *anToken;                   /* Number of tokens in each phrase */
  char **apList;                  /* Array of position lists */
  int *anCnt;                     /* Running totals of phrase occurences */
  int nList;                      /* Number of phrases in expression */

  int nByte;                      /* Bytes of dynamic space required */
  int i;                          /* Loop counter */
  u64 hlmask = 0;                 /* Current mask of highlighted terms */
  u64 besthlmask = 0;             /* Mask of highlighted terms for iBestPos */
  u64 bestcovmask = 0;            /* Mask of terms with at least one hit */
  int iBestPos = 0;               /* Starting position of 'best' snippet */
  int iBestScore = 0;             /* Score of best snippet higher->better */
  int iEnd = 0x7FFFFFFF;
  SnippetCtx sCtx;


  /* Iterate through the phrases in the expression to count them. The same
  ** callback makes sure the doclists are loaded for each phrase.
  */
  rc = fts3ExprLoadDoclists(pCsr, &nList, 0);
  if( rc!=SQLITE_OK ){
    return rc;
  }

  /* Now that it is known how many phrases there are, allocate and zero
  ** the required arrays using malloc().
  */
  nByte = sizeof(u8*)*nList +     /* apList */
      sizeof(int)*(nList) +       /* anToken */
      sizeof(int)*nList +         /* aiPrev */
      sizeof(int)*(nList+1);      /* anCnt */
  apList = (char **)sqlite3_malloc(nByte);
  if( !apList ){
    return SQLITE_NOMEM;
  }
  memset(apList, 0, nByte);
  anToken = (int *)&apList[nList];
  aiPrev = &anToken[nList];
  anCnt = &aiPrev[nList];

  /* Initialize the contents of the aiPrev and aiList arrays. */


  sCtx.pCsr = pCsr;
  sCtx.iCol = iCol;
  sCtx.apList = apList;
  sCtx.aiPrev = aiPrev;
  sCtx.anToken = anToken;
  sCtx.iPhrase = 0;
  (void)fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void *)&sCtx);

  for(i=0; i<nList; i++){
    if( apList[i] && aiPrev[i]>=0 ){
      *pmSeen |= (u64)1 << i;
    }
  }

  /* Load the first two chunks of data into the buffer. */
  memset(aBuffer, 0, SNIPPET_BUFFER_SIZE);
  fts3LoadSnippetBuffer(0, aBuffer, nList, apList, aiPrev);
  fts3LoadSnippetBuffer(SNIPPET_BUFFER_CHUNK, aBuffer, nList, apList, aiPrev);

  /* Set the initial contents of the highlight-mask and anCnt[] array. */
  for(i=1-nSnippet; i<=0; i++){
    fts3SnippetCnt(i, nSnippet, anCnt, aBuffer, anToken, &hlmask);
  }
  iBestScore = fts3SnippetScore(nList, anCnt, mCovered);
  besthlmask = hlmask;
  iBestPos = 0;
  bestcovmask = fts3SnippetMask(nList, anCnt);

  for(i=1; i<iEnd; i++){
    int iScore;

    if( 0==(i&(SNIPPET_BUFFER_CHUNK-1)) ){
      int iLoad = i + SNIPPET_BUFFER_CHUNK;

      if( fts3LoadSnippetBuffer(iLoad, aBuffer, nList, apList, aiPrev) ){
        iEnd = iLoad;
      }
    }

    /* Figure out how highly a snippet starting at token offset i scores
    ** according to fts3SnippetScore(). If it is higher than any previously
    ** considered position, save the current position, score and hlmask as 
    ** the best snippet candidate found so far.
    */
    fts3SnippetCnt(i, nSnippet, anCnt, aBuffer, anToken, &hlmask);
    iScore = fts3SnippetScore(nList, anCnt, mCovered);
    if( iScore>iBestScore ){
      iBestPos = i;


      iBestScore = iScore;
      besthlmask = hlmask;
      bestcovmask = fts3SnippetMask(nList, anCnt);
    }
  }

  sqlite3_free(apList);

  pFragment->iPos = iBestPos;
  pFragment->hlmask = besthlmask;
  pFragment->iCol = iCol;
  pFragment->covered = bestcovmask;
  *piScore = iBestScore;
  return SQLITE_OK;
}


typedef struct StrBuffer StrBuffer;
struct StrBuffer {
  char *z;
  int n;
  int nAlloc;
};







|
<
|
<
<
<

|
<
|
<
<
<
<
<
<

<
|
<
|
<
|
<
|
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
|
<
<
<
|
<
|
<
<
|
<
>
>
|
>
|
<
<
|


>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|

|


|
<

>

|
<
|
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
|
<
|
<
<
|
|
<
<
<
<
|
<
<
<
|
<
<
<
<
<
<
<
<
|
|
|


<
<
<
|
<
<
<
<
<
<



















<
<
<
<
<

>
|
<
<
<
<
<
|
|
|
>










|

|
<
<
<
|
|


|
<
<
<

|
>
>


|
|
<
|



|




<
<
<
<
|
<
<
<
<
<
<
<
|
|
<

|
<
<
>
|
<
|
<
|
<
<
<
<
<
<
<

|
>
>

<
<



|
<
<
<
<
<



>







147
148
149
150
151
152
153
154

155



156
157

158






159

160

161

162

163




164










165



166

167


168

169
170
171
172
173


174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301

302
303
304
305

306




307










308

309


310
311




312



313








314
315
316
317
318



319






320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338





339
340
341





342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358



359
360
361
362
363



364
365
366
367
368
369
370
371

372
373
374
375
376
377
378
379
380




381







382
383

384
385


386
387

388

389







390
391
392
393
394


395
396
397
398





399
400
401
402
403
404
405
406
407
408
409
  }
  if( pnPhrase ) *pnPhrase = sCtx.nPhrase;
  if( pnToken ) *pnToken = sCtx.nToken;
  return rc;
}

/*
** The following types are used as part of the implementation of the 

** fts3BestSnippet() routine.



*/
typedef struct SnippetCtx SnippetCtx;

typedef struct SnippetPhrase SnippetPhrase;








struct SnippetCtx {

  Fts3Cursor *pCsr;               /* Cursor snippet is being generated from */

  int iCol;                       /* Extract snippet from this column */

  int nSnippet;                   /* Requested snippet length (in tokens) */




  int nPhrase;                    /* Number of phrases in query */










  SnippetPhrase *aPhrase;         /* Array of size nPhrase */



  int iCurrent;                   /* First token of current snippet */

};


struct SnippetPhrase {

  int nToken;                     /* Number of tokens in phrase */
  char *pList;                    /* Pointer to start of phrase position list */
  int iHead;                      /* Next value in position list */
  char *pHead;                    /* Position list data following iHead */
  int iTail;                      /* Next value in trailing position list */


  char *pTail;                    /* Position list data following iTail */
};

/*
** Advance the position list iterator specified by the first two 
** arguments so that it points to the first element with a value greater
** than or equal to parameter iNext.
*/
static void fts3SnippetAdvance(char **ppIter, int *piIter, int iNext){
  char *pIter = *ppIter;
  if( pIter ){
    int iIter = *piIter;

    while( iIter<iNext ){
      if( 0==(*pIter & 0xFE) ){
        iIter = -1;
        pIter = 0;
        break;
      }
      fts3GetDeltaPosition(&pIter, &iIter);
    }

    *piIter = iIter;
    *ppIter = pIter;
  }
}

static int fts3SnippetNextCandidate(SnippetCtx *pIter){
  int i;                          /* Loop counter */

  if( pIter->iCurrent<0 ){
    /* The SnippetCtx object has just been initialized. The first snippet
    ** candidate always starts at offset 0 (even if this candidate has a
    ** score of 0.0).
    */
    pIter->iCurrent = 0;

    /* Advance the 'head' iterator of each phrase to the first offset that
    ** is greater than or equal to (iNext+nSnippet).
    */
    for(i=0; i<pIter->nPhrase; i++){
      SnippetPhrase *pPhrase = &pIter->aPhrase[i];
      fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, pIter->nSnippet);
    }
  }else{
    int iStart;
    int iEnd = 0x7FFFFFFF;

    for(i=0; i<pIter->nPhrase; i++){
      SnippetPhrase *pPhrase = &pIter->aPhrase[i];
      if( pPhrase->pHead && pPhrase->iHead<iEnd ){
        iEnd = pPhrase->iHead;
      }
    }
    if( iEnd==0x7FFFFFFF ){
      return 1;
    }

    pIter->iCurrent = iStart = iEnd - pIter->nSnippet + 1;
    for(i=0; i<pIter->nPhrase; i++){
      SnippetPhrase *pPhrase = &pIter->aPhrase[i];
      fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, iEnd+1);
      fts3SnippetAdvance(&pPhrase->pTail, &pPhrase->iTail, iStart);
    }
  }

  return 0;
}

static void fts3SnippetDetails(
  SnippetCtx *pIter,              /* Snippet iterator */
  u64 mCovered,                   /* Bitmask of phrases already covered */
  int *piToken,                   /* OUT: First token of proposed snippet */
  int *piScore,                   /* OUT: "Score" for this snippet */
  u64 *pmCover,                   /* OUT: Bitmask of phrases covered */
  u64 *pmHighlight                /* OUT: Bitmask of terms to highlight */
){
  int iStart = pIter->iCurrent;   /* First token of snippet */

  int iScore = 0;
  int i;
  u64 mCover = 0;
  u64 mHighlight = 0;

  for(i=0; i<pIter->nPhrase; i++){
    SnippetPhrase *pPhrase = &pIter->aPhrase[i];
    if( pPhrase->pTail ){
      char *pCsr = pPhrase->pTail;
      int iCsr = pPhrase->iTail;

      while( iCsr<(iStart+pIter->nSnippet) ){
        int j;
        u64 mPhrase = (u64)1 << i;
        u64 mPos = (u64)1 << (iCsr - iStart);
        assert( iCsr>=iStart );
        if( (mCover|mCovered)&mPhrase ){
          iScore++;
        }else{
          iScore += 1000;
        }
        mCover |= mPhrase;

        for(j=0; j<pPhrase->nToken; j++){
          mHighlight |= (mPos>>j);
        }

        if( 0==(*pCsr & 0x0FE) ) break;
        fts3GetDeltaPosition(&pCsr, &iCsr);
      }
    }
  }

  *piToken = iStart;
  *piScore = iScore;
  *pmCover = mCover;
  *pmHighlight = mHighlight;
}

/*
** This function is an fts3ExprIterate() callback used by fts3BestSnippet().
** Each invocation populates an element of the SnippetCtx.aPhrase[] array.
*/
static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){
  SnippetCtx *p = (SnippetCtx *)ctx;
  SnippetPhrase *pPhrase = &p->aPhrase[iPhrase];
  char *pCsr;

  pPhrase->nToken = pExpr->pPhrase->nToken;


  pCsr = sqlite3Fts3FindPositions(pExpr, p->pCsr->iPrevId, p->iCol);
  if( pCsr ){
    int iFirst = 0;

    pPhrase->pList = pCsr;




    fts3GetDeltaPosition(&pCsr, &iFirst);










    pPhrase->pHead = pCsr;

    pPhrase->pTail = pCsr;


    pPhrase->iHead = iFirst;
    pPhrase->iTail = iFirst;




  }else{



    assert( pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0 );








  }

  return SQLITE_OK;
}




#define BITMASK_SIZE 64







typedef struct SnippetFragment SnippetFragment;
struct SnippetFragment {
  int iCol;                       /* Column snippet is extracted from */
  int iPos;                       /* Index of first token in snippet */
  u64 covered;                    /* Mask of query phrases covered */
  u64 hlmask;                     /* Mask of snippet terms to highlight */
};

static int fts3BestSnippet(
  int nSnippet,                   /* Desired snippet length */
  Fts3Cursor *pCsr,               /* Cursor to create snippet for */
  int iCol,                       /* Index of column to create snippet from */
  u64 mCovered,                   /* Mask of phrases already covered */
  u64 *pmSeen,                    /* IN/OUT: Mask of phrases seen */
  SnippetFragment *pFragment,     /* OUT: Best snippet found */
  int *piScore                    /* OUT: Score of snippet pFragment */
){
  int rc;                         /* Return Code */





  int nList;                      /* Number of phrases in expression */
  SnippetCtx sCtx;                /* Snippet context object */
  int nByte;                      /* Number of bytes of space to allocate */





  int iBestScore = -1;
  int i;

  memset(&sCtx, 0, sizeof(sCtx));

  /* Iterate through the phrases in the expression to count them. The same
  ** callback makes sure the doclists are loaded for each phrase.
  */
  rc = fts3ExprLoadDoclists(pCsr, &nList, 0);
  if( rc!=SQLITE_OK ){
    return rc;
  }

  /* Now that it is known how many phrases there are, allocate and zero
  ** the required space using malloc().
  */
  nByte = sizeof(SnippetPhrase) * nList;



  sCtx.aPhrase = (SnippetPhrase *)sqlite3_malloc(nByte);
  if( !sCtx.aPhrase ){
    return SQLITE_NOMEM;
  }
  memset(sCtx.aPhrase, 0, nByte);




  /* Initialize the contents of the SnippetCtx object. Then iterate through
  ** the set of phrases in the expression to populate the aPhrase[] array.
  */
  sCtx.pCsr = pCsr;
  sCtx.iCol = iCol;
  sCtx.nSnippet = nSnippet;
  sCtx.nPhrase = nList;

  sCtx.iCurrent = -1;
  (void)fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void *)&sCtx);

  for(i=0; i<nList; i++){
    if( sCtx.aPhrase[i].pHead ){
      *pmSeen |= (u64)1 << i;
    }
  }





  pFragment->iCol = iCol;







  while( !fts3SnippetNextCandidate(&sCtx) ){
    int iPos;

    int iScore;
    u64 mCover;


    u64 mHighlight;
    fts3SnippetDetails(&sCtx, mCovered, &iPos, &iScore, &mCover, &mHighlight);



    assert( iScore>=0 );







    if( iScore>iBestScore ){
      pFragment->iPos = iPos;
      pFragment->hlmask = mHighlight;
      pFragment->covered = mCover;
      iBestScore = iScore;


    }
  }

  sqlite3_free(sCtx.aPhrase);





  *piScore = iBestScore;
  return SQLITE_OK;
}


typedef struct StrBuffer StrBuffer;
struct StrBuffer {
  char *z;
  int n;
  int nAlloc;
};
635
636
637
638
639
640
641

642
643
644
645
646
647
648

/*
** fts3ExprIterate() callback used to collect the "global" matchinfo stats
** for a single query.
*/
static int fts3ExprGlobalMatchinfoCb(
  Fts3Expr *pExpr,                /* Phrase expression node */

  void *pCtx                      /* Pointer to MatchInfo structure */
){
  MatchInfo *p = (MatchInfo *)pCtx;
  char *pCsr;
  char *pEnd;
  const int iStart = 2 + p->nCol*p->iPhrase;








>







649
650
651
652
653
654
655
656
657
658
659
660
661
662
663

/*
** fts3ExprIterate() callback used to collect the "global" matchinfo stats
** for a single query.
*/
static int fts3ExprGlobalMatchinfoCb(
  Fts3Expr *pExpr,                /* Phrase expression node */
  int iPhrase,
  void *pCtx                      /* Pointer to MatchInfo structure */
){
  MatchInfo *p = (MatchInfo *)pCtx;
  char *pCsr;
  char *pEnd;
  const int iStart = 2 + p->nCol*p->iPhrase;

658
659
660
661
662
663
664

665
666
667
668
669
670
671
672
673
674
675

  p->iPhrase++;
  return SQLITE_OK;
}

static int fts3ExprLocalMatchinfoCb(
  Fts3Expr *pExpr,                /* Phrase expression node */

  void *pCtx                      /* Pointer to MatchInfo structure */
){
  MatchInfo *p = (MatchInfo *)pCtx;
  int iPhrase = p->iPhrase++;

  if( pExpr->aDoclist ){
    char *pCsr;
    int iOffset = 2 + p->nCol*(p->aGlobal[0]+iPhrase);

    memset(&p->aGlobal[iOffset], 0, p->nCol*sizeof(u32));
    pCsr = sqlite3Fts3FindPositions(pExpr, p->pCursor->iPrevId, -1);







>



|







673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691

  p->iPhrase++;
  return SQLITE_OK;
}

static int fts3ExprLocalMatchinfoCb(
  Fts3Expr *pExpr,                /* Phrase expression node */
  int iPhrase,
  void *pCtx                      /* Pointer to MatchInfo structure */
){
  MatchInfo *p = (MatchInfo *)pCtx;
  p->iPhrase++;

  if( pExpr->aDoclist ){
    char *pCsr;
    int iOffset = 2 + p->nCol*(p->aGlobal[0]+iPhrase);

    memset(&p->aGlobal[iOffset], 0, p->nCol*sizeof(u32));
    pCsr = sqlite3Fts3FindPositions(pExpr, p->pCursor->iPrevId, -1);
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
  sqlite3_int64 iDocid;
  TermOffset *aTerm;
};

/*
** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets().
*/
static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, void *ctx){
  TermOffsetCtx *p = (TermOffsetCtx *)ctx;
  int nTerm;                      /* Number of tokens in phrase */
  int iTerm;                      /* For looping through nTerm phrase terms */
  char *pList;                    /* Pointer to position list for phrase */
  int iPos = 0;                   /* First position in position-list */

  pList = sqlite3Fts3FindPositions(pExpr, p->iDocid, p->iCol);







|







848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
  sqlite3_int64 iDocid;
  TermOffset *aTerm;
};

/*
** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets().
*/
static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){
  TermOffsetCtx *p = (TermOffsetCtx *)ctx;
  int nTerm;                      /* Number of tokens in phrase */
  int iTerm;                      /* For looping through nTerm phrase terms */
  char *pList;                    /* Pointer to position list for phrase */
  int iPos = 0;                   /* First position in position-list */

  pList = sqlite3Fts3FindPositions(pExpr, p->iDocid, p->iCol);