SQLite

Check-in [deb80eac91]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fixes for the matchinfo() function related to FTS4 common token handling.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | experimental
Files: files | file ages | folders
SHA1: deb80eac9112d21835dfd3cee08ed8f09d975bf7
User & Date: dan 2010-10-23 19:07:30.000
Context
2010-10-25
09:01
Add test for matchinfo when a phrase includes some common tokens. (check-in: 80a54ebc41 user: dan tags: experimental)
2010-10-23
19:07
Fixes for the matchinfo() function related to FTS4 common token handling. (check-in: deb80eac91 user: dan tags: experimental)
2010-10-22
19:03
Add new test file fts3defer2.test. (check-in: 5a4d5bfcae user: dan tags: experimental)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts3/fts3.c.
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
    rc = fts3DeferExpression(pCsr, pExpr->pLeft);
    if( rc==SQLITE_OK ){
      rc = fts3DeferExpression(pCsr, pExpr->pRight);
    }
    if( pExpr->eType==FTSQUERY_PHRASE ){
      int iCol = pExpr->pPhrase->iColumn;
      int i;
      pExpr->bDeferred = 1;
      for(i=0; rc==SQLITE_OK && i<pExpr->pPhrase->nToken; i++){
        Fts3PhraseToken *pToken = &pExpr->pPhrase->aToken[i];
        if( pToken->pDeferred==0 ){
          rc = sqlite3Fts3DeferToken(pCsr, pToken, iCol);
        }
      }
    }







<







2104
2105
2106
2107
2108
2109
2110

2111
2112
2113
2114
2115
2116
2117
    rc = fts3DeferExpression(pCsr, pExpr->pLeft);
    if( rc==SQLITE_OK ){
      rc = fts3DeferExpression(pCsr, pExpr->pRight);
    }
    if( pExpr->eType==FTSQUERY_PHRASE ){
      int iCol = pExpr->pPhrase->iColumn;
      int i;

      for(i=0; rc==SQLITE_OK && i<pExpr->pPhrase->nToken; i++){
        Fts3PhraseToken *pToken = &pExpr->pPhrase->aToken[i];
        if( pToken->pDeferred==0 ){
          rc = sqlite3Fts3DeferToken(pCsr, pToken, iCol);
        }
      }
    }
2166
2167
2168
2169
2170
2171
2172

2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184




2185
2186
2187
2188

2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201

2202




2203
2204
2205
2206
2207
2208
2209
  char *pOut = 0;
  int nOut = 0;
  int rc = SQLITE_OK;
  int ii;
  int iCol = pPhrase->iColumn;
  int isTermPos = (pPhrase->nToken>1 || isReqPos);
  Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;


  int iPrevTok = 0;
  int nDoc = 0;

  /* If this is an xFilter() evaluation, create a segment-reader for each
  ** phrase token. Or, if this is an xNest() or snippet/offsets/matchinfo
  ** evaluation, only create segment-readers if there are no Fts3DeferredToken
  ** objects attached to the phrase-tokens.
  */
  for(ii=0; ii<pPhrase->nToken; ii++){
    Fts3PhraseToken *pTok = &pPhrase->aToken[ii];
    if( pTok->pArray==0 && (pCsr->doDeferred==0 || pTok->pDeferred==0) ){




      rc = fts3TermSegReaderArray(
          pCsr, pTok->z, pTok->n, pTok->isPrefix, &pTok->pArray
      );
      if( rc!=SQLITE_OK ) return rc;

    }
  }

  for(ii=0; ii<pPhrase->nToken; ii++){
    Fts3PhraseToken *pTok;        /* Token to find doclist for */
    int iTok;                     /* The token being queried this iteration */
    char *pList;                  /* Pointer to token doclist */
    int nList;                    /* Size of buffer at pList */

    /* Select a token to process. If this is an xFilter() call, then tokens 
    ** are processed in order from least to most costly. Otherwise, tokens 
    ** are processed in the order in which they occur in the phrase.
    */

    if( pCsr->doDeferred || isReqPos ){




      iTok = ii;
      pTok = &pPhrase->aToken[iTok];
    }else{
      int nMinCost = 0x7FFFFFFF;
      int jj;

      /* Find the remaining token with the lowest cost. */







>





|





|
>
>
>
>
|
|
|
|
>













>
|
>
>
>
>







2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
  char *pOut = 0;
  int nOut = 0;
  int rc = SQLITE_OK;
  int ii;
  int iCol = pPhrase->iColumn;
  int isTermPos = (pPhrase->nToken>1 || isReqPos);
  Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
  int isFirst = 1;

  int iPrevTok = 0;
  int nDoc = 0;

  /* If this is an xFilter() evaluation, create a segment-reader for each
  ** phrase token. Or, if this is an xNext() or snippet/offsets/matchinfo
  ** evaluation, only create segment-readers if there are no Fts3DeferredToken
  ** objects attached to the phrase-tokens.
  */
  for(ii=0; ii<pPhrase->nToken; ii++){
    Fts3PhraseToken *pTok = &pPhrase->aToken[ii];
    if( pTok->pArray==0 ){
      if( (pCsr->eEvalmode==FTS3_EVAL_FILTER)
       || (pCsr->eEvalmode==FTS3_EVAL_NEXT && pCsr->pDeferred==0) 
       || (pCsr->eEvalmode==FTS3_EVAL_MATCHINFO && pTok->bFulltext) 
      ){
        rc = fts3TermSegReaderArray(
            pCsr, pTok->z, pTok->n, pTok->isPrefix, &pTok->pArray
        );
        if( rc!=SQLITE_OK ) return rc;
      }
    }
  }

  for(ii=0; ii<pPhrase->nToken; ii++){
    Fts3PhraseToken *pTok;        /* Token to find doclist for */
    int iTok;                     /* The token being queried this iteration */
    char *pList;                  /* Pointer to token doclist */
    int nList;                    /* Size of buffer at pList */

    /* Select a token to process. If this is an xFilter() call, then tokens 
    ** are processed in order from least to most costly. Otherwise, tokens 
    ** are processed in the order in which they occur in the phrase.
    */
    if( pCsr->eEvalmode==FTS3_EVAL_MATCHINFO ){
      assert( isReqPos );
      iTok = ii;
      pTok = &pPhrase->aToken[iTok];
      if( pTok->bFulltext==0 ) continue;
    }else if( pCsr->eEvalmode==FTS3_EVAL_NEXT || isReqPos ){
      iTok = ii;
      pTok = &pPhrase->aToken[iTok];
    }else{
      int nMinCost = 0x7FFFFFFF;
      int jj;

      /* Find the remaining token with the lowest cost. */
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234

2235
2236
2237
2238
2239
2240
2241
2242
2243
2244

2245
2246
2247
2248
2249
2250
2251
      */
      if( nMinCost>nDoc && ii>0 ){
        rc = fts3DeferExpression(pCsr, pCsr->pExpr);
        break;
      }
    }

    if( pCsr->doDeferred && pTok->pDeferred ){
      rc = fts3DeferredTermSelect(pTok->pDeferred, isTermPos, &nList, &pList);
    }else{
      assert( pTok->pArray );
      rc = fts3TermSelect(p, pTok, iCol, isTermPos, &nList, &pList);

    }
    assert( rc!=SQLITE_OK || pCsr->doDeferred || pTok->pArray==0 );
    if( rc!=SQLITE_OK ) break;

    if( ii==0 ){
      pOut = pList;
      nOut = nList;
      if( pCsr->doDeferred==0 && pPhrase->nToken>1 ){
        nDoc = fts3DoclistCountDocids(1, pOut, nOut);
      }

    }else{
      /* Merge the new term list and the current output. */
      char *aLeft, *aRight;
      int nLeft, nRight;
      int nDist;
      int mt;








|




>

|


|


|


>







2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
      */
      if( nMinCost>nDoc && ii>0 ){
        rc = fts3DeferExpression(pCsr, pCsr->pExpr);
        break;
      }
    }

    if( pCsr->eEvalmode==FTS3_EVAL_NEXT && pTok->pDeferred ){
      rc = fts3DeferredTermSelect(pTok->pDeferred, isTermPos, &nList, &pList);
    }else{
      assert( pTok->pArray );
      rc = fts3TermSelect(p, pTok, iCol, isTermPos, &nList, &pList);
      pTok->bFulltext = 1;
    }
    assert( rc!=SQLITE_OK || pCsr->eEvalmode || pTok->pArray==0 );
    if( rc!=SQLITE_OK ) break;

    if( isFirst ){
      pOut = pList;
      nOut = nList;
      if( pCsr->eEvalmode==FTS3_EVAL_FILTER && pPhrase->nToken>1 ){
        nDoc = fts3DoclistCountDocids(1, pOut, nOut);
      }
      isFirst = 0;
    }else{
      /* Merge the new term list and the current output. */
      char *aLeft, *aRight;
      int nLeft, nRight;
      int nDist;
      int mt;

2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
    }
    assert( nOut==0 || pOut!=0 );

    iPrevTok = iTok;
  }

  if( rc==SQLITE_OK ){
    if( ii!=pPhrase->nToken ){
      assert( pCsr->doDeferred==0 && isReqPos==0 );
      fts3DoclistStripPositions(pOut, &nOut);
    }
    *paOut = pOut;
    *pnOut = nOut;
  }else{
    sqlite3_free(pOut);
  }







|
|







2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
    }
    assert( nOut==0 || pOut!=0 );

    iPrevTok = iTok;
  }

  if( rc==SQLITE_OK ){
    if( ii!=pPhrase->nToken && pCsr->eEvalmode==FTS3_EVAL_FILTER ){
      assert( pCsr->eEvalmode==FTS3_EVAL_FILTER && isReqPos==0 );
      fts3DoclistStripPositions(pOut, &nOut);
    }
    *paOut = pOut;
    *pnOut = nOut;
  }else{
    sqlite3_free(pOut);
  }
2394
2395
2396
2397
2398
2399
2400

2401
2402
2403
2404
2405
2406
2407
2408
static int fts3ExprAllocateSegReaders(
  Fts3Cursor *pCsr,               /* FTS3 table */
  Fts3Expr *pExpr,                /* Expression to create seg-readers for */
  int *pnExpr                     /* OUT: Number of AND'd expressions */
){
  int rc = SQLITE_OK;             /* Return code */


  if( pCsr->doDeferred ) return SQLITE_OK;
  if( pnExpr && pExpr->eType!=FTSQUERY_AND ){
    (*pnExpr)++;
    pnExpr = 0;
  }

  if( pExpr->eType==FTSQUERY_PHRASE ){
    Fts3Phrase *pPhrase = pExpr->pPhrase;







>
|







2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
static int fts3ExprAllocateSegReaders(
  Fts3Cursor *pCsr,               /* FTS3 table */
  Fts3Expr *pExpr,                /* Expression to create seg-readers for */
  int *pnExpr                     /* OUT: Number of AND'd expressions */
){
  int rc = SQLITE_OK;             /* Return code */

  assert( pCsr->eEvalmode!=FTS3_EVAL_MATCHINFO );
  if( pCsr->eEvalmode==FTS3_EVAL_NEXT ) return SQLITE_OK;
  if( pnExpr && pExpr->eType!=FTSQUERY_AND ){
    (*pnExpr)++;
    pnExpr = 0;
  }

  if( pExpr->eType==FTSQUERY_PHRASE ){
    Fts3Phrase *pPhrase = pExpr->pPhrase;
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569

    if( pExpr->eType==FTSQUERY_PHRASE ){
      rc = fts3PhraseSelect(p, pExpr->pPhrase,
          isReqPos || (pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR),
          paOut, pnOut
      );
      fts3ExprFreeSegReaders(pExpr);
    }else if( p->doDeferred==0 && pExpr->eType==FTSQUERY_AND ){
      ExprAndCost *aExpr = 0;     /* Array of AND'd expressions and costs */
      int nExpr = 0;              /* Size of aExpr[] */
      char *aRet = 0;             /* Doclist to return to caller */
      int nRet = 0;               /* Length of aRet[] in bytes */
      int nDoc = 0x7FFFFFFF;

      assert( !isReqPos );







|







2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582

    if( pExpr->eType==FTSQUERY_PHRASE ){
      rc = fts3PhraseSelect(p, pExpr->pPhrase,
          isReqPos || (pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR),
          paOut, pnOut
      );
      fts3ExprFreeSegReaders(pExpr);
    }else if( p->eEvalmode==FTS3_EVAL_FILTER && pExpr->eType==FTSQUERY_AND ){
      ExprAndCost *aExpr = 0;     /* Array of AND'd expressions and costs */
      int nExpr = 0;              /* Size of aExpr[] */
      char *aRet = 0;             /* Doclist to return to caller */
      int nRet = 0;               /* Length of aRet[] in bytes */
      int nDoc = 0x7FFFFFFF;

      assert( !isReqPos );
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
      char *aRight;
      int nLeft;
      int nRight;

      assert( pExpr->eType==FTSQUERY_NEAR 
           || pExpr->eType==FTSQUERY_OR
           || pExpr->eType==FTSQUERY_NOT
           || (pExpr->eType==FTSQUERY_AND && p->doDeferred)
      );

      if( 0==(rc = fts3EvalExpr(p, pExpr->pRight, &aRight, &nRight, isReqPos))
       && 0==(rc = fts3EvalExpr(p, pExpr->pLeft, &aLeft, &nLeft, isReqPos))
      ){
        switch( pExpr->eType ){
          case FTSQUERY_NEAR: {







|







2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
      char *aRight;
      int nLeft;
      int nRight;

      assert( pExpr->eType==FTSQUERY_NEAR 
           || pExpr->eType==FTSQUERY_OR
           || pExpr->eType==FTSQUERY_NOT
           || (pExpr->eType==FTSQUERY_AND && p->eEvalmode==FTS3_EVAL_NEXT)
      );

      if( 0==(rc = fts3EvalExpr(p, pExpr->pRight, &aRight, &nRight, isReqPos))
       && 0==(rc = fts3EvalExpr(p, pExpr->pLeft, &aLeft, &nLeft, isReqPos))
      ){
        switch( pExpr->eType ){
          case FTSQUERY_NEAR: {
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
    if( rc==SQLITE_OK ){
      sqlite3Fts3FreeDeferredDoclists(pCsr);
      rc = sqlite3Fts3CacheDeferredDoclists(pCsr);
    }
    if( rc==SQLITE_OK ){
      char *a = 0;
      int n = 0;
      pCsr->doDeferred = 1;
      rc = fts3EvalExpr(pCsr, pCsr->pExpr, &a, &n, 0);
      pCsr->doDeferred = 0;
      assert( n>=0 );
      *pbRes = (n>0);
      sqlite3_free(a);
    }
  }
  return rc;
}







<

<







2734
2735
2736
2737
2738
2739
2740

2741

2742
2743
2744
2745
2746
2747
2748
    if( rc==SQLITE_OK ){
      sqlite3Fts3FreeDeferredDoclists(pCsr);
      rc = sqlite3Fts3CacheDeferredDoclists(pCsr);
    }
    if( rc==SQLITE_OK ){
      char *a = 0;
      int n = 0;

      rc = fts3EvalExpr(pCsr, pCsr->pExpr, &a, &n, 0);

      assert( n>=0 );
      *pbRes = (n>0);
      sqlite3_free(a);
    }
  }
  return rc;
}
2748
2749
2750
2751
2752
2753
2754

2755
2756
2757
2758
2759
2760
2761
** subsequently to determine whether or not an EOF was hit.
*/
static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){
  int res;
  int rc = SQLITE_OK;             /* Return code */
  Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;


  do {
    if( pCsr->aDoclist==0 ){
      if( SQLITE_ROW!=sqlite3_step(pCsr->pStmt) ){
        pCsr->isEof = 1;
        rc = sqlite3_reset(pCsr->pStmt);
        break;
      }







>







2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
** subsequently to determine whether or not an EOF was hit.
*/
static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){
  int res;
  int rc = SQLITE_OK;             /* Return code */
  Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;

  pCsr->eEvalmode = FTS3_EVAL_NEXT;
  do {
    if( pCsr->aDoclist==0 ){
      if( SQLITE_ROW!=sqlite3_step(pCsr->pStmt) ){
        pCsr->isEof = 1;
        rc = sqlite3_reset(pCsr->pStmt);
        break;
      }
2999
3000
3001
3002
3003
3004
3005

3006
3007


3008












3009
3010
3011
3012
3013
3014
3015
** Load the doclist associated with expression pExpr to pExpr->aDoclist.
** The loaded doclist contains positions as well as the document ids.
** This is used by the matchinfo(), snippet() and offsets() auxillary
** functions.
*/
int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *pCsr, Fts3Expr *pExpr){
  int rc;

  pCsr->doDeferred = 1;
  rc = fts3EvalExpr(pCsr, pExpr, &pExpr->aDoclist, &pExpr->nDoclist, 1);


  pCsr->doDeferred = 0;












  return rc;
}

/*
** After ExprLoadDoclist() (see above) has been called, this function is
** used to iterate/search through the position lists that make up the doclist
** stored in pExpr->aDoclist.







>
|

>
>
|
>
>
>
>
>
>
>
>
>
>
>
>







3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
** Load the doclist associated with expression pExpr to pExpr->aDoclist.
** The loaded doclist contains positions as well as the document ids.
** This is used by the matchinfo(), snippet() and offsets() auxillary
** functions.
*/
int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *pCsr, Fts3Expr *pExpr){
  int rc;
  assert( pExpr->eType==FTSQUERY_PHRASE && pExpr->pPhrase );
  assert( pCsr->eEvalmode==FTS3_EVAL_NEXT );
  rc = fts3EvalExpr(pCsr, pExpr, &pExpr->aDoclist, &pExpr->nDoclist, 1);
  return rc;
}

int sqlite3Fts3ExprLoadFtDoclist(
  Fts3Cursor *pCsr, 
  Fts3Expr *pExpr,
  char **paDoclist,
  int *pnDoclist
){
  int rc;
  assert( pCsr->eEvalmode==FTS3_EVAL_NEXT );
  assert( pExpr->eType==FTSQUERY_PHRASE && pExpr->pPhrase );
  pCsr->eEvalmode = FTS3_EVAL_MATCHINFO;
  rc = fts3EvalExpr(pCsr, pExpr, paDoclist, pnDoclist, 1);
  pCsr->eEvalmode = FTS3_EVAL_NEXT;
  return rc;
}

/*
** After ExprLoadDoclist() (see above) has been called, this function is
** used to iterate/search through the position lists that make up the doclist
** stored in pExpr->aDoclist.
Changes to ext/fts3/fts3Int.h.
159
160
161
162
163
164
165
166
167
168




169
170
171
172
173
174
175
  Fts3DeferredToken *pDeferred;   /* Deferred search tokens, if any */
  sqlite3_int64 iPrevId;          /* Previous id read from aDoclist */
  char *pNextId;                  /* Pointer into the body of aDoclist */
  char *aDoclist;                 /* List of docids for full-text queries */
  int nDoclist;                   /* Size of buffer at aDoclist */
  int isMatchinfoNeeded;          /* True when aMatchinfo[] needs filling in */
  u32 *aMatchinfo;                /* Information about most recent match */
  int doDeferred;
  int nRowAvg;                    /* Average size of database rows, in pages */
};





/*
** The Fts3Cursor.eSearch member is always set to one of the following.
** Actualy, Fts3Cursor.eSearch can be greater than or equal to
** FTS3_FULLTEXT_SEARCH.  If so, then Fts3Cursor.eSearch - 2 is the index
** of the column to be searched.  For example, in
**







|


>
>
>
>







159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
  Fts3DeferredToken *pDeferred;   /* Deferred search tokens, if any */
  sqlite3_int64 iPrevId;          /* Previous id read from aDoclist */
  char *pNextId;                  /* Pointer into the body of aDoclist */
  char *aDoclist;                 /* List of docids for full-text queries */
  int nDoclist;                   /* Size of buffer at aDoclist */
  int isMatchinfoNeeded;          /* True when aMatchinfo[] needs filling in */
  u32 *aMatchinfo;                /* Information about most recent match */
  int eEvalmode;                  /* An FTS3_EVAL_XX constant */
  int nRowAvg;                    /* Average size of database rows, in pages */
};

#define FTS3_EVAL_FILTER    0
#define FTS3_EVAL_NEXT      1
#define FTS3_EVAL_MATCHINFO 2

/*
** The Fts3Cursor.eSearch member is always set to one of the following.
** Actualy, Fts3Cursor.eSearch can be greater than or equal to
** FTS3_FULLTEXT_SEARCH.  If so, then Fts3Cursor.eSearch - 2 is the index
** of the column to be searched.  For example, in
**
187
188
189
190
191
192
193






194
195
196
197
198

199
200
201
202
203

204
205
206
207
208
209
210
#define FTS3_FULLTEXT_SEARCH 2    /* Full-text index search */

/*
** A "phrase" is a sequence of one or more tokens that must match in
** sequence.  A single token is the base case and the most common case.
** For a sequence of tokens contained in double-quotes (i.e. "one two three")
** nToken will be the number of tokens in the string.






*/
struct Fts3PhraseToken {
  char *z;                        /* Text of the token */
  int n;                          /* Number of bytes in buffer z */
  int isPrefix;                   /* True if token ends with a "*" character */

  Fts3SegReaderArray *pArray;     /* Segment-reader for this token */
  Fts3DeferredToken *pDeferred;   /* Deferred token object for this token */
};

struct Fts3Phrase {

  int nToken;                /* Number of tokens in the phrase */
  int iColumn;               /* Index of column this phrase must match */
  int isNot;                 /* Phrase prefixed by unary not (-) operator */
  Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */
};

/*







>
>
>
>
>
>





>





>







191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
#define FTS3_FULLTEXT_SEARCH 2    /* Full-text index search */

/*
** A "phrase" is a sequence of one or more tokens that must match in
** sequence.  A single token is the base case and the most common case.
** For a sequence of tokens contained in double-quotes (i.e. "one two three")
** nToken will be the number of tokens in the string.
**
** The nDocMatch and nMatch variables contain data that may be used by the
** matchinfo() function. They are populated when the full-text index is 
** queried for hits on the phrase. If one or more tokens in the phrase
** are deferred, the nDocMatch and nMatch variables are populated based
** on the assumption that the 
*/
struct Fts3PhraseToken {
  char *z;                        /* Text of the token */
  int n;                          /* Number of bytes in buffer z */
  int isPrefix;                   /* True if token ends with a "*" character */
  int bFulltext;                  /* True if full-text index was used */
  Fts3SegReaderArray *pArray;     /* Segment-reader for this token */
  Fts3DeferredToken *pDeferred;   /* Deferred token object for this token */
};

struct Fts3Phrase {
  /* Variables populated by fts3_expr.c when parsing a MATCH expression */
  int nToken;                /* Number of tokens in the phrase */
  int iColumn;               /* Index of column this phrase must match */
  int isNot;                 /* Phrase prefixed by unary not (-) operator */
  Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */
};

/*
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
  int eType;                 /* One of the FTSQUERY_XXX values defined below */
  int nNear;                 /* Valid if eType==FTSQUERY_NEAR */
  Fts3Expr *pParent;         /* pParent->pLeft==this or pParent->pRight==this */
  Fts3Expr *pLeft;           /* Left operand */
  Fts3Expr *pRight;          /* Right operand */
  Fts3Phrase *pPhrase;       /* Valid if eType==FTSQUERY_PHRASE */

  int bDeferred;

  int isLoaded;              /* True if aDoclist/nDoclist are initialized. */
  char *aDoclist;            /* Buffer containing doclist */
  int nDoclist;              /* Size of aDoclist in bytes */

  sqlite3_int64 iCurrent;
  char *pCurrent;
};







<
<







236
237
238
239
240
241
242


243
244
245
246
247
248
249
  int eType;                 /* One of the FTSQUERY_XXX values defined below */
  int nNear;                 /* Valid if eType==FTSQUERY_NEAR */
  Fts3Expr *pParent;         /* pParent->pLeft==this or pParent->pRight==this */
  Fts3Expr *pLeft;           /* Left operand */
  Fts3Expr *pRight;          /* Right operand */
  Fts3Phrase *pPhrase;       /* Valid if eType==FTSQUERY_PHRASE */



  int isLoaded;              /* True if aDoclist/nDoclist are initialized. */
  char *aDoclist;            /* Buffer containing doclist */
  int nDoclist;              /* Size of aDoclist in bytes */

  sqlite3_int64 iCurrent;
  char *pCurrent;
};
303
304
305
306
307
308
309

310
311
312
313
314
315
316
int sqlite3Fts3GetVarint(const char *, sqlite_int64 *);
int sqlite3Fts3GetVarint32(const char *, int *);
int sqlite3Fts3VarintLen(sqlite3_uint64);
void sqlite3Fts3Dequote(char *);

char *sqlite3Fts3FindPositions(Fts3Expr *, sqlite3_int64, int);
int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *, Fts3Expr *);

int sqlite3Fts3ExprNearTrim(Fts3Expr *, Fts3Expr *, int);

/* fts3_tokenizer.c */
const char *sqlite3Fts3NextToken(const char *, int *);
int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *);
int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, 
  const char *, sqlite3_tokenizer **, const char **, char **







>







313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
int sqlite3Fts3GetVarint(const char *, sqlite_int64 *);
int sqlite3Fts3GetVarint32(const char *, int *);
int sqlite3Fts3VarintLen(sqlite3_uint64);
void sqlite3Fts3Dequote(char *);

char *sqlite3Fts3FindPositions(Fts3Expr *, sqlite3_int64, int);
int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *, Fts3Expr *);
int sqlite3Fts3ExprLoadFtDoclist(Fts3Cursor *, Fts3Expr *, char **, int *);
int sqlite3Fts3ExprNearTrim(Fts3Expr *, Fts3Expr *, int);

/* fts3_tokenizer.c */
const char *sqlite3Fts3NextToken(const char *, int *);
int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *);
int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, 
  const char *, sqlite3_tokenizer **, const char **, char **
Changes to ext/fts3/fts3_expr.c.
101
102
103
104
105
106
107












108
109
110
111
112
113
114
** is defined to accept an argument of type char, and always returns 0 for
** any values that fall outside of the range of the unsigned char type (i.e.
** negative values).
*/
static int fts3isspace(char c){
  return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f';
}













/*
** Extract the next token from buffer z (length n) using the tokenizer
** and other information (column names etc.) in pParse. Create an Fts3Expr
** structure of type FTSQUERY_PHRASE containing a phrase consisting of this
** single token and set *ppExpr to point to it. If the end of the buffer is
** reached before a token is found, set *ppExpr to zero. It is the







>
>
>
>
>
>
>
>
>
>
>
>







101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
** is defined to accept an argument of type char, and always returns 0 for
** any values that fall outside of the range of the unsigned char type (i.e.
** negative values).
*/
static int fts3isspace(char c){
  return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f';
}

/*
** Allocate nByte bytes of memory using sqlite3_malloc(). If successful,
** zero the memory before returning a pointer to it. If unsuccessful, 
** return NULL.
*/
static void *fts3MallocZero(int nByte){
  void *pRet = sqlite3_malloc(nByte);
  if( pRet ) memset(pRet, 0, nByte);
  return pRet;
}


/*
** Extract the next token from buffer z (length n) using the tokenizer
** and other information (column names etc.) in pParse. Create an Fts3Expr
** structure of type FTSQUERY_PHRASE containing a phrase consisting of this
** single token and set *ppExpr to point to it. If the end of the buffer is
** reached before a token is found, set *ppExpr to zero. It is the
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
    int nByte;                               /* total space to allocate */

    pCursor->pTokenizer = pTokenizer;
    rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);

    if( rc==SQLITE_OK ){
      nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
      pRet = (Fts3Expr *)sqlite3_malloc(nByte);
      if( !pRet ){
        rc = SQLITE_NOMEM;
      }else{
        memset(pRet, 0, nByte);
        pRet->eType = FTSQUERY_PHRASE;
        pRet->pPhrase = (Fts3Phrase *)&pRet[1];
        pRet->pPhrase->nToken = 1;
        pRet->pPhrase->iColumn = iCol;
        pRet->pPhrase->aToken[0].n = nToken;
        pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1];
        memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken);







|



<







151
152
153
154
155
156
157
158
159
160
161

162
163
164
165
166
167
168
    int nByte;                               /* total space to allocate */

    pCursor->pTokenizer = pTokenizer;
    rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);

    if( rc==SQLITE_OK ){
      nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
      pRet = (Fts3Expr *)fts3MallocZero(nByte);
      if( !pRet ){
        rc = SQLITE_NOMEM;
      }else{

        pRet->eType = FTSQUERY_PHRASE;
        pRet->pPhrase = (Fts3Phrase *)&pRet[1];
        pRet->pPhrase->nToken = 1;
        pRet->pPhrase->iColumn = iCol;
        pRet->pPhrase->aToken[0].n = nToken;
        pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1];
        memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken);
229
230
231
232
233
234
235

236
237
238
239
240
241
242
243
244
245
246
          goto no_mem;
        }
        if( ii==0 ){
          memset(p, 0, nByte);
          p->pPhrase = (Fts3Phrase *)&p[1];
        }
        p->pPhrase = (Fts3Phrase *)&p[1];

        p->pPhrase->nToken = ii+1;
        p->pPhrase->aToken[ii].n = nToken;
        p->pPhrase->aToken[ii].pDeferred = 0;
        p->pPhrase->aToken[ii].pArray = 0;
        memcpy(&zTemp[nTemp], zToken, nToken);
        nTemp += nToken;
        if( iEnd<nInput && zInput[iEnd]=='*' ){
          p->pPhrase->aToken[ii].isPrefix = 1;
        }else{
          p->pPhrase->aToken[ii].isPrefix = 0;
        }







>


<
<







240
241
242
243
244
245
246
247
248
249


250
251
252
253
254
255
256
          goto no_mem;
        }
        if( ii==0 ){
          memset(p, 0, nByte);
          p->pPhrase = (Fts3Phrase *)&p[1];
        }
        p->pPhrase = (Fts3Phrase *)&p[1];
        memset(&p->pPhrase->aToken[ii], 0, sizeof(Fts3PhraseToken));
        p->pPhrase->nToken = ii+1;
        p->pPhrase->aToken[ii].n = nToken;


        memcpy(&zTemp[nTemp], zToken, nToken);
        nTemp += nToken;
        if( iEnd<nInput && zInput[iEnd]=='*' ){
          p->pPhrase->aToken[ii].isPrefix = 1;
        }else{
          p->pPhrase->aToken[ii].isPrefix = 0;
        }
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
      ** the next byte must contain either whitespace, an open or close
      ** parenthesis, a quote character, or EOF. 
      */
      cNext = zInput[nKey];
      if( fts3isspace(cNext) 
       || cNext=='"' || cNext=='(' || cNext==')' || cNext==0
      ){
        pRet = (Fts3Expr *)sqlite3_malloc(sizeof(Fts3Expr));
        if( !pRet ){
          return SQLITE_NOMEM;
        }
        memset(pRet, 0, sizeof(Fts3Expr));
        pRet->eType = pKey->eType;
        pRet->nNear = nNear;
        *ppExpr = pRet;
        *pnConsumed = (int)((zInput - z) + nKey);
        return SQLITE_OK;
      }








|



<







380
381
382
383
384
385
386
387
388
389
390

391
392
393
394
395
396
397
      ** the next byte must contain either whitespace, an open or close
      ** parenthesis, a quote character, or EOF. 
      */
      cNext = zInput[nKey];
      if( fts3isspace(cNext) 
       || cNext=='"' || cNext=='(' || cNext==')' || cNext==0
      ){
        pRet = (Fts3Expr *)fts3MallocZero(sizeof(Fts3Expr));
        if( !pRet ){
          return SQLITE_NOMEM;
        }

        pRet->eType = pKey->eType;
        pRet->nNear = nNear;
        *ppExpr = pRet;
        *pnConsumed = (int)((zInput - z) + nKey);
        return SQLITE_OK;
      }

550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
    if( rc==SQLITE_OK ){
      int isPhrase;

      if( !sqlite3_fts3_enable_parentheses 
       && p->eType==FTSQUERY_PHRASE && p->pPhrase->isNot 
      ){
        /* Create an implicit NOT operator. */
        Fts3Expr *pNot = sqlite3_malloc(sizeof(Fts3Expr));
        if( !pNot ){
          sqlite3Fts3ExprFree(p);
          rc = SQLITE_NOMEM;
          goto exprparse_out;
        }
        memset(pNot, 0, sizeof(Fts3Expr));
        pNot->eType = FTSQUERY_NOT;
        pNot->pRight = p;
        if( pNotBranch ){
          pNot->pLeft = pNotBranch;
        }
        pNotBranch = pNot;
        p = pPrev;







|





<







559
560
561
562
563
564
565
566
567
568
569
570
571

572
573
574
575
576
577
578
    if( rc==SQLITE_OK ){
      int isPhrase;

      if( !sqlite3_fts3_enable_parentheses 
       && p->eType==FTSQUERY_PHRASE && p->pPhrase->isNot 
      ){
        /* Create an implicit NOT operator. */
        Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr));
        if( !pNot ){
          sqlite3Fts3ExprFree(p);
          rc = SQLITE_NOMEM;
          goto exprparse_out;
        }

        pNot->eType = FTSQUERY_NOT;
        pNot->pRight = p;
        if( pNotBranch ){
          pNot->pLeft = pNotBranch;
        }
        pNotBranch = pNot;
        p = pPrev;
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
          goto exprparse_out;
        }
  
        if( isPhrase && !isRequirePhrase ){
          /* Insert an implicit AND operator. */
          Fts3Expr *pAnd;
          assert( pRet && pPrev );
          pAnd = sqlite3_malloc(sizeof(Fts3Expr));
          if( !pAnd ){
            sqlite3Fts3ExprFree(p);
            rc = SQLITE_NOMEM;
            goto exprparse_out;
          }
          memset(pAnd, 0, sizeof(Fts3Expr));
          pAnd->eType = FTSQUERY_AND;
          insertBinaryOperator(&pRet, pPrev, pAnd);
          pPrev = pAnd;
        }

        /* This test catches attempts to make either operand of a NEAR
        ** operator something other than a phrase. For example, either of







|





<







592
593
594
595
596
597
598
599
600
601
602
603
604

605
606
607
608
609
610
611
          goto exprparse_out;
        }
  
        if( isPhrase && !isRequirePhrase ){
          /* Insert an implicit AND operator. */
          Fts3Expr *pAnd;
          assert( pRet && pPrev );
          pAnd = fts3MallocZero(sizeof(Fts3Expr));
          if( !pAnd ){
            sqlite3Fts3ExprFree(p);
            rc = SQLITE_NOMEM;
            goto exprparse_out;
          }

          pAnd->eType = FTSQUERY_AND;
          insertBinaryOperator(&pRet, pPrev, pAnd);
          pPrev = pAnd;
        }

        /* This test catches attempts to make either operand of a NEAR
        ** operator something other than a phrase. For example, either of
Changes to ext/fts3/fts3_snippet.c.
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
  sCtx.pCsr = pCsr;
  rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb1, (void *)&sCtx);
  if( rc==SQLITE_OK ){
    (void)fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb2, 0);
  }
  if( pnPhrase ) *pnPhrase = sCtx.nPhrase;
  if( pnToken ) *pnToken = sCtx.nToken;
  sqlite3Fts3SegmentsClose((Fts3Table *)pCsr->base.pVtab);
  return rc;
}

/*
** Advance the position list iterator specified by the first two 
** arguments so that it points to the first element with a value greater
** than or equal to parameter iNext.







<







264
265
266
267
268
269
270

271
272
273
274
275
276
277
  sCtx.pCsr = pCsr;
  rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb1, (void *)&sCtx);
  if( rc==SQLITE_OK ){
    (void)fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb2, 0);
  }
  if( pnPhrase ) *pnPhrase = sCtx.nPhrase;
  if( pnToken ) *pnToken = sCtx.nToken;

  return rc;
}

/*
** Advance the position list iterator specified by the first two 
** arguments so that it points to the first element with a value greater
** than or equal to parameter iNext.
789
790
791
792
793
794
795

796
797

798
799
800

801






802














803
804



805
806
807
808
809

810
811
812
813
814
815
816
*/
static int fts3ExprGlobalMatchinfoCb(
  Fts3Expr *pExpr,                /* Phrase expression node */
  int iPhrase,                    /* Phrase number (numbered from zero) */
  void *pCtx                      /* Pointer to MatchInfo structure */
){
  MatchInfo *p = (MatchInfo *)pCtx;

  char *pCsr;
  char *pEnd;

  const int iStart = 2 + (iPhrase * p->nCol * 3) + 1;

  assert( pExpr->isLoaded );








  /* Fill in the global hit count matrix row for this phrase. */














  pCsr = pExpr->aDoclist;
  pEnd = &pExpr->aDoclist[pExpr->nDoclist];



  while( pCsr<pEnd ){
    while( *pCsr++ & 0x80 );      /* Skip past docid. */
    fts3LoadColumnlistCounts(&pCsr, &p->aMatchinfo[iStart], 1);
  }


  return SQLITE_OK;
}

/*
** fts3ExprIterate() callback used to collect the "local" matchinfo stats
** for a single query. The "local" stats are those elements of the matchinfo
** array that are different for each row returned by the query.







>
|

>



>

>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
>
>
>
|
|
|


>







788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
*/
static int fts3ExprGlobalMatchinfoCb(
  Fts3Expr *pExpr,                /* Phrase expression node */
  int iPhrase,                    /* Phrase number (numbered from zero) */
  void *pCtx                      /* Pointer to MatchInfo structure */
){
  MatchInfo *p = (MatchInfo *)pCtx;
  Fts3Cursor *pCsr = p->pCursor;
  char *pIter;
  char *pEnd;
  char *pFree = 0;
  const int iStart = 2 + (iPhrase * p->nCol * 3) + 1;

  assert( pExpr->isLoaded );
  assert( pExpr->eType==FTSQUERY_PHRASE );

  if( pCsr->pDeferred ){
    Fts3Phrase *pPhrase = pExpr->pPhrase;
    int ii;
    for(ii=0; ii<pPhrase->nToken; ii++){
      if( pPhrase->aToken[ii].bFulltext ) break;
    }
    if( ii<pPhrase->nToken ){
      int nFree = 0;
      int rc = sqlite3Fts3ExprLoadFtDoclist(pCsr, pExpr, &pFree, &nFree);
      if( rc!=SQLITE_OK ) return rc;
      pIter = pFree;
      pEnd = &pFree[nFree];
    }else{
      int nDoc = p->aMatchinfo[2 + 3*p->nCol*p->aMatchinfo[0]];
      for(ii=0; ii<p->nCol; ii++){
        p->aMatchinfo[iStart + ii*3] = nDoc;
        p->aMatchinfo[iStart + ii*3 + 1] = nDoc;
      }
      return SQLITE_OK;
    }
  }else{
    pIter = pExpr->aDoclist;
    pEnd = &pExpr->aDoclist[pExpr->nDoclist];
  }

  /* Fill in the global hit count matrix row for this phrase. */
  while( pIter<pEnd ){
    while( *pIter++ & 0x80 );      /* Skip past docid. */
    fts3LoadColumnlistCounts(&pIter, &p->aMatchinfo[iStart], 1);
  }

  sqlite3_free(pFree);
  return SQLITE_OK;
}

/*
** fts3ExprIterate() callback used to collect the "local" matchinfo stats
** for a single query. The "local" stats are those elements of the matchinfo
** array that are different for each row returned by the query.
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886

887
888
889
890
891
892
893

    sInfo.aMatchinfo = (u32 *)sqlite3_malloc(sizeof(u32)*nMatchinfo);
    if( !sInfo.aMatchinfo ){ 
      return SQLITE_NOMEM;
    }
    memset(sInfo.aMatchinfo, 0, sizeof(u32)*nMatchinfo);


    /* First element of match-info is the number of phrases in the query */
    sInfo.aMatchinfo[0] = nPhrase;
    sInfo.aMatchinfo[1] = sInfo.nCol;
    (void)fts3ExprIterate(pCsr->pExpr, fts3ExprGlobalMatchinfoCb,(void*)&sInfo);
    if( pTab->bHasDocsize ){
      int ofst = 2 + 3*sInfo.aMatchinfo[0]*sInfo.aMatchinfo[1];
      rc = sqlite3Fts3MatchinfoDocsizeGlobal(pCsr, &sInfo.aMatchinfo[ofst]);
    }

    pCsr->aMatchinfo = sInfo.aMatchinfo;
    pCsr->isMatchinfoNeeded = 1;
  }

  sInfo.aMatchinfo = pCsr->aMatchinfo;
  if( rc==SQLITE_OK && pCsr->isMatchinfoNeeded ){
    (void)fts3ExprIterate(pCsr->pExpr, fts3ExprLocalMatchinfoCb, (void*)&sInfo);







<



<




>







897
898
899
900
901
902
903

904
905
906

907
908
909
910
911
912
913
914
915
916
917
918

    sInfo.aMatchinfo = (u32 *)sqlite3_malloc(sizeof(u32)*nMatchinfo);
    if( !sInfo.aMatchinfo ){ 
      return SQLITE_NOMEM;
    }
    memset(sInfo.aMatchinfo, 0, sizeof(u32)*nMatchinfo);


    /* First element of match-info is the number of phrases in the query */
    sInfo.aMatchinfo[0] = nPhrase;
    sInfo.aMatchinfo[1] = sInfo.nCol;

    if( pTab->bHasDocsize ){
      int ofst = 2 + 3*sInfo.aMatchinfo[0]*sInfo.aMatchinfo[1];
      rc = sqlite3Fts3MatchinfoDocsizeGlobal(pCsr, &sInfo.aMatchinfo[ofst]);
    }
    (void)fts3ExprIterate(pCsr->pExpr, fts3ExprGlobalMatchinfoCb,(void*)&sInfo);
    pCsr->aMatchinfo = sInfo.aMatchinfo;
    pCsr->isMatchinfoNeeded = 1;
  }

  sInfo.aMatchinfo = pCsr->aMatchinfo;
  if( rc==SQLITE_OK && pCsr->isMatchinfoNeeded ){
    (void)fts3ExprIterate(pCsr->pExpr, fts3ExprLocalMatchinfoCb, (void*)&sInfo);
989
990
991
992
993
994
995

996
997
998
999
1000
1001
1002
  for(i=0; i<nSnippet && rc==SQLITE_OK; i++){
    rc = fts3SnippetText(pCsr, &aSnippet[i], 
        i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res
    );
  }

 snippet_out:

  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx, rc);
    sqlite3_free(res.z);
  }else{
    sqlite3_result_text(pCtx, res.z, -1, sqlite3_free);
  }
}







>







1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
  for(i=0; i<nSnippet && rc==SQLITE_OK; i++){
    rc = fts3SnippetText(pCsr, &aSnippet[i], 
        i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res
    );
  }

 snippet_out:
  sqlite3Fts3SegmentsClose(pTab);
  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx, rc);
    sqlite3_free(res.z);
  }else{
    sqlite3_result_text(pCtx, res.z, -1, sqlite3_free);
  }
}
1168
1169
1170
1171
1172
1173
1174

1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193

1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
    pMod->xClose(pC);
    if( rc!=SQLITE_OK ) goto offsets_out;
  }

 offsets_out:
  sqlite3_free(sCtx.aTerm);
  assert( rc!=SQLITE_DONE );

  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx,  rc);
    sqlite3_free(res.z);
  }else{
    sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free);
  }
  return;
}

/*
** Implementation of matchinfo() function.
*/
void sqlite3Fts3Matchinfo(sqlite3_context *pContext, Fts3Cursor *pCsr){
  int rc;
  if( !pCsr->pExpr ){
    sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC);
    return;
  }
  rc = fts3GetMatchinfo(pCsr);

  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pContext, rc);
  }else{
    Fts3Table *pTab = (Fts3Table*)pCsr->base.pVtab;
    int n = sizeof(u32)*(2+pCsr->aMatchinfo[0]*pCsr->aMatchinfo[1]*3);
    if( pTab->bHasDocsize ){
      n += sizeof(u32)*(1 + 2*pTab->nColumn);
    }
    sqlite3_result_blob(pContext, pCsr->aMatchinfo, n, SQLITE_TRANSIENT);
  }
}

#endif







>



















>













1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
    pMod->xClose(pC);
    if( rc!=SQLITE_OK ) goto offsets_out;
  }

 offsets_out:
  sqlite3_free(sCtx.aTerm);
  assert( rc!=SQLITE_DONE );
  sqlite3Fts3SegmentsClose(pTab);
  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx,  rc);
    sqlite3_free(res.z);
  }else{
    sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free);
  }
  return;
}

/*
** Implementation of matchinfo() function.
*/
void sqlite3Fts3Matchinfo(sqlite3_context *pContext, Fts3Cursor *pCsr){
  int rc;
  if( !pCsr->pExpr ){
    sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC);
    return;
  }
  rc = fts3GetMatchinfo(pCsr);
  sqlite3Fts3SegmentsClose((Fts3Table *)pCsr->base.pVtab );
  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pContext, rc);
  }else{
    Fts3Table *pTab = (Fts3Table*)pCsr->base.pVtab;
    int n = sizeof(u32)*(2+pCsr->aMatchinfo[0]*pCsr->aMatchinfo[1]*3);
    if( pTab->bHasDocsize ){
      n += sizeof(u32)*(1 + 2*pTab->nColumn);
    }
    sqlite3_result_blob(pContext, pCsr->aMatchinfo, n, SQLITE_TRANSIENT);
  }
}

#endif
Changes to ext/fts3/fts3_write.c.
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667

2668

2669
2670
2671
2672
2673
2674
2675
** references to deferred doclists from within the tree of Fts3Expr 
** structures headed by 
*/
static void fts3DeferredDoclistClear(Fts3Expr *pExpr){
  if( pExpr ){
    fts3DeferredDoclistClear(pExpr->pLeft);
    fts3DeferredDoclistClear(pExpr->pRight);
    if( pExpr->bDeferred && pExpr->isLoaded ){
      sqlite3_free(pExpr->aDoclist);
      pExpr->isLoaded = 0;
      pExpr->aDoclist = 0;
      pExpr->nDoclist = 0;
      pExpr->pCurrent = 0;
      pExpr->iCurrent = 0;
    }
  }
}

/*
** Delete all cached deferred doclists. Deferred doclists are cached
** (allocated) by the sqlite3Fts3CacheDeferredDoclists() function.
*/
void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *pCsr){
  Fts3DeferredToken *pDef;
  for(pDef=pCsr->pDeferred; pDef; pDef=pDef->pNext){
    sqlite3_free(pDef->pList);
    pDef->pList = 0;
  }

  fts3DeferredDoclistClear(pCsr->pExpr);

}

/*
** Free all entries in the pCsr->pDeffered list. Entries are added to 
** this list using sqlite3Fts3DeferToken().
*/
void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *pCsr){







|




















>
|
>







2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
** references to deferred doclists from within the tree of Fts3Expr 
** structures headed by 
*/
static void fts3DeferredDoclistClear(Fts3Expr *pExpr){
  if( pExpr ){
    fts3DeferredDoclistClear(pExpr->pLeft);
    fts3DeferredDoclistClear(pExpr->pRight);
    if( pExpr->isLoaded ){
      sqlite3_free(pExpr->aDoclist);
      pExpr->isLoaded = 0;
      pExpr->aDoclist = 0;
      pExpr->nDoclist = 0;
      pExpr->pCurrent = 0;
      pExpr->iCurrent = 0;
    }
  }
}

/*
** Delete all cached deferred doclists. Deferred doclists are cached
** (allocated) by the sqlite3Fts3CacheDeferredDoclists() function.
*/
void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *pCsr){
  Fts3DeferredToken *pDef;
  for(pDef=pCsr->pDeferred; pDef; pDef=pDef->pNext){
    sqlite3_free(pDef->pList);
    pDef->pList = 0;
  }
  if( pCsr->pDeferred ){
    fts3DeferredDoclistClear(pCsr->pExpr);
  }
}

/*
** Free all entries in the pCsr->pDeffered list. Entries are added to 
** this list using sqlite3Fts3DeferToken().
*/
void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *pCsr){
Changes to test/fts3defer2.test.
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81

82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106

do_execsql_test 1.2.2 {
  SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1))
  FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)';
} [list                              \
   {a b c d [e] [f] [a] x y}         \
   {0 1 8 1 0 0 10 1 0 2 12 1}       \
   [list 3 1   1 1 1   1 1 1   1 1 1   3 13336 9]
]

do_execsql_test 1.2.3 {
  SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1))
  FROM t1 WHERE t1 MATCH 'f (e NEAR/3 a)';
} [list                                 \
   {[a] b c d [e] [f] [a] x y}          \
   {0 2 0 1 0 1 8 1 0 0 10 1 0 2 12 1}  \
   [list 3 1   1 1 1   1 1 1   2 2 1   3 13336 9]
]

do_execsql_test 1.3.1 { DROP TABLE t1 }

#-----------------------------------------------------------------------------
# Test cases fts3defer2-2.* focus specifically on the matchinfo function.
# 
do_execsql_test 2.1.1 {
  CREATE VIRTUAL TABLE t2 USING fts4;
}
do_execsql_test 2.1.2 "INSERT INTO t2 VALUES('[string repeat {a } 20000]')"
do_execsql_test 2.1.3 "INSERT INTO t2 VALUES('[string repeat {z } 20000]')"

do_execsql_test 2.1.4 {
  INSERT INTO t2 VALUES('a b c d e f g');
  INSERT INTO t2 VALUES('a b c d e f g');
}

foreach {tn sql} {
  1 {}
  2 { INSERT INTO t2(t2) VALUES('optimize') }
  3 { UPDATE t2_segments SET block = zeroblob(length(block)) 
      WHERE length(block)>10000;
  }
} {
if {$tn==3} break
  execsql $sql
  do_execsql_test 2.2.$tn.1 {
    SELECT mit(matchinfo(t2)) FROM t2 WHERE t2 MATCH 'a b';
  } [list                                          \
    [list 2 1  1 20002 3  1 2 2  4 10004 7]        \
    [list 2 1  1 20002 3  1 2 2  4 10004 7]        \
  ]
}


finish_test








|








|










|
|
>
|











<




|
|






53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94

95
96
97
98
99
100
101
102
103
104
105
106

do_execsql_test 1.2.2 {
  SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1))
  FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)';
} [list                              \
   {a b c d [e] [f] [a] x y}         \
   {0 1 8 1 0 0 10 1 0 2 12 1}       \
   [list 3 1   1 1 1   1 3 3   1 3 3   3 13336 9]
]

do_execsql_test 1.2.3 {
  SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1))
  FROM t1 WHERE t1 MATCH 'f (e NEAR/3 a)';
} [list                                 \
   {[a] b c d [e] [f] [a] x y}          \
   {0 2 0 1 0 1 8 1 0 0 10 1 0 2 12 1}  \
   [list 3 1   1 1 1   1 3 3   2 3 3   3 13336 9]
]

do_execsql_test 1.3.1 { DROP TABLE t1 }

#-----------------------------------------------------------------------------
# Test cases fts3defer2-2.* focus specifically on the matchinfo function.
# 
do_execsql_test 2.1.1 {
  CREATE VIRTUAL TABLE t2 USING fts4;
}
do_execsql_test 2.1.2 "INSERT INTO t2 VALUES('[string repeat {a } 10000]')"
do_execsql_test 2.1.3 "INSERT INTO t2 VALUES('b [string repeat {z } 10000]')"
do_execsql_test 2.1.4 [string repeat "INSERT INTO t2 VALUES('x');" 50]
do_execsql_test 2.1.5 {
  INSERT INTO t2 VALUES('a b c d e f g');
  INSERT INTO t2 VALUES('a b c d e f g');
}

foreach {tn sql} {
  1 {}
  2 { INSERT INTO t2(t2) VALUES('optimize') }
  3 { UPDATE t2_segments SET block = zeroblob(length(block)) 
      WHERE length(block)>10000;
  }
} {

  execsql $sql
  do_execsql_test 2.2.$tn.1 {
    SELECT mit(matchinfo(t2)) FROM t2 WHERE t2 MATCH 'a b';
  } [list                                          \
    [list 2 1  1 54 54  1 3 3  54 372 7]        \
    [list 2 1  1 54 54  1 3 3  54 372 7]        \
  ]
}


finish_test