SQLite

Check-in [84097a4c75]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Minor changes made while planning a larger change.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | fts3-prefix-search
Files: files | file ages | folders
SHA1: 84097a4c759b1d65890af885f137d3cb16eef584
User & Date: dan 2011-05-28 15:57:40.694
Context
2011-06-02
19:57
Changes to improve performance and support LIMIT clauses on fts3 tables. This branch is unstable for now. (check-in: 28149a7882 user: dan tags: fts3-prefix-search)
2011-05-28
15:57
Minor changes made while planning a larger change. (check-in: 84097a4c75 user: dan tags: fts3-prefix-search)
2011-05-25
19:17
If a prefix index of size N is not present, use a prefix index of size N+1 along with the terms index for queries for prefixes of length N. (check-in: cc83991caa user: dan tags: fts3-prefix-search)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts3/fts3.c.
2763
2764
2765
2766
2767
2768
2769
2770
2771


2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
**
** Both pLeft and pRight are expression nodes of type FTSQUERY_PHRASE. Both
** have their respective doclists (including position information) loaded
** in Fts3Expr.aDoclist/nDoclist. This function removes all entries from
** each doclist that are not within nNear tokens of a corresponding entry
** in the other doclist.
*/
int sqlite3Fts3ExprNearTrim(Fts3Expr *pLeft, Fts3Expr *pRight, int nNear){
  int rc;                         /* Return code */



  assert( pLeft->eType==FTSQUERY_PHRASE );
  assert( pRight->eType==FTSQUERY_PHRASE );
  assert( pLeft->isLoaded && pRight->isLoaded );

  if( pLeft->aDoclist==0 || pRight->aDoclist==0 ){
    sqlite3_free(pLeft->aDoclist);
    sqlite3_free(pRight->aDoclist);
    pRight->aDoclist = 0;
    pLeft->aDoclist = 0;
    rc = SQLITE_OK;
  }else{
    char *aOut;                   /* Buffer in which to assemble new doclist */
    int nOut;                     /* Size of buffer aOut in bytes */

    rc = fts3NearMerge(MERGE_POS_NEAR, nNear, 
        pLeft->pPhrase->nToken, pLeft->aDoclist, pLeft->nDoclist,
        pRight->pPhrase->nToken, pRight->aDoclist, pRight->nDoclist,
        &aOut, &nOut
    );
    if( rc!=SQLITE_OK ) return rc;
    sqlite3_free(pRight->aDoclist);
    pRight->aDoclist = aOut;
    pRight->nDoclist = nOut;

    rc = fts3NearMerge(MERGE_POS_NEAR, nNear, 
        pRight->pPhrase->nToken, pRight->aDoclist, pRight->nDoclist,
        pLeft->pPhrase->nToken, pLeft->aDoclist, pLeft->nDoclist,
        &aOut, &nOut
    );
    sqlite3_free(pLeft->aDoclist);
    pLeft->aDoclist = aOut;
    pLeft->nDoclist = nOut;
  }
  return rc;







|

>
>

|
|













|
|








|
|







2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
**
** Both pLeft and pRight are expression nodes of type FTSQUERY_PHRASE. Both
** have their respective doclists (including position information) loaded
** in Fts3Expr.aDoclist/nDoclist. This function removes all entries from
** each doclist that are not within nNear tokens of a corresponding entry
** in the other doclist.
*/
int sqlite3Fts3ExprNearTrim(Fts3Expr *pELeft, Fts3Expr *pERight, int nNear){
  int rc;                         /* Return code */
  Fts3Phrase *pLeft = pELeft->pPhrase;
  Fts3Phrase *pRight = pERight->pPhrase;

  assert( pELeft->eType==FTSQUERY_PHRASE && pLeft );
  assert( pERight->eType==FTSQUERY_PHRASE && pRight );
  assert( pLeft->isLoaded && pRight->isLoaded );

  if( pLeft->aDoclist==0 || pRight->aDoclist==0 ){
    sqlite3_free(pLeft->aDoclist);
    sqlite3_free(pRight->aDoclist);
    pRight->aDoclist = 0;
    pLeft->aDoclist = 0;
    rc = SQLITE_OK;
  }else{
    char *aOut;                   /* Buffer in which to assemble new doclist */
    int nOut;                     /* Size of buffer aOut in bytes */

    rc = fts3NearMerge(MERGE_POS_NEAR, nNear, 
        pLeft->nToken, pLeft->aDoclist, pLeft->nDoclist,
        pRight->nToken, pRight->aDoclist, pRight->nDoclist,
        &aOut, &nOut
    );
    if( rc!=SQLITE_OK ) return rc;
    sqlite3_free(pRight->aDoclist);
    pRight->aDoclist = aOut;
    pRight->nDoclist = nOut;

    rc = fts3NearMerge(MERGE_POS_NEAR, nNear, 
        pRight->nToken, pRight->aDoclist, pRight->nDoclist,
        pLeft->nToken, pLeft->aDoclist, pLeft->nDoclist,
        &aOut, &nOut
    );
    sqlite3_free(pLeft->aDoclist);
    pLeft->aDoclist = aOut;
    pLeft->nDoclist = nOut;
  }
  return rc;
3455
3456
3457
3458
3459
3460
3461

3462
3463
3464
3465
3466
3467






3468
3469
3470
3471
3472
3473
3474
** Load the doclist associated with expression pExpr to pExpr->aDoclist.
** The loaded doclist contains positions as well as the document ids.
** This is used by the matchinfo(), snippet() and offsets() auxillary
** functions.
*/
int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *pCsr, Fts3Expr *pExpr){
  int rc;

  assert( pExpr->eType==FTSQUERY_PHRASE && pExpr->pPhrase );
  assert( pCsr->eEvalmode==FTS3_EVAL_NEXT );
  rc = fts3EvalExpr(pCsr, pExpr, &pExpr->aDoclist, &pExpr->nDoclist, 1);
  return rc;
}







int sqlite3Fts3ExprLoadFtDoclist(
  Fts3Cursor *pCsr, 
  Fts3Expr *pExpr,
  char **paDoclist,
  int *pnDoclist
){
  int rc;







>
|

|



>
>
>
>
>
>







3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
** Load the doclist associated with expression pExpr to pExpr->aDoclist.
** The loaded doclist contains positions as well as the document ids.
** This is used by the matchinfo(), snippet() and offsets() auxillary
** functions.
*/
int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *pCsr, Fts3Expr *pExpr){
  int rc;
  Fts3Phrase *pPhrase = pExpr->pPhrase;
  assert( pExpr->eType==FTSQUERY_PHRASE && pPhrase );
  assert( pCsr->eEvalmode==FTS3_EVAL_NEXT );
  rc = fts3EvalExpr(pCsr, pExpr, &pPhrase->aDoclist, &pPhrase->nDoclist, 1);
  return rc;
}

/*
** TODO: This is something to do with matchinfo(). Similar to
** sqlite3ExprLoadDoclists() but slightly different.
**
** UPDATE: Only used when there are deferred tokens.
*/
int sqlite3Fts3ExprLoadFtDoclist(
  Fts3Cursor *pCsr, 
  Fts3Expr *pExpr,
  char **paDoclist,
  int *pnDoclist
){
  int rc;
3506
3507
3508
3509
3510
3511
3512

3513

3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
*/
char *sqlite3Fts3FindPositions(
  Fts3Cursor *pCursor,            /* Associate FTS3 cursor */
  Fts3Expr *pExpr,                /* Access this expressions doclist */
  sqlite3_int64 iDocid,           /* Docid associated with requested pos-list */
  int iCol                        /* Column of requested pos-list */
){

  assert( pExpr->isLoaded );

  if( pExpr->aDoclist ){
    char *pEnd = &pExpr->aDoclist[pExpr->nDoclist];
    char *pCsr;

    if( pExpr->pCurrent==0 ){
      if( pCursor->desc==0 ){
        pExpr->pCurrent = pExpr->aDoclist;
        pExpr->iCurrent = 0;
        fts3GetDeltaVarint(&pExpr->pCurrent, &pExpr->iCurrent);
      }else{
        pCsr = pExpr->aDoclist;
        while( pCsr<pEnd ){
          fts3GetDeltaVarint(&pCsr, &pExpr->iCurrent);
          fts3PoslistCopy(0, &pCsr);
        }
        fts3ReversePoslist(pExpr->aDoclist, &pCsr);
        pExpr->pCurrent = pCsr;
      }
    }
    pCsr = pExpr->pCurrent;
    assert( pCsr );

    while( (pCursor->desc==0 && pCsr<pEnd) 
        || (pCursor->desc && pCsr>pExpr->aDoclist) 
    ){
      if( pCursor->desc==0 && pExpr->iCurrent<iDocid ){
        fts3PoslistCopy(0, &pCsr);
        if( pCsr<pEnd ){
          fts3GetDeltaVarint(&pCsr, &pExpr->iCurrent);
        }
        pExpr->pCurrent = pCsr;
      }else if( pCursor->desc && pExpr->iCurrent>iDocid ){
        fts3GetReverseDeltaVarint(&pCsr, pExpr->aDoclist, &pExpr->iCurrent);
        fts3ReversePoslist(pExpr->aDoclist, &pCsr);
        pExpr->pCurrent = pCsr;
      }else{
        if( pExpr->iCurrent==iDocid ){
          int iThis = 0;
          if( iCol<0 ){
            /* If iCol is negative, return a pointer to the start of the
            ** position-list (instead of a pointer to the start of a list
            ** of offsets associated with a specific column).
            */
            return pCsr;







>
|
>
|
|


|

|
|
|

|

|


|
|


|



|

|


|

|
|
|
|
|

|







3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
*/
char *sqlite3Fts3FindPositions(
  Fts3Cursor *pCursor,            /* Associate FTS3 cursor */
  Fts3Expr *pExpr,                /* Access this expressions doclist */
  sqlite3_int64 iDocid,           /* Docid associated with requested pos-list */
  int iCol                        /* Column of requested pos-list */
){
  Fts3Phrase *pPhrase = pExpr->pPhrase;
  assert( pPhrase->isLoaded );

  if( pPhrase->aDoclist ){
    char *pEnd = &pPhrase->aDoclist[pPhrase->nDoclist];
    char *pCsr;

    if( pPhrase->pCurrent==0 ){
      if( pCursor->desc==0 ){
        pPhrase->pCurrent = pPhrase->aDoclist;
        pPhrase->iCurrent = 0;
        fts3GetDeltaVarint(&pPhrase->pCurrent, &pPhrase->iCurrent);
      }else{
        pCsr = pPhrase->aDoclist;
        while( pCsr<pEnd ){
          fts3GetDeltaVarint(&pCsr, &pPhrase->iCurrent);
          fts3PoslistCopy(0, &pCsr);
        }
        fts3ReversePoslist(pPhrase->aDoclist, &pCsr);
        pPhrase->pCurrent = pCsr;
      }
    }
    pCsr = pPhrase->pCurrent;
    assert( pCsr );

    while( (pCursor->desc==0 && pCsr<pEnd) 
        || (pCursor->desc && pCsr>pPhrase->aDoclist) 
    ){
      if( pCursor->desc==0 && pPhrase->iCurrent<iDocid ){
        fts3PoslistCopy(0, &pCsr);
        if( pCsr<pEnd ){
          fts3GetDeltaVarint(&pCsr, &pPhrase->iCurrent);
        }
        pPhrase->pCurrent = pCsr;
      }else if( pCursor->desc && pPhrase->iCurrent>iDocid ){
        fts3GetReverseDeltaVarint(&pCsr, pPhrase->aDoclist, &pPhrase->iCurrent);
        fts3ReversePoslist(pPhrase->aDoclist, &pCsr);
        pPhrase->pCurrent = pCsr;
      }else{
        if( pPhrase->iCurrent==iDocid ){
          int iThis = 0;
          if( iCol<0 ){
            /* If iCol is negative, return a pointer to the start of the
            ** position-list (instead of a pointer to the start of a list
            ** of offsets associated with a specific column).
            */
            return pCsr;
Changes to ext/fts3/fts3Int.h.
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281





282
283
284
285
286
287
288
289
290
291






292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
#define FTS3_FULLTEXT_SEARCH 2    /* Full-text index search */

/*
** A "phrase" is a sequence of one or more tokens that must match in
** sequence.  A single token is the base case and the most common case.
** For a sequence of tokens contained in double-quotes (i.e. "one two three")
** nToken will be the number of tokens in the string.
**
** The nDocMatch and nMatch variables contain data that may be used by the
** matchinfo() function. They are populated when the full-text index is 
** queried for hits on the phrase. If one or more tokens in the phrase
** are deferred, the nDocMatch and nMatch variables are populated based
** on the assumption that the 
*/
struct Fts3PhraseToken {
  char *z;                        /* Text of the token */
  int n;                          /* Number of bytes in buffer z */
  int isPrefix;                   /* True if token ends with a "*" character */





  int bFulltext;                  /* True if full-text index was used */
  Fts3SegReaderCursor *pSegcsr;   /* Segment-reader for this token */
  Fts3DeferredToken *pDeferred;   /* Deferred token object for this token */
};

struct Fts3Phrase {
  /* Variables populated by fts3_expr.c when parsing a MATCH expression */
  int nToken;                /* Number of tokens in the phrase */
  int iColumn;               /* Index of column this phrase must match */
  int isNot;                 /* Phrase prefixed by unary not (-) operator */






  Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */
};

/*
** A tree of these objects forms the RHS of a MATCH operator.
**
** If Fts3Expr.eType is either FTSQUERY_NEAR or FTSQUERY_PHRASE and isLoaded
** is true, then aDoclist points to a malloced buffer, size nDoclist bytes, 
** containing the results of the NEAR or phrase query in FTS3 doclist
** format. As usual, the initial "Length" field found in doclists stored
** on disk is omitted from this buffer.
**
** Variable pCurrent always points to the start of a docid field within
** aDoclist. Since the doclist is usually scanned in docid order, this can
** be used to accelerate seeking to the required docid within the doclist.
*/
struct Fts3Expr {
  int eType;                 /* One of the FTSQUERY_XXX values defined below */
  int nNear;                 /* Valid if eType==FTSQUERY_NEAR */
  Fts3Expr *pParent;         /* pParent->pLeft==this or pParent->pRight==this */
  Fts3Expr *pLeft;           /* Left operand */
  Fts3Expr *pRight;          /* Right operand */
  Fts3Phrase *pPhrase;       /* Valid if eType==FTSQUERY_PHRASE */

  int isLoaded;              /* True if aDoclist/nDoclist are initialized. */
  char *aDoclist;            /* Buffer containing doclist */
  int nDoclist;              /* Size of aDoclist in bytes */

  sqlite3_int64 iCurrent;
  char *pCurrent;
};

/*
** Candidate values for Fts3Query.eType. Note that the order of the first
** four values is in order of precedence when parsing expressions. For 
** example, the following:
**







<
<
<
<
<
<





>
>
>
>
>









|
>
>
>
>
>
>






|
|
|
|
|












<
<
<
<
<
<
<







264
265
266
267
268
269
270






271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319







320
321
322
323
324
325
326
#define FTS3_FULLTEXT_SEARCH 2    /* Full-text index search */

/*
** A "phrase" is a sequence of one or more tokens that must match in
** sequence.  A single token is the base case and the most common case.
** For a sequence of tokens contained in double-quotes (i.e. "one two three")
** nToken will be the number of tokens in the string.






*/
struct Fts3PhraseToken {
  char *z;                        /* Text of the token */
  int n;                          /* Number of bytes in buffer z */
  int isPrefix;                   /* True if token ends with a "*" character */

  /* Variables above this point are populated when the expression is
  ** parsed (by code in fts3_expr.c). Below this point the variables are
  ** used when evaluating the expression. */

  int bFulltext;                  /* True if full-text index was used */
  Fts3SegReaderCursor *pSegcsr;   /* Segment-reader for this token */
  Fts3DeferredToken *pDeferred;   /* Deferred token object for this token */
};

struct Fts3Phrase {
  /* Variables populated by fts3_expr.c when parsing a MATCH expression */
  int nToken;                /* Number of tokens in the phrase */
  int iColumn;               /* Index of column this phrase must match */

  int isLoaded;              /* True if aDoclist/nDoclist are initialized. */
  char *aDoclist;            /* Buffer containing doclist */
  int nDoclist;              /* Size of aDoclist in bytes */
  sqlite3_int64 iCurrent;
  char *pCurrent;

  Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */
};

/*
** A tree of these objects forms the RHS of a MATCH operator.
**
** If Fts3Expr.eType is FTSQUERY_PHRASE and isLoaded is true, then aDoclist 
** points to a malloced buffer, size nDoclist bytes, containing the results 
** of this phrase query in FTS3 doclist format. As usual, the initial 
** "Length" field found in doclists stored on disk is omitted from this 
** buffer.
**
** Variable pCurrent always points to the start of a docid field within
** aDoclist. Since the doclist is usually scanned in docid order, this can
** be used to accelerate seeking to the required docid within the doclist.
*/
struct Fts3Expr {
  int eType;                 /* One of the FTSQUERY_XXX values defined below */
  int nNear;                 /* Valid if eType==FTSQUERY_NEAR */
  Fts3Expr *pParent;         /* pParent->pLeft==this or pParent->pRight==this */
  Fts3Expr *pLeft;           /* Left operand */
  Fts3Expr *pRight;          /* Right operand */
  Fts3Phrase *pPhrase;       /* Valid if eType==FTSQUERY_PHRASE */







};

/*
** Candidate values for Fts3Query.eType. Note that the order of the first
** four values is in order of precedence when parsing expressions. For 
** example, the following:
**
Changes to ext/fts3/fts3_expr.c.
77
78
79
80
81
82
83








84
85
86
87
88
89

90
91
92
93
94
95
96
*/
#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10

#include "fts3Int.h"
#include <string.h>
#include <assert.h>









typedef struct ParseContext ParseContext;
struct ParseContext {
  sqlite3_tokenizer *pTokenizer;      /* Tokenizer module */
  const char **azCol;                 /* Array of column names for fts3 table */
  int nCol;                           /* Number of entries in azCol[] */
  int iDefaultCol;                    /* Default column to query */

  sqlite3_context *pCtx;              /* Write error message here */
  int nNest;                          /* Number of nested brackets */
};

/*
** This function is equivalent to the standard isspace() function. 
**







>
>
>
>
>
>
>
>






>







77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
*/
#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10

#include "fts3Int.h"
#include <string.h>
#include <assert.h>

/*
** isNot:
**   This variable is used by function getNextNode(). When getNextNode() is
**   called, it sets ParseContext.isNot to true if the 'next node' is a 
**   FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the
**   FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to
**   zero.
*/
typedef struct ParseContext ParseContext;
struct ParseContext {
  sqlite3_tokenizer *pTokenizer;      /* Tokenizer module */
  const char **azCol;                 /* Array of column names for fts3 table */
  int nCol;                           /* Number of entries in azCol[] */
  int iDefaultCol;                    /* Default column to query */
  int isNot;                          /* True if getNextNode() sees a unary - */
  sqlite3_context *pCtx;              /* Write error message here */
  int nNest;                          /* Number of nested brackets */
};

/*
** This function is equivalent to the standard isspace() function. 
**
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
        memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken);

        if( iEnd<n && z[iEnd]=='*' ){
          pRet->pPhrase->aToken[0].isPrefix = 1;
          iEnd++;
        }
        if( !sqlite3_fts3_enable_parentheses && iStart>0 && z[iStart-1]=='-' ){
          pRet->pPhrase->isNot = 1;
        }
      }
      nConsumed = iEnd;
    }

    pModule->xClose(pCursor);
  }







|







177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
        memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken);

        if( iEnd<n && z[iEnd]=='*' ){
          pRet->pPhrase->aToken[0].isPrefix = 1;
          iEnd++;
        }
        if( !sqlite3_fts3_enable_parentheses && iStart>0 && z[iStart-1]=='-' ){
          pParse->isNot = 1;
        }
      }
      nConsumed = iEnd;
    }

    pModule->xClose(pCursor);
  }
220
221
222
223
224
225
226






















227
228
229
230
231
232
233
234
235
236
237
238
239
240
241

242
243
244
245

246
247
248
249
250
251
252
253
254
255
256



257
258
259
260
261
262
263
264
265
266
267
268
269
270
271




272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
  int rc;
  Fts3Expr *p = 0;
  sqlite3_tokenizer_cursor *pCursor = 0;
  char *zTemp = 0;
  int nTemp = 0;























  rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor);
  if( rc==SQLITE_OK ){
    int ii;
    pCursor->pTokenizer = pTokenizer;
    for(ii=0; rc==SQLITE_OK; ii++){
      const char *zToken;
      int nToken, iBegin, iEnd, iPos;
      rc = pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
      if( rc==SQLITE_OK ){
        int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
        p = fts3ReallocOrFree(p, nByte+ii*sizeof(Fts3PhraseToken));
        zTemp = fts3ReallocOrFree(zTemp, nTemp + nToken);
        if( !p || !zTemp ){
          goto no_mem;
        }

        if( ii==0 ){
          memset(p, 0, nByte);
          p->pPhrase = (Fts3Phrase *)&p[1];
        }

        p->pPhrase = (Fts3Phrase *)&p[1];
        memset(&p->pPhrase->aToken[ii], 0, sizeof(Fts3PhraseToken));
        p->pPhrase->nToken = ii+1;
        p->pPhrase->aToken[ii].n = nToken;
        memcpy(&zTemp[nTemp], zToken, nToken);
        nTemp += nToken;
        if( iEnd<nInput && zInput[iEnd]=='*' ){
          p->pPhrase->aToken[ii].isPrefix = 1;
        }else{
          p->pPhrase->aToken[ii].isPrefix = 0;
        }



      }
    }

    pModule->xClose(pCursor);
    pCursor = 0;
  }

  if( rc==SQLITE_DONE ){
    int jj;
    char *zNew = NULL;
    int nNew = 0;
    int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
    nByte += (p?(p->pPhrase->nToken-1):0) * sizeof(Fts3PhraseToken);
    p = fts3ReallocOrFree(p, nByte + nTemp);
    if( !p ){




      goto no_mem;
    }
    if( zTemp ){
      zNew = &(((char *)p)[nByte]);
      memcpy(zNew, zTemp, nTemp);
    }else{
      memset(p, 0, nByte+nTemp);
    }
    p->pPhrase = (Fts3Phrase *)&p[1];
    for(jj=0; jj<p->pPhrase->nToken; jj++){
      p->pPhrase->aToken[jj].z = &zNew[nNew];
      nNew += p->pPhrase->aToken[jj].n;
    }
    sqlite3_free(zTemp);
    p->eType = FTSQUERY_PHRASE;
    p->pPhrase->iColumn = pParse->iDefaultCol;
    rc = SQLITE_OK;
  }

  *ppExpr = p;
  return rc;
no_mem:








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>





|
|
|

|
|
|
<
|
|
>
|
<
<
|
>
|
|
|
<
|
|
<
<
<
<
|
>
>
>









|
|
<
<
|
|
>
>
>
>
|
|
<
|
|
<
|
|
<

|
|

<
<
<







229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269

270
271
272
273


274
275
276
277
278

279
280




281
282
283
284
285
286
287
288
289
290
291
292
293
294
295


296
297
298
299
300
301
302
303

304
305

306
307

308
309
310
311



312
313
314
315
316
317
318
  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
  int rc;
  Fts3Expr *p = 0;
  sqlite3_tokenizer_cursor *pCursor = 0;
  char *zTemp = 0;
  int nTemp = 0;

  const int nSpace = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
  int nToken = 0;

  /* The final Fts3Expr data structure, including the Fts3Phrase,
  ** Fts3PhraseToken structures token buffers are all stored as a single 
  ** allocation so that the expression can be freed with a single call to
  ** sqlite3_free(). Setting this up requires a two pass approach.
  **
  ** The first pass, in the block below, uses a tokenizer cursor to iterate
  ** through the tokens in the expression. This pass uses fts3ReallocOrFree()
  ** to assemble data in two dynamic buffers:
  **
  **   Buffer p: Points to the Fts3Expr structure, followed by the Fts3Phrase
  **             structure, followed by the array of Fts3PhraseToken 
  **             structures. This pass only populates the Fts3PhraseToken array.
  **
  **   Buffer zTemp: Contains copies of all tokens.
  **
  ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below,
  ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase
  ** structures.
  */
  rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor);
  if( rc==SQLITE_OK ){
    int ii;
    pCursor->pTokenizer = pTokenizer;
    for(ii=0; rc==SQLITE_OK; ii++){
      const char *zByte;
      int nByte, iBegin, iEnd, iPos;
      rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos);
      if( rc==SQLITE_OK ){
        Fts3PhraseToken *pToken;

        p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken));

        if( !p ) goto no_mem;

        zTemp = fts3ReallocOrFree(zTemp, nTemp + nByte);
        if( !zTemp ) goto no_mem;



        assert( nToken==ii );
        pToken = &((Fts3Phrase *)(&p[1]))->aToken[ii];
        memset(pToken, 0, sizeof(Fts3PhraseToken));


        memcpy(&zTemp[nTemp], zByte, nByte);
        nTemp += nByte;





        pToken->n = nByte;
        pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*');
        nToken = ii+1;
      }
    }

    pModule->xClose(pCursor);
    pCursor = 0;
  }

  if( rc==SQLITE_DONE ){
    int jj;
    char *zBuf = 0;



    p = fts3ReallocOrFree(p, nSpace + nToken*sizeof(Fts3PhraseToken) + nTemp);
    if( !p ) goto no_mem;
    memset(p, 0, (char *)&(((Fts3Phrase *)&p[1])->aToken[0])-(char *)p);
    p->eType = FTSQUERY_PHRASE;
    p->pPhrase = (Fts3Phrase *)&p[1];
    p->pPhrase->iColumn = pParse->iDefaultCol;
    p->pPhrase->nToken = nToken;


    zBuf = (char *)&p->pPhrase->aToken[nToken];
    memcpy(zBuf, zTemp, nTemp);

    sqlite3_free(zTemp);


    for(jj=0; jj<p->pPhrase->nToken; jj++){
      p->pPhrase->aToken[jj].z = zBuf;
      zBuf += p->pPhrase->aToken[jj].n;
    }



    rc = SQLITE_OK;
  }

  *ppExpr = p;
  return rc;
no_mem:

336
337
338
339
340
341
342


343
344
345
346
347
348
349
  int iCol;
  int iColLen;
  int rc;
  Fts3Expr *pRet = 0;

  const char *zInput = z;
  int nInput = n;



  /* Skip over any whitespace before checking for a keyword, an open or
  ** close bracket, or a quoted string. 
  */
  while( nInput>0 && fts3isspace(*zInput) ){
    nInput--;
    zInput++;







>
>







360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
  int iCol;
  int iColLen;
  int rc;
  Fts3Expr *pRet = 0;

  const char *zInput = z;
  int nInput = n;

  pParse->isNot = 0;

  /* Skip over any whitespace before checking for a keyword, an open or
  ** close bracket, or a quoted string. 
  */
  while( nInput>0 && fts3isspace(*zInput) ){
    nInput--;
    zInput++;
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
    Fts3Expr *p = 0;
    int nByte = 0;
    rc = getNextNode(pParse, zIn, nIn, &p, &nByte);
    if( rc==SQLITE_OK ){
      int isPhrase;

      if( !sqlite3_fts3_enable_parentheses 
       && p->eType==FTSQUERY_PHRASE && p->pPhrase->isNot 
      ){
        /* Create an implicit NOT operator. */
        Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr));
        if( !pNot ){
          sqlite3Fts3ExprFree(p);
          rc = SQLITE_NOMEM;
          goto exprparse_out;
        }
        pNot->eType = FTSQUERY_NOT;
        pNot->pRight = p;
        if( pNotBranch ){
          pNot->pLeft = pNotBranch;
        }
        pNotBranch = pNot;
        p = pPrev;
      }else{
        int eType = p->eType;
        assert( eType!=FTSQUERY_PHRASE || !p->pPhrase->isNot );
        isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft);

        /* The isRequirePhrase variable is set to true if a phrase or
        ** an expression contained in parenthesis is required. If a
        ** binary operator (AND, OR, NOT or NEAR) is encounted when
        ** isRequirePhrase is set, this is a syntax error.
        */







|

















<







581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605

606
607
608
609
610
611
612
    Fts3Expr *p = 0;
    int nByte = 0;
    rc = getNextNode(pParse, zIn, nIn, &p, &nByte);
    if( rc==SQLITE_OK ){
      int isPhrase;

      if( !sqlite3_fts3_enable_parentheses 
       && p->eType==FTSQUERY_PHRASE && pParse->isNot 
      ){
        /* Create an implicit NOT operator. */
        Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr));
        if( !pNot ){
          sqlite3Fts3ExprFree(p);
          rc = SQLITE_NOMEM;
          goto exprparse_out;
        }
        pNot->eType = FTSQUERY_NOT;
        pNot->pRight = p;
        if( pNotBranch ){
          pNot->pLeft = pNotBranch;
        }
        pNotBranch = pNot;
        p = pPrev;
      }else{
        int eType = p->eType;

        isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft);

        /* The isRequirePhrase variable is set to true if a phrase or
        ** an expression contained in parenthesis is required. If a
        ** binary operator (AND, OR, NOT or NEAR) is encounted when
        ** isRequirePhrase is set, this is a syntax error.
        */
736
737
738
739
740
741
742

743
744
745
746
747
748
749
750
751
752
}

/*
** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse().
*/
void sqlite3Fts3ExprFree(Fts3Expr *p){
  if( p ){

    sqlite3Fts3ExprFree(p->pLeft);
    sqlite3Fts3ExprFree(p->pRight);
    sqlite3_free(p->aDoclist);
    sqlite3_free(p);
  }
}

/****************************************************************************
*****************************************************************************
** Everything after this point is just test code.







>


|







761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
}

/*
** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse().
*/
void sqlite3Fts3ExprFree(Fts3Expr *p){
  if( p ){
    assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 );
    sqlite3Fts3ExprFree(p->pLeft);
    sqlite3Fts3ExprFree(p->pRight);
    if( p->pPhrase ) sqlite3_free(p->pPhrase->aDoclist);
    sqlite3_free(p);
  }
}

/****************************************************************************
*****************************************************************************
** Everything after this point is just test code.
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
*/
static char *exprToString(Fts3Expr *pExpr, char *zBuf){
  switch( pExpr->eType ){
    case FTSQUERY_PHRASE: {
      Fts3Phrase *pPhrase = pExpr->pPhrase;
      int i;
      zBuf = sqlite3_mprintf(
          "%zPHRASE %d %d", zBuf, pPhrase->iColumn, pPhrase->isNot);
      for(i=0; zBuf && i<pPhrase->nToken; i++){
        zBuf = sqlite3_mprintf("%z %.*s%s", zBuf, 
            pPhrase->aToken[i].n, pPhrase->aToken[i].z,
            (pPhrase->aToken[i].isPrefix?"+":"")
        );
      }
      return zBuf;







|







822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
*/
static char *exprToString(Fts3Expr *pExpr, char *zBuf){
  switch( pExpr->eType ){
    case FTSQUERY_PHRASE: {
      Fts3Phrase *pPhrase = pExpr->pPhrase;
      int i;
      zBuf = sqlite3_mprintf(
          "%zPHRASE %d 0", zBuf, pPhrase->iColumn);
      for(i=0; zBuf && i<pPhrase->nToken; i++){
        zBuf = sqlite3_mprintf("%z %.*s%s", zBuf, 
            pPhrase->aToken[i].n, pPhrase->aToken[i].z,
            (pPhrase->aToken[i].isPrefix?"+":"")
        );
      }
      return zBuf;
Changes to ext/fts3/fts3_snippet.c.
224
225
226
227
228
229
230

231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
/*
** This is an fts3ExprIterate() callback used while loading the doclists
** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
** fts3ExprLoadDoclists().
*/
static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
  int rc = SQLITE_OK;

  LoadDoclistCtx *p = (LoadDoclistCtx *)ctx;

  UNUSED_PARAMETER(iPhrase);

  p->nPhrase++;
  p->nToken += pExpr->pPhrase->nToken;

  if( pExpr->isLoaded==0 ){
    rc = sqlite3Fts3ExprLoadDoclist(p->pCsr, pExpr);
    pExpr->isLoaded = 1;
    if( rc==SQLITE_OK ){
      rc = fts3ExprNearTrim(pExpr);
    }
  }

  return rc;
}







>





|

|

|







224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
/*
** This is an fts3ExprIterate() callback used while loading the doclists
** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
** fts3ExprLoadDoclists().
*/
static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
  int rc = SQLITE_OK;
  Fts3Phrase *pPhrase = pExpr->pPhrase;
  LoadDoclistCtx *p = (LoadDoclistCtx *)ctx;

  UNUSED_PARAMETER(iPhrase);

  p->nPhrase++;
  p->nToken += pPhrase->nToken;

  if( pPhrase->isLoaded==0 ){
    rc = sqlite3Fts3ExprLoadDoclist(p->pCsr, pExpr);
    pPhrase->isLoaded = 1;
    if( rc==SQLITE_OK ){
      rc = fts3ExprNearTrim(pExpr);
    }
  }

  return rc;
}
822
823
824
825
826
827
828

829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
static int fts3ExprGlobalHitsCb(
  Fts3Expr *pExpr,                /* Phrase expression node */
  int iPhrase,                    /* Phrase number (numbered from zero) */
  void *pCtx                      /* Pointer to MatchInfo structure */
){
  MatchInfo *p = (MatchInfo *)pCtx;
  Fts3Cursor *pCsr = p->pCursor;

  char *pIter;
  char *pEnd;
  char *pFree = 0;
  u32 *aOut = &p->aMatchinfo[3*iPhrase*p->nCol];

  assert( pExpr->isLoaded );
  assert( pExpr->eType==FTSQUERY_PHRASE );

  if( pCsr->pDeferred ){
    Fts3Phrase *pPhrase = pExpr->pPhrase;
    int ii;
    for(ii=0; ii<pPhrase->nToken; ii++){
      if( pPhrase->aToken[ii].bFulltext ) break;
    }
    if( ii<pPhrase->nToken ){
      int nFree = 0;
      int rc = sqlite3Fts3ExprLoadFtDoclist(pCsr, pExpr, &pFree, &nFree);
      if( rc!=SQLITE_OK ) return rc;
      pIter = pFree;
      pEnd = &pFree[nFree];
    }else{
      int iCol;                   /* Column index */
      for(iCol=0; iCol<p->nCol; iCol++){
        aOut[iCol*3 + 1] = (u32)p->nDoc;
        aOut[iCol*3 + 2] = (u32)p->nDoc;
      }
      return SQLITE_OK;
    }
  }else{
    pIter = pExpr->aDoclist;
    pEnd = &pExpr->aDoclist[pExpr->nDoclist];
  }

  /* Fill in the global hit count matrix row for this phrase. */
  while( pIter<pEnd ){
    while( *pIter++ & 0x80 );      /* Skip past docid. */
    fts3LoadColumnlistCounts(&pIter, &aOut[1], 1);
  }







>





|
<


<



















|
|







823
824
825
826
827
828
829
830
831
832
833
834
835
836

837
838

839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
static int fts3ExprGlobalHitsCb(
  Fts3Expr *pExpr,                /* Phrase expression node */
  int iPhrase,                    /* Phrase number (numbered from zero) */
  void *pCtx                      /* Pointer to MatchInfo structure */
){
  MatchInfo *p = (MatchInfo *)pCtx;
  Fts3Cursor *pCsr = p->pCursor;
  Fts3Phrase *pPhrase = pExpr->pPhrase; 
  char *pIter;
  char *pEnd;
  char *pFree = 0;
  u32 *aOut = &p->aMatchinfo[3*iPhrase*p->nCol];

  assert( pPhrase->isLoaded );


  if( pCsr->pDeferred ){

    int ii;
    for(ii=0; ii<pPhrase->nToken; ii++){
      if( pPhrase->aToken[ii].bFulltext ) break;
    }
    if( ii<pPhrase->nToken ){
      int nFree = 0;
      int rc = sqlite3Fts3ExprLoadFtDoclist(pCsr, pExpr, &pFree, &nFree);
      if( rc!=SQLITE_OK ) return rc;
      pIter = pFree;
      pEnd = &pFree[nFree];
    }else{
      int iCol;                   /* Column index */
      for(iCol=0; iCol<p->nCol; iCol++){
        aOut[iCol*3 + 1] = (u32)p->nDoc;
        aOut[iCol*3 + 2] = (u32)p->nDoc;
      }
      return SQLITE_OK;
    }
  }else{
    pIter = pPhrase->aDoclist;
    pEnd = &pPhrase->aDoclist[pPhrase->nDoclist];
  }

  /* Fill in the global hit count matrix row for this phrase. */
  while( pIter<pEnd ){
    while( *pIter++ & 0x80 );      /* Skip past docid. */
    fts3LoadColumnlistCounts(&pIter, &aOut[1], 1);
  }
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
){
  MatchInfo *p = (MatchInfo *)pCtx;
  int iStart = iPhrase * p->nCol * 3;
  int i;

  for(i=0; i<p->nCol; i++) p->aMatchinfo[iStart+i*3] = 0;

  if( pExpr->aDoclist ){
    char *pCsr;

    pCsr = sqlite3Fts3FindPositions(p->pCursor, pExpr, p->pCursor->iPrevId, -1);
    if( pCsr ){
      fts3LoadColumnlistCounts(&pCsr, &p->aMatchinfo[iStart], 0);
    }
  }







|







881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
){
  MatchInfo *p = (MatchInfo *)pCtx;
  int iStart = iPhrase * p->nCol * 3;
  int i;

  for(i=0; i<p->nCol; i++) p->aMatchinfo[iStart+i*3] = 0;

  if( pExpr->pPhrase->aDoclist ){
    char *pCsr;

    pCsr = sqlite3Fts3FindPositions(p->pCursor, pExpr, p->pCursor->iPrevId, -1);
    if( pCsr ){
      fts3LoadColumnlistCounts(&pCsr, &p->aMatchinfo[iStart], 0);
    }
  }
Changes to ext/fts3/fts3_write.c.
2638
2639
2640
2641
2642
2643
2644

2645
2646

2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
/*
** Helper fucntion for FreeDeferredDoclists(). This function removes all
** references to deferred doclists from within the tree of Fts3Expr 
** structures headed by 
*/
static void fts3DeferredDoclistClear(Fts3Expr *pExpr){
  if( pExpr ){

    fts3DeferredDoclistClear(pExpr->pLeft);
    fts3DeferredDoclistClear(pExpr->pRight);

    if( pExpr->isLoaded ){
      sqlite3_free(pExpr->aDoclist);
      pExpr->isLoaded = 0;
      pExpr->aDoclist = 0;
      pExpr->nDoclist = 0;
      pExpr->pCurrent = 0;
      pExpr->iCurrent = 0;
    }
  }
}

/*
** Delete all cached deferred doclists. Deferred doclists are cached
** (allocated) by the sqlite3Fts3CacheDeferredDoclists() function.







>


>
|
|
|
|
|
|
|







2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
/*
** Helper fucntion for FreeDeferredDoclists(). This function removes all
** references to deferred doclists from within the tree of Fts3Expr 
** structures headed by 
*/
static void fts3DeferredDoclistClear(Fts3Expr *pExpr){
  if( pExpr ){
    Fts3Phrase *pPhrase = pExpr->pPhrase;
    fts3DeferredDoclistClear(pExpr->pLeft);
    fts3DeferredDoclistClear(pExpr->pRight);
    if( pPhrase ){
      assert( pExpr->eType==FTSQUERY_PHRASE );
      sqlite3_free(pPhrase->aDoclist);
      pPhrase->isLoaded = 0;
      pPhrase->aDoclist = 0;
      pPhrase->nDoclist = 0;
      pPhrase->pCurrent = 0;
      pPhrase->iCurrent = 0;
    }
  }
}

/*
** Delete all cached deferred doclists. Deferred doclists are cached
** (allocated) by the sqlite3Fts3CacheDeferredDoclists() function.
Changes to test/hook.test.
270
271
272
273
274
275
276




























277
278
279
280
281
282
283
      SELECT * FROM t1 EXCEPT SELECT * FROM t3;
      SELECT * FROM t1 ORDER BY b;
      SELECT * FROM t1 GROUP BY b;
    }
    set ::update_hook
  } [list]
}




























db update_hook {}
#
#----------------------------------------------------------------------------

#----------------------------------------------------------------------------
# Test the rollback-hook. The rollback-hook is a bit more complicated than
# either the commit or update hooks because a rollback can happen 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
      SELECT * FROM t1 EXCEPT SELECT * FROM t3;
      SELECT * FROM t1 ORDER BY b;
      SELECT * FROM t1 GROUP BY b;
    }
    set ::update_hook
  } [list]
}

do_test hook-4.4 {
  execsql {
    CREATE TABLE t4(a UNIQUE, b);
    INSERT INTO t4 VALUES(1, 'a');
    INSERT INTO t4 VALUES(2, 'b');
  }
  set ::update_hook [list]
  execsql {
    REPLACE INTO t4 VALUES(1, 'c');
  }
  set ::update_hook
} [list INSERT main t4 3 ]
do_execsql_test hook-4.4.1 {
  SELECT * FROM t4 ORDER BY a;
} {1 c 2 b}
do_test hook-4.4.2 {
  set ::update_hook [list]
  execsql {
    PRAGMA recursive_triggers = on;
    REPLACE INTO t4 VALUES(1, 'd');
  }
  set ::update_hook
} [list INSERT main t4 4 ]
do_execsql_test hook-4.4.3 {
  SELECT * FROM t4 ORDER BY a;
} {1 d 2 b}

db update_hook {}
#
#----------------------------------------------------------------------------

#----------------------------------------------------------------------------
# Test the rollback-hook. The rollback-hook is a bit more complicated than
# either the commit or update hooks because a rollback can happen