SQLite

Check-in [afac429300]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Additional test cases and cleanup of FTS3 parenthesis processing. (CVS 6094)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: afac4293000f81410d105a99956605bf7102fa62
User & Date: drh 2009-01-01 12:34:46.000
Context
2009-01-01
14:06
Fix some problems in the fts3 expression parser with mismatched parenthesis. (CVS 6095) (check-in: ccfe4580ac user: danielk1977 tags: trunk)
12:34
Additional test cases and cleanup of FTS3 parenthesis processing. (CVS 6094) (check-in: afac429300 user: drh tags: trunk)
07:42
Add a couple of extra tests for the fts3 expression parser to improve mcdc coverage. (CVS 6093) (check-in: 13146b3493 user: danielk1977 tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts2/fts2_tokenizer.c.
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
  sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
  sqlite3_step(pStmt);

  return sqlite3_finalize(pStmt);
}

static
int queryTokenizer(
  sqlite3 *db, 
  char *zName,  
  const sqlite3_tokenizer_module **pp
){
  int rc;
  sqlite3_stmt *pStmt;
  const char zSql[] = "SELECT fts2_tokenizer(?)";







|







235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
  sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
  sqlite3_step(pStmt);

  return sqlite3_finalize(pStmt);
}

static
int queryFts2Tokenizer(
  sqlite3 *db, 
  char *zName,  
  const sqlite3_tokenizer_module **pp
){
  int rc;
  sqlite3_stmt *pStmt;
  const char zSql[] = "SELECT fts2_tokenizer(?)";
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282

/*
** Implementation of the scalar function fts2_tokenizer_internal_test().
** This function is used for testing only, it is not included in the
** build unless SQLITE_TEST is defined.
**
** The purpose of this is to test that the fts2_tokenizer() function
** can be used as designed by the C-code in the queryTokenizer and
** registerTokenizer() functions above. These two functions are repeated
** in the README.tokenizer file as an example, so it is important to
** test them.
**
** To run the tests, evaluate the fts2_tokenizer_internal_test() scalar
** function with no arguments. An assert() will fail if a problem is
** detected. i.e.:







|







268
269
270
271
272
273
274
275
276
277
278
279
280
281
282

/*
** Implementation of the scalar function fts2_tokenizer_internal_test().
** This function is used for testing only, it is not included in the
** build unless SQLITE_TEST is defined.
**
** The purpose of this is to test that the fts2_tokenizer() function
** can be used as designed by the C-code in the queryFts2Tokenizer and
** registerTokenizer() functions above. These two functions are repeated
** in the README.tokenizer file as an example, so it is important to
** test them.
**
** To run the tests, evaluate the fts2_tokenizer_internal_test() scalar
** function with no arguments. An assert() will fail if a problem is
** detected. i.e.:
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
  int rc;
  const sqlite3_tokenizer_module *p1;
  const sqlite3_tokenizer_module *p2;
  sqlite3 *db = (sqlite3 *)sqlite3_user_data(context);

  /* Test the query function */
  sqlite3Fts2SimpleTokenizerModule(&p1);
  rc = queryTokenizer(db, "simple", &p2);
  assert( rc==SQLITE_OK );
  assert( p1==p2 );
  rc = queryTokenizer(db, "nosuchtokenizer", &p2);
  assert( rc==SQLITE_ERROR );
  assert( p2==0 );
  assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") );

  /* Test the storage function */
  rc = registerTokenizer(db, "nosuchtokenizer", p1);
  assert( rc==SQLITE_OK );
  rc = queryTokenizer(db, "nosuchtokenizer", &p2);
  assert( rc==SQLITE_OK );
  assert( p2==p1 );

  sqlite3_result_text(context, "ok", -1, SQLITE_STATIC);
}

#endif







|


|







|







292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
  int rc;
  const sqlite3_tokenizer_module *p1;
  const sqlite3_tokenizer_module *p2;
  sqlite3 *db = (sqlite3 *)sqlite3_user_data(context);

  /* Test the query function */
  sqlite3Fts2SimpleTokenizerModule(&p1);
  rc = queryFts2Tokenizer(db, "simple", &p2);
  assert( rc==SQLITE_OK );
  assert( p1==p2 );
  rc = queryFts2Tokenizer(db, "nosuchtokenizer", &p2);
  assert( rc==SQLITE_ERROR );
  assert( p2==0 );
  assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") );

  /* Test the storage function */
  rc = registerTokenizer(db, "nosuchtokenizer", p1);
  assert( rc==SQLITE_OK );
  rc = queryFts2Tokenizer(db, "nosuchtokenizer", &p2);
  assert( rc==SQLITE_OK );
  assert( p2==p1 );

  sqlite3_result_text(context, "ok", -1, SQLITE_STATIC);
}

#endif
Changes to ext/fts3/fts3_expr.c.
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
** is defined to accept an argument of type int, its behaviour when passed
** an integer that falls outside of the range of the unsigned char type
** is undefined (and sometimes, "undefined" means segfault). This wrapper
** is defined to accept an argument of type char, and always returns 0 for
** any values that fall outside of the range of the unsigned char type (i.e.
** negative values).
*/
static int safe_isspace_expr(char c){
  /* Bytes with the high bit set are never classified as whitespace and
  ** are never handed to isspace(). */
  if( (c&0x80)!=0 ){
    return 0;
  }
  return isspace(c);
}

/*
** Extract the next token from buffer z (length n) using the tokenizer
** and other information (column names etc.) in pParse. Create an Fts3Expr
** structure of type FTSQUERY_PHRASE containing a phrase consisting of this







|







79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
** is defined to accept an argument of type int, its behaviour when passed
** an integer that falls outside of the range of the unsigned char type
** is undefined (and sometimes, "undefined" means segfault). This wrapper
** is defined to accept an argument of type char, and always returns 0 for
** any values that fall outside of the range of the unsigned char type (i.e.
** negative values).
*/
static int fts3isspace(char c){
  int isAscii = ((c&0x80)==0);   /* True when the high bit of c is clear */
  if( !isAscii ){
    /* High-bit bytes are reported as non-whitespace without calling
    ** isspace() on them. */
    return 0;
  }
  return isspace(c);
}

/*
** Extract the next token from buffer z (length n) using the tokenizer
** and other information (column names etc.) in pParse. Create an Fts3Expr
** structure of type FTSQUERY_PHRASE containing a phrase consisting of this
152
153
154
155
156
157
158





159
160
161
162
163
164
165
166
  }
  
  *pnConsumed = nConsumed;
  *ppExpr = pRet;
  return rc;
}






/*
** Resize the allocation pOrig to nNew bytes.  If the resize fails, the
** old allocation is freed so that the caller does not have to.
**
** Returns the resized allocation, or NULL if the request could not be
** satisfied.  When NULL is returned pOrig is no longer valid and must
** not be used or freed by the caller.
**
** sqlite3_realloc() called with a size of zero or less already behaves
** like sqlite3_free(pOrig) and returns NULL, so the old allocation is
** only released here when a positive-size request fails.  Freeing it
** unconditionally would release the same pointer twice.
*/
void *realloc_or_free(void *pOrig, int nNew){
  void *pRet = sqlite3_realloc(pOrig, nNew);
  if( !pRet && nNew>0 ){
    sqlite3_free(pOrig);
  }
  return pRet;
}








>
>
>
>
>
|







152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
  }
  
  *pnConsumed = nConsumed;
  *ppExpr = pRet;
  return rc;
}


/*
** Enlarge a memory allocation.  If an out-of-memory allocation occurs,
** then free the old allocation.
**
** pOrig is the existing allocation and nNew is the requested size in
** bytes.  Returns the resized allocation, or NULL if the request could
** not be satisfied.  When NULL is returned pOrig is no longer valid and
** must not be used or freed by the caller.
**
** sqlite3_realloc() called with a size of zero or less already behaves
** like sqlite3_free(pOrig) and returns NULL, so the old allocation is
** only released here when a positive-size request fails.  Freeing it
** unconditionally would release the same pointer twice.
*/
void *fts3ReallocOrFree(void *pOrig, int nNew){
  void *pRet = sqlite3_realloc(pOrig, nNew);
  if( !pRet && nNew>0 ){
    sqlite3_free(pOrig);
  }
  return pRet;
}

195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
    pCursor->pTokenizer = pTokenizer;
    for(ii=0; rc==SQLITE_OK; ii++){
      const char *zToken;
      int nToken, iBegin, iEnd, iPos;
      rc = pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
      if( rc==SQLITE_OK ){
        int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
        p = realloc_or_free(p, nByte+ii*sizeof(struct PhraseToken));
        zTemp = realloc_or_free(zTemp, nTemp + nToken);
        if( !p || !zTemp ){
          goto no_mem;
        }
        if( ii==0 ){
          memset(p, 0, nByte);
          p->pPhrase = (Fts3Phrase *)&p[1];
          p->eType = FTSQUERY_PHRASE;







|
|







200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
    pCursor->pTokenizer = pTokenizer;
    for(ii=0; rc==SQLITE_OK; ii++){
      const char *zToken;
      int nToken, iBegin, iEnd, iPos;
      rc = pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
      if( rc==SQLITE_OK ){
        int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
        p = fts3ReallocOrFree(p, nByte+ii*sizeof(struct PhraseToken));
        zTemp = fts3ReallocOrFree(zTemp, nTemp + nToken);
        if( !p || !zTemp ){
          goto no_mem;
        }
        if( ii==0 ){
          memset(p, 0, nByte);
          p->pPhrase = (Fts3Phrase *)&p[1];
          p->eType = FTSQUERY_PHRASE;
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243

  if( rc==SQLITE_DONE ){
    int jj;
    char *zNew;
    int nNew = 0;
    int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
    nByte += (p->pPhrase->nToken-1) * sizeof(struct PhraseToken);
    p = realloc_or_free(p, nByte + nTemp);
    if( !p ){
      goto no_mem;
    }
    p->pPhrase = (Fts3Phrase *)&p[1];
    zNew = &(((char *)p)[nByte]);
    memcpy(zNew, zTemp, nTemp);
    for(jj=0; jj<p->pPhrase->nToken; jj++){







|







234
235
236
237
238
239
240
241
242
243
244
245
246
247
248

  if( rc==SQLITE_DONE ){
    int jj;
    char *zNew;
    int nNew = 0;
    int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
    nByte += (p->pPhrase->nToken-1) * sizeof(struct PhraseToken);
    p = fts3ReallocOrFree(p, nByte + nTemp);
    if( !p ){
      goto no_mem;
    }
    p->pPhrase = (Fts3Phrase *)&p[1];
    zNew = &(((char *)p)[nByte]);
    memcpy(zNew, zTemp, nTemp);
    for(jj=0; jj<p->pPhrase->nToken; jj++){
277
278
279
280
281
282
283
284
285
286

287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
*/
static int getNextNode(
  ParseContext *pParse,                   /* fts3 query parse context */
  const char *z, int n,                   /* Input string */
  Fts3Expr **ppExpr,                      /* OUT: expression */
  int *pnConsumed                         /* OUT: Number of bytes consumed */
){
  struct Fts3Keyword {
    char *z;
    int n;

    int eType;
  } aKeyword[] = {
    { "OR" ,  2, FTSQUERY_OR   },
    { "AND",  3, FTSQUERY_AND  },
    { "NOT",  3, FTSQUERY_NOT  },
    { "NEAR", 4, FTSQUERY_NEAR }
  };
  int ii;
  int iCol;
  int iColLen;
  int rc;
  Fts3Expr *pRet = 0;

  const char *zInput = z;
  int nInput = n;

  /* Skip over any whitespace before checking for a keyword, an open or
  ** close bracket, or a quoted string. 
  */
  while( nInput>0 && safe_isspace_expr(*zInput) ){
    nInput--;
    zInput++;
  }

  /* See if we are dealing with a keyword. */
  for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){
    struct Fts3Keyword *pKey = &aKeyword[ii];

    if( (0==sqlite3_fts3_enable_parentheses)
     && (pKey->eType==FTSQUERY_AND || pKey->eType==FTSQUERY_NOT) 
    ){
      continue;
    }

    if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){
      int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM;
      int nKey = pKey->n;
      char cNext;







|
|
|
>
|

|
|
|
|













|






|

|
<
<







282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321


322
323
324
325
326
327
328
*/
static int getNextNode(
  ParseContext *pParse,                   /* fts3 query parse context */
  const char *z, int n,                   /* Input string */
  Fts3Expr **ppExpr,                      /* OUT: expression */
  int *pnConsumed                         /* OUT: Number of bytes consumed */
){
  static const struct Fts3Keyword {
    char z[4];                            /* Keyword text */
    u8 n;                                 /* Length of the keyword */
    u8 parenOnly;                         /* Only valid in paren mode */
    u8 eType;                             /* Keyword code */
  } aKeyword[] = {
    { "OR" ,  2, 0, FTSQUERY_OR   },
    { "AND",  3, 1, FTSQUERY_AND  },
    { "NOT",  3, 1, FTSQUERY_NOT  },
    { "NEAR", 4, 0, FTSQUERY_NEAR }
  };
  int ii;
  int iCol;
  int iColLen;
  int rc;
  Fts3Expr *pRet = 0;

  const char *zInput = z;
  int nInput = n;

  /* Skip over any whitespace before checking for a keyword, an open or
  ** close bracket, or a quoted string. 
  */
  while( nInput>0 && fts3isspace(*zInput) ){
    nInput--;
    zInput++;
  }

  /* See if we are dealing with a keyword. */
  for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){
    const struct Fts3Keyword *pKey = &aKeyword[ii];

    if( (pKey->parenOnly & ~sqlite3_fts3_enable_parentheses)!=0 ){


      continue;
    }

    if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){
      int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM;
      int nKey = pKey->n;
      char cNext;
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
            nNear = nNear * 10 + (zInput[nKey] - '0');
          }
        }
      }

      /* At this point this is probably a keyword. But for that to be true,
      ** the next byte must contain either whitespace, an open or close
      ** bracket, a quote character, or EOF. 
      */
      cNext = zInput[nKey];
      if( safe_isspace_expr(cNext) 
       || cNext=='"' || cNext=='(' || cNext==')' || cNext==0
      ){
        pRet = (Fts3Expr *)sqlite3_malloc(sizeof(Fts3Expr));
        memset(pRet, 0, sizeof(Fts3Expr));
        pRet->eType = pKey->eType;
        pRet->nNear = nNear;
        *ppExpr = pRet;







|


|







336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
            nNear = nNear * 10 + (zInput[nKey] - '0');
          }
        }
      }

      /* At this point this is probably a keyword. But for that to be true,
      ** the next byte must contain either whitespace, an open or close
      ** parenthesis, a quote character, or EOF. 
      */
      cNext = zInput[nKey];
      if( fts3isspace(cNext) 
       || cNext=='"' || cNext=='(' || cNext==')' || cNext==0
      ){
        pRet = (Fts3Expr *)sqlite3_malloc(sizeof(Fts3Expr));
        memset(pRet, 0, sizeof(Fts3Expr));
        pRet->eType = pKey->eType;
        pRet->nNear = nNear;
        *ppExpr = pRet;
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
#ifdef SQLITE_TEST

#include <stdio.h>

/*
** Function to query the hash-table of tokenizers (see README.tokenizers).
*/
static int queryTokenizer(
  sqlite3 *db, 
  const char *zName,  
  const sqlite3_tokenizer_module **pp
){
  int rc;
  sqlite3_stmt *pStmt;
  const char zSql[] = "SELECT fts3_tokenizer(?)";







|







707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
#ifdef SQLITE_TEST

#include <stdio.h>

/*
** Function to query the hash-table of tokenizers (see README.tokenizers).
*/
static int queryTestTokenizer(
  sqlite3 *db, 
  const char *zName,  
  const sqlite3_tokenizer_module **pp
){
  int rc;
  sqlite3_stmt *pStmt;
  const char zSql[] = "SELECT fts3_tokenizer(?)";
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815

816
817
818
819
820
821
822
823
*/
static void fts3ExprTest(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  sqlite3_tokenizer_module const *pModule = 0;
  sqlite3_tokenizer *pTokenizer;
  int rc;
  char **azCol = 0;
  const char *zExpr;
  int nExpr;
  int nCol;
  int ii;
  Fts3Expr *pExpr;
  sqlite3 *db = sqlite3_context_db_handle(context);

  if( argc<3 ){
    sqlite3_result_error(context, 
        "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1
    );
    return;
  }


  rc = queryTokenizer(db, (const char *)sqlite3_value_text(argv[0]), &pModule);
  if( rc==SQLITE_NOMEM ){
    sqlite3_result_error_nomem(context);
    goto exprtest_out;
  }else if( !pModule ){
    sqlite3_result_error(context, "No such tokenizer module", -1);
    goto exprtest_out;
  }







|
















>
|







796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
*/
static void fts3ExprTest(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  sqlite3_tokenizer_module const *pModule = 0;
  sqlite3_tokenizer *pTokenizer = 0;
  int rc;
  char **azCol = 0;
  const char *zExpr;
  int nExpr;
  int nCol;
  int ii;
  Fts3Expr *pExpr;
  sqlite3 *db = sqlite3_context_db_handle(context);

  if( argc<3 ){
    sqlite3_result_error(context, 
        "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1
    );
    return;
  }

  rc = queryTestTokenizer(db,
                          (const char *)sqlite3_value_text(argv[0]), &pModule);
  if( rc==SQLITE_NOMEM ){
    sqlite3_result_error_nomem(context);
    goto exprtest_out;
  }else if( !pModule ){
    sqlite3_result_error(context, "No such tokenizer module", -1);
    goto exprtest_out;
  }
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
    sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);
    sqlite3Fts3ExprFree(pExpr);
  }else{
    sqlite3_result_error(context, "Error parsing expression", -1);
  }

exprtest_out:
  if( pTokenizer ){
    rc = pModule->xDestroy(pTokenizer);
  }
  sqlite3_free(azCol);
}

/*
** Register the query expression parser test function fts3_exprtest() 







|







859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
    sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);
    sqlite3Fts3ExprFree(pExpr);
  }else{
    sqlite3_result_error(context, "Error parsing expression", -1);
  }

exprtest_out:
  if( pModule && pTokenizer ){
    rc = pModule->xDestroy(pTokenizer);
  }
  sqlite3_free(azCol);
}

/*
** Register the query expression parser test function fts3_exprtest() 
Changes to ext/fts3/fts3_expr.h.
17
18
19
20
21
22
23
24
25
26
27
28

29

30
31
32
33
34
35
36
37
38
39
40
41
42






43
44
45
46
47
48
49
50
51
52
53



54
55
56
57
58
59
60
61
62
63
64
65
66

/*
** The following describes the syntax supported by the fts3 MATCH
** operator in a similar format to that used by the lemon parser
** generator. This module does not actually use lemon; it uses a
** custom parser.
**
**   phrase ::= TOKEN.
**   phrase ::= TOKEN:COLUMN.
**   phrase ::= "TOKEN TOKEN TOKEN...".
**   phrase ::= phrase near phrase.
**

**   near ::= NEAR.

**   near ::= NEAR / INTEGER.
**
**   query ::= -TOKEN.
**   query ::= phrase.
**   query ::= LP query RP.
**   query ::= query NOT query.
**   query ::= query OR query.
**   query ::= query AND query.
*/

typedef struct Fts3Expr Fts3Expr;
typedef struct Fts3Phrase Fts3Phrase;







struct Fts3Phrase {
  int nToken;          /* Number of entries in aToken[] */
  int iColumn;         /* Index of column this phrase must match */
  int isNot;           /* Phrase prefixed by unary not (-) operator */
  struct PhraseToken {
    char *z;
    int n;             /* Number of bytes in buffer pointed to by z */
    int isPrefix;      /* True if token ends in with a "*" character */
  } aToken[1];
};




/*
** One node of a parsed query expression.  Operator nodes link to their
** operands through pLeft and pRight; phrase nodes carry their tokens
** in pPhrase.
*/
struct Fts3Expr {
  int eType;                 /* One of the FTSQUERY_XXX values defined below */
  int nNear;                 /* Valid if eType==FTSQUERY_NEAR */
  Fts3Expr *pParent;         /* pParent->pLeft==this or pParent->pRight==this */
  Fts3Expr *pLeft;           /* Left operand */
  Fts3Expr *pRight;          /* Right operand */
  Fts3Phrase *pPhrase;       /* Valid if eType==FTSQUERY_PHRASE */
};

int sqlite3Fts3ExprParse(sqlite3_tokenizer *, char **, int, int, 
                         const char *, int, Fts3Expr **);
void sqlite3Fts3ExprFree(Fts3Expr *);








|
|
<
|

>
|
>
|

|
|
|
|
|
|





>
>
>
>
>
>

|



|
|
|
|


>
>
>



|
|
|







17
18
19
20
21
22
23
24
25

26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76

/*
** The following describes the syntax supported by the fts3 MATCH
** operator in a similar format to that used by the lemon parser
** generator. This module does not actually use lemon; it uses a
** custom parser.
**
**   query ::= andexpr (OR andexpr)*.
**

**   andexpr ::= notexpr (AND? notexpr)*.
**
**   notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*.
**   notexpr ::= LP query RP.
**
**   nearexpr ::= phrase (NEAR distance_opt nearexpr)*.
**
**   distance_opt ::= .
**   distance_opt ::= / INTEGER.
**
**   phrase ::= TOKEN.
**   phrase ::= COLUMN:TOKEN.
**   phrase ::= "TOKEN TOKEN TOKEN...".
*/

typedef struct Fts3Expr Fts3Expr;
typedef struct Fts3Phrase Fts3Phrase;

/*
** A "phrase" is a sequence of one or more tokens that must match in
** sequence.  A single token is the base case and the most common case.
** For a sequence of tokens contained in "...", nToken will be the number
** of tokens in the string.
**
** aToken[] is declared with a single entry, but the parser grows the
** allocation by sizeof(struct PhraseToken) for each additional token,
** so the array effectively holds nToken entries.
*/
struct Fts3Phrase {
  int nToken;          /* Number of tokens in the phrase */
  int iColumn;         /* Index of column this phrase must match */
  int isNot;           /* Phrase prefixed by unary not (-) operator */
  struct PhraseToken {
    char *z;              /* Text of the token */
    int n;                /* Number of bytes in buffer pointed to by z */
    int isPrefix;         /* True if token ends with a "*" character */
  } aToken[1];         /* One entry for each token in the phrase */
};

/*
** A tree of these objects forms the RHS of a MATCH operator.
**
** Operator nodes (for example FTSQUERY_AND, FTSQUERY_OR) link to their
** operands through pLeft and pRight.  Nodes with eType==FTSQUERY_PHRASE
** carry their tokens in pPhrase.
*/
struct Fts3Expr {
  int eType;                 /* One of the FTSQUERY_XXX values defined below */
  int nNear;                 /* Valid if eType==FTSQUERY_NEAR */
  Fts3Expr *pParent;         /* pParent->pLeft==this or pParent->pRight==this */
  Fts3Expr *pLeft;           /* Left operand */
  Fts3Expr *pRight;          /* Right operand */
  Fts3Phrase *pPhrase;       /* Valid if eType==FTSQUERY_PHRASE */
};

int sqlite3Fts3ExprParse(sqlite3_tokenizer *, char **, int, int, 
                         const char *, int, Fts3Expr **);
void sqlite3Fts3ExprFree(Fts3Expr *);

80
81
82
83
84
85
86
87
#define FTSQUERY_AND    3
#define FTSQUERY_OR     4
#define FTSQUERY_PHRASE 5

#ifdef SQLITE_TEST
void sqlite3Fts3ExprInitTestInterface(sqlite3 *db);
#endif








<
90
91
92
93
94
95
96

#define FTSQUERY_AND    3
#define FTSQUERY_OR     4
#define FTSQUERY_PHRASE 5

#ifdef SQLITE_TEST
void sqlite3Fts3ExprInitTestInterface(sqlite3 *db);
#endif

Changes to test/fts3expr.test.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# 2006 September 9
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the FTS3 module.
#
# $Id: fts3expr.test,v 1.4 2009/01/01 07:42:49 danielk1977 Exp $
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
ifcapable !fts3 {













|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# 2006 September 9
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the FTS3 module.
#
# $Id: fts3expr.test,v 1.5 2009/01/01 12:34:46 drh Exp $
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
ifcapable !fts3 {
34
35
36
37
38
39
40



41
42
43
44
45
46
47
48
49
50
51
52





















53
54
55
56
57
58
59
60





















61
62






63
64
65
66
67
68
69
70
71
72
73
74
75
76







































































































77
78
79
80
81
82
83
do_test fts3expr-1.1 {
  test_fts3expr " tag "
} {PHRASE 3 0 tag}

do_test fts3expr-1.2 {
  test_fts3expr "ab AND cd"
} {AND {PHRASE 3 0 ab} {PHRASE 3 0 cd}}



do_test fts3expr-1.3 {
  test_fts3expr "ab OR cd"
} {OR {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.4 {
  test_fts3expr "ab NOT cd"
} {NOT {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.5 {
  test_fts3expr "ab NEAR cd"
} {NEAR/10 {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.6 {
  test_fts3expr "ab NEAR/5 cd"
} {NEAR/5 {PHRASE 3 0 ab} {PHRASE 3 0 cd}}






















do_test fts3expr-1.7 {
  test_fts3expr {"one two three"}
} {PHRASE 3 0 one two three}
do_test fts3expr-1.8 {
  test_fts3expr {zero "one two three" four}
} {AND {AND {PHRASE 3 0 zero} {PHRASE 3 0 one two three}} {PHRASE 3 0 four}}
do_test fts3expr-1.9 {





















  test_fts3expr {"one* two three*"}
} {PHRASE 3 0 one+ two three+}







do_test fts3expr-1.10 {
  test_fts3expr {one* two}
} {AND {PHRASE 3 0 one+} {PHRASE 3 0 two}}
do_test fts3expr-1.11 {
  test_fts3expr {one two*}
} {AND {PHRASE 3 0 one} {PHRASE 3 0 two+}}

do_test fts3expr-1.14 {
  test_fts3expr {a:one two}
} {AND {PHRASE 0 0 one} {PHRASE 3 0 two}}
do_test fts3expr-1.15 {
  test_fts3expr {one b:two}
} {AND {PHRASE 3 0 one} {PHRASE 1 0 two}}








































































































proc strip_phrase_data {L} {
  if {[lindex $L 0] eq "PHRASE"} {
    return [lrange $L 3 end]
  }
  return [list \
    [lindex $L 0] \







>
>
>









|


>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>




|


|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


>
>
>
>
>
>














>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
do_test fts3expr-1.1 {
  test_fts3expr " tag "
} {PHRASE 3 0 tag}

do_test fts3expr-1.2 {
  test_fts3expr "ab AND cd"
} {AND {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.2.1 {
  test_fts3expr "ab cd"
} {AND {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.3 {
  test_fts3expr "ab OR cd"
} {OR {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.4 {
  test_fts3expr "ab NOT cd"
} {NOT {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.5 {
  test_fts3expr "ab NEAR cd"
} {NEAR/10 {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.6.1 {
  test_fts3expr "ab NEAR/5 cd"
} {NEAR/5 {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.6.2 {
  test_fts3expr "ab NEAR/87654321 cd"
} {NEAR/87654321 {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.6.3 {
  test_fts3expr "ab NEAR/7654321 cd"
} {NEAR/7654321 {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.6.4 {
  test_fts3expr "ab NEAR/654321 cd"
} {NEAR/654321 {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.6.5 {
  test_fts3expr "ab NEAR/54321 cd"
} {NEAR/54321 {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.6.6 {
  test_fts3expr "ab NEAR/4321 cd"
} {NEAR/4321 {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.6.7 {
  test_fts3expr "ab NEAR/321 cd"
} {NEAR/321 {PHRASE 3 0 ab} {PHRASE 3 0 cd}}
do_test fts3expr-1.6.8 {
  test_fts3expr "ab NEAR/21 cd"
} {NEAR/21 {PHRASE 3 0 ab} {PHRASE 3 0 cd}}

do_test fts3expr-1.7 {
  test_fts3expr {"one two three"}
} {PHRASE 3 0 one two three}
do_test fts3expr-1.8.1 {
  test_fts3expr {zero "one two three" four}
} {AND {AND {PHRASE 3 0 zero} {PHRASE 3 0 one two three}} {PHRASE 3 0 four}}
do_test fts3expr-1.8.2 {
  test_fts3expr {zero AND "one two three" four}
} {AND {AND {PHRASE 3 0 zero} {PHRASE 3 0 one two three}} {PHRASE 3 0 four}}
do_test fts3expr-1.8.3 {
  test_fts3expr {zero "one two three" AND four}
} {AND {AND {PHRASE 3 0 zero} {PHRASE 3 0 one two three}} {PHRASE 3 0 four}}
do_test fts3expr-1.8.4 {
  test_fts3expr {zero AND "one two three" AND four}
} {AND {AND {PHRASE 3 0 zero} {PHRASE 3 0 one two three}} {PHRASE 3 0 four}}
do_test fts3expr-1.9.1 {
  test_fts3expr {"one* two three"}
} {PHRASE 3 0 one+ two three}
do_test fts3expr-1.9.2 {
  test_fts3expr {"one two* three"}
} {PHRASE 3 0 one two+ three}
do_test fts3expr-1.9.3 {
  test_fts3expr {"one* two* three"}
} {PHRASE 3 0 one+ two+ three}
do_test fts3expr-1.9.4 {
  test_fts3expr {"one two three*"}
} {PHRASE 3 0 one two three+}
do_test fts3expr-1.9.5 {
  test_fts3expr {"one* two three*"}
} {PHRASE 3 0 one+ two three+}
do_test fts3expr-1.9.6 {
  test_fts3expr {"one two* three*"}
} {PHRASE 3 0 one two+ three+}
do_test fts3expr-1.9.7 {
  test_fts3expr {"one* two* three*"}
} {PHRASE 3 0 one+ two+ three+}

do_test fts3expr-1.10 {
  test_fts3expr {one* two}
} {AND {PHRASE 3 0 one+} {PHRASE 3 0 two}}
do_test fts3expr-1.11 {
  test_fts3expr {one two*}
} {AND {PHRASE 3 0 one} {PHRASE 3 0 two+}}

do_test fts3expr-1.14 {
  test_fts3expr {a:one two}
} {AND {PHRASE 0 0 one} {PHRASE 3 0 two}}
do_test fts3expr-1.15 {
  test_fts3expr {one b:two}
} {AND {PHRASE 3 0 one} {PHRASE 1 0 two}}

do_test fts3expr-1.16 {
  test_fts3expr {one AND two AND three AND four AND five}
} [list AND \
        [list AND \
              [list AND \
                    [list AND {PHRASE 3 0 one} {PHRASE 3 0 two}] \
                    {PHRASE 3 0 three} \
              ] \
              {PHRASE 3 0 four} \
        ] \
        {PHRASE 3 0 five} \
  ]
do_test fts3expr-1.17 {
  test_fts3expr {(one AND two) AND ((three AND four) AND five)}
} [list AND \
        [list AND {PHRASE 3 0 one} {PHRASE 3 0 two}] \
        [list AND \
              [list AND {PHRASE 3 0 three} {PHRASE 3 0 four}] \
             {PHRASE 3 0 five} \
        ] \
  ]
do_test fts3expr-1.18 {
  test_fts3expr {(one AND two) OR ((three AND four) AND five)}
} [list OR \
        [list AND {PHRASE 3 0 one} {PHRASE 3 0 two}] \
        [list AND \
              [list AND {PHRASE 3 0 three} {PHRASE 3 0 four}] \
             {PHRASE 3 0 five} \
        ] \
  ]
do_test fts3expr-1.19 {
  test_fts3expr {(one AND two) AND ((three AND four) OR five)}
} [list AND \
        [list AND {PHRASE 3 0 one} {PHRASE 3 0 two}] \
        [list OR \
              [list AND {PHRASE 3 0 three} {PHRASE 3 0 four}] \
             {PHRASE 3 0 five} \
        ] \
  ]
do_test fts3expr-1.20 {
  test_fts3expr {(one OR two) AND ((three OR four) AND five)}
} [list AND \
        [list OR {PHRASE 3 0 one} {PHRASE 3 0 two}] \
        [list AND \
              [list OR {PHRASE 3 0 three} {PHRASE 3 0 four}] \
             {PHRASE 3 0 five} \
        ] \
  ]
do_test fts3expr-1.21 {
  test_fts3expr {(one OR two) AND ((three NOT four) AND five)}
} [list AND \
        [list OR {PHRASE 3 0 one} {PHRASE 3 0 two}] \
        [list AND \
              [list NOT {PHRASE 3 0 three} {PHRASE 3 0 four}] \
             {PHRASE 3 0 five} \
        ] \
  ]
do_test fts3expr-1.22 {
  test_fts3expr {(one OR two) NOT ((three OR four) AND five)}
} [list NOT \
        [list OR {PHRASE 3 0 one} {PHRASE 3 0 two}] \
        [list AND \
              [list OR {PHRASE 3 0 three} {PHRASE 3 0 four}] \
             {PHRASE 3 0 five} \
        ] \
  ]
do_test fts3expr-1.23 {
  test_fts3expr {(((((one OR two))))) NOT (((((three OR four))) AND five))}
} [list NOT \
        [list OR {PHRASE 3 0 one} {PHRASE 3 0 two}] \
        [list AND \
              [list OR {PHRASE 3 0 three} {PHRASE 3 0 four}] \
             {PHRASE 3 0 five} \
        ] \
  ]
do_test fts3expr-1.24 {
  test_fts3expr {one NEAR two}
} [list NEAR/10 {PHRASE 3 0 one} {PHRASE 3 0 two}]
do_test fts3expr-1.25 {
  test_fts3expr {(one NEAR two)}
} [list NEAR/10 {PHRASE 3 0 one} {PHRASE 3 0 two}]
do_test fts3expr-1.26 {
  test_fts3expr {((((((one NEAR two))))))}
} [list NEAR/10 {PHRASE 3 0 one} {PHRASE 3 0 two}]
do_test fts3expr-1.27 {
  test_fts3expr {(one NEAR two) OR ((three OR four) AND five)}
} [list OR \
        [list NEAR/10 {PHRASE 3 0 one} {PHRASE 3 0 two}] \
        [list AND \
              [list OR {PHRASE 3 0 three} {PHRASE 3 0 four}] \
             {PHRASE 3 0 five} \
        ] \
  ]
do_test fts3expr-1.28 {
  test_fts3expr {(one NEAR/321 two) OR ((three OR four) AND five)}
} [list OR \
        [list NEAR/321 {PHRASE 3 0 one} {PHRASE 3 0 two}] \
        [list AND \
              [list OR {PHRASE 3 0 three} {PHRASE 3 0 four}] \
             {PHRASE 3 0 five} \
        ] \
  ]

proc strip_phrase_data {L} {
  if {[lindex $L 0] eq "PHRASE"} {
    return [lrange $L 3 end]
  }
  return [list \
    [lindex $L 0] \
299
300
301
302
303
304
305
306
  do_test fts3expr-6.$id {
    execsql { SELECT rowid FROM t1 WHERE t1 MATCH $expr ORDER BY rowid }
  } $res
}

set sqlite_fts3_enable_parentheses 0
finish_test








<
453
454
455
456
457
458
459

  do_test fts3expr-6.$id {
    execsql { SELECT rowid FROM t1 WHERE t1 MATCH $expr ORDER BY rowid }
  } $res
}

set sqlite_fts3_enable_parentheses 0
finish_test