/ Check-in [e21bf7a2]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix the way parenthesis in MATCH expressions are handled by FTS if the tokenizer considers them to be token characters.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: e21bf7a2ade6373e94ea403c665f78e1ad22143f
User & Date: dan 2014-05-07 19:59:36
References
2014-10-09
15:08
Allow FTS tokenizers to choose whether or not to consider the "*" character part of tokens or not. This restores the pre-[e21bf7a2ad] behaviour. Also fix a problem causing FTS to interpret tokens beginning with "*" characters as EOF. check-in: 49dfee7c user: dan tags: trunk
Context
2014-05-07
20:24
A better fix for the group_concat() problem. check-in: 1c086dee user: drh tags: trunk
19:59
Fix the way parenthesis in MATCH expressions are handled by FTS if the tokenizer considers them to be token characters. check-in: e21bf7a2 user: dan tags: trunk
18:23
Make sure the group_concat() function returns an empty string, not a NULL, if it has at least one input row. Fix for ticket [55746f9e65f8587]. check-in: d01cedaa user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Show Whitespace Changes Patch

Changes to ext/fts3/fts3_expr.c.

181
182
183
184
185
186
187
188
189







190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
...
248
249
250
251
252
253
254
255


256
257
258
259
260
261
262
263
264
265
266
267
268
...
504
505
506
507
508
509
510















511
512
513
514
515
516
517
...
622
623
624
625
626
627
628

629

630

631
632
633
634
635
636
637
...
706
707
708
709
710
711
712


713
714
715
716
717
718
719
720
721
722
723
724
725
  int *pnConsumed                         /* OUT: Number of bytes consumed */
){
  sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
  int rc;
  sqlite3_tokenizer_cursor *pCursor;
  Fts3Expr *pRet = 0;
  int nConsumed = 0;








  rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, n, &pCursor);
  if( rc==SQLITE_OK ){
    const char *zToken;
    int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0;
    int nByte;                               /* total space to allocate */

    rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);

    if( (rc==SQLITE_OK || rc==SQLITE_DONE) && sqlite3_fts3_enable_parentheses ){
      int i;
      if( rc==SQLITE_DONE ) iStart = n;
      for(i=0; i<iStart; i++){
        if( z[i]=='(' ){
          pParse->nNest++;
          rc = fts3ExprParse(pParse, &z[i+1], n-i-1, &pRet, &nConsumed);
          if( rc==SQLITE_OK && !pRet ){
            rc = SQLITE_DONE;
          }
          nConsumed = (int)(i + 1 + nConsumed);
          break;
        }

        if( z[i]==')' ){
          rc = SQLITE_DONE;
          pParse->nNest--;
          nConsumed = i+1;
          break;
        }
      }
    }

    if( nConsumed==0 && rc==SQLITE_OK ){
      nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
      pRet = (Fts3Expr *)fts3MallocZero(nByte);
      if( !pRet ){
        rc = SQLITE_NOMEM;
      }else{
        pRet->eType = FTSQUERY_PHRASE;
        pRet->pPhrase = (Fts3Phrase *)&pRet[1];
................................................................................
            iStart--;
          }else{
            break;
          }
        }

      }
      nConsumed = iEnd;


    }

    pModule->xClose(pCursor);
  }
  
  *pnConsumed = nConsumed;
  *ppExpr = pRet;
  return rc;
}


/*
** Enlarge a memory allocation.  If an out-of-memory allocation occurs,
................................................................................
    *pnConsumed = (int)((zInput - z) + ii + 1);
    if( ii==nInput ){
      return SQLITE_ERROR;
    }
    return getNextString(pParse, &zInput[1], ii-1, ppExpr);
  }

















  /* If control flows to this point, this must be a regular token, or 
  ** the end of the input. Read a regular token using the sqlite3_tokenizer
  ** interface. Before doing so, figure out if there is an explicit
  ** column specifier for the token. 
  **
  ** TODO: Strangely, it is not possible to associate a column specifier
................................................................................
  const char *zIn = z;
  int rc = SQLITE_OK;
  int isRequirePhrase = 1;

  while( rc==SQLITE_OK ){
    Fts3Expr *p = 0;
    int nByte = 0;

    rc = getNextNode(pParse, zIn, nIn, &p, &nByte);

    if( rc==SQLITE_OK ){

      int isPhrase;

      if( !sqlite3_fts3_enable_parentheses 
       && p->eType==FTSQUERY_PHRASE && pParse->isNot 
      ){
        /* Create an implicit NOT operator. */
        Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr));
................................................................................
            pRet = p;
          }
        }else{
          insertBinaryOperator(&pRet, pPrev, p);
        }
        isRequirePhrase = !isPhrase;
      }


      assert( nByte>0 );
    }
    assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) );
    nIn -= nByte;
    zIn += nByte;
    pPrev = p;
  }

  if( rc==SQLITE_DONE && pRet && isRequirePhrase ){
    rc = SQLITE_ERROR;
  }

  if( rc==SQLITE_DONE ){







|

>
>
>
>
>
>
>
|






<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







 







|
>
>





<







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>

>

>







 







>
>





<







181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203








204
















205
206
207
208
209
210
211
...
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245

246
247
248
249
250
251
252
...
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
...
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
...
708
709
710
711
712
713
714
715
716
717
718
719
720
721

722
723
724
725
726
727
728
  int *pnConsumed                         /* OUT: Number of bytes consumed */
){
  sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
  int rc;
  sqlite3_tokenizer_cursor *pCursor;
  Fts3Expr *pRet = 0;
  int i = 0;

  /* Set variable i to the maximum number of bytes of input to tokenize. */
  for(i=0; i<n; i++){
    if( sqlite3_fts3_enable_parentheses && (z[i]=='(' || z[i]==')') ) break;
    if( z[i]=='*' || z[i]=='"' ) break;
  }

  *pnConsumed = i;
  rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, i, &pCursor);
  if( rc==SQLITE_OK ){
    const char *zToken;
    int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0;
    int nByte;                               /* total space to allocate */

    rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);








    if( rc==SQLITE_OK ){
















      nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
      pRet = (Fts3Expr *)fts3MallocZero(nByte);
      if( !pRet ){
        rc = SQLITE_NOMEM;
      }else{
        pRet->eType = FTSQUERY_PHRASE;
        pRet->pPhrase = (Fts3Phrase *)&pRet[1];
................................................................................
            iStart--;
          }else{
            break;
          }
        }

      }
      *pnConsumed = iEnd;
    }else if( i && rc==SQLITE_DONE ){
      rc = SQLITE_OK;
    }

    pModule->xClose(pCursor);
  }
  

  *ppExpr = pRet;
  return rc;
}


/*
** Enlarge a memory allocation.  If an out-of-memory allocation occurs,
................................................................................
    *pnConsumed = (int)((zInput - z) + ii + 1);
    if( ii==nInput ){
      return SQLITE_ERROR;
    }
    return getNextString(pParse, &zInput[1], ii-1, ppExpr);
  }

  if( sqlite3_fts3_enable_parentheses ){
    if( *zInput=='(' ){
      int nConsumed = 0;
      pParse->nNest++;
      rc = fts3ExprParse(pParse, zInput+1, nInput-1, ppExpr, &nConsumed);
      if( rc==SQLITE_OK && !*ppExpr ){ rc = SQLITE_DONE; }
      *pnConsumed = (int)(zInput - z) + 1 + nConsumed;
      return rc;
    }else if( *zInput==')' ){
      pParse->nNest--;
      *pnConsumed = (zInput - z) + 1;
      *ppExpr = 0;
      return SQLITE_DONE;
    }
  }

  /* If control flows to this point, this must be a regular token, or 
  ** the end of the input. Read a regular token using the sqlite3_tokenizer
  ** interface. Before doing so, figure out if there is an explicit
  ** column specifier for the token. 
  **
  ** TODO: Strangely, it is not possible to associate a column specifier
................................................................................
  const char *zIn = z;
  int rc = SQLITE_OK;
  int isRequirePhrase = 1;

  while( rc==SQLITE_OK ){
    Fts3Expr *p = 0;
    int nByte = 0;

    rc = getNextNode(pParse, zIn, nIn, &p, &nByte);
    assert( nByte>0 || (rc!=SQLITE_OK && p==0) );
    if( rc==SQLITE_OK ){
      if( p ){
        int isPhrase;

        if( !sqlite3_fts3_enable_parentheses 
            && p->eType==FTSQUERY_PHRASE && pParse->isNot 
        ){
          /* Create an implicit NOT operator. */
          Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr));
................................................................................
              pRet = p;
            }
          }else{
            insertBinaryOperator(&pRet, pPrev, p);
          }
          isRequirePhrase = !isPhrase;
        }
        pPrev = p;
      }
      assert( nByte>0 );
    }
    assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) );
    nIn -= nByte;
    zIn += nByte;

  }

  if( rc==SQLITE_DONE && pRet && isRequirePhrase ){
    rc = SQLITE_ERROR;
  }

  if( rc==SQLITE_DONE ){

Changes to test/fts3defer2.test.

54
55
56
57
58
59
60

61
62
63
64
65
66
67
do_execsql_test 1.2.0 {
  SELECT content FROM t1 WHERE t1 MATCH 'f (e a)';
} {{a b c d e f a x y}}

do_execsql_test 1.2.1 {
  SELECT content FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)';
} {{a b c d e f a x y}}


do_execsql_test 1.2.2 {
  SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1, 'pcxnal'))
  FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)';
} [list                              \
   {a b c d [e] [f] [a] x y}         \
   {0 1 8 1 0 0 10 1 0 2 12 1}       \







>







54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
do_execsql_test 1.2.0 {
  SELECT content FROM t1 WHERE t1 MATCH 'f (e a)';
} {{a b c d e f a x y}}

do_execsql_test 1.2.1 {
  SELECT content FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)';
} {{a b c d e f a x y}}


do_execsql_test 1.2.2 {
  SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1, 'pcxnal'))
  FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)';
} [list                              \
   {a b c d [e] [f] [a] x y}         \
   {0 1 8 1 0 0 10 1 0 2 12 1}       \

Changes to test/fts3expr.test.

505
506
507
508
509
510
511





512
do_test fts3expr-8.5 { test_fts3expr "((blah.))" } {PHRASE 3 0 blah}
do_test fts3expr-8.6 { test_fts3expr "(((blah,)))" } {PHRASE 3 0 blah}
do_test fts3expr-8.7 { test_fts3expr "((((blah!))))" } {PHRASE 3 0 blah}

do_test fts3expr-8.8 { test_fts3expr "(,(blah-),)" } {PHRASE 3 0 blah}

set sqlite_fts3_enable_parentheses 0





finish_test







>
>
>
>
>

505
506
507
508
509
510
511
512
513
514
515
516
517
do_test fts3expr-8.5 { test_fts3expr "((blah.))" } {PHRASE 3 0 blah}
do_test fts3expr-8.6 { test_fts3expr "(((blah,)))" } {PHRASE 3 0 blah}
do_test fts3expr-8.7 { test_fts3expr "((((blah!))))" } {PHRASE 3 0 blah}

do_test fts3expr-8.8 { test_fts3expr "(,(blah-),)" } {PHRASE 3 0 blah}

set sqlite_fts3_enable_parentheses 0

do_test fts3expr-9.1 {
  test_fts3expr "f (e NEAR/2 a)"
} {AND {PHRASE 3 0 f} {NEAR/2 {PHRASE 3 0 e} {PHRASE 3 0 a}}}

finish_test

Added test/fts3expr4.test.



















































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# 2014 May 7
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the FTS3 module.
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl
set testprefix fts3expr4

# If SQLITE_ENABLE_FTS3 is defined, omit this file.
ifcapable !fts3||!icu {
  finish_test
  return
}

set sqlite_fts3_enable_parentheses 1

proc test_icu_fts3expr {expr} {
  db one {SELECT fts3_exprtest('icu', $expr, 'a', 'b', 'c')}
}

proc do_icu_expr_test {tn expr res} {
  uplevel [list do_test $tn [list test_icu_fts3expr $expr] $res]
}

#-------------------------------------------------------------------------
#
do_icu_expr_test 1.1 "abcd"    {PHRASE 3 0 abcd}
do_icu_expr_test 1.2 " tag "   {PHRASE 3 0 tag}
do_icu_expr_test 1.3 {"x y z"} {PHRASE 3 0 x y z}
do_icu_expr_test 1.4 {x OR y}       {OR {PHRASE 3 0 x} {PHRASE 3 0 y}}
do_icu_expr_test 1.5 {(x OR y)}     {OR {PHRASE 3 0 x} {PHRASE 3 0 y}}
do_icu_expr_test 1.6 { "(x OR y)" } {PHRASE 3 0 ( x or y )}

# In "col:word", if "col" is not the name of a column, the entire thing
# is passed to the tokenizer.
#
do_icu_expr_test 1.7 {a:word} {PHRASE 0 0 word}
do_icu_expr_test 1.8 {d:word} {PHRASE 3 0 d:word}

set sqlite_fts3_enable_parentheses 0

do_icu_expr_test 2.1 {
  f (e NEAR/2 a)
} {AND {AND {AND {PHRASE 3 0 f} {PHRASE 3 0 (}} {NEAR/2 {PHRASE 3 0 e} {PHRASE 3 0 a}}} {PHRASE 3 0 )}}

finish_test