/ Check-in [e21bf7a2]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix the way parentheses in MATCH expressions are handled by FTS if the tokenizer considers them to be token characters.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: e21bf7a2ade6373e94ea403c665f78e1ad22143f
User & Date: dan 2014-05-07 19:59:36
References
2014-10-09
15:08
Allow FTS tokenizers to choose whether or not to consider the "*" character part of tokens. This restores the pre-[e21bf7a2ad] behaviour. Also fix a problem causing FTS to interpret tokens beginning with "*" characters as EOF. check-in: 49dfee7c user: dan tags: trunk
Context
2014-05-07
20:24
A better fix for the group_concat() problem. check-in: 1c086dee user: drh tags: trunk
19:59
Fix the way parentheses in MATCH expressions are handled by FTS if the tokenizer considers them to be token characters. check-in: e21bf7a2 user: dan tags: trunk
18:23
Make sure the group_concat() function returns an empty string, not a NULL, if it has at least one input row. Fix for ticket [55746f9e65f8587]. check-in: d01cedaa user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3_expr.c.

   181    181     int *pnConsumed                         /* OUT: Number of bytes consumed */
   182    182   ){
   183    183     sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
   184    184     sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
   185    185     int rc;
   186    186     sqlite3_tokenizer_cursor *pCursor;
   187    187     Fts3Expr *pRet = 0;
   188         -  int nConsumed = 0;
          188  +  int i = 0;
   189    189   
   190         -  rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, n, &pCursor);
          190  +  /* Set variable i to the maximum number of bytes of input to tokenize. */
          191  +  for(i=0; i<n; i++){
          192  +    if( sqlite3_fts3_enable_parentheses && (z[i]=='(' || z[i]==')') ) break;
          193  +    if( z[i]=='*' || z[i]=='"' ) break;
          194  +  }
          195  +
          196  +  *pnConsumed = i;
          197  +  rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, i, &pCursor);
   191    198     if( rc==SQLITE_OK ){
   192    199       const char *zToken;
   193    200       int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0;
   194    201       int nByte;                               /* total space to allocate */
   195    202   
   196    203       rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);
   197         -
   198         -    if( (rc==SQLITE_OK || rc==SQLITE_DONE) && sqlite3_fts3_enable_parentheses ){
   199         -      int i;
   200         -      if( rc==SQLITE_DONE ) iStart = n;
   201         -      for(i=0; i<iStart; i++){
   202         -        if( z[i]=='(' ){
   203         -          pParse->nNest++;
   204         -          rc = fts3ExprParse(pParse, &z[i+1], n-i-1, &pRet, &nConsumed);
   205         -          if( rc==SQLITE_OK && !pRet ){
   206         -            rc = SQLITE_DONE;
   207         -          }
   208         -          nConsumed = (int)(i + 1 + nConsumed);
   209         -          break;
   210         -        }
   211         -
   212         -        if( z[i]==')' ){
   213         -          rc = SQLITE_DONE;
   214         -          pParse->nNest--;
   215         -          nConsumed = i+1;
   216         -          break;
   217         -        }
   218         -      }
   219         -    }
   220         -
   221         -    if( nConsumed==0 && rc==SQLITE_OK ){
          204  +    if( rc==SQLITE_OK ){
   222    205         nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
   223    206         pRet = (Fts3Expr *)fts3MallocZero(nByte);
   224    207         if( !pRet ){
   225    208           rc = SQLITE_NOMEM;
   226    209         }else{
   227    210           pRet->eType = FTSQUERY_PHRASE;
   228    211           pRet->pPhrase = (Fts3Phrase *)&pRet[1];
................................................................................
   248    231               iStart--;
   249    232             }else{
   250    233               break;
   251    234             }
   252    235           }
   253    236   
   254    237         }
   255         -      nConsumed = iEnd;
          238  +      *pnConsumed = iEnd;
          239  +    }else if( i && rc==SQLITE_DONE ){
          240  +      rc = SQLITE_OK;
   256    241       }
   257    242   
   258    243       pModule->xClose(pCursor);
   259    244     }
   260    245     
   261         -  *pnConsumed = nConsumed;
   262    246     *ppExpr = pRet;
   263    247     return rc;
   264    248   }
   265    249   
   266    250   
   267    251   /*
   268    252   ** Enlarge a memory allocation.  If an out-of-memory allocation occurs,
................................................................................
   504    488       *pnConsumed = (int)((zInput - z) + ii + 1);
   505    489       if( ii==nInput ){
   506    490         return SQLITE_ERROR;
   507    491       }
   508    492       return getNextString(pParse, &zInput[1], ii-1, ppExpr);
   509    493     }
   510    494   
          495  +  if( sqlite3_fts3_enable_parentheses ){
          496  +    if( *zInput=='(' ){
          497  +      int nConsumed = 0;
          498  +      pParse->nNest++;
          499  +      rc = fts3ExprParse(pParse, zInput+1, nInput-1, ppExpr, &nConsumed);
          500  +      if( rc==SQLITE_OK && !*ppExpr ){ rc = SQLITE_DONE; }
          501  +      *pnConsumed = (int)(zInput - z) + 1 + nConsumed;
          502  +      return rc;
          503  +    }else if( *zInput==')' ){
          504  +      pParse->nNest--;
          505  +      *pnConsumed = (zInput - z) + 1;
          506  +      *ppExpr = 0;
          507  +      return SQLITE_DONE;
          508  +    }
          509  +  }
   511    510   
   512    511     /* If control flows to this point, this must be a regular token, or 
   513    512     ** the end of the input. Read a regular token using the sqlite3_tokenizer
   514    513     ** interface. Before doing so, figure out if there is an explicit
   515    514     ** column specifier for the token. 
   516    515     **
   517    516     ** TODO: Strangely, it is not possible to associate a column specifier
................................................................................
   622    621     const char *zIn = z;
   623    622     int rc = SQLITE_OK;
   624    623     int isRequirePhrase = 1;
   625    624   
   626    625     while( rc==SQLITE_OK ){
   627    626       Fts3Expr *p = 0;
   628    627       int nByte = 0;
          628  +
   629    629       rc = getNextNode(pParse, zIn, nIn, &p, &nByte);
          630  +    assert( nByte>0 || (rc!=SQLITE_OK && p==0) );
   630    631       if( rc==SQLITE_OK ){
   631         -      int isPhrase;
   632         -
   633         -      if( !sqlite3_fts3_enable_parentheses 
   634         -       && p->eType==FTSQUERY_PHRASE && pParse->isNot 
   635         -      ){
   636         -        /* Create an implicit NOT operator. */
   637         -        Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr));
   638         -        if( !pNot ){
   639         -          sqlite3Fts3ExprFree(p);
   640         -          rc = SQLITE_NOMEM;
   641         -          goto exprparse_out;
   642         -        }
   643         -        pNot->eType = FTSQUERY_NOT;
   644         -        pNot->pRight = p;
   645         -        p->pParent = pNot;
   646         -        if( pNotBranch ){
   647         -          pNot->pLeft = pNotBranch;
   648         -          pNotBranch->pParent = pNot;
   649         -        }
   650         -        pNotBranch = pNot;
   651         -        p = pPrev;
   652         -      }else{
   653         -        int eType = p->eType;
   654         -        isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft);
   655         -
   656         -        /* The isRequirePhrase variable is set to true if a phrase or
   657         -        ** an expression contained in parenthesis is required. If a
   658         -        ** binary operator (AND, OR, NOT or NEAR) is encounted when
   659         -        ** isRequirePhrase is set, this is a syntax error.
   660         -        */
   661         -        if( !isPhrase && isRequirePhrase ){
   662         -          sqlite3Fts3ExprFree(p);
   663         -          rc = SQLITE_ERROR;
   664         -          goto exprparse_out;
   665         -        }
   666         -  
   667         -        if( isPhrase && !isRequirePhrase ){
   668         -          /* Insert an implicit AND operator. */
   669         -          Fts3Expr *pAnd;
   670         -          assert( pRet && pPrev );
   671         -          pAnd = fts3MallocZero(sizeof(Fts3Expr));
   672         -          if( !pAnd ){
          632  +      if( p ){
          633  +        int isPhrase;
          634  +
          635  +        if( !sqlite3_fts3_enable_parentheses 
          636  +            && p->eType==FTSQUERY_PHRASE && pParse->isNot 
          637  +        ){
          638  +          /* Create an implicit NOT operator. */
          639  +          Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr));
          640  +          if( !pNot ){
          641  +            sqlite3Fts3ExprFree(p);
          642  +            rc = SQLITE_NOMEM;
          643  +            goto exprparse_out;
          644  +          }
          645  +          pNot->eType = FTSQUERY_NOT;
          646  +          pNot->pRight = p;
          647  +          p->pParent = pNot;
          648  +          if( pNotBranch ){
          649  +            pNot->pLeft = pNotBranch;
          650  +            pNotBranch->pParent = pNot;
          651  +          }
          652  +          pNotBranch = pNot;
          653  +          p = pPrev;
          654  +        }else{
          655  +          int eType = p->eType;
          656  +          isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft);
          657  +
          658  +          /* The isRequirePhrase variable is set to true if a phrase or
          659  +          ** an expression contained in parenthesis is required. If a
          660  +          ** binary operator (AND, OR, NOT or NEAR) is encounted when
          661  +          ** isRequirePhrase is set, this is a syntax error.
          662  +          */
          663  +          if( !isPhrase && isRequirePhrase ){
   673    664               sqlite3Fts3ExprFree(p);
   674         -            rc = SQLITE_NOMEM;
          665  +            rc = SQLITE_ERROR;
   675    666               goto exprparse_out;
   676    667             }
   677         -          pAnd->eType = FTSQUERY_AND;
   678         -          insertBinaryOperator(&pRet, pPrev, pAnd);
   679         -          pPrev = pAnd;
   680         -        }
   681    668   
   682         -        /* This test catches attempts to make either operand of a NEAR
   683         -        ** operator something other than a phrase. For example, either of
   684         -        ** the following:
   685         -        **
   686         -        **    (bracketed expression) NEAR phrase
   687         -        **    phrase NEAR (bracketed expression)
   688         -        **
   689         -        ** Return an error in either case.
   690         -        */
   691         -        if( pPrev && (
          669  +          if( isPhrase && !isRequirePhrase ){
          670  +            /* Insert an implicit AND operator. */
          671  +            Fts3Expr *pAnd;
          672  +            assert( pRet && pPrev );
          673  +            pAnd = fts3MallocZero(sizeof(Fts3Expr));
          674  +            if( !pAnd ){
          675  +              sqlite3Fts3ExprFree(p);
          676  +              rc = SQLITE_NOMEM;
          677  +              goto exprparse_out;
          678  +            }
          679  +            pAnd->eType = FTSQUERY_AND;
          680  +            insertBinaryOperator(&pRet, pPrev, pAnd);
          681  +            pPrev = pAnd;
          682  +          }
          683  +
          684  +          /* This test catches attempts to make either operand of a NEAR
          685  +           ** operator something other than a phrase. For example, either of
          686  +           ** the following:
          687  +           **
          688  +           **    (bracketed expression) NEAR phrase
          689  +           **    phrase NEAR (bracketed expression)
          690  +           **
          691  +           ** Return an error in either case.
          692  +           */
          693  +          if( pPrev && (
   692    694               (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE)
   693    695            || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR)
   694         -        )){
   695         -          sqlite3Fts3ExprFree(p);
   696         -          rc = SQLITE_ERROR;
   697         -          goto exprparse_out;
   698         -        }
   699         -  
   700         -        if( isPhrase ){
   701         -          if( pRet ){
   702         -            assert( pPrev && pPrev->pLeft && pPrev->pRight==0 );
   703         -            pPrev->pRight = p;
   704         -            p->pParent = pPrev;
          696  +          )){
          697  +            sqlite3Fts3ExprFree(p);
          698  +            rc = SQLITE_ERROR;
          699  +            goto exprparse_out;
          700  +          }
          701  +
          702  +          if( isPhrase ){
          703  +            if( pRet ){
          704  +              assert( pPrev && pPrev->pLeft && pPrev->pRight==0 );
          705  +              pPrev->pRight = p;
          706  +              p->pParent = pPrev;
          707  +            }else{
          708  +              pRet = p;
          709  +            }
   705    710             }else{
   706         -            pRet = p;
          711  +            insertBinaryOperator(&pRet, pPrev, p);
   707    712             }
   708         -        }else{
   709         -          insertBinaryOperator(&pRet, pPrev, p);
          713  +          isRequirePhrase = !isPhrase;
   710    714           }
   711         -        isRequirePhrase = !isPhrase;
          715  +        pPrev = p;
   712    716         }
   713    717         assert( nByte>0 );
   714    718       }
   715    719       assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) );
   716    720       nIn -= nByte;
   717    721       zIn += nByte;
   718         -    pPrev = p;
   719    722     }
   720    723   
   721    724     if( rc==SQLITE_DONE && pRet && isRequirePhrase ){
   722    725       rc = SQLITE_ERROR;
   723    726     }
   724    727   
   725    728     if( rc==SQLITE_DONE ){

Changes to test/fts3defer2.test.

    54     54   do_execsql_test 1.2.0 {
    55     55     SELECT content FROM t1 WHERE t1 MATCH 'f (e a)';
    56     56   } {{a b c d e f a x y}}
    57     57   
    58     58   do_execsql_test 1.2.1 {
    59     59     SELECT content FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)';
    60     60   } {{a b c d e f a x y}}
           61  +
    61     62   
    62     63   do_execsql_test 1.2.2 {
    63     64     SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1, 'pcxnal'))
    64     65     FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)';
    65     66   } [list                              \
    66     67      {a b c d [e] [f] [a] x y}         \
    67     68      {0 1 8 1 0 0 10 1 0 2 12 1}       \

Changes to test/fts3expr.test.

   505    505   do_test fts3expr-8.5 { test_fts3expr "((blah.))" } {PHRASE 3 0 blah}
   506    506   do_test fts3expr-8.6 { test_fts3expr "(((blah,)))" } {PHRASE 3 0 blah}
   507    507   do_test fts3expr-8.7 { test_fts3expr "((((blah!))))" } {PHRASE 3 0 blah}
   508    508   
   509    509   do_test fts3expr-8.8 { test_fts3expr "(,(blah-),)" } {PHRASE 3 0 blah}
   510    510   
   511    511   set sqlite_fts3_enable_parentheses 0
          512  +
          513  +do_test fts3expr-9.1 {
          514  +  test_fts3expr "f (e NEAR/2 a)"
          515  +} {AND {PHRASE 3 0 f} {NEAR/2 {PHRASE 3 0 e} {PHRASE 3 0 a}}}
          516  +
   512    517   finish_test

Added test/fts3expr4.test.

            1  +# 2014 May 7
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#*************************************************************************
           11  +# This file implements regression tests for SQLite library.  The
           12  +# focus of this script is testing the FTS3 module.
           13  +#
           14  +
           15  +set testdir [file dirname $argv0]
           16  +source $testdir/tester.tcl
           17  +set testprefix fts3expr4
           18  +
           19  +# If either FTS3 or ICU support is not available, omit this file.
           20  +ifcapable !fts3||!icu {
           21  +  finish_test
           22  +  return
           23  +}
           24  +
           25  +set sqlite_fts3_enable_parentheses 1
           26  +
           27  +proc test_icu_fts3expr {expr} {
           28  +  db one {SELECT fts3_exprtest('icu', $expr, 'a', 'b', 'c')}
           29  +}
           30  +
           31  +proc do_icu_expr_test {tn expr res} {
           32  +  uplevel [list do_test $tn [list test_icu_fts3expr $expr] $res]
           33  +}
           34  +
           35  +#-------------------------------------------------------------------------
           36  +#
           37  +do_icu_expr_test 1.1 "abcd"    {PHRASE 3 0 abcd}
           38  +do_icu_expr_test 1.2 " tag "   {PHRASE 3 0 tag}
           39  +do_icu_expr_test 1.3 {"x y z"} {PHRASE 3 0 x y z}
           40  +do_icu_expr_test 1.4 {x OR y}       {OR {PHRASE 3 0 x} {PHRASE 3 0 y}}
           41  +do_icu_expr_test 1.5 {(x OR y)}     {OR {PHRASE 3 0 x} {PHRASE 3 0 y}}
           42  +do_icu_expr_test 1.6 { "(x OR y)" } {PHRASE 3 0 ( x or y )}
           43  +
           44  +# In "col:word", if "col" is not the name of a column, the entire thing
           45  +# is passed to the tokenizer.
           46  +#
           47  +do_icu_expr_test 1.7 {a:word} {PHRASE 0 0 word}
           48  +do_icu_expr_test 1.8 {d:word} {PHRASE 3 0 d:word}
           49  +
           50  +set sqlite_fts3_enable_parentheses 0
           51  +
           52  +do_icu_expr_test 2.1 {
           53  +  f (e NEAR/2 a)
           54  +} {AND {AND {AND {PHRASE 3 0 f} {PHRASE 3 0 (}} {NEAR/2 {PHRASE 3 0 e} {PHRASE 3 0 a}}} {PHRASE 3 0 )}}
           55  +
           56  +finish_test
           57  +