Index: ext/fts3/fts3_expr.c ================================================================== --- ext/fts3/fts3_expr.c +++ ext/fts3/fts3_expr.c @@ -183,44 +183,27 @@ sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; int rc; sqlite3_tokenizer_cursor *pCursor; Fts3Expr *pRet = 0; - int nConsumed = 0; + int i = 0; - rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, n, &pCursor); + /* Set variable i to the maximum number of bytes of input to tokenize. */ + for(i=0; iiLangid, z, i, &pCursor); if( rc==SQLITE_OK ){ const char *zToken; int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0; int nByte; /* total space to allocate */ rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition); - - if( (rc==SQLITE_OK || rc==SQLITE_DONE) && sqlite3_fts3_enable_parentheses ){ - int i; - if( rc==SQLITE_DONE ) iStart = n; - for(i=0; inNest++; - rc = fts3ExprParse(pParse, &z[i+1], n-i-1, &pRet, &nConsumed); - if( rc==SQLITE_OK && !pRet ){ - rc = SQLITE_DONE; - } - nConsumed = (int)(i + 1 + nConsumed); - break; - } - - if( z[i]==')' ){ - rc = SQLITE_DONE; - pParse->nNest--; - nConsumed = i+1; - break; - } - } - } - - if( nConsumed==0 && rc==SQLITE_OK ){ + if( rc==SQLITE_OK ){ nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken; pRet = (Fts3Expr *)fts3MallocZero(nByte); if( !pRet ){ rc = SQLITE_NOMEM; }else{ @@ -250,17 +233,18 @@ break; } } } - nConsumed = iEnd; + *pnConsumed = iEnd; + }else if( i && rc==SQLITE_DONE ){ + rc = SQLITE_OK; } pModule->xClose(pCursor); } - *pnConsumed = nConsumed; *ppExpr = pRet; return rc; } @@ -506,10 +490,25 @@ return SQLITE_ERROR; } return getNextString(pParse, &zInput[1], ii-1, ppExpr); } + if( sqlite3_fts3_enable_parentheses ){ + if( *zInput=='(' ){ + int nConsumed = 0; + pParse->nNest++; + rc = fts3ExprParse(pParse, zInput+1, nInput-1, ppExpr, &nConsumed); + if( rc==SQLITE_OK && !*ppExpr ){ rc = SQLITE_DONE; } + *pnConsumed = (int)(zInput - z) + 1 + 
nConsumed; + return rc; + }else if( *zInput==')' ){ + pParse->nNest--; + *pnConsumed = (zInput - z) + 1; + *ppExpr = 0; + return SQLITE_DONE; + } + } /* If control flows to this point, this must be a regular token, or ** the end of the input. Read a regular token using the sqlite3_tokenizer ** interface. Before doing so, figure out if there is an explicit ** column specifier for the token. @@ -624,100 +623,104 @@ int isRequirePhrase = 1; while( rc==SQLITE_OK ){ Fts3Expr *p = 0; int nByte = 0; - rc = getNextNode(pParse, zIn, nIn, &p, &nByte); - if( rc==SQLITE_OK ){ - int isPhrase; - - if( !sqlite3_fts3_enable_parentheses - && p->eType==FTSQUERY_PHRASE && pParse->isNot - ){ - /* Create an implicit NOT operator. */ - Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr)); - if( !pNot ){ - sqlite3Fts3ExprFree(p); - rc = SQLITE_NOMEM; - goto exprparse_out; - } - pNot->eType = FTSQUERY_NOT; - pNot->pRight = p; - p->pParent = pNot; - if( pNotBranch ){ - pNot->pLeft = pNotBranch; - pNotBranch->pParent = pNot; - } - pNotBranch = pNot; - p = pPrev; - }else{ - int eType = p->eType; - isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); - - /* The isRequirePhrase variable is set to true if a phrase or - ** an expression contained in parenthesis is required. If a - ** binary operator (AND, OR, NOT or NEAR) is encounted when - ** isRequirePhrase is set, this is a syntax error. - */ - if( !isPhrase && isRequirePhrase ){ - sqlite3Fts3ExprFree(p); - rc = SQLITE_ERROR; - goto exprparse_out; - } - - if( isPhrase && !isRequirePhrase ){ - /* Insert an implicit AND operator. */ - Fts3Expr *pAnd; - assert( pRet && pPrev ); - pAnd = fts3MallocZero(sizeof(Fts3Expr)); - if( !pAnd ){ - sqlite3Fts3ExprFree(p); - rc = SQLITE_NOMEM; - goto exprparse_out; - } - pAnd->eType = FTSQUERY_AND; - insertBinaryOperator(&pRet, pPrev, pAnd); - pPrev = pAnd; - } - - /* This test catches attempts to make either operand of a NEAR - ** operator something other than a phrase. 
For example, either of - ** the following: - ** - ** (bracketed expression) NEAR phrase - ** phrase NEAR (bracketed expression) - ** - ** Return an error in either case. - */ - if( pPrev && ( - (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE) - || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR) - )){ - sqlite3Fts3ExprFree(p); - rc = SQLITE_ERROR; - goto exprparse_out; - } - - if( isPhrase ){ - if( pRet ){ - assert( pPrev && pPrev->pLeft && pPrev->pRight==0 ); - pPrev->pRight = p; - p->pParent = pPrev; - }else{ - pRet = p; - } - }else{ - insertBinaryOperator(&pRet, pPrev, p); - } - isRequirePhrase = !isPhrase; + + rc = getNextNode(pParse, zIn, nIn, &p, &nByte); + assert( nByte>0 || (rc!=SQLITE_OK && p==0) ); + if( rc==SQLITE_OK ){ + if( p ){ + int isPhrase; + + if( !sqlite3_fts3_enable_parentheses + && p->eType==FTSQUERY_PHRASE && pParse->isNot + ){ + /* Create an implicit NOT operator. */ + Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr)); + if( !pNot ){ + sqlite3Fts3ExprFree(p); + rc = SQLITE_NOMEM; + goto exprparse_out; + } + pNot->eType = FTSQUERY_NOT; + pNot->pRight = p; + p->pParent = pNot; + if( pNotBranch ){ + pNot->pLeft = pNotBranch; + pNotBranch->pParent = pNot; + } + pNotBranch = pNot; + p = pPrev; + }else{ + int eType = p->eType; + isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); + + /* The isRequirePhrase variable is set to true if a phrase or + ** an expression contained in parentheses is required. If a + ** binary operator (AND, OR, NOT or NEAR) is encountered when + ** isRequirePhrase is set, this is a syntax error. + */ + if( !isPhrase && isRequirePhrase ){ + sqlite3Fts3ExprFree(p); + rc = SQLITE_ERROR; + goto exprparse_out; + } + + if( isPhrase && !isRequirePhrase ){ + /* Insert an implicit AND operator. 
*/ + Fts3Expr *pAnd; + assert( pRet && pPrev ); + pAnd = fts3MallocZero(sizeof(Fts3Expr)); + if( !pAnd ){ + sqlite3Fts3ExprFree(p); + rc = SQLITE_NOMEM; + goto exprparse_out; + } + pAnd->eType = FTSQUERY_AND; + insertBinaryOperator(&pRet, pPrev, pAnd); + pPrev = pAnd; + } + + /* This test catches attempts to make either operand of a NEAR + ** operator something other than a phrase. For example, either of + ** the following: + ** + ** (bracketed expression) NEAR phrase + ** phrase NEAR (bracketed expression) + ** + ** Return an error in either case. + */ + if( pPrev && ( + (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE) + || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR) + )){ + sqlite3Fts3ExprFree(p); + rc = SQLITE_ERROR; + goto exprparse_out; + } + + if( isPhrase ){ + if( pRet ){ + assert( pPrev && pPrev->pLeft && pPrev->pRight==0 ); + pPrev->pRight = p; + p->pParent = pPrev; + }else{ + pRet = p; + } + }else{ + insertBinaryOperator(&pRet, pPrev, p); + } + isRequirePhrase = !isPhrase; + } + pPrev = p; } assert( nByte>0 ); } assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) ); nIn -= nByte; zIn += nByte; - pPrev = p; } if( rc==SQLITE_DONE && pRet && isRequirePhrase ){ rc = SQLITE_ERROR; } Index: test/fts3defer2.test ================================================================== --- test/fts3defer2.test +++ test/fts3defer2.test @@ -56,10 +56,11 @@ } {{a b c d e f a x y}} do_execsql_test 1.2.1 { SELECT content FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)'; } {{a b c d e f a x y}} + do_execsql_test 1.2.2 { SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1, 'pcxnal')) FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)'; } [list \ Index: test/fts3expr.test ================================================================== --- test/fts3expr.test +++ test/fts3expr.test @@ -507,6 +507,11 @@ do_test fts3expr-8.7 { test_fts3expr "((((blah!))))" } {PHRASE 3 0 blah} do_test fts3expr-8.8 { test_fts3expr "(,(blah-),)" } {PHRASE 3 0 blah} set 
sqlite_fts3_enable_parentheses 0 + +do_test fts3expr-9.1 { + test_fts3expr "f (e NEAR/2 a)" +} {AND {PHRASE 3 0 f} {NEAR/2 {PHRASE 3 0 e} {PHRASE 3 0 a}}} + finish_test ADDED test/fts3expr4.test Index: test/fts3expr4.test ================================================================== --- /dev/null +++ test/fts3expr4.test @@ -0,0 +1,57 @@ +# 2014 May 7 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file implements regression tests for SQLite library. The +# focus of this script is testing the FTS3 module. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix fts3expr4 + +# If either FTS3 or the ICU tokenizer is not available, omit this file. +ifcapable !fts3||!icu { + finish_test + return +} + +set sqlite_fts3_enable_parentheses 1 + +proc test_icu_fts3expr {expr} { + db one {SELECT fts3_exprtest('icu', $expr, 'a', 'b', 'c')} +} + +proc do_icu_expr_test {tn expr res} { + uplevel [list do_test $tn [list test_icu_fts3expr $expr] $res] +} + +#------------------------------------------------------------------------- +# +do_icu_expr_test 1.1 "abcd" {PHRASE 3 0 abcd} +do_icu_expr_test 1.2 " tag " {PHRASE 3 0 tag} +do_icu_expr_test 1.3 {"x y z"} {PHRASE 3 0 x y z} +do_icu_expr_test 1.4 {x OR y} {OR {PHRASE 3 0 x} {PHRASE 3 0 y}} +do_icu_expr_test 1.5 {(x OR y)} {OR {PHRASE 3 0 x} {PHRASE 3 0 y}} +do_icu_expr_test 1.6 { "(x OR y)" } {PHRASE 3 0 ( x or y )} + +# In "col:word", if "col" is not the name of a column, the entire thing +# is passed to the tokenizer. 
+# +do_icu_expr_test 1.7 {a:word} {PHRASE 0 0 word} +do_icu_expr_test 1.8 {d:word} {PHRASE 3 0 d:word} + +set sqlite_fts3_enable_parentheses 0 + +do_icu_expr_test 2.1 { + f (e NEAR/2 a) +} {AND {AND {AND {PHRASE 3 0 f} {PHRASE 3 0 (}} {NEAR/2 {PHRASE 3 0 e} {PHRASE 3 0 a}}} {PHRASE 3 0 )}} + +finish_test +