Index: ext/fts5/fts5_expr.c ================================================================== --- ext/fts5/fts5_expr.c +++ ext/fts5/fts5_expr.c @@ -651,35 +651,37 @@ static int fts5ExprNearNextRowidMatch( Fts5Expr *pExpr, /* Expression pPhrase belongs to */ Fts5ExprNode *pNode ){ Fts5ExprNearset *pNear = pNode->pNear; - int rc = SQLITE_OK; - int i, j; /* Phrase and token index, respectively */ i64 iLast; /* Lastest rowid any iterator points to */ - int bMatch; /* True if all terms are at the same rowid */ + int rc = SQLITE_OK; /* Initialize iLast, the "lastest" rowid any iterator points to. If the ** iterator skips through rowids in the default ascending order, this means ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it ** means the minimum rowid. */ iLast = sqlite3Fts5IterRowid(pNear->apPhrase[0]->aTerm[0].pIter); - do { - bMatch = 1; - for(i=0; i<pNear->nPhrase; i++){ - Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; - for(j=0; j<pPhrase->nTerm; j++){ - Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; - i64 iRowid = sqlite3Fts5IterRowid(pIter); - if( iRowid!=iLast ) bMatch = 0; - if( fts5ExprAdvanceto(pIter, pExpr->bDesc, &iLast, &rc, &pNode->bEof) ){ - return rc; + if( pNear->nPhrase>1 || pNear->apPhrase[0]->nTerm>1 ){ + int i, j; /* Phrase and token index, respectively */ + int bMatch; /* True if all terms are at the same rowid */ + do { + bMatch = 1; + for(i=0; i<pNear->nPhrase; i++){ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; + for(j=0; j<pPhrase->nTerm; j++){ + Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; + i64 iRowid = sqlite3Fts5IterRowid(pIter); + if( iRowid!=iLast ) bMatch = 0; + if( fts5ExprAdvanceto(pIter, pExpr->bDesc, &iLast,&rc,&pNode->bEof) ){ + return rc; + } } } - } - }while( bMatch==0 ); + }while( bMatch==0 ); + } pNode->iRowid = iLast; return rc; } @@ -736,10 +738,80 @@ } } return rc; } +static int fts5ExprNearTest( + int *pRc, + Fts5Expr *pExpr, /* Expression that pNear is a part of */ + Fts5ExprNode *pNode /* The "NEAR" node (FTS5_STRING) */ +){ 
+ Fts5ExprNearset *pNear = pNode->pNear; + int rc = *pRc; + + if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 ){ + /* If this "NEAR" object is actually a single phrase that consists + ** of a single term only, then grab pointers into the poslist + ** managed by the fts5_index.c iterator object. This is much faster + ** than synthesizing a new poslist the way we have to for more + ** complicated phrase or NEAR expressions. */ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; + Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; + Fts5ExprColset *pColset = pNear->pColset; + const u8 *pPos; + int nPos; + + if( rc!=SQLITE_OK ) return 0; + rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid); + + /* If the term may match any column, then this must be a match. + ** Return immediately in this case. Otherwise, try to find the + ** part of the poslist that corresponds to the required column. + ** If it can be found, return. If it cannot, the next iteration + ** of the loop will test the next rowid in the database for this + ** term. */ + if( pColset==0 ){ + assert( pPhrase->poslist.nSpace==0 ); + pPhrase->poslist.p = (u8*)pPos; + pPhrase->poslist.n = nPos; + }else if( pColset->nCol==1 ){ + assert( pPhrase->poslist.nSpace==0 ); + pPhrase->poslist.n = fts5ExprExtractCol(&pPos, nPos, pColset->aiCol[0]); + pPhrase->poslist.p = (u8*)pPos; + }else if( rc==SQLITE_OK ){ + rc = fts5ExprExtractColset(pColset, pPos, nPos, &pPhrase->poslist); + } + + *pRc = rc; + return (pPhrase->poslist.n>0); + }else{ + int i; + + /* Check that each phrase in the nearset matches the current row. + ** Populate the pPhrase->poslist buffers at the same time. If any + ** phrase is not a match, break out of the loop early. 
*/ + for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; + if( pPhrase->nTerm>1 || pNear->pColset ){ + int bMatch = 0; + rc = fts5ExprPhraseIsMatch(pExpr, pNear->pColset, pPhrase, &bMatch); + if( bMatch==0 ) break; + }else{ + rc = sqlite3Fts5IterPoslistBuffer( + pPhrase->aTerm[0].pIter, &pPhrase->poslist + ); + } + } + + *pRc = rc; + if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){ + return 1; + } + } + + return 0; +} /* ** Argument pNode points to a NEAR node. All individual term iterators ** point to valid entries (not EOF). * @@ -758,76 +830,20 @@ */ static int fts5ExprNearNextMatch( Fts5Expr *pExpr, /* Expression that pNear is a part of */ Fts5ExprNode *pNode /* The "NEAR" node (FTS5_STRING) */ ){ - Fts5ExprNearset *pNear = pNode->pNear; int rc = SQLITE_OK; + assert( pNode->pNear ); while( 1 ){ - if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 ){ - /* If this "NEAR" object is actually a single phrase that consists - ** of a single term only, then grab pointers into the poslist - ** managed by the fts5_index.c iterator object. This is much faster - ** than synthesizing a new poslist the way we have to for more - ** complicated phrase or NEAR expressions. */ - Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; - Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; - Fts5ExprColset *pColset = pNear->pColset; - const u8 *pPos; - int nPos; - - rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid); - - /* If the term may match any column, then this must be a match. - ** Return immediately in this case. Otherwise, try to find the - ** part of the poslist that corresponds to the required column. - ** If it can be found, return. If it cannot, the next iteration - ** of the loop will test the next rowid in the database for this - ** term. 
*/ - if( pColset==0 ){ - assert( pPhrase->poslist.nSpace==0 ); - pPhrase->poslist.p = (u8*)pPos; - pPhrase->poslist.n = nPos; - }else if( pColset->nCol==1 ){ - assert( pPhrase->poslist.nSpace==0 ); - pPhrase->poslist.n = fts5ExprExtractCol(&pPos, nPos, pColset->aiCol[0]); - pPhrase->poslist.p = (u8*)pPos; - }else if( rc==SQLITE_OK ){ - rc = fts5ExprExtractColset(pColset, pPos, nPos, &pPhrase->poslist); - } - - if( pPhrase->poslist.n ) return rc; - }else{ - int i; - - /* Advance the iterators until they all point to the same rowid */ - rc = fts5ExprNearNextRowidMatch(pExpr, pNode); - if( rc!=SQLITE_OK || pNode->bEof ) break; - - /* Check that each phrase in the nearset matches the current row. - ** Populate the pPhrase->poslist buffers at the same time. If any - ** phrase is not a match, break out of the loop early. */ - for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){ - Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; - if( pPhrase->nTerm>1 || pNear->pColset ){ - int bMatch = 0; - rc = fts5ExprPhraseIsMatch(pExpr, pNear->pColset, pPhrase, &bMatch); - if( bMatch==0 ) break; - }else{ - rc = sqlite3Fts5IterPoslistBuffer( - pPhrase->aTerm[0].pIter, &pPhrase->poslist - ); - } - } - - if( i==pNear->nPhrase ){ - if( i==1 ) break; - if( fts5ExprNearIsMatch(&rc, pNear) ) break; - } - } + /* Advance the iterators until they all point to the same rowid */ + rc = fts5ExprNearNextRowidMatch(pExpr, pNode); + if( rc!=SQLITE_OK || pNode->bEof ) break; + + if( fts5ExprNearTest(&rc, pExpr, pNode) ) break; /* If control flows to here, then the current rowid is not a match. ** Advance all term iterators in all phrases to the next rowid. 
*/ if( rc==SQLITE_OK ){ rc = fts5ExprNearAdvanceFirst(pExpr, pNode, 0, 0); @@ -940,14 +956,15 @@ rc = fts5ExprNearAdvanceFirst(pExpr, pNode, bFromValid, iFrom); break; }; case FTS5_AND: { - rc = fts5ExprNodeNext(pExpr, pNode->pLeft, bFromValid, iFrom); - if( rc==SQLITE_OK ){ - /* todo: update (iFrom/bFromValid) here */ - rc = fts5ExprNodeNext(pExpr, pNode->pRight, bFromValid, iFrom); + Fts5ExprNode *pLeft = pNode->pLeft; + rc = fts5ExprNodeNext(pExpr, pLeft, bFromValid, iFrom); + if( rc==SQLITE_OK && pLeft->bEof==0 ){ + assert( !bFromValid || fts5RowidCmp(pExpr, pLeft->iRowid, iFrom)>=0 ); + rc = fts5ExprNodeNext(pExpr, pNode->pRight, 1, pLeft->iRowid); } break; } case FTS5_OR: { @@ -991,10 +1008,71 @@ || pNode->iRowid==iFrom || pExpr->bDesc==(pNode->iRowid<iFrom) ); return rc; } +static void fts5ExprNodeZeroPoslist(Fts5ExprNode *pNode){ + if( pNode->eType==FTS5_STRING ){ + Fts5ExprNearset *pNear = pNode->pNear; + int i; + for(i=0; i<pNear->nPhrase; i++){ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; + pPhrase->poslist.n = 0; + } + }else{ + fts5ExprNodeZeroPoslist(pNode->pLeft); + fts5ExprNodeZeroPoslist(pNode->pRight); + } +} + +static int fts5ExprNodeTest( + int *pRc, + Fts5Expr *pExpr, + i64 iRowid, + Fts5ExprNode *pNode +){ + int bRes = 0; + if( pNode->bEof || pNode->iRowid!=iRowid ){ + bRes = 0; + }else { + switch( pNode->eType ){ + case FTS5_STRING: + bRes = fts5ExprNearTest(pRc, pExpr, pNode); + if( *pRc ) bRes = 0; + break; + + case FTS5_AND: { + int bRes1 = fts5ExprNodeTest(pRc, pExpr, iRowid, pNode->pLeft); + int bRes2 = fts5ExprNodeTest(pRc, pExpr, iRowid, pNode->pRight); + assert( (bRes1==0 || bRes1==1) && (bRes2==0 || bRes2==1) ); + + bRes = (bRes1 && bRes2); + if( bRes1!=bRes2 ){ + fts5ExprNodeZeroPoslist(bRes1 ? 
pNode->pLeft : pNode->pRight); + } + break; + } + + case FTS5_OR: { + int bRes1 = fts5ExprNodeTest(pRc, pExpr, iRowid, pNode->pLeft); + int bRes2 = fts5ExprNodeTest(pRc, pExpr, iRowid, pNode->pRight); + + bRes = (bRes1 || bRes2); + break; + } + + default: + assert( pNode->eType==FTS5_NOT ); + bRes = fts5ExprNodeTest(pRc, pExpr, iRowid, pNode->pLeft); + break; + } + } + + return bRes; +} + static void fts5ExprSetEof(Fts5ExprNode *pNode){ if( pNode ){ pNode->bEof = 1; fts5ExprSetEof(pNode->pLeft); @@ -1014,11 +1092,14 @@ int rc = SQLITE_OK; if( pNode->bEof==0 ){ switch( pNode->eType ){ case FTS5_STRING: { +#if 0 rc = fts5ExprNearNextMatch(pExpr, pNode); +#endif + rc = fts5ExprNearNextRowidMatch(pExpr, pNode); break; } case FTS5_AND: { Fts5ExprNode *p1 = pNode->pLeft; @@ -1063,11 +1144,11 @@ if( cmp>0 ){ rc = fts5ExprNodeNext(pExpr, p2, 1, p1->iRowid); cmp = fts5NodeCompare(pExpr, p1, p2); } assert( rc!=SQLITE_OK || cmp<=0 ); - if( rc || cmp<0 ) break; + if( 0==fts5ExprNodeTest(&rc, pExpr, p1->iRowid, p2) ) break; rc = fts5ExprNodeNext(pExpr, p1, 0, 0); } pNode->bEof = p1->bEof; pNode->iRowid = p1->iRowid; break; @@ -1094,11 +1175,14 @@ /* Initialize all term iterators in the NEAR object. */ rc = fts5ExprNearInitAll(pExpr, pNode); /* Attempt to advance to the first match */ if( rc==SQLITE_OK && pNode->bEof==0 ){ +#if 0 rc = fts5ExprNearNextMatch(pExpr, pNode); +#endif + rc = fts5ExprNearNextRowidMatch(pExpr, pNode); } }else{ rc = fts5ExprNodeFirst(pExpr, pNode->pLeft); if( rc==SQLITE_OK ){ @@ -1110,11 +1194,10 @@ } return rc; } - /* ** Begin iterating through the set of documents in index pIdx matched by ** the MATCH expression passed as the first argument. If the "bDesc" parameter ** is passed a non-zero value, iteration is in descending rowid order. Or, ** if it is zero, in ascending order. @@ -1121,15 +1204,22 @@ ** ** Return SQLITE_OK if successful, or an SQLite error code otherwise. It ** is not considered an error if the query does not match any documents. 
*/ int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, int bDesc){ + Fts5ExprNode *pRoot = p->pRoot; int rc = SQLITE_OK; - if( p->pRoot ){ + if( pRoot ){ p->pIndex = pIdx; p->bDesc = bDesc; - rc = fts5ExprNodeFirst(p, p->pRoot); + rc = fts5ExprNodeFirst(p, pRoot); + if( pRoot->bEof==0 + && 0==fts5ExprNodeTest(&rc, p, pRoot->iRowid, pRoot) + && rc==SQLITE_OK + ){ + rc = sqlite3Fts5ExprNext(p); + } } return rc; } /* @@ -1138,11 +1228,16 @@ ** Return SQLITE_OK if successful, or an SQLite error code otherwise. It ** is not considered an error if the query does not match any documents. */ int sqlite3Fts5ExprNext(Fts5Expr *p){ int rc; - rc = fts5ExprNodeNext(p, p->pRoot, 0, 0); + do { + rc = fts5ExprNodeNext(p, p->pRoot, 0, 0); + }while( p->pRoot->bEof==0 + && fts5ExprNodeTest(&rc, p, p->pRoot->iRowid, p->pRoot)==0 + && rc==SQLITE_OK + ); return rc; } int sqlite3Fts5ExprEof(Fts5Expr *p){ return (p->pRoot==0 || p->pRoot->bEof); Index: ext/fts5/test/fts5_common.tcl ================================================================== --- ext/fts5/test/fts5_common.tcl +++ ext/fts5/test/fts5_common.tcl @@ -277,9 +277,9 @@ } proc OR {a b} { sort_poslist [concat $a $b] } proc NOT {a b} { - if {[llength $b]} { return [list] } + if {[llength $b]>0} { return [list] } return $a } Index: ext/fts5/test/fts5auto.test ================================================================== --- ext/fts5/test/fts5auto.test +++ ext/fts5/test/fts5auto.test @@ -224,84 +224,104 @@ {b n w x w f q h p i} {e u b b i n a i o c d g} {v a z o i e n l x l r} {r u f o r k w m d w} {k s} {r f e j q p w} } -do_test 1.0 { - execsql { - BEGIN; - CREATE VIRTUAL TABLE tt USING fts5(a, b, c, d, e, f); - } - foreach {rowid a b c d e f} $data { - execsql { - INSERT INTO tt(rowid, a, b, c, d, e, f) - VALUES($rowid, $a, $b, $c, $d, $e, $f) - } - } - execsql { - COMMIT; - } +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE tt USING fts5(a, b, c, d, e, f); } {} -proc fts5_test_poslist {cmd} { - set res [list] - for {set 
i 0} {$i < [$cmd xInstCount]} {incr i} { - lappend res [string map {{ } .} [$cmd xInst $i]] - } - set res -} -sqlite3_fts5_create_function db fts5_test_poslist fts5_test_poslist - -proc matchdata {expr} { +fts5_aux_test_functions db + +proc matchdata {expr {order ASC}} { set tclexpr [db one { SELECT fts5_expr_tcl( $expr, 'nearset $cols -pc ::pc', 'a','b','c','d','e','f' ) }] set res [list] - db eval {SELECT rowid, * FROM tt} { + db eval "SELECT rowid, * FROM tt ORDER BY rowid $order" { set cols [list $a $b $c $d $e $f] set ::pc 0 set rowdata [eval $tclexpr] - - if {$rowdata != ""} { - lappend res $rowid $rowdata - } + if {$rowdata != ""} { lappend res $rowid $rowdata } } set res } + +proc do_auto_test {tn expr} { + foreach order {asc desc} { + set res [matchdata $expr $order] + set testname "3.$tn.[string range $order 0 0].rows=[expr [llength $res]/2]" + + set ::autotest_expr $expr + do_execsql_test $testname [subst -novar { + SELECT rowid, fts5_test_poslist(tt) FROM tt + WHERE tt MATCH $::autotest_expr ORDER BY rowid [set order] + }] $res + } + + +} #------------------------------------------------------------------------- # -do_execsql_test 2.0 { - SELECT rowid, fts5_test_poslist(tt) FROM tt WHERE tt MATCH 'a AND b'; -} [matchdata "a AND b"] - -do_test 2.1 { - llength [matchdata "a AND b"] -} 62 - -foreach {tn expr} { - 1 { [a] : x } - 2 { [a b] : x } - 3 { [a b f] : x } - 4 { [f a b] : x } - 5 { [f a b] : x y } - 6 { [f a b] : x + y } - 7 { [c a b] : x + c } - 8 { [c d] : "l m" } - 9 { [c e] : "l m" } -} { - set res [matchdata $expr] - do_test 3.$tn.[llength $res] { +for {set fold 0} {$fold < 3} {incr fold} { + switch $fold { + 0 { set map {} } + 1 { set map { + a a b a c b d b e c f c g d h d + i e j e k f l f m g g g o h p h + q i r i s j t j u k v k w l x l + y m z m + }} + + 2 { set map { + a a b a c a d a e a f a g a h a + i b j b k b l b m b g b o b p b + q c r c s c t c u c v c w c x c + }} + } + + execsql { + BEGIN; + DELETE FROM tt; + } + foreach {rowid 
a b c d e f} [string map $map $data] { execsql { - SELECT rowid, fts5_test_poslist(tt) FROM tt WHERE tt MATCH $expr + INSERT INTO tt(rowid, a, b, c, d, e, f) + VALUES($rowid, $a, $b, $c, $d, $e, $f) } - } $res -} + } + execsql COMMIT + + + foreach {tn expr} { + 3.1 { [a] : x } + 3.2 { [a b] : x } + 3.3 { [a b f] : x } + 3.4 { [f a b] : x } + 3.5 { [f a b] : x y } + 3.6 { [f a b] : x + y } + 3.7 { [c a b] : x + c } + 3.8 { [c d] : "l m" } + 3.9 { [c e] : "l m" } + + 4.1 { a NOT b } + 4.2 { a NOT a:b } + 4.3 { a OR (b AND c) } + 4.4 { a OR (b AND [a b c]:c) } + 4.5 { a OR "b c" } + 4.6 { a OR b OR c } + 5.1 { a OR (b AND "b c") } + 5.2 { a OR (b AND "z c") } + } { + do_auto_test 3.$fold.$tn $expr + } +} finish_test