Index: ext/fts5/fts5Int.h ================================================================== --- ext/fts5/fts5Int.h +++ ext/fts5/fts5Int.h @@ -79,10 +79,24 @@ #else # define assert_nc(x) assert(x) #endif typedef struct Fts5Global Fts5Global; +typedef struct Fts5ExprColset Fts5ExprColset; + +/* If a NEAR() clump or phrase may only match a specific set of columns, +** then an object of the following type is used to record the set of columns. +** Each entry in the aiCol[] array is a column that may be matched. +** +** This object is used by fts5_expr.c and fts5_index.c. +*/ +struct Fts5ExprColset { + int nCol; + int aiCol[1]; +}; + + /************************************************************************** ** Interface to code in fts5_config.c. fts5_config.c contains contains code ** to parse the arguments passed to the CREATE VIRTUAL TABLE statement. */ @@ -303,11 +317,11 @@ int sqlite3Fts5IndexOpen(Fts5Config *pConfig, int bCreate, Fts5Index**, char**); int sqlite3Fts5IndexClose(Fts5Index *p); /* ** for( -** pIter = sqlite3Fts5IndexQuery(p, "token", 5, 0); +** sqlite3Fts5IndexQuery(p, "token", 5, 0, 0, &pIter); ** 0==sqlite3Fts5IterEof(pIter); ** sqlite3Fts5IterNext(pIter) ** ){ ** i64 iRowid = sqlite3Fts5IterRowid(pIter); ** } @@ -319,11 +333,12 @@ */ int sqlite3Fts5IndexQuery( Fts5Index *p, /* FTS index to query */ const char *pToken, int nToken, /* Token (or prefix) to query for */ int flags, /* Mask of FTS5INDEX_QUERY_X flags */ - Fts5IndexIter **ppIter + Fts5ExprColset *pColset, /* Match these columns only */ + Fts5IndexIter **ppIter /* OUT: New iterator object */ ); /* ** The various operations on open token or token prefix iterators opened ** using sqlite3Fts5IndexQuery(). @@ -565,11 +580,10 @@ typedef struct Fts5ExprNode Fts5ExprNode; typedef struct Fts5Parse Fts5Parse; typedef struct Fts5Token Fts5Token; typedef struct Fts5ExprPhrase Fts5ExprPhrase; typedef struct Fts5ExprNearset Fts5ExprNearset; -typedef struct Fts5ExprColset Fts5ExprColset; struct Fts5Token { const char *p; /* Token text (not NULL terminated) */ int n; /* Size of buffer p in bytes */ }; Index: ext/fts5/fts5_expr.c ================================================================== --- ext/fts5/fts5_expr.c +++ ext/fts5/fts5_expr.c @@ -87,20 +87,10 @@ Fts5Buffer poslist; /* Current position list */ int nTerm; /* Number of entries in aTerm[] */ Fts5ExprTerm aTerm[1]; /* Terms that make up this phrase */ }; -/* -** If a NEAR() clump may only match a specific set of columns, then -** Fts5ExprNearset.pColset points to an object of the following type. -** Each entry in the aiCol[] array -*/ -struct Fts5ExprColset { - int nCol; - int aiCol[1]; -}; - /* ** One or more phrases that must appear within a certain token distance of ** each other within each matching document. */ struct Fts5ExprNearset { @@ -1000,10 +990,11 @@ } rc = sqlite3Fts5IndexQuery( pExpr->pIndex, p->zTerm, strlen(p->zTerm), (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0), + pNear->pColset, &p->pIter ); assert( rc==SQLITE_OK || p->pIter==0 ); if( p->pIter && 0==sqlite3Fts5IterEof(p->pIter) ){ bEof = 0; Index: ext/fts5/fts5_index.c ================================================================== --- ext/fts5/fts5_index.c +++ ext/fts5/fts5_index.c @@ -3940,16 +3940,85 @@ return fts5IndexReturn(p); } static void fts5PoslistCallback( Fts5Index *p, - void *pCtx, + void *pContext, + const u8 *pChunk, int nChunk +){ + assert_nc( nChunk>=0 ); + if( nChunk>0 ){ + fts5BufferAppendBlob(&p->rc, (Fts5Buffer*)pContext, nChunk, pChunk); + } +} + +typedef struct PoslistCallbackCtx PoslistCallbackCtx; +struct PoslistCallbackCtx { + Fts5Buffer *pBuf; /* Append to this buffer */ + Fts5ExprColset *pColset; /* Restrict matches to this column */ + int eState; /* See above */ +}; + +/* +** TODO: Make this more efficient! +*/ +static int fts5IndexColsetTest(Fts5ExprColset *pColset, int iCol){ + int i; + for(i=0; inCol; i++){ + if( pColset->aiCol[i]==iCol ) return 1; + } + return 0; +} + +static void fts5PoslistFilterCallback( + Fts5Index *p, + void *pContext, const u8 *pChunk, int nChunk ){ + PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext; assert_nc( nChunk>=0 ); if( nChunk>0 ){ - fts5BufferAppendBlob(&p->rc, (Fts5Buffer*)pCtx, nChunk, pChunk); + /* Search through to find the first varint with value 1. This is the + ** start of the next columns hits. */ + int i = 0; + int iStart = 0; + + if( pCtx->eState==2 ){ + int iCol; + fts5IndexGetVarint32(pChunk, i, iCol); + if( fts5IndexColsetTest(pCtx->pColset, iCol) ){ + pCtx->eState = 1; + fts5BufferAppendVarint(&p->rc, pCtx->pBuf, 1); + }else{ + pCtx->eState = 0; + } + } + + do { + while( ieState ){ + fts5BufferAppendBlob(&p->rc, pCtx->pBuf, i-iStart, &pChunk[iStart]); + } + if( i=nChunk ){ + pCtx->eState = 2; + }else{ + fts5IndexGetVarint32(pChunk, i, iCol); + pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol); + if( pCtx->eState ){ + fts5BufferAppendBlob(&p->rc, pCtx->pBuf, i-iStart, &pChunk[iStart]); + iStart = i; + } + } + } + }while( irc. It is assumed ** no error has already occurred when this function is called. */ -static void fts5MultiIterPoslist( +static int fts5MultiIterPoslist( Fts5Index *p, Fts5IndexIter *pMulti, + Fts5ExprColset *pColset, int bSz, /* Append a size field before the data */ Fts5Buffer *pBuf ){ if( p->rc==SQLITE_OK ){ + int iSz; + int iData; + Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; assert( fts5MultiIterEof(p, pMulti)==0 ); if( bSz ){ /* WRITEPOSLISTSIZE */ + iSz = pBuf->n; fts5BufferAppendVarint(&p->rc, pBuf, pSeg->nPos*2); + iData = pBuf->n; } - fts5SegiterPoslist(p, pSeg, pBuf); + + fts5SegiterPoslist(p, pSeg, pColset, pBuf); + + if( bSz && pColset ){ + int nActual = pBuf->n - iData; + if( nActual!=pSeg->nPos ){ + /* WRITEPOSLISTSIZE */ + if( nActual==0 ){ + return 1; + }else{ + int nReq = sqlite3Fts5GetVarintLen((u32)(nActual*2)); + while( iSz<(iData-nReq) ){ pBuf->p[iSz++] = 0x80; } + sqlite3Fts5PutVarint(&pBuf->p[iSz], nActual*2); + } + } + } } + + return 0; } static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ u8 *p = pIter->aPoslist + pIter->nPoslist; @@ -4147,11 +4249,12 @@ static void fts5SetupPrefixIter( Fts5Index *p, /* Index to read from */ int bDesc, /* True for "ORDER BY rowid DESC" */ const u8 *pToken, /* Buffer containing prefix to match */ int nToken, /* Size of buffer pToken in bytes */ - Fts5IndexIter **ppIter /* OUT: New iterator */ + Fts5ExprColset *pColset, /* Restrict matches to these columns */ + Fts5IndexIter **ppIter /* OUT: New iterator */ ){ Fts5Structure *pStruct; Fts5Buffer *aBuf; const int nBuf = 32; @@ -4190,12 +4293,18 @@ } iLastRowid = 0; } if( 0==sqlite3Fts5BufferGrow(&p->rc, &doclist, 9) ){ - fts5MergeAppendDocid(&doclist, iLastRowid, iRowid); - fts5MultiIterPoslist(p, p1, 1, &doclist); + int iSave = doclist.n; + assert( doclist.n!=0 || iLastRowid==0 ); + fts5BufferSafeAppendVarint(&doclist, iRowid - iLastRowid); + if( fts5MultiIterPoslist(p, p1, pColset, 1, &doclist) ){ + doclist.n = iSave; + }else{ + iLastRowid = iRowid; + } } } for(i=0; irc==SQLITE_OK ){ @@ -4425,10 +4534,11 @@ */ int sqlite3Fts5IndexQuery( Fts5Index *p, /* FTS index to query */ const char *pToken, int nToken, /* Token (or prefix) to query for */ int flags, /* Mask of FTS5INDEX_QUERY_X flags */ + Fts5ExprColset *pColset, /* Match these columns only */ Fts5IndexIter **ppIter /* OUT: New iterator object */ ){ Fts5Config *pConfig = p->pConfig; Fts5IndexIter *pRet = 0; int iIdx = 0; @@ -4468,11 +4578,11 @@ fts5StructureRelease(pStruct); } }else{ int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0; buf.p[0] = FTS5_MAIN_PREFIX; - fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, &pRet); + fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, pColset, &pRet); } if( p->rc ){ sqlite3Fts5IterClose(pRet); pRet = 0; @@ -4570,11 +4680,11 @@ *pn = pSeg->nPos; if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->szLeaf ){ *pp = &pSeg->pLeaf->p[pSeg->iLeafOffset]; }else{ fts5BufferZero(&pIter->poslist); - fts5SegiterPoslist(pIter->pIndex, pSeg, &pIter->poslist); + fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist); *pp = pIter->poslist.p; } return fts5IndexReturn(pIter->pIndex); } @@ -4586,11 +4696,11 @@ int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf){ Fts5Index *p = pIter->pIndex; assert( p->rc==SQLITE_OK ); fts5BufferZero(pBuf); - fts5MultiIterPoslist(p, pIter, 0, pBuf); + fts5MultiIterPoslist(p, pIter, 0, 0, pBuf); return fts5IndexReturn(p); } /* ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery(). @@ -4761,11 +4871,11 @@ int flags, /* Flags for Fts5IndexQuery */ u64 *pCksum /* IN/OUT: Checksum value */ ){ u64 cksum = *pCksum; Fts5IndexIter *pIdxIter = 0; - int rc = sqlite3Fts5IndexQuery(p, z, n, flags, &pIdxIter); + int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIdxIter); while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){ i64 dummy; const u8 *pPos; int nPos; @@ -5135,11 +5245,11 @@ /* If this is a new term, query for it. Update cksum3 with the results. */ fts5TestTerm(p, &term, z, n, cksum2, &cksum3); poslist.n = 0; - fts5MultiIterPoslist(p, pIter, 0, &poslist); + fts5MultiIterPoslist(p, pIter, 0, 0, &poslist); while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){ int iCol = FTS5_POS2COLUMN(iPos); int iTokOff = FTS5_POS2OFFSET(iPos); cksum2 ^= fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n); } Index: ext/fts5/fts5_vocab.c ================================================================== --- ext/fts5/fts5_vocab.c +++ ext/fts5/fts5_vocab.c @@ -400,11 +400,11 @@ Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; int rc; const int flags = FTS5INDEX_QUERY_SCAN; fts5VocabResetCursor(pCsr); - rc = sqlite3Fts5IndexQuery(pCsr->pIndex, 0, 0, flags, &pCsr->pIter); + rc = sqlite3Fts5IndexQuery(pCsr->pIndex, 0, 0, flags, 0, &pCsr->pIter); if( rc==SQLITE_OK ){ rc = fts5VocabNextMethod(pCursor); } return rc; Index: ext/fts5/test/fts5prefix.test ================================================================== --- ext/fts5/test/fts5prefix.test +++ ext/fts5/test/fts5prefix.test @@ -60,8 +60,91 @@ 2 "SELECT rowid FROM t1 WHERE t1 MATCH '\u1234\u5678*'" 2 } { do_execsql_test 2.3.$tn $q $res } +#------------------------------------------------------------------------- +# Check that prefix queries with: +# +# * a column filter, and +# * no prefix index. +# +# work Ok. +# +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE t3 USING fts5(a, b, c); + INSERT INTO t3(t3, rank) VALUES('pgsz', 32); + BEGIN; + INSERT INTO t3 VALUES('acb ccc bba', 'cca bba bca', 'bbc ccc bca'); -- 1 + INSERT INTO t3 VALUES('cbb cac cab', 'abb aac bba', 'aab ccc cac'); -- 2 + INSERT INTO t3 VALUES('aac bcb aac', 'acb bcb caa', 'aca bab bca'); -- 3 + INSERT INTO t3 VALUES('aab ccb ccc', 'aca cba cca', 'aca aac cbb'); -- 4 + INSERT INTO t3 VALUES('bac aab bab', 'ccb bac cba', 'acb aba abb'); -- 5 + INSERT INTO t3 VALUES('bab abc ccb', 'acb cba abb', 'cbb aaa cab'); -- 6 + INSERT INTO t3 VALUES('cbb bbc baa', 'aab aca baa', 'bcc cca aca'); -- 7 + INSERT INTO t3 VALUES('abc bba abb', 'cac abc cba', 'acc aac cac'); -- 8 + INSERT INTO t3 VALUES('bbc bbc cab', 'bcb ccb cba', 'bcc cac acb'); -- 9 + COMMIT; +} + +foreach {tn match res} { + 1 "a : c*" {1 2 4 6 7 9} + 2 "b : c*" {1 3 4 5 6 8 9} + 3 "c : c*" {1 2 4 6 7 8 9} + 4 "a : b*" {1 3 5 6 7 8 9} + 5 "b : b*" {1 2 3 5 7 9} + 6 "c : b*" {1 3 7 9} + 7 "a : a*" {1 3 4 5 6 8} + 8 "b : a*" {2 3 4 6 7 8} + 9 "c : a*" {2 3 4 5 6 7 8 9} +} { + do_execsql_test 3.1.$tn { + SELECT rowid FROM t3($match) + } $res +} + +do_test 3.2 { + expr srand(0) + execsql { DELETE FROM t3 } + for {set i 0} {$i < 1000} {incr i} { + set a [fts5_rnddoc 3] + set b [fts5_rnddoc 8] + set c [fts5_rnddoc 20] + execsql { INSERT INTO t3 VALUES($a, $b, $c) } + } + execsql { INSERT INTO t3(t3) VALUES('integrity-check') } +} {} + +proc gmatch {col pattern} { + expr {[lsearch -glob $col $pattern]>=0} +} +db func gmatch gmatch + +for {set x 0} {$x<2} {incr x} { + foreach {tn pattern} { + 1 {xa*} + 2 {xb*} + 3 {xc*} + 4 {xd*} + 5 {xe*} + 6 {xf*} + 7 {xg*} + 8 {xh*} + 9 {xi*} + 10 {xj*} + } { + foreach col {b} { + set res [db eval "SELECT rowid FROM t3 WHERE gmatch($col, '$pattern')"] + set query "$col : $pattern" + do_execsql_test 3.3.$x.$tn.$col { + SELECT rowid FROM t3($query); + } $res + } + } + execsql { INSERT INTO t3(t3) VALUES('optimize') } + execsql { INSERT INTO t3(t3) VALUES('integrity-check') } +} + finish_test + Index: ext/fts5/test/fts5simple.test ================================================================== --- ext/fts5/test/fts5simple.test +++ ext/fts5/test/fts5simple.test @@ -237,9 +237,20 @@ do_execsql_test 9.3 { SELECT rowid FROM ft2('b AND c'); } {2} +#------------------------------------------------------------------------- +# +do_execsql_test 10.0 { + CREATE VIRTUAL TABLE t3 USING fts5(a, b, c); + INSERT INTO t3 VALUES('bac aab bab', 'c bac c', 'acb aba abb'); -- 1 + INSERT INTO t3 VALUES('bab abc c', 'acb c abb', 'c aaa c'); -- 2 +} + +do_execsql_test 10.1 { + SELECT rowid FROM t3('c: c*'); +} {2} finish_test