Index: ext/fts5/fts5Int.h ================================================================== --- ext/fts5/fts5Int.h +++ ext/fts5/fts5Int.h @@ -473,11 +473,11 @@ Fts5Tokenizer**, fts5_tokenizer**, char **pzErr ); -Fts5Index *sqlite3Fts5IndexFromCsrid(Fts5Global*, i64, int*); +Fts5Index *sqlite3Fts5IndexFromCsrid(Fts5Global*, i64, Fts5Config **); /* ** End of interface to code in fts5.c. **************************************************************************/ Index: ext/fts5/fts5_index.c ================================================================== --- ext/fts5/fts5_index.c +++ ext/fts5/fts5_index.c @@ -4595,13 +4595,11 @@ Fts5IndexIter *pRet = 0; int iIdx = 0; Fts5Buffer buf = {0, 0, 0}; /* If the QUERY_SCAN flag is set, all other flags must be clear. */ - assert( (flags & FTS5INDEX_QUERY_SCAN)==0 - || (flags & FTS5INDEX_QUERY_SCAN)==FTS5INDEX_QUERY_SCAN - ); + assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN ); if( sqlite3Fts5BufferGrow(&p->rc, &buf, nToken+1)==0 ){ memcpy(&buf.p[1], pToken, nToken); #ifdef SQLITE_DEBUG Index: ext/fts5/fts5_main.c ================================================================== --- ext/fts5/fts5_main.c +++ ext/fts5/fts5_main.c @@ -2019,24 +2019,24 @@ /* ** Given cursor id iId, return a pointer to the corresponding Fts5Index ** object. Or NULL If the cursor id does not exist. ** -** If successful, set *pnCol to the number of indexed columns in the -** table before returning. +** If successful, set *ppConfig to point to the associated config object +** before returning. */ Fts5Index *sqlite3Fts5IndexFromCsrid( - Fts5Global *pGlobal, - i64 iCsrId, - int *pnCol + Fts5Global *pGlobal, /* FTS5 global context for db handle */ + i64 iCsrId, /* Id of cursor to find */ + Fts5Config **ppConfig /* OUT: Configuration object */ ){ Fts5Cursor *pCsr; Fts5Table *pTab; pCsr = fts5CursorFromCsrid(pGlobal, iCsrId); pTab = (Fts5Table*)pCsr->base.pVtab; - *pnCol = pTab->pConfig->nCol; + *ppConfig = pTab->pConfig; return pTab->pIndex; } /* Index: ext/fts5/fts5_vocab.c ================================================================== --- ext/fts5/fts5_vocab.c +++ ext/fts5/fts5_vocab.c @@ -53,27 +53,37 @@ Fts5Index *pIndex; /* Associated FTS5 index */ int bEof; /* True if this cursor is at EOF */ Fts5IndexIter *pIter; /* Term/rowid iterator object */ + int nLeTerm; /* Size of zLeTerm in bytes */ + char *zLeTerm; /* (term <= $zLeTerm) paramater, or NULL */ + /* These are used by 'col' tables only */ - int nCol; + Fts5Config *pConfig; /* Fts5 table configuration */ int iCol; i64 *aCnt; i64 *aDoc; - /* Output values */ + /* Output values used by 'row' and 'col' tables */ i64 rowid; /* This table's current rowid value */ Fts5Buffer term; /* Current value of 'term' column */ - i64 aVal[3]; /* Up to three columns left of 'term' */ }; #define FTS5_VOCAB_COL 0 #define FTS5_VOCAB_ROW 1 #define FTS5_VOCAB_COL_SCHEMA "term, col, doc, cnt" #define FTS5_VOCAB_ROW_SCHEMA "term, doc, cnt" + +/* +** Bits for the mask used as the idxNum value by xBestIndex/xFilter. +*/ +#define FTS5_VOCAB_TERM_EQ 0x01 +#define FTS5_VOCAB_TERM_GE 0x02 +#define FTS5_VOCAB_TERM_LE 0x04 + /* ** Translate a string containing an fts5vocab table type to an ** FTS5_VOCAB_XXX constant. If successful, set *peType to the output ** value and return SQLITE_OK. Otherwise, set *pzErr to an error message @@ -228,10 +238,50 @@ */ static int fts5VocabBestIndexMethod( sqlite3_vtab *pVTab, sqlite3_index_info *pInfo ){ + int i; + int iTermEq = -1; + int iTermGe = -1; + int iTermLe = -1; + int idxNum = 0; + int nArg = 0; + + for(i=0; inConstraint; i++){ + struct sqlite3_index_constraint *p = &pInfo->aConstraint[i]; + if( p->usable==0 ) continue; + if( p->iColumn==0 ){ /* term column */ + if( p->op==SQLITE_INDEX_CONSTRAINT_EQ ) iTermEq = i; + if( p->op==SQLITE_INDEX_CONSTRAINT_LE ) iTermLe = i; + if( p->op==SQLITE_INDEX_CONSTRAINT_LT ) iTermLe = i; + if( p->op==SQLITE_INDEX_CONSTRAINT_GE ) iTermGe = i; + if( p->op==SQLITE_INDEX_CONSTRAINT_GT ) iTermGe = i; + } + } + + if( iTermEq>=0 ){ + idxNum |= FTS5_VOCAB_TERM_EQ; + pInfo->aConstraintUsage[iTermEq].argvIndex = ++nArg; + pInfo->aConstraintUsage[iTermEq].omit = 1; + pInfo->estimatedCost = 100; + }else{ + pInfo->estimatedCost = 1000000; + if( iTermGe>=0 ){ + idxNum |= FTS5_VOCAB_TERM_GE; + pInfo->aConstraintUsage[iTermGe].argvIndex = ++nArg; + pInfo->estimatedCost = pInfo->estimatedCost / 2; + } + if( iTermLe>=0 ){ + idxNum |= FTS5_VOCAB_TERM_LE; + pInfo->aConstraintUsage[iTermLe].argvIndex = ++nArg; + pInfo->estimatedCost = pInfo->estimatedCost / 2; + } + } + + pInfo->idxNum = idxNum; + return SQLITE_OK; } /* ** Implementation of xOpen method. @@ -240,16 +290,15 @@ sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr ){ Fts5VocabTable *pTab = (Fts5VocabTable*)pVTab; Fts5Index *pIndex = 0; - int nCol = 0; + Fts5Config *pConfig = 0; Fts5VocabCursor *pCsr = 0; int rc = SQLITE_OK; sqlite3_stmt *pStmt = 0; char *zSql = 0; - int nByte; zSql = sqlite3Fts5Mprintf(&rc, "SELECT t.%Q FROM %Q.%Q AS t WHERE t.%Q MATCH '*id'", pTab->zFts5Tbl, pTab->zFts5Db, pTab->zFts5Tbl, pTab->zFts5Tbl ); @@ -260,11 +309,11 @@ assert( rc==SQLITE_OK || pStmt==0 ); if( rc==SQLITE_ERROR ) rc = SQLITE_OK; if( pStmt && sqlite3_step(pStmt)==SQLITE_ROW ){ i64 iId = sqlite3_column_int64(pStmt, 0); - pIndex = sqlite3Fts5IndexFromCsrid(pTab->pGlobal, iId, &nCol); + pIndex = sqlite3Fts5IndexFromCsrid(pTab->pGlobal, iId, &pConfig); } if( rc==SQLITE_OK && pIndex==0 ){ rc = sqlite3_finalize(pStmt); pStmt = 0; @@ -274,18 +323,21 @@ ); rc = SQLITE_ERROR; } } - nByte = nCol * sizeof(i64) * 2 + sizeof(Fts5VocabCursor); - pCsr = (Fts5VocabCursor*)sqlite3Fts5MallocZero(&rc, nByte); + if( rc==SQLITE_OK ){ + int nByte = pConfig->nCol * sizeof(i64) * 2 + sizeof(Fts5VocabCursor); + pCsr = (Fts5VocabCursor*)sqlite3Fts5MallocZero(&rc, nByte); + } + if( pCsr ){ pCsr->pIndex = pIndex; pCsr->pStmt = pStmt; - pCsr->nCol = nCol; + pCsr->pConfig = pConfig; pCsr->aCnt = (i64*)&pCsr[1]; - pCsr->aDoc = &pCsr->aCnt[nCol]; + pCsr->aDoc = &pCsr->aCnt[pConfig->nCol]; }else{ sqlite3_finalize(pStmt); } *ppCsr = (sqlite3_vtab_cursor*)pCsr; @@ -294,10 +346,13 @@ static void fts5VocabResetCursor(Fts5VocabCursor *pCsr){ pCsr->rowid = 0; sqlite3Fts5IterClose(pCsr->pIter); pCsr->pIter = 0; + sqlite3_free(pCsr->zLeTerm); + pCsr->nLeTerm = -1; + pCsr->zLeTerm = 0; } /* ** Close the cursor. For additional information see the documentation ** on the xClose method of the virtual table interface. @@ -317,31 +372,40 @@ */ static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){ Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab; int rc = SQLITE_OK; + int nCol = pCsr->pConfig->nCol; pCsr->rowid++; if( pTab->eType==FTS5_VOCAB_COL ){ - for(pCsr->iCol++; pCsr->iColnCol; pCsr->iCol++){ + for(pCsr->iCol++; pCsr->iColiCol++){ if( pCsr->aCnt[pCsr->iCol] ) break; } } - if( pTab->eType==FTS5_VOCAB_ROW || pCsr->iCol>=pCsr->nCol ){ + if( pTab->eType==FTS5_VOCAB_ROW || pCsr->iCol>=nCol ){ if( sqlite3Fts5IterEof(pCsr->pIter) ){ pCsr->bEof = 1; }else{ const char *zTerm; int nTerm; zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); + if( pCsr->nLeTerm>=0 ){ + int nCmp = MIN(nTerm, pCsr->nLeTerm); + int bCmp = memcmp(pCsr->zLeTerm, zTerm, nCmp); + if( bCmp<0 || (bCmp==0 && pCsr->nLeTermbEof = 1; + return SQLITE_OK; + } + } + sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm); - memset(pCsr->aVal, 0, sizeof(pCsr->aVal)); - memset(pCsr->aCnt, 0, pCsr->nCol * sizeof(i64)); - memset(pCsr->aDoc, 0, pCsr->nCol * sizeof(i64)); + memset(pCsr->aCnt, 0, nCol * sizeof(i64)); + memset(pCsr->aDoc, 0, nCol * sizeof(i64)); pCsr->iCol = 0; assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW ); while( rc==SQLITE_OK ){ i64 dummy; @@ -351,13 +415,13 @@ rc = sqlite3Fts5IterPoslist(pCsr->pIter, 0, &pPos, &nPos, &dummy); if( rc==SQLITE_OK ){ if( pTab->eType==FTS5_VOCAB_ROW ){ while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ - pCsr->aVal[1]++; + pCsr->aCnt[0]++; } - pCsr->aVal[0]++; + pCsr->aDoc[0]++; }else{ int iCol = -1; while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ int ii = FTS5_POS2COLUMN(iPos); pCsr->aCnt[ii]++; @@ -367,24 +431,25 @@ } } } rc = sqlite3Fts5IterNextScan(pCsr->pIter); } + if( rc==SQLITE_OK ){ zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); - if( nTerm!=pCsr->term.n || memcmp(zTerm, pCsr->term.p, nTerm) ) break; + if( nTerm!=pCsr->term.n || memcmp(zTerm, pCsr->term.p, nTerm) ){ + break; + } if( sqlite3Fts5IterEof(pCsr->pIter) ) break; } } } } if( pCsr->bEof==0 && pTab->eType==FTS5_VOCAB_COL ){ while( pCsr->aCnt[pCsr->iCol]==0 ) pCsr->iCol++; - pCsr->aVal[0] = pCsr->iCol; - pCsr->aVal[1] = pCsr->aDoc[pCsr->iCol]; - pCsr->aVal[2] = pCsr->aCnt[pCsr->iCol]; + assert( pCsr->iColpConfig->nCol ); } return rc; } /* @@ -397,14 +462,50 @@ int nVal, /* Number of elements in apVal */ sqlite3_value **apVal /* Arguments for the indexing scheme */ ){ Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; int rc; - const int flags = FTS5INDEX_QUERY_SCAN; + + int iVal = 0; + int f = FTS5INDEX_QUERY_SCAN; + const char *zTerm = 0; + int nTerm = 0; + + sqlite3_value *pEq = 0; + sqlite3_value *pGe = 0; + sqlite3_value *pLe = 0; fts5VocabResetCursor(pCsr); - rc = sqlite3Fts5IndexQuery(pCsr->pIndex, 0, 0, flags, 0, &pCsr->pIter); + if( idxNum & FTS5_VOCAB_TERM_EQ ) pEq = apVal[iVal++]; + if( idxNum & FTS5_VOCAB_TERM_GE ) pGe = apVal[iVal++]; + if( idxNum & FTS5_VOCAB_TERM_LE ) pLe = apVal[iVal++]; + + if( pEq ){ + zTerm = sqlite3_value_text(pEq); + nTerm = sqlite3_value_bytes(pEq); + f = 0; + }else{ + if( pGe ){ + zTerm = sqlite3_value_text(pGe); + nTerm = sqlite3_value_bytes(pGe); + } + if( pLe ){ + const char *zCopy = sqlite3_value_text(pLe); + pCsr->nLeTerm = sqlite3_value_bytes(pLe); + pCsr->zLeTerm = sqlite3_malloc(pCsr->nLeTerm+1); + if( pCsr->zLeTerm==0 ){ + rc = SQLITE_NOMEM; + }else{ + memcpy(pCsr->zLeTerm, zCopy, pCsr->nLeTerm+1); + } + } + } + + + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5IndexQuery(pCsr->pIndex, zTerm, nTerm, f, 0, &pCsr->pIter); + } if( rc==SQLITE_OK ){ rc = fts5VocabNextMethod(pCursor); } return rc; @@ -423,21 +524,33 @@ sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ int iCol /* Index of column to read value from */ ){ Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; - switch( iCol ){ - case 0: /* term */ - sqlite3_result_text( - pCtx, (const char*)pCsr->term.p, pCsr->term.n, SQLITE_TRANSIENT - ); - break; - - default: - assert( iCol<4 && iCol>0 ); - sqlite3_result_int64(pCtx, pCsr->aVal[iCol-1]); - break; + + if( iCol==0 ){ + sqlite3_result_text( + pCtx, (const char*)pCsr->term.p, pCsr->term.n, SQLITE_TRANSIENT + ); + } + else if( ((Fts5VocabTable*)(pCursor->pVtab))->eType==FTS5_VOCAB_COL ){ + assert( iCol==1 || iCol==2 || iCol==3 ); + if( iCol==1 ){ + const char *z = pCsr->pConfig->azCol[pCsr->iCol]; + sqlite3_result_text(pCtx, z, -1, SQLITE_STATIC); + }else if( iCol==2 ){ + sqlite3_result_int64(pCtx, pCsr->aDoc[pCsr->iCol]); + }else{ + sqlite3_result_int64(pCtx, pCsr->aCnt[pCsr->iCol]); + } + }else{ + assert( iCol==1 || iCol==2 ); + if( iCol==1 ){ + sqlite3_result_int64(pCtx, pCsr->aDoc[0]); + }else{ + sqlite3_result_int64(pCtx, pCsr->aCnt[0]); + } } return SQLITE_OK; } /* Index: ext/fts5/test/fts5vocab.test ================================================================== --- ext/fts5/test/fts5vocab.test +++ ext/fts5/test/fts5vocab.test @@ -54,11 +54,11 @@ SELECT * FROM v1; } {x 2 4 y 1 1 z 1 1} do_execsql_test 1.4.2 { SELECT * FROM v2; -} {x 0 2 4 y 0 1 1 z 0 1 1} +} {x one 2 4 y one 1 1 z one 1 1} do_execsql_test 1.5.1 { BEGIN; INSERT INTO t1 VALUES('a b c'); SELECT * FROM v1 WHERE term<'d'; @@ -65,11 +65,11 @@ } {a 1 1 b 1 1 c 1 1} do_execsql_test 1.5.2 { SELECT * FROM v2 WHERE term<'d'; COMMIT; -} {a 0 1 1 b 0 1 1 c 0 1 1} +} {a one 1 1 b one 1 1 c one 1 1} do_execsql_test 1.6 { DELETE FROM t1 WHERE one = 'a b c'; SELECT * FROM v1; } {x 2 4 y 1 1 z 1 1} @@ -89,18 +89,18 @@ INSERT INTO tt VALUES('e f a c c b', 'b f e a f d y'); INSERT INTO tt VALUES('c c a a c f', 'd g a e b g'); } set res_col { - a 0 6 11 a 1 7 9 - b 0 6 7 b 1 7 7 - c 0 6 12 c 1 5 8 - d 0 4 6 d 1 9 13 - e 0 6 7 e 1 6 6 - f 0 9 10 f 1 7 10 - g 0 5 7 g 1 5 7 - x 0 1 1 y 1 1 1 + a a 6 11 a b 7 9 + b a 6 7 b b 7 7 + c a 6 12 c b 5 8 + d a 4 6 d b 9 13 + e a 6 7 e b 6 6 + f a 9 10 f b 7 10 + g a 5 7 g b 5 7 + x a 1 1 y b 1 1 } set res_row { a 10 20 b 9 14 c 9 20 d 9 19 e 8 13 f 10 20 g 7 14 x 1 1 y 1 1 @@ -210,8 +210,131 @@ do_catchsql_test 6.2 { CREATE VIRTUAL TABLE vocab3 USING fts5vocab(lll, row); SELECT * FROM vocab3; } {1 {no such fts5 table: main.lll}} + +#------------------------------------------------------------------------- +# Test single term queries on fts5vocab tables (i.e. those with term=? +# constraints in the WHERE clause). +# +do_execsql_test 7.0 { + CREATE VIRTUAL TABLE tx USING fts5(one, two); + INSERT INTO tx VALUES('g a ggg g a b eee', 'cc d aa ff g ee'); + INSERT INTO tx VALUES('dd fff i a i jjj', 'f fff hh jj e f'); + INSERT INTO tx VALUES('ggg a f f fff dd aa', 'd ggg f f j gg ddd'); + INSERT INTO tx VALUES('e bb h jjj ii gg', 'e aa e f c fff'); + INSERT INTO tx VALUES('j ff aa a h', 'h a j bbb bb'); + INSERT INTO tx VALUES('cc i ff c d f', 'dd ii fff f c cc d'); + INSERT INTO tx VALUES('jjj g i bb cc eee', 'hhh iii aaa b bbb aaa'); + INSERT INTO tx VALUES('hhh hhh hhh bb fff f', 'fff gg aa ii h a'); + INSERT INTO tx VALUES('b c cc aaa iii ggg f', 'iii ff ee a ff c cc'); + INSERT INTO tx VALUES('hhh b hhh aaa j i i', 'dd ee ee aa bbb iii'); + INSERT INTO tx VALUES('hh dd h b g ff i', 'ccc bb cc ccc f a d'); + INSERT INTO tx VALUES('g d b ggg jj', 'fff jj ff jj g gg ee'); + INSERT INTO tx VALUES('g ee ggg ggg cc bb eee', 'aa j jjj bbb dd eee ff'); + INSERT INTO tx VALUES('c jjj hh ddd dd h', 'e aaa h jjj gg'); + + CREATE VIRTUAL TABLE txr USING fts5vocab(tx, row); + CREATE VIRTUAL TABLE txc USING fts5vocab(tx, col); +} + +proc cont {L elem} { + set n 0 + foreach e $L { if {$elem==$e} {incr n} } + set n +} +db func cont cont + +foreach {term} { + a aa aaa + b bb bbb + c cc ccc + d dd ddd + e ee eee + f ff fff + g gg ggg + h hh hhh + i ii iii + j jj jjj +} { + set resr [db eval { + SELECT $term, + sum(cont(one || ' ' || two, $term) > 0), + sum(cont(one || ' ' || two, $term)) + FROM tx + }] + if {[lindex $resr 1]==0} {set resr [list]} + + set r1 [db eval { + SELECT $term, 'one', sum(cont(one, $term)>0), sum(cont(one, $term)) FROM tx + }] + if {[lindex $r1 2]==0} {set r1 [list]} + + set r2 [db eval { + SELECT $term, 'two', sum(cont(two, $term)>0), sum(cont(two, $term)) FROM tx + }] + if {[lindex $r2 2]==0} {set r2 [list]} + + set resc [concat $r1 $r2] + do_execsql_test 7.$term.1 {SELECT * FROM txc WHERE term=$term} $resc + do_execsql_test 7.$term.2 {SELECT * FROM txr WHERE term=$term} $resr +} + +do_execsql_test 7.1 { + CREATE TABLE txr_c AS SELECT * FROM txr; + CREATE TABLE txc_c AS SELECT * FROM txc; +} + +# Test range queries on the fts5vocab tables created above. +# +foreach {tn a b} { + 1 a jjj + 2 bb j + 3 ccc ddd + 4 dd xyz + 5 xzy dd + 6 h hh +} { + do_execsql_test 7.2.$tn.1 { + SELECT * FROM txr WHERE term>=$a + } [db eval {SELECT * FROM txr_c WHERE term>=$a}] + do_execsql_test 7.2.$tn.2 { + SELECT * FROM txr WHERE term<=$b + } [db eval {SELECT * FROM txr_c WHERE term <=$b}] + do_execsql_test 7.2.$tn.3 { + SELECT * FROM txr WHERE term>=$a AND term<=$b + } [db eval {SELECT * FROM txr_c WHERE term>=$a AND term <=$b}] + + do_execsql_test 7.2.$tn.4 { + SELECT * FROM txc WHERE term>=$a + } [db eval {SELECT * FROM txc_c WHERE term>=$a}] + do_execsql_test 7.2.$tn.5 { + SELECT * FROM txc WHERE term<=$b + } [db eval {SELECT * FROM txc_c WHERE term <=$b}] + do_execsql_test 7.2.$tn.6 { + SELECT * FROM txc WHERE term>=$a AND term<=$b + } [db eval {SELECT * FROM txc_c WHERE term>=$a AND term <=$b}] + + do_execsql_test 7.2.$tn.7 { + SELECT * FROM txr WHERE term>$a + } [db eval {SELECT * FROM txr_c WHERE term>$a}] + do_execsql_test 7.2.$tn.8 { + SELECT * FROM txr WHERE term<$b + } [db eval {SELECT * FROM txr_c WHERE term<$b}] + do_execsql_test 7.2.$tn.9 { + SELECT * FROM txr WHERE term>$a AND term<$b + } [db eval {SELECT * FROM txr_c WHERE term>$a AND term <$b}] + + do_execsql_test 7.2.$tn.10 { + SELECT * FROM txc WHERE term>$a + } [db eval {SELECT * FROM txc_c WHERE term>$a}] + do_execsql_test 7.2.$tn.11 { + SELECT * FROM txc WHERE term<$b + } [db eval {SELECT * FROM txc_c WHERE term<$b}] + do_execsql_test 7.2.$tn.12 { + SELECT * FROM txc WHERE term>$a AND term<$b + } [db eval {SELECT * FROM txc_c WHERE term>$a AND term <$b}] +} finish_test