Index: ext/fts5/fts5.h ================================================================== --- ext/fts5/fts5.h +++ ext/fts5/fts5.h @@ -107,10 +107,16 @@ ** xInst: ** Query for the details of phrase match iIdx within the current row. ** Phrase matches are numbered starting from zero, so the iIdx argument ** should be greater than or equal to zero and smaller than the value ** output by xInstCount(). +** +** Usually, output parameter *piPhrase is set to the phrase number, *piCol +** to the column in which it occurs and *piOff the token offset of the +** first token of the phrase. The exception is if the table was created +** with the offsets=0 option specified. In this case *piOff is always +** set to -1. ** ** Returns SQLITE_OK if successful, or an error code (i.e. SQLITE_NOMEM) ** if an error occurs. ** ** xRowid: @@ -194,11 +200,11 @@ ** through instances of phrase iPhrase, use the following code: ** ** Fts5PhraseIter iter; ** int iCol, iOff; ** for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff); -** iOff>=0; +** iCol>=0; ** pApi->xPhraseNext(pFts, &iter, &iCol, &iOff) ** ){ ** // An instance of phrase iPhrase at offset iOff of column iCol ** } ** @@ -208,11 +214,11 @@ ** ** xPhraseNext() ** See xPhraseFirst above. */ struct Fts5ExtensionApi { - int iVersion; /* Currently always set to 1 */ + int iVersion; /* Currently always set to 3 */ void *(*xUserData)(Fts5Context*); int (*xColumnCount)(Fts5Context*); int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow); @@ -238,12 +244,15 @@ int(*)(const Fts5ExtensionApi*,Fts5Context*,void*) ); int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*)); void *(*xGetAuxdata)(Fts5Context*, int bClear); - void (*xPhraseFirst)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*, int*); + int (*xPhraseFirst)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*, int*); void (*xPhraseNext)(Fts5Context*, Fts5PhraseIter*, int *piCol, int *piOff); + + int (*xPhraseFirstColumn)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*); + void (*xPhraseNextColumn)(Fts5Context*, Fts5PhraseIter*, int *piCol); }; /* ** CUSTOM AUXILIARY FUNCTIONS *************************************************************************/ Index: ext/fts5/fts5Int.h ================================================================== --- ext/fts5/fts5Int.h +++ ext/fts5/fts5Int.h @@ -149,10 +149,11 @@ int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */ int eContent; /* An FTS5_CONTENT value */ char *zContent; /* content table */ char *zContentRowid; /* "content_rowid=" option value */ int bColumnsize; /* "columnsize=" option value (dflt==1) */ + int eDetail; /* FTS5_DETAIL_XXX value */ char *zContentExprlist; Fts5Tokenizer *pTok; fts5_tokenizer *pTokApi; /* Values loaded from the %_config table */ @@ -177,10 +178,13 @@ #define FTS5_CONTENT_NORMAL 0 #define FTS5_CONTENT_NONE 1 #define FTS5_CONTENT_EXTERNAL 2 +#define FTS5_DETAIL_FULL 0 +#define FTS5_DETAIL_NONE 1 +#define FTS5_DETAIL_COLUMNS 2 int sqlite3Fts5ConfigParse( Fts5Global*, sqlite3*, int, const char **, Fts5Config**, char** @@ -290,10 +294,17 @@ char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn); /* Character set tests (like isspace(), isalpha() etc.) */ int sqlite3Fts5IsBareword(char t); + +/* Bucket of terms object used by the integrity-check in offsets=0 mode. */ +typedef struct Fts5Termset Fts5Termset; +int sqlite3Fts5TermsetNew(Fts5Termset**); +int sqlite3Fts5TermsetAdd(Fts5Termset*, int, const char*, int, int *pbPresent); +void sqlite3Fts5TermsetFree(Fts5Termset*); + /* ** End of interface to code in fts5_buffer.c. **************************************************************************/ /************************************************************************** @@ -434,10 +445,12 @@ int sqlite3Fts5IndexOptimize(Fts5Index *p); int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge); int sqlite3Fts5IndexLoadConfig(Fts5Index *p); +int sqlite3Fts5IterCollist(Fts5IndexIter*, const u8 **, int*); + /* ** End of interface to code in fts5_index.c. **************************************************************************/ /************************************************************************** @@ -490,11 +503,11 @@ typedef struct Fts5Hash Fts5Hash; /* ** Create a hash table, free a hash table. */ -int sqlite3Fts5HashNew(Fts5Hash**, int *pnSize); +int sqlite3Fts5HashNew(Fts5Config*, Fts5Hash**, int *pnSize); void sqlite3Fts5HashFree(Fts5Hash*); int sqlite3Fts5HashWrite( Fts5Hash*, i64 iRowid, /* Rowid for this entry */ @@ -626,12 +639,22 @@ int sqlite3Fts5ExprInit(Fts5Global*, sqlite3*); int sqlite3Fts5ExprPhraseCount(Fts5Expr*); int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase); int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **); + +typedef struct Fts5PoslistPopulator Fts5PoslistPopulator; +Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr*, int); +int sqlite3Fts5ExprPopulatePoslists( + Fts5Config*, Fts5Expr*, Fts5PoslistPopulator*, int, const char*, int +); +void sqlite3Fts5ExprCheckPoslists(Fts5Expr*, i64); +void sqlite3Fts5ExprClearEof(Fts5Expr*); int sqlite3Fts5ExprClonePhrase(Fts5Config*, Fts5Expr*, int, Fts5Expr**); + +int sqlite3Fts5ExprPhraseCollist(Fts5Expr *, int, const u8 **, int *); /******************************************* ** The fts5_expr.c API above this point is used by the other hand-written ** C code in this module. The interfaces below this point are called by ** the parser code in fts5parse.y. */ Index: ext/fts5/fts5_buffer.c ================================================================== --- ext/fts5/fts5_buffer.c +++ ext/fts5/fts5_buffer.c @@ -288,7 +288,90 @@ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 /* 0x70 .. 0x7F */ }; return (t & 0x80) || aBareword[(int)t]; } + + +/************************************************************************* +*/ +typedef struct Fts5TermsetEntry Fts5TermsetEntry; +struct Fts5TermsetEntry { + char *pTerm; + int nTerm; + int iIdx; /* Index (main or aPrefix[] entry) */ + Fts5TermsetEntry *pNext; +}; + +struct Fts5Termset { + Fts5TermsetEntry *apHash[512]; +}; + +int sqlite3Fts5TermsetNew(Fts5Termset **pp){ + int rc = SQLITE_OK; + *pp = sqlite3Fts5MallocZero(&rc, sizeof(Fts5Termset)); + return rc; +} + +int sqlite3Fts5TermsetAdd( + Fts5Termset *p, + int iIdx, + const char *pTerm, int nTerm, + int *pbPresent +){ + int rc = SQLITE_OK; + *pbPresent = 0; + if( p ){ + int i; + int hash; + Fts5TermsetEntry *pEntry; + + /* Calculate a hash value for this term */ + hash = 104 + iIdx; + for(i=0; iapHash); + + for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){ + if( pEntry->iIdx==iIdx + && pEntry->nTerm==nTerm + && memcmp(pEntry->pTerm, pTerm, nTerm)==0 + ){ + *pbPresent = 1; + break; + } + } + + if( pEntry==0 ){ + pEntry = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm); + if( pEntry ){ + pEntry->pTerm = (char*)&pEntry[1]; + pEntry->nTerm = nTerm; + pEntry->iIdx = iIdx; + memcpy(pEntry->pTerm, pTerm, nTerm); + pEntry->pNext = p->apHash[hash]; + p->apHash[hash] = pEntry; + } + } + } + + return rc; +} + +void sqlite3Fts5TermsetFree(Fts5Termset *p){ + if( p ){ + int i; + for(i=0; iapHash); i++){ + Fts5TermsetEntry *pEntry = p->apHash[i]; + while( pEntry ){ + Fts5TermsetEntry *pDel = pEntry; + pEntry = pEntry->pNext; + sqlite3_free(pDel); + } + } + sqlite3_free(p); + } +} + Index: ext/fts5/fts5_config.c ================================================================== --- ext/fts5/fts5_config.c +++ ext/fts5/fts5_config.c @@ -12,11 +12,10 @@ ** ** This is an SQLite module implementing full-text search. */ - #include "fts5Int.h" #define FTS5_DEFAULT_PAGE_SIZE 4050 #define FTS5_DEFAULT_AUTOMERGE 4 #define FTS5_DEFAULT_CRISISMERGE 16 @@ -193,10 +192,37 @@ if( quote=='[' || quote=='\'' || quote=='"' || quote=='`' ){ fts5Dequote(z); } } + +struct Fts5Enum { + const char *zName; + int eVal; +}; +typedef struct Fts5Enum Fts5Enum; + +static int fts5ConfigSetEnum( + const Fts5Enum *aEnum, + const char *zEnum, + int *peVal +){ + int nEnum = strlen(zEnum); + int i; + int iVal = -1; + + for(i=0; aEnum[i].zName; i++){ + if( sqlite3_strnicmp(aEnum[i].zName, zEnum, nEnum)==0 ){ + if( iVal>=0 ) return SQLITE_ERROR; + iVal = aEnum[i].eVal; + } + } + + *peVal = iVal; + return iVal<0 ? SQLITE_ERROR : SQLITE_OK; +} + /* ** Parse a "special" CREATE VIRTUAL TABLE directive and update ** configuration object pConfig as appropriate. ** ** If successful, object pConfig is updated and SQLITE_OK returned. If @@ -342,10 +368,24 @@ }else{ pConfig->bColumnsize = (zArg[0]=='1'); } return rc; } + + if( sqlite3_strnicmp("detail", zCmd, nCmd)==0 ){ + const Fts5Enum aDetail[] = { + { "none", FTS5_DETAIL_NONE }, + { "full", FTS5_DETAIL_FULL }, + { "columns", FTS5_DETAIL_COLUMNS }, + { 0, 0 } + }; + + if( rc = fts5ConfigSetEnum(aDetail, zArg, &pConfig->eDetail) ){ + *pzErr = sqlite3_mprintf("malformed detail=... directive"); + } + return rc; + } *pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd); return SQLITE_ERROR; } @@ -498,10 +538,11 @@ pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte); pRet->abUnindexed = (u8*)&pRet->azCol[nArg]; pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1); pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1); pRet->bColumnsize = 1; + pRet->eDetail = FTS5_DETAIL_FULL; #ifdef SQLITE_DEBUG pRet->bPrefixIndex = 1; #endif if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){ *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName); Index: ext/fts5/fts5_expr.c ================================================================== --- ext/fts5/fts5_expr.c +++ ext/fts5/fts5_expr.c @@ -38,10 +38,11 @@ #endif struct Fts5Expr { Fts5Index *pIndex; + Fts5Config *pConfig; Fts5ExprNode *pRoot; int bDesc; /* Iterate in descending rowid order */ int nPhrase; /* Number of phrases in expression */ Fts5ExprPhrase **apExprPhrase; /* Pointers to phrase objects */ }; @@ -233,10 +234,11 @@ sParse.rc = SQLITE_NOMEM; sqlite3Fts5ParseNodeFree(sParse.pExpr); }else{ pNew->pRoot = sParse.pExpr; pNew->pIndex = 0; + pNew->pConfig = pConfig; pNew->apExprPhrase = sParse.apPhrase; pNew->nPhrase = sParse.nPhrase; sParse.apPhrase = 0; } } @@ -297,12 +299,13 @@ } /* ** Argument pTerm must be a synonym iterator. */ -static int fts5ExprSynonymPoslist( +static int fts5ExprSynonymList( Fts5ExprTerm *pTerm, + int bCollist, Fts5Colset *pColset, i64 iRowid, int *pbDel, /* OUT: Caller should sqlite3_free(*pa) */ u8 **pa, int *pn ){ @@ -317,13 +320,20 @@ for(p=pTerm; p; p=p->pSynonym){ Fts5IndexIter *pIter = p->pIter; if( sqlite3Fts5IterEof(pIter)==0 && sqlite3Fts5IterRowid(pIter)==iRowid ){ const u8 *a; int n; - i64 dummy; - rc = sqlite3Fts5IterPoslist(pIter, pColset, &a, &n, &dummy); + + if( bCollist ){ + rc = sqlite3Fts5IterCollist(pIter, &a, &n); + }else{ + i64 dummy; + rc = sqlite3Fts5IterPoslist(pIter, pColset, &a, &n, &dummy); + } + if( rc!=SQLITE_OK ) goto synonym_poslist_out; + if( n==0 ) continue; if( nIter==nAlloc ){ int nByte = sizeof(Fts5PoslistReader) * nAlloc * 2; Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc(nByte); if( aNew==0 ){ rc = SQLITE_NOMEM; @@ -420,12 +430,12 @@ i64 dummy; int n = 0; int bFlag = 0; const u8 *a = 0; if( pTerm->pSynonym ){ - rc = fts5ExprSynonymPoslist( - pTerm, pColset, pNode->iRowid, &bFlag, (u8**)&a, &n + rc = fts5ExprSynonymList( + pTerm, 0, pColset, pNode->iRowid, &bFlag, (u8**)&a, &n ); }else{ rc = sqlite3Fts5IterPoslist(pTerm->pIter, pColset, &a, &n, &dummy); } if( rc!=SQLITE_OK ) goto ismatch_out; @@ -755,34 +765,55 @@ Fts5Expr *pExpr, /* Expression that pNear is a part of */ Fts5ExprNode *pNode /* The "NEAR" node (FTS5_STRING) */ ){ Fts5ExprNearset *pNear = pNode->pNear; int rc = *pRc; - int i; - - /* Check that each phrase in the nearset matches the current row. - ** Populate the pPhrase->poslist buffers at the same time. If any - ** phrase is not a match, break out of the loop early. */ - for(i=0; rc==SQLITE_OK && inPhrase; i++){ - Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; - if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym || pNear->pColset ){ - int bMatch = 0; - rc = fts5ExprPhraseIsMatch(pNode, pNear->pColset, pPhrase, &bMatch); - if( bMatch==0 ) break; - }else{ - rc = sqlite3Fts5IterPoslistBuffer( - pPhrase->aTerm[0].pIter, &pPhrase->poslist - ); - } - } - - *pRc = rc; - if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){ - return 1; - } - - return 0; + + if( pExpr->pConfig->eDetail!=FTS5_DETAIL_FULL ){ + Fts5ExprTerm *pTerm; + Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; + pPhrase->poslist.n = 0; + for(pTerm=&pPhrase->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){ + Fts5IndexIter *pIter = pTerm->pIter; + if( sqlite3Fts5IterEof(pIter)==0 ){ + int n; + i64 iRowid; + rc = sqlite3Fts5IterPoslist(pIter, pNear->pColset, 0, &n, &iRowid); + if( rc!=SQLITE_OK ){ + *pRc = rc; + return 0; + }else if( iRowid==pNode->iRowid && n>0 ){ + pPhrase->poslist.n = 1; + } + } + } + return pPhrase->poslist.n; + }else{ + int i; + + /* Check that each phrase in the nearset matches the current row. + ** Populate the pPhrase->poslist buffers at the same time. If any + ** phrase is not a match, break out of the loop early. */ + for(i=0; rc==SQLITE_OK && inPhrase; i++){ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; + if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym || pNear->pColset ){ + int bMatch = 0; + rc = fts5ExprPhraseIsMatch(pNode, pNear->pColset, pPhrase, &bMatch); + if( bMatch==0 ) break; + }else{ + rc = sqlite3Fts5IterPoslistBuffer( + pPhrase->aTerm[0].pIter, &pPhrase->poslist + ); + } + } + + *pRc = rc; + if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){ + return 1; + } + return 0; + } } static int fts5ExprTokenTest( Fts5Expr *pExpr, /* Expression that pNear is a part of */ Fts5ExprNode *pNode /* The "NEAR" node (FTS5_TERM) */ @@ -1216,10 +1247,13 @@ if( cmp || p2->bNomatch ) break; rc = fts5ExprNodeNext(pExpr, p1, 0, 0); } pNode->bEof = p1->bEof; pNode->iRowid = p1->iRowid; + if( p1->bEof ){ + fts5ExprNodeZeroPoslist(p2); + } break; } } } return rc; @@ -1601,10 +1635,11 @@ } if( rc==SQLITE_OK ){ /* All the allocations succeeded. Put the expression object together. */ pNew->pIndex = pExpr->pIndex; + pNew->pConfig = pExpr->pConfig; pNew->nPhrase = 1; pNew->apExprPhrase[0] = sCtx.pPhrase; pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase; pNew->pRoot->pNear->nPhrase = 1; sCtx.pPhrase->pNode = pNew->pRoot; @@ -1742,10 +1777,19 @@ void sqlite3Fts5ParseSetColset( Fts5Parse *pParse, Fts5ExprNearset *pNear, Fts5Colset *pColset ){ + if( pParse->pConfig->eDetail==FTS5_DETAIL_NONE ){ + pParse->rc = SQLITE_ERROR; + pParse->zErr = sqlite3_mprintf( + "fts5: column queries are not supported (detail=none)" + ); + sqlite3_free(pColset); + return; + } + if( pNear ){ pNear->pColset = pColset; }else{ sqlite3_free(pColset); } @@ -1803,15 +1847,24 @@ if( eType==FTS5_STRING ){ int iPhrase; for(iPhrase=0; iPhrasenPhrase; iPhrase++){ pNear->apPhrase[iPhrase]->pNode = pRet; } - if( pNear->nPhrase==1 - && pNear->apPhrase[0]->nTerm==1 - && pNear->apPhrase[0]->aTerm[0].pSynonym==0 - ){ - pRet->eType = FTS5_TERM; + if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 ){ + if( pNear->apPhrase[0]->aTerm[0].pSynonym==0 ){ + pRet->eType = FTS5_TERM; + } + }else if( pParse->pConfig->eDetail!=FTS5_DETAIL_FULL ){ + assert( pParse->rc==SQLITE_OK ); + pParse->rc = SQLITE_ERROR; + assert( pParse->zErr==0 ); + pParse->zErr = sqlite3_mprintf( + "fts5: %s queries are not supported (detail!=full)", + pNear->nPhrase==1 ? "phrase": "NEAR" + ); + sqlite3_free(pRet); + pRet = 0; } }else{ fts5ExprAddChildren(pRet, pLeft); fts5ExprAddChildren(pRet, pRight); } @@ -1921,10 +1974,13 @@ zRet = fts5PrintfAppend(zRet, " {"); for(iTerm=0; zRet && iTermnTerm; iTerm++){ char *zTerm = pPhrase->aTerm[iTerm].zTerm; zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" ", zTerm); + if( pPhrase->aTerm[iTerm].bPrefix ){ + zRet = fts5PrintfAppend(zRet, "*"); + } } if( zRet ) zRet = fts5PrintfAppend(zRet, "}"); if( zRet==0 ) return 0; } @@ -2233,5 +2289,231 @@ *pa = 0; nRet = 0; } return nRet; } + +struct Fts5PoslistPopulator { + Fts5PoslistWriter writer; + int bOk; /* True if ok to populate */ + int bMiss; +}; + +Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr *pExpr, int bLive){ + Fts5PoslistPopulator *pRet; + pRet = sqlite3_malloc(sizeof(Fts5PoslistPopulator)*pExpr->nPhrase); + if( pRet ){ + int i; + memset(pRet, 0, sizeof(Fts5PoslistPopulator)*pExpr->nPhrase); + for(i=0; inPhrase; i++){ + Fts5Buffer *pBuf = &pExpr->apExprPhrase[i]->poslist; + Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode; + assert( pExpr->apExprPhrase[i]->nTerm==1 ); + if( bLive && + (pBuf->n==0 || pNode->iRowid!=pExpr->pRoot->iRowid || pNode->bEof) + ){ + pRet[i].bMiss = 1; + }else{ + pBuf->n = 0; + } + } + } + return pRet; +} + +struct Fts5ExprCtx { + Fts5Expr *pExpr; + Fts5PoslistPopulator *aPopulator; + i64 iOff; +}; +typedef struct Fts5ExprCtx Fts5ExprCtx; + +/* +** TODO: Make this more efficient! +*/ +static int fts5ExprColsetTest(Fts5Colset *pColset, int iCol){ + int i; + for(i=0; inCol; i++){ + if( pColset->aiCol[i]==iCol ) return 1; + } + return 0; +} + +static int fts5ExprPopulatePoslistsCb( + void *pCtx, /* Copy of 2nd argument to xTokenize() */ + int tflags, /* Mask of FTS5_TOKEN_* flags */ + const char *pToken, /* Pointer to buffer containing token */ + int nToken, /* Size of token in bytes */ + int iStart, /* Byte offset of token within input text */ + int iEnd /* Byte offset of end of token within input text */ +){ + Fts5ExprCtx *p = (Fts5ExprCtx*)pCtx; + Fts5Expr *pExpr = p->pExpr; + int i; + + if( (tflags & FTS5_TOKEN_COLOCATED)==0 ) p->iOff++; + for(i=0; inPhrase; i++){ + Fts5ExprTerm *pTerm; + if( p->aPopulator[i].bOk==0 ) continue; + for(pTerm=&pExpr->apExprPhrase[i]->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){ + int nTerm = strlen(pTerm->zTerm); + if( (nTerm==nToken || (nTermbPrefix)) + && memcmp(pTerm->zTerm, pToken, nTerm)==0 + ){ + int rc = sqlite3Fts5PoslistWriterAppend( + &pExpr->apExprPhrase[i]->poslist, &p->aPopulator[i].writer, p->iOff + ); + if( rc ) return rc; + break; + } + } + } + return SQLITE_OK; +} + +int sqlite3Fts5ExprPopulatePoslists( + Fts5Config *pConfig, + Fts5Expr *pExpr, + Fts5PoslistPopulator *aPopulator, + int iCol, + const char *z, int n +){ + int i; + Fts5ExprCtx sCtx; + sCtx.pExpr = pExpr; + sCtx.aPopulator = aPopulator; + sCtx.iOff = (((i64)iCol) << 32) - 1; + + for(i=0; inPhrase; i++){ + Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode; + Fts5Colset *pColset = pNode->pNear->pColset; + if( (pColset && 0==fts5ExprColsetTest(pColset, iCol)) + || aPopulator[i].bMiss + ){ + aPopulator[i].bOk = 0; + }else{ + aPopulator[i].bOk = 1; + } + } + + return sqlite3Fts5Tokenize(pConfig, + FTS5_TOKENIZE_AUX, z, n, (void*)&sCtx, fts5ExprPopulatePoslistsCb + ); +} + +static void fts5ExprClearPoslists(Fts5ExprNode *pNode){ + if( pNode->eType==FTS5_TERM || pNode->eType==FTS5_STRING ){ + pNode->pNear->apPhrase[0]->poslist.n = 0; + }else{ + int i; + for(i=0; inChild; i++){ + fts5ExprClearPoslists(pNode->apChild[i]); + } + } +} + +static int fts5ExprCheckPoslists(Fts5ExprNode *pNode, i64 iRowid){ + if( pNode ){ + pNode->iRowid = iRowid; + pNode->bEof = 0; + switch( pNode->eType ){ + case FTS5_TERM: + case FTS5_STRING: + return (pNode->pNear->apPhrase[0]->poslist.n>0); + + case FTS5_AND: { + int i; + for(i=0; inChild; i++){ + if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid)==0 ){ + fts5ExprClearPoslists(pNode); + return 0; + } + } + return 1; + } + + case FTS5_OR: { + int i; + int bRet = 0; + for(i=0; inChild; i++){ + if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid) ){ + bRet = 1; + } + } + if( bRet==0 ){ + fts5ExprClearPoslists(pNode); + } + return bRet; + } + + default: { + assert( pNode->eType==FTS5_NOT ); + if( 0==fts5ExprCheckPoslists(pNode->apChild[0], iRowid) + || 0!=fts5ExprCheckPoslists(pNode->apChild[1], iRowid) + ){ + fts5ExprClearPoslists(pNode); + return 0; + } + return 1; + } + } + } +} + +void sqlite3Fts5ExprCheckPoslists(Fts5Expr *pExpr, i64 iRowid){ + fts5ExprCheckPoslists(pExpr->pRoot, iRowid); +} + +static void fts5ExprClearEof(Fts5ExprNode *pNode){ + int i; + for(i=0; inChild; i++){ + fts5ExprClearEof(pNode->apChild[i]); + } + pNode->bEof = 0; +} +void sqlite3Fts5ExprClearEof(Fts5Expr *pExpr){ + fts5ExprClearEof(pExpr->pRoot); +} + +/* +** This function is only called for detail=columns tables. +*/ +int sqlite3Fts5ExprPhraseCollist( + Fts5Expr *pExpr, + int iPhrase, + const u8 **ppCollist, + int *pnCollist +){ + Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase]; + Fts5ExprNode *pNode = pPhrase->pNode; + int rc = SQLITE_OK; + + assert( iPhrase>=0 && iPhrasenPhrase ); + if( pNode->bEof==0 + && pNode->iRowid==pExpr->pRoot->iRowid + && pPhrase->poslist.n>0 + ){ + Fts5ExprTerm *pTerm = &pPhrase->aTerm[0]; + if( pTerm->pSynonym ){ + int bDel = 0; + u8 *a; + rc = fts5ExprSynonymList( + pTerm, 1, 0, pNode->iRowid, &bDel, &a, pnCollist + ); + if( bDel ){ + sqlite3Fts5BufferSet(&rc, &pPhrase->poslist, *pnCollist, a); + *ppCollist = pPhrase->poslist.p; + sqlite3_free(a); + }else{ + *ppCollist = a; + } + }else{ + sqlite3Fts5IterCollist(pPhrase->aTerm[0].pIter, ppCollist, pnCollist); + } + }else{ + *ppCollist = 0; + *pnCollist = 0; + } + + return rc; +} + Index: ext/fts5/fts5_hash.c ================================================================== --- ext/fts5/fts5_hash.c +++ ext/fts5/fts5_hash.c @@ -24,10 +24,11 @@ ** segment. */ struct Fts5Hash { + int eDetail; /* Copy of Fts5Config.eDetail */ int *pnByte; /* Pointer to bytes counter */ int nEntry; /* Number of entries currently in hash */ int nSlot; /* Size of aSlot[] array */ Fts5HashEntry *pScan; /* Current ordered scan item */ Fts5HashEntry **aSlot; /* Array of hash slots */ @@ -60,10 +61,11 @@ int nAlloc; /* Total size of allocation */ int iSzPoslist; /* Offset of space for 4-byte poslist size */ int nData; /* Total bytes of data (incl. structure) */ u8 bDel; /* Set delete-flag @ iSzPoslist */ + u8 bContent; /* Set content-flag (detail=none mode) */ int iCol; /* Column of last value written */ int iPos; /* Position of last value written */ i64 iRowid; /* Rowid of last value written */ char zKey[8]; /* Nul-terminated entry key */ @@ -77,11 +79,11 @@ /* ** Allocate a new hash table. */ -int sqlite3Fts5HashNew(Fts5Hash **ppNew, int *pnByte){ +int sqlite3Fts5HashNew(Fts5Config *pConfig, Fts5Hash **ppNew, int *pnByte){ int rc = SQLITE_OK; Fts5Hash *pNew; *ppNew = pNew = (Fts5Hash*)sqlite3_malloc(sizeof(Fts5Hash)); if( pNew==0 ){ @@ -88,10 +90,11 @@ rc = SQLITE_NOMEM; }else{ int nByte; memset(pNew, 0, sizeof(Fts5Hash)); pNew->pnByte = pnByte; + pNew->eDetail = pConfig->eDetail; pNew->nSlot = 1024; nByte = sizeof(Fts5HashEntry*) * pNew->nSlot; pNew->aSlot = (Fts5HashEntry**)sqlite3_malloc(nByte); if( pNew->aSlot==0 ){ @@ -180,30 +183,50 @@ pHash->nSlot = nNew; pHash->aSlot = apNew; return SQLITE_OK; } -static void fts5HashAddPoslistSize(Fts5HashEntry *p){ +static void fts5HashAddPoslistSize(Fts5Hash *pHash, Fts5HashEntry *p){ if( p->iSzPoslist ){ u8 *pPtr = (u8*)p; - int nSz = (p->nData - p->iSzPoslist - 1); /* Size in bytes */ - int nPos = nSz*2 + p->bDel; /* Value of nPos field */ - - assert( p->bDel==0 || p->bDel==1 ); - if( nPos<=127 ){ - pPtr[p->iSzPoslist] = (u8)nPos; + if( pHash->eDetail==FTS5_DETAIL_NONE ){ + assert( p->nData==p->iSzPoslist ); + if( p->bDel ){ + pPtr[p->nData++] = 0x00; + if( p->bContent ){ + pPtr[p->nData++] = 0x00; + } + } }else{ - int nByte = sqlite3Fts5GetVarintLen((u32)nPos); - memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz); - sqlite3Fts5PutVarint(&pPtr[p->iSzPoslist], nPos); - p->nData += (nByte-1); + int nSz = (p->nData - p->iSzPoslist - 1); /* Size in bytes */ + int nPos = nSz*2 + p->bDel; /* Value of nPos field */ + + assert( p->bDel==0 || p->bDel==1 ); + if( nPos<=127 ){ + pPtr[p->iSzPoslist] = (u8)nPos; + }else{ + int nByte = sqlite3Fts5GetVarintLen((u32)nPos); + memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz); + sqlite3Fts5PutVarint(&pPtr[p->iSzPoslist], nPos); + p->nData += (nByte-1); + } } - p->bDel = 0; + p->iSzPoslist = 0; + p->bDel = 0; + p->bContent = 0; } } +/* +** Add an entry to the in-memory hash table. The key is the concatenation +** of bByte and (pToken/nToken). The value is (iRowid/iCol/iPos). +** +** (bByte || pToken) -> (iRowid,iCol,iPos) +** +** Or, if iCol is negative, then the value is a delete marker. +*/ int sqlite3Fts5HashWrite( Fts5Hash *pHash, i64 iRowid, /* Rowid for this entry */ int iCol, /* Column token appears in (-ve -> delete) */ int iPos, /* Position of token within column */ @@ -212,10 +235,13 @@ ){ unsigned int iHash; Fts5HashEntry *p; u8 *pPtr; int nIncr = 0; /* Amount to increment (*pHash->pnByte) by */ + int bNew; /* If non-delete entry should be written */ + + bNew = (pHash->eDetail==FTS5_DETAIL_FULL); /* Attempt to locate an existing hash entry */ iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken); for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ if( p->zKey[0]==bByte @@ -226,92 +252,120 @@ } } /* If an existing hash entry cannot be found, create a new one. */ if( p==0 ){ + /* Figure out how much space to allocate */ int nByte = FTS5_HASHENTRYSIZE + (nToken+1) + 1 + 64; if( nByte<128 ) nByte = 128; + /* Grow the Fts5Hash.aSlot[] array if necessary. */ if( (pHash->nEntry*2)>=pHash->nSlot ){ int rc = fts5HashResize(pHash); if( rc!=SQLITE_OK ) return rc; iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken); } + /* Allocate new Fts5HashEntry and add it to the hash table. */ p = (Fts5HashEntry*)sqlite3_malloc(nByte); if( !p ) return SQLITE_NOMEM; memset(p, 0, FTS5_HASHENTRYSIZE); p->nAlloc = nByte; p->zKey[0] = bByte; memcpy(&p->zKey[1], pToken, nToken); assert( iHash==fts5HashKey(pHash->nSlot, (u8*)p->zKey, nToken+1) ); p->zKey[nToken+1] = '\0'; p->nData = nToken+1 + 1 + FTS5_HASHENTRYSIZE; - p->nData += sqlite3Fts5PutVarint(&((u8*)p)[p->nData], iRowid); - p->iSzPoslist = p->nData; - p->nData += 1; - p->iRowid = iRowid; p->pHashNext = pHash->aSlot[iHash]; pHash->aSlot[iHash] = p; pHash->nEntry++; + + /* Add the first rowid field to the hash-entry */ + p->nData += sqlite3Fts5PutVarint(&((u8*)p)[p->nData], iRowid); + p->iRowid = iRowid; + + p->iSzPoslist = p->nData; + if( pHash->eDetail!=FTS5_DETAIL_NONE ){ + p->nData += 1; + p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL ? 0 : -1); + } + nIncr += p->nData; - } - - /* Check there is enough space to append a new entry. Worst case scenario - ** is: - ** - ** + 9 bytes for a new rowid, - ** + 4 byte reserved for the "poslist size" varint. - ** + 1 byte for a "new column" byte, - ** + 3 bytes for a new column number (16-bit max) as a varint, - ** + 5 bytes for the new position offset (32-bit max). - */ - if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){ - int nNew = p->nAlloc * 2; - Fts5HashEntry *pNew; - Fts5HashEntry **pp; - pNew = (Fts5HashEntry*)sqlite3_realloc(p, nNew); - if( pNew==0 ) return SQLITE_NOMEM; - pNew->nAlloc = nNew; - for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pHashNext); - *pp = pNew; - p = pNew; - } - pPtr = (u8*)p; - nIncr -= p->nData; + }else{ + + /* Appending to an existing hash-entry. Check that there is enough + ** space to append the largest possible new entry. Worst case scenario + ** is: + ** + ** + 9 bytes for a new rowid, + ** + 4 byte reserved for the "poslist size" varint. + ** + 1 byte for a "new column" byte, + ** + 3 bytes for a new column number (16-bit max) as a varint, + ** + 5 bytes for the new position offset (32-bit max). + */ + if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){ + int nNew = p->nAlloc * 2; + Fts5HashEntry *pNew; + Fts5HashEntry **pp; + pNew = (Fts5HashEntry*)sqlite3_realloc(p, nNew); + if( pNew==0 ) return SQLITE_NOMEM; + pNew->nAlloc = nNew; + for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pHashNext); + *pp = pNew; + p = pNew; + } + nIncr -= p->nData; + } + assert( (p->nAlloc - p->nData) >= (9 + 4 + 1 + 3 + 5) ); + + pPtr = (u8*)p; /* If this is a new rowid, append the 4-byte size field for the previous ** entry, and the new rowid for this entry. */ if( iRowid!=p->iRowid ){ - fts5HashAddPoslistSize(p); + fts5HashAddPoslistSize(pHash, p); p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iRowid - p->iRowid); + p->iRowid = iRowid; + bNew = 1; p->iSzPoslist = p->nData; - p->nData += 1; - p->iCol = 0; - p->iPos = 0; - p->iRowid = iRowid; + if( pHash->eDetail!=FTS5_DETAIL_NONE ){ + p->nData += 1; + p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL ? 0 : -1); + p->iPos = 0; + } } if( iCol>=0 ){ - /* Append a new column value, if necessary */ - assert( iCol>=p->iCol ); - if( iCol!=p->iCol ){ - pPtr[p->nData++] = 0x01; - p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iCol); - p->iCol = iCol; - p->iPos = 0; - } - - /* Append the new position offset */ - p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iPos - p->iPos + 2); - p->iPos = iPos; + if( pHash->eDetail==FTS5_DETAIL_NONE ){ + p->bContent = 1; + }else{ + /* Append a new column value, if necessary */ + assert( iCol>=p->iCol ); + if( iCol!=p->iCol ){ + if( pHash->eDetail==FTS5_DETAIL_FULL ){ + pPtr[p->nData++] = 0x01; + p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iCol); + p->iCol = iCol; + p->iPos = 0; + }else{ + bNew = 1; + p->iCol = iPos = iCol; + } + } + + /* Append the new position offset, if necessary */ + if( bNew ){ + p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iPos - p->iPos + 2); + p->iPos = iPos; + } + } }else{ /* This is a delete. Set the delete flag. */ p->bDel = 1; } - nIncr += p->nData; + nIncr += p->nData; *pHash->pnByte += nIncr; return SQLITE_OK; } @@ -421,11 +475,11 @@ for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ if( memcmp(p->zKey, pTerm, nTerm)==0 && p->zKey[nTerm]==0 ) break; } if( p ){ - fts5HashAddPoslistSize(p); + fts5HashAddPoslistSize(pHash, p); *ppDoclist = (const u8*)&p->zKey[nTerm+1]; *pnDoclist = p->nData - (FTS5_HASHENTRYSIZE + nTerm + 1); }else{ *ppDoclist = 0; *pnDoclist = 0; @@ -457,11 +511,11 @@ int *pnDoclist /* OUT: size of doclist in bytes */ ){ Fts5HashEntry *p; if( (p = pHash->pScan) ){ int nTerm = (int)strlen(p->zKey); - fts5HashAddPoslistSize(p); + fts5HashAddPoslistSize(pHash, p); *pzTerm = p->zKey; *ppDoclist = (const u8*)&p->zKey[nTerm+1]; *pnDoclist = p->nData - (FTS5_HASHENTRYSIZE + nTerm + 1); }else{ *pzTerm = 0; Index: ext/fts5/fts5_index.c ================================================================== --- ext/fts5/fts5_index.c +++ ext/fts5/fts5_index.c @@ -431,10 +431,13 @@ int iLeafPgno; /* Current leaf page number */ Fts5Data *pLeaf; /* Current leaf data */ Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */ int iLeafOffset; /* Byte offset within current leaf */ + /* Next method */ + void (*xNext)(Fts5Index*, Fts5SegIter*, int*); + /* The page and offset from which the current term was read. The offset ** is the offset of the first rowid in the current doclist. */ int iTermLeafPgno; int iTermLeafOffset; @@ -450,11 +453,11 @@ /* Variables populated based on current entry. */ Fts5Buffer term; /* Current term */ i64 iRowid; /* Current rowid */ int nPos; /* Number of bytes in current position list */ - int bDel; /* True if the delete flag is set */ + u8 bDel; /* True if the delete flag is set */ }; /* ** Argument is a pointer to an Fts5Data structure that contains a ** leaf page. @@ -463,11 +466,10 @@ (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \ ) #define FTS5_SEGITER_ONETERM 0x01 #define FTS5_SEGITER_REVERSE 0x02 - /* ** Argument is a pointer to an Fts5Data structure that contains a leaf ** page. This macro evaluates to true if the leaf contains no terms, or ** false if it contains at least one term. @@ -1490,17 +1492,33 @@ ** position list content (if any). */ static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){ if( p->rc==SQLITE_OK ){ int iOff = pIter->iLeafOffset; /* Offset to read at */ - int nSz; ASSERT_SZLEAF_OK(pIter->pLeaf); - fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz); - pIter->bDel = (nSz & 0x0001); - pIter->nPos = nSz>>1; + if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){ + int iEod = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf); + pIter->bDel = 0; + pIter->nPos = 1; + if( iOffpLeaf->p[iOff]==0 ){ + pIter->bDel = 1; + iOff++; + if( iOffpLeaf->p[iOff]==0 ){ + pIter->nPos = 1; + iOff++; + }else{ + pIter->nPos = 0; + } + } + }else{ + int nSz; + fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz); + pIter->bDel = (nSz & 0x0001); + pIter->nPos = nSz>>1; + assert_nc( pIter->nPos>=0 ); + } pIter->iLeafOffset = iOff; - assert_nc( pIter->nPos>=0 ); } } static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){ u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ @@ -1556,10 +1574,24 @@ pIter->iEndofDoclist += nExtra; } fts5SegIterLoadRowid(p, pIter); } + +static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*); +static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*); +static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*); + +static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){ + if( pIter->flags & FTS5_SEGITER_REVERSE ){ + pIter->xNext = fts5SegIterNext_Reverse; + }else if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){ + pIter->xNext = fts5SegIterNext_None; + }else{ + pIter->xNext = fts5SegIterNext; + } +} /* ** Initialize the iterator object pIter to iterate through the entries in ** segment pSeg. The iterator is left pointing to the first entry when ** this function returns. @@ -1582,10 +1614,11 @@ return; } if( p->rc==SQLITE_OK ){ memset(pIter, 0, sizeof(*pIter)); + fts5SegIterSetNext(p, pIter); pIter->pSeg = pSeg; pIter->iLeafPgno = pSeg->pgnoFirst-1; fts5SegIterNextPage(p, pIter); } @@ -1613,10 +1646,11 @@ ** aRowidOffset[] and iRowidOffset variables. At this point the iterator ** is in its regular state - Fts5SegIter.iLeafOffset points to the first ** byte of the position list content associated with said rowid. */ static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ + int eDetail = p->pConfig->eDetail; int n = pIter->pLeaf->szLeaf; int i = pIter->iLeafOffset; u8 *a = pIter->pLeaf->p; int iRowidOffset = 0; @@ -1625,19 +1659,28 @@ } ASSERT_SZLEAF_OK(pIter->pLeaf); while( 1 ){ i64 iDelta = 0; - int nPos; - int bDummy; - i += fts5GetPoslistSize(&a[i], &nPos, &bDummy); - i += nPos; + if( eDetail==FTS5_DETAIL_NONE ){ + /* todo */ + if( i=n ) break; i += fts5GetVarint(&a[i], (u64*)&iDelta); pIter->iRowid += iDelta; + /* If necessary, grow the pIter->aRowidOffset[] array. */ if( iRowidOffset>=pIter->nRowidOffset ){ int nNew = pIter->nRowidOffset + 8; int *aNew = (int*)sqlite3_realloc(pIter->aRowidOffset, nNew*sizeof(int)); if( aNew==0 ){ p->rc = SQLITE_NOMEM; @@ -1711,10 +1754,114 @@ */ static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5IndexIter *pIter){ Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0); } + +/* +** Advance iterator pIter to the next entry. +** +** This version of fts5SegIterNext() is only used by reverse iterators. +*/ +static void fts5SegIterNext_Reverse( + Fts5Index *p, /* FTS5 backend object */ + Fts5SegIter *pIter, /* Iterator to advance */ + int *pbNewTerm /* OUT: Set for new term */ +){ + assert( pIter->flags & FTS5_SEGITER_REVERSE ); + assert( pIter->pNextLeaf==0 ); + if( pIter->iRowidOffset>0 ){ + u8 *a = pIter->pLeaf->p; + int iOff; + int nPos; + int bDummy; + i64 iDelta; + + pIter->iRowidOffset--; + pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset]; + fts5SegIterLoadNPos(p, pIter); + iOff = pIter->iLeafOffset; + if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){ + iOff += pIter->nPos; + } + fts5GetVarint(&a[iOff], (u64*)&iDelta); + pIter->iRowid -= iDelta; + }else{ + fts5SegIterReverseNewPage(p, pIter); + } +} + +/* +** Advance iterator pIter to the next entry. +** +** This version of fts5SegIterNext() is only used if detail=none and the +** iterator is not a reverse direction iterator. +*/ +static void fts5SegIterNext_None( + Fts5Index *p, /* FTS5 backend object */ + Fts5SegIter *pIter, /* Iterator to advance */ + int *pbNewTerm /* OUT: Set for new term */ +){ + int iOff; + + assert( p->rc==SQLITE_OK ); + assert( (pIter->flags & FTS5_SEGITER_REVERSE)==0 ); + assert( p->pConfig->eDetail==FTS5_DETAIL_NONE ); + + ASSERT_SZLEAF_OK(pIter->pLeaf); + iOff = pIter->iLeafOffset; + + /* Next entry is on the next page */ + if( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){ + fts5SegIterNextPage(p, pIter); + if( p->rc || pIter->pLeaf==0 ) return; + pIter->iRowid = 0; + iOff = 4; + } + + if( iOffiEndofDoclist ){ + /* Next entry is on the current page */ + i64 iDelta; + iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], &iDelta); + pIter->iLeafOffset = iOff; + pIter->iRowid += iDelta; + }else if( (pIter->flags & FTS5_SEGITER_ONETERM)==0 ){ + if( pIter->pSeg ){ + int nKeep = 0; + if( iOff!=fts5LeafFirstTermOff(pIter->pLeaf) ){ + iOff += fts5GetVarint32(&pIter->pLeaf->p[iOff], nKeep); + } + pIter->iLeafOffset = iOff; + fts5SegIterLoadTerm(p, pIter, nKeep); + }else{ + const u8 *pList = 0; + const char *zTerm = 0; + int nList; + sqlite3Fts5HashScanNext(p->pHash); + sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList); + if( pList==0 ) goto next_none_eof; + pIter->pLeaf->p = (u8*)pList; + pIter->pLeaf->nn = nList; + pIter->pLeaf->szLeaf = nList; + pIter->iEndofDoclist = nList; + sqlite3Fts5BufferSet(&p->rc,&pIter->term, (int)strlen(zTerm), (u8*)zTerm); + pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid); + } + + if( pbNewTerm ) *pbNewTerm = 1; + }else{ + goto next_none_eof; + } + + fts5SegIterLoadNPos(p, pIter); + + return; + next_none_eof: + fts5DataRelease(pIter->pLeaf); + pIter->pLeaf = 0; +} + /* ** Advance iterator pIter to the next entry. ** ** If an error occurs, Fts5Index.rc is set to an appropriate error code. It @@ -1724,151 +1871,141 @@ static void fts5SegIterNext( Fts5Index *p, /* FTS5 backend object */ Fts5SegIter *pIter, /* Iterator to advance */ int *pbNewTerm /* OUT: Set for new term */ ){ - assert( pbNewTerm==0 || *pbNewTerm==0 ); - if( p->rc==SQLITE_OK ){ - if( pIter->flags & FTS5_SEGITER_REVERSE ){ - assert( pIter->pNextLeaf==0 ); - if( pIter->iRowidOffset>0 ){ - u8 *a = pIter->pLeaf->p; - int iOff; - int nPos; - int bDummy; - i64 iDelta; - - pIter->iRowidOffset--; - pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset]; - iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy); - iOff += nPos; - fts5GetVarint(&a[iOff], (u64*)&iDelta); - pIter->iRowid -= iDelta; - fts5SegIterLoadNPos(p, pIter); - }else{ - fts5SegIterReverseNewPage(p, pIter); - } - }else{ - Fts5Data *pLeaf = pIter->pLeaf; - int iOff; - int bNewTerm = 0; - int nKeep = 0; - - /* Search for the end of the position list within the current page. */ - u8 *a = pLeaf->p; - int n = pLeaf->szLeaf; - - ASSERT_SZLEAF_OK(pLeaf); - iOff = pIter->iLeafOffset + pIter->nPos; - - if( iOffiEndofDoclist ); - if( iOff>=pIter->iEndofDoclist ){ - bNewTerm = 1; - if( iOff!=fts5LeafFirstTermOff(pLeaf) ){ - iOff += fts5GetVarint32(&a[iOff], nKeep); - } - }else{ - u64 iDelta; - iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta); - pIter->iRowid += iDelta; - assert_nc( iDelta>0 ); - } - pIter->iLeafOffset = iOff; - - }else if( pIter->pSeg==0 ){ - const u8 *pList = 0; - const char *zTerm = 0; - int nList = 0; - assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm ); - if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){ - sqlite3Fts5HashScanNext(p->pHash); - sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList); - } - if( pList==0 ){ - fts5DataRelease(pIter->pLeaf); - pIter->pLeaf = 0; - }else{ - pIter->pLeaf->p = (u8*)pList; - pIter->pLeaf->nn = nList; - pIter->pLeaf->szLeaf = nList; - pIter->iEndofDoclist = nList+1; - sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm), - (u8*)zTerm); - pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid); - *pbNewTerm = 1; - } - }else{ - iOff = 0; - /* Next entry is not on the current page */ - while( iOff==0 ){ - fts5SegIterNextPage(p, pIter); - pLeaf = pIter->pLeaf; - if( pLeaf==0 ) break; - ASSERT_SZLEAF_OK(pLeaf); - if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOffszLeaf ){ - iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid); - pIter->iLeafOffset = iOff; - - if( pLeaf->nn>pLeaf->szLeaf ){ - pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32( - &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist - ); - } - - } - else if( pLeaf->nn>pLeaf->szLeaf ){ - pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32( - &pLeaf->p[pLeaf->szLeaf], iOff - ); - pIter->iLeafOffset = iOff; - pIter->iEndofDoclist = iOff; - bNewTerm = 1; - } - if( iOff>=pLeaf->szLeaf ){ - p->rc = FTS5_CORRUPT; - return; - } - } - } - - /* Check if the iterator is now at EOF. If so, return early. */ - if( pIter->pLeaf ){ - if( bNewTerm ){ - if( pIter->flags & FTS5_SEGITER_ONETERM ){ - fts5DataRelease(pIter->pLeaf); - pIter->pLeaf = 0; - }else{ - fts5SegIterLoadTerm(p, pIter, nKeep); - fts5SegIterLoadNPos(p, pIter); - if( pbNewTerm ) *pbNewTerm = 1; - } - }else{ - /* The following could be done by calling fts5SegIterLoadNPos(). But - ** this block is particularly performance critical, so equivalent - ** code is inlined. */ - int nSz; - assert( p->rc==SQLITE_OK ); - fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz); - pIter->bDel = (nSz & 0x0001); - pIter->nPos = nSz>>1; - assert_nc( pIter->nPos>=0 ); - } - } + Fts5Data *pLeaf = pIter->pLeaf; + int iOff; + int bNewTerm = 0; + int nKeep = 0; + + assert( pbNewTerm==0 || *pbNewTerm==0 ); + assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE ); + + /* Search for the end of the position list within the current page. */ + u8 *a = pLeaf->p; + int n = pLeaf->szLeaf; + + ASSERT_SZLEAF_OK(pLeaf); + iOff = pIter->iLeafOffset + pIter->nPos; + + if( iOffiEndofDoclist ); + if( iOff>=pIter->iEndofDoclist ){ + bNewTerm = 1; + if( iOff!=fts5LeafFirstTermOff(pLeaf) ){ + iOff += fts5GetVarint32(&a[iOff], nKeep); + } + }else{ + u64 iDelta; + iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta); + pIter->iRowid += iDelta; + assert_nc( iDelta>0 ); + } + pIter->iLeafOffset = iOff; + + }else if( pIter->pSeg==0 ){ + const u8 *pList = 0; + const char *zTerm = 0; + int nList = 0; + assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm ); + if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){ + sqlite3Fts5HashScanNext(p->pHash); + sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList); + } + if( pList==0 ){ + fts5DataRelease(pIter->pLeaf); + pIter->pLeaf = 0; + }else{ + pIter->pLeaf->p = (u8*)pList; + pIter->pLeaf->nn = nList; + pIter->pLeaf->szLeaf = nList; + pIter->iEndofDoclist = nList+1; + sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm), + (u8*)zTerm); + pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid); + *pbNewTerm = 1; + } + }else{ + iOff = 0; + /* Next entry is not on the current page */ + while( iOff==0 ){ + fts5SegIterNextPage(p, pIter); + pLeaf = pIter->pLeaf; + if( pLeaf==0 ) break; + ASSERT_SZLEAF_OK(pLeaf); + if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOffszLeaf ){ + iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid); + pIter->iLeafOffset = iOff; + + if( pLeaf->nn>pLeaf->szLeaf ){ + pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32( + &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist + ); + } + + } + else if( pLeaf->nn>pLeaf->szLeaf ){ + pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32( + &pLeaf->p[pLeaf->szLeaf], iOff + ); + pIter->iLeafOffset = iOff; + pIter->iEndofDoclist = iOff; + bNewTerm = 1; + } + assert_nc( iOffszLeaf ); + if( iOff>pLeaf->szLeaf ){ + p->rc = FTS5_CORRUPT; + return; + } + } + } + + /* Check if the iterator is now at EOF. If so, return early. */ + if( pIter->pLeaf ){ + if( bNewTerm ){ + if( pIter->flags & FTS5_SEGITER_ONETERM ){ + fts5DataRelease(pIter->pLeaf); + pIter->pLeaf = 0; + }else{ + fts5SegIterLoadTerm(p, pIter, nKeep); + fts5SegIterLoadNPos(p, pIter); + if( pbNewTerm ) *pbNewTerm = 1; + } + }else{ + /* The following could be done by calling fts5SegIterLoadNPos(). But + ** this block is particularly performance critical, so equivalent + ** code is inlined. + ** + ** Later: Switched back to fts5SegIterLoadNPos() because it supports + ** detail=none mode. Not ideal. + */ + int nSz; + assert( p->rc==SQLITE_OK ); + fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz); + pIter->bDel = (nSz & 0x0001); + pIter->nPos = nSz>>1; + assert_nc( pIter->nPos>=0 ); } } } #define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; } + +#define fts5IndexSkipVarint(a, iOff) { \ + int iEnd = iOff+9; \ + while( (a[iOff++] & 0x80) && iOffpConfig->eDetail; Fts5DlidxIter *pDlidx = pIter->pDlidx; Fts5Data *pLast = 0; int pgnoLast = 0; if( pDlidx ){ @@ -1879,11 +2016,27 @@ Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ /* Currently, Fts5SegIter.iLeafOffset points to the first byte of ** position-list content for the current rowid. Back it up so that it ** points to the start of the position-list size field. */ - pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel); +#if 0 + if( eDetail!=FTS5_DETAIL_NONE ){ + pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel); + } +#else + int iPoslist; + if( pIter->iTermLeafPgno==pIter->iLeafPgno ){ + iPoslist = pIter->iTermLeafOffset; + }else{ + iPoslist = 4; + } + fts5IndexSkipVarint(pLeaf->p, iPoslist); + assert( eDetail==FTS5_DETAIL_NONE || iPoslist==( + pIter->iLeafOffset - sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel) + )); + pIter->iLeafOffset = iPoslist; +#endif /* If this condition is true then the largest rowid for the current ** term may not be stored on the current page. So search forward to ** see where said rowid really is. */ if( pIter->iEndofDoclist>=pLeaf->szLeaf ){ @@ -1963,15 +2116,10 @@ } pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno); } -#define fts5IndexSkipVarint(a, iOff) { \ - int iEnd = iOff+9; \ - while( (a[iOff++] & 0x80) && iOffp = (u8*)pList; pLeaf->nn = pLeaf->szLeaf = nList; pIter->pLeaf = pLeaf; pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid); - pIter->iEndofDoclist = pLeaf->nn+1; + pIter->iEndofDoclist = pLeaf->nn; if( flags & FTS5INDEX_QUERY_DESC ){ pIter->flags |= FTS5_SEGITER_REVERSE; fts5SegIterReverseInitPage(p, pIter); }else{ fts5SegIterLoadNPos(p, pIter); } } + + fts5SegIterSetNext(p, pIter); } /* ** Zero the iterator passed as the only argument. */ @@ -2479,11 +2631,11 @@ bMove = 0; } } do{ - if( bMove ) fts5SegIterNext(p, pIter, 0); + if( bMove && p->rc==SQLITE_OK ) pIter->xNext(p, pIter, 0); if( pIter->pLeaf==0 ) break; if( bRev==0 && pIter->iRowid>=iMatch ) break; if( bRev!=0 && pIter->iRowid<=iMatch ) break; bMove = 1; }while( p->rc==SQLITE_OK ); @@ -2513,11 +2665,13 @@ ){ int i; for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){ int iEq; if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){ - fts5SegIterNext(p, &pIter->aSeg[iEq], 0); + Fts5SegIter *pSeg = &pIter->aSeg[iEq]; + assert( p->rc==SQLITE_OK ); + pSeg->xNext(p, pSeg, 0); i = pIter->nSeg + iEq; } } } @@ -2600,11 +2754,11 @@ Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; assert( p->rc==SQLITE_OK ); if( bUseFrom && pSeg->pDlidx ){ fts5SegIterNextFrom(p, pSeg, iFrom); }else{ - fts5SegIterNext(p, pSeg, &bNewTerm); + pSeg->xNext(p, pSeg, &bNewTerm); } if( pSeg->pLeaf==0 || bNewTerm || fts5MultiIterAdvanceRowid(p, pIter, iFirst) ){ @@ -2628,11 +2782,12 @@ do { int iFirst = pIter->aFirst[1].iFirst; Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; int bNewTerm = 0; - fts5SegIterNext(p, pSeg, &bNewTerm); + assert( p->rc==SQLITE_OK ); + pSeg->xNext(p, pSeg, &bNewTerm); if( pSeg->pLeaf==0 || bNewTerm || fts5MultiIterAdvanceRowid(p, pIter, iFirst) ){ fts5MultiIterAdvanced(p, pIter, iFirst, 1); fts5MultiIterSetEof(pIter); @@ -2748,11 +2903,12 @@ ** object and set the output variable to NULL. */ if( p->rc==SQLITE_OK ){ for(iIter=pNew->nSeg-1; iIter>0; iIter--){ int iEq; if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){ - fts5SegIterNext(p, &pNew->aSeg[iEq], 0); + Fts5SegIter *pSeg = &pNew->aSeg[iEq]; + if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0); fts5MultiIterAdvanced(p, pNew, iEq, iIter); } } fts5MultiIterSetEof(pNew); fts5AssertMultiIterSetup(p, pNew); @@ -2798,10 +2954,11 @@ } pData = 0; }else{ pNew->bEof = 1; } + fts5SegIterSetNext(p, pIter); *ppOut = pNew; } fts5DataRelease(pData); @@ -2866,10 +3023,13 @@ Fts5Data *pData = 0; u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset]; int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset); int pgno = pSeg->iLeafPgno; int pgnoSave = 0; + + /* This function does notmwork with detail=none databases. */ + assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE ); if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){ pgnoSave = pgno+1; } @@ -3290,12 +3450,11 @@ ** Append a rowid and position-list size field to the writers output. */ static void fts5WriteAppendRowid( Fts5Index *p, Fts5SegWriter *pWriter, - i64 iRowid, - int nPos + i64 iRowid ){ if( p->rc==SQLITE_OK ){ Fts5PageWriter *pPage = &pWriter->writer; if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){ @@ -3318,12 +3477,10 @@ fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid); } pWriter->iPrevRowid = iRowid; pWriter->bFirstRowidInDoclist = 0; pWriter->bFirstRowidInPage = 0; - - fts5BufferAppendVarint(&p->rc, &pPage->buf, nPos); } } static void fts5WriteAppendPoslistData( Fts5Index *p, @@ -3515,10 +3672,11 @@ int nInput; /* Number of input segments */ Fts5SegWriter writer; /* Writer object */ Fts5StructureSegment *pSeg; /* Output segment */ Fts5Buffer term; int bOldest; /* True if the output segment is the oldest */ + int eDetail = p->pConfig->eDetail; assert( iLvlnLevel ); assert( pLvl->nMerge<=pLvl->nSeg ); memset(&writer, 0, sizeof(Fts5SegWriter)); @@ -3584,15 +3742,25 @@ fts5BufferSet(&p->rc, &term, nTerm, pTerm); } /* Append the rowid to the output */ /* WRITEPOSLISTSIZE */ - nPos = pSegIter->nPos*2 + pSegIter->bDel; - fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter), nPos); + fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter)); - /* Append the position-list data to the output */ - fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback); + if( eDetail==FTS5_DETAIL_NONE ){ + if( pSegIter->bDel ){ + fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0); + if( pSegIter->nPos>0 ){ + fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0); + } + } + }else{ + /* Append the position-list data to the output */ + nPos = pSegIter->nPos*2 + pSegIter->bDel; + fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos); + fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback); + } } /* Flush the last leaf page to disk. Set the output segment b-tree height ** and last leaf page number at the same time. */ fts5WriteFinish(p, &writer, &pSeg->pgnoLast); @@ -3776,11 +3944,11 @@ pStruct = fts5StructureRead(p); iSegid = fts5AllocateSegid(p, pStruct); if( iSegid ){ const int pgsz = p->pConfig->pgsz; - + int eDetail = p->pConfig->eDetail; Fts5StructureSegment *pSeg; /* New segment within pStruct */ Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */ Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */ Fts5SegWriter writer; @@ -3819,16 +3987,11 @@ /* The entire doclist will not fit on this leaf. The following ** loop iterates through the poslists that make up the current ** doclist. */ while( p->rc==SQLITE_OK && iOffp[0], (u16)pBuf->n); /* first rowid on page */ pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid); @@ -3837,38 +4000,56 @@ }else{ pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta); } assert( pBuf->n<=pBuf->nSpace ); - if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){ - /* The entire poslist will fit on the current leaf. So copy - ** it in one go. */ - fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy); - }else{ - /* The entire poslist will not fit on this leaf. So it needs - ** to be broken into sections. The only qualification being - ** that each varint must be stored contiguously. */ - const u8 *pPoslist = &pDoclist[iOff]; - int iPos = 0; - while( p->rc==SQLITE_OK ){ - int nSpace = pgsz - pBuf->n - pPgidx->n; - int n = 0; - if( (nCopy - iPos)<=nSpace ){ - n = nCopy - iPos; - }else{ - n = fts5PoslistPrefix(&pPoslist[iPos], nSpace); - } - assert( n>0 ); - fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n); - iPos += n; - if( (pBuf->n + pPgidx->n)>=pgsz ){ - fts5WriteFlushLeaf(p, &writer); - } - if( iPos>=nCopy ) break; - } - } - iOff += nCopy; + if( eDetail==FTS5_DETAIL_NONE ){ + if( iOffp[pBuf->n++] = 0; + iOff++; + if( iOffp[pBuf->n++] = 0; + iOff++; + } + } + if( (pBuf->n + pPgidx->n)>=pgsz ){ + fts5WriteFlushLeaf(p, &writer); + } + }else{ + int bDummy; + int nPos; + int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy); + nCopy += nPos; + if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){ + /* The entire poslist will fit on the current leaf. So copy + ** it in one go. */ + fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy); + }else{ + /* The entire poslist will not fit on this leaf. So it needs + ** to be broken into sections. The only qualification being + ** that each varint must be stored contiguously. */ + const u8 *pPoslist = &pDoclist[iOff]; + int iPos = 0; + while( p->rc==SQLITE_OK ){ + int nSpace = pgsz - pBuf->n - pPgidx->n; + int n = 0; + if( (nCopy - iPos)<=nSpace ){ + n = nCopy - iPos; + }else{ + n = fts5PoslistPrefix(&pPoslist[iPos], nSpace); + } + assert( n>0 ); + fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n); + iPos += n; + if( (pBuf->n + pPgidx->n)>=pgsz ){ + fts5WriteFlushLeaf(p, &writer); + } + if( iPos>=nCopy ) break; + } + } + iOff += nCopy; + } } } /* TODO2: Doclist terminator written here. */ /* pBuf->p[pBuf->n++] = '\0'; */ @@ -3999,10 +4180,18 @@ Fts5Buffer *pBuf; /* Append to this buffer */ Fts5Colset *pColset; /* Restrict matches to this column */ int eState; /* See above */ }; +typedef struct PoslistOffsetsCtx PoslistOffsetsCtx; +struct PoslistOffsetsCtx { + Fts5Buffer *pBuf; /* Append to this buffer */ + Fts5Colset *pColset; /* Restrict matches to this column */ + int iRead; + int iWrite; +}; + /* ** TODO: Make this more efficient! */ static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){ int i; @@ -4009,10 +4198,32 @@ for(i=0; inCol; i++){ if( pColset->aiCol[i]==iCol ) return 1; } return 0; } + +static void fts5PoslistOffsetsCallback( + Fts5Index *p, + void *pContext, + const u8 *pChunk, int nChunk +){ + PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext; + assert_nc( nChunk>=0 ); + if( nChunk>0 ){ + int i = 0; + while( iiRead - 2; + pCtx->iRead = iVal; + if( fts5IndexColsetTest(pCtx->pColset, iVal) ){ + fts5BufferSafeAppendVarint(pCtx->pBuf, iVal + 2 - pCtx->iWrite); + pCtx->iWrite = iVal; + } + } + } +} static void fts5PoslistFilterCallback( Fts5Index *p, void *pContext, const u8 *pChunk, int nChunk @@ -4077,16 +4288,24 @@ ){ if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos) ){ if( pColset==0 ){ fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback); }else{ - PoslistCallbackCtx sCtx; - sCtx.pBuf = pBuf; - sCtx.pColset = pColset; - sCtx.eState = fts5IndexColsetTest(pColset, 0); - assert( sCtx.eState==0 || sCtx.eState==1 ); - fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback); + if( p->pConfig->eDetail==FTS5_DETAIL_FULL ){ + PoslistCallbackCtx sCtx; + sCtx.pBuf = pBuf; + sCtx.pColset = pColset; + sCtx.eState = fts5IndexColsetTest(pColset, 0); + assert( sCtx.eState==0 || sCtx.eState==1 ); + fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback); + }else{ + PoslistOffsetsCtx sCtx; + memset(&sCtx, 0, sizeof(sCtx)); + sCtx.pBuf = pBuf; + sCtx.pColset = pColset; + fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback); + } } } } /* @@ -4125,10 +4344,20 @@ prev = *p++; } return p - (*pa); } +static int fts5AppendRowid( + Fts5Index *p, + i64 iDelta, + Fts5IndexIter *pMulti, + Fts5Colset *pColset, + Fts5Buffer *pBuf +){ + fts5BufferAppendVarint(&p->rc, pBuf, iDelta); + return 0; +} /* ** Iterator pMulti currently points to a valid entry (not EOF). This ** function appends the following to buffer pBuf: ** @@ -4152,12 +4381,12 @@ if( p->rc==SQLITE_OK ){ Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; assert( fts5MultiIterEof(p, pMulti)==0 ); assert( pSeg->nPos>0 ); if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos+9+9) ){ - - if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf + if( p->pConfig->eDetail==FTS5_DETAIL_FULL + && pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf && (pColset==0 || pColset->nCol==1) ){ const u8 *pPos = &pSeg->pLeaf->p[pSeg->iLeafOffset]; int nPos; if( pColset ){ @@ -4198,16 +4427,16 @@ sqlite3Fts5PutVarint(&pBuf->p[iSv2], nActual*2); } } } } - } } return 0; } + static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ u8 *p = pIter->aPoslist + pIter->nSize + pIter->nPoslist; assert( pIter->aPoslist ); @@ -4264,10 +4493,73 @@ #define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) { \ assert( (pBuf)->n!=0 || (iLastRowid)==0 ); \ fts5BufferSafeAppendVarint((pBuf), (iRowid) - (iLastRowid)); \ (iLastRowid) = (iRowid); \ } + +/* +** Swap the contents of buffer *p1 with that of *p2. +*/ +static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){ + Fts5Buffer tmp = *p1; + *p1 = *p2; + *p2 = tmp; +} + +static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){ + int i = *piOff; + if( i>=pBuf->n ){ + *piOff = -1; + }else{ + u64 iVal; + *piOff = i + sqlite3Fts5GetVarint(&pBuf->p[i], &iVal); + *piRowid += iVal; + } +} + +/* +** This is the equivalent of fts5MergePrefixLists() for detail=none mode. +** In this case the buffers consist of a delta-encoded list of rowids only. +*/ +static void fts5MergeRowidLists( + Fts5Index *p, /* FTS5 backend object */ + Fts5Buffer *p1, /* First list to merge */ + Fts5Buffer *p2 /* Second list to merge */ +){ + int i1 = 0; + int i2 = 0; + i64 iRowid1 = 0; + i64 iRowid2 = 0; + i64 iOut = 0; + + Fts5Buffer out; + memset(&out, 0, sizeof(out)); + sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n); + if( p->rc ) return; + + fts5NextRowid(p1, &i1, &iRowid1); + fts5NextRowid(p2, &i2, &iRowid2); + while( i1>=0 || i2>=0 ){ + if( i1>=0 && (i2<0 || iRowid1iOut ); + fts5BufferSafeAppendVarint(&out, iRowid1 - iOut); + iOut = iRowid1; + fts5NextRowid(p1, &i1, &iRowid1); + }else{ + assert( iOut==0 || iRowid2>iOut ); + fts5BufferSafeAppendVarint(&out, iRowid2 - iOut); + iOut = iRowid2; + if( i1>=0 && iRowid1==iRowid2 ){ + fts5NextRowid(p1, &i1, &iRowid1); + } + fts5NextRowid(p2, &i2, &iRowid2); + } + } + + fts5BufferSwap(&out, p1); + fts5BufferFree(&out); +} /* ** Buffers p1 and p2 contain doclists. This function merges the content ** of the two doclists together and sets buffer p1 to the result before ** returning. @@ -4333,11 +4625,13 @@ sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2); if( iPos1==iPos2 ){ sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1,&iPos1); } } - p->rc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, iNew); + if( iNew!=writer.iPrev || tmp.n==0 ){ + p->rc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, iNew); + } } /* WRITEPOSLISTSIZE */ fts5BufferSafeAppendVarint(&out, tmp.n * 2); fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n); @@ -4350,16 +4644,10 @@ fts5BufferFree(&tmp); fts5BufferFree(&out); } } -static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){ - Fts5Buffer tmp = *p1; - *p1 = *p2; - *p2 = tmp; -} - static void fts5SetupPrefixIter( Fts5Index *p, /* Index to read from */ int bDesc, /* True for "ORDER BY rowid DESC" */ const u8 *pToken, /* Buffer containing prefix to match */ int nToken, /* Size of buffer pToken in bytes */ @@ -4367,10 +4655,20 @@ Fts5IndexIter **ppIter /* OUT: New iterator */ ){ Fts5Structure *pStruct; Fts5Buffer *aBuf; const int nBuf = 32; + + void (*xMerge)(Fts5Index*, Fts5Buffer*, Fts5Buffer*); + int (*xAppend)(Fts5Index*, i64, Fts5IndexIter*, Fts5Colset*, Fts5Buffer*); + if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){ + xMerge = fts5MergeRowidLists; + xAppend = fts5AppendRowid; + }else{ + xMerge = fts5MergePrefixLists; + xAppend = fts5AppendPoslist; + } aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); pStruct = fts5StructureRead(p); if( aBuf && pStruct ){ @@ -4400,25 +4698,25 @@ assert( irc==SQLITE_OK ){ - fts5MergePrefixLists(p, &doclist, &aBuf[i]); + xMerge(p, &doclist, &aBuf[i]); } fts5BufferFree(&aBuf[i]); } fts5MultiIterFree(p, p1); @@ -4444,11 +4742,11 @@ int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){ assert( p->rc==SQLITE_OK ); /* Allocate the hash table if it has not already been allocated */ if( p->pHash==0 ){ - p->rc = sqlite3Fts5HashNew(&p->pHash, &p->nPendingData); + p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData); } /* Flush the hash table to disk if required */ if( iRowidiWriteRowid || (iRowid==p->iWriteRowid && p->bDelete==0) @@ -4565,11 +4863,15 @@ /* ** Argument p points to a buffer containing utf-8 text that is n bytes in ** size. Return the number of bytes in the nChar character prefix of the ** buffer, or 0 if there are less than nChar characters in total. */ -static int fts5IndexCharlenToBytelen(const char *p, int nByte, int nChar){ +static int sqlite3Fts5IndexCharlenToBytelen( + const char *p, + int nByte, + int nChar +){ int n = 0; int i; for(i=0; i=nByte ) return 0; /* Input contains fewer than nChar chars */ if( (unsigned char)p[n++]>=0xc0 ){ @@ -4622,11 +4924,12 @@ rc = sqlite3Fts5HashWrite( p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken ); for(i=0; inPrefix && rc==SQLITE_OK; i++){ - int nByte = fts5IndexCharlenToBytelen(pToken, nToken, pConfig->aPrefix[i]); + const int nChar = pConfig->aPrefix[i]; + int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar); if( nByte ){ rc = sqlite3Fts5HashWrite(p->pHash, p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken, nByte ); @@ -4800,13 +5103,20 @@ const u8 **pp, /* OUT: Pointer to position-list data */ int *pn, /* OUT: Size of position-list in bytes */ i64 *piRowid /* OUT: Current rowid */ ){ Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; + int eDetail = pIter->pIndex->pConfig->eDetail; + assert( pIter->pIndex->rc==SQLITE_OK ); *piRowid = pSeg->iRowid; - if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){ + if( eDetail==FTS5_DETAIL_NONE ){ + *pn = pSeg->nPos; + }else + if( eDetail==FTS5_DETAIL_FULL + && pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf + ){ u8 *pPos = &pSeg->pLeaf->p[pSeg->iLeafOffset]; if( pColset==0 || pIter->bFiltered ){ *pn = pSeg->nPos; *pp = pPos; }else if( pColset->nCol==1 ){ @@ -4819,15 +5129,28 @@ *pn = pIter->poslist.n; } }else{ fts5BufferZero(&pIter->poslist); fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist); - *pp = pIter->poslist.p; + if( eDetail==FTS5_DETAIL_FULL ){ + *pp = pIter->poslist.p; + } *pn = pIter->poslist.n; } return fts5IndexReturn(pIter->pIndex); } + +int sqlite3Fts5IterCollist( + Fts5IndexIter *pIter, + const u8 **pp, /* OUT: Pointer to position-list data */ + int *pn /* OUT: Size of position-list in bytes */ +){ + assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS ); + *pp = pIter->poslist.p; + *pn = pIter->poslist.n; + return SQLITE_OK; +} /* ** This function is similar to sqlite3Fts5IterPoslist(), except that it ** copies the position list into the buffer supplied as the second ** argument. @@ -4938,11 +5261,11 @@ */ /* ** Return a simple checksum value based on the arguments. */ -static u64 fts5IndexEntryCksum( +u64 sqlite3Fts5IndexEntryCksum( i64 iRowid, int iCol, int iPos, int iIdx, const char *pTerm, @@ -5008,34 +5331,41 @@ const char *z, /* Index key to query for */ int n, /* Size of index key in bytes */ int flags, /* Flags for Fts5IndexQuery */ u64 *pCksum /* IN/OUT: Checksum value */ ){ + int eDetail = p->pConfig->eDetail; u64 cksum = *pCksum; Fts5IndexIter *pIdxIter = 0; + Fts5Buffer buf = {0, 0, 0}; int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIdxIter); while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){ - i64 dummy; - const u8 *pPos; - int nPos; i64 rowid = sqlite3Fts5IterRowid(pIdxIter); - rc = sqlite3Fts5IterPoslist(pIdxIter, 0, &pPos, &nPos, &dummy); - if( rc==SQLITE_OK ){ - Fts5PoslistReader sReader; - for(sqlite3Fts5PoslistReaderInit(pPos, nPos, &sReader); - sReader.bEof==0; - sqlite3Fts5PoslistReaderNext(&sReader) - ){ - int iCol = FTS5_POS2COLUMN(sReader.iPos); - int iOff = FTS5_POS2OFFSET(sReader.iPos); - cksum ^= fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n); - } + + if( eDetail==FTS5_DETAIL_NONE ){ + cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n); + }else{ + rc = sqlite3Fts5IterPoslistBuffer(pIdxIter, &buf); + if( rc==SQLITE_OK ){ + Fts5PoslistReader sReader; + for(sqlite3Fts5PoslistReaderInit(buf.p, buf.n, &sReader); + sReader.bEof==0; + sqlite3Fts5PoslistReaderNext(&sReader) + ){ + int iCol = FTS5_POS2COLUMN(sReader.iPos); + int iOff = FTS5_POS2OFFSET(sReader.iPos); + cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n); + } + } + } + if( rc==SQLITE_OK ){ rc = sqlite3Fts5IterNext(pIdxIter); } } sqlite3Fts5IterClose(pIdxIter); + fts5BufferFree(&buf); *pCksum = cksum; return rc; } @@ -5325,18 +5655,19 @@ /* ** Run internal checks to ensure that the FTS index (a) is internally ** consistent and (b) contains entries for which the XOR of the checksums -** as calculated by fts5IndexEntryCksum() is cksum. +** as calculated by sqlite3Fts5IndexEntryCksum() is cksum. ** ** Return SQLITE_CORRUPT if any of the internal checks fail, or if the ** checksum does not match. Return SQLITE_OK if all checks pass without ** error, or some other SQLite error code if another error (e.g. OOM) ** occurs. */ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ + int eDetail = p->pConfig->eDetail; u64 cksum2 = 0; /* Checksum based on contents of indexes */ Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */ Fts5IndexIter *pIter; /* Used to iterate through entire index */ Fts5Structure *pStruct; /* Index structure */ @@ -5384,16 +5715,22 @@ char *z = (char*)fts5MultiIterTerm(pIter, &n); /* If this is a new term, query for it. Update cksum3 with the results. */ fts5TestTerm(p, &term, z, n, cksum2, &cksum3); - poslist.n = 0; - fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst] , 0, &poslist); - while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){ - int iCol = FTS5_POS2COLUMN(iPos); - int iTokOff = FTS5_POS2OFFSET(iPos); - cksum2 ^= fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n); + if( eDetail==FTS5_DETAIL_NONE ){ + if( 0==fts5MultiIterIsEmpty(p, pIter) ){ + cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, 0, 0, -1, z, n); + } + }else{ + poslist.n = 0; + fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst], 0, &poslist); + while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){ + int iCol = FTS5_POS2COLUMN(iPos); + int iTokOff = FTS5_POS2OFFSET(iPos); + cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n); + } } } fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3); fts5MultiIterFree(p, pIter); @@ -5405,38 +5742,10 @@ #endif fts5BufferFree(&poslist); return fts5IndexReturn(p); } - -/* -** Calculate and return a checksum that is the XOR of the index entry -** checksum of all entries that would be generated by the token specified -** by the final 5 arguments. -*/ -u64 sqlite3Fts5IndexCksum( - Fts5Config *pConfig, /* Configuration object */ - i64 iRowid, /* Document term appears in */ - int iCol, /* Column term appears in */ - int iPos, /* Position term appears in */ - const char *pTerm, int nTerm /* Term at iPos */ -){ - u64 ret = 0; /* Return value */ - int iIdx; /* For iterating through indexes */ - - ret = fts5IndexEntryCksum(iRowid, iCol, iPos, 0, pTerm, nTerm); - - for(iIdx=0; iIdxnPrefix; iIdx++){ - int nByte = fts5IndexCharlenToBytelen(pTerm, nTerm, pConfig->aPrefix[iIdx]); - if( nByte ){ - ret ^= fts5IndexEntryCksum(iRowid, iCol, iPos, iIdx+1, pTerm, nByte); - } - } - - return ret; -} - /************************************************************************* ************************************************************************** ** Below this point is the implementation of the fts5_decode() scalar ** function only. */ Index: ext/fts5/fts5_main.c ================================================================== --- ext/fts5/fts5_main.c +++ ext/fts5/fts5_main.c @@ -224,10 +224,11 @@ #define FTS5CSR_REQUIRE_DOCSIZE 0x02 #define FTS5CSR_REQUIRE_INST 0x04 #define FTS5CSR_EOF 0x08 #define FTS5CSR_FREE_ZRANK 0x10 #define FTS5CSR_REQUIRE_RESEEK 0x20 +#define FTS5CSR_REQUIRE_POSLIST 0x40 #define BitFlagAllTest(x,y) (((x) & (y))==(y)) #define BitFlagTest(x,y) (((x) & (y))!=0) @@ -306,10 +307,17 @@ ** Return true if pTab is a contentless table. */ static int fts5IsContentless(Fts5Table *pTab){ return pTab->pConfig->eContent==FTS5_CONTENT_NONE; } + +/* +** Return true if pTab is an offsetless table. +*/ +static int fts5IsOffsetless(Fts5Table *pTab){ + return pTab->pConfig->eDetail!=FTS5_DETAIL_FULL; +} /* ** Delete a virtual table handle allocated by fts5InitVtab(). */ static void fts5FreeVtab(Fts5Table *pTab){ @@ -637,10 +645,11 @@ static void fts5CsrNewrow(Fts5Cursor *pCsr){ CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE | FTS5CSR_REQUIRE_INST + | FTS5CSR_REQUIRE_POSLIST ); } static void fts5FreeCursorComponents(Fts5Cursor *pCsr){ Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); @@ -719,19 +728,22 @@ pSorter->iRowid = sqlite3_column_int64(pSorter->pStmt, 0); nBlob = sqlite3_column_bytes(pSorter->pStmt, 1); aBlob = a = sqlite3_column_blob(pSorter->pStmt, 1); - for(i=0; i<(pSorter->nIdx-1); i++){ - int iVal; - a += fts5GetVarint32(a, iVal); - iOff += iVal; - pSorter->aIdx[i] = iOff; - } - pSorter->aIdx[i] = &aBlob[nBlob] - a; - - pSorter->aPoslist = a; + /* nBlob==0 in detail=none mode. */ + if( nBlob>0 ){ + for(i=0; i<(pSorter->nIdx-1); i++){ + int iVal; + a += fts5GetVarint32(a, iVal); + iOff += iVal; + pSorter->aIdx[i] = iOff; + } + pSorter->aIdx[i] = &aBlob[nBlob] - a; + pSorter->aPoslist = a; + } + fts5CsrNewrow(pCsr); } return rc; } @@ -1165,10 +1177,11 @@ assert( pCsr->iLastRowid==LARGEST_INT64 ); assert( pCsr->iFirstRowid==SMALLEST_INT64 ); pCsr->ePlan = FTS5_PLAN_SOURCE; pCsr->pExpr = pTab->pSortCsr->pExpr; rc = fts5CursorFirst(pTab, pCsr, bDesc); + sqlite3Fts5ExprClearEof(pCsr->pExpr); }else if( pMatch ){ const char *zExpr = (const char*)sqlite3_value_text(apVal[0]); if( zExpr==0 ) zExpr = ""; rc = fts5CursorParseRank(pConfig, pCsr, pRank); @@ -1594,10 +1607,12 @@ fts5CheckTransactionState(pTab, FTS5_ROLLBACK, 0); rc = sqlite3Fts5StorageRollback(pTab->pStorage); return rc; } +static int fts5CsrPoslist(Fts5Cursor*, int, const u8**, int*); + static void *fts5ApiUserData(Fts5Context *pCtx){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; return pCsr->pAux->pUserData; } @@ -1643,21 +1658,76 @@ static int fts5ApiPhraseSize(Fts5Context *pCtx, int iPhrase){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; return sqlite3Fts5ExprPhraseSize(pCsr->pExpr, iPhrase); } -static int fts5CsrPoslist(Fts5Cursor *pCsr, int iPhrase, const u8 **pa){ - int n; - if( pCsr->pSorter ){ +static int fts5ApiColumnText( + Fts5Context *pCtx, + int iCol, + const char **pz, + int *pn +){ + int rc = SQLITE_OK; + Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; + if( fts5IsContentless((Fts5Table*)(pCsr->base.pVtab)) ){ + *pz = 0; + *pn = 0; + }else{ + rc = fts5SeekCursor(pCsr, 0); + if( rc==SQLITE_OK ){ + *pz = (const char*)sqlite3_column_text(pCsr->pStmt, iCol+1); + *pn = sqlite3_column_bytes(pCsr->pStmt, iCol+1); + } + } + return rc; +} + +static int fts5CsrPoslist( + Fts5Cursor *pCsr, + int iPhrase, + const u8 **pa, + int *pn +){ + Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig; + int rc = SQLITE_OK; + int bLive = (pCsr->pSorter==0); + + if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_POSLIST) ){ + + if( pConfig->eDetail!=FTS5_DETAIL_FULL ){ + Fts5PoslistPopulator *aPopulator; + int i; + aPopulator = sqlite3Fts5ExprClearPoslists(pCsr->pExpr, bLive); + if( aPopulator==0 ) rc = SQLITE_NOMEM; + for(i=0; inCol && rc==SQLITE_OK; i++){ + int n; const char *z; + rc = fts5ApiColumnText((Fts5Context*)pCsr, i, &z, &n); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5ExprPopulatePoslists( + pConfig, pCsr->pExpr, aPopulator, i, z, n + ); + } + } + sqlite3_free(aPopulator); + + if( pCsr->pSorter ){ + sqlite3Fts5ExprCheckPoslists(pCsr->pExpr, pCsr->pSorter->iRowid); + } + } + CsrFlagClear(pCsr, FTS5CSR_REQUIRE_POSLIST); + } + + if( pCsr->pSorter && pConfig->eDetail==FTS5_DETAIL_FULL ){ Fts5Sorter *pSorter = pCsr->pSorter; int i1 = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]); - n = pSorter->aIdx[iPhrase] - i1; + *pn = pSorter->aIdx[iPhrase] - i1; *pa = &pSorter->aPoslist[i1]; }else{ - n = sqlite3Fts5ExprPoslist(pCsr->pExpr, iPhrase, pa); + *pn = sqlite3Fts5ExprPoslist(pCsr->pExpr, iPhrase, pa); } - return n; + + return rc; } /* ** Ensure that the Fts5Cursor.nInstCount and aInst[] variables are populated ** correctly for the current view. Return SQLITE_OK if successful, or an @@ -1678,47 +1748,50 @@ if( aIter ){ int nInst = 0; /* Number instances seen so far */ int i; /* Initialize all iterators */ - for(i=0; i=pCsr->nInstAlloc ){ - pCsr->nInstAlloc = pCsr->nInstAlloc ? pCsr->nInstAlloc*2 : 32; - aInst = (int*)sqlite3_realloc( - pCsr->aInst, pCsr->nInstAlloc*sizeof(int)*3 - ); - if( aInst ){ - pCsr->aInst = aInst; - }else{ - rc = SQLITE_NOMEM; - break; - } - } - - aInst = &pCsr->aInst[3 * (nInst-1)]; - aInst[0] = iBest; - aInst[1] = FTS5_POS2COLUMN(aIter[iBest].iPos); - aInst[2] = FTS5_POS2OFFSET(aIter[iBest].iPos); - sqlite3Fts5PoslistReaderNext(&aIter[iBest]); + if( rc==SQLITE_OK ){ + while( 1 ){ + int *aInst; + int iBest = -1; + for(i=0; i=pCsr->nInstAlloc ){ + pCsr->nInstAlloc = pCsr->nInstAlloc ? pCsr->nInstAlloc*2 : 32; + aInst = (int*)sqlite3_realloc( + pCsr->aInst, pCsr->nInstAlloc*sizeof(int)*3 + ); + if( aInst ){ + pCsr->aInst = aInst; + }else{ + rc = SQLITE_NOMEM; + break; + } + } + + aInst = &pCsr->aInst[3 * (nInst-1)]; + aInst[0] = iBest; + aInst[1] = FTS5_POS2COLUMN(aIter[iBest].iPos); + aInst[2] = FTS5_POS2OFFSET(aIter[iBest].iPos); + sqlite3Fts5PoslistReaderNext(&aIter[iBest]); + } } pCsr->nInstCount = nInst; CsrFlagClear(pCsr, FTS5CSR_REQUIRE_INST); } @@ -1747,10 +1820,16 @@ if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)==0 || SQLITE_OK==(rc = fts5CacheInstArray(pCsr)) ){ if( iIdx<0 || iIdx>=pCsr->nInstCount ){ rc = SQLITE_RANGE; +#if 0 + }else if( fts5IsOffsetless((Fts5Table*)pCsr->base.pVtab) ){ + *piPhrase = pCsr->aInst[iIdx*3]; + *piCol = pCsr->aInst[iIdx*3 + 2]; + *piOff = -1; +#endif }else{ *piPhrase = pCsr->aInst[iIdx*3]; *piCol = pCsr->aInst[iIdx*3 + 1]; *piOff = pCsr->aInst[iIdx*3 + 2]; } @@ -1760,31 +1839,10 @@ static sqlite3_int64 fts5ApiRowid(Fts5Context *pCtx){ return fts5CursorRowid((Fts5Cursor*)pCtx); } -static int fts5ApiColumnText( - Fts5Context *pCtx, - int iCol, - const char **pz, - int *pn -){ - int rc = SQLITE_OK; - Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; - if( fts5IsContentless((Fts5Table*)(pCsr->base.pVtab)) ){ - *pz = 0; - *pn = 0; - }else{ - rc = fts5SeekCursor(pCsr, 0); - if( rc==SQLITE_OK ){ - *pz = (const char*)sqlite3_column_text(pCsr->pStmt, iCol+1); - *pn = sqlite3_column_bytes(pCsr->pStmt, iCol+1); - } - } - return rc; -} - static int fts5ColumnSizeCb( void *pContext, /* Pointer to int */ int tflags, const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ @@ -1925,23 +1983,94 @@ } *piOff += (iVal-2); } } -static void fts5ApiPhraseFirst( +static int fts5ApiPhraseFirst( Fts5Context *pCtx, int iPhrase, Fts5PhraseIter *pIter, int *piCol, int *piOff ){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; - int n = fts5CsrPoslist(pCsr, iPhrase, &pIter->a); - pIter->b = &pIter->a[n]; - *piCol = 0; - *piOff = 0; - fts5ApiPhraseNext(pCtx, pIter, piCol, piOff); + int n; + int rc = fts5CsrPoslist(pCsr, iPhrase, &pIter->a, &n); + if( rc==SQLITE_OK ){ + pIter->b = &pIter->a[n]; + *piCol = 0; + *piOff = 0; + fts5ApiPhraseNext(pCtx, pIter, piCol, piOff); + } + return rc; +} + +static void fts5ApiPhraseNextColumn( + Fts5Context *pCtx, + Fts5PhraseIter *pIter, + int *piCol +){ + Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; + Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig; + + if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){ + if( pIter->a>=pIter->b ){ + *piCol = -1; + }else{ + int iIncr; + pIter->a += fts5GetVarint32(&pIter->a[0], iIncr); + *piCol += (iIncr-2); + } + }else{ + while( 1 ){ + int dummy; + if( pIter->a>=pIter->b ){ + *piCol = -1; + return; + } + if( pIter->a[0]==0x01 ) break; + pIter->a += fts5GetVarint32(pIter->a, dummy); + } + pIter->a += 1 + fts5GetVarint32(&pIter->a[1], *piCol); + } +} + +static int fts5ApiPhraseFirstColumn( + Fts5Context *pCtx, + int iPhrase, + Fts5PhraseIter *pIter, + int *piCol +){ + int rc = SQLITE_OK; + Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; + Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig; + + if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){ + int n; + rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, iPhrase, &pIter->a, &n); + if( rc==SQLITE_OK ){ + pIter->b = &pIter->a[n]; + *piCol = 0; + fts5ApiPhraseNextColumn(pCtx, pIter, piCol); + } + }else{ + int n; + rc = fts5CsrPoslist(pCsr, iPhrase, &pIter->a, &n); + if( rc==SQLITE_OK ){ + pIter->b = &pIter->a[n]; + if( n<=0 ){ + *piCol = -1; + }else if( pIter->a[0]==0x01 ){ + pIter->a += 1 + fts5GetVarint32(&pIter->a[1], *piCol); + }else{ + *piCol = 0; + } + } + } + + return rc; } + static int fts5ApiQueryPhrase(Fts5Context*, int, void*, int(*)(const Fts5ExtensionApi*, Fts5Context*, void*) ); @@ -1962,12 +2091,13 @@ fts5ApiQueryPhrase, fts5ApiSetAuxdata, fts5ApiGetAuxdata, fts5ApiPhraseFirst, fts5ApiPhraseNext, + fts5ApiPhraseFirstColumn, + fts5ApiPhraseNextColumn, }; - /* ** Implementation of API function xQueryPhrase(). */ static int fts5ApiQueryPhrase( @@ -2096,24 +2226,50 @@ int rc = SQLITE_OK; int nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr); Fts5Buffer val; memset(&val, 0, sizeof(Fts5Buffer)); - - /* Append the varints */ - for(i=0; i<(nPhrase-1); i++){ - const u8 *dummy; - int nByte = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &dummy); - sqlite3Fts5BufferAppendVarint(&rc, &val, nByte); - } - - /* Append the position lists */ - for(i=0; ipExpr, i, &pPoslist); - sqlite3Fts5BufferAppendBlob(&rc, &val, nPoslist, pPoslist); + switch( ((Fts5Table*)(pCsr->base.pVtab))->pConfig->eDetail ){ + case FTS5_DETAIL_FULL: + + /* Append the varints */ + for(i=0; i<(nPhrase-1); i++){ + const u8 *dummy; + int nByte = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &dummy); + sqlite3Fts5BufferAppendVarint(&rc, &val, nByte); + } + + /* Append the position lists */ + for(i=0; ipExpr, i, &pPoslist); + sqlite3Fts5BufferAppendBlob(&rc, &val, nPoslist, pPoslist); + } + break; + + case FTS5_DETAIL_COLUMNS: + + /* Append the varints */ + for(i=0; rc==SQLITE_OK && i<(nPhrase-1); i++){ + const u8 *dummy; + int nByte; + rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, i, &dummy, &nByte); + sqlite3Fts5BufferAppendVarint(&rc, &val, nByte); + } + + /* Append the position lists */ + for(i=0; rc==SQLITE_OK && ipExpr, i, &pPoslist, &nPoslist); + sqlite3Fts5BufferAppendBlob(&rc, &val, nPoslist, pPoslist); + } + break; + + default: + break; } sqlite3_result_blob(pCtx, val.p, val.n, sqlite3_free); return rc; } Index: ext/fts5/fts5_storage.c ================================================================== --- ext/fts5/fts5_storage.c +++ ext/fts5/fts5_storage.c @@ -823,32 +823,77 @@ struct Fts5IntegrityCtx { i64 iRowid; int iCol; int szCol; u64 cksum; + Fts5Termset *pTermset; Fts5Config *pConfig; }; + /* ** Tokenization callback used by integrity check. */ static int fts5StorageIntegrityCallback( - void *pContext, /* Pointer to Fts5InsertCtx object */ + void *pContext, /* Pointer to Fts5IntegrityCtx object */ int tflags, const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ int iEnd /* End offset of token */ ){ Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext; + Fts5Termset *pTermset = pCtx->pTermset; + int bPresent; + int ii; + int rc = SQLITE_OK; + int iPos; + int iCol; + if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){ pCtx->szCol++; } - pCtx->cksum ^= sqlite3Fts5IndexCksum( - pCtx->pConfig, pCtx->iRowid, pCtx->iCol, pCtx->szCol-1, pToken, nToken - ); - return SQLITE_OK; + + switch( pCtx->pConfig->eDetail ){ + case FTS5_DETAIL_FULL: + iPos = pCtx->szCol-1; + iCol = pCtx->iCol; + break; + + case FTS5_DETAIL_COLUMNS: + iPos = pCtx->iCol; + iCol = 0; + break; + + default: + assert( pCtx->pConfig->eDetail==FTS5_DETAIL_NONE ); + iPos = 0; + iCol = 0; + break; + } + + rc = sqlite3Fts5TermsetAdd(pTermset, 0, pToken, nToken, &bPresent); + if( rc==SQLITE_OK && bPresent==0 ){ + pCtx->cksum ^= sqlite3Fts5IndexEntryCksum( + pCtx->iRowid, iCol, iPos, 0, pToken, nToken + ); + } + + for(ii=0; rc==SQLITE_OK && iipConfig->nPrefix; ii++){ + const int nChar = pCtx->pConfig->aPrefix[ii]; + int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar); + if( nByte ){ + rc = sqlite3Fts5TermsetAdd(pTermset, ii+1, pToken, nByte, &bPresent); + if( bPresent==0 ){ + pCtx->cksum ^= sqlite3Fts5IndexEntryCksum( + pCtx->iRowid, iCol, iPos, ii+1, pToken, nByte + ); + } + } + } + + return rc; } /* ** Check that the contents of the FTS index match that of the %_content ** table. Return SQLITE_OK if they do, or SQLITE_CORRUPT if not. Return @@ -879,27 +924,42 @@ int i; ctx.iRowid = sqlite3_column_int64(pScan, 0); ctx.szCol = 0; if( pConfig->bColumnsize ){ rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize); + } + if( rc==SQLITE_OK && pConfig->eDetail==FTS5_DETAIL_NONE ){ + rc = sqlite3Fts5TermsetNew(&ctx.pTermset); } for(i=0; rc==SQLITE_OK && inCol; i++){ if( pConfig->abUnindexed[i] ) continue; ctx.iCol = i; ctx.szCol = 0; - rc = sqlite3Fts5Tokenize(pConfig, - FTS5_TOKENIZE_DOCUMENT, - (const char*)sqlite3_column_text(pScan, i+1), - sqlite3_column_bytes(pScan, i+1), - (void*)&ctx, - fts5StorageIntegrityCallback - ); - if( pConfig->bColumnsize && ctx.szCol!=aColSize[i] ){ + if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){ + rc = sqlite3Fts5TermsetNew(&ctx.pTermset); + } + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5Tokenize(pConfig, + FTS5_TOKENIZE_DOCUMENT, + (const char*)sqlite3_column_text(pScan, i+1), + sqlite3_column_bytes(pScan, i+1), + (void*)&ctx, + fts5StorageIntegrityCallback + ); + } + if( rc==SQLITE_OK && pConfig->bColumnsize && ctx.szCol!=aColSize[i] ){ rc = FTS5_CORRUPT; } aTotalSize[i] += ctx.szCol; + if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){ + sqlite3Fts5TermsetFree(ctx.pTermset); + ctx.pTermset = 0; + } } + sqlite3Fts5TermsetFree(ctx.pTermset); + ctx.pTermset = 0; + if( rc!=SQLITE_OK ) break; } rc2 = sqlite3_reset(pScan); if( rc==SQLITE_OK ) rc = rc2; } Index: ext/fts5/fts5_tcl.c ================================================================== --- ext/fts5/fts5_tcl.c +++ ext/fts5/fts5_tcl.c @@ -233,10 +233,12 @@ { "xQueryPhrase", 2, "PHRASE SCRIPT" }, /* 11 */ { "xSetAuxdata", 1, "VALUE" }, /* 12 */ { "xGetAuxdata", 1, "CLEAR" }, /* 13 */ { "xSetAuxdataInt", 1, "INTEGER" }, /* 14 */ { "xGetAuxdataInt", 1, "CLEAR" }, /* 15 */ + { "xPhraseForeach", 4, "IPHRASE COLVAR OFFVAR SCRIPT" }, /* 16 */ + { "xPhraseColumnForeach", 3, "IPHRASE COLVAR SCRIPT" }, /* 17 */ { 0, 0, 0} }; int rc; int iSub = 0; @@ -426,10 +428,66 @@ int iVal; int bClear; if( Tcl_GetBooleanFromObj(interp, objv[2], &bClear) ) return TCL_ERROR; iVal = ((char*)p->pApi->xGetAuxdata(p->pFts, bClear) - (char*)0); Tcl_SetObjResult(interp, Tcl_NewIntObj(iVal)); + break; + } + + CASE(16, "xPhraseForeach") { + int iPhrase; + int iCol; + int iOff; + const char *zColvar; + const char *zOffvar; + Tcl_Obj *pScript = objv[5]; + Fts5PhraseIter iter; + + if( Tcl_GetIntFromObj(interp, objv[2], &iPhrase) ) return TCL_ERROR; + zColvar = Tcl_GetString(objv[3]); + zOffvar = Tcl_GetString(objv[4]); + + for(p->pApi->xPhraseFirst(p->pFts, iPhrase, &iter, &iCol, &iOff); + iCol>=0; + p->pApi->xPhraseNext(p->pFts, &iter, &iCol, &iOff) + ){ + Tcl_SetVar2Ex(interp, zColvar, 0, Tcl_NewIntObj(iCol), 0); + Tcl_SetVar2Ex(interp, zOffvar, 0, Tcl_NewIntObj(iOff), 0); + rc = Tcl_EvalObjEx(interp, pScript, 0); + if( rc==TCL_CONTINUE ) rc = TCL_OK; + if( rc!=TCL_OK ){ + if( rc==TCL_BREAK ) rc = TCL_OK; + break; + } + } + + break; + } + + CASE(17, "xPhraseColumnForeach") { + int iPhrase; + int iCol; + const char *zColvar; + Tcl_Obj *pScript = objv[4]; + Fts5PhraseIter iter; + + if( Tcl_GetIntFromObj(interp, objv[2], &iPhrase) ) return TCL_ERROR; + zColvar = Tcl_GetString(objv[3]); + + for(p->pApi->xPhraseFirstColumn(p->pFts, iPhrase, &iter, &iCol); + iCol>=0; + p->pApi->xPhraseNextColumn(p->pFts, &iter, &iCol) + ){ + Tcl_SetVar2Ex(interp, zColvar, 0, Tcl_NewIntObj(iCol), 0); + rc = Tcl_EvalObjEx(interp, pScript, 0); + if( rc==TCL_CONTINUE ) rc = TCL_OK; + if( rc!=TCL_OK ){ + if( rc==TCL_BREAK ) rc = TCL_OK; + break; + } + } + break; } default: assert( 0 ); Index: ext/fts5/fts5_test_mi.c ================================================================== --- ext/fts5/fts5_test_mi.c +++ ext/fts5/fts5_test_mi.c @@ -132,11 +132,11 @@ int iCol, iOff; u32 *aOut = (u32*)pUserData; int iPrev = -1; for(pApi->xPhraseFirst(pFts, 0, &iter, &iCol, &iOff); - iOff>=0; + iCol>=0; pApi->xPhraseNext(pFts, &iter, &iCol, &iOff) ){ aOut[iCol*3+1]++; if( iCol!=iPrev ) aOut[iCol*3 + 2]++; iPrev = iCol; @@ -209,22 +209,35 @@ ){ int i; int rc = SQLITE_OK; switch( f ){ - case 'b': + case 'b': { + int iPhrase; + int nInt = ((p->nCol + 31) / 32) * p->nPhrase; + for(i=0; inPhrase; iPhrase++){ + Fts5PhraseIter iter; + int iCol; + for(pApi->xPhraseFirstColumn(pFts, iPhrase, &iter, &iCol); + iCol>=0; + pApi->xPhraseNextColumn(pFts, &iter, &iCol) + ){ + aOut[iPhrase * ((p->nCol+31)/32) + iCol/32] |= ((u32)1 << iCol%32); + } + } + + break; + } + case 'x': case 'y': { int nMul = (f=='x' ? 3 : 1); int iPhrase; - if( f=='b' ){ - int nInt = ((p->nCol + 31) / 32) * p->nPhrase; - for(i=0; inCol*p->nPhrase); i++) aOut[i*nMul] = 0; - } + for(i=0; i<(p->nCol*p->nPhrase); i++) aOut[i*nMul] = 0; for(iPhrase=0; iPhrasenPhrase; iPhrase++){ Fts5PhraseIter iter; int iOff, iCol; for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff); Index: ext/fts5/fts5_vocab.c ================================================================== --- ext/fts5/fts5_vocab.c +++ ext/fts5/fts5_vocab.c @@ -377,11 +377,11 @@ pCsr->rowid++; if( pTab->eType==FTS5_VOCAB_COL ){ for(pCsr->iCol++; pCsr->iColiCol++){ - if( pCsr->aCnt[pCsr->iCol] ) break; + if( pCsr->aDoc[pCsr->iCol] ) break; } } if( pTab->eType==FTS5_VOCAB_ROW || pCsr->iCol>=nCol ){ if( sqlite3Fts5IterEof(pCsr->pIter) ){ @@ -410,28 +410,56 @@ i64 dummy; const u8 *pPos; int nPos; /* Position list */ i64 iPos = 0; /* 64-bit position read from poslist */ int iOff = 0; /* Current offset within position list */ - rc = sqlite3Fts5IterPoslist(pCsr->pIter, 0, &pPos, &nPos, &dummy); - if( rc==SQLITE_OK ){ - if( pTab->eType==FTS5_VOCAB_ROW ){ - while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ - pCsr->aCnt[0]++; - } - pCsr->aDoc[0]++; - }else{ - int iCol = -1; - while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ - int ii = FTS5_POS2COLUMN(iPos); - pCsr->aCnt[ii]++; - if( iCol!=ii ){ - pCsr->aDoc[ii]++; - iCol = ii; - } - } - } + switch( pCsr->pConfig->eDetail ){ + case FTS5_DETAIL_FULL: + rc = sqlite3Fts5IterPoslist(pCsr->pIter, 0, &pPos, &nPos, &dummy); + if( rc==SQLITE_OK ){ + if( pTab->eType==FTS5_VOCAB_ROW ){ + while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ + pCsr->aCnt[0]++; + } + pCsr->aDoc[0]++; + }else{ + int iCol = -1; + while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ + int ii = FTS5_POS2COLUMN(iPos); + pCsr->aCnt[ii]++; + if( iCol!=ii ){ + pCsr->aDoc[ii]++; + iCol = ii; + } + } + } + } + break; + + case FTS5_DETAIL_COLUMNS: + if( pTab->eType==FTS5_VOCAB_ROW ){ + pCsr->aDoc[0]++; + }else{ + Fts5Buffer buf = {0, 0, 0}; + rc = sqlite3Fts5IterPoslistBuffer(pCsr->pIter, &buf); + if( rc==SQLITE_OK ){ + while( 0==sqlite3Fts5PoslistNext64(buf.p, buf.n, &iOff,&iPos) ){ + assert_nc( iPos>=0 && iPosaDoc[iPos]++; + } + } + sqlite3Fts5BufferFree(&buf); + } + break; + + default: + assert( pCsr->pConfig->eDetail==FTS5_DETAIL_NONE ); + pCsr->aDoc[0]++; + break; + } + + if( rc==SQLITE_OK ){ rc = sqlite3Fts5IterNextScan(pCsr->pIter); } if( rc==SQLITE_OK ){ zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); @@ -443,11 +471,11 @@ } } } if( pCsr->bEof==0 && pTab->eType==FTS5_VOCAB_COL ){ - while( pCsr->aCnt[pCsr->iCol]==0 ) pCsr->iCol++; + while( pCsr->aDoc[pCsr->iCol]==0 ) pCsr->iCol++; assert( pCsr->iColpConfig->nCol ); } return rc; } @@ -523,34 +551,40 @@ sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ int iCol /* Index of column to read value from */ ){ Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; + int eDetail = pCsr->pConfig->eDetail; + int eType = ((Fts5VocabTable*)(pCursor->pVtab))->eType; + i64 iVal = 0; if( iCol==0 ){ sqlite3_result_text( pCtx, (const char*)pCsr->term.p, pCsr->term.n, SQLITE_TRANSIENT ); - } - else if( ((Fts5VocabTable*)(pCursor->pVtab))->eType==FTS5_VOCAB_COL ){ + }else if( eType==FTS5_VOCAB_COL ){ assert( iCol==1 || iCol==2 || iCol==3 ); if( iCol==1 ){ - const char *z = pCsr->pConfig->azCol[pCsr->iCol]; - sqlite3_result_text(pCtx, z, -1, SQLITE_STATIC); + if( eDetail!=FTS5_DETAIL_NONE ){ + const char *z = pCsr->pConfig->azCol[pCsr->iCol]; + sqlite3_result_text(pCtx, z, -1, SQLITE_STATIC); + } }else if( iCol==2 ){ - sqlite3_result_int64(pCtx, pCsr->aDoc[pCsr->iCol]); + iVal = pCsr->aDoc[pCsr->iCol]; }else{ - sqlite3_result_int64(pCtx, pCsr->aCnt[pCsr->iCol]); + iVal = pCsr->aCnt[pCsr->iCol]; } }else{ assert( iCol==1 || iCol==2 ); if( iCol==1 ){ - sqlite3_result_int64(pCtx, pCsr->aDoc[0]); + iVal = pCsr->aDoc[0]; }else{ - sqlite3_result_int64(pCtx, pCsr->aCnt[0]); + iVal = pCsr->aCnt[0]; } } + + if( iVal>0 ) sqlite3_result_int64(pCtx, iVal); return SQLITE_OK; } /* ** This is the xRowid method. The SQLite core calls this routine to Index: ext/fts5/test/fts5_common.tcl ================================================================== --- ext/fts5/test/fts5_common.tcl +++ ext/fts5/test/fts5_common.tcl @@ -25,10 +25,32 @@ for {set i 0} {$i < [$cmd xInstCount]} {incr i} { lappend res [string map {{ } .} [$cmd xInst $i]] } set res } + +proc fts5_test_poslist2 {cmd} { + set res [list] + + for {set i 0} {$i < [$cmd xPhraseCount]} {incr i} { + $cmd xPhraseForeach $i c o { + lappend res $i.$c.$o + } + } + + set res +} + +proc fts5_test_collist {cmd} { + set res [list] + + for {set i 0} {$i < [$cmd xPhraseCount]} {incr i} { + $cmd xPhraseColumnForeach $i c { lappend res $i.$c } + } + + set res +} proc fts5_test_columnsize {cmd} { set res [list] for {set i 0} {$i < [$cmd xColumnCount]} {incr i} { lappend res [$cmd xColumnSize $i] @@ -111,10 +133,12 @@ foreach f { fts5_test_columnsize fts5_test_columntext fts5_test_columntotalsize fts5_test_poslist + fts5_test_poslist2 + fts5_test_collist fts5_test_tokenize fts5_test_rowcount fts5_test_all fts5_test_queryphrase @@ -176,18 +200,27 @@ # Options: # # -near N (NEAR distance. Default 10) # -col C (List of column indexes to match against) # -pc VARNAME (variable in caller frame to use for phrase numbering) +# -dict VARNAME (array in caller frame to use for synonyms) # proc nearset {aCol args} { + + # Process the command line options. + # set O(-near) 10 set O(-col) {} set O(-pc) "" + set O(-dict) "" set nOpt [lsearch -exact $args --] if {$nOpt<0} { error "no -- option" } + + # Set $lPhrase to be a list of phrases. $nPhrase its length. + set lPhrase [lrange $args [expr $nOpt+1] end] + set nPhrase [llength $lPhrase] foreach {k v} [lrange $args 0 [expr $nOpt-1]] { if {[info exists O($k)]==0} { error "unrecognized option $k" } set O($k) $v } @@ -196,41 +229,66 @@ set counter 0 } else { upvar $O(-pc) counter } - # Set $phraselist to be a list of phrases. $nPhrase its length. - set phraselist [lrange $args [expr $nOpt+1] end] - set nPhrase [llength $phraselist] + if {$O(-dict)!=""} { upvar $O(-dict) aDict } for {set j 0} {$j < [llength $aCol]} {incr j} { for {set i 0} {$i < $nPhrase} {incr i} { set A($j,$i) [list] } } - set iCol -1 - foreach col $aCol { - incr iCol - if {$O(-col)!="" && [lsearch $O(-col) $iCol]<0} continue - set nToken [llength $col] - - set iFL [expr $O(-near) >= $nToken ? $nToken - 1 : $O(-near)] - for { } {$iFL < $nToken} {incr iFL} { - for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { - set B($iPhrase) [list] - } - - for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { - set p [lindex $phraselist $iPhrase] - set nPm1 [expr {[llength $p] - 1}] - set iFirst [expr $iFL - $O(-near) - [llength $p]] - - for {set i $iFirst} {$i <= $iFL} {incr i} { - if {[lrange $col $i [expr $i+$nPm1]] == $p} { lappend B($iPhrase) $i } - } - if {[llength $B($iPhrase)] == 0} break + # Loop through each column of the current row. + for {set iCol 0} {$iCol < [llength $aCol]} {incr iCol} { + + # If there is a column filter, test whether this column is excluded. If + # so, skip to the next iteration of this loop. Otherwise, set zCol to the + # column value and nToken to the number of tokens that comprise it. + if {$O(-col)!="" && [lsearch $O(-col) $iCol]<0} continue + set zCol [lindex $aCol $iCol] + set nToken [llength $zCol] + + # Each iteration of the following loop searches a substring of the + # column value for phrase matches. The last token of the substring + # is token $iLast of the column value. The first token is: + # + # iFirst = ($iLast - $O(-near) - 1) + # + # where $sz is the length of the phrase being searched for. A phrase + # counts as matching the substring if its first token lies on or before + # $iLast and its last token on or after $iFirst. + # + # For example, if the query is "NEAR(a+b c, 2)" and the column value: + # + # "x x x x A B x x C x" + # 0 1 2 3 4 5 6 7 8 9" + # + # when (iLast==8 && iFirst=5) the range will contain both phrases and + # so both instances can be added to the output poslists. + # + set iLast [expr $O(-near) >= $nToken ? $nToken - 1 : $O(-near)] + for { } {$iLast < $nToken} {incr iLast} { + + catch { array unset B } + + for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { + set p [lindex $lPhrase $iPhrase] + set nPm1 [expr {[llength $p] - 1}] + set iFirst [expr $iLast - $O(-near) - [llength $p]] + + for {set i $iFirst} {$i <= $iLast} {incr i} { + set lCand [lrange $zCol $i [expr $i+$nPm1]] + set bMatch 1 + foreach tok $p term $lCand { + if {[nearset_match aDict $tok $term]==0} { set bMatch 0 ; break } + } + if {$bMatch} { lappend B($iPhrase) $i } + } + + if {![info exists B($iPhrase)]} break } if {$iPhrase==$nPhrase} { for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} { set A($iCol,$iPhrase) [concat $A($iCol,$iPhrase) $B($iPhrase)] @@ -250,13 +308,25 @@ } } incr counter } - #puts $res + #puts "$aCol -> $res" sort_poslist $res } + +proc nearset_match {aDictVar tok term} { + if {[string match $tok $term]} { return 1 } + + upvar $aDictVar aDict + if {[info exists aDict($tok)]} { + foreach s $aDict($tok) { + if {[string match $s $term]} { return 1 } + } + } + return 0; +} #------------------------------------------------------------------------- # Usage: # # sort_poslist LIST @@ -324,6 +394,170 @@ incr iOff [gobble_whitespace text] } set ret } + +#------------------------------------------------------------------------- +# +proc foreach_detail_mode {prefix script} { + set saved $::testprefix + foreach d [list full col none] { + set s [string map [list %DETAIL% $d] $script] + set ::detail $d + set ::testprefix "$prefix-$d" + reset_db + uplevel $s + unset ::detail + } + set ::testprefix $saved +} + +proc detail_check {} { + if {$::detail != "none" && $::detail!="full" && $::detail!="col"} { + error "not in foreach_detail_mode {...} block" + } +} +proc detail_is_none {} { detail_check ; expr {$::detail == "none"} } +proc detail_is_col {} { detail_check ; expr {$::detail == "col" } } +proc detail_is_full {} { detail_check ; expr {$::detail == "full"} } + + +#------------------------------------------------------------------------- +# Convert a poslist of the type returned by fts5_test_poslist() to a +# collist as returned by fts5_test_collist(). +# +proc fts5_poslist2collist {poslist} { + set res [list] + foreach h $poslist { + regexp {(.*)\.[1234567890]+} $h -> cand + lappend res $cand + } + set res [lsort -command fts5_collist_elem_compare -unique $res] + return $res +} + +# Comparison function used by fts5_poslist2collist to sort collist entries. +proc fts5_collist_elem_compare {a b} { + foreach {a1 a2} [split $a .] {} + foreach {b1 b2} [split $b .] {} + + if {$a1==$b1} { return [expr $a2 - $b2] } + return [expr $a1 - $b1] +} + + +#-------------------------------------------------------------------------- +# Construct and return a tcl list equivalent to that returned by the SQL +# query executed against database handle [db]: +# +# SELECT +# rowid, +# fts5_test_poslist($tbl), +# fts5_test_collist($tbl) +# FROM $tbl('$expr') +# ORDER BY rowid $order; +# +proc fts5_query_data {expr tbl {order ASC} {aDictVar ""}} { + + # Figure out the set of columns in the FTS5 table. This routine does + # not handle tables with UNINDEXED columns, but if it did, it would + # have to be here. + db eval "PRAGMA table_info = $tbl" x { lappend lCols $x(name) } + + set d "" + if {$aDictVar != ""} { + upvar $aDictVar aDict + set d aDict + } + + set cols "" + foreach e $lCols { append cols ", '$e'" } + set tclexpr [db one [subst -novar { + SELECT fts5_expr_tcl( $expr, 'nearset $cols -dict $d -pc ::pc' [set cols] ) + }]] + + set res [list] + db eval "SELECT rowid, * FROM $tbl ORDER BY rowid $order" x { + set cols [list] + foreach col $lCols { lappend cols $x($col) } + + set ::pc 0 + set rowdata [eval $tclexpr] + if {$rowdata != ""} { + lappend res $x(rowid) $rowdata [fts5_poslist2collist $rowdata] + } + } + + set res +} + +#------------------------------------------------------------------------- +# Similar to [fts5_query_data], but omit the collist field. +# +proc fts5_poslist_data {expr tbl {order ASC} {aDictVar ""}} { + set res [list] + + if {$aDictVar!=""} { + upvar $aDictVar aDict + set dict aDict + } else { + set dict "" + } + + foreach {rowid poslist collist} [fts5_query_data $expr $tbl $order $dict] { + lappend res $rowid $poslist + } + set res +} + +#------------------------------------------------------------------------- +# + +# This command will only work inside a [foreach_detail_mode] block. It tests +# whether or not expression $expr run on FTS5 table $tbl is supported by +# the current mode. If so, 1 is returned. If not, 0. +# +# detail=full (all queries supported) +# detail=col (all but phrase queries and NEAR queries) +# detail=none (all but phrase queries, NEAR queries, and column filters) +# +proc fts5_expr_ok {expr tbl} { + + if {![detail_is_full]} { + set nearset "nearset_rc" + if {[detail_is_col]} { set nearset "nearset_rf" } + + set ::expr_not_ok 0 + db eval "PRAGMA table_info = $tbl" x { lappend lCols $x(name) } + + set cols "" + foreach e $lCols { append cols ", '$e'" } + set ::pc 0 + set tclexpr [db one [subst -novar { + SELECT fts5_expr_tcl( $expr, '[set nearset] $cols -pc ::pc' [set cols] ) + }]] + eval $tclexpr + if {$::expr_not_ok} { return 0 } + } + + return 1 +} + +# Helper for [fts5_expr_ok] +proc nearset_rf {aCol args} { + set idx [lsearch -exact $args --] + if {$idx != [llength $args]-2 || [llength [lindex $args end]]!=1} { + set ::expr_not_ok 1 + } + list +} + +# Helper for [fts5_expr_ok] +proc nearset_rc {aCol args} { + nearset_rf $aCol {*}$args + if {[lsearch $args -col]>=0} { + set ::expr_not_ok 1 + } + list +} Index: ext/fts5/test/fts5aa.test ================================================================== --- ext/fts5/test/fts5aa.test +++ ext/fts5/test/fts5aa.test @@ -19,10 +19,12 @@ ifcapable !fts5 { finish_test return } +foreach_detail_mode $::testprefix { + do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(a, b, c); SELECT name, sql FROM sqlite_master; } { t1 {CREATE VIRTUAL TABLE t1 USING fts5(a, b, c)} @@ -39,13 +41,13 @@ } { } #------------------------------------------------------------------------- # -reset_db + do_execsql_test 2.0 { - CREATE VIRTUAL TABLE t1 USING fts5(x,y); + CREATE VIRTUAL TABLE t1 USING fts5(x, y, detail=%DETAIL%); } do_execsql_test 2.1 { INSERT INTO t1 VALUES('a b c', 'd e f'); } @@ -63,16 +65,17 @@ } do_execsql_test 2.4 { INSERT INTO t1(t1) VALUES('integrity-check'); } + #------------------------------------------------------------------------- # reset_db do_execsql_test 3.0 { - CREATE VIRTUAL TABLE t1 USING fts5(x,y); + CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%); } foreach {i x y} { 1 {g f d b f} {h h e i a} 2 {f i g j e} {i j c f f} 3 {e e i f a} {e h f d f} @@ -91,11 +94,11 @@ #------------------------------------------------------------------------- # reset_db do_execsql_test 4.0 { - CREATE VIRTUAL TABLE t1 USING fts5(x,y); + CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } foreach {i x y} { 1 {g f d b f} {h h e i a} 2 {f i g j e} {i j c f f} @@ -115,11 +118,11 @@ #------------------------------------------------------------------------- # reset_db do_execsql_test 5.0 { - CREATE VIRTUAL TABLE t1 USING fts5(x,y); + CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } foreach {i x y} { 1 {dd abc abc abc abcde} {aaa dd ddd ddd aab} 2 {dd aab d aaa b} {abcde c aaa aaa aaa} @@ -139,11 +142,11 @@ #------------------------------------------------------------------------- # reset_db do_execsql_test 6.0 { - CREATE VIRTUAL TABLE t1 USING fts5(x,y); + CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } do_execsql_test 6.1 { INSERT INTO t1(rowid, x, y) VALUES(22, 'a b c', 'c b a'); @@ -274,11 +277,11 @@ #------------------------------------------------------------------------- # reset_db do_execsql_test 10.0 { - CREATE VIRTUAL TABLE t1 USING fts5(x,y); + CREATE VIRTUAL TABLE t1 USING fts5(x,y, detail=%DETAIL%); } set d10 { 1 {g f d b f} {h h e i a} 2 {f i g j e} {i j c f f} 3 {e e i f a} {e h f d f} @@ -307,23 +310,23 @@ do_execsql_test 10.4.2 { INSERT INTO t1(t1) VALUES('integrity-check') } #------------------------------------------------------------------------- # do_catchsql_test 11.1 { - CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rank); + CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rank, detail=%DETAIL%); } {1 {reserved fts5 column name: rank}} do_catchsql_test 11.2 { - CREATE VIRTUAL TABLE rank USING fts5(a, b, c); + CREATE VIRTUAL TABLE rank USING fts5(a, b, c, detail=%DETAIL%); } {1 {reserved fts5 table name: rank}} do_catchsql_test 11.3 { - CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rowid); + CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rowid, detail=%DETAIL%); } {1 {reserved fts5 column name: rowid}} #------------------------------------------------------------------------- # do_execsql_test 12.1 { - CREATE VIRTUAL TABLE t2 USING fts5(x,y); + CREATE VIRTUAL TABLE t2 USING fts5(x,y, detail=%DETAIL%); } {} do_catchsql_test 12.2 { SELECT t2 FROM t2 WHERE t2 MATCH '*stuff' } {1 {unknown special query: stuff}} @@ -335,11 +338,11 @@ #------------------------------------------------------------------------- # reset_db do_execsql_test 13.1 { - CREATE VIRTUAL TABLE t1 USING fts5(x); + CREATE VIRTUAL TABLE t1 USING fts5(x, detail=%DETAIL%); INSERT INTO t1(rowid, x) VALUES(1, 'o n e'), (2, 't w o'); } {} do_execsql_test 13.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'o'; @@ -359,19 +362,23 @@ #------------------------------------------------------------------------- # reset_db do_execsql_test 14.1 { - CREATE VIRTUAL TABLE t1 USING fts5(x, y); + CREATE VIRTUAL TABLE t1 USING fts5(x, y, detail=%DETAIL%); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); WITH d(x,y) AS ( SELECT NULL, 'xyz xyz xyz xyz xyz xyz' UNION ALL SELECT NULL, 'xyz xyz xyz xyz xyz xyz' FROM d ) INSERT INTO t1 SELECT * FROM d LIMIT 200; } + +do_execsql_test 15.x { + INSERT INTO t1(t1) VALUES('integrity-check'); +} do_test 14.2 { set nRow 0 db eval { SELECT * FROM t1 WHERE t1 MATCH 'xyz' } { db eval { @@ -415,15 +422,15 @@ CREATE VIRTUAL TABLE n1 USING fts5(a); INSERT INTO n1 VALUES('a b c d'); } proc funk {} { + db eval { UPDATE n1_config SET v=50 WHERE k='version' } set fd [db incrblob main n1_data block 10] fconfigure $fd -encoding binary -translation binary puts -nonewline $fd "\x44\x45" close $fd - db eval { UPDATE n1_config SET v=50 WHERE k='version' } } db func funk funk do_catchsql_test 16.2 { SELECT funk(), bm25(n1), funk() FROM n1 WHERE n1 MATCH 'a+b+c+d' @@ -431,11 +438,11 @@ #------------------------------------------------------------------------- # reset_db do_execsql_test 17.1 { - CREATE VIRTUAL TABLE b2 USING fts5(x); + CREATE VIRTUAL TABLE b2 USING fts5(x, detail=%DETAIL%); INSERT INTO b2 VALUES('a'); INSERT INTO b2 VALUES('b'); INSERT INTO b2 VALUES('c'); } @@ -445,22 +452,24 @@ lappend res [execsql { SELECT * FROM b2 ORDER BY rowid ASC }] } set res } {{a b c} {a b c} {a b c}} -reset_db -do_execsql_test 18.1 { - CREATE VIRTUAL TABLE c2 USING fts5(x, y); - INSERT INTO c2 VALUES('x x x', 'x x x'); - SELECT rowid FROM c2 WHERE c2 MATCH 'y:x'; -} {1} +if {[string match n* %DETAIL%]==0} { + reset_db + do_execsql_test 17.3 { + CREATE VIRTUAL TABLE c2 USING fts5(x, y, detail=%DETAIL%); + INSERT INTO c2 VALUES('x x x', 'x x x'); + SELECT rowid FROM c2 WHERE c2 MATCH 'y:x'; + } {1} +} #------------------------------------------------------------------------- # reset_db do_execsql_test 17.1 { - CREATE VIRTUAL TABLE uio USING fts5(ttt); + CREATE VIRTUAL TABLE uio USING fts5(ttt, detail=%DETAIL%); INSERT INTO uio VALUES(NULL); INSERT INTO uio SELECT NULL FROM uio; INSERT INTO uio SELECT NULL FROM uio; INSERT INTO uio SELECT NULL FROM uio; INSERT INTO uio SELECT NULL FROM uio; @@ -503,12 +512,12 @@ } {-9223372036854775808 9 10} #-------------------------------------------------------------------- # do_execsql_test 18.1 { - CREATE VIRTUAL TABLE t1 USING fts5(a, b); - CREATE VIRTUAL TABLE t2 USING fts5(c, d); + CREATE VIRTUAL TABLE t1 USING fts5(a, b, detail=%DETAIL%); + CREATE VIRTUAL TABLE t2 USING fts5(c, d, detail=%DETAIL%); INSERT INTO t1 VALUES('abc*', NULL); INSERT INTO t2 VALUES(1, 'abcdefg'); } do_execsql_test 18.2 { SELECT t1.rowid, t2.rowid FROM t1, t2 WHERE t2 MATCH t1.a AND t1.rowid = t2.c @@ -520,11 +529,11 @@ #-------------------------------------------------------------------- # fts5 table in the temp schema. # reset_db do_execsql_test 19.0 { - CREATE VIRTUAL TABLE temp.t1 USING fts5(x); + CREATE VIRTUAL TABLE temp.t1 USING fts5(x, detail=%DETAIL%); INSERT INTO t1 VALUES('x y z'); INSERT INTO t1 VALUES('w x 1'); SELECT rowid FROM t1 WHERE t1 MATCH 'x'; } {1 2} @@ -531,11 +540,11 @@ #-------------------------------------------------------------------- # Test that 6 and 7 byte varints can be read. # reset_db do_execsql_test 20.0 { - CREATE VIRTUAL TABLE temp.tmp USING fts5(x); + CREATE VIRTUAL TABLE temp.tmp USING fts5(x, detail=%DETAIL%); } set ::ids [list \ 0 [expr 1<<36] [expr 2<<36] [expr 1<<43] [expr 2<<43] ] do_test 20.1 { @@ -543,10 +552,11 @@ execsql { INSERT INTO tmp(rowid, x) VALUES($id, 'x y z') } } execsql { SELECT rowid FROM tmp WHERE tmp MATCH 'y' } } $::ids +} finish_test Index: ext/fts5/test/fts5ab.test ================================================================== --- ext/fts5/test/fts5ab.test +++ ext/fts5/test/fts5ab.test @@ -20,12 +20,14 @@ ifcapable !fts5 { finish_test return } +foreach_detail_mode $testprefix { + do_execsql_test 1.0 { - CREATE VIRTUAL TABLE t1 USING fts5(a, b); + CREATE VIRTUAL TABLE t1 USING fts5(a, b, detail=%DETAIL%); INSERT INTO t1 VALUES('hello', 'world'); INSERT INTO t1 VALUES('one two', 'three four'); INSERT INTO t1(rowid, a, b) VALUES(45, 'forty', 'five'); } @@ -55,11 +57,11 @@ #------------------------------------------------------------------------- reset_db do_execsql_test 2.1 { - CREATE VIRTUAL TABLE t1 USING fts5(x); + CREATE VIRTUAL TABLE t1 USING fts5(x, detail=%DETAIL%); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); INSERT INTO t1 VALUES('one'); INSERT INTO t1 VALUES('two'); INSERT INTO t1 VALUES('three'); } @@ -157,11 +159,11 @@ #------------------------------------------------------------------------- # Documents with more than 2M tokens. # do_execsql_test 4.0 { - CREATE VIRTUAL TABLE s1 USING fts5(x); + CREATE VIRTUAL TABLE s1 USING fts5(x, detail=%DETAIL%); } foreach {tn doc} [list \ 1 [string repeat {a x } 1500000] \ 2 "[string repeat {a a } 1500000] x" \ ] { @@ -170,21 +172,27 @@ do_execsql_test 4.3 { SELECT rowid FROM s1 WHERE s1 MATCH 'x' } {1 2} -do_execsql_test 4.4 { - SELECT rowid FROM s1 WHERE s1 MATCH '"a x"' +if {[detail_is_full]} { + do_execsql_test 4.4 { + SELECT rowid FROM s1 WHERE s1 MATCH '"a x"' + } {1 2} +} + +do_execsql_test 4.5 { + SELECT rowid FROM s1 WHERE s1 MATCH 'a x' } {1 2} #------------------------------------------------------------------------- # Check that a special case of segment promotion works. The case is where # a new segment is written to level L, but the oldest segment within level # (L-2) is larger than it. # do_execsql_test 5.0 { - CREATE VIRTUAL TABLE s2 USING fts5(x); + CREATE VIRTUAL TABLE s2 USING fts5(x, detail=%DETAIL%); INSERT INTO s2(s2, rank) VALUES('pgsz', 32); INSERT INTO s2(s2, rank) VALUES('automerge', 0); } proc rnddoc {n} { @@ -220,11 +228,11 @@ # Test also the other type of segment promotion - when a new segment is written # that is larger than segments immediately following it. do_test 5.3 { execsql { DROP TABLE s2; - CREATE VIRTUAL TABLE s2 USING fts5(x); + CREATE VIRTUAL TABLE s2 USING fts5(x, detail=%DETAIL%); INSERT INTO s2(s2, rank) VALUES('pgsz', 32); INSERT INTO s2(s2, rank) VALUES('automerge', 0); } for {set i 1} {$i <= 16} {incr i} { @@ -239,11 +247,11 @@ } {2 0} #------------------------------------------------------------------------- # do_execsql_test 6.0 { - CREATE VIRTUAL TABLE s3 USING fts5(x); + CREATE VIRTUAL TABLE s3 USING fts5(x, detail=%DETAIL%); BEGIN; INSERT INTO s3 VALUES('a b c'); INSERT INTO s3 VALUES('A B C'); } @@ -274,16 +282,16 @@ } {} #------------------------------------------------------------------------- # set doc [string repeat "a b c " 500] -breakpoint do_execsql_test 7.0 { - CREATE VIRTUAL TABLE x1 USING fts5(x); + CREATE VIRTUAL TABLE x1 USING fts5(x, detail=%DETAIL%); INSERT INTO x1(x1, rank) VALUES('pgsz', 32); INSERT INTO x1 VALUES($doc); } +} ;# foreach_detail_mode... finish_test Index: ext/fts5/test/fts5ac.test ================================================================== --- ext/fts5/test/fts5ac.test +++ ext/fts5/test/fts5ac.test @@ -20,10 +20,12 @@ ifcapable !fts5 { finish_test return } +foreach_detail_mode $testprefix { + set data { 0 {p o q e z k z p n f y u z y n y} {l o o l v v k} 1 {p k h h p y l l h i p v n} {p p l u r i f a j g e r r x w} 2 {l s z j k i m p s} {l w e j t j e e i t w r o p o} 3 {x g y m y m h p} {k j j b r e y y a k y} @@ -122,238 +124,157 @@ 96 {f c x p y r b m o l m o a} {p c a q s u n n x d c f a o} 97 {u h h k m n k} {u b v n u a o c} 98 {s p e t c z d f n w f} {l s f j b l c e s h} 99 {r c v w i v h a t a c v c r e} {h h u m g o f b a e o} } - -# Argument $expr is an FTS5 match expression designed to be executed against -# an FTS5 table with the following schema: -# -# CREATE VIRTUAL TABLE xy USING fts5(x, y); -# -# Assuming the table contains the same records as stored int the global -# $::data array (see above), this function returns a list containing one -# element for each match in the dataset. The elements are themselves lists -# formatted as follows: -# -# { ...} -# -# where each element is a list of phrase matches in the -# same form as returned by auxiliary scalar function fts5_test(). -# -proc matchdata {bPos expr {bAsc 1}} { - - set tclexpr [db one { - SELECT fts5_expr_tcl($expr, 'nearset $cols -pc ::pc', 'x', 'y') - }] - set res [list] - - #puts $tclexpr - foreach {id x y} $::data { - set cols [list $x $y] - set ::pc 0 - #set hits [lsort -command instcompare [eval $tclexpr]] - set hits [eval $tclexpr] - if {[llength $hits]>0} { - if {$bPos} { - lappend res [list $id $hits] - } else { - lappend res $id - } - } - } - - if {$bAsc} { - set res [lsort -integer -increasing -index 0 $res] - } else { - set res [lsort -integer -decreasing -index 0 $res] - } - - return [concat {*}$res] -} - -# -# End of test code -#------------------------------------------------------------------------- - -proc fts5_test_poslist {cmd} { - set res [list] - for {set i 0} {$i < [$cmd xInstCount]} {incr i} { - lappend res [string map {{ } .} [$cmd xInst $i]] - } - set res -} - foreach {tn2 sql} { 1 {} 2 {BEGIN} } { reset_db - sqlite3_fts5_create_function db fts5_test_poslist fts5_test_poslist + fts5_aux_test_functions db - do_execsql_test 1.0 { - CREATE VIRTUAL TABLE xx USING fts5(x,y); + do_execsql_test 1.$tn2.0 { + CREATE VIRTUAL TABLE xx USING fts5(x,y, detail=%DETAIL%); INSERT INTO xx(xx, rank) VALUES('pgsz', 32); } execsql $sql - do_test $tn2.1.1 { + do_test 1.$tn2.1.1 { foreach {id x y} $data { execsql { INSERT INTO xx(rowid, x, y) VALUES($id, $x, $y) } } execsql { INSERT INTO xx(xx) VALUES('integrity-check') } } {} #------------------------------------------------------------------------- - # Test phrase queries. - # - foreach {tn phrase} { - 1 "o" - 2 "b q" - 3 "e a e" - 4 "m d g q q b k b w f q q p p" - 5 "l o o l v v k" - 6 "a" - 7 "b" - 8 "c" - 9 "no" - 10 "L O O L V V K" - } { - set expr "\"$phrase\"" - set res [matchdata 1 $expr] - - do_execsql_test $tn2.1.2.$tn.[llength $res] { - SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr - } $res - } - - #------------------------------------------------------------------------- - # Test some AND and OR queries. - # - foreach {tn expr} { - 1.1 "a AND b" - 1.2 "a+b AND c" - 1.3 "d+c AND u" - 1.4 "d+c AND u+d" - - 2.1 "a OR b" - 2.2 "a+b OR c" - 2.3 "d+c OR u" - 2.4 "d+c OR u+d" - - 3.1 { a AND b AND c } - } { - set res [matchdata 1 $expr] - do_execsql_test $tn2.2.$tn.[llength $res] { - SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr - } $res - } - - #------------------------------------------------------------------------- - # Queries on a specific column. - # - foreach {tn expr} { - 1.1 "x:a" - 1.2 "y:a" - 1.3 "x:b" - 1.4 "y:b" - 2.1 "{x}:a" - 2.2 "{y}:a" - 2.3 "{x}:b" - 2.4 "{y}:b" - - 3.1 "{x y}:a" - 3.2 "{y x}:a" - 3.3 "{x x}:b" - 3.4 "{y y}:b" - - 4.1 {{"x" "y"}:a} - 4.2 {{"y" x}:a} - 4.3 {{x "x"}:b} - 4.4 {{"y" y}:b} - } { - set res [matchdata 1 $expr] - do_execsql_test $tn2.3.$tn.[llength $res] { - SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr - } $res - } - - #------------------------------------------------------------------------- - # Some NEAR queries. - # - foreach {tn expr} { - 1 "NEAR(a b)" - 2 "NEAR(r c)" - 2 { NEAR(r c, 5) } - 3 { NEAR(r c, 3) } - 4 { NEAR(r c, 2) } - 5 { NEAR(r c, 0) } - 6 { NEAR(a b c) } - 7 { NEAR(a b c, 8) } - 8 { x : NEAR(r c) } - 9 { y : NEAR(r c) } - } { - set res [matchdata 1 $expr] - do_execsql_test $tn2.4.1.$tn.[llength $res] { - SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr - } $res - } - - do_test $tn2.4.1 { nearset {{a b c}} -- a } {0.0.0} - do_test $tn2.4.2 { nearset {{a b c}} -- c } {0.0.2} - - foreach {tn expr tclexpr} { - 1 {a b} {AND [N $x -- {a}] [N $x -- {b}]} - } { - do_execsql_test $tn2.5.$tn { - SELECT fts5_expr_tcl($expr, 'N $x') - } [list $tclexpr] - } - - #------------------------------------------------------------------------- - # - do_execsql_test $tn2.6.integrity { - INSERT INTO xx(xx) VALUES('integrity-check'); - } - #db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM xx_data} {puts $r} - foreach {bAsc sql} { - 1 {SELECT rowid FROM xx WHERE xx MATCH $expr} - 0 {SELECT rowid FROM xx WHERE xx MATCH $expr ORDER BY rowid DESC} - } { - foreach {tn expr} { - 0.1 x - 1 { NEAR(r c) } - 2 { NEAR(r c, 5) } - 3 { NEAR(r c, 3) } - 4 { NEAR(r c, 2) } - 5 { NEAR(r c, 0) } - 6 { NEAR(a b c) } - 7 { NEAR(a b c, 8) } - 8 { x : NEAR(r c) } - 9 { y : NEAR(r c) } - 10 { x : "r c" } - 11 { y : "r c" } - 12 { a AND b } - 13 { a AND b AND c } - 14a { a } - 14b { a OR b } - 15 { a OR b AND c } - 16 { c AND b OR a } - 17 { c AND (b OR a) } - 18 { c NOT (b OR a) } - 19 { c NOT b OR a AND d } - } { - set res [matchdata 0 $expr $bAsc] - do_execsql_test $tn2.6.$bAsc.$tn.[llength $res] $sql $res - } - } -} - -do_execsql_test 3.1 { - SELECT fts5_expr_tcl('a AND b'); -} {{AND [nearset -- {a}] [nearset -- {b}]}} + # + do_execsql_test 1.$tn2.integrity { + INSERT INTO xx(xx) VALUES('integrity-check'); + } + + #------------------------------------------------------------------------- + # + foreach {tn expr} { + 1.2 "a OR b" + 1.1 "a AND b" + 1.3 "o" + 1.4 "b q" + 1.5 "e a e" + 1.6 "m d g q q b k b w f q q p p" + 1.7 "l o o l v v k" + 1.8 "a" + 1.9 "b" + 1.10 "c" + 1.11 "no" + 1.12 "L O O L V V K" + 1.13 "a AND b AND c" + 1.14 "x:a" + + 2.1 "x:a" + 2.2 "y:a" + 2.3 "x:b" + 2.4 "y:b" + + 3.1 "{x}:a" + 3.2 "{y}:a" + 3.3 "{x}:b" + 3.4 "{y}:b" + + 4.1 "{x y}:a" + 4.2 "{y x}:a" + 4.3 "{x x}:b" + 4.4 "{y y}:b" + + 5.1 {{"x" "y"}:a} + 5.2 {{"y" x}:a} + 5.3 {{x "x"}:b} + 5.4 {{"y" y}:b} + + 6.1 "b + q" + 6.2 "e + a + e" + 6.3 "m + d + g + q + q + b + k + b + w + f + q + q + p + p" + 6.4 "l + o + o + l + v + v + k" + 6.5 "L + O + O + L + V + V + K" + + 7.1 "a+b AND c" + 7.2 "d+c AND u" + 7.3 "d+c AND u+d" + 7.4 "a+b OR c" + 7.5 "d+c OR u" + 7.6 "d+c OR u+d" + + 8.1 "NEAR(a b)" + 8.2 "NEAR(r c)" + 8.2 { NEAR(r c, 5) } + 8.3 { NEAR(r c, 3) } + 8.4 { NEAR(r c, 2) } + 8.5 { NEAR(r c, 0) } + 8.6 { NEAR(a b c) } + 8.7 { NEAR(a b c, 8) } + 8.8 { x : NEAR(r c) } + 8.9 { y : NEAR(r c) } + + 9.1 { NEAR(r c) } + 9.2 { NEAR(r c, 5) } + 9.3 { NEAR(r c, 3) } + 9.4 { NEAR(r c, 2) } + 9.5 { NEAR(r c, 0) } + 9.6 { NEAR(a b c) } + 9.7 { NEAR(a b c, 8) } + 9.8 { x : NEAR(r c) } + 9.9 { y : NEAR(r c) } + 9.10 { x : "r c" } + 9.11 { y : "r c" } + 9.12 { a AND b } + 9.13 { a AND b AND c } + 9.14a { a } + 9.14b { a OR b } + 9.15 { a OR b AND c } + 9.16 { c AND b OR a } + 9.17 { c AND (b OR a) } + 9.18 { c NOT (b OR a) } + 9.19 { (c NOT b) OR (a AND d) } + } { + + if {[fts5_expr_ok $expr xx]==0} { + do_test 1.$tn2.$tn.OMITTED { list } [list] + continue + } + + set res [fts5_query_data $expr xx] + do_execsql_test 1.$tn2.$tn.[llength $res].asc { + SELECT rowid, fts5_test_poslist(xx), fts5_test_collist(xx) + FROM xx WHERE xx match $expr + } $res + + + set res [fts5_query_data $expr xx DESC] + do_execsql_test 1.$tn2.$tn.[llength $res].desc { + SELECT rowid, fts5_test_poslist(xx), fts5_test_collist(xx) + FROM xx WHERE xx match $expr ORDER BY 1 DESC + } $res + } +} + +} + +do_execsql_test 2.1 { + SELECT fts5_expr_tcl('a AND b'); +} {{AND [nearset -- {a}] [nearset -- {b}]}} + +do_test 2.2.1 { nearset {{a b c}} -- a } {0.0.0} +do_test 2.2.2 { nearset {{a b c}} -- c } {0.0.2} + +foreach {tn expr tclexpr} { + 1 {a b} {AND [N $x -- {a}] [N $x -- {b}]} +} { + do_execsql_test 2.3.$tn { + SELECT fts5_expr_tcl($expr, 'N $x') + } [list $tclexpr] +} finish_test Index: ext/fts5/test/fts5ad.test ================================================================== --- ext/fts5/test/fts5ad.test +++ ext/fts5/test/fts5ad.test @@ -20,12 +20,14 @@ ifcapable !fts5 { finish_test return } +foreach_detail_mode $testprefix { + do_execsql_test 1.0 { - CREATE VIRTUAL TABLE yy USING fts5(x, y); + CREATE VIRTUAL TABLE yy USING fts5(x, y, detail=%DETAIL%); INSERT INTO yy VALUES('Changes the result to be', 'the list of all matching'); INSERT INTO yy VALUES('indices (or all matching', 'values if -inline is'); INSERT INTO yy VALUES('specified as well.) If', 'indices are returned, the'); } {} @@ -51,27 +53,27 @@ } $res } foreach {T create} { 2 { - CREATE VIRTUAL TABLE t1 USING fts5(a, b); + CREATE VIRTUAL TABLE t1 USING fts5(a, b, detail=%DETAIL%); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } 3 { - CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix=1,2,3,4,5); + CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix="1,2,3,4", detail=%DETAIL%); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } 4 { - CREATE VIRTUAL TABLE t1 USING fts5(a, b); + CREATE VIRTUAL TABLE t1 USING fts5(a, b, detail=%DETAIL%); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); BEGIN; } 5 { - CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix=1,2,3,4,5); + CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix="1,2,3,4", detail=%DETAIL%); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); BEGIN; } } { @@ -231,9 +233,11 @@ do_execsql_test $T.$bAsc.$tn.$n $sql $res } } catchsql COMMIT +} + } finish_test Index: ext/fts5/test/fts5ae.test ================================================================== --- ext/fts5/test/fts5ae.test +++ ext/fts5/test/fts5ae.test @@ -20,12 +20,14 @@ ifcapable !fts5 { finish_test return } +foreach_detail_mode $testprefix { + do_execsql_test 1.0 { - CREATE VIRTUAL TABLE t1 USING fts5(a, b); + CREATE VIRTUAL TABLE t1 USING fts5(a, b, detail=%DETAIL%); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } do_execsql_test 1.1 { INSERT INTO t1 VALUES('hello', 'world'); @@ -53,11 +55,11 @@ fts5_aux_test_functions db #------------------------------------------------------------------------- # do_execsql_test 2.0 { - CREATE VIRTUAL TABLE t2 USING fts5(x, y); + CREATE VIRTUAL TABLE t2 USING fts5(x, y, detail=%DETAIL%); INSERT INTO t2 VALUES('u t l w w m s', 'm f m o l t k o p e'); INSERT INTO t2 VALUES('f g q e l n d m z x q', 'z s i i i m f w w f n g p'); } do_execsql_test 2.1 { @@ -74,35 +76,39 @@ } { 1 {0.0.0} 2 {1.0.2 1.0.10} } -do_execsql_test 2.3 { - SELECT rowid, fts5_test_poslist(t2) FROM t2 - WHERE t2 MATCH 'y:o' ORDER BY rowid; -} { - 1 {0.1.3 0.1.7} +if {[detail_is_full]} { + do_execsql_test 2.3 { + SELECT rowid, fts5_test_poslist(t2) FROM t2 + WHERE t2 MATCH 'y:o' ORDER BY rowid; + } { + 1 {0.1.3 0.1.7} + } } #------------------------------------------------------------------------- # do_execsql_test 3.0 { - CREATE VIRTUAL TABLE t3 USING fts5(x, y); + CREATE VIRTUAL TABLE t3 USING fts5(x, y, detail=%DETAIL%); INSERT INTO t3 VALUES( 'j f h o x x a z g b a f a m i b', 'j z c z y x w t'); INSERT INTO t3 VALUES( 'r c', ''); } -do_execsql_test 3.1 { - SELECT rowid, fts5_test_poslist(t3) FROM t3 WHERE t3 MATCH 'NEAR(a b)'; -} { - 1 {0.0.6 1.0.9 0.0.10 0.0.12 1.0.15} -} - -do_execsql_test 3.2 { - SELECT rowid, fts5_test_poslist(t3) FROM t3 WHERE t3 MATCH 'NEAR(r c)'; -} { - 2 {0.0.0 1.0.1} +if {[detail_is_full]} { + do_execsql_test 3.1 { + SELECT rowid, fts5_test_poslist(t3) FROM t3 WHERE t3 MATCH 'NEAR(a b)'; + } { + 1 {0.0.6 1.0.9 0.0.10 0.0.12 1.0.15} + } + + do_execsql_test 3.2 { + SELECT rowid, fts5_test_poslist(t3) FROM t3 WHERE t3 MATCH 'NEAR(r c)'; + } { + 2 {0.0.0 1.0.1} + } } do_execsql_test 3.3 { INSERT INTO t3 VALUES('k x j r m a d o i z j', 'r t t t f e b r x i v j v g o'); @@ -114,11 +120,11 @@ } #------------------------------------------------------------------------- # do_execsql_test 4.0 { - CREATE VIRTUAL TABLE t4 USING fts5(x, y); + CREATE VIRTUAL TABLE t4 USING fts5(x, y, detail=%DETAIL%); INSERT INTO t4 VALUES('k x j r m a d o i z j', 'r t t t f e b r x i v j v g o'); } do_execsql_test 4.1 { @@ -132,11 +138,11 @@ # reset_db fts5_aux_test_functions db do_execsql_test 5.1 { - CREATE VIRTUAL TABLE t5 USING fts5(x, y); + CREATE VIRTUAL TABLE t5 USING fts5(x, y, detail=%DETAIL%); INSERT INTO t5 VALUES('a b c d', 'e f g h i j'); INSERT INTO t5 VALUES('', 'a'); INSERT INTO t5 VALUES('a', ''); } do_execsql_test 5.2 { @@ -180,11 +186,11 @@ # Test the xTokenize() API # reset_db fts5_aux_test_functions db do_execsql_test 6.1 { - CREATE VIRTUAL TABLE t6 USING fts5(x, y); + CREATE VIRTUAL TABLE t6 USING fts5(x, y, detail=%DETAIL%); INSERT INTO t6 VALUES('There are more', 'things in heaven and earth'); INSERT INTO t6 VALUES(', Horatio, Than are', 'dreamt of in your philosophy.'); } do_execsql_test 6.2 { @@ -198,11 +204,11 @@ # Test the xQueryPhrase() API # reset_db fts5_aux_test_functions db do_execsql_test 7.1 { - CREATE VIRTUAL TABLE t7 USING fts5(x, y); + CREATE VIRTUAL TABLE t7 USING fts5(x, y, detail=%DETAIL%); } do_test 7.2 { foreach {x y} { {q i b w s a a e l o} {i b z a l f p t e u} {b a z t a l o x d i} {b p a d b f h d w y} @@ -238,11 +244,11 @@ # #------------------------------------------------------------------------- # do_test 8.1 { - execsql { CREATE VIRTUAL TABLE t8 USING fts5(x, y) } + execsql { CREATE VIRTUAL TABLE t8 USING fts5(x, y, detail=%DETAIL%) } foreach {rowid x y} { 0 {A o} {o o o C o o o o o o o o} 1 {o o B} {o o o C C o o o o o o o} 2 {A o o} {o o o o D D o o o o o o} 3 {o B} {o o o o o D o o o o o o} @@ -296,9 +302,11 @@ 4 {NEAR(a b)} 2 } { do_execsql_test 9.2.$tn { SELECT fts5_test_phrasecount(t9) FROM t9 WHERE t9 MATCH $q LIMIT 1 } $cnt +} + } finish_test Index: ext/fts5/test/fts5af.test ================================================================== --- ext/fts5/test/fts5af.test +++ ext/fts5/test/fts5af.test @@ -22,13 +22,14 @@ ifcapable !fts5 { finish_test return } +foreach_detail_mode $testprefix { do_execsql_test 1.0 { - CREATE VIRTUAL TABLE t1 USING fts5(x, y); + CREATE VIRTUAL TABLE t1 USING fts5(x, y, detail=%DETAIL%); } proc do_snippet_test {tn doc match res} { uplevel #0 [list set v1 $doc] @@ -109,36 +110,39 @@ 7.6 {o o o o o X o o X} {...o o o [X] o o [X]} } { do_snippet_test 1.$tn $doc X $res } -foreach {tn doc res} { - 1.1 {X Y o o o o o} {[X Y] o o o o o} - 1.2 {o X Y o o o o} {o [X Y] o o o o} - 1.3 {o o X Y o o o} {o o [X Y] o o o} - 1.4 {o o o X Y o o} {o o o [X Y] o o} - 1.5 {o o o o X Y o} {o o o o [X Y] o} - 1.6 {o o o o o X Y} {o o o o o [X Y]} - - 2.1 {X Y o o o o o o} {[X Y] o o o o o...} - 2.2 {o X Y o o o o o} {o [X Y] o o o o...} - 2.3 {o o X Y o o o o} {o o [X Y] o o o...} - 2.4 {o o o X Y o o o} {...o o [X Y] o o o} - 2.5 {o o o o X Y o o} {...o o o [X Y] o o} - 2.6 {o o o o o X Y o} {...o o o o [X Y] o} - 2.7 {o o o o o o X Y} {...o o o o o [X Y]} - - 3.1 {X Y o o o o o o o} {[X Y] o o o o o...} - 3.2 {o X Y o o o o o o} {o [X Y] o o o o...} - 3.3 {o o X Y o o o o o} {o o [X Y] o o o...} - 3.4 {o o o X Y o o o o} {...o o [X Y] o o o...} - 3.5 {o o o o X Y o o o} {...o o [X Y] o o o} - 3.6 {o o o o o X Y o o} {...o o o [X Y] o o} - 3.7 {o o o o o o X Y o} {...o o o o [X Y] o} - 3.8 {o o o o o o o X Y} {...o o o o o [X Y]} - -} { - do_snippet_test 2.$tn $doc "X + Y" $res -} +if {[detail_is_full]} { + foreach {tn doc res} { + 1.1 {X Y o o o o o} {[X Y] o o o o o} + 1.2 {o X Y o o o o} {o [X Y] o o o o} + 1.3 {o o X Y o o o} {o o [X Y] o o o} + 1.4 {o o o X Y o o} {o o o [X Y] o o} + 1.5 {o o o o X Y o} {o o o o [X Y] o} + 1.6 {o o o o o X Y} {o o o o o [X Y]} + + 2.1 {X Y o o o o o o} {[X Y] o o o o o...} + 2.2 {o X Y o o o o o} {o [X Y] o o o o...} + 2.3 {o o X Y o o o o} {o o [X Y] o o o...} + 2.4 {o o o X Y o o o} {...o o [X Y] o o o} + 2.5 {o o o o X Y o o} {...o o o [X Y] o o} + 2.6 {o o o o o X Y o} {...o o o o [X Y] o} + 2.7 {o o o o o o X Y} {...o o o o o [X Y]} + + 3.1 {X Y o o o o o o o} {[X Y] o o o o o...} + 3.2 {o X Y o o o o o o} {o [X Y] o o o o...} + 3.3 {o o X Y o o o o o} {o o [X Y] o o o...} + 3.4 {o o o X Y o o o o} {...o o [X Y] o o o...} + 3.5 {o o o o X Y o o o} {...o o [X Y] o o o} + 3.6 {o o o o o X Y o o} {...o o o [X Y] o o} + 3.7 {o o o o o o X Y o} {...o o o o [X Y] o} + 3.8 {o o o o o o o X Y} {...o o o o o [X Y]} + } { + do_snippet_test 2.$tn $doc "X + Y" $res + } +} + +} ;# foreach_detail_mode finish_test Index: ext/fts5/test/fts5ag.test ================================================================== --- ext/fts5/test/fts5ag.test +++ ext/fts5/test/fts5ag.test @@ -31,12 +31,14 @@ # and # # ... WHERE fts MATCH ? ORDER BY rank [ASC|DESC] # +foreach_detail_mode $testprefix { + do_execsql_test 1.0 { - CREATE VIRTUAL TABLE t1 USING fts5(x, y, z); + CREATE VIRTUAL TABLE t1 USING fts5(x, y, z, detail=%DETAIL%); } do_test 1.1 { foreach {x y z} { {j s m y m r n l u k} {z k f u z g h s w g} {r n o s s b v n w w} @@ -117,22 +119,27 @@ 2.1 a 2.2 b 2.3 c 2.4 d - 2.5 {"m m"} - 2.6 {e + s} - 3.0 {a AND b} 3.1 {a OR b} 3.2 {b OR c AND d} - 3.3 {NEAR(c d)} } { do_fts5ag_test $tn $expr +} - if {[set_test_counter errors]} break +if {[detail_is_full]} { + foreach {tn expr} { + 4.1 {"m m"} + 4.2 {e + s} + 4.3 {NEAR(c d)} + } { + do_fts5ag_test $tn $expr + } } +} ;# foreach_detail_mode finish_test Index: ext/fts5/test/fts5ah.test ================================================================== --- ext/fts5/test/fts5ah.test +++ ext/fts5/test/fts5ah.test @@ -18,17 +18,21 @@ # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } + +foreach_detail_mode $testprefix { #------------------------------------------------------------------------- # This file contains tests for very large doclists. # +set Y [list] +set W [list] do_test 1.0 { - execsql { CREATE VIRTUAL TABLE t1 USING fts5(a) } + execsql { CREATE VIRTUAL TABLE t1 USING fts5(a, detail=%DETAIL%) } execsql { INSERT INTO t1(t1, rank) VALUES('pgsz', 128) } set v {w w w w w w w w w w w w w w w w w w w w} execsql { INSERT INTO t1(rowid, a) VALUES(0, $v) } for {set i 1} {$i <= 10000} {incr i} { set v {x x x x x x x x x x x x x x x x x x x x} @@ -68,11 +72,16 @@ do_test 1.4 { set nRead [reads] execsql { SELECT rowid FROM t1 WHERE t1 MATCH 'x' } set nReadX [expr [reads] - $nRead] - expr $nReadX>1000 + #puts -nonewline "(nReadX=$nReadX)" + if {[detail_is_full]} { set expect 1000 } + if {[detail_is_col]} { set expect 250 } + if {[detail_is_none]} { set expect 80 } + + expr $nReadX>$expect } {1} do_test 1.5 { set fwd [execsql_reads {SELECT rowid FROM t1 WHERE t1 MATCH 'x' }] set bwd [execsql_reads { @@ -85,21 +94,26 @@ 1 { SELECT rowid FROM t1 WHERE t1 MATCH 'w + x' } [list $W] 2 { SELECT rowid FROM t1 WHERE t1 MATCH 'x + w' } [list $W] 3 { SELECT rowid FROM t1 WHERE t1 MATCH 'x AND w' } [list $W] 4 { SELECT rowid FROM t1 WHERE t1 MATCH 'y AND x' } [list $Y] " { + if {[detail_is_full]==0 && ($tn==1 || $tn==2)} continue + + if {[detail_is_full]} { set ratio 8 } + if {[detail_is_col]} { set ratio 4 } + if {[detail_is_none]} { set ratio 2 } do_test 1.6.$tn.1 { set n [execsql_reads $q] #puts -nonewline "(n=$n nReadX=$nReadX)" - expr {$n < ($nReadX / 8)} + expr {$n < ($nReadX / $ratio)} } {1} do_test 1.6.$tn.2 { set n [execsql_reads "$q ORDER BY rowid DESC"] #puts -nonewline "(n=$n nReadX=$nReadX)" - expr {$n < ($nReadX / 8)} + expr {$n < ($nReadX / $ratio)} } {1} do_execsql_test 1.6.$tn.3 $q [lsort -int -incr $res] do_execsql_test 1.6.$tn.4 "$q ORDER BY rowid DESC" [lsort -int -decr $res] } @@ -107,25 +121,30 @@ #------------------------------------------------------------------------- # Now test that adding range constraints on the rowid field reduces the # number of pages loaded from disk. # foreach {tn fraction tail cnt} { - 1 0.6 {rowid > 5000} 5000 - 2 0.2 {rowid > 9000} 1000 - 3 0.2 {rowid < 1000} 999 - 4 0.2 {rowid BETWEEN 4000 AND 5000} 1001 - 5 0.6 {rowid >= 5000} 5001 - 6 0.2 {rowid >= 9000} 1001 - 7 0.2 {rowid <= 1000} 1000 - 8 0.6 {rowid > '5000'} 5000 - 9 0.2 {rowid > '9000'} 1000 + 1 0.6 {rowid > 5000} 5000 + 2 0.2 {rowid > 9000} 1000 + 3 0.2 {rowid < 1000} 999 + 4 0.2 {rowid BETWEEN 4000 AND 5000} 1001 + 5 0.6 {rowid >= 5000} 5001 + 6 0.2 {rowid >= 9000} 1001 + 7 0.2 {rowid <= 1000} 1000 + 8 0.6 {rowid > '5000'} 5000 + 9 0.2 {rowid > '9000'} 1000 10 0.1 {rowid = 444} 1 } { set q "SELECT rowid FROM t1 WHERE t1 MATCH 'x' AND $tail" set n [execsql_reads $q] set ret [llength [execsql $q]] + # Because the position lists for 'x' are quite long in this db, the + # advantage is a bit smaller in detail=none mode. Update $fraction to + # reflect this. + if {[detail_is_none] && $fraction<0.5} { set fraction [expr $fraction*2] } + do_test "1.7.$tn.asc.(n=$n ret=$ret)" { expr {$n < ($fraction*$nReadX) && $ret==$cnt} } {1} set q "SELECT rowid FROM t1 WHERE t1 MATCH 'x' AND $tail ORDER BY rowid DESC" @@ -141,10 +160,11 @@ } {10000} do_execsql_test 1.8.2 { SELECT count(*) FROM t1 WHERE t1 MATCH 'x' AND rowid < 'text'; } {10000} +} ;# foreach_detail_mode #db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r} finish_test Index: ext/fts5/test/fts5ai.test ================================================================== --- ext/fts5/test/fts5ai.test +++ ext/fts5/test/fts5ai.test @@ -21,12 +21,14 @@ ifcapable !fts5 { finish_test return } +foreach_detail_mode $testprefix { + do_execsql_test 1.0 { - CREATE VIRTUAL TABLE t1 USING fts5(a); + CREATE VIRTUAL TABLE t1 USING fts5(a, detail=%DETAIL%); } {} do_execsql_test 1.1 { BEGIN; INSERT INTO t1 VALUES('a b c'); @@ -46,10 +48,11 @@ COMMIT; } do_execsql_test 1.2 { INSERT INTO t1(t1) VALUES('integrity-check'); +} } finish_test Index: ext/fts5/test/fts5ak.test ================================================================== --- ext/fts5/test/fts5ak.test +++ ext/fts5/test/fts5ak.test @@ -21,12 +21,14 @@ ifcapable !fts5 { finish_test return } +foreach_detail_mode $testprefix { + do_execsql_test 1.1 { - CREATE VIRTUAL TABLE ft1 USING fts5(x); + CREATE VIRTUAL TABLE ft1 USING fts5(x, detail=%DETAIL%); INSERT INTO ft1 VALUES('i d d a g i b g d d'); INSERT INTO ft1 VALUES('h d b j c c g a c a'); INSERT INTO ft1 VALUES('e j a e f h b f h h'); INSERT INTO ft1 VALUES('j f h d g h i b d f'); INSERT INTO ft1 VALUES('d c j d c j b c g e'); @@ -33,10 +35,13 @@ INSERT INTO ft1 VALUES('i a d e g j g d a a'); INSERT INTO ft1 VALUES('j f c e d a h j d b'); INSERT INTO ft1 VALUES('i c c f a d g h j e'); INSERT INTO ft1 VALUES('i d i g c d c h b f'); INSERT INTO ft1 VALUES('g d a e h a b c f j'); + + CREATE VIRTUAL TABLE ft2 USING fts5(x, detail=%DETAIL%); + INSERT INTO ft2 VALUES('a b c d e f g h i j'); } do_execsql_test 1.2 { SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'e'; } { @@ -47,23 +52,10 @@ {i c c f a d g h j [e]} {g d a [e] h a b c f j} } do_execsql_test 1.3 { - SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'h + d'; -} { - {[h d] b j c c g a c a} - {j f [h d] g h i b d f} -} - -do_execsql_test 1.4 { - SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'd + d'; -} { - {i [d d] a g i b g [d d]} -} - -do_execsql_test 1.5 { SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'e e e' } { {[e] j a [e] f h b f h h} {d c j d c j b c g [e]} {i a d [e] g j g d a a} @@ -70,61 +62,75 @@ {j f c [e] d a h j d b} {i c c f a d g h j [e]} {g d a [e] h a b c f j} } -do_execsql_test 1.6 { +do_execsql_test 1.4 { + SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'f d' +} { + {a b c [d] e [f] g h i j} +} + +do_execsql_test 1.5 { + SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'd f' +} { + {a b c [d] e [f] g h i j} +} + +#------------------------------------------------------------------------- +# Tests below this point require detail=full. +#------------------------------------------------------------------------- +if {[detail_is_full]==0} continue + + +do_execsql_test 2.1 { + SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'h + d'; +} { + {[h d] b j c c g a c a} + {j f [h d] g h i b d f} +} + +do_execsql_test 2.2 { + SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'd + d'; +} { + {i [d d] a g i b g [d d]} +} + +do_execsql_test 2.3 { SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'd + d d + d'; } { {i [d d] a g i b g [d d]} } -do_execsql_test 2.1 { - CREATE VIRTUAL TABLE ft2 USING fts5(x); - INSERT INTO ft2 VALUES('a b c d e f g h i j'); -} - -do_execsql_test 2.2 { +do_execsql_test 2.4 { SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'b+c+d c+d+e' } {{a [b c d e] f g h i j}} -do_execsql_test 2.3 { +do_execsql_test 2.5 { SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'b+c+d e+f+g' } { {a [b c d] [e f g] h i j} } -do_execsql_test 2.4 { +do_execsql_test 2.6 { SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'b+c+d c' } { {a [b c d] e f g h i j} } -do_execsql_test 2.5 { +do_execsql_test 2.7 { SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'b+c c+d+e' } { {a [b c d e] f g h i j} } -do_execsql_test 2.6.1 { - SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'f d' -} { - {a b c [d] e [f] g h i j} -} - -do_execsql_test 2.6.2 { - SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'd f' -} { - {a b c [d] e [f] g h i j} -} - #------------------------------------------------------------------------- # The example from the docs. # do_execsql_test 3.1 { -- Assuming this: - CREATE VIRTUAL TABLE ft USING fts5(a); + CREATE VIRTUAL TABLE ft USING fts5(a, detail=%DETAIL%); INSERT INTO ft VALUES('a b c x c d e'); INSERT INTO ft VALUES('a b c c d e'); INSERT INTO ft VALUES('a b c d e'); -- The following SELECT statement returns these three rows: @@ -136,8 +142,9 @@ {[a b c] x [c d e]} {[a b c] [c d e]} {[a b c d e]} } +} finish_test Index: ext/fts5/test/fts5al.test ================================================================== --- ext/fts5/test/fts5al.test +++ ext/fts5/test/fts5al.test @@ -21,12 +21,14 @@ ifcapable !fts5 { finish_test return } +foreach_detail_mode $testprefix { + do_execsql_test 1.1 { - CREATE VIRTUAL TABLE ft1 USING fts5(x); + CREATE VIRTUAL TABLE ft1 USING fts5(x, detail=%DETAIL%); SELECT * FROM ft1_config; } {version 4} do_execsql_test 1.2 { INSERT INTO ft1(ft1, rank) VALUES('pgsz', 32); @@ -81,11 +83,11 @@ #------------------------------------------------------------------------- # Assorted tests of the tcl interface for creating extension functions. # do_execsql_test 3.1 { - CREATE VIRTUAL TABLE t1 USING fts5(x); + CREATE VIRTUAL TABLE t1 USING fts5(x, detail=%DETAIL%); INSERT INTO t1 VALUES('q w e r t y'); INSERT INTO t1 VALUES('y t r e w q'); } proc argtest {cmd args} { return $args } @@ -120,15 +122,17 @@ } { {{0 0 0}} {{0 0 5}} } -do_execsql_test 3.4.2 { - SELECT insttest(t1) FROM t1 WHERE t1 MATCH 'r+e OR w' -} { - {{1 0 1}} - {{0 0 2} {1 0 4}} +if {[detail_is_full]} { + do_execsql_test 3.4.2 { + SELECT insttest(t1) FROM t1 WHERE t1 MATCH 'r+e OR w' + } { + {{1 0 1}} + {{0 0 2} {1 0 4}} + } } proc coltest {cmd} { list [$cmd xColumnSize 0] [$cmd xColumnText 0] } @@ -147,11 +151,11 @@ # 4.1.*: Mapped to a function with no arguments. # 4.2.*: Mapped to a function with one or more arguments. # do_execsql_test 4.0 { - CREATE VIRTUAL TABLE t2 USING fts5(a, b); + CREATE VIRTUAL TABLE t2 USING fts5(a, b, detail=%DETAIL%); INSERT INTO t2 VALUES('a s h g s b j m r h', 's b p a d b b a o e'); INSERT INTO t2 VALUES('r h n t a g r d d i', 'l d n j r c f t o q'); INSERT INTO t2 VALUES('q k n i k c a a e m', 'c h n j p g s c i t'); INSERT INTO t2 VALUES('h j g t r e l s g s', 'k q k c i i c k n s'); INSERT INTO t2 VALUES('b l k h d n n n m i', 'p t i a r b t q o l'); @@ -216,36 +220,38 @@ proc rowidplus {cmd ival} { expr [$cmd xRowid] + $ival } sqlite3_fts5_create_function db rowidplus rowidplus -do_execsql_test 4.2.1 { - INSERT INTO t2(t2, rank) VALUES('rank', 'rowidplus(100) '); - SELECT rowid, rank FROM t2 WHERE t2 MATCH 'o + q + g' -} { - 10 110 -} -do_execsql_test 4.2.2 { - INSERT INTO t2(t2, rank) VALUES('rank', 'rowidplus(111) '); - SELECT rowid, rank FROM t2 WHERE t2 MATCH 'o + q + g' -} { - 10 121 -} - -do_execsql_test 4.2.3 { - SELECT rowid, rank FROM t2 - WHERE t2 MATCH 'o + q + g' AND rank MATCH 'rowidplus(112)' -} { - 10 122 +if {[detail_is_full]} { + do_execsql_test 4.2.1 { + INSERT INTO t2(t2, rank) VALUES('rank', 'rowidplus(100) '); + SELECT rowid, rank FROM t2 WHERE t2 MATCH 'o + q + g' + } { + 10 110 + } + do_execsql_test 4.2.2 { + INSERT INTO t2(t2, rank) VALUES('rank', 'rowidplus(111) '); + SELECT rowid, rank FROM t2 WHERE t2 MATCH 'o + q + g' + } { + 10 121 + } + + do_execsql_test 4.2.3 { + SELECT rowid, rank FROM t2 + WHERE t2 MATCH 'o + q + g' AND rank MATCH 'rowidplus(112)' + } { + 10 122 + } } proc rowidmod {cmd imod} { expr [$cmd xRowid] % $imod } sqlite3_fts5_create_function db rowidmod rowidmod do_execsql_test 4.3.1 { - CREATE VIRTUAL TABLE t3 USING fts5(x); + CREATE VIRTUAL TABLE t3 USING fts5(x, detail=%DETAIL%); INSERT INTO t3 VALUES('a one'); INSERT INTO t3 VALUES('a two'); INSERT INTO t3 VALUES('a three'); INSERT INTO t3 VALUES('a four'); INSERT INTO t3 VALUES('a five'); @@ -285,9 +291,10 @@ } {1 {no such function: xyz}} do_catchsql_test 4.4.4 { SELECT *, rank FROM t3 WHERE t3 MATCH 'a' AND rank MATCH NULL } {1 {parse error in rank function: }} +} ;# foreach_detail_mode finish_test Index: ext/fts5/test/fts5auto.test ================================================================== --- ext/fts5/test/fts5auto.test +++ ext/fts5/test/fts5auto.test @@ -20,11 +20,10 @@ ifcapable !fts5 { finish_test return } - set data { -4026076 {n x w k b p x b n t t d s} {f j j s p j o} {w v i y r} {i p y s} {a o q v e n q r} {q v g u c y a z y} @@ -230,51 +229,21 @@ CREATE VIRTUAL TABLE tt USING fts5(a, b, c, d, e, f); } {} fts5_aux_test_functions db -proc matchdata {expr tbl collist {order ASC}} { - - set cols "" - foreach e $collist { - append cols ", '$e'" - } - - set tclexpr [db one [subst -novar { - SELECT fts5_expr_tcl( - $expr, 'nearset $cols -pc ::pc' [set cols] - ) - }]] - set res [list] - - db eval "SELECT rowid, * FROM $tbl ORDER BY rowid $order" x { - set cols [list] - foreach col $x(*) { - if {$col != "rowid"} { lappend cols $x($col) } - } - # set cols [list $a $b $c $d $e $f] - set ::pc 0 - set rowdata [eval $tclexpr] - if {$rowdata != ""} { lappend res $x(rowid) $rowdata } - } - - set res -} - -proc do_auto_test {tn tbl cols expr} { +proc do_auto_test {tn tbl expr} { foreach order {asc desc} { - set res [matchdata $expr $tbl $cols $order] + set res [fts5_poslist_data $expr $tbl $order] set testname "$tn.[string range $order 0 0].rows=[expr [llength $res]/2]" set ::autotest_expr $expr do_execsql_test $testname [subst -novar { SELECT rowid, fts5_test_poslist([set tbl]) FROM [set tbl] WHERE [set tbl] MATCH $::autotest_expr ORDER BY rowid [set order] }] $res } - - } #------------------------------------------------------------------------- # @@ -330,11 +299,11 @@ B.6 { a OR b OR c } C.1 { a OR (b AND "b c") } C.2 { a OR (b AND "z c") } } { - do_auto_test 3.$fold.$tn tt {a b c d e f} $expr + do_auto_test 3.$fold.$tn tt $expr } } proc replace_elems {list args} { set ret $list @@ -364,16 +333,13 @@ 3 z 4 {c1 : x} 5 {c2 : x} 6 {c3 : x} 7 {c1 : y} 8 {c2 : y} 9 {c3 : y} 10 {c1 : z} 11 {c2 : z} 12 {c3 : z} - - } { -breakpoint - do_auto_test 4.$tn yy {c1 c2 c3} $expr + do_auto_test 4.$tn yy $expr } finish_test ADDED ext/fts5/test/fts5detail.test Index: ext/fts5/test/fts5detail.test ================================================================== --- /dev/null +++ ext/fts5/test/fts5detail.test @@ -0,0 +1,240 @@ +# 2015 December 18 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file implements regression tests for SQLite library. The +# focus of this script is testing the FTS5 module. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5detail + +# If SQLITE_ENABLE_FTS5 is not defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +fts5_aux_test_functions db + +#-------------------------------------------------------------------------- +# Simple tests. +# +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a, b, c, detail=col); + INSERT INTO t1 VALUES('h d g', 'j b b g b', 'i e i d h g g'); -- 1 + INSERT INTO t1 VALUES('h j d', 'j h d a h', 'f d d g g f b'); -- 2 + INSERT INTO t1 VALUES('j c i', 'f f h e f', 'c j i j c h f'); -- 3 + INSERT INTO t1 VALUES('e g g', 'g e d h i', 'e d b e g d c'); -- 4 + INSERT INTO t1 VALUES('b c c', 'd i h a f', 'd i j f a b c'); -- 5 + INSERT INTO t1 VALUES('e d e', 'b c j g d', 'a i f d h b d'); -- 6 + INSERT INTO t1 VALUES('g h e', 'b c d i d', 'e f c i f i c'); -- 7 + INSERT INTO t1 VALUES('c f j', 'j j i e a', 'h a c f d h e'); -- 8 + INSERT INTO t1 VALUES('a h i', 'c i a f a', 'c f d h g d g'); -- 9 + INSERT INTO t1 VALUES('j g g', 'e f e f f', 'h j b i c g e'); -- 10 +} + +do_execsql_test 1.1 { + INSERT INTO t1(t1) VALUES('integrity-check'); +} + +foreach {tn match res} { + 1 "a:a" {9} + 2 "b:g" {1 4 6} + 3 "c:h" {1 3 6 8 9 10} +} { + do_execsql_test 1.2.$tn.1 { + SELECT rowid FROM t1($match); + } $res + + do_execsql_test 1.2.$tn.2 { + SELECT rowid FROM t1($match || '*'); + } $res +} + +do_catchsql_test 1.3.1 { + SELECT rowid FROM t1('h + d'); +} {1 {fts5: phrase queries are not supported (detail!=full)}} + +do_catchsql_test 1.3.2 { + SELECT rowid FROM t1('NEAR(h d)'); +} {1 {fts5: NEAR queries are not supported (detail!=full)}} + + +#------------------------------------------------------------------------- +# integrity-check with both detail= and prefix= options. +# +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE t2 USING fts5(a, detail=col, prefix="1"); + INSERT INTO t2(a) VALUES('aa ab'); +} + +#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t2_data} {puts $r} + +do_execsql_test 2.1 { + INSERT INTO t2(t2) VALUES('integrity-check'); +} + +do_execsql_test 2.2 { + SELECT fts5_test_poslist(t2) FROM t2('aa'); +} {0.0.0} + +set ::pc 0 +#puts [nearset {{ax bx cx}} -pc ::pc -near 10 -- b*] +#exit + +#------------------------------------------------------------------------- +# Check that the xInstCount, xInst, xPhraseFirst and xPhraseNext APIs +# work with detail=col tables. +# +set data { + 1 {abb aca aca} {aba bab aab aac caa} {abc cbc ccb bcc bab ccb aca} + 2 {bca aca acb} {ccb bcc bca aab bcc} {bab aaa aac cbb bba aca abc} + 3 {cca abc cab} {aab aba bcc cac baa} {bab cbb acb aba aab ccc cca} + 4 {ccb bcb aba} {aba bbb bcc cac bbb} {cbb aaa bca bcc aab cac aca} + 5 {bca bbc cac} {aba cbb cac cca aca} {cab acb cbc ccb cac bbb bcb} + 6 {acc bba cba} {bab bbc bbb bcb aca} {bca ccc cbb aca bac ccc ccb} + 7 {aba bab aaa} {abb bca aac bcb bcc} {bcb bbc aba aaa cba abc acc} + 8 {cab aba aaa} {ccb aca caa bbc bcc} {aaa abc ccb bbb cac cca abb} + 9 {bcb bab bac} {bcb cba cac bbb abc} {aba aca cbb acb abb ccc ccb} + 10 {aba aab ccc} {abc ccc bcc cab bbb} {aab bcc cbb ccc aaa bac baa} + 11 {bab acb cba} {aac cab cab bca cbc} {aab cbc aac baa ccb acc cac} + 12 {ccc cbb cbc} {aaa aab bcc aac bbc} {cbc cbc bac bac ccc bbc acc} + 13 {cab bbc abc} {bbb bab bba aca bab} {baa bbb aab bbb ccb bbb ccc} + 14 {bbc cab caa} {acb aac abb cba acc} {cba bba bba acb abc abb baa} + 15 {aba cca bcc} {aaa acb abc aab ccb} {cca bcb acc aaa caa cca cbc} + 16 {bcb bba aba} {cbc acb cab caa ccb} {aac aaa bbc cab cca cba abc} + 17 {caa cbb acc} {ccb bcb bca aaa bcc} {bbb aca bcb bca cbc cbc cca} + 18 {cbb bbc aac} {ccc bbc aaa aab baa} {cab cab cac cca bbc abc bbc} + 19 {ccc acc aaa} {aab cbb bca cca caa} {bcb aca aca cab acc bac bcc} + 20 {aab ccc bcb} {bbc cbb bbc aaa bcc} {cbc aab ccc aaa bcb bac cbc} + 21 {aba cab ccc} {bbc cbc cba acc bbb} {acc aab aac acb aca bca acb} + 22 {bcb bca baa} {cca bbc aca ccb cbb} {aab abc bbc aaa cab bcc bcc} + 23 {cac cbb caa} {bbc aba bbb bcc ccb} {bbc bbb cab bbc cac abb acc} + 24 {ccb acb caa} {cab bba cac bbc aac} {aac bca abc cab bca cab bcb} + 25 {bbb aca bca} {bcb acc ccc cac aca} {ccc acb acc cac cac bba bbc} + 26 {bab acc caa} {caa cab cac bac aca} {aba cac caa acc bac ccc aaa} + 27 {bca bca aaa} {ccb aca bca aaa baa} {bab acc aaa cca cba cca bac} + 28 {ccb cac cac} {bca abb bba bbc baa} {aca ccb aac cab ccc cab caa} + 29 {abc bca cab} {cac cbc cbb ccc bcc} {bcc aaa aaa acc aac cac aac} + 30 {aca acc acb} {aab aac cbb caa acb} {acb bbc bbc acc cbb bbc aac} + 31 {aba aca baa} {aca bcc cab bab acb} {bcc acb baa bcb bbc acc aba} + 32 {abb cbc caa} {cba abb bbb cbb aca} {bac aca caa cac caa ccb bbc} + 33 {bcc bcb bcb} {cca cab cbc abb bab} {caa bbc aac bbb cab cba aaa} + 34 {caa cab acc} {ccc ccc bcc acb bcc} {bac bba aca bcb bba bcb cac} + 35 {bac bcb cba} {bcc acb bbc cba bab} {abb cbb abc abc bac acc cbb} + 36 {cab bab ccb} {bca bba bab cca acc} {acc aab bcc bac acb cbb caa} + 37 {aca cbc cab} {bba aac aca aac aaa} {baa cbb cba aba cab bca bcb} + 38 {acb aab baa} {baa bab bca bbc bbb} {abc baa acc aba cab baa cac} + 39 {bcb aac cba} {bcb baa caa cac bbc} {cbc ccc bab ccb bbb caa aba} + 40 {cba ccb abc} {cbb caa cba aac bab} {cbb bbb bca bbb bac cac bca} +} + +set data { + 1 {abb aca aca} {aba bab aab aac caa} {abc cbc ccb bcc bab ccb aca} +} + +proc matchdata {expr {bAsc 1}} { + + set tclexpr [db one { + SELECT fts5_expr_tcl($expr, 'nearset $cols -pc ::pc', 'x', 'y', 'z') + }] + set res [list] + + #puts "$expr -> $tclexpr" + foreach {id x y z} $::data { + set cols [list $x $y $z] + set ::pc 0 + #set hits [lsort -command instcompare [eval $tclexpr]] + set hits [eval $tclexpr] + if {[llength $hits]>0} { + lappend res [list $id $hits] + } + } + + if {$bAsc} { + set res [lsort -integer -increasing -index 0 $res] + } else { + set res [lsort -integer -decreasing -index 0 $res] + } + + return [concat {*}$res] +} + +foreach {tn tbl} { + 1 { CREATE VIRTUAL TABLE t3 USING fts5(x, y, z, detail=col) } + 2 { CREATE VIRTUAL TABLE t3 USING fts5(x, y, z, detail=none) } +} { + reset_db + fts5_aux_test_functions db + execsql $tbl + foreach {id x y z} $data { + execsql { INSERT INTO t3(rowid, x, y, z) VALUES($id, $x, $y, $z) } + } + foreach {tn2 expr} { + 1 aaa 2 ccc 3 bab 4 aac + 5 aa* 6 cc* 7 ba* 8 aa* + 9 a* 10 b* 11 c* + } { + + set res [matchdata $expr] + + do_execsql_test 3.$tn.$tn2.1 { + SELECT rowid, fts5_test_poslist(t3) FROM t3($expr) + } $res + + do_execsql_test 3.$tn.$tn2.2 { + SELECT rowid, fts5_test_poslist2(t3) FROM t3($expr) + } $res + } +} + +#------------------------------------------------------------------------- +# Simple tests for detail=none tables. +# +do_execsql_test 4.0 { + CREATE VIRTUAL TABLE t4 USING fts5(a, b, c, detail=none); + INSERT INTO t4 VALUES('a b c', 'b c d', 'e f g'); + INSERT INTO t4 VALUES('1 2 3', '4 5 6', '7 8 9'); +} + +do_catchsql_test 4.1 { + SELECT * FROM t4('a:a') +} {1 {fts5: column queries are not supported (detail=none)}} + +#------------------------------------------------------------------------- +# Test that for the same content detail=none uses less space than +# detail=col, and that detail=col uses less space than detail=full +# +reset_db +do_test 5.1 { + foreach {tbl detail} {t1 none t2 col t3 full} { + execsql "CREATE VIRTUAL TABLE $tbl USING fts5(x, y, z, detail=$detail)" + foreach {rowid x y z} $::data { + execsql "INSERT INTO $tbl (rowid, x, y, z) VALUES(\$rowid, \$x, \$y, \$z)" + } + } +} {} + +do_execsql_test 5.2 { + SELECT + (SELECT sum(length(block)) from t1_data) < + (SELECT sum(length(block)) from t2_data) +} {1} + +do_execsql_test 5.3 { + SELECT + (SELECT sum(length(block)) from t2_data) < + (SELECT sum(length(block)) from t3_data) +} {1} + + + +finish_test + Index: ext/fts5/test/fts5dlidx.test ================================================================== --- ext/fts5/test/fts5dlidx.test +++ ext/fts5/test/fts5dlidx.test @@ -24,19 +24,19 @@ if { $tcl_platform(wordSize)<8 } { finish_test return } -if 1 { +foreach_detail_mode $testprefix { proc do_fb_test {tn sql res} { set res2 [lsort -integer -decr $res] uplevel [list do_execsql_test $tn.1 $sql $res] uplevel [list do_execsql_test $tn.2 "$sql ORDER BY rowid DESC" $res2] } -# This test populates the FTS5 table containing $nEntry entries. Rows are +# This test populates the FTS5 table with $nEntry entries. Rows are # numbered from 0 to ($nEntry-1). The rowid for row $i is: # # ($iFirst + $i*$nStep) # # Each document is of the form "a b c a b c a b c...". If the row number ($i) @@ -75,24 +75,26 @@ do_fb_test $tn.3.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'x AND a' } $xdoc do_fb_test $tn.4.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND y' } $ydoc do_fb_test $tn.4.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'y AND a' } $ydoc - do_fb_test $tn.5.1 { - SELECT rowid FROM t1 WHERE t1 MATCH 'a + b + c + x' } $xdoc - do_fb_test $tn.5.2 { - SELECT rowid FROM t1 WHERE t1 MATCH 'b + c + x + y' } $ydoc + if {[detail_is_full]} { + do_fb_test $tn.5.1 { + SELECT rowid FROM t1 WHERE t1 MATCH 'a + b + c + x' } $xdoc + do_fb_test $tn.5.2 { + SELECT rowid FROM t1 WHERE t1 MATCH 'b + c + x + y' } $ydoc + } } foreach {tn pgsz} { 1 32 2 200 } { do_execsql_test $tn.0 { DROP TABLE IF EXISTS t1; - CREATE VIRTUAL TABLE t1 USING fts5(x); + CREATE VIRTUAL TABLE t1 USING fts5(x, detail=%DETAIL%); INSERT INTO t1(t1, rank) VALUES('pgsz', $pgsz); } do_dlidx_test1 1.$tn.1 10 100 10000 0 1000 do_dlidx_test1 1.$tn.2 10 10 10000 0 128 @@ -105,11 +107,11 @@ proc do_dlidx_test2 {tn nEntry iFirst nStep} { set str [string repeat "a " 500] execsql { BEGIN; DROP TABLE IF EXISTS t1; - CREATE VIRTUAL TABLE t1 USING fts5(x); + CREATE VIRTUAL TABLE t1 USING fts5(x, detail=%DETAIL%); INSERT INTO t1(t1, rank) VALUES('pgsz', 64); INSERT INTO t1 VALUES('b a'); WITH iii(ii, i) AS ( SELECT 1, $iFirst UNION ALL @@ -128,12 +130,10 @@ } {1} } do_dlidx_test2 2.1 [expr 20] [expr 1<<57] [expr (1<<57) + 128] -} - #-------------------------------------------------------------------- # reset_db set ::vocab [list \ @@ -156,11 +156,11 @@ set ret } db func rnddoc rnddoc do_execsql_test 3.1 { - CREATE VIRTUAL TABLE abc USING fts5(a); + CREATE VIRTUAL TABLE abc USING fts5(a, detail=%DETAIL%); INSERT INTO abc(abc, rank) VALUES('pgsz', 32); INSERT INTO abc VALUES ( rnddoc() ); INSERT INTO abc VALUES ( rnddoc() ); INSERT INTO abc VALUES ( rnddoc() ); @@ -189,9 +189,12 @@ foreach v $vocab { do_execsql_test 3.3.[incr i] { SELECT rowid FROM abc WHERE abc MATCH $v } {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16} } + +} ;# foreach_detail_mode + finish_test Index: ext/fts5/test/fts5eb.test ================================================================== --- ext/fts5/test/fts5eb.test +++ ext/fts5/test/fts5eb.test @@ -40,10 +40,19 @@ 8 {"" NOT abc} {"abc"} 9 {"" AND abc} {"abc"} 10 {abc + "" + def} {"abc" + "def"} 11 {abc "" def} {"abc" AND "def"} 12 {r+e OR w} {"r" + "e" OR "w"} + + 13 {a AND b NOT c} {"a" AND ("b" NOT "c")} + 14 {a OR b NOT c} {"a" OR ("b" NOT "c")} + 15 {a NOT b AND c} {("a" NOT "b") AND "c"} + 16 {a NOT b OR c} {("a" NOT "b") OR "c"} + + 17 {a AND b OR c} {("a" AND "b") OR "c"} + 18 {a OR b AND c} {"a" OR ("b" AND "c")} + } { do_execsql_test 1.$tn {SELECT fts5_expr($expr)} [list $res] } do_catchsql_test 2.1 { ADDED ext/fts5/test/fts5fault8.test Index: ext/fts5/test/fts5fault8.test ================================================================== --- /dev/null +++ ext/fts5/test/fts5fault8.test @@ -0,0 +1,49 @@ +# 2015 September 3 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# +# This file is focused on OOM errors. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +source $testdir/malloc_common.tcl +set testprefix fts5fault8 + +# If SQLITE_ENABLE_FTS3 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +foreach_detail_mode $testprefix { + +if {[detail_is_none]==0} continue + +fts5_aux_test_functions db +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a, b, detail=%DETAIL%); + INSERT INTO t1 VALUES('a b c d', '1 2 3 4'); + INSERT INTO t1 VALUES('a b a b', NULL); + INSERT INTO t1 VALUES(NULL, '1 2 1 2'); +} + +do_faultsim_test 1 -faults oom-t* -body { + execsql { + SELECT rowid, fts5_test_poslist(t1) FROM t1 WHERE t1 MATCH 'b OR 2' + } +} -test { + faultsim_test_result {0 {1 {0.0.1 1.1.1} 2 {0.0.1 0.0.3} 3 {1.1.1 1.1.3}}} \ + {1 SQLITE_NOMEM} +} + +} + +finish_test + Index: ext/fts5/test/fts5matchinfo.test ================================================================== --- ext/fts5/test/fts5matchinfo.test +++ ext/fts5/test/fts5matchinfo.test @@ -14,10 +14,12 @@ set testprefix fts5matchinfo # If SQLITE_ENABLE_FTS5 is not defined, omit this file. ifcapable !fts5 { finish_test ; return } +foreach_detail_mode $testprefix { + proc mit {blob} { set scan(littleEndian) i* set scan(bigEndian) I* binary scan $blob $scan($::tcl_platform(byteOrder)) r return $r @@ -25,11 +27,11 @@ db func mit mit sqlite3_fts5_register_matchinfo db do_execsql_test 1.0 { - CREATE VIRTUAL TABLE t1 USING fts5(content); + CREATE VIRTUAL TABLE t1 USING fts5(content, detail=%DETAIL%); } do_execsql_test 1.1 { INSERT INTO t1(content) VALUES('I wandered lonely as a cloud'); INSERT INTO t1(content) VALUES('That floats on high o''er vales and hills,'); @@ -39,11 +41,11 @@ } {{1 1 1 2 2} {1 1 1 2 2}} # Now create an FTS4 table that does not specify matchinfo=fts3. # do_execsql_test 1.2 { - CREATE VIRTUAL TABLE t2 USING fts5(content); + CREATE VIRTUAL TABLE t2 USING fts5(content, detail=%DETAIL%); INSERT INTO t2 SELECT * FROM t1; SELECT mit(matchinfo(t2)) FROM t2 WHERE t2 MATCH 'I'; } {{1 1 1 2 2} {1 1 1 2 2}} @@ -147,13 +149,21 @@ lappend res [list {*}$elem] } return $res } +# Similar to [do_matchinfo_test], except that this is a no-op if the FTS5 +# mode is not detail=full. +# +proc do_matchinfo_p_test {tn tbl expr results} { + if {[detail_is_full]} { + uplevel [list do_matchinfo_test $tn $tbl $expr $results] + } +} do_execsql_test 4.1.0 { - CREATE VIRTUAL TABLE t4 USING fts5(x, y); + CREATE VIRTUAL TABLE t4 USING fts5(x, y, detail=%DETAIL%); INSERT INTO t4 VALUES('a b c d e', 'f g h i j'); INSERT INTO t4 VALUES('f g h i j', 'a b c d e'); } do_matchinfo_test 4.1.1 t4 {t4 MATCH 'a b c'} { @@ -183,11 +193,11 @@ xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc - xpxsscplax - } -do_matchinfo_test 4.1.2 t4 {t4 MATCH '"g h i"'} { +do_matchinfo_p_test 4.1.2 t4 {t4 MATCH '"g h i"'} { p {1 1} c {2 2} x { {0 1 1 1 1 1} {1 1 1 0 1 1} @@ -201,12 +211,12 @@ xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc - sxsxs - } do_matchinfo_test 4.1.3 t4 {t4 MATCH 'a b'} { s {{2 0} {0 2}} } -do_matchinfo_test 4.1.4 t4 {t4 MATCH '"a b" c'} { s {{2 0} {0 2}} } -do_matchinfo_test 4.1.5 t4 {t4 MATCH 'a "b c"'} { s {{2 0} {0 2}} } +do_matchinfo_p_test 4.1.4 t4 {t4 MATCH '"a b" c'} { s {{2 0} {0 2}} } +do_matchinfo_p_test 4.1.5 t4 {t4 MATCH 'a "b c"'} { s {{2 0} {0 2}} } do_matchinfo_test 4.1.6 t4 {t4 MATCH 'd d'} { s {{1 0} {0 1}} } do_matchinfo_test 4.1.7 t4 {t4 MATCH 'f OR abcd'} { x { {0 1 1 1 1 1 0 0 0 0 0 0} {1 1 1 0 1 1 0 0 0 0 0 0} @@ -218,11 +228,11 @@ {1 1 1 0 1 1 0 0 0 0 0 0} } } do_execsql_test 4.2.0 { - CREATE VIRTUAL TABLE t5 USING fts5(content); + CREATE VIRTUAL TABLE t5 USING fts5(content, detail=%DETAIL%); INSERT INTO t5 VALUES('a a a a a'); INSERT INTO t5 VALUES('a b a b a'); INSERT INTO t5 VALUES('c b c b c'); INSERT INTO t5 VALUES('x x x x x'); } @@ -231,11 +241,11 @@ s {2 1} } do_matchinfo_test 4.2.2 t5 {t5 MATCH 'a b'} { s {2} } do_matchinfo_test 4.2.3 t5 {t5 MATCH 'a b a'} { s {3} } do_matchinfo_test 4.2.4 t5 {t5 MATCH 'a a a'} { s {3 1} } -do_matchinfo_test 4.2.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} } +do_matchinfo_p_test 4.2.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} } do_matchinfo_test 4.2.6 t5 {t5 MATCH 'a OR b'} { s {1 2 1} } do_execsql_test 4.3.0 "INSERT INTO t5 VALUES('x y [string repeat {b } 50000]')"; # It used to be that the second 'a' token would be deferred. That doesn't @@ -248,24 +258,24 @@ } do_matchinfo_test 4.3.2 t5 {t5 MATCH 'a b'} { s {2} } do_matchinfo_test 4.3.3 t5 {t5 MATCH 'a b a'} { s {3} } do_matchinfo_test 4.3.4 t5 {t5 MATCH 'a a a'} { s {3 1} } -do_matchinfo_test 4.3.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} } +do_matchinfo_p_test 4.3.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} } do_matchinfo_test 4.3.6 t5 {t5 MATCH 'a OR b'} { s {1 2 1 1} } do_execsql_test 4.4.0.1 { INSERT INTO t5(t5) VALUES('optimize') } do_matchinfo_test 4.4.2 t5 {t5 MATCH 'a b'} { s {2} } do_matchinfo_test 4.4.1 t5 {t5 MATCH 'a a'} { s {2 1} } do_matchinfo_test 4.4.2 t5 {t5 MATCH 'a b'} { s {2} } do_matchinfo_test 4.4.3 t5 {t5 MATCH 'a b a'} { s {3} } do_matchinfo_test 4.4.4 t5 {t5 MATCH 'a a a'} { s {3 1} } -do_matchinfo_test 4.4.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} } +do_matchinfo_p_test 4.4.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} } do_execsql_test 4.5.0 { - CREATE VIRTUAL TABLE t6 USING fts5(a, b, c); + CREATE VIRTUAL TABLE t6 USING fts5(a, b, c, detail=%DETAIL%); INSERT INTO t6 VALUES('a', 'b', 'c'); } do_matchinfo_test 4.5.1 t6 {t6 MATCH 'a b c'} { s {{1 1 1}} } @@ -272,11 +282,11 @@ #------------------------------------------------------------------------- # Test the outcome of matchinfo() when used within a query that does not # use the full-text index (i.e. lookup by rowid or full-table scan). # do_execsql_test 7.1 { - CREATE VIRTUAL TABLE t10 USING fts5(content); + CREATE VIRTUAL TABLE t10 USING fts5(content, detail=%DETAIL%); INSERT INTO t10 VALUES('first record'); INSERT INTO t10 VALUES('second record'); } do_execsql_test 7.2 { SELECT typeof(matchinfo(t10)), length(matchinfo(t10)) FROM t10; @@ -297,11 +307,11 @@ # the number of rows. i.e. when the following query returns true: # # SELECT sum(length(content)) < count(*) FROM fts4table; # do_execsql_test 8.1 { - CREATE VIRTUAL TABLE t11 USING fts5(content); + CREATE VIRTUAL TABLE t11 USING fts5(content, detail=%DETAIL%); INSERT INTO t11(t11, rank) VALUES('pgsz', 32); INSERT INTO t11 VALUES('quitealongstringoftext'); INSERT INTO t11 VALUES('anotherquitealongstringoftext'); INSERT INTO t11 VALUES('athirdlongstringoftext'); INSERT INTO t11 VALUES('andonemoreforgoodluck'); @@ -316,34 +326,36 @@ SELECT mit(matchinfo(t11, 'nxa')) FROM t11 WHERE t11 MATCH 'a*' } {{204 1 3 3 0} {204 1 3 3 0} {204 1 3 3 0}} #------------------------------------------------------------------------- -do_execsql_test 9.1 { - CREATE VIRTUAL TABLE t12 USING fts5(content); - INSERT INTO t12 VALUES('a b c d'); - SELECT mit(matchinfo(t12, 'x')) FROM t12 WHERE t12 MATCH 'NEAR(a d, 1) OR a'; -} {{0 1 1 0 1 1 1 1 1}} -do_execsql_test 9.2 { - INSERT INTO t12 VALUES('a d c d'); - SELECT mit(matchinfo(t12, 'x')) FROM t12 WHERE t12 MATCH 'NEAR(a d, 1) OR a'; -} { - {0 2 2 0 3 2 1 2 2} {1 2 2 1 3 2 1 2 2} -} -do_execsql_test 9.3 { - INSERT INTO t12 VALUES('a d d a'); - SELECT mit(matchinfo(t12, 'x')) FROM t12 WHERE t12 MATCH 'NEAR(a d, 1) OR a'; -} { - {0 4 3 0 5 3 1 4 3} {1 4 3 1 5 3 1 4 3} {2 4 3 2 5 3 2 4 3} +if {[detail_is_full]} { + do_execsql_test 9.1 { + CREATE VIRTUAL TABLE t12 USING fts5(content, detail=%DETAIL%); + INSERT INTO t12 VALUES('a b c d'); + SELECT mit(matchinfo(t12,'x')) FROM t12 WHERE t12 MATCH 'NEAR(a d, 1) OR a'; + } {{0 1 1 0 1 1 1 1 1}} + do_execsql_test 9.2 { + INSERT INTO t12 VALUES('a d c d'); + SELECT mit(matchinfo(t12,'x')) FROM t12 WHERE t12 MATCH 'NEAR(a d, 1) OR a'; + } { + {0 2 2 0 3 2 1 2 2} {1 2 2 1 3 2 1 2 2} + } + do_execsql_test 9.3 { + INSERT INTO t12 VALUES('a d d a'); + SELECT mit(matchinfo(t12,'x')) FROM t12 WHERE t12 MATCH 'NEAR(a d, 1) OR a'; + } { + {0 4 3 0 5 3 1 4 3} {1 4 3 1 5 3 1 4 3} {2 4 3 2 5 3 2 4 3} + } } #--------------------------------------------------------------------------- # Test for a memory leak # do_execsql_test 10.1 { DROP TABLE t10; - CREATE VIRTUAL TABLE t10 USING fts5(idx, value); + CREATE VIRTUAL TABLE t10 USING fts5(idx, value, detail=%DETAIL%); INSERT INTO t10 values (1, 'one'),(2, 'two'),(3, 'three'); SELECT t10.rowid, t10.* FROM t10 JOIN (SELECT 1 AS idx UNION SELECT 2 UNION SELECT 3) AS x WHERE t10 MATCH x.idx @@ -356,11 +368,11 @@ # Test the 'y' matchinfo flag # reset_db sqlite3_fts5_register_matchinfo db do_execsql_test 11.0 { - CREATE VIRTUAL TABLE tt USING fts5(x, y); + CREATE VIRTUAL TABLE tt USING fts5(x, y, detail=%DETAIL%); INSERT INTO tt VALUES('c d a c d d', 'e a g b d a'); -- 1 INSERT INTO tt VALUES('c c g a e b', 'c g d g e c'); -- 2 INSERT INTO tt VALUES('b e f d e g', 'b a c b c g'); -- 3 INSERT INTO tt VALUES('a c f f g d', 'd b f d e g'); -- 4 INSERT INTO tt VALUES('g a c f c f', 'd g g b c c'); -- 5 @@ -408,10 +420,12 @@ 5 {1 0 1 0 0 1} 6 {1 0 1 0 2 2} 7 {2 1 0 0 0 0} 8 {1 2 1 2 2 1} 9 {1 1 1 1 1 3} 10 {1 3 0 0 0 0} } } { + + if {[string match *:* $expr] && [detail_is_none]} continue do_execsql_test 11.1.$tn.1 { SELECT rowid, mit(matchinfo(tt, 'y')) FROM tt WHERE tt MATCH $expr } $res set r2 [list] @@ -441,15 +455,17 @@ db func mit mit do_test 12.0 { set cols [list] for {set i 0} {$i < 50} {incr i} { lappend cols "c$i" } - execsql "CREATE VIRTUAL TABLE tt USING fts5([join $cols ,])" + execsql "CREATE VIRTUAL TABLE tt USING fts5([join $cols ,], detail=%DETAIL%)" } {} do_execsql_test 12.1 { INSERT INTO tt (rowid, c4, c45) VALUES(1, 'abc', 'abc'); SELECT mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH 'abc'; } [list [list [expr 1<<4] [expr 1<<(45-32)]]] + +} ;# foreach_detail_mode finish_test Index: ext/fts5/test/fts5simple.test ================================================================== --- ext/fts5/test/fts5simple.test +++ ext/fts5/test/fts5simple.test @@ -16,11 +16,13 @@ # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } - + + if 1 { + #------------------------------------------------------------------------- # set doc "x x [string repeat {y } 50]z z" do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(x); @@ -348,8 +350,37 @@ do_execsql_test 4.1 { SELECT rowid, x, x1 FROM x1 WHERE x1 MATCH '*reads' } {0 {} 4} +#------------------------------------------------------------------------- +reset_db +do_execsql_test 15.0 { + CREATE VIRTUAL TABLE x2 USING fts5(x, prefix=1); + INSERT INTO x2 VALUES('ab'); +} + +do_execsql_test 15.1 { + INSERT INTO x2(x2) VALUES('integrity-check'); +} + +} + +#------------------------------------------------------------------------- +foreach_detail_mode $testprefix { + reset_db + fts5_aux_test_functions db + do_execsql_test 16.0 { + CREATE VIRTUAL TABLE x3 USING fts5(x, detail=%DETAIL%); + INSERT INTO x3 VALUES('a b c d e f'); + } + do_execsql_test 16.1 { + SELECT fts5_test_poslist(x3) FROM x3('(a NOT b) OR c'); + } {2.0.2} + + do_execsql_test 16.1 { + SELECT fts5_test_poslist(x3) FROM x3('a OR c'); + } {{0.0.0 1.0.2}} +} finish_test ADDED ext/fts5/test/fts5simple2.test Index: ext/fts5/test/fts5simple2.test ================================================================== --- /dev/null +++ ext/fts5/test/fts5simple2.test @@ -0,0 +1,304 @@ +# 2015 September 05 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5simple2 + +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +if 1 { + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a, detail=none); + INSERT INTO t1 VALUES('a b c'); +} +do_execsql_test 1.1 { + SELECT rowid FROM t1('c a b') +} {1} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 2.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a, detail=none); + BEGIN; + INSERT INTO t1 VALUES('b c d'); + INSERT INTO t1 VALUES('b c d'); + COMMIT; +} +do_execsql_test 2.1 { + SELECT rowid FROM t1('b c d') +} {1 2} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 3.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a, detail=none); + BEGIN; + INSERT INTO t1 VALUES('b c d'); + INSERT INTO t1 VALUES('b c d'); +} +do_execsql_test 3.1 { + SELECT rowid FROM t1('b c d'); COMMIT; +} {1 2} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 4.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a, detail=none); + BEGIN; + INSERT INTO t1 VALUES('a1 b1 c1'); + INSERT INTO t1 VALUES('a2 b2 c2'); + INSERT INTO t1 VALUES('a3 b3 c3'); + COMMIT; +} +do_execsql_test 4.1 { + SELECT rowid FROM t1('b*'); +} {1 2 3} + + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 5.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a, detail=none); + BEGIN; + INSERT INTO t1 VALUES('a1 b1 c1'); + INSERT INTO t1 VALUES('a2 b2 c2'); + INSERT INTO t1 VALUES('a1 b1 c1'); + COMMIT; +} +do_execsql_test 5.1 { SELECT rowid FROM t1('b*') } {1 2 3} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 6.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a, detail=full); + BEGIN; + INSERT INTO t1 VALUES('a1 b1 c1'); + INSERT INTO t1 VALUES('a1 b1 c1'); + INSERT INTO t1 VALUES('a1 b1 c1'); + COMMIT; +} + +do_execsql_test 6.1 { SELECT rowid FROM t1('a1') ORDER BY rowid DESC } {3 2 1} +do_execsql_test 6.2 { SELECT rowid FROM t1('b1') ORDER BY rowid DESC } {3 2 1} +do_execsql_test 6.3 { SELECT rowid FROM t1('c1') ORDER BY rowid DESC } {3 2 1} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 7.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a, detail=none); + BEGIN; + INSERT INTO t1 VALUES('a1 b1'); + INSERT INTO t1 VALUES('a1 b2'); + COMMIT; +} +do_execsql_test 7.1 { SELECT rowid FROM t1('b*') ORDER BY rowid DESC } {2 1} +do_execsql_test 7.2 { SELECT rowid FROM t1('a1') ORDER BY rowid DESC } {2 1} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 8.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a, detail=none); + INSERT INTO t1 VALUES('a1 b1 c1'); + INSERT INTO t1 VALUES('a2 b2 c2'); + INSERT INTO t1 VALUES('a1 b1 c1'); +} +do_execsql_test 8.0.1 { SELECT rowid FROM t1('b*') } {1 2 3} +do_execsql_test 8.0.2 { SELECT rowid FROM t1('a1') } {1 3} +do_execsql_test 8.0.3 { SELECT rowid FROM t1('c2') } {2} + +do_execsql_test 8.0.4 { SELECT rowid FROM t1('b*') ORDER BY rowid DESC } {3 2 1} +do_execsql_test 8.0.5 { SELECT rowid FROM t1('a1') ORDER BY rowid DESC } {3 1} +do_execsql_test 8.0.8 { SELECT rowid FROM t1('c2') ORDER BY rowid DESC } {2} + +do_execsql_test 8.1.0 { INSERT INTO t1(t1) VALUES('optimize') } + +do_execsql_test 8.1.1 { SELECT rowid FROM t1('b*') } {1 2 3} +do_execsql_test 8.1.2 { SELECT rowid FROM t1('a1') } {1 3} +do_execsql_test 8.1.3 { SELECT rowid FROM t1('c2') } {2} + +do_execsql_test 8.2.1 { SELECT rowid FROM t1('b*') ORDER BY rowid DESC} {3 2 1} +do_execsql_test 8.2.2 { SELECT rowid FROM t1('a1') ORDER BY rowid DESC} {3 1} +do_execsql_test 8.2.3 { SELECT rowid FROM t1('c2') ORDER BY rowid DESC} {2} + +#-------------------------------------------------------------------------- +# +reset_db +do_execsql_test 9.0.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a, detail=none); + INSERT INTO t1 VALUES('a1 b1 c1'); + INSERT INTO t1 VALUES('a2 b2 c2'); + INSERT INTO t1 VALUES('a1 b1 c1'); +} +do_execsql_test 9.0.1 { + INSERT INTO t1(t1) VALUES('integrity-check'); +} {} + +reset_db +do_execsql_test 9.1.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a, b, detail=none); + INSERT INTO t1 VALUES('a1 b1 c1', 'x y z'); + INSERT INTO t1 VALUES('a2 b2 c2', '1 2 3'); + INSERT INTO t1 VALUES('a1 b1 c1', 'x 2 z'); +} +do_execsql_test 9.2.1 { + INSERT INTO t1(t1) VALUES('integrity-check'); +} {} + +#-------------------------------------------------------------------------- +# +reset_db +do_execsql_test 10.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a, detail=none); + INSERT INTO t1 VALUES('b1'); + INSERT INTO t1 VALUES('b1'); + DELETE FROM t1 WHERE rowid=1; +} + +do_execsql_test 10.1 { + SELECT rowid FROM t1('b1'); +} {2} + +do_execsql_test 10.2 { + SELECT rowid FROM t1('b1') ORDER BY rowid DESC; +} {2} + +do_execsql_test 10.3 { + INSERT INTO t1(t1) VALUES('integrity-check'); +} {} + +#-------------------------------------------------------------------------- +# +reset_db +do_execsql_test 11.1 { + CREATE VIRTUAL TABLE t1 USING fts5(x, y, detail=none); + INSERT INTO t1(t1, rank) VALUES('pgsz', 32); + WITH d(x,y) AS ( + SELECT NULL, 'xyz' UNION ALL SELECT NULL, 'xyz' FROM d + ) + INSERT INTO t1 SELECT * FROM d LIMIT 23; +} + +#db eval { SELECT rowid AS r, quote(block) AS b FROM t1_data } { puts "$r: $b" } +do_execsql_test 11.2 { + SELECT rowid FROM t1; +} {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23} + +do_execsql_test 11.3 { + SELECT rowid FROM t1('xyz'); +} {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23} + +do_execsql_test 11.4 { + INSERT INTO t1(t1) VALUES('integrity-check'); +} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 12.0 { + CREATE VIRTUAL TABLE yy USING fts5(x, detail=none); + INSERT INTO yy VALUES('in if'); + INSERT INTO yy VALUES('if'); +} {} + +do_execsql_test 12.1 { + SELECT rowid FROM yy('i*'); +} {1 2} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 13.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a, prefix=1, detail=none); +} {} +foreach {rowid a} { + 0 {f} + 1 {u} + 2 {k} + 3 {a} + 4 {a} + 5 {u} + 6 {u} + 7 {u} + 8 {f} + 9 {f} + 10 {a} + 11 {p} + 12 {f} + 13 {u} + 14 {a} + 15 {a} +} { + do_execsql_test 13.1.$rowid { + INSERT INTO t1(rowid, a) VALUES($rowid, $a); + } +} + +#------------------------------------------------------------------------- +# +reset_db +fts5_aux_test_functions db +do_execsql_test 14.0 { + CREATE VIRTUAL TABLE t1 USING fts5(a, detail=none); + INSERT INTO t1 VALUES('a b c d'); +} {} + +do_execsql_test 14.1 { + SELECT fts5_test_poslist(t1) FROM t1('b') ORDER BY rank; +} {0.0.1} + +} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 15.1 { + CREATE VIRTUAL TABLE t1 USING fts5(x, detail=none); + BEGIN; + INSERT INTO t1(rowid, x) VALUES(1, 'sqlite'); + INSERT INTO t1(rowid, x) VALUES(2, 'sqlite'); + COMMIT; +} {} + +do_test 15.1 { + execsql { INSERT INTO t1(t1) VALUES('integrity-check') } +} {} + +do_test 15.2 { + execsql { DELETE FROM t1 } +} {} + +do_execsql_test 15.3.1 { + SELECT rowid FROM t1('sqlite'); +} {} + +do_execsql_test 15.3.2 { + SELECT rowid FROM t1('sqlite') ORDER BY rowid DESC; +} {} + +do_test 15.4 { + execsql { INSERT INTO t1(t1) VALUES('integrity-check') } +} {} + +finish_test + ADDED ext/fts5/test/fts5synonym2.test Index: ext/fts5/test/fts5synonym2.test ================================================================== --- /dev/null +++ ext/fts5/test/fts5synonym2.test @@ -0,0 +1,154 @@ +# 2014 Dec 20 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests focusing on custom tokenizers that support synonyms. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5synonym2 + +# If SQLITE_ENABLE_FTS5 is defined, omit this file. +ifcapable !fts5 { + finish_test + return +} + +#------------------------------------------------------------------------- +# Code for a simple Tcl tokenizer that supports synonyms at query time. +# +foreach SYNDICT { + {zero 0} + {one 1 i} + {two 2 ii} + {three 3 iii} + {four 4 iv} + {five 5 v} + {six 6 vi} + {seven 7 vii} + {eight 8 viii} + {nine 9 ix} +} { + foreach s $SYNDICT { + set o [list] + foreach x $SYNDICT {if {$x!=$s} {lappend o $x}} + set ::syn($s) $o + } +} + +proc tcl_tokenize {tflags text} { + foreach {w iStart iEnd} [fts5_tokenize_split $text] { + sqlite3_fts5_token $w $iStart $iEnd + if {$tflags == "query"} { + foreach s $::syn($w) { sqlite3_fts5_token -colo $s $iStart $iEnd } + } + } +} + +proc tcl_create {args} { + return "tcl_tokenize" +} + +# +# End of tokenizer code. +#------------------------------------------------------------------------- + +foreach_detail_mode $testprefix { + +sqlite3_fts5_create_tokenizer db tcl tcl_create +fts5_aux_test_functions db + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE ss USING fts5(a, b, tokenize=tcl, detail=%DETAIL%); + INSERT INTO ss VALUES('5 5 five seven 3 seven i', '2 1 5 0 two 1 i'); + INSERT INTO ss VALUES('six ix iii 7 i vii iii', 'one seven nine 4 9 1 vi'); + INSERT INTO ss VALUES('6 viii i five six zero seven', '5 v iii iv iv 3'); + INSERT INTO ss VALUES('9 ii six 8 1 6', 'six 4 iv iv 7'); + INSERT INTO ss VALUES('1 5 4 eight ii iv iii', 'nine 2 eight ix v vii'); + INSERT INTO ss VALUES('one 7 seven six 2 two', '1 2 four 7 4 3 4'); + INSERT INTO ss VALUES('eight iv 4 nine vii six 1', '5 6 v one zero 4'); + INSERT INTO ss VALUES('v 9 8 iii 4', '9 4 seven two vi vii'); + INSERT INTO ss VALUES('3 ix two 9 0 nine i', 'five ii nine two viii i five'); + INSERT INTO ss VALUES('six iii 9 two eight 2', 'nine i nine vii nine'); + INSERT INTO ss VALUES('6 three zero seven vii five', '8 vii ix 0 7 seven'); + INSERT INTO ss VALUES('8 vii 8 7 3 4', 'eight iii four viii nine iv three'); + INSERT INTO ss VALUES('4 v 7 two 0 one 8', 'vii 1 two five i zero 9'); + INSERT INTO ss VALUES('3 ii vii vi eight', '8 4 ix one three eight'); + INSERT INTO ss VALUES('iv eight seven 6 9 seven', 'one vi two five seven'); + INSERT INTO ss VALUES('i i 5 i v vii eight', '2 seven i 2 2 four'); + INSERT INTO ss VALUES('0 i iii nine 3 ix five', '0 eight iv 0 six 2'); + INSERT INTO ss VALUES('iv vii three 3 9 one 8', '2 ii 6 eight ii six six'); + INSERT INTO ss VALUES('eight one two nine six', '8 9 3 viii vi'); + INSERT INTO ss VALUES('one 0 four ii eight one 3', 'iii eight vi vi vi'); + INSERT INTO ss VALUES('4 0 eight 0 0', '1 four one vii seven ii'); + INSERT INTO ss VALUES('1 zero nine 2 2', 'viii iv two vi nine v iii'); + INSERT INTO ss VALUES('5 five viii four four vi', '8 five 7 vii 6 4'); + INSERT INTO ss VALUES('7 ix four 8 vii', 'nine three nine ii ix vii'); + INSERT INTO ss VALUES('nine iv v i 0 v', 'two iv vii six i ix 4'); + INSERT INTO ss VALUES('one v v one viii 3 8', '2 1 3 five iii'); + INSERT INTO ss VALUES('six ii 5 nine 4 viii seven', 'eight i ix ix 7 four'); + INSERT INTO ss VALUES('9 ii two seven three 7 0', 'six viii seven 7 five'); + INSERT INTO ss VALUES('five two 4 viii nine', '9 7 nine zero 1 two one'); + INSERT INTO ss VALUES('viii 8 iii i ii 8 3', '4 2 7 v 8 8'); + INSERT INTO ss VALUES('four vii 4 iii zero 0 vii', '3 viii iii zero 9 i'); + INSERT INTO ss VALUES('0 seven v five i five v', 'one 4 2 ix 9'); + INSERT INTO ss VALUES('two 5 two two ix 4 1', '3 nine ii v nine 3 five'); + INSERT INTO ss VALUES('five 5 7 4 6 vii', 'three 2 ix 2 8 6'); + INSERT INTO ss VALUES('six iii vi iv seven eight', '8 six 7 0 4'); + INSERT INTO ss VALUES('vi vi iv 3 0 one one', '9 6 eight ix iv'); + INSERT INTO ss VALUES('7 2 2 iii 0', '0 0 seven 1 nine'); + INSERT INTO ss VALUES('8 6 iv six ii', 'iv 6 3 4 ii five'); + INSERT INTO ss VALUES('0 two two seven ii', 'vii ix four 4 zero vi vi'); + INSERT INTO ss VALUES('2 one eight 8 9 7', 'vi 3 0 3 vii'); + INSERT INTO ss VALUES('iii ii ix iv three', 'vi i 6 1 two'); + INSERT INTO ss VALUES('eight four nine 8 seven', 'one three i nine iii one'); + INSERT INTO ss VALUES('iii seven five ix 8', 'ii 7 seven 0 four ii'); + INSERT INTO ss VALUES('four 0 1 5 two', 'iii 9 5 ii ii 2 4'); + INSERT INTO ss VALUES('iii nine four vi 8 five six', 'i i ii seven vi vii'); + INSERT INTO ss VALUES('eight vii eight six 3', 'i vii 1 six 9 vii'); + INSERT INTO ss VALUES('9 0 viii viii five', 'i 1 viii ix 3 4'); + INSERT INTO ss VALUES('three nine 5 nine viii four zero', 'ii i 1 5 2 viii'); + INSERT INTO ss VALUES('5 vii three 9 four', 'three five one 7 2 eight one'); +} + +foreach {tn expr} { + 1.1 "one" 1.2 "two" 1.3 "three" 1.4 "four" + 1.5 "v" 1.6 "vi" 1.7 "vii" 1.8 "viii" + 1.9 "9" 1.10 "0" 1.11 "1" 1.12 "2" + + 2.1 "one OR two OR three OR four" + 2.2 "(one AND two) OR (three AND four)" + 2.3 "(one AND two) OR (three AND four) NOT five" + 2.4 "(one AND two) NOT 6" + + 3.1 "b:one AND a:two" + 3.2 "b:one OR a:two" + 3.3 "a:one OR b:1 OR {a b} : i" + + 4.1 "NEAR(one two, 2)" + 4.2 "NEAR(one two three, 2)" + 4.3 "NEAR(eight nine, 1) OR NEAR(six seven, 1)" +} { + if {[fts5_expr_ok $expr ss]==0} { + do_test 1.$tn.OMITTED { list } [list] + continue + } + + set res [fts5_query_data $expr ss ASC ::syn] + breakpoint + do_execsql_test 1.$tn.[llength $res].asc { + SELECT rowid, fts5_test_poslist(ss), fts5_test_collist(ss) FROM ss($expr) + } $res +} + +} + +finish_test + Index: ext/fts5/test/fts5vocab.test ================================================================== --- ext/fts5/test/fts5vocab.test +++ ext/fts5/test/fts5vocab.test @@ -19,13 +19,48 @@ ifcapable !fts5 { finish_test return } +foreach_detail_mode $testprefix { + +proc null_list_entries {iFirst nInterval L} { + for {set i $iFirst} {$i < [llength $L]} {incr i $nInterval} { + lset L $i {} + } + return $L +} + +proc star_from_row {L} { + if {[detail_is_full]==0} { + set L [null_list_entries 2 3 $L] + } + return $L +} + +proc star_from_col {L} { + if {[detail_is_col]} { + set L [null_list_entries 3 4 $L] + } + if {[detail_is_none]} { + set L [null_list_entries 1 4 $L] + set L [null_list_entries 3 4 $L] + } + return $L +} + +proc row_to_col {L} { + if {[detail_is_none]==0} { error "this is for detail=none mode" } + set ret [list] + foreach {a b c} $L { + lappend ret $a {} $b {} + } + set ret +} do_execsql_test 1.1.1 { - CREATE VIRTUAL TABLE t1 USING fts5(one, prefix=1); + CREATE VIRTUAL TABLE t1 USING fts5(one, prefix=1, detail=%DETAIL%); CREATE VIRTUAL TABLE v1 USING fts5vocab(t1, 'row'); PRAGMA table_info = v1; } { 0 term {} 0 {} 0 1 doc {} 0 {} 0 @@ -50,36 +85,36 @@ INSERT INTO t1 VALUES('x x x'); } do_execsql_test 1.4.1 { SELECT * FROM v1; -} {x 2 4 y 1 1 z 1 1} +} [star_from_row {x 2 4 y 1 1 z 1 1}] do_execsql_test 1.4.2 { SELECT * FROM v2; -} {x one 2 4 y one 1 1 z one 1 1} +} [star_from_col {x one 2 4 y one 1 1 z one 1 1}] do_execsql_test 1.5.1 { BEGIN; INSERT INTO t1 VALUES('a b c'); SELECT * FROM v1 WHERE term<'d'; -} {a 1 1 b 1 1 c 1 1} +} [star_from_row {a 1 1 b 1 1 c 1 1}] do_execsql_test 1.5.2 { SELECT * FROM v2 WHERE term<'d'; COMMIT; -} {a one 1 1 b one 1 1 c one 1 1} +} [star_from_col {a one 1 1 b one 1 1 c one 1 1}] do_execsql_test 1.6 { DELETE FROM t1 WHERE one = 'a b c'; SELECT * FROM v1; -} {x 2 4 y 1 1 z 1 1} +} [star_from_row {x 2 4 y 1 1 z 1 1}] #------------------------------------------------------------------------- # do_execsql_test 2.0 { - CREATE VIRTUAL TABLE tt USING fts5(a, b); + CREATE VIRTUAL TABLE tt USING fts5(a, b, detail=%DETAIL%); INSERT INTO tt VALUES('d g b f d f', 'f c e c d a'); INSERT INTO tt VALUES('f a e a a b', 'e d c f d d'); INSERT INTO tt VALUES('b c a a a b', 'f f c c b c'); INSERT INTO tt VALUES('f d c a c e', 'd g d e g d'); INSERT INTO tt VALUES('g d e f a g x', 'f f d a a b'); @@ -88,24 +123,27 @@ INSERT INTO tt VALUES('g d e f d e', 'a c d b a g'); INSERT INTO tt VALUES('e f a c c b', 'b f e a f d y'); INSERT INTO tt VALUES('c c a a c f', 'd g a e b g'); } -set res_col { +set res_row [star_from_row { + a 10 20 b 9 14 c 9 20 d 9 19 + e 8 13 f 10 20 g 7 14 x 1 1 + y 1 1 +}] +set res_col [star_from_col { a a 6 11 a b 7 9 b a 6 7 b b 7 7 c a 6 12 c b 5 8 d a 4 6 d b 9 13 e a 6 7 e b 6 6 f a 9 10 f b 7 10 g a 5 7 g b 5 7 x a 1 1 y b 1 1 -} -set res_row { - a 10 20 b 9 14 c 9 20 d 9 19 - e 8 13 f 10 20 g 7 14 x 1 1 - y 1 1 +}] +if {[detail_is_none]} { + set res_col [row_to_col $res_row] } foreach {tn tbl resname} { 1 "fts5vocab(tt, 'col')" res_col 2 "fts5vocab(tt, 'row')" res_row @@ -151,13 +189,13 @@ # reset_db forcedelete test.db2 do_execsql_test 5.0 { ATTACH 'test.db2' AS aux; - CREATE VIRTUAL TABLE t1 USING fts5(x); - CREATE VIRTUAL TABLE temp.t1 USING fts5(x); - CREATE VIRTUAL TABLE aux.t1 USING fts5(x); + CREATE VIRTUAL TABLE t1 USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE temp.t1 USING fts5(x, detail=%DETAIL%); + CREATE VIRTUAL TABLE aux.t1 USING fts5(x, detail=%DETAIL%); INSERT INTO main.t1 VALUES('a b c'); INSERT INTO main.t1 VALUES('d e f'); INSERT INTO main.t1 VALUES('a e c'); @@ -176,22 +214,22 @@ CREATE VIRTUAL TABLE temp.vt1 USING fts5vocab(t1, row); CREATE VIRTUAL TABLE temp.vt2 USING fts5vocab(temp, t1, row); CREATE VIRTUAL TABLE temp.va USING fts5vocab(aux, t1, row); } -do_execsql_test 5.2 { SELECT * FROM vm } { - a 2 2 b 1 1 c 2 2 d 1 1 e 2 2 f 1 1 -} -do_execsql_test 5.3 { SELECT * FROM vt1 } { - 1 2 2 2 1 1 3 2 2 4 1 1 5 2 2 6 1 1 -} -do_execsql_test 5.4 { SELECT * FROM vt2 } { - 1 2 2 2 1 1 3 2 2 4 1 1 5 2 2 6 1 1 -} -do_execsql_test 5.5 { SELECT * FROM va } { - m 1 1 n 2 2 o 1 1 x 2 2 y 1 1 z 2 2 -} +do_execsql_test 5.2 { SELECT * FROM vm } [star_from_row { + a 2 2 b 1 1 c 2 2 d 1 1 e 2 2 f 1 1 +}] +do_execsql_test 5.3 { SELECT * FROM vt1 } [star_from_row { + 1 2 2 2 1 1 3 2 2 4 1 1 5 2 2 6 1 1 +}] +do_execsql_test 5.4 { SELECT * FROM vt2 } [star_from_row { + 1 2 2 2 1 1 3 2 2 4 1 1 5 2 2 6 1 1 +}] +do_execsql_test 5.5 { SELECT * FROM va } [star_from_row { + m 1 1 n 2 2 o 1 1 x 2 2 y 1 1 z 2 2 +}] #------------------------------------------------------------------------- # do_execsql_test 6.0 { CREATE TABLE iii(iii); @@ -216,11 +254,11 @@ #------------------------------------------------------------------------- # Test single term queries on fts5vocab tables (i.e. those with term=? # constraints in the WHERE clause). # do_execsql_test 7.0 { - CREATE VIRTUAL TABLE tx USING fts5(one, two); + CREATE VIRTUAL TABLE tx USING fts5(one, two, detail=%DETAIL%); INSERT INTO tx VALUES('g a ggg g a b eee', 'cc d aa ff g ee'); INSERT INTO tx VALUES('dd fff i a i jjj', 'f fff hh jj e f'); INSERT INTO tx VALUES('ggg a f f fff dd aa', 'd ggg f f j gg ddd'); INSERT INTO tx VALUES('e bb h jjj ii gg', 'e aa e f c fff'); INSERT INTO tx VALUES('j ff aa a h', 'h a j bbb bb'); @@ -274,10 +312,14 @@ SELECT $term, 'two', sum(cont(two, $term)>0), sum(cont(two, $term)) FROM tx }] if {[lindex $r2 2]==0} {set r2 [list]} set resc [concat $r1 $r2] + + set resc [star_from_col $resc] + set resr [star_from_row $resr] + if {[detail_is_none]} { set resc [row_to_col $resr] } do_execsql_test 7.$term.1 {SELECT * FROM txc WHERE term=$term} $resc do_execsql_test 7.$term.2 {SELECT * FROM txr WHERE term=$term} $resr } do_execsql_test 7.1 { @@ -338,12 +380,16 @@ do_execsql_test 7.3.1 { SELECT count(*) FROM txr, txr_c WHERE txr.term = txr_c.term; } {30} -do_execsql_test 7.3.2 { - SELECT count(*) FROM txc, txc_c - WHERE txc.term = txc_c.term AND txc.col=txc_c.col; -} {57} +if {![detail_is_none]} { + do_execsql_test 7.3.2 { + SELECT count(*) FROM txc, txc_c + WHERE txc.term = txc_c.term AND txc.col=txc_c.col; + } {57} +} + +} finish_test ADDED ext/fts5/tool/fts5speed.tcl Index: ext/fts5/tool/fts5speed.tcl ================================================================== --- /dev/null +++ ext/fts5/tool/fts5speed.tcl @@ -0,0 +1,59 @@ + + +set Q { + {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'enron'"} + {25 "SELECT count(*) FROM t1 WHERE t1 MATCH 'hours'"} + {300 "SELECT count(*) FROM t1 WHERE t1 MATCH 'acid'"} + {100 "SELECT count(*) FROM t1 WHERE t1 MATCH 'loaned OR mobility OR popcore OR sunk'"} + {100 "SELECT count(*) FROM t1 WHERE t1 MATCH 'enron AND myapps'"} + {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'en* AND my*'"} + + {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'c:t*'"} + {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t* OR b:t* OR c:t* OR d:t* OR e:t* OR f:t* OR g:t*'"} + {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t*'"} +} + +proc usage {} { + global Q + puts stderr "Usage: $::argv0 DATABASE QUERY" + puts stderr "" + for {set i 1} {$i <= [llength $Q]} {incr i} { + puts stderr " $i. [lindex $Q [expr $i-1]]" + } + puts stderr "" + exit -1 +} + + +set nArg [llength $argv] +if {$nArg!=2 && $nArg!=3} usage +set database [lindex $argv 0] +set iquery [lindex $argv 1] +if {$iquery<1 || $iquery>[llength $Q]} usage +set nRepeat 0 +if {$nArg==3} { set nRepeat [lindex $argv 2] } + + +sqlite3 db $database +catch { load_static_extension db fts5 } + +incr iquery -1 +set sql [lindex $Q $iquery 1] +if {$nRepeat==0} { + set nRepeat [lindex $Q $iquery 0] +} + +puts "sql: $sql" +puts "nRepeat: $nRepeat" +if {[regexp matchinfo $sql]} { + sqlite3_fts5_register_matchinfo db + db eval $sql +} else { + puts "result: [db eval $sql]" +} + +for {set i 1} {$i < $nRepeat} {incr i} { + db eval $sql +} + +