Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Add tests and many fixes for snippet implementation. Some tests are still failing. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | matchinfo |
Files: | files | file ages | folders |
SHA1: |
a257d81d4b850eced3971937f5ee39f2 |
User & Date: | dan 2013-01-08 20:35:12.005 |
Context
2013-01-09
| ||
17:16 | Fixes for snippet function and tests. Add API to determine the number of tokens in an FTS query phrase. check-in: 0d5a640f1f user: dan tags: matchinfo | |
2013-01-08
| ||
20:35 | Add tests and many fixes for snippet implementation. Some tests are still failing. check-in: a257d81d4b user: dan tags: matchinfo | |
11:45 | Fix an fts5 problem to do with initializing the global size record. Also have the checksum routine ignore size records when calculating the index checksum. check-in: e7b52edf68 user: dan tags: matchinfo | |
Changes
Changes to src/fts5.c.
︙ | ︙ | |||
928 929 930 931 932 933 934 | rc = SQLITE4_NOMEM; }else{ pNode->eType = TOKEN_PRIMITIVE; pNode->pPhrase = pPhrase; *pp = pNode; } } | | | 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 | rc = SQLITE4_NOMEM; }else{ pNode->eType = TOKEN_PRIMITIVE; pNode->pPhrase = pPhrase; *pp = pNode; } } nStr += pPhrase->nStr; break; } case TOKEN_AND: case TOKEN_OR: case TOKEN_NOT: { Fts5ExprNode **pp = aHier[nHier-1].ppNode; |
︙ | ︙ | |||
1277 1278 1279 1280 1281 1282 1283 | if( iStream>=p->nStream ){ int nOld = p->nStream; int nNew = 4; while( nNew<=iStream ) nNew = nNew*2; p->aSz = (i64*)sqlite4DbReallocOrFree(db, p->aSz, nNew*p->nCol*sizeof(i64)); if( p->aSz==0 ) goto tokenize_cb_out; | | > | 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 | if( iStream>=p->nStream ){ int nOld = p->nStream; int nNew = 4; while( nNew<=iStream ) nNew = nNew*2; p->aSz = (i64*)sqlite4DbReallocOrFree(db, p->aSz, nNew*p->nCol*sizeof(i64)); if( p->aSz==0 ) goto tokenize_cb_out; memset(&p->aSz[nOld * p->nCol], 0, (nNew-nOld)*p->nCol*sizeof(i64)); p->nStream = nNew; } p->aSz[iStream*p->nCol + p->iCol]++; pTerm = (TokenizeTerm *)sqlite4HashFind(&p->hash, zToken, nToken); if( pTerm==0 ){ /* Size the initial allocation so that it fits in the lookaside buffer */ int nAlloc = sizeof(TokenizeTerm) + nToken + 32; |
︙ | ︙ | |||
1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 | if( pnRow ){ int nByte = sizeof(Fts5Size) + sizeof(i64) * pInfo->nCol * nMinStream; pSz = sqlite4DbMallocZero(db, nByte); if( pSz==0 ){ rc = SQLITE4_NOMEM; }else{ pSz->aSz = (i64 *)&pSz[1]; *pnRow = 0; rc = SQLITE4_OK; } }else{ rc = SQLITE4_CORRUPT_BKPT; } }else if( rc==SQLITE4_OK ){ | > > | 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 | if( pnRow ){ int nByte = sizeof(Fts5Size) + sizeof(i64) * pInfo->nCol * nMinStream; pSz = sqlite4DbMallocZero(db, nByte); if( pSz==0 ){ rc = SQLITE4_NOMEM; }else{ pSz->aSz = (i64 *)&pSz[1]; pSz->nStream = nMinStream; pSz->nCol = pInfo->nCol; *pnRow = 0; rc = SQLITE4_OK; } }else{ rc = SQLITE4_CORRUPT_BKPT; } }else if( rc==SQLITE4_OK ){ |
︙ | ︙ | |||
1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 | int iOff = 0; int iCol; if( nRow>=0 ){ iOff += sqlite4PutVarint(&a[iOff], nRow); } iOff += sqlite4PutVarint(&a[iOff], pSz->nStream); for(iCol=0; iCol<pSz->nCol; iCol++){ int i; for(i=0; i<pSz->nStream; i++){ | > | | 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 | int iOff = 0; int iCol; if( nRow>=0 ){ iOff += sqlite4PutVarint(&a[iOff], nRow); } iOff += sqlite4PutVarint(&a[iOff], pSz->nStream); for(iCol=0; iCol<pSz->nCol; iCol++){ int i; for(i=0; i<pSz->nStream; i++){ iOff += sqlite4PutVarint(&a[iOff], pSz->aSz[i*pSz->nCol+iCol]); } } return sqlite4KVStoreReplace(p, aKey, nKey, a, iOff); } static int fts5CsrLoadGlobal(Fts5Cursor *pCsr){ |
︙ | ︙ | |||
1643 1644 1645 1646 1647 1648 1649 | nByte += nCol * sizeof(char *); } pInfo = sqlite4DbMallocZero(db, nByte); if( pInfo ){ pInfo->iDb = sqlite4SchemaToIndex(db, pIdx->pSchema); pInfo->iRoot = pIdx->tnum; | | | 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 | nByte += nCol * sizeof(char *); } pInfo = sqlite4DbMallocZero(db, nByte); if( pInfo ){ pInfo->iDb = sqlite4SchemaToIndex(db, pIdx->pSchema); pInfo->iRoot = pIdx->tnum; pInfo->iTbl = sqlite4FindPrimaryKey(pIdx->pTable, 0)->tnum; pInfo->nCol = pIdx->pTable->nCol; fts5TokenizerCreate(pParse, pIdx->pFts, &pInfo->pTokenizer, &pInfo->p); if( pInfo->p==0 ){ assert( pParse->nErr ); sqlite4DbFree(db, pInfo); pInfo = 0; |
︙ | ︙ | |||
2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 | */ static int fts5OpenCursors(sqlite4 *db, Fts5Info *pInfo, Fts5Cursor *pCsr){ return fts5OpenExprCursors(db, pInfo, pCsr->pExpr->pRoot); } void sqlite4Fts5Close(sqlite4 *db, Fts5Cursor *pCsr){ if( pCsr ){ fts5ExpressionFree(db, pCsr->pExpr); sqlite4DbFree(db, pCsr->pIter); sqlite4DbFree(db, pCsr->aKey); sqlite4DbFree(db, pCsr->anRow); sqlite4DbFree(db, pCsr); } } | > > > > > > > > | 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 | */ static int fts5OpenCursors(sqlite4 *db, Fts5Info *pInfo, Fts5Cursor *pCsr){ return fts5OpenExprCursors(db, pInfo, pCsr->pExpr->pRoot); } void sqlite4Fts5Close(sqlite4 *db, Fts5Cursor *pCsr){ if( pCsr ){ if( pCsr->aMem ){ int i; for(i=0; i<pCsr->pInfo->nCol; i++){ sqlite4DbFree(db, pCsr->aMem[i].zMalloc); } sqlite4DbFree(db, pCsr->aMem); } fts5ExpressionFree(db, pCsr->pExpr); sqlite4DbFree(db, pCsr->pIter); sqlite4DbFree(db, pCsr->aKey); sqlite4DbFree(db, pCsr->anRow); sqlite4DbFree(db, pCsr); } } |
︙ | ︙ | |||
2694 2695 2696 2697 2698 2699 2700 | if( iC<0 && iS<0 ){ int nFin = pSz->nCol * pSz->nStream; for(i=0; i<nFin; i++) nToken += pSz->aSz[i]; }else if( iC<0 ){ for(i=0; i<pSz->nCol; i++) nToken += pSz->aSz[i*pSz->nStream + iS]; }else if( iS<0 ){ | | | 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 | if( iC<0 && iS<0 ){ int nFin = pSz->nCol * pSz->nStream; for(i=0; i<nFin; i++) nToken += pSz->aSz[i]; }else if( iC<0 ){ for(i=0; i<pSz->nCol; i++) nToken += pSz->aSz[i*pSz->nStream + iS]; }else if( iS<0 ){ for(i=0; i<pSz->nStream; i++) nToken += pSz->aSz[pSz->nStream*iC + i]; }else if( iC<pSz->nCol && iS<pSz->nStream ){ nToken = pSz->aSz[iC * pSz->nStream + iS]; } return nToken; } |
︙ | ︙ | |||
3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 | } if( rc==SQLITE4_OK ){ assert( pIter->iMatch<=iMatch ); while( pIter->iCurrent>=0 && pIter->iMatch<iMatch ){ fts5InstanceListNext(&pIter->aList[pIter->iCurrent]); fts5IterSetCurrent(pIter, pCsr->pExpr->nPhrase); } if( pIter->iCurrent<0 ){ rc = SQLITE4_NOTFOUND; }else{ InstanceList *p = &pIter->aList[pIter->iCurrent]; *piOff = p->iOff; *piC = p->iCol; | > | 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 | } if( rc==SQLITE4_OK ){ assert( pIter->iMatch<=iMatch ); while( pIter->iCurrent>=0 && pIter->iMatch<iMatch ){ fts5InstanceListNext(&pIter->aList[pIter->iCurrent]); fts5IterSetCurrent(pIter, pCsr->pExpr->nPhrase); pIter->iMatch++; } if( pIter->iCurrent<0 ){ rc = SQLITE4_NOTFOUND; }else{ InstanceList *p = &pIter->aList[pIter->iCurrent]; *piOff = p->iOff; *piC = p->iCol; |
︙ | ︙ |
Changes to src/fts5func.c.
︙ | ︙ | |||
8 9 10 11 12 13 14 | ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* */ /* | | > < < < < | 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 | ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* */ /* ** The BM25 and BM25F implementations in this file are based on information ** found in: ** ** Stephen Robertson and Hugo Zaragoza: "The Probabilistic Relevance ** Framework: BM25 and Beyond", 2009. */ #include "sqliteInt.h" #include <math.h> /* temporary: For log() */ static char fts5Tolower(char c){ if( c>='A' && c<='Z' ) c = c + ('a' - 'A'); |
︙ | ︙ | |||
147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 | if( rc==SQLITE4_OK ){ sqlite4_result_double(pCtx, rank); }else{ sqlite4_result_error_code(pCtx, rc); } } typedef struct SnippetCtx SnippetCtx; struct SnippetCtx { sqlite4 *db; /* Database handle */ int nToken; /* Number of tokens in snippet */ int iOff; /* First token in snippet */ u64 mask; /* Snippet mask. Highlight these terms */ | > > > > > > > > > > > > > > > > > > | < < > < > > | | | | | | | | | | | | | | > > | | | > > > > > > > > > > < < < | > | | | < < > | | | | > < > > > > > > | | | | > > | > > | | > | | > | > > > > | > > | | > > > > > > > > > > > > > > > > > > > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > | > > > > > > | > | > > | > > > > > > > | 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 
458 459 460 461 462 463 464 465 | if( rc==SQLITE4_OK ){ sqlite4_result_double(pCtx, rank); }else{ sqlite4_result_error_code(pCtx, rc); } } typedef struct Snippet Snippet; typedef struct SnippetText SnippetText; struct Snippet { int iCol; int iOff; u64 hlmask; }; struct SnippetText { char *zOut; /* Pointer to snippet text */ int nOut; /* Size of zOut in bytes */ int nAlloc; /* Bytes of space allocated at zOut */ }; typedef struct SnippetCtx SnippetCtx; struct SnippetCtx { sqlite4 *db; /* Database handle */ int nToken; /* Number of tokens in snippet */ int iOff; /* First token in snippet */ u64 mask; /* Snippet mask. Highlight these terms */ const char *zStart; const char *zEnd; const char *zEllipses; SnippetText *pOut; int iFrom; int iTo; const char *zText; /* Document to extract snippet from */ int rc; /* Set to NOMEM if OOM is encountered */ }; static void fts5SnippetAppend(SnippetCtx *p, const char *z, int n){ if( p->rc==SQLITE4_OK ){ SnippetText *pOut = p->pOut; if( n<0 ) n = strlen(z); if( (pOut->nOut + n) > pOut->nAlloc ){ int nNew = (pOut->nOut+n) * 2; pOut->zOut = sqlite4DbReallocOrFree(p->db, pOut->zOut, nNew); if( pOut->zOut==0 ){ p->rc = SQLITE4_NOMEM; return; } pOut->nAlloc = sqlite4DbMallocSize(p->db, pOut->zOut); } memcpy(&pOut->zOut[pOut->nOut], z, n); pOut->nOut += n; } } static int fts5SnippetCb( void *pCtx, int iStream, int iOff, const char *z, int n, int iSrc, int nSrc ){ SnippetCtx *p = (SnippetCtx *)pCtx; if( iOff<p->iOff ){ return 0; }else if( iOff>=(p->iOff + p->nToken) ){ fts5SnippetAppend(p, &p->zText[p->iFrom], p->iTo - p->iFrom); fts5SnippetAppend(p, "...", 3); p->iFrom = -1; return 1; }else{ int bHighlight; /* True to highlight term */ bHighlight = (p->mask & (1 << (iOff-p->iOff))); if( p->iFrom==0 && p->iOff!=0 ){ p->iFrom = iSrc; if( p->pOut->nOut==0 ) fts5SnippetAppend(p, p->zEllipses, -1); } if( bHighlight ){ fts5SnippetAppend(p, &p->zText[p->iFrom], iSrc - p->iFrom); fts5SnippetAppend(p, p->zStart, -1); fts5SnippetAppend(p, 
&p->zText[iSrc], nSrc); fts5SnippetAppend(p, p->zEnd, -1); p->iTo = p->iFrom = iSrc+nSrc; }else{ p->iTo = iSrc + nSrc; } } return 0; } static int fts5SnippetText( sqlite4_context *pCtx, Snippet *pSnip, SnippetText *pText, int nToken, const char *zStart, const char *zEnd, const char *zEllipses ){ int rc; sqlite4_value *pVal = 0; u64 mask = pSnip->hlmask; int iOff = pSnip->iOff; int iCol = pSnip->iCol; rc = sqlite4_mi_column_value(pCtx, iCol, &pVal); if( rc==SQLITE4_OK ){ SnippetCtx sCtx; int nText; nText = sqlite4_value_bytes(pVal); memset(&sCtx, 0, sizeof(sCtx)); sCtx.zText = (const char *)sqlite4_value_text(pVal); sCtx.db = sqlite4_context_db_handle(pCtx); sCtx.nToken = nToken; sCtx.iOff = iOff; sCtx.mask = mask; sCtx.zStart = zStart; sCtx.zEnd = zEnd; sCtx.zEllipses = zEllipses; sCtx.pOut = pText; sqlite4_mi_tokenize(pCtx, sCtx.zText, nText, &sCtx, fts5SnippetCb); if( sCtx.rc==SQLITE4_OK && sCtx.iFrom>0 ){ fts5SnippetAppend(&sCtx, &sCtx.zText[sCtx.iFrom], nText - sCtx.iFrom); } rc = sCtx.rc; } return rc; } static int fts5BestSnippet( sqlite4_context *pCtx, /* Context snippet() was called in */ int iColumn, /* In this column (-1 means any column) */ u64 *pMask, /* IN/OUT: Mask of high-priority phrases */ int nToken, /* Number of tokens in requested snippet */ Snippet *pSnip /* Populate this object */ ){ sqlite4 *db = sqlite4_context_db_handle(pCtx); int nPhrase; int rc = SQLITE4_OK; int i; int iPrev = 0; int iPrevCol = 0; u64 *aMask; u64 mask = *pMask; u64 allmask = 0; int iBestOff = nToken-1; int iBestCol = (iColumn >= 0 ? iColumn : 0); int nBest = 0; u64 hlmask = 0; /* Highlight mask associated with iBestOff */ u64 missmask = 0; /* Mask of missing terms in iBestOff snip. 
*/ sqlite4_mi_phrase_count(pCtx, &nPhrase); aMask = sqlite4DbMallocZero(db, sizeof(u64) * nPhrase); if( !aMask ) return SQLITE4_NOMEM; /* Iterate through all matches for all phrases */ for(i=0; rc==SQLITE4_OK; i++){ int iOff; int iCol; int iStream; int iPhrase; rc = sqlite4_mi_match_detail(pCtx, i, &iOff, &iCol, &iStream, &iPhrase); if( rc==SQLITE4_OK ){ u64 tmask = 0; u64 miss = 0; int iMask; int nShift; int nScore = 0; if( iColumn>=0 && iColumn!=iCol ) continue; allmask |= (1 << iPhrase); nShift = ((iPrevCol==iCol) ? (iOff-iPrev) : 100); for(iMask=0; iMask<nPhrase; iMask++){ if( nShift<64){ aMask[iMask] = aMask[iMask] >> nShift; }else{ aMask[iMask] = 0; } } aMask[iPhrase] = aMask[iPhrase] | (1<<(nToken-1)); for(iMask=0; iMask<nPhrase; iMask++){ if( aMask[iMask] ){ nScore += (((1 << iMask) & mask) ? 100 : 1); }else{ miss |= (1 << iMask); } tmask = tmask | aMask[iMask]; } if( nScore>nBest ){ hlmask = tmask; missmask = miss; nBest = nScore; iBestOff = iOff; iBestCol = iCol; } iPrev = iOff; iPrevCol = iCol; } } if( rc==SQLITE4_NOTFOUND ) rc = SQLITE4_OK; pSnip->iOff = iBestOff-nToken+1; pSnip->iCol = iBestCol; pSnip->hlmask = hlmask; *pMask = mask & missmask & allmask; sqlite4DbFree(db, aMask); return rc; } static void fts5SnippetImprove( sqlite4_context *pCtx, int nToken, /* Size of required snippet */ int nSz, /* Total size of column in tokens */ Snippet *pSnip ){ int i; int nLead = 0; int nShift = 0; u64 mask = pSnip->hlmask; int iOff = pSnip->iOff; if( mask==0 ) return; assert( mask & (1 << (nToken-1)) ); for(i=0; (mask & (1<<i))==0; i++); nLead = i; nShift = (nLead/2); if( iOff+nShift > nSz-nToken ) nShift = (nSz-nToken) - iOff; if( iOff+nShift < 0 ) nShift = -1 * iOff; iOff += nShift; mask = mask >> nShift; pSnip->iOff = iOff; pSnip->hlmask = mask; } static void fts5Snippet(sqlite4_context *pCtx, int nArg, sqlite4_value **apArg){ Snippet aSnip[4]; int nSnip; int iCol = -1; int nToken = -15; int rc; int nPhrase; const char *zStart = "<b>"; const char *zEnd = 
"</b>"; const char *zEllipses = "..."; if( nArg>0 ) zStart = (const char *)sqlite4_value_text(apArg[0]); if( nArg>1 ) zEnd = (const char *)sqlite4_value_text(apArg[1]); if( nArg>2 ) zEllipses = (const char *)sqlite4_value_text(apArg[2]); if( nArg>3 ) iCol = sqlite4_value_int(apArg[3]); if( nArg>4 ) nToken = sqlite4_value_int(apArg[4]); rc = sqlite4_mi_phrase_count(pCtx, &nPhrase); for(nSnip=1; rc==SQLITE4_OK && nSnip<5; nSnip = ((nSnip==2) ? 3 : (nSnip+1))){ int nTok; int i; u64 mask = ((u64)1 << nPhrase) - 1; if( nToken<0 ){ nTok = nToken * -1; }else{ nTok = (nToken + (nSnip-1)) / nSnip; } memset(aSnip, 0, sizeof(aSnip)); for(i=0; rc==SQLITE4_OK && i<nSnip; i++){ rc = fts5BestSnippet(pCtx, iCol, &mask, nTok, &aSnip[i]); } if( mask==0 || nSnip==4 ){ SnippetText text = {0, 0, 0}; for(i=0; rc==SQLITE4_OK && i<nSnip; i++){ int nSz; rc = sqlite4_mi_size(pCtx, aSnip[i].iCol, -1, &nSz); if( rc==SQLITE4_OK ){ fts5SnippetImprove(pCtx, nTok, nSz, &aSnip[i]); rc = fts5SnippetText( pCtx, &aSnip[i], &text, nTok, zStart, zEnd, zEllipses ); } } sqlite4_result_text(pCtx, text.zOut, text.nOut, SQLITE4_TRANSIENT); sqlite4DbFree(sqlite4_context_db_handle(pCtx), text.zOut); break; } } if( rc!=SQLITE4_OK ){ sqlite4_result_error_code(pCtx, rc); } } static int fts5SimpleTokenize( void *pCtx, sqlite4_tokenizer *p, |
︙ | ︙ |
Changes to test/fts5query1.test.
︙ | ︙ | |||
140 141 142 143 144 145 146 | do_execsql_test 8.0 { CREATE TABLE t8(a PRIMARY KEY, b, c); CREATE INDEX i8 ON t8 USING fts5(); INSERT INTO t8 VALUES('one', 'a b c', 'a a a'); INSERT INTO t8 VALUES('two', 'd e f', 'b b b'); } | | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 | do_execsql_test 8.0 { CREATE TABLE t8(a PRIMARY KEY, b, c); CREATE INDEX i8 ON t8 USING fts5(); INSERT INTO t8 VALUES('one', 'a b c', 'a a a'); INSERT INTO t8 VALUES('two', 'd e f', 'b b b'); } #do_execsql_test 8.1 { # SELECT rank(t8) FROM t8 WHERE t8 MATCH 'b a' #} do_execsql_test 9.0 { CREATE TABLE t9(a PRIMARY KEY, b); CREATE INDEX i9 ON t9 USING fts5(); INSERT INTO t9 VALUES('one', 'a b c d e f g h i j k l m n o p q r s t u v w x y z ' || 'a b c d e f g h i j k l m n o p q r s t u v w x y z' ); } #do_execsql_test 9.1 { # SELECT snippet(t9) FROM t9 WHERE t9 MATCH 'b' #} do_execsql_test 10.1 { CREATE TABLE ft(content); CREATE INDEX fti ON ft USING fts5(); } do_execsql_test 10.2 { INSERT INTO ft VALUES('a b c d e'); INSERT INTO ft VALUES('f g h i j'); } do_execsql_test 10.3 { SELECT rowid FROM ft WHERE ft MATCH 'c' } {1} do_execsql_test 10.4 { SELECT rowid FROM ft WHERE ft MATCH 'f' } {2} breakpoint do_execsql_test 10.5 { DELETE FROM ft; CREATE TABLE ft2(a, b, c); CREATE INDEX fti2 ON ft2 USING fts5(); INSERT INTO ft2 VALUES('1 2 3 4 5', '6 7 8 9 10', '11 12 13 14 15'); SELECT snippet(ft2, '[', ']', '...', -1, 3) FROM ft2 WHERE ft2 MATCH '5'; } finish_test |
Added test/fts5snippet.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 | # 2010 January 07 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. 
# #************************************************************************* # # The tests in this file test that the FTS3 auxiliary functions offsets(), # snippet() and matchinfo() work. At time of writing, running this file # provides full coverage of fts3_snippet.c. # set testdir [file dirname $argv0] source $testdir/tester.tcl # If SQLITE4_ENABLE_FTS3 is not defined, omit this file. source $testdir/fts3_common.tcl set sqlite_fts3_enable_parentheses 1 set DO_MALLOC_TEST 0 # Transform the list $L to its "normal" form. So that it can be compared to # another list with the same set of elements using [string compare]. # proc normalize {L} { set ret [list] foreach l $L {lappend ret $l} return $ret } # Document text used by a few tests. Contains the English names of all # integers between 1 and 300. # set numbers [normalize { one two three four five six seven eight nine ten eleven twelve thirteen fourteen fifteen sixteen seventeen eighteen nineteen twenty twentyone twentytwo twentythree twentyfour twentyfive twentysix twentyseven twentyeight twentynine thirty thirtyone thirtytwo thirtythree thirtyfour thirtyfive thirtysix thirtyseven thirtyeight thirtynine forty fortyone fortytwo fortythree fortyfour fortyfive fortysix fortyseven fortyeight fortynine fifty fiftyone fiftytwo fiftythree fiftyfour fiftyfive fiftysix fiftyseven fiftyeight fiftynine sixty sixtyone sixtytwo sixtythree sixtyfour sixtyfive sixtysix sixtyseven sixtyeight sixtynine seventy seventyone seventytwo seventythree seventyfour seventyfive seventysix seventyseven seventyeight seventynine eighty eightyone eightytwo eightythree eightyfour eightyfive eightysix eightyseven eightyeight eightynine ninety ninetyone ninetytwo ninetythree ninetyfour ninetyfive ninetysix ninetyseven ninetyeight ninetynine onehundred onehundredone onehundredtwo onehundredthree onehundredfour onehundredfive onehundredsix onehundredseven onehundredeight onehundrednine onehundredten onehundredeleven onehundredtwelve onehundredthirteen 
onehundredfourteen onehundredfifteen onehundredsixteen onehundredseventeen onehundredeighteen onehundrednineteen onehundredtwenty onehundredtwentyone onehundredtwentytwo onehundredtwentythree onehundredtwentyfour onehundredtwentyfive onehundredtwentysix onehundredtwentyseven onehundredtwentyeight onehundredtwentynine onehundredthirty onehundredthirtyone onehundredthirtytwo onehundredthirtythree onehundredthirtyfour onehundredthirtyfive onehundredthirtysix onehundredthirtyseven onehundredthirtyeight onehundredthirtynine onehundredforty onehundredfortyone onehundredfortytwo onehundredfortythree onehundredfortyfour onehundredfortyfive onehundredfortysix onehundredfortyseven onehundredfortyeight onehundredfortynine onehundredfifty onehundredfiftyone onehundredfiftytwo onehundredfiftythree onehundredfiftyfour onehundredfiftyfive onehundredfiftysix onehundredfiftyseven onehundredfiftyeight onehundredfiftynine onehundredsixty onehundredsixtyone onehundredsixtytwo onehundredsixtythree onehundredsixtyfour onehundredsixtyfive onehundredsixtysix onehundredsixtyseven onehundredsixtyeight onehundredsixtynine onehundredseventy onehundredseventyone onehundredseventytwo onehundredseventythree onehundredseventyfour onehundredseventyfive onehundredseventysix onehundredseventyseven onehundredseventyeight onehundredseventynine onehundredeighty onehundredeightyone onehundredeightytwo onehundredeightythree onehundredeightyfour onehundredeightyfive onehundredeightysix onehundredeightyseven onehundredeightyeight onehundredeightynine onehundredninety onehundredninetyone onehundredninetytwo onehundredninetythree onehundredninetyfour onehundredninetyfive onehundredninetysix onehundredninetyseven onehundredninetyeight onehundredninetynine twohundred twohundredone twohundredtwo twohundredthree twohundredfour twohundredfive twohundredsix twohundredseven twohundredeight twohundrednine twohundredten twohundredeleven twohundredtwelve twohundredthirteen twohundredfourteen twohundredfifteen 
twohundredsixteen twohundredseventeen twohundredeighteen twohundrednineteen twohundredtwenty twohundredtwentyone twohundredtwentytwo twohundredtwentythree twohundredtwentyfour twohundredtwentyfive twohundredtwentysix twohundredtwentyseven twohundredtwentyeight twohundredtwentynine twohundredthirty twohundredthirtyone twohundredthirtytwo twohundredthirtythree twohundredthirtyfour twohundredthirtyfive twohundredthirtysix twohundredthirtyseven twohundredthirtyeight twohundredthirtynine twohundredforty twohundredfortyone twohundredfortytwo twohundredfortythree twohundredfortyfour twohundredfortyfive twohundredfortysix twohundredfortyseven twohundredfortyeight twohundredfortynine twohundredfifty twohundredfiftyone twohundredfiftytwo twohundredfiftythree twohundredfiftyfour twohundredfiftyfive twohundredfiftysix twohundredfiftyseven twohundredfiftyeight twohundredfiftynine twohundredsixty twohundredsixtyone twohundredsixtytwo twohundredsixtythree twohundredsixtyfour twohundredsixtyfive twohundredsixtysix twohundredsixtyseven twohundredsixtyeight twohundredsixtynine twohundredseventy twohundredseventyone twohundredseventytwo twohundredseventythree twohundredseventyfour twohundredseventyfive twohundredseventysix twohundredseventyseven twohundredseventyeight twohundredseventynine twohundredeighty twohundredeightyone twohundredeightytwo twohundredeightythree twohundredeightyfour twohundredeightyfive twohundredeightysix twohundredeightyseven twohundredeightyeight twohundredeightynine twohundredninety twohundredninetyone twohundredninetytwo twohundredninetythree twohundredninetyfour twohundredninetyfive twohundredninetysix twohundredninetyseven twohundredninetyeight twohundredninetynine threehundred }] foreach {DO_MALLOC_TEST enc} { 0 utf8 1 utf8 1 utf16 } { if {$DO_MALLOC_TEST} continue db close forcedelete test.db sqlite4 db test.db sqlite4_db_config_lookaside db 0 0 0 db eval "PRAGMA encoding = \"$enc\"" # Set variable $T to the test name prefix for this iteration of the 
loop. # set T "fts3snippet-$enc" ########################################################################## # Test the snippet function. # proc do_snippet_test {name expr iCol nTok args} { set res [list] foreach a $args { lappend res [string trim $a] } do_select_test $name { SELECT snippet(ft,'{','}','...',$iCol,$nTok) FROM ft WHERE ft MATCH $expr } $res } do_test $T.3.1 { execsql { DROP TABLE IF EXISTS ft; CREATE TABLE ft(content); CREATE INDEX fti ON ft USING fts5(); INSERT INTO ft VALUES('one two three four five six seven eight nine ten'); } } {} do_snippet_test $T.3.2 one 0 5 "{one} two three four five..." do_snippet_test $T.3.3 two 0 5 "one {two} three four five..." do_snippet_test $T.3.4 three 0 5 "one two {three} four five..." do_snippet_test $T.3.5 four 0 5 "...two three {four} five six..." do_snippet_test $T.3.6 five 0 5 "...three four {five} six seven..." do_snippet_test $T.3.7 six 0 5 "...four five {six} seven eight..." do_snippet_test $T.3.8 seven 0 5 "...five six {seven} eight nine..." do_snippet_test $T.3.9 eight 0 5 "...six seven {eight} nine ten" do_snippet_test $T.3.10 nine 0 5 "...six seven eight {nine} ten" do_snippet_test $T.3.11 ten 0 5 "...six seven eight nine {ten}" do_test $T.4.1 { execsql { INSERT INTO ft VALUES( 'one two three four five ' || 'six seven eight nine ten ' || 'eleven twelve thirteen fourteen fifteen ' || 'sixteen seventeen eighteen nineteen twenty ' || 'one two three four five ' || 'six seven eight nine ten ' || 'eleven twelve thirteen fourteen fifteen ' || 'sixteen seventeen eighteen nineteen twenty' ); } } {} do_snippet_test $T.4.2 {one nine} 0 5 { {one} two three...eight {nine} ten } { {one} two three...eight {nine} ten... } do_snippet_test $T.4.3 {one nine} 0 -5 { {one} two three four five...six seven eight {nine} ten } { {one} two three four five...seven eight {nine} ten eleven... } do_snippet_test $T.4.3 {one nineteen} 0 -5 { ...eighteen {nineteen} twenty {one} two... 
} do_snippet_test $T.4.4 {two nineteen} 0 -5 { ...eighteen {nineteen} twenty one {two}... } do_snippet_test $T.4.5 {three nineteen} 0 -5 { ...{nineteen} twenty one two {three}... } do_snippet_test $T.4.6 {four nineteen} 0 -5 { ...two three {four} five six...seventeen eighteen {nineteen} twenty one... } do_snippet_test $T.4.7 {four NEAR nineteen} 0 -5 { ...seventeen eighteen {nineteen} twenty one...two three {four} five six... } do_snippet_test $T.4.8 {four nineteen} 0 5 { ...three {four} five...eighteen {nineteen} twenty... } do_snippet_test $T.4.9 {four NEAR nineteen} 0 5 { ...eighteen {nineteen} twenty...three {four} five... } do_snippet_test $T.4.10 {four NEAR nineteen} 0 -5 { ...seventeen eighteen {nineteen} twenty one...two three {four} five six... } do_snippet_test $T.4.11 {four NOT (nineteen twentyone)} 0 5 { ...two three {four} five six... } { ...two three {four} five six... } do_snippet_test $T.4.12 {four OR nineteen NEAR twentyone} 0 5 { ...two three {four} five six... } { ...two three {four} five six... } do_test $T.5.1 { execsql { DROP TABLE IF EXISTS ft; CREATE TABLE ft(a, b, c); CREATE INDEX fti ON ft USING fts5(); INSERT INTO ft VALUES( 'one two three four five', 'four five six seven eight', 'seven eight nine ten eleven' ); } } {} do_snippet_test $T.5.2 {five} -1 3 {...three four {five}} do_snippet_test $T.5.3 {five} 0 3 {...three four {five}} do_snippet_test $T.5.4 {five} 1 3 {four {five} six...} do_snippet_test $T.5.5 {five} 2 3 {seven eight nine...} do_test $T.5.6 { execsql { UPDATE ft SET b = NULL } } {} do_snippet_test $T.5.7 {five} -1 3 {...three four {five}} do_snippet_test $T.5.8 {five} 0 3 {...three four {five}} do_snippet_test $T.5.9 {five} 1 3 {} do_snippet_test $T.5.10 {five} 2 3 {seven eight nine...} do_snippet_test $T.5.11 {one "seven eight nine"} -1 -3 { {one} two three...{seven} {eight} {nine}... 
} do_test $T.6.1 { execsql { DROP TABLE IF EXISTS ft; CREATE TABLE ft(x); CREATE INDEX fti ON ft USING fts5(); INSERT INTO ft VALUES($numbers); } } {} do_snippet_test $T.6.2 { one fifty onehundred onehundredfifty twohundredfifty threehundred } -1 4 { {one}...{fifty}...{onehundred}...{onehundredfifty}... } do_snippet_test $T.6.3 { one fifty onehundred onehundredfifty twohundredfifty threehundred } -1 -4 { {one} two three four...fortyeight fortynine {fifty} fiftyone...ninetyeight ninetynine {onehundred} onehundredone...onehundredfortyeight onehundredfortynine {onehundredfifty} onehundredfiftyone... } do_test $T.7.1 { execsql { BEGIN; DROP TABLE IF EXISTS ft; CREATE TABLE ft(x); CREATE INDEX fti ON ft USING fts5(); } set testresults [list] for {set i 1} {$i < 150} {incr i} { set commas [string repeat , $i] execsql {INSERT INTO ft VALUES('one' || $commas || 'two')} lappend testresults "{one}$commas{two}" } execsql COMMIT } {} eval [list do_snippet_test $T.7.2 {one two} -1 3] $testresults } set sqlite_fts3_enable_parentheses 0 finish_test |