Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Modify snippets code to run more efficiently. And to avoid a bug relating to snippets based on full-text queries that contain duplicate terms. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
a2b1183d9e9898d06d623b342bbb552e |
User & Date: | dan 2010-01-11 12:00:48.000 |
Context
2010-01-11
| ||
18:26 | Add a few documentation evidence comments to the built-in function implementations. (check-in: 8bd0f8147d user: drh tags: trunk) | |
12:00 | Modify snippets code to run more efficiently. And to avoid a bug relating to snippets based on full-text queries that contain duplicate terms. (check-in: a2b1183d9e user: dan tags: trunk) | |
2010-01-09
| ||
07:33 | Fix handling of an OOM error in the fts3 offsets() function. Fix a couple of snippet related test cases in e_fts3.test. (check-in: 14dc46a74a user: dan tags: trunk) | |
Changes
Changes to ext/fts3/fts3.c.
︙ | ︙ | |||
2109 2110 2111 2112 2113 2114 2115 | */ int sqlite3Fts3ExprLoadDoclist(Fts3Table *pTab, Fts3Expr *pExpr){ return evalFts3Expr(pTab, pExpr, &pExpr->aDoclist, &pExpr->nDoclist, 1); } /* ** After ExprLoadDoclist() (see above) has been called, this function is | | | 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 | */ int sqlite3Fts3ExprLoadDoclist(Fts3Table *pTab, Fts3Expr *pExpr){ return evalFts3Expr(pTab, pExpr, &pExpr->aDoclist, &pExpr->nDoclist, 1); } /* ** After ExprLoadDoclist() (see above) has been called, this function is ** used to iterate/search through the position lists that make up the doclist ** stored in pExpr->aDoclist. */ char *sqlite3Fts3FindPositions( Fts3Expr *pExpr, /* Access this expressions doclist */ sqlite3_int64 iDocid, /* Docid associated with requested pos-list */ int iCol /* Column of requested pos-list */ ){ |
︙ | ︙ |
Changes to ext/fts3/fts3_snippet.c.
︙ | ︙ | |||
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 | #define SNIPPET_BUFFER_MASK (SNIPPET_BUFFER_SIZE-1) static void fts3GetDeltaPosition(char **pp, int *piPos){ int iVal; *pp += sqlite3Fts3GetVarint32(*pp, &iVal); *piPos += (iVal-2); } /* ** Iterate through all phrase nodes in an FTS3 query, except those that ** are part of a sub-tree that is the right-hand-side of a NOT operator. ** For each phrase node found, the supplied callback function is invoked. ** ** If the callback function returns anything other than SQLITE_OK, ** the iteration is abandoned and the error code returned immediately. ** Otherwise, SQLITE_OK is returned after a callback has been made for ** all eligible phrase nodes. */ static int fts3ExprIterate( Fts3Expr *pExpr, /* Expression to iterate phrases of */ | > > > > > > > > > > > > > > > > > > > > > | | < < < | < < < < < < < | 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 | #define SNIPPET_BUFFER_MASK (SNIPPET_BUFFER_SIZE-1) static void fts3GetDeltaPosition(char **pp, int *piPos){ int iVal; *pp += sqlite3Fts3GetVarint32(*pp, &iVal); *piPos += (iVal-2); } static int fts3ExprIterate2( Fts3Expr *pExpr, /* Expression to iterate phrases of */ int *piPhrase, /* Pointer to phrase counter */ int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ void *pCtx /* Second argument to pass to callback */ ){ int rc; int eType = pExpr->eType; if( eType!=FTSQUERY_PHRASE ){ assert( pExpr->pLeft && pExpr->pRight ); rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx); if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){ rc = fts3ExprIterate2(pExpr->pRight, piPhrase, x, pCtx); } }else{ rc = x(pExpr, *piPhrase, pCtx); (*piPhrase)++; } return rc; } /* ** Iterate through all phrase nodes in an FTS3 query, except those that ** are part of a sub-tree that is the right-hand-side of a NOT operator. ** For each phrase node found, the supplied callback function is invoked. ** ** If the callback function returns anything other than SQLITE_OK, ** the iteration is abandoned and the error code returned immediately. ** Otherwise, SQLITE_OK is returned after a callback has been made for ** all eligible phrase nodes. */ static int fts3ExprIterate( Fts3Expr *pExpr, /* Expression to iterate phrases of */ int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ void *pCtx /* Second argument to pass to callback */ ){ int iPhrase = 0; return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx); } typedef struct LoadDoclistCtx LoadDoclistCtx; struct LoadDoclistCtx { Fts3Table *pTab; /* FTS3 Table */ int nPhrase; /* Number of phrases so far */ int nToken; /* Number of tokens so far */ |
︙ | ︙ | |||
91 92 93 94 95 96 97 | pExpr = pLeft; pParent = pExpr->pParent; } return rc; } | | | | 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 | pExpr = pLeft; pParent = pExpr->pParent; } return rc; } static int fts3ExprLoadDoclistsCb1(Fts3Expr *pExpr, int iPhrase, void *ctx){ int rc = SQLITE_OK; LoadDoclistCtx *p = (LoadDoclistCtx *)ctx; p->nPhrase++; p->nToken += pExpr->pPhrase->nToken; if( pExpr->isLoaded==0 ){ rc = sqlite3Fts3ExprLoadDoclist(p->pTab, pExpr); pExpr->isLoaded = 1; if( rc==SQLITE_OK ){ rc = fts3ExprNearTrim(pExpr); } } return rc; } static int fts3ExprLoadDoclistsCb2(Fts3Expr *pExpr, int iPhrase, void *ctx){ if( pExpr->aDoclist ){ pExpr->pCurrent = pExpr->aDoclist; pExpr->iCurrent = 0; pExpr->pCurrent += sqlite3Fts3GetVarint(pExpr->pCurrent, &pExpr->iCurrent); } return SQLITE_OK; } |
︙ | ︙ | |||
136 137 138 139 140 141 142 | } if( pnPhrase ) *pnPhrase = sCtx.nPhrase; if( pnToken ) *pnToken = sCtx.nToken; return rc; } /* | | < | < < < | < | < < < < < < < | < | < | < | < < < < | < < < < < < < < < < | < < < | < | < < | < > > | > | < < | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | | < > | < | < < < < | < < < < < < < < < < | < | < < | | < < < < | < < < | < < < < < < < < | | | < < < | < < < < < < < < < < < > | < < < < < | | | > | | < < < | | | < < < | > > | | < | | < < < < | < < < < < < < | | < | < < > | < | < | < < < < < < < | > > < < | < < < < < > | 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 | } if( pnPhrase ) *pnPhrase = sCtx.nPhrase; if( pnToken ) *pnToken = sCtx.nToken; return rc; } /* ** The following types are used as part of the implementation of the ** fts3BestSnippet() routine. */ typedef struct SnippetCtx SnippetCtx; typedef struct SnippetPhrase SnippetPhrase; struct SnippetCtx { Fts3Cursor *pCsr; /* Cursor snippet is being generated from */ int iCol; /* Extract snippet from this column */ int nSnippet; /* Requested snippet length (in tokens) */ int nPhrase; /* Number of phrases in query */ SnippetPhrase *aPhrase; /* Array of size nPhrase */ int iCurrent; /* First token of current snippet */ }; struct SnippetPhrase { int nToken; /* Number of tokens in phrase */ char *pList; /* Pointer to start of phrase position list */ int iHead; /* Next value in position list */ char *pHead; /* Position list data following iHead */ int iTail; /* Next value in trailing position list */ char *pTail; /* Position list data following iTail */ }; /* ** Advance the position list iterator specified by the first two ** arguments so that it points to the first element with a value greater ** than or equal to parameter iNext. */ static void fts3SnippetAdvance(char **ppIter, int *piIter, int iNext){ char *pIter = *ppIter; if( pIter ){ int iIter = *piIter; while( iIter<iNext ){ if( 0==(*pIter & 0xFE) ){ iIter = -1; pIter = 0; break; } fts3GetDeltaPosition(&pIter, &iIter); } *piIter = iIter; *ppIter = pIter; } } static int fts3SnippetNextCandidate(SnippetCtx *pIter){ int i; /* Loop counter */ if( pIter->iCurrent<0 ){ /* The SnippetCtx object has just been initialized. The first snippet ** candidate always starts at offset 0 (even if this candidate has a ** score of 0.0). */ pIter->iCurrent = 0; /* Advance the 'head' iterator of each phrase to the first offset that ** is greater than or equal to (iNext+nSnippet). */ for(i=0; i<pIter->nPhrase; i++){ SnippetPhrase *pPhrase = &pIter->aPhrase[i]; fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, pIter->nSnippet); } }else{ int iStart; int iEnd = 0x7FFFFFFF; for(i=0; i<pIter->nPhrase; i++){ SnippetPhrase *pPhrase = &pIter->aPhrase[i]; if( pPhrase->pHead && pPhrase->iHead<iEnd ){ iEnd = pPhrase->iHead; } } if( iEnd==0x7FFFFFFF ){ return 1; } pIter->iCurrent = iStart = iEnd - pIter->nSnippet + 1; for(i=0; i<pIter->nPhrase; i++){ SnippetPhrase *pPhrase = &pIter->aPhrase[i]; fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, iEnd+1); fts3SnippetAdvance(&pPhrase->pTail, &pPhrase->iTail, iStart); } } return 0; } static void fts3SnippetDetails( SnippetCtx *pIter, /* Snippet iterator */ u64 mCovered, /* Bitmask of phrases already covered */ int *piToken, /* OUT: First token of proposed snippet */ int *piScore, /* OUT: "Score" for this snippet */ u64 *pmCover, /* OUT: Bitmask of phrases covered */ u64 *pmHighlight /* OUT: Bitmask of terms to highlight */ ){ int iStart = pIter->iCurrent; /* First token of snippet */ int iScore = 0; int i; u64 mCover = 0; u64 mHighlight = 0; for(i=0; i<pIter->nPhrase; i++){ SnippetPhrase *pPhrase = &pIter->aPhrase[i]; if( pPhrase->pTail ){ char *pCsr = pPhrase->pTail; int iCsr = pPhrase->iTail; while( iCsr<(iStart+pIter->nSnippet) ){ int j; u64 mPhrase = (u64)1 << i; u64 mPos = (u64)1 << (iCsr - iStart); assert( iCsr>=iStart ); if( (mCover|mCovered)&mPhrase ){ iScore++; }else{ iScore += 1000; } mCover |= mPhrase; for(j=0; j<pPhrase->nToken; j++){ mHighlight |= (mPos>>j); } if( 0==(*pCsr & 0x0FE) ) break; fts3GetDeltaPosition(&pCsr, &iCsr); } } } *piToken = iStart; *piScore = iScore; *pmCover = mCover; *pmHighlight = mHighlight; } /* ** This function is an fts3ExprIterate() callback used by fts3BestSnippet(). ** Each invocation populates an element of the SnippetCtx.aPhrase[] array. */ static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){ SnippetCtx *p = (SnippetCtx *)ctx; SnippetPhrase *pPhrase = &p->aPhrase[iPhrase]; char *pCsr; pPhrase->nToken = pExpr->pPhrase->nToken; pCsr = sqlite3Fts3FindPositions(pExpr, p->pCsr->iPrevId, p->iCol); if( pCsr ){ int iFirst = 0; pPhrase->pList = pCsr; fts3GetDeltaPosition(&pCsr, &iFirst); pPhrase->pHead = pCsr; pPhrase->pTail = pCsr; pPhrase->iHead = iFirst; pPhrase->iTail = iFirst; }else{ assert( pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0 ); } return SQLITE_OK; } #define BITMASK_SIZE 64 typedef struct SnippetFragment SnippetFragment; struct SnippetFragment { int iCol; /* Column snippet is extracted from */ int iPos; /* Index of first token in snippet */ u64 covered; /* Mask of query phrases covered */ u64 hlmask; /* Mask of snippet terms to highlight */ }; static int fts3BestSnippet( int nSnippet, /* Desired snippet length */ Fts3Cursor *pCsr, /* Cursor to create snippet for */ int iCol, /* Index of column to create snippet from */ u64 mCovered, /* Mask of phrases already covered */ u64 *pmSeen, /* IN/OUT: Mask of phrases seen */ SnippetFragment *pFragment, /* OUT: Best snippet found */ int *piScore /* OUT: Score of snippet pFragment */ ){ int rc; /* Return Code */ int nList; /* Number of phrases in expression */ SnippetCtx sCtx; /* Snippet context object */ int nByte; /* Number of bytes of space to allocate */ int iBestScore = -1; int i; memset(&sCtx, 0, sizeof(sCtx)); /* Iterate through the phrases in the expression to count them. The same ** callback makes sure the doclists are loaded for each phrase. */ rc = fts3ExprLoadDoclists(pCsr, &nList, 0); if( rc!=SQLITE_OK ){ return rc; } /* Now that it is known how many phrases there are, allocate and zero ** the required space using malloc(). */ nByte = sizeof(SnippetPhrase) * nList; sCtx.aPhrase = (SnippetPhrase *)sqlite3_malloc(nByte); if( !sCtx.aPhrase ){ return SQLITE_NOMEM; } memset(sCtx.aPhrase, 0, nByte); /* Initialize the contents of the SnippetCtx object. Then iterate through ** the set of phrases in the expression to populate the aPhrase[] array. */ sCtx.pCsr = pCsr; sCtx.iCol = iCol; sCtx.nSnippet = nSnippet; sCtx.nPhrase = nList; sCtx.iCurrent = -1; (void)fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void *)&sCtx); for(i=0; i<nList; i++){ if( sCtx.aPhrase[i].pHead ){ *pmSeen |= (u64)1 << i; } } pFragment->iCol = iCol; while( !fts3SnippetNextCandidate(&sCtx) ){ int iPos; int iScore; u64 mCover; u64 mHighlight; fts3SnippetDetails(&sCtx, mCovered, &iPos, &iScore, &mCover, &mHighlight); assert( iScore>=0 ); if( iScore>iBestScore ){ pFragment->iPos = iPos; pFragment->hlmask = mHighlight; pFragment->covered = mCover; iBestScore = iScore; } } sqlite3_free(sCtx.aPhrase); *piScore = iBestScore; return SQLITE_OK; } typedef struct StrBuffer StrBuffer; struct StrBuffer { char *z; int n; int nAlloc; }; |
︙ | ︙ | |||
635 636 637 638 639 640 641 642 643 644 645 646 647 648 | /* ** fts3ExprIterate() callback used to collect the "global" matchinfo stats ** for a single query. */ static int fts3ExprGlobalMatchinfoCb( Fts3Expr *pExpr, /* Phrase expression node */ void *pCtx /* Pointer to MatchInfo structure */ ){ MatchInfo *p = (MatchInfo *)pCtx; char *pCsr; char *pEnd; const int iStart = 2 + p->nCol*p->iPhrase; | > | 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 | /* ** fts3ExprIterate() callback used to collect the "global" matchinfo stats ** for a single query. */ static int fts3ExprGlobalMatchinfoCb( Fts3Expr *pExpr, /* Phrase expression node */ int iPhrase, void *pCtx /* Pointer to MatchInfo structure */ ){ MatchInfo *p = (MatchInfo *)pCtx; char *pCsr; char *pEnd; const int iStart = 2 + p->nCol*p->iPhrase; |
︙ | ︙ | |||
658 659 660 661 662 663 664 665 666 667 | p->iPhrase++; return SQLITE_OK; } static int fts3ExprLocalMatchinfoCb( Fts3Expr *pExpr, /* Phrase expression node */ void *pCtx /* Pointer to MatchInfo structure */ ){ MatchInfo *p = (MatchInfo *)pCtx; | > | | 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 | p->iPhrase++; return SQLITE_OK; } static int fts3ExprLocalMatchinfoCb( Fts3Expr *pExpr, /* Phrase expression node */ int iPhrase, void *pCtx /* Pointer to MatchInfo structure */ ){ MatchInfo *p = (MatchInfo *)pCtx; p->iPhrase++; if( pExpr->aDoclist ){ char *pCsr; int iOffset = 2 + p->nCol*(p->aGlobal[0]+iPhrase); memset(&p->aGlobal[iOffset], 0, p->nCol*sizeof(u32)); pCsr = sqlite3Fts3FindPositions(pExpr, p->pCursor->iPrevId, -1); |
︙ | ︙ | |||
832 833 834 835 836 837 838 | sqlite3_int64 iDocid; TermOffset *aTerm; }; /* ** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets(). */ | | | 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 | sqlite3_int64 iDocid; TermOffset *aTerm; }; /* ** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets(). */ static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){ TermOffsetCtx *p = (TermOffsetCtx *)ctx; int nTerm; /* Number of tokens in phrase */ int iTerm; /* For looping through nTerm phrase terms */ char *pList; /* Pointer to position list for phrase */ int iPos = 0; /* First position in position-list */ pList = sqlite3Fts3FindPositions(pExpr, p->iDocid, p->iCol); |
︙ | ︙ |