Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Allow FTS4 multi-token phrases to use a combination of in-memory and incrementally loaded doclists. This allows phrases to (partially) benefit from incremental doclists without disabling the deferred token optimization. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts4-docid-range-constraints |
Files: | files | file ages | folders |
SHA1: |
f6819c5f3363d358e7ef65fe6978f139 |
User & Date: | dan 2013-10-03 19:27:14.455 |
Context
2013-10-03
| ||
20:28 | Merge latest trunk changes. (Closed-Leaf check-in: 24aa20da22 user: dan tags: fts4-docid-range-constraints) | |
19:27 | Allow FTS4 multi-token phrases to use a combination of in-memory and incrementally loaded doclists. This allows phrases to (partially) benefit from incremental doclists without disabling the deferred token optimization. (check-in: f6819c5f33 user: dan tags: fts4-docid-range-constraints) | |
2013-10-02
| ||
08:04 | Add a test to check that the new multi-token phrase optimization is actually helping. (check-in: bc3a2ed5fb user: dan tags: fts4-docid-range-constraints) | |
Changes
Changes to ext/fts3/fts3.c.
︙ | ︙ | |||
4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 | int i; /* Determine if doclists may be loaded from disk incrementally. This is ** possible if the bOptOk argument is true, the FTS doclists will be ** scanned in forward order, and the phrase consists of ** MAX_INCR_PHRASE_TOKENS or fewer tokens, none of which are are "^first" ** tokens or prefix tokens that cannot use a prefix-index. */ int bIncrOk = (bOptOk && pCsr->bDesc==pTab->bDescIdx && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0 && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0 #ifdef SQLITE_TEST && pTab->bNoIncrDoclist==0 #endif ); for(i=0; bIncrOk==1 && i<p->nToken; i++){ Fts3PhraseToken *pToken = &p->aToken[i]; | > | > | | > > | > > > < | 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 | int i; /* Determine if doclists may be loaded from disk incrementally. This is ** possible if the bOptOk argument is true, the FTS doclists will be ** scanned in forward order, and the phrase consists of ** MAX_INCR_PHRASE_TOKENS or fewer tokens, none of which are are "^first" ** tokens or prefix tokens that cannot use a prefix-index. */ int bHaveIncr = 0; int bIncrOk = (bOptOk && pCsr->bDesc==pTab->bDescIdx && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0 && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0 #ifdef SQLITE_TEST && pTab->bNoIncrDoclist==0 #endif ); for(i=0; bIncrOk==1 && i<p->nToken; i++){ Fts3PhraseToken *pToken = &p->aToken[i]; if( pToken->bFirst || (pToken->pSegcsr!=0 && !pToken->pSegcsr->bLookup) ){ bIncrOk = 0; } if( pToken->pSegcsr ) bHaveIncr = 1; } if( bIncrOk && bHaveIncr ){ /* Use the incremental approach. */ int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn); for(i=0; rc==SQLITE_OK && i<p->nToken; i++){ Fts3PhraseToken *pToken = &p->aToken[i]; Fts3MultiSegReader *pSegcsr = pToken->pSegcsr; if( pSegcsr ){ rc = sqlite3Fts3MsrIncrStart(pTab, pSegcsr, iCol, pToken->z, pToken->n); } } p->bIncr = 1; }else{ /* Load the full doclist for the phrase into memory. */ rc = fts3EvalPhraseLoad(pCsr, p); p->bIncr = 0; } assert( rc!=SQLITE_OK || p->nToken<1 || p->aToken[0].pSegcsr==0 || p->bIncr ); return rc; } /* ** This function is used to iterate backwards (from the end to start) |
︙ | ︙ | |||
4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 | p += sqlite3Fts3GetVarint(p, &iVar); *piDocid += ((bDescIdx ? -1 : 1) * iVar); } } *ppIter = p; } /* ** Helper type used by fts3EvalIncrPhraseNext() and incrPhraseTokenNext(). */ typedef struct TokenDoclist TokenDoclist; struct TokenDoclist { sqlite3_int64 iDocid; char *pList; int nList; }; /* ** Token pToken is an incrementally loaded token that is part of a ** multi-token phrase. Advance it to the next matching document in the ** database and populate output variable *p with the details of the new ** entry. Or, if the iterator has reached EOF, set *pbEof to true. ** ** If an error occurs, return an SQLite error code. Otherwise, return ** SQLITE_OK. */ static int incrPhraseTokenNext( Fts3Table *pTab, /* Virtual table handle */ | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | | > > > > > > > > > > > > > > > > > > | > > > > > | < | > > > | > > < | > > > | | > > > < > | > | < > | | | > | | 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 | p += sqlite3Fts3GetVarint(p, &iVar); *piDocid += ((bDescIdx ? -1 : 1) * iVar); } } *ppIter = p; } /* ** Advance the iterator pDL to the next entry in pDL->aAll/nAll. Set *pbEof ** to true if EOF is reached. */ static void fts3EvalDlPhraseNext( Fts3Table *pTab, Fts3Doclist *pDL, u8 *pbEof ){ char *pIter; /* Used to iterate through aAll */ char *pEnd = &pDL->aAll[pDL->nAll]; /* 1 byte past end of aAll */ if( pDL->pNextDocid ){ pIter = pDL->pNextDocid; }else{ pIter = pDL->aAll; } if( pIter>=pEnd ){ /* We have already reached the end of this doclist. EOF. */ *pbEof = 1; }else{ sqlite3_int64 iDelta; pIter += sqlite3Fts3GetVarint(pIter, &iDelta); if( pTab->bDescIdx==0 || pDL->pNextDocid==0 ){ pDL->iDocid += iDelta; }else{ pDL->iDocid -= iDelta; } pDL->pList = pIter; fts3PoslistCopy(0, &pIter); pDL->nList = (int)(pIter - pDL->pList); /* pIter now points just past the 0x00 that terminates the position- ** list for document pDL->iDocid. However, if this position-list was ** edited in place by fts3EvalNearTrim(), then pIter may not actually ** point to the start of the next docid value. The following line deals ** with this case by advancing pIter past the zero-padding added by ** fts3EvalNearTrim(). */ while( pIter<pEnd && *pIter==0 ) pIter++; pDL->pNextDocid = pIter; assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter ); *pbEof = 0; } } /* ** Helper type used by fts3EvalIncrPhraseNext() and incrPhraseTokenNext(). */ typedef struct TokenDoclist TokenDoclist; struct TokenDoclist { int bIgnore; sqlite3_int64 iDocid; char *pList; int nList; }; /* ** Token pToken is an incrementally loaded token that is part of a ** multi-token phrase. Advance it to the next matching document in the ** database and populate output variable *p with the details of the new ** entry. Or, if the iterator has reached EOF, set *pbEof to true. ** ** If an error occurs, return an SQLite error code. Otherwise, return ** SQLITE_OK. */ static int incrPhraseTokenNext( Fts3Table *pTab, /* Virtual table handle */ Fts3Phrase *pPhrase, /* Phrase to advance token of */ int iToken, /* Specific token to advance */ TokenDoclist *p, /* OUT: Docid and doclist for new entry */ u8 *pbEof /* OUT: True if iterator is at EOF */ ){ int rc = SQLITE_OK; if( pPhrase->iDoclistToken==iToken ){ assert( p->bIgnore==0 ); assert( pPhrase->aToken[iToken].pSegcsr==0 ); fts3EvalDlPhraseNext(pTab, &pPhrase->doclist, pbEof); p->pList = pPhrase->doclist.pList; p->nList = pPhrase->doclist.nList; p->iDocid = pPhrase->doclist.iDocid; }else{ Fts3PhraseToken *pToken = &pPhrase->aToken[iToken]; assert( pToken->pDeferred==0 ); assert( pToken->pSegcsr || pPhrase->iDoclistToken>=0 ); if( pToken->pSegcsr ){ assert( p->bIgnore==0 ); rc = sqlite3Fts3MsrIncrNext( pTab, pToken->pSegcsr, &p->iDocid, &p->pList, &p->nList ); if( p->pList==0 ) *pbEof = 1; }else{ p->bIgnore = 1; } } return rc; } /* ** The phrase iterator passed as the second argument: ** ** * features at least one token that uses an incremental doclist, and ** ** * does not contain any deferred tokens. ** ** Advance it to the next matching documnent in the database and populate ** the Fts3Doclist.pList and nList fields. ** ** If there is no "next" entry and no error occurs, then *pbEof is set to ** 1 before returning. Otherwise, if no error occurs and the iterator is ** successfully advanced, *pbEof is set to 0. ** ** If an error occurs, return an SQLite error code. Otherwise, return ** SQLITE_OK. */ static int fts3EvalIncrPhraseNext( Fts3Cursor *pCsr, /* FTS Cursor handle */ Fts3Phrase *p, /* Phrase object to advance to next docid */ u8 *pbEof /* OUT: Set to 1 if EOF */ ){ int rc = SQLITE_OK; Fts3Doclist *pDL = &p->doclist; Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; u8 bEof = 0; /* This is only called if it is guaranteed that the phrase has at least ** one incremental token. In which case the bIncr flag is set. */ assert( p->bIncr==1 ); if( p->nToken==1 && p->bIncr ){ rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr, &pDL->iDocid, &pDL->pList, &pDL->nList ); if( pDL->pList==0 ) bEof = 1; }else{ int bDescDoclist = pCsr->bDesc; struct TokenDoclist a[MAX_INCR_PHRASE_TOKENS]; memset(a, 0, sizeof(a)); assert( p->nToken<=MAX_INCR_PHRASE_TOKENS ); assert( p->iDoclistToken<MAX_INCR_PHRASE_TOKENS ); while( bEof==0 ){ int bMaxSet = 0; sqlite3_int64 iMax; /* Largest docid for all iterators */ int i; /* Used to iterate through tokens */ /* Advance the iterator for each token in the phrase once. */ for(i=0; rc==SQLITE_OK && i<p->nToken; i++){ rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof); if( a[i].bIgnore==0 && (bMaxSet==0 || DOCID_CMP(iMax, a[i].iDocid)<0) ){ iMax = a[i].iDocid; bMaxSet = 1; } } assert( rc!=SQLITE_OK || a[p->nToken-1].bIgnore==0 ); assert( rc!=SQLITE_OK || bMaxSet ); /* Keep advancing iterators until they all point to the same document */ for(i=0; i<p->nToken; i++){ while( rc==SQLITE_OK && bEof==0 && a[i].bIgnore==0 && DOCID_CMP(a[i].iDocid, iMax)<0 ){ rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof); if( DOCID_CMP(a[i].iDocid, iMax)>0 ){ iMax = a[i].iDocid; i = 0; } } } /* Check if the current entries really are a phrase match */ if( bEof==0 ){ int nList = 0; int nByte = a[p->nToken-1].nList; char *aDoclist = sqlite3_malloc(nByte+1); if( !aDoclist ) return SQLITE_NOMEM; memcpy(aDoclist, a[p->nToken-1].pList, nByte+1); for(i=0; i<(p->nToken-1); i++){ if( a[i].bIgnore==0 ){ char *pL = a[i].pList; char *pR = aDoclist; char *pOut = aDoclist; int nDist = p->nToken-1-i; int res = fts3PoslistPhraseMerge(&pOut, nDist, 0, 1, &pL, &pR); if( res==0 ) break; nList = (pOut - aDoclist); } } if( i==(p->nToken-1) ){ pDL->iDocid = iMax; pDL->pList = aDoclist; pDL->nList = nList; pDL->bFreeList = 1; break; } sqlite3_free(aDoclist); } |
︙ | ︙ | |||
4322 4323 4324 4325 4326 4327 4328 | rc = fts3EvalIncrPhraseNext(pCsr, p, pbEof); }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){ sqlite3Fts3DoclistPrev(pTab->bDescIdx, pDL->aAll, pDL->nAll, &pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof ); pDL->pList = pDL->pNextDocid; }else{ | < < < < < < < | < < < < < < < < < < < < < < < < < < < < < < < < < < < | 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 | rc = fts3EvalIncrPhraseNext(pCsr, p, pbEof); }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){ sqlite3Fts3DoclistPrev(pTab->bDescIdx, pDL->aAll, pDL->nAll, &pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof ); pDL->pList = pDL->pNextDocid; }else{ fts3EvalDlPhraseNext(pTab, pDL, pbEof); } return rc; } /* ** |
︙ | ︙ | |||
4636 4637 4638 4639 4640 4641 4642 | pToken->pSegcsr = 0; }else{ /* Set nLoad4 to the value of (4^nOther) for the next iteration of the ** for-loop. Except, limit the value to 2^24 to prevent it from ** overflowing the 32-bit integer it is stored in. */ if( ii<12 ) nLoad4 = nLoad4*4; | | | 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 | pToken->pSegcsr = 0; }else{ /* Set nLoad4 to the value of (4^nOther) for the next iteration of the ** for-loop. Except, limit the value to 2^24 to prevent it from ** overflowing the 32-bit integer it is stored in. */ if( ii<12 ) nLoad4 = nLoad4*4; if( ii==0 || (pTC->pPhrase->nToken>1 && ii!=nToken-1) ){ /* Either this is the cheapest token in the entire query, or it is ** part of a multi-token phrase. Either way, the entire doclist will ** (eventually) be loaded into memory. It may as well be now. */ Fts3PhraseToken *pToken = pTC->pToken; int nList = 0; char *pList = 0; rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList); |
︙ | ︙ | |||
5234 5235 5236 5237 5238 5239 5240 | Fts3Phrase *pPhrase = pExpr->pPhrase; if( pPhrase ){ fts3EvalInvalidatePoslist(pPhrase); if( pPhrase->bIncr ){ int i; for(i=0; i<pPhrase->nToken; i++){ | | > > | > | 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 | Fts3Phrase *pPhrase = pExpr->pPhrase; if( pPhrase ){ fts3EvalInvalidatePoslist(pPhrase); if( pPhrase->bIncr ){ int i; for(i=0; i<pPhrase->nToken; i++){ Fts3PhraseToken *pToken = &pPhrase->aToken[i]; assert( pToken->pDeferred==0 ); if( pToken->pSegcsr ){ sqlite3Fts3MsrIncrRestart(pToken->pSegcsr); } } *pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase); } pPhrase->doclist.pNextDocid = 0; pPhrase->doclist.iDocid = 0; } |
︙ | ︙ |
Changes to test/fts4incr.test.
︙ | ︙ | |||
9 10 11 12 13 14 15 | # #************************************************************************* # set testdir [file dirname $argv0] source $testdir/tester.tcl source $testdir/fts3_common.tcl | | | | 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 | # #************************************************************************* # set testdir [file dirname $argv0] source $testdir/tester.tcl source $testdir/fts3_common.tcl set ::testprefix fts4incr # If SQLITE_ENABLE_FTS3 is defined, omit this file. ifcapable !fts3 { finish_test return } # Create the fts_kjv_genesis procedure which fills and FTS3/4 table # with the complete text of the Book of Genesis. # source $testdir/genesis.tcl do_test 1.0 { execsql { CREATE VIRTUAL TABLE t1 USING fts4(words) } fts_kjv_genesis } {} do_execsql_test 1.1 { SELECT min(docid), max(docid) FROM t1; } {1001001 1050026} |
︙ | ︙ |