Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Fix various issues to do with deferred tokens, NEAR expressions and matchinfo(). |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts3-prefix-search |
Files: | files | file ages | folders |
SHA1: |
3972a787df5ec253b99b148385655e7b |
User & Date: | dan 2011-06-08 18:39:07.487 |
Context
2011-06-09
| ||
10:48 | Fix problems to do with using both OR and NEAR operators in a single expression. (check-in: 4e8dd19eef user: dan tags: fts3-prefix-search) | |
2011-06-08
| ||
18:39 | Fix various issues to do with deferred tokens, NEAR expressions and matchinfo(). (check-in: 3972a787df user: dan tags: fts3-prefix-search) | |
2011-06-07
| ||
18:35 | Have NEAR queries use incremental merging. Fix issues surrounding the deferred token optimization. (check-in: 9d10a6846b user: dan tags: fts3-prefix-search) | |
Changes
Changes to ext/fts3/fts3.c.
︙ | ︙ | |||
1118 1119 1120 1121 1122 1123 1124 | ** database. TODO: For xConnect(), it could verify that said tables exist. */ if( isCreate ){ rc = fts3CreateTables(p); } /* Figure out the page-size for the database. This is required in order to | | < < | 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 | ** database. TODO: For xConnect(), it could verify that said tables exist. */ if( isCreate ){ rc = fts3CreateTables(p); } /* Figure out the page-size for the database. This is required in order to ** estimate the cost of loading large doclists from the database. */ fts3DatabasePageSize(&rc, p); p->nNodeSize = p->nPgsz-35; /* Declare the table schema to SQLite. */ fts3DeclareVtab(&rc, p); fts3_init_out: |
︙ | ︙ | |||
1961 1962 1963 1964 1965 1966 1967 | *pbFirst = 1; } #define COMPARE_DOCID(i1, i2) ((bDescIdx?-1:1) * (i1-i2)) static int fts3DoclistOrMerge( int bDescIdx, /* True if arguments are desc */ | | | | < | 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 | *pbFirst = 1; } #define COMPARE_DOCID(i1, i2) ((bDescIdx?-1:1) * (i1-i2)) static int fts3DoclistOrMerge( int bDescIdx, /* True if arguments are desc */ char *a1, int n1, /* First doclist */ char *a2, int n2, /* Second doclist */ char **paOut, int *pnOut /* OUT: Malloc'd doclist */ ){ sqlite3_int64 i1 = 0; sqlite3_int64 i2 = 0; sqlite3_int64 iPrev = 0; char *pEnd1 = &a1[n1]; char *pEnd2 = &a2[n2]; char *p1 = a1; char *p2 = a2; char *p; char *aOut; int bFirstOut = 0; *paOut = 0; *pnOut = 0; aOut = sqlite3_malloc(n1+n2); if( !aOut ) return SQLITE_NOMEM; |
︙ | ︙ | |||
2012 2013 2014 2015 2016 2017 2018 | *pnOut = (p-aOut); return SQLITE_OK; } static void fts3DoclistPhraseMerge( int bDescIdx, /* True if arguments are desc */ int nDist, /* Distance from left to right (1=adjacent) */ | | | | 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 | *pnOut = (p-aOut); return SQLITE_OK; } static void fts3DoclistPhraseMerge( int bDescIdx, /* True if arguments are desc */ int nDist, /* Distance from left to right (1=adjacent) */ char *aLeft, int nLeft, /* Left doclist */ char *aRight, int *pnRight /* IN/OUT: Right/output doclist */ ){ sqlite3_int64 i1 = 0; sqlite3_int64 i2 = 0; sqlite3_int64 iPrev = 0; char *pEnd1 = &aLeft[nLeft]; char *pEnd2 = &aRight[*pnRight]; char *p1 = aLeft; |
︙ | ︙ | |||
2058 2059 2060 2061 2062 2063 2064 | fts3PoslistCopy(0, &p2); fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2); } } *pnRight = p - aOut; } | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 | fts3PoslistCopy(0, &p2); fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2); } } *pnRight = p - aOut; } /* ** Merge all doclists in the TermSelect.aaOutput[] array into a single ** doclist stored in TermSelect.aaOutput[0]. If successful, delete all ** other doclists (except the aaOutput[0] one) and return SQLITE_OK. ** |
︙ | ︙ | |||
2162 2163 2164 2165 2166 2167 2168 | if( pTS->aaOutput[i] ){ if( !aOut ){ aOut = pTS->aaOutput[i]; nOut = pTS->anOutput[i]; pTS->aaOutput[i] = 0; }else{ int nNew; | | | 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 | if( pTS->aaOutput[i] ){ if( !aOut ){ aOut = pTS->aaOutput[i]; nOut = pTS->anOutput[i]; pTS->aaOutput[i] = 0; }else{ int nNew; char *aNew; int rc = fts3DoclistOrMerge(p->bDescIdx, pTS->aaOutput[i], pTS->anOutput[i], aOut, nOut, &aNew, &nNew ); if( rc!=SQLITE_OK ){ sqlite3_free(aOut); return rc; |
︙ | ︙ | |||
2227 2228 2229 2230 2231 2232 2233 | for(iOut=0; iOut<SizeofArray(pTS->aaOutput); iOut++){ if( pTS->aaOutput[iOut]==0 ){ assert( iOut>0 ); pTS->aaOutput[iOut] = aMerge; pTS->anOutput[iOut] = nMerge; break; }else{ | | | 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 | for(iOut=0; iOut<SizeofArray(pTS->aaOutput); iOut++){ if( pTS->aaOutput[iOut]==0 ){ assert( iOut>0 ); pTS->aaOutput[iOut] = aMerge; pTS->anOutput[iOut] = nMerge; break; }else{ char *aNew; int nNew; int rc = fts3DoclistOrMerge(p->bDescIdx, aMerge, nMerge, pTS->aaOutput[iOut], pTS->anOutput[iOut], &aNew, &nNew ); if( rc!=SQLITE_OK ){ if( aMerge!=aDoclist ) sqlite3_free(aMerge); |
︙ | ︙ | |||
2399 2400 2401 2402 2403 2404 2405 | ){ Fts3MultiSegReader *pSegcsr; /* Object to allocate and return */ int rc = SQLITE_NOMEM; /* Return code */ pSegcsr = sqlite3_malloc(sizeof(Fts3MultiSegReader)); if( pSegcsr ){ int i; | < | 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 | ){ Fts3MultiSegReader *pSegcsr; /* Object to allocate and return */ int rc = SQLITE_NOMEM; /* Return code */ pSegcsr = sqlite3_malloc(sizeof(Fts3MultiSegReader)); if( pSegcsr ){ int i; int bFound = 0; /* True once an index has been found */ Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; if( isPrefix ){ for(i=1; bFound==0 && i<p->nIndex; i++){ if( p->aIndex[i].nPrefix==nTerm ){ bFound = 1; |
︙ | ︙ | |||
2432 2433 2434 2435 2436 2437 2438 | if( bFound==0 ){ rc = sqlite3Fts3SegReaderCursor( p, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr ); pSegcsr->bLookup = !isPrefix; } | < < < < | 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 | if( bFound==0 ){ rc = sqlite3Fts3SegReaderCursor( p, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr ); pSegcsr->bLookup = !isPrefix; } } *ppSegcsr = pSegcsr; return rc; } static void fts3SegReaderCursorFree(Fts3MultiSegReader *pSegcsr){ |
︙ | ︙ | |||
3049 3050 3051 3052 3053 3054 3055 | "ALTER TABLE %Q.'%q_segdir' RENAME TO '%q_segdir';", p->zDb, p->zName, zName ); return rc; } static int fts3SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ | < | | | | 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 | "ALTER TABLE %Q.'%q_segdir' RENAME TO '%q_segdir';", p->zDb, p->zName, zName ); return rc; } static int fts3SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ UNUSED_PARAMETER(iSavepoint); assert( ((Fts3Table *)pVtab)->inTransaction ); assert( ((Fts3Table *)pVtab)->mxSavepoint < iSavepoint ); TESTONLY( ((Fts3Table *)pVtab)->mxSavepoint = iSavepoint ); return fts3SyncMethod(pVtab); } static int fts3ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){ TESTONLY( Fts3Table *p = (Fts3Table*)pVtab ); UNUSED_PARAMETER(iSavepoint); UNUSED_PARAMETER(pVtab); assert( p->inTransaction ); |
︙ | ︙ | |||
3324 3325 3326 3327 3328 3329 3330 | }else{ sqlite3_free(aDoclist); } return rc; } static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){ | < | 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 | }else{ sqlite3_free(aDoclist); } return rc; } static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){ int iToken; int rc = SQLITE_OK; int nMaxUndeferred = -1; char *aPoslist = 0; int nPoslist = 0; int iPrev = -1; |
︙ | ︙ | |||
3446 3447 3448 3449 3450 3451 3452 | ** This function is called for each Fts3Phrase in a full-text query ** expression to initialize the mechanism for returning rows. Once this ** function has been called successfully on an Fts3Phrase, it may be ** used with fts3EvalPhraseNext() to iterate through the matching docids. */ static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){ int rc; | < | < | 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 | ** This function is called for each Fts3Phrase in a full-text query ** expression to initialize the mechanism for returning rows. Once this ** function has been called successfully on an Fts3Phrase, it may be ** used with fts3EvalPhraseNext() to iterate through the matching docids. */ static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){ int rc; Fts3PhraseToken *pFirst = &p->aToken[0]; Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; assert( p->doclist.aAll==0 ); if( pCsr->bDesc==pTab->bDescIdx && bOptOk==1 && p->nToken==1 && pFirst->pSegcsr && pFirst->pSegcsr->bLookup ){ /* Use the incremental approach. */ int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn); rc = sqlite3Fts3MsrIncrStart( pTab, pFirst->pSegcsr, iCol, pFirst->z, pFirst->n); |
︙ | ︙ | |||
3561 3562 3563 3564 3565 3566 3567 | } }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){ sqlite3Fts3DoclistPrev(pTab->bDescIdx, pDL->aAll, pDL->nAll, &pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof ); pDL->pList = pDL->pNextDocid; }else{ | | > | > > > > > > > > > > | 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 | } }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){ sqlite3Fts3DoclistPrev(pTab->bDescIdx, pDL->aAll, pDL->nAll, &pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof ); pDL->pList = pDL->pNextDocid; }else{ char *pIter; /* Used to iterate through aAll */ char *pEnd = &pDL->aAll[pDL->nAll]; /* 1 byte past end of aAll */ if( pDL->pNextDocid ){ pIter = pDL->pNextDocid; }else{ pIter = pDL->aAll; } if( pIter>=pEnd ){ /* We have already reached the end of this doclist. EOF. */ *pbEof = 1; }else{ sqlite3_int64 iDelta; pIter += sqlite3Fts3GetVarint(pIter, &iDelta); if( pTab->bDescIdx==0 || pDL->pNextDocid==0 ){ pDL->iDocid += iDelta; }else{ pDL->iDocid -= iDelta; } pDL->pList = pIter; fts3PoslistCopy(0, &pIter); pDL->nList = (pIter - pDL->pList); /* pIter now points just past the 0x00 that terminates the position- ** list for document pDL->iDocid. However, if this position-list was ** edited in place by fts3EvalNearTrim2(), then pIter may not actually ** point to the start of the next docid value. The following line deals ** with this case by advancing pIter past the zero-padding added by ** fts3EvalNearTrim2(). */ while( pIter<pEnd && *pIter==0 ) pIter++; pDL->pNextDocid = pIter; assert( *pIter || pIter>=&pDL->aAll[pDL->nAll] ); *pbEof = 0; } } return rc; } |
︙ | ︙ | |||
3613 3614 3615 3616 3617 3618 3619 | fts3EvalStartReaders(pCsr, pExpr->pLeft, bOptOk, pRc); fts3EvalStartReaders(pCsr, pExpr->pRight, bOptOk, pRc); pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred); } } } | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 | fts3EvalStartReaders(pCsr, pExpr->pLeft, bOptOk, pRc); fts3EvalStartReaders(pCsr, pExpr->pRight, bOptOk, pRc); pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred); } } } typedef struct Fts3TokenAndCost Fts3TokenAndCost; struct Fts3TokenAndCost { Fts3PhraseToken *pToken; Fts3Expr *pRoot; int nOvfl; int iCol; |
︙ | ︙ | |||
3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 | a += sqlite3Fts3GetVarint(a, &nByte); } if( nDoc==0 || nByte==0 ){ sqlite3_reset(pStmt); return SQLITE_CORRUPT_VTAB; } pCsr->nRowAvg = (int)(((nByte / nDoc) + p->nPgsz) / p->nPgsz); assert( pCsr->nRowAvg>0 ); rc = sqlite3_reset(pStmt); if( rc!=SQLITE_OK ) return rc; } *pnPage = pCsr->nRowAvg; | > | 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 | a += sqlite3Fts3GetVarint(a, &nByte); } if( nDoc==0 || nByte==0 ){ sqlite3_reset(pStmt); return SQLITE_CORRUPT_VTAB; } pCsr->nDoc = nDoc; pCsr->nRowAvg = (int)(((nByte / nDoc) + p->nPgsz) / p->nPgsz); assert( pCsr->nRowAvg>0 ); rc = sqlite3_reset(pStmt); if( rc!=SQLITE_OK ) return rc; } *pnPage = pCsr->nRowAvg; |
︙ | ︙ | |||
3898 3899 3900 3901 3902 3903 3904 | ); apOr = (Fts3Expr **)&aTC[nToken]; if( !aTC ){ rc = SQLITE_NOMEM; }else{ int ii; | < > | | | | | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 | ); apOr = (Fts3Expr **)&aTC[nToken]; if( !aTC ){ rc = SQLITE_NOMEM; }else{ int ii; Fts3TokenAndCost *pTC = aTC; Fts3Expr **ppOr = apOr; fts3EvalTokenCosts(pCsr, 0, pExpr, &pTC, &ppOr, &rc); nToken = pTC-aTC; nOr = ppOr-apOr; if( rc==SQLITE_OK ){ rc = fts3EvalSelectDeferred(pCsr, 0, aTC, nToken); for(ii=0; rc==SQLITE_OK && ii<nOr; ii++){ rc = fts3EvalSelectDeferred(pCsr, apOr[ii], aTC, nToken); } } sqlite3_free(aTC); } } fts3EvalStartReaders(pCsr, pExpr, bOptOk, &rc); return rc; |
︙ | ︙ | |||
3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 | char *aTmp, /* Temporary space to use */ char **paPoslist, /* IN/OUT: Position list */ int *pnToken, /* IN/OUT: Tokens in phrase of *paPoslist */ Fts3Phrase *pPhrase /* The phrase object to trim the doclist of */ ){ int nParam1 = nNear + pPhrase->nToken; int nParam2 = nNear + *pnToken; char *p2; char *pOut; int res; p2 = pOut = pPhrase->doclist.pList; res = fts3PoslistNearMerge( &pOut, aTmp, nParam1, nParam2, paPoslist, &p2 ); | > > > > > > | | | > | 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 | char *aTmp, /* Temporary space to use */ char **paPoslist, /* IN/OUT: Position list */ int *pnToken, /* IN/OUT: Tokens in phrase of *paPoslist */ Fts3Phrase *pPhrase /* The phrase object to trim the doclist of */ ){ int nParam1 = nNear + pPhrase->nToken; int nParam2 = nNear + *pnToken; int nNew; char *p2; char *pOut; int res; p2 = pOut = pPhrase->doclist.pList; res = fts3PoslistNearMerge( &pOut, aTmp, nParam1, nParam2, paPoslist, &p2 ); if( res ){ nNew = (pOut - pPhrase->doclist.pList) - 1; assert( pPhrase->doclist.pList[nNew]=='\0' ); assert( nNew<=pPhrase->doclist.nList && nNew>0 ); memset(&pPhrase->doclist.pList[nNew], 0, pPhrase->doclist.nList - nNew); pPhrase->doclist.nList = nNew; *paPoslist = pPhrase->doclist.pList; *pnToken = pPhrase->nToken; } return res; } static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){ int res = 1; |
︙ | ︙ | |||
4301 4302 4303 4304 4305 4306 4307 | pCsr->isMatchinfoNeeded = 1; pCsr->iPrevId = pExpr->iDocid; }while( pCsr->isEof==0 && fts3EvalLoadDeferred(pCsr, &rc) ); } return rc; } | > | < | < | > | | | | | < > | > > > > > > > | > > > | | < < | > > | | | | < | | | > > > > > > > > > > > > > > > > > > > > > | > | > > > > | > > > > > | > > > > > | > > > | > > > > > > > > > > > > > > > > > > > | > > > > | > > > > | > > > > > | > > > > | > > | | > > | | > > | | > > > > | > > > > > > > | > > > > > > > > > > > > > > > > > > > > > > | > | > > | | < | < < < | > > > > > | | | | > | < > | | | | < | 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 | pCsr->isMatchinfoNeeded = 1; pCsr->iPrevId = pExpr->iDocid; }while( pCsr->isEof==0 && fts3EvalLoadDeferred(pCsr, &rc) ); } return rc; } static void fts3EvalRestart( Fts3Cursor *pCsr, Fts3Expr *pExpr, int *pRc ){ if( pExpr && *pRc==SQLITE_OK ){ Fts3Phrase *pPhrase = pExpr->pPhrase; if( pPhrase ){ fts3EvalFreeDeferredDoclist(pPhrase); if( pPhrase->bIncr ){ sqlite3Fts3EvalPhraseCleanup(pPhrase); memset(&pPhrase->doclist, 0, sizeof(Fts3Doclist)); *pRc = sqlite3Fts3EvalStart(pCsr, pExpr, 0); }else{ pPhrase->doclist.pNextDocid = 0; pPhrase->doclist.iDocid = 0; } } pExpr->iDocid = 0; pExpr->bEof = 0; pExpr->bStart = 0; fts3EvalRestart(pCsr, pExpr->pLeft, pRc); fts3EvalRestart(pCsr, pExpr->pRight, pRc); } } static void fts3EvalUpdateCounts( Fts3Cursor *pCsr, Fts3Expr *pExpr, int *pRc ){ if( pExpr && *pRc==SQLITE_OK ){ Fts3Phrase *pPhrase = pExpr->pPhrase; if( pPhrase && pPhrase->doclist.pList ){ int iCol = 0; char *p = pPhrase->doclist.pList; assert( *p ); while( 1 ){ u8 c = 0; int iCnt = 0; while( 0xFE & (*p | c) ){ if( (c&0x80)==0 ) iCnt++; c = *p++ & 0x80; } /* aMI[iCol*3 + 1] = Number of occurrences ** aMI[iCol*3 + 2] = Number of rows containing at least one instance */ pExpr->aMI[iCol*3 + 1] += iCnt; pExpr->aMI[iCol*3 + 2] += (iCnt>0); if( *p==0x00 ) break; p++; p += sqlite3Fts3GetVarint32(p, &iCol); } } fts3EvalUpdateCounts(pCsr, pExpr->pLeft, pRc); fts3EvalUpdateCounts(pCsr, pExpr->pRight, pRc); } } static int fts3EvalNearStats( Fts3Cursor *pCsr, Fts3Expr *pExpr ){ int rc = SQLITE_OK; /* Return code */ assert( pExpr->eType==FTSQUERY_PHRASE ); if( pExpr->aMI==0 ){ Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; Fts3Expr *pRoot; /* Root of NEAR expression */ Fts3Expr *p; /* Iterator used for several purposes */ sqlite3_int64 iPrevId = pCsr->iPrevId; sqlite3_int64 iDocid; u8 bEof; /* Find the root of the NEAR expression */ pRoot = pExpr; while( pRoot->pParent && pRoot->pParent->eType==FTSQUERY_NEAR ){ pRoot = pRoot->pParent; } iDocid = pRoot->iDocid; bEof = pRoot->bEof; /* Allocate space for the aMSI[] array of each FTSQUERY_PHRASE node */ for(p=pRoot; p; p=p->pLeft){ Fts3Expr *pE = (p->eType==FTSQUERY_PHRASE?p:p->pRight); assert( pE->aMI==0 ); pE->aMI = (u32 *)sqlite3_malloc(pTab->nColumn * 3 * sizeof(u32)); if( !pE->aMI ) return SQLITE_NOMEM; memset(pE->aMI, 0, pTab->nColumn * 3 * sizeof(u32)); } fts3EvalRestart(pCsr, pRoot, &rc); while( pCsr->isEof==0 && rc==SQLITE_OK ){ do { /* Ensure the %_content statement is reset. */ if( pCsr->isRequireSeek==0 ) sqlite3_reset(pCsr->pStmt); assert( sqlite3_data_count(pCsr->pStmt)==0 ); /* Advance to the next document */ fts3EvalNext(pCsr, pRoot, &rc); pCsr->isEof = pRoot->bEof; pCsr->isRequireSeek = 1; pCsr->isMatchinfoNeeded = 1; pCsr->iPrevId = pRoot->iDocid; }while( pCsr->isEof==0 && pRoot->eType==FTSQUERY_NEAR && fts3EvalLoadDeferred(pCsr, &rc) ); if( pCsr->isEof==0 ){ fts3EvalUpdateCounts(pCsr, pRoot, &rc); } } pCsr->isEof = 0; pCsr->iPrevId = iPrevId; if( bEof ){ pRoot->bEof = bEof; }else{ fts3EvalRestart(pCsr, pRoot, &rc); while( pRoot->iDocid<iDocid && rc==SQLITE_OK ){ fts3EvalNext(pCsr, pRoot, &rc); assert( pRoot->bEof==0 ); } fts3EvalLoadDeferred(pCsr, &rc); } } return rc; } /* ** This function is used by the matchinfo() module to query a phrase ** expression node for the following information: ** ** 1. The total number of occurrences of the phrase in each column of ** the FTS table (considering all rows), and ** ** 2. For each column, the number of rows in the table for which the ** column contains at least one instance of the phrase. ** ** If no error occurs, SQLITE_OK is returned and the values for each column ** written into the array aiOut as follows: ** ** aiOut[iCol*3 + 1] = Number of occurrences ** aiOut[iCol*3 + 2] = Number of rows containing at least one instance ** ** Caveats: ** ** * If a phrase consists entirely of deferred tokens, then all output ** values are set to the number of documents in the table. In other ** words we assume that very common tokens occur exactly once in each ** column of each row of the table. ** ** * If a phrase contains some deferred tokens (and some non-deferred ** tokens), count the potential occurrence identified by considering ** the non-deferred tokens instead of actual phrase occurrences. ** ** * If the phrase is part of a NEAR expression, then only phrase instances ** that meet the NEAR constraint are included in the counts. */ int sqlite3Fts3EvalPhraseStats( Fts3Cursor *pCsr, /* FTS cursor handle */ Fts3Expr *pExpr, /* Phrase expression */ u32 *aiOut /* Array to write results into (see above) */ ){ Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; int rc = SQLITE_OK; int iCol; if( pExpr->bDeferred ){ assert( pCsr->nDoc>0 ); for(iCol=0; iCol<pTab->nColumn; iCol++){ aiOut[iCol*3 + 1] = pCsr->nDoc; aiOut[iCol*3 + 2] = pCsr->nDoc; } }else{ rc = fts3EvalNearStats(pCsr, pExpr); if( rc==SQLITE_OK ){ assert( pExpr->aMI ); for(iCol=0; iCol<pTab->nColumn; iCol++){ aiOut[iCol*3 + 1] = pExpr->aMI[iCol*3 + 1]; aiOut[iCol*3 + 2] = pExpr->aMI[iCol*3 + 2]; } } } return rc; } /* ** The expression pExpr passed as the second argument to this function ** must be of type FTSQUERY_PHRASE. ** |
︙ | ︙ |
Changes to ext/fts3/fts3Int.h.
︙ | ︙ | |||
236 237 238 239 240 241 242 243 244 245 246 247 248 249 | sqlite3_int64 iPrevId; /* Previous id read from aDoclist */ char *pNextId; /* Pointer into the body of aDoclist */ char *aDoclist; /* List of docids for full-text queries */ int nDoclist; /* Size of buffer at aDoclist */ u8 bDesc; /* True to sort in descending order */ int eEvalmode; /* An FTS3_EVAL_XX constant */ int nRowAvg; /* Average size of database rows, in pages */ int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */ u32 *aMatchinfo; /* Information about most recent match */ int nMatchinfo; /* Number of elements in aMatchinfo[] */ char *zMatchinfo; /* Matchinfo specification */ }; | > | 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 | sqlite3_int64 iPrevId; /* Previous id read from aDoclist */ char *pNextId; /* Pointer into the body of aDoclist */ char *aDoclist; /* List of docids for full-text queries */ int nDoclist; /* Size of buffer at aDoclist */ u8 bDesc; /* True to sort in descending order */ int eEvalmode; /* An FTS3_EVAL_XX constant */ int nRowAvg; /* Average size of database rows, in pages */ int nDoc; /* Documents in table */ int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */ u32 *aMatchinfo; /* Information about most recent match */ int nMatchinfo; /* Number of elements in aMatchinfo[] */ char *zMatchinfo; /* Matchinfo specification */ }; |
︙ | ︙ | |||
319 320 321 322 323 324 325 | ** ** If Fts3Expr.eType is FTSQUERY_PHRASE and isLoaded is true, then aDoclist ** points to a malloced buffer, size nDoclist bytes, containing the results ** of this phrase query in FTS3 doclist format. As usual, the initial ** "Length" field found in doclists stored on disk is omitted from this ** buffer. ** | | | > > > > > > > > | > > | 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 | ** ** If Fts3Expr.eType is FTSQUERY_PHRASE and isLoaded is true, then aDoclist ** points to a malloced buffer, size nDoclist bytes, containing the results ** of this phrase query in FTS3 doclist format. As usual, the initial ** "Length" field found in doclists stored on disk is omitted from this ** buffer. ** ** Variable aMI is used only for FTSQUERY_NEAR nodes to store the global ** matchinfo data. If it is not NULL, it points to an array of size nCol*3, ** where nCol is the number of columns in the queried FTS table. The array ** is populated as follows: ** ** aMI[iCol*3 + 0] = Undefined ** aMI[iCol*3 + 1] = Number of occurrences ** aMI[iCol*3 + 2] = Number of rows containing at least one instance ** ** The aMI array is allocated using sqlite3_malloc(). It should be freed ** when the expression node is. */ struct Fts3Expr { int eType; /* One of the FTSQUERY_XXX values defined below */ int nNear; /* Valid if eType==FTSQUERY_NEAR */ Fts3Expr *pParent; /* pParent->pLeft==this or pParent->pRight==this */ Fts3Expr *pLeft; /* Left operand */ Fts3Expr *pRight; /* Right operand */ Fts3Phrase *pPhrase; /* Valid if eType==FTSQUERY_PHRASE */ /* The following are used by the fts3_eval.c module. */ sqlite3_int64 iDocid; /* Current docid */ u8 bEof; /* True this expression is at EOF already */ u8 bStart; /* True if iDocid is valid */ u8 bDeferred; /* True if this expression is entirely deferred */ u32 *aMI; }; /* ** Candidate values for Fts3Query.eType. Note that the order of the first ** four values is in order of precedence when parsing expressions. For ** example, the following: ** |
︙ | ︙ | |||
366 367 368 369 370 371 372 | void sqlite3Fts3PendingTermsClear(Fts3Table *); int sqlite3Fts3Optimize(Fts3Table *); int sqlite3Fts3SegReaderNew(int, sqlite3_int64, sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**); int sqlite3Fts3SegReaderPending( Fts3Table*,int,const char*,int,int,Fts3SegReader**); void sqlite3Fts3SegReaderFree(Fts3SegReader *); | < < | 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 | void sqlite3Fts3PendingTermsClear(Fts3Table *); int sqlite3Fts3Optimize(Fts3Table *); int sqlite3Fts3SegReaderNew(int, sqlite3_int64, sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**); int sqlite3Fts3SegReaderPending( Fts3Table*,int,const char*,int,int,Fts3SegReader**); void sqlite3Fts3SegReaderFree(Fts3SegReader *); int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, sqlite3_stmt **); int sqlite3Fts3ReadLock(Fts3Table *); int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*, int*); int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **); int sqlite3Fts3SelectDocsize(Fts3Table *, sqlite3_int64, sqlite3_stmt **); void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *); int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int); int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *); void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *); void sqlite3Fts3SegmentsClose(Fts3Table *); /* Special values interpreted by sqlite3SegReaderCursor() */ #define FTS3_SEGCURSOR_PENDING -1 #define FTS3_SEGCURSOR_ALL -2 int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3MultiSegReader*, Fts3SegFilter*); |
︙ | ︙ | |||
437 438 439 440 441 442 443 | int sqlite3Fts3PutVarint(char *, sqlite3_int64); int sqlite3Fts3GetVarint(const char *, sqlite_int64 *); int sqlite3Fts3GetVarint32(const char *, int *); int sqlite3Fts3VarintLen(sqlite3_uint64); void sqlite3Fts3Dequote(char *); void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*); | | < | 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 | int sqlite3Fts3PutVarint(char *, sqlite3_int64); int sqlite3Fts3GetVarint(const char *, sqlite_int64 *); int sqlite3Fts3GetVarint32(const char *, int *); int sqlite3Fts3VarintLen(sqlite3_uint64); void sqlite3Fts3Dequote(char *); void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*); int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *); /* fts3_tokenizer.c */ const char *sqlite3Fts3NextToken(const char *, int *); int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *); int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *, sqlite3_tokenizer **, char ** ); |
︙ | ︙ | |||
476 477 478 479 480 481 482 | Fts3Cursor *pCsr, /* Virtual table cursor handle */ const char *zTerm, /* Term to query for */ int nTerm, /* Size of zTerm in bytes */ int isPrefix, /* True for a prefix search */ Fts3MultiSegReader **ppSegcsr /* OUT: Allocated seg-reader cursor */ ); | < < < | 484 485 486 487 488 489 490 491 492 493 494 495 496 497 | Fts3Cursor *pCsr, /* Virtual table cursor handle */ const char *zTerm, /* Term to query for */ int nTerm, /* Size of zTerm in bytes */ int isPrefix, /* True for a prefix search */ Fts3MultiSegReader **ppSegcsr /* OUT: Allocated seg-reader cursor */ ); void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *); int sqlite3Fts3EvalStart(Fts3Cursor *, Fts3Expr *, int); int sqlite3Fts3EvalNext(Fts3Cursor *pCsr); int sqlite3Fts3MsrIncrStart( Fts3Table*, Fts3MultiSegReader*, int, const char*, int); |
︙ | ︙ |
Changes to ext/fts3/fts3_expr.c.
︙ | ︙ | |||
765 766 767 768 769 770 771 772 773 774 775 776 777 778 | */ void sqlite3Fts3ExprFree(Fts3Expr *p){ if( p ){ assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); sqlite3Fts3ExprFree(p->pLeft); sqlite3Fts3ExprFree(p->pRight); sqlite3Fts3EvalPhraseCleanup(p->pPhrase); sqlite3_free(p); } } /**************************************************************************** ***************************************************************************** ** Everything after this point is just test code. | > | 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 | */ void sqlite3Fts3ExprFree(Fts3Expr *p){ if( p ){ assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); sqlite3Fts3ExprFree(p->pLeft); sqlite3Fts3ExprFree(p->pRight); sqlite3Fts3EvalPhraseCleanup(p->pPhrase); sqlite3_free(p->aMI); sqlite3_free(p); } } /**************************************************************************** ***************************************************************************** ** Everything after this point is just test code. |
︙ | ︙ |
Changes to ext/fts3/fts3_snippet.c.
︙ | ︙ | |||
716 717 718 719 720 721 722 | if( !c ) nEntry++; } *ppCollist = pEnd; return nEntry; } | < < < < < < < < < < < < < < < < < < < < | 716 717 718 719 720 721 722 723 724 725 726 727 728 729 | if( !c ) nEntry++; } *ppCollist = pEnd; return nEntry; } /* ** fts3ExprIterate() callback used to collect the "global" matchinfo stats ** for a single query. ** ** fts3ExprIterate() callback to load the 'global' elements of a ** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements ** of the matchinfo array that are constant for all rows returned by the |
︙ | ︙ | |||
769 770 771 772 773 774 775 | */ static int fts3ExprGlobalHitsCb( Fts3Expr *pExpr, /* Phrase expression node */ int iPhrase, /* Phrase number (numbered from zero) */ void *pCtx /* Pointer to MatchInfo structure */ ){ MatchInfo *p = (MatchInfo *)pCtx; | < < < < < < < < < < < < | | | < < < < < < < < < < < | 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 | */ static int fts3ExprGlobalHitsCb( Fts3Expr *pExpr, /* Phrase expression node */ int iPhrase, /* Phrase number (numbered from zero) */ void *pCtx /* Pointer to MatchInfo structure */ ){ MatchInfo *p = (MatchInfo *)pCtx; return sqlite3Fts3EvalPhraseStats( p->pCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol] ); } /* ** fts3ExprIterate() callback used to collect the "local" part of the ** FTS3_MATCHINFO_HITS array. The local stats are those elements of the ** array that are different for each row returned by the query. */ |
︙ | ︙ |
Changes to ext/fts3/fts3_write.c.
︙ | ︙ | |||
90 91 92 93 94 95 96 | ** a contiguous set of segment b-tree leaf nodes. Although the details of ** this structure are only manipulated by code in this file, opaque handles ** of type Fts3SegReader* are also used by code in fts3.c to iterate through ** terms when querying the full-text index. See functions: ** ** sqlite3Fts3SegReaderNew() ** sqlite3Fts3SegReaderFree() | < | 90 91 92 93 94 95 96 97 98 99 100 101 102 103 | ** a contiguous set of segment b-tree leaf nodes. Although the details of ** this structure are only manipulated by code in this file, opaque handles ** of type Fts3SegReader* are also used by code in fts3.c to iterate through ** terms when querying the full-text index. See functions: ** ** sqlite3Fts3SegReaderNew() ** sqlite3Fts3SegReaderFree() ** sqlite3Fts3SegReaderIterate() ** ** Methods used to manipulate Fts3SegReader structures: ** ** fts3SegReaderNext() ** fts3SegReaderFirstDocid() ** fts3SegReaderNextDocid() |
︙ | ︙ | |||
1291 1292 1293 1294 1295 1296 1297 | } } } return SQLITE_OK; } | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 | } } } return SQLITE_OK; } int sqlite3Fts3MsrOvfl( Fts3Cursor *pCsr, Fts3MultiSegReader *pMsr, int *pnOvfl ){ Fts3Table *p = (Fts3Table*)pCsr->base.pVtab; |
︙ | ︙ | |||
2412 2413 2414 2415 2416 2417 2418 | if( nMerge==0 ){ *paPoslist = 0; return SQLITE_OK; } while( 1 ){ | < | 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 | if( nMerge==0 ){ *paPoslist = 0; return SQLITE_OK; } while( 1 ){ Fts3SegReader *pSeg; pSeg = pMsr->apSegment[0]; if( pSeg->pOffsetList==0 ){ *paPoslist = 0; break; }else{ |
︙ | ︙ | |||
2954 2955 2956 2957 2958 2959 2960 | }else{ rc = SQLITE_ERROR; } return rc; } | < < < < < < < < < < < < < < | 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 | }else{ rc = SQLITE_ERROR; } return rc; } /* ** Delete all cached deferred doclists. Deferred doclists are cached ** (allocated) by the sqlite3Fts3CacheDeferredDoclists() function. */ void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *pCsr){ Fts3DeferredToken *pDef; for(pDef=pCsr->pDeferred; pDef; pDef=pDef->pNext){ |
︙ | ︙ |
Changes to test/fts3matchinfo.test.
︙ | ︙ | |||
240 241 242 243 244 245 246 | do_matchinfo_test 4.2.3 t5 {t5 MATCH 'a b a'} { s {3} } do_matchinfo_test 4.2.4 t5 {t5 MATCH 'a a a'} { s {3 1} } do_matchinfo_test 4.2.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} } do_matchinfo_test 4.2.6 t5 {t5 MATCH 'a OR b'} { s {1 2 1} } do_execsql_test 4.3.0 "INSERT INTO t5 VALUES('x y [string repeat {b } 50000]')"; | > > > | | | > | 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 | do_matchinfo_test 4.2.3 t5 {t5 MATCH 'a b a'} { s {3} } do_matchinfo_test 4.2.4 t5 {t5 MATCH 'a a a'} { s {3 1} } do_matchinfo_test 4.2.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} } do_matchinfo_test 4.2.6 t5 {t5 MATCH 'a OR b'} { s {1 2 1} } do_execsql_test 4.3.0 "INSERT INTO t5 VALUES('x y [string repeat {b } 50000]')"; # It used to be that the second 'a' token would be deferred. That doesn't # work any longer. if 0 { do_matchinfo_test 4.3.1 t5 {t5 MATCH 'a a'} { x {{5 8 2 5 5 5} {3 8 2 3 5 5}} s {2 1} } } do_matchinfo_test 4.3.2 t5 {t5 MATCH 'a b'} { s {2} } do_matchinfo_test 4.3.3 t5 {t5 MATCH 'a b a'} { s {3} } do_matchinfo_test 4.3.4 t5 {t5 MATCH 'a a a'} { s {3 1} } do_matchinfo_test 4.3.5 t5 {t5 MATCH '"a b" "a b"'} { s {2} } do_matchinfo_test 4.3.6 t5 {t5 MATCH 'a OR b'} { s {1 2 1 1} } |
︙ | ︙ |