Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Fixes for the matchinfo() function related to FTS4 common token handling. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | experimental |
Files: | files | file ages | folders |
SHA1: |
deb80eac9112d21835dfd3cee08ed8f0 |
User & Date: | dan 2010-10-23 19:07:30.000 |
Context
2010-10-25
| ||
09:01 | Add test for matchinfo when a phrase includes some common tokens. (check-in: 80a54ebc41 user: dan tags: experimental) | |
2010-10-23
| ||
19:07 | Fixes for the matchinfo() function related to FTS4 common token handling. (check-in: deb80eac91 user: dan tags: experimental) | |
2010-10-22
| ||
19:03 | Add new test file fts3defer2.test. (check-in: 5a4d5bfcae user: dan tags: experimental) | |
Changes
Changes to ext/fts3/fts3.c.
︙ | ︙ | |||
2104 2105 2106 2107 2108 2109 2110 | rc = fts3DeferExpression(pCsr, pExpr->pLeft); if( rc==SQLITE_OK ){ rc = fts3DeferExpression(pCsr, pExpr->pRight); } if( pExpr->eType==FTSQUERY_PHRASE ){ int iCol = pExpr->pPhrase->iColumn; int i; | < | 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 | rc = fts3DeferExpression(pCsr, pExpr->pLeft); if( rc==SQLITE_OK ){ rc = fts3DeferExpression(pCsr, pExpr->pRight); } if( pExpr->eType==FTSQUERY_PHRASE ){ int iCol = pExpr->pPhrase->iColumn; int i; for(i=0; rc==SQLITE_OK && i<pExpr->pPhrase->nToken; i++){ Fts3PhraseToken *pToken = &pExpr->pPhrase->aToken[i]; if( pToken->pDeferred==0 ){ rc = sqlite3Fts3DeferToken(pCsr, pToken, iCol); } } } |
︙ | ︙ | |||
2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 | char *pOut = 0; int nOut = 0; int rc = SQLITE_OK; int ii; int iCol = pPhrase->iColumn; int isTermPos = (pPhrase->nToken>1 || isReqPos); Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; int iPrevTok = 0; int nDoc = 0; /* If this is an xFilter() evaluation, create a segment-reader for each | > | | > > > > | | | | > > | > > > > | 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 | char *pOut = 0; int nOut = 0; int rc = SQLITE_OK; int ii; int iCol = pPhrase->iColumn; int isTermPos = (pPhrase->nToken>1 || isReqPos); Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; int isFirst = 1; int iPrevTok = 0; int nDoc = 0; /* If this is an xFilter() evaluation, create a segment-reader for each ** phrase token. Or, if this is an xNext() or snippet/offsets/matchinfo ** evaluation, only create segment-readers if there are no Fts3DeferredToken ** objects attached to the phrase-tokens. */ for(ii=0; ii<pPhrase->nToken; ii++){ Fts3PhraseToken *pTok = &pPhrase->aToken[ii]; if( pTok->pArray==0 ){ if( (pCsr->eEvalmode==FTS3_EVAL_FILTER) || (pCsr->eEvalmode==FTS3_EVAL_NEXT && pCsr->pDeferred==0) || (pCsr->eEvalmode==FTS3_EVAL_MATCHINFO && pTok->bFulltext) ){ rc = fts3TermSegReaderArray( pCsr, pTok->z, pTok->n, pTok->isPrefix, &pTok->pArray ); if( rc!=SQLITE_OK ) return rc; } } } for(ii=0; ii<pPhrase->nToken; ii++){ Fts3PhraseToken *pTok; /* Token to find doclist for */ int iTok; /* The token being queried this iteration */ char *pList; /* Pointer to token doclist */ int nList; /* Size of buffer at pList */ /* Select a token to process. If this is an xFilter() call, then tokens ** are processed in order from least to most costly. Otherwise, tokens ** are processed in the order in which they occur in the phrase. 
*/ if( pCsr->eEvalmode==FTS3_EVAL_MATCHINFO ){ assert( isReqPos ); iTok = ii; pTok = &pPhrase->aToken[iTok]; if( pTok->bFulltext==0 ) continue; }else if( pCsr->eEvalmode==FTS3_EVAL_NEXT || isReqPos ){ iTok = ii; pTok = &pPhrase->aToken[iTok]; }else{ int nMinCost = 0x7FFFFFFF; int jj; /* Find the remaining token with the lowest cost. */ |
︙ | ︙ | |||
2223 2224 2225 2226 2227 2228 2229 | */ if( nMinCost>nDoc && ii>0 ){ rc = fts3DeferExpression(pCsr, pCsr->pExpr); break; } } | | > | | | > | 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 | */ if( nMinCost>nDoc && ii>0 ){ rc = fts3DeferExpression(pCsr, pCsr->pExpr); break; } } if( pCsr->eEvalmode==FTS3_EVAL_NEXT && pTok->pDeferred ){ rc = fts3DeferredTermSelect(pTok->pDeferred, isTermPos, &nList, &pList); }else{ assert( pTok->pArray ); rc = fts3TermSelect(p, pTok, iCol, isTermPos, &nList, &pList); pTok->bFulltext = 1; } assert( rc!=SQLITE_OK || pCsr->eEvalmode || pTok->pArray==0 ); if( rc!=SQLITE_OK ) break; if( isFirst ){ pOut = pList; nOut = nList; if( pCsr->eEvalmode==FTS3_EVAL_FILTER && pPhrase->nToken>1 ){ nDoc = fts3DoclistCountDocids(1, pOut, nOut); } isFirst = 0; }else{ /* Merge the new term list and the current output. */ char *aLeft, *aRight; int nLeft, nRight; int nDist; int mt; |
︙ | ︙ | |||
2278 2279 2280 2281 2282 2283 2284 | } assert( nOut==0 || pOut!=0 ); iPrevTok = iTok; } if( rc==SQLITE_OK ){ | | | | 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 | } assert( nOut==0 || pOut!=0 ); iPrevTok = iTok; } if( rc==SQLITE_OK ){ if( ii!=pPhrase->nToken && pCsr->eEvalmode==FTS3_EVAL_FILTER ){ assert( pCsr->eEvalmode==FTS3_EVAL_FILTER && isReqPos==0 ); fts3DoclistStripPositions(pOut, &nOut); } *paOut = pOut; *pnOut = nOut; }else{ sqlite3_free(pOut); } |
︙ | ︙ | |||
2394 2395 2396 2397 2398 2399 2400 | static int fts3ExprAllocateSegReaders( Fts3Cursor *pCsr, /* FTS3 table */ Fts3Expr *pExpr, /* Expression to create seg-readers for */ int *pnExpr /* OUT: Number of AND'd expressions */ ){ int rc = SQLITE_OK; /* Return code */ | > | | 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 | static int fts3ExprAllocateSegReaders( Fts3Cursor *pCsr, /* FTS3 table */ Fts3Expr *pExpr, /* Expression to create seg-readers for */ int *pnExpr /* OUT: Number of AND'd expressions */ ){ int rc = SQLITE_OK; /* Return code */ assert( pCsr->eEvalmode!=FTS3_EVAL_MATCHINFO ); if( pCsr->eEvalmode==FTS3_EVAL_NEXT ) return SQLITE_OK; if( pnExpr && pExpr->eType!=FTSQUERY_AND ){ (*pnExpr)++; pnExpr = 0; } if( pExpr->eType==FTSQUERY_PHRASE ){ Fts3Phrase *pPhrase = pExpr->pPhrase; |
︙ | ︙ | |||
2555 2556 2557 2558 2559 2560 2561 | if( pExpr->eType==FTSQUERY_PHRASE ){ rc = fts3PhraseSelect(p, pExpr->pPhrase, isReqPos || (pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR), paOut, pnOut ); fts3ExprFreeSegReaders(pExpr); | | | 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 | if( pExpr->eType==FTSQUERY_PHRASE ){ rc = fts3PhraseSelect(p, pExpr->pPhrase, isReqPos || (pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR), paOut, pnOut ); fts3ExprFreeSegReaders(pExpr); }else if( p->eEvalmode==FTS3_EVAL_FILTER && pExpr->eType==FTSQUERY_AND ){ ExprAndCost *aExpr = 0; /* Array of AND'd expressions and costs */ int nExpr = 0; /* Size of aExpr[] */ char *aRet = 0; /* Doclist to return to caller */ int nRet = 0; /* Length of aRet[] in bytes */ int nDoc = 0x7FFFFFFF; assert( !isReqPos ); |
︙ | ︙ | |||
2624 2625 2626 2627 2628 2629 2630 | char *aRight; int nLeft; int nRight; assert( pExpr->eType==FTSQUERY_NEAR || pExpr->eType==FTSQUERY_OR || pExpr->eType==FTSQUERY_NOT | | | 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 | char *aRight; int nLeft; int nRight; assert( pExpr->eType==FTSQUERY_NEAR || pExpr->eType==FTSQUERY_OR || pExpr->eType==FTSQUERY_NOT || (pExpr->eType==FTSQUERY_AND && p->eEvalmode==FTS3_EVAL_NEXT) ); if( 0==(rc = fts3EvalExpr(p, pExpr->pRight, &aRight, &nRight, isReqPos)) && 0==(rc = fts3EvalExpr(p, pExpr->pLeft, &aLeft, &nLeft, isReqPos)) ){ switch( pExpr->eType ){ case FTSQUERY_NEAR: { |
︙ | ︙ | |||
2721 2722 2723 2724 2725 2726 2727 | if( rc==SQLITE_OK ){ sqlite3Fts3FreeDeferredDoclists(pCsr); rc = sqlite3Fts3CacheDeferredDoclists(pCsr); } if( rc==SQLITE_OK ){ char *a = 0; int n = 0; | < < | 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 | if( rc==SQLITE_OK ){ sqlite3Fts3FreeDeferredDoclists(pCsr); rc = sqlite3Fts3CacheDeferredDoclists(pCsr); } if( rc==SQLITE_OK ){ char *a = 0; int n = 0; rc = fts3EvalExpr(pCsr, pCsr->pExpr, &a, &n, 0); assert( n>=0 ); *pbRes = (n>0); sqlite3_free(a); } } return rc; } |
︙ | ︙ | |||
2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 | ** subsequently to determine whether or not an EOF was hit. */ static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){ int res; int rc = SQLITE_OK; /* Return code */ Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; do { if( pCsr->aDoclist==0 ){ if( SQLITE_ROW!=sqlite3_step(pCsr->pStmt) ){ pCsr->isEof = 1; rc = sqlite3_reset(pCsr->pStmt); break; } | > | 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 | ** subsequently to determine whether or not an EOF was hit. */ static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){ int res; int rc = SQLITE_OK; /* Return code */ Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; pCsr->eEvalmode = FTS3_EVAL_NEXT; do { if( pCsr->aDoclist==0 ){ if( SQLITE_ROW!=sqlite3_step(pCsr->pStmt) ){ pCsr->isEof = 1; rc = sqlite3_reset(pCsr->pStmt); break; } |
︙ | ︙ | |||
2999 3000 3001 3002 3003 3004 3005 | ** Load the doclist associated with expression pExpr to pExpr->aDoclist. ** The loaded doclist contains positions as well as the document ids. ** This is used by the matchinfo(), snippet() and offsets() auxiliary ** functions. */ int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *pCsr, Fts3Expr *pExpr){ int rc; | > | > > | > > > > > > > > > > > > | 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 | ** Load the doclist associated with expression pExpr to pExpr->aDoclist. ** The loaded doclist contains positions as well as the document ids. ** This is used by the matchinfo(), snippet() and offsets() auxiliary ** functions. */ int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *pCsr, Fts3Expr *pExpr){ int rc; assert( pExpr->eType==FTSQUERY_PHRASE && pExpr->pPhrase ); assert( pCsr->eEvalmode==FTS3_EVAL_NEXT ); rc = fts3EvalExpr(pCsr, pExpr, &pExpr->aDoclist, &pExpr->nDoclist, 1); return rc; } int sqlite3Fts3ExprLoadFtDoclist( Fts3Cursor *pCsr, Fts3Expr *pExpr, char **paDoclist, int *pnDoclist ){ int rc; assert( pCsr->eEvalmode==FTS3_EVAL_NEXT ); assert( pExpr->eType==FTSQUERY_PHRASE && pExpr->pPhrase ); pCsr->eEvalmode = FTS3_EVAL_MATCHINFO; rc = fts3EvalExpr(pCsr, pExpr, paDoclist, pnDoclist, 1); pCsr->eEvalmode = FTS3_EVAL_NEXT; return rc; } /* ** After ExprLoadDoclist() (see above) has been called, this function is ** used to iterate/search through the position lists that make up the doclist ** stored in pExpr->aDoclist. |
︙ | ︙ |
Changes to ext/fts3/fts3Int.h.
︙ | ︙ | |||
159 160 161 162 163 164 165 | Fts3DeferredToken *pDeferred; /* Deferred search tokens, if any */ sqlite3_int64 iPrevId; /* Previous id read from aDoclist */ char *pNextId; /* Pointer into the body of aDoclist */ char *aDoclist; /* List of docids for full-text queries */ int nDoclist; /* Size of buffer at aDoclist */ int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */ u32 *aMatchinfo; /* Information about most recent match */ | | > > > > | 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 | Fts3DeferredToken *pDeferred; /* Deferred search tokens, if any */ sqlite3_int64 iPrevId; /* Previous id read from aDoclist */ char *pNextId; /* Pointer into the body of aDoclist */ char *aDoclist; /* List of docids for full-text queries */ int nDoclist; /* Size of buffer at aDoclist */ int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */ u32 *aMatchinfo; /* Information about most recent match */ int eEvalmode; /* An FTS3_EVAL_XX constant */ int nRowAvg; /* Average size of database rows, in pages */ }; #define FTS3_EVAL_FILTER 0 #define FTS3_EVAL_NEXT 1 #define FTS3_EVAL_MATCHINFO 2 /* ** The Fts3Cursor.eSearch member is always set to one of the following. ** Actually, Fts3Cursor.eSearch can be greater than or equal to ** FTS3_FULLTEXT_SEARCH. If so, then Fts3Cursor.eSearch - 2 is the index ** of the column to be searched. For example, in ** |
︙ | ︙ | |||
187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 | #define FTS3_FULLTEXT_SEARCH 2 /* Full-text index search */ /* ** A "phrase" is a sequence of one or more tokens that must match in ** sequence. A single token is the base case and the most common case. ** For a sequence of tokens contained in double-quotes (i.e. "one two three") ** nToken will be the number of tokens in the string. */ struct Fts3PhraseToken { char *z; /* Text of the token */ int n; /* Number of bytes in buffer z */ int isPrefix; /* True if token ends with a "*" character */ Fts3SegReaderArray *pArray; /* Segment-reader for this token */ Fts3DeferredToken *pDeferred; /* Deferred token object for this token */ }; struct Fts3Phrase { int nToken; /* Number of tokens in the phrase */ int iColumn; /* Index of column this phrase must match */ int isNot; /* Phrase prefixed by unary not (-) operator */ Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */ }; /* | > > > > > > > > | 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 | #define FTS3_FULLTEXT_SEARCH 2 /* Full-text index search */ /* ** A "phrase" is a sequence of one or more tokens that must match in ** sequence. A single token is the base case and the most common case. ** For a sequence of tokens contained in double-quotes (i.e. "one two three") ** nToken will be the number of tokens in the string. ** ** The nDocMatch and nMatch variables contain data that may be used by the ** matchinfo() function. They are populated when the full-text index is ** queried for hits on the phrase. 
If one or more tokens in the phrase ** are deferred, the nDocMatch and nMatch variables are populated based ** on the assumption that the */ struct Fts3PhraseToken { char *z; /* Text of the token */ int n; /* Number of bytes in buffer z */ int isPrefix; /* True if token ends with a "*" character */ int bFulltext; /* True if full-text index was used */ Fts3SegReaderArray *pArray; /* Segment-reader for this token */ Fts3DeferredToken *pDeferred; /* Deferred token object for this token */ }; struct Fts3Phrase { /* Variables populated by fts3_expr.c when parsing a MATCH expression */ int nToken; /* Number of tokens in the phrase */ int iColumn; /* Index of column this phrase must match */ int isNot; /* Phrase prefixed by unary not (-) operator */ Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */ }; /* |
︙ | ︙ | |||
224 225 226 227 228 229 230 | int eType; /* One of the FTSQUERY_XXX values defined below */ int nNear; /* Valid if eType==FTSQUERY_NEAR */ Fts3Expr *pParent; /* pParent->pLeft==this or pParent->pRight==this */ Fts3Expr *pLeft; /* Left operand */ Fts3Expr *pRight; /* Right operand */ Fts3Phrase *pPhrase; /* Valid if eType==FTSQUERY_PHRASE */ | < < | 236 237 238 239 240 241 242 243 244 245 246 247 248 249 | int eType; /* One of the FTSQUERY_XXX values defined below */ int nNear; /* Valid if eType==FTSQUERY_NEAR */ Fts3Expr *pParent; /* pParent->pLeft==this or pParent->pRight==this */ Fts3Expr *pLeft; /* Left operand */ Fts3Expr *pRight; /* Right operand */ Fts3Phrase *pPhrase; /* Valid if eType==FTSQUERY_PHRASE */ int isLoaded; /* True if aDoclist/nDoclist are initialized. */ char *aDoclist; /* Buffer containing doclist */ int nDoclist; /* Size of aDoclist in bytes */ sqlite3_int64 iCurrent; char *pCurrent; }; |
︙ | ︙ | |||
303 304 305 306 307 308 309 310 311 312 313 314 315 316 | int sqlite3Fts3GetVarint(const char *, sqlite_int64 *); int sqlite3Fts3GetVarint32(const char *, int *); int sqlite3Fts3VarintLen(sqlite3_uint64); void sqlite3Fts3Dequote(char *); char *sqlite3Fts3FindPositions(Fts3Expr *, sqlite3_int64, int); int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *, Fts3Expr *); int sqlite3Fts3ExprNearTrim(Fts3Expr *, Fts3Expr *, int); /* fts3_tokenizer.c */ const char *sqlite3Fts3NextToken(const char *, int *); int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *); int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *, sqlite3_tokenizer **, const char **, char ** | > | 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 | int sqlite3Fts3GetVarint(const char *, sqlite_int64 *); int sqlite3Fts3GetVarint32(const char *, int *); int sqlite3Fts3VarintLen(sqlite3_uint64); void sqlite3Fts3Dequote(char *); char *sqlite3Fts3FindPositions(Fts3Expr *, sqlite3_int64, int); int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *, Fts3Expr *); int sqlite3Fts3ExprLoadFtDoclist(Fts3Cursor *, Fts3Expr *, char **, int *); int sqlite3Fts3ExprNearTrim(Fts3Expr *, Fts3Expr *, int); /* fts3_tokenizer.c */ const char *sqlite3Fts3NextToken(const char *, int *); int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *); int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *, sqlite3_tokenizer **, const char **, char ** |
︙ | ︙ |
Changes to ext/fts3/fts3_expr.c.
︙ | ︙ | |||
101 102 103 104 105 106 107 108 109 110 111 112 113 114 | ** is defined to accept an argument of type char, and always returns 0 for ** any values that fall outside of the range of the unsigned char type (i.e. ** negative values). */ static int fts3isspace(char c){ return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f'; } /* ** Extract the next token from buffer z (length n) using the tokenizer ** and other information (column names etc.) in pParse. Create an Fts3Expr ** structure of type FTSQUERY_PHRASE containing a phrase consisting of this ** single token and set *ppExpr to point to it. If the end of the buffer is ** reached before a token is found, set *ppExpr to zero. It is the | > > > > > > > > > > > > | 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 | ** is defined to accept an argument of type char, and always returns 0 for ** any values that fall outside of the range of the unsigned char type (i.e. ** negative values). */ static int fts3isspace(char c){ return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f'; } /* ** Allocate nByte bytes of memory using sqlite3_malloc(). If successful, ** zero the memory before returning a pointer to it. If unsuccessful, ** return NULL. */ static void *fts3MallocZero(int nByte){ void *pRet = sqlite3_malloc(nByte); if( pRet ) memset(pRet, 0, nByte); return pRet; } /* ** Extract the next token from buffer z (length n) using the tokenizer ** and other information (column names etc.) in pParse. Create an Fts3Expr ** structure of type FTSQUERY_PHRASE containing a phrase consisting of this ** single token and set *ppExpr to point to it. If the end of the buffer is ** reached before a token is found, set *ppExpr to zero. It is the |
︙ | ︙ | |||
139 140 141 142 143 144 145 | int nByte; /* total space to allocate */ pCursor->pTokenizer = pTokenizer; rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition); if( rc==SQLITE_OK ){ nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken; | | < | 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 | int nByte; /* total space to allocate */ pCursor->pTokenizer = pTokenizer; rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition); if( rc==SQLITE_OK ){ nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken; pRet = (Fts3Expr *)fts3MallocZero(nByte); if( !pRet ){ rc = SQLITE_NOMEM; }else{ pRet->eType = FTSQUERY_PHRASE; pRet->pPhrase = (Fts3Phrase *)&pRet[1]; pRet->pPhrase->nToken = 1; pRet->pPhrase->iColumn = iCol; pRet->pPhrase->aToken[0].n = nToken; pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1]; memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken); |
︙ | ︙ | |||
229 230 231 232 233 234 235 236 237 | goto no_mem; } if( ii==0 ){ memset(p, 0, nByte); p->pPhrase = (Fts3Phrase *)&p[1]; } p->pPhrase = (Fts3Phrase *)&p[1]; p->pPhrase->nToken = ii+1; p->pPhrase->aToken[ii].n = nToken; | > < < | 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 | goto no_mem; } if( ii==0 ){ memset(p, 0, nByte); p->pPhrase = (Fts3Phrase *)&p[1]; } p->pPhrase = (Fts3Phrase *)&p[1]; memset(&p->pPhrase->aToken[ii], 0, sizeof(Fts3PhraseToken)); p->pPhrase->nToken = ii+1; p->pPhrase->aToken[ii].n = nToken; memcpy(&zTemp[nTemp], zToken, nToken); nTemp += nToken; if( iEnd<nInput && zInput[iEnd]=='*' ){ p->pPhrase->aToken[ii].isPrefix = 1; }else{ p->pPhrase->aToken[ii].isPrefix = 0; } |
︙ | ︙ | |||
370 371 372 373 374 375 376 | ** the next byte must contain either whitespace, an open or close ** parenthesis, a quote character, or EOF. */ cNext = zInput[nKey]; if( fts3isspace(cNext) || cNext=='"' || cNext=='(' || cNext==')' || cNext==0 ){ | | < | 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 | ** the next byte must contain either whitespace, an open or close ** parenthesis, a quote character, or EOF. */ cNext = zInput[nKey]; if( fts3isspace(cNext) || cNext=='"' || cNext=='(' || cNext==')' || cNext==0 ){ pRet = (Fts3Expr *)fts3MallocZero(sizeof(Fts3Expr)); if( !pRet ){ return SQLITE_NOMEM; } pRet->eType = pKey->eType; pRet->nNear = nNear; *ppExpr = pRet; *pnConsumed = (int)((zInput - z) + nKey); return SQLITE_OK; } |
︙ | ︙ | |||
550 551 552 553 554 555 556 | if( rc==SQLITE_OK ){ int isPhrase; if( !sqlite3_fts3_enable_parentheses && p->eType==FTSQUERY_PHRASE && p->pPhrase->isNot ){ /* Create an implicit NOT operator. */ | | < | 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 | if( rc==SQLITE_OK ){ int isPhrase; if( !sqlite3_fts3_enable_parentheses && p->eType==FTSQUERY_PHRASE && p->pPhrase->isNot ){ /* Create an implicit NOT operator. */ Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr)); if( !pNot ){ sqlite3Fts3ExprFree(p); rc = SQLITE_NOMEM; goto exprparse_out; } pNot->eType = FTSQUERY_NOT; pNot->pRight = p; if( pNotBranch ){ pNot->pLeft = pNotBranch; } pNotBranch = pNot; p = pPrev; |
︙ | ︙ | |||
584 585 586 587 588 589 590 | goto exprparse_out; } if( isPhrase && !isRequirePhrase ){ /* Insert an implicit AND operator. */ Fts3Expr *pAnd; assert( pRet && pPrev ); | | < | 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 | goto exprparse_out; } if( isPhrase && !isRequirePhrase ){ /* Insert an implicit AND operator. */ Fts3Expr *pAnd; assert( pRet && pPrev ); pAnd = fts3MallocZero(sizeof(Fts3Expr)); if( !pAnd ){ sqlite3Fts3ExprFree(p); rc = SQLITE_NOMEM; goto exprparse_out; } pAnd->eType = FTSQUERY_AND; insertBinaryOperator(&pRet, pPrev, pAnd); pPrev = pAnd; } /* This test catches attempts to make either operand of a NEAR ** operator something other than a phrase. For example, either of |
︙ | ︙ |
Changes to ext/fts3/fts3_snippet.c.
︙ | ︙ | |||
264 265 266 267 268 269 270 | sCtx.pCsr = pCsr; rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb1, (void *)&sCtx); if( rc==SQLITE_OK ){ (void)fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb2, 0); } if( pnPhrase ) *pnPhrase = sCtx.nPhrase; if( pnToken ) *pnToken = sCtx.nToken; | < | 264 265 266 267 268 269 270 271 272 273 274 275 276 277 | sCtx.pCsr = pCsr; rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb1, (void *)&sCtx); if( rc==SQLITE_OK ){ (void)fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb2, 0); } if( pnPhrase ) *pnPhrase = sCtx.nPhrase; if( pnToken ) *pnToken = sCtx.nToken; return rc; } /* ** Advance the position list iterator specified by the first two ** arguments so that it points to the first element with a value greater ** than or equal to parameter iNext. |
︙ | ︙ | |||
789 790 791 792 793 794 795 | */ static int fts3ExprGlobalMatchinfoCb( Fts3Expr *pExpr, /* Phrase expression node */ int iPhrase, /* Phrase number (numbered from zero) */ void *pCtx /* Pointer to MatchInfo structure */ ){ MatchInfo *p = (MatchInfo *)pCtx; | > | > > > > > > > > | > > > > > > > > > > > > > > | | > > > | | | > | 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 | */ static int fts3ExprGlobalMatchinfoCb( Fts3Expr *pExpr, /* Phrase expression node */ int iPhrase, /* Phrase number (numbered from zero) */ void *pCtx /* Pointer to MatchInfo structure */ ){ MatchInfo *p = (MatchInfo *)pCtx; Fts3Cursor *pCsr = p->pCursor; char *pIter; char *pEnd; char *pFree = 0; const int iStart = 2 + (iPhrase * p->nCol * 3) + 1; assert( pExpr->isLoaded ); assert( pExpr->eType==FTSQUERY_PHRASE ); if( pCsr->pDeferred ){ Fts3Phrase *pPhrase = pExpr->pPhrase; int ii; for(ii=0; ii<pPhrase->nToken; ii++){ if( pPhrase->aToken[ii].bFulltext ) break; } if( ii<pPhrase->nToken ){ int nFree = 0; int rc = sqlite3Fts3ExprLoadFtDoclist(pCsr, pExpr, &pFree, &nFree); if( rc!=SQLITE_OK ) return rc; pIter = pFree; pEnd = &pFree[nFree]; }else{ int nDoc = p->aMatchinfo[2 + 3*p->nCol*p->aMatchinfo[0]]; for(ii=0; ii<p->nCol; ii++){ p->aMatchinfo[iStart + ii*3] = nDoc; p->aMatchinfo[iStart + ii*3 + 1] = nDoc; } return SQLITE_OK; } }else{ pIter = pExpr->aDoclist; pEnd = &pExpr->aDoclist[pExpr->nDoclist]; } /* Fill in the global hit count matrix row for this phrase. */ while( pIter<pEnd ){ while( *pIter++ & 0x80 ); /* Skip past docid. */ fts3LoadColumnlistCounts(&pIter, &p->aMatchinfo[iStart], 1); } sqlite3_free(pFree); return SQLITE_OK; } /* ** fts3ExprIterate() callback used to collect the "local" matchinfo stats ** for a single query. 
The "local" stats are those elements of the matchinfo ** array that are different for each row returned by the query. |
︙ | ︙ | |||
871 872 873 874 875 876 877 | sInfo.aMatchinfo = (u32 *)sqlite3_malloc(sizeof(u32)*nMatchinfo); if( !sInfo.aMatchinfo ){ return SQLITE_NOMEM; } memset(sInfo.aMatchinfo, 0, sizeof(u32)*nMatchinfo); | < < > | 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 | sInfo.aMatchinfo = (u32 *)sqlite3_malloc(sizeof(u32)*nMatchinfo); if( !sInfo.aMatchinfo ){ return SQLITE_NOMEM; } memset(sInfo.aMatchinfo, 0, sizeof(u32)*nMatchinfo); /* First element of match-info is the number of phrases in the query */ sInfo.aMatchinfo[0] = nPhrase; sInfo.aMatchinfo[1] = sInfo.nCol; if( pTab->bHasDocsize ){ int ofst = 2 + 3*sInfo.aMatchinfo[0]*sInfo.aMatchinfo[1]; rc = sqlite3Fts3MatchinfoDocsizeGlobal(pCsr, &sInfo.aMatchinfo[ofst]); } (void)fts3ExprIterate(pCsr->pExpr, fts3ExprGlobalMatchinfoCb,(void*)&sInfo); pCsr->aMatchinfo = sInfo.aMatchinfo; pCsr->isMatchinfoNeeded = 1; } sInfo.aMatchinfo = pCsr->aMatchinfo; if( rc==SQLITE_OK && pCsr->isMatchinfoNeeded ){ (void)fts3ExprIterate(pCsr->pExpr, fts3ExprLocalMatchinfoCb, (void*)&sInfo); |
︙ | ︙ | |||
989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 | for(i=0; i<nSnippet && rc==SQLITE_OK; i++){ rc = fts3SnippetText(pCsr, &aSnippet[i], i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res ); } snippet_out: if( rc!=SQLITE_OK ){ sqlite3_result_error_code(pCtx, rc); sqlite3_free(res.z); }else{ sqlite3_result_text(pCtx, res.z, -1, sqlite3_free); } } | > | 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 | for(i=0; i<nSnippet && rc==SQLITE_OK; i++){ rc = fts3SnippetText(pCsr, &aSnippet[i], i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res ); } snippet_out: sqlite3Fts3SegmentsClose(pTab); if( rc!=SQLITE_OK ){ sqlite3_result_error_code(pCtx, rc); sqlite3_free(res.z); }else{ sqlite3_result_text(pCtx, res.z, -1, sqlite3_free); } } |
︙ | ︙ | |||
1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 | pMod->xClose(pC); if( rc!=SQLITE_OK ) goto offsets_out; } offsets_out: sqlite3_free(sCtx.aTerm); assert( rc!=SQLITE_DONE ); if( rc!=SQLITE_OK ){ sqlite3_result_error_code(pCtx, rc); sqlite3_free(res.z); }else{ sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free); } return; } /* ** Implementation of matchinfo() function. */ void sqlite3Fts3Matchinfo(sqlite3_context *pContext, Fts3Cursor *pCsr){ int rc; if( !pCsr->pExpr ){ sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC); return; } rc = fts3GetMatchinfo(pCsr); if( rc!=SQLITE_OK ){ sqlite3_result_error_code(pContext, rc); }else{ Fts3Table *pTab = (Fts3Table*)pCsr->base.pVtab; int n = sizeof(u32)*(2+pCsr->aMatchinfo[0]*pCsr->aMatchinfo[1]*3); if( pTab->bHasDocsize ){ n += sizeof(u32)*(1 + 2*pTab->nColumn); } sqlite3_result_blob(pContext, pCsr->aMatchinfo, n, SQLITE_TRANSIENT); } } #endif | > > | 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 | pMod->xClose(pC); if( rc!=SQLITE_OK ) goto offsets_out; } offsets_out: sqlite3_free(sCtx.aTerm); assert( rc!=SQLITE_DONE ); sqlite3Fts3SegmentsClose(pTab); if( rc!=SQLITE_OK ){ sqlite3_result_error_code(pCtx, rc); sqlite3_free(res.z); }else{ sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free); } return; } /* ** Implementation of matchinfo() function. 
*/ void sqlite3Fts3Matchinfo(sqlite3_context *pContext, Fts3Cursor *pCsr){ int rc; if( !pCsr->pExpr ){ sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC); return; } rc = fts3GetMatchinfo(pCsr); sqlite3Fts3SegmentsClose((Fts3Table *)pCsr->base.pVtab ); if( rc!=SQLITE_OK ){ sqlite3_result_error_code(pContext, rc); }else{ Fts3Table *pTab = (Fts3Table*)pCsr->base.pVtab; int n = sizeof(u32)*(2+pCsr->aMatchinfo[0]*pCsr->aMatchinfo[1]*3); if( pTab->bHasDocsize ){ n += sizeof(u32)*(1 + 2*pTab->nColumn); } sqlite3_result_blob(pContext, pCsr->aMatchinfo, n, SQLITE_TRANSIENT); } } #endif |
Changes to ext/fts3/fts3_write.c.
︙ | ︙ | |||
2640 2641 2642 2643 2644 2645 2646 | ** references to deferred doclists from within the tree of Fts3Expr ** structures headed by */ static void fts3DeferredDoclistClear(Fts3Expr *pExpr){ if( pExpr ){ fts3DeferredDoclistClear(pExpr->pLeft); fts3DeferredDoclistClear(pExpr->pRight); | | > | > | 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 | ** references to deferred doclists from within the tree of Fts3Expr ** structures headed by */ static void fts3DeferredDoclistClear(Fts3Expr *pExpr){ if( pExpr ){ fts3DeferredDoclistClear(pExpr->pLeft); fts3DeferredDoclistClear(pExpr->pRight); if( pExpr->isLoaded ){ sqlite3_free(pExpr->aDoclist); pExpr->isLoaded = 0; pExpr->aDoclist = 0; pExpr->nDoclist = 0; pExpr->pCurrent = 0; pExpr->iCurrent = 0; } } } /* ** Delete all cached deferred doclists. Deferred doclists are cached ** (allocated) by the sqlite3Fts3CacheDeferredDoclists() function. */ void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *pCsr){ Fts3DeferredToken *pDef; for(pDef=pCsr->pDeferred; pDef; pDef=pDef->pNext){ sqlite3_free(pDef->pList); pDef->pList = 0; } if( pCsr->pDeferred ){ fts3DeferredDoclistClear(pCsr->pExpr); } } /* ** Free all entries in the pCsr->pDeferred list. Entries are added to ** this list using sqlite3Fts3DeferToken(). */ void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *pCsr){ |
︙ | ︙ |
Changes to test/fts3defer2.test.
︙ | ︙ | |||
53 54 55 56 57 58 59 | do_execsql_test 1.2.2 { SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)'; } [list \ {a b c d [e] [f] [a] x y} \ {0 1 8 1 0 0 10 1 0 2 12 1} \ | | | | | > | < | | | 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 | do_execsql_test 1.2.2 { SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)'; } [list \ {a b c d [e] [f] [a] x y} \ {0 1 8 1 0 0 10 1 0 2 12 1} \ [list 3 1 1 1 1 1 3 3 1 3 3 3 13336 9] ] do_execsql_test 1.2.3 { SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH 'f (e NEAR/3 a)'; } [list \ {[a] b c d [e] [f] [a] x y} \ {0 2 0 1 0 1 8 1 0 0 10 1 0 2 12 1} \ [list 3 1 1 1 1 1 3 3 2 3 3 3 13336 9] ] do_execsql_test 1.3.1 { DROP TABLE t1 } #----------------------------------------------------------------------------- # Test cases fts3defer2-2.* focus specifically on the matchinfo function. # do_execsql_test 2.1.1 { CREATE VIRTUAL TABLE t2 USING fts4; } do_execsql_test 2.1.2 "INSERT INTO t2 VALUES('[string repeat {a } 10000]')" do_execsql_test 2.1.3 "INSERT INTO t2 VALUES('b [string repeat {z } 10000]')" do_execsql_test 2.1.4 [string repeat "INSERT INTO t2 VALUES('x');" 50] do_execsql_test 2.1.5 { INSERT INTO t2 VALUES('a b c d e f g'); INSERT INTO t2 VALUES('a b c d e f g'); } foreach {tn sql} { 1 {} 2 { INSERT INTO t2(t2) VALUES('optimize') } 3 { UPDATE t2_segments SET block = zeroblob(length(block)) WHERE length(block)>10000; } } { execsql $sql do_execsql_test 2.2.$tn.1 { SELECT mit(matchinfo(t2)) FROM t2 WHERE t2 MATCH 'a b'; } [list \ [list 2 1 1 54 54 1 3 3 54 372 7] \ [list 2 1 1 54 54 1 3 3 54 372 7] \ ] } finish_test |