Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Add further tests for fts5 prefix queries. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
accdc98b1291f07b802fd23f3ebc7dbc |
User & Date: | dan 2015-10-07 09:02:50.876 |
Context
2015-10-07
| ||
13:24 | Add tests for fts5 phrase queries with column filters. (check-in: f20f9f813f user: dan tags: trunk) | |
09:02 | Add further tests for fts5 prefix queries. (check-in: accdc98b12 user: dan tags: trunk) | |
04:20 | Fix a typo in the previous check-in. (check-in: 80027709c3 user: mistachkin tags: trunk) | |
Changes
Changes to ext/fts5/fts5Int.h.
︙ | ︙ | |||
77 78 79 80 81 82 83 | extern int sqlite3_fts5_may_be_corrupt; # define assert_nc(x) assert(sqlite3_fts5_may_be_corrupt || (x)) #else # define assert_nc(x) assert(x) #endif typedef struct Fts5Global Fts5Global; | | | | 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 | extern int sqlite3_fts5_may_be_corrupt; # define assert_nc(x) assert(sqlite3_fts5_may_be_corrupt || (x)) #else # define assert_nc(x) assert(x) #endif typedef struct Fts5Global Fts5Global; typedef struct Fts5Colset Fts5Colset; /* If a NEAR() clump or phrase may only match a specific set of columns, ** then an object of the following type is used to record the set of columns. ** Each entry in the aiCol[] array is a column that may be matched. ** ** This object is used by fts5_expr.c and fts5_index.c. */ struct Fts5Colset { int nCol; int aiCol[1]; }; /************************************************************************** |
︙ | ︙ | |||
331 332 333 334 335 336 337 | ** Open a new iterator to iterate through all rowids that match the ** specified token or token prefix. */ int sqlite3Fts5IndexQuery( Fts5Index *p, /* FTS index to query */ const char *pToken, int nToken, /* Token (or prefix) to query for */ int flags, /* Mask of FTS5INDEX_QUERY_X flags */ | | | 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 | ** Open a new iterator to iterate through all rowids that match the ** specified token or token prefix. */ int sqlite3Fts5IndexQuery( Fts5Index *p, /* FTS index to query */ const char *pToken, int nToken, /* Token (or prefix) to query for */ int flags, /* Mask of FTS5INDEX_QUERY_X flags */ Fts5Colset *pColset, /* Match these columns only */ Fts5IndexIter **ppIter /* OUT: New iterator object */ ); /* ** The various operations on open token or token prefix iterators opened ** using sqlite3Fts5IndexQuery(). */ |
︙ | ︙ | |||
645 646 647 648 649 650 651 | Fts5ExprNearset *sqlite3Fts5ParseNearset( Fts5Parse*, Fts5ExprNearset*, Fts5ExprPhrase* ); | | | | | 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 | Fts5ExprNearset *sqlite3Fts5ParseNearset( Fts5Parse*, Fts5ExprNearset*, Fts5ExprPhrase* ); Fts5Colset *sqlite3Fts5ParseColset( Fts5Parse*, Fts5Colset*, Fts5Token * ); void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase*); void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset*); void sqlite3Fts5ParseNodeFree(Fts5ExprNode*); void sqlite3Fts5ParseSetDistance(Fts5Parse*, Fts5ExprNearset*, Fts5Token*); void sqlite3Fts5ParseSetColset(Fts5Parse*, Fts5ExprNearset*, Fts5Colset*); void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p); void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token*); /* ** End of interface to code in fts5_expr.c. **************************************************************************/ |
︙ | ︙ |
Changes to ext/fts5/fts5_expr.c.
︙ | ︙ | |||
91 92 93 94 95 96 97 | /* ** One or more phrases that must appear within a certain token distance of ** each other within each matching document. */ struct Fts5ExprNearset { int nNear; /* NEAR parameter */ | | | 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 | /* ** One or more phrases that must appear within a certain token distance of ** each other within each matching document. */ struct Fts5ExprNearset { int nNear; /* NEAR parameter */ Fts5Colset *pColset; /* Columns to search (NULL -> all columns) */ int nPhrase; /* Number of entries in aPhrase[] array */ Fts5ExprPhrase *apPhrase[1]; /* Array of phrase pointers */ }; /* ** Parse context. |
︙ | ︙ | |||
262 263 264 265 266 267 268 | if( p ){ sqlite3Fts5ParseNodeFree(p->pRoot); sqlite3_free(p->apExprPhrase); sqlite3_free(p); } } | | | 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 | if( p ){ sqlite3Fts5ParseNodeFree(p->pRoot); sqlite3_free(p->apExprPhrase); sqlite3_free(p); } } static int fts5ExprColsetTest(Fts5Colset *pColset, int iCol){ int i; for(i=0; i<pColset->nCol; i++){ if( pColset->aiCol[i]==iCol ) return 1; } return 0; } |
︙ | ︙ | |||
391 392 393 394 395 396 397 | ** ** SQLITE_OK is returned if no error occurs, or an SQLite error code ** otherwise. It is not considered an error if the current rowid is ** not a match. */ static int fts5ExprPhraseIsMatch( Fts5ExprNode *pNode, /* Node pPhrase belongs to */ | | | 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 | ** ** SQLITE_OK is returned if no error occurs, or an SQLite error code ** otherwise. It is not considered an error if the current rowid is ** not a match. */ static int fts5ExprPhraseIsMatch( Fts5ExprNode *pNode, /* Node pPhrase belongs to */ Fts5Colset *pColset, /* Restrict matches to these columns */ Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ int *pbMatch /* OUT: Set to true if really a match */ ){ Fts5PoslistWriter writer = {0}; Fts5PoslistReader aStatic[4]; Fts5PoslistReader *aIter = aStatic; int i; |
︙ | ︙ | |||
791 792 793 794 795 796 797 | while( p<pEnd && ((prev & 0x80) || *p!=0x01) ){ prev = *p++; } return p - (*pa); } static int fts5ExprExtractColset ( | | | 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 | while( p<pEnd && ((prev & 0x80) || *p!=0x01) ){ prev = *p++; } return p - (*pa); } static int fts5ExprExtractColset ( Fts5Colset *pColset, /* Colset to filter on */ const u8 *pPos, int nPos, /* Position list */ Fts5Buffer *pBuf /* Output buffer */ ){ int rc = SQLITE_OK; int i; fts5BufferZero(pBuf); |
︙ | ︙ | |||
854 855 856 857 858 859 860 | ** of a single term only, grab pointers into the poslist managed by the ** fts5_index.c iterator object. This is much faster than synthesizing ** a new poslist the way we have to for more complicated phrase or NEAR ** expressions. */ Fts5ExprNearset *pNear = pNode->pNear; Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; | | | 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 | ** of a single term only, grab pointers into the poslist managed by the ** fts5_index.c iterator object. This is much faster than synthesizing ** a new poslist the way we have to for more complicated phrase or NEAR ** expressions. */ Fts5ExprNearset *pNear = pNode->pNear; Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; Fts5Colset *pColset = pNear->pColset; const u8 *pPos; int nPos; int rc; assert( pNode->eType==FTS5_TERM ); assert( pNear->nPhrase==1 && pPhrase->nTerm==1 ); assert( pPhrase->aTerm[0].pSynonym==0 ); |
︙ | ︙ | |||
1741 1742 1743 1744 1745 1746 1747 | nNear = FTS5_DEFAULT_NEARDIST; } pNear->nNear = nNear; } /* ** The second argument passed to this function may be NULL, or it may be | | | | | | | 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 | nNear = FTS5_DEFAULT_NEARDIST; } pNear->nNear = nNear; } /* ** The second argument passed to this function may be NULL, or it may be ** an existing Fts5Colset object. This function returns a pointer to ** a new colset object containing the contents of (p) with new value column ** number iCol appended. ** ** If an OOM error occurs, store an error code in pParse and return NULL. ** The old colset object (if any) is not freed in this case. */ static Fts5Colset *fts5ParseColset( Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */ Fts5Colset *p, /* Existing colset object */ int iCol /* New column to add to colset object */ ){ int nCol = p ? p->nCol : 0; /* Num. columns already in colset object */ Fts5Colset *pNew; /* New colset object to return */ assert( pParse->rc==SQLITE_OK ); assert( iCol>=0 && iCol<pParse->pConfig->nCol ); pNew = sqlite3_realloc(p, sizeof(Fts5Colset) + sizeof(int)*nCol); if( pNew==0 ){ pParse->rc = SQLITE_NOMEM; }else{ int *aiCol = pNew->aiCol; int i, j; for(i=0; i<nCol; i++){ if( aiCol[i]==iCol ) return pNew; |
︙ | ︙ | |||
1784 1785 1786 1787 1788 1789 1790 | for(i=1; i<pNew->nCol; i++) assert( pNew->aiCol[i]>pNew->aiCol[i-1] ); #endif } return pNew; } | | | | | 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 | for(i=1; i<pNew->nCol; i++) assert( pNew->aiCol[i]>pNew->aiCol[i-1] ); #endif } return pNew; } Fts5Colset *sqlite3Fts5ParseColset( Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */ Fts5Colset *pColset, /* Existing colset object */ Fts5Token *p ){ Fts5Colset *pRet = 0; int iCol; char *z; /* Dequoted copy of token p */ z = sqlite3Fts5Strndup(&pParse->rc, p->p, p->n); if( pParse->rc==SQLITE_OK ){ Fts5Config *pConfig = pParse->pConfig; sqlite3Fts5Dequote(z); |
︙ | ︙ | |||
1819 1820 1821 1822 1823 1824 1825 | return pRet; } void sqlite3Fts5ParseSetColset( Fts5Parse *pParse, Fts5ExprNearset *pNear, | | | 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 | return pRet; } void sqlite3Fts5ParseSetColset( Fts5Parse *pParse, Fts5ExprNearset *pNear, Fts5Colset *pColset ){ if( pNear ){ pNear->pColset = pColset; }else{ sqlite3_free(pColset); } } |
︙ | ︙ |
Changes to ext/fts5/fts5_index.c.
︙ | ︙ | |||
3950 3951 3952 3953 3954 3955 3956 | fts5BufferAppendBlob(&p->rc, (Fts5Buffer*)pContext, nChunk, pChunk); } } typedef struct PoslistCallbackCtx PoslistCallbackCtx; struct PoslistCallbackCtx { Fts5Buffer *pBuf; /* Append to this buffer */ | | | | 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 | fts5BufferAppendBlob(&p->rc, (Fts5Buffer*)pContext, nChunk, pChunk); } } typedef struct PoslistCallbackCtx PoslistCallbackCtx; struct PoslistCallbackCtx { Fts5Buffer *pBuf; /* Append to this buffer */ Fts5Colset *pColset; /* Restrict matches to this column */ int eState; /* See above */ }; /* ** TODO: Make this more efficient! */ static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){ int i; for(i=0; i<pColset->nCol; i++){ if( pColset->aiCol[i]==iCol ) return 1; } return 0; } |
︙ | ︙ | |||
4025 4026 4027 4028 4029 4030 4031 | ** function appends the position list data for the current entry to ** buffer pBuf. It does not make a copy of the position-list size ** field. */ static void fts5SegiterPoslist( Fts5Index *p, Fts5SegIter *pSeg, | | | 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 | ** function appends the position list data for the current entry to ** buffer pBuf. It does not make a copy of the position-list size ** field. */ static void fts5SegiterPoslist( Fts5Index *p, Fts5SegIter *pSeg, Fts5Colset *pColset, Fts5Buffer *pBuf ){ if( pColset==0 ){ fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback); }else{ PoslistCallbackCtx sCtx; sCtx.pBuf = pBuf; |
︙ | ︙ | |||
4051 4052 4053 4054 4055 4056 4057 | ** ** If an error occurs, an error code is left in p->rc. It is assumed ** no error has already occurred when this function is called. */ static int fts5MultiIterPoslist( Fts5Index *p, Fts5IndexIter *pMulti, | | | 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 | ** ** If an error occurs, an error code is left in p->rc. It is assumed ** no error has already occurred when this function is called. */ static int fts5MultiIterPoslist( Fts5Index *p, Fts5IndexIter *pMulti, Fts5Colset *pColset, int bSz, /* Append a size field before the data */ Fts5Buffer *pBuf ){ if( p->rc==SQLITE_OK ){ int iSz; int iData; |
︙ | ︙ | |||
4247 4248 4249 4250 4251 4252 4253 | } static void fts5SetupPrefixIter( Fts5Index *p, /* Index to read from */ int bDesc, /* True for "ORDER BY rowid DESC" */ const u8 *pToken, /* Buffer containing prefix to match */ int nToken, /* Size of buffer pToken in bytes */ | | | 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 | } static void fts5SetupPrefixIter( Fts5Index *p, /* Index to read from */ int bDesc, /* True for "ORDER BY rowid DESC" */ const u8 *pToken, /* Buffer containing prefix to match */ int nToken, /* Size of buffer pToken in bytes */ Fts5Colset *pColset, /* Restrict matches to these columns */ Fts5IndexIter **ppIter /* OUT: New iterator */ ){ Fts5Structure *pStruct; Fts5Buffer *aBuf; const int nBuf = 32; aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); |
︙ | ︙ | |||
4532 4533 4534 4535 4536 4537 4538 | ** Open a new iterator to iterate through all rowids that match the ** specified token or token prefix. */ int sqlite3Fts5IndexQuery( Fts5Index *p, /* FTS index to query */ const char *pToken, int nToken, /* Token (or prefix) to query for */ int flags, /* Mask of FTS5INDEX_QUERY_X flags */ | | | 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 | ** Open a new iterator to iterate through all rowids that match the ** specified token or token prefix. */ int sqlite3Fts5IndexQuery( Fts5Index *p, /* FTS index to query */ const char *pToken, int nToken, /* Token (or prefix) to query for */ int flags, /* Mask of FTS5INDEX_QUERY_X flags */ Fts5Colset *pColset, /* Match these columns only */ Fts5IndexIter **ppIter /* OUT: New iterator object */ ){ Fts5Config *pConfig = p->pConfig; Fts5IndexIter *pRet = 0; int iIdx = 0; Fts5Buffer buf = {0, 0, 0}; |
︙ | ︙ |
Changes to ext/fts5/fts5parse.y.
︙ | ︙ | |||
97 98 99 100 101 102 103 | A = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, X); } cnearset(A) ::= colset(X) COLON nearset(Y). { sqlite3Fts5ParseSetColset(pParse, Y, X); A = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, Y); } | | | | 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 | A = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, X); } cnearset(A) ::= colset(X) COLON nearset(Y). { sqlite3Fts5ParseSetColset(pParse, Y, X); A = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, Y); } %type colset {Fts5Colset*} %destructor colset { sqlite3_free($$); } %type colsetlist {Fts5Colset*} %destructor colsetlist { sqlite3_free($$); } colset(A) ::= LCP colsetlist(X) RCP. { A = X; } colset(A) ::= STRING(X). { A = sqlite3Fts5ParseColset(pParse, 0, &X); } |
︙ | ︙ |
Changes to ext/fts5/test/fts5prefix.test.
︙ | ︙ | |||
114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 | execsql { INSERT INTO t3(t3) VALUES('integrity-check') } } {} proc gmatch {col pattern} { expr {[lsearch -glob $col $pattern]>=0} } db func gmatch gmatch for {set x 0} {$x<2} {incr x} { foreach {tn pattern} { 1 {xa*} 2 {xb*} 3 {xc*} 4 {xd*} 5 {xe*} 6 {xf*} 7 {xg*} 8 {xh*} 9 {xi*} 10 {xj*} } { | > > > > > > > > > > > > > > > > | > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 | execsql { INSERT INTO t3(t3) VALUES('integrity-check') } } {} proc gmatch {col pattern} { expr {[lsearch -glob $col $pattern]>=0} } db func gmatch gmatch proc ghl {col pattern} { foreach t $col { if {[string match $pattern $t]} { lappend res "*$t*" } else { lappend res $t } } set res } db func ghl ghl set COLS(a) 0 set COLS(b) 1 set COLS(c) 2 for {set x 0} {$x<2} {incr x} { foreach {tn pattern} { 1 {xa*} 2 {xb*} 3 {xc*} 4 {xd*} 5 {xe*} 6 {xf*} 7 {xg*} 8 {xh*} 9 {xi*} 10 {xj*} } { foreach col {a b c} { # Check that the list of returned rowids is correct. # set res [db eval "SELECT rowid FROM t3 WHERE gmatch($col, '$pattern')"] set query "$col : $pattern" do_execsql_test 3.3.$x.$tn.$col.rowid { SELECT rowid FROM t3($query); } $res # Check that the highlight() function works. # set res [db eval \ "SELECT ghl($col, '$pattern') FROM t3 WHERE gmatch($col, '$pattern')" ] set idx $COLS($col) do_execsql_test 3.3.$x.$tn.$col.highlight { SELECT highlight(t3, $idx, '*', '*') FROM t3($query); } $res } foreach colset {{a b} {b c} {c a} {a c} {b a}} { # Check that the list of returned rowids is correct. 
# foreach {col1 col2} $colset {} set expr "gmatch($col1, '$pattern') OR gmatch($col2, '$pattern')" set res [db eval "SELECT rowid FROM t3 WHERE $expr"] set query "{$colset} : $pattern" do_execsql_test 3.3.$x.$tn.{$colset}.rowid { SELECT rowid FROM t3($query); } $res set resq "SELECT ghl($col1, '$pattern'), ghl($col2, '$pattern')" append resq " FROM t3 WHERE $expr" set res [db eval $resq] set idx1 $COLS($col1) set idx2 $COLS($col2) do_execsql_test 3.3.$x.$tn.{$colset}.highlight { SELECT highlight(t3, $idx1, '*', '*'), highlight(t3, $idx2, '*', '*') FROM t3($query) } $res } } execsql { INSERT INTO t3(t3) VALUES('optimize') } execsql { INSERT INTO t3(t3) VALUES('integrity-check') } } finish_test |