Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Add the "^" syntax from fts3/4 to fts5. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: |
24d7058e2799133dd681d2fef341025c |
User & Date: | dan 2017-11-24 19:24:44.918 |
Context
2017-11-26
| ||
02:14 | Fix an CSV output quoting problem in the command-line shell on Windows. (check-in: 6500cdbd36 user: drh tags: trunk) | |
2017-11-25
| ||
17:51 | Add experimental feature to detect threading bugs in apps that use SQLITE_CONFIG_MULTITHREADED. Enabled at compile time using SQLITE_ENABLE_MULTITHREADED_CHECKS. (check-in: a66886ac13 user: dan tags: multithreaded-checks) | |
2017-11-24
| ||
19:24 | Add the "^" syntax from fts3/4 to fts5. (check-in: 24d7058e27 user: dan tags: trunk) | |
16:55 | Enhance the configure script to detect zLib. (check-in: e3b6e22049 user: drh tags: trunk) | |
Changes
Changes to ext/fts5/fts5Int.h.
︙ | ︙ | |||
717 718 719 720 721 722 723 724 725 726 727 728 729 730 | Fts5ExprPhrase *sqlite3Fts5ParseTerm( Fts5Parse *pParse, Fts5ExprPhrase *pPhrase, Fts5Token *pToken, int bPrefix ); Fts5ExprNearset *sqlite3Fts5ParseNearset( Fts5Parse*, Fts5ExprNearset*, Fts5ExprPhrase* ); | > > | 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 | Fts5ExprPhrase *sqlite3Fts5ParseTerm( Fts5Parse *pParse, Fts5ExprPhrase *pPhrase, Fts5Token *pToken, int bPrefix ); void sqlite3Fts5ParseSetCaret(Fts5ExprPhrase*); Fts5ExprNearset *sqlite3Fts5ParseNearset( Fts5Parse*, Fts5ExprNearset*, Fts5ExprPhrase* ); |
︙ | ︙ |
Changes to ext/fts5/fts5_expr.c.
︙ | ︙ | |||
83 84 85 86 87 88 89 | #define fts5ExprNodeNext(a,b,c,d) (b)->xNext((a), (b), (c), (d)) /* ** An instance of the following structure represents a single search term ** or term prefix. */ struct Fts5ExprTerm { | | > | 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 | #define fts5ExprNodeNext(a,b,c,d) (b)->xNext((a), (b), (c), (d)) /* ** An instance of the following structure represents a single search term ** or term prefix. */ struct Fts5ExprTerm { u8 bPrefix; /* True for a prefix term */ u8 bFirst; /* True if token must be first in column */ char *zTerm; /* nul-terminated term */ Fts5IndexIter *pIter; /* Iterator for this term */ Fts5ExprTerm *pSynonym; /* Pointer to first in list of synonyms */ }; /* ** A phrase. One or more terms that must appear in a contiguous sequence |
︙ | ︙ | |||
164 165 166 167 168 169 170 171 172 173 174 175 176 177 | case '{': tok = FTS5_LCP; break; case '}': tok = FTS5_RCP; break; case ':': tok = FTS5_COLON; break; case ',': tok = FTS5_COMMA; break; case '+': tok = FTS5_PLUS; break; case '*': tok = FTS5_STAR; break; case '-': tok = FTS5_MINUS; break; case '\0': tok = FTS5_EOF; break; case '"': { const char *z2; tok = FTS5_STRING; for(z2=&z[1]; 1; z2++){ | > | 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 | case '{': tok = FTS5_LCP; break; case '}': tok = FTS5_RCP; break; case ':': tok = FTS5_COLON; break; case ',': tok = FTS5_COMMA; break; case '+': tok = FTS5_PLUS; break; case '*': tok = FTS5_STAR; break; case '-': tok = FTS5_MINUS; break; case '^': tok = FTS5_CARET; break; case '\0': tok = FTS5_EOF; break; case '"': { const char *z2; tok = FTS5_STRING; for(z2=&z[1]; 1; z2++){ |
︙ | ︙ | |||
423 424 425 426 427 428 429 430 431 432 433 434 435 436 | int *pbMatch /* OUT: Set to true if really a match */ ){ Fts5PoslistWriter writer = {0}; Fts5PoslistReader aStatic[4]; Fts5PoslistReader *aIter = aStatic; int i; int rc = SQLITE_OK; fts5BufferZero(&pPhrase->poslist); /* If the aStatic[] array is not large enough, allocate a large array ** using sqlite3_malloc(). This approach could be improved upon. */ if( pPhrase->nTerm>ArraySize(aStatic) ){ int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm; | > | 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 | int *pbMatch /* OUT: Set to true if really a match */ ){ Fts5PoslistWriter writer = {0}; Fts5PoslistReader aStatic[4]; Fts5PoslistReader *aIter = aStatic; int i; int rc = SQLITE_OK; int bFirst = pPhrase->aTerm[0].bFirst; fts5BufferZero(&pPhrase->poslist); /* If the aStatic[] array is not large enough, allocate a large array ** using sqlite3_malloc(). This approach could be improved upon. */ if( pPhrase->nTerm>ArraySize(aStatic) ){ int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm; |
︙ | ︙ | |||
477 478 479 480 481 482 483 | } if( pPos->iPos>iAdj ) iPos = pPos->iPos-i; } } }while( bMatch==0 ); /* Append position iPos to the output */ | > | | > | 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 | } if( pPos->iPos>iAdj ) iPos = pPos->iPos-i; } } }while( bMatch==0 ); /* Append position iPos to the output */ if( bFirst==0 || FTS5_POS2OFFSET(iPos)==0 ){ rc = sqlite3Fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos); if( rc!=SQLITE_OK ) goto ismatch_out; } for(i=0; i<pPhrase->nTerm; i++){ if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out; } } ismatch_out: |
︙ | ︙ | |||
732 733 734 735 736 737 738 | int i; /* Check that each phrase in the nearset matches the current row. ** Populate the pPhrase->poslist buffers at the same time. If any ** phrase is not a match, break out of the loop early. */ for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; | | > > | 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 | int i; /* Check that each phrase in the nearset matches the current row. ** Populate the pPhrase->poslist buffers at the same time. If any ** phrase is not a match, break out of the loop early. */ for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym || pNear->pColset || pPhrase->aTerm[0].bFirst ){ int bMatch = 0; rc = fts5ExprPhraseIsMatch(pNode, pPhrase, &bMatch); if( bMatch==0 ) break; }else{ Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; fts5BufferSet(&rc, &pPhrase->poslist, pIter->nData, pIter->pData); } |
︙ | ︙ | |||
913 914 915 916 917 918 919 920 921 922 923 924 925 926 | int bMatch; /* True if all terms are at the same rowid */ const int bDesc = pExpr->bDesc; /* Check that this node should not be FTS5_TERM */ assert( pNear->nPhrase>1 || pNear->apPhrase[0]->nTerm>1 || pNear->apPhrase[0]->aTerm[0].pSynonym ); /* Initialize iLast, the "lastest" rowid any iterator points to. If the ** iterator skips through rowids in the default ascending order, this means ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it ** means the minimum rowid. */ if( pLeft->aTerm[0].pSynonym ){ | > | 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 | int bMatch; /* True if all terms are at the same rowid */ const int bDesc = pExpr->bDesc; /* Check that this node should not be FTS5_TERM */ assert( pNear->nPhrase>1 || pNear->apPhrase[0]->nTerm>1 || pNear->apPhrase[0]->aTerm[0].pSynonym || pNear->apPhrase[0]->aTerm[0].bFirst ); /* Initialize iLast, the "lastest" rowid any iterator points to. If the ** iterator skips through rowids in the default ascending order, this means ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it ** means the minimum rowid. */ if( pLeft->aTerm[0].pSynonym ){ |
︙ | ︙ | |||
1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 | sqlite3_free(pSyn); } } if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist); sqlite3_free(pPhrase); } } /* ** If argument pNear is NULL, then a new Fts5ExprNearset object is allocated ** and populated with pPhrase. Or, if pNear is not NULL, phrase pPhrase is ** appended to it and the results returned. ** ** If an OOM error occurs, both the pNear and pPhrase objects are freed and | > > > > > > > > > > | 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 | sqlite3_free(pSyn); } } if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist); sqlite3_free(pPhrase); } } /* ** Set the "bFirst" flag on the first token of the phrase passed as the ** only argument. */ void sqlite3Fts5ParseSetCaret(Fts5ExprPhrase *pPhrase){ if( pPhrase && pPhrase->nTerm ){ pPhrase->aTerm[0].bFirst = 1; } } /* ** If argument pNear is NULL, then a new Fts5ExprNearset object is allocated ** and populated with pPhrase. Or, if pNear is not NULL, phrase pPhrase is ** appended to it and the results returned. ** ** If an OOM error occurs, both the pNear and pPhrase objects are freed and |
︙ | ︙ | |||
1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 | const char *zTerm = p->zTerm; rc = fts5ParseTokenize((void*)&sCtx, tflags, zTerm, (int)strlen(zTerm), 0, 0); tflags = FTS5_TOKEN_COLOCATED; } if( rc==SQLITE_OK ){ sCtx.pPhrase->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix; } } }else{ /* This happens when parsing a token or quoted phrase that contains ** no token characters at all. (e.g ... MATCH '""'). */ sCtx.pPhrase = sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprPhrase)); } if( rc==SQLITE_OK ){ /* All the allocations succeeded. Put the expression object together. */ pNew->pIndex = pExpr->pIndex; pNew->pConfig = pExpr->pConfig; pNew->nPhrase = 1; pNew->apExprPhrase[0] = sCtx.pPhrase; pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase; pNew->pRoot->pNear->nPhrase = 1; sCtx.pPhrase->pNode = pNew->pRoot; | > | > > > | 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 | const char *zTerm = p->zTerm; rc = fts5ParseTokenize((void*)&sCtx, tflags, zTerm, (int)strlen(zTerm), 0, 0); tflags = FTS5_TOKEN_COLOCATED; } if( rc==SQLITE_OK ){ sCtx.pPhrase->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix; sCtx.pPhrase->aTerm[i].bFirst = pOrig->aTerm[i].bFirst; } } }else{ /* This happens when parsing a token or quoted phrase that contains ** no token characters at all. (e.g ... MATCH '""'). */ sCtx.pPhrase = sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprPhrase)); } if( rc==SQLITE_OK ){ /* All the allocations succeeded. Put the expression object together. */ pNew->pIndex = pExpr->pIndex; pNew->pConfig = pExpr->pConfig; pNew->nPhrase = 1; pNew->apExprPhrase[0] = sCtx.pPhrase; pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase; pNew->pRoot->pNear->nPhrase = 1; sCtx.pPhrase->pNode = pNew->pRoot; if( pOrig->nTerm==1 && pOrig->aTerm[0].pSynonym==0 && pOrig->aTerm[0].bFirst==0 ){ pNew->pRoot->eType = FTS5_TERM; pNew->pRoot->xNext = fts5ExprNodeNext_TERM; }else{ pNew->pRoot->eType = FTS5_STRING; pNew->pRoot->xNext = fts5ExprNodeNext_STRING; } }else{ |
︙ | ︙ | |||
2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 | static void fts5ExprAssignXNext(Fts5ExprNode *pNode){ switch( pNode->eType ){ case FTS5_STRING: { Fts5ExprNearset *pNear = pNode->pNear; if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 && pNear->apPhrase[0]->aTerm[0].pSynonym==0 ){ pNode->eType = FTS5_TERM; pNode->xNext = fts5ExprNodeNext_TERM; }else{ pNode->xNext = fts5ExprNodeNext_STRING; } break; | > | 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 | static void fts5ExprAssignXNext(Fts5ExprNode *pNode){ switch( pNode->eType ){ case FTS5_STRING: { Fts5ExprNearset *pNear = pNode->pNear; if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 && pNear->apPhrase[0]->aTerm[0].pSynonym==0 && pNear->apPhrase[0]->aTerm[0].bFirst==0 ){ pNode->eType = FTS5_TERM; pNode->xNext = fts5ExprNodeNext_TERM; }else{ pNode->xNext = fts5ExprNodeNext_STRING; } break; |
︙ | ︙ | |||
2093 2094 2095 2096 2097 2098 2099 | pNear->apPhrase[iPhrase]->pNode = pRet; if( pNear->apPhrase[iPhrase]->nTerm==0 ){ pRet->xNext = 0; pRet->eType = FTS5_EOF; } } | | > | > > | | | | | | | | | | | | | 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 | pNear->apPhrase[iPhrase]->pNode = pRet; if( pNear->apPhrase[iPhrase]->nTerm==0 ){ pRet->xNext = 0; pRet->eType = FTS5_EOF; } } if( pParse->pConfig->eDetail!=FTS5_DETAIL_FULL ){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; if( pNear->nPhrase!=1 || pPhrase->nTerm>1 || (pPhrase->nTerm>0 && pPhrase->aTerm[0].bFirst) ){ assert( pParse->rc==SQLITE_OK ); pParse->rc = SQLITE_ERROR; assert( pParse->zErr==0 ); pParse->zErr = sqlite3_mprintf( "fts5: %s queries are not supported (detail!=full)", pNear->nPhrase==1 ? "phrase": "NEAR" ); sqlite3_free(pRet); pRet = 0; } } }else{ fts5ExprAddChildren(pRet, pLeft); fts5ExprAddChildren(pRet, pRight); } } } |
︙ | ︙ |
Changes to ext/fts5/fts5parse.y.
︙ | ︙ | |||
144 145 146 147 148 149 150 | %type nearset {Fts5ExprNearset*} %type nearphrases {Fts5ExprNearset*} %destructor nearset { sqlite3Fts5ParseNearsetFree($$); } %destructor nearphrases { sqlite3Fts5ParseNearsetFree($$); } | > > > | > | 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 | %type nearset {Fts5ExprNearset*} %type nearphrases {Fts5ExprNearset*} %destructor nearset { sqlite3Fts5ParseNearsetFree($$); } %destructor nearphrases { sqlite3Fts5ParseNearsetFree($$); } nearset(A) ::= phrase(Y). { A = sqlite3Fts5ParseNearset(pParse, 0, Y); } nearset(A) ::= CARET phrase(Y). { sqlite3Fts5ParseSetCaret(Y); A = sqlite3Fts5ParseNearset(pParse, 0, Y); } nearset(A) ::= STRING(X) LP nearphrases(Y) neardist_opt(Z) RP. { sqlite3Fts5ParseNear(pParse, &X); sqlite3Fts5ParseSetDistance(pParse, Y, &Z); A = Y; } nearphrases(A) ::= phrase(X). { |
︙ | ︙ | |||
185 186 187 188 189 190 191 | A = sqlite3Fts5ParseTerm(pParse, 0, &Y, Z); } /* ** Optional "*" character. */ %type star_opt {int} | < | 189 190 191 192 193 194 195 196 197 | A = sqlite3Fts5ParseTerm(pParse, 0, &Y, Z); } /* ** Optional "*" character. */ %type star_opt {int} star_opt(A) ::= STAR. { A = 1; } star_opt(A) ::= . { A = 0; } |
Changes to ext/fts5/test/fts5faultB.test.
︙ | ︙ | |||
125 126 127 128 129 130 131 132 133 134 | } do_faultsim_test 4.2 -faults oom* -body { execsql { SELECT rowid FROM t1('{a b c} : (a AND d)') } } -test { faultsim_test_result {0 {2 3}} } finish_test | > > > > > > > > > > > > > > > > > | 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 | } do_faultsim_test 4.2 -faults oom* -body { execsql { SELECT rowid FROM t1('{a b c} : (a AND d)') } } -test { faultsim_test_result {0 {2 3}} } #------------------------------------------------------------------------- # Test OOM injection while parsing a CARET expression # reset_db do_execsql_test 5.0 { CREATE VIRTUAL TABLE t1 USING fts5(a); INSERT INTO t1 VALUES('a b c d'); -- 1 INSERT INTO t1 VALUES('d a b c'); -- 2 INSERT INTO t1 VALUES('c d a b'); -- 3 INSERT INTO t1 VALUES('b c d a'); -- 4 } do_faultsim_test 5.1 -faults oom* -body { execsql { SELECT rowid FROM t1('^a OR ^b') } } -test { faultsim_test_result {0 {1 4}} } finish_test |
Added ext/fts5/test/fts5first.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 | # 2017 November 25 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5first ifcapable !fts5 { finish_test return } do_execsql_test 1.0 { CREATE VIRTUAL TABLE x1 USING fts5(a, b); } foreach {tn expr ok} { 1 {^abc} 1 2 {^abc + def} 1 3 {^ "abc def"} 1 4 {^"abc def"} 1 5 {abc ^def} 1 6 {abc + ^def} 0 7 {abc ^+ def} 0 8 {"^abc"} 1 9 {NEAR(^abc def)} 0 } { set res(0) {/1 {fts5: syntax error near .*}/} set res(1) {0 {}} do_catchsql_test 1.$tn { SELECT * FROM x1($expr) } $res($ok) } #------------------------------------------------------------------------- # do_execsql_test 2.0 { INSERT INTO x1 VALUES('a b c', 'b c a'); } foreach {tn expr match} { 1 {^a} 1 2 {^b} 1 3 {^c} 0 4 {^a + b} 1 5 {^b + c} 1 6 {^c + a} 0 7 {^"c a"} 0 8 {a:^a} 1 9 {a:^b} 0 10 {a:^"a b"} 1 } { do_execsql_test 2.$tn { SELECT EXISTS (SELECT rowid FROM x1($expr)) } $match } #------------------------------------------------------------------------- # do_execsql_test 3.0 { DELETE FROM x1; INSERT INTO x1 VALUES('b a', 'c a'); INSERT INTO x1 VALUES('a a', 'c c'); INSERT INTO x1 VALUES('a b', 'a a'); } fts5_aux_test_functions db foreach {tn expr expect} { 1 {^a} {{2 1}} 2 {^c AND ^b} {{0 2} {1 0}} } { do_execsql_test 3.$tn { SELECT fts5_test_queryphrase(x1) FROM x1($expr) LIMIT 1 } [list $expect] } #------------------------------------------------------------------------- # do_execsql_test 3.1 { CREATE VIRTUAL TABLE x2 USING fts5(a, b, c, detail=column); } do_catchsql_test 3.2 { SELECT * FROM x2('a + b'); } {1 {fts5: phrase queries are not supported (detail!=full)}} do_catchsql_test 3.3 { SELECT * FROM x2('^a'); } {1 {fts5: phrase queries are not supported (detail!=full)}} finish_test |