Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Add support for phrase queries to fts5. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
0780ef93050829a40fcb3873b2529ab5 |
User & Date: | dan 2012-12-28 18:57:05.769 |
Context
2012-12-28
| ||
20:01 | Add support for NEAR queries to fts5. check-in: ed403fecf2 user: dan tags: trunk | |
18:57 | Add support for phrase queries to fts5. check-in: 0780ef9305 user: dan tags: trunk | |
2012-12-27
| ||
18:01 | Fill in some functions so that a tiny subset of fts5 queries work. check-in: fb07003744 user: dan tags: trunk | |
Changes
Changes to src/fts5.c.
︙ | ︙ | |||
116 117 118 119 120 121 122 | /* Space for dequoted copies of strings */ char *aSpace; int iSpace; int nSpace; /* Total size of aSpace in bytes */ }; struct Fts5Token { | | | > > > < > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 | /* Space for dequoted copies of strings */ char *aSpace; int iSpace; int nSpace; /* Total size of aSpace in bytes */ }; struct Fts5Token { /* TODO: The first three members are redundant in some senses, since the ** same information is encoded in the aPrefix[]/nPrefix key. */ int bPrefix; /* True for a prefix search */ int n; /* Size of z[] in bytes */ char *z; /* Token value */ KVCursor *pCsr; /* Cursor to iterate thru entries for token */ KVByteArray *aPrefix; /* KV prefix to iterate through */ KVSize nPrefix; /* Size of aPrefix in bytes */ }; struct Fts5Str { Fts5Token *aToken; int nToken; u8 *aList; int nList; int nListAlloc; }; struct Fts5Phrase { int iCol; /* Column of table to search (-1 -> all) */ int nStr; Fts5Str *aStr; int *aiNear; }; struct Fts5ExprNode { int eType; Fts5Phrase *pPhrase; Fts5ExprNode *pLeft; Fts5ExprNode *pRight; }; struct Fts5Expr { Fts5ExprNode *pRoot; }; /* ** FTS5 specific cursor data. */ struct Fts5Cursor { sqlite4 *db; Fts5Info *pInfo; Fts5Expr *pExpr; /* MATCH expression for this cursor */ KVByteArray *aKey; /* Buffer for primary key */ int nKeyAlloc; /* Bytes allocated at aKey[] */ }; /* ** This type is used when reading (decoding) an instance-list. */ typedef struct InstanceList InstanceList; struct InstanceList { u8 *aList; int nList; int iList; /* The current entry */ int iCol; int iWeight; int iOff; }; /* ** Return true for EOF, or false if the next entry is valid. */ static int fts5InstanceListNext(InstanceList *p){ int i = p->iList; int bRet = 1; while( bRet && i<p->nList ){ u32 iVal; i += getVarint32(&p->aList[i], iVal); if( (iVal & 0x03)==0x01 ){ p->iCol = (iVal>>2); p->iOff = 0; } else if( (iVal & 0x03)==0x03 ){ p->iWeight = (iVal>>2); } else{ p->iOff += (iVal>>1); bRet = 0; } } p->iList = i; return bRet; } static void fts5InstanceListAppend( InstanceList *p, /* Instance list to append to */ int iCol, /* Column of new entry */ int iWeight, /* Weight of new entry */ int iOff /* Offset of new entry */ ){ assert( iCol>=p->iCol ); assert( iCol>p->iCol || iOff>=p->iOff ); if( iCol!=p->iCol ){ p->iList += putVarint32(&p->aList[p->iList], (iCol<<2)|0x01); p->iCol = iCol; p->iOff = 0; } if( iWeight!=p->iWeight ){ p->iList += putVarint32(&p->aList[p->iList], (iWeight<<2)|0x03); p->iWeight = iWeight; } p->iList += putVarint32(&p->aList[p->iList], (iOff-p->iOff)<<1); p->iOff = iOff; assert( p->iList<=p->nList ); } static void fts5InstanceListInit(u8 *aList, int nList, InstanceList *p){ memset(p, 0, sizeof(InstanceList)); p->aList = aList; p->nList = nList; } /* ** Return true if argument c is one of the special non-whitespace ** characters that ends an unquoted expression token. */ static int fts5IsSpecial(char c){ return (c==':' || c=='(' || c==')' || c=='+' || c=='"'); |
︙ | ︙ | |||
458 459 460 461 462 463 464 465 466 467 468 469 470 471 | int i; for(i=0; i<p->nStr; i++){ int iTok; for(iTok=0; iTok<p->aStr[i].nToken; iTok++){ sqlite4KVCursorClose(p->aStr[i].aToken[iTok].pCsr); } sqlite4DbFree(db, p->aStr[i].aToken); } sqlite4DbFree(db, p->aiNear); sqlite4DbFree(db, p->aStr); sqlite4DbFree(db, p); } } | > | 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 | int i; for(i=0; i<p->nStr; i++){ int iTok; for(iTok=0; iTok<p->aStr[i].nToken; iTok++){ sqlite4KVCursorClose(p->aStr[i].aToken[iTok].pCsr); } sqlite4DbFree(db, p->aStr[i].aToken); sqlite4DbFree(db, p->aStr[i].aList); } sqlite4DbFree(db, p->aiNear); sqlite4DbFree(db, p->aStr); sqlite4DbFree(db, p); } } |
︙ | ︙ | |||
1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 | i64 *piCksum /* OUT: Checksum value */ ){ i64 cksum = 0; u8 const *aKey; int nKey; /* Key blob */ u8 const *aVal; int nVal; /* List of token instances */ u8 const *aToken; int nToken; /* Token for this entry */ u8 const *aPk; int nPk; /* Entry primary key blob */ int nTnum; u32 tnum; | > < < < < | < < | < < < < < < < < | < | > | < | 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 | i64 *piCksum /* OUT: Checksum value */ ){ i64 cksum = 0; u8 const *aKey; int nKey; /* Key blob */ u8 const *aVal; int nVal; /* List of token instances */ u8 const *aToken; int nToken; /* Token for this entry */ u8 const *aPk; int nPk; /* Entry primary key blob */ InstanceList sList; /* Used to iterate through pVal */ int nTnum; u32 tnum; aKey = (const u8 *)sqlite4_value_blob(pKey); nKey = sqlite4_value_bytes(pKey); aVal = (const u8 *)sqlite4_value_blob(pVal); nVal = sqlite4_value_bytes(pVal); /* Find the token and primary key blobs for this entry. */ nTnum = getVarint32(aKey, tnum); aToken = &aKey[nTnum+1]; nToken = sqlite4Strlen30((const char *)aToken); aPk = &aToken[nToken+1]; nPk = (&aKey[nKey] - aPk); fts5InstanceListInit((u8 *)aVal, nVal, &sList); while( 0==fts5InstanceListNext(&sList) ){ i64 v = fts5TermInstanceCksum( aPk, nPk, aToken, nToken, sList.iWeight, sList.iCol, sList.iOff ); cksum = cksum ^ v; } *piCksum = cksum; return SQLITE4_OK; } typedef struct CksumCtx CksumCtx; |
︙ | ︙ | |||
1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 | static int fts5OpenCursors(sqlite4 *db, Fts5Info *pInfo, Fts5Cursor *pCsr){ return fts5OpenExprCursors(db, pInfo, pCsr->pExpr->pRoot); } void sqlite4Fts5Close(sqlite4 *db, Fts5Cursor *pCsr){ if( pCsr ){ fts5ExpressionFree(db, pCsr->pExpr); sqlite4DbFree(db, pCsr); } } int sqlite4Fts5Open( sqlite4 *db, /* Database handle */ Fts5Info *pInfo, /* Index description */ const char *zMatch, /* Match expression */ int bDesc, /* True to iterate in desc. order of PK */ Fts5Cursor **ppCsr, /* OUT: New FTS cursor object */ | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 | static int fts5OpenCursors(sqlite4 *db, Fts5Info *pInfo, Fts5Cursor *pCsr){ return fts5OpenExprCursors(db, pInfo, pCsr->pExpr->pRoot); } void sqlite4Fts5Close(sqlite4 *db, Fts5Cursor *pCsr){ if( pCsr ){ fts5ExpressionFree(db, pCsr->pExpr); sqlite4DbFree(db, pCsr->aKey); sqlite4DbFree(db, pCsr); } } /* ** Obtain the primary key value from the entry cursor pToken->pCsr currently ** points to. Set *paPk to point to a buffer containing the PK, and *pnPk ** to the size of the buffer in bytes before returning. ** ** Return SQLITE4_OK if everything goes according to plan, or an error code ** if an error occurs. If an error occurs the final values of *paPk and *pnPk ** are undefined. */ static int fts5TokenPk(Fts5Token *p, const u8 **paPk, int *pnPk){ int rc; const u8 *aKey; int nKey; rc = sqlite4KVCursorKey(p->pCsr, &aKey, &nKey); if( rc==SQLITE4_OK ){ if( nKey<p->nPrefix || memcmp(p->aPrefix, aKey, p->nPrefix) ){ rc = SQLITE4_NOTFOUND; }else{ *paPk = &aKey[p->nPrefix]; *pnPk = nKey - p->nPrefix; } } return rc; } static int fts5KeyCompare( const u8 *aLeft, int nLeft, const u8 *aRight, int nRight ){ int nMin = (nLeft > nRight) ? nRight : nLeft; int res; res = memcmp(aLeft, aRight, nMin); return (res ? res : (nLeft-nRight)); } static int fts5TokenAdvanceToMatch( InstanceList *p, InstanceList *pFirst, int iOff, int *pbEof ){ int iReq = pFirst->iOff + iOff; while( p->iCol<pFirst->iCol || (p->iCol==pFirst->iCol && p->iOff < iReq) ){ int bEof = fts5InstanceListNext(p); if( bEof ){ *pbEof = 1; return 0; } } return (p->iCol==pFirst->iCol && p->iOff==iReq); } /* ** This function tests if the cursors embedded in the Fts5Str object ** currently point to a match for the entire string. If so, *pbMatch ** is set to true before returning. ** ** If the cursors do not point to a match, then *piAdvance is set to ** the index of the individual cursor that should be advanced before ** retrying this function. Or, if one or more of the cursors are at EOF ** (implying that there will be no further matches), *piAdvance is set ** to a negative value. */ static int fts5StringIsMatch( Fts5Cursor *pCsr, /* Cursor that owns this string */ Fts5Str *pStr, /* String expression to test */ int *pbMatch, /* OUT: True for a match, false otherwise */ int *piAdvance /* OUT: Token to advance before retrying */ ){ const u8 *aPk1 = 0; int nPk1 = 0; int rc = SQLITE4_OK; int i; *pbMatch = 0; *piAdvance = 0; pStr->nList = 0; rc = fts5TokenPk(&pStr->aToken[0], &aPk1, &nPk1); for(i=1; rc==SQLITE4_OK && i<pStr->nToken; i++){ const u8 *aPk = 0; int nPk = 0; rc = fts5TokenPk(&pStr->aToken[i], &aPk, &nPk); if( rc==SQLITE4_OK ){ int res = fts5KeyCompare(aPk1, nPk1, aPk, nPk); if( res<0 ){ return SQLITE4_OK; } if( res>0 ){ *piAdvance = i; return SQLITE4_OK; } } } if( pStr->nToken>1 ){ /* At this point, it is established that all of the token cursors in the ** string point to an entry with the same primary key. Now synthesize a ** position list for the entire string. */ int bEof = 0; int nByte = sizeof(InstanceList) * pStr->nToken; InstanceList *aIn; InstanceList out; memset(&out, 0, sizeof(InstanceList)); aIn = (InstanceList *)sqlite4DbMallocZero(pCsr->db, nByte); if( !aIn ) rc = SQLITE4_NOMEM; for(i=0; rc==SQLITE4_OK && i<pStr->nToken; i++){ const u8 *aData; int nData; rc = sqlite4KVCursorData(pStr->aToken[i].pCsr, 0, -1, &aData, &nData); if( rc==SQLITE4_OK ){ fts5InstanceListInit((u8 *)aData, nData, &aIn[i]); fts5InstanceListNext(&aIn[i]); } } /* Allocate the output list */ if( rc==SQLITE4_OK ){ int nReq = aIn[0].nList; if( nReq<=pStr->nListAlloc ){ out.aList = pStr->aList; out.nList = pStr->nListAlloc; }else{ pStr->aList = out.aList = sqlite4DbMallocZero(pCsr->db, nReq*2); pStr->nListAlloc = out.nList = nReq*2; if( out.aList==0 ) rc = SQLITE4_NOMEM; } } while( rc==SQLITE4_OK && bEof==0 ){ for(i=1; i<pStr->nToken; i++){ int bMatch = fts5TokenAdvanceToMatch(&aIn[i], &aIn[0], i, &bEof); if( bMatch==0 || bEof ) break; } if( i==pStr->nToken ){ /* Record a match here */ fts5InstanceListAppend(&out, aIn[0].iCol, aIn[0].iWeight, aIn[0].iOff); *pbMatch = 1; } bEof = fts5InstanceListNext(&aIn[0]); } pStr->nList = out.iList; sqlite4DbFree(pCsr->db, aIn); }else{ *pbMatch = 1; } return rc; } static int fts5StringAdvanceToMatch(Fts5Cursor *pCsr, Fts5Str *pStr){ int rc; do { int bMatch; int iAdvance; rc = fts5StringIsMatch(pCsr, pStr, &bMatch, &iAdvance); if( rc!=SQLITE4_OK || bMatch ) break; rc = sqlite4KVCursorNext(pStr->aToken[iAdvance].pCsr); }while( rc==SQLITE4_OK ); return rc; } static int fts5StringAdvance(Fts5Cursor *pCsr, Fts5Str *pStr){ int rc = sqlite4KVCursorNext(pStr->aToken[0].pCsr); if( rc==SQLITE4_OK ) rc = fts5StringAdvanceToMatch(pCsr, pStr); return rc; } static int fts5PhraseAdvanceToMatch(Fts5Cursor *pCsr, Fts5Phrase *pPhrase){ return fts5StringAdvanceToMatch(pCsr, &pPhrase->aStr[0]); } static int fts5PhraseAdvance(Fts5Cursor *pCsr, Fts5Phrase *pPhrase){ return fts5StringAdvance(pCsr, &pPhrase->aStr[0]); } int sqlite4Fts5Next(Fts5Cursor *pCsr){ Fts5Phrase *pPhrase; int rc; assert( pCsr->pExpr->pRoot->eType==TOKEN_PRIMITIVE ); pPhrase = pCsr->pExpr->pRoot->pPhrase; rc = fts5PhraseAdvance(pCsr, pPhrase); return rc; } int sqlite4Fts5Open( sqlite4 *db, /* Database handle */ Fts5Info *pInfo, /* Index description */ const char *zMatch, /* Match expression */ int bDesc, /* True to iterate in desc. order of PK */ Fts5Cursor **ppCsr, /* OUT: New FTS cursor object */ |
︙ | ︙ | |||
1483 1484 1485 1486 1487 1488 1489 | pCsr->db = db; rc = fts5ParseExpression(db, pInfo->pTokenizer, pInfo->p, pInfo->iRoot, pInfo->azCol, pInfo->nCol, zMatch, &pCsr->pExpr, pzErr ); } if( rc==SQLITE4_OK ){ | | > > > < < < < < < < < < < < < < < < < < < < < < < | 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 | pCsr->db = db; rc = fts5ParseExpression(db, pInfo->pTokenizer, pInfo->p, pInfo->iRoot, pInfo->azCol, pInfo->nCol, zMatch, &pCsr->pExpr, pzErr ); } if( rc==SQLITE4_OK ){ /* Open a KV cursor for each term in the expression. Set each cursor ** to point to the first entry in the range it will scan. */ rc = fts5OpenCursors(db, pInfo, pCsr); } if( rc!=SQLITE4_OK ){ sqlite4Fts5Close(db, pCsr); pCsr = 0; }else{ rc = fts5PhraseAdvanceToMatch(pCsr, pCsr->pExpr->pRoot->pPhrase); } *ppCsr = pCsr; return rc; } /* ** Return true if the cursor passed as the second argument currently points ** to a valid entry, or false otherwise. */ |
︙ | ︙ |
Changes to src/vdbe.c.
︙ | ︙ | |||
4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 | assert( pOp->p4type==P4_FTS5INFO ); pInfo = pOp->p4.pFtsInfo; pCur = allocateCursor(p, pOp->p1, 0, pInfo->iDb, 0); if( pCur ){ rc = sqlite4Fts5Open(db, pInfo, zMatch, pOp->p5, &pCur->pFts, &p->zErrMsg); } if( rc==SQLITE4_OK && 0==sqlite4Fts5Valid(pCur->pFts) ){ pc = pOp->p2-1; } break; } /* Opcode: FtsNext P1 P2 * * * ** ** Advance FTS cursor P1 to the next entry and jump to instruction P2. Or, ** if there is no next entry, set the cursor to point to EOF and fall through | > > > > > > | 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 | assert( pOp->p4type==P4_FTS5INFO ); pInfo = pOp->p4.pFtsInfo; pCur = allocateCursor(p, pOp->p1, 0, pInfo->iDb, 0); if( pCur ){ rc = sqlite4Fts5Open(db, pInfo, zMatch, pOp->p5, &pCur->pFts, &p->zErrMsg); } if( rc==SQLITE4_NOTFOUND ){ rc = SQLITE4_OK; pc = pOp->p2-1; } #if 0 if( rc==SQLITE4_OK && 0==sqlite4Fts5Valid(pCur->pFts) ){ pc = pOp->p2-1; } #endif break; } /* Opcode: FtsNext P1 P2 * * * ** ** Advance FTS cursor P1 to the next entry and jump to instruction P2. Or, ** if there is no next entry, set the cursor to point to EOF and fall through |
︙ | ︙ |
Changes to src/vdbeaux.c.
︙ | ︙ | |||
1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 | ** Close a VDBE cursor and release all the resources that cursor ** happens to hold. */ void sqlite4VdbeFreeCursor(Vdbe *p, VdbeCursor *pCx){ if( pCx==0 ){ return; } if( pCx->pKVCur ){ sqlite4KVCursorClose(pCx->pKVCur); } if( pCx->pTmpKV ){ sqlite4KVStoreClose(pCx->pTmpKV); } #ifndef SQLITE4_OMIT_VIRTUALTABLE | > | 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 | ** Close a VDBE cursor and release all the resources that cursor ** happens to hold. */ void sqlite4VdbeFreeCursor(Vdbe *p, VdbeCursor *pCx){ if( pCx==0 ){ return; } sqlite4Fts5Close(p->db, pCx->pFts); if( pCx->pKVCur ){ sqlite4KVCursorClose(pCx->pKVCur); } if( pCx->pTmpKV ){ sqlite4KVStoreClose(pCx->pTmpKV); } #ifndef SQLITE4_OMIT_VIRTUALTABLE |
︙ | ︙ |
Changes to test/fts5query1.test.
︙ | ︙ | |||
43 44 45 46 47 48 49 | INSERT INTO t1 VALUES(1, 'o n e'); INSERT INTO t1 VALUES(2, 't w o'); INSERT INTO t1 VALUES(3, 't h r e e'); INSERT INTO t1 VALUES(4, 'f o u r'); } foreach {tn stmt res} { | | > > > | 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 | INSERT INTO t1 VALUES(1, 'o n e'); INSERT INTO t1 VALUES(2, 't w o'); INSERT INTO t1 VALUES(3, 't h r e e'); INSERT INTO t1 VALUES(4, 'f o u r'); } foreach {tn stmt res} { 1 {SELECT x FROM t1 WHERE t1 MATCH 't'} {2 3} 2 {SELECT x FROM t1 WHERE t1 MATCH 'abc'} {} 3 {SELECT x FROM t1 WHERE t1 MATCH 't+h'} {3} 4 {SELECT x FROM t1 WHERE t1 MATCH 't+o'} {} } { do_execsql_test 2.$tn $stmt $res } finish_test |