Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Make a small change to the bt cell formats to accommodate delete keys. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
a5186d0b0ad8032fa1b93a2bcb1b6390 |
User & Date: | dan 2013-11-28 15:23:21.239 |
Context
2013-11-28
| ||
18:24 | Write a delete-key into the top level of the fast-insert tree when an item is deleted. Change the seek code so that if a delete-key is encountered SQLITE4_NOTFOUND is returned to the caller. check-in: d9fa045dd7 user: dan tags: trunk | |
15:23 | Make a small change to the bt cell formats to accommodate delete keys. check-in: a5186d0b0a user: dan tags: trunk | |
2013-11-27
| ||
18:44 | Fix a bug in sqlite4BtCsrFirst(). check-in: 10d25d2516 user: dan tags: trunk | |
Changes
Changes to src/bt_main.c.
︙ | ︙ | |||
1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 | if( nKLocal==0 ){ /* Type (c) leaf cell. */ pCell += sqlite4BtVarintGet32(pCell, &nKLocal); pKLocal = pCell; pCell += nKLocal; pCell += sqlite4BtVarintGet32(pCell, &nKOvfl); pCell += sqlite4BtVarintGet32(pCell, &nVOvfl); }else{ pKLocal = pCell; pCell += nKLocal; pCell += sqlite4BtVarintGet32(pCell, &nVLocal); if( nVLocal==0 ){ /* Type (b) */ pCell += sqlite4BtVarintGet32(pCell, &nVLocal); pVLocal = pCell; pCell += nVLocal; pCell += sqlite4BtVarintGet32(pCell, &nVOvfl); }else{ /* Type (a) */ pVLocal = pCell; } } pCsr->ovfl.nKey = nKLocal + nKOvfl; pCsr->ovfl.nVal = nVLocal + nVOvfl; nReq = pCsr->ovfl.nKey + pCsr->ovfl.nVal; | > > | 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 | if( nKLocal==0 ){ /* Type (c) leaf cell. */ pCell += sqlite4BtVarintGet32(pCell, &nKLocal); pKLocal = pCell; pCell += nKLocal; pCell += sqlite4BtVarintGet32(pCell, &nKOvfl); pCell += sqlite4BtVarintGet32(pCell, &nVOvfl); nVOvfl -= 1; }else{ pKLocal = pCell; pCell += nKLocal; pCell += sqlite4BtVarintGet32(pCell, &nVLocal); if( nVLocal==0 ){ /* Type (b) */ pCell += sqlite4BtVarintGet32(pCell, &nVLocal); pVLocal = pCell; pCell += nVLocal; pCell += sqlite4BtVarintGet32(pCell, &nVOvfl); }else{ /* Type (a) */ pVLocal = pCell; nVLocal -= 2; } } pCsr->ovfl.nKey = nKLocal + nKOvfl; pCsr->ovfl.nVal = nVLocal + nVOvfl; nReq = pCsr->ovfl.nKey + pCsr->ovfl.nVal; |
︙ | ︙ | |||
1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 | aData = (u8*)sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]); assert( btCellCount(aData, pgsz)>iCell ); pCell = btCellFind(aData, pgsz, iCell); pCell += sqlite4BtVarintGet32(pCell, &n); if( n==0 ){ pCell += sqlite4BtVarintGet32(pCell, &n); pCell += n; pCell += sqlite4BtVarintGet32(pCell, &n); pCell += sqlite4BtVarintGet32(pCell, &n); pOvfl = pCell; }else{ pCell += n; pCell += sqlite4BtVarintGet32(pCell, &n); if( n==0 ){ pCell += sqlite4BtVarintGet32(pCell, &n); pCell += n; pCell += sqlite4BtVarintGet32(pCell, &n); pOvfl = pCell; } } | > > | 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 | aData = (u8*)sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]); assert( btCellCount(aData, pgsz)>iCell ); pCell = btCellFind(aData, pgsz, iCell); pCell += sqlite4BtVarintGet32(pCell, &n); if( n==0 ){ /* Type (c) cell */ pCell += sqlite4BtVarintGet32(pCell, &n); pCell += n; pCell += sqlite4BtVarintGet32(pCell, &n); pCell += sqlite4BtVarintGet32(pCell, &n); pOvfl = pCell; }else{ pCell += n; pCell += sqlite4BtVarintGet32(pCell, &n); if( n==0 ){ /* Type (b) cell */ pCell += sqlite4BtVarintGet32(pCell, &n); pCell += n; pCell += sqlite4BtVarintGet32(pCell, &n); pOvfl = pCell; } } |
︙ | ︙ | |||
1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 | pCell += sqlite4BtVarintGet32(pCell, &nK); if( nK>0 ){ pCell += nK; pCell += sqlite4BtVarintGet32(pCell, &nV); } if( nV==0 ){ rc = btCsrBuffer(pCsr, 1); if( rc==SQLITE4_OK ){ u8 *aBuf = (u8*)pCsr->ovfl.buf.p; *ppV = &aBuf[pCsr->ovfl.nKey]; *pnV = pCsr->ovfl.nVal; } }else{ *ppV = pCell; | > > | | 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 | pCell += sqlite4BtVarintGet32(pCell, &nK); if( nK>0 ){ pCell += nK; pCell += sqlite4BtVarintGet32(pCell, &nV); } if( nV==0 ){ /* Type (b) or (c) cell */ rc = btCsrBuffer(pCsr, 1); if( rc==SQLITE4_OK ){ u8 *aBuf = (u8*)pCsr->ovfl.buf.p; *ppV = &aBuf[pCsr->ovfl.nKey]; *pnV = pCsr->ovfl.nVal; } }else{ /* Type (a) cell */ *ppV = pCell; *pnV = (nV-2); } #ifndef NDEBUG if( rc==SQLITE4_OK ){ const void *pK; int nK; rc = sqlite4BtCsrKey((bt_cursor*)pCsr, &pK, &nK); if( rc==SQLITE4_OK ){ |
︙ | ︙ | |||
1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 | static int btCellSize(u8 *pCell, int bLeaf){ u8 *p = pCell; int nKey; p += sqlite4BtVarintGet32(p, &nKey); if( bLeaf==0 ){ p += nKey; p += 4; }else if( nKey==0 ){ p += sqlite4BtVarintGet32(p, &nKey); p += nKey; p += sqlite4BtVarintGet32(p, &nKey); p += sqlite4BtVarintGet32(p, &nKey); p += btOverflowArrayLen(p); }else{ p += nKey; p += sqlite4BtVarintGet32(p, &nKey); if( nKey==0 ){ p += sqlite4BtVarintGet32(p, &nKey); p += nKey; p += sqlite4BtVarintGet32(p, &nKey); p += btOverflowArrayLen(p); }else{ | > > > > | | 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 | static int btCellSize(u8 *pCell, int bLeaf){ u8 *p = pCell; int nKey; p += sqlite4BtVarintGet32(p, &nKey); if( bLeaf==0 ){ /* Internal page cell */ p += nKey; p += 4; }else if( nKey==0 ){ /* Type (c) cell */ p += sqlite4BtVarintGet32(p, &nKey); p += nKey; p += sqlite4BtVarintGet32(p, &nKey); p += sqlite4BtVarintGet32(p, &nKey); p += btOverflowArrayLen(p); }else{ p += nKey; p += sqlite4BtVarintGet32(p, &nKey); if( nKey==0 ){ /* Type (b) cell */ p += sqlite4BtVarintGet32(p, &nKey); p += nKey; p += sqlite4BtVarintGet32(p, &nKey); p += btOverflowArrayLen(p); }else{ /* Type (a) cell */ p += (nKey-2); } } return (p-pCell); } static u8 *btCellFindSize(u8 *aData, int nData, int iCell, int *pnByte){ |
︙ | ︙ | |||
1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 | const void *pK; int nK; const void *pV; int nV; u32 pgno; }; /* ** Return the number of bytes consumed by a cell generated based on *pKV. */ static int btKVCellSize(KeyValue *pKV){ int nByte; assert( pKV->eType==KV_CELL || pKV->eType==KV_VALUE ); if( pKV->eType==KV_CELL ){ nByte = pKV->nV; }else{ if( pKV->pgno ){ nByte = sqlite4BtVarintLen32(pKV->nK) + pKV->nK + 4; }else{ nByte = sqlite4BtVarintLen32(pKV->nK) | > > > | | | > | 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 | const void *pK; int nK; const void *pV; int nV; u32 pgno; }; /* ** Return the number of bytes consumed by a cell generated based on *pKV. ** ** If the KeyValue is not already in KV_CELL form, then assume it will ** be formatted as a type (a) cell. */ static int btKVCellSize(KeyValue *pKV){ int nByte; assert( pKV->eType==KV_CELL || pKV->eType==KV_VALUE ); if( pKV->eType==KV_CELL ){ nByte = pKV->nV; }else{ if( pKV->pgno ){ nByte = sqlite4BtVarintLen32(pKV->nK) + pKV->nK + 4; }else{ nByte = sqlite4BtVarintLen32(pKV->nK) + sqlite4BtVarintLen32(pKV->nV+2) + pKV->nV + pKV->nK; } } return nByte; } /* ** Write a cell based on *pKV to buffer aBuffer. Return the number ** of bytes written. */ static int btKVCellWrite(KeyValue *pKV, u8 *aBuf){ int i = 0; if( pKV->eType==KV_CELL ){ i = pKV->nV; memcpy(aBuf, pKV->pV, i); }else{ i += sqlite4BtVarintPut32(&aBuf[i], pKV->nK); memcpy(&aBuf[i], pKV->pK, pKV->nK); i += pKV->nK; if( pKV->pgno==0 ){ i += sqlite4BtVarintPut32(&aBuf[i], pKV->nV+2); memcpy(&aBuf[i], pKV->pV, pKV->nV); i += pKV->nV; }else{ btPutU32(&aBuf[i], pKV->pgno); i += 4; } } assert( i==btKVCellSize(pKV) ); |
︙ | ︙ | |||
1812 1813 1814 1815 1816 1817 1818 | int nReq; int rc = SQLITE4_OK; assert( pKV->pgno==0 && pKV->eType==KV_VALUE ); /* Check if this is a type (a) cell - one that can fit entirely on a ** leaf page. If so, do nothing. */ | < | | > > > > < | | 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 | int nReq; int rc = SQLITE4_OK; assert( pKV->pgno==0 && pKV->eType==KV_VALUE ); /* Check if this is a type (a) cell - one that can fit entirely on a ** leaf page. If so, do nothing. */ nReq = btKVCellSize(pKV); if( nReq > nMaxSize ){ int nArraySz = btOverflowArraySz(pgsz, pKV->nK + pKV->nV); u8 *pBuf = 0; /* Buffer containing formatted cell */ int nKeyOvfl; /* Bytes of key that overflow */ int nValOvfl; /* Bytes of value that overflow */ /* Check if the entire key can fit on a leaf page. If so, this is a ** type (b) page - entire key and partial value on the leaf page, ** overflow pages contain the rest of the value. ** ** This expression uses sqlite4BtVarintLen32() to calculate an upper ** bound for the size of the varint that indicates the number of bytes ** of the value stored locally. */ nReq = 1 + sqlite4BtVarintLen32(pKV->nK) + pKV->nK + 1 + sqlite4BtVarintLen32(pKV->nV) + nArraySz; if( nReq<nMaxSize ){ /* nSpc is initialized to the amount of space available to store: ** ** * varint containing number of bytes stored locally (nLVal). ** * nLVal bytes of content. ** * varint containing number of bytes in overflow pages. */ int nLVal; /* Bytes of value data on main page */ int nSpc = (nMaxSize - sqlite4BtVarintLen32(pKV->nK) - pKV->nK - 1 - nArraySz ); nLVal = nSpc - sqlite4BtVarintLen32(pgsz) - sqlite4BtVarintLen32(pKV->nV); nKeyOvfl = 0; nValOvfl = pKV->nV - nLVal; }else{ /* Type (c) cell. Both the key and value overflow. */ int nLKey = nMaxSize - 1 /* 0x00 byte */ - sqlite4BtVarintLen32(pgsz) /* nLKey */ - sqlite4BtVarintLen32(pKV->nK) /* nOKey */ - sqlite4BtVarintLen32(pKV->nV+1) /* nVal */ - nArraySz; /* overflow array */ nValOvfl = pKV->nV; nKeyOvfl = pKV->nK - nLKey; } /* Allocate a pager buffer to store the KV_CELL buffer. Using a pager |
︙ | ︙ | |||
1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 | if( nKeyOvfl>0 ){ *pOut++ = 0x00; } pOut += sqlite4BtVarintPut32(pOut, nLKey); memcpy(pOut, pKV->pK, nLKey); pOut += nLKey; if( nKeyOvfl==0 ){ *pOut++ = 0x00; pOut += sqlite4BtVarintPut32(pOut, nLVal); memcpy(pOut, pKV->pV, nLVal); pOut += nLVal; }else{ pOut += sqlite4BtVarintPut32(pOut, nKeyOvfl); } | > > | | 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 | if( nKeyOvfl>0 ){ *pOut++ = 0x00; } pOut += sqlite4BtVarintPut32(pOut, nLKey); memcpy(pOut, pKV->pK, nLKey); pOut += nLKey; if( nKeyOvfl==0 ){ /* Type (b) cell */ *pOut++ = 0x00; pOut += sqlite4BtVarintPut32(pOut, nLVal); memcpy(pOut, pKV->pV, nLVal); pOut += nLVal; }else{ /* Type (c) cell */ pOut += sqlite4BtVarintPut32(pOut, nKeyOvfl); } pOut += sqlite4BtVarintPut32(pOut, nValOvfl + (nKeyOvfl>0)); rc = btOverflowArrayPopulate(db, &pOut, (u8*)(pKV->pK) + nLKey, nKeyOvfl, (u8*)(pKV->pV) + nLVal, nValOvfl ); if( rc==SQLITE4_OK ){ memset(pKV, 0, sizeof(*pKV)); |
︙ | ︙ | |||
2698 2699 2700 2701 2702 2703 2704 | rc = btBalance(pCsr, bLeaf, 0, 0); } return rc; } static int btSaveAllCursor(bt_db *pDb, BtCursor *pCsr){ int rc = SQLITE4_OK; /* Return code */ | | | | | | | | | | | < | 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 | rc = btBalance(pCsr, bLeaf, 0, 0); } return rc; } static int btSaveAllCursor(bt_db *pDb, BtCursor *pCsr){ int rc = SQLITE4_OK; /* Return code */ bt_cursor *pIter; /* Used to iterate through cursors */ for(pIter=pDb->pAllCsr; rc==SQLITE4_OK && pIter; pIter=pIter->pNextCsr){ if( pIter->eType==CSR_TYPE_BT ){ BtCursor *p = (BtCursor*)pIter; if( p->nPg>0 ){ assert( p->bRequireReseek==0 ); rc = btCsrBuffer(p, 0); if( rc==SQLITE4_OK ){ assert( p->ovfl.buf.p ); p->bRequireReseek = 1; if( p!=pCsr ) btCsrReleaseAll(p); } } }else{ /* ?? */ } } return rc; } static int btFastInsertRoot(bt_db *db, BtDbHdr *pHdr, u32 *piRoot); |
︙ | ︙ |
Changes to www/bt.wiki.
︙ | ︙ | |||
538 539 540 541 542 543 544 | that use overflow pages for the value only, and (c) cells that use overflow pages for the key and value. <p>Cell type (a): <ul> <li> Number of bytes of the entries key (nKey), as a varint. <li> nKey bytes of key data. | | > | | > | 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 | that use overflow pages for the value only, and (c) cells that use overflow pages for the key and value. <p>Cell type (a): <ul> <li> Number of bytes of the entries key (nKey), as a varint. <li> nKey bytes of key data. <li> Number of bytes in entries value plus one (nValue+2), as a varint. Or, for a delete key, a single 0x01 byte. <li> Unless this is a delete key, nValue bytes of value data. </ul> <p>Cell type (b): <ul> <li> Number of bytes in entries key (nKey), as a varint. <li> nKey bytes of key data. <li> Single 0x00 byte. <li> Number of bytes in entries value stored locally (nLVal), as a varint. <li> nLVal bytes of value data. <li> Number of bytes in entries value stored on overflow pages, as a varint. </ul> <p>Cell type (c): <ul> <li> Single 0x00 byte. <li> Number of bytes of the entries key (nLocalKey) stored locally, as a varint. <li> nLocalKey bytes of key data. <li> Number of bytes of the entries key stored on overflow pages, as a varint. <li> Number of bytes in the entries value plus one (nValue+1), as a varint. Or, for a delete key, a single 0x00 byte. </ul> <p>Cell types (b) and (c) are followed by an array of pointers to overflow pages. The overflow data for a single entry is distributed between up to 16 "direct" overflow pages and a single overflow tree. A direct overflow page is just that - a pointer to an overflow page that contains data. An overflow "tree" is a tree structure where leaves contain overflow data and nodes |
︙ | ︙ |