Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Allocate blocks of space (not individual pages) within the database file for sub-trees. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
5986afca5887778b12de0920d0ee0d25 |
User & Date: | dan 2013-11-27 14:40:09.224 |
Context
2013-11-27
| ||
18:21 | Begin adding code for querying the fast-insert tree. check-in: 65735e3ca9 user: dan tags: trunk | |
14:40 | Allocate blocks of space (not individual pages) within the database file for sub-trees. check-in: 5986afca58 user: dan tags: trunk | |
2013-11-26
| ||
20:35 | Have the low-level b-tree insert routine return BT_BLOCKFULL if a level-0 tree is full. check-in: 65642c32ba user: dan tags: trunk | |
Changes
Changes to src/btInt.h.
︙ | ︙ | |||
18 19 20 21 22 23 24 25 26 27 28 29 30 31 | typedef sqlite4_int64 i64; typedef sqlite4_uint64 u64; typedef unsigned int u32; typedef unsigned short u16; typedef unsigned char u8; /* ** Special error codes used internally. These must be distinct from SQLite4 ** error codes. Which is easy - SQLite4 error codes are all greater than or ** equal to zero. */ #define BT_BLOCKFULL -1 | > > | 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 | typedef sqlite4_int64 i64; typedef sqlite4_uint64 u64; typedef unsigned int u32; typedef unsigned short u16; typedef unsigned char u8; typedef struct BtDbHdr BtDbHdr; /* ** Special error codes used internally. These must be distinct from SQLite4 ** error codes. Which is easy - SQLite4 error codes are all greater than or ** equal to zero. */ #define BT_BLOCKFULL -1 |
︙ | ︙ | |||
45 46 47 48 49 50 51 | /* By default pages are 1024 bytes in size. */ #define BT_DEFAULT_PGSZ 1024 /* By default blocks are 512K bytes in size. */ #define BT_DEFAULT_BLKSZ (512*1024) | < > > > > > > > > > > > > > > > > > > > > > > | | 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 | /* By default pages are 1024 bytes in size. */ #define BT_DEFAULT_PGSZ 1024 /* By default blocks are 512K bytes in size. */ #define BT_DEFAULT_BLKSZ (512*1024) /* ** ** pgsz, blksz: ** Byte offset 0 of the database file is the first byte of both page 1 ** and block 1. Each page is pHdr->pgsz bytes in size. Each block is ** pHdr->blksz bytes in size. It is guaranteed that the block-size is ** an integer multiple of the page-size. ** ** iSubBlock, nSubPg: ** These are likely a stop-gap. When a user executes a 'fast-insert' write, ** the new key-value pair (or delete marker) is written into a b-tree ** stored within block iSubBlock. The root of the tree is always the first ** page in the block. ** ** Variable nSubPg contains the number of pages currently used by the ** sub-tree, not including any overflow pages. Pages (except overflow ** pages) are always allocated contiguously within the block. ** ** The reason these are likely a stop-gap is that the write-magnification ** caused by using a b-tree for to populate level-0 sub-trees is too ** expensive. */ struct BtDbHdr { u32 pgsz; /* Page size in bytes */ u32 blksz; /* Block size in bytes */ u32 nPg; /* Size of database file in pages */ u32 iRoot; /* B-tree root page */ u32 iMRoot; /* Root page of meta-tree */ u32 iSRoot; /* Root page of schedule-tree */ u32 iSubBlock; /* Root of current sub-tree */ u32 nSubPg; /* Number of non-overflow pages in sub-tree */ u32 iCookie; /* Current value of schema cookie */ u32 iFreePg; /* First page in free-page list trunk */ u32 iFreeBlk; /* First page in free-block list trunk */ }; |
︙ | ︙ | |||
112 113 114 115 116 117 118 | int sqlite4BtPageTrimPgno(BtPager*, u32 pgno); int sqlite4BtPageWrite(BtPage*); int sqlite4BtPageTrim(BtPage*); int sqlite4BtPageRelease(BtPage*); void sqlite4BtPageReference(BtPage*); /* | | > | 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 | int sqlite4BtPageTrimPgno(BtPager*, u32 pgno); int sqlite4BtPageWrite(BtPage*); int sqlite4BtPageTrim(BtPage*); int sqlite4BtPageRelease(BtPage*); void sqlite4BtPageReference(BtPage*); /* ** Allocate new database pages or blocks. */ int sqlite4BtPageAllocate(BtPager*, BtPage **ppPage); int sqlite4BtBlockAllocate(BtPager*, u32 *piBlk); /* ** Query page references. */ u32 sqlite4BtPagePgno(BtPage*); void *sqlite4BtPageData(BtPage*); |
︙ | ︙ |
Changes to src/bt_main.c.
︙ | ︙ | |||
1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 | nPg = (nContent + pgsz - 1) / pgsz; if( nPg<=BT_MAX_DIRECT_OVERFLOW ){ return 1 + nPg*4; } return 1 + (BT_MAX_DIRECT_OVERFLOW+1) * 4; } /* ** Allocate a non-overflow page. ** ** This function is a simple wrapper around sqlite4BtPageAllocate(), ** except that if the database is currenly in fast-insert mode the ** BtDbHdr.nSubPg counter is incremented. */ static int btAllocateNonOverflow(bt_db *db, BtPage **ppPg){ | > > > > | | > > > > > > > > > > > > | 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 | nPg = (nContent + pgsz - 1) / pgsz; if( nPg<=BT_MAX_DIRECT_OVERFLOW ){ return 1 + nPg*4; } return 1 + (BT_MAX_DIRECT_OVERFLOW+1) * 4; } static u32 btBlockToRoot(BtDbHdr *pHdr, u32 iBlk){ assert( iBlk>0 ); return (iBlk - 1) * (pHdr->blksz / pHdr->pgsz) + 1; } /* ** Allocate a non-overflow page. ** ** This function is a simple wrapper around sqlite4BtPageAllocate(), ** except that if the database is currenly in fast-insert mode the ** BtDbHdr.nSubPg counter is incremented. */ static int btAllocateNonOverflow(bt_db *db, BtPage **ppPg){ int rc; if( db->bFastInsertOp ){ BtDbHdr *pHdr = sqlite4BtPagerDbhdr(db->pPager); u32 iPg; iPg = pHdr->nSubPg + btBlockToRoot(pHdr, pHdr->iSubBlock); pHdr->nSubPg++; rc = sqlite4BtPageGet(db->pPager, iPg, ppPg); if( rc==SQLITE4_OK ){ rc = sqlite4BtPageWrite(*ppPg); if( rc!=SQLITE4_OK ){ sqlite4BtPageRelease(*ppPg); } } }else{ rc = sqlite4BtPageAllocate(db->pPager, ppPg); } return rc; } /* ** Trim a non-overflow page. ** |
︙ | ︙ | |||
2431 2432 2433 2434 2435 2436 2437 | } }else{ /* The new entry will not fit on the leaf page. Entries will have ** to be shuffled between existing leaves and new leaves may need ** to be added to make space for it. */ bt_db *db = pCsr->base.pDb; | | > > > | > | | 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 | } }else{ /* The new entry will not fit on the leaf page. Entries will have ** to be shuffled between existing leaves and new leaves may need ** to be added to make space for it. */ bt_db *db = pCsr->base.pDb; if( bLeaf && db->bFastInsertOp ){ /* This operation will need to allocate further pages. The worst ** case scenario is (nDepth+1) pages. If fewer than that remain ** available in the block, return BT_BLOCKFULL. */ BtDbHdr *pHdr = sqlite4BtPagerDbhdr(db->pPager); int nPgPerBlk = (pHdr->blksz / pHdr->pgsz); if( (nPgPerBlk - pHdr->nSubPg) < pCsr->nPg+1 ){ rc = BT_BLOCKFULL; pHdr->iSubBlock = 0; } } if( rc==SQLITE4_OK && pCsr->nPg==1 ){ rc = btExtendTree(pCsr); } if( rc==SQLITE4_OK ){ rc = btBalance(pCsr, bLeaf, nKV, apKV); |
︙ | ︙ | |||
2552 2553 2554 2555 2556 2557 2558 | static int btReplaceEntry( bt_db *db, /* Database handle */ u32 iRoot, /* Root page of b-tree to update */ const void *pK, int nK, /* Key to insert */ const void *pV, int nV /* Value to insert. (nV<0) -> delete */ ){ BtDbHdr *pHdr = sqlite4BtPagerDbhdr(db->pPager); | | | > | | > | 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 | static int btReplaceEntry( bt_db *db, /* Database handle */ u32 iRoot, /* Root page of b-tree to update */ const void *pK, int nK, /* Key to insert */ const void *pV, int nV /* Value to insert. (nV<0) -> delete */ ){ BtDbHdr *pHdr = sqlite4BtPagerDbhdr(db->pPager); int rc = SQLITE4_OK; /* Return code */ BtCursor csr; /* Cursor object to seek to insert point */ u32 iRootPg = iRoot; if( iRoot==0 ){ rc = btFastInsertRoot(db, pHdr, &iRootPg); } btCsrSetup(db, iRootPg, &csr); /* Seek stack cursor csr to the b-tree page that key pK/nK is/would be ** stored on. */ if( rc==SQLITE4_OK ){ rc = btCsrSeek(&csr, pK, nK, BT_SEEK_GE, BT_CSRSEEK_UPDATE); } if( rc==SQLITE4_OK ){ /* The cursor currently points to an entry with key pK/nK. This call ** should therefore replace that entry. So delete it and then re-seek ** the cursor. */ rc = sqlite4BtDelete(&csr.base); |
︙ | ︙ | |||
2618 2619 2620 2621 2622 2623 2624 | } static int btAllocateNewRoot(bt_db *db, u32 *piNew){ u32 iNew = 0; BtPage *pPg; int rc; | | | 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 | } static int btAllocateNewRoot(bt_db *db, u32 *piNew){ u32 iNew = 0; BtPage *pPg; int rc; rc = sqlite4BtPageAllocate(db->pPager, &pPg); if( rc==SQLITE4_OK ){ iNew = sqlite4BtPagePgno(pPg); sqlite4BtPageRelease(pPg); } *piNew = iNew; return rc; |
︙ | ︙ | |||
2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 | }else if( rc==SQLITE4_NOTFOUND ){ rc = SQLITE4_OK; } btCsrReset(&csr, 1); return rc; } static int btFastInsertRoot( bt_db *db, BtDbHdr *pHdr, u32 *piRoot ){ int rc = SQLITE4_OK; | > > > > < > < | | > | | | | > > > > > | > | 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 | }else if( rc==SQLITE4_NOTFOUND ){ rc = SQLITE4_OK; } btCsrReset(&csr, 1); return rc; } static int btAllocateBlock(bt_db *db, u32 *piBlk){ return sqlite4BtBlockAllocate(db->pPager, piBlk); } static int btFastInsertRoot( bt_db *db, BtDbHdr *pHdr, u32 *piRoot ){ int rc = SQLITE4_OK; /* If the meta-tree has not been created, create it now. */ if( pHdr->iMRoot==0 ){ rc = btAllocateNewRoot(db, &pHdr->iMRoot); } /* If no writable sub-tree has been discovered, create one now. */ if( rc==SQLITE4_OK && pHdr->iSubBlock==0 ){ u32 iLevel; /* Level number for new sub-tree */ u32 iSubBlock; /* New block */ rc = btFindNextLevel(db, pHdr, &iLevel); if( rc==SQLITE4_OK ){ rc = btAllocateBlock(db, &iSubBlock); } if( rc==SQLITE4_OK ){ u8 aKey[8]; u8 aVal[8]; pHdr->iSubBlock = iSubBlock; pHdr->nSubPg = 1; /* Root page is automatically allocated */ /* The key for the new entry consists of the concatentation of two ** 32-bit big-endian integers - the <age> and <level-no>. The age ** of the new segment is 0. The level number is one greater than the ** level number of the previous segment. */ btPutU32(&aKey[0], 0); btPutU32(&aKey[4], ~iLevel); btPutU32(&aVal[0], iSubBlock); btPutU32(&aVal[4], 1); assert( db->bFastInsertOp==1 ); db->bFastInsertOp = 0; rc = btReplaceEntry(db, pHdr->iMRoot, aKey, 8, aVal, 8); db->bFastInsertOp = 1; } } if( rc==SQLITE4_OK ){ *piRoot = btBlockToRoot(pHdr, pHdr->iSubBlock); } return rc; } /* ** Insert a new key/value pair or replace an existing one. ** ** This function may modify either the b-tree or fast-insert-tree, depending |
︙ | ︙ |
Changes to src/bt_pager.c.
︙ | ︙ | |||
976 977 978 979 980 981 982 983 984 985 986 987 988 989 | #ifdef BT_STDERR_DEBUG fprintf(stderr, "allocated page %d\n", pgno); #endif *ppPg = pPg; return rc; } /* ** Return the current page number of the argument page reference. */ u32 sqlite4BtPagePgno(BtPage *pPg){ return pPg->pgno; } | > > > > > > > > > > > > > > > > > > > > > > > > | 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 | #ifdef BT_STDERR_DEBUG fprintf(stderr, "allocated page %d\n", pgno); #endif *ppPg = pPg; return rc; } int sqlite4BtBlockAllocate(BtPager *p, u32 *piBlk){ int rc = SQLITE4_OK; BtDbHdr *pHdr = p->pHdr; int nPgPerBlk = (pHdr->blksz / pHdr->pgsz); u32 iBlk; u32 iRoot; u32 iFree; /* Figure out the next block in the file. And its root (first) page. */ iBlk = 1 + (pHdr->nPg + nPgPerBlk - 1) / nPgPerBlk; iRoot = (iBlk-1) * nPgPerBlk + 1; assert( iBlk>0 ); for(iFree = pHdr->nPg+1; rc==SQLITE4_OK && iFree<iRoot; iFree++){ rc = sqlite4BtPageTrimPgno(p, iFree); } if( rc==SQLITE4_OK ){ pHdr->nPg = iBlk * nPgPerBlk; *piBlk = iBlk; } return rc; } /* ** Return the current page number of the argument page reference. */ u32 sqlite4BtPagePgno(BtPage *pPg){ return pPg->pgno; } |
︙ | ︙ | |||
1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 | int sqlite4BtPagerHdrdump(BtPager *pPager, sqlite4_buffer *pBuf){ BtDbHdr *pHdr = pPager->pHdr; int rc = SQLITE4_OK; sqlite4BtBufAppendf(pBuf, "pgsz=%d blksz=%d nPg=%d" " iRoot=%d iMRoot=%d iSRoot=%d" " iCookie=%d iFreePg=%d iFreeBlk=%d", pHdr->pgsz, pHdr->blksz, pHdr->nPg, pHdr->iRoot, pHdr->iMRoot, pHdr->iSRoot, pHdr->iCookie, pHdr->iFreePg, pHdr->iFreeBlk ); return rc; } #ifndef NDEBUG int sqlite4BtPagerRefcount(BtPager *p){ return p->nTotalRef; } #endif | > > | 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 | int sqlite4BtPagerHdrdump(BtPager *pPager, sqlite4_buffer *pBuf){ BtDbHdr *pHdr = pPager->pHdr; int rc = SQLITE4_OK; sqlite4BtBufAppendf(pBuf, "pgsz=%d blksz=%d nPg=%d" " iRoot=%d iMRoot=%d iSRoot=%d" " iSubBlock=%d nSubPg=%d" " iCookie=%d iFreePg=%d iFreeBlk=%d", pHdr->pgsz, pHdr->blksz, pHdr->nPg, pHdr->iRoot, pHdr->iMRoot, pHdr->iSRoot, pHdr->iSubBlock, pHdr->nSubPg, pHdr->iCookie, pHdr->iFreePg, pHdr->iFreeBlk ); return rc; } #ifndef NDEBUG int sqlite4BtPagerRefcount(BtPager *p){ return p->nTotalRef; } #endif |