Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Begin adding code to update the meta tree with the results of a merge. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
4db4b4ceeb97b55854f60ed8c57e59f6 |
User & Date: | dan 2014-01-07 20:41:49.749 |
Context
2014-01-08
| ||
20:29 | Fill in more merging code. Fix many bugs. check-in: 885387b919 user: dan tags: trunk | |
2014-01-07
| ||
20:41 | Begin adding code to update the meta tree with the results of a merge. check-in: 4db4b4ceeb user: dan tags: trunk | |
10:37 | Apply fixes to the build system and rename a few things in the bt code so that sqlite4.c can be compiled. check-in: f0eee06cf0 user: dan tags: trunk | |
Changes
Changes to src/btInt.h.
︙ | ︙ | |||
92 93 94 95 96 97 98 | /* ** This struct defines the format of database "schedule" pages. */ typedef struct BtSchedule BtSchedule; struct BtSchedule { | | > > > > | 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 | /* ** This struct defines the format of database "schedule" pages. */ typedef struct BtSchedule BtSchedule; struct BtSchedule { u32 eBusy; /* One of the BT_SCHEDULE_XXX constants */ u32 iAge; /* Age of input segments */ u32 iMinLevel; /* Minimum level of input segments */ u32 iMaxLevel; /* Maximum level of input segments */ u32 iOutLevel; /* Level at which to write output */ u32 aBlock[32]; /* Allocated blocks */ u32 iNextPg; /* Page that contains next input key */ u32 iNextCell; /* Cell that contains next input key */ u32 iFreeList; /* First page of new free-list (if any) */ u32 aRoot[32]; /* Root pages for populated blocks */ }; #define BT_SCHEDULE_EMPTY 0 #define BT_SCHEDULE_BUSY 1 #define BT_SCHEDULE_DONE 2 int sqlite4BtMerge(bt_db *db, BtDbHdr *pHdr, u8 *aSched); /************************************************************************* ** Interface to bt_pager.c functionality. */ typedef struct BtPage BtPage; |
︙ | ︙ |
Changes to src/bt_main.c.
︙ | ︙ | |||
118 119 120 121 122 123 124 125 126 127 128 129 130 131 | /* ** TODO: Rearrange things so these are not required! */ static int fiLoadSummary(bt_db *, BtCursor *, const u8 **, int *); static void btReadSummary(const u8 *, int, u16 *, u16 *, u16 *); static int btCsrData(BtCursor *, int, int, const void **, int *); /* ** Meta-table summary key. */ static const u8 aSummaryKey[] = {0xFF, 0xFF, 0xFF, 0xFF}; #ifndef btErrorBkpt | > | 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 | /* ** TODO: Rearrange things so these are not required! */ static int fiLoadSummary(bt_db *, BtCursor *, const u8 **, int *); static void btReadSummary(const u8 *, int, u16 *, u16 *, u16 *); static int btCsrData(BtCursor *, int, int, const void **, int *); static int btReadSchedule(bt_db *, u8 *, BtSchedule *); /* ** Meta-table summary key. */ static const u8 aSummaryKey[] = {0xFF, 0xFF, 0xFF, 0xFF}; #ifndef btErrorBkpt |
︙ | ︙ | |||
201 202 203 204 205 206 207 | #define btPutU32(x,y) sqlite4BtPutU32(x,y) /* ** Allocate a new database handle. */ int sqlite4BtNew(sqlite4_env *pEnv, int nExtra, bt_db **ppDb){ static const int MIN_MERGE = 4; | | | 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 | #define btPutU32(x,y) sqlite4BtPutU32(x,y) /* ** Allocate a new database handle. */ int sqlite4BtNew(sqlite4_env *pEnv, int nExtra, bt_db **ppDb){ static const int MIN_MERGE = 4; static const int SCHEDULE_ALLOC = 1; bt_db *db = 0; /* New database object */ BtPager *pPager = 0; /* Pager object for this database */ int nReq; /* Bytes of space required for bt_db object */ int rc; /* Return code */ nReq = sizeof(bt_db); |
︙ | ︙ | |||
541 542 543 544 545 546 547 | sqlite4BtBufAppendf(pBuf, "Page %d: ", pgno); if( pgno==pHdr->iSRoot ){ int i; BtSchedule s; sqlite4BtBufAppendf(pBuf, "(schedule page) "); | | | > > > > | 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 | sqlite4BtBufAppendf(pBuf, "Page %d: ", pgno); if( pgno==pHdr->iSRoot ){ int i; BtSchedule s; sqlite4BtBufAppendf(pBuf, "(schedule page) "); btReadSchedule(0, aData, &s); sqlite4BtBufAppendf(pBuf, " eBusy=(%s)\n", s.eBusy==BT_SCHEDULE_EMPTY ? "empty" : s.eBusy==BT_SCHEDULE_BUSY ? "busy" : s.eBusy==BT_SCHEDULE_DONE ? "done" : "!ErroR" ); sqlite4BtBufAppendf(pBuf, " iAge=%d\n", (int)s.iAge); sqlite4BtBufAppendf(pBuf, " iMinLevel=%d\n", (int)s.iMinLevel); sqlite4BtBufAppendf(pBuf, " iMaxLevel=%d\n", (int)s.iMaxLevel); sqlite4BtBufAppendf(pBuf, " iOutLevel=%d\n", (int)s.iOutLevel); sqlite4BtBufAppendf(pBuf, " aBlock=("); for(i=0; s.aBlock[i] && i<array_size(s.aBlock); i++){ sqlite4BtBufAppendf(pBuf, "%s%d", i==0 ? "" : " ", (int)s.aBlock[i]); |
︙ | ︙ | |||
610 611 612 613 614 615 616 | } } if( pVal ){ btBufferAppendBlob(pBuf, bAscii, pVal, nVal); if( flags & BT_PGFLAGS_METATREE ){ /* Interpret the meta-tree entry */ if( nKey==sizeof(aSummaryKey) && 0==memcmp(pKey, aSummaryKey, nKey) ){ | > > | > > > > > > > > | | | 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 | } } if( pVal ){ btBufferAppendBlob(pBuf, bAscii, pVal, nVal); if( flags & BT_PGFLAGS_METATREE ){ /* Interpret the meta-tree entry */ if( nKey==sizeof(aSummaryKey) && 0==memcmp(pKey, aSummaryKey, nKey) ){ u16 iMin, nLvl, iMerge; int j; sqlite4BtBufAppendf(pBuf, " [summary:"); for(j=0; j<(nVal/6); j++){ btReadSummary(pVal, j, &iMin, &nLvl, &iMerge); sqlite4BtBufAppendf(pBuf, " %d/%d/%d", (int)iMin, (int)nLvl, (int)iMerge ); } sqlite4BtBufAppendf(pBuf, "]"); }else{ u32 iAge = btGetU32(&pKey[0]); u32 iLevel = ~btGetU32(&pKey[4]); u32 iBlk = btGetU32(pVal); sqlite4BtBufAppendf(pBuf, " [age=%d level=%d root=%d]", (int)iAge, (int)iLevel, (int)iBlk ); } } } sqlite4BtBufAppendf(pBuf, "\n"); } if( pPager && btFlags(aData) & BT_PGFLAGS_INTERNAL ){ for(i=0; i<=btCellCount(aData, nData); i++){ BtPage *pChild; u8 *aChild; u32 child; child = btChildPgno(aData, nData, i); sqlite4BtPageGet(pPager, child, &pChild); aChild = sqlite4BtPageData(pChild); |
︙ | ︙ | |||
3513 3514 3515 3516 3517 3518 3519 | btCsrReset(&csr, 1); return rc; } static void btWriteSchedule(u8 *aData, BtSchedule *p, int *pRc){ if( *pRc==SQLITE4_OK ){ | | > | > > | > | > > | 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 | btCsrReset(&csr, 1); return rc; } static void btWriteSchedule(u8 *aData, BtSchedule *p, int *pRc){ if( *pRc==SQLITE4_OK ){ u32 *a = (u32*)p; int i; for(i=0; i<sizeof(BtSchedule)/sizeof(u32); i++){ btPutU32(&aData[i*4], a[i]); } } } static int btReadSchedule(bt_db *db, u8 *aData, BtSchedule *p){ u32 *a = (u32*)p; int i; for(i=0; i<sizeof(BtSchedule)/sizeof(u32); i++){ a[i] = btGetU32(&aData[i*4]); } return SQLITE4_OK; } static void btWriteSchedulePage(BtPage *pPg, BtSchedule *p, int *pRc){ if( *pRc==SQLITE4_OK ){ int rc = sqlite4BtPageWrite(pPg); if( rc==SQLITE4_OK ){ |
︙ | ︙ | |||
3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 | } } btCsrReset(&csr, 1); return rc; } /* ** If possible, schedule a merge operation. ** ** The merge operation is selected based on the following criteria: ** ** * The more levels involved in the merge the better, and | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 | } } btCsrReset(&csr, 1); return rc; } /* ** The connection passed as the first argument to this function currently ** has a write transaction open. The schedule object passed as the second ** is in BT_SCHEDULE_DONE state. This function updates the meta-tree to ** integrate the results of the completed merge into the main fast-insert ** tree structure. ** ** If successful, SQLITE4_OK is returned. If an error occurs, an SQLite ** error code. */ static int btIntegrateMerge(bt_db *db, BtSchedule *p){ BtDbHdr *pHdr = sqlite4BtPagerDbhdr(db->pPager); int rc = SQLITE4_OK; BtCursor csr; /* Cursor for reading various sub-trees */ BtCursor mcsr; /* Cursor for reading the meta-tree */ const void *pKey = 0; const u8 *aSum; int nSum; /* Summary value */ int nKey = 0; sqlite4_buffer buf; u32 iLvl; int iBlk; memset(&csr, 0, sizeof(csr)); memset(&mcsr, 0, sizeof(mcsr)); btCsrSetup(db, pHdr->iMRoot, &mcsr); sqlite4_buffer_init(&buf, 0); if( p->iNextPg ){ btCsrSetup(db, p->iNextPg, &csr); rc = btCsrEnd(&csr, 0); if( rc==SQLITE4_OK ){ csr.aiCell[0] = p->iNextCell; rc = btCsrKey(&csr, &pKey, &nKey); } } for(iLvl=p->iMinLevel; iLvl<=p->iMaxLevel; iLvl++){ u8 aPrefix[8]; u32 iRoot = 0; btPutU32(&aPrefix[0], p->iAge); btPutU32(&aPrefix[4], ~iLvl); rc = btCsrSeek(&mcsr, 0, aPrefix, sizeof(aPrefix), BT_SEEK_GE, 0); if( rc==SQLITE4_INEXACT ) rc = SQLITE4_OK; while( rc==SQLITE4_OK && iRoot==0 ){ const u8 *pMKey; int nMKey; rc = btCsrKey(&mcsr, (const void **)&pMKey, &nMKey); if( rc==SQLITE4_OK ){ if( nMKey<sizeof(aPrefix) || memcmp(aPrefix, pMKey, sizeof(aPrefix)) ){ rc = SQLITE4_NOTFOUND; } } if( rc==SQLITE4_OK ){ if( pKey ){ int res = btKeyCompare(pMKey + 8, nMKey - 8, pKey, nKey); if( res<=0 ){ const void *pData; int nData; btCsrData(&mcsr, 0, 4, &pData, &nData); iRoot = btGetU32(pData); } } rc = sqlite4BtDelete(&mcsr.base); } } if( rc==SQLITE4_OK && iRoot ){ int n = sizeof(aPrefix) + nKey; rc = sqlite4_buffer_resize(&buf, n); if( rc==SQLITE4_OK ){ u8 aData[4]; u8 *a = (u8*)buf.p; memcpy(a, aPrefix, sizeof(aPrefix)); memcpy(&a[sizeof(aPrefix)], pKey, nKey); btPutU32(aData, iRoot); rc = btReplaceEntry(db, pHdr->iMRoot, a, n, aData, sizeof(aData)); } } } /* Add new entries for the new output level blocks. */ for(iBlk=0; rc==SQLITE4_OK && iBlk<array_size(p->aRoot) && p->aRoot[iBlk]; iBlk++ ){ btCsrReset(&csr, 1); btCsrSetup(db, p->aRoot[iBlk], &csr); rc = btCsrEnd(&csr, 0); if( rc==SQLITE4_OK ){ rc = btCsrKey(&csr, &pKey, &nKey); } if( rc==SQLITE4_OK ){ rc = sqlite4_buffer_resize(&buf, nKey+8); } if( rc==SQLITE4_OK ){ u8 aData[4]; u8 *a = (u8*)buf.p; btPutU32(a, p->iAge+1); btPutU32(&a[4], ~p->iOutLevel); memcpy(&a[8], pKey, nKey); btPutU32(aData, p->aRoot[iBlk]); rc = btReplaceEntry(db, pHdr->iMRoot, a, nKey+8, aData, sizeof(aData)); } } btCsrReset(&csr, 1); rc = fiLoadSummary(db, &csr, &aSum, &nSum); if( rc==SQLITE4_OK ){ u16 iMinLevel = 0; u16 nLevel = 0; u16 iMergeLevel = 0; if( nSum>(6*(p->iAge+1)) ){ btReadSummary(aSum, p->iAge+1, &iMinLevel, &nLevel, &iMergeLevel); } if( (iMinLevel+nLevel)>p->iOutLevel ){ rc = sqlite4_buffer_resize(&buf, MAX(nSum, (p->iOutLevel+1)*6)); if( rc==SQLITE4_OK ){ nLevel = p->iOutLevel - iMinLevel + 1; memcpy(buf.p, aSum, nSum); btWriteSummary((u8*)buf.p, p->iAge+1, iMinLevel, nLevel, iMergeLevel); rc = btReplaceEntry( db, pHdr->iMRoot, aSummaryKey, sizeof(aSummaryKey), buf.p, buf.n ); } } } btCsrReset(&csr, 1); btCsrReset(&mcsr, 1); sqlite4_buffer_clear(&buf); return rc; } /* ** If possible, schedule a merge operation. ** ** The merge operation is selected based on the following criteria: ** ** * The more levels involved in the merge the better, and |
︙ | ︙ | |||
3664 3665 3666 3667 3668 3669 3670 | } /* Check if the schedule page is busy. If so, no new merge may be ** scheduled. If the schedule page is not busy, call btFindMerge() to ** figure out which levels should be scheduled for merge. */ if( rc==SQLITE4_OK ){ aData = sqlite4BtPageData(pPg); | > | > > | > | > > > > > > > > > > > > > > > > > > > | | 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 | } /* Check if the schedule page is busy. If so, no new merge may be ** scheduled. If the schedule page is not busy, call btFindMerge() to ** figure out which levels should be scheduled for merge. */ if( rc==SQLITE4_OK ){ aData = sqlite4BtPageData(pPg); switch( btGetU32(aData) ){ case 555: case BT_SCHEDULE_BUSY: rc = SQLITE4_NOTFOUND; break; case BT_SCHEDULE_DONE: { BtSchedule s; rc = btReadSchedule(db, aData, &s); if( rc==SQLITE4_OK ){ rc = btIntegrateMerge(db, &s); } if( rc==SQLITE4_OK ){ s.eBusy = 555; btWriteSchedulePage(pPg, &s, &rc); rc = SQLITE4_NOTFOUND; } break; } default: /* BT_SCHEDULE_EMPTY */ break; } if( rc==SQLITE4_OK ){ rc = btFindMerge(db, &iAge, &iMin, &iMax, &iOutLvl); } } if( rc==SQLITE4_OK ){ BtSchedule s; memset(&s, 0, sizeof(BtSchedule)); s.eBusy = BT_SCHEDULE_BUSY; s.iAge = iAge; s.iMaxLevel = iMax; s.iMinLevel = iMin; s.iOutLevel = iOutLvl; rc = btAllocateBlock(db, db->nScheduleAlloc, s.aBlock); btWriteSchedulePage(pPg, &s, &rc); |
︙ | ︙ | |||
4096 4097 4098 4099 4100 4101 4102 | */ int sqlite4BtMerge(bt_db *db, BtDbHdr *pHdr, u8 *aSched){ BtSchedule s; /* Deserialized schedule object */ int rc = SQLITE4_OK; /* Return code */ /* Set up the input cursor. */ btReadSchedule(db, aSched, &s); | | | 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 | */ int sqlite4BtMerge(bt_db *db, BtDbHdr *pHdr, u8 *aSched){ BtSchedule s; /* Deserialized schedule object */ int rc = SQLITE4_OK; /* Return code */ /* Set up the input cursor. */ btReadSchedule(db, aSched, &s); if( s.eBusy==BT_SCHEDULE_BUSY ){ FiCursor fcsr; /* FiCursor used to read input */ FiWriter writer; /* FiWriter used to write output */ rc = fiSetupMergeCsr(db, pHdr, &s, &fcsr); fiWriterInit(db, &s, &writer, &rc); /* The following loop runs once for each key copied from the input to |
︙ | ︙ | |||
4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 | } } /* Assuming no error has occurred, update the serialized BtSchedule ** structure stored in buffer aSched[]. The caller will write this ** buffer to the database file as page (pHdr->iSRoot). */ if( rc==BT_BLOCKFULL || rc==SQLITE4_NOTFOUND ){ rc = fiWriterFlushAll(&writer); if( rc==SQLITE4_OK ){ btWriteSchedule(aSched, &s, &rc); } } fiWriterCleanup(&writer); | > > > > > > > > > > > > > > | 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 | } } /* Assuming no error has occurred, update the serialized BtSchedule ** structure stored in buffer aSched[]. The caller will write this ** buffer to the database file as page (pHdr->iSRoot). */ if( rc==BT_BLOCKFULL || rc==SQLITE4_NOTFOUND ){ if( rc==SQLITE4_NOTFOUND ){ assert( fcsr.iBt<0 ); s.iNextPg = 0; s.iNextCell = 0; }else{ BtCursor *pCsr = &fcsr.aSub[fcsr.iBt].csr; assert( pCsr->nPg>0 ); s.iNextPg = sqlite4BtPagePgno(pCsr->apPage[pCsr->nPg-1]); s.iNextCell = pCsr->aiCell[pCsr->nPg-1]; } s.iFreeList = 0; s.eBusy = BT_SCHEDULE_DONE; rc = fiWriterFlushAll(&writer); if( rc==SQLITE4_OK ){ btWriteSchedule(aSched, &s, &rc); } } fiWriterCleanup(&writer); |
︙ | ︙ |