Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Begin adding routines for b-tree balancing and so on. Incomplete. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
3003f1d99ac712d9195e167e8318b516 |
User & Date: | dan 2013-09-27 20:25:16.484 |
Context
2013-09-28
| ||
11:23 | Fixes for b-tree balancing routines. Still incomplete. check-in: 9e8d7525d8 user: dan tags: trunk | |
2013-09-27
| ||
20:25 | Begin adding routines for b-tree balancing and so on. Incomplete. check-in: 3003f1d99a user: dan tags: trunk | |
2013-09-25
| ||
19:40 | Add a bugfix for b-tree deletes. check-in: 88c8fd489d user: dan tags: trunk | |
Changes
Changes to src/btInt.h.
︙ | ︙ | |||
15 16 17 18 19 20 21 22 23 24 25 26 27 28 | #include "bt.h" typedef sqlite4_int64 i64; typedef sqlite4_uint64 u64; typedef unsigned int u32; typedef unsigned short u16; typedef unsigned char u8; /************************************************************************* ** Interface to bt_pager.c functionality. */ typedef struct BtPage BtPage; typedef struct BtPager BtPager; | > > > > > | 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 | #include "bt.h" typedef sqlite4_int64 i64; typedef sqlite4_uint64 u64; typedef unsigned int u32; typedef unsigned short u16; typedef unsigned char u8; /* Number of elements in an array object. */ #define array_size(x) (sizeof(x)/sizeof(x[0])) /************************************************************************* ** Interface to bt_pager.c functionality. */ typedef struct BtPage BtPage; typedef struct BtPager BtPager; |
︙ | ︙ | |||
61 62 63 64 65 66 67 68 69 70 71 72 73 74 | /* ** Read, write and trim existing database pages. */ int sqlite4BtPageGet(BtPager*, u32 pgno, BtPage **ppPage); int sqlite4BtPageWrite(BtPage*); int sqlite4BtPageTrim(BtPage*); int sqlite4BtPageRelease(BtPage*); /* ** Allocate a new database page and return a writable reference to it. */ int sqlite4BtPageAllocate(BtPager*, BtPage **ppPage); /* | > | 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 | /* ** Read, write and trim existing database pages. */ int sqlite4BtPageGet(BtPager*, u32 pgno, BtPage **ppPage); int sqlite4BtPageWrite(BtPage*); int sqlite4BtPageTrim(BtPage*); int sqlite4BtPageRelease(BtPage*); void sqlite4BtPageReference(BtPage*); /* ** Allocate a new database page and return a writable reference to it. */ int sqlite4BtPageAllocate(BtPager*, BtPage **ppPage); /* |
︙ | ︙ |
Changes to src/bt_main.c.
︙ | ︙ | |||
56 57 58 59 60 61 62 63 64 65 66 67 68 69 | return ((u32)a[0] << 8) + (u32)a[1]; } static void btPutU16(u8 *a, u16 i){ a[0] = (u8)((i>>8) & 0xFF); a[1] = (u8)((i>>0) & 0xFF); } /* ** Allocate a new database handle. */ int sqlite4BtNew(sqlite4_env *pEnv, int nExtra, bt_db **ppDb){ bt_db *db = 0; /* New database object */ BtPager *pPager = 0; /* Pager object for this database */ | > > > > > > > | 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 | return ((u32)a[0] << 8) + (u32)a[1]; } static void btPutU16(u8 *a, u16 i){ a[0] = (u8)((i>>8) & 0xFF); a[1] = (u8)((i>>0) & 0xFF); } static void btPutU32(u8 *a, u32 i){ a[0] = (u8)((i>>24) & 0xFF); a[1] = (u8)((i>>16) & 0xFF); a[2] = (u8)((i>>8) & 0xFF); a[3] = (u8)((i>>0) & 0xFF); } /* ** Allocate a new database handle. */ int sqlite4BtNew(sqlite4_env *pEnv, int nExtra, bt_db **ppDb){ bt_db *db = 0; /* New database object */ BtPager *pPager = 0; /* Pager object for this database */ |
︙ | ︙ | |||
195 196 197 198 199 200 201 202 203 204 205 206 207 | static int btCsrAscend(bt_cursor *pCsr){ if( pCsr->nPg>0 ){ pCsr->nPg--; sqlite4BtPageRelease(pCsr->apPage[pCsr->nPg]); } return (pCsr->nPg==0 ? SQLITE4_NOTFOUND : SQLITE4_OK); } static int btCellCount(const u8 *aData, int nData){ return (int)btGetU16(&aData[nData-2]); } static int btFreeOffset(const u8 *aData, int nData){ | > > > > > > > > > > | > > > > > | | | > > | | 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 | static int btCsrAscend(bt_cursor *pCsr){ if( pCsr->nPg>0 ){ pCsr->nPg--; sqlite4BtPageRelease(pCsr->apPage[pCsr->nPg]); } return (pCsr->nPg==0 ? SQLITE4_NOTFOUND : SQLITE4_OK); } /************************************************************************** ** The functions in this section are used to extract data from buffers ** containing formatted b-tree pages. They do not entirely encapsulate all ** page format details, but go some way to doing so. */ static int btCellCount(const u8 *aData, int nData){ return (int)btGetU16(&aData[nData-2]); } static int btFreeSpace(const u8 *aData, int nData){ return (int)btGetU16(&aData[nData-4]); } static int btFreeOffset(const u8 *aData, int nData){ return (int)btGetU16(&aData[nData-6]); } static int btFreeContiguous(const u8 *aData, int nData){ int nCell = btCellCount(aData, nData); return nData - btFreeOffset(aData, nData) - (3+nCell)*2; } static u8 btFlags(const u8 *aData, int nData){ return aData[0]; } static u8 *btCellFind(u8 *aData, int nData, int iCell){ int iOff = btGetU16(&aData[nData - 6 - iCell*2 - 2]); return &aData[iOff]; } /* ** Return a pointer to the big-endian u16 field that contains the ** pointer to cell iCell. */ static u8* btCellPtrFind(u8 *aData, int nData, int iCell){ return &aData[nData - 6 - iCell*2 - 2]; } /* ** Parameters aData and nData describe a buffer containing an internal ** b-tree node. The page number of the iCell'th leftmost child page ** is returned. */ static u32 btChildPgno(u8 *aData, int nData, int iCell){ u32 pgno; /* Return value */ int nCell = btCellCount(aData, nData); if( iCell>=nCell ){ pgno = btGetU32(&aData[1]); }else{ int nKey; u8 *pCell = btCellFind(aData, nData, iCell); pCell += sqlite4BtVarintGet32(pCell, &nKey); pCell += nKey; pgno = btGetU32(pCell); } return pgno; } /* **************************************************************************/ #ifndef NDEBUG #include <stdio.h> static void printPage(u8 *aData, int nData){ int i; int nCell = (int)btCellCount(aData, nData); fprintf(stderr, "nCell=%d\n", nCell); fprintf(stderr, "iFree=%d\n", (int)btFreeOffset(aData, nData)); fprintf(stderr, "flags=%d\n", (int)btFlags(aData, nData)); fprintf(stderr, "cell offsets:"); for(i=0; i<nCell; i++){ fprintf(stderr, " %d", (int)(btCellFind(aData, nData, i) - aData)); } fprintf(stderr, "\n"); } #endif /* |
︙ | ︙ | |||
553 554 555 556 557 558 559 | ){ const int pgsz = sqlite4BtPagerPagesize(pCsr->pDb->pPager); u8 *aData; u8 *pCell; int iCell = pCsr->aiCell[pCsr->nPg-1]; int nK; int nV; | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | > > > | > > > > > > > > > > > > > > | > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | > > > | > > > > > > | | > > > > > > > > | > > > | | > | | > | > | > | > > > > > > > | > > > > > > > | 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 | ){ const int pgsz = sqlite4BtPagerPagesize(pCsr->pDb->pPager); u8 *aData; u8 *pCell; int iCell = pCsr->aiCell[pCsr->nPg-1]; int nK; int nV; aData = (u8*)sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]); pCell = btCellFind(aData, pgsz, iCell); pCell += sqlite4BtVarintGet32(pCell, &nK); assert( nK!=0 ); pCell += nK; pCell += sqlite4BtVarintGet32(pCell, &nV); assert( nV!=0 ); *ppV = pCell; *pnV = nV; return SQLITE4_OK; } static u8 *btCellFindSize(u8 *aData, int nData, int iCell, int *pnByte){ int nKey; u8 *pCell; u8 *p; p = pCell = btCellFind(aData, nData, iCell); p += sqlite4BtVarintGet32(p, &nKey); p += nKey; p += sqlite4BtVarintGet32(p, &nKey); p += nKey; *pnByte = (p - pCell); return pCell; } /* ** Allocate a new page buffer. */ static int btNewBuffer(bt_db *pDb, u8 **paBuf){ const int pgsz = sqlite4BtPagerPagesize(pDb->pPager); u8 *aBuf; *paBuf = aBuf = sqlite4_malloc(pDb->pEnv, pgsz); if( aBuf==0 ) return btErrorBkpt(SQLITE4_NOMEM); return SQLITE4_OK; } /* ** Discard a page buffer allocated using btNewBuffer. */ static void btFreeBuffer(bt_db *pDb, u8 *aBuf){ sqlite4_free(pDb->pEnv, aBuf); } /* ** Attach a buffer to an existing page object. */ static int btSetBuffer(bt_db *pDb, BtPage *pPg, u8 *aBuf){ const int pgsz = sqlite4BtPagerPagesize(pDb->pPager); int rc; rc = sqlite4BtPageWrite(pPg); if( rc==SQLITE4_OK ){ u8 *aData = sqlite4BtPageData(pPg); memcpy(aData, aBuf, pgsz); sqlite4_free(pDb->pEnv, aBuf); } return rc; } /* ** Defragment the b-tree page passed as the first argument. Return ** SQLITE4_OK if successful, or an SQLite error code otherwise. */ static int btDefragmentPage(bt_db *pDb, BtPage *pPg){ const int pgsz = sqlite4BtPagerPagesize(pDb->pPager); u8 *aData; /* Pointer to buffer of pPg */ u8 *aTmp; /* Temporary buffer to assemble new page in */ int nCell; /* Number of cells on page */ int iWrite; /* Write next cell at this offset in aTmp[] */ int i; /* Used to iterate through cells */ int bLeaf; /* True if pPg is a leaf page */ int nHdr; /* Bytes in header of this page */ if( btNewBuffer(pDb, &aTmp) ) return SQLITE4_NOMEM; aData = sqlite4BtPageData(pPg); assert( (btFlags(aData, pgsz) & BT_PGFLAGS_INTERNAL)==0 ); nCell = btCellCount(aData, pgsz); bLeaf = 0==(btFlags(aData, pgsz) & BT_PGFLAGS_INTERNAL); nHdr = bLeaf ? 1 : 5; /* Set header bytes of new page */ memcpy(aTmp, aData, nHdr); iWrite = nHdr; for(i=0; i<nCell; i++){ int nByte; u8 *pCell; pCell = btCellFindSize(aData, pgsz, i, &nByte); btPutU16(btCellPtrFind(aTmp, pgsz, i), iWrite); memcpy(&aTmp[iWrite], pCell, nByte); iWrite += nByte; } /* Write the rest of the page footer */ btPutU16(&aTmp[pgsz-2], nCell); btPutU16(&aTmp[pgsz-4], pgsz - (3+nCell)*2 - iWrite); btPutU16(&aTmp[pgsz-6], iWrite); btSetBuffer(pDb, pPg, aTmp); return SQLITE4_OK; } typedef struct KeyValue KeyValue; struct KeyValue { const void *pK; int nK; const void *pV; int nV; u32 pgno; }; /* ** Return the number of bytes consumed by the leaf cell generated based ** on *pKV in a database with page size pgsz. */ static int btKVCellSize(KeyValue *pKV, int pgsz){ int nByte; if( pKV->pgno ){ nByte = sqlite4BtVarintLen32(pKV->nK) + pKV->nK + 4; }else{ nByte = sqlite4BtVarintLen32(pKV->nK) + sqlite4BtVarintLen32(pKV->nV) + pKV->nV + pKV->nK; } return nByte; } /* ** Write a leaf cell based on *pKV to buffer aBuffer. Return the number ** of bytes written. */ static int btKVCellWrite(KeyValue *pKV, int pgsz, u8 *aBuf){ int i = 0; i += sqlite4BtVarintPut32(&aBuf[i], pKV->nK); memcpy(&aBuf[i], pKV->pK, pKV->nK); i += pKV->nK; if( pKV->pgno==0 ){ i += sqlite4BtVarintPut32(&aBuf[i], pKV->nV); memcpy(&aBuf[i], pKV->pV, pKV->nV); i += pKV->nV; }else{ btPutU32(&aBuf[i], pKV->pgno); i += 4; } assert( i==btKVCellSize(pKV, pgsz) ); return i; } static int btGatherSiblings(bt_cursor *pCsr, BtPage **apPg, int *pnPg){ bt_db * const pDb = pCsr->pDb; const int pgsz = sqlite4BtPagerPagesize(pDb->pPager); int rc = SQLITE4_OK; int nCell; /* Number of cells in parent page */ u8 *aParent; /* Buffer of parent page */ int iChild; /* Index of child page */ int nSib; /* Number of siblings */ int iSib; /* Index of left-most sibling page */ int i; aParent = sqlite4BtPageData(pCsr->apPage[pCsr->nPg-2]); iChild = pCsr->aiCell[pCsr->nPg-2]; nCell = btCellCount(aParent, pgsz); if( nCell<2 ){ nSib = nCell+1; }else{ nSib = 3; } if( iChild==0 ){ iSib = 0; }else if( iChild==nCell ){ iSib = nCell-2; }else{ iSib = iChild-1; } for(i=0; i<nSib && rc==SQLITE4_OK; i++){ u32 pgno = btChildPgno(aParent, pgsz, iSib+i); rc = sqlite4BtPageGet(pDb->pPager, pgno, &apPg[i]); } *pnPg = nSib; pCsr->aiCell[pCsr->nPg-2] = iSib; return rc; } static int btSetChildPgno(bt_db *pDb, BtPage *pPg, int iChild, u32 pgno){ const int pgsz = sqlite4BtPagerPagesize(pDb->pPager); int rc; rc = sqlite4BtPageWrite(pPg); if( rc==SQLITE4_OK ){ u8 *aData = sqlite4BtPageData(pPg); int nCell = btCellCount(aData, pgsz); if( iChild>=nCell ){ btPutU32(&aData[1], pgno); }else{ int nKey; u8 *pCell = btCellFind(aData, pgsz, iChild); pCell += sqlite4BtVarintGet32(pCell, &nKey); pCell += nKey; btPutU32(pCell, pgno); } } return rc; } /* Called recursively by btInsertAndRebalance(). todo: Fix this! */ static int btModifyPage(bt_cursor *, int, int, KeyValue *); static int btInsertAndRebalance(bt_cursor *pCsr, KeyValue *pKV){ bt_db * const pDb = pCsr->pDb; const int pgsz = sqlite4BtPagerPagesize(pDb->pPager); const int nSpacePerPage = (pgsz - 1 - 6); int nIn = 0; /* Number of input pages */ int iPg; /* Used to iterate through pages */ int iCell; /* Used to iterate through cells */ int nCell = 0; /* Total number of cells to redistribute */ int *anCellSz; /* Array containing size in bytes of cells */ int iIns; /* Index of new cell */ int nOut; /* Number of output pages */ BtPage *apPg[5]; /* Input/output pages */ int anOut[5]; /* Cell counts for output pages */ u8 *apOut[5]; /* Buffers for assembly of output pages */ KeyValue aPCell[5]; /* Cells to push into the parent page */ BtPage *pPar; /* Parent page */ int iSib; /* Index of left-most sibling */ int nTotal; /* Total bytes of content to distribute */ int rc = SQLITE4_OK; /* Return code */ int iPgIn; int iPgInFirst; memset(apOut, 0, sizeof(apOut)); memset(apPg, 0, sizeof(apPg)); memset(aPCell, 0, sizeof(aPCell)); /* Gather the sibling pages from which cells will be redistributed into ** the apPg[] array. */ assert( pCsr->nPg>1 ); rc = btGatherSiblings(pCsr, apPg, &nIn); if( rc!=SQLITE4_OK ) goto rebalance_out; pPar = pCsr->apPage[pCsr->nPg-2]; iSib = pCsr->aiCell[pCsr->nPg-2]; /* Count the cells on the input pages. This loop also sets iNew. */ for(iPg=0; iPg<nIn; iPg++){ u8 *aData = sqlite4BtPageData(apPg[iPg]); if( apPg[iPg]==pCsr->apPage[pCsr->nPg-1] ){ iIns = (nCell + pCsr->aiCell[pCsr->nPg-1]); nCell++; } nCell += btCellCount(aData, pgsz); } /* Allocate and populate the anCellSz[] array */ anCellSz = (int*)sqlite4_malloc(pDb->pEnv, sizeof(int) * nCell); if( anCellSz==0 ){ rc = btErrorBkpt(SQLITE4_NOMEM); goto rebalance_out; } iCell = 0; for(iPg=0; iPg<nIn; iPg++){ u8 *aData = sqlite4BtPageData(apPg[iPg]); int nPgCell; /* Number of cells on page apPg[iPg] */ int iPgCell; /* Index of cell under analysis in page */ nPgCell = btCellCount(aData, pgsz); for(iPgCell=0; iPgCell<nPgCell; iPgCell++){ if( iCell==iIns ) iCell++; btCellFindSize(aData, pgsz, iCell, &anCellSz[iCell]); iCell++; } } anCellSz[iIns] = btKVCellSize(pKV, pgsz); /* Now figure out the number of output pages. Set nOut to this value. */ iCell = 0; for(iPg=0; iCell<nCell; iPg++){ int nByte = 0; /* Number of bytes of content on page */ assert( iPg<array_size(anOut) ); for(/* noop */; iCell<nCell; iCell++){ nByte += (anCellSz[iCell] + 2); if( nByte>nSpacePerPage ) break; } anOut[iPg] = iCell; } nOut = iPg; assert( anOut[nOut-1]==nCell ); /* Calculate the total size of all cells. */ nTotal = 0; for(iCell=0; iCell<nCell; iCell++) nTotal += (anCellSz[iCell] + 2); /* The loop in the previous block populated the anOut[] array in such a ** way as to make the (nOut-1) leftmost pages completely full but leave ** the rightmost page partially empty. This block redistributes cells ** a bit more evenly. This block may reduce one or more of the values in ** the anOut[] array, but will not increase any. No values are reduced ** to values lower than 1. */ iCell = nCell; for(iPg=(nOut-2); iPg>=0; iPg--){ int nByte = 0; /* Number of bytes of content on page */ int nGoal = nTotal / (iPg + 2); for(/* noop */; iCell>0 && ((nByte<nGoal) || iCell>anOut[iPg]); iCell--){ int nThis = (anCellSz[iCell-1] + 2); if( (nThis + nByte)>nSpacePerPage ) break; nByte += nThis; } assert( iCell<=anOut[iPg] ); anOut[iPg] = iCell; nTotal = nByte; } #ifndef NDEBUG { int iDbg; fprintf(stderr, "btInsertAndRebalance(): nIn=%d anIn[] = ", nIn); for(iDbg=0; iDbg<nIn; iDbg++){ u8 *aData = sqlite4BtPageData(apPg[iDbg]); fprintf(stderr, "%d ", btCellCount(aData, pgsz)); } fprintf(stderr, " -> nOut=%d anOut[] = ", nOut); for(iDbg=0; iDbg<nOut; iDbg++){ fprintf(stderr, "%d ", anOut[iDbg]); } fprintf(stderr, "\n"); } #endif /* Allocate buffers for the output leaves */ for(iPg=0; iPg<nOut; iPg++){ rc = btNewBuffer(pDb, &apOut[iPg]); if( rc!=SQLITE4_OK ) goto rebalance_out; } /* Populate buffers for the output leaves */ iPg = 0; iPgIn = 0; iPgInFirst = 0; for(iCell=0; iCell<nCell; iCell++){ int iOff; /* Output page offset */ u8 *aOut; /* Output page buffer */ if( iCell==anOut[iPg] ) iPg++; aOut = apOut[iPg]; iOff = btFreeOffset(aOut, pgsz); if( iCell==iIns ){ iOff += btKVCellWrite(pKV, pgsz, &aOut[iOff]); }else{ u8 *aIn = sqlite4BtPageData(apPg[iPgIn]); int iPgCell = iCell - iPgInFirst; if( iPgCell>=btCellCount(aIn, pgsz) ){ iPgIn++; iPgInFirst = iCell; iPgCell = 0; aIn = sqlite4BtPageData(apPg[iPgIn]); } memcpy(&aOut[iOff], btCellFind(aIn, pgsz, iPgCell), anCellSz[iCell]); iOff += anCellSz[iCell]; } btPutU16(&aOut[pgsz-6], iOff); } /* Clobber the old pages with the new buffers */ for(iPg=0; iPg<nOut; iPg++){ BtPage *pPg; if( iPg<nIn ){ pPg = apPg[iPg]; }else{ rc = sqlite4BtPageAllocate(pDb->pPager, &apPg[iPg]); if( rc!=SQLITE4_OK ) goto rebalance_out; } btSetBuffer(pDb, pPg, apOut[iPg]); apOut[iPg] = 0; } /* The leaves are written. Now gather the keys and page numbers to ** push up into the parent page. */ for(iPg=0; iPg<(nOut-1); iPg++){ u8 *aData = sqlite4BtPageData(apPg[iPg]); u8 *pCell; pCell = btCellFind(aData, pgsz, btCellCount(aData, pgsz)-1); aPCell[iPg].pgno = sqlite4BtPagePgno(apPg[iPg]); pCell += sqlite4BtVarintGet32(pCell, &aPCell[iPg].nK); aPCell[iPg].pK = pCell; } assert( nIn==1 && nOut==2 ); rc = btSetChildPgno(pDb, pPar, iSib+nIn-1, sqlite4BtPagePgno(apPg[nOut-1])); if( rc!=SQLITE4_OK ) goto rebalance_out; pCsr->nPg--; rc = btModifyPage(pCsr, nIn-1, nOut-1, aPCell); if( rc!=SQLITE4_OK ) goto rebalance_out; rebalance_out: for(iPg=0; iPg<nIn; iPg++){ sqlite4BtPageRelease(apPg[iPg]); } return rc; } static int btExtendTree(bt_cursor *pCsr){ bt_db * const pDb = pCsr->pDb; const int pgsz = sqlite4BtPagerPagesize(pDb->pPager); int rc; /* Return code */ BtPage *pNew; /* New (and only) child of root page */ BtPage *pRoot = pCsr->apPage[0]; assert( pCsr->nPg==1 ); rc = sqlite4BtPageWrite(pRoot); if( rc==SQLITE4_OK ){ rc = sqlite4BtPageAllocate(pDb->pPager, &pNew); } if( rc==SQLITE4_OK ){ u8 *aRoot = sqlite4BtPageData(pRoot); u8 *aData = sqlite4BtPageData(pNew); memcpy(aData, aRoot, pgsz); aRoot[0] = BT_PGFLAGS_INTERNAL; btPutU32(&aRoot[1], sqlite4BtPagePgno(pNew)); btPutU16(&aRoot[pgsz-2], 0); btPutU16(&aRoot[pgsz-4], 5); btPutU16(&aRoot[pgsz-6], pgsz - 5 - 6); pCsr->nPg = 2; pCsr->aiCell[1] = pCsr->aiCell[0]; pCsr->apPage[1] = pNew; pCsr->aiCell[0] = 0; } return rc; } static int btModifyPage( bt_cursor *pCsr, /* Cursor identifying page to modify */ int nDel, /* Number of entries to delete from page */ int nKV, /* Number of entries in apKV */ KeyValue *apKV /* New cells to insert into the page */ ){ int rc = SQLITE4_OK; const int pgsz = sqlite4BtPagerPagesize(pCsr->pDb->pPager); u8 *aData; /* Page buffer */ int nCell; /* Number of cells on this page already */ int nFree; /* Contiguous free space on this page */ int nReq; /* Space required for type (a) cell */ int iCell; /* Position to insert new key */ int iWrite; /* Byte offset at which to write new cell */ BtPage *pLeaf; KeyValue *pKV; assert( nDel==0 && nKV==1 ); pKV = apKV; /* Bytes of space required on the current page. */ nReq = btKVCellSize(pKV, pgsz) + 2; iCell = pCsr->aiCell[pCsr->nPg-1]; assert( pCsr->nPg>0 ); pLeaf = pCsr->apPage[pCsr->nPg-1]; aData = (u8*)sqlite4BtPageData(pLeaf); nCell = btCellCount(aData, pgsz); assert( iCell<=btCellCount(aData, pgsz) ); if( nCell==0 ){ /* If the nCell field is zero, then the rest of the header may ** contain invalid values (zeroes - as it may never have been ** initialized). So set our stack variables to values appropriate ** to an empty page explicitly here. */ iWrite = ((btFlags(aData, pgsz) & BT_PGFLAGS_INTERNAL) ? 5 : 1); nFree = pgsz - iWrite - 6; }else{ if( btFreeContiguous(aData, pgsz)<nReq && btFreeSpace(aData, pgsz)>=nReq ){ /* Special case - the new entry will not fit on the page at present ** but would if the page were defragmented. So defragment it before ** continuing. */ rc = btDefragmentPage(pCsr->pDb, pLeaf); aData = sqlite4BtPageData(pLeaf); } iWrite = btFreeOffset(aData, pgsz); nFree = btFreeContiguous(aData, pgsz); } if( nFree>=nReq ){ /* The new entry will fit on the leaf page. So in this case all there ** is to do is update this single page. The easy case. */ rc = sqlite4BtPageWrite(pLeaf); if( rc==SQLITE4_OK ){ aData = sqlite4BtPageData(pLeaf); /* Update the cell pointer array */ if( iCell!=nCell ){ u8 *aFrom = btCellPtrFind(aData, pgsz, nCell-1); u8 *aTo = btCellPtrFind(aData, pgsz, nCell); memmove(aTo, aFrom, (nCell-iCell) * 2); } btPutU16(btCellPtrFind(aData, pgsz, iCell), iWrite); /* Increase cell count */ btPutU16(&aData[pgsz-2], nCell+1); /* Write the cell itself */ iWrite += btKVCellWrite(pKV, pgsz, &aData[iWrite]); /* Set the new total free space */ if( nCell==0 ){ btPutU16(&aData[pgsz-4], nFree - nReq); }else{ btPutU16(&aData[pgsz-4], btFreeSpace(aData, pgsz) - nReq); } /* Set the offset to the block of empty space */ btPutU16(&aData[pgsz-6], iWrite); } }else{ /* The new entry will not fit on the leaf page. Entries will have ** to be shuffled between existing leaves and new leaves may need ** to be added to make space for it. */ if( pCsr->nPg==1 ){ rc = btExtendTree(pCsr); } if( rc==SQLITE4_OK ){ rc = btInsertAndRebalance(pCsr, pKV); } } return rc; } static int btRemoveFromLeaf(bt_cursor *pCsr){ const int pgsz = sqlite4BtPagerPagesize(pCsr->pDb->pPager); int rc = SQLITE4_OK; /* Return code */ BtPage *pLeaf; /* Leaf page */ pLeaf = pCsr->apPage[pCsr->nPg-1]; rc = sqlite4BtPageWrite(pLeaf); if( rc==SQLITE4_OK ){ u8 *aData; /* Page buffer */ int nCell; /* Number of cells initially on this page */ int iDel; /* Index of cell to delete */ int nByte; /* Size of cell to delete in bytes */ iDel = pCsr->aiCell[pCsr->nPg-1]; aData = (u8*)sqlite4BtPageData(pLeaf); nCell = btCellCount(aData, pgsz); btCellFindSize(aData, pgsz, iDel, &nByte); if( iDel<(nCell-1) ){ u8 *aTo = btCellPtrFind(aData, pgsz, nCell-2); u8 *aFrom = btCellPtrFind(aData, pgsz, nCell-1); memmove(aTo, aFrom, 2*(nCell-iDel-1)); } /* Decrease cell count */ btPutU16(&aData[pgsz-2], nCell-1); /* Increase total free space */ btPutU16(&aData[pgsz-4], btFreeSpace(aData, pgsz) + nByte + 2); } return rc; } /* ** Insert a new key/value pair or replace an existing one. |
︙ | ︙ | |||
683 684 685 686 687 688 689 | rc = sqlite4BtCsrSeek(&csr, pK, nK, BT_SEEK_GE); } } if( rc==SQLITE4_OK ) rc = btErrorBkpt(SQLITE4_CORRUPT); if( rc==SQLITE4_NOTFOUND || rc==SQLITE4_INEXACT ){ /* Insert the new KV pair into the current leaf. */ | > > > > | | 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 | rc = sqlite4BtCsrSeek(&csr, pK, nK, BT_SEEK_GE); } } if( rc==SQLITE4_OK ) rc = btErrorBkpt(SQLITE4_CORRUPT); if( rc==SQLITE4_NOTFOUND || rc==SQLITE4_INEXACT ){ /* Insert the new KV pair into the current leaf. */ KeyValue kv; kv.pgno = 0; kv.pK = pK; kv.nK = nK; kv.pV = pV; kv.nV = nV; rc = btModifyPage(&csr, 0, 1, &kv); } return rc; } int sqlite4BtDelete(bt_cursor *pCsr){ return btRemoveFromLeaf(pCsr); |
︙ | ︙ |
Changes to src/bt_pager.c.
︙ | ︙ | |||
504 505 506 507 508 509 510 511 512 513 514 515 516 517 | } int sqlite4BtPageRelease(BtPage *pPg){ assert( pPg->nRef>=1 ); pPg->nRef--; return SQLITE4_OK; } /* ** Allocate a new database page and return a writable reference to it. */ int sqlite4BtPageAllocate(BtPager *p, BtPage **ppPg){ BtPage *pPg = 0; int rc; | > > > > > > | 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 | } int sqlite4BtPageRelease(BtPage *pPg){ assert( pPg->nRef>=1 ); pPg->nRef--; return SQLITE4_OK; } void sqlite4BtPageReference(BtPage *pPg){ assert( pPg->nRef>=1 ); pPg->nRef++; return SQLITE4_OK; } /* ** Allocate a new database page and return a writable reference to it. */ int sqlite4BtPageAllocate(BtPager *p, BtPage **ppPg){ BtPage *pPg = 0; int rc; |
︙ | ︙ |
Changes to test/simple3.test.
︙ | ︙ | |||
44 45 46 47 48 49 50 51 52 53 | do_execsql_test 1.6 { UPDATE t1 SET b = 5; } do_execsql_test 1.7 { SELECT rowid, a, b FROM t1; } {1 abc 5 2 ghi 5} finish_test | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 | do_execsql_test 1.6 { UPDATE t1 SET b = 5; } do_execsql_test 1.7 { SELECT rowid, a, b FROM t1; } {1 abc 5 2 ghi 5} #execsql { PRAGMA kv_trace = 1 } do_execsql_test 1.8 { DELETE FROM t1 WHERE 1; } do_execsql_test 1.9 { SELECT * FROM t1; DROP TABLE t1; SELECT * FROM sqlite_kvstore; } #-------------------------------------------------------------------------- set val [string repeat x 200] do_execsql_test 2.0 { CREATE TABLE t1(a PRIMARY KEY, b); INSERT INTO t1 VALUES(1, $val); INSERT INTO t1 VALUES(2, $val); INSERT INTO t1 VALUES(3, $val); INSERT INTO t1 VALUES(4, $val); } do_execsql_test 2.1 { DELETE FROM t1 WHERE a = 2; } do_execsql_test 2.2 { INSERT INTO t1 VALUES(5, $val); } do_execsql_test 2.3 { SELECT a FROM t1 } {1 3 4 5} do_execsql_test 2.4 { INSERT INTO t1 VALUES(6, $val); } do_execsql_test 2.5 { SELECT a FROM t1 } {1 3 4 5} finish_test |
Changes to www/bt.wiki.
︙ | ︙ | |||
502 503 504 505 506 507 508 509 510 511 512 513 514 515 | <p><b>Page Footer</b> <p> Starting from the end of the page, the fields in the page footer are: <ul> <li> 2 bytes - Number of cells on this page. <li> 2 bytes - Offset of first byte after last cell. <li> 2 bytes for each cell - the offset to the start of the cell. </ul> <h4 id=cell_formats>2.2.3.8. Cell Formats</h4> <p><b>B-Tree Nodes</b> | > | 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 | <p><b>Page Footer</b> <p> Starting from the end of the page, the fields in the page footer are: <ul> <li> 2 bytes - Number of cells on this page. <li> 2 bytes - Total free space on page, in bytes. <li> 2 bytes - Offset of first byte after last cell. <li> 2 bytes for each cell - the offset to the start of the cell. </ul> <h4 id=cell_formats>2.2.3.8. Cell Formats</h4> <p><b>B-Tree Nodes</b> |
︙ | ︙ |