1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
|
-
+
|
/*
** 2004 April 6
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btree.c,v 1.185 2004/09/01 16:12:25 drh Exp $
** $Id: btree.c,v 1.186 2004/09/03 18:38:45 drh Exp $
**
** This file implements a external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
**
** Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
** "Sorting And Searching", pages 473-480. Addison-Wesley
** Publishing Company, Reading, Massachusetts.
|
︙ | | |
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
|
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
|
-
+
-
+
|
#include "os.h"
#include <assert.h>
/* The following value is the maximum cell size assuming a maximum page
** size give above.
*/
#define MX_CELL_SIZE (SQLITE_MAX_PAGE_SIZE-8)
#define MX_CELL_SIZE(pBt) (pBt->pageSize-8)
/* The maximum number of cells on a single page of the database. This
** assumes a minimum cell size of 3 bytes. Such small cells will be
** exceedingly rare, but they are possible.
*/
#define MX_CELL ((SQLITE_MAX_PAGE_SIZE-8)/3)
#define MX_CELL(pBt) ((pBt->pageSize-8)/3)
/* Forward declarations */
typedef struct MemPage MemPage;
/*
** This is a magic string that appears at the beginning of every
** SQLite database in order to identify the file as a real database.
|
︙ | | |
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
|
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
|
-
+
+
+
|
#if defined(BTREE_DEBUG) && !defined(NDEBUG) && 0
static void _pageIntegrity(MemPage *pPage){
int usableSize;
u8 *data;
int i, j, idx, c, pc, hdr, nFree;
int cellOffset;
int nCell, cellLimit;
u8 used[SQLITE_MAX_PAGE_SIZE];
u8 *used;
used = sqliteMallocRaw( pPage->pBt->pageSize );
if( used==0 ) return;
usableSize = pPage->pBt->usableSize;
assert( pPage->aData==&((unsigned char*)pPage)[-pPage->pBt->pageSize] );
hdr = pPage->hdrOffset;
assert( hdr==(pPage->pgno==1 ? 100 : 0) );
assert( pPage->pgno==sqlite3pager_pagenumber(pPage->aData) );
c = pPage->aData[hdr];
if( pPage->isInit ){
|
︙ | | |
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
|
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
|
+
-
+
-
-
+
+
+
+
|
}
nFree = 0;
for(i=0; i<usableSize; i++){
assert( used[i]<=1 );
if( used[i]==0 ) nFree++;
}
assert( nFree==data[hdr+7] );
sqliteFree(used);
}
#define pageIntegrity(X) _pageIntegrity(X)
#else
# define pageIntegrity(X)
#endif
/*
** Defragment the page given. All Cells are moved to the
** beginning of the page and all free space is collected
** into one big FreeBlk at the end of the page.
*/
static void defragmentPage(MemPage *pPage){
static int defragmentPage(MemPage *pPage){
int i; /* Loop counter */
int pc; /* Address of a i-th cell */
int addr; /* Offset of first byte after cell pointer array */
int hdr; /* Offset to the page header */
int size; /* Size of a cell */
int usableSize; /* Number of usable bytes on a page */
int cellOffset; /* Offset to the cell pointer array */
int brk; /* Offset to the cell content area */
int nCell; /* Number of cells on the page */
unsigned char *data; /* The page data */
unsigned char temp[SQLITE_MAX_PAGE_SIZE]; /* Temp area for cell content */
unsigned char *data; /* The page data */
unsigned char *temp; /* Temp area for cell content */
assert( sqlite3pager_iswriteable(pPage->aData) );
assert( pPage->pBt!=0 );
assert( pPage->pBt->usableSize <= SQLITE_MAX_PAGE_SIZE );
assert( pPage->nOverflow==0 );
temp = sqliteMalloc( pPage->pBt->pageSize );
if( temp==0 ) return SQLITE_NOMEM;
data = pPage->aData;
hdr = pPage->hdrOffset;
cellOffset = pPage->cellOffset;
nCell = pPage->nCell;
assert( nCell==get2byte(&data[hdr+3]) );
usableSize = pPage->pBt->usableSize;
brk = get2byte(&data[hdr+5]);
|
︙ | | |
639
640
641
642
643
644
645
646
647
648
649
650
651
652
|
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
|
+
+
|
assert( brk>=cellOffset+2*nCell );
put2byte(&data[hdr+5], brk);
data[hdr+1] = 0;
data[hdr+2] = 0;
data[hdr+7] = 0;
addr = cellOffset+2*nCell;
memset(&data[addr], 0, brk-addr);
sqliteFree(temp);
return SQLITE_OK;
}
/*
** Allocate nByte bytes of space on a page.
**
** Return the index into pPage->aData[] of the first byte of
** the new allocation. Or return 0 if there is not enough free
|
︙ | | |
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
|
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
|
-
+
|
/* Allocate memory from the gap in between the cell pointer array
** and the cell content area.
*/
top = get2byte(&data[hdr+5]);
nCell = get2byte(&data[hdr+3]);
cellOffset = pPage->cellOffset;
if( nFrag>=60 || cellOffset + 2*nCell > top - nByte ){
defragmentPage(pPage);
if( defragmentPage(pPage) ) return 0;
top = get2byte(&data[hdr+5]);
}
top -= nByte;
assert( cellOffset + 2*nCell <= top );
put2byte(&data[hdr+5], top);
return top;
}
|
︙ | | |
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
|
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
|
+
+
-
-
+
+
-
+
-
+
-
+
|
MemPage *pPage, /* The page to be initialized */
MemPage *pParent /* The parent. Might be NULL */
){
int pc; /* Address of a freeblock within pPage->aData[] */
int i; /* Loop counter */
int hdr; /* Offset to beginning of page header */
u8 *data; /* Equal to pPage->aData */
Btree *pBt; /* The main btree structure */
int usableSize; /* Amount of usable space on each page */
int cellOffset; /* Offset from start of page to first cell pointer */
int nFree; /* Number of unused bytes on the page */
int top; /* First byte of the cell content area */
pBt = pPage->pBt;
assert( pPage->pBt!=0 );
assert( pParent==0 || pParent->pBt==pPage->pBt );
assert( pBt!=0 );
assert( pParent==0 || pParent->pBt==pBt );
assert( pPage->pgno==sqlite3pager_pagenumber(pPage->aData) );
assert( pPage->aData == &((unsigned char*)pPage)[-pPage->pBt->pageSize] );
assert( pPage->aData == &((unsigned char*)pPage)[-pBt->pageSize] );
if( pPage->pParent!=pParent && (pPage->pParent!=0 || pPage->isInit) ){
/* The parent page should never change unless the file is corrupt */
return SQLITE_CORRUPT; /* bkpt-CORRUPT */
}
if( pPage->isInit ) return SQLITE_OK;
if( pPage->pParent==0 && pParent!=0 ){
pPage->pParent = pParent;
sqlite3pager_ref(pParent->aData);
}
hdr = pPage->hdrOffset;
data = pPage->aData;
decodeFlags(pPage, data[hdr]);
pPage->nOverflow = 0;
pPage->idxShift = 0;
usableSize = pPage->pBt->usableSize;
usableSize = pBt->usableSize;
pPage->cellOffset = cellOffset = hdr + 12 - 4*pPage->leaf;
top = get2byte(&data[hdr+5]);
pPage->nCell = get2byte(&data[hdr+3]);
if( pPage->nCell>MX_CELL ){
if( pPage->nCell>MX_CELL(pBt) ){
/* To many cells for a single page. The page must be corrupt */
return SQLITE_CORRUPT; /* bkpt-CORRUPT */
}
if( pPage->nCell==0 && pParent!=0 && pParent->pgno!=1 ){
/* All pages must have at least one cell, except for root pages */
return SQLITE_CORRUPT; /* bkpt-CORRUPT */
}
|
︙ | | |
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
|
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
|
-
+
|
pBt->maxLocal = (pBt->usableSize-12)*pBt->maxEmbedFrac/255 - 23;
pBt->minLocal = (pBt->usableSize-12)*pBt->minEmbedFrac/255 - 23;
pBt->maxLeaf = pBt->usableSize - 35;
pBt->minLeaf = (pBt->usableSize-12)*pBt->minLeafFrac/255 - 23;
if( pBt->minLocal>pBt->maxLocal || pBt->maxLocal<0 ){
goto page1_init_failed;
}
assert( pBt->maxLeaf + 23 <= MX_CELL_SIZE );
assert( pBt->maxLeaf + 23 <= MX_CELL_SIZE(pBt) );
pBt->pPage1 = pPage1;
pBt->pageSizeFixed = 1;
return SQLITE_OK;
page1_init_failed:
releasePage(pPage1);
pBt->pPage1 = 0;
|
︙ | | |
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
|
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
|
+
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
|
int rc; /* The return code */
int leafCorrection; /* 4 if pPage is a leaf. 0 if not */
int leafData; /* True if pPage is a leaf of a LEAFDATA tree */
int usableSpace; /* Bytes in pPage beyond the header */
int pageFlags; /* Value of pPage->aData[0] */
int subtotal; /* Subtotal of bytes in cells on one page */
int iSpace = 0; /* First unused byte of aSpace[] */
int mxCellPerPage; /* Maximum number of cells in one page */
MemPage *apOld[NB]; /* pPage and up to two siblings */
Pgno pgnoOld[NB]; /* Page numbers for each page in apOld[] */
MemPage *apCopy[NB]; /* Private copies of apOld[] pages */
MemPage *apNew[NB+2]; /* pPage and up to NB siblings after balancing */
Pgno pgnoNew[NB+2]; /* Page numbers for each page in apNew[] */
int idxDiv[NB]; /* Indices of divider cells in pParent */
u8 *apDiv[NB]; /* Divider cells in pParent */
int cntNew[NB+2]; /* Index in aCell[] of cell after i-th page */
int szNew[NB+2]; /* Combined size of cells place on i-th page */
u8 *apCell[(MX_CELL+2)*NB]; /* All cells from pages being balanced */
int szCell[(MX_CELL+2)*NB]; /* Local size of all cells */
u8 aCopy[NB][SQLITE_MAX_PAGE_SIZE+sizeof(MemPage)]; /* Space for apCopy[] */
u8 aSpace[SQLITE_MAX_PAGE_SIZE*5]; /* Space to copies of divider cells */
u8 **apCell; /* All cells begin balanced */
int *szCell; /* Local size of all cells in apCell[] */
u8 *aCopy[NB]; /* Space for holding data of apCopy[] */
u8 *aSpace; /* Space to hold copies of dividers cells */
/*
** Find the parent page.
*/
assert( pPage->isInit );
assert( sqlite3pager_iswriteable(pPage->aData) );
pBt = pPage->pBt;
pParent = pPage->pParent;
sqlite3pager_write(pParent->aData);
assert( pParent );
TRACE(("BALANCE: begin page %d child of %d\n", pPage->pgno, pParent->pgno));
/*
** Allocate space for memory structures
*/
mxCellPerPage = MX_CELL(pBt);
apCell = sqliteMallocRaw(
(mxCellPerPage+2)*NB*(sizeof(u8*)+sizeof(int))
+ sizeof(MemPage)*NB
+ pBt->pageSize*(5+NB)
);
if( apCell==0 ){
return SQLITE_NOMEM;
}
szCell = (int*)&apCell[(mxCellPerPage+2)*NB];
aCopy[0] = (u8*)&szCell[(mxCellPerPage+2)*NB];
for(i=1; i<NB; i++){
aCopy[i] = &aCopy[i-1][pBt->pageSize+sizeof(MemPage)];
}
aSpace = &aCopy[NB-1][pBt->pageSize+sizeof(MemPage)];
/*
** Find the cell in the parent page whose left child points back
** to pPage. The "idx" variable is the index of that cell. If pPage
** is the rightmost child of pParent then set idx to pParent->nCell
*/
if( pParent->idxShift ){
|
︙ | | |
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
|
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
|
-
+
|
/*
** Make copies of the content of pPage and its siblings into aOld[].
** The rest of this function will use data from the copies rather
** that the original pages since the original pages will be in the
** process of being overwritten.
*/
for(i=0; i<nOld; i++){
MemPage *p = apCopy[i] = (MemPage*)&aCopy[i+1][-(int)sizeof(MemPage)];
MemPage *p = apCopy[i] = (MemPage*)&aCopy[i][pBt->pageSize];
p->aData = &((u8*)p)[-pBt->pageSize];
memcpy(p->aData, apOld[i]->aData, pBt->pageSize + sizeof(MemPage));
p->aData = &((u8*)p)[-pBt->pageSize];
}
/*
** Load pointers to all cells on sibling pages and the divider cells
|
︙ | | |
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
|
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
|
-
+
|
*/
dropCell(pParent, nxDiv, sz);
}else{
u8 *pTemp;
szCell[nCell] = sz;
pTemp = &aSpace[iSpace];
iSpace += sz;
assert( iSpace<=sizeof(aSpace) );
assert( iSpace<=pBt->pageSize*5 );
memcpy(pTemp, apDiv[i], sz);
apCell[nCell] = pTemp+leafCorrection;
dropCell(pParent, nxDiv, sz);
szCell[nCell] -= leafCorrection;
assert( get4byte(pTemp)==pgnoOld[i] );
if( !pOld->leaf ){
assert( leafCorrection==0 );
|
︙ | | |
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
|
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
|
-
+
-
+
|
}else if( leafData ){
CellInfo info;
j--;
parseCellPtr(pNew, apCell[j], &info);
pCell = &aSpace[iSpace];
fillInCell(pParent, pCell, 0, info.nKey, 0, 0, &sz);
iSpace += sz;
assert( iSpace<=sizeof(aSpace) );
assert( iSpace<=pBt->pageSize*5 );
pTemp = 0;
}else{
pCell -= 4;
pTemp = &aSpace[iSpace];
iSpace += sz;
assert( iSpace<=sizeof(aSpace) );
assert( iSpace<=pBt->pageSize*5 );
}
insertCell(pParent, nxDiv, pCell, sz, pTemp);
put4byte(findOverflowCell(pParent,nxDiv), pNew->pgno);
j++;
nxDiv++;
}
}
|
︙ | | |
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
|
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
|
+
-
-
-
+
+
+
+
+
+
+
+
+
+
-
+
-
+
|
/* pageIntegrity(pPage); // No! pPage might have been added to freelist */
rc = balance(pParent);
/*
** Cleanup before returning.
*/
balance_cleanup:
sqliteFree(apCell);
for(i=0; i<nOld; i++){
releasePage(apOld[i]);
}
for(i=0; i<nNew; i++){
releasePage(apNew[i]);
}
releasePage(pParent);
TRACE(("BALANCE: finished with %d: old=%d new=%d cells=%d\n",
pPage->pgno, nOld, nNew, nCell));
return rc;
}
/*
** This routine is called for the root page of a btree when the root
** page contains no cells. This is an opportunity to make the tree
** shallower by one level.
*/
static int balance_shallower(MemPage *pPage){
MemPage *pChild; /* The only child page of pPage */
Pgno pgnoChild; /* Page number for pChild */
int rc; /* Return code from subprocedures */
u8 *apCell[(MX_CELL+2)*NB]; /* All cells from pages being balanced */
int szCell[(MX_CELL+2)*NB]; /* Local size of all cells */
int rc = SQLITE_OK; /* Return code from subprocedures */
Btree *pBt; /* The main BTree structure */
int mxCellPerPage; /* Maximum number of cells per page */
u8 **apCell; /* All cells from pages being balanced */
int *szCell; /* Local size of all cells */
assert( pPage->pParent==0 );
assert( pPage->nCell==0 );
pBt = pPage->pBt;
mxCellPerPage = MX_CELL(pBt);
apCell = sqliteMallocRaw( mxCellPerPage*(sizeof(u8*)+sizeof(int)) );
if( apCell==0 ) return SQLITE_NOMEM;
szCell = (int*)&apCell[mxCellPerPage];
if( pPage->leaf ){
/* The table is completely empty */
TRACE(("BALANCE: empty table %d\n", pPage->pgno));
}else{
/* The root page is empty but has one child. Transfer the
** information from that one child into the root page if it
** will fit. This reduces the depth of the tree by one.
**
** If the root page is page 1, it has less space available than
** its child (due to the 100 byte header that occurs at the beginning
** of the database fle), so it might not be able to hold all of the
** information currently contained in the child. If this is the
** case, then do not do the transfer. Leave page 1 empty except
** for the right-pointer to the child page. The child page becomes
** the virtual root of the tree.
*/
pgnoChild = get4byte(&pPage->aData[pPage->hdrOffset+8]);
assert( pgnoChild>0 );
assert( pgnoChild<=sqlite3pager_pagecount(pPage->pBt->pPager) );
rc = getPage(pPage->pBt, pgnoChild, &pChild);
if( rc ) return rc;
if( rc ) goto end_shallow_balance;
if( pPage->pgno==1 ){
rc = initPage(pChild, pPage);
if( rc ) return rc;
if( rc ) goto end_shallow_balance;
assert( pChild->nOverflow==0 );
if( pChild->nFree>=100 ){
/* The child information will fit on the root page, so do the
** copy */
int i;
zeroPage(pPage, pChild->aData[0]);
for(i=0; i<pChild->nCell; i++){
|
︙ | | |
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
|
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
|
+
+
-
+
|
freePage(pChild);
TRACE(("BALANCE: transfer child %d into root %d\n",
pChild->pgno, pPage->pgno));
}
reparentChildPages(pPage);
releasePage(pChild);
}
end_shallow_balance:
sqliteFree(apCell);
return SQLITE_OK;
return rc;
}
/*
** The root page is overfull
**
** When this happens, Create a new child page and copy the
|
︙ | | |
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
|
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
|
-
+
|
){
int rc;
int loc;
int szNew;
MemPage *pPage;
Btree *pBt = pCur->pBt;
unsigned char *oldCell;
unsigned char newCell[MX_CELL_SIZE];
unsigned char *newCell = 0;
if( pCur->status ){
return pCur->status; /* A rollback destroyed this cursor */
}
if( pBt->inTrans!=TRANS_WRITE ){
/* Must start a transaction before doing an insert */
return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
|
︙ | | |
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
|
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
|
+
+
-
+
-
+
-
+
+
+
|
assert( pPage->leaf || !pPage->leafData );
TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n",
pCur->pgnoRoot, nKey, nData, pPage->pgno,
loc==0 ? "overwrite" : "new entry"));
assert( pPage->isInit );
rc = sqlite3pager_write(pPage->aData);
if( rc ) return rc;
newCell = sqliteMallocRaw( MX_CELL_SIZE(pBt) );
if( newCell==0 ) return SQLITE_NOMEM;
rc = fillInCell(pPage, newCell, pKey, nKey, pData, nData, &szNew);
if( rc ) return rc;
if( rc ) goto end_insert;
assert( szNew==cellSizePtr(pPage, newCell) );
assert( szNew<=sizeof(newCell) );
assert( szNew<=MX_CELL_SIZE(pBt) );
if( loc==0 && pCur->isValid ){
int szOld;
assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
oldCell = findCell(pPage, pCur->idx);
if( !pPage->leaf ){
memcpy(newCell, oldCell, 4);
}
szOld = cellSizePtr(pPage, oldCell);
rc = clearCell(pPage, oldCell);
if( rc ) return rc;
if( rc ) goto end_insert;
dropCell(pPage, pCur->idx, szOld);
}else if( loc<0 && pPage->nCell>0 ){
assert( pPage->leaf );
pCur->idx++;
pCur->info.nSize = 0;
}else{
assert( pPage->leaf );
}
insertCell(pPage, pCur->idx, newCell, szNew, 0);
rc = balance(pPage);
/* sqlite3BtreePageDump(pCur->pBt, pCur->pgnoRoot, 1); */
/* fflush(stdout); */
moveToRoot(pCur);
end_insert:
sqliteFree(newCell);
return rc;
}
/*
** Delete the entry that the cursor is pointing to. The cursor
** is left pointing at a random location.
*/
|
︙ | | |
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
|
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
|
-
+
-
+
+
+
+
|
** next Cell after the one to be deleted is guaranteed to exist and
** to be a leaf so we can use it.
*/
BtCursor leafCur;
unsigned char *pNext;
int szNext;
int notUsed;
unsigned char tempCell[MX_CELL_SIZE];
unsigned char *tempCell;
assert( !pPage->leafData );
getTempCursor(pCur, &leafCur);
rc = sqlite3BtreeNext(&leafCur, ¬Used);
if( rc!=SQLITE_OK ){
if( rc!=SQLITE_NOMEM ){
rc = SQLITE_CORRUPT; /* bkpt-CORRUPT */
}
return rc;
}
rc = sqlite3pager_write(leafCur.pPage->aData);
if( rc ) return rc;
TRACE(("DELETE: table=%d delete internal from %d replace from leaf %d\n",
pCur->pgnoRoot, pPage->pgno, leafCur.pPage->pgno));
dropCell(pPage, pCur->idx, cellSizePtr(pPage, pCell));
pNext = findCell(leafCur.pPage, leafCur.idx);
szNext = cellSizePtr(leafCur.pPage, pNext);
assert( sizeof(tempCell)>=szNext+4 );
assert( MX_CELL_SIZE(pBt)>=szNext+4 );
tempCell = sqliteMallocRaw( MX_CELL_SIZE(pBt) );
if( tempCell==0 ) return SQLITE_NOMEM;
insertCell(pPage, pCur->idx, pNext-4, szNext+4, tempCell);
put4byte(findOverflowCell(pPage, pCur->idx), pgnoChild);
rc = balance(pPage);
sqliteFree(tempCell);
if( rc ) return rc;
dropCell(leafCur.pPage, leafCur.idx, szNext);
rc = balance(leafCur.pPage);
releaseTempCursor(&leafCur);
}else{
TRACE(("DELETE: table=%d delete from leaf %d\n",
pCur->pgnoRoot, pPage->pgno));
|
︙ | | |
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
|
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105
4106
4107
4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136
4137
4138
4139
4140
4141
4142
4143
4144
|
-
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
+
-
-
-
+
-
+
-
+
-
-
-
+
-
-
+
+
|
int *anRef; /* Number of times each page is referenced */
char *zErrMsg; /* An error message. NULL of no errors seen. */
};
/*
** Append a message to the error message string.
*/
static void checkAppendMsg(IntegrityCk *pCheck, char *zMsg1, char *zMsg2){
static void checkAppendMsg(
IntegrityCk *pCheck,
char *zMsg1,
const char *zFormat,
...
){
va_list ap;
char *zMsg2;
va_start(ap, zFormat);
zMsg2 = sqlite3VMPrintf(zFormat, ap);
va_end(ap);
if( zMsg1==0 ) zMsg1 = "";
if( pCheck->zErrMsg ){
char *zOld = pCheck->zErrMsg;
pCheck->zErrMsg = 0;
sqlite3SetString(&pCheck->zErrMsg, zOld, "\n", zMsg1, zMsg2, (char*)0);
sqliteFree(zOld);
}else{
sqlite3SetString(&pCheck->zErrMsg, zMsg1, zMsg2, (char*)0);
}
sqliteFree(zMsg2);
}
/*
** Add 1 to the reference count for page iPage. If this is the second
** reference to the page, add an error message to pCheck->zErrMsg.
** Return 1 if there are 2 ore more references to the page and 0 if
** if this is the first reference to the page.
**
** Also check that the page number is in bounds.
*/
static int checkRef(IntegrityCk *pCheck, int iPage, char *zContext){
if( iPage==0 ) return 1;
if( iPage>pCheck->nPage || iPage<0 ){
char zBuf[100];
sprintf(zBuf, "invalid page number %d", iPage);
checkAppendMsg(pCheck, zContext, zBuf);
checkAppendMsg(pCheck, zContext, "invalid page number %d", iPage);
return 1;
}
if( pCheck->anRef[iPage]==1 ){
char zBuf[100];
sprintf(zBuf, "2nd reference to page %d", iPage);
checkAppendMsg(pCheck, zContext, zBuf);
checkAppendMsg(pCheck, zContext, "2nd reference to page %d", iPage);
return 1;
}
return (pCheck->anRef[iPage]++)>1;
}
/*
** Check the integrity of the freelist or of an overflow page list.
** Verify that the number of pages on the list is N.
*/
static void checkList(
IntegrityCk *pCheck, /* Integrity checking context */
int isFreeList, /* True for a freelist. False for overflow page list */
int iPage, /* Page number for first page in the list */
int N, /* Expected number of pages in the list */
char *zContext /* Context for error messages */
){
int i;
int expected = N;
int iFirst = iPage;
char zMsg[100];
while( N-- > 0 ){
unsigned char *pOvfl;
if( iPage<1 ){
checkAppendMsg(pCheck, zContext,
sprintf(zMsg, "%d of %d pages missing from overflow list starting at %d",
"%d of %d pages missing from overflow list starting at %d",
N+1, expected, iFirst);
checkAppendMsg(pCheck, zContext, zMsg);
break;
}
if( checkRef(pCheck, iPage, zContext) ) break;
if( sqlite3pager_get(pCheck->pPager, (Pgno)iPage, (void**)&pOvfl) ){
sprintf(zMsg, "failed to get page %d", iPage);
checkAppendMsg(pCheck, zContext, zMsg);
checkAppendMsg(pCheck, zContext, "failed to get page %d", iPage);
break;
}
if( isFreeList ){
int n = get4byte(&pOvfl[4]);
if( n>pCheck->pBt->usableSize/4-8 ){
sprintf(zMsg, "freelist leaf count too big on page %d", iPage);
checkAppendMsg(pCheck, zContext, zMsg);
checkAppendMsg(pCheck, zContext,
"freelist leaf count too big on page %d", iPage);
N--;
}else{
for(i=0; i<n; i++){
checkRef(pCheck, get4byte(&pOvfl[8+i*4]), zContext);
}
N -= n;
}
|
︙ | | |
4128
4129
4130
4131
4132
4133
4134
4135
4136
4137
4138
4139
4140
4141
4142
4143
4144
4145
4146
4147
4148
4149
4150
4151
4152
4153
4154
4155
4156
4157
4158
4159
4160
|
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
4209
4210
|
-
-
+
-
-
+
+
-
-
+
|
int i, rc, depth, d2, pgno, cnt;
int hdr, cellStart;
int nCell;
u8 *data;
BtCursor cur;
Btree *pBt;
int maxLocal, usableSize;
char zMsg[100];
char zContext[100];
char hit[SQLITE_MAX_PAGE_SIZE];
char *hit;
/* Check that the page exists
*/
cur.pBt = pBt = pCheck->pBt;
usableSize = pBt->usableSize;
if( iPage==0 ) return 0;
if( checkRef(pCheck, iPage, zParentContext) ) return 0;
if( (rc = getPage(pBt, (Pgno)iPage, &pPage))!=0 ){
sprintf(zMsg, "unable to get the page. error code=%d", rc);
checkAppendMsg(pCheck, zContext, zMsg);
checkAppendMsg(pCheck, zContext,
"unable to get the page. error code=%d", rc);
return 0;
}
maxLocal = pPage->leafData ? pBt->maxLeaf : pBt->maxLocal;
if( (rc = initPage(pPage, pParent))!=0 ){
sprintf(zMsg, "initPage() returns error code %d", rc);
checkAppendMsg(pCheck, zContext, zMsg);
checkAppendMsg(pCheck, zContext, "initPage() returns error code %d", rc);
releasePage(pPage);
return 0;
}
/* Check out all the cells.
*/
depth = 0;
|
︙ | | |
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222
4223
4224
4225
4226
4227
4228
4229
4230
4231
4232
4233
4234
4235
4236
|
4243
4244
4245
4246
4247
4248
4249
4250
4251
4252
4253
4254
4255
4256
4257
4258
4259
4260
4261
4262
4263
4264
4265
4266
4267
4268
4269
4270
4271
4272
4273
4274
4275
4276
4277
4278
4279
4280
4281
4282
4283
4284
4285
4286
4287
4288
4289
4290
|
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
-
-
+
+
+
|
checkTreePage(pCheck, pgno, pPage, zContext,0,0,0,0);
}
/* Check for complete coverage of the page
*/
data = pPage->aData;
hdr = pPage->hdrOffset;
memset(hit, 0, usableSize);
memset(hit, 1, get2byte(&data[hdr+5]));
nCell = get2byte(&data[hdr+3]);
cellStart = hdr + 12 - 4*pPage->leaf;
for(i=0; i<nCell; i++){
int pc = get2byte(&data[cellStart+i*2]);
int size = cellSizePtr(pPage, &data[pc]);
int j;
for(j=pc+size-1; j>=pc; j--) hit[j]++;
}
for(cnt=0, i=get2byte(&data[hdr+1]); i>0 && i<usableSize && cnt<10000; cnt++){
int size = get2byte(&data[i+2]);
int j;
for(j=i+size-1; j>=i; j--) hit[j]++;
i = get2byte(&data[i]);
}
for(i=cnt=0; i<usableSize; i++){
if( hit[i]==0 ){
cnt++;
}else if( hit[i]>1 ){
hit = sqliteMalloc( usableSize );
if( hit ){
memset(hit, 1, get2byte(&data[hdr+5]));
nCell = get2byte(&data[hdr+3]);
cellStart = hdr + 12 - 4*pPage->leaf;
for(i=0; i<nCell; i++){
int pc = get2byte(&data[cellStart+i*2]);
int size = cellSizePtr(pPage, &data[pc]);
int j;
for(j=pc+size-1; j>=pc; j--) hit[j]++;
}
for(cnt=0, i=get2byte(&data[hdr+1]); i>0 && i<usableSize && cnt<10000;
cnt++){
int size = get2byte(&data[i+2]);
int j;
for(j=i+size-1; j>=i; j--) hit[j]++;
i = get2byte(&data[i]);
}
for(i=cnt=0; i<usableSize; i++){
if( hit[i]==0 ){
cnt++;
}else if( hit[i]>1 ){
sprintf(zMsg, "Multiple uses for byte %d of page %d", i, iPage);
checkAppendMsg(pCheck, zMsg, 0);
break;
}
}
if( cnt!=data[hdr+7] ){
sprintf(zMsg, "Fragmented space is %d byte reported as %d on page %d",
cnt, data[hdr+7], iPage);
checkAppendMsg(pCheck, 0,
"Multiple uses for byte %d of page %d", i, iPage);
break;
}
}
if( cnt!=data[hdr+7] ){
checkAppendMsg(pCheck, 0,
"Fragmented space is %d byte reported as %d on page %d",
cnt, data[hdr+7], iPage);
checkAppendMsg(pCheck, zMsg, 0);
}
}
}
sqliteFree(hit);
releasePage(pPage);
return depth+1;
}
/*
** This routine does a complete check of the given BTree file. aRoot[] is
|
︙ | | |
4278
4279
4280
4281
4282
4283
4284
4285
4286
4287
4288
4289
4290
4291
4292
4293
4294
4295
4296
4297
4298
4299
4300
4301
4302
4303
4304
4305
4306
4307
|
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
4357
|
-
-
-
+
-
+
-
-
|
checkTreePage(&sCheck, aRoot[i], 0, "List of tree roots: ", 0,0,0,0);
}
/* Make sure every page in the file is referenced
*/
for(i=1; i<=sCheck.nPage; i++){
if( sCheck.anRef[i]==0 ){
char zBuf[100];
sprintf(zBuf, "Page %d is never used", i);
checkAppendMsg(&sCheck, zBuf, 0);
checkAppendMsg(&sCheck, 0, "Page %d is never used", i);
}
}
/* Make sure this analysis did not leave any unref() pages
*/
unlockBtreeIfUnused(pBt);
if( nRef != *sqlite3pager_stats(pBt->pPager) ){
char zBuf[100];
checkAppendMsg(&sCheck, 0,
sprintf(zBuf,
"Outstanding page count goes from %d to %d during this analysis",
nRef, *sqlite3pager_stats(pBt->pPager)
);
checkAppendMsg(&sCheck, zBuf, 0);
}
/* Clean up and report errors.
*/
sqliteFree(sCheck.anRef);
return sCheck.zErrMsg;
}
|
︙ | | |