Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix various problems causing block and page leaks during merge operations.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: aaac4355c786c891834fb5504a9e756b924997de
User & Date: dan 2014-02-03 07:40:58.892
Context
2014-02-03
15:54
Zero shared-memory before attempting recovery operations. check-in: 4cb2e9c5af user: dan tags: trunk
07:40
Fix various problems causing block and page leaks during merge operations. check-in: aaac4355c7 user: dan tags: trunk
2014-01-26
17:13
Fix a bug in merge-tree rollback operations. check-in: 60c8db5553 user: dan tags: trunk
Changes
Unified Diff Ignore Whitespace Patch
Changes to lsm-test/lsmtest1.c.
318
319
320
321
322
323
324

325
326
327
328
329
330
331

void test_data_1(
  const char *zSystem,            /* Database system name */
  const char *zPattern,           /* Run test cases that match this pattern */
  int *pRc                        /* IN/OUT: Error code */
){
  Datatest1 aTest[] = {

    { {DATA_RANDOM,     20,25,     100,200},       1000,  250, 1000, 1},
    { {DATA_RANDOM,     8,10,      100,200},       1000,  250, 1000, 1},
    { {DATA_RANDOM,     8,10,      10,20},         1000,  250, 1000, 1},
    { {DATA_RANDOM,     8,10,      1000,2000},     1000,  250, 1000, 1},
    { {DATA_RANDOM,     8,100,     10000,20000},    100,   25,  100, 1},
    { {DATA_RANDOM,     80,100,    10,20},         1000,  250, 1000, 1},
    { {DATA_RANDOM,     5000,6000, 10,20},          100,   25,  100, 1},







>







318
319
320
321
322
323
324
325
326
327
328
329
330
331
332

void test_data_1(
  const char *zSystem,            /* Database system name */
  const char *zPattern,           /* Run test cases that match this pattern */
  int *pRc                        /* IN/OUT: Error code */
){
  Datatest1 aTest[] = {
    { {DATA_RANDOM,     500,600,   1000,2000},     1000,  100,  10,  0},
    { {DATA_RANDOM,     20,25,     100,200},       1000,  250, 1000, 1},
    { {DATA_RANDOM,     8,10,      100,200},       1000,  250, 1000, 1},
    { {DATA_RANDOM,     8,10,      10,20},         1000,  250, 1000, 1},
    { {DATA_RANDOM,     8,10,      1000,2000},     1000,  250, 1000, 1},
    { {DATA_RANDOM,     8,100,     10000,20000},    100,   25,  100, 1},
    { {DATA_RANDOM,     80,100,    10,20},         1000,  250, 1000, 1},
    { {DATA_RANDOM,     5000,6000, 10,20},          100,   25,  100, 1},
353
354
355
356
357
358
359



360
361
362
363
364
365
366
  int nData,
  int iSeed,
  TestDb *pControl,
  TestDb *pDb,
  int *pRc
){
  int i;




  testScanCompare(pControl, pDb, 0, 0, 0,         0, 0,         pRc);
  testScanCompare(pControl, pDb, 1, 0, 0,         0, 0,         pRc);

  if( *pRc==0 ){
    int iKey1;
    int iKey2;







>
>
>







354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
  int nData,
  int iSeed,
  TestDb *pControl,
  TestDb *pDb,
  int *pRc
){
  int i;

  static int nCall = 0;
  nCall++;

  testScanCompare(pControl, pDb, 0, 0, 0,         0, 0,         pRc);
  testScanCompare(pControl, pDb, 1, 0, 0,         0, 0,         pRc);

  if( *pRc==0 ){
    int iKey1;
    int iKey2;
Changes to src/bt.h.
199
200
201
202
203
204
205



206
207
208
209
210
211
212
  sqlite4_buffer output;
};

#define BT_INFO_PAGEDUMP       1
#define BT_INFO_FILENAME       2
#define BT_INFO_HDRDUMP        3
#define BT_INFO_PAGEDUMP_ASCII 4




typedef struct bt_logsizecb bt_logsizecb;
struct bt_logsizecb {
  void *pCtx;                     /* A copy of this is passed to xLogsize() */
  void (*xLogsize)(void*, int);   /* Callback function */
};








>
>
>







199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
  sqlite4_buffer output;
};

#define BT_INFO_PAGEDUMP       1
#define BT_INFO_FILENAME       2
#define BT_INFO_HDRDUMP        3
#define BT_INFO_PAGEDUMP_ASCII 4
#define BT_INFO_BLOCK_FREELIST 5
#define BT_INFO_PAGE_FREELIST  6
#define BT_INFO_PAGE_LEAKS     7

typedef struct bt_logsizecb bt_logsizecb;
struct bt_logsizecb {
  void *pCtx;                     /* A copy of this is passed to xLogsize() */
  void (*xLogsize)(void*, int);   /* Callback function */
};

Changes to src/btInt.h.
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95


96
97
98
99
100
101
102
  u32 blksz;                      /* Block size in bytes */
  u32 nPg;                        /* Size of database file in pages */

  u32 iRoot;                      /* B-tree root page */
  u32 iMRoot;                     /* Root page of meta-tree */
  u32 iSRoot;                     /* Root page of schedule-tree */

  u32 iSubBlock;                  /* Root of current sub-tree */
  u32 nSubPg;                     /* Number of non-overflow pages in sub-tree */

  u32 iCookie;                    /* Current value of schema cookie */
  u32 iFreePg;                    /* First page in free-page list trunk */
  u32 iFreeBlk;                   /* First page in free-block list trunk */
};


/*
** This struct defines the format of database "schedule" pages.


*/
typedef struct BtSchedule BtSchedule;
struct BtSchedule {
  u32 eBusy;                      /* One of the BT_SCHEDULE_XXX constants */
  u32 iAge;                       /* Age of input segments */
  u32 iMinLevel;                  /* Minimum level of input segments */
  u32 iMaxLevel;                  /* Maximum level of input segments */







|










>
>







78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
  u32 blksz;                      /* Block size in bytes */
  u32 nPg;                        /* Size of database file in pages */

  u32 iRoot;                      /* B-tree root page */
  u32 iMRoot;                     /* Root page of meta-tree */
  u32 iSRoot;                     /* Root page of schedule-tree */

  u32 iSubBlock;                  /* Block containing current sub-tree */
  u32 nSubPg;                     /* Number of non-overflow pages in sub-tree */

  u32 iCookie;                    /* Current value of schema cookie */
  u32 iFreePg;                    /* First page in free-page list trunk */
  u32 iFreeBlk;                   /* First page in free-block list trunk */
};


/*
** This struct defines the format of database "schedule" pages.
**
** eBusy:
*/
typedef struct BtSchedule BtSchedule;
struct BtSchedule {
  u32 eBusy;                      /* One of the BT_SCHEDULE_XXX constants */
  u32 iAge;                       /* Age of input segments */
  u32 iMinLevel;                  /* Minimum level of input segments */
  u32 iMaxLevel;                  /* Maximum level of input segments */
172
173
174
175
176
177
178



179
180
181
182
183
184
185

/*
** Allocate new database pages or blocks.
*/
int sqlite4BtPageAllocate(BtPager*, BtPage **ppPage);
int sqlite4BtBlockAllocate(BtPager*, int nBlk, u32 *piBlk);




/*
** Query page references.
*/
u32 sqlite4BtPagePgno(BtPage*);
void *sqlite4BtPageData(BtPage*);

/*







>
>
>







174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190

/*
** Allocate new database pages or blocks.
*/
int sqlite4BtPageAllocate(BtPager*, BtPage **ppPage);
int sqlite4BtBlockAllocate(BtPager*, int nBlk, u32 *piBlk);

/* Block trim */
int sqlite4BtBlockTrim(BtPager*, u32);

/*
** Query page references.
*/
u32 sqlite4BtPagePgno(BtPage*);
void *sqlite4BtPageData(BtPage*);

/*
324
325
326
327
328
329
330




331
332
333
334
335
336
337

  /* These are used only by the bt_lock module. */
  BtShared *pShared;              /* Shared by all handles on this file */
  BtLock *pNext;                  /* Next connection using pShared */
  u32 mExclLock;                  /* Mask of exclusive locks held */
  u32 mSharedLock;                /* Mask of shared locks held */
  BtFile *pBtFile;                /* Used to defer close if necessary */




};

struct BtReadSlot {
  u32 iFirst;
  u32 iLast;
};








>
>
>
>







329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346

  /* These are used only by the bt_lock module. */
  BtShared *pShared;              /* Shared by all handles on this file */
  BtLock *pNext;                  /* Next connection using pShared */
  u32 mExclLock;                  /* Mask of exclusive locks held */
  u32 mSharedLock;                /* Mask of shared locks held */
  BtFile *pBtFile;                /* Used to defer close if necessary */

#ifndef NDEBUG
  u8 *aUsed;
#endif
};

struct BtReadSlot {
  u32 iFirst;
  u32 iLast;
};

373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
*************************************************************************/

#ifdef NDEBUG
# define sqlite4BtDebugReadPage(a,b,c,d)
# define sqlite4BtDebugKV(a,b,c,d,e,f)
# define sqlite4BtDebugReadlock(a,b,c)
# define sqlite4BtDebugPageAlloc(a,b,c)
# define sqlite4BtDebugPageFree(a,b,c)
# define btErrorBkpt(x) x
#else
void sqlite4BtDebugReadPage(BtLock *pLock, u32 pgno, u8 *aData, int pgsz);
void sqlite4BtDebugKV(BtLock*, const char*,u8 *pK, int nK, u8 *pV, int nV);
void sqlite4BtDebugReadlock(BtLock *pLock, u32 iFirst, u32 iLast);
void sqlite4BtDebugPageAlloc(BtLock *pLock, const char*, u32);
void sqlite4BtDebugPageFree(BtLock *pLock, const char*, u32);
int btErrorBkpt(int rc);
#endif








|






|



382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
*************************************************************************/

#ifdef NDEBUG
# define sqlite4BtDebugReadPage(a,b,c,d)
# define sqlite4BtDebugKV(a,b,c,d,e,f)
# define sqlite4BtDebugReadlock(a,b,c)
# define sqlite4BtDebugPageAlloc(a,b,c)
# define sqlite4BtDebugPageFree(a,b,c,d)
# define btErrorBkpt(x) x
#else
void sqlite4BtDebugReadPage(BtLock *pLock, u32 pgno, u8 *aData, int pgsz);
void sqlite4BtDebugKV(BtLock*, const char*,u8 *pK, int nK, u8 *pV, int nV);
void sqlite4BtDebugReadlock(BtLock *pLock, u32 iFirst, u32 iLast);
void sqlite4BtDebugPageAlloc(BtLock *pLock, const char*, u32);
void sqlite4BtDebugPageFree(BtLock *pLock, int, const char*, u32);
int btErrorBkpt(int rc);
#endif

Changes to src/bt_log.c.
252
253
254
255
256
257
258

259
260
261
262
263
264
265
266
267
268
    btLogChecksum(nativeCksum, a, 8, aIn, aOut);
    btLogChecksum(nativeCksum, &a[4], nByte-4, aOut, aOut);
  }else{
    btLogChecksum(nativeCksum, a, nByte, aIn, aOut);
  }
}


#define BT_PAGE_DEBUG 0
#define BT_VAL_DEBUG  0
#define BT_HDR_DEBUG  0

static void btDebugTopology(BtLock *pLock, char *zStr, int iSide, u32 *aLog){
#if BT_PAGE_DEBUG
  fprintf(stderr, "%d:%s: (side=%d) %d..%d  %d..%d  %d..%d\n", 
      pLock->iDebugId, zStr, iSide,
      (int)aLog[0], (int)aLog[1], (int)aLog[2], 
      (int)aLog[3], (int)aLog[4], (int)aLog[5]







>
|
|
|







252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
    btLogChecksum(nativeCksum, a, 8, aIn, aOut);
    btLogChecksum(nativeCksum, &a[4], nByte-4, aOut, aOut);
  }else{
    btLogChecksum(nativeCksum, a, nByte, aIn, aOut);
  }
}

#define BT_ALLOC_DEBUG 0
#define BT_PAGE_DEBUG  0
#define BT_VAL_DEBUG   0
#define BT_HDR_DEBUG   0

static void btDebugTopology(BtLock *pLock, char *zStr, int iSide, u32 *aLog){
#if BT_PAGE_DEBUG
  fprintf(stderr, "%d:%s: (side=%d) %d..%d  %d..%d  %d..%d\n", 
      pLock->iDebugId, zStr, iSide,
      (int)aLog[0], (int)aLog[1], (int)aLog[2], 
      (int)aLog[3], (int)aLog[4], (int)aLog[5]
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312


313
314
315
316
317
318
319
320
321
322
323
  fflush(stderr);
#endif
}
#endif

#ifndef NDEBUG
void sqlite4BtDebugPageAlloc(BtLock *pLock, const char *zStr, u32 pgno){
#if BT_PAGE_DEBUG
  static int nCall = 0;
  fprintf(stderr, "%d:%d: allocate page %d from %s\n", 
      pLock->iDebugId, nCall++, pgno, zStr
  );
  fflush(stderr);
#endif
}
#endif

#ifndef NDEBUG
void sqlite4BtDebugPageFree(BtLock *pLock, const char *zStr, u32 pgno){


#if BT_PAGE_DEBUG
  static int nCall = 0;
  fprintf(stderr, "%d:%d: trim page %d (%s)\n", 
      pLock->iDebugId, nCall++, pgno, zStr
  );
  fflush(stderr);
  assert( pgno<1000000 );
#endif
}
#endif








|










|
>
>
|

|
|







295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
  fflush(stderr);
#endif
}
#endif

#ifndef NDEBUG
void sqlite4BtDebugPageAlloc(BtLock *pLock, const char *zStr, u32 pgno){
#if BT_ALLOC_DEBUG
  static int nCall = 0;
  fprintf(stderr, "%d:%d: allocate page %d from %s\n", 
      pLock->iDebugId, nCall++, pgno, zStr
  );
  fflush(stderr);
#endif
}
#endif

#ifndef NDEBUG
void sqlite4BtDebugPageFree(
  BtLock *pLock, int bBlock, const char *zStr, u32 pgno
){
#if BT_ALLOC_DEBUG
  static int nCall = 0;
  fprintf(stderr, "%d:%d: trim %s %d (%s)\n", 
      pLock->iDebugId, nCall++, bBlock ? "block" : "page", pgno, zStr
  );
  fflush(stderr);
  assert( pgno<1000000 );
#endif
}
#endif

Changes to src/bt_main.c.
122
123
124
125
126
127
128

129
130
131
132
133
134
135
static int fiLoadSummary(bt_db *, BtCursor *, const u8 **, int *);
static void btReadSummary(const u8 *, int, u16 *, u16 *, u16 *);
static int btCsrData(BtCursor *, int, int, const void **, int *);
static int btReadSchedule(bt_db *, u8 *, BtSchedule *);
static int btCsrEnd(BtCursor *pCsr, int bLast);
static int btCsrStep(BtCursor *pCsr, int bNext);
static int btCsrKey(BtCursor *pCsr, const void **ppK, int *pnK);


/* 
** Meta-table summary key.
*/
static const u8 aSummaryKey[] = {0xFF, 0xFF, 0xFF, 0xFF};

#ifndef btErrorBkpt







>







122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
static int fiLoadSummary(bt_db *, BtCursor *, const u8 **, int *);
static void btReadSummary(const u8 *, int, u16 *, u16 *, u16 *);
static int btCsrData(BtCursor *, int, int, const void **, int *);
static int btReadSchedule(bt_db *, u8 *, BtSchedule *);
static int btCsrEnd(BtCursor *pCsr, int bLast);
static int btCsrStep(BtCursor *pCsr, int bNext);
static int btCsrKey(BtCursor *pCsr, const void **ppK, int *pnK);


/* 
** Meta-table summary key.
*/
static const u8 aSummaryKey[] = {0xFF, 0xFF, 0xFF, 0xFF};

#ifndef btErrorBkpt
592
593
594
595
596
597
598

599

600
601











602
603
604
605
606
607

608
609
610

611
612



613
614
615
616
617
618
619

620













621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637

638
639
640
641
642
643

644
645
646












647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665



























666
667
668
669
670
671
672
    sqlite4BtBufAppendf(pBuf, ")\n");

    for(i=0; i<nCell; i++){
      u8 *pCell;          /* Cell i */
      int nKey;           /* Number of bytes of key to output */
      u8 *pKey;           /* Buffer containing key. */
      int nVal;           /* Number of bytes of value to output */

      u8 *pVal = 0;       /* Buffer containing value. */


      pCell = btCellFind(aData, nData, i);











      sqlite4BtBufAppendf(pBuf, "  Cell %d: ", i);

      pCell += sqlite4BtVarintGet32(pCell, &nKey);
      pKey = pCell;
      pCell += nKey;
      btBufferAppendBlob(pBuf, bAscii, pKey, nKey);


      if( flags & BT_PGFLAGS_INTERNAL ){
        sqlite4BtBufAppendf(pBuf, "  child=%d ", (int)btGetU32(pCell));

      }else{
        sqlite4BtBufAppendf(pBuf, "  ");



        pCell += sqlite4BtVarintGet32(pCell, &nVal);
        if( nVal>=2 ){
          nVal -= 2;
          pVal = pCell;
        }else{
          sqlite4BtBufAppendf(pBuf, "delete-key");
        }

      }













      if( pVal ){
        btBufferAppendBlob(pBuf, bAscii, pVal, nVal);
        if( flags & BT_PGFLAGS_METATREE ){
          /* Interpret the meta-tree entry */
          if( nKey==sizeof(aSummaryKey) && 0==memcmp(pKey, aSummaryKey, nKey) ){
            u16 iMin, nLvl, iMerge;
            int j;
            sqlite4BtBufAppendf(pBuf, "  [summary:");
            for(j=0; j<(nVal/6); j++){
              btReadSummary(pVal, j, &iMin, &nLvl, &iMerge);
              sqlite4BtBufAppendf(pBuf, " %d/%d/%d", 
                  (int)iMin, (int)nLvl, (int)iMerge
              );
            }
            sqlite4BtBufAppendf(pBuf, "]");
            
          }else{

            u32 iAge = btGetU32(&pKey[0]);
            u32 iLevel = ~btGetU32(&pKey[4]);
            u32 iBlk = btGetU32(pVal);
            sqlite4BtBufAppendf(pBuf, "  [age=%d level=%d root=%d]", 
                (int)iAge, (int)iLevel, (int)iBlk
            );

          }
        }
      }












      sqlite4BtBufAppendf(pBuf, "\n");
    }

    if( pPager && btFlags(aData) & BT_PGFLAGS_INTERNAL ){
      for(i=0; i<=btCellCount(aData, nData); i++){
        BtPage *pChild;
        u8 *aChild;
        u32 child;

        child = btChildPgno(aData, nData, i);
        sqlite4BtPageGet(pPager, child, &pChild);
        aChild = sqlite4BtPageData(pChild);
        btPageToAscii(child, bAscii, pPager, aChild, nData, pBuf);
        sqlite4BtPageRelease(pChild);
      }
    }
  }
  sqlite4BtBufAppendf(pBuf, "\n");
}




























#ifndef NDEBUG
#include <stdio.h>

static void printPage(FILE *f, u32 pgno, u8 *aData, int nData){
  sqlite4_buffer buf;








>

>


>
>
>
>
>
>
>
>
>
>
>
|

<



>

<
<
>
|
|
>
>
>
|
|
|
|
|
|
|
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>

















>


|

|

>



>
>
>
>
>
>
>
>
>
>
>
>



















>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617

618
619
620
621
622


623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
    sqlite4BtBufAppendf(pBuf, ")\n");

    for(i=0; i<nCell; i++){
      u8 *pCell;          /* Cell i */
      int nKey;           /* Number of bytes of key to output */
      u8 *pKey;           /* Buffer containing key. */
      int nVal;           /* Number of bytes of value to output */
      int nDummy;         /* Unused */
      u8 *pVal = 0;       /* Buffer containing value. */
      char celltype = 'A';

      pCell = btCellFind(aData, nData, i);
      pCell += sqlite4BtVarintGet32(pCell, &nKey);
      if( flags & BT_PGFLAGS_INTERNAL ){
        celltype = 'I';
      }else{
        if( nKey==0 ){
          celltype = 'C';
          pCell += sqlite4BtVarintGet32(pCell, &nKey);
        }else if( pCell[nKey]==0 ){
          celltype = 'B';
        }
      }
      sqlite4BtBufAppendf(pBuf, "  Cell %d: [%c] ", i, celltype);


      pKey = pCell;
      pCell += nKey;
      btBufferAppendBlob(pBuf, bAscii, pKey, nKey);
      sqlite4BtBufAppendf(pBuf, "  ");



      switch( celltype ){
        case 'I':
          sqlite4BtBufAppendf(pBuf, "child=%d ", (int)btGetU32(pCell));
          break;

        case 'A':
          pCell += sqlite4BtVarintGet32(pCell, &nVal);
          if( nVal>=2 ){
            nVal -= 2;
            pVal = pCell;
          }else{
            sqlite4BtBufAppendf(pBuf, "delete-key");
          }
          break;

        case 'B':
          assert( pCell[0]==0x00 );
          pCell++;
          pCell += sqlite4BtVarintGet32(pCell, &nVal);
          pVal = pCell;
          pCell += nVal;
          pCell += sqlite4BtVarintGet32(pCell, &nDummy);
          break;

        case 'C':
          assert( 0 );
      }

      if( pVal ){
        btBufferAppendBlob(pBuf, bAscii, pVal, nVal);
        if( flags & BT_PGFLAGS_METATREE ){
          /* Interpret the meta-tree entry */
          if( nKey==sizeof(aSummaryKey) && 0==memcmp(pKey, aSummaryKey, nKey) ){
            u16 iMin, nLvl, iMerge;
            int j;
            sqlite4BtBufAppendf(pBuf, "  [summary:");
            for(j=0; j<(nVal/6); j++){
              btReadSummary(pVal, j, &iMin, &nLvl, &iMerge);
              sqlite4BtBufAppendf(pBuf, " %d/%d/%d", 
                  (int)iMin, (int)nLvl, (int)iMerge
              );
            }
            sqlite4BtBufAppendf(pBuf, "]");
            
          }else{
            int nPgPerBlk = (pHdr->blksz / pHdr->pgsz);
            u32 iAge = btGetU32(&pKey[0]);
            u32 iLevel = ~btGetU32(&pKey[4]);
            u32 iRoot = btGetU32(pVal);
            sqlite4BtBufAppendf(pBuf, "  [age=%d level=%d root=%d]", 
                (int)iAge, (int)iLevel, (int)iRoot
            );
            sqlite4BtBufAppendf(pBuf, "  (blk=%d)", 1 + (iRoot / nPgPerBlk));
          }
        }
      }

      if( celltype=='B' ){
        int j;
        u8 ctrl = *pCell++;
        int nDirect = (ctrl & 0x0F);
        int nDepth = ((ctrl>>4) & 0x0F);
        sqlite4BtBufAppendf(
            pBuf, "  [overflow: direct=%d depth=%d]", nDirect, nDepth);
        for(j=0; j<=nDirect; j++){
          sqlite4BtBufAppendf(pBuf, " %d", btGetU32(&pCell[j*4]));
        }
      }
      sqlite4BtBufAppendf(pBuf, "\n");
    }

    if( pPager && btFlags(aData) & BT_PGFLAGS_INTERNAL ){
      for(i=0; i<=btCellCount(aData, nData); i++){
        BtPage *pChild;
        u8 *aChild;
        u32 child;

        child = btChildPgno(aData, nData, i);
        sqlite4BtPageGet(pPager, child, &pChild);
        aChild = sqlite4BtPageData(pChild);
        btPageToAscii(child, bAscii, pPager, aChild, nData, pBuf);
        sqlite4BtPageRelease(pChild);
      }
    }
  }
  sqlite4BtBufAppendf(pBuf, "\n");
}

static int btFreelistToAscii(bt_db *db, u32 iFirst, sqlite4_buffer *pBuf){
  int rc = SQLITE4_OK;
  u32 iTrunk = iFirst;
  while( iTrunk && rc==SQLITE4_OK ){
    BtPage *pPg = 0;
    rc = sqlite4BtPageGet(db->pPager, iTrunk, &pPg);
    if( rc==SQLITE4_OK ){
      u8 *aData = sqlite4BtPageData(pPg);
      u32 nFree = btGetU32(aData);
      u32 iNext = btGetU32(&aData[4]);
      int i;

      sqlite4BtBufAppendf(pBuf, "iTrunk=%d ", (int)iTrunk);
      sqlite4BtBufAppendf(pBuf, "nFree=%d iNext=%d (", (int)nFree, (int)iNext);
      for(i=0; i<(int)nFree; i++){
        u32 pgnoFree = btGetU32(&aData[8 + i*sizeof(u32)]);
        sqlite4BtBufAppendf(pBuf, "%s%d", (i==0)?"": " ", (int)pgnoFree);
      }
      sqlite4BtBufAppendf(pBuf, ")\n");

      sqlite4BtPageRelease(pPg);
      iTrunk = iNext;
    }
  }
  return rc;
}

#ifndef NDEBUG
#include <stdio.h>

static void printPage(FILE *f, u32 pgno, u8 *aData, int nData){
  sqlite4_buffer buf;

1419
1420
1421
1422
1423
1424
1425



1426
1427
1428
1429
1430
1431
1432
    }else{
      /* Type (a) */
      pVLocal = pCell;
      nVLocal -= 2;
    }
  }




  pCsr->ovfl.nKey = nKLocal + nKOvfl;
  pCsr->ovfl.nVal = nVLocal + nVOvfl;

  nReq = pCsr->ovfl.nKey + pCsr->ovfl.nVal;
  assert( nReq>0 );
  rc = sqlite4_buffer_resize(&pCsr->ovfl.buf, nReq);
  if( rc!=SQLITE4_OK ) return rc;







>
>
>







1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
    }else{
      /* Type (a) */
      pVLocal = pCell;
      nVLocal -= 2;
    }
  }

  /* A delete-key */
  if( nVLocal<0 ) nVLocal = 0;

  pCsr->ovfl.nKey = nKLocal + nKOvfl;
  pCsr->ovfl.nVal = nVLocal + nVOvfl;

  nReq = pCsr->ovfl.nKey + pCsr->ovfl.nVal;
  assert( nReq>0 );
  rc = sqlite4_buffer_resize(&pCsr->ovfl.buf, nReq);
  if( rc!=SQLITE4_OK ) return rc;
2299
2300
2301
2302
2303
2304
2305

















2306
2307
2308
2309
2310
2311
2312
  pSub = &pCsr->aSub[pCsr->iBt];
  aData = sqlite4BtPageData(pSub->csr.apPage[pSub->csr.nPg-1]);
  iCell = pSub->csr.aiCell[pSub->csr.nPg-1];

  *ppCell = btCellFindSize(aData, pgsz, iCell, pnCell);
}



















/*
** Allocate a new page buffer.
*/
static int btNewBuffer(bt_db *pDb, u8 **paBuf){
  const int pgsz = sqlite4BtPagerPagesize(pDb->pPager);
  u8 *aBuf;







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
  pSub = &pCsr->aSub[pCsr->iBt];
  aData = sqlite4BtPageData(pSub->csr.apPage[pSub->csr.nPg-1]);
  iCell = pSub->csr.aiCell[pSub->csr.nPg-1];

  *ppCell = btCellFindSize(aData, pgsz, iCell, pnCell);
}

/*
** Return true if the cell that the cursor currently points to contains 
** pointers to one or more overflow pages. Or false otherwise.
*/
static int btCsrOverflow(BtCursor *pCsr){
  const int pgsz = sqlite4BtPagerPagesize(pCsr->base.pDb->pPager);
  u8 *aData;                      /* Current page data */
  int nKey;                       /* First varint in cell */
  int res;                       /* First varint in cell */

  aData = sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
  aData = btCellFind(aData, pgsz, pCsr->aiCell[pCsr->nPg-1]);

  aData += sqlite4BtVarintGet32(aData, &nKey);
  res = (nKey==0 || aData[nKey]==0);
  return res;
}

/*
** Allocate a new page buffer.
*/
static int btNewBuffer(bt_db *pDb, u8 **paBuf){
  const int pgsz = sqlite4BtPagerPagesize(pDb->pPager);
  u8 *aBuf;
3933
3934
3935
3936
3937
3938
3939

3940
3941
3942
3943

3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
** tree structure.
**
** If successful, SQLITE4_OK is returned. If an error occurs, an SQLite
** error code.
*/ 
static int btIntegrateMerge(bt_db *db, BtSchedule *p){
  BtDbHdr *pHdr = sqlite4BtPagerDbhdr(db->pPager);

  int rc = SQLITE4_OK;
  BtCursor csr;                   /* Cursor for reading various sub-trees */
  BtCursor mcsr;                  /* Cursor for reading the meta-tree */
  const void *pKey = 0;

  const u8 *aSum; int nSum;       /* Summary value */
  int nKey = 0;
  sqlite4_buffer buf;
  u32 iLvl;
  int iBlk;

  /* Save the value of the fast-insert flag. It will be restored before
  ** this function returns. Leaving it set here interferes with page 
  ** allocation if the meta-tree needs to be extended.  */
  const int bFastInsertOp = db->bFastInsertOp;







>



|
>

<
|







4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037

4038
4039
4040
4041
4042
4043
4044
4045
** tree structure.
**
** If successful, SQLITE4_OK is returned. If an error occurs, an SQLite
** error code.
*/ 
static int btIntegrateMerge(bt_db *db, BtSchedule *p){
  BtDbHdr *pHdr = sqlite4BtPagerDbhdr(db->pPager);
  const int nPgPerBlk = (pHdr->blksz / pHdr->pgsz);
  int rc = SQLITE4_OK;
  BtCursor csr;                   /* Cursor for reading various sub-trees */
  BtCursor mcsr;                  /* Cursor for reading the meta-tree */
  const void *pKey = 0;           /* If not NULL, first key to leave in input */
  int nKey = 0;                   /* Size of pKey in bytes */
  const u8 *aSum; int nSum;       /* Summary value */

  sqlite4_buffer buf;             /* Buffer object used for various purposes */
  u32 iLvl;
  int iBlk;

  /* Save the value of the fast-insert flag. It will be restored before
  ** this function returns. Leaving it set here interferes with page 
  ** allocation if the meta-tree needs to be extended.  */
  const int bFastInsertOp = db->bFastInsertOp;
3969
3970
3971
3972
3973
3974
3975































3976
3977
3978
3979
3980
3981
3982
3983


3984
3985
3986
3987
3988







3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006








4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021

4022
4023
4024
4025
4026
4027
4028
4029
4030



4031
4032
4033
4034
4035
4036
4037
    btCsrSetup(db, p->iNextPg, &csr);
    rc = btCsrEnd(&csr, 0);
    if( rc==SQLITE4_OK ){
      csr.aiCell[0] = p->iNextCell;
      rc = btCsrKey(&csr, &pKey, &nKey);
    }
  }
































  /* The following loop iterates through each of the input levels. Each
  ** level is either removed from the database completely (if the merge
  ** completed) or else modified so that it contains no keys smaller
  ** than (pKey/nKey).  */ 
  for(iLvl=p->iMinLevel; iLvl<=p->iMaxLevel; iLvl++){
    u8 aPrefix[8];
    u32 iRoot = 0;


    btPutU32(&aPrefix[0], p->iAge);
    btPutU32(&aPrefix[4], ~iLvl);

    rc = btCsrSeek(&mcsr, 0, aPrefix, sizeof(aPrefix), BT_SEEK_GE, 0);
    if( rc==SQLITE4_INEXACT ) rc = SQLITE4_OK;







    while( rc==SQLITE4_OK ){
      const u8 *pMKey; int nMKey;
      rc = btCsrKey(&mcsr, (const void **)&pMKey, &nMKey);

      if( rc==SQLITE4_OK ){
        if( nMKey<sizeof(aPrefix) || memcmp(aPrefix, pMKey, sizeof(aPrefix)) ){
          rc = SQLITE4_NOTFOUND;
        }else{
          rc = SQLITE4_OK;
        }
      }
      if( rc==SQLITE4_OK ){
        if( pKey ){
          const void *pData; int nData;
          int res = btKeyCompare(pMKey + 8, nMKey - 8, pKey, nKey);
          if( res>0 ){
            break;
          }








          btCsrData(&mcsr, 0, 4, &pData, &nData);
          iRoot = btGetU32(pData);
        }
        rc = sqlite4BtDelete(&mcsr.base);
      }

      if( rc==SQLITE4_OK ){
        /* rc = btCsrStep(&mcsr, 1); */
        rc = btCsrSeek(&mcsr, 0, aPrefix, sizeof(aPrefix), BT_SEEK_GE, 0);
        if( rc==SQLITE4_INEXACT ) rc = SQLITE4_OK;
      }
    }
    if( rc==SQLITE4_NOTFOUND ) rc = SQLITE4_OK;

    if( rc==SQLITE4_OK && iRoot ){

      int n = sizeof(aPrefix) + nKey;
      rc = sqlite4_buffer_resize(&buf, n);
      if( rc==SQLITE4_OK ){
        u8 aData[4];
        u8 *a = (u8*)buf.p;
        memcpy(a, aPrefix, sizeof(aPrefix));
        memcpy(&a[sizeof(aPrefix)], pKey, nKey);
        btPutU32(aData, iRoot);
        rc = btReplaceEntry(db, pHdr->iMRoot, a, n, aData, sizeof(aData));



      }
    }
  }

  /* Add new entries for the new output level blocks. */
  for(iBlk=0; 
      rc==SQLITE4_OK && iBlk<array_size(p->aRoot) && p->aRoot[iBlk]; 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>








>
>


<


>
>
>
>
>
>
>













<




>
>
>
>
>
>
>
>
|
|
<












>
|
|
|
|
|
|
|
|
|
>
>
>







4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105
4106
4107
4108
4109
4110

4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132

4133
4134
4135
4136
4137
4138
4139
4140
4141
4142
4143
4144
4145
4146

4147
4148
4149
4150
4151
4152
4153
4154
4155
4156
4157
4158
4159
4160
4161
4162
4163
4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
4174
4175
4176
4177
4178
    btCsrSetup(db, p->iNextPg, &csr);
    rc = btCsrEnd(&csr, 0);
    if( rc==SQLITE4_OK ){
      csr.aiCell[0] = p->iNextCell;
      rc = btCsrKey(&csr, &pKey, &nKey);
    }
  }

  if( p->iFreeList ){
    u32 iTrunk = p->iFreeList;
    BtCursor delcsr;
    memset(&delcsr, 0, sizeof(BtCursor));
    delcsr.nPg = 1;
    delcsr.base.pDb = db;

    while( rc==SQLITE4_OK && iTrunk!=0 ){
      BtPage *pTrunk = 0;
      rc = sqlite4BtPageGet(db->pPager, iTrunk, &pTrunk);
      if( rc==SQLITE4_OK ){
        u8 *aTData = sqlite4BtPageData(pTrunk);
        int nOvfl = btGetU32(aTData);
        int i;

        for(i=0; i<nOvfl; i++){
          u32 lpgno = btGetU32(&aTData[8 + i*8]);
          delcsr.aiCell[0] = (int)btGetU32(&aTData[8 + i*8 + 4]);
          rc = sqlite4BtPageGet(db->pPager, lpgno, &delcsr.apPage[0]);
          if( rc==SQLITE4_OK ){
            rc = btOverflowDelete(&delcsr);
            sqlite4BtPageRelease(delcsr.apPage[0]);
          }
        }

        iTrunk = btGetU32(&aTData[4]);
        sqlite4BtPageRelease(pTrunk);
      }
    }
  }

  /* The following loop iterates through each of the input levels. Each
  ** level is either removed from the database completely (if the merge
  ** completed) or else modified so that it contains no keys smaller
  ** than (pKey/nKey).  */ 
  for(iLvl=p->iMinLevel; iLvl<=p->iMaxLevel; iLvl++){
    u8 aPrefix[8];
    u32 iRoot = 0;

    /* Seek mcsr to the first sub-tree (smallest keys) in level iLvl. */
    btPutU32(&aPrefix[0], p->iAge);
    btPutU32(&aPrefix[4], ~iLvl);

    rc = btCsrSeek(&mcsr, 0, aPrefix, sizeof(aPrefix), BT_SEEK_GE, 0);
    if( rc==SQLITE4_INEXACT ) rc = SQLITE4_OK;

    /* Loop through the meta-tree entries that compose level iLvl, deleting 
    ** them as they are visited. If (pKey!=0), stop (and do not delete) the 
    ** first sub-tree for which all keys are larger than pKey.
    **
    ** When the loop exits, variable iRoot is left set to the root page of
    ** the last sub-tree deleted.  */
    while( rc==SQLITE4_OK ){
      const u8 *pMKey; int nMKey;
      rc = btCsrKey(&mcsr, (const void **)&pMKey, &nMKey);

      if( rc==SQLITE4_OK ){
        if( nMKey<sizeof(aPrefix) || memcmp(aPrefix, pMKey, sizeof(aPrefix)) ){
          rc = SQLITE4_NOTFOUND;
        }else{
          rc = SQLITE4_OK;
        }
      }
      if( rc==SQLITE4_OK ){
        if( pKey ){

          int res = btKeyCompare(pMKey + 8, nMKey - 8, pKey, nKey);
          if( res>0 ){
            break;
          }
        }
        if( iRoot ){
          rc = sqlite4BtBlockTrim(db->pPager, 1 + (iRoot / nPgPerBlk));
        }
      }

      if( rc==SQLITE4_OK ){
        const void *pData; int nData;
        btCsrData(&mcsr, 0, 4, &pData, &nData);
        iRoot = btGetU32(pData);

        rc = sqlite4BtDelete(&mcsr.base);
      }

      if( rc==SQLITE4_OK ){
        /* rc = btCsrStep(&mcsr, 1); */
        rc = btCsrSeek(&mcsr, 0, aPrefix, sizeof(aPrefix), BT_SEEK_GE, 0);
        if( rc==SQLITE4_INEXACT ) rc = SQLITE4_OK;
      }
    }
    if( rc==SQLITE4_NOTFOUND ) rc = SQLITE4_OK;

    if( rc==SQLITE4_OK && iRoot ){
      if( pKey ){
        int n = sizeof(aPrefix) + nKey;
        rc = sqlite4_buffer_resize(&buf, n);
        if( rc==SQLITE4_OK ){
          u8 aData[4];
          u8 *a = (u8*)buf.p;
          memcpy(a, aPrefix, sizeof(aPrefix));
          memcpy(&a[sizeof(aPrefix)], pKey, nKey);
          btPutU32(aData, iRoot);
          rc = btReplaceEntry(db, pHdr->iMRoot, a, n, aData, sizeof(aData));
        }
      }else{
        rc = sqlite4BtBlockTrim(db->pPager, 1 + (iRoot / nPgPerBlk));
      }
    }
  }

  /* Add new entries for the new output level blocks. */
  for(iBlk=0; 
      rc==SQLITE4_OK && iBlk<array_size(p->aRoot) && p->aRoot[iBlk]; 
4053
4054
4055
4056
4057
4058
4059






4060
4061
4062
4063
4064
4065
4066
4067
4068
      btPutU32(&a[4], ~p->iOutLevel);
      memcpy(&a[8], pKey, nKey);
      btPutU32(aData, p->aRoot[iBlk]);
      rc = btReplaceEntry(db, pHdr->iMRoot, a, nKey+8, aData, sizeof(aData));
    }
  }







  /* Update the summary record with the outcome of the merge operation.
  */
  if( rc==SQLITE4_OK ){
    btCsrReset(&csr, 1);
    rc = fiLoadSummary(db, &csr, &aSum, &nSum);
  }
  if( rc==SQLITE4_OK ){
    rc = sqlite4_buffer_resize(&buf, MAX(nSum, (p->iAge+1+1)*6));
    if( rc==SQLITE4_OK ){







>
>
>
>
>
>
|
<







4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207

4208
4209
4210
4211
4212
4213
4214
      btPutU32(&a[4], ~p->iOutLevel);
      memcpy(&a[8], pKey, nKey);
      btPutU32(aData, p->aRoot[iBlk]);
      rc = btReplaceEntry(db, pHdr->iMRoot, a, nKey+8, aData, sizeof(aData));
    }
  }

  /* Trim any unused blocks */
  while( rc==SQLITE4_OK && iBlk<array_size(p->aBlock) && p->aBlock[iBlk] ){
    rc = sqlite4BtBlockTrim(db->pPager, p->aBlock[iBlk]);
    iBlk++;
  }

  /* Update the summary record with the outcome of the merge operation.  */

  if( rc==SQLITE4_OK ){
    btCsrReset(&csr, 1);
    rc = fiLoadSummary(db, &csr, &aSum, &nSum);
  }
  if( rc==SQLITE4_OK ){
    rc = sqlite4_buffer_resize(&buf, MAX(nSum, (p->iAge+1+1)*6));
    if( rc==SQLITE4_OK ){
4231
4232
4233
4234
4235
4236
4237














4238
4239
4240
4241
4242
4243
4244
      ** of the new segment is 0. The level number is one greater than the
      ** level number of the previous segment.  */
      btPutU32(&aKey[0], 0);
      btPutU32(&aKey[4], ~iLevel);
      btPutU32(&aVal[0], btFirstOfBlock(pHdr, iSubBlock));
      rc = btReplaceEntry(db, pHdr->iMRoot, aKey, 8, aVal, 4);
    }














  }

  if( rc==SQLITE4_OK ){
    *piRoot = btFirstOfBlock(pHdr, pHdr->iSubBlock);
  }
  db->bFastInsertOp = 1;
  return rc;







>
>
>
>
>
>
>
>
>
>
>
>
>
>







4377
4378
4379
4380
4381
4382
4383
4384
4385
4386
4387
4388
4389
4390
4391
4392
4393
4394
4395
4396
4397
4398
4399
4400
4401
4402
4403
4404
      ** of the new segment is 0. The level number is one greater than the
      ** level number of the previous segment.  */
      btPutU32(&aKey[0], 0);
      btPutU32(&aKey[4], ~iLevel);
      btPutU32(&aVal[0], btFirstOfBlock(pHdr, iSubBlock));
      rc = btReplaceEntry(db, pHdr->iMRoot, aKey, 8, aVal, 4);
    }

    if( rc==SQLITE4_OK ){
      u32 iRoot = btFirstOfBlock(pHdr, pHdr->iSubBlock);
      BtPage *pPg = 0;

      rc = sqlite4BtPageGet(db->pPager, iRoot, &pPg);
      if( rc==SQLITE4_OK ) rc = sqlite4BtPageWrite(pPg);
      if( rc==SQLITE4_OK ){
        u8 *aData = sqlite4BtPageData(pPg);
        memset(&aData[pHdr->pgsz-6], 0, 6);
        aData[0] = 0;
      }
      sqlite4BtPageRelease(pPg);
    }
  }

  if( rc==SQLITE4_OK ){
    *piRoot = btFirstOfBlock(pHdr, pHdr->iSubBlock);
  }
  db->bFastInsertOp = 1;
  return rc;
4333
4334
4335
4336
4337
4338
4339

4340
4341
4342
4343
4344




4345
4346
4347
4348
4349
4350
4351
};
struct FiWriter {
  bt_db *db;                      /* Database handle */
  BtSchedule *pSched;             /* Schedule object being implemented */
  int pgsz;                       /* Page size in bytes */
  int nPgPerBlk;                  /* Pages per block in this database */
  u32 iBlk;                       /* Block to write to */


  int nAlloc;                     /* Pages allocated from current block */
  int nWrite;                     /* Pages written to current block */
  int nHier;                      /* Valid entries in apHier[] array */
  FiWriterPg aHier[MAX_SUBTREE_DEPTH];      /* Path from root to current leaf */




};

/*
** Write the page out to disk.
*/
static int fiWriterFlush(
  FiWriter *p, 







>





>
>
>
>







4493
4494
4495
4496
4497
4498
4499
4500
4501
4502
4503
4504
4505
4506
4507
4508
4509
4510
4511
4512
4513
4514
4515
4516
};
struct FiWriter {
  bt_db *db;                      /* Database handle */
  BtSchedule *pSched;             /* Schedule object being implemented */
  int pgsz;                       /* Page size in bytes */
  int nPgPerBlk;                  /* Pages per block in this database */
  u32 iBlk;                       /* Block to write to */
  int nOvflPerPage;               /* Overflow pointers per page */

  int nAlloc;                     /* Pages allocated from current block */
  int nWrite;                     /* Pages written to current block */
  int nHier;                      /* Valid entries in apHier[] array */
  FiWriterPg aHier[MAX_SUBTREE_DEPTH];      /* Path from root to current leaf */

  /* Variables used to collect overflow pages freed by merge operation */
  int nOvfl;                      /* Number of pointers already in buffer */
  u8 *aTrunk;                     /* Buffer for current trunk page */
};

/*
** Write the page out to disk.
*/
static int fiWriterFlush(
  FiWriter *p, 
4500
4501
4502
4503
4504
4505
4506

4507
4508
4509
4510
4511
4512
4513
4514
4515
4516
4517
4518
4519

























































4520
4521
4522
4523
4524
4525
4526
    int i;

    memset(p, 0, sizeof(FiWriter));
    p->db = db;
    p->pSched = pSched;
    p->pgsz = pHdr->pgsz;
    p->nPgPerBlk = (pHdr->blksz / pHdr->pgsz);


    /* Find a block to write to */
    for(i=0; pSched->aBlock[i]; i++){
      if( pSched->aRoot[i]==0 ) break;
    }
    if( pSched->aBlock[i]==0 ){
      *pRc = BT_BLOCKFULL;
    }else{
      p->iBlk = pSched->aBlock[i];
    }
  }
}



























































/*
** Argument aBuf points to a buffer containing a leaf cell. This function
** returns a pointer to the key prefix embedded within the cell. Before
** returning, *pnKey is set to the size of the key prefix in bytes.
*/
static const u8 *btKeyPrefixFromCell(const u8 *aBuf, int *pnKey){







>













>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







4665
4666
4667
4668
4669
4670
4671
4672
4673
4674
4675
4676
4677
4678
4679
4680
4681
4682
4683
4684
4685
4686
4687
4688
4689
4690
4691
4692
4693
4694
4695
4696
4697
4698
4699
4700
4701
4702
4703
4704
4705
4706
4707
4708
4709
4710
4711
4712
4713
4714
4715
4716
4717
4718
4719
4720
4721
4722
4723
4724
4725
4726
4727
4728
4729
4730
4731
4732
4733
4734
4735
4736
4737
4738
4739
4740
4741
4742
4743
4744
4745
4746
4747
4748
4749
    int i;

    memset(p, 0, sizeof(FiWriter));
    p->db = db;
    p->pSched = pSched;
    p->pgsz = pHdr->pgsz;
    p->nPgPerBlk = (pHdr->blksz / pHdr->pgsz);
    p->nOvflPerPage = ((pHdr->pgsz / 8) - 1);

    /* Find a block to write to */
    for(i=0; pSched->aBlock[i]; i++){
      if( pSched->aRoot[i]==0 ) break;
    }
    if( pSched->aBlock[i]==0 ){
      *pRc = BT_BLOCKFULL;
    }else{
      p->iBlk = pSched->aBlock[i];
    }
  }
}

static int fiWriterFlushOvfl(FiWriter *p, u32 *pPgno){
  u32 pgno;                       /* Page number for new overflow ptr page */
  int rc;

  /* Set the "number of entries" field */
  btPutU32(p->aTrunk, p->nOvfl);

  /* Write the page to disk */
  pgno = (p->nPgPerBlk * (p->iBlk-1) + 1) + p->nWrite;
  p->nWrite++;
  p->nAlloc++;
  rc = sqlite4BtPagerRawWrite(p->db->pPager, pgno, p->aTrunk);

  btPutU32(&p->aTrunk[4], pgno);
  if( pPgno ) *pPgno = pgno;

  return rc;
}


static int fiWriterFreeOverflow(FiWriter *p, FiCursor *pCsr){
  const void *pKey;               /* Buffer containing current key for pCsr */
  int nKey;                       /* Size of pKey in bytes */
  int rc;
  int i;

  rc = btCsrKey(&pCsr->aSub[pCsr->iBt].csr, &pKey, &nKey);
  for(i=pCsr->iBt+1; i<pCsr->nBt && rc==SQLITE4_OK; i++){
    BtCursor *pSub = &pCsr->aSub[i].csr;
    if( pSub->nPg ){
      const void *pSKey;            /* Current key for pSub */
      int nSKey;                    /* Size of pSKey in bytes */
      rc = btCsrKey(pSub, &pSKey, &nSKey);
      if( rc==SQLITE4_OK 
       && 0==btKeyCompare(pKey, nKey, pSKey, nSKey) 
       && btCsrOverflow(pSub)
      ){
        u32 pgno = sqlite4BtPagePgno(pSub->apPage[pSub->nPg-1]);
        int iCell = pSub->aiCell[pSub->nPg-1];

        if( p->aTrunk==0 ){
          rc = btNewBuffer(p->db, &p->aTrunk);
          if( rc==SQLITE4_OK ) memset(p->aTrunk, 0, 8);
        }else if( p->nOvflPerPage==p->nOvfl ){
          rc = fiWriterFlushOvfl(p, 0);
        }
        if( rc==SQLITE4_OK ){
          btPutU32(&p->aTrunk[8 + p->nOvfl*8], pgno);
          btPutU32(&p->aTrunk[8 + p->nOvfl*8 + 4], iCell);
          p->nOvfl++;
        }
      }
    }
  }

  return rc;
}

/*
** Argument aBuf points to a buffer containing a leaf cell. This function
** returns a pointer to the key prefix embedded within the cell. Before
** returning, *pnKey is set to the size of the key prefix in bytes.
*/
static const u8 *btKeyPrefixFromCell(const u8 *aBuf, int *pnKey){
4628
4629
4630
4631
4632
4633
4634


4635
4636


4637

4638
4639
4640
4641
4642
4643
4644
4645
      const void *pCell;          /* Cell to copy to output */
      int nCell;                  /* Size of cell in bytes */

      /* Read the current cell from the input and push it to the output. */
      fiCsrCell(&fcsr, &pCell, &nCell);
      rc = fiWriterAdd(&writer, pCell, nCell);
      if( rc==BT_BLOCKFULL ){


        rc = fiWriterFlushAll(&writer);
        fiWriterInit(db, &s, &writer, &rc);


      }else if( rc==SQLITE4_OK ){

        rc = fiCsrStep(&fcsr);
      }
    }

    /* Assuming no error has occurred, update the serialized BtSchedule
    ** structure stored in buffer aSched[]. The caller will write this
    ** buffer to the database file as page (pHdr->iSRoot).  */
    if( rc==BT_BLOCKFULL || rc==SQLITE4_NOTFOUND ){







>
>


>
>

>
|







4851
4852
4853
4854
4855
4856
4857
4858
4859
4860
4861
4862
4863
4864
4865
4866
4867
4868
4869
4870
4871
4872
4873
      const void *pCell;          /* Cell to copy to output */
      int nCell;                  /* Size of cell in bytes */

      /* Read the current cell from the input and push it to the output. */
      fiCsrCell(&fcsr, &pCell, &nCell);
      rc = fiWriterAdd(&writer, pCell, nCell);
      if( rc==BT_BLOCKFULL ){
        int nOvflSaved = writer.nOvfl;
        u8 *aTrunkSaved = writer.aTrunk;
        rc = fiWriterFlushAll(&writer);
        fiWriterInit(db, &s, &writer, &rc);
        writer.nOvfl = nOvflSaved;
        writer.aTrunk = aTrunkSaved;
      }else if( rc==SQLITE4_OK ){
        rc = fiWriterFreeOverflow(&writer, &fcsr);
        if( rc==SQLITE4_OK ) rc = fiCsrStep(&fcsr);
      }
    }

    /* Assuming no error has occurred, update the serialized BtSchedule
    ** structure stored in buffer aSched[]. The caller will write this
    ** buffer to the database file as page (pHdr->iSRoot).  */
    if( rc==BT_BLOCKFULL || rc==SQLITE4_NOTFOUND ){
4653
4654
4655
4656
4657
4658
4659





4660

4661
4662
4663
4664
4665
4666
4667
        assert( pCsr->nPg>0 );
        s.iNextPg = sqlite4BtPagePgno(pCsr->apPage[pCsr->nPg-1]);
        s.iNextCell = pCsr->aiCell[pCsr->nPg-1];
      }
      s.iFreeList = 0;
      s.eBusy = BT_SCHEDULE_DONE;






      rc = fiWriterFlushAll(&writer);

      if( rc==SQLITE4_OK ){
        btWriteSchedule(aSched, &s, &rc);
      }
    }

    fiWriterCleanup(&writer);
    fiCsrReset(&fcsr);







>
>
>
>
>
|
>







4881
4882
4883
4884
4885
4886
4887
4888
4889
4890
4891
4892
4893
4894
4895
4896
4897
4898
4899
4900
4901
        assert( pCsr->nPg>0 );
        s.iNextPg = sqlite4BtPagePgno(pCsr->apPage[pCsr->nPg-1]);
        s.iNextCell = pCsr->aiCell[pCsr->nPg-1];
      }
      s.iFreeList = 0;
      s.eBusy = BT_SCHEDULE_DONE;

      assert( s.iFreeList==0 );
      if( writer.aTrunk ){
        rc = fiWriterFlushOvfl(&writer, &s.iFreeList);
      }
      if( rc==SQLITE4_OK ){
        rc = fiWriterFlushAll(&writer);
      }
      if( rc==SQLITE4_OK ){
        btWriteSchedule(aSched, &s, &rc);
      }
    }

    fiWriterCleanup(&writer);
    fiCsrReset(&fcsr);
4787
4788
4789
4790
4791
4792
4793


4794
4795
4796
4797
4798
4799
4800
  return rc;
}

static void btControlTransactionDone(bt_db *db, int iCtx){
  if( iCtx==0 ) sqlite4BtCommit(db, 0);
}



static int btControlInfo(bt_db *db, bt_info *pInfo){
  int rc = SQLITE4_OK;

  switch( pInfo->eType ){
    case BT_INFO_PAGEDUMP_ASCII:
    case BT_INFO_PAGEDUMP: {
      int iCtx;                   /* ControlTransaction() context */







>
>







5021
5022
5023
5024
5025
5026
5027
5028
5029
5030
5031
5032
5033
5034
5035
5036
  return rc;
}

static void btControlTransactionDone(bt_db *db, int iCtx){
  if( iCtx==0 ) sqlite4BtCommit(db, 0);
}

static int btCheckForPageLeaks(bt_db*, sqlite4_buffer*);

static int btControlInfo(bt_db *db, bt_info *pInfo){
  int rc = SQLITE4_OK;

  switch( pInfo->eType ){
    case BT_INFO_PAGEDUMP_ASCII:
    case BT_INFO_PAGEDUMP: {
      int iCtx;                   /* ControlTransaction() context */
4830
4831
4832
4833
4834
4835
4836

























4837
4838
4839
4840
4841
4842

4843
4844
4845
4846
4847
4848
4849
      rc = btControlTransaction(db, &iCtx);
      if( rc==SQLITE4_OK ){
        rc = sqlite4BtPagerHdrdump(db->pPager, &pInfo->output);
        btControlTransactionDone(db, iCtx);
      }
      break;
    }


























    default: {
      rc = SQLITE4_ERROR;
      break;
    }
  }

  return rc;
}

int sqlite4BtControl(bt_db *db, int op, void *pArg){
  int rc = SQLITE4_OK;

  switch( op ){







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>






>







5066
5067
5068
5069
5070
5071
5072
5073
5074
5075
5076
5077
5078
5079
5080
5081
5082
5083
5084
5085
5086
5087
5088
5089
5090
5091
5092
5093
5094
5095
5096
5097
5098
5099
5100
5101
5102
5103
5104
5105
5106
5107
5108
5109
5110
5111
      rc = btControlTransaction(db, &iCtx);
      if( rc==SQLITE4_OK ){
        rc = sqlite4BtPagerHdrdump(db->pPager, &pInfo->output);
        btControlTransactionDone(db, iCtx);
      }
      break;
    }

    case BT_INFO_BLOCK_FREELIST: 
    case BT_INFO_PAGE_FREELIST: {
      int iCtx;                   /* ControlTransaction() context */
      rc = btControlTransaction(db, &iCtx);
      if( rc==SQLITE4_OK ){
        BtDbHdr *pHdr = sqlite4BtPagerDbhdr(db->pPager);
        u32 iFirst = (
            pInfo->eType==BT_INFO_BLOCK_FREELIST?pHdr->iFreeBlk:pHdr->iFreePg
        );
        rc = btFreelistToAscii(db, iFirst, &pInfo->output);
        btControlTransactionDone(db, iCtx);
      }
      break;
    }

    case BT_INFO_PAGE_LEAKS: {
      int iCtx;                   /* ControlTransaction() context */
      rc = btControlTransaction(db, &iCtx);
      if( rc==SQLITE4_OK ){
        rc = btCheckForPageLeaks(db, &pInfo->output);
        btControlTransactionDone(db, iCtx);
      }
      break;
    }

    default: {
      rc = SQLITE4_ERROR;
      break;
    }
  }
  sqlite4_buffer_append(&pInfo->output, "\0", 1);
  return rc;
}

int sqlite4BtControl(bt_db *db, int op, void *pArg){
  int rc = SQLITE4_OK;

  switch( op ){
4904
4905
4906
4907
4908
4909
4910

4911









4912




4913
4914














































































































































































      break;
    }
  }

  return rc;
}








































































































































































































>

>
>
>
>
>
>
>
>
>

>
>
>
>
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
5166
5167
5168
5169
5170
5171
5172
5173
5174
5175
5176
5177
5178
5179
5180
5181
5182
5183
5184
5185
5186
5187
5188
5189
5190
5191
5192
5193
5194
5195
5196
5197
5198
5199
5200
5201
5202
5203
5204
5205
5206
5207
5208
5209
5210
5211
5212
5213
5214
5215
5216
5217
5218
5219
5220
5221
5222
5223
5224
5225
5226
5227
5228
5229
5230
5231
5232
5233
5234
5235
5236
5237
5238
5239
5240
5241
5242
5243
5244
5245
5246
5247
5248
5249
5250
5251
5252
5253
5254
5255
5256
5257
5258
5259
5260
5261
5262
5263
5264
5265
5266
5267
5268
5269
5270
5271
5272
5273
5274
5275
5276
5277
5278
5279
5280
5281
5282
5283
5284
5285
5286
5287
5288
5289
5290
5291
5292
5293
5294
5295
5296
5297
5298
5299
5300
5301
5302
5303
5304
5305
5306
5307
5308
5309
5310
5311
5312
5313
5314
5315
5316
5317
5318
5319
5320
5321
5322
5323
5324
5325
5326
5327
5328
5329
5330
5331
5332
5333
5334
5335
5336
5337
5338
5339
5340
5341
5342
5343
5344
5345
5346
5347
5348
5349
5350
5351
5352
5353
5354
5355
5356
5357
5358
5359
5360
5361
5362
5363
5364
      break;
    }
  }

  return rc;
}

#ifndef NDEBUG

static void markBlockAsUsed(
  bt_db *db,
  u32 iBlk,
  u8 *aUsed
){
  if( iBlk ){
    BtDbHdr *pHdr = sqlite4BtPagerDbhdr(db->pPager);
    int nPgPerBlk = (pHdr->blksz / pHdr->pgsz);
    int i;

    for(i=0; i<nPgPerBlk; i++){
      u32 iPg = (iBlk-1) * nPgPerBlk + 1 + i;
      assert( aUsed[iPg]==0 || aUsed[iPg]==1 );
      aUsed[iPg] = 1;
    }
  }
}

/*
** Iterate through the b-tree with root page iRoot. For each page used
** by the b-tree, set the corresponding entry in the aUsed[] array.
*/
static void assert_pages_used(
  bt_db *db,                      /* Database handle */
  u32 iRoot,                      /* Root page of b-tree to iterate through */
  int *pRc                        /* IN/OUT: Error code */
){
  if( *pRc==SQLITE4_OK && iRoot ){
    BtDbHdr *pHdr = sqlite4BtPagerDbhdr(db->pPager);
    int nPgPerBlk = (pHdr->blksz / pHdr->pgsz);
    int bMeta = (iRoot==pHdr->iMRoot);
    BtLock *pLock = (BtLock*)(db->pPager);
    u8 *aUsed = pLock->aUsed;
    int rc;
    BtCursor csr;

    btCsrSetup(db, iRoot, &csr);
    rc = btCsrEnd(&csr, 0);
    while( rc==SQLITE4_OK ){
      rc = btCsrBuffer(&csr, 1);
      if( bMeta && rc==SQLITE4_OK ){
        u8 *aVal;
        int nVal;

        btCsrKey(&csr, (const void**)&aVal, &nVal);
        if( nVal!=sizeof(aSummaryKey) || memcmp(aVal, aSummaryKey, nVal) ){
          u32 iSubRoot;
          u32 iBlk;
          int i;
          btCsrData(&csr, 0, 4, (const void**)&aVal, &nVal);
          iSubRoot = btGetU32(aVal);
          iBlk = (iSubRoot / nPgPerBlk) + 1;
          assert_pages_used(db, iSubRoot, &rc);
          markBlockAsUsed(db, iBlk, aUsed);
        }
      }
      if( rc==SQLITE4_OK ) rc = btCsrStep(&csr, 1);
    }
  }
}

static void assert_freelist_pages_used(
  bt_db *db, 
  int bBlocklist,                 /* True to examine free-block list */
  u8 *aUsed, 
  int *pRc
){
  if( *pRc==SQLITE4_OK ){
    BtDbHdr *pHdr = sqlite4BtPagerDbhdr(db->pPager);
    u32 iTrunk = (bBlocklist ? pHdr->iFreeBlk : pHdr->iFreePg);
    int rc = SQLITE4_OK;

    while( rc==SQLITE4_OK && iTrunk ){
      BtPage *pPg = 0;
      rc = sqlite4BtPageGet(db->pPager, iTrunk, &pPg);
      if( rc==SQLITE4_OK ){
        int i;
        u32 nFree;
        u8 *aData = sqlite4BtPageData(pPg);

        nFree = btGetU32(aData);
        for(i=0; i<nFree; i++){
          u32 pgno = btGetU32(&aData[8 + i*4]);
          if( bBlocklist ){
            markBlockAsUsed(db, pgno, aUsed);
          }else{
            aUsed[pgno]++;
          }
        }

        iTrunk = btGetU32(&aData[4]);
        sqlite4BtPageRelease(pPg);
      }
    }

    *pRc = rc;
  }
}

static void assert_schedule_page_used(bt_db *db, u8 *aUsed, int *pRc){
  BtDbHdr *pHdr = sqlite4BtPagerDbhdr(db->pPager);
  if( *pRc==SQLITE4_OK && pHdr->iSRoot!=0 ){
    int nPgPerBlk = (pHdr->blksz / pHdr->pgsz);
    BtPage *pPg = 0;
    int rc;

    rc = sqlite4BtPageGet(db->pPager, pHdr->iSRoot, &pPg);
    if( rc==SQLITE4_OK ){
      BtSchedule s;
      int i;

      btReadSchedule(db, sqlite4BtPageData(pPg), &s);
      sqlite4BtPageRelease(pPg);

      assert( s.eBusy!=BT_SCHEDULE_BUSY || s.aRoot[0]==0 );
      if( s.eBusy!=BT_SCHEDULE_EMPTY ){
        for(i=0; rc==SQLITE4_OK && i<array_size(s.aBlock); i++){
          markBlockAsUsed(db, s.aBlock[i], aUsed);
        }
      }
    }

    *pRc = rc;
  }
}

/*
** Check that all pages in the database file are accounted for (not leaked).
** If any problems are detected, append a description of them to the buffer
** passed as the second argument.
*/
static int btCheckForPageLeaks(
  bt_db *db,                      /* Database handle */
  sqlite4_buffer *pBuf            /* Write error messages here */
){
  BtLock *pLock = (BtLock*)(db->pPager);

  static int nCall = 0;
  nCall++;

  if( pLock->aUsed==0 ){
    int rc;
    int iCtx;                     /* ControlTransaction() context */
    rc = btControlTransaction(db, &iCtx);

    if( rc==SQLITE4_OK ){
      BtDbHdr *pHdr = sqlite4BtPagerDbhdr(db->pPager);
      u8 *aUsed;
      int i;

      aUsed = btMalloc(db, pHdr->nPg, &rc);
      if( aUsed ) memset(aUsed, 0, pHdr->nPg);
      aUsed[0] = 1;                 /* Page 1 is always in use */
      pLock->aUsed = &aUsed[-1];

      /* The scheduled-merge page, if it is allocated */
      assert_schedule_page_used(db, pLock->aUsed, &rc);

      /* Walk the main b-tree */
      assert_pages_used(db, pHdr->iRoot, &rc);

      /* Walk the meta-tree */
      assert_pages_used(db, pHdr->iMRoot, &rc);

      /* Walk the free-page list */
      assert_freelist_pages_used(db, 0, pLock->aUsed, &rc);

      /* The free-block list */
      assert_freelist_pages_used(db, 1, pLock->aUsed, &rc);

      for(i=0; i<pHdr->nPg; i++){
        if( aUsed[i]!=1 ){
          sqlite4BtBufAppendf(
              pBuf, "refcount on page %d is %d\n", i+1, (int)aUsed[i]
          );
        }
      }
      btFree(db, aUsed);
      btControlTransactionDone(db, iCtx);
      pLock->aUsed = 0;
    }
  }

  return SQLITE4_OK;
}
#endif




Changes to src/bt_pager.c.
802
803
804
805
806
807
808




809
810
811
812
813
814
815

/*
** Request a reference to page pgno of the database.
*/
int sqlite4BtPageGet(BtPager *p, u32 pgno, BtPage **ppPg){
  int rc = SQLITE4_OK;            /* Return code */
  BtPage *pRet;                   /* Returned page handle */





  /* Search the cache for an existing page. */
  pRet = btHashSearch(p, pgno);

  /* If the page is not in the cache, load it from disk */
  if( pRet==0 ){
    rc = btAllocatePage(p, &pRet);







>
>
>
>







802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819

/*
** Request a reference to page pgno of the database.
*/
int sqlite4BtPageGet(BtPager *p, u32 pgno, BtPage **ppPg){
  int rc = SQLITE4_OK;            /* Return code */
  BtPage *pRet;                   /* Returned page handle */

  if( p->btl.aUsed ){
    p->btl.aUsed[pgno]++;
  }

  /* Search the cache for an existing page. */
  pRet = btHashSearch(p, pgno);

  /* If the page is not in the cache, load it from disk */
  if( pRet==0 ){
    rc = btAllocatePage(p, &pRet);
860
861
862
863
864
865
866
867




868
869
870

871
872
873
874
875
876
877
878
879
880
881
882
883


884
885
886
887
888
889
890
891
892
893
894
895
896
897
898



899

900
901
902
903
904
905
906
907
908
909
910



911
912
913
914
915
916
917



918




919
920


921
922
923

924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940





941
942


943
944
945
946
947
948

949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
  return rc;
}

/*
** Add page pgno to the free-page list. If argument pPg is not NULL, then
** it is a reference to page pgno.
*/
static int btFreelistAdd(BtPager *p, BtPage *pPg, u32 pgno){




  BtDbHdr *pHdr = sqlite4BtLogDbhdr(p->pLog);
  int rc = SQLITE4_OK;
  int bDone = 0;


  /* Check if there is space on the first free-list trunk page. If so,
  ** add the new entry to it. Set variable bDone to indicate that the
  ** page has already been added to the free-list. */
  if( pHdr->iFreePg ){
    BtPage *pTrunk;
    rc = sqlite4BtPageGet(p, pHdr->iFreePg, &pTrunk);
    if( rc==SQLITE4_OK ){
      const int nMax = ((pHdr->pgsz - 8) / 4);
      u8 *aData = pTrunk->aData;
      int nFree = (int)sqlite4BtGetU32(aData);

      if( nFree<nMax ){


        sqlite4BtPutU32(&pTrunk->aData[8 + nFree*4], pgno);
        sqlite4BtPutU32(pTrunk->aData, nFree+1);
        bDone = 1;
        sqlite4BtDebugPageFree((BtLock*)p, "free-list", pgno);
      }

      sqlite4BtPageRelease(pTrunk);
    } 
  }

  /* If no error has occurred but the page number has not yet been added 
  ** to the free-list, this page becomes the first trunk in the list.  */
  if( rc==SQLITE4_OK && bDone==0 ){
    BtPage *pRelease = 0;
    if( pPg==0 ){



      rc = sqlite4BtPageGet(p, pgno, &pRelease);

    }
    if( rc==SQLITE4_OK ){
      BtPage *pTrunk = pPg ? pPg : pRelease;
      rc = sqlite4BtPageWrite(pTrunk);
      if( rc==SQLITE4_OK ){
        sqlite4BtPagerDbhdrDirty(p);
        sqlite4BtPutU32(&pTrunk->aData[0], 0);
        sqlite4BtPutU32(&pTrunk->aData[4], pHdr->iFreePg);
        pHdr->iFreePg = pgno;
        sqlite4BtDebugPageFree((BtLock*)p, "free-list-trunk", pgno);
      }



    }
    sqlite4BtPageRelease(pRelease);
  }

  return rc;
}




static int btFreelistAlloc(BtPager *p, u32 *pPgno){




  BtDbHdr *pHdr = sqlite4BtLogDbhdr(p->pLog);
  int rc = SQLITE4_OK;



  assert( *pPgno==0 );
  if( pHdr->iFreePg ){

    BtPage *pTrunk = 0;
    rc = sqlite4BtPageGet(p, pHdr->iFreePg, &pTrunk);
    if( rc==SQLITE4_OK ){
      rc = sqlite4BtPageWrite(pTrunk);
    }
    if( rc==SQLITE4_OK ){
      u8 *aData = pTrunk->aData;
      u32 nFree = sqlite4BtGetU32(aData);
      if( nFree==0 ){
        u32 iNext = sqlite4BtGetU32(&aData[4]);
        sqlite4BtPagerDbhdrDirty(p);
        *pPgno = pHdr->iFreePg;
        pHdr->iFreePg = iNext;
        sqlite4BtDebugPageAlloc((BtLock*)p, "free-list-trunk", *pPgno);
      }else{
        *pPgno = sqlite4BtGetU32(&aData[8 + 4*(nFree-1)]);
        sqlite4BtPutU32(aData, nFree-1);





        sqlite4BtDebugPageAlloc((BtLock*)p, "free-list", *pPgno);
        assert( *pPgno < 1000000 );


      }
    }

    sqlite4BtPageRelease(pTrunk);
  }


  return rc;
}

/*
** Decrement the refcount on page pPg. Also, indicate that page pPg is
** no longer in use.
*/
int sqlite4BtPageTrim(BtPage *pPg){
  int rc;                         /* Return code */
  rc = btFreelistAdd(pPg->pPager, pPg, pPg->pgno);
  sqlite4BtPageRelease(pPg);
  return rc;
}

/*
** Page number pgno is no longer in use.
*/







|
>
>
>
>



>




|

|






>
>
|
|
|
|
|
|







|
|
>
>
>
|
>


<
<
<
|
|
|
|
|
|
>
>
>

<





>
>
>
|
>
>
>
>


>
>


<
>

|






|
|
|
<
<
|

|
|
>
>
>
>
>
|
<
>
>






>









|







864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916



917
918
919
920
921
922
923
924
925
926

927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945

946
947
948
949
950
951
952
953
954
955
956
957


958
959
960
961
962
963
964
965
966
967

968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
  return rc;
}

/*
** Add page pgno to the free-page list. If argument pPg is not NULL, then
** it is a reference to page pgno.
*/
static int btFreelistAdd(
  BtPager *p,                     /* Pager object */
  int bBlock,                     /* True if pgno is actually a block number */
  u32 pgno
){
  BtDbHdr *pHdr = sqlite4BtLogDbhdr(p->pLog);
  int rc = SQLITE4_OK;
  int bDone = 0;
  u32 *piFirst = (bBlock ? &pHdr->iFreeBlk : &pHdr->iFreePg);

  /* Check if there is space on the first free-list trunk page. If so,
  ** add the new entry to it. Set variable bDone to indicate that the
  ** page has already been added to the free-list. */
  if( *piFirst ){
    BtPage *pTrunk;
    rc = sqlite4BtPageGet(p, *piFirst, &pTrunk);
    if( rc==SQLITE4_OK ){
      const int nMax = ((pHdr->pgsz - 8) / 4);
      u8 *aData = pTrunk->aData;
      int nFree = (int)sqlite4BtGetU32(aData);

      if( nFree<nMax ){
        rc = sqlite4BtPageWrite(pTrunk);
        if( rc==SQLITE4_OK ){
          sqlite4BtPutU32(&pTrunk->aData[8 + nFree*4], pgno);
          sqlite4BtPutU32(pTrunk->aData, nFree+1);
          bDone = 1;
          sqlite4BtDebugPageFree((BtLock*)p, bBlock, "free-list-leaf", pgno);
        }
      }
      sqlite4BtPageRelease(pTrunk);
    } 
  }

  /* If no error has occurred but the page number has not yet been added 
  ** to the free-list, this page becomes the first trunk in the list.  */
  if( rc==SQLITE4_OK && bDone==0 ){
    BtPage *pTrunk = 0;

    if( bBlock ){
      rc = sqlite4BtPageAllocate(p, &pTrunk);
    }else{
      rc = sqlite4BtPageGet(p, pgno, &pTrunk);
      if( rc==SQLITE4_OK ) rc = sqlite4BtPageWrite(pTrunk);
    }
    if( rc==SQLITE4_OK ){



      sqlite4BtPagerDbhdrDirty(p);
      sqlite4BtPutU32(&pTrunk->aData[0], 0);
      sqlite4BtPutU32(&pTrunk->aData[4], *piFirst);
      *piFirst = pTrunk->pgno;
      sqlite4BtDebugPageFree((BtLock*)p, 0, "free-list-trunk", pTrunk->pgno);
    }
    sqlite4BtPageRelease(pTrunk);
    if( rc==SQLITE4_OK && bBlock ){
      rc = btFreelistAdd(p, 1, pgno);
    }

  }

  return rc;
}

/*
** Attempt to allocate a page from the free-list.
*/
static int btFreelistAlloc(
  BtPager *p,                     /* Pager object */
  int bBlock,                     /* True to allocate a block (not a page) */
  u32 *pPgno                      /* OUT: Page or block number */
){
  BtDbHdr *pHdr = sqlite4BtLogDbhdr(p->pLog);
  int rc = SQLITE4_OK;
  u32 *piFirst = (bBlock ? &pHdr->iFreeBlk : &pHdr->iFreePg);
  u32 pgno = 0;

  assert( *pPgno==0 );

  while( *piFirst && pgno==0 && rc==SQLITE4_OK ){
    BtPage *pTrunk = 0;
    rc = sqlite4BtPageGet(p, *piFirst, &pTrunk);
    if( rc==SQLITE4_OK ){
      rc = sqlite4BtPageWrite(pTrunk);
    }
    if( rc==SQLITE4_OK ){
      u8 *aData = pTrunk->aData;
      u32 nFree = sqlite4BtGetU32(aData);
      if( nFree>0 ){
        pgno = sqlite4BtGetU32(&aData[8 + 4*(nFree-1)]);
        sqlite4BtPutU32(aData, nFree-1);


        sqlite4BtDebugPageAlloc((BtLock*)p, "free-list", pgno);
      }else{
        u32 iNext = sqlite4BtGetU32(&aData[4]);
        sqlite4BtPagerDbhdrDirty(p);
        
        if( bBlock ){
          rc = btFreelistAdd(p, 0, *piFirst);
        }else{
          pgno = *piFirst;
          sqlite4BtDebugPageAlloc((BtLock*)p, "free-list-trunk", pgno);

        }
        *piFirst = iNext;
      }
    }

    sqlite4BtPageRelease(pTrunk);
  }

  *pPgno = pgno;
  return rc;
}

/*
** Decrement the refcount on page pPg. Also, indicate that page pPg is
** no longer in use.
*/
int sqlite4BtPageTrim(BtPage *pPg){
  int rc;                         /* Return code */
  rc = btFreelistAdd(pPg->pPager, 0, pPg->pgno);
  sqlite4BtPageRelease(pPg);
  return rc;
}

/*
** Page number pgno is no longer in use.
*/
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
  BtPage *pPg = 0;
  int rc;
  u32 pgno = 0;

  /* Find the page number of the new page. There are two ways a page may
  ** be allocated - from the free-list or by appending it to the end of
  ** the database file. */ 
  rc = btFreelistAlloc(p, &pgno);
  if( rc==SQLITE4_OK && pgno==0 ){
    pgno = p->pHdr->nPg+1;
    sqlite4BtDebugPageAlloc((BtLock*)p, "end-of-file", pgno);
  }

  rc = sqlite4BtPageGet(p, pgno, &pPg);
  if( rc==SQLITE4_OK ){







|







1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
  BtPage *pPg = 0;
  int rc;
  u32 pgno = 0;

  /* Find the page number of the new page. There are two ways a page may
  ** be allocated - from the free-list or by appending it to the end of
  ** the database file. */ 
  rc = btFreelistAlloc(p, 0, &pgno);
  if( rc==SQLITE4_OK && pgno==0 ){
    pgno = p->pHdr->nPg+1;
    sqlite4BtDebugPageAlloc((BtLock*)p, "end-of-file", pgno);
  }

  rc = sqlite4BtPageGet(p, pgno, &pPg);
  if( rc==SQLITE4_OK ){
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031



1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043


1044
1045
1046
1047







1048
1049
1050
1051
1052
1053
1054

int sqlite4BtBlockAllocate(BtPager *p, int nBlk, u32 *aiBlk){
  int rc = SQLITE4_OK;
  BtDbHdr *pHdr = p->pHdr;
  int nPgPerBlk = (pHdr->blksz / pHdr->pgsz);
  int i;

  for(i=0; i<nBlk; i++){
    u32 iBlk;
    u32 iRoot;
    u32 iFree;




    /* Figure out the next block in the file. And its root (first) page. */
    iBlk = 1 + (pHdr->nPg + nPgPerBlk - 1) / nPgPerBlk;
    iRoot = (iBlk-1) * nPgPerBlk + 1;
    assert( iBlk>0 );

    for(iFree = pHdr->nPg+1; rc==SQLITE4_OK && iFree<iRoot; iFree++){
      rc = sqlite4BtPageTrimPgno(p, iFree);
    }
    if( rc==SQLITE4_OK ){
      pHdr->nPg = iBlk * nPgPerBlk;
      aiBlk[i] = iBlk;
    }


  }

  return rc;
}








/*
** Return the current page number of the argument page reference.
*/
u32 sqlite4BtPagePgno(BtPage *pPg){
  return pPg->pgno;
}







|
|



>
>
>
|
|
|
|

|
|
|
<

<

>
>




>
>
>
>
>
>
>







1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070

1071

1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092

int sqlite4BtBlockAllocate(BtPager *p, int nBlk, u32 *aiBlk){
  int rc = SQLITE4_OK;
  BtDbHdr *pHdr = p->pHdr;
  int nPgPerBlk = (pHdr->blksz / pHdr->pgsz);
  int i;

  for(i=0; rc==SQLITE4_OK && i<nBlk; i++){
    u32 iBlk = 0;
    u32 iRoot;
    u32 iFree;

    rc = btFreelistAlloc(p, 1, &iBlk);
    if( rc==SQLITE4_OK && iBlk==0 ){

      /* Figure out the next block in the file. And its root (first) page. */
      iBlk = 1 + (pHdr->nPg + nPgPerBlk - 1) / nPgPerBlk;
      iRoot = (iBlk-1) * nPgPerBlk + 1;
      assert( iBlk>0 );

      for(iFree = pHdr->nPg+1; rc==SQLITE4_OK && iFree<iRoot; iFree++){
        rc = sqlite4BtPageTrimPgno(p, iFree);
      }

      pHdr->nPg = iBlk * nPgPerBlk;

    }

    aiBlk[i] = iBlk;
  }

  return rc;
}

/*
** Trim a block.
*/
int sqlite4BtBlockTrim(BtPager *p, u32 iBlk){
  return btFreelistAdd(p, 1, iBlk);
}

/*
** Return the current page number of the argument page reference.
*/
u32 sqlite4BtPagePgno(BtPage *pPg){
  return pPg->pgno;
}