Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix a problem preventing the database header from being written correctly.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 7abccde8a354352148a1ac26493a65b549ad4f50
User & Date: dan 2014-01-21 18:59:27.283
Context
2014-01-21
20:40
Fix a problem with BT_SEEK_GE on the merge-tree when it contains partially merged deletes. check-in: 0d9860b35b user: dan tags: trunk
18:59
Fix a problem preventing the database header from being written correctly. check-in: 7abccde8a3 user: dan tags: trunk
2014-01-09
20:36
Fix even more bugs in merging. check-in: 9f83998312 user: dan tags: trunk
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/btInt.h.
149
150
151
152
153
154
155

156
157
158
159
160
161
162
int sqlite4BtPagerPagesize(BtPager*);

/* 
** Query for the db header values. Requires an open read transaction or
** an active checkpoint.
*/
BtDbHdr *sqlite4BtPagerDbhdr(BtPager*);


/*
** Used by checkpointers to specify the header to use during a checkpoint.
*/
void sqlite4BtPagerSetDbhdr(BtPager *, BtDbHdr *);

/*







>







149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
int sqlite4BtPagerPagesize(BtPager*);

/* 
** Query for the db header values. Requires an open read transaction or
** an active checkpoint.
*/
BtDbHdr *sqlite4BtPagerDbhdr(BtPager*);
/* Flag the pager's db header object as modified. */
void sqlite4BtPagerDbhdrDirty(BtPager*);

/*
** Used by checkpointers to specify the header to use during a checkpoint.
*/
void sqlite4BtPagerSetDbhdr(BtPager *, BtDbHdr *);

/*
281
282
283
284
285
286
287

288
289
290
291
292
293
294
int sqlite4BtLogPagesize(BtLog*);
int sqlite4BtLogPagecount(BtLog*);
u32 sqlite4BtLogCookie(BtLog*);
#endif
BtDbHdr *sqlite4BtLogDbhdr(BtLog*);

int sqlite4BtLogSetCookie(BtLog*, u32 iCookie);


/*
** End of bt_log.c interface.
*************************************************************************/

/*************************************************************************
** Interface to bt_lock.c functionality.







>







282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
int sqlite4BtLogPagesize(BtLog*);
int sqlite4BtLogPagecount(BtLog*);
u32 sqlite4BtLogCookie(BtLog*);
#endif
BtDbHdr *sqlite4BtLogDbhdr(BtLog*);

int sqlite4BtLogSetCookie(BtLog*, u32 iCookie);
/* Make page 1 writable and update it from the log's db header. Returns
** SQLITE4_OK or an error code. */
int sqlite4BtLogDbhdrFlush(BtLog*);

/*
** End of bt_log.c interface.
*************************************************************************/

/*************************************************************************
** Interface to bt_lock.c functionality.
Changes to src/bt_log.c.
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
  u32 iLast;                      /* Frame containing last commit flag in log */
  u32 iNextFrame;                 /* Frame that follows frame iLast */
  u32 iPageOneFrame;              /* Frame containing most recent page 1 */
};

static int btLogRecoverFrame(
  BtLog *pLog,                    /* Log module handle */
  void *pCtx,                     /* woints to type u32 - pgno of last commit*/
  u32 iFrame,                     /* Frame number */
  BtFrameHdr *pHdr                /* Frame header */
){
  FrameRecoverCtx *pFRC = (FrameRecoverCtx*)pCtx;

  if( btLogIsEmpty(pLog) ){
    /* This is the first frame recovered. It is therefore both the first







|







769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
  u32 iLast;                      /* Frame containing last commit flag in log */
  u32 iNextFrame;                 /* Frame that follows frame iLast */
  u32 iPageOneFrame;              /* Frame containing most recent page 1 */
};

static int btLogRecoverFrame(
  BtLog *pLog,                    /* Log module handle */
  void *pCtx,                     /* Pointer to FrameRecoverCtx */
  u32 iFrame,                     /* Frame number */
  BtFrameHdr *pHdr                /* Frame header */
){
  FrameRecoverCtx *pFRC = (FrameRecoverCtx*)pCtx;

  if( btLogIsEmpty(pLog) ){
    /* This is the first frame recovered. It is therefore both the first
1960
1961
1962
1963
1964
1965
1966
















1967
1968
1969
1970
1971
1972
1973
}
#endif

BtDbHdr *sqlite4BtLogDbhdr(BtLog *pLog){
  return &pLog->snapshot.dbhdr;
}


















/*
** Set the value of the user cookie.
*/
int sqlite4BtLogSetCookie(BtLog *pLog, u32 iCookie){
  BtPager *pPager = (BtPager *)(pLog->pLock);
  BtPage *pOne = 0;







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
}
#endif

/*
** Return a pointer to the database header object that is a member of the
** log handle's snapshot (pLog->snapshot.dbhdr). The returned pointer
** refers to storage inside *pLog; nothing is copied.
*/
BtDbHdr *sqlite4BtLogDbhdr(BtLog *pLog){
  return &pLog->snapshot.dbhdr;
}

/*
** Fetch page 1 through the pager, mark it writable and then invoke
** btLogUpdateDbhdr() on the page's data buffer (presumably this copies the
** log handle's database header into the page image - confirm against
** btLogUpdateDbhdr()).
**
** Return SQLITE4_OK if successful, or the error code returned by
** sqlite4BtPageGet() or sqlite4BtPageWrite() otherwise. btLogUpdateDbhdr()
** is only invoked if both of those calls succeed.
*/
int sqlite4BtLogDbhdrFlush(BtLog *pLog){
  /* The log's lock handle is cast to a pager handle, exactly as is done
  ** in sqlite4BtLogSetCookie(). */
  BtPager *pPager = (BtPager *)(pLog->pLock);
  BtPage *pOne = 0;
  int rc;

  rc = sqlite4BtPageGet(pPager, 1, &pOne);
  if( rc==SQLITE4_OK ){
    rc = sqlite4BtPageWrite(pOne);
  }
  if( rc==SQLITE4_OK ){
    btLogUpdateDbhdr(pLog, sqlite4BtPageData(pOne));
  }

  /* Released on every path. NOTE(review): if sqlite4BtPageGet() failed,
  ** pOne may still be 0 here - confirm sqlite4BtPageRelease() accepts a
  ** NULL argument. */
  sqlite4BtPageRelease(pOne);

  return rc;
} 

/*
** Set the value of the user cookie.
*/
int sqlite4BtLogSetCookie(BtLog *pLog, u32 iCookie){
  BtPager *pPager = (BtPager *)(pLog->pLock);
  BtPage *pOne = 0;
Changes to src/bt_main.c.
737
738
739
740
741
742
743

744
745
746
747
748
749
750
    btDumpCsr(&buf, &pSub->mcsr);

    sqlite4BtBufAppendf(&buf, "\n%d csr   : ", iBt);
    btDumpCsr(&buf, &pSub->csr);
    sqlite4BtBufAppendf(&buf, "\n");
  }


  fprintf(stderr, "%s", (char*)buf.p);
  sqlite4_buffer_clear(&buf);
}
#endif

#ifndef NDEBUG
/*







>







737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
    btDumpCsr(&buf, &pSub->mcsr);

    sqlite4BtBufAppendf(&buf, "\n%d csr   : ", iBt);
    btDumpCsr(&buf, &pSub->csr);
    sqlite4BtBufAppendf(&buf, "\n");
  }

  sqlite4_buffer_append(&buf, "", 1);
  fprintf(stderr, "%s", (char*)buf.p);
  sqlite4_buffer_clear(&buf);
}
#endif

#ifndef NDEBUG
/*
1798
1799
1800
1801
1802
1803
1804

1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
        BtCursor *pM = &pSub->mcsr;
        btCsrSetup(db, pHdr->iMRoot, pM);

        btPutU32(&pSub->aPrefix[0], (u32)iter.iAge);
        btPutU32(&pSub->aPrefix[4], ~(u32)iter.iLvl);

        rc = btCsrSeek(pM, pSub->aPrefix, pK, nK, BT_SEEK_LE, BT_CSRSEEK_SEEK);

        if( rc==SQLITE4_NOTFOUND && eSeek==BT_SEEK_GE ){
          rc = btCsrSeek(pM, pSub->aPrefix, 0, 0, BT_SEEK_GE, BT_CSRSEEK_SEEK);
        }

        if( rc==SQLITE4_INEXACT ){
          rc = fiSubCsrCheckPrefix(pSub);
        }

        if( rc==SQLITE4_NOTFOUND ){
          /* No keys to visit in this level */
          assert( pSub->mcsr.nPg==0 );
          assert( pSub->csr.nPg==0 );
          rc = SQLITE4_OK;







>


<
<
|
<







1799
1800
1801
1802
1803
1804
1805
1806
1807
1808


1809

1810
1811
1812
1813
1814
1815
1816
        BtCursor *pM = &pSub->mcsr;
        btCsrSetup(db, pHdr->iMRoot, pM);

        btPutU32(&pSub->aPrefix[0], (u32)iter.iAge);
        btPutU32(&pSub->aPrefix[4], ~(u32)iter.iLvl);

        rc = btCsrSeek(pM, pSub->aPrefix, pK, nK, BT_SEEK_LE, BT_CSRSEEK_SEEK);
        if( rc==SQLITE4_INEXACT ) rc = fiSubCsrCheckPrefix(pSub);
        if( rc==SQLITE4_NOTFOUND && eSeek==BT_SEEK_GE ){
          rc = btCsrSeek(pM, pSub->aPrefix, 0, 0, BT_SEEK_GE, BT_CSRSEEK_SEEK);


          if( rc==SQLITE4_INEXACT ) rc = fiSubCsrCheckPrefix(pSub);

        }

        if( rc==SQLITE4_NOTFOUND ){
          /* No keys to visit in this level */
          assert( pSub->mcsr.nPg==0 );
          assert( pSub->csr.nPg==0 );
          rc = SQLITE4_OK;
1891
1892
1893
1894
1895
1896
1897

1898
1899
1900
1901
1902
1903
1904
1905
1906
1907



1908
1909
1910
1911
1912
1913
1914
      btPutU32(&aPrefix[4], ~(u32)(iter.iLvl - (iter.iLvl==0 ? 0 : 1)));
      rc = btCsrSeek(&pSub->mcsr, 0, aPrefix, n, BT_SEEK_LE, 0);
      if( rc==SQLITE4_OK ){
        rc = btCsrStep(&pSub->mcsr, 0);
      }
    }
    if( rc==SQLITE4_INEXACT ) rc = SQLITE4_OK;


    if( rc==SQLITE4_OK ){
      const void *pV;
      int nV;
      int iRoot;
      btCsrData(&pSub->mcsr, 0, 4, &pV, &nV);
      iRoot = sqlite4BtGetU32((const u8*)pV);
      btCsrReset(&pSub->csr, 1);
      btCsrSetup(db, iRoot, &pSub->csr);
      rc = btCsrEnd(&pSub->csr, bLast);



    }
  }
  fiLevelIterCleanup(&iter);

  if( rc==SQLITE4_OK ){
    pCsr->base.flags &= ~(CSR_NEXT_OK | CSR_PREV_OK);
    pCsr->base.flags |= (bLast ? CSR_PREV_OK : CSR_NEXT_OK);







>










>
>
>







1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
      btPutU32(&aPrefix[4], ~(u32)(iter.iLvl - (iter.iLvl==0 ? 0 : 1)));
      rc = btCsrSeek(&pSub->mcsr, 0, aPrefix, n, BT_SEEK_LE, 0);
      if( rc==SQLITE4_OK ){
        rc = btCsrStep(&pSub->mcsr, 0);
      }
    }
    if( rc==SQLITE4_INEXACT ) rc = SQLITE4_OK;
    if( rc==SQLITE4_OK ) rc = fiSubCsrCheckPrefix(pSub);

    if( rc==SQLITE4_OK ){
      const void *pV;
      int nV;
      int iRoot;
      btCsrData(&pSub->mcsr, 0, 4, &pV, &nV);
      iRoot = sqlite4BtGetU32((const u8*)pV);
      btCsrReset(&pSub->csr, 1);
      btCsrSetup(db, iRoot, &pSub->csr);
      rc = btCsrEnd(&pSub->csr, bLast);
    }else if( rc==SQLITE4_NOTFOUND ){
      btCsrReset(&pSub->mcsr, 0);
      rc = SQLITE4_OK;
    }
  }
  fiLevelIterCleanup(&iter);

  if( rc==SQLITE4_OK ){
    pCsr->base.flags &= ~(CSR_NEXT_OK | CSR_PREV_OK);
    pCsr->base.flags |= (bLast ? CSR_PREV_OK : CSR_NEXT_OK);
1946
1947
1948
1949
1950
1951
1952

1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965

1966
1967
1968
1969
1970
1971
1972
*/
int sqlite4BtCsrFirst(bt_cursor *pBase){
  int rc;
  if( IsBtCsr(pBase) ){
    rc = btCsrEnd((BtCursor*)pBase, 0);
  }else{
    rc = fiCsrEnd((FiCursor*)pBase, 0);

  }
  return rc;
}

/*
** Position cursor pCsr to point to the largest key in the database.
*/
int sqlite4BtCsrLast(bt_cursor *pBase){
  int rc;
  if( IsBtCsr(pBase) ){
    rc = btCsrEnd((BtCursor*)pBase, 1);
  }else{
    rc = fiCsrEnd((FiCursor*)pBase, 1);

  }
  return rc;
}


/*
** Advance to the next entry in the tree.







>













>







1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
*/
int sqlite4BtCsrFirst(bt_cursor *pBase){
  int rc;
  if( IsBtCsr(pBase) ){
    /* Handle is a BtCursor. The second argument (0) is the counterpart of
    ** the 1 passed by sqlite4BtCsrLast(). */
    rc = btCsrEnd((BtCursor*)pBase, 0);
  }else{
    /* Any other handle is treated as a FiCursor. The assert_ficursor_ok()
    ** check is applied to this branch only. */
    rc = fiCsrEnd((FiCursor*)pBase, 0);
    assert_ficursor_ok((FiCursor*)pBase, rc);
  }
  return rc;
}

/*
** Position cursor pBase to point to the largest key in the database.
**
** If IsBtCsr() is true for the handle it is passed to btCsrEnd() as a
** BtCursor. Otherwise it is passed to fiCsrEnd() as a FiCursor, after
** which assert_ficursor_ok() is applied to the result. In both cases the
** second argument is 1, selecting the "last" end.
**
** The return value is the code returned by btCsrEnd() or fiCsrEnd().
*/
int sqlite4BtCsrLast(bt_cursor *pBase){
  int rc;
  if( IsBtCsr(pBase) ){
    rc = btCsrEnd((BtCursor*)pBase, 1);
  }else{
    rc = fiCsrEnd((FiCursor*)pBase, 1);
    assert_ficursor_ok((FiCursor*)pBase, rc);
  }
  return rc;
}


/*
** Advance to the next entry in the tree.
2449
2450
2451
2452
2453
2454
2455

2456
2457
2458
2459
2460
2461
2462
*/
static int btAllocateNonOverflow(bt_db *db, BtPage **ppPg){
  int rc;
  if( db->bFastInsertOp ){
    BtDbHdr *pHdr = sqlite4BtPagerDbhdr(db->pPager);
    u32 iPg;


    iPg = pHdr->nSubPg + btFirstOfBlock(pHdr, pHdr->iSubBlock);
    pHdr->nSubPg++;
    rc = sqlite4BtPageGet(db->pPager, iPg, ppPg);
    if( rc==SQLITE4_OK ){
      rc = sqlite4BtPageWrite(*ppPg);
      if( rc!=SQLITE4_OK ){
        sqlite4BtPageRelease(*ppPg);







>







2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
*/
static int btAllocateNonOverflow(bt_db *db, BtPage **ppPg){
  int rc;
  if( db->bFastInsertOp ){
    BtDbHdr *pHdr = sqlite4BtPagerDbhdr(db->pPager);
    u32 iPg;

    sqlite4BtPagerDbhdrDirty(db->pPager);
    iPg = pHdr->nSubPg + btFirstOfBlock(pHdr, pHdr->iSubBlock);
    pHdr->nSubPg++;
    rc = sqlite4BtPageGet(db->pPager, iPg, ppPg);
    if( rc==SQLITE4_OK ){
      rc = sqlite4BtPageWrite(*ppPg);
      if( rc!=SQLITE4_OK ){
        sqlite4BtPageRelease(*ppPg);
3430
3431
3432
3433
3434
3435
3436

3437
3438
3439
3440
3441
3442
3443
    if( bLeaf && db->bFastInsertOp ){
      /* This operation will need to allocate further pages. The worst
      ** case scenario is (nDepth+1) pages. If fewer than that remain
      ** available in the block, return BT_BLOCKFULL. */
      BtDbHdr *pHdr = sqlite4BtPagerDbhdr(db->pPager);
      int nPgPerBlk = (pHdr->blksz / pHdr->pgsz);
      if( (nPgPerBlk - pHdr->nSubPg) < pCsr->nPg+1 ){

        rc = BT_BLOCKFULL;
        pHdr->iSubBlock = 0;
      }
    }
    if( rc==SQLITE4_OK && pCsr->nPg==1 ){
      rc = btExtendTree(pCsr);
    }







>







3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
    if( bLeaf && db->bFastInsertOp ){
      /* This operation will need to allocate further pages. The worst
      ** case scenario is (nDepth+1) pages. If fewer than that remain
      ** available in the block, return BT_BLOCKFULL. */
      BtDbHdr *pHdr = sqlite4BtPagerDbhdr(db->pPager);
      int nPgPerBlk = (pHdr->blksz / pHdr->pgsz);
      if( (nPgPerBlk - pHdr->nSubPg) < pCsr->nPg+1 ){
        sqlite4BtPagerDbhdrDirty(db->pPager);
        rc = BT_BLOCKFULL;
        pHdr->iSubBlock = 0;
      }
    }
    if( rc==SQLITE4_OK && pCsr->nPg==1 ){
      rc = btExtendTree(pCsr);
    }
3901
3902
3903
3904
3905
3906
3907
3908

3909
3910
3911
3912
3913
3914
3915
3916

  /* Save the value of the fast-insert flag. It will be restored before
  ** this function returns. Leaving it set here interferes with page 
  ** allocation if the meta-tree needs to be extended.  */
  const int bFastInsertOp = db->bFastInsertOp;
  db->bFastInsertOp = 0;
  
#if 0

  fprintf(stderr, "BEFORE!\n");
  btPrintMetaTree(db->pPager, 1, pHdr);
#endif
  assert_summary_ok(db, SQLITE4_OK);

  memset(&csr, 0, sizeof(csr));
  memset(&mcsr, 0, sizeof(mcsr));
  btCsrSetup(db, pHdr->iMRoot, &mcsr);







|
>
|







3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923
3924

  /* Save the value of the fast-insert flag. It will be restored before
  ** this function returns. Leaving it set here interferes with page 
  ** allocation if the meta-tree needs to be extended.  */
  const int bFastInsertOp = db->bFastInsertOp;
  db->bFastInsertOp = 0;
  
#if 1
  static int nCall = 0; nCall++;
  fprintf(stderr, "BEFORE %d\n", nCall);
  btPrintMetaTree(db->pPager, 1, pHdr);
#endif
  assert_summary_ok(db, SQLITE4_OK);

  memset(&csr, 0, sizeof(csr));
  memset(&mcsr, 0, sizeof(mcsr));
  btCsrSetup(db, pHdr->iMRoot, &mcsr);
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980

      if( rc==SQLITE4_OK ){
        /* rc = btCsrStep(&mcsr, 1); */
        rc = btCsrSeek(&mcsr, 0, aPrefix, sizeof(aPrefix), BT_SEEK_GE, 0);
        if( rc==SQLITE4_INEXACT ) rc = SQLITE4_OK;
      }
    }

    if( rc==SQLITE4_NOTFOUND ){
      rc = SQLITE4_OK;
    }else if( rc==SQLITE4_OK && iRoot ){
      int n = sizeof(aPrefix) + nKey;
      rc = sqlite4_buffer_resize(&buf, n);
      if( rc==SQLITE4_OK ){
        u8 aData[4];
        u8 *a = (u8*)buf.p;
        memcpy(a, aPrefix, sizeof(aPrefix));
        memcpy(&a[sizeof(aPrefix)], pKey, nKey);







<
|
|
|







3971
3972
3973
3974
3975
3976
3977

3978
3979
3980
3981
3982
3983
3984
3985
3986
3987

      if( rc==SQLITE4_OK ){
        /* rc = btCsrStep(&mcsr, 1); */
        rc = btCsrSeek(&mcsr, 0, aPrefix, sizeof(aPrefix), BT_SEEK_GE, 0);
        if( rc==SQLITE4_INEXACT ) rc = SQLITE4_OK;
      }
    }

    if( rc==SQLITE4_NOTFOUND ) rc = SQLITE4_OK;

    if( rc==SQLITE4_OK && iRoot ){
      int n = sizeof(aPrefix) + nKey;
      rc = sqlite4_buffer_resize(&buf, n);
      if( rc==SQLITE4_OK ){
        u8 aData[4];
        u8 *a = (u8*)buf.p;
        memcpy(a, aPrefix, sizeof(aPrefix));
        memcpy(&a[sizeof(aPrefix)], pKey, nKey);
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
    }
  }

  btCsrReset(&csr, 1);
  btCsrReset(&mcsr, 1);
  sqlite4_buffer_clear(&buf);

#if 0
  if( rc==SQLITE4_OK ){
    btPrintMetaTree(db->pPager, 1, pHdr);
  }
#endif
  assert_summary_ok(db, SQLITE4_OK);
  db->bFastInsertOp = bFastInsertOp;
  return rc;







|







4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
    }
  }

  btCsrReset(&csr, 1);
  btCsrReset(&mcsr, 1);
  sqlite4_buffer_clear(&buf);

#if 1
  if( rc==SQLITE4_OK ){
    btPrintMetaTree(db->pPager, 1, pHdr);
  }
#endif
  assert_summary_ok(db, SQLITE4_OK);
  db->bFastInsertOp = bFastInsertOp;
  return rc;
4082
4083
4084
4085
4086
4087
4088

4089
4090
4091
4092
4093
4094
4095

  /* Find the schedule page. If there is no schedule page, allocate it now. */
  if( pHdr->iSRoot==0 ){
    rc = sqlite4BtPageAllocate(db->pPager, &pPg);
    if( rc==SQLITE4_OK ){
      u8 *aData = sqlite4BtPageData(pPg);
      memset(aData, 0, pHdr->pgsz);

      pHdr->iSRoot = sqlite4BtPagePgno(pPg);
    }
  }else{
    rc = sqlite4BtPageGet(db->pPager, pHdr->iSRoot, &pPg);
  }

  /* Check if the schedule page is busy. If so, no new merge may be 







>







4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103

  /* Find the schedule page. If there is no schedule page, allocate it now. */
  if( pHdr->iSRoot==0 ){
    rc = sqlite4BtPageAllocate(db->pPager, &pPg);
    if( rc==SQLITE4_OK ){
      u8 *aData = sqlite4BtPageData(pPg);
      memset(aData, 0, pHdr->pgsz);
      sqlite4BtPagerDbhdrDirty(db->pPager);
      pHdr->iSRoot = sqlite4BtPagePgno(pPg);
    }
  }else{
    rc = sqlite4BtPageGet(db->pPager, pHdr->iSRoot, &pPg);
  }

  /* Check if the schedule page is busy. If so, no new merge may be 
4152
4153
4154
4155
4156
4157
4158

4159
4160
4161
4162
4163
4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
4174

4175
4176
4177
4178
4179
4180
4181
  int rc = SQLITE4_OK;

  assert( db->bFastInsertOp );
  db->bFastInsertOp = 0;

  /* If the meta-tree has not been created, create it now. */
  if( pHdr->iMRoot==0 ){

    rc = btAllocateNewRoot(db, BT_PGFLAGS_METATREE, &pHdr->iMRoot);
  }

  /* If no writable sub-tree current exists, create one */ 
  if( rc==SQLITE4_OK && pHdr->iSubBlock==0 ){
    u32 iLevel;                   /* Level number for new sub-tree */
    u32 iSubBlock;                /* New block */

    rc = btAllocateNewLevel(db, pHdr, &iLevel);
    if( rc==SQLITE4_OK ){
      rc = btAllocateBlock(db, 1, &iSubBlock);
    }

    if( rc==SQLITE4_OK ){
      u8 aKey[8];
      u8 aVal[4];

      pHdr->iSubBlock = iSubBlock;
      pHdr->nSubPg = 1;           /* Root page is automatically allocated */

      /* The key for the new entry consists of the concatentation of two 
      ** 32-bit big-endian integers - the <age> and <level-no>. The age
      ** of the new segment is 0. The level number is one greater than the
      ** level number of the previous segment.  */







>
















>







4160
4161
4162
4163
4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
4174
4175
4176
4177
4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
  int rc = SQLITE4_OK;

  assert( db->bFastInsertOp );
  db->bFastInsertOp = 0;

  /* If the meta-tree has not been created, create it now. */
  if( pHdr->iMRoot==0 ){
    sqlite4BtPagerDbhdrDirty(db->pPager);
    rc = btAllocateNewRoot(db, BT_PGFLAGS_METATREE, &pHdr->iMRoot);
  }

  /* If no writable sub-tree current exists, create one */ 
  if( rc==SQLITE4_OK && pHdr->iSubBlock==0 ){
    u32 iLevel;                   /* Level number for new sub-tree */
    u32 iSubBlock;                /* New block */

    rc = btAllocateNewLevel(db, pHdr, &iLevel);
    if( rc==SQLITE4_OK ){
      rc = btAllocateBlock(db, 1, &iSubBlock);
    }

    if( rc==SQLITE4_OK ){
      u8 aKey[8];
      u8 aVal[4];
      sqlite4BtPagerDbhdrDirty(db->pPager);
      pHdr->iSubBlock = iSubBlock;
      pHdr->nSubPg = 1;           /* Root page is automatically allocated */

      /* The key for the new entry consists of the concatentation of two 
      ** 32-bit big-endian integers - the <age> and <level-no>. The age
      ** of the new segment is 0. The level number is one greater than the
      ** level number of the previous segment.  */
4237
4238
4239
4240
4241
4242
4243
4244
4245
4246
4247
4248
4249
4250
4251
4252
4253



4254
4255
4256
4257
4258
4259
4260
          nKey -= nPrefix;
          pKey = (const void*)(((const u8*)pKey) + nPrefix);
        }
      }
    }

    /* Assuming the process above found a block, set up the block cursor and
    ** seek it to the smallest first valid key.  */
    if( rc==SQLITE4_OK ){
      const void *pVal = 0; int nVal = 0;
      rc = btCsrData(pM, 0, 4, &pVal, &nVal);
      if( rc==SQLITE4_OK ){
        u32 iRoot = sqlite4BtGetU32((const u8*)pVal);
        btCsrSetup(db, iRoot, &pSub->csr);
        rc = btCsrSeek(&pSub->csr, 0, pKey, nKey, BT_SEEK_GE, 0);
        if( rc==SQLITE4_INEXACT ) rc = SQLITE4_OK;
        if( rc==SQLITE4_NOTFOUND ) rc = btErrorBkpt(SQLITE4_CORRUPT);



      }
    }else if( rc==SQLITE4_NOTFOUND ){
      assert( pSub->csr.nPg==0 );
      assert( pSub->mcsr.nPg==0 );
      rc = SQLITE4_OK;
    }
  }







|








|
>
>
>







4247
4248
4249
4250
4251
4252
4253
4254
4255
4256
4257
4258
4259
4260
4261
4262
4263
4264
4265
4266
4267
4268
4269
4270
4271
4272
4273
          nKey -= nPrefix;
          pKey = (const void*)(((const u8*)pKey) + nPrefix);
        }
      }
    }

    /* Assuming the process above found a block, set up the block cursor and
    ** seek it to the smallest valid key.  */
    if( rc==SQLITE4_OK ){
      const void *pVal = 0; int nVal = 0;
      rc = btCsrData(pM, 0, 4, &pVal, &nVal);
      if( rc==SQLITE4_OK ){
        u32 iRoot = sqlite4BtGetU32((const u8*)pVal);
        btCsrSetup(db, iRoot, &pSub->csr);
        rc = btCsrSeek(&pSub->csr, 0, pKey, nKey, BT_SEEK_GE, 0);
        if( rc==SQLITE4_INEXACT ) rc = SQLITE4_OK;
        if( rc==SQLITE4_NOTFOUND ){
          btCsrReset(pM, 0);
          rc = SQLITE4_OK;
        }
      }
    }else if( rc==SQLITE4_NOTFOUND ){
      assert( pSub->csr.nPg==0 );
      assert( pSub->mcsr.nPg==0 );
      rc = SQLITE4_OK;
    }
  }
Changes to src/bt_pager.c.
76
77
78
79
80
81
82


83
84
85
86
87
88
89
90
91
92
93
94
95
96

97
98
99
100
101
102
103
/*
** Candidate values for BtPage.flags
*/
#define BT_PAGE_DIRTY 0x0001      /* Set for pages in BtPager.pDirty list */

/*
** Pager object.


*/
struct BtPager {
  BtLock btl;                     /* Variables shared with bt_lock module */
  BtLog *pLog;                    /* Logging module */
  int iTransactionLevel;          /* Current transaction level (see bt.h) */
  char *zFile;                    /* Database file name */
  int nFile;                      /* Length of string zFile in bytes */
  BtPageHash hash;                /* Hash table */
  BtPage *pDirty;                 /* List of all dirty pages */
  int nTotalRef;                  /* Total number of outstanding page refs */
  int bDoAutoCkpt;                /* Do auto-checkpoint after next unlock */
  BtSavepoint *aSavepoint;        /* Savepoint array */
  int nSavepoint;                 /* Number of entries in aSavepoint array */
  BtDbHdr *pHdr;                  /* Header object for current read snapshot */

  void *pLogsizeCtx;              /* A copy of this is passed to xLogsize() */
  void (*xLogsize)(void*, int);   /* Log-size Callback function */
};


/**************************************************************************
** Interface to BtPageHash object.







>
>














>







76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
/*
** Candidate values for BtPage.flags
*/
#define BT_PAGE_DIRTY 0x0001      /* Set for pages in BtPager.pDirty list */

/*
** Pager object.
**
** bDirtyHdr:
**   Set to 1 by sqlite4BtPagerDbhdrDirty(). Code that modifies a field of
**   the header object (*pHdr) calls that function so that the change is 
**   not lost. btPagerDbhdrFlush(), which is invoked at the start of
**   btCommitTransaction(), tests this flag. If it is set, 
**   sqlite4BtLogDbhdrFlush() is called and the flag is cleared.
*/
struct BtPager {
  BtLock btl;                     /* Variables shared with bt_lock module */
  BtLog *pLog;                    /* Logging module */
  int iTransactionLevel;          /* Current transaction level (see bt.h) */
  char *zFile;                    /* Database file name */
  int nFile;                      /* Length of string zFile in bytes */
  BtPageHash hash;                /* Hash table */
  BtPage *pDirty;                 /* List of all dirty pages */
  int nTotalRef;                  /* Total number of outstanding page refs */
  int bDoAutoCkpt;                /* Do auto-checkpoint after next unlock */
  BtSavepoint *aSavepoint;        /* Savepoint array */
  int nSavepoint;                 /* Number of entries in aSavepoint array */
  BtDbHdr *pHdr;                  /* Header object for current read snapshot */
  int bDirtyHdr;                  /* True if pHdr has been modified */
  void *pLogsizeCtx;              /* A copy of this is passed to xLogsize() */
  void (*xLogsize)(void*, int);   /* Log-size Callback function */
};


/**************************************************************************
** Interface to BtPageHash object.
530
531
532
533
534
535
536









537
538
539
540
541
542
543
544
545
546
547

548
549
550
551
552
553
554
555
556
557
  if( rc==SQLITE4_OK && p->bDoAutoCkpt ){
    sqlite4BtLogCheckpoint(p->pLog, (p->btl.nAutoCkpt / 2));
  }
  p->bDoAutoCkpt = 0;

  return rc;
}










/*
** Commit the current write transaction to disk.
*/
static int btCommitTransaction(BtPager *p){
  int rc = SQLITE4_OK;
  int nLogsize;                   /* Number of frames in log after commit */
  BtPage *pPg;
  BtPage *pNext;
  assert( p->iTransactionLevel>=2 );


  btCloseSavepoints(p, 2, 0);

  for(pPg=p->pDirty; pPg; pPg=pNext){
    pNext = pPg->pNextDirty;
    pPg->flags &= ~(BT_PAGE_DIRTY);
    pPg->pNextDirty = 0;
    if( rc==SQLITE4_OK ){
      int nPg = ((pNext==0) ? p->pHdr->nPg : 0);
      rc = sqlite4BtLogWrite(p->pLog, pPg->pgno, pPg->aData, nPg);
    }







>
>
>
>
>
>
>
>
>











>


|







533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
  if( rc==SQLITE4_OK && p->bDoAutoCkpt ){
    sqlite4BtLogCheckpoint(p->pLog, (p->btl.nAutoCkpt / 2));
  }
  p->bDoAutoCkpt = 0;

  return rc;
}

/*
** If the database header has been flagged as modified (p->bDirtyHdr is
** non-zero), call sqlite4BtLogDbhdrFlush() and clear the flag. If the flag
** is not set this function is a no-op.
**
** Return SQLITE4_OK if the flag was clear, or the value returned by
** sqlite4BtLogDbhdrFlush() otherwise. The flag is cleared even if
** sqlite4BtLogDbhdrFlush() returns an error.
**
** NOTE(review): unlike btCommitTransaction() this function is not declared
** static - confirm whether external linkage is intended.
*/
int btPagerDbhdrFlush(BtPager *p){
  int rc = SQLITE4_OK;
  if( p->bDirtyHdr ){
    rc = sqlite4BtLogDbhdrFlush(p->pLog);
    p->bDirtyHdr = 0;
  }
  return rc;
}

/*
** Commit the current write transaction to disk.
*/
static int btCommitTransaction(BtPager *p){
  int rc = SQLITE4_OK;
  int nLogsize;                   /* Number of frames in log after commit */
  BtPage *pPg;
  BtPage *pNext;
  assert( p->iTransactionLevel>=2 );

  rc = btPagerDbhdrFlush(p);
  btCloseSavepoints(p, 2, 0);

  for(pPg=p->pDirty; rc==SQLITE4_OK && pPg; pPg=pNext){
    pNext = pPg->pNextDirty;
    pPg->flags &= ~(BT_PAGE_DIRTY);
    pPg->pNextDirty = 0;
    if( rc==SQLITE4_OK ){
      int nPg = ((pNext==0) ? p->pHdr->nPg : 0);
      rc = sqlite4BtLogWrite(p->pLog, pPg->pgno, pPg->aData, nPg);
    }
766
767
768
769
770
771
772




773
774
775
776
777
778
779

/* 
** Query for the root page number. Requires an open read transaction.
*/
BtDbHdr *sqlite4BtPagerDbhdr(BtPager *p){
  return p->pHdr;
}





void sqlite4BtPagerSetDbhdr(BtPager *p, BtDbHdr *pHdr){
  assert( p->pHdr==0 || pHdr==0 );
  p->pHdr = pHdr;
}

/*







>
>
>
>







779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796

/* 
** Return the database header object currently associated with the pager
** (p->pHdr). This is the pointer itself, not a copy, and may be 0 if no
** header has been set. Requires an open read transaction or an active
** checkpoint.
*/
BtDbHdr *sqlite4BtPagerDbhdr(BtPager *p){
  return p->pHdr;
}

/*
** Flag the pager's database header as modified by setting p->bDirtyHdr.
** The flag is tested and cleared by btPagerDbhdrFlush().
*/
void sqlite4BtPagerDbhdrDirty(BtPager *p){
  p->bDirtyHdr = 1;
}

/*
** Set the database header object returned by sqlite4BtPagerDbhdr() to
** pHdr. Passing 0 clears it.
**
** The assert() permits only two transitions: installing a header when 
** none is currently set, or clearing the current one. Replacing one 
** non-zero header directly with another is not allowed.
*/
void sqlite4BtPagerSetDbhdr(BtPager *p, BtDbHdr *pHdr){
  assert( p->pHdr==0 || pHdr==0 );
  p->pHdr = pHdr;
}

/*
874
875
876
877
878
879
880

881
882
883
884
885
886
887
    if( pPg==0 ){
      rc = sqlite4BtPageGet(p, pgno, &pRelease);
    }
    if( rc==SQLITE4_OK ){
      BtPage *pTrunk = pPg ? pPg : pRelease;
      rc = sqlite4BtPageWrite(pTrunk);
      if( rc==SQLITE4_OK ){

        sqlite4BtPutU32(&pTrunk->aData[0], 0);
        sqlite4BtPutU32(&pTrunk->aData[4], pHdr->iFreePg);
        pHdr->iFreePg = pgno;
        sqlite4BtDebugPageFree((BtLock*)p, "free-list-trunk", pgno);
      }
    }
    sqlite4BtPageRelease(pRelease);







>







891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
    if( pPg==0 ){
      rc = sqlite4BtPageGet(p, pgno, &pRelease);
    }
    if( rc==SQLITE4_OK ){
      BtPage *pTrunk = pPg ? pPg : pRelease;
      rc = sqlite4BtPageWrite(pTrunk);
      if( rc==SQLITE4_OK ){
        sqlite4BtPagerDbhdrDirty(p);
        sqlite4BtPutU32(&pTrunk->aData[0], 0);
        sqlite4BtPutU32(&pTrunk->aData[4], pHdr->iFreePg);
        pHdr->iFreePg = pgno;
        sqlite4BtDebugPageFree((BtLock*)p, "free-list-trunk", pgno);
      }
    }
    sqlite4BtPageRelease(pRelease);
902
903
904
905
906
907
908

909
910
911
912
913
914
915
      rc = sqlite4BtPageWrite(pTrunk);
    }
    if( rc==SQLITE4_OK ){
      u8 *aData = pTrunk->aData;
      u32 nFree = sqlite4BtGetU32(aData);
      if( nFree==0 ){
        u32 iNext = sqlite4BtGetU32(&aData[4]);

        *pPgno = pHdr->iFreePg;
        pHdr->iFreePg = iNext;
        sqlite4BtDebugPageAlloc((BtLock*)p, "free-list-trunk", *pPgno);
      }else{
        *pPgno = sqlite4BtGetU32(&aData[8 + 4*(nFree-1)]);
        sqlite4BtPutU32(aData, nFree-1);
        sqlite4BtDebugPageAlloc((BtLock*)p, "free-list", *pPgno);







>







920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
      rc = sqlite4BtPageWrite(pTrunk);
    }
    if( rc==SQLITE4_OK ){
      u8 *aData = pTrunk->aData;
      u32 nFree = sqlite4BtGetU32(aData);
      if( nFree==0 ){
        u32 iNext = sqlite4BtGetU32(&aData[4]);
        sqlite4BtPagerDbhdrDirty(p);
        *pPgno = pHdr->iFreePg;
        pHdr->iFreePg = iNext;
        sqlite4BtDebugPageAlloc((BtLock*)p, "free-list-trunk", *pPgno);
      }else{
        *pPgno = sqlite4BtGetU32(&aData[8 + 4*(nFree-1)]);
        sqlite4BtPutU32(aData, nFree-1);
        sqlite4BtDebugPageAlloc((BtLock*)p, "free-list", *pPgno);
Changes to www/bt.wiki.
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667

<p> As well as the records that index all segments in the db, the meta-table
contains a single summary record. The key is four 0xFF bytes.

<p>For each age between 0 and the largest age in the db, inclusive:

  * current minimum level number (16-bits).
  * total number of levels.
  * maximum level in active merge (16-bits).

Store this as a separate record in the meta-table. Even if there are segments with
age=31 in the db, this is still only 6*32=192 bytes.

<h3> Schedule-Page Format</h3>








|







653
654
655
656
657
658
659
660
661
662
663
664
665
666
667

<p> As well as the records that index all segments in the db, the meta-table
contains a single summary record. The key is four 0xFF bytes.

<p>For each age between 0 and the largest age in the db, inclusive:

  * current minimum level number (16-bits).
  * total number of levels. (16-bits)
  * maximum level in active merge (16-bits).

Store this as a separate record in the meta-table. Even if there are segments with
age=31 in the db, this is still only 6*32=192 bytes.

<h3> Schedule-Page Format</h3>