SQLite

Check-in [a84fb078ba]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:BTree and pager are working pretty well now. (CVS 234)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: a84fb078baf96dbfb5983981127dfc905074b7f9
User & Date: drh 2001-07-02 17:51:46.000
Context
2001-07-23
14:33
Add ability to quote table and column names in expression. (CVS 235) (check-in: 029e3a3a5d user: drh tags: trunk)
2001-07-02
17:51
BTree and pager are working pretty well now. (CVS 234) (check-in: a84fb078ba user: drh tags: trunk)
2001-07-01
22:12
More BTree tests (CVS 233) (check-in: 55c89bfdd3 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/btree.c.
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
** Boston, MA  02111-1307, USA.
**
** Author contact information:
**   drh@hwaci.com
**   http://www.hwaci.com/drh/
**
*************************************************************************
** $Id: btree.c,v 1.19 2001/07/01 22:12:01 drh Exp $
**
** This file implements a external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
**
**     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
**     "Sorting And Searching", pages 473-480. Addison-Wesley
**     Publishing Company, Reading, Massachusetts.







|







17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
** Boston, MA  02111-1307, USA.
**
** Author contact information:
**   drh@hwaci.com
**   http://www.hwaci.com/drh/
**
*************************************************************************
** $Id: btree.c,v 1.20 2001/07/02 17:51:46 drh Exp $
**
** This file implements a external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
**
**     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
**     "Sorting And Searching", pages 473-480. Addison-Wesley
**     Publishing Company, Reading, Massachusetts.
194
195
196
197
198
199
200






201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216

/*
** The maximum number of database entries that can be held in a single
** page of the database. 
*/
#define MX_CELL ((SQLITE_PAGE_SIZE-sizeof(PageHdr))/MIN_CELL_SIZE)







/*
** The maximum amount of payload (in bytes) that can be stored locally for
** a database entry.  If the entry contains more data than this, the
** extra goes onto overflow pages.
**
** This number is chosen so that at least 4 cells will fit on every page.
*/
#define MX_LOCAL_PAYLOAD \
  (((SQLITE_PAGE_SIZE-sizeof(PageHdr))/4-(sizeof(CellHdr)+sizeof(Pgno)))&~3)

/*
** Data on a database page is stored as a linked list of Cell structures.
** Both the key and the data are stored in aPayload[].  The key always comes
** first.  The aPayload[] field grows as necessary to hold the key and data,
** up to a maximum of MX_LOCAL_PAYLOAD bytes.  If the size of the key and
** data combined exceeds MX_LOCAL_PAYLOAD bytes, then Cell.ovfl is the







>
>
>
>
>
>







|
<







194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214

215
216
217
218
219
220
221

/*
** The maximum number of database entries that can be held in a single
** page of the database. 
*/
#define MX_CELL ((SQLITE_PAGE_SIZE-sizeof(PageHdr))/MIN_CELL_SIZE)

/*
** The amount of usable space on a single page of the BTree.  This is the
** page size minus the overhead of the page header.
*/
#define USABLE_SPACE  (SQLITE_PAGE_SIZE - sizeof(PageHdr))

/*
** The maximum amount of payload (in bytes) that can be stored locally for
** a database entry.  If the entry contains more data than this, the
** extra goes onto overflow pages.
**
** This number is chosen so that at least 4 cells will fit on every page.
*/
#define MX_LOCAL_PAYLOAD ((USABLE_SPACE/4-(sizeof(CellHdr)+sizeof(Pgno)))&~3)


/*
** Data on a database page is stored as a linked list of Cell structures.
** Both the key and the data are stored in aPayload[].  The key always comes
** first.  The aPayload[] field grows as necessary to hold the key and data,
** up to a maximum of MX_LOCAL_PAYLOAD bytes.  If the size of the key and
** data combined exceeds MX_LOCAL_PAYLOAD bytes, then Cell.ovfl is the
357
358
359
360
361
362
363

364
365
366
367
368
369
370
** into one big FreeBlk at the end of the page.
*/
static void defragmentPage(MemPage *pPage){
  int pc, i, n;
  FreeBlk *pFBlk;
  char newPage[SQLITE_PAGE_SIZE];


  pc = sizeof(PageHdr);
  pPage->u.hdr.firstCell = pc;
  memcpy(newPage, pPage->u.aDisk, pc);
  for(i=0; i<pPage->nCell; i++){
    Cell *pCell = pPage->apCell[i];

    /* This routine should never be called on an overfull page.  The







>







362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
** into one big FreeBlk at the end of the page.
*/
static void defragmentPage(MemPage *pPage){
  int pc, i, n;
  FreeBlk *pFBlk;
  char newPage[SQLITE_PAGE_SIZE];

  assert( sqlitepager_iswriteable(pPage) );
  pc = sizeof(PageHdr);
  pPage->u.hdr.firstCell = pc;
  memcpy(newPage, pPage->u.aDisk, pc);
  for(i=0; i<pPage->nCell; i++){
    Cell *pCell = pPage->apCell[i];

    /* This routine should never be called on an overfull page.  The
405
406
407
408
409
410
411

412
413
414
415
416
417
418
*/
static int allocateSpace(MemPage *pPage, int nByte){
  FreeBlk *p;
  u16 *pIdx;
  int start;
  int cnt = 0;


  assert( nByte==ROUNDUP(nByte) );
  if( pPage->nFree<nByte || pPage->isOverfull ) return 0;
  pIdx = &pPage->u.hdr.firstFree;
  p = (FreeBlk*)&pPage->u.aDisk[*pIdx];
  while( p->iSize<nByte ){
    assert( cnt++ < SQLITE_PAGE_SIZE/4 );
    if( p->iNext==0 ){







>







411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
*/
static int allocateSpace(MemPage *pPage, int nByte){
  FreeBlk *p;
  u16 *pIdx;
  int start;
  int cnt = 0;

  assert( sqlitepager_iswriteable(pPage) );
  assert( nByte==ROUNDUP(nByte) );
  if( pPage->nFree<nByte || pPage->isOverfull ) return 0;
  pIdx = &pPage->u.hdr.firstFree;
  p = (FreeBlk*)&pPage->u.aDisk[*pIdx];
  while( p->iSize<nByte ){
    assert( cnt++ < SQLITE_PAGE_SIZE/4 );
    if( p->iNext==0 ){
450
451
452
453
454
455
456

457
458
459
460
461
462
463
static void freeSpace(MemPage *pPage, int start, int size){
  int end = start + size;
  u16 *pIdx, idx;
  FreeBlk *pFBlk;
  FreeBlk *pNew;
  FreeBlk *pNext;


  assert( size == ROUNDUP(size) );
  assert( start == ROUNDUP(start) );
  pIdx = &pPage->u.hdr.firstFree;
  idx = *pIdx;
  while( idx!=0 && idx<start ){
    pFBlk = (FreeBlk*)&pPage->u.aDisk[idx];
    if( idx + pFBlk->iSize == start ){







>







457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
static void freeSpace(MemPage *pPage, int start, int size){
  int end = start + size;
  u16 *pIdx, idx;
  FreeBlk *pFBlk;
  FreeBlk *pNew;
  FreeBlk *pNext;

  assert( sqlitepager_iswriteable(pPage) );
  assert( size == ROUNDUP(size) );
  assert( start == ROUNDUP(start) );
  pIdx = &pPage->u.hdr.firstFree;
  idx = *pIdx;
  while( idx!=0 && idx<start ){
    pFBlk = (FreeBlk*)&pPage->u.aDisk[idx];
    if( idx + pFBlk->iSize == start ){
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
  if( pParent ){
    pPage->pParent = pParent;
    sqlitepager_ref(pParent);
  }
  if( pPage->isInit ) return SQLITE_OK;
  pPage->isInit = 1;
  pPage->nCell = 0;
  freeSpace = SQLITE_PAGE_SIZE - sizeof(PageHdr);
  idx = pPage->u.hdr.firstCell;
  while( idx!=0 ){
    if( idx>SQLITE_PAGE_SIZE-MIN_CELL_SIZE ) goto page_format_error;
    if( idx<sizeof(PageHdr) ) goto page_format_error;
    if( idx!=ROUNDUP(idx) ) goto page_format_error;
    pCell = (Cell*)&pPage->u.aDisk[idx];
    sz = cellSize(pCell);







|







522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
  if( pParent ){
    pPage->pParent = pParent;
    sqlitepager_ref(pParent);
  }
  if( pPage->isInit ) return SQLITE_OK;
  pPage->isInit = 1;
  pPage->nCell = 0;
  freeSpace = USABLE_SPACE;
  idx = pPage->u.hdr.firstCell;
  while( idx!=0 ){
    if( idx>SQLITE_PAGE_SIZE-MIN_CELL_SIZE ) goto page_format_error;
    if( idx<sizeof(PageHdr) ) goto page_format_error;
    if( idx!=ROUNDUP(idx) ) goto page_format_error;
    pCell = (Cell*)&pPage->u.aDisk[idx];
    sz = cellSize(pCell);
556
557
558
559
560
561
562

563
564
565
566
567
568
569
/*
** Set up a raw page so that it looks like a database page holding
** no entries.
*/
static void zeroPage(MemPage *pPage){
  PageHdr *pHdr;
  FreeBlk *pFBlk;

  memset(pPage, 0, SQLITE_PAGE_SIZE);
  pHdr = &pPage->u.hdr;
  pHdr->firstCell = 0;
  pHdr->firstFree = sizeof(*pHdr);
  pFBlk = (FreeBlk*)&pHdr[1];
  pFBlk->iNext = 0;
  pFBlk->iSize = SQLITE_PAGE_SIZE - sizeof(*pHdr);







>







564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
/*
** Set up a raw page so that it looks like a database page holding
** no entries.
*/
static void zeroPage(MemPage *pPage){
  PageHdr *pHdr;
  FreeBlk *pFBlk;
  assert( sqlitepager_iswriteable(pPage) );
  memset(pPage, 0, SQLITE_PAGE_SIZE);
  pHdr = &pPage->u.hdr;
  pHdr->firstCell = 0;
  pHdr->firstFree = sizeof(*pHdr);
  pFBlk = (FreeBlk*)&pHdr[1];
  pFBlk->iNext = 0;
  pFBlk->iSize = SQLITE_PAGE_SIZE - sizeof(*pHdr);
589
590
591
592
593
594
595
596





597
598
599
600
601
602
603
604

605
606
607
608
609
610
611
612
/*
** Open a new database.
**
** Actually, this routine just sets up the internal data structures
** for accessing the database.  We do not open the database file 
** until the first page is loaded.
*/
int sqliteBtreeOpen(const char *zFilename, int mode, Btree **ppBtree){





  Btree *pBt;
  int rc;

  pBt = sqliteMalloc( sizeof(*pBt) );
  if( pBt==0 ){
    *ppBtree = 0;
    return SQLITE_NOMEM;
  }

  rc = sqlitepager_open(&pBt->pPager, zFilename, 100, EXTRA_SIZE);
  if( rc!=SQLITE_OK ){
    if( pBt->pPager ) sqlitepager_close(pBt->pPager);
    sqliteFree(pBt);
    *ppBtree = 0;
    return rc;
  }
  sqlitepager_set_destructor(pBt->pPager, pageDestructor);







|
>
>
>
>
>








>
|







598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
/*
** Open a new database.
**
** Actually, this routine just sets up the internal data structures
** for accessing the database.  We do not open the database file 
** until the first page is loaded.
*/
int sqliteBtreeOpen(
  const char *zFilename,    /* Name of the file containing the BTree database */
  int mode,                 /* Not currently used */
  int nCache,               /* How many pages in the page cache */
  Btree **ppBtree           /* Pointer to new Btree object written here */
){
  Btree *pBt;
  int rc;

  pBt = sqliteMalloc( sizeof(*pBt) );
  if( pBt==0 ){
    *ppBtree = 0;
    return SQLITE_NOMEM;
  }
  if( nCache<10 ) nCache = 10;
  rc = sqlitepager_open(&pBt->pPager, zFilename, nCache, EXTRA_SIZE);
  if( rc!=SQLITE_OK ){
    if( pBt->pPager ) sqlitepager_close(pBt->pPager);
    sqliteFree(pBt);
    *ppBtree = 0;
    return rc;
  }
  sqlitepager_set_destructor(pBt->pPager, pageDestructor);
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077

1078
1079
1080
1081
1082
1083
1084
** Move the cursor down to a new child page.
*/
static int moveToChild(BtCursor *pCur, int newPgno){
  int rc;
  MemPage *pNewPage;

  rc = sqlitepager_get(pCur->pBt->pPager, newPgno, (void**)&pNewPage);
  if( rc ){
    return rc;
  }
  initPage(pNewPage, newPgno, pCur->pPage);

  sqlitepager_unref(pCur->pPage);
  pCur->pPage = pNewPage;
  pCur->idx = 0;
  return SQLITE_OK;
}

/*







|
<
<
|
>







1082
1083
1084
1085
1086
1087
1088
1089


1090
1091
1092
1093
1094
1095
1096
1097
1098
** Move the cursor down to a new child page.
*/
static int moveToChild(BtCursor *pCur, int newPgno){
  int rc;
  MemPage *pNewPage;

  rc = sqlitepager_get(pCur->pBt->pPager, newPgno, (void**)&pNewPage);
  if( rc ) return rc;


  rc = initPage(pNewPage, newPgno, pCur->pPage);
  if( rc ) return rc;
  sqlitepager_unref(pCur->pPage);
  pCur->pPage = pNewPage;
  pCur->idx = 0;
  return SQLITE_OK;
}

/*
1113
1114
1115
1116
1117
1118
1119


1120
1121
1122
1123
1124
1125
1126
** Move the cursor to the root page
*/
static int moveToRoot(BtCursor *pCur){
  MemPage *pNew;
  int rc;

  rc = sqlitepager_get(pCur->pBt->pPager, pCur->pgnoRoot, (void**)&pNew);


  if( rc ) return rc;
  sqlitepager_unref(pCur->pPage);
  pCur->pPage = pNew;
  pCur->idx = 0;
  return SQLITE_OK;
}








>
>







1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
** Move the cursor to the root page
*/
static int moveToRoot(BtCursor *pCur){
  MemPage *pNew;
  int rc;

  rc = sqlitepager_get(pCur->pBt->pPager, pCur->pgnoRoot, (void**)&pNew);
  if( rc ) return rc;
  rc = initPage(pNew, pCur->pgnoRoot, 0);
  if( rc ) return rc;
  sqlitepager_unref(pCur->pPage);
  pCur->pPage = pNew;
  pCur->idx = 0;
  return SQLITE_OK;
}

1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
*/
static void reparentPage(Pager *pPager, Pgno pgno, MemPage *pNewParent){
  MemPage *pThis;

  if( pgno==0 ) return;
  assert( pPager!=0 );
  pThis = sqlitepager_lookup(pPager, pgno);
  if( pThis ){
    if( pThis->pParent!=pNewParent ){
      if( pThis->pParent ) sqlitepager_unref(pThis->pParent);
      pThis->pParent = pNewParent;
      if( pNewParent ) sqlitepager_ref(pNewParent);
    }
    sqlitepager_unref(pThis);
  }







|







1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
*/
static void reparentPage(Pager *pPager, Pgno pgno, MemPage *pNewParent){
  MemPage *pThis;

  if( pgno==0 ) return;
  assert( pPager!=0 );
  pThis = sqlitepager_lookup(pPager, pgno);
  if( pThis && pThis->isInit ){
    if( pThis->pParent!=pNewParent ){
      if( pThis->pParent ) sqlitepager_unref(pThis->pParent);
      pThis->pParent = pNewParent;
      if( pNewParent ) sqlitepager_ref(pNewParent);
    }
    sqlitepager_unref(pThis);
  }
1476
1477
1478
1479
1480
1481
1482

1483
1484
1485
1486
1487
1488
1489
** routine will be called soon after this routine in order to rebuild 
** the linked list.
*/
static void dropCell(MemPage *pPage, int idx, int sz){
  int j;
  assert( idx>=0 && idx<pPage->nCell );
  assert( sz==cellSize(pPage->apCell[idx]) );

  freeSpace(pPage, Addr(pPage->apCell[idx]) - Addr(pPage), sz);
  for(j=idx; j<pPage->nCell-1; j++){
    pPage->apCell[j] = pPage->apCell[j+1];
  }
  pPage->nCell--;
}








>







1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
** routine will be called soon after this routine in order to rebuild 
** the linked list.
*/
static void dropCell(MemPage *pPage, int idx, int sz){
  int j;
  assert( idx>=0 && idx<pPage->nCell );
  assert( sz==cellSize(pPage->apCell[idx]) );
  assert( sqlitepager_iswriteable(pPage) );
  freeSpace(pPage, Addr(pPage->apCell[idx]) - Addr(pPage), sz);
  for(j=idx; j<pPage->nCell-1; j++){
    pPage->apCell[j] = pPage->apCell[j+1];
  }
  pPage->nCell--;
}

1500
1501
1502
1503
1504
1505
1506

1507
1508
1509
1510
1511
1512
1513
** routine will be called soon after this routine in order to rebuild 
** the linked list.
*/
static void insertCell(MemPage *pPage, int i, Cell *pCell, int sz){
  int idx, j;
  assert( i>=0 && i<=pPage->nCell );
  assert( sz==cellSize(pCell) );

  idx = allocateSpace(pPage, sz);
  for(j=pPage->nCell; j>i; j--){
    pPage->apCell[j] = pPage->apCell[j-1];
  }
  pPage->nCell++;
  if( idx<=0 ){
    pPage->isOverfull = 1;







>







1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
** routine will be called soon after this routine in order to rebuild 
** the linked list.
*/
static void insertCell(MemPage *pPage, int i, Cell *pCell, int sz){
  int idx, j;
  assert( i>=0 && i<=pPage->nCell );
  assert( sz==cellSize(pCell) );
  assert( sqlitepager_iswriteable(pPage) );
  idx = allocateSpace(pPage, sz);
  for(j=pPage->nCell; j>i; j--){
    pPage->apCell[j] = pPage->apCell[j-1];
  }
  pPage->nCell++;
  if( idx<=0 ){
    pPage->isOverfull = 1;
1523
1524
1525
1526
1527
1528
1529

1530
1531
1532
1533
1534
1535
1536
** occur in the order specified by the pPage->apCell[] array.  
** Invoke this routine once to repair damage after one or more
** invocations of either insertCell() or dropCell().
*/
static void relinkCellList(MemPage *pPage){
  int i;
  u16 *pIdx;

  pIdx = &pPage->u.hdr.firstCell;
  for(i=0; i<pPage->nCell; i++){
    int idx = Addr(pPage->apCell[i]) - Addr(pPage);
    assert( idx>0 && idx<SQLITE_PAGE_SIZE );
    *pIdx = idx;
    pIdx = &pPage->apCell[i]->h.iNext;
  }







>







1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
** occur in the order specified by the pPage->apCell[] array.  
** Invoke this routine once to repair damage after one or more
** invocations of either insertCell() or dropCell().
*/
static void relinkCellList(MemPage *pPage){
  int i;
  u16 *pIdx;
  assert( sqlitepager_iswriteable(pPage) );
  pIdx = &pPage->u.hdr.firstCell;
  for(i=0; i<pPage->nCell; i++){
    int idx = Addr(pPage->apCell[i]) - Addr(pPage);
    assert( idx>0 && idx<SQLITE_PAGE_SIZE );
    *pIdx = idx;
    pIdx = &pPage->apCell[i]->h.iNext;
  }
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625



1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636

1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665

1666
1667
1668
1669
1670
1671
1672
  int nNew;                    /* Number of pages in apNew[] */
  int nDiv;                    /* Number of cells in apDiv[] */
  int i, j, k;                 /* Loop counters */
  int idx;                     /* Index of pPage in pParent->apCell[] */
  int nxDiv;                   /* Next divider slot in pParent->apCell[] */
  int rc;                      /* The return code */
  int iCur;                    /* apCell[iCur] is the cell of the cursor */
  int usedPerPage;             /* Memory needed for each page */
  int freePerPage;             /* Average free space per page */
  int totalSize;               /* Total bytes for all cells */



  MemPage *extraUnref = 0;     /* A page that needs to be unref-ed */
  Pgno pgno;                   /* Page number */
  Cell *apCell[MX_CELL*3+5];   /* All cells from pages being balanceed */
  int szCell[MX_CELL*3+5];     /* Local size of all cells */
  Cell aTemp[2];               /* Temporary holding area for apDiv[] */
  MemPage aOld[3];             /* Temporary copies of pPage and its siblings */

  /* 
  ** Return without doing any work if pPage is neither overfull nor
  ** underfull.
  */

  if( !pPage->isOverfull && pPage->nFree<SQLITE_PAGE_SIZE/2 ){
    relinkCellList(pPage);
    return SQLITE_OK;
  }

  /*
  ** Find the parent of the page to be balanceed.
  ** If there is no parent, it means this page is the root page and
  ** special rules apply.
  */
  pParent = pPage->pParent;
  if( pParent==0 ){
    Pgno pgnoChild;
    MemPage *pChild;
    if( pPage->nCell==0 ){
      if( pPage->u.hdr.rightChild ){
        /*
        ** The root page is empty.  Copy the one child page
        ** into the root page and return.  This reduces the depth
        ** of the BTree by one.
        */
        rc = sqlitepager_write(pPage);
        if( rc ) return rc;
        pgnoChild = pPage->u.hdr.rightChild;
        rc = sqlitepager_get(pBt->pPager, pgnoChild, (void**)&pChild);
        if( rc ) return rc;
        memcpy(pPage, pChild, SQLITE_PAGE_SIZE);
        pPage->isInit = 0;
        initPage(pPage, sqlitepager_pagenumber(pPage), 0);

        reparentChildPages(pBt->pPager, pPage);
        freePage(pBt, pChild, pgnoChild);
        sqlitepager_unref(pChild);
      }else{
        relinkCellList(pPage);
      }
      return SQLITE_OK;







<
<

>
>
>











>
|




















<
<





|
>







1635
1636
1637
1638
1639
1640
1641


1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678


1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
  int nNew;                    /* Number of pages in apNew[] */
  int nDiv;                    /* Number of cells in apDiv[] */
  int i, j, k;                 /* Loop counters */
  int idx;                     /* Index of pPage in pParent->apCell[] */
  int nxDiv;                   /* Next divider slot in pParent->apCell[] */
  int rc;                      /* The return code */
  int iCur;                    /* apCell[iCur] is the cell of the cursor */


  int totalSize;               /* Total bytes for all cells */
  int subtotal;                /* Subtotal of bytes in cells on one page */
  int cntNew[4];               /* Index in apCell[] of cell after i-th page */
  int szNew[4];                /* Combined size of cells place on i-th page */
  MemPage *extraUnref = 0;     /* A page that needs to be unref-ed */
  Pgno pgno;                   /* Page number */
  Cell *apCell[MX_CELL*3+5];   /* All cells from pages being balanceed */
  int szCell[MX_CELL*3+5];     /* Local size of all cells */
  Cell aTemp[2];               /* Temporary holding area for apDiv[] */
  MemPage aOld[3];             /* Temporary copies of pPage and its siblings */

  /* 
  ** Return without doing any work if pPage is neither overfull nor
  ** underfull.
  */
  assert( sqlitepager_iswriteable(pPage) );
  if( !pPage->isOverfull && pPage->nFree<SQLITE_PAGE_SIZE/3 ){
    relinkCellList(pPage);
    return SQLITE_OK;
  }

  /*
  ** Find the parent of the page to be balanceed.
  ** If there is no parent, it means this page is the root page and
  ** special rules apply.
  */
  pParent = pPage->pParent;
  if( pParent==0 ){
    Pgno pgnoChild;
    MemPage *pChild;
    if( pPage->nCell==0 ){
      if( pPage->u.hdr.rightChild ){
        /*
        ** The root page is empty.  Copy the one child page
        ** into the root page and return.  This reduces the depth
        ** of the BTree by one.
        */


        pgnoChild = pPage->u.hdr.rightChild;
        rc = sqlitepager_get(pBt->pPager, pgnoChild, (void**)&pChild);
        if( rc ) return rc;
        memcpy(pPage, pChild, SQLITE_PAGE_SIZE);
        pPage->isInit = 0;
        rc = initPage(pPage, sqlitepager_pagenumber(pPage), 0);
        assert( rc==SQLITE_OK );
        reparentChildPages(pBt->pPager, pPage);
        freePage(pBt, pChild, pgnoChild);
        sqlitepager_unref(pChild);
      }else{
        relinkCellList(pPage);
      }
      return SQLITE_OK;
1685
1686
1687
1688
1689
1690
1691

1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706

1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
    ** child.  Then fall thru to the code below which will cause
    ** the overfull child page to be split.
    */
    rc = sqlitepager_write(pPage);
    if( rc ) return rc;
    rc = allocatePage(pBt, &pChild, &pgnoChild);
    if( rc ) return rc;

    copyPage(pChild, pPage);
    pChild->pParent = pPage;
    sqlitepager_ref(pPage);
    pChild->isOverfull = 1;
    if( pCur ){
      sqlitepager_unref(pCur->pPage);
      pCur->pPage = pChild;
    }else{
      extraUnref = pChild;
    }
    zeroPage(pPage);
    pPage->u.hdr.rightChild = pgnoChild;
    pParent = pPage;
    pPage = pChild;
  }else{

    rc = sqlitepager_write(pPage);
    if( rc ) return rc;
  }
  
  /*
  ** Find the Cell in the parent page whose h.leftChild points back
  ** to pPage.  The "idx" variable is the index of that cell.  If pPage
  ** is the rightmost child of pParent then set idx to pParent->nCell 
  */
  idx = -1;







>














<
>
|
|
<







1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726

1727
1728
1729

1730
1731
1732
1733
1734
1735
1736
    ** child.  Then fall thru to the code below which will cause
    ** the overfull child page to be split.
    */
    rc = sqlitepager_write(pPage);
    if( rc ) return rc;
    rc = allocatePage(pBt, &pChild, &pgnoChild);
    if( rc ) return rc;
    assert( sqlitepager_iswriteable(pChild) );
    copyPage(pChild, pPage);
    pChild->pParent = pPage;
    sqlitepager_ref(pPage);
    pChild->isOverfull = 1;
    if( pCur ){
      sqlitepager_unref(pCur->pPage);
      pCur->pPage = pChild;
    }else{
      extraUnref = pChild;
    }
    zeroPage(pPage);
    pPage->u.hdr.rightChild = pgnoChild;
    pParent = pPage;
    pPage = pChild;

  }
  rc = sqlitepager_write(pParent);
  if( rc ) return rc;

  
  /*
  ** Find the Cell in the parent page whose h.leftChild points back
  ** to pPage.  The "idx" variable is the index of that cell.  If pPage
  ** is the rightmost child of pParent then set idx to pParent->nCell 
  */
  idx = -1;
1757
1758
1759
1760
1761
1762
1763


1764
1765
1766
1767
1768
1769
1770
      pgnoOld[i] = apDiv[i]->h.leftChild;
    }else if( k==pParent->nCell ){
      pgnoOld[i] = pParent->u.hdr.rightChild;
    }else{
      break;
    }
    rc = sqlitepager_get(pBt->pPager, pgnoOld[i], (void**)&apOld[i]);


    if( rc ) goto balance_cleanup;
    nOld++;
  }

  /*
  ** Set iCur to be the index in apCell[] of the cell that the cursor
  ** is pointing to.  We will need this later on in order to keep the







>
>







1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
      pgnoOld[i] = apDiv[i]->h.leftChild;
    }else if( k==pParent->nCell ){
      pgnoOld[i] = pParent->u.hdr.rightChild;
    }else{
      break;
    }
    rc = sqlitepager_get(pBt->pPager, pgnoOld[i], (void**)&apOld[i]);
    if( rc ) goto balance_cleanup;
    rc = initPage(apOld[i], pgnoOld[i], pParent);
    if( rc ) goto balance_cleanup;
    nOld++;
  }

  /*
  ** Set iCur to be the index in apCell[] of the cell that the cursor
  ** is pointing to.  We will need this later on in order to keep the
1814
1815
1816
1817
1818
1819
1820
1821
1822



1823


1824
1825
1826
1827
1828

1829

1830

1831




1832







1833
1834


1835
1836
1837
1838
1839
1840
1841
1842

1843
1844
1845
1846
1847
1848
1849
1850
1851

1852
1853
1854
1855
1856

1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867

1868
1869
1870
1871
1872
1873
1874
      assert( apCell[nCell]->h.leftChild==pgnoOld[i] );
      apCell[nCell]->h.leftChild = pOld->u.hdr.rightChild;
      nCell++;
    }
  }

  /*
  ** Estimate the number of pages needed.  Record this number in "k"
  ** for now.  It will get transferred to nNew as we allocate the



  ** new pages.


  */
  totalSize = 0;
  for(i=0; i<nCell; i++){
    totalSize += szCell[i];
  }

  k = (totalSize + (SQLITE_PAGE_SIZE - sizeof(PageHdr) - 1)) /

            (SQLITE_PAGE_SIZE - sizeof(PageHdr));

  usedPerPage = (totalSize+k-1)/k;




  freePerPage = SQLITE_PAGE_SIZE - usedPerPage;







  



  /*
  ** Allocate new pages
  */
  for(i=0; i<k; i++){
    rc = allocatePage(pBt, &apNew[i], &pgnoNew[i]);
    if( rc ) goto balance_cleanup;
    nNew++;
    zeroPage(apNew[i]);

  }

  /*
  ** Evenly distribute the data in apCell[] across the new pages.
  ** Insert divider cells into pParent as necessary.
  */
  j = 0;
  for(i=0; i<nNew; i++){
    MemPage *pNew = apNew[i];

    while( j<nCell && pNew->nFree>freePerPage && szCell[j]<=pNew->nFree ){
      if( pCur && iCur==j ){ pCur->pPage = pNew; pCur->idx = pNew->nCell; }
      insertCell(pNew, pNew->nCell, apCell[j], szCell[j]);
      j++;
    }

    assert( !pNew->isOverfull );
    relinkCellList(pNew);
    if( i<nNew-1 && j<nCell ){
      pNew->u.hdr.rightChild = apCell[j]->h.leftChild;
      apCell[j]->h.leftChild = pgnoNew[i];
      if( pCur && iCur==j ){ pCur->pPage = pParent; pCur->idx = nxDiv; }
      insertCell(pParent, nxDiv, apCell[j], szCell[j]);
      j++;
      nxDiv++;
    }
  }

  apNew[nNew-1]->u.hdr.rightChild = apOld[nOld-1]->u.hdr.rightChild;
  if( nxDiv==pParent->nCell ){
    pParent->u.hdr.rightChild = pgnoNew[nNew-1];
  }else{
    pParent->apCell[nxDiv]->h.leftChild = pgnoNew[nNew-1];
  }
  if( pCur ){







|
|
>
>
>
|
>
>





>
|
>
|
>
|
>
>
>
>
|
>
>
>
>
>
>
>
|
|
>
>

|






>









>
|




>











>







1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
      assert( apCell[nCell]->h.leftChild==pgnoOld[i] );
      apCell[nCell]->h.leftChild = pOld->u.hdr.rightChild;
      nCell++;
    }
  }

  /*
  ** Figure out the number of pages needed to hold all nCell cells.
  ** Store this number in "k".  Also compute szNew[] which is the total
  ** size of all cells on the i-th page and cntNew[] which is the index
  ** in apCell[] of the cell that divides path i from path i+1.  
  ** cntNew[k] should equal nCell.
  **
  ** This little patch of code is critical for keeping the tree
  ** balanced. 
  */
  totalSize = 0;
  for(i=0; i<nCell; i++){
    totalSize += szCell[i];
  }
  for(subtotal=k=i=0; i<nCell; i++){
    subtotal += szCell[i];
    if( subtotal > USABLE_SPACE ){
      szNew[k] = subtotal - szCell[i];
      cntNew[k] = i;
      subtotal = 0;
      k++;
    }
  }
  szNew[k] = subtotal;
  cntNew[k] = nCell;
  k++;
  for(i=k-1; i>0; i--){
    while( szNew[i]<USABLE_SPACE/2 ){
      cntNew[i-1]--;
      assert( cntNew[i-1]>0 );
      szNew[i] += szCell[cntNew[i-1]];
      szNew[i-1] -= szCell[cntNew[i-1]-1];
    }
  }
  assert( cntNew[0]>0 );

  /*
  ** Allocate k new pages
  */
  for(i=0; i<k; i++){
    rc = allocatePage(pBt, &apNew[i], &pgnoNew[i]);
    if( rc ) goto balance_cleanup;
    nNew++;
    zeroPage(apNew[i]);
    apNew[i]->isInit = 1;
  }

  /*
  ** Evenly distribute the data in apCell[] across the new pages.
  ** Insert divider cells into pParent as necessary.
  */
  j = 0;
  for(i=0; i<nNew; i++){
    MemPage *pNew = apNew[i];
    while( j<cntNew[i] ){
      assert( pNew->nFree>=szCell[j] );
      if( pCur && iCur==j ){ pCur->pPage = pNew; pCur->idx = pNew->nCell; }
      insertCell(pNew, pNew->nCell, apCell[j], szCell[j]);
      j++;
    }
    assert( pNew->nCell>0 );
    assert( !pNew->isOverfull );
    relinkCellList(pNew);
    if( i<nNew-1 && j<nCell ){
      pNew->u.hdr.rightChild = apCell[j]->h.leftChild;
      apCell[j]->h.leftChild = pgnoNew[i];
      if( pCur && iCur==j ){ pCur->pPage = pParent; pCur->idx = nxDiv; }
      insertCell(pParent, nxDiv, apCell[j], szCell[j]);
      j++;
      nxDiv++;
    }
  }
  assert( j==nCell );
  apNew[nNew-1]->u.hdr.rightChild = apOld[nOld-1]->u.hdr.rightChild;
  if( nxDiv==pParent->nCell ){
    pParent->u.hdr.rightChild = pgnoNew[nNew-1];
  }else{
    pParent->apCell[nxDiv]->h.leftChild = pgnoNew[nNew-1];
  }
  if( pCur ){
1995
1996
1997
1998
1999
2000
2001


2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014

2015

2016


2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051


2052
2053
2054
2055
2056
2057
2058
    Cell *pNext;
    int szNext;
    getTempCursor(pCur, &leafCur);
    rc = sqliteBtreeNext(&leafCur, 0);
    if( rc!=SQLITE_OK ){
      return SQLITE_CORRUPT;
    }


    dropCell(pPage, pCur->idx, cellSize(pCell));
    pNext = leafCur.pPage->apCell[leafCur.idx];
    szNext = cellSize(pNext);
    pNext->h.leftChild = pgnoChild;
    insertCell(pPage, pCur->idx, pNext, szNext);
    rc = balance(pCur->pBt, pPage, pCur);
    if( rc ) return rc;
    pCur->bSkipNext = 1;
    dropCell(leafCur.pPage, leafCur.idx, szNext);
    rc = balance(pCur->pBt, leafCur.pPage, 0);
    releaseTempCursor(&leafCur);
  }else{
    dropCell(pPage, pCur->idx, cellSize(pCell));

    rc = balance(pCur->pBt, pPage, pCur);

    pCur->bSkipNext = 1;


  }
  return rc;
}

/*
** Create a new BTree in the same file.  Write into *piTable the index
** of the root page of the new table.
*/
int sqliteBtreeCreateTable(Btree *pBt, int *piTable){
  MemPage *pRoot;
  Pgno pgnoRoot;
  int rc;
  if( !pBt->inTrans ){
    return SQLITE_ERROR;  /* Must start a transaction first */
  }
  rc = allocatePage(pBt, &pRoot, &pgnoRoot);
  if( rc ) return rc;
  sqlitepager_write(pRoot);
  zeroPage(pRoot);
  sqlitepager_unref(pRoot);
  *piTable = (int)pgnoRoot;
  return SQLITE_OK;
}

/*
** Erase the given database page and all its children.  Return
** the page to the freelist.
*/
static int clearDatabasePage(Btree *pBt, Pgno pgno, int freePageFlag){
  MemPage *pPage;
  int rc;
  Cell *pCell;
  int idx;

  rc = sqlitepager_get(pBt->pPager, pgno, (void**)&pPage);


  if( rc ) return rc;
  idx = pPage->u.hdr.firstCell;
  while( idx>0 ){
    pCell = (Cell*)&pPage->u.aDisk[idx];
    idx = pCell->h.iNext;
    if( pCell->h.leftChild ){
      rc = clearDatabasePage(pBt, pCell->h.leftChild, 1);







>
>













>
|
>
|
>
>

















|

















>
>







2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
    Cell *pNext;
    int szNext;
    getTempCursor(pCur, &leafCur);
    rc = sqliteBtreeNext(&leafCur, 0);
    if( rc!=SQLITE_OK ){
      return SQLITE_CORRUPT;
    }
    rc = sqlitepager_write(leafCur.pPage);
    if( rc ) return rc;
    dropCell(pPage, pCur->idx, cellSize(pCell));
    pNext = leafCur.pPage->apCell[leafCur.idx];
    szNext = cellSize(pNext);
    pNext->h.leftChild = pgnoChild;
    insertCell(pPage, pCur->idx, pNext, szNext);
    rc = balance(pCur->pBt, pPage, pCur);
    if( rc ) return rc;
    pCur->bSkipNext = 1;
    dropCell(leafCur.pPage, leafCur.idx, szNext);
    rc = balance(pCur->pBt, leafCur.pPage, 0);
    releaseTempCursor(&leafCur);
  }else{
    dropCell(pPage, pCur->idx, cellSize(pCell));
    if( pCur->idx>=pPage->nCell && pCur->idx>0 ){
      pCur->idx--;
    }else{
      pCur->bSkipNext = 1;
    }
    rc = balance(pCur->pBt, pPage, pCur);
  }
  return rc;
}

/*
** Create a new BTree in the same file.  Write into *piTable the index
** of the root page of the new table.
*/
int sqliteBtreeCreateTable(Btree *pBt, int *piTable){
  MemPage *pRoot;
  Pgno pgnoRoot;
  int rc;
  if( !pBt->inTrans ){
    return SQLITE_ERROR;  /* Must start a transaction first */
  }
  rc = allocatePage(pBt, &pRoot, &pgnoRoot);
  if( rc ) return rc;
  assert( sqlitepager_iswriteable(pRoot) );
  zeroPage(pRoot);
  sqlitepager_unref(pRoot);
  *piTable = (int)pgnoRoot;
  return SQLITE_OK;
}

/*
** Erase the given database page and all its children.  Return
** the page to the freelist.
*/
static int clearDatabasePage(Btree *pBt, Pgno pgno, int freePageFlag){
  MemPage *pPage;
  int rc;
  Cell *pCell;
  int idx;

  rc = sqlitepager_get(pBt->pPager, pgno, (void**)&pPage);
  if( rc ) return rc;
  rc = sqlitepager_write(pPage);
  if( rc ) return rc;
  idx = pPage->u.hdr.firstCell;
  while( idx>0 ){
    pCell = (Cell*)&pPage->u.aDisk[idx];
    idx = pCell->h.iNext;
    if( pCell->h.leftChild ){
      rc = clearDatabasePage(pBt, pCell->h.leftChild, 1);
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171

2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
******************************************************************************/
#ifdef SQLITE_TEST

/*
** Print a disassembly of the given page on standard output.  This routine
** is used for debugging and testing only.
*/
int sqliteBtreePageDump(Btree *pBt, int pgno){
  int rc;
  MemPage *pPage;
  int i, j;
  int nFree;
  u16 idx;
  char range[20];
  unsigned char payload[20];
  rc = sqlitepager_get(pBt->pPager, (Pgno)pgno, (void**)&pPage);
  if( rc ){
    return rc;
  }

  i = 0;
  idx = pPage->u.hdr.firstCell;
  while( idx>0 && idx<=SQLITE_PAGE_SIZE-MIN_CELL_SIZE ){
    Cell *pCell = (Cell*)&pPage->u.aDisk[idx];
    int sz = cellSize(pCell);
    sprintf(range,"%d..%d", idx, idx+sz-1);
    sz = pCell->h.nKey + pCell->h.nData;
    if( sz>sizeof(payload)-1 ) sz = sizeof(payload)-1;
    memcpy(payload, pCell->aPayload, sz);
    for(j=0; j<sz; j++){
      if( payload[j]<0x20 || payload[j]>0x7f ) payload[j] = '.';
    }
    payload[sz] = 0;
    printf(
      "cell %2d: i=%-10s chld=%-4d nk=%-3d nd=%-3d payload=%s\n",
      i, range, (int)pCell->h.leftChild, pCell->h.nKey, pCell->h.nData,
      payload
    );
    if( pPage->apCell[i]!=pCell ){
      printf("**** apCell[%d] does not match on prior entry ****\n", i);
    }
    i++;
    idx = pCell->h.iNext;
  }
  if( idx!=0 ){
    printf("ERROR: next cell index out of range: %d\n", idx);







|











>














|



|







2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
******************************************************************************/
#ifdef SQLITE_TEST

/*
** Print a disassembly of the given page on standard output.  This routine
** is used for debugging and testing only.
*/
int sqliteBtreePageDump(Btree *pBt, int pgno, int recursive){
  int rc;
  MemPage *pPage;
  int i, j;
  int nFree;
  u16 idx;
  char range[20];
  unsigned char payload[20];
  rc = sqlitepager_get(pBt->pPager, (Pgno)pgno, (void**)&pPage);
  if( rc ){
    return rc;
  }
  if( recursive ) printf("PAGE %d:\n", pgno);
  i = 0;
  idx = pPage->u.hdr.firstCell;
  while( idx>0 && idx<=SQLITE_PAGE_SIZE-MIN_CELL_SIZE ){
    Cell *pCell = (Cell*)&pPage->u.aDisk[idx];
    int sz = cellSize(pCell);
    sprintf(range,"%d..%d", idx, idx+sz-1);
    sz = pCell->h.nKey + pCell->h.nData;
    if( sz>sizeof(payload)-1 ) sz = sizeof(payload)-1;
    memcpy(payload, pCell->aPayload, sz);
    for(j=0; j<sz; j++){
      if( payload[j]<0x20 || payload[j]>0x7f ) payload[j] = '.';
    }
    payload[sz] = 0;
    printf(
      "cell %2d: i=%-10s chld=%-4d nk=%-4d nd=%-4d payload=%s\n",
      i, range, (int)pCell->h.leftChild, pCell->h.nKey, pCell->h.nData,
      payload
    );
    if( pPage->isInit && pPage->apCell[i]!=pCell ){
      printf("**** apCell[%d] does not match on prior entry ****\n", i);
    }
    i++;
    idx = pCell->h.iNext;
  }
  if( idx!=0 ){
    printf("ERROR: next cell index out of range: %d\n", idx);
2208
2209
2210
2211
2212
2213
2214









2215
2216
2217
2218
2219
2220
2221
       i, range, p->iSize, nFree);
    idx = p->iNext;
    i++;
  }
  if( idx!=0 ){
    printf("ERROR: next freeblock index out of range: %d\n", idx);
  }









  sqlitepager_unref(pPage);
  return SQLITE_OK;
}

/*
** Fill aResult[] with information about the entry and page that the
** cursor is pointing to.







>
>
>
>
>
>
>
>
>







2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
       i, range, p->iSize, nFree);
    idx = p->iNext;
    i++;
  }
  if( idx!=0 ){
    printf("ERROR: next freeblock index out of range: %d\n", idx);
  }
  if( recursive && pPage->u.hdr.rightChild!=0 ){
    idx = pPage->u.hdr.firstCell;
    while( idx>0 && idx<SQLITE_PAGE_SIZE-MIN_CELL_SIZE ){
      Cell *pCell = (Cell*)&pPage->u.aDisk[idx];
      sqliteBtreePageDump(pBt, pCell->h.leftChild, 1);
      idx = pCell->h.iNext;
    }
    sqliteBtreePageDump(pBt, pPage->u.hdr.rightChild, 1);
  }
  sqlitepager_unref(pPage);
  return SQLITE_OK;
}

/*
** Fill aResult[] with information about the entry and page that the
** cursor is pointing to.
2270
2271
2272
2273
2274
2275
2276


2277
2278
2279
2280
2281
2282
2283
*/
typedef struct SanityCheck SanityCheck;
struct SanityCheck {
  Btree *pBt;    // The tree being checked out
  Pager *pPager; // The associated pager.  Also accessible by pBt->pPager
  int nPage;     // Number of pages in the database
  int *anRef;    // Number of times each page is referenced


  char *zErrMsg; // An error message.  NULL of no errors seen.
};

/*
** Append a message to the error message string.
*/
static void checkAppendMsg(SanityCheck *pCheck, char *zMsg1, char *zMsg2){







>
>







2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
*/
typedef struct SanityCheck SanityCheck;
struct SanityCheck {
  Btree *pBt;    // The tree being checked out
  Pager *pPager; // The associated pager.  Also accessible by pBt->pPager
  int nPage;     // Number of pages in the database
  int *anRef;    // Number of times each page is referenced
  int nTreePage; // Number of BTree pages
  int nByte;     // Number of bytes of data stored on BTree pages
  char *zErrMsg; // An error message.  NULL of no errors seen.
};

/*
** Append a message to the error message string.
*/
static void checkAppendMsg(SanityCheck *pCheck, char *zMsg1, char *zMsg2){
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
**          but combine to completely cover the page.
**      2.  Make sure cell keys are in order.
**      3.  Make sure no key is less than or equal to zLowerBound.
**      4.  Make sure no key is greater than or equal to zUpperBound.
**      5.  Check the integrity of overflow pages.
**      6.  Recursively call checkTreePage on all children.
**      7.  Verify that the depth of all children is the same.
**      8.  Make sure this page is at least 50% full or else it is
**          the root of the tree.
*/
static int checkTreePage(
  SanityCheck *pCheck,  /* Context for the sanity check */
  int iPage,            /* Page number of the page to check */
  MemPage *pParent,     /* Parent page */
  char *zParentContext, /* Parent context */







|







2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
**          but combine to completely cover the page.
**      2.  Make sure cell keys are in order.
**      3.  Make sure no key is less than or equal to zLowerBound.
**      4.  Make sure no key is greater than or equal to zUpperBound.
**      5.  Check the integrity of overflow pages.
**      6.  Recursively call checkTreePage on all children.
**      7.  Verify that the depth of all children is the same.
**      8.  Make sure this page is at least 33% full or else it is
**          the root of the tree.
*/
static int checkTreePage(
  SanityCheck *pCheck,  /* Context for the sanity check */
  int iPage,            /* Page number of the page to check */
  MemPage *pParent,     /* Parent page */
  char *zParentContext, /* Parent context */
2461
2462
2463
2464
2465
2466
2467

2468
2469
2470
2471
2472






2473
2474
2475
2476
2477
2478
2479
      checkAppendMsg(pCheck, zMsg, 0);
      break;
    }
  }

  /* Check that free space is kept to a minimum
  */

  if( pParent && pPage->nFree>SQLITE_PAGE_SIZE/3 ){
    sprintf(zMsg, "free space (%d) greater than max (%d)", pPage->nFree,
       SQLITE_PAGE_SIZE/3);
    checkAppendMsg(pCheck, zContext, zMsg);
  }







  sqlitepager_unref(pPage);
  return depth;
}

/*
** This routine does a complete check of the given BTree file.  aRoot[] is







>
|




>
>
>
>
>
>







2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
      checkAppendMsg(pCheck, zMsg, 0);
      break;
    }
  }

  /* Check that free space is kept to a minimum
  */
#if 0
  if( pParent && pParent->nCell>2 && pPage->nFree>3*SQLITE_PAGE_SIZE/4 ){
    sprintf(zMsg, "free space (%d) greater than max (%d)", pPage->nFree,
       SQLITE_PAGE_SIZE/3);
    checkAppendMsg(pCheck, zContext, zMsg);
  }
#endif

  /* Update freespace totals.
  */
  pCheck->nTreePage++;
  pCheck->nByte += USABLE_SPACE - pPage->nFree;

  sqlitepager_unref(pPage);
  return depth;
}

/*
** This routine does a complete check of the given BTree file.  aRoot[] is
Changes to src/btree.h.
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
**   drh@hwaci.com
**   http://www.hwaci.com/drh/
**
*************************************************************************
** This header file defines the interface that the sqlite B-Tree file
** subsystem.
**
** @(#) $Id: btree.h,v 1.8 2001/06/30 21:53:53 drh Exp $
*/

typedef struct Btree Btree;
typedef struct BtCursor BtCursor;

int sqliteBtreeOpen(const char *zFilename, int mode, Btree **ppBtree);
int sqliteBtreeClose(Btree*);

int sqliteBtreeBeginTrans(Btree*);
int sqliteBtreeCommit(Btree*);
int sqliteBtreeRollback(Btree*);

int sqliteBtreeCreateTable(Btree*, int*);







|





|







20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
**   drh@hwaci.com
**   http://www.hwaci.com/drh/
**
*************************************************************************
** This header file defines the interface that the sqlite B-Tree file
** subsystem.
**
** @(#) $Id: btree.h,v 1.9 2001/07/02 17:51:46 drh Exp $
*/

typedef struct Btree Btree;
typedef struct BtCursor BtCursor;

int sqliteBtreeOpen(const char *zFilename, int mode, int nPg, Btree **ppBtree);
int sqliteBtreeClose(Btree*);

int sqliteBtreeBeginTrans(Btree*);
int sqliteBtreeCommit(Btree*);
int sqliteBtreeRollback(Btree*);

int sqliteBtreeCreateTable(Btree*, int*);
54
55
56
57
58
59
60
61
62
63
64
65

#define SQLITE_N_BTREE_META 4
int sqliteBtreeGetMeta(Btree*, int*);
int sqliteBtreeUpdateMeta(Btree*, int*);


#ifdef SQLITE_TEST
int sqliteBtreePageDump(Btree*, int);
int sqliteBtreeCursorDump(BtCursor*, int*);
Pager *sqliteBtreePager(Btree*);
char *sqliteBtreeSanityCheck(Btree*, int*, int);
#endif







|




54
55
56
57
58
59
60
61
62
63
64
65

#define SQLITE_N_BTREE_META 4
int sqliteBtreeGetMeta(Btree*, int*);
int sqliteBtreeUpdateMeta(Btree*, int*);


#ifdef SQLITE_TEST
int sqliteBtreePageDump(Btree*, int, int);
int sqliteBtreeCursorDump(BtCursor*, int*);
Pager *sqliteBtreePager(Btree*);
char *sqliteBtreeSanityCheck(Btree*, int*, int);
#endif
Changes to src/pager.c.
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
*************************************************************************
** This is the implementation of the page cache subsystem.
** 
** The page cache is used to access a database file.  The pager journals
** all writes in order to support rollback.  Locking is used to limit
** access to one or more reader or one writer.
**
** @(#) $Id: pager.c,v 1.12 2001/06/28 01:54:49 drh Exp $
*/
#include "sqliteInt.h"
#include "pager.h"
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include <assert.h>







|







23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
*************************************************************************
** This is the implementation of the page cache subsystem.
** 
** The page cache is used to access a database file.  The pager journals
** all writes in order to support rollback.  Locking is used to limit
** access to one or more reader or one writer.
**
** @(#) $Id: pager.c,v 1.13 2001/07/02 17:51:46 drh Exp $
*/
#include "sqliteInt.h"
#include "pager.h"
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include <assert.h>
118
119
120
121
122
123
124

125
126
127
128
129
130
131
  void (*xDestructor)(void*); /* Call this routine when freeing pages */
  int nPage;                  /* Total number of in-memory pages */
  int nRef;                   /* Number of in-memory pages with PgHdr.nRef>0 */
  int mxPage;                 /* Maximum number of pages to hold in cache */
  int nHit, nMiss, nOvfl;     /* Cache hits, missing, and LRU overflows */
  unsigned char state;        /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
  unsigned char errMask;      /* One of several kinds of errors */

  PgHdr *pFirst, *pLast;      /* List of free pages */
  PgHdr *pAll;                /* List of all pages */
  PgHdr *aHash[N_PG_HASH];    /* Hash table to map page number of PgHdr */
};

/*
** These are bits that can be set in Pager.errMask.







>







118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
  void (*xDestructor)(void*); /* Call this routine when freeing pages */
  int nPage;                  /* Total number of in-memory pages */
  int nRef;                   /* Number of in-memory pages with PgHdr.nRef>0 */
  int mxPage;                 /* Maximum number of pages to hold in cache */
  int nHit, nMiss, nOvfl;     /* Cache hits, missing, and LRU overflows */
  unsigned char state;        /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
  unsigned char errMask;      /* One of several kinds of errors */
  unsigned char *aInJournal;  /* One bit for each page in the database file */
  PgHdr *pFirst, *pLast;      /* List of free pages */
  PgHdr *pAll;                /* List of all pages */
  PgHdr *aHash[N_PG_HASH];    /* Hash table to map page number of PgHdr */
};

/*
** These are bits that can be set in Pager.errMask.
206
207
208
209
210
211
212

213
214
215
216
217
218
219
}

/*
** Move the cursor for file descriptor fd to the point whereto from
** the beginning of the file.
*/
static int pager_seek(int fd, off_t whereto){

  lseek(fd, whereto, SEEK_SET);
  return SQLITE_OK;
}

/*
** Truncate the given file so that it contains exactly mxPg pages
** of data.







>







207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
}

/*
** Move the cursor for file descriptor fd to the point whereto from
** the beginning of the file.
*/
static int pager_seek(int fd, off_t whereto){
  /*printf("SEEK to page %d\n", whereto/SQLITE_PAGE_SIZE + 1);*/
  lseek(fd, whereto, SEEK_SET);
  return SQLITE_OK;
}

/*
** Truncate the given file so that it contains exactly mxPg pages
** of data.
228
229
230
231
232
233
234

235
236
237
238
239
240
241
** Read nBytes of data from fd into pBuf.  If the data cannot be
** read or only a partial read occurs, then the unread parts of
** pBuf are filled with zeros and this routine returns SQLITE_IOERR.
** If the read is completely successful, return SQLITE_OK.
*/
static int pager_read(int fd, void *pBuf, int nByte){
  int rc;

  rc = read(fd, pBuf, nByte);
  if( rc<0 ){
    memset(pBuf, 0, nByte);
    return SQLITE_IOERR;
  }
  if( rc<nByte ){
    memset(&((char*)pBuf)[rc], 0, nByte - rc);







>







230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
** Read nBytes of data from fd into pBuf.  If the data cannot be
** read or only a partial read occurs, then the unread parts of
** pBuf are filled with zeros and this routine returns SQLITE_IOERR.
** If the read is completely successful, return SQLITE_OK.
*/
static int pager_read(int fd, void *pBuf, int nByte){
  int rc;
  /* printf("READ\n");*/
  rc = read(fd, pBuf, nByte);
  if( rc<0 ){
    memset(pBuf, 0, nByte);
    return SQLITE_IOERR;
  }
  if( rc<nByte ){
    memset(&((char*)pBuf)[rc], 0, nByte - rc);
249
250
251
252
253
254
255

256
257
258
259
260
261
262
/*
** Write nBytes of data into fd.  If any problem occurs or if the
** write is incomplete, SQLITE_IOERR is returned.  SQLITE_OK is
** returned upon complete success.
*/
static int pager_write(int fd, const void *pBuf, int nByte){
  int rc;

  rc = write(fd, pBuf, nByte);
  if( rc<nByte ){
    return SQLITE_FULL;
  }else{
    return SQLITE_OK;
  }
}







>







252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
/*
** Write nBytes of data into fd.  If any problem occurs or if the
** write is incomplete, SQLITE_IOERR is returned.  SQLITE_OK is
** returned upon complete success.
*/
static int pager_write(int fd, const void *pBuf, int nByte){
  int rc;
  /*printf("WRITE\n");*/
  rc = write(fd, pBuf, nByte);
  if( rc<nByte ){
    return SQLITE_FULL;
  }else{
    return SQLITE_OK;
  }
}
334
335
336
337
338
339
340


341
342
343
344
345
346
347
  PgHdr *pPg;
  if( pPager->state!=SQLITE_WRITELOCK ) return SQLITE_OK;
  pager_unlock(pPager->fd);
  rc = pager_lock(pPager->fd, 0);
  unlink(pPager->zJournal);
  close(pPager->jfd);
  pPager->jfd = -1;


  for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
    pPg->inJournal = 0;
    pPg->dirty = 0;
  }
  if( rc!=SQLITE_OK ){
    pPager->state = SQLITE_UNLOCK;
    rc = SQLITE_PROTOCOL;







>
>







338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
  PgHdr *pPg;
  if( pPager->state!=SQLITE_WRITELOCK ) return SQLITE_OK;
  pager_unlock(pPager->fd);
  rc = pager_lock(pPager->fd, 0);
  unlink(pPager->zJournal);
  close(pPager->jfd);
  pPager->jfd = -1;
  sqliteFree( pPager->aInJournal );
  pPager->aInJournal = 0;
  for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
    pPg->inJournal = 0;
    pPg->dirty = 0;
  }
  if( rc!=SQLITE_OK ){
    pPager->state = SQLITE_UNLOCK;
    rc = SQLITE_PROTOCOL;
430
431
432
433
434
435
436

437
438
439
440
441
442
443

    /* Playback the page.  Update the in-memory copy of the page
    ** at the same time, if there is one.
    */
    pPg = pager_lookup(pPager, pgRec.pgno);
    if( pPg ){
      memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE);

    }
    rc = pager_seek(pPager->fd, (pgRec.pgno-1)*SQLITE_PAGE_SIZE);
    if( rc!=SQLITE_OK ) break;
    rc = pager_write(pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
    if( rc!=SQLITE_OK ) break;
  }
  if( rc!=SQLITE_OK ){







>







436
437
438
439
440
441
442
443
444
445
446
447
448
449
450

    /* Playback the page.  Update the in-memory copy of the page
    ** at the same time, if there is one.
    */
    pPg = pager_lookup(pPager, pgRec.pgno);
    if( pPg ){
      memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE);
      memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
    }
    rc = pager_seek(pPager->fd, (pgRec.pgno-1)*SQLITE_PAGE_SIZE);
    if( rc!=SQLITE_OK ) break;
    rc = pager_write(pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
    if( rc!=SQLITE_OK ) break;
  }
  if( rc!=SQLITE_OK ){
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
      pPg->pPrevAll = 0;
      pPager->pAll = pPg;
      pPager->nPage++;
    }else{
      /* Recycle an older page.  First locate the page to be recycled.
      ** Try to find one that is not dirty and is near the head of
      ** of the free list */
      int cnt = 4;
      pPg = pPager->pFirst;
      while( pPg->dirty && 0<cnt-- ){
        pPg = pPg->pNextFree;
      }
      if( pPg==0 || pPg->dirty ) pPg = pPager->pFirst;
      assert( pPg->nRef==0 );

      /* If the page to be recycled is dirty, sync the journal and write 
      ** the old page into the database. */







|

|







722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
      pPg->pPrevAll = 0;
      pPager->pAll = pPg;
      pPager->nPage++;
    }else{
      /* Recycle an older page.  First locate the page to be recycled.
      ** Try to find one that is not dirty and is near the head of
      ** of the free list */
      int cnt = pPager->mxPage/2;
      pPg = pPager->pFirst;
      while( pPg->dirty && 0<cnt-- && pPg->pNextFree ){
        pPg = pPg->pNextFree;
      }
      if( pPg==0 || pPg->dirty ) pPg = pPager->pFirst;
      assert( pPg->nRef==0 );

      /* If the page to be recycled is dirty, sync the journal and write 
      ** the old page into the database. */
748
749
750
751
752
753
754




755

756
757
758

759
760

761
762
763
764
765
766
767
768
769
770

771
772
773



774

775
776
777
778
779
780
781
          if( rc==SQLITE_OK ) rc = SQLITE_FULL;
          return rc;
        }
      }

      /* Unlink the old page from the free list and the hash table
      */




      pPager->pFirst = pPg->pNextFree;

      if( pPager->pFirst ){
        pPager->pFirst->pPrevFree = 0;
      }else{

        pPager->pLast = 0;
      }

      if( pPg->pNextHash ){
        pPg->pNextHash->pPrevHash = pPg->pPrevHash;
      }
      if( pPg->pPrevHash ){
        pPg->pPrevHash->pNextHash = pPg->pNextHash;
      }else{
        h = pager_hash(pPg->pgno);
        assert( pPager->aHash[h]==pPg );
        pPager->aHash[h] = pPg->pNextHash;
      }

      pPager->nOvfl++;
    }
    pPg->pgno = pgno;



    pPg->inJournal = 0;

    pPg->dirty = 0;
    pPg->nRef = 1;
    REFINFO(pPg);
    pPager->nRef++;
    h = pager_hash(pgno);
    pPg->pNextHash = pPager->aHash[h];
    pPager->aHash[h] = pPg;







>
>
>
>
|
>
|
|

>
|

>










>



>
>
>
|
>







755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
          if( rc==SQLITE_OK ) rc = SQLITE_FULL;
          return rc;
        }
      }

      /* Unlink the old page from the free list and the hash table
      */
      if( pPg->pPrevFree ){
        pPg->pPrevFree->pNextFree = pPg->pNextFree;
      }else{
        assert( pPager->pFirst==pPg );
        pPager->pFirst = pPg->pNextFree;
      }
      if( pPg->pNextFree ){
        pPg->pNextFree->pPrevFree = pPg->pPrevFree;
      }else{
        assert( pPager->pLast==pPg );
        pPager->pLast = pPg->pPrevFree;
      }
      pPg->pNextFree = pPg->pPrevFree = 0;
      if( pPg->pNextHash ){
        pPg->pNextHash->pPrevHash = pPg->pPrevHash;
      }
      if( pPg->pPrevHash ){
        pPg->pPrevHash->pNextHash = pPg->pNextHash;
      }else{
        h = pager_hash(pPg->pgno);
        assert( pPager->aHash[h]==pPg );
        pPager->aHash[h] = pPg->pNextHash;
      }
      pPg->pNextHash = pPg->pPrevHash = 0;
      pPager->nOvfl++;
    }
    pPg->pgno = pgno;
    if( pPager->aInJournal && pgno<=pPager->origDbSize ){
      pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
    }else{
      pPg->inJournal = 0;
    }
    pPg->dirty = 0;
    pPg->nRef = 1;
    REFINFO(pPg);
    pPager->nRef++;
    h = pager_hash(pgno);
    pPg->pNextHash = pPager->aHash[h];
    pPager->aHash[h] = pPg;
906
907
908
909
910
911
912





913
914
915
916
917
918
919
  if( pPager->errMask ){ 
    return pager_errcode(pPager);
  }
  pPg->dirty = 1;
  if( pPg->inJournal ){ return SQLITE_OK; }
  assert( pPager->state!=SQLITE_UNLOCK );
  if( pPager->state==SQLITE_READLOCK ){





    pPager->jfd = open(pPager->zJournal, O_RDWR|O_CREAT, 0644);
    if( pPager->jfd<0 ){
      return SQLITE_CANTOPEN;
    }
    if( pager_lock(pPager->jfd, 1) ){
      close(pPager->jfd);
      pPager->jfd = -1;







>
>
>
>
>







925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
  if( pPager->errMask ){ 
    return pager_errcode(pPager);
  }
  pPg->dirty = 1;
  if( pPg->inJournal ){ return SQLITE_OK; }
  assert( pPager->state!=SQLITE_UNLOCK );
  if( pPager->state==SQLITE_READLOCK ){
    assert( pPager->aInJournal==0 );
    pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );
    if( pPager->aInJournal==0 ){
      return SQLITE_NOMEM;
    }
    pPager->jfd = open(pPager->zJournal, O_RDWR|O_CREAT, 0644);
    if( pPager->jfd<0 ){
      return SQLITE_CANTOPEN;
    }
    if( pager_lock(pPager->jfd, 1) ){
      close(pPager->jfd);
      pPager->jfd = -1;
948
949
950
951
952
953
954


955
956
957
958
959
960
961










962
963
964
965
966
967
968
      rc = pager_write(pPager->jfd, pData, SQLITE_PAGE_SIZE);
    }
    if( rc!=SQLITE_OK ){
      sqlitepager_rollback(pPager);
      pPager->errMask |= PAGER_ERR_FULL;
      return rc;
    }


  }
  pPg->inJournal = 1;
  if( pPager->dbSize<pPg->pgno ){
    pPager->dbSize = pPg->pgno;
  }
  return rc;
}











/*
** Commit all changes to the database and release the write lock.
**
** If the commit fails for any reason, a rollback attempt is made
** and an error code is returned.  If the commit worked, SQLITE_OK
** is returned.







>
>







>
>
>
>
>
>
>
>
>
>







972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
      rc = pager_write(pPager->jfd, pData, SQLITE_PAGE_SIZE);
    }
    if( rc!=SQLITE_OK ){
      sqlitepager_rollback(pPager);
      pPager->errMask |= PAGER_ERR_FULL;
      return rc;
    }
    assert( pPager->aInJournal!=0 );
    pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
  }
  pPg->inJournal = 1;
  if( pPager->dbSize<pPg->pgno ){
    pPager->dbSize = pPg->pgno;
  }
  return rc;
}

/*
** Return TRUE if the page given in the argument was previous passed
** to sqlitepager_write().  In other words, return TRUE if it is ok
** to change the content of the page.
*/
int sqlitepager_iswriteable(void *pData){
  PgHdr *pPg = DATA_TO_PGHDR(pData);
  return pPg->dirty;
}

/*
** Commit all changes to the database and release the write lock.
**
** If the commit fails for any reason, a rollback attempt is made
** and an error code is returned.  If the commit worked, SQLITE_OK
** is returned.
Changes to src/pager.h.
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
**   http://www.hwaci.com/drh/
**
*************************************************************************
** This header file defines the interface that the sqlite page cache
** subsystem.  The page cache subsystem reads and writes a file a page
** at a time and provides a journal for rollback.
**
** @(#) $Id: pager.h,v 1.6 2001/06/28 01:54:49 drh Exp $
*/

/*
** The size of one page
*/
#define SQLITE_PAGE_SIZE 1024








|







21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
**   http://www.hwaci.com/drh/
**
*************************************************************************
** This header file defines the interface that the sqlite page cache
** subsystem.  The page cache subsystem reads and writes a file a page
** at a time and provides a journal for rollback.
**
** @(#) $Id: pager.h,v 1.7 2001/07/02 17:51:46 drh Exp $
*/

/*
** The size of one page
*/
#define SQLITE_PAGE_SIZE 1024

49
50
51
52
53
54
55

56
57
58
59
60
61
62
63
64
int sqlitepager_close(Pager *pPager);
int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage);
void *sqlitepager_lookup(Pager *pPager, Pgno pgno);
int sqlitepager_ref(void*);
int sqlitepager_unref(void*);
Pgno sqlitepager_pagenumber(void*);
int sqlitepager_write(void*);

int sqlitepager_pagecount(Pager*);
int sqlitepager_commit(Pager*);
int sqlitepager_rollback(Pager*);
int *sqlitepager_stats(Pager*);

#ifdef SQLITE_TEST
void sqlitepager_refdump(Pager*);
int pager_refinfo_enable;
#endif







>









49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
int sqlitepager_close(Pager *pPager);
int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage);
void *sqlitepager_lookup(Pager *pPager, Pgno pgno);
int sqlitepager_ref(void*);
int sqlitepager_unref(void*);
Pgno sqlitepager_pagenumber(void*);
int sqlitepager_write(void*);
int sqlitepager_iswriteable(void*);
int sqlitepager_pagecount(Pager*);
int sqlitepager_commit(Pager*);
int sqlitepager_rollback(Pager*);
int *sqlitepager_stats(Pager*);

#ifdef SQLITE_TEST
void sqlitepager_refdump(Pager*);
int pager_refinfo_enable;
#endif
Changes to src/test3.c.
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
**   http://www.hwaci.com/drh/
**
*************************************************************************
** Code for testing the btree.c module in SQLite.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library.
**
** $Id: test3.c,v 1.6 2001/07/01 22:12:02 drh Exp $
*/
#include "sqliteInt.h"
#include "pager.h"
#include "btree.h"
#include "tcl.h"
#include <stdlib.h>
#include <string.h>







|







21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
**   http://www.hwaci.com/drh/
**
*************************************************************************
** Code for testing the btree.c module in SQLite.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library.
**
** $Id: test3.c,v 1.7 2001/07/02 17:51:46 drh Exp $
*/
#include "sqliteInt.h"
#include "pager.h"
#include "btree.h"
#include "tcl.h"
#include <stdlib.h>
#include <string.h>
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
  int rc;
  char zBuf[100];
  if( argc!=2 ){
    Tcl_AppendResult(interp, "wrong # args: should be \"", argv[0],
       " FILENAME\"", 0);
    return TCL_ERROR;
  }
  rc = sqliteBtreeOpen(argv[1], 0666, &pBt);
  if( rc!=SQLITE_OK ){
    Tcl_AppendResult(interp, errorName(rc), 0);
    return TCL_ERROR;
  }
  sprintf(zBuf,"0x%x",(int)pBt);
  Tcl_AppendResult(interp, zBuf, 0);
  return TCL_OK;







|







75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
  int rc;
  char zBuf[100];
  if( argc!=2 ){
    Tcl_AppendResult(interp, "wrong # args: should be \"", argv[0],
       " FILENAME\"", 0);
    return TCL_ERROR;
  }
  rc = sqliteBtreeOpen(argv[1], 0666, 10, &pBt);
  if( rc!=SQLITE_OK ){
    Tcl_AppendResult(interp, errorName(rc), 0);
    return TCL_ERROR;
  }
  sprintf(zBuf,"0x%x",(int)pBt);
  Tcl_AppendResult(interp, zBuf, 0);
  return TCL_OK;
372
373
374
375
376
377
378
379






























380
381
382
383
384
385
386
  if( argc!=3 ){
    Tcl_AppendResult(interp, "wrong # args: should be \"", argv[0],
       " ID\"", 0);
    return TCL_ERROR;
  }
  if( Tcl_GetInt(interp, argv[1], (int*)&pBt) ) return TCL_ERROR;
  if( Tcl_GetInt(interp, argv[2], &iPage) ) return TCL_ERROR;
  rc = sqliteBtreePageDump(pBt, iPage);






























  if( rc!=SQLITE_OK ){
    Tcl_AppendResult(interp, errorName(rc), 0);
    return TCL_ERROR;
  }
  return TCL_OK;
}








|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
  if( argc!=3 ){
    Tcl_AppendResult(interp, "wrong # args: should be \"", argv[0],
       " ID\"", 0);
    return TCL_ERROR;
  }
  if( Tcl_GetInt(interp, argv[1], (int*)&pBt) ) return TCL_ERROR;
  if( Tcl_GetInt(interp, argv[2], &iPage) ) return TCL_ERROR;
  rc = sqliteBtreePageDump(pBt, iPage, 0);
  if( rc!=SQLITE_OK ){
    Tcl_AppendResult(interp, errorName(rc), 0);
    return TCL_ERROR;
  }
  return TCL_OK;
}

/*
** Usage:   btree_tree_dump ID PAGENUM
**
** Print a disassembly of a page and all its child pages on standard output
*/
static int btree_tree_dump(
  void *NotUsed,
  Tcl_Interp *interp,    /* The TCL interpreter that invoked this command */
  int argc,              /* Number of arguments */
  char **argv            /* Text of each argument */
){
  Btree *pBt;
  int iPage;
  int rc;

  if( argc!=3 ){
    Tcl_AppendResult(interp, "wrong # args: should be \"", argv[0],
       " ID\"", 0);
    return TCL_ERROR;
  }
  if( Tcl_GetInt(interp, argv[1], (int*)&pBt) ) return TCL_ERROR;
  if( Tcl_GetInt(interp, argv[2], &iPage) ) return TCL_ERROR;
  rc = sqliteBtreePageDump(pBt, iPage, 1);
  if( rc!=SQLITE_OK ){
    Tcl_AppendResult(interp, errorName(rc), 0);
    return TCL_ERROR;
  }
  return TCL_OK;
}

791
792
793
794
795
796
797

798
799
800
801
802
803
804
  Tcl_CreateCommand(interp, "btree_rollback", btree_rollback, 0, 0);
  Tcl_CreateCommand(interp, "btree_create_table", btree_create_table, 0, 0);
  Tcl_CreateCommand(interp, "btree_drop_table", btree_drop_table, 0, 0);
  Tcl_CreateCommand(interp, "btree_clear_table", btree_clear_table, 0, 0);
  Tcl_CreateCommand(interp, "btree_get_meta", btree_get_meta, 0, 0);
  Tcl_CreateCommand(interp, "btree_update_meta", btree_update_meta, 0, 0);
  Tcl_CreateCommand(interp, "btree_page_dump", btree_page_dump, 0, 0);

  Tcl_CreateCommand(interp, "btree_pager_stats", btree_pager_stats, 0, 0);
  Tcl_CreateCommand(interp, "btree_pager_ref_dump", btree_pager_ref_dump, 0, 0);
  Tcl_CreateCommand(interp, "btree_cursor", btree_cursor, 0, 0);
  Tcl_CreateCommand(interp, "btree_close_cursor", btree_close_cursor, 0, 0);
  Tcl_CreateCommand(interp, "btree_move_to", btree_move_to, 0, 0);
  Tcl_CreateCommand(interp, "btree_delete", btree_delete, 0, 0);
  Tcl_CreateCommand(interp, "btree_insert", btree_insert, 0, 0);







>







821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
  Tcl_CreateCommand(interp, "btree_rollback", btree_rollback, 0, 0);
  Tcl_CreateCommand(interp, "btree_create_table", btree_create_table, 0, 0);
  Tcl_CreateCommand(interp, "btree_drop_table", btree_drop_table, 0, 0);
  Tcl_CreateCommand(interp, "btree_clear_table", btree_clear_table, 0, 0);
  Tcl_CreateCommand(interp, "btree_get_meta", btree_get_meta, 0, 0);
  Tcl_CreateCommand(interp, "btree_update_meta", btree_update_meta, 0, 0);
  Tcl_CreateCommand(interp, "btree_page_dump", btree_page_dump, 0, 0);
  Tcl_CreateCommand(interp, "btree_tree_dump", btree_tree_dump, 0, 0);
  Tcl_CreateCommand(interp, "btree_pager_stats", btree_pager_stats, 0, 0);
  Tcl_CreateCommand(interp, "btree_pager_ref_dump", btree_pager_ref_dump, 0, 0);
  Tcl_CreateCommand(interp, "btree_cursor", btree_cursor, 0, 0);
  Tcl_CreateCommand(interp, "btree_close_cursor", btree_close_cursor, 0, 0);
  Tcl_CreateCommand(interp, "btree_move_to", btree_move_to, 0, 0);
  Tcl_CreateCommand(interp, "btree_delete", btree_delete, 0, 0);
  Tcl_CreateCommand(interp, "btree_insert", btree_insert, 0, 0);
Changes to test/btree.test.
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#   drh@hwaci.com
#   http://www.hwaci.com/drh/
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is btree database backend
#
# $Id: btree.test,v 1.5 2001/06/30 21:53:53 drh Exp $


set testdir [file dirname $argv0]
source $testdir/tester.tcl

if {$dbprefix!="memory:" && [info commands btree_open]!=""} {








|







19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#   drh@hwaci.com
#   http://www.hwaci.com/drh/
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is btree database backend
#
# $Id: btree.test,v 1.6 2001/07/02 17:51:47 drh Exp $


set testdir [file dirname $argv0]
source $testdir/tester.tcl

if {$dbprefix!="memory:" && [info commands btree_open]!=""} {

714
715
716
717
718
719
720

721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
  btree_drop_table $::b1 2
  set ::c1 [btree_cursor $::b1 2]
  lindex [btree_get_meta $::b1] 0
} {4}
do_test btree-8.24 {
  lindex [btree_pager_stats $::b1] 1
} {2}


# Check page splitting logic
#
do_test btree-9.1 {
  for {set i 1} {$i<=19} {incr i} {
    set key [format %03d $i]
    set data "*** $key *** $key *** $key *** $key ***"
    btree_insert $::c1 $key $data
  }
} {}
#btree_page_dump $::b1 2
#btree_pager_ref_dump $::b1
#set pager_refinfo_enable 1
do_test btree-9.2 {
  btree_insert $::c1 020 {*** 020 *** 020 *** 020 *** 020 ***}
  select_keys $::c1
} {001 002 003 004 005 006 007 008 009 010 011 012 013 014 015 016 017 018 019 020}
#btree_page_dump $::b1 5







>










|







714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
  btree_drop_table $::b1 2
  set ::c1 [btree_cursor $::b1 2]
  lindex [btree_get_meta $::b1] 0
} {4}
do_test btree-8.24 {
  lindex [btree_pager_stats $::b1] 1
} {2}
#btree_pager_ref_dump $::b1

# Check page splitting logic
#
do_test btree-9.1 {
  for {set i 1} {$i<=19} {incr i} {
    set key [format %03d $i]
    set data "*** $key *** $key *** $key *** $key ***"
    btree_insert $::c1 $key $data
  }
} {}
#btree_tree_dump $::b1 2
#btree_pager_ref_dump $::b1
#set pager_refinfo_enable 1
do_test btree-9.2 {
  btree_insert $::c1 020 {*** 020 *** 020 *** 020 *** 020 ***}
  select_keys $::c1
} {001 002 003 004 005 006 007 008 009 010 011 012 013 014 015 016 017 018 019 020}
#btree_page_dump $::b1 5
816
817
818
819
820
821
822
823

824
825
826
827
828


829
830
831
832
833
834
835
#btree_page_dump $::b1 2
#btree_page_dump $::b1 6
do_test btree-10.4 {
  btree_move_to $::c1 011
  btree_delete $::c1
  select_keys $::c1
} {001 002 003 004 005 006 007 008 009 010 012 013 014 015 016 017 018 019 020}
#btree_page_dump $::b1 2

for {set i 1} {$i<=20} {incr i} {
  do_test btree-10.5.$i {
    btree_move_to $::c1 [format %03d $i]
    lindex [btree_pager_stats $::b1] 1
  } {2}


}

# Create a tree with lots more pages
#
catch {unset ::data}
catch {unset ::key}
for {set i 21} {$i<=1000} {incr i} {







|
>





>
>







817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
#btree_page_dump $::b1 2
#btree_page_dump $::b1 6
do_test btree-10.4 {
  btree_move_to $::c1 011
  btree_delete $::c1
  select_keys $::c1
} {001 002 003 004 005 006 007 008 009 010 012 013 014 015 016 017 018 019 020}
#btree_tree_dump $::b1 2
#btree_pager_ref_dump $::b1
for {set i 1} {$i<=20} {incr i} {
  do_test btree-10.5.$i {
    btree_move_to $::c1 [format %03d $i]
    lindex [btree_pager_stats $::b1] 1
  } {2}
  #btree_pager_ref_dump $::b1
  #btree_tree_dump $::b1 2
}

# Create a tree with lots more pages
#
catch {unset ::data}
catch {unset ::key}
for {set i 21} {$i<=1000} {incr i} {
882
883
884
885
886
887
888
889

890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911

912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933

934
935
936
937
938
939
940
941
} {258}
do_test btree-11.4.3 {
  btree_move_to $::c1 259
  btree_key $::c1
} {259}
do_test btree-11.4.4 {
  btree_move_to $::c1 257
  btree_key $::c1

} {256}
do_test btree-11.5 {
  btree_move_to $::c1 513
  btree_delete $::c1
  btree_next $::c1
  btree_key $::c1
} {514}
do_test btree-11.5.1 {
  btree_move_to $::c1 512
  btree_key $::c1
} {512}
do_test btree-11.5.2 {
  btree_move_to $::c1 514
  btree_key $::c1
} {514}
do_test btree-11.5.3 {
  btree_move_to $::c1 515
  btree_key $::c1
} {515}
do_test btree-11.5.4 {
  btree_move_to $::c1 513
  btree_key $::c1

} {512}
do_test btree-11.6 {
  btree_move_to $::c1 769
  btree_delete $::c1
  btree_next $::c1
  btree_key $::c1
} {770}
do_test btree-11.6.1 {
  btree_move_to $::c1 768
  btree_key $::c1
} {768}
do_test btree-11.6.2 {
  btree_move_to $::c1 771
  btree_key $::c1
} {771}
do_test btree-11.6.3 {
  btree_move_to $::c1 770
  btree_key $::c1
} {770}
do_test btree-11.6.4 {
  btree_move_to $::c1 769
  btree_key $::c1

} {768}
#btree_page_dump $::b1 2
#btree_page_dump $::b1 25

# Change the data on an intermediate node such that the node becomes overfull
# and has to split.  We happen to know that intermediate nodes exist on
# 337, 401 and 465 by the btree_page_dumps above
#







|
>
|




















|
>
|




















|
>
|







886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
} {258}
do_test btree-11.4.3 {
  btree_move_to $::c1 259
  btree_key $::c1
} {259}
do_test btree-11.4.4 {
  btree_move_to $::c1 257
  set n [btree_key $::c1]
  expr {$n==256||$n==258}
} {1}
do_test btree-11.5 {
  btree_move_to $::c1 513
  btree_delete $::c1
  btree_next $::c1
  btree_key $::c1
} {514}
do_test btree-11.5.1 {
  btree_move_to $::c1 512
  btree_key $::c1
} {512}
do_test btree-11.5.2 {
  btree_move_to $::c1 514
  btree_key $::c1
} {514}
do_test btree-11.5.3 {
  btree_move_to $::c1 515
  btree_key $::c1
} {515}
do_test btree-11.5.4 {
  btree_move_to $::c1 513
  set n [btree_key $::c1]
  expr {$n==512||$n==514}
} {1}
do_test btree-11.6 {
  btree_move_to $::c1 769
  btree_delete $::c1
  btree_next $::c1
  btree_key $::c1
} {770}
do_test btree-11.6.1 {
  btree_move_to $::c1 768
  btree_key $::c1
} {768}
do_test btree-11.6.2 {
  btree_move_to $::c1 771
  btree_key $::c1
} {771}
do_test btree-11.6.3 {
  btree_move_to $::c1 770
  btree_key $::c1
} {770}
do_test btree-11.6.4 {
  btree_move_to $::c1 769
  set n [btree_key $::c1]
  expr {$n==768||$n==770}
} {1}
#btree_page_dump $::b1 2
#btree_page_dump $::b1 25

# Change the data on an intermediate node such that the node becomes overfull
# and has to split.  We happen to know that intermediate nodes exist on
# 337, 401 and 465 by the btree_page_dumps above
#
Changes to test/btree2.test.
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45

46
47
48
49
50
51
52
#   drh@hwaci.com
#   http://www.hwaci.com/drh/
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is btree database backend
#
# $Id: btree2.test,v 1.1 2001/07/01 22:12:02 drh Exp $


set testdir [file dirname $argv0]
source $testdir/tester.tcl

if {$dbprefix!="memory:" && [info commands btree_open]!=""} {

# Create a new database file containing no entries.  The database should
# contain 5 tables:
#
#     2   The descriptor table
#     3   The foreground table
#     4   The background table
#     5   The long key table
#     6   The long data table
#
# An explanation for what all these tables are used for is provided below.
#
do_test btree2-1.1 {

  file delete -force test2.bt
  file delete -force test2.bt-journal
  set ::b [btree_open test2.bt]
  btree_begin_transaction $::b
  btree_create_table $::b
} {3}
do_test btree2-1.2 {







|



















>







19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#   drh@hwaci.com
#   http://www.hwaci.com/drh/
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is btree database backend
#
# $Id: btree2.test,v 1.2 2001/07/02 17:51:47 drh Exp $


set testdir [file dirname $argv0]
source $testdir/tester.tcl

if {$dbprefix!="memory:" && [info commands btree_open]!=""} {

# Create a new database file containing no entries.  The database should
# contain 5 tables:
#
#     2   The descriptor table
#     3   The foreground table
#     4   The background table
#     5   The long key table
#     6   The long data table
#
# An explanation for what all these tables are used for is provided below.
#
do_test btree2-1.1 {
  expr srand(1)
  file delete -force test2.bt
  file delete -force test2.bt-journal
  set ::b [btree_open test2.bt]
  btree_begin_transaction $::b
  btree_create_table $::b
} {3}
do_test btree2-1.2 {
132
133
134
135
136
137
138






139
140
141
142
143
144
145
146
147
148
149
150
151
152




153
154
155
156
157
158
159
160
  return [string range $r 0 [expr {$len-1}]]
}

# Verify the invariants on the database.  Return an empty string on 
# success or an error message if something is amiss.
#
proc check_invariants {} {






  btree_move_to $::c3 {}
  btree_move_to $::c4 {}
  btree_move_to $::c2 N
  set N [btree_data $::c2]
  btree_move_to $::c2 L
  set L [btree_data $::c2]
  set LM1 [expr {$L-1}]
  for {set i 1} {$i<=$N} {incr i} {
    set key [btree_key $::c3]
    scan $key %d k
    if {$k!=$i} {
      set key [btree_key $::c4]
      scan $key %d k
      if {$k!=$i} {




         return "Key $i is missing from both foreground and backgroun"
      }
      set data [btree_data $::c4]
      btree_next $::c4
    } else {
      set data [btree_data $::c3]
      btree_next $::c3
    }







>
>
>
>
>
>









|


|

>
>
>
>
|







133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
  return [string range $r 0 [expr {$len-1}]]
}

# Verify the invariants on the database.  Return an empty string on 
# success or an error message if something is amiss.
#
proc check_invariants {} {
  set ck [btree_sanity_check $::b 2 3 4 5 6]
  if {$ck!=""} {
    puts "\n*** SANITY:\n$ck"
    exit
    return $ck
  }
  btree_move_to $::c3 {}
  btree_move_to $::c4 {}
  btree_move_to $::c2 N
  set N [btree_data $::c2]
  btree_move_to $::c2 L
  set L [btree_data $::c2]
  set LM1 [expr {$L-1}]
  for {set i 1} {$i<=$N} {incr i} {
    set key [btree_key $::c3]
    if {[scan $key %d k]<1} {set k 0}
    if {$k!=$i} {
      set key [btree_key $::c4]
      if {[scan $key %d k]<1} {set k 0}
      if {$k!=$i} {
        # puts "MISSING $i"
        # puts {Page 3:}; btree_page_dump $::b 3
        # puts {Page 4:}; btree_page_dump $::b 4
        # exit
        return "Key $i is missing from both foreground and background"
      }
      set data [btree_data $::c4]
      btree_next $::c4
    } else {
      set data [btree_data $::c3]
      btree_next $::c3
    }
189
190
191
192
193
194
195

196

197
198
199
200
201
202
203
204
205
206
207



208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225

226

227
228


229
230


231


232
233
234
235



236
237
238
239
240
241
242
243
244
245
246
247







248
249
250
251
252
253
254
255
256



257
258
259
260
261
262
263
# Make random changes to the database such that each change preserves
# the invariants.  The number of changes is $n*N where N is the parameter
# from the descriptor table.  Each changes begins with a random key.
# the entry with that key is put in the foreground table with probability
# $I and it is put in background with probability (1.0-$I).  It gets
# a long key with probability $K and long data with probability $D.  
# 

proc random_changes {n I K D} {

  set N [btree_data $::c2]
  btree_move_to $::c2 L
  set L [btree_data $::c2]
  set LM1 [expr {$L-1}]
  set total [expr {int($N*$n)}]
  set format %0${L}d
  for {set i 0} {$i<$total} {incr i} {
    set k [expr {int(rand()*$N)}]
    set insert [expr {rand()<=$I}]
    set longkey [expr {rand()<=$K}]
    set longdata [expr {rand()<=$D}]



    if {$longkey} {
      set x [expr {rand()}]
      set keylen [expr {int($x*$x*$x*$x*3000)}]
    } else {
      set keylen $L
    }
    set key [make_payload $k $L $keylen]
    if {$longdata} {
      set x [expr {rand()}]
      set datalen [expr {int($x*$x*$x*$x*3000)}]
    } else {
      set datalen $L
    }
    set data [make_payload $k $L $datalen]
    set basekey [format $format $k]
    if {$insert} {
      btree_move_to $::c4 $basekey
      if {[scan [btree_key $::c4] %d kx]<1} {set kx -1}

      if {$kx==$k} {

        btree_delete $::c4
      }


      btree_insert $::c3 $key $data
    } else {


      btree_move_to $::c3 $basekey


      if {[scan [btree_key $::c4] %d kx]<1} {set kx -1}
      if {$kx==$k} {
        btree_delete $::c3
      }



      btree_insert $::c4 $key $data
    }
    if {$longkey} {
      btree_insert $::c5 $basekey $keylen
    } elseif {[btree_move_to $::c5 $basekey]==0} {
      btree_delete $::c5
    }
    if {$longdata} {
      btree_insert $::c6 $basekey $datalen
    } elseif {[btree_move_to $::c6 $basekey]==0} {
      btree_delete $::c6
    }







  }
  return [btree_sanity_check $::b 2 3 4 5 6]
}

# Repeat this test sequence on database of various sizes
#
set testno 2
foreach {N L} {
  10 2



} {
  puts "**** N=$N L=$L ****"
  set hash [md5file test2.bt]
  do_test btree2-$testno.1 [subst -nocommands {
    set ::c2 [btree_cursor $::b 2]
    set ::c3 [btree_cursor $::b 3]
    set ::c4 [btree_cursor $::b 4]







>

>







|



>
>
>


|






|





<
|
|
>
|
>
|

>
>
|

>
>
|
>
>
|
|
|
|
>
>
>












>
>
>
>
>
>
>

<







>
>
>







200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238

239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281

282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
# Make random changes to the database such that each change preserves
# the invariants.  The number of changes is $n*N where N is the parameter
# from the descriptor table.  Each changes begins with a random key.
# the entry with that key is put in the foreground table with probability
# $I and it is put in background with probability (1.0-$I).  It gets
# a long key with probability $K and long data with probability $D.  
# 
set chngcnt 0
proc random_changes {n I K D} {
  btree_move_to $::c2 N
  set N [btree_data $::c2]
  btree_move_to $::c2 L
  set L [btree_data $::c2]
  set LM1 [expr {$L-1}]
  set total [expr {int($N*$n)}]
  set format %0${L}d
  for {set i 0} {$i<$total} {incr i} {
    set k [expr {int(rand()*$N)+1}]
    set insert [expr {rand()<=$I}]
    set longkey [expr {rand()<=$K}]
    set longdata [expr {rand()<=$D}]
    # incr ::chngcnt
    # if {$::chngcnt==251} {btree_tree_dump $::b 3} 
    # puts "CHANGE $::chngcnt: $k $insert $longkey $longdata"
    if {$longkey} {
      set x [expr {rand()}]
      set keylen [expr {int($x*$x*$x*$x*3000)+10}]
    } else {
      set keylen $L
    }
    set key [make_payload $k $L $keylen]
    if {$longdata} {
      set x [expr {rand()}]
      set datalen [expr {int($x*$x*$x*$x*3000)+10}]
    } else {
      set datalen $L
    }
    set data [make_payload $k $L $datalen]
    set basekey [format $format $k]

    if {[set c [btree_move_to $::c3 $basekey]]==0} {
      btree_delete $::c3
    } else {
      if {$c<0} {btree_next $::c3}
      if {[string match $basekey* [btree_key $::c3]]} {
        btree_delete $::c3
      }
    }
    if {[set c [btree_move_to $::c4 $basekey]]==0} {
      btree_delete $::c4
    } else {
      if {$c<0} {btree_next $::c4}
      if {[string match $basekey* [btree_key $::c4]]} {
        btree_delete $::c4
      }
    }
    if {[scan [btree_key $::c4] %d kx]<1} {set kx -1}
    if {$kx==$k} {
      btree_delete $::c4
    }
    if {$insert} {
      btree_insert $::c3 $key $data
    } else {
      btree_insert $::c4 $key $data
    }
    if {$longkey} {
      btree_insert $::c5 $basekey $keylen
    } elseif {[btree_move_to $::c5 $basekey]==0} {
      btree_delete $::c5
    }
    if {$longdata} {
      btree_insert $::c6 $basekey $datalen
    } elseif {[btree_move_to $::c6 $basekey]==0} {
      btree_delete $::c6
    }
    # set ck [btree_sanity_check $::b 2 3 4 5 6]
    # if {$ck!=""} {
    #   puts "\nSANITY CHECK FAILED!\n$ck"
    #   exit
    # }
    # puts "PAGE 3:"; btree_page_dump $::b 3
    # puts "PAGE 4:"; btree_page_dump $::b 4
  }

}

# Repeat this test sequence on database of various sizes
#
set testno 2
foreach {N L} {
  10 2
  50 2
  200 3
  2000 5
} {
  puts "**** N=$N L=$L ****"
  set hash [md5file test2.bt]
  do_test btree2-$testno.1 [subst -nocommands {
    set ::c2 [btree_cursor $::b 2]
    set ::c3 [btree_cursor $::b 3]
    set ::c4 [btree_cursor $::b 4]
300
301
302
303
304
305
306





307
308
309
310
311
312
313
314






315
316




317
318
319
320
321
322
323
324
325
326

327
328


329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346

347


348
349
350
351
352
353
354
355
356
357
358
359
360



361
362





363
364
365
366





367
368
369
370
371
372
373
    btree_close_cursor $::c5
    btree_close_cursor $::c6
    lindex [btree_pager_stats $::b] 1
  } {0}
  do_test btree2-$testno.7 {
    btree_close $::b
    set ::b [btree_open test2.bt]





    check_invariants
  } {}

  # For each database size, run various changes tests.
  #
  set num2 1
  foreach {n I K D} {
    0.5 0.5 0.5 0.5






  } {
    set testid btree2-$testno.8.$num2




    do_test $testid.1 {
      set ::c2 [btree_cursor $::b 2]
      set ::c3 [btree_cursor $::b 3]
      set ::c4 [btree_cursor $::b 4]
      set ::c5 [btree_cursor $::b 5]
      set ::c6 [btree_cursor $::b 6]
      btree_begin_transaction $::b
      lindex [btree_pager_stats $::b] 1
    } {6}   
    set hash [md5file test2.bt]

    do_test $testid.2 [subst -nocommands {
      random_changes $n $I $K $D


      check_invariants
    }] {}
    do_test $testid.3 {
      btree_close_cursor $::c2
      btree_close_cursor $::c3
      btree_close_cursor $::c4
      btree_close_cursor $::c5
      btree_close_cursor $::c6
      btree_rollback $::b
      md5file test2.bt
    } $hash
    do_test $testid.4 [subst -nocommands {
      btree_begin_transaction $::b
      set ::c2 [btree_cursor $::b 2]
      set ::c3 [btree_cursor $::b 3]
      set ::c4 [btree_cursor $::b 4]
      set ::c5 [btree_cursor $::b 5]
      set ::c6 [btree_cursor $::b 6]

      random_changes $n $I $K $D


      check_invariants
    }] {}
    do_test $testid.5 {
      btree_commit $::b
      check_invariants
    } {}
    set hash [md5file test2.bt]
    do_test $testid.6 {
      btree_close_cursor $::c2
      btree_close_cursor $::c3
      btree_close_cursor $::c4
      btree_close_cursor $::c5
      btree_close_cursor $::c6



      btree_close $::b
      set ::b [btree_open test2.bt]





      check_invariants
    } {}
    incr num2
  }





  incr testno
}  

# Testing is complete.  Shut everything down.
#
do_test btree-999.1 {
  lindex [btree_pager_stats $::b] 1







>
>
>
>
>







|
>
>
>
>
>
>


>
>
>
>

<
<
<
<
<


|

>
|

>
>

|
|








|
|
|
|
|
|
|
>

>
>

|
|




|





>
>
>


>
>
>
>
>




>
>
>
>
>







335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367





368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
    btree_close_cursor $::c5
    btree_close_cursor $::c6
    lindex [btree_pager_stats $::b] 1
  } {0}
  do_test btree2-$testno.7 {
    btree_close $::b
    set ::b [btree_open test2.bt]
    set ::c2 [btree_cursor $::b 2]
    set ::c3 [btree_cursor $::b 3]
    set ::c4 [btree_cursor $::b 4]
    set ::c5 [btree_cursor $::b 5]
    set ::c6 [btree_cursor $::b 6]
    check_invariants
  } {}

  # For each database size, run various changes tests.
  #
  set num2 1
  foreach {n I K D} {
    0.5 0.5 0.1 0.1
    1.0 0.2 0.1 0.1
    1.0 0.8 0.1 0.1
    2.0 0.0 0.1 0.1
    2.0 1.0 0.1 0.1
    2.0 0.0 0.0 0.0
    2.0 1.0 0.0 0.0
  } {
    set testid btree2-$testno.8.$num2
    set cnt 6
    for {set i 2} {$i<=6} {incr i} {
      if {[lindex [btree_cursor_dump [set ::c$i]] 0]!=$i} {incr cnt}
    }
    do_test $testid.1 {





      btree_begin_transaction $::b
      lindex [btree_pager_stats $::b] 1
    } $cnt
    set hash [md5file test2.bt]
    # exec cp test2.bt test2.bt.bu1
    do_test $testid.2 [subst {
      random_changes $n $I $K $D
    }] {}
    do_test $testid.3 {
      check_invariants
    } {}
    do_test $testid.4 {
      btree_close_cursor $::c2
      btree_close_cursor $::c3
      btree_close_cursor $::c4
      btree_close_cursor $::c5
      btree_close_cursor $::c6
      btree_rollback $::b
      md5file test2.bt
    } $hash
    # exec cp test2.bt test2.bt.bu2
    btree_begin_transaction $::b
    set ::c2 [btree_cursor $::b 2]
    set ::c3 [btree_cursor $::b 3]
    set ::c4 [btree_cursor $::b 4]
    set ::c5 [btree_cursor $::b 5]
    set ::c6 [btree_cursor $::b 6]
    do_test $testid.5 [subst {
      random_changes $n $I $K $D
    }] {}
    do_test $testid.6 {
      check_invariants
    } {}
    do_test $testid.7 {
      btree_commit $::b
      check_invariants
    } {}
    set hash [md5file test2.bt]
    do_test $testid.8 {
      btree_close_cursor $::c2
      btree_close_cursor $::c3
      btree_close_cursor $::c4
      btree_close_cursor $::c5
      btree_close_cursor $::c6
      lindex [btree_pager_stats $::b] 1
    } {0}
    do_test $testid.9 {
      btree_close $::b
      set ::b [btree_open test2.bt]
      set ::c2 [btree_cursor $::b 2]
      set ::c3 [btree_cursor $::b 3]
      set ::c4 [btree_cursor $::b 4]
      set ::c5 [btree_cursor $::b 5]
      set ::c6 [btree_cursor $::b 6]
      check_invariants
    } {}
    incr num2
  }
  btree_close_cursor $::c2
  btree_close_cursor $::c3
  btree_close_cursor $::c4
  btree_close_cursor $::c5
  btree_close_cursor $::c6
  incr testno
}  

# Testing is complete.  Shut everything down.
#
do_test btree-999.1 {
  lindex [btree_pager_stats $::b] 1