SQLite

Check-in [8f1c1f61f7]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Do not clear the MemPage.nFree variable when insertCell() adds an overflow cell to a page. Not doing this means balance_quick() can avoid a call to sqlite3BtreeInitPage(). (CVS 6732)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 8f1c1f61f7bc5270212725cf0a056872df983f49
User & Date: danielk1977 2009-06-09 09:41:00.000
Context
2009-06-09
10:37
Only do the cell overread checks in sqlite3BtreeInitPage if SQLITE_OVERREAD_CHECK is defined at compile-time. (CVS 6733) (check-in: 49f544ebae user: drh tags: trunk)
09:41
Do not clear the MemPage.nFree variable when insertCell() adds an overflow cell to a page. Not doing this means balance_quick() can avoid a call to sqlite3BtreeInitPage(). (CVS 6732) (check-in: 8f1c1f61f7 user: danielk1977 tags: trunk)
2009-06-08
19:44
Additional comments to clarify the operation of the LIKE optimizer in where.c. (CVS 6731) (check-in: cc9c12170c user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/btree.c.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
** 2004 April 6
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btree.c,v 1.620 2009/06/08 14:49:46 danielk1977 Exp $
**
** This file implements an external (disk-based) database using BTrees.
** See the header comment on "btreeInt.h" for additional information.
** Including a description of file format and an overview of operation.
*/
#include "btreeInt.h"












|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
** 2004 April 6
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btree.c,v 1.621 2009/06/09 09:41:00 danielk1977 Exp $
**
** This file implements an external (disk-based) database using BTrees.
** See the header comment on "btreeInt.h" for additional information.
** Including a description of file format and an overview of operation.
*/
#include "btreeInt.h"

5085
5086
5087
5088
5089
5090
5091
5092
5093
5094
5095
5096
5097
5098
5099
      memcpy(pTemp+nSkip, pCell+nSkip, sz-nSkip);
      pCell = pTemp;
    }
    j = pPage->nOverflow++;
    assert( j<(int)(sizeof(pPage->aOvfl)/sizeof(pPage->aOvfl[0])) );
    pPage->aOvfl[j].pCell = pCell;
    pPage->aOvfl[j].idx = (u16)i;
    pPage->nFree = 0;
  }else{
    int rc = sqlite3PagerWrite(pPage->pDbPage);
    if( rc!=SQLITE_OK ){
      return rc;
    }
    assert( sqlite3PagerIswriteable(pPage->pDbPage) );
    data = pPage->aData;







<







5085
5086
5087
5088
5089
5090
5091

5092
5093
5094
5095
5096
5097
5098
      memcpy(pTemp+nSkip, pCell+nSkip, sz-nSkip);
      pCell = pTemp;
    }
    j = pPage->nOverflow++;
    assert( j<(int)(sizeof(pPage->aOvfl)/sizeof(pPage->aOvfl[0])) );
    pPage->aOvfl[j].pCell = pCell;
    pPage->aOvfl[j].idx = (u16)i;

  }else{
    int rc = sqlite3PagerWrite(pPage->pDbPage);
    if( rc!=SQLITE_OK ){
      return rc;
    }
    assert( sqlite3PagerIswriteable(pPage->pDbPage) );
    data = pPage->aData;
5288
5289
5290
5291
5292
5293
5294
5295
5296
5297
5298
5299
5300
5301
5302
5303
5304
5305
5306
5307
5308
5309
5310
5311
5312
5313
5314
5315
5316
5317
5318
      if( rc==SQLITE_OK ){
        rc = ptrmapPutOvfl(pNew, 0);
      }
    }

    /* Release the reference to the new page. */
    releasePage(pNew);

    /* At this point the pPage->nFree variable is not set correctly with
    ** respect to the content of the page (because it was set to 0 by 
    ** insertCell). So call sqlite3BtreeInitPage() to make sure it is
    ** correct.
    **
    ** This has to be done even if an error will be returned. Normally, if
    ** an error occurs during tree balancing, the contents of MemPage are
    ** not important, as they will be recalculated when the page is rolled
    ** back. But here, in balance_quick(), it is possible that pPage has 
    ** not yet been marked dirty or written into the journal file. Therefore
    ** it will not be rolled back and so it is important to make sure that
    ** the page data and contents of MemPage are consistent.
    */
    pPage->isInit = 0;
    sqlite3BtreeInitPage(pPage);
    assert( pPage->nOverflow==0 );
  }

  return rc;
}
#endif /* SQLITE_OMIT_QUICKBALANCE */

/*







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







5287
5288
5289
5290
5291
5292
5293

















5294
5295
5296
5297
5298
5299
5300
      if( rc==SQLITE_OK ){
        rc = ptrmapPutOvfl(pNew, 0);
      }
    }

    /* Release the reference to the new page. */
    releasePage(pNew);

















  }

  return rc;
}
#endif /* SQLITE_OMIT_QUICKBALANCE */

/*
5337
5338
5339
5340
5341
5342
5343
5344
5345
5346
5347
5348
5349
5350
5351
5352
5353
5354
5355
5356
5357
5358
5359
5360
5361
5362
5363
5364
5365
5366
5367
5368
5369
** make sure all Cells for pPage once again fit in pPage->aData[].
**
** In the course of balancing the siblings of pPage, the parent of pPage
** might become overfull or underfull.  If that happens, then this routine
** is called recursively on the parent.
**
** If this routine fails for any reason, it might leave the database
** in a corrupted state.  So if this routine fails, the database should
** be rolled back.
*/
static int balance_nonroot(MemPage *pParent, int iParentIdx, u8 *aSpace2){
  BtShared *pBt;               /* The whole database */
  int nCell = 0;               /* Number of cells in apCell[] */
  int nMaxCells = 0;           /* Allocated size of apCell, szCell, aFrom. */
  int nOld = 0;                /* Number of pages in apOld[] */
  int nNew = 0;                /* Number of pages in apNew[] */
  int i, j, k;                 /* Loop counters */
  int nxDiv;                   /* Next divider slot in pParent->aCell[] */
  int rc;                      /* The return code */
  int leafCorrection;          /* 4 if pPage is a leaf.  0 if not */
  int leafData;                /* True if pPage is a leaf of a LEAFDATA tree */
  int usableSpace;             /* Bytes in pPage beyond the header */
  int pageFlags;               /* Value of pPage->aData[0] */
  int subtotal;                /* Subtotal of bytes in cells on one page */
  int iSpace1 = 0;             /* First unused byte of aSpace1[] */
  int iSpace2 = 0;             /* First unused byte of aSpace2[] */
  int szScratch;               /* Size of scratch memory requested */
  MemPage *apOld[NB];          /* pPage and up to two siblings */
  Pgno pgnoOld[NB];            /* Page numbers for each page in apOld[] */
  MemPage *apCopy[NB];         /* Private copies of apOld[] pages */
  MemPage *apNew[NB+2];        /* pPage and up to NB siblings after balancing */
  Pgno pgnoNew[NB+2];          /* Page numbers for each page in apNew[] */
  u8 *apDiv[NB];               /* Divider cells in pParent */







|


|














|







5319
5320
5321
5322
5323
5324
5325
5326
5327
5328
5329
5330
5331
5332
5333
5334
5335
5336
5337
5338
5339
5340
5341
5342
5343
5344
5345
5346
5347
5348
5349
5350
5351
** make sure all Cells for pPage once again fit in pPage->aData[].
**
** In the course of balancing the siblings of pPage, the parent of pPage
** might become overfull or underfull.  If that happens, then this routine
** is called recursively on the parent.
**
** If this routine fails for any reason, it might leave the database
** in a corrupted state. So if this routine fails, the database should
** be rolled back.
*/
static int balance_nonroot(MemPage *pParent, int iParentIdx, u8 *aOvflSpace){
  BtShared *pBt;               /* The whole database */
  int nCell = 0;               /* Number of cells in apCell[] */
  int nMaxCells = 0;           /* Allocated size of apCell, szCell, aFrom. */
  int nOld = 0;                /* Number of pages in apOld[] */
  int nNew = 0;                /* Number of pages in apNew[] */
  int i, j, k;                 /* Loop counters */
  int nxDiv;                   /* Next divider slot in pParent->aCell[] */
  int rc;                      /* The return code */
  int leafCorrection;          /* 4 if pPage is a leaf.  0 if not */
  int leafData;                /* True if pPage is a leaf of a LEAFDATA tree */
  int usableSpace;             /* Bytes in pPage beyond the header */
  int pageFlags;               /* Value of pPage->aData[0] */
  int subtotal;                /* Subtotal of bytes in cells on one page */
  int iSpace1 = 0;             /* First unused byte of aSpace1[] */
  int iOvflSpace = 0;          /* First unused byte of aOvflSpace[] */
  int szScratch;               /* Size of scratch memory requested */
  MemPage *apOld[NB];          /* pPage and up to two siblings */
  Pgno pgnoOld[NB];            /* Page numbers for each page in apOld[] */
  MemPage *apCopy[NB];         /* Private copies of apOld[] pages */
  MemPage *apNew[NB+2];        /* pPage and up to NB siblings after balancing */
  Pgno pgnoNew[NB+2];          /* Page numbers for each page in apNew[] */
  u8 *apDiv[NB];               /* Divider cells in pParent */
5422
5423
5424
5425
5426
5427
5428
5429
5430
5431
5432
5433
5434
5435
5436
5437
5438
5439
5440
5441
5442
5443
5444
5445
5446
5447
5448
5449
5450
5451
5452
5453
5454
5455
5456
  szScratch =
       nMaxCells*sizeof(u8*)                       /* apCell */
     + nMaxCells*sizeof(u16)                       /* szCell */
     + (ROUND8(sizeof(MemPage))+pBt->pageSize)*NB  /* aCopy */
     + pBt->pageSize                               /* aSpace1 */
     + (ISAUTOVACUUM ? nMaxCells : 0);             /* aFrom */
  apCell = sqlite3ScratchMalloc( szScratch ); 
  if( apCell==0 ){
    rc = SQLITE_NOMEM;
    goto balance_cleanup;
  }
  szCell = (u16*)&apCell[nMaxCells];
  aCopy[0] = (u8*)&szCell[nMaxCells];
  assert( EIGHT_BYTE_ALIGNMENT(aCopy[0]) );
  for(i=1; i<NB; i++){
    aCopy[i] = &aCopy[i-1][pBt->pageSize+ROUND8(sizeof(MemPage))];
    assert( ((aCopy[i] - (u8*)0) & 7)==0 ); /* 8-byte alignment required */
  }
  aSpace1 = &aCopy[NB-1][pBt->pageSize+ROUND8(sizeof(MemPage))];
  assert( EIGHT_BYTE_ALIGNMENT(aSpace1) );
  if( ISAUTOVACUUM ){
    aFrom = &aSpace1[pBt->pageSize];
  }
  /* aSpace2 = sqlite3PageMalloc(pBt->pageSize); */
  if( aSpace2==0 ){
    rc = SQLITE_NOMEM;
    goto balance_cleanup;
  }
  
  /*
  ** Make copies of the content of pPage and its siblings into aOld[].
  ** The rest of this function will use data from the copies rather
  ** than the original pages since the original pages will be in the
  ** process of being overwritten.
  */







|















<
<
<
<
<







5404
5405
5406
5407
5408
5409
5410
5411
5412
5413
5414
5415
5416
5417
5418
5419
5420
5421
5422
5423
5424
5425
5426





5427
5428
5429
5430
5431
5432
5433
  szScratch =
       nMaxCells*sizeof(u8*)                       /* apCell */
     + nMaxCells*sizeof(u16)                       /* szCell */
     + (ROUND8(sizeof(MemPage))+pBt->pageSize)*NB  /* aCopy */
     + pBt->pageSize                               /* aSpace1 */
     + (ISAUTOVACUUM ? nMaxCells : 0);             /* aFrom */
  apCell = sqlite3ScratchMalloc( szScratch ); 
  if( apCell==0 || aOvflSpace==0 ){
    rc = SQLITE_NOMEM;
    goto balance_cleanup;
  }
  szCell = (u16*)&apCell[nMaxCells];
  aCopy[0] = (u8*)&szCell[nMaxCells];
  assert( EIGHT_BYTE_ALIGNMENT(aCopy[0]) );
  for(i=1; i<NB; i++){
    aCopy[i] = &aCopy[i-1][pBt->pageSize+ROUND8(sizeof(MemPage))];
    assert( ((aCopy[i] - (u8*)0) & 7)==0 ); /* 8-byte alignment required */
  }
  aSpace1 = &aCopy[NB-1][pBt->pageSize+ROUND8(sizeof(MemPage))];
  assert( EIGHT_BYTE_ALIGNMENT(aSpace1) );
  if( ISAUTOVACUUM ){
    aFrom = &aSpace1[pBt->pageSize];
  }





  
  /*
  ** Make copies of the content of pPage and its siblings into aOld[].
  ** The rest of this function will use data from the copies rather
  ** than the original pages since the original pages will be in the
  ** process of being overwritten.
  */
5732
5733
5734
5735
5736
5737
5738
5739
5740
5741
5742
5743
5744
5745
5746
      u8 *pCell;
      u8 *pTemp;
      int sz;

      assert( j<nMaxCells );
      pCell = apCell[j];
      sz = szCell[j] + leafCorrection;
      pTemp = &aSpace2[iSpace2];
      if( !pNew->leaf ){
        memcpy(&pNew->aData[8], pCell, 4);
        if( ISAUTOVACUUM 
         && (aFrom[j]==0xFF || apCopy[aFrom[j]]->pgno!=pNew->pgno)
        ){
          rc = ptrmapPut(pBt, get4byte(pCell), PTRMAP_BTREE, pNew->pgno);
          if( rc!=SQLITE_OK ){







|







5709
5710
5711
5712
5713
5714
5715
5716
5717
5718
5719
5720
5721
5722
5723
      u8 *pCell;
      u8 *pTemp;
      int sz;

      assert( j<nMaxCells );
      pCell = apCell[j];
      sz = szCell[j] + leafCorrection;
      pTemp = &aOvflSpace[iOvflSpace];
      if( !pNew->leaf ){
        memcpy(&pNew->aData[8], pCell, 4);
        if( ISAUTOVACUUM 
         && (aFrom[j]==0xFF || apCopy[aFrom[j]]->pgno!=pNew->pgno)
        ){
          rc = ptrmapPut(pBt, get4byte(pCell), PTRMAP_BTREE, pNew->pgno);
          if( rc!=SQLITE_OK ){
5776
5777
5778
5779
5780
5781
5782
5783
5784
5785
5786
5787
5788
5789
5790
5791
5792
        ** to evaluate "IN (SELECT ...)" and similar clauses.
        */
        if( szCell[j]==4 ){
          assert(leafCorrection==4);
          sz = cellSizePtr(pParent, pCell);
        }
      }
      iSpace2 += sz;
      assert( sz<=pBt->pageSize/4 );
      assert( iSpace2<=pBt->pageSize );
      rc = insertCell(pParent, nxDiv, pCell, sz, pTemp, 4);
      if( rc!=SQLITE_OK ) goto balance_cleanup;
      assert( sqlite3PagerIswriteable(pParent->pDbPage) );
      put4byte(findOverflowCell(pParent,nxDiv), pNew->pgno);

      /* If this is an auto-vacuum database, and not a leaf-data tree,
      ** then update the pointer map with an entry for the overflow page







|

|







5753
5754
5755
5756
5757
5758
5759
5760
5761
5762
5763
5764
5765
5766
5767
5768
5769
        ** to evaluate "IN (SELECT ...)" and similar clauses.
        */
        if( szCell[j]==4 ){
          assert(leafCorrection==4);
          sz = cellSizePtr(pParent, pCell);
        }
      }
      iOvflSpace += sz;
      assert( sz<=pBt->pageSize/4 );
      assert( iOvflSpace<=pBt->pageSize );
      rc = insertCell(pParent, nxDiv, pCell, sz, pTemp, 4);
      if( rc!=SQLITE_OK ) goto balance_cleanup;
      assert( sqlite3PagerIswriteable(pParent->pDbPage) );
      put4byte(findOverflowCell(pParent,nxDiv), pNew->pgno);

      /* If this is an auto-vacuum database, and not a leaf-data tree,
      ** then update the pointer map with an entry for the overflow page
5911
5912
5913
5914
5915
5916
5917
5918
5919
5920
5921
5922
5923
5924
5925

/*
** This routine is called on the root page of a btree when the root
** page contains no cells. This is an opportunity to make the tree
** shallower by one level.
*/
static int balance_shallower(MemPage *pRoot){

  /* The root page is empty but has one child.  Transfer the
  ** information from that one child into the root page if it 
  ** will fit.  This reduces the depth of the tree by one.
  **
  ** If the root page is page 1, it has less space available than
  ** its child (due to the 100 byte header that occurs at the beginning
  ** of the database file), so it might not be able to hold all of the 







<







5888
5889
5890
5891
5892
5893
5894

5895
5896
5897
5898
5899
5900
5901

/*
** This routine is called on the root page of a btree when the root
** page contains no cells. This is an opportunity to make the tree
** shallower by one level.
*/
static int balance_shallower(MemPage *pRoot){

  /* The root page is empty but has one child.  Transfer the
  ** information from that one child into the root page if it 
  ** will fit.  This reduces the depth of the tree by one.
  **
  ** If the root page is page 1, it has less space available than
  ** its child (due to the 100 byte header that occurs at the beginning
  ** of the database file), so it might not be able to hold all of the 
5940
5941
5942
5943
5944
5945
5946
5947
5948
5949
5950
5951
5952
5953
5954
  assert( pgnoChild<=pagerPagecount(pRoot->pBt) );
  assert( hdr==0 || pRoot->pgno==1 );
  
  rc = sqlite3BtreeGetPage(pRoot->pBt, pgnoChild, &pChild, 0);
  if( rc==SQLITE_OK ){
    if( pChild->nFree>=hdr ){
      if( hdr ){
            rc = defragmentPage(pChild);
      }
      if( rc==SQLITE_OK ){
        rc = copyNodeContent(pChild, pRoot);
      }
      if( rc==SQLITE_OK ){
        rc = freePage(pChild);
      }







|







5916
5917
5918
5919
5920
5921
5922
5923
5924
5925
5926
5927
5928
5929
5930
  assert( pgnoChild<=pagerPagecount(pRoot->pBt) );
  assert( hdr==0 || pRoot->pgno==1 );
  
  rc = sqlite3BtreeGetPage(pRoot->pBt, pgnoChild, &pChild, 0);
  if( rc==SQLITE_OK ){
    if( pChild->nFree>=hdr ){
      if( hdr ){
        rc = defragmentPage(pChild);
      }
      if( rc==SQLITE_OK ){
        rc = copyNodeContent(pChild, pRoot);
      }
      if( rc==SQLITE_OK ){
        rc = freePage(pChild);
      }
6011
6012
6013
6014
6015
6016
6017
6018
6019
6020
6021
6022
6023
6024
6025
  assert( pChild->nCell==pRoot->nCell );

  TRACE(("BALANCE: copy root %d into %d\n", pRoot->pgno, pChild->pgno));

  /* Copy the overflow cells from pRoot to pChild */
  memcpy(pChild->aOvfl, pRoot->aOvfl, pRoot->nOverflow*sizeof(pRoot->aOvfl[0]));
  pChild->nOverflow = pRoot->nOverflow;
  pChild->nFree = 0;

  /* Zero the contents of pRoot. Then install pChild as the right-child. */
  zeroPage(pRoot, pChild->aData[0] & ~PTF_LEAF);
  put4byte(&pRoot->aData[pRoot->hdrOffset+8], pgnoChild);

  *ppChild = pChild;
  return SQLITE_OK;







<







5987
5988
5989
5990
5991
5992
5993

5994
5995
5996
5997
5998
5999
6000
  assert( pChild->nCell==pRoot->nCell );

  TRACE(("BALANCE: copy root %d into %d\n", pRoot->pgno, pChild->pgno));

  /* Copy the overflow cells from pRoot to pChild */
  memcpy(pChild->aOvfl, pRoot->aOvfl, pRoot->nOverflow*sizeof(pRoot->aOvfl[0]));
  pChild->nOverflow = pRoot->nOverflow;


  /* Zero the contents of pRoot. Then install pChild as the right-child. */
  zeroPage(pRoot, pChild->aData[0] & ~PTF_LEAF);
  put4byte(&pRoot->aData[pRoot->hdrOffset+8], pgnoChild);

  *ppChild = pChild;
  return SQLITE_OK;