SQLite

Check-in [40425e9342]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Instead of storing a pointer to the parent page in the MemPage structure, have each B-Tree cursor keep track of the ancestry of the current page. (CVS 5747)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 40425e93421286cca1965d7a5769084526210c7a
User & Date: danielk1977 2008-09-29 11:49:48.000
Context
2008-09-29
14:12
Update shared_err.test to work with (5668) (return SQLITE_CORRUPT if rollback fails). (CVS 5748) (check-in: 292acaf7c4 user: danielk1977 tags: trunk)
11:49
Instead of storing a pointer to the parent page in the MemPage structure, have each B-Tree cursor keep track of the ancestry of the current page. (CVS 5747) (check-in: 40425e9342 user: danielk1977 tags: trunk)
00:11
fix #3077: use full version in pkg-config files (CVS 5746) (check-in: efe095e0cb user: vapier tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/btree.c.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
** 2004 April 6
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btree.c,v 1.517 2008/09/26 17:31:55 danielk1977 Exp $
**
** This file implements a external (disk-based) database using BTrees.
** See the header comment on "btreeInt.h" for additional information.
** Including a description of file format and an overview of operation.
*/
#include "btreeInt.h"












|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
** 2004 April 6
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btree.c,v 1.518 2008/09/29 11:49:48 danielk1977 Exp $
**
** This file implements a external (disk-based) database using BTrees.
** See the header comment on "btreeInt.h" for additional information.
** Including a description of file format and an overview of operation.
*/
#include "btreeInt.h"

297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319


320


321
322
323
324
325
326
327
328

  /* If this is an intKey table, then the above call to BtreeKeySize()
  ** stores the integer key in pCur->nKey. In this case this value is
  ** all that is required. Otherwise, if pCur is not open on an intKey
  ** table, then malloc space for and store the pCur->nKey bytes of key 
  ** data.
  */
  if( rc==SQLITE_OK && 0==pCur->pPage->intKey){
    void *pKey = sqlite3Malloc(pCur->nKey);
    if( pKey ){
      rc = sqlite3BtreeKey(pCur, 0, pCur->nKey, pKey);
      if( rc==SQLITE_OK ){
        pCur->pKey = pKey;
      }else{
        sqlite3_free(pKey);
      }
    }else{
      rc = SQLITE_NOMEM;
    }
  }
  assert( !pCur->pPage->intKey || !pCur->pKey );

  if( rc==SQLITE_OK ){


    releasePage(pCur->pPage);


    pCur->pPage = 0;
    pCur->eState = CURSOR_REQUIRESEEK;
  }

  invalidateOverflowCache(pCur);
  return rc;
}








|












|


>
>
|
>
>
|







297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332

  /* If this is an intKey table, then the above call to BtreeKeySize()
  ** stores the integer key in pCur->nKey. In this case this value is
  ** all that is required. Otherwise, if pCur is not open on an intKey
  ** table, then malloc space for and store the pCur->nKey bytes of key 
  ** data.
  */
  if( rc==SQLITE_OK && 0==pCur->apPage[0]->intKey){
    void *pKey = sqlite3Malloc(pCur->nKey);
    if( pKey ){
      rc = sqlite3BtreeKey(pCur, 0, pCur->nKey, pKey);
      if( rc==SQLITE_OK ){
        pCur->pKey = pKey;
      }else{
        sqlite3_free(pKey);
      }
    }else{
      rc = SQLITE_NOMEM;
    }
  }
  assert( !pCur->apPage[0]->intKey || !pCur->pKey );

  if( rc==SQLITE_OK ){
    int i;
    for(i=0; i<=pCur->iPage; i++){
      releasePage(pCur->apPage[i]);
      pCur->apPage[i] = 0;
    }
    pCur->iPage = -1;
    pCur->eState = CURSOR_REQUIRESEEK;
  }

  invalidateOverflowCache(pCur);
  return rc;
}

904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922





923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
  }
  return SQLITE_OK;
}

/*
** Initialize the auxiliary information for a disk block.
**
** The pParent parameter must be a pointer to the MemPage which
** is the parent of the page being initialized.  The root of a
** BTree has no parent and so for that page, pParent==NULL.
**
** Return SQLITE_OK on success.  If we see that the page does
** not contain a well-formed database page, then return 
** SQLITE_CORRUPT.  Note that a return of SQLITE_OK does not
** guarantee that the page is well-formed.  It only shows that
** we failed to detect any corruption.
*/
int sqlite3BtreeInitPage(
  MemPage *pPage,        /* The page to be initialized */





  MemPage *pParent       /* The parent.  Might be NULL */
){
  int pc;            /* Address of a freeblock within pPage->aData[] */
  int hdr;           /* Offset to beginning of page header */
  u8 *data;          /* Equal to pPage->aData */
  BtShared *pBt;        /* The main btree structure */
  int usableSize;    /* Amount of usable space on each page */
  int cellOffset;    /* Offset from start of page to first cell pointer */
  int nFree;         /* Number of unused bytes on the page */
  int top;           /* First byte of the cell content area */

  pBt = pPage->pBt;
  assert( pBt!=0 );
  assert( pParent==0 || pParent->pBt==pBt );
  assert( sqlite3_mutex_held(pBt->mutex) );
  assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) );
  assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) );
  assert( pPage->aData == sqlite3PagerGetData(pPage->pDbPage) );
  if( pPage==pParent ){
    return SQLITE_CORRUPT_BKPT;
  }
  if( (pPage->pParent!=pParent)
   && (pPage->pParent!=0 || pPage->isInit==PAGE_ISINIT_FULL) ){
    /* The parent page should never change unless the file is corrupt */
    return SQLITE_CORRUPT_BKPT;
  }
  if( pPage->isInit==PAGE_ISINIT_FULL ) return SQLITE_OK;
  if( pParent!=0 ){
    pPage->pParent = pParent;
    sqlite3PagerRef(pParent->pDbPage);
  }
  if( pPage->isInit==PAGE_ISINIT_NONE ){
    hdr = pPage->hdrOffset;
    data = pPage->aData;
    if( decodeFlags(pPage, data[hdr]) ) return SQLITE_CORRUPT_BKPT;
    assert( pBt->pageSize>=512 && pBt->pageSize<=32768 );
    pPage->maskPage = pBt->pageSize - 1;
    pPage->nOverflow = 0;
    pPage->idxShift = 0;
    usableSize = pBt->usableSize;
    pPage->cellOffset = cellOffset = hdr + 12 - 4*pPage->leaf;
    top = get2byte(&data[hdr+5]);
    pPage->nCell = get2byte(&data[hdr+3]);
    if( pPage->nCell>MX_CELL(pBt) ){
      /* To many cells for a single page.  The page must be corrupt */
      return SQLITE_CORRUPT_BKPT;
    }
    if( pPage->nCell==0 && pParent!=0 && pParent->pgno!=1 ){
      /* All pages must have at least one cell, except for root pages */
      return SQLITE_CORRUPT_BKPT;
    }
  
    /* Compute the total free space on the page */
    pc = get2byte(&data[hdr+1]);
    nFree = data[hdr+7] + top - (cellOffset + 2*pPage->nCell);
    while( pc>0 ){
      int next, size;
      if( pc>usableSize-4 ){







<
<
<
<






|
|
>
>
>
>
>
|
|
|
|
|
|
|
|
|
|

|
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<















<
<
<
<







908
909
910
911
912
913
914




915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939








940











941
942
943
944
945
946
947
948
949
950
951
952
953
954
955




956
957
958
959
960
961
962
  }
  return SQLITE_OK;
}

/*
** Initialize the auxiliary information for a disk block.
**




** Return SQLITE_OK on success.  If we see that the page does
** not contain a well-formed database page, then return 
** SQLITE_CORRUPT.  Note that a return of SQLITE_OK does not
** guarantee that the page is well-formed.  It only shows that
** we failed to detect any corruption.
*/
int sqlite3BtreeInitPage(MemPage *pPage){

  assert( pPage->pBt!=0 );
  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) );
  assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) );
  assert( pPage->aData == sqlite3PagerGetData(pPage->pDbPage) );

  if( !pPage->isInit ){
    int pc;            /* Address of a freeblock within pPage->aData[] */
    int hdr;           /* Offset to beginning of page header */
    u8 *data;          /* Equal to pPage->aData */
    BtShared *pBt;        /* The main btree structure */
    int usableSize;    /* Amount of usable space on each page */
    int cellOffset;    /* Offset from start of page to first cell pointer */
    int nFree;         /* Number of unused bytes on the page */
    int top;           /* First byte of the cell content area */

    pBt = pPage->pBt;




















    hdr = pPage->hdrOffset;
    data = pPage->aData;
    if( decodeFlags(pPage, data[hdr]) ) return SQLITE_CORRUPT_BKPT;
    assert( pBt->pageSize>=512 && pBt->pageSize<=32768 );
    pPage->maskPage = pBt->pageSize - 1;
    pPage->nOverflow = 0;
    pPage->idxShift = 0;
    usableSize = pBt->usableSize;
    pPage->cellOffset = cellOffset = hdr + 12 - 4*pPage->leaf;
    top = get2byte(&data[hdr+5]);
    pPage->nCell = get2byte(&data[hdr+3]);
    if( pPage->nCell>MX_CELL(pBt) ){
      /* To many cells for a single page.  The page must be corrupt */
      return SQLITE_CORRUPT_BKPT;
    }




  
    /* Compute the total free space on the page */
    pc = get2byte(&data[hdr+1]);
    nFree = data[hdr+7] + top - (cellOffset + 2*pPage->nCell);
    while( pc>0 ){
      int next, size;
      if( pc>usableSize-4 ){
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
      pc = next;
    }
    pPage->nFree = nFree;
    if( nFree>=usableSize ){
      /* Free space cannot exceed total page size */
      return SQLITE_CORRUPT_BKPT; 
    }
  }

#if 0
  /* Check that all the offsets in the cell offset array are within range. 
  ** 
  ** Omitting this consistency check and using the pPage->maskPage mask
  ** to prevent overrunning the page buffer in findCell() results in a
  ** 2.5% performance gain.







<







973
974
975
976
977
978
979

980
981
982
983
984
985
986
      pc = next;
    }
    pPage->nFree = nFree;
    if( nFree>=usableSize ){
      /* Free space cannot exceed total page size */
      return SQLITE_CORRUPT_BKPT; 
    }


#if 0
  /* Check that all the offsets in the cell offset array are within range. 
  ** 
  ** Omitting this consistency check and using the pPage->maskPage mask
  ** to prevent overrunning the page buffer in findCell() results in a
  ** 2.5% performance gain.
1013
1014
1015
1016
1017
1018
1019
1020

1021
1022
1023
1024
1025
1026
1027
    for(pOff=&data[cellOffset]; pOff!=pEnd && !((*pOff)&mask); pOff+=2);
    if( pOff!=pEnd ){
      return SQLITE_CORRUPT_BKPT;
    }
  }
#endif

  pPage->isInit = PAGE_ISINIT_FULL;

  return SQLITE_OK;
}

/*
** Set up a raw page so that it looks like a database page holding
** no entries.
*/







|
>







994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
    for(pOff=&data[cellOffset]; pOff!=pEnd && !((*pOff)&mask); pOff+=2);
    if( pOff!=pEnd ){
      return SQLITE_CORRUPT_BKPT;
    }
  }
#endif

    pPage->isInit = 1;
  }
  return SQLITE_OK;
}

/*
** Set up a raw page so that it looks like a database page holding
** no entries.
*/
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
  pPage->hdrOffset = hdr;
  pPage->cellOffset = first;
  pPage->nOverflow = 0;
  assert( pBt->pageSize>=512 && pBt->pageSize<=32768 );
  pPage->maskPage = pBt->pageSize - 1;
  pPage->idxShift = 0;
  pPage->nCell = 0;
  pPage->isInit = PAGE_ISINIT_FULL;
}


/*
** Convert a DbPage obtained from the pager into a MemPage used by
** the btree layer.
*/







|







1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
  pPage->hdrOffset = hdr;
  pPage->cellOffset = first;
  pPage->nOverflow = 0;
  assert( pBt->pageSize>=512 && pBt->pageSize<=32768 );
  pPage->maskPage = pBt->pageSize - 1;
  pPage->idxShift = 0;
  pPage->nCell = 0;
  pPage->isInit = 1;
}


/*
** Convert a DbPage obtained from the pager into a MemPage used by
** the btree layer.
*/
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
** Get a page from the pager and initialize it.  This routine
** is just a convenience wrapper around separate calls to
** sqlite3BtreeGetPage() and sqlite3BtreeInitPage().
*/
static int getAndInitPage(
  BtShared *pBt,          /* The database file */
  Pgno pgno,           /* Number of the page to get */
  MemPage **ppPage,    /* Write the page pointer here */
  MemPage *pParent     /* Parent of the page */
){
  int rc;
  DbPage *pDbPage;
  MemPage *pPage;

  assert( sqlite3_mutex_held(pBt->mutex) );
  assert( !pParent || pParent->isInit==PAGE_ISINIT_FULL );
  if( pgno==0 ){
    return SQLITE_CORRUPT_BKPT; 
  }

  /* It is often the case that the page we want is already in cache.
  ** If so, get it directly.  This saves us from having to call
  ** pagerPagecount() to make sure pgno is within limits, which results







|
<






<







1093
1094
1095
1096
1097
1098
1099
1100

1101
1102
1103
1104
1105
1106

1107
1108
1109
1110
1111
1112
1113
** Get a page from the pager and initialize it.  This routine
** is just a convenience wrapper around separate calls to
** sqlite3BtreeGetPage() and sqlite3BtreeInitPage().
*/
static int getAndInitPage(
  BtShared *pBt,          /* The database file */
  Pgno pgno,           /* Number of the page to get */
  MemPage **ppPage     /* Write the page pointer here */

){
  int rc;
  DbPage *pDbPage;
  MemPage *pPage;

  assert( sqlite3_mutex_held(pBt->mutex) );

  if( pgno==0 ){
    return SQLITE_CORRUPT_BKPT; 
  }

  /* It is often the case that the page we want is already in cache.
  ** If so, get it directly.  This saves us from having to call
  ** pagerPagecount() to make sure pgno is within limits, which results
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
    if( pgno>pagerPagecount(pBt->pPager) ){
      return SQLITE_CORRUPT_BKPT; 
    }
    rc = sqlite3BtreeGetPage(pBt, pgno, ppPage, 0);
    if( rc ) return rc;
    pPage = *ppPage;
  }
  if( pPage->isInit!=PAGE_ISINIT_FULL ){
    rc = sqlite3BtreeInitPage(pPage, pParent);
  }else if( pParent && (pPage==pParent || pPage->pParent!=pParent) ){
    /* This condition indicates a loop in the b-tree structure (the scenario
    ** where database corruption has caused a page to be a direct or
    ** indirect descendant of itself).
    */ 
    rc = SQLITE_CORRUPT_BKPT;
  }
  if( rc!=SQLITE_OK ){
    releasePage(pPage);
    *ppPage = 0;
  }
  return rc;
}







|
|
<
<
<
<
<
<







1123
1124
1125
1126
1127
1128
1129
1130
1131






1132
1133
1134
1135
1136
1137
1138
    if( pgno>pagerPagecount(pBt->pPager) ){
      return SQLITE_CORRUPT_BKPT; 
    }
    rc = sqlite3BtreeGetPage(pBt, pgno, ppPage, 0);
    if( rc ) return rc;
    pPage = *ppPage;
  }
  if( !pPage->isInit ){
    rc = sqlite3BtreeInitPage(pPage);






  }
  if( rc!=SQLITE_OK ){
    releasePage(pPage);
    *ppPage = 0;
  }
  return rc;
}
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218

1219
1220
1221

1222
1223
1224
1225
1226
1227
1228
    assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage );
    assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData );
    assert( sqlite3_mutex_held(pPage->pBt->mutex) );
    sqlite3PagerUnref(pPage->pDbPage);
  }
}

/*
** This routine is called when the reference count for a page
** reaches zero.  We need to unref the pParent pointer when that
** happens.
*/
static void pageDestructor(DbPage *pData){
  MemPage *pPage;
  pPage = (MemPage *)sqlite3PagerGetExtra(pData);
  if( pPage ){
    assert( pPage->isInit!=PAGE_ISINIT_FULL 
         || sqlite3_mutex_held(pPage->pBt->mutex) 
    );
    if( pPage->pParent ){
      MemPage *pParent = pPage->pParent;
      assert( pParent->pBt==pPage->pBt );
      pPage->pParent = 0;
      releasePage(pParent);
    }
    if( pPage->isInit==PAGE_ISINIT_FULL ){
      pPage->isInit = PAGE_ISINIT_DATA;
    }
  }
}

/*
** During a rollback, when the pager reloads information into the cache
** so that the cache is restored to its original state at the start of
** the transaction, for each page restored this routine is called.
**
** This routine needs to reset the extra data section at the end of the
** page to agree with the restored data.
*/
static void pageReinit(DbPage *pData){
  MemPage *pPage;
  pPage = (MemPage *)sqlite3PagerGetExtra(pData);
  if( pPage->isInit==PAGE_ISINIT_FULL ){
    assert( sqlite3_mutex_held(pPage->pBt->mutex) );
    pPage->isInit = 0;

    sqlite3BtreeInitPage(pPage, pPage->pParent);
  }else if( pPage->isInit==PAGE_ISINIT_DATA ){
    pPage->isInit = 0;

  }
}

/*
** Invoke the busy handler for a btree.
*/
static int sqlite3BtreeInvokeBusyHandler(void *pArg, int n){







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<











|


>
|
<
<
>







1148
1149
1150
1151
1152
1153
1154
























1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170


1171
1172
1173
1174
1175
1176
1177
1178
    assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage );
    assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData );
    assert( sqlite3_mutex_held(pPage->pBt->mutex) );
    sqlite3PagerUnref(pPage->pDbPage);
  }
}

























/*
** During a rollback, when the pager reloads information into the cache
** so that the cache is restored to its original state at the start of
** the transaction, for each page restored this routine is called.
**
** This routine needs to reset the extra data section at the end of the
** page to agree with the restored data.
*/
static void pageReinit(DbPage *pData){
  MemPage *pPage;
  pPage = (MemPage *)sqlite3PagerGetExtra(pData);
  if( pPage->isInit ){
    assert( sqlite3_mutex_held(pPage->pBt->mutex) );
    pPage->isInit = 0;
    if( sqlite3PagerPageRefcount(pData)>0 ){
      sqlite3BtreeInitPage(pPage);


    }
  }
}

/*
** Invoke the busy handler for a btree.
*/
static int sqlite3BtreeInvokeBusyHandler(void *pArg, int n){
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
    pBt = sqlite3MallocZero( sizeof(*pBt) );
    if( pBt==0 ){
      rc = SQLITE_NOMEM;
      goto btree_open_out;
    }
    pBt->busyHdr.xFunc = sqlite3BtreeInvokeBusyHandler;
    pBt->busyHdr.pArg = pBt;
    rc = sqlite3PagerOpen(pVfs, &pBt->pPager, zFilename, pageDestructor,
                          EXTRA_SIZE, flags, vfsFlags);
    if( rc==SQLITE_OK ){
      rc = sqlite3PagerReadFileheader(pBt->pPager,sizeof(zDbHeader),zDbHeader);
    }
    if( rc!=SQLITE_OK ){
      goto btree_open_out;
    }







|







1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
    pBt = sqlite3MallocZero( sizeof(*pBt) );
    if( pBt==0 ){
      rc = SQLITE_NOMEM;
      goto btree_open_out;
    }
    pBt->busyHdr.xFunc = sqlite3BtreeInvokeBusyHandler;
    pBt->busyHdr.pArg = pBt;
    rc = sqlite3PagerOpen(pVfs, &pBt->pPager, zFilename,
                          EXTRA_SIZE, flags, vfsFlags);
    if( rc==SQLITE_OK ){
      rc = sqlite3PagerReadFileheader(pBt->pPager,sizeof(zDbHeader),zDbHeader);
    }
    if( rc!=SQLITE_OK ){
      goto btree_open_out;
    }
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
  int nCell;                         /* Number of cells in page pPage */
  int rc;                            /* Return code */
  BtShared *pBt = pPage->pBt;
  int isInitOrig = pPage->isInit;
  Pgno pgno = pPage->pgno;

  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  rc = sqlite3BtreeInitPage(pPage, pPage->pParent);
  if( rc!=SQLITE_OK ){
    goto set_child_ptrmaps_out;
  }
  nCell = pPage->nCell;

  for(i=0; i<nCell; i++){
    u8 *pCell = findCell(pPage, i);







|







2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
  int nCell;                         /* Number of cells in page pPage */
  int rc;                            /* Return code */
  BtShared *pBt = pPage->pBt;
  int isInitOrig = pPage->isInit;
  Pgno pgno = pPage->pgno;

  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  rc = sqlite3BtreeInitPage(pPage);
  if( rc!=SQLITE_OK ){
    goto set_child_ptrmaps_out;
  }
  nCell = pPage->nCell;

  for(i=0; i<nCell; i++){
    u8 *pCell = findCell(pPage, i);
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
    }
    put4byte(pPage->aData, iTo);
  }else{
    int isInitOrig = pPage->isInit;
    int i;
    int nCell;

    sqlite3BtreeInitPage(pPage, 0);
    nCell = pPage->nCell;

    for(i=0; i<nCell; i++){
      u8 *pCell = findCell(pPage, i);
      if( eType==PTRMAP_OVERFLOW1 ){
        CellInfo info;
        sqlite3BtreeParseCellPtr(pPage, pCell, &info);







|







2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
    }
    put4byte(pPage->aData, iTo);
  }else{
    int isInitOrig = pPage->isInit;
    int i;
    int nCell;

    sqlite3BtreeInitPage(pPage);
    nCell = pPage->nCell;

    for(i=0; i<nCell; i++){
      u8 *pCell = findCell(pPage, i);
      if( eType==PTRMAP_OVERFLOW1 ){
        CellInfo info;
        sqlite3BtreeParseCellPtr(pPage, pCell, &info);
2807
2808
2809
2810
2811
2812
2813



2814
2815
2816
2817
2818
2819
2820
** 3:  The database must be writable (not on read-only media)
**
** 4:  There must be an active transaction.
**
** No checking is done to make sure that page iTable really is the
** root page of a b-tree.  If it is not, then the cursor acquired
** will not work correctly.



*/
static int btreeCursor(
  Btree *p,                              /* The btree */
  int iTable,                            /* Root page of table to open */
  int wrFlag,                            /* 1 to write. 0 read-only */
  struct KeyInfo *pKeyInfo,              /* First arg to comparison function */
  BtCursor *pCur                         /* Space for new cursor */







>
>
>







2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
** 3:  The database must be writable (not on read-only media)
**
** 4:  There must be an active transaction.
**
** No checking is done to make sure that page iTable really is the
** root page of a b-tree.  If it is not, then the cursor acquired
** will not work correctly.
**
** It is assumed that the sqlite3BtreeCursorSize() bytes of memory 
** pointed to by pCur have been zeroed by the caller.
*/
static int btreeCursor(
  Btree *p,                              /* The btree */
  int iTable,                            /* Root page of table to open */
  int wrFlag,                            /* 1 to write. 0 read-only */
  struct KeyInfo *pKeyInfo,              /* First arg to comparison function */
  BtCursor *pCur                         /* Space for new cursor */
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
    }
  }
  pCur->pgnoRoot = (Pgno)iTable;
  if( iTable==1 && pagerPagecount(pBt->pPager)==0 ){
    rc = SQLITE_EMPTY;
    goto create_cursor_exception;
  }
  rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->pPage, 0);
  if( rc!=SQLITE_OK ){
    goto create_cursor_exception;
  }

  /* Now that no other errors can occur, finish filling in the BtCursor
  ** variables, link the cursor into the BtShared list and set *ppCur (the
  ** output argument to this function).







|







2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
    }
  }
  pCur->pgnoRoot = (Pgno)iTable;
  if( iTable==1 && pagerPagecount(pBt->pPager)==0 ){
    rc = SQLITE_EMPTY;
    goto create_cursor_exception;
  }
  rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->apPage[0]);
  if( rc!=SQLITE_OK ){
    goto create_cursor_exception;
  }

  /* Now that no other errors can occur, finish filling in the BtCursor
  ** variables, link the cursor into the BtShared list and set *ppCur (the
  ** output argument to this function).
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
  }
  pBt->pCursor = pCur;
  pCur->eState = CURSOR_INVALID;

  return SQLITE_OK;

create_cursor_exception:
  releasePage(pCur->pPage);
  unlockBtreeIfUnused(pBt);
  return rc;
}
int sqlite3BtreeCursor(
  Btree *p,                                   /* The btree */
  int iTable,                                 /* Root page of table to open */
  int wrFlag,                                 /* 1 to write. 0 read-only */







|







2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
  }
  pBt->pCursor = pCur;
  pCur->eState = CURSOR_INVALID;

  return SQLITE_OK;

create_cursor_exception:
  releasePage(pCur->apPage[0]);
  unlockBtreeIfUnused(pBt);
  return rc;
}
int sqlite3BtreeCursor(
  Btree *p,                                   /* The btree */
  int iTable,                                 /* Root page of table to open */
  int wrFlag,                                 /* 1 to write. 0 read-only */
2896
2897
2898
2899
2900
2901
2902

2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914

2915

2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928

2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942

2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
/*
** Close a cursor.  The read lock on the database file is released
** when the last cursor is closed.
*/
int sqlite3BtreeCloseCursor(BtCursor *pCur){
  Btree *pBtree = pCur->pBtree;
  if( pBtree ){

    BtShared *pBt = pCur->pBt;
    sqlite3BtreeEnter(pBtree);
    pBt->db = pBtree->db;
    clearCursorPosition(pCur);
    if( pCur->pPrev ){
      pCur->pPrev->pNext = pCur->pNext;
    }else{
      pBt->pCursor = pCur->pNext;
    }
    if( pCur->pNext ){
      pCur->pNext->pPrev = pCur->pPrev;
    }

    releasePage(pCur->pPage);

    unlockBtreeIfUnused(pBt);
    invalidateOverflowCache(pCur);
    /* sqlite3_free(pCur); */
    sqlite3BtreeLeave(pBtree);
  }
  return SQLITE_OK;
}

/*
** Make a temporary cursor by filling in the fields of pTempCur.
** The temporary cursor is not on the cursor list for the Btree.
*/
void sqlite3BtreeGetTempCursor(BtCursor *pCur, BtCursor *pTempCur){

  assert( cursorHoldsMutex(pCur) );
  memcpy(pTempCur, pCur, sizeof(*pCur));
  pTempCur->pNext = 0;
  pTempCur->pPrev = 0;
  if( pTempCur->pPage ){
    sqlite3PagerRef(pTempCur->pPage->pDbPage);
  }
}

/*
** Delete a temporary cursor such as was made by the CreateTemporaryCursor()
** function above.
*/
void sqlite3BtreeReleaseTempCursor(BtCursor *pCur){

  assert( cursorHoldsMutex(pCur) );
  if( pCur->pPage ){
    sqlite3PagerUnref(pCur->pPage->pDbPage);
  }
}

/*
** Make sure the BtCursor* given in the argument has a valid
** BtCursor.info structure.  If it is not already valid, call
** sqlite3BtreeParseCell() to fill it in.







>












>
|
>













>

|


|
|








>

|
|







2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
/*
** Close a cursor.  The read lock on the database file is released
** when the last cursor is closed.
*/
int sqlite3BtreeCloseCursor(BtCursor *pCur){
  Btree *pBtree = pCur->pBtree;
  if( pBtree ){
    int i;
    BtShared *pBt = pCur->pBt;
    sqlite3BtreeEnter(pBtree);
    pBt->db = pBtree->db;
    clearCursorPosition(pCur);
    if( pCur->pPrev ){
      pCur->pPrev->pNext = pCur->pNext;
    }else{
      pBt->pCursor = pCur->pNext;
    }
    if( pCur->pNext ){
      pCur->pNext->pPrev = pCur->pPrev;
    }
    for(i=0; i<=pCur->iPage; i++){
      releasePage(pCur->apPage[i]);
    }
    unlockBtreeIfUnused(pBt);
    invalidateOverflowCache(pCur);
    /* sqlite3_free(pCur); */
    sqlite3BtreeLeave(pBtree);
  }
  return SQLITE_OK;
}

/*
** Make a temporary cursor by filling in the fields of pTempCur.
** The temporary cursor is not on the cursor list for the Btree.
*/
void sqlite3BtreeGetTempCursor(BtCursor *pCur, BtCursor *pTempCur){
  int i;
  assert( cursorHoldsMutex(pCur) );
  memcpy(pTempCur, pCur, sizeof(BtCursor));
  pTempCur->pNext = 0;
  pTempCur->pPrev = 0;
  for(i=0; i<=pTempCur->iPage; i++){
    sqlite3PagerRef(pTempCur->apPage[i]->pDbPage);
  }
}

/*
** Delete a temporary cursor such as was made by the CreateTemporaryCursor()
** function above.
*/
void sqlite3BtreeReleaseTempCursor(BtCursor *pCur){
  int i;
  assert( cursorHoldsMutex(pCur) );
  for(i=0; i<=pCur->iPage; i++){
    sqlite3PagerUnref(pCur->apPage[i]->pDbPage);
  }
}

/*
** Make sure the BtCursor* given in the argument has a valid
** BtCursor.info structure.  If it is not already valid, call
** sqlite3BtreeParseCell() to fill it in.
2960
2961
2962
2963
2964
2965
2966

2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977

2978
2979
2980
2981
2982
2983
2984
2985
2986
2987

2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
** (when less compiler optimizations like -Os or -O0 are used and the
** compiler is not doing agressive inlining.)  So we use a real function
** for MSVC and a macro for everything else.  Ticket #2457.
*/
#ifndef NDEBUG
  static void assertCellInfo(BtCursor *pCur){
    CellInfo info;

    memset(&info, 0, sizeof(info));
    sqlite3BtreeParseCell(pCur->pPage, pCur->idx, &info);
    assert( memcmp(&info, &pCur->info, sizeof(info))==0 );
  }
#else
  #define assertCellInfo(x)
#endif
#ifdef _MSC_VER
  /* Use a real function in MSVC to work around bugs in that compiler. */
  static void getCellInfo(BtCursor *pCur){
    if( pCur->info.nSize==0 ){

      sqlite3BtreeParseCell(pCur->pPage, pCur->idx, &pCur->info);
      pCur->validNKey = 1;
    }else{
      assertCellInfo(pCur);
    }
  }
#else /* if not _MSC_VER */
  /* Use a macro in all other compilers so that the function is inlined */
#define getCellInfo(pCur)                                               \
  if( pCur->info.nSize==0 ){                                            \

    sqlite3BtreeParseCell(pCur->pPage, pCur->idx, &pCur->info);         \
    pCur->validNKey = 1;                                                \
  }else{                                                                \
    assertCellInfo(pCur);                                               \
  }
#endif /* _MSC_VER */

/*
** Set *pSize to the size of the buffer needed to hold the value of
** the key for the current entry.  If the cursor is not pointing
** to a valid entry, *pSize is set to 0. 







>

|









>
|







|
|
>
|
|
|
|







2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
** (when less compiler optimizations like -Os or -O0 are used and the
** compiler is not doing agressive inlining.)  So we use a real function
** for MSVC and a macro for everything else.  Ticket #2457.
*/
#ifndef NDEBUG
  static void assertCellInfo(BtCursor *pCur){
    CellInfo info;
    int iPage = pCur->iPage;
    memset(&info, 0, sizeof(info));
    sqlite3BtreeParseCell(pCur->apPage[iPage], pCur->aiIdx[iPage], &info);
    assert( memcmp(&info, &pCur->info, sizeof(info))==0 );
  }
#else
  #define assertCellInfo(x)
#endif
#ifdef _MSC_VER
  /* Use a real function in MSVC to work around bugs in that compiler. */
  static void getCellInfo(BtCursor *pCur){
    if( pCur->info.nSize==0 ){
      int iPage = pCur->iPage;
      sqlite3BtreeParseCell(pCur->apPage[iPage],pCur->aiIdx[iPage],&pCur->info);
      pCur->validNKey = 1;
    }else{
      assertCellInfo(pCur);
    }
  }
#else /* if not _MSC_VER */
  /* Use a macro in all other compilers so that the function is inlined */
#define getCellInfo(pCur)                                                      \
  if( pCur->info.nSize==0 ){                                                   \
    int iPage = pCur->iPage;                                                   \
    sqlite3BtreeParseCell(pCur->apPage[iPage],pCur->aiIdx[iPage],&pCur->info); \
    pCur->validNKey = 1;                                                       \
  }else{                                                                       \
    assertCellInfo(pCur);                                                      \
  }
#endif /* _MSC_VER */

/*
** Set *pSize to the size of the buffer needed to hold the value of
** the key for the current entry.  If the cursor is not pointing
** to a valid entry, *pSize is set to 0. 
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
  int skipKey,         /* offset begins at data if this is true */
  int eOp              /* zero to read. non-zero to write. */
){
  unsigned char *aPayload;
  int rc = SQLITE_OK;
  u32 nKey;
  int iIdx = 0;
  MemPage *pPage = pCur->pPage;     /* Btree page of current cursor entry */
  BtShared *pBt;                   /* Btree this cursor belongs to */

  assert( pPage );
  assert( pCur->eState==CURSOR_VALID );
  assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
  assert( offset>=0 );
  assert( cursorHoldsMutex(pCur) );

  getCellInfo(pCur);
  aPayload = pCur->info.pCell + pCur->info.nHeader;
  nKey = (pPage->intKey ? 0 : pCur->info.nKey);








|
|



|







3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
  int skipKey,         /* offset begins at data if this is true */
  int eOp              /* zero to read. non-zero to write. */
){
  unsigned char *aPayload;
  int rc = SQLITE_OK;
  u32 nKey;
  int iIdx = 0;
  MemPage *pPage = pCur->apPage[pCur->iPage]; /* Btree page of current entry */
  BtShared *pBt;                              /* Btree this cursor belongs to */

  assert( pPage );
  assert( pCur->eState==CURSOR_VALID );
  assert( pCur->aiIdx[pCur->iPage]<pPage->nCell );
  assert( offset>=0 );
  assert( cursorHoldsMutex(pCur) );

  getCellInfo(pCur);
  aPayload = pCur->info.pCell + pCur->info.nHeader;
  nKey = (pPage->intKey ? 0 : pCur->info.nKey);

3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
int sqlite3BtreeKey(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){
  int rc;

  assert( cursorHoldsMutex(pCur) );
  rc = restoreCursorPosition(pCur);
  if( rc==SQLITE_OK ){
    assert( pCur->eState==CURSOR_VALID );
    assert( pCur->pPage!=0 );
    if( pCur->pPage->intKey ){
      return SQLITE_CORRUPT_BKPT;
    }
    assert( pCur->pPage->intKey==0 );
    assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
    rc = accessPayload(pCur, offset, amt, (unsigned char*)pBuf, 0, 0);
  }
  return rc;
}

/*
** Read part of the data associated with cursor pCur.  Exactly







|
|


|
<







3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307

3308
3309
3310
3311
3312
3313
3314
int sqlite3BtreeKey(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){
  int rc;

  assert( cursorHoldsMutex(pCur) );
  rc = restoreCursorPosition(pCur);
  if( rc==SQLITE_OK ){
    assert( pCur->eState==CURSOR_VALID );
    assert( pCur->iPage>=0 && pCur->apPage[pCur->iPage] );
    if( pCur->apPage[0]->intKey ){
      return SQLITE_CORRUPT_BKPT;
    }
    assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell );

    rc = accessPayload(pCur, offset, amt, (unsigned char*)pBuf, 0, 0);
  }
  return rc;
}

/*
** Read part of the data associated with cursor pCur.  Exactly
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
  }
#endif

  assert( cursorHoldsMutex(pCur) );
  rc = restoreCursorPosition(pCur);
  if( rc==SQLITE_OK ){
    assert( pCur->eState==CURSOR_VALID );
    assert( pCur->pPage!=0 );
    assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
    rc = accessPayload(pCur, offset, amt, pBuf, 1, 0);
  }
  return rc;
}

/*
** Return a pointer to payload information from the entry that the 







|
|







3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
  }
#endif

  assert( cursorHoldsMutex(pCur) );
  rc = restoreCursorPosition(pCur);
  if( rc==SQLITE_OK ){
    assert( pCur->eState==CURSOR_VALID );
    assert( pCur->iPage>=0 && pCur->apPage[pCur->iPage] );
    assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell );
    rc = accessPayload(pCur, offset, amt, pBuf, 1, 0);
  }
  return rc;
}

/*
** Return a pointer to payload information from the entry that the 
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
  int skipKey          /* read beginning at data if this is true */
){
  unsigned char *aPayload;
  MemPage *pPage;
  u32 nKey;
  int nLocal;

  assert( pCur!=0 && pCur->pPage!=0 );
  assert( pCur->eState==CURSOR_VALID );
  assert( cursorHoldsMutex(pCur) );
  pPage = pCur->pPage;
  assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
  getCellInfo(pCur);
  aPayload = pCur->info.pCell;
  aPayload += pCur->info.nHeader;
  if( pPage->intKey ){
    nKey = 0;
  }else{
    nKey = pCur->info.nKey;







|


|
|







3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
  int skipKey          /* read beginning at data if this is true */
){
  unsigned char *aPayload;
  MemPage *pPage;
  u32 nKey;
  int nLocal;

  assert( pCur!=0 && pCur->iPage>=0 && pCur->apPage[pCur->iPage]);
  assert( pCur->eState==CURSOR_VALID );
  assert( cursorHoldsMutex(pCur) );
  pPage = pCur->apPage[pCur->iPage];
  assert( pCur->aiIdx[pCur->iPage]<pPage->nCell );
  getCellInfo(pCur);
  aPayload = pCur->info.pCell;
  aPayload += pCur->info.nHeader;
  if( pPage->intKey ){
    nKey = 0;
  }else{
    nKey = pCur->info.nKey;
3467
3468
3469
3470
3471
3472
3473

3474
3475
3476
3477
3478
3479




3480
3481
3482
3483
3484
3485
3486

3487

3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551

/*
** Move the cursor down to a new child page.  The newPgno argument is the
** page number of the child page to move to.
*/
static int moveToChild(BtCursor *pCur, u32 newPgno){
  int rc;

  MemPage *pNewPage;
  MemPage *pOldPage;
  BtShared *pBt = pCur->pBt;

  assert( cursorHoldsMutex(pCur) );
  assert( pCur->eState==CURSOR_VALID );




  rc = getAndInitPage(pBt, newPgno, &pNewPage, pCur->pPage);
  if( rc ) return rc;
  pNewPage->idxParent = pCur->idx;
  pOldPage = pCur->pPage;
  pOldPage->idxShift = 0;
  releasePage(pOldPage);
  pCur->pPage = pNewPage;

  pCur->idx = 0;

  pCur->info.nSize = 0;
  pCur->validNKey = 0;
  if( pNewPage->nCell<1 ){
    return SQLITE_CORRUPT_BKPT;
  }
  return SQLITE_OK;
}

/*
** Return true if the page is the virtual root of its table.
**
** The virtual root page is the root page for most tables.  But
** for the table rooted on page 1, sometime the real root page
** is empty except for the right-pointer.  In such cases the
** virtual root page is the page that the right-pointer of page
** 1 is pointing to.
*/
int sqlite3BtreeIsRootPage(MemPage *pPage){
  MemPage *pParent;

  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  pParent = pPage->pParent;
  if( pParent==0 ) return 1;
  if( pParent->pgno>1 ) return 0;
  if( get2byte(&pParent->aData[pParent->hdrOffset+3])==0 ) return 1;
  return 0;
}

/*
** Move the cursor up to the parent page.
**
** pCur->idx is set to the cell index that contains the pointer
** to the page we are coming from.  If we are coming from the
** right-most child page then pCur->idx is set to one more than
** the largest cell index.
*/
void sqlite3BtreeMoveToParent(BtCursor *pCur){
  MemPage *pParent;
  MemPage *pPage;
  int idxParent;

  assert( cursorHoldsMutex(pCur) );
  assert( pCur->eState==CURSOR_VALID );
  pPage = pCur->pPage;
  assert( pPage!=0 );
  assert( !sqlite3BtreeIsRootPage(pPage) );
  pParent = pPage->pParent;
  assert( pParent!=0 );
  assert( pPage->pDbPage->nRef>0 );
  idxParent = pPage->idxParent;
  sqlite3PagerRef(pParent->pDbPage);
  releasePage(pPage);
  pCur->pPage = pParent;
  pCur->info.nSize = 0;
  pCur->validNKey = 0;
  assert( pParent->idxShift==0 );
  pCur->idx = idxParent;
}

/*
** Move the cursor to the root page
*/
static int moveToRoot(BtCursor *pCur){
  MemPage *pRoot;







>

<




>
>
>
>
|

<
<
|
<
|
>
|
>








<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<









<
<
<
<


<
|
<
<
|
|
<
<
|
|


|
<







3427
3428
3429
3430
3431
3432
3433
3434
3435

3436
3437
3438
3439
3440
3441
3442
3443
3444
3445


3446

3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458




















3459
3460
3461
3462
3463
3464
3465
3466
3467




3468
3469

3470


3471
3472


3473
3474
3475
3476
3477

3478
3479
3480
3481
3482
3483
3484

/*
** Move the cursor down to a new child page.  The newPgno argument is the
** page number of the child page to move to.
*/
static int moveToChild(BtCursor *pCur, u32 newPgno){
  int rc;
  int i = pCur->iPage;
  MemPage *pNewPage;

  BtShared *pBt = pCur->pBt;

  assert( cursorHoldsMutex(pCur) );
  assert( pCur->eState==CURSOR_VALID );
  assert( pCur->iPage<BTCURSOR_MAX_DEPTH );
  if( pCur->iPage>=(BTCURSOR_MAX_DEPTH-1) ){
    return SQLITE_CORRUPT_BKPT;
  }
  rc = getAndInitPage(pBt, newPgno, &pNewPage);
  if( rc ) return rc;


  pCur->apPage[i]->idxShift = 0;

  pCur->apPage[i+1] = pNewPage;
  pCur->aiIdx[i+1] = 0;
  pCur->iPage++;

  pCur->info.nSize = 0;
  pCur->validNKey = 0;
  if( pNewPage->nCell<1 ){
    return SQLITE_CORRUPT_BKPT;
  }
  return SQLITE_OK;
}





















/*
** Move the cursor up to the parent page.
**
** pCur->idx is set to the cell index that contains the pointer
** to the page we are coming from.  If we are coming from the
** right-most child page then pCur->idx is set to one more than
** the largest cell index.
*/
void sqlite3BtreeMoveToParent(BtCursor *pCur){




  assert( cursorHoldsMutex(pCur) );
  assert( pCur->eState==CURSOR_VALID );

  assert( pCur->iPage>0 );


  assert( pCur->apPage[pCur->iPage] );



  releasePage(pCur->apPage[pCur->iPage]);
  pCur->iPage--;
  pCur->info.nSize = 0;
  pCur->validNKey = 0;
  assert( pCur->apPage[pCur->iPage]->idxShift==0 );

}

/*
** Move the cursor to the root page
*/
static int moveToRoot(BtCursor *pCur){
  MemPage *pRoot;
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578

3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592


3593
3594

3595
3596
3597

3598
3599
3600
3601
3602
3603
3604


3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
  assert( CURSOR_FAULT   > CURSOR_REQUIRESEEK );
  if( pCur->eState>=CURSOR_REQUIRESEEK ){
    if( pCur->eState==CURSOR_FAULT ){
      return pCur->skip;
    }
    clearCursorPosition(pCur);
  }
  pRoot = pCur->pPage;
  if( pRoot && pRoot->isInit ){
    /* If the page the cursor is currently pointing to is fully initialized,
    ** then the root page can be found by following the MemPage.pParent
    ** pointers. This is faster than requesting a reference to the root
    ** page from the pager layer.
    */
    while( pRoot->pParent ){
      assert( pRoot->isInit==PAGE_ISINIT_FULL );
      pRoot = pRoot->pParent;
    }
    assert( pRoot->isInit==PAGE_ISINIT_FULL );
    if( pRoot!=pCur->pPage ){

      sqlite3PagerRef(pRoot->pDbPage);
      releasePage(pCur->pPage);
      pCur->pPage = pRoot;
    }
  }else{
    if( 
      SQLITE_OK!=(rc = getAndInitPage(pBt, pCur->pgnoRoot, &pRoot, 0))
    ){
      pCur->eState = CURSOR_INVALID;
      return rc;
    }
    releasePage(pCur->pPage);
    pCur->pPage = pRoot;
  }


  assert( pCur->pPage->pgno==pCur->pgnoRoot );
  pCur->idx = 0;

  pCur->info.nSize = 0;
  pCur->atLast = 0;
  pCur->validNKey = 0;

  if( pRoot->nCell==0 && !pRoot->leaf ){
    Pgno subpage;
    assert( pRoot->pgno==1 );
    subpage = get4byte(&pRoot->aData[pRoot->hdrOffset+8]);
    assert( subpage>0 );
    pCur->eState = CURSOR_VALID;
    rc = moveToChild(pCur, subpage);


  }
  pCur->eState = ((pCur->pPage->nCell>0)?CURSOR_VALID:CURSOR_INVALID);
  return rc;
}

/*
** Move the cursor down to the left-most leaf entry beneath the
** entry to which it is currently pointing.
**
** The left-most leaf is the one with the smallest key - the first
** in ascending order.
*/
static int moveToLeftmost(BtCursor *pCur){
  Pgno pgno;
  int rc = SQLITE_OK;
  MemPage *pPage;

  assert( cursorHoldsMutex(pCur) );
  assert( pCur->eState==CURSOR_VALID );
  while( rc==SQLITE_OK && !(pPage = pCur->pPage)->leaf ){
    assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
    pgno = get4byte(findCell(pPage, pCur->idx));
    rc = moveToChild(pCur, pgno);
  }
  return rc;
}

/*
** Move the cursor down to the right-most leaf entry beneath the







<
<
<
<
<
<
<
<
<
<
|
<
|
>
|
|
<



|




<
<

>
>
|
|
>



>







>
>

<

















|
|
|







3492
3493
3494
3495
3496
3497
3498










3499

3500
3501
3502
3503

3504
3505
3506
3507
3508
3509
3510
3511


3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531

3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
  assert( CURSOR_FAULT   > CURSOR_REQUIRESEEK );
  if( pCur->eState>=CURSOR_REQUIRESEEK ){
    if( pCur->eState==CURSOR_FAULT ){
      return pCur->skip;
    }
    clearCursorPosition(pCur);
  }












  if( pCur->iPage>=0 ){
    int i;
    for(i=1; i<=pCur->iPage; i++){
      releasePage(pCur->apPage[i]);

    }
  }else{
    if( 
      SQLITE_OK!=(rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->apPage[0]))
    ){
      pCur->eState = CURSOR_INVALID;
      return rc;
    }


  }

  pRoot = pCur->apPage[0];
  assert( pRoot->pgno==pCur->pgnoRoot );
  pCur->iPage = 0;
  pCur->aiIdx[0] = 0;
  pCur->info.nSize = 0;
  pCur->atLast = 0;
  pCur->validNKey = 0;

  if( pRoot->nCell==0 && !pRoot->leaf ){
    Pgno subpage;
    assert( pRoot->pgno==1 );
    subpage = get4byte(&pRoot->aData[pRoot->hdrOffset+8]);
    assert( subpage>0 );
    pCur->eState = CURSOR_VALID;
    rc = moveToChild(pCur, subpage);
  }else{
    pCur->eState = ((pRoot->nCell>0)?CURSOR_VALID:CURSOR_INVALID);
  }

  return rc;
}

/*
** Move the cursor down to the left-most leaf entry beneath the
** entry to which it is currently pointing.
**
** The left-most leaf is the one with the smallest key - the first
** in ascending order.
*/
static int moveToLeftmost(BtCursor *pCur){
  Pgno pgno;
  int rc = SQLITE_OK;
  MemPage *pPage;

  assert( cursorHoldsMutex(pCur) );
  assert( pCur->eState==CURSOR_VALID );
  while( rc==SQLITE_OK && !(pPage = pCur->apPage[pCur->iPage])->leaf ){
    assert( pCur->aiIdx[pCur->iPage]<pPage->nCell );
    pgno = get4byte(findCell(pPage, pCur->aiIdx[pCur->iPage]));
    rc = moveToChild(pCur, pgno);
  }
  return rc;
}

/*
** Move the cursor down to the right-most leaf entry beneath the
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
static int moveToRightmost(BtCursor *pCur){
  Pgno pgno;
  int rc = SQLITE_OK;
  MemPage *pPage;

  assert( cursorHoldsMutex(pCur) );
  assert( pCur->eState==CURSOR_VALID );
  while( rc==SQLITE_OK && !(pPage = pCur->pPage)->leaf ){
    pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
    pCur->idx = pPage->nCell;
    rc = moveToChild(pCur, pgno);
  }
  if( rc==SQLITE_OK ){
    pCur->idx = pPage->nCell - 1;
    pCur->info.nSize = 0;
    pCur->validNKey = 0;
  }
  return rc;
}

/* Move the cursor to the first entry in the table.  Return SQLITE_OK
** on success.  Set *pRes to 0 if the cursor actually points to something
** or set *pRes to 1 if the table is empty.
*/
int sqlite3BtreeFirst(BtCursor *pCur, int *pRes){
  int rc;

  assert( cursorHoldsMutex(pCur) );
  assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
  rc = moveToRoot(pCur);
  if( rc==SQLITE_OK ){
    if( pCur->eState==CURSOR_INVALID ){
      assert( pCur->pPage->nCell==0 );
      *pRes = 1;
      rc = SQLITE_OK;
    }else{
      assert( pCur->pPage->nCell>0 );
      *pRes = 0;
      rc = moveToLeftmost(pCur);
    }
  }
  return rc;
}

/* Move the cursor to the last entry in the table.  Return SQLITE_OK
** on success.  Set *pRes to 0 if the cursor actually points to something
** or set *pRes to 1 if the table is empty.
*/
int sqlite3BtreeLast(BtCursor *pCur, int *pRes){
  int rc;
 
  assert( cursorHoldsMutex(pCur) );
  assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
  rc = moveToRoot(pCur);
  if( rc==SQLITE_OK ){
    if( CURSOR_INVALID==pCur->eState ){
      assert( pCur->pPage->nCell==0 );
      *pRes = 1;
    }else{
      assert( pCur->eState==CURSOR_VALID );
      *pRes = 0;
      rc = moveToRightmost(pCur);
      getCellInfo(pCur);
      pCur->atLast = rc==SQLITE_OK;







|

|



|


















|



|



















|







3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
static int moveToRightmost(BtCursor *pCur){
  Pgno pgno;
  int rc = SQLITE_OK;
  MemPage *pPage;

  assert( cursorHoldsMutex(pCur) );
  assert( pCur->eState==CURSOR_VALID );
  while( rc==SQLITE_OK && !(pPage = pCur->apPage[pCur->iPage])->leaf ){
    pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
    pCur->aiIdx[pCur->iPage] = pPage->nCell;
    rc = moveToChild(pCur, pgno);
  }
  if( rc==SQLITE_OK ){
    pCur->aiIdx[pCur->iPage] = pPage->nCell-1;
    pCur->info.nSize = 0;
    pCur->validNKey = 0;
  }
  return rc;
}

/* Move the cursor to the first entry in the table.  Return SQLITE_OK
** on success.  Set *pRes to 0 if the cursor actually points to something
** or set *pRes to 1 if the table is empty.
*/
int sqlite3BtreeFirst(BtCursor *pCur, int *pRes){
  int rc;

  assert( cursorHoldsMutex(pCur) );
  assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
  rc = moveToRoot(pCur);
  if( rc==SQLITE_OK ){
    if( pCur->eState==CURSOR_INVALID ){
      assert( pCur->apPage[pCur->iPage]->nCell==0 );
      *pRes = 1;
      rc = SQLITE_OK;
    }else{
      assert( pCur->apPage[pCur->iPage]->nCell>0 );
      *pRes = 0;
      rc = moveToLeftmost(pCur);
    }
  }
  return rc;
}

/* Move the cursor to the last entry in the table.  Return SQLITE_OK
** on success.  Set *pRes to 0 if the cursor actually points to something
** or set *pRes to 1 if the table is empty.
*/
int sqlite3BtreeLast(BtCursor *pCur, int *pRes){
  int rc;
 
  assert( cursorHoldsMutex(pCur) );
  assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
  rc = moveToRoot(pCur);
  if( rc==SQLITE_OK ){
    if( CURSOR_INVALID==pCur->eState ){
      assert( pCur->apPage[pCur->iPage]->nCell==0 );
      *pRes = 1;
    }else{
      assert( pCur->eState==CURSOR_VALID );
      *pRes = 0;
      rc = moveToRightmost(pCur);
      getCellInfo(pCur);
      pCur->atLast = rc==SQLITE_OK;
3745
3746
3747
3748
3749
3750
3751
3752


3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793

3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
  int rc;

  assert( cursorHoldsMutex(pCur) );
  assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );

  /* If the cursor is already positioned at the point we are trying
  ** to move to, then just return without doing any work */
  if( pCur->eState==CURSOR_VALID && pCur->validNKey && pCur->pPage->intKey ){


    if( pCur->info.nKey==intKey ){
      *pRes = 0;
      return SQLITE_OK;
    }
    if( pCur->atLast && pCur->info.nKey<intKey ){
      *pRes = -1;
      return SQLITE_OK;
    }
  }

  rc = moveToRoot(pCur);
  if( rc ){
    return rc;
  }
  assert( pCur->pPage );
  assert( pCur->pPage->isInit==PAGE_ISINIT_FULL );
  if( pCur->eState==CURSOR_INVALID ){
    *pRes = -1;
    assert( pCur->pPage->nCell==0 );
    return SQLITE_OK;
  }
  assert( pCur->pPage->intKey || pIdxKey );
  for(;;){
    int lwr, upr;
    Pgno chldPg;
    MemPage *pPage = pCur->pPage;
    int c = -1;  /* pRes return if table is empty must be -1 */
    lwr = 0;
    upr = pPage->nCell-1;
    if( !pPage->intKey && pIdxKey==0 ){
      rc = SQLITE_CORRUPT_BKPT;
      goto moveto_finish;
    }
    if( biasRight ){
      pCur->idx = upr;
    }else{
      pCur->idx = (upr+lwr)/2;
    }
    if( lwr<=upr ) for(;;){
      void *pCellKey;
      i64 nCellKey;

      pCur->info.nSize = 0;
      pCur->validNKey = 1;
      if( pPage->intKey ){
        u8 *pCell;
        pCell = findCell(pPage, pCur->idx) + pPage->childPtrSize;
        if( pPage->hasData ){
          u32 dummy;
          pCell += getVarint32(pCell, dummy);
        }
        getVarint(pCell, (u64*)&nCellKey);
        if( nCellKey==intKey ){
          c = 0;







|
>
>














|
|


|


|



|








|

|




>




|







3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
  int rc;

  assert( cursorHoldsMutex(pCur) );
  assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );

  /* If the cursor is already positioned at the point we are trying
  ** to move to, then just return without doing any work */
  if( pCur->eState==CURSOR_VALID && pCur->validNKey 
   && pCur->apPage[0]->intKey 
  ){
    if( pCur->info.nKey==intKey ){
      *pRes = 0;
      return SQLITE_OK;
    }
    if( pCur->atLast && pCur->info.nKey<intKey ){
      *pRes = -1;
      return SQLITE_OK;
    }
  }

  rc = moveToRoot(pCur);
  if( rc ){
    return rc;
  }
  assert( pCur->apPage[pCur->iPage] );
  assert( pCur->apPage[pCur->iPage]->isInit );
  if( pCur->eState==CURSOR_INVALID ){
    *pRes = -1;
    assert( pCur->apPage[pCur->iPage]->nCell==0 );
    return SQLITE_OK;
  }
  assert( pCur->apPage[0]->intKey || pIdxKey );
  for(;;){
    int lwr, upr;
    Pgno chldPg;
    MemPage *pPage = pCur->apPage[pCur->iPage];
    int c = -1;  /* pRes return if table is empty must be -1 */
    lwr = 0;
    upr = pPage->nCell-1;
    if( !pPage->intKey && pIdxKey==0 ){
      rc = SQLITE_CORRUPT_BKPT;
      goto moveto_finish;
    }
    if( biasRight ){
      pCur->aiIdx[pCur->iPage] = upr;
    }else{
      pCur->aiIdx[pCur->iPage] = (upr+lwr)/2;
    }
    if( lwr<=upr ) for(;;){
      void *pCellKey;
      i64 nCellKey;
      int idx = pCur->aiIdx[pCur->iPage];
      pCur->info.nSize = 0;
      pCur->validNKey = 1;
      if( pPage->intKey ){
        u8 *pCell;
        pCell = findCell(pPage, idx) + pPage->childPtrSize;
        if( pPage->hasData ){
          u32 dummy;
          pCell += getVarint32(pCell, dummy);
        }
        getVarint(pCell, (u64*)&nCellKey);
        if( nCellKey==intKey ){
          c = 0;
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
          sqlite3_free(pCellKey);
          if( rc ) goto moveto_finish;
        }
      }
      if( c==0 ){
        pCur->info.nKey = nCellKey;
        if( pPage->intKey && !pPage->leaf ){
          lwr = pCur->idx;
          upr = lwr - 1;
          break;
        }else{
          if( pRes ) *pRes = 0;
          rc = SQLITE_OK;
          goto moveto_finish;
        }
      }
      if( c<0 ){
        lwr = pCur->idx+1;
      }else{
        upr = pCur->idx-1;
      }
      if( lwr>upr ){
        pCur->info.nKey = nCellKey;
        break;
      }
      pCur->idx = (lwr+upr)/2;
    }
    assert( lwr==upr+1 );
    assert( pPage->isInit==PAGE_ISINIT_FULL );
    if( pPage->leaf ){
      chldPg = 0;
    }else if( lwr>=pPage->nCell ){
      chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]);
    }else{
      chldPg = get4byte(findCell(pPage, lwr));
    }
    if( chldPg==0 ){
      assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
      if( pRes ) *pRes = c;
      rc = SQLITE_OK;
      goto moveto_finish;
    }
    pCur->idx = lwr;
    pCur->info.nSize = 0;
    pCur->validNKey = 0;
    rc = moveToChild(pCur, chldPg);
    if( rc ) goto moveto_finish;
  }
moveto_finish:
  return rc;







|









|

|





|


|








|




|







3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
          sqlite3_free(pCellKey);
          if( rc ) goto moveto_finish;
        }
      }
      if( c==0 ){
        pCur->info.nKey = nCellKey;
        if( pPage->intKey && !pPage->leaf ){
          lwr = idx;
          upr = lwr - 1;
          break;
        }else{
          if( pRes ) *pRes = 0;
          rc = SQLITE_OK;
          goto moveto_finish;
        }
      }
      if( c<0 ){
        lwr = idx+1;
      }else{
        upr = idx-1;
      }
      if( lwr>upr ){
        pCur->info.nKey = nCellKey;
        break;
      }
      pCur->aiIdx[pCur->iPage] = (lwr+upr)/2;
    }
    assert( lwr==upr+1 );
    assert( pPage->isInit );
    if( pPage->leaf ){
      chldPg = 0;
    }else if( lwr>=pPage->nCell ){
      chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]);
    }else{
      chldPg = get4byte(findCell(pPage, lwr));
    }
    if( chldPg==0 ){
      assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell );
      if( pRes ) *pRes = c;
      rc = SQLITE_OK;
      goto moveto_finish;
    }
    pCur->aiIdx[pCur->iPage] = lwr;
    pCur->info.nSize = 0;
    pCur->validNKey = 0;
    rc = moveToChild(pCur, chldPg);
    if( rc ) goto moveto_finish;
  }
moveto_finish:
  return rc;
3933
3934
3935
3936
3937
3938
3939

3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959


3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
** Advance the cursor to the next entry in the database.  If
** successful then set *pRes=0.  If the cursor
** was already pointing to the last entry in the database before
** this routine was called, then set *pRes=1.
*/
int sqlite3BtreeNext(BtCursor *pCur, int *pRes){
  int rc;

  MemPage *pPage;

  assert( cursorHoldsMutex(pCur) );
  rc = restoreCursorPosition(pCur);
  if( rc!=SQLITE_OK ){
    return rc;
  }
  assert( pRes!=0 );
  pPage = pCur->pPage;
  if( CURSOR_INVALID==pCur->eState ){
    *pRes = 1;
    return SQLITE_OK;
  }
  if( pCur->skip>0 ){
    pCur->skip = 0;
    *pRes = 0;
    return SQLITE_OK;
  }
  pCur->skip = 0;



  assert( pPage->isInit==PAGE_ISINIT_FULL );
  assert( pCur->idx<pPage->nCell );

  pCur->idx++;
  pCur->info.nSize = 0;
  pCur->validNKey = 0;
  if( pCur->idx>=pPage->nCell ){
    if( !pPage->leaf ){
      rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8]));
      if( rc ) return rc;
      rc = moveToLeftmost(pCur);
      *pRes = 0;
      return rc;
    }
    do{
      if( sqlite3BtreeIsRootPage(pPage) ){
        *pRes = 1;
        pCur->eState = CURSOR_INVALID;
        return SQLITE_OK;
      }
      sqlite3BtreeMoveToParent(pCur);
      pPage = pCur->pPage;
    }while( pCur->idx>=pPage->nCell );
    *pRes = 0;
    if( pPage->intKey ){
      rc = sqlite3BtreeNext(pCur, pRes);
    }else{
      rc = SQLITE_OK;
    }
    return rc;







>








<











>
>
|
|

<


|








|





|
|







3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876

3877
3878
3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892

3893
3894
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
** Advance the cursor to the next entry in the database.  If
** successful then set *pRes=0.  If the cursor
** was already pointing to the last entry in the database before
** this routine was called, then set *pRes=1.
*/
int sqlite3BtreeNext(BtCursor *pCur, int *pRes){
  int rc;
  int idx;
  MemPage *pPage;

  assert( cursorHoldsMutex(pCur) );
  rc = restoreCursorPosition(pCur);
  if( rc!=SQLITE_OK ){
    return rc;
  }
  assert( pRes!=0 );

  if( CURSOR_INVALID==pCur->eState ){
    *pRes = 1;
    return SQLITE_OK;
  }
  if( pCur->skip>0 ){
    pCur->skip = 0;
    *pRes = 0;
    return SQLITE_OK;
  }
  pCur->skip = 0;

  pPage = pCur->apPage[pCur->iPage];
  idx = ++pCur->aiIdx[pCur->iPage];
  assert( pPage->isInit );
  assert( idx<=pPage->nCell );


  pCur->info.nSize = 0;
  pCur->validNKey = 0;
  if( idx>=pPage->nCell ){
    if( !pPage->leaf ){
      rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8]));
      if( rc ) return rc;
      rc = moveToLeftmost(pCur);
      *pRes = 0;
      return rc;
    }
    do{
      if( pCur->iPage==0 ){
        *pRes = 1;
        pCur->eState = CURSOR_INVALID;
        return SQLITE_OK;
      }
      sqlite3BtreeMoveToParent(pCur);
      pPage = pCur->apPage[pCur->iPage];
    }while( pCur->aiIdx[pCur->iPage]>=pPage->nCell );
    *pRes = 0;
    if( pPage->intKey ){
      rc = sqlite3BtreeNext(pCur, pRes);
    }else{
      rc = SQLITE_OK;
    }
    return rc;
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050



4051
4052
4053
4054
4055
4056
4057
** Step the cursor to the back to the previous entry in the database.  If
** successful then set *pRes=0.  If the cursor
** was already pointing to the first entry in the database before
** this routine was called, then set *pRes=1.
*/
int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){
  int rc;
  Pgno pgno;
  MemPage *pPage;

  assert( cursorHoldsMutex(pCur) );
  rc = restoreCursorPosition(pCur);
  if( rc!=SQLITE_OK ){
    return rc;
  }
  pCur->atLast = 0;
  if( CURSOR_INVALID==pCur->eState ){
    *pRes = 1;
    return SQLITE_OK;
  }
  if( pCur->skip<0 ){
    pCur->skip = 0;
    *pRes = 0;
    return SQLITE_OK;
  }
  pCur->skip = 0;

  pPage = pCur->pPage;
  assert( pPage->isInit==PAGE_ISINIT_FULL );
  assert( pCur->idx>=0 );
  if( !pPage->leaf ){
    pgno = get4byte( findCell(pPage, pCur->idx) );
    rc = moveToChild(pCur, pgno);
    if( rc ){
      return rc;
    }
    rc = moveToRightmost(pCur);
  }else{
    while( pCur->idx==0 ){
      if( sqlite3BtreeIsRootPage(pPage) ){
        pCur->eState = CURSOR_INVALID;
        *pRes = 1;
        return SQLITE_OK;
      }
      sqlite3BtreeMoveToParent(pCur);
      pPage = pCur->pPage;
    }
    pCur->idx--;
    pCur->info.nSize = 0;
    pCur->validNKey = 0;



    if( pPage->intKey && !pPage->leaf ){
      rc = sqlite3BtreePrevious(pCur, pRes);
    }else{
      rc = SQLITE_OK;
    }
  }
  *pRes = 0;







<



















|
|
<

|
|





|
|





<

<


>
>
>







3930
3931
3932
3933
3934
3935
3936

3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957

3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972

3973

3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
** Step the cursor to the back to the previous entry in the database.  If
** successful then set *pRes=0.  If the cursor
** was already pointing to the first entry in the database before
** this routine was called, then set *pRes=1.
*/
int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){
  int rc;

  MemPage *pPage;

  assert( cursorHoldsMutex(pCur) );
  rc = restoreCursorPosition(pCur);
  if( rc!=SQLITE_OK ){
    return rc;
  }
  pCur->atLast = 0;
  if( CURSOR_INVALID==pCur->eState ){
    *pRes = 1;
    return SQLITE_OK;
  }
  if( pCur->skip<0 ){
    pCur->skip = 0;
    *pRes = 0;
    return SQLITE_OK;
  }
  pCur->skip = 0;

  pPage = pCur->apPage[pCur->iPage];
  assert( pPage->isInit );

  if( !pPage->leaf ){
    int idx = pCur->aiIdx[pCur->iPage];
    rc = moveToChild(pCur, get4byte(findCell(pPage, idx)));
    if( rc ){
      return rc;
    }
    rc = moveToRightmost(pCur);
  }else{
    while( pCur->aiIdx[pCur->iPage]==0 ){
      if( pCur->iPage==0 ){
        pCur->eState = CURSOR_INVALID;
        *pRes = 1;
        return SQLITE_OK;
      }
      sqlite3BtreeMoveToParent(pCur);

    }

    pCur->info.nSize = 0;
    pCur->validNKey = 0;

    pCur->aiIdx[pCur->iPage]--;
    pPage = pCur->apPage[pCur->iPage];
    if( pPage->intKey && !pPage->leaf ){
      rc = sqlite3BtreePrevious(pCur, pRes);
    }else{
      rc = SQLITE_OK;
    }
  }
  *pRes = 0;
4313
4314
4315
4316
4317
4318
4319
4320
4321
4322
4323
4324
4325
4326
4327
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
  }

  assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );

end_allocate_page:
  releasePage(pTrunk);
  releasePage(pPrevTrunk);
  if( rc==SQLITE_OK ){
    if( (*ppPage)->isInit==PAGE_ISINIT_FULL ){
      releasePage(*ppPage);
      return SQLITE_CORRUPT_BKPT;
    }
    (*ppPage)->isInit = 0;
  }
  return rc;
}

/*
** Add a page of the database file to the freelist.
**
** sqlite3PagerUnref() is NOT called for pPage.
*/
static int freePage(MemPage *pPage){
  BtShared *pBt = pPage->pBt;
  MemPage *pPage1 = pBt->pPage1;
  int rc, n, k;

  /* Prepare the page for freeing */
  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  assert( pPage->pgno>1 );
  pPage->isInit = 0;
  releasePage(pPage->pParent);
  pPage->pParent = 0;

  /* Increment the free page count on pPage1 */
  rc = sqlite3PagerWrite(pPage1->pDbPage);
  if( rc ) return rc;
  n = get4byte(&pPage1->aData[36]);
  put4byte(&pPage1->aData[36], n+1);








|
<
|
|
<
<


















<
<







4241
4242
4243
4244
4245
4246
4247
4248

4249
4250


4251
4252
4253
4254
4255
4256
4257
4258
4259
4260
4261
4262
4263
4264
4265
4266
4267
4268


4269
4270
4271
4272
4273
4274
4275
  }

  assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );

end_allocate_page:
  releasePage(pTrunk);
  releasePage(pPrevTrunk);
  if( rc==SQLITE_OK && sqlite3PagerPageRefcount((*ppPage)->pDbPage)>1 ){

    releasePage(*ppPage);
    return SQLITE_CORRUPT_BKPT;


  }
  return rc;
}

/*
** Add a page of the database file to the freelist.
**
** sqlite3PagerUnref() is NOT called for pPage.
*/
static int freePage(MemPage *pPage){
  BtShared *pBt = pPage->pBt;
  MemPage *pPage1 = pBt->pPage1;
  int rc, n, k;

  /* Prepare the page for freeing */
  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  assert( pPage->pgno>1 );
  pPage->isInit = 0;



  /* Increment the free page count on pPage1 */
  rc = sqlite3PagerWrite(pPage1->pDbPage);
  if( rc ) return rc;
  n = get4byte(&pPage1->aData[36]);
  put4byte(&pPage1->aData[36], n+1);

4609
4610
4611
4612
4613
4614
4615
4616
4617
4618
4619
4620
4621
4622
4623
4624
4625
4626
4627
4628
4629
4630
4631
4632
4633
4634
4635
4636
4637
4638
4639
4640
4641
4642
4643
4644
static int reparentPage(
  BtShared *pBt,                /* B-Tree structure */
  Pgno pgno,                    /* Page number of child being adopted */
  MemPage *pNewParent,          /* New parent of pgno */
  int idx,                      /* Index of child page pgno in pNewParent */
  int updatePtrmap              /* If true, update pointer-map for pgno */
){
  MemPage *pThis;
  DbPage *pDbPage;

  assert( sqlite3_mutex_held(pBt->mutex) );
  assert( pNewParent!=0 );
  if( pgno==0 ) return SQLITE_OK;
  assert( pBt->pPager!=0 );
  pDbPage = sqlite3PagerLookup(pBt->pPager, pgno);
  if( pDbPage ){
    pThis = (MemPage *)sqlite3PagerGetExtra(pDbPage);
    if( pThis->isInit==PAGE_ISINIT_FULL ){
      assert( pThis->aData==sqlite3PagerGetData(pDbPage) );
      if( pThis->pParent!=pNewParent ){
        if( pThis->pParent ) sqlite3PagerUnref(pThis->pParent->pDbPage);
        pThis->pParent = pNewParent;
        sqlite3PagerRef(pNewParent->pDbPage);
      }
      pThis->idxParent = idx;
    }
    sqlite3PagerUnref(pDbPage);
  }

  if( ISAUTOVACUUM && updatePtrmap ){
    return ptrmapPut(pBt, pgno, PTRMAP_BTREE, pNewParent->pgno);
  }

#ifndef NDEBUG
  /* If the updatePtrmap flag was clear, assert that the entry in the
  ** pointer-map is already correct.







<

<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







4532
4533
4534
4535
4536
4537
4538

4539




















4540
4541
4542
4543
4544
4545
4546
static int reparentPage(
  BtShared *pBt,                /* B-Tree structure */
  Pgno pgno,                    /* Page number of child being adopted */
  MemPage *pNewParent,          /* New parent of pgno */
  int idx,                      /* Index of child page pgno in pNewParent */
  int updatePtrmap              /* If true, update pointer-map for pgno */
){

  DbPage *pDbPage;




















  if( ISAUTOVACUUM && updatePtrmap ){
    return ptrmapPut(pBt, pgno, PTRMAP_BTREE, pNewParent->pgno);
  }

#ifndef NDEBUG
  /* If the updatePtrmap flag was clear, assert that the entry in the
  ** pointer-map is already correct.
4880
4881
4882
4883
4884
4885
4886
4887
4888
4889
4890
4891
4892
4893
4894
4895
4896
4897
4898
4899
4900
4901
4902
4903
4904
4905
4906
4907
4908
4909
4910
4911
4912
4913


4914
4915
4916
4917
4918
4919
4920
4921
4922
4923
4924
4925
4926
4927
4928
4929
4930
4931
4932

4933
4934

4935
4936
4937
4938
4939
4940
4941
** in exchange for a larger degradation in INSERT and UPDATE performance.
** The value of NN appears to give the best results overall.
*/
#define NN 1             /* Number of neighbors on either side of pPage */
#define NB (NN*2+1)      /* Total pages involved in the balance */

/* Forward reference */
static int balance(MemPage*, int);

#ifndef SQLITE_OMIT_QUICKBALANCE
/*
** This version of balance() handles the common special case where
** a new entry is being inserted on the extreme right-end of the
** tree, in other words, when the new entry will become the largest
** entry in the tree.
**
** Instead of trying balance the 3 right-most leaf pages, just add
** a new page to the right-hand side and put the one new entry in
** that page.  This leaves the right side of the tree somewhat
** unbalanced.  But odds are that we will be inserting new entries
** at the end soon afterwards so the nearly empty page will quickly
** fill up.  On average.
**
** pPage is the leaf page which is the right-most page in the tree.
** pParent is its parent.  pPage must have a single overflow entry
** which is also the right-most entry on the page.
*/
static int balance_quick(MemPage *pPage, MemPage *pParent){
  int rc;
  MemPage *pNew = 0;
  Pgno pgnoNew;
  u8 *pCell;
  u16 szCell;
  CellInfo info;


  BtShared *pBt = pPage->pBt;
  int parentIdx = pParent->nCell;   /* pParent new divider cell index */
  int parentSize;                   /* Size of new divider cell */
  u8 parentCell[64];                /* Space for the new divider cell */

  assert( sqlite3_mutex_held(pPage->pBt->mutex) );

  /* Allocate a new page. Insert the overflow cell from pPage
  ** into it. Then remove the overflow cell from pPage.
  */
  rc = allocateBtreePage(pBt, &pNew, &pgnoNew, 0, 0);
  if( rc==SQLITE_OK ){
    pCell = pPage->aOvfl[0].pCell;
    szCell = cellSizePtr(pPage, pCell);
    zeroPage(pNew, pPage->aData[0]);
    assemblePage(pNew, 1, &pCell, &szCell);
    pPage->nOverflow = 0;
  
    /* Set the parent of the newly allocated page to pParent. */

    pNew->pParent = pParent;
    sqlite3PagerRef(pParent->pDbPage);

  
    /* pPage is currently the right-child of pParent. Change this
    ** so that the right-child is the new page allocated above and
    ** pPage is the next-to-right child. 
    **
    ** Ignore the return value of the call to fillInCell(). fillInCell()
    ** may only return other than SQLITE_OK if it is required to allocate







|



















|






>
>



















>


>







4782
4783
4784
4785
4786
4787
4788
4789
4790
4791
4792
4793
4794
4795
4796
4797
4798
4799
4800
4801
4802
4803
4804
4805
4806
4807
4808
4809
4810
4811
4812
4813
4814
4815
4816
4817
4818
4819
4820
4821
4822
4823
4824
4825
4826
4827
4828
4829
4830
4831
4832
4833
4834
4835
4836
4837
4838
4839
4840
4841
4842
4843
4844
4845
4846
4847
** in exchange for a larger degradation in INSERT and UPDATE performance.
** The value of NN appears to give the best results overall.
*/
#define NN 1             /* Number of neighbors on either side of pPage */
#define NB (NN*2+1)      /* Total pages involved in the balance */

/* Forward reference */
static int balance(BtCursor*, int);

#ifndef SQLITE_OMIT_QUICKBALANCE
/*
** This version of balance() handles the common special case where
** a new entry is being inserted on the extreme right-end of the
** tree, in other words, when the new entry will become the largest
** entry in the tree.
**
** Instead of trying balance the 3 right-most leaf pages, just add
** a new page to the right-hand side and put the one new entry in
** that page.  This leaves the right side of the tree somewhat
** unbalanced.  But odds are that we will be inserting new entries
** at the end soon afterwards so the nearly empty page will quickly
** fill up.  On average.
**
** pPage is the leaf page which is the right-most page in the tree.
** pParent is its parent.  pPage must have a single overflow entry
** which is also the right-most entry on the page.
*/
static int balance_quick(BtCursor *pCur){
  int rc;
  MemPage *pNew = 0;
  Pgno pgnoNew;
  u8 *pCell;
  u16 szCell;
  CellInfo info;
  MemPage *pPage = pCur->apPage[pCur->iPage];
  MemPage *pParent = pCur->apPage[pCur->iPage-1];
  BtShared *pBt = pPage->pBt;
  int parentIdx = pParent->nCell;   /* pParent new divider cell index */
  int parentSize;                   /* Size of new divider cell */
  u8 parentCell[64];                /* Space for the new divider cell */

  assert( sqlite3_mutex_held(pPage->pBt->mutex) );

  /* Allocate a new page. Insert the overflow cell from pPage
  ** into it. Then remove the overflow cell from pPage.
  */
  rc = allocateBtreePage(pBt, &pNew, &pgnoNew, 0, 0);
  if( rc==SQLITE_OK ){
    pCell = pPage->aOvfl[0].pCell;
    szCell = cellSizePtr(pPage, pCell);
    zeroPage(pNew, pPage->aData[0]);
    assemblePage(pNew, 1, &pCell, &szCell);
    pPage->nOverflow = 0;
  
    /* Set the parent of the newly allocated page to pParent. */
#if 0
    pNew->pParent = pParent;
    sqlite3PagerRef(pParent->pDbPage);
#endif
  
    /* pPage is currently the right-child of pParent. Change this
    ** so that the right-child is the new page allocated above and
    ** pPage is the next-to-right child. 
    **
    ** Ignore the return value of the call to fillInCell(). fillInCell()
    ** may only return other than SQLITE_OK if it is required to allocate
4982
4983
4984
4985
4986
4987
4988
4989
4990
4991
4992
4993
4994
4995


4996
4997
4998
4999
5000
5001
5002
5003
  ** not important, as they will be recalculated when the page is rolled
  ** back. But here, in balance_quick(), it is possible that pPage has 
  ** not yet been marked dirty or written into the journal file. Therefore
  ** it will not be rolled back and so it is important to make sure that
  ** the page data and contents of MemPage are consistent.
  */
  pPage->isInit = 0;
  sqlite3BtreeInitPage(pPage, pPage->pParent);
  sqlite3PagerUnref(pPage->pParent->pDbPage);

  /* If everything else succeeded, balance the parent page, in 
  ** case the divider cell inserted caused it to become overfull.
  */
  if( rc==SQLITE_OK ){


    rc = balance(pParent, 0);
  }
  return rc;
}
#endif /* SQLITE_OMIT_QUICKBALANCE */

/*
** This routine redistributes Cells on pPage and up to NN*2 siblings







|
<





>
>
|







4888
4889
4890
4891
4892
4893
4894
4895

4896
4897
4898
4899
4900
4901
4902
4903
4904
4905
4906
4907
4908
4909
4910
  ** not important, as they will be recalculated when the page is rolled
  ** back. But here, in balance_quick(), it is possible that pPage has 
  ** not yet been marked dirty or written into the journal file. Therefore
  ** it will not be rolled back and so it is important to make sure that
  ** the page data and contents of MemPage are consistent.
  */
  pPage->isInit = 0;
  sqlite3BtreeInitPage(pPage);


  /* If everything else succeeded, balance the parent page, in 
  ** case the divider cell inserted caused it to become overfull.
  */
  if( rc==SQLITE_OK ){
    releasePage(pPage);
    pCur->iPage--;
    rc = balance(pCur, 0);
  }
  return rc;
}
#endif /* SQLITE_OMIT_QUICKBALANCE */

/*
** This routine redistributes Cells on pPage and up to NN*2 siblings
5024
5025
5026
5027
5028
5029
5030
5031

5032
5033
5034
5035
5036
5037
5038
** might become overfull or underfull.  If that happens, then this routine
** is called recursively on the parent.
**
** If this routine fails for any reason, it might leave the database
** in a corrupted state.  So if this routine fails, the database should
** be rolled back.
*/
static int balance_nonroot(MemPage *pPage){

  MemPage *pParent;            /* The parent of pPage */
  BtShared *pBt;               /* The whole database */
  int nCell = 0;               /* Number of cells in apCell[] */
  int nMaxCells = 0;           /* Allocated size of apCell, szCell, aFrom. */
  int nOld;                    /* Number of pages in apOld[] */
  int nNew;                    /* Number of pages in apNew[] */
  int nDiv;                    /* Number of cells in apDiv[] */







|
>







4931
4932
4933
4934
4935
4936
4937
4938
4939
4940
4941
4942
4943
4944
4945
4946
** might become overfull or underfull.  If that happens, then this routine
** is called recursively on the parent.
**
** If this routine fails for any reason, it might leave the database
** in a corrupted state.  So if this routine fails, the database should
** be rolled back.
*/
static int balance_nonroot(BtCursor *pCur){
  MemPage *pPage;              /* The over or underfull page to balance */
  MemPage *pParent;            /* The parent of pPage */
  BtShared *pBt;               /* The whole database */
  int nCell = 0;               /* Number of cells in apCell[] */
  int nMaxCells = 0;           /* Allocated size of apCell, szCell, aFrom. */
  int nOld;                    /* Number of pages in apOld[] */
  int nNew;                    /* Number of pages in apNew[] */
  int nDiv;                    /* Number of cells in apDiv[] */
5059
5060
5061
5062
5063
5064
5065

5066
5067
5068
5069
5070

5071
5072
5073
5074
5075
5076
5077
5078
5079
5080
5081
5082
5083
5084
5085
5086
5087
5088
5089
5090
5091
5092
5093
5094
5095
5096
5097
5098
5099
5100
5101
5102
5103
5104
5105
5106
5107
5108
5109
5110
  u8 **apCell = 0;             /* All cells begin balanced */
  u16 *szCell;                 /* Local size of all cells in apCell[] */
  u8 *aCopy[NB];         /* Space for holding data of apCopy[] */
  u8 *aSpace1;           /* Space for copies of dividers cells before balance */
  u8 *aSpace2 = 0;       /* Space for overflow dividers cells after balance */
  u8 *aFrom = 0;


  assert( sqlite3_mutex_held(pPage->pBt->mutex) );

  /* 
  ** Find the parent page.
  */

  assert( pPage->isInit==PAGE_ISINIT_FULL );
  assert( sqlite3PagerIswriteable(pPage->pDbPage) || pPage->nOverflow==1 );
  pBt = pPage->pBt;
  pParent = pPage->pParent;
  assert( pParent );
  if( SQLITE_OK!=(rc = sqlite3PagerWrite(pParent->pDbPage)) ){
    return rc;
  }

  TRACE(("BALANCE: begin page %d child of %d\n", pPage->pgno, pParent->pgno));

#ifndef SQLITE_OMIT_QUICKBALANCE
  /*
  ** A special case:  If a new entry has just been inserted into a
  ** table (that is, a btree with integer keys and all data at the leaves)
  ** and the new entry is the right-most entry in the tree (it has the
  ** largest key) then use the special balance_quick() routine for
  ** balancing.  balance_quick() is much faster and results in a tighter
  ** packing of data in the common case.
  */
  if( pPage->leaf &&
      pPage->intKey &&
      pPage->nOverflow==1 &&
      pPage->aOvfl[0].idx==pPage->nCell &&
      pPage->pParent->pgno!=1 &&
      get4byte(&pParent->aData[pParent->hdrOffset+8])==pPage->pgno
  ){
    assert( pPage->intKey );
    /*
    ** TODO: Check the siblings to the left of pPage. It may be that
    ** they are not full and no new page is required.
    */
    return balance_quick(pPage, pParent);
  }
#endif

  if( SQLITE_OK!=(rc = sqlite3PagerWrite(pPage->pDbPage)) ){
    return rc;
  }








>





>
|


|




















|







|







4967
4968
4969
4970
4971
4972
4973
4974
4975
4976
4977
4978
4979
4980
4981
4982
4983
4984
4985
4986
4987
4988
4989
4990
4991
4992
4993
4994
4995
4996
4997
4998
4999
5000
5001
5002
5003
5004
5005
5006
5007
5008
5009
5010
5011
5012
5013
5014
5015
5016
5017
5018
5019
5020
  u8 **apCell = 0;             /* All cells begin balanced */
  u16 *szCell;                 /* Local size of all cells in apCell[] */
  u8 *aCopy[NB];         /* Space for holding data of apCopy[] */
  u8 *aSpace1;           /* Space for copies of dividers cells before balance */
  u8 *aSpace2 = 0;       /* Space for overflow dividers cells after balance */
  u8 *aFrom = 0;

  pPage = pCur->apPage[pCur->iPage];
  assert( sqlite3_mutex_held(pPage->pBt->mutex) );

  /* 
  ** Find the parent page.
  */
  assert( pCur->iPage>0 );
  assert( pPage->isInit );
  assert( sqlite3PagerIswriteable(pPage->pDbPage) || pPage->nOverflow==1 );
  pBt = pPage->pBt;
  pParent = pCur->apPage[pCur->iPage-1];
  assert( pParent );
  if( SQLITE_OK!=(rc = sqlite3PagerWrite(pParent->pDbPage)) ){
    return rc;
  }

  TRACE(("BALANCE: begin page %d child of %d\n", pPage->pgno, pParent->pgno));

#ifndef SQLITE_OMIT_QUICKBALANCE
  /*
  ** A special case:  If a new entry has just been inserted into a
  ** table (that is, a btree with integer keys and all data at the leaves)
  ** and the new entry is the right-most entry in the tree (it has the
  ** largest key) then use the special balance_quick() routine for
  ** balancing.  balance_quick() is much faster and results in a tighter
  ** packing of data in the common case.
  */
  if( pPage->leaf &&
      pPage->intKey &&
      pPage->nOverflow==1 &&
      pPage->aOvfl[0].idx==pPage->nCell &&
      pParent->pgno!=1 &&
      get4byte(&pParent->aData[pParent->hdrOffset+8])==pPage->pgno
  ){
    assert( pPage->intKey );
    /*
    ** TODO: Check the siblings to the left of pPage. It may be that
    ** they are not full and no new page is required.
    */
    return balance_quick(pCur);
  }
#endif

  if( SQLITE_OK!=(rc = sqlite3PagerWrite(pPage->pDbPage)) ){
    return rc;
  }

5121
5122
5123
5124
5125
5126
5127
5128
5129
5130
5131
5132
5133
5134
5135
5136
5137
5138
5139
5140
5141
5142
5143
      if( get4byte(findCell(pParent, idx))==pgno ){
        break;
      }
    }
    assert( idx<pParent->nCell
             || get4byte(&pParent->aData[pParent->hdrOffset+8])==pgno );
  }else{
    idx = pPage->idxParent;
  }

  /*
  ** Initialize variables so that it will be safe to jump
  ** directly to balance_cleanup at any moment.
  */
  nOld = nNew = 0;
  sqlite3PagerRef(pParent->pDbPage);

  /*
  ** Find sibling pages to pPage and the cells in pParent that divide
  ** the siblings.  An attempt is made to find NN siblings on either
  ** side of pPage.  More siblings are taken from one side, however, if
  ** pPage there are fewer than NN siblings on the other side.  If pParent
  ** has NB or fewer children then all children of pParent are taken.







|







|







5031
5032
5033
5034
5035
5036
5037
5038
5039
5040
5041
5042
5043
5044
5045
5046
5047
5048
5049
5050
5051
5052
5053
      if( get4byte(findCell(pParent, idx))==pgno ){
        break;
      }
    }
    assert( idx<pParent->nCell
             || get4byte(&pParent->aData[pParent->hdrOffset+8])==pgno );
  }else{
    idx = pCur->aiIdx[pCur->iPage-1];
  }

  /*
  ** Initialize variables so that it will be safe to jump
  ** directly to balance_cleanup at any moment.
  */
  nOld = nNew = 0;
  /* sqlite3PagerRef(pParent->pDbPage); */

  /*
  ** Find sibling pages to pPage and the cells in pParent that divide
  ** the siblings.  An attempt is made to find NN siblings on either
  ** side of pPage.  More siblings are taken from one side, however, if
  ** pPage there are fewer than NN siblings on the other side.  If pParent
  ** has NB or fewer children then all children of pParent are taken.
5157
5158
5159
5160
5161
5162
5163
5164
5165
5166
5167
5168
5169
5170
5171
5172
5173
      assert( !pParent->leaf );
      pgnoOld[i] = get4byte(apDiv[i]);
    }else if( k==pParent->nCell ){
      pgnoOld[i] = get4byte(&pParent->aData[pParent->hdrOffset+8]);
    }else{
      break;
    }
    rc = getAndInitPage(pBt, pgnoOld[i], &apOld[i], pParent);
    if( rc ) goto balance_cleanup;
    apOld[i]->idxParent = k;
    apCopy[i] = 0;
    assert( i==nOld );
    nOld++;
    nMaxCells += 1+apOld[i]->nCell+apOld[i]->nOverflow;
  }

  /* Make nMaxCells a multiple of 4 in order to preserve 8-byte







|

|







5067
5068
5069
5070
5071
5072
5073
5074
5075
5076
5077
5078
5079
5080
5081
5082
5083
      assert( !pParent->leaf );
      pgnoOld[i] = get4byte(apDiv[i]);
    }else if( k==pParent->nCell ){
      pgnoOld[i] = get4byte(&pParent->aData[pParent->hdrOffset+8]);
    }else{
      break;
    }
    rc = getAndInitPage(pBt, pgnoOld[i], &apOld[i]);
    if( rc ) goto balance_cleanup;
    /* apOld[i]->idxParent = k; */
    apCopy[i] = 0;
    assert( i==nOld );
    nOld++;
    nMaxCells += 1+apOld[i]->nCell+apOld[i]->nOverflow;
  }

  /* Make nMaxCells a multiple of 4 in order to preserve 8-byte
5628
5629
5630
5631
5632
5633
5634
5635
5636
5637


5638
5639
5640
5641
5642
5643
5644
5645
5646
5647
5648
5649
5650
5651
5652
5653
5654
5655
5656
5657
5658
5659
5660
5661
5662
5663
5664
5665

5666
5667
5668
5669
5670
5671
5672
5673
5674


5675
5676
5677
5678
5679
5680
5681
#endif

  /*
  ** Balance the parent page.  Note that the current page (pPage) might
  ** have been added to the freelist so it might no longer be initialized.
  ** But the parent page will always be initialized.
  */
  assert( pParent->isInit==PAGE_ISINIT_FULL );
  sqlite3ScratchFree(apCell);
  apCell = 0;


  rc = balance(pParent, 0);
  
  /*
  ** Cleanup before returning.
  */
balance_cleanup:
  sqlite3PageFree(aSpace2);
  sqlite3ScratchFree(apCell);
  for(i=0; i<nOld; i++){
    releasePage(apOld[i]);
  }
  for(i=0; i<nNew; i++){
    releasePage(apNew[i]);
  }

  releasePage(pParent);
  TRACE(("BALANCE: finished with %d: old=%d new=%d cells=%d\n",
          pPage->pgno, nOld, nNew, nCell));

  return rc;
}

/*
** This routine is called for the root page of a btree when the root
** page contains no cells.  This is an opportunity to make the tree
** shallower by one level.
*/
static int balance_shallower(MemPage *pPage){

  MemPage *pChild;             /* The only child page of pPage */
  Pgno pgnoChild;              /* Page number for pChild */
  int rc = SQLITE_OK;          /* Return code from subprocedures */
  BtShared *pBt;                  /* The main BTree structure */
  int mxCellPerPage;           /* Maximum number of cells per page */
  u8 **apCell;                 /* All cells from pages being balanced */
  u16 *szCell;                 /* Local size of all cells */

  assert( pPage->pParent==0 );


  assert( pPage->nCell==0 );
  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  pBt = pPage->pBt;
  mxCellPerPage = MX_CELL(pBt);
  apCell = sqlite3Malloc( mxCellPerPage*(sizeof(u8*)+sizeof(u16)) );
  if( apCell==0 ) return SQLITE_NOMEM;
  szCell = (u16*)&apCell[mxCellPerPage];







|


>
>
|














|











|
>








|
>
>







5538
5539
5540
5541
5542
5543
5544
5545
5546
5547
5548
5549
5550
5551
5552
5553
5554
5555
5556
5557
5558
5559
5560
5561
5562
5563
5564
5565
5566
5567
5568
5569
5570
5571
5572
5573
5574
5575
5576
5577
5578
5579
5580
5581
5582
5583
5584
5585
5586
5587
5588
5589
5590
5591
5592
5593
5594
5595
5596
#endif

  /*
  ** Balance the parent page.  Note that the current page (pPage) might
  ** have been added to the freelist so it might no longer be initialized.
  ** But the parent page will always be initialized.
  */
  assert( pParent->isInit );
  sqlite3ScratchFree(apCell);
  apCell = 0;
  releasePage(pPage);
  pCur->iPage--;
  rc = balance(pCur, 0);
  
  /*
  ** Cleanup before returning.
  */
balance_cleanup:
  sqlite3PageFree(aSpace2);
  sqlite3ScratchFree(apCell);
  for(i=0; i<nOld; i++){
    releasePage(apOld[i]);
  }
  for(i=0; i<nNew; i++){
    releasePage(apNew[i]);
  }

  /* releasePage(pParent); */
  TRACE(("BALANCE: finished with %d: old=%d new=%d cells=%d\n",
          pPage->pgno, nOld, nNew, nCell));

  return rc;
}

/*
** This routine is called for the root page of a btree when the root
** page contains no cells.  This is an opportunity to make the tree
** shallower by one level.
*/
static int balance_shallower(BtCursor *pCur){
  MemPage *pPage;              /* Root page of B-Tree */
  MemPage *pChild;             /* The only child page of pPage */
  Pgno pgnoChild;              /* Page number for pChild */
  int rc = SQLITE_OK;          /* Return code from subprocedures */
  BtShared *pBt;                  /* The main BTree structure */
  int mxCellPerPage;           /* Maximum number of cells per page */
  u8 **apCell;                 /* All cells from pages being balanced */
  u16 *szCell;                 /* Local size of all cells */

  assert( pCur->iPage==0 );
  pPage = pCur->apPage[0];

  assert( pPage->nCell==0 );
  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  pBt = pPage->pBt;
  mxCellPerPage = MX_CELL(pBt);
  apCell = sqlite3Malloc( mxCellPerPage*(sizeof(u8*)+sizeof(u16)) );
  if( apCell==0 ) return SQLITE_NOMEM;
  szCell = (u16*)&apCell[mxCellPerPage];
5697
5698
5699
5700
5701
5702
5703
5704
5705
5706
5707
5708
5709
5710
5711
    */
    pgnoChild = get4byte(&pPage->aData[pPage->hdrOffset+8]);
    assert( pgnoChild>0 );
    assert( pgnoChild<=pagerPagecount(pPage->pBt->pPager) );
    rc = sqlite3BtreeGetPage(pPage->pBt, pgnoChild, &pChild, 0);
    if( rc ) goto end_shallow_balance;
    if( pPage->pgno==1 ){
      rc = sqlite3BtreeInitPage(pChild, pPage);
      if( rc ) goto end_shallow_balance;
      assert( pChild->nOverflow==0 );
      if( pChild->nFree>=100 ){
        /* The child information will fit on the root page, so do the
        ** copy */
        int i;
        zeroPage(pPage, pChild->aData[0]);







|







5612
5613
5614
5615
5616
5617
5618
5619
5620
5621
5622
5623
5624
5625
5626
    */
    pgnoChild = get4byte(&pPage->aData[pPage->hdrOffset+8]);
    assert( pgnoChild>0 );
    assert( pgnoChild<=pagerPagecount(pPage->pBt->pPager) );
    rc = sqlite3BtreeGetPage(pPage->pBt, pgnoChild, &pChild, 0);
    if( rc ) goto end_shallow_balance;
    if( pPage->pgno==1 ){
      rc = sqlite3BtreeInitPage(pChild);
      if( rc ) goto end_shallow_balance;
      assert( pChild->nOverflow==0 );
      if( pChild->nFree>=100 ){
        /* The child information will fit on the root page, so do the
        ** copy */
        int i;
        zeroPage(pPage, pChild->aData[0]);
5723
5724
5725
5726
5727
5728
5729
5730
5731
5732
5733
5734
5735
5736
5737
5738
        /* The child has more information that will fit on the root.
        ** The tree is already balanced.  Do nothing. */
        TRACE(("BALANCE: child %d will not fit on page 1\n", pChild->pgno));
      }
    }else{
      memcpy(pPage->aData, pChild->aData, pPage->pBt->usableSize);
      pPage->isInit = 0;
      pPage->pParent = 0;
      rc = sqlite3BtreeInitPage(pPage, 0);
      assert( rc==SQLITE_OK );
      freePage(pChild);
      TRACE(("BALANCE: transfer child %d into root %d\n",
              pChild->pgno, pPage->pgno));
    }
    rc = reparentChildPages(pPage, 1);
    assert( pPage->nOverflow==0 );







<
|







5638
5639
5640
5641
5642
5643
5644

5645
5646
5647
5648
5649
5650
5651
5652
        /* The child has more information that will fit on the root.
        ** The tree is already balanced.  Do nothing. */
        TRACE(("BALANCE: child %d will not fit on page 1\n", pChild->pgno));
      }
    }else{
      memcpy(pPage->aData, pChild->aData, pPage->pBt->usableSize);
      pPage->isInit = 0;

      rc = sqlite3BtreeInitPage(pPage);
      assert( rc==SQLITE_OK );
      freePage(pChild);
      TRACE(("BALANCE: transfer child %d into root %d\n",
              pChild->pgno, pPage->pgno));
    }
    rc = reparentChildPages(pPage, 1);
    assert( pPage->nOverflow==0 );
5758
5759
5760
5761
5762
5763
5764
5765
5766

5767
5768
5769
5770
5771
5772
5773
5774
5775
5776
5777


5778
5779
5780
5781
5782
5783
5784
5785
5786
5787
5788
5789
5790
5791
5792

5793
5794
5795
5796
5797
5798
5799
5800
5801
5802
5803
5804
5805
5806
5807

5808
5809

5810
5811
5812
5813

5814


5815


5816
5817
5818
5819
5820
5821
5822
5823


5824
5825



5826
5827
5828


5829
5830
5831
5832
5833
5834
5835
5836
5837
5838
5839
5840
5841
5842
5843
5844
5845
5846
5847
5848
**
** When this happens, Create a new child page and copy the
** contents of the root into the child.  Then make the root
** page an empty page with rightChild pointing to the new
** child.   Finally, call balance_internal() on the new child
** to cause it to split.
*/
static int balance_deeper(MemPage *pPage){
  int rc;             /* Return value from subprocedures */

  MemPage *pChild;    /* Pointer to a new child page */
  Pgno pgnoChild;     /* Page number of the new child page */
  BtShared *pBt;         /* The BTree */
  int usableSize;     /* Total usable size of a page */
  u8 *data;           /* Content of the parent page */
  u8 *cdata;          /* Content of the child page */
  int hdr;            /* Offset to page header in parent */
  int cbrk;           /* Offset to content of first cell in parent */

  assert( pPage->pParent==0 );
  assert( pPage->nOverflow>0 );


  pBt = pPage->pBt;
  assert( sqlite3_mutex_held(pBt->mutex) );
  rc = allocateBtreePage(pBt, &pChild, &pgnoChild, pPage->pgno, 0);
  if( rc ) return rc;
  assert( sqlite3PagerIswriteable(pChild->pDbPage) );
  usableSize = pBt->usableSize;
  data = pPage->aData;
  hdr = pPage->hdrOffset;
  cbrk = get2byte(&data[hdr+5]);
  cdata = pChild->aData;
  memcpy(cdata, &data[hdr], pPage->cellOffset+2*pPage->nCell-hdr);
  memcpy(&cdata[cbrk], &data[cbrk], usableSize-cbrk);
  if( pChild->isInit==PAGE_ISINIT_FULL ) return SQLITE_CORRUPT;
  rc = sqlite3BtreeInitPage(pChild, pPage);
  if( rc ) goto balancedeeper_out;

  memcpy(pChild->aOvfl, pPage->aOvfl, pPage->nOverflow*sizeof(pPage->aOvfl[0]));
  pChild->nOverflow = pPage->nOverflow;
  if( pChild->nOverflow ){
    pChild->nFree = 0;
  }
  assert( pChild->nCell==pPage->nCell );
  zeroPage(pPage, pChild->aData[0] & ~PTF_LEAF);
  put4byte(&pPage->aData[pPage->hdrOffset+8], pgnoChild);
  TRACE(("BALANCE: copy root %d into %d\n", pPage->pgno, pChild->pgno));
  if( ISAUTOVACUUM ){
    int i;
    rc = ptrmapPut(pBt, pChild->pgno, PTRMAP_BTREE, pPage->pgno);
    if( rc ) goto balancedeeper_out;
    for(i=0; i<pChild->nCell; i++){
      rc = ptrmapPutOvfl(pChild, i);

      if( rc!=SQLITE_OK ){
        goto balancedeeper_out;

      }
    }
    rc = reparentChildPages(pChild, 1);
  }

  if( rc==SQLITE_OK ){


    rc = balance_nonroot(pChild);


  }

balancedeeper_out:
  releasePage(pChild);
  return rc;
}

/*


** Decide if the page pPage needs to be balanced.  If balancing is
** required, call the appropriate balancing routine.



*/
static int balance(MemPage *pPage, int insert){
  int rc = SQLITE_OK;


  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  if( pPage->pParent==0 ){
    rc = sqlite3PagerWrite(pPage->pDbPage);
    if( rc==SQLITE_OK && pPage->nOverflow>0 ){
      rc = balance_deeper(pPage);
    }
    if( rc==SQLITE_OK && pPage->nCell==0 ){
      rc = balance_shallower(pPage);
    }
  }else{
    if( pPage->nOverflow>0 || 
        (!insert && pPage->nFree>pPage->pBt->usableSize*2/3) ){
      rc = balance_nonroot(pPage);
    }
  }
  return rc;
}

/*
** This routine checks all cursors that point to table pgnoRoot.







|

>









|
|
>
>












|
|
|
>
|
|
|
|
|
|
|
|
|
|
|
|
<
|
|
>
|
<
>


<

>

>
>
|
>
>


<
<




>
>
|
|
>
>
>

|

>
>

|


|


|



|
|







5672
5673
5674
5675
5676
5677
5678
5679
5680
5681
5682
5683
5684
5685
5686
5687
5688
5689
5690
5691
5692
5693
5694
5695
5696
5697
5698
5699
5700
5701
5702
5703
5704
5705
5706
5707
5708
5709
5710
5711
5712
5713
5714
5715
5716
5717
5718
5719
5720
5721
5722

5723
5724
5725
5726

5727
5728
5729

5730
5731
5732
5733
5734
5735
5736
5737
5738
5739


5740
5741
5742
5743
5744
5745
5746
5747
5748
5749
5750
5751
5752
5753
5754
5755
5756
5757
5758
5759
5760
5761
5762
5763
5764
5765
5766
5767
5768
5769
5770
5771
5772
5773
5774
5775
**
** When this happens, Create a new child page and copy the
** contents of the root into the child.  Then make the root
** page an empty page with rightChild pointing to the new
** child.   Finally, call balance_internal() on the new child
** to cause it to split.
*/
static int balance_deeper(BtCursor *pCur){
  int rc;             /* Return value from subprocedures */
  MemPage *pPage;     /* Pointer to the root page */
  MemPage *pChild;    /* Pointer to a new child page */
  Pgno pgnoChild;     /* Page number of the new child page */
  BtShared *pBt;         /* The BTree */
  int usableSize;     /* Total usable size of a page */
  u8 *data;           /* Content of the parent page */
  u8 *cdata;          /* Content of the child page */
  int hdr;            /* Offset to page header in parent */
  int cbrk;           /* Offset to content of first cell in parent */

  assert( pCur->iPage==0 );
  assert( pCur->apPage[0]->nOverflow>0 );

  pPage = pCur->apPage[0];
  pBt = pPage->pBt;
  assert( sqlite3_mutex_held(pBt->mutex) );
  rc = allocateBtreePage(pBt, &pChild, &pgnoChild, pPage->pgno, 0);
  if( rc ) return rc;
  assert( sqlite3PagerIswriteable(pChild->pDbPage) );
  usableSize = pBt->usableSize;
  data = pPage->aData;
  hdr = pPage->hdrOffset;
  cbrk = get2byte(&data[hdr+5]);
  cdata = pChild->aData;
  memcpy(cdata, &data[hdr], pPage->cellOffset+2*pPage->nCell-hdr);
  memcpy(&cdata[cbrk], &data[cbrk], usableSize-cbrk);
  
  rc = sqlite3BtreeInitPage(pChild);
  if( rc==SQLITE_OK ){
    int nCopy = pPage->nOverflow*sizeof(pPage->aOvfl[0]);
    memcpy(pChild->aOvfl, pPage->aOvfl, nCopy);
    pChild->nOverflow = pPage->nOverflow;
    if( pChild->nOverflow ){
      pChild->nFree = 0;
    }
    assert( pChild->nCell==pPage->nCell );
    zeroPage(pPage, pChild->aData[0] & ~PTF_LEAF);
    put4byte(&pPage->aData[pPage->hdrOffset+8], pgnoChild);
    TRACE(("BALANCE: copy root %d into %d\n", pPage->pgno, pChild->pgno));
    if( ISAUTOVACUUM ){
      int i;
      rc = ptrmapPut(pBt, pChild->pgno, PTRMAP_BTREE, pPage->pgno);

      for(i=0; rc==SQLITE_OK && i<pChild->nCell; i++){
        rc = ptrmapPutOvfl(pChild, i);
      }
      if( rc==SQLITE_OK ){

        rc = reparentChildPages(pChild, 1);
      }
    }

  }

  if( rc==SQLITE_OK ){
    pCur->iPage++;
    pCur->apPage[1] = pChild;
    rc = balance_nonroot(pCur);
  }else{
    releasePage(pChild);
  }



  return rc;
}

/*
** The page that pCur currently points to has just been modified in
** some way. This function figures out if this modification means the
** tree needs to be balanced, and if so calls the appropriate balancing 
** routine.
** 
** Parameter isInsert is true if a new cell was just inserted into the
** page, or false otherwise.
*/
static int balance(BtCursor *pCur, int isInsert){
  int rc = SQLITE_OK;
  MemPage *pPage = pCur->apPage[pCur->iPage];

  assert( sqlite3_mutex_held(pPage->pBt->mutex) );
  if( pCur->iPage==0 ){
    rc = sqlite3PagerWrite(pPage->pDbPage);
    if( rc==SQLITE_OK && pPage->nOverflow>0 ){
      rc = balance_deeper(pCur);
    }
    if( rc==SQLITE_OK && pPage->nCell==0 ){
      rc = balance_shallower(pCur);
    }
  }else{
    if( pPage->nOverflow>0 || 
        (!isInsert && pPage->nFree>pPage->pBt->usableSize*2/3) ){
      rc = balance_nonroot(pCur);
    }
  }
  return rc;
}

/*
** This routine checks all cursors that point to table pgnoRoot.
5928
5929
5930
5931
5932
5933
5934

5935
5936
5937
5938
5939
5940
5941
  const void *pData, int nData,  /* The data of the new record */
  int nZero,                     /* Number of extra 0 bytes to append to data */
  int appendBias                 /* True if this is likely an append */
){
  int rc;
  int loc;
  int szNew;

  MemPage *pPage;
  Btree *p = pCur->pBtree;
  BtShared *pBt = p->pBt;
  unsigned char *oldCell;
  unsigned char *newCell = 0;

  assert( cursorHoldsMutex(pCur) );







>







5855
5856
5857
5858
5859
5860
5861
5862
5863
5864
5865
5866
5867
5868
5869
  const void *pData, int nData,  /* The data of the new record */
  int nZero,                     /* Number of extra 0 bytes to append to data */
  int appendBias                 /* True if this is likely an append */
){
  int rc;
  int loc;
  int szNew;
  int idx;
  MemPage *pPage;
  Btree *p = pCur->pBtree;
  BtShared *pBt = p->pBt;
  unsigned char *oldCell;
  unsigned char *newCell = 0;

  assert( cursorHoldsMutex(pCur) );
5960
5961
5962
5963
5964
5965
5966
5967
5968
5969
5970
5971
5972
5973
5974
5975
5976
5977
5978
5979
5980

5981
5982
5983
5984
5985
5986
5987
5988
5989
5990
5991
5992
5993
5994
5995
5996
5997
5998
5999
6000
6001
6002
6003
6004
6005
6006
6007
6008
6009
6010
6011
6012
6013
6014
6015
6016
6017
6018
6019
6020
6021
6022
6023
6024
6025
6026

6027
6028
6029
6030
6031
6032
6033
6034
6035
6036
6037
6038
6039
6040
6041
6042
6043
6044
6045
6046
6047
6048
6049
6050
6051
  if( 
    SQLITE_OK!=(rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur)) ||
    SQLITE_OK!=(rc = sqlite3BtreeMoveto(pCur, pKey, nKey, appendBias, &loc))
  ){
    return rc;
  }

  pPage = pCur->pPage;
  assert( pPage->intKey || nKey>=0 );
  assert( pPage->leaf || !pPage->intKey );
  TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n",
          pCur->pgnoRoot, nKey, nData, pPage->pgno,
          loc==0 ? "overwrite" : "new entry"));
  assert( pPage->isInit==PAGE_ISINIT_FULL );
  allocateTempSpace(pBt);
  newCell = pBt->pTmpSpace;
  if( newCell==0 ) return SQLITE_NOMEM;
  rc = fillInCell(pPage, newCell, pKey, nKey, pData, nData, nZero, &szNew);
  if( rc ) goto end_insert;
  assert( szNew==cellSizePtr(pPage, newCell) );
  assert( szNew<=MX_CELL_SIZE(pBt) );

  if( loc==0 && CURSOR_VALID==pCur->eState ){
    u16 szOld;
    assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
    rc = sqlite3PagerWrite(pPage->pDbPage);
    if( rc ){
      goto end_insert;
    }
    oldCell = findCell(pPage, pCur->idx);
    if( !pPage->leaf ){
      memcpy(newCell, oldCell, 4);
    }
    szOld = cellSizePtr(pPage, oldCell);
    rc = clearCell(pPage, oldCell);
    if( rc ) goto end_insert;
    dropCell(pPage, pCur->idx, szOld);
  }else if( loc<0 && pPage->nCell>0 ){
    assert( pPage->leaf );
    pCur->idx++;
    pCur->info.nSize = 0;
    pCur->validNKey = 0;
  }else{
    assert( pPage->leaf );
  }
  rc = insertCell(pPage, pCur->idx, newCell, szNew, 0, 0);
  if( rc!=SQLITE_OK ) goto end_insert;
  rc = balance(pPage, 1);
  if( rc==SQLITE_OK ){
    /* balance() may have messed up the chain of MemPage.pParent pointers.
    ** To prevent moveToRoot() from trying to use them to locate the root
    ** page of this table, release the reference to the current page before
    ** calling it.
    */
    releasePage(pCur->pPage);
    pCur->pPage = 0;
    moveToRoot(pCur);
  }
end_insert:
  return rc;
}

/*
** Delete the entry that the cursor is pointing to.  The cursor
** is left pointing at a random location.
*/
int sqlite3BtreeDelete(BtCursor *pCur){
  MemPage *pPage = pCur->pPage;

  unsigned char *pCell;
  int rc;
  Pgno pgnoChild = 0;
  Btree *p = pCur->pBtree;
  BtShared *pBt = p->pBt;

  assert( cursorHoldsMutex(pCur) );
  assert( pPage->isInit==PAGE_ISINIT_FULL );
  if( pBt->inTransaction!=TRANS_WRITE ){
    /* Must start a transaction before doing a delete */
    rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
    return rc;
  }
  assert( !pBt->readOnly );
  if( pCur->eState==CURSOR_FAULT ){
    return pCur->skip;
  }
  if( pCur->idx >= pPage->nCell ){
    return SQLITE_ERROR;  /* The cursor is not pointing to anything */
  }
  if( !pCur->wrFlag ){
    return SQLITE_PERM;   /* Did not open this cursor for writing */
  }
  if( checkReadLocks(pCur->pBtree, pCur->pgnoRoot, pCur, pCur->info.nKey) ){
    return SQLITE_LOCKED; /* The table pCur points to has a read lock */







|





|







>


|




|






|


|





|

|

<
<
<
<
<
<
<











|
>







|









|







5888
5889
5890
5891
5892
5893
5894
5895
5896
5897
5898
5899
5900
5901
5902
5903
5904
5905
5906
5907
5908
5909
5910
5911
5912
5913
5914
5915
5916
5917
5918
5919
5920
5921
5922
5923
5924
5925
5926
5927
5928
5929
5930
5931
5932
5933
5934
5935
5936







5937
5938
5939
5940
5941
5942
5943
5944
5945
5946
5947
5948
5949
5950
5951
5952
5953
5954
5955
5956
5957
5958
5959
5960
5961
5962
5963
5964
5965
5966
5967
5968
5969
5970
5971
5972
5973
5974
  if( 
    SQLITE_OK!=(rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur)) ||
    SQLITE_OK!=(rc = sqlite3BtreeMoveto(pCur, pKey, nKey, appendBias, &loc))
  ){
    return rc;
  }

  pPage = pCur->apPage[pCur->iPage];
  assert( pPage->intKey || nKey>=0 );
  assert( pPage->leaf || !pPage->intKey );
  TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n",
          pCur->pgnoRoot, nKey, nData, pPage->pgno,
          loc==0 ? "overwrite" : "new entry"));
  assert( pPage->isInit );
  allocateTempSpace(pBt);
  newCell = pBt->pTmpSpace;
  if( newCell==0 ) return SQLITE_NOMEM;
  rc = fillInCell(pPage, newCell, pKey, nKey, pData, nData, nZero, &szNew);
  if( rc ) goto end_insert;
  assert( szNew==cellSizePtr(pPage, newCell) );
  assert( szNew<=MX_CELL_SIZE(pBt) );
  idx = pCur->aiIdx[pCur->iPage];
  if( loc==0 && CURSOR_VALID==pCur->eState ){
    u16 szOld;
    assert( idx<pPage->nCell );
    rc = sqlite3PagerWrite(pPage->pDbPage);
    if( rc ){
      goto end_insert;
    }
    oldCell = findCell(pPage, idx);
    if( !pPage->leaf ){
      memcpy(newCell, oldCell, 4);
    }
    szOld = cellSizePtr(pPage, oldCell);
    rc = clearCell(pPage, oldCell);
    if( rc ) goto end_insert;
    dropCell(pPage, idx, szOld);
  }else if( loc<0 && pPage->nCell>0 ){
    assert( pPage->leaf );
    idx = ++pCur->aiIdx[pCur->iPage];
    pCur->info.nSize = 0;
    pCur->validNKey = 0;
  }else{
    assert( pPage->leaf );
  }
  rc = insertCell(pPage, idx, newCell, szNew, 0, 0);
  if( rc!=SQLITE_OK ) goto end_insert;
  rc = balance(pCur, 1);
  if( rc==SQLITE_OK ){







    moveToRoot(pCur);
  }
end_insert:
  return rc;
}

/*
** Delete the entry that the cursor is pointing to.  The cursor
** is left pointing at a random location.
*/
int sqlite3BtreeDelete(BtCursor *pCur){
  MemPage *pPage = pCur->apPage[pCur->iPage];
  int idx;
  unsigned char *pCell;
  int rc;
  Pgno pgnoChild = 0;
  Btree *p = pCur->pBtree;
  BtShared *pBt = p->pBt;

  assert( cursorHoldsMutex(pCur) );
  assert( pPage->isInit );
  if( pBt->inTransaction!=TRANS_WRITE ){
    /* Must start a transaction before doing a delete */
    rc = pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
    return rc;
  }
  assert( !pBt->readOnly );
  if( pCur->eState==CURSOR_FAULT ){
    return pCur->skip;
  }
  if( pCur->aiIdx[pCur->iPage]>=pPage->nCell ){
    return SQLITE_ERROR;  /* The cursor is not pointing to anything */
  }
  if( !pCur->wrFlag ){
    return SQLITE_PERM;   /* Did not open this cursor for writing */
  }
  if( checkReadLocks(pCur->pBtree, pCur->pgnoRoot, pCur, pCur->info.nKey) ){
    return SQLITE_LOCKED; /* The table pCur points to has a read lock */
6064
6065
6066
6067
6068
6069
6070

6071
6072
6073
6074
6075
6076
6077
6078
6079
6080
6081
6082
6083
6084
6085
6086
6087
6088



6089
6090
6091
6092
6093
6094
6095


6096
6097
6098
6099
6100
6101
6102
6103
6104
6105
6106
6107
6108
6109
6110
6111
6112
6113
6114
6115
6116
6117
6118
6119
6120
6121
6122
6123
6124
6125
6126
6127
6128
6129
6130
6131
6132
6133
6134
6135
6136
6137
6138
6139
6140
6141
6142
6143
6144
    return rc;
  }

  /* Locate the cell within its page and leave pCell pointing to the
  ** data. The clearCell() call frees any overflow pages associated with the
  ** cell. The cell itself is still intact.
  */

  pCell = findCell(pPage, pCur->idx);
  if( !pPage->leaf ){
    pgnoChild = get4byte(pCell);
  }
  rc = clearCell(pPage, pCell);
  if( rc ){
    return rc;
  }

  if( !pPage->leaf ){
    /*
    ** The entry we are about to delete is not a leaf so if we do not
    ** do something we will leave a hole on an internal page.
    ** We have to fill the hole by moving in a cell from a leaf.  The
    ** next Cell after the one to be deleted is guaranteed to exist and
    ** to be a leaf so we can use it.
    */
    BtCursor leafCur;



    unsigned char *pNext;
    int notUsed;
    unsigned char *tempCell = 0;
    assert( !pPage->intKey );
    sqlite3BtreeGetTempCursor(pCur, &leafCur);
    rc = sqlite3BtreeNext(&leafCur, &notUsed);
    if( rc==SQLITE_OK ){


      rc = sqlite3PagerWrite(leafCur.pPage->pDbPage);
    }
    if( rc==SQLITE_OK ){
      u16 szNext;
      TRACE(("DELETE: table=%d delete internal from %d replace from leaf %d\n",
         pCur->pgnoRoot, pPage->pgno, leafCur.pPage->pgno));
      dropCell(pPage, pCur->idx, cellSizePtr(pPage, pCell));
      pNext = findCell(leafCur.pPage, leafCur.idx);
      szNext = cellSizePtr(leafCur.pPage, pNext);
      assert( MX_CELL_SIZE(pBt)>=szNext+4 );
      allocateTempSpace(pBt);
      tempCell = pBt->pTmpSpace;
      if( tempCell==0 ){
        rc = SQLITE_NOMEM;
      }
      if( rc==SQLITE_OK ){
        rc = insertCell(pPage, pCur->idx, pNext-4, szNext+4, tempCell, 0);
      }
      if( rc==SQLITE_OK ){
        put4byte(findOverflowCell(pPage, pCur->idx), pgnoChild);
        rc = balance(pPage, 0);
      }
      if( rc==SQLITE_OK ){
        dropCell(leafCur.pPage, leafCur.idx, szNext);
        rc = balance(leafCur.pPage, 0);
      }
    }
    sqlite3BtreeReleaseTempCursor(&leafCur);
  }else{
    TRACE(("DELETE: table=%d delete from leaf %d\n",
       pCur->pgnoRoot, pPage->pgno));
    dropCell(pPage, pCur->idx, cellSizePtr(pPage, pCell));
    rc = balance(pPage, 0);
  }
  if( rc==SQLITE_OK ){
    /* balance() may have messed up the chain of MemPage.pParent pointers.
    ** To prevent moveToRoot() from trying to use them to locate the root
    ** page of this table, release the reference to the current page before
    ** calling it.
    */
    releasePage(pCur->pPage);
    pCur->pPage = 0;
    moveToRoot(pCur);
  }
  return rc;
}

/*
** Create a new BTree table.  Write into *piTable the page







>
|

















>
>
>







>
>
|




|
|
|
|







|


|
|


|
|






|
|


<
<
<
<
<
<
<







5987
5988
5989
5990
5991
5992
5993
5994
5995
5996
5997
5998
5999
6000
6001
6002
6003
6004
6005
6006
6007
6008
6009
6010
6011
6012
6013
6014
6015
6016
6017
6018
6019
6020
6021
6022
6023
6024
6025
6026
6027
6028
6029
6030
6031
6032
6033
6034
6035
6036
6037
6038
6039
6040
6041
6042
6043
6044
6045
6046
6047
6048
6049
6050
6051
6052
6053
6054
6055
6056
6057
6058
6059







6060
6061
6062
6063
6064
6065
6066
    return rc;
  }

  /* Locate the cell within its page and leave pCell pointing to the
  ** data. The clearCell() call frees any overflow pages associated with the
  ** cell. The cell itself is still intact.
  */
  idx = pCur->aiIdx[pCur->iPage];
  pCell = findCell(pPage, idx);
  if( !pPage->leaf ){
    pgnoChild = get4byte(pCell);
  }
  rc = clearCell(pPage, pCell);
  if( rc ){
    return rc;
  }

  if( !pPage->leaf ){
    /*
    ** The entry we are about to delete is not a leaf so if we do not
    ** do something we will leave a hole on an internal page.
    ** We have to fill the hole by moving in a cell from a leaf.  The
    ** next Cell after the one to be deleted is guaranteed to exist and
    ** to be a leaf so we can use it.
    */
    BtCursor leafCur;
    MemPage *pLeafPage;
    int iLeafIdx;

    unsigned char *pNext;
    int notUsed;
    unsigned char *tempCell = 0;
    assert( !pPage->intKey );
    sqlite3BtreeGetTempCursor(pCur, &leafCur);
    rc = sqlite3BtreeNext(&leafCur, &notUsed);
    if( rc==SQLITE_OK ){
      pLeafPage = leafCur.apPage[leafCur.iPage];
      iLeafIdx = leafCur.aiIdx[leafCur.iPage];
      rc = sqlite3PagerWrite(pLeafPage->pDbPage);
    }
    if( rc==SQLITE_OK ){
      u16 szNext;
      TRACE(("DELETE: table=%d delete internal from %d replace from leaf %d\n",
         pCur->pgnoRoot, pPage->pgno, pLeafPage->pgno));
      dropCell(pPage, idx, cellSizePtr(pPage, pCell));
      pNext = findCell(pLeafPage, iLeafIdx);
      szNext = cellSizePtr(pLeafPage, pNext);
      assert( MX_CELL_SIZE(pBt)>=szNext+4 );
      allocateTempSpace(pBt);
      tempCell = pBt->pTmpSpace;
      if( tempCell==0 ){
        rc = SQLITE_NOMEM;
      }
      if( rc==SQLITE_OK ){
        rc = insertCell(pPage, idx, pNext-4, szNext+4, tempCell, 0);
      }
      if( rc==SQLITE_OK ){
        put4byte(findOverflowCell(pPage, idx), pgnoChild);
        rc = balance(pCur, 0);
      }
      if( rc==SQLITE_OK ){
        dropCell(pLeafPage, iLeafIdx, szNext);
        rc = balance(&leafCur, 0);
      }
    }
    sqlite3BtreeReleaseTempCursor(&leafCur);
  }else{
    TRACE(("DELETE: table=%d delete from leaf %d\n",
       pCur->pgnoRoot, pPage->pgno));
    dropCell(pPage, idx, cellSizePtr(pPage, pCell));
    rc = balance(pCur, 0);
  }
  if( rc==SQLITE_OK ){







    moveToRoot(pCur);
  }
  return rc;
}

/*
** Create a new BTree table.  Write into *piTable the page
6307
6308
6309
6310
6311
6312
6313
6314
6315
6316
6317
6318
6319
6320
6321
  int i;

  assert( sqlite3_mutex_held(pBt->mutex) );
  if( pgno>pagerPagecount(pBt->pPager) ){
    return SQLITE_CORRUPT_BKPT;
  }

  rc = getAndInitPage(pBt, pgno, &pPage, pParent);
  if( rc ) goto cleardatabasepage_out;
  for(i=0; i<pPage->nCell; i++){
    pCell = findCell(pPage, i);
    if( !pPage->leaf ){
      rc = clearDatabasePage(pBt, get4byte(pCell), pPage, 1);
      if( rc ) goto cleardatabasepage_out;
    }







|







6229
6230
6231
6232
6233
6234
6235
6236
6237
6238
6239
6240
6241
6242
6243
  int i;

  assert( sqlite3_mutex_held(pBt->mutex) );
  if( pgno>pagerPagecount(pBt->pPager) ){
    return SQLITE_CORRUPT_BKPT;
  }

  rc = getAndInitPage(pBt, pgno, &pPage);
  if( rc ) goto cleardatabasepage_out;
  for(i=0; i<pPage->nCell; i++){
    pCell = findCell(pPage, i);
    if( !pPage->leaf ){
      rc = clearDatabasePage(pBt, get4byte(pCell), pPage, 1);
      if( rc ) goto cleardatabasepage_out;
    }
6610
6611
6612
6613
6614
6615
6616
6617
6618
6619
6620
6621
6622
6623
6624
*/
int sqlite3BtreeFlags(BtCursor *pCur){
  /* TODO: What about CURSOR_REQUIRESEEK state? Probably need to call
  ** restoreCursorPosition() here.
  */
  MemPage *pPage;
  restoreCursorPosition(pCur);
  pPage = pCur->pPage;
  assert( cursorHoldsMutex(pCur) );
  assert( pPage->pBt==pCur->pBt );
  return pPage ? pPage->aData[pPage->hdrOffset] : 0;
}


/*







|







6532
6533
6534
6535
6536
6537
6538
6539
6540
6541
6542
6543
6544
6545
6546
*/
int sqlite3BtreeFlags(BtCursor *pCur){
  /* TODO: What about CURSOR_REQUIRESEEK state? Probably need to call
  ** restoreCursorPosition() here.
  */
  MemPage *pPage;
  restoreCursorPosition(pCur);
  pPage = pCur->apPage[pCur->iPage];
  assert( cursorHoldsMutex(pCur) );
  assert( pPage->pBt==pCur->pBt );
  return pPage ? pPage->aData[pPage->hdrOffset] : 0;
}


/*
6826
6827
6828
6829
6830
6831
6832
6833
6834
6835
6836
6837
6838
6839
6840
  if( iPage==0 ) return 0;
  if( checkRef(pCheck, iPage, zParentContext) ) return 0;
  if( (rc = sqlite3BtreeGetPage(pBt, (Pgno)iPage, &pPage, 0))!=0 ){
    checkAppendMsg(pCheck, zContext,
       "unable to get the page. error code=%d", rc);
    return 0;
  }
  if( (rc = sqlite3BtreeInitPage(pPage, pParent))!=0 ){
    checkAppendMsg(pCheck, zContext, 
                   "sqlite3BtreeInitPage() returns error code %d", rc);
    releasePage(pPage);
    return 0;
  }

  /* Check out all the cells.







|







6748
6749
6750
6751
6752
6753
6754
6755
6756
6757
6758
6759
6760
6761
6762
  if( iPage==0 ) return 0;
  if( checkRef(pCheck, iPage, zParentContext) ) return 0;
  if( (rc = sqlite3BtreeGetPage(pBt, (Pgno)iPage, &pPage, 0))!=0 ){
    checkAppendMsg(pCheck, zContext,
       "unable to get the page. error code=%d", rc);
    return 0;
  }
  if( (rc = sqlite3BtreeInitPage(pPage))!=0 ){
    checkAppendMsg(pCheck, zContext, 
                   "sqlite3BtreeInitPage() returns error code %d", rc);
    releasePage(pPage);
    return 0;
  }

  /* Check out all the cells.
7457
7458
7459
7460
7461
7462
7463
7464
7465
7466
7467
7468
7469
7470
7471
    return SQLITE_READONLY;
  }
  assert( !pCsr->pBt->readOnly 
          && pCsr->pBt->inTransaction==TRANS_WRITE );
  if( checkReadLocks(pCsr->pBtree, pCsr->pgnoRoot, pCsr, 0) ){
    return SQLITE_LOCKED; /* The table pCur points to has a read lock */
  }
  if( pCsr->eState==CURSOR_INVALID || !pCsr->pPage->intKey ){
    return SQLITE_ERROR;
  }

  return accessPayload(pCsr, offset, amt, (unsigned char *)z, 0, 1);
}

/* 







|







7379
7380
7381
7382
7383
7384
7385
7386
7387
7388
7389
7390
7391
7392
7393
    return SQLITE_READONLY;
  }
  assert( !pCsr->pBt->readOnly 
          && pCsr->pBt->inTransaction==TRANS_WRITE );
  if( checkReadLocks(pCsr->pBtree, pCsr->pgnoRoot, pCsr, 0) ){
    return SQLITE_LOCKED; /* The table pCur points to has a read lock */
  }
  if( pCsr->eState==CURSOR_INVALID || !pCsr->apPage[pCsr->iPage]->intKey ){
    return SQLITE_ERROR;
  }

  return accessPayload(pCsr, offset, amt, (unsigned char *)z, 0, 1);
}

/* 
Changes to src/btreeInt.h.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
** 2004 April 6
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btreeInt.h,v 1.31 2008/09/18 17:34:44 danielk1977 Exp $
**
** This file implements a external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
**
**     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
**     "Sorting And Searching", pages 473-480. Addison-Wesley
**     Publishing Company, Reading, Massachusetts.











|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
** 2004 April 6
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btreeInt.h,v 1.32 2008/09/29 11:49:48 danielk1977 Exp $
**
** This file implements a external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
**
**     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
**     "Sorting And Searching", pages 473-480. Addison-Wesley
**     Publishing Company, Reading, Massachusetts.
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
  u8 leaf;             /* True if leaf flag is set */
  u8 hasData;          /* True if this page stores data */
  u8 hdrOffset;        /* 100 for page 1.  0 otherwise */
  u8 childPtrSize;     /* 0 if leaf==1.  4 if leaf==0 */
  u16 maxLocal;        /* Copy of BtShared.maxLocal or BtShared.maxLeaf */
  u16 minLocal;        /* Copy of BtShared.minLocal or BtShared.minLeaf */
  u16 cellOffset;      /* Index in aData of first cell pointer */
  u16 idxParent;       /* Index in parent of this node */
  u16 nFree;           /* Number of free bytes on the page */
  u16 nCell;           /* Number of cells on this page, local and ovfl */
  u16 maskPage;        /* Mask for page offset */
  struct _OvflCell {   /* Cells that will not fit on aData[] */
    u8 *pCell;          /* Pointers to the body of the overflow cell */
    u16 idx;            /* Insert this cell before idx-th non-overflow cell */
  } aOvfl[5];
  BtShared *pBt;       /* Pointer to BtShared that this page is part of */
  u8 *aData;           /* Pointer to disk image of the page data */
  DbPage *pDbPage;     /* Pager page handle */
  Pgno pgno;           /* Page number for this page */
  MemPage *pParent;    /* The parent of this page.  NULL for root */
};

/*
** Possible values for the MemPage.isInit variable. When a page is first
** loaded or if the data stored in the MemPage struct is invalidated, 
** MemPage.isInit is set to PAGE_ISINIT_NONE. If the MemPage structure
** is fully initialized, then MemPage.isInit is set to PAGE_ISINIT_FULL.
** MemPage.isInit is set to PAGE_ISINIT_DATA when the MemPage struct is
** populated, but the MemPage.pParent variable is not necessarily correct.
*/
#define PAGE_ISINIT_NONE 0
#define PAGE_ISINIT_DATA 1
#define PAGE_ISINIT_FULL 2

/*
** The in-memory image of a disk page has the auxiliary information appended
** to the end.  EXTRA_SIZE is the number of bytes of space needed to hold
** that extra information.
*/
#define EXTRA_SIZE sizeof(MemPage)








<











<


<
<
<
<
<
<
<
<
<
<
<
<







277
278
279
280
281
282
283

284
285
286
287
288
289
290
291
292
293
294

295
296












297
298
299
300
301
302
303
  u8 leaf;             /* True if leaf flag is set */
  u8 hasData;          /* True if this page stores data */
  u8 hdrOffset;        /* 100 for page 1.  0 otherwise */
  u8 childPtrSize;     /* 0 if leaf==1.  4 if leaf==0 */
  u16 maxLocal;        /* Copy of BtShared.maxLocal or BtShared.maxLeaf */
  u16 minLocal;        /* Copy of BtShared.minLocal or BtShared.minLeaf */
  u16 cellOffset;      /* Index in aData of first cell pointer */

  u16 nFree;           /* Number of free bytes on the page */
  u16 nCell;           /* Number of cells on this page, local and ovfl */
  u16 maskPage;        /* Mask for page offset */
  struct _OvflCell {   /* Cells that will not fit on aData[] */
    u8 *pCell;          /* Pointers to the body of the overflow cell */
    u16 idx;            /* Insert this cell before idx-th non-overflow cell */
  } aOvfl[5];
  BtShared *pBt;       /* Pointer to BtShared that this page is part of */
  u8 *aData;           /* Pointer to disk image of the page data */
  DbPage *pDbPage;     /* Pager page handle */
  Pgno pgno;           /* Page number for this page */

};













/*
** The in-memory image of a disk page has the auxiliary information appended
** to the end.  EXTRA_SIZE is the number of bytes of space needed to hold
** that extra information.
*/
#define EXTRA_SIZE sizeof(MemPage)

422
423
424
425
426
427
428











429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462




463
464
465
466
467
468
469
  u32 nPayload;  /* Total amount of payload */
  u16 nHeader;   /* Size of the cell content header in bytes */
  u16 nLocal;    /* Amount of payload held locally */
  u16 iOverflow; /* Offset to overflow page number.  Zero if no overflow */
  u16 nSize;     /* Size of the cell content on the main b-tree page */
};












/*
** A cursor is a pointer to a particular entry within a particular
** b-tree within a database file.
**
** The entry is identified by its MemPage and the index in
** MemPage.aCell[] of the entry.
**
** When a single database file can shared by two more database connections,
** but cursors cannot be shared.  Each cursor is associated with a
** particular database connection identified BtCursor.pBtree.db.
**
** Fields in this structure are accessed under the BtShared.mutex
** found at self->pBt->mutex. 
*/
struct BtCursor {
  Btree *pBtree;            /* The Btree to which this cursor belongs */
  BtShared *pBt;            /* The BtShared this cursor points to */
  BtCursor *pNext, *pPrev;  /* Forms a linked list of all cursors */
  struct KeyInfo *pKeyInfo; /* Argument passed to comparison function */
  Pgno pgnoRoot;            /* The root page of this tree */
  MemPage *pPage;           /* Page that contains the entry */
  int idx;                  /* Index of the entry in pPage->aCell[] */
  CellInfo info;            /* A parse of the cell we are pointing at */
  u8 wrFlag;                /* True if writable */
  u8 atLast;                /* Cursor pointing to the last entry */
  u8 validNKey;             /* True if info.nKey is valid */
  u8 eState;                /* One of the CURSOR_XXX constants (see below) */
  void *pKey;      /* Saved key that was cursor's last known position */
  i64 nKey;        /* Size of pKey, or last integer key */
  int skip;        /* (skip<0) -> Prev() is a no-op. (skip>0) -> Next() is */
#ifndef SQLITE_OMIT_INCRBLOB
  u8 isIncrblobHandle;      /* True if this cursor is an incr. io handle */
  Pgno *aOverflow;          /* Cache of overflow page locations */
#endif




};

/*
** Potential values for BtCursor.eState.
**
** CURSOR_VALID:
**   Cursor points to a valid entry. getPayload() etc. may be called.







>
>
>
>
>
>
>
>
>
>
>




















<
<












>
>
>
>







408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445


446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
  u32 nPayload;  /* Total amount of payload */
  u16 nHeader;   /* Size of the cell content header in bytes */
  u16 nLocal;    /* Amount of payload held locally */
  u16 iOverflow; /* Offset to overflow page number.  Zero if no overflow */
  u16 nSize;     /* Size of the cell content on the main b-tree page */
};

/*
** Maximum depth of an SQLite B-Tree structure. Any B-Tree deeper than
** this will be declared corrupt. This value is calculated based on a
** maximum database size of 2^31 pages a minimum fanout of 2 for a
** root-node and 3 for all other internal nodes.
**
** If a tree that appears to be taller than this is encountered, it is
** assumed that the database is corrupt.
*/
#define BTCURSOR_MAX_DEPTH 20

/*
** A cursor is a pointer to a particular entry within a particular
** b-tree within a database file.
**
** The entry is identified by its MemPage and the index in
** MemPage.aCell[] of the entry.
**
** When a single database file can shared by two more database connections,
** but cursors cannot be shared.  Each cursor is associated with a
** particular database connection identified BtCursor.pBtree.db.
**
** Fields in this structure are accessed under the BtShared.mutex
** found at self->pBt->mutex. 
*/
struct BtCursor {
  Btree *pBtree;            /* The Btree to which this cursor belongs */
  BtShared *pBt;            /* The BtShared this cursor points to */
  BtCursor *pNext, *pPrev;  /* Forms a linked list of all cursors */
  struct KeyInfo *pKeyInfo; /* Argument passed to comparison function */
  Pgno pgnoRoot;            /* The root page of this tree */


  CellInfo info;            /* A parse of the cell we are pointing at */
  u8 wrFlag;                /* True if writable */
  u8 atLast;                /* Cursor pointing to the last entry */
  u8 validNKey;             /* True if info.nKey is valid */
  u8 eState;                /* One of the CURSOR_XXX constants (see below) */
  void *pKey;      /* Saved key that was cursor's last known position */
  i64 nKey;        /* Size of pKey, or last integer key */
  int skip;        /* (skip<0) -> Prev() is a no-op. (skip>0) -> Next() is */
#ifndef SQLITE_OMIT_INCRBLOB
  u8 isIncrblobHandle;      /* True if this cursor is an incr. io handle */
  Pgno *aOverflow;          /* Cache of overflow page locations */
#endif

  i16 iPage;                            /* Index of current page in apPage */
  MemPage *apPage[BTCURSOR_MAX_DEPTH];  /* Pages from root to current page */
  u16 aiIdx[BTCURSOR_MAX_DEPTH];        /* Current index in apPage[i] */
};

/*
** Potential values for BtCursor.eState.
**
** CURSOR_VALID:
**   Cursor points to a valid entry. getPayload() etc. may be called.
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
#define get4byte sqlite3Get4byte
#define put4byte sqlite3Put4byte

/*
** Internal routines that should be accessed by the btree layer only.
*/
int sqlite3BtreeGetPage(BtShared*, Pgno, MemPage**, int);
int sqlite3BtreeInitPage(MemPage *pPage, MemPage *pParent);
void sqlite3BtreeParseCellPtr(MemPage*, u8*, CellInfo*);
void sqlite3BtreeParseCell(MemPage*, int, CellInfo*);
int sqlite3BtreeRestoreCursorPosition(BtCursor *pCur);
void sqlite3BtreeGetTempCursor(BtCursor *pCur, BtCursor *pTempCur);
void sqlite3BtreeReleaseTempCursor(BtCursor *pCur);
int sqlite3BtreeIsRootPage(MemPage *pPage);
void sqlite3BtreeMoveToParent(BtCursor *pCur);







|





<

624
625
626
627
628
629
630
631
632
633
634
635
636

637
#define get4byte sqlite3Get4byte
#define put4byte sqlite3Put4byte

/*
** Internal routines that should be accessed by the btree layer only.
*/
int sqlite3BtreeGetPage(BtShared*, Pgno, MemPage**, int);
int sqlite3BtreeInitPage(MemPage *pPage);
void sqlite3BtreeParseCellPtr(MemPage*, u8*, CellInfo*);
void sqlite3BtreeParseCell(MemPage*, int, CellInfo*);
int sqlite3BtreeRestoreCursorPosition(BtCursor *pCur);
void sqlite3BtreeGetTempCursor(BtCursor *pCur, BtCursor *pTempCur);
void sqlite3BtreeReleaseTempCursor(BtCursor *pCur);

void sqlite3BtreeMoveToParent(BtCursor *pCur);
Changes to src/pager.c.
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.495 2008/09/26 21:08:08 drh Exp $
*/
#ifndef SQLITE_OMIT_DISKIO
#include "sqliteInt.h"

/*
** Macros for troubleshooting.  Normally turned off
*/







|







14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.496 2008/09/29 11:49:48 danielk1977 Exp $
*/
#ifndef SQLITE_OMIT_DISKIO
#include "sqliteInt.h"

/*
** Macros for troubleshooting.  Normally turned off
*/
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
** It is never written to disk.  This can be used to implement an
** in-memory database.
*/
int sqlite3PagerOpen(
  sqlite3_vfs *pVfs,       /* The virtual file system to use */
  Pager **ppPager,         /* Return the Pager structure here */
  const char *zFilename,   /* Name of the database file to open */
  void (*xDesc)(DbPage*),  /* Page destructor function */
  int nExtra,              /* Extra bytes append to each in-memory page */
  int flags,               /* flags controlling this file */
  int vfsFlags             /* flags passed through to sqlite3_vfs.xOpen() */
){
  u8 *pPtr;
  Pager *pPager = 0;
  int rc = SQLITE_OK;







<







1724
1725
1726
1727
1728
1729
1730

1731
1732
1733
1734
1735
1736
1737
** It is never written to disk.  This can be used to implement an
** in-memory database.
*/
int sqlite3PagerOpen(
  sqlite3_vfs *pVfs,       /* The virtual file system to use */
  Pager **ppPager,         /* Return the Pager structure here */
  const char *zFilename,   /* Name of the database file to open */

  int nExtra,              /* Extra bytes append to each in-memory page */
  int flags,               /* flags controlling this file */
  int vfsFlags             /* flags passed through to sqlite3_vfs.xOpen() */
){
  u8 *pPtr;
  Pager *pPager = 0;
  int rc = SQLITE_OK;
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
  */
  if( !pPager || !pPager->pTmpSpace ){
    sqlite3OsClose(pPager->fd);
    sqlite3_free(pPager);
    return ((rc==SQLITE_OK)?SQLITE_NOMEM:rc);
  }
  nExtra = FORCE_ALIGNMENT(nExtra);
  sqlite3PcacheOpen(szPageDflt, nExtra, !memDb, xDesc, 
                    !memDb?pagerStress:0, (void *)pPager, pPager->pPCache);

  PAGERTRACE3("OPEN %d %s\n", FILEHANDLEID(pPager->fd), pPager->zFilename);
  IOTRACE(("OPEN %p %s\n", pPager, pPager->zFilename))

  /* Fill in Pager.zDirectory[] */
  memcpy(pPager->zDirectory, pPager->zFilename, nPathname+1);







|







1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
  */
  if( !pPager || !pPager->pTmpSpace ){
    sqlite3OsClose(pPager->fd);
    sqlite3_free(pPager);
    return ((rc==SQLITE_OK)?SQLITE_NOMEM:rc);
  }
  nExtra = FORCE_ALIGNMENT(nExtra);
  sqlite3PcacheOpen(szPageDflt, nExtra, !memDb,
                    !memDb?pagerStress:0, (void *)pPager, pPager->pPCache);

  PAGERTRACE3("OPEN %d %s\n", FILEHANDLEID(pPager->fd), pPager->zFilename);
  IOTRACE(("OPEN %p %s\n", pPager, pPager->zFilename))

  /* Fill in Pager.zDirectory[] */
  memcpy(pPager->zDirectory, pPager->zFilename, nPathname+1);
3870
3871
3872
3873
3874
3875
3876







3877
3878
3879
3880
3881
3882
3883

/*
** Return the number of references to the pager.
*/
int sqlite3PagerRefcount(Pager *pPager){
  return sqlite3PcacheRefCount(pPager->pPCache);
}








#ifdef SQLITE_TEST
/*
** This routine is used for testing and analysis only.
*/
int *sqlite3PagerStats(Pager *pPager){
  static int a[11];







>
>
>
>
>
>
>







3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889

/*
** Return the number of references to the pager.
*/
int sqlite3PagerRefcount(Pager *pPager){
  return sqlite3PcacheRefCount(pPager->pPCache);
}

/*
** Return the number of references to the specified page.
*/
int sqlite3PagerPageRefcount(DbPage *pPage){
  return sqlite3PcachePageRefcount(pPage);
}

#ifdef SQLITE_TEST
/*
** This routine is used for testing and analysis only.
*/
int *sqlite3PagerStats(Pager *pPager){
  static int a[11];
4172
4173
4174
4175
4176
4177
4178
4179
4180
4181
4182
4183
4184
4185
4186
}
#endif

/*
** Return a pointer to the data for the specified page.
*/
void *sqlite3PagerGetData(DbPage *pPg){
  assert( pPg->nRef>0 );
  return pPg->pData;
}

/*
** Return a pointer to the Pager.nExtra bytes of "extra" space 
** allocated along with the specified page.
*/







|







4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
}
#endif

/*
** Return a pointer to the data for the specified page.
*/
void *sqlite3PagerGetData(DbPage *pPg){
  assert( pPg->nRef>0 || pPg->pPager->memDb );
  return pPg->pData;
}

/*
** Return a pointer to the Pager.nExtra bytes of "extra" space 
** allocated along with the specified page.
*/
Changes to src/pager.h.
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This header file defines the interface that the sqlite page cache
** subsystem.  The page cache subsystem reads and writes a file a page
** at a time and provides a journal for rollback.
**
** @(#) $Id: pager.h,v 1.84 2008/09/26 21:08:08 drh Exp $
*/

#ifndef _PAGER_H_
#define _PAGER_H_

/*
** If defined as non-zero, auto-vacuum is enabled by default. Otherwise







|







9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This header file defines the interface that the sqlite page cache
** subsystem.  The page cache subsystem reads and writes a file a page
** at a time and provides a journal for rollback.
**
** @(#) $Id: pager.h,v 1.85 2008/09/29 11:49:48 danielk1977 Exp $
*/

#ifndef _PAGER_H_
#define _PAGER_H_

/*
** If defined as non-zero, auto-vacuum is enabled by default. Otherwise
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84

85
86
87
88
89
90
91
#define PAGER_JOURNALMODE_OFF         2   /* Journal omitted.  */
#define PAGER_JOURNALMODE_TRUNCATE    3   /* Commit by truncating journal */

/*
** See source code comments for a detailed description of the following
** routines:
*/
int sqlite3PagerOpen(sqlite3_vfs *, Pager **ppPager, const char*, void(*)(DbPage*), int,int,int);
void sqlite3PagerSetBusyhandler(Pager*, BusyHandler *pBusyHandler);
void sqlite3PagerSetReiniter(Pager*, void(*)(DbPage*));
int sqlite3PagerSetPagesize(Pager*, u16*);
int sqlite3PagerMaxPageCount(Pager*, int);
int sqlite3PagerReadFileheader(Pager*, int, unsigned char*);
void sqlite3PagerSetCachesize(Pager*, int);
int sqlite3PagerClose(Pager *pPager);
int sqlite3PagerAcquire(Pager *pPager, Pgno pgno, DbPage **ppPage, int clrFlag);
#define sqlite3PagerGet(A,B,C) sqlite3PagerAcquire(A,B,C,0)
DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno);

int sqlite3PagerRef(DbPage*);
int sqlite3PagerUnref(DbPage*);
int sqlite3PagerWrite(DbPage*);
int sqlite3PagerPagecount(Pager*, int*);
int sqlite3PagerTruncate(Pager*,Pgno);
int sqlite3PagerBegin(DbPage*, int exFlag);
int sqlite3PagerCommitPhaseOne(Pager*,const char *zMaster, Pgno, int);







|










>







67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#define PAGER_JOURNALMODE_OFF         2   /* Journal omitted.  */
#define PAGER_JOURNALMODE_TRUNCATE    3   /* Commit by truncating journal */

/*
** See source code comments for a detailed description of the following
** routines:
*/
int sqlite3PagerOpen(sqlite3_vfs *, Pager **ppPager, const char*, int,int,int);
void sqlite3PagerSetBusyhandler(Pager*, BusyHandler *pBusyHandler);
void sqlite3PagerSetReiniter(Pager*, void(*)(DbPage*));
int sqlite3PagerSetPagesize(Pager*, u16*);
int sqlite3PagerMaxPageCount(Pager*, int);
int sqlite3PagerReadFileheader(Pager*, int, unsigned char*);
void sqlite3PagerSetCachesize(Pager*, int);
int sqlite3PagerClose(Pager *pPager);
int sqlite3PagerAcquire(Pager *pPager, Pgno pgno, DbPage **ppPage, int clrFlag);
#define sqlite3PagerGet(A,B,C) sqlite3PagerAcquire(A,B,C,0)
DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno);
int sqlite3PagerPageRefcount(DbPage*);
int sqlite3PagerRef(DbPage*);
int sqlite3PagerUnref(DbPage*);
int sqlite3PagerWrite(DbPage*);
int sqlite3PagerPagecount(Pager*, int*);
int sqlite3PagerTruncate(Pager*,Pgno);
int sqlite3PagerBegin(DbPage*, int exFlag);
int sqlite3PagerCommitPhaseOne(Pager*,const char *zMaster, Pgno, int);
Changes to src/pcache.c.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/*
** 2008 August 05
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file implements that page cache.
**
** @(#) $Id: pcache.c,v 1.32 2008/09/24 09:12:47 danielk1977 Exp $
*/
#include "sqliteInt.h"

/*
** A complete page cache is an instance of this structure.
**
** A cache may only be deleted by its owner and while holding the













|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/*
** 2008 August 05
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file implements that page cache.
**
** @(#) $Id: pcache.c,v 1.33 2008/09/29 11:49:48 danielk1977 Exp $
*/
#include "sqliteInt.h"

/*
** A complete page cache is an instance of this structure.
**
** A cache may only be deleted by its owner and while holding the
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
  ** the cache owner or by any thread holding the the mutex.  Non-owner
  ** threads must hold the mutex when reading these elements to prevent
  ** the entire PCache object from being deleted during the read.
  */
  int szPage;                         /* Size of every page in this cache */
  int szExtra;                        /* Size of extra space for each page */
  int bPurgeable;                     /* True if pages are on backing store */
  void (*xDestroy)(PgHdr*);           /* Called when refcnt goes 1->0 */
  int (*xStress)(void*,PgHdr*);       /* Call to try make a page clean */
  void *pStress;                      /* Argument to xStress */
  /**********************************************************************
  ** The final group of elements can only be accessed while holding the
  ** mutex.  Both the cache owner and any other thread must hold the mutex
  ** to read or write any of these elements.
  */







<







39
40
41
42
43
44
45

46
47
48
49
50
51
52
  ** the cache owner or by any thread holding the the mutex.  Non-owner
  ** threads must hold the mutex when reading these elements to prevent
  ** the entire PCache object from being deleted during the read.
  */
  int szPage;                         /* Size of every page in this cache */
  int szExtra;                        /* Size of extra space for each page */
  int bPurgeable;                     /* True if pages are on backing store */

  int (*xStress)(void*,PgHdr*);       /* Call to try make a page clean */
  void *pStress;                      /* Argument to xStress */
  /**********************************************************************
  ** The final group of elements can only be accessed while holding the
  ** mutex.  Both the cache owner and any other thread must hold the mutex
  ** to read or write any of these elements.
  */
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
** Create a new PCache object.  Storage space to hold the object
** has already been allocated and is passed in as the p pointer.
*/
void sqlite3PcacheOpen(
  int szPage,                  /* Size of every page */
  int szExtra,                 /* Extra space associated with each page */
  int bPurgeable,              /* True if pages are on backing store */
  void (*xDestroy)(PgHdr*),    /* Called to destroy a page */
  int (*xStress)(void*,PgHdr*),/* Call to try to make pages clean */
  void *pStress,               /* Argument to xStress */
  PCache *p                    /* Preallocated space for the PCache */
){
  assert( pcache_g.isInit );
  memset(p, 0, sizeof(PCache));
  p->szPage = szPage;
  p->szExtra = szExtra;
  p->bPurgeable = bPurgeable;
  p->xDestroy = xDestroy;
  p->xStress = xStress;
  p->pStress = pStress;
  p->nMax = 100;
  p->nMin = 10;

  pcacheEnterMutex();
  if( bPurgeable ){







<









<







638
639
640
641
642
643
644

645
646
647
648
649
650
651
652
653

654
655
656
657
658
659
660
** Create a new PCache object.  Storage space to hold the object
** has already been allocated and is passed in as the p pointer.
*/
void sqlite3PcacheOpen(
  int szPage,                  /* Size of every page */
  int szExtra,                 /* Extra space associated with each page */
  int bPurgeable,              /* True if pages are on backing store */

  int (*xStress)(void*,PgHdr*),/* Call to try to make pages clean */
  void *pStress,               /* Argument to xStress */
  PCache *p                    /* Preallocated space for the PCache */
){
  assert( pcache_g.isInit );
  memset(p, 0, sizeof(PCache));
  p->szPage = szPage;
  p->szExtra = szExtra;
  p->bPurgeable = bPurgeable;

  p->xStress = xStress;
  p->pStress = pStress;
  p->nMax = 100;
  p->nMin = 10;

  pcacheEnterMutex();
  if( bPurgeable ){
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
** move the page to the LRU list if it is clean.
*/
void sqlite3PcacheRelease(PgHdr *p){
  assert( p->nRef>0 );
  p->nRef--;
  if( p->nRef==0 ){
    PCache *pCache = p->pCache;
    if( p->pCache->xDestroy ){
      p->pCache->xDestroy(p);
    }
    pCache->nRef--;
    if( (p->flags&PGHDR_DIRTY)==0 ){
      pCache->nPinned--;
      pcacheEnterMutex();
      if( pcache_g.nCurrentPage>pcache_g.nMaxPage ){
        pcacheRemoveFromList(&pCache->pClean, p);
        pcacheRemoveFromHash(p);







<
<
<







745
746
747
748
749
750
751



752
753
754
755
756
757
758
** move the page to the LRU list if it is clean.
*/
void sqlite3PcacheRelease(PgHdr *p){
  assert( p->nRef>0 );
  p->nRef--;
  if( p->nRef==0 ){
    PCache *pCache = p->pCache;



    pCache->nRef--;
    if( (p->flags&PGHDR_DIRTY)==0 ){
      pCache->nPinned--;
      pcacheEnterMutex();
      if( pcache_g.nCurrentPage>pcache_g.nMaxPage ){
        pcacheRemoveFromList(&pCache->pClean, p);
        pcacheRemoveFromHash(p);
1158
1159
1160
1161
1162
1163
1164




1165
1166
1167
1168
1169
1170
1171

/* 
** Return the total number of outstanding page references.
*/
int sqlite3PcacheRefCount(PCache *pCache){
  return pCache->nRef;
}





/* 
** Return the total number of pages in the cache.
*/
int sqlite3PcachePagecount(PCache *pCache){
  assert( pCache->nPage>=0 );
  return pCache->nPage;







>
>
>
>







1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169

/* 
** Return the total number of outstanding page references.
*/
int sqlite3PcacheRefCount(PCache *pCache){
  return pCache->nRef;
}

int sqlite3PcachePageRefcount(PgHdr *p){
  return p->nRef;
}

/* 
** Return the total number of pages in the cache.
*/
int sqlite3PcachePagecount(PCache *pCache){
  assert( pCache->nPage>=0 );
  return pCache->nPage;
Changes to src/pcache.h.
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This header file defines the interface that the sqlite page cache
** subsystem. 
**
** @(#) $Id: pcache.h,v 1.11 2008/09/18 17:34:44 danielk1977 Exp $
*/

#ifndef _PCACHE_H_

typedef struct PgHdr PgHdr;
typedef struct PCache PCache;








|







8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This header file defines the interface that the sqlite page cache
** subsystem. 
**
** @(#) $Id: pcache.h,v 1.12 2008/09/29 11:49:48 danielk1977 Exp $
*/

#ifndef _PCACHE_H_

typedef struct PgHdr PgHdr;
typedef struct PCache PCache;

75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
** Under memory stress, invoke xStress to try to make pages clean.
** Only clean and unpinned pages can be reclaimed.
*/
void sqlite3PcacheOpen(
  int szPage,                    /* Size of every page */
  int szExtra,                   /* Extra space associated with each page */
  int bPurgeable,                /* True if pages are on backing store */
  void (*xDestroy)(PgHdr *),     /* Called to destroy a page */
  int (*xStress)(void*, PgHdr*), /* Call to try to make pages clean */
  void *pStress,                 /* Argument to xStress */
  PCache *pToInit                /* Preallocated space for the PCache */
);

/* Modify the page-size after the cache has been created. */
void sqlite3PcacheSetPageSize(PCache *, int);







<







75
76
77
78
79
80
81

82
83
84
85
86
87
88
** Under memory stress, invoke xStress to try to make pages clean.
** Only clean and unpinned pages can be reclaimed.
*/
void sqlite3PcacheOpen(
  int szPage,                    /* Size of every page */
  int szExtra,                   /* Extra space associated with each page */
  int bPurgeable,                /* True if pages are on backing store */

  int (*xStress)(void*, PgHdr*), /* Call to try to make pages clean */
  void *pStress,                 /* Argument to xStress */
  PCache *pToInit                /* Preallocated space for the PCache */
);

/* Modify the page-size after the cache has been created. */
void sqlite3PcacheSetPageSize(PCache *, int);
138
139
140
141
142
143
144


145
146
147
148
149
150
151
int sqlite3PcacheClear(PCache*);

/* Return the total number of outstanding page references */
int sqlite3PcacheRefCount(PCache*);

/* Increment the reference count of an existing page */
void sqlite3PcacheRef(PgHdr*);



/* Return the total number of pages stored in the cache */
int sqlite3PcachePagecount(PCache*);

/* Iterate through all pages currently stored in the cache. This interface
** is only available if SQLITE_CHECK_PAGES is defined when the library is 
** built.







>
>







137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
int sqlite3PcacheClear(PCache*);

/* Return the total number of outstanding page references */
int sqlite3PcacheRefCount(PCache*);

/* Increment the reference count of an existing page */
void sqlite3PcacheRef(PgHdr*);

int sqlite3PcachePageRefcount(PgHdr*);

/* Return the total number of pages stored in the cache */
int sqlite3PcachePagecount(PCache*);

/* Iterate through all pages currently stored in the cache. This interface
** is only available if SQLITE_CHECK_PAGES is defined when the library is 
** built.
Changes to src/test2.c.
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Code for testing the pager.c module in SQLite.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library.
**
** $Id: test2.c,v 1.61 2008/08/26 18:05:48 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "tcl.h"
#include <stdlib.h>
#include <string.h>
#include <ctype.h>








|







9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Code for testing the pager.c module in SQLite.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library.
**
** $Id: test2.c,v 1.62 2008/09/29 11:49:48 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "tcl.h"
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
  char zBuf[100];
  if( argc!=3 ){
    Tcl_AppendResult(interp, "wrong # args: should be \"", argv[0],
       " FILENAME N-PAGE\"", 0);
    return TCL_ERROR;
  }
  if( Tcl_GetInt(interp, argv[2], &nPage) ) return TCL_ERROR;
  rc = sqlite3PagerOpen(sqlite3_vfs_find(0), &pPager, argv[1], 0, 0, 0,
      SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | SQLITE_OPEN_MAIN_DB);
  if( rc!=SQLITE_OK ){
    Tcl_AppendResult(interp, errorName(rc), 0);
    return TCL_ERROR;
  }
  sqlite3PagerSetCachesize(pPager, nPage);
  pageSize = test_pagesize;







|







74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
  char zBuf[100];
  if( argc!=3 ){
    Tcl_AppendResult(interp, "wrong # args: should be \"", argv[0],
       " FILENAME N-PAGE\"", 0);
    return TCL_ERROR;
  }
  if( Tcl_GetInt(interp, argv[2], &nPage) ) return TCL_ERROR;
  rc = sqlite3PagerOpen(sqlite3_vfs_find(0), &pPager, argv[1], 0, 0,
      SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | SQLITE_OPEN_MAIN_DB);
  if( rc!=SQLITE_OK ){
    Tcl_AppendResult(interp, errorName(rc), 0);
    return TCL_ERROR;
  }
  sqlite3PagerSetCachesize(pPager, nPage);
  pageSize = test_pagesize;
Changes to src/test_btree.c.
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Code for testing the btree.c module in SQLite.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library.
**
** $Id: test_btree.c,v 1.7 2008/09/02 00:52:52 drh Exp $
*/
#include "btreeInt.h"
#include <tcl.h>

/*
** Usage: sqlite3_shared_cache_report
**







|







9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Code for testing the btree.c module in SQLite.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library.
**
** $Id: test_btree.c,v 1.8 2008/09/29 11:49:48 danielk1977 Exp $
*/
#include "btreeInt.h"
#include <tcl.h>

/*
** Usage: sqlite3_shared_cache_report
**
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
** Print debugging information about all cursors to standard output.
*/
void sqlite3BtreeCursorList(Btree *p){
#ifdef SQLITE_DEBUG
  BtCursor *pCur;
  BtShared *pBt = p->pBt;
  for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
    MemPage *pPage = pCur->pPage;
    char *zMode = pCur->wrFlag ? "rw" : "ro";
    sqlite3DebugPrintf("CURSOR %p rooted at %4d(%s) currently at %d.%d%s\n",
       pCur, pCur->pgnoRoot, zMode,
       pPage ? pPage->pgno : 0, pCur->idx,
       (pCur->eState==CURSOR_VALID) ? "" : " eof"
    );
  }
#endif
}









|



|







48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
** Print debugging information about all cursors to standard output.
*/
void sqlite3BtreeCursorList(Btree *p){
#ifdef SQLITE_DEBUG
  BtCursor *pCur;
  BtShared *pBt = p->pBt;
  for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
    MemPage *pPage = pCur->apPage[pCur->iPage];
    char *zMode = pCur->wrFlag ? "rw" : "ro";
    sqlite3DebugPrintf("CURSOR %p rooted at %4d(%s) currently at %d.%d%s\n",
       pCur, pCur->pgnoRoot, zMode,
       pPage ? pPage->pgno : 0, pCur->aiIdx[pCur->iPage],
       (pCur->eState==CURSOR_VALID) ? "" : " eof"
    );
  }
#endif
}


79
80
81
82
83
84
85

86
87
88
89
90
91
92
93
94
**   aResult[8] =  Local payload size
**   aResult[9] =  Parent page number
**   aResult[10]=  Page number of the first overflow page
**
** This routine is used for testing and debugging only.
*/
int sqlite3BtreeCursorInfo(BtCursor *pCur, int *aResult, int upCnt){

  int cnt, idx;
  MemPage *pPage = pCur->pPage;
  BtCursor tmpCur;
  int rc;

  if( pCur->eState==CURSOR_REQUIRESEEK ){
    rc = sqlite3BtreeRestoreCursorPosition(pCur);
    if( rc!=SQLITE_OK ){
      return rc;







>

|







79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
**   aResult[8] =  Local payload size
**   aResult[9] =  Parent page number
**   aResult[10]=  Page number of the first overflow page
**
** This routine is used for testing and debugging only.
*/
int sqlite3BtreeCursorInfo(BtCursor *pCur, int *aResult, int upCnt){
#if 0
  int cnt, idx;
  MemPage *pPage = pCur->apPage[pCur->iPage];
  BtCursor tmpCur;
  int rc;

  if( pCur->eState==CURSOR_REQUIRESEEK ){
    rc = sqlite3BtreeRestoreCursorPosition(pCur);
    if( rc!=SQLITE_OK ){
      return rc;
132
133
134
135
136
137
138

139
140
  }
  if( tmpCur.info.iOverflow ){
    aResult[10] = get4byte(&tmpCur.info.pCell[tmpCur.info.iOverflow]);
  }else{
    aResult[10] = 0;
  }
  sqlite3BtreeReleaseTempCursor(&tmpCur);

  return SQLITE_OK;
}







>


133
134
135
136
137
138
139
140
141
142
  }
  if( tmpCur.info.iOverflow ){
    aResult[10] = get4byte(&tmpCur.info.pCell[tmpCur.info.iOverflow]);
  }else{
    aResult[10] = 0;
  }
  sqlite3BtreeReleaseTempCursor(&tmpCur);
#endif
  return SQLITE_OK;
}
Changes to test/corrupt2.test.
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#
#***********************************************************************
# This file implements regression tests for SQLite library.
#
# This file implements tests to make sure SQLite does not crash or
# segfault if it sees a corrupt database file.
#
# $Id: corrupt2.test,v 1.17 2008/09/10 11:28:38 danielk1977 Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# The following tests - corrupt2-1.* - create some databases corrupted in
# specific ways and ensure that SQLite detects them as corrupt.
#







|







9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#
#***********************************************************************
# This file implements regression tests for SQLite library.
#
# This file implements tests to make sure SQLite does not crash or
# segfault if it sees a corrupt database file.
#
# $Id: corrupt2.test,v 1.18 2008/09/29 11:49:48 danielk1977 Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# The following tests - corrupt2-1.* - create some databases corrupted in
# specific ways and ensure that SQLite detects them as corrupt.
#
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
  sqlite3 db2 corrupt.db 
  db2 eval {SELECT rowid FROM t1} {
    set result [db2 eval {pragma integrity_check}]
    break
  }
  set result
} {{*** in database main ***
Page 10: sqlite3BtreeInitPage() returns error code 11
On tree page 3 cell 1: Child page depth differs
On tree page 2 cell 0: 2nd reference to page 10
On tree page 2 cell 1: Child page depth differs
Page 4 is never used}}

db2 close

proc corruption_test {args} {







<
<







221
222
223
224
225
226
227


228
229
230
231
232
233
234
  sqlite3 db2 corrupt.db 
  db2 eval {SELECT rowid FROM t1} {
    set result [db2 eval {pragma integrity_check}]
    break
  }
  set result
} {{*** in database main ***


On tree page 2 cell 0: 2nd reference to page 10
On tree page 2 cell 1: Child page depth differs
Page 4 is never used}}

db2 close

proc corruption_test {args} {