SQLite4
Check-in [18ae7f9855]
Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Performance tweaks for seek operations.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 18ae7f98554bc11041ded8879d62fc7dba4cdab8
User & Date: dan 2014-02-21 17:36:30
Original Comment: :)
Context
2014-02-22
19:54
Add other bt optimizations. Fix a problem in mutex_noop.c. check-in: 1ecbf355e3 user: dan tags: trunk
2014-02-21
17:36
Performance tweaks for seek operations. check-in: 18ae7f9855 user: dan tags: trunk
2014-02-19
11:28
Fix a couple of problems related to log recovery and checkpointing. check-in: e64f3ba5f0 user: dan tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to lsm-test/sqltest.c.

   242    242   
   243    243     return iRet;
   244    244   }
   245    245   /*
   246    246   ** End of integer query implementations.
   247    247   *************************************************************************/
   248    248   
          249  +
          250  +static int bt_open(sqlite4_env *pEnv, const char *zFile, sqlite4 **pDb){
          251  +  char *zUri = sqlite3_mprintf("file:%s?kv=bt", zFile);
          252  +  int rc = sqlite4_open(pEnv, zUri, pDb);
          253  +  sqlite3_free(zUri);
          254  +  return rc;
          255  +}
          256  +
   249    257   static int do_insert1_test4(
   250    258     const char *zFile,
   251    259     int nRow,                       /* Number of rows to insert in total */
   252    260     int nRowPerTrans,               /* Number of rows per transaction */
   253    261     int nIdx,                       /* Number of aux indexes (aside from PK) */
   254    262     int iSync                       /* PRAGMA synchronous value (0, 1 or 2) */
   255    263   ){
................................................................................
   260    268     int i;                          /* Counter to count nRow rows */
   261    269     int nMs;                        /* Test time in ms */
   262    270   
   263    271     lsm_db *pLsm;
   264    272   
   265    273     if( zFile==0 ) zFile = SQLITE4_DB_FILE;
   266    274     unlink_db(zFile);
   267         -  EXPLODE(  sqlite4_open(0, zFile, &db)  );
          275  +  EXPLODE(  bt_open(0, zFile, &db)  );
   268    276     sqlite4_kvstore_control(db, "main", SQLITE4_KVCTRL_LSM_HANDLE, &pLsm);
   269    277     i = iSync;
   270    278     lsm_config(pLsm, LSM_CONFIG_SAFETY, &i);
   271    279     assert( i==iSync );
   272    280   
   273    281     install_rblob_function4(db);
   274    282   
................................................................................
   440    448     sqlite4_stmt *pSelect = 0;
   441    449     char *zSelect;
   442    450     sqlite4 *db;
   443    451     int i;
   444    452     int nTblRow;
   445    453   
   446    454     if( zFile==0 ) zFile = SQLITE4_DB_FILE;
   447         -  EXPLODE( sqlite4_open(0, zFile, &db) );
          455  +  EXPLODE( bt_open(0, zFile, &db) );
   448    456     install_rblob_function4(db);
   449    457   
   450    458     nTblRow = integer_query4(db, "SELECT count(*) FROM t1");
   451    459   
   452    460     /* Create the db schema and prepare the INSERT statement */
   453    461     zSelect = create_select_sql(iIdx);
   454    462     EXPLODE(  sqlite4_prepare(db, zSelect, -1, &pSelect, 0)  );
................................................................................
   642    650         }
   643    651       }
   644    652       *pp = (SqlDatabase *)p;
   645    653     }else{
   646    654       SqlDatabase4 *p = sqlite4_malloc(0, sizeof(SqlDatabase4));
   647    655       memset(p, 0, sizeof(SqlDatabase4));
   648    656       p->x.iDb = 4;
   649         -    rc = sqlite4_open(0, zFile, &p->db);
          657  +    rc = bt_open(0, zFile, &p->db);
   650    658       if( rc!=SQLITE4_OK ){
   651    659         sqlite4_free(0, p);
   652    660         p = 0;
   653    661       }else{
   654    662         install_rint_function4(p->db);
   655    663         if( zConfig ) {
   656    664           rc = sqlite4_exec(p->db, zConfig, 0, 0);

Changes to src/btInt.h.

    46     46   #endif
    47     47   
    48     48   /* By default pages are 1024 bytes in size. */
    49     49   #define BT_DEFAULT_PGSZ 1024
    50     50   
    51     51   /* By default blocks are 512K bytes in size. */
    52     52   #define BT_DEFAULT_BLKSZ (512*1024)
           53  +
           54  +/* Default cache size in pages */
           55  +#define BT_DEFAULT_CACHESZ 1000
    53     56   
    54     57   /*
    55     58   ** This structure is the in-memory representation of all data stored in
    56     59   ** the database header at the start of the db file.
    57     60   **
    58     61   ** pgsz, blksz:
    59     62   **   Byte offset 0 of the database file is the first byte of both page 1

Changes to src/bt_log.c.

  1208   1208   
  1209   1209     u32 *aLog = pLog->snapshot.aLog;
  1210   1210     int iSafeIdx = sqlite4BtLogFrameToIdx(aLog, iSafe);
  1211   1211   
  1212   1212     /* Loop through regions (c), (b) and (a) of the log file. In that order. */
  1213   1213     for(i=2; i>=0 && rc==SQLITE4_NOTFOUND; i--){
  1214   1214       u32 iLo = pLog->snapshot.aLog[i*2+0];
  1215         -    u32 iHi = pLog->snapshot.aLog[i*2+1];
  1216         -    int iSide;
  1217         -    int iHash;
  1218         -    int iHashLast;
         1215  +    if( iLo ){
         1216  +      u32 iHi = pLog->snapshot.aLog[i*2+1];
         1217  +      int iSide;
         1218  +      int iHash;
         1219  +      int iHashLast;
  1219   1220   
  1220         -    iHash = btLogFrameHash(pLog, iHi);
  1221         -    iHashLast = btLogFrameHash(pLog, iLo);
  1222         -    iSide = (pLog->snapshot.iHashSide + (i==0)) % 2;
         1221  +      iHash = btLogFrameHash(pLog, iHi);
         1222  +      iHashLast = btLogFrameHash(pLog, iLo);
         1223  +      iSide = (pLog->snapshot.iHashSide + (i==0)) % 2;
  1223   1224   
  1224         -    for( ; rc==SQLITE4_NOTFOUND && iHash>=iHashLast; iHash--){
  1225         -      rc = btLogHashSearch(pLog, iSide, iHash, iHi, pgno, &iFrame);
  1226         -      if( rc==SQLITE4_OK ){
  1227         -        if( iFrame<iLo || iFrame>iHi ){
  1228         -          rc = SQLITE4_NOTFOUND;
  1229         -        }else{
  1230         -          assert( sqlite4BtLogFrameToIdx(aLog, iFrame)>=0 );
  1231         -          if( iSafeIdx>=0 && sqlite4BtLogFrameToIdx(aLog, iFrame)>iSafeIdx ){
  1232         -            return SQLITE4_NOTFOUND;
         1225  +      for( ; rc==SQLITE4_NOTFOUND && iHash>=iHashLast; iHash--){
         1226  +        rc = btLogHashSearch(pLog, iSide, iHash, iHi, pgno, &iFrame);
         1227  +        if( rc==SQLITE4_OK ){
         1228  +          if( iFrame<iLo || iFrame>iHi ){
         1229  +            rc = SQLITE4_NOTFOUND;
         1230  +          }else{
         1231  +            assert( sqlite4BtLogFrameToIdx(aLog, iFrame)>=0 );
         1232  +            if( iSafeIdx>=0 && sqlite4BtLogFrameToIdx(aLog, iFrame)>iSafeIdx ){
         1233  +              return SQLITE4_NOTFOUND;
         1234  +            }
  1233   1235             }
  1234   1236           }
  1235   1237         }
  1236   1238       }
  1237   1239     }
  1238   1240   
  1239   1241     btDebugLogSearch(pLog->pLock, pgno, iSafe, (rc==SQLITE4_OK ? iFrame : 0));
................................................................................
  1262   1264   ** If the log does not contain any version of page pgno, SQLITE4_NOTFOUND
  1263   1265   ** is returned and the contents of buffer aData[] are not modified.
  1264   1266   **
  1265   1267   ** If any other error occurs, an SQLite4 error code is returned. The final
  1266   1268   ** state of buffer aData[] is undefined in this case.
  1267   1269   */
  1268   1270   int sqlite4BtLogRead(BtLog *pLog, u32 pgno, u8 *aData){
         1271  +  if( pLog->snapshot.aLog[4]==0 ){
         1272  +    assert( pLog->snapshot.aLog[0]==0 && pLog->snapshot.aLog[2]==0 );
         1273  +    return SQLITE4_NOTFOUND;
         1274  +  }
  1269   1275     return btLogRead(pLog, pgno, aData, 0);
  1270   1276   }
  1271   1277   
  1272   1278   static int btLogZeroHash(BtLog *pLog, int iHash){
  1273   1279     int iSide = pLog->snapshot.iHashSide;
  1274   1280     ht_slot *aHash;
  1275   1281     u32 *aPgno;

Changes to src/bt_main.c.

    11     11   *************************************************************************
    12     12   **
    13     13   */
    14     14   
    15     15   #include "btInt.h"
    16     16   #include <string.h>
    17     17   #include <assert.h>
           18  +#include <stddef.h>
    18     19   
    19     20   #define BT_MAX_DEPTH 32           /* Maximum possible depth of tree */
    20     21   #define BT_MAX_DIRECT_OVERFLOW 8  /* Maximum direct overflow pages per cell */
    21     22   
           23  +/* Maximum size of a key-prefix stored on an internal node. Parts of the
           24  +** code in this file assume that this value can be encoded as a single
           25  +** byte SQLite4 varint.  */
           26  +#define BT_MAX_INTERNAL_KEY 200   /* Maximum bytes of key on internal node */
           27  +
    22     28   /*
    23     29   ** Values that make up the single byte flags field at the start of
    24     30   ** b-tree pages. 
    25     31   */
    26         -#define BT_PGFLAGS_INTERNAL 0x01  /* True for non-leaf nodes */
    27         -#define BT_PGFLAGS_METATREE 0x02  /* True for a meta-tree page */
    28         -#define BT_PGFLAGS_SCHEDULE 0x04  /* True for a schedule-tree page */
           32  +#define BT_PGFLAGS_INTERNAL  0x01  /* True for non-leaf nodes */
           33  +#define BT_PGFLAGS_METATREE  0x02  /* True for a meta-tree page */
           34  +#define BT_PGFLAGS_SCHEDULE  0x04  /* True for a schedule-tree page */
           35  +#define BT_PGFLAGS_LARGEKEYS 0x08  /* True if keys larger than 200 bytes */
    29     36   
    30     37   /*
    31     38   ** Maximum depth of fast-insert sub-trees.
    32     39   */
    33     40   #define MAX_SUBTREE_DEPTH 8
    34     41   
    35     42   /* #define BT_STDERR_DEBUG 1 */
................................................................................
    36     43   
    37     44   typedef struct BtCursor BtCursor;
    38     45   typedef struct FiCursor FiCursor;
    39     46   typedef struct FiSubCursor FiSubCursor;
    40     47   
    41     48   struct bt_db {
    42     49     sqlite4_env *pEnv;              /* SQLite environment */
           50  +  sqlite4_mm *pMM;                /* Memory allocator for pEnv */
    43     51     BtPager *pPager;                /* Underlying page-based database */
    44     52     bt_cursor *pAllCsr;             /* List of all open cursors */
    45     53     int nMinMerge;
    46     54     int nScheduleAlloc;
    47     55     int bFastInsertOp;              /* Set by CONTROL_FAST_INSERT_OP */
           56  +
           57  +  BtCursor *pFreeCsr;
    48     58   };
    49     59   
    50     60   /*
    51     61   ** Overflow buffer is valid if nKey!=0.
    52     62   */
    53     63   typedef struct BtOvfl BtOvfl;
    54     64   struct BtOvfl {
................................................................................
    91    101   ** Database b-tree cursor handle.
    92    102   */
    93    103   struct BtCursor {
    94    104     bt_cursor base;                 /* Base cursor class */
    95    105   
    96    106     u32 iRoot;                      /* Root page of b-tree this cursor queries */
    97    107     int nPg;                        /* Number of valid entries in apPage[] */
    98         -  int aiCell[BT_MAX_DEPTH];       /* Current cell of each apPage[] entry */
    99         -  BtPage *apPage[BT_MAX_DEPTH];   /* All pages from root to current leaf */
   100    108     BtOvfl ovfl;                    /* Overflow cache (see above) */
   101    109   
   102    110     int bRequireReseek;             /* True if a btCsrReseek() is required */
   103    111     int bSkipNext;                  /* True if next CsrNext() is a no-op */
   104    112     int bSkipPrev;                  /* True if next CsrPrev() is a no-op */
          113  +
          114  +  BtCursor *pNextFree;            /* Next in list of free BtCursor structures */
          115  +
          116  +  int aiCell[BT_MAX_DEPTH];       /* Current cell of each apPage[] entry */
          117  +  BtPage *apPage[BT_MAX_DEPTH];   /* All pages from root to current leaf */
   105    118   };
   106    119   
   107    120   /*
   108    121   ** Database f-tree cursor handle.
   109    122   */
   110    123   struct FiSubCursor {
   111    124     u8 aPrefix[8];                  /* Meta-tree key prefix for this age/level */
................................................................................
   205    218     a[0] = (u8)((i>>24) & 0xFF);
   206    219     a[1] = (u8)((i>>16) & 0xFF);
   207    220     a[2] = (u8)((i>>8) & 0xFF);
   208    221     a[3] = (u8)((i>>0) & 0xFF);
   209    222   }
   210    223   #define btPutU32(x,y) sqlite4BtPutU32(x,y)
   211    224   
          225  +struct FakePage { u8 *aData; };
          226  +#define btPageData(pPg) (((struct FakePage*)(pPg))->aData)
          227  +
   212    228   /*
   213    229   ** Allocate a new database handle.
   214    230   */
   215    231   int sqlite4BtNew(sqlite4_env *pEnv, int nExtra, bt_db **ppDb){
   216    232     static const int MIN_MERGE = 2;
   217    233     static const int SCHEDULE_ALLOC = 4;
   218    234   
................................................................................
   238    254   
   239    255   /*
   240    256   ** Close an existing database handle. Once this function has been 
   241    257   ** called, the handle may not be used for any purpose.
   242    258   */
   243    259   int sqlite4BtClose(bt_db *db){
   244    260     if( db ){
          261  +    BtCursor *pCsr;
          262  +    BtCursor *pNext;
          263  +    for(pCsr=db->pFreeCsr; pCsr; pCsr=pNext){
          264  +      pNext = pCsr->pNextFree;
          265  +      sqlite4_free(db->pEnv, pCsr);
          266  +    }
   245    267       sqlite4BtPagerClose(db->pPager);
   246    268     }
   247    269     return SQLITE4_OK;
   248    270   }
   249    271   
   250    272   /*
   251    273   ** Return a pointer to the nExtra bytes of space allocated along with 
................................................................................
   253    275   */
   254    276   void *sqlite4BtExtra(bt_db *db){
   255    277     return (void*)&db[1];
   256    278   }
   257    279   
   258    280   int sqlite4BtOpen(bt_db *db, const char *zFilename){
   259    281     int rc;
          282  +  sqlite4_env_config(db->pEnv, SQLITE4_ENVCONFIG_GETMM, &db->pMM);
   260    283     rc = sqlite4BtPagerOpen(db->pPager, zFilename);
   261    284     return rc;
   262    285   }
   263    286   
   264    287   int sqlite4BtBegin(bt_db *db, int iLevel){
   265    288     int rc;
   266    289     rc = sqlite4BtPagerBegin(db->pPager, iLevel);
................................................................................
   286    309   }
   287    310   
   288    311   int sqlite4BtTransactionLevel(bt_db *db){
   289    312     return sqlite4BtPagerTransactionLevel(db->pPager);
   290    313   }
   291    314   
   292    315   static void btCsrSetup(bt_db *db, u32 iRoot, BtCursor *pCsr){
   293         -  memset(pCsr, 0, sizeof(BtCursor));
          316  +  memset(pCsr, 0, offsetof(BtCursor, aiCell));
   294    317     pCsr->base.flags = CSR_TYPE_BT;
   295    318     pCsr->base.pExtra = (void*)&pCsr[1];
   296    319     pCsr->base.pDb = db;
   297    320     pCsr->iRoot = iRoot;
   298         -  sqlite4_env_config(db->pEnv, SQLITE4_ENVCONFIG_GETMM, &pCsr->ovfl.buf.pMM);
          321  +  pCsr->ovfl.buf.pMM = db->pMM;
   299    322   }
   300    323   
   301    324   int sqlite4BtCsrOpen(bt_db *db, int nExtra, bt_cursor **ppCsr){
   302    325     int rc = SQLITE4_OK;            /* Return Code */
   303    326     bt_cursor *pRet = 0;
   304    327   
   305    328     assert( sqlite4BtPagerTransactionLevel(db->pPager)>0 );
................................................................................
   317    340         pCsr->base.pExtra = (void*)&pCsr[1];
   318    341         pCsr->base.pDb = db;
   319    342         pRet = (bt_cursor*)pCsr;
   320    343       }
   321    344   
   322    345     }else{
   323    346       BtCursor *pCsr;                /* New cursor object */
   324         -    int nByte = sizeof(BtCursor) + nExtra;
   325         -    pCsr = (BtCursor*)sqlite4_malloc(db->pEnv, nByte);
   326         -    if( pCsr==0 ){
   327         -      rc = btErrorBkpt(SQLITE4_NOMEM);
          347  +    u32 iRoot = sqlite4BtPagerDbhdr(db->pPager)->iRoot;
          348  +    
          349  +    if( db->pFreeCsr ){
          350  +      pCsr = db->pFreeCsr;
          351  +      db->pFreeCsr = pCsr->pNextFree;
   328    352       }else{
   329         -      u32 iRoot = sqlite4BtPagerDbhdr(db->pPager)->iRoot;
   330         -      btCsrSetup(db, iRoot, pCsr);
   331         -      pRet = (bt_cursor*)pCsr;
          353  +      int nByte = sizeof(BtCursor) + nExtra;
          354  +      pCsr = (BtCursor*)sqlite4_malloc(db->pEnv, nByte);
          355  +      if( pCsr==0 ){
          356  +        rc = btErrorBkpt(SQLITE4_NOMEM);
          357  +        goto csr_open_out;
          358  +      }
   332    359       }
          360  +
          361  +    btCsrSetup(db, iRoot, pCsr);
          362  +    pRet = (bt_cursor*)pCsr;
   333    363     }
   334    364   
   335    365     assert( (pRet==0)==(rc!=SQLITE4_OK) );
   336    366     if( rc==SQLITE4_OK ){
   337    367       pRet->pNextCsr = db->pAllCsr;
   338    368       db->pAllCsr = pRet;
   339    369     }
          370  +
          371  + csr_open_out:
   340    372     *ppCsr = pRet;
   341         -
   342    373     btCheckPageRefs(db);
   343    374     db->bFastInsertOp = 0;
   344    375     return rc;
   345    376   }
   346    377   
   347    378   static void btCsrReleaseAll(BtCursor *pCsr){
   348    379     int i;
................................................................................
   388    419       for(pp=&pDb->pAllCsr; *pp!=pCsr; pp=&(*pp)->pNextCsr);
   389    420       *pp = pCsr->pNextCsr;
   390    421   
   391    422       if( IsBtCsr(pCsr) ){
   392    423         /* A regular b-tree cursor */
   393    424         BtCursor *p = (BtCursor*)pCsr;
   394    425         btCsrReset(p, 1);
          426  +      p->pNextFree = pDb->pFreeCsr;
          427  +      pDb->pFreeCsr = p;
   395    428       }else{
   396    429         /* A fast-insert-tree cursor */
   397    430         fiCsrReset((FiCursor*)pCsr);
          431  +      sqlite4_free(pDb->pEnv, pCsr);
   398    432       }
   399         -    sqlite4_free(pDb->pEnv, pCsr);
   400    433       btCheckPageRefs(pDb);
   401    434     }
   402    435     return SQLITE4_OK;
   403    436   }
   404    437   
   405    438   void *sqlite4BtCsrExtra(bt_cursor *pCsr){
   406    439     return pCsr->pExtra;
   407    440   }
   408    441   
   409    442   /*
   410    443   ** Set pCsr->apPage[pCsr->nPg] to a reference to database page pgno.
   411    444   */
   412         -static int btCsrDescend(BtCursor *pCsr, u32 pgno){
          445  +static int btCsrDescend(BtCursor *pCsr, u32 pgno, BtPage **ppPg){
   413    446     int rc;
   414    447     if( pCsr->nPg>=BT_MAX_DEPTH ){
   415    448       rc = btErrorBkpt(SQLITE4_CORRUPT);
   416    449     }else{
   417         -    bt_db *pDb = pCsr->base.pDb;
   418    450       assert( pCsr->nPg>=0 );
   419         -    rc = sqlite4BtPageGet(pDb->pPager, pgno, &pCsr->apPage[pCsr->nPg]);
          451  +    rc = sqlite4BtPageGet(pCsr->base.pDb->pPager, pgno, ppPg);
   420    452       if( rc==SQLITE4_OK ){
   421         -      assert( pCsr->apPage[pCsr->nPg] );
          453  +      assert( *ppPg );
          454  +      pCsr->apPage[pCsr->nPg] = *ppPg;
   422    455         pCsr->nPg++;
   423    456       }
   424    457     }
   425    458     return rc;
   426    459   }
   427    460   
   428    461   /*
................................................................................
   702    735         for(i=0; i<=btCellCount(aData, nData); i++){
   703    736           BtPage *pChild;
   704    737           u8 *aChild;
   705    738           u32 child;
   706    739   
   707    740           child = btChildPgno(aData, nData, i);
   708    741           sqlite4BtPageGet(pPager, child, &pChild);
   709         -        aChild = sqlite4BtPageData(pChild);
          742  +        aChild = btPageData(pChild);
   710    743           btPageToAscii(child, bAscii, pPager, aChild, nData, pBuf);
   711    744           sqlite4BtPageRelease(pChild);
   712    745         }
   713    746       }
   714    747     }
   715    748     sqlite4BtBufAppendf(pBuf, "\n");
   716    749   }
................................................................................
   718    751   static int btFreelistToAscii(bt_db *db, u32 iFirst, sqlite4_buffer *pBuf){
   719    752     int rc = SQLITE4_OK;
   720    753     u32 iTrunk = iFirst;
   721    754     while( iTrunk && rc==SQLITE4_OK ){
   722    755       BtPage *pPg = 0;
   723    756       rc = sqlite4BtPageGet(db->pPager, iTrunk, &pPg);
   724    757       if( rc==SQLITE4_OK ){
   725         -      u8 *aData = sqlite4BtPageData(pPg);
          758  +      u8 *aData = btPageData(pPg);
   726    759         u32 nFree = btGetU32(aData);
   727    760         u32 iNext = btGetU32(&aData[4]);
   728    761         int i;
   729    762   
   730    763         sqlite4BtBufAppendf(pBuf, "iTrunk=%d ", (int)iTrunk);
   731    764         sqlite4BtBufAppendf(pBuf, "nFree=%d iNext=%d (", (int)nFree, (int)iNext);
   732    765         for(i=0; i<(int)nFree; i++){
................................................................................
   758    791   
   759    792   int printPgdataToStderr(u32 pgno, u8 *aData, int nData){
   760    793     printPage(stderr, pgno, aData, nData);
   761    794     return 0;
   762    795   }
   763    796   
   764    797   int printPgToStderr(BtPage *pPg){
   765         -  printPage(stderr, sqlite4BtPagePgno(pPg), sqlite4BtPageData(pPg), 1024);
          798  +  printPage(stderr, sqlite4BtPagePgno(pPg), btPageData(pPg), 1024);
   766    799     return 0;
   767    800   }
   768    801   
   769    802   static void btPrintMetaTree(BtPager *pPager, int bAscii, BtDbHdr *pHdr){
   770    803     u8 *aData;
   771    804     int nData;
   772    805     sqlite4_buffer buf;
   773    806     BtPage *pPg = 0;
   774    807   
   775    808     sqlite4BtPageGet(pPager, pHdr->iMRoot, &pPg);
   776         -  aData = sqlite4BtPageData(pPg);
          809  +  aData = btPageData(pPg);
   777    810     nData = pHdr->pgsz;
   778    811     sqlite4_buffer_init(&buf, 0);
   779    812     btPageToAscii(pHdr->iMRoot, bAscii, pPager, aData, nData, &buf);
   780    813     sqlite4_buffer_append(&buf, "", 1);
   781    814   
   782    815     fprintf(stderr, "%s", (char*)buf.p);
   783    816     sqlite4_buffer_clear(&buf);
................................................................................
   955    988     int nCsrKey;
   956    989     int nCmp;
   957    990     int nAscend = 0;
   958    991     int rc = SQLITE4_OK;
   959    992     int res;
   960    993   
   961    994     if( bLeaf ){
   962         -    rc = sqlite4BtCsrKey((bt_cursor*)pCsr, &pCsrKey, &nCsrKey);
          995  +    rc = btCsrKey(pCsr, &pCsrKey, &nCsrKey);
   963    996     }else{
   964    997       const int pgsz = sqlite4BtPagerPagesize(pCsr->base.pDb->pPager);
   965    998   
   966         -    u8 *aData = sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
          999  +    u8 *aData = btPageData(pCsr->apPage[pCsr->nPg-1]);
   967   1000       u8 *pCell = btCellFind(aData, pgsz, pCsr->aiCell[pCsr->nPg-1]);
   968   1001   
   969   1002       pCsrKey = pCell + sqlite4BtVarintGet32(pCell, &nCsrKey);
   970   1003       if( nCsrKey==0 ){
   971   1004         int iCell = pCsr->aiCell[pCsr->nPg-1]+1;
   972   1005         while( 1 ){
   973         -        u8 *aData = sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
         1006  +        BtPage *pPg;
         1007  +        u8 *aData = btPageData(pCsr->apPage[pCsr->nPg-1]);
   974   1008           u32 pgno = btChildPgno(aData, pgsz, iCell);
   975   1009           nAscend++;
   976         -        rc = btCsrDescend(pCsr, pgno);
         1010  +        rc = btCsrDescend(pCsr, pgno, &pPg);
   977   1011           if( rc!=SQLITE4_OK ) break;
   978         -        aData = sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
         1012  +        aData = btPageData(pPg);
   979   1013           pCsr->aiCell[pCsr->nPg-1] = 0;
   980   1014           if( (btFlags(aData) & BT_PGFLAGS_INTERNAL)==0 ) break;
   981   1015           iCell = 0;
   982   1016         }
   983   1017         rc = sqlite4BtCsrKey((bt_cursor*)pCsr, &pCsrKey, &nCsrKey);
   984   1018       }
   985   1019     }
................................................................................
  1005   1039     }
  1006   1040   
  1007   1041    keycompare_done:
  1008   1042     btCsrAscend(pCsr, nAscend);
  1009   1043     *piRes = res;
  1010   1044     return rc;
  1011   1045   }
         1046  +
         1047  +/*
         1048  +** Return an integer representing the result of (K1 - K2).
         1049  +*/
         1050  +static int btKeyCompare(
         1051  +  const void *pKey1, int nKey1, 
         1052  +  const void *pKey2, int nKey2
         1053  +){
         1054  +  int nCmp = MIN(nKey1, nKey2);
         1055  +  int res;
         1056  +
         1057  +  res = memcmp(pKey1, pKey2, nCmp);
         1058  +  if( res==0 ){
         1059  +    res = nKey1 - nKey2;
         1060  +  }
         1061  +  return res;
         1062  +}
         1063  +
  1012   1064   
  1013   1065   #define BT_CSRSEEK_SEEK   0
  1014   1066   #define BT_CSRSEEK_UPDATE 1
  1015   1067   #define BT_CSRSEEK_RESEEK 2
  1016   1068   
  1017   1069   static int btCsrSeek(
  1018   1070     BtCursor *pCsr,                 /* Cursor object to seek */
................................................................................
  1034   1086   
  1035   1087     /* Figure out the root page number */
  1036   1088     assert( pCsr->iRoot>1 && pCsr->nPg==0 );
  1037   1089     pgno = pCsr->iRoot;
  1038   1090   
  1039   1091     while( rc==SQLITE4_OK && pgno ){
  1040   1092       /* Load page number pgno into the b-tree */
  1041         -    rc = btCsrDescend(pCsr, pgno);
         1093  +    BtPage *pPg;
         1094  +    rc = btCsrDescend(pCsr, pgno, &pPg);
  1042   1095       if( rc==SQLITE4_OK ){
  1043   1096         int nCell;                  /* Number of cells on this page */
  1044   1097         int iHi;                    /* pK/nK is <= than cell iHi */
  1045   1098         int iLo;                    /* pK/nK is > than cell (iLo-1) */
  1046   1099         int res;                    /* Result of comparison */
  1047         -      u8 *aData = (u8*)sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
         1100  +
         1101  +      u8 *aData = btPageData(pPg);
         1102  +      u16 *aCellPtr = btCellPtrFind(aData, pgsz, 0);
  1048   1103         int bLeaf = ((btFlags(aData) & BT_PGFLAGS_INTERNAL)==0);
  1049   1104   
  1050   1105         iLo = 0;
  1051   1106         iHi = nCell = btCellCount(aData, pgsz);
  1052   1107   
  1053         -      while( iHi>iLo ){
  1054         -        int iTst = (iHi+iLo)/2;   /* Cell to compare to pK/nK */
  1055         -        pCsr->aiCell[pCsr->nPg-1] = iTst;
  1056         -        rc = btCellKeyCompare(pCsr, bLeaf, aPrefix, pK, nK, &res);
  1057         -        if( rc!=SQLITE4_OK || res==0 ){
  1058         -          /* Cell iTst is EQUAL to pK/nK */
  1059         -          iHi = iLo = iTst;
  1060         -        }else if( res<0 ){
  1061         -          /* Cell iTst is SMALLER than pK/nK */
  1062         -          iLo = iTst+1;
  1063         -        }else{
  1064         -          /* Cell iTst is LARGER than pK/nK */
  1065         -          iHi = iTst;
  1066         -        }
  1067         -      }
  1068         -      if( rc!=SQLITE4_OK ) break;
  1069         -      assert( iHi==iLo );
         1108  +      if( btFlags(aData) & BT_PGFLAGS_LARGEKEYS ){
         1109  +        while( iHi>iLo ){
         1110  +          int iTst = (iHi+iLo)/2;   /* Cell to compare to pK/nK */
         1111  +          u8 *pCell = &aData[btGetU16(aCellPtr - iTst)];
         1112  +          int n = *pCell;
         1113  +
         1114  +          pCsr->aiCell[pCsr->nPg-1] = iTst;
         1115  +          rc = btCellKeyCompare(pCsr, bLeaf, 0, pK, nK, &res);
         1116  +          if( rc!=SQLITE4_OK ) break;
         1117  +
         1118  +          if( res<0 ){
         1119  +            /* Cell iTst is SMALLER than pK/nK */
         1120  +            iLo = iTst+1;
         1121  +          }else{
         1122  +            /* Cell iTst is LARGER than (or equal to) pK/nK */
         1123  +            iHi = iTst;
         1124  +            if( res==0 ) break;
         1125  +          }
         1126  +        }
         1127  +      }else{
         1128  +        while( iHi>iLo ){
         1129  +          int iTst = (iHi+iLo)/2;   /* Cell to compare to pK/nK */
         1130  +          u8 *pCell = &aData[btGetU16(aCellPtr - iTst)];
         1131  +          int n = *pCell;
         1132  +          res = memcmp(&pCell[1], pK, MIN(nK, n));
         1133  +
         1134  +          if( res<0 || (res==0 && (res = n - nK)<0) ){
         1135  +            /* Cell iTst is SMALLER than pK/nK */
         1136  +            iLo = iTst+1;
         1137  +          }else{
         1138  +            /* Cell iTst is LARGER than (or equal to) pK/nK */
         1139  +            iHi = iTst;
         1140  +            if( res==0 ) break;
         1141  +          }
         1142  +        }
         1143  +      }
         1144  +      if( rc!=SQLITE4_OK ) break;
  1070   1145   
  1071   1146         iHi += (nCell>0 && bLeaf==0 && res==0);
  1072   1147         pCsr->aiCell[pCsr->nPg-1] = iHi;
  1073   1148         if( bLeaf==0 ){
  1074         -        pgno = btChildPgno(aData, pgsz, iHi);
         1149  +        if( iHi==nCell ) pgno = btGetU32(&aData[1]);
         1150  +        else{
         1151  +          u8 *pCell = btCellFind(aData, pgsz, iHi);
         1152  +          pgno = btGetU32(&pCell[1 + (int)*pCell]);
         1153  +        }
  1075   1154         }else{
  1076   1155           pgno = 0;
  1077   1156   
  1078   1157           if( nCell==0 ){
  1079   1158             rc = SQLITE4_NOTFOUND;
  1080   1159           }else if( res!=0 ){
  1081   1160             if( eSeek==BT_SEEK_EQ ){
................................................................................
  1161   1240     pCsr->bSkipPrev = pCsr->bSkipNext = 0;
  1162   1241   
  1163   1242     while( rc==SQLITE4_OK ){
  1164   1243       int iPg = pCsr->nPg-1;
  1165   1244       int iCell = pCsr->aiCell[iPg];
  1166   1245   
  1167   1246       if( bNext ){
  1168         -      u8 *aData = (u8*)sqlite4BtPageData(pCsr->apPage[iPg]);
         1247  +      u8 *aData = (u8*)btPageData(pCsr->apPage[iPg]);
  1169   1248         int nCell = btCellCount(aData, pgsz);
  1170   1249         assert( bRequireDescent==0 || bRequireDescent==1 );
  1171   1250         if( iCell<(nCell+bRequireDescent-1) ){
  1172   1251           pCsr->aiCell[iPg]++;
  1173   1252           break;
  1174   1253         }
  1175   1254       }else{
................................................................................
  1181   1260   
  1182   1261       rc = btCsrAscend(pCsr, 1);
  1183   1262       bRequireDescent = 1;
  1184   1263     }
  1185   1264   
  1186   1265     if( bRequireDescent && rc==SQLITE4_OK ){
  1187   1266       u32 pgno;                   /* Child page number */
  1188         -    u8 *aData = (u8*)sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
         1267  +    u8 *aData = (u8*)btPageData(pCsr->apPage[pCsr->nPg-1]);
  1189   1268   
  1190   1269       pgno = btChildPgno(aData, pgsz, pCsr->aiCell[pCsr->nPg-1]);
  1191   1270   
  1192   1271       while( 1 ){
  1193         -      rc = btCsrDescend(pCsr, pgno);
         1272  +      BtPage *pPg;
         1273  +      rc = btCsrDescend(pCsr, pgno, &pPg);
  1194   1274         if( rc!=SQLITE4_OK ){
  1195   1275           break;
  1196   1276         }else{
  1197   1277           int nCell;
  1198         -        aData = (u8*)sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
         1278  +        aData = (u8*)btPageData(pPg);
  1199   1279           nCell = btCellCount(aData, pgsz);
  1200   1280           if( btFlags(aData) & BT_PGFLAGS_INTERNAL ){
  1201   1281             pCsr->aiCell[pCsr->nPg-1] = (bNext ? 0 : nCell);
  1202   1282             pgno = btChildPgno(aData, pgsz, pCsr->aiCell[pCsr->nPg-1]);
  1203   1283           }else{
  1204   1284             pCsr->aiCell[pCsr->nPg-1] = (bNext ? 0 : nCell-1);
  1205   1285             break;
................................................................................
  1226   1306   
  1227   1307     /* Figure out the root page number */
  1228   1308     assert( pCsr->iRoot>1 && pCsr->nPg==0 );
  1229   1309     pgno = pCsr->iRoot;
  1230   1310   
  1231   1311     while( rc==SQLITE4_OK ){
  1232   1312       /* Load page number pgno into the b-tree */
  1233         -    rc = btCsrDescend(pCsr, pgno);
         1313  +    BtPage *pPg;
         1314  +    rc = btCsrDescend(pCsr, pgno, &pPg);
  1234   1315       if( rc==SQLITE4_OK ){
  1235   1316         int nCell;                  /* Number of cells on this page */
  1236   1317         int nByte;
  1237   1318         u8 *pCell;
  1238         -      u8 *aData = (u8*)sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
         1319  +      u8 *aData = (u8*)btPageData(pPg);
  1239   1320   
  1240   1321         nCell = btCellCount(aData, pgsz);
  1241   1322         pCsr->aiCell[pCsr->nPg-1] = (bLast ? nCell : 0);
  1242   1323   
  1243   1324         /* If the cursor has descended to a leaf break out of the loop. */
  1244   1325         if( (aData[0] & BT_PGFLAGS_INTERNAL)==0 ){
  1245   1326           if( nCell==0 ){
................................................................................
  1301   1382   static int btCsrIsDelete(BtCursor *pCsr){
  1302   1383     const int pgsz = sqlite4BtPagerPagesize(pCsr->base.pDb->pPager);
  1303   1384     int bRet;                       /* Return value */
  1304   1385     u8 *aData;
  1305   1386     u8 *pCell;
  1306   1387     int n;
  1307   1388   
  1308         -  aData = sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
         1389  +  aData = btPageData(pCsr->apPage[pCsr->nPg-1]);
  1309   1390     pCell = btCellFind(aData, pgsz, pCsr->aiCell[pCsr->nPg-1]);
  1310   1391   
  1311   1392     pCell += sqlite4BtVarintGet32(pCell, &n);
  1312   1393     if( n==0 ){
  1313   1394       /* Type (c) cell */
  1314   1395       pCell += sqlite4BtVarintGet32(pCell, &n);
  1315   1396       pCell += n;
................................................................................
  1327   1408   
  1328   1409   static int fiCsrIsDelete(FiCursor *pCsr){
  1329   1410     int res = 0;
  1330   1411     if( (pCsr->base.flags & CSR_VISIT_DEL)==0 ){
  1331   1412       BtCursor *p = &pCsr->aSub[pCsr->iBt].csr;
  1332   1413       res = btCsrIsDelete(p);
  1333   1414     }
  1334         -  return res;
  1335         -}
  1336         -
  1337         -/*
  1338         -** Return an integer representing the result of (K1 - K2).
  1339         -*/
  1340         -static int btKeyCompare(
  1341         -  const void *pKey1, int nKey1, 
  1342         -  const void *pKey2, int nKey2
  1343         -){
  1344         -  int nCmp = MIN(nKey1, nKey2);
  1345         -  int res;
  1346         -
  1347         -  res = memcmp(pKey1, pKey2, nCmp);
  1348         -  if( res==0 ){
  1349         -    res = nKey1 - nKey2;
  1350         -  }
  1351   1415     return res;
  1352   1416   }
  1353   1417   
  1354   1418   static int btOverflowArrayRead(
  1355   1419     bt_db *db,
  1356   1420     u8 *pOvfl,
  1357   1421     u8 *aOut,
................................................................................
  1374   1438     ** it has a depth of zero.  */
  1375   1439     for(iPg=0; rc==SQLITE4_OK && iPg<(nDirect+(nDepth==0)) && iOut<nOut; iPg++){
  1376   1440       u32 pgno = btGetU32(&pOvfl[1+iPg*4]);
  1377   1441       BtPage *pPg = 0;
  1378   1442       rc = sqlite4BtPageGet(db->pPager, pgno, &pPg);
  1379   1443       if( rc==SQLITE4_OK ){
  1380   1444         int nCopy = MIN(nOut-iOut, pgsz);
  1381         -      u8 *a = sqlite4BtPageData(pPg);
         1445  +      u8 *a = btPageData(pPg);
  1382   1446         memcpy(&aOut[iOut], a, nCopy);
  1383   1447         sqlite4BtPageRelease(pPg);
  1384   1448         iOut += nCopy;
  1385   1449       }
  1386   1450     }
  1387   1451   
  1388   1452     /* Read from the overflow tree, if it was not read by the block above. */
................................................................................
  1397   1461   
  1398   1462       /* Initialize the apHier[] array. */
  1399   1463       pgno = btGetU32(&pOvfl[1+nDirect*4]);
  1400   1464       for(i=0; i<nDepth && rc==SQLITE4_OK; i++){
  1401   1465         u8 *a;
  1402   1466         rc = sqlite4BtPageGet(db->pPager, pgno, &apHier[i].pPg);
  1403   1467         if( rc==SQLITE4_OK ){
  1404         -        a = sqlite4BtPageData(apHier[i].pPg);
         1468  +        a = btPageData(apHier[i].pPg);
  1405   1469           pgno = btGetU32(a);
  1406   1470         }
  1407   1471       }
  1408   1472   
  1409   1473       /* Loop runs once for each leaf page we read from. */
  1410   1474       while( iOut<nOut ){
  1411   1475         u8 *a;                      /* Data associated with some page */
................................................................................
  1416   1480   
  1417   1481         nCopy =  MIN(nOut-iOut, pgsz);
  1418   1482         assert( nCopy>0 );
  1419   1483   
  1420   1484         /* Read data from the current leaf page */
  1421   1485         rc = sqlite4BtPageGet(db->pPager, pgno, &pLeaf);
  1422   1486         if( rc!=SQLITE4_OK ) break;
  1423         -      a = sqlite4BtPageData(pLeaf);
         1487  +      a = btPageData(pLeaf);
  1424   1488         memcpy(&aOut[iOut], a, nCopy);
  1425   1489         sqlite4BtPageRelease(pLeaf);
  1426   1490         iOut += nCopy;
  1427   1491   
  1428   1492         /* If all required data has been read, break out of the loop */
  1429   1493         if( iOut>=nOut ) break;
  1430   1494   
................................................................................
  1431   1495         for(iLvl=nDepth-1; iLvl>=0; iLvl--){
  1432   1496           if( apHier[iLvl].iCell<(nPgPtr-1) ) break;
  1433   1497         }
  1434   1498         if( iLvl<0 ) break; /* SQLITE4_CORRUPT? */
  1435   1499         apHier[iLvl].iCell++;
  1436   1500   
  1437   1501         for(; iLvl<nDepth && rc==SQLITE4_OK; iLvl++){
  1438         -        a = sqlite4BtPageData(apHier[iLvl].pPg);
         1502  +        a = btPageData(apHier[iLvl].pPg);
  1439   1503           pgno = btGetU32(&a[apHier[iLvl].iCell * 4]);
  1440   1504           if( iLvl<(nDepth-1) ){
  1441   1505             apHier[iLvl+1].iCell = 0;
  1442   1506             sqlite4BtPageRelease(apHier[iLvl+1].pPg);
  1443   1507             apHier[iLvl+1].pPg = 0;
  1444   1508             rc = sqlite4BtPageGet(db->pPager, pgno, &apHier[iLvl+1].pPg);
  1445   1509           }
................................................................................
  1471   1535       u8 *pKLocal = 0;                /* Pointer to local part of key */
  1472   1536       u8 *pVLocal = 0;                /* Pointer to local part of value, if any */
  1473   1537       int nKLocal = 0;                /* Bytes of key on page */
  1474   1538       int nVLocal = 0;                /* Bytes of value on page */
  1475   1539       int nKOvfl = 0;                 /* Bytes of key on overflow pages */
  1476   1540       int nVOvfl = 0;                 /* Bytes of value on overflow pages */
  1477   1541   
  1478         -    aData = (u8*)sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
         1542  +    aData = (u8*)btPageData(pCsr->apPage[pCsr->nPg-1]);
  1479   1543       pCell = btCellFind(aData, pgsz, pCsr->aiCell[pCsr->nPg-1]);
  1480   1544       pCell += sqlite4BtVarintGet32(pCell, &nKLocal);
  1481   1545       if( nKLocal==0 ){
  1482   1546         /* Type (c) leaf cell. */
  1483   1547         pCell += sqlite4BtVarintGet32(pCell, &nKLocal);
  1484   1548         pKLocal = pCell;
  1485   1549         pCell += nKLocal;
................................................................................
  1541   1605     }else{
  1542   1606       const int pgsz = sqlite4BtPagerPagesize(pCsr->base.pDb->pPager);
  1543   1607       u8 *aData;
  1544   1608       u8 *pCell;
  1545   1609       int nK;
  1546   1610       int iCell = pCsr->aiCell[pCsr->nPg-1];
  1547   1611   
  1548         -    aData = (u8*)sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
         1612  +    aData = btPageData(pCsr->apPage[pCsr->nPg-1]);
  1549   1613       assert( btCellCount(aData, pgsz)>iCell );
  1550   1614       pCell = btCellFind(aData, pgsz, iCell);
  1551   1615       pCell += sqlite4BtVarintGet32(pCell, &nK);
  1552   1616   
  1553   1617       if( nK==0 ){
  1554   1618         /* type (c) leaf cell */
  1555   1619         rc = btCsrBuffer(pCsr, 0);
................................................................................
  1817   1881     BtDbHdr *pHdr = sqlite4BtPagerDbhdr(db->pPager);
  1818   1882   
  1819   1883     assert( eSeek==BT_SEEK_LE || eSeek==BT_SEEK_EQ || eSeek==BT_SEEK_GE );
  1820   1884     assert( (pCsr->base.flags & CSR_VISIT_DEL)==0 || eSeek==BT_SEEK_GE );
  1821   1885     fiCsrReset(pCsr);
  1822   1886   
  1823   1887     if( pHdr->iMRoot ){
         1888  +    u8 *pKey;
  1824   1889       FiLevelIter iter;
  1825   1890   
  1826   1891       /* Initialize the iterator used to skip through database levels */
  1827   1892       rc = fiLevelIterInit(db, &iter);
  1828   1893       if( rc!=SQLITE4_OK ) return rc;
         1894  +    pKey = sqlite4_malloc(db->pEnv, nK+8);
         1895  +    if( pKey==0 ) return SQLITE4_NOMEM;
  1829   1896   
  1830   1897       if( eSeek==BT_SEEK_EQ ){
  1831   1898         FiSubCursor *pSub;
  1832   1899         BtCursor *pM;
  1833   1900   
  1834   1901         pCsr->base.flags &= ~(CSR_NEXT_OK | CSR_PREV_OK);
  1835   1902   
................................................................................
  1843   1910         pSub = pCsr->aSub;
  1844   1911         pM = &pSub->mcsr;
  1845   1912   
  1846   1913         btCsrSetup(db, pHdr->iMRoot, pM);
  1847   1914         while( 0==fiLevelIterNext(&iter) ){
  1848   1915   
  1849   1916           fiFormatPrefix(pSub->aPrefix, iter.iAge, iter.iLvl);
  1850         -        rc = btCsrSeek(pM, pSub->aPrefix, pK, nK, BT_SEEK_LE, BT_CSRSEEK_SEEK);
         1917  +        memcpy(pKey, pSub->aPrefix, sizeof(pSub->aPrefix));
         1918  +        rc = btCsrSeek(pM, 0, pKey, nK+8, BT_SEEK_LE, BT_CSRSEEK_SEEK);
  1851   1919   
  1852   1920           if( rc==SQLITE4_INEXACT ){
  1853   1921             rc = fiSubCsrCheckPrefix(pSub);
  1854   1922           }
  1855   1923   
  1856   1924           if( rc==SQLITE4_NOTFOUND ){
  1857   1925             /* All keys in this level are greater than pK/nK. */
................................................................................
  1897   1965   
  1898   1966         /* This loop runs once for each sub-cursor */
  1899   1967         while( rc==SQLITE4_OK && 0==fiLevelIterNext(&iter) ){
  1900   1968           FiSubCursor *pSub = &pCsr->aSub[iter.iSub];
  1901   1969           BtCursor *pM = &pSub->mcsr;
  1902   1970           btCsrSetup(db, pHdr->iMRoot, pM);
  1903   1971   
  1904         -        btPutU32(&pSub->aPrefix[0], (u32)iter.iAge);
  1905         -        btPutU32(&pSub->aPrefix[4], ~(u32)iter.iLvl);
         1972  +        fiFormatPrefix(pSub->aPrefix, iter.iAge, iter.iLvl);
         1973  +        memcpy(pKey, pSub->aPrefix, sizeof(pSub->aPrefix));
  1906   1974   
  1907         -        rc = btCsrSeek(pM, pSub->aPrefix, pK, nK, BT_SEEK_LE, BT_CSRSEEK_SEEK);
         1975  +        rc = btCsrSeek(pM, 0, pKey, nK+8, BT_SEEK_LE, BT_CSRSEEK_SEEK);
  1908   1976           if( rc==SQLITE4_INEXACT ) rc = fiSubCsrCheckPrefix(pSub);
  1909   1977           if( rc==SQLITE4_NOTFOUND && eSeek==BT_SEEK_GE ){
  1910         -          rc = btCsrSeek(pM, pSub->aPrefix, 0, 0, BT_SEEK_GE, BT_CSRSEEK_SEEK);
         1978  +          rc = btCsrSeek(pM, 0, pSub->aPrefix, sizeof(pSub->aPrefix), 
         1979  +              BT_SEEK_GE, BT_CSRSEEK_SEEK
         1980  +          );
  1911   1981             if( rc==SQLITE4_INEXACT ) rc = fiSubCsrCheckPrefix(pSub);
  1912   1982           }
  1913   1983   
  1914   1984           if( rc==SQLITE4_NOTFOUND ){
  1915   1985             /* No keys to visit in this level */
  1916   1986             assert( pSub->mcsr.nPg==0 );
  1917   1987             assert( pSub->csr.nPg==0 );
................................................................................
  1963   2033             }else if( bMatch==0 ){
  1964   2034               rc = (bHit ? SQLITE4_INEXACT : SQLITE4_NOTFOUND);
  1965   2035             }
  1966   2036           }
  1967   2037         }
  1968   2038       }
  1969   2039   
         2040  +    sqlite4_free(db->pEnv, pKey);
  1970   2041       fiLevelIterCleanup(&iter);
  1971   2042     }
  1972   2043   
  1973   2044     return rc;
  1974   2045   }
  1975   2046   
  1976   2047   static int fiCsrEnd(FiCursor *pCsr, int bLast){
................................................................................
  2140   2211       const int nPgPtr = pgsz / 4;
  2141   2212       BtPage *pPg;
  2142   2213       u8 *aData;
  2143   2214       int i;
  2144   2215   
  2145   2216       rc = sqlite4BtPageGet(pPager, pgno, &pPg);
  2146   2217       if( rc!=SQLITE4_OK ) return rc;
  2147         -    aData = sqlite4BtPageData(pPg);
         2218  +    aData = btPageData(pPg);
  2148   2219   
  2149   2220       for(i=0; rc==SQLITE4_OK && i<nPgPtr; i++){
  2150   2221         u32 child = btGetU32(&aData[i*4]);
  2151   2222         if( child==0 ) break;
  2152   2223         rc = btOverflowTrimtree(pgsz, pPager, child, nDepth-1);
  2153   2224       }
  2154   2225   
................................................................................
  2172   2243     u8 *aData;
  2173   2244     u8 *pCell;
  2174   2245     u8 *pOvfl = 0;
  2175   2246     int iCell = pCsr->aiCell[pCsr->nPg-1];
  2176   2247     int n;
  2177   2248     int rc = SQLITE4_OK;
  2178   2249     
  2179         -  aData = (u8*)sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
         2250  +  aData = (u8*)btPageData(pCsr->apPage[pCsr->nPg-1]);
  2180   2251     assert( btCellCount(aData, pgsz)>iCell );
  2181   2252     pCell = btCellFind(aData, pgsz, iCell);
  2182   2253     pCell += sqlite4BtVarintGet32(pCell, &n);
  2183   2254   
  2184   2255     if( n==0 ){
  2185   2256       /* Type (c) cell */
  2186   2257       pCell += sqlite4BtVarintGet32(pCell, &n);
................................................................................
  2241   2312         /* The row has been deleted out from under this cursor. So return
  2242   2313          ** NULL for data.  */
  2243   2314         *ppV = 0;
  2244   2315         *pnV = 0;
  2245   2316       }else{
  2246   2317         int iCell = pCsr->aiCell[pCsr->nPg-1];
  2247   2318   
  2248         -      aData = (u8*)sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
         2319  +      aData = (u8*)btPageData(pCsr->apPage[pCsr->nPg-1]);
  2249   2320         pCell = btCellFind(aData, pgsz, iCell);
  2250   2321         pCell += sqlite4BtVarintGet32(pCell, &nK);
  2251   2322         if( nK>0 ){
  2252   2323           pCell += nK;
  2253   2324           pCell += sqlite4BtVarintGet32(pCell, &nV);
  2254   2325         }
  2255   2326   
................................................................................
  2374   2445     const int pgsz = sqlite4BtPagerPagesize(pCsr->base.pDb->pPager);
  2375   2446     FiSubCursor *pSub;              /* Current sub-cursor */
  2376   2447     u8 *aData;                      /* Current page data */
  2377   2448     int iCell;
  2378   2449   
  2379   2450     assert( pCsr->iBt>=0 );
  2380   2451     pSub = &pCsr->aSub[pCsr->iBt];
  2381         -  aData = sqlite4BtPageData(pSub->csr.apPage[pSub->csr.nPg-1]);
         2452  +  aData = btPageData(pSub->csr.apPage[pSub->csr.nPg-1]);
  2382   2453     iCell = pSub->csr.aiCell[pSub->csr.nPg-1];
  2383   2454   
  2384   2455     *ppCell = btCellFindSize(aData, pgsz, iCell, pnCell);
  2385   2456   }
  2386   2457   
  2387   2458   /*
  2388   2459   ** Return true if the cell that the cursor currently points to contains 
................................................................................
  2390   2461   */
  2391   2462   static int btCsrOverflow(BtCursor *pCsr){
  2392   2463     const int pgsz = sqlite4BtPagerPagesize(pCsr->base.pDb->pPager);
  2393   2464     u8 *aData;                      /* Current page data */
  2394   2465     int nKey;                       /* First varint in cell */
  2395   2466     int res;                       /* First varint in cell */
  2396   2467   
  2397         -  aData = sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
         2468  +  aData = btPageData(pCsr->apPage[pCsr->nPg-1]);
  2398   2469     aData = btCellFind(aData, pgsz, pCsr->aiCell[pCsr->nPg-1]);
  2399   2470   
  2400   2471     aData += sqlite4BtVarintGet32(aData, &nKey);
  2401   2472     res = (nKey==0 || aData[nKey]==0);
  2402   2473     return res;
  2403   2474   }
  2404   2475   
................................................................................
  2432   2503   ** Attach a buffer to an existing page object.
  2433   2504   */
  2434   2505   static int btSetBuffer(bt_db *pDb, BtPage *pPg, u8 *aBuf){
  2435   2506     const int pgsz = sqlite4BtPagerPagesize(pDb->pPager);
  2436   2507     int rc;
  2437   2508     rc = sqlite4BtPageWrite(pPg);
  2438   2509     if( rc==SQLITE4_OK ){
  2439         -    u8 *aData = sqlite4BtPageData(pPg);
         2510  +    u8 *aData = btPageData(pPg);
  2440   2511       memcpy(aData, aBuf, pgsz);
  2441   2512       sqlite4_free(pDb->pEnv, aBuf);
  2442   2513     }
  2443   2514     return rc;
  2444   2515   }
  2445   2516   
  2446   2517   /*
................................................................................
  2455   2526     int iWrite;                     /* Write next cell at this offset in aTmp[] */
  2456   2527     int i;                          /* Used to iterate through cells */
  2457   2528     int bLeaf;                      /* True if pPg is a leaf page */
  2458   2529     int nHdr;                       /* Bytes in header of this page */
  2459   2530   
  2460   2531     if( btNewBuffer(pDb, &aTmp) ) return SQLITE4_NOMEM;
  2461   2532   
  2462         -  aData = sqlite4BtPageData(pPg);
         2533  +  aData = btPageData(pPg);
  2463   2534     nCell = btCellCount(aData, pgsz);
  2464   2535   
  2465   2536     bLeaf = 0==(btFlags(aData) & BT_PGFLAGS_INTERNAL);
  2466   2537     nHdr = bLeaf ? 1 : 5;
  2467   2538   
  2468   2539     /* Set header bytes of new page */
  2469   2540     memcpy(aTmp, aData, nHdr);
................................................................................
  2632   2703   static int btAllocateAndZero(bt_db *db, BtPage **ppPg){
  2633   2704     BtPage *pPg = 0;                /* Allocated page handle */
  2634   2705     int rc;                         /* Return code */
  2635   2706   
  2636   2707     rc = sqlite4BtPageAllocate(db->pPager, &pPg);
  2637   2708     if( rc==SQLITE4_OK ){
  2638   2709       const int pgsz = sqlite4BtPagerPagesize(db->pPager);
  2639         -    memset(sqlite4BtPageData(pPg), 0, pgsz);
         2710  +    memset(btPageData(pPg), 0, pgsz);
  2640   2711     }
  2641   2712   
  2642   2713     *ppPg = pPg;
  2643   2714     return rc;
  2644   2715   }
  2645   2716   
  2646   2717   static int btOverflowArrayPopulate(
................................................................................
  2678   2749     for(i=0; rc==SQLITE4_OK && i<nDepth; i++){
  2679   2750       u32 pgno;
  2680   2751       rc = btAllocateAndZero(db, &apHier[i].pPg);
  2681   2752       pgno = sqlite4BtPagePgno(apHier[i].pPg);
  2682   2753       if( i==0 ){
  2683   2754         btPutU32(&aOut[1 + BT_MAX_DIRECT_OVERFLOW*4], pgno);
  2684   2755       }else{
  2685         -      u8 *a = sqlite4BtPageData(apHier[i-1].pPg);
         2756  +      u8 *a = btPageData(apHier[i-1].pPg);
  2686   2757         btPutU32(a, pgno);
  2687   2758         apHier[i-1].iCell++;
  2688   2759       }
  2689   2760     }
  2690   2761   
  2691   2762     for(iOvfl=0; rc==SQLITE4_OK && (n1<nBuf1 || n2<nBuf2); iOvfl++){
  2692   2763       int nCopy1, nCopy2;           /* Bytes to copy from pBuf1 and pBuf2 */
  2693   2764       u8 *aData;
  2694   2765       BtPage *pPg;
  2695   2766       u32 pgno;
  2696   2767   
  2697   2768       rc = sqlite4BtPageAllocate(db->pPager, &pPg);
  2698   2769       if( rc!=SQLITE4_OK ) break;
  2699         -    aData = sqlite4BtPageData(pPg);
         2770  +    aData = btPageData(pPg);
  2700   2771       pgno = sqlite4BtPagePgno(pPg);
  2701   2772   
  2702   2773       nCopy1 = MIN(pgsz, nBuf1 - n1);
  2703   2774       nCopy2 = MIN(pgsz - nCopy1, nBuf2 - n2);
  2704   2775   
  2705   2776       memcpy(aData, &pBuf1[n1], nCopy1); n1 += nCopy1;
  2706   2777       memcpy(&aData[nCopy1], &pBuf2[n2], nCopy2); n2 += nCopy2;
................................................................................
  2709   2780   
  2710   2781       if( iOvfl<(BT_MAX_DIRECT_OVERFLOW+(nDepth==0)) ){
  2711   2782         btPutU32(&aOut[1 + iOvfl*4], pgno);
  2712   2783         nDirect++;
  2713   2784       }else{
  2714   2785         assert( nDepth>0 );
  2715   2786         for(i=nDepth-1; pgno && i>=0; i--){
  2716         -        u8 *a = sqlite4BtPageData(apHier[i].pPg);
         2787  +        u8 *a = btPageData(apHier[i].pPg);
  2717   2788           if( apHier[i].iCell==nPgPtr ){
  2718   2789             BtPage *pNew = 0;
  2719   2790             rc = sqlite4BtPageRelease(apHier[i].pPg);
  2720   2791             if( rc==SQLITE4_OK ){
  2721   2792               rc = btAllocateAndZero(db, &pNew);
  2722   2793               if( rc==SQLITE4_OK ){
  2723         -              u8 *a = sqlite4BtPageData(pNew);
         2794  +              u8 *a = btPageData(pNew);
  2724   2795                 btPutU32(a, pgno);
  2725   2796                 pgno = sqlite4BtPagePgno(pNew);
  2726   2797               }
  2727   2798             }
  2728   2799   
  2729   2800             if( rc!=SQLITE4_OK ){
  2730   2801               pgno = 0;
................................................................................
  2912   2983     u8 *aParent;                    /* Buffer of parent page */
  2913   2984     int iChild;                     /* Index of child page within parent */
  2914   2985     int nSib;                       /* Number of siblings */
  2915   2986     int iSib;                       /* Index of left-most sibling page */
  2916   2987   
  2917   2988     int i;
  2918   2989   
  2919         -  aParent = sqlite4BtPageData(pCsr->apPage[pCsr->nPg-2]);
         2990  +  aParent = btPageData(pCsr->apPage[pCsr->nPg-2]);
  2920   2991     iChild = pCsr->aiCell[pCsr->nPg-2];
  2921   2992     nCell = btCellCount(aParent, pgsz);
  2922   2993   
  2923   2994     if( nCell<2 ){
  2924   2995       nSib = nCell+1;
  2925   2996     }else{
  2926   2997       nSib = 3;
................................................................................
  2964   3035   
  2965   3036   static int btSetChildPgno(bt_db *pDb, BtPage *pPg, int iChild, u32 pgno){
  2966   3037     const int pgsz = sqlite4BtPagerPagesize(pDb->pPager);
  2967   3038     int rc;
  2968   3039   
  2969   3040     rc = sqlite4BtPageWrite(pPg);
  2970   3041     if( rc==SQLITE4_OK ){
  2971         -    u8 *aData = sqlite4BtPageData(pPg);
         3042  +    u8 *aData = btPageData(pPg);
  2972   3043       int nCell = btCellCount(aData, pgsz);
  2973   3044       if( iChild>=nCell ){
  2974   3045         btPutU32(&aData[1], pgno);
  2975   3046       }else{
  2976   3047         int nKey;
  2977   3048         u8 *pCell = btCellFind(aData, pgsz, iChild);
  2978   3049         pCell += sqlite4BtVarintGet32(pCell, &nKey);
................................................................................
  3106   3177     for(iPg=0; iPg<p->nIn && rc==SQLITE4_OK; iPg++){
  3107   3178       BtPage *pPg;                  /* Current page */
  3108   3179       u8 *aData;                    /* Page data */
  3109   3180       int nCell;                    /* Number of cells on page pPg */
  3110   3181       int iCell;                    /* Current cell in pPg */
  3111   3182   
  3112   3183       pPg = p->apPg[iPg];
  3113         -    aData = sqlite4BtPageData(pPg);
         3184  +    aData = btPageData(pPg);
  3114   3185       nCell = btCellCount(aData, pgsz);
  3115   3186   
  3116   3187       for(iCell=0; iCell<nCell && rc==SQLITE4_OK; iCell++){
  3117   3188         int nByte;
  3118   3189         u8 *pCell;
  3119   3190   
  3120   3191         if( pPg==pIns && iCell==iIns ){
................................................................................
  3137   3208       }
  3138   3209   
  3139   3210       /* If the siblings being balanced are not leaves, and the page just
  3140   3211       ** processed was not the right-most sibling, visit a cell from the
  3141   3212       ** parent page.  */
  3142   3213       if( p->bLeaf==0 && iPg<(p->nIn-1) && rc==SQLITE4_OK ){
  3143   3214         int iPar = p->pCsr->nPg-2;
  3144         -      u8 *aParent = sqlite4BtPageData(p->pCsr->apPage[iPar]);
         3215  +      u8 *aParent = btPageData(p->pCsr->apPage[iPar]);
  3145   3216         u8 *pCell = btCellFind(aParent, pgsz, p->pCsr->aiCell[iPar] + iPg);
  3146   3217         KeyValue kv;
  3147   3218         btInternalCellToKeyValue(pCell, &kv);
  3148   3219         kv.pgno = btGetU32(&aData[1]);
  3149   3220         rc = xVisit(p, iCall++, 0, 0, &kv);
  3150   3221       }
  3151   3222     }
................................................................................
  3164   3235   ** set to the size of the prefix in bytes.
  3165   3236   */
  3166   3237   static u8 *btKeyPrefix(const int pgsz, BtPage *pPg, int bLast, int *pnByte){
  3167   3238     u8 *p;
  3168   3239     int n;
  3169   3240     u8 *aData;
  3170   3241   
  3171         -  aData = sqlite4BtPageData(pPg);
         3242  +  aData = btPageData(pPg);
  3172   3243     p = btCellFind(aData, pgsz, bLast ? btCellCount(aData, pgsz)-1 : 0);
  3173   3244     p += sqlite4BtVarintGet32(p, &n);
  3174   3245     if( n==0 ) p += sqlite4BtVarintGet32(p, &n);
  3175   3246   
  3176   3247     *pnByte = n;
  3177   3248     return p;
  3178   3249   }
................................................................................
  3186   3257   **   * larger than all keys on pLeft, and 
  3187   3258   **   * smaller than or equal to all keys on pRight.
  3188   3259   */
  3189   3260   static void btPrefixKey(
  3190   3261       const int pgsz, BtPage *pLeft, BtPage *pRight, KeyValue *pKV
  3191   3262   ){
  3192   3263     int nMax;
  3193         -  int nMaxPrefix = pgsz/4;
         3264  +  int nMaxPrefix = BT_MAX_INTERNAL_KEY;
  3194   3265   
  3195   3266     u8 *aLeft; int nLeft;
  3196   3267     u8 *aRight; int nRight;
  3197   3268     int i;
  3198   3269   
  3199   3270     aLeft = btKeyPrefix(pgsz, pLeft, 1, &nLeft);
  3200   3271     aRight = btKeyPrefix(pgsz, pRight, 0, &nRight);
................................................................................
  3230   3301     BalanceCtx ctx;
  3231   3302     memset(&ctx, 0, sizeof(ctx));
  3232   3303     ctx.pCsr = pCsr;
  3233   3304     ctx.nKV = nKV;
  3234   3305     ctx.apKV = apKV;
  3235   3306     ctx.pgsz = pgsz;
  3236   3307     ctx.bLeaf = bLeaf;
  3237         -  ctx.flags = *(u8*)sqlite4BtPageData(pCsr->apPage[pCsr->nPg-1]);
         3308  +  ctx.flags = *(u8*)btPageData(pCsr->apPage[pCsr->nPg-1]);
  3238   3309   
  3239   3310     memset(anByteOut, 0, sizeof(anByteOut));
  3240   3311   
  3241   3312     /* Gather the sibling pages from which cells will be redistributed into
  3242   3313     ** the ctx.apPg[] array.  */
  3243   3314     assert( bLeaf==0 || bLeaf==1 );
  3244   3315     assert( pCsr->nPg>1 );
................................................................................
  3246   3317     if( rc!=SQLITE4_OK ) goto rebalance_out;
  3247   3318     pPar = pCsr->apPage[pCsr->nPg-2];
  3248   3319     iSib = pCsr->aiCell[pCsr->nPg-2];
  3249   3320   
  3250   3321     /* Count the number of input cells. */
  3251   3322     ctx.nCell = nKV;
  3252   3323     for(iPg=0; iPg<ctx.nIn; iPg++){
  3253         -    u8 *aData = sqlite4BtPageData(ctx.apPg[iPg]);
         3324  +    u8 *aData = btPageData(ctx.apPg[iPg]);
  3254   3325       ctx.nCell += btCellCount(aData, pgsz);
  3255   3326     }
  3256   3327     if( bLeaf==0 ) ctx.nCell += (ctx.nIn-1);
  3257   3328     assert( ctx.nCell>0 );
  3258   3329   
  3259   3330     /* Allocate and populate the anCellSz[] array */
  3260   3331     ctx.anCellSz = (int*)sqlite4_malloc(pDb->pEnv, sizeof(int)*ctx.nCell);
................................................................................
  3311   3382   #ifdef BT_STDERR_DEBUG
  3312   3383     {
  3313   3384       int iDbg;
  3314   3385       fprintf(stderr, 
  3315   3386           "\nbtBalance(): bLeaf=%d nIn=%d anIn[] = ", ctx.bLeaf, ctx.nIn
  3316   3387       );
  3317   3388       for(iDbg=0; iDbg<ctx.nIn; iDbg++){
  3318         -      u8 *aData = sqlite4BtPageData(ctx.apPg[iDbg]);
         3389  +      u8 *aData = btPageData(ctx.apPg[iDbg]);
  3319   3390         fprintf(stderr, "%d ", btCellCount(aData, pgsz));
  3320   3391       }
  3321   3392       fprintf(stderr, " ->  nOut=%d anOut[] = ", ctx.nOut);
  3322   3393       for(iDbg=0; iDbg<ctx.nOut; iDbg++){
  3323   3394         fprintf(stderr, "%d ", ctx.anOut[iDbg]);
  3324   3395       }
  3325   3396       fprintf(stderr, "\n");
................................................................................
  3344   3415     /* Populate the new buffers with the new page images. */
  3345   3416     rc = btBalanceVisitCells(&ctx, btBalanceOutput);
  3346   3417     if( rc!=SQLITE4_OK ) goto rebalance_out;
  3347   3418   
  3348   3419     if( ctx.bLeaf==0 ){
  3349   3420       /* Set the right-child pointer of the rightmost new sibling to a copy
  3350   3421       ** of the same pointer from the rightmost original sibling.  */
  3351         -    u8 *aRightSibling = sqlite4BtPageData(ctx.apPg[ctx.nIn-1]);
         3422  +    u8 *aRightSibling = btPageData(ctx.apPg[ctx.nIn-1]);
  3352   3423       memcpy(&(ctx.apOut[ctx.nOut-1])[1], &aRightSibling[1], 4);
  3353   3424     }
  3354   3425   
  3355   3426     /* Clobber the old pages with the new buffers */
  3356   3427     for(iPg=0; iPg<ctx.nOut; iPg++){
  3357   3428       if( iPg>=ctx.nIn ){
  3358   3429         rc = btAllocateNonOverflow(pDb, &ctx.apPg[iPg]);
................................................................................
  3367   3438       if( rc!=SQLITE4_OK ) goto rebalance_out;
  3368   3439     }
  3369   3440   
  3370   3441   #ifdef BT_STDERR_DEBUG
  3371   3442     {
  3372   3443       int iDbg;
  3373   3444       for(iDbg=0; iDbg<ctx.nOut; iDbg++){
  3374         -      u8 *aData = sqlite4BtPageData(ctx.apPg[iDbg]);
         3445  +      u8 *aData = btPageData(ctx.apPg[iDbg]);
  3375   3446         printPage(stderr, sqlite4BtPagePgno(ctx.apPg[iDbg]), aData, pgsz);
  3376   3447       }
  3377   3448     }
  3378   3449   #endif
  3379   3450   
  3380   3451     /* The leaves are written. Now gather the keys and page numbers to
  3381   3452     ** push up into the parent page. This is only required when rebalancing
................................................................................
  3402   3473     }
  3403   3474     if( rc==SQLITE4_OK && iPg==pCsr->nPg ){
  3404   3475       rc = btBalanceIfUnderfull(pCsr);
  3405   3476     }
  3406   3477   
  3407   3478   #ifdef BT_STDERR_DEBUG
  3408   3479     {
  3409         -    u8 *aData = sqlite4BtPageData(pPar);
         3480  +    u8 *aData = btPageData(pPar);
  3410   3481       printPage(stderr, sqlite4BtPagePgno(pPar), aData, pgsz);
  3411   3482     }
  3412   3483   #endif
  3413   3484   
  3414   3485    rebalance_out:
  3415   3486     for(iPg=0; iPg<array_size(ctx.apPg); iPg++){
  3416   3487       sqlite4BtPageRelease(ctx.apPg[iPg]);
................................................................................
  3431   3502     assert( pCsr->nPg==1 );
  3432   3503   
  3433   3504     rc = sqlite4BtPageWrite(pRoot);
  3434   3505     if( rc==SQLITE4_OK ){
  3435   3506       rc = btAllocateNonOverflow(pDb, &pNew);
  3436   3507     }
  3437   3508     if( rc==SQLITE4_OK ){
  3438         -    u8 *aRoot = sqlite4BtPageData(pRoot);
  3439         -    u8 *aData = sqlite4BtPageData(pNew);
         3509  +    u8 *aRoot = btPageData(pRoot);
         3510  +    u8 *aData = btPageData(pNew);
  3440   3511   
  3441   3512       memcpy(aData, aRoot, pgsz);
  3442   3513       aRoot[0] = BT_PGFLAGS_INTERNAL;
  3443   3514       if( pHdr->iMRoot==pCsr->iRoot ) aRoot[0] |= BT_PGFLAGS_METATREE;
  3444   3515       btPutU32(&aRoot[1], sqlite4BtPagePgno(pNew));
  3445   3516       btPutU16(&aRoot[pgsz-2], 0);
  3446   3517       btPutU16(&aRoot[pgsz-4], 5);
................................................................................
  3493   3564     for(i=0; i<nKV; i++){
  3494   3565       nReq += btKVCellSize(&apKV[i]) + 2;
  3495   3566     }
  3496   3567   
  3497   3568     iCell = pCsr->aiCell[pCsr->nPg-1];
  3498   3569     assert( pCsr->nPg>0 );
  3499   3570     pLeaf = pCsr->apPage[pCsr->nPg-1];
  3500         -  aData = (u8*)sqlite4BtPageData(pLeaf);
         3571  +  aData = (u8*)btPageData(pLeaf);
  3501   3572   
  3502   3573     /* Set the bLeaf variable to true if inserting into a leaf page, or
  3503   3574     ** false otherwise. Return SQLITE4_CORRUPT if the page is a leaf but
  3504   3575     ** the KeyValue pairs being inserted are suitable for internal nodes,
  3505   3576     ** or vice-versa.  */
  3506   3577     assert( nKV>0 );
  3507   3578     if( (0==(btFlags(aData) & BT_PGFLAGS_INTERNAL))!=bLeaf ){
................................................................................
  3520   3591       nFree = pgsz - iWrite - 6;
  3521   3592     }else{
  3522   3593       if( btFreeContiguous(aData, pgsz)<nReq && btFreeSpace(aData, pgsz)>=nReq ){
  3523   3594         /* Special case - the new entry will not fit on the page at present
  3524   3595         ** but would if the page were defragmented. So defragment it before
  3525   3596         ** continuing.  */
  3526   3597         rc = btDefragmentPage(pCsr->base.pDb, pLeaf);
  3527         -      aData = sqlite4BtPageData(pLeaf);
         3598  +      aData = btPageData(pLeaf);
  3528   3599       }
  3529   3600   
  3530   3601       iWrite = btFreeOffset(aData, pgsz);
  3531   3602       nFree = btFreeContiguous(aData, pgsz);
  3532   3603     }
  3533   3604   
  3534   3605     if( nFree>=nReq ){
  3535   3606       /* The new entry will fit on the page. So in this case all there
  3536   3607       ** is to do is update this single page. The easy case. */
  3537   3608       rc = sqlite4BtPageWrite(pLeaf);
  3538   3609       if( rc==SQLITE4_OK ){
  3539         -      aData = sqlite4BtPageData(pLeaf);
         3610  +      aData = btPageData(pLeaf);
  3540   3611   
  3541   3612         /* Make space within the cell pointer array */
  3542   3613         if( iCell!=nCell ){
  3543   3614           u8 *aFrom = btCellPtrFind(aData, pgsz, nCell-1);
  3544   3615           u8 *aTo = btCellPtrFind(aData, pgsz, nCell-1+nKV);
  3545   3616           memmove(aTo, aFrom, (nCell-iCell) * 2);
  3546   3617         }
................................................................................
  3606   3677       int i;                        /* Used to iterate through cells to delete */
  3607   3678       u8 *aData;                    /* Page buffer */
  3608   3679       int nCell;                    /* Number of cells initially on this page */
  3609   3680       int iDel;                     /* Index of cell to delete */
  3610   3681       int nFreed = 0;               /* Total bytes of space freed */
  3611   3682   
  3612   3683       iDel = pCsr->aiCell[pCsr->nPg-1];
  3613         -    aData = (u8*)sqlite4BtPageData(pPg);
         3684  +    aData = (u8*)btPageData(pPg);
  3614   3685       nCell = btCellCount(aData, pgsz);
  3615   3686   
  3616   3687       for(i=iDel; i<(iDel+nDel); i++){
  3617   3688         int nByte;
  3618   3689         btCellFindSize(aData, pgsz, i, &nByte);
  3619   3690         nFreed += nByte + 2;
  3620   3691       }
................................................................................
  3636   3707   }
  3637   3708   
  3638   3709   static int btBalanceIfUnderfull(BtCursor *pCsr){
  3639   3710     const int pgsz = sqlite4BtPagerPagesize(pCsr->base.pDb->pPager);
  3640   3711     int rc = SQLITE4_OK;
  3641   3712     int iPg = pCsr->nPg-1;
  3642   3713     BtPage *pPg = pCsr->apPage[iPg];
  3643         -  u8 *aData = sqlite4BtPageData(pPg);
         3714  +  u8 *aData = btPageData(pPg);
  3644   3715     int nCell = btCellCount(aData, pgsz);
  3645   3716     int nFree = btFreeSpace(aData, pgsz);
  3646   3717     int bLeaf = (0==(btFlags(aData) & BT_PGFLAGS_INTERNAL));
  3647   3718   
  3648   3719     if( iPg==0 ){
  3649   3720       /* Root page. If it contains no cells at all and is not already
  3650   3721       ** a leaf, shorten the tree by one here by copying the contents 
................................................................................
  3655   3726         BtPage *pChild;
  3656   3727   
  3657   3728         rc = sqlite4BtPageWrite(pPg);
  3658   3729         if( rc==SQLITE4_OK ){
  3659   3730           rc = sqlite4BtPageGet(pPager, pgno, &pChild);
  3660   3731         }
  3661   3732         if( rc==SQLITE4_OK ){
  3662         -        u8 *a = sqlite4BtPageData(pChild);
         3733  +        u8 *a = btPageData(pChild);
  3663   3734           memcpy(aData, a, pgsz);
  3664   3735           rc = btTrimNonOverflow(pCsr->base.pDb, pChild);
  3665   3736         }
  3666   3737       }
  3667   3738     }else if( nCell==0 || (nFree>(2*pgsz/3) && bLeaf==0) ){
  3668   3739       rc = btBalance(pCsr, bLeaf, 0, 0);
  3669   3740     }
................................................................................
  3781   3852     u32 iNew = 0;
  3782   3853     BtPage *pPg;
  3783   3854     int rc;
  3784   3855   
  3785   3856     assert( flag==BT_PGFLAGS_METATREE || flag==BT_PGFLAGS_SCHEDULE || flag==0 );
  3786   3857     rc = sqlite4BtPageAllocate(db->pPager, &pPg);
  3787   3858     if( rc==SQLITE4_OK ){
  3788         -    u8 *aData = sqlite4BtPageData(pPg);
         3859  +    u8 *aData = btPageData(pPg);
  3789   3860       aData[0] = (flag & 0xFF);
  3790   3861       iNew = sqlite4BtPagePgno(pPg);
  3791   3862       sqlite4BtPageRelease(pPg);
  3792   3863     }
  3793   3864   
  3794   3865     *piNew = iNew;
  3795   3866     return rc;
................................................................................
  3929   4000     return SQLITE4_OK;
  3930   4001   }
  3931   4002   
  3932   4003   static void btWriteSchedulePage(BtPage *pPg, BtSchedule *p, int *pRc){
  3933   4004     if( *pRc==SQLITE4_OK ){
  3934   4005       int rc = sqlite4BtPageWrite(pPg);
  3935   4006       if( rc==SQLITE4_OK ){
  3936         -      u8 *aData = sqlite4BtPageData(pPg);
         4007  +      u8 *aData = btPageData(pPg);
  3937   4008         btWriteSchedule(aData, p, &rc);
  3938   4009       }
  3939   4010       *pRc = rc;
  3940   4011     }
  3941   4012   }
  3942   4013   
  3943   4014   static int btAllocateBlock(
................................................................................
  4080   4151       delcsr.nPg = 1;
  4081   4152       delcsr.base.pDb = db;
  4082   4153   
  4083   4154       while( rc==SQLITE4_OK && iTrunk!=0 ){
  4084   4155         BtPage *pTrunk = 0;
  4085   4156         rc = sqlite4BtPageGet(db->pPager, iTrunk, &pTrunk);
  4086   4157         if( rc==SQLITE4_OK ){
  4087         -        u8 *aTData = sqlite4BtPageData(pTrunk);
         4158  +        u8 *aTData = btPageData(pTrunk);
  4088   4159           int nOvfl = btGetU32(aTData);
  4089   4160           int i;
  4090   4161   
  4091   4162           for(i=0; i<nOvfl; i++){
  4092   4163             u32 lpgno = btGetU32(&aTData[8 + i*8]);
  4093   4164             delcsr.aiCell[0] = (int)btGetU32(&aTData[8 + i*8 + 4]);
  4094   4165             rc = sqlite4BtPageGet(db->pPager, lpgno, &delcsr.apPage[0]);
................................................................................
  4282   4353     u32 iMax;                       /* Maximum input level number */
  4283   4354     u32 iOutLvl;                    /* Output level number */
  4284   4355   
  4285   4356     /* Find the schedule page. If there is no schedule page, allocate it now. */
  4286   4357     if( pHdr->iSRoot==0 ){
  4287   4358       rc = sqlite4BtPageAllocate(db->pPager, &pPg);
  4288   4359       if( rc==SQLITE4_OK ){
  4289         -      u8 *aData = sqlite4BtPageData(pPg);
         4360  +      u8 *aData = btPageData(pPg);
  4290   4361         memset(aData, 0, pHdr->pgsz);
  4291   4362         sqlite4BtPagerDbhdrDirty(db->pPager);
  4292   4363         pHdr->iSRoot = sqlite4BtPagePgno(pPg);
  4293   4364       }
  4294   4365     }else{
  4295   4366       rc = sqlite4BtPageGet(db->pPager, pHdr->iSRoot, &pPg);
  4296   4367     }
  4297   4368   
  4298   4369     /* Check if the schedule page is busy. If so, no new merge may be 
  4299   4370     ** scheduled. If the schedule page is not busy, call btFindMerge() to
  4300   4371     ** figure out which levels should be scheduled for merge.  */
  4301   4372     if( rc==SQLITE4_OK ){
  4302         -    aData = sqlite4BtPageData(pPg);
         4373  +    aData = btPageData(pPg);
  4303   4374       
  4304   4375       switch( btGetU32(aData) ){
  4305   4376         case BT_SCHEDULE_BUSY:
  4306   4377           rc = SQLITE4_NOTFOUND;
  4307   4378           break;
  4308   4379   
  4309   4380         case BT_SCHEDULE_DONE: {
................................................................................
  4393   4464       if( rc==SQLITE4_OK ){
  4394   4465         u32 iRoot = btFirstOfBlock(pHdr, pHdr->iSubBlock);
  4395   4466         BtPage *pPg = 0;
  4396   4467   
  4397   4468         rc = sqlite4BtPageGet(db->pPager, iRoot, &pPg);
  4398   4469         if( rc==SQLITE4_OK ) rc = sqlite4BtPageWrite(pPg);
  4399   4470         if( rc==SQLITE4_OK ){
  4400         -        u8 *aData = sqlite4BtPageData(pPg);
         4471  +        u8 *aData = btPageData(pPg);
  4401   4472           memset(&aData[pHdr->pgsz-6], 0, 6);
  4402   4473           aData[0] = 0;
  4403   4474         }
  4404   4475         sqlite4BtPageRelease(pPg);
  4405   4476       }
  4406   4477     }
  4407   4478   
................................................................................
  4957   5028     BtPage *pPg;
  4958   5029     sqlite4_buffer buf;
  4959   5030     int pgsz;
  4960   5031   
  4961   5032     pgsz = sqlite4BtPagerPagesize(db->pPager);
  4962   5033     sqlite4_buffer_init(&buf, 0);
  4963   5034     sqlite4BtPageGet(db->pPager, iRoot, &pPg);
  4964         -  btPageToAscii(iRoot, 1, db->pPager, sqlite4BtPageData(pPg), pgsz, &buf);
         5035  +  btPageToAscii(iRoot, 1, db->pPager, btPageData(pPg), pgsz, &buf);
  4965   5036     fprintf(stderr, "%d TREE at %d:\n", iCall, (int)iRoot);
  4966   5037     fprintf(stderr, "%.*s", buf.n, (char*)buf.p);
  4967   5038     sqlite4_buffer_clear(&buf);
  4968   5039     sqlite4BtPageRelease(pPg);
  4969   5040   }
  4970   5041   
  4971   5042   void sqlite4BtDebugFastTree(bt_db *db, int iCall){
................................................................................
  5076   5147           BtPage *pPg = 0;
  5077   5148           rc = sqlite4BtPageGet(db->pPager, pInfo->pgno, &pPg);
  5078   5149           if( rc==SQLITE4_OK ){
  5079   5150             BtPager *p = db->pPager;
  5080   5151             int bAscii = (pInfo->eType==BT_INFO_PAGEDUMP_ASCII);
  5081   5152             u8 *aData;
  5082   5153             int nData;
  5083         -          aData = sqlite4BtPageData(pPg);
         5154  +          aData = btPageData(pPg);
  5084   5155             nData = sqlite4BtPagerPagesize(p);
  5085   5156             btPageToAscii(pInfo->pgno, bAscii, p, aData, nData, &pInfo->output);
  5086   5157             sqlite4_buffer_append(&pInfo->output, "", 1);
  5087   5158             sqlite4BtPageRelease(pPg);
  5088   5159           }
  5089   5160           btControlTransactionDone(db, iCtx);
  5090   5161         }
................................................................................
  5318   5389   
  5319   5390       while( rc==SQLITE4_OK && iTrunk ){
  5320   5391         BtPage *pPg = 0;
  5321   5392         rc = sqlite4BtPageGet(db->pPager, iTrunk, &pPg);
  5322   5393         if( rc==SQLITE4_OK ){
  5323   5394           int i;
  5324   5395           u32 nFree;
  5325         -        u8 *aData = sqlite4BtPageData(pPg);
         5396  +        u8 *aData = btPageData(pPg);
  5326   5397   
  5327   5398           nFree = btGetU32(aData);
  5328   5399           for(i=0; i<nFree; i++){
  5329   5400             u32 pgno = btGetU32(&aData[8 + i*4]);
  5330   5401             if( bBlocklist ){
  5331   5402               markBlockAsUsed(db, pgno, aUsed);
  5332   5403             }else{
................................................................................
  5351   5422       int rc;
  5352   5423   
  5353   5424       rc = sqlite4BtPageGet(db->pPager, pHdr->iSRoot, &pPg);
  5354   5425       if( rc==SQLITE4_OK ){
  5355   5426         BtSchedule s;
  5356   5427         int i;
  5357   5428   
  5358         -      btReadSchedule(db, sqlite4BtPageData(pPg), &s);
         5429  +      btReadSchedule(db, btPageData(pPg), &s);
  5359   5430         sqlite4BtPageRelease(pPg);
  5360   5431   
  5361   5432         assert( s.eBusy!=BT_SCHEDULE_BUSY || s.aRoot[0]==0 );
  5362   5433         if( s.eBusy!=BT_SCHEDULE_EMPTY ){
  5363   5434           for(i=0; rc==SQLITE4_OK && i<array_size(s.aBlock); i++){
  5364   5435             markBlockAsUsed(db, s.aBlock[i], aUsed);
  5365   5436           }

Changes to src/bt_pager.c.

    59     59     BtPage *pPg;                    /* Pointer to page this object belongs to */
    60     60     u8 *aData;                      /* Saved data */
    61     61     BtSavepage *pNext;              /* Next saved page in the same savepoint */
    62     62     int iSavepoint;                 /* Transaction number of savepoint */
    63     63     BtSavepage *pNextSavepage;      /* Next saved page on the same BtPage */
    64     64   };
    65     65   
           66  +/*
           67  +** See macro btPageData() in bt_main.c for why the aData variable must be
           68  +** first in this structure.
           69  +*/
    66     70   struct BtPage {
           71  +  u8 *aData;                      /* Pointer to current data. MUST BE FIRST */
    67     72     BtPager *pPager;                /* Pager object that owns this page handle */
    68     73     u32 pgno;                       /* Current page number */
    69     74     int nRef;                       /* Number of references to this page */
    70     75     int flags;                      /* Mask of BTPAGE_XXX flags */
    71         -  u8 *aData;                      /* Pointer to current data */
    72     76     BtPage *pNextHash;              /* Next entry with same hash key */
    73     77     BtPage *pNextDirty;             /* Next page in BtPager.pDirty list */
           78  +  BtPage *pNextLru;               /* Next page in LRU list */
           79  +  BtPage *pPrevLru;               /* Previous page in LRU list */
    74     80     BtSavepage *pSavepage;          /* List of saved page images */
    75     81   };
    76     82   
    77     83   /*
    78     84   ** Candidate values for BtPage.flags
    79     85   */
    80     86   #define BT_PAGE_DIRTY 0x0001      /* Set for pages in BtPager.pDirty list */
................................................................................
    88     94     BtLock btl;                     /* Variables shared with bt_lock module */
    89     95     BtLog *pLog;                    /* Logging module */
    90     96     int iTransactionLevel;          /* Current transaction level (see bt.h) */
    91     97     char *zFile;                    /* Database file name */
    92     98     int nFile;                      /* Length of string zFile in bytes */
    93     99     BtPageHash hash;                /* Hash table */
    94    100     BtPage *pDirty;                 /* List of all dirty pages */
          101  +  BtPage *pLru;                   /* Head of LRU list */
          102  +  BtPage *pLruTail;               /* Tail of LRU list */
          103  +  int nPageAlloc;                 /* Number of page objects allocated */
          104  +  int nPageLimit;                 /* Maximum page objects to allocate */
    95    105     int nTotalRef;                  /* Total number of outstanding page refs */
    96    106     int bDoAutoCkpt;                /* Do auto-checkpoint after next unlock */
    97    107     BtSavepoint *aSavepoint;        /* Savepoint array */
    98    108     int nSavepoint;                 /* Number of entries in aSavepoint array */
    99    109     BtDbHdr *pHdr;                  /* Header object for current read snapshot */
   100    110     int bDirtyHdr;                  /* True if pHdr has been modified */
   101    111     void *pLogsizeCtx;              /* A copy of this is passed to xLogsize() */
................................................................................
   202    212       }
   203    213     }
   204    214   }
   205    215   #endif
   206    216   /*
   207    217   ** End of BtPageHash object interface.
   208    218   **************************************************************************/
          219  +
          220  +static void btLruAdd(BtPager *pPager, BtPage *pPg){
          221  +  assert( pPg->pPrevLru==0 );
          222  +  assert( pPg->pNextLru==0 );
          223  +  if( pPager->pLru ){
          224  +    pPager->pLruTail->pNextLru = pPg;
          225  +    pPg->pPrevLru = pPager->pLruTail;
          226  +    pPager->pLruTail = pPg;
          227  +  }else{
          228  +    pPager->pLru = pPg;
          229  +    pPager->pLruTail = pPg;
          230  +  }
          231  +}
          232  +
          233  +/*
          234  +** Remove page pPg from the LRU list. If pPg is not currently part of
          235  +** the LRU list, the results are undefined.
          236  +*/
          237  +static void btLruRemove(BtPager *pPager, BtPage *pPg){
          238  +  assert( (pPg==pPager->pLru)==(pPg->pPrevLru==0) );
          239  +  assert( (pPg==pPager->pLruTail)==(pPg->pNextLru==0) );
          240  +
          241  +  if( pPg->pNextLru ){
          242  +    pPg->pNextLru->pPrevLru = pPg->pPrevLru;
          243  +  }else{
          244  +    pPager->pLruTail = pPg->pPrevLru;
          245  +  }
          246  +  if( pPg->pPrevLru ){
          247  +    pPg->pPrevLru->pNextLru = pPg->pNextLru;
          248  +  }else{
          249  +    pPager->pLru = pPg->pNextLru;
          250  +  }
          251  +
          252  +  pPg->pNextLru = 0;
          253  +  pPg->pPrevLru = 0;
          254  +}
   209    255   
   210    256   /*
   211    257   ** Open a new pager database handle.
   212    258   */
   213    259   int sqlite4BtPagerNew(sqlite4_env *pEnv, int nExtra, BtPager **pp){
   214    260     BtPager *p;
   215    261     int nByte;
................................................................................
   222    268     p->btl.pEnv = pEnv;
   223    269     p->btl.pVfs = sqlite4BtEnvDefault();
   224    270     p->btl.iSafetyLevel = BT_DEFAULT_SAFETY;
   225    271     p->btl.nAutoCkpt = BT_DEFAULT_AUTOCKPT;
   226    272     p->btl.bRequestMultiProc = BT_DEFAULT_MULTIPROC;
   227    273     p->btl.nBlksz = BT_DEFAULT_BLKSZ;
   228    274     p->btl.nPgsz = BT_DEFAULT_PGSZ;
          275  +  p->nPageLimit = BT_DEFAULT_CACHESZ;
   229    276     *pp = p;
   230    277     return SQLITE4_OK;
   231    278   }
   232    279   
   233    280   static void btFreePage(BtPager *p, BtPage *pPg){
   234    281     if( pPg ){
   235    282       sqlite4_free(p->btl.pEnv, pPg->aData);
................................................................................
   247    294       BtPage *pNext;
   248    295       for(pPg=p->hash.aHash[i]; pPg; pPg=pNext){
   249    296         pNext = pPg->pNextHash;
   250    297         btFreePage(p, pPg);
   251    298       }
   252    299     }
   253    300     btHashClear(p);
          301  +
          302  +  p->pLruTail = 0;
          303  +  p->pLru = 0;
   254    304   }
   255    305   
   256    306   static int btCheckpoint(BtLock *pLock){
   257    307     BtPager *p = (BtPager*)pLock;
   258    308     if( p->pLog==0 ) return SQLITE4_BUSY;
   259    309     return sqlite4BtLogCheckpoint(p->pLog, 0);
   260    310   }
................................................................................
   543    593   
   544    594     assert( p->pHdr );
   545    595     p->pHdr = 0;
   546    596     rc = sqlite4BtLogSnapshotClose(p->pLog);
   547    597   
   548    598     /* Purge the page cache. */
   549    599     assert( p->pDirty==0 );
   550         -  btPurgeCache(p);
          600  +  //btPurgeCache(p);
   551    601   
   552    602     if( rc==SQLITE4_OK && p->bDoAutoCkpt ){
   553    603       sqlite4BtLogCheckpoint(p->pLog, (p->btl.nAutoCkpt / 2));
   554    604     }
   555    605     p->bDoAutoCkpt = 0;
   556    606   
   557    607     return rc;
................................................................................
   583    633       pNext = pPg->pNextDirty;
   584    634       pPg->flags &= ~(BT_PAGE_DIRTY);
   585    635       pPg->pNextDirty = 0;
   586    636       if( rc==SQLITE4_OK ){
   587    637         int nPg = ((pNext==0) ? p->pHdr->nPg : 0);
   588    638         rc = sqlite4BtLogWrite(p->pLog, pPg->pgno, pPg->aData, nPg);
   589    639       }
          640  +    if( pPg->nRef==0 ) btLruAdd(p, pPg);
   590    641     }
   591         -  p->pDirty = 0;
          642  +  p->pDirty = pPg;
   592    643     sqlite4BtLogSnapshotEndWrite(p->pLog);
   593    644   
   594    645     nLogsize = sqlite4BtLogSize(p->pLog);
   595    646   
   596    647     if( p->btl.nAutoCkpt && nLogsize>=p->btl.nAutoCkpt ){
   597    648       p->bDoAutoCkpt = 1;
   598    649     }
................................................................................
   618    669       rc = p->btl.pVfs->xRead(p->btl.pFd, iOff, pPg->aData, p->pHdr->pgsz);
   619    670     }
   620    671   
   621    672     return rc;
   622    673   }
   623    674   
   624    675   static int btAllocatePage(BtPager *p, BtPage **ppPg){
   625         -  int rc;                         /* Return code */
          676  +  int rc = SQLITE4_OK;            /* Return code */
   626    677     BtPage *pRet;
   627         -  u8 *aData;
   628    678   
   629         -  pRet = (BtPage*)sqlite4_malloc(p->btl.pEnv, sizeof(BtPage));
   630         -  aData = (u8*)sqlite4_malloc(p->btl.pEnv, p->pHdr->pgsz);
          679  +  if( p->hash.nEntry>=p->nPageLimit && p->pLru ){
          680  +    BtPage **pp;
          681  +    int h;
   631    682   
   632         -  if( pRet && aData ){
   633         -    memset(pRet, 0, sizeof(BtPage));
   634         -    pRet->aData = aData;
   635         -    pRet->pPager = p;
   636         -    rc = SQLITE4_OK;
          683  +    /* Remove the page from the head of the LRU list. */
          684  +    pRet = p->pLru;
          685  +    assert( (pRet->pNextLru==0)==(pRet==p->pLruTail) );
          686  +    p->pLru = pRet->pNextLru;
          687  +    if( p->pLru==0 ){
          688  +      p->pLruTail = 0;
          689  +    }else{
          690  +      p->pLru->pPrevLru = 0;
          691  +    }
          692  +
          693  +    /* Remove the page from the hash table. */
          694  +    btHashRemove(p, pRet);
          695  +
          696  +    assert( pRet->pPrevLru==0 );
          697  +    assert( pRet->nRef==0 );
          698  +    assert( pRet->pSavepage==0 );
          699  +    pRet->flags = 0;
          700  +    pRet->pNextHash = 0;
          701  +    pRet->pNextDirty = 0;
          702  +    pRet->pNextLru = 0;
   637    703     }else{
   638         -    sqlite4_free(p->btl.pEnv, pRet);
   639         -    sqlite4_free(p->btl.pEnv, aData);
   640         -    rc = btErrorBkpt(SQLITE4_NOMEM);
   641         -    pRet = 0;
          704  +    u8 *aData = (u8*)sqlite4_malloc(p->btl.pEnv, p->pHdr->pgsz);
          705  +    pRet = (BtPage*)sqlite4_malloc(p->btl.pEnv, sizeof(BtPage));
          706  +
          707  +    if( pRet && aData ){
          708  +      memset(pRet, 0, sizeof(BtPage));
          709  +      pRet->aData = aData;
          710  +      pRet->pPager = p;
          711  +    }else{
          712  +      sqlite4_free(p->btl.pEnv, pRet);
          713  +      sqlite4_free(p->btl.pEnv, aData);
          714  +      rc = btErrorBkpt(SQLITE4_NOMEM);
          715  +      pRet = 0;
          716  +    }
   642    717     }
   643    718   
   644    719     *ppPg = pRet;
   645    720     return rc;
   646    721   }
   647    722   
   648    723   /*
................................................................................
   846    921         if( rc!=SQLITE4_OK ){
   847    922           btFreePage(p, pRet);
   848    923           pRet = 0;
   849    924         }else{
   850    925           sqlite4BtDebugReadPage(&p->btl, pgno, pRet->aData, p->pHdr->pgsz);
   851    926         }
   852    927       }
          928  +  }else if( pRet->nRef==0 && (pRet->flags & BT_PAGE_DIRTY)==0 ){
          929  +    btLruRemove(p, pRet);
   853    930     }
   854    931   
   855    932     assert( (pRet!=0)==(rc==SQLITE4_OK) );
   856    933     if( rc==SQLITE4_OK ){
   857    934       p->nTotalRef++;
   858    935       pRet->nRef++;
   859    936     }
................................................................................
  1007   1084   */
  1008   1085   int sqlite4BtPageTrimPgno(BtPager *pPager, u32 pgno){
  1009   1086     return btFreelistAdd(pPager, 0, pgno);
  1010   1087   }
  1011   1088   
  1012   1089   int sqlite4BtPageRelease(BtPage *pPg){
  1013   1090     if( pPg ){
         1091  +    BtPager *pPager = pPg->pPager;
         1092  +
  1014   1093       assert( pPg->nRef>=1 );
  1015   1094       pPg->nRef--;
  1016   1095       pPg->pPager->nTotalRef--;
         1096  +
         1097  +    /* If the refcount is now zero and the page is not dirty, add it to
         1098  +    ** the LRU list.  */
         1099  +    if( pPg->nRef==0 && (pPg->flags & BT_PAGE_DIRTY)==0 ){
         1100  +      btLruAdd(pPager, pPg);
         1101  +    }
  1017   1102     }
  1018   1103     return SQLITE4_OK;
  1019   1104   }
  1020   1105   
  1021   1106   void sqlite4BtPageReference(BtPage *pPg){
  1022   1107     assert( pPg->nRef>=1 );
  1023   1108     pPg->nRef++;

Changes to test/select1.test.

    12     12   # focus of this file is testing the SELECT statement.
    13     13   #
    14     14   # $Id: select1.test,v 1.70 2009/05/28 01:00:56 drh Exp $
    15     15   
    16     16   set testdir [file dirname $argv0]
    17     17   source $testdir/tester.tcl
    18     18   
    19         -btenv BT
    20         -BT attach db
    21         -
    22     19   # Try to select on a non-existent table.
    23     20   #
    24     21   do_test select1-1.1 {
    25     22     set v [catch {execsql {SELECT * FROM test1}} msg]
    26     23     lappend v $msg
    27     24   } {1 {no such table: test1}}
    28     25