SQLite4
Check-in [02954a5b8d]
Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Merge prefix-mmap branch with trunk. This allows lsm to memory map a prefix of the database file and use regular read and write system calls to access the remainder.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 02954a5b8dc57bba1277549b65aade472761de3b
User & Date: dan 2013-03-08 09:59:05
Context
2013-03-08
20:47
Fix various issues in lsm_file.c. check-in: 4418f901c9 user: dan tags: trunk
09:59
Merge prefix-mmap branch with trunk. This allows lsm to memory map a prefix of the database file and use regular read and write system calls to access the remainder. check-in: 02954a5b8d user: dan tags: trunk
2013-03-07
20:12
Fix some test cases to account for sqlite4 memory mapping the file. Leaf check-in: 5f9133f8a8 user: dan tags: prefix-mmap
2013-03-04
22:37
New autoconf/make apparatus. Usage:

./autogen.sh ./configure make check-in: 7bf5b6c8d0 user: owensmk tags: trunk

Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to lsm-test/lsmtest_tdb3.c.

986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
}

int test_lsm_small_open(
  const char *zFile, 
  int bClear, 
  TestDb **ppDb
){
  const char *zCfg = "page_size=256 block_size=64";
  return testLsmOpen(zCfg, zFile, bClear, ppDb);
}

int test_lsm_lomem_open(
  const char *zFilename, 
  int bClear, 
  TestDb **ppDb







|







986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
}

int test_lsm_small_open(
  const char *zFile, 
  int bClear, 
  TestDb **ppDb
){
  const char *zCfg = "page_size=256 block_size=64 mmap=1024";
  return testLsmOpen(zCfg, zFile, bClear, ppDb);
}

int test_lsm_lomem_open(
  const char *zFilename, 
  int bClear, 
  TestDb **ppDb

Changes to src/kvlsm.c.

464
465
466
467
468
469
470
471

472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
    struct Config {
      const char *zParam;
      int eParam;
    } aConfig[] = {
      { "lsm_mmap", LSM_CONFIG_MMAP },
      { "lsm_page_size", LSM_CONFIG_PAGE_SIZE },
      { "lsm_block_size", LSM_CONFIG_BLOCK_SIZE },
      { "lsm_multiple_processes", LSM_CONFIG_MULTIPLE_PROCESSES }

    };

    memset(pNew, 0, sizeof(KVLsm));
    pNew->base.pStoreVfunc = &kvlsmMethods;
    pNew->base.pEnv = pEnv;
    rc = lsm_new(0, &pNew->pDb);
    if( rc==SQLITE4_OK ){
      int i;
      int bMmap = 0;
      lsm_config(pNew->pDb, LSM_CONFIG_MMAP, &bMmap);

      for(i=0; i<ArraySize(aConfig); i++){
        const char *zVal = sqlite4_uri_parameter(zName, aConfig[i].zParam);
        if( zVal ){
          int nVal = sqlite4Atoi(zVal);
          lsm_config(pNew->pDb, aConfig[i].eParam, &nVal);
        }
      }







|
>








<
<
<







464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480



481
482
483
484
485
486
487
    struct Config {
      const char *zParam;
      int eParam;
    } aConfig[] = {
      { "lsm_mmap", LSM_CONFIG_MMAP },
      { "lsm_page_size", LSM_CONFIG_PAGE_SIZE },
      { "lsm_block_size", LSM_CONFIG_BLOCK_SIZE },
      { "lsm_multiple_processes", LSM_CONFIG_MULTIPLE_PROCESSES },
      { "lsm_automerge", LSM_CONFIG_AUTOMERGE }
    };

    memset(pNew, 0, sizeof(KVLsm));
    pNew->base.pStoreVfunc = &kvlsmMethods;
    pNew->base.pEnv = pEnv;
    rc = lsm_new(0, &pNew->pDb);
    if( rc==SQLITE4_OK ){
      int i;



      for(i=0; i<ArraySize(aConfig); i++){
        const char *zVal = sqlite4_uri_parameter(zName, aConfig[i].zParam);
        if( zVal ){
          int nVal = sqlite4Atoi(zVal);
          lsm_config(pNew->pDb, aConfig[i].eParam, &nVal);
        }
      }

Changes to src/lsm.h.

223
224
225
226
227
228
229
230
231







232
233
234
235
236
237
238
**   This means that this option may cause the connection to perform a 
**   checkpoint even if the current connection has itself written very little
**   data into the database file.
**
**   The default value is 2048 (checkpoint every 2MB).
**
** LSM_CONFIG_MMAP:
**   A read/write integer parameter. True to use mmap() to access the 
**   database file. False otherwise.







**
** LSM_CONFIG_USE_LOG:
**   A read/write boolean parameter. True (the default) to use the log
**   file normally. False otherwise.
**
** LSM_CONFIG_AUTOMERGE:
**   A read/write integer parameter. The minimum number of segments to







|
|
>
>
>
>
>
>
>







223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
**   This means that this option may cause the connection to perform a 
**   checkpoint even if the current connection has itself written very little
**   data into the database file.
**
**   The default value is 2048 (checkpoint every 2MB).
**
** LSM_CONFIG_MMAP:
**   A read/write integer parameter. If this value is set to 0, then the 
**   database file is accessed using ordinary read/write IO functions. Or,
**   if it is set to 1, then the database file is memory mapped and accessed
**   that way. If this parameter is set to any value N greater than 1, then
**   up to the first N KB of the file are memory mapped, and any remainder
**   accessed using read/write IO.
**
**   The default value is 1 on 64-bit platforms and 32768 on 32-bit platforms.
**   
**
** LSM_CONFIG_USE_LOG:
**   A read/write boolean parameter. True (the default) to use the log
**   file normally. False otherwise.
**
** LSM_CONFIG_AUTOMERGE:
**   A read/write integer parameter. The minimum number of segments to

Changes to src/lsmInt.h.

46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
...
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
#define LSM_DFLT_BLOCK_SIZE         (1 * 1024 * 1024)
#define LSM_DFLT_AUTOFLUSH          (1 * 1024 * 1024)
#define LSM_DFLT_AUTOCHECKPOINT     (i64)(2 * 1024 * 1024)
#define LSM_DFLT_AUTOWORK           1
#define LSM_DFLT_LOG_SIZE           (128*1024)
#define LSM_DFLT_AUTOMERGE          4
#define LSM_DFLT_SAFETY             LSM_SAFETY_NORMAL
#define LSM_DFLT_MMAP               LSM_IS_64_BIT
#define LSM_DFLT_MULTIPLE_PROCESSES 1
#define LSM_DFLT_USE_LOG            1

/* Initial values for log file checksums. These are only used if the 
** database file does not contain a valid checkpoint.  */
#define LSM_CKSUM0_INIT 42
#define LSM_CKSUM1_INIT 42
................................................................................
  int bAutowork;                  /* Configured by LSM_CONFIG_AUTOWORK */
  int nTreeLimit;                 /* Configured by LSM_CONFIG_AUTOFLUSH */
  int nMerge;                     /* Configured by LSM_CONFIG_AUTOMERGE */
  int bUseLog;                    /* Configured by LSM_CONFIG_USE_LOG */
  int nDfltPgsz;                  /* Configured by LSM_CONFIG_PAGE_SIZE */
  int nDfltBlksz;                 /* Configured by LSM_CONFIG_BLOCK_SIZE */
  int nMaxFreelist;               /* Configured by LSM_CONFIG_MAX_FREELIST */
  int bMmap;                      /* Configured by LSM_CONFIG_MMAP */
  i64 nAutockpt;                  /* Configured by LSM_CONFIG_AUTOCHECKPOINT */
  int bMultiProc;                 /* Configured by L_C_MULTIPLE_PROCESSES */
  int bReadonly;                  /* Configured by LSM_CONFIG_READONLY */
  lsm_compress compress;          /* Compression callbacks */
  lsm_compress_factory factory;   /* Compression callback factory */

  /* Sub-system handles */







|







 







|







46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
...
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
#define LSM_DFLT_BLOCK_SIZE         (1 * 1024 * 1024)
#define LSM_DFLT_AUTOFLUSH          (1 * 1024 * 1024)
#define LSM_DFLT_AUTOCHECKPOINT     (i64)(2 * 1024 * 1024)
#define LSM_DFLT_AUTOWORK           1
#define LSM_DFLT_LOG_SIZE           (128*1024)
#define LSM_DFLT_AUTOMERGE          4
#define LSM_DFLT_SAFETY             LSM_SAFETY_NORMAL
#define LSM_DFLT_MMAP               (LSM_IS_64_BIT ? 1 : 32768)
#define LSM_DFLT_MULTIPLE_PROCESSES 1
#define LSM_DFLT_USE_LOG            1

/* Initial values for log file checksums. These are only used if the 
** database file does not contain a valid checkpoint.  */
#define LSM_CKSUM0_INIT 42
#define LSM_CKSUM1_INIT 42
................................................................................
  int bAutowork;                  /* Configured by LSM_CONFIG_AUTOWORK */
  int nTreeLimit;                 /* Configured by LSM_CONFIG_AUTOFLUSH */
  int nMerge;                     /* Configured by LSM_CONFIG_AUTOMERGE */
  int bUseLog;                    /* Configured by LSM_CONFIG_USE_LOG */
  int nDfltPgsz;                  /* Configured by LSM_CONFIG_PAGE_SIZE */
  int nDfltBlksz;                 /* Configured by LSM_CONFIG_BLOCK_SIZE */
  int nMaxFreelist;               /* Configured by LSM_CONFIG_MAX_FREELIST */
  int iMmap;                      /* Configured by LSM_CONFIG_MMAP */
  i64 nAutockpt;                  /* Configured by LSM_CONFIG_AUTOCHECKPOINT */
  int bMultiProc;                 /* Configured by L_C_MULTIPLE_PROCESSES */
  int bReadonly;                  /* Configured by LSM_CONFIG_READONLY */
  lsm_compress compress;          /* Compression callbacks */
  lsm_compress_factory factory;   /* Compression callback factory */

  /* Sub-system handles */

Changes to src/lsm_file.c.

153
154
155
156
157
158
159
160
161
162
163
164





165
166
167
168
169
170




171
172





























173





174
175
176
177
178
179
180
...
188
189
190
191
192
193
194
195
196
197
198
199
200
201

202
203
204
205
206
207
208
209
210
211
212
213
214
215
216






217
218
219
220
221
222
223
...
241
242
243
244
245
246
247
248
249


250
251
252
253
254
255
256
...
285
286
287
288
289
290
291


























292
293
294
295
296
297
298
...
450
451
452
453
454
455
456








457
458
459
460
461
462
463
...
599
600
601
602
603
604
605

606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621

622







623
624
625
626
627
628
629
630
631



632
633
634
635
636
637
638
639





640
641
642
643
644
645
646
...
882
883
884
885
886
887
888







889
890
891
892
893
894
895
896
897
898
899
900
901

902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
...
928
929
930
931
932
933
934





935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
...
956
957
958
959
960
961
962
963
964
965
966



967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
...
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010

1011
1012
1013
1014
1015
1016

1017
1018
1019
1020
1021
1022
1023
....
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
....
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
....
1342
1343
1344
1345
1346
1347
1348

1349
1350
1351
1352
1353

1354
1355







1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388





1389
1390
1391



1392
1393
1394
1395
1396
1397
1398
....
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
....
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
....
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
....
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
....
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
....
2127
2128
2129
2130
2131
2132
2133

2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151

2152



2153
2154












2155



2156
2157




2158
2159
2160
2161
2162


2163
2164
2165
2166
2167
2168
2169
....
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
....
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
....
2419
2420
2421
2422
2423
2424
2425

2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
....
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476

2477
2478
2479


2480
2481
2482
2483
2484
2485
2486
....
2572
2573
2574
2575
2576
2577
2578
2579







2580
2581
2582
2583
2584
2585
2586
2587
#include <sys/stat.h>
#include <fcntl.h>

/*
** File-system object. Each database connection allocates a single instance
** of the following structure. It is used for all access to the database and
** log files.
**
** pLruFirst, pLruLast:
**   The first and last entries in a doubly-linked list of pages. The 
**   Page.pLruNext and Page.pLruPrev pointers are used to link the list
**   elements together.





**
**   In mmap() mode, this list contains all currently allocated pages that
**   are carrying pointers into the database file mapping (pMap/nMap). If the
**   file has to be unmapped and then remapped (required to grow the mapping
**   as the file grows), the Page.aData pointers are updated by iterating
**   through the contents of this list.




**
**   In non-mmap() mode, this list is an LRU list of cached pages with 





























**   nRef==0.





*/
struct FileSystem {
  lsm_db *pDb;                    /* Database handle that owns this object */
  lsm_env *pEnv;                  /* Environment pointer */
  char *zDb;                      /* Database file name */
  char *zLog;                     /* Database file name */
  int nMetasize;                  /* Size of meta pages in bytes */
................................................................................
  int szSector;                   /* Database file sector size */

  /* If this is a compressed database, a pointer to the compression methods.
  ** For an uncompressed database, a NULL pointer.  */
  lsm_compress *pCompress;
  u8 *aIBuffer;                   /* Buffer to compress to */
  u8 *aOBuffer;                   /* Buffer to uncompress from */
  int nBuffer;                    /* Allocated size of aBuffer[] in bytes */

  /* mmap() mode things */
  int bUseMmap;                   /* True to use mmap() to access db file */
  void *pMap;                     /* Current mapping of database file */
  i64 nMap;                       /* Bytes mapped at pMap */
  Page *pFree;


  Page *pWaiting;                 /* b-tree pages waiting to be written */

  /* Statistics */
  int nWrite;                     /* Total number of pages written */
  int nRead;                      /* Total number of pages read */

  /* Page cache parameters for non-mmap() mode */
  int nOut;                       /* Number of outstanding pages */
  int nCacheMax;                  /* Configured cache size (in pages) */
  int nCacheAlloc;                /* Current cache size (in pages) */
  Page *pLruFirst;                /* Head of the LRU list */
  Page *pLruLast;                 /* Tail of the LRU list */
  int nHash;                      /* Number of hash slots in hash table */
  Page **apHash;                  /* nHash Hash slots */






};

/*
** Database page handle.
**
** pSeg:
**   When lsmFsSortedAppend() is called on a compressed database, the new
................................................................................
  FileSystem *pFS;                /* File system that owns this page */

  /* Only used in compressed database mode: */
  int nCompress;                  /* Compressed size (or 0 for uncomp. db) */
  int nCompressPrev;              /* Compressed size of prev page */
  Segment *pSeg;                  /* Segment this page will be written to */

  /* Fix this up somehow */
  Page *pNextWaiting;


};

/*
** Meta-data page handle. There are two meta-data pages at the start of
** the database file, each FileSystem.nMetasize bytes in size.
*/
struct MetaPage {
................................................................................
static int IOERR_WRAPPER(int rc){
  if( rc!=LSM_OK ) lsmIoerrBkpt();
  return rc;
}
#else
# define IOERR_WRAPPER(rc) (rc)
#endif



























/*
** Wrappers around the VFS methods of the lsm_env object:
**
**     lsmEnvOpen()
**     lsmEnvRead()
**     lsmEnvWrite()
................................................................................
  zDel = lsmMallocPrintf(pFS->pEnv, "%s-log", pFS->zDb);
  if( zDel ){
    lsmEnvUnlink(pFS->pEnv, zDel);
    lsmFree(pFS->pEnv, zDel);
  }
  return LSM_OK;
}









/*
** Given that there are currently nHash slots in the hash table, return 
** the hash key for file iFile, page iPg.
*/
static int fsHashKey(int nHash, int iPg){
  return (iPg % nHash);
................................................................................
  FileSystem *pFS = db->pFS;
  if( pFS ){
    lsm_env *pEnv = pFS->pEnv;
    Page *pPg;

    assert( pFS->nOut==0 );
    assert( pFS->pWaiting==0 );


    /* Reset any compression/decompression buffers already allocated */
    lsmFree(pEnv, pFS->aIBuffer);
    lsmFree(pEnv, pFS->aOBuffer);
    pFS->nBuffer = 0;

    /* Unmap the file, if it is currently mapped */
    if( pFS->pMap ){
      lsmEnvRemap(pEnv, pFS->fdDb, -1, &pFS->pMap, &pFS->nMap);
      pFS->bUseMmap = 0;
    }

    /* Free all allocate page structures */
    pPg = pFS->pLruFirst;
    while( pPg ){
      Page *pNext = pPg->pLruNext;

      if( pPg->flags & PAGE_FREE ) lsmFree(pEnv, pPg->aData);







      lsmFree(pEnv, pPg);
      pPg = pNext;
    }

    /* Zero pointers that point to deleted page objects */
    pFS->nCacheAlloc = 0;
    pFS->pLruFirst = 0;
    pFS->pLruLast = 0;
    pFS->pFree = 0;




    /* Configure the FileSystem object */
    if( db->compress.xCompress ){
      pFS->pCompress = &db->compress;
      pFS->bUseMmap = 0;
    }else{
      pFS->pCompress = 0;
      pFS->bUseMmap = db->bMmap;





    }
  }

  return LSM_OK;
}

/*
................................................................................
static void fsPageRemoveFromHash(FileSystem *pFS, Page *pPg){
  int iHash;
  Page **pp;

  iHash = fsHashKey(pFS->nHash, pPg->iPg);
  for(pp=&pFS->apHash[iHash]; *pp!=pPg; pp=&(*pp)->pHashNext);
  *pp = pPg->pHashNext;







}


/*
** Purge the page cache of all entries with nRef==0.
*/
void lsmFsPurgeCache(FileSystem *pFS){
  if( pFS->bUseMmap==0 ){
    Page *pPg;

    pPg = pFS->pLruFirst;
    while( pPg ){
      Page *pNext = pPg->pLruNext;

      fsPageRemoveFromHash(pFS, pPg);
      if( pPg->flags & PAGE_FREE ){
        lsmFree(pFS->pEnv, pPg->aData);
      }
      lsmFree(pFS->pEnv, pPg);
      pPg = pNext;
      pFS->nCacheAlloc--;
    }
    pFS->pLruFirst = 0;
    pFS->pLruLast = 0;

    assert( pFS->nCacheAlloc<=pFS->nOut && pFS->nCacheAlloc>=0 );
  }
}

/*
** Search the hash-table for page iPg. If an entry is round, return a pointer
** to it. Otherwise, return NULL.
**
** Either way, if argument piHash is not NULL set *piHash to the hash slot
................................................................................
  if( piHash ) *piHash = iHash;
  for(p=pFS->apHash[iHash]; p; p=p->pHashNext){
    if( p->iPg==iPg) break;
  }
  return p;
}






static int fsPageBuffer(
  FileSystem *pFS, 
  Page **ppOut
){
  int rc = LSM_OK;
  Page *pPage = 0;
  if( pFS->bUseMmap || pFS->pLruFirst==0 || pFS->nCacheAlloc<pFS->nCacheMax ){
    pPage = lsmMallocZero(pFS->pEnv, sizeof(Page));
    if( !pPage ){
      rc = LSM_NOMEM_BKPT;
    }else{
      pPage->aData = (u8 *)lsmMalloc(pFS->pEnv, pFS->nPagesize);
      pPage->flags = PAGE_FREE;
      if( !pPage->aData ){
        lsmFree(pFS->pEnv, pPage);
        rc = LSM_NOMEM_BKPT;
        pPage = 0;
      }
      pFS->nCacheAlloc++;
    }
................................................................................
    u8 *aData;
    pPage = pFS->pLruFirst;
    aData = pPage->aData;
    fsPageRemoveFromLru(pFS, pPage);
    fsPageRemoveFromHash(pFS, pPage);

    memset(pPage, 0, sizeof(Page));
    pPage->flags = PAGE_FREE;
    pPage->aData = aData;
  }




  *ppOut = pPage;
  return rc;
}

static void fsPageBufferFree(Page *pPg){
  if( pPg->flags & PAGE_FREE ){
    lsmFree(pPg->pFS->pEnv, pPg->aData);
  }
  else if( pPg->pFS->bUseMmap ){
    fsPageRemoveFromLru(pPg->pFS, pPg);
  }
  lsmFree(pPg->pFS->pEnv, pPg);
}

static void fsGrowMapping(
  FileSystem *pFS,
  i64 iSz,
  int *pRc
){
  /* This function won't work with compressed databases yet. */
  assert( pFS->pCompress==0 );
................................................................................
  if( *pRc==LSM_OK && iSz>pFS->nMap ){
    int rc;
    u8 *aOld = pFS->pMap;
    rc = lsmEnvRemap(pFS->pEnv, pFS->fdDb, iSz, &pFS->pMap, &pFS->nMap);
    if( rc==LSM_OK && pFS->pMap!=aOld ){
      Page *pFix;
      i64 iOff = (u8 *)pFS->pMap - aOld;
      for(pFix=pFS->pLruFirst; pFix; pFix=pFix->pLruNext){
        pFix->aData += iOff;
      }
      lsmSortedRemap(pFS->pDb);
    }
    *pRc = rc;
  }
}


/*
** fsync() the database file.
*/
int lsmFsSyncDb(FileSystem *pFS, int nBlock){

  if( nBlock && pFS->bUseMmap ){
    int rc = LSM_OK;
    i64 nMin = (i64)nBlock * (i64)pFS->nBlocksize;
    fsGrowMapping(pFS, nMin, &rc);
    if( rc!=LSM_OK ) return rc;
  }

  return lsmEnvSync(pFS->pEnv, pFS->fdDb);
}

static int fsPageGet(FileSystem *, Segment *, Pgno, int, Page **, int *);

static int fsRedirectBlock(Redirect *p, int iBlk){
  if( p ){
................................................................................
  
  if( pSeg ){
    iRead = fsRedirectBlock(pSeg->pRedirect, iBlock);
  }else{
    iRead = iBlock;
  }

  assert( pFS->bUseMmap==0 || pFS->pCompress==0 );
  if( pFS->pCompress ){
    i64 iOff;                     /* File offset to read data from */
    u8 aNext[4];                  /* 4-byte pointer read from db file */

    iOff = (i64)iRead * pFS->nBlocksize - sizeof(aNext);
    rc = lsmEnvRead(pFS->pEnv, pFS->fdDb, iOff, aNext, sizeof(aNext));
    if( rc==LSM_OK ){
................................................................................
  FileSystem *pFS,                /* File-system object handle */
  Segment *pSeg,                  /* Use this segment for block redirects */
  int iBlock,                     /* Read field from this block */
  int *piPrev                     /* OUT: Previous block in linked list */
){
  int rc = LSM_OK;                /* Return code */

  assert( pFS->bUseMmap==0 || pFS->pCompress==0 );
  assert( iBlock>0 );

  if( pFS->pCompress ){
    i64 iOff = fsFirstPageOnBlock(pFS, iBlock) - 4;
    u8 aPrev[4];                  /* 4-byte pointer read from db file */
    rc = lsmEnvRead(pFS->pEnv, pFS->fdDb, iOff, aPrev, sizeof(aPrev));
    if( rc==LSM_OK ){
................................................................................
  int rc = LSM_OK;

  /* In most cases iReal is the same as iPg. Except, if pSeg->pRedirect is 
  ** not NULL, and the block containing iPg has been redirected, then iReal
  ** is the page number after redirection.  */
  Pgno iReal = lsmFsRedirectPage(pFS, (pSeg ? pSeg->pRedirect : 0), iPg);


  assert( iPg>=fsFirstPageOnBlock(pFS, 1) );
  assert( iReal>=fsFirstPageOnBlock(pFS, 1) );
  *ppPg = 0;

  assert( pFS->bUseMmap==0 || pFS->pCompress==0 );

  if( pFS->bUseMmap ){
    Page *pTest;







    i64 iEnd = (i64)iReal * pFS->nPagesize;
    fsGrowMapping(pFS, iEnd, &rc);
    if( rc!=LSM_OK ) return rc;

    p = 0;
    for(pTest=pFS->pWaiting; pTest; pTest=pTest->pNextWaiting){
      if( pTest->iPg==iReal ){
        assert( iReal==iPg );
        p = pTest;
        p->nRef++;
        *ppPg = p;
        return LSM_OK;
      }
    }
    if( pFS->pFree ){
      p = pFS->pFree;
      pFS->pFree = p->pHashNext;
      assert( p->nRef==0 );
    }else{
      p = lsmMallocZeroRc(pFS->pEnv, sizeof(Page), &rc);
      if( rc ) return rc;
      fsPageAddToLru(pFS, p);
      p->pFS = pFS;
    }
    p->aData = &((u8 *)pFS->pMap)[pFS->nPagesize * (iReal-1)];
    p->iPg = iReal;
    assert( (p->flags & PAGE_FREE)==0 );
  }else{

    /* Search the hash-table for the page */
    iHash = fsHashKey(pFS->nHash, iReal);
    for(p=pFS->apHash[iHash]; p; p=p->pHashNext){
      if( p->iPg==iReal) break;





    }

    if( p==0 ){



      rc = fsPageBuffer(pFS, &p);
      if( rc==LSM_OK ){
        int nSpace = 0;
        p->iPg = iReal;
        p->nRef = 0;
        p->pFS = pFS;
        assert( p->flags==0 || p->flags==PAGE_FREE );
................................................................................
            i64 iOff = (i64)(iReal-1) * pFS->nPagesize;
            rc = lsmEnvRead(pFS->pEnv, pFS->fdDb, iOff, p->aData, nByte);
          }
          pFS->nRead++;
        }

        /* If the xRead() call was successful (or not attempted), link the
         ** page into the page-cache hash-table. Otherwise, if it failed,
         ** free the buffer. */
        if( rc==LSM_OK && nSpace==0 ){
          p->pHashNext = pFS->apHash[iHash];
          pFS->apHash[iHash] = p;
        }else{
          fsPageBufferFree(p);
          p = 0;
          if( pnSpace ) *pnSpace = nSpace;
        }
      }
    }else if( p->nRef==0 ){
      fsPageRemoveFromLru(pFS, p);
    }

    assert( (rc==LSM_OK && (p || (pnSpace && *pnSpace)))
         || (rc!=LSM_OK && p==0) 
    );
  }

................................................................................
** may be garbage. It is the callers responsibility to deal with this.
*/
int lsmFsReadSyncedId(lsm_db *db, int iMeta, i64 *piVal){
  FileSystem *pFS = db->pFS;
  int rc = LSM_OK;

  assert( iMeta==1 || iMeta==2 );
  if( pFS->bUseMmap ){
    fsGrowMapping(pFS, iMeta*LSM_META_PAGE_SIZE, &rc);
    if( rc==LSM_OK ){
      *piVal = (i64)lsmGetU64(&((u8 *)pFS->pMap)[(iMeta-1)*LSM_META_PAGE_SIZE]);
    }
  }else{
    MetaPage *pMeta = 0;
    rc = lsmFsMetaPageGet(pFS, 0, iMeta, &pMeta);
................................................................................
  MetaPage *pPg;
  assert( iPg==1 || iPg==2 );

  pPg = lsmMallocZeroRc(pFS->pEnv, sizeof(Page), &rc);

  if( pPg ){
    i64 iOff = (iPg-1) * pFS->nMetasize;
    if( pFS->bUseMmap ){
      fsGrowMapping(pFS, 2*pFS->nMetasize, &rc);
      pPg->aData = (u8 *)(pFS->pMap) + iOff;
    }else{
      pPg->aData = lsmMallocRc(pFS->pEnv, pFS->nMetasize, &rc);
      if( rc==LSM_OK && bWrite==0 ){
        rc = lsmEnvRead(pFS->pEnv, pFS->fdDb, iOff, pPg->aData, pFS->nMetasize);
      }
................................................................................
      else if( rc==LSM_OK ){
        memset( pPg->aData, 0x77, pFS->nMetasize );
      }
#endif
    }

    if( rc!=LSM_OK ){
      if( pFS->bUseMmap==0 ) lsmFree(pFS->pEnv, pPg->aData);
      lsmFree(pFS->pEnv, pPg);
      pPg = 0;
    }else{
      pPg->iPg = iPg;
      pPg->bWrite = bWrite;
      pPg->pFS = pFS;
    }
................................................................................
** Release a meta-page reference obtained via a call to lsmFsMetaPageGet().
*/
int lsmFsMetaPageRelease(MetaPage *pPg){
  int rc = LSM_OK;
  if( pPg ){
    FileSystem *pFS = pPg->pFS;

    if( pFS->bUseMmap==0 ){
      if( pPg->bWrite ){
        i64 iOff = (pPg->iPg==2 ? pFS->nMetasize : 0);
        int nWrite = pFS->nMetasize;
        rc = lsmEnvWrite(pFS->pEnv, pFS->fdDb, iOff, pPg->aData, nWrite);
      }
      lsmFree(pFS->pEnv, pPg->aData);
    }
................................................................................
** It is safe to assume that there are no outstanding references to pages 
** on block iTo. And that block iFrom is not currently being written. In
** other words, the data can be read and written directly.
*/
int lsmFsMoveBlock(FileSystem *pFS, Segment *pSeg, int iTo, int iFrom){
  Snapshot *p = pFS->pDb->pWorker;
  int rc = LSM_OK;


  i64 iFromOff = (i64)(iFrom-1) * pFS->nBlocksize;
  i64 iToOff = (i64)(iTo-1) * pFS->nBlocksize;
  
  assert( iTo!=1 );
  assert( iFrom>iTo );

  if( pFS->bUseMmap ){
    fsGrowMapping(pFS, (i64)iFrom * pFS->nBlocksize, &rc);
    if( rc==LSM_OK ){
      u8 *aMap = (u8 *)(pFS->pMap);
      memcpy(&aMap[iToOff], &aMap[iFromOff], pFS->nBlocksize);
    }
  }else{
    int nSz = pFS->nPagesize;
    u8 *aData = (u8 *)lsmMallocRc(pFS->pEnv, nSz, &rc);
    if( rc==LSM_OK ){
      const int nPagePerBlock = (pFS->nBlocksize / pFS->nPagesize);

      int i;



      for(i=0; rc==LSM_OK && i<nPagePerBlock; i++){
        i64 iOff = iFromOff + i*nSz;












        rc = lsmEnvRead(pFS->pEnv, pFS->fdDb, iOff, aData, nSz);



        if( rc==LSM_OK ){
          iOff = iToOff + i*nSz;




          rc = lsmEnvWrite(pFS->pEnv, pFS->fdDb, iOff, aData, nSz);
        }
      }
      lsmFsPurgeCache(pFS);
    }


  }

  /* Update append-point list if necessary */
  if( rc==LSM_OK ){
    int i;
    for(i=0; i<LSM_APPLIST_SZ; i++){
      if( fsPageToBlock(pFS, p->aiAppend[i])==iFrom ){
................................................................................
  int rc = *pRc;
  Page *pPg;

  pPg = pFS->pWaiting;
  pFS->pWaiting = 0;

  while( pPg ){
    Page *pNext = pPg->pNextWaiting;
    if( rc==LSM_OK ) rc = lsmFsPagePersist(pPg);
    assert( pPg->nRef==1 );
    lsmFsPageRelease(pPg);
    pPg = pNext;
  }
  *pRc = rc;
}
................................................................................
static void fsRemoveHashEntry(FileSystem *pFS, Pgno iPg){
  Page *p;
  int iHash = fsHashKey(pFS->nHash, iPg);

  for(p=pFS->apHash[iHash]; p && p->iPg!=iPg; p=p->pHashNext);

  if( p ){
    assert( p->nRef==0 );
    fsPageRemoveFromHash(pFS, p);
    p->iPg = 0;
    iHash = fsHashKey(pFS->nHash, 0);
    p->pHashNext = pFS->apHash[iHash];
    pFS->apHash[iHash] = p;
  }
}
................................................................................
        ** lsmFsPagePersist() to write an out-of-order page. Instead a page 
        ** number is assigned here so that the page data will be appended
        ** to the current segment.
        */
        Page **pp;
        int iPrev = 0;
        int iNext = 0;


        assert( pPg->pSeg->iFirst );
        assert( pPg->flags & PAGE_FREE );
        assert( (pPg->flags & PAGE_HASPREV)==0 );
        assert( pPg->nData==pFS->nPagesize-4 );

        rc = fsAppendPage(pFS, pPg->pSeg, &pPg->iPg, &iPrev, &iNext);
        if( rc!=LSM_OK ) return rc;

        if( pFS->bUseMmap==0 ){
          int iHash = fsHashKey(pFS->nHash, pPg->iPg);
          fsRemoveHashEntry(pFS, pPg->iPg);
          pPg->pHashNext = pFS->apHash[iHash];
          pFS->apHash[iHash] = pPg;
          assert( pPg->pHashNext==0 || pPg->pHashNext->iPg!=pPg->iPg );
        }

        if( iPrev ){
          assert( iNext==0 );
          memmove(&pPg->aData[4], pPg->aData, pPg->nData);
          lsmPutU32(pPg->aData, iPrev);
          pPg->flags |= PAGE_HASPREV;
          pPg->aData += 4;
................................................................................
        }else{
          int nData = pPg->nData;
          pPg->nData += 4;
          lsmSortedExpandBtreePage(pPg, nData);
        }

        pPg->nRef++;
        for(pp=&pFS->pWaiting; *pp; pp=&(*pp)->pNextWaiting);
        *pp = pPg;
        assert( pPg->pNextWaiting==0 );

      }else{
        i64 iOff;                   /* Offset to write within database file */

        iOff = (i64)pFS->nPagesize * (i64)(pPg->iPg-1);
        if( pFS->bUseMmap==0 ){
          u8 *aData = pPg->aData - (pPg->flags & PAGE_HASPREV);
          rc = lsmEnvWrite(pFS->pEnv, pFS->fdDb, iOff, aData, pFS->nPagesize);
        }else if( pPg->flags & PAGE_FREE ){
          fsGrowMapping(pFS, iOff + pFS->nPagesize, &rc);
          if( rc==LSM_OK ){
            u8 *aTo = &((u8 *)(pFS->pMap))[iOff];
            u8 *aFrom = pPg->aData - (pPg->flags & PAGE_HASPREV);
            memcpy(aTo, aFrom, pFS->nPagesize);
            lsmFree(pFS->pEnv, aFrom);

            pPg->aData = aTo + (pPg->flags & PAGE_HASPREV);
            pPg->flags &= ~PAGE_FREE;
            fsPageAddToLru(pFS, pPg);


          }
        }

        lsmFsFlushWaiting(pFS, &rc);
        pPg->flags &= ~PAGE_DIRTY;
        pFS->nWrite++;
      }
................................................................................
      assert( pPg->pFS->pCompress 
           || fsIsFirst(pPg->pFS, pPg->iPg)==0 
           || (pPg->flags & PAGE_HASPREV)
      );
      pPg->aData -= (pPg->flags & PAGE_HASPREV);
      pPg->flags &= ~PAGE_HASPREV;

      if( pFS->bUseMmap ){







        pPg->pHashNext = pFS->pFree;
        pFS->pFree = pPg;
      }else{
#if 0
        assert( pPg->pLruNext==0 );
        assert( pPg->pLruPrev==0 );
        fsPageRemoveFromHash(pFS, pPg);
        fsPageBufferFree(pPg);








|
|
|
|
>
>
>
>
>

<
<
<
<
<
>
>
>
>

<
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>







 







|

|
|


|
>

<
<
<
<
<
<
|
<






>
>
>
>
>
>







 







|
|
>
>







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>
>
>
>
>
>
>
>







 







>









|


|



>
|
>
>
>
>
>
>
>









>
>
>




|


|
>
>
>
>
>







 







>
>
>
>
>
>
>




|


<
|

|
|
|
>
|
|
<
<
<
|
<
|
|
|

|
<







 







>
>
>
>
>






|





<







 







<



>
>
>




<
<
<
<
<
<
<
<
<
<







 







|













>






>







 







|







 







|







 







>




<
>
|
|
>
>
>
>
>
>
>
|
|
|

<
<
<
<
<
<
<
<
<
<
|
|
|
|
|
|
|
<
|
|
|
|
<
<

<
<
<
<
>
>
>
>
>
|
<
<
>
>
>







 







|
|









<
<







 







|







 







|







 







|







 







|







 







>







|
|
|
<
<
|
<
<
<
|
|
>
|
>
>
>
|
|
>
>
>
>
>
>
>
>
>
>
>
>

>
>
>
|
|
>
>
>
>



<

>
>







 







|







 







|







 







>









|
|
|
|
|
|
<







 







|

|





|









>


|
>
>







 







|
>
>
>
>
>
>
>
|







153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170





171
172
173
174
175

176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
...
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240






241

242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
...
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
...
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
...
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
...
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
...
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992

993
994
995
996
997
998
999
1000



1001

1002
1003
1004
1005
1006

1007
1008
1009
1010
1011
1012
1013
....
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043

1044
1045
1046
1047
1048
1049
1050
....
1052
1053
1054
1055
1056
1057
1058

1059
1060
1061
1062
1063
1064
1065
1066
1067
1068










1069
1070
1071
1072
1073
1074
1075
....
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
....
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
....
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
....
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443

1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457










1458
1459
1460
1461
1462
1463
1464

1465
1466
1467
1468


1469




1470
1471
1472
1473
1474
1475


1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
....
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513


1514
1515
1516
1517
1518
1519
1520
....
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
....
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
....
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
....
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
....
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229


2230



2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264

2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
....
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
....
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
....
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546

2547
2548
2549
2550
2551
2552
2553
....
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
....
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
#include <sys/stat.h>
#include <fcntl.h>

/*
** File-system object. Each database connection allocates a single instance
** of the following structure. It is used for all access to the database and
** log files.
**
** The database file may be accessed via two methods - using mmap() or using
** read() and write() calls. In the general case both methods are used - a
** prefix of the file is mapped into memory and the remainder accessed using
** read() and write(). This is helpful when accessing very large files (or
** files that may grow very large during the lifetime of a database
** connection) on systems with 32-bit address spaces. However, it also requires
** that this object manage two distinct types of Page objects simultaneously -
** those that carry pointers to the mapped file and those that carry arrays
** populated by read() calls.
**





** pFree:
**   The head of a singly-linked list that containing currently unused Page 
**   structures suitable for use as mmap-page handles. Connected by the
**   Page.pFreeNext pointers.
**

** pMapped:
**   The head of a singly-linked list that contains all pages that currently
**   carry pointers to the mapped region. This is used if the region is
**   every remapped - the pointers carried by existing pages can be adjusted
**   to account for the remapping. Connected by the Page.pMappedNext pointers.
**
** pWaiting:
**   When the upper layer wishes to append a new b-tree page to a segment,
**   it allocates a Page object that carries a malloc'd block of memory -
**   regardless of the mmap-related configuration. The page is not assigned
**   a page number at first. When the upper layer has finished constructing
**   the page contents, it calls lsmFsPagePersist() to assign a page number
**   to it. At this point it is likely that N pages have been written to the
**   segment, the (N+1)th page is still outstanding and the b-tree page is
**   assigned page number (N+2). To avoid writing page (N+2) before page 
**   (N+1), the recently completed b-tree page is held in the singly linked
**   list headed by pWaiting until page (N+1) has been written. 
**
**   Function lsmFsFlushWaiting() is responsible for eventually writing 
**   waiting pages to disk.
**
**
** apHash/nHash:
**   Hash table used to store all Page objects that carry malloc'd arrays,
**   except those b-tree pages that have not yet been assigned page numbers.
**   Once they have been assigned page numbers - they are added to this
**   hash table.
**
**   Hash table overflow chains are connected using the Page.pHashNext
**   pointers.
**
** pLruFirst, pLruLast:
**   The first and last entries in a doubly-linked list of pages. This
**   list contains all pages with malloc'd data that are present in the
**   hash table and have a ref-count of zero.
*/
struct FileSystem {
  lsm_db *pDb;                    /* Database handle that owns this object */
  lsm_env *pEnv;                  /* Environment pointer */
  char *zDb;                      /* Database file name */
  char *zLog;                     /* Database file name */
  int nMetasize;                  /* Size of meta pages in bytes */
................................................................................
  int szSector;                   /* Database file sector size */

  /* If this is a compressed database, a pointer to the compression methods.
  ** For an uncompressed database, a NULL pointer.  */
  lsm_compress *pCompress;
  u8 *aIBuffer;                   /* Buffer to compress to */
  u8 *aOBuffer;                   /* Buffer to uncompress from */
  int nBuffer;                    /* Allocated size of above buffers in bytes */

  /* mmap() page related things */
  i64 nMapLimit;                  /* Maximum bytes of file to map */
  void *pMap;                     /* Current mapping of database file */
  i64 nMap;                       /* Bytes mapped at pMap */
  Page *pFree;                    /* Unused Page structures */
  Page *pMapped;                  /* List of Page structs that point to pMap */







  /* Page cache parameters for non-mmap() pages */

  int nCacheMax;                  /* Configured cache size (in pages) */
  int nCacheAlloc;                /* Current cache size (in pages) */
  Page *pLruFirst;                /* Head of the LRU list */
  Page *pLruLast;                 /* Tail of the LRU list */
  int nHash;                      /* Number of hash slots in hash table */
  Page **apHash;                  /* nHash Hash slots */
  Page *pWaiting;                 /* b-tree pages waiting to be written */

  /* Statistics */
  int nOut;                       /* Number of outstanding pages */
  int nWrite;                     /* Total number of pages written */
  int nRead;                      /* Total number of pages read */
};

/*
** Database page handle.
**
** pSeg:
**   When lsmFsSortedAppend() is called on a compressed database, the new
................................................................................
  FileSystem *pFS;                /* File system that owns this page */

  /* Only used in compressed database mode: */
  int nCompress;                  /* Compressed size (or 0 for uncomp. db) */
  int nCompressPrev;              /* Compressed size of prev page */
  Segment *pSeg;                  /* Segment this page will be written to */

  /* Pointers for singly linked lists */
  Page *pWaitingNext;             /* Next page in FileSystem.pWaiting list */
  Page *pFreeNext;                /* Next page in FileSystem.pFree list */
  Page *pMappedNext;              /* Next page in FileSystem.pMapped list */
};

/*
** Meta-data page handle. There are two meta-data pages at the start of
** the database file, each FileSystem.nMetasize bytes in size.
*/
struct MetaPage {
................................................................................
static int IOERR_WRAPPER(int rc){
  if( rc!=LSM_OK ) lsmIoerrBkpt();
  return rc;
}
#else
# define IOERR_WRAPPER(rc) (rc)
#endif

#ifdef NDEBUG
# define assert_lists_are_ok(x)
#else
static Page *fsPageFindInHash(FileSystem *pFS, Pgno iPg, int *piHash);

static void assert_lists_are_ok(FileSystem *pFS){
#if 0
  Page *p;

  assert( pFS->nMapLimit>=0 );

  /* Check that all pages in the LRU list have nRef==0, pointers to buffers
  ** in heap memory, and corresponding entries in the hash table.  */
  for(p=pFS->pLruFirst; p; p=p->pLruNext){
    assert( p==pFS->pLruFirst || p->pLruPrev!=0 );
    assert( p==pFS->pLruLast || p->pLruNext!=0 );
    assert( p->pLruPrev==0 || p->pLruPrev->pLruNext==p );
    assert( p->pLruNext==0 || p->pLruNext->pLruPrev==p );
    assert( p->nRef==0 );
    assert( p->flags & PAGE_FREE );
    assert( p==fsPageFindInHash(pFS, p->iPg, 0) );
  }
#endif
}
#endif

/*
** Wrappers around the VFS methods of the lsm_env object:
**
**     lsmEnvOpen()
**     lsmEnvRead()
**     lsmEnvWrite()
................................................................................
  zDel = lsmMallocPrintf(pFS->pEnv, "%s-log", pFS->zDb);
  if( zDel ){
    lsmEnvUnlink(pFS->pEnv, zDel);
    lsmFree(pFS->pEnv, zDel);
  }
  return LSM_OK;
}

/*
** Return true if page iReal of the database should be accessed using mmap.
** False otherwise.
*/
static int fsMmapPage(FileSystem *pFS, Pgno iReal){
  return ((i64)iReal*pFS->nPagesize <= pFS->nMapLimit);
}

/*
** Given that there are currently nHash slots in the hash table, return 
** the hash key for file iFile, page iPg.
*/
static int fsHashKey(int nHash, int iPg){
  return (iPg % nHash);
................................................................................
  FileSystem *pFS = db->pFS;
  if( pFS ){
    lsm_env *pEnv = pFS->pEnv;
    Page *pPg;

    assert( pFS->nOut==0 );
    assert( pFS->pWaiting==0 );
    assert( pFS->pMapped==0 );

    /* Reset any compression/decompression buffers already allocated */
    lsmFree(pEnv, pFS->aIBuffer);
    lsmFree(pEnv, pFS->aOBuffer);
    pFS->nBuffer = 0;

    /* Unmap the file, if it is currently mapped */
    if( pFS->pMap ){
      lsmEnvRemap(pEnv, pFS->fdDb, -1, &pFS->pMap, &pFS->nMap);
      pFS->nMapLimit = 0;
    }

    /* Free all allocated page structures */
    pPg = pFS->pLruFirst;
    while( pPg ){
      Page *pNext = pPg->pLruNext;
      assert( pPg->flags & PAGE_FREE );
      lsmFree(pEnv, pPg->aData);
      lsmFree(pEnv, pPg);
      pPg = pNext;
    }

    pPg = pFS->pFree;
    while( pPg ){
      Page *pNext = pPg->pFreeNext;
      lsmFree(pEnv, pPg);
      pPg = pNext;
    }

    /* Zero pointers that point to deleted page objects */
    pFS->nCacheAlloc = 0;
    pFS->pLruFirst = 0;
    pFS->pLruLast = 0;
    pFS->pFree = 0;
    if( pFS->apHash ){
      memset(pFS->apHash, 0, pFS->nHash*sizeof(pFS->apHash[0]));
    }

    /* Configure the FileSystem object */
    if( db->compress.xCompress ){
      pFS->pCompress = &db->compress;
      pFS->nMapLimit = 0;
    }else{
      pFS->pCompress = 0;
      if( db->iMmap==1 ){
        /* Unlimited */
        pFS->nMapLimit = (i64)1 << 60;
      }else{
        pFS->nMapLimit = (i64)db->iMmap * 1024;
      }
    }
  }

  return LSM_OK;
}

/*
................................................................................
static void fsPageRemoveFromHash(FileSystem *pFS, Page *pPg){
  int iHash;
  Page **pp;

  iHash = fsHashKey(pFS->nHash, pPg->iPg);
  for(pp=&pFS->apHash[iHash]; *pp!=pPg; pp=&(*pp)->pHashNext);
  *pp = pPg->pHashNext;
  pPg->pHashNext = 0;
}

static void fsPageBufferFree(Page *pPg){
  pPg->pFS->nCacheAlloc--;
  lsmFree(pPg->pFS->pEnv, pPg->aData);
  lsmFree(pPg->pFS->pEnv, pPg);
}


/*
** Purge the cache of all non-mmap pages with nRef==0.
*/
void lsmFsPurgeCache(FileSystem *pFS){

  Page *pPg;

  pPg = pFS->pLruFirst;
  while( pPg ){
    Page *pNext = pPg->pLruNext;
    assert( pPg->flags & PAGE_FREE );
    fsPageRemoveFromHash(pFS, pPg);
    fsPageBufferFree(pPg);



    pPg = pNext;

  }
  pFS->pLruFirst = 0;
  pFS->pLruLast = 0;

  assert( pFS->nCacheAlloc<=pFS->nOut && pFS->nCacheAlloc>=0 );

}

/*
** Search the hash-table for page iPg. If an entry is round, return a pointer
** to it. Otherwise, return NULL.
**
** Either way, if argument piHash is not NULL set *piHash to the hash slot
................................................................................
  if( piHash ) *piHash = iHash;
  for(p=pFS->apHash[iHash]; p; p=p->pHashNext){
    if( p->iPg==iPg) break;
  }
  return p;
}

/*
** Allocate and return a non-mmap Page object. If there are already 
** nCacheMax such Page objects outstanding, try to recycle an existing 
** Page instead.
*/
static int fsPageBuffer(
  FileSystem *pFS, 
  Page **ppOut
){
  int rc = LSM_OK;
  Page *pPage = 0;
  if( pFS->pLruFirst==0 || pFS->nCacheAlloc<pFS->nCacheMax ){
    pPage = lsmMallocZero(pFS->pEnv, sizeof(Page));
    if( !pPage ){
      rc = LSM_NOMEM_BKPT;
    }else{
      pPage->aData = (u8 *)lsmMalloc(pFS->pEnv, pFS->nPagesize);

      if( !pPage->aData ){
        lsmFree(pFS->pEnv, pPage);
        rc = LSM_NOMEM_BKPT;
        pPage = 0;
      }
      pFS->nCacheAlloc++;
    }
................................................................................
    u8 *aData;
    pPage = pFS->pLruFirst;
    aData = pPage->aData;
    fsPageRemoveFromLru(pFS, pPage);
    fsPageRemoveFromHash(pFS, pPage);

    memset(pPage, 0, sizeof(Page));

    pPage->aData = aData;
  }

  if( pPage ){
    pPage->flags = PAGE_FREE;
  }
  *ppOut = pPage;
  return rc;
}











static void fsGrowMapping(
  FileSystem *pFS,
  i64 iSz,
  int *pRc
){
  /* This function won't work with compressed databases yet. */
  assert( pFS->pCompress==0 );
................................................................................
  if( *pRc==LSM_OK && iSz>pFS->nMap ){
    int rc;
    u8 *aOld = pFS->pMap;
    rc = lsmEnvRemap(pFS->pEnv, pFS->fdDb, iSz, &pFS->pMap, &pFS->nMap);
    if( rc==LSM_OK && pFS->pMap!=aOld ){
      Page *pFix;
      i64 iOff = (u8 *)pFS->pMap - aOld;
      for(pFix=pFS->pMapped; pFix; pFix=pFix->pMappedNext){
        pFix->aData += iOff;
      }
      lsmSortedRemap(pFS->pDb);
    }
    *pRc = rc;
  }
}


/*
** fsync() the database file.
*/
int lsmFsSyncDb(FileSystem *pFS, int nBlock){
#if 0
  if( nBlock && pFS->bUseMmap ){
    int rc = LSM_OK;
    i64 nMin = (i64)nBlock * (i64)pFS->nBlocksize;
    fsGrowMapping(pFS, nMin, &rc);
    if( rc!=LSM_OK ) return rc;
  }
#endif
  return lsmEnvSync(pFS->pEnv, pFS->fdDb);
}

static int fsPageGet(FileSystem *, Segment *, Pgno, int, Page **, int *);

static int fsRedirectBlock(Redirect *p, int iBlk){
  if( p ){
................................................................................
  
  if( pSeg ){
    iRead = fsRedirectBlock(pSeg->pRedirect, iBlock);
  }else{
    iRead = iBlock;
  }

  assert( pFS->nMapLimit==0 || pFS->pCompress==0 );
  if( pFS->pCompress ){
    i64 iOff;                     /* File offset to read data from */
    u8 aNext[4];                  /* 4-byte pointer read from db file */

    iOff = (i64)iRead * pFS->nBlocksize - sizeof(aNext);
    rc = lsmEnvRead(pFS->pEnv, pFS->fdDb, iOff, aNext, sizeof(aNext));
    if( rc==LSM_OK ){
................................................................................
  FileSystem *pFS,                /* File-system object handle */
  Segment *pSeg,                  /* Use this segment for block redirects */
  int iBlock,                     /* Read field from this block */
  int *piPrev                     /* OUT: Previous block in linked list */
){
  int rc = LSM_OK;                /* Return code */

  assert( pFS->nMapLimit==0 || pFS->pCompress==0 );
  assert( iBlock>0 );

  if( pFS->pCompress ){
    i64 iOff = fsFirstPageOnBlock(pFS, iBlock) - 4;
    u8 aPrev[4];                  /* 4-byte pointer read from db file */
    rc = lsmEnvRead(pFS->pEnv, pFS->fdDb, iOff, aPrev, sizeof(aPrev));
    if( rc==LSM_OK ){
................................................................................
  int rc = LSM_OK;

  /* In most cases iReal is the same as iPg. Except, if pSeg->pRedirect is 
  ** not NULL, and the block containing iPg has been redirected, then iReal
  ** is the page number after redirection.  */
  Pgno iReal = lsmFsRedirectPage(pFS, (pSeg ? pSeg->pRedirect : 0), iPg);

  assert_lists_are_ok(pFS);
  assert( iPg>=fsFirstPageOnBlock(pFS, 1) );
  assert( iReal>=fsFirstPageOnBlock(pFS, 1) );
  *ppPg = 0;



  /* Search the hash-table for the page */
  p = fsPageFindInHash(pFS, iReal, &iHash);

  if( p ){
    assert( p->flags & PAGE_FREE );
    if( p->nRef==0 ) fsPageRemoveFromLru(pFS, p);
  }else{

    if( fsMmapPage(pFS, iReal) ){
      i64 iEnd = (i64)iReal * pFS->nPagesize;
      fsGrowMapping(pFS, iEnd, &rc);
      if( rc!=LSM_OK ) return rc;











      if( pFS->pFree ){
        p = pFS->pFree;
        pFS->pFree = p->pFreeNext;
        assert( p->nRef==0 );
      }else{
        p = lsmMallocZeroRc(pFS->pEnv, sizeof(Page), &rc);
        if( rc ) return rc;

        p->pFS = pFS;
      }
      p->aData = &((u8 *)pFS->pMap)[pFS->nPagesize * (iReal-1)];
      p->iPg = iReal;







      /* This page now carries a pointer to the mapping. Link it in to
      ** the FileSystem.pMapped list.  */
      assert( p->pMappedNext==0 );
      p->pMappedNext = pFS->pMapped;
      pFS->pMapped = p;



      assert( pFS->pCompress==0 );
      assert( (p->flags & PAGE_FREE)==0 );
    }else{
      rc = fsPageBuffer(pFS, &p);
      if( rc==LSM_OK ){
        int nSpace = 0;
        p->iPg = iReal;
        p->nRef = 0;
        p->pFS = pFS;
        assert( p->flags==0 || p->flags==PAGE_FREE );
................................................................................
            i64 iOff = (i64)(iReal-1) * pFS->nPagesize;
            rc = lsmEnvRead(pFS->pEnv, pFS->fdDb, iOff, p->aData, nByte);
          }
          pFS->nRead++;
        }

        /* If the xRead() call was successful (or not attempted), link the
        ** page into the page-cache hash-table. Otherwise, if it failed,
        ** free the buffer. */
        if( rc==LSM_OK && nSpace==0 ){
          p->pHashNext = pFS->apHash[iHash];
          pFS->apHash[iHash] = p;
        }else{
          fsPageBufferFree(p);
          p = 0;
          if( pnSpace ) *pnSpace = nSpace;
        }
      }


    }

    assert( (rc==LSM_OK && (p || (pnSpace && *pnSpace)))
         || (rc!=LSM_OK && p==0) 
    );
  }

................................................................................
** may be garbage. It is the callers responsibility to deal with this.
*/
int lsmFsReadSyncedId(lsm_db *db, int iMeta, i64 *piVal){
  FileSystem *pFS = db->pFS;
  int rc = LSM_OK;

  assert( iMeta==1 || iMeta==2 );
  if( pFS->nMapLimit>0 ){
    fsGrowMapping(pFS, iMeta*LSM_META_PAGE_SIZE, &rc);
    if( rc==LSM_OK ){
      *piVal = (i64)lsmGetU64(&((u8 *)pFS->pMap)[(iMeta-1)*LSM_META_PAGE_SIZE]);
    }
  }else{
    MetaPage *pMeta = 0;
    rc = lsmFsMetaPageGet(pFS, 0, iMeta, &pMeta);
................................................................................
  MetaPage *pPg;
  assert( iPg==1 || iPg==2 );

  pPg = lsmMallocZeroRc(pFS->pEnv, sizeof(Page), &rc);

  if( pPg ){
    i64 iOff = (iPg-1) * pFS->nMetasize;
    if( pFS->nMapLimit>0 ){
      fsGrowMapping(pFS, 2*pFS->nMetasize, &rc);
      pPg->aData = (u8 *)(pFS->pMap) + iOff;
    }else{
      pPg->aData = lsmMallocRc(pFS->pEnv, pFS->nMetasize, &rc);
      if( rc==LSM_OK && bWrite==0 ){
        rc = lsmEnvRead(pFS->pEnv, pFS->fdDb, iOff, pPg->aData, pFS->nMetasize);
      }
................................................................................
      else if( rc==LSM_OK ){
        memset( pPg->aData, 0x77, pFS->nMetasize );
      }
#endif
    }

    if( rc!=LSM_OK ){
      if( pFS->nMapLimit==0 ) lsmFree(pFS->pEnv, pPg->aData);
      lsmFree(pFS->pEnv, pPg);
      pPg = 0;
    }else{
      pPg->iPg = iPg;
      pPg->bWrite = bWrite;
      pPg->pFS = pFS;
    }
................................................................................
** Release a meta-page reference obtained via a call to lsmFsMetaPageGet().
*/
int lsmFsMetaPageRelease(MetaPage *pPg){
  int rc = LSM_OK;
  if( pPg ){
    FileSystem *pFS = pPg->pFS;

    if( pFS->nMapLimit==0 ){
      if( pPg->bWrite ){
        i64 iOff = (pPg->iPg==2 ? pFS->nMetasize : 0);
        int nWrite = pFS->nMetasize;
        rc = lsmEnvWrite(pFS->pEnv, pFS->fdDb, iOff, pPg->aData, nWrite);
      }
      lsmFree(pFS->pEnv, pPg->aData);
    }
................................................................................
** It is safe to assume that there are no outstanding references to pages 
** on block iTo. And that block iFrom is not currently being written. In
** other words, the data can be read and written directly.
*/
int lsmFsMoveBlock(FileSystem *pFS, Segment *pSeg, int iTo, int iFrom){
  Snapshot *p = pFS->pDb->pWorker;
  int rc = LSM_OK;
  i64 nMap;

  i64 iFromOff = (i64)(iFrom-1) * pFS->nBlocksize;
  i64 iToOff = (i64)(iTo-1) * pFS->nBlocksize;
  
  assert( iTo!=1 );
  assert( iFrom>iTo );

  /* Grow the mapping as required. */
  nMap = LSM_MIN(pFS->nMapLimit, (i64)iFrom * pFS->nBlocksize);
  fsGrowMapping(pFS, nMap, &rc);






  if( rc==LSM_OK ){
    const int nPagePerBlock = (pFS->nBlocksize / pFS->nPagesize);
    int nSz = pFS->nPagesize;
    int i;
    u8 *aBuf = 0;
    u8 *aData = 0;

    for(i=0; rc==LSM_OK && i<nPagePerBlock; i++){
      i64 iOff = iFromOff + i*nSz;
      Page *pPg;

      /* Set aData to point to a buffer containing the from page */
      if( (iOff+nSz)<=pFS->nMapLimit ){
        u8 *aMap = (u8 *)(pFS->pMap);
        aData = &aMap[iOff];
      }else{
        if( aBuf==0 ){
          aBuf = (u8 *)lsmMallocRc(pFS->pEnv, nSz, &rc);
          if( aBuf==0 ) break;
        }
        aData = aBuf;
        rc = lsmEnvRead(pFS->pEnv, pFS->fdDb, iOff, aData, nSz);
      }

      /* Copy aData to the to page */
      if( rc==LSM_OK ){
        iOff = iToOff + i*nSz;
        if( (iOff+nSz)<=pFS->nMapLimit ){
          u8 *aMap = (u8 *)(pFS->pMap);
          memcpy(&aMap[iOff], aData, nSz);
        }else{
          rc = lsmEnvWrite(pFS->pEnv, pFS->fdDb, iOff, aData, nSz);
        }
      }

    }
    lsmFree(pFS->pEnv, aBuf);
    lsmFsPurgeCache(pFS);
  }

  /* Update append-point list if necessary */
  if( rc==LSM_OK ){
    int i;
    for(i=0; i<LSM_APPLIST_SZ; i++){
      if( fsPageToBlock(pFS, p->aiAppend[i])==iFrom ){
................................................................................
  int rc = *pRc;
  Page *pPg;

  pPg = pFS->pWaiting;
  pFS->pWaiting = 0;

  while( pPg ){
    Page *pNext = pPg->pWaitingNext;
    if( rc==LSM_OK ) rc = lsmFsPagePersist(pPg);
    assert( pPg->nRef==1 );
    lsmFsPageRelease(pPg);
    pPg = pNext;
  }
  *pRc = rc;
}
................................................................................
static void fsRemoveHashEntry(FileSystem *pFS, Pgno iPg){
  Page *p;
  int iHash = fsHashKey(pFS->nHash, iPg);

  for(p=pFS->apHash[iHash]; p && p->iPg!=iPg; p=p->pHashNext);

  if( p ){
    assert( p->nRef==0 || (p->flags & PAGE_FREE)==0 );
    fsPageRemoveFromHash(pFS, p);
    p->iPg = 0;
    iHash = fsHashKey(pFS->nHash, 0);
    p->pHashNext = pFS->apHash[iHash];
    pFS->apHash[iHash] = p;
  }
}
................................................................................
        ** lsmFsPagePersist() to write an out-of-order page. Instead a page 
        ** number is assigned here so that the page data will be appended
        ** to the current segment.
        */
        Page **pp;
        int iPrev = 0;
        int iNext = 0;
        int iHash;

        assert( pPg->pSeg->iFirst );
        assert( pPg->flags & PAGE_FREE );
        assert( (pPg->flags & PAGE_HASPREV)==0 );
        assert( pPg->nData==pFS->nPagesize-4 );

        rc = fsAppendPage(pFS, pPg->pSeg, &pPg->iPg, &iPrev, &iNext);
        if( rc!=LSM_OK ) return rc;

        assert( pPg->flags & PAGE_FREE );
        iHash = fsHashKey(pFS->nHash, pPg->iPg);
        fsRemoveHashEntry(pFS, pPg->iPg);
        pPg->pHashNext = pFS->apHash[iHash];
        pFS->apHash[iHash] = pPg;
        assert( pPg->pHashNext==0 || pPg->pHashNext->iPg!=pPg->iPg );


        if( iPrev ){
          assert( iNext==0 );
          memmove(&pPg->aData[4], pPg->aData, pPg->nData);
          lsmPutU32(pPg->aData, iPrev);
          pPg->flags |= PAGE_HASPREV;
          pPg->aData += 4;
................................................................................
        }else{
          int nData = pPg->nData;
          pPg->nData += 4;
          lsmSortedExpandBtreePage(pPg, nData);
        }

        pPg->nRef++;
        for(pp=&pFS->pWaiting; *pp; pp=&(*pp)->pWaitingNext);
        *pp = pPg;
        assert( pPg->pWaitingNext==0 );

      }else{
        i64 iOff;                   /* Offset to write within database file */

        iOff = (i64)pFS->nPagesize * (i64)(pPg->iPg-1);
        if( fsMmapPage(pFS, pPg->iPg)==0 ){
          u8 *aData = pPg->aData - (pPg->flags & PAGE_HASPREV);
          rc = lsmEnvWrite(pFS->pEnv, pFS->fdDb, iOff, aData, pFS->nPagesize);
        }else if( pPg->flags & PAGE_FREE ){
          fsGrowMapping(pFS, iOff + pFS->nPagesize, &rc);
          if( rc==LSM_OK ){
            u8 *aTo = &((u8 *)(pFS->pMap))[iOff];
            u8 *aFrom = pPg->aData - (pPg->flags & PAGE_HASPREV);
            memcpy(aTo, aFrom, pFS->nPagesize);
            lsmFree(pFS->pEnv, aFrom);
            pFS->nCacheAlloc--;
            pPg->aData = aTo + (pPg->flags & PAGE_HASPREV);
            pPg->flags &= ~PAGE_FREE;
            fsPageRemoveFromHash(pFS, pPg);
            pPg->pMappedNext = pFS->pMapped;
            pFS->pMapped = pPg;
          }
        }

        lsmFsFlushWaiting(pFS, &rc);
        pPg->flags &= ~PAGE_DIRTY;
        pFS->nWrite++;
      }
................................................................................
      assert( pPg->pFS->pCompress 
           || fsIsFirst(pPg->pFS, pPg->iPg)==0 
           || (pPg->flags & PAGE_HASPREV)
      );
      pPg->aData -= (pPg->flags & PAGE_HASPREV);
      pPg->flags &= ~PAGE_HASPREV;

      if( (pPg->flags & PAGE_FREE)==0 ){
        /* Removed from mapped list */
        Page **pp;
        for(pp=&pFS->pMapped; (*pp)!=pPg; pp=&(*pp)->pMappedNext);
        *pp = pPg->pMappedNext;
        pPg->pMappedNext = 0;

        /* Add to free list */
        pPg->pFreeNext = pFS->pFree;
        pFS->pFree = pPg;
      }else{
#if 0
        assert( pPg->pLruNext==0 );
        assert( pPg->pLruPrev==0 );
        fsPageRemoveFromHash(pFS, pPg);
        fsPageBufferFree(pPg);

Changes to src/lsm_main.c.

92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
...
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
  pDb->nDfltBlksz = LSM_DFLT_BLOCK_SIZE;
  pDb->nMerge = LSM_DFLT_AUTOMERGE;
  pDb->nMaxFreelist = LSM_MAX_FREELIST_ENTRIES;
  pDb->bUseLog = LSM_DFLT_USE_LOG;
  pDb->iReader = -1;
  pDb->iRwclient = -1;
  pDb->bMultiProc = LSM_DFLT_MULTIPLE_PROCESSES;
  pDb->bMmap = LSM_DFLT_MMAP;
  pDb->xLog = xLog;
  pDb->compress.iId = LSM_COMPRESSION_NONE;
  return LSM_OK;
}

lsm_env *lsm_get_env(lsm_db *pDb){
  assert( pDb->pEnv );
................................................................................
      }
      *piVal = pDb->eSafety;
      break;
    }

    case LSM_CONFIG_MMAP: {
      int *piVal = va_arg(ap, int *);
      if( pDb->iReader<0 && *piVal>=0 && *piVal<=1 ){
        pDb->bMmap = *piVal;
        rc = lsmFsConfigure(pDb);
      }
      *piVal = pDb->bMmap;
      break;
    }

    case LSM_CONFIG_USE_LOG: {
      int *piVal = va_arg(ap, int *);
      if( pDb->nTransOpen==0 && (*piVal==0 || *piVal==1) ){
        pDb->bUseLog = *piVal;







|







 







|
|


|







92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
...
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
  pDb->nDfltBlksz = LSM_DFLT_BLOCK_SIZE;
  pDb->nMerge = LSM_DFLT_AUTOMERGE;
  pDb->nMaxFreelist = LSM_MAX_FREELIST_ENTRIES;
  pDb->bUseLog = LSM_DFLT_USE_LOG;
  pDb->iReader = -1;
  pDb->iRwclient = -1;
  pDb->bMultiProc = LSM_DFLT_MULTIPLE_PROCESSES;
  pDb->iMmap = LSM_DFLT_MMAP;
  pDb->xLog = xLog;
  pDb->compress.iId = LSM_COMPRESSION_NONE;
  return LSM_OK;
}

lsm_env *lsm_get_env(lsm_db *pDb){
  assert( pDb->pEnv );
................................................................................
      }
      *piVal = pDb->eSafety;
      break;
    }

    case LSM_CONFIG_MMAP: {
      int *piVal = va_arg(ap, int *);
      if( pDb->iReader<0 && *piVal>=0 ){
        pDb->iMmap = *piVal;
        rc = lsmFsConfigure(pDb);
      }
      *piVal = pDb->iMmap;
      break;
    }

    case LSM_CONFIG_USE_LOG: {
      int *piVal = va_arg(ap, int *);
      if( pDb->nTransOpen==0 && (*piVal==0 || *piVal==1) ){
        pDb->bUseLog = *piVal;

Changes to src/lsm_shared.c.

508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525

  /* If the db handle is read-write, then connect to the system now. Run
  ** recovery as necessary. Or, if this is a read-only database handle,
  ** defer attempting to connect to the system until a read-transaction
  ** is opened.  */
  if( pDb->bReadonly==0 ){
    if( rc==LSM_OK ){
      rc = doDbConnect(pDb);
    }
    if( rc==LSM_OK ){
      rc = lsmFsConfigure(pDb);
    }
  }

  return rc;
}

static void dbDeferClose(lsm_db *pDb){







|


|







508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525

  /* If the db handle is read-write, then connect to the system now. Run
  ** recovery as necessary. Or, if this is a read-only database handle,
  ** defer attempting to connect to the system until a read-transaction
  ** is opened.  */
  if( pDb->bReadonly==0 ){
    if( rc==LSM_OK ){
      rc = lsmFsConfigure(pDb);
    }
    if( rc==LSM_OK ){
      rc = doDbConnect(pDb);
    }
  }

  return rc;
}

static void dbDeferClose(lsm_db *pDb){

Changes to src/lsm_sorted.c.

2488
2489
2490
2491
2492
2493
2494


2495
2496
2497
2498
2499
2500
2501
    bOld = (lsmTreeHasOld(pDb) && pDb->treehdr.iOldLog!=pDb->pClient->iLogOff);
    if( !bOld && pCsr->apTreeCsr[1] ){
      lsmTreeCursorDestroy(pCsr->apTreeCsr[1]);
      pCsr->apTreeCsr[1] = 0;
    }else if( bOld && !pCsr->apTreeCsr[1] ){
      rc = lsmTreeCursorNew(pDb, 1, &pCsr->apTreeCsr[1]);
    }



  }else{
    pCsr = multiCursorNew(pDb, &rc);
    if( rc==LSM_OK ) rc = multiCursorInit(pCsr, pDb->pClient);
  }

  if( rc!=LSM_OK ){







>
>







2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
    bOld = (lsmTreeHasOld(pDb) && pDb->treehdr.iOldLog!=pDb->pClient->iLogOff);
    if( !bOld && pCsr->apTreeCsr[1] ){
      lsmTreeCursorDestroy(pCsr->apTreeCsr[1]);
      pCsr->apTreeCsr[1] = 0;
    }else if( bOld && !pCsr->apTreeCsr[1] ){
      rc = lsmTreeCursorNew(pDb, 1, &pCsr->apTreeCsr[1]);
    }

    pCsr->flags = (CURSOR_IGNORE_SYSTEM | CURSOR_IGNORE_DELETE);

  }else{
    pCsr = multiCursorNew(pDb, &rc);
    if( rc==LSM_OK ) rc = multiCursorInit(pCsr, pDb->pClient);
  }

  if( rc!=LSM_OK ){

Changes to src/lsm_unix.c.

198
199
200
201
202
203
204





205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
  lsm_i64 *pnOut
){
  off_t iSz;
  int prc;
  PosixFile *p = (PosixFile *)pFile;
  struct stat buf;






  if( p->pMap ){
    munmap(p->pMap, p->nMap);
    *ppOut = p->pMap = 0;
    *pnOut = p->nMap = 0;
  }

  if( iMin>=0 ){
    memset(&buf, 0, sizeof(buf));
    prc = fstat(p->fd, &buf);
    if( prc!=0 ) return LSM_IOERR_BKPT;
    iSz = buf.st_size;
    if( iSz<iMin ){
      iSz = ((iMin + (2<<20) - 1) / (2<<20)) * (2<<20);
      prc = ftruncate(p->fd, iSz);
      if( prc!=0 ) return LSM_IOERR_BKPT;
    }

    p->pMap = mmap(0, iSz, PROT_READ|PROT_WRITE, MAP_SHARED, p->fd, 0);
    p->nMap = iSz;
  }







>
>
>
>
>












|







198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
  lsm_i64 *pnOut
){
  off_t iSz;
  int prc;
  PosixFile *p = (PosixFile *)pFile;
  struct stat buf;

  /* If the file is between 0 and 2MB in size, extend it in chunks of 256K.
  ** Thereafter, in chunks of 1MB at a time.  */
  const int aIncrSz[] = {256*1024, 1024*1024};
  int nIncrSz = aIncrSz[iMin>(2*1024*1024)];

  if( p->pMap ){
    munmap(p->pMap, p->nMap);
    *ppOut = p->pMap = 0;
    *pnOut = p->nMap = 0;
  }

  if( iMin>=0 ){
    memset(&buf, 0, sizeof(buf));
    prc = fstat(p->fd, &buf);
    if( prc!=0 ) return LSM_IOERR_BKPT;
    iSz = buf.st_size;
    if( iSz<iMin ){
      iSz = ((iMin + nIncrSz-1) / nIncrSz) * nIncrSz;
      prc = ftruncate(p->fd, iSz);
      if( prc!=0 ) return LSM_IOERR_BKPT;
    }

    p->pMap = mmap(0, iSz, PROT_READ|PROT_WRITE, MAP_SHARED, p->fd, 0);
    p->nMap = iSz;
  }

Changes to test/log3.test.

37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
reset_db
do_test 2.0 { sqlite4_lsm_config db main safety   2    } {2}

do_execsql_test 2.2 {
  CREATE TABLE t1(a PRIMARY KEY, b);
  INSERT INTO t1 VALUES(randstr(50,50), randstr(50,50));
} {}
do_filesize_test 2.3   0 1024

do_execsql_test 2.4 {
  BEGIN;
    INSERT INTO t1 VALUES(randstr(50,50), randstr(50,50));
    INSERT INTO t1 VALUES(randstr(50,50), randstr(50,50));
    INSERT INTO t1 VALUES(randstr(50,50), randstr(50,50));
    INSERT INTO t1 VALUES(randstr(50,50), randstr(50,50));
    INSERT INTO t1 VALUES(randstr(50,50), randstr(50,50));
    INSERT INTO t1 VALUES(randstr(50,50), randstr(50,50));
  COMMIT;
} {}
do_filesize_test 2.5   0 2048

do_test         2.6 { optimize_db } {}
do_execsql_test 2.7 { INSERT INTO t1 VALUES(randstr(50,50), randstr(50,50)) }
do_test         2.8 { sqlite4_lsm_checkpoint db main } {}
do_test 2.9 { sqlite4_lsm_info db main log-structure } {0 0 0 0 2560 3072}

for {set i 1} {$i <= 6} {incr i} {







|











|







37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
reset_db
do_test 2.0 { sqlite4_lsm_config db main safety   2    } {2}

do_execsql_test 2.2 {
  CREATE TABLE t1(a PRIMARY KEY, b);
  INSERT INTO t1 VALUES(randstr(50,50), randstr(50,50));
} {}
do_filesize_test 2.3   262144 1024

do_execsql_test 2.4 {
  BEGIN;
    INSERT INTO t1 VALUES(randstr(50,50), randstr(50,50));
    INSERT INTO t1 VALUES(randstr(50,50), randstr(50,50));
    INSERT INTO t1 VALUES(randstr(50,50), randstr(50,50));
    INSERT INTO t1 VALUES(randstr(50,50), randstr(50,50));
    INSERT INTO t1 VALUES(randstr(50,50), randstr(50,50));
    INSERT INTO t1 VALUES(randstr(50,50), randstr(50,50));
  COMMIT;
} {}
do_filesize_test 2.5   262144 2048

do_test         2.6 { optimize_db } {}
do_execsql_test 2.7 { INSERT INTO t1 VALUES(randstr(50,50), randstr(50,50)) }
do_test         2.8 { sqlite4_lsm_checkpoint db main } {}
do_test 2.9 { sqlite4_lsm_info db main log-structure } {0 0 0 0 2560 3072}

for {set i 1} {$i <= 6} {incr i} {

Changes to test/lsm5.test.

18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
..
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
set testprefix lsm5
db close

# Create a new database with file name $file.
#
proc create_abc_db {file} {
  forcedelete $file
  lsm_open db $file {block_size 256}
  db write a alpha
  db write b bravo
  db write c charlie
  db close
}

proc create_abc_log {file} {
  forcedelete $file ${file}-2
  lsm_open db ${file}-2
  db write a alpha
  db write b bravo
  db write c charlie
  file copy ${file}-2 $file
  file copy ${file}-2-log $file-log
  db close
}
................................................................................
# disconnects), an attempt is made to truncate the database file to the
# minimum number of blocks required.
# 
# This test case checks that this process does not actually cause the
# database to grow.
# 
do_test 1.1 {
  lsm_open db test.db
  db config {mmap 0}
} {0}
do_test 1.2 {
  db write 1 one
  db write 2 two
  db close
} {}
do_test 1.3 {
  expr [file size test.db] < (64*1024)







|








|







 







|
<
|







18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
..
45
46
47
48
49
50
51
52

53
54
55
56
57
58
59
60
set testprefix lsm5
db close

# Create a new database with file name $file.
#
proc create_abc_db {file} {
  forcedelete $file
  lsm_open db $file {block_size 256 mmap 0}
  db write a alpha
  db write b bravo
  db write c charlie
  db close
}

proc create_abc_log {file} {
  forcedelete $file ${file}-2
  lsm_open db ${file}-2 {mmap 0}
  db write a alpha
  db write b bravo
  db write c charlie
  file copy ${file}-2 $file
  file copy ${file}-2-log $file-log
  db close
}
................................................................................
# disconnects), an attempt is made to truncate the database file to the
# minimum number of blocks required.
# 
# This test case checks that this process does not actually cause the
# database to grow.
# 
do_test 1.1 {
  lsm_open db test.db {mmap 0}

} {db}
do_test 1.2 {
  db write 1 one
  db write 2 two
  db close
} {}
do_test 1.3 {
  expr [file size test.db] < (64*1024)

Changes to test/mc1.test.

22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
..
71
72
73
74
75
76
77
78
79
80
81
82
  # Test that connection 2 can see changes made by connection 1.
  do_test 1.$tn.1 { 
    sql1 { CREATE TABLE t1(a PRIMARY KEY, b) } 
    sql1 { INSERT INTO t1 VALUES(1, 2) }
    sql2 { SELECT * FROM t1 }
  } {1 2}

  do_test 1.$tn.2 { file size test.db } 0

  # Connection 1 does not see uncommitted changes made by connection 2.
  do_test 1.$tn.3 {
    sql2 { BEGIN; INSERT INTO t1 VALUES(2, 4); }
    sql1 { SELECT * FROM t1 }
  } {1 2}

................................................................................
  # But it can from a new snapshot.
  do_test 1.$tn.9 {
    sql1  { COMMIT; BEGIN }
    sql1  { INSERT INTO t1 VALUES(6, 12) }
    sql1  { SELECT * FROM t1 }
  } {1 2 2 4 3 6 4 8 5 10 6 12}

  do_test 1.$tn.10 { file size test.db } 0
}

finish_test








|







 







|




22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
..
71
72
73
74
75
76
77
78
79
80
81
82
  # Test that connection 2 can see changes made by connection 1.
  do_test 1.$tn.1 { 
    sql1 { CREATE TABLE t1(a PRIMARY KEY, b) } 
    sql1 { INSERT INTO t1 VALUES(1, 2) }
    sql2 { SELECT * FROM t1 }
  } {1 2}

  do_test 1.$tn.2 { file size test.db } [expr 256*1024]

  # Connection 1 does not see uncommitted changes made by connection 2.
  do_test 1.$tn.3 {
    sql2 { BEGIN; INSERT INTO t1 VALUES(2, 4); }
    sql1 { SELECT * FROM t1 }
  } {1 2}

................................................................................
  # But it can from a new snapshot.
  do_test 1.$tn.9 {
    sql1  { COMMIT; BEGIN }
    sql1  { INSERT INTO t1 VALUES(6, 12) }
    sql1  { SELECT * FROM t1 }
  } {1 2 2 4 3 6 4 8 5 10 6 12}

  do_test 1.$tn.10 { file size test.db } [expr 256*1024]
}

finish_test

Changes to test/simple.test.

1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
  INSERT INTO t1 SELECT randomblob(1024) FROM t1;    --  64
}
do_execsql_test 71.2 { SELECT count(*) FROM t1 } 64
db close
sqlite4 db test.db
do_execsql_test 71.3 { SELECT count(*) FROM t1 } 64
do_test 71.4 { 
  expr {[file size test.db] < 256*1024}
} {1}

#-------------------------------------------------------------------------
# This is testing that the "phantom" runs feature works with mmap.
#
# UPDATE: Said feature was dropped early in development. But the test 
# remains valid.







|







1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
  INSERT INTO t1 SELECT randomblob(1024) FROM t1;    --  64
}
do_execsql_test 71.2 { SELECT count(*) FROM t1 } 64
db close
sqlite4 db test.db
do_execsql_test 71.3 { SELECT count(*) FROM t1 } 64
do_test 71.4 { 
  expr {[file size test.db] <= 256*1024}
} {1}

#-------------------------------------------------------------------------
# This is testing that the "phantom" runs feature works with mmap.
#
# UPDATE: Said feature was dropped early in development. But the test 
# remains valid.