Index: lsm-test/lsmtest.h ================================================================== --- lsm-test/lsmtest.h +++ lsm-test/lsmtest.h @@ -93,10 +93,13 @@ int test_lsm_mt2(const char *zFilename, int bClear, TestDb **ppDb); int test_lsm_mt3(const char *zFilename, int bClear, TestDb **ppDb); int tdb_lsm_configure(lsm_db *, const char *); +/* Functions in lsmtest_tdb4.c */ +int test_bt_open(const char *zFilename, int bClear, TestDb **ppDb); + /* Functions in testutil.c. */ int testPrngInit(void); u32 testPrngValue(u32 iVal); void testPrngArray(u32 iVal, u32 *aOut, int nOut); @@ -169,12 +172,12 @@ /* lsmtest6.c */ void test_oom(const char *zPattern, int *pRc); void testDeleteLsmdb(const char *zFile); -void testSaveLsmdb(const char *zFile); -void testRestoreLsmdb(const char *zFile); +void testSaveDb(const char *zFile, const char *zAuxExt); +void testRestoreDb(const char *zFile, const char *zAuxExt); void testCopyLsmdb(const char *zFrom, const char *zTo); /* lsmtest7.c */ void test_api(const char *zPattern, int *pRc); Index: lsm-test/lsmtest2.c ================================================================== --- lsm-test/lsmtest2.c +++ lsm-test/lsmtest2.c @@ -188,11 +188,30 @@ TestDb *pDb; rc = tdb_lsm_open(zCfg, zFile, 1, &pDb); if( rc==0 ){ testWriteDatasourceRange(pDb, pData, 0, nRow, &rc); testClose(&pDb); - if( rc==0 ) testSaveLsmdb(zFile); + if( rc==0 ) testSaveDb(zFile, "log"); + } + *pRc = rc; + } +} + +void testSetupSavedBtdb( + const char *zFile, + Datasource *pData, + int nRow, + int *pRc +){ + if( *pRc==0 ){ + int rc; + TestDb *pDb; + rc = tdb_open("bt", zFile, 1, &pDb); + if( rc==0 ){ + testWriteDatasourceRange(pDb, pData, 0, nRow, &rc); + testClose(&pDb); + if( rc==0 ) testSaveDb(zFile, "wal"); } *pRc = rc; } } @@ -217,10 +236,47 @@ *pRc = tdb_lsm_open((bCompress?"compression=1 mmap=0":""), zFile, 0, &pDb); testCksumDatabase(pDb, zCksum); testClose(&pDb); + if( *pRc==0 ){ + int r1 = 0; + int r2 = -1; + + r1 = strcmp(zCksum, zExpect1); + if( zExpect2 ) 
r2 = strcmp(zCksum, zExpect2); + if( r1 && r2 ){ + if( zExpect2 ){ + testPrintError("testCompareCksumLsmdb: \"%s\" != (\"%s\" OR \"%s\")", + zCksum, zExpect1, zExpect2 + ); + }else{ + testPrintError("testCompareCksumLsmdb: \"%s\" != \"%s\"", + zCksum, zExpect1 + ); + } + *pRc = 1; + test_failed(); + } + } + } +} + +static void testCompareCksumBtdb( + const char *zFile, /* Path to LSM database */ + const char *zExpect1, /* Expected checksum 1 */ + const char *zExpect2, /* Expected checksum 2 (or NULL) */ + int *pRc /* IN/OUT: Test case error code */ +){ + if( *pRc==0 ){ + char zCksum[TEST_CKSUM_BYTES]; + TestDb *pDb; + + *pRc = tdb_open("bt", zFile, 0, &pDb); + testCksumDatabase(pDb, zCksum); + testClose(&pDb); + if( *pRc==0 ){ int r1 = 0; int r2 = -1; r1 = strcmp(zCksum, zExpect1); @@ -288,11 +344,11 @@ int testrc = 0; testCaseProgress(i, nIter, testCaseNDot(), &iDot); /* Restore and open the database. */ - testRestoreLsmdb(DBNAME); + testRestoreDb(DBNAME, "log"); testrc = tdb_lsm_open(azConfig[bCompress], DBNAME, 0, &pDb); assert( testrc==0 ); /* Call lsm_work() on the db */ tdb_lsm_prepare_sync_crash(pDb, 1 + (i%(nWork*2))); @@ -343,11 +399,11 @@ int testrc = 0; testCaseProgress(i, nIter, testCaseNDot(), &iDot); /* Restore and open the database. */ - testRestoreLsmdb(DBNAME); + testRestoreDb(DBNAME, "log"); testrc = tdb_lsm_open("safety=2", DBNAME, 0, &pDb); assert( testrc==0 ); /* Insert nInsert records into the database. Crash midway through. */ tdb_lsm_prepare_sync_crash(pDb, 1 + (i%(nInsert+2))); @@ -361,10 +417,70 @@ } tdb_close(pDb); /* Check that no data was lost when the system crashed. */ testCompareCksumLsmdb(DBNAME, bCompress, + testCksumArrayGet(pCksumDb, 100 + iIns), + testCksumArrayGet(pCksumDb, 100 + iIns + 1), + pRc + ); + } + + testDatasourceFree(pData); + testCksumArrayFree(pCksumDb); +} + + +/* +** This test verifies that if a system crash occurs while committing a +** transaction to the log file, no earlier transactions are lost or damaged. 
+*/ +static void crash_test2b(int bCompress, int *pRc){ + const char *DBNAME = "testdb.bt"; + const DatasourceDefn defn = {TEST_DATASOURCE_RANDOM, 12, 16, 1000, 1000}; + + const int nIter = 200; + const int nInsert = 20; + + int i; + int iDot = 0; + Datasource *pData; + CksumDb *pCksumDb; + TestDb *pDb; + + /* Allocate datasource. And calculate the expected checksums. */ + pData = testDatasourceNew(&defn); + pCksumDb = testCksumArrayNew(pData, 100, 100+nInsert+1, 1); + + /* Setup and save the initial database. */ + testSetupSavedBtdb(DBNAME, pData, 100, pRc); + + for(i=0; iaBuffer); - return sqlite4BtClose(p->pBt); -} - -static int btMinTransaction(BtDb *p, int iMin, int *piLevel){ - int iLevel; - int rc = SQLITE4_OK; - - iLevel = sqlite4BtTransactionLevel(p->pBt); - if( iLevelpBt, iMin); - *piLevel = iLevel; - }else{ - *piLevel = -1; - } - - return rc; -} -static int btRestoreTransaction(BtDb *p, int iLevel, int rcin){ - int rc = rcin; - if( iLevel>=0 ){ - if( rc==SQLITE4_OK ){ - rc = sqlite4BtCommit(p->pBt, iLevel); - }else{ - sqlite4BtRollback(p->pBt, iLevel); - } - assert( iLevel==sqlite4BtTransactionLevel(p->pBt) ); - } - return rc; -} - - - -int bt_write(TestDb *pTestDb, void *pK, int nK, void *pV, int nV){ - BtDb *p = (BtDb*)pTestDb; - int iLevel; - int rc; - - rc = btMinTransaction(p, 2, &iLevel); - if( rc==SQLITE4_OK ){ - rc = sqlite4BtReplace(p->pBt, pK, nK, pV, nV); - rc = btRestoreTransaction(p, iLevel, rc); - } - return rc; -} - -int bt_delete(TestDb *pTestDb, void *pK, int nK){ - return bt_write(pTestDb, pK, nK, 0, -1); -} - -int bt_delete_range( - TestDb *pTestDb, - void *pKey1, int nKey1, - void *pKey2, int nKey2 -){ - BtDb *p = (BtDb*)pTestDb; - bt_cursor *pCsr = 0; - int rc = SQLITE4_OK; - int iLevel; - - rc = btMinTransaction(p, 2, &iLevel); - if( rc==SQLITE4_OK ){ - rc = sqlite4BtCsrOpen(p->pBt, 0, &pCsr); - } - while( rc==SQLITE4_OK ){ - const void *pK; - int n; - int nCmp; - int res; - - rc = sqlite4BtCsrSeek(pCsr, pKey1, nKey1, BT_SEEK_GE); - 
if( rc==SQLITE4_INEXACT ) rc = SQLITE4_OK; - if( rc!=SQLITE4_OK ) break; - - rc = sqlite4BtCsrKey(pCsr, &pK, &n); - if( rc!=SQLITE4_OK ) break; - - nCmp = MIN(n, nKey1); - res = memcmp(pKey1, pK, nCmp); - assert( res<0 || (res==0 && nKey1<=n) ); - if( res==0 && nKey1==n ){ - rc = sqlite4BtCsrNext(pCsr); - if( rc!=SQLITE4_OK ) break; - rc = sqlite4BtCsrKey(pCsr, &pK, &n); - if( rc!=SQLITE4_OK ) break; - } - - nCmp = MIN(n, nKey2); - res = memcmp(pKey2, pK, nCmp); - if( res<0 || (res==0 && nKey2<=n) ) break; - - rc = sqlite4BtDelete(pCsr); - } - if( rc==SQLITE4_NOTFOUND ) rc = SQLITE4_OK; - - sqlite4BtCsrClose(pCsr); - - rc = btRestoreTransaction(p, iLevel, rc); - return rc; -} - -int bt_fetch(TestDb *pTestDb, void *pK, int nK, void **ppVal, int *pnVal){ - BtDb *p = (BtDb*)pTestDb; - bt_cursor *pCsr = 0; - int iLevel; - int rc = SQLITE4_OK; - - iLevel = sqlite4BtTransactionLevel(p->pBt); - if( iLevel==0 ){ - rc = sqlite4BtBegin(p->pBt, 1); - if( rc!=SQLITE4_OK ) return rc; - } - - rc = sqlite4BtCsrOpen(p->pBt, 0, &pCsr); - if( rc==SQLITE4_OK ){ - rc = sqlite4BtCsrSeek(pCsr, pK, nK, BT_SEEK_EQ); - if( rc==SQLITE4_OK ){ - const void *pV = 0; - int nV = 0; - rc = sqlite4BtCsrData(pCsr, 0, -1, &pV, &nV); - if( rc==SQLITE4_OK ){ - if( nV>p->nBuffer ){ - free(p->aBuffer); - p->aBuffer = (u8*)malloc(nV*2); - p->nBuffer = nV*2; - } - memcpy(p->aBuffer, pV, nV); - *pnVal = nV; - *ppVal = (void*)(p->aBuffer); - } - - }else if( rc==SQLITE4_INEXACT || rc==SQLITE4_NOTFOUND ){ - *ppVal = 0; - *pnVal = -1; - rc = SQLITE4_OK; - } - sqlite4BtCsrClose(pCsr); - } - - if( iLevel==0 ) sqlite4BtCommit(p->pBt, 0); - return rc; -} - -static int bt_scan( - TestDb *pTestDb, - void *pCtx, - int bReverse, - void *pFirst, int nFirst, - void *pLast, int nLast, - void (*xCallback)(void *, void *, int , void *, int) -){ - BtDb *p = (BtDb*)pTestDb; - bt_cursor *pCsr = 0; - int rc; - int iLevel; - - rc = btMinTransaction(p, 1, &iLevel); - - if( rc==SQLITE4_OK ){ - rc = sqlite4BtCsrOpen(p->pBt, 0, 
&pCsr); - } - if( rc==SQLITE4_OK ){ - if( bReverse ){ - if( pLast ){ - rc = sqlite4BtCsrSeek(pCsr, pLast, nLast, BT_SEEK_LE); - }else{ - rc = sqlite4BtCsrLast(pCsr); - } - }else{ - rc = sqlite4BtCsrSeek(pCsr, pFirst, nFirst, BT_SEEK_GE); - } - if( rc==SQLITE4_INEXACT ) rc = SQLITE4_OK; - - while( rc==SQLITE4_OK ){ - const void *pK = 0; int nK = 0; - const void *pV = 0; int nV = 0; - - rc = sqlite4BtCsrKey(pCsr, &pK, &nK); - if( rc==SQLITE4_OK ){ - rc = sqlite4BtCsrData(pCsr, 0, -1, &pV, &nV); - } - - if( rc!=SQLITE4_OK ) break; - if( bReverse ){ - if( pFirst ){ - int res; - int nCmp = MIN(nK, nFirst); - res = memcmp(pFirst, pK, nCmp); - if( res>0 || (res==0 && nKnLast) ) break; - } - } - - xCallback(pCtx, (void*)pK, nK, (void*)pV, nV); - if( bReverse ){ - rc = sqlite4BtCsrPrev(pCsr); - }else{ - rc = sqlite4BtCsrNext(pCsr); - } - } - if( rc==SQLITE4_NOTFOUND ) rc = SQLITE4_OK; - - sqlite4BtCsrClose(pCsr); - } - - rc = btRestoreTransaction(p, iLevel, rc); - return rc; -} - -static int bt_begin(TestDb *pTestDb, int iLvl){ - BtDb *p = (BtDb*)pTestDb; - int rc = sqlite4BtBegin(p->pBt, iLvl); - return rc; -} - -static int bt_commit(TestDb *pTestDb, int iLvl){ - BtDb *p = (BtDb*)pTestDb; - int rc = sqlite4BtCommit(p->pBt, iLvl); - return rc; -} - -static int bt_rollback(TestDb *pTestDb, int iLvl){ - BtDb *p = (BtDb*)pTestDb; - int rc = sqlite4BtRollback(p->pBt, iLvl); - return rc; -} - -int bt_open(const char *zFilename, int bClear, TestDb **ppDb){ - static const DatabaseMethods SqlMethods = { - bt_close, - bt_write, - bt_delete, - bt_delete_range, - bt_fetch, - bt_scan, - bt_begin, - bt_commit, - bt_rollback - }; - BtDb *p = 0; - bt_db *pBt = 0; - int rc; - sqlite4_env *pEnv = sqlite4_env_default(); - - if( bClear && zFilename && zFilename[0] ){ - char *zLog = sqlite3_mprintf("%s-wal", zFilename); - unlink(zFilename); - unlink(zLog); - sqlite3_free(zLog); - } - - rc = sqlite4BtNew(pEnv, sizeof(BtDb), &pBt); - if( rc==SQLITE4_OK ){ - p = (BtDb*)sqlite4BtExtra(pBt); - 
p->base.pMethods = &SqlMethods; - p->pBt = pBt; - rc = sqlite4BtOpen(pBt, zFilename); - } - - if( rc!=SQLITE4_OK && p ){ - bt_close(&p->base); - } - - *ppDb = &p->base; - return rc; -} -/* -** End wrapper for bt. -*************************************************************************/ - /************************************************************************* ** Begin exported functions. */ static struct Lib { const char *zName; const char *zDefaultDb; int (*xOpen)(const char *zFilename, int bClear, TestDb **ppDb); } aLib[] = { - { "bt", "testdb.bt", bt_open }, + { "bt", "testdb.bt", test_bt_open }, { "sqlite3", "testdb.sqlite", sql_open }, { "lsm_small", "testdb.lsm_small", test_lsm_small_open }, { "lsm_lomem", "testdb.lsm_lomem", test_lsm_lomem_open }, #ifdef HAVE_ZLIB { "lsm_zip", "testdb.lsm_zip", test_lsm_zip_open }, Index: lsm-test/lsmtest_tdb.h ================================================================== --- lsm-test/lsmtest_tdb.h +++ lsm-test/lsmtest_tdb.h @@ -150,20 +150,28 @@ */ void tdb_lsm_enable_log(TestDb *pDb, int bEnable); void tdb_lsm_application_crash(TestDb *pDb); void tdb_lsm_prepare_system_crash(TestDb *pDb); void tdb_lsm_system_crash(TestDb *pDb); - -void tdb_lsm_safety(TestDb *pDb, int eMode); -void tdb_lsm_prepare_system_crash(TestDb *pDb); void tdb_lsm_prepare_sync_crash(TestDb *pDb, int iSync); + +void tdb_lsm_safety(TestDb *pDb, int eMode); void tdb_lsm_config_work_hook(TestDb *pDb, void (*)(lsm_db *, void *), void *); void tdb_lsm_write_hook(TestDb *, void(*)(void*,int,lsm_i64,int,int), void*); - int tdb_lsm_config_str(TestDb *pDb, const char *zStr); + +/************************************************************************* +** Start of bt specific things. From lsmtest_tdb4.c. +*/ + +/* +** Simulate a system crash during the iSync'th call to xSync(). Passing +** iSync==1 means crash the next time xSync is called. 
+*/ +void tdb_bt_prepare_sync_crash(TestDb *pDb, int iSync); #ifdef __cplusplus } /* End of the 'extern "C"' block */ #endif #endif ADDED lsm-test/lsmtest_tdb4.c Index: lsm-test/lsmtest_tdb4.c ================================================================== --- lsm-test/lsmtest_tdb4.c +++ lsm-test/lsmtest_tdb4.c @@ -0,0 +1,591 @@ + +/* +** This file contains the TestDb bt wrapper. +*/ + +#include "lsmtest_tdb.h" +#include "lsmtest.h" +#include +#include "bt.h" + +typedef struct BtDb BtDb; +typedef struct BtFile BtFile; + +/* +** Each database or log file opened by a database handle is wrapped by +** an object of the following type. +*/ +struct BtFile { + BtDb *pBt; /* Database handle that opened this file */ + bt_file *pFile; /* File handle belonging to underlying VFS */ + int nSectorSize; /* Size of sectors in bytes */ + int nSector; /* Allocated size of nSector array */ + u8 **apSector; /* Original sector data */ +}; + +/* +** nCrashSync: +** If this value is non-zero, then a "crash-test" is running. If +** nCrashSync==1, then the crash is simulated during the very next +** call to the xSync() VFS method (on either the db or log file). +** If nCrashSync==2, the following call to xSync(), and so on. +** +** bCrash: +** After a crash is simulated, this variable is set. Any subsequent +** attempts to write to a file or modify the file system in any way +** fail once this is set. All the caller can do is close the connection. 
+*/
+struct BtDb {
+  TestDb base;                    /* Base class */
+  bt_db *pBt;                     /* bt database handle */
+  sqlite4_env *pEnv;              /* SQLite environment (for malloc/free) */
+  bt_env *pVfs;                   /* Underlying VFS */
+
+  /* Space for bt_fetch() results */
+  u8 *aBuffer;                    /* Space to store results */
+  int nBuffer;                    /* Allocated size of aBuffer[] in bytes */
+
+  /* Stuff used for crash test simulation */
+  BtFile *apFile[2];              /* Database and log files used by pBt */
+  bt_env env;                     /* Private VFS for this object */
+  int nCrashSync;                 /* Number of syncs until crash (see above) */
+  int bCrash;                     /* True once a crash has been simulated */
+};
+
+/*
+** xFullpath method of the wrapper VFS. Returns SQLITE4_IOERR once a crash
+** has been simulated, otherwise forwards the call to the underlying VFS.
+*/
+static int btVfsFullpath(
+  sqlite4_env *pEnv,
+  bt_env *pVfs,
+  const char *z,
+  char **pzOut
+){
+  BtDb *pBt = (BtDb*)pVfs->pVfsCtx;
+  if( pBt->bCrash ) return SQLITE4_IOERR;
+  return pBt->pVfs->xFullpath(pEnv, pBt->pVfs, z, pzOut);
+}
+
+/*
+** xOpen method of the wrapper VFS. Allocates a BtFile wrapper, opens the
+** real file through the underlying VFS and, on success, records the
+** wrapper in one of the two BtDb.apFile[] slots.
+**
+** On failure *ppFile is set to NULL, the wrapper is freed and apFile[] is
+** left untouched, so that no pointer to freed memory remains in the BtDb.
+*/
+static int btVfsOpen(
+  sqlite4_env *pEnv,
+  bt_env *pVfs,
+  const char *zFile,
+  int flags, bt_file **ppFile
+){
+  BtFile *p;
+  BtDb *pBt = (BtDb*)pVfs->pVfsCtx;
+  int rc;
+
+  if( pBt->bCrash ) return SQLITE4_IOERR;
+
+  p = (BtFile*)testMalloc(sizeof(BtFile));
+  if( !p ) return SQLITE4_NOMEM;
+  p->pBt = pBt;
+
+  /* The underlying VFS is handed its own bt_env (pBt->pVfs), not this
+  ** wrapper, in the same way as btVfsFullpath() and btVfsUnlink() do. */
+  rc = pBt->pVfs->xOpen(pEnv, pBt->pVfs, zFile, flags, &p->pFile);
+  if( rc==SQLITE4_OK ){
+    assert( pBt->apFile[0]==0 || pBt->apFile[1]==0 );
+    pBt->apFile[pBt->apFile[0]!=0] = p;
+  }else{
+    testFree(p);
+    p = 0;
+  }
+
+  *ppFile = (bt_file*)p;
+  return rc;
+}
+
+/*
+** xSize method of the wrapper VFS. Fails with SQLITE4_IOERR after a
+** simulated crash, otherwise forwards to the underlying VFS.
+*/
+static int btVfsSize(bt_file *pFile, sqlite4_int64 *piRes){
+  BtFile *p = (BtFile*)pFile;
+  if( p->pBt->bCrash ) return SQLITE4_IOERR;
+  return p->pBt->pVfs->xSize(p->pFile, piRes);
+}
+
+/*
+** xRead method of the wrapper VFS. Fails with SQLITE4_IOERR after a
+** simulated crash, otherwise forwards to the underlying VFS.
+*/
+static int btVfsRead(bt_file *pFile, sqlite4_int64 iOff, void *pBuf, int nBuf){
+  BtFile *p = (BtFile*)pFile;
+  if( p->pBt->bCrash ) return SQLITE4_IOERR;
+  return p->pBt->pVfs->xRead(p->pFile, iOff, pBuf, nBuf);
+}
+
+/*
+** Release every sector image saved in p->apSector[]. If a crash is being
+** simulated (BtDb.bCrash is set), each saved sector is first subjected to
+** one of the outcomes described in the body of the loop.
+*/
+static int btFlushSectors(BtFile *p){
+  sqlite4_int64 iSz;
+  int rc;
+  int i;
+  u8 *aTmp = 0;
+
+  rc = p->pBt->pVfs->xSize(p->pFile, &iSz);
+  for(i=0; rc==SQLITE4_OK && 
inSector; i++){ + if( p->pBt->bCrash && p->apSector[i] ){ + + /* The system is simulating a crash. There are three choices for + ** this sector: + ** + ** 1) Leave it as it is (simulating a successful write), + ** 2) Restore the original data (simulating a lost write), + ** 3) Populate the disk sector with garbage data. + */ + sqlite4_int64 iSOff = p->nSectorSize*i; + int nWrite = MIN(p->nSectorSize, iSz - iSOff); + + if( nWrite ){ + u8 *aWrite = 0; + int iOpt = (testPrngValue(i) % 3) + 1; + if( iOpt==1 ){ + aWrite = p->apSector[i]; + }else if( iOpt==3 ){ + if( aTmp==0 ) aTmp = testMalloc(p->nSectorSize); + aWrite = aTmp; + testPrngArray(i*13, (u32*)aWrite, nWrite/sizeof(u32)); + } + +#if 0 +fprintf(stderr, "handle sector %d with %s\n", i, + iOpt==1 ? "rollback" : iOpt==2 ? "write" : "omit" +); +fflush(stderr); +#endif + + if( aWrite ){ + rc = p->pBt->pVfs->xWrite(p->pFile, iSOff, aWrite, nWrite); + } + } + } + testFree(p->apSector[i]); + p->apSector[i] = 0; + } + + testFree(aTmp); + return rc; +} + +static int btSaveSectors(BtFile *p, sqlite4_int64 iOff, int nBuf){ + int rc; + sqlite4_int64 iSz; /* Size of file on disk */ + int iFirst; /* First sector affected */ + int iSector; /* Current sector */ + int iLast; /* Last sector affected */ + + if( p->nSectorSize==0 ){ + p->nSectorSize = p->pBt->pVfs->xSectorSize(p->pFile); + if( p->nSectorSize<512 ) p->nSectorSize = 512; + } + iLast = (iOff+nBuf) / p->nSectorSize; + iFirst = iOff / p->nSectorSize; + + rc = p->pBt->pVfs->xSize(p->pFile, &iSz); + for(iSector=iFirst; rc==SQLITE4_OK && iSector<=iLast; iSector++){ + int nRead; + sqlite4_int64 iSOff = iSector * p->nSectorSize; + u8 *aBuf = testMalloc(p->nSectorSize); + nRead = MIN(p->nSectorSize, (iSz - iSOff)); + if( nRead>0 ){ + rc = p->pBt->pVfs->xRead(p->pFile, iSOff, aBuf, nRead); + } + + while( rc==SQLITE4_OK && iSector>=p->nSector ){ + int nNew = p->nSector + 32; + u8 **apNew = (u8**)testMalloc(nNew * sizeof(u8*)); + memcpy(apNew, p->apSector, 
p->nSector*sizeof(u8*)); + testFree(p->apSector); + p->apSector = apNew; + p->nSector = nNew; + } + + p->apSector[iSector] = aBuf; + } + + return rc; +} + +static int btVfsWrite(bt_file *pFile, sqlite4_int64 iOff, void *pBuf, int nBuf){ + BtFile *p = (BtFile*)pFile; + if( p->pBt->bCrash ) return SQLITE4_IOERR; + if( p->pBt->nCrashSync ){ + btSaveSectors(p, iOff, nBuf); + } + return p->pBt->pVfs->xWrite(p->pFile, iOff, pBuf, nBuf); +} + +static int btVfsTruncate(bt_file *pFile, sqlite4_int64 iOff){ + BtFile *p = (BtFile*)pFile; + if( p->pBt->bCrash ) return SQLITE4_IOERR; + return p->pBt->pVfs->xTruncate(p->pFile, iOff); +} + +static int btVfsSync(bt_file *pFile){ + int rc = SQLITE4_OK; + BtFile *p = (BtFile*)pFile; + BtDb *pBt = p->pBt; + + if( p->pBt->bCrash ) return SQLITE4_IOERR; + if( pBt->nCrashSync ){ + pBt->nCrashSync--; + pBt->bCrash = (pBt->nCrashSync==0); + if( pBt->bCrash ){ + btFlushSectors(pBt->apFile[0]); + btFlushSectors(pBt->apFile[1]); + rc = SQLITE4_IOERR; + }else{ + btFlushSectors(p); + } + } + + if( rc==SQLITE4_OK ){ + rc = p->pBt->pVfs->xSync(p->pFile); + } + return rc; +} + +static int btVfsSectorSize(bt_file *pFile){ + BtFile *p = (BtFile*)pFile; + return p->pBt->pVfs->xSectorSize(p->pFile); +} + +static int btVfsClose(bt_file *pFile){ + BtFile *p = (BtFile*)pFile; + int rc; + assert( p->pBt->apFile[0]==p || p->pBt->apFile[1]==p ); + btFlushSectors(p); + testFree(p->apSector); + rc = p->pBt->pVfs->xClose(p->pFile); + testFree(p); + return rc; +} + +static int btVfsUnlink(sqlite4_env *pEnv, bt_env *pVfs, const char *zFile){ + BtDb *pBt = (BtDb*)pVfs->pVfsCtx; + if( pBt->bCrash ) return SQLITE4_IOERR; + return pBt->pVfs->xUnlink(pEnv, pBt->pVfs, zFile); +} + +static int btVfsLock(bt_file *pFile, int iLock, int eType){ + BtFile *p = (BtFile*)pFile; + if( p->pBt->bCrash ) return SQLITE4_IOERR; + return p->pBt->pVfs->xLock(p->pFile, iLock, eType); +} + +static int btVfsTestLock(bt_file *pFile, int iLock, int nLock, int eType){ + BtFile *p = 
(BtFile*)pFile;
+  if( p->pBt->bCrash ) return SQLITE4_IOERR;
+  return p->pBt->pVfs->xTestLock(p->pFile, iLock, nLock, eType);
+}
+
+/*
+** xShmMap method of the wrapper VFS. Fails with SQLITE4_IOERR after a
+** simulated crash, otherwise forwards to the underlying VFS.
+*/
+static int btVfsShmMap(bt_file *pFile, int iChunk, int sz, void **ppOut){
+  BtFile *p = (BtFile*)pFile;
+  if( p->pBt->bCrash ) return SQLITE4_IOERR;
+  return p->pBt->pVfs->xShmMap(p->pFile, iChunk, sz, ppOut);
+}
+
+/*
+** xShmBarrier method of the wrapper VFS. Always forwarded; the method
+** returns no value so there is no way to report a simulated crash.
+*/
+static void btVfsShmBarrier(bt_file *pFile){
+  BtFile *p = (BtFile*)pFile;
+  p->pBt->pVfs->xShmBarrier(p->pFile);
+}
+
+/*
+** xShmUnmap method of the wrapper VFS. Fails with SQLITE4_IOERR after a
+** simulated crash, otherwise forwards to the underlying VFS.
+*/
+static int btVfsShmUnmap(bt_file *pFile, int bDelete){
+  BtFile *p = (BtFile*)pFile;
+  if( p->pBt->bCrash ) return SQLITE4_IOERR;
+  return p->pBt->pVfs->xShmUnmap(p->pFile, bDelete);
+}
+
+/*
+** TestDb xClose method. Frees the bt_fetch() result buffer and closes the
+** bt handle.
+*/
+static int bt_close(TestDb *pTestDb){
+  BtDb *p = (BtDb*)pTestDb;
+  free(p->aBuffer);
+  return sqlite4BtClose(p->pBt);
+}
+
+/*
+** Ensure that the transaction level of handle p is at least iMin. If a
+** transaction has to be opened, *piLevel is set to the level that was in
+** force beforehand; otherwise it is set to -1. The value is intended to
+** be passed to btRestoreTransaction().
+*/
+static int btMinTransaction(BtDb *p, int iMin, int *piLevel){
+  int iLevel;
+  int rc = SQLITE4_OK;
+
+  iLevel = sqlite4BtTransactionLevel(p->pBt);
+  if( iLevel<iMin ){
+    rc = sqlite4BtBegin(p->pBt, iMin);
+    *piLevel = iLevel;
+  }else{
+    *piLevel = -1;
+  }
+
+  return rc;
+}
+
+/*
+** Counterpart of btMinTransaction(). If iLevel is non-negative, return the
+** handle to transaction level iLevel: by committing if rcin is SQLITE4_OK,
+** or by rolling back otherwise. Returns rcin, or the commit error if the
+** commit fails.
+*/
+static int btRestoreTransaction(BtDb *p, int iLevel, int rcin){
+  int rc = rcin;
+  if( iLevel>=0 ){
+    if( rc==SQLITE4_OK ){
+      rc = sqlite4BtCommit(p->pBt, iLevel);
+    }else{
+      sqlite4BtRollback(p->pBt, iLevel);
+    }
+    assert( iLevel==sqlite4BtTransactionLevel(p->pBt) );
+  }
+  return rc;
+}
+
+/*
+** TestDb xWrite method. Writes the key/value pair inside a transaction of
+** level 2 or higher, opening and closing one if necessary.
+*/
+static int bt_write(TestDb *pTestDb, void *pK, int nK, void *pV, int nV){
+  BtDb *p = (BtDb*)pTestDb;
+  int iLevel;
+  int rc;
+
+  rc = btMinTransaction(p, 2, &iLevel);
+  if( rc==SQLITE4_OK ){
+    rc = sqlite4BtReplace(p->pBt, pK, nK, pV, nV);
+    rc = btRestoreTransaction(p, iLevel, rc);
+  }
+  return rc;
+}
+
+/*
+** TestDb xDelete method. Implemented as a write with a NULL value and a
+** value size of -1.
+*/
+static int bt_delete(TestDb *pTestDb, void *pK, int nK){
+  return bt_write(pTestDb, pK, nK, 0, -1);
+}
+
+static int bt_delete_range(
+  TestDb *pTestDb,
+  void *pKey1, int nKey1,
+  void *pKey2, int nKey2
+){
+  BtDb *p = (BtDb*)pTestDb;
+  bt_cursor *pCsr = 0;
+  int rc = SQLITE4_OK;
+  int iLevel;
+
+  rc = btMinTransaction(p, 2, &iLevel);
+  if( rc==SQLITE4_OK ){
+    rc = 
sqlite4BtCsrOpen(p->pBt, 0, &pCsr);
+  }
+  while( rc==SQLITE4_OK ){
+    const void *pK;
+    int n;
+    int nCmp;
+    int res;
+
+    rc = sqlite4BtCsrSeek(pCsr, pKey1, nKey1, BT_SEEK_GE);
+    if( rc==SQLITE4_INEXACT ) rc = SQLITE4_OK;
+    if( rc!=SQLITE4_OK ) break;
+
+    rc = sqlite4BtCsrKey(pCsr, &pK, &n);
+    if( rc!=SQLITE4_OK ) break;
+
+    nCmp = MIN(n, nKey1);
+    res = memcmp(pKey1, pK, nCmp);
+    assert( res<0 || (res==0 && nKey1<=n) );
+    if( res==0 && nKey1==n ){
+      /* The cursor is on pKey1 itself; step past it. */
+      rc = sqlite4BtCsrNext(pCsr);
+      if( rc!=SQLITE4_OK ) break;
+      rc = sqlite4BtCsrKey(pCsr, &pK, &n);
+      if( rc!=SQLITE4_OK ) break;
+    }
+
+    /* Stop once the current key is not smaller than pKey2. */
+    nCmp = MIN(n, nKey2);
+    res = memcmp(pKey2, pK, nCmp);
+    if( res<0 || (res==0 && nKey2<=n) ) break;
+
+    rc = sqlite4BtDelete(pCsr);
+  }
+  if( rc==SQLITE4_NOTFOUND ) rc = SQLITE4_OK;
+
+  sqlite4BtCsrClose(pCsr);
+
+  rc = btRestoreTransaction(p, iLevel, rc);
+  return rc;
+}
+
+/*
+** TestDb xFetch method. Looks up key (pK/nK). If found, the value is
+** copied into the handle's aBuffer[] and *ppVal/*pnVal are set to point
+** at it. If there is no exact match, *ppVal is set to NULL and *pnVal to
+** -1, and SQLITE4_OK is returned.
+**
+** If the handle has no open transaction, one is opened for the duration
+** of the call. Returns SQLITE4_NOMEM if the result buffer cannot be
+** grown; in that case the previous buffer is left intact and the output
+** parameters are not written.
+*/
+static int bt_fetch(
+  TestDb *pTestDb,
+  void *pK, int nK,
+  void **ppVal, int *pnVal
+){
+  BtDb *p = (BtDb*)pTestDb;
+  bt_cursor *pCsr = 0;
+  int iLevel;
+  int rc = SQLITE4_OK;
+
+  iLevel = sqlite4BtTransactionLevel(p->pBt);
+  if( iLevel==0 ){
+    rc = sqlite4BtBegin(p->pBt, 1);
+    if( rc!=SQLITE4_OK ) return rc;
+  }
+
+  rc = sqlite4BtCsrOpen(p->pBt, 0, &pCsr);
+  if( rc==SQLITE4_OK ){
+    rc = sqlite4BtCsrSeek(pCsr, pK, nK, BT_SEEK_EQ);
+    if( rc==SQLITE4_OK ){
+      const void *pV = 0;
+      int nV = 0;
+      rc = sqlite4BtCsrData(pCsr, 0, -1, &pV, &nV);
+      if( rc==SQLITE4_OK && nV>p->nBuffer ){
+        /* Allocate twice what is needed to reduce future reallocations. */
+        u8 *aNew = (u8*)malloc(nV*2);
+        if( aNew==0 ){
+          rc = SQLITE4_NOMEM;
+        }else{
+          free(p->aBuffer);
+          p->aBuffer = aNew;
+          p->nBuffer = nV*2;
+        }
+      }
+      if( rc==SQLITE4_OK ){
+        if( nV>0 ) memcpy(p->aBuffer, pV, nV);
+        *pnVal = nV;
+        *ppVal = (void*)(p->aBuffer);
+      }
+
+    }else if( rc==SQLITE4_INEXACT || rc==SQLITE4_NOTFOUND ){
+      *ppVal = 0;
+      *pnVal = -1;
+      rc = SQLITE4_OK;
+    }
+    sqlite4BtCsrClose(pCsr);
+  }
+
+  if( iLevel==0 ) sqlite4BtCommit(p->pBt, 0);
+  return rc;
+}
+
+static int bt_scan(
+  TestDb *pTestDb,
+  void *pCtx,
+  int bReverse,
+  void *pFirst, int nFirst,
+  void *pLast, int nLast,
+  void (*xCallback)(void *, void *, int , void 
*, int) +){ + BtDb *p = (BtDb*)pTestDb; + bt_cursor *pCsr = 0; + int rc; + int iLevel; + + rc = btMinTransaction(p, 1, &iLevel); + + if( rc==SQLITE4_OK ){ + rc = sqlite4BtCsrOpen(p->pBt, 0, &pCsr); + } + if( rc==SQLITE4_OK ){ + if( bReverse ){ + if( pLast ){ + rc = sqlite4BtCsrSeek(pCsr, pLast, nLast, BT_SEEK_LE); + }else{ + rc = sqlite4BtCsrLast(pCsr); + } + }else{ + rc = sqlite4BtCsrSeek(pCsr, pFirst, nFirst, BT_SEEK_GE); + } + if( rc==SQLITE4_INEXACT ) rc = SQLITE4_OK; + + while( rc==SQLITE4_OK ){ + const void *pK = 0; int nK = 0; + const void *pV = 0; int nV = 0; + + rc = sqlite4BtCsrKey(pCsr, &pK, &nK); + if( rc==SQLITE4_OK ){ + rc = sqlite4BtCsrData(pCsr, 0, -1, &pV, &nV); + } + + if( rc!=SQLITE4_OK ) break; + if( bReverse ){ + if( pFirst ){ + int res; + int nCmp = MIN(nK, nFirst); + res = memcmp(pFirst, pK, nCmp); + if( res>0 || (res==0 && nKnLast) ) break; + } + } + + xCallback(pCtx, (void*)pK, nK, (void*)pV, nV); + if( bReverse ){ + rc = sqlite4BtCsrPrev(pCsr); + }else{ + rc = sqlite4BtCsrNext(pCsr); + } + } + if( rc==SQLITE4_NOTFOUND ) rc = SQLITE4_OK; + + sqlite4BtCsrClose(pCsr); + } + + rc = btRestoreTransaction(p, iLevel, rc); + return rc; +} + +static int bt_begin(TestDb *pTestDb, int iLvl){ + BtDb *p = (BtDb*)pTestDb; + int rc = sqlite4BtBegin(p->pBt, iLvl); + return rc; +} + +static int bt_commit(TestDb *pTestDb, int iLvl){ + BtDb *p = (BtDb*)pTestDb; + int rc = sqlite4BtCommit(p->pBt, iLvl); + return rc; +} + +static int bt_rollback(TestDb *pTestDb, int iLvl){ + BtDb *p = (BtDb*)pTestDb; + int rc = sqlite4BtRollback(p->pBt, iLvl); + return rc; +} + +int test_bt_open(const char *zFilename, int bClear, TestDb **ppDb){ + + static const DatabaseMethods SqlMethods = { + bt_close, + bt_write, + bt_delete, + bt_delete_range, + bt_fetch, + bt_scan, + bt_begin, + bt_commit, + bt_rollback + }; + BtDb *p = 0; + bt_db *pBt = 0; + int rc; + sqlite4_env *pEnv = sqlite4_env_default(); + + if( bClear && zFilename && zFilename[0] ){ + char *zLog = 
sqlite3_mprintf("%s-wal", zFilename); + unlink(zFilename); + unlink(zLog); + sqlite3_free(zLog); + } + + rc = sqlite4BtNew(pEnv, sizeof(BtDb), &pBt); + if( rc==SQLITE4_OK ){ + p = (BtDb*)sqlite4BtExtra(pBt); + p->base.pMethods = &SqlMethods; + p->pBt = pBt; + p->pEnv = pEnv; + + p->env.pVfsCtx = (void*)p; + p->env.xFullpath = btVfsFullpath; + p->env.xOpen = btVfsOpen; + p->env.xSize = btVfsSize; + p->env.xRead = btVfsRead; + p->env.xWrite = btVfsWrite; + p->env.xTruncate = btVfsTruncate; + p->env.xSync = btVfsSync; + p->env.xSectorSize = btVfsSectorSize; + p->env.xClose = btVfsClose; + p->env.xUnlink = btVfsUnlink; + p->env.xLock = btVfsLock; + p->env.xTestLock = btVfsTestLock; + p->env.xShmMap = btVfsShmMap; + p->env.xShmBarrier = btVfsShmBarrier; + p->env.xShmUnmap = btVfsShmUnmap; + + sqlite4BtControl(pBt, BT_CONTROL_GETVFS, (void*)&p->pVfs); + sqlite4BtControl(pBt, BT_CONTROL_SETVFS, (void*)&p->env); + + rc = sqlite4BtOpen(pBt, zFilename); + } + + if( rc!=SQLITE4_OK && p ){ + bt_close(&p->base); + } + + *ppDb = &p->base; + return rc; +} + +void tdb_bt_prepare_sync_crash(TestDb *pTestDb, int iSync){ + BtDb *p = (BtDb*)pTestDb; + assert( pTestDb->pMethods->xClose==bt_close ); + assert( p->bCrash==0 ); + p->nCrashSync = iSync; +} + Index: main.mk ================================================================== --- main.mk +++ main.mk @@ -304,10 +304,11 @@ $(TOP)/lsm-test/lsmtest9.c \ $(TOP)/lsm-test/lsmtest_datasource.c \ $(TOP)/lsm-test/lsmtest_func.c $(TOP)/lsm-test/lsmtest_io.c \ $(TOP)/lsm-test/lsmtest_main.c $(TOP)/lsm-test/lsmtest_mem.c \ $(TOP)/lsm-test/lsmtest_tdb.c $(TOP)/lsm-test/lsmtest_tdb3.c \ + $(TOP)/lsm-test/lsmtest_tdb4.c \ $(TOP)/lsm-test/lsmtest_util.c LSMTESTHDR = $(TOP)/lsm-test/lsmtest.h $(TOP)/lsm-test/lsmtest_tdb.h # This is the default Makefile target. The objects listed here @@ -519,11 +520,11 @@ # Rules to build the 'lsmtest' application. 
# lsmtest$(EXE): libsqlite4.a $(LSMTESTSRC) $(LSMTESTHDR) $(TCPPX) -c $(TOP)/lsm-test/lsmtest_tdb2.cc - $(TCCX) $(LSMTESTSRC) lsmtest_tdb2.o libsqlite4.a -o lsmtest$(EXE) $(THREADLIB) -lsqlite3 + $(TCCX) $(LSMTESTSRC) lsmtest_tdb2.o libsqlite4.a -o lsmtest$(EXE) $(THREADLIB) -lsqlite3 -llmdb -lstdc++ varint$(EXE): $(TOP)/src/varint.c $(TCCX) -DVARINT_TOOL -o varint$(EXE) $(TOP)/src/varint.c Index: src/bt.h ================================================================== --- src/bt.h +++ src/bt.h @@ -130,18 +130,66 @@ ** the third is expected to be a pointer to an instance of type bt_info. ** The "output" buffer must already be initialized. Before ** sqlite4BtControl() returns it appends debugging information to the ** buffer. The specific information appended depends on the eType and ** pgno member variables. +** +** BT_CONTROL_SETVFS: +** The third argument is assumed to be a pointer to an instance of type +** bt_env. The database handle takes a copy of this pointer (not a copy +** of the object) and uses it for all subsequent IO. It is the +** responsibility of the caller to ensure that the pointer is valid for +** the lifetime of the database connection. +** +** BT_CONTROL_GETVFS: +** The third argument is assumed to be of type (bt_env**). Before +** returning, the value pointed to is populated with a pointer to +** the current bt_env object. +** +** BT_CONTROL_SAFETY: +** The third argument is interpreted as a pointer to type (int). If +** the value stored in the (int) location is 0, 1 or 2, then the current +** b-tree safety level is set to 0, 1 or 2, respectively. Otherwise, the +** integer value is set to the current safety level. 
*/ +#define BT_CONTROL_INFO 7706389 +#define BT_CONTROL_SETVFS 7706390 +#define BT_CONTROL_GETVFS 7706391 +#define BT_CONTROL_SAFETY 7706392 + int sqlite4BtControl(bt_db*, int op, void *pArg); -#define BT_CONTROL_INFO 7706389 typedef struct bt_info bt_info; struct bt_info { int eType; unsigned int pgno; sqlite4_buffer output; }; +/* +** File-system interface. +*/ +typedef struct bt_env bt_env; +typedef struct bt_file bt_file; +/* +** xFullpath: +*/ +struct bt_env { + void *pVfsCtx; + int (*xFullpath)(sqlite4_env*,bt_env*, const char *, char **); + int (*xOpen)(sqlite4_env*,bt_env*, const char *, int flags, bt_file**); + int (*xSize)(bt_file*, sqlite4_int64*); + int (*xRead)(bt_file*, sqlite4_int64, void *, int); + int (*xWrite)(bt_file*, sqlite4_int64, void *, int); + int (*xTruncate)(bt_file*, sqlite4_int64); + int (*xSync)(bt_file*); + int (*xSectorSize)(bt_file*); + int (*xClose)(bt_file*); + int (*xUnlink)(sqlite4_env*,bt_env*, const char *); + int (*xLock)(bt_file*, int, int); + int (*xTestLock)(bt_file*, int, int, int); + int (*xShmMap)(bt_file*, int, int, void **); + void (*xShmBarrier)(bt_file*); + int (*xShmUnmap)(bt_file*, int); +}; Index: src/btInt.h ================================================================== --- src/btInt.h +++ src/btInt.h @@ -125,41 +125,22 @@ #define BT_PAGERFILE_DATABASE 0 #define BT_PAGERFILE_LOG 1 #define BT_PAGERFILE_SHM 2 const char *sqlite4BtPagerFilename(BtPager*, int ePagerfile); +bt_env *sqlite4BtPagerGetEnv(BtPager*); +void sqlite4BtPagerSetEnv(BtPager*, bt_env*); + +void sqlite4BtPagerSetSafety(BtPager*, int*); + /* ** End of bt_pager.c interface. *************************************************************************/ /************************************************************************* ** File-system interface. 
*/ -typedef struct bt_env bt_env; -typedef struct bt_file bt_file; - -/* -** xFullpath: -*/ -struct bt_env { - void *pVfsCtx; - int (*xFullpath)(sqlite4_env*,bt_env*, const char *, char **); - int (*xOpen)(sqlite4_env*,bt_env*, const char *, int flags, bt_file**); - int (*xSize)(bt_file*, i64*); - int (*xRead)(bt_file*, i64, void *, int); - int (*xWrite)(bt_file*, i64, void *, int); - int (*xTruncate)(bt_file*, i64); - int (*xSync)(bt_file*); - int (*xSectorSize)(bt_file*); - int (*xClose)(bt_file*); - int (*xUnlink)(sqlite4_env*,bt_env*, const char *); - int (*xLock)(bt_file*, int, int); - int (*xTestLock)(bt_file*, int, int, int); - int (*xShmMap)(bt_file*, int, int, void **); - void (*xShmBarrier)(bt_file*); - int (*xShmUnmap)(bt_file*, int); -}; /* Flags for the 3rd argument to xOpen */ #define BT_OPEN_READONLY 0x0001 /* Candidate values for the 3rd argument to bt_env.xLock() */ Index: src/bt_log.c ================================================================== --- src/bt_log.c +++ src/bt_log.c @@ -242,11 +242,11 @@ assert( (nByte&0x00000007)==4 && nByte>=8 ); btLogChecksum(nativeCksum, a, 8, aIn, aOut); btLogChecksum(nativeCksum, &a[4], nByte-4, aOut, aOut); } -#define BT_PAGE_DEBUG 0 +#define BT_PAGE_DEBUG 1 #define BT_VAL_DEBUG 0 static void btDebugTopology(BtLock *pLock, char *zStr, int iSide, u32 *aLog){ #if BT_PAGE_DEBUG fprintf(stderr, "%d:%s: (side=%d) %d..%d %d..%d %d..%d\n", @@ -256,19 +256,21 @@ ); fflush(stderr); #endif } +#ifndef NDEBUG void sqlite4BtDebugReadlock(BtLock *pLock, u32 iFirst, u32 iLast){ #if BT_PAGE_DEBUG static int nCall = 0; fprintf(stderr, "%d:%d: readlock=(%d..%d)\n", pLock->iDebugId, nCall++, (int)iFirst, (int)iLast ); fflush(stderr); #endif } +#endif #ifndef NDEBUG static void btDebugCheckSnapshot(BtShmHdr *pHdr){ u32 *aLog = pHdr->aLog; assert( pHdr->iNextFrame!=1 || @@ -295,11 +297,11 @@ ); fflush(stderr); #endif } #else -#define btDebugLogSafepoint(x) +#define btDebugLogSafepoint(x,y) #endif static void 
btDebugCkptPage(BtLock *pLock, u32 pgno, u8 *aData, int pgsz){ #if BT_PAGE_DEBUG static int nCall = 0; @@ -348,11 +350,11 @@ int nCopy = MIN(nIn, (nOut-1)); for(i=0; isnapshot.nSector*2 + (i64)(iFrame-1) * (i64)(pgsz + sizeof(BtFrameHdr)); } + +static int btLogSyncFile(BtLog *pLog, bt_file *pFd){ + bt_env *pVfs = pLog->pLock->pVfs; + return pVfs->xSync(pFd); +} static int btLogWriteData(BtLog *pLog, i64 iOff, u8 *aData, int nData){ bt_env *pVfs = pLog->pLock->pVfs; return pVfs->xWrite(pLog->pFd, iOff, aData, nData); } @@ -792,18 +799,18 @@ i64 nByte = 0; /* Size of log file on disk */ int rc; /* Return code */ BtWalHdr *pHdr = 0; int iSlot = 0; FrameRecoverCtx ctx = {0, 0}; + BtWalHdr hdr1; + BtWalHdr hdr2; /* Read a log file header from the start of the file. */ rc = pVfs->xSize(pLog->pFd, &nByte); if( rc==SQLITE4_OK && nByte>0 ){ - BtWalHdr hdr1; rc = btLogReadHeader(pLog, 0, &hdr1); if( rc==SQLITE4_OK ){ - BtWalHdr hdr2; rc = btLogReadHeader(pLog, hdr1.nSector, &hdr2); if( rc==SQLITE4_NOTFOUND ){ pHdr = &hdr1; }else if( rc==SQLITE4_OK ){ int aGreater[3] = {1, 2, 0}; @@ -842,10 +849,11 @@ pShm->ckpt.iWalHdr = (iSlot<<2) + pHdr->iCnt; pShm->ckpt.iFirstRead = pHdr->iFirstFrame; pShm->ckpt.iFirstRecover = pHdr->iFirstFrame; rc = btLogRollbackRecovery(pLog, &ctx); pLog->snapshot.iNextFrame = ctx.iNextFrame; + pLog->snapshot.pgsz = pHdr->nPgsz; assert( pShm->ckpt.iFirstRead>0 ); } } if( rc==SQLITE4_OK && ctx.iLast==0 ){ @@ -859,10 +867,15 @@ pLog->snapshot.nPg = dbhdr.nPg; pLog->snapshot.pgsz = dbhdr.pgsz; pLog->snapshot.iCookie = dbhdr.cookie; } + if( rc==SQLITE4_OK ){ + btDebugTopology( + pLog->pLock, "recovered", pLog->snapshot.iHashSide, pLog->snapshot.aLog + ); + } return rc; } /* ** Open the log file for pager pPager. 
If successful, return the BtLog* @@ -998,11 +1011,10 @@ int btLogRead(BtLog *pLog, u32 pgno, u8 *aData, u32 iSafe){ const int pgsz = sqlite4BtPagerPagesize((BtPager*)(pLog->pLock)); int rc = SQLITE4_NOTFOUND; u32 iFrame = 0; int i; - int bSeen = (iSafe==0); u32 *aLog = pLog->snapshot.aLog; int iSafeIdx = sqlite4BtLogFrameToIdx(aLog, iSafe); /* Loop through regions (c), (b) and (a) of the log file. In that order. */ @@ -1077,61 +1089,18 @@ memset(aHash, 0, sizeof(ht_slot)*HASHTABLE_NSLOT); } return rc; } -/* -** Write a frame to the log file. -*/ -int sqlite4BtLogWrite(BtLog *pLog, u32 pgno, u8 *aData, u32 nPg){ +static int btLogWriteFrame(BtLog *pLog, u32 pgno, u8 *aData, u32 nPg){ const int pgsz = sqlite4BtPagerPagesize((BtPager*)(pLog->pLock)); - int rc = SQLITE4_OK; + u32 *aLog = pLog->snapshot.aLog; + int rc; /* Return code */ u32 iFrame; /* Write this frame (numbered from 1) */ - BtFrameHdr frame; /* Header for new frame */ - u32 *a; /* Pointer to cksum of previous frame */ + u32 iNextFrame; /* Frame to write following this one */ i64 iOff; /* Offset of log file to write to */ - u32 iNextFrame; - u32 *aLog = pLog->snapshot.aLog; - - /* If this is a commit frame and the size of the database has changed, - ** ensure that the log file contains at least one copy of page 1 written - ** since the last checkpoint. This is required as a future checkpoint - ** will need to update the nPg field in the database header located on - ** page 1. */ - if( nPg && nPg!=pLog->snapshot.nPg ){ - BtPager *pPager = (BtPager *)(pLog->pLock); - BtPage *pOne = 0; - rc = sqlite4BtPageGet(pPager, 1, &pOne); - if( rc==SQLITE4_OK ){ - rc = sqlite4BtLogWrite(pLog, 1, sqlite4BtPageData(pOne), 0); - sqlite4BtPageRelease(pOne); - } - if( rc!=SQLITE4_OK ) return rc; - } - - /* Handle a special case - if the log file is completely empty then - ** this writer must write the first header into the WAL file. 
*/ - if( btLogIsEmpty(pLog) ){ - BtWalHdr hdr; - memset(&hdr, 0, sizeof(BtWalHdr)); - - hdr.iMagic = BT_WAL_MAGIC; - hdr.iVersion = BT_WAL_VERSION; - hdr.nSector = pLog->snapshot.nSector; - hdr.nPgsz = pgsz; - hdr.iSalt1 = 22; - hdr.iSalt2 = 23; - hdr.iFirstFrame = 1; - - rc = btLogWriteHeader(pLog, 0, &hdr); - if( rc!=SQLITE4_OK ) return rc; - - pLog->snapshot.aFrameCksum[0] = hdr.iSalt1; - pLog->snapshot.aFrameCksum[1] = hdr.iSalt2; - pLog->snapshot.iNextFrame = 1; - } - btDebugCheckSnapshot(&pLog->snapshot); + BtFrameHdr frame; /* Header for new frame */ /* Figure out the offset to write the current frame to. */ iFrame = pLog->snapshot.iNextFrame; iOff = btLogFrameOffset(pLog, pgsz, iFrame); @@ -1163,10 +1132,11 @@ if( rc==SQLITE4_OK ){ if( iNextFrame & 0x80000000 ){ rc = SQLITE4_FULL; }else{ + u32 *a; /* Pointer to cksum of previous frame */ /* Populate the frame header object. */ memset(&frame, 0, sizeof(frame)); frame.pgno = pgno; frame.iNext = iNextFrame; @@ -1223,13 +1193,71 @@ aLog[5] = iFrame; memcpy(pLog->snapshot.aFrameCksum, frame.aCksum, sizeof(frame.aCksum)); } btDebugCheckSnapshot(&pLog->snapshot); - /* If this is a COMMIT, also update the shared shm-header. */ + return rc; +} + +/* +** Write a frame to the log file. +*/ +int sqlite4BtLogWrite(BtLog *pLog, u32 pgno, u8 *aData, u32 nPg){ + const int pgsz = sqlite4BtPagerPagesize((BtPager*)(pLog->pLock)); + int rc = SQLITE4_OK; + + int nPad = 1; + + /* If this is a commit frame and the size of the database has changed, + ** ensure that the log file contains at least one copy of page 1 written + ** since the last checkpoint. This is required as a future checkpoint + ** will need to update the nPg field in the database header located on + ** page 1. 
*/ + if( nPg && nPg!=pLog->snapshot.nPg ){ + BtPager *pPager = (BtPager *)(pLog->pLock); + BtPage *pOne = 0; + rc = sqlite4BtPageGet(pPager, 1, &pOne); + if( rc==SQLITE4_OK ){ + rc = sqlite4BtLogWrite(pLog, 1, sqlite4BtPageData(pOne), 0); + sqlite4BtPageRelease(pOne); + } + if( rc!=SQLITE4_OK ) return rc; + } + + /* Handle a special case - if the log file is completely empty then + ** this writer must write the first header into the WAL file. */ + if( btLogIsEmpty(pLog) ){ + BtWalHdr hdr; + memset(&hdr, 0, sizeof(BtWalHdr)); + + hdr.iMagic = BT_WAL_MAGIC; + hdr.iVersion = BT_WAL_VERSION; + hdr.nSector = pLog->snapshot.nSector; + hdr.nPgsz = pgsz; + hdr.iSalt1 = 22; + hdr.iSalt2 = 23; + hdr.iFirstFrame = 1; + + rc = btLogWriteHeader(pLog, 0, &hdr); + if( rc!=SQLITE4_OK ) return rc; + + pLog->snapshot.aFrameCksum[0] = hdr.iSalt1; + pLog->snapshot.aFrameCksum[1] = hdr.iSalt2; + pLog->snapshot.iNextFrame = 1; + } + btDebugCheckSnapshot(&pLog->snapshot); + + rc = btLogWriteFrame(pLog, pgno, aData, nPg); + + /* If this is a COMMIT, sync the log and update the shared shm-header. 
*/ if( nPg ){ - rc = btLogUpdateSharedHdr(pLog); + int i; + for(i=0; ipFd); + if( rc==SQLITE4_OK ) rc = btLogUpdateSharedHdr(pLog); } return rc; } Index: src/bt_main.c ================================================================== --- src/bt_main.c +++ src/bt_main.c @@ -2295,11 +2295,11 @@ */ int sqlite4BtReplace(bt_db *db, const void *pK, int nK, const void *pV, int nV){ int rc = SQLITE4_OK; bt_cursor csr; - sqlite4BtDebugKV((BtLock*)db->pPager, "replace", pK, nK, pV, nV); + sqlite4BtDebugKV((BtLock*)db->pPager, "replace", (u8*)pK, nK, (u8*)pV, nV); btCheckPageRefs(db); btCsrSetup(db, &csr); rc = btCsrSeek(&csr, pK, nK, BT_SEEK_GE, 1); if( rc==SQLITE4_OK ){ @@ -2356,10 +2356,11 @@ return sqlite4BtPagerGetCookie(db->pPager, piVal); } int sqlite4BtControl(bt_db *db, int op, void *pArg){ int rc = SQLITE4_OK; + switch( op ){ case BT_CONTROL_INFO: { bt_info *pInfo = (bt_info*)pArg; int iTrans = sqlite4BtTransactionLevel(db); if( iTrans==0 ) rc = sqlite4BtBegin(db, 1); @@ -2375,11 +2376,28 @@ sqlite4_buffer_append(&pInfo->output, "", 1); sqlite4BtPageRelease(pPg); } if( iTrans==0 ) rc = sqlite4BtCommit(db, 0); } + break; + } + + case BT_CONTROL_GETVFS: { + *((bt_env**)pArg) = sqlite4BtPagerGetEnv(db->pPager); + break; + } + + case BT_CONTROL_SETVFS: { + sqlite4BtPagerSetEnv(db->pPager, (bt_env*)pArg); + break; + } + + case BT_CONTROL_SAFETY: { + int *pInt = (int*)pArg; + sqlite4BtPagerSetSafety(db->pPager, pInt); + break; } } return rc; } Index: src/bt_pager.c ================================================================== --- src/bt_pager.c +++ src/bt_pager.c @@ -78,15 +78,19 @@ ** Pager object. ** ** nAutoCkpt: ** If a transaction is committed and there are this many frames in the ** log file, automatically run a checkpoint operation. +** +** iSafetyLevel: +** Current safety level. 0==off, 1==normal, 2==full.
*/ struct BtPager { BtLock btl; /* Variables shared with bt_lock module */ BtLog *pLog; /* Logging module */ int iTransactionLevel; /* Current transaction level (see bt.h) */ + int iSafetyLevel; /* Current safety level */ char *zFile; /* Database file name */ int nFile; /* Length of string zFile in bytes */ BtPageHash hash; /* Hash table */ BtPage *pDirty; /* List of all dirty pages */ int nTotalRef; /* Total number of outstanding page refs */ @@ -669,11 +673,12 @@ /* Commit the main write transaction. */ rc = btCommitTransaction(p); } p->iTransactionLevel = iLevel; if( iLevel==0 ){ - rc = btCloseReadTransaction(p); + int rc2 = btCloseReadTransaction(p); + if( rc==SQLITE4_OK ) rc = rc2; } } return rc; } @@ -762,16 +767,18 @@ } if( rc==SQLITE4_OK ){ rc = btHashAdd(p, pRet); } + if( rc!=SQLITE4_OK ){ btFreePage(p, pRet); pRet = 0; + }else{ + sqlite4BtDebugReadPage(&p->btl, pgno, pRet->aData, p->pgsz); } } - sqlite4BtDebugReadPage(&p->btl, pgno, pRet->aData, p->pgsz); } assert( (pRet!=0)==(rc==SQLITE4_OK) ); if( rc==SQLITE4_OK ){ p->nTotalRef++; @@ -906,12 +913,27 @@ break; } memcpy(&p->zFile[p->nFile], zTail, strlen(zTail)+1); return p->zFile; } + +bt_env *sqlite4BtPagerGetEnv(BtPager *p){ + return p->btl.pVfs; +} +void sqlite4BtPagerSetEnv(BtPager *p, bt_env *pVfs){ + p->btl.pVfs = pVfs; +} + +void sqlite4BtPagerSetSafety(BtPager *pPager, int *piVal){ + int iVal = *piVal; + if( iVal>=0 && iVal<=2 ){ + pPager->iSafetyLevel = iVal; + } + *piVal = pPager->iSafetyLevel; +} #ifndef NDEBUG int sqlite4BtPagerRefcount(BtPager *p){ return p->nTotalRef; } #endif Index: src/bt_unix.c ================================================================== --- src/bt_unix.c +++ src/bt_unix.c @@ -178,11 +178,11 @@ return rc; } static int btPosixOsSync(bt_file *pFile){ int rc = SQLITE4_OK; -#ifndef LSM_NO_SYNC +#ifndef SQLITE_NO_SYNC PosixFile *p = (PosixFile *)pFile; int prc = 0; #if 0 if( p->pMap ){