Index: src/backup.c ================================================================== --- src/backup.c +++ src/backup.c @@ -395,11 +395,12 @@ assert( nSrcPage>=0 ); for(ii=0; (nPage<0 || iiiNext<=(Pgno)nSrcPage && !rc; ii++){ const Pgno iSrcPg = p->iNext; /* Source page number */ if( iSrcPg!=PENDING_BYTE_PAGE(p->pSrc->pBt) ){ DbPage *pSrcPg; /* Source page object */ - rc = sqlite3PagerGet(pSrcPager, iSrcPg, &pSrcPg); + rc = sqlite3PagerAcquire(pSrcPager, iSrcPg, &pSrcPg, + PAGER_ACQUIRE_READONLY); if( rc==SQLITE_OK ){ rc = backupOnePage(p, iSrcPg, sqlite3PagerGetData(pSrcPg), 0); sqlite3PagerUnref(pSrcPg); } } Index: src/btree.c ================================================================== --- src/btree.c +++ src/btree.c @@ -1567,17 +1567,21 @@ */ static int btreeGetPage( BtShared *pBt, /* The btree */ Pgno pgno, /* Number of the page to fetch */ MemPage **ppPage, /* Return the page in this parameter */ - int noContent /* Do not load page content if true */ + int noContent, /* Do not load page content if true */ + int bReadonly /* True if a read-only (mmap) page is ok */ ){ int rc; DbPage *pDbPage; + int flags = (noContent ? PAGER_ACQUIRE_NOCONTENT : 0) + | (bReadonly ? PAGER_ACQUIRE_READONLY : 0); + assert( noContent==0 || bReadonly==0 ); assert( sqlite3_mutex_held(pBt->mutex) ); - rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, noContent); + rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, flags); if( rc ) return rc; *ppPage = btreePageFromDbPage(pDbPage, pgno, pBt); return SQLITE_OK; } @@ -1616,21 +1620,22 @@ ** ** If an error occurs, then the value *ppPage is set to is undefined. It ** may remain unchanged, or it may be set to an invalid value. */ static int getAndInitPage( - BtShared *pBt, /* The database file */ - Pgno pgno, /* Number of the page to get */ - MemPage **ppPage /* Write the page pointer here */ + BtShared *pBt, /* The database file */ + Pgno pgno, /* Number of the page to get */ + MemPage **ppPage, /* Write the page pointer here */ + int bReadonly /* True if a read-only (mmap) page is ok */ ){ int rc; assert( sqlite3_mutex_held(pBt->mutex) ); if( pgno>btreePagecount(pBt) ){ rc = SQLITE_CORRUPT_BKPT; }else{ - rc = btreeGetPage(pBt, pgno, ppPage, 0); + rc = btreeGetPage(pBt, pgno, ppPage, 0, bReadonly); if( rc==SQLITE_OK ){ rc = btreeInitPage(*ppPage); if( rc!=SQLITE_OK ){ releasePage(*ppPage); } @@ -1857,10 +1862,11 @@ goto btree_open_out; } rc = sqlite3PagerOpen(pVfs, &pBt->pPager, zFilename, EXTRA_SIZE, flags, vfsFlags, pageReinit); if( rc==SQLITE_OK ){ + sqlite3PagerSetMmapLimit(pBt->pPager, db->mxMmap); rc = sqlite3PagerReadFileheader(pBt->pPager,sizeof(zDbHeader),zDbHeader); } if( rc!=SQLITE_OK ){ goto btree_open_out; } @@ -2122,10 +2128,23 @@ sqlite3BtreeEnter(p); sqlite3PagerSetCachesize(pBt->pPager, mxPage); sqlite3BtreeLeave(p); return SQLITE_OK; } + +/* +** Change the limit on the amount of the database file that may be +** memory mapped. +*/ +int sqlite3BtreeSetMmapLimit(Btree *p, sqlite3_int64 mxMmap){ + BtShared *pBt = p->pBt; + assert( sqlite3_mutex_held(p->db->mutex) ); + sqlite3BtreeEnter(p); + sqlite3PagerSetMmapLimit(pBt->pPager, mxMmap); + sqlite3BtreeLeave(p); + return SQLITE_OK; +} /* ** Change the way data is synced to disk in order to increase or decrease ** how well the database resists damage due to OS crashes and power ** failures. Level 1 is the same as asynchronous (no syncs() occur and @@ -2348,11 +2367,11 @@ assert( sqlite3_mutex_held(pBt->mutex) ); assert( pBt->pPage1==0 ); rc = sqlite3PagerSharedLock(pBt->pPager); if( rc!=SQLITE_OK ) return rc; - rc = btreeGetPage(pBt, 1, &pPage1, 0); + rc = btreeGetPage(pBt, 1, &pPage1, 0, 0); if( rc!=SQLITE_OK ) return rc; /* Do some checking to help insure the file we opened really is ** a valid database file. */ @@ -2907,11 +2926,11 @@ /* Fix the database pointer on page iPtrPage that pointed at iDbPage so ** that it points at iFreePage. Also fix the pointer map entry for ** iPtrPage. */ if( eType!=PTRMAP_ROOTPAGE ){ - rc = btreeGetPage(pBt, iPtrPage, &pPtrPage, 0); + rc = btreeGetPage(pBt, iPtrPage, &pPtrPage, 0, 0); if( rc!=SQLITE_OK ){ return rc; } rc = sqlite3PagerWrite(pPtrPage->pDbPage); if( rc!=SQLITE_OK ){ @@ -2991,11 +3010,11 @@ Pgno iFreePg; /* Index of free page to move pLastPg to */ MemPage *pLastPg; u8 eMode = BTALLOC_ANY; /* Mode parameter for allocateBtreePage() */ Pgno iNear = 0; /* nearby parameter for allocateBtreePage() */ - rc = btreeGetPage(pBt, iLastPg, &pLastPg, 0); + rc = btreeGetPage(pBt, iLastPg, &pLastPg, 0, 0); if( rc!=SQLITE_OK ){ return rc; } /* If bCommit is zero, this loop runs exactly once and page pLastPg @@ -3083,12 +3102,15 @@ Pgno nFin = finalDbSize(pBt, nOrig, nFree); if( nOrig0 ){ - invalidateAllOverflowCache(pBt); - rc = incrVacuumStep(pBt, nFin, nOrig, 0); + rc = saveAllCursors(pBt, 0, 0); + if( rc==SQLITE_OK ){ + invalidateAllOverflowCache(pBt); + rc = incrVacuumStep(pBt, nFin, nOrig, 0); + } if( rc==SQLITE_OK ){ rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); put4byte(&pBt->pPage1->aData[28], pBt->nPage); } }else{ @@ -3132,11 +3154,13 @@ } nFree = get4byte(&pBt->pPage1->aData[36]); nFin = finalDbSize(pBt, nOrig, nFree); if( nFin>nOrig ) return SQLITE_CORRUPT_BKPT; - + if( nFinnFin && rc==SQLITE_OK; iFree--){ rc = incrVacuumStep(pBt, nFin, iFree, 1); } if( (rc==SQLITE_DONE || rc==SQLITE_OK) && nFree>0 ){ rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); @@ -3149,11 +3173,11 @@ if( rc!=SQLITE_OK ){ sqlite3PagerRollback(pPager); } } - assert( nRef==sqlite3PagerRefcount(pPager) ); + assert( nRef>=sqlite3PagerRefcount(pPager) ); return rc; } #else /* ifndef SQLITE_OMIT_AUTOVACUUM */ # define setChildPtrmaps(x) SQLITE_OK @@ -3405,11 +3429,11 @@ } /* The rollback may have destroyed the pPage1->aData value. So ** call btreeGetPage() on page 1 again to make ** sure pPage1->aData is set correctly. */ - if( btreeGetPage(pBt, 1, &pPage1, 0)==SQLITE_OK ){ + if( btreeGetPage(pBt, 1, &pPage1, 0, 0)==SQLITE_OK ){ int nPage = get4byte(28+(u8*)pPage1->aData); testcase( nPage==0 ); if( nPage==0 ) sqlite3PagerPagecount(pBt->pPager, &nPage); testcase( pBt->nPage!=nPage ); pBt->nPage = nPage; @@ -3839,11 +3863,11 @@ } #endif assert( next==0 || rc==SQLITE_DONE ); if( rc==SQLITE_OK ){ - rc = btreeGetPage(pBt, ovfl, &pPage, 0); + rc = btreeGetPage(pBt, ovfl, &pPage, 0, (ppPage==0)); assert( rc==SQLITE_OK || pPage==0 ); if( rc==SQLITE_OK ){ next = get4byte(pPage->aData); } } @@ -4060,11 +4084,13 @@ }else #endif { DbPage *pDbPage; - rc = sqlite3PagerGet(pBt->pPager, nextPage, &pDbPage); + rc = sqlite3PagerAcquire(pBt->pPager, nextPage, &pDbPage, + (eOp==0 ? PAGER_ACQUIRE_READONLY : 0) + ); if( rc==SQLITE_OK ){ aPayload = sqlite3PagerGetData(pDbPage); nextPage = get4byte(aPayload); rc = copyPayload(&aPayload[offset+4], pBuf, a, eOp, pDbPage); sqlite3PagerUnref(pDbPage); @@ -4239,14 +4265,15 @@ BtShared *pBt = pCur->pBt; assert( cursorHoldsMutex(pCur) ); assert( pCur->eState==CURSOR_VALID ); assert( pCur->iPageiPage>=0 ); if( pCur->iPage>=(BTCURSOR_MAX_DEPTH-1) ){ return SQLITE_CORRUPT_BKPT; } - rc = getAndInitPage(pBt, newPgno, &pNewPage); + rc = getAndInitPage(pBt, newPgno, &pNewPage, (pCur->wrFlag==0)); if( rc ) return rc; pCur->apPage[i+1] = pNewPage; pCur->aiIdx[i+1] = 0; pCur->iPage++; @@ -4359,11 +4386,11 @@ pCur->iPage = 0; }else if( pCur->pgnoRoot==0 ){ pCur->eState = CURSOR_INVALID; return SQLITE_OK; }else{ - rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->apPage[0]); + rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->apPage[0], pCur->wrFlag==0); if( rc!=SQLITE_OK ){ pCur->eState = CURSOR_INVALID; return rc; } pCur->iPage = 0; @@ -4973,11 +5000,11 @@ } testcase( iTrunk==mxPage ); if( iTrunk>mxPage ){ rc = SQLITE_CORRUPT_BKPT; }else{ - rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0); + rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0, 0); } if( rc ){ pTrunk = 0; goto end_allocate_page; } @@ -5037,11 +5064,11 @@ if( iNewTrunk>mxPage ){ rc = SQLITE_CORRUPT_BKPT; goto end_allocate_page; } testcase( iNewTrunk==mxPage ); - rc = btreeGetPage(pBt, iNewTrunk, &pNewTrunk, 0); + rc = btreeGetPage(pBt, iNewTrunk, &pNewTrunk, 0, 0); if( rc!=SQLITE_OK ){ goto end_allocate_page; } rc = sqlite3PagerWrite(pNewTrunk->pDbPage); if( rc!=SQLITE_OK ){ @@ -5117,11 +5144,11 @@ if( closestpDbPage); if( rc!=SQLITE_OK ){ releasePage(*ppPage); } @@ -5165,11 +5192,11 @@ ** becomes a new pointer-map page, the second is used by the caller. */ MemPage *pPg = 0; TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", pBt->nPage)); assert( pBt->nPage!=PENDING_BYTE_PAGE(pBt) ); - rc = btreeGetPage(pBt, pBt->nPage, &pPg, bNoContent); + rc = btreeGetPage(pBt, pBt->nPage, &pPg, bNoContent, 0); if( rc==SQLITE_OK ){ rc = sqlite3PagerWrite(pPg->pDbPage); releasePage(pPg); } if( rc ) return rc; @@ -5179,11 +5206,11 @@ #endif put4byte(28 + (u8*)pBt->pPage1->aData, pBt->nPage); *pPgno = pBt->nPage; assert( *pPgno!=PENDING_BYTE_PAGE(pBt) ); - rc = btreeGetPage(pBt, *pPgno, ppPage, bNoContent); + rc = btreeGetPage(pBt, *pPgno, ppPage, bNoContent, 0); if( rc ) return rc; rc = sqlite3PagerWrite((*ppPage)->pDbPage); if( rc!=SQLITE_OK ){ releasePage(*ppPage); } @@ -5247,11 +5274,11 @@ if( pBt->btsFlags & BTS_SECURE_DELETE ){ /* If the secure_delete option is enabled, then ** always fully overwrite deleted information with zeros. */ - if( (!pPage && ((rc = btreeGetPage(pBt, iPage, &pPage, 0))!=0) ) + if( (!pPage && ((rc = btreeGetPage(pBt, iPage, &pPage, 0, 0))!=0) ) || ((rc = sqlite3PagerWrite(pPage->pDbPage))!=0) ){ goto freepage_out; } memset(pPage->aData, 0, pPage->pBt->pageSize); @@ -5274,11 +5301,11 @@ */ if( nFree!=0 ){ u32 nLeaf; /* Initial number of leaf cells on trunk page */ iTrunk = get4byte(&pPage1->aData[32]); - rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0); + rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0, 0); if( rc!=SQLITE_OK ){ goto freepage_out; } nLeaf = get4byte(&pTrunk->aData[4]); @@ -5320,11 +5347,11 @@ ** the page being freed as a leaf page of the first trunk in the free-list. ** Possibly because the free-list is empty, or possibly because the ** first trunk in the free-list is full. Either way, the page being freed ** will become the new first trunk page in the free-list. */ - if( pPage==0 && SQLITE_OK!=(rc = btreeGetPage(pBt, iPage, &pPage, 0)) ){ + if( pPage==0 && SQLITE_OK!=(rc = btreeGetPage(pBt, iPage, &pPage, 0, 0)) ){ goto freepage_out; } rc = sqlite3PagerWrite(pPage->pDbPage); if( rc!=SQLITE_OK ){ goto freepage_out; @@ -6121,11 +6148,11 @@ }else{ pRight = findCell(pParent, i+nxDiv-pParent->nOverflow); } pgno = get4byte(pRight); while( 1 ){ - rc = getAndInitPage(pBt, pgno, &apOld[i]); + rc = getAndInitPage(pBt, pgno, &apOld[i], 0); if( rc ){ memset(apOld, 0, (i+1)*sizeof(MemPage*)); goto balance_cleanup; } nMaxCells += 1+apOld[i]->nCell+apOld[i]->nOverflow; @@ -7209,14 +7236,21 @@ ** is already journaled. */ u8 eType = 0; Pgno iPtrPage = 0; + /* Save the positions of any open cursors. This is required in + ** case they are holding a reference to an xFetch reference + ** corresponding to page pgnoRoot. */ + rc = saveAllCursors(pBt, 0, 0); releasePage(pPageMove); + if( rc!=SQLITE_OK ){ + return rc; + } /* Move the page currently at pgnoRoot to pgnoMove. */ - rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0); + rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0, 0); if( rc!=SQLITE_OK ){ return rc; } rc = ptrmapGet(pBt, pgnoRoot, &eType, &iPtrPage); if( eType==PTRMAP_ROOTPAGE || eType==PTRMAP_FREEPAGE ){ @@ -7233,11 +7267,11 @@ /* Obtain the page at pgnoRoot */ if( rc!=SQLITE_OK ){ return rc; } - rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0); + rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0, 0); if( rc!=SQLITE_OK ){ return rc; } rc = sqlite3PagerWrite(pRoot->pDbPage); if( rc!=SQLITE_OK ){ @@ -7309,11 +7343,11 @@ assert( sqlite3_mutex_held(pBt->mutex) ); if( pgno>btreePagecount(pBt) ){ return SQLITE_CORRUPT_BKPT; } - rc = getAndInitPage(pBt, pgno, &pPage); + rc = getAndInitPage(pBt, pgno, &pPage, 0); if( rc ) return rc; for(i=0; inCell; i++){ pCell = findCell(pPage, i); if( !pPage->leaf ){ rc = clearDatabasePage(pBt, get4byte(pCell), 1, pnChange); @@ -7411,11 +7445,11 @@ if( NEVER(pBt->pCursor) ){ sqlite3ConnectionBlocked(p->db, pBt->pCursor->pBtree->db); return SQLITE_LOCKED_SHAREDCACHE; } - rc = btreeGetPage(pBt, (Pgno)iTable, &pPage, 0); + rc = btreeGetPage(pBt, (Pgno)iTable, &pPage, 0, 0); if( rc ) return rc; rc = sqlite3BtreeClearTable(p, iTable, 0); if( rc ){ releasePage(pPage); return rc; @@ -7446,21 +7480,21 @@ ** number in the database. So move the page that does into the ** gap left by the deleted root-page. */ MemPage *pMove; releasePage(pPage); - rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0); + rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0, 0); if( rc!=SQLITE_OK ){ return rc; } rc = relocatePage(pBt, pMove, PTRMAP_ROOTPAGE, 0, iTable, 0); releasePage(pMove); if( rc!=SQLITE_OK ){ return rc; } pMove = 0; - rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0); + rc = btreeGetPage(pBt, maxRootPgno, &pMove, 0, 0); freePage(pMove, &rc); releasePage(pMove); if( rc!=SQLITE_OK ){ return rc; } @@ -7868,11 +7902,11 @@ */ pBt = pCheck->pBt; usableSize = pBt->usableSize; if( iPage==0 ) return 0; if( checkRef(pCheck, iPage, zParentContext) ) return 0; - if( (rc = btreeGetPage(pBt, (Pgno)iPage, &pPage, 0))!=0 ){ + if( (rc = btreeGetPage(pBt, (Pgno)iPage, &pPage, 0, 0))!=0 ){ checkAppendMsg(pCheck, zContext, "unable to get the page. error code=%d", rc); return 0; } @@ -8339,10 +8373,21 @@ } assert( pCsr->eState!=CURSOR_REQUIRESEEK ); if( pCsr->eState!=CURSOR_VALID ){ return SQLITE_ABORT; } + + /* Save the positions of all other cursors open on this table. This is + ** required in case any of them are holding references to an xFetch + ** version of the b-tree page modified by the accessPayload call below. + ** + ** Note that pCsr must be open on a BTREE_INTKEY table and saveCursorPosition() + ** and hence saveAllCursors() cannot fail on a BTREE_INTKEY table, hence + ** saveAllCursors can only return SQLITE_OK. + */ + VVA_ONLY(rc =) saveAllCursors(pCsr->pBt, pCsr->pgnoRoot, pCsr); + assert( rc==SQLITE_OK ); /* Check some assumptions: ** (a) the cursor is open for writing, ** (b) there is a read/write transaction open, ** (c) the connection holds a write-lock on the table (if required), Index: src/btree.h ================================================================== --- src/btree.h +++ src/btree.h @@ -61,10 +61,11 @@ #define BTREE_SINGLE 4 /* The file contains at most 1 b-tree */ #define BTREE_UNORDERED 8 /* Use of a hash implementation is OK */ int sqlite3BtreeClose(Btree*); int sqlite3BtreeSetCacheSize(Btree*,int); +int sqlite3BtreeSetMmapLimit(Btree*,sqlite3_int64); int sqlite3BtreeSetSafetyLevel(Btree*,int,int,int); int sqlite3BtreeSyncDisabled(Btree*); int sqlite3BtreeSetPageSize(Btree *p, int nPagesize, int nReserve, int eFix); int sqlite3BtreeGetPageSize(Btree*); int sqlite3BtreeMaxPageCount(Btree*,int); Index: src/ctime.c ================================================================== --- src/ctime.c +++ src/ctime.c @@ -55,10 +55,13 @@ "DEBUG", #endif #ifdef SQLITE_DEFAULT_LOCKING_MODE "DEFAULT_LOCKING_MODE=" CTIMEOPT_VAL(SQLITE_DEFAULT_LOCKING_MODE), #endif +#ifdef SQLITE_DEFAULT_MMAP_LIMIT + "DEFAULT_MMAP_LIMIT=" CTIMEOPT_VAL(SQLITE_DEFAULT_MMAP_LIMIT), +#endif #ifdef SQLITE_DISABLE_DIRSYNC "DISABLE_DIRSYNC", #endif #ifdef SQLITE_DISABLE_LFS "DISABLE_LFS", Index: src/global.c ================================================================== --- src/global.c +++ src/global.c @@ -154,10 +154,11 @@ {0,0,0,0,0,0,0,0,0}, /* mutex */ {0,0,0,0,0,0,0,0,0,0,0,0,0},/* pcache2 */ (void*)0, /* pHeap */ 0, /* nHeap */ 0, 0, /* mnHeap, mxHeap */ + SQLITE_DEFAULT_MMAP_LIMIT, /* mxMmap */ (void*)0, /* pScratch */ 0, /* szScratch */ 0, /* nScratch */ (void*)0, /* pPage */ 0, /* szPage */ Index: src/main.c ================================================================== --- src/main.c +++ src/main.c @@ -493,10 +493,17 @@ sqlite3GlobalConfig.xSqllog = va_arg(ap, SQLLOGFUNC_t); sqlite3GlobalConfig.pSqllogArg = va_arg(ap, void *); break; } #endif + + case SQLITE_CONFIG_MMAP_LIMIT: { + sqlite3_int64 mxMmap = va_arg(ap, sqlite3_int64); + if( mxMmap<0 ) mxMmap = SQLITE_DEFAULT_MMAP_LIMIT; + sqlite3GlobalConfig.mxMmap = mxMmap; + break; + } default: { rc = SQLITE_ERROR; break; } @@ -2314,10 +2321,11 @@ assert( sizeof(db->aLimit)==sizeof(aHardLimit) ); memcpy(db->aLimit, aHardLimit, sizeof(db->aLimit)); db->autoCommit = 1; db->nextAutovac = -1; + db->mxMmap = sqlite3GlobalConfig.mxMmap; db->nextPagesize = 0; db->flags |= SQLITE_ShortColNames | SQLITE_AutoIndex | SQLITE_EnableTrigger #if SQLITE_DEFAULT_FILE_FORMAT<4 | SQLITE_LegacyFileFmt #endif Index: src/os.c ================================================================== --- src/os.c +++ src/os.c @@ -138,10 +138,18 @@ void volatile **pp /* OUT: Pointer to mapping */ ){ DO_OS_MALLOC_TEST(id); return id->pMethods->xShmMap(id, iPage, pgsz, bExtend, pp); } + +int sqlite3OsFetch(sqlite3_file *id, i64 iOff, int iAmt, void **pp){ + DO_OS_MALLOC_TEST(id); + return id->pMethods->xFetch(id, iOff, iAmt, pp); +} +int sqlite3OsUnfetch(sqlite3_file *id, i64 iOff, void *p){ + return id->pMethods->xUnfetch(id, iOff, p); +} /* ** The next group of routines are convenience wrappers around the ** VFS methods. */ Index: src/os.h ================================================================== --- src/os.h +++ src/os.h @@ -257,10 +257,12 @@ int sqlite3OsDeviceCharacteristics(sqlite3_file *id); int sqlite3OsShmMap(sqlite3_file *,int,int,int,void volatile **); int sqlite3OsShmLock(sqlite3_file *id, int, int, int); void sqlite3OsShmBarrier(sqlite3_file *id); int sqlite3OsShmUnmap(sqlite3_file *id, int); +int sqlite3OsFetch(sqlite3_file *id, i64, int, void **); +int sqlite3OsUnfetch(sqlite3_file *, i64, void *); /* ** Functions for accessing sqlite3_vfs methods */ Index: src/os_unix.c ================================================================== --- src/os_unix.c +++ src/os_unix.c @@ -223,10 +223,15 @@ void *lockingContext; /* Locking style specific state */ UnixUnusedFd *pUnused; /* Pre-allocated UnixUnusedFd */ const char *zPath; /* Name of the file */ unixShm *pShm; /* Shared memory segment information */ int szChunk; /* Configured by FCNTL_CHUNK_SIZE */ + int nFetchOut; /* Number of outstanding xFetch refs */ + sqlite3_int64 mmapSize; /* Usable size of mapping at pMapRegion */ + sqlite3_int64 mmapOrigsize; /* Actual size of mapping at pMapRegion */ + sqlite3_int64 mmapLimit; /* Configured FCNTL_MMAP_LIMIT value */ + void *pMapRegion; /* Memory mapped region */ #ifdef __QNXNTO__ int sectorSize; /* Device sector size */ int deviceCharacteristics; /* Precomputed device characteristics */ #endif #if SQLITE_ENABLE_LOCKING_STYLE @@ -247,11 +252,13 @@ ** one described by ticket #3584. */ unsigned char transCntrChng; /* True if the transaction counter changed */ unsigned char dbUpdate; /* True if any part of database file changed */ unsigned char inNormalWrite; /* True if in a normal write operation */ + #endif + #ifdef SQLITE_TEST /* In test mode, increase the size of this structure a bit so that ** it is larger than the struct CrashFile defined in test6.c. */ char aPadding[32]; @@ -304,10 +311,21 @@ #define threadid pthread_self() #else #define threadid 0 #endif +/* +** HAVE_MREMAP defaults to true on Linux and false everywhere else. +*/ +#if !defined(HAVE_MREMAP) +# if defined(__linux__) && defined(_GNU_SOURCE) +# define HAVE_MREMAP 1 +# else +# define HAVE_MREMAP 0 +# endif +#endif + /* ** Different Unix systems declare open() in different ways. Same use ** open(const char*,int,mode_t). Others use open(const char*,int,...). ** The difference is important when using a pointer to the function. ** @@ -435,10 +453,23 @@ #define osRmdir ((int(*)(const char*))aSyscall[19].pCurrent) { "fchown", (sqlite3_syscall_ptr)posixFchown, 0 }, #define osFchown ((int(*)(int,uid_t,gid_t))aSyscall[20].pCurrent) + { "mmap", (sqlite3_syscall_ptr)mmap, 0 }, +#define osMmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[21].pCurrent) + + { "munmap", (sqlite3_syscall_ptr)munmap, 0 }, +#define osMunmap ((void*(*)(void*,size_t))aSyscall[22].pCurrent) + +#if HAVE_MREMAP + { "mremap", (sqlite3_syscall_ptr)mremap, 0 }, +#else + { "mremap", (sqlite3_syscall_ptr)0, 0 }, +#endif +#define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[23].pCurrent) + }; /* End of the overrideable system calls */ /* ** This is the xSetSystemCall() method of sqlite3_vfs for all of the ** "unix" VFSes. Return SQLITE_OK opon successfully updating the @@ -1102,11 +1133,10 @@ #else /* Non-threadsafe build, use strerror(). */ zErr = strerror(iErrno); #endif - assert( errcode!=SQLITE_OK ); if( zPath==0 ) zPath = ""; sqlite3_log(errcode, "os_unix.c:%d: (%d) %s(%s) - %s", iLine, iErrno, zFunc, zPath, zErr ); @@ -1798,12 +1828,16 @@ ** ** If the locking level of the file descriptor is already at or below ** the requested locking level, this routine is a no-op. */ static int unixUnlock(sqlite3_file *id, int eFileLock){ + assert( eFileLock==SHARED_LOCK || ((unixFile *)id)->nFetchOut==0 ); return posixUnlock(id, eFileLock, 0); } + +static int unixMapfile(unixFile *pFd, i64 nByte); +static void unixUnmapfile(unixFile *pFd); /* ** This function performs the parts of the "close file" operation ** common to all locking schemes. It closes the directory and file ** handles, if they are valid, and sets all fields of the unixFile @@ -1813,10 +1847,11 @@ ** even on VxWorks. A mutex will be acquired on VxWorks by the ** vxworksReleaseFileId() routine. */ static int closeUnixFile(sqlite3_file *id){ unixFile *pFile = (unixFile*)id; + unixUnmapfile(pFile); if( pFile->h>=0 ){ robust_close(pFile, pFile->h, __LINE__); pFile->h = -1; } #if OS_VXWORKS @@ -3079,10 +3114,25 @@ assert( pFile->pUnused==0 || offset>=PENDING_BYTE+512 || offset+amt<=PENDING_BYTE ); #endif + + /* Deal with as much of this read request as possible by transfering + ** data from the memory mapping using memcpy(). */ + if( offsetmmapSize ){ + if( offset+amt <= pFile->mmapSize ){ + memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt); + return SQLITE_OK; + }else{ + int nCopy = pFile->mmapSize - offset; + memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy); + pBuf = &((u8 *)pBuf)[nCopy]; + amt -= nCopy; + offset += nCopy; + } + } got = seekAndRead(pFile, offset, pBuf, amt); if( got==amt ){ return SQLITE_OK; }else if( got<0 ){ @@ -3183,10 +3233,25 @@ pFile->transCntrChng = 1; /* The transaction counter has changed */ } } } #endif + + /* Deal with as much of this write request as possible by transfering + ** data from the memory mapping using memcpy(). */ + if( offsetmmapSize ){ + if( offset+amt <= pFile->mmapSize ){ + memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt); + return SQLITE_OK; + }else{ + int nCopy = pFile->mmapSize - offset; + memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy); + pBuf = &((u8 *)pBuf)[nCopy]; + amt -= nCopy; + offset += nCopy; + } + } while( amt>0 && (wrote = seekAndWrite(pFile, offset, pBuf, amt))>0 ){ amt -= wrote; offset += wrote; pBuf = &((char*)pBuf)[wrote]; @@ -3465,10 +3530,18 @@ */ if( pFile->inNormalWrite && nByte==0 ){ pFile->transCntrChng = 1; } #endif + + /* If the file was just truncated to a size smaller than the currently + ** mapped region, reduce the effective mapping size as well. SQLite will + ** use read() and write() to access data beyond this point from now on. + */ + if( nBytemmapSize ){ + pFile->mmapSize = nByte; + } return SQLITE_OK; } } @@ -3553,10 +3626,23 @@ iWrite += nBlk; } #endif } } + + if( pFile->mmapLimit>0 && nByte>pFile->mmapSize ){ + int rc; + if( pFile->szChunk<=0 ){ + if( robust_ftruncate(pFile->h, nByte) ){ + pFile->lastErrno = errno; + return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath); + } + } + + rc = unixMapfile(pFile, nByte); + return rc; + } return SQLITE_OK; } /* @@ -3620,10 +3706,16 @@ if( zTFile ){ unixGetTempname(pFile->pVfs->mxPathname, zTFile); *(char**)pArg = zTFile; } return SQLITE_OK; + } + case SQLITE_FCNTL_MMAP_LIMIT: { + i64 newLimit = *(i64*)pArg; + *(i64*)pArg = pFile->mmapLimit; + if( newLimit>=0 ) pFile->mmapLimit = newLimit; + return SQLITE_OK; } #ifdef SQLITE_DEBUG /* The pager calls this method to signal that it has done ** a rollback and that the database is therefore unchanged and ** it hence it is OK for the transaction change counter to be @@ -3933,11 +4025,11 @@ int i; assert( p->pInode==pFd->pInode ); sqlite3_mutex_free(p->mutex); for(i=0; inRegion; i++){ if( p->h>=0 ){ - munmap(p->apRegion[i], p->szRegion); + osMunmap(p->apRegion[i], p->szRegion); }else{ sqlite3_free(p->apRegion[i]); } } sqlite3_free(p->apRegion); @@ -4206,11 +4298,11 @@ } pShmNode->apRegion = apNew; while(pShmNode->nRegion<=iRegion){ void *pMem; if( pShmNode->h>=0 ){ - pMem = mmap(0, szRegion, + pMem = osMmap(0, szRegion, pShmNode->isReadonly ? PROT_READ : PROT_READ|PROT_WRITE, MAP_SHARED, pShmNode->h, szRegion*(i64)pShmNode->nRegion ); if( pMem==MAP_FAILED ){ rc = unixLogError(SQLITE_IOERR_SHMMAP, "mmap", pShmNode->zFilename); @@ -4422,10 +4514,227 @@ # define unixShmMap 0 # define unixShmLock 0 # define unixShmBarrier 0 # define unixShmUnmap 0 #endif /* #ifndef SQLITE_OMIT_WAL */ + +/* +** If it is currently memory mapped, unmap file pFd. +*/ +static void unixUnmapfile(unixFile *pFd){ + assert( pFd->nFetchOut==0 ); + if( pFd->pMapRegion ){ + osMunmap(pFd->pMapRegion, pFd->mmapOrigsize); + pFd->pMapRegion = 0; + pFd->mmapSize = 0; + pFd->mmapOrigsize = 0; + } +} + +/* +** Return the system page size. +*/ +static int unixGetPagesize(void){ +#if HAVE_MREMAP + return 512; +#elif defined(_BSD_SOURCE) + return getpagesize(); +#else + return (int)sysconf(_SC_PAGESIZE); +#endif +} + +/* +** Attempt to set the size of the memory mapping maintained by file +** descriptor pFd to nNew bytes. Any existing mapping is discarded. +** +** If successful, this function sets the following variables: +** +** unixFile.pMapRegion +** unixFile.mmapSize +** unixFile.mmapOrigsize +** +** If unsuccessful, an error message is logged via sqlite3_log() and +** the three variables above are zeroed. In this case SQLite should +** continue accessing the database using the xRead() and xWrite() +** methods. +*/ +static void unixRemapfile( + unixFile *pFd, /* File descriptor object */ + i64 nNew /* Required mapping size */ +){ + const char *zErr = "mmap"; + int h = pFd->h; /* File descriptor open on db file */ + u8 *pOrig = (u8 *)pFd->pMapRegion; /* Pointer to current file mapping */ + i64 nOrig = pFd->mmapOrigsize; /* Size of pOrig region in bytes */ + u8 *pNew = 0; /* Location of new mapping */ + int flags = PROT_READ; /* Flags to pass to mmap() */ + + assert( pFd->nFetchOut==0 ); + assert( nNew>pFd->mmapSize ); + assert( nNew<=pFd->mmapLimit ); + assert( nNew>0 ); + assert( pFd->mmapOrigsize>=pFd->mmapSize ); + assert( MAP_FAILED!=0 ); + + if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE; + + if( pOrig ){ + const int szSyspage = unixGetPagesize(); + i64 nReuse = (pFd->mmapSize & ~(szSyspage-1)); + u8 *pReq = &pOrig[nReuse]; + + /* Unmap any pages of the existing mapping that cannot be reused. */ + if( nReuse!=nOrig ){ + osMunmap(pReq, nOrig-nReuse); + } + +#if HAVE_MREMAP + pNew = osMremap(pOrig, nReuse, nNew, MREMAP_MAYMOVE); + zErr = "mremap"; +#else + pNew = osMmap(pReq, nNew-nReuse, flags, MAP_SHARED, h, nReuse); + if( pNew!=MAP_FAILED ){ + if( pNew!=pReq ){ + osMunmap(pNew, nNew - nReuse); + pNew = 0; + }else{ + pNew = pOrig; + } + } +#endif + + /* The attempt to extend the existing mapping failed. Free it. */ + if( pNew==MAP_FAILED || pNew==0 ){ + osMunmap(pOrig, nReuse); + } + } + + /* If pNew is still NULL, try to create an entirely new mapping. */ + if( pNew==0 ){ + pNew = osMmap(0, nNew, flags, MAP_SHARED, h, 0); + } + + if( pNew==MAP_FAILED ){ + pNew = 0; + nNew = 0; + unixLogError(SQLITE_OK, zErr, pFd->zPath); + + /* If the mmap() above failed, assume that all subsequent mmap() calls + ** will probably fail too. Fall back to using xRead/xWrite exclusively + ** in this case. */ + pFd->mmapLimit = 0; + } + pFd->pMapRegion = (void *)pNew; + pFd->mmapSize = pFd->mmapOrigsize = nNew; +} + +/* +** Memory map or remap the file opened by file-descriptor pFd (if the file +** is already mapped, the existing mapping is replaced by the new). Or, if +** there already exists a mapping for this file, and there are still +** outstanding xFetch() references to it, this function is a no-op. +** +** If parameter nByte is non-negative, then it is the requested size of +** the mapping to create. Otherwise, if nByte is less than zero, then the +** requested size is the size of the file on disk. The actual size of the +** created mapping is either the requested size or the value configured +** using SQLITE_FCNTL_MMAP_LIMIT, whichever is smaller. +** +** SQLITE_OK is returned if no error occurs (even if the mapping is not +** recreated as a result of outstanding references) or an SQLite error +** code otherwise. +*/ +static int unixMapfile(unixFile *pFd, i64 nByte){ + i64 nMap = nByte; + int rc; + + assert( nMap>=0 || pFd->nFetchOut==0 ); + if( pFd->nFetchOut>0 ) return SQLITE_OK; + + if( nMap<0 ){ + struct stat statbuf; /* Low-level file information */ + rc = osFstat(pFd->h, &statbuf); + if( rc!=SQLITE_OK ){ + return SQLITE_IOERR_FSTAT; + } + nMap = statbuf.st_size; + } + if( nMap>pFd->mmapLimit ){ + nMap = pFd->mmapLimit; + } + + if( nMap!=pFd->mmapSize ){ + if( nMap>0 ){ + unixRemapfile(pFd, nMap); + }else{ + unixUnmapfile(pFd); + } + } + + return SQLITE_OK; +} + +/* +** If possible, return a pointer to a mapping of file fd starting at offset +** iOff. The mapping must be valid for at least nAmt bytes. +** +** If such a pointer can be obtained, store it in *pp and return SQLITE_OK. +** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK. +** Finally, if an error does occur, return an SQLite error code. The final +** value of *pp is undefined in this case. +** +** If this function does return a pointer, the caller must eventually +** release the reference by calling unixUnfetch(). +*/ +static int unixFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){ + unixFile *pFd = (unixFile *)fd; /* The underlying database file */ + *pp = 0; + + if( pFd->mmapLimit>0 ){ + if( pFd->pMapRegion==0 ){ + int rc = unixMapfile(pFd, -1); + if( rc!=SQLITE_OK ) return rc; + } + if( pFd->mmapSize >= iOff+nAmt ){ + *pp = &((u8 *)pFd->pMapRegion)[iOff]; + pFd->nFetchOut++; + } + } + return SQLITE_OK; +} + +/* +** If the third argument is non-NULL, then this function releases a +** reference obtained by an earlier call to unixFetch(). The second +** argument passed to this function must be the same as the corresponding +** argument that was passed to the unixFetch() invocation. +** +** Or, if the third argument is NULL, then this function is being called +** to inform the VFS layer that, according to POSIX, any existing mapping +** may now be invalid and should be unmapped. +*/ +static int unixUnfetch(sqlite3_file *fd, i64 iOff, void *p){ + unixFile *pFd = (unixFile *)fd; /* The underlying database file */ + + /* If p==0 (unmap the entire file) then there must be no outstanding + ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference), + ** then there must be at least one outstanding. */ + assert( (p==0)==(pFd->nFetchOut==0) ); + + /* If p!=0, it must match the iOff value. */ + assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] ); + + if( p ){ + pFd->nFetchOut--; + }else{ + unixUnmapfile(pFd); + } + + assert( pFd->nFetchOut>=0 ); + return SQLITE_OK; +} /* ** Here ends the implementation of all sqlite3_file methods. ** ********************** End sqlite3_file Methods ******************************* @@ -4481,11 +4790,13 @@ unixSectorSize, /* xSectorSize */ \ unixDeviceCharacteristics, /* xDeviceCapabilities */ \ unixShmMap, /* xShmMap */ \ unixShmLock, /* xShmLock */ \ unixShmBarrier, /* xShmBarrier */ \ - unixShmUnmap /* xShmUnmap */ \ + unixShmUnmap, /* xShmUnmap */ \ + unixFetch, /* xFetch */ \ + unixUnfetch, /* xUnfetch */ \ }; \ static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){ \ UNUSED_PARAMETER(z); UNUSED_PARAMETER(p); \ return &METHOD; \ } \ @@ -4498,11 +4809,11 @@ ** are also created. */ IOMETHODS( posixIoFinder, /* Finder function name */ posixIoMethods, /* sqlite3_io_methods object name */ - 2, /* shared memory is enabled */ + 3, /* shared memory and mmap are enabled */ unixClose, /* xClose method */ unixLock, /* xLock method */ unixUnlock, /* xUnlock method */ unixCheckReservedLock /* xCheckReservedLock method */ ) @@ -4749,10 +5060,11 @@ OSTRACE(("OPEN %-3d %s\n", h, zFilename)); pNew->h = h; pNew->pVfs = pVfs; pNew->zPath = zFilename; pNew->ctrlFlags = (u8)ctrlFlags; + pNew->mmapLimit = sqlite3GlobalConfig.mxMmap; if( sqlite3_uri_boolean(((ctrlFlags & UNIXFILE_URI) ? zFilename : 0), "psow", SQLITE_POWERSAFE_OVERWRITE) ){ pNew->ctrlFlags |= UNIXFILE_PSOW; } if( strcmp(pVfs->zName,"unix-excl")==0 ){ @@ -6986,11 +7298,11 @@ }; unsigned int i; /* Loop counter */ /* Double-check that the aSyscall[] array has been constructed ** correctly. See ticket [bb3a86e890c8e96ab] */ - assert( ArraySize(aSyscall)==21 ); + assert( ArraySize(aSyscall)==24 ); /* Register all VFSes defined in the aVfs[] array */ for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){ sqlite3_vfs_register(&aVfs[i], i==0); } Index: src/os_win.c ================================================================== --- src/os_win.c +++ src/os_win.c @@ -148,15 +148,22 @@ HANDLE hMutex; /* Mutex used to control access to shared lock */ HANDLE hShared; /* Shared memory segment used for locking */ winceLock local; /* Locks obtained by this instance of winFile */ winceLock *shared; /* Global shared lock memory for the file */ #endif + int nFetchOut; /* Number of outstanding xFetch references */ + HANDLE hMap; /* Handle for accessing memory mapping */ + void *pMapRegion; /* Area memory mapped */ + sqlite3_int64 mmapSize; /* Usable size of mapped region */ + sqlite3_int64 mmapOrigsize; /* Actual size of mapped region */ + sqlite3_int64 mmapLimit; /* Configured FCNTL_MMAP_LIMIT value */ }; /* ** Allowed values for winFile.ctrlFlags */ +#define WINFILE_RDONLY 0x02 /* Connection is read only */ #define WINFILE_PERSIST_WAL 0x04 /* Persistent WAL mode */ #define WINFILE_PSOW 0x10 /* SQLITE_IOCAP_POWERSAFE_OVERWRITE */ /* * The size of the buffer used by sqlite3_win32_write_debug(). @@ -2059,10 +2066,13 @@ return 0; #endif } +/* Forward references to VFS methods */ +static int winUnmapfile(winFile*); + /* ** Close a file. ** ** It is reported that an attempt to close a handle might sometimes ** fail. This is a very unreasonable result, but Windows is notorious @@ -2080,10 +2090,14 @@ #ifndef SQLITE_OMIT_WAL assert( pFile->pShm==0 ); #endif OSTRACE(("CLOSE %d\n", pFile->h)); assert( pFile->h!=NULL && pFile->h!=INVALID_HANDLE_VALUE ); + + rc = winUnmapfile(pFile); + if( rc!=SQLITE_OK ) return rc; + do{ rc = osCloseHandle(pFile->h); /* SimulateIOError( rc=0; cnt=MX_CLOSE_ATTEMPT; ); */ }while( rc==0 && ++cnt < MX_CLOSE_ATTEMPT && (sqlite3_win32_sleep(100), 1) ); #if SQLITE_OS_WINCE @@ -2128,12 +2142,28 @@ winFile *pFile = (winFile*)id; /* file handle */ DWORD nRead; /* Number of bytes actually read from file */ int nRetry = 0; /* Number of retrys */ assert( id!=0 ); + assert( amt>0 ); SimulateIOError(return SQLITE_IOERR_READ); OSTRACE(("READ %d lock=%d\n", pFile->h, pFile->locktype)); + + /* Deal with as much of this read request as possible by transfering + ** data from the memory mapping using memcpy(). */ + if( offsetmmapSize ){ + if( offset+amt <= pFile->mmapSize ){ + memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], amt); + return SQLITE_OK; + }else{ + int nCopy = (int)(pFile->mmapSize - offset); + memcpy(pBuf, &((u8 *)(pFile->pMapRegion))[offset], nCopy); + pBuf = &((u8 *)pBuf)[nCopy]; + amt -= nCopy; + offset += nCopy; + } + } #if SQLITE_OS_WINCE if( seekWinFile(pFile, offset) ){ return SQLITE_FULL; } @@ -2179,10 +2209,25 @@ assert( pFile ); SimulateIOError(return SQLITE_IOERR_WRITE); SimulateDiskfullError(return SQLITE_FULL); OSTRACE(("WRITE %d lock=%d\n", pFile->h, pFile->locktype)); + + /* Deal with as much of this write request as possible by transfering + ** data from the memory mapping using memcpy(). */ + if( offsetmmapSize ){ + if( offset+amt <= pFile->mmapSize ){ + memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, amt); + return SQLITE_OK; + }else{ + int nCopy = (int)(pFile->mmapSize - offset); + memcpy(&((u8 *)(pFile->pMapRegion))[offset], pBuf, nCopy); + pBuf = &((u8 *)pBuf)[nCopy]; + amt -= nCopy; + offset += nCopy; + } + } #if SQLITE_OS_WINCE rc = seekWinFile(pFile, offset); if( rc==0 ){ #else @@ -2247,10 +2292,11 @@ ** Truncate an open file to a specified size */ static int winTruncate(sqlite3_file *id, sqlite3_int64 nByte){ winFile *pFile = (winFile*)id; /* File handle object */ int rc = SQLITE_OK; /* Return code for this function */ + DWORD lastErrno; assert( pFile ); OSTRACE(("TRUNCATE %d %lld\n", pFile->h, nByte)); SimulateIOError(return SQLITE_IOERR_TRUNCATE); @@ -2265,15 +2311,24 @@ } /* SetEndOfFile() returns non-zero when successful, or zero when it fails. */ if( seekWinFile(pFile, nByte) ){ rc = winLogError(SQLITE_IOERR_TRUNCATE, pFile->lastErrno, - "winTruncate1", pFile->zPath); - }else if( 0==osSetEndOfFile(pFile->h) ){ - pFile->lastErrno = osGetLastError(); + "winTruncate1", pFile->zPath); + }else if( 0==osSetEndOfFile(pFile->h) && + ((lastErrno = osGetLastError())!=ERROR_USER_MAPPED_FILE) ){ + pFile->lastErrno = lastErrno; rc = winLogError(SQLITE_IOERR_TRUNCATE, pFile->lastErrno, - "winTruncate2", pFile->zPath); + "winTruncate2", pFile->zPath); + } + + /* If the file was truncated to a size smaller than the currently + ** mapped region, reduce the effective mapping size as well. SQLite will + ** use read() and write() to access data beyond this point from now on. + */ + if( pFile->pMapRegion && nBytemmapSize ){ + pFile->mmapSize = nByte; } OSTRACE(("TRUNCATE %d %lld %s\n", pFile->h, nByte, rc ? "failed" : "ok")); return rc; } @@ -2778,10 +2833,16 @@ if( zTFile ){ getTempname(pFile->pVfs->mxPathname, zTFile); *(char**)pArg = zTFile; } return SQLITE_OK; + } + case SQLITE_FCNTL_MMAP_LIMIT: { + i64 newLimit = *(i64*)pArg; + *(i64*) = pFile->mmapLimit; + if( newLimit>=0 ) pFile->mmapLimit = newLimit; + return SQLITE_OK; } } return SQLITE_NOTFOUND; } @@ -3448,10 +3509,188 @@ # define winShmMap 0 # define winShmLock 0 # define winShmBarrier 0 # define winShmUnmap 0 #endif /* #ifndef SQLITE_OMIT_WAL */ + +/* +** Cleans up the mapped region of the specified file, if any. +*/ +static int winUnmapfile(winFile *pFile){ + assert( pFile!=0 ); + if( pFile->pMapRegion ){ + if( !osUnmapViewOfFile(pFile->pMapRegion) ){ + pFile->lastErrno = osGetLastError(); + return winLogError(SQLITE_IOERR_MMAP, pFile->lastErrno, + "winUnmap1", pFile->zPath); + } + pFile->pMapRegion = 0; + pFile->mmapSize = 0; + pFile->mmapOrigsize = 0; + } + if( pFile->hMap!=NULL ){ + if( !osCloseHandle(pFile->hMap) ){ + pFile->lastErrno = osGetLastError(); + return winLogError(SQLITE_IOERR_MMAP, pFile->lastErrno, + "winUnmap2", pFile->zPath); + } + pFile->hMap = NULL; + } + return SQLITE_OK; +} + +/* +** Memory map or remap the file opened by file-descriptor pFd (if the file +** is already mapped, the existing mapping is replaced by the new). Or, if +** there already exists a mapping for this file, and there are still +** outstanding xFetch() references to it, this function is a no-op. +** +** If parameter nByte is non-negative, then it is the requested size of +** the mapping to create. Otherwise, if nByte is less than zero, then the +** requested size is the size of the file on disk. The actual size of the +** created mapping is either the requested size or the value configured +** using SQLITE_FCNTL_MMAP_LIMIT, whichever is smaller. +** +** SQLITE_OK is returned if no error occurs (even if the mapping is not +** recreated as a result of outstanding references) or an SQLite error +** code otherwise. +*/ +static int winMapfile(winFile *pFd, sqlite3_int64 nByte){ + sqlite3_int64 nMap = nByte; + int rc; + + assert( nMap>=0 || pFd->nFetchOut==0 ); + if( pFd->nFetchOut>0 ) return SQLITE_OK; + + if( nMap<0 ){ + rc = winFileSize((sqlite3_file*)pFd, &nMap); + if( rc ){ + return SQLITE_IOERR_FSTAT; + } + } + if( nMap>pFd->mmapLimit ){ + nMap = pFd->mmapLimit; + } + nMap &= ~(sqlite3_int64)(winSysInfo.dwPageSize - 1); + + if( nMap==0 && pFd->mmapSize>0 ){ + winUnmapfile(pFd); + } + if( nMap!=pFd->mmapSize ){ + void *pNew = 0; + DWORD protect = PAGE_READONLY; + DWORD flags = FILE_MAP_READ; + + winUnmapfile(pFd); + if( (pFd->ctrlFlags & WINFILE_RDONLY)==0 ){ + protect = PAGE_READWRITE; + flags |= FILE_MAP_WRITE; + } +#if SQLITE_OS_WINRT + pFd->hMap = osCreateFileMappingFromApp(pFd->h, NULL, protect, nMap, NULL); +#elif defined(SQLITE_WIN32_HAS_WIDE) + pFd->hMap = osCreateFileMappingW(pFd->h, NULL, protect, + (DWORD)((nMap>>32) & 0xffffffff), + (DWORD)(nMap & 0xffffffff), NULL); +#elif defined(SQLITE_WIN32_HAS_ANSI) + pFd->hMap = osCreateFileMappingA(pFd->h, NULL, protect, + (DWORD)((nMap>>32) & 0xffffffff), + (DWORD)(nMap & 0xffffffff), NULL); +#endif + if( pFd->hMap==NULL ){ + pFd->lastErrno = osGetLastError(); + rc = winLogError(SQLITE_IOERR_MMAP, pFd->lastErrno, + "winMapfile", pFd->zPath); + /* Log the error, but continue normal operation using xRead/xWrite */ + return SQLITE_OK; + } + assert( (nMap % winSysInfo.dwPageSize)==0 ); +#if SQLITE_OS_WINRT + pNew = osMapViewOfFileFromApp(pFd->hMap, flags, 0, nMap); +#else + assert( sizeof(SIZE_T)==sizeof(sqlite3_int64) || nMap<=0xffffffff ); + pNew = osMapViewOfFile(pFd->hMap, flags, 0, 0, (SIZE_T)nMap); +#endif + if( pNew==NULL ){ + osCloseHandle(pFd->hMap); + pFd->hMap = NULL; + pFd->lastErrno = osGetLastError(); + winLogError(SQLITE_IOERR_MMAP, pFd->lastErrno, + "winMapfile", pFd->zPath); + return SQLITE_OK; + } + pFd->pMapRegion = pNew; + pFd->mmapSize = nMap; + pFd->mmapOrigsize = nMap; + } + + return SQLITE_OK; +} + +/* +** If possible, return a pointer to a mapping of file fd starting at offset +** iOff. The mapping must be valid for at least nAmt bytes. +** +** If such a pointer can be obtained, store it in *pp and return SQLITE_OK. +** Or, if one cannot but no error occurs, set *pp to 0 and return SQLITE_OK. +** Finally, if an error does occur, return an SQLite error code. The final +** value of *pp is undefined in this case. +** +** If this function does return a pointer, the caller must eventually +** release the reference by calling unixUnfetch(). +*/ +static int winFetch(sqlite3_file *fd, i64 iOff, int nAmt, void **pp){ + winFile *pFd = (winFile*)fd; /* The underlying database file */ + *pp = 0; + + if( pFd->mmapLimit>0 ){ + if( pFd->pMapRegion==0 ){ + int rc = winMapfile(pFd, -1); + if( rc!=SQLITE_OK ) return rc; + } + if( pFd->mmapSize >= iOff+nAmt ){ + *pp = &((u8 *)pFd->pMapRegion)[iOff]; + pFd->nFetchOut++; + } + } + return SQLITE_OK; +} + +/* +** If the third argument is non-NULL, then this function releases a +** reference obtained by an earlier call to unixFetch(). The second +** argument passed to this function must be the same as the corresponding +** argument that was passed to the unixFetch() invocation. +** +** Or, if the third argument is NULL, then this function is being called +** to inform the VFS layer that, according to POSIX, any existing mapping +** may now be invalid and should be unmapped. +*/ +static int winUnfetch(sqlite3_file *fd, i64 iOff, void *p){ + winFile *pFd = (winFile*)fd; /* The underlying database file */ + + /* If p==0 (unmap the entire file) then there must be no outstanding + ** xFetch references. Or, if p!=0 (meaning it is an xFetch reference), + ** then there must be at least one outstanding. */ + assert( (p==0)==(pFd->nFetchOut==0) ); + + /* If p!=0, it must match the iOff value. */ + assert( p==0 || p==&((u8 *)pFd->pMapRegion)[iOff] ); + + if( p ){ + pFd->nFetchOut--; + }else{ + /* FIXME: If Windows truly always prevents truncating or deleting a + ** file while a mapping is held, then the following winUnmapfile() call + ** is unnecessary can can be omitted - potentially improving + ** performance. */ + winUnmapfile(pFd); + } + + assert( pFd->nFetchOut>=0 ); + return SQLITE_OK; +} /* ** Here ends the implementation of all sqlite3_file methods. ** ********************** End sqlite3_file Methods ******************************* @@ -3460,11 +3699,11 @@ /* ** This vector defines all the methods that can operate on an ** sqlite3_file for win32. */ static const sqlite3_io_methods winIoMethod = { - 2, /* iVersion */ + 3, /* iVersion */ winClose, /* xClose */ winRead, /* xRead */ winWrite, /* xWrite */ winTruncate, /* xTruncate */ winSync, /* xSync */ @@ -3476,11 +3715,13 @@ winSectorSize, /* xSectorSize */ winDeviceCharacteristics, /* xDeviceCharacteristics */ winShmMap, /* xShmMap */ winShmLock, /* xShmLock */ winShmBarrier, /* xShmBarrier */ - winShmUnmap /* xShmUnmap */ + winShmUnmap, /* xShmUnmap */ + winFetch, /* xFetch */ + winUnfetch /* xUnfetch */ }; /**************************************************************************** **************************** sqlite3_vfs methods **************************** ** @@ -3652,13 +3893,11 @@ #endif int isExclusive = (flags & SQLITE_OPEN_EXCLUSIVE); int isDelete = (flags & SQLITE_OPEN_DELETEONCLOSE); int isCreate = (flags & SQLITE_OPEN_CREATE); -#ifndef NDEBUG int isReadonly = (flags & SQLITE_OPEN_READONLY); -#endif int isReadWrite = (flags & SQLITE_OPEN_READWRITE); #ifndef NDEBUG int isOpenJournal = (isCreate && ( eType==SQLITE_OPEN_MASTER_JOURNAL @@ -3865,15 +4104,23 @@ } pFile->pMethod = &winIoMethod; pFile->pVfs = pVfs; pFile->h = h; + if( isReadonly ){ + pFile->ctrlFlags |= WINFILE_RDONLY; + } if( sqlite3_uri_boolean(zName, "psow", SQLITE_POWERSAFE_OVERWRITE) ){ pFile->ctrlFlags |= WINFILE_PSOW; } pFile->lastErrno = NO_ERROR; pFile->zPath = zName; + pFile->hMap = NULL; + pFile->pMapRegion = 0; + pFile->mmapSize = 0; + pFile->mmapOrigsize = 0; + pFile->mmapLimit = sqlite3GlobalConfig.mxMmap; OpenCounter(+1); return rc; } @@ -4498,20 +4745,19 @@ /* Double-check that the aSyscall[] array has been constructed ** correctly. See ticket [bb3a86e890c8e96ab] */ assert( ArraySize(aSyscall)==74 ); -#ifndef SQLITE_OMIT_WAL /* get memory map allocation granularity */ memset(&winSysInfo, 0, sizeof(SYSTEM_INFO)); #if SQLITE_OS_WINRT osGetNativeSystemInfo(&winSysInfo); #else osGetSystemInfo(&winSysInfo); #endif - assert(winSysInfo.dwAllocationGranularity > 0); -#endif + assert( winSysInfo.dwAllocationGranularity>0 ); + assert( winSysInfo.dwPageSize>0 ); sqlite3_vfs_register(&winVfs, 1); return SQLITE_OK; } Index: src/pager.c ================================================================== --- src/pager.c +++ src/pager.c @@ -653,10 +653,15 @@ i64 journalHdr; /* Byte offset to previous journal header */ sqlite3_backup *pBackup; /* Pointer to list of ongoing backup processes */ PagerSavepoint *aSavepoint; /* Array of active savepoints */ int nSavepoint; /* Number of elements in aSavepoint[] */ char dbFileVers[16]; /* Changes whenever database file changes */ + + u8 bUseFetch; /* True to use xFetch() */ + int nMmapOut; /* Number of mmap pages currently outstanding */ + sqlite3_int64 mxMmap; /* Desired maximum mmap size */ + PgHdr *pMmapFreelist; /* List of free mmap page headers (pDirty) */ /* ** End of the routinely-changing class members ***************************************************************************/ u16 nExtra; /* Add this many bytes to each in-memory page */ @@ -2250,11 +2255,11 @@ && isSynced ){ i64 ofst = (pgno-1)*(i64)pPager->pageSize; testcase( !isSavepnt && pPg!=0 && (pPg->flags&PGHDR_NEED_SYNC)!=0 ); assert( !pagerUseWal(pPager) ); - rc = sqlite3OsWrite(pPager->fd, (u8*)aData, pPager->pageSize, ofst); + rc = sqlite3OsWrite(pPager->fd, (u8 *)aData, pPager->pageSize, ofst); if( pgno>pPager->dbFileSize ){ pPager->dbFileSize = pgno; } if( pPager->pBackup ){ CODEC1(pPager, aData, pgno, 3, rc=SQLITE_NOMEM); @@ -2832,15 +2837,14 @@ ** the value read from the database file. ** ** If an IO error occurs, then the IO error is returned to the caller. ** Otherwise, SQLITE_OK is returned. */ -static int readDbPage(PgHdr *pPg){ +static int readDbPage(PgHdr *pPg, u32 iFrame){ Pager *pPager = pPg->pPager; /* Pager object associated with page pPg */ Pgno pgno = pPg->pgno; /* Page number to read */ int rc = SQLITE_OK; /* Return code */ - int isInWal = 0; /* True if page is in log file */ int pgsz = pPager->pageSize; /* Number of bytes to read */ assert( pPager->eState>=PAGER_READER && !MEMDB ); assert( isOpen(pPager->fd) ); @@ -2848,15 +2852,14 @@ assert( pPager->tempFile ); memset(pPg->pData, 0, pPager->pageSize); return SQLITE_OK; } - if( pagerUseWal(pPager) ){ + if( iFrame ){ /* Try to pull the page from the write-ahead log. */ - rc = sqlite3WalRead(pPager->pWal, pgno, &isInWal, pgsz, pPg->pData); - } - if( rc==SQLITE_OK && !isInWal ){ + rc = sqlite3WalReadFrame(pPager->pWal, iFrame, pgsz, pPg->pData); + }else{ i64 iOffset = (pgno-1)*(i64)pPager->pageSize; rc = sqlite3OsRead(pPager->fd, pPg->pData, pgsz, iOffset); if( rc==SQLITE_IOERR_SHORT_READ ){ rc = SQLITE_OK; } @@ -2931,16 +2934,21 @@ static int pagerUndoCallback(void *pCtx, Pgno iPg){ int rc = SQLITE_OK; Pager *pPager = (Pager *)pCtx; PgHdr *pPg; + assert( pagerUseWal(pPager) ); pPg = sqlite3PagerLookup(pPager, iPg); if( pPg ){ if( sqlite3PcachePageRefcount(pPg)==1 ){ sqlite3PcacheDrop(pPg); }else{ - rc = readDbPage(pPg); + u32 iFrame = 0; + rc = sqlite3WalFindFrame(pPager->pWal, pPg->pgno, &iFrame); + if( rc==SQLITE_OK ){ + rc = readDbPage(pPg, iFrame); + } if( rc==SQLITE_OK ){ pPager->xReiniter(pPg); } sqlite3PagerUnref(pPg); } @@ -3080,10 +3088,11 @@ sqlite3WalEndReadTransaction(pPager->pWal); rc = sqlite3WalBeginReadTransaction(pPager->pWal, &changed); if( rc!=SQLITE_OK || changed ){ pager_reset(pPager); + if( pPager->bUseFetch ) sqlite3OsUnfetch(pPager->fd, 0, 0); } return rc; } #endif @@ -3340,10 +3349,31 @@ ** Change the maximum number of in-memory pages that are allowed. */ void sqlite3PagerSetCachesize(Pager *pPager, int mxPage){ sqlite3PcacheSetCachesize(pPager->pPCache, mxPage); } + +/* +** Invoke SQLITE_FCNTL_MMAP_LIMIT based on the current value of mxMmap. +*/ +static void pagerFixMaplimit(Pager *pPager){ + sqlite3_file *fd = pPager->fd; + if( isOpen(fd) ){ + sqlite3_int64 mx; + pPager->bUseFetch = (fd->pMethods->iVersion>=3) && pPager->mxMmap>0; + mx = pPager->mxMmap; + sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_MMAP_LIMIT, &mx); + } +} + +/* +** Change the maximum size of any memory mapping made of the database file. +*/ +void sqlite3PagerSetMmapLimit(Pager *pPager, sqlite3_int64 mxMmap){ + pPager->mxMmap = mxMmap; + pagerFixMaplimit(pPager); +} /* ** Free as much memory as possible from the pager. */ void sqlite3PagerShrink(Pager *pPager){ @@ -3576,10 +3606,11 @@ if( rc==SQLITE_OK ){ if( nReserve<0 ) nReserve = pPager->nReserve; assert( nReserve>=0 && nReserve<1000 ); pPager->nReserve = (i16)nReserve; pagerReportSize(pPager); + pagerFixMaplimit(pPager); } return rc; } /* @@ -3800,10 +3831,85 @@ if( rc==SQLITE_OK ){ rc = sqlite3OsFileSize(pPager->jfd, &pPager->journalHdr); } return rc; } + +/* +** Obtain a reference to a memory mapped page object for page number pgno. +** The new object will use the pointer pData, obtained from xFetch(). +** If successful, set *ppPage to point to the new page reference +** and return SQLITE_OK. Otherwise, return an SQLite error code and set +** *ppPage to zero. +** +** Page references obtained by calling this function should be released +** by calling pagerReleaseMapPage(). +*/ +static int pagerAcquireMapPage( + Pager *pPager, /* Pager object */ + Pgno pgno, /* Page number */ + void *pData, /* xFetch()'d data for this page */ + PgHdr **ppPage /* OUT: Acquired page object */ +){ + PgHdr *p; /* Memory mapped page to return */ + + if( pPager->pMmapFreelist ){ + *ppPage = p = pPager->pMmapFreelist; + pPager->pMmapFreelist = p->pDirty; + p->pDirty = 0; + memset(p->pExtra, 0, pPager->nExtra); + }else{ + *ppPage = p = (PgHdr *)sqlite3MallocZero(sizeof(PgHdr) + pPager->nExtra); + if( p==0 ){ + sqlite3OsUnfetch(pPager->fd, (i64)(pgno-1) * pPager->pageSize, pData); + return SQLITE_NOMEM; + } + p->pExtra = (void *)&p[1]; + p->flags = PGHDR_MMAP; + p->nRef = 1; + p->pPager = pPager; + } + + assert( p->pExtra==(void *)&p[1] ); + assert( p->pPage==0 ); + assert( p->flags==PGHDR_MMAP ); + assert( p->pPager==pPager ); + assert( p->nRef==1 ); + + p->pgno = pgno; + p->pData = pData; + pPager->nMmapOut++; + + return SQLITE_OK; +} + +/* +** Release a reference to page pPg. pPg must have been returned by an +** earlier call to pagerAcquireMapPage(). +*/ +static void pagerReleaseMapPage(PgHdr *pPg){ + Pager *pPager = pPg->pPager; + pPager->nMmapOut--; + pPg->pDirty = pPager->pMmapFreelist; + pPager->pMmapFreelist = pPg; + + assert( pPager->fd->pMethods->iVersion>=3 ); + sqlite3OsUnfetch(pPager->fd, (i64)(pPg->pgno-1)*pPager->pageSize, pPg->pData); +} + +/* +** Free all PgHdr objects stored in the Pager.pMmapFreelist list. +*/ +static void pagerFreeMapHdrs(Pager *pPager){ + PgHdr *p; + PgHdr *pNext; + for(p=pPager->pMmapFreelist; p; p=pNext){ + pNext = p->pDirty; + sqlite3_free(p); + } +} + /* ** Shutdown the page cache. Free all memory and close all files. ** ** If a transaction was in progress when this routine is called, that @@ -3821,10 +3927,11 @@ u8 *pTmp = (u8 *)pPager->pTmpSpace; assert( assert_pager_state(pPager) ); disable_simulated_io_errors(); sqlite3BeginBenignMalloc(); + pagerFreeMapHdrs(pPager); /* pPager->errCode = 0; */ pPager->exclusiveMode = 0; #ifndef SQLITE_OMIT_WAL sqlite3WalClose(pPager->pWal, pPager->ckptSyncFlags, pPager->pageSize, pTmp); pPager->pWal = 0; @@ -4082,11 +4189,13 @@ /* Before the first write, give the VFS a hint of what the final ** file size will be. */ assert( rc!=SQLITE_OK || isOpen(pPager->fd) ); - if( rc==SQLITE_OK && pPager->dbSize>pPager->dbHintSize ){ + if( rc==SQLITE_OK + && (pList->pDirty ? pPager->dbSize : pList->pgno+1)>pPager->dbHintSize + ){ sqlite3_int64 szFile = pPager->pageSize * (sqlite3_int64)pPager->dbSize; sqlite3OsFileControlHint(pPager->fd, SQLITE_FCNTL_SIZE_HINT, &szFile); pPager->dbHintSize = pPager->dbSize; } @@ -4636,10 +4745,11 @@ } /* pPager->xBusyHandler = 0; */ /* pPager->pBusyHandlerArg = 0; */ pPager->xReiniter = xReinit; /* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */ + /* pPager->mxMmap = SQLITE_DEFAULT_MMAP_LIMIT // will be set by btree.c */ *ppPager = pPager; return SQLITE_OK; } @@ -4927,13 +5037,15 @@ assert( (pPager->eLock==SHARED_LOCK) || (pPager->exclusiveMode && pPager->eLock>SHARED_LOCK) ); } - if( !pPager->tempFile - && (pPager->pBackup || sqlite3PcachePagecount(pPager->pPCache)>0) - ){ + if( !pPager->tempFile && ( + pPager->pBackup + || sqlite3PcachePagecount(pPager->pPCache)>0 + || pPager->bUseFetch + )){ /* The shared-lock has just been acquired on the database file ** and there are already pages in the cache (from a previous ** read or write transaction). Check to see if the database ** has been modified. If the database has changed, flush the ** cache. @@ -4955,19 +5067,29 @@ if( rc ) goto failed; if( nPage>0 ){ IOTRACE(("CKVERS %p %d\n", pPager, sizeof(dbFileVers))); rc = sqlite3OsRead(pPager->fd, &dbFileVers, sizeof(dbFileVers), 24); - if( rc!=SQLITE_OK ){ + if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){ goto failed; } }else{ memset(dbFileVers, 0, sizeof(dbFileVers)); } if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){ pager_reset(pPager); + + /* Unmap the database file. It is possible that external processes + ** may have truncated the database file and then extended it back + ** to its original size while this process was not holding a lock. + ** In this case there may exist a Pager.pMap mapping that appears + ** to be the right size but is not actually valid. Avoid this + ** possibility by unmapping the db here. */ + if( pPager->bUseFetch ){ + sqlite3OsUnfetch(pPager->fd, 0, 0); + } } } /* If there is a WAL file in the file-system, open this database in WAL ** mode. Otherwise, the following function call is a no-op. @@ -5005,11 +5127,11 @@ ** Except, in locking_mode=EXCLUSIVE when there is nothing to in ** the rollback journal, the unlock is not performed and there is ** nothing to rollback, so this routine is a no-op. */ static void pagerUnlockIfUnused(Pager *pPager){ - if( (sqlite3PcacheRefCount(pPager->pPCache)==0) ){ + if( pPager->nMmapOut==0 && (sqlite3PcacheRefCount(pPager->pPCache)==0) ){ pagerUnlockAndRollback(pPager); } } /* @@ -5064,17 +5186,28 @@ */ int sqlite3PagerAcquire( Pager *pPager, /* The pager open on the database file */ Pgno pgno, /* Page number to fetch */ DbPage **ppPage, /* Write a pointer to the page here */ - int noContent /* Do not bother reading content from disk if true */ + int flags /* PAGER_ACQUIRE_XXX flags */ ){ - int rc; - PgHdr *pPg; + int rc = SQLITE_OK; + PgHdr *pPg = 0; + u32 iFrame = 0; /* Frame to read from WAL file */ + const int noContent = (flags & PAGER_ACQUIRE_NOCONTENT); + + /* It is acceptable to use a read-only (mmap) page for any page except + ** page 1 if there is no write-transaction open or the ACQUIRE_READONLY + ** flag was specified by the caller. And so long as the db is not a + ** temporary or in-memory database. */ + const int bMmapOk = (pgno!=1 && pPager->bUseFetch + && (pPager->eState==PAGER_READER || (flags & PAGER_ACQUIRE_READONLY)) + ); assert( pPager->eState>=PAGER_READER ); assert( assert_pager_state(pPager) ); + assert( noContent==0 || bMmapOk==0 ); if( pgno==0 ){ return SQLITE_CORRUPT_BKPT; } @@ -5081,10 +5214,43 @@ /* If the pager is in the error state, return an error immediately. ** Otherwise, request the page from the PCache layer. */ if( pPager->errCode!=SQLITE_OK ){ rc = pPager->errCode; }else{ + + if( bMmapOk && pagerUseWal(pPager) ){ + rc = sqlite3WalFindFrame(pPager->pWal, pgno, &iFrame); + if( rc!=SQLITE_OK ) goto pager_acquire_err; + } + + if( iFrame==0 && bMmapOk ){ + void *pData = 0; + + rc = sqlite3OsFetch(pPager->fd, + (i64)(pgno-1) * pPager->pageSize, pPager->pageSize, &pData + ); + + if( rc==SQLITE_OK && pData ){ + if( pPager->eState>PAGER_READER ){ + (void)sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &pPg); + } + if( pPg==0 ){ + rc = pagerAcquireMapPage(pPager, pgno, pData, &pPg); + }else{ + sqlite3OsUnfetch(pPager->fd, (i64)(pgno-1)*pPager->pageSize, pData); + } + if( pPg ){ + assert( rc==SQLITE_OK ); + *ppPage = pPg; + return SQLITE_OK; + } + } + if( rc!=SQLITE_OK ){ + goto pager_acquire_err; + } + } + rc = sqlite3PcacheFetch(pPager->pPCache, pgno, 1, ppPage); } if( rc!=SQLITE_OK ){ /* Either the call to sqlite3PcacheFetch() returned an error or the @@ -5139,13 +5305,17 @@ sqlite3EndBenignMalloc(); } memset(pPg->pData, 0, pPager->pageSize); IOTRACE(("ZERO %p %d\n", pPager, pgno)); }else{ + if( pagerUseWal(pPager) && bMmapOk==0 ){ + rc = sqlite3WalFindFrame(pPager->pWal, pgno, &iFrame); + if( rc!=SQLITE_OK ) goto pager_acquire_err; + } assert( pPg->pPager==pPager ); pPager->aStat[PAGER_STAT_MISS]++; - rc = readDbPage(pPg); + rc = readDbPage(pPg, iFrame); if( rc!=SQLITE_OK ){ goto pager_acquire_err; } } pager_set_pagehash(pPg); @@ -5194,11 +5364,15 @@ ** removed. */ void sqlite3PagerUnref(DbPage *pPg){ if( pPg ){ Pager *pPager = pPg->pPager; - sqlite3PcacheRelease(pPg); + if( pPg->flags & PGHDR_MMAP ){ + pagerReleaseMapPage(pPg); + }else{ + sqlite3PcacheRelease(pPg); + } pagerUnlockIfUnused(pPager); } } /* @@ -5529,10 +5703,11 @@ PgHdr *pPg = pDbPage; Pager *pPager = pPg->pPager; Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize); + assert( (pPg->flags & PGHDR_MMAP)==0 ); assert( pPager->eState>=PAGER_WRITER_LOCKED ); assert( pPager->eState!=PAGER_ERROR ); assert( assert_pager_state(pPager) ); if( nPagePerSector>1 ){ @@ -6085,11 +6260,11 @@ }else{ rc = pager_playback(pPager, 0); } assert( pPager->eState==PAGER_READER || rc!=SQLITE_OK ); - assert( rc==SQLITE_OK || rc==SQLITE_FULL + assert( rc==SQLITE_OK || rc==SQLITE_FULL || rc==SQLITE_CORRUPT || rc==SQLITE_NOMEM || (rc&0xFF)==SQLITE_IOERR ); /* If an error occurs during a ROLLBACK, we can no longer trust the pager ** cache. So call pager_error() on the way out to make any error persistent. */ @@ -6819,15 +6994,16 @@ /* Open the connection to the log file. If this operation fails, ** (e.g. due to malloc() failure), return an error code. */ if( rc==SQLITE_OK ){ - rc = sqlite3WalOpen(pPager->pVfs, + rc = sqlite3WalOpen(pPager->pVfs, pPager->fd, pPager->zWal, pPager->exclusiveMode, pPager->journalSizeLimit, &pPager->pWal ); } + pagerFixMaplimit(pPager); return rc; } @@ -6914,10 +7090,11 @@ rc = pagerExclusiveLock(pPager); if( rc==SQLITE_OK ){ rc = sqlite3WalClose(pPager->pWal, pPager->ckptSyncFlags, pPager->pageSize, (u8*)pPager->pTmpSpace); pPager->pWal = 0; + pagerFixMaplimit(pPager); } } return rc; } Index: src/pager.h ================================================================== --- src/pager.h +++ src/pager.h @@ -76,10 +76,16 @@ #define PAGER_JOURNALMODE_OFF 2 /* Journal omitted. */ #define PAGER_JOURNALMODE_TRUNCATE 3 /* Commit by truncating journal */ #define PAGER_JOURNALMODE_MEMORY 4 /* In-memory journal file */ #define PAGER_JOURNALMODE_WAL 5 /* Use write-ahead logging */ +/* +** Flags that make up the mask passed to sqlite3PagerAcquire(). +*/ +#define PAGER_ACQUIRE_NOCONTENT 0x01 /* Do not load data from disk */ +#define PAGER_ACQUIRE_READONLY 0x02 /* Read-only page is acceptable */ + /* ** The remainder of this file contains the declarations of the functions ** that make up the Pager sub-system API. See source code comments for ** a detailed description of each routine. */ @@ -100,10 +106,11 @@ /* Functions used to configure a Pager object. */ void sqlite3PagerSetBusyhandler(Pager*, int(*)(void *), void *); int sqlite3PagerSetPagesize(Pager*, u32*, int); int sqlite3PagerMaxPageCount(Pager*, int); void sqlite3PagerSetCachesize(Pager*, int); +void sqlite3PagerSetMmapLimit(Pager *, sqlite3_int64); void sqlite3PagerShrink(Pager*); void sqlite3PagerSetSafetyLevel(Pager*,int,int,int); int sqlite3PagerLockingMode(Pager *, int); int sqlite3PagerSetJournalMode(Pager *, int); int sqlite3PagerGetJournalMode(Pager*); Index: src/pcache.h ================================================================== --- src/pcache.h +++ src/pcache.h @@ -50,10 +50,12 @@ #define PGHDR_NEED_SYNC 0x004 /* Fsync the rollback journal before ** writing this page to the database */ #define PGHDR_NEED_READ 0x008 /* Content is unread */ #define PGHDR_REUSE_UNLIKELY 0x010 /* A hint that reuse is unlikely */ #define PGHDR_DONT_WRITE 0x020 /* Do not write content to disk */ + +#define PGHDR_MMAP 0x040 /* This is an mmap page object */ /* Initialize and shutdown the page cache subsystem */ int sqlite3PcacheInitialize(void); void sqlite3PcacheShutdown(void); Index: src/pragma.c ================================================================== --- src/pragma.c +++ src/pragma.c @@ -742,10 +742,44 @@ pDb->pSchema->cache_size = size; sqlite3BtreeSetCacheSize(pDb->pBt, pDb->pSchema->cache_size); } }else + /* + ** PRAGMA [database.]mmap_limit(N) + ** + ** Used to set mapping size limit. The mapping size limit is + ** used to limit the aggregate size of all memory mapped regions of the + ** database file. If this parameter is set to zero, then memory mapping + ** is not used at all. If N is negative, then the default memory map + ** limit determined by sqlite3_config(SQLITE_CONFIG_MMAP_LIMIT) is set. + ** The parameter N is measured in bytes. + ** + ** This value is advisory. The underlying VFS is free to memory map + ** as little or as much as it wants. Except, if N is set to 0 then the + ** upper layers will never invoke the xFetch interfaces to the VFS. + */ + if( sqlite3StrICmp(zLeft,"mmap_limit")==0 ){ + sqlite3_int64 mx; + assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); + if( zRight ){ + int ii; + sqlite3Atoi64(zRight, &mx, 1000, SQLITE_UTF8); + if( mx<0 ) mx = sqlite3GlobalConfig.mxMmap; + if( pId2->n==0 ) db->mxMmap = mx; + for(ii=db->nDb-1; ii>=0; ii--){ + if( db->aDb[ii].pBt && (ii==iDb || pId2->n==0) ){ + sqlite3BtreeSetMmapLimit(db->aDb[ii].pBt, mx); + } + } + } + mx = -1; + if( sqlite3_file_control(db,zDb,SQLITE_FCNTL_MMAP_LIMIT,&mx)==SQLITE_OK ){ + returnSingleInt(pParse, "mmap_limit", mx); + } + }else + /* ** PRAGMA temp_store ** PRAGMA temp_store = "default"|"memory"|"file" ** ** Return or set the local value of the temp_store flag. Changing Index: src/shell.c ================================================================== --- src/shell.c +++ src/shell.c @@ -1549,10 +1549,47 @@ } fprintf(stderr, "ERROR: Not a boolean value: \"%s\". Assuming \"no\".\n", zArg); return 0; } + +/* +** Interpret zArg as an integer value, possibly with suffixes. +*/ +static sqlite3_int64 integerValue(const char *zArg){ + sqlite3_int64 v = 0; + static const struct { char *zSuffix; int iMult; } aMult[] = { + { "KiB", 1024 }, + { "MiB", 1024*1024 }, + { "GiB", 1024*1024*1024 }, + { "KB", 1000 }, + { "MB", 1000000 }, + { "GB", 1000000000 }, + { "K", 1000 }, + { "M", 1000000 }, + { "G", 1000000000 }, + }; + int i; + int isNeg = 0; + if( zArg[0]=='-' ){ + isNeg = 1; + zArg++; + }else if( zArg[0]=='+' ){ + zArg++; + } + while( isdigit(zArg[0]) ){ + v = v*10 + zArg[0] - '0'; + zArg++; + } + for(i=0; i1 ){ int j; @@ -2880,10 +2917,11 @@ " -help show this message\n" " -html set output mode to HTML\n" " -interactive force interactive I/O\n" " -line set output mode to 'line'\n" " -list set output mode to 'list'\n" + " -mmap N default mmap size set to N\n" #ifdef SQLITE_ENABLE_MULTIPLEX " -multiplex enable the multiplexor VFS\n" #endif " -nullvalue TEXT set text string for NULL values. Default ''\n" " -separator SEP set output field separator. Default: '|'\n" @@ -2999,16 +3037,11 @@ int j, c; const char *zSize; sqlite3_int64 szHeap; zSize = cmdline_option_value(argc, argv, ++i); - szHeap = atoi(zSize); - for(j=0; (c = zSize[j])!=0; j++){ - if( c=='M' ){ szHeap *= 1000000; break; } - if( c=='K' ){ szHeap *= 1000; break; } - if( c=='G' ){ szHeap *= 1000000000; break; } - } + szHeap = integerValue(zSize); if( szHeap>0x7fff0000 ) szHeap = 0x7fff0000; sqlite3_config(SQLITE_CONFIG_HEAP, malloc((int)szHeap), (int)szHeap, 64); #endif #ifdef SQLITE_ENABLE_VFSTRACE }else if( strcmp(z,"-vfstrace")==0 ){ @@ -3024,10 +3057,12 @@ #ifdef SQLITE_ENABLE_MULTIPLEX }else if( strcmp(z,"-multiplex")==0 ){ extern int sqlite3_multiple_initialize(const char*,int); sqlite3_multiplex_initialize(0, 1); #endif + }else if( strcmp(z,"-mmap")==0 ){ + sqlite3_config(SQLITE_CONFIG_MMAP_LIMIT, integerValue(cmdline_option_value(argc,argv,++i))); }else if( strcmp(z,"-vfs")==0 ){ sqlite3_vfs *pVfs = sqlite3_vfs_find(cmdline_option_value(argc,argv,++i)); if( pVfs ){ sqlite3_vfs_register(pVfs, 1); }else{ @@ -3108,10 +3143,12 @@ }else if( strcmp(z,"-interactive")==0 ){ stdin_is_interactive = 1; }else if( strcmp(z,"-batch")==0 ){ stdin_is_interactive = 0; }else if( strcmp(z,"-heap")==0 ){ + i++; + }else if( strcmp(z,"-mmap")==0 ){ i++; }else if( strcmp(z,"-vfs")==0 ){ i++; #ifdef SQLITE_ENABLE_VFSTRACE }else if( strcmp(z,"-vfstrace")==0 ){ Index: src/sqlite.h.in ================================================================== --- src/sqlite.h.in +++ src/sqlite.h.in @@ -468,10 +468,11 @@ #define SQLITE_IOERR_SHMSIZE (SQLITE_IOERR | (19<<8)) #define SQLITE_IOERR_SHMLOCK (SQLITE_IOERR | (20<<8)) #define SQLITE_IOERR_SHMMAP (SQLITE_IOERR | (21<<8)) #define SQLITE_IOERR_SEEK (SQLITE_IOERR | (22<<8)) #define SQLITE_IOERR_DELETE_NOENT (SQLITE_IOERR | (23<<8)) +#define SQLITE_IOERR_MMAP (SQLITE_IOERR | (24<<8)) #define SQLITE_LOCKED_SHAREDCACHE (SQLITE_LOCKED | (1<<8)) #define SQLITE_BUSY_RECOVERY (SQLITE_BUSY | (1<<8)) #define SQLITE_CANTOPEN_NOTEMPDIR (SQLITE_CANTOPEN | (1<<8)) #define SQLITE_CANTOPEN_ISDIR (SQLITE_CANTOPEN | (2<<8)) #define SQLITE_CANTOPEN_FULLPATH (SQLITE_CANTOPEN | (3<<8)) @@ -726,10 +727,13 @@ int (*xShmMap)(sqlite3_file*, int iPg, int pgsz, int, void volatile**); int (*xShmLock)(sqlite3_file*, int offset, int n, int flags); void (*xShmBarrier)(sqlite3_file*); int (*xShmUnmap)(sqlite3_file*, int deleteFlag); /* Methods above are valid for version 2 */ + int (*xFetch)(sqlite3_file*, sqlite3_int64 iOfst, int iAmt, void **pp); + int (*xUnfetch)(sqlite3_file*, sqlite3_int64 iOfst, void *p); + /* Methods above are valid for version 3 */ /* Additional methods may be added in future releases */ }; /* ** CAPI3REF: Standard File Control Opcodes @@ -880,10 +884,16 @@ ** temporary filenames for TEMP tables and other internal uses. The ** argument should be a char** which will be filled with the filename ** written into memory obtained from [sqlite3_malloc()]. The caller should ** invoke [sqlite3_free()] on the result to avoid a memory leak. ** +**
  • [[SQLITE_FCNTL_MMAP_LIMIT]] +** The argument is assumed to pointer to a value of type sqlite3_int64 that +** is an advisory maximum number of bytes in the file to memory map. The +** pointer is overwritten with the old value. The limit is not changed if +** the original value pointed to is negative. +** ** */ #define SQLITE_FCNTL_LOCKSTATE 1 #define SQLITE_GET_LOCKPROXYFILE 2 #define SQLITE_SET_LOCKPROXYFILE 3 @@ -898,10 +908,11 @@ #define SQLITE_FCNTL_VFSNAME 12 #define SQLITE_FCNTL_POWERSAFE_OVERWRITE 13 #define SQLITE_FCNTL_PRAGMA 14 #define SQLITE_FCNTL_BUSYHANDLER 15 #define SQLITE_FCNTL_TEMPFILENAME 16 +#define SQLITE_FCNTL_MMAP_LIMIT 18 /* ** CAPI3REF: Mutex Handle ** ** The mutex module within SQLite defines [sqlite3_mutex] to be an @@ -1626,10 +1637,23 @@ ** points to a buffer containing the name of the main database file. If the ** fourth parameter is 1, then the SQL statement that the third parameter ** points to has just been executed. Or, if the fourth parameter is 2, then ** the connection being passed as the second parameter is being closed. The ** third parameter is passed NULL In this case. +** +** [[SQLITE_CONFIG_MMAP_LIMIT]] +**
    SQLITE_CONFIG_MMAP_LIMIT +**
    The sole argument should be a 64-bit integer (an sqlite3_int64) that +** is the default maximum number of bytes of process address space that +** should be used for accessing each database file using memory mapping. +** The default setting can be overridden by each database connection using +** either the [PRAGMA mmap_limit] command or the "mmaplimit" query parameter +** on the [URI filename] when opening the databaes file or by using the +** [SQLITE_FCNTL_MMAP_LIMIT] file control. The value set here overrides the +** compile-time default that is set using the [SQLITE_DEFAULT_MMAP_LIMIT] +** compile-time option. If the argument to this option is negative, then +** the memory map limit is set to the compile-time default. ** */ #define SQLITE_CONFIG_SINGLETHREAD 1 /* nil */ #define SQLITE_CONFIG_MULTITHREAD 2 /* nil */ #define SQLITE_CONFIG_SERIALIZED 3 /* nil */ @@ -1649,10 +1673,11 @@ #define SQLITE_CONFIG_URI 17 /* int */ #define SQLITE_CONFIG_PCACHE2 18 /* sqlite3_pcache_methods2* */ #define SQLITE_CONFIG_GETPCACHE2 19 /* sqlite3_pcache_methods2* */ #define SQLITE_CONFIG_COVERING_INDEX_SCAN 20 /* int */ #define SQLITE_CONFIG_SQLLOG 21 /* xSqllog, void* */ +#define SQLITE_CONFIG_MMAP_LIMIT 22 /* sqlite3_int64 */ /* ** CAPI3REF: Database Connection Configuration Options ** ** These constants are the available integer configuration options that Index: src/sqliteInt.h ================================================================== --- src/sqliteInt.h +++ src/sqliteInt.h @@ -832,10 +832,11 @@ sqlite3_mutex *mutex; /* Connection mutex */ Db *aDb; /* All backends */ int nDb; /* Number of backends currently in use */ int flags; /* Miscellaneous flags. See below */ i64 lastRowid; /* ROWID of most recent insert (see above) */ + i64 mxMmap; /* Default mmap_limit setting */ unsigned int openFlags; /* Flags passed to sqlite3_vfs.xOpen() */ int errCode; /* Most recent error code (SQLITE_*) */ int errMask; /* & result codes with this before returning */ u16 dbOptFlags; /* Flags to enable/disable optimizations */ u8 autoCommit; /* The auto-commit flag. */ @@ -2503,10 +2504,11 @@ sqlite3_mutex_methods mutex; /* Low-level mutex interface */ sqlite3_pcache_methods2 pcache2; /* Low-level page-cache interface */ void *pHeap; /* Heap storage space */ int nHeap; /* Size of pHeap[] */ int mnReq, mxReq; /* Min and max heap requests sizes */ + sqlite3_int64 mxMmap; /* Maximum mmap() space per open file */ void *pScratch; /* Scratch memory */ int szScratch; /* Size of each scratch buffer */ int nScratch; /* Number of scratch buffers */ void *pPage; /* Page cache memory */ int szPage; /* Size of each page in pPage[] */ Index: src/sqliteLimit.h ================================================================== --- src/sqliteLimit.h +++ src/sqliteLimit.h @@ -204,5 +204,25 @@ ** may be executed. */ #ifndef SQLITE_MAX_TRIGGER_DEPTH # define SQLITE_MAX_TRIGGER_DEPTH 1000 #endif + +/* +** Default maximum size of memory used by xFetch in the VFS. +*/ +#ifdef __APPLE__ +# include +# if TARGET_OS_IPHONE +# define SQLITE_DEFAULT_MMAP_LIMIT 0 +# endif +#endif +#ifndef SQLITE_DEFAULT_MMAP_LIMIT +# if defined(__linux__) \ + || defined(_WIN32) \ + || (defined(__APPLE__) && defined(__MACH__)) \ + || defined(__sun) +# define SQLITE_DEFAULT_MMAP_LIMIT 268435456 /* = 256*1024*1024 */ +# else +# define SQLITE_DEFAULT_MMAP_LIMIT 0 +# endif +#endif Index: src/test1.c ================================================================== --- src/test1.c +++ src/test1.c @@ -5841,10 +5841,35 @@ } Tcl_ResetResult(interp); return TCL_OK; } + +#if SQLITE_OS_UNIX +#include +#include + +static int test_getrusage( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + char buf[1024]; + struct rusage r; + memset(&r, 0, sizeof(r)); + getrusage(RUSAGE_SELF, &r); + + sprintf(buf, "ru_utime=%d.%06d ru_stime=%d.%06d ru_minflt=%d ru_majflt=%d", + (int)r.ru_utime.tv_sec, (int)r.ru_utime.tv_usec, + (int)r.ru_stime.tv_sec, (int)r.ru_stime.tv_usec, + (int)r.ru_minflt, (int)r.ru_majflt + ); + Tcl_SetObjResult(interp, Tcl_NewStringObj(buf, -1)); + return TCL_OK; +} +#endif #if SQLITE_OS_WIN /* ** Information passed from the main thread into the windows file locker ** background thread. @@ -6231,10 +6256,13 @@ { "test_sqlite3_log", test_sqlite3_log, 0 }, #ifndef SQLITE_OMIT_EXPLAIN { "print_explain_query_plan", test_print_eqp, 0 }, #endif { "sqlite3_test_control", test_test_control }, +#if SQLITE_OS_UNIX + { "getrusage", test_getrusage }, +#endif }; static int bitmask_size = sizeof(Bitmask)*8; int i; extern int sqlite3_sync_count, sqlite3_fullsync_count; extern int sqlite3_opentemp_count; Index: src/test_syscall.c ================================================================== --- src/test_syscall.c +++ src/test_syscall.c @@ -21,11 +21,11 @@ ** LIST must be a list consisting of zero or more of the following ** literal values: ** ** open close access getcwd stat fstat ** ftruncate fcntl read pread pread64 write -** pwrite pwrite64 fchmod fallocate +** pwrite pwrite64 fchmod fallocate mmap ** ** test_syscall uninstall ** Uninstall all wrapper functions. ** ** test_syscall fault ?COUNT PERSIST? @@ -79,10 +79,11 @@ #if SQLITE_OS_UNIX /* From test1.c */ extern const char *sqlite3TestErrorName(int); +#include #include #include static struct TestSyscallGlobal { int bPersist; /* 1 for persistent errors, 0 for transient */ @@ -104,11 +105,12 @@ static int ts_write(int fd, const void *aBuf, size_t nBuf); static int ts_pwrite(int fd, const void *aBuf, size_t nBuf, off_t off); static int ts_pwrite64(int fd, const void *aBuf, size_t nBuf, off_t off); static int ts_fchmod(int fd, mode_t mode); static int ts_fallocate(int fd, off_t off, off_t len); - +static void *ts_mmap(void *, size_t, int, int, int, off_t); +static void *ts_mremap(void*, size_t, size_t, int, ...); struct TestSyscallArray { const char *zName; sqlite3_syscall_ptr xTest; sqlite3_syscall_ptr xOrig; @@ -129,10 +131,12 @@ /* 11 */ { "write", (sqlite3_syscall_ptr)ts_write, 0, 0, 0 }, /* 12 */ { "pwrite", (sqlite3_syscall_ptr)ts_pwrite, 0, 0, 0 }, /* 13 */ { "pwrite64", (sqlite3_syscall_ptr)ts_pwrite64, 0, 0, 0 }, /* 14 */ { "fchmod", (sqlite3_syscall_ptr)ts_fchmod, 0, 0, 0 }, /* 15 */ { "fallocate", (sqlite3_syscall_ptr)ts_fallocate, 0, 0, 0 }, + /* 16 */ { "mmap", (sqlite3_syscall_ptr)ts_mmap, 0, 0, 0 }, + /* 17 */ { "mremap", (sqlite3_syscall_ptr)ts_mremap, 0, 0, 0 }, { 0, 0, 0, 0, 0 } }; #define orig_open ((int(*)(const char *, int, int))aSyscall[0].xOrig) #define orig_close ((int(*)(int))aSyscall[1].xOrig) @@ -150,10 +154,12 @@ aSyscall[12].xOrig) #define orig_pwrite64 ((ssize_t(*)(int,const void*,size_t,off_t))\ aSyscall[13].xOrig) #define orig_fchmod ((int(*)(int,mode_t))aSyscall[14].xOrig) #define orig_fallocate ((int(*)(int,off_t,off_t))aSyscall[15].xOrig) +#define orig_mmap ((void*(*)(void*,size_t,int,int,int,off_t))aSyscall[16].xOrig) +#define orig_mremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[17].xOrig) /* ** This function is called exactly once from within each invocation of a ** system call wrapper in this file. It returns 1 if the function should ** fail, or 0 if it should succeed. @@ -374,10 +380,35 @@ if( tsIsFail() ){ return tsErrno("fallocate"); } return orig_fallocate(fd, off, len); } + +static void *ts_mmap( + void *pAddr, + size_t nByte, + int prot, + int flags, + int fd, + off_t iOff +){ + if( tsIsFailErrno("mmap") ){ + return MAP_FAILED; + } + return orig_mmap(pAddr, nByte, prot, flags, fd, iOff); +} + +static void *ts_mremap(void *a, size_t b, size_t c, int d, ...){ + va_list ap; + void *pArg; + if( tsIsFailErrno("mremap") ){ + return MAP_FAILED; + } + va_start(ap, d); + pArg = va_arg(ap, void *); + return orig_mremap(a, b, c, d, pArg); +} static int test_syscall_install( void * clientData, Tcl_Interp *interp, int objc, Index: src/wal.c ================================================================== --- src/wal.c +++ src/wal.c @@ -1206,11 +1206,11 @@ ** problems caused by applications routinely shutting down without ** checkpointing the log file. */ if( pWal->hdr.nPage ){ sqlite3_log(SQLITE_OK, "Recovered %d frames from WAL file %s", - pWal->hdr.nPage, pWal->zWalName + pWal->hdr.mxFrame, pWal->zWalName ); } } recovery_error: @@ -1720,20 +1720,21 @@ /* Sync the WAL to disk */ if( sync_flags ){ rc = sqlite3OsSync(pWal->pWalFd, sync_flags); } - /* If the database file may grow as a result of this checkpoint, hint - ** about the eventual size of the db file to the VFS layer. + /* If the database may grow as a result of this checkpoint, hint + ** about the eventual size of the db file to the VFS layer. */ if( rc==SQLITE_OK ){ i64 nReq = ((i64)mxPage * szPage); rc = sqlite3OsFileSize(pWal->pDbFd, &nSize); if( rc==SQLITE_OK && nSizepDbFd, SQLITE_FCNTL_SIZE_HINT, &nReq); } } + /* Iterate through the contents of the WAL, copying data to the db file. */ while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ i64 iOffset; assert( walFramePgno(pWal, iFrame)==iDbpage ); @@ -2285,23 +2286,21 @@ pWal->readLock = -1; } } /* -** Read a page from the WAL, if it is present in the WAL and if the -** current read transaction is configured to use the WAL. +** Search the wal file for page pgno. If found, set *piRead to the frame that +** contains the page. Otherwise, if pgno is not in the wal file, set *piRead +** to zero. ** -** The *pInWal is set to 1 if the requested page is in the WAL and -** has been loaded. Or *pInWal is set to 0 if the page was not in -** the WAL and needs to be read out of the database. +** Return SQLITE_OK if successful, or an error code if an error occurs. If an +** error does occur, the final value of *piRead is undefined. */ -int sqlite3WalRead( +int sqlite3WalFindFrame( Wal *pWal, /* WAL handle */ Pgno pgno, /* Database page number to read data for */ - int *pInWal, /* OUT: True if data is read from WAL */ - int nOut, /* Size of buffer pOut in bytes */ - u8 *pOut /* Buffer to write page data to */ + u32 *piRead /* OUT: Frame number (or zero) */ ){ u32 iRead = 0; /* If !=0, WAL frame to return data from */ u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */ int iHash; /* Used to loop through N hash tables */ @@ -2313,11 +2312,11 @@ ** in this case as an optimization. Likewise, if pWal->readLock==0, ** then the WAL is ignored by the reader so return early, as if the ** WAL were empty. */ if( iLast==0 || pWal->readLock==0 ){ - *pInWal = 0; + *piRead = 0; return SQLITE_OK; } /* Search the hash table or tables for an entry matching page number ** pgno. Each iteration of the following for() loop searches one @@ -2384,30 +2383,35 @@ } assert( iRead==iRead2 ); } #endif - /* If iRead is non-zero, then it is the log frame number that contains the - ** required page. Read and return data from the log file. - */ - if( iRead ){ - int sz; - i64 iOffset; - sz = pWal->hdr.szPage; - sz = (sz&0xfe00) + ((sz&0x0001)<<16); - testcase( sz<=32768 ); - testcase( sz>=65536 ); - iOffset = walFrameOffset(iRead, sz) + WAL_FRAME_HDRSIZE; - *pInWal = 1; - /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */ - return sqlite3OsRead(pWal->pWalFd, pOut, (nOut>sz ? sz : nOut), iOffset); - } - - *pInWal = 0; + *piRead = iRead; return SQLITE_OK; } +/* +** Read the contents of frame iRead from the wal file into buffer pOut +** (which is nOut bytes in size). Return SQLITE_OK if successful, or an +** error code otherwise. +*/ +int sqlite3WalReadFrame( + Wal *pWal, /* WAL handle */ + u32 iRead, /* Frame to read */ + int nOut, /* Size of buffer pOut in bytes */ + u8 *pOut /* Buffer to write page data to */ +){ + int sz; + i64 iOffset; + sz = pWal->hdr.szPage; + sz = (sz&0xfe00) + ((sz&0x0001)<<16); + testcase( sz<=32768 ); + testcase( sz>=65536 ); + iOffset = walFrameOffset(iRead, sz) + WAL_FRAME_HDRSIZE; + /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */ + return sqlite3OsRead(pWal->pWalFd, pOut, (nOut>sz ? sz : nOut), iOffset); +} /* ** Return the size of the database in pages (or zero, if unknown). */ Pgno sqlite3WalDbsize(Wal *pWal){ @@ -2950,10 +2954,13 @@ } /* Read the wal-index header. */ if( rc==SQLITE_OK ){ rc = walIndexReadHdr(pWal, &isChanged); + if( isChanged && pWal->pDbFd->pMethods->iVersion>=3 ){ + sqlite3OsUnfetch(pWal->pDbFd, 0, 0); + } } /* Copy data from the log to the database file. */ if( rc==SQLITE_OK ){ if( pWal->hdr.mxFrame && walPagesize(pWal)!=nBuf ){ Index: src/wal.h ================================================================== --- src/wal.h +++ src/wal.h @@ -29,11 +29,10 @@ # define sqlite3WalOpen(x,y,z) 0 # define sqlite3WalLimit(x,y) # define sqlite3WalClose(w,x,y,z) 0 # define sqlite3WalBeginReadTransaction(y,z) 0 # define sqlite3WalEndReadTransaction(z) -# define sqlite3WalRead(v,w,x,y,z) 0 # define sqlite3WalDbsize(y) 0 # define sqlite3WalBeginWriteTransaction(y) 0 # define sqlite3WalEndWriteTransaction(x) 0 # define sqlite3WalUndo(x,y,z) 0 # define sqlite3WalSavepoint(y,z) @@ -69,11 +68,12 @@ */ int sqlite3WalBeginReadTransaction(Wal *pWal, int *); void sqlite3WalEndReadTransaction(Wal *pWal); /* Read a page from the write-ahead log, if it is present. */ -int sqlite3WalRead(Wal *pWal, Pgno pgno, int *pInWal, int nOut, u8 *pOut); +int sqlite3WalFindFrame(Wal *, Pgno, u32 *); +int sqlite3WalReadFrame(Wal *, u32, int, u8 *); /* If the WAL is not empty, return the size of the database. */ Pgno sqlite3WalDbsize(Wal *pWal); /* Obtain or release the WRITER lock. */ ADDED test/btreefault.test Index: test/btreefault.test ================================================================== --- /dev/null +++ test/btreefault.test @@ -0,0 +1,50 @@ +# 2013 April 02 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# This file contains fault injection tests designed to test the btree.c +# module. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/malloc_common.tcl +set testprefix btreefault + +do_test 1-pre1 { + execsql { + PRAGMA auto_vacuum = incremental; + PRAGMA journal_mode = DELETE; + CREATE TABLE t1(a PRIMARY KEY, b); + INSERT INTO t1 VALUES(randomblob(1000), randomblob(100)); + INSERT INTO t1 SELECT randomblob(1000), randomblob(1000) FROM t1; + INSERT INTO t1 SELECT randomblob(1000), randomblob(1000) FROM t1; + INSERT INTO t1 SELECT randomblob(1000), randomblob(1000) FROM t1; + INSERT INTO t1 SELECT randomblob(1000), randomblob(1000) FROM t1; + DELETE FROM t1 WHERE rowid%2; + } + faultsim_save_and_close +} {} + +do_faultsim_test 1 -prep { + faultsim_restore_and_reopen + set ::STMT [sqlite3_prepare db "SELECT * FROM t1 ORDER BY a" -1 DUMMY] + sqlite3_step $::STMT + sqlite3_step $::STMT +} -body { + execsql { PRAGMA incremental_vacuum = 10 } +} -test { + sqlite3_finalize $::STMT + faultsim_test_result {0 {}} + faultsim_integrity_check +} + +finish_test + Index: test/dbstatus2.test ================================================================== --- test/dbstatus2.test +++ test/dbstatus2.test @@ -38,10 +38,11 @@ } do_test 1.1 { db close sqlite3 db test.db + execsql { PRAGMA mmap_limit = 0 } expr {[file size test.db] / 1024} } 6 do_test 1.2 { execsql { SELECT b FROM t1 WHERE a=2 } Index: test/exclusive2.test ================================================================== --- test/exclusive2.test +++ test/exclusive2.test @@ -22,10 +22,18 @@ ifcapable {!pager_pragmas} { finish_test return } + +# Tests in this file verify that locking_mode=exclusive causes SQLite to +# use cached pages even if the database is changed on disk. This doesn't +# work with mmap. +if {[permutation]!="nommap"} { + finish_test + return +} # This module does not work right if the cache spills at unexpected # moments. So disable the soft-heap-limit. # sqlite3_soft_heap_limit 0 Index: test/func.test ================================================================== --- test/func.test +++ test/func.test @@ -1271,15 +1271,17 @@ db close sqlite3 db test.db sqlite3_db_status db CACHE_MISS 1 db eval {SELECT typeof(+x) FROM t29 ORDER BY id} } {integer null real blob text} -do_test func-29.4 { - set x [lindex [sqlite3_db_status db CACHE_MISS 1] 1] - if {$x>100} {set x many} - set x -} {many} +if {[permutation] == "nommap"} { + do_test func-29.4 { + set x [lindex [sqlite3_db_status db CACHE_MISS 1] 1] + if {$x>100} {set x many} + set x + } {many} +} do_test func-29.5 { db close sqlite3 db test.db sqlite3_db_status db CACHE_MISS 1 db eval {SELECT sum(length(x)) FROM t29} Index: test/incrblob.test ================================================================== --- test/incrblob.test +++ test/incrblob.test @@ -121,10 +121,11 @@ db close forcedelete test.db test.db-journal sqlite3 db test.db + execsql "PRAGMA mmap_limit = 0" execsql "PRAGMA auto_vacuum = $AutoVacuumMode" do_test incrblob-2.$AutoVacuumMode.1 { set ::str [string repeat abcdefghij 2900] execsql { @@ -147,10 +148,11 @@ do_test incrblob-2.$AutoVacuumMode.3 { # Open and close the db to make sure the page cache is empty. db close sqlite3 db test.db + execsql "PRAGMA mmap_limit = 0" # Read the last 20 bytes of the blob via a blob handle. set ::blob [db incrblob blobs v 1] seek $::blob -20 end set ::fragment [read $::blob] @@ -169,10 +171,11 @@ do_test incrblob-2.$AutoVacuumMode.5 { # Open and close the db to make sure the page cache is empty. db close sqlite3 db test.db + execsql "PRAGMA mmap_limit = 0" # Write the second-to-last 20 bytes of the blob via a blob handle. # set ::blob [db incrblob blobs v 1] seek $::blob -40 end @@ -198,10 +201,11 @@ do_test incrblob-2.$AutoVacuumMode.8 { # Open and close the db to make sure the page cache is empty. db close sqlite3 db test.db + execsql { PRAGMA mmap_limit = 0 } execsql { SELECT i FROM blobs } } {45} do_test incrblob-2.$AutoVacuumMode.9 { Index: test/malloc_common.tcl ================================================================== --- test/malloc_common.tcl +++ test/malloc_common.tcl @@ -262,11 +262,11 @@ proc faultsim_test_result_int {args} { upvar testrc testrc testresult testresult testnfail testnfail set t [list $testrc $testresult] set r $args if { ($testnfail==0 && $t != [lindex $r 0]) || [lsearch $r $t]<0 } { - error "nfail=$testnfail rc=$testrc result=$testresult" + error "nfail=$testnfail rc=$testrc result=$testresult list=$r" } } #-------------------------------------------------------------------------- # Usage do_one_faultsim_test NAME ?OPTIONS...? ADDED test/mmap1.test Index: test/mmap1.test ================================================================== --- /dev/null +++ test/mmap1.test @@ -0,0 +1,250 @@ +# 2013 March 20 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/lock_common.tcl +set testprefix mmap1 + +proc nRead {db} { + set bt [btree_from_db $db] + db_enter $db + array set stats [btree_pager_stats $bt] + db_leave $db + # puts [array get stats] + return $stats(read) +} + +foreach {t mmap_limit nRead c2init} { + 1.1 { PRAGMA mmap_limit = 67108864 } 4 {} + 1.2 { PRAGMA mmap_limit = 53248 } 150 {} + 1.3 { PRAGMA mmap_limit = 0 } 344 {} + 1.4 { PRAGMA mmap_limit = 67108864 } 4 {PRAGMA mmap_limit = 67108864 } + 1.5 { PRAGMA mmap_limit = 53248 } 150 {PRAGMA mmap_limit = 67108864 } + 1.6 { PRAGMA mmap_limit = 0 } 344 {PRAGMA mmap_limit = 67108864 } +} { + do_multiclient_test tn { + sql1 {PRAGMA page_size=1024} + sql1 $mmap_limit + sql2 $c2init + + code2 { + set ::rcnt 0 + proc rblob {n} { + set ::rcnt [expr (($::rcnt << 3) + $::rcnt + 456) & 0xFFFFFFFF] + set str [format %.8x [expr $::rcnt ^ 0xbdf20da3]] + string range [string repeat $str [expr $n/4]] 1 $n + } + db2 func rblob rblob + } + + sql2 { + PRAGMA page_size=1024; + PRAGMA auto_vacuum = 1; + CREATE TABLE t1(a, b, UNIQUE(a, b)); + INSERT INTO t1 VALUES(rblob(500), rblob(500)); + INSERT INTO t1 SELECT rblob(500), rblob(500) FROM t1; -- 2 + INSERT INTO t1 SELECT rblob(500), rblob(500) FROM t1; -- 4 + INSERT INTO t1 SELECT rblob(500), rblob(500) FROM t1; -- 8 + INSERT INTO t1 SELECT rblob(500), rblob(500) FROM t1; -- 16 + INSERT INTO t1 SELECT rblob(500), rblob(500) FROM t1; -- 32 + } + do_test $t.$tn.1 { + sql1 "SELECT count(*) FROM t1; PRAGMA integrity_check ; PRAGMA page_count" + } {32 ok 77} + + # Have connection 2 shrink the file. Check connection 1 can still read it. + sql2 { DELETE FROM t1 WHERE rowid%2; } + do_test $t.$tn.2 { + sql1 "SELECT count(*) FROM t1; PRAGMA integrity_check ; PRAGMA page_count" + } {16 ok 42} + + # Have connection 2 grow the file. Check connection 1 can still read it. + sql2 { INSERT INTO t1 SELECT rblob(500), rblob(500) FROM t1 } + do_test $t.$tn.3 { + sql1 "SELECT count(*) FROM t1; PRAGMA integrity_check ; PRAGMA page_count" + } {32 ok 79} + + # Have connection 2 grow the file again. Check connection 1 is still ok. + sql2 { INSERT INTO t1 SELECT rblob(500), rblob(500) FROM t1 } + do_test $t.$tn.4 { + sql1 "SELECT count(*) FROM t1; PRAGMA integrity_check ; PRAGMA page_count" + } {64 ok 149} + + # Check that the number of pages read by connection 1 indicates that the + # "PRAGMA mmap_limit" command worked. + do_test $t.$tn.5 { nRead db } $nRead + } +} + +set ::rcnt 0 +proc rblob {n} { + set ::rcnt [expr (($::rcnt << 3) + $::rcnt + 456) & 0xFFFFFFFF] + set str [format %.8x [expr $::rcnt ^ 0xbdf20da3]] + string range [string repeat $str [expr $n/4]] 1 $n +} + +reset_db +db func rblob rblob + +do_execsql_test 2.1 { + PRAGMA auto_vacuum = 1; + PRAGMA mmap_limit = 67108864; + PRAGMA journal_mode = wal; + CREATE TABLE t1(a, b, UNIQUE(a, b)); + INSERT INTO t1 VALUES(rblob(500), rblob(500)); + INSERT INTO t1 SELECT rblob(500), rblob(500) FROM t1; -- 2 + INSERT INTO t1 SELECT rblob(500), rblob(500) FROM t1; -- 4 + INSERT INTO t1 SELECT rblob(500), rblob(500) FROM t1; -- 8 + INSERT INTO t1 SELECT rblob(500), rblob(500) FROM t1; -- 16 + INSERT INTO t1 SELECT rblob(500), rblob(500) FROM t1; -- 32 + PRAGMA wal_checkpoint; +} {67108864 wal 0 103 103} + +do_execsql_test 2.2 { + PRAGMA auto_vacuum; + SELECT count(*) FROM t1; +} {1 32} + +do_test 2.3 { + sqlite3 db2 test.db + db2 func rblob rblob + db2 eval { + DELETE FROM t1 WHERE (rowid%4); + PRAGMA wal_checkpoint; + } + db2 eval { + INSERT INTO t1 SELECT rblob(500), rblob(500) FROM t1; -- 16 + SELECT count(*) FROM t1; + } +} {16} + +do_execsql_test 2.4 { + PRAGMA wal_checkpoint; +} {0 24 24} + +db2 close +reset_db +db func rblob rblob +do_execsql_test 3.1 { + PRAGMA auto_vacuum = 1; + + CREATE TABLE t1(a, b, UNIQUE(a, b)); + INSERT INTO t1 VALUES(rblob(500), rblob(500)); + INSERT INTO t1 SELECT rblob(500), rblob(500) FROM t1; -- 2 + INSERT INTO t1 SELECT rblob(500), rblob(500) FROM t1; -- 4 + INSERT INTO t1 SELECT rblob(500), rblob(500) FROM t1; -- 8 + + CREATE TABLE t2(a, b, UNIQUE(a, b)); + INSERT INTO t2 SELECT * FROM t1; +} {} + +do_test 3.2 { + set nRow 0 + db eval {SELECT * FROM t2 ORDER BY a, b} { + if {$nRow==4} { db eval { DELETE FROM t1 } } + incr nRow + } + set nRow +} {8} + +#------------------------------------------------------------------------- +# Ensure that existing cursors using xFetch() pages see changes made +# to rows using the incrblob API. +# +reset_db +set aaa [string repeat a 400] +set bbb [string repeat b 400] +set ccc [string repeat c 400] +set ddd [string repeat d 400] +set eee [string repeat e 400] + +do_execsql_test 4.1 { + PRAGMA page_size = 1024; + CREATE TABLE t1(x); + INSERT INTO t1 VALUES($aaa); + INSERT INTO t1 VALUES($bbb); + INSERT INTO t1 VALUES($ccc); + INSERT INTO t1 VALUES($ddd); + SELECT * FROM t1; + BEGIN; +} [list $aaa $bbb $ccc $ddd] + +do_test 4.2 { + set ::STMT [sqlite3_prepare db "SELECT * FROM t1 ORDER BY rowid" -1 dummy] + sqlite3_step $::STMT + sqlite3_column_text $::STMT 0 +} $aaa + +do_test 4.3 { + foreach r {2 3 4} { + set fd [db incrblob t1 x $r] + puts -nonewline $fd $eee + close $fd + } + + set res [list] + while {"SQLITE_ROW" == [sqlite3_step $::STMT]} { + lappend res [sqlite3_column_text $::STMT 0] + } + set res +} [list $eee $eee $eee] + +do_test 4.4 { + sqlite3_finalize $::STMT +} SQLITE_OK + +do_execsql_test 4.5 { COMMIT } + +#------------------------------------------------------------------------- +# Ensure that existing cursors holding xFetch() references are not +# confused if those pages are moved to make way for the root page of a +# new table or index. +# +reset_db +do_execsql_test 5.1 { + PRAGMA auto_vacuum = 2; + PRAGMA page_size = 1024; + CREATE TABLE t1(x); + INSERT INTO t1 VALUES($aaa); + INSERT INTO t1 VALUES($bbb); + INSERT INTO t1 VALUES($ccc); + INSERT INTO t1 VALUES($ddd); + + PRAGMA auto_vacuum; + SELECT * FROM t1; +} [list 2 $aaa $bbb $ccc $ddd] + +do_test 5.2 { + set ::STMT [sqlite3_prepare db "SELECT * FROM t1 ORDER BY rowid" -1 dummy] + sqlite3_step $::STMT + sqlite3_column_text $::STMT 0 +} $aaa + +do_execsql_test 5.3 { + CREATE TABLE t2(x); + INSERT INTO t2 VALUES('tricked you!'); + INSERT INTO t2 VALUES('tricked you!'); +} + +do_test 5.4 { + sqlite3_step $::STMT + sqlite3_column_text $::STMT 0 +} $bbb + +do_test 5.5 { + sqlite3_finalize $::STMT +} SQLITE_OK + +finish_test + + ADDED test/mmap2.test Index: test/mmap2.test ================================================================== --- /dev/null +++ test/mmap2.test @@ -0,0 +1,83 @@ +# 2013 March 20 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# This file tests the effect of the mmap() or mremap() system calls +# returning an error on the library. +# +# If either mmap() or mremap() fails, SQLite should log an error +# message, then continue accessing the database using read() and +# write() exclusively. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix mmap2 + +if {[test_syscall defaultvfs] != "unix"} { + finish_test + return +} + +db close +sqlite3_shutdown +test_sqlite3_log xLog +proc xLog {error_code msg} { + if {[string match os_unix.c* $msg]} { + lappend ::log $msg + } +} + +foreach syscall {mmap mremap} { + test_syscall uninstall + if {[catch {test_syscall install $syscall}]} continue + + for {set i 1} {$i < 20} {incr i} { + reset_db + + test_syscall fault $i 1 + test_syscall errno $syscall ENOMEM + set ::log "" + + do_execsql_test 1.$syscall.$i.1 { + CREATE TABLE t1(a, b, UNIQUE(a, b)); + INSERT INTO t1 VALUES(randomblob(1000), randomblob(1000)); + INSERT INTO t1 SELECT randomblob(1000), randomblob(1000) FROM t1; + INSERT INTO t1 SELECT randomblob(1000), randomblob(1000) FROM t1; + INSERT INTO t1 SELECT randomblob(1000), randomblob(1000) FROM t1; + INSERT INTO t1 SELECT randomblob(1000), randomblob(1000) FROM t1; + INSERT INTO t1 SELECT randomblob(1000), randomblob(1000) FROM t1; + INSERT INTO t1 SELECT randomblob(1000), randomblob(1000) FROM t1; + } + + set nFail [test_syscall fault 0 0] + + do_execsql_test 1.$syscall.$i.2 { + SELECT count(*) FROM t1; + PRAGMA integrity_check; + } {64 ok} + + do_test 1.$syscall.$i.3 { + expr {$nFail==0 || $nFail==1} + } {1} + + do_test 1.$syscall.$i.4.nFail=$nFail { + regexp ".*${syscall}.*" $::log + } [expr $nFail>0] + } +} + +db close +test_syscall uninstall +sqlite3_shutdown +test_sqlite3_log +sqlite3_initialize +finish_test + Index: test/pagerfault.test ================================================================== --- test/pagerfault.test +++ test/pagerfault.test @@ -1240,10 +1240,67 @@ puts $::channel [string repeat abc 6000] flush $::channel } -test { catchsql { UPDATE t2 SET a = a_string(800), b = a_string(800) } catch { close $::channel } + catchsql { ROLLBACK } + faultsim_integrity_check +} + +#------------------------------------------------------------------------- +# +do_test pagerfault-28-pre { + faultsim_delete_and_reopen + db func a_string a_string + execsql { + PRAGMA page_size = 512; + + PRAGMA journal_mode = wal; + PRAGMA wal_autocheckpoint = 0; + PRAGMA cache_size = 100000; + + BEGIN; + CREATE TABLE t2(a UNIQUE, b UNIQUE); + INSERT INTO t2 VALUES( a_string(800), a_string(800) ); + INSERT INTO t2 SELECT a_string(800), a_string(800) FROM t2; + INSERT INTO t2 SELECT a_string(800), a_string(800) FROM t2; + INSERT INTO t2 SELECT a_string(800), a_string(800) FROM t2; + INSERT INTO t2 SELECT a_string(800), a_string(800) FROM t2; + INSERT INTO t2 SELECT a_string(800), a_string(800) FROM t2; + INSERT INTO t2 SELECT a_string(800), a_string(800) FROM t2; + INSERT INTO t2 SELECT a_string(800), a_string(800) FROM t2; + INSERT INTO t2 SELECT a_string(800), a_string(800) FROM t2; + INSERT INTO t2 SELECT a_string(800), a_string(800) FROM t2; + INSERT INTO t2 SELECT a_string(800), a_string(800) FROM t2; + INSERT INTO t2 SELECT a_string(800), a_string(800) FROM t2; + COMMIT; + CREATE TABLE t1(a PRIMARY KEY, b); + } + expr {[file size test.db-shm] >= 96*1024} +} {1} +faultsim_save_and_close + +do_faultsim_test pagerfault-28 -faults oom* -prep { + faultsim_restore_and_reopen + execsql { PRAGMA mmap_limit=0 } + + sqlite3 db2 test.db + db2 eval { SELECT count(*) FROM t2 } + + db func a_string a_string + execsql { + BEGIN; + INSERT INTO t1 VALUES(a_string(2000), a_string(2000)); + INSERT INTO t1 VALUES(a_string(2000), a_string(2000)); + } + set ::STMT [sqlite3_prepare db "SELECT * FROM t1 ORDER BY a" -1 DUMMY] + sqlite3_step $::STMT +} -body { + execsql { ROLLBACK } +} -test { + db2 close + sqlite3_finalize $::STMT catchsql { ROLLBACK } faultsim_integrity_check } finish_test Index: test/pageropt.test ================================================================== --- test/pageropt.test +++ test/pageropt.test @@ -90,11 +90,11 @@ do_test pageropt-1.5 { db2 eval {CREATE TABLE t2(y)} pagercount_sql { SELECT hex(x) FROM t1 } -} [list 6 0 0 $blobcontent] +} [list [expr {[permutation]=="nommap" ? 6 : 1}] 0 0 $blobcontent] do_test pageropt-1.6 { pagercount_sql { SELECT hex(x) FROM t1 } } [list 0 0 0 $blobcontent] Index: test/permutations.test ================================================================== --- test/permutations.test +++ test/permutations.test @@ -135,10 +135,18 @@ This test suite is the same as the "quick" tests, except that some files that test malloc and IO errors are omitted. } -files [ test_set $allquicktests -exclude *malloc* *ioerr* *fault* ] + +test_suite "nommap" -prefix "nomm-" -description { + Similar to veryquick. Except with memory mapping disabled. +} -presql { + pragma mmap_limit = 0; +} -files [ + test_set $allquicktests -exclude *malloc* *ioerr* *fault* -include malloc.test +] test_suite "valgrind" -prefix "" -description { Run the "veryquick" test suite with a couple of multi-process tests (that fail under valgrind) omitted. } -files [ Index: test/speed1p.test ================================================================== --- test/speed1p.test +++ test/speed1p.test @@ -22,10 +22,12 @@ #sqlite3_config_pagecache 1024 11000 set testdir [file dirname $argv0] source $testdir/tester.tcl speed_trial_init speed1 +sqlite3_memdebug_vfs_oom_test 0 + # Set a uniform random seed expr srand(0) # The number_name procedure below converts its argment (an integer) # into a string which is the English-language name for that number. @@ -75,11 +77,10 @@ } execsql { SELECT name FROM sqlite_master ORDER BY 1; } } {i2a i2b t1 t2} - # 50000 INSERTs on an unindexed table # set list {} for {set i 1} {$i<=50000} {incr i} { Index: test/syscall.test ================================================================== --- test/syscall.test +++ test/syscall.test @@ -58,11 +58,11 @@ # foreach s { open close access getcwd stat fstat ftruncate fcntl read pread write pwrite fchmod fallocate pread64 pwrite64 unlink openDirectory mkdir rmdir - statvfs fchown umask + statvfs fchown umask mmap munmap mremap } { if {[test_syscall exists $s]} {lappend syscall_list $s} } do_test 3.1 { lsort [test_syscall list] } [lsort $syscall_list] Index: test/sysfault.test ================================================================== --- test/sysfault.test +++ test/sysfault.test @@ -241,7 +241,37 @@ } } -test { faultsim_test_result {0 20000} } -finish_test +#------------------------------------------------------------------------- +# Test errors in mmap(). +# +proc vfsfault_install {} { + test_syscall reset + test_syscall install {mmap} +} + +faultsim_delete_and_reopen +execsql { + CREATE TABLE t1(a, b); + INSERT INTO t1 VALUES(1, 2); +} +faultsim_save_and_close + +do_faultsim_test 4 -faults vfsfault-* -prep { + faultsim_restore_and_reopen + file_control_chunksize_test db main 8192 + execsql { + PRAGMA mmap_limit = 1000000; + } +} -body { + test_syscall errno mmap EACCES + + execsql { + SELECT * FROM t1; + } +} -test { + faultsim_test_result {0 {1 2}} {1 {disk I/O error}} +} +finish_test Index: test/wal.test ================================================================== --- test/wal.test +++ test/wal.test @@ -725,39 +725,41 @@ do_test wal-11.9 { db close list [expr [file size test.db]/1024] [log_deleted test.db-wal] } {37 1} sqlite3_wal db test.db +set nWal 39 +if {[permutation]=="nommap"} {set nWal 37} do_test wal-11.10 { execsql { PRAGMA cache_size = 10; BEGIN; INSERT INTO t1 SELECT blob(900) FROM t1; -- 32 SELECT count(*) FROM t1; } list [expr [file size test.db]/1024] [file size test.db-wal] -} [list 37 [wal_file_size 37 1024]] +} [list 37 [wal_file_size $nWal 1024]] do_test wal-11.11 { execsql { SELECT count(*) FROM t1; ROLLBACK; SELECT count(*) FROM t1; } } {32 16} do_test wal-11.12 { list [expr [file size test.db]/1024] [file size test.db-wal] -} [list 37 [wal_file_size 37 1024]] +} [list 37 [wal_file_size $nWal 1024]] do_test wal-11.13 { execsql { INSERT INTO t1 VALUES( blob(900) ); SELECT count(*) FROM t1; PRAGMA integrity_check; } } {17 ok} do_test wal-11.14 { list [expr [file size test.db]/1024] [file size test.db-wal] -} [list 37 [wal_file_size 37 1024]] +} [list 37 [wal_file_size $nWal 1024]] #------------------------------------------------------------------------- # This block of tests, wal-12.*, tests the fix for a problem that # could occur if a log that is a prefix of an older log is written @@ -1564,12 +1566,32 @@ # ZERO_DAMAGE flag were not set. do_test 24.5 { file size test.db-wal } [wal_file_size 1 1024] } + +reset_db +do_test 25 { + sqlite3 db test.db + + execsql { + CREATE TABLE t1(x, y); + PRAGMA journal_mode = WAL; + INSERT INTO t1 VALUES(1, 2); + } + + execsql { + BEGIN; + CREATE TABLE t2(a, b); + } + + hexio_write test.db-shm [expr 16*1024] [string repeat 0055 8192] + catchsql ROLLBACK +} {0 {}} + db close sqlite3_shutdown test_sqlite3_log sqlite3_initialize finish_test Index: test/wal5.test ================================================================== --- test/wal5.test +++ test/wal5.test @@ -233,11 +233,19 @@ do_test 2.3.$tn.3 { sql2 { BEGIN; SELECT * FROM t1 } } {1 2} do_test 2.3.$tn.4 { sql1 { INSERT INTO t1 VALUES(3, 4) } } {} do_test 2.3.$tn.5 { sql1 { INSERT INTO t2 VALUES(3, 4) } } {} do_test 2.3.$tn.6 { file_page_counts } {1 4 1 4} do_test 2.3.$tn.7 { code1 { do_wal_checkpoint db -mode full } } {1 4 3} - do_test 2.3.$tn.8 { file_page_counts } {1 4 2 4} + + # The checkpoint above only writes page 1 of the db file. The other + # page (page 2) is locked by the read-transaction opened by the + # [sql2] commmand above. So normally, the db is 1 page in size here. + # However, in mmap() mode, the db is pre-allocated to 2 pages at the + # start of the checkpoint, even though page 2 cannot be written. + set nDb 2 + if {[permutation]=="no-mmap"} {set nDb 1} + do_test 2.3.$tn.8 { file_page_counts } [list $nDb 4 2 4] } # Check that checkpoints block on the correct locks. And respond correctly # if they cannot obtain those locks. There are three locks that a checkpoint # may block on (in the following order): @@ -341,6 +349,5 @@ } } finish_test -