Index: lsm-test/lsmtest_tdb3.c ================================================================== --- lsm-test/lsmtest_tdb3.c +++ lsm-test/lsmtest_tdb3.c @@ -301,10 +301,11 @@ lsm_env *pRealEnv = tdb_lsm_env(); LsmFile *p = (LsmFile *)pFile; return pRealEnv->xSectorSize(p->pReal); } +#if 0 static int testEnvRemap( lsm_file *pFile, lsm_i64 iMin, void **ppOut, lsm_i64 *pnOut @@ -311,10 +312,33 @@ ){ lsm_env *pRealEnv = tdb_lsm_env(); LsmFile *p = (LsmFile *)pFile; return pRealEnv->xRemap(p->pReal, iMin, ppOut, pnOut); } +#endif + +static int testEnvMap( + lsm_file *pFile, + lsm_i64 iOff, + lsm_i64 nByte, + void **ppOut, + lsm_i64 *pszOut +){ + lsm_env *pRealEnv = tdb_lsm_env(); + LsmFile *p = (LsmFile *)pFile; + return pRealEnv->xMap(p->pReal, iOff, nByte, ppOut, pszOut); +} + +static int testEnvUnmap( + lsm_file *pFile, + void *pMap, + lsm_i64 nMap +){ + lsm_env *pRealEnv = tdb_lsm_env(); + LsmFile *p = (LsmFile *)pFile; + return pRealEnv->xUnmap(p->pReal, pMap, nMap); +} static int testEnvFileid( lsm_file *pFile, void *ppOut, int *pnOut @@ -942,11 +966,15 @@ pDb->env.xRead = testEnvRead; pDb->env.xWrite = testEnvWrite; pDb->env.xTruncate = testEnvTruncate; pDb->env.xSync = testEnvSync; pDb->env.xSectorSize = testEnvSectorSize; +#if 0 pDb->env.xRemap = testEnvRemap; +#endif + pDb->env.xMap = testEnvMap; + pDb->env.xUnmap = testEnvUnmap; pDb->env.xFileid = testEnvFileid; pDb->env.xClose = testEnvClose; pDb->env.xUnlink = testEnvUnlink; pDb->env.xLock = testEnvLock; pDb->env.xTestLock = testEnvTestLock; Index: src/kvlsm.c ================================================================== --- src/kvlsm.c +++ src/kvlsm.c @@ -461,11 +461,11 @@ pNew->base.pStoreVfunc = &kvlsmMethods; pNew->base.pEnv = pEnv; rc = lsm_new(0, &pNew->pDb); if( rc==SQLITE4_OK ){ int i; - int bMmap = 0; + int bMmap = 1; lsm_config(pNew->pDb, LSM_CONFIG_MMAP, &bMmap); for(i=0; i #include #include +/* +** An object of this type provides an abstraction supporting the two +** different mmap() 
strategies. See the fsMmapXXX() methods below for +** details. +*/ +typedef struct MmapMgr MmapMgr; +typedef struct MmapMgrRef MmapMgrRef; + +struct MmapMgr { + int eUseMmap; /* LSM_MMAP_XXX mode */ + int nRef; /* Number of outstanding fsMmapRef() calls */ + + /* Used when eUseMmap==LSM_MMAP_FULL */ + void *pMap; /* Current mapping of database file */ + i64 nMap; /* Bytes mapped at pMap */ + + /* Used when eUseMmap==LSM_MMAP_LIMITED */ + int nEntry; /* Total number of currently mapped blocks */ + int nMapsz; /* Size of each mapping (default 1MB) */ + int nMaphash; /* Size of apHash[] in entries */ + MmapMgrRef **apHash; /* Hash table of existing mappings */ + MmapMgrRef *pAll; /* Linked list of all mappings */ + MmapMgrRef *pLast; /* Last element in pAll list */ +}; + +struct MmapMgrRef { + void *pMap; /* Pointer to mapped memory */ + int nRef; /* Number of refs to this mapping */ + int iMap; /* Mapping number */ + MmapMgrRef *pNextHash; /* Next mapped block with same hash key */ + MmapMgrRef *pNextAll; /* Next mapped block in pAll list */ + MmapMgrRef *pPrevAll; /* Previous mapped block in pAll list */ +}; + +#define LSM_MAPPED_BLOCKS 16 + /* ** File-system object. Each database connection allocates a single instance ** of the following structure. It is used for all access to the database and ** log files. ** +** eUseMmap: +** This variable determines whether or not, and how, the lsm_env.xMap() is +** used to access the database file. It may be set to 0, 1 or 2. As follows: +** +** 0 - xMap() is not used at all. +** +** 1 - xMap() is used to map the entire database file. +** +** 2 - xMap() is used to map 1MB blocks of the database file as required. +** The library attempts to limit the number of mapped blocks to +** LSM_MAPPED_BLOCKS at a time. This mode is intended for use on +** 32-bit platforms where mapping the entire database file may +** not be possible due to the limited address space available. 
+** ** pLruFirst, pLruLast: ** The first and last entries in a doubly-linked list of pages. The ** Page.pLruNext and Page.pLruPrev pointers are used to link the list ** elements together. ** @@ -169,10 +219,12 @@ ** as the file grows), the Page.aData pointers are updated by iterating ** through the contents of this list. ** ** In non-mmap() mode, this list is an LRU list of cached pages with ** nRef==0. +** +** apHash, nHash: */ struct FileSystem { lsm_db *pDb; /* Database handle that owns this object */ lsm_env *pEnv; /* Environment pointer */ char *zDb; /* Database file name */ @@ -192,14 +244,12 @@ lsm_compress *pCompress; u8 *aIBuffer; /* Buffer to compress to */ u8 *aOBuffer; /* Buffer to uncompress from */ int nBuffer; /* Allocated size of aBuffer[] in bytes */ - /* mmap() mode things */ - int bUseMmap; /* True to use mmap() to access db file */ - void *pMap; /* Current mapping of database file */ - i64 nMap; /* Bytes mapped at pMap */ + /* State variables used in mmap() mode. */ + MmapMgr mmapmgr; Page *pFree; Page *pWaiting; /* b-tree pages waiting to be written */ /* Statistics */ @@ -230,10 +280,11 @@ ** lsmFsPagePersist() after the page is written to disk. 
*/ struct Page { u8 *aData; /* Buffer containing page data */ int nData; /* Bytes of usable data at aData[] */ + MmapMgrRef *pRef; /* Mapping manager reference token */ Pgno iPg; /* Page number */ int nRef; /* Number of outstanding references */ int flags; /* Combination of PAGE_XXX flags */ Page *pHashNext; /* Next page in hash table slot */ Page *pLruNext; /* Next page in LRU list */ @@ -255,10 +306,11 @@ */ struct MetaPage { int iPg; /* Either 1 or 2 */ int bWrite; /* Write back to db file on release */ u8 *aData; /* Pointer to buffer */ + MmapMgrRef *pRef; /* Mmap manager reference token */ FileSystem *pFS; /* FileSystem that owns this page */ }; /* ** Values for LsmPage.flags @@ -337,19 +389,39 @@ return IOERR_WRAPPER( pEnv->xTruncate(pFile, nByte) ); } static int lsmEnvUnlink(lsm_env *pEnv, const char *zDel){ return IOERR_WRAPPER( pEnv->xUnlink(pEnv, zDel) ); } +#if 0 static int lsmEnvRemap( lsm_env *pEnv, lsm_file *pFile, i64 szMin, void **ppMap, i64 *pszMap ){ return pEnv->xRemap(pFile, szMin, ppMap, pszMap); } +#endif +static void lsmEnvUnmap( + lsm_env *pEnv, + lsm_file *pFile, + void *pMap, + lsm_i64 nMap +){ + pEnv->xUnmap(pFile, pMap, nMap); +} +static int lsmEnvMap( + lsm_env *pEnv, + lsm_file *pFile, + i64 iOff, + i64 szMin, + void **ppMap, + i64 *pszMap +){ + return pEnv->xMap(pFile, iOff, szMin, ppMap, pszMap); +} int lsmEnvLock(lsm_env *pEnv, lsm_file *pFile, int iLock, int eLock){ if( pFile==0 ) return LSM_OK; return pEnv->xLock(pFile, iLock, eLock); } @@ -441,11 +513,11 @@ */ int lsmFsCloseAndDeleteLog(FileSystem *pFS){ char *zDel; if( pFS->fdLog ){ - lsmEnvClose(pFS->pEnv, pFS->fdLog ); + lsmEnvClose(pFS->pEnv, pFS->fdLog); pFS->fdLog = 0; } zDel = lsmMallocPrintf(pFS->pEnv, "%s-log", pFS->zDb); if( zDel ){ @@ -481,10 +553,253 @@ *pRc = lsmEnvOpen(pFS->pEnv, zPath, flags, &pFile); } return pFile; } + +static void fsGrowMapping( + FileSystem *pFS, + i64 iSz, + int *pRc +){ + MmapMgr *p = &pFS->mmapmgr; + + /* This function won't work with 
compressed databases yet. */ + assert( pFS->pCompress==0 ); + assert( PAGE_HASPREV==4 ); + assert( p->eUseMmap==LSM_MMAP_FULL ); + + if( *pRc==LSM_OK && iSz>p->nMap ){ + int rc; + u8 *aOld = p->pMap; + if( aOld ){ + lsmEnvUnmap(pFS->pEnv, pFS->fdDb, aOld, p->nMap); + p->pMap = 0; + } + rc = lsmEnvMap(pFS->pEnv, pFS->fdDb, 0, iSz, &p->pMap, &p->nMap); + + if( rc==LSM_OK && p->pMap!=aOld ){ + Page *pFix; + i64 iOff = (u8 *)p->pMap - aOld; + for(pFix=pFS->pLruFirst; pFix; pFix=pFix->pLruNext){ + pFix->aData += iOff; + } + lsmSortedRemap(pFS->pDb); + } + *pRc = rc; + } +} + +/* +** Remove this block from the pAll list +*/ +static void fsMmapRemoveFromAll(MmapMgr *p, MmapMgrRef *pRef){ + if( pRef->pPrevAll ){ + pRef->pPrevAll->pNextAll = pRef->pNextAll; + }else{ + assert( pRef==p->pAll ); + p->pAll = pRef->pNextAll; + } + + if( pRef->pNextAll ){ + pRef->pNextAll->pPrevAll = pRef->pPrevAll; + }else{ + assert( pRef==p->pLast ); + p->pLast = pRef->pPrevAll; + } + + pRef->pNextAll = 0; + pRef->pPrevAll = 0; +} + +/* +** Add this block to the pAll list. To the start if bLast==0, or the end +** if bLast==1. +*/ +static void fsMmapAddToAll(MmapMgr *p, MmapMgrRef *pRef, int bLast){ + if( bLast ){ + assert( p->pLast==0 || p->pLast->pNextAll==0 ); + if( p->pLast ){ + p->pLast->pNextAll = pRef; + }else{ + p->pAll = pRef; + } + pRef->pPrevAll = p->pLast; + p->pLast = pRef; + }else{ + assert( p->pAll==0 || p->pAll->pPrevAll==0 ); + if( p->pAll ){ + p->pAll->pPrevAll = pRef; + }else{ + p->pLast = pRef; + } + pRef->pNextAll = p->pAll; + p->pAll = pRef; + } +} + +/* +** Obtain a reference to a mapping of the database file. 
+*/ +static void *fsMmapRef( + FileSystem *pFS, /* Memory map manager object */ + i64 iOff, /* File offset to return reference to */ + int nByte, /* Size of referenced block */ + MmapMgrRef **ppRef, /* OUT: Reference used to free memory */ + int *pRc /* IN/OUT: Error code */ +){ + lsm_env *pEnv = pFS->pEnv; + MmapMgr *p = &pFS->mmapmgr; + void *pRet = 0; + int rc = *pRc; + + assert( iOff>=0 && nByte>0 ); + assert( p->eUseMmap!=LSM_MMAP_OFF ); + assert( *ppRef==0 ); + + if( p->eUseMmap==LSM_MMAP_FULL ){ + fsGrowMapping(pFS, LSM_MAX(iOff+nByte, 8192), &rc); + if( rc==LSM_OK ){ + pRet = (void *)&((u8 *)p->pMap)[iOff]; + *ppRef = (MmapMgrRef *)pRet; + } + }else{ + MmapMgrRef *pRef = 0; /* Mapping reference */ + + /* If the hash table has not been allocated, allocate it now. */ + if( p->apHash==0 ){ + int nSz = sizeof(MmapMgrRef *) * LSM_MAPPED_BLOCKS * 2; + p->apHash = (MmapMgrRef **)lsmMallocZeroRc(pEnv, nSz, &rc); + p->nMaphash = LSM_MAPPED_BLOCKS * 2; + } + + if( rc==LSM_OK ){ + int iMap = (iOff / p->nMapsz); + + /* Search the hash table for the required mapping */ + int iHash = fsHashKey(p->nMaphash, iMap); + for(pRef=p->apHash[iHash]; pRef; pRef=pRef->pNextHash){ + if( pRef->iMap==iMap ) break; + } + + if( pRef ){ + /* Mapping was found. */ + fsMmapRemoveFromAll(p, pRef); + }else{ + /* We have no existing mapping for mapping block iMap. 
*/ + if( p->nEntry>=LSM_MAPPED_BLOCKS && p->pAll->nRef==0 ){ + MmapMgrRef **pp; + int iHash2; + pRef = p->pAll; + + /* Unmap this block's mapping */ + lsmEnvUnmap(pFS->pEnv, pFS->fdDb, pRef->pMap, p->nMapsz); + pRef->pMap = 0; + + /* Remove this mapped block from the hash table */ + iHash2 = fsHashKey(p->nMaphash, pRef->iMap); + for(pp=&p->apHash[iHash2]; *pp!=pRef; pp = &((*pp)->pNextHash)); + *pp = (*pp)->pNextHash; + pRef->pNextHash = 0; + + /* Remove this mapped block from the pAll list */ + fsMmapRemoveFromAll(p, pRef); +#if 0 + printf("recycling mapping %d -> %d\n", pRef->iMap, iMap); +#endif + + }else{ + pRef = (MmapMgrRef *)lsmMallocZeroRc(pEnv, sizeof(MmapMgrRef), &rc); + if( pRef ) p->nEntry++; + assert( p->nEntry<20 ); + } + + if( rc==LSM_OK ){ + i64 dummy; + rc = lsmEnvMap(pEnv, + pFS->fdDb, (i64)iMap*p->nMapsz, p->nMapsz, &pRef->pMap, &dummy + ); + if( rc==LSM_OK ){ + /* Link the mapped block into the hash table */ + pRef->pNextHash = p->apHash[iHash]; + pRef->iMap = iMap; + p->apHash[iHash] = pRef; + } + } + } + + if( rc==LSM_OK ){ + pRef->nRef++; + pRet = (void *)&((u8 *)(pRef->pMap))[iOff % p->nMapsz]; + fsMmapAddToAll(p, pRef, 1); + *ppRef = pRef; + }else{ + lsmFree(pEnv, pRef); + } + } + } + + *pRc = rc; + if( rc==LSM_OK ) p->nRef++; + return pRet; +} + +/* +** Release a reference returned by an earlier call to fsMmapRef(). +*/ +static void fsMmapUnref(FileSystem *pFS, MmapMgrRef **ppRef){ + MmapMgr *p = &pFS->mmapmgr; + MmapMgrRef *pRef = *ppRef; + + assert( p->eUseMmap!=LSM_MMAP_OFF ); + if( pRef ){ + p->nRef--; + + if( p->eUseMmap==LSM_MMAP_LIMITED ){ + pRef->nRef--; + assert( pRef->nRef>=0 ); + if( pRef->nRef==0 ){ + fsMmapRemoveFromAll(p, pRef); + fsMmapAddToAll(p, pRef, 0); + } + } + } + + *ppRef = 0; + assert( p->nRef>=0 ); +} + +/* +** Unmap all currently mapped blocks. Release all allocated memory. 
+*/ +static void fsMmapClose(FileSystem *pFS){ + MmapMgr *p = &pFS->mmapmgr; + assert( p->nRef==0 ); + if( p->eUseMmap==LSM_MMAP_FULL ){ + if( p->pMap ){ + lsmEnvUnmap(pFS->pEnv, pFS->fdDb, p->pMap, p->nMap); + p->pMap = 0; + p->nMap = 0; + } + } + else if( p->eUseMmap==LSM_MMAP_LIMITED ){ + MmapMgrRef *pRef; + MmapMgrRef *pNext; + for(pRef=p->pAll; pRef; pRef=pNext){ + pNext = pRef->pNextAll; + lsmEnvUnmap(pFS->pEnv, pFS->fdDb, pRef->pMap, p->nMapsz); + lsmFree(pFS->pEnv, pRef); + } + lsmFree(pFS->pEnv, p->apHash); + p->nEntry = 0; + p->nMaphash = 0; + p->apHash = 0; + p->pAll = 0; + p->pLast = 0; + } +} + /* ** If it is not already open, this function opens the log file. It returns ** LSM_OK if successful (or if the log file was already open) or an LSM ** error code otherwise. ** @@ -552,10 +867,16 @@ pFS->nPagesize = LSM_DFLT_PAGE_SIZE; pFS->nBlocksize = LSM_DFLT_BLOCK_SIZE; pFS->nMetasize = 4 * 1024; pFS->pDb = pDb; pFS->pEnv = pDb->pEnv; + if( !pDb->compress.xCompress ){ + pFS->mmapmgr.eUseMmap = pDb->eMmap; + pFS->mmapmgr.nMapsz = 1*1024*1024; + + /* pFS->mmapmgr.nMapsz = 4*1024; */ + } /* Make a copy of the database and log file names. 
*/ memcpy(pFS->zDb, zDb, nDb+1); memcpy(pFS->zLog, zDb, nDb); memcpy(&pFS->zLog[nDb], "-log", 5); @@ -608,14 +929,11 @@ lsmFree(pEnv, pFS->aIBuffer); lsmFree(pEnv, pFS->aOBuffer); pFS->nBuffer = 0; /* Unmap the file, if it is currently mapped */ - if( pFS->pMap ){ - lsmEnvRemap(pEnv, pFS->fdDb, -1, &pFS->pMap, &pFS->nMap); - pFS->bUseMmap = 0; - } + fsMmapClose(pFS); /* Free all allocate page structures */ pPg = pFS->pLruFirst; while( pPg ){ Page *pNext = pPg->pLruNext; @@ -631,14 +949,14 @@ pFS->pFree = 0; /* Configure the FileSystem object */ if( db->compress.xCompress ){ pFS->pCompress = &db->compress; - pFS->bUseMmap = 0; + pFS->mmapmgr.eUseMmap = LSM_MMAP_OFF; }else{ pFS->pCompress = 0; - pFS->bUseMmap = db->bMmap; + pFS->mmapmgr.eUseMmap = db->eMmap; } } return LSM_OK; } @@ -658,10 +976,11 @@ if( pPg->flags & PAGE_FREE ) lsmFree(pEnv, pPg->aData); lsmFree(pEnv, pPg); pPg = pNext; } + fsMmapClose(pFS); if( pFS->fdDb ) lsmEnvClose(pFS->pEnv, pFS->fdDb ); if( pFS->fdLog ) lsmEnvClose(pFS->pEnv, pFS->fdLog ); lsmFree(pEnv, pFS->pLsmFile); lsmFree(pEnv, pFS->apHash); lsmFree(pEnv, pFS->aIBuffer); @@ -671,10 +990,11 @@ } void lsmFsDeferClose(FileSystem *pFS, LsmFile **pp){ LsmFile *p = pFS->pLsmFile; assert( p->pNext==0 ); + fsMmapClose(pFS); p->pFile = pFS->fdDb; pFS->fdDb = 0; pFS->pLsmFile = 0; *pp = p; } @@ -885,32 +1205,37 @@ iHash = fsHashKey(pFS->nHash, pPg->iPg); for(pp=&pFS->apHash[iHash]; *pp!=pPg; pp=&(*pp)->pHashNext); *pp = pPg->pHashNext; } +/* +** This function is only called if FileSystem.eUseMmap==0 (i.e. in non-mmap +** mode). 
+*/ static int fsPageBuffer( FileSystem *pFS, - int bRequireData, /* True to allocate buffer as well */ Page **ppOut ){ int rc = LSM_OK; Page *pPage = 0; - if( pFS->bUseMmap || pFS->pLruFirst==0 || pFS->nCacheAllocnCacheMax ){ + + if( pFS->mmapmgr.eUseMmap!=LSM_MMAP_OFF + || pFS->pLruFirst==0 + || pFS->nCacheAllocnCacheMax + ){ pPage = lsmMallocZero(pFS->pEnv, sizeof(Page)); if( !pPage ){ rc = LSM_NOMEM_BKPT; - }else if( bRequireData ){ + }else{ pPage->aData = (u8 *)lsmMalloc(pFS->pEnv, pFS->nPagesize); pPage->flags = PAGE_FREE; if( !pPage->aData ){ lsmFree(pFS->pEnv, pPage); rc = LSM_NOMEM_BKPT; pPage = 0; } pFS->nCacheAlloc++; - }else{ - fsPageAddToLru(pFS, pPage); } }else{ pPage = pFS->pLruFirst; fsPageRemoveFromLru(pFS, pPage); fsPageRemoveFromHash(pFS, pPage); @@ -920,55 +1245,32 @@ *ppOut = pPage; return rc; } static void fsPageBufferFree(Page *pPg){ + FileSystem *pFS = pPg->pFS; + assert( pFS->mmapmgr.eUseMmap==LSM_MMAP_OFF ); + if( pPg->flags & PAGE_FREE ){ lsmFree(pPg->pFS->pEnv, pPg->aData); } - else if( pPg->pFS->bUseMmap ){ - fsPageRemoveFromLru(pPg->pFS, pPg); - } lsmFree(pPg->pFS->pEnv, pPg); } -static void fsGrowMapping( - FileSystem *pFS, - i64 iSz, - int *pRc -){ - /* This function won't work with compressed databases yet. */ - assert( pFS->pCompress==0 ); - assert( PAGE_HASPREV==4 ); - - if( *pRc==LSM_OK && iSz>pFS->nMap ){ - int rc; - u8 *aOld = pFS->pMap; - rc = lsmEnvRemap(pFS->pEnv, pFS->fdDb, iSz, &pFS->pMap, &pFS->nMap); - if( rc==LSM_OK && pFS->pMap!=aOld ){ - Page *pFix; - i64 iOff = (u8 *)pFS->pMap - aOld; - for(pFix=pFS->pLruFirst; pFix; pFix=pFix->pLruNext){ - pFix->aData += iOff; - } - lsmSortedRemap(pFS->pDb); - } - *pRc = rc; - } -} - /* ** fsync() the database file. 
*/ int lsmFsSyncDb(FileSystem *pFS, int nBlock){ +#if 0 if( nBlock && pFS->bUseMmap ){ int rc = LSM_OK; i64 nMin = (i64)nBlock * (i64)pFS->nBlocksize; fsGrowMapping(pFS, nMin, &rc); if( rc!=LSM_OK ) return rc; } +#endif return lsmEnvSync(pFS->pEnv, pFS->fdDb); } static int fsPageGet(FileSystem *, Segment *, Pgno, int, Page **, int *); @@ -1029,11 +1331,11 @@ iRead = fsRedirectBlock(pSeg->pRedirect, iBlock); }else{ iRead = iBlock; } - assert( pFS->bUseMmap==0 || pFS->pCompress==0 ); + assert( pFS->mmapmgr.eUseMmap==LSM_MMAP_OFF || pFS->pCompress==0 ); if( pFS->pCompress ){ i64 iOff; /* File offset to read data from */ u8 aNext[4]; /* 4-byte pointer read from db file */ iOff = (i64)iRead * pFS->nBlocksize - sizeof(aNext); @@ -1109,11 +1411,11 @@ int iBlock, /* Read field from this block */ int *piPrev /* OUT: Previous block in linked list */ ){ int rc = LSM_OK; /* Return code */ - assert( pFS->bUseMmap==0 || pFS->pCompress==0 ); + assert( pFS->mmapmgr.eUseMmap==LSM_MMAP_OFF || pFS->pCompress==0 ); assert( iBlock>0 ); if( pFS->pCompress ){ i64 iOff = fsFirstPageOnBlock(pFS, iBlock) - 4; u8 aPrev[4]; /* 4-byte pointer read from db file */ @@ -1296,22 +1598,22 @@ /* In most cases iReal is the same as iPg. Except, if pSeg->pRedirect is ** not NULL, and the block containing iPg has been redirected, then iReal ** is the page number after redirection. */ Pgno iReal = lsmFsRedirectPage(pFS, (pSeg ? pSeg->pRedirect : 0), iPg); + i64 iOff = (i64)(iReal-1) * pFS->nPagesize; assert( iPg>=fsFirstPageOnBlock(pFS, 1) ); assert( iReal>=fsFirstPageOnBlock(pFS, 1) ); *ppPg = 0; - assert( pFS->bUseMmap==0 || pFS->pCompress==0 ); - if( pFS->bUseMmap ){ + assert( pFS->mmapmgr.eUseMmap==LSM_MMAP_OFF || pFS->pCompress==0 ); + if( pFS->mmapmgr.eUseMmap!=LSM_MMAP_OFF ){ Page *pTest; - i64 iEnd = (i64)iReal * pFS->nPagesize; - fsGrowMapping(pFS, iEnd, &rc); - if( rc!=LSM_OK ) return rc; + /* Check if the page is currently in the waiting list. 
If so, increment + ** the refcount and return a pointer to it. No more to do in this case. */ p = 0; for(pTest=pFS->pWaiting; pTest; pTest=pTest->pNextWaiting){ if( pTest->iPg==iReal ){ assert( iReal==iPg ); p = pTest; @@ -1318,10 +1620,12 @@ p->nRef++; *ppPg = p; return LSM_OK; } } + + /* Allocate or recycle a Page structure */ if( pFS->pFree ){ p = pFS->pFree; pFS->pFree = p->pHashNext; assert( p->nRef==0 ); }else{ @@ -1328,12 +1632,19 @@ p = lsmMallocZeroRc(pFS->pEnv, sizeof(Page), &rc); if( rc ) return rc; fsPageAddToLru(pFS, p); p->pFS = pFS; } - p->aData = &((u8 *)pFS->pMap)[pFS->nPagesize * (iReal-1)]; p->iPg = iReal; + + p->aData = fsMmapRef(pFS, iOff, pFS->nPagesize, &p->pRef, &rc); + if( rc!=LSM_OK ){ + p->pHashNext = pFS->pFree; + pFS->pFree = p; + p = 0; + } + assert( (p->flags & PAGE_FREE)==0 ); }else{ /* Search the hash-table for the page */ iHash = fsHashKey(pFS->nHash, iReal); @@ -1340,11 +1651,11 @@ for(p=pFS->apHash[iHash]; p; p=p->pHashNext){ if( p->iPg==iReal) break; } if( p==0 ){ - rc = fsPageBuffer(pFS, 1, &p); + rc = fsPageBuffer(pFS, &p); if( rc==LSM_OK ){ int nSpace = 0; p->iPg = iReal; p->nRef = 0; p->pFS = pFS; @@ -1357,11 +1668,10 @@ if( noContent==0 ){ if( pFS->pCompress ){ rc = fsReadPagedata(pFS, pSeg, p, &nSpace); }else{ int nByte = pFS->nPagesize; - i64 iOff = (i64)(iReal-1) * pFS->nPagesize; rc = lsmEnvRead(pFS->pEnv, pFS->fdDb, iOff, p->aData, nByte); } pFS->nRead++; } @@ -1416,14 +1726,19 @@ int lsmFsReadSyncedId(lsm_db *db, int iMeta, i64 *piVal){ FileSystem *pFS = db->pFS; int rc = LSM_OK; assert( iMeta==1 || iMeta==2 ); - if( pFS->bUseMmap ){ - fsGrowMapping(pFS, iMeta*LSM_META_PAGE_SIZE, &rc); + if( pFS->mmapmgr.eUseMmap!=LSM_MMAP_OFF ){ + MmapMgrRef *pRef = 0; + u8 *pMap; /* Mapping of meta pages */ + int iOff = (iMeta==2 ? 
LSM_META_PAGE_SIZE : 0); + + pMap = (u8 *)fsMmapRef(pFS, iOff, LSM_META_PAGE_SIZE, &pRef, &rc); if( rc==LSM_OK ){ - *piVal = (i64)lsmGetU64(&((u8 *)pFS->pMap)[(iMeta-1)*LSM_META_PAGE_SIZE]); + *piVal = (i64)lsmGetU64(pMap); + fsMmapUnref(pFS, &pRef); } }else{ MetaPage *pMeta = 0; rc = lsmFsMetaPageGet(pFS, 0, iMeta, &pMeta); if( rc==LSM_OK ){ @@ -1821,11 +2136,11 @@ if( pFS->pCompress || bDefer ){ /* In compressed database mode the page is not assigned a page number ** or location in the database file at this point. This will be done ** by the lsmFsPagePersist() call. */ - rc = fsPageBuffer(pFS, 1, &pPg); + rc = fsPageBuffer(pFS, &pPg); if( rc==LSM_OK ){ pPg->pFS = pFS; pPg->pSeg = p; pPg->iPg = 0; pPg->flags |= PAGE_DIRTY; @@ -1982,13 +2297,12 @@ pPg = lsmMallocZeroRc(pFS->pEnv, sizeof(Page), &rc); if( pPg ){ i64 iOff = (iPg-1) * pFS->nMetasize; - if( pFS->bUseMmap ){ - fsGrowMapping(pFS, 2*pFS->nMetasize, &rc); - pPg->aData = (u8 *)(pFS->pMap) + iOff; + if( pFS->mmapmgr.eUseMmap!=LSM_MMAP_OFF ){ + pPg->aData = fsMmapRef(pFS, iOff, LSM_META_PAGE_SIZE, &pPg->pRef, &rc); }else{ pPg->aData = lsmMallocRc(pFS->pEnv, pFS->nMetasize, &rc); if( rc==LSM_OK && bWrite==0 ){ rc = lsmEnvRead(pFS->pEnv, pFS->fdDb, iOff, pPg->aData, pFS->nMetasize); } @@ -2003,11 +2317,15 @@ } #endif } if( rc!=LSM_OK ){ - if( pFS->bUseMmap==0 ) lsmFree(pFS->pEnv, pPg->aData); + if( pFS->mmapmgr.eUseMmap==LSM_MMAP_OFF ){ + lsmFree(pFS->pEnv, pPg->aData); + }else{ + fsMmapUnref(pFS, &pPg->pRef); + } lsmFree(pFS->pEnv, pPg); pPg = 0; }else{ pPg->iPg = iPg; pPg->bWrite = bWrite; @@ -2025,17 +2343,19 @@ int lsmFsMetaPageRelease(MetaPage *pPg){ int rc = LSM_OK; if( pPg ){ FileSystem *pFS = pPg->pFS; - if( pFS->bUseMmap==0 ){ + if( pFS->mmapmgr.eUseMmap==LSM_MMAP_OFF ){ if( pPg->bWrite ){ i64 iOff = (pPg->iPg==2 ? 
pFS->nMetasize : 0); int nWrite = pFS->nMetasize; rc = lsmEnvWrite(pFS->pEnv, pFS->fdDb, iOff, pPg->aData, nWrite); } lsmFree(pFS->pEnv, pPg->aData); + }else{ + fsMmapUnref(pFS, &pPg->pRef); } lsmFree(pFS->pEnv, pPg); } return rc; @@ -2089,16 +2409,23 @@ i64 iToOff = (i64)(iTo-1) * pFS->nBlocksize; assert( iTo!=1 ); assert( iFrom>iTo ); - if( pFS->bUseMmap ){ - fsGrowMapping(pFS, (i64)iFrom * pFS->nBlocksize, &rc); + if( pFS->mmapmgr.eUseMmap!=LSM_MMAP_OFF ){ + MmapMgrRef *pRef1 = 0; + MmapMgrRef *pRef2 = 0; + void *pTo, *pFrom; + + pTo = fsMmapRef(pFS, iToOff, pFS->nBlocksize, &pRef1, &rc); + pFrom = fsMmapRef(pFS, iFromOff, pFS->nBlocksize, &pRef2, &rc); if( rc==LSM_OK ){ - u8 *aMap = (u8 *)(pFS->pMap); - memcpy(&aMap[iToOff], &aMap[iFromOff], pFS->nBlocksize); + memcpy(pTo, pFrom, pFS->nBlocksize); } + fsMmapUnref(pFS, &pRef1); + fsMmapUnref(pFS, &pRef2); + }else{ int nSz = pFS->nPagesize; u8 *aData = (u8 *)lsmMallocRc(pFS->pEnv, nSz, &rc); if( rc==LSM_OK ){ const int nPagePerBlock = (pFS->nBlocksize / pFS->nPagesize); @@ -2366,11 +2693,11 @@ assert( pPg->nData==pFS->nPagesize-4 ); rc = fsAppendPage(pFS, pPg->pSeg, &pPg->iPg, &iPrev, &iNext); if( rc!=LSM_OK ) return rc; - if( pFS->bUseMmap==0 ){ + if( pFS->mmapmgr.eUseMmap==LSM_MMAP_OFF ){ int iHash = fsHashKey(pFS->nHash, pPg->iPg); pPg->pHashNext = pFS->apHash[iHash]; pFS->apHash[iHash] = pPg; assert( pPg->pHashNext==0 || pPg->pHashNext->iPg!=pPg->iPg ); } @@ -2397,17 +2724,16 @@ }else{ i64 iOff; /* Offset to write within database file */ iOff = (i64)pFS->nPagesize * (i64)(pPg->iPg-1); - if( pFS->bUseMmap==0 ){ + if( pFS->mmapmgr.eUseMmap==0 ){ u8 *aData = pPg->aData - (pPg->flags & PAGE_HASPREV); rc = lsmEnvWrite(pFS->pEnv, pFS->fdDb, iOff, aData, pFS->nPagesize); }else if( pPg->flags & PAGE_FREE ){ - fsGrowMapping(pFS, iOff + pFS->nPagesize, &rc); + u8 *aTo = (u8 *)fsMmapRef(pFS, iOff, pFS->nPagesize, &pPg->pRef, &rc); if( rc==LSM_OK ){ - u8 *aTo = &((u8 *)(pFS->pMap))[iOff]; u8 *aFrom = pPg->aData - 
(pPg->flags & PAGE_HASPREV); memcpy(aTo, aFrom, pFS->nPagesize); lsmFree(pFS->pEnv, aFrom); pPg->aData = aTo + (pPg->flags & PAGE_HASPREV); pPg->flags &= ~PAGE_FREE; @@ -2509,11 +2835,12 @@ || (pPg->flags & PAGE_HASPREV) ); pPg->aData -= (pPg->flags & PAGE_HASPREV); pPg->flags &= ~PAGE_HASPREV; - if( pFS->bUseMmap ){ + if( pFS->mmapmgr.eUseMmap!=LSM_MMAP_OFF ){ + fsMmapUnref(pFS, &pPg->pRef); pPg->pHashNext = pFS->pFree; pFS->pFree = pPg; }else{ assert( pPg->pLruNext==0 ); assert( pPg->pLruPrev==0 ); Index: src/lsm_main.c ================================================================== --- src/lsm_main.c +++ src/lsm_main.c @@ -94,11 +94,11 @@ pDb->nMaxFreelist = LSM_MAX_FREELIST_ENTRIES; pDb->bUseLog = LSM_DFLT_USE_LOG; pDb->iReader = -1; pDb->iRwclient = -1; pDb->bMultiProc = LSM_DFLT_MULTIPLE_PROCESSES; - pDb->bMmap = LSM_DFLT_MMAP; + pDb->eMmap = LSM_DFLT_MMAP; pDb->xLog = xLog; pDb->compress.iId = LSM_COMPRESSION_NONE; return LSM_OK; } @@ -327,15 +327,15 @@ break; } case LSM_CONFIG_MMAP: { int *piVal = va_arg(ap, int *); - if( pDb->iReader<0 && *piVal>=0 && *piVal<=1 ){ - pDb->bMmap = *piVal; + if( pDb->iReader<0 && *piVal>=0 && *piVal<=2 ){ + pDb->eMmap = *piVal; rc = lsmFsConfigure(pDb); } - *piVal = pDb->bMmap; + *piVal = pDb->eMmap; break; } case LSM_CONFIG_USE_LOG: { int *piVal = va_arg(ap, int *); Index: src/lsm_sorted.c ================================================================== --- src/lsm_sorted.c +++ src/lsm_sorted.c @@ -702,12 +702,12 @@ pCsr->aPg[pCsr->iPg].iCell++; iLoad = btreeCursorPtr(aData, nData, pPg->iCell); do { Page *pLoad; - pCsr->iPg++; rc = lsmFsDbPageGet(pCsr->pFS, pCsr->pSeg, iLoad, &pLoad); + pCsr->iPg++; pCsr->aPg[pCsr->iPg].pPage = pLoad; pCsr->aPg[pCsr->iPg].iCell = 0; if( rc==LSM_OK ){ if( pCsr->iPg==(pCsr->nDepth-1) ) break; aData = fsPageData(pLoad, &nData); Index: src/lsm_unix.c ================================================================== --- src/lsm_unix.c +++ src/lsm_unix.c @@ -188,10 +188,57 @@ } static int 
lsmPosixOsSectorSize(lsm_file *pFile){ return 512; } + +static int lsmPosixOsMap( + lsm_file *pFile, + lsm_i64 iOff, + lsm_i64 nByte, + void **ppOut, + lsm_i64 *pszOut +){ + PosixFile *p = (PosixFile *)pFile; + off_t off = (off_t)iOff; + int prc; /* Posix Return Code */ + struct stat buf; + size_t sz; + + memset(&buf, 0, sizeof(buf)); + prc = fstat(p->fd, &buf); + if( prc!=0 ) return LSM_IOERR_BKPT; + + if( nByte<=0 ){ + sz = (size_t)(LSM_MAX(nByte*-1, (i64)buf.st_size)); + }else{ + sz = (size_t)nByte; + } + + if( (off+sz)>buf.st_size ){ + prc = ftruncate(p->fd, (off+sz)); + if( prc!=0 ) return LSM_IOERR_BKPT; + } + + *ppOut = mmap(0, sz, PROT_READ|PROT_WRITE, MAP_SHARED, p->fd, off); + if( *ppOut==MAP_FAILED ){ + return LSM_IOERR_BKPT; + } + + *pszOut = (i64)sz; + return LSM_OK; +} + +static int lsmPosixOsUnmap( + lsm_file *pFile, + void *pMap, + lsm_i64 nMap +){ + PosixFile *p = (PosixFile *)pFile; + munmap(pMap, (size_t)nMap); + return LSM_OK; +} static int lsmPosixOsRemap( lsm_file *pFile, lsm_i64 iMin, void **ppOut, @@ -702,11 +749,15 @@ lsmPosixOsRead, /* xRead */ lsmPosixOsWrite, /* xWrite */ lsmPosixOsTruncate, /* xTruncate */ lsmPosixOsSync, /* xSync */ lsmPosixOsSectorSize, /* xSectorSize */ +#if 0 lsmPosixOsRemap, /* xRemap */ +#endif + lsmPosixOsMap, /* xMap */ + lsmPosixOsUnmap, /* xUnmap */ lsmPosixOsFileid, /* xFileid */ lsmPosixOsClose, /* xClose */ lsmPosixOsUnlink, /* xUnlink */ lsmPosixOsLock, /* xLock */ lsmPosixOsTestLock, /* xTestLock */ Index: tool/lsmperf.tcl ================================================================== --- tool/lsmperf.tcl +++ tool/lsmperf.tcl @@ -379,10 +379,11 @@ } } #run_all_tests +if 0 { generate_chart png res.db 1 2 update capture_photo lsmperf1.gif destroy .c @@ -393,9 +394,18 @@ generate_chart png res.db 5 6 update capture_photo lsmperf3.gif destroy .c +} + +run_speed_test res.db 900 20000 20000 0 100 "mmap=2" mmap=2 +after 10000 +run_speed_test res.db 900 20000 20000 0 100 "mmap=0" mmap=0 +generate_chart png 
res.db 1 2 +update +#capture_photo lsmperf.gif +#destroy .c -exit +#exit