Index: src/lsm.h ================================================================== --- src/lsm.h +++ src/lsm.h @@ -22,10 +22,11 @@ /* ** Opaque handle types. */ typedef struct lsm_compress lsm_compress; /* Compression library functions */ +typedef struct lsm_compress_factory lsm_compress_factory; typedef struct lsm_cursor lsm_cursor; /* Database cursor handle */ typedef struct lsm_db lsm_db; /* Database connection handle */ typedef struct lsm_env lsm_env; /* Runtime environment */ typedef struct lsm_file lsm_file; /* OS file handle */ typedef struct lsm_mutex lsm_mutex; /* Mutex handle */ @@ -104,10 +105,12 @@ #define LSM_FULL 13 #define LSM_CANTOPEN 14 #define LSM_PROTOCOL 15 #define LSM_MISUSE 21 +#define LSM_MISMATCH 50 + /* ** CAPI: Creating and Destroying Database Connection Handles ** ** Open and close a database connection handle. */ @@ -118,11 +121,11 @@ ** CAPI: Connecting to a Database */ int lsm_open(lsm_db *pDb, const char *zFilename); /* -** CAPI: Obtaining pointers to databases environments +** CAPI: Obtaining pointers to database environments ** ** Return a pointer to the environment used by the database connection ** passed as the first argument. Assuming the argument is valid, this ** function always returns a valid environment pointer - it cannot fail. */ @@ -253,24 +256,29 @@ ** after lsm_open() has been called results in an LSM_MISUSE error. ** ** LSM_CONFIG_GET_COMPRESSION: ** Query the compression methods used to compress and decompress database ** content. +** +** LSM_CONFIG_SET_COMPRESSION_FACTORY: +** Configure a factory method to be invoked in case of an LSM_MISMATCH +** error. 
*/ -#define LSM_CONFIG_AUTOFLUSH 1 -#define LSM_CONFIG_PAGE_SIZE 2 -#define LSM_CONFIG_SAFETY 3 -#define LSM_CONFIG_BLOCK_SIZE 4 -#define LSM_CONFIG_AUTOWORK 5 -#define LSM_CONFIG_MMAP 7 -#define LSM_CONFIG_USE_LOG 8 -#define LSM_CONFIG_AUTOMERGE 9 -#define LSM_CONFIG_MAX_FREELIST 10 -#define LSM_CONFIG_MULTIPLE_PROCESSES 11 -#define LSM_CONFIG_AUTOCHECKPOINT 12 -#define LSM_CONFIG_SET_COMPRESSION 13 -#define LSM_CONFIG_GET_COMPRESSION 14 +#define LSM_CONFIG_AUTOFLUSH 1 +#define LSM_CONFIG_PAGE_SIZE 2 +#define LSM_CONFIG_SAFETY 3 +#define LSM_CONFIG_BLOCK_SIZE 4 +#define LSM_CONFIG_AUTOWORK 5 +#define LSM_CONFIG_MMAP 7 +#define LSM_CONFIG_USE_LOG 8 +#define LSM_CONFIG_AUTOMERGE 9 +#define LSM_CONFIG_MAX_FREELIST 10 +#define LSM_CONFIG_MULTIPLE_PROCESSES 11 +#define LSM_CONFIG_AUTOCHECKPOINT 12 +#define LSM_CONFIG_SET_COMPRESSION 13 +#define LSM_CONFIG_GET_COMPRESSION 14 +#define LSM_CONFIG_SET_COMPRESSION_FACTORY 15 #define LSM_SAFETY_OFF 0 #define LSM_SAFETY_NORMAL 1 #define LSM_SAFETY_FULL 2 @@ -281,12 +289,21 @@ void *pCtx; unsigned int iId; int (*xBound)(void *, int nSrc); int (*xCompress)(void *, char *, int *, const char *, int); int (*xUncompress)(void *, char *, int *, const char *, int); + void (*xFree)(void *pCtx); +}; + +struct lsm_compress_factory { + void *pCtx; + int (*xFactory)(void *, lsm_db *, unsigned int); + void (*xFree)(void *pCtx); }; +#define LSM_COMPRESSION_EMPTY 0 +#define LSM_COMPRESSION_NONE 1 /* ** CAPI: Allocating and Freeing Memory ** ** Invoke the memory allocation functions that belong to environment @@ -413,10 +430,15 @@ ** to disk at any time. ** ** Assuming no error occurs, the location pointed to by the first of the two ** (int *) arguments is set to the size of the old in-memory tree in KB. ** The second is set to the size of the current, or live in-memory tree. +** +** LSM_INFO_COMPRESSION_ID: +** This value should be followed by a single argument of type +** (unsigned int *). 
If successful, the location pointed to is populated +** with the database compression id before returning. */ #define LSM_INFO_NWRITE 1 #define LSM_INFO_NREAD 2 #define LSM_INFO_DB_STRUCTURE 3 #define LSM_INFO_LOG_STRUCTURE 4 @@ -425,12 +447,12 @@ #define LSM_INFO_PAGE_HEX_DUMP 7 #define LSM_INFO_FREELIST 8 #define LSM_INFO_ARRAY_PAGES 9 #define LSM_INFO_CHECKPOINT_SIZE 10 #define LSM_INFO_TREE_SIZE 11 - #define LSM_INFO_FREELIST_SIZE 12 +#define LSM_INFO_COMPRESSION_ID 13 /* ** CAPI: Opening and Closing Write Transactions ** Index: src/lsmInt.h ================================================================== --- src/lsmInt.h +++ src/lsmInt.h @@ -314,10 +314,11 @@ int nMaxFreelist; /* Configured by LSM_CONFIG_MAX_FREELIST */ int bMmap; /* Configured by LSM_CONFIG_MMAP */ i64 nAutockpt; /* Configured by LSM_CONFIG_AUTOCHECKPOINT */ int bMultiProc; /* Configured by L_C_MULTIPLE_PROCESSES */ lsm_compress compress; /* Compression callbacks */ + lsm_compress_factory factory; /* Compression callback factory */ /* Sub-system handles */ FileSystem *pFS; /* On-disk portion of database */ Database *pDatabase; /* Database shared data */ @@ -334,10 +335,12 @@ /* Worker context */ Snapshot *pWorker; /* Worker snapshot (or NULL) */ Freelist *pFreelist; /* See sortedNewToplevel() */ int bUseFreelist; /* True to use pFreelist */ int bIncrMerge; /* True if currently doing a merge */ + + int bInFactory; /* True if within factory.xFactory() */ /* Debugging message callback */ void (*xLog)(void *, int, const char *); void *pLogCtx; @@ -524,10 +527,11 @@ ** to read or write a database file on disk. See the description of struct ** Database below for futher details. 
*/ struct Snapshot { Database *pDatabase; /* Database this snapshot belongs to */ + u32 iCmpId; /* Id of compression scheme */ Level *pLevel; /* Pointer to level 0 of snapshot (or NULL) */ i64 iId; /* Snapshot id */ i64 iLogOff; /* Log file offset */ Redirect redirect; /* Block redirection array */ @@ -569,10 +573,12 @@ int lsmDatabaseFull(lsm_db *pDb); int lsmCheckpointSynced(lsm_db *pDb, i64 *piId, i64 *piLog, u32 *pnWrite); int lsmCheckpointSize(lsm_db *db, int *pnByte); +int lsmInfoCompressionId(lsm_db *db, u32 *piCmpId); + /* ** Functions from file "lsm_tree.c". */ int lsmTreeNew(lsm_env *, int (*)(void *, int, void *, int), Tree **ppTree); void lsmTreeRelease(lsm_env *, Tree *); @@ -587,10 +593,11 @@ int lsmTreeEndTransaction(lsm_db *pDb, int bCommit); int lsmTreeLoadHeader(lsm_db *pDb, int *); int lsmTreeLoadHeaderOk(lsm_db *, int); int lsmTreeInsert(lsm_db *pDb, void *pKey, int nKey, void *pVal, int nVal); +int lsmTreeDelete(lsm_db *db, void *pKey1, int nKey1, void *pKey2, int nKey2); void lsmTreeRollback(lsm_db *pDb, TreeMark *pMark); void lsmTreeMark(lsm_db *pDb, TreeMark *pMark); int lsmTreeCursorNew(lsm_db *pDb, int, TreeCursor **); void lsmTreeCursorDestroy(TreeCursor *); @@ -641,10 +648,12 @@ /************************************************************************** ** Start of functions from "lsm_file.c". 
*/ int lsmFsOpen(lsm_db *, const char *); void lsmFsClose(FileSystem *); + +int lsmFsConfigure(lsm_db *db); int lsmFsBlockSize(FileSystem *); void lsmFsSetBlockSize(FileSystem *, int); int lsmFsPageSize(FileSystem *); @@ -890,10 +899,12 @@ int lsmDbMultiProc(lsm_db *); void lsmDbDeferredClose(lsm_db *, lsm_file *, LsmFile *); LsmFile *lsmDbRecycleFd(lsm_db *); int lsmWalkFreelist(lsm_db *, int, int (*)(void *, int, i64), void *); + +int lsmCheckCompressionId(lsm_db *, u32); /************************************************************************** ** functions in lsm_str.c */ Index: src/lsm_ckpt.c ================================================================== --- src/lsm_ckpt.c +++ src/lsm_ckpt.c @@ -30,15 +30,16 @@ ** ** 1. The checkpoint id MSW. ** 2. The checkpoint id LSW. ** 3. The number of integer values in the entire checkpoint, including ** the two checksum values. -** 4. The total number of blocks in the database. -** 5. The block size. -** 6. The number of levels. -** 7. The nominal database page size. -** 8. The number of pages (in total) written to the database file. +** 4. The compression scheme id. +** 5. The total number of blocks in the database. +** 6. The block size. +** 7. The number of levels. +** 8. The nominal database page size. +** 9. The number of pages (in total) written to the database file. ** ** Log pointer: ** ** 1. The log offset MSW. ** 2. The log offset LSW. @@ -172,28 +173,29 @@ static const int one = 1; #define LSM_LITTLE_ENDIAN (*(u8 *)(&one)) /* Sizes, in integers, of various parts of the checkpoint. */ -#define CKPT_HDR_SIZE 8 +#define CKPT_HDR_SIZE 9 #define CKPT_LOGPTR_SIZE 4 #define CKPT_APPENDLIST_SIZE (LSM_APPLIST_SZ * 2) /* A #define to describe each integer in the checkpoint header. 
*/ #define CKPT_HDR_ID_MSW 0 #define CKPT_HDR_ID_LSW 1 #define CKPT_HDR_NCKPT 2 -#define CKPT_HDR_NBLOCK 3 -#define CKPT_HDR_BLKSZ 4 -#define CKPT_HDR_NLEVEL 5 -#define CKPT_HDR_PGSZ 6 -#define CKPT_HDR_NWRITE 7 - -#define CKPT_HDR_LO_MSW 8 -#define CKPT_HDR_LO_LSW 9 -#define CKPT_HDR_LO_CKSUM1 10 -#define CKPT_HDR_LO_CKSUM2 11 +#define CKPT_HDR_CMPID 3 +#define CKPT_HDR_NBLOCK 4 +#define CKPT_HDR_BLKSZ 5 +#define CKPT_HDR_NLEVEL 6 +#define CKPT_HDR_PGSZ 7 +#define CKPT_HDR_NWRITE 8 + +#define CKPT_HDR_LO_MSW 9 +#define CKPT_HDR_LO_LSW 10 +#define CKPT_HDR_LO_CKSUM1 11 +#define CKPT_HDR_LO_CKSUM2 12 typedef struct CkptBuffer CkptBuffer; /* ** Dynamic buffer used to accumulate data for a checkpoint. @@ -447,13 +449,17 @@ } } /* Write the checkpoint header */ assert( iId>=0 ); + assert( pSnap->iCmpId==pDb->compress.iId + || pSnap->iCmpId==LSM_COMPRESSION_EMPTY + ); ckptSetValue(&ckpt, CKPT_HDR_ID_MSW, (u32)(iId>>32), &rc); ckptSetValue(&ckpt, CKPT_HDR_ID_LSW, (u32)(iId&0xFFFFFFFF), &rc); ckptSetValue(&ckpt, CKPT_HDR_NCKPT, iOut+2, &rc); + ckptSetValue(&ckpt, CKPT_HDR_CMPID, pDb->compress.iId, &rc); ckptSetValue(&ckpt, CKPT_HDR_NBLOCK, pSnap->nBlock, &rc); ckptSetValue(&ckpt, CKPT_HDR_BLKSZ, lsmFsBlockSize(pFS), &rc); ckptSetValue(&ckpt, CKPT_HDR_NLEVEL, nLevel, &rc); ckptSetValue(&ckpt, CKPT_HDR_PGSZ, lsmFsPageSize(pFS), &rc); ckptSetValue(&ckpt, CKPT_HDR_NWRITE, pSnap->nWrite, &rc); @@ -759,23 +765,24 @@ ** Initialize the shared-memory header with an empty snapshot. This function ** is called when no valid snapshot can be found in the database header. 
*/ static void ckptLoadEmpty(lsm_db *pDb){ u32 aCkpt[] = { - 0, /* CKPT_HDR_ID_MSW */ - 10, /* CKPT_HDR_ID_LSW */ - 0, /* CKPT_HDR_NCKPT */ - 0, /* CKPT_HDR_NBLOCK */ - 0, /* CKPT_HDR_BLKSZ */ - 0, /* CKPT_HDR_NLEVEL */ - 0, /* CKPT_HDR_PGSZ */ - 0, /* CKPT_HDR_OVFL */ - 0, /* CKPT_HDR_NWRITE */ - 0, 0, 1234, 5678, /* The log pointer and initial checksum */ - 0,0,0,0, 0,0,0,0, /* The append list */ - 0, /* The free block list */ - 0, 0 /* Space for checksum values */ + 0, /* CKPT_HDR_ID_MSW */ + 10, /* CKPT_HDR_ID_LSW */ + 0, /* CKPT_HDR_NCKPT */ + LSM_COMPRESSION_EMPTY, /* CKPT_HDR_CMPID */ + 0, /* CKPT_HDR_NBLOCK */ + 0, /* CKPT_HDR_BLKSZ */ + 0, /* CKPT_HDR_NLEVEL */ + 0, /* CKPT_HDR_PGSZ */ + 0, /* CKPT_HDR_OVFL - NOTE(review): no CKPT_HDR_OVFL define is visible and CKPT_HDR_SIZE is 9; confirm this slot is not stale */ + 0, /* CKPT_HDR_NWRITE */ + 0, 0, 1234, 5678, /* The log pointer and initial checksum */ + 0,0,0,0, 0,0,0,0, /* The append list */ + 0, /* The free block list */ + 0, 0 /* Space for checksum values */ }; u32 nCkpt = array_size(aCkpt); ShmHeader *pShm = pDb->pShmhdr; aCkpt[CKPT_HDR_NCKPT] = nCkpt; @@ -876,10 +883,22 @@ lsmShmBarrier(pDb); } return LSM_PROTOCOL; } + +int lsmInfoCompressionId(lsm_db *db, u32 *piCmpId){ + int rc; /* Return code */ + + assert( db->pClient==0 && db->pWorker==0 ); /* neither snapshot pointer may be set */ + rc = lsmCheckpointLoad(db, 0); + if( rc==LSM_OK ){ + *piCmpId = db->aSnapshot[CKPT_HDR_CMPID]; /* *piCmpId is written only on success */ + } + + return rc; +} int lsmCheckpointLoadOk(lsm_db *pDb, int iSnap){ u32 *aShm; assert( iSnap==1 || iSnap==2 ); aShm = (iSnap==1) ? 
pDb->pShmhdr->aSnap1 : pDb->pShmhdr->aSnap2; @@ -919,10 +938,14 @@ } } rc = lsmCheckpointDeserialize(pDb, 1, pShm->aSnap1, &pDb->pWorker); if( pDb->pWorker ) pDb->pWorker->pDatabase = pDb->pDatabase; + + if( rc==LSM_OK ){ + rc = lsmCheckCompressionId(pDb, pDb->pWorker->iCmpId); + } #if 0 assert( rc!=LSM_OK || lsmFsIntegrityCheck(pDb) ); #endif return rc; @@ -948,10 +971,11 @@ pNew->iId = lsmCheckpointId(aCkpt, 0); pNew->nBlock = aCkpt[CKPT_HDR_NBLOCK]; pNew->nWrite = aCkpt[CKPT_HDR_NWRITE]; rc = ckptLoadLevels(pDb, aCkpt, &iIn, nLevel, &pNew->pLevel); pNew->iLogOff = lsmCheckpointLogOffset(aCkpt); + pNew->iCmpId = aCkpt[CKPT_HDR_CMPID]; /* Make a copy of the append-list */ for(i=0; iaiAppend[i] = ckptRead64(a); Index: src/lsm_file.c ================================================================== --- src/lsm_file.c +++ src/lsm_file.c @@ -508,15 +508,10 @@ pFS->nPagesize = LSM_DFLT_PAGE_SIZE; pFS->nBlocksize = LSM_DFLT_BLOCK_SIZE; pFS->nMetasize = 4 * 1024; pFS->pDb = pDb; pFS->pEnv = pDb->pEnv; - if( pDb->compress.xCompress ){ - pFS->pCompress = &pDb->compress; - }else{ - pFS->bUseMmap = pDb->bMmap; - } /* Make a copy of the database and log file names. */ memcpy(pFS->zDb, zDb, nDb+1); memcpy(pFS->zLog, zDb, nDb); memcpy(&pFS->zLog[nDb], "-log", 5); @@ -549,10 +544,60 @@ } pDb->pFS = pFS; return rc; } + +/* +** Configure the file-system object according to the current values of +** the LSM_CONFIG_MMAP and LSM_CONFIG_SET_COMPRESSION options. 
+*/ +int lsmFsConfigure(lsm_db *db){ + FileSystem *pFS = db->pFS; + lsm_env *pEnv = pFS->pEnv; + Page *pPg; /* Iterator over the LRU page list */ + + assert( pFS->nOut==0 ); + assert( pFS->pWaiting==0 ); + + /* Reset any compression/decompression buffers already allocated. NOTE(review): the pointers are not zeroed after lsmFree() - confirm nBuffer==0 forces reallocation. */ + lsmFree(pEnv, pFS->aIBuffer); + lsmFree(pEnv, pFS->aOBuffer); + pFS->nBuffer = 0; + + /* Unmap the file, if it is currently mapped */ + if( pFS->pMap ){ + lsmEnvRemap(pEnv, pFS->fdDb, -1, &pFS->pMap, &pFS->nMap); + pFS->bUseMmap = 0; + } + + /* Free all allocated page structures on the LRU list */ + pPg = pFS->pLruFirst; + while( pPg ){ + Page *pNext = pPg->pLruNext; + if( pPg->flags & PAGE_FREE ) lsmFree(pEnv, pPg->aData); + lsmFree(pEnv, pPg); + pPg = pNext; + } + + /* Zero pointers that point to deleted page objects */ + pFS->nCacheAlloc = 0; + pFS->pLruFirst = 0; + pFS->pLruLast = 0; + pFS->pFree = 0; + + /* Configure the FileSystem object: compression hooks disable mmap */ + if( db->compress.xCompress ){ + pFS->pCompress = &db->compress; + pFS->bUseMmap = 0; + }else{ + pFS->pCompress = 0; + pFS->bUseMmap = db->bMmap; + } + + return LSM_OK; +} /* ** Close and destroy a FileSystem object. */ void lsmFsClose(FileSystem *pFS){ Index: src/lsm_main.c ================================================================== --- src/lsm_main.c +++ src/lsm_main.c @@ -94,10 +94,11 @@ pDb->bUseLog = LSM_DFLT_USE_LOG; pDb->iReader = -1; pDb->bMultiProc = LSM_DFLT_MULTIPLE_PROCESSES; pDb->bMmap = LSM_DFLT_MMAP; pDb->xLog = xLog; + pDb->compress.iId = LSM_COMPRESSION_NONE; return LSM_OK; } lsm_env *lsm_get_env(lsm_db *pDb){ assert( pDb->pEnv ); @@ -192,10 +193,16 @@ lsmFreeSnapshot(pDb->pEnv, pDb->pClient); pDb->pClient = 0; lsmDbDatabaseRelease(pDb); lsmLogClose(pDb); lsmFsClose(pDb->pFS); + + /* Invoke any destructors registered for the compression or + ** compression factory callbacks. 
*/ + if( pDb->factory.xFree ) pDb->factory.xFree(pDb->factory.pCtx); + if( pDb->compress.xFree ) pDb->compress.xFree(pDb->compress.pCtx); + lsmFree(pDb->pEnv, pDb->rollback.aArray); lsmFree(pDb->pEnv, pDb->aTrans); lsmFree(pDb->pEnv, pDb->apShm); lsmFree(pDb->pEnv, pDb); } @@ -335,16 +342,36 @@ break; } case LSM_CONFIG_SET_COMPRESSION: { lsm_compress *p = va_arg(ap, lsm_compress *); - if( pDb->pDatabase ){ - /* If lsm_open() has been called, this call is against the rules. */ + if( pDb->iReader>=0 && pDb->bInFactory==0 ){ + /* May not change compression schemes with an open transaction */ rc = LSM_MISUSE_BKPT; }else{ - memcpy(&pDb->compress, p, sizeof(lsm_compress)); + if( pDb->compress.xFree ){ + /* Invoke any destructor belonging to the current compression. */ + pDb->compress.xFree(pDb->compress.pCtx); + } + if( p->xBound==0 ){ + memset(&pDb->compress, 0, sizeof(lsm_compress)); + pDb->compress.iId = LSM_COMPRESSION_NONE; + }else{ + memcpy(&pDb->compress, p, sizeof(lsm_compress)); + } + rc = lsmFsConfigure(pDb); + } + break; + } + + case LSM_CONFIG_SET_COMPRESSION_FACTORY: { + lsm_compress_factory *p = va_arg(ap, lsm_compress_factory *); + if( pDb->factory.xFree ){ + /* Invoke any destructor belonging to the current factory. 
*/ + pDb->factory.xFree(pDb->factory.pCtx); } + memcpy(&pDb->factory, p, sizeof(lsm_compress_factory)); break; } case LSM_CONFIG_GET_COMPRESSION: { lsm_compress *p = va_arg(ap, lsm_compress *); @@ -430,11 +457,10 @@ int lsmInfoFreelist(lsm_db *pDb, char **pzOut){ Snapshot *pWorker; /* Worker snapshot */ int bUnlock = 0; LsmString s; - int i; int rc; /* Obtain the worker snapshot */ rc = infoGetWorker(pDb, &pWorker, &bUnlock); if( rc!=LSM_OK ) return rc; @@ -450,13 +476,10 @@ /* Release the snapshot and return */ infoFreeWorker(pDb, bUnlock); return rc; } -static int infoFreelistSize(lsm_db *pDb, int *pnFree, int *pnWaiting){ -} - static int infoTreeSize(lsm_db *db, int *pnOldKB, int *pnNewKB){ ShmHeader *pShm = db->pShmhdr; TreeHeader *p = &pShm->hdr1; /* The following code suffers from two race conditions, as it accesses and @@ -566,10 +589,20 @@ int *pnOld = va_arg(ap, int *); int *pnNew = va_arg(ap, int *); rc = infoTreeSize(pDb, pnOld, pnNew); break; } + + case LSM_INFO_COMPRESSION_ID: { + unsigned int *piOut = va_arg(ap, unsigned int *); + if( pDb->pClient ){ + *piOut = pDb->pClient->iCmpId; + }else{ + rc = lsmInfoCompressionId(pDb, piOut); + } + break; + } default: rc = LSM_MISUSE; break; } @@ -837,12 +870,10 @@ /* A value less than zero means close the innermost nested transaction. */ if( iLevel<0 ) iLevel = LSM_MAX(0, pDb->nTransOpen - 1); if( iLevelnTransOpen ){ if( iLevel==0 ){ - int bAutowork = 0; - /* Commit the transaction to disk. 
*/ if( rc==LSM_OK ) rc = lsmLogCommit(pDb); if( rc==LSM_OK && pDb->eSafety==LSM_SAFETY_FULL ){ rc = lsmFsSyncLog(pDb->pFS); } Index: src/lsm_shared.c ================================================================== --- src/lsm_shared.c +++ src/lsm_shared.c @@ -432,10 +432,13 @@ rc = lsmFsOpen(pDb, zName); } if( rc==LSM_OK ){ rc = doDbConnect(pDb); } + if( rc==LSM_OK ){ + rc = lsmFsConfigure(pDb); + } return rc; } static void dbDeferClose(lsm_db *pDb){ @@ -911,18 +914,47 @@ } lsmShmLock(pDb, LSM_LOCK_WORKER, LSM_LOCK_UNLOCK, 0); } - /* ** Called when recovery is finished. */ int lsmFinishRecovery(lsm_db *pDb){ lsmTreeEndTransaction(pDb, 1); return LSM_OK; } + +/* +** Check if the currently configured compression functions +** (LSM_CONFIG_SET_COMPRESSION) are compatible with a database that has its +** compression id set to iReq. Compression routines are compatible if iReq +** is zero (indicating the database is empty), or if it is equal to the +** compression id of the configured compression routines. +** +** If the check shows that the current compression functions are incompatible +** and there is a compression factory registered, give it a chance to install +** new compression routines. +** +** If, after any registered factory is invoked, the compression functions +** are still incompatible, return LSM_MISMATCH. Otherwise, LSM_OK. +*/ +int lsmCheckCompressionId(lsm_db *pDb, u32 iReq){ + if( iReq!=LSM_COMPRESSION_EMPTY && pDb->compress.iId!=iReq ){ + if( pDb->factory.xFactory ){ + pDb->bInFactory = 1; /* flag is set only for the duration of the callback */ + pDb->factory.xFactory(pDb->factory.pCtx, pDb, iReq); /* return value is ignored; compress.iId is re-checked below */ + pDb->bInFactory = 0; + } + if( pDb->compress.iId!=iReq ){ + /* Incompatible */ + return LSM_MISMATCH; + } + } + /* Compatible */ + return LSM_OK; +} /* ** Begin a read transaction. This function is a no-op if the connection ** passed as the only argument already has an open read transaction. 
*/ @@ -973,14 +1005,21 @@ if( pDb->pClient==0 ){ rc = lsmCheckpointDeserialize(pDb, 0, pDb->aSnapshot,&pDb->pClient); } assert( (rc==LSM_OK)==(pDb->pClient!=0) ); assert( pDb->iReader>=0 ); + + /* Check that the client has the right compression hooks loaded. + ** If not, set rc to LSM_MISMATCH. */ + if( rc==LSM_OK ){ + rc = lsmCheckCompressionId(pDb, pDb->pClient->iCmpId); + } }else{ rc = lsmReleaseReadlock(pDb); } } + if( rc==LSM_BUSY ){ rc = LSM_OK; } } #if 0 Index: src/lsm_unix.c ================================================================== --- src/lsm_unix.c +++ src/lsm_unix.c @@ -192,22 +192,24 @@ munmap(p->pMap, p->nMap); *ppOut = p->pMap = 0; *pnOut = p->nMap = 0; } - memset(&buf, 0, sizeof(buf)); - prc = fstat(p->fd, &buf); - if( prc!=0 ) return LSM_IOERR_BKPT; - iSz = buf.st_size; - if( iSzfd, iSz); - if( prc!=0 ) return LSM_IOERR_BKPT; - } - - p->pMap = mmap(0, iSz, PROT_READ|PROT_WRITE, MAP_SHARED, p->fd, 0); - p->nMap = iSz; + if( iMin>=0 ){ + memset(&buf, 0, sizeof(buf)); + prc = fstat(p->fd, &buf); + if( prc!=0 ) return LSM_IOERR_BKPT; + iSz = buf.st_size; + if( iSzfd, iSz); + if( prc!=0 ) return LSM_IOERR_BKPT; + } + + p->pMap = mmap(0, iSz, PROT_READ|PROT_WRITE, MAP_SHARED, p->fd, 0); + p->nMap = iSz; + } *ppOut = p->pMap; *pnOut = p->nMap; return LSM_OK; } ADDED test/lsm4.test Index: test/lsm4.test ================================================================== --- /dev/null +++ test/lsm4.test @@ -0,0 +1,118 @@ +# 2013 February 06 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. 
+# +#*********************************************************************** +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix lsm4 +db close + +# Compression scheme ids (defined in test_lsm.c): +# +set compression_id(encrypt) 43 +set compression_id(rle) 44 +set compression_id(noop) 45 + +proc db_fetch {db key} { + db csr_open csr + csr seek $key eq + set ret [csr value] + csr close + set ret +} + +do_test 1.1 { + lsm_open db test.db + db config {set_compression noop} + db write 1 abc + db write 2 def + db close +} {} + +do_test 1.2 { + lsm_open db test.db + db config {set_compression noop} + list [db_fetch db 1] [db_fetch db 2] +} {abc def} + +do_test 1.3 { + db close + lsm_open db test.db + db config {set_compression rle} + list [catch {db_fetch db 1} msg] $msg +} {1 {error in lsm_csr_open() - 50}} + +do_test 1.4 { + db close + lsm_open db test.db + list [catch {db_fetch db 1} msg] $msg +} {1 {error in lsm_csr_open() - 50}} + +do_test 1.5 { + db config {set_compression_factory true} + list [db_fetch db 1] [db_fetch db 2] +} {abc def} + +do_test 1.6 { db info compression_id } $compression_id(noop) +db close + +#------------------------------------------------------------------------- +# +forcedelete test.db + +do_test 2.1 { + lsm_open db test.db + db info compression_id +} {0} + +do_test 2.2 { + db write 1 abc + db write 2 abc + db info compression_id +} {0} + +do_test 2.3 { + lsm_open db2 test.db + db2 info compression_id +} {0} + +do_test 2.4 { + db close + db2 info compression_id +} {0} + +do_test 2.5 { + db2 close + lsm_open db test.db + db info compression_id +} {1} + +db close +forcedelete test.db + +do_test 2.6 { + lsm_open db test.db + db config {set_compression rle} + db write 3 three + db write 4 four + db close + + lsm_open db test.db + db info compression_id +} $compression_id(rle) + +do_test 2.7 { + db config {set_compression rle} + list [db_fetch db 3] [db_fetch db 4] +} {three four} + +finish_test + Index: 
test/test_lsm.c ================================================================== --- test/test_lsm.c +++ test/test_lsm.c @@ -7,11 +7,10 @@ ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* -** */ #include #include "lsm.h" #include "sqlite4.h" @@ -18,10 +17,137 @@ #include #include extern int getDbPointer(Tcl_Interp *interp, const char *zA, sqlite4 **ppDb); extern const char *sqlite4TestErrorName(int); + +/************************************************************************* +** Compression routines "encrypt": XOR each 32-bit word with a fixed mask. */ +#define ENCRYPTION_XOR_MASK 0x23b2bbb6 +static int testCompressEncBound(void *pCtx, int nSrc){ + return nSrc; /* output is the same size as the input */ +} +static int testCompressEncCompress( + void *pCtx, + char *pOut, int *pnOut, + const char *pIn, int nIn +){ + int i; + const unsigned int *aIn = (const unsigned int *)pIn; /* words read from the input buffer */ + unsigned int *aOut = (unsigned int *)pOut; /* words written to the output buffer */ + + assert( (nIn%4)==0 ); + for(i=0; i<(nIn/4); i++){ + aOut[i] = (aIn[i] ^ ENCRYPTION_XOR_MASK); + } + *pnOut = nIn; + + return LSM_OK; +} +static int testCompressEncUncompress( + void *pCtx, + char *pOut, int *pnOut, + const char *pIn, int nIn +){ + return testCompressEncCompress(pCtx, pOut, pnOut, pIn, nIn); /* XOR is its own inverse */ +} +static void testCompressEncFree(void *pCtx){ + /* no-op */ +} +/* +** End of compression routines "encrypt". 
+*************************************************************************/ + +/************************************************************************* +*/ +static int testCompressRleBound(void *pCtx, int nSrc){ + return nSrc*2; +} +static int testCompressRleCompress( + void *pCtx, + char *pOut, int *pnOut, + const char *pIn, int nIn +){ + int iOut = 0; + int i; + char c; + int n; + + c = pIn[0]; + n = 1; + for(i=1; idb); ckfree((char *)p); } } + +static int testInfoLsm(Tcl_Interp *interp, lsm_db *db, Tcl_Obj *pObj){ + struct Lsminfo { + const char *zOpt; + int eOpt; + } aInfo[] = { + { "compression_id", LSM_INFO_COMPRESSION_ID }, + { 0, 0 } + }; + int rc; + int iOpt; + + rc = Tcl_GetIndexFromObjStruct( + interp, pObj, aInfo, sizeof(aInfo[0]), "option", 0, &iOpt + ); + if( rc==LSM_OK ){ + switch( aInfo[iOpt].eOpt ){ + case LSM_INFO_COMPRESSION_ID: { + unsigned int iCmpId = 0; + rc = lsm_info(db, LSM_INFO_COMPRESSION_ID, &iCmpId); + if( rc==LSM_OK ){ + Tcl_SetObjResult(interp, Tcl_NewWideIntObj((Tcl_WideInt)iCmpId)); + }else{ + test_lsm_error(interp, "lsm_info", rc); + } + break; + } + } + } + + return rc; +} /* ** Usage: CSR sub-command ... */ static int test_lsm_cursor_cmd( @@ -519,10 +755,11 @@ /* 7 */ {"csr_open", 1, "CSR"}, /* 8 */ {"work", -1, "?NMERGE? 
NPAGE"}, /* 9 */ {"flush", 0, ""}, /* 10 */ {"config", 1, "LIST"}, /* 11 */ {"checkpoint", 0, ""}, + /* 12 */ {"info", 1, "OPTION"}, {0, 0, 0} }; int iCmd; int rc; TclLsm *p = (TclLsm *)clientData; @@ -621,11 +858,10 @@ case 8: assert( 0==strcmp(aCmd[8].zCmd, "work") ); { int nWork = 0; int nMerge = 1; int nWrite = 0; - int i; if( objc==3 ){ rc = Tcl_GetIntFromObj(interp, objv[2], &nWork); }else if( objc==4 ){ rc = Tcl_GetIntFromObj(interp, objv[2], &nMerge); @@ -654,10 +890,14 @@ case 11: assert( 0==strcmp(aCmd[11].zCmd, "checkpoint") ); { rc = lsm_checkpoint(p->db, 0); return test_lsm_error(interp, "lsm_checkpoint", rc); } + + case 12: assert( 0==strcmp(aCmd[12].zCmd, "info") ); { + return testInfoLsm(interp, p->db, objv[2]); + } default: assert( 0 ); } Index: www/lsmapi.wiki ================================================================== --- www/lsmapi.wiki +++ www/lsmapi.wiki @@ -19,11 +19,11 @@
  1. Database Runtime Environment
  2. LSM Error Codes
  3. Creating and Destroying Database Connection Handles
  4. Connecting to a Database -
  5. Obtaining pointers to databases environments +
  6. Obtaining pointers to database environments
  7. Configuring a database connection.
  8. Compression and/or Encryption Hooks
  9. Allocating and Freeing Memory
  10. Querying a Connection For Operational Data
  11. Opening and Closing Write Transactions @@ -65,10 +65,11 @@ lsm_rollback lsm_work

    All LSM API Types

    lsm_compress +lsm_compress lsm_env

    All LSM API Constants

    LSM_BUSY LSM_CANTOPEN @@ -76,26 +77,27 @@ LSM_CONFIG_AUTOFLUSH LSM_CONFIG_AUTOMERGE LSM_CONFIG_AUTOWORK LSM_CONFIG_BLOCK_SIZE LSM_CONFIG_GET_COMPRESSION -LSM_CONFIG_LOG_SIZE LSM_CONFIG_MAX_FREELIST LSM_CONFIG_MMAP LSM_CONFIG_MULTIPLE_PROCESSES LSM_CONFIG_PAGE_SIZE LSM_CONFIG_SAFETY LSM_CONFIG_SET_COMPRESSION +LSM_CONFIG_SET_COMPRESSION_FACTORY LSM_CONFIG_USE_LOG LSM_CORRUPT LSM_ERROR LSM_FULL LSM_INFO_ARRAY_PAGES LSM_INFO_ARRAY_STRUCTURE LSM_INFO_CHECKPOINT_SIZE LSM_INFO_DB_STRUCTURE LSM_INFO_FREELIST +LSM_INFO_FREELIST_SIZE LSM_INFO_LOG_STRUCTURE LSM_INFO_NREAD LSM_INFO_NWRITE LSM_INFO_PAGE_ASCII_DUMP LSM_INFO_PAGE_HEX_DUMP @@ -180,49 +182,63 @@

    Open and close a database connection handle.

    Connecting to a Database

    int lsm_open(lsm_db *pDb, const char *zFilename); -

    Obtaining pointers to databases environments

    +

    Obtaining pointers to database environments

    lsm_env *lsm_get_env(lsm_db *pDb); lsm_env *lsm_default_env(void);

    Return a pointer to the environment used by the database connection passed as the first argument. Assuming the argument is valid, this function always returns a valid environment pointer - it cannot fail. The lsm_default_env() function returns a pointer to the default LSM environment for the current platform. -

    Configuring a database connection.

    +

    Configuring a database connection.

    int lsm_config(lsm_db *, int, ...); -#define LSM_CONFIG_AUTOFLUSH 1 -#define LSM_CONFIG_PAGE_SIZE 2 -#define LSM_CONFIG_SAFETY 3 -#define LSM_CONFIG_BLOCK_SIZE 4 -#define LSM_CONFIG_AUTOWORK 5 -#define LSM_CONFIG_LOG_SIZE 6 -#define LSM_CONFIG_MMAP 7 -#define LSM_CONFIG_USE_LOG 8 -#define LSM_CONFIG_AUTOMERGE 9 -#define LSM_CONFIG_MAX_FREELIST 10 -#define LSM_CONFIG_MULTIPLE_PROCESSES 11 -#define LSM_CONFIG_AUTOCHECKPOINT 12 -#define LSM_CONFIG_SET_COMPRESSION 13 -#define LSM_CONFIG_GET_COMPRESSION 14 +#define LSM_CONFIG_AUTOFLUSH 1 +#define LSM_CONFIG_PAGE_SIZE 2 +#define LSM_CONFIG_SAFETY 3 +#define LSM_CONFIG_BLOCK_SIZE 4 +#define LSM_CONFIG_AUTOWORK 5 +#define LSM_CONFIG_MMAP 7 +#define LSM_CONFIG_USE_LOG 8 +#define LSM_CONFIG_AUTOMERGE 9 +#define LSM_CONFIG_MAX_FREELIST 10 +#define LSM_CONFIG_MULTIPLE_PROCESSES 11 +#define LSM_CONFIG_AUTOCHECKPOINT 12 +#define LSM_CONFIG_SET_COMPRESSION 13 +#define LSM_CONFIG_GET_COMPRESSION 14 +#define LSM_CONFIG_SET_COMPRESSION_FACTORY 15 #define LSM_SAFETY_OFF 0 #define LSM_SAFETY_NORMAL 1 #define LSM_SAFETY_FULL 2

    The lsm_config() function is used to configure a database connection. The following values may be passed as the second argument to lsm_config(). -

    LSM_CONFIG_AUTOFLUSH
    A read/write integer parameter. This value determines the maximum amount -of space (in bytes) used to accumulate writes in main-memory before -they are flushed to a level 0 segment. +

    LSM_CONFIG_AUTOFLUSH
    A read/write integer parameter. +

    This value determines the amount of data allowed to accumulate in a +live in-memory tree before it is marked as old. After committing a +transaction, a connection checks if the size of the live in-memory tree, +including data structure overhead, is greater than the value of this +option in KB. If it is, and there is not already an old in-memory tree, +the live in-memory tree is marked as old. +

    The maximum allowable value is 1048576 (1GB). There is no minimum +value. If this parameter is set to zero, then an attempt is made to +mark the live in-memory tree as old after each transaction is committed. +

    The default value is 1024 (1MB).

    LSM_CONFIG_PAGE_SIZE
    A read/write integer parameter. This parameter may only be set before lsm_open() has been called. -

    LSM_CONFIG_BLOCK_SIZE
    A read/write integer parameter. This parameter may only be set before -lsm_open() has been called. -

    LSM_CONFIG_LOG_SIZE
    A read/write integer parameter. +

    LSM_CONFIG_BLOCK_SIZE
    A read/write integer parameter. +

    This parameter may only be set before lsm_open() has been called. It +must be set to a power of two between 64 and 65536, inclusive (block +sizes between 64KB and 64MB). +

    If the connection creates a new database, the block size of the new +database is set to the value of this option in KB. After lsm_open() +has been called, querying this parameter returns the actual block +size of the opened database. +

    The default value is 1024 (1MB blocks).

    LSM_CONFIG_SAFETY
    A read/write integer parameter. Valid values are 0, 1 (the default) and 2. This parameter determines how robust the database is in the face of a system crash (e.g. a power failure or operating system crash). As follows:

    0 (off): No robustness. A system crash may corrupt the database. @@ -232,10 +248,21 @@

    2 (full): Full robustness. A system crash may not corrupt the database file. Following recovery the database file contains all successfully committed transactions.

    LSM_CONFIG_AUTOWORK
    A read/write integer parameter.

    LSM_CONFIG_AUTOCHECKPOINT
    A read/write integer parameter. +

    If this option is set to a non-zero value N, then a checkpoint is +automatically attempted after each N KB of data have been written to +the database file. +

    The amount of uncheckpointed data already written to the database file +is a global parameter. After performing database work (writing to the +database file), the process checks if the total amount of uncheckpointed +data exceeds the value of this parameter. If so, a checkpoint is performed. +This means that this option may cause the connection to perform a +checkpoint even if the current connection has itself written very little +data into the database file. +

    The default value is 2048 (checkpoint every 2MB).

    LSM_CONFIG_MMAP
    A read/write integer parameter. True to use mmap() to access the database file. False otherwise.

    LSM_CONFIG_USE_LOG
    A read/write boolean parameter. True (the default) to use the log file normally. False otherwise.

    LSM_CONFIG_AUTOMERGE
    A read/write integer parameter. The minimum number of segments to @@ -257,17 +284,25 @@ structures contents.

    This option may only be used before lsm_open() is called. Invoking it after lsm_open() has been called results in an LSM_MISUSE error.

    LSM_CONFIG_GET_COMPRESSION
    Query the compression methods used to compress and decompress database content. -

    Compression and/or Encryption Hooks

    +

    LSM_CONFIG_SET_COMPRESSION_FACTORY
    Configure a factory method to be invoked in case of an LSM_MISMATCH +error. +

    Compression and/or Encryption Hooks

    struct lsm_compress { void *pCtx; unsigned int iId; int (*xBound)(void *, int nSrc); int (*xCompress)(void *, char *, int *, const char *, int); int (*xUncompress)(void *, char *, int *, const char *, int); + void (*xFree)(void *pCtx); +}; +struct lsm_compress_factory { + void *pCtx; + int (*xFactory)(void *, lsm_db *, unsigned int); + void (*xFree)(void *pCtx); };

    Allocating and Freeing Memory

    void *lsm_malloc(lsm_env*, size_t); void *lsm_realloc(lsm_env*, void *, size_t); @@ -274,11 +309,11 @@ void lsm_free(lsm_env*, void *);

    Invoke the memory allocation functions that belong to environment pEnv. Or the system defaults if no memory allocation functions have been registered. -

    Querying a Connection For Operational Data

    +

    Querying a Connection For Operational Data

    int lsm_info(lsm_db *, int, ...); #define LSM_INFO_NWRITE 1 #define LSM_INFO_NREAD 2 #define LSM_INFO_DB_STRUCTURE 3 #define LSM_INFO_LOG_STRUCTURE 4 @@ -287,10 +322,11 @@ #define LSM_INFO_PAGE_HEX_DUMP 7 #define LSM_INFO_FREELIST 8 #define LSM_INFO_ARRAY_PAGES 9 #define LSM_INFO_CHECKPOINT_SIZE 10 #define LSM_INFO_TREE_SIZE 11 +#define LSM_INFO_FREELIST_SIZE 12

    Query a database connection for operational statistics or data. The following values may be passed as the second argument to lsm_info().

    LSM_INFO_NWRITE
    The third parameter should be of type (int *). The location pointed to by the third parameter is set to the number of 4KB pages written to @@ -352,11 +388,11 @@ string should be eventually freed by the caller using lsm_free().

    The Tcl structure returned is a list containing one element for each free block in the database. The element itself consists of two integers - the block number and the id of the snapshot that freed it.

    LSM_INFO_CHECKPOINT_SIZE
    The third argument should be of type (int *). The location pointed to -by this argument is populated with the number of bytes written to the +by this argument is populated with the number of KB written to the database file since the most recent checkpoint.

    LSM_INFO_TREE_SIZE
    If this value is passed as the second argument to an lsm_info() call, it should be followed by two arguments of type (int *) (for a total of four arguments).

    At any time, there are either one or two tree structures held in shared @@ -365,11 +401,11 @@ information on them). One tree structure - the current tree - is used to accumulate new data written to the database. The other tree structure - the old tree - is a read-only tree holding older data and may be flushed to disk at any time.

    Assuming no error occurs, the location pointed to by the first of the two -(int *) arguments is set to the size of the old in-memory tree in bytes. +(int *) arguments is set to the size of the old in-memory tree in KB. The second is set to the size of the current, or live in-memory tree.

    Opening and Closing Write Transactions

    int lsm_begin(lsm_db *pDb, int iLevel); int lsm_commit(lsm_db *pDb, int iLevel); int lsm_rollback(lsm_db *pDb, int iLevel); @@ -407,21 +443,21 @@ Delete all database entries with keys that are greater than (pKey1/nKey1) and smaller than (pKey2/nKey2). Note that keys (pKey1/nKey1) and (pKey2/nKey2) themselves, if they exist in the database, are not deleted.

    Return LSM_OK if successful, or an LSM error code otherwise.

    Explicit Database Work and Checkpointing

    -int lsm_work(lsm_db *pDb, int nMerge, int nPage, int *pnWrite); +int lsm_work(lsm_db *pDb, int nMerge, int nKB, int *pnWrite); int lsm_flush(lsm_db *pDb); -int lsm_checkpoint(lsm_db *pDb, int *pnByte); +int lsm_checkpoint(lsm_db *pDb, int *pnKB);

    This function is called by a thread to work on the database structure. Attempt to checkpoint the current database snapshot. Return an LSM error code if an error occurs or LSM_OK otherwise.

    If the current snapshot has already been checkpointed, calling this -function is a no-op. In this case if pnByte is not NULL, *pnByte is +function is a no-op. In this case if pnKB is not NULL, *pnKB is set to 0. Or, if the current snapshot is successfully checkpointed -by this function and pbCkpt is not NULL, *pnByte is set to the number +by this function and pnKB is not NULL, *pnKB is set to the number of KB written to the database file since the previous checkpoint (the same measure as returned by the LSM_INFO_CHECKPOINT_SIZE query).

    Opening and Closing Database Cursors

    int lsm_csr_open(lsm_db *pDb, lsm_cursor **ppCsr); int lsm_csr_close(lsm_cursor *pCsr); Index: www/lsmusr.wiki ================================================================== --- www/lsmusr.wiki +++ www/lsmusr.wiki @@ -372,17 +372,17 @@ lsm_csr_valid() returns 0, and the loop is finished. API function lsm_csr_key() is used to retrieve the key associated with each database entry visited. - for(rc = lsm_csr_first(csr); lsm_csr_valid(csr); rc = lsm_csr_next(csr)){ + for(rc=lsm_csr_first(csr); rc==LSM_OK && lsm_csr_valid(csr); rc=lsm_csr_next(csr)){ const void *pKey; int nKey; const void *pVal; int nVal; rc = lsm_csr_key(csr, &pKey, &nKey); if( rc==LSM_OK ) rc = lsm_csr_value(csr, &pVal, &nVal); if( rc!=LSM_OK ) break; /* At this point pKey points to the current key (size nKey bytes) and ** pVal points to the corresponding value (size nVal bytes). */ } @@ -725,51 +725,84 @@ int (*xUncompress)(void *pCtx, void *pOut, int *pnOut, const void *pIn, int nIn); void (*xFree)(void *pCtx); }; -

    Explain how the hooks work here (same as zipvfs) +

    Explain how the hooks work here (same as zipvfs) + -

    Example code? Using zlib? Or something simple like an RLE -implementation? +

    Example code? Using zlib? Or something simple like an +RLE implementation?

    The database file header of any LSM database contains a 32-bit unsigned "compression id" field. If the database is not a compressed database, this field is set to 1. Otherwise, it is set to an application supplied value identifying the compression and/or encryption scheme in use. Application compression scheme ids must be greater than or equal to 10000. Values smaller than 10000 are reserved for internal use. -

    The lsm_compression_id() API may be used to read the compression id from -a database connection. Because the compression id is stored in the database +

    The lsm_info() API may be used to read the compression id from a database +connection as follows: + + + unsigned int iCompressionId; + rc = lsm_info(db, LSM_INFO_COMPRESSION_ID, &iCompressionId); + if( rc==LSM_OK ){ + /* Variable iCompressionId now contains the db compression id */ + } + + +Because the compression id is stored in the database header, it may be read before any required compression or encryption hooks are configured. #define LSM_COMPRESSION_EMPTY 0 #define LSM_COMPRESSION_NONE 1 - int lsm_compression_id(lsm_db *db, u32 *piId);

    When a database is opened for the first time, before it is first written, -the compression id field is set to LSM_COMPRESSION_EMPTY (0). The first time -a transaction is committed, the database compression id is set to a copy of -the lsm_compress.iId field of the compression hooks for the database handle -committing the transaction, or to LSM_COMPRESSION_NONE (1) if no compression -hooks are configured. +the compression id field is set to LSM_COMPRESSION_EMPTY (0). After data is +written into the database file, the database compression id is set to a copy +of the lsm_compress.iId field of the compression hooks for the database handle +doing the writing, or to LSM_COMPRESSION_NONE (1) if no compression hooks +are configured.

    Once the compression id is set to something other than -LSM_COMPRESSION_EMPTY, when a database handle opens a read or write -transaction on the database, the compression id is compared against the -lsm_compress.iId field of the configured compression hooks, or against LSM_COMPRESSION_NONE if no compression hooks are configured. If the compression id -does not match, then an LSM_MISMATCH error is returned and the operation -fails (no transaction or database cursor is opened). - -

    Maybe there should be a way to register a mismatch-handler callback. -Otherwise, applications have to handle LSM_MISMATCH everywhere... - - +LSM_COMPRESSION_EMPTY, when a database handle attempts to read or write the +database file, the compression id is compared against the lsm_compress.iId +field of the configured compression hooks, or against LSM_COMPRESSION_NONE if +no compression hooks are configured. If the compression id does not match, then +an LSM_MISMATCH error is returned and the operation fails (no transaction or +database cursor is opened). + +

    It is also possible to register a compression factory callback with a +database handle. If one is registered, the compression factory callback is +invoked instead of returning LSM_MISMATCH if the configured compression hooks +do not match the compression id of a database. If the callback registers +compatible compression hooks with the database handle (using the normal +lsm_config() interface), then the database read or write operation resumes +after it returns. Otherwise, if the compression factory callback does not +register new, compatible, compression hooks with the database handle, +LSM_MISMATCH is returned to the user. + +

    A compression factory callback is registered with a database handle +by calling lsm_config() with the second argument set to +LSM_CONFIG_SET_COMPRESSION_FACTORY, and the third argument set to point to +an instance of structure lsm_compress_factory. The lsm_config() call copies the +contents of the structure - it does not retain a pointer to it. + + + typedef struct lsm_compress_factory lsm_compress_factory; + struct lsm_compress_factory { + void *pCtx; + int (*xFactory)(void *pCtx, lsm_db *db, unsigned int iCompressionId); + void (*xFree)(void *pCtx); + }; + + +

    Explain how the xFactory hook works here.

    6. Performance Tuning

    This section describes the various measures that can be taken in order to fine-tune LSM in order to improve performance in specific circumstances. @@ -1148,11 +1181,11 @@ clients should set the LSM_CONFIG_AUTOWORK parameter to zero. int rc; lsm_db *db; - int nCkpt = 4*1024*1024; + int nCkpt = 4*1024; /* 4096KB == 4MB */ /* Open a database connection to database "test.db". ** ** Configure the connection to automatically checkpoint the database after ** writing each 4MB of data to it (instead of the default 2MB). As well @@ -1166,11 +1199,11 @@ while( 1 ){ int nWrite; /* Attempt up to 512KB of work. Set nWrite to the number of bytes ** actually written to disk. */ - rc = lsm_work(db, 2, 512*1024, &nWrite); + rc = lsm_work(db, 2, 512, &nWrite); if( rc!=LSM_OK && rc!=LSM_BUSY ){ /* Anything other than LSM_OK or LSM_BUSY is a problem. LSM_BUSY ** indicates that some other client has taken the WORKER lock. Any ** other error indicates something has gone quite wrong. */ lsm_close(db);