Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Set a flag on levels that consist entirely of freelist entries. Use this flag to avoid counter-productive merges during database optimization. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
48bd83a17a615805c8b0524facbecc1c |
User & Date: | dan 2012-11-08 11:59:16.953 |
Context
2012-11-08
| ||
21:30 | Add lsmusr.wiki. User documentation for lsm. check-in: c50bcdc37d user: dan tags: trunk | |
11:59 | Set a flag on levels that consist entirely of freelist entries. Use this flag to avoid counter-productive merges during database optimization. check-in: 48bd83a17a user: dan tags: trunk | |
2012-11-07
| ||
20:08 | Remove the LSM_WORK_OPTIMIZE flag. Add free-list management related tests and fixes. check-in: 91912a39ca user: dan tags: trunk | |
Changes
Changes to src/lsmInt.h.
︙ | ︙ | |||
353 354 355 356 357 358 359 | Segment lhs; /* Left-hand (main) segment */ int nRight; /* Size of apRight[] array */ Segment *aRhs; /* Old segments being merged into this */ int iSplitTopic; /* Split key topic (if nRight>0) */ void *pSplitKey; /* Pointer to split-key (if nRight>0) */ int nSplitKey; /* Number of bytes in split-key */ | | > > > > > > > > > > > > > > > | 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 | Segment lhs; /* Left-hand (main) segment */ int nRight; /* Size of apRight[] array */ Segment *aRhs; /* Old segments being merged into this */ int iSplitTopic; /* Split key topic (if nRight>0) */ void *pSplitKey; /* Pointer to split-key (if nRight>0) */ int nSplitKey; /* Number of bytes in split-key */ u16 iAge; /* Number of times data has been written */ u16 flags; /* Mask of LEVEL_XXX bits */ Merge *pMerge; /* Merge operation currently underway */ Level *pNext; /* Next level in tree */ }; /* ** The Level.flags field is set to a combination of the following bits. ** ** LEVEL_FREELIST_ONLY: ** Set if the level consists entirely of free-list entries. ** ** LEVEL_INCOMPLETE: ** This is set while a new toplevel level is being constructed. It is ** never set for any level other than a new toplevel. */ #define LEVEL_FREELIST_ONLY 0x0001 #define LEVEL_INCOMPLETE 0x0002 /* ** A structure describing an ongoing merge. There is an instance of this ** structure for every Level currently undergoing a merge in the worker ** snapshot. ** ** It is assumed that code that uses an instance of this structure has |
︙ | ︙ |
Changes to src/lsm_ckpt.c.
︙ | ︙ | |||
48 49 50 51 52 53 54 | ** ** Append points: ** ** 8 integers (4 * 64-bit page numbers). See ckptExportAppendlist(). ** ** For each level in the database, a level record. Formatted as follows: ** | | > | 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 | ** ** Append points: ** ** 8 integers (4 * 64-bit page numbers). See ckptExportAppendlist(). ** ** For each level in the database, a level record. Formatted as follows: ** ** 0. Age of the level (least significant 16-bits). And flags mask (most ** significant 16-bits). ** 1. The number of right-hand segments (nRight, possibly 0), ** 2. Segment record for left-hand segment (8 integers defined below), ** 3. Segment record for each right-hand segment (8 integers defined below), ** 4. If nRight>0, The number of segments involved in the merge ** 5. if nRight>0, Current nSkip value (see Merge structure defn.), ** 6. For each segment in the merge: ** 5a. Page number of next cell to read during merge (this field |
︙ | ︙ | |||
303 304 305 306 307 308 309 | int *piOut, /* IN/OUT: Size of checkpoint so far */ int *pRc /* IN/OUT: Error code */ ){ int iOut = *piOut; Merge *pMerge; pMerge = pLevel->pMerge; | | | 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 | int *piOut, /* IN/OUT: Size of checkpoint so far */ int *pRc /* IN/OUT: Error code */ ){ int iOut = *piOut; Merge *pMerge; pMerge = pLevel->pMerge; ckptSetValue(p, iOut++, (u32)pLevel->iAge + (u32)(pLevel->flags<<16), pRc); ckptSetValue(p, iOut++, pLevel->nRight, pRc); ckptExportSegment(&pLevel->lhs, p, &iOut, pRc); assert( (pLevel->nRight>0)==(pMerge!=0) ); if( pMerge ){ int i; for(i=0; i<pLevel->nRight; i++){ |
︙ | ︙ | |||
535 536 537 538 539 540 541 | for(i=0; rc==LSM_OK && i<nLevel; i++){ int iRight; Level *pLevel; /* Allocate space for the Level structure and Level.apRight[] array */ pLevel = (Level *)lsmMallocZeroRc(pDb->pEnv, sizeof(Level), &rc); if( rc==LSM_OK ){ | | > > | 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 | for(i=0; rc==LSM_OK && i<nLevel; i++){ int iRight; Level *pLevel; /* Allocate space for the Level structure and Level.apRight[] array */ pLevel = (Level *)lsmMallocZeroRc(pDb->pEnv, sizeof(Level), &rc); if( rc==LSM_OK ){ pLevel->iAge = (u16)(aIn[iIn] & 0x0000FFFF); pLevel->flags = (u16)((aIn[iIn]>>16) & 0x0000FFFF); iIn++; pLevel->nRight = aIn[iIn++]; if( pLevel->nRight ){ int nByte = sizeof(Segment) * pLevel->nRight; pLevel->aRhs = (Segment *)lsmMallocZeroRc(pDb->pEnv, nByte, &rc); } if( rc==LSM_OK ){ *ppNext = pLevel; |
︙ | ︙ |
Changes to src/lsm_sorted.c.
︙ | ︙ | |||
2214 2215 2216 2217 2218 2219 2220 | static int multiCursorAddAll(MultiCursor *pCsr, Snapshot *pSnap){ Level *pLvl; int nPtr = 0; int iPtr = 0; int rc = LSM_OK; for(pLvl=pSnap->pLevel; pLvl; pLvl=pLvl->pNext){ | > > > > > | | | 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 | static int multiCursorAddAll(MultiCursor *pCsr, Snapshot *pSnap){ Level *pLvl; int nPtr = 0; int iPtr = 0; int rc = LSM_OK; for(pLvl=pSnap->pLevel; pLvl; pLvl=pLvl->pNext){ /* If the LEVEL_INCOMPLETE flag is set, then this function is being ** called (indirectly) from within a sortedNewToplevel() call to ** construct pLvl. In this case ignore pLvl - this cursor is going to ** be used to retrieve a freelist entry from the LSM, and the partially ** complete level may confuse it. */ if( pLvl->flags & LEVEL_INCOMPLETE ) continue; nPtr += (1 + pLvl->nRight); } assert( pCsr->aPtr==0 ); pCsr->aPtr = lsmMallocZeroRc(pCsr->pDb->pEnv, sizeof(SegmentPtr) * nPtr, &rc); if( rc==LSM_OK ) pCsr->nPtr = nPtr; for(pLvl=pSnap->pLevel; pLvl && rc==LSM_OK; pLvl=pLvl->pNext){ int i; if( pLvl->flags & LEVEL_INCOMPLETE ) continue; pCsr->aPtr[iPtr].pLevel = pLvl; pCsr->aPtr[iPtr].pSeg = &pLvl->lhs; iPtr++; for(i=0; i<pLvl->nRight; i++){ pCsr->aPtr[iPtr].pLevel = pLvl; pCsr->aPtr[iPtr].pSeg = &pLvl->aRhs[i]; iPtr++; |
︙ | ︙ | |||
4045 4046 4047 4048 4049 4050 4051 | if( pCsr ){ pCsr->pDb = pDb; rc = multiCursorVisitFreelist(pCsr); if( rc==LSM_OK ){ rc = multiCursorAddTree(pCsr, pDb->pWorker, eTree); } if( rc==LSM_OK | < > | 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 | if( pCsr ){ pCsr->pDb = pDb; rc = multiCursorVisitFreelist(pCsr); if( rc==LSM_OK ){ rc = multiCursorAddTree(pCsr, pDb->pWorker, eTree); } if( rc==LSM_OK && pNext && pNext->pMerge==0 && pNext->lhs.iRoot && (eTree!=TREE_NONE || (pNext->flags & LEVEL_FREELIST_ONLY)) ){ pDel = &pNext->lhs; rc = btreeCursorNew(pDb, pDel, &pCsr->pBtCsr); } if( pNext==0 ){ multiCursorIgnoreDelete(pCsr); |
︙ | ︙ | |||
4072 4073 4074 4075 4076 4077 4078 | Merge merge; /* Merge object used to create new level */ MergeWorker mergeworker; /* MergeWorker object for the same purpose */ memset(&merge, 0, sizeof(Merge)); memset(&mergeworker, 0, sizeof(MergeWorker)); pNew->pMerge = &merge; | | > > > > < | | 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 | Merge merge; /* Merge object used to create new level */ MergeWorker mergeworker; /* MergeWorker object for the same purpose */ memset(&merge, 0, sizeof(Merge)); memset(&mergeworker, 0, sizeof(MergeWorker)); pNew->pMerge = &merge; pNew->flags |= LEVEL_INCOMPLETE; mergeworker.pDb = pDb; mergeworker.pLevel = pNew; mergeworker.pCsr = pCsr; pCsr->pPrevMergePtr = &iLeftPtr; /* Mark the separators array for the new level as a "phantom". */ mergeworker.bFlush = 1; /* Do the work to create the new merged segment on disk */ if( rc==LSM_OK ) rc = lsmMCursorFirst(pCsr); while( rc==LSM_OK && mergeWorkerDone(&mergeworker)==0 ){ rc = mergeWorkerStep(&mergeworker); } assert( rc!=LSM_OK || mergeworker.nWork==0 || pNew->lhs.iFirst ); nWrite = mergeworker.nWork; mergeWorkerShutdown(&mergeworker, &rc); pNew->flags &= ~LEVEL_INCOMPLETE; if( eTree==TREE_NONE ){ pNew->flags |= LEVEL_FREELIST_ONLY; } pNew->pMerge = 0; } if( rc!=LSM_OK || pNew->lhs.iFirst==0 ){ assert( rc!=LSM_OK || pDb->pWorker->freelist.nEntry==0 ); lsmDbSnapshotSetLevel(pDb->pWorker, pNext); sortedFreeLevel(pDb->pEnv, pNew); }else{ if( pDel ) pDel->iRoot = 0; #if 0 lsmSortedDumpStructure(pDb, pDb->pWorker, 0, 0, "new-toplevel"); #endif if( freelist.nEntry ){ Freelist *p = &pDb->pWorker->freelist; lsmFree(pDb->pEnv, p->aEntry); memcpy(p, &freelist, sizeof(freelist)); freelist.aEntry = 0; |
︙ | ︙ | |||
4163 4164 4165 4166 4167 4168 4169 | /* Allocate the new Level object */ pNew = (Level *)lsmMallocZeroRc(pDb->pEnv, sizeof(Level), &rc); if( pNew ){ pNew->aRhs = (Segment *)lsmMallocZeroRc(pDb->pEnv, nMerge * sizeof(Segment), &rc); } | < > > > > | 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 | /* Allocate the new Level object */ pNew = (Level *)lsmMallocZeroRc(pDb->pEnv, sizeof(Level), &rc); if( pNew ){ pNew->aRhs = (Segment *)lsmMallocZeroRc(pDb->pEnv, nMerge * sizeof(Segment), &rc); } /* Populate the new Level object */ if( rc==LSM_OK ){ Level *pNext = 0; /* Level following pNew */ int i; int bFreeOnly = 1; Level *pTopLevel; Level *p = pLevel; Level **pp; pNew->nRight = nMerge; pNew->iAge = pLevel->iAge+1; for(i=0; i<nMerge; i++){ assert( p->nRight==0 ); pNext = p->pNext; pNew->aRhs[i] = p->lhs; if( (p->flags & LEVEL_FREELIST_ONLY)==0 ) bFreeOnly = 0; sortedFreeLevel(pDb->pEnv, p); p = pNext; } if( bFreeOnly ) pNew->flags |= LEVEL_FREELIST_ONLY; /* Replace the old levels with the new. */ pTopLevel = lsmDbSnapshotLevel(pDb->pWorker); pNew->pNext = p; for(pp=&pTopLevel; *pp!=pLevel; pp=&((*pp)->pNext)); *pp = pNew; lsmDbSnapshotSetLevel(pDb->pWorker, pTopLevel); |
︙ | ︙ | |||
4389 4390 4391 4392 4393 4394 4395 | } if( nThis>nBest ){ assert( pThis ); pBest = pThis; nBest = nThis; } | | > > > > > > > > > > > | | > | 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 | } if( nThis>nBest ){ assert( pThis ); pBest = pThis; nBest = nThis; } if( pBest==0 && nMerge==1 ){ int nFree = 0; int nUsr = 0; for(pLevel=pTopLevel; pLevel; pLevel=pLevel->pNext){ assert( !pLevel->nRight ); if( pLevel->flags & LEVEL_FREELIST_ONLY ){ nFree++; }else{ nUsr++; } } if( nUsr>1 ){ pBest = pTopLevel; nBest = nFree + nUsr; } } if( pBest ){ if( pBest->nRight==0 ){ rc = sortedMergeSetup(pDb, pBest, nBest, ppOut); }else{ *ppOut = pBest; |
︙ | ︙ | |||
4526 4527 4528 4529 4530 4531 4532 | /* Clean up the MergeWorker object initialized above. If no error ** has occurred, invoke the work-hook to inform the application that ** the database structure has changed. */ mergeWorkerShutdown(&mergeworker, &rc); if( rc==LSM_OK ) sortedInvokeWorkHook(pDb); #if 0 | | | 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 | /* Clean up the MergeWorker object initialized above. If no error ** has occurred, invoke the work-hook to inform the application that ** the database structure has changed. */ mergeWorkerShutdown(&mergeworker, &rc); if( rc==LSM_OK ) sortedInvokeWorkHook(pDb); #if 0 lsmSortedDumpStructure(pDb, pDb->pWorker, 0, 0, "work"); #endif assertBtreeOk(pDb, &pLevel->lhs); assertRunInOrder(pDb, &pLevel->lhs); /* If bFlush is true and the database is no longer considered "full", ** break out of the loop even if nRemaining is still greater than ** zero. The caller has an in-memory tree to flush to disk. */ |
︙ | ︙ | |||
5247 5248 5249 5250 5251 5252 5253 | fileToString(pDb->pEnv, zLeft, sizeof(zLeft), 28, aLeft[i]); } if( i<nRight ){ fileToString(pDb->pEnv, zRight, sizeof(zRight), 28, aRight[i]); } if( i==0 ){ | | | | 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 | fileToString(pDb->pEnv, zLeft, sizeof(zLeft), 28, aLeft[i]); } if( i<nRight ){ fileToString(pDb->pEnv, zRight, sizeof(zRight), 28, aRight[i]); } if( i==0 ){ sqlite4_snprintf(zLevel, sizeof(zLevel), "L%d: (age=%d) (flags=%.4x)", iLevel, (int)pLevel->iAge, (int)pLevel->flags ); }else{ zLevel[0] = '\0'; } if( nRight==0 ){ iPad = 28 - (strlen(zLeft)/2) ; |
︙ | ︙ |
Changes to test/ckpt1.test.
︙ | ︙ | |||
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 | INSERT INTO t1 SELECT randstr(100,100), randstr(100,100) FROM t1; -- 4K INSERT INTO t1 SELECT randstr(100,100), randstr(100,100) FROM t1; -- 8K INSERT INTO t1 SELECT randstr(100,100), randstr(100,100) FROM t1; -- 16K INSERT INTO t1 SELECT randstr(100,100), randstr(100,100) FROM t1; -- 32K INSERT INTO t1 SELECT randstr(100,100), randstr(100,100) FROM t1; -- 64K } do_test 3.2 { sqlite4_lsm_work db main -nmerge 1 -npage 1000000 execsql { SELECT count(*) FROM t1 } } {65536} do_test 3.3 { db close sqlite4 db test.db execsql { SELECT count(*) FROM t1 } } {65536} do_test 3.4 { execsql { INSERT INTO t1 VALUES(randstr(100,100), randstr(100,100)) } sqlite4_lsm_work db main -nmerge 1 -npage 1000000 execsql { SELECT count(*) FROM t1 } } {65537} finish_test | > > | 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 | INSERT INTO t1 SELECT randstr(100,100), randstr(100,100) FROM t1; -- 4K INSERT INTO t1 SELECT randstr(100,100), randstr(100,100) FROM t1; -- 8K INSERT INTO t1 SELECT randstr(100,100), randstr(100,100) FROM t1; -- 16K INSERT INTO t1 SELECT randstr(100,100), randstr(100,100) FROM t1; -- 32K INSERT INTO t1 SELECT randstr(100,100), randstr(100,100) FROM t1; -- 64K } do_test 3.2 { sqlite4_lsm_flush db main sqlite4_lsm_work db main -nmerge 1 -npage 1000000 execsql { SELECT count(*) FROM t1 } } {65536} do_test 3.3 { db close sqlite4 db test.db execsql { SELECT count(*) FROM t1 } } {65536} do_test 3.4 { execsql { INSERT INTO t1 VALUES(randstr(100,100), randstr(100,100)) } sqlite4_lsm_flush db main sqlite4_lsm_work db main -nmerge 1 -npage 1000000 execsql { SELECT count(*) FROM t1 } } {65537} finish_test |
Changes to test/csr1.test.
︙ | ︙ | |||
38 39 40 41 42 43 44 | populate_db do_execsql_test 1.1 { SELECT * FROM t1 } { 0 0 1 1 2 4 3 9 4 16 5 25 6 36 7 49 8 64 9 81 } db eval { SELECT a, b FROM t1 } { do_test 1.2.1 { | | | | | 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 | populate_db do_execsql_test 1.1 { SELECT * FROM t1 } { 0 0 1 1 2 4 3 9 4 16 5 25 6 36 7 49 8 64 9 81 } db eval { SELECT a, b FROM t1 } { do_test 1.2.1 { list [catch { sqlite4_lsm_work db main -nmerge 2 -npage 10 } msg] $msg } {1 SQLITE4_MISUSE} do_test 1.2.2 { list [catch { sqlite4_lsm_work db main -nmerge 2 -npage 10 } msg] $msg } {1 SQLITE4_MISUSE} break } #------------------------------------------------------------------------- # populate_db do_execsql_test 2.1 { BEGIN; INSERT INTO t1 VALUES(10, 100); } do_test 2.2 { sqlite4 db2 ./test.db list [catch { db2 eval { BEGIN ; INSERT INTO t1 VALUES(1, 2) } } msg] $msg } {1 {database is locked}} do_execsql_test 2.3 { COMMIT } do_test 2.4 { sqlite4_lsm_work db2 main -npage 0 } {0} db2 close #------------------------------------------------------------------------- # Check that if a transaction is committed and this causes the in-memory # tree to be flushed to disk, # |
︙ | ︙ |