Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Avoid writing pages to disk out of sequential order (one problem still remains). |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
ee694a32e45ed1e27be7ad9a619de9ab |
User & Date: | dan 2012-11-17 10:51:01.915 |
Context
2012-11-17
| ||
13:17 | Fix another out-of-order writes problem. check-in: 1b21fb4494 user: dan tags: trunk | |
10:51 | Avoid writing pages to disk out of sequential order (one problem still remains). check-in: ee694a32e4 user: dan tags: trunk | |
2012-11-15
| ||
18:45 | Update the lsm code so that it matches lsmusr.wiki. check-in: 8915d39dab user: dan tags: trunk | |
Changes
Changes to lsm-test/lsmtest_main.c.
︙ | ︙ | |||
493 494 495 496 497 498 499 500 501 502 503 504 505 506 | lsm_db *pLsm; pLsm = tdb_lsm(pDb); if( pLsm ){ tdb_lsm_config_str(pDb, "mmap=1 autowork=1 automerge=4 worker_automerge=4"); } return pLsm; } #define ST_REPEAT 0 #define ST_WRITE 1 #define ST_PAUSE 2 #define ST_FETCH 3 #define ST_SCAN 4 #define ST_NSCAN 5 | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 | lsm_db *pLsm; pLsm = tdb_lsm(pDb); if( pLsm ){ tdb_lsm_config_str(pDb, "mmap=1 autowork=1 automerge=4 worker_automerge=4"); } return pLsm; } typedef struct WriteHookEvent WriteHookEvent; struct WriteHookEvent { i64 iOff; int nData; int nUs; }; WriteHookEvent prev = {0, 0, 0}; static void flushPrev(FILE *pOut){ if( prev.nData ){ fprintf(pOut, "w %s %lld %d %d\n", "d", prev.iOff, prev.nData, prev.nUs); prev.nData = 0; } } static void do_speed_write_hook2( void *pCtx, int bLog, i64 iOff, int nData, int nUs ){ FILE *pOut = (FILE *)pCtx; if( bLog ) return; if( prev.nData && nData && iOff==prev.iOff+prev.nData ){ prev.nData += nData; prev.nUs += nUs; }else{ flushPrev(pOut); if( nData==0 ){ fprintf(pOut, "s %s 0 0 %d\n", (bLog ? "l" : "d"), nUs); }else{ prev.iOff = iOff; prev.nData = nData; prev.nUs = nUs; } } } #define ST_REPEAT 0 #define ST_WRITE 1 #define ST_PAUSE 2 #define ST_FETCH 3 #define ST_SCAN 4 #define ST_NSCAN 5 |
︙ | ︙ | |||
556 557 558 559 560 561 562 563 564 565 566 567 568 569 | int nContent = 0; TestDb *pDb; Datasource *pData; DatasourceDefn defn = { TEST_DATASOURCE_RANDOM, 0, 0, 0, 0 }; char *zSystem = ""; int bLsm = 1; #ifdef NDEBUG /* If NDEBUG is defined, disable the dynamic memory related checks in ** lsmtest_mem.c. They slow things down. */ testMallocUninstall(tdb_lsm_env()); #endif | > | 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 | int nContent = 0; TestDb *pDb; Datasource *pData; DatasourceDefn defn = { TEST_DATASOURCE_RANDOM, 0, 0, 0, 0 }; char *zSystem = ""; int bLsm = 1; FILE *pLog = 0; #ifdef NDEBUG /* If NDEBUG is defined, disable the dynamic memory related checks in ** lsmtest_mem.c. They slow things down. */ testMallocUninstall(tdb_lsm_env()); #endif |
︙ | ︙ | |||
625 626 627 628 629 630 631 632 633 634 635 636 637 638 | pDb = testOpen(zSystem, !bReadonly, &rc); } if( rc!=0 ) return rc; if( bReadonly ){ nContent = testCountDatabase(pDb); } for(i=0; i<aParam[ST_REPEAT] && rc==0; i++){ int msWrite, msFetch, msScan; int iFetch; int nWrite = aParam[ST_WRITE]; if( bReadonly ){ msWrite = 0; | > > > > > | 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 | pDb = testOpen(zSystem, !bReadonly, &rc); } if( rc!=0 ) return rc; if( bReadonly ){ nContent = testCountDatabase(pDb); } #if 0 pLog = fopen("/tmp/speed.log", "w"); tdb_lsm_write_hook(pDb, do_speed_write_hook2, (void *)pLog); #endif for(i=0; i<aParam[ST_REPEAT] && rc==0; i++){ int msWrite, msFetch, msScan; int iFetch; int nWrite = aParam[ST_WRITE]; if( bReadonly ){ msWrite = 0; |
︙ | ︙ | |||
678 679 680 681 682 683 684 | printf("%d %d %d\n", i, msWrite, msFetch); fflush(stdout); } testClose(&pDb); testDatasourceFree(pData); | < < < < < < < < < < < | | < < < < < | > | 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 | printf("%d %d %d\n", i, msWrite, msFetch); fflush(stdout); } testClose(&pDb); testDatasourceFree(pData); if( pLog ){ flushPrev(pLog); fclose(pLog); } return rc; } int do_speed_tests(int nArg, char **azArg){ struct DbSystem { const char *zLibrary; const char *zColor; |
︙ | ︙ |
Changes to src/lsmInt.h.
︙ | ︙ | |||
638 639 640 641 642 643 644 | int lsmFsFileid(lsm_db *pDb, void **ppId, int *pnId); /* Creating, populating, gobbling and deleting sorted runs. */ void lsmFsGobble(lsm_db *, Segment *, Pgno *, int); int lsmFsSortedDelete(FileSystem *, Snapshot *, int, Segment *); int lsmFsSortedFinish(FileSystem *, Segment *); | | | 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 | int lsmFsFileid(lsm_db *pDb, void **ppId, int *pnId); /* Creating, populating, gobbling and deleting sorted runs. */ void lsmFsGobble(lsm_db *, Segment *, Pgno *, int); int lsmFsSortedDelete(FileSystem *, Snapshot *, int, Segment *); int lsmFsSortedFinish(FileSystem *, Segment *); int lsmFsSortedAppend(FileSystem *, Snapshot *, Segment *, int, Page **); int lsmFsSortedPadding(FileSystem *, Snapshot *, Segment *); /* Functions to retrieve the lsm_env pointer from a FileSystem or Page object */ lsm_env *lsmFsEnv(FileSystem *); lsm_env *lsmPageEnv(Page *); FileSystem *lsmPageFS(Page *); |
︙ | ︙ | |||
751 752 753 754 755 756 757 758 759 760 761 762 763 764 | int lsmSaveCursors(lsm_db *pDb); int lsmRestoreCursors(lsm_db *pDb); void lsmSortedDumpStructure(lsm_db *pDb, Snapshot *, int, int, const char *); void lsmFsDumpBlocklists(lsm_db *); void lsmPutU32(u8 *, u32); u32 lsmGetU32(u8 *); /* ** Functions from "lsm_varint.c". */ | > | 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 | int lsmSaveCursors(lsm_db *pDb); int lsmRestoreCursors(lsm_db *pDb); void lsmSortedDumpStructure(lsm_db *pDb, Snapshot *, int, int, const char *); void lsmFsDumpBlocklists(lsm_db *); void lsmSortedExpandBtreePage(Page *pPg, int nOrig); void lsmPutU32(u8 *, u32); u32 lsmGetU32(u8 *); /* ** Functions from "lsm_varint.c". */ |
︙ | ︙ |
Changes to src/lsm_file.c.
︙ | ︙ | |||
726 727 728 729 730 731 732 | return pPage->aData; } /* ** Return the page number of a page. */ Pgno lsmFsPageNumber(Page *pPage){ | | | 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 | return pPage->aData; } /* ** Return the page number of a page. */ Pgno lsmFsPageNumber(Page *pPage){ /* assert( (pPage->flags & PAGE_DIRTY)==0 ); */ return pPage ? pPage->iPg : 0; } /* ** Page pPg is currently part of the LRU list belonging to pFS. Remove ** it from the list. pPg->pLruNext and pPg->pLruPrev are cleared by this ** operation. |
︙ | ︙ | |||
1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 | ** to it. The page is writable until either lsmFsPagePersist() is called on ** it or the ref-count drops to zero. */ int lsmFsSortedAppend( FileSystem *pFS, Snapshot *pSnapshot, Segment *p, Page **ppOut ){ int rc = LSM_OK; Page *pPg = 0; *ppOut = 0; int iApp = 0; int iNext = 0; int iPrev = p->iLastPg; | > | > | | 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 | ** to it. The page is writable until either lsmFsPagePersist() is called on ** it or the ref-count drops to zero. */ int lsmFsSortedAppend( FileSystem *pFS, Snapshot *pSnapshot, Segment *p, int bDefer, Page **ppOut ){ int rc = LSM_OK; Page *pPg = 0; *ppOut = 0; int iApp = 0; int iNext = 0; int iPrev = p->iLastPg; if( pFS->pCompress || bDefer ){ /* In compressed database mode the page is not assigned a page number ** or location in the database file at this point. This will be done ** by the lsmFsPagePersist() call. */ rc = fsPageBuffer(pFS, 1, &pPg); if( rc==LSM_OK ){ pPg->pFS = pFS; pPg->pSeg = p; pPg->iPg = 0; pPg->flags |= PAGE_DIRTY; pPg->nData = pFS->nPagesize; assert( pPg->aData ); if( pFS->pCompress==0 ) pPg->nData -= 4; pPg->nRef = 1; pFS->nOut++; } }else{ if( iPrev==0 ){ iApp = findAppendPoint(pFS); }else if( fsIsLast(pFS, iPrev) ){ int iNext; rc = fsBlockNext(pFS, fsPageToBlock(pFS, iPrev), &iNext); if( rc!=LSM_OK ) return rc; iApp = fsFirstPageOnBlock(pFS, iNext); }else{ iApp = iPrev + 1; } /* If this is the first page allocated, or if the page allocated is the ** last in the block, also allocate the next block here. 
*/ if( iApp==0 || fsIsLast(pFS, iApp) ){ int iNew; /* New block number */ rc = lsmBlockAllocate(pFS->pDb, &iNew); if( rc!=LSM_OK ) return rc; if( iApp==0 ){ iApp = fsFirstPageOnBlock(pFS, iNew); |
︙ | ︙ | |||
1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 | pPg->nCompress = pFS->nBuffer; return p->xCompress(p->pCtx, (char *)pFS->aOBuffer, &pPg->nCompress, (const char *)pPg->aData, pPg->nData ); } /* ** If the page passed as an argument is dirty, update the database file ** (or mapping of the database file) with its current contents and mark ** the page as clean. ** ** Return LSM_OK if the operation is a success, or an LSM error code | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 | pPg->nCompress = pFS->nBuffer; return p->xCompress(p->pCtx, (char *)pFS->aOBuffer, &pPg->nCompress, (const char *)pPg->aData, pPg->nData ); } static int fsAppendPage( FileSystem *pFS, Segment *pSeg, Pgno *piNew, int *piPrev, int *piNext ){ Pgno iPrev = pSeg->iLastPg; int rc; assert( iPrev!=0 ); *piPrev = 0; *piNext = 0; if( fsIsLast(pFS, iPrev) ){ /* Grab the first page on the next block (which has already be ** allocated). In this case set *piPrev to tell the caller to set ** the "previous block" pointer in the first 4 bytes of the page. */ int iNext; int iBlk = fsPageToBlock(pFS, iPrev); rc = fsBlockNext(pFS, iBlk, &iNext); if( rc!=LSM_OK ) return rc; *piNew = fsFirstPageOnBlock(pFS, iNext); *piPrev = iBlk; }else{ *piNew = iPrev+1; if( fsIsLast(pFS, *piNew) ){ /* Allocate the next block here. */ int iBlk; rc = lsmBlockAllocate(pFS->pDb, &iBlk); if( rc!=LSM_OK ) return rc; *piNext = iBlk; } } pSeg->nSize++; pSeg->iLastPg = *piNew; return LSM_OK; } /* ** If the page passed as an argument is dirty, update the database file ** (or mapping of the database file) with its current contents and mark ** the page as clean. 
** ** Return LSM_OK if the operation is a success, or an LSM error code |
︙ | ︙ | |||
1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 | pPg->pHashNext = pFS->apHash[iHash]; pFS->apHash[iHash] = pPg; pPg->pSeg->nSize += (sizeof(aSz) * 2) + pPg->nCompress; }else{ i64 iOff; /* Offset to write within database file */ iOff = (i64)pFS->nPagesize * (i64)(pPg->iPg-1); if( pFS->bUseMmap==0 ){ u8 *aData = pPg->aData - (pPg->flags & PAGE_HASPREV); rc = lsmEnvWrite(pFS->pEnv, pFS->fdDb, iOff, aData, pFS->nPagesize); }else if( pPg->flags & PAGE_FREE ){ fsGrowMapping(pFS, iOff + pFS->nPagesize, &rc); if( rc==LSM_OK ){ u8 *aTo = &((u8 *)(pFS->pMap))[iOff]; | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | | | 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 | pPg->pHashNext = pFS->apHash[iHash]; pFS->apHash[iHash] = pPg; pPg->pSeg->nSize += (sizeof(aSz) * 2) + pPg->nCompress; }else{ i64 iOff; /* Offset to write within database file */ if( pPg->iPg==0 ){ /* No page number has been assigned yet. This occurs with pages used ** in the b-tree hierarchy. 
*/ int iPrev = 0; int iNext = 0; int iHash; assert( pPg->pSeg->iFirst ); assert( pPg->flags & PAGE_FREE ); assert( (pPg->flags & PAGE_HASPREV)==0 ); assert( pPg->nData==pFS->nPagesize-4 ); rc = fsAppendPage(pFS, pPg->pSeg, &pPg->iPg, &iPrev, &iNext); if( rc!=LSM_OK ) return rc; iHash = fsHashKey(pFS->nHash, pPg->iPg); pPg->pHashNext = pFS->apHash[iHash]; pFS->apHash[iHash] = pPg; if( iPrev ){ assert( iNext==0 ); memmove(&pPg->aData[4], pPg->aData, pPg->nData); lsmPutU32(pPg->aData, iPrev); pPg->flags |= PAGE_HASPREV; pPg->aData += 4; }else if( iNext ){ assert( iPrev==0 ); lsmPutU32(&pPg->aData[pPg->nData], iNext); }else{ int nData = pPg->nData; pPg->nData += 4; lsmSortedExpandBtreePage(pPg, nData); } } iOff = (i64)pFS->nPagesize * (i64)(pPg->iPg-1); if( pFS->bUseMmap==0 ){ u8 *aData = pPg->aData - (pPg->flags & PAGE_HASPREV); rc = lsmEnvWrite(pFS->pEnv, pFS->fdDb, iOff, aData, pFS->nPagesize); }else if( pPg->flags & PAGE_FREE ){ fsGrowMapping(pFS, iOff + pFS->nPagesize, &rc); if( rc==LSM_OK ){ u8 *aTo = &((u8 *)(pFS->pMap))[iOff]; u8 *aFrom = pPg->aData - (pPg->flags & PAGE_HASPREV); memcpy(aTo, aFrom, pFS->nPagesize); lsmFree(pFS->pEnv, aFrom); pPg->aData = aTo + (pPg->flags & PAGE_HASPREV); pPg->flags &= ~PAGE_FREE; fsPageAddToLru(pFS, pPg); } } } pPg->flags &= ~PAGE_DIRTY; pFS->nWrite++; |
︙ | ︙ |
Changes to src/lsm_main.c.
︙ | ︙ | |||
470 471 472 473 474 475 476 | ** ** Given the context in which this function is called (as a result of an ** lsm_info(LSM_INFO_TREE_SIZE) request), neither of these are considered to ** be problems. */ *pnNew = (int)p->root.nByte; if( p->iOldShmid ){ | | | 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 | ** ** Given the context in which this function is called (as a result of an ** lsm_info(LSM_INFO_TREE_SIZE) request), neither of these are considered to ** be problems. */ *pnNew = (int)p->root.nByte; if( p->iOldShmid ){ if( p->iOldLog==lsmCheckpointLogOffset(pShm->aSnap1) ){ *pnOld = 0; }else{ *pnOld = (int)p->oldroot.nByte; } }else{ *pnOld = 0; } |
︙ | ︙ |
Changes to src/lsm_sorted.c.
︙ | ︙ | |||
3090 3091 3092 3093 3094 3095 3096 | Page **apHier = pMW->hier.apHier; int nHier = pMW->hier.nHier; pSeg = &pMW->pLevel->lhs; for(i=0; rc==LSM_OK && i<nHier; i++){ Page *pNew = 0; | | > | > > > > > > > > > > > > > > > > > | 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 | Page **apHier = pMW->hier.apHier; int nHier = pMW->hier.nHier; pSeg = &pMW->pLevel->lhs; for(i=0; rc==LSM_OK && i<nHier; i++){ Page *pNew = 0; rc = lsmFsSortedAppend(pDb->pFS, pDb->pWorker, pSeg, 1, &pNew); assert( rc==LSM_OK ); if( rc==LSM_OK ){ u8 *a1; int n1; u8 *a2; int n2; a1 = fsPageData(pNew, &n1); a2 = fsPageData(apHier[i], &n2); assert( n1==n2 || n1+4==n2 ); if( n1==n2 ){ memcpy(a1, a2, n2); }else{ int nEntry = pageGetNRec(a2, n2); int iEof1 = SEGMENT_EOF(n1, nEntry); int iEof2 = SEGMENT_EOF(n2, nEntry); memcpy(a1, a2, iEof2 - 4); memcpy(&a1[iEof1], &a2[iEof2], n2 - iEof2); } lsmFsPageRelease(apHier[i]); apHier[i] = pNew; #if 0 assert( n1==n2 || n1+4==n2 || n2+4==n1 ); if( n1>=n2 ){ /* If n1 (size of the new page) is equal to or greater than n2 (the ** size of the old page), then copy the data into the new page. If ** n1==n2, this could be done with a single memcpy(). However, ** since sometimes n1>n2, the page content and footer must be copied ** separately. 
*/ int nEntry = pageGetNRec(a2, n2); int iEof1 = SEGMENT_EOF(n1, nEntry); int iEof2 = SEGMENT_EOF(n2, nEntry); memcpy(a1, a2, iEof2); memcpy(&a1[iEof1], &a2[iEof2], n2 - iEof2); lsmFsPageRelease(apHier[i]); apHier[i] = pNew; }else{ lsmPutU16(&a1[SEGMENT_FLAGS_OFFSET(n1)], SEGMENT_BTREE_FLAG); lsmPutU16(&a1[SEGMENT_NRECORD_OFFSET(n1)], 0); lsmPutU64(&a1[SEGMENT_POINTER_OFFSET(n1)], 0); i = i - 1; lsmFsPageRelease(pNew); } #endif } } #ifdef LSM_DEBUG if( rc==LSM_OK ){ for(i=0; i<nHier; i++) assert( lsmFsPageWritable(apHier[i]) ); } |
︙ | ︙ | |||
3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 | nRec = pageGetNRec(aData, nData); nFree = SEGMENT_EOF(nData, nRec) - mergeWorkerPageOffset(aData, nData); if( nByte<=nFree ) break; /* Otherwise, this page is full. Set the right-hand-child pointer ** to iPtr and release it. */ lsmPutU64(&aData[SEGMENT_POINTER_OFFSET(nData)], iPtr); rc = lsmFsPagePersist(pOld); if( rc==LSM_OK ){ iPtr = lsmFsPageNumber(pOld); lsmFsPageRelease(pOld); } } /* Allocate a new page for apHier[iLevel]. */ p->apHier[iLevel] = 0; if( rc==LSM_OK ){ rc = lsmFsSortedAppend( | > | | 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 | nRec = pageGetNRec(aData, nData); nFree = SEGMENT_EOF(nData, nRec) - mergeWorkerPageOffset(aData, nData); if( nByte<=nFree ) break; /* Otherwise, this page is full. Set the right-hand-child pointer ** to iPtr and release it. */ lsmPutU64(&aData[SEGMENT_POINTER_OFFSET(nData)], iPtr); assert( lsmFsPageNumber(pOld)==0 ); rc = lsmFsPagePersist(pOld); if( rc==LSM_OK ){ iPtr = lsmFsPageNumber(pOld); lsmFsPageRelease(pOld); } } /* Allocate a new page for apHier[iLevel]. */ p->apHier[iLevel] = 0; if( rc==LSM_OK ){ rc = lsmFsSortedAppend( pDb->pFS, pDb->pWorker, pSeg, 1, &p->apHier[iLevel] ); } if( rc!=LSM_OK ) return rc; aData = fsPageData(p->apHier[iLevel], &nData); memset(aData, 0, nData); lsmPutU16(&aData[SEGMENT_FLAGS_OFFSET(nData)], SEGMENT_BTREE_FLAG); |
︙ | ︙ | |||
3526 3527 3528 3529 3530 3531 3532 | ){ int rc = LSM_OK; /* Return code */ Page *pNext = 0; /* New page appended to run */ lsm_db *pDb = pMW->pDb; /* Database handle */ Segment *pSeg; /* Run to append to */ pSeg = &pMW->pLevel->lhs; | | | 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 | ){ int rc = LSM_OK; /* Return code */ Page *pNext = 0; /* New page appended to run */ lsm_db *pDb = pMW->pDb; /* Database handle */ Segment *pSeg; /* Run to append to */ pSeg = &pMW->pLevel->lhs; rc = lsmFsSortedAppend(pDb->pFS, pDb->pWorker, pSeg, 0, &pNext); assert( rc!=LSM_OK || pSeg->iFirst>0 || pMW->pDb->compress.xCompress ); if( rc==LSM_OK ){ u8 *aData; /* Data buffer belonging to page pNext */ int nData; /* Size of aData[] in bytes */ rc = mergeWorkerPersistAndRelease(pMW); |
︙ | ︙ | |||
4126 4127 4128 4129 4130 4131 4132 | assert( rc!=LSM_OK || pDb->pWorker->freelist.nEntry==0 ); lsmDbSnapshotSetLevel(pDb->pWorker, pNext); sortedFreeLevel(pDb->pEnv, pNew); }else{ if( pDel ) pDel->iRoot = 0; #if 0 | | | 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 | assert( rc!=LSM_OK || pDb->pWorker->freelist.nEntry==0 ); lsmDbSnapshotSetLevel(pDb->pWorker, pNext); sortedFreeLevel(pDb->pEnv, pNew); }else{ if( pDel ) pDel->iRoot = 0; #if 0 lsmSortedDumpStructure(pDb, pDb->pWorker, 1, 0, "new-toplevel"); #endif if( freelist.nEntry ){ Freelist *p = &pDb->pWorker->freelist; lsmFree(pDb->pEnv, p->aEntry); memcpy(p, &freelist, sizeof(freelist)); freelist.aEntry = 0; |
︙ | ︙ | |||
4565 4566 4567 4568 4569 4570 4571 | /* Clean up the MergeWorker object initialized above. If no error ** has occurred, invoke the work-hook to inform the application that ** the database structure has changed. */ mergeWorkerShutdown(&mergeworker, &rc); if( rc==LSM_OK ) sortedInvokeWorkHook(pDb); #if 0 | | | 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 | /* Clean up the MergeWorker object initialized above. If no error ** has occurred, invoke the work-hook to inform the application that ** the database structure has changed. */ mergeWorkerShutdown(&mergeworker, &rc); if( rc==LSM_OK ) sortedInvokeWorkHook(pDb); #if 0 lsmSortedDumpStructure(pDb, pDb->pWorker, 1, 0, "work"); #endif assertBtreeOk(pDb, &pLevel->lhs); assertRunInOrder(pDb, &pLevel->lhs); /* If bFlush is true and the database is no longer considered "full", ** break out of the loop even if nRemaining is still greater than ** zero. The caller has an in-memory tree to flush to disk. */ |
︙ | ︙ | |||
4658 4659 4660 4661 4662 4663 4664 | u32 nUnsync; int nPgsz; lsmCheckpointSynced(pDb, 0, 0, &nSync); nUnsync = lsmCheckpointNWrite(pDb->pShmhdr->aSnap1, 0); nPgsz = lsmCheckpointPgsz(pDb->pShmhdr->aSnap1); | | | 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 | u32 nUnsync; int nPgsz; lsmCheckpointSynced(pDb, 0, 0, &nSync); nUnsync = lsmCheckpointNWrite(pDb->pShmhdr->aSnap1, 0); nPgsz = lsmCheckpointPgsz(pDb->pShmhdr->aSnap1); nMax = LSM_MIN(nMax, (pDb->nAutockpt/nPgsz) - (int)(nUnsync-nSync)); if( nMax<nRem ){ bCkpt = 1; nRem = LSM_MAX(nMax, 0); } } /* If there exists in-memory data ready to be flushed to disk, attempt |
︙ | ︙ | |||
4726 4727 4728 4729 4730 4731 4732 | }else{ int rcdummy = LSM_BUSY; lsmFinishWork(pDb, 0, &rcdummy); } assert( pDb->pWorker==0 ); if( rc==LSM_OK ){ | | | | | | 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 | }else{ int rcdummy = LSM_BUSY; lsmFinishWork(pDb, 0, &rcdummy); } assert( pDb->pWorker==0 ); if( rc==LSM_OK ){ *pnWrite = (nMax - nRem); *pbCkpt = (bCkpt && nRem<=0); }else{ *pnWrite = 0; *pbCkpt = 0; } return rc; } static int doLsmWork(lsm_db *pDb, int nMerge, int nPage, int *pnWrite){ int rc; |
︙ | ︙ | |||
5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 | void lsmSortedSaveTreeCursors(lsm_db *pDb){ MultiCursor *pCsr; for(pCsr=pDb->pCsr; pCsr; pCsr=pCsr->pNext){ lsmTreeCursorSave(pCsr->apTreeCsr[0]); lsmTreeCursorSave(pCsr->apTreeCsr[1]); } } #ifdef LSM_DEBUG_EXPENSIVE static void assertRunInOrder(lsm_db *pDb, Segment *pSeg){ Page *pPg = 0; Blob blob1 = {0, 0, 0, 0}; Blob blob2 = {0, 0, 0, 0}; | > > > > > > > > > > > > | 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 | void lsmSortedSaveTreeCursors(lsm_db *pDb){ MultiCursor *pCsr; for(pCsr=pDb->pCsr; pCsr; pCsr=pCsr->pNext){ lsmTreeCursorSave(pCsr->apTreeCsr[0]); lsmTreeCursorSave(pCsr->apTreeCsr[1]); } } void lsmSortedExpandBtreePage(Page *pPg, int nOrig){ u8 *aData; int nData; int nEntry; int iHdr; aData = lsmFsPageData(pPg, &nData); nEntry = pageGetNRec(aData, nOrig); iHdr = SEGMENT_EOF(nOrig, nEntry); memmove(&aData[iHdr + (nData-nOrig)], &aData[iHdr], nOrig-iHdr); } #ifdef LSM_DEBUG_EXPENSIVE static void assertRunInOrder(lsm_db *pDb, Segment *pSeg){ Page *pPg = 0; Blob blob1 = {0, 0, 0, 0}; Blob blob2 = {0, 0, 0, 0}; |
︙ | ︙ |
Changes to tool/lsmperf.tcl.
︙ | ︙ | |||
186 187 188 189 190 191 192 | append script $data3 append script $data4 append script "pause -1\n" exec_gnuplot_script $script $zPng } | | > > | | > > > > | 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 | append script $data3 append script $data4 append script "pause -1\n" exec_gnuplot_script $script $zPng } do_write_test x.png 100 50000 50000 20 { lsm safety=0 } #lsm "mmap=1 multi_proc=0 page_size=4096 block_size=2097152 autocheckpoint=4194000" #lsm-mt "mmap=1 multi_proc=0 threads=2 autowork=0 autocheckpoint=4196000" # lsm "safety=1 multi_proc=0" # lsm-mt "mmap=1 multi_proc=0 threads=2 autowork=0 autocheckpoint=8192000" # lsm-mt "mmap=1 multi_proc=0 safety=1 threads=3 autowork=0" # lsm-st "mmap=1 multi_proc=0 safety=1 threads=1 autowork=1" # lsm-mt "mmap=1 multi_proc=0 safety=1 threads=3 autowork=0" # lsm-mt "mmap=1 multi_proc=0 safety=1 threads=3 autowork=0" # LevelDB leveldb # lsm-st "mmap=1 multi_proc=0 safety=1 threads=1 autowork=1" |
︙ | ︙ |
Changes to www/lsmusr.wiki.
︙ | ︙ | |||
519 520 521 522 523 524 525 | if any other client has written to the database since the current client's read-transaction was opened, it will not be possible to upgrade to a write-transaction. <p>Write-transactions may be opened either implicitly or explicitly. If any of the following functions are called to write to the database when there is no write-transaction open, then an implicit write-transaction is opened and | | | 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 | if any other client has written to the database since the current client's read-transaction was opened, it will not be possible to upgrade to a write-transaction. <p>Write-transactions may be opened either implicitly or explicitly. If any of the following functions are called to write to the database when there is no write-transaction open, then an implicit write-transaction is opened and closed (committed) within the call: <ul> <li> lsm_insert() <li> lsm_delete() <li> lsm_delete_range() </ul> |
︙ | ︙ |