Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Avoid moving pages more than once in an incremental vacuum operation. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | incr-vacuum-opt |
Files: | files | file ages | folders |
SHA1: |
c3939d249119b47bd57baa11a5ed7cc6 |
User & Date: | dan 2013-02-22 20:16:34.273 |
References
2013-02-23
| ||
17:49 | Fix off-by-one bug in [c3939d2491] uncovered by th3. (check-in: 66f9faa9a9 user: dan tags: incr-vacuum-opt) | |
Context
2013-02-22
| ||
20:57 | Fix a problem with the previous commit. (check-in: 720a3ceafc user: dan tags: incr-vacuum-opt) | |
20:16 | Avoid moving pages more than once in an incremental vacuum operation. (check-in: c3939d2491 user: dan tags: incr-vacuum-opt) | |
2013-02-20
| ||
00:54 | On Minix, disable the ".timer" command in the shell in order to avoid calling getrusage(). (check-in: 9bd9bd9cab user: drh tags: trunk) | |
Changes
Changes to src/btree.c.
︙ | ︙ | |||
2905 2906 2907 2908 2909 2910 2911 2912 2913 | } } return rc; } /* Forward declaration required by incrVacuumStep(). */ static int allocateBtreePage(BtShared *, MemPage **, Pgno *, Pgno, u8); /* | > > > | | | > | | | > > | | | < < < < | | | 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 | } } return rc; } /* Forward declaration required by incrVacuumStep(). */ static int allocateBtreePage(BtShared *, MemPage **, Pgno *, Pgno, u8); #define BTALLOC_ANY 0 /* Allocate any page */ #define BTALLOC_EXACT 1 /* Allocate exact page if possible */ #define BTALLOC_LE 2 /* Allocate any page <= the parameter */ /* ** Perform a single step of an incremental-vacuum. If successful, return ** SQLITE_OK. If there is no work to do (and therefore no point in ** calling this function again), return SQLITE_DONE. Or, if an error ** occurs, return some other error code. ** ** More specificly, this function attempts to re-organize the database so ** that the last page of the file currently in use is no longer in use. ** ** Parameter nFin is the number of pages that this database would contain ** were this function called until it returns SQLITE_DONE. ** ** If the bCommit parameter is non-zero, this function assumes that the ** caller will keep calling incrVacuumStep() until it returns SQLITE_DONE ** or an error. bCommit is passed true for an auto-vacuum-on-commmit ** operation, or false for an incremental vacuum. */ static int incrVacuumStep(BtShared *pBt, Pgno nFin, Pgno iLastPg, int bCommit){ Pgno nFreeList; /* Number of pages still on the free-list */ int rc; assert( sqlite3_mutex_held(pBt->mutex) ); assert( iLastPg>nFin ); if( !PTRMAP_ISPAGE(pBt, iLastPg) && iLastPg!=PENDING_BYTE_PAGE(pBt) ){ |
︙ | ︙ | |||
2949 2950 2951 2952 2953 2954 2955 | return rc; } if( eType==PTRMAP_ROOTPAGE ){ return SQLITE_CORRUPT_BKPT; } if( eType==PTRMAP_FREEPAGE ){ | | | | > > | | > > > > | | | | 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 | return rc; } if( eType==PTRMAP_ROOTPAGE ){ return SQLITE_CORRUPT_BKPT; } if( eType==PTRMAP_FREEPAGE ){ if( bCommit==0 ){ /* Remove the page from the files free-list. This is not required ** if bCommit is non-zero. In that case, the free-list will be ** truncated to zero after this function returns, so it doesn't ** matter if it still contains some garbage entries. */ Pgno iFreePg; MemPage *pFreePg; rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, iLastPg, BTALLOC_EXACT); if( rc!=SQLITE_OK ){ return rc; } assert( iFreePg==iLastPg ); releasePage(pFreePg); } } else { Pgno iFreePg; /* Index of free page to move pLastPg to */ MemPage *pLastPg; u8 eMode = BTALLOC_ANY; /* Mode parameter for allocateBtreePage() */ Pgno iNear = 0; /* nearby parameter for allocateBtreePage() */ rc = btreeGetPage(pBt, iLastPg, &pLastPg, 0); if( rc!=SQLITE_OK ){ return rc; } /* If bCommit is zero, this loop runs exactly once and page pLastPg ** is swapped with the first free page pulled off the free list. ** ** On the other hand, if bCommit is greater than zero, then keep ** looping until a free-page located within the first nFin pages ** of the file is found. */ if( bCommit==0 ){ eMode = BTALLOC_LE; iNear = nFin; } do { MemPage *pFreePg; rc = allocateBtreePage(pBt, &pFreePg, &iFreePg, iNear, eMode); if( rc!=SQLITE_OK ){ releasePage(pLastPg); return rc; } releasePage(pFreePg); }while( bCommit && iFreePg>nFin ); assert( iFreePg<iLastPg ); rc = sqlite3PagerWrite(pLastPg->pDbPage); if( rc==SQLITE_OK ){ rc = relocatePage(pBt, pLastPg, eType, iPtrPage, iFreePg, nFin!=0); } releasePage(pLastPg); if( rc!=SQLITE_OK ){ return rc; } } } if( bCommit==0 ){ iLastPg--; while( iLastPg==PENDING_BYTE_PAGE(pBt)||PTRMAP_ISPAGE(pBt, iLastPg) ){ if( PTRMAP_ISPAGE(pBt, iLastPg) ){ MemPage *pPg; rc = btreeGetPage(pBt, iLastPg, &pPg, 0); if( rc!=SQLITE_OK ){ return rc; |
︙ | ︙ | |||
3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 | iLastPg--; } sqlite3PagerTruncateImage(pBt->pPager, iLastPg); pBt->nPage = iLastPg; } return SQLITE_OK; } /* ** A write-transaction must be opened before calling this function. ** It performs a single unit of work towards an incremental vacuum. ** ** If the incremental vacuum is finished after this function has run, ** SQLITE_DONE is returned. If it is not finished, but no error occurred, ** SQLITE_OK is returned. Otherwise an SQLite error code. */ int sqlite3BtreeIncrVacuum(Btree *p){ int rc; BtShared *pBt = p->pBt; sqlite3BtreeEnter(p); assert( pBt->inTransaction==TRANS_WRITE && p->inTrans==TRANS_WRITE ); if( !pBt->autoVacuum ){ rc = SQLITE_DONE; }else{ | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | | | | > > > | 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 | iLastPg--; } sqlite3PagerTruncateImage(pBt->pPager, iLastPg); pBt->nPage = iLastPg; } return SQLITE_OK; } /* ** The database opened by the first argument is an auto-vacuum database ** nOrig pages in size containing nFree free pages. Return the expected ** size of the database in pages following an auto-vacuum operation. */ static Pgno finalDbSize(BtShared *pBt, Pgno nOrig, Pgno nFree){ int nEntry; /* Number of entries on one ptrmap page */ Pgno nPtrmap; /* Number of PtrMap pages to be freed */ Pgno nFin; /* Return value */ nEntry = pBt->usableSize/5; nPtrmap = (nFree-nOrig+PTRMAP_PAGENO(pBt, nOrig)+nEntry)/nEntry; nFin = nOrig - nFree - nPtrmap; if( nOrig>PENDING_BYTE_PAGE(pBt) && nFin<PENDING_BYTE_PAGE(pBt) ){ nFin--; } while( PTRMAP_ISPAGE(pBt, nFin) || nFin==PENDING_BYTE_PAGE(pBt) ){ nFin--; } if( nFin>nOrig ) return SQLITE_CORRUPT_BKPT; return nFin; } /* ** A write-transaction must be opened before calling this function. ** It performs a single unit of work towards an incremental vacuum. ** ** If the incremental vacuum is finished after this function has run, ** SQLITE_DONE is returned. If it is not finished, but no error occurred, ** SQLITE_OK is returned. Otherwise an SQLite error code. */ int sqlite3BtreeIncrVacuum(Btree *p){ int rc; BtShared *pBt = p->pBt; sqlite3BtreeEnter(p); assert( pBt->inTransaction==TRANS_WRITE && p->inTrans==TRANS_WRITE ); if( !pBt->autoVacuum ){ rc = SQLITE_DONE; }else{ Pgno nOrig = btreePagecount(pBt); Pgno nFree = get4byte(&pBt->pPage1->aData[36]); Pgno nFin = finalDbSize(pBt, nOrig, nFree); if( nFin<nOrig ){ invalidateAllOverflowCache(pBt); rc = incrVacuumStep(pBt, nFin, nOrig, 0); if( rc==SQLITE_OK ){ rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); put4byte(&pBt->pPage1->aData[28], pBt->nPage); } }else{ rc = SQLITE_DONE; } } sqlite3BtreeLeave(p); return rc; } /* |
︙ | ︙ | |||
3073 3074 3075 3076 3077 3078 3079 | assert( sqlite3_mutex_held(pBt->mutex) ); invalidateAllOverflowCache(pBt); assert(pBt->autoVacuum); if( !pBt->incrVacuum ){ Pgno nFin; /* Number of pages in database after autovacuuming */ Pgno nFree; /* Number of pages on the freelist initially */ | < < < | < < < < < < < | | 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 | assert( sqlite3_mutex_held(pBt->mutex) ); invalidateAllOverflowCache(pBt); assert(pBt->autoVacuum); if( !pBt->incrVacuum ){ Pgno nFin; /* Number of pages in database after autovacuuming */ Pgno nFree; /* Number of pages on the freelist initially */ Pgno iFree; /* The next page to be freed */ Pgno nOrig; /* Database size before freeing */ nOrig = btreePagecount(pBt); if( PTRMAP_ISPAGE(pBt, nOrig) || nOrig==PENDING_BYTE_PAGE(pBt) ){ /* It is not possible to create a database for which the final page ** is either a pointer-map page or the pending-byte page. If one ** is encountered, this indicates corruption. */ return SQLITE_CORRUPT_BKPT; } nFree = get4byte(&pBt->pPage1->aData[36]); nFin = finalDbSize(pBt, nOrig, nFree); if( nFin>nOrig ) return SQLITE_CORRUPT_BKPT; for(iFree=nOrig; iFree>nFin && rc==SQLITE_OK; iFree--){ rc = incrVacuumStep(pBt, nFin, iFree, 1); } if( (rc==SQLITE_DONE || rc==SQLITE_OK) && nFree>0 ){ rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); put4byte(&pBt->pPage1->aData[32], 0); put4byte(&pBt->pPage1->aData[36], 0); put4byte(&pBt->pPage1->aData[28], nFin); sqlite3PagerTruncateImage(pBt->pPager, nFin); |
︙ | ︙ | |||
4863 4864 4865 4866 4867 4868 4869 | ** is only used by auto-vacuum databases when allocating a new table. */ static int allocateBtreePage( BtShared *pBt, MemPage **ppPage, Pgno *pPgno, Pgno nearby, | | | 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 | ** is only used by auto-vacuum databases when allocating a new table. */ static int allocateBtreePage( BtShared *pBt, MemPage **ppPage, Pgno *pPgno, Pgno nearby, u8 eMode ){ MemPage *pPage1; int rc; u32 n; /* Number of pages on the freelist */ u32 k; /* Number of leaves on the trunk of the freelist */ MemPage *pTrunk = 0; MemPage *pPrevTrunk = 0; |
︙ | ︙ | |||
4891 4892 4893 4894 4895 4896 4897 | u8 searchList = 0; /* If the free-list must be searched for 'nearby' */ /* If the 'exact' parameter was true and a query of the pointer-map ** shows that the page 'nearby' is somewhere on the free-list, then ** the entire-list will be searched for that page. */ #ifndef SQLITE_OMIT_AUTOVACUUM | > | | | | | | | | | > > | | 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 | u8 searchList = 0; /* If the free-list must be searched for 'nearby' */ /* If the 'exact' parameter was true and a query of the pointer-map ** shows that the page 'nearby' is somewhere on the free-list, then ** the entire-list will be searched for that page. */ #ifndef SQLITE_OMIT_AUTOVACUUM if( eMode==BTALLOC_EXACT ){ if( nearby<=mxPage ){ u8 eType; assert( nearby>0 ); assert( pBt->autoVacuum ); rc = ptrmapGet(pBt, nearby, &eType, 0); if( rc ) return rc; if( eType==PTRMAP_FREEPAGE ){ searchList = 1; } } }else if( eMode==BTALLOC_LE ){ searchList = 1; } #endif /* Decrement the free-list count by 1. Set iTrunk to the index of the ** first free-list trunk page. iPrevTrunk is initially 1. */ rc = sqlite3PagerWrite(pPage1->pDbPage); |
︙ | ︙ | |||
4955 4956 4957 4958 4959 4960 4961 | pTrunk = 0; TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1)); }else if( k>(u32)(pBt->usableSize/4 - 2) ){ /* Value of k is out of range. Database corruption */ rc = SQLITE_CORRUPT_BKPT; goto end_allocate_page; #ifndef SQLITE_OMIT_AUTOVACUUM | | > > | | 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 | pTrunk = 0; TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1)); }else if( k>(u32)(pBt->usableSize/4 - 2) ){ /* Value of k is out of range. Database corruption */ rc = SQLITE_CORRUPT_BKPT; goto end_allocate_page; #ifndef SQLITE_OMIT_AUTOVACUUM }else if( searchList && (nearby==iTrunk || (iTrunk<nearby && eMode==BTALLOC_LE)) ){ /* The list is being searched and this trunk page is the page ** to allocate, regardless of whether it has leaves. */ *pPgno = iTrunk; *ppPage = pTrunk; searchList = 0; rc = sqlite3PagerWrite(pTrunk->pDbPage); if( rc ){ goto end_allocate_page; } if( k==0 ){ |
︙ | ︙ | |||
5043 5044 5045 5046 5047 5048 5049 | iPage = get4byte(&aData[8+closest*4]); testcase( iPage==mxPage ); if( iPage>mxPage ){ rc = SQLITE_CORRUPT_BKPT; goto end_allocate_page; } testcase( iPage==mxPage ); | | > > | 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 | iPage = get4byte(&aData[8+closest*4]); testcase( iPage==mxPage ); if( iPage>mxPage ){ rc = SQLITE_CORRUPT_BKPT; goto end_allocate_page; } testcase( iPage==mxPage ); if( !searchList || (iPage==nearby || (iPage<nearby && eMode==BTALLOC_LE)) ){ int noContent; *pPgno = iPage; TRACE(("ALLOCATE: %d was leaf %d of %d on trunk %d" ": %d more free pages\n", *pPgno, closest+1, k, pTrunk->pgno, n-1)); rc = sqlite3PagerWrite(pTrunk->pDbPage); if( rc ) goto end_allocate_page; |
︙ | ︙ | |||
7115 7116 7117 7118 7119 7120 7121 | } assert( pgnoRoot>=3 ); /* Allocate a page. The page that currently resides at pgnoRoot will ** be moved to the allocated page (unless the allocated page happens ** to reside at pgnoRoot). */ | | | 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 | } assert( pgnoRoot>=3 ); /* Allocate a page. The page that currently resides at pgnoRoot will ** be moved to the allocated page (unless the allocated page happens ** to reside at pgnoRoot). */ rc = allocateBtreePage(pBt, &pPageMove, &pgnoMove, pgnoRoot, BTALLOC_EXACT); if( rc!=SQLITE_OK ){ return rc; } if( pgnoMove!=pgnoRoot ){ /* pgnoRoot is the page that will be used for the root-page of ** the new table (assuming an error did not occur). But we were |
︙ | ︙ |