Index: src/mutex.c ================================================================== --- src/mutex.c +++ src/mutex.c @@ -10,11 +10,11 @@ ** ************************************************************************* ** This file contains the C functions that implement mutexes for ** use by the SQLite core. ** -** $Id: mutex.c,v 1.13 2007/08/25 16:31:30 drh Exp $ +** $Id: mutex.c,v 1.14 2007/08/27 17:27:49 danielk1977 Exp $ */ /* ** If SQLITE_MUTEX_APPDEF is defined, then this whole module is ** omitted and equivalent functionality must be provided by the ** application that links against the SQLite library. @@ -251,10 +251,11 @@ **
  • SQLITE_MUTEX_RECURSIVE **
  • SQLITE_MUTEX_STATIC_MASTER **
  • SQLITE_MUTEX_STATIC_MEM **
  • SQLITE_MUTEX_STATIC_MEM2 **
  • SQLITE_MUTEX_STATIC_PRNG +**
  • SQLITE_MUTEX_STATIC_LRU ** ** ** The first two constants cause sqlite3_mutex_alloc() to create ** a new mutex. The new mutex is recursive when SQLITE_MUTEX_RECURSIVE ** is used but not necessarily so when SQLITE_MUTEX_FAST is used. @@ -279,10 +280,11 @@ ** mutex types, the same mutex is returned on every call that has ** the same type number. */ sqlite3_mutex *sqlite3_mutex_alloc(int iType){ static sqlite3_mutex staticMutexes[] = { + { PTHREAD_MUTEX_INITIALIZER, }, { PTHREAD_MUTEX_INITIALIZER, }, { PTHREAD_MUTEX_INITIALIZER, }, { PTHREAD_MUTEX_INITIALIZER, }, { PTHREAD_MUTEX_INITIALIZER, }, }; @@ -472,11 +474,11 @@ InitializeCriticalSection(&p->mutex); } break; } default: { - static sqlite3_mutex staticMutexes[4]; + static sqlite3_mutex staticMutexes[5]; static int isInit = 0; while( !isInit ){ static long lock = 0; if( InterlockedIncrement(&lock)==1 ){ int i; Index: src/pager.c ================================================================== --- src/pager.c +++ src/pager.c @@ -16,11 +16,11 @@ ** is separate from the database file. The pager also implements file ** locking to prevent two processes from writing the same database ** file simultaneously, or one process from reading the database while ** another is writing. ** -** @(#) $Id: pager.c,v 1.376 2007/08/24 16:29:24 drh Exp $ +** @(#) $Id: pager.c,v 1.377 2007/08/27 17:27:49 danielk1977 Exp $ */ #ifndef SQLITE_OMIT_DISKIO #include "sqliteInt.h" #include #include @@ -129,10 +129,17 @@ ** This macro rounds values up so that if the value is an address it ** is guaranteed to be an address that is aligned to an 8-byte boundary. */ #define FORCE_ALIGNMENT(X) (((X)+7)&~7) +typedef struct PgHdr PgHdr; +typedef struct PagerLruLink PagerLruLink; +struct PagerLruLink { + PgHdr *pNext; + PgHdr *pPrev; +}; + /* ** Each in-memory image of a page begins with the following header. ** This header is only visible to this pager module. The client ** code that calls pager sees only the data that follows the header. ** @@ -219,24 +226,26 @@ ** garbage. (Actually, we zero the content, but you should not ** make any assumptions about the content nevertheless.) If the ** content is needed in the future, it should be read from the ** original database file. */ -typedef struct PgHdr PgHdr; struct PgHdr { Pager *pPager; /* The pager to which this page belongs */ Pgno pgno; /* The page number for this page */ PgHdr *pNextHash, *pPrevHash; /* Hash collision chain for PgHdr.pgno */ - PgHdr *pNextFree, *pPrevFree; /* Freelist of pages where nRef==0 */ + PagerLruLink free; /* Next and previous free pages */ PgHdr *pNextAll; /* A list of all pages */ u8 inJournal; /* TRUE if has been written to journal */ u8 dirty; /* TRUE if we need to write back changes */ u8 needSync; /* Sync journal before writing this page */ u8 alwaysRollback; /* Disable DontRollback() for this page */ u8 needRead; /* Read content if PagerWrite() is called */ short int nRef; /* Number of users of this page */ PgHdr *pDirty, *pPrevDirty; /* Dirty pages */ +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT + PagerLruLink gfree; /* Global list of nRef==0 pages */ +#endif u32 notUsed; /* Buffer space */ #ifdef SQLITE_CHECK_PAGES u32 pageHash; #endif /* pPager->pageSize bytes of page data follow this header */ @@ -280,10 +289,17 @@ #define PGHDR_TO_DATA(P) ((void*)(&(P)[1])) #define DATA_TO_PGHDR(D) (&((PgHdr*)(D))[-1]) #define PGHDR_TO_EXTRA(G,P) ((void*)&((char*)(&(G)[1]))[(P)->pageSize]) #define PGHDR_TO_HIST(P,PGR) \ ((PgHistory*)&((char*)(&(P)[1]))[(PGR)->pageSize+(PGR)->nExtra]) + +typedef struct PagerLruList PagerLruList; +struct PagerLruList { + PgHdr *pFirst; + PgHdr *pLast; + PgHdr *pFirstSynced; /* First page in list with PgHdr.needSync==0 */ +}; /* ** A open page cache is an instance of the following structure. ** ** Pager.errCode may be set to SQLITE_IOERR, SQLITE_CORRUPT, or @@ -336,12 +352,11 @@ char *zJournal; /* Name of the journal file */ char *zDirectory; /* Directory hold database and journal files */ sqlite3_file *fd, *jfd; /* File descriptors for database and journal */ sqlite3_file *stfd; /* File descriptor for the statement subjournal*/ BusyHandler *pBusyHandler; /* Pointer to sqlite.busyHandler */ - PgHdr *pFirst, *pLast; /* List of free pages */ - PgHdr *pFirstSynced; /* First free page with PgHdr.needSync==0 */ + PagerLruList lru; /* LRU list of free pages */ PgHdr *pAll; /* List of all pages */ PgHdr *pStmt; /* List of pages in the statement subjournal */ PgHdr *pDirty; /* List of all dirty pages */ i64 journalOff; /* Current byte offset in the journal file */ i64 journalHdr; /* Byte offset to previous journal header */ @@ -392,10 +407,11 @@ ** sqlite3_release_memory() interface. Access to this list is ** protected by the SQLITE_MUTEX_STATIC_MEM2 mutex. */ #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT static Pager *sqlite3PagerList = 0; +static PagerLruList sqlite3LruPageList = {0, 0, 0}; #endif /* ** Journal files begin with the following magic string. The data @@ -512,10 +528,103 @@ } # define REFINFO(X) pager_refinfo(X) #else # define REFINFO(X) #endif + +static void listAdd(PagerLruList *pList, PagerLruLink *pLink, PgHdr *pPg){ + pLink->pNext = 0; + pLink->pPrev = pList->pLast; + + if( pList->pLast ){ + int iOff = (char *)pLink - (char *)pPg; + PagerLruLink *pLastLink = (PagerLruLink *)(&((u8 *)pList->pLast)[iOff]); + pLastLink->pNext = pPg; + }else{ + assert(!pList->pFirst); + pList->pFirst = pPg; + } + + pList->pLast = pPg; + if( !pList->pFirstSynced && pPg->needSync==0 ){ + pList->pFirstSynced = pPg; + } +} + +static void listRemove(PagerLruList *pList, PagerLruLink *pLink, PgHdr *pPg){ + int iOff = (char *)pLink - (char *)pPg; + + if( pPg==pList->pFirst ){ + pList->pFirst = pLink->pNext; + } + if( pPg==pList->pLast ){ + pList->pLast = pLink->pPrev; + } + if( pLink->pPrev ){ + PagerLruLink *pPrevLink = (PagerLruLink *)(&((u8 *)pLink->pPrev)[iOff]); + pPrevLink->pNext = pLink->pNext; + } + if( pLink->pNext ){ + PagerLruLink *pNextLink = (PagerLruLink *)(&((u8 *)pLink->pNext)[iOff]); + pNextLink->pPrev = pLink->pPrev; + } + if( pPg==pList->pFirstSynced ){ + PgHdr *p = pLink->pNext; + while( p && p->needSync ){ + PagerLruLink *pL = (PagerLruLink *)(&((u8 *)p)[iOff]); + p = pL->pNext; + } + pList->pFirstSynced = p; + } + + pLink->pNext = pLink->pPrev = 0; +} + +/* +** Add page to the free-list +*/ +static void lruListAdd(PgHdr *pPg){ + listAdd(&pPg->pPager->lru, &pPg->free, pPg); +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT + if( !pPg->pPager->memDb ){ + sqlite3_mutex_enter(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU)); + listAdd(&sqlite3LruPageList, &pPg->gfree, pPg); + sqlite3_mutex_leave(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU)); + } +#endif +} + +/* +** Remove page from free-list +*/ +static void lruListRemove(PgHdr *pPg){ + listRemove(&pPg->pPager->lru, &pPg->free, pPg); +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT + if( !pPg->pPager->memDb ){ + sqlite3_mutex_enter(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU)); + listRemove(&sqlite3LruPageList, &pPg->gfree, pPg); + sqlite3_mutex_leave(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU)); + } +#endif +} + +/* +** Set the Pager.pFirstSynced variable +*/ +static void lruListSetFirstSynced(Pager *pPager){ + pPager->lru.pFirstSynced = pPager->lru.pFirst; +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT + if( !pPager->memDb ){ + PgHdr *p; + sqlite3_mutex_enter(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU)); + for(p=sqlite3LruPageList.pFirst; p && p->needSync; p=p->gfree.pNext); + assert(p==pPager->lru.pFirstSynced || p==sqlite3LruPageList.pFirstSynced); + sqlite3LruPageList.pFirstSynced = p; + sqlite3_mutex_leave(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU)); + } +#endif +} /* ** Return true if page *pPg has already been written to the statement ** journal (or statement snapshot has been created, if *pPg is part ** of an in-memory database). @@ -1079,16 +1188,17 @@ if( pPager->errCode ) return; for(pPg=pPager->pAll; pPg; pPg=pNext){ IOTRACE(("PGFREE %p %d\n", pPager, pPg->pgno)); PAGER_INCR(sqlite3_pager_pgfree_count); pNext = pPg->pNextAll; + lruListRemove(pPg); sqlite3_free(pPg); } + assert(pPager->lru.pFirst==0); + assert(pPager->lru.pFirstSynced==0); + assert(pPager->lru.pLast==0); pPager->pStmt = 0; - pPager->pFirst = 0; - pPager->pFirstSynced = 0; - pPager->pLast = 0; pPager->pAll = 0; pPager->nHash = 0; sqlite3_free(pPager->aHash); pPager->nPage = 0; pPager->aHash = 0; @@ -1163,11 +1273,11 @@ pPager->state = PAGER_EXCLUSIVE; } pPager->origDbSize = 0; pPager->setMaster = 0; pPager->needSync = 0; - pPager->pFirstSynced = pPager->pFirst; + lruListSetFirstSynced(pPager); pPager->dbSize = -1; return (rc==SQLITE_OK?rc2:rc); } @@ -2282,31 +2392,12 @@ ** and from its hash collision chain. */ static void unlinkPage(PgHdr *pPg){ Pager *pPager = pPg->pPager; - /* Keep the pFirstSynced pointer pointing at the first synchronized page */ - if( pPg==pPager->pFirstSynced ){ - PgHdr *p = pPg->pNextFree; - while( p && p->needSync ){ p = p->pNextFree; } - pPager->pFirstSynced = p; - } - - /* Unlink from the freelist */ - if( pPg->pPrevFree ){ - pPg->pPrevFree->pNextFree = pPg->pNextFree; - }else{ - assert( pPager->pFirst==pPg ); - pPager->pFirst = pPg->pNextFree; - } - if( pPg->pNextFree ){ - pPg->pNextFree->pPrevFree = pPg->pPrevFree; - }else{ - assert( pPager->pLast==pPg ); - pPager->pLast = pPg->pPrevFree; - } - pPg->pNextFree = pPg->pPrevFree = 0; + /* Unlink from free page list */ + lruListRemove(pPg); /* Unlink from the pgno hash table */ unlinkHashChain(pPager, pPg); } @@ -2496,25 +2587,11 @@ ** is a real function so that we can set breakpoints and trace it. */ static void _page_ref(PgHdr *pPg){ if( pPg->nRef==0 ){ /* The page is currently on the freelist. Remove it. */ - if( pPg==pPg->pPager->pFirstSynced ){ - PgHdr *p = pPg->pNextFree; - while( p && p->needSync ){ p = p->pNextFree; } - pPg->pPager->pFirstSynced = p; - } - if( pPg->pPrevFree ){ - pPg->pPrevFree->pNextFree = pPg->pNextFree; - }else{ - pPg->pPager->pFirst = pPg->pNextFree; - } - if( pPg->pNextFree ){ - pPg->pNextFree->pPrevFree = pPg->pPrevFree; - }else{ - pPg->pPager->pLast = pPg->pPrevFree; - } + lruListRemove(pPg); pPg->pPager->nRef++; } pPg->nRef++; REFINFO(pPg); } @@ -2633,11 +2710,11 @@ /* Erase the needSync flag from every page. */ for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){ pPg->needSync = 0; } - pPager->pFirstSynced = pPager->pFirst; + lruListSetFirstSynced(pPager); } #ifndef NDEBUG /* If the Pager.needSync flag is clear then the PgHdr.needSync ** flag must also be clear for all pages. Verify that this @@ -2645,11 +2722,11 @@ */ else{ for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){ assert( pPg->needSync==0 ); } - assert( pPager->pFirstSynced==pPager->pFirst ); + assert( pPager->lru.pFirstSynced==pPager->lru.pFirst ); } #endif return rc; } @@ -2855,18 +2932,18 @@ assert(!MEMDB); /* Find a page to recycle. Try to locate a page that does not ** require us to do an fsync() on the journal. */ - pPg = pPager->pFirstSynced; + pPg = pPager->lru.pFirstSynced; /* If we could not find a page that does not require an fsync() ** on the journal file then fsync the journal file. This is a ** very slow operation, so we work hard to avoid it. But sometimes ** it can't be helped. */ - if( pPg==0 && pPager->pFirst && syncOk && !MEMDB){ + if( pPg==0 && pPager->lru.pFirst && syncOk && !MEMDB){ int iDc = sqlite3OsDeviceCharacteristics(pPager->fd); int rc = syncJournal(pPager); if( rc!=0 ){ return rc; } @@ -2883,11 +2960,11 @@ rc = writeJournalHdr(pPager); if( rc!=0 ){ return rc; } } - pPg = pPager->pFirst; + pPg = pPager->lru.pFirst; } if( pPg==0 ){ return SQLITE_OK; } @@ -2943,11 +3020,11 @@ */ int sqlite3PagerReleaseMemory(int nReq){ int nReleased = 0; /* Bytes of memory released so far */ sqlite3_mutex *mutex; /* The MEM2 mutex */ Pager *pPager; /* For looping over pagers */ - int i; /* Passes over pagers */ + int rc = SQLITE_OK; /* Acquire the memory-management mutex */ mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MEM2); sqlite3_mutex_enter(mutex); @@ -2957,79 +3034,77 @@ */ for(pPager=sqlite3PagerList; pPager; pPager=pPager->pNext){ pPager->iInUseMM = 1; } - /* Outermost loop runs for at most two iterations. First iteration we - ** try to find memory that can be released without calling fsync(). Second - ** iteration (which only runs if the first failed to free nReq bytes of - ** memory) is permitted to call fsync(). This is of course much more - ** expensive. - */ - for(i=0; i<=1; i++){ - - /* Loop through all the SQLite pagers opened by the current thread. */ - Pager *pPager = sqlite3PagerList; - for( ; pPager && (nReq<0 || nReleasedpNext){ - PgHdr *pPg; - int rc = SQLITE_OK; - - /* In-memory databases should not appear on the pager list */ - assert( !MEMDB ); - - /* Skip pagers that are currently in use by the b-tree layer */ - if( pPager->iInUseDB ) continue; - - /* For each pager, try to free as many pages as possible (without - ** calling fsync() if this is the first iteration of the outermost - ** loop). - */ - while( (nReq<0 || nReleasedpAll ){ - pPager->pAll = pPg->pNextAll; - }else{ - for( pTmp=pPager->pAll; pTmp->pNextAll!=pPg; pTmp=pTmp->pNextAll ){} - pTmp->pNextAll = pPg->pNextAll; - } - nReleased += ( - sizeof(*pPg) + pPager->pageSize - + sizeof(u32) + pPager->nExtra - + MEMDB*sizeof(PgHistory) - ); - IOTRACE(("PGFREE %p %d *\n", pPager, pPg->pgno)); - PAGER_INCR(sqlite3_pager_pgfree_count); - sqlite3_free(pPg); - } - - if( rc!=SQLITE_OK ){ - /* An error occured whilst writing to the database file or - ** journal in pager_recycle(). The error is not returned to the - ** caller of this function. Instead, set the Pager.errCode variable. - ** The error will be returned to the user (or users, in the case - ** of a shared pager cache) of the pager for which the error occured. - */ - assert( - (rc&0xff)==SQLITE_IOERR || - rc==SQLITE_FULL || - rc==SQLITE_BUSY - ); - assert( pPager->state>=PAGER_RESERVED ); - pager_error(pPager, rc); - } + while( rc==SQLITE_OK && (nReq<0 || nReleasedneedSync || pPg->pPager->iInUseDB) ){ + pPg = pPg->gfree.pNext; + } + if( !pPg ){ + pPg = sqlite3LruPageList.pFirst; + while( pPg && pPg->pPager->iInUseDB ){ + pPg = pPg->gfree.pNext; + } + } + sqlite3_mutex_leave(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU)); + + if( !pPg ) break; + + pPager = pPg->pPager; + assert(!pPg->needSync || pPg==pPager->lru.pFirst); + assert(pPg->needSync || pPg==pPager->lru.pFirstSynced); + + rc = pager_recycle(pPager, 1, &pRecycled); + assert(pRecycled==pPg || rc!=SQLITE_OK); + if( rc==SQLITE_OK ){ + /* We've found a page to free. At this point the page has been + ** removed from the page hash-table, free-list and synced-list + ** (pFirstSynced). It is still in the all pages (pAll) list. + ** Remove it from this list before freeing. + ** + ** Todo: Check the Pager.pStmt list to make sure this is Ok. It + ** probably is though. + */ + PgHdr *pTmp; + assert( pPg ); + if( pPg==pPager->pAll ){ + pPager->pAll = pPg->pNextAll; + }else{ + for( pTmp=pPager->pAll; pTmp->pNextAll!=pPg; pTmp=pTmp->pNextAll ){} + pTmp->pNextAll = pPg->pNextAll; + } + nReleased += ( + sizeof(*pPg) + pPager->pageSize + + sizeof(u32) + pPager->nExtra + + MEMDB*sizeof(PgHistory) + ); + IOTRACE(("PGFREE %p %d *\n", pPager, pPg->pgno)); + PAGER_INCR(sqlite3_pager_pgfree_count); + sqlite3_free(pPg); + }else{ + /* An error occured whilst writing to the database file or + ** journal in pager_recycle(). The error is not returned to the + ** caller of this function. Instead, set the Pager.errCode variable. + ** The error will be returned to the user (or users, in the case + ** of a shared pager cache) of the pager for which the error occured. + */ + assert( + (rc&0xff)==SQLITE_IOERR || + rc==SQLITE_FULL || + rc==SQLITE_BUSY + ); + assert( pPager->state>=PAGER_RESERVED ); + pager_error(pPager, rc); } } /* Clear the memory management flags and release the mutex */ @@ -3250,13 +3325,13 @@ PgHdr *pPg; /* Create a new PgHdr if any of the four conditions defined ** above are met: */ if( pPager->nPagemxPage - || pPager->pFirst==0 + || pPager->lru.pFirst==0 || MEMDB - || (pPager->pFirstSynced==0 && pPager->doNotSync) + || (pPager->lru.pFirstSynced==0 && pPager->doNotSync) ){ if( pPager->nPage>=pPager->nHash ){ pager_resize_hash_table(pPager, pPager->nHash<256 ? 256 : pPager->nHash*2); if( pPager->nHash==0 ){ @@ -3539,23 +3614,13 @@ /* When the number of references to a page reach 0, call the ** destructor and add the page to the freelist. */ if( pPg->nRef==0 ){ - Pager *pPager; - pPager = pPg->pPager; - pPg->pNextFree = 0; - pPg->pPrevFree = pPager->pLast; - pPager->pLast = pPg; - if( pPg->pPrevFree ){ - pPg->pPrevFree->pNextFree = pPg; - }else{ - pPager->pFirst = pPg; - } - if( pPg->needSync==0 && pPager->pFirstSynced==0 ){ - pPager->pFirstSynced = pPg; - } + Pager *pPager = pPg->pPager; + + lruListAdd(pPg); if( pPager->xDestructor ){ pPager->xDestructor(pPg, pPager->pageSize); } /* When all pages reach the freelist, drop the read lock from Index: src/sqlite.h.in ================================================================== --- src/sqlite.h.in +++ src/sqlite.h.in @@ -28,11 +28,11 @@ ** The name of this file under configuration management is "sqlite.h.in". ** The makefile makes some minor changes to this file (such as inserting ** the version number) and changes its name to "sqlite3.h" as ** part of the build process. ** -** @(#) $Id: sqlite.h.in,v 1.242 2007/08/25 16:21:30 drh Exp $ +** @(#) $Id: sqlite.h.in,v 1.243 2007/08/27 17:27:49 danielk1977 Exp $ */ #ifndef _SQLITE3_H_ #define _SQLITE3_H_ #include /* Needed for the definition of va_list */ @@ -3281,10 +3281,11 @@ **
  • SQLITE_MUTEX_RECURSIVE **
  • SQLITE_MUTEX_STATIC_MASTER **
  • SQLITE_MUTEX_STATIC_MEM **
  • SQLITE_MUTEX_STATIC_MEM2 **
  • SQLITE_MUTEX_STATIC_PRNG +**
  • SQLITE_MUTEX_STATIC_LRU ** ** ** The first two constants cause sqlite3_mutex_alloc() to create ** a new mutex. The new mutex is recursive when SQLITE_MUTEX_RECURSIVE ** is used but not necessarily so when SQLITE_MUTEX_FAST is used. @@ -3385,10 +3386,11 @@ #define SQLITE_MUTEX_RECURSIVE 1 #define SQLITE_MUTEX_STATIC_MASTER 2 #define SQLITE_MUTEX_STATIC_MEM 3 /* sqlite3_malloc() */ #define SQLITE_MUTEX_STATIC_MEM2 4 /* sqlite3_release_memory() */ #define SQLITE_MUTEX_STATIC_PRNG 5 /* sqlite3_random() */ +#define SQLITE_MUTEX_STATIC_LRU 6 /* lru page list */ /* ** Undo the hack that converts floating point types to integer for ** builds on processors without floating point support.