Many hyperlinks are disabled. Use anonymous login to enable hyperlinks.
Overview
Comment: | Separate the concepts of underlying storage size and mapped size in the VFS shared-memory implementation. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | wal |
Files: | files | file ages | folders |
SHA1: | 4cbe49f13fed288f94ff305bcfd99df9 |
User & Date: | drh 2010-04-30 14:39:51.000 |
Context
2010-04-30
| ||
15:24 | If a reader attempts to upgrade to a writer, but is not reading the most recent database snapshot, return SQLITE_BUSY. (check-in: 837d82a929 user: dan tags: wal) | |
14:39 | Separate the concepts of underlying storage size and mapped size in the VFS shared-memory implementation. (check-in: 4cbe49f13f user: drh tags: wal) | |
11:43 | Add a missing walIndexUnmap() call to the checkpoint code. Change a couple of SQLITE_CANTOPEN constants to SQLITE_CANTOPEN_BKPT. (check-in: 1f9e8c5c27 user: dan tags: wal) | |
Changes
Changes to src/os_unix.c.
︙ | ︙ | |||
4590 4591 4592 4593 4594 4595 4596 | */ struct unixShmFile { struct unixFileId fid; /* Unique file identifier */ sqlite3_mutex *mutex; /* Mutex to access this object */ sqlite3_mutex *mutexBuf; /* Mutex to access zBuf[] */ sqlite3_mutex *mutexRecov; /* The RECOVER mutex */ char *zFilename; /* Name of the file */ | < > | < < | 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 | */ struct unixShmFile { struct unixFileId fid; /* Unique file identifier */ sqlite3_mutex *mutex; /* Mutex to access this object */ sqlite3_mutex *mutexBuf; /* Mutex to access zBuf[] */ sqlite3_mutex *mutexRecov; /* The RECOVER mutex */ char *zFilename; /* Name of the file */ int h; /* Open file descriptor */ int szMap; /* Size of the mapping of file into memory */ char *pMMapBuf; /* Where currently mmapped(). NULL if unmapped */ int nRef; /* Number of unixShm objects pointing to this */ unixShm *pFirst; /* All unixShm objects pointing to this */ unixShmFile *pNext; /* Next in list of all unixShmFile objects */ #ifdef SQLITE_DEBUG u8 exclMask; /* Mask of exclusive locks held */ u8 sharedMask; /* Mask of shared locks held */ u8 nextShmId; /* Next available unixShm.id value */ |
︙ | ︙ | |||
4991 4992 4993 4994 4995 4996 4997 | rc = fstat(pFile->h, &sStat); if( rc ){ rc = SQLITE_CANTOPEN_BKPT; goto shm_open_err; } pFile->fid.dev = sStat.st_dev; pFile->fid.ino = sStat.st_ino; | < < < | 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 | rc = fstat(pFile->h, &sStat); if( rc ){ rc = SQLITE_CANTOPEN_BKPT; goto shm_open_err; } pFile->fid.dev = sStat.st_dev; pFile->fid.ino = sStat.st_ino; /* Check to see if another process is holding the dead-man switch. ** If not, truncate the file to zero length. */ if( unixShmSystemLock(pFile, F_WRLCK, UNIX_SHM_MUTEX) ){ rc = SQLITE_IOERR_LOCK; goto shm_open_err; } if( unixShmSystemLock(pFile, F_WRLCK, UNIX_SHM_DMS)==SQLITE_OK ){ if( ftruncate(pFile->h, 0) ){ rc = SQLITE_IOERR; goto shm_open_err; } } rc = unixShmSystemLock(pFile, F_RDLCK, UNIX_SHM_DMS); if( rc ) goto shm_open_err; unixShmSystemLock(pFile, F_UNLCK, UNIX_SHM_MUTEX); } /* Make the new connection a child of the unixShmFile */ |
︙ | ︙ | |||
5074 5075 5076 5077 5078 5079 5080 | } unixLeaveMutex(); return SQLITE_OK; } /* | | | | > | < | | | | > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | < | | < | | > > | | < > | | | | < < | | 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 | } unixLeaveMutex(); return SQLITE_OK; } /* ** Query and/or changes the size of the underlying storage for ** a shared-memory segment. The reqSize parameter is the new size ** of the underlying storage, or -1 to do just a query. The size ** of the underlying storage (after resizing if resizing occurs) is ** written into pNewSize. ** ** This routine does not (necessarily) change the size of the mapping ** of the underlying storage into memory. Use xShmGet() to change ** the mapping size. ** ** The reqSize parameter is the minimum size requested. The implementation ** is free to expand the storage to some larger amount if it chooses. */ static int unixShmSize( sqlite3_shm *pSharedMem, /* Pointer returned by unixShmOpen() */ int reqSize, /* Requested size. -1 for query only */ int *pNewSize /* Write new size here */ ){ unixShm *p = (unixShm*)pSharedMem; unixShmFile *pFile = p->pFile; int rc = SQLITE_OK; struct stat sStat; if( reqSize>=0 ){ reqSize = (reqSize + SQLITE_UNIX_SHM_INCR - 1)/SQLITE_UNIX_SHM_INCR; reqSize *= SQLITE_UNIX_SHM_INCR; rc = ftruncate(pFile->h, reqSize); } if( fstat(pFile->h, &sStat)==0 ){ *pNewSize = (int)sStat.st_size; }else{ *pNewSize = 0; rc = SQLITE_IOERR; } return rc; } /* ** Map the shared storage into memory. 
The minimum size of the ** mapping should be reqMapSize if reqMapSize is positive. If ** reqMapSize is zero or negative, the implementation can choose ** whatever mapping size is convenient. ** ** *ppBuf is made to point to the memory which is a mapping of the ** underlying storage. This segment is locked. unixShmRelease() ** must be called to release the lock. ** ** *pNewMapSize is set to the size of the mapping. ** ** *ppBuf and *pNewMapSize might be NULL and zero if no space has ** yet been allocated to the underlying storage. */ static int unixShmGet( sqlite3_shm *pSharedMem, /* Pointer returned by unixShmOpen() */ int reqMapSize, /* Requested size of mapping. -1 means don't care */ int *pNewMapSize, /* Write new size of mapping here */ void **ppBuf /* Write mapping buffer origin here */ ){ unixShm *p = (unixShm*)pSharedMem; unixShmFile *pFile = p->pFile; int rc = SQLITE_OK; sqlite3_mutex_enter(pFile->mutexBuf); sqlite3_mutex_enter(pFile->mutex); if( pFile->szMap==0 || reqMapSize>pFile->szMap ){ int actualSize; if( unixShmSize(pSharedMem, -1, &actualSize)==SQLITE_OK && reqMapSize<actualSize ){ reqMapSize = actualSize; } if( pFile->pMMapBuf ){ munmap(pFile->pMMapBuf, pFile->szMap); } pFile->pMMapBuf = mmap(0, reqMapSize, PROT_READ|PROT_WRITE, MAP_SHARED, pFile->h, 0); pFile->szMap = pFile->pMMapBuf ? reqMapSize : 0; } *pNewMapSize = pFile->szMap; *ppBuf = pFile->pMMapBuf; sqlite3_mutex_leave(pFile->mutex); return rc; } /* ** Release the lock held on the shared memory segment to that other |
︙ | ︙ | |||
6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 | unixDlClose, /* xDlClose */ \ unixRandomness, /* xRandomness */ \ unixSleep, /* xSleep */ \ unixCurrentTime, /* xCurrentTime */ \ unixGetLastError, /* xGetLastError */ \ unixShmOpen, /* xShmOpen */ \ unixShmSize, /* xShmSize */ \ unixShmRelease, /* xShmRelease */ \ 0, /* xShmPush */ \ 0, /* xShmPull */ \ unixShmLock, /* xShmLock */ \ unixShmClose, /* xShmClose */ \ unixShmDelete, /* xShmDelete */ \ 0, /* xRename */ \ | > | 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 | unixDlClose, /* xDlClose */ \ unixRandomness, /* xRandomness */ \ unixSleep, /* xSleep */ \ unixCurrentTime, /* xCurrentTime */ \ unixGetLastError, /* xGetLastError */ \ unixShmOpen, /* xShmOpen */ \ unixShmSize, /* xShmSize */ \ unixShmGet, /* xShmGet */ \ unixShmRelease, /* xShmRelease */ \ 0, /* xShmPush */ \ 0, /* xShmPull */ \ unixShmLock, /* xShmLock */ \ unixShmClose, /* xShmClose */ \ unixShmDelete, /* xShmDelete */ \ 0, /* xRename */ \ |
︙ | ︙ |
Changes to src/sqlite.h.in.
︙ | ︙ | |||
840 841 842 843 844 845 846 | int (*xCurrentTime)(sqlite3_vfs*, double*); int (*xGetLastError)(sqlite3_vfs*, int, char *); /* ** The methods above are in version 1 of the sqlite_vfs object ** definition. Those that follow are added in version 2 or later */ int (*xShmOpen)(sqlite3_vfs*, const char *zName, sqlite3_shm**); | | > | 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 | int (*xCurrentTime)(sqlite3_vfs*, double*); int (*xGetLastError)(sqlite3_vfs*, int, char *); /* ** The methods above are in version 1 of the sqlite_vfs object ** definition. Those that follow are added in version 2 or later */ int (*xShmOpen)(sqlite3_vfs*, const char *zName, sqlite3_shm**); int (*xShmSize)(sqlite3_shm*, int reqSize, int *pNewSize); int (*xShmGet)(sqlite3_shm*, int reqMapSize, int *pMapSize, void**); int (*xShmRelease)(sqlite3_shm*); int (*xShmPush)(sqlite3_shm*); int (*xShmPull)(sqlite3_shm*); int (*xShmLock)(sqlite3_shm*, int desiredLock, int *gotLock); int (*xShmClose)(sqlite3_shm*); int (*xShmDelete)(sqlite3_vfs*, const char *zName); int (*xRename)(sqlite3_vfs*, const char *zOld, const char *zNew, int dirSync); |
︙ | ︙ |
Changes to src/wal.c.
︙ | ︙ | |||
122 123 124 125 126 127 128 | ** following object. */ struct Wal { sqlite3_vfs *pVfs; /* The VFS used to create pFd */ sqlite3_file *pFd; /* File handle for WAL file */ u32 iCallback; /* Value to pass to log callback (or 0) */ sqlite3_shm *pWIndex; /* The open wal-index file */ | | | 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 | ** following object. */ struct Wal { sqlite3_vfs *pVfs; /* The VFS used to create pFd */ sqlite3_file *pFd; /* File handle for WAL file */ u32 iCallback; /* Value to pass to log callback (or 0) */ sqlite3_shm *pWIndex; /* The open wal-index file */ int szWIndex; /* Size of the wal-index that is mapped in mem */ u32 *pWiData; /* Pointer to wal-index content in memory */ u8 lockState; /* SQLITE_SHM_xxxx constant showing lock state */ u8 readerType; /* SQLITE_SHM_READ or SQLITE_SHM_READ_FULL */ WalIndexHdr hdr; /* Wal-index for current snapshot */ }; |
︙ | ︙ | |||
364 365 366 367 368 369 370 | (WALINDEX_LOCK_OFFSET+WALINDEX_LOCK_RESERVED)/sizeof(u32) + (((iFrame-1)>>8)<<6) /* Indexes that occur before iFrame */ + iFrame-1 /* Db page numbers that occur before iFrame */ ); } /* | | > | | > > > | | | | | | > > | | | > | > > > > | | > > > > > > | | | 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 | (WALINDEX_LOCK_OFFSET+WALINDEX_LOCK_RESERVED)/sizeof(u32) + (((iFrame-1)>>8)<<6) /* Indexes that occur before iFrame */ + iFrame-1 /* Db page numbers that occur before iFrame */ ); } /* ** Release our reference to the wal-index memory map, if we are holding ** it. */ static void walIndexUnmap(Wal *pWal){ if( pWal->pWiData ){ pWal->pVfs->xShmRelease(pWal->pWIndex); pWal->pWiData = 0; } } /* ** Map the wal-index file into memory if it isn't already. ** ** The reqSize parameter is the minimum required size of the mapping. ** A value of -1 means "don't care". The reqSize parameter is ignored ** if the mapping is already held. */ static int walIndexMap(Wal *pWal, int reqSize){ int rc = SQLITE_OK; if( pWal->pWiData==0 ){ rc = pWal->pVfs->xShmGet(pWal->pWIndex, reqSize, &pWal->szWIndex, (void**)(char*)&pWal->pWiData); if( rc==SQLITE_OK && pWal->pWiData==0 ){ /* Make sure pWal->pWiData is not NULL while we are holding the ** lock on the mapping. */ assert( pWal->szWIndex==0 ); pWal->pWiData = &pWal->iCallback; } } return rc; } /* ** Remap the wal-index so that the mapping covers the full size ** of the underlying file. ** ** If enlargeTo is non-negative, then increase the size of the underlying ** storage to be at least as big as enlargeTo before remapping. 
*/ static int walIndexRemap(Wal *pWal, int enlargeTo){ int rc; int sz; rc = pWal->pVfs->xShmSize(pWal->pWIndex, enlargeTo, &sz); if( rc==SQLITE_OK && sz>pWal->szWIndex ){ walIndexUnmap(pWal); rc = walIndexMap(pWal, sz); } return rc; } /* ** Increment by which to increase the wal-index file size. */ #define WALINDEX_MMAP_INCREMENT (64*1024) /* ** Set an entry in the wal-index map to map log frame iFrame to db ** page iPage. Values are always appended to the wal-index (i.e. the ** value of iFrame is always exactly one more than the value passed to ** the previous call), but that restriction is not enforced or asserted ** here. */ static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){ u32 iSlot = walIndexEntry(iFrame); walIndexMap(pWal, -1); while( (iSlot+128)>=pWal->szWIndex ){ int rc; int nByte = pWal->szWIndex*4 + WALINDEX_MMAP_INCREMENT; /* Enlarge the storage, then remap it. */ rc = walIndexRemap(pWal, nByte); if( rc!=SQLITE_OK ){ return rc; } } /* Set the wal-index entry itself */ |
︙ | ︙ | |||
636 637 638 639 640 641 642 | u32 iLast; /* Last frame in log */ int nByte; /* Number of bytes to allocate */ int i; /* Iterator variable */ int nFinal; /* Number of unindexed entries */ struct WalSegment *pFinal; /* Final (unindexed) segment */ u8 *aTmp; /* Temp space used by merge-sort */ | | | 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 | u32 iLast; /* Last frame in log */ int nByte; /* Number of bytes to allocate */ int i; /* Iterator variable */ int nFinal; /* Number of unindexed entries */ struct WalSegment *pFinal; /* Final (unindexed) segment */ u8 *aTmp; /* Temp space used by merge-sort */ walIndexMap(pWal, -1); aData = pWal->pWiData; iLast = pWal->hdr.iLastPg; nSegment = (iLast >> 8) + 1; nFinal = (iLast & 0x000000FF); nByte = sizeof(WalIterator) + (nSegment-1)*sizeof(struct WalSegment) + 512; p = (WalIterator *)sqlite3_malloc(nByte); |
︙ | ︙ | |||
785 786 787 788 789 790 791 | ** If the checksum cannot be verified return SQLITE_ERROR. */ int walIndexTryHdr(Wal *pWal, int *pChanged){ u32 aCksum[2] = {1, 1}; u32 aHdr[WALINDEX_HDR_NFIELD+2]; if( pWal->szWIndex==0 ){ | > | | 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 | ** If the checksum cannot be verified return SQLITE_ERROR. */ int walIndexTryHdr(Wal *pWal, int *pChanged){ u32 aCksum[2] = {1, 1}; u32 aHdr[WALINDEX_HDR_NFIELD+2]; if( pWal->szWIndex==0 ){ int rc; rc = walIndexRemap(pWal, WALINDEX_MMAP_INCREMENT); if( rc ) return rc; } /* Read the header. The caller may or may not have locked the wal-index ** file, meaning it is possible that an inconsistent snapshot is read ** from the file. If this happens, return SQLITE_ERROR. The caller will ** retry. Or, if the caller has already locked the file and the header |
︙ | ︙ | |||
826 827 828 829 830 831 832 | ** If the wal-index header is successfully read, return SQLITE_OK. ** Otherwise an SQLite error code. */ static int walIndexReadHdr(Wal *pWal, int *pChanged){ int rc; assert( pWal->lockState>=SQLITE_SHM_READ ); | | | 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 | ** If the wal-index header is successfully read, return SQLITE_OK. ** Otherwise an SQLite error code. */ static int walIndexReadHdr(Wal *pWal, int *pChanged){ int rc; assert( pWal->lockState>=SQLITE_SHM_READ ); walIndexMap(pWal, -1); /* First try to read the header without a lock. Verify the checksum ** before returning. This will almost always work. */ if( SQLITE_OK==walIndexTryHdr(pWal, pChanged) ){ return SQLITE_OK; } |
︙ | ︙ | |||
877 878 879 880 881 882 883 | rc = walIndexReadHdr(pWal, pChanged); if( rc!=SQLITE_OK ){ /* An error occured while attempting log recovery. */ sqlite3WalCloseSnapshot(pWal); }else{ /* Check if the mapping needs to grow. */ | | | | | < | 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 | rc = walIndexReadHdr(pWal, pChanged); if( rc!=SQLITE_OK ){ /* An error occured while attempting log recovery. */ sqlite3WalCloseSnapshot(pWal); }else{ /* Check if the mapping needs to grow. */ if( pWal->hdr.iLastPg && walIndexEntry(pWal->hdr.iLastPg)>=pWal->szWIndex ){ walIndexRemap(pWal, -1); } } } walIndexUnmap(pWal); return rc; } |
︙ | ︙ | |||
909 910 911 912 913 914 915 | */ int sqlite3WalRead(Wal *pWal, Pgno pgno, int *pInWal, u8 *pOut){ u32 iRead = 0; u32 *aData; int iFrame = (pWal->hdr.iLastPg & 0xFFFFFF00); assert( pWal->lockState==SQLITE_SHM_READ||pWal->lockState==SQLITE_SHM_WRITE ); | | | 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 | */ int sqlite3WalRead(Wal *pWal, Pgno pgno, int *pInWal, u8 *pOut){ u32 iRead = 0; u32 *aData; int iFrame = (pWal->hdr.iLastPg & 0xFFFFFF00); assert( pWal->lockState==SQLITE_SHM_READ||pWal->lockState==SQLITE_SHM_WRITE ); walIndexMap(pWal, -1); /* Do a linear search of the unindexed block of page-numbers (if any) ** at the end of the wal-index. An alternative to this would be to ** build an index in private memory each time a read transaction is ** opened on a new snapshot. */ aData = pWal->pWiData; |
︙ | ︙ |