Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Improved comments and variable names in the read-only WAL logic. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | readonly-wal-recovery |
Files: | files | file ages | folders |
SHA3-256: |
d3c25740eec9a2a41c29e6e488fcf658 |
User & Date: | drh 2017-11-10 20:00:50.239 |
Context
2017-11-11
| ||
13:30 | Further comment improvements in wal.c. No code changes. (check-in: 346388007d user: drh tags: readonly-wal-recovery) | |
2017-11-10
| ||
20:00 | Improved comments and variable names in the read-only WAL logic. (check-in: d3c25740ee user: drh tags: readonly-wal-recovery) | |
2017-11-09
| ||
23:24 | Avoid superfluous SHM unlock call in the Win32 VFS. (check-in: 5a384be697 user: mistachkin tags: readonly-wal-recovery) | |
Changes
Changes to src/wal.c.
︙ | ︙ | |||
451 452 453 454 455 456 457 | u8 exclusiveMode; /* Non-zero if connection is in exclusive mode */ u8 writeLock; /* True if in a write transaction */ u8 ckptLock; /* True if holding a checkpoint lock */ u8 readOnly; /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */ u8 truncateOnCommit; /* True to truncate WAL file on commit */ u8 syncHeader; /* Fsync the WAL header if true */ u8 padToSectorBoundary; /* Pad transactions out to the next sector */ | | | 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 | u8 exclusiveMode; /* Non-zero if connection is in exclusive mode */ u8 writeLock; /* True if in a write transaction */ u8 ckptLock; /* True if holding a checkpoint lock */ u8 readOnly; /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */ u8 truncateOnCommit; /* True to truncate WAL file on commit */ u8 syncHeader; /* Fsync the WAL header if true */ u8 padToSectorBoundary; /* Pad transactions out to the next sector */ u8 bShmUnreliable; /* SHM content is read-only and unreliable */ WalIndexHdr hdr; /* Wal-index header for current transaction */ u32 minFrame; /* Ignore wal frames before this one */ u32 iReCksum; /* On commit, recalculate checksums from here */ const char *zWalName; /* Name of WAL file */ u32 nCkpt; /* Checkpoint sequence counter in the wal-header */ #ifdef SQLITE_DEBUG u8 lockError; /* True if a locking error has occurred */ |
︙ | ︙ | |||
1267 1268 1269 1270 1271 1272 1273 | return rc; } /* ** Close an open wal-index. */ static void walIndexClose(Wal *pWal, int isDelete){ | | | 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 | return rc; } /* ** Close an open wal-index. */ static void walIndexClose(Wal *pWal, int isDelete){ if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE || pWal->bShmUnreliable ){ int i; for(i=0; i<pWal->nWiData; i++){ sqlite3_free((void *)pWal->apWiData[i]); pWal->apWiData[i] = 0; } } if( pWal->exclusiveMode!=WAL_HEAPMEMORY_MODE ){ |
︙ | ︙ | |||
2095 2096 2097 2098 2099 2100 2101 | volatile u32 *page0; /* Chunk of wal-index containing header */ /* Ensure that page 0 of the wal-index (the page that contains the ** wal-index header) is mapped. Return early if an error occurs here. */ assert( pChanged ); rc = walIndexPage(pWal, 0, &page0); | > > | > > > > > > | | | | | < | > | | 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 | volatile u32 *page0; /* Chunk of wal-index containing header */ /* Ensure that page 0 of the wal-index (the page that contains the ** wal-index header) is mapped. Return early if an error occurs here. */ assert( pChanged ); rc = walIndexPage(pWal, 0, &page0); if( rc!=SQLITE_OK ){ assert( rc!=SQLITE_READONLY ); /* READONLY changed to OK in walIndexPage */ if( rc==SQLITE_READONLY_CANTINIT ){ /* The SQLITE_READONLY_CANTINIT return means that the shared-memory ** was openable but is not writable, and this thread is unable to ** confirm that another write-capable connection has the shared-memory ** open, and hence the content of the shared-memory is unreliable, ** since the shared-memory might be inconsistent with the WAL file ** and there is no writer on hand to fix it. */ assert( page0==0 && pWal->writeLock==0 ); pWal->bShmUnreliable = 1; pWal->exclusiveMode = WAL_HEAPMEMORY_MODE; *pChanged = 1; }else{ return rc; /* Any other non-OK return is just an error */ } }; assert( page0 || pWal->writeLock==0 ); /* If the first page of the wal-index has been mapped, try to read the ** wal-index header immediately, without holding any lock. This usually ** works, but may fail if the wal-index header is corrupt or currently ** being modified by another thread or process. */ badHdr = (page0 ? walIndexTryHdr(pWal, pChanged) : 1); /* If the first attempt failed, it might have been due to a race ** with a writer. So get a WRITE lock and try again. */ assert( badHdr==0 || pWal->writeLock==0 ); if( badHdr ){ if( pWal->bShmUnreliable==0 && (pWal->readOnly & WAL_SHM_RDONLY) ){ if( SQLITE_OK==(rc = walLockShared(pWal, WAL_WRITE_LOCK)) ){ walUnlockShared(pWal, WAL_WRITE_LOCK); rc = SQLITE_READONLY_RECOVERY; } }else if( SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1)) ){ pWal->writeLock = 1; if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){ |
︙ | ︙ | |||
2148 2149 2150 2151 2152 2153 2154 | /* If the header is read successfully, check the version number to make ** sure the wal-index was not constructed with some future format that ** this version of SQLite cannot understand. */ if( badHdr==0 && pWal->hdr.iVersion!=WALINDEX_MAX_VERSION ){ rc = SQLITE_CANTOPEN_BKPT; } | | | | | | > > > > > > > > > | | > | | | | < | | > > | < | < | < > | | > | | | < < < < | > > > > > > > > > > > > > | | | | | | 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 | /* If the header is read successfully, check the version number to make ** sure the wal-index was not constructed with some future format that ** this version of SQLite cannot understand. */ if( badHdr==0 && pWal->hdr.iVersion!=WALINDEX_MAX_VERSION ){ rc = SQLITE_CANTOPEN_BKPT; } if( pWal->bShmUnreliable ){ if( rc!=SQLITE_OK ){ walIndexClose(pWal, 0); pWal->bShmUnreliable = 0; assert( pWal->nWiData>0 && pWal->apWiData[0]==0 ); if( rc==SQLITE_IOERR_SHORT_READ ) rc = WAL_RETRY; } pWal->exclusiveMode = WAL_NORMAL_MODE; } return rc; } /* ** Open a transaction in a connection where the shared-memory is read-only ** and where we cannot verify that there is a separate write-capable connection ** on hand to keep the shared-memory up-to-date with the WAL file. ** ** This can happen, for example, when the shared-memory is implemented by ** memory-mapping a *-shm file, where a prior writer has shut down and ** left the *-shm file on disk, and now the present connection is trying ** to use that database but lacks write permission on the *-shm file. ** Other scenarios are also possible, depending on the VFS implementation. ** ** Precondition: ** ** The *-wal file has been read and an appropriate wal-index has been ** constructed in pWal->apWiData[] using heap memory instead of shared ** memory. ** ** If this function returns SQLITE_OK, then the read transaction has ** been successfully opened. In this case output variable (*pChanged) ** is set to true before returning if the caller should discard the ** contents of the page cache before proceeding. Or, if it returns ** WAL_RETRY, then the heap memory wal-index has been discarded and ** the caller should retry opening the read transaction from the ** beginning (including attempting to map the *-shm file). ** ** If an error occurs, an SQLite error code is returned. */ static int walBeginShmUnreliable(Wal *pWal, int *pChanged){ i64 szWal; /* Size of wal file on disk in bytes */ i64 iOffset; /* Current offset when reading wal file */ u8 aBuf[WAL_HDRSIZE]; /* Buffer to load WAL header into */ u8 *aFrame = 0; /* Malloc'd buffer to load entire frame */ int szFrame; /* Number of bytes in buffer aFrame[] */ u8 *aData; /* Pointer to data part of aFrame buffer */ volatile void *pDummy; /* Dummy argument for xShmMap */ int rc; /* Return code */ u32 aSaveCksum[2]; /* Saved copy of pWal->hdr.aFrameCksum */ assert( pWal->bShmUnreliable ); assert( pWal->readOnly & WAL_SHM_RDONLY ); assert( pWal->nWiData>0 && pWal->apWiData[0] ); /* Take WAL_READ_LOCK(0). This has the effect of preventing any ** writers from running a checkpoint, but does not stop them ** from running recovery. */ rc = walLockShared(pWal, WAL_READ_LOCK(0)); if( rc!=SQLITE_OK ){ if( rc==SQLITE_BUSY ) rc = WAL_RETRY; goto begin_unreliable_shm_out; } pWal->readLock = 0; /* Check to see if a separate writer has attached to the shared-memory area, ** thus making the shared-memory "reliable" again. Do this by invoking ** the xShmMap() routine of the VFS and looking to see if the return ** is SQLITE_READONLY instead of SQLITE_READONLY_CANTINIT. ** ** Once sqlite3OsShmMap() has been called for a file and has returned ** any SQLITE_READONLY value, it must SQLITE_READONLY or ** SQLITE_READONLY_CANTINIT or some error for all subsequent invocations, ** until sqlite3OsShmUnmap() has been called. This is a requirement ** on the VFS implementation. ** ** If the shared-memory is now "reliable" return WAL_RETRY, which will ** cause the heap-memory WAL-index to be discarded and the actual ** shared memory to be used in its place. */ rc = sqlite3OsShmMap(pWal->pDbFd, 0, WALINDEX_PGSZ, 0, &pDummy); assert( rc!=SQLITE_OK ); /* SQLITE_OK not possible for read-only connection */ if( rc!=SQLITE_READONLY_CANTINIT ){ rc = (rc==SQLITE_READONLY ? WAL_RETRY : rc); goto begin_unreliable_shm_out; } /* Reach this point only if the real shared-memory is still unreliable. ** Assume the in-memory WAL-index substitute is correct and load it ** into pWal->hdr. */ memcpy(&pWal->hdr, (void*)walIndexHdr(pWal), sizeof(WalIndexHdr)); /* The WAL_READ_LOCK(0) lock held by this client prevents a checkpoint ** from taking place. But it does not prevent the wal from being wrapped ** if a checkpoint has already taken place. This means that if another ** client is connected at this point, it may have already checkpointed ** the entire wal. In that case it would not be safe to continue with ** the this transaction, as the other client may overwrite wal ** frames that this client is still using. */ rc = sqlite3OsFileSize(pWal->pWalFd, &szWal); if( rc!=SQLITE_OK ){ goto begin_unreliable_shm_out; } if( szWal<WAL_HDRSIZE ){ /* If the wal file is too small to contain a wal-header and the ** wal-index header has mxFrame==0, then it must be safe to proceed ** reading the database file only. However, the page cache cannot ** be trusted, as a read/write connection may have connected, written ** the db, run a checkpoint, truncated the wal file and disconnected ** since this client's last read transaction. */ *pChanged = 1; rc = (pWal->hdr.mxFrame==0 ? SQLITE_OK : WAL_RETRY); goto begin_unreliable_shm_out; } /* Check the salt keys at the start of the wal file still match. */ rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0); if( rc!=SQLITE_OK ){ goto begin_unreliable_shm_out; } if( memcmp(&pWal->hdr.aSalt, &aBuf[16], 8) ){ rc = WAL_RETRY; goto begin_unreliable_shm_out; } /* Allocate a buffer to read frames into */ szFrame = pWal->hdr.szPage + WAL_FRAME_HDRSIZE; aFrame = (u8 *)sqlite3_malloc64(szFrame); if( aFrame==0 ){ rc = SQLITE_NOMEM_BKPT; goto begin_unreliable_shm_out; } aData = &aFrame[WAL_FRAME_HDRSIZE]; /* Check to see if a complete transaction has been appended to the ** wal file since the heap-memory wal-index was created. If so, the ** heap-memory wal-index is discarded and WAL_RETRY returned to ** the caller. */ |
︙ | ︙ | |||
2294 2295 2296 2297 2298 2299 2300 | rc = WAL_RETRY; break; } } pWal->hdr.aFrameCksum[0] = aSaveCksum[0]; pWal->hdr.aFrameCksum[1] = aSaveCksum[1]; | | | | 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 | rc = WAL_RETRY; break; } } pWal->hdr.aFrameCksum[0] = aSaveCksum[0]; pWal->hdr.aFrameCksum[1] = aSaveCksum[1]; begin_unreliable_shm_out: sqlite3_free(aFrame); if( rc!=SQLITE_OK ){ int i; for(i=0; i<pWal->nWiData; i++){ sqlite3_free((void*)pWal->apWiData[i]); pWal->apWiData[i] = 0; } pWal->bShmUnreliable = 0; sqlite3WalEndReadTransaction(pWal); *pChanged = 1; } return rc; } /* |
︙ | ︙ | |||
2398 2399 2400 2401 2402 2403 2404 | } if( cnt>=10 ) nDelay = (cnt-9)*(cnt-9)*39; sqlite3OsSleep(pWal->pVfs, nDelay); } if( !useWal ){ assert( rc==SQLITE_OK ); | | | 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 | } if( cnt>=10 ) nDelay = (cnt-9)*(cnt-9)*39; sqlite3OsSleep(pWal->pVfs, nDelay); } if( !useWal ){ assert( rc==SQLITE_OK ); if( pWal->bShmUnreliable==0 ){ rc = walIndexReadHdr(pWal, pChanged); } if( rc==SQLITE_BUSY ){ /* If there is not a recovery running in another thread or process ** then convert BUSY errors to WAL_RETRY. If recovery is known to ** be running, convert BUSY to BUSY_RECOVERY. There is a race here ** which might cause WAL_RETRY to be returned even if BUSY_RECOVERY |
︙ | ︙ | |||
2429 2430 2431 2432 2433 2434 2435 | }else if( rc==SQLITE_BUSY ){ rc = SQLITE_BUSY_RECOVERY; } } if( rc!=SQLITE_OK ){ return rc; } | | | | 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 | }else if( rc==SQLITE_BUSY ){ rc = SQLITE_BUSY_RECOVERY; } } if( rc!=SQLITE_OK ){ return rc; } else if( pWal->bShmUnreliable ){ return walBeginShmUnreliable(pWal, pChanged); } } assert( pWal->nWiData>0 ); assert( pWal->apWiData[0] || (pWal->readOnly & WAL_SHM_RDONLY) ); pInfo = pWal->apWiData[0] ? walCkptInfo(pWal) : 0; if( !useWal && (pInfo==0 || pInfo->nBackfill==pWal->hdr.mxFrame) |
︙ | ︙ | |||
2785 2786 2787 2788 2789 2790 2791 | /* If the "last page" field of the wal-index header snapshot is 0, then ** no data will be read from the wal under any circumstances. Return early ** in this case as an optimization. Likewise, if pWal->readLock==0, ** then the WAL is ignored by the reader so return early, as if the ** WAL were empty. */ | | | 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 | /* If the "last page" field of the wal-index header snapshot is 0, then ** no data will be read from the wal under any circumstances. Return early ** in this case as an optimization. Likewise, if pWal->readLock==0, ** then the WAL is ignored by the reader so return early, as if the ** WAL were empty. */ if( iLast==0 || (pWal->readLock==0 && pWal->bShmUnreliable==0) ){ *piRead = 0; return SQLITE_OK; } /* Search the hash table or tables for an entry matching page number ** pgno. Each iteration of the following for() loop searches one ** hash table (each hash table indexes up to HASHTABLE_NPAGE frames). |
︙ | ︙ | |||
2848 2849 2850 2851 2852 2853 2854 | #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT /* If expensive assert() statements are available, do a linear search ** of the wal-index file content. Make sure the results agree with the ** result obtained using the hash indexes above. */ { u32 iRead2 = 0; u32 iTest; | | | 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 | #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT /* If expensive assert() statements are available, do a linear search ** of the wal-index file content. Make sure the results agree with the ** result obtained using the hash indexes above. */ { u32 iRead2 = 0; u32 iTest; assert( pWal->bShmUnreliable || pWal->minFrame>0 ); for(iTest=iLast; iTest>=pWal->minFrame && iTest>0; iTest--){ if( walFramePgno(pWal, iTest)==pgno ){ iRead2 = iTest; break; } } assert( iRead==iRead2 ); |
︙ | ︙ |