/ Check-in [ebf40413]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Changes to make WAL more robust against SHM locking failures and OOM errors.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: ebf4041383c3cdddb5861960359abd209d4a1028
User & Date: drh 2010-06-02 14:45:51
Context
2010-06-02
15:43
When walTryBeginRead() encounters an I/O error trying to set a lock (as opposed to SQLITE_BUSY) be sure to propagate that error back up the call stack. check-in: aa2c2b67 user: drh tags: trunk
14:45
Changes to make WAL more robust against SHM locking failures and OOM errors. check-in: ebf40413 user: drh tags: trunk
14:43
In the rowhash.test, make sure global variables are cleared prior to use. check-in: 28efe0a4 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/os_unix.c.

  3358   3358   
  3359   3359       /* Check to see if another process is holding the dead-man switch.
  3360   3360       ** If not, truncate the file to zero length. 
  3361   3361       */
  3362   3362       rc = SQLITE_OK;
  3363   3363       if( unixShmSystemLock(pShmNode, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){
  3364   3364         if( ftruncate(pShmNode->h, 0) ){
  3365         -        rc = SQLITE_IOERR;
         3365  +        rc = SQLITE_IOERR_SHMOPEN;
  3366   3366         }
  3367   3367       }
  3368   3368       if( rc==SQLITE_OK ){
  3369   3369         rc = unixShmSystemLock(pShmNode, F_RDLCK, UNIX_SHM_DMS, 1);
  3370   3370       }
  3371   3371       if( rc ) goto shm_open_err;
  3372   3372     }
................................................................................
  3465   3465   
  3466   3466     while( 1 ){
  3467   3467       if( fstat(pShmNode->h, &sStat)==0 ){
  3468   3468         *pNewSize = (int)sStat.st_size;
  3469   3469         if( reqSize<=(int)sStat.st_size ) break;
  3470   3470       }else{
  3471   3471         *pNewSize = 0;
  3472         -      rc = SQLITE_IOERR;
         3472  +      rc = SQLITE_IOERR_SHMSIZE;
  3473   3473         break;
  3474   3474       }
  3475   3475       rc = ftruncate(pShmNode->h, reqSize);
  3476   3476       reqSize = -1;
  3477   3477     }
  3478   3478     return rc;
  3479   3479   }

Changes to src/sqlite.h.in.

   384    384   #define SQLITE_READONLY     8   /* Attempt to write a readonly database */
   385    385   #define SQLITE_INTERRUPT    9   /* Operation terminated by sqlite3_interrupt()*/
   386    386   #define SQLITE_IOERR       10   /* Some kind of disk I/O error occurred */
   387    387   #define SQLITE_CORRUPT     11   /* The database disk image is malformed */
   388    388   #define SQLITE_NOTFOUND    12   /* NOT USED. Table or record not found */
   389    389   #define SQLITE_FULL        13   /* Insertion failed because database is full */
   390    390   #define SQLITE_CANTOPEN    14   /* Unable to open the database file */
   391         -#define SQLITE_PROTOCOL    15   /* NOT USED. Database lock protocol error */
          391  +#define SQLITE_PROTOCOL    15   /* Database lock protocol error */
   392    392   #define SQLITE_EMPTY       16   /* Database is empty */
   393    393   #define SQLITE_SCHEMA      17   /* The database schema changed */
   394    394   #define SQLITE_TOOBIG      18   /* String or BLOB exceeds size limit */
   395    395   #define SQLITE_CONSTRAINT  19   /* Abort due to constraint violation */
   396    396   #define SQLITE_MISMATCH    20   /* Data type mismatch */
   397    397   #define SQLITE_MISUSE      21   /* Library used incorrectly */
   398    398   #define SQLITE_NOLFS       22   /* Uses OS features not supported on host */
................................................................................
   440    440   #define SQLITE_IOERR_BLOCKED           (SQLITE_IOERR | (11<<8))
   441    441   #define SQLITE_IOERR_NOMEM             (SQLITE_IOERR | (12<<8))
   442    442   #define SQLITE_IOERR_ACCESS            (SQLITE_IOERR | (13<<8))
   443    443   #define SQLITE_IOERR_CHECKRESERVEDLOCK (SQLITE_IOERR | (14<<8))
   444    444   #define SQLITE_IOERR_LOCK              (SQLITE_IOERR | (15<<8))
   445    445   #define SQLITE_IOERR_CLOSE             (SQLITE_IOERR | (16<<8))
   446    446   #define SQLITE_IOERR_DIR_CLOSE         (SQLITE_IOERR | (17<<8))
          447  +#define SQLITE_IOERR_SHMOPEN           (SQLITE_IOERR | (18<<8))
          448  +#define SQLITE_IOERR_SHMSIZE           (SQLITE_IOERR | (19<<8))
          449  +#define SQLITE_IOERR_SHMLOCK           (SQLITE_IOERR | (20<<8))
   447    450   #define SQLITE_LOCKED_SHAREDCACHE      (SQLITE_LOCKED |  (1<<8))
   448    451   #define SQLITE_BUSY_RECOVERY           (SQLITE_BUSY   |  (1<<8))
   449    452   #define SQLITE_CANTOPEN_NOTEMPDIR      (SQLITE_CANTOPEN | (1<<8))
   450    453   
   451    454   /*
   452    455   ** CAPI3REF: Flags For File Open Operations
   453    456   **

Changes to src/wal.c.

   373    373     u8 exclusiveMode;          /* Non-zero if connection is in exclusive mode */
   374    374     u8 isWIndexOpen;           /* True if ShmOpen() called on pDbFd */
   375    375     u8 writeLock;              /* True if in a write transaction */
   376    376     u8 ckptLock;               /* True if holding a checkpoint lock */
   377    377     WalIndexHdr hdr;           /* Wal-index header for current transaction */
   378    378     char *zWalName;            /* Name of WAL file */
   379    379     u32 nCkpt;                 /* Checkpoint sequence counter in the wal-header */
          380  +#ifdef SQLITE_DEBUG
          381  +  u8 lockError;              /* True if a locking error has occurred */
          382  +#endif
   380    383   };
   381    384   
   382    385   /*
   383    386   ** Return a pointer to the WalCkptInfo structure in the wal-index.
   384    387   */
   385    388   static volatile WalCkptInfo *walCkptInfo(Wal *pWal){
   386    389     assert( pWal->pWiData!=0 );
................................................................................
   625    628   static int walLockShared(Wal *pWal, int lockIdx){
   626    629     int rc;
   627    630     if( pWal->exclusiveMode ) return SQLITE_OK;
   628    631     rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1,
   629    632                           SQLITE_SHM_LOCK | SQLITE_SHM_SHARED);
   630    633     WALTRACE(("WAL%p: acquire SHARED-%s %s\n", pWal,
   631    634               walLockName(lockIdx), rc ? "failed" : "ok"));
          635  +  VVA_ONLY( pWal->lockError = (rc!=SQLITE_OK && rc!=SQLITE_BUSY); )
   632    636     return rc;
   633    637   }
   634    638   static void walUnlockShared(Wal *pWal, int lockIdx){
   635    639     if( pWal->exclusiveMode ) return;
   636    640     (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1,
   637    641                            SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED);
   638    642     WALTRACE(("WAL%p: release SHARED-%s\n", pWal, walLockName(lockIdx)));
................................................................................
   640    644   static int walLockExclusive(Wal *pWal, int lockIdx, int n){
   641    645     int rc;
   642    646     if( pWal->exclusiveMode ) return SQLITE_OK;
   643    647     rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, n,
   644    648                           SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE);
   645    649     WALTRACE(("WAL%p: acquire EXCLUSIVE-%s cnt=%d %s\n", pWal,
   646    650               walLockName(lockIdx), n, rc ? "failed" : "ok"));
          651  +  VVA_ONLY( pWal->lockError = (rc!=SQLITE_OK && rc!=SQLITE_BUSY); )
   647    652     return rc;
   648    653   }
   649    654   static void walUnlockExclusive(Wal *pWal, int lockIdx, int n){
   650    655     if( pWal->exclusiveMode ) return;
   651    656     (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, n,
   652    657                            SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE);
   653    658     WALTRACE(("WAL%p: release EXCLUSIVE-%s cnt=%d\n", pWal,
................................................................................
  1689   1694   ** This routine uses the nBackfill and aReadMark[] fields of the header
  1690   1695   ** to select a particular WAL_READ_LOCK() that strives to let the
  1691   1696   ** checkpoint process do as much work as possible.  This routine might
  1692   1697   ** update values of the aReadMark[] array in the header, but if it does
  1693   1698   ** so it takes care to hold an exclusive lock on the corresponding
  1694   1699   ** WAL_READ_LOCK() while changing values.
  1695   1700   */
  1696         -static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal){
         1701  +static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
  1697   1702     volatile WalIndexHdr *pHdr;     /* Header of the wal-index */
  1698   1703     volatile WalCkptInfo *pInfo;    /* Checkpoint information in wal-index */
  1699   1704     u32 mxReadMark;                 /* Largest aReadMark[] value */
  1700   1705     int mxI;                        /* Index of largest aReadMark[] value */
  1701   1706     int i;                          /* Loop counter */
  1702   1707     int rc;                         /* Return code  */
  1703   1708   
  1704   1709     assert( pWal->readLock<0 );     /* Not currently locked */
         1710  +
         1711  +  /* Take steps to avoid spinning forever if there is a protocol error. */
         1712  +  if( cnt>5 ){
         1713  +    if( cnt>100 ) return SQLITE_PROTOCOL;
         1714  +    sqlite3OsSleep(pWal->pVfs, 1);
         1715  +  }
  1705   1716   
  1706   1717     if( !useWal ){
  1707   1718       rc = walIndexReadHdr(pWal, pChanged);
  1708   1719       if( rc==SQLITE_BUSY ){
  1709   1720         /* If there is not a recovery running in another thread or process
  1710   1721         ** then convert BUSY errors to WAL_RETRY.  If recovery is known to
  1711   1722         ** be running, convert BUSY to BUSY_RECOVERY.  There is a race here
................................................................................
  1816   1827   ** If the database contents have changes since the previous read
  1817   1828   ** transaction, then *pChanged is set to 1 before returning.  The
  1818   1829   ** Pager layer will use this to know that its cache is stale and
  1819   1830   ** needs to be flushed.
  1820   1831   */
  1821   1832   int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){
  1822   1833     int rc;                         /* Return code */
         1834  +  int cnt = 0;                    /* Number of TryBeginRead attempts */
  1823   1835   
  1824   1836     do{
  1825         -    rc = walTryBeginRead(pWal, pChanged, 0);
         1837  +    rc = walTryBeginRead(pWal, pChanged, 0, ++cnt);
  1826   1838     }while( rc==WAL_RETRY );
  1827   1839     walIndexUnmap(pWal);
  1828   1840     return rc;
  1829   1841   }
  1830   1842   
  1831   1843   /*
  1832   1844   ** Finish with a read transaction.  All this does is release the
................................................................................
  1855   1867     u8 *pOut                        /* Buffer to write page data to */
  1856   1868   ){
  1857   1869     int rc;                         /* Return code */
  1858   1870     u32 iRead = 0;                  /* If !=0, WAL frame to return data from */
  1859   1871     u32 iLast = pWal->hdr.mxFrame;  /* Last page in WAL for this reader */
  1860   1872     int iHash;                      /* Used to loop through N hash tables */
  1861   1873   
  1862         -  /* This routine is only called from within a read transaction */
  1863         -  assert( pWal->readLock>=0 );
         1874  +  /* This routine is only called from within a read transaction. */
         1875  +  assert( pWal->readLock>=0 || pWal->lockError );
  1864   1876   
  1865   1877     /* If the "last page" field of the wal-index header snapshot is 0, then
  1866   1878     ** no data will be read from the wal under any circumstances. Return early
  1867   1879     ** in this case to avoid the walIndexMap/Unmap overhead.  Likewise, if
  1868   1880     ** pWal->readLock==0, then the WAL is ignored by the reader so
  1869   1881     ** return early, as if the WAL were empty.
  1870   1882     */
................................................................................
  1978   1990   }
  1979   1991   
  1980   1992   
  1981   1993   /* 
  1982   1994   ** Set *pPgno to the size of the database file (or zero, if unknown).
  1983   1995   */
  1984   1996   void sqlite3WalDbsize(Wal *pWal, Pgno *pPgno){
  1985         -  assert( pWal->readLock>=0 );
         1997  +  assert( pWal->readLock>=0 || pWal->lockError );
  1986   1998     *pPgno = pWal->hdr.nPage;
  1987   1999   }
  1988   2000   
  1989   2001   
  1990   2002   /* 
  1991   2003   ** This function starts a write transaction on the WAL.
  1992   2004   **
................................................................................
  2129   2141   **
  2130   2142   ** SQLITE_OK is returned if no error is encountered (regardless of whether
  2131   2143   ** or not pWal->hdr.mxFrame is modified). An SQLite error code is returned
  2132   2144   ** if some error occurs.
  2133   2145   */
  2134   2146   static int walRestartLog(Wal *pWal){
  2135   2147     int rc = SQLITE_OK;
         2148  +  int cnt;
         2149  +
  2136   2150     if( pWal->readLock==0 
  2137   2151      && SQLITE_OK==(rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame)))
  2138   2152     ){
  2139   2153       volatile WalCkptInfo *pInfo = walCkptInfo(pWal);
  2140   2154       assert( pInfo->nBackfill==pWal->hdr.mxFrame );
  2141   2155       if( pInfo->nBackfill>0 ){
  2142   2156         rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
................................................................................
  2159   2173           walIndexWriteHdr(pWal);
  2160   2174           memset((void*)pInfo, 0, sizeof(*pInfo));
  2161   2175           walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
  2162   2176         }
  2163   2177       }
  2164   2178       walUnlockShared(pWal, WAL_READ_LOCK(0));
  2165   2179       pWal->readLock = -1;
         2180  +    cnt = 0;
  2166   2181       do{
  2167   2182         int notUsed;
  2168         -      rc = walTryBeginRead(pWal, &notUsed, 1);
         2183  +      rc = walTryBeginRead(pWal, &notUsed, 1, ++cnt);
  2169   2184       }while( rc==WAL_RETRY );
  2170   2185   
  2171   2186       /* Unmap the wal-index before returning. Otherwise the VFS layer may
  2172   2187       ** hold a mutex for the duration of the IO performed by WalFrames().
  2173   2188       */
  2174   2189       walIndexUnmap(pWal);
  2175   2190     }
................................................................................
  2414   2429   ** If op is negative, then do a dry-run of the op==1 case but do
  2415   2430   ** not actually change anything.  The pager uses this to see if it
  2416   2431   ** should acquire the database exclusive lock prior to invoking
  2417   2432   ** the op==1 case.
  2418   2433   */
  2419   2434   int sqlite3WalExclusiveMode(Wal *pWal, int op){
  2420   2435     int rc;
  2421         -  assert( pWal->writeLock==0 && pWal->readLock>=0 );
         2436  +  assert( pWal->writeLock==0 );
         2437  +  /* pWal->readLock is usually set, but might be -1 after a prior lock error or OOM */
         2438  +  assert( pWal->readLock>=0 || pWal->lockError );
  2422   2439     if( op==0 ){
  2423   2440       if( pWal->exclusiveMode ){
  2424   2441         pWal->exclusiveMode = 0;
  2425         -      if( walLockShared(pWal, WAL_READ_LOCK(pWal->readLock))!=SQLITE_OK ){
         2442  +      if( pWal->readLock>=0 
         2443  +       && walLockShared(pWal, WAL_READ_LOCK(pWal->readLock))!=SQLITE_OK
         2444  +      ){
  2426   2445           pWal->exclusiveMode = 1;
  2427   2446         }
  2428   2447         rc = pWal->exclusiveMode==0;
  2429   2448       }else{
  2430         -      /* No changes.  Either already in locking_mode=NORMAL or else the 
  2431         -      ** acquisition of the read-lock failed.  The pager must continue to
  2432         -      ** hold the database exclusive lock. */
         2449  +      /* Already in locking_mode=NORMAL */
  2433   2450         rc = 0;
  2434   2451       }
  2435   2452     }else if( op>0 ){
  2436   2453       assert( pWal->exclusiveMode==0 );
         2454  +    assert( pWal->readLock>=0 );
  2437   2455       walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock));
  2438   2456       pWal->exclusiveMode = 1;
  2439   2457       rc = 1;
  2440   2458     }else{
  2441   2459       rc = pWal->exclusiveMode==0;
  2442   2460     }
  2443   2461     return rc;
  2444   2462   }
  2445   2463   
  2446   2464   #endif /* #ifndef SQLITE_OMIT_WAL */