/ Check-in [2c2afdd0]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add detailed error logging to WAL in an effort to track down an obscure SQLITE_PROTOCOL problem. This code is intended for debugging and not for release.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | wal-trace-375
Files: files | file ages | folders
SHA1: 2c2afdd0adad7d364915ac200c71603deddf148e
User & Date: drh 2011-01-05 12:50:18
Context
2011-01-18
17:34
Pull over all the latest trunk changes. check-in: ca86d04b user: drh tags: wal-trace-375
2011-01-05
12:50
Add detailed error logging to WAL in an effort to track down an obscure SQLITE_PROTOCOL problem. This code is intended for debugging and not for release. check-in: 2c2afdd0 user: drh tags: wal-trace-375
12:20
Add the sqlite3_vsnprintf() interface. check-in: fc67adea user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/wal.c.

   248    248   */
   249    249   #if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
   250    250   int sqlite3WalTrace = 0;
   251    251   # define WALTRACE(X)  if(sqlite3WalTrace) sqlite3DebugPrintf X
   252    252   #else
   253    253   # define WALTRACE(X)
   254    254   #endif
          255  +
          256  +/*
          257  +** WAL tracing logic added to search for an SQLITE_PROTOCOL error.
          258  +*/
          259  +static void walTrace(const char *zFormat, ...){
          260  +  va_list ap;
          261  +  char zMsg[100];
          262  +  va_start(ap, zFormat);
          263  +  sqlite3_vsnprintf(sizeof(zMsg), zMsg, zFormat, ap);
          264  +  va_end(ap);
          265  +#ifdef SQLITE_WAL_TRACE
          266  +  fprintf(stdout, "WALTRACE: [%s]\n", zMsg); fflush(stdout);
          267  +#endif
          268  +  sqlite3_log(99, "%s", zMsg);
          269  +}
   255    270   
   256    271   /*
   257    272   ** The maximum (and only) versions of the wal and wal-index formats
   258    273   ** that may be interpreted by this version of SQLite.
   259    274   **
   260    275   ** If a client begins recovering a WAL file and finds that (a) the checksum
   261    276   ** values in the wal-header are correct and (b) the version field is not
................................................................................
   507    522     /* Enlarge the pWal->apWiData[] array if required */
   508    523     if( pWal->nWiData<=iPage ){
   509    524       int nByte = sizeof(u32*)*(iPage+1);
   510    525       volatile u32 **apNew;
   511    526       apNew = (volatile u32 **)sqlite3_realloc((void *)pWal->apWiData, nByte);
   512    527       if( !apNew ){
   513    528         *ppPage = 0;
          529  +      walTrace("realloc(%d) in walIndexPage()", nByte);
   514    530         return SQLITE_NOMEM;
   515    531       }
   516    532       memset((void*)&apNew[pWal->nWiData], 0,
   517    533              sizeof(u32*)*(iPage+1-pWal->nWiData));
   518    534       pWal->apWiData = apNew;
   519    535       pWal->nWiData = iPage+1;
   520    536     }
................................................................................
   525    541         pWal->apWiData[iPage] = (u32 volatile *)sqlite3MallocZero(WALINDEX_PGSZ);
   526    542         if( !pWal->apWiData[iPage] ) rc = SQLITE_NOMEM;
   527    543       }else{
   528    544         rc = sqlite3OsShmMap(pWal->pDbFd, iPage, WALINDEX_PGSZ, 
   529    545             pWal->writeLock, (void volatile **)&pWal->apWiData[iPage]
   530    546         );
   531    547       }
          548  +    if( rc ) walTrace("xShmMap():%d in walIndexPage(iPage=%d)",rc,iPage);
   532    549     }
   533    550   
   534    551     *ppPage = pWal->apWiData[iPage];
   535    552     assert( iPage==0 || *ppPage || rc!=SQLITE_OK );
   536    553     return rc;
   537    554   }
   538    555   
................................................................................
  1856   1873   
  1857   1874     /* If the first attempt failed, it might have been due to a race
  1858   1875     ** with a writer.  So get a WRITE lock and try again.
  1859   1876     */
  1860   1877     assert( badHdr==0 || pWal->writeLock==0 );
  1861   1878     if( badHdr && SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1)) ){
  1862   1879       pWal->writeLock = 1;
         1880  +    walTrace("trying walIndexTryHdr w/lock");
  1863   1881       if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){
  1864   1882         badHdr = walIndexTryHdr(pWal, pChanged);
  1865   1883         if( badHdr ){
  1866   1884           /* If the wal-index header is still malformed even while holding
  1867   1885           ** a WRITE lock, it can only mean that the header is corrupted and
  1868   1886           ** needs to be reconstructed.  So run recovery to do exactly that.
  1869   1887           */
         1888  +        walTrace("walIndexTryHdr() failed w/lock");
  1870   1889           rc = walIndexRecover(pWal);
         1890  +        if( rc ) walTrace("walIndexRecover():%d", rc);
  1871   1891           *pChanged = 1;
  1872   1892         }
  1873   1893       }
  1874   1894       pWal->writeLock = 0;
  1875   1895       walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
  1876   1896     }
         1897  +  else if(badHdr) walTrace("walLockExcl():%d in walIndexReadHdr()", rc);
  1877   1898   
  1878   1899     /* If the header is read successfully, check the version number to make
  1879   1900     ** sure the wal-index was not constructed with some future format that
  1880   1901     ** this version of SQLite cannot understand.
  1881   1902     */
  1882   1903     if( badHdr==0 && pWal->hdr.iVersion!=WALINDEX_MAX_VERSION ){
  1883   1904       rc = SQLITE_CANTOPEN_BKPT;
................................................................................
  1949   1970     int i;                          /* Loop counter */
  1950   1971     int rc = SQLITE_OK;             /* Return code  */
  1951   1972   
  1952   1973     assert( pWal->readLock<0 );     /* Not currently locked */
  1953   1974   
  1954   1975     /* Take steps to avoid spinning forever if there is a protocol error. */
  1955   1976     if( cnt>5 ){
         1977  +    walTrace("cnt=%d",cnt);
  1956   1978       if( cnt>100 ) return SQLITE_PROTOCOL;
  1957   1979       sqlite3OsSleep(pWal->pVfs, 1);
  1958   1980     }
  1959   1981   
  1960   1982     if( !useWal ){
  1961   1983       rc = walIndexReadHdr(pWal, pChanged);
  1962   1984       if( rc==SQLITE_BUSY ){
................................................................................
  2007   2029           **
  2008   2030           ** However, if frames have been appended to the log (or if the log 
  2009   2031           ** is wrapped and written for that matter) before the READ_LOCK(0)
  2010   2032           ** is obtained, that is not necessarily true. A checkpointer may
  2011   2033           ** have started to backfill the appended frames but crashed before
  2012   2034           ** it finished. Leaving a corrupt image in the database file.
  2013   2035           */
         2036  +        walTrace("wal read/write race - writer won"); 
  2014   2037           walUnlockShared(pWal, WAL_READ_LOCK(0));
  2015   2038           return WAL_RETRY;
  2016   2039         }
  2017   2040         pWal->readLock = 0;
  2018   2041         return SQLITE_OK;
  2019   2042       }else if( rc!=SQLITE_BUSY ){
         2043  +      walTrace("walLockShared(0):%d in walTryBeginRead", rc);
  2020   2044         return rc;
  2021   2045       }
  2022   2046     }
  2023   2047   
  2024   2048     /* If we get this far, it means that the reader will want to use
  2025   2049     ** the WAL to get at content from recent commits.  The job now is
  2026   2050     ** to select one of the aReadMark[] entries that is closest to
................................................................................
  2042   2066       ** be mxFrame, then retry.
  2043   2067       */
  2044   2068       rc = walLockExclusive(pWal, WAL_READ_LOCK(1), 1);
  2045   2069       if( rc==SQLITE_OK ){
  2046   2070         pInfo->aReadMark[1] = pWal->hdr.mxFrame;
  2047   2071         walUnlockExclusive(pWal, WAL_READ_LOCK(1), 1);
  2048   2072         rc = WAL_RETRY;
         2073  +      walTrace("aReadMark[1] <- %d", pWal->hdr.mxFrame);
  2049   2074       }else if( rc==SQLITE_BUSY ){
  2050   2075         rc = WAL_RETRY;
         2076  +      walTrace("aReadMark[1] is busy");
  2051   2077       }
  2052   2078       return rc;
  2053   2079     }else{
  2054   2080       if( mxReadMark < pWal->hdr.mxFrame ){
  2055   2081         for(i=1; i<WAL_NREADER; i++){
  2056   2082           rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1);
  2057   2083           if( rc==SQLITE_OK ){
  2058   2084             mxReadMark = pInfo->aReadMark[i] = pWal->hdr.mxFrame;
  2059   2085             mxI = i;
  2060   2086             walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
  2061   2087             break;
  2062   2088           }else if( rc!=SQLITE_BUSY ){
         2089  +          walTrace("walLockExclusive(%d):%d", i, rc);
  2063   2090             return rc;
  2064   2091           }
  2065   2092         }
  2066   2093       }
  2067   2094   
  2068   2095       rc = walLockShared(pWal, WAL_READ_LOCK(mxI));
  2069   2096       if( rc ){
         2097  +      walTrace("walLockShared(mxI=%d):%d", mxI, rc);
  2070   2098         return rc==SQLITE_BUSY ? WAL_RETRY : rc;
  2071   2099       }
  2072   2100       /* Now that the read-lock has been obtained, check that neither the
  2073   2101       ** value in the aReadMark[] array or the contents of the wal-index
  2074   2102       ** header have changed.
  2075   2103       **
  2076   2104       ** It is necessary to check that the wal-index header did not change