/ Check-in [7315f7cb]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Update sqlite3_snapshot_open() to reduce the chances of reading a corrupt snapshot created by a checkpointer process exiting unexpectedly.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | snapshot-get
Files: files | file ages | folders
SHA1: 7315f7cbf4179aadda0f1a0baa1526a9b9f9729f
User & Date: dan 2015-12-09 20:05:27
Context
2015-12-10
02:15
Add the nBackfillAttempted field in formerly unused space in WalCkptInfo and use that field to close the race condition on opening a snapshot. check-in: cb68e9d0 user: drh tags: snapshot-get
2015-12-09
20:05
Update sqlite3_snapshot_open() to reduce the chances of reading a corrupt snapshot created by a checkpointer process exiting unexpectedly. check-in: 7315f7cb user: dan tags: snapshot-get
16:04
Merge unrelated fixes from trunk. check-in: 362615b4 user: drh tags: snapshot-get
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/sqlite.h.in.

  7891   7891   ** SQLITE_OK.
  7892   7892   **
  7893   7893   ** If the specified database does not exist, or is not a wal mode database, 
  7894   7894   ** or the database handle does not have an open read transaction on it,
  7895   7895   ** SQLITE_ERROR is returned. If any other error occurs, for example an IO 
  7896   7896   ** error or an OOM condition, the corresponding SQLite error code is 
  7897   7897   ** returned.
         7898  +**
         7899  +** Each successful call to sqlite3_snapshot_get() must be matched by a call
         7900  +** to sqlite3_snapshot_free() to delete the snapshot handle. Not doing so
         7901  +** is a memory leak. The results of using a snapshot handle after it has 
         7902  +** been deleted by sqlite3_snapshot_free() are undefined.
         7903  +**
         7904  +** Given a snapshot handle, the sqlite3_snapshot_open() API function may be
         7905  +** used to open a read transaction on the same database snapshot that was
         7906  +** being read when sqlite3_snapshot_get() was called to obtain it. The
         7907  +** combination of the first two arguments to sqlite3_snapshot_open() - a
         7908  +** database handle and the name (e.g. "main") of one of its attached 
         7909  +** databases - must refer to the same database file as that identified by 
         7910  +** the arguments passed to the sqlite3_snapshot_get() call. The database
         7911  +** handle must not have an open read or write transaction on this database
         7912  +** file, and must not be in auto-commit mode.
         7913  +**
         7914  +** An old database snapshot may only be opened if SQLite is able to 
         7915  +** determine that it is still valid. The only way for an application to 
         7916  +** guarantee that a snapshot remains valid is by holding an open 
         7917  +** read-transaction on it or on an older snapshot of the same database 
         7918  +** file. If SQLite cannot determine that the snapshot identified by the
         7919  +** snapshot handle, SQLITE_BUSY_SNAPSHOT is returned.
         7920  +**
         7921  +** Otherwise, if the read transaction is successfully opened, SQLITE_OK is
         7922  +** returned. If the named database is not in wal mode or if the database
         7923  +** handle already has an open read or write transaction on it, or if the 
         7924  +** database handle is in auto-commit mode, SQLITE_ERROR is returned. If
         7925  +** an OOM or IO error occurs, the associated SQLite error code is returned.
  7898   7926   */
  7899   7927   typedef struct sqlite3_snapshot sqlite3_snapshot;
  7900   7928   int sqlite3_snapshot_get(sqlite3*, const char*, sqlite3_snapshot **ppSnapshot);
  7901         -int sqlite3_snapshot_open(sqlite3*, const char*, sqlite3_snapshot*);
  7902   7929   void sqlite3_snapshot_free(sqlite3_snapshot*);
         7930  +int sqlite3_snapshot_open(sqlite3*, const char*, sqlite3_snapshot*);
  7903   7931   
  7904   7932   /*
  7905   7933   ** Undo the hack that converts floating point types to integer for
  7906   7934   ** builds on processors without floating point support.
  7907   7935   */
  7908   7936   #ifdef SQLITE_OMIT_FLOATING_POINT
  7909   7937   # undef double
  7910   7938   #endif
  7911   7939   
  7912   7940   #ifdef __cplusplus
  7913   7941   }  /* End of the 'extern "C"' block */
  7914   7942   #endif
  7915   7943   #endif /* _SQLITE3_H_ */

Changes to src/wal.c.

  2271   2271         mxI = i;
  2272   2272       }
  2273   2273     }
  2274   2274     /* There was once an "if" here. The extra "{" is to preserve indentation. */
  2275   2275     {
  2276   2276       if( (pWal->readOnly & WAL_SHM_RDONLY)==0
  2277   2277        && (mxReadMark<mxFrame || mxI==0)
         2278  +#ifdef SQLITE_ENABLE_SNAPSHOT
         2279  +     && pWal->pSnapshot==0
         2280  +#endif
  2278   2281       ){
  2279   2282         for(i=1; i<WAL_NREADER; i++){
  2280   2283           rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1);
  2281   2284           if( rc==SQLITE_OK ){
  2282   2285             mxReadMark = pInfo->aReadMark[i] = mxFrame;
  2283   2286             mxI = i;
  2284   2287             walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
................................................................................
  2285   2288             break;
  2286   2289           }else if( rc!=SQLITE_BUSY ){
  2287   2290             return rc;
  2288   2291           }
  2289   2292         }
  2290   2293       }
  2291   2294       if( mxI==0 ){
         2295  +#ifdef SQLITE_ENABLE_SNAPSHOT
         2296  +      if( pWal->pSnapshot ) return SQLITE_BUSY_SNAPSHOT;
         2297  +#endif
  2292   2298         assert( rc==SQLITE_BUSY || (pWal->readOnly & WAL_SHM_RDONLY)!=0 );
  2293   2299         return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTLOCK;
  2294   2300       }
  2295   2301   
  2296   2302       rc = walLockShared(pWal, WAL_READ_LOCK(mxI));
  2297   2303       if( rc ){
  2298   2304         return rc==SQLITE_BUSY ? WAL_RETRY : rc;
................................................................................
  2379   2385     testcase( (rc&0xff)==SQLITE_IOERR );
  2380   2386     testcase( rc==SQLITE_PROTOCOL );
  2381   2387     testcase( rc==SQLITE_OK );
  2382   2388   
  2383   2389   #ifdef SQLITE_ENABLE_SNAPSHOT
  2384   2390     if( rc==SQLITE_OK ){
  2385   2391       if( pSnapshot && memcmp(pSnapshot, &pWal->hdr, sizeof(WalIndexHdr)) ){
         2392  +      /* At this point the client has a lock on an aReadMark[] slot holding
         2393  +      ** a value equal to or smaller than pSnapshot->mxFrame. This client
         2394  +      ** did not populate the aReadMark[] slot. pWal->hdr is populated with
         2395  +      ** the wal-index header for the snapshot currently at the head of the
         2396  +      ** wal file, which is different from pSnapshot.
         2397  +      **
         2398  +      ** The presence of the aReadMark[] slot entry makes it very likely 
         2399  +      ** that either there is currently another read-transaction open on
         2400  +      ** pSnapshot, or that there has been one more recently than the last
         2401  +      ** checkpoint of any frames greater than pSnapshot->mxFrame was 
         2402  +      ** started. There is an exception though: client 1 may have called
         2403  +      ** walTryBeginRead and started to open snapshot pSnapshot, setting
         2404  +      ** the aReadMark[] slot to do so. At the same time, client 2 may 
         2405  +      ** have committed a new snapshot to disk and started a checkpoint.
         2406  +      ** In this circumstance client 1 does not end up reading pSnapshot,
         2407  +      ** but may leave the aReadMark[] slot populated.
         2408  +      **
         2409  +      ** The race condition above is difficult to detect. One approach would
         2410  +      ** be to check the aReadMark[] slot for another client. But this is
         2411  +      ** prone to false-positives from other snapshot clients. And there
         2412  +      ** is no equivalent to xCheckReservedLock() for wal locks. Another
         2413  +      ** approach would be to take the checkpointer lock and check that
         2414  +      ** fewer than pSnapshot->mxFrame frames have been checkpointed. But
         2415  +      ** that does not account for checkpointer processes that failed after
         2416  +      ** checkpointing frames but before updating WalCkptInfo.nBackfill.
         2417  +      ** And it would mean that this function would block on checkpointers
         2418  +      ** and vice versa.
         2419  +      **
         2420  +      ** TODO: For now, this race condition is ignored.
         2421  +      */
  2386   2422         volatile WalCkptInfo *pInfo = walCkptInfo(pWal);
  2387         -      rc = walLockShared(pWal, WAL_READ_LOCK(0));
  2388         -      if( rc==SQLITE_OK ){
  2389         -        if( pInfo->nBackfill<=pSnapshot->mxFrame 
  2390         -         && pSnapshot->aSalt[0]==pWal->hdr.aSalt[0]
  2391         -         && pSnapshot->aSalt[1]==pWal->hdr.aSalt[1]
  2392         -        ){
  2393         -          assert( pWal->readLock>0 );
  2394         -          assert( pInfo->aReadMark[pWal->readLock]<=pSnapshot->mxFrame );
  2395         -          memcpy(&pWal->hdr, pSnapshot, sizeof(WalIndexHdr));
  2396         -          *pChanged = bChanged;
  2397         -        }else{
  2398         -          rc = SQLITE_BUSY_SNAPSHOT;
  2399         -        }
  2400         -        walUnlockShared(pWal, WAL_READ_LOCK(0));
         2423  +
         2424  +      assert( pWal->readLock>0 );
         2425  +      assert( pInfo->aReadMark[pWal->readLock]<=pSnapshot->mxFrame );
         2426  +
         2427  +      /* Check that the wal file has not been wrapped. Assuming it has not,
         2428  +      ** overwrite pWal->hdr with *pSnapshot and set *pChanged as appropriate
         2429  +      ** for opening the snapshot. Or, if the wal file has been wrapped
         2430  +      ** since pSnapshot was written, return SQLITE_BUSY_SNAPSHOT. */
         2431  +      if( pSnapshot->aSalt[0]==pWal->hdr.aSalt[0]
         2432  +       && pSnapshot->aSalt[1]==pWal->hdr.aSalt[1]
         2433  +      ){
         2434  +        memcpy(&pWal->hdr, pSnapshot, sizeof(WalIndexHdr));
         2435  +        *pChanged = bChanged;
         2436  +      }else{
         2437  +        rc = SQLITE_BUSY_SNAPSHOT;
  2401   2438         }
         2439  +
  2402   2440         if( rc!=SQLITE_OK ){
  2403   2441           sqlite3WalEndReadTransaction(pWal);
  2404   2442         }
  2405   2443       }
  2406   2444     }
  2407   2445   #endif
  2408   2446     return rc;

Changes to test/snapshot.test.

    53     53   } {1 SQLITE_ERROR}
    54     54   do_execsql_test 1.3.2 COMMIT
    55     55   
    56     56   #-------------------------------------------------------------------------
    57     57   # Check that a simple case works. Reuse the database created by the
    58     58   # block of tests above.
    59     59   #
    60         -do_execsql_test 2.0 {
           60  +# UPDATE: This case (2.1) no longer works. 2.2 does.
           61  +#
           62  +do_execsql_test 2.1.0 {
    61     63     BEGIN;
    62     64       SELECT * FROM t1;
    63     65   } {1 2 3 4 5 6 7 8}
    64     66   
    65         -do_test 2.1 {
           67  +do_test 2.1.1 {
    66     68     set snapshot [sqlite3_snapshot_get db main]
    67     69     execsql {
    68     70       COMMIT;
    69     71       INSERT INTO t1 VALUES(9, 10);
    70     72       SELECT * FROM t1;
    71     73     }
    72     74   } {1 2 3 4 5 6 7 8 9 10}
    73     75   
    74         -do_test 2.2 {
           76  +do_test 2.1.2 {
           77  +  execsql BEGIN
           78  +  list [catch { sqlite3_snapshot_open db main $snapshot } msg] $msg
           79  +} {1 SQLITE_BUSY_SNAPSHOT}
           80  +
           81  +do_test 2.1.3 {
           82  +  sqlite3_snapshot_free $snapshot
           83  +  execsql COMMIT
           84  +} {}
           85  +
           86  +do_test 2.2.0 {
           87  +  sqlite3 db2 test.db
           88  +  execsql {
           89  +    BEGIN;
           90  +      SELECT * FROM t1;
           91  +  } db2
           92  +} {1 2 3 4 5 6 7 8 9 10}
           93  +
           94  +do_test 2.2.1 {
           95  +  set snapshot [sqlite3_snapshot_get db2 main]
           96  +  execsql {
           97  +    INSERT INTO t1 VALUES(11, 12);
           98  +    SELECT * FROM t1;
           99  +  }
          100  +} {1 2 3 4 5 6 7 8 9 10 11 12}
          101  +
          102  +do_test 2.1.2 {
    75    103     execsql BEGIN
    76    104     sqlite3_snapshot_open db main $snapshot
    77         -  execsql { SELECT * FROM t1 }
    78         -} {1 2 3 4 5 6 7 8}
          105  +  execsql {
          106  +    SELECT * FROM t1;
          107  +  }
          108  +} {1 2 3 4 5 6 7 8 9 10}
    79    109   
    80         -do_test 2.3 {
          110  +do_test 2.1.3 {
    81    111     sqlite3_snapshot_free $snapshot
    82    112     execsql COMMIT
          113  +  execsql COMMIT db2
          114  +  db2 close
    83    115   } {}
    84    116   
    85    117   #-------------------------------------------------------------------------
    86    118   # Check some errors in sqlite3_snapshot_open(). It is an error if:
    87    119   #
    88    120   #   1) the db is in auto-commit mode,
    89    121   #   2) the db has an open (read or write) transaction,