SQLite

Check-in [bb0b6021e7]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Instead of transitioning to RECOVER state from CHECKPOINT when a recovery is required, perform the recovery while holding the CHECKPOINT lock.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: bb0b6021e721c2e4f1533ab18381604c6b7b31d5
User & Date: dan 2010-05-06 18:48:28.000
Context
2010-05-06
19:04
Updates to the VFS SHM locking documentation. (check-in: 9927ce4210 user: drh tags: trunk)
18:48
Instead of transitioning to RECOVER state from CHECKPOINT when a recovery is required, perform the recovery while holding the CHECKPOINT lock. (check-in: bb0b6021e7 user: dan tags: trunk)
18:27
If recovery is run before a checkpoint, change back to a CHECKPOINT lock before performing the actual checkpoint. (check-in: dc98ee169c user: dan tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to doc/vfs-shm.txt.
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
(11h)  READ_FULL          UNLOCK          UNLOCK
(11i)  READ_FULL          WRITE           WRITE
(11j)  READ_FULL          RECOVER         RECOVER
(11k)  WRITE              READ            READ
(11l)  PENDING            UNLOCK          UNLOCK
(11m)  PENDING            CHECKPOINT      CHECKPOINT
(11n)  CHECKPOINT         UNLOCK          UNLOCK
(11o)  CHECKPOINT         RECOVER         RECOVER
(11p)  RECOVER            READ            READ
(11q)  RECOVER            CHECKPOINT      CHECKPOINT

These 17 transitions are all that needs to be supported.  The lock
manager implementation can assert that fact.  The other 25 possible
transitions among the 7 locking states will never occur.

The rules above are sufficient for correctness.  For maximum concurrency,
the following additional considerations apply:







<
|
<







109
110
111
112
113
114
115

116

117
118
119
120
121
122
123
(11h)  READ_FULL          UNLOCK          UNLOCK
(11i)  READ_FULL          WRITE           WRITE
(11j)  READ_FULL          RECOVER         RECOVER
(11k)  WRITE              READ            READ
(11l)  PENDING            UNLOCK          UNLOCK
(11m)  PENDING            CHECKPOINT      CHECKPOINT
(11n)  CHECKPOINT         UNLOCK          UNLOCK

(11o)  RECOVER            READ            READ


These 17 transitions are all that needs to be supported.  The lock
manager implementation can assert that fact.  The other 25 possible
transitions among the 7 locking states will never occur.

The rules above are sufficient for correctness.  For maximum concurrency,
the following additional considerations apply:
Changes to src/os_unix.c.
5335
5336
5337
5338
5339
5340
5341
5342
5343
5344
5345
5346
5347
5348
5349
5350
5351
5352
5353
5354
5355
5356
5357
5358
5359
5360
5361
5362
5363
5364
5365
5366
5367
5368
5369
5370
5371
5372
5373
        p->lockState = SQLITE_SHM_WRITE;
      }
      break;
    }
    case SQLITE_SHM_CHECKPOINT: {
      assert( p->lockState==SQLITE_SHM_UNLOCK
           || p->lockState==SQLITE_SHM_PENDING
           || p->lockState==SQLITE_SHM_RECOVER );
      if( p->lockState==SQLITE_SHM_RECOVER ){
        unixShmUnlock(pFile, p, UNIX_SHM_C);
        p->lockState = SQLITE_SHM_CHECKPOINT;
        rc = SQLITE_OK;
      }
      if( p->lockState==SQLITE_SHM_UNLOCK ){
        rc = unixShmExclusiveLock(pFile, p, UNIX_SHM_B|UNIX_SHM_C);
        if( rc==SQLITE_OK ){
          p->lockState = SQLITE_SHM_PENDING;
        }
      }
      if( p->lockState==SQLITE_SHM_PENDING ){
        rc = unixShmExclusiveLock(pFile, p, UNIX_SHM_A);
        if( rc==SQLITE_OK ){
          p->lockState = SQLITE_SHM_CHECKPOINT;
        }
      }
      break;
    }
    default: {
      assert( desiredLock==SQLITE_SHM_RECOVER );
      assert( p->lockState==SQLITE_SHM_READ
           || p->lockState==SQLITE_SHM_READ_FULL
           || p->lockState==SQLITE_SHM_CHECKPOINT );
      assert( sqlite3_mutex_held(pFile->mutexBuf) );
      rc = unixShmExclusiveLock(pFile, p, UNIX_SHM_C);
      if( rc==SQLITE_OK ){
        p->lockState = SQLITE_SHM_RECOVER;
      }
      break;
    }







|
<
<
<
<
<


















|







5335
5336
5337
5338
5339
5340
5341
5342





5343
5344
5345
5346
5347
5348
5349
5350
5351
5352
5353
5354
5355
5356
5357
5358
5359
5360
5361
5362
5363
5364
5365
5366
5367
5368
        p->lockState = SQLITE_SHM_WRITE;
      }
      break;
    }
    case SQLITE_SHM_CHECKPOINT: {
      assert( p->lockState==SQLITE_SHM_UNLOCK
           || p->lockState==SQLITE_SHM_PENDING
      );





      if( p->lockState==SQLITE_SHM_UNLOCK ){
        rc = unixShmExclusiveLock(pFile, p, UNIX_SHM_B|UNIX_SHM_C);
        if( rc==SQLITE_OK ){
          p->lockState = SQLITE_SHM_PENDING;
        }
      }
      if( p->lockState==SQLITE_SHM_PENDING ){
        rc = unixShmExclusiveLock(pFile, p, UNIX_SHM_A);
        if( rc==SQLITE_OK ){
          p->lockState = SQLITE_SHM_CHECKPOINT;
        }
      }
      break;
    }
    default: {
      assert( desiredLock==SQLITE_SHM_RECOVER );
      assert( p->lockState==SQLITE_SHM_READ
           || p->lockState==SQLITE_SHM_READ_FULL
      );
      assert( sqlite3_mutex_held(pFile->mutexBuf) );
      rc = unixShmExclusiveLock(pFile, p, UNIX_SHM_C);
      if( rc==SQLITE_OK ){
        p->lockState = SQLITE_SHM_RECOVER;
      }
      break;
    }
Changes to src/wal.c.
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
** The caller must hold RECOVER lock on the wal-index file.
*/
static int walIndexRecover(Wal *pWal){
  int rc;                         /* Return Code */
  i64 nSize;                      /* Size of log file */
  WalIndexHdr hdr;              /* Recovered wal-index header */

  assert( pWal->lockState==SQLITE_SHM_RECOVER );
  memset(&hdr, 0, sizeof(hdr));

  rc = sqlite3OsFileSize(pWal->pFd, &nSize);
  if( rc!=SQLITE_OK ){
    return rc;
  }








|







497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
** The caller must hold RECOVER lock on the wal-index file.
*/
static int walIndexRecover(Wal *pWal){
  int rc;                         /* Return Code */
  i64 nSize;                      /* Size of log file */
  WalIndexHdr hdr;              /* Recovered wal-index header */

  assert( pWal->lockState>SQLITE_SHM_READ );
  memset(&hdr, 0, sizeof(hdr));

  rc = sqlite3OsFileSize(pWal->pFd, &nSize);
  if( rc!=SQLITE_OK ){
    return rc;
  }

937
938
939
940
941
942
943

944

945
946
947
948
949
950
951
952
953
954

955

956
957
958
959
960
961
962
  }

  /* If the first attempt to read the header failed, lock the wal-index
  ** file and try again. If the header checksum verification fails this
  ** time as well, run log recovery.
  */
  lockState = pWal->lockState;

  if( SQLITE_OK==(rc = walSetLock(pWal, SQLITE_SHM_RECOVER)) ){

    /* This call to walIndexTryHdr() may not return an error code, as the
    ** wal-index is already mapped. It may find that the header is invalid,
    ** but there is no chance of hitting an actual error.  */
    assert( pWal->pWiData );
    rc = walIndexTryHdr(pWal, &isValid, pChanged);
    assert( rc==SQLITE_OK );
    if( isValid==0 ){
      *pChanged = 1;
      rc = walIndexRecover(pWal);
    }

    walSetLock(pWal, lockState);

  }

  return rc;
}

/*
** Lock a snapshot.







>
|
>










>
|
>







937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
  }

  /* If the first attempt to read the header failed, lock the wal-index
  ** file and try again. If the header checksum verification fails this
  ** time as well, run log recovery.
  */
  lockState = pWal->lockState;
  if( lockState>SQLITE_SHM_READ
   || SQLITE_OK==(rc = walSetLock(pWal, SQLITE_SHM_RECOVER)) 
  ){
    /* This call to walIndexTryHdr() may not return an error code, as the
    ** wal-index is already mapped. It may find that the header is invalid,
    ** but there is no chance of hitting an actual error.  */
    assert( pWal->pWiData );
    rc = walIndexTryHdr(pWal, &isValid, pChanged);
    assert( rc==SQLITE_OK );
    if( isValid==0 ){
      *pChanged = 1;
      rc = walIndexRecover(pWal);
    }
    if( lockState==SQLITE_SHM_READ ){
      walSetLock(pWal, SQLITE_SHM_READ);
    }
  }

  return rc;
}

/*
** Lock a snapshot.
Changes to test/wal2.test.
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
  catchsql { SELECT * FROM data }
} {0 {{need xShmOpen to see this}}}
db close
tvfs delete

#-------------------------------------------------------------------------
# Test that if a database connection is forced to run recovery before it
# can perform a checkpoint, it transistions from RECOVERY->CHECKPOINT
# before doing so.
#
do_test wal2-5.1 {
  proc tvfs_cb {method args} {
    set ::shm_file [lindex $args 0]
    if {$method == "xShmLock"} { lappend ::locks [lindex $args 2] }
    return $::tvfs_cb_return
  }







|
<







334
335
336
337
338
339
340
341

342
343
344
345
346
347
348
  catchsql { SELECT * FROM data }
} {0 {{need xShmOpen to see this}}}
db close
tvfs delete

#-------------------------------------------------------------------------
# Test that if a database connection is forced to run recovery before it
# can perform a checkpoint, it does not transition into RECOVER state.

#
do_test wal2-5.1 {
  proc tvfs_cb {method args} {
    set ::shm_file [lindex $args 0]
    if {$method == "xShmLock"} { lappend ::locks [lindex $args 2] }
    return $::tvfs_cb_return
  }
358
359
360
361
362
363
364
365
366
367

368
369
    INSERT INTO x VALUES(1);
  }

  incr_tvfs_hdr $::shm_file 1 1
  set ::locks [list]
  execsql { PRAGMA wal_checkpoint }
  set ::locks
} {CHECKPOINT RECOVER CHECKPOINT UNLOCK}




finish_test







|

|
>


357
358
359
360
361
362
363
364
365
366
367
368
369
    INSERT INTO x VALUES(1);
  }

  incr_tvfs_hdr $::shm_file 1 1
  set ::locks [list]
  execsql { PRAGMA wal_checkpoint }
  set ::locks
} {CHECKPOINT UNLOCK}

db close
tvfs delete

finish_test