/ Check-in [e2fc5c81]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Add experimental mode that uses two wal files. Activated using "PRAGMA journal_mode = wal2".
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | wal2
Files: files | file ages | folders
SHA3-256: e2fc5c814cf6862d536aacb9eca66ecd31ba7e3e3033fa4c5564d533f4a18dfc
User & Date: dan 2017-10-04 20:57:14
Wiki: wal2
Context
2017-10-05
18:14
Fix test case failures on this branch. check-in: 16decc13 user: dan tags: wal2
2017-10-04
20:57
Add experimental mode that uses two wal files. Activated using "PRAGMA journal_mode = wal2". check-in: e2fc5c81 user: dan tags: wal2
18:26
Updates to requirements marks. check-in: 40964a4e user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/btree.c.

  2989   2989       if( page1[18]>1 ){
  2990   2990         pBt->btsFlags |= BTS_READ_ONLY;
  2991   2991       }
  2992   2992       if( page1[19]>1 ){
  2993   2993         goto page1_init_failed;
  2994   2994       }
  2995   2995   #else
  2996         -    if( page1[18]>2 ){
         2996  +    if( page1[18]>3 ){
  2997   2997         pBt->btsFlags |= BTS_READ_ONLY;
  2998   2998       }
  2999         -    if( page1[19]>2 ){
         2999  +    if( page1[19]>3 ){
  3000   3000         goto page1_init_failed;
  3001   3001       }
  3002   3002   
  3003   3003       /* If the write version is set to 2, this database should be accessed
  3004   3004       ** in WAL mode. If the log is not already open, open it now. Then 
  3005   3005       ** return SQLITE_OK and return without populating BtShared.pPage1.
  3006   3006       ** The caller detects this and calls this function again. This is
  3007   3007       ** required as the version of page 1 currently in the page1 buffer
  3008   3008       ** may not be the latest version - there may be a newer one in the log
  3009   3009       ** file.
  3010   3010       */
  3011         -    if( page1[19]==2 && (pBt->btsFlags & BTS_NO_WAL)==0 ){
         3011  +    if( page1[19]>=2 && (pBt->btsFlags & BTS_NO_WAL)==0 ){
  3012   3012         int isOpen = 0;
  3013         -      rc = sqlite3PagerOpenWal(pBt->pPager, &isOpen);
         3013  +      rc = sqlite3PagerOpenWal(pBt->pPager, (page1[19]==3), &isOpen);
  3014   3014         if( rc!=SQLITE_OK ){
  3015   3015           goto page1_init_failed;
  3016   3016         }else{
  3017   3017           setDefaultSyncFlag(pBt, SQLITE_DEFAULT_WAL_SYNCHRONOUS+1);
  3018   3018           if( isOpen==0 ){
  3019   3019             releasePageOne(pPage1);
  3020   3020             return SQLITE_OK;
................................................................................
  9862   9862   ** "write version" (single byte at byte offset 19) fields in the database
  9863   9863   ** header to iVersion.
  9864   9864   */
  9865   9865   int sqlite3BtreeSetVersion(Btree *pBtree, int iVersion){
  9866   9866     BtShared *pBt = pBtree->pBt;
  9867   9867     int rc;                         /* Return code */
  9868   9868    
  9869         -  assert( iVersion==1 || iVersion==2 );
         9869  +  assert( iVersion==1 || iVersion==2 || iVersion==3 );
  9870   9870   
  9871   9871     /* If setting the version fields to 1, do not automatically open the
  9872   9872     ** WAL connection, even if the version fields are currently set to 2.
  9873   9873     */
  9874   9874     pBt->btsFlags &= ~BTS_NO_WAL;
  9875   9875     if( iVersion==1 ) pBt->btsFlags |= BTS_NO_WAL;
  9876   9876   

Changes to src/os_unix.c.

  4159   4159     int rc = SQLITE_OK;    /* Result code from fcntl() */
  4160   4160   
  4161   4161     /* Access to the unixShmNode object is serialized by the caller */
  4162   4162     pShmNode = pFile->pInode->pShmNode;
  4163   4163     assert( sqlite3_mutex_held(pShmNode->mutex) || pShmNode->nRef==0 );
  4164   4164   
  4165   4165     /* Shared locks never span more than one byte */
  4166         -  assert( n==1 || lockType!=F_RDLCK );
         4166  +  /* assert( n==1 || lockType!=F_RDLCK ); */
  4167   4167   
  4168   4168     /* Locks are within range */
  4169   4169     assert( n>=1 && n<=SQLITE_SHM_NLOCK );
  4170   4170   
  4171   4171     if( pShmNode->h>=0 ){
  4172   4172       /* Initialize the locking parameters */
  4173   4173       memset(&f, 0, sizeof(f));
................................................................................
  4605   4605     assert( pShmNode->pInode==pDbFd->pInode );
  4606   4606     assert( ofst>=0 && ofst+n<=SQLITE_SHM_NLOCK );
  4607   4607     assert( n>=1 );
  4608   4608     assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED)
  4609   4609          || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE)
  4610   4610          || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED)
  4611   4611          || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) );
  4612         -  assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 );
         4612  +  /* assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 ); */
  4613   4613     assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 );
  4614   4614     assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 );
  4615   4615   
  4616   4616     mask = (1<<(ofst+n)) - (1<<ofst);
  4617   4617     assert( n>1 || mask==(1<<ofst) );
  4618   4618     sqlite3_mutex_enter(pShmNode->mutex);
  4619   4619     if( flags & SQLITE_SHM_UNLOCK ){

Changes to src/pager.c.

   806    806   #endif
   807    807   
   808    808   /*
   809    809   ** The maximum legal page number is (2^31 - 1).
   810    810   */
   811    811   #define PAGER_MAX_PGNO 2147483647
   812    812   
   813         -/*
   814         -** The argument to this macro is a file descriptor (type sqlite3_file*).
   815         -** Return 0 if it is not open, or non-zero (but not 1) if it is.
   816         -**
   817         -** This is so that expressions can be written as:
   818         -**
   819         -**   if( isOpen(pPager->jfd) ){ ...
   820         -**
   821         -** instead of
   822         -**
   823         -**   if( pPager->jfd->pMethods ){ ...
   824         -*/
   825         -#define isOpen(pFd) ((pFd)->pMethods!=0)
   826         -
   827    813   /*
   828    814   ** Return true if this pager uses a write-ahead log to read page pgno.
   829    815   ** Return false if the pager reads pgno directly from the database.
   830    816   */
   831    817   #if !defined(SQLITE_OMIT_WAL) && defined(SQLITE_DIRECT_OVERFLOW_READ)
   832    818   int sqlite3PagerUseWal(Pager *pPager, Pgno pgno){
   833    819     u32 iRead = 0;
................................................................................
   940    926           ** a rollback transaction that switches from journal_mode=off
   941    927           ** to journal_mode=wal.
   942    928           */
   943    929           assert( p->eLock>=RESERVED_LOCK );
   944    930           assert( isOpen(p->jfd) 
   945    931                || p->journalMode==PAGER_JOURNALMODE_OFF 
   946    932                || p->journalMode==PAGER_JOURNALMODE_WAL 
          933  +             || p->journalMode==PAGER_JOURNALMODE_WAL2
   947    934           );
   948    935         }
   949    936         assert( pPager->dbOrigSize==pPager->dbFileSize );
   950    937         assert( pPager->dbOrigSize==pPager->dbHintSize );
   951    938         break;
   952    939   
   953    940       case PAGER_WRITER_DBMOD:
................................................................................
   954    941         assert( p->eLock==EXCLUSIVE_LOCK );
   955    942         assert( pPager->errCode==SQLITE_OK );
   956    943         assert( !pagerUseWal(pPager) );
   957    944         assert( p->eLock>=EXCLUSIVE_LOCK );
   958    945         assert( isOpen(p->jfd) 
   959    946              || p->journalMode==PAGER_JOURNALMODE_OFF 
   960    947              || p->journalMode==PAGER_JOURNALMODE_WAL 
          948  +           || p->journalMode==PAGER_JOURNALMODE_WAL2
   961    949              || (sqlite3OsDeviceCharacteristics(p->fd)&SQLITE_IOCAP_BATCH_ATOMIC)
   962    950         );
   963    951         assert( pPager->dbOrigSize<=pPager->dbHintSize );
   964    952         break;
   965    953   
   966    954       case PAGER_WRITER_FINISHED:
   967    955         assert( p->eLock==EXCLUSIVE_LOCK );
   968    956         assert( pPager->errCode==SQLITE_OK );
   969    957         assert( !pagerUseWal(pPager) );
   970    958         assert( isOpen(p->jfd) 
   971    959              || p->journalMode==PAGER_JOURNALMODE_OFF 
   972    960              || p->journalMode==PAGER_JOURNALMODE_WAL 
          961  +           || p->journalMode==PAGER_JOURNALMODE_WAL2
   973    962              || (sqlite3OsDeviceCharacteristics(p->fd)&SQLITE_IOCAP_BATCH_ATOMIC)
   974    963         );
   975    964         break;
   976    965   
   977    966       case PAGER_ERROR:
   978    967         /* There must be at least one outstanding reference to the pager if
   979    968         ** in ERROR state. Otherwise the pager should have already dropped
................................................................................
  2059   2048             ** https://bugzilla.mozilla.org/show_bug.cgi?id=1072773
  2060   2049             */
  2061   2050             rc = sqlite3OsSync(pPager->jfd, pPager->syncFlags);
  2062   2051           }
  2063   2052         }
  2064   2053         pPager->journalOff = 0;
  2065   2054       }else if( pPager->journalMode==PAGER_JOURNALMODE_PERSIST
  2066         -      || (pPager->exclusiveMode && pPager->journalMode!=PAGER_JOURNALMODE_WAL)
         2055  +      || (pPager->exclusiveMode && pPager->journalMode<PAGER_JOURNALMODE_WAL)
  2067   2056       ){
  2068   2057         rc = zeroJournalHdr(pPager, hasMaster||pPager->tempFile);
  2069   2058         pPager->journalOff = 0;
  2070   2059       }else{
  2071   2060         /* This branch may be executed with Pager.journalMode==MEMORY if
  2072   2061         ** a hot-journal was just rolled back. In this case the journal
  2073   2062         ** file should be closed and deleted. If this connection writes to
  2074   2063         ** the database file, it will do so using an in-memory journal.
  2075   2064         */
  2076   2065         int bDelete = !pPager->tempFile;
  2077   2066         assert( sqlite3JournalIsInMemory(pPager->jfd)==0 );
  2078   2067         assert( pPager->journalMode==PAGER_JOURNALMODE_DELETE 
  2079   2068              || pPager->journalMode==PAGER_JOURNALMODE_MEMORY 
  2080         -           || pPager->journalMode==PAGER_JOURNALMODE_WAL 
         2069  +           || pPager->journalMode==PAGER_JOURNALMODE_WAL
         2070  +           || pPager->journalMode==PAGER_JOURNALMODE_WAL2
  2081   2071         );
  2082   2072         sqlite3OsClose(pPager->jfd);
  2083   2073         if( bDelete ){
  2084   2074           rc = sqlite3OsDelete(pPager->pVfs, pPager->zJournal, pPager->extraSync);
  2085   2075         }
  2086   2076       }
  2087   2077     }
................................................................................
  3340   3330   
  3341   3331           rc = pagerPagecount(pPager, &nPage);
  3342   3332           if( rc ) return rc;
  3343   3333           if( nPage==0 ){
  3344   3334             rc = sqlite3OsDelete(pPager->pVfs, pPager->zWal, 0);
  3345   3335           }else{
  3346   3336             testcase( sqlite3PcachePagecount(pPager->pPCache)==0 );
  3347         -          rc = sqlite3PagerOpenWal(pPager, 0);
         3337  +          rc = sqlite3PagerOpenWal(pPager, 0, 0);
  3348   3338           }
  3349         -      }else if( pPager->journalMode==PAGER_JOURNALMODE_WAL ){
         3339  +      }else if( pPager->journalMode>=PAGER_JOURNALMODE_WAL ){
  3350   3340           pPager->journalMode = PAGER_JOURNALMODE_DELETE;
  3351   3341         }
  3352   3342       }
  3353   3343     }
  3354   3344     return rc;
  3355   3345   }
  3356   3346   #endif
................................................................................
  7241   7231   
  7242   7232     /* The eMode parameter is always valid */
  7243   7233     assert(      eMode==PAGER_JOURNALMODE_DELETE
  7244   7234               || eMode==PAGER_JOURNALMODE_TRUNCATE
  7245   7235               || eMode==PAGER_JOURNALMODE_PERSIST
  7246   7236               || eMode==PAGER_JOURNALMODE_OFF 
  7247   7237               || eMode==PAGER_JOURNALMODE_WAL 
         7238  +            || eMode==PAGER_JOURNALMODE_WAL2
  7248   7239               || eMode==PAGER_JOURNALMODE_MEMORY );
  7249   7240   
  7250   7241     /* This routine is only called from the OP_JournalMode opcode, and
  7251   7242     ** the logic there will never allow a temporary file to be changed
  7252   7243     ** to WAL mode.
  7253   7244     */
  7254   7245     assert( pPager->tempFile==0 || eMode!=PAGER_JOURNALMODE_WAL );
................................................................................
  7275   7266       */
  7276   7267       assert( (PAGER_JOURNALMODE_TRUNCATE & 5)==1 );
  7277   7268       assert( (PAGER_JOURNALMODE_PERSIST & 5)==1 );
  7278   7269       assert( (PAGER_JOURNALMODE_DELETE & 5)==0 );
  7279   7270       assert( (PAGER_JOURNALMODE_MEMORY & 5)==4 );
  7280   7271       assert( (PAGER_JOURNALMODE_OFF & 5)==0 );
  7281   7272       assert( (PAGER_JOURNALMODE_WAL & 5)==5 );
         7273  +    assert( (PAGER_JOURNALMODE_WAL2 & 5)==4 );
  7282   7274   
  7283   7275       assert( isOpen(pPager->fd) || pPager->exclusiveMode );
  7284         -    if( !pPager->exclusiveMode && (eOld & 5)==1 && (eMode & 1)==0 ){
         7276  +    if( !pPager->exclusiveMode && (eOld & 5)==1 && (eMode & 1)==0 
         7277  +     && eMode!=PAGER_JOURNALMODE_WAL2       /* TODO: fix this if possible */
         7278  +    ){
  7285   7279   
  7286   7280         /* In this case we would like to delete the journal file. If it is
  7287   7281         ** not possible, then that is not a problem. Deleting the journal file
  7288   7282         ** here is an optimization only.
  7289   7283         **
  7290   7284         ** Before deleting the journal file, obtain a RESERVED lock on the
  7291   7285         ** database file. This ensures that the journal file is not deleted
................................................................................
  7439   7433   
  7440   7434   /*
  7441   7435   ** Call sqlite3WalOpen() to open the WAL handle. If the pager is in 
  7442   7436   ** exclusive-locking mode when this function is called, take an EXCLUSIVE
  7443   7437   ** lock on the database file and use heap-memory to store the wal-index
  7444   7438   ** in. Otherwise, use the normal shared-memory.
  7445   7439   */
  7446         -static int pagerOpenWal(Pager *pPager){
         7440  +static int pagerOpenWal(Pager *pPager, int bWal2){
  7447   7441     int rc = SQLITE_OK;
  7448   7442   
  7449   7443     assert( pPager->pWal==0 && pPager->tempFile==0 );
  7450   7444     assert( pPager->eLock==SHARED_LOCK || pPager->eLock==EXCLUSIVE_LOCK );
  7451   7445   
  7452   7446     /* If the pager is already in exclusive-mode, the WAL module will use 
  7453   7447     ** heap-memory for the wal-index instead of the VFS shared-memory 
................................................................................
  7460   7454   
  7461   7455     /* Open the connection to the log file. If this operation fails, 
  7462   7456     ** (e.g. due to malloc() failure), return an error code.
  7463   7457     */
  7464   7458     if( rc==SQLITE_OK ){
  7465   7459       rc = sqlite3WalOpen(pPager->pVfs,
  7466   7460           pPager->fd, pPager->zWal, pPager->exclusiveMode,
  7467         -        pPager->journalSizeLimit, &pPager->pWal
         7461  +        pPager->journalSizeLimit, bWal2, &pPager->pWal
  7468   7462       );
  7469   7463     }
  7470   7464     pagerFixMaplimit(pPager);
  7471   7465   
  7472   7466     return rc;
  7473   7467   }
  7474   7468   
................................................................................
  7486   7480   **
  7487   7481   ** If the pager is open on a temp-file (or in-memory database), or if
  7488   7482   ** the WAL file is already open, set *pbOpen to 1 and return SQLITE_OK
  7489   7483   ** without doing anything.
  7490   7484   */
  7491   7485   int sqlite3PagerOpenWal(
  7492   7486     Pager *pPager,                  /* Pager object */
         7487  +  int bWal2,                      /* Open in wal2 mode if not already open */
  7493   7488     int *pbOpen                     /* OUT: Set to true if call is a no-op */
  7494   7489   ){
  7495   7490     int rc = SQLITE_OK;             /* Return code */
  7496   7491   
  7497   7492     assert( assert_pager_state(pPager) );
  7498   7493     assert( pPager->eState==PAGER_OPEN   || pbOpen );
  7499   7494     assert( pPager->eState==PAGER_READER || !pbOpen );
................................................................................
  7502   7497   
  7503   7498     if( !pPager->tempFile && !pPager->pWal ){
  7504   7499       if( !sqlite3PagerWalSupported(pPager) ) return SQLITE_CANTOPEN;
  7505   7500   
  7506   7501       /* Close any rollback journal previously open */
  7507   7502       sqlite3OsClose(pPager->jfd);
  7508   7503   
  7509         -    rc = pagerOpenWal(pPager);
         7504  +    rc = pagerOpenWal(pPager, bWal2);
  7510   7505       if( rc==SQLITE_OK ){
  7511         -      pPager->journalMode = PAGER_JOURNALMODE_WAL;
         7506  +      pPager->journalMode = bWal2?PAGER_JOURNALMODE_WAL2:PAGER_JOURNALMODE_WAL;
  7512   7507         pPager->eState = PAGER_OPEN;
  7513   7508       }
  7514   7509     }else{
  7515   7510       *pbOpen = 1;
  7516   7511     }
  7517   7512   
  7518   7513     return rc;
................................................................................
  7526   7521   ** EXCLUSIVE lock on the database file. If this cannot be obtained, an
  7527   7522   ** error (SQLITE_BUSY) is returned and the log connection is not closed.
  7528   7523   ** If successful, the EXCLUSIVE lock is not released before returning.
  7529   7524   */
  7530   7525   int sqlite3PagerCloseWal(Pager *pPager, sqlite3 *db){
  7531   7526     int rc = SQLITE_OK;
  7532   7527   
  7533         -  assert( pPager->journalMode==PAGER_JOURNALMODE_WAL );
         7528  +  assert( pPager->journalMode==PAGER_JOURNALMODE_WAL 
         7529  +       || pPager->journalMode==PAGER_JOURNALMODE_WAL2
         7530  +  );
  7534   7531   
  7535   7532     /* If the log file is not already open, but does exist in the file-system,
  7536   7533     ** it may need to be checkpointed before the connection can switch to
  7537   7534     ** rollback mode. Open it now so this can happen.
  7538   7535     */
  7539   7536     if( !pPager->pWal ){
  7540   7537       int logexists = 0;
................................................................................
  7541   7538       rc = pagerLockDb(pPager, SHARED_LOCK);
  7542   7539       if( rc==SQLITE_OK ){
  7543   7540         rc = sqlite3OsAccess(
  7544   7541             pPager->pVfs, pPager->zWal, SQLITE_ACCESS_EXISTS, &logexists
  7545   7542         );
  7546   7543       }
  7547   7544       if( rc==SQLITE_OK && logexists ){
  7548         -      rc = pagerOpenWal(pPager);
         7545  +      rc = pagerOpenWal(pPager, 0);
  7549   7546       }
  7550   7547     }
  7551   7548       
  7552   7549     /* Checkpoint and close the log. Because an EXCLUSIVE lock is held on
  7553   7550     ** the database file, the log and log-summary files will be deleted.
  7554   7551     */
  7555   7552     if( rc==SQLITE_OK && pPager->pWal ){

Changes to src/pager.h.

    77     77   #define PAGER_JOURNALMODE_QUERY     (-1)  /* Query the value of journalmode */
    78     78   #define PAGER_JOURNALMODE_DELETE      0   /* Commit by deleting journal file */
    79     79   #define PAGER_JOURNALMODE_PERSIST     1   /* Commit by zeroing journal header */
    80     80   #define PAGER_JOURNALMODE_OFF         2   /* Journal omitted.  */
    81     81   #define PAGER_JOURNALMODE_TRUNCATE    3   /* Commit by truncating journal */
    82     82   #define PAGER_JOURNALMODE_MEMORY      4   /* In-memory journal file */
    83     83   #define PAGER_JOURNALMODE_WAL         5   /* Use write-ahead logging */
           84  +#define PAGER_JOURNALMODE_WAL2        6   /* Use write-ahead logging mode 2 */
           85  +
           86  +#define isWalMode(x) ((x)==PAGER_JOURNALMODE_WAL || (x)==PAGER_JOURNALMODE_WAL2)
           87  +
           88  +/*
           89  +** The argument to this macro is a file descriptor (type sqlite3_file*).
           90  +** Return 0 if it is not open, or non-zero (but not 1) if it is.
           91  +**
           92  +** This is so that expressions can be written as:
           93  +**
           94  +**   if( isOpen(pPager->jfd) ){ ...
           95  +**
           96  +** instead of
           97  +**
           98  +**   if( pPager->jfd->pMethods ){ ...
           99  +*/
          100  +#define isOpen(pFd) ((pFd)->pMethods!=0)
    84    101   
    85    102   /*
    86    103   ** Flags that make up the mask passed to sqlite3PagerGet().
    87    104   */
    88    105   #define PAGER_GET_NOCONTENT     0x01  /* Do not load data from disk */
    89    106   #define PAGER_GET_READONLY      0x02  /* Read-only page is acceptable */
    90    107   
................................................................................
   173    190   int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint);
   174    191   int sqlite3PagerSharedLock(Pager *pPager);
   175    192   
   176    193   #ifndef SQLITE_OMIT_WAL
   177    194     int sqlite3PagerCheckpoint(Pager *pPager, sqlite3*, int, int*, int*);
   178    195     int sqlite3PagerWalSupported(Pager *pPager);
   179    196     int sqlite3PagerWalCallback(Pager *pPager);
   180         -  int sqlite3PagerOpenWal(Pager *pPager, int *pisOpen);
          197  +  int sqlite3PagerOpenWal(Pager *pPager, int, int *pisOpen);
   181    198     int sqlite3PagerCloseWal(Pager *pPager, sqlite3*);
   182    199   # ifdef SQLITE_DIRECT_OVERFLOW_READ
   183    200     int sqlite3PagerUseWal(Pager *pPager, Pgno);
   184    201   # endif
   185    202   # ifdef SQLITE_ENABLE_SNAPSHOT
   186    203     int sqlite3PagerSnapshotGet(Pager *pPager, sqlite3_snapshot **ppSnapshot);
   187    204     int sqlite3PagerSnapshotOpen(Pager *pPager, sqlite3_snapshot *pSnapshot);

Changes to src/pragma.c.

   256    256   ** defined in pager.h. This function returns the associated lowercase
   257    257   ** journal-mode name.
   258    258   */
   259    259   const char *sqlite3JournalModename(int eMode){
   260    260     static char * const azModeName[] = {
   261    261       "delete", "persist", "off", "truncate", "memory"
   262    262   #ifndef SQLITE_OMIT_WAL
   263         -     , "wal"
          263  +     , "wal", "wal2"
   264    264   #endif
   265    265     };
   266    266     assert( PAGER_JOURNALMODE_DELETE==0 );
   267    267     assert( PAGER_JOURNALMODE_PERSIST==1 );
   268    268     assert( PAGER_JOURNALMODE_OFF==2 );
   269    269     assert( PAGER_JOURNALMODE_TRUNCATE==3 );
   270    270     assert( PAGER_JOURNALMODE_MEMORY==4 );
   271    271     assert( PAGER_JOURNALMODE_WAL==5 );
          272  +  assert( PAGER_JOURNALMODE_WAL2==6 );
   272    273     assert( eMode>=0 && eMode<=ArraySize(azModeName) );
   273    274   
   274    275     if( eMode==ArraySize(azModeName) ) return 0;
   275    276     return azModeName[eMode];
   276    277   }
   277    278   
   278    279   /*

Changes to src/vdbe.c.

  6321   6321     eNew = pOp->p3;
  6322   6322     assert( eNew==PAGER_JOURNALMODE_DELETE 
  6323   6323          || eNew==PAGER_JOURNALMODE_TRUNCATE 
  6324   6324          || eNew==PAGER_JOURNALMODE_PERSIST 
  6325   6325          || eNew==PAGER_JOURNALMODE_OFF
  6326   6326          || eNew==PAGER_JOURNALMODE_MEMORY
  6327   6327          || eNew==PAGER_JOURNALMODE_WAL
         6328  +       || eNew==PAGER_JOURNALMODE_WAL2
  6328   6329          || eNew==PAGER_JOURNALMODE_QUERY
  6329   6330     );
  6330   6331     assert( pOp->p1>=0 && pOp->p1<db->nDb );
  6331   6332     assert( p->readOnly==0 );
  6332   6333   
  6333   6334     pBt = db->aDb[pOp->p1].pBt;
  6334   6335     pPager = sqlite3BtreePager(pBt);
................................................................................
  6338   6339   
  6339   6340   #ifndef SQLITE_OMIT_WAL
  6340   6341     zFilename = sqlite3PagerFilename(pPager, 1);
  6341   6342   
  6342   6343     /* Do not allow a transition to journal_mode=WAL for a database
  6343   6344     ** in temporary storage or if the VFS does not support shared memory 
  6344   6345     */
  6345         -  if( eNew==PAGER_JOURNALMODE_WAL
         6346  +  if( isWalMode(eNew)
  6346   6347      && (sqlite3Strlen30(zFilename)==0           /* Temp file */
  6347   6348          || !sqlite3PagerWalSupported(pPager))   /* No shared-memory support */
  6348   6349     ){
  6349   6350       eNew = eOld;
  6350   6351     }
  6351   6352   
  6352         -  if( (eNew!=eOld)
  6353         -   && (eOld==PAGER_JOURNALMODE_WAL || eNew==PAGER_JOURNALMODE_WAL)
  6354         -  ){
         6353  +  if( eNew!=eOld && (isWalMode(eNew) || isWalMode(eOld)) ){
         6354  +
         6355  +    /* Prevent changing directly to wal2 from wal mode. And vice versa. */
         6356  +    if( isWalMode(eNew) && isWalMode(eOld) ){
         6357  +      rc = SQLITE_ERROR;
         6358  +      sqlite3VdbeError(p, "cannot change from %s to %s mode",
         6359  +          sqlite3JournalModename(eOld), sqlite3JournalModename(eNew)
         6360  +      );
         6361  +      goto abort_due_to_error;
         6362  +    }
         6363  +
         6364  +    /* Prevent switching into or out of wal/wal2 mode mid-transaction */
  6355   6365       if( !db->autoCommit || db->nVdbeRead>1 ){
  6356   6366         rc = SQLITE_ERROR;
  6357   6367         sqlite3VdbeError(p,
  6358   6368             "cannot change %s wal mode from within a transaction",
  6359   6369             (eNew==PAGER_JOURNALMODE_WAL ? "into" : "out of")
  6360   6370         );
  6361   6371         goto abort_due_to_error;
  6362         -    }else{
         6372  +    }
  6363   6373    
  6364         -      if( eOld==PAGER_JOURNALMODE_WAL ){
  6365         -        /* If leaving WAL mode, close the log file. If successful, the call
  6366         -        ** to PagerCloseWal() checkpoints and deletes the write-ahead-log 
  6367         -        ** file. An EXCLUSIVE lock may still be held on the database file 
  6368         -        ** after a successful return. 
  6369         -        */
  6370         -        rc = sqlite3PagerCloseWal(pPager, db);
  6371         -        if( rc==SQLITE_OK ){
  6372         -          sqlite3PagerSetJournalMode(pPager, eNew);
  6373         -        }
  6374         -      }else if( eOld==PAGER_JOURNALMODE_MEMORY ){
  6375         -        /* Cannot transition directly from MEMORY to WAL.  Use mode OFF
  6376         -        ** as an intermediate */
  6377         -        sqlite3PagerSetJournalMode(pPager, PAGER_JOURNALMODE_OFF);
         6374  +    if( isWalMode(eOld) ){
         6375  +      /* If leaving WAL mode, close the log file. If successful, the call
         6376  +      ** to PagerCloseWal() checkpoints and deletes the write-ahead-log 
         6377  +      ** file. An EXCLUSIVE lock may still be held on the database file 
         6378  +      ** after a successful return. 
         6379  +      */
         6380  +      rc = sqlite3PagerCloseWal(pPager, db);
         6381  +      if( rc==SQLITE_OK ){
         6382  +        sqlite3PagerSetJournalMode(pPager, eNew);
  6378   6383         }
  6379         -  
  6380         -      /* Open a transaction on the database file. Regardless of the journal
  6381         -      ** mode, this transaction always uses a rollback journal.
  6382         -      */
  6383         -      assert( sqlite3BtreeIsInTrans(pBt)==0 );
  6384         -      if( rc==SQLITE_OK ){
  6385         -        rc = sqlite3BtreeSetVersion(pBt, (eNew==PAGER_JOURNALMODE_WAL ? 2 : 1));
  6386         -      }
         6384  +    }else if( eOld==PAGER_JOURNALMODE_MEMORY ){
         6385  +      /* Cannot transition directly from MEMORY to WAL.  Use mode OFF
         6386  +      ** as an intermediate */
         6387  +      sqlite3PagerSetJournalMode(pPager, PAGER_JOURNALMODE_OFF);
         6388  +    }
         6389  +
         6390  +    /* Open a transaction on the database file. Regardless of the journal
         6391  +    ** mode, this transaction always uses a rollback journal.
         6392  +    */
         6393  +    assert( sqlite3BtreeIsInTrans(pBt)==0 );
         6394  +    if( rc==SQLITE_OK ){
         6395  +      /* 1==rollback, 2==wal, 3==wal2 */
         6396  +      rc = sqlite3BtreeSetVersion(pBt, 
         6397  +          1 + isWalMode(eNew) + (eNew==PAGER_JOURNALMODE_WAL2)
         6398  +      );
  6387   6399       }
  6388   6400     }
  6389   6401   #endif /* ifndef SQLITE_OMIT_WAL */
  6390   6402   
  6391   6403     if( rc ) eNew = eOld;
  6392   6404     eNew = sqlite3PagerSetJournalMode(pPager, eNew);
  6393   6405   

Changes to src/wal.c.

   251    251   int sqlite3WalTrace = 0;
   252    252   # define WALTRACE(X)  if(sqlite3WalTrace) sqlite3DebugPrintf X
   253    253   #else
   254    254   # define WALTRACE(X)
   255    255   #endif
   256    256   
   257    257   /*
   258         -** The maximum (and only) versions of the wal and wal-index formats
   259         -** that may be interpreted by this version of SQLite.
          258  +** Both the wal-file and the wal-index contain version fields 
          259  +** indicating the current version of the system. If a client
          260  +** reads the header of a wal file (as part of recovery), or the
          261  +** wal-index (as part of opening a read transaction) and (a) the
          262  +** header checksum is correct but (b) the version field is not
          263  +** recognized, the operation fails with SQLITE_CANTOPEN.
   260    264   **
   261         -** If a client begins recovering a WAL file and finds that (a) the checksum
   262         -** values in the wal-header are correct and (b) the version field is not
   263         -** WAL_MAX_VERSION, recovery fails and SQLite returns SQLITE_CANTOPEN.
   264         -**
   265         -** Similarly, if a client successfully reads a wal-index header (i.e. the 
   266         -** checksum test is successful) and finds that the version field is not
   267         -** WALINDEX_MAX_VERSION, then no read-transaction is opened and SQLite
   268         -** returns SQLITE_CANTOPEN.
          265  +** Currently, clients support both version-1 ("journal_mode=wal") and
          266  +** version-2 ("journal_mode=wal2"). Legacy clients may support version-1
          267  +** only.
   269    268   */
   270         -#define WAL_MAX_VERSION      3007000
   271         -#define WALINDEX_MAX_VERSION 3007000
          269  +#define WAL_VERSION1 3007000      /* For "journal_mode=wal" */
          270  +#define WAL_VERSION2 3021000      /* For "journal_mode=wal2" */
   272    271   
   273    272   /*
   274    273   ** Indices of various locking bytes.   WAL_NREADER is the number
   275    274   ** of available reader locks and should be at least 3.  The default
   276    275   ** is SQLITE_SHM_NLOCK==8 and  WAL_NREADER==5.
   277    276   */
   278    277   #define WAL_WRITE_LOCK         0
   279    278   #define WAL_ALL_BUT_WRITE      1
   280    279   #define WAL_CKPT_LOCK          1
   281    280   #define WAL_RECOVER_LOCK       2
   282    281   #define WAL_READ_LOCK(I)       (3+(I))
   283    282   #define WAL_NREADER            (SQLITE_SHM_NLOCK-3)
   284    283   
          284  +/*
          285  +** Values that may be stored in Wal.readLock in wal2 mode.
          286  +**
          287  +** In wal mode, the Wal.readLock member is set to -1 when no read-lock
          288  +** is held, or else is the index of the read-mark on which a lock is
          289  +** held.
          290  +**
          291  +** In wal2 mode, Wal.readLock must be set to one of the following values.
          292  +** A value of -1 still indicates that no read-lock is held, but the other
          293  +** values are symbolic. See the implementation of walLockReader() for
          294  +** details of how the symbols map to OS level locks.
          295  +*/
          296  +#define WAL_LOCK_NONE        -1
          297  +#define WAL_LOCK_PART1        1
          298  +#define WAL_LOCK_PART1_FULL2  2
          299  +#define WAL_LOCK_PART2        3
          300  +#define WAL_LOCK_PART2_FULL1  4
          301  +
          302  +/* 
          303  +** This constant is used in wal2 mode only.
          304  +**
          305  +** In wal2 mode, when committing a transaction, if the current wal file 
          306  +** is sufficiently large and there are no conflicting locks held, the
          307  +** writer writes the new transaction into the start of the other wal
          308  +** file. Usually, "sufficiently large" is defined by the value configured
          309  +** using "PRAGMA journal_size_limit". However, if no such value has been
          310  +** configured, sufficiently large defaults to WAL_DEFAULT_WALSIZE frames.
          311  +*/
          312  +#define WAL_DEFAULT_WALSIZE 1000
   285    313   
   286    314   /* Object declarations */
   287    315   typedef struct WalIndexHdr WalIndexHdr;
   288    316   typedef struct WalIterator WalIterator;
   289    317   typedef struct WalCkptInfo WalCkptInfo;
   290    318   
   291    319   
................................................................................
   297    325   ** For all versions of SQLite through 3.10.0 and probably beyond,
   298    326   ** the locking bytes (WalCkptInfo.aLock) start at offset 120 and
   299    327   ** the total header size is 136 bytes.
   300    328   **
   301    329   ** The szPage value can be any power of 2 between 512 and 32768, inclusive.
   302    330   ** Or it can be 1 to represent a 65536-byte page.  The latter case was
   303    331   ** added in 3.7.1 when support for 64K pages was added.  
          332  +**
          333  +** WAL2 mode notes: Member variable mxFrame2 is only used in wal2 mode
          334  +** (when iVersion is set to WAL_VERSION2). The lower 31 bits store
          335  +** the maximum frame number in file *-wal2. The most significant bit
          336  +** is a flag - set if clients are currently appending to *-wal2, clear
          337  +** otherwise.
   304    338   */
   305    339   struct WalIndexHdr {
   306    340     u32 iVersion;                   /* Wal-index version */
   307         -  u32 unused;                     /* Unused (padding) field */
          341  +  u32 mxFrame2;                   /* See "WAL2 mode notes" above */
   308    342     u32 iChange;                    /* Counter incremented each transaction */
   309    343     u8 isInit;                      /* 1 when initialized */
   310    344     u8 bigEndCksum;                 /* True if checksums in WAL are big-endian */
   311    345     u16 szPage;                     /* Database page size in bytes. 1==64K */
   312         -  u32 mxFrame;                    /* Index of last valid frame in the WAL */
          346  +  u32 mxFrame;                    /* Index of last valid frame in *-wal */
   313    347     u32 nPage;                      /* Size of database in pages */
   314    348     u32 aFrameCksum[2];             /* Checksum of last frame in log */
   315    349     u32 aSalt[2];                   /* Two salt values copied from WAL header */
   316    350     u32 aCksum[2];                  /* Checksum over all prior fields */
   317    351   };
   318    352   
          353  +/*
          354  +** The following macros and functions are get/set methods for the maximum
          355  +** frame numbers and current wal file values stored in the WalIndexHdr
          356  +** structure. These are helpful because of the unorthodox way in which
          357  +** the values are stored in wal2 mode (see above). They are equivalent
          358  +** to functions with the following signatures.
          359  +**
          360  +**   u32  walidxGetMxFrame(WalIndexHdr*, int iWal);          // get mxFrame
          361  +**   void walidxSetMxFrame(WalIndexHdr*, int iWal, u32 val); // set mxFrame
          362  +**   int  walidxGetFile(WalIndexHdr*)                        // get file
          363  +**   void walidxSetFile(WalIndexHdr*, int val);              // set file
          364  +*/
          365  +#define walidxGetMxFrame(pHdr, iWal) \
          366  +  ((iWal) ? ((pHdr)->mxFrame2 & 0x7FFFFFFF) : (pHdr)->mxFrame)
          367  +
          368  +static void walidxSetMxFrame(WalIndexHdr *pHdr, int iWal, u32 mxFrame){
          369  +  if( iWal ){
          370  +    pHdr->mxFrame2 = (pHdr->mxFrame2 & 0x80000000) | mxFrame;
          371  +  }else{
          372  +    pHdr->mxFrame = mxFrame;
          373  +  }
          374  +  assert( walidxGetMxFrame(pHdr, iWal)==mxFrame );
          375  +}
          376  +
          377  +#define walidxGetFile(pHdr) ((pHdr)->mxFrame2 >> 31)
          378  +
          379  +#define walidxSetFile(pHdr, iWal) (                                          \
          380  +    (pHdr)->mxFrame2 = ((pHdr)->mxFrame2 & 0x7FFFFFFF) | (((u32)(iWal))<<31) \
          381  +)
          382  +
          383  +/*
          384  +** Argument is a pointer to a Wal structure. Return true if the current
          385  +** cache of the wal-index header indicates "journal_mode=wal2" mode, or
          386  +** false otherwise.
          387  +*/
          388  +#define isWalMode2(pWal) ((pWal)->hdr.iVersion==WAL_VERSION2)
          389  +
   319    390   /*
   320    391   ** A copy of the following object occurs in the wal-index immediately
   321    392   ** following the second copy of the WalIndexHdr.  This object stores
   322    393   ** information used by checkpoint.
   323    394   **
   324    395   ** nBackfill is the number of frames in the WAL that have been written
   325    396   ** back into the database. (We call the act of moving content from WAL to
................................................................................
   423    494   /*
   424    495   ** An open write-ahead log file is represented by an instance of the
   425    496   ** following object.
   426    497   */
   427    498   struct Wal {
   428    499     sqlite3_vfs *pVfs;         /* The VFS used to create pDbFd */
   429    500     sqlite3_file *pDbFd;       /* File handle for the database file */
   430         -  sqlite3_file *pWalFd;      /* File handle for WAL file */
          501  +  sqlite3_file *apWalFd[2];  /* File handle for "*-wal" and "*-wal2" */
   431    502     u32 iCallback;             /* Value to pass to log callback (or 0) */
   432    503     i64 mxWalSize;             /* Truncate WAL to this size upon reset */
   433    504     int nWiData;               /* Size of array apWiData */
   434    505     int szFirstBlock;          /* Size of first block written to WAL file */
   435    506     volatile u32 **apWiData;   /* Pointer to wal-index content in memory */
   436    507     u32 szPage;                /* Database page size */
   437    508     i16 readLock;              /* Which read lock is being held.  -1 for none */
................................................................................
   443    514     u8 truncateOnCommit;       /* True to truncate WAL file on commit */
   444    515     u8 syncHeader;             /* Fsync the WAL header if true */
   445    516     u8 padToSectorBoundary;    /* Pad transactions out to the next sector */
   446    517     WalIndexHdr hdr;           /* Wal-index header for current transaction */
   447    518     u32 minFrame;              /* Ignore wal frames before this one */
   448    519     u32 iReCksum;              /* On commit, recalculate checksums from here */
   449    520     const char *zWalName;      /* Name of WAL file */
          521  +  char *zWalName2;           /* Name of second WAL file */
   450    522     u32 nCkpt;                 /* Checkpoint sequence counter in the wal-header */
   451    523   #ifdef SQLITE_DEBUG
   452    524     u8 lockError;              /* True if a locking error has occurred */
   453    525   #endif
   454    526   #ifdef SQLITE_ENABLE_SNAPSHOT
   455    527     WalIndexHdr *pSnapshot;    /* Start transaction here if not NULL */
   456    528   #endif
          529  +  int bWal2;                 /* bWal2 flag passed to WalOpen() */
   457    530   };
   458    531   
   459    532   /*
   460    533   ** Candidate values for Wal.exclusiveMode.
   461    534   */
   462    535   #define WAL_NORMAL_MODE     0
   463    536   #define WAL_EXCLUSIVE_MODE  1     
................................................................................
   663    736   */
   664    737   static void walIndexWriteHdr(Wal *pWal){
   665    738     volatile WalIndexHdr *aHdr = walIndexHdr(pWal);
   666    739     const int nCksum = offsetof(WalIndexHdr, aCksum);
   667    740   
   668    741     assert( pWal->writeLock );
   669    742     pWal->hdr.isInit = 1;
   670         -  pWal->hdr.iVersion = WALINDEX_MAX_VERSION;
          743  +  assert( pWal->hdr.iVersion==WAL_VERSION1||pWal->hdr.iVersion==WAL_VERSION2 );
   671    744     walChecksumBytes(1, (u8*)&pWal->hdr, nCksum, 0, pWal->hdr.aCksum);
   672    745     memcpy((void*)&aHdr[1], (const void*)&pWal->hdr, sizeof(WalIndexHdr));
   673    746     walShmBarrier(pWal);
   674    747     memcpy((void*)&aHdr[0], (const void*)&pWal->hdr, sizeof(WalIndexHdr));
   675    748   }
   676    749   
   677    750   /*
................................................................................
   741    814     */
   742    815     pgno = sqlite3Get4byte(&aFrame[0]);
   743    816     if( pgno==0 ){
   744    817       return 0;
   745    818     }
   746    819   
   747    820     /* A frame is only valid if a checksum of the WAL header,
   748         -  ** all prior frams, the first 16 bytes of this frame-header, 
          821  +  ** all prior frames, the first 16 bytes of this frame-header, 
   749    822     ** and the frame-data matches the checksum in the last 8 
   750    823     ** bytes of this frame-header.
   751    824     */
   752    825     nativeCksum = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN);
   753    826     walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum);
   754    827     walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum);
   755    828     if( aCksum[0]!=sqlite3Get4byte(&aFrame[16]) 
................................................................................
   826    899   static void walUnlockExclusive(Wal *pWal, int lockIdx, int n){
   827    900     if( pWal->exclusiveMode ) return;
   828    901     (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, n,
   829    902                            SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE);
   830    903     WALTRACE(("WAL%p: release EXCLUSIVE-%s cnt=%d\n", pWal,
   831    904                walLockName(lockIdx), n));
   832    905   }
          906  +
          907  +/*
          908  +** This function is used to take and release read-locks in wal2 mode.
          909  +**
          910  +** Use of WAL_READ_LOCK(x) slots for (1<=x<=4).
          911  +**
          912  +** 1) Partial read of *-wal     (blocks checkpointer from checkpointing)
          913  +** 2) Full read of *-wal2       (blocks writer from writing)
          914  +** 3) Partial read of *-wal2    (blocks checkpointer from checkpointing)
          915  +** 4) Full read of *-wal        (blocks writer from writing)
          916  +*/
          917  +static int walLockReader(Wal *pWal, int eLock, int bLock){
          918  +  int i;                          /* Index of first readmark to lock */
          919  +  int n;                          /* Number of readmarks to lock */
          920  +
          921  +  assert( pWal->hdr.iVersion==WAL_VERSION2 );
          922  +  if( pWal->exclusiveMode ) return SQLITE_OK;  /* No shm lock call made */
          923  +
          924  +  switch( eLock ){
          925  +    case WAL_LOCK_PART1      : i = 1; n = 1; break;   /* slot 1 */
          926  +    case WAL_LOCK_PART1_FULL2: i = 1; n = 2; break;   /* slots 1 and 2 */
          927  +    case WAL_LOCK_PART2      : i = 3; n = 1; break;   /* slot 3 */
          928  +    case WAL_LOCK_PART2_FULL1: i = 3; n = 2; break;   /* slots 3 and 4 */
          929  +    default: assert( !"cannot happen" ); return SQLITE_INTERNAL;
          930  +  }
          931  +
          932  +  /* Take (bLock!=0) or release (bLock==0) a SHARED lock on the n slots */
          933  +  return sqlite3OsShmLock(pWal->pDbFd, WAL_READ_LOCK(i), n,
          934  +      SQLITE_SHM_SHARED | (bLock ? SQLITE_SHM_LOCK : SQLITE_SHM_UNLOCK)
          935  +  );
   833    936   
   834    937   /*
   835    938   ** Compute a hash on a page number.  The resulting hash value must land
   836    939   ** between 0 and (HASHTABLE_NSLOT-1).  The walHashNext() function advances
   837    940   ** the hash to the next value in the event of a collision.
   838    941   */
   839    942   static int walHash(u32 iPage){
................................................................................
   886    989     
   887    990       *paPgno = &aPgno[-1];
   888    991       *paHash = aHash;
   889    992       *piZero = iZero;
   890    993     }
   891    994     return rc;
   892    995   }
          996  +
          997  +static u32 walExternalEncode(int iWal, u32 iFrame){
          998  +  u32 iRet;
          999  +  if( iWal ){
         1000  +    iRet = HASHTABLE_NPAGE_ONE + iFrame;
         1001  +    iRet += ((iFrame-1) / HASHTABLE_NPAGE) * HASHTABLE_NPAGE;
         1002  +  }else{
         1003  +    iRet = iFrame;
         1004  +    iFrame += HASHTABLE_NPAGE - HASHTABLE_NPAGE_ONE;
         1005  +    iRet += ((iFrame-1) / HASHTABLE_NPAGE) * HASHTABLE_NPAGE;
         1006  +  }
         1007  +  return iRet;
         1008  +}
         1009  +
         1010  +/*
         1011  +** Parameter iExternal is an external frame identifier. This function
         1012  +** transforms it to a wal file number (0 or 1) and frame number within
         1013  +** this wal file (reported via output parameter *piRead).
         1014  +*/
         1015  +static int walExternalDecode(u32 iExternal, u32 *piRead){
         1016  +  int iHash = (iExternal+HASHTABLE_NPAGE-HASHTABLE_NPAGE_ONE-1)/HASHTABLE_NPAGE;
         1017  +
         1018  +  if( 0==(iHash & 0x01) ){
         1019  +    /* A frame in wal file 0 */
         1020  +    *piRead = (iExternal <= HASHTABLE_NPAGE_ONE) ? iExternal :
         1021  +      iExternal - (iHash/2) * HASHTABLE_NPAGE;
         1022  +    return 0;
         1023  +  }
         1024  +  if( iHash==0 ){
         1025  +    *piRead = iExternal;
         1026  +    return 0;
         1027  +  }else{
         1028  +    *piRead = iExternal - HASHTABLE_NPAGE_ONE - ((iHash-1)/2) * HASHTABLE_NPAGE;
         1029  +  }
         1030  +
         1031  +  return (iHash % 2);
         1032  +}
   893   1033   
   894   1034   /*
   895   1035   ** Return the number of the wal-index page that contains the hash-table
   896   1036   ** and page-number array that contain entries corresponding to WAL frame
   897   1037   ** iFrame. The wal-index is broken up into 32KB pages. Wal-index pages 
   898   1038   ** are numbered starting from 0.
   899   1039   */
................................................................................
   903   1043          && (iHash>=1 || iFrame<=HASHTABLE_NPAGE_ONE)
   904   1044          && (iHash<=1 || iFrame>(HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE))
   905   1045          && (iHash>=2 || iFrame<=HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE)
   906   1046          && (iHash<=2 || iFrame>(HASHTABLE_NPAGE_ONE+2*HASHTABLE_NPAGE))
   907   1047     );
   908   1048     return iHash;
   909   1049   }
         1050  +
         1051  +/*
         1052  +** Return the index of the hash-table corresponding to frame iFrame of wal
         1053  +** file iWal.
         1054  +*/
         1055  +static int walFramePage2(int iWal, u32 iFrame){
         1056  +  int iRet;
         1057  +  assert( iWal==0 || iWal==1 );
         1058  +  assert( iFrame>0 );
         1059  +  if( iWal==0 ){
         1060  +    iRet = 2*((iFrame+HASHTABLE_NPAGE-HASHTABLE_NPAGE_ONE-1)/HASHTABLE_NPAGE);
         1061  +  }else{
         1062  +    iRet = 1 + 2 * ((iFrame-1) / HASHTABLE_NPAGE);
         1063  +  }
         1064  +  return iRet;
         1065  +}
   910   1066   
   911   1067   /*
   912   1068   ** Return the page number associated with frame iFrame in this WAL.
   913   1069   */
   914   1070   static u32 walFramePgno(Wal *pWal, u32 iFrame){
   915   1071     int iHash = walFramePage(iFrame);
   916   1072     if( iHash==0 ){
   917   1073       return pWal->apWiData[0][WALINDEX_HDR_SIZE/sizeof(u32) + iFrame - 1];
   918   1074     }
   919   1075     return pWal->apWiData[iHash][(iFrame-1-HASHTABLE_NPAGE_ONE)%HASHTABLE_NPAGE];
   920   1076   }
         1077  +
         1078  +static u32 walFramePgno2(Wal *pWal, int iWal, u32 iFrame){
         1079  +  return walFramePgno(pWal, walExternalEncode(iWal, iFrame));
         1080  +}
   921   1081   
   922   1082   /*
   923   1083   ** Remove entries from the hash table that point to WAL slots greater
   924   1084   ** than pWal->hdr.mxFrame.
   925   1085   **
   926   1086   ** This function is called whenever pWal->hdr.mxFrame is decreased due
   927   1087   ** to a rollback or savepoint.
................................................................................
   934   1094   static void walCleanupHash(Wal *pWal){
   935   1095     volatile ht_slot *aHash = 0;    /* Pointer to hash table to clear */
   936   1096     volatile u32 *aPgno = 0;        /* Page number array for hash table */
   937   1097     u32 iZero = 0;                  /* frame == (aHash[x]+iZero) */
   938   1098     int iLimit = 0;                 /* Zero values greater than this */
   939   1099     int nByte;                      /* Number of bytes to zero in aPgno[] */
   940   1100     int i;                          /* Used to iterate through aHash[] */
         1101  +  int iWal = walidxGetFile(&pWal->hdr);
         1102  +  u32 mxFrame = walidxGetMxFrame(&pWal->hdr, iWal);
         1103  +
         1104  +  u32 iExternal;
         1105  +  if( isWalMode2(pWal) ){
         1106  +    iExternal = walExternalEncode(iWal, mxFrame);
         1107  +  }else{
         1108  +    assert( iWal==0 );
         1109  +    iExternal = mxFrame;
         1110  +  }
   941   1111   
   942   1112     assert( pWal->writeLock );
   943         -  testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE-1 );
   944         -  testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE );
   945         -  testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE+1 );
         1113  +  testcase( mxFrame==HASHTABLE_NPAGE_ONE-1 );
         1114  +  testcase( mxFrame==HASHTABLE_NPAGE_ONE );
         1115  +  testcase( mxFrame==HASHTABLE_NPAGE_ONE+1 );
   946   1116   
   947         -  if( pWal->hdr.mxFrame==0 ) return;
         1117  +  if( mxFrame==0 ) return;
   948   1118   
   949   1119     /* Obtain pointers to the hash-table and page-number array containing 
   950   1120     ** the entry that corresponds to frame pWal->hdr.mxFrame. It is guaranteed
   951   1121     ** that the page said hash-table and array reside on is already mapped.
   952   1122     */
   953         -  assert( pWal->nWiData>walFramePage(pWal->hdr.mxFrame) );
   954         -  assert( pWal->apWiData[walFramePage(pWal->hdr.mxFrame)] );
   955         -  walHashGet(pWal, walFramePage(pWal->hdr.mxFrame), &aHash, &aPgno, &iZero);
         1123  +  assert( pWal->nWiData>walFramePage(iExternal) );
         1124  +  assert( pWal->apWiData[walFramePage(iExternal)] );
         1125  +  walHashGet(pWal, walFramePage(iExternal), &aHash, &aPgno, &iZero);
   956   1126   
   957   1127     /* Zero all hash-table entries that correspond to frame numbers greater
   958   1128     ** than pWal->hdr.mxFrame.
   959   1129     */
   960         -  iLimit = pWal->hdr.mxFrame - iZero;
         1130  +  iLimit = iExternal - iZero;
   961   1131     assert( iLimit>0 );
   962   1132     for(i=0; i<HASHTABLE_NSLOT; i++){
   963   1133       if( aHash[i]>iLimit ){
   964   1134         aHash[i] = 0;
   965   1135       }
   966   1136     }
   967   1137     
   968   1138     /* Zero the entries in the aPgno array that correspond to frames with
   969         -  ** frame numbers greater than pWal->hdr.mxFrame. 
   970         -  */
         1139  +  ** frame numbers greater than pWal->hdr.mxFrame.  */
   971   1140     nByte = (int)((char *)aHash - (char *)&aPgno[iLimit+1]);
   972   1141     memset((void *)&aPgno[iLimit+1], 0, nByte);
   973   1142   
   974   1143   #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
   975   1144     /* Verify that every entry in the mapping region is still reachable
   976   1145     ** via the hash table even after the cleanup.
   977   1146     */
................................................................................
   984   1153         }
   985   1154         assert( aHash[iKey]==j );
   986   1155       }
   987   1156     }
   988   1157   #endif /* SQLITE_ENABLE_EXPENSIVE_ASSERT */
   989   1158   }
   990   1159   
   991         -
   992   1160   /*
   993   1161   ** Set an entry in the wal-index that will map database page number
   994   1162   ** iPage into frame iFrame of wal file iWal (0 or 1).
   995   1163   */
   996         -static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){
         1164  +static int walIndexAppend(Wal *pWal, int iWal, u32 iFrame, u32 iPage){
   997   1165     int rc;                         /* Return code */
   998   1166     u32 iZero = 0;                  /* One less than frame number of aPgno[1] */
   999   1167     volatile u32 *aPgno = 0;        /* Page number array */
  1000   1168     volatile ht_slot *aHash = 0;    /* Hash table */
         1169  +  u32 iExternal;
         1170  +  
         1171  +  if( isWalMode2(pWal) ){
         1172  +    iExternal = walExternalEncode(iWal, iFrame);
         1173  +  }else{
         1174  +    assert( iWal==0 );
         1175  +    iExternal = iFrame;
         1176  +  }
  1001   1177   
  1002         -  rc = walHashGet(pWal, walFramePage(iFrame), &aHash, &aPgno, &iZero);
         1178  +  rc = walHashGet(pWal, walFramePage(iExternal), &aHash, &aPgno, &iZero);
  1003   1179   
  1004   1180     /* Assuming the wal-index file was successfully mapped, populate the
  1005   1181     ** page number array and hash table entry.
  1006   1182     */
  1007   1183     if( rc==SQLITE_OK ){
  1008   1184       int iKey;                     /* Hash table key */
  1009   1185       int idx;                      /* Value to write to hash-table slot */
  1010   1186       int nCollide;                 /* Number of hash collisions */
  1011   1187   
  1012         -    idx = iFrame - iZero;
         1188  +    idx = iExternal - iZero;
  1013   1189       assert( idx <= HASHTABLE_NSLOT/2 + 1 );
  1014   1190       
  1015   1191       /* If this is the first entry to be added to this hash-table, zero the
  1016   1192       ** entire hash table and aPgno[] array before proceeding. 
  1017   1193       */
  1018   1194       if( idx==1 ){
  1019   1195         int nByte = (int)((u8 *)&aHash[HASHTABLE_NSLOT] - (u8 *)&aPgno[1]);
................................................................................
  1067   1243   #endif /* SQLITE_ENABLE_EXPENSIVE_ASSERT */
  1068   1244     }
  1069   1245   
  1070   1246   
  1071   1247     return rc;
  1072   1248   }
  1073   1249   
         1250  +/*
         1251  +** Recover a single wal file - *-wal if iWal==0, or *-wal2 if iWal==1.
         1252  +*/
         1253  +static int walIndexRecoverOne(Wal *pWal, int iWal, u32 *pnCkpt, int *pbZero){
         1254  +  i64 nSize;                      /* Size of log file */
         1255  +  u32 aFrameCksum[2] = {0, 0};
         1256  +  int rc;
         1257  +  sqlite3_file *pWalFd = pWal->apWalFd[iWal];
         1258  +
         1259  +  assert( iWal==0 || iWal==1 );
         1260  +
         1261  +  memset(&pWal->hdr, 0, sizeof(WalIndexHdr));
         1262  +  sqlite3_randomness(8, pWal->hdr.aSalt);
         1263  +
         1264  +  rc = sqlite3OsFileSize(pWalFd, &nSize);
         1265  +  if( rc==SQLITE_OK ){
         1266  +    if( nSize>WAL_HDRSIZE ){
         1267  +      u8 aBuf[WAL_HDRSIZE];         /* Buffer to load WAL header into */
         1268  +      u8 *aFrame = 0;               /* Malloc'd buffer to load entire frame */
         1269  +      int szFrame;                  /* Number of bytes in buffer aFrame[] */
         1270  +      u8 *aData;                    /* Pointer to data part of aFrame buffer */
         1271  +      int iFrame;                   /* Index of last frame read */
         1272  +      i64 iOffset;                  /* Next offset to read from log file */
         1273  +      int szPage;                   /* Page size according to the log */
         1274  +      u32 magic;                    /* Magic value read from WAL header */
         1275  +      u32 version;                  /* Version value read from WAL header */
         1276  +      int isValid;                  /* True if this frame is valid */
         1277  +  
         1278  +      /* Read in the WAL header. */
         1279  +      rc = sqlite3OsRead(pWalFd, aBuf, WAL_HDRSIZE, 0);
         1280  +      if( rc!=SQLITE_OK ){
         1281  +        return rc;
         1282  +      }
         1283  +  
         1284  +      /* If the database page size is not a power of two, or is greater than
         1285  +      ** SQLITE_MAX_PAGE_SIZE, conclude that the WAL file contains no valid 
         1286  +      ** data. Similarly, if the 'magic' value is invalid, ignore the whole
         1287  +      ** WAL file.
         1288  +      */
         1289  +      magic = sqlite3Get4byte(&aBuf[0]);
         1290  +      szPage = sqlite3Get4byte(&aBuf[8]);
         1291  +      if( (magic&0xFFFFFFFE)!=WAL_MAGIC 
         1292  +       || szPage&(szPage-1) 
         1293  +       || szPage>SQLITE_MAX_PAGE_SIZE 
         1294  +       || szPage<512 
         1295  +      ){
         1296  +        return SQLITE_OK;
         1297  +      }
         1298  +      pWal->hdr.bigEndCksum = (u8)(magic&0x00000001);
         1299  +      pWal->szPage = szPage;
         1300  +  
         1301  +      /* Verify that the WAL header checksum is correct */
         1302  +      walChecksumBytes(pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN, 
         1303  +          aBuf, WAL_HDRSIZE-2*4, 0, pWal->hdr.aFrameCksum
         1304  +      );
         1305  +      if( pWal->hdr.aFrameCksum[0]!=sqlite3Get4byte(&aBuf[24])
         1306  +       || pWal->hdr.aFrameCksum[1]!=sqlite3Get4byte(&aBuf[28])
         1307  +      ){
         1308  +        return SQLITE_OK;
         1309  +      }
         1310  +  
         1311  +      memcpy(&pWal->hdr.aSalt, &aBuf[16], 8);
         1312  +      *pnCkpt = sqlite3Get4byte(&aBuf[12]);
         1313  +  
         1314  +      /* Verify that the version number on the WAL format is one that
         1315  +      ** we are able to understand */
         1316  +      version = sqlite3Get4byte(&aBuf[4]);
         1317  +      if( version!=WAL_VERSION1 && version!=WAL_VERSION2 ){
         1318  +        return SQLITE_CANTOPEN_BKPT;
         1319  +      }
         1320  +      pWal->hdr.iVersion = version;
         1321  +  
         1322  +      /* Malloc a buffer to read frames into. */
         1323  +      szFrame = szPage + WAL_FRAME_HDRSIZE;
         1324  +      aFrame = (u8 *)sqlite3_malloc64(szFrame);
         1325  +      if( !aFrame ){
         1326  +        return SQLITE_NOMEM_BKPT;
         1327  +      }
         1328  +      aData = &aFrame[WAL_FRAME_HDRSIZE];
         1329  +  
         1330  +      /* Read all frames from the log file. */
         1331  +      iFrame = 0;
         1332  +      for(iOffset=WAL_HDRSIZE; (iOffset+szFrame)<=nSize; iOffset+=szFrame){
         1333  +        u32 pgno;                   /* Database page number for frame */
         1334  +        u32 nTruncate;              /* dbsize field from frame header */
         1335  +  
         1336  +        /* Read and decode the next log frame. */
         1337  +        iFrame++;
         1338  +        rc = sqlite3OsRead(pWalFd, aFrame, szFrame, iOffset);
         1339  +        if( rc!=SQLITE_OK ) break;
         1340  +        isValid = walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame);
         1341  +        if( !isValid ) break;
         1342  +        rc = walIndexAppend(pWal, iWal, iFrame, pgno);
         1343  +        if( rc!=SQLITE_OK ) break;
         1344  +  
         1345  +        /* If nTruncate is non-zero, this is a commit record. */
         1346  +        if( nTruncate ){
         1347  +          pWal->hdr.mxFrame = iFrame;
         1348  +          pWal->hdr.nPage = nTruncate;
         1349  +          pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16));
         1350  +          testcase( szPage<=32768 );
         1351  +          testcase( szPage>=65536 );
         1352  +          aFrameCksum[0] = pWal->hdr.aFrameCksum[0];
         1353  +          aFrameCksum[1] = pWal->hdr.aFrameCksum[1];
         1354  +        }
         1355  +      }
         1356  +  
         1357  +      sqlite3_free(aFrame);
         1358  +    }else if( pbZero && nSize==0 ){
         1359  +      *pbZero = 1;
         1360  +    }
         1361  +  }
         1362  +
         1363  +  pWal->hdr.aFrameCksum[0] = aFrameCksum[0];
         1364  +  pWal->hdr.aFrameCksum[1] = aFrameCksum[1];
         1365  +
         1366  +  return rc;
         1367  +}
         1368  +
         1369  +static int walOpenWal2(Wal *pWal){
         1370  +  int rc = SQLITE_OK;
         1371  +  if( !isOpen(pWal->apWalFd[1]) ){
         1372  +    int f = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_WAL);
         1373  +    rc = sqlite3OsOpen(pWal->pVfs, pWal->zWalName2, pWal->apWalFd[1], f, &f);
         1374  +  }
         1375  +  return rc;
         1376  +}
  1074   1377   
  1075   1378   /*
  1076   1379   ** Recover the wal-index by reading the write-ahead log file. 
  1077   1380   **
  1078   1381   ** This routine first tries to establish an exclusive lock on the
  1079   1382   ** wal-index to prevent other threads/processes from doing anything
  1080   1383   ** with the WAL or wal-index while recovery is running.  The
  1081   1384   ** WAL_RECOVER_LOCK is also held so that other threads will know
  1082   1385   ** that this thread is running recovery.  If unable to establish
  1083   1386   ** the necessary locks, this routine returns SQLITE_BUSY.
  1084   1387   */
  1085   1388   static int walIndexRecover(Wal *pWal){
  1086   1389     int rc;                         /* Return Code */
  1087         -  i64 nSize;                      /* Size of log file */
  1088         -  u32 aFrameCksum[2] = {0, 0};
  1089   1390     int iLock;                      /* Lock offset to lock for checkpoint */
  1090   1391     int nLock;                      /* Number of locks to hold */
         1392  +  u32 nCkpt1 = 0xFFFFFFFF;
         1393  +  u32 nCkpt2 = 0xFFFFFFFF;
         1394  +  int bZero = 0;
         1395  +  WalIndexHdr hdr;
  1091   1396   
  1092   1397     /* Obtain an exclusive lock on all bytes in the locking range not already
  1093   1398     ** locked by the caller. The caller is guaranteed to have locked the
  1094   1399     ** WAL_WRITE_LOCK byte, and may have also locked the WAL_CKPT_LOCK byte.
  1095   1400     ** If successful, the same bytes that are locked here are unlocked before
  1096   1401     ** this function returns.
  1097   1402     */
................................................................................
  1103   1408     nLock = SQLITE_SHM_NLOCK - iLock;
  1104   1409     rc = walLockExclusive(pWal, iLock, nLock);
  1105   1410     if( rc ){
  1106   1411       return rc;
  1107   1412     }
  1108   1413     WALTRACE(("WAL%p: recovery begin...\n", pWal));
  1109   1414   
  1110         -  memset(&pWal->hdr, 0, sizeof(WalIndexHdr));
  1111         -
  1112         -  rc = sqlite3OsFileSize(pWal->pWalFd, &nSize);
  1113         -  if( rc!=SQLITE_OK ){
  1114         -    goto recovery_error;
  1115         -  }
  1116         -
  1117         -  if( nSize>WAL_HDRSIZE ){
  1118         -    u8 aBuf[WAL_HDRSIZE];         /* Buffer to load WAL header into */
  1119         -    u8 *aFrame = 0;               /* Malloc'd buffer to load entire frame */
  1120         -    int szFrame;                  /* Number of bytes in buffer aFrame[] */
  1121         -    u8 *aData;                    /* Pointer to data part of aFrame buffer */
  1122         -    int iFrame;                   /* Index of last frame read */
  1123         -    i64 iOffset;                  /* Next offset to read from log file */
  1124         -    int szPage;                   /* Page size according to the log */
  1125         -    u32 magic;                    /* Magic value read from WAL header */
  1126         -    u32 version;                  /* Magic value read from WAL header */
  1127         -    int isValid;                  /* True if this frame is valid */
  1128         -
  1129         -    /* Read in the WAL header. */
  1130         -    rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0);
  1131         -    if( rc!=SQLITE_OK ){
  1132         -      goto recovery_error;
  1133         -    }
  1134         -
  1135         -    /* If the database page size is not a power of two, or is greater than
  1136         -    ** SQLITE_MAX_PAGE_SIZE, conclude that the WAL file contains no valid 
  1137         -    ** data. Similarly, if the 'magic' value is invalid, ignore the whole
  1138         -    ** WAL file.
  1139         -    */
  1140         -    magic = sqlite3Get4byte(&aBuf[0]);
  1141         -    szPage = sqlite3Get4byte(&aBuf[8]);
  1142         -    if( (magic&0xFFFFFFFE)!=WAL_MAGIC 
  1143         -     || szPage&(szPage-1) 
  1144         -     || szPage>SQLITE_MAX_PAGE_SIZE 
  1145         -     || szPage<512 
  1146         -    ){
  1147         -      goto finished;
  1148         -    }
  1149         -    pWal->hdr.bigEndCksum = (u8)(magic&0x00000001);
  1150         -    pWal->szPage = szPage;
  1151         -    pWal->nCkpt = sqlite3Get4byte(&aBuf[12]);
  1152         -    memcpy(&pWal->hdr.aSalt, &aBuf[16], 8);
  1153         -
  1154         -    /* Verify that the WAL header checksum is correct */
  1155         -    walChecksumBytes(pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN, 
  1156         -        aBuf, WAL_HDRSIZE-2*4, 0, pWal->hdr.aFrameCksum
  1157         -    );
  1158         -    if( pWal->hdr.aFrameCksum[0]!=sqlite3Get4byte(&aBuf[24])
  1159         -     || pWal->hdr.aFrameCksum[1]!=sqlite3Get4byte(&aBuf[28])
  1160         -    ){
  1161         -      goto finished;
  1162         -    }
  1163         -
  1164         -    /* Verify that the version number on the WAL format is one that
  1165         -    ** are able to understand */
  1166         -    version = sqlite3Get4byte(&aBuf[4]);
  1167         -    if( version!=WAL_MAX_VERSION ){
  1168         -      rc = SQLITE_CANTOPEN_BKPT;
  1169         -      goto finished;
  1170         -    }
  1171         -
  1172         -    /* Malloc a buffer to read frames into. */
  1173         -    szFrame = szPage + WAL_FRAME_HDRSIZE;
  1174         -    aFrame = (u8 *)sqlite3_malloc64(szFrame);
  1175         -    if( !aFrame ){
  1176         -      rc = SQLITE_NOMEM_BKPT;
  1177         -      goto recovery_error;
  1178         -    }
  1179         -    aData = &aFrame[WAL_FRAME_HDRSIZE];
  1180         -
  1181         -    /* Read all frames from the log file. */
  1182         -    iFrame = 0;
  1183         -    for(iOffset=WAL_HDRSIZE; (iOffset+szFrame)<=nSize; iOffset+=szFrame){
  1184         -      u32 pgno;                   /* Database page number for frame */
  1185         -      u32 nTruncate;              /* dbsize field from frame header */
  1186         -
  1187         -      /* Read and decode the next log frame. */
  1188         -      iFrame++;
  1189         -      rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset);
  1190         -      if( rc!=SQLITE_OK ) break;
  1191         -      isValid = walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame);
  1192         -      if( !isValid ) break;
  1193         -      rc = walIndexAppend(pWal, iFrame, pgno);
  1194         -      if( rc!=SQLITE_OK ) break;
  1195         -
  1196         -      /* If nTruncate is non-zero, this is a commit record. */
  1197         -      if( nTruncate ){
  1198         -        pWal->hdr.mxFrame = iFrame;
  1199         -        pWal->hdr.nPage = nTruncate;
  1200         -        pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16));
  1201         -        testcase( szPage<=32768 );
  1202         -        testcase( szPage>=65536 );
  1203         -        aFrameCksum[0] = pWal->hdr.aFrameCksum[0];
  1204         -        aFrameCksum[1] = pWal->hdr.aFrameCksum[1];
  1205         -      }
  1206         -    }
  1207         -
  1208         -    sqlite3_free(aFrame);
  1209         -  }
  1210         -
  1211         -finished:
         1415  +  /* Recover the *-wal file. If a valid version-1 header is recovered
         1416  +  ** from it, do not open the *-wal2 file. Even if it exists.
         1417  +  **
         1418  +  ** Otherwise, if the *-wal2 file exists or if the "wal2" flag was 
         1419  +  ** specified when sqlite3WalOpen() was called, open and recover
         1420  +  ** the *-wal2 file. Except, if the *-wal file was zero bytes in size,
         1421  +  ** truncate the *-wal2 to zero bytes in size.
         1422  +  **
         1423  +  ** After this block has run, if the *-wal2 file is open the system
         1424  +  ** starts up in VERSION2 mode. In this case pWal->hdr contains the 
         1425  +  ** wal-index header considering only *-wal2. Stack variable hdr
         1426  +  ** contains the wal-index header considering only *-wal. The hash 
         1427  +  ** tables are populated for both.  
         1428  +  **
         1429  +  ** Or, if the *-wal2 file is not open, start up in VERSION1 mode.
         1430  +  ** pWal->hdr is already populated.
         1431  +  */
         1432  +  rc = walIndexRecoverOne(pWal, 0, &nCkpt1, &bZero);
         1433  +  assert( pWal->hdr.iVersion==0 
         1434  +      || pWal->hdr.iVersion==WAL_VERSION1 
         1435  +      || pWal->hdr.iVersion==WAL_VERSION2 
         1436  +  );
         1437  +  if( rc==SQLITE_OK && pWal->hdr.iVersion!=WAL_VERSION1 ){
         1438  +    int bOpen = 1;
         1439  +    sqlite3_vfs *pVfs = pWal->pVfs;
         1440  +    if( pWal->hdr.iVersion==0 && pWal->bWal2==0 ){
         1441  +      rc = sqlite3OsAccess(pVfs, pWal->zWalName2, SQLITE_ACCESS_EXISTS, &bOpen);
         1442  +    }
         1443  +    if( rc==SQLITE_OK && bOpen ){
         1444  +      rc = walOpenWal2(pWal);
         1445  +      if( rc==SQLITE_OK ){
         1446  +        hdr = pWal->hdr;
         1447  +        rc = walIndexRecoverOne(pWal, 1, &nCkpt2, 0);
         1448  +      }
         1449  +    }
         1450  +  }
         1451  +
  1212   1452     if( rc==SQLITE_OK ){
  1213   1453       volatile WalCkptInfo *pInfo;
  1214         -    int i;
  1215         -    pWal->hdr.aFrameCksum[0] = aFrameCksum[0];
  1216         -    pWal->hdr.aFrameCksum[1] = aFrameCksum[1];
         1454  +
         1455  +    if( isOpen(pWal->apWalFd[1]) ){
         1456  +      /* The case where *-wal2 may follow *-wal */
         1457  +      if( nCkpt2<=0x0F && nCkpt2==nCkpt1+1 ){
         1458  +        if( sqlite3Get4byte((u8*)(&pWal->hdr.aSalt[0]))==hdr.aFrameCksum[0]
         1459  +            && sqlite3Get4byte((u8*)(&pWal->hdr.aSalt[1]))==hdr.aFrameCksum[1]
         1460  +          ){
         1461  +          walidxSetFile(&pWal->hdr, 1);
         1462  +          walidxSetMxFrame(&pWal->hdr, 1, pWal->hdr.mxFrame);
         1463  +          walidxSetMxFrame(&pWal->hdr, 0, hdr.mxFrame);
         1464  +        }else{
         1465  +          pWal->hdr = hdr;
         1466  +        }
         1467  +      }else
         1468  +
         1469  +      /* When *-wal may follow *-wal2 */
         1470  +      if( (nCkpt2==0x0F && nCkpt1==0) || (nCkpt2<0x0F && nCkpt2==nCkpt1-1) ){
         1471  +        if( sqlite3Get4byte((u8*)(&hdr.aSalt[0]))==pWal->hdr.aFrameCksum[0]
         1472  +         && sqlite3Get4byte((u8*)(&hdr.aSalt[1]))==pWal->hdr.aFrameCksum[1]
         1473  +        ){
         1474  +          SWAP(WalIndexHdr, pWal->hdr, hdr);
         1475  +          walidxSetMxFrame(&pWal->hdr, 1, hdr.mxFrame);
         1476  +        }
         1477  +      }else
         1478  +
         1479  +      /* Fallback */
         1480  +      if( nCkpt1<=nCkpt2 ){
         1481  +        pWal->hdr = hdr;
         1482  +      }else{
         1483  +        walidxSetFile(&pWal->hdr, 1);
         1484  +      }
         1485  +      pWal->hdr.iVersion = WAL_VERSION2;
         1486  +    }else{
         1487  +      pWal->hdr.iVersion = WAL_VERSION1;
         1488  +    }
         1489  +
  1217   1490       walIndexWriteHdr(pWal);
  1218   1491   
  1219   1492       /* Reset the checkpoint-header. This is safe because this thread is 
  1220   1493       ** currently holding locks that exclude all other readers, writers and
  1221         -    ** checkpointers.
  1222         -    */
         1494  +    ** checkpointers.  */
  1223   1495       pInfo = walCkptInfo(pWal);
  1224         -    pInfo->nBackfill = 0;
  1225         -    pInfo->nBackfillAttempted = pWal->hdr.mxFrame;
  1226         -    pInfo->aReadMark[0] = 0;
  1227         -    for(i=1; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED;
  1228         -    if( pWal->hdr.mxFrame ) pInfo->aReadMark[1] = pWal->hdr.mxFrame;
         1496  +    memset((void*)pInfo, 0, sizeof(WalCkptInfo));
         1497  +    if( 0==isWalMode2(pWal) ){
         1498  +      int i;
         1499  +      pInfo->nBackfillAttempted = pWal->hdr.mxFrame;
         1500  +      pInfo->aReadMark[0] = 0;
         1501  +      for(i=1; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED;
         1502  +      if( pWal->hdr.mxFrame ) pInfo->aReadMark[1] = pWal->hdr.mxFrame;
         1503  +    }
  1229   1504   
  1230   1505       /* If more than one frame was recovered from the log file, report an
  1231   1506       ** event via sqlite3_log(). This is to help with identifying performance
  1232   1507       ** problems caused by applications routinely shutting down without
  1233         -    ** checkpointing the log file.
  1234         -    */
         1508  +    ** checkpointing the log file.  */
  1235   1509       if( pWal->hdr.nPage ){
  1236   1510         sqlite3_log(SQLITE_NOTICE_RECOVER_WAL,
  1237         -          "recovered %d frames from WAL file %s",
  1238         -          pWal->hdr.mxFrame, pWal->zWalName
         1511  +          "recovered (%d,%d) frames from WAL files %s[2] (%s mode)",
         1512  +          walidxGetMxFrame(&pWal->hdr, 0), walidxGetMxFrame(&pWal->hdr, 1), 
         1513  +          pWal->zWalName, isWalMode2(pWal) ? "wal2" : "wal"
  1239   1514         );
  1240   1515       }
  1241   1516     }
  1242   1517   
  1243         -recovery_error:
  1244   1518     WALTRACE(("WAL%p: recovery %s\n", pWal, rc ? "failed" : "ok"));
  1245   1519     walUnlockExclusive(pWal, iLock, nLock);
  1246   1520     return rc;
  1247   1521   }
  1248   1522   
  1249   1523   /*
  1250         -** Close an open wal-index.
         1524  +** Close an open wal-index and wal files.
  1251   1525   */
  1252   1526   static void walIndexClose(Wal *pWal, int isDelete){
  1253   1527     if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ){
  1254   1528       int i;
  1255   1529       for(i=0; i<pWal->nWiData; i++){
  1256   1530         sqlite3_free((void *)pWal->apWiData[i]);
  1257   1531         pWal->apWiData[i] = 0;
  1258   1532       }
  1259   1533     }else{
  1260   1534       sqlite3OsShmUnmap(pWal->pDbFd, isDelete);
  1261   1535     }
         1536  +  sqlite3OsClose(pWal->apWalFd[0]);
         1537  +  sqlite3OsClose(pWal->apWalFd[1]);
  1262   1538   }
  1263   1539   
  1264   1540   /* 
  1265   1541   ** Open a connection to the WAL file zWalName. The database file must 
  1266   1542   ** already be opened on connection pDbFd. The buffer that zWalName points
  1267   1543   ** to must remain valid for the lifetime of the returned Wal* handle.
  1268   1544   **
................................................................................
  1278   1554   */
  1279   1555   int sqlite3WalOpen(
  1280   1556     sqlite3_vfs *pVfs,              /* vfs module to open wal and wal-index */
  1281   1557     sqlite3_file *pDbFd,            /* The open database file */
  1282   1558     const char *zWalName,           /* Name of the WAL file */
  1283   1559     int bNoShm,                     /* True to run in heap-memory mode */
  1284   1560     i64 mxWalSize,                  /* Truncate WAL to this size on reset */
         1561  +  int bWal2,                      /* True to open in wal2 mode */
  1285   1562     Wal **ppWal                     /* OUT: Allocated Wal handle */
  1286   1563   ){
  1287   1564     int rc;                         /* Return Code */
  1288   1565     Wal *pRet;                      /* Object to allocate and return */
  1289   1566     int flags;                      /* Flags passed to OsOpen() */
         1567  +  int nWalName;                   /* Length of zWalName in bytes */
         1568  +  int nByte;                      /* Bytes of space to allocate */
  1290   1569   
  1291   1570     assert( zWalName && zWalName[0] );
  1292   1571     assert( pDbFd );
  1293   1572   
  1294   1573     /* In the amalgamation, the os_unix.c and os_win.c source files come before
  1295   1574     ** this source file.  Verify that the #defines of the locking byte offsets
  1296   1575     ** in os_unix.c and os_win.c agree with the WALINDEX_LOCK_OFFSET value.
................................................................................
  1302   1581   #ifdef WIN_SHM_BASE
  1303   1582     assert( WIN_SHM_BASE==WALINDEX_LOCK_OFFSET );
  1304   1583   #endif
  1305   1584   #ifdef UNIX_SHM_BASE
  1306   1585     assert( UNIX_SHM_BASE==WALINDEX_LOCK_OFFSET );
  1307   1586   #endif
  1308   1587   
         1588  +  nWalName = sqlite3Strlen30(zWalName);
         1589  +  nByte = sizeof(Wal) + pVfs->szOsFile*2 + nWalName+2;
  1309   1590   
  1310   1591     /* Allocate an instance of struct Wal to return. */
  1311   1592     *ppWal = 0;
  1312         -  pRet = (Wal*)sqlite3MallocZero(sizeof(Wal) + pVfs->szOsFile);
         1593  +  pRet = (Wal*)sqlite3MallocZero(nByte);
  1313   1594     if( !pRet ){
  1314   1595       return SQLITE_NOMEM_BKPT;
  1315   1596     }
  1316   1597   
  1317   1598     pRet->pVfs = pVfs;
  1318         -  pRet->pWalFd = (sqlite3_file *)&pRet[1];
         1599  +  pRet->apWalFd[0] = (sqlite3_file*)((char*)pRet+sizeof(Wal));
         1600  +  pRet->apWalFd[1] = (sqlite3_file*)((char*)pRet+sizeof(Wal)+pVfs->szOsFile);
  1319   1601     pRet->pDbFd = pDbFd;
  1320         -  pRet->readLock = -1;
         1602  +  pRet->readLock = WAL_LOCK_NONE;
  1321   1603     pRet->mxWalSize = mxWalSize;
  1322   1604     pRet->zWalName = zWalName;
  1323   1605     pRet->syncHeader = 1;
  1324   1606     pRet->padToSectorBoundary = 1;
  1325   1607     pRet->exclusiveMode = (bNoShm ? WAL_HEAPMEMORY_MODE: WAL_NORMAL_MODE);
         1608  +  pRet->bWal2 = bWal2;
  1326   1609   
  1327         -  /* Open file handle on the write-ahead log file. */
         1610  +  pRet->zWalName2 = (char*)pRet + sizeof(Wal) + 2*pVfs->szOsFile;
         1611  +  memcpy(pRet->zWalName2, zWalName, nWalName);
         1612  +  pRet->zWalName2[nWalName] = '2';
         1613  +  pRet->zWalName2[nWalName+1] = '\0';
         1614  +
         1615  +  /* Open a file handle on the first write-ahead log file. */
  1328   1616     flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_WAL);
  1329         -  rc = sqlite3OsOpen(pVfs, zWalName, pRet->pWalFd, flags, &flags);
         1617  +  rc = sqlite3OsOpen(pVfs, zWalName, pRet->apWalFd[0], flags, &flags);
  1330   1618     if( rc==SQLITE_OK && flags&SQLITE_OPEN_READONLY ){
  1331   1619       pRet->readOnly = WAL_RDONLY;
  1332   1620     }
  1333   1621   
  1334   1622     if( rc!=SQLITE_OK ){
  1335   1623       walIndexClose(pRet, 0);
  1336         -    sqlite3OsClose(pRet->pWalFd);
  1337   1624       sqlite3_free(pRet);
  1338   1625     }else{
  1339   1626       int iDC = sqlite3OsDeviceCharacteristics(pDbFd);
  1340   1627       if( iDC & SQLITE_IOCAP_SEQUENTIAL ){ pRet->syncHeader = 0; }
  1341   1628       if( iDC & SQLITE_IOCAP_POWERSAFE_OVERWRITE ){
  1342   1629         pRet->padToSectorBoundary = 0;
  1343   1630       }
................................................................................
  1542   1829   }
  1543   1830   
  1544   1831   /*
  1545   1832   ** Construct a WalIterator object that can be used to loop over all 
  1546   1833   ** pages in the WAL in ascending order. The caller must hold the checkpoint
  1547   1834   ** lock.
  1548   1835   **
  1549         -** On success, make *pp point to the newly allocated WalInterator object
  1550         -** return SQLITE_OK. Otherwise, return an error code. If this routine
  1551         -** returns an error, the value of *pp is undefined.
         1836  +** On success, make *pp point to the newly allocated WalIterator object
         1837  +** and return SQLITE_OK. Otherwise, return an error code. If this routine
         1838  +** returns an error, the final value of *pp is undefined.
  1552   1839   **
  1553   1840   ** The calling routine should invoke walIteratorFree() to destroy the
  1554   1841   ** WalIterator object when it has finished with it.
  1555   1842   */
  1556         -static int walIteratorInit(Wal *pWal, WalIterator **pp){
         1843  +static int walIteratorInit(Wal *pWal, int iWal, WalIterator **pp){
  1557   1844     WalIterator *p;                 /* Return value */
  1558   1845     int nSegment;                   /* Number of segments to merge */
  1559   1846     u32 iLast;                      /* Last frame in log */
  1560   1847     int nByte;                      /* Number of bytes to allocate */
  1561   1848     int i;                          /* Iterator variable */
         1849  +  int iLastSeg;                   /* Last hash table to iterate through */
  1562   1850     ht_slot *aTmp;                  /* Temp space used by merge-sort */
  1563   1851     int rc = SQLITE_OK;             /* Return Code */
         1852  +  int iMode = isWalMode2(pWal) ? 2 : 1;
         1853  +
         1854  +  assert( isWalMode2(pWal) || iWal==0 );
  1564   1855   
  1565   1856     /* This routine only runs while holding the checkpoint lock. And
  1566   1857     ** it only runs if there is actually content in the log (mxFrame>0).
  1567   1858     */
  1568         -  assert( pWal->ckptLock && pWal->hdr.mxFrame>0 );
  1569         -  iLast = pWal->hdr.mxFrame;
         1859  +  iLast = walidxGetMxFrame(&pWal->hdr, iWal);
         1860  +  assert( pWal->ckptLock && iLast>0 );
         1861  +
         1862  +  if( iMode==2 ){
         1863  +    iLastSeg = walFramePage2(iWal, iLast);
         1864  +  }else{
         1865  +    iLastSeg = walFramePage(iLast);
         1866  +  }
         1867  +  nSegment = 1 + (iLastSeg/iMode);
  1570   1868   
  1571   1869     /* Allocate space for the WalIterator object. */
  1572         -  nSegment = walFramePage(iLast) + 1;
  1573   1870     nByte = sizeof(WalIterator) 
  1574   1871           + (nSegment-1)*sizeof(struct WalSegment)
  1575   1872           + iLast*sizeof(ht_slot);
  1576   1873     p = (WalIterator *)sqlite3_malloc64(nByte);
  1577   1874     if( !p ){
  1578   1875       return SQLITE_NOMEM_BKPT;
  1579   1876     }
................................................................................
  1586   1883     aTmp = (ht_slot *)sqlite3_malloc64(
  1587   1884         sizeof(ht_slot) * (iLast>HASHTABLE_NPAGE?HASHTABLE_NPAGE:iLast)
  1588   1885     );
  1589   1886     if( !aTmp ){
  1590   1887       rc = SQLITE_NOMEM_BKPT;
  1591   1888     }
  1592   1889   
  1593         -  for(i=0; rc==SQLITE_OK && i<nSegment; i++){
         1890  +  for(i=iWal; rc==SQLITE_OK && i<=iLastSeg; i+=iMode){
  1594   1891       volatile ht_slot *aHash;
  1595         -    u32 iZero;
         1892  +    u32 iExtZero;
  1596   1893       volatile u32 *aPgno;
  1597   1894   
  1598         -    rc = walHashGet(pWal, i, &aHash, &aPgno, &iZero);
         1895  +    rc = walHashGet(pWal, i, &aHash, &aPgno, &iExtZero);
  1599   1896       if( rc==SQLITE_OK ){
  1600   1897         int j;                      /* Counter variable */
  1601   1898         int nEntry;                 /* Number of entries in this segment */
  1602   1899         ht_slot *aIndex;            /* Sorted index for this segment */
         1900  +      u32 iZero;
         1901  +
         1902  +      if( iMode==2 ){
         1903  +        walExternalDecode(iExtZero+1, &iZero);
         1904  +        iZero--;
         1905  +        assert( iZero==0 || i>=2 );
         1906  +      }else{
         1907  +        iZero = iExtZero;
         1908  +      }
  1603   1909   
  1604   1910         aPgno++;
  1605         -      if( (i+1)==nSegment ){
         1911  +      if( i==iLastSeg ){
  1606   1912           nEntry = (int)(iLast - iZero);
  1607   1913         }else{
  1608   1914           nEntry = (int)((u32*)aHash - (u32*)aPgno);
  1609   1915         }
  1610   1916         aIndex = &((ht_slot *)&p->aSegment[p->nSegment])[iZero];
  1611   1917         iZero++;
  1612   1918     
  1613   1919         for(j=0; j<nEntry; j++){
  1614   1920           aIndex[j] = (ht_slot)j;
  1615   1921         }
  1616   1922         walMergesort((u32 *)aPgno, aTmp, aIndex, &nEntry);
  1617         -      p->aSegment[i].iZero = iZero;
  1618         -      p->aSegment[i].nEntry = nEntry;
  1619         -      p->aSegment[i].aIndex = aIndex;
  1620         -      p->aSegment[i].aPgno = (u32 *)aPgno;
         1923  +      p->aSegment[i/iMode].iZero = iZero;
         1924  +      p->aSegment[i/iMode].nEntry = nEntry;
         1925  +      p->aSegment[i/iMode].aIndex = aIndex;
         1926  +      p->aSegment[i/iMode].aPgno = (u32 *)aPgno;
  1621   1927       }
  1622   1928     }
  1623   1929     sqlite3_free(aTmp);
  1624   1930   
  1625   1931     if( rc!=SQLITE_OK ){
  1626   1932       walIteratorFree(p);
  1627   1933     }
................................................................................
  1674   1980   ** new wal-index header. It should be passed a pseudo-random value (i.e. 
  1675   1981   ** one obtained from sqlite3_randomness()).
  1676   1982   */
  1677   1983   static void walRestartHdr(Wal *pWal, u32 salt1){
  1678   1984     volatile WalCkptInfo *pInfo = walCkptInfo(pWal);
  1679   1985     int i;                          /* Loop counter */
  1680   1986     u32 *aSalt = pWal->hdr.aSalt;   /* Big-endian salt values */
         1987  +  assert( isWalMode2(pWal)==0 );
  1681   1988     pWal->nCkpt++;
  1682   1989     pWal->hdr.mxFrame = 0;
  1683   1990     sqlite3Put4byte((u8*)&aSalt[0], 1 + sqlite3Get4byte((u8*)&aSalt[0]));
  1684   1991     memcpy(&pWal->hdr.aSalt[1], &salt1, 4);
  1685   1992     walIndexWriteHdr(pWal);
  1686   1993     pInfo->nBackfill = 0;
  1687   1994     pInfo->nBackfillAttempted = 0;
................................................................................
  1735   2042     WalIterator *pIter = 0;         /* Wal iterator context */
  1736   2043     u32 iDbpage = 0;                /* Next database page to write */
  1737   2044     u32 iFrame = 0;                 /* Wal frame containing data for iDbpage */
  1738   2045     u32 mxSafeFrame;                /* Max frame that can be backfilled */
  1739   2046     u32 mxPage;                     /* Max database page to write */
  1740   2047     int i;                          /* Loop counter */
  1741   2048     volatile WalCkptInfo *pInfo;    /* The checkpoint status information */
         2049  +  int bWal2 = isWalMode2(pWal);   /* True for wal2 connections */
         2050  +  int iCkpt = bWal2 ? !walidxGetFile(&pWal->hdr) : 0;
  1742   2051   
         2052  +  mxSafeFrame = walidxGetMxFrame(&pWal->hdr, iCkpt);
  1743   2053     szPage = walPagesize(pWal);
  1744   2054     testcase( szPage<=32768 );
  1745   2055     testcase( szPage>=65536 );
  1746   2056     pInfo = walCkptInfo(pWal);
  1747         -  if( pInfo->nBackfill<pWal->hdr.mxFrame ){
         2057  +  if( (bWal2==1 && pInfo->nBackfill==0 && mxSafeFrame) 
         2058  +   || (bWal2==0 && pInfo->nBackfill<mxSafeFrame) 
         2059  +  ){
         2060  +    sqlite3_file *pWalFd = pWal->apWalFd[iCkpt];
         2061  +    mxPage = pWal->hdr.nPage;
         2062  +
         2063  +    /* If this is a wal2 system, check for a reader holding a lock 
         2064  +    ** preventing this checkpoint operation. If one is found, return
         2065  +    ** early.  */
         2066  +    if( bWal2 ){
         2067  +      rc = walLockExclusive(pWal, WAL_READ_LOCK(1 + iCkpt*2), 1);
         2068  +      if( rc!=SQLITE_OK ) return rc;
         2069  +    }
  1748   2070   
  1749   2071       /* Allocate the iterator */
  1750         -    rc = walIteratorInit(pWal, &pIter);
         2072  +    rc = walIteratorInit(pWal, iCkpt, &pIter);
  1751   2073       if( rc!=SQLITE_OK ){
  1752   2074         return rc;
  1753   2075       }
  1754   2076       assert( pIter );
  1755   2077   
  1756   2078       /* EVIDENCE-OF: R-62920-47450 The busy-handler callback is never invoked
  1757   2079       ** in the SQLITE_CHECKPOINT_PASSIVE mode. */
  1758   2080       assert( eMode!=SQLITE_CHECKPOINT_PASSIVE || xBusy==0 );
  1759   2081   
  1760         -    /* Compute in mxSafeFrame the index of the last frame of the WAL that is
  1761         -    ** safe to write into the database.  Frames beyond mxSafeFrame might
  1762         -    ** overwrite database pages that are in use by active readers and thus
  1763         -    ** cannot be backfilled from the WAL.
         2082  +
         2083  +    /* If this is a wal system (not wal2), compute in mxSafeFrame the index 
         2084  +    ** of the last frame of the WAL that is safe to write into the database.
         2085  +    ** Frames beyond mxSafeFrame might overwrite database pages that are in 
         2086  +    ** use by active readers and thus cannot be backfilled from the WAL.
  1764   2087       */
  1765         -    mxSafeFrame = pWal->hdr.mxFrame;
  1766         -    mxPage = pWal->hdr.nPage;
  1767         -    for(i=1; i<WAL_NREADER; i++){
  1768         -      /* Thread-sanitizer reports that the following is an unsafe read,
  1769         -      ** as some other thread may be in the process of updating the value
  1770         -      ** of the aReadMark[] slot. The assumption here is that if that is
  1771         -      ** happening, the other client may only be increasing the value,
  1772         -      ** not decreasing it. So assuming either that either the "old" or
  1773         -      ** "new" version of the value is read, and not some arbitrary value
  1774         -      ** that would never be written by a real client, things are still 
  1775         -      ** safe.  */
  1776         -      u32 y = pInfo->aReadMark[i];
  1777         -      if( mxSafeFrame>y ){
  1778         -        assert( y<=pWal->hdr.mxFrame );
  1779         -        rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(i), 1);
  1780         -        if( rc==SQLITE_OK ){
  1781         -          pInfo->aReadMark[i] = (i==1 ? mxSafeFrame : READMARK_NOT_USED);
  1782         -          walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
  1783         -        }else if( rc==SQLITE_BUSY ){
  1784         -          mxSafeFrame = y;
  1785         -          xBusy = 0;
  1786         -        }else{
  1787         -          goto walcheckpoint_out;
  1788         -        }
  1789         -      }
  1790         -    }
  1791         -
  1792         -    if( pInfo->nBackfill<mxSafeFrame
         2088  +    if( bWal2==0 ){
         2089  +      for(i=1; i<WAL_NREADER; i++){
         2090  +        /* Thread-sanitizer reports that the following is an unsafe read,
         2091  +        ** as some other thread may be in the process of updating the value
         2092  +        ** of the aReadMark[] slot. The assumption here is that if that is
         2093  +        ** happening, the other client may only be increasing the value,
         2094  +        ** not decreasing it. So assuming that either the "old" or
         2095  +        ** "new" version of the value is read, and not some arbitrary value
         2096  +        ** that would never be written by a real client, things are still 
         2097  +        ** safe.  */
         2098  +        u32 y = pInfo->aReadMark[i];
         2099  +        if( mxSafeFrame>y ){
         2100  +          assert( y<=pWal->hdr.mxFrame );
         2101  +          rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(i), 1);
         2102  +          if( rc==SQLITE_OK ){
         2103  +            pInfo->aReadMark[i] = (i==1 ? mxSafeFrame : READMARK_NOT_USED);
         2104  +            walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
         2105  +          }else if( rc==SQLITE_BUSY ){
         2106  +            mxSafeFrame = y;
         2107  +            xBusy = 0;
         2108  +          }else{
         2109  +            goto walcheckpoint_out;
         2110  +          }
         2111  +        }
         2112  +      }
         2113  +    }
         2114  +
         2115  +    if( bWal2 || (pInfo->nBackfill<mxSafeFrame
  1793   2116        && (rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(0),1))==SQLITE_OK
  1794         -    ){
         2117  +    )){
  1795   2118         i64 nSize;                    /* Current size of database file */
  1796   2119         u32 nBackfill = pInfo->nBackfill;
  1797   2120   
         2121  +      assert( bWal2==0 || nBackfill==0 );
  1798   2122         pInfo->nBackfillAttempted = mxSafeFrame;
  1799   2123   
  1800         -      /* Sync the WAL to disk */
  1801         -      rc = sqlite3OsSync(pWal->pWalFd, CKPT_SYNC_FLAGS(sync_flags));
         2124  +      /* Sync the wal file being checkpointed to disk */
         2125  +      rc = sqlite3OsSync(pWalFd, CKPT_SYNC_FLAGS(sync_flags));
  1802   2126   
  1803   2127         /* If the database may grow as a result of this checkpoint, hint
  1804         -      ** about the eventual size of the db file to the VFS layer.
  1805         -      */
         2128  +      ** about the eventual size of the db file to the VFS layer.  */
  1806   2129         if( rc==SQLITE_OK ){
  1807   2130           i64 nReq = ((i64)mxPage * szPage);
  1808   2131           rc = sqlite3OsFileSize(pWal->pDbFd, &nSize);
  1809   2132           if( rc==SQLITE_OK && nSize<nReq ){
  1810   2133             sqlite3OsFileControlHint(pWal->pDbFd, SQLITE_FCNTL_SIZE_HINT, &nReq);
  1811   2134           }
  1812   2135         }
  1813   2136   
  1814         -
  1815   2137         /* Iterate through the contents of the WAL, copying data to the db file */
  1816   2138         while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){
  1817   2139           i64 iOffset;
  1818         -        assert( walFramePgno(pWal, iFrame)==iDbpage );
         2140  +
         2141  +        assert( bWal2==1 || walFramePgno(pWal, iFrame)==iDbpage );
         2142  +        assert( bWal2==0 || walFramePgno2(pWal, iCkpt, iFrame)==iDbpage );
         2143  +
  1819   2144           if( db->u1.isInterrupted ){
  1820   2145             rc = db->mallocFailed ? SQLITE_NOMEM_BKPT : SQLITE_INTERRUPT;
  1821   2146             break;
  1822   2147           }
  1823   2148           if( iFrame<=nBackfill || iFrame>mxSafeFrame || iDbpage>mxPage ){
         2149  +          assert( bWal2==0 || iDbpage>mxPage );
  1824   2150             continue;
  1825   2151           }
  1826   2152           iOffset = walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE;
         2153  +        WALTRACE(("WAL%p: checkpoint frame %d of wal %d to db page %d\n",
         2154  +              pWal, (int)iFrame, iCkpt, (int)iDbpage
         2155  +        ));
  1827   2156           /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL file */
  1828         -        rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, iOffset);
         2157  +        rc = sqlite3OsRead(pWalFd, zBuf, szPage, iOffset);
  1829   2158           if( rc!=SQLITE_OK ) break;
  1830   2159           iOffset = (iDbpage-1)*(i64)szPage;
  1831   2160           testcase( IS_BIG_INT(iOffset) );
  1832   2161           rc = sqlite3OsWrite(pWal->pDbFd, zBuf, szPage, iOffset);
  1833   2162           if( rc!=SQLITE_OK ) break;
  1834   2163         }
  1835   2164   
  1836         -      /* If work was actually accomplished... */
         2165  +      /* Truncate the db file, sync the wal file and set the WalCkptInfo
         2166  +      ** flag to indicate that it has been checkpointed. */
         2167  +      if( !bWal2 && rc==SQLITE_OK && mxSafeFrame==walIndexHdr(pWal)->mxFrame ){
         2168  +        i64 szDb = pWal->hdr.nPage*(i64)szPage;
         2169  +        testcase( IS_BIG_INT(szDb) );
         2170  +        rc = sqlite3OsTruncate(pWal->pDbFd, szDb);
         2171  +      }
         2172  +      if( rc==SQLITE_OK ){
         2173  +        rc = sqlite3OsSync(pWal->pDbFd, CKPT_SYNC_FLAGS(sync_flags));
         2174  +      }
  1837   2175         if( rc==SQLITE_OK ){
  1838         -        if( mxSafeFrame==walIndexHdr(pWal)->mxFrame ){
  1839         -          i64 szDb = pWal->hdr.nPage*(i64)szPage;
  1840         -          testcase( IS_BIG_INT(szDb) );
  1841         -          rc = sqlite3OsTruncate(pWal->pDbFd, szDb);
  1842         -          if( rc==SQLITE_OK ){
  1843         -            rc = sqlite3OsSync(pWal->pDbFd, CKPT_SYNC_FLAGS(sync_flags));
  1844         -          }
  1845         -        }
  1846         -        if( rc==SQLITE_OK ){
  1847         -          pInfo->nBackfill = mxSafeFrame;
  1848         -        }
         2176  +        pInfo->nBackfill = bWal2 ? 1 : mxSafeFrame;
  1849   2177         }
  1850   2178   
  1851   2179         /* Release the reader lock held while backfilling */
  1852         -      walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1);
         2180  +      walUnlockExclusive(pWal, WAL_READ_LOCK(bWal2 ? 1 + iCkpt*2 : 0), 1);
  1853   2181       }
  1854   2182   
  1855   2183       if( rc==SQLITE_BUSY ){
  1856   2184         /* Reset the return code so as not to report a checkpoint failure
  1857   2185         ** just because there are active readers.  */
  1858   2186         rc = SQLITE_OK;
  1859   2187       }
................................................................................
  1860   2188     }
  1861   2189   
  1862   2190     /* If this is an SQLITE_CHECKPOINT_RESTART or TRUNCATE operation, and the
  1863   2191     ** entire wal file has been copied into the database file, then block 
  1864   2192     ** until all readers have finished using the wal file. This ensures that 
  1865   2193     ** the next process to write to the database restarts the wal file.
  1866   2194     */
  1867         -  if( rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){
         2195  +  if( bWal2==0 && rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){
  1868   2196       assert( pWal->writeLock );
  1869   2197       if( pInfo->nBackfill<pWal->hdr.mxFrame ){
  1870   2198         rc = SQLITE_BUSY;
  1871   2199       }else if( eMode>=SQLITE_CHECKPOINT_RESTART ){
  1872   2200         u32 salt1;
  1873   2201         sqlite3_randomness(4, &salt1);
  1874   2202         assert( pInfo->nBackfill==pWal->hdr.mxFrame );
................................................................................
  1885   2213             ** writer clients should see that the entire log file has been
  1886   2214             ** checkpointed and behave accordingly. This seems unsafe though,
  1887   2215             ** as it would leave the system in a state where the contents of
  1888   2216             ** the wal-index header do not match the contents of the 
  1889   2217             ** file-system. To avoid this, update the wal-index header to
  1890   2218             ** indicate that the log file contains zero valid frames.  */
  1891   2219             walRestartHdr(pWal, salt1);
  1892         -          rc = sqlite3OsTruncate(pWal->pWalFd, 0);
         2220  +          rc = sqlite3OsTruncate(pWal->apWalFd[0], 0);
  1893   2221           }
  1894   2222           walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
  1895   2223         }
  1896   2224       }
  1897   2225     }
  1898   2226   
  1899   2227    walcheckpoint_out:
................................................................................
  1902   2230   }
  1903   2231   
  1904   2232   /*
  1905   2233   ** If the WAL file is currently larger than nMax bytes in size, truncate
  1906   2234   ** it to exactly nMax bytes. If an error occurs while doing so, ignore it.
  1907   2235   */
  1908   2236   static void walLimitSize(Wal *pWal, i64 nMax){
  1909         -  i64 sz;
  1910         -  int rx;
  1911         -  sqlite3BeginBenignMalloc();
  1912         -  rx = sqlite3OsFileSize(pWal->pWalFd, &sz);
  1913         -  if( rx==SQLITE_OK && (sz > nMax ) ){
  1914         -    rx = sqlite3OsTruncate(pWal->pWalFd, nMax);
  1915         -  }
  1916         -  sqlite3EndBenignMalloc();
  1917         -  if( rx ){
  1918         -    sqlite3_log(rx, "cannot limit WAL size: %s", pWal->zWalName);
         2237  +  if( isWalMode2(pWal)==0 ){
         2238  +    i64 sz;
         2239  +    int rx;
         2240  +    sqlite3BeginBenignMalloc();
         2241  +    rx = sqlite3OsFileSize(pWal->apWalFd[0], &sz);
         2242  +    if( rx==SQLITE_OK && (sz > nMax ) ){
         2243  +      rx = sqlite3OsTruncate(pWal->apWalFd[0], nMax);
         2244  +    }
         2245  +    sqlite3EndBenignMalloc();
         2246  +    if( rx ){
         2247  +      sqlite3_log(rx, "cannot limit WAL size: %s", pWal->zWalName);
         2248  +    }
  1919   2249     }
  1920   2250   }
  1921   2251   
  1922   2252   /*
  1923   2253   ** Close a connection to a log file.
  1924   2254   */
  1925   2255   int sqlite3WalClose(
................................................................................
  1940   2270       ** the wal and wal-index files.
  1941   2271       **
  1942   2272       ** The EXCLUSIVE lock is not released before returning.
  1943   2273       */
  1944   2274       if( zBuf!=0
  1945   2275        && SQLITE_OK==(rc = sqlite3OsLock(pWal->pDbFd, SQLITE_LOCK_EXCLUSIVE))
  1946   2276       ){
         2277  +      int i;
  1947   2278         if( pWal->exclusiveMode==WAL_NORMAL_MODE ){
  1948   2279           pWal->exclusiveMode = WAL_EXCLUSIVE_MODE;
  1949   2280         }
  1950         -      rc = sqlite3WalCheckpoint(pWal, db, 
  1951         -          SQLITE_CHECKPOINT_PASSIVE, 0, 0, sync_flags, nBuf, zBuf, 0, 0
  1952         -      );
  1953         -      if( rc==SQLITE_OK ){
  1954         -        int bPersist = -1;
  1955         -        sqlite3OsFileControlHint(
  1956         -            pWal->pDbFd, SQLITE_FCNTL_PERSIST_WAL, &bPersist
         2281  +      for(i=0; rc==SQLITE_OK && i<2; i++){
         2282  +        rc = sqlite3WalCheckpoint(pWal, db, 
         2283  +            SQLITE_CHECKPOINT_PASSIVE, 0, 0, sync_flags, nBuf, zBuf, 0, 0
  1957   2284           );
  1958         -        if( bPersist!=1 ){
  1959         -          /* Try to delete the WAL file if the checkpoint completed and
  1960         -          ** fsyned (rc==SQLITE_OK) and if we are not in persistent-wal
  1961         -          ** mode (!bPersist) */
  1962         -          isDelete = 1;
  1963         -        }else if( pWal->mxWalSize>=0 ){
  1964         -          /* Try to truncate the WAL file to zero bytes if the checkpoint
  1965         -          ** completed and fsynced (rc==SQLITE_OK) and we are in persistent
  1966         -          ** WAL mode (bPersist) and if the PRAGMA journal_size_limit is a
  1967         -          ** non-negative value (pWal->mxWalSize>=0).  Note that we truncate
  1968         -          ** to zero bytes as truncating to the journal_size_limit might
  1969         -          ** leave a corrupt WAL file on disk. */
  1970         -          walLimitSize(pWal, 0);
         2285  +        if( rc==SQLITE_OK ){
         2286  +          int bPersist = -1;
         2287  +          sqlite3OsFileControlHint(
         2288  +              pWal->pDbFd, SQLITE_FCNTL_PERSIST_WAL, &bPersist
         2289  +          );
         2290  +          if( bPersist!=1 ){
         2291  +            /* Try to delete the WAL file if the checkpoint completed and
         2292  +            ** fsynced (rc==SQLITE_OK) and if we are not in persistent-wal
         2293  +            ** mode (!bPersist) */
         2294  +            isDelete = 1;
         2295  +          }else if( pWal->mxWalSize>=0 ){
         2296  +            /* Try to truncate the WAL file to zero bytes if the checkpoint
         2297  +            ** completed and fsynced (rc==SQLITE_OK) and we are in persistent
         2298  +            ** WAL mode (bPersist) and if the PRAGMA journal_size_limit is a
         2299  +            ** non-negative value (pWal->mxWalSize>=0).  Note that we truncate
         2300  +            ** to zero bytes as truncating to the journal_size_limit might
         2301  +            ** leave a corrupt WAL file on disk. */
         2302  +            walLimitSize(pWal, 0);
         2303  +          }
  1971   2304           }
         2305  +
         2306  +        if( isWalMode2(pWal)==0 ) break;
         2307  +
         2308  +        walCkptInfo(pWal)->nBackfill = 0;
         2309  +        walidxSetFile(&pWal->hdr, !walidxGetFile(&pWal->hdr));
         2310  +        pWal->writeLock = 1;
         2311  +        walIndexWriteHdr(pWal);
         2312  +        pWal->writeLock = 0;
  1972   2313         }
  1973   2314       }
  1974   2315   
  1975   2316       walIndexClose(pWal, isDelete);
  1976         -    sqlite3OsClose(pWal->pWalFd);
  1977   2317       if( isDelete ){
  1978   2318         sqlite3BeginBenignMalloc();
  1979   2319         sqlite3OsDelete(pWal->pVfs, pWal->zWalName, 0);
         2320  +      sqlite3OsDelete(pWal->pVfs, pWal->zWalName2, 0);
  1980   2321         sqlite3EndBenignMalloc();
  1981   2322       }
  1982   2323       WALTRACE(("WAL%p: closed\n", pWal));
  1983   2324       sqlite3_free((void *)pWal->apWiData);
  1984   2325       sqlite3_free(pWal);
  1985   2326     }
  1986   2327     return rc;
................................................................................
  2111   2452       }
  2112   2453     }
  2113   2454   
  2114   2455     /* If the header is read successfully, check the version number to make
  2115   2456     ** sure the wal-index was not constructed with some future format that
  2116   2457     ** this version of SQLite cannot understand.
  2117   2458     */
  2118         -  if( badHdr==0 && pWal->hdr.iVersion!=WALINDEX_MAX_VERSION ){
         2459  +  if( badHdr==0 
         2460  +   && pWal->hdr.iVersion!=WAL_VERSION1 && pWal->hdr.iVersion!=WAL_VERSION2
         2461  +  ){
  2119   2462       rc = SQLITE_CANTOPEN_BKPT;
  2120   2463     }
  2121   2464   
  2122   2465     return rc;
  2123   2466   }
  2124   2467   
  2125   2468   /*
................................................................................
  2176   2519   ** checkpoint process do as much work as possible.  This routine might
  2177   2520   ** update values of the aReadMark[] array in the header, but if it does
  2178   2521   ** so it takes care to hold an exclusive lock on the corresponding
  2179   2522   ** WAL_READ_LOCK() while changing values.
  2180   2523   */
  2181   2524   static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
  2182   2525     volatile WalCkptInfo *pInfo;    /* Checkpoint information in wal-index */
  2183         -  u32 mxReadMark;                 /* Largest aReadMark[] value */
  2184         -  int mxI;                        /* Index of largest aReadMark[] value */
  2185         -  int i;                          /* Loop counter */
  2186   2526     int rc = SQLITE_OK;             /* Return code  */
  2187         -  u32 mxFrame;                    /* Wal frame to lock to */
  2188   2527   
  2189         -  assert( pWal->readLock<0 );     /* Not currently locked */
         2528  +  assert( pWal->readLock==WAL_LOCK_NONE );     /* Not currently locked */
  2190   2529   
  2191   2530     /* Take steps to avoid spinning forever if there is a protocol error.
  2192   2531     **
  2193   2532     ** Circumstances that cause a RETRY should only last for the briefest
  2194   2533     ** instances of time.  No I/O or other system calls are done while the
  2195   2534     ** locks are held, so the locks should not be held for very long. But 
  2196   2535     ** if we are unlucky, another process that is holding a lock might get
................................................................................
  2244   2583       }
  2245   2584       if( rc!=SQLITE_OK ){
  2246   2585         return rc;
  2247   2586       }
  2248   2587     }
  2249   2588   
  2250   2589     pInfo = walCkptInfo(pWal);
  2251         -  if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame 
  2252         -#ifdef SQLITE_ENABLE_SNAPSHOT
  2253         -   && (pWal->pSnapshot==0 || pWal->hdr.mxFrame==0
  2254         -     || 0==memcmp(&pWal->hdr, pWal->pSnapshot, sizeof(WalIndexHdr)))
  2255         -#endif
  2256         -  ){
  2257         -    /* The WAL has been completely backfilled (or it is empty).
  2258         -    ** and can be safely ignored.
  2259         -    */
  2260         -    rc = walLockShared(pWal, WAL_READ_LOCK(0));
  2261         -    walShmBarrier(pWal);
  2262         -    if( rc==SQLITE_OK ){
  2263         -      if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){
  2264         -        /* It is not safe to allow the reader to continue here if frames
  2265         -        ** may have been appended to the log before READ_LOCK(0) was obtained.
  2266         -        ** When holding READ_LOCK(0), the reader ignores the entire log file,
  2267         -        ** which implies that the database file contains a trustworthy
  2268         -        ** snapshot. Since holding READ_LOCK(0) prevents a checkpoint from
  2269         -        ** happening, this is usually correct.
  2270         -        **
  2271         -        ** However, if frames have been appended to the log (or if the log 
  2272         -        ** is wrapped and written for that matter) before the READ_LOCK(0)
  2273         -        ** is obtained, that is not necessarily true. A checkpointer may
  2274         -        ** have started to backfill the appended frames but crashed before
  2275         -        ** it finished. Leaving a corrupt image in the database file.
  2276         -        */
  2277         -        walUnlockShared(pWal, WAL_READ_LOCK(0));
  2278         -        return WAL_RETRY;
  2279         -      }
  2280         -      pWal->readLock = 0;
  2281         -      return SQLITE_OK;
  2282         -    }else if( rc!=SQLITE_BUSY ){
         2590  +  if( isWalMode2(pWal) ){
         2591  +    int eLock = 1 + (walidxGetFile(&pWal->hdr)*2);
         2592  +    if( pInfo->nBackfill==0 ){
         2593  +      eLock += walidxGetMxFrame(&pWal->hdr, !walidxGetFile(&pWal->hdr))>0;
         2594  +    }
         2595  +    rc = walLockReader(pWal, eLock, 1);
         2596  +    if( rc!=SQLITE_OK ){
  2283   2597         return rc;
  2284   2598       }
  2285         -  }
  2286   2599   
  2287         -  /* If we get this far, it means that the reader will want to use
  2288         -  ** the WAL to get at content from recent commits.  The job now is
  2289         -  ** to select one of the aReadMark[] entries that is closest to
  2290         -  ** but not exceeding pWal->hdr.mxFrame and lock that entry.
  2291         -  */
  2292         -  mxReadMark = 0;
  2293         -  mxI = 0;
  2294         -  mxFrame = pWal->hdr.mxFrame;
         2600  +    walShmBarrier(pWal);
         2601  +    if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){
         2602  +      walLockReader(pWal, eLock, 0);
         2603  +      return WAL_RETRY;
         2604  +    }else{
         2605  +      pWal->readLock = eLock;
         2606  +    }
         2607  +    assert( pWal->minFrame==0 && walFramePage(pWal->minFrame)==0 );
         2608  +  }else{
         2609  +    u32 mxReadMark;               /* Largest aReadMark[] value */
         2610  +    int mxI;                      /* Index of largest aReadMark[] value */
         2611  +    int i;                        /* Loop counter */
         2612  +    u32 mxFrame;                  /* Wal frame to lock to */
         2613  +
         2614  +    if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame 
  2295   2615   #ifdef SQLITE_ENABLE_SNAPSHOT
  2296         -  if( pWal->pSnapshot && pWal->pSnapshot->mxFrame<mxFrame ){
  2297         -    mxFrame = pWal->pSnapshot->mxFrame;
  2298         -  }
         2616  +     && (pWal->pSnapshot==0 || pWal->hdr.mxFrame==0
         2617  +       || 0==memcmp(&pWal->hdr, pWal->pSnapshot, sizeof(WalIndexHdr)))
  2299   2618   #endif
  2300         -  for(i=1; i<WAL_NREADER; i++){
  2301         -    u32 thisMark = pInfo->aReadMark[i];
  2302         -    if( mxReadMark<=thisMark && thisMark<=mxFrame ){
  2303         -      assert( thisMark!=READMARK_NOT_USED );
  2304         -      mxReadMark = thisMark;
  2305         -      mxI = i;
  2306         -    }
  2307         -  }
  2308         -  if( (pWal->readOnly & WAL_SHM_RDONLY)==0
  2309         -   && (mxReadMark<mxFrame || mxI==0)
  2310         -  ){
  2311         -    for(i=1; i<WAL_NREADER; i++){
  2312         -      rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1);
         2619  +      ){
         2620  +      /* The WAL has been completely backfilled (or it is empty)
         2621  +      ** and can be safely ignored.
         2622  +      */
         2623  +      rc = walLockShared(pWal, WAL_READ_LOCK(0));
         2624  +      walShmBarrier(pWal);
  2313   2625         if( rc==SQLITE_OK ){
  2314         -        mxReadMark = pInfo->aReadMark[i] = mxFrame;
  2315         -        mxI = i;
  2316         -        walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
  2317         -        break;
         2626  +        if( memcmp((void*)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){
         2627  +          /* It is not safe to allow the reader to continue here if frames
         2628  +          ** may have been appended to the log before READ_LOCK(0) was obtained.
         2629  +          ** When holding READ_LOCK(0), the reader ignores the entire log file,
         2630  +          ** which implies that the database file contains a trustworthy
         2631  +          ** snapshot. Since holding READ_LOCK(0) prevents a checkpoint from
         2632  +          ** happening, this is usually correct.
         2633  +          **
         2634  +          ** However, if frames have been appended to the log (or if the log 
         2635  +          ** is wrapped and written for that matter) before the READ_LOCK(0)
         2636  +          ** is obtained, that is not necessarily true. A checkpointer may
         2637  +          ** have started to backfill the appended frames but crashed before
         2638  +          ** it finished, leaving a corrupt image in the database file.
         2639  +          */
         2640  +          walUnlockShared(pWal, WAL_READ_LOCK(0));
         2641  +          return WAL_RETRY;
         2642  +        }
         2643  +        pWal->readLock = 0;
         2644  +        return SQLITE_OK;
  2318   2645         }else if( rc!=SQLITE_BUSY ){
  2319   2646           return rc;
  2320   2647         }
  2321   2648       }
  2322         -  }
  2323         -  if( mxI==0 ){
  2324         -    assert( rc==SQLITE_BUSY || (pWal->readOnly & WAL_SHM_RDONLY)!=0 );
  2325         -    return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTLOCK;
  2326         -  }
  2327         -
  2328         -  rc = walLockShared(pWal, WAL_READ_LOCK(mxI));
  2329         -  if( rc ){
  2330         -    return rc==SQLITE_BUSY ? WAL_RETRY : rc;
  2331         -  }
  2332         -  /* Now that the read-lock has been obtained, check that neither the
  2333         -  ** value in the aReadMark[] array or the contents of the wal-index
  2334         -  ** header have changed.
  2335         -  **
  2336         -  ** It is necessary to check that the wal-index header did not change
  2337         -  ** between the time it was read and when the shared-lock was obtained
  2338         -  ** on WAL_READ_LOCK(mxI) was obtained to account for the possibility
  2339         -  ** that the log file may have been wrapped by a writer, or that frames
  2340         -  ** that occur later in the log than pWal->hdr.mxFrame may have been
  2341         -  ** copied into the database by a checkpointer. If either of these things
  2342         -  ** happened, then reading the database with the current value of
  2343         -  ** pWal->hdr.mxFrame risks reading a corrupted snapshot. So, retry
  2344         -  ** instead.
  2345         -  **
  2346         -  ** Before checking that the live wal-index header has not changed
  2347         -  ** since it was read, set Wal.minFrame to the first frame in the wal
  2348         -  ** file that has not yet been checkpointed. This client will not need
  2349         -  ** to read any frames earlier than minFrame from the wal file - they
  2350         -  ** can be safely read directly from the database file.
  2351         -  **
  2352         -  ** Because a ShmBarrier() call is made between taking the copy of 
  2353         -  ** nBackfill and checking that the wal-header in shared-memory still
  2354         -  ** matches the one cached in pWal->hdr, it is guaranteed that the 
  2355         -  ** checkpointer that set nBackfill was not working with a wal-index
  2356         -  ** header newer than that cached in pWal->hdr. If it were, that could
  2357         -  ** cause a problem. The checkpointer could omit to checkpoint
  2358         -  ** a version of page X that lies before pWal->minFrame (call that version
  2359         -  ** A) on the basis that there is a newer version (version B) of the same
  2360         -  ** page later in the wal file. But if version B happens to like past
  2361         -  ** frame pWal->hdr.mxFrame - then the client would incorrectly assume
  2362         -  ** that it can read version A from the database file. However, since
  2363         -  ** we can guarantee that the checkpointer that set nBackfill could not
  2364         -  ** see any pages past pWal->hdr.mxFrame, this problem does not come up.
  2365         -  */
  2366         -  pWal->minFrame = pInfo->nBackfill+1;
  2367         -  walShmBarrier(pWal);
  2368         -  if( pInfo->aReadMark[mxI]!=mxReadMark
  2369         -   || memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr))
  2370         -  ){
  2371         -    walUnlockShared(pWal, WAL_READ_LOCK(mxI));
  2372         -    return WAL_RETRY;
  2373         -  }else{
  2374         -    assert( mxReadMark<=pWal->hdr.mxFrame );
  2375         -    pWal->readLock = (i16)mxI;
         2649  +
         2650  +    /* If we get this far, it means that the reader will want to use
         2651  +    ** the WAL to get at content from recent commits.  The job now is
         2652  +    ** to select one of the aReadMark[] entries that is closest to
         2653  +    ** but not exceeding pWal->hdr.mxFrame and lock that entry.
         2654  +    */
         2655  +    mxReadMark = 0;
         2656  +    mxI = 0;
         2657  +    mxFrame = pWal->hdr.mxFrame;
         2658  +#ifdef SQLITE_ENABLE_SNAPSHOT
         2659  +    if( pWal->pSnapshot && pWal->pSnapshot->mxFrame<mxFrame ){
         2660  +      mxFrame = pWal->pSnapshot->mxFrame;
         2661  +    }
         2662  +#endif
         2663  +    for(i=1; i<WAL_NREADER; i++){
         2664  +      u32 thisMark = pInfo->aReadMark[i];
         2665  +      if( mxReadMark<=thisMark && thisMark<=mxFrame ){
         2666  +        assert( thisMark!=READMARK_NOT_USED );
         2667  +        mxReadMark = thisMark;
         2668  +        mxI = i;
         2669  +      }
         2670  +    }
         2671  +    if( (pWal->readOnly & WAL_SHM_RDONLY)==0
         2672  +        && (mxReadMark<mxFrame || mxI==0)
         2673  +      ){
         2674  +      for(i=1; i<WAL_NREADER; i++){
         2675  +        rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1);
         2676  +        if( rc==SQLITE_OK ){
         2677  +          mxReadMark = pInfo->aReadMark[i] = mxFrame;
         2678  +          mxI = i;
         2679  +          walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
         2680  +          break;
         2681  +        }else if( rc!=SQLITE_BUSY ){
         2682  +          return rc;
         2683  +        }
         2684  +      }
         2685  +    }
         2686  +    if( mxI==0 ){
         2687  +      assert( rc==SQLITE_BUSY || (pWal->readOnly & WAL_SHM_RDONLY)!=0 );
         2688  +      return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTLOCK;
         2689  +    }
         2690  +
         2691  +    rc = walLockShared(pWal, WAL_READ_LOCK(mxI));
         2692  +    if( rc ){
         2693  +      return rc==SQLITE_BUSY ? WAL_RETRY : rc;
         2694  +    }
         2695  +    /* Now that the read-lock has been obtained, check that neither the
         2696  +    ** value in the aReadMark[] array nor the contents of the wal-index
         2697  +    ** header have changed.
         2698  +    **
         2699  +    ** It is necessary to check that the wal-index header did not change
         2700  +    ** between the time it was read and when the shared-lock
         2701  +    ** on WAL_READ_LOCK(mxI) was obtained to account for the possibility
         2702  +    ** that the log file may have been wrapped by a writer, or that frames
         2703  +    ** that occur later in the log than pWal->hdr.mxFrame may have been
         2704  +    ** copied into the database by a checkpointer. If either of these things
         2705  +    ** happened, then reading the database with the current value of
         2706  +    ** pWal->hdr.mxFrame risks reading a corrupted snapshot. So, retry
         2707  +    ** instead.
         2708  +    **
         2709  +    ** Before checking that the live wal-index header has not changed
         2710  +    ** since it was read, set Wal.minFrame to the first frame in the wal
         2711  +    ** file that has not yet been checkpointed. This client will not need
         2712  +    ** to read any frames earlier than minFrame from the wal file - they
         2713  +    ** can be safely read directly from the database file.
         2714  +    **
         2715  +    ** Because a ShmBarrier() call is made between taking the copy of 
         2716  +    ** nBackfill and checking that the wal-header in shared-memory still
         2717  +    ** matches the one cached in pWal->hdr, it is guaranteed that the 
         2718  +    ** checkpointer that set nBackfill was not working with a wal-index
         2719  +    ** header newer than that cached in pWal->hdr. If it were, that could
         2720  +    ** cause a problem. The checkpointer could omit to checkpoint
         2721  +    ** a version of page X that lies before pWal->minFrame (call that version
         2722  +    ** A) on the basis that there is a newer version (version B) of the same
         2723  +    ** page later in the wal file. But if version B happens to lie past
         2724  +    ** frame pWal->hdr.mxFrame - then the client would incorrectly assume
         2725  +    ** that it can read version A from the database file. However, since
         2726  +    ** we can guarantee that the checkpointer that set nBackfill could not
         2727  +    ** see any pages past pWal->hdr.mxFrame, this problem does not come up.
         2728  +    */
         2729  +    pWal->minFrame = pInfo->nBackfill+1;
         2730  +    walShmBarrier(pWal);
         2731  +    if( pInfo->aReadMark[mxI]!=mxReadMark
         2732  +        || memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr))
         2733  +      ){
         2734  +      walUnlockShared(pWal, WAL_READ_LOCK(mxI));
         2735  +      return WAL_RETRY;
         2736  +    }else{
         2737  +      assert( mxReadMark<=pWal->hdr.mxFrame );
         2738  +      pWal->readLock = (i16)mxI;
         2739  +    }
  2376   2740     }
  2377   2741     return rc;
  2378   2742   }
  2379   2743   
  2380   2744   #ifdef SQLITE_ENABLE_SNAPSHOT
  2381   2745   /*
  2382   2746   ** Attempt to reduce the value of the WalCkptInfo.nBackfillAttempted 
................................................................................
  2484   2848     do{
  2485   2849       rc = walTryBeginRead(pWal, pChanged, 0, ++cnt);
  2486   2850     }while( rc==WAL_RETRY );
  2487   2851     testcase( (rc&0xff)==SQLITE_BUSY );
  2488   2852     testcase( (rc&0xff)==SQLITE_IOERR );
  2489   2853     testcase( rc==SQLITE_PROTOCOL );
  2490   2854     testcase( rc==SQLITE_OK );
         2855  +  
         2856  +  if( rc==SQLITE_OK && pWal->hdr.iVersion==WAL_VERSION2 ){
         2857  +    rc = walOpenWal2(pWal);
         2858  +  }
  2491   2859   
  2492   2860   #ifdef SQLITE_ENABLE_SNAPSHOT
  2493   2861     if( rc==SQLITE_OK ){
  2494   2862       if( pSnapshot && memcmp(pSnapshot, &pWal->hdr, sizeof(WalIndexHdr))!=0 ){
  2495   2863         /* At this point the client has a lock on an aReadMark[] slot holding
  2496   2864         ** a value equal to or smaller than pSnapshot->mxFrame, but pWal->hdr
  2497   2865         ** is populated with the wal-index header corresponding to the head
................................................................................
  2557   2925   
  2558   2926   /*
  2559   2927   ** Finish with a read transaction.  All this does is release the
  2560   2928   ** read-lock.
  2561   2929   */
  2562   2930   void sqlite3WalEndReadTransaction(Wal *pWal){
  2563   2931     sqlite3WalEndWriteTransaction(pWal);
  2564         -  if( pWal->readLock>=0 ){
  2565         -    walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock));
  2566         -    pWal->readLock = -1;
         2932  +  if( pWal->readLock!=WAL_LOCK_NONE ){
         2933  +    if( isWalMode2(pWal) ){
         2934  +      (void)walLockReader(pWal, pWal->readLock, 0);
         2935  +    }else{
         2936  +      walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock));
         2937  +    }
         2938  +    pWal->readLock = WAL_LOCK_NONE;
  2567   2939     }
  2568   2940   }
         2941  +
         2942  +/* Search hash table iHash for an entry matching page number
         2943  +** pgno. Each call to this function searches a single hash table
         2944  +** (each hash table indexes up to HASHTABLE_NPAGE frames).
         2945  +**
         2946  +** This code might run concurrently to the code in walIndexAppend()
         2947  +** that adds entries to the wal-index (and possibly to this hash 
         2948  +** table). This means the value just read from the hash 
         2949  +** slot (aHash[iKey]) may have been added before or after the 
         2950  +** current read transaction was opened. Values added after the
         2951  +** read transaction was opened may have been written incorrectly -
         2952  +** i.e. these slots may contain garbage data. However, we assume
         2953  +** that any slots written before the current read transaction was
         2954  +** opened remain unmodified.
         2955  +**
         2956  +** For the reasons above, the if(...) condition featured in the inner
         2957  +** loop of the following block is more stringent than would be required 
         2958  +** if we had exclusive access to the hash-table:
         2959  +**
         2960  +**   (aPgno[iFrame]==pgno): 
         2961  +**     This condition filters out normal hash-table collisions.
         2962  +**
         2963  +**   (iFrame<=iLast): 
         2964  +**     This condition filters out entries that were added to the hash
         2965  +**     table after the current read-transaction had started.
         2966  +*/
         2967  +static int walSearchHash(
         2968  +  Wal *pWal, 
         2969  +  u32 iLast,
         2970  +  int iHash, 
         2971  +  Pgno pgno, 
         2972  +  u32 *piRead
         2973  +){
         2974  +  volatile ht_slot *aHash;        /* Pointer to hash table */
         2975  +  volatile u32 *aPgno;            /* Pointer to array of page numbers */
         2976  +  u32 iZero;                      /* Frame number corresponding to aPgno[0] */
         2977  +  int iKey;                       /* Hash slot index */
         2978  +  int nCollide;                   /* Number of hash collisions remaining */
         2979  +  int rc;                         /* Error code */
         2980  +
         2981  +  rc = walHashGet(pWal, iHash, &aHash, &aPgno, &iZero);
         2982  +  if( rc!=SQLITE_OK ){
         2983  +    return rc;
         2984  +  }
         2985  +  nCollide = HASHTABLE_NSLOT;
         2986  +  for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){
         2987  +    u32 iFrame = aHash[iKey] + iZero;
         2988  +    if( iFrame<=iLast && iFrame>=pWal->minFrame && aPgno[aHash[iKey]]==pgno ){
         2989  +      assert( iFrame>*piRead || CORRUPT_DB );
         2990  +      *piRead = iFrame;
         2991  +    }
         2992  +    if( (nCollide--)==0 ){
         2993  +      return SQLITE_CORRUPT_BKPT;
         2994  +    }
         2995  +  }
         2996  +
         2997  +  return SQLITE_OK;
         2998  +}
         2999  +
  2569   3000   
  2570   3001   /*
  2571   3002   ** Search the wal file for page pgno. If found, set *piRead to the frame that
  2572   3003   ** contains the page. Otherwise, if pgno is not in the wal file, set *piRead
  2573   3004   ** to zero.
  2574   3005   **
  2575   3006   ** Return SQLITE_OK if successful, or an error code if an error occurs. If an
................................................................................
  2576   3007   ** error does occur, the final value of *piRead is undefined.
  2577   3008   */
  2578   3009   int sqlite3WalFindFrame(
  2579   3010     Wal *pWal,                      /* WAL handle */
  2580   3011     Pgno pgno,                      /* Database page number to read data for */
  2581   3012     u32 *piRead                     /* OUT: Frame number (or zero) */
  2582   3013   ){
         3014  +  int bWal2 = isWalMode2(pWal);
         3015  +  int iApp = walidxGetFile(&pWal->hdr);
         3016  +  int rc = SQLITE_OK;
  2583   3017     u32 iRead = 0;                  /* If !=0, WAL frame to return data from */
  2584         -  u32 iLast = pWal->hdr.mxFrame;  /* Last page in WAL for this reader */
         3018  +  u32 iLast;                      /* Last frame in wal file */
  2585   3019     int iHash;                      /* Used to loop through N hash tables */
  2586         -  int iMinHash;
  2587   3020   
  2588   3021     /* This routine is only called from within a read transaction. */
  2589         -  assert( pWal->readLock>=0 || pWal->lockError );
  2590         -
  2591         -  /* If the "last page" field of the wal-index header snapshot is 0, then
  2592         -  ** no data will be read from the wal under any circumstances. Return early
  2593         -  ** in this case as an optimization.  Likewise, if pWal->readLock==0, 
  2594         -  ** then the WAL is ignored by the reader so return early, as if the 
  2595         -  ** WAL were empty.
  2596         -  */
  2597         -  if( iLast==0 || pWal->readLock==0 ){
  2598         -    *piRead = 0;
  2599         -    return SQLITE_OK;
  2600         -  }
  2601         -
  2602         -  /* Search the hash table or tables for an entry matching page number
  2603         -  ** pgno. Each iteration of the following for() loop searches one
  2604         -  ** hash table (each hash table indexes up to HASHTABLE_NPAGE frames).
  2605         -  **
  2606         -  ** This code might run concurrently to the code in walIndexAppend()
  2607         -  ** that adds entries to the wal-index (and possibly to this hash 
  2608         -  ** table). This means the value just read from the hash 
  2609         -  ** slot (aHash[iKey]) may have been added before or after the 
  2610         -  ** current read transaction was opened. Values added after the
  2611         -  ** read transaction was opened may have been written incorrectly -
  2612         -  ** i.e. these slots may contain garbage data. However, we assume
  2613         -  ** that any slots written before the current read transaction was
  2614         -  ** opened remain unmodified.
  2615         -  **
  2616         -  ** For the reasons above, the if(...) condition featured in the inner
  2617         -  ** loop of the following block is more stringent that would be required 
  2618         -  ** if we had exclusive access to the hash-table:
  2619         -  **
  2620         -  **   (aPgno[iFrame]==pgno): 
  2621         -  **     This condition filters out normal hash-table collisions.
  2622         -  **
  2623         -  **   (iFrame<=iLast): 
  2624         -  **     This condition filters out entries that were added to the hash
  2625         -  **     table after the current read-transaction had started.
  2626         -  */
  2627         -  iMinHash = walFramePage(pWal->minFrame);
  2628         -  for(iHash=walFramePage(iLast); iHash>=iMinHash && iRead==0; iHash--){
  2629         -    volatile ht_slot *aHash;      /* Pointer to hash table */
  2630         -    volatile u32 *aPgno;          /* Pointer to array of page numbers */
  2631         -    u32 iZero;                    /* Frame number corresponding to aPgno[0] */
  2632         -    int iKey;                     /* Hash slot index */
  2633         -    int nCollide;                 /* Number of hash collisions remaining */
  2634         -    int rc;                       /* Error code */
  2635         -
  2636         -    rc = walHashGet(pWal, iHash, &aHash, &aPgno, &iZero);
  2637         -    if( rc!=SQLITE_OK ){
  2638         -      return rc;
  2639         -    }
  2640         -    nCollide = HASHTABLE_NSLOT;
  2641         -    for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){
  2642         -      u32 iFrame = aHash[iKey] + iZero;
  2643         -      if( iFrame<=iLast && iFrame>=pWal->minFrame && aPgno[aHash[iKey]]==pgno ){
  2644         -        assert( iFrame>iRead || CORRUPT_DB );
  2645         -        iRead = iFrame;
  2646         -      }
  2647         -      if( (nCollide--)==0 ){
  2648         -        return SQLITE_CORRUPT_BKPT;
  2649         -      }
  2650         -    }
  2651         -  }
         3022  +  assert( pWal->readLock!=WAL_LOCK_NONE );
         3023  +
         3024  +  /* If this is a wal2 system, the client must have a partial-wal lock 
         3025  +  ** on wal file iApp. Or if it is a wal system, iApp==0 must be true.  */
         3026  +  assert( bWal2==0 || iApp==1
         3027  +       || pWal->readLock==WAL_LOCK_PART1 || pWal->readLock==WAL_LOCK_PART1_FULL2
         3028  +  );
         3029  +  assert( bWal2==0 || iApp==0
         3030  +       || pWal->readLock==WAL_LOCK_PART2 || pWal->readLock==WAL_LOCK_PART2_FULL1
         3031  +  );
         3032  +  assert( bWal2 || iApp==0 );
         3033  +
         3034  +  /* Search the wal file that the client holds a partial lock on first */
         3035  +  iLast = walidxGetMxFrame(&pWal->hdr, iApp);
         3036  +  if( iLast ){
         3037  +    u32 iExternal = bWal2 ? walExternalEncode(iApp, iLast) : iLast;
         3038  +    int iMinHash = walFramePage(pWal->minFrame);
         3039  +    for(iHash=walFramePage(iExternal); 
         3040  +        iHash>=iMinHash && iRead==0; 
         3041  +        iHash-=(1+bWal2)
         3042  +    ){
         3043  +      rc = walSearchHash(pWal, iExternal, iHash, pgno, &iRead);
         3044  +      if( rc!=SQLITE_OK ) break;
         3045  +    }
         3046  +  }
         3047  +
         3048  +  /* If the requested page was not found, no error has occurred, and 
         3049  +  ** the client holds a full-wal lock on the other wal file, search it
         3050  +  ** too.  */
         3051  +  if( rc==SQLITE_OK && bWal2 && iRead==0 && (
         3052  +        pWal->readLock==WAL_LOCK_PART1_FULL2 
         3053  +     || pWal->readLock==WAL_LOCK_PART2_FULL1
         3054  +  )){
         3055  +    iLast = walidxGetMxFrame(&pWal->hdr, !iApp);
         3056  +    if( iLast ){
         3057  +      u32 iExternal = walExternalEncode(!iApp, iLast);
         3058  +      for(iHash=walFramePage2(!iApp, iLast); iHash>=0 && iRead==0; iHash -= 2){
         3059  +        rc = walSearchHash(pWal, iExternal, iHash, pgno, &iRead);
         3060  +        if( rc!=SQLITE_OK ) break;
         3061  +      }
         3062  +    }
         3063  +  }
         3064  +
         3065  +#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
         3066  +  if( iRead ){ 
         3067  +    u32 iFrame;
         3068  +    int iWal = walExternalDecode(iRead, &iFrame);
         3069  +    WALTRACE(("WAL%p: page %d @ frame %d wal %d\n",pWal,(int)pgno,iFrame,iWal));
         3070  +  }else{
         3071  +    WALTRACE(("WAL%p: page %d not found\n", pWal, (int)pgno));
         3072  +  }
         3073  +#endif
  2652   3074   
  2653   3075   #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
  2654   3076     /* If expensive assert() statements are available, do a linear search
  2655   3077     ** of the wal-index file content. Make sure the results agree with the
  2656         -  ** result obtained using the hash indexes above.  */
         3078  +  ** result obtained using the hash indexes above.  
         3079  +  **
         3080  +  ** TODO: This is broken for wal2.
         3081  +  */
  2657   3082     {
  2658   3083       u32 iRead2 = 0;
  2659   3084       u32 iTest;
  2660   3085       assert( pWal->minFrame>0 );
  2661   3086       for(iTest=iLast; iTest>=pWal->minFrame; iTest--){
  2662   3087         if( walFramePgno(pWal, iTest)==pgno ){
  2663   3088           iRead2 = iTest;
................................................................................
  2675   3100   /*
  2676   3101   ** Read the contents of frame iRead from the wal file into buffer pOut
  2677   3102   ** (which is nOut bytes in size). Return SQLITE_OK if successful, or an
  2678   3103   ** error code otherwise.
  2679   3104   */
  2680   3105   int sqlite3WalReadFrame(
  2681   3106     Wal *pWal,                      /* WAL handle */
  2682         -  u32 iRead,                      /* Frame to read */
         3107  +  u32 iExternal,                  /* Frame to read */
  2683   3108     int nOut,                       /* Size of buffer pOut in bytes */
  2684   3109     u8 *pOut                        /* Buffer to write page data to */
  2685   3110   ){
  2686   3111     int sz;
         3112  +  int iWal = 0;
         3113  +  u32 iRead;
  2687   3114     i64 iOffset;
         3115  +
         3116  +  /* Figure out the page size */
  2688   3117     sz = pWal->hdr.szPage;
  2689   3118     sz = (sz&0xfe00) + ((sz&0x0001)<<16);
  2690   3119     testcase( sz<=32768 );
  2691   3120     testcase( sz>=65536 );
         3121  +
         3122  +  if( isWalMode2(pWal) ){
         3123  +    /* Figure out which of the two wal files, and the frame within, that 
         3124  +    ** iExternal refers to.  */
         3125  +    iWal = walExternalDecode(iExternal, &iRead);
         3126  +  }else{
         3127  +    iRead = iExternal;
         3128  +  }
         3129  +
         3130  +  WALTRACE(("WAL%p: reading frame %d wal %d\n", pWal, iRead, iWal));
  2692   3131     iOffset = walFrameOffset(iRead, sz) + WAL_FRAME_HDRSIZE;
  2693   3132     /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */
  2694         -  return sqlite3OsRead(pWal->pWalFd, pOut, (nOut>sz ? sz : nOut), iOffset);
         3133  +  return sqlite3OsRead(pWal->apWalFd[iWal], pOut, (nOut>sz?sz:nOut), iOffset);
  2695   3134   }
  2696   3135   
  2697   3136   /* 
  2698   3137   ** Return the size of the database in pages (or zero, if unknown).
  2699   3138   */
  2700   3139   Pgno sqlite3WalDbsize(Wal *pWal){
  2701         -  if( pWal && ALWAYS(pWal->readLock>=0) ){
         3140  +  if( pWal && ALWAYS(pWal->readLock!=WAL_LOCK_NONE) ){
  2702   3141       return pWal->hdr.nPage;
  2703   3142     }
  2704   3143     return 0;
  2705   3144   }
  2706   3145   
  2707   3146   
  2708   3147   /* 
................................................................................
  2719   3158   ** There can only be a single writer active at a time.
  2720   3159   */
  2721   3160   int sqlite3WalBeginWriteTransaction(Wal *pWal){
  2722   3161     int rc;
  2723   3162   
  2724   3163     /* Cannot start a write transaction without first holding a read
  2725   3164     ** transaction. */
  2726         -  assert( pWal->readLock>=0 );
         3165  +  assert( pWal->readLock!=WAL_LOCK_NONE );
  2727   3166     assert( pWal->writeLock==0 && pWal->iReCksum==0 );
  2728   3167   
  2729   3168     if( pWal->readOnly ){
  2730   3169       return SQLITE_READONLY;
  2731   3170     }
  2732   3171   
  2733   3172     /* Only one writer allowed at a time.  Get the write lock.  Return
................................................................................
  2777   3216   **
  2778   3217   ** Otherwise, if the callback function does not return an error, this
  2779   3218   ** function returns SQLITE_OK.
  2780   3219   */
  2781   3220   int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){
  2782   3221     int rc = SQLITE_OK;
  2783   3222     if( ALWAYS(pWal->writeLock) ){
  2784         -    Pgno iMax = pWal->hdr.mxFrame;
         3223  +    int iWal = walidxGetFile(&pWal->hdr);
         3224  +    Pgno iMax = walidxGetMxFrame(&pWal->hdr, iWal);
         3225  +    Pgno iNew;
  2785   3226       Pgno iFrame;
  2786         -  
         3227  +
         3228  +    assert( isWalMode2(pWal) || iWal==0 );
         3229  +
  2787   3230       /* Restore the client's cache of the wal-index header to the state it
  2788   3231       ** was in before the client began writing to the database. 
  2789   3232       */
  2790   3233       memcpy(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr));
         3234  +    assert( walidxGetFile(&pWal->hdr)==iWal );
         3235  +    iNew = walidxGetMxFrame(&pWal->hdr, walidxGetFile(&pWal->hdr));
  2791   3236   
  2792         -    for(iFrame=pWal->hdr.mxFrame+1; 
  2793         -        ALWAYS(rc==SQLITE_OK) && iFrame<=iMax; 
  2794         -        iFrame++
  2795         -    ){
         3237  +    for(iFrame=iNew+1; ALWAYS(rc==SQLITE_OK) && iFrame<=iMax; iFrame++){
  2796   3238         /* This call cannot fail. Unless the page for which the page number
  2797   3239         ** is passed as the second argument is (a) in the cache and 
  2798   3240         ** (b) has an outstanding reference, then xUndo is either a no-op
  2799   3241         ** (if (a) is false) or simply expels the page from the cache (if (b)
  2800   3242         ** is false).
  2801   3243         **
  2802   3244         ** If the upper layer is doing a rollback, it is guaranteed that there
  2803   3245         ** are no outstanding references to any page other than page 1. And
  2804   3246         ** page 1 is never written to the log until the transaction is
  2805   3247         ** committed. As a result, the call to xUndo may not fail.
  2806   3248         */
  2807         -      assert( walFramePgno(pWal, iFrame)!=1 );
  2808         -      rc = xUndo(pUndoCtx, walFramePgno(pWal, iFrame));
         3249  +      Pgno pgno;
         3250  +      if( isWalMode2(pWal) ){
         3251  +        pgno = walFramePgno2(pWal, iWal, iFrame);
         3252  +      }else{
         3253  +        pgno = walFramePgno(pWal, iFrame);
         3254  +      }
         3255  +      assert( pgno!=1 );
         3256  +      rc = xUndo(pUndoCtx, pgno);
  2809   3257       }
  2810         -    if( iMax!=pWal->hdr.mxFrame ) walCleanupHash(pWal);
         3258  +    if( iMax!=iNew ) walCleanupHash(pWal);
  2811   3259     }
  2812   3260     return rc;
  2813   3261   }
  2814   3262   
  2815   3263   /* 
  2816   3264   ** Argument aWalData must point to an array of WAL_SAVEPOINT_NDATA u32 
  2817   3265   ** values. This function populates the array with values required to 
  2818   3266   ** "rollback" the write position of the WAL handle back to the current 
  2819   3267   ** point in the event of a savepoint rollback (via WalSavepointUndo()).
  2820   3268   */
  2821   3269   void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData){
         3270  +  int iWal = walidxGetFile(&pWal->hdr);
  2822   3271     assert( pWal->writeLock );
  2823         -  aWalData[0] = pWal->hdr.mxFrame;
         3272  +  assert( isWalMode2(pWal) || iWal==0 );
         3273  +  aWalData[0] = walidxGetMxFrame(&pWal->hdr, iWal);
  2824   3274     aWalData[1] = pWal->hdr.aFrameCksum[0];
  2825   3275     aWalData[2] = pWal->hdr.aFrameCksum[1];
  2826         -  aWalData[3] = pWal->nCkpt;
         3276  +  aWalData[3] = isWalMode2(pWal) ? iWal : pWal->nCkpt;
  2827   3277   }
  2828   3278   
  2829   3279   /* 
  2830   3280   ** Move the write position of the WAL back to the point identified by
  2831   3281   ** the values in the aWalData[] array. aWalData must point to an array
  2832   3282   ** of WAL_SAVEPOINT_NDATA u32 values that has been previously populated
  2833   3283   ** by a call to WalSavepoint().
  2834   3284   */
  2835   3285   int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){
  2836   3286     int rc = SQLITE_OK;
         3287  +  int iWal = walidxGetFile(&pWal->hdr);
         3288  +  int iCmp = isWalMode2(pWal) ? iWal : pWal->nCkpt;
  2837   3289   
  2838   3290     assert( pWal->writeLock );
  2839         -  assert( aWalData[3]!=pWal->nCkpt || aWalData[0]<=pWal->hdr.mxFrame );
         3291  +  assert( isWalMode2(pWal) || iWal==0 );
         3292  +  assert( aWalData[3]!=iCmp || aWalData[0]<=walidxGetMxFrame(&pWal->hdr,iWal) );
  2840   3293   
  2841         -  if( aWalData[3]!=pWal->nCkpt ){
         3294  +  if( aWalData[3]!=iCmp ){
  2842   3295       /* This savepoint was opened immediately after the write-transaction
  2843   3296       ** was started. Right after that, the writer decided to wrap around
  2844   3297       ** to the start of the log. Update the savepoint values to match.
  2845   3298       */
  2846   3299       aWalData[0] = 0;
  2847         -    aWalData[3] = pWal->nCkpt;
         3300  +    aWalData[3] = iCmp;
  2848   3301     }
  2849   3302   
  2850         -  if( aWalData[0]<pWal->hdr.mxFrame ){
  2851         -    pWal->hdr.mxFrame = aWalData[0];
         3303  +  if( aWalData[0]<walidxGetMxFrame(&pWal->hdr, iWal) ){
         3304  +    walidxSetMxFrame(&pWal->hdr, iWal, aWalData[0]);
  2852   3305       pWal->hdr.aFrameCksum[0] = aWalData[1];
  2853   3306       pWal->hdr.aFrameCksum[1] = aWalData[2];
  2854   3307       walCleanupHash(pWal);
  2855   3308     }
  2856   3309   
  2857   3310     return rc;
  2858   3311   }
         3312  +
         3313  +/*
         3314  +** This function is used in wal2 mode.
         3315  +**
         3316  +** This function is called when writer pWal is just about to start 
         3317  +** writing out frames. The "other" wal file (wal file !pWal->hdr.iAppend)
         3318  +** has been fully checkpointed. This function returns SQLITE_OK if there
         3319  +** are no readers preventing the writer from switching to the other wal
         3320  +** file. Or SQLITE_BUSY if there are.
         3321  +*/
         3322  +static int walRestartOk(Wal *pWal){
         3323  +  int rc;                                        /* Return code */
         3324  +  int iApp = walidxGetFile(&pWal->hdr);          /* Current WAL file */
         3325  +
         3326  +  /* No reader can be doing a "partial" read of wal file !iApp - in that
         3327  +  ** case it would not have been possible to checkpoint the file. So
         3328  +  ** it is only necessary to test for "full" readers. See the comment
         3329  +  ** above walLockReader() function for exactly what this means in terms
         3330  +  ** of locks.  */
         3331  +  int i = (iApp==0) ? 2 : 4;
         3332  +
         3333  +  rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1);
         3334  +  if( rc==SQLITE_OK ){
         3335  +    walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
         3336  +  }
         3337  +  return rc;
         3338  +}
  2859   3339   
  2860   3340   /*
  2861   3341   ** This function is called just before writing a set of frames to the log
  2862   3342   ** file (see sqlite3WalFrames()). It checks to see if, instead of appending
  2863         -** to the current log file, it is possible to overwrite the start of the
  2864         -** existing log file with the new frames (i.e. "reset" the log). If so,
  2865         -** it sets pWal->hdr.mxFrame to 0. Otherwise, pWal->hdr.mxFrame is left
  2866         -** unchanged.
         3343  +** to the current log file, it is possible and desirable to switch to the
         3344  +** other log file and write the new transaction to the start of it.
         3345  +** If so, the wal-index header is updated accordingly - both in heap memory
         3346  +** and in the *-shm file.
  2867   3347   **
  2868   3348   ** SQLITE_OK is returned if no error is encountered (regardless of whether
  2869         -** or not pWal->hdr.mxFrame is modified). An SQLite error code is returned
         3349  +** or not the wal-index header is modified). An SQLite error code is returned
  2870   3350   ** if an error occurs.
  2871   3351   */
  2872   3352   static int walRestartLog(Wal *pWal){
  2873   3353     int rc = SQLITE_OK;
  2874         -  int cnt;
  2875   3354   
  2876         -  if( pWal->readLock==0 ){
         3355  +  if( isWalMode2(pWal) ){
         3356  +    int iApp = walidxGetFile(&pWal->hdr);
         3357  +    int nWalSize = WAL_DEFAULT_WALSIZE;
         3358  +    if( pWal->mxWalSize>0 ){
         3359  +      nWalSize = (pWal->mxWalSize-WAL_HDRSIZE+pWal->szPage+WAL_FRAME_HDRSIZE-1) 
         3360  +        / (pWal->szPage+WAL_FRAME_HDRSIZE);
         3361  +      nWalSize = MAX(nWalSize, 1);
         3362  +    }
         3363  +
         3364  +    if( walidxGetMxFrame(&pWal->hdr, iApp)>=nWalSize ){
         3365  +      volatile WalCkptInfo *pInfo = walCkptInfo(pWal);
         3366  +      if( walidxGetMxFrame(&pWal->hdr, !iApp)==0 || pInfo->nBackfill ){
         3367  +        rc = walRestartOk(pWal);
         3368  +        if( rc==SQLITE_OK ){
         3369  +          iApp = !iApp;
         3370  +          pWal->nCkpt++;
         3371  +          walidxSetFile(&pWal->hdr, iApp);
         3372  +          walidxSetMxFrame(&pWal->hdr, iApp, 0);
         3373  +          sqlite3Put4byte((u8*)&pWal->hdr.aSalt[0], pWal->hdr.aFrameCksum[0]);
         3374  +          sqlite3Put4byte((u8*)&pWal->hdr.aSalt[1], pWal->hdr.aFrameCksum[1]);
         3375  +          walIndexWriteHdr(pWal);
         3376  +          pInfo->nBackfill = 0;
         3377  +          walLockReader(pWal, pWal->readLock, 0);
         3378  +          pWal->readLock = iApp ? WAL_LOCK_PART2_FULL1 : WAL_LOCK_PART1_FULL2;
         3379  +          rc = walLockReader(pWal, pWal->readLock, 1);
         3380  +        }else if( rc==SQLITE_BUSY ){
         3381  +          rc = SQLITE_OK;
         3382  +        }
         3383  +      }
         3384  +    }
         3385  +  }else if( pWal->readLock==0 ){
         3386  +    int cnt;
  2877   3387       volatile WalCkptInfo *pInfo = walCkptInfo(pWal);
  2878   3388       assert( pInfo->nBackfill==pWal->hdr.mxFrame );
  2879   3389       if( pInfo->nBackfill>0 ){
  2880   3390         u32 salt1;
  2881   3391         sqlite3_randomness(4, &salt1);
  2882   3392         rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
  2883   3393         if( rc==SQLITE_OK ){
................................................................................
  2893   3403           walRestartHdr(pWal, salt1);
  2894   3404           walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
  2895   3405         }else if( rc!=SQLITE_BUSY ){
  2896   3406           return rc;
  2897   3407         }
  2898   3408       }
  2899   3409       walUnlockShared(pWal, WAL_READ_LOCK(0));
  2900         -    pWal->readLock = -1;
         3410  +    pWal->readLock = WAL_LOCK_NONE;
  2901   3411       cnt = 0;
  2902   3412       do{
  2903   3413         int notUsed;
  2904   3414         rc = walTryBeginRead(pWal, &notUsed, 1, ++cnt);
  2905   3415       }while( rc==WAL_RETRY );
  2906   3416       assert( (rc&0xff)!=SQLITE_BUSY ); /* BUSY not possible when useWal==1 */
  2907   3417       testcase( (rc&0xff)==SQLITE_IOERR );
  2908   3418       testcase( rc==SQLITE_PROTOCOL );
  2909   3419       testcase( rc==SQLITE_OK );
  2910   3420     }
         3421  +
  2911   3422     return rc;
  2912   3423   }
  2913   3424   
  2914   3425   /*
  2915   3426   ** Information about the current state of the WAL file and where
  2916   3427   ** the next fsync should occur - passed from sqlite3WalFrames() into
  2917   3428   ** walWriteToLog().
................................................................................
  2962   3473     PgHdr *pPage,               /* The page of the frame to be written */
  2963   3474     int nTruncate,              /* The commit flag.  Usually 0.  >0 for commit */
  2964   3475     sqlite3_int64 iOffset       /* Byte offset at which to write */
  2965   3476   ){
  2966   3477     int rc;                         /* Result code from subfunctions */
  2967   3478     void *pData;                    /* Data actually written */
  2968   3479     u8 aFrame[WAL_FRAME_HDRSIZE];   /* Buffer to assemble frame-header in */
         3480  +
         3481  +#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
         3482  +  { 
         3483  +    int iWal = walidxGetFile(&p->pWal->hdr);
         3484  +    int iFrame = 1 + (iOffset / (WAL_FRAME_HDRSIZE + p->pWal->szPage));
         3485  +    assert( p->pWal->apWalFd[iWal]==p->pFd );
         3486  +    WALTRACE(("WAL%p: page %d written to frame %d of wal %d\n",
         3487  +          p->pWal, (int)pPage->pgno, iFrame, iWal
         3488  +    ));
         3489  +  }
         3490  +#endif
         3491  +
  2969   3492   #if defined(SQLITE_HAS_CODEC)
  2970   3493     if( (pData = sqlite3PagerCodec(pPage))==0 ) return SQLITE_NOMEM_BKPT;
  2971   3494   #else
  2972   3495     pData = pPage->pData;
  2973   3496   #endif
  2974   3497     walEncodeFrame(p->pWal, pPage->pgno, nTruncate, pData, aFrame);
  2975   3498     rc = walWriteToLog(p, aFrame, sizeof(aFrame), iOffset);
................................................................................
  2984   3507   ** one or more frames have been overwritten. It updates the checksums for
  2985   3508   ** all frames written to the wal file by the current transaction starting
  2986   3509   ** with the earliest to have been overwritten.
  2987   3510   **
  2988   3511   ** SQLITE_OK is returned if successful, or an SQLite error code otherwise.
  2989   3512   */
  2990   3513   static int walRewriteChecksums(Wal *pWal, u32 iLast){
  2991         -  const int szPage = pWal->szPage;/* Database page size */
  2992   3514     int rc = SQLITE_OK;             /* Return code */
         3515  +  const int szPage = pWal->szPage;/* Database page size */
  2993   3516     u8 *aBuf;                       /* Buffer to load data from wal file into */
  2994   3517     u8 aFrame[WAL_FRAME_HDRSIZE];   /* Buffer to assemble frame-headers in */
  2995   3518     u32 iRead;                      /* Next frame to read from wal file */
  2996   3519     i64 iCksumOff;
         3520  +
         3521  +  assert( isWalMode2(pWal)==0 );
  2997   3522   
  2998   3523     aBuf = sqlite3_malloc(szPage + WAL_FRAME_HDRSIZE);
  2999   3524     if( aBuf==0 ) return SQLITE_NOMEM_BKPT;
  3000   3525   
  3001   3526     /* Find the checksum values to use as input for recalculating the
  3002   3527     ** first checksum. If the first frame is frame 1 (implying that the current
  3003   3528     ** transaction restarted the wal file), these values must be read from the
................................................................................
  3005   3530     ** previous frame.  */
  3006   3531     assert( pWal->iReCksum>0 );
  3007   3532     if( pWal->iReCksum==1 ){
  3008   3533       iCksumOff = 24;
  3009   3534     }else{
  3010   3535       iCksumOff = walFrameOffset(pWal->iReCksum-1, szPage) + 16;
  3011   3536     }
  3012         -  rc = sqlite3OsRead(pWal->pWalFd, aBuf, sizeof(u32)*2, iCksumOff);
         3537  +  rc = sqlite3OsRead(pWal->apWalFd[0], aBuf, sizeof(u32)*2, iCksumOff);
  3013   3538     pWal->hdr.aFrameCksum[0] = sqlite3Get4byte(aBuf);
  3014   3539     pWal->hdr.aFrameCksum[1] = sqlite3Get4byte(&aBuf[sizeof(u32)]);
  3015   3540   
  3016   3541     iRead = pWal->iReCksum;
  3017   3542     pWal->iReCksum = 0;
  3018   3543     for(; rc==SQLITE_OK && iRead<=iLast; iRead++){
  3019   3544       i64 iOff = walFrameOffset(iRead, szPage);
  3020         -    rc = sqlite3OsRead(pWal->pWalFd, aBuf, szPage+WAL_FRAME_HDRSIZE, iOff);
         3545  +    rc = sqlite3OsRead(pWal->apWalFd[0], aBuf, szPage+WAL_FRAME_HDRSIZE, iOff);
  3021   3546       if( rc==SQLITE_OK ){
  3022   3547         u32 iPgno, nDbSize;
  3023   3548         iPgno = sqlite3Get4byte(aBuf);
  3024   3549         nDbSize = sqlite3Get4byte(&aBuf[4]);
  3025   3550   
  3026   3551         walEncodeFrame(pWal, iPgno, nDbSize, &aBuf[WAL_FRAME_HDRSIZE], aFrame);
  3027         -      rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOff);
         3552  +      rc = sqlite3OsWrite(pWal->apWalFd[0], aFrame, sizeof(aFrame), iOff);
  3028   3553       }
  3029   3554     }
  3030   3555   
  3031   3556     sqlite3_free(aBuf);
  3032   3557     return rc;
  3033   3558   }
  3034   3559   
................................................................................
  3050   3575     PgHdr *pLast = 0;               /* Last frame in list */
  3051   3576     int nExtra = 0;                 /* Number of extra copies of last page */
  3052   3577     int szFrame;                    /* The size of a single frame */
  3053   3578     i64 iOffset;                    /* Next byte to write in WAL file */
  3054   3579     WalWriter w;                    /* The writer */
  3055   3580     u32 iFirst = 0;                 /* First frame that may be overwritten */
  3056   3581     WalIndexHdr *pLive;             /* Pointer to shared header */
         3582  +  int iApp;
  3057   3583   
  3058   3584     assert( pList );
  3059   3585     assert( pWal->writeLock );
  3060   3586   
  3061   3587     /* If this frame set completes a transaction, then nTruncate>0.  If
  3062   3588     ** nTruncate==0 then this frame set does not complete the transaction. */
  3063   3589     assert( (isCommit!=0)==(nTruncate!=0) );
  3064   3590   
  3065         -#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
  3066         -  { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){}
  3067         -    WALTRACE(("WAL%p: frame write begin. %d frames. mxFrame=%d. %s\n",
  3068         -              pWal, cnt, pWal->hdr.mxFrame, isCommit ? "Commit" : "Spill"));
  3069         -  }
  3070         -#endif
  3071         -
  3072   3591     pLive = (WalIndexHdr*)walIndexHdr(pWal);
  3073   3592     if( memcmp(&pWal->hdr, (void *)pLive, sizeof(WalIndexHdr))!=0 ){
  3074         -    iFirst = pLive->mxFrame+1;
         3593  +    if( isWalMode2(pWal)==0 ){
         3594  +      iFirst = pLive->mxFrame+1;
         3595  +    }
  3075   3596     }
  3076   3597   
  3077   3598     /* See if it is possible to write these frames into the start of the
  3078   3599     ** log file, instead of appending to it at pWal->hdr.mxFrame.
  3079   3600     */
  3080         -  if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){
         3601  +  else if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){
  3081   3602       return rc;
  3082   3603     }
  3083   3604   
  3084   3605     /* If this is the first frame written into the log, write the WAL
  3085   3606     ** header to the start of the WAL file. See comments at the top of
  3086   3607     ** this source file for a description of the WAL header format.
  3087   3608     */
  3088         -  iFrame = pWal->hdr.mxFrame;
         3609  +  iApp = walidxGetFile(&pWal->hdr);
         3610  +  iFrame = walidxGetMxFrame(&pWal->hdr, iApp);
         3611  +  assert( iApp==0 || isWalMode2(pWal) );
         3612  +
         3613  +#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
         3614  +  { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){}
         3615  +    WALTRACE(("WAL%p: frame write begin. %d frames. iWal=%d. mxFrame=%d. %s\n",
         3616  +              pWal, cnt, iApp, iFrame, isCommit ? "Commit" : "Spill"));
         3617  +  }
         3618  +#endif
         3619  +
  3089   3620     if( iFrame==0 ){
         3621  +    u32 iCkpt = 0;
  3090   3622       u8 aWalHdr[WAL_HDRSIZE];      /* Buffer to assemble wal-header in */
  3091   3623       u32 aCksum[2];                /* Checksum for wal-header */
  3092   3624   
  3093   3625       sqlite3Put4byte(&aWalHdr[0], (WAL_MAGIC | SQLITE_BIGENDIAN));
  3094         -    sqlite3Put4byte(&aWalHdr[4], WAL_MAX_VERSION);
         3626  +    sqlite3Put4byte(&aWalHdr[4], pWal->hdr.iVersion);
  3095   3627       sqlite3Put4byte(&aWalHdr[8], szPage);
  3096         -    sqlite3Put4byte(&aWalHdr[12], pWal->nCkpt);
  3097         -    if( pWal->nCkpt==0 ) sqlite3_randomness(8, pWal->hdr.aSalt);
         3628  +    if( isWalMode2(pWal) ){
         3629  +      if( walidxGetMxFrame(&pWal->hdr, !iApp)>0 ){
         3630  +        u8 aPrev[4];
         3631  +        rc = sqlite3OsRead(pWal->apWalFd[!iApp], aPrev, 4, 12);
         3632  +        if( rc!=SQLITE_OK ){
         3633  +          return rc;
         3634  +        }
         3635  +        iCkpt = (sqlite3Get4byte(aPrev) + 1) & 0x0F;
         3636  +      }
         3637  +    }else{
         3638  +      iCkpt = pWal->nCkpt;
         3639  +    }
         3640  +    sqlite3Put4byte(&aWalHdr[12], iCkpt);
  3098   3641       memcpy(&aWalHdr[16], pWal->hdr.aSalt, 8);
  3099   3642       walChecksumBytes(1, aWalHdr, WAL_HDRSIZE-2*4, 0, aCksum);
  3100   3643       sqlite3Put4byte(&aWalHdr[24], aCksum[0]);
  3101   3644       sqlite3Put4byte(&aWalHdr[28], aCksum[1]);
  3102         -    
         3645  +
  3103   3646       pWal->szPage = szPage;
  3104   3647       pWal->hdr.bigEndCksum = SQLITE_BIGENDIAN;
  3105   3648       pWal->hdr.aFrameCksum[0] = aCksum[0];
  3106   3649       pWal->hdr.aFrameCksum[1] = aCksum[1];
  3107   3650       pWal->truncateOnCommit = 1;
  3108   3651   
  3109         -    rc = sqlite3OsWrite(pWal->pWalFd, aWalHdr, sizeof(aWalHdr), 0);
         3652  +    rc = sqlite3OsWrite(pWal->apWalFd[iApp], aWalHdr, sizeof(aWalHdr), 0);
  3110   3653       WALTRACE(("WAL%p: wal-header write %s\n", pWal, rc ? "failed" : "ok"));
  3111   3654       if( rc!=SQLITE_OK ){
  3112   3655         return rc;
  3113   3656       }
  3114   3657   
  3115   3658       /* Sync the header (unless SQLITE_IOCAP_SEQUENTIAL is true or unless
  3116   3659       ** all syncing is turned off by PRAGMA synchronous=OFF).  Otherwise
  3117   3660       ** an out-of-order write following a WAL restart could result in
  3118   3661       ** database corruption.  See the ticket:
  3119   3662       **
  3120   3663       **     https://sqlite.org/src/info/ff5be73dee
  3121   3664       */
  3122   3665       if( pWal->syncHeader ){
  3123         -      rc = sqlite3OsSync(pWal->pWalFd, CKPT_SYNC_FLAGS(sync_flags));
         3666  +      rc = sqlite3OsSync(pWal->apWalFd[iApp], CKPT_SYNC_FLAGS(sync_flags));
  3124   3667         if( rc ) return rc;
  3125   3668       }
  3126   3669     }
  3127   3670     assert( (int)pWal->szPage==szPage );
  3128   3671   
  3129   3672     /* Setup information needed to write frames into the WAL */
  3130   3673     w.pWal = pWal;
  3131         -  w.pFd = pWal->pWalFd;
         3674  +  w.pFd = pWal->apWalFd[iApp];
  3132   3675     w.iSyncPoint = 0;
  3133   3676     w.syncFlags = sync_flags;
  3134   3677     w.szPage = szPage;
  3135   3678     iOffset = walFrameOffset(iFrame+1, szPage);
  3136   3679     szFrame = szPage + WAL_FRAME_HDRSIZE;
  3137   3680   
  3138   3681     /* Write all frames into the log file exactly once */
................................................................................
  3154   3697             pWal->iReCksum = iWrite;
  3155   3698           }
  3156   3699   #if defined(SQLITE_HAS_CODEC)
  3157   3700           if( (pData = sqlite3PagerCodec(p))==0 ) return SQLITE_NOMEM;
  3158   3701   #else
  3159   3702           pData = p->pData;
  3160   3703   #endif
  3161         -        rc = sqlite3OsWrite(pWal->pWalFd, pData, szPage, iOff);
         3704  +        rc = sqlite3OsWrite(pWal->apWalFd[iApp], pData, szPage, iOff);
  3162   3705           if( rc ) return rc;
  3163   3706           p->flags &= ~PGHDR_WAL_APPEND;
  3164   3707           continue;
  3165   3708         }
  3166   3709       }
  3167   3710   
  3168   3711       iFrame++;
................................................................................
  3194   3737     ** boundary is crossed.  Only the part of the WAL prior to the last
  3195   3738     ** sector boundary is synced; the part of the last frame that extends
  3196   3739     ** past the sector boundary is written after the sync.
  3197   3740     */
  3198   3741     if( isCommit && WAL_SYNC_FLAGS(sync_flags)!=0 ){
  3199   3742       int bSync = 1;
  3200   3743       if( pWal->padToSectorBoundary ){
  3201         -      int sectorSize = sqlite3SectorSize(pWal->pWalFd);
         3744  +      int sectorSize = sqlite3SectorSize(w.pFd);
  3202   3745         w.iSyncPoint = ((iOffset+sectorSize-1)/sectorSize)*sectorSize;
  3203   3746         bSync = (w.iSyncPoint==iOffset);
  3204   3747         testcase( bSync );
  3205   3748         while( iOffset<w.iSyncPoint ){
  3206   3749           rc = walWriteOneFrame(&w, pLast, nTruncate, iOffset);
  3207   3750           if( rc ) return rc;
  3208   3751           iOffset += szFrame;
................................................................................
  3229   3772     }
  3230   3773   
  3231   3774     /* Append data to the wal-index. It is not necessary to lock the 
  3232   3775     ** wal-index to do this as the SQLITE_SHM_WRITE lock held on the wal-index
  3233   3776     ** guarantees that there are no other writers, and no data that may
  3234   3777     ** be in use by existing readers is being overwritten.
  3235   3778     */
  3236         -  iFrame = pWal->hdr.mxFrame;
         3779  +  iFrame = walidxGetMxFrame(&pWal->hdr, iApp);
  3237   3780     for(p=pList; p && rc==SQLITE_OK; p=p->pDirty){
  3238   3781       if( (p->flags & PGHDR_WAL_APPEND)==0 ) continue;
  3239   3782       iFrame++;
  3240         -    rc = walIndexAppend(pWal, iFrame, p->pgno);
         3783  +    rc = walIndexAppend(pWal, iApp, iFrame, p->pgno);
  3241   3784     }
  3242   3785     while( rc==SQLITE_OK && nExtra>0 ){
  3243   3786       iFrame++;
  3244   3787       nExtra--;
  3245         -    rc = walIndexAppend(pWal, iFrame, pLast->pgno);
         3788  +    rc = walIndexAppend(pWal, iApp, iFrame, pLast->pgno);
  3246   3789     }
  3247   3790   
  3248   3791     if( rc==SQLITE_OK ){
  3249   3792       /* Update the private copy of the header. */
  3250   3793       pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16));
  3251   3794       testcase( szPage<=32768 );
  3252   3795       testcase( szPage>=65536 );
  3253         -    pWal->hdr.mxFrame = iFrame;
         3796  +    walidxSetMxFrame(&pWal->hdr, iApp, iFrame);
  3254   3797       if( isCommit ){
  3255   3798         pWal->hdr.iChange++;
  3256   3799         pWal->hdr.nPage = nTruncate;
  3257   3800       }
  3258   3801       /* If this is a commit, update the wal-index header too. */
  3259   3802       if( isCommit ){
  3260   3803         walIndexWriteHdr(pWal);
  3261         -      pWal->iCallback = iFrame;
         3804  +      if( isWalMode2(pWal) ){
         3805  +        int iOther = !walidxGetFile(&pWal->hdr);
         3806  +        if( walidxGetMxFrame(&pWal->hdr, iOther) 
         3807  +            && !walCkptInfo(pWal)->nBackfill 
         3808  +        ){
         3809  +          pWal->iCallback = walidxGetMxFrame(&pWal->hdr, 0);
         3810  +          pWal->iCallback += walidxGetMxFrame(&pWal->hdr, 1);
         3811  +        }
         3812  +      }else{
         3813  +        pWal->iCallback = iFrame;
         3814  +      }
  3262   3815       }
  3263   3816     }
  3264   3817   
  3265   3818     WALTRACE(("WAL%p: frame write %s\n", pWal, rc ? "failed" : "ok"));
  3266   3819     return rc;
  3267   3820   }
  3268   3821   
................................................................................
  3346   3899         sqlite3OsUnfetch(pWal->pDbFd, 0, 0);
  3347   3900       }
  3348   3901     }
  3349   3902   
  3350   3903     /* Copy data from the log to the database file. */
  3351   3904     if( rc==SQLITE_OK ){
  3352   3905   
  3353         -    if( pWal->hdr.mxFrame && walPagesize(pWal)!=nBuf ){
         3906  +    if( (walPagesize(pWal)!=nBuf) 
         3907  +     && (walidxGetMxFrame(&pWal->hdr, 0) || walidxGetMxFrame(&pWal->hdr, 1))
         3908  +    ){
  3354   3909         rc = SQLITE_CORRUPT_BKPT;
  3355   3910       }else{
  3356   3911         rc = walCheckpoint(pWal, db, eMode2, xBusy2, pBusyArg, sync_flags, zBuf);
  3357   3912       }
  3358   3913   
  3359   3914       /* If no error occurred, set the output variables. */
  3360   3915       if( rc==SQLITE_OK || rc==SQLITE_BUSY ){
  3361         -      if( pnLog ) *pnLog = (int)pWal->hdr.mxFrame;
  3362         -      if( pnCkpt ) *pnCkpt = (int)(walCkptInfo(pWal)->nBackfill);
         3916  +      if( pnLog ){
         3917  +        *pnLog = walidxGetMxFrame(&pWal->hdr,0)+walidxGetMxFrame(&pWal->hdr,1);
         3918  +      }
         3919  +      if( pnCkpt ){
         3920  +        if( isWalMode2(pWal) ){
         3921  +          if( (int)(walCkptInfo(pWal)->nBackfill) ){
         3922  +            *pnCkpt = walidxGetMxFrame(&pWal->hdr, !walidxGetFile(&pWal->hdr));
         3923  +          }else{
         3924  +            *pnCkpt = 0;
         3925  +          }
         3926  +        }else{
         3927  +          *pnCkpt = walCkptInfo(pWal)->nBackfill;
         3928  +        }
         3929  +      }
  3363   3930       }
  3364   3931     }
  3365   3932   
  3366   3933     if( isChanged ){
  3367   3934       /* If a new wal-index header was loaded before the checkpoint was 
  3368   3935       ** performed, then the pager-cache associated with pWal is now
  3369   3936       ** out of date. So zero the cached wal-index header to ensure that
................................................................................
  3417   3984   ** If op is negative, then do a dry-run of the op==1 case but do
  3418   3985   ** not actually change anything. The pager uses this to see if it
  3419   3986   ** should acquire the database exclusive lock prior to invoking
  3420   3987   ** the op==1 case.
  3421   3988   */
  3422   3989   int sqlite3WalExclusiveMode(Wal *pWal, int op){
  3423   3990     int rc;
         3991  +
  3424   3992     assert( pWal->writeLock==0 );
  3425   3993     assert( pWal->exclusiveMode!=WAL_HEAPMEMORY_MODE || op==-1 );
  3426   3994   
  3427   3995     /* pWal->readLock is usually set, but might be -1 if there was a 
  3428   3996     ** prior error while attempting to acquire a read-lock. This cannot 
  3429   3997     ** happen if the connection is actually in exclusive mode (as no xShmLock
  3430   3998     ** locks are taken in this case). Nor should the pager attempt to
  3431   3999     ** upgrade to exclusive-mode following such an error.
  3432   4000     */
  3433         -  assert( pWal->readLock>=0 || pWal->lockError );
  3434         -  assert( pWal->readLock>=0 || (op<=0 && pWal->exclusiveMode==0) );
         4001  +  assert( pWal->readLock!=WAL_LOCK_NONE || pWal->lockError );
         4002  +  assert( pWal->readLock!=WAL_LOCK_NONE || (op<=0 && pWal->exclusiveMode==0) );
  3435   4003   
  3436   4004     if( op==0 ){
  3437   4005       if( pWal->exclusiveMode ){
  3438   4006         pWal->exclusiveMode = 0;
  3439         -      if( walLockShared(pWal, WAL_READ_LOCK(pWal->readLock))!=SQLITE_OK ){
         4007  +      if( isWalMode2(pWal) ){
         4008  +        rc = walLockReader(pWal, pWal->readLock, 1);
         4009  +      }else{
         4010  +        rc = walLockShared(pWal, WAL_READ_LOCK(pWal->readLock));
         4011  +      }
         4012  +      if( rc!=SQLITE_OK ){  /* read lock not re-taken: stay exclusive */
  3440   4013           pWal->exclusiveMode = 1;
  3441   4014         }
  3442   4015         rc = pWal->exclusiveMode==0;
  3443   4016       }else{
  3444   4017         /* Already in locking_mode=NORMAL */
  3445   4018         rc = 0;
  3446   4019       }
  3447   4020     }else if( op>0 ){
  3448   4021       assert( pWal->exclusiveMode==0 );
  3449   4022       assert( pWal->readLock>=0 );
  3450         -    walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock));
         4023  +    if( isWalMode2(pWal) ){
         4024  +      walLockReader(pWal, pWal->readLock, 0);
         4025  +    }else{
         4026  +      walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock));
         4027  +    }
  3451   4028       pWal->exclusiveMode = 1;
  3452   4029       rc = 1;
  3453   4030     }else{
  3454   4031       rc = pWal->exclusiveMode==0;
  3455   4032     }
  3456   4033     return rc;
  3457   4034   }
................................................................................
  3527   4104     return (pWal ? pWal->szPage : 0);
  3528   4105   }
  3529   4106   #endif
  3530   4107   
  3531   4108   /* Return the sqlite3_file object for the WAL file
  3532   4109   */
  3533   4110   sqlite3_file *sqlite3WalFile(Wal *pWal){
  3534         -  return pWal->pWalFd;
         4111  +  return pWal->apWalFd[0];
  3535   4112   }
  3536   4113   
  3537   4114   #endif /* #ifndef SQLITE_OMIT_WAL */

Changes to src/wal.h.

    22     22   /* Macros for extracting appropriate sync flags for either transaction
    23     23   ** commits (WAL_SYNC_FLAGS(X)) or for checkpoint ops (CKPT_SYNC_FLAGS(X)):
    24     24   */
    25     25   #define WAL_SYNC_FLAGS(X)   ((X)&0x03)
    26     26   #define CKPT_SYNC_FLAGS(X)  (((X)>>2)&0x03)
    27     27   
    28     28   #ifdef SQLITE_OMIT_WAL
    29         -# define sqlite3WalOpen(x,y,z)                   0
           29  +# define sqlite3WalOpen(w,x,y,z)                 0
    30     30   # define sqlite3WalLimit(x,y)
    31     31   # define sqlite3WalClose(v,w,x,y,z)              0
    32     32   # define sqlite3WalBeginReadTransaction(y,z)     0
    33     33   # define sqlite3WalEndReadTransaction(z)
    34     34   # define sqlite3WalDbsize(y)                     0
    35     35   # define sqlite3WalBeginWriteTransaction(y)      0
    36     36   # define sqlite3WalEndWriteTransaction(x)        0
................................................................................
    51     51   
    52     52   /* Connection to a write-ahead log (WAL) file. 
    53     53   ** There is one object of this type for each pager. 
    54     54   */
    55     55   typedef struct Wal Wal;
    56     56   
    57     57   /* Open and close a connection to a write-ahead log. */
    58         -int sqlite3WalOpen(sqlite3_vfs*, sqlite3_file*, const char *, int, i64, Wal**);
           58  +int sqlite3WalOpen(sqlite3_vfs*, sqlite3_file*, const char *,int,i64,int,Wal**);
    59     59   int sqlite3WalClose(Wal *pWal, sqlite3*, int sync_flags, int, u8 *);
    60     60   
    61     61   /* Set the limiting size of a WAL file. */
    62     62   void sqlite3WalLimit(Wal*, i64);
    63     63   
    64     64   /* Used by readers to open (lock) and close (unlock) a snapshot.  A 
    65     65   ** snapshot is like a read-transaction.  It is the state of the database

Changes to test/permutations.test.

  1000   1000       insert.test   insert2.test  insert3.test rollback.test 
  1001   1001       select1.test  select2.test  select3.test
  1002   1002     }
  1003   1003   }
  1004   1004   
  1005   1005   test_suite "wal" -description {
  1006   1006     Run tests with journal_mode=WAL
         1007  +} -initialize {
         1008  +  set ::G(savepoint6_iterations) 100
         1009  +} -shutdown {
         1010  +  unset -nocomplain ::G(savepoint6_iterations)
         1011  +} -files {
         1012  +  savepoint.test     savepoint2.test     savepoint6.test
         1013  +  trans.test         avtrans.test
         1014  +
         1015  +  fts3aa.test  fts3ab.test  fts3ac.test  fts3ad.test
         1016  +  fts3ae.test  fts3af.test  fts3ag.test  fts3ah.test
         1017  +  fts3ai.test  fts3aj.test  fts3ak.test  fts3al.test
         1018  +  fts3am.test  fts3an.test  fts3ao.test  fts3b.test
         1019  +  fts3c.test   fts3d.test   fts3e.test   fts3query.test 
         1020  +}
         1021  +
         1022  +test_suite "wal2" -description {
         1023  +  Run tests with journal_mode=WAL2
  1007   1024   } -initialize {
  1008   1025     set ::G(savepoint6_iterations) 100
  1009   1026   } -shutdown {
  1010   1027     unset -nocomplain ::G(savepoint6_iterations)
  1011   1028   } -files {
  1012   1029     savepoint.test     savepoint2.test     savepoint6.test
  1013   1030     trans.test         avtrans.test

Changes to test/savepoint.test.

    24     24   do_test savepoint-1.1 {
    25     25     wal_set_journal_mode
    26     26     execsql {
    27     27       SAVEPOINT sp1;
    28     28       RELEASE sp1;
    29     29     }
    30     30   } {}
           31  +wal_check_journal_mode savepoint-1.1
    31     32   do_test savepoint-1.2 {
    32     33     execsql {
    33     34       SAVEPOINT sp1;
    34     35       ROLLBACK TO sp1;
    35     36     }
    36     37   } {}
    37     38   do_test savepoint-1.3 {

Changes to test/savepoint6.test.

    11     11   #
    12     12   # $Id: savepoint6.test,v 1.4 2009/06/05 17:09:12 drh Exp $
    13     13   
    14     14   set testdir [file dirname $argv0]
    15     15   source $testdir/tester.tcl
    16     16   
    17     17   proc sql {zSql} {
           18  +  if {0 && $::debug_op} {
           19  +    puts stderr "$zSql ;"
           20  +    flush stderr
           21  +  }
    18     22     uplevel db eval [list $zSql]
    19     23     #puts stderr "$zSql ;"
    20     24   }
    21     25   
    22     26   set DATABASE_SCHEMA {
    23     27       PRAGMA auto_vacuum = incremental;
    24     28       CREATE TABLE t1(x, y);
................................................................................
    63     67   #   rollback  NAME
    64     68   #   release   NAME
    65     69   #
    66     70   #   insert_rows XVALUES
    67     71   #   delete_rows XVALUES
    68     72   #
    69     73   proc savepoint {zName} {
           74  +  if {$::debug_op} { puts stderr "savepoint $zName" ; flush stderr }
    70     75     catch { sql "SAVEPOINT $zName" }
    71     76     lappend ::lSavepoint [list $zName [array get ::aEntry]]
    72     77   }
    73     78   
    74     79   proc rollback {zName} {
           80  +  if {$::debug_op} { puts stderr "rollback $zName" ; flush stderr }
    75     81     catch { sql "ROLLBACK TO $zName" }
    76     82     for {set i [expr {[llength $::lSavepoint]-1}]} {$i>=0} {incr i -1} {
    77     83       set zSavepoint [lindex $::lSavepoint $i 0]
    78     84       if {$zSavepoint eq $zName} {
    79     85         unset -nocomplain ::aEntry
    80     86         array set ::aEntry [lindex $::lSavepoint $i 1]
    81     87   
................................................................................
    85     91         }
    86     92         break
    87     93       }
    88     94     }
    89     95   }
    90     96   
    91     97   proc release {zName} {
           98  +  if {$::debug_op} { puts stderr "release $zName" ; flush stderr }
    92     99     catch { sql "RELEASE $zName" }
    93    100     for {set i [expr {[llength $::lSavepoint]-1}]} {$i>=0} {incr i -1} {
    94    101       set zSavepoint [lindex $::lSavepoint $i 0]
    95    102       if {$zSavepoint eq $zName} {
    96    103         set ::lSavepoint [lreplace $::lSavepoint $i end]
    97    104         break
    98    105       }
................................................................................
   100    107   
   101    108     if {[llength $::lSavepoint] == 0} {
   102    109       #puts stderr "-- End of transaction!!!!!!!!!!!!!"
   103    110     }
   104    111   }
   105    112   
   106    113   proc insert_rows {lX} {
          114  +  if {$::debug_op} { puts stderr "insert_rows $lX" ; flush stderr }
   107    115     foreach x $lX {
   108    116       set y [x_to_y $x]
   109    117   
   110    118       # Update database [db]
   111    119       sql "INSERT OR REPLACE INTO t1 VALUES($x, '$y')"
   112    120   
   113    121       # Update the Tcl database.
   114    122       set ::aEntry($x) $y
   115    123     }
   116    124   }
   117    125   
   118    126   proc delete_rows {lX} {
          127  +  if {$::debug_op} { puts stderr "delete_rows $lX" ; flush stderr }
   119    128     foreach x $lX {
   120    129       # Update database [db]
   121    130       sql "DELETE FROM t1 WHERE x = $x"
   122    131   
   123    132       # Update the Tcl database.
   124    133       unset -nocomplain ::aEntry($x)
   125    134     }
................................................................................
   159    168     set ret [list]
   160    169     for {set i 0} {$i<$nRes} {incr i} {
   161    170       lappend ret [expr int(rand()*$nRange)]
   162    171     }
   163    172     return $ret
   164    173   } 
   165    174   #-------------------------------------------------------------------------
          175  +
          176  +set ::debug_op 0
          177  +proc debug_ops {} {
          178  +  set ::debug_op 1
          179  +}
   166    180   
   167    181   proc database_op {} {
   168    182     set i [expr int(rand()*2)] 
   169    183     if {$i==0} {
   170    184       insert_rows [random_integers 100 1000]
   171    185     }
   172    186     if {$i==1} {
................................................................................
   181    195   proc savepoint_op {} {
   182    196     set names {one two three four five}
   183    197     set cmds  {savepoint savepoint savepoint savepoint release rollback}
   184    198   
   185    199     set C [lindex $cmds [expr int(rand()*6)]]
   186    200     set N [lindex $names [expr int(rand()*5)]]
   187    201   
   188         -  #puts stderr "   $C $N ;  "
   189         -  #flush stderr
   190         -
   191    202     $C $N
   192    203     return ok
   193    204   }
   194    205   
   195    206   expr srand(0)
   196    207   
   197    208   ############################################################################

Changes to test/tester.tcl.

   582    582   # Create a test database
   583    583   #
   584    584   proc reset_db {} {
   585    585     catch {db close}
   586    586     forcedelete test.db
   587    587     forcedelete test.db-journal
   588    588     forcedelete test.db-wal
          589  +  forcedelete test.db-wal2
   589    590     sqlite3 db ./test.db
   590    591     set ::DB [sqlite3_connection_pointer db]
   591    592     if {[info exists ::SETUP_SQL]} {
   592    593       db eval $::SETUP_SQL
   593    594     }
   594    595   }
   595    596   reset_db
................................................................................
  2049   2050   #     Otherwise (if not running a WAL permutation) this is a no-op.
  2050   2051   #
  2051   2052   #   wal_is_wal_mode
  2052   2053   #
  2053   2054   #     Returns true if this test should be run in WAL mode. False otherwise.
  2054   2055   #
  2055   2056   proc wal_is_wal_mode {} {
  2056         -  expr {[permutation] eq "wal"}
         2057  +  if {[permutation] eq "wal"} { return 1 }
         2058  +  if {[permutation] eq "wal2"} { return 2 }
         2059  +  return 0
  2057   2060   }
  2058   2061   proc wal_set_journal_mode {{db db}} {
  2059         -  if { [wal_is_wal_mode] } {
  2060         -    $db eval "PRAGMA journal_mode = WAL"
         2062  +  switch -- [wal_is_wal_mode] {
         2063  +    0 {
         2064  +    }
         2065  +
         2066  +    1 {
         2067  +      $db eval "PRAGMA journal_mode = WAL"
         2068  +    }
         2069  +
         2070  +    2 {
         2071  +      $db eval "PRAGMA journal_mode = WAL2"
         2072  +    }
  2061   2073     }
  2062   2074   }
  2063   2075   proc wal_check_journal_mode {testname {db db}} {
  2064   2076     if { [wal_is_wal_mode] } {
  2065   2077       $db eval { SELECT * FROM sqlite_master }
  2066         -    do_test $testname [list $db eval "PRAGMA main.journal_mode"] {wal}
         2078  +    set expected "wal"
         2079  +    if {[wal_is_wal_mode]==2} {
         2080  +      set expected "wal2"
         2081  +    }
         2082  +    do_test $testname [list $db eval "PRAGMA main.journal_mode"] $expected
  2067   2083     }
  2068   2084   }
  2069   2085   
  2070   2086   proc wal_is_capable {} {
  2071   2087     ifcapable !wal { return 0 }
  2072   2088     if {[permutation]=="journaltest"} { return 0 }
  2073   2089     return 1

Added test/waltwo2.test.

            1  +# 2017 September 19
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#***********************************************************************
           11  +# This file implements regression tests for SQLite library.  The
           12  +# focus of this file is testing the operation of the library in
           13  +# "PRAGMA journal_mode=WAL2" mode.
           14  +#
           15  +
           16  +set testdir [file dirname $argv0]
           17  +source $testdir/tester.tcl
           18  +source $testdir/lock_common.tcl
           19  +source $testdir/malloc_common.tcl
           20  +source $testdir/wal_common.tcl
           21  +
           22  +set testprefix walsimple
           23  +ifcapable !wal {finish_test ; return }
           24  +
           25  +db close
           26  +foreach f [glob -nocomplain test.db*] { forcedelete $f }
           27  +sqlite3 db test.db
           28  +
           29  +do_execsql_test 1.0 {
           30  +  CREATE TABLE t1(x, y);
           31  +  PRAGMA journal_mode = wal2;
           32  +} {wal2}
           33  +
           34  +do_execsql_test 1.1 {
           35  +  SELECT * FROM t1;
           36  +} {}
           37  +
           38  +do_execsql_test 1.2 {
           39  +  INSERT INTO t1 VALUES(1, 2);
           40  +} {}
           41  +
           42  +do_execsql_test 1.3 {
           43  +  SELECT * FROM t1;
           44  +} {1 2}
           45  +
           46  +do_test 1.4 {
           47  +  sqlite3 db2 test.db
           48  +  execsql { SELECT * FROM t1 } db2
           49  +} {1 2}
           50  +
           51  +do_test 1.5 {
           52  +  lsort [glob test.db*]
           53  +} {test.db test.db-shm test.db-wal test.db-wal2}
           54  +
           55  +do_test 1.6 {
           56  +  db close
           57  +  db2 close
           58  +  sqlite3 db test.db
           59  +  execsql { SELECT * FROM t1 }
           60  +} {1 2}
           61  +
           62  +do_execsql_test 1.7 {
           63  +  PRAGMA journal_size_limit = 4000;
           64  +  INSERT INTO t1 VALUES(3, 4);
           65  +  INSERT INTO t1 VALUES(5, 6);
           66  +  INSERT INTO t1 VALUES(7, 8);
           67  +  INSERT INTO t1 VALUES(9, 10);
           68  +  INSERT INTO t1 VALUES(11, 12);
           69  +  INSERT INTO t1 VALUES(13, 14);
           70  +  INSERT INTO t1 VALUES(15, 16);
           71  +  INSERT INTO t1 VALUES(17, 18);
           72  +  SELECT * FROM t1;
           73  +} {4000 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18}
           74  +
           75  +do_test 1.8 {
           76  +  sqlite3 db2 test.db
           77  +  execsql { SELECT * FROM t1 } db2
           78  +} {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18}
           79  +
           80  +do_test 1.9 {
           81  +  db close
           82  +  db2 close
           83  +  lsort [glob test.db*]
           84  +} {test.db}
           85  +
           86  +#-------------------------------------------------------------------------
           87  +reset_db
           88  +do_execsql_test 2.0 {
           89  +  CREATE TABLE t1(a INTEGER PRIMARY KEY, b, c);
           90  +  CREATE INDEX i1 ON t1(b, c);
           91  +  PRAGMA journal_mode = wal2;
           92  +  PRAGMA journal_size_limit = 4000;
           93  +} {wal2 4000}
           94  +
           95  +proc wal_hook {DB nm nFrame} {
           96  +  $DB eval { PRAGMA wal_checkpoint }
           97  +}
           98  +db wal_hook [list wal_hook db]
           99  +
          100  +
          101  +foreach js {4000 8000 12000} {
          102  +  foreach NROW [list 100 200 300 400 500 600 1000] {
          103  +    do_test 2.$js.$NROW.1 {
          104  +      db eval "DELETE FROM t1"
          105  +      db eval "PRAGMA journal_size_limit = $js"
          106  +      set nTotal 0
          107  +      for {set i 0} {$i < $NROW} {incr i} {
          108  +        db eval { INSERT INTO t1 VALUES($i, $i, randomblob(abs(random()%50))) }
          109  +        incr nTotal $i
          110  +      }
          111  +      set {} {}
          112  +    } {}
          113  +
          114  +    do_test 2.$js.$NROW.2 {
          115  +      sqlite3 db2 test.db
          116  +      db2 eval { 
          117  +        PRAGMA integrity_check;
          118  +        SELECT count(*), sum(b) FROM t1;
          119  +      }
          120  +    } [list ok $NROW $nTotal]
          121  +
          122  +    db2 close
          123  +  }
          124  +}
          125  +
          126  +finish_test
          127  +