/ Check-in [44ca4d12]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Merge in changes that cause the first sector of the WAL file to be synced when the WAL restarts. This is a fix for the power-loss corruption problem described in ticket [ff5be73dee086]
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 44ca4d123385d759c11919865525c998c2e35bdb
User & Date: drh 2011-12-17 13:45:28
Context
2011-12-19
11:16
Fix a couple of test cases to account for the master-journal name related change in [cf3bccc2]. check-in: 21b76af6 user: dan tags: trunk
2011-12-17
16:09
Add support for statvfs() in os_unix.c, for determining the sector size. This causes many TCL test failures under Linux. check-in: e0d44450 user: drh tags: statvfs
13:45
Merge in changes that cause the first sector of the WAL file to be synced when the WAL restarts. This is a fix for the power-loss corruption problem described in ticket [ff5be73dee086] check-in: 44ca4d12 user: drh tags: trunk
08:10
Add tests to walcrash3.test. check-in: d7688042 user: dan tags: trunk
2011-12-16
21:26
Enhance the WAL header sync so that it honors the various synchronous pragmas, settings, and device characteristics. Closed-Leaf check-in: 9799241f user: drh tags: wal-header-sync
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/pager.c.

612
613
614
615
616
617
618

619
620
621
622
623
624
625
...
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
....
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
....
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
....
3363
3364
3365
3366
3367
3368
3369




3370
3371
3372
3373
3374
3375
3376
....
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
....
4529
4530
4531
4532
4533
4534
4535






4536
4537

4538

4539
4540
4541
4542
4543
4544
4545
....
5761
5762
5763
5764
5765
5766
5767
5768
5769
5770
5771
5772
5773
5774
5775
5776
5777
  u8 exclusiveMode;           /* Boolean. True if locking_mode==EXCLUSIVE */
  u8 journalMode;             /* One of the PAGER_JOURNALMODE_* values */
  u8 useJournal;              /* Use a rollback journal on this file */
  u8 noReadlock;              /* Do not bother to obtain readlocks */
  u8 noSync;                  /* Do not sync the journal if true */
  u8 fullSync;                /* Do extra syncs of the journal for robustness */
  u8 ckptSyncFlags;           /* SYNC_NORMAL or SYNC_FULL for checkpoint */

  u8 syncFlags;               /* SYNC_NORMAL or SYNC_FULL otherwise */
  u8 tempFile;                /* zFilename is a temporary file */
  u8 readOnly;                /* True for a read-only database */
  u8 memDb;                   /* True to inhibit all file I/O */

  /**************************************************************************
  ** The following block contains those class members that change during
................................................................................
#ifndef SQLITE_OMIT_WAL
static int pagerUseWal(Pager *pPager){
  return (pPager->pWal!=0);
}
#else
# define pagerUseWal(x) 0
# define pagerRollbackWal(x) 0
# define pagerWalFrames(v,w,x,y,z) 0
# define pagerOpenWalIfPresent(z) SQLITE_OK
# define pagerBeginReadTransaction(z) SQLITE_OK
#endif

#ifndef NDEBUG 
/*
** Usage:
................................................................................
** The list of pages passed into this routine is always sorted by page number.
** Hence, if page 1 appears anywhere on the list, it will be the first page.
*/ 
static int pagerWalFrames(
  Pager *pPager,                  /* Pager object */
  PgHdr *pList,                   /* List of frames to log */
  Pgno nTruncate,                 /* Database size after this commit */
  int isCommit,                   /* True if this is a commit */
  int syncFlags                   /* Flags to pass to OsSync() (or 0) */
){
  int rc;                         /* Return code */
#if defined(SQLITE_DEBUG) || defined(SQLITE_CHECK_PAGES)
  PgHdr *p;                       /* For looping over pages */
#endif

  assert( pPager->pWal );
................................................................................
      if( p->pgno<=nTruncate ) ppNext = &p->pDirty;
    }
    assert( pList );
  }

  if( pList->pgno==1 ) pager_write_changecounter(pList);
  rc = sqlite3WalFrames(pPager->pWal, 
      pPager->pageSize, pList, nTruncate, isCommit, syncFlags
  );
  if( rc==SQLITE_OK && pPager->pBackup ){
    PgHdr *p;
    for(p=pList; p; p=p->pDirty){
      sqlite3BackupUpdate(pPager->pBackup, p->pgno, (u8 *)p->pData);
    }
  }
................................................................................
  }else if( bCkptFullFsync ){
    pPager->syncFlags = SQLITE_SYNC_NORMAL;
    pPager->ckptSyncFlags = SQLITE_SYNC_FULL;
  }else{
    pPager->syncFlags = SQLITE_SYNC_NORMAL;
    pPager->ckptSyncFlags = SQLITE_SYNC_NORMAL;
  }




}
#endif

/*
** The following global variable is incremented whenever the library
** attempts to open a temporary file.  This information is used for
** testing and analysis only.  
................................................................................
  pPg->pDirty = 0;
  if( pagerUseWal(pPager) ){
    /* Write a single frame for this page to the log. */
    if( subjRequiresPage(pPg) ){ 
      rc = subjournalPage(pPg); 
    }
    if( rc==SQLITE_OK ){
      rc = pagerWalFrames(pPager, pPg, 0, 0, 0);
    }
  }else{
  
    /* Sync the journal file if required. */
    if( pPg->flags&PGHDR_NEED_SYNC 
     || pPager->eState==PAGER_WRITER_CACHEMOD
    ){
................................................................................
  assert( PAGER_LOCKINGMODE_EXCLUSIVE==1 );
  pPager->exclusiveMode = (u8)tempFile; 
  pPager->changeCountDone = pPager->tempFile;
  pPager->memDb = (u8)memDb;
  pPager->readOnly = (u8)readOnly;
  assert( useJournal || pPager->tempFile );
  pPager->noSync = pPager->tempFile;






  pPager->fullSync = pPager->noSync ?0:1;
  pPager->syncFlags = pPager->noSync ? 0 : SQLITE_SYNC_NORMAL;

  pPager->ckptSyncFlags = pPager->syncFlags;

  /* pPager->pFirst = 0; */
  /* pPager->pFirstSynced = 0; */
  /* pPager->pLast = 0; */
  pPager->nExtra = (u16)nExtra;
  pPager->journalSizeLimit = SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT;
  assert( isOpen(pPager->fd) || tempFile );
  setSectorSize(pPager);
................................................................................
        ** Ticket [2d1a5c67dfc2363e44f29d9bbd57f] 2011-05-18 */
        rc = sqlite3PagerGet(pPager, 1, &pPageOne);
        pList = pPageOne;
        pList->pDirty = 0;
      }
      assert( rc==SQLITE_OK );
      if( ALWAYS(pList) ){
        rc = pagerWalFrames(pPager, pList, pPager->dbSize, 1, 
            (pPager->fullSync ? pPager->syncFlags : 0)
        );
      }
      sqlite3PagerUnref(pPageOne);
      if( rc==SQLITE_OK ){
        sqlite3PcacheCleanAll(pPager->pPCache);
      }
    }else{
      /* The following block updates the change-counter. Exactly how it







>







 







|







 







|
<







 







|







 







>
>
>
>







 







|







 







>
>
>
>
>
>
|
|
>
|
>







 







|
<
<







612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
...
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
....
2952
2953
2954
2955
2956
2957
2958
2959

2960
2961
2962
2963
2964
2965
2966
....
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
....
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
....
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
....
4533
4534
4535
4536
4537
4538
4539
4540
4541
4542
4543
4544
4545
4546
4547
4548
4549
4550
4551
4552
4553
4554
4555
4556
4557
....
5773
5774
5775
5776
5777
5778
5779
5780


5781
5782
5783
5784
5785
5786
5787
  u8 exclusiveMode;           /* Boolean. True if locking_mode==EXCLUSIVE */
  u8 journalMode;             /* One of the PAGER_JOURNALMODE_* values */
  u8 useJournal;              /* Use a rollback journal on this file */
  u8 noReadlock;              /* Do not bother to obtain readlocks */
  u8 noSync;                  /* Do not sync the journal if true */
  u8 fullSync;                /* Do extra syncs of the journal for robustness */
  u8 ckptSyncFlags;           /* SYNC_NORMAL or SYNC_FULL for checkpoint */
  u8 walSyncFlags;            /* SYNC_NORMAL or SYNC_FULL for wal writes */
  u8 syncFlags;               /* SYNC_NORMAL or SYNC_FULL otherwise */
  u8 tempFile;                /* zFilename is a temporary file */
  u8 readOnly;                /* True for a read-only database */
  u8 memDb;                   /* True to inhibit all file I/O */

  /**************************************************************************
  ** The following block contains those class members that change during
................................................................................
#ifndef SQLITE_OMIT_WAL
static int pagerUseWal(Pager *pPager){
  return (pPager->pWal!=0);
}
#else
# define pagerUseWal(x) 0
# define pagerRollbackWal(x) 0
# define pagerWalFrames(v,w,x,y) 0
# define pagerOpenWalIfPresent(z) SQLITE_OK
# define pagerBeginReadTransaction(z) SQLITE_OK
#endif

#ifndef NDEBUG 
/*
** Usage:
................................................................................
** The list of pages passed into this routine is always sorted by page number.
** Hence, if page 1 appears anywhere on the list, it will be the first page.
*/ 
static int pagerWalFrames(
  Pager *pPager,                  /* Pager object */
  PgHdr *pList,                   /* List of frames to log */
  Pgno nTruncate,                 /* Database size after this commit */
  int isCommit                    /* True if this is a commit */

){
  int rc;                         /* Return code */
#if defined(SQLITE_DEBUG) || defined(SQLITE_CHECK_PAGES)
  PgHdr *p;                       /* For looping over pages */
#endif

  assert( pPager->pWal );
................................................................................
      if( p->pgno<=nTruncate ) ppNext = &p->pDirty;
    }
    assert( pList );
  }

  if( pList->pgno==1 ) pager_write_changecounter(pList);
  rc = sqlite3WalFrames(pPager->pWal, 
      pPager->pageSize, pList, nTruncate, isCommit, pPager->walSyncFlags
  );
  if( rc==SQLITE_OK && pPager->pBackup ){
    PgHdr *p;
    for(p=pList; p; p=p->pDirty){
      sqlite3BackupUpdate(pPager->pBackup, p->pgno, (u8 *)p->pData);
    }
  }
................................................................................
  }else if( bCkptFullFsync ){
    pPager->syncFlags = SQLITE_SYNC_NORMAL;
    pPager->ckptSyncFlags = SQLITE_SYNC_FULL;
  }else{
    pPager->syncFlags = SQLITE_SYNC_NORMAL;
    pPager->ckptSyncFlags = SQLITE_SYNC_NORMAL;
  }
  pPager->walSyncFlags = pPager->syncFlags;
  if( pPager->fullSync ){
    pPager->walSyncFlags |= WAL_SYNC_TRANSACTIONS;
  }
}
#endif

/*
** The following global variable is incremented whenever the library
** attempts to open a temporary file.  This information is used for
** testing and analysis only.  
................................................................................
  pPg->pDirty = 0;
  if( pagerUseWal(pPager) ){
    /* Write a single frame for this page to the log. */
    if( subjRequiresPage(pPg) ){ 
      rc = subjournalPage(pPg); 
    }
    if( rc==SQLITE_OK ){
      rc = pagerWalFrames(pPager, pPg, 0, 0);
    }
  }else{
  
    /* Sync the journal file if required. */
    if( pPg->flags&PGHDR_NEED_SYNC 
     || pPager->eState==PAGER_WRITER_CACHEMOD
    ){
................................................................................
  assert( PAGER_LOCKINGMODE_EXCLUSIVE==1 );
  pPager->exclusiveMode = (u8)tempFile; 
  pPager->changeCountDone = pPager->tempFile;
  pPager->memDb = (u8)memDb;
  pPager->readOnly = (u8)readOnly;
  assert( useJournal || pPager->tempFile );
  pPager->noSync = pPager->tempFile;
  if( pPager->noSync ){
    assert( pPager->fullSync==0 );
    assert( pPager->syncFlags==0 );
    assert( pPager->walSyncFlags==0 );
    assert( pPager->ckptSyncFlags==0 );
  }else{
    pPager->fullSync = 1;
    pPager->syncFlags = SQLITE_SYNC_NORMAL;
    pPager->walSyncFlags = SQLITE_SYNC_NORMAL | WAL_SYNC_TRANSACTIONS;
    pPager->ckptSyncFlags = SQLITE_SYNC_NORMAL;
  }
  /* pPager->pFirst = 0; */
  /* pPager->pFirstSynced = 0; */
  /* pPager->pLast = 0; */
  pPager->nExtra = (u16)nExtra;
  pPager->journalSizeLimit = SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT;
  assert( isOpen(pPager->fd) || tempFile );
  setSectorSize(pPager);
................................................................................
        ** Ticket [2d1a5c67dfc2363e44f29d9bbd57f] 2011-05-18 */
        rc = sqlite3PagerGet(pPager, 1, &pPageOne);
        pList = pPageOne;
        pList->pDirty = 0;
      }
      assert( rc==SQLITE_OK );
      if( ALWAYS(pList) ){
        rc = pagerWalFrames(pPager, pList, pPager->dbSize, 1);


      }
      sqlite3PagerUnref(pPageOne);
      if( rc==SQLITE_OK ){
        sqlite3PcacheCleanAll(pPager->pPCache);
      }
    }else{
      /* The following block updates the change-counter. Exactly how it

Changes to src/wal.c.

410
411
412
413
414
415
416

417
418
419

420
421
422
423
424

425
426
427
428
429
430
431
....
1090
1091
1092
1093
1094
1095
1096

1097
1098
1099
1100
1101
1102
1103
....
1145
1146
1147
1148
1149
1150
1151

1152
1153
1154
1155
1156
1157

1158
1159







1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
....
1292
1293
1294
1295
1296
1297
1298


1299
1300
1301
1302
1303
1304
1305
....
2612
2613
2614
2615
2616
2617
2618






































2619
2620
2621
2622
2623
2624
2625
....
2681
2682
2683
2684
2685
2686
2687










2688
2689
2690
2691
2692
2693
2694
....
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
struct Wal {
  sqlite3_vfs *pVfs;         /* The VFS used to create pDbFd */
  sqlite3_file *pDbFd;       /* File handle for the database file */
  sqlite3_file *pWalFd;      /* File handle for WAL file */
  u32 iCallback;             /* Value to pass to log callback (or 0) */
  i64 mxWalSize;             /* Truncate WAL to this size upon reset */
  int nWiData;               /* Size of array apWiData */

  volatile u32 **apWiData;   /* Pointer to wal-index content in memory */
  u32 szPage;                /* Database page size */
  i16 readLock;              /* Which read lock is being held.  -1 for none */

  u8 exclusiveMode;          /* Non-zero if connection is in exclusive mode */
  u8 writeLock;              /* True if in a write transaction */
  u8 ckptLock;               /* True if holding a checkpoint lock */
  u8 readOnly;               /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */
  u8 truncateOnCommit;       /* True to truncate WAL file on commit */

  WalIndexHdr hdr;           /* Wal-index header for current transaction */
  const char *zWalName;      /* Name of WAL file */
  u32 nCkpt;                 /* Checkpoint sequence counter in the wal-header */
#ifdef SQLITE_DEBUG
  u8 lockError;              /* True if a locking error has occurred */
#endif
};
................................................................................
    int szFrame;                  /* Number of bytes in buffer aFrame[] */
    u8 *aData;                    /* Pointer to data part of aFrame buffer */
    int iFrame;                   /* Index of last frame read */
    i64 iOffset;                  /* Next offset to read from log file */
    int szPage;                   /* Page size according to the log */
    u32 magic;                    /* Magic value read from WAL header */
    u32 version;                  /* Magic value read from WAL header */


    /* Read in the WAL header. */
    rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0);
    if( rc!=SQLITE_OK ){
      goto recovery_error;
    }

................................................................................
      rc = SQLITE_NOMEM;
      goto recovery_error;
    }
    aData = &aFrame[WAL_FRAME_HDRSIZE];

    /* Read all frames from the log file. */
    iFrame = 0;

    for(iOffset=WAL_HDRSIZE; (iOffset+szFrame)<=nSize; iOffset+=szFrame){
      u32 pgno;                   /* Database page number for frame */
      u32 nTruncate;              /* dbsize field from frame header */
      int isValid;                /* True if this frame is valid */

      /* Read and decode the next log frame. */

      rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset);
      if( rc!=SQLITE_OK ) break;







      isValid = walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame);
      if( !isValid ) break;
      rc = walIndexAppend(pWal, ++iFrame, pgno);
      if( rc!=SQLITE_OK ) break;

      /* If nTruncate is non-zero, this is a commit record. */
      if( nTruncate ){
        pWal->hdr.mxFrame = iFrame;
        pWal->hdr.nPage = nTruncate;
        pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16));
................................................................................
  }

  if( rc!=SQLITE_OK ){
    walIndexClose(pRet, 0);
    sqlite3OsClose(pRet->pWalFd);
    sqlite3_free(pRet);
  }else{


    *ppWal = pRet;
    WALTRACE(("WAL%d: opened\n", pRet));
  }
  return rc;
}

/*
................................................................................
    assert( (rc&0xff)!=SQLITE_BUSY ); /* BUSY not possible when useWal==1 */
    testcase( (rc&0xff)==SQLITE_IOERR );
    testcase( rc==SQLITE_PROTOCOL );
    testcase( rc==SQLITE_OK );
  }
  return rc;
}







































/* 
** Write a set of frames to the log. The caller must hold the write-lock
** on the log file (obtained using sqlite3WalBeginWriteTransaction()).
*/
int sqlite3WalFrames(
  Wal *pWal,                      /* Wal handle to write to */
................................................................................
    rc = sqlite3OsWrite(pWal->pWalFd, aWalHdr, sizeof(aWalHdr), 0);
    WALTRACE(("WAL%p: wal-header write %s\n", pWal, rc ? "failed" : "ok"));
    if( rc!=SQLITE_OK ){
      return rc;
    }
  }
  assert( (int)pWal->szPage==szPage );











  /* Write the log file. */
  for(p=pList; p; p=p->pDirty){
    u32 nDbsize;                  /* Db-size field for frame header */
    i64 iOffset;                  /* Write offset in log file */
    void *pData;
   
................................................................................
    nDbsize = (isCommit && p->pDirty==0) ? nTruncate : 0;
#if defined(SQLITE_HAS_CODEC)
    if( (pData = sqlite3PagerCodec(p))==0 ) return SQLITE_NOMEM;
#else
    pData = p->pData;
#endif
    walEncodeFrame(pWal, p->pgno, nDbsize, pData, aFrame);
    rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOffset);
    if( rc!=SQLITE_OK ){
      return rc;
    }

    /* Write the page data */
    rc = sqlite3OsWrite(pWal->pWalFd, pData, szPage, iOffset+sizeof(aFrame));
    if( rc!=SQLITE_OK ){
      return rc;
    }
    pLast = p;
  }

  /* Sync the log file if the 'isSync' flag was specified. */
  if( sync_flags ){
    i64 iSegment = sqlite3OsSectorSize(pWal->pWalFd);
    i64 iOffset = walFrameOffset(iFrame+1, szPage);

    assert( isCommit );
    assert( iSegment>0 );

    iSegment = (((iOffset+iSegment-1)/iSegment) * iSegment);
    while( iOffset<iSegment ){
      void *pData;
#if defined(SQLITE_HAS_CODEC)
      if( (pData = sqlite3PagerCodec(pLast))==0 ) return SQLITE_NOMEM;
#else
      pData = pLast->pData;
#endif
      walEncodeFrame(pWal, pLast->pgno, nTruncate, pData, aFrame);
      /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */
      rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOffset);
      if( rc!=SQLITE_OK ){
        return rc;
      }
      iOffset += WAL_FRAME_HDRSIZE;
      rc = sqlite3OsWrite(pWal->pWalFd, pData, szPage, iOffset); 
      if( rc!=SQLITE_OK ){
        return rc;
      }
      nLast++;
      iOffset += szPage;
    }

    rc = sqlite3OsSync(pWal->pWalFd, sync_flags);
  }

  if( isCommit && pWal->truncateOnCommit && pWal->mxWalSize>=0 ){
    i64 sz = pWal->mxWalSize;
    if( walFrameOffset(iFrame+nLast+1, szPage)>pWal->mxWalSize ){
      sz = walFrameOffset(iFrame+nLast+1, szPage);
    }







>



>





>







 







>







 







>



<


>


>
>
>
>
>
>
>

|
|







 







>
>







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>
>
>
>
>
>
>
>
>
>







 







|





|







|



<












|




|







|







410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
....
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
....
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159

1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
....
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
....
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
....
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
....
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785

2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
struct Wal {
  sqlite3_vfs *pVfs;         /* The VFS used to create pDbFd */
  sqlite3_file *pDbFd;       /* File handle for the database file */
  sqlite3_file *pWalFd;      /* File handle for WAL file */
  u32 iCallback;             /* Value to pass to log callback (or 0) */
  i64 mxWalSize;             /* Truncate WAL to this size upon reset */
  int nWiData;               /* Size of array apWiData */
  int szFirstBlock;          /* Size of first block written to WAL file */
  volatile u32 **apWiData;   /* Pointer to wal-index content in memory */
  u32 szPage;                /* Database page size */
  i16 readLock;              /* Which read lock is being held.  -1 for none */
  u8 syncFlags;              /* Flags to use to sync header writes */
  u8 exclusiveMode;          /* Non-zero if connection is in exclusive mode */
  u8 writeLock;              /* True if in a write transaction */
  u8 ckptLock;               /* True if holding a checkpoint lock */
  u8 readOnly;               /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */
  u8 truncateOnCommit;       /* True to truncate WAL file on commit */
  u8 noSyncHeader;           /* Avoid WAL header fsyncs if true */
  WalIndexHdr hdr;           /* Wal-index header for current transaction */
  const char *zWalName;      /* Name of WAL file */
  u32 nCkpt;                 /* Checkpoint sequence counter in the wal-header */
#ifdef SQLITE_DEBUG
  u8 lockError;              /* True if a locking error has occurred */
#endif
};
................................................................................
    int szFrame;                  /* Number of bytes in buffer aFrame[] */
    u8 *aData;                    /* Pointer to data part of aFrame buffer */
    int iFrame;                   /* Index of last frame read */
    i64 iOffset;                  /* Next offset to read from log file */
    int szPage;                   /* Page size according to the log */
    u32 magic;                    /* Magic value read from WAL header */
    u32 version;                  /* Magic value read from WAL header */
    int isValid;                  /* True if this frame is valid */

    /* Read in the WAL header. */
    rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0);
    if( rc!=SQLITE_OK ){
      goto recovery_error;
    }

................................................................................
      rc = SQLITE_NOMEM;
      goto recovery_error;
    }
    aData = &aFrame[WAL_FRAME_HDRSIZE];

    /* Read all frames from the log file. */
    iFrame = 0;
    isValid = 1;
    for(iOffset=WAL_HDRSIZE; (iOffset+szFrame)<=nSize; iOffset+=szFrame){
      u32 pgno;                   /* Database page number for frame */
      u32 nTruncate;              /* dbsize field from frame header */


      /* Read and decode the next log frame. */
      iFrame++;
      rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset);
      if( rc!=SQLITE_OK ) break;
      if( sqlite3Get4byte(&aFrame[8]) ==
            1+sqlite3Get4byte((u8*)&pWal->hdr.aSalt[0]) ){
        pWal->hdr.mxFrame = 0;
        pWal->hdr.nPage = 0;
        break;
      }
      if( !isValid ) continue;
      isValid = walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame);
      if( !isValid ) continue;
      rc = walIndexAppend(pWal, iFrame, pgno);
      if( rc!=SQLITE_OK ) break;

      /* If nTruncate is non-zero, this is a commit record. */
      if( nTruncate ){
        pWal->hdr.mxFrame = iFrame;
        pWal->hdr.nPage = nTruncate;
        pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16));
................................................................................
  }

  if( rc!=SQLITE_OK ){
    walIndexClose(pRet, 0);
    sqlite3OsClose(pRet->pWalFd);
    sqlite3_free(pRet);
  }else{
    int iDC = sqlite3OsDeviceCharacteristics(pRet->pWalFd);
    if( iDC & SQLITE_IOCAP_SEQUENTIAL ){ pRet->noSyncHeader = 1; }
    *ppWal = pRet;
    WALTRACE(("WAL%d: opened\n", pRet));
  }
  return rc;
}

/*
................................................................................
    assert( (rc&0xff)!=SQLITE_BUSY ); /* BUSY not possible when useWal==1 */
    testcase( (rc&0xff)==SQLITE_IOERR );
    testcase( rc==SQLITE_PROTOCOL );
    testcase( rc==SQLITE_OK );
  }
  return rc;
}

/*
** Write iAmt bytes of content into the WAL file beginning at iOffset.
**
** When crossing the boundary between the first and second sectors of the
** file, first write all of the first sector content, then fsync(), then
** continue writing content for the second sector.  This ensures that
** the WAL header is overwritten before the first commit mark.
*/
static int walWriteToLog(
  Wal *pWal,                 /* WAL to write to */
  void *pContent,            /* Content to be written */
  int iAmt,                  /* Number of bytes to write */
  sqlite3_int64 iOffset      /* Start writing at this offset */
){
  int rc;
  if( iOffset>=pWal->szFirstBlock
   || iOffset+iAmt<pWal->szFirstBlock
   || pWal->syncFlags==0
  ){
    /* The common and fast case.  Just write the data. */
    rc = sqlite3OsWrite(pWal->pWalFd, pContent, iAmt, iOffset);
  }else{
    /* If this write will cross the first sector boundary, it has to
    ** be split it two with a sync in between. */
    int iFirstAmt = pWal->szFirstBlock - iOffset;
    assert( iFirstAmt>0 && iFirstAmt<iAmt );
    rc = sqlite3OsWrite(pWal->pWalFd, pContent, iFirstAmt, iOffset);
    if( rc ) return rc;
    assert( pWal->syncFlags & (SQLITE_SYNC_NORMAL|SQLITE_SYNC_FULL) );
    rc = sqlite3OsSync(pWal->pWalFd, pWal->syncFlags);
    if( rc ) return rc;
    pContent = (void*)(iFirstAmt + (char*)pContent);
    rc = sqlite3OsWrite(pWal->pWalFd, pContent,
                        iAmt-iFirstAmt, iOffset+iFirstAmt);
  }
  return rc;
}

/* 
** Write a set of frames to the log. The caller must hold the write-lock
** on the log file (obtained using sqlite3WalBeginWriteTransaction()).
*/
int sqlite3WalFrames(
  Wal *pWal,                      /* Wal handle to write to */
................................................................................
    rc = sqlite3OsWrite(pWal->pWalFd, aWalHdr, sizeof(aWalHdr), 0);
    WALTRACE(("WAL%p: wal-header write %s\n", pWal, rc ? "failed" : "ok"));
    if( rc!=SQLITE_OK ){
      return rc;
    }
  }
  assert( (int)pWal->szPage==szPage );

  /* Setup information needed to do the WAL header sync */
  if( pWal->noSyncHeader ){
    assert( pWal->szFirstBlock==0 );
    assert( pWal->syncFlags==0 );
  }else{
    pWal->szFirstBlock = sqlite3OsSectorSize(pWal->pWalFd);
    if( szPage>pWal->szFirstBlock ) pWal->szFirstBlock = szPage;
    pWal->syncFlags = sync_flags & SQLITE_SYNC_MASK;
  }

  /* Write the log file. */
  for(p=pList; p; p=p->pDirty){
    u32 nDbsize;                  /* Db-size field for frame header */
    i64 iOffset;                  /* Write offset in log file */
    void *pData;
   
................................................................................
    nDbsize = (isCommit && p->pDirty==0) ? nTruncate : 0;
#if defined(SQLITE_HAS_CODEC)
    if( (pData = sqlite3PagerCodec(p))==0 ) return SQLITE_NOMEM;
#else
    pData = p->pData;
#endif
    walEncodeFrame(pWal, p->pgno, nDbsize, pData, aFrame);
    rc = walWriteToLog(pWal, aFrame, sizeof(aFrame), iOffset);
    if( rc!=SQLITE_OK ){
      return rc;
    }

    /* Write the page data */
    rc = walWriteToLog(pWal, pData, szPage, iOffset+sizeof(aFrame));
    if( rc!=SQLITE_OK ){
      return rc;
    }
    pLast = p;
  }

  /* Sync the log file if the 'isSync' flag was specified. */
  if( isCommit && (sync_flags & WAL_SYNC_TRANSACTIONS)!=0 ){
    i64 iSegment = sqlite3OsSectorSize(pWal->pWalFd);
    i64 iOffset = walFrameOffset(iFrame+1, szPage);


    assert( iSegment>0 );

    iSegment = (((iOffset+iSegment-1)/iSegment) * iSegment);
    while( iOffset<iSegment ){
      void *pData;
#if defined(SQLITE_HAS_CODEC)
      if( (pData = sqlite3PagerCodec(pLast))==0 ) return SQLITE_NOMEM;
#else
      pData = pLast->pData;
#endif
      walEncodeFrame(pWal, pLast->pgno, nTruncate, pData, aFrame);
      /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */
      rc = walWriteToLog(pWal, aFrame, sizeof(aFrame), iOffset);
      if( rc!=SQLITE_OK ){
        return rc;
      }
      iOffset += WAL_FRAME_HDRSIZE;
      rc = walWriteToLog(pWal, pData, szPage, iOffset);
      if( rc!=SQLITE_OK ){
        return rc;
      }
      nLast++;
      iOffset += szPage;
    }

    rc = sqlite3OsSync(pWal->pWalFd, sync_flags & SQLITE_SYNC_MASK);
  }

  if( isCommit && pWal->truncateOnCommit && pWal->mxWalSize>=0 ){
    i64 sz = pWal->mxWalSize;
    if( walFrameOffset(iFrame+nLast+1, szPage)>pWal->mxWalSize ){
      sz = walFrameOffset(iFrame+nLast+1, szPage);
    }

Changes to src/wal.h.

83
84
85
86
87
88
89






90
91
92
93
94
95
96
/* Move the write position of the WAL back to iFrame.  Called in
** response to a ROLLBACK TO command. */
int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData);

/* Write a frame or frames to the log. */
int sqlite3WalFrames(Wal *pWal, int, PgHdr *, Pgno, int, int);







/* Copy pages from the log to the database file */ 
int sqlite3WalCheckpoint(
  Wal *pWal,                      /* Write-ahead log connection */
  int eMode,                      /* One of PASSIVE, FULL and RESTART */
  int (*xBusy)(void*),            /* Function to call when busy */
  void *pBusyArg,                 /* Context argument for xBusyHandler */
  int sync_flags,                 /* Flags to sync db file with (or 0) */







>
>
>
>
>
>







83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
/* Move the write position of the WAL back to iFrame.  Called in
** response to a ROLLBACK TO command. */
int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData);

/* Write a frame or frames to the log. */
int sqlite3WalFrames(Wal *pWal, int, PgHdr *, Pgno, int, int);

/* Additional values that can be added to the sync_flags argument of
** sqlite3WalFrames():
*/
#define WAL_SYNC_TRANSACTIONS  0x20   /* Sync at the end of each transaction */
#define SQLITE_SYNC_MASK       0x13   /* Mask off the SQLITE_SYNC_* values */

/* Copy pages from the log to the database file */ 
int sqlite3WalCheckpoint(
  Wal *pWal,                      /* Write-ahead log connection */
  int eMode,                      /* One of PASSIVE, FULL and RESTART */
  int (*xBusy)(void*),            /* Function to call when busy */
  void *pBusyArg,                 /* Context argument for xBusyHandler */
  int sync_flags,                 /* Flags to sync db file with (or 0) */

Changes to test/wal2.test.

1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
....
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264

1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275







1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
  }
}

#-------------------------------------------------------------------------
# Test that "PRAGMA checkpoint_fullsync" appears to be working.
#
foreach {tn sql reslist} {
  1 { }                                 {8 0 3 0 5 0}
  2 { PRAGMA checkpoint_fullfsync = 1 } {8 4 3 2 5 2}
  3 { PRAGMA checkpoint_fullfsync = 0 } {8 0 3 0 5 0}
} {
  faultsim_delete_and_reopen

  execsql {PRAGMA auto_vacuum = 0}
  execsql $sql
  do_execsql_test wal2-14.$tn.1 { PRAGMA journal_mode = WAL } {wal}

  set sqlite_sync_count 0
  set sqlite_fullsync_count 0

  do_execsql_test wal2-14.$tn.2 {
    PRAGMA wal_autocheckpoint = 10;
    CREATE TABLE t1(a, b);                -- 2 wal syncs
    INSERT INTO t1 VALUES(1, 2);          -- 1 wal sync
    PRAGMA wal_checkpoint;                -- 1 wal sync, 1 db sync
    BEGIN;
      INSERT INTO t1 VALUES(3, 4);
      INSERT INTO t1 VALUES(5, 6);
    COMMIT;                               -- 1 wal sync
    PRAGMA wal_checkpoint;                -- 1 wal sync, 1 db sync
  } {10 0 5 5 0 2 2}

  do_test wal2-14.$tn.3 {
    cond_incr_sync_count 1
    list $sqlite_sync_count $sqlite_fullsync_count
  } [lrange $reslist 0 1]
................................................................................

catch { db close }

# PRAGMA checkpoint_fullsync
# PRAGMA fullfsync
# PRAGMA synchronous
#
foreach {tn settings commit_sync ckpt_sync} {
  1  {0 0 off}     {0 0}  {0 0}
  2  {0 0 normal}  {0 0}  {2 0}
  3  {0 0 full}    {1 0}  {2 0}

  4  {0 1 off}     {0 0}  {0 0}
  5  {0 1 normal}  {0 0}  {0 2}
  6  {0 1 full}    {0 1}  {0 2}

  7  {1 0 off}     {0 0}  {0 0}
  8  {1 0 normal}  {0 0}  {0 2}
  9  {1 0 full}    {1 0}  {0 2}

  10 {1 1 off}     {0 0}  {0 0}
  11 {1 1 normal}  {0 0}  {0 2}
  12 {1 1 full}    {0 1}  {0 2}
} {
  forcedelete test.db

  testvfs tvfs -default 1
  tvfs filter xSync
  tvfs script xSyncCb
  proc xSyncCb {method file fileid flags} {
    incr ::sync($flags)
  }

  sqlite3 db test.db
  do_execsql_test 15.$tn.1 "
    CREATE TABLE t1(x);

    PRAGMA journal_mode = WAL;
    PRAGMA checkpoint_fullfsync = [lindex $settings 0];
    PRAGMA fullfsync = [lindex $settings 1];
    PRAGMA synchronous = [lindex $settings 2];
  " {wal}

  do_test 15.$tn.2 {
    set sync(normal) 0
    set sync(full) 0
    execsql { INSERT INTO t1 VALUES('abc') }
    list $::sync(normal) $::sync(full)







  } $commit_sync

  do_test 15.$tn.3 {
    set sync(normal) 0
    set sync(full) 0
    execsql { INSERT INTO t1 VALUES('def') }
    list $::sync(normal) $::sync(full)
  } $commit_sync

  do_test 15.$tn.4 {
    set sync(normal) 0
    set sync(full) 0
    execsql { PRAGMA wal_checkpoint }
    list $::sync(normal) $::sync(full)
  } $ckpt_sync
  
  db close
  tvfs delete
}



finish_test







|
|
|













|




|







 







|
|
|
|

|
|
|

|
|
|

|
|
|













>




|






>
>
>
>
>
>
>


|






|













1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
....
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
  }
}

#-------------------------------------------------------------------------
# Test that "PRAGMA checkpoint_fullsync" appears to be working.
#
foreach {tn sql reslist} {
  1 { }                                 {10 0 4 0 6 0}
  2 { PRAGMA checkpoint_fullfsync = 1 } {10 4 4 2 6 2}
  3 { PRAGMA checkpoint_fullfsync = 0 } {10 0 4 0 6 0}
} {
  faultsim_delete_and_reopen

  execsql {PRAGMA auto_vacuum = 0}
  execsql $sql
  do_execsql_test wal2-14.$tn.1 { PRAGMA journal_mode = WAL } {wal}

  set sqlite_sync_count 0
  set sqlite_fullsync_count 0

  do_execsql_test wal2-14.$tn.2 {
    PRAGMA wal_autocheckpoint = 10;
    CREATE TABLE t1(a, b);                -- 2 wal syncs
    INSERT INTO t1 VALUES(1, 2);          -- 2 wal sync
    PRAGMA wal_checkpoint;                -- 1 wal sync, 1 db sync
    BEGIN;
      INSERT INTO t1 VALUES(3, 4);
      INSERT INTO t1 VALUES(5, 6);
    COMMIT;                               -- 2 wal sync
    PRAGMA wal_checkpoint;                -- 1 wal sync, 1 db sync
  } {10 0 5 5 0 2 2}

  do_test wal2-14.$tn.3 {
    cond_incr_sync_count 1
    list $sqlite_sync_count $sqlite_fullsync_count
  } [lrange $reslist 0 1]
................................................................................

catch { db close }

# PRAGMA checkpoint_fullsync
# PRAGMA fullfsync
# PRAGMA synchronous
#
foreach {tn settings restart_sync commit_sync ckpt_sync} {
  1  {0 0 off}     {0 0}  {0 0}  {0 0}
  2  {0 0 normal}  {1 0}  {0 0}  {2 0}
  3  {0 0 full}    {2 0}  {1 0}  {2 0}

  4  {0 1 off}     {0 0}  {0 0}  {0 0}
  5  {0 1 normal}  {0 1}  {0 0}  {0 2}
  6  {0 1 full}    {0 2}  {0 1}  {0 2}

  7  {1 0 off}     {0 0}  {0 0}  {0 0}
  8  {1 0 normal}  {1 0}  {0 0}  {0 2}
  9  {1 0 full}    {2 0}  {1 0}  {0 2}

  10 {1 1 off}     {0 0}  {0 0}  {0 0}
  11 {1 1 normal}  {0 1}  {0 0}  {0 2}
  12 {1 1 full}    {0 2}  {0 1}  {0 2}
} {
  forcedelete test.db

  testvfs tvfs -default 1
  tvfs filter xSync
  tvfs script xSyncCb
  proc xSyncCb {method file fileid flags} {
    incr ::sync($flags)
  }

  sqlite3 db test.db
  do_execsql_test 15.$tn.1 "
    CREATE TABLE t1(x);
    PRAGMA wal_autocheckpoint = OFF;
    PRAGMA journal_mode = WAL;
    PRAGMA checkpoint_fullfsync = [lindex $settings 0];
    PRAGMA fullfsync = [lindex $settings 1];
    PRAGMA synchronous = [lindex $settings 2];
  " {0 wal}

  do_test 15.$tn.2 {
    set sync(normal) 0
    set sync(full) 0
    execsql { INSERT INTO t1 VALUES('abc') }
    list $::sync(normal) $::sync(full)
  } $restart_sync

  do_test 15.$tn.3 {
    set sync(normal) 0
    set sync(full) 0
    execsql { INSERT INTO t1 VALUES('abc') }
    list $::sync(normal) $::sync(full)
  } $commit_sync

  do_test 15.$tn.4 {
    set sync(normal) 0
    set sync(full) 0
    execsql { INSERT INTO t1 VALUES('def') }
    list $::sync(normal) $::sync(full)
  } $commit_sync

  do_test 15.$tn.5 {
    set sync(normal) 0
    set sync(full) 0
    execsql { PRAGMA wal_checkpoint }
    list $::sync(normal) $::sync(full)
  } $ckpt_sync
  
  db close
  tvfs delete
}



finish_test

Changes to test/wal3.test.

213
214
215
216
217
218
219

220
221
222
223
224
225
226
    testvfs T
    T filter {} 
    T script sync_counter
    sqlite3 db test.db -vfs T
  
    execsql "PRAGMA synchronous = $syncmode"
    execsql { PRAGMA journal_mode = WAL }


    set ::syncs [list]
    T filter xSync
    execsql {
      CREATE TABLE x(y);
      INSERT INTO x VALUES('z');
      PRAGMA wal_checkpoint;







>







213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
    testvfs T
    T filter {} 
    T script sync_counter
    sqlite3 db test.db -vfs T
  
    execsql "PRAGMA synchronous = $syncmode"
    execsql { PRAGMA journal_mode = WAL }
    execsql { CREATE TABLE filler(a,b,c); }

    set ::syncs [list]
    T filter xSync
    execsql {
      CREATE TABLE x(y);
      INSERT INTO x VALUES('z');
      PRAGMA wal_checkpoint;