SQLite

Check-in [91d0437c07]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Proposed changes that ensure that the WAL header is written prior to the first commit mark.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | wal-header-sync
Files: files | file ages | folders
SHA1: 91d0437c0702904d27f0ef7b1b52d0797efe1826
User & Date: drh 2011-12-16 19:34:36.384
Context
2011-12-16
21:26
Enhance the WAL header sync so that it honors the various synchronous pragmas, settings, and device characteristics. (Closed-Leaf check-in: 9799241f7d user: drh tags: wal-header-sync)
19:34
Proposed changes that ensure that the WAL header is written prior to the first commit mark. (check-in: 91d0437c07 user: drh tags: wal-header-sync)
15:38
Merge the fix for [a1fa75cbdd02] from the experimental branch. Also fix the persistent-wal mode feature of truncating the WAL on close so that it always truncates the WAL to zero bytes. (check-in: 09ccc4a1be user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/wal.c.
410
411
412
413
414
415
416

417
418
419
420
421
422
423
struct Wal {
  sqlite3_vfs *pVfs;         /* The VFS used to create pDbFd */
  sqlite3_file *pDbFd;       /* File handle for the database file */
  sqlite3_file *pWalFd;      /* File handle for WAL file */
  u32 iCallback;             /* Value to pass to log callback (or 0) */
  i64 mxWalSize;             /* Truncate WAL to this size upon reset */
  int nWiData;               /* Size of array apWiData */

  volatile u32 **apWiData;   /* Pointer to wal-index content in memory */
  u32 szPage;                /* Database page size */
  i16 readLock;              /* Which read lock is being held.  -1 for none */
  u8 exclusiveMode;          /* Non-zero if connection is in exclusive mode */
  u8 writeLock;              /* True if in a write transaction */
  u8 ckptLock;               /* True if holding a checkpoint lock */
  u8 readOnly;               /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */







>







410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
struct Wal {
  sqlite3_vfs *pVfs;         /* The VFS used to create pDbFd */
  sqlite3_file *pDbFd;       /* File handle for the database file */
  sqlite3_file *pWalFd;      /* File handle for WAL file */
  u32 iCallback;             /* Value to pass to log callback (or 0) */
  i64 mxWalSize;             /* Truncate WAL to this size upon reset */
  int nWiData;               /* Size of array apWiData */
  int szFirstBlock;          /* Size of first block written to WAL file */
  volatile u32 **apWiData;   /* Pointer to wal-index content in memory */
  u32 szPage;                /* Database page size */
  i16 readLock;              /* Which read lock is being held.  -1 for none */
  u8 exclusiveMode;          /* Non-zero if connection is in exclusive mode */
  u8 writeLock;              /* True if in a write transaction */
  u8 ckptLock;               /* True if holding a checkpoint lock */
  u8 readOnly;               /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */
2612
2613
2614
2615
2616
2617
2618


































2619
2620
2621
2622
2623
2624
2625
    assert( (rc&0xff)!=SQLITE_BUSY ); /* BUSY not possible when useWal==1 */
    testcase( (rc&0xff)==SQLITE_IOERR );
    testcase( rc==SQLITE_PROTOCOL );
    testcase( rc==SQLITE_OK );
  }
  return rc;
}



































/* 
** Write a set of frames to the log. The caller must hold the write-lock
** on the log file (obtained using sqlite3WalBeginWriteTransaction()).
*/
int sqlite3WalFrames(
  Wal *pWal,                      /* Wal handle to write to */







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
    assert( (rc&0xff)!=SQLITE_BUSY ); /* BUSY not possible when useWal==1 */
    testcase( (rc&0xff)==SQLITE_IOERR );
    testcase( rc==SQLITE_PROTOCOL );
    testcase( rc==SQLITE_OK );
  }
  return rc;
}

/*
** Write iAmt bytes of content into the WAL file beginning at iOffset.
**
** When crossing the boundary between the first and second sectors of the
** file, first write all of the first sector content, then fsync(), then
** continue writing content for the second sector.  This ensures that
** the WAL header is overwritten before the first commit mark.
*/
static int walWriteToLog(
  Wal *pWal,                 /* WAL to write to */
  void *pContent,            /* Content to be written */
  int iAmt,                  /* Number of bytes to write */
  sqlite3_int64 iOffset      /* Start writing at this offset */
){
  int rc;
  if( iOffset>=pWal->szFirstBlock || iOffset+iAmt<pWal->szFirstBlock ){
    /* The common and fast case.  Just write the data. */
    rc = sqlite3OsWrite(pWal->pWalFd, pContent, iAmt, iOffset);
  }else{
    /* If this write will cross the first sector boundary, it has to
    ** be split it two with a sync in between. */
    int iFirstAmt = pWal->szFirstBlock - iOffset;
    assert( iFirstAmt>0 && iFirstAmt<iAmt );
    rc = sqlite3OsWrite(pWal->pWalFd, pContent, iFirstAmt, iOffset);
    if( rc ) return rc;
    rc = sqlite3OsSync(pWal->pWalFd, SQLITE_SYNC_NORMAL);
    if( rc ) return rc;
    pContent = (void*)(iFirstAmt + (char*)pContent);
    rc = sqlite3OsWrite(pWal->pWalFd, pContent,
                        iAmt-iFirstAmt, iOffset+iFirstAmt);
  }
  return rc;
}

/* 
** Write a set of frames to the log. The caller must hold the write-lock
** on the log file (obtained using sqlite3WalBeginWriteTransaction()).
*/
int sqlite3WalFrames(
  Wal *pWal,                      /* Wal handle to write to */
2682
2683
2684
2685
2686
2687
2688




2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
    WALTRACE(("WAL%p: wal-header write %s\n", pWal, rc ? "failed" : "ok"));
    if( rc!=SQLITE_OK ){
      return rc;
    }
  }
  assert( (int)pWal->szPage==szPage );





  /* Write the log file. */
  for(p=pList; p; p=p->pDirty){
    u32 nDbsize;                  /* Db-size field for frame header */
    i64 iOffset;                  /* Write offset in log file */
    void *pData;
   
    iOffset = walFrameOffset(++iFrame, szPage);
    /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */
    
    /* Populate and write the frame header */
    nDbsize = (isCommit && p->pDirty==0) ? nTruncate : 0;
#if defined(SQLITE_HAS_CODEC)
    if( (pData = sqlite3PagerCodec(p))==0 ) return SQLITE_NOMEM;
#else
    pData = p->pData;
#endif
    walEncodeFrame(pWal, p->pgno, nDbsize, pData, aFrame);
    rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOffset);
    if( rc!=SQLITE_OK ){
      return rc;
    }

    /* Write the page data */
    rc = sqlite3OsWrite(pWal->pWalFd, pData, szPage, iOffset+sizeof(aFrame));
    if( rc!=SQLITE_OK ){
      return rc;
    }
    pLast = p;
  }

  /* Sync the log file if the 'isSync' flag was specified. */







>
>
>
>

















|





|







2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
    WALTRACE(("WAL%p: wal-header write %s\n", pWal, rc ? "failed" : "ok"));
    if( rc!=SQLITE_OK ){
      return rc;
    }
  }
  assert( (int)pWal->szPage==szPage );

  /* The size of the block containing the WAL header */
  pWal->szFirstBlock = sqlite3OsSectorSize(pWal->pWalFd);
  if( szPage>pWal->szFirstBlock ) pWal->szFirstBlock = szPage;

  /* Write the log file. */
  for(p=pList; p; p=p->pDirty){
    u32 nDbsize;                  /* Db-size field for frame header */
    i64 iOffset;                  /* Write offset in log file */
    void *pData;
   
    iOffset = walFrameOffset(++iFrame, szPage);
    /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */
    
    /* Populate and write the frame header */
    nDbsize = (isCommit && p->pDirty==0) ? nTruncate : 0;
#if defined(SQLITE_HAS_CODEC)
    if( (pData = sqlite3PagerCodec(p))==0 ) return SQLITE_NOMEM;
#else
    pData = p->pData;
#endif
    walEncodeFrame(pWal, p->pgno, nDbsize, pData, aFrame);
    rc = walWriteToLog(pWal, aFrame, sizeof(aFrame), iOffset);
    if( rc!=SQLITE_OK ){
      return rc;
    }

    /* Write the page data */
    rc = walWriteToLog(pWal, pData, szPage, iOffset+sizeof(aFrame));
    if( rc!=SQLITE_OK ){
      return rc;
    }
    pLast = p;
  }

  /* Sync the log file if the 'isSync' flag was specified. */
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
#if defined(SQLITE_HAS_CODEC)
      if( (pData = sqlite3PagerCodec(pLast))==0 ) return SQLITE_NOMEM;
#else
      pData = pLast->pData;
#endif
      walEncodeFrame(pWal, pLast->pgno, nTruncate, pData, aFrame);
      /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */
      rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOffset);
      if( rc!=SQLITE_OK ){
        return rc;
      }
      iOffset += WAL_FRAME_HDRSIZE;
      rc = sqlite3OsWrite(pWal->pWalFd, pData, szPage, iOffset); 
      if( rc!=SQLITE_OK ){
        return rc;
      }
      nLast++;
      iOffset += szPage;
    }








|




|







2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
#if defined(SQLITE_HAS_CODEC)
      if( (pData = sqlite3PagerCodec(pLast))==0 ) return SQLITE_NOMEM;
#else
      pData = pLast->pData;
#endif
      walEncodeFrame(pWal, pLast->pgno, nTruncate, pData, aFrame);
      /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */
      rc = walWriteToLog(pWal, aFrame, sizeof(aFrame), iOffset);
      if( rc!=SQLITE_OK ){
        return rc;
      }
      iOffset += WAL_FRAME_HDRSIZE;
      rc = walWriteToLog(pWal, pData, szPage, iOffset);
      if( rc!=SQLITE_OK ){
        return rc;
      }
      nLast++;
      iOffset += szPage;
    }