/ Check-in [a4b02bc9]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix frame overwriting in wal2 mode.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | wal2
Files: files | file ages | folders
SHA3-256: a4b02bc9388226da21b3837a20c6c7eb0d13854dde62b7136e04f4978528dc71
User & Date: dan 2017-10-06 13:43:42
Wiki:wal2
Context
2017-10-06
14:08
Fix a bug in recovering wal2 mode databases introduced by the previous commit. check-in: 9e1502e1 user: dan tags: wal2
13:43
Fix frame overwriting in wal2 mode. check-in: a4b02bc9 user: dan tags: wal2
2017-10-05
18:14
Fix test case failures on this branch. check-in: 16decc13 user: dan tags: wal2
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/wal.c.

3002
3003
3004
3005
3006
3007
3008
























3009
3010
3011
3012
3013
3014
3015
....
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
....
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055

3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
....
3522
3523
3524
3525
3526
3527
3528

3529
3530
3531
3532
3533
3534
3535
....
3585
3586
3587
3588
3589
3590
3591

3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
....
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
....
3630
3631
3632
3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
....
3693
3694
3695
3696
3697
3698
3699
3700
3701



3702
3703
3704
3705
3706
3707
3708
....
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
      return SQLITE_CORRUPT_BKPT;
    }
  }

  return SQLITE_OK;
}


























/*
** Search the wal file for page pgno. If found, set *piRead to the frame that
** contains the page. Otherwise, if pgno is not in the wal file, set *piRead
** to zero.
**
** Return SQLITE_OK if successful, or an error code if an error occurs. If an
................................................................................
  Pgno pgno,                      /* Database page number to read data for */
  u32 *piRead                     /* OUT: Frame number (or zero) */
){
  int bWal2 = isWalMode2(pWal);
  int iApp = walidxGetFile(&pWal->hdr);
  int rc = SQLITE_OK;
  u32 iRead = 0;                  /* If !=0, WAL frame to return data from */
  u32 iLast;                      /* Last frame in wal file */
  int iHash;                      /* Used to loop through N hash tables */

  /* This routine may only be called from within a read transaction. */
  assert( pWal->readLock!=WAL_LOCK_NONE );

  /* If this is a wal2 system, the client must have a partial-wal lock 
  ** on wal file iApp. Or if it is a wal system, iApp==0 must be true.  */
  assert( bWal2==0 || iApp==1
................................................................................
  );
  assert( bWal2==0 || iApp==0
       || pWal->readLock==WAL_LOCK_PART2 || pWal->readLock==WAL_LOCK_PART2_FULL1
  );
  assert( bWal2 || iApp==0 );

  /* Search the wal file that the client holds a partial lock on first */
  iLast = walidxGetMxFrame(&pWal->hdr, iApp);
  if( iLast ){
    u32 iExternal = bWal2 ? walExternalEncode(iApp, iLast) : iLast;
    int iMinHash = walFramePage(pWal->minFrame);
    for(iHash=walFramePage(iExternal); 
        iHash>=iMinHash && iRead==0; 
        iHash-=(1+bWal2)
    ){
      rc = walSearchHash(pWal, iExternal, iHash, pgno, &iRead);
      if( rc!=SQLITE_OK ) break;
    }
  }


  /* If the requested page was not found, no error has occurred, and 
  ** the client holds a full-wal lock on the other wal file, search it
  ** too.  */
  if( rc==SQLITE_OK && bWal2 && iRead==0 && (
        pWal->readLock==WAL_LOCK_PART1_FULL2 
     || pWal->readLock==WAL_LOCK_PART2_FULL1
  )){
    iLast = walidxGetMxFrame(&pWal->hdr, !iApp);
    if( iLast ){
      u32 iExternal = walExternalEncode(!iApp, iLast);
      for(iHash=walFramePage2(!iApp, iLast); iHash>=0 && iRead==0; iHash -= 2){
        rc = walSearchHash(pWal, iExternal, iHash, pgno, &iRead);
        if( rc!=SQLITE_OK ) break;
      }
    }
  }

#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
  if( iRead ){ 
    u32 iFrame;
    int iWal = walExternalDecode(iRead, &iFrame);
    WALTRACE(("WAL%p: page %d @ frame %d wal %d\n",pWal,(int)pgno,iFrame,iWal));
................................................................................
static int walRewriteChecksums(Wal *pWal, u32 iLast){
  int rc = SQLITE_OK;             /* Return code */
  const int szPage = pWal->szPage;/* Database page size */
  u8 *aBuf;                       /* Buffer to load data from wal file into */
  u8 aFrame[WAL_FRAME_HDRSIZE];   /* Buffer to assemble frame-headers in */
  u32 iRead;                      /* Next frame to read from wal file */
  i64 iCksumOff;


  assert( isWalMode2(pWal)==0 );

  aBuf = sqlite3_malloc(szPage + WAL_FRAME_HDRSIZE);
  if( aBuf==0 ) return SQLITE_NOMEM_BKPT;

  /* Find the checksum values to use as input for the recalculating the
................................................................................
  int nExtra = 0;                 /* Number of extra copies of last page */
  int szFrame;                    /* The size of a single frame */
  i64 iOffset;                    /* Next byte to write in WAL file */
  WalWriter w;                    /* The writer */
  u32 iFirst = 0;                 /* First frame that may be overwritten */
  WalIndexHdr *pLive;             /* Pointer to shared header */
  int iApp;


  assert( pList );
  assert( pWal->writeLock );

  /* If this frame set completes a transaction, then nTruncate>0.  If
  ** nTruncate==0 then this frame set does not complete the transaction. */
  assert( (isCommit!=0)==(nTruncate!=0) );

  pLive = (WalIndexHdr*)walIndexHdr(pWal);
  if( memcmp(&pWal->hdr, (void *)pLive, sizeof(WalIndexHdr))!=0 ){
    if( isWalMode2(pWal)==0 ){
      iFirst = pLive->mxFrame+1;
    }
  }

  /* See if it is possible to write these frames into the start of the
  ** log file, instead of appending to it at pWal->hdr.mxFrame.
  */
  else if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){
    return rc;
................................................................................

  /* If this is the first frame written into the log, write the WAL
  ** header to the start of the WAL file. See comments at the top of
  ** this source file for a description of the WAL header format.
  */
  iApp = walidxGetFile(&pWal->hdr);
  iFrame = walidxGetMxFrame(&pWal->hdr, iApp);
  assert( iApp==0 || isWalMode2(pWal) );

#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
  { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){}
    WALTRACE(("WAL%p: frame write begin. %d frames. iWal=%d. mxFrame=%d. %s\n",
              pWal, cnt, iApp, iFrame, isCommit ? "Commit" : "Spill"));
  }
#endif
................................................................................
    u32 iCkpt = 0;
    u8 aWalHdr[WAL_HDRSIZE];      /* Buffer to assemble wal-header in */
    u32 aCksum[2];                /* Checksum for wal-header */

    sqlite3Put4byte(&aWalHdr[0], (WAL_MAGIC | SQLITE_BIGENDIAN));
    sqlite3Put4byte(&aWalHdr[4], pWal->hdr.iVersion);
    sqlite3Put4byte(&aWalHdr[8], szPage);
    if( isWalMode2(pWal) ){
      if( walidxGetMxFrame(&pWal->hdr, !iApp)>0 ){
        u8 aPrev[4];
        rc = sqlite3OsRead(pWal->apWalFd[!iApp], aPrev, 4, 12);
        if( rc!=SQLITE_OK ){
          return rc;
        }
        iCkpt = (sqlite3Get4byte(aPrev) + 1) & 0x0F;
................................................................................

    /* Check if this page has already been written into the wal file by
    ** the current transaction. If so, overwrite the existing frame and
    ** set Wal.writeLock to WAL_WRITELOCK_RECKSUM - indicating that 
    ** checksums must be recomputed when the transaction is committed.  */
    if( iFirst && (p->pDirty || isCommit==0) ){
      u32 iWrite = 0;
      VVA_ONLY(rc =) sqlite3WalFindFrame(pWal, p->pgno, &iWrite);
      assert( rc==SQLITE_OK || iWrite==0 );



      if( iWrite>=iFirst ){
        i64 iOff = walFrameOffset(iWrite, szPage) + WAL_FRAME_HDRSIZE;
        void *pData;
        if( pWal->iReCksum==0 || iWrite<pWal->iReCksum ){
          pWal->iReCksum = iWrite;
        }
#if defined(SQLITE_HAS_CODEC)
................................................................................
    if( isCommit ){
      pWal->hdr.iChange++;
      pWal->hdr.nPage = nTruncate;
    }
    /* If this is a commit, update the wal-index header too. */
    if( isCommit ){
      walIndexWriteHdr(pWal);
      if( isWalMode2(pWal) ){
        int iOther = !walidxGetFile(&pWal->hdr);
        if( walidxGetMxFrame(&pWal->hdr, iOther) 
            && !walCkptInfo(pWal)->nBackfill 
        ){
          pWal->iCallback = walidxGetMxFrame(&pWal->hdr, 0);
          pWal->iCallback += walidxGetMxFrame(&pWal->hdr, 1);
        }







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







<
<







 







<
<
<
<
<
<
<
<
<
<
|
<
>








|
<
<
<
<
<
<
<







 







>







 







>










|
|
<







 







|







 







|







 







|

>
>
>







 







|







3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
....
3044
3045
3046
3047
3048
3049
3050


3051
3052
3053
3054
3055
3056
3057
....
3059
3060
3061
3062
3063
3064
3065










3066

3067
3068
3069
3070
3071
3072
3073
3074
3075
3076







3077
3078
3079
3080
3081
3082
3083
....
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
....
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610

3611
3612
3613
3614
3615
3616
3617
....
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
....
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
....
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
....
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
      return SQLITE_CORRUPT_BKPT;
    }
  }

  return SQLITE_OK;
}

/*
** Look for page pgno in the hash tables that cover wal file iWal, using
** the mxFrame value for that file taken from pWal->hdr.
**
** The hash tables are visited from the one containing the last frame
** back down to the one containing pWal->minFrame. The walk stops as soon
** as *piRead becomes non-zero or walSearchHash() reports an error.
**
** *piRead is never reset by this function: if it is already non-zero on
** entry, no hash table is searched and the value is left untouched.
**
** Returns SQLITE_OK, or the first error code returned by walSearchHash().
*/
static int walSearchWal(
  Wal *pWal,                      /* WAL handle */
  int iWal,                       /* Which wal file to search */
  Pgno pgno,                      /* Database page number to look for */
  u32 *piRead                     /* IN/OUT: Frame number (or zero) */
){
  const int bWal2 = isWalMode2(pWal);
  const u32 mxFrame = walidxGetMxFrame(&pWal->hdr, iWal);
  int rc = SQLITE_OK;             /* Return code */
  int iMinHash;                   /* Lowest hash table index to visit */
  u32 iExternal;                  /* Last frame, as passed to walSearchHash */
  int iHash;                      /* Hash table currently being searched */

  /* Nothing to search if this wal file holds no frames. */
  if( mxFrame==0 ) return SQLITE_OK;

  iMinHash = walFramePage(pWal->minFrame);
  if( bWal2 ){
    iExternal = walExternalEncode(iWal, mxFrame);
  }else{
    iExternal = mxFrame;
  }
  assert( bWal2==0 || pWal->minFrame==0 );

  /* In wal2 mode the index moves two tables at a time - presumably the
  ** tables of the two wal files are interleaved (TODO confirm). */
  iHash = walFramePage(iExternal);
  while( iHash>=iMinHash && *piRead==0 ){
    rc = walSearchHash(pWal, iExternal, iHash, pgno, piRead);
    if( rc!=SQLITE_OK ) break;
    iHash -= (1+bWal2);
  }

  return rc;
}

/*
** Search the wal file for page pgno. If found, set *piRead to the frame that
** contains the page. Otherwise, if pgno is not in the wal file, set *piRead
** to zero.
**
** Return SQLITE_OK if successful, or an error code if an error occurs. If an
................................................................................
  Pgno pgno,                      /* Database page number to read data for */
  u32 *piRead                     /* OUT: Frame number (or zero) */
){
  int bWal2 = isWalMode2(pWal);
  int iApp = walidxGetFile(&pWal->hdr);
  int rc = SQLITE_OK;
  u32 iRead = 0;                  /* If !=0, WAL frame to return data from */



  /* This routine may only be called from within a read transaction. */
  assert( pWal->readLock!=WAL_LOCK_NONE );

  /* If this is a wal2 system, the client must have a partial-wal lock 
  ** on wal file iApp. Or if it is a wal system, iApp==0 must be true.  */
  assert( bWal2==0 || iApp==1
................................................................................
  );
  assert( bWal2==0 || iApp==0
       || pWal->readLock==WAL_LOCK_PART2 || pWal->readLock==WAL_LOCK_PART2_FULL1
  );
  assert( bWal2 || iApp==0 );

  /* Search the wal file that the client holds a partial lock on first */












  rc = walSearchWal(pWal, iApp, pgno, &iRead);

  /* If the requested page was not found, no error has occurred, and 
  ** the client holds a full-wal lock on the other wal file, search it
  ** too.  */
  if( rc==SQLITE_OK && bWal2 && iRead==0 && (
        pWal->readLock==WAL_LOCK_PART1_FULL2 
     || pWal->readLock==WAL_LOCK_PART2_FULL1
  )){
    rc = walSearchWal(pWal, !iApp, pgno, &iRead);







  }

#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
  if( iRead ){ 
    u32 iFrame;
    int iWal = walExternalDecode(iRead, &iFrame);
    WALTRACE(("WAL%p: page %d @ frame %d wal %d\n",pWal,(int)pgno,iFrame,iWal));
................................................................................
static int walRewriteChecksums(Wal *pWal, u32 iLast){
  int rc = SQLITE_OK;             /* Return code */
  const int szPage = pWal->szPage;/* Database page size */
  u8 *aBuf;                       /* Buffer to load data from wal file into */
  u8 aFrame[WAL_FRAME_HDRSIZE];   /* Buffer to assemble frame-headers in */
  u32 iRead;                      /* Next frame to read from wal file */
  i64 iCksumOff;
  sqlite3_file *pWalFd = pWal->apWalFd[walidxGetFile(&pWal->hdr)];

  assert( isWalMode2(pWal)==0 );

  aBuf = sqlite3_malloc(szPage + WAL_FRAME_HDRSIZE);
  if( aBuf==0 ) return SQLITE_NOMEM_BKPT;

  /* Find the checksum values to use as input for the recalculating the
................................................................................
  int nExtra = 0;                 /* Number of extra copies of last page */
  int szFrame;                    /* The size of a single frame */
  i64 iOffset;                    /* Next byte to write in WAL file */
  WalWriter w;                    /* The writer */
  u32 iFirst = 0;                 /* First frame that may be overwritten */
  WalIndexHdr *pLive;             /* Pointer to shared header */
  int iApp;
  int bWal2 = isWalMode2(pWal);

  assert( pList );
  assert( pWal->writeLock );

  /* If this frame set completes a transaction, then nTruncate>0.  If
  ** nTruncate==0 then this frame set does not complete the transaction. */
  assert( (isCommit!=0)==(nTruncate!=0) );

  pLive = (WalIndexHdr*)walIndexHdr(pWal);
  if( memcmp(&pWal->hdr, (void *)pLive, sizeof(WalIndexHdr))!=0 ){
    /* if( isWalMode2(pWal)==0 ) */
    iFirst = walidxGetMxFrame(pLive, walidxGetFile(pLive))+1;

  }

  /* See if it is possible to write these frames into the start of the
  ** log file, instead of appending to it at pWal->hdr.mxFrame.
  */
  else if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){
    return rc;
................................................................................

  /* If this is the first frame written into the log, write the WAL
  ** header to the start of the WAL file. See comments at the top of
  ** this source file for a description of the WAL header format.
  */
  iApp = walidxGetFile(&pWal->hdr);
  iFrame = walidxGetMxFrame(&pWal->hdr, iApp);
  assert( iApp==0 || bWal2 );

#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
  { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){}
    WALTRACE(("WAL%p: frame write begin. %d frames. iWal=%d. mxFrame=%d. %s\n",
              pWal, cnt, iApp, iFrame, isCommit ? "Commit" : "Spill"));
  }
#endif
................................................................................
    u32 iCkpt = 0;
    u8 aWalHdr[WAL_HDRSIZE];      /* Buffer to assemble wal-header in */
    u32 aCksum[2];                /* Checksum for wal-header */

    sqlite3Put4byte(&aWalHdr[0], (WAL_MAGIC | SQLITE_BIGENDIAN));
    sqlite3Put4byte(&aWalHdr[4], pWal->hdr.iVersion);
    sqlite3Put4byte(&aWalHdr[8], szPage);
    if( bWal2 ){
      if( walidxGetMxFrame(&pWal->hdr, !iApp)>0 ){
        u8 aPrev[4];
        rc = sqlite3OsRead(pWal->apWalFd[!iApp], aPrev, 4, 12);
        if( rc!=SQLITE_OK ){
          return rc;
        }
        iCkpt = (sqlite3Get4byte(aPrev) + 1) & 0x0F;
................................................................................

    /* Check if this page has already been written into the wal file by
    ** the current transaction. If so, overwrite the existing frame and
    ** set Wal.writeLock to WAL_WRITELOCK_RECKSUM - indicating that 
    ** checksums must be recomputed when the transaction is committed.  */
    if( iFirst && (p->pDirty || isCommit==0) ){
      u32 iWrite = 0;
      VVA_ONLY(rc =) walSearchWal(pWal, iApp, p->pgno, &iWrite);
      assert( rc==SQLITE_OK || iWrite==0 );
      if( iWrite && bWal2 ){
        walExternalDecode(iWrite, &iWrite);
      }
      if( iWrite>=iFirst ){
        i64 iOff = walFrameOffset(iWrite, szPage) + WAL_FRAME_HDRSIZE;
        void *pData;
        if( pWal->iReCksum==0 || iWrite<pWal->iReCksum ){
          pWal->iReCksum = iWrite;
        }
#if defined(SQLITE_HAS_CODEC)
................................................................................
    if( isCommit ){
      pWal->hdr.iChange++;
      pWal->hdr.nPage = nTruncate;
    }
    /* If this is a commit, update the wal-index header too. */
    if( isCommit ){
      walIndexWriteHdr(pWal);
      if( bWal2 ){
        int iOther = !walidxGetFile(&pWal->hdr);
        if( walidxGetMxFrame(&pWal->hdr, iOther) 
            && !walCkptInfo(pWal)->nBackfill 
        ){
          pWal->iCallback = walidxGetMxFrame(&pWal->hdr, 0);
          pWal->iCallback += walidxGetMxFrame(&pWal->hdr, 1);
        }

Added test/wal2rewrite.test.

































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# 2017 September 19
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this file is testing the operation of the library in
# "PRAGMA journal_mode=WAL2" mode.
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl
source $testdir/lock_common.tcl
source $testdir/malloc_common.tcl
source $testdir/wal_common.tcl

# Name prefix for the tests in this file (presumably consumed by the
# test harness sourced from tester.tcl - confirm).
set testprefix wal2rewrite
# If the "wal" capability is not available, end the test run immediately
# and stop evaluating the rest of this script.
ifcapable !wal {finish_test ; return }

# Return the size in bytes of the file named $filename, or 0 if no such
# file exists.
proc filesize {filename} {
  set nByte 0
  if {[file exists $filename]} {
    set nByte [file size $filename]
  }
  return $nByte
}

# Run the same scenario once per journal mode. $tn becomes the leading
# component of each test name; $jrnlmode is the value handed to
# "PRAGMA journal_mode".
foreach {tn jrnlmode} {
  1 wal
  2 wal2
} {
  # Start from a fresh database and switch it to the mode under test.
  reset_db
  execsql "PRAGMA journal_mode = $jrnlmode"
  # Set a small journal size limit, page cache and auto-checkpoint
  # threshold, then build an indexed table of 10 rows, each carrying an
  # 800-byte random blob. The expected result {10000 10} is presumably the
  # journal_size_limit and wal_autocheckpoint pragmas reporting their new
  # values - TODO confirm.
  do_execsql_test $tn.1 {
    PRAGMA journal_size_limit = 10000;
    PRAGMA cache_size = 5;
    PRAGMA wal_autocheckpoint = 10;
  
    CREATE TABLE t1(a INTEGER PRIMARY KEY, b INTEGER, c BLOB);
    CREATE INDEX t1b ON t1(b);
    CREATE INDEX t1c ON t1(c);
  
    WITH s(i) AS (
      SELECT 1 UNION SELECT i+1 FROM s WHERE i<10
    )
    INSERT INTO t1 SELECT i, i, randomblob(800) FROM s;
  } {10000 10}
  
  # Four rounds of: a conditional single-statement update followed by one
  # large multi-statement transaction and a size check on both wal files.
  for {set i 0} {$i < 4} {incr i} {
    # NOTE(review): $i is inside braces, so Tcl itself does not substitute
    # it; presumably the SQLite Tcl interface binds it to the loop variable,
    # which would make this UPDATE modify rows only on odd rounds - confirm.
    do_execsql_test $tn.$i.1 {
      UPDATE t1 SET c=randomblob(800) WHERE (b%10)==5 AND ($i%2)
    }
    # Rewrite every row ten times inside one open transaction. With
    # cache_size=5 this presumably forces the same pages to be written to
    # the wal file repeatedly before the commit - TODO confirm.
    do_execsql_test $tn.$i.2 {
      BEGIN;
      UPDATE t1 SET b=b+10, c=randomblob(800);
      UPDATE t1 SET b=b+10, c=randomblob(800);
      UPDATE t1 SET b=b+10, c=randomblob(800);
      UPDATE t1 SET b=b+10, c=randomblob(800);
      UPDATE t1 SET b=b+10, c=randomblob(800);
      UPDATE t1 SET b=b+10, c=randomblob(800);
      UPDATE t1 SET b=b+10, c=randomblob(800);
      UPDATE t1 SET b=b+10, c=randomblob(800);
      UPDATE t1 SET b=b+10, c=randomblob(800);
      UPDATE t1 SET b=b+10, c=randomblob(800);
    }
    # The transaction opened above is committed outside any numbered test.
    execsql COMMIT

    # Both wal files must be smaller than 100000 bytes. filesize reports 0
    # for a file that does not exist, so a check on an absent file passes.
    do_test $tn.$i.3 { expr [filesize test.db-wal]  < 100000 } 1
    do_test $tn.$i.4 { expr [filesize test.db-wal2] < 100000 } 1
  }

}
    


finish_test