/ Check-in [36795c2b]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Remove entries from wal-index hash tables when a rollback or savepoint rollback occurs.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 36795c2b23a78978528cace193e386138adacd41
User & Date: dan 2010-05-21 15:31:57
Context
2010-05-21
16:23
Correct an off-by-one bug in the previous commit. check-in: 75a1130d user: dan tags: trunk
15:31
Remove entries from wal-index hash tables when a rollback or savepoint rollback occurs. check-in: 36795c2b user: dan tags: trunk
13:16
Fix the wal-index header read routine so that it correctly detects a zero header as being malformed and in need of a wal-index rebuild. check-in: 1a4eb3a3 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/wal.c.

189
190
191
192
193
194
195
196

197
198
199
200
201
202
203
204
...
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
....
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
....
1460
1461
1462
1463
1464
1465
1466


































1467
1468
1469
1470
1471
1472
1473
....
1486
1487
1488
1489
1490
1491
1492



1493
1494
1495
1496
1497
1498
1499
....
1506
1507
1508
1509
1510
1511
1512





1513
1514
1515
1516
1517
1518
1519
** K>K0 but to the first reader, those entries will appear to be unused
** slots in the hash table and so the first reader will get an answer as
** if no values greater than K0 had ever been inserted into the hash table
** in the first place - which is what reader one wants.  Meanwhile, the
** second reader using K1 will see additional values that were inserted
** later, which is exactly what reader two wants.  
**
** When a rollback occurs, the value of K is decreased.  This has the

** effect of automatically removing entries from the hash table.
*/
#ifndef SQLITE_OMIT_WAL

#include "wal.h"


/* Object declarations */
................................................................................
    volatile u32 *aPgno;                 /* Page number array */
    volatile HASHTABLE_DATATYPE *aHash;  /* Hash table */
    int idx;                             /* Value to write to hash-table slot */
    TESTONLY( int nCollide = 0;          /* Number of hash collisions */ )

    walHashFind(pWal, iFrame, &aHash, &aPgno, &iZero);
    idx = iFrame - iZero;
    if( idx==1 ) memset((void*)aHash, 0xff, HASHTABLE_NBYTE);
    assert( idx <= HASHTABLE_NSLOT/2 + 1 );
    aPgno[iFrame] = iPage;
    for(iKey=walHash(iPage); aHash[iKey]<idx; iKey=walNextHash(iKey)){
      assert( nCollide++ < idx );
    }
    aHash[iKey] = idx;
  }

  return rc;
}
................................................................................
    u32 iZero;                    /* Frame number corresponding to aPgno[0] */
    int iKey;                     /* Hash slot index */
    int mxHash;                   /* upper bound on aHash[] values */

    walHashFind(pWal, iHash, &aHash, &aPgno, &iZero);
    mxHash = iLast - iZero;
    if( mxHash > HASHTABLE_NPAGE )  mxHash = HASHTABLE_NPAGE;
    for(iKey=walHash(pgno); aHash[iKey]<=mxHash; iKey=walNextHash(iKey)){
      u32 iFrame = aHash[iKey] + iZero;
      if( ALWAYS(iFrame<=iLast) && aPgno[iFrame]==pgno && iFrame>iRead ){
        iRead = iFrame;
      }
    }
  }
  assert( iRead==0 || pWal->pWiData[walIndexEntry(iRead)]==pgno );

#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
................................................................................
      }
    }
  }else if( pWal->lockState==SQLITE_SHM_WRITE ){
    rc = walSetLock(pWal, SQLITE_SHM_READ);
  }
  return rc;
}



































/*
** If any data has been written (but not committed) to the log file, this
** function moves the write-pointer back to the start of the transaction.
**
** Additionally, the callback function is invoked for each frame written
** to the log since the start of the transaction. If the callback returns
................................................................................
  
    assert( pWal->pWiData==0 );
    rc = walIndexReadHdr(pWal, &unused);
    for(iFrame=pWal->hdr.mxFrame+1; rc==SQLITE_OK && iFrame<=iMax; iFrame++){
      assert( pWal->lockState==SQLITE_SHM_WRITE );
      rc = xUndo(pUndoCtx, pWal->pWiData[walIndexEntry(iFrame)]);
    }



    walIndexUnmap(pWal);
  }
  return rc;
}

/* Return an integer that records the current (uncommitted) write
** position in the WAL
................................................................................
/* Move the write position of the WAL back to iFrame.  Called in
** response to a ROLLBACK TO command.
**
** This version only resets Wal.hdr.mxFrame; it does not touch the
** wal-index hash tables. It asserts that pWal->lockState is
** SQLITE_SHM_WRITE and always returns SQLITE_OK.
*/
int sqlite3WalSavepointUndo(Wal *pWal, u32 iFrame){
  int rc = SQLITE_OK;
  assert( pWal->lockState==SQLITE_SHM_WRITE );






  pWal->hdr.mxFrame = iFrame;
  return rc;
}

/* 
** Write a set of frames to the log. The caller must hold the write-lock
** on the log file (obtained using sqlite3WalWriteLock()).







|
>
|







 







|


|







 







|

|







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>
>
>







 







>
>
>
>
>







189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
...
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
....
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
....
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
....
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
....
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
** K>K0 but to the first reader, those entries will appear to be unused
** slots in the hash table and so the first reader will get an answer as
** if no values greater than K0 had ever been inserted into the hash table
** in the first place - which is what reader one wants.  Meanwhile, the
** second reader using K1 will see additional values that were inserted
** later, which is exactly what reader two wants.  
**
** When a rollback occurs, the value of K is decreased. Hash table entries
** that correspond to frames greater than the new K value are removed
** from the hash table at this point.
*/
#ifndef SQLITE_OMIT_WAL

#include "wal.h"


/* Object declarations */
................................................................................
    volatile u32 *aPgno;                 /* Page number array */
    volatile HASHTABLE_DATATYPE *aHash;  /* Hash table */
    int idx;                             /* Value to write to hash-table slot */
    TESTONLY( int nCollide = 0;          /* Number of hash collisions */ )

    walHashFind(pWal, iFrame, &aHash, &aPgno, &iZero);
    idx = iFrame - iZero;
    if( idx==1 ) memset((void*)aHash, 0, HASHTABLE_NBYTE);
    assert( idx <= HASHTABLE_NSLOT/2 + 1 );
    aPgno[iFrame] = iPage;
    for(iKey=walHash(iPage); aHash[iKey]; iKey=walNextHash(iKey)){
      assert( nCollide++ < idx );
    }
    aHash[iKey] = idx;
  }

  return rc;
}
................................................................................
    u32 iZero;                    /* Frame number corresponding to aPgno[0] */
    int iKey;                     /* Hash slot index */
    int mxHash;                   /* upper bound on aHash[] values */

    walHashFind(pWal, iHash, &aHash, &aPgno, &iZero);
    mxHash = iLast - iZero;
    if( mxHash > HASHTABLE_NPAGE )  mxHash = HASHTABLE_NPAGE;
    for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){
      u32 iFrame = aHash[iKey] + iZero;
      if( iFrame<=iLast && aPgno[iFrame]==pgno && iFrame>iRead ){
        iRead = iFrame;
      }
    }
  }
  assert( iRead==0 || pWal->pWiData[walIndexEntry(iRead)]==pgno );

#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
................................................................................
      }
    }
  }else if( pWal->lockState==SQLITE_SHM_WRITE ){
    rc = walSetLock(pWal, SQLITE_SHM_READ);
  }
  return rc;
}

/*
** Remove entries from zero or more hash-table indexes in the wal-index 
** file.
**
** This function is called when rolling back a transaction or savepoint
** transaction in WAL mode. Argument iNewMx is the value that 
** Wal.hdr.mxFrame will be set to following the rollback. Argument iOldMx
** is the value that it had before the rollback. This function removes 
** entries that refer to frames with frame numbers greater than iNewMx 
** from the hash table that contains the entry associated with frame
** (iNewMx+1). It is not necessary to remove any entries from any
** subsequent hash tables, as they will be zeroed by walIndexAppend()
** before they are next used.
**
** This is a no-op if iOldMx<=iNewMx (nothing was rolled back), or if
** frame (iNewMx+1) is the first frame of its hash table (iLimit==0), in
** which case the whole table is zeroed before its next use anyway.
*/
static void walClearHash(Wal *pWal, u32 iOldMx, u32 iNewMx){
  if( iOldMx>iNewMx ){
    volatile HASHTABLE_DATATYPE *aHash;     /* Pointer to hash table to clear */
    volatile u32 *unused1;                  /* Only to satisfy walHashFind() */
    u32 iZero;                              /* frame == (aHash[x]+iZero) */
    int iLimit;                             /* Zero values greater than this */

    walHashFind(pWal, iNewMx+1, &aHash, &unused1, &iZero);
    iLimit = iNewMx - iZero;
    if( iLimit>0 ){
      int i;                      /* Used to iterate through aHash[] */

      /* aHash[] is indexed by hash key, not by frame offset, so a stale
      ** entry may sit in any slot, including slot 0 and slots above
      ** HASHTABLE_NPAGE. Visit every slot of the table.
      ** NOTE(review): assumes HASHTABLE_NSLOT is the number of slots in
      ** aHash[] - confirm against its definition. */
      for(i=0; i<HASHTABLE_NSLOT; i++){
        if( aHash[i]>iLimit ){
          aHash[i] = 0;
        }
      }
    }
  }
}

/*
** If any data has been written (but not committed) to the log file, this
** function moves the write-pointer back to the start of the transaction.
**
** Additionally, the callback function is invoked for each frame written
** to the log since the start of the transaction. If the callback returns
................................................................................
  
    assert( pWal->pWiData==0 );
    rc = walIndexReadHdr(pWal, &unused);
    for(iFrame=pWal->hdr.mxFrame+1; rc==SQLITE_OK && iFrame<=iMax; iFrame++){
      assert( pWal->lockState==SQLITE_SHM_WRITE );
      rc = xUndo(pUndoCtx, pWal->pWiData[walIndexEntry(iFrame)]);
    }
    if( rc==SQLITE_OK ){
      walClearHash(pWal, iMax, pWal->hdr.mxFrame);
    }
    walIndexUnmap(pWal);
  }
  return rc;
}

/* Return an integer that records the current (uncommitted) write
** position in the WAL
................................................................................
/*
** Roll the write position of the WAL back to frame iFrame. Called in
** response to a ROLLBACK TO command.
**
** The wal-index is mapped so that walClearHash() can drop hash-table
** entries for the frames being discarded, then it is unmapped again.
** Wal.hdr.mxFrame is set to iFrame whether or not the mapping succeeds.
** The return value is the result of the walIndexMap() call.
*/
int sqlite3WalSavepointUndo(Wal *pWal, u32 iFrame){
  int rc;

  assert( pWal->lockState==SQLITE_SHM_WRITE );

  rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame));
  if( rc!=SQLITE_OK ){
    /* The hash tables cannot be cleaned up without a mapping, but the
    ** write position is still moved back before reporting the error. */
    pWal->hdr.mxFrame = iFrame;
    return rc;
  }

  walClearHash(pWal, pWal->hdr.mxFrame, iFrame);
  walIndexUnmap(pWal);
  pWal->hdr.mxFrame = iFrame;
  return SQLITE_OK;
}

/* 
** Write a set of frames to the log. The caller must hold the write-lock
** on the log file (obtained using sqlite3WalWriteLock()).

Changes to test/wal2.test.

596
597
598
599
600
601
602

603























604
605
606

    PRAGMA locking_mode = exclusive;
    SELECT * FROM t2;
  }
} {normal exclusive I II III IV}
do_test wal2-6.5.3 {
  execsql { PRAGMA wal_checkpoint }
} {}

























db close

finish_test








>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|


>
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
    PRAGMA locking_mode = exclusive;
    SELECT * FROM t2;
  }
} {normal exclusive I II III IV}
# Run a checkpoint on the open connection; an empty result is expected.
do_test wal2-6.5.3 {
  execsql { PRAGMA wal_checkpoint }
} {}
db close

#-------------------------------------------------------------------------
# Test a theory about the checksum algorithm. Theory was false and this
# test did not provoke a bug.
#
# Outline: build a fresh WAL-mode database, copy the database and its WAL
# file, overwrite one byte of the copied WAL, then check how the copy
# behaves when it is opened and checkpointed.

# Start from a clean slate.
file delete -force test.db test.db-wal test.db-journal

# Create a WAL-mode database with 4096-byte pages holding one table. The
# database file itself is expected to be 4096 bytes at this point.
do_test wal2-7.1.1 {
  sqlite3 db test.db
  execsql {
    PRAGMA page_size = 4096;
    PRAGMA journal_mode = WAL;
    CREATE TABLE t1(a, b);
  }
  file size test.db
} {4096}

# Copy the database and WAL file, then write FF into the copied WAL at
# offset 48. The expected result of hexio_write here is 1.
# NOTE(review): offset 48 presumably lands in the first frame header or
# its checksum; confirm against the WAL file format.
do_test wal2-7.1.2 {
  file copy -force test.db test2.db
  file copy -force test.db-wal test2.db-wal
  hexio_write test2.db-wal 48 FF
} {1}

# Open the modified copy and checkpoint it. sqlite_master is expected to
# be empty, i.e. the CREATE TABLE from the altered WAL is not visible.
do_test wal2-7.1.3 {
  sqlite3 db2 test2.db
  execsql { PRAGMA wal_checkpoint } db2
  execsql { SELECT * FROM sqlite_master } db2
} {}
db2 close

finish_test