/ Check-in [eade8bc2]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix some problems with handling IO errors on the experimental branch.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | experimental
Files: files | file ages | folders
SHA1: eade8bc238df580412f5cf1b91a91532ae671e46
User & Date: dan 2010-06-12 12:02:36
Context
2010-06-14
07:53
Add some fault-injection tests to improve coverage. check-in: 37b26d12 user: dan tags: experimental
2010-06-12
12:02
Fix some problems with handling IO errors on the experimental branch. check-in: eade8bc2 user: dan tags: experimental
2010-06-11
19:04
Experimental change to the xShmXXX parts of the VFS interface. check-in: ca68472d user: dan tags: experimental
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/test_vfs.c.

636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
  assert(0);
  return SQLITE_OK;
}

static void tvfsAllocPage(TestvfsBuffer *p, int iPage, int pgsz){
  assert( iPage<TESTVFS_MAX_PAGES );
  if( p->aPage[iPage]==0 ){
    p->aPage[iPage] = ckalloc(pgsz);
    memset(p->aPage[iPage], 0, pgsz);
    p->pgsz = pgsz;
  }
}

static int tvfsShmPage(
  sqlite3_file *pFile,            /* Handle open on database file */







|







636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
  assert(0);
  return SQLITE_OK;
}

static void tvfsAllocPage(TestvfsBuffer *p, int iPage, int pgsz){
  assert( iPage<TESTVFS_MAX_PAGES );
  if( p->aPage[iPage]==0 ){
    p->aPage[iPage] = (u8 *)ckalloc(pgsz);
    memset(p->aPage[iPage], 0, pgsz);
    p->pgsz = pgsz;
  }
}

static int tvfsShmPage(
  sqlite3_file *pFile,            /* Handle open on database file */

Changes to src/wal.c.

447
448
449
450
451
452
453
454
455
456







457
458
459
460
461
462
463
464
...
545
546
547
548
549
550
551
552

553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
...
723
724
725
726
727
728
729















730
731
732
733
734
735
736
737
738
739
740
741




742
743

744
745
746
747
748
749
750
751
752
753
754
755

756







757
758
759
760
761
762
763
...
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
...
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829










830

831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
...
862
863
864
865
866
867
868
869



870


871
872
873
874
875
876
877
878
879
880
881
882
883
884
885

886
887
888
889
890
891
892
893
894
895
896
897
898

899
900
901
902

903
904
905

906
907
908
909
910
911
912
....
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313

1314
1315
1316
1317
1318
1319

1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
....
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
....
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
....
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
....
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547

1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
....
1605
1606
1607
1608
1609
1610
1611
1612
1613



1614
1615
1616
1617
1618

1619
1620
1621




1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632

1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644


1645
1646
1647
1648
1649
1650
1651
....
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
....
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
....
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
....
1931
1932
1933
1934
1935
1936
1937

1938
1939



1940
1941
1942
1943
1944
1945
1946
....
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
....
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
  return rc;
}

/*
** Return a pointer to the WalCkptInfo structure in the wal-index.
*/
static volatile WalCkptInfo *walCkptInfo(Wal *pWal){
  volatile u32 *page1 = 0;
  walIndexPage(pWal, 0, &page1);
  assert( page1 );







  return (volatile WalCkptInfo*)&page1[sizeof(WalIndexHdr)/2];
}

/*
** This structure is used to implement an iterator that loops through
** all frames in the WAL in database page order. Where two or more frames
** correspond to the same database page, the iterator visits only the 
** frame most recently written to the WAL (in other words, the frame with
................................................................................

/*
** Write the header information in pWal->hdr into the wal-index.
**
** The checksum on pWal->hdr is updated before it is written.
*/
static void walIndexWriteHdr(Wal *pWal){
  WalIndexHdr *aHdr;


  assert( pWal->writeLock );
  pWal->hdr.isInit = 1;
  walChecksumBytes(1, (u8*)&pWal->hdr, offsetof(WalIndexHdr, aCksum),
                   0, pWal->hdr.aCksum);
  walIndexPage(pWal, 0, (volatile u32 **)&aHdr);
  memcpy(&aHdr[1], &pWal->hdr, sizeof(WalIndexHdr));
  sqlite3OsShmBarrier(pWal->pDbFd);
  memcpy(&aHdr[0], &pWal->hdr, sizeof(WalIndexHdr));
}

/*
** This function encodes a single frame header and writes it to a buffer
** supplied by the caller. A frame-header is made up of a series of 
** 4-byte big-endian integers, as follows:
**
................................................................................
  assert( (HASHTABLE_NSLOT & (HASHTABLE_NSLOT-1))==0 );
  return (iPage*HASHTABLE_HASH_1) & (HASHTABLE_NSLOT-1);
}
static int walNextHash(int iPriorHash){
  return (iPriorHash+1)&(HASHTABLE_NSLOT-1);
}
















static void walHashGet(
  Wal *pWal,                      /* WAL handle */
  int iHash,                      /* Find the iHash'th table */
  volatile HASHTABLE_DATATYPE **paHash,     /* OUT: Pointer to hash index */
  volatile u32 **paPgno,          /* OUT: Pointer to page number array */
  u32 *piZero                     /* OUT: Frame associated with *paPgno[0] */
){
  u32 iZero;
  volatile u32 *aPgno;
  volatile HASHTABLE_DATATYPE *aHash;

  walIndexPage(pWal, iHash, &aPgno);




  aHash = (volatile HASHTABLE_DATATYPE *)&aPgno[HASHTABLE_NPAGE];


  if( iHash==0 ){
    aPgno = &aPgno[WALINDEX_HDR_SIZE/sizeof(u32)-1];
    iZero = 0;
  }else{
    iZero = HASHTABLE_NPAGE_ONE + (iHash-1)*HASHTABLE_NPAGE;
    aPgno = &aPgno[-1*iZero-1];
  }

  *paPgno = aPgno;
  *paHash = aHash;
  *piZero = iZero;
}









static int walFramePage(u32 iFrame){
  int iHash = (iFrame+HASHTABLE_NPAGE-HASHTABLE_NPAGE_ONE-1) / HASHTABLE_NPAGE;
  assert( (iHash==0 || iFrame>HASHTABLE_NPAGE_ONE)
       && (iHash>=1 || iFrame<=HASHTABLE_NPAGE_ONE)
       && (iHash<=1 || iFrame>(HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE))
       && (iHash>=2 || iFrame<=HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE)
       && (iHash<=2 || iFrame>(HASHTABLE_NPAGE_ONE+2*HASHTABLE_NPAGE))
................................................................................
  int iHash = walFramePage(iFrame);
  if( iHash==0 ){
    return pWal->apWiData[0][WALINDEX_HDR_SIZE/sizeof(u32) + iFrame - 1];
  }
  return pWal->apWiData[iHash][(iFrame-1-HASHTABLE_NPAGE_ONE)%HASHTABLE_NPAGE];
}

/* 
** Find the hash table and (section of the) page number array used to
** store data for WAL frame iFrame.
**
** Set output variable *paHash to point to the start of the hash table
** in the wal-index file. Set *piZero to one less than the frame 
** number of the first frame indexed by this hash table. If a
** slot in the hash table is set to N, it refers to frame number 
** (*piZero+N) in the log.
**
** Finally, set *paPgno such that for all frames F between (*piZero+1) and 
** (*piZero+HASHTABLE_NPAGE), (*paPgno)[F] is the database page number 
** associated with frame F.
*/
static void walHashFind(
  Wal *pWal,                      /* WAL handle */
  u32 iFrame,                     /* Find the hash table indexing this frame */
  volatile HASHTABLE_DATATYPE **paHash,    /* OUT: Pointer to hash index */
  volatile u32 **paPgno,          /* OUT: Pointer to page number array */
  u32 *piZero                     /* OUT: Frame associated with *paPgno[0] */
){
  int iHash = walFramePage(iFrame);
  walHashGet(pWal, iHash, paHash, paPgno, piZero);
}

/*
** Remove entries from the hash table that point to WAL slots greater
** than pWal->hdr.mxFrame.
**
** This function is called whenever pWal->hdr.mxFrame is decreased due
** to a rollback or savepoint.
**
................................................................................
** At most only the hash table containing pWal->hdr.mxFrame needs to be
** updated.  Any later hash tables will be automatically cleared when
** pWal->hdr.mxFrame advances to the point where those hash tables are
** actually needed.
*/
static void walCleanupHash(Wal *pWal){
  volatile HASHTABLE_DATATYPE *aHash;  /* Pointer to hash table to clear */
  volatile u32 *aPgno;                 /* Unused return from walHashFind() */
  u32 iZero;                           /* frame == (aHash[x]+iZero) */
  int iLimit = 0;                      /* Zero values greater than this */
  int nByte;                           /* Number of bytes to zero in aPgno[] */
  int i;                               /* Used to iterate through aHash[] */

  assert( pWal->writeLock );
  testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE-1 );
  testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE );
  testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE+1 );

  walHashFind(pWal, pWal->hdr.mxFrame+1, &aHash, &aPgno, &iZero);










  if( iZero!=pWal->hdr.mxFrame ){

    iLimit = pWal->hdr.mxFrame - iZero;
    assert( iLimit>0 );
    for(i=0; i<HASHTABLE_NSLOT; i++){
      if( aHash[i]>iLimit ){
        aHash[i] = 0;
      }
    }
  
    /* Zero the entries in the aPgno array that correspond to frames with
    ** frame numbers greater than pWal->hdr.mxFrame. 
    */
    nByte = ((char *)aHash - (char *)&aPgno[pWal->hdr.mxFrame+1]);
    memset((void *)&aPgno[pWal->hdr.mxFrame+1], 0, nByte);
  }

#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
  /* Verify that the every entry in the mapping region is still reachable
  ** via the hash table even after the cleanup.
  */
  if( iLimit ){
    int i;           /* Loop counter */
................................................................................


/*
** Set an entry in the wal-index that will map database page number
** pPage into WAL frame iFrame.
*/
static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){
  int rc = SQLITE_OK;             /* Return code */






  /* Assuming the wal-index file was successfully mapped, find the hash 
  ** table and section of of the page number array that pertain to frame 
  ** iFrame of the WAL. Then populate the page number array and the hash 
  ** table entry.
  */
  if( rc==SQLITE_OK ){
    int iKey;                     /* Hash table key */
    u32 iZero;                    /* One less than frame number of aPgno[1] */
    volatile u32 *aPgno;                 /* Page number array */
    volatile HASHTABLE_DATATYPE *aHash;  /* Hash table */
    int idx;                             /* Value to write to hash-table slot */
    TESTONLY( int nCollide = 0;          /* Number of hash collisions */ )

    walHashFind(pWal, iFrame, &aHash, &aPgno, &iZero);
    idx = iFrame - iZero;

    if( idx==1 ){
      int nByte = (u8 *)&aHash[HASHTABLE_NSLOT] - (u8 *)&aPgno[1+iZero];
      memset((void*)&aPgno[1+iZero], 0, nByte);
    }
    assert( idx <= HASHTABLE_NSLOT/2 + 1 );

    if( aPgno[iFrame] ){
      /* If the entry in aPgno[] is already set, then the previous writer
      ** must have exited unexpectedly in the middle of a transaction (after
      ** writing one or more dirty pages to the WAL to free up memory). 
      ** Remove the remnants of that writers uncommitted transaction from 
      ** the hash-table before writing any new entries.
      */

      walCleanupHash(pWal);
      assert( !aPgno[iFrame] );
    }
    aPgno[iFrame] = iPage;

    for(iKey=walHash(iPage); aHash[iKey]; iKey=walNextHash(iKey)){
      assert( nCollide++ < idx );
    }

    aHash[iKey] = idx;

#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
    /* Verify that the number of entries in the hash table exactly equals
    ** the number of entries in the mapping region.
    */
    {
................................................................................
  memset(p, 0, nByte);

  /* Allocate space for the WalIterator object */
  p->nSegment = nSegment;
  aSpace = (HASHTABLE_DATATYPE *)&p->aSegment[nSegment];
  aTmp = &aSpace[HASHTABLE_NPAGE*nSegment];
  for(i=0; i<nSegment; i++){
    volatile HASHTABLE_DATATYPE *pDummy;
    int j;
    u32 iZero;
    int nEntry;
    volatile u32 *aPgno;


    walHashGet(pWal, i, &pDummy, &aPgno, &iZero);
    if( i==(nSegment-1) ){
      nEntry = iLast - iZero;
    }else if( i==0 ){
      nEntry = HASHTABLE_NPAGE_ONE;

    }else{
      nEntry = HASHTABLE_NPAGE;
    }
    iZero++;
    aPgno += iZero;

    for(j=0; j<nEntry; j++){
      aSpace[j] = j;
    }
    walMergesort((u32 *)aPgno, aTmp, aSpace, &nEntry);
................................................................................
  int rc;                         /* Return code */
  int szPage = pWal->hdr.szPage;  /* Database page-size */
  WalIterator *pIter = 0;         /* Wal iterator context */
  u32 iDbpage = 0;                /* Next database page to write */
  u32 iFrame = 0;                 /* Wal frame containing data for iDbpage */
  u32 mxSafeFrame;                /* Max frame that can be backfilled */
  int i;                          /* Loop counter */
  volatile WalIndexHdr *pHdr;     /* The actual wal-index header in SHM */
  volatile WalCkptInfo *pInfo;    /* The checkpoint status information */

  /* Allocate the iterator */
  rc = walIteratorInit(pWal, &pIter);
  if( rc!=SQLITE_OK || pWal->hdr.mxFrame==0 ){
    goto walcheckpoint_out;
  }
................................................................................

  /* Compute in mxSafeFrame the index of the last frame of the WAL that is
  ** safe to write into the database.  Frames beyond mxSafeFrame might
  ** overwrite database pages that are in use by active readers and thus
  ** cannot be backfilled from the WAL.
  */
  mxSafeFrame = pWal->hdr.mxFrame;
  walIndexPage(pWal, 0, (volatile u32 **)&pHdr);
  pInfo = walCkptInfo(pWal);
  assert( pInfo==walCkptInfo(pWal) );
  for(i=1; i<WAL_NREADER; i++){
    u32 y = pInfo->aReadMark[i];
    if( mxSafeFrame>=y ){
      assert( y<=pWal->hdr.mxFrame );
      rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1);
      if( rc==SQLITE_OK ){
        pInfo->aReadMark[i] = READMARK_NOT_USED;
................................................................................
      if( rc!=SQLITE_OK ) break;
      rc = sqlite3OsWrite(pWal->pDbFd, zBuf, szPage, (iDbpage-1)*szPage);
      if( rc!=SQLITE_OK ) break;
    }

    /* If work was actually accomplished... */
    if( rc==SQLITE_OK ){
      if( mxSafeFrame==pHdr[0].mxFrame ){
        rc = sqlite3OsTruncate(pWal->pDbFd, ((i64)pWal->hdr.nPage*(i64)szPage));
        if( rc==SQLITE_OK && sync_flags ){
          rc = sqlite3OsSync(pWal->pDbFd, sync_flags);
        }
      }
      if( rc==SQLITE_OK ){
        pInfo->nBackfill = mxSafeFrame;
................................................................................
** pWal->hdr, then pWal->hdr is updated to the content of the new header
** and *pChanged is set to 1.
**
** If the checksum cannot be verified return non-zero. If the header
** is read successfully and the checksum verified, return zero.
*/
int walIndexTryHdr(Wal *pWal, int *pChanged){
  u32 aCksum[2];               /* Checksum on the header content */
  WalIndexHdr h1, h2;          /* Two copies of the header content */
  WalIndexHdr *aHdr;           /* Header in shared memory */
  volatile u32 *page1 = 0;


  walIndexPage(pWal, 0, &page1);
  if( !page1 ){
    /* The wal-index is not large enough to hold the header, then assume
    ** header is invalid. */
    return 1;
  }

  /* Read the header. This might happen currently with a write to the
  ** same area of shared memory on a different CPU in a SMP,
  ** meaning it is possible that an inconsistent snapshot is read
  ** from the file. If this happens, return non-zero.
  **
  ** There are two copies of the header at the beginning of the wal-index.
  ** When reading, read [0] first then [1].  Writes are in the reverse order.
  ** Memory barriers are used to prevent the compiler or the hardware from
  ** reordering the reads and writes.
  */
  aHdr = (WalIndexHdr*)page1;
  memcpy(&h1, &aHdr[0], sizeof(h1));
  sqlite3OsShmBarrier(pWal->pDbFd);
  memcpy(&h2, &aHdr[1], sizeof(h2));

  if( memcmp(&h1, &h2, sizeof(h1))!=0 ){
    return 1;   /* Dirty read */
  }  
  if( h1.isInit==0 ){
    return 1;   /* Malformed header - probably all zeros */
  }
................................................................................
**
** If the wal-index header is successfully read, return SQLITE_OK. 
** Otherwise an SQLite error code.
*/
static int walIndexReadHdr(Wal *pWal, int *pChanged){
  int rc;                         /* Return code */
  int badHdr;                     /* True if a header read failed */
  volatile u32 *dummy;




  assert( pChanged );
  rc = walIndexPage(pWal, 0, &dummy);
  if( rc!=SQLITE_OK ){
    return rc;
  }


  /* Try once to read the header straight out.  This works most of the
  ** time.




  */
  badHdr = walIndexTryHdr(pWal, pChanged);

  /* If the first attempt failed, it might have been due to a race
  ** with a writer.  So get a WRITE lock and try again.
  */
  assert( badHdr==0 || pWal->writeLock==0 );
  if( badHdr ){
    rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1);
    if( rc==SQLITE_OK ){
      pWal->writeLock = 1;

      badHdr = walIndexTryHdr(pWal, pChanged);
      if( badHdr ){
        /* If the wal-index header is still malformed even while holding
        ** a WRITE lock, it can only mean that the header is corrupted and
        ** needs to be reconstructed.  So run recovery to do exactly that.
        */
        rc = walIndexRecover(pWal);
        *pChanged = 1;
      }
      walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
      pWal->writeLock = 0;
    }


  }

  return rc;
}

/*
** This is the value that walTryBeginRead returns when it needs to
................................................................................
** to select a particular WAL_READ_LOCK() that strives to let the
** checkpoint process do as much work as possible.  This routine might
** update values of the aReadMark[] array in the header, but if it does
** so it takes care to hold an exclusive lock on the corresponding
** WAL_READ_LOCK() while changing values.
*/
static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
  volatile WalIndexHdr *pHdr;     /* Header of the wal-index */
  volatile WalCkptInfo *pInfo;    /* Checkpoint information in wal-index */
  u32 mxReadMark;                 /* Largest aReadMark[] value */
  int mxI;                        /* Index of largest aReadMark[] value */
  int i;                          /* Loop counter */
  int rc = SQLITE_OK;             /* Return code  */

  assert( pWal->readLock<0 );     /* Not currently locked */
................................................................................
      }
    }
  }
  if( rc!=SQLITE_OK ){
    return rc;
  }

  walIndexPage(pWal, 0, (volatile u32 **)&pHdr);
  pInfo = walCkptInfo(pWal);
  assert( pInfo==(volatile WalCkptInfo *)&pHdr[2] );
  if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame ){
    /* The WAL has been completely backfilled (or it is empty).
    ** and can be safely ignored.
    */
    rc = walLockShared(pWal, WAL_READ_LOCK(0));
    sqlite3OsShmBarrier(pWal->pDbFd);
    if( rc==SQLITE_OK ){
      if( memcmp((void *)pHdr, &pWal->hdr, sizeof(WalIndexHdr)) ){
        /* It is not safe to allow the reader to continue here if frames
        ** may have been appended to the log before READ_LOCK(0) was obtained.
        ** When holding READ_LOCK(0), the reader ignores the entire log file,
        ** which implies that the database file contains a trustworthy
        ** snapshoT. Since holding READ_LOCK(0) prevents a checkpoint from
        ** happening, this is usually correct.
        **
................................................................................
    ** date before proceeding. That would not be possible without somehow
    ** blocking writers. It only guarantees that a dangerous checkpoint or 
    ** log-wrap (either of which would require an exclusive lock on
    ** WAL_READ_LOCK(mxI)) has not occurred since the snapshot was valid.
    */
    sqlite3OsShmBarrier(pWal->pDbFd);
    if( pInfo->aReadMark[mxI]!=mxReadMark
     || memcmp((void *)pHdr, &pWal->hdr, sizeof(WalIndexHdr))
    ){
      walUnlockShared(pWal, WAL_READ_LOCK(mxI));
      return WAL_RETRY;
    }else{
      assert( mxReadMark<=pWal->hdr.mxFrame );
      pWal->readLock = mxI;
    }
................................................................................
  **     table after the current read-transaction had started.
  */
  for(iHash=walFramePage(iLast); iHash>=0 && iRead==0; iHash--){
    volatile HASHTABLE_DATATYPE *aHash;  /* Pointer to hash table */
    volatile u32 *aPgno;                 /* Pointer to array of page numbers */
    u32 iZero;                    /* Frame number corresponding to aPgno[0] */
    int iKey;                     /* Hash slot index */


    walHashGet(pWal, iHash, &aHash, &aPgno, &iZero);



    for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){
      u32 iFrame = aHash[iKey] + iZero;
      if( iFrame<=iLast && aPgno[iFrame]==pgno ){
        assert( iFrame>iRead );
        iRead = iFrame;
      }
    }
................................................................................
** thread to write as doing so would cause a fork.  So this routine
** returns SQLITE_BUSY in that case and no write transaction is started.
**
** There can only be a single writer active at a time.
*/
int sqlite3WalBeginWriteTransaction(Wal *pWal){
  int rc;
  volatile u32 *page1;

  /* Cannot start a write transaction without first holding a read
  ** transaction. */
  assert( pWal->readLock>=0 );

  /* Only one writer allowed at a time.  Get the write lock.  Return
  ** SQLITE_BUSY if unable.
................................................................................
  }
  pWal->writeLock = 1;

  /* If another connection has written to the database file since the
  ** time the read transaction on this connection was started, then
  ** the write is disallowed.
  */
  walIndexPage(pWal, 0, &page1);
  if( memcmp(&pWal->hdr, (void*)page1, sizeof(WalIndexHdr))!=0 ){
    walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
    pWal->writeLock = 0;
    rc = SQLITE_BUSY;
  }

  return rc;
}







|
|
<
>
>
>
>
>
>
>
|







 







|
>



|
<
<
|

|







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|






|

<

|
>
>
>
>
|

>
|
|
|
|
|
|
|
|
|
|
|
|
>
|
>
>
>
>
>
>
>







 







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







 







|










|
>
>
>
>
>
>
>
>
>
>
|
>
|
|
|
|
|
|
|

|
|
|
|
|
<







 







|
>
>
>

>
>
|
<
<
|



|
|
|
|
|
|
|
|
>




<

<
|
|
|
|
|
|
>



|
>



>







 







|




>

|
<
|
|
<
>
|
<
|







 







<







 







<

<







 







|







 







|
|
|
<

>
|
<
<
<
<
<











|
|

|







 







|

>
>
>

|


|
>

<
<
>
>
>
>

|





<
|
<
|
>









<
<

>
>







 







<







 







<

<







|







 







|







 







>

|
>
>
>







 







<







 







<
|







447
448
449
450
451
452
453
454
455

456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
...
551
552
553
554
555
556
557
558
559
560
561
562
563


564
565
566
567
568
569
570
571
572
573
...
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758

759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
...
804
805
806
807
808
809
810

























811
812
813
814
815
816
817
...
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861

862
863
864
865
866
867
868
...
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893


894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910

911

912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
....
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338

1339
1340

1341
1342

1343
1344
1345
1346
1347
1348
1349
1350
....
1408
1409
1410
1411
1412
1413
1414

1415
1416
1417
1418
1419
1420
1421
....
1428
1429
1430
1431
1432
1433
1434

1435

1436
1437
1438
1439
1440
1441
1442
....
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
....
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563

1564
1565
1566





1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
....
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636


1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647

1648

1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659


1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
....
1696
1697
1698
1699
1700
1701
1702

1703
1704
1705
1706
1707
1708
1709
....
1734
1735
1736
1737
1738
1739
1740

1741

1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
....
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
....
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
....
2016
2017
2018
2019
2020
2021
2022

2023
2024
2025
2026
2027
2028
2029
....
2034
2035
2036
2037
2038
2039
2040

2041
2042
2043
2044
2045
2046
2047
2048
  return rc;
}

/*
** Return a pointer to the WalCkptInfo structure in the wal-index.
*/
static volatile WalCkptInfo *walCkptInfo(Wal *pWal){
  assert( pWal->nWiData>0 && pWal->apWiData[0] );
  return (volatile WalCkptInfo*)&(pWal->apWiData[0][sizeof(WalIndexHdr)/2]);

}

/*
** Return a pointer to the WalIndexHdr structure in the wal-index.
*/
static volatile WalIndexHdr *walIndexHdr(Wal *pWal){
  assert( pWal->nWiData>0 && pWal->apWiData[0] );
  return (volatile WalIndexHdr*)pWal->apWiData[0];
}

/*
** This structure is used to implement an iterator that loops through
** all frames in the WAL in database page order. Where two or more frames
** correspond to the same database page, the iterator visits only the 
** frame most recently written to the WAL (in other words, the frame with
................................................................................

/*
** Write the header information in pWal->hdr into the wal-index.
**
** The checksum on pWal->hdr is updated before it is written.
*/
static void walIndexWriteHdr(Wal *pWal){
  volatile WalIndexHdr *aHdr = walIndexHdr(pWal);
  const int nCksum = offsetof(WalIndexHdr, aCksum);

  assert( pWal->writeLock );
  pWal->hdr.isInit = 1;
  walChecksumBytes(1, (u8*)&pWal->hdr, nCksum, 0, pWal->hdr.aCksum);


  memcpy((void *)&aHdr[1], (void *)&pWal->hdr, sizeof(WalIndexHdr));
  sqlite3OsShmBarrier(pWal->pDbFd);
  memcpy((void *)&aHdr[0], (void *)&pWal->hdr, sizeof(WalIndexHdr));
}

/*
** This function encodes a single frame header and writes it to a buffer
** supplied by the caller. A frame-header is made up of a series of 
** 4-byte big-endian integers, as follows:
**
................................................................................
  assert( (HASHTABLE_NSLOT & (HASHTABLE_NSLOT-1))==0 );
  return (iPage*HASHTABLE_HASH_1) & (HASHTABLE_NSLOT-1);
}
static int walNextHash(int iPriorHash){
  return (iPriorHash+1)&(HASHTABLE_NSLOT-1);
}

/* 
** Return pointers to the hash table and page number array stored on
** page iHash of the wal-index. The wal-index is broken into 32KB pages
** numbered starting from 0.
**
** Set output variable *paHash to point to the start of the hash table
** in the wal-index file. Set *piZero to one less than the frame 
** number of the first frame indexed by this hash table. If a
** slot in the hash table is set to N, it refers to frame number 
** (*piZero+N) in the log.
**
** Finally, set *paPgno such that for all frames F between (*piZero+1) and 
** (*piZero+HASHTABLE_NPAGE), (*paPgno)[F] is the database page number 
** associated with frame F.
*/
static int walHashGet(
  Wal *pWal,                      /* WAL handle */
  int iHash,                      /* Find the iHash'th table */
  volatile HASHTABLE_DATATYPE **paHash,     /* OUT: Pointer to hash index */
  volatile u32 **paPgno,          /* OUT: Pointer to page number array */
  u32 *piZero                     /* OUT: Frame associated with *paPgno[0] */
){
  int rc;                         /* Return code */
  volatile u32 *aPgno;


  rc = walIndexPage(pWal, iHash, &aPgno);
  assert( rc==SQLITE_OK || iHash>0 );

  if( rc==SQLITE_OK ){
    u32 iZero;
    volatile HASHTABLE_DATATYPE *aHash;

    aHash = (volatile HASHTABLE_DATATYPE *)&aPgno[HASHTABLE_NPAGE];
    if( iHash==0 ){
      aPgno = &aPgno[WALINDEX_HDR_SIZE/sizeof(u32)-1];
      iZero = 0;
    }else{
      iZero = HASHTABLE_NPAGE_ONE + (iHash-1)*HASHTABLE_NPAGE;
      aPgno = &aPgno[-1*iZero-1];
    }
  
    *paPgno = aPgno;
    *paHash = aHash;
    *piZero = iZero;
  }
  return rc;
}

/*
** Return the number of the wal-index page that contains the hash-table
** and page-number array that contain entries corresponding to WAL frame
** iFrame. The wal-index is broken up into 32KB pages. Wal-index pages 
** are numbered starting from 0.
*/
static int walFramePage(u32 iFrame){
  int iHash = (iFrame+HASHTABLE_NPAGE-HASHTABLE_NPAGE_ONE-1) / HASHTABLE_NPAGE;
  assert( (iHash==0 || iFrame>HASHTABLE_NPAGE_ONE)
       && (iHash>=1 || iFrame<=HASHTABLE_NPAGE_ONE)
       && (iHash<=1 || iFrame>(HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE))
       && (iHash>=2 || iFrame<=HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE)
       && (iHash<=2 || iFrame>(HASHTABLE_NPAGE_ONE+2*HASHTABLE_NPAGE))
................................................................................
  int iHash = walFramePage(iFrame);
  if( iHash==0 ){
    return pWal->apWiData[0][WALINDEX_HDR_SIZE/sizeof(u32) + iFrame - 1];
  }
  return pWal->apWiData[iHash][(iFrame-1-HASHTABLE_NPAGE_ONE)%HASHTABLE_NPAGE];
}


























/*
** Remove entries from the hash table that point to WAL slots greater
** than pWal->hdr.mxFrame.
**
** This function is called whenever pWal->hdr.mxFrame is decreased due
** to a rollback or savepoint.
**
................................................................................
** At most only the hash table containing pWal->hdr.mxFrame needs to be
** updated.  Any later hash tables will be automatically cleared when
** pWal->hdr.mxFrame advances to the point where those hash tables are
** actually needed.
*/
static void walCleanupHash(Wal *pWal){
  volatile HASHTABLE_DATATYPE *aHash;  /* Pointer to hash table to clear */
  volatile u32 *aPgno;                 /* Page number array for hash table */
  u32 iZero;                           /* frame == (aHash[x]+iZero) */
  int iLimit = 0;                      /* Zero values greater than this */
  int nByte;                           /* Number of bytes to zero in aPgno[] */
  int i;                               /* Used to iterate through aHash[] */

  assert( pWal->writeLock );
  testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE-1 );
  testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE );
  testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE+1 );

  if( pWal->hdr.mxFrame==0 ) return;

  /* Obtain pointers to the hash-table and page-number array containing 
  ** the entry that corresponds to frame pWal->hdr.mxFrame. It is guaranteed
  ** that the page said hash-table and array reside on is already mapped.
  */
  assert( pWal->nWiData>walFramePage(pWal->hdr.mxFrame) );
  assert( pWal->apWiData[walFramePage(pWal->hdr.mxFrame)] );
  walHashGet(pWal, walFramePage(pWal->hdr.mxFrame), &aHash, &aPgno, &iZero);

  /* Zero all hash-table entries that correspond to frame numbers greater
  ** than pWal->hdr.mxFrame.
  */
  iLimit = pWal->hdr.mxFrame - iZero;
  assert( iLimit>0 );
  for(i=0; i<HASHTABLE_NSLOT; i++){
    if( aHash[i]>iLimit ){
      aHash[i] = 0;
    }
  }
  
  /* Zero the entries in the aPgno array that correspond to frames with
  ** frame numbers greater than pWal->hdr.mxFrame. 
  */
  nByte = ((char *)aHash - (char *)&aPgno[pWal->hdr.mxFrame+1]);
  memset((void *)&aPgno[pWal->hdr.mxFrame+1], 0, nByte);


#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
  /* Verify that the every entry in the mapping region is still reachable
  ** via the hash table even after the cleanup.
  */
  if( iLimit ){
    int i;           /* Loop counter */
................................................................................


/*
** Set an entry in the wal-index that will map database page number
** pPage into WAL frame iFrame.
*/
static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){
  int rc;                         /* Return code */
  u32 iZero;                      /* One less than frame number of aPgno[1] */
  volatile u32 *aPgno;            /* Page number array */
  volatile HASHTABLE_DATATYPE *aHash;   /* Hash table */

  rc = walHashGet(pWal, walFramePage(iFrame), &aHash, &aPgno, &iZero);

  /* Assuming the wal-index file was successfully mapped, populate the


  ** page number array and hash table entry.
  */
  if( rc==SQLITE_OK ){
    int iKey;                     /* Hash table key */
    int idx;                      /* Value to write to hash-table slot */
    TESTONLY( int nCollide = 0;   /* Number of hash collisions */ )

    idx = iFrame - iZero;
    assert( idx <= HASHTABLE_NSLOT/2 + 1 );
    
    /* If this is the first entry to be added to this hash-table, zero the
    ** entire hash table and aPgno[] array before proceding. 
    */
    if( idx==1 ){
      int nByte = (u8 *)&aHash[HASHTABLE_NSLOT] - (u8 *)&aPgno[1+iZero];
      memset((void*)&aPgno[1+iZero], 0, nByte);
    }



    /* If the entry in aPgno[] is already set, then the previous writer
    ** must have exited unexpectedly in the middle of a transaction (after
    ** writing one or more dirty pages to the WAL to free up memory). 
    ** Remove the remnants of that writers uncommitted transaction from 
    ** the hash-table before writing any new entries.
    */
    if( aPgno[iFrame] ){
      walCleanupHash(pWal);
      assert( !aPgno[iFrame] );
    }

    /* Write the aPgno[] array entry and the hash-table slot. */
    for(iKey=walHash(iPage); aHash[iKey]; iKey=walNextHash(iKey)){
      assert( nCollide++ < idx );
    }
    aPgno[iFrame] = iPage;
    aHash[iKey] = idx;

#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
    /* Verify that the number of entries in the hash table exactly equals
    ** the number of entries in the mapping region.
    */
    {
................................................................................
  memset(p, 0, nByte);

  /* Allocate space for the WalIterator object */
  p->nSegment = nSegment;
  aSpace = (HASHTABLE_DATATYPE *)&p->aSegment[nSegment];
  aTmp = &aSpace[HASHTABLE_NPAGE*nSegment];
  for(i=0; i<nSegment; i++){
    volatile HASHTABLE_DATATYPE *aHash;
    int j;
    u32 iZero;
    int nEntry;
    volatile u32 *aPgno;
    int rc;

    rc = walHashGet(pWal, i, &aHash, &aPgno, &iZero);

    if( rc!=SQLITE_OK ){
      return rc;

    }
    nEntry = ((i+1)==nSegment)?iLast-iZero:(u32 *)aHash-(u32 *)&aPgno[iZero+1];


    iZero++;
    aPgno += iZero;

    for(j=0; j<nEntry; j++){
      aSpace[j] = j;
    }
    walMergesort((u32 *)aPgno, aTmp, aSpace, &nEntry);
................................................................................
  int rc;                         /* Return code */
  int szPage = pWal->hdr.szPage;  /* Database page-size */
  WalIterator *pIter = 0;         /* Wal iterator context */
  u32 iDbpage = 0;                /* Next database page to write */
  u32 iFrame = 0;                 /* Wal frame containing data for iDbpage */
  u32 mxSafeFrame;                /* Max frame that can be backfilled */
  int i;                          /* Loop counter */

  volatile WalCkptInfo *pInfo;    /* The checkpoint status information */

  /* Allocate the iterator */
  rc = walIteratorInit(pWal, &pIter);
  if( rc!=SQLITE_OK || pWal->hdr.mxFrame==0 ){
    goto walcheckpoint_out;
  }
................................................................................

  /* Compute in mxSafeFrame the index of the last frame of the WAL that is
  ** safe to write into the database.  Frames beyond mxSafeFrame might
  ** overwrite database pages that are in use by active readers and thus
  ** cannot be backfilled from the WAL.
  */
  mxSafeFrame = pWal->hdr.mxFrame;

  pInfo = walCkptInfo(pWal);

  for(i=1; i<WAL_NREADER; i++){
    u32 y = pInfo->aReadMark[i];
    if( mxSafeFrame>=y ){
      assert( y<=pWal->hdr.mxFrame );
      rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1);
      if( rc==SQLITE_OK ){
        pInfo->aReadMark[i] = READMARK_NOT_USED;
................................................................................
      if( rc!=SQLITE_OK ) break;
      rc = sqlite3OsWrite(pWal->pDbFd, zBuf, szPage, (iDbpage-1)*szPage);
      if( rc!=SQLITE_OK ) break;
    }

    /* If work was actually accomplished... */
    if( rc==SQLITE_OK ){
      if( mxSafeFrame==walIndexHdr(pWal)->mxFrame ){
        rc = sqlite3OsTruncate(pWal->pDbFd, ((i64)pWal->hdr.nPage*(i64)szPage));
        if( rc==SQLITE_OK && sync_flags ){
          rc = sqlite3OsSync(pWal->pDbFd, sync_flags);
        }
      }
      if( rc==SQLITE_OK ){
        pInfo->nBackfill = mxSafeFrame;
................................................................................
** pWal->hdr, then pWal->hdr is updated to the content of the new header
** and *pChanged is set to 1.
**
** If the checksum cannot be verified return non-zero. If the header
** is read successfully and the checksum verified, return zero.
*/
int walIndexTryHdr(Wal *pWal, int *pChanged){
  u32 aCksum[2];                  /* Checksum on the header content */
  WalIndexHdr h1, h2;             /* Two copies of the header content */
  WalIndexHdr volatile *aHdr;     /* Header in shared memory */


  /* The first page of the wal-index must be mapped at this point. */
  assert( pWal->nWiData>0 && pWal->apWiData[0] );






  /* Read the header. This might happen currently with a write to the
  ** same area of shared memory on a different CPU in a SMP,
  ** meaning it is possible that an inconsistent snapshot is read
  ** from the file. If this happens, return non-zero.
  **
  ** There are two copies of the header at the beginning of the wal-index.
  ** When reading, read [0] first then [1].  Writes are in the reverse order.
  ** Memory barriers are used to prevent the compiler or the hardware from
  ** reordering the reads and writes.
  */
  aHdr = walIndexHdr(pWal);
  memcpy(&h1, (void *)&aHdr[0], sizeof(h1));
  sqlite3OsShmBarrier(pWal->pDbFd);
  memcpy(&h2, (void *)&aHdr[1], sizeof(h2));

  if( memcmp(&h1, &h2, sizeof(h1))!=0 ){
    return 1;   /* Dirty read */
  }  
  if( h1.isInit==0 ){
    return 1;   /* Malformed header - probably all zeros */
  }
................................................................................
**
** If the wal-index header is successfully read, return SQLITE_OK. 
** Otherwise an SQLite error code.
*/
static int walIndexReadHdr(Wal *pWal, int *pChanged){
  int rc;                         /* Return code */
  int badHdr;                     /* True if a header read failed */
  volatile u32 *page0;

  /* Ensure that page 0 of the wal-index (the page that contains the 
  ** wal-index header) is mapped. Return early if an error occurs here.
  */
  assert( pChanged );
  rc = walIndexPage(pWal, 0, &page0);
  if( rc!=SQLITE_OK ){
    return rc;
  };
  assert( page0 || pWal->writeLock==0 );



  /* If the first page of the wal-index has been mapped, try to read the
  ** wal-index header immediately, without holding any lock. This usually
  ** works, but may fail if the wal-index header is corrupt or currently 
  ** being modified by another user.
  */
  badHdr = (page0 ? walIndexTryHdr(pWal, pChanged) : 1);

  /* If the first attempt failed, it might have been due to a race
  ** with a writer.  So get a WRITE lock and try again.
  */
  assert( badHdr==0 || pWal->writeLock==0 );

  if( badHdr && SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1)) ){

    pWal->writeLock = 1;
    if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){
      badHdr = walIndexTryHdr(pWal, pChanged);
      if( badHdr ){
        /* If the wal-index header is still malformed even while holding
        ** a WRITE lock, it can only mean that the header is corrupted and
        ** needs to be reconstructed.  So run recovery to do exactly that.
        */
        rc = walIndexRecover(pWal);
        *pChanged = 1;
      }


    }
    pWal->writeLock = 0;
    walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
  }

  return rc;
}

/*
** This is the value that walTryBeginRead returns when it needs to
................................................................................
** to select a particular WAL_READ_LOCK() that strives to let the
** checkpoint process do as much work as possible.  This routine might
** update values of the aReadMark[] array in the header, but if it does
** so it takes care to hold an exclusive lock on the corresponding
** WAL_READ_LOCK() while changing values.
*/
static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){

  volatile WalCkptInfo *pInfo;    /* Checkpoint information in wal-index */
  u32 mxReadMark;                 /* Largest aReadMark[] value */
  int mxI;                        /* Index of largest aReadMark[] value */
  int i;                          /* Loop counter */
  int rc = SQLITE_OK;             /* Return code  */

  assert( pWal->readLock<0 );     /* Not currently locked */
................................................................................
      }
    }
  }
  if( rc!=SQLITE_OK ){
    return rc;
  }


  pInfo = walCkptInfo(pWal);

  if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame ){
    /* The WAL has been completely backfilled (or it is empty).
    ** and can be safely ignored.
    */
    rc = walLockShared(pWal, WAL_READ_LOCK(0));
    sqlite3OsShmBarrier(pWal->pDbFd);
    if( rc==SQLITE_OK ){
      if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){
        /* It is not safe to allow the reader to continue here if frames
        ** may have been appended to the log before READ_LOCK(0) was obtained.
        ** When holding READ_LOCK(0), the reader ignores the entire log file,
        ** which implies that the database file contains a trustworthy
        ** snapshoT. Since holding READ_LOCK(0) prevents a checkpoint from
        ** happening, this is usually correct.
        **
................................................................................
    ** date before proceeding. That would not be possible without somehow
    ** blocking writers. It only guarantees that a dangerous checkpoint or 
    ** log-wrap (either of which would require an exclusive lock on
    ** WAL_READ_LOCK(mxI)) has not occurred since the snapshot was valid.
    */
    sqlite3OsShmBarrier(pWal->pDbFd);
    if( pInfo->aReadMark[mxI]!=mxReadMark
     || memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr))
    ){
      walUnlockShared(pWal, WAL_READ_LOCK(mxI));
      return WAL_RETRY;
    }else{
      assert( mxReadMark<=pWal->hdr.mxFrame );
      pWal->readLock = mxI;
    }
................................................................................
  **     table after the current read-transaction had started.
  */
  for(iHash=walFramePage(iLast); iHash>=0 && iRead==0; iHash--){
    volatile HASHTABLE_DATATYPE *aHash;  /* Pointer to hash table */
    volatile u32 *aPgno;                 /* Pointer to array of page numbers */
    u32 iZero;                    /* Frame number corresponding to aPgno[0] */
    int iKey;                     /* Hash slot index */
    int rc;

    rc = walHashGet(pWal, iHash, &aHash, &aPgno, &iZero);
    if( rc!=SQLITE_OK ){
      return rc;
    }
    for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){
      u32 iFrame = aHash[iKey] + iZero;
      if( iFrame<=iLast && aPgno[iFrame]==pgno ){
        assert( iFrame>iRead );
        iRead = iFrame;
      }
    }
................................................................................
** thread to write as doing so would cause a fork.  So this routine
** returns SQLITE_BUSY in that case and no write transaction is started.
**
** There can only be a single writer active at a time.
*/
int sqlite3WalBeginWriteTransaction(Wal *pWal){
  int rc;


  /* Cannot start a write transaction without first holding a read
  ** transaction. */
  assert( pWal->readLock>=0 );

  /* Only one writer allowed at a time.  Get the write lock.  Return
  ** SQLITE_BUSY if unable.
................................................................................
  }
  pWal->writeLock = 1;

  /* If another connection has written to the database file since the
  ** time the read transaction on this connection was started, then
  ** the write is disallowed.
  */

  if( memcmp(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr))!=0 ){
    walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
    pWal->writeLock = 0;
    rc = SQLITE_BUSY;
  }

  return rc;
}