/ Check-in [7e3fc2c8]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add experimental command "PRAGMA wal_blocking_checkpoint", which uses the busy-handler to block until all readers have finished in order to ensure the next writer will be able to wrap around to the start of the log file.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | blocking-checkpoint
Files: files | file ages | folders
SHA1: 7e3fc2c833a5baa08820c499867b6902bdc2ed5a
User & Date: dan 2010-11-16 18:56:51
Context
2010-11-18
12:11
Modify the interface to the blocking wal-checkpoint functionality. check-in: 72787c01 user: dan tags: blocking-checkpoint
2010-11-16
18:56
Add experimental command "PRAGMA wal_blocking_checkpoint", which uses the busy-handler to block until all readers have finished in order to ensure the next writer will be able to wrap around to the start of the log file. check-in: 7e3fc2c8 user: dan tags: blocking-checkpoint
02:49
Use the estimated number of rows computed for subqueries in the cost computations for outer queries. check-in: 56bbc539 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/btree.c.

7931
7932
7933
7934
7935
7936
7937







7938
7939
7940
7941
7942
7943
7944
7945
7946
7947
7948
7949
7950
7951
7952
7953
7954

#ifndef SQLITE_OMIT_WAL
/*
** Run a checkpoint on the Btree passed as the first argument.
**
** Return SQLITE_LOCKED if this or any other connection has an open 
** transaction on the shared-cache the argument Btree is connected to.







*/
int sqlite3BtreeCheckpoint(Btree *p){
  int rc = SQLITE_OK;
  if( p ){
    BtShared *pBt = p->pBt;
    sqlite3BtreeEnter(p);
    if( pBt->inTransaction!=TRANS_NONE ){
      rc = SQLITE_LOCKED;
    }else{
      rc = sqlite3PagerCheckpoint(pBt->pPager);
    }
    sqlite3BtreeLeave(p);
  }
  return rc;
}
#endif








>
>
>
>
>
>
>

|







|







7931
7932
7933
7934
7935
7936
7937
7938
7939
7940
7941
7942
7943
7944
7945
7946
7947
7948
7949
7950
7951
7952
7953
7954
7955
7956
7957
7958
7959
7960
7961

#ifndef SQLITE_OMIT_WAL
/*
** Run a checkpoint on the Btree passed as the first argument.
**
** Return SQLITE_LOCKED if this or any other connection has an open 
** transaction on the shared-cache the argument Btree is connected to.
**
** If parameter bBlock is true, then the layers below invoke the 
** busy-handler callback while waiting for readers to release locks so
** that the entire WAL can be checkpointed. If it is false, then as
** much as possible of the WAL is checkpointed without waiting for readers
** to finish. bBlock is true for "PRAGMA wal_blocking_checkpoint" and false
** for "PRAGMA wal_checkpoint".
*/
int sqlite3BtreeCheckpoint(Btree *p, int bBlock){
  int rc = SQLITE_OK;
  if( p ){
    BtShared *pBt = p->pBt;
    sqlite3BtreeEnter(p);
    if( pBt->inTransaction!=TRANS_NONE ){
      rc = SQLITE_LOCKED;
    }else{
      rc = sqlite3PagerCheckpoint(pBt->pPager, bBlock);
    }
    sqlite3BtreeLeave(p);
  }
  return rc;
}
#endif

Changes to src/btree.h.

203
204
205
206
207
208
209
210
211
212
213
214
215
216
217

#ifdef SQLITE_TEST
int sqlite3BtreeCursorInfo(BtCursor*, int*, int);
void sqlite3BtreeCursorList(Btree*);
#endif

#ifndef SQLITE_OMIT_WAL
  int sqlite3BtreeCheckpoint(Btree*);
#endif

/*
** If we are not using shared cache, then there is no need to
** use mutexes to access the BtShared structures.  So make the
** Enter and Leave procedures no-ops.
*/







|







203
204
205
206
207
208
209
210
211
212
213
214
215
216
217

#ifdef SQLITE_TEST
int sqlite3BtreeCursorInfo(BtCursor*, int*, int);
void sqlite3BtreeCursorList(Btree*);
#endif

#ifndef SQLITE_OMIT_WAL
  int sqlite3BtreeCheckpoint(Btree*, int);
#endif

/*
** If we are not using shared cache, then there is no need to
** use mutexes to access the BtShared structures.  So make the
** Enter and Leave procedures no-ops.
*/

Changes to src/main.c.

1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
....
1383
1384
1385
1386
1387
1388
1389



1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
  if( zDb && zDb[0] ){
    iDb = sqlite3FindDbName(db, zDb);
  }
  if( iDb<0 ){
    rc = SQLITE_ERROR;
    sqlite3Error(db, SQLITE_ERROR, "unknown database: %s", zDb);
  }else{
    rc = sqlite3Checkpoint(db, iDb);
    sqlite3Error(db, rc, 0);
  }
  rc = sqlite3ApiExit(db, rc);
  sqlite3_mutex_leave(db->mutex);
  return rc;
#endif
}
................................................................................
** The mutex on database handle db should be held by the caller. The mutex
** associated with the specific b-tree being checkpointed is taken by
** this function while the checkpoint is running.
**
** If iDb is passed SQLITE_MAX_ATTACHED, then all attached databases are
** checkpointed. If an error is encountered it is returned immediately -
** no attempt is made to checkpoint any remaining databases.



*/
int sqlite3Checkpoint(sqlite3 *db, int iDb){
  int rc = SQLITE_OK;             /* Return code */
  int i;                          /* Used to iterate through attached dbs */

  assert( sqlite3_mutex_held(db->mutex) );

  for(i=0; i<db->nDb && rc==SQLITE_OK; i++){
    if( i==iDb || iDb==SQLITE_MAX_ATTACHED ){
      rc = sqlite3BtreeCheckpoint(db->aDb[i].pBt);
    }
  }

  return rc;
}
#endif /* SQLITE_OMIT_WAL */








|







 







>
>
>

|







|







1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
....
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
  if( zDb && zDb[0] ){
    iDb = sqlite3FindDbName(db, zDb);
  }
  if( iDb<0 ){
    rc = SQLITE_ERROR;
    sqlite3Error(db, SQLITE_ERROR, "unknown database: %s", zDb);
  }else{
    rc = sqlite3Checkpoint(db, iDb, 0);
    sqlite3Error(db, rc, 0);
  }
  rc = sqlite3ApiExit(db, rc);
  sqlite3_mutex_leave(db->mutex);
  return rc;
#endif
}
................................................................................
** The mutex on database handle db should be held by the caller. The mutex
** associated with the specific b-tree being checkpointed is taken by
** this function while the checkpoint is running.
**
** If iDb is passed SQLITE_MAX_ATTACHED, then all attached databases are
** checkpointed. If an error is encountered it is returned immediately -
** no attempt is made to checkpoint any remaining databases.
**
** Parameter bBlock is true for a blocking-checkpoint, false for an 
** ordinary, non-blocking, checkpoint.
*/
int sqlite3Checkpoint(sqlite3 *db, int iDb, int bBlock){
  int rc = SQLITE_OK;             /* Return code */
  int i;                          /* Used to iterate through attached dbs */

  assert( sqlite3_mutex_held(db->mutex) );

  for(i=0; i<db->nDb && rc==SQLITE_OK; i++){
    if( i==iDb || iDb==SQLITE_MAX_ATTACHED ){
      rc = sqlite3BtreeCheckpoint(db->aDb[i].pBt, bBlock);
    }
  }

  return rc;
}
#endif /* SQLITE_OMIT_WAL */

Changes to src/pager.c.

6512
6513
6514
6515
6516
6517
6518
6519





6520
6521
6522
6523
6524
6525

6526
6527
6528
6529
6530
6531
6532
*/
sqlite3_backup **sqlite3PagerBackupPtr(Pager *pPager){
  return &pPager->pBackup;
}

#ifndef SQLITE_OMIT_WAL
/*
** This function is called when the user invokes "PRAGMA checkpoint".





*/
int sqlite3PagerCheckpoint(Pager *pPager){
  int rc = SQLITE_OK;
  if( pPager->pWal ){
    u8 *zBuf = (u8 *)pPager->pTmpSpace;
    rc = sqlite3WalCheckpoint(pPager->pWal,

        (pPager->noSync ? 0 : pPager->sync_flags),
        pPager->pageSize, zBuf
    );
  }
  return rc;
}








|
>
>
>
>
>

|




>







6512
6513
6514
6515
6516
6517
6518
6519
6520
6521
6522
6523
6524
6525
6526
6527
6528
6529
6530
6531
6532
6533
6534
6535
6536
6537
6538
*/
sqlite3_backup **sqlite3PagerBackupPtr(Pager *pPager){
  return &pPager->pBackup;
}

#ifndef SQLITE_OMIT_WAL
/*
** This function is called when the user invokes "PRAGMA wal_checkpoint",
** "PRAGMA wal_blocking_checkpoint" or calls the sqlite3_wal_checkpoint()
** or wal_blocking_checkpoint() API functions.
**
** Parameter bBlock is true for a blocking-checkpoint, false for an 
** ordinary, non-blocking, checkpoint.
*/
int sqlite3PagerCheckpoint(Pager *pPager, int bBlock){
  int rc = SQLITE_OK;
  if( pPager->pWal ){
    u8 *zBuf = (u8 *)pPager->pTmpSpace;
    rc = sqlite3WalCheckpoint(pPager->pWal,
        (bBlock ? pPager->xBusyHandler : 0), pPager->pBusyHandlerArg,
        (pPager->noSync ? 0 : pPager->sync_flags),
        pPager->pageSize, zBuf
    );
  }
  return rc;
}

Changes to src/pager.h.

134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
int sqlite3PagerSync(Pager *pPager);
int sqlite3PagerCommitPhaseTwo(Pager*);
int sqlite3PagerRollback(Pager*);
int sqlite3PagerOpenSavepoint(Pager *pPager, int n);
int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint);
int sqlite3PagerSharedLock(Pager *pPager);

int sqlite3PagerCheckpoint(Pager *pPager);
int sqlite3PagerWalSupported(Pager *pPager);
int sqlite3PagerWalCallback(Pager *pPager);
int sqlite3PagerOpenWal(Pager *pPager, int *pisOpen);
int sqlite3PagerCloseWal(Pager *pPager);

/* Functions used to query pager state and configuration. */
u8 sqlite3PagerIsreadonly(Pager*);







|







134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
int sqlite3PagerSync(Pager *pPager);
int sqlite3PagerCommitPhaseTwo(Pager*);
int sqlite3PagerRollback(Pager*);
int sqlite3PagerOpenSavepoint(Pager *pPager, int n);
int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint);
int sqlite3PagerSharedLock(Pager *pPager);

int sqlite3PagerCheckpoint(Pager *pPager, int);
int sqlite3PagerWalSupported(Pager *pPager);
int sqlite3PagerWalCallback(Pager *pPager);
int sqlite3PagerOpenWal(Pager *pPager, int *pisOpen);
int sqlite3PagerCloseWal(Pager *pPager);

/* Functions used to query pager state and configuration. */
u8 sqlite3PagerIsreadonly(Pager*);

Changes to src/pragma.c.

1390
1391
1392
1393
1394
1395
1396

1397
1398
1399
1400





1401
1402

1403
1404
1405
1406
1407
1408
1409
    }
  }else
#endif /* SQLITE_OMIT_COMPILEOPTION_DIAGS */

#ifndef SQLITE_OMIT_WAL
  /*
  **   PRAGMA [database.]wal_checkpoint

  **
  ** Checkpoint the database.
  */
  if( sqlite3StrICmp(zLeft, "wal_checkpoint")==0 ){





    if( sqlite3ReadSchema(pParse) ) goto pragma_out;
    sqlite3VdbeAddOp3(v, OP_Checkpoint, pId2->z?iDb:SQLITE_MAX_ATTACHED, 0, 0);

  }else

  /*
  **   PRAGMA wal_autocheckpoint
  **   PRAGMA wal_autocheckpoint = N
  **
  ** Configure a database connection to automatically checkpoint a database







>



|
>
>
>
>
>

<
>







1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407

1408
1409
1410
1411
1412
1413
1414
1415
    }
  }else
#endif /* SQLITE_OMIT_COMPILEOPTION_DIAGS */

#ifndef SQLITE_OMIT_WAL
  /*
  **   PRAGMA [database.]wal_checkpoint
  **   PRAGMA [database.]wal_blocking_checkpoint
  **
  ** Checkpoint the database.
  */
  if( sqlite3StrICmp(zLeft, "wal_checkpoint")==0 
   || sqlite3StrICmp(zLeft, "wal_blocking_checkpoint")==0 
  ){
    int bBlock = (zLeft[14]!=0);
    int iBt = (pId2->z?iDb:SQLITE_MAX_ATTACHED);
    assert( bBlock==(sqlite3StrICmp(zLeft, "wal_checkpoint")!=0) );
    if( sqlite3ReadSchema(pParse) ) goto pragma_out;

    sqlite3VdbeAddOp2(v, OP_Checkpoint, iBt, bBlock);
  }else

  /*
  **   PRAGMA wal_autocheckpoint
  **   PRAGMA wal_autocheckpoint = N
  **
  ** Configure a database connection to automatically checkpoint a database

Changes to src/sqliteInt.h.

3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
int sqlite3TransferBindings(sqlite3_stmt *, sqlite3_stmt *);
int sqlite3Reprepare(Vdbe*);
void sqlite3ExprListCheckLength(Parse*, ExprList*, const char*);
CollSeq *sqlite3BinaryCompareCollSeq(Parse *, Expr *, Expr *);
int sqlite3TempInMemory(const sqlite3*);
VTable *sqlite3GetVTable(sqlite3*, Table*);
const char *sqlite3JournalModename(int);
int sqlite3Checkpoint(sqlite3*, int);
int sqlite3WalDefaultHook(void*,sqlite3*,const char*,int);

/* Declarations for functions in fkey.c. All of these are replaced by
** no-op macros if OMIT_FOREIGN_KEY is defined. In this case no foreign
** key functionality is available. If OMIT_TRIGGER is defined but
** OMIT_FOREIGN_KEY is not, only some of the functions are no-oped. In
** this case foreign keys are parsed, but no other functionality is 







|







3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
int sqlite3TransferBindings(sqlite3_stmt *, sqlite3_stmt *);
int sqlite3Reprepare(Vdbe*);
void sqlite3ExprListCheckLength(Parse*, ExprList*, const char*);
CollSeq *sqlite3BinaryCompareCollSeq(Parse *, Expr *, Expr *);
int sqlite3TempInMemory(const sqlite3*);
VTable *sqlite3GetVTable(sqlite3*, Table*);
const char *sqlite3JournalModename(int);
int sqlite3Checkpoint(sqlite3*, int, int);
int sqlite3WalDefaultHook(void*,sqlite3*,const char*,int);

/* Declarations for functions in fkey.c. All of these are replaced by
** no-op macros if OMIT_FOREIGN_KEY is defined. In this case no foreign
** key functionality is available. If OMIT_TRIGGER is defined but
** OMIT_FOREIGN_KEY is not, only some of the functions are no-oped. In
** this case foreign keys are parsed, but no other functionality is 

Changes to src/vdbe.c.

5212
5213
5214
5215
5216
5217
5218
5219
5220
5221
5222
5223
5224
5225
5226
5227
5228
5229
5230
5231
5232
  if( sqlite3VdbeMemTooBig(pMem) ){
    goto too_big;
  }
  break;
}

#ifndef SQLITE_OMIT_WAL
/* Opcode: Checkpoint P1 * * * *
**
** Checkpoint database P1. This is a no-op if P1 is not currently in
** WAL mode.
*/
case OP_Checkpoint: {
  rc = sqlite3Checkpoint(db, pOp->p1);
  break;
};  
#endif

#ifndef SQLITE_OMIT_PRAGMA
/* Opcode: JournalMode P1 P2 P3 * P5
**







|


|


|







5212
5213
5214
5215
5216
5217
5218
5219
5220
5221
5222
5223
5224
5225
5226
5227
5228
5229
5230
5231
5232
  if( sqlite3VdbeMemTooBig(pMem) ){
    goto too_big;
  }
  break;
}

#ifndef SQLITE_OMIT_WAL
/* Opcode: Checkpoint P1 P2 * * *
**
** Checkpoint database P1. This is a no-op if P1 is not currently in
** WAL mode. If P2 is non-zero, this is a blocking checkpoint.
*/
case OP_Checkpoint: {
  rc = sqlite3Checkpoint(db, pOp->p1, pOp->p2);
  break;
};  
#endif

#ifndef SQLITE_OMIT_PRAGMA
/* Opcode: JournalMode P1 P2 P3 * P5
**

Changes to src/wal.c.

1518
1519
1520
1521
1522
1523
1524




















1525
1526
1527
1528
1529
1530
1531
....
1552
1553
1554
1555
1556
1557
1558


1559
1560
1561
1562
1563
1564
1565
....
1589
1590
1591
1592
1593
1594
1595

1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613

1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
....
1662
1663
1664
1665
1666
1667
1668










1669
1670
1671
1672

1673
1674
1675
1676
1677
1678
1679
....
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
....
2615
2616
2617
2618
2619
2620
2621



2622
2623
2624


2625
2626
2627
2628
2629
2630
2631
2632

2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643









2644

2645

2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659

2660
2661
2662
2663
2664
2665
2666

  if( rc!=SQLITE_OK ){
    walIteratorFree(p);
  }
  *pp = p;
  return rc;
}





















/*
** Copy as much content as we can from the WAL back into the database file
** in response to an sqlite3_wal_checkpoint() request or the equivalent.
**
** The amount of information copies from WAL to database might be limited
** by active readers.  This routine will never overwrite a database page
................................................................................
**
** The caller must be holding sufficient locks to ensure that no other
** checkpoint is running (in any other thread or process) at the same
** time.
*/
static int walCheckpoint(
  Wal *pWal,                      /* Wal connection */


  int sync_flags,                 /* Flags for OsSync() (or 0) */
  int nBuf,                       /* Size of zBuf in bytes */
  u8 *zBuf                        /* Temporary buffer to use */
){
  int rc;                         /* Return code */
  int szPage;                     /* Database page-size */
  WalIterator *pIter = 0;         /* Wal iterator context */
................................................................................
  }

  /* Compute in mxSafeFrame the index of the last frame of the WAL that is
  ** safe to write into the database.  Frames beyond mxSafeFrame might
  ** overwrite database pages that are in use by active readers and thus
  ** cannot be backfilled from the WAL.
  */

  mxSafeFrame = pWal->hdr.mxFrame;
  mxPage = pWal->hdr.nPage;
  pInfo = walCkptInfo(pWal);
  for(i=1; i<WAL_NREADER; i++){
    u32 y = pInfo->aReadMark[i];
    if( mxSafeFrame>=y ){
      assert( y<=pWal->hdr.mxFrame );
      rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1);
      if( rc==SQLITE_OK ){
        pInfo->aReadMark[i] = READMARK_NOT_USED;
        walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
      }else if( rc==SQLITE_BUSY ){
        mxSafeFrame = y;
      }else{
        goto walcheckpoint_out;
      }
    }
  }


  if( pInfo->nBackfill<mxSafeFrame
   && (rc = walLockExclusive(pWal, WAL_READ_LOCK(0), 1))==SQLITE_OK
  ){
    i64 nSize;                    /* Current size of database file */
    u32 nBackfill = pInfo->nBackfill;

    /* Sync the WAL to disk */
    if( sync_flags ){
      rc = sqlite3OsSync(pWal->pWalFd, sync_flags);
................................................................................
      if( rc==SQLITE_OK ){
        pInfo->nBackfill = mxSafeFrame;
      }
    }

    /* Release the reader lock held while backfilling */
    walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1);










  }else if( rc==SQLITE_BUSY ){
    /* Reset the return code so as not to report a checkpoint failure
    ** just because active readers prevent any backfill.
    */

    rc = SQLITE_OK;
  }

 walcheckpoint_out:
  walIteratorFree(pIter);
  return rc;
}
................................................................................
    ** The EXCLUSIVE lock is not released before returning.
    */
    rc = sqlite3OsLock(pWal->pDbFd, SQLITE_LOCK_EXCLUSIVE);
    if( rc==SQLITE_OK ){
      if( pWal->exclusiveMode==WAL_NORMAL_MODE ){
        pWal->exclusiveMode = WAL_EXCLUSIVE_MODE;
      }
      rc = sqlite3WalCheckpoint(pWal, sync_flags, nBuf, zBuf);
      if( rc==SQLITE_OK ){
        isDelete = 1;
      }
    }

    walIndexClose(pWal, isDelete);
    sqlite3OsClose(pWal->pWalFd);
................................................................................

/* 
** This routine is called to implement sqlite3_wal_checkpoint() and
** related interfaces.
**
** Obtain a CHECKPOINT lock and then backfill as much information as
** we can from WAL into the database.



*/
int sqlite3WalCheckpoint(
  Wal *pWal,                      /* Wal connection */


  int sync_flags,                 /* Flags to sync db file with (or 0) */
  int nBuf,                       /* Size of temporary buffer */
  u8 *zBuf                        /* Temporary buffer to use */
){
  int rc;                         /* Return code */
  int isChanged = 0;              /* True if a new wal-index header is loaded */

  assert( pWal->ckptLock==0 );


  WALTRACE(("WAL%p: checkpoint begins\n", pWal));
  rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1);
  if( rc ){
    /* Usually this is SQLITE_BUSY meaning that another thread or process
    ** is already running a checkpoint, or maybe a recovery.  But it might
    ** also be SQLITE_IOERR. */
    return rc;
  }
  pWal->ckptLock = 1;










  /* Copy data from the log to the database file. */

  rc = walIndexReadHdr(pWal, &isChanged);

  if( rc==SQLITE_OK ){
    rc = walCheckpoint(pWal, sync_flags, nBuf, zBuf);
  }
  if( isChanged ){
    /* If a new wal-index header was loaded before the checkpoint was 
    ** performed, then the pager-cache associated with pWal is now
    ** out of date. So zero the cached wal-index header to ensure that
    ** next time the pager opens a snapshot on this database it knows that
    ** the cache needs to be reset.
    */
    memset(&pWal->hdr, 0, sizeof(WalIndexHdr));
  }

  /* Release the locks. */

  walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1);
  pWal->ckptLock = 0;
  WALTRACE(("WAL%p: checkpoint %s\n", pWal, rc ? "failed" : "ok"));
  return rc;
}

/* Return the value to pass to a sqlite3_wal_hook callback, the







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>
>







 







>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>


|







 







>
>
>
>
>
>
>
>
>
>
|

<
<
>







 







|







 







>
>
>



>
>








>











>
>
>
>
>
>
>
>
>

>
|
>

|












>







1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
....
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
....
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
....
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704


1705
1706
1707
1708
1709
1710
1711
1712
....
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
....
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717

  if( rc!=SQLITE_OK ){
    walIteratorFree(p);
  }
  *pp = p;
  return rc;
}

/*
** Attempt to obtain the exclusive WAL lock defined by parameters lockIdx and
** n. If the attempt fails and parameter xBusy is not NULL, then it is a
** busy-handler function. Invoke it and retry the lock until either the
** lock is successfully obtained or the busy-handler returns 0.
*/
static int walBusyLock(
  Wal *pWal,                      /* WAL connection */
  int (*xBusy)(void*),            /* Function to call when busy */
  void *pBusyArg,                 /* Context argument for xBusyHandler */
  int lockIdx,                    /* Offset of first byte to lock */
  int n                           /* Number of bytes to lock */
){
  int rc;
  do {
    rc = walLockExclusive(pWal, lockIdx, n);
  }while( xBusy && rc==SQLITE_BUSY && xBusy(pBusyArg) );
  return rc;
}

/*
** Copy as much content as we can from the WAL back into the database file
** in response to an sqlite3_wal_checkpoint() request or the equivalent.
**
** The amount of information copies from WAL to database might be limited
** by active readers.  This routine will never overwrite a database page
................................................................................
**
** The caller must be holding sufficient locks to ensure that no other
** checkpoint is running (in any other thread or process) at the same
** time.
*/
static int walCheckpoint(
  Wal *pWal,                      /* Wal connection */
  int (*xBusy)(void*),            /* Function to call when busy */
  void *pBusyArg,                 /* Context argument for xBusyHandler */
  int sync_flags,                 /* Flags for OsSync() (or 0) */
  int nBuf,                       /* Size of zBuf in bytes */
  u8 *zBuf                        /* Temporary buffer to use */
){
  int rc;                         /* Return code */
  int szPage;                     /* Database page-size */
  WalIterator *pIter = 0;         /* Wal iterator context */
................................................................................
  }

  /* Compute in mxSafeFrame the index of the last frame of the WAL that is
  ** safe to write into the database.  Frames beyond mxSafeFrame might
  ** overwrite database pages that are in use by active readers and thus
  ** cannot be backfilled from the WAL.
  */
  do {
    mxSafeFrame = pWal->hdr.mxFrame;
    mxPage = pWal->hdr.nPage;
    pInfo = walCkptInfo(pWal);
    for(i=1; i<WAL_NREADER; i++){
      u32 y = pInfo->aReadMark[i];
      if( mxSafeFrame>=y ){
        assert( y<=pWal->hdr.mxFrame );
        rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1);
        if( rc==SQLITE_OK ){
          pInfo->aReadMark[i] = READMARK_NOT_USED;
          walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
        }else if( rc==SQLITE_BUSY ){
          mxSafeFrame = y;
        }else{
          goto walcheckpoint_out;
        }
      }
    }
  }while( xBusy && mxSafeFrame<pWal->hdr.mxFrame && xBusy(pBusyArg) );

  if( pInfo->nBackfill<mxSafeFrame
   && (rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(0), 1))==SQLITE_OK
  ){
    i64 nSize;                    /* Current size of database file */
    u32 nBackfill = pInfo->nBackfill;

    /* Sync the WAL to disk */
    if( sync_flags ){
      rc = sqlite3OsSync(pWal->pWalFd, sync_flags);
................................................................................
      if( rc==SQLITE_OK ){
        pInfo->nBackfill = mxSafeFrame;
      }
    }

    /* Release the reader lock held while backfilling */
    walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1);

    if( xBusy && rc==SQLITE_OK && pWal->hdr.mxFrame==mxSafeFrame ){
      assert( pWal->writeLock );
      rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(1), WAL_NREADER-1);
      if( rc==SQLITE_OK ){
        walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
      }
    }
  }

  if( rc==SQLITE_BUSY ){
    /* Reset the return code so as not to report a checkpoint failure


    ** just because there are active readers.  */
    rc = SQLITE_OK;
  }

 walcheckpoint_out:
  walIteratorFree(pIter);
  return rc;
}
................................................................................
    ** The EXCLUSIVE lock is not released before returning.
    */
    rc = sqlite3OsLock(pWal->pDbFd, SQLITE_LOCK_EXCLUSIVE);
    if( rc==SQLITE_OK ){
      if( pWal->exclusiveMode==WAL_NORMAL_MODE ){
        pWal->exclusiveMode = WAL_EXCLUSIVE_MODE;
      }
      rc = sqlite3WalCheckpoint(pWal, 0, 0, sync_flags, nBuf, zBuf);
      if( rc==SQLITE_OK ){
        isDelete = 1;
      }
    }

    walIndexClose(pWal, isDelete);
    sqlite3OsClose(pWal->pWalFd);
................................................................................

/* 
** This routine is called to implement sqlite3_wal_checkpoint() and
** related interfaces.
**
** Obtain a CHECKPOINT lock and then backfill as much information as
** we can from WAL into the database.
**
** If parameter xBusy is not NULL, it is a pointer to a busy-handler
** callback. In this case this function runs a blocking checkpoint.
*/
int sqlite3WalCheckpoint(
  Wal *pWal,                      /* Wal connection */
  int (*xBusy)(void*),            /* Function to call when busy */
  void *pBusyArg,                 /* Context argument for xBusyHandler */
  int sync_flags,                 /* Flags to sync db file with (or 0) */
  int nBuf,                       /* Size of temporary buffer */
  u8 *zBuf                        /* Temporary buffer to use */
){
  int rc;                         /* Return code */
  int isChanged = 0;              /* True if a new wal-index header is loaded */

  assert( pWal->ckptLock==0 );
  assert( pWal->writeLock==0 );

  WALTRACE(("WAL%p: checkpoint begins\n", pWal));
  rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1);
  if( rc ){
    /* Usually this is SQLITE_BUSY meaning that another thread or process
    ** is already running a checkpoint, or maybe a recovery.  But it might
    ** also be SQLITE_IOERR. */
    return rc;
  }
  pWal->ckptLock = 1;

  /* If this is a blocking-checkpoint, then obtain the write-lock as well
  ** to prevent any writers from running while the checkpoint is underway.
  ** This has to be done before the call to walIndexReadHdr() below.
  */
  if( xBusy ){
    rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_WRITE_LOCK, 1);
    if( rc==SQLITE_OK ) pWal->writeLock = 1;
  }

  /* Copy data from the log to the database file. */
  if( rc==SQLITE_OK ){
    rc = walIndexReadHdr(pWal, &isChanged);
  }
  if( rc==SQLITE_OK ){
    rc = walCheckpoint(pWal, xBusy, pBusyArg, sync_flags, nBuf, zBuf);
  }
  if( isChanged ){
    /* If a new wal-index header was loaded before the checkpoint was 
    ** performed, then the pager-cache associated with pWal is now
    ** out of date. So zero the cached wal-index header to ensure that
    ** next time the pager opens a snapshot on this database it knows that
    ** the cache needs to be reset.
    */
    memset(&pWal->hdr, 0, sizeof(WalIndexHdr));
  }

  /* Release the locks. */
  sqlite3WalEndWriteTransaction(pWal);
  walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1);
  pWal->ckptLock = 0;
  WALTRACE(("WAL%p: checkpoint %s\n", pWal, rc ? "failed" : "ok"));
  return rc;
}

/* Return the value to pass to a sqlite3_wal_hook callback, the

Changes to src/wal.h.

28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
..
82
83
84
85
86
87
88


89
90
91
92
93
94
95
# define sqlite3WalDbsize(y)                   0
# define sqlite3WalBeginWriteTransaction(y)    0
# define sqlite3WalEndWriteTransaction(x)      0
# define sqlite3WalUndo(x,y,z)                 0
# define sqlite3WalSavepoint(y,z)
# define sqlite3WalSavepointUndo(y,z)          0
# define sqlite3WalFrames(u,v,w,x,y,z)         0
# define sqlite3WalCheckpoint(u,v,w,x)         0
# define sqlite3WalCallback(z)                 0
# define sqlite3WalExclusiveMode(y,z)          0
# define sqlite3WalHeapMemory(z)               0
#else

#define WAL_SAVEPOINT_NDATA 4

................................................................................

/* Write a frame or frames to the log. */
int sqlite3WalFrames(Wal *pWal, int, PgHdr *, Pgno, int, int);

/* Copy pages from the log to the database file */ 
int sqlite3WalCheckpoint(
  Wal *pWal,                      /* Write-ahead log connection */


  int sync_flags,                 /* Flags to sync db file with (or 0) */
  int nBuf,                       /* Size of buffer nBuf */
  u8 *zBuf                        /* Temporary buffer to use */
);

/* Return the value to pass to a sqlite3_wal_hook callback, the
** number of frames in the WAL at the point of the last commit since







|







 







>
>







28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
..
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# define sqlite3WalDbsize(y)                   0
# define sqlite3WalBeginWriteTransaction(y)    0
# define sqlite3WalEndWriteTransaction(x)      0
# define sqlite3WalUndo(x,y,z)                 0
# define sqlite3WalSavepoint(y,z)
# define sqlite3WalSavepointUndo(y,z)          0
# define sqlite3WalFrames(u,v,w,x,y,z)         0
# define sqlite3WalCheckpoint(u,v,w,x,y,z)     0
# define sqlite3WalCallback(z)                 0
# define sqlite3WalExclusiveMode(y,z)          0
# define sqlite3WalHeapMemory(z)               0
#else

#define WAL_SAVEPOINT_NDATA 4

................................................................................

/* Write a frame or frames to the log. */
int sqlite3WalFrames(Wal *pWal, int, PgHdr *, Pgno, int, int);

/* Copy pages from the log to the database file */ 
int sqlite3WalCheckpoint(
  Wal *pWal,                      /* Write-ahead log connection */
  int (*xBusy)(void*),            /* Function to call when busy */
  void *pBusyArg,                 /* Context argument for xBusyHandler */
  int sync_flags,                 /* Flags to sync db file with (or 0) */
  int nBuf,                       /* Size of buffer nBuf */
  u8 *zBuf                        /* Temporary buffer to use */
);

/* Return the value to pass to a sqlite3_wal_hook callback, the
** number of frames in the WAL at the point of the last commit since

Added test/wal5.test.













































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# 2010 April 13
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this file is testing the operation of "blocking-checkpoint"
# operations.
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl
source $testdir/lock_common.tcl
source $testdir/wal_common.tcl
ifcapable !wal {finish_test ; return }

set testprefix wal5

do_multiclient_test tn {

  proc db_page_count  {} { expr [file size test.db] / 1024 }
  proc wal_page_count {} { wal_frame_count test.db-wal 1024 }

  set ::nBusyHandler 0
  set ::busy_handler_script ""
  proc busyhandler {n} {
    incr ::nBusyHandler 
    eval $::busy_handler_script
    return 0
  }

  proc reopen_all {} {
    code1 {db close}
    code2 {db2 close}
    code3 {db3 close}
    code1 {sqlite3 db test.db}
    code2 {sqlite3 db2 test.db}
    code3 {sqlite3 db3 test.db}
    sql1  { PRAGMA synchronous = NORMAL }
    code1 { db busy busyhandler }
  }

  do_test 1.$tn.1 {
    reopen_all
    sql1 {
      PRAGMA page_size = 1024;
      PRAGMA auto_vacuum = 0;
      CREATE TABLE t1(x, y);
      PRAGMA journal_mode = WAL;
      INSERT INTO t1 VALUES(1, zeroblob(1200));
      INSERT INTO t1 VALUES(2, zeroblob(1200));
      INSERT INTO t1 VALUES(3, zeroblob(1200));
    }
    expr [file size test.db] / 1024
  } {2}

  # Have connection 2 grab a read-lock on the current snapshot.
  do_test 1.$tn.2 { sql2 { BEGIN; SELECT x FROM t1 } } {1 2 3}

  # Attempt a checkpoint.
  do_test 1.$tn.3 {
    sql1 { PRAGMA wal_checkpoint }
    list [db_page_count] [wal_page_count]
  } {5 9}

  # Write to the db again. The log cannot wrap because of the lock still
  # held by connection 2. The busy-handler has not yet been invoked.
  do_test 1.$tn.4 {
    sql1 { INSERT INTO t1 VALUES(4, zeroblob(1200)) }
    list [db_page_count] [wal_page_count] $::nBusyHandler
  } {5 12 0}

  # Now do a blocking-checkpoint. Set the busy-handler up so that connection
  # 2 releases its lock on the 6th invocation. The checkpointer should then
  # proceed to checkpoint the entire log file. Next write should go to the 
  # start of the log file.
  #
  set ::busy_handler_script { if {$n==5} { sql2 COMMIT } }
  do_test 1.$tn.5 {
    sql1 { PRAGMA wal_blocking_checkpoint }
    list [db_page_count] [wal_page_count] $::nBusyHandler
  } {6 12 6}
  do_test 1.$tn.6 {
    set ::nBusyHandler 0
    sql1 { INSERT INTO t1 VALUES(5, zeroblob(1200)) }
    list [db_page_count] [wal_page_count] $::nBusyHandler
  } {6 12 0}

  do_test 1.$tn.7 {
    reopen_all
    list [db_page_count] [wal_page_count] $::nBusyHandler
  } {7 0 0}

  do_test 1.$tn.8  { sql2 { BEGIN ; SELECT x FROM t1 } } {1 2 3 4 5}
  do_test 1.$tn.9  {
    sql1 { INSERT INTO t1 VALUES(6, zeroblob(1200)) }
    list [db_page_count] [wal_page_count] $::nBusyHandler
  } {7 5 0}
  do_test 1.$tn.10 { sql3 { BEGIN ; SELECT x FROM t1 } } {1 2 3 4 5 6}

  set ::busy_handler_script { 
    if {$n==5} { sql2 COMMIT } 
    if {$n==6} { set ::db_file_size [db_page_count] }
    if {$n==7} { sql3 COMMIT }
  }
  do_test 1.$tn.11 {
    sql1 { PRAGMA wal_blocking_checkpoint }
    list [db_page_count] [wal_page_count] $::nBusyHandler
  } {10 5 8}
  do_test 1.$tn.12 { set ::db_file_size } 10
}

finish_test

Changes to test/wal_common.tcl.

14
15
16
17
18
19
20

21

22
23
24
25
26
27
28
#

proc wal_file_size {nFrame pgsz} {
  expr {32 + ($pgsz+24)*$nFrame}
}

proc wal_frame_count {zFile pgsz} {

  set f [file size $zFile]

  expr {($f - 32) / ($pgsz+24)}
}

proc wal_cksum_intlist {ckv1 ckv2 intlist} {
  upvar $ckv1 c1
  upvar $ckv2 c2
  foreach {v1 v2} $intlist {







>

>







14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
#

proc wal_file_size {nFrame pgsz} {
  expr {32 + ($pgsz+24)*$nFrame}
}

proc wal_frame_count {zFile pgsz} {
  if {[file exists $zFile]==0} { return 0 }
  set f [file size $zFile]
  if {$f < 32} { return 0 }
  expr {($f - 32) / ($pgsz+24)}
}

proc wal_cksum_intlist {ckv1 ckv2 intlist} {
  upvar $ckv1 c1
  upvar $ckv2 c2
  foreach {v1 v2} $intlist {