SQLite

Check-in [3f958e87c3]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add experimental locking scheme.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | wal
Files: files | file ages | folders
SHA1: 3f958e87c33d667d299b03ffdef58db5dc6363f4
User & Date: dan 2010-04-13 19:27:31.000
Context
2010-04-14
11:23
Fixes for locking issues in WAL mode. (check-in: a9617eff39 user: dan tags: wal)
2010-04-13
19:27
Add experimental locking scheme. (check-in: 3f958e87c3 user: dan tags: wal)
15:30
Fix an uninitialized variable in readDbPage of pager.c. (check-in: f4e1150fed user: drh tags: wal)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/log.c.
9
10
11
12
13
14
15

16
17
18
19
20
21
22
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>

typedef struct LogSummaryHdr LogSummaryHdr;
typedef struct LogSummary LogSummary;
typedef struct LogCheckpoint LogCheckpoint;



/*
** The following structure may be used to store the same data that
** is stored in the log-summary header.
**
** Member variables iCheck1 and iCheck2 contain the checksum for the







>







9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>

typedef struct LogSummaryHdr LogSummaryHdr;
typedef struct LogSummary LogSummary;
typedef struct LogCheckpoint LogCheckpoint;
typedef struct LogLock LogLock;


/*
** The following structure may be used to store the same data that
** is stored in the log-summary header.
**
** Member variables iCheck1 and iCheck2 contain the checksum for the
51
52
53
54
55
56
57

58
59
60
61
62

63
64
65
66
67
68



69
















70
71
72
73
74
75
76
77
78

79

80
81
82
83
84
85
86
**       the required blocking file-locks.
*/
struct LogSummary {
  sqlite3_mutex *mutex;           /* Mutex used to protect this object */
  int nRef;                       /* Number of pointers to this structure */
  int fd;                         /* File descriptor open on log-summary.
                                  ** The open path treats fd<0 as "not yet
                                  ** initialized" and calls logSummaryInit() */
  char *zPath;                    /* Path to associated WAL file */

  LogSummary *pNext;              /* Next in global list (head: pLogSummary) */
  int nData;                      /* Size of aData allocation/mapping */
  u32 *aData;                     /* File body. The log-summary header and its
                                  ** two checksum words are read from the
                                  ** start of this array */
};


/*
** List of all LogSummary objects created by this process. Protected by
** static mutex LOG_SUMMARY_MUTEX. TODO: Should have a dedicated mutex
** here instead of borrowing the LRU mutex.
*/
#define LOG_SUMMARY_MUTEX SQLITE_MUTEX_STATIC_LRU



static LogSummary *pLogSummary = 0;

















/*
** One instance per connection to a database log.
*/
struct Log {
  LogSummary *pSummary;           /* Log file summary data */
  sqlite3_vfs *pVfs;              /* The VFS used to create pFd */
  sqlite3_file *pFd;              /* File handle for log file */
  int sync_flags;                 /* Flags to use with OsSync() */
  int isLocked;                   /* True if a snapshot is held open. Set by
                                  ** sqlite3LogOpenSnapshot(), cleared by
                                  ** sqlite3LogCloseSnapshot() */
  int isWriteLocked;              /* True if this is the writer connection.
                                  ** Maintained by sqlite3LogWriteLock() */
  LogSummaryHdr hdr;              /* Log summary header for current snapshot */

};


/*
** This structure is used to implement an iterator that iterates through
** all frames in the log in database page order. Where two or more frames
** correspond to the same database page, the iterator visits only the 
** frame most recently written to the log.
**







>





>

<
|
|

|
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>






|


>

>







52
53
54
55
56
57
58
59
60
61
62
63
64
65
66

67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
**       the required blocking file-locks.
*/
struct LogSummary {
  sqlite3_mutex *mutex;           /* Mutex used to protect this object */
  int nRef;                       /* Number of pointers to this structure */
  int fd;                         /* File descriptor open on log-summary.
                                  ** The open path treats fd<0 as "not yet
                                  ** initialized" and calls logSummaryInit().
                                  ** logLockRegion() takes its fcntl() locks
                                  ** on this descriptor */
  char *zPath;                    /* Path to associated WAL file */
  LogLock *pLock;                 /* Linked list of locks on this object. Each
                                  ** entry is the Log.lock member of one open
                                  ** connection; walked under 'mutex' */
  LogSummary *pNext;              /* Next in global list (head: pLogSummary) */
  int nData;                      /* Size of aData allocation/mapping */
  u32 *aData;                     /* File body. The log-summary header and its
                                  ** two checksum words are read from the
                                  ** start of this array */
};


/*

** The four lockable regions associated with each log-summary. A connection
** may take either a SHARED or EXCLUSIVE lock on each.
*/
#define LOG_REGION_A 0x01         /* logLockRegion() locks byte offset 12 */
#define LOG_REGION_B 0x02         /* logLockRegion() locks byte offset 13 */
#define LOG_REGION_C 0x04         /* logLockRegion() locks byte offset 14 */
#define LOG_REGION_D 0x08         /* logLockRegion() locks byte offset 15 */

/*
** A single instance of this structure is allocated as part of each 
** connection to a database log. All structures associated with the 
** same log file are linked together into a list using LogLock.pNext
** starting at LogSummary.pLock.
**
** The mLock field of the structure describes the locks (if any) 
** currently held by the connection. If a SHARED lock is held on
** any of the four locking regions, then the associated LOG_REGION_X
** bit (see above) is set. If an EXCLUSIVE lock is held on the region,
** then the (LOG_REGION_X << 8) bit is set.
*/
struct LogLock {
  LogLock *pNext;                 /* Next lock on the same log */
  u32 mLock;                      /* Mask of locks. Bits 0..3 are the SHARED
                                  ** LOG_REGION_X bits, bits 8..11 the
                                  ** EXCLUSIVE ones. logLockRegion() sets the
                                  ** SHARED bit whenever it sets the matching
                                  ** EXCLUSIVE bit, and asserts that this
                                  ** holds for every entry in the list */
};

/*
** One instance per connection to a database log.
*/
struct Log {
  LogSummary *pSummary;           /* Log file summary data */
  sqlite3_vfs *pVfs;              /* The VFS used to create pFd */
  sqlite3_file *pFd;              /* File handle for log file */
  int sync_flags;                 /* Flags to use with OsSync() */
  int isLocked;                   /* Non-zero if a snapshot is held open. When
                                  ** non-zero the value is the region holding
                                  ** the snapshot lock: LOG_REGION_A or
                                  ** LOG_REGION_D */
  int isWriteLocked;              /* True if this is the writer connection.
                                  ** Maintained by sqlite3LogWriteLock() */
  LogSummaryHdr hdr;              /* Log summary header for current snapshot */
  LogLock lock;                   /* Lock held by this connection (if any).
                                  ** Linked into pSummary->pLock when the log
                                  ** is opened, unlinked when it is closed */
};


/*
** This structure is used to implement an iterator that iterates through
** all frames in the log in database page order. Where two or more frames
** correspond to the same database page, the iterator visits only the 
** frame most recently written to the log.
**
98
99
100
101
102
103
104









105
106
107
108
109
110
111
  struct LogSegment {
    int iNext;                    /* Next aIndex index */
    u8 *aIndex;                   /* Pointer to index array */
    u32 *aDbPage;                 /* Pointer to db page array */
  } aSegment[1];
};










/*
** Generate an 8 byte checksum based on the data in array aByte[] and the
** initial values of aCksum[0] and aCksum[1]. The checksum is written into
** aCksum[] before returning.
*/
#define LOG_CKSM_BYTES 8
static void logChecksumBytes(u8 *aByte, int nByte, u32 *aCksum){







>
>
>
>
>
>
>
>
>







121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
  struct LogSegment {
    int iNext;                    /* Next aIndex index */
    u8 *aIndex;                   /* Pointer to index array */
    u32 *aDbPage;                 /* Pointer to db page array */
  } aSegment[1];
};


/*
** List of all LogSummary objects created by this process. Protected by
** static mutex LOG_SUMMARY_MUTEX. TODO: Should have a dedicated mutex
** here instead of borrowing the LRU mutex.
*/
#define LOG_SUMMARY_MUTEX SQLITE_MUTEX_STATIC_LRU
static LogSummary *pLogSummary = 0;

/*
** Generate an 8 byte checksum based on the data in array aByte[] and the
** initial values of aCksum[0] and aCksum[1]. The checksum is written into
** aCksum[] before returning.
*/
#define LOG_CKSM_BYTES 8
static void logChecksumBytes(u8 *aByte, int nByte, u32 *aCksum){
660
661
662
663
664
665
666



667
668
669
670
671
672
673
  mutex = pSummary->mutex;
  if( pSummary->fd<0 ){
    rc = logSummaryInit(pSummary, pRet->pFd);
  }else{
    rc = sqlite3OsLock(pRet->pFd, SQLITE_LOCK_SHARED);
  }




 out:
  sqlite3_mutex_leave(mutex);
  sqlite3_free(zWal);
  if( rc!=SQLITE_OK ){
    assert(0);
    if( pRet ){
      sqlite3OsClose(pRet->pFd);







>
>
>







692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
  mutex = pSummary->mutex;
  if( pSummary->fd<0 ){
    rc = logSummaryInit(pSummary, pRet->pFd);
  }else{
    rc = sqlite3OsLock(pRet->pFd, SQLITE_LOCK_SHARED);
  }

  pRet->lock.pNext = pSummary->pLock;
  pSummary->pLock = &pRet->lock;

 out:
  sqlite3_mutex_leave(mutex);
  sqlite3_free(zWal);
  if( rc!=SQLITE_OK ){
    assert(0);
    if( pRet ){
      sqlite3OsClose(pRet->pFd);
834
835
836
837
838
839
840

841
842





843
844
845
846
847
848
849
int sqlite3LogClose(
  Log *pLog,                      /* Log to close */
  sqlite3_file *pFd,              /* Database file */
  u8 *zBuf                        /* Buffer of at least page-size bytes */
){
  int rc = SQLITE_OK;
  if( pLog ){

    LogSummary *pSummary = pLog->pSummary;
    sqlite3_mutex *mutex = 0;






    if( sqlite3GlobalConfig.bCoreMutex ){
      mutex = sqlite3_mutex_alloc(LOG_SUMMARY_MUTEX);
    }
    sqlite3_mutex_enter(mutex);

    /* Decrement the reference count on the log summary. If this is the last







>


>
>
>
>
>







869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
int sqlite3LogClose(
  Log *pLog,                      /* Log to close */
  sqlite3_file *pFd,              /* Database file */
  u8 *zBuf                        /* Buffer of at least page-size bytes */
){
  int rc = SQLITE_OK;
  if( pLog ){
    LogLock **ppL;
    LogSummary *pSummary = pLog->pSummary;
    sqlite3_mutex *mutex = 0;

    sqlite3_mutex_enter(pSummary->mutex);
    for(ppL=&pSummary->pLock; *ppL!=&pLog->lock; ppL=&(*ppL)->pNext);
    *ppL = pLog->lock.pNext;
    sqlite3_mutex_leave(pSummary->mutex);

    if( sqlite3GlobalConfig.bCoreMutex ){
      mutex = sqlite3_mutex_alloc(LOG_SUMMARY_MUTEX);
    }
    sqlite3_mutex_enter(mutex);

    /* Decrement the reference count on the log summary. If this is the last
935
936
937
938
939
940
941





942
































































































943
944
945
946
947
948
949
950
951
952






























953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977





978
979
980
981
982
983
984
985




986
987
988
989
990
991
992
static void logLeaveMutex(Log *pLog){
  /* Order matters: the summary is unlocked first, while the in-process
  ** mutex is still held, and only then is the mutex released.
  ** NOTE(review): logSummaryUnlock() is defined elsewhere; presumably it
  ** undoes the lock taken by logEnterMutex() - confirm. */
  logSummaryUnlock(pLog->pSummary);
  sqlite3_mutex_leave(pLog->pSummary->mutex);
}

/*





** The caller must hold a SHARED lock on the database file.
































































































**
** If this call obtains a new read-lock and the database contents have been
** modified since the most recent call to LogCloseSnapshot() on this Log
** connection, then *pChanged is set to 1 before returning. Otherwise, it 
** is left unmodified. This is used by the pager layer to determine whether 
** or not any cached pages may be safely reused.
*/
int sqlite3LogOpenSnapshot(Log *pLog, int *pChanged){
  int rc;                                /* Return code */
  u32 aSum[2] = {1, 1};                  /* Running checksum of the header */
  u32 aHdr[LOGSUMMARY_HDR_NFIELD+2];     /* Header fields + 2 checksum words */

  /* A snapshot is already open on this connection: nothing to do. */
  if( pLog->isLocked ) return SQLITE_OK;

  rc = logEnterMutex(pLog);
  if( rc!=SQLITE_OK ) return rc;

  /* Work on a private copy of the shared log-summary header. */
  memcpy(aHdr, pLog->pSummary->aData, sizeof(aHdr));

  /* If the stored checksum does not match the header contents, rebuild
  ** the log-summary from the log file and re-read the header. The caller
  ** is told that its cache may be stale whether or not recovery worked.
  */
  logChecksumBytes((u8*)aHdr, sizeof(u32)*LOGSUMMARY_HDR_NFIELD, aSum);
  if( aSum[0]!=aHdr[LOGSUMMARY_HDR_NFIELD]
   || aSum[1]!=aHdr[LOGSUMMARY_HDR_NFIELD+1]
  ){
    rc = logSummaryRecover(pLog->pSummary, pLog->pFd);
    memcpy(aHdr, pLog->pSummary->aData, sizeof(aHdr));
    *pChanged = 1;
  }

  if( rc==SQLITE_OK ){
    pLog->isLocked = 1;
    /* Adopt the new header only if it differs from the one this
    ** connection used for its previous snapshot. */
    if( memcmp(&pLog->hdr, aHdr, sizeof(LogSummaryHdr))!=0 ){
      *pChanged = 1;
      memcpy(&pLog->hdr, aHdr, LOGSUMMARY_HDR_NFIELD*sizeof(u32));
    }
  }

  logLeaveMutex(pLog);
  return rc;
}

/*
** Unlock the current snapshot.
*/
void sqlite3LogCloseSnapshot(Log *pLog){
  /* Only the in-memory flag is cleared here; no lock is released by this
  ** function. The next sqlite3LogOpenSnapshot() call will therefore
  ** re-read the log-summary header.
  */
  pLog->isLocked = 0;
}



/* 
** Read a page from the log, if it is present. 







>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>










>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

















<







>
>
>
>
>








>
>
>
>







976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141

1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
static void logLeaveMutex(Log *pLog){
  /* Order matters: the summary is unlocked first, while the in-process
  ** mutex is still held, and only then is the mutex released.
  ** NOTE(review): logSummaryUnlock() is defined elsewhere; presumably it
  ** undoes the lock taken by logEnterMutex() - confirm. */
  logSummaryUnlock(pLog->pSummary);
  sqlite3_mutex_leave(pLog->pSummary->mutex);
}

/*
** Values for the second parameter to logLockRegion().
*/
#define LOG_UNLOCK 0              /* Release; passed to fcntl() as F_UNLCK */
#define LOG_RDLOCK 1              /* SHARED lock; passed to fcntl() as F_RDLCK */
#define LOG_WRLOCK 2              /* EXCLUSIVE lock; passed as F_WRLCK */

/*
** Obtain, downgrade or release locks on one or more of the four lockable
** regions of the log-summary on behalf of connection pLog.
**
**   mRegion:  Mask of LOG_REGION_X values to operate on.
**   op:       LOG_UNLOCK, LOG_RDLOCK (SHARED) or LOG_WRLOCK (EXCLUSIVE).
**
** Only the (mRegion, op) combinations listed in the assert() below are
** part of the locking protocol.
**
** Locks are tracked at two levels. Each connection records its own locks
** in Log.lock.mLock, and conflicts between connections in this process
** are detected by walking the LogSummary.pLock list. A file-level fcntl()
** lock is only requested when the union of all in-process locks changes.
**
** Returns SQLITE_OK if the operation succeeded, or SQLITE_BUSY if another
** connection in this process holds a conflicting lock or the fcntl() call
** failed. The call never blocks (F_SETLK is used). On SQLITE_BUSY the
** locks recorded for pLog are left unchanged.
*/
static int logLockRegion(Log *pLog, u32 mRegion, int op){
  LogSummary *pSummary = pLog->pSummary;
  LogLock *p;                     /* Used to iterate through in-process locks */
  u32 mNew;                       /* New locks on file */
  u32 mOld;                       /* Old locks on file */
  u32 mNewLock;                   /* New locks held by pLog */

  assert( 
       /* Writer lock operations */
          (op==LOG_WRLOCK && mRegion==(LOG_REGION_C|LOG_REGION_D))
       || (op==LOG_UNLOCK && mRegion==(LOG_REGION_C|LOG_REGION_D))

       /* Reader lock operations */
       || (op==LOG_RDLOCK && mRegion==(LOG_REGION_A|LOG_REGION_B))
       || (op==LOG_RDLOCK && mRegion==(LOG_REGION_D))
       || (op==LOG_UNLOCK && mRegion==(LOG_REGION_A))
       || (op==LOG_UNLOCK && mRegion==(LOG_REGION_B))
       || (op==LOG_UNLOCK && mRegion==(LOG_REGION_D))

       /* Checkpointer lock operations */
       || (op==LOG_WRLOCK && mRegion==(LOG_REGION_B|LOG_REGION_C))
       || (op==LOG_WRLOCK && mRegion==(LOG_REGION_A))
       || (op==LOG_UNLOCK && mRegion==(LOG_REGION_A|LOG_REGION_B|LOG_REGION_C))
       || (op==LOG_UNLOCK && mRegion==(LOG_REGION_B|LOG_REGION_C))
  );

  /* The pLock list and every mLock mask are protected by this mutex. */
  sqlite3_mutex_enter(pSummary->mutex);

  /* If obtaining (not releasing) a lock, check if there exist any 
  ** conflicting locks in process. Return SQLITE_BUSY in this case.
  **
  ** An EXCLUSIVE lock held by another connection on any requested region
  ** (the <<8 bits) always conflicts. When an EXCLUSIVE lock is requested,
  ** a SHARED lock held by another connection conflicts as well.
  */
  if( op ){
    u32 mConflict = (mRegion<<8) | ((op==LOG_WRLOCK) ? mRegion : 0);
    for(p=pSummary->pLock; p; p=p->pNext){
      if( p!=&pLog->lock && (p->mLock & mConflict) ){
        sqlite3_mutex_leave(pSummary->mutex);
        return SQLITE_BUSY;
      }
    }
  }

  /* Determine the new lock mask for this log connection */
  switch( op ){
    case LOG_UNLOCK: 
      /* Clear both the SHARED and EXCLUSIVE bits of each region. */
      mNewLock = (pLog->lock.mLock & ~(mRegion|(mRegion<<8))); 
      break;
    case LOG_RDLOCK:
      /* Set SHARED; clearing EXCLUSIVE makes this a downgrade if needed. */
      mNewLock = ((pLog->lock.mLock & ~(mRegion<<8)) | mRegion);
      break;
    default:
      assert( op==LOG_WRLOCK );
      /* EXCLUSIVE implies SHARED, so both bits are set. */
      mNewLock = (pLog->lock.mLock | (mRegion<<8) | mRegion);
      break;
  }

  /* Determine the current and desired sets of locks at the file level. */
  mNew = 0;
  for(p=pSummary->pLock; p; p=p->pNext){
    /* Every EXCLUSIVE bit must have its SHARED bit set too. */
    assert( (p->mLock & (p->mLock<<8))==(p->mLock & 0x00000F00) );
    if( p!=&pLog->lock ) mNew |= p->mLock;
  }
  /* At this point mNew holds the locks of all OTHER connections in this
  ** process. Adding this connection's old/new masks gives the before/after
  ** union that the file-level lock must reflect. */
  mOld = mNew | pLog->lock.mLock;
  mNew = mNew | mNewLock;

  if( mNew!=mOld ){
    int rc;
    /* Fold the EXCLUSIVE bits onto the SHARED bits so that the low four
    ** bits of mChange identify every region whose state changes. */
    u32 mChange = (mNew^mOld) | ((mNew^mOld)>>8);
    struct flock f;
    memset(&f, 0, sizeof(f));
    f.l_type = (op==LOG_WRLOCK?F_WRLCK:(op==LOG_RDLOCK?F_RDLCK:F_UNLCK));
    f.l_whence = SEEK_SET;

    /* Regions A..D are bytes 12..15 of the log-summary file. The range
    ** starts at the lowest changed region... */
    if(      mChange & LOG_REGION_A ) f.l_start = 12;
    else if( mChange & LOG_REGION_B ) f.l_start = 13;
    else if( mChange & LOG_REGION_C ) f.l_start = 14;
    else if( mChange & LOG_REGION_D ) f.l_start = 15;

    /* ...and ends at the highest changed region, so a single fcntl() call
    ** covers one contiguous byte range. */
    if(      mChange & LOG_REGION_D ) f.l_len   = 16 - f.l_start;
    else if( mChange & LOG_REGION_C ) f.l_len   = 15 - f.l_start;
    else if( mChange & LOG_REGION_B ) f.l_len   = 14 - f.l_start;
    else if( mChange & LOG_REGION_A ) f.l_len   = 13 - f.l_start;

    /* NOTE(review): every fcntl() failure, not only lock contention, is
    ** reported to the caller as SQLITE_BUSY. */
    rc = fcntl(pSummary->fd, F_SETLK, &f);
    if( rc!=0 ){
      sqlite3_mutex_leave(pSummary->mutex);
      return SQLITE_BUSY;
    }
  }

  /* Success: record the new set of locks held by this connection. */
  pLog->lock.mLock = mNewLock;
  sqlite3_mutex_leave(pSummary->mutex);
  return SQLITE_OK;
}

/*
** Lock a snapshot.
**
** If this call obtains a new read-lock and the database contents have been
** modified since the most recent call to LogCloseSnapshot() on this Log
** connection, then *pChanged is set to 1 before returning. Otherwise, it 
** is left unmodified. This is used by the pager layer to determine whether 
** or not any cached pages may be safely reused.
*/
int sqlite3LogOpenSnapshot(Log *pLog, int *pChanged){
  int rc = SQLITE_BUSY;           /* Return code */
  int nTry = 0;                   /* Number of locking attempts made so far */

  /* A snapshot is already open on this connection: nothing to do. */
  if( pLog->isLocked ) return SQLITE_OK;

  /* Take the snapshot lock on the log-summary file:
  **
  **    1.  Try for a SHARED lock on regions A and B together.
  **    2a. If that worked, release region B again and keep A.
  **    2b. If it did not, try for a SHARED lock on region D instead.
  **    3.  Repeat until either step 1 or step 2b succeeds.
  **
  ** Five failed rounds most likely mean that some other process is not
  ** following the locking protocol, so the error is returned to the caller.
  */
  while( nTry<5 && rc==SQLITE_BUSY ){
    nTry++;
    rc = logLockRegion(pLog, LOG_REGION_A|LOG_REGION_B, LOG_RDLOCK);
    if( rc!=SQLITE_BUSY ){
      logLockRegion(pLog, LOG_REGION_B, LOG_UNLOCK);
      pLog->isLocked = LOG_REGION_A;
    }else{
      rc = logLockRegion(pLog, LOG_REGION_D, LOG_RDLOCK);
      if( rc==SQLITE_OK ) pLog->isLocked = LOG_REGION_D;
    }
  }
  if( rc!=SQLITE_OK ) return rc;

  rc = logEnterMutex(pLog);
  if( rc==SQLITE_OK ){
    u32 aSum[2] = {1, 1};                /* Running checksum of the header */
    u32 aHdr[LOGSUMMARY_HDR_NFIELD+2];   /* Header fields + checksum words */

    /* Work on a private copy of the shared log-summary header. */
    memcpy(aHdr, pLog->pSummary->aData, sizeof(aHdr));

    /* If the stored checksum does not match the header contents, rebuild
    ** the log-summary from the log file and re-read the header.
    */
    logChecksumBytes((u8*)aHdr, sizeof(u32)*LOGSUMMARY_HDR_NFIELD, aSum);
    if( aSum[0]!=aHdr[LOGSUMMARY_HDR_NFIELD]
     || aSum[1]!=aHdr[LOGSUMMARY_HDR_NFIELD+1]
    ){
      rc = logSummaryRecover(pLog->pSummary, pLog->pFd);
      memcpy(aHdr, pLog->pSummary->aData, sizeof(aHdr));
      *pChanged = 1;
    }

    /* Adopt the new header only if it differs from the one this
    ** connection used for its previous snapshot. */
    if( rc==SQLITE_OK && memcmp(&pLog->hdr, aHdr, sizeof(LogSummaryHdr))!=0 ){
      *pChanged = 1;
      memcpy(&pLog->hdr, aHdr, LOGSUMMARY_HDR_NFIELD*sizeof(u32));
    }
    logLeaveMutex(pLog);
  }

  if( rc!=SQLITE_OK ){
    /* Entering the mutex or recovering the log-summary failed. Give up
    ** the snapshot lock obtained above. */
    sqlite3LogCloseSnapshot(pLog);
  }
  return rc;
}

/*
** Unlock the current snapshot.
*/
void sqlite3LogCloseSnapshot(Log *pLog){
  /* isLocked doubles as the region (A or D) holding the snapshot lock. */
  const int iRegion = pLog->isLocked;
  if( iRegion!=0 ){
    assert( iRegion==LOG_REGION_A || iRegion==LOG_REGION_D );
    logLockRegion(pLog, iRegion, LOG_UNLOCK);
  }
  pLog->isLocked = 0;
}



/* 
** Read a page from the log, if it is present. 
1068
1069
1070
1071
1072
1073
1074







1075
1076
1077
1078

1079

1080
1081
1082
1083
1084
1085
1086
** This function returns SQLITE_OK if the caller may write to the database.
** Otherwise, if the caller is operating on a snapshot that has already
** been overwritten by another writer, SQLITE_BUSY is returned.
*/
int sqlite3LogWriteLock(Log *pLog, int op){
  assert( pLog->isLocked );

  if( op==0 ){
    /* Release request. Only meaningful if this connection is the writer:
    ** refresh the cached header from the shared summary and clear the
    ** writer flag. */
    if( pLog->isWriteLocked ){
      memcpy(&pLog->hdr, pLog->pSummary->aData, sizeof(pLog->hdr));
      pLog->isWriteLocked = 0;
    }
    return SQLITE_OK;
  }

  /* Acquire request. Writing is refused if the shared header no longer
  ** matches the header of this connection's snapshot. */
  if( memcmp(&pLog->hdr, pLog->pSummary->aData, sizeof(pLog->hdr))!=0 ){
    return SQLITE_BUSY;
  }
  pLog->isWriteLocked = 1;
  return SQLITE_OK;
}

/* 







>
>
>
>
>
>
>




>

>







1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
** This function returns SQLITE_OK if the caller may write to the database.
** Otherwise, if the caller is operating on a snapshot that has already
** been overwritten by another writer, SQLITE_BUSY is returned.
*/
int sqlite3LogWriteLock(Log *pLog, int op){
  /* Obtain (op!=0) or release (op==0) the writer lock for connection pLog.
  ** A snapshot must already be open. Returns SQLITE_OK on success, or
  ** SQLITE_BUSY if the writer lock is unavailable or the snapshot is stale.
  */
  assert( pLog->isLocked );
  if( op ){

    /* Obtain the writer lock */
    int rc = logLockRegion(pLog, LOG_REGION_C|LOG_REGION_D, LOG_WRLOCK);
    if( rc!=SQLITE_OK ){
      return rc;
    }

    /* The snapshot is stale if the shared header no longer matches the
    ** header this connection is reading with. Writing is not allowed then.
    */
    if( memcmp(&pLog->hdr, pLog->pSummary->aData, sizeof(pLog->hdr)) ){
      /* Give the writer lock back before failing. isWriteLocked is not set
      ** on this path, so the release branch below would never drop it and
      ** regions C and D would stay EXCLUSIVE-locked by this connection.
      ** If this connection was already the writer, keep its lock.
      **
      ** NOTE(review): LOG_UNLOCK clears the SHARED bit of region D as well,
      ** so a snapshot lock held on region D (isLocked==LOG_REGION_D) is
      ** dropped here, exactly as it is by the release branch below.
      */
      if( !pLog->isWriteLocked ){
        logLockRegion(pLog, LOG_REGION_C|LOG_REGION_D, LOG_UNLOCK);
      }
      return SQLITE_BUSY;
    }
    pLog->isWriteLocked = 1;

  }else if( pLog->isWriteLocked ){
    /* Release the writer lock and resynchronize the cached header with the
    ** shared summary. */
    logLockRegion(pLog, LOG_REGION_C|LOG_REGION_D, LOG_UNLOCK);
    memcpy(&pLog->hdr, pLog->pSummary->aData, sizeof(pLog->hdr));
    pLog->isWriteLocked = 0;
  }
  return SQLITE_OK;
}

/* 
1222
1223
1224
1225
1226
1227
1228
1229


1230

1231
1232
1233
1234

1235
1236
1237
1238
1239



1240
1241
1242


1243

/* 
** Checkpoint the database. When this function is called the caller
** must hold an exclusive lock on the database file.
*/
int sqlite3LogCheckpoint(
  Log *pLog,                      /* Log connection */
  sqlite3_file *pFd,              /* File descriptor open on db file */
  u8 *zBuf                        /* Temporary buffer to use */
){
#ifdef SQLITE_DEBUG
  /* Debug builds ask the VFS for the current lock level on the database
  ** file and check that it is at least EXCLUSIVE. */
  int eLockState;
  sqlite3OsFileControl(pFd, SQLITE_FCNTL_LOCKSTATE, &eLockState);
  assert( eLockState>=SQLITE_LOCK_EXCLUSIVE );
#endif

  /* All of the real work is delegated to logCheckpoint(). */
  return logCheckpoint(pLog, pFd, zBuf);
}











|
>
>

>

<
|
<
>
|
<
|
|
|
>
>
>

|
|
>
>
|
>
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423

1424

1425
1426

1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
/* 
** Checkpoint the database. When this function is called the caller
** must hold an exclusive lock on the database file.
*/
int sqlite3LogCheckpoint(
  Log *pLog,                      /* Log connection */
  sqlite3_file *pFd,              /* File descriptor open on db file */
  u8 *zBuf,                       /* Temporary buffer to use */
  int (*xBusyHandler)(void *),    /* Pointer to busy-handler function */
  void *pBusyHandlerArg           /* Argument to pass to xBusyHandler */
){
  int rc;                         /* Return code */

  /* Step 1: EXCLUSIVE lock on regions B and C. While the lock is busy the
  ** busy-handler decides whether to retry; a zero return (or no handler
  ** at all) gives up.
  */
  do {
    rc = logLockRegion(pLog, LOG_REGION_B|LOG_REGION_C, LOG_WRLOCK);
  }while( rc==SQLITE_BUSY && xBusyHandler && xBusyHandler(pBusyHandlerArg) );
  if( rc!=SQLITE_OK ) return rc;

  /* Step 2: EXCLUSIVE lock on region A, retried the same way. */
  do {
    rc = logLockRegion(pLog, LOG_REGION_A, LOG_WRLOCK);
  }while( rc==SQLITE_BUSY && xBusyHandler && xBusyHandler(pBusyHandlerArg) );
  if( rc!=SQLITE_OK ){
    /* Region A could not be locked. Release B and C before returning;
    ** otherwise this connection keeps them locked indefinitely. */
    logLockRegion(pLog, LOG_REGION_B|LOG_REGION_C, LOG_UNLOCK);
    return rc;
  }

  /* All three regions are held: run the checkpoint, then drop the locks
  ** whether or not it succeeded. */
  rc = logCheckpoint(pLog, pFd, zBuf);

  logLockRegion(pLog, LOG_REGION_A|LOG_REGION_B|LOG_REGION_C, LOG_UNLOCK);
  return rc;
}

Changes to src/log.h.
51
52
53
54
55
56
57
58


59
60
61
/* Write a segment to the log. */
int sqlite3LogFrames(Log *pLog, int, PgHdr *, Pgno, int, int);

/* Copy pages from the log to the database file */ 
int sqlite3LogCheckpoint(
  Log *pLog,                      /* Log connection */
  sqlite3_file *pFd,              /* File descriptor open on db file */
  u8 *zBuf                        /* Temporary buffer to use */


);

#endif /* _LOG_H_ */







|
>
>



51
52
53
54
55
56
57
58
59
60
61
62
63
/* Write a segment to the log. */
int sqlite3LogFrames(Log *pLog, int, PgHdr *, Pgno, int, int);

/* Copy pages from the log to the database file */ 
int sqlite3LogCheckpoint(
  Log *pLog,                      /* Log connection */
  sqlite3_file *pFd,              /* File descriptor open on db file */
  u8 *zBuf,                       /* Temporary buffer to use */
  int (*xBusyHandler)(void *),    /* Pointer to busy-handler function. Called
                                  ** each time a checkpoint lock is busy; a
                                  ** non-zero return requests another try */
  void *pBusyHandlerArg           /* Argument to pass to xBusyHandler */
);

#endif /* _LOG_H_ */
Changes to src/pager.c.
3116
3117
3118
3119
3120
3121
3122

3123
3124
3125
3126
3127
3128
3129
  **
  ** While the pager is in the RESERVED state, the original database file
  ** is unchanged and we can rollback without having to playback the
  ** journal into the original database file.  Once we transition to
  ** EXCLUSIVE, it means the database file has been changed and any rollback
  ** will require a journal playback.
  */

  assert( pPager->state>=PAGER_RESERVED );
  rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);

  /* If the file is a temp-file that has not yet been opened, open it now. It
  ** is not possible for rc to be other than SQLITE_OK if this branch
  ** is taken, as pager_wait_on_lock() is a no-op for temp-files.
  */







>







3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
  **
  ** While the pager is in the RESERVED state, the original database file
  ** is unchanged and we can rollback without having to playback the
  ** journal into the original database file.  Once we transition to
  ** EXCLUSIVE, it means the database file has been changed and any rollback
  ** will require a journal playback.
  */
  assert( !pagerUseLog(pList->pPager) );
  assert( pPager->state>=PAGER_RESERVED );
  rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);

  /* If the file is a temp-file that has not yet been opened, open it now. It
  ** is not possible for rc to be other than SQLITE_OK if this branch
  ** is taken, as pager_wait_on_lock() is a no-op for temp-files.
  */
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
    pPager->errCode = SQLITE_OK;
    pager_reset(pPager);
  }

  if( pagerUseLog(pPager) ){
    int changed = 0;

    /* TODO: Change the following block to grab a WAL read-lock. Or, 
    ** combine obtaining the read-lock with LogOpenSnapshot()?  */
    rc = pager_wait_on_lock(pPager, SHARED_LOCK);
    if( rc!=SQLITE_OK ){
      assert( pPager->state==PAGER_UNLOCK );
      return pager_error(pPager, rc);
    }

    rc = sqlite3LogOpenSnapshot(pPager->pLog, &changed);
    if( rc==SQLITE_OK ){
      if( changed ){
        pager_reset(pPager);
        assert( pPager->errCode || pPager->dbSizeValid==0 );
      }
      pPager->state = PAGER_SHARED;
      rc = sqlite3PagerPagecount(pPager, &changed);
    }
  }else if( pPager->state==PAGER_UNLOCK || isErrorReset ){
    sqlite3_vfs * const pVfs = pPager->pVfs;
    int isHotJournal = 0;
    assert( !MEMDB );
    assert( sqlite3PcacheRefCount(pPager->pPCache)==0 );







<
<
<
<
<
<
<
<






|







3782
3783
3784
3785
3786
3787
3788








3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
    pPager->errCode = SQLITE_OK;
    pager_reset(pPager);
  }

  if( pagerUseLog(pPager) ){
    int changed = 0;









    rc = sqlite3LogOpenSnapshot(pPager->pLog, &changed);
    if( rc==SQLITE_OK ){
      if( changed ){
        pager_reset(pPager);
        assert( pPager->errCode || pPager->dbSizeValid==0 );
      }
      pPager->state = PAGER_SHARED;         /* TODO: Is this right? */
      rc = sqlite3PagerPagecount(pPager, &changed);
    }
  }else if( pPager->state==PAGER_UNLOCK || isErrorReset ){
    sqlite3_vfs * const pVfs = pPager->pVfs;
    int isHotJournal = 0;
    assert( !MEMDB );
    assert( sqlite3PcacheRefCount(pPager->pPCache)==0 );
4326
4327
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354














4355
4356
4357
4358
4359
4360
4361
  int rc = SQLITE_OK;
  assert( pPager->state!=PAGER_UNLOCK );
  pPager->subjInMemory = (u8)subjInMemory;
  if( pPager->state==PAGER_SHARED ){
    assert( pPager->pInJournal==0 );
    assert( !MEMDB && !pPager->tempFile );

    /* Obtain a RESERVED lock on the database file. If the exFlag parameter
    ** is true, then immediately upgrade this to an EXCLUSIVE lock. The
    ** busy-handler callback can be used when upgrading to the EXCLUSIVE
    ** lock, but not when obtaining the RESERVED lock.
    */
    rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK);
    if( rc==SQLITE_OK ){
      pPager->state = PAGER_RESERVED;
      if( exFlag ){
        rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
      }
    }

    if( rc==SQLITE_OK && pagerUseLog(pPager) ){
      /* Grab the write lock on the log file. If successful, upgrade to
      ** PAGER_EXCLUSIVE state. Otherwise, return an error code to the caller.
      ** The busy-handler is not invoked if another connection already
      ** holds the write-lock. If possible, the upper layer will call it.
      */
      rc = sqlite3LogWriteLock(pPager->pLog, 1);
      if( rc==SQLITE_OK ){
        pPager->dbOrigSize = pPager->dbSize;














      }
    }

    /* No need to open the journal file at this time.  It will be
    ** opened before it is written to.  If we defer opening the journal,
    ** we might save the work of creating a file if the transaction
    ** ends up being a no-op.







<
<
<
<
<
<
<
<
<
<
<
<
<
|








>
>
>
>
>
>
>
>
>
>
>
>
>
>







4319
4320
4321
4322
4323
4324
4325













4326
4327
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
  int rc = SQLITE_OK;
  assert( pPager->state!=PAGER_UNLOCK );
  pPager->subjInMemory = (u8)subjInMemory;
  if( pPager->state==PAGER_SHARED ){
    assert( pPager->pInJournal==0 );
    assert( !MEMDB && !pPager->tempFile );














    if( pagerUseLog(pPager) ){
      /* Grab the write lock on the log file. If successful, upgrade to
      ** PAGER_EXCLUSIVE state. Otherwise, return an error code to the caller.
      ** The busy-handler is not invoked if another connection already
      ** holds the write-lock. If possible, the upper layer will call it.
      */
      rc = sqlite3LogWriteLock(pPager->pLog, 1);
      if( rc==SQLITE_OK ){
        pPager->dbOrigSize = pPager->dbSize;
        pPager->state = PAGER_RESERVED;
      }
    }else{
      /* Obtain a RESERVED lock on the database file. If the exFlag parameter
      ** is true, then immediately upgrade this to an EXCLUSIVE lock. The
      ** busy-handler callback can be used when upgrading to the EXCLUSIVE
      ** lock, but not when obtaining the RESERVED lock.
      */
      rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK);
      if( rc==SQLITE_OK ){
        pPager->state = PAGER_RESERVED;
        if( exFlag ){
          rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
        }
      }
    }

    /* No need to open the journal file at this time.  It will be
    ** opened before it is written to.  If we defer opening the journal,
    ** we might save the work of creating a file if the transaction
    ** ends up being a no-op.
5653
5654
5655
5656
5657
5658
5659
5660
5661
5662
5663
5664


5665
5666
5667
5668
5669

/*
** This function is called when the user invokes "PRAGMA checkpoint".
*/
int sqlite3PagerCheckpoint(Pager *pPager){
  int rc;

  /* Nothing to checkpoint unless a log is attached to this pager. */
  if( pPager->pLog==0 ) return SQLITE_OK;

  /* The database file is locked EXCLUSIVE before pages are copied. */
  rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
  if( rc!=SQLITE_OK ) return rc;

  /* The pager's temp space serves as the scratch page buffer. */
  return sqlite3LogCheckpoint(
      pPager->pLog, pPager->fd, (u8 *)pPager->pTmpSpace
  );
}

#endif /* SQLITE_OMIT_DISKIO */







<
<
|
|
<
>
>





5647
5648
5649
5650
5651
5652
5653


5654
5655

5656
5657
5658
5659
5660
5661
5662

/*
** This function is called when the user invokes "PRAGMA checkpoint".
*/
int sqlite3PagerCheckpoint(Pager *pPager){
  /* Nothing to checkpoint unless a log is attached to this pager. */
  if( pPager->pLog==0 ) return SQLITE_OK;

  /* The pager's temp space serves as the scratch page buffer, and the
  ** pager's busy-handler is passed through to the log module. */
  return sqlite3LogCheckpoint(
      pPager->pLog,
      pPager->fd,
      (u8 *)pPager->pTmpSpace,
      pPager->xBusyHandler,
      pPager->pBusyHandlerArg
  );
}

#endif /* SQLITE_OMIT_DISKIO */