SQLite

Check-in [409d61baeb]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Import experimental write-ahead-logging code.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | wal
Files: files | file ages | folders
SHA1: 409d61baeb0a19d1700c973f16c8acef7b8506cd
User & Date: dan 2010-04-12 19:00:30.000
Context
2010-04-12
19:05
Add "log.h", which should have been part of the previous commit. (check-in: 8de9f01004 user: dan tags: wal)
19:00
Import experimental write-ahead-logging code. (check-in: 409d61baeb user: dan tags: wal)
17:08
Fix a problem where a process in exclusive mode could delete a hot-journal file without rolling it back from within sqlite3_close() or DETACH. This problem was introduced by commits from the past few days; it is not present in any release. (check-in: 51a6139508 user: dan tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to main.mk.
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#
LIBOBJ+= alter.o analyze.o attach.o auth.o \
         backup.o bitvec.o btmutex.o btree.o build.o \
         callback.o complete.o ctime.o date.o delete.o expr.o fault.o fkey.o \
         fts3.o fts3_expr.o fts3_hash.o fts3_icu.o fts3_porter.o \
         fts3_snippet.o fts3_tokenizer.o fts3_tokenizer1.o fts3_write.o \
         func.o global.o hash.o \
         icu.o insert.o journal.o legacy.o loadext.o \
         main.o malloc.o mem0.o mem1.o mem2.o mem3.o mem5.o \
         memjournal.o \
         mutex.o mutex_noop.o mutex_os2.o mutex_unix.o mutex_w32.o \
         notify.o opcodes.o os.o os_os2.o os_unix.o os_win.o \
         pager.o parse.o pcache.o pcache1.o pragma.o prepare.o printf.o \
         random.o resolve.o rowset.o rtree.o select.o status.o \
         table.o tokenize.o trigger.o \







|







52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#
LIBOBJ+= alter.o analyze.o attach.o auth.o \
         backup.o bitvec.o btmutex.o btree.o build.o \
         callback.o complete.o ctime.o date.o delete.o expr.o fault.o fkey.o \
         fts3.o fts3_expr.o fts3_hash.o fts3_icu.o fts3_porter.o \
         fts3_snippet.o fts3_tokenizer.o fts3_tokenizer1.o fts3_write.o \
         func.o global.o hash.o \
         icu.o insert.o journal.o legacy.o loadext.o log.o \
         main.o malloc.o mem0.o mem1.o mem2.o mem3.o mem5.o \
         memjournal.o \
         mutex.o mutex_noop.o mutex_os2.o mutex_unix.o mutex_w32.o \
         notify.o opcodes.o os.o os_os2.o os_unix.o os_win.o \
         pager.o parse.o pcache.o pcache1.o pragma.o prepare.o printf.o \
         random.o resolve.o rowset.o rtree.o select.o status.o \
         table.o tokenize.o trigger.o \
97
98
99
100
101
102
103


104
105
106
107
108
109
110
  $(TOP)/src/hash.c \
  $(TOP)/src/hash.h \
  $(TOP)/src/hwtime.h \
  $(TOP)/src/insert.c \
  $(TOP)/src/journal.c \
  $(TOP)/src/legacy.c \
  $(TOP)/src/loadext.c \


  $(TOP)/src/main.c \
  $(TOP)/src/malloc.c \
  $(TOP)/src/mem0.c \
  $(TOP)/src/mem1.c \
  $(TOP)/src/mem2.c \
  $(TOP)/src/mem3.c \
  $(TOP)/src/mem5.c \







>
>







97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
  $(TOP)/src/hash.c \
  $(TOP)/src/hash.h \
  $(TOP)/src/hwtime.h \
  $(TOP)/src/insert.c \
  $(TOP)/src/journal.c \
  $(TOP)/src/legacy.c \
  $(TOP)/src/loadext.c \
  $(TOP)/src/log.c \
  $(TOP)/src/log.h \
  $(TOP)/src/main.c \
  $(TOP)/src/malloc.c \
  $(TOP)/src/mem0.c \
  $(TOP)/src/mem1.c \
  $(TOP)/src/mem2.c \
  $(TOP)/src/mem3.c \
  $(TOP)/src/mem5.c \
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266

#TESTSRC += $(TOP)/ext/fts2/fts2_tokenizer.c
#TESTSRC += $(TOP)/ext/fts3/fts3_tokenizer.c

TESTSRC2 = \
  $(TOP)/src/attach.c $(TOP)/src/backup.c $(TOP)/src/btree.c                   \
  $(TOP)/src/build.c $(TOP)/src/date.c                                         \
  $(TOP)/src/expr.c $(TOP)/src/func.c $(TOP)/src/insert.c $(TOP)/src/mem5.c    \
  $(TOP)/src/os.c                                                              \
  $(TOP)/src/os_os2.c $(TOP)/src/os_unix.c $(TOP)/src/os_win.c                 \
  $(TOP)/src/pager.c $(TOP)/src/pragma.c $(TOP)/src/prepare.c                  \
  $(TOP)/src/printf.c $(TOP)/src/random.c $(TOP)/src/pcache.c                  \
  $(TOP)/src/pcache1.c $(TOP)/src/select.c $(TOP)/src/tokenize.c               \
  $(TOP)/src/utf.c $(TOP)/src/util.c $(TOP)/src/vdbeapi.c $(TOP)/src/vdbeaux.c \
  $(TOP)/src/vdbe.c $(TOP)/src/vdbemem.c $(TOP)/src/where.c parse.c            \
  $(TOP)/ext/fts3/fts3.c $(TOP)/ext/fts3/fts3_expr.c                           \







|
|







253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268

#TESTSRC += $(TOP)/ext/fts2/fts2_tokenizer.c
#TESTSRC += $(TOP)/ext/fts3/fts3_tokenizer.c

TESTSRC2 = \
  $(TOP)/src/attach.c $(TOP)/src/backup.c $(TOP)/src/btree.c                   \
  $(TOP)/src/build.c $(TOP)/src/date.c                                         \
  $(TOP)/src/expr.c $(TOP)/src/func.c $(TOP)/src/insert.c $(TOP)/src/log.c     \
  $(TOP)/src/mem5.c $(TOP)/src/os.c                                            \
  $(TOP)/src/os_os2.c $(TOP)/src/os_unix.c $(TOP)/src/os_win.c                 \
  $(TOP)/src/pager.c $(TOP)/src/pragma.c $(TOP)/src/prepare.c                  \
  $(TOP)/src/printf.c $(TOP)/src/random.c $(TOP)/src/pcache.c                  \
  $(TOP)/src/pcache1.c $(TOP)/src/select.c $(TOP)/src/tokenize.c               \
  $(TOP)/src/utf.c $(TOP)/src/util.c $(TOP)/src/vdbeapi.c $(TOP)/src/vdbeaux.c \
  $(TOP)/src/vdbe.c $(TOP)/src/vdbemem.c $(TOP)/src/where.c parse.c            \
  $(TOP)/ext/fts3/fts3.c $(TOP)/ext/fts3/fts3_expr.c                           \
Added src/log.c.






















































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243

/*
** This file contains the implementation of a log file used in 
** "journal_mode=wal" mode.
*/

#include "log.h"

#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>

typedef struct LogSummaryHdr LogSummaryHdr;
typedef struct LogSummary LogSummary;
typedef struct LogCheckpoint LogCheckpoint;


/*
** The following structure may be used to store the same data that
** is stored in the log-summary header.
**
** Member variables iCheck1 and iCheck2 contain the checksum for the
** last frame written to the log, or 2 and 3 respectively if the log 
** is currently empty.
**
** The structure is copied byte-for-byte to the start of the log-summary
** mapping by logSummaryWriteHdr(), so the order and width of the fields
** below define the layout of the stored header. LOGSUMMARY_HDR_NFIELD
** is computed as sizeof(LogSummaryHdr)/sizeof(u32), which is only a
** field count while every member is a u32.
*/
struct LogSummaryHdr {
  u32 iChange;                    /* Counter incremented each transaction */
  u32 pgsz;                       /* Database page size in bytes */
  u32 iLastPg;                    /* Address of last valid frame in log */
  u32 nPage;                      /* Size of database in pages */
  u32 iCheck1;                    /* Checksum value 1 (see comment above) */
  u32 iCheck2;                    /* Checksum value 2 (see comment above) */
};

/* Number of u32 fields in a serialized LogSummaryHdr object. */
#define LOGSUMMARY_HDR_NFIELD (sizeof(LogSummaryHdr) / sizeof(u32))

/*
** Index, in u32 units, of the first slot in the log-summary that follows
** the header fields and the LOG_CKSM_BYTES byte header checksum.
** LOG_CKSM_BYTES is #defined further down this file; that is fine because
** the macro is only expanded where it is used.
*/
#define LOGSUMMARY_FRAME_OFFSET \
  (LOGSUMMARY_HDR_NFIELD + LOG_CKSM_BYTES/sizeof(u32))

/* Size of frame header in bytes (layout described at logEncodeFrame()) */
#define LOG_FRAME_HDRSIZE 20

/*
** There is one instance of this structure for each log-summary object
** that this process has a connection to. They are stored in a linked
** list starting at pLogSummary (global variable).
**
** TODO: LogSummary.fd is a unix file descriptor. Unix APIs are used 
**       directly in this implementation because the VFS does not support
**       the required blocking file-locks.
**
** LogSummary.fd is negative while no summary file is open:
** logSummaryInit() asserts fd<0 on entry and logSummaryUnmap() resets
** the field to -1 after close().
*/
struct LogSummary {
  sqlite3_mutex *mutex;           /* Mutex used to protect this object */
  int nRef;                       /* Number of pointers to this structure */
  int fd;                         /* File descriptor open on log-summary */
  char *zPath;                    /* Path to associated WAL file */
  LogSummary *pNext;              /* Next in global list */
  int nData;                      /* Size in bytes of aData mapping */
  u32 *aData;                     /* File body (mmap()ed; 0 if not mapped) */
};

/*
** List of all LogSummary objects created by this process. Protected by
** static mutex LOG_SUMMARY_MUTEX. TODO: Should have a dedicated mutex
** here instead of borrowing the LRU mutex.
**
** The list is singly linked through LogSummary.pNext and is empty (0)
** until the first LogSummary is created.
*/
#define LOG_SUMMARY_MUTEX SQLITE_MUTEX_STATIC_LRU
static LogSummary *pLogSummary = 0;

/*
** Handle on a log file. It pairs a (possibly shared) LogSummary with the
** file handle used to access the log file and a private copy of the
** log-summary header.
**
** NOTE(review): the functions that create and use this object lie outside
** this part of the file; the per-field descriptions are the original
** author's and have not been checked against that code.
*/
struct Log {
  LogSummary *pSummary;           /* Log file summary data */
  sqlite3_vfs *pVfs;              /* The VFS used to create pFd */
  sqlite3_file *pFd;              /* File handle for log file */
  int sync_flags;                 /* Flags to use with OsSync() */
  int isLocked;                   /* True if a snapshot is held open */
  int isWriteLocked;              /* True if this is the writer connection */
  LogSummaryHdr hdr;              /* Log summary header for current snapshot */
};

/*
** This structure is used to implement an iterator that iterates through
** all frames in the log in database page order. Where two or more frames
** correspond to the same database page, the iterator visits only the 
** frame most recently written to the log.
**
** The internals of this structure are only accessed by:
**
**   logCheckpointInit() - Create a new iterator,
**   logCheckpointNext() - Step an iterator,
**   logCheckpointFree() - Free an iterator.
**
** This functionality is used by the checkpoint code (see logCheckpoint()).
**
** aSegment[] is declared with a single element.
** NOTE(review): presumably the object is over-allocated so that aSegment[]
** really holds nSegment entries - confirm against logCheckpointInit().
*/
struct LogCheckpoint {
  int nSegment;                   /* Size of LogCheckpoint.aSegment[] array */
  int nFinal;                     /* Elements in segment nSegment-1 */
  struct LogSegment {
    int iNext;                    /* Next aIndex index */
    u8 *aIndex;                   /* Pointer to index array */
    u32 *aDbPage;                 /* Pointer to db page array */
  } aSegment[1];
};

/*
** Generate an 8 byte checksum based on the data in array aByte[] and the
** initial values of aCksum[0] and aCksum[1]. The checksum is written into
** aCksum[] before returning.
**
** Parameters:
**   aByte   Data to checksum. It is read through a u32 pointer, one
**           native-byte-order word at a time.
**           NOTE(review): this presumably requires aByte to be 4-byte
**           aligned on strict-alignment platforms - confirm with callers.
**   nByte   Number of bytes at aByte. Must be a multiple of 4 (asserted).
**   aCksum  IN/OUT: two running checksum words. Because the result is
**           folded back into aCksum[], successive calls chain together.
**
** All declarations are kept ahead of the first statement so that the
** function is valid C89, like the rest of this file.
*/
#define LOG_CKSM_BYTES 8
static void logChecksumBytes(u8 *aByte, int nByte, u32 *aCksum){
  u32 *z32 = (u32 *)aByte;        /* Input viewed as an array of words */
  int n32 = nByte / sizeof(u32);  /* Number of words at z32 */
  u32 cksum0 = aCksum[0];         /* Running value of first checksum word */
  u32 cksum1 = aCksum[1];         /* Running value of second checksum word */
  int i;                          /* Used to iterate through z32[] */

  assert( LOG_CKSM_BYTES==2*sizeof(u32) );
  assert( (nByte&0x00000003)==0 );

  /* Both words use the same mixing step; they differ only because they
  ** start from different initial values. */
  for(i=0; i<n32; i++){
    cksum0 = (cksum0 >> 8) + (cksum0 ^ z32[i]);
    cksum1 = (cksum1 >> 8) + (cksum1 ^ z32[i]);
  }

  aCksum[0] = cksum0;
  aCksum[1] = cksum1;
}

/*
** Argument zPath must be a nul-terminated string containing a path-name.
** This function modifies the string in-place by removing any "./" or "../" 
** elements in the path. For example, the following input:
**
**   "/home/user/plans/good/../evil/./world_domination.txt"
**
** is overwritten with the 'normalized' version:
**
**   "/home/user/plans/evil/world_domination.txt"
**
** Runs of '/' are collapsed to a single '/' and trailing '/' characters
** are removed, except that a path consisting only of '/' characters is
** reduced to "/" rather than to an empty string.
**
** A "." or ".." element is only recognized when it is followed by a '/'
** inside the (trailing-slash-stripped) path, so a final "/." or "/.."
** component is left untouched. The output is never longer than the input.
*/
static void logNormalizePath(char *zPath){
  int i, j;                       /* Read and write offsets into z[] */
  char *z = zPath;
  int n = strlen(z);              /* Bytes of z[] left to consider */

  /* Ignore trailing '/' characters, but always keep the first byte. */
  while( n>1 && z[n-1]=='/' ){ n--; }
  for(i=j=0; i<n; i++){
    if( z[i]=='/' ){
      /* Drop this '/' if another one follows it. The i+1<n test stops a
      ** trailing '/' that was stripped above from being counted as the
      ** "following" slash, which used to turn "//" into "". */
      if( i+1<n && z[i+1]=='/' ) continue;

      /* Skip over a "/." element that is followed by '/'. */
      if( z[i+1]=='.' && i+2<n && z[i+2]=='/' ){
        i += 1;
        continue;
      }

      /* For a "/.." element followed by '/', back the output up over the
      ** previous path element and its leading '/'. */
      if( z[i+1]=='.' && i+3<n && z[i+2]=='.' && z[i+3]=='/' ){
        while( j>0 && z[j-1]!='/' ){ j--; }
        if( j>0 ){ j--; }
        i += 2;
        continue;
      }
    }
    z[j++] = z[i];
  }
  z[j] = 0;
}

/*
** Take a write-lock (F_WRLCK) on the first byte of the summary file
** pSummary->fd. The request is made with F_SETLKW, so the call waits
** until the lock can be granted.
**
** Returns SQLITE_OK if fcntl() reports success, or SQLITE_IOERR otherwise.
*/
static int logSummaryLock(LogSummary *pSummary){
  struct flock sLock;             /* Describes the byte range to lock */

  memset(&sLock, 0, sizeof(sLock));
  sLock.l_whence = SEEK_SET;
  sLock.l_start = 0;
  sLock.l_len = 1;
  sLock.l_type = F_WRLCK;

  if( fcntl(pSummary->fd, F_SETLKW, &sLock)==0 ){
    return SQLITE_OK;
  }
  return SQLITE_IOERR;
}

/*
** Release the lock on the first byte of the summary file pSummary->fd
** (the byte range locked by logSummaryLock()). The request is made with
** the non-waiting F_SETLK command.
**
** Returns SQLITE_OK if fcntl() reports success, or SQLITE_IOERR otherwise.
*/
static int logSummaryUnlock(LogSummary *pSummary){
  struct flock sLock;             /* Describes the byte range to unlock */

  memset(&sLock, 0, sizeof(sLock));
  sLock.l_whence = SEEK_SET;
  sLock.l_start = 0;
  sLock.l_len = 1;
  sLock.l_type = F_UNLCK;

  if( fcntl(pSummary->fd, F_SETLK, &sLock)==0 ){
    return SQLITE_OK;
  }
  return SQLITE_IOERR;
}

/*
** Create a shared read/write memory mapping of the first nByte bytes of
** the summary file open on pSummary->fd, and record it in pSummary->aData
** (pointer) and pSummary->nData (size in bytes). A summary file that is
** smaller than nByte bytes is first extended to nByte with ftruncate().
**
** The caller is assumed to hold an exclusive lock on the summary file
** (this protects the ftruncate()). No mapping may already exist:
** pSummary->aData must be 0 on entry (asserted).
**
** Returns SQLITE_OK on success. If fstat(), ftruncate() or mmap() fails,
** SQLITE_IOERR is returned and pSummary->aData is left unchanged.
*/
static int logSummaryMap(LogSummary *pSummary, int nByte){
  struct stat st;                 /* Used to find the current file size */
  void *pNew;                     /* Address of the new mapping */
  int fd = pSummary->fd;

  assert( pSummary->aData==0 );

  /* Grow the file if it is currently smaller than the mapping. */
  if( fstat(fd, &st)!=0 ){
    return SQLITE_IOERR;
  }
  if( st.st_size<nByte && ftruncate(fd, nByte)!=0 ){
    return SQLITE_IOERR;
  }

  /* Map the file and publish the mapping in *pSummary. */
  pNew = mmap(0, nByte, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
  if( pNew==MAP_FAILED ){
    return SQLITE_IOERR;
  }
  pSummary->nData = nByte;
  pSummary->aData = (u32 *)pNew;

  return SQLITE_OK;
}

/*
** Unmap the log-summary mapping and close the file-descriptor. If
** the isTruncate argument is non-zero, truncate the log-summary file
** region to zero bytes.
**
** Regardless of the value of isTruncate, close the file-descriptor
** opened on the log-summary file.
**
** Truncation is only attempted when a mapping existed on entry. The
** return value is SQLITE_IOERR if that ftruncate() fails and SQLITE_OK
** in every other case; results of munmap() and close() are not checked.
**
** A negative pSummary->fd means "no file open" (logSummaryInit() asserts
** fd<0 before opening, and this function stores -1 after closing).
** Descriptor 0 is a valid return value of open(), so the tests below use
** fd>=0; testing fd>0 would leave such a descriptor open for ever.
*/
static int logSummaryUnmap(LogSummary *pSummary, int isTruncate){
  int rc = SQLITE_OK;
  if( pSummary->aData ){
    assert( pSummary->fd>=0 );
    munmap(pSummary->aData, pSummary->nData);
    pSummary->aData = 0;
    if( isTruncate ){
      rc = (ftruncate(pSummary->fd, 0) ? SQLITE_IOERR : SQLITE_OK);
    }
  }
  if( pSummary->fd>=0 ){
    close(pSummary->fd);
    pSummary->fd = -1;
  }
  return rc;
}


/*
** Copy header *pHdr to the start of the log-summary mapping
** pSummary->aData, then store a checksum of the header in the two u32
** slots that immediately follow it. The checksum is computed by
** logChecksumBytes() over the copied header bytes, starting from the
** initial values (1, 1).
*/
static void logSummaryWriteHdr(LogSummary *pSummary, LogSummaryHdr *pHdr){
  u32 *aHdr = pSummary->aData;                  /* Destination of header */
  u32 *aHdrCksum = &aHdr[LOGSUMMARY_HDR_NFIELD];/* Two checksum slots */

  memcpy(aHdr, pHdr, sizeof(LogSummaryHdr));
  aHdrCksum[0] = 1;
  aHdrCksum[1] = 1;
  logChecksumBytes((u8 *)aHdr, sizeof(LogSummaryHdr), aHdrCksum);
}

/*
** Encode the header of a single log frame into the buffer aFrame[], which
** is supplied by the caller. The header is LOG_FRAME_HDRSIZE (20) bytes
** of 4-byte big-endian integers:
**
**     0: Database page size in bytes.
**     4: Page number.
**     8: New database size (for commit frames, otherwise zero).
**    12: Frame checksum 1.
**    16: Frame checksum 2.
**
** The running checksum in aCksum[] is updated first with the leading 12
** bytes of the header and then with the nData bytes of page content at
** aData. The resulting values are stored in the header and also left in
** aCksum[] for use with the next frame.
*/
static void logEncodeFrame(
  u32 *aCksum,                    /* IN/OUT: Checksum values */
  u32 iPage,                      /* Database page number for frame */
  u32 nTruncate,                  /* New db size (or 0 for non-commit frames) */
  int nData,                      /* Database page size (size of aData[]) */
  u8 *aData,                      /* Pointer to page data (for checksum) */
  u8 *aFrame                      /* OUT: Write encoded frame here */
){
  u8 *aHdr = aFrame;              /* The frame header being built */

  assert( LOG_FRAME_HDRSIZE==20 );

  /* The three fields that are covered by the checksum. */
  sqlite3Put4byte(&aHdr[0], nData);
  sqlite3Put4byte(&aHdr[4], iPage);
  sqlite3Put4byte(&aHdr[8], nTruncate);

  /* Extend the running checksum over those fields and the page data. */
  logChecksumBytes(aHdr, 12, aCksum);
  logChecksumBytes(aData, nData, aCksum);

  /* Store the updated checksum in the last two header fields. */
  sqlite3Put4byte(&aHdr[12], aCksum[0]);
  sqlite3Put4byte(&aHdr[16], aCksum[1]);
}

/*
** Verify the checksum of one log frame and, if it is correct, decode the
** frame header.
**
** The running checksum aCksum[] is advanced over the first 12 bytes of
** the header at aFrame and the nData bytes of page data at aData, and
** then compared with the two values stored at offsets 12 and 16 of the
** header. aCksum[] is updated whether or not the comparison succeeds.
**
** Return 1 and populate *piPage and *pnTruncate if the checksum matches.
** Otherwise return 0 and leave *piPage and *pnTruncate unmodified.
*/
static int logDecodeFrame(
  u32 *aCksum,                    /* IN/OUT: Checksum values */
  u32 *piPage,                    /* OUT: Database page number for frame */
  u32 *pnTruncate,                /* OUT: New db size (or 0 if not commit) */
  int nData,                      /* Database page size (size of aData[]) */
  u8 *aData,                      /* Pointer to page data (for checksum) */
  u8 *aFrame                      /* Frame data */
){
  int isMatch;                    /* True if stored checksum is correct */

  logChecksumBytes(aFrame, 12, aCksum);
  logChecksumBytes(aData, nData, aCksum);

  isMatch = (
      aCksum[0]==sqlite3Get4byte(&aFrame[12])
   && aCksum[1]==sqlite3Get4byte(&aFrame[16])
  );
  if( isMatch ){
    *piPage = sqlite3Get4byte(&aFrame[4]);
    *pnTruncate = sqlite3Get4byte(&aFrame[8]);
  }
  return isMatch;
}

/*
** Sort the array aList[], which contains *pnList one-byte indexes into
** the array aContent[], so that the values aContent[aList[i]] are in
** strictly ascending order. Entries that refer to equal aContent[] values
** are merged: when the same value is found in both halves of the list
** being merged, the entry from the right-hand half (the one that appeared
** later in aList[]) is kept and the other is dropped. *pnList is set to
** the number of entries that remain.
**
** aBuffer[] is scratch space. It is shared by all levels of the recursion;
** each level copies its merged output back into aList[] before returning.
*/
static void logMergesort8(
  Pgno *aContent,                 /* Pages in log */
  u8 *aBuffer,                    /* Buffer of at least *pnList items to use */
  u8 *aList,                      /* IN/OUT: List to sort */
  int *pnList                     /* IN/OUT: Number of elements in aList[] */
){
  int nList = *pnList;
  if( nList>1 ){
    int nLeft = nList / 2;        /* Elements in left list */
    int nRight = nList - nLeft;   /* Elements in right list */
    u8 *aLeft = aList;            /* Left list */
    u8 *aRight = &aList[nLeft];   /* Right list */
    int iLeft = 0;                /* Current index in aLeft */
    int iRight = 0;               /* Current index in aRight */
    int iOut = 0;                 /* Current index in output buffer */

    /* TODO: Change to non-recursive version. */
    /* Sort each half. This may also shrink nLeft and nRight, as each
    ** recursive call removes the duplicates within its own half. */
    logMergesort8(aContent, aBuffer, aLeft, &nLeft);
    logMergesort8(aContent, aBuffer, aRight, &nRight);

    while( iRight<nRight || iLeft<nLeft ){
      u8 logpage;
      Pgno dbpage;

      /* Take from the left list only if its value is strictly smaller, so
      ** that on a tie the entry from the right list is the one output. */
      if( (iLeft<nLeft) 
       && (iRight>=nRight || aContent[aLeft[iLeft]]<aContent[aRight[iRight]])
      ){
        logpage = aLeft[iLeft++];
      }else{
        logpage = aRight[iRight++];
      }
      dbpage = aContent[logpage];

      aBuffer[iOut++] = logpage;
      /* If the left list holds the same value, skip it (duplicate). */
      if( iLeft<nLeft && aContent[aLeft[iLeft]]==dbpage ) iLeft++;

      assert( iLeft>=nLeft || aContent[aLeft[iLeft]]>dbpage );
      assert( iRight>=nRight || aContent[aRight[iRight]]>dbpage );
    }
    memcpy(aList, aBuffer, sizeof(aList[0])*iOut);
    *pnList = iOut;
  }

#ifdef SQLITE_DEBUG
  /* Check that the output is strictly ascending (sorted, no duplicates) */
  {
    int i;
    for(i=1; i<*pnList; i++){
      assert( aContent[aList[i]] > aContent[aList[i-1]] );
    }
  }
#endif
}


/*
** Return the index in the LogSummary.aData array that corresponds to 
** frame iFrame. The log-summary file consists of a header, followed by
** alternating "map" and "index" blocks.
**
** Frames are numbered from 1. The first map entry follows the
** LOGSUMMARY_HDR_NFIELD header fields and the 2 header checksum words.
** After that, every complete group of 256 map entries that precedes
** iFrame is followed by an index block occupying 64 u32 slots.
*/
static int logSummaryEntry(u32 iFrame){
  u32 iZero = iFrame - 1;             /* Zero-based frame number */
  u32 nIndexSlot = (iZero>>8) << 6;   /* Slots used by earlier index blocks */
  return (nIndexSlot + iZero + 2 + LOGSUMMARY_HDR_NFIELD);
}


/*
** Set an entry in the log-summary map to map log frame iFrame to db 
** page iPage. Values are always appended to the log-summary (i.e. the
** value of iFrame is always exactly one more than the value passed to
** the previous call), but that restriction is not enforced or asserted
** here.
**
** Whenever iFrame is a multiple of 256 (frames are numbered starting at
** 1), an index of the 256 most recently added map entries is built in the
** 256 bytes that follow the entry for iFrame. The 256 bytes after the
** index are used as scratch space while sorting. The index is produced by
** logMergesort8(); if that returns fewer than 256 entries, the remainder
** of the index is filled with copies of the final entry.
*/
static void logSummaryAppend(LogSummary *pSummary, u32 iFrame, u32 iPage){
  u32 iSlot = logSummaryEntry(iFrame);  /* Slot of the entry for iFrame */
  u32 *aMap = pSummary->aData;          /* The log-summary mapping */
  u8 *aIndex;                           /* 256 bytes to build index in */
  u8 *aTmp;                             /* Scratch space used by the sort */
  int nIndex = 256;                     /* Number of entries in index */
  int i;                                /* Used to initialize aIndex[] */

  /* Set the log-summary entry itself */
  aMap[iSlot] = iPage;

  /* Nothing more to do unless this entry completes a group of 256. */
  if( (iFrame&0x000000FF)!=0 ) return;

  aIndex = (u8 *)&aMap[iSlot+1];
  aTmp = &aIndex[256];
  for(i=0; i<256; i++){
    aIndex[i] = (u8)i;
  }
  logMergesort8(&aMap[iSlot-255], aTmp, aIndex, &nIndex);
  memset(&aIndex[nIndex], aIndex[nIndex-1], 256-nIndex);
}


/*
** Recover the log-summary by reading the log file. The caller must hold 
** an exclusive lock on the log-summary file.
**
** Frames are read from the start of log file pFd for as long as their
** checksums are valid. The page number of each valid frame is added to
** the log-summary map with logSummaryAppend(). Each time a commit frame
** (one with a non-zero "new database size" field) is read, the recovered
** header is updated to describe the log up to and including that frame.
** Valid frames that follow the last commit frame are therefore not
** covered by the recovered header.
**
** If the log holds no valid commit frame - because it is too small, its
** first frame header carries an unusable page size, or a checksum fails
** first - the recovered header describes an empty log: all fields zero
** except for iCheck1 and iCheck2, which are 2 and 3 as documented at
** struct LogSummaryHdr.
**
** Returns SQLITE_OK, or an error code from the VFS, or SQLITE_NOMEM. The
** header is written to the log-summary unless the function fails before
** it starts reading frames (file-size query, read of the first frame
** header, or malloc).
**
** NOTE(review): logSummaryAppend() does not check the size of the
** log-summary mapping (pSummary->nData), so this presumably relies on the
** mapping being large enough for every frame in the log - confirm.
*/
static int logSummaryRecover(LogSummary *pSummary, sqlite3_file *pFd){
  int rc;                         /* Return Code */
  i64 nSize;                      /* Size of log file */
  LogSummaryHdr hdr;              /* Recovered log-summary header */

  /* Begin with the header of an empty log. The checksum fields of an
  ** empty log are 2 and 3, matching the initial value of aCksum[] below,
  ** so that frames appended later continue the same checksum sequence
  ** that the next recovery will verify. */
  memset(&hdr, 0, sizeof(hdr));
  hdr.iCheck1 = 2;
  hdr.iCheck2 = 3;

  rc = sqlite3OsFileSize(pFd, &nSize);
  if( rc!=SQLITE_OK ){
    return rc;
  }

  if( nSize>LOG_FRAME_HDRSIZE ){
    u8 aBuf[LOG_FRAME_HDRSIZE];   /* Buffer to load first frame header into */
    u8 *aFrame = 0;               /* Malloc'd buffer to load entire frame */
    int nFrame;                   /* Number of bytes at aFrame */
    u8 *aData;                    /* Pointer to data part of aFrame buffer */
    int iFrame;                   /* Index of last frame read */
    i64 iOffset;                  /* Next offset to read from log file */
    int nPgsz;                    /* Page size according to the log */
    u32 aCksum[2] = {2, 3};       /* Running checksum */

    /* Read in the first frame header in the file (to determine the 
    ** database page size).
    */
    rc = sqlite3OsRead(pFd, aBuf, LOG_FRAME_HDRSIZE, 0);
    if( rc!=SQLITE_OK ){
      return rc;
    }

    /* If the database page size is not a power of two between 512 (the
    ** smallest page size SQLite supports) and SQLITE_MAX_PAGE_SIZE,
    ** conclude that the log file contains no valid data. The range is
    ** tested first so that zero and values that are negative when stored
    ** in an int are rejected before (nPgsz-1) is evaluated.
    */
    nPgsz = sqlite3Get4byte(&aBuf[0]);
    if( nPgsz<512 || nPgsz>SQLITE_MAX_PAGE_SIZE || (nPgsz&(nPgsz-1)) ){
      goto finished;
    }

    /* Malloc a buffer to read frames into. */
    nFrame = nPgsz + LOG_FRAME_HDRSIZE;
    aFrame = (u8 *)sqlite3_malloc(nFrame);
    if( !aFrame ){
      return SQLITE_NOMEM;
    }
    aData = &aFrame[LOG_FRAME_HDRSIZE];

    /* Read all complete frames from the log file. The "<=" matters: a
    ** frame that ends exactly at the end of the file (normally the final
    ** commit frame) is complete and must be read too.
    */
    iFrame = 0;
    for(iOffset=0; (iOffset+nFrame)<=nSize; iOffset+=nFrame){
      u32 pgno;                   /* Database page number for frame */
      u32 nTruncate;              /* dbsize field from frame header */
      int isValid;                /* True if this frame is valid */

      /* Read and decode the next log frame. */
      rc = sqlite3OsRead(pFd, aFrame, nFrame, iOffset);
      if( rc!=SQLITE_OK ) break;
      isValid = logDecodeFrame(aCksum, &pgno, &nTruncate, nPgsz, aData, aFrame);
      if( !isValid ) break;
      logSummaryAppend(pSummary, ++iFrame, pgno);

      /* If nTruncate is non-zero, this is a commit record. */
      if( nTruncate ){
        hdr.iCheck1 = aCksum[0];
        hdr.iCheck2 = aCksum[1];
        hdr.iLastPg = iFrame;
        hdr.nPage = nTruncate;
        hdr.pgsz = nPgsz;
      }
    }

    sqlite3_free(aFrame);
  }

finished:
  logSummaryWriteHdr(pSummary, &hdr);
  return rc;
}


/*
** This function initializes the connection to the log-summary identified
** by struct pSummary.
**
** It opens (creating if necessary) the file "<zPath>-summary", maps it
** into memory and takes a SHARED lock on the log file pFd. The summary
** file is locked with logSummaryLock() while this is done and unlocked
** again before returning.
**
** On entry the summary must not yet be open or mapped (pSummary->fd<0 and
** pSummary->aData==0, both asserted).
**
** Returns SQLITE_OK on success, SQLITE_NOMEM if the file name cannot be
** allocated, SQLITE_IOERR if open(), the summary lock or the mapping
** fails, or an error code from sqlite3OsLock()/sqlite3OsUnlock().
**
** NOTE(review): on the error paths pSummary->fd (and possibly the mapping)
** is left open; presumably the caller releases them with
** logSummaryUnmap() - confirm.
*/
static int logSummaryInit(LogSummary *pSummary, sqlite3_file *pFd){
  int rc;                         /* Return Code */
  char *zFile;                    /* File name for summary file */

  assert( pSummary->fd<0 );
  assert( pSummary->aData==0 );
  assert( pSummary->nRef>0 );
  assert( pSummary->zPath );

  /* Open a file descriptor on the summary file. */
  zFile = sqlite3_mprintf("%s-summary", pSummary->zPath);
  if( !zFile ){
    return SQLITE_NOMEM;
  }
  pSummary->fd = open(zFile, O_RDWR|O_CREAT, S_IWUSR|S_IRUSR);
  sqlite3_free(zFile);
  if( pSummary->fd<0 ){
    return SQLITE_IOERR;
  }

  /* Grab an exclusive lock on the summary file. Then mmap() it. TODO: This 
  ** code needs to be enhanced to support a growable mapping. For now, just 
  ** make the mapping very large to start with.
  */
  rc = logSummaryLock(pSummary);
  /* The lock was not obtained, so there is nothing to unlock here. */
  if( rc!=SQLITE_OK ) return rc;
  rc = logSummaryMap(pSummary, 512*1024);
  if( rc!=SQLITE_OK ) goto out;

  /* Grab a SHARED lock on the log file. Then try to upgrade to an EXCLUSIVE
  ** lock. If successful, then this is the first (and only) connection to
  ** the database. In this case assume the contents of the log-summary 
  ** cannot be trusted. Zero the log-summary header to make sure.
  **
  ** The SHARED lock on the log file is not released until the connection
  ** to the database is closed.
  */
  rc = sqlite3OsLock(pFd, SQLITE_LOCK_SHARED);
  if( rc!=SQLITE_OK ) goto out;
  rc = sqlite3OsLock(pFd, SQLITE_LOCK_EXCLUSIVE);
  if( rc==SQLITE_OK ){
    /* This is the first and only connection. */
    /* Zero the header fields and the two checksum words after them,
    ** then drop back from EXCLUSIVE to SHARED. */
    memset(pSummary->aData, 0, (LOGSUMMARY_HDR_NFIELD+2)*sizeof(u32) );
    rc = sqlite3OsUnlock(pFd, SQLITE_LOCK_SHARED);
  }else if( rc==SQLITE_BUSY ){
    /* Failing to get the EXCLUSIVE lock with SQLITE_BUSY is not an error;
    ** the existing log-summary contents are left as they are. */
    rc = SQLITE_OK;
  }

 out:
  /* Release the summary-file lock. Its return value is not checked. */
  logSummaryUnlock(pSummary);
  return rc;
}

/* 
** Open a connection to the log file associated with database zDb. The
** database file does not actually have to exist. zDb is used only to
** figure out the name of the log file to open. If the log file does not 
** exist it is created by this call.
**
** On success SQLITE_OK is returned and *ppLog points to the new handle.
** On failure an SQLite error code is returned, *ppLog is set to 0 and
** the reference taken on the shared LogSummary object (if any) is
** released again.
*/
int sqlite3LogOpen(
  sqlite3_vfs *pVfs,              /* vfs module to open log file with */
  const char *zDb,                /* Name of database file */
  Log **ppLog                     /* OUT: Allocated Log handle */
){
  int rc = SQLITE_NOMEM;          /* Return Code. The early allocation
                                  ** failures jump to "out" with this. */
  Log *pRet;                      /* Object to allocate and return */
  LogSummary *pSummary = 0;       /* Summary object */
  sqlite3_mutex *mutex = 0;       /* Mutex currently held, if any */
  int flags;                      /* Flags passed to OsOpen() */
  char *zWal = 0;                 /* Path to WAL file */
  int nWal;                       /* Length of zWal in bytes */

  /* Zero output variables */
  assert( zDb );
  *ppLog = 0;

  /* Allocate an instance of struct Log to return. The sqlite3_file used
  ** for the log file lives in the same allocation, directly after it.
  */
  pRet = (Log *)sqlite3MallocZero(sizeof(Log) + pVfs->szOsFile);
  if( !pRet ) goto out;
  pRet->pVfs = pVfs;
  pRet->pFd = (sqlite3_file *)&pRet[1];
  pRet->sync_flags = SQLITE_SYNC_NORMAL;

  /* Normalize the path name. */
  zWal = sqlite3_mprintf("%s-wal", zDb);
  if( !zWal ) goto out;
  logNormalizePath(zWal);
  flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_MAIN_DB);
  nWal = sqlite3Strlen30(zWal);

  /* Enter the mutex that protects the linked-list of LogSummary structures */
  if( sqlite3GlobalConfig.bCoreMutex ){
    mutex = sqlite3_mutex_alloc(LOG_SUMMARY_MUTEX);
  }
  sqlite3_mutex_enter(mutex);

  /* Search for an existing log summary object in the linked list. If one 
  ** cannot be found, allocate and initialize a new object.
  */
  for(pSummary=pLogSummary; pSummary; pSummary=pSummary->pNext){
    int nPath = sqlite3Strlen30(pSummary->zPath);
    if( nWal==nPath && 0==memcmp(pSummary->zPath, zWal, nPath) ) break;
  }
  if( !pSummary ){
    int nByte = sizeof(LogSummary) + nWal + 1;
    pSummary = (LogSummary *)sqlite3MallocZero(nByte);
    if( !pSummary ){
      rc = SQLITE_NOMEM;
      goto out;
    }
    if( sqlite3GlobalConfig.bCoreMutex ){
      pSummary->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_RECURSIVE);
    }
    /* The path is stored directly after the structure. The allocation was
    ** zeroed, so copying nWal bytes leaves the string nul-terminated. */
    pSummary->zPath = (char *)&pSummary[1];
    pSummary->fd = -1;
    memcpy(pSummary->zPath, zWal, nWal);
    pSummary->pNext = pLogSummary;
    pLogSummary = pSummary;
  }
  pSummary->nRef++;
  pRet->pSummary = pSummary;      /* Marks that a reference is now held */

  /* Exit the mutex protecting the linked-list of LogSummary objects. */
  sqlite3_mutex_leave(mutex);
  mutex = 0;

  /* Open file handle on the log file. */
  rc = sqlite3OsOpen(pVfs, pSummary->zPath, pRet->pFd, flags, &flags);
  if( rc!=SQLITE_OK ) goto out;

  /* Object pSummary is shared between all connections to the database made
  ** by this process. So at this point it may or may not be connected to
  ** the log-summary. If it is not, connect it. Otherwise, just take the
  ** SHARED lock on the log file.
  */
  sqlite3_mutex_enter(pSummary->mutex);
  mutex = pSummary->mutex;
  if( pSummary->fd<0 ){
    rc = logSummaryInit(pSummary, pRet->pFd);
  }else{
    rc = sqlite3OsLock(pRet->pFd, SQLITE_LOCK_SHARED);
  }

 out:
  sqlite3_mutex_leave(mutex);
  sqlite3_free(zWal);
  if( rc!=SQLITE_OK && pRet ){
    if( pRet->pSummary ){
      /* Give back the reference taken above. The summary object may be
      ** shared with other connections in this process, so it is only
      ** unlinked and freed if this was the last reference. This must be
      ** done under the list mutex, mirroring sqlite3LogClose().
      */
      sqlite3_mutex *listMutex = 0;
      if( sqlite3GlobalConfig.bCoreMutex ){
        listMutex = sqlite3_mutex_alloc(LOG_SUMMARY_MUTEX);
      }
      sqlite3_mutex_enter(listMutex);
      pSummary->nRef--;
      if( pSummary->nRef==0 ){
        LogSummary **pp;
        /* NOTE(review): sqlite3LogClose() relies on logSummaryUnmap() to
        ** release the mapping (and presumably the summary fd) before the
        ** object is freed - confirm. If logSummaryInit() failed before
        ** the mapping was created, the summary fd may still be open here.
        */
        if( pSummary->aData ){
          logSummaryUnmap(pSummary, 0);
        }
        for(pp=&pLogSummary; *pp!=pSummary; pp=&(*pp)->pNext);
        *pp = pSummary->pNext;
        sqlite3_mutex_free(pSummary->mutex);
        sqlite3_free(pSummary);
      }
      sqlite3_mutex_leave(listMutex);
    }
    sqlite3OsClose(pRet->pFd);
    sqlite3_free(pRet);
    pRet = 0;                     /* Never hand a freed handle to the caller */
  }
  *ppLog = pRet;
  return rc;
}

/*
** Advance checkpoint iterator p.
**
** On entry *piPage holds the database page number returned by the previous
** call (or 0 before the first call). Each segment's cursor is moved past
** every entry whose page number is not greater than that value. The
** smallest page number greater than it, across all segments, is written
** to *piPage and the corresponding log frame number to *piFrame. Segments
** are visited from last to first and only a strictly smaller page number
** replaces the current candidate, so when several segments hold the same
** page the frame from the highest-numbered segment is reported.
**
** Returns non-zero once there are no more pages (*piPage is then set to
** 0xFFFFFFFF and *piFrame is not written), or zero otherwise.
*/
static int logCheckpointNext(
  LogCheckpoint *p,               /* Iterator */
  u32 *piPage,                    /* OUT: Next db page to write */
  u32 *piFrame                    /* OUT: Log frame to read from */
){
  const u32 iPrev = *piPage;      /* Page returned by the previous call */
  u32 iBest = 0xFFFFFFFF;         /* Smallest candidate found so far */
  int nEntry = p->nFinal;         /* Entries in the segment being visited */
  int iSeg = p->nSegment;         /* Index of the segment being visited */

  while( --iSeg>=0 ){
    struct LogSegment *pSeg = &p->aSegment[iSeg];

    for(; pSeg->iNext<nEntry; pSeg->iNext++){
      u32 iCand = pSeg->aDbPage[pSeg->aIndex[pSeg->iNext]];
      if( iCand<=iPrev ) continue;          /* Already handled; skip it */
      if( iCand<iBest ){
        iBest = iCand;
        *piFrame = iSeg*256 + 1 + pSeg->aIndex[pSeg->iNext];
      }
      break;                                /* Leave the cursor on iCand */
    }

    /* Only the last segment uses p->nFinal; all earlier ones use 256. */
    nEntry = 256;
  }

  *piPage = iBest;
  return (iBest==0xFFFFFFFF);
}

/*
** Allocate and populate a checkpoint iterator covering every frame up to
** pLog->hdr.iLastPg.
**
** The iterator has one LogSegment per block of 256 frames. For all but
** the last segment, aDbPage and aIndex point straight into the mapped
** log-summary. The last segment has no index in the summary, so a
** 256 byte index plus 256 bytes of scratch space for the merge-sort are
** allocated directly after the segment array (the extra 512 bytes in the
** allocation) and the index is built here.
**
** Returns the new iterator, or 0 if the allocation fails. The iterator
** must be released with logCheckpointFree().
*/
static LogCheckpoint *logCheckpointInit(Log *pLog){
  u32 *aData = pLog->pSummary->aData;
  LogCheckpoint *p;               /* Return value */
  int nSegment;                   /* Number of segments to merge */
  u32 iLast;                      /* Last frame in log */
  int nByte;                      /* Number of bytes to allocate */
  int i;                          /* Iterator variable */
  int nFinal;                     /* Number of unindexed entries */
  struct LogSegment *pFinal;      /* Final (unindexed) segment */
  u8 *aTmp;                       /* Temp space used by merge-sort */

  iLast = pLog->hdr.iLastPg;
  nSegment = (iLast >> 8) + 1;
  nFinal = (iLast & 0x000000FF);

  nByte = sizeof(LogCheckpoint) + (nSegment-1)*sizeof(struct LogSegment) + 512;
  p = (LogCheckpoint *)sqlite3_malloc(nByte);
  if( !p ){
    /* Out of memory. The caller treats a NULL return as SQLITE_NOMEM. */
    return 0;
  }
  memset(p, 0, nByte);
  p->nSegment = nSegment;

  /* Segments that already have an index in the log-summary. */
  for(i=0; i<nSegment-1; i++){
    p->aSegment[i].aDbPage = &aData[logSummaryEntry(i*256+1)];
    p->aSegment[i].aIndex = (u8 *)&aData[logSummaryEntry(i*256+1)+256];
  }
  pFinal = &p->aSegment[nSegment-1];

  /* Final segment: start from the identity index and sort it. nFinal is
  ** passed by reference, so it is re-read afterwards (presumably the sort
  ** can drop duplicate entries - confirm against logMergesort8()).
  */
  pFinal->aDbPage = &aData[logSummaryEntry((nSegment-1)*256+1)];
  pFinal->aIndex = (u8 *)&pFinal[1];
  aTmp = &pFinal->aIndex[256];
  for(i=0; i<nFinal; i++){
    pFinal->aIndex[i] = i;
  }
  logMergesort8(pFinal->aDbPage, aTmp, pFinal->aIndex, &nFinal);
  p->nFinal = nFinal;

  return p;
}

/* 
** Release a checkpoint iterator obtained from logCheckpointInit(). The
** iterator is a single allocation, so one sqlite3_free() is sufficient.
** Passing a NULL pointer is harmless.
*/
static void logCheckpointFree(LogCheckpoint *p){
  if( p ){
    sqlite3_free(p);
  }
}

/*
** Checkpoint the contents of the log file.
**
** The log file is synced, then the most recent version of each page in
** the log is copied into the database file pFd in ascending page order.
** The database file is then truncated to pLog->hdr.nPage pages and synced,
** and finally the log-summary header is reset to describe an empty log.
**
** zBuf must be a buffer of at least one database page in size.
**
** Returns SQLITE_OK on success, SQLITE_NOMEM if the iterator cannot be
** allocated, or the error code from the first failing file operation.
*/
static int logCheckpoint(
  Log *pLog,                      /* Log connection */
  sqlite3_file *pFd,              /* File descriptor open on db file */
  u8 *zBuf                        /* Temporary buffer to use */
){
  int rc;                         /* Return code */
  int pgsz = pLog->hdr.pgsz;      /* Database page-size */
  LogCheckpoint *pIter = 0;       /* Log iterator context */
  u32 iDbpage = 0;                /* Next database page to write */
  u32 iFrame;                     /* Log frame containing data for iDbpage */

  /* Allocate the iterator */
  pIter = logCheckpointInit(pLog);
  if( !pIter ) return SQLITE_NOMEM;

  /* Sync the log file to disk */
  rc = sqlite3OsSync(pLog->pFd, pLog->sync_flags);
  if( rc!=SQLITE_OK ) goto out;

  /* Iterate through the contents of the log, copying data to the db file.
  ** File offsets are computed in 64-bit arithmetic: the product of a frame
  ** or page number and the page size does not fit in 32 bits once the log
  ** or database grows past 4GiB.
  */
  while( 0==logCheckpointNext(pIter, &iDbpage, &iFrame) ){
    i64 iLogOff = ((i64)(iFrame-1))*(i64)(pgsz+LOG_FRAME_HDRSIZE)
                + LOG_FRAME_HDRSIZE;
    i64 iDbOff = ((i64)(iDbpage-1))*(i64)pgsz;

    rc = sqlite3OsRead(pLog->pFd, zBuf, pgsz, iLogOff);
    if( rc!=SQLITE_OK ) goto out;
    rc = sqlite3OsWrite(pFd, zBuf, pgsz, iDbOff);
    if( rc!=SQLITE_OK ) goto out;
  }

  /* Truncate the database file */
  rc = sqlite3OsTruncate(pFd, ((i64)pLog->hdr.nPage*(i64)pgsz));
  if( rc!=SQLITE_OK ) goto out;

  /* Sync the database file. If successful, update the log-summary. */
  rc = sqlite3OsSync(pFd, pLog->sync_flags);
  if( rc!=SQLITE_OK ) goto out;
  pLog->hdr.iLastPg = 0;
  pLog->hdr.iCheck1 = 2;
  pLog->hdr.iCheck2 = 3;
  logSummaryWriteHdr(pLog->pSummary, &pLog->hdr);

  /* TODO: If a crash occurs and the current log is copied into the 
  ** database there is no problem. However, if a crash occurs while
  ** writing the next transaction into the start of the log, such that:
  **
  **   * The first transaction currently in the log is left intact, but
  **   * The second (or subsequent) transaction is damaged,
  **
  ** then the database could become corrupt.
  **
  ** The easiest thing to do would be to write and sync a dummy header
  ** into the log at this point. Unfortunately, that turns out to be
  ** an unwelcome performance hit. Alternatives are...
  */
#if 0 
  memset(zBuf, 0, LOG_FRAME_HDRSIZE);
  rc = sqlite3OsWrite(pLog->pFd, zBuf, LOG_FRAME_HDRSIZE, 0);
  if( rc!=SQLITE_OK ) goto out;
  rc = sqlite3OsSync(pLog->pFd, pLog->sync_flags);
#endif

 out:
  logCheckpointFree(pIter);
  return rc;
}

/*
** Close a connection to a log file.
**
** pFd is the file-handle open on the database file and zBuf a buffer of
** at least one page in size; both are only used if a checkpoint is run.
** Calling this function with pLog==0 is a no-op that returns SQLITE_OK.
*/
int sqlite3LogClose(
  Log *pLog,                      /* Log to close */
  sqlite3_file *pFd,              /* Database file */
  u8 *zBuf                        /* Buffer of at least page-size bytes */
){
  int rc = SQLITE_OK;             /* Return code */
  LogSummary *pSummary;           /* Summary object shared by this process */
  sqlite3_mutex *pListMutex = 0;  /* LOG_SUMMARY_MUTEX mutex */

  if( pLog==0 ) return SQLITE_OK;
  pSummary = pLog->pSummary;

  /* Enter the mutex protecting the list of LogSummary objects. */
  if( sqlite3GlobalConfig.bCoreMutex ){
    pListMutex = sqlite3_mutex_alloc(LOG_SUMMARY_MUTEX);
  }
  sqlite3_mutex_enter(pListMutex);

  /* Drop this connection's reference to the log summary. If it was the
  ** last reference held by this process the object is freed. If this is
  ** also the last connection to the database from any process, then the
  ** database is checkpointed and the log and log-summary files are
  ** truncated to zero bytes.
  */
  pSummary->nRef--;
  if( pSummary->nRef==0 ){
    LogSummary **ppLink;          /* Used to unlink pSummary from the list */

    rc = logSummaryLock(pSummary);
    if( rc==SQLITE_OK ){
      int bLastConn = 0;          /* True if the EXCLUSIVE lock was obtained */
      int rcCkpt = sqlite3OsLock(pLog->pFd, SQLITE_LOCK_EXCLUSIVE);

      if( rcCkpt==SQLITE_OK ){
        /* No other connection (in any process) has the log open:
        **
        **   1. Checkpoint the db.
        **   2. Truncate the log file to zero bytes.
        **   3. Truncate the log-summary file to zero bytes (requested via
        **      the second argument to logSummaryUnmap() below).
        */
        bLastConn = 1;
        rcCkpt = logCheckpoint(pLog, pFd, zBuf);
        if( rcCkpt==SQLITE_OK ){
          rcCkpt = sqlite3OsTruncate(pLog->pFd, 0);
        }
      }else if( rcCkpt==SQLITE_BUSY ){
        /* Somebody else is still connected. This is not an error. */
        rcCkpt = SQLITE_OK;
      }

      logSummaryUnmap(pSummary, bLastConn);
      sqlite3OsUnlock(pLog->pFd, SQLITE_LOCK_NONE);
      rc = logSummaryUnlock(pSummary);
      if( rcCkpt!=SQLITE_OK ) rc = rcCkpt;
    }

    /* Unlink the LogSummary object from the global list, then free its
    ** mutex and the object itself.
    */
    ppLink = &pLogSummary;
    while( *ppLink!=pSummary ){
      ppLink = &(*ppLink)->pNext;
    }
    *ppLink = pSummary->pNext;
    sqlite3_mutex_free(pSummary->mutex);
    sqlite3_free(pSummary);
  }

  sqlite3_mutex_leave(pListMutex);

  /* Close the connection to the log file and free the Log handle. */
  sqlite3OsClose(pLog->pFd);
  sqlite3_free(pLog);
  return rc;
}

/*
** Set the flags to pass to the sqlite3OsSync() function when syncing
** the log file.
**
** NOTE: This function is currently compiled out by the "#if 0" below, so
** Log.sync_flags keeps the value assigned in sqlite3LogOpen()
** (SQLITE_SYNC_NORMAL).
*/
#if 0
void sqlite3LogSetSyncflags(Log *pLog, int sync_flags){
  assert( sync_flags==SQLITE_SYNC_NORMAL || sync_flags==SQLITE_SYNC_FULL );
  pLog->sync_flags = sync_flags;
}
#endif

/*
** Enter the log-summary mutex. In this context, entering the log-summary
** mutex means:
**
**   1. Obtaining mutex pLog->pSummary->mutex, and
**   2. Taking an exclusive lock on the log-summary file.
**
** i.e. this mutex locks out other processes as well as other threads
** hosted in this address space.
**
** Returns SQLITE_OK if both steps succeed. If the file lock cannot be
** obtained, the in-process mutex is released again and the error code
** is returned, so the caller holds nothing on failure.
*/
static int logEnterMutex(Log *pLog){
  LogSummary *pSum = pLog->pSummary;
  int rc;

  sqlite3_mutex_enter(pSum->mutex);
  rc = logSummaryLock(pSum);
  if( rc==SQLITE_OK ){
    return SQLITE_OK;
  }

  /* File lock failed: back out of the in-process mutex. */
  sqlite3_mutex_leave(pSum->mutex);
  return rc;
}
/*
** Leave the log-summary mutex: release the lock on the log-summary file
** first, then the in-process mutex pLog->pSummary->mutex.
*/
static void logLeaveMutex(Log *pLog){
  LogSummary *pSum = pLog->pSummary;

  logSummaryUnlock(pSum);
  sqlite3_mutex_leave(pSum->mutex);
}

/*
** The caller must hold a SHARED lock on the database file.
**
** If the connection already has a snapshot open (pLog->isLocked is set),
** this is a no-op. Otherwise the log-summary header is read under the
** log-summary mutex. If its checksum does not verify, the log-summary is
** first rebuilt from the log file. The private copy of the header is then
** refreshed and the snapshot marked as open.
**
** *pChanged is set to 1 if the log-summary had to be recovered or if the
** header differs from the private copy held by this connection. Otherwise
** it is left unmodified. This is used by the pager layer to determine
** whether or not any cached pages may be safely reused.
*/
int sqlite3LogOpenSnapshot(Log *pLog, int *pChanged){
  int rc;                                  /* Return code */
  u32 aSum[2] = {1, 1};                    /* Checksum of shared header */
  u32 aShared[LOGSUMMARY_HDR_NFIELD+2];    /* Copy of header + checksum */

  if( pLog->isLocked ) return SQLITE_OK;

  rc = logEnterMutex(pLog);
  if( rc!=SQLITE_OK ) return rc;

  memcpy(aShared, pLog->pSummary->aData, sizeof(aShared));

  /* Verify the checksum stored after the header fields. On a mismatch,
  ** recover the log-summary from the log file and re-read the header.
  */
  logChecksumBytes((u8*)aShared, sizeof(u32)*LOGSUMMARY_HDR_NFIELD, aSum);
  if( aSum[0]!=aShared[LOGSUMMARY_HDR_NFIELD]
   || aSum[1]!=aShared[LOGSUMMARY_HDR_NFIELD+1]
  ){
    rc = logSummaryRecover(pLog->pSummary, pLog->pFd);
    memcpy(aShared, pLog->pSummary->aData, sizeof(aShared));
    *pChanged = 1;
  }

  if( rc==SQLITE_OK ){
    pLog->isLocked = 1;
    if( 0!=memcmp(&pLog->hdr, aShared, sizeof(LogSummaryHdr)) ){
      *pChanged = 1;
      memcpy(&pLog->hdr, aShared, LOGSUMMARY_HDR_NFIELD*sizeof(u32));
    }
  }

  logLeaveMutex(pLog);
  return rc;
}

/*
** Close the snapshot currently open on this connection, if any. This only
** clears the isLocked flag, so that the next call to
** sqlite3LogOpenSnapshot() re-reads the log-summary header.
*/
void sqlite3LogCloseSnapshot(Log *pLog){
  pLog->isLocked = 0;
}



/* 
** Read a page from the log, if it is present. 
**
** The most recent frame (no later than pLog->hdr.iLastPg) holding page
** pgno is searched for. If one is found, *pInLog is set to 1, the page
** content is read into pOut and the result of the read is returned. If
** the page is not in the log, *pInLog is set to 0, pOut is not written
** and SQLITE_OK is returned.
*/
int sqlite3LogRead(Log *pLog, Pgno pgno, int *pInLog, u8 *pOut){
  u32 iRead = 0;                  /* Frame to read from, or 0 if not found */
  u32 *aData = pLog->pSummary->aData;
  int iFrame = (pLog->hdr.iLastPg & 0xFFFFFF00);

  /* Do a linear search of the unindexed block of page-numbers (if any) 
  ** at the end of the log-summary. An alternative to this would be to
  ** build an index in private memory each time a read transaction is
  ** opened on a new snapshot.
  **
  ** The scan runs backwards from the entry for the last frame so that the
  ** newest copy of the page wins. piStop is one slot before the first
  ** unindexed entry; it is a sentinel only and is never dereferenced.
  */
  if( pLog->hdr.iLastPg ){
    u32 *pi = &aData[logSummaryEntry(pLog->hdr.iLastPg)];
    u32 *piStop = pi - (pLog->hdr.iLastPg & 0xFF);
    while( pi!=piStop && *pi!=pgno ) pi--;
    if( pi!=piStop ){
      iRead = (pi-piStop) + iFrame;
    }
  }
  assert( iRead==0 || aData[logSummaryEntry(iRead)]==pgno );

  /* Not found in the unindexed tail. Work backwards through the indexed
  ** blocks of 256 frames, binary searching the index of each one.
  */
  while( iRead==0 && iFrame>0 ){
    int iLow = 0;
    int iHigh = 255;
    u32 *aFrame;
    u8 *aIndex;

    iFrame -= 256;
    aFrame = &aData[logSummaryEntry(iFrame+1)];
    aIndex = (u8 *)&aFrame[256];

    while( iLow<=iHigh ){
      int iTest = (iLow+iHigh)>>1;
      u32 iPg = aFrame[aIndex[iTest]];

      if( iPg==pgno ){
        iRead = iFrame + 1 + aIndex[iTest];
        break;
      }
      else if( iPg<pgno ){
        iLow = iTest+1;
      }else{
        iHigh = iTest-1;
      }
    }
  }
  assert( iRead==0 || aData[logSummaryEntry(iRead)]==pgno );

  /* If iRead is non-zero, then it is the log frame number that contains the
  ** required page. Read and return data from the log file. The offset is
  ** computed in 64-bit arithmetic so that it does not wrap once the log
  ** file grows past 4GiB.
  */
  if( iRead ){
    i64 iOffset = ((i64)(iRead-1)) * (i64)(pLog->hdr.pgsz+LOG_FRAME_HDRSIZE);
    iOffset += LOG_FRAME_HDRSIZE;
    *pInLog = 1;
    return sqlite3OsRead(pLog->pFd, pOut, pLog->hdr.pgsz, iOffset);
  }

  *pInLog = 0;
  return SQLITE_OK;
}


/* 
** Set *pPgno to the size of the database in pages, as recorded in this
** connection's copy of the log-summary header (zero if unknown). A
** snapshot must be open when this is called.
*/
void sqlite3LogMaxpgno(Log *pLog, Pgno *pPgno){
  Pgno nPage;

  assert( pLog->isLocked );
  nPage = pLog->hdr.nPage;
  *pPgno = nPage;
}

/* 
** The caller must hold at least a RESERVED lock on the database file
** when invoking this function, and must have a snapshot open.
**
** If op is non-zero, this is a request to begin writing. SQLITE_OK is
** returned if the caller may write to the database. Otherwise, if the
** caller is operating on a snapshot that has already been overwritten by
** another writer (the private header no longer matches the log-summary
** header), SQLITE_BUSY is returned.
**
** If op is zero, the write-lock is released. If it was held, the private
** copy of the header is reloaded from the log-summary first. SQLITE_OK is
** always returned in this case.
*/
int sqlite3LogWriteLock(Log *pLog, int op){
  assert( pLog->isLocked );

  if( op==0 ){
    if( pLog->isWriteLocked ){
      memcpy(&pLog->hdr, pLog->pSummary->aData, sizeof(pLog->hdr));
      pLog->isWriteLocked = 0;
    }
    return SQLITE_OK;
  }

  if( 0!=memcmp(&pLog->hdr, pLog->pSummary->aData, sizeof(pLog->hdr)) ){
    return SQLITE_BUSY;
  }
  pLog->isWriteLocked = 1;
  return SQLITE_OK;
}

/* 
** Write a set of frames to the log. The caller must hold at least a
** RESERVED lock on the database file.
**
** One frame is appended to the log file for each page on the pList
** dirty list, the log file is optionally synced, the page numbers are
** appended to the log-summary and the private header is updated. If
** isCommit is true, the last frame written carries nTruncate in its
** database-size field and the shared log-summary header is updated too.
**
** Returns SQLITE_OK on success or an SQLite error code if writing or
** syncing the log file, or locking the log-summary, fails.
*/
int sqlite3LogFrames(
  Log *pLog,                      /* Log handle to write to */
  int nPgsz,                      /* Database page-size in bytes */
  PgHdr *pList,                   /* List of dirty pages to write */
  Pgno nTruncate,                 /* Database size after this commit */
  int isCommit,                   /* True if this is a commit */
  int isSync                      /* True to sync the log file */
){
  /* Each frame has a 20 byte header, as follows:
  **
  **   + Pseudo-random salt (4 bytes)
  **   + Page number (4 bytes)
  **   + New database size, or 0 if not a commit frame (4 bytes)
  **   + Checksum (CHECKSUM_BYTES bytes);
  **
  ** The checksum is computed based on the following:
  **
  **   + The previous checksum, or {2, 3} for the first frame in the log.
  **   + The non-checksum fields of the frame header, and
  **   + The frame contents (page data).
  **
  ** This format must also be understood by the code in logSummaryRecover().
  ** The size of the frame header is used by LogRead() and LogCheckpoint().
  */
  int rc;                         /* Used to catch return codes */
  u32 iFrame;                     /* Next frame address */
  u8 aFrame[LOG_FRAME_HDRSIZE];
  PgHdr *p;                       /* Iterator to run through pList with. */
  u32 aCksum[2];

  PgHdr *pLast = 0;               /* Last frame in list */
  int nLast = 0;                  /* Number of extra copies of last page */

  assert( LOG_FRAME_HDRSIZE==(4 * 3 + LOG_CKSM_BYTES) );
  assert( pList );

  aCksum[0] = pLog->hdr.iCheck1;
  aCksum[1] = pLog->hdr.iCheck2;

  /* Write the log file. */
  iFrame = pLog->hdr.iLastPg;
  for(p=pList; p; p=p->pDirty){
    u32 nDbsize;                  /* Db-size field for frame header */
    i64 iOffset;                  /* Write offset in log file */

    /* Compute the offset in 64-bit arithmetic so that it cannot wrap on
    ** platforms where size_t is only 32 bits wide. */
    iFrame++;
    iOffset = ((i64)(iFrame-1)) * (i64)(nPgsz+sizeof(aFrame));
    
    /* Populate and write the frame header. Only the final frame of a
    ** commit carries a non-zero database size. */
    nDbsize = (isCommit && p->pDirty==0) ? nTruncate : 0;
    logEncodeFrame(aCksum, p->pgno, nDbsize, nPgsz, p->pData, aFrame);
    rc = sqlite3OsWrite(pLog->pFd, aFrame, sizeof(aFrame), iOffset);
    if( rc!=SQLITE_OK ){
      return rc;
    }

    /* Write the page data */
    rc = sqlite3OsWrite(pLog->pFd, p->pData, nPgsz, iOffset + sizeof(aFrame));
    if( rc!=SQLITE_OK ){
      return rc;
    }
    pLast = p;
  }

  /* Sync the log file if the 'isSync' flag was specified. */
  if( isSync ){
#if 0
    i64 iSegment = sqlite3OsSectorSize(pLog->pFd);
    i64 iOffset = iFrame * (nPgsz+sizeof(aFrame));

    if( iSegment<SQLITE_DEFAULT_SECTOR_SIZE ){
      iSegment = SQLITE_DEFAULT_SECTOR_SIZE;
    }
    iSegment = (((iOffset+iSegment-1)/iSegment) * iSegment);
    while( iOffset<iSegment ){
      logEncodeFrame(aCksum,pLast->pgno,nTruncate,nPgsz,pLast->pData,aFrame);
      rc = sqlite3OsWrite(pLog->pFd, aFrame, sizeof(aFrame), iOffset);
      if( rc!=SQLITE_OK ){
        return rc;
      }

      iOffset += LOG_FRAME_HDRSIZE;
      rc = sqlite3OsWrite(pLog->pFd, pLast->pData, nPgsz, iOffset); 
      if( rc!=SQLITE_OK ){
        return rc;
      }
      nLast++;
      iOffset += nPgsz;
    }
#endif

    rc = sqlite3OsSync(pLog->pFd, pLog->sync_flags);
    if( rc!=SQLITE_OK ){
      return rc;
    }
  }

  /* Append data to the log summary. It is not necessary to lock the 
  ** log-summary to do this as the RESERVED lock held on the db file
  ** guarantees that there are no other writers, and no data that may
  ** be in use by existing readers is being overwritten.
  */
  iFrame = pLog->hdr.iLastPg;
  for(p=pList; p; p=p->pDirty){
    iFrame++;
    logSummaryAppend(pLog->pSummary, iFrame, p->pgno);
  }
  /* nLast is only ever non-zero if the disabled padding code above is
  ** compiled in, so this loop currently does nothing. */
  while( nLast>0 ){
    iFrame++;
    nLast--;
    logSummaryAppend(pLog->pSummary, iFrame, pLast->pgno);
  }

  /* Update the private copy of the header. */
  pLog->hdr.pgsz = nPgsz;
  pLog->hdr.iLastPg = iFrame;
  if( isCommit ){
    pLog->hdr.iChange++;
    pLog->hdr.nPage = nTruncate;
  }
  pLog->hdr.iCheck1 = aCksum[0];
  pLog->hdr.iCheck2 = aCksum[1];

  /* If this is a commit, update the log-summary header too. If the
  ** log-summary cannot be locked the new header is not published to other
  ** connections, so report the failure rather than claiming success.
  */
  rc = SQLITE_OK;
  if( isCommit ){
    rc = logEnterMutex(pLog);
    if( rc==SQLITE_OK ){
      logSummaryWriteHdr(pLog->pSummary, &pLog->hdr);
      logLeaveMutex(pLog);
    }
  }

  return rc;
}

/* 
** Checkpoint the database: copy the contents of the log into the database
** file open on pFd, using zBuf as temporary space. When this function is
** called the caller must hold an exclusive lock on the database file.
**
** The return value is that of logCheckpoint().
*/
int sqlite3LogCheckpoint(
  Log *pLog,                      /* Log connection */
  sqlite3_file *pFd,              /* File descriptor open on db file */
  u8 *zBuf                        /* Temporary buffer to use */
){
#ifdef SQLITE_DEBUG
  /* Debug builds only: query the lock state of the database file and
  ** assert() that it is at least 4 (presumably SQLITE_LOCK_EXCLUSIVE -
  ** confirm against sqlite3.h).
  */
  {
    int eLock;
    sqlite3OsFileControl(pFd, SQLITE_FCNTL_LOCKSTATE, &eLock);
    assert( eLock>=4 );
  }
#endif

  return logCheckpoint(pLog, pFd, zBuf);
}

Changes to src/os_unix.c.
1532
1533
1534
1535
1536
1537
1538

1539
1540
1541

1542
1543
1544
1545
1546
1547
1548
    ** reading the database file again, make sure that the
    ** transaction counter was updated if any part of the database
    ** file changed.  If the transaction counter is not updated,
    ** other connections to the same file might not realize that
    ** the file has changed and hence might not know to flush their
    ** cache.  The use of a stale cache can lead to database corruption.
    */

    assert( pFile->inNormalWrite==0
         || pFile->dbUpdate==0
         || pFile->transCntrChng==1 );

    pFile->inNormalWrite = 0;
#endif

    /* downgrading to a shared lock on NFS involves clearing the write lock
    ** before establishing the readlock - to avoid a race condition we downgrade
    ** the lock in 2 blocks, so that part of the range will be covered by a 
    ** write lock until the rest is covered by a read lock:







>



>







1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
    ** reading the database file again, make sure that the
    ** transaction counter was updated if any part of the database
    ** file changed.  If the transaction counter is not updated,
    ** other connections to the same file might not realize that
    ** the file has changed and hence might not know to flush their
    ** cache.  The use of a stale cache can lead to database corruption.
    */
#if 0
    assert( pFile->inNormalWrite==0
         || pFile->dbUpdate==0
         || pFile->transCntrChng==1 );
#endif
    pFile->inNormalWrite = 0;
#endif

    /* downgrading to a shared lock on NFS involves clearing the write lock
    ** before establishing the readlock - to avoid a race condition we downgrade
    ** the lock in 2 blocks, so that part of the range will be covered by a 
    ** write lock until the rest is covered by a read lock:
2952
2953
2954
2955
2956
2957
2958

2959
2960
2961
2962

2963
2964
2965
2966
2967
2968
2969
){
  unixFile *pFile = (unixFile *)id;
  int got;
  assert( id );

  /* If this is a database file (not a journal, master-journal or temp
  ** file), the bytes in the locking range should never be read or written. */

  assert( pFile->pUnused==0
       || offset>=PENDING_BYTE+512
       || offset+amt<=PENDING_BYTE 
  );


  got = seekAndRead(pFile, offset, pBuf, amt);
  if( got==amt ){
    return SQLITE_OK;
  }else if( got<0 ){
    /* lastErrno set by seekAndRead */
    return SQLITE_IOERR_READ;







>




>







2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
){
  unixFile *pFile = (unixFile *)id;
  int got;
  assert( id );

  /* If this is a database file (not a journal, master-journal or temp
  ** file), the bytes in the locking range should never be read or written. */
#if 0
  assert( pFile->pUnused==0
       || offset>=PENDING_BYTE+512
       || offset+amt<=PENDING_BYTE 
  );
#endif

  got = seekAndRead(pFile, offset, pBuf, amt);
  if( got==amt ){
    return SQLITE_OK;
  }else if( got<0 ){
    /* lastErrno set by seekAndRead */
    return SQLITE_IOERR_READ;
3027
3028
3029
3030
3031
3032
3033

3034
3035
3036
3037

3038
3039
3040
3041
3042
3043
3044
  unixFile *pFile = (unixFile*)id;
  int wrote = 0;
  assert( id );
  assert( amt>0 );

  /* If this is a database file (not a journal, master-journal or temp
  ** file), the bytes in the locking range should never be read or written. */

  assert( pFile->pUnused==0
       || offset>=PENDING_BYTE+512
       || offset+amt<=PENDING_BYTE 
  );


#ifndef NDEBUG
  /* If we are doing a normal write to a database file (as opposed to
  ** doing a hot-journal rollback or a write to some file other than a
  ** normal database file) then record the fact that the database
  ** has changed.  If the transaction counter is modified, record that
  ** fact too.







>




>







3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
  unixFile *pFile = (unixFile*)id;
  int wrote = 0;
  assert( id );
  assert( amt>0 );

  /* If this is a database file (not a journal, master-journal or temp
  ** file), the bytes in the locking range should never be read or written. */
#if 0
  assert( pFile->pUnused==0
       || offset>=PENDING_BYTE+512
       || offset+amt<=PENDING_BYTE 
  );
#endif

#ifndef NDEBUG
  /* If we are doing a normal write to a database file (as opposed to
  ** doing a hot-journal rollback or a write to some file other than a
  ** normal database file) then record the fact that the database
  ** has changed.  If the transaction counter is modified, record that
  ** fact too.
Changes to src/pager.c.
16
17
18
19
20
21
22

23
24
25
26
27
28
29
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
*/
#ifndef SQLITE_OMIT_DISKIO
#include "sqliteInt.h"


/*
******************** NOTES ON THE DESIGN OF THE PAGER ************************
**
** Within this comment block, a page is deemed to have been synced
** automatically as soon as it is written when PRAGMA synchronous=OFF.
** Otherwise, the page is not synced until the xSync method of the VFS







>







16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
*/
#ifndef SQLITE_OMIT_DISKIO
#include "sqliteInt.h"
#include "log.h"

/*
******************** NOTES ON THE DESIGN OF THE PAGER ************************
**
** Within this comment block, a page is deemed to have been synced
** automatically as soon as it is written when PRAGMA synchronous=OFF.
** Otherwise, the page is not synced until the xSync method of the VFS
393
394
395
396
397
398
399

400
401
402
403
404
405
406
  void (*xCodecSizeChng)(void*,int,int); /* Notify of page size changes */
  void (*xCodecFree)(void*);             /* Destructor for the codec */
  void *pCodec;               /* First argument to xCodec... methods */
#endif
  char *pTmpSpace;            /* Pager.pageSize bytes of space for tmp use */
  PCache *pPCache;            /* Pointer to page cache object */
  sqlite3_backup *pBackup;    /* Pointer to list of ongoing backup processes */

};

/*
** The following global variables hold counters used for
** testing purposes only.  These variables do not exist in
** a non-testing build.  These variables are not thread-safe.
*/







>







394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
  void (*xCodecSizeChng)(void*,int,int); /* Notify of page size changes */
  void (*xCodecFree)(void*);             /* Destructor for the codec */
  void *pCodec;               /* First argument to xCodec... methods */
#endif
  char *pTmpSpace;            /* Pager.pageSize bytes of space for tmp use */
  PCache *pPCache;            /* Pointer to page cache object */
  sqlite3_backup *pBackup;    /* Pointer to list of ongoing backup processes */
  Log *pLog;                  /* Log used by "journal_mode=wal" */
};

/*
** The following global variables hold counters used for
** testing purposes only.  These variables do not exist in
** a non-testing build.  These variables are not thread-safe.
*/
1180
1181
1182
1183
1184
1185
1186





















1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218



1219
1220
1221
1222
1223
1224
1225
      rc |= sqlite3BitvecSet(p->pInSavepoint, pgno);
      testcase( rc==SQLITE_NOMEM );
      assert( rc==SQLITE_OK || rc==SQLITE_NOMEM );
    }
  }
  return rc;
}






















/*
** Unlock the database file. This function is a no-op if the pager
** is in exclusive mode.
**
** If the pager is currently in error state, discard the contents of 
** the cache and reset the Pager structure internal state. If there is
** an open journal-file, then the next time a shared-lock is obtained
** on the pager file (by this or any other process), it will be
** treated as a hot-journal and rolled back.
*/
static void pager_unlock(Pager *pPager){
  if( !pPager->exclusiveMode ){
    int rc;                      /* Return code */

    /* Always close the journal file when dropping the database lock.
    ** Otherwise, another connection with journal_mode=delete might
    ** delete the file out from under us.
    */
    sqlite3OsClose(pPager->jfd);
    sqlite3BitvecDestroy(pPager->pInJournal);
    pPager->pInJournal = 0;
    releaseAllSavepoints(pPager);

    /* If the file is unlocked, somebody else might change it. The
    ** values stored in Pager.dbSize etc. might become invalid if
    ** this happens.  One can argue that this doesn't need to be cleared
    ** until the change-counter check fails in PagerSharedLock().
    ** Clearing the page size cache here is being conservative.
    */
    pPager->dbSizeValid = 0;




    rc = osUnlock(pPager->fd, NO_LOCK);
    if( rc ){
      pPager->errCode = rc;
    }
    IOTRACE(("UNLOCK %p\n", pPager))

    /* If Pager.errCode is set, the contents of the pager cache cannot be







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>













|


















>
>
>







1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
      rc |= sqlite3BitvecSet(p->pInSavepoint, pgno);
      testcase( rc==SQLITE_NOMEM );
      assert( rc==SQLITE_OK || rc==SQLITE_NOMEM );
    }
  }
  return rc;
}

/*
** Make sure pager pPager has a connection to its write-ahead log file,
** opening one with sqlite3LogOpen() if pPager->pLog is not yet set.
**
** Returns SQLITE_OK if the log was already open or was opened
** successfully, or the error code from sqlite3LogOpen() otherwise.
*/
static int pagerOpenLog(Pager *pPager){
  int rc = SQLITE_OK;             /* Return code */

  if( pPager->pLog==0 ){
    rc = sqlite3LogOpen(pPager->pVfs, pPager->zFilename, &pPager->pLog);
  }
  return rc;
}

/*
** Return 1 if pager pPager currently has a write-ahead log handle
** (Pager.pLog is not NULL), meaning the log is used in place of the
** usual rollback journal. Return 0 otherwise.
*/
static int pagerUseLog(Pager *pPager){
  return pPager->pLog ? 1 : 0;
}

/*
** Unlock the database file. This function is a no-op if the pager
** is in exclusive mode.
**
** If the pager is currently in error state, discard the contents of 
** the cache and reset the Pager structure internal state. If there is
** an open journal-file, then the next time a shared-lock is obtained
** on the pager file (by this or any other process), it will be
** treated as a hot-journal and rolled back.
*/
static void pager_unlock(Pager *pPager){
  if( !pPager->exclusiveMode ){
    int rc = SQLITE_OK;          /* Return code */

    /* Always close the journal file when dropping the database lock.
    ** Otherwise, another connection with journal_mode=delete might
    ** delete the file out from under us.
    */
    sqlite3OsClose(pPager->jfd);
    sqlite3BitvecDestroy(pPager->pInJournal);
    pPager->pInJournal = 0;
    releaseAllSavepoints(pPager);

    /* If the file is unlocked, somebody else might change it. The
    ** values stored in Pager.dbSize etc. might become invalid if
    ** this happens.  One can argue that this doesn't need to be cleared
    ** until the change-counter check fails in PagerSharedLock().
    ** Clearing the page size cache here is being conservative.
    */
    pPager->dbSizeValid = 0;

    if( pagerUseLog(pPager) ){
      sqlite3LogCloseSnapshot(pPager->pLog);
    }
    rc = osUnlock(pPager->fd, NO_LOCK);
    if( rc ){
      pPager->errCode = rc;
    }
    IOTRACE(("UNLOCK %p\n", pPager))

    /* If Pager.errCode is set, the contents of the pager cache cannot be
1361
1362
1363
1364
1365
1366
1367

1368
1369
1370
1371
1372
1373
1374
  if( pPager->state<PAGER_RESERVED ){
    return SQLITE_OK;
  }
  releaseAllSavepoints(pPager);

  assert( isOpen(pPager->jfd) || pPager->pInJournal==0 );
  if( isOpen(pPager->jfd) ){


    /* Finalize the journal file. */
    if( sqlite3IsMemJournal(pPager->jfd) ){
      assert( pPager->journalMode==PAGER_JOURNALMODE_MEMORY );
      sqlite3OsClose(pPager->jfd);
    }else if( pPager->journalMode==PAGER_JOURNALMODE_TRUNCATE ){
      if( pPager->journalOff==0 ){







>







1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
  if( pPager->state<PAGER_RESERVED ){
    return SQLITE_OK;
  }
  releaseAllSavepoints(pPager);

  assert( isOpen(pPager->jfd) || pPager->pInJournal==0 );
  if( isOpen(pPager->jfd) ){
    assert( !pagerUseLog(pPager) );

    /* Finalize the journal file. */
    if( sqlite3IsMemJournal(pPager->jfd) ){
      assert( pPager->journalMode==PAGER_JOURNALMODE_MEMORY );
      sqlite3OsClose(pPager->jfd);
    }else if( pPager->journalMode==PAGER_JOURNALMODE_TRUNCATE ){
      if( pPager->journalOff==0 ){
1404
1405
1406
1407
1408
1409
1410



1411
1412
1413
1414
1415
1416
1417
1418
#endif
  }
  sqlite3BitvecDestroy(pPager->pInJournal);
  pPager->pInJournal = 0;
  pPager->nRec = 0;
  sqlite3PcacheCleanAll(pPager->pPCache);




  if( !pPager->exclusiveMode ){
    rc2 = osUnlock(pPager->fd, SHARED_LOCK);
    pPager->state = PAGER_SHARED;
    pPager->changeCountDone = 0;
  }else if( pPager->state==PAGER_SYNCED ){
    pPager->state = PAGER_EXCLUSIVE;
  }
  pPager->setMaster = 0;







>
>
>
|







1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
#endif
  }
  sqlite3BitvecDestroy(pPager->pInJournal);
  pPager->pInJournal = 0;
  pPager->nRec = 0;
  sqlite3PcacheCleanAll(pPager->pPCache);

  if( pagerUseLog(pPager) ){
    rc2 = sqlite3LogWriteLock(pPager->pLog, 0);
    pPager->state = PAGER_SHARED;
  }else if( !pPager->exclusiveMode ){
    rc2 = osUnlock(pPager->fd, SHARED_LOCK);
    pPager->state = PAGER_SHARED;
    pPager->changeCountDone = 0;
  }else if( pPager->state==PAGER_SYNCED ){
    pPager->state = PAGER_EXCLUSIVE;
  }
  pPager->setMaster = 0;
2116
2117
2118
2119
2120
2121
2122



2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142



























































































2143
2144
2145
2146
2147
2148
2149
    zMaster = pPager->pTmpSpace;
    rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
    testcase( rc!=SQLITE_OK );
  }
  if( rc==SQLITE_OK && pPager->noSync==0 && pPager->state>=PAGER_EXCLUSIVE ){
    rc = sqlite3OsSync(pPager->fd, pPager->sync_flags);
  }



  if( rc==SQLITE_OK ){
    rc = pager_end_transaction(pPager, zMaster[0]!='\0');
    testcase( rc!=SQLITE_OK );
  }
  if( rc==SQLITE_OK && zMaster[0] && res ){
    /* If there was a master journal and this routine will return success,
    ** see if it is possible to delete the master journal.
    */
    rc = pager_delmaster(pPager, zMaster);
    testcase( rc!=SQLITE_OK );
  }

  /* The Pager.sectorSize variable may have been updated while rolling
  ** back a journal created by a process with a different sector size
  ** value. Reset it to the correct value for this process.
  */
  setSectorSize(pPager);
  return rc;
}




























































































/*
** Playback savepoint pSavepoint. Or, if pSavepoint==NULL, then playback
** the entire master journal file. The case pSavepoint==NULL occurs when 
** a ROLLBACK TO command is invoked on a SAVEPOINT that is a transaction 
** savepoint.
**
** When pSavepoint is not NULL (meaning a non-transaction savepoint is 







>
>
>




















>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
    zMaster = pPager->pTmpSpace;
    rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
    testcase( rc!=SQLITE_OK );
  }
  if( rc==SQLITE_OK && pPager->noSync==0 && pPager->state>=PAGER_EXCLUSIVE ){
    rc = sqlite3OsSync(pPager->fd, pPager->sync_flags);
  }
  if( rc==SQLITE_OK && pPager->noSync==0 && pPager->state>=PAGER_EXCLUSIVE ){
    rc = sqlite3OsSync(pPager->fd, pPager->sync_flags);
  }
  if( rc==SQLITE_OK ){
    rc = pager_end_transaction(pPager, zMaster[0]!='\0');
    testcase( rc!=SQLITE_OK );
  }
  if( rc==SQLITE_OK && zMaster[0] && res ){
    /* If there was a master journal and this routine will return success,
    ** see if it is possible to delete the master journal.
    */
    rc = pager_delmaster(pPager, zMaster);
    testcase( rc!=SQLITE_OK );
  }

  /* The Pager.sectorSize variable may have been updated while rolling
  ** back a journal created by a process with a different sector size
  ** value. Reset it to the correct value for this process.
  */
  setSectorSize(pPager);
  return rc;
}


/*
** Read the content for page pPg into pPg->pData. If the pager is using a
** write-ahead log, the log is consulted first; the database file itself
** is read only if the log does not supply the page. A shared lock or
** greater must be held on the database file before this function is
** called.
**
** If page 1 is read, then the value of Pager.dbFileVers[] is set to
** the value read (or to all 0xff bytes if the read failed).
**
** If an IO error occurs, then the IO error is returned to the caller.
** Otherwise, SQLITE_OK is returned. A short read of the database file
** (SQLITE_IOERR_SHORT_READ) is not treated as an error.
*/
static int readDbPage(PgHdr *pPg){
  Pager *pPager = pPg->pPager; /* Pager object associated with page pPg */
  Pgno pgno = pPg->pgno;       /* Page number to read */
  /* rc must start out as SQLITE_OK: when the pager is not using a log the
  ** sqlite3LogRead() call below is skipped, and rc is tested immediately
  ** afterwards to decide whether to read from the database file. */
  int rc = SQLITE_OK;          /* Return code */
  i64 iOffset;                 /* Byte offset of file to read from */
  int isInLog = 0;             /* True if page is in log file */

  assert( pPager->state>=PAGER_SHARED && !MEMDB );
  assert( isOpen(pPager->fd) );

  if( NEVER(!isOpen(pPager->fd)) ){
    assert( pPager->tempFile );
    memset(pPg->pData, 0, pPager->pageSize);
    return SQLITE_OK;
  }

  if( pagerUseLog(pPager) ){
    /* Try to pull the page from the write-ahead log. */
    rc = sqlite3LogRead(pPager->pLog, pgno, &isInLog, pPg->pData);
  }
  if( rc==SQLITE_OK && !isInLog ){
    /* The log did not supply the page (or there is no log): fall back to
    ** reading it from the database file. */
    iOffset = (pgno-1)*(i64)pPager->pageSize;
    rc = sqlite3OsRead(pPager->fd, pPg->pData, pPager->pageSize, iOffset);
    if( rc==SQLITE_IOERR_SHORT_READ ){
      rc = SQLITE_OK;
    }
  }

  if( pgno==1 ){
    if( rc ){
      /* If the read is unsuccessful, set the dbFileVers[] to something
      ** that will never be a valid file version.  dbFileVers[] is a copy
      ** of bytes 24..39 of the database.  Bytes 28..31 should always be
      ** zero.  Bytes 32..35 and 36..39 should be page numbers which are
      ** never 0xffffffff.  So filling pPager->dbFileVers[] with all 0xff
      ** bytes should suffice.
      **
      ** For an encrypted database, the situation is more complex:  bytes
      ** 24..39 of the database are white noise.  But the probability of
      ** white noise equaling 16 bytes of 0xff is vanishingly small so
      ** we should still be ok.
      */
      memset(pPager->dbFileVers, 0xff, sizeof(pPager->dbFileVers));
    }else{
      u8 *dbFileVers = &((u8*)pPg->pData)[24];
      memcpy(&pPager->dbFileVers, dbFileVers, sizeof(pPager->dbFileVers));
    }
  }
  CODEC1(pPager, pPg->pData, pgno, 3, rc = SQLITE_NOMEM);

  PAGER_INCR(sqlite3_pager_readdb_count);
  PAGER_INCR(pPager->nRead);
  IOTRACE(("PGIN %p %d\n", pPager, pgno));
  PAGERTRACE(("FETCH %d page %d hash(%08x)\n",
               PAGERID(pPager), pgno, pager_pagehash(pPg)));

  return rc;
}

/*
** Roll back the in-memory state of pager pPager when it is using a
** write-ahead log.
**
** Pager.dbSize is restored to Pager.dbOrigSize and the list of dirty
** pages is walked. A dirty page with a zero reference count is dropped
** from the cache. A dirty page that is still referenced has its content
** reloaded with readDbPage() and, if that succeeds, is handed to the
** Pager.xReiniter callback.
**
** The walk stops at the first error. Returns SQLITE_OK or the error
** code returned by readDbPage().
*/
static int pagerRollbackLog(Pager *pPager){
  int rc = SQLITE_OK;          /* Return code */
  PgHdr *pPg = sqlite3PcacheDirtyList(pPager->pPCache);
  PgHdr *pNext;                /* Successor of pPg in the dirty list */

  pPager->dbSize = pPager->dbOrigSize;
  for(; pPg && rc==SQLITE_OK; pPg=pNext){
    /* Fetch the successor first: pPg may be dropped below. */
    pNext = pPg->pDirty;
    if( sqlite3PcachePageRefcount(pPg)!=0 ){
      rc = readDbPage(pPg);
      if( rc==SQLITE_OK ){
        pPager->xReiniter(pPg);
      }
    }else{
      /* NOTE(review): the lookup presumably takes the reference that
      ** sqlite3PcacheDrop() expects to release - confirm against pcache. */
      sqlite3PagerLookup(pPager, pPg->pgno);
      sqlite3PcacheDrop(pPg);
    }
  }
  return rc;
}

/*
** Playback savepoint pSavepoint. Or, if pSavepoint==NULL, then playback
** the entire master journal file. The case pSavepoint==NULL occurs when 
** a ROLLBACK TO command is invoked on a SAVEPOINT that is a transaction 
** savepoint.
**
** When pSavepoint is not NULL (meaning a non-transaction savepoint is 
2192
2193
2194
2195
2196
2197
2198




2199
2200
2201
2202
2203
2204
2205

2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
    }
  }

  /* Set the database size back to the value it was before the savepoint 
  ** being reverted was opened.
  */
  pPager->dbSize = pSavepoint ? pSavepoint->nOrig : pPager->dbOrigSize;





  /* Use pPager->journalOff as the effective size of the main rollback
  ** journal.  The actual file might be larger than this in
  ** PAGER_JOURNALMODE_TRUNCATE or PAGER_JOURNALMODE_PERSIST.  But anything
  ** past pPager->journalOff is off-limits to us.
  */
  szJ = pPager->journalOff;


  /* Begin by rolling back records from the main journal starting at
  ** PagerSavepoint.iOffset and continuing to the next journal header.
  ** There might be records in the main journal that have a page number
  ** greater than the current database size (pPager->dbSize) but those
  ** will be skipped automatically.  Pages are added to pDone as they
  ** are played back.
  */
  if( pSavepoint ){
    iHdrOff = pSavepoint->iHdrOffset ? pSavepoint->iHdrOffset : szJ;
    pPager->journalOff = pSavepoint->iOffset;
    while( rc==SQLITE_OK && pPager->journalOff<iHdrOff ){
      rc = pager_playback_one_page(pPager, &pPager->journalOff, pDone, 1, 1);
    }
    assert( rc!=SQLITE_DONE );
  }else{







>
>
>
>







>








|







2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
    }
  }

  /* Set the database size back to the value it was before the savepoint 
  ** being reverted was opened.
  */
  pPager->dbSize = pSavepoint ? pSavepoint->nOrig : pPager->dbOrigSize;

  if( !pSavepoint && pagerUseLog(pPager) ){
    return pagerRollbackLog(pPager);
  }

  /* Use pPager->journalOff as the effective size of the main rollback
  ** journal.  The actual file might be larger than this in
  ** PAGER_JOURNALMODE_TRUNCATE or PAGER_JOURNALMODE_PERSIST.  But anything
  ** past pPager->journalOff is off-limits to us.
  */
  szJ = pPager->journalOff;
  assert( pagerUseLog(pPager)==0 || szJ==0 );

  /* Begin by rolling back records from the main journal starting at
  ** PagerSavepoint.iOffset and continuing to the next journal header.
  ** There might be records in the main journal that have a page number
  ** greater than the current database size (pPager->dbSize) but those
  ** will be skipped automatically.  Pages are added to pDone as they
  ** are played back.
  */
  if( pSavepoint && !pagerUseLog(pPager) ){
    iHdrOff = pSavepoint->iHdrOffset ? pSavepoint->iHdrOffset : szJ;
    pPager->journalOff = pSavepoint->iOffset;
    while( rc==SQLITE_OK && pPager->journalOff<iHdrOff ){
      rc = pager_playback_one_page(pPager, &pPager->journalOff, pDone, 1, 1);
    }
    assert( rc!=SQLITE_DONE );
  }else{
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569





2570
2571

2572
2573
2574

2575
2576
2577
2578

2579
2580
2581
2582
2583
2584
2585
** the query attempt returns an IO error, the IO error code is returned
** and *pnPage is left unchanged.
**
** Otherwise, if everything is successful, then SQLITE_OK is returned
** and *pnPage is set to the number of pages in the database.
*/
int sqlite3PagerPagecount(Pager *pPager, int *pnPage){
  Pgno nPage;               /* Value to return via *pnPage */

  /* Determine the number of pages in the file. Store this in nPage. */
  if( pPager->dbSizeValid ){
    nPage = pPager->dbSize;
  }else{
    int rc;                 /* Error returned by OsFileSize() */
    i64 n = 0;              /* File size in bytes returned by OsFileSize() */






    assert( isOpen(pPager->fd) || pPager->tempFile );
    if( isOpen(pPager->fd) && (0 != (rc = sqlite3OsFileSize(pPager->fd, &n))) ){

      pager_error(pPager, rc);
      return rc;
    }

    if( n>0 && n<pPager->pageSize ){
      nPage = 1;
    }else{
      nPage = (Pgno)(n / pPager->pageSize);

    }
    if( pPager->state!=PAGER_UNLOCK ){
      pPager->dbSize = nPage;
      pPager->dbFileSize = nPage;
      pPager->dbSizeValid = 1;
    }
  }







|








>
>
>
>
>
|
|
>
|
|
|
>
|
|
|
|
>







2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
** the query attempt returns an IO error, the IO error code is returned
** and *pnPage is left unchanged.
**
** Otherwise, if everything is successful, then SQLITE_OK is returned
** and *pnPage is set to the number of pages in the database.
*/
int sqlite3PagerPagecount(Pager *pPager, int *pnPage){
  Pgno nPage = 0;           /* Value to return via *pnPage */

  /* Determine the number of pages in the file. Store this in nPage. */
  if( pPager->dbSizeValid ){
    nPage = pPager->dbSize;
  }else{
    int rc;                 /* Error returned by OsFileSize() */
    i64 n = 0;              /* File size in bytes returned by OsFileSize() */

    if( pagerUseLog(pPager) ){
      sqlite3LogMaxpgno(pPager->pLog, &nPage);
    }

    if( nPage==0 ){
      assert( isOpen(pPager->fd) || pPager->tempFile );
      if( isOpen(pPager->fd) ){
        if( SQLITE_OK!=(rc = sqlite3OsFileSize(pPager->fd, &n)) ){
          pager_error(pPager, rc);
          return rc;
        }
      }
      if( n>0 && n<pPager->pageSize ){
        nPage = 1;
      }else{
        nPage = (Pgno)(n / pPager->pageSize);
      }
    }
    if( pPager->state!=PAGER_UNLOCK ){
      pPager->dbSize = nPage;
      pPager->dbFileSize = nPage;
      pPager->dbSizeValid = 1;
    }
  }
2693
2694
2695
2696
2697
2698
2699

2700
2701
2702
2703
2704
2705
2706
void sqlite3PagerTruncateImage(Pager *pPager, Pgno nPage){
  /* Set the pager's in-memory database size, Pager.dbSize, to nPage.
  ** Preconditions (checked by the asserts below): the cached size is
  ** valid, nPage does not exceed the current size, and the pager state
  ** is PAGER_RESERVED or greater. */
  assert( pPager->dbSizeValid );
  assert( pPager->dbSize>=nPage );
  assert( pPager->state>=PAGER_RESERVED );
  pPager->dbSize = nPage;
  /* NOTE(review): presumably a debug-only consistency check - confirm. */
  assertTruncateConstraint(pPager);
}


/*
** This function is called before attempting a hot-journal rollback. It
** syncs the journal file to disk, then sets pPager->journalHdr to the
** size of the journal file so that the pager_playback() routine knows
** that the entire journal file has been synced.
**







>







2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
void sqlite3PagerTruncateImage(Pager *pPager, Pgno nPage){
  /* Set the pager's in-memory database size, Pager.dbSize, to nPage.
  ** Preconditions (checked by the asserts below): the cached size is
  ** valid, nPage does not exceed the current size, and the pager state
  ** is PAGER_RESERVED or greater. */
  assert( pPager->dbSizeValid );
  assert( pPager->dbSize>=nPage );
  assert( pPager->state>=PAGER_RESERVED );
  pPager->dbSize = nPage;
  /* NOTE(review): presumably a debug-only consistency check - confirm. */
  assertTruncateConstraint(pPager);
}


/*
** This function is called before attempting a hot-journal rollback. It
** syncs the journal file to disk, then sets pPager->journalHdr to the
** size of the journal file so that the pager_playback() routine knows
** that the entire journal file has been synced.
**
2734
2735
2736
2737
2738
2739
2740


2741
2742
2743
2744


2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
**
** This function always succeeds. If a transaction is active an attempt
** is made to roll it back. If an error occurs during the rollback 
** a hot journal may be left in the filesystem but no error is returned
** to the caller.
*/
int sqlite3PagerClose(Pager *pPager){


  disable_simulated_io_errors();
  sqlite3BeginBenignMalloc();
  pPager->errCode = 0;
  pPager->exclusiveMode = 0;


  pager_reset(pPager);
  if( MEMDB ){
    pager_unlock(pPager);
  }else{
    /* Set Pager.journalHdr to -1 for the benefit of the pager_playback() 
    ** call which may be made from within pagerUnlockAndRollback(). If it
    ** is not -1, then the unsynced portion of an open journal file may
    ** be played back into the database. If a power failure occurs while
    ** this is happening, the database may become corrupt.
    */
    if( isOpen(pPager->jfd) ){
      pPager->errCode = pagerSyncHotJournal(pPager);
    }
    pagerUnlockAndRollback(pPager);
  }
  sqlite3EndBenignMalloc();
  enable_simulated_io_errors();
  PAGERTRACE(("CLOSE %d\n", PAGERID(pPager)));
  IOTRACE(("CLOSE %p\n", pPager))
  sqlite3OsClose(pPager->fd);
  sqlite3PageFree(pPager->pTmpSpace);
  sqlite3PcacheClose(pPager->pPCache);

#ifdef SQLITE_HAS_CODEC
  if( pPager->xCodecFree ) pPager->xCodecFree(pPager->pCodec);
#endif

  assert( !pPager->aSavepoint && !pPager->pInJournal );







>
>




>
>




















|







2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
**
** This function always succeeds. If a transaction is active an attempt
** is made to roll it back. If an error occurs during the rollback 
** a hot journal may be left in the filesystem but no error is returned
** to the caller.
*/
int sqlite3PagerClose(Pager *pPager){
  u8 *pTmp = (u8 *)pPager->pTmpSpace;

  disable_simulated_io_errors();
  sqlite3BeginBenignMalloc();
  pPager->errCode = 0;
  pPager->exclusiveMode = 0;
  sqlite3LogClose(pPager->pLog, pPager->fd, pTmp);
  pPager->pLog = 0;
  pager_reset(pPager);
  if( MEMDB ){
    pager_unlock(pPager);
  }else{
    /* Set Pager.journalHdr to -1 for the benefit of the pager_playback() 
    ** call which may be made from within pagerUnlockAndRollback(). If it
    ** is not -1, then the unsynced portion of an open journal file may
    ** be played back into the database. If a power failure occurs while
    ** this is happening, the database may become corrupt.
    */
    if( isOpen(pPager->jfd) ){
      pPager->errCode = pagerSyncHotJournal(pPager);
    }
    pagerUnlockAndRollback(pPager);
  }
  sqlite3EndBenignMalloc();
  enable_simulated_io_errors();
  PAGERTRACE(("CLOSE %d\n", PAGERID(pPager)));
  IOTRACE(("CLOSE %p\n", pPager))
  sqlite3OsClose(pPager->fd);
  sqlite3PageFree(pTmp);
  sqlite3PcacheClose(pPager->pPCache);

#ifdef SQLITE_HAS_CODEC
  if( pPager->xCodecFree ) pPager->xCodecFree(pPager->pCodec);
#endif

  assert( !pPager->aSavepoint && !pPager->pInJournal );
3062
3063
3064
3065
3066
3067
3068


3069

3070
3071
3072
3073
3074
3075
3076
    void *pData = pPg->pData;
    i64 offset = pPager->nSubRec*(4+pPager->pageSize);
    char *pData2;

    CODEC2(pPager, pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2);
    PAGERTRACE(("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno));
  


    assert( pageInJournal(pPg) || pPg->pgno>pPager->dbOrigSize );

    rc = write32bits(pPager->sjfd, offset, pPg->pgno);
    if( rc==SQLITE_OK ){
      rc = sqlite3OsWrite(pPager->sjfd, pData2, pPager->pageSize, offset+4);
    }
  }
  if( rc==SQLITE_OK ){
    pPager->nSubRec++;







>
>
|
>







3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
    void *pData = pPg->pData;
    i64 offset = pPager->nSubRec*(4+pPager->pageSize);
    char *pData2;

    CODEC2(pPager, pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2);
    PAGERTRACE(("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno));
  
    assert( pagerUseLog(pPager) 
         || pageInJournal(pPg) 
         || pPg->pgno>pPager->dbOrigSize 
    );
    rc = write32bits(pPager->sjfd, offset, pPg->pgno);
    if( rc==SQLITE_OK ){
      rc = sqlite3OsWrite(pPager->sjfd, pData2, pPager->pageSize, offset+4);
    }
  }
  if( rc==SQLITE_OK ){
    pPager->nSubRec++;
3102
3103
3104
3105
3106
3107
3108


3109
3110
3111
3112
3113
3114
3115
*/
static int pagerStress(void *p, PgHdr *pPg){
  Pager *pPager = (Pager *)p;
  int rc = SQLITE_OK;

  assert( pPg->pPager==pPager );
  assert( pPg->flags&PGHDR_DIRTY );



  /* The doNotSync flag is set by the sqlite3PagerWrite() function while it
  ** is journalling a set of two or more database pages that are stored
  ** on the same disk sector. Syncing the journal is not allowed while
  ** this is happening as it is important that all members of such a
  ** set of pages are synced to disk together. So, if the page this function
  ** is trying to make clean will require a journal sync and the doNotSync







>
>







3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
*/
static int pagerStress(void *p, PgHdr *pPg){
  Pager *pPager = (Pager *)p;
  int rc = SQLITE_OK;

  assert( pPg->pPager==pPager );
  assert( pPg->flags&PGHDR_DIRTY );

  if( pagerUseLog(pPager) ) return SQLITE_OK;

  /* The doNotSync flag is set by the sqlite3PagerWrite() function while it
  ** is journalling a set of two or more database pages that are stored
  ** on the same disk sector. Syncing the journal is not allowed while
  ** this is happening as it is important that all members of such a
  ** set of pages are synced to disk together. So, if the page this function
  ** is trying to make clean will require a journal sync and the doNotSync
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
      }
    }
  }

  return rc;
}

/*
** Load page pPg from the database file into pPg->pData. The caller must
** already hold at least a shared lock on the database file.
**
** Reading page 1 also refreshes Pager.dbFileVers[] from the data read
** (or fills it with 0xff bytes if the read failed).
**
** Returns SQLITE_OK on success, or the IO error code if the read fails.
** A short read (SQLITE_IOERR_SHORT_READ) is reported as SQLITE_OK.
*/
static int readDbPage(PgHdr *pPg){
  Pager *pPager = pPg->pPager; /* Pager that owns page pPg */
  Pgno pgno = pPg->pgno;       /* Number of the page being read */
  int rc;                      /* Return code */

  assert( pPager->state>=PAGER_SHARED && !MEMDB );
  assert( isOpen(pPager->fd) );

  if( NEVER(!isOpen(pPager->fd)) ){
    assert( pPager->tempFile );
    memset(pPg->pData, 0, pPager->pageSize);
    return SQLITE_OK;
  }

  /* Page N starts at byte offset (N-1)*pageSize of the file. */
  rc = sqlite3OsRead(pPager->fd, pPg->pData, pPager->pageSize,
                     (pgno-1)*(i64)pPager->pageSize);
  if( rc==SQLITE_IOERR_SHORT_READ ) rc = SQLITE_OK;

  if( pgno==1 ){
    if( rc==SQLITE_OK ){
      /* dbFileVers[] mirrors the bytes starting at offset 24 of page 1. */
      memcpy(&pPager->dbFileVers, &((u8*)pPg->pData)[24],
             sizeof(pPager->dbFileVers));
    }else{
      /* The read failed, so store a value that can never be a valid
      ** file version.  dbFileVers[] is a copy of bytes 24..39 of the
      ** database.  Bytes 28..31 should always be zero, and bytes 32..35
      ** and 36..39 should be page numbers, which are never 0xffffffff.
      ** All 0xff bytes therefore suffice.
      **
      ** In an encrypted database bytes 24..39 are white noise, but the
      ** chance of that noise equaling 16 bytes of 0xff is vanishingly
      ** small, so this still works.
      */
      memset(pPager->dbFileVers, 0xff, sizeof(pPager->dbFileVers));
    }
  }
  CODEC1(pPager, pPg->pData, pgno, 3, rc = SQLITE_NOMEM);

  PAGER_INCR(sqlite3_pager_readdb_count);
  PAGER_INCR(pPager->nRead);
  IOTRACE(("PGIN %p %d\n", pPager, pgno));
  PAGERTRACE(("FETCH %d page %d hash(%08x)\n",
               PAGERID(pPager), pgno, pager_pagehash(pPg)));

  return rc;
}

/*
** This function is called to obtain a shared lock on the database file.
** It is illegal to call sqlite3PagerAcquire() until after this function
** has been successfully called. If a shared-lock is already held when
** this function is called, it is a no-op.
**
** The following operations are also performed by this function.







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







3725
3726
3727
3728
3729
3730
3731





























































3732
3733
3734
3735
3736
3737
3738
      }
    }
  }

  return rc;
}






























































/*
** This function is called to obtain a shared lock on the database file.
** It is illegal to call sqlite3PagerAcquire() until after this function
** has been successfully called. If a shared-lock is already held when
** this function is called, it is a no-op.
**
** The following operations are also performed by this function.
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714

















3715
3716
3717
3718
3719
3720
3721
    if( isOpen(pPager->jfd) || pPager->zJournal ){
      isErrorReset = 1;
    }
    pPager->errCode = SQLITE_OK;
    pager_reset(pPager);
  }

  if( pPager->state==PAGER_UNLOCK || isErrorReset ){
    sqlite3_vfs * const pVfs = pPager->pVfs;
    int isHotJournal = 0;
    assert( !MEMDB );
    assert( sqlite3PcacheRefCount(pPager->pPCache)==0 );
    if( pPager->noReadlock ){
      assert( pPager->readOnly );
      pPager->state = PAGER_SHARED;
    }else{
      rc = pager_wait_on_lock(pPager, SHARED_LOCK);
      if( rc!=SQLITE_OK ){
        assert( pPager->state==PAGER_UNLOCK );
        return pager_error(pPager, rc);
      }
    }
    assert( pPager->state>=SHARED_LOCK );


















    /* If a journal file exists, and there is no RESERVED lock on the
    ** database file, then it either needs to be played back or deleted.
    */
    if( !isErrorReset ){
      assert( pPager->state <= PAGER_SHARED );
      rc = hasHotJournal(pPager, &isHotJournal);







<
<
<
<
<
|
|
|
|
|
|
|
|
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







3778
3779
3780
3781
3782
3783
3784





3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
    if( isOpen(pPager->jfd) || pPager->zJournal ){
      isErrorReset = 1;
    }
    pPager->errCode = SQLITE_OK;
    pager_reset(pPager);
  }






  if( pPager->noReadlock ){
    assert( pPager->readOnly );
    pPager->state = PAGER_SHARED;
  }else{
    rc = pager_wait_on_lock(pPager, SHARED_LOCK);
    if( rc!=SQLITE_OK ){
      assert( pPager->state==PAGER_UNLOCK );
      return pager_error(pPager, rc);
    }
  }
  assert( pPager->state>=SHARED_LOCK );

  if( pagerUseLog(pPager) ){
    int changed = 0;
    rc = sqlite3LogOpenSnapshot(pPager->pLog, &changed);
    if( rc==SQLITE_OK ){
      if( changed ){
        pager_reset(pPager);
        assert( pPager->errCode || pPager->dbSizeValid==0 );
      }
      pPager->state = PAGER_SHARED;
      rc = sqlite3PagerPagecount(pPager, &changed);
    }
  }else if( pPager->state==PAGER_UNLOCK || isErrorReset ){
    sqlite3_vfs * const pVfs = pPager->pVfs;
    int isHotJournal = 0;
    assert( !MEMDB );
    assert( sqlite3PcacheRefCount(pPager->pPCache)==0 );

    /* If a journal file exists, and there is no RESERVED lock on the
    ** database file, then it either needs to be played back or deleted.
    */
    if( !isErrorReset ){
      assert( pPager->state <= PAGER_SHARED );
      rc = hasHotJournal(pPager, &isHotJournal);
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795

      /* Reset the journal status fields to indicate that we have no
      ** rollback journal at this time. */
      pPager->journalStarted = 0;
      pPager->journalOff = 0;
      pPager->setMaster = 0;
      pPager->journalHdr = 0;

      /* Make sure the journal file has been synced to disk. */
 
      /* Playback and delete the journal.  Drop the database write
      ** lock and reacquire the read lock. Purge the cache before
      ** playing back the hot-journal so that we don't end up with
      ** an inconsistent cache.  Sync the hot journal before playing
      ** it back since the process that crashed and left the hot journal







|







3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893

      /* Reset the journal status fields to indicate that we have no
      ** rollback journal at this time. */
      pPager->journalStarted = 0;
      pPager->journalOff = 0;
      pPager->setMaster = 0;
      pPager->journalHdr = 0;
 
      /* Make sure the journal file has been synced to disk. */
 
      /* Playback and delete the journal.  Drop the database write
      ** lock and reacquire the read lock. Purge the cache before
      ** playing back the hot-journal so that we don't end up with
      ** an inconsistent cache.  Sync the hot journal before playing
      ** it back since the process that crashed and left the hot journal
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
    rc = sqlite3PagerPagecount(pPager, &nMax);
    if( rc!=SQLITE_OK ){
      goto pager_acquire_err;
    }

    if( MEMDB || nMax<(int)pgno || noContent || !isOpen(pPager->fd) ){
      if( pgno>pPager->mxPgno ){
	rc = SQLITE_FULL;
	goto pager_acquire_err;
      }
      if( noContent ){
        /* Failure to set the bits in the InJournal bit-vectors is benign.
        ** It merely means that we might do some extra work to journal a 
        ** page that does not need to be journaled.  Nevertheless, be sure 
        ** to test the case where a malloc error occurs while trying to set 
        ** a bit in a bit vector.







|
|







4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
    rc = sqlite3PagerPagecount(pPager, &nMax);
    if( rc!=SQLITE_OK ){
      goto pager_acquire_err;
    }

    if( MEMDB || nMax<(int)pgno || noContent || !isOpen(pPager->fd) ){
      if( pgno>pPager->mxPgno ){
        rc = SQLITE_FULL;
        goto pager_acquire_err;
      }
      if( noContent ){
        /* Failure to set the bits in the InJournal bit-vectors is benign.
        ** It merely means that we might do some extra work to journal a 
        ** page that does not need to be journaled.  Nevertheless, be sure 
        ** to test the case where a malloc error occurs while trying to set 
        ** a bit in a bit vector.
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
**
** SQLITE_OK is returned if everything goes according to plan. 
** An SQLITE_IOERR_XXX error code is returned if a call to 
** sqlite3OsOpen() fails.
*/
static int openSubJournal(Pager *pPager){
  int rc = SQLITE_OK;
  if( isOpen(pPager->jfd) && !isOpen(pPager->sjfd) ){
    if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY || pPager->subjInMemory ){
      sqlite3MemJournalOpen(pPager->sjfd);
    }else{
      rc = pagerOpentemp(pPager, pPager->sjfd, SQLITE_OPEN_SUBJOURNAL);
    }
  }
  return rc;







|







4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
**
** SQLITE_OK is returned if everything goes according to plan. 
** An SQLITE_IOERR_XXX error code is returned if a call to 
** sqlite3OsOpen() fails.
*/
static int openSubJournal(Pager *pPager){
  int rc = SQLITE_OK;
  if( (pagerUseLog(pPager) || isOpen(pPager->jfd)) && !isOpen(pPager->sjfd) ){
    if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY || pPager->subjInMemory ){
      sqlite3MemJournalOpen(pPager->sjfd);
    }else{
      rc = pagerOpentemp(pPager, pPager->sjfd, SQLITE_OPEN_SUBJOURNAL);
    }
  }
  return rc;
4232
4233
4234
4235
4236
4237
4238












4239
4240
4241
4242
4243
4244
4245
4246
4247
4248
4249
4250
4251

4252
4253
4254
4255
4256
4257
4258
    rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK);
    if( rc==SQLITE_OK ){
      pPager->state = PAGER_RESERVED;
      if( exFlag ){
        rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
      }
    }













    /* No need to open the journal file at this time.  It will be
    ** opened before it is written to.  If we defer opening the journal,
    ** we might save the work of creating a file if the transaction
    ** ends up being a no-op.
    */
  }else if( isOpen(pPager->jfd) && pPager->journalOff==0 ){
    /* This happens when the pager was in exclusive-access mode the last
    ** time a (read or write) transaction was successfully concluded
    ** by this connection. Instead of deleting the journal file it was 
    ** kept open and either was truncated to 0 bytes or its header was
    ** overwritten with zeros.
    */

    assert( pPager->nRec==0 );
    assert( pPager->dbOrigSize==0 );
    assert( pPager->pInJournal==0 );
    rc = pager_open_journal(pPager);
  }

  PAGERTRACE(("TRANSACTION %d\n", PAGERID(pPager)));







>
>
>
>
>
>
>
>
>
>
>
>













>







4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
4360
4361
4362
4363
4364
4365
4366
4367
4368
4369
    rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK);
    if( rc==SQLITE_OK ){
      pPager->state = PAGER_RESERVED;
      if( exFlag ){
        rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
      }
    }

    if( rc==SQLITE_OK && pagerUseLog(pPager) ){
      /* Grab the write lock on the log file. If successful, upgrade to
      ** PAGER_EXCLUSIVE state. Otherwise, return an error code to the caller.
      ** The busy-handler is not invoked if another connection already
      ** holds the write-lock. If possible, the upper layer will call it.
      */
      rc = sqlite3LogWriteLock(pPager->pLog, 1);
      if( rc==SQLITE_OK ){
        pPager->dbOrigSize = pPager->dbSize;
      }
    }

    /* No need to open the journal file at this time.  It will be
    ** opened before it is written to.  If we defer opening the journal,
    ** we might save the work of creating a file if the transaction
    ** ends up being a no-op.
    */
  }else if( isOpen(pPager->jfd) && pPager->journalOff==0 ){
    /* This happens when the pager was in exclusive-access mode the last
    ** time a (read or write) transaction was successfully concluded
    ** by this connection. Instead of deleting the journal file it was 
    ** kept open and either was truncated to 0 bytes or its header was
    ** overwritten with zeros.
    */
    assert( pagerUseLog(pPager)==0 );
    assert( pPager->nRec==0 );
    assert( pPager->dbOrigSize==0 );
    assert( pPager->pInJournal==0 );
    rc = pager_open_journal(pPager);
  }

  PAGERTRACE(("TRANSACTION %d\n", PAGERID(pPager)));
4299
4300
4301
4302
4303
4304
4305

4306
4307
4308
4309
4310
4311
4312
4313
4314
4315
4316
4317
4318
4319
4320

4321


4322
4323
4324
4325
4326
4327
4328
4329
4330
4331
4332

4333
4334
4335
4336
4337
4338
4339
  CHECK_PAGE(pPg);

  /* Mark the page as dirty.  If the page has already been written
  ** to the journal then we can return right away.
  */
  sqlite3PcacheMakeDirty(pPg);
  if( pageInJournal(pPg) && !subjRequiresPage(pPg) ){

    pPager->dbModified = 1;
  }else{

    /* If we get this far, it means that the page needs to be
    ** written to the transaction journal or the checkpoint journal
    ** or both.
    **
    ** Higher level routines should have already started a transaction,
    ** which means they have acquired the necessary locks but the rollback
    ** journal might not yet be open.
    */
    rc = sqlite3PagerBegin(pPager, 0, pPager->subjInMemory);
    if( rc!=SQLITE_OK ){
      return rc;
    }

    if( !isOpen(pPager->jfd) && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){


      assert( pPager->useJournal );
      rc = pager_open_journal(pPager);
      if( rc!=SQLITE_OK ) return rc;
    }
    pPager->dbModified = 1;
  
    /* The transaction journal now exists and we have a RESERVED or an
    ** EXCLUSIVE lock on the main database file.  Write the current page to
    ** the transaction journal if it is not there already.
    */
    if( !pageInJournal(pPg) && isOpen(pPager->jfd) ){

      if( pPg->pgno<=pPager->dbOrigSize ){
        u32 cksum;
        char *pData2;

        /* We should never write to the journal file the page that
        ** contains the database locks.  The following assert verifies
        ** that we do not. */







>















>
|
>
>











>







4410
4411
4412
4413
4414
4415
4416
4417
4418
4419
4420
4421
4422
4423
4424
4425
4426
4427
4428
4429
4430
4431
4432
4433
4434
4435
4436
4437
4438
4439
4440
4441
4442
4443
4444
4445
4446
4447
4448
4449
4450
4451
4452
4453
4454
4455
  CHECK_PAGE(pPg);

  /* Mark the page as dirty.  If the page has already been written
  ** to the journal then we can return right away.
  */
  sqlite3PcacheMakeDirty(pPg);
  if( pageInJournal(pPg) && !subjRequiresPage(pPg) ){
    assert( !pagerUseLog(pPager) );
    pPager->dbModified = 1;
  }else{

    /* If we get this far, it means that the page needs to be
    ** written to the transaction journal or the checkpoint journal
    ** or both.
    **
    ** Higher level routines should have already started a transaction,
    ** which means they have acquired the necessary locks but the rollback
    ** journal might not yet be open.
    */
    rc = sqlite3PagerBegin(pPager, 0, pPager->subjInMemory);
    if( rc!=SQLITE_OK ){
      return rc;
    }
    if( !isOpen(pPager->jfd) 
     && pPager->journalMode!=PAGER_JOURNALMODE_OFF 
     && pPager->journalMode!=PAGER_JOURNALMODE_WAL 
    ){
      assert( pPager->useJournal );
      rc = pager_open_journal(pPager);
      if( rc!=SQLITE_OK ) return rc;
    }
    pPager->dbModified = 1;
  
    /* The transaction journal now exists and we have a RESERVED or an
    ** EXCLUSIVE lock on the main database file.  Write the current page to
    ** the transaction journal if it is not there already.
    */
    if( !pageInJournal(pPg) && isOpen(pPager->jfd) ){
      assert( !pagerUseLog(pPager) );
      if( pPg->pgno<=pPager->dbOrigSize ){
        u32 cksum;
        char *pData2;

        /* We should never write to the journal file the page that
        ** contains the database locks.  The following assert verifies
        ** that we do not. */
4706
4707
4708
4709
4710
4711
4712






4713


4714
4715
4716
4717
4718
4719
4720
4721
4722
4723
4724
4725
4726
4727
4728
4729
4730
4731
4732
4733
4734
4735
4736
4737
4738
4739
4740
4741
4742
4743
4744
4745
4746
4747
4748
4749
4750
4751
4752
4753
4754
4755
4756
4757
4758
4759
4760
4761
4762
4763
4764
4765
4766
4767
4768
4769
4770
4771
4772
4773
4774
4775
4776
4777
4778
4779
4780
4781
4782
4783
4784
4785
4786
4787
4788
4789
4790
4791
4792
4793
4794
4795
4796
4797
4798
4799
4800
4801
4802
4803
4804
4805
4806
4807
4808
4809
4810
4811
4812
4813
4814
4815
4816
4817
4818
4819
4820
4821
4822
4823
4824
4825
4826
4827
4828
4829
4830
4831
4832
4833
4834

4835
4836
4837
4838
4839
4840
4841
  if( MEMDB && pPager->dbModified ){
    /* If this is an in-memory db, or no pages have been written to, or this
    ** function has already been called, it is mostly a no-op.  However, any
    ** backup in progress needs to be restarted.
    */
    sqlite3BackupRestart(pPager->pBackup);
  }else if( pPager->state!=PAGER_SYNCED && pPager->dbModified ){









    /* The following block updates the change-counter. Exactly how it
    ** does this depends on whether or not the atomic-update optimization
    ** was enabled at compile time, and if this transaction meets the 
    ** runtime criteria to use the operation: 
    **
    **    * The file-system supports the atomic-write property for
    **      blocks of size page-size, and 
    **    * This commit is not part of a multi-file transaction, and
    **    * Exactly one page has been modified and stored in the journal file.
    **
    ** If the optimization was not enabled at compile time, then the
    ** pager_incr_changecounter() function is called to update the change
    ** counter in 'indirect-mode'. If the optimization is compiled in but
    ** is not applicable to this transaction, call sqlite3JournalCreate()
    ** to make sure the journal file has actually been created, then call
    ** pager_incr_changecounter() to update the change-counter in indirect
    ** mode. 
    **
    ** Otherwise, if the optimization is both enabled and applicable,
    ** then call pager_incr_changecounter() to update the change-counter
    ** in 'direct' mode. In this case the journal file will never be
    ** created for this transaction.
    */
#ifdef SQLITE_ENABLE_ATOMIC_WRITE
    PgHdr *pPg;
    assert( isOpen(pPager->jfd) || pPager->journalMode==PAGER_JOURNALMODE_OFF );
    if( !zMaster && isOpen(pPager->jfd) 
     && pPager->journalOff==jrnlBufferSize(pPager) 
     && pPager->dbSize>=pPager->dbFileSize
     && (0==(pPg = sqlite3PcacheDirtyList(pPager->pPCache)) || 0==pPg->pDirty)
    ){
      /* Update the db file change counter via the direct-write method. The 
      ** following call will modify the in-memory representation of page 1 
      ** to include the updated change counter and then write page 1 
      ** directly to the database file. Because of the atomic-write 
      ** property of the host file-system, this is safe.
      */
      rc = pager_incr_changecounter(pPager, 1);
    }else{
      rc = sqlite3JournalCreate(pPager->jfd);
      if( rc==SQLITE_OK ){
        rc = pager_incr_changecounter(pPager, 0);
      }
    }
#else
    rc = pager_incr_changecounter(pPager, 0);
#endif
    if( rc!=SQLITE_OK ) goto commit_phase_one_exit;

    /* If this transaction has made the database smaller, then all pages
    ** being discarded by the truncation must be written to the journal
    ** file. This can only happen in auto-vacuum mode.
    **
    ** Before reading the pages with page numbers larger than the 
    ** current value of Pager.dbSize, set dbSize back to the value
    ** that it took at the start of the transaction. Otherwise, the
    ** calls to sqlite3PagerGet() return zeroed pages instead of 
    ** reading data from the database file.
    **
    ** When journal_mode==OFF the dbOrigSize is always zero, so this
    ** block never runs if journal_mode=OFF.
    */
#ifndef SQLITE_OMIT_AUTOVACUUM
    if( pPager->dbSize<pPager->dbOrigSize 
     && ALWAYS(pPager->journalMode!=PAGER_JOURNALMODE_OFF)
    ){
      Pgno i;                                   /* Iterator variable */
      const Pgno iSkip = PAGER_MJ_PGNO(pPager); /* Pending lock page */
      const Pgno dbSize = pPager->dbSize;       /* Database image size */ 
      pPager->dbSize = pPager->dbOrigSize;
      for( i=dbSize+1; i<=pPager->dbOrigSize; i++ ){
        if( !sqlite3BitvecTest(pPager->pInJournal, i) && i!=iSkip ){
          PgHdr *pPage;             /* Page to journal */
          rc = sqlite3PagerGet(pPager, i, &pPage);
          if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
          rc = sqlite3PagerWrite(pPage);
          sqlite3PagerUnref(pPage);
          if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
        }
      } 
      pPager->dbSize = dbSize;
    }
#endif

    /* Write the master journal name into the journal file. If a master 
    ** journal file name has already been written to the journal file, 
    ** or if zMaster is NULL (no master journal), then this call is a no-op.
    */
    rc = writeMasterJournal(pPager, zMaster);
    if( rc!=SQLITE_OK ) goto commit_phase_one_exit;

    /* Sync the journal file. If the atomic-update optimization is being
    ** used, this call will not create the journal file or perform any
    ** real IO.
    */
    rc = syncJournal(pPager);
    if( rc!=SQLITE_OK ) goto commit_phase_one_exit;

    /* Write all dirty pages to the database file. */
    rc = pager_write_pagelist(sqlite3PcacheDirtyList(pPager->pPCache));
    if( rc!=SQLITE_OK ){
      assert( rc!=SQLITE_IOERR_BLOCKED );
      goto commit_phase_one_exit;
    }
    sqlite3PcacheCleanAll(pPager->pPCache);

    /* If the file on disk is not the same size as the database image,
    ** then use pager_truncate to grow or shrink the file here.
    */
    if( pPager->dbSize!=pPager->dbFileSize ){
      Pgno nNew = pPager->dbSize - (pPager->dbSize==PAGER_MJ_PGNO(pPager));
      assert( pPager->state>=PAGER_EXCLUSIVE );
      rc = pager_truncate(pPager, nNew);
      if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
    }

    /* Finally, sync the database file. */
    if( !pPager->noSync && !noSync ){
      rc = sqlite3OsSync(pPager->fd, pPager->sync_flags);
    }
    IOTRACE(("DBSYNC %p\n", pPager))


    pPager->state = PAGER_SYNCED;
  }

commit_phase_one_exit:
  return rc;
}







>
>
>
>
>
>
|
>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>







4822
4823
4824
4825
4826
4827
4828
4829
4830
4831
4832
4833
4834
4835
4836
4837
4838
4839
4840
4841
4842
4843
4844
4845
4846
4847
4848
4849
4850
4851
4852
4853
4854
4855
4856
4857
4858
4859
4860
4861
4862
4863
4864
4865
4866
4867
4868
4869
4870
4871
4872
4873
4874
4875
4876
4877
4878
4879
4880
4881
4882
4883
4884
4885
4886
4887
4888
4889
4890
4891
4892
4893
4894
4895
4896
4897
4898
4899
4900
4901
4902
4903
4904
4905
4906
4907
4908
4909
4910
4911
4912
4913
4914
4915
4916
4917
4918
4919
4920
4921
4922
4923
4924
4925
4926
4927
4928
4929
4930
4931
4932
4933
4934
4935
4936
4937
4938
4939
4940
4941
4942
4943
4944
4945
4946
4947
4948
4949
4950
4951
4952
4953
4954
4955
4956
4957
4958
4959
4960
4961
4962
4963
4964
4965
4966
  if( MEMDB && pPager->dbModified ){
    /* If this is an in-memory db, or no pages have been written to, or this
    ** function has already been called, it is mostly a no-op.  However, any
    ** backup in progress needs to be restarted.
    */
    sqlite3BackupRestart(pPager->pBackup);
  }else if( pPager->state!=PAGER_SYNCED && pPager->dbModified ){
    if( pagerUseLog(pPager) ){
      PgHdr *pList = sqlite3PcacheDirtyList(pPager->pPCache);
      if( pList ){
        rc = sqlite3LogFrames(pPager->pLog, pPager->pageSize, pList,
            pPager->dbSize, 1, 1
        );
      }
      sqlite3PcacheCleanAll(pPager->pPCache);
    }else{
      /* The following block updates the change-counter. Exactly how it
      ** does this depends on whether or not the atomic-update optimization
      ** was enabled at compile time, and if this transaction meets the 
      ** runtime criteria to use the operation: 
      **
      **    * The file-system supports the atomic-write property for
      **      blocks of size page-size, and 
      **    * This commit is not part of a multi-file transaction, and
      **    * Exactly one page has been modified and stored in the journal file.
      **
      ** If the optimization was not enabled at compile time, then the
      ** pager_incr_changecounter() function is called to update the change
      ** counter in 'indirect-mode'. If the optimization is compiled in but
      ** is not applicable to this transaction, call sqlite3JournalCreate()
      ** to make sure the journal file has actually been created, then call
      ** pager_incr_changecounter() to update the change-counter in indirect
      ** mode. 
      **
      ** Otherwise, if the optimization is both enabled and applicable,
      ** then call pager_incr_changecounter() to update the change-counter
      ** in 'direct' mode. In this case the journal file will never be
      ** created for this transaction.
      */
  #ifdef SQLITE_ENABLE_ATOMIC_WRITE
      PgHdr *pPg;
      assert( isOpen(pPager->jfd) || pPager->journalMode==PAGER_JOURNALMODE_OFF );
      if( !zMaster && isOpen(pPager->jfd) 
       && pPager->journalOff==jrnlBufferSize(pPager) 
       && pPager->dbSize>=pPager->dbFileSize
       && (0==(pPg = sqlite3PcacheDirtyList(pPager->pPCache)) || 0==pPg->pDirty)
      ){
        /* Update the db file change counter via the direct-write method. The 
        ** following call will modify the in-memory representation of page 1 
        ** to include the updated change counter and then write page 1 
        ** directly to the database file. Because of the atomic-write 
        ** property of the host file-system, this is safe.
        */
        rc = pager_incr_changecounter(pPager, 1);
      }else{
        rc = sqlite3JournalCreate(pPager->jfd);
        if( rc==SQLITE_OK ){
          rc = pager_incr_changecounter(pPager, 0);
        }
      }
  #else
      rc = pager_incr_changecounter(pPager, 0);
  #endif
      if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
  
      /* If this transaction has made the database smaller, then all pages
      ** being discarded by the truncation must be written to the journal
      ** file. This can only happen in auto-vacuum mode.
      **
      ** Before reading the pages with page numbers larger than the 
      ** current value of Pager.dbSize, set dbSize back to the value
      ** that it took at the start of the transaction. Otherwise, the
      ** calls to sqlite3PagerGet() return zeroed pages instead of 
      ** reading data from the database file.
      **
      ** When journal_mode==OFF the dbOrigSize is always zero, so this
      ** block never runs if journal_mode=OFF.
      */
  #ifndef SQLITE_OMIT_AUTOVACUUM
      if( pPager->dbSize<pPager->dbOrigSize 
       && ALWAYS(pPager->journalMode!=PAGER_JOURNALMODE_OFF)
      ){
        Pgno i;                                   /* Iterator variable */
        const Pgno iSkip = PAGER_MJ_PGNO(pPager); /* Pending lock page */
        const Pgno dbSize = pPager->dbSize;       /* Database image size */ 
        pPager->dbSize = pPager->dbOrigSize;
        for( i=dbSize+1; i<=pPager->dbOrigSize; i++ ){
          if( !sqlite3BitvecTest(pPager->pInJournal, i) && i!=iSkip ){
            PgHdr *pPage;             /* Page to journal */
            rc = sqlite3PagerGet(pPager, i, &pPage);
            if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
            rc = sqlite3PagerWrite(pPage);
            sqlite3PagerUnref(pPage);
            if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
          }
        }
        pPager->dbSize = dbSize;
      } 
  #endif
  
      /* Write the master journal name into the journal file. If a master 
      ** journal file name has already been written to the journal file, 
      ** or if zMaster is NULL (no master journal), then this call is a no-op.
      */
      rc = writeMasterJournal(pPager, zMaster);
      if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
  
      /* Sync the journal file. If the atomic-update optimization is being
      ** used, this call will not create the journal file or perform any
      ** real IO.
      */
      rc = syncJournal(pPager);
      if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
  
      /* Write all dirty pages to the database file. */
      rc = pager_write_pagelist(sqlite3PcacheDirtyList(pPager->pPCache));
      if( rc!=SQLITE_OK ){
        assert( rc!=SQLITE_IOERR_BLOCKED );
        goto commit_phase_one_exit;
      }
      sqlite3PcacheCleanAll(pPager->pPCache);
  
      /* If the file on disk is not the same size as the database image,
      ** then use pager_truncate to grow or shrink the file here.
      */
      if( pPager->dbSize!=pPager->dbFileSize ){
        Pgno nNew = pPager->dbSize - (pPager->dbSize==PAGER_MJ_PGNO(pPager));
        assert( pPager->state>=PAGER_EXCLUSIVE );
        rc = pager_truncate(pPager, nNew);
        if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
      }
  
      /* Finally, sync the database file. */
      if( !pPager->noSync && !noSync ){
        rc = sqlite3OsSync(pPager->fd, pPager->sync_flags);
      }
      IOTRACE(("DBSYNC %p\n", pPager))
    }

    pPager->state = PAGER_SYNCED;
  }

commit_phase_one_exit:
  return rc;
}
4936
4937
4938
4939
4940
4941
4942





4943
4944
4945
4946
4947
4948
4949
4950
**   the journal file. It needs to be left in the file-system so that
**   some other process can use it to restore the database state (by
**   hot-journal rollback).
*/
int sqlite3PagerRollback(Pager *pPager){
  int rc = SQLITE_OK;                  /* Return code */
  PAGERTRACE(("ROLLBACK %d\n", PAGERID(pPager)));





  if( !pPager->dbModified || !isOpen(pPager->jfd) ){
    rc = pager_end_transaction(pPager, pPager->setMaster);
  }else if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
    if( pPager->state>=PAGER_EXCLUSIVE ){
      pager_playback(pPager, 0);
    }
    rc = pPager->errCode;
  }else{







>
>
>
>
>
|







5061
5062
5063
5064
5065
5066
5067
5068
5069
5070
5071
5072
5073
5074
5075
5076
5077
5078
5079
5080
**   the journal file. It needs to be left in the file-system so that
**   some other process can use it to restore the database state (by
**   hot-journal rollback).
*/
int sqlite3PagerRollback(Pager *pPager){
  int rc = SQLITE_OK;                  /* Return code */
  PAGERTRACE(("ROLLBACK %d\n", PAGERID(pPager)));
  if( pagerUseLog(pPager) ){
    int rc2;
    rc = sqlite3PagerSavepoint(pPager, SAVEPOINT_ROLLBACK, -1);
    rc2 = pager_end_transaction(pPager, pPager->setMaster);
    if( rc==SQLITE_OK ) rc = rc2;
  }else if( !pPager->dbModified || !isOpen(pPager->jfd) ){
    rc = pager_end_transaction(pPager, pPager->setMaster);
  }else if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
    if( pPager->state>=PAGER_EXCLUSIVE ){
      pager_playback(pPager, 0);
    }
    rc = pPager->errCode;
  }else{
5154
5155
5156
5157
5158
5159
5160
5161
5162
5163
5164
5165
5166
5167
5168
      }
    }
    /* Else this is a rollback operation, playback the specified savepoint.
    ** If this is a temp-file, it is possible that the journal file has
    ** not yet been opened. In this case there have been no changes to
    ** the database file, so the playback operation can be skipped.
    */
    else if( isOpen(pPager->jfd) ){
      PagerSavepoint *pSavepoint = (nNew==0)?0:&pPager->aSavepoint[nNew-1];
      rc = pagerPlaybackSavepoint(pPager, pSavepoint);
      assert(rc!=SQLITE_DONE);
    }
  
  }
  return rc;







|







5284
5285
5286
5287
5288
5289
5290
5291
5292
5293
5294
5295
5296
5297
5298
      }
    }
    /* Else this is a rollback operation, playback the specified savepoint.
    ** If this is a temp-file, it is possible that the journal file has
    ** not yet been opened. In this case there have been no changes to
    ** the database file, so the playback operation can be skipped.
    */
    else if( pagerUseLog(pPager) || isOpen(pPager->jfd) ){
      PagerSavepoint *pSavepoint = (nNew==0)?0:&pPager->aSavepoint[nNew-1];
      rc = pagerPlaybackSavepoint(pPager, pSavepoint);
      assert(rc!=SQLITE_DONE);
    }
  
  }
  return rc;
5431
5432
5433
5434
5435
5436
5437

5438
5439
5440
5441
5442
5443
5444
5445
5446
5447
5448
5449
5450
5451
5452
5453
5454
5455

5456
5457
5458

5459
5460
5461
5462
5463
5464
5465
5466
5467
5468
5469
5470
5471
5472
5473
5474
5475










5476
5477
5478
5479
5480
5481
5482
**
**    PAGER_JOURNALMODE_QUERY
**    PAGER_JOURNALMODE_DELETE
**    PAGER_JOURNALMODE_TRUNCATE
**    PAGER_JOURNALMODE_PERSIST
**    PAGER_JOURNALMODE_OFF
**    PAGER_JOURNALMODE_MEMORY

**
** If the parameter is not _QUERY, then the journal_mode is set to the
** value specified if the change is allowed.  The change is disallowed
** for the following reasons:
**
**   *  An in-memory database can only have its journal_mode set to _OFF
**      or _MEMORY.
**
**   *  The journal mode may not be changed while a transaction is active.
**
** The returned value indicates the current (possibly updated) journal-mode.
*/
int sqlite3PagerJournalMode(Pager *pPager, int eMode){
  /* eMode must be the query sentinel or one of the settable modes. */
  assert( eMode==PAGER_JOURNALMODE_QUERY
            || eMode==PAGER_JOURNALMODE_DELETE
            || eMode==PAGER_JOURNALMODE_PERSIST
            || eMode==PAGER_JOURNALMODE_OFF
            || eMode==PAGER_JOURNALMODE_TRUNCATE
            || eMode==PAGER_JOURNALMODE_MEMORY );
  assert( PAGER_JOURNALMODE_QUERY<0 );

  /* Each guard below is one reason for leaving the mode untouched and
  ** simply reporting the current setting.
  */
  if( eMode<0 ){
    /* Pure query. */
    return (int)pPager->journalMode;
  }
  if( MEMDB
   && eMode!=PAGER_JOURNALMODE_MEMORY
   && eMode!=PAGER_JOURNALMODE_OFF
  ){
    /* In-memory databases accept only _MEMORY and _OFF. */
    return (int)pPager->journalMode;
  }
  if( pPager->dbModified ){
    /* The database has been modified; no change is allowed now. */
    return (int)pPager->journalMode;
  }
  if( isOpen(pPager->jfd) && pPager->journalOff!=0 ){
    /* The journal file is open and its write offset is non-zero. */
    return (int)pPager->journalMode;
  }

  /* The change is permitted.  Close any journal file that is still open. */
  if( isOpen(pPager->jfd) ){
    sqlite3OsClose(pPager->jfd);
  }

  /* The low-order bit is set for the modes that keep the journal file
  ** after commit (TRUNCATE, PERSIST) and clear for the others.
  */
  assert( (PAGER_JOURNALMODE_PERSIST & 1)==1 );
  assert( (PAGER_JOURNALMODE_TRUNCATE & 1)==1 );
  assert( (PAGER_JOURNALMODE_OFF & 1)==0 );
  assert( (PAGER_JOURNALMODE_MEMORY & 1)==0 );
  assert( (PAGER_JOURNALMODE_DELETE & 1)==0 );

  /* When moving from an odd-valued mode to an even-valued one, and not in
  ** exclusive mode, delete the journal file.
  */
  if( !pPager->exclusiveMode
   && (pPager->journalMode & 1)==1
   && (eMode & 1)==0
  ){
    sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0);
  }

  pPager->journalMode = (u8)eMode;
  return (int)pPager->journalMode;
}

/*
** Get/set the size-limit used for persistent journal files.







>


















>



>
|
<















>
>
>
>
>
>
>
>
>
>







5561
5562
5563
5564
5565
5566
5567
5568
5569
5570
5571
5572
5573
5574
5575
5576
5577
5578
5579
5580
5581
5582
5583
5584
5585
5586
5587
5588
5589
5590
5591
5592

5593
5594
5595
5596
5597
5598
5599
5600
5601
5602
5603
5604
5605
5606
5607
5608
5609
5610
5611
5612
5613
5614
5615
5616
5617
5618
5619
5620
5621
5622
5623
5624
**
**    PAGER_JOURNALMODE_QUERY
**    PAGER_JOURNALMODE_DELETE
**    PAGER_JOURNALMODE_TRUNCATE
**    PAGER_JOURNALMODE_PERSIST
**    PAGER_JOURNALMODE_OFF
**    PAGER_JOURNALMODE_MEMORY
**    PAGER_JOURNALMODE_WAL
**
** If the parameter is not _QUERY, then the journal_mode is set to the
** value specified if the change is allowed.  The change is disallowed
** for the following reasons:
**
**   *  An in-memory database can only have its journal_mode set to _OFF
**      or _MEMORY.
**
**   *  The journal mode may not be changed while a transaction is active.
**
** The returned value indicates the current (possibly updated) journal-mode.
*/
int sqlite3PagerJournalMode(Pager *pPager, int eMode){
  /* eMode must be the query sentinel or one of the settable modes. */
  assert( eMode==PAGER_JOURNALMODE_QUERY
            || eMode==PAGER_JOURNALMODE_DELETE
            || eMode==PAGER_JOURNALMODE_TRUNCATE
            || eMode==PAGER_JOURNALMODE_PERSIST
            || eMode==PAGER_JOURNALMODE_OFF 
            || eMode==PAGER_JOURNALMODE_WAL 
            || eMode==PAGER_JOURNALMODE_MEMORY );
  assert( PAGER_JOURNALMODE_QUERY<0 );

  /* The mode is changed only if all of the following hold:
  **
  **   *  eMode is not the (negative) query value,
  **   *  WAL is not being requested for a temp file,
  **   *  an in-memory database is only asked for _MEMORY or _OFF,
  **   *  the database has not been modified, and
  **   *  the journal file is either closed or its write offset is zero.
  */
  if( eMode>=0
   && (pPager->tempFile==0 || eMode!=PAGER_JOURNALMODE_WAL)
   && (!MEMDB || eMode==PAGER_JOURNALMODE_MEMORY||eMode==PAGER_JOURNALMODE_OFF)
   && !pPager->dbModified
   && (!isOpen(pPager->jfd) || 0==pPager->journalOff)
  ){
    if( isOpen(pPager->jfd) ){
      sqlite3OsClose(pPager->jfd);
    }

    /* PERSIST and TRUNCATE are the modes that leave a rollback journal
    ** file behind after commit.  They are identified by (mode & 5)==1.
    ** A plain low-bit test is not sufficient because PAGER_JOURNALMODE_WAL
    ** is also odd: with (mode & 1) a switch from WAL to DELETE, OFF or
    ** MEMORY would be mistaken for leaving a persistent-journal mode and
    ** would issue a needless delete of the rollback journal.
    */
    assert( (PAGER_JOURNALMODE_TRUNCATE & 5)==1 );
    assert( (PAGER_JOURNALMODE_PERSIST & 5)==1 );
    assert( (PAGER_JOURNALMODE_DELETE & 5)==0 );
    assert( (PAGER_JOURNALMODE_MEMORY & 5)==4 );
    assert( (PAGER_JOURNALMODE_OFF & 5)==0 );
    assert( (PAGER_JOURNALMODE_WAL & 5)==5 );

    /* Leaving PERSIST/TRUNCATE for DELETE, OFF or MEMORY: remove the
    ** journal file, unless the pager is in exclusive mode.  The (eMode & 1)
    ** test is unchanged, so a switch to WAL still deletes nothing here.
    */
    if( (pPager->journalMode & 5)==1 && (eMode & 1)==0
         && !pPager->exclusiveMode ){
      sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0);
    }

    if( eMode==PAGER_JOURNALMODE_WAL ){
      int rc = pagerOpenLog(pPager);
      if( rc!=SQLITE_OK ){
        /* TODO: The error code should not just get dropped here. Change 
        ** this to set a flag to force the log to be opened the first time
        ** it is actually required.  */
        return (int)pPager->journalMode;
      }
    }
    pPager->journalMode = (u8)eMode;
  }
  return (int)pPager->journalMode;
}

/*
** Get/set the size-limit used for persistent journal files.
5496
5497
5498
5499
5500
5501
5502
5503















5504
** in backup.c maintains the content of this variable. This module
** uses it opaquely as an argument to sqlite3BackupRestart() and
** sqlite3BackupUpdate() only.
*/
sqlite3_backup **sqlite3PagerBackupPtr(Pager *pPager){
  /* Hand back the address of the pager's pBackup field. */
  sqlite3_backup **ppBackup = &pPager->pBackup;
  return ppBackup;
}
















#endif /* SQLITE_OMIT_DISKIO */








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

5638
5639
5640
5641
5642
5643
5644
5645
5646
5647
5648
5649
5650
5651
5652
5653
5654
5655
5656
5657
5658
5659
5660
5661
** in backup.c maintains the content of this variable. This module
** uses it opaquely as an argument to sqlite3BackupRestart() and
** sqlite3BackupUpdate() only.
*/
sqlite3_backup **sqlite3PagerBackupPtr(Pager *pPager){
  /* Hand back the address of the pager's pBackup field. */
  sqlite3_backup **ppBackup = &pPager->pBackup;
  return ppBackup;
}

/*
** This function is called when the user invokes "PRAGMA checkpoint".
**
** If the pager has no log object (Pager.pLog is NULL) this is a no-op
** that returns SQLITE_OK.  Otherwise an EXCLUSIVE lock is requested via
** pager_wait_on_lock() and, if that succeeds, sqlite3LogCheckpoint() is
** invoked with the database file handle, using Pager.pTmpSpace as the
** buffer argument.  The first error code encountered is returned.
**
** The nMin, nMax and doSync parameters are accepted but are not consulted
** by this implementation.
*/
int sqlite3PagerCheckpoint(Pager *pPager, int nMin, int nMax, int doSync){
  int rc;                         /* Return code */

  /* Nothing to checkpoint without a log. */
  if( pPager->pLog==0 ){
    return SQLITE_OK;
  }

  rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
  if( rc!=SQLITE_OK ){
    return rc;
  }

  return sqlite3LogCheckpoint(pPager->pLog, pPager->fd,
                              (u8 *)pPager->pTmpSpace);
}

#endif /* SQLITE_OMIT_DISKIO */
Changes to src/pager.h.
72
73
74
75
76
77
78

79
80
81
82
83
84
85
*/
/* Journal-mode codes accepted by sqlite3PagerJournalMode().
**
** The numeric values are significant: sqlite3PagerJournalMode() asserts
** that _QUERY is negative and tests individual bits of the other values,
** so renumbering these constants requires updating that function too.
*/
#define PAGER_JOURNALMODE_QUERY      -1
#define PAGER_JOURNALMODE_DELETE      0   /* Commit by deleting journal file */
#define PAGER_JOURNALMODE_PERSIST     1   /* Commit by zeroing journal header */
#define PAGER_JOURNALMODE_OFF         2   /* Journal omitted.  */
#define PAGER_JOURNALMODE_TRUNCATE    3   /* Commit by truncating journal */
#define PAGER_JOURNALMODE_MEMORY      4   /* In-memory journal file */


/*
** The remainder of this file contains the declarations of the functions
** that make up the Pager sub-system API. See source code comments for 
** a detailed description of each routine.
*/








>







72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
*/
/* Journal-mode codes accepted by sqlite3PagerJournalMode().
**
** The numeric values are significant: sqlite3PagerJournalMode() asserts
** that _QUERY is negative and tests individual bits of the other values,
** so renumbering these constants requires updating that function too.
** Note that _WAL, like _PERSIST and _TRUNCATE, has its low-order bit set.
*/
#define PAGER_JOURNALMODE_QUERY      -1
#define PAGER_JOURNALMODE_DELETE      0   /* Commit by deleting journal file */
#define PAGER_JOURNALMODE_PERSIST     1   /* Commit by zeroing journal header */
#define PAGER_JOURNALMODE_OFF         2   /* Journal omitted.  */
#define PAGER_JOURNALMODE_TRUNCATE    3   /* Commit by truncating journal */
#define PAGER_JOURNALMODE_MEMORY      4   /* In-memory journal file */
#define PAGER_JOURNALMODE_WAL         5   /* Use write-ahead logging */

/*
** The remainder of this file contains the declarations of the functions
** that make up the Pager sub-system API. See source code comments for 
** a detailed description of each routine.
*/

Changes to src/pragma.c.
441
442
443
444
445
446
447

























448
449
450
451
452
453
454
        sqlite3BtreeSecureDelete(db->aDb[ii].pBt, b);
      }
    }
    b = sqlite3BtreeSecureDelete(pBt, b);
    returnSingleInt(pParse, "secure_delete", b);
  }else


























  /*
  **  PRAGMA [database.]page_count
  **
  ** Return the number of pages in the specified database.
  */
  if( sqlite3StrICmp(zLeft,"page_count")==0 ){
    int iReg;







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
        sqlite3BtreeSecureDelete(db->aDb[ii].pBt, b);
      }
    }
    b = sqlite3BtreeSecureDelete(pBt, b);
    returnSingleInt(pParse, "secure_delete", b);
  }else

  /*
  **  PRAGMA [database.]secure_delete
  **  PRAGMA [database.]secure_delete=ON/OFF
  **
  ** The first form reports the current setting for the
  ** secure_delete flag.  The second form changes the secure_delete
  ** flag setting and reports the new value.
  **
  ** NOTE(review): the branch immediately before this one ends with the
  ** same returnSingleInt(pParse, "secure_delete", b) tail, so this handler
  ** looks like an accidental second copy that can never be reached.
  ** Confirm and drop one of the two.
  */
  if( sqlite3StrICmp(zLeft,"secure_delete")==0 ){
    Btree *pBt = pDb->pBt;
    int b = -1;                  /* Stays -1 when no value was supplied */
    assert( pBt!=0 );
    if( zRight ){
      b = getBoolean(zRight);
    }
    /* pId2->n==0 presumably means no "database." qualifier was given
    ** (TODO confirm); in that case a new setting is applied to every
    ** entry of db->aDb[], not just pDb. */
    if( pId2->n==0 && b>=0 ){
      int ii;
      for(ii=0; ii<db->nDb; ii++){
        sqlite3BtreeSecureDelete(db->aDb[ii].pBt, b);
      }
    }
    /* The value returned for pBt is what gets reported to the user. */
    b = sqlite3BtreeSecureDelete(pBt, b);
    returnSingleInt(pParse, "secure_delete", b);
  }else

  /*
  **  PRAGMA [database.]page_count
  **
  ** Return the number of pages in the specified database.
  */
  if( sqlite3StrICmp(zLeft,"page_count")==0 ){
    int iReg;
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
  /*
  **  PRAGMA [database.]journal_mode
  **  PRAGMA [database.]journal_mode = (delete|persist|off|truncate|memory)
  */
  if( sqlite3StrICmp(zLeft,"journal_mode")==0 ){
    int eMode;
    static char * const azModeName[] = {
      "delete", "persist", "off", "truncate", "memory"
    };

    if( zRight==0 ){
      eMode = PAGER_JOURNALMODE_QUERY;
    }else{
      int n = sqlite3Strlen30(zRight);
      eMode = sizeof(azModeName)/sizeof(azModeName[0]) - 1;







|







536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
  /*
  **  PRAGMA [database.]journal_mode
  **  PRAGMA [database.]journal_mode = (delete|persist|off|truncate|memory|wal)
  */
  if( sqlite3StrICmp(zLeft,"journal_mode")==0 ){
    int eMode;
    static char * const azModeName[] = {
      "delete", "persist", "off", "truncate", "memory", "wal"
    };

    if( zRight==0 ){
      eMode = PAGER_JOURNALMODE_QUERY;
    }else{
      int n = sqlite3Strlen30(zRight);
      eMode = sizeof(azModeName)/sizeof(azModeName[0]) - 1;
557
558
559
560
561
562
563

564
565
566
567
568
569
570
      pPager = sqlite3BtreePager(pDb->pBt);
      eMode = sqlite3PagerJournalMode(pPager, eMode);
    }
    assert( eMode==PAGER_JOURNALMODE_DELETE
              || eMode==PAGER_JOURNALMODE_TRUNCATE
              || eMode==PAGER_JOURNALMODE_PERSIST
              || eMode==PAGER_JOURNALMODE_OFF

              || eMode==PAGER_JOURNALMODE_MEMORY );
    sqlite3VdbeSetNumCols(v, 1);
    sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "journal_mode", SQLITE_STATIC);
    sqlite3VdbeAddOp4(v, OP_String8, 0, 1, 0, 
           azModeName[eMode], P4_STATIC);
    sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 1);
  }else







>







582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
      pPager = sqlite3BtreePager(pDb->pBt);
      eMode = sqlite3PagerJournalMode(pPager, eMode);
    }
    assert( eMode==PAGER_JOURNALMODE_DELETE
              || eMode==PAGER_JOURNALMODE_TRUNCATE
              || eMode==PAGER_JOURNALMODE_PERSIST
              || eMode==PAGER_JOURNALMODE_OFF
              || eMode==PAGER_JOURNALMODE_WAL
              || eMode==PAGER_JOURNALMODE_MEMORY );
    sqlite3VdbeSetNumCols(v, 1);
    sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "journal_mode", SQLITE_STATIC);
    sqlite3VdbeAddOp4(v, OP_String8, 0, 1, 0, 
           azModeName[eMode], P4_STATIC);
    sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 1);
  }else
1378
1379
1380
1381
1382
1383
1384




















1385
1386
1387
1388
1389
1390
1391
    sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "compile_option", SQLITE_STATIC);
    while( (zOpt = sqlite3_compileoption_get(i++))!=0 ){
      sqlite3VdbeAddOp4(v, OP_String8, 0, 1, 0, zOpt, 0);
      sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 1);
    }
  }else
#endif /* SQLITE_OMIT_COMPILEOPTION_DIAGS */





















#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST)
  /*
  ** Report the current state of file logs for all databases
  */
  if( sqlite3StrICmp(zLeft, "lock_status")==0 ){
    static const char *const azLockName[] = {







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
    sqlite3VdbeSetColName(v, 0, COLNAME_NAME, "compile_option", SQLITE_STATIC);
    while( (zOpt = sqlite3_compileoption_get(i++))!=0 ){
      sqlite3VdbeAddOp4(v, OP_String8, 0, 1, 0, zOpt, 0);
      sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 1);
    }
  }else
#endif /* SQLITE_OMIT_COMPILEOPTION_DIAGS */

  /*
  **  PRAGMA [database.]checkpoint
  **  PRAGMA [database.]checkpoint('NMIN NMAX NOSYNC')
  **
  ** Code an OP_Checkpoint instruction for database iDb. The optional
  ** argument is scanned for up to three integers separated by runs of
  ** non-digit characters: the minimum and maximum number of pages to
  ** copy and a "do not sync" flag. Any value that is missing keeps its
  ** default of 0.
  */
  if( sqlite3StrICmp(zLeft, "checkpoint")==0 ){
    int nMin = 0;
    int nMax = 0;
    int nosync = 0;

    if( zRight ){
      char *z = zRight;
      sqlite3GetInt32(z, &nMin);
      /* Step over the digits just parsed, then over the separator. */
      while( sqlite3Isdigit(*z) ) z++;
      while( *z && !sqlite3Isdigit(*z) ) z++;
      sqlite3GetInt32(z, &nMax);
      while( sqlite3Isdigit(*z) ) z++;
      while( *z && !sqlite3Isdigit(*z) ) z++;
      sqlite3GetInt32(z, &nosync);
    }
    sqlite3VdbeUsesBtree(v, iDb);
    sqlite3VdbeAddOp2(v, OP_Transaction, iDb, 1);
    sqlite3VdbeAddOp3(v, OP_Checkpoint, iDb, nMin, nMax);
    /* OP_Checkpoint reads the "do not sync" flag from P5. Without this
    ** call the third pragma argument was parsed and then discarded. */
    sqlite3VdbeChangeP5(v, (u8)(nosync!=0));
  }else

#if defined(SQLITE_DEBUG) || defined(SQLITE_TEST)
  /*
  ** Report the current state of file logs for all databases
  */
  if( sqlite3StrICmp(zLeft, "lock_status")==0 ){
    static const char *const azLockName[] = {
Changes to src/vdbe.c.
5182
5183
5184
5185
5186
5187
5188















5189
5190
5191
5192
5193
5194
5195
  UPDATE_MAX_BLOBSIZE(pMem);
  if( sqlite3VdbeMemTooBig(pMem) ){
    goto too_big;
  }
  break;
}

















#if !defined(SQLITE_OMIT_VACUUM) && !defined(SQLITE_OMIT_ATTACH)
/* Opcode: Vacuum * * * * *
**
** Vacuum the entire database.  This opcode will cause other virtual
** machines to be created and run.  It may not be called from within
** a transaction.







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







5182
5183
5184
5185
5186
5187
5188
5189
5190
5191
5192
5193
5194
5195
5196
5197
5198
5199
5200
5201
5202
5203
5204
5205
5206
5207
5208
5209
5210
  UPDATE_MAX_BLOBSIZE(pMem);
  if( sqlite3VdbeMemTooBig(pMem) ){
    goto too_big;
  }
  break;
}

/* Opcode: Checkpoint P1 P2 P3 * P5
**
** Invoke sqlite3PagerCheckpoint() on the pager of database P1. P2 and P3
** are passed through as the minimum and maximum number of pages to copy.
** P5 is a "do not sync" flag: the doSync argument handed to the pager is
** true when P5 is zero and false otherwise.
*/
case OP_Checkpoint: {
  Btree *pBt;                     /* Btree to checkpoint */
  int nMin = pOp->p2;             /* Minimum number of pages to copy */
  int nMax = pOp->p3;             /* Maximum number of pages to copy */
  int doNotSync = pOp->p5;        /* True to skip syncing the database */

  assert( pOp->p1>=0 && pOp->p1<db->nDb );
  assert( (p->btreeMask & (1<<pOp->p1))!=0 );
  pBt = db->aDb[pOp->p1].pBt;

  rc = sqlite3PagerCheckpoint(sqlite3BtreePager(pBt), nMin, nMax, !doNotSync);
  break;
}

#if !defined(SQLITE_OMIT_VACUUM) && !defined(SQLITE_OMIT_ATTACH)
/* Opcode: Vacuum * * * * *
**
** Vacuum the entire database.  This opcode will cause other virtual
** machines to be created and run.  It may not be called from within
** a transaction.
Added test/wal.test.














































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Expand a flat list of inclusive {first last} integer pairs into a single
# list of integers. For example, [range 1 3 7 8] returns {1 2 3 7 8}.
# A pair whose first value exceeds its last contributes nothing.
proc range {args} {
  set numbers {}
  foreach {lo hi} $args {
    set n $lo
    while {$n <= $hi} {
      lappend numbers $n
      incr n
    }
  }
  return $numbers
}

# Start over with an empty database: close the [db] handle, remove both
# the database file and its write-ahead log, then reopen [db] on test.db
# through sqlite3_wal.
proc reopen_db {} {
  db close
  foreach stale {test.db test.db-wal} {
    file delete -force $stale
  }
  sqlite3_wal db test.db
  #register_logtest
}
# Register the log-summary module on connection $db (default: db) and
# create the three temp virtual tables -- logsummary, logcontent and
# loglock -- that the (currently commented-out) tests query.
# Returns the result of the last CREATE VIRTUAL TABLE statement.
proc register_logtest {{db db}} {
  register_logsummary_module $db
  set result {}
  foreach stmt {
    { CREATE VIRTUAL TABLE temp.logsummary USING logsummary }
    { CREATE VIRTUAL TABLE temp.logcontent USING logcontent }
    { CREATE VIRTUAL TABLE temp.loglock USING loglock }
  } {
    set result [execsql $stmt $db]
  }
  return $result
}

# Open a database connection exactly as [sqlite3] would (all arguments
# are forwarded unchanged) and immediately switch it to WAL journal mode.
# The first argument is the name of the connection command to create.
# Returns the result of the journal_mode pragma.
proc sqlite3_wal {args} {
  set handle [lindex $args 0]
  eval sqlite3 $args
  $handle eval { PRAGMA journal_mode = wal }
}

#
# These are 'warm-body' tests used while developing the WAL code. They
# serve to prove that a few really simple cases work:
#
# wal-1.*: Read and write the database.
# wal-2.*: Test MVCC with one reader, one writer.
# wal-3.*: Test transaction rollback.
# wal-4.*: Test savepoint/statement rollback.
# wal-5.*: Test the temp database.
# wal-6.*: Test creating databases with different page sizes.
# wal-7.*: Test "PRAGMA checkpoint".
# wal-12.*: Test database file truncation in auto-vacuum mode.
# wal-13.*: Test log-summary files with more than 256 entries.
#

do_test wal-0.1 {
  execsql { PRAGMA journal_mode = wal }
} {wal}

do_test wal-1.0 {
  execsql { 
    BEGIN;
    CREATE TABLE t1(a, b); 
  }
  list [file exists test.db-journal] [file exists test.db-wal]
} {0 1}
do_test wal-1.1 {
  execsql COMMIT
  list [file exists test.db-journal] [file exists test.db-wal]
} {0 1}
do_test wal-1.2 {
  # There are now two pages in the log.
  file size test.db-wal
} [expr (20+1024)*2]

do_test wal-1.3 {
  execsql { SELECT * FROM sqlite_master }
} {table t1 t1 2 {CREATE TABLE t1(a, b)}}

do_test wal-1.4 {
  execsql { INSERT INTO t1 VALUES(1, 2) }
  execsql { INSERT INTO t1 VALUES(3, 4) }
  execsql { INSERT INTO t1 VALUES(5, 6) }
  execsql { INSERT INTO t1 VALUES(7, 8) }
  execsql { INSERT INTO t1 VALUES(9, 10) }
} {}

do_test wal-1.5 {
  execsql { SELECT * FROM t1 }
} {1 2 3 4 5 6 7 8 9 10}

do_test wal-2.1 {
  sqlite3_wal db2 ./test.db
  execsql { BEGIN; SELECT * FROM t1 } db2
} {1 2 3 4 5 6 7 8 9 10}

do_test wal-2.2 {
  execsql { INSERT INTO t1 VALUES(11, 12) }
  execsql { SELECT * FROM t1 }
} {1 2 3 4 5 6 7 8 9 10 11 12}

do_test wal-2.3 {
  execsql { SELECT * FROM t1 } db2
} {1 2 3 4 5 6 7 8 9 10}

do_test wal-2.4 {
  execsql { INSERT INTO t1 VALUES(13, 14) }
  execsql { SELECT * FROM t1 }
} {1 2 3 4 5 6 7 8 9 10 11 12 13 14}

do_test wal-2.5 {
  execsql { SELECT * FROM t1 } db2
} {1 2 3 4 5 6 7 8 9 10}

do_test wal-2.6 {
  execsql { COMMIT; SELECT * FROM t1 } db2
} {1 2 3 4 5 6 7 8 9 10 11 12 13 14}

do_test wal-3.1 {
  execsql { BEGIN; DELETE FROM t1 }
  execsql { SELECT * FROM t1 }
} {}
do_test wal-3.2 {
  execsql { SELECT * FROM t1 } db2
} {1 2 3 4 5 6 7 8 9 10 11 12 13 14}
do_test wal-3.3 {
  execsql { ROLLBACK }
  execsql { SELECT * FROM t1 }
} {1 2 3 4 5 6 7 8 9 10 11 12 13 14}
db2 close

do_test wal-4.1 {
  execsql {
    DELETE FROM t1;
    BEGIN;
      INSERT INTO t1 VALUES('a', 'b');
      SAVEPOINT sp;
        INSERT INTO t1 VALUES('c', 'd');
        SELECT * FROM t1;
  }
} {a b c d}
do_test wal-4.2 {
  execsql {
      ROLLBACK TO sp;
      SELECT * FROM t1;
  }
} {a b}
do_test wal-4.3 {
  execsql {
    COMMIT;
    SELECT * FROM t1;
  }
} {a b}

do_test wal-5.1 {
  execsql {
    CREATE TEMP TABLE t2(a, b);
    INSERT INTO t2 VALUES(1, 2);
  }
} {}
do_test wal-5.2 {
  execsql {
    BEGIN;
      INSERT INTO t2 VALUES(3, 4);
      SELECT * FROM t2;
  }
} {1 2 3 4}
do_test wal-5.3 {
  execsql {
    ROLLBACK;
    SELECT * FROM t2;
  }
} {1 2}
do_test wal-5.4 {
  execsql {
    CREATE TEMP TABLE t3(x UNIQUE);
    BEGIN;
      INSERT INTO t2 VALUES(3, 4);
      INSERT INTO t3 VALUES('abc');
  }
  catchsql { INSERT INTO t3 VALUES('abc') }
} {1 {column x is not unique}}
do_test wal-5.5 {
  execsql {
    COMMIT;
    SELECT * FROM t2;
  }
} {1 2 3 4}
db close


# wal-6.*: for every combination of simulated device sector size and
# database page size, build a one-table, one-row database in WAL mode
# through the "devsym" VFS and check the resulting file sizes.
foreach sector {512 4096} {
  sqlite3_simulate_device -sectorsize $sector
  foreach pgsz {512 1024 2048 4096} {
    # Start each combination from scratch.
    file delete -force test.db test.db-wal
    do_test wal-6.$sector.$pgsz.1 {
      sqlite3_wal db test.db -vfs devsym
      execsql "
        PRAGMA page_size = $pgsz ;
      "
      execsql "
        CREATE TABLE t1(a, b);
        INSERT INTO t1 VALUES(1, 2);
      "
      # The size is sampled only after the connection has been closed.
      # The expected value is exactly two pages of the requested size.
      db close
      file size test.db
    } [expr $pgsz*2]
  
    # After the close above the log file is expected to be empty
    # (presumably because closing checkpoints the log -- TODO confirm).
    do_test wal-6.$sector.$pgsz.2 {
      file size test.db-wal
    } {0}
  }
}

do_test wal-7.1 {
  file delete -force test.db test.db-wal
  sqlite3_wal db test.db
  execsql {
    PRAGMA page_size = 1024;
    CREATE TABLE t1(a, b);
    INSERT INTO t1 VALUES(1, 2);
  }

  list [file size test.db] [file size test.db-wal]
} [list 0 [expr (1024+20)*3]]
do_test wal-7.2 {
  execsql { PRAGMA checkpoint }
  list [file size test.db] [file size test.db-wal]
} [list 2048 [expr (1024+20)*3]]

# db close
# sqlite3_wal db test.db
# register_logsummary_module db
# # Warm-body tests of the virtual tables used for testing.
# # 
# do_test wal-8.1 {
#   execsql { CREATE VIRTUAL TABLE temp.logsummary USING logsummary }
#   execsql { CREATE VIRTUAL TABLE temp.logcontent USING logcontent }
#   execsql { CREATE VIRTUAL TABLE temp.loglock USING loglock }
#   execsql { SELECT * FROM logsummary }
# } [list [file join [pwd] test.db] 1 0 0 0 0 0 0]
# 
# do_test wal-8.2 {
#   sqlite3_wal db2 test.db
#   execsql { SELECT * FROM logsummary }
# } [list [file join [pwd] test.db] 2 0 0 0 0 0 0]
# do_test wal-8.3 {
#   db2 close
#   execsql { SELECT * FROM logsummary }
# } [list [file join [pwd] test.db] 1 0 0 0 0 0 0]
# do_test wal-8.4 {
#   execsql { INSERT INTO t1 VALUES(3, 4) }
#   execsql { SELECT * FROM logsummary }
# } [list [file join [pwd] test.db] 1 1024 2 2 3 0 0]
# do_test wal-8.5 {
#   execsql { PRAGMA checkpoint }
#   execsql { SELECT * FROM logsummary }
# } [list [file join [pwd] test.db] 1 1024 0 0 0 0 0]
# 
# do_test wal-8.6 {
#   execsql { INSERT INTO t1 VALUES(5, 6) }
#   execsql { PRAGMA checkpoint('1 1 1') }
#   execsql { SELECT * FROM logsummary }
# } [list [file join [pwd] test.db] 1 1024 2 0 3 0 0]
# do_test wal-8.7 {
#   execsql { SELECT logpage, dbpage FROM logcontent }
# } {}
# do_test wal-8.8 {
#   execsql { INSERT INTO t1 VALUES(7, 8) }
#   execsql { SELECT logpage, dbpage FROM logcontent }
# } {4 T:4 5 2}
# do_test wal-8.9 {
#   execsql { SELECT * FROM logsummary }
# } [list [file join [pwd] test.db] 1 1024 2 4 5 0 0]
# do_test wal-8.10 {
#   execsql { SELECT * FROM loglock }
# } [list [file join [pwd] test.db] 0 0 0]
# do_test wal-8.11 {
#   execsql { BEGIN; SELECT * FROM t1; }
#   execsql { SELECT * FROM loglock }
# } [list [file join [pwd] test.db] 0 0 4]
# 
# # Try making the log wrap around.
# #
# reopen_db
# 
# do_test wal-9.1 {
#   execsql {
#     BEGIN;
#     CREATE TABLE t1(a PRIMARY KEY, b);
#   }
#   for {set i 0} {$i < 100} {incr i} {
#     execsql { INSERT INTO t1 VALUES($i, randomblob(100)) }
#   }
#   execsql COMMIT
#   execsql { SELECT * FROM logsummary }
# } [list [file join [pwd] test.db] 1 1024 2 2 17 0 0]
# do_test wal-9.2 {
#   execsql { SELECT logpage, dbpage FROM logcontent }
# } {2 T:2 3 1 4 2 5 3 6 4 7 5 8 6 9 7 10 8 11 9 12 10 13 11 14 12 15 13 16 14 17 15}
# do_test wal-9.3 {
#   execsql { PRAGMA checkpoint('12, 12') }
#   execsql { SELECT * FROM logsummary }
# } [list [file join [pwd] test.db] 1 1024 2 15 17 0 0]
# do_test wal-9.4 {
#   execsql { SELECT logpage, dbpage FROM logcontent }
# } {15 13 16 14 17 15}
# do_test wal-9.5 {
#   execsql { SELECT count(*) FROM t1 }
# } {100}
# do_test wal-9.6 {
#   execsql { INSERT INTO t1 VALUES(100, randomblob(100)) }
#   execsql { SELECT * FROM logsummary }
# } [list [file join [pwd] test.db] 1 1024 2 15 20 0 0]
# 
# do_test wal-9.7 {
#   execsql { SELECT count(*) FROM t1 }
# } {101}
# do_test wal-9.8 {
#   db close
#   sqlite3_wal db test.db
#   register_logtest
#   execsql { SELECT count(*) FROM t1 }
# } {101}
# do_test wal-9.9 {
#   execsql { SELECT * FROM logsummary }
# } [list [file join [pwd] test.db] 1 0 0 0 0 0 0]
# 
# reopen_db
# do_test wal-10.1 {
#   execsql {
#     PRAGMA page_size = 1024;
#     CREATE TABLE t1(x PRIMARY KEY);
#     INSERT INTO t1 VALUES(randomblob(900));
#     INSERT INTO t1 VALUES(randomblob(900));
#     INSERT INTO t1 SELECT randomblob(900) FROM t1;         -- 4
#     INSERT INTO t1 SELECT randomblob(900) FROM t1;         -- 8
#     INSERT INTO t1 SELECT randomblob(900) FROM t1;         -- 16
#   }
#   list [file size test.db] [file size test.db-wal]
# } {0 55296}
# do_test wal-10.2 {
#   execsql { PRAGMA checkpoint('20 30') }
# } {}
# do_test wal-10.3 {
#   execsql { SELECT * FROM logsummary }
# } [list [file join [pwd] test.db] 1 1024 34 38 54 0 0]
# do_test wal-10.4 {
#   execsql { SELECT dbpage FROM logcontent }
# } {21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37}
# do_test wal-10.5 {
#   execsql { INSERT INTO t1 VALUES(randomblob(900)) }
#   execsql { SELECT * FROM logsummary }
# } [list [file join [pwd] test.db] 1 1024 34 38 6 34 54]
# do_test wal-10.6 {
#   execsql { SELECT count(*) FROM t1 WHERE x NOT NULL }
# } {17}
# do_test wal-10.8 {
#   execsql { SELECT logpage FROM logcontent }
# } [range 38 54  1 6]
# do_test wal-10.9 {
#   execsql { INSERT INTO t1 SELECT randomblob(900) FROM t1 }
#   execsql { SELECT * FROM logsummary }
# } [list [file join [pwd] test.db] 1 1024 34 38 68 34 54]
# 
# do_test wal-10.10 {
#   execsql { SELECT logpage FROM logcontent }
# } [range 38 54  1 33  55 68]
# 
# do_test wal-10.11 {
#   execsql { SELECT count(*) FROM t1 WHERE x NOT NULL }
# } {34}
# 
# do_test wal-10.12 {
#   execsql { PRAGMA checkpoint('35 35') }
# } {}
# do_test wal-10.13 {
#   execsql { SELECT logpage FROM logcontent }
# } [range 22 68]
# do_test wal-10.13a {
#   execsql { SELECT dbpage FROM logcontent }
# } [list \
#   50 51 52 53 54 55 56 57 58 59 60 61    \
#   0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \
#   62 63 64 65 66 67 68 69 70 71 72 73 74 75 \
# ]
# 
# do_test wal-10.14 {
#   execsql { SELECT count(*) FROM t1 WHERE x NOT NULL }
# } {34}
# do_test wal-10.15 {
#   execsql { PRAGMA integrity_check }
# } {ok}
# do_test wal-10.16 {
#   execsql { PRAGMA checkpoint('20 20') }
#   execsql { SELECT * FROM logsummary }
# } [list [file join [pwd] test.db] 1 1024 7 63 68 0 0]
# do_test wal-10.17 {
#   execsql { SELECT logpage FROM logcontent }
# } [range 63 68]
# do_test wal-10.17a {
#   execsql { SELECT dbpage FROM logcontent }
# } {70 71 72 73 74 75}
# 
# do_test wal-10.18 {
#   execsql { INSERT INTO t1 SELECT randomblob(900) FROM t1 }
#   execsql { SELECT logpage FROM logcontent }
# } [range 63 147]
# integrity_check wal-10.19
# 
# do_test wal-10.20 {
#   execsql { PRAGMA checkpoint('52 52') }
#   execsql { SELECT logpage FROM logcontent }
# } [range 116 147]
# do_test wal-10.20a {
#   execsql { SELECT * FROM logsummary }
# } [list [file join [pwd] test.db] 1 1024 69 116 147 0 0]
# integrity_check wal-10.20.integrity
# 
# do_test wal-10.21 {
#   execsql { INSERT INTO t1 VALUES( randomblob(900) ) }
#   execsql { SELECT logpage FROM logcontent }
# } [range 116 152]
# do_test wal-10.22 {
#   execsql { PRAGMA integrity_check }
# } {ok}
# 
# file delete -force testX.db testX.db-wal
# file copy test.db testX.db
# file copy test.db-wal testX.db-wal
# do_test wal-10.23 {
#   sqlite3_wal db2 testX.db
#   register_logtest db2
#   execsql { SELECT logpage FROM logcontent WHERE db LIKE '%testX%' } db2
# } [range 34 54  1 33  55 152]
# 
# do_test wal-10.24 {
#   execsql { PRAGMA integrity_check } db2
# } {ok}
# db2 close
# 
# do_test wal-11.1 {
#   reopen_db
#   sqlite3_wal db2 test.db
# 
#   execsql {
#     BEGIN;
#       CREATE TABLE t1(x);
#       CREATE TABLE t2(x PRIMARY KEY);
#       INSERT INTO t1 VALUES(randomblob(900));
#       INSERT INTO t1 VALUES(randomblob(900));
#       INSERT INTO t1 SELECT randomblob(900) FROM t1;       /*  4 */
#       INSERT INTO t1 SELECT randomblob(900) FROM t1;       /*  8 */
#       INSERT INTO t1 SELECT randomblob(900) FROM t1;       /* 16 */
#       INSERT INTO t1 SELECT randomblob(900) FROM t1;       /* 32 */
#       INSERT INTO t1 SELECT randomblob(900) FROM t1;       /* 64 */
# 
#       INSERT INTO t2 VALUES('x');
#       INSERT INTO t2 VALUES('y');
#       INSERT INTO t2 VALUES('z');
#     COMMIT;
#     SELECT * FROM logsummary;
#   }
# } [list [file join [pwd] test.db] 2 1024 2 2 70 0 0]
# 
# do_test wal-11.2 {
#   execsql {
#     BEGIN; SELECT x FROM t2;
#   } db2
# } {x y z}
# do_test wal-11.2 {
#   execsql {
#     INSERT INTO t1 VALUES(randomblob(900));
#     PRAGMA checkpoint('10 100');
#     INSERT INTO t1 VALUES(randomblob(900));
#     INSERT INTO t2 VALUES('0');
#     SELECT * FROM logsummary;
#   }
# } [list [file join [pwd] test.db] 2 1024 71 71 7 71 73]
# do_test wal-12.3 {
#   execsql { PRAGMA integrity_check } db2
# } {ok}
# db2 close


# Execute some transactions in auto-vacuum mode to test database file
# truncation.
#
do_test wal-12.1 {
  reopen_db
  execsql {
    PRAGMA auto_vacuum = 1;
    PRAGMA auto_vacuum;
  }
} {1}
do_test wal-12.2 {
  execsql {
    PRAGMA page_size = 1024;
    CREATE TABLE t1(x);
    INSERT INTO t1 VALUES(randomblob(900));
    INSERT INTO t1 VALUES(randomblob(900));
    INSERT INTO t1 SELECT randomblob(900) FROM t1;       /*  4 */
    INSERT INTO t1 SELECT randomblob(900) FROM t1;       /*  8 */
    INSERT INTO t1 SELECT randomblob(900) FROM t1;       /* 16 */
    INSERT INTO t1 SELECT randomblob(900) FROM t1;       /* 32 */
    INSERT INTO t1 SELECT randomblob(900) FROM t1;       /* 64 */
    PRAGMA checkpoint;
  }
  file size test.db
} [expr 67*1024]
do_test wal-12.3 {
  execsql { 
    DELETE FROM t1 WHERE rowid<54;
    PRAGMA checkpoint('1 100000');
  }
  file size test.db
} [expr 14*1024]

# Run some "warm-body" tests to ensure that log-summary files with more
# than 256 entries (log summaries that contain index blocks) work Ok.
#
do_test wal-13.1 {
  reopen_db
  execsql {
    PRAGMA page_size = 1024;
    CREATE TABLE t1(x PRIMARY KEY);
    INSERT INTO t1 VALUES(randomblob(900));
    INSERT INTO t1 VALUES(randomblob(900));
    INSERT INTO t1 SELECT randomblob(900) FROM t1;       /*  4 */
    INSERT INTO t1 SELECT randomblob(900) FROM t1;       /*  8 */
    INSERT INTO t1 SELECT randomblob(900) FROM t1;       /* 16 */
    INSERT INTO t1 SELECT randomblob(900) FROM t1;       /* 32 */
    INSERT INTO t1 SELECT randomblob(900) FROM t1;       /* 64 */
    INSERT INTO t1 SELECT randomblob(900) FROM t1;       /* 128 */
    INSERT INTO t1 SELECT randomblob(900) FROM t1;       /* 256 */
  }
  file size test.db
} 0
do_test wal-13.2 {
  sqlite3_wal db2 test.db
  execsql {PRAGMA integrity_check } db2
} {ok}

do_test wal-13.3 {
  file delete -force test2.db test2.db-wal
  file copy test.db test2.db
  file copy test.db-wal test2.db-wal
  sqlite3_wal db3 test2.db 
  execsql {PRAGMA integrity_check } db3
} {ok}
db3 close

# Checkpoint the log through [db], then close and reopen [db2] on the same
# file and verify that the database still passes the integrity check.
do_test wal-13.4 {
  execsql { PRAGMA checkpoint }
  db2 close
  sqlite3_wal db2 test.db
  execsql {PRAGMA integrity_check } db2
} {ok}

finish_test

Added test/walcrash.test.
















































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
# 2010 February 8
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.
#

#
# These are 'warm-body' tests of database recovery used while developing 
# the WAL code. They serve to prove that a few really simple cases work:
#
# walcrash-1.*: Recover a database.
# walcrash-2.*: Recover a database where the failed transaction spanned more
#               than one page.
# walcrash-3.*: Recover multiple databases where the failed transaction 
#               was a multi-file transaction.
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl
db close

set seed 0
set REPEATS 100

# Open a database connection exactly as [sqlite3] would (all arguments
# are forwarded unchanged) and immediately switch it to WAL journal mode.
# The first argument is the name of the connection command to create.
# Returns the result of the journal_mode pragma.
proc sqlite3_wal {args} {
  set handle [lindex $args 0]
  eval sqlite3 $args
  $handle eval { PRAGMA journal_mode = wal }
}

# walcrash-1.*
#
# Each iteration (i = 1 .. REPEATS-1) starts from an empty database, runs
# two scripts through crashsql with a fresh seed, and after each one
# reopens the database to check it. Both crashsql calls are expected to
# report that the child process exited abnormally.
#
# Every row written here has b equal to the sum of all a values up to and
# including that row, which is what "sum(a)==max(b)" checks.
#
for {set i 1} {$i < $REPEATS} {incr i} {
  file delete -force test.db test.db-wal
  do_test walcrash-1.$i.1 {
    crashsql -delay 4 -file test.db-wal -seed [incr seed] {
      PRAGMA journal_mode = WAL;
      CREATE TABLE t1(a, b);
      INSERT INTO t1 VALUES(1, 1);
      INSERT INTO t1 VALUES(2, 3);
      INSERT INTO t1 VALUES(3, 6);
    }
  } {1 {child process exited abnormally}}
  do_test walcrash-1.$i.2 {
    sqlite3_wal db test.db
    execsql { SELECT sum(a)==max(b) FROM t1 }
  } {1}
  integrity_check walcrash-1.$i.3
  db close
  
  # Second crash, this time against the database recovered above.
  # NOTE(review): the journal_mode pragma appears twice in this script,
  # whereas the matching walcrash-2.$i.4 script issues it once. This looks
  # like a copy/paste leftover -- confirm before removing, since it may
  # affect where the simulated crash lands.
  do_test walcrash-1.$i.4 {
    crashsql -delay 2 -file test.db-wal -seed [incr seed] {
      PRAGMA journal_mode = WAL;
      PRAGMA journal_mode = WAL;
      INSERT INTO t1 VALUES(4, (SELECT sum(a) FROM t1) + 4);
      INSERT INTO t1 VALUES(5, (SELECT sum(a) FROM t1) + 5);
    }
  } {1 {child process exited abnormally}}
  do_test walcrash-1.$i.5 {
    sqlite3_wal db test.db
    execsql { SELECT sum(a)==max(b) FROM t1 }
  } {1}
  integrity_check walcrash-1.$i.6
  db close
}

# walcrash-2.*
#
# Same crash/recover cycle as walcrash-1, but t1 has a PRIMARY KEY, so
# (per the file header) each failed transaction spans more than one page.
# Runs for i = 1 .. REPEATS-1 with a fresh seed for every crashsql call.
#
for {set i 1} {$i < $REPEATS} {incr i} {
  file delete -force test.db test.db-wal
  do_test walcrash-2.$i.1 {
    crashsql -delay 4 -file test.db-wal -seed [incr seed] {
      PRAGMA journal_mode = WAL;
      CREATE TABLE t1(a PRIMARY KEY, b);
      INSERT INTO t1 VALUES(1, 2);
      INSERT INTO t1 VALUES(3, 4);
      INSERT INTO t1 VALUES(5, 9);
    }
  } {1 {child process exited abnormally}}
  # With these values sum(a)==max(b) holds once the first two rows (4==4)
  # or all three rows (9==9) are present, but not with only the first row.
  do_test walcrash-2.$i.2 {
    sqlite3_wal db test.db
    execsql { SELECT sum(a)==max(b) FROM t1 }
  } {1}
  integrity_check walcrash-2.$i.3
  db close
  
  # Second crash against the recovered database. Each new row stores the
  # running sum of a in b, so the same check applies afterwards.
  do_test walcrash-2.$i.4 {
    crashsql -delay 2 -file test.db-wal -seed [incr seed] {
      PRAGMA journal_mode = WAL;
      INSERT INTO t1 VALUES(6, (SELECT sum(a) FROM t1) + 6);
      INSERT INTO t1 VALUES(7, (SELECT sum(a) FROM t1) + 7);
    }
  } {1 {child process exited abnormally}}
  do_test walcrash-2.$i.5 {
    sqlite3_wal db test.db
    execsql { SELECT sum(a)==max(b) FROM t1 }
  } {1}
  integrity_check walcrash-2.$i.6
  db close
}

# walcrash-3.*
#
# NOTE: this test is currently disabled; every line of it below is
# commented out.
#
# for {set i 1} {$i < $REPEATS} {incr i} {
#   file delete -force test.db test.db-wal
#   file delete -force test2.db test2.db-wal
# 
#   do_test walcrash-3.$i.1 {
#     crashsql -delay 2 -file test2.db-wal -seed [incr seed] {
#       PRAGMA journal_mode = WAL;
#       ATTACH 'test2.db' AS aux;
#       CREATE TABLE t1(a PRIMARY KEY, b);
#       CREATE TABLE aux.t2(a PRIMARY KEY, b);
#       BEGIN;
#         INSERT INTO t1 VALUES(1, 2);
#         INSERT INTO t2 VALUES(1, 2);
#       COMMIT;
#     }
#   } {1 {child process exited abnormally}}
# 
#   do_test walcrash-3.$i.2 {
#     sqlite3_wal db test.db
#     execsql { 
#       ATTACH 'test2.db' AS aux;
#       SELECT * FROM t1 EXCEPT SELECT * FROM t2;
#     }
#   } {}
#   do_test walcrash-3.$i.3 { execsql { PRAGMA main.integrity_check } } {ok}
#   do_test walcrash-3.$i.4 { execsql { PRAGMA aux.integrity_check  } } {ok}
# 
#   db close
# }

# walcrash-4.*
#
# Crash while writing a database that uses 1024 byte pages, with crashsql
# given a -blocksize of 4096. After reopening, the row (1, 2) must be
# readable and the integrity check must pass.
#
# NOTE(review): crashsql is defined outside this file; -blocksize
# presumably sets the block size simulated for the crash - confirm.
# NOTE(review): test2.db is deleted here but not otherwise referenced by
# this test; this looks like a leftover from the disabled walcrash-3.
#
for {set i 1} {$i < $REPEATS} {incr i} {
  file delete -force test.db test.db-wal
  file delete -force test2.db test2.db-wal

  do_test walcrash-4.$i.1 {
    crashsql -delay 3 -file test.db-wal -seed [incr seed] -blocksize 4096 {
      PRAGMA journal_mode = WAL;
      PRAGMA page_size = 1024;
      CREATE TABLE t1(a PRIMARY KEY, b);
      INSERT INTO t1 VALUES(1, 2);
      INSERT INTO t1 VALUES(3, 4);
    }
  } {1 {child process exited abnormally}}

  # The first inserted row is expected to be present after recovery.
  do_test walcrash-4.$i.2 {
    sqlite3_wal db test.db
    execsql { 
      SELECT * FROM t1 WHERE a = 1;
    }
  } {1 2}
  do_test walcrash-4.$i.3 { execsql { PRAGMA main.integrity_check } } {ok}

  db close
}

# walcrash-5.*
#
# Build a table of 32 rows (the trailing SQL comments track the running
# row count), run a checkpoint pragma, then insert three more rows one at
# a time, which would bring the count to 33, 34 and 35. After the crash
# the reopened database must hold either 33 or 34 rows and pass the
# integrity check.
#
# NOTE(review): crashsql is defined outside this file; the -delay and
# -blocksize values presumably determine where the crash lands - confirm.
# NOTE(review): test2.db is deleted here but not otherwise referenced by
# this test.
#
for {set i 1} {$i < $REPEATS} {incr i} {
  file delete -force test.db test.db-wal
  file delete -force test2.db test2.db-wal

  do_test walcrash-5.$i.1 {
    crashsql -delay 11 -file test.db-wal -seed [incr seed] -blocksize 4096 {
      PRAGMA journal_mode = WAL;
      PRAGMA page_size = 1024;
      BEGIN;
        CREATE TABLE t1(x PRIMARY KEY);
        INSERT INTO t1 VALUES(randomblob(900));
        INSERT INTO t1 VALUES(randomblob(900));
        INSERT INTO t1 SELECT randomblob(900) FROM t1;           /* 4 */
      COMMIT;
      INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4;   /* 8 */
      INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4;   /* 12 */
      INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4;   /* 16 */
      INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4;   /* 20 */
      INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4;   /* 24 */
      INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4;   /* 28 */
      INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4;   /* 32 */

      PRAGMA checkpoint('70 70');
      INSERT INTO t1 VALUES(randomblob(900));
      INSERT INTO t1 VALUES(randomblob(900));
      INSERT INTO t1 VALUES(randomblob(900));
    }
  } {1 {child process exited abnormally}}

  # Accept exactly one or two of the three post-checkpoint rows.
  do_test walcrash-5.$i.2 {
    sqlite3_wal db test.db
    execsql { SELECT count(*)==33 OR count(*)==34 FROM t1 WHERE x != 1 }
  } {1}
  do_test walcrash-5.$i.3 { execsql { PRAGMA main.integrity_check } } {ok}

  db close
}

# walcrash-6.*
#
# Runs the same SQL as walcrash-5 but with -delay 12 and -blocksize 512.
# The table holds 32 rows before the checkpoint pragma and would hold 33,
# 34 and 35 after each of the three final inserts. After the crash the
# reopened database must hold either 34 or 35 rows and pass the integrity
# check.
#
# NOTE(review): crashsql is defined outside this file; the -delay and
# -blocksize values presumably determine where the crash lands - confirm.
# NOTE(review): test2.db is deleted here but not otherwise referenced by
# this test.
#
for {set i 1} {$i < $REPEATS} {incr i} {
  file delete -force test.db test.db-wal
  file delete -force test2.db test2.db-wal

  do_test walcrash-6.$i.1 {
    crashsql -delay 12 -file test.db-wal -seed [incr seed] -blocksize 512 {
      PRAGMA journal_mode = WAL;
      PRAGMA page_size = 1024;
      BEGIN;
        CREATE TABLE t1(x PRIMARY KEY);
        INSERT INTO t1 VALUES(randomblob(900));
        INSERT INTO t1 VALUES(randomblob(900));
        INSERT INTO t1 SELECT randomblob(900) FROM t1;           /* 4 */
      COMMIT;
      INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4;   /* 8 */
      INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4;   /* 12 */
      INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4;   /* 16 */
      INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4;   /* 20 */
      INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4;   /* 24 */
      INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4;   /* 28 */
      INSERT INTO t1 SELECT randomblob(900) FROM t1 LIMIT 4;   /* 32 */

      PRAGMA checkpoint('70 70');
      INSERT INTO t1 VALUES(randomblob(900));
      INSERT INTO t1 VALUES(randomblob(900));
      INSERT INTO t1 VALUES(randomblob(900));
    }
  } {1 {child process exited abnormally}}

  # Accept exactly two or three of the three post-checkpoint rows.
  do_test walcrash-6.$i.2 {
    sqlite3_wal db test.db
    execsql { SELECT count(*)==34 OR count(*)==35 FROM t1 WHERE x != 1 }
  } {1}
  do_test walcrash-6.$i.3 { execsql { PRAGMA main.integrity_check } } {ok}

  db close
}

finish_test

Added test/walslow.test.










































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# 2010 March 17
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Throw away any existing test database and start over with an empty
# test.db that has been switched to WAL mode. An error from closing a
# handle that is not currently open is ignored.
proc reopen_db {} {
  catch {db close}
  foreach stale [list test.db test.db-wal] {
    file delete -force $stale
  }
  sqlite3 db test.db
  execsql { PRAGMA journal_mode = wal }
}

# Close the current db handle; reopen_db creates a fresh one for each seed.
db close
# NOTE(review): save_prng_state and restore_prng_state are defined outside
# this file. Presumably they snapshot and reset the random number state
# used by randomblob so that every seed starts from the same point -
# confirm against their definitions.
save_prng_state
for {set seed 1} {$seed<10} {incr seed} {
  # Seed the Tcl rand() function, which picks the blob sizes and the
  # checkpoint arguments used below.
  expr srand($seed)
  restore_prng_state
  reopen_db
  do_test walslow-1.seed=$seed.0 {
    execsql { CREATE TABLE t1(a, b) }
    execsql { CREATE INDEX i1 ON t1(a) }
    execsql { CREATE INDEX i2 ON t1(b) }
  } {}

  for {set iTest 1} {$iTest < 100} {incr iTest} {

    # Step 1: insert one row holding two random blobs. w and x are sizes
    # in the range 0..1999, y is in 1..9 and z is 0 or 1.
    do_test walslow-1.seed=$seed.$iTest.1 {
      set w [expr int(rand()*2000)]
      set x [expr int(rand()*2000)]
      set y [expr int(rand()*9)+1]
      set z [expr int(rand()*2)]
      execsql { INSERT INTO t1 VALUES(randomblob($w), randomblob($x)) }
      execsql { PRAGMA integrity_check }
    } {ok}

    # Step 2: run the checkpoint pragma with the random arguments chosen
    # above and check integrity again.
    do_test walslow-1.seed=$seed.$iTest.2 {
      execsql "PRAGMA checkpoint('$y $y $z')"
      execsql { PRAGMA integrity_check }
    } {ok}

    # Step 3: copy the database and its WAL file while db is still open,
    # then open the copy through a second handle and check its integrity.
    do_test walslow-1.seed=$seed.$iTest.3 {
      file delete -force testX.db testX.db-wal
      file copy test.db testX.db
      file copy test.db-wal testX.db-wal
  
      sqlite3 db2 testX.db
      execsql { PRAGMA journal_mode = WAL } db2
      execsql { PRAGMA integrity_check } db2
    } {ok}
  
    # Step 4: the copy must return the same count as the original.
    do_test walslow-1.seed=$seed.$iTest.4 {
      execsql { SELECT count(*) FROM t1 WHERE a!=b } db2
    } [execsql { SELECT count(*) FROM t1 WHERE a!=b }]
    db2 close
  }
}


finish_test
Added test/walthread.test.
































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# 2007 September 7
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#

set testdir [file dirname $argv0]

source $testdir/tester.tcl
if {[run_thread_tests]==0} { finish_test ; return }

# walthread-1.*: set up the database used by the threads started later.
#
# Test 1.1 switches to WAL mode and creates t1 with two rows of random
# data plus a third row holding md5sum(x) over the rows present at that
# point. The expected result is the value returned by the journal_mode
# pragma.
# NOTE(review): md5sum is not defined in this file; presumably it is an
# aggregate supplied by the test harness - confirm.
do_test walthread-1.1 {
  execsql {
    PRAGMA journal_mode = WAL;
    CREATE TABLE t1(x PRIMARY KEY);
    INSERT INTO t1 VALUES(randomblob(100));
    INSERT INTO t1 VALUES(randomblob(100));
    INSERT INTO t1 SELECT md5sum(x) FROM t1;
  }
} {wal}
# Test 1.2 checks that there are three rows and that the row with the
# largest oid equals the md5sum of all the other rows.
do_test walthread-1.2 {
  execsql {
    SELECT (SELECT count(*) FROM t1), (
      SELECT md5sum(x) FROM t1 WHERE oid != (SELECT max(oid) FROM t1)
    ) == (
      SELECT x FROM t1 WHERE oid = (SELECT max(oid) FROM t1)
    )
  }
} {3 1}
do_test walthread-1.3 {
  execsql { PRAGMA integrity_check } 
} {ok}
# Test 1.4 checks the lock state reported once the statements above have
# finished.
do_test walthread-1.4 {
  execsql { PRAGMA lock_status } 
} {main unlocked temp unknown}

#--------------------------------------------------------------------------
# Start NTHREAD worker threads. Each worker performs both read and write
# transactions.
# Each read transaction consists of:
#
#   1) Reading the md5sum of all but the last table row,
#   2) Running integrity check.
#   3) Reading the value stored in the last table row,
#   4) Check that the values read in steps 1 and 3 are the same, and that
#      the md5sum of all but the last table row has not changed.
#
# Each write transaction consists of:
#
#   1) Inserting two new rows of random data into t1.
#   2) Appending a new row to the table containing the md5sum() of all
#      rows in the table.
#
# Each worker thread repeats the following in a loop: N read transactions,
# a single write transaction, then a 1 ms rest. N is currently set to 1 by
# the $prg preamble below.
#
# There is also a single checkpointer thread. It runs the following loop:
#
#   1) Execute "PRAGMA checkpoint(32)"
#   2) Sleep for 1000 ms.
#

# Script run by every spawned thread. The spawner prepends assignments for
# three variables that this script reads: role (worker or checkpointer),
# N (read transactions per worker iteration) and seconds (run time).
# The value of the script is OK on success, or the error message of the
# first failure otherwise.
#
set thread_program {
  # Run the event loop for ms milliseconds. Besides pausing the caller,
  # this gives the "after" timer that sets ::finished a chance to fire.
  proc rest {ms} {
    set ::rest 0
    after $ms {set ::rest 1}
    vwait ::rest
  }

  # Prepare sql on handle DB and step it once. Returns the text of
  # column 0 of the first row, or an empty string when the step did not
  # produce a row. Raises an error carrying the result code when
  # sqlite3_finalize reports anything other than SQLITE_OK.
  proc dosql {DB sql} {
    set res ""
    set stmt [sqlite3_prepare_v2 $DB $sql -1 dummy_tail]
    set rc [sqlite3_step $stmt]
    if {$rc eq "SQLITE_ROW"} {
      set res [sqlite3_column_text $stmt 0]
    }
    set rc [sqlite3_finalize $stmt]

    if {$rc ne "SQLITE_OK"} {
      error $rc 
    }
    return $res
  }

  # Within one transaction: read the md5sum of all rows but the last,
  # run an integrity check, read the last row, then read the md5sum
  # again. Raises an error unless the integrity check says ok and all
  # three values agree.
  proc read_transaction {DB} {
    dosql $DB BEGIN

    set md5_1 [dosql $DB {
      SELECT md5sum(x) FROM t1 WHERE rowid != (SELECT max(rowid) FROM t1)
    }]
    set check [dosql $DB { PRAGMA integrity_check }]
    set md5_2 [dosql $DB { 
      SELECT x FROM t1 WHERE rowid = (SELECT max(rowid) FROM t1)
    }]
    set md5_3 [dosql $DB {
      SELECT md5sum(x) FROM t1 WHERE rowid != (SELECT max(rowid) FROM t1)
    }]

    dosql $DB COMMIT

    if {$check ne "ok" 
     || $md5_1 ne $md5_2
     || $md5_2 ne $md5_3
    } {
      error "Failed read transaction $check $md5_1 $md5_2 $md5_3"
    }
  }

  # Within one transaction: add two rows of random data, then append a
  # row holding the md5sum of every row present at that point. This
  # maintains the property that read_transaction verifies.
  proc write_transaction {DB} {
    dosql $DB BEGIN
    dosql $DB "INSERT INTO t1 VALUES(randomblob(100))"
    dosql $DB "INSERT INTO t1 VALUES(randomblob(100))"
    dosql $DB "INSERT INTO t1 SELECT md5sum(x) FROM t1"
    dosql $DB COMMIT
  }

  # Run the checkpoint pragma once per second until ::finished is set.
  proc checkpointer {DB} {
    while { !$::finished } {
      dosql $DB "PRAGMA checkpoint(32)"
      rest 1000
    }
  }

  # Until ::finished is set: run N read transactions, one write
  # transaction, then rest for 1 ms.
  proc worker {DB N} {
    while { !$::finished } {
      for {set i 0} {$i < $N} {incr i} { read_transaction $DB }
      write_transaction $DB
      rest 1
    }
  }

  # Arrange for the loops above to stop after the requested run time. The
  # expression is braced so that it is not substituted a second time.
  set ::finished 0
  after [expr {$seconds*1000}] {set ::finished 1}

  set ::DB [sqlthread open test.db]
  dosql $::DB { PRAGMA journal_mode = WAL }

  # Run the loop selected by role. Any error ends the loop and its
  # message becomes the result of the script.
  set rc [catch {
    if {$role eq "worker"} { worker $DB $N }
    if {$role eq "checkpointer"} { checkpointer $DB }
  } msg]

  sqlite3_close $::DB

  if {$rc==0} { set msg OK } 
  set msg
}

# Number of worker threads, and the run time in seconds that is passed to
# every thread.
set NTHREAD 6
set SECONDS 30

# Preamble prepended to the thread script. The commented-out variant makes
# each worker run NTHREAD read transactions per iteration; the active one
# uses a single read transaction.
#set prg "set N $NTHREAD ; set seconds $SECONDS"
set prg "set N 1 ; set seconds $SECONDS"

# Start the workers and the checkpointer.
# NOTE(review): thread_spawn is defined outside this file. Presumably it
# runs the given scripts in a new thread and stores the result in the
# named variable when the thread finishes - confirm.
array unset finished
for {set i 0} {$i < $NTHREAD} {incr i} {
  thread_spawn finished($i) {set role worker} $prg $thread_program
}
thread_spawn finished(C) {set role checkpointer} $prg $thread_program
#set finished(C) 1

# Wait for each thread in turn. An element of the finished array that
# does not exist yet is waited for with vwait. Every thread must have
# produced the value OK.
puts "... test runs for approximately $SECONDS seconds ..."
for {set i 0} {$i < $::NTHREAD} {incr i} {
  if {![info exists finished($i)]} {
    vwait finished($i)
  }
  do_test walthread-2.$i {
    set ::finished($i)
  } OK
}
do_test walthread-2.C {
  if {![info exists finished(C)]} { vwait finished(C) }
  set ::finished(C)
} OK

# Report the final row count together with the size of the database file
# and of the log file, both in units of 1024 bytes.
set rows [execsql { SELECT count(*) FROM t1 }]

# If the size of the log file cannot be read, report it as zero.
if {[catch {file size test.db-wal} walbytes]} {
  set logsize 0
} else {
  set logsize [expr {$walbytes / 1024}]
}
set dbsize [expr {[file size test.db] / 1024}]

puts "rows=$rows db=${dbsize}K log=${logsize}K"

finish_test


Changes to tool/mksqlite3c.tcl.
89
90
91
92
93
94
95

96
97
98
99
100
101
102
   fts3.h
   fts3Int.h
   fts3_hash.h
   fts3_tokenizer.h
   hash.h
   hwtime.h
   keywordhash.h

   mutex.h
   opcodes.h
   os_common.h
   os.h
   os_os2.h
   pager.h
   parse.h







>







89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
   fts3.h
   fts3Int.h
   fts3_hash.h
   fts3_tokenizer.h
   hash.h
   hwtime.h
   keywordhash.h
   log.h
   mutex.h
   opcodes.h
   os_common.h
   os.h
   os_os2.h
   pager.h
   parse.h
239
240
241
242
243
244
245

246
247
248
249
250
251
252
   os_unix.c
   os_win.c

   bitvec.c
   pcache.c
   pcache1.c
   rowset.c

   pager.c

   btmutex.c
   btree.c
   backup.c

   vdbemem.c







>







240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
   os_unix.c
   os_win.c

   bitvec.c
   pcache.c
   pcache1.c
   rowset.c
   log.c
   pager.c

   btmutex.c
   btree.c
   backup.c

   vdbemem.c