/ Check-in [2e0357c2]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:If an SQLITE_IOERR error is encountered as part of an atomic commit on an F2FS file-system, retry the commit in legacy journal mode.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 2e0357c2ed30927921cd17129e348a18a6f3fd086d1fc0a42756036b321a084d
User & Date: dan 2018-07-17 14:01:55
Context
2018-07-18
17:37
Fix a harmless compiler warning in the pager. Enhance the docs for sqlite3_changes() and sqlite3_total_changes() to refer to the data_version pragma. check-in: 4c70ea5b user: drh tags: trunk
2018-07-17
14:01
If an SQLITE_IOERR error is encountered as part of an atomic commit on an F2FS file-system, retry the commit in legacy journal mode. check-in: 2e0357c2 user: dan tags: trunk
13:55
Fix for builds without SQLITE_ENABLE_BATCH_ATOMIC_WRITE. Closed-Leaf check-in: b10ec14e user: dan tags: exp-retry-atomic-commit
2018-07-16
11:32
Minor simplification to sqlite3RollbackAll(). check-in: 432fdc22 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Added doc/F2FS.txt.















































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87

SQLite's OS layer contains the following definitions used in F2FS related
calls:

#define F2FS_IOCTL_MAGIC        0xf5
#define F2FS_IOC_START_ATOMIC_WRITE     _IO(F2FS_IOCTL_MAGIC, 1)
#define F2FS_IOC_COMMIT_ATOMIC_WRITE    _IO(F2FS_IOCTL_MAGIC, 2)
#define F2FS_IOC_START_VOLATILE_WRITE   _IO(F2FS_IOCTL_MAGIC, 3)
#define F2FS_IOC_ABORT_VOLATILE_WRITE   _IO(F2FS_IOCTL_MAGIC, 5)
#define F2FS_IOC_GET_FEATURES           _IOR(F2FS_IOCTL_MAGIC, 12, u32)
#define F2FS_FEATURE_ATOMIC_WRITE       0x0004

After opening a database file on Linux (including Android), SQLite determines
whether or not a file supports F2FS atomic commits as follows:

  u32 flags = 0;
  rc = ioctl(fd, F2FS_IOC_GET_FEATURES, &flags);
  if( rc==0 && (flags & F2FS_FEATURE_ATOMIC_WRITE) ){
    /* File supports F2FS atomic commits */
  }else{
    /* File does NOT support F2FS atomic commits */
  }

where "fd" is the file-descriptor open on the database file.

Usually, when writing to a database file that supports atomic commits, SQLite
accumulates the entire transaction in heap memory, deferring all writes to the
db file until the transaction is committed.

When it is time to commit a transaction on a file that supports atomic
commits, SQLite does:

  /* Take an F_WRLCK lock on the database file. This prevents any other
  ** SQLite clients from reading or writing the file until the lock
  ** is released.  */
  rc = fcntl(fd, F_SETLK, ...);
  if( rc!=0 ) goto failed;

  rc = ioctl(fd, F2FS_IOC_START_ATOMIC_WRITE);
  if( rc!=0 ) goto fallback_to_legacy_journal_commit;

  foreach (dirty page){
    rc = write(fd, ...dirty page...);
    if( rc!=0 ){
      ioctl(fd, F2FS_IOC_ABORT_VOLATILE_WRITE);
      goto fallback_to_legacy_journal_commit;
    }
  }

  rc = ioctl(fd, F2FS_IOC_COMMIT_ATOMIC_WRITE);
  if( rc!=0 ){
    ioctl(fd, F2FS_IOC_ABORT_VOLATILE_WRITE);
    goto fallback_to_legacy_journal_commit;
  }

  /* If we get there, the transaction has been successfully 
  ** committed to persistent storage. The following call
  ** relinquishes the F_WRLCK lock.  */
  fcntl(fd, F_SETLK, ...);

Assumptions:

1. After either of the F2FS_IOC_ABORT_VOLATILE_WRITE calls return,
   the database file is in the state that it was in before
   F2FS_IOC_START_ATOMIC_WRITE was invoked. Even if the ioctl()
   fails - we're ignoring the return code.

   This is true regardless of the type of error that occurred in
   ioctl() or write().

2. If the system fails before the F2FS_IOC_COMMIT_ATOMIC_WRITE is
   completed, then following a reboot the database file is in the
   state that it was in before F2FS_IOC_START_ATOMIC_WRITE was invoked.
   Or, if the write was commited right before the system failed, in a 
   state indicating that all write() calls were successfully committed
   to persistent storage before the failure occurred.

3. If the process crashes before the F2FS_IOC_COMMIT_ATOMIC_WRITE is
   completed then the file is automatically restored to the state that
   it was in before F2FS_IOC_START_ATOMIC_WRITE was called. This occurs
   before the posix advisory lock is automatically dropped - there is
   no chance that another client will be able to read the file in a
   half-committed state before the rollback operation occurs.




Changes to src/pager.c.

6378
6379
6380
6381
6382
6383
6384

6385
6386
6387

6388
6389
6390
6391
6392
6393
6394
....
6403
6404
6405
6406
6407
6408
6409
6410
6411
6412
6413
6414
6415
6416
6417
....
6460
6461
6462
6463
6464
6465
6466
6467
6468
6469
6470
6471

6472
6473
6474
6475
6476
6477
6478
6479
6480
6481
6482
....
6492
6493
6494
6495
6496
6497
6498


6499
6500
6501
6502
6503
6504
6505
6506

6507
6508
6509
6510
6511
6512
6513
6514
6515
















6516
6517
6518
6519
6520
6521
6522
  assert( isOpen(pPager->fd) || pPager->tempFile );
  if( 0==pagerFlushOnCommit(pPager, 1) ){
    /* If this is an in-memory db, or no pages have been written to, or this
    ** function has already been called, it is mostly a no-op.  However, any
    ** backup in progress needs to be restarted.  */
    sqlite3BackupRestart(pPager->pBackup);
  }else{

    if( pagerUseWal(pPager) ){
      PgHdr *pList = sqlite3PcacheDirtyList(pPager->pPCache);
      PgHdr *pPageOne = 0;

      if( pList==0 ){
        /* Must have at least one page for the WAL commit flag.
        ** Ticket [2d1a5c67dfc2363e44f29d9bbd57f] 2011-05-18 */
        rc = sqlite3PagerGet(pPager, 1, &pPageOne, 0);
        pList = pPageOne;
        pList->pDirty = 0;
      }
................................................................................
    }else{
      /* The bBatch boolean is true if the batch-atomic-write commit method
      ** should be used.  No rollback journal is created if batch-atomic-write
      ** is enabled.
      */
      sqlite3_file *fd = pPager->fd;
#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE
      const int bBatch = zMaster==0    /* An SQLITE_IOCAP_BATCH_ATOMIC commit */
        && (sqlite3OsDeviceCharacteristics(fd) & SQLITE_IOCAP_BATCH_ATOMIC)
        && !pPager->noSync
        && sqlite3JournalIsInMemory(pPager->jfd);
#else
# define bBatch 0
#endif

................................................................................
        }else{
          rc = sqlite3JournalCreate(pPager->jfd);
          if( rc==SQLITE_OK ){
            rc = pager_incr_changecounter(pPager, 0);
          }
        }
      }
#else 
#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE
      if( zMaster ){
        rc = sqlite3JournalCreate(pPager->jfd);
        if( rc!=SQLITE_OK ) goto commit_phase_one_exit;

      }
#endif
      rc = pager_incr_changecounter(pPager, 0);
#endif
      if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
  
      /* Write the master journal name into the journal file. If a master 
      ** journal file name has already been written to the journal file, 
      ** or if zMaster is NULL (no master journal), then this call is a no-op.
      */
      rc = writeMasterJournal(pPager, zMaster);
................................................................................
      ** on a system under memory pressure it is just possible that this is 
      ** not the case. In this case it is likely enough that the redundant
      ** xSync() call will be changed to a no-op by the OS anyhow. 
      */
      rc = syncJournal(pPager, 0);
      if( rc!=SQLITE_OK ) goto commit_phase_one_exit;



      if( bBatch ){
        /* The pager is now in DBMOD state. But regardless of what happens
        ** next, attempting to play the journal back into the database would
        ** be unsafe. Close it now to make sure that does not happen.  */
        sqlite3OsClose(pPager->jfd);
        rc = sqlite3OsFileControl(fd, SQLITE_FCNTL_BEGIN_ATOMIC_WRITE, 0);
        if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
      }

      rc = pager_write_pagelist(pPager,sqlite3PcacheDirtyList(pPager->pPCache));
      if( bBatch ){
        if( rc==SQLITE_OK ){
          rc = sqlite3OsFileControl(fd, SQLITE_FCNTL_COMMIT_ATOMIC_WRITE, 0);
        }
        if( rc!=SQLITE_OK ){
          sqlite3OsFileControlHint(fd, SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE, 0);
        }
      }

















      if( rc!=SQLITE_OK ){
        assert( rc!=SQLITE_IOERR_BLOCKED );
        goto commit_phase_one_exit;
      }
      sqlite3PcacheCleanAll(pPager->pPCache);








>

<

>







 







|







 







|




>



|







 







>
>

<
<
<
<

<
<
>
|
<
|
|
|
|
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







6378
6379
6380
6381
6382
6383
6384
6385
6386

6387
6388
6389
6390
6391
6392
6393
6394
6395
....
6404
6405
6406
6407
6408
6409
6410
6411
6412
6413
6414
6415
6416
6417
6418
....
6461
6462
6463
6464
6465
6466
6467
6468
6469
6470
6471
6472
6473
6474
6475
6476
6477
6478
6479
6480
6481
6482
6483
6484
....
6494
6495
6496
6497
6498
6499
6500
6501
6502
6503




6504


6505
6506

6507
6508
6509
6510
6511
6512
6513
6514
6515
6516
6517
6518
6519
6520
6521
6522
6523
6524
6525
6526
6527
6528
6529
6530
6531
6532
6533
6534
6535
6536
  assert( isOpen(pPager->fd) || pPager->tempFile );
  if( 0==pagerFlushOnCommit(pPager, 1) ){
    /* If this is an in-memory db, or no pages have been written to, or this
    ** function has already been called, it is mostly a no-op.  However, any
    ** backup in progress needs to be restarted.  */
    sqlite3BackupRestart(pPager->pBackup);
  }else{
    PgHdr *pList;
    if( pagerUseWal(pPager) ){

      PgHdr *pPageOne = 0;
      pList = sqlite3PcacheDirtyList(pPager->pPCache);
      if( pList==0 ){
        /* Must have at least one page for the WAL commit flag.
        ** Ticket [2d1a5c67dfc2363e44f29d9bbd57f] 2011-05-18 */
        rc = sqlite3PagerGet(pPager, 1, &pPageOne, 0);
        pList = pPageOne;
        pList->pDirty = 0;
      }
................................................................................
    }else{
      /* The bBatch boolean is true if the batch-atomic-write commit method
      ** should be used.  No rollback journal is created if batch-atomic-write
      ** is enabled.
      */
      sqlite3_file *fd = pPager->fd;
#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE
      int bBatch = zMaster==0    /* An SQLITE_IOCAP_BATCH_ATOMIC commit */
        && (sqlite3OsDeviceCharacteristics(fd) & SQLITE_IOCAP_BATCH_ATOMIC)
        && !pPager->noSync
        && sqlite3JournalIsInMemory(pPager->jfd);
#else
# define bBatch 0
#endif

................................................................................
        }else{
          rc = sqlite3JournalCreate(pPager->jfd);
          if( rc==SQLITE_OK ){
            rc = pager_incr_changecounter(pPager, 0);
          }
        }
      }
#else  /* SQLITE_ENABLE_ATOMIC_WRITE */
#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE
      if( zMaster ){
        rc = sqlite3JournalCreate(pPager->jfd);
        if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
        assert( bBatch==0 );
      }
#endif
      rc = pager_incr_changecounter(pPager, 0);
#endif /* !SQLITE_ENABLE_ATOMIC_WRITE */
      if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
  
      /* Write the master journal name into the journal file. If a master 
      ** journal file name has already been written to the journal file, 
      ** or if zMaster is NULL (no master journal), then this call is a no-op.
      */
      rc = writeMasterJournal(pPager, zMaster);
................................................................................
      ** on a system under memory pressure it is just possible that this is 
      ** not the case. In this case it is likely enough that the redundant
      ** xSync() call will be changed to a no-op by the OS anyhow. 
      */
      rc = syncJournal(pPager, 0);
      if( rc!=SQLITE_OK ) goto commit_phase_one_exit;

      pList = sqlite3PcacheDirtyList(pPager->pPCache);
#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE
      if( bBatch ){




        rc = sqlite3OsFileControl(fd, SQLITE_FCNTL_BEGIN_ATOMIC_WRITE, 0);


        if( rc==SQLITE_OK ){
          rc = pager_write_pagelist(pPager, pList);

          if( rc==SQLITE_OK ){
            rc = sqlite3OsFileControl(fd, SQLITE_FCNTL_COMMIT_ATOMIC_WRITE, 0);
          }
          if( rc!=SQLITE_OK ){
            sqlite3OsFileControlHint(fd, SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE, 0);
          }
        }

        if( (rc&0xFF)==SQLITE_IOERR && rc!=SQLITE_IOERR_NOMEM ){
          rc = sqlite3JournalCreate(pPager->jfd);
          if( rc!=SQLITE_OK ){
            sqlite3OsClose(pPager->jfd);
          }
          bBatch = 0;
        }else{
          sqlite3OsClose(pPager->jfd);
        }
      }
#endif /* SQLITE_ENABLE_BATCH_ATOMIC_WRITE */

      if( bBatch==0 && rc==SQLITE_OK ){
        rc = pager_write_pagelist(pPager, pList);
      }

      if( rc!=SQLITE_OK ){
        assert( rc!=SQLITE_IOERR_BLOCKED );
        goto commit_phase_one_exit;
      }
      sqlite3PcacheCleanAll(pPager->pPCache);

Changes to src/test_vfs.c.

129
130
131
132
133
134
135

136
137
138
139
140
141
142
143
144
...
513
514
515
516
517
518
519
520

521
522
523
524
525
526
527
...
531
532
533
534
535
536
537
538
539
540
541























542
543
544
545
546
547
548
549
....
1156
1157
1158
1159
1160
1161
1162

1163
1164
1165
1166
1167
1168
1169
#define TESTVFS_TRUNCATE_MASK     0x00002000
#define TESTVFS_ACCESS_MASK       0x00004000
#define TESTVFS_FULLPATHNAME_MASK 0x00008000
#define TESTVFS_READ_MASK         0x00010000
#define TESTVFS_UNLOCK_MASK       0x00020000
#define TESTVFS_LOCK_MASK         0x00040000
#define TESTVFS_CKLOCK_MASK       0x00080000


#define TESTVFS_ALL_MASK          0x000FFFFF


#define TESTVFS_MAX_PAGES 1024

/*
** A shared-memory buffer. There is one of these objects for each shared
** memory region opened by clients. If two clients open the same file,
................................................................................
  return sqlite3OsCheckReservedLock(pFd->pReal, pResOut);
}

/*
** File control method. For custom operations on an tvfs-file.
*/
static int tvfsFileControl(sqlite3_file *pFile, int op, void *pArg){
  TestvfsFd *p = tvfsGetFd(pFile);

  if( op==SQLITE_FCNTL_PRAGMA ){
    char **argv = (char**)pArg;
    if( sqlite3_stricmp(argv[1],"error")==0 ){
      int rc = SQLITE_ERROR;
      if( argv[2] ){
        const char *z = argv[2];
        int x = atoi(z);
................................................................................
          while( sqlite3Isspace(z[0]) ){ z++; }
        }
        if( z[0] ) argv[0] = sqlite3_mprintf("%s", z);
      }
      return rc;
    }
    if( sqlite3_stricmp(argv[1], "filename")==0 ){
      argv[0] = sqlite3_mprintf("%s", p->zFilename);
      return SQLITE_OK;
    }
  }























  return sqlite3OsFileControl(p->pReal, op, pArg);
}

/*
** Return the sector-size in bytes for an tvfs-file.
*/
static int tvfsSectorSize(sqlite3_file *pFile){
  TestvfsFd *pFd = tvfsGetFd(pFile);
................................................................................
        { "xOpen",              TESTVFS_OPEN_MASK },
        { "xClose",             TESTVFS_CLOSE_MASK },
        { "xAccess",            TESTVFS_ACCESS_MASK },
        { "xFullPathname",      TESTVFS_FULLPATHNAME_MASK },
        { "xUnlock",            TESTVFS_UNLOCK_MASK },
        { "xLock",              TESTVFS_LOCK_MASK },
        { "xCheckReservedLock", TESTVFS_CKLOCK_MASK },

      };
      Tcl_Obj **apElem = 0;
      int nElem = 0;
      int mask = 0;
      if( objc!=3 ){
        Tcl_WrongNumArgs(interp, 2, objv, "LIST");
        return TCL_ERROR;







>

|







 







|
>







 







|



>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|







 







>







129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
...
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
...
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
....
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
#define TESTVFS_TRUNCATE_MASK     0x00002000
#define TESTVFS_ACCESS_MASK       0x00004000
#define TESTVFS_FULLPATHNAME_MASK 0x00008000
#define TESTVFS_READ_MASK         0x00010000
#define TESTVFS_UNLOCK_MASK       0x00020000
#define TESTVFS_LOCK_MASK         0x00040000
#define TESTVFS_CKLOCK_MASK       0x00080000
#define TESTVFS_FCNTL_MASK        0x00100000

#define TESTVFS_ALL_MASK          0x001FFFFF


#define TESTVFS_MAX_PAGES 1024

/*
** A shared-memory buffer. There is one of these objects for each shared
** memory region opened by clients. If two clients open the same file,
................................................................................
  return sqlite3OsCheckReservedLock(pFd->pReal, pResOut);
}

/*
** File control method. For custom operations on an tvfs-file.
*/
static int tvfsFileControl(sqlite3_file *pFile, int op, void *pArg){
  TestvfsFd *pFd = tvfsGetFd(pFile);
  Testvfs *p = (Testvfs *)pFd->pVfs->pAppData;
  if( op==SQLITE_FCNTL_PRAGMA ){
    char **argv = (char**)pArg;
    if( sqlite3_stricmp(argv[1],"error")==0 ){
      int rc = SQLITE_ERROR;
      if( argv[2] ){
        const char *z = argv[2];
        int x = atoi(z);
................................................................................
          while( sqlite3Isspace(z[0]) ){ z++; }
        }
        if( z[0] ) argv[0] = sqlite3_mprintf("%s", z);
      }
      return rc;
    }
    if( sqlite3_stricmp(argv[1], "filename")==0 ){
      argv[0] = sqlite3_mprintf("%s", pFd->zFilename);
      return SQLITE_OK;
    }
  }
  if( p->pScript && (p->mask&TESTVFS_FCNTL_MASK) ){
    struct Fcntl {
      int iFnctl;
      const char *zFnctl;
    } aF[] = {
      { SQLITE_FCNTL_BEGIN_ATOMIC_WRITE, "BEGIN_ATOMIC_WRITE" },
      { SQLITE_FCNTL_COMMIT_ATOMIC_WRITE, "COMMIT_ATOMIC_WRITE" },
    };
    int i;
    for(i=0; i<sizeof(aF)/sizeof(aF[0]); i++){
      if( op==aF[i].iFnctl ) break;
    }
    if( i<sizeof(aF)/sizeof(aF[0]) ){
      int rc = 0;
      tvfsExecTcl(p, "xFileControl", 
          Tcl_NewStringObj(pFd->zFilename, -1), 
          Tcl_NewStringObj(aF[i].zFnctl, -1),
          0, 0
      );
      tvfsResultCode(p, &rc);
      if( rc ) return rc;
    }
  }
  return sqlite3OsFileControl(pFd->pReal, op, pArg);
}

/*
** Return the sector-size in bytes for an tvfs-file.
*/
static int tvfsSectorSize(sqlite3_file *pFile){
  TestvfsFd *pFd = tvfsGetFd(pFile);
................................................................................
        { "xOpen",              TESTVFS_OPEN_MASK },
        { "xClose",             TESTVFS_CLOSE_MASK },
        { "xAccess",            TESTVFS_ACCESS_MASK },
        { "xFullPathname",      TESTVFS_FULLPATHNAME_MASK },
        { "xUnlock",            TESTVFS_UNLOCK_MASK },
        { "xLock",              TESTVFS_LOCK_MASK },
        { "xCheckReservedLock", TESTVFS_CKLOCK_MASK },
        { "xFileControl",       TESTVFS_FCNTL_MASK },
      };
      Tcl_Obj **apElem = 0;
      int nElem = 0;
      int mask = 0;
      if( objc!=3 ){
        Tcl_WrongNumArgs(interp, 2, objv, "LIST");
        return TCL_ERROR;

Added test/atomic2.test.































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# 2018-07-15
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this file is testing that if an IO error is encountered
# as part of an atomic F2FS commit, an attempt is made to commit the
# transaction using a legacy journal commit.
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl
source $testdir/malloc_common.tcl
set ::testprefix atomic2

db close
if {[atomic_batch_write test.db]==0} {
  puts "No f2fs atomic-batch-write support. Skipping tests..."
  finish_test
  return
}

reset_db

do_execsql_test 1.0 {
  CREATE TABLE t1(x, y);
  CREATE INDEX i1x ON t1(x);
  CREATE INDEX i2x ON t1(y);

  WITH s(i) AS ( SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<100 )
  INSERT INTO t1 SELECT randomblob(400), randomblob(400) FROM s;
}

set setup [list \
  -injectstart at_injectstart \
  -injectstop  at_injectstop  \
]

set ::at_fail  0
set ::at_nfail 0

proc at_injectstart {iFail} {
  set ::at_fail $iFail
  set ::at_nfail 0
}
proc at_injectstop {} {
  set ::at_fail 0
  return $::at_nfail
}

proc at_vfs_callback {method file z args} {
  if {$::at_fail>0} {
    incr ::at_fail -1
    if {$::at_fail==0} {
      incr ::at_nfail
      return SQLITE_IOERR
    } elseif {$method=="xFileControl" && $z=="COMMIT_ATOMIC_WRITE"} {
      set ::at_fail 0
    }
  }
  return SQLITE_OK
}

testvfs tvfs -default 1
tvfs script at_vfs_callback
tvfs filter {xFileControl xWrite}

faultsim_save_and_close

do_one_faultsim_test 2.0 {*}$setup -prep {
  faultsim_restore_and_reopen
} -body {
  execsql {
    WITH s(i) AS ( SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<100 )
    INSERT INTO t1 SELECT randomblob(400), randomblob(400) FROM s;
  }
} -test {
  faultsim_test_result {0 {}}

  set res [execsql {SELECT count(*) FROM t1; PRAGMA integrity_check}]
  if {$res!="200 ok"} {
    error "expected {200 ok}, got $res"
  }
}

db close
tvfs delete

finish_test