Index: src/ctime.c ================================================================== --- src/ctime.c +++ src/ctime.c @@ -87,10 +87,13 @@ #if SQLITE_ENABLE_API_ARMOR "ENABLE_API_ARMOR", #endif #if SQLITE_ENABLE_ATOMIC_WRITE "ENABLE_ATOMIC_WRITE", +#endif +#if SQLITE_ENABLE_BATCH_ATOMIC_WRITE + "ENABLE_BATCH_ATOMIC_WRITE", #endif #if SQLITE_ENABLE_CEROD "ENABLE_CEROD", #endif #if SQLITE_ENABLE_COLUMN_METADATA Index: src/memjournal.c ================================================================== --- src/memjournal.c +++ src/memjournal.c @@ -94,11 +94,12 @@ u8 *zOut = zBuf; int nRead = iAmt; int iChunkOffset; FileChunk *pChunk; -#ifdef SQLITE_ENABLE_ATOMIC_WRITE +#if defined(SQLITE_ENABLE_ATOMIC_WRITE) \ + || defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) if( (iAmt+iOfst)>p->endpoint.iOffset ){ return SQLITE_IOERR_SHORT_READ; } #endif @@ -213,11 +214,12 @@ ** access writes are not required. The only exception to this is when ** the in-memory journal is being used by a connection using the ** atomic-write optimization. In this case the first 28 bytes of the ** journal file may be written as part of committing the transaction. */ assert( iOfst==p->endpoint.iOffset || iOfst==0 ); -#ifdef SQLITE_ENABLE_ATOMIC_WRITE +#if defined(SQLITE_ENABLE_ATOMIC_WRITE) \ + || defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) if( iOfst==0 && p->pFirst ){ assert( p->nChunkSize>iAmt ); memcpy((u8*)p->pFirst->zChunk, zBuf, iAmt); }else #else @@ -382,21 +384,35 @@ */ void sqlite3MemJournalOpen(sqlite3_file *pJfd){ sqlite3JournalOpen(0, 0, pJfd, 0, -1); } -#ifdef SQLITE_ENABLE_ATOMIC_WRITE +#if defined(SQLITE_ENABLE_ATOMIC_WRITE) \ + || defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) /* ** If the argument p points to a MemJournal structure that is not an ** in-memory-only journal file (i.e. is one that was opened with a +ve -** nSpill parameter), and the underlying file has not yet been created, -** create it now. 
+** nSpill parameter or as SQLITE_OPEN_MAIN_JOURNAL), and the underlying +** file has not yet been created, create it now. */ -int sqlite3JournalCreate(sqlite3_file *p){ +int sqlite3JournalCreate(sqlite3_file *pJfd){ int rc = SQLITE_OK; - if( p->pMethods==&MemJournalMethods && ((MemJournal*)p)->nSpill>0 ){ - rc = memjrnlCreateFile((MemJournal*)p); + MemJournal *p = (MemJournal*)pJfd; + if( p->pMethod==&MemJournalMethods && ( +#ifdef SQLITE_ENABLE_ATOMIC_WRITE + p->nSpill>0 +#else + /* While this appears to not be possible without ATOMIC_WRITE, the + ** paths are complex, so it seems prudent to leave the test in as + ** a NEVER(), in case our analysis is subtly flawed. */ + NEVER(p->nSpill>0) +#endif +#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE + || (p->flags & SQLITE_OPEN_MAIN_JOURNAL) +#endif + )){ + rc = memjrnlCreateFile(p); } return rc; } #endif Index: src/os_unix.c ================================================================== --- src/os_unix.c +++ src/os_unix.c @@ -88,10 +88,11 @@ ** standard include files. */ #include #include #include +#include #include #include #include #include #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 @@ -218,14 +219,12 @@ sqlite3_int64 mmapSize; /* Usable size of mapping at pMapRegion */ sqlite3_int64 mmapSizeActual; /* Actual size of mapping at pMapRegion */ sqlite3_int64 mmapSizeMax; /* Configured FCNTL_MMAP_SIZE value */ void *pMapRegion; /* Memory mapped region */ #endif -#ifdef __QNXNTO__ int sectorSize; /* Device sector size */ int deviceCharacteristics; /* Precomputed device characteristics */ -#endif #if SQLITE_ENABLE_LOCKING_STYLE int openFlags; /* The flags specified at open() */ #endif #if SQLITE_ENABLE_LOCKING_STYLE || defined(__APPLE__) unsigned fsFlags; /* cached details from statfs() */ @@ -325,10 +324,24 @@ ** is the 32-bit version, even if _FILE_OFFSET_BITS=64 is defined. 
*/ #ifdef __ANDROID__ # define lseek lseek64 #endif + +#ifdef __linux__ +/* +** Linux-specific IOCTL magic numbers used for controlling F2FS +*/ +#define F2FS_IOCTL_MAGIC 0xf5 +#define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) +#define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2) +#define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) +#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) +#define F2FS_IOC_GET_FEATURES _IOR(F2FS_IOCTL_MAGIC, 12, u32) +#define F2FS_FEATURE_ATOMIC_WRITE 0x0004 +#endif /* __linux__ */ + /* ** Different Unix systems declare open() in different ways. Same use ** open(const char*,int,mode_t). Others use open(const char*,int,...). ** The difference is important when using a pointer to the function. @@ -497,10 +510,13 @@ { "lstat", (sqlite3_syscall_ptr)lstat, 0 }, #else { "lstat", (sqlite3_syscall_ptr)0, 0 }, #endif #define osLstat ((int(*)(const char*,struct stat*))aSyscall[27].pCurrent) + + { "ioctl", (sqlite3_syscall_ptr)ioctl, 0 }, +#define osIoctl ((int(*)(int,int,...))aSyscall[28].pCurrent) }; /* End of the overrideable system calls */ /* @@ -3775,10 +3791,25 @@ ** Information and control of an open file handle. */ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ unixFile *pFile = (unixFile*)id; switch( op ){ +#if defined(__linux__) && defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) + case SQLITE_FCNTL_BEGIN_ATOMIC_WRITE: { + int rc = osIoctl(pFile->h, F2FS_IOC_START_ATOMIC_WRITE); + return rc ? SQLITE_IOERR_BEGIN_ATOMIC : SQLITE_OK; + } + case SQLITE_FCNTL_COMMIT_ATOMIC_WRITE: { + int rc = osIoctl(pFile->h, F2FS_IOC_COMMIT_ATOMIC_WRITE); + return rc ? SQLITE_IOERR_COMMIT_ATOMIC : SQLITE_OK; + } + case SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE: { + int rc = osIoctl(pFile->h, F2FS_IOC_ABORT_VOLATILE_WRITE); + return rc ? 
SQLITE_IOERR_ROLLBACK_ATOMIC : SQLITE_OK; + } +#endif /* __linux__ && SQLITE_ENABLE_BATCH_ATOMIC_WRITE */ + case SQLITE_FCNTL_LOCKSTATE: { *(int*)pArg = pFile->eFileLock; return SQLITE_OK; } case SQLITE_FCNTL_LAST_ERRNO: { @@ -3858,34 +3889,45 @@ } return SQLITE_NOTFOUND; } /* -** Return the sector size in bytes of the underlying block device for -** the specified file. This is almost always 512 bytes, but may be -** larger for some devices. -** -** SQLite code assumes this function cannot fail. It also assumes that -** if two files are created in the same file-system directory (i.e. -** a database and its journal file) that the sector size will be the -** same for both. -*/ -#ifndef __QNXNTO__ -static int unixSectorSize(sqlite3_file *NotUsed){ - UNUSED_PARAMETER(NotUsed); - return SQLITE_DEFAULT_SECTOR_SIZE; -} -#endif - -/* -** The following version of unixSectorSize() is optimized for QNX. -*/ -#ifdef __QNXNTO__ +** If pFd->sectorSize is non-zero when this function is called, it is a +** no-op. Otherwise, the values of pFd->sectorSize and +** pFd->deviceCharacteristics are set according to the file-system +** characteristics. +** +** There are two versions of this function. One for QNX and one for all +** other systems. +*/ +#ifndef __QNXNTO__ +static void setDeviceCharacteristics(unixFile *pFd){ + assert( pFd->deviceCharacteristics==0 || pFd->sectorSize!=0 ); + if( pFd->sectorSize==0 ){ +#if defined(__linux__) && defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) + int res; + u32 f = 0; + + /* Check for support for F2FS atomic batch writes. */ + res = osIoctl(pFd->h, F2FS_IOC_GET_FEATURES, &f); + if( res==0 && (f & F2FS_FEATURE_ATOMIC_WRITE) ){ + pFd->deviceCharacteristics = SQLITE_IOCAP_BATCH_ATOMIC; + } +#endif /* __linux__ && SQLITE_ENABLE_BATCH_ATOMIC_WRITE */ + + /* Set the POWERSAFE_OVERWRITE flag if requested. 
*/ + if( pFd->ctrlFlags & UNIXFILE_PSOW ){ + pFd->deviceCharacteristics |= SQLITE_IOCAP_POWERSAFE_OVERWRITE; + } + + pFd->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; + } +} +#else #include #include -static int unixSectorSize(sqlite3_file *id){ - unixFile *pFile = (unixFile*)id; +static void setDeviceCharacteristics(unixFile *pFile){ if( pFile->sectorSize == 0 ){ struct statvfs fsInfo; /* Set defaults for non-supported filesystems */ pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; @@ -3950,13 +3992,28 @@ ** then it isn't valid.*/ if( pFile->sectorSize % 512 != 0 ){ pFile->deviceCharacteristics = 0; pFile->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; } - return pFile->sectorSize; } -#endif /* __QNXNTO__ */ +#endif + +/* +** Return the sector size in bytes of the underlying block device for +** the specified file. This is almost always 512 bytes, but may be +** larger for some devices. +** +** SQLite code assumes this function cannot fail. It also assumes that +** if two files are created in the same file-system directory (i.e. +** a database and its journal file) that the sector size will be the +** same for both. +*/ +static int unixSectorSize(sqlite3_file *id){ + unixFile *pFd = (unixFile*)id; + setDeviceCharacteristics(pFd); + return pFd->sectorSize; +} /* ** Return the device characteristics for the file. ** ** This VFS is set up to return SQLITE_IOCAP_POWERSAFE_OVERWRITE by default. @@ -3968,20 +4025,13 @@ ** of required I/O for journaling, since a lot of padding is eliminated. ** Hence, while POWERSAFE_OVERWRITE is on by default, there is a file-control ** available to turn it off and URI query parameter available to turn it off. 
*/ static int unixDeviceCharacteristics(sqlite3_file *id){ - unixFile *p = (unixFile*)id; - int rc = 0; -#ifdef __QNXNTO__ - if( p->sectorSize==0 ) unixSectorSize(id); - rc = p->deviceCharacteristics; -#endif - if( p->ctrlFlags & UNIXFILE_PSOW ){ - rc |= SQLITE_IOCAP_POWERSAFE_OVERWRITE; - } - return rc; + unixFile *pFd = (unixFile*)id; + setDeviceCharacteristics(pFd); + return pFd->deviceCharacteristics; } #if !defined(SQLITE_OMIT_WAL) || SQLITE_MAX_MMAP_SIZE>0 /* @@ -7596,11 +7646,11 @@ }; unsigned int i; /* Loop counter */ /* Double-check that the aSyscall[] array has been constructed ** correctly. See ticket [bb3a86e890c8e96ab] */ - assert( ArraySize(aSyscall)==28 ); + assert( ArraySize(aSyscall)==29 ); /* Register all VFSes defined in the aVfs[] array */ for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){ sqlite3_vfs_register(&aVfs[i], i==0); } Index: src/pager.c ================================================================== --- src/pager.c +++ src/pager.c @@ -945,10 +945,11 @@ assert( !pagerUseWal(pPager) ); assert( p->eLock>=EXCLUSIVE_LOCK ); assert( isOpen(p->jfd) || p->journalMode==PAGER_JOURNALMODE_OFF || p->journalMode==PAGER_JOURNALMODE_WAL + || (sqlite3OsDeviceCharacteristics(p->fd)&SQLITE_IOCAP_BATCH_ATOMIC) ); assert( pPager->dbOrigSize<=pPager->dbHintSize ); break; case PAGER_WRITER_FINISHED: @@ -956,10 +957,11 @@ assert( pPager->errCode==SQLITE_OK ); assert( !pagerUseWal(pPager) ); assert( isOpen(p->jfd) || p->journalMode==PAGER_JOURNALMODE_OFF || p->journalMode==PAGER_JOURNALMODE_WAL + || (sqlite3OsDeviceCharacteristics(p->fd)&SQLITE_IOCAP_BATCH_ATOMIC) ); break; case PAGER_ERROR: /* There must be at least one outstanding reference to the pager if @@ -1166,51 +1168,62 @@ } return rc; } /* -** This function determines whether or not the atomic-write optimization -** can be used with this pager. 
The optimization can be used if: +** This function determines whether or not the atomic-write or +** atomic-batch-write optimizations can be used with this pager. The +** atomic-write optimization can be used if: ** ** (a) the value returned by OsDeviceCharacteristics() indicates that ** a database page may be written atomically, and ** (b) the value returned by OsSectorSize() is less than or equal ** to the page size. ** -** The optimization is also always enabled for temporary files. It is -** an error to call this function if pPager is opened on an in-memory -** database. +** If it can be used, then the value returned is the size of the journal +** file when it contains rollback data for exactly one page. ** -** If the optimization cannot be used, 0 is returned. If it can be used, -** then the value returned is the size of the journal file when it -** contains rollback data for exactly one page. +** The atomic-batch-write optimization can be used if OsDeviceCharacteristics() +** returns a value with the SQLITE_IOCAP_BATCH_ATOMIC bit set. -1 is +** returned in this case. +** +** If neither optimization can be used, 0 is returned. 
*/ -#ifdef SQLITE_ENABLE_ATOMIC_WRITE static int jrnlBufferSize(Pager *pPager){ assert( !MEMDB ); - if( !pPager->tempFile ){ - int dc; /* Device characteristics */ - int nSector; /* Sector size */ - int szPage; /* Page size */ - - assert( isOpen(pPager->fd) ); - dc = sqlite3OsDeviceCharacteristics(pPager->fd); - nSector = pPager->sectorSize; - szPage = pPager->pageSize; + +#if defined(SQLITE_ENABLE_ATOMIC_WRITE) \ + || defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) + int dc; /* Device characteristics */ + + assert( isOpen(pPager->fd) ); + dc = sqlite3OsDeviceCharacteristics(pPager->fd); +#endif + +#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE + if( dc&SQLITE_IOCAP_BATCH_ATOMIC ){ + return -1; + } +#endif + +#ifdef SQLITE_ENABLE_ATOMIC_WRITE + { + int nSector = pPager->sectorSize; + int szPage = pPager->pageSize; assert(SQLITE_IOCAP_ATOMIC512==(512>>8)); assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8)); if( 0==(dc&(SQLITE_IOCAP_ATOMIC|(szPage>>8)) || nSector>szPage) ){ return 0; } } return JOURNAL_HDR_SZ(pPager) + JOURNAL_PG_SZ(pPager); -} -#else -# define jrnlBufferSize(x) 0 #endif + + return 0; +} /* ** If SQLITE_CHECK_PAGES is defined then we do some sanity checking ** on the cache using a hash function. This is used for testing ** and debugging only. @@ -2010,11 +2023,13 @@ if( pPager->eStateeLockjfd) || pPager->pInJournal==0 ); + assert( isOpen(pPager->jfd) || pPager->pInJournal==0 + || (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_BATCH_ATOMIC) + ); if( isOpen(pPager->jfd) ){ assert( !pagerUseWal(pPager) ); /* Finalize the journal file. */ if( sqlite3JournalIsInMemory(pPager->jfd) ){ @@ -4567,10 +4582,17 @@ rc = subjournalPageIfRequired(pPg); if( rc==SQLITE_OK ){ rc = pagerWalFrames(pPager, pPg, 0, 0); } }else{ + +#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE + if( pPager->tempFile==0 ){ + rc = sqlite3JournalCreate(pPager->jfd); + if( rc!=SQLITE_OK ) return pager_error(pPager, rc); + } +#endif /* Sync the journal file if required. 
*/ if( pPg->flags&PGHDR_NEED_SYNC || pPager->eState==PAGER_WRITER_CACHEMOD ){ @@ -6352,10 +6374,25 @@ sqlite3PagerUnref(pPageOne); if( rc==SQLITE_OK ){ sqlite3PcacheCleanAll(pPager->pPCache); } }else{ + /* The bBatch boolean is true if the batch-atomic-write commit method + ** should be used. No rollback journal is created if batch-atomic-write + ** is enabled. + */ + sqlite3_file *fd = pPager->fd; +#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE + const int bBatch = zMaster==0 /* An SQLITE_IOCAP_BATCH_ATOMIC commit */ + && (sqlite3OsDeviceCharacteristics(fd) & SQLITE_IOCAP_BATCH_ATOMIC) + && !pPager->noSync + && sqlite3JournalIsInMemory(pPager->jfd); +#else +# define bBatch 0 +#endif + +#ifdef SQLITE_ENABLE_ATOMIC_WRITE /* The following block updates the change-counter. Exactly how it ** does this depends on whether or not the atomic-update optimization ** was enabled at compile time, and if this transaction meets the ** runtime criteria to use the operation: ** @@ -6375,37 +6412,44 @@ ** Otherwise, if the optimization is both enabled and applicable, ** then call pager_incr_changecounter() to update the change-counter ** in 'direct' mode. In this case the journal file will never be ** created for this transaction. */ - #ifdef SQLITE_ENABLE_ATOMIC_WRITE - PgHdr *pPg; - assert( isOpen(pPager->jfd) - || pPager->journalMode==PAGER_JOURNALMODE_OFF - || pPager->journalMode==PAGER_JOURNALMODE_WAL - ); - if( !zMaster && isOpen(pPager->jfd) - && pPager->journalOff==jrnlBufferSize(pPager) - && pPager->dbSize>=pPager->dbOrigSize - && (0==(pPg = sqlite3PcacheDirtyList(pPager->pPCache)) || 0==pPg->pDirty) - ){ - /* Update the db file change counter via the direct-write method. The - ** following call will modify the in-memory representation of page 1 - ** to include the updated change counter and then write page 1 - ** directly to the database file. Because of the atomic-write - ** property of the host file-system, this is safe. 
- */ - rc = pager_incr_changecounter(pPager, 1); - }else{ + if( bBatch==0 ){ + PgHdr *pPg; + assert( isOpen(pPager->jfd) + || pPager->journalMode==PAGER_JOURNALMODE_OFF + || pPager->journalMode==PAGER_JOURNALMODE_WAL + ); + if( !zMaster && isOpen(pPager->jfd) + && pPager->journalOff==jrnlBufferSize(pPager) + && pPager->dbSize>=pPager->dbOrigSize + && (!(pPg = sqlite3PcacheDirtyList(pPager->pPCache)) || 0==pPg->pDirty) + ){ + /* Update the db file change counter via the direct-write method. The + ** following call will modify the in-memory representation of page 1 + ** to include the updated change counter and then write page 1 + ** directly to the database file. Because of the atomic-write + ** property of the host file-system, this is safe. + */ + rc = pager_incr_changecounter(pPager, 1); + }else{ + rc = sqlite3JournalCreate(pPager->jfd); + if( rc==SQLITE_OK ){ + rc = pager_incr_changecounter(pPager, 0); + } + } + } +#else +#ifdef SQLITE_ENABLE_BATCH_ATOMIC_WRITE + if( zMaster ){ rc = sqlite3JournalCreate(pPager->jfd); - if( rc==SQLITE_OK ){ - rc = pager_incr_changecounter(pPager, 0); - } + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; } - #else +#endif rc = pager_incr_changecounter(pPager, 0); - #endif +#endif if( rc!=SQLITE_OK ) goto commit_phase_one_exit; /* Write the master journal name into the journal file. If a master ** journal file name has already been written to the journal file, ** or if zMaster is NULL (no master journal), then this call is a no-op. @@ -6424,12 +6468,28 @@ ** not the case. In this case it is likely enough that the redundant ** xSync() call will be changed to a no-op by the OS anyhow. */ rc = syncJournal(pPager, 0); if( rc!=SQLITE_OK ) goto commit_phase_one_exit; - + + if( bBatch ){ + /* The pager is now in DBMOD state. But regardless of what happens + ** next, attempting to play the journal back into the database would + ** be unsafe. Close it now to make sure that does not happen. 
*/ + sqlite3OsClose(pPager->jfd); + rc = sqlite3OsFileControl(fd, SQLITE_FCNTL_BEGIN_ATOMIC_WRITE, 0); + if( rc!=SQLITE_OK ) goto commit_phase_one_exit; + } rc = pager_write_pagelist(pPager,sqlite3PcacheDirtyList(pPager->pPCache)); + if( bBatch ){ + if( rc==SQLITE_OK ){ + rc = sqlite3OsFileControl(fd, SQLITE_FCNTL_COMMIT_ATOMIC_WRITE, 0); + }else{ + sqlite3OsFileControl(fd, SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE, 0); + } + } + if( rc!=SQLITE_OK ){ assert( rc!=SQLITE_IOERR_BLOCKED ); goto commit_phase_one_exit; } sqlite3PcacheCleanAll(pPager->pPCache); Index: src/sqlite.h.in ================================================================== --- src/sqlite.h.in +++ src/sqlite.h.in @@ -492,10 +492,13 @@ #define SQLITE_IOERR_MMAP (SQLITE_IOERR | (24<<8)) #define SQLITE_IOERR_GETTEMPPATH (SQLITE_IOERR | (25<<8)) #define SQLITE_IOERR_CONVPATH (SQLITE_IOERR | (26<<8)) #define SQLITE_IOERR_VNODE (SQLITE_IOERR | (27<<8)) #define SQLITE_IOERR_AUTH (SQLITE_IOERR | (28<<8)) +#define SQLITE_IOERR_BEGIN_ATOMIC (SQLITE_IOERR | (29<<8)) +#define SQLITE_IOERR_COMMIT_ATOMIC (SQLITE_IOERR | (30<<8)) +#define SQLITE_IOERR_ROLLBACK_ATOMIC (SQLITE_IOERR | (31<<8)) #define SQLITE_LOCKED_SHAREDCACHE (SQLITE_LOCKED | (1<<8)) #define SQLITE_BUSY_RECOVERY (SQLITE_BUSY | (1<<8)) #define SQLITE_BUSY_SNAPSHOT (SQLITE_BUSY | (2<<8)) #define SQLITE_CANTOPEN_NOTEMPDIR (SQLITE_CANTOPEN | (1<<8)) #define SQLITE_CANTOPEN_ISDIR (SQLITE_CANTOPEN | (2<<8)) @@ -578,10 +581,15 @@ ** guaranteed to be unchanged. The SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN ** flag indicates that a file cannot be deleted when open. The ** SQLITE_IOCAP_IMMUTABLE flag indicates that the file is on ** read-only media and cannot be changed even by processes with ** elevated privileges. 
+** +** The SQLITE_IOCAP_BATCH_ATOMIC property means that the underlying +** filesystem supports doing multiple write operations atomically when those +** write operations are bracketed by [SQLITE_FCNTL_BEGIN_ATOMIC_WRITE] and +** [SQLITE_FCNTL_COMMIT_ATOMIC_WRITE]. */ #define SQLITE_IOCAP_ATOMIC 0x00000001 #define SQLITE_IOCAP_ATOMIC512 0x00000002 #define SQLITE_IOCAP_ATOMIC1K 0x00000004 #define SQLITE_IOCAP_ATOMIC2K 0x00000008 @@ -593,10 +601,11 @@ #define SQLITE_IOCAP_SAFE_APPEND 0x00000200 #define SQLITE_IOCAP_SEQUENTIAL 0x00000400 #define SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN 0x00000800 #define SQLITE_IOCAP_POWERSAFE_OVERWRITE 0x00001000 #define SQLITE_IOCAP_IMMUTABLE 0x00002000 +#define SQLITE_IOCAP_BATCH_ATOMIC 0x00004000 /* ** CAPI3REF: File Locking Levels ** ** SQLite uses one of these integer values as the second @@ -727,10 +736,11 @@ **
  <li> [SQLITE_IOCAP_SAFE_APPEND] **
  <li> [SQLITE_IOCAP_SEQUENTIAL] **
  <li> [SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN] **
  <li> [SQLITE_IOCAP_POWERSAFE_OVERWRITE] **
  <li> [SQLITE_IOCAP_IMMUTABLE] +**
  <li> [SQLITE_IOCAP_BATCH_ATOMIC] ** </ul> ** ** The SQLITE_IOCAP_ATOMIC property means that all writes of ** any size are atomic. The SQLITE_IOCAP_ATOMICnnn values ** mean that writes of blocks that are nnn bytes in size and @@ -1010,10 +1020,44 @@ ** **
  <li> [[SQLITE_FCNTL_RBU]] ** The [SQLITE_FCNTL_RBU] opcode is implemented by the special VFS used by ** the RBU extension only. All other VFS should return SQLITE_NOTFOUND for ** this opcode. +** +**
  <li> [[SQLITE_FCNTL_BEGIN_ATOMIC_WRITE]] +** If the [SQLITE_FCNTL_BEGIN_ATOMIC_WRITE] opcode returns SQLITE_OK, then +** the file descriptor is placed in "batch write mode", which +** means all subsequent write operations will be deferred and done +** atomically at the next [SQLITE_FCNTL_COMMIT_ATOMIC_WRITE]. Systems +** that do not support batch atomic writes will return SQLITE_NOTFOUND. +** ^Following a successful SQLITE_FCNTL_BEGIN_ATOMIC_WRITE and prior to +** the closing [SQLITE_FCNTL_COMMIT_ATOMIC_WRITE] or +** [SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE], SQLite will make +** no VFS interface calls on the same [sqlite3_file] file descriptor +** except for calls to the xWrite method and the xFileControl method +** with [SQLITE_FCNTL_SIZE_HINT]. +** +**
  <li> [[SQLITE_FCNTL_COMMIT_ATOMIC_WRITE]] +** The [SQLITE_FCNTL_COMMIT_ATOMIC_WRITE] opcode causes all write +** operations since the previous successful call to +** [SQLITE_FCNTL_BEGIN_ATOMIC_WRITE] to be performed atomically. +** This file control returns [SQLITE_OK] if and only if the writes were +** all performed successfully and have been committed to persistent storage. +** ^Regardless of whether or not it is successful, this file control takes +** the file descriptor out of batch write mode so that all subsequent +** write operations are independent. +** ^SQLite will never invoke SQLITE_FCNTL_COMMIT_ATOMIC_WRITE without +** a prior successful call to [SQLITE_FCNTL_BEGIN_ATOMIC_WRITE]. +** +**
  <li> [[SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE]] +** The [SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE] opcode causes all write +** operations since the previous successful call to +** [SQLITE_FCNTL_BEGIN_ATOMIC_WRITE] to be rolled back. +** ^This file control takes the file descriptor out of batch write mode +** so that all subsequent write operations are independent. +** ^SQLite will never invoke SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE without +** a prior successful call to [SQLITE_FCNTL_BEGIN_ATOMIC_WRITE]. ** </ul> */ #define SQLITE_FCNTL_LOCKSTATE 1 #define SQLITE_FCNTL_GET_LOCKPROXYFILE 2 #define SQLITE_FCNTL_SET_LOCKPROXYFILE 3 @@ -1041,10 +1085,13 @@ #define SQLITE_FCNTL_RBU 26 #define SQLITE_FCNTL_VFS_POINTER 27 #define SQLITE_FCNTL_JOURNAL_POINTER 28 #define SQLITE_FCNTL_WIN32_GET_HANDLE 29 #define SQLITE_FCNTL_PDB 30 +#define SQLITE_FCNTL_BEGIN_ATOMIC_WRITE 31 +#define SQLITE_FCNTL_COMMIT_ATOMIC_WRITE 32 +#define SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE 33 /* deprecated names */ #define SQLITE_GET_LOCKPROXYFILE SQLITE_FCNTL_GET_LOCKPROXYFILE #define SQLITE_SET_LOCKPROXYFILE SQLITE_FCNTL_SET_LOCKPROXYFILE #define SQLITE_LAST_ERRNO SQLITE_FCNTL_LAST_ERRNO Index: src/sqliteInt.h ================================================================== --- src/sqliteInt.h +++ src/sqliteInt.h @@ -623,10 +623,19 @@ */ #ifndef SQLITE_DEFAULT_PCACHE_INITSZ # define SQLITE_DEFAULT_PCACHE_INITSZ 20 #endif +/* +** The compile-time options SQLITE_MMAP_READWRITE and +** SQLITE_ENABLE_BATCH_ATOMIC_WRITE are not compatible with one another. +** You must choose one or the other (or neither) but not both. +*/ +#if defined(SQLITE_MMAP_READWRITE) && defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) +#error Cannot use both SQLITE_MMAP_READWRITE and SQLITE_ENABLE_BATCH_ATOMIC_WRITE +#endif + /* ** GCC does not define the offsetof() macro so we'll have to do it ** ourselves. 
*/ #ifndef offsetof @@ -4254,11 +4263,12 @@ #define IN_INDEX_LOOP 0x0004 /* IN operator used as a loop */ int sqlite3FindInIndex(Parse *, Expr *, u32, int*, int*); int sqlite3JournalOpen(sqlite3_vfs *, const char *, sqlite3_file *, int, int); int sqlite3JournalSize(sqlite3_vfs *); -#ifdef SQLITE_ENABLE_ATOMIC_WRITE +#if defined(SQLITE_ENABLE_ATOMIC_WRITE) \ + || defined(SQLITE_ENABLE_BATCH_ATOMIC_WRITE) int sqlite3JournalCreate(sqlite3_file *); #endif int sqlite3JournalIsInMemory(sqlite3_file *p); void sqlite3MemJournalOpen(sqlite3_file *); Index: src/test1.c ================================================================== --- src/test1.c +++ src/test1.c @@ -2547,10 +2547,50 @@ rc = sqlite3_delete_database(zFile); Tcl_SetObjResult(interp, Tcl_NewStringObj(sqlite3ErrName(rc), -1)); return TCL_OK; } + +/* +** Usage: atomic_batch_write PATH +*/ +static int SQLITE_TCLAPI test_atomic_batch_write( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + char *zFile = 0; /* Path to file to test */ + sqlite3 *db = 0; /* Database handle */ + sqlite3_file *pFd = 0; /* SQLite fd open on zFile */ + int bRes = 0; /* Integer result of this command */ + int dc = 0; /* Device-characteristics mask */ + int rc; /* sqlite3_open() return code */ + + if( objc!=2 ){ + Tcl_WrongNumArgs(interp, 1, objv, "PATH"); + return TCL_ERROR; + } + zFile = Tcl_GetString(objv[1]); + + rc = sqlite3_open(zFile, &db); + if( rc!=SQLITE_OK ){ + Tcl_AppendResult(interp, sqlite3_errmsg(db), 0); + sqlite3_close(db); + return TCL_ERROR; + } + + rc = sqlite3_file_control(db, "main", SQLITE_FCNTL_FILE_POINTER, (void*)&pFd); + dc = pFd->pMethods->xDeviceCharacteristics(pFd); + if( dc & SQLITE_IOCAP_BATCH_ATOMIC ){ + bRes = 1; + } + + Tcl_SetObjResult(interp, Tcl_NewIntObj(bRes)); + sqlite3_close(db); + return TCL_OK; +} /* ** Usage: sqlite3_next_stmt DB STMT ** ** Return the next statment in sequence after STMT. 
@@ -7637,10 +7677,11 @@ { "sqlite3_snapshot_get_blob", test_snapshot_get_blob, 0 }, { "sqlite3_snapshot_open_blob", test_snapshot_open_blob, 0 }, { "sqlite3_snapshot_cmp_blob", test_snapshot_cmp_blob, 0 }, #endif { "sqlite3_delete_database", test_delete_database, 0 }, + { "atomic_batch_write", test_atomic_batch_write, 0 }, }; static int bitmask_size = sizeof(Bitmask)*8; static int longdouble_size = sizeof(LONGDOUBLE_TYPE); int i; extern int sqlite3_sync_count, sqlite3_fullsync_count; Index: src/test6.c ================================================================== --- src/test6.c +++ src/test6.c @@ -734,10 +734,11 @@ { "atomic32k", SQLITE_IOCAP_ATOMIC32K }, { "atomic64k", SQLITE_IOCAP_ATOMIC64K }, { "sequential", SQLITE_IOCAP_SEQUENTIAL }, { "safe_append", SQLITE_IOCAP_SAFE_APPEND }, { "powersafe_overwrite", SQLITE_IOCAP_POWERSAFE_OVERWRITE }, + { "batch-atomic", SQLITE_IOCAP_BATCH_ATOMIC }, { 0, 0 } }; int i; int iDc = 0; @@ -974,11 +975,34 @@ return TCL_ERROR; } devsym_register(iDc, iSectorSize); return TCL_OK; +} + +/* +** tclcmd: sqlite3_crash_on_write N +*/ +static int SQLITE_TCLAPI writeCrashObjCmd( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + void devsym_crash_on_write(int); + int nWrite = 0; + + if( objc!=2 ){ + Tcl_WrongNumArgs(interp, 1, objv, "NWRITE"); + return TCL_ERROR; + } + if( Tcl_GetIntFromObj(interp, objv[1], &nWrite) ){ + return TCL_ERROR; + } + devsym_crash_on_write(nWrite); + return TCL_OK; } /* ** tclcmd: unregister_devsim */ @@ -1066,13 +1090,14 @@ #ifndef SQLITE_OMIT_DISKIO Tcl_CreateObjCommand(interp, "sqlite3_crash_enable", crashEnableCmd, 0, 0); Tcl_CreateObjCommand(interp, "sqlite3_crashparams", crashParamsObjCmd, 0, 0); Tcl_CreateObjCommand(interp, "sqlite3_crash_now", crashNowCmd, 0, 0); Tcl_CreateObjCommand(interp, "sqlite3_simulate_device", devSymObjCmd, 0, 0); + Tcl_CreateObjCommand(interp, "sqlite3_crash_on_write", writeCrashObjCmd,0,0); Tcl_CreateObjCommand(interp, "unregister_devsim", 
dsUnregisterObjCmd, 0, 0); Tcl_CreateObjCommand(interp, "register_jt_vfs", jtObjCmd, 0, 0); Tcl_CreateObjCommand(interp, "unregister_jt_vfs", jtUnregisterObjCmd, 0, 0); #endif return TCL_OK; } #endif /* SQLITE_TEST */ Index: src/test_devsym.c ================================================================== --- src/test_devsym.c +++ src/test_devsym.c @@ -26,10 +26,11 @@ /* ** Name used to identify this VFS. */ #define DEVSYM_VFS_NAME "devsym" +#define WRITECRASH_NAME "writecrash" typedef struct devsym_file devsym_file; struct devsym_file { sqlite3_file base; sqlite3_file *pReal; @@ -70,65 +71,17 @@ #endif /* SQLITE_OMIT_LOAD_EXTENSION */ static int devsymRandomness(sqlite3_vfs*, int nByte, char *zOut); static int devsymSleep(sqlite3_vfs*, int microseconds); static int devsymCurrentTime(sqlite3_vfs*, double*); -static sqlite3_vfs devsym_vfs = { - 2, /* iVersion */ - sizeof(devsym_file), /* szOsFile */ - DEVSYM_MAX_PATHNAME, /* mxPathname */ - 0, /* pNext */ - DEVSYM_VFS_NAME, /* zName */ - 0, /* pAppData */ - devsymOpen, /* xOpen */ - devsymDelete, /* xDelete */ - devsymAccess, /* xAccess */ - devsymFullPathname, /* xFullPathname */ -#ifndef SQLITE_OMIT_LOAD_EXTENSION - devsymDlOpen, /* xDlOpen */ - devsymDlError, /* xDlError */ - devsymDlSym, /* xDlSym */ - devsymDlClose, /* xDlClose */ -#else - 0, /* xDlOpen */ - 0, /* xDlError */ - 0, /* xDlSym */ - 0, /* xDlClose */ -#endif /* SQLITE_OMIT_LOAD_EXTENSION */ - devsymRandomness, /* xRandomness */ - devsymSleep, /* xSleep */ - devsymCurrentTime, /* xCurrentTime */ - 0, /* xGetLastError */ - 0 /* xCurrentTimeInt64 */ -}; - -static sqlite3_io_methods devsym_io_methods = { - 2, /* iVersion */ - devsymClose, /* xClose */ - devsymRead, /* xRead */ - devsymWrite, /* xWrite */ - devsymTruncate, /* xTruncate */ - devsymSync, /* xSync */ - devsymFileSize, /* xFileSize */ - devsymLock, /* xLock */ - devsymUnlock, /* xUnlock */ - devsymCheckReservedLock, /* xCheckReservedLock */ - devsymFileControl, /* xFileControl */ - 
devsymSectorSize, /* xSectorSize */ - devsymDeviceCharacteristics, /* xDeviceCharacteristics */ - devsymShmMap, /* xShmMap */ - devsymShmLock, /* xShmLock */ - devsymShmBarrier, /* xShmBarrier */ - devsymShmUnmap /* xShmUnmap */ -}; - struct DevsymGlobal { sqlite3_vfs *pVfs; int iDeviceChar; int iSectorSize; + int nWriteCrash; }; -struct DevsymGlobal g = {0, 0, 512}; +struct DevsymGlobal g = {0, 0, 512, 0}; /* ** Close an devsym-file. */ static int devsymClose(sqlite3_file *pFile){ @@ -269,10 +222,30 @@ const char *zName, sqlite3_file *pFile, int flags, int *pOutFlags ){ +static sqlite3_io_methods devsym_io_methods = { + 2, /* iVersion */ + devsymClose, /* xClose */ + devsymRead, /* xRead */ + devsymWrite, /* xWrite */ + devsymTruncate, /* xTruncate */ + devsymSync, /* xSync */ + devsymFileSize, /* xFileSize */ + devsymLock, /* xLock */ + devsymUnlock, /* xUnlock */ + devsymCheckReservedLock, /* xCheckReservedLock */ + devsymFileControl, /* xFileControl */ + devsymSectorSize, /* xSectorSize */ + devsymDeviceCharacteristics, /* xDeviceCharacteristics */ + devsymShmMap, /* xShmMap */ + devsymShmLock, /* xShmLock */ + devsymShmBarrier, /* xShmBarrier */ + devsymShmUnmap /* xShmUnmap */ +}; + int rc; devsym_file *p = (devsym_file *)pFile; p->pReal = (sqlite3_file *)&p[1]; rc = sqlite3OsOpen(g.pVfs, zName, p->pReal, flags, pOutFlags); if( p->pReal->pMethods ){ @@ -370,21 +343,155 @@ */ static int devsymCurrentTime(sqlite3_vfs *pVfs, double *pTimeOut){ return g.pVfs->xCurrentTime(g.pVfs, pTimeOut); } +/* +** Return the sector-size in bytes for a writecrash-file. +*/ +static int writecrashSectorSize(sqlite3_file *pFile){ + devsym_file *p = (devsym_file *)pFile; + return sqlite3OsSectorSize(p->pReal); +} + +/* +** Return the device characteristic flags supported by a writecrash-file. 
+*/ +static int writecrashDeviceCharacteristics(sqlite3_file *pFile){ + devsym_file *p = (devsym_file *)pFile; + return sqlite3OsDeviceCharacteristics(p->pReal); +} + +/* +** Write data to a writecrash-file. +*/ +static int writecrashWrite( + sqlite3_file *pFile, + const void *zBuf, + int iAmt, + sqlite_int64 iOfst +){ + devsym_file *p = (devsym_file *)pFile; + if( g.nWriteCrash>0 ){ + g.nWriteCrash--; + if( g.nWriteCrash==0 ) abort(); + } + return sqlite3OsWrite(p->pReal, zBuf, iAmt, iOfst); +} + +/* +** Open a writecrash file handle. +*/ +static int writecrashOpen( + sqlite3_vfs *pVfs, + const char *zName, + sqlite3_file *pFile, + int flags, + int *pOutFlags +){ +static sqlite3_io_methods writecrash_io_methods = { + 2, /* iVersion */ + devsymClose, /* xClose */ + devsymRead, /* xRead */ + writecrashWrite, /* xWrite */ + devsymTruncate, /* xTruncate */ + devsymSync, /* xSync */ + devsymFileSize, /* xFileSize */ + devsymLock, /* xLock */ + devsymUnlock, /* xUnlock */ + devsymCheckReservedLock, /* xCheckReservedLock */ + devsymFileControl, /* xFileControl */ + writecrashSectorSize, /* xSectorSize */ + writecrashDeviceCharacteristics, /* xDeviceCharacteristics */ + devsymShmMap, /* xShmMap */ + devsymShmLock, /* xShmLock */ + devsymShmBarrier, /* xShmBarrier */ + devsymShmUnmap /* xShmUnmap */ +}; + + int rc; + devsym_file *p = (devsym_file *)pFile; + p->pReal = (sqlite3_file *)&p[1]; + rc = sqlite3OsOpen(g.pVfs, zName, p->pReal, flags, pOutFlags); + if( p->pReal->pMethods ){ + pFile->pMethods = &writecrash_io_methods; + } + return rc; +} + +static sqlite3_vfs devsym_vfs = { + 2, /* iVersion */ + sizeof(devsym_file), /* szOsFile */ + DEVSYM_MAX_PATHNAME, /* mxPathname */ + 0, /* pNext */ + DEVSYM_VFS_NAME, /* zName */ + 0, /* pAppData */ + devsymOpen, /* xOpen */ + devsymDelete, /* xDelete */ + devsymAccess, /* xAccess */ + devsymFullPathname, /* xFullPathname */ +#ifndef SQLITE_OMIT_LOAD_EXTENSION + devsymDlOpen, /* xDlOpen */ + devsymDlError, /* xDlError */ + 
devsymDlSym, /* xDlSym */ + devsymDlClose, /* xDlClose */ +#else + 0, /* xDlOpen */ + 0, /* xDlError */ + 0, /* xDlSym */ + 0, /* xDlClose */ +#endif /* SQLITE_OMIT_LOAD_EXTENSION */ + devsymRandomness, /* xRandomness */ + devsymSleep, /* xSleep */ + devsymCurrentTime, /* xCurrentTime */ + 0, /* xGetLastError */ + 0 /* xCurrentTimeInt64 */ +}; + +static sqlite3_vfs writecrash_vfs = { + 2, /* iVersion */ + sizeof(devsym_file), /* szOsFile */ + DEVSYM_MAX_PATHNAME, /* mxPathname */ + 0, /* pNext */ + WRITECRASH_NAME, /* zName */ + 0, /* pAppData */ + writecrashOpen, /* xOpen */ + devsymDelete, /* xDelete */ + devsymAccess, /* xAccess */ + devsymFullPathname, /* xFullPathname */ +#ifndef SQLITE_OMIT_LOAD_EXTENSION + devsymDlOpen, /* xDlOpen */ + devsymDlError, /* xDlError */ + devsymDlSym, /* xDlSym */ + devsymDlClose, /* xDlClose */ +#else + 0, /* xDlOpen */ + 0, /* xDlError */ + 0, /* xDlSym */ + 0, /* xDlClose */ +#endif /* SQLITE_OMIT_LOAD_EXTENSION */ + devsymRandomness, /* xRandomness */ + devsymSleep, /* xSleep */ + devsymCurrentTime, /* xCurrentTime */ + 0, /* xGetLastError */ + 0 /* xCurrentTimeInt64 */ +}; + /* ** This procedure registers the devsym vfs with SQLite. If the argument is ** true, the devsym vfs becomes the new default vfs. It is the only publicly ** available function in this file. 
*/ void devsym_register(int iDeviceChar, int iSectorSize){ + if( g.pVfs==0 ){ g.pVfs = sqlite3_vfs_find(0); devsym_vfs.szOsFile += g.pVfs->szOsFile; + writecrash_vfs.szOsFile += g.pVfs->szOsFile; sqlite3_vfs_register(&devsym_vfs, 0); + sqlite3_vfs_register(&writecrash_vfs, 0); } if( iDeviceChar>=0 ){ g.iDeviceChar = iDeviceChar; }else{ g.iDeviceChar = 0; @@ -400,7 +507,18 @@ sqlite3_vfs_unregister(&devsym_vfs); g.pVfs = 0; g.iDeviceChar = 0; g.iSectorSize = 0; } + +void devsym_crash_on_write(int nWrite){ + if( g.pVfs==0 ){ + g.pVfs = sqlite3_vfs_find(0); + devsym_vfs.szOsFile += g.pVfs->szOsFile; + writecrash_vfs.szOsFile += g.pVfs->szOsFile; + sqlite3_vfs_register(&devsym_vfs, 0); + sqlite3_vfs_register(&writecrash_vfs, 0); + } + g.nWriteCrash = nWrite; +} #endif ADDED test/atomic.test Index: test/atomic.test ================================================================== --- /dev/null +++ test/atomic.test @@ -0,0 +1,41 @@ +# 2015-11-07 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this file is testing atomic batch writes. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set ::testprefix atomic + +db close +if {[atomic_batch_write test.db]==0} { + puts "No f2fs atomic-batch-write support. Skipping tests..." 
+ finish_test + return +} + +reset_db + +do_execsql_test 1.0 { + CREATE TABLE t1(x, y); + BEGIN; + INSERT INTO t1 VALUES(1, 2); +} + +do_test 1.1 { file exists test.db-journal } {0} + +do_execsql_test 1.2 { + COMMIT; +} + + +finish_test Index: test/fallocate.test ================================================================== --- test/fallocate.test +++ test/fallocate.test @@ -57,11 +57,13 @@ # causes a database file to grow, the database grows to its previous size # on disk, not to the minimum size required to hold the database image. # do_test fallocate-1.7 { execsql { BEGIN; INSERT INTO t1 VALUES(1, 2); } - if {[permutation] != "inmemory_journal"} { + if {[permutation] != "inmemory_journal" + && [permutation] != "atomic-batch-write" + } { hexio_get_int [hexio_read test.db-journal 16 4] } else { set {} 1024 } } {1024} Index: test/misc1.test ================================================================== --- test/misc1.test +++ test/misc1.test @@ -477,30 +477,32 @@ # ifcapable curdir { # Make sure a database connection still works after changing the # working directory. # -do_test misc1-14.1 { - file mkdir tempdir - cd tempdir - execsql {BEGIN} - file exists ./test.db-journal -} {0} -do_test misc1-14.2a { - execsql {UPDATE t1 SET a=a||'x' WHERE 0} - file exists ../test.db-journal -} {0} -do_test misc1-14.2b { - execsql {UPDATE t1 SET a=a||'y' WHERE 1} - file exists ../test.db-journal -} {1} -do_test misc1-14.3 { - cd .. - forcedelete tempdir - execsql {COMMIT} - file exists ./test.db-journal -} {0} +if {[atomic_batch_write test.db]==0} { + do_test misc1-14.1 { + file mkdir tempdir + cd tempdir + execsql {BEGIN} + file exists ./test.db-journal + } {0} + do_test misc1-14.2a { + execsql {UPDATE t1 SET a=a||'x' WHERE 0} + file exists ../test.db-journal + } {0} + do_test misc1-14.2b { + execsql {UPDATE t1 SET a=a||'y' WHERE 1} + file exists ../test.db-journal + } {1} + do_test misc1-14.3 { + cd .. 
+ forcedelete tempdir + execsql {COMMIT} + file exists ./test.db-journal + } {0} +} } # A failed create table should not leave the table in the internal # data structures. Ticket #238. # Index: test/permutations.test ================================================================== --- test/permutations.test +++ test/permutations.test @@ -381,10 +381,34 @@ } -files [ test_set $allquicktests -exclude *malloc* *ioerr* *fault* oserror.test \ pager1.test syscall.test sysfault.test tkt3457.test quota* superlock* \ wal* mmap* ] + +test_suite "atomic-batch-write" -prefix "" -description { + Like veryquick.test, but must be run on a file-system that supports + atomic-batch-writes. Tests that depend on the journal file being present + are omitted. +} -files [ + test_set $allquicktests -exclude *malloc* *ioerr* *fault* *bigfile* *_err* \ + *fts5corrupt* *fts5big* *fts5aj* \ + crash8.test delete_db.test \ + exclusive.test journal3.test \ + journal1.test \ + jrnlmode.test jrnlmode2.test \ + lock4.test pager1.test \ + pager3.test sharedA.test \ + symlink.test stmt.test \ + sync.test sync2.test \ + tempdb.test tkt3457.test \ + vacuum5.test wal2.test \ + walmode.test zerodamage.test +] -initialize { + if {[atomic_batch_write test.db]==0} { + error "File system does NOT support atomic-batch-write" + } +} lappend ::testsuitelist xxx #------------------------------------------------------------------------- # Define the coverage related test suites: # Index: test/rollback.test ================================================================== --- test/rollback.test +++ test/rollback.test @@ -80,10 +80,11 @@ } {SQLITE_OK} if {$tcl_platform(platform) == "unix" && [permutation] ne "onefile" && [permutation] ne "inmemory_journal" + && [permutation] ne "atomic-batch-write" } { do_test rollback-2.1 { execsql { BEGIN; INSERT INTO t3 VALUES('hello world'); Index: test/syscall.test ================================================================== --- test/syscall.test +++ test/syscall.test 
@@ -59,11 +59,11 @@ foreach s { open close access getcwd stat fstat ftruncate fcntl read pread write pwrite fchmod fallocate pread64 pwrite64 unlink openDirectory mkdir rmdir statvfs fchown geteuid umask mmap munmap mremap - getpagesize readlink lstat + getpagesize readlink lstat ioctl } { if {[test_syscall exists $s]} {lappend syscall_list $s} } do_test 3.1 { lsort [test_syscall list] } [lsort $syscall_list] Index: test/tester.tcl ================================================================== --- test/tester.tcl +++ test/tester.tcl @@ -1588,10 +1588,58 @@ if {[string length $sql]>0} { puts $f "db eval {" puts $f "$sql" puts $f "}" } + close $f + set r [catch { + exec [info nameofexec] crash.tcl >@stdout + } msg] + + # Windows/ActiveState TCL returns a slightly different + # error message. We map that to the expected message + # so that we don't have to change all of the test + # cases. + if {$::tcl_platform(platform)=="windows"} { + if {$msg=="child killed: unknown signal"} { + set msg "child process exited abnormally" + } + } + + lappend r $msg +} + +# crash_on_write ?-devchar DEVCHAR? 
CRASHDELAY SQL +# +proc crash_on_write {args} { + + set nArg [llength $args] + if {$nArg<2 || $nArg%2} { + error "bad args: $args" + } + set zSql [lindex $args end] + set nDelay [lindex $args end-1] + + set devchar {} + for {set ii 0} {$ii < $nArg-2} {incr ii 2} { + set opt [lindex $args $ii] + switch -- [lindex $args $ii] { + -devchar { + set devchar [lindex $args [expr $ii+1]] + } + + default { error "unrecognized option: $opt" } + } + } + + set f [open crash.tcl w] + puts $f "sqlite3_crash_on_write $nDelay" + puts $f "sqlite3_test_control_pending_byte $::sqlite_pending_byte" + puts $f "sqlite3 db test.db -vfs writecrash" + puts $f "db eval {$zSql}" + puts $f "set {} {}" + close $f set r [catch { exec [info nameofexec] crash.tcl >@stdout } msg] ADDED test/writecrash.test Index: test/writecrash.test ================================================================== --- /dev/null +++ test/writecrash.test @@ -0,0 +1,68 @@ +# 2009 January 8 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Test the outcome of a writer crashing within a call to the VFS +# xWrite function. 
+# + + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix writecrash + +do_not_use_codec + + +if {$tcl_platform(platform)=="windows"} { + finish_test + return +} + +do_execsql_test 1.0 { + CREATE TABLE t1(a INTEGER PRIMARY KEY, b BLOB UNIQUE); + WITH s(i) AS ( + VALUES(1) UNION ALL SELECT i+1 FROM s WHERE i<100 + ) + INSERT INTO t1 SELECT NULL, randomblob(900) FROM s; +} {} + +set bGo 1 +for {set tn 1} {$bGo} {incr tn} { + +db close +sqlite3 db test.db + + do_test 1.$tn.1 { + set res [crash_on_write $tn { + UPDATE t1 SET b = randomblob(899) WHERE (a%3)==0 + }] + set bGo 0 + if {[string match {1 {child killed:*}} $res]} { + set res {0 {}} + set bGo 1 + } + set res + } {0 {}} + +#db close +#sqlite3 db test.db + + do_execsql_test 1.$tn.2 { PRAGMA integrity_check } {ok} + +db close +sqlite3 db test.db + + do_execsql_test 1.$tn.3 { PRAGMA integrity_check } {ok} +} + + + +finish_test