Index: Makefile.msc ================================================================== --- Makefile.msc +++ Makefile.msc @@ -1627,11 +1627,12 @@ $(TOP)\src\test_window.c \ $(TOP)\src\test_wsd.c \ $(TOP)\ext\fts3\fts3_term.c \ $(TOP)\ext\fts3\fts3_test.c \ $(TOP)\ext\rbu\test_rbu.c \ - $(TOP)\ext\session\test_session.c + $(TOP)\ext\session\test_session.c \ + $(TOP)\ext\session\sqlite3changebatch.c # Statically linked extensions. # TESTEXT = \ $(TOP)\ext\expert\sqlite3expert.c \ ADDED doc/begin_concurrent.md Index: doc/begin_concurrent.md ================================================================== --- /dev/null +++ doc/begin_concurrent.md @@ -0,0 +1,106 @@ + +Begin Concurrent +================ + +## Overview + +Usually, SQLite allows at most one writer to proceed concurrently. The +BEGIN CONCURRENT enhancement allows multiple writers to process write +transactions simultanously if the database is in "wal" or "wal2" mode, +although the system still serializes COMMIT commands. + +When a write-transaction is opened with "BEGIN CONCURRENT", actually +locking the database is deferred until a COMMIT is executed. This means +that any number of transactions started with BEGIN CONCURRENT may proceed +concurrently. The system uses optimistic page-level-locking to prevent +conflicting concurrent transactions from being committed. + +When a BEGIN CONCURRENT transaction is committed, the system checks whether +or not any of the database pages that the transaction has read have been +modified since the BEGIN CONCURRENT was opened. In other words - it asks +if the transaction being committed operates on a different set of data than +all other concurrently executing transactions. If the answer is "yes, this +transaction did not read or modify any data modified by any concurrent +transaction", then the transaction is committed as normal. Otherwise, if the +transaction does conflict, it cannot be committed and an SQLITE_BUSY_SNAPSHOT +error is returned. At this point, all the client can do is ROLLBACK the +transaction. + +If SQLITE_BUSY_SNAPSHOT is returned, messages are output via the sqlite3_log +mechanism indicating the page and table or index on which the conflict +occurred. This can be useful when optimizing concurrency. + +## Application Programming Notes + +In order to serialize COMMIT processing, SQLite takes a lock on the database +as part of each COMMIT command and releases it before returning. At most one +writer may hold this lock at any one time. If a writer cannot obtain the lock, +it uses SQLite's busy-handler to pause and retry for a while: + + + https://www.sqlite.org/c3ref/busy_handler.html + + +If there is significant contention for the writer lock, this mechanism can be +inefficient. In this case it is better for the application to use a mutex or +some other mechanism that supports blocking to ensure that at most one writer +is attempting to COMMIT a BEGIN CONCURRENT transaction at a time. This is +usually easier if all writers are part of the same operating system process. + +If all database clients (readers and writers) are located in the same OS +process, and if that OS is a Unix variant, then it can be more efficient to +the built-in VFS "unix-excl" instead of the default "unix". This is because it +uses more efficient locking primitives. + +The key to maximizing concurrency using BEGIN CONCURRENT is to ensure that +there are a large number of non-conflicting transactions. In SQLite, each +table and each index is stored as a separate b-tree, each of which is +distributed over a discrete set of database pages. This means that: + + * Two transactions that write to different sets of tables never + conflict, and that + + * Two transactions that write to the same tables or indexes only + conflict if the values of the keys (either primary keys or indexed + rows) are fairly close together. For example, given a large + table with the schema: + +
     CREATE TABLE t1(a INTEGER PRIMARY KEY, b BLOB);
+ + writing two rows with adjacent values for "a" probably will cause a + conflict (as the two keys are stored on the same page), but writing two + rows with vastly different values for "a" will not (as the keys will likly + be stored on different pages). + +Note that, in SQLite, if values are not explicitly supplied for an INTEGER +PRIMARY KEY, as for example in: + +> + INSERT INTO t1(b) VALUES(); + +then monotonically increasing values are assigned automatically. This is +terrible for concurrency, as it all but ensures that all new rows are +added to the same database page. In such situations, it is better to +explicitly assign random values to INTEGER PRIMARY KEY fields. + +This problem also comes up for non-WITHOUT ROWID tables that do not have an +explicit INTEGER PRIMARY KEY column. In these cases each table has an implicit +INTEGER PRIMARY KEY column that is assigned increasing values, leading to the +same problem as omitting to assign a value to an explicit INTEGER PRIMARY KEY +column. + +For both explicit and implicit INTEGER PRIMARY KEYs, it is possible to have +SQLite assign values at random (instead of the monotonically increasing +values) by writing a row with a rowid equal to the largest possible signed +64-bit integer to the table. For example: + + INSERT INTO t1(a) VALUES(9223372036854775807); + +Applications should take care not to malfunction due to the presence of such +rows. + +The nature of some types of indexes, for example indexes on timestamp fields, +can also cause problems (as concurrent transactions may assign similar +timestamps that will be stored on the same db page to new records). In these +cases the database schema may need to be rethought to increase the concurrency +provided by page-level-locking. ADDED ext/session/changebatch1.test Index: ext/session/changebatch1.test ================================================================== --- /dev/null +++ ext/session/changebatch1.test @@ -0,0 +1,222 @@ +# 2016 August 23 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. +# + +if {![info exists testdir]} { + set testdir [file join [file dirname [info script]] .. .. test] +} +source $testdir/tester.tcl +ifcapable !session {finish_test; return} + +set testprefix changebatch1 + + +proc sql_to_changeset {method sql} { + sqlite3session S db main + S attach * + execsql $sql + set ret [S $method] + S delete + return $ret +} + +proc do_changebatch_test {tn method args} { + set C [list] + foreach a $args { + lappend C [sql_to_changeset $method $a] + } + + sqlite3changebatch cb db + set i 1 + foreach ::cs [lrange $C 0 end-1] { + set rc [cb add $::cs] + if {$rc!="SQLITE_OK"} { error "expected SQLITE_OK, got $rc (i=$i)" } + incr i + } + + set ::cs [lindex $C end] + do_test $tn { cb add [set ::cs] } SQLITE_CONSTRAINT + cb delete +} + +proc do_changebatch_test1 {tn args} { + uplevel do_changebatch_test $tn changeset $args +} +proc do_changebatch_test2 {tn args} { + uplevel do_changebatch_test $tn fullchangeset $args +} + +#------------------------------------------------------------------------- +# The body of the following loop contains tests for database schemas +# that do not feature multi-column UNIQUE constraints. In this case +# it doesn't matter if the changesets are generated using +# sqlite3session_changeset() or sqlite3session_fullchangeset(). +# +foreach {tn testfunction} { + 1 do_changebatch_test1 + 2 do_changebatch_test2 +} { + reset_db + + #------------------------------------------------------------------------- + # + do_execsql_test $tn.1.0 { + CREATE TABLE t1(a PRIMARY KEY, b); + } + + $testfunction $tn.1.1 { + INSERT INTO t1 VALUES(1, 1); + } { + DELETE FROM t1 WHERE a=1; + } + + do_execsql_test $tn.1.2.0 { + INSERT INTO t1 VALUES(1, 1); + INSERT INTO t1 VALUES(2, 2); + INSERT INTO t1 VALUES(3, 3); + } + $testfunction $tn.1.2.1 { + DELETE FROM t1 WHERE a=2; + } { + INSERT INTO t1 VALUES(2, 2); + } + + #------------------------------------------------------------------------- + # + do_execsql_test $tn.2.0 { + CREATE TABLE x1(a, b PRIMARY KEY, c UNIQUE); + CREATE TABLE x2(a PRIMARY KEY, b UNIQUE, c UNIQUE); + CREATE INDEX x1a ON x1(a); + + INSERT INTO x1 VALUES(1, 1, 'a'); + INSERT INTO x1 VALUES(1, 2, 'b'); + INSERT INTO x1 VALUES(1, 3, 'c'); + } + + $testfunction $tn.2.1 { + DELETE FROM x1 WHERE b=2; + } { + UPDATE x1 SET c='b' WHERE b=3; + } + + $testfunction $tn.2.2 { + DELETE FROM x1 WHERE b=1; + } { + INSERT INTO x1 VALUES(1, 5, 'a'); + } + + set L [list] + for {set i 1000} {$i < 10000} {incr i} { + lappend L "INSERT INTO x2 VALUES($i, $i, 'x' || $i)" + } + lappend L "DELETE FROM x2 WHERE b=1005" + $testfunction $tn.2.3 {*}$L + + execsql { INSERT INTO x1 VALUES('f', 'f', 'f') } + $testfunction $tn.2.4 { + INSERT INTO x2 VALUES('f', 'f', 'f'); + } { + INSERT INTO x1 VALUES('g', 'g', 'g'); + } { + DELETE FROM x1 WHERE b='f'; + } { + INSERT INTO x2 VALUES('g', 'g', 'g'); + } { + INSERT INTO x1 VALUES('f', 'f', 'f'); + } + + execsql { + DELETE FROM x1; + INSERT INTO x1 VALUES(1.5, 1.5, 1.5); + } + $testfunction $tn.2.5 { + DELETE FROM x1 WHERE b BETWEEN 1 AND 2; + } { + INSERT INTO x1 VALUES(2.5, 2.5, 2.5); + } { + INSERT INTO x1 VALUES(1.5, 1.5, 1.5); + } + + execsql { + DELETE FROM x2; + INSERT INTO x2 VALUES(X'abcd', X'1234', X'7890'); + INSERT INTO x2 VALUES(X'0000', X'0000', X'0000'); + } + breakpoint + $testfunction $tn.2.6 { + UPDATE x2 SET c = X'1234' WHERE a=X'abcd'; + INSERT INTO x2 VALUES(X'1234', X'abcd', X'7890'); + } { + DELETE FROM x2 WHERE b=X'0000'; + } { + INSERT INTO x2 VALUES(1, X'0000', NULL); + } +} + +#------------------------------------------------------------------------- +# Test some multi-column UNIQUE constraints. First Using _changeset() to +# demonstrate the problem, then using _fullchangeset() to show that it has +# been fixed. +# +reset_db +do_execsql_test 3.0 { + CREATE TABLE y1(a PRIMARY KEY, b, c, UNIQUE(b, c)); + INSERT INTO y1 VALUES(1, 1, 1); + INSERT INTO y1 VALUES(2, 2, 2); + INSERT INTO y1 VALUES(3, 3, 3); + INSERT INTO y1 VALUES(4, 3, 4); + BEGIN; +} + +do_test 3.1.1 { + set c1 [sql_to_changeset changeset { DELETE FROM y1 WHERE a=4 }] + set c2 [sql_to_changeset changeset { UPDATE y1 SET c=4 WHERE a=3 }] + sqlite3changebatch cb db + cb add $c1 + cb add $c2 +} {SQLITE_OK} +do_test 3.1.2 { + cb delete + execsql ROLLBACK +} {} + +do_test 3.1.1 { + set c1 [sql_to_changeset fullchangeset { DELETE FROM y1 WHERE a=4 }] + set c2 [sql_to_changeset fullchangeset { UPDATE y1 SET c=4 WHERE a=3 }] + sqlite3changebatch cb db + cb add $c1 + cb add $c2 +} {SQLITE_OK} +do_test 3.1.2 { + cb delete +} {} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 4.0 { + CREATE TABLE t1(x, y, z, PRIMARY KEY(x, y), UNIQUE(z)); +} + +do_test 4.1 { + set c1 [sql_to_changeset fullchangeset { INSERT INTO t1 VALUES(1, 2, 3) }] + execsql { + DROP TABLE t1; + CREATE TABLE t1(w, x, y, z, PRIMARY KEY(x, y), UNIQUE(z)); + } + sqlite3changebatch cb db + list [catch { cb add $c1 } msg] $msg +} {1 SQLITE_RANGE} + +cb delete + +finish_test ADDED ext/session/changebatchfault.test Index: ext/session/changebatchfault.test ================================================================== --- /dev/null +++ ext/session/changebatchfault.test @@ -0,0 +1,42 @@ +# 2011 Mar 21 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# The focus of this file is testing the session module. +# + +if {![info exists testdir]} { + set testdir [file join [file dirname [info script]] .. .. test] +} +source [file join [file dirname [info script]] session_common.tcl] +source $testdir/tester.tcl +ifcapable !session {finish_test; return} +set testprefix changebatchfault + +do_execsql_test 1.0 { + CREATE TABLE t1(a, b, c PRIMARY KEY, UNIQUE(a, b)); + INSERT INTO t1 VALUES('a', 'a', 'a'); + INSERT INTO t1 VALUES('b', 'b', 'b'); +} + +set ::c1 [changeset_from_sql { delete from t1 where c='a' }] +set ::c2 [changeset_from_sql { insert into t1 values('c', 'c', 'c') }] + +do_faultsim_test 1 -faults oom-* -body { + sqlite3changebatch cb db + cb add $::c1 + cb add $::c2 +} -test { + faultsim_test_result {0 SQLITE_OK} {1 SQLITE_NOMEM} + catch { cb delete } +} + + +finish_test Index: ext/session/sessionH.test ================================================================== --- ext/session/sessionH.test +++ ext/session/sessionH.test @@ -27,11 +27,11 @@ } do_then_apply_sql -ignorenoop { WITH s(i) AS ( VALUES(1) UNION ALL SELECT i+1 FROM s WHERe i<10000 ) - INSERT INTO t1 SELECT 'abcde', randomblob(16), i FROM s; + INSERT INTO t1 SELECT 'abcde', randomblob(18), i FROM s; } compare_db db db2 } {} #------------------------------------------------------------------------ ADDED ext/session/sqlite3changebatch.c Index: ext/session/sqlite3changebatch.c ================================================================== --- /dev/null +++ ext/session/sqlite3changebatch.c @@ -0,0 +1,485 @@ + +#if !defined(SQLITE_TEST) || (defined(SQLITE_ENABLE_SESSION) && defined(SQLITE_ENABLE_PREUPDATE_HOOK)) + +#include "sqlite3session.h" +#include "sqlite3changebatch.h" + +#include +#include + +typedef struct BatchTable BatchTable; +typedef struct BatchIndex BatchIndex; +typedef struct BatchIndexEntry BatchIndexEntry; +typedef struct BatchHash BatchHash; + +struct sqlite3_changebatch { + sqlite3 *db; /* Database handle used to read schema */ + BatchTable *pTab; /* First in linked list of tables */ + int iChangesetId; /* Current changeset id */ + int iNextIdxId; /* Next available index id */ + int nEntry; /* Number of entries in hash table */ + int nHash; /* Number of hash buckets */ + BatchIndexEntry **apHash; /* Array of hash buckets */ +}; + +struct BatchTable { + BatchIndex *pIdx; /* First in linked list of UNIQUE indexes */ + BatchTable *pNext; /* Next table */ + char zTab[1]; /* Table name */ +}; + +struct BatchIndex { + BatchIndex *pNext; /* Next index on same table */ + int iId; /* Index id (assigned internally) */ + int bPk; /* True for PK index */ + int nCol; /* Size of aiCol[] array */ + int *aiCol; /* Array of columns that make up index */ +}; + +struct BatchIndexEntry { + BatchIndexEntry *pNext; /* Next colliding hash table entry */ + int iChangesetId; /* Id of associated changeset */ + int iIdxId; /* Id of index this key is from */ + int szRecord; + char aRecord[1]; +}; + +/* +** Allocate and zero a block of nByte bytes. Must be freed using cbFree(). +*/ +static void *cbMalloc(int *pRc, int nByte){ + void *pRet; + + if( *pRc ){ + pRet = 0; + }else{ + pRet = sqlite3_malloc(nByte); + if( pRet ){ + memset(pRet, 0, nByte); + }else{ + *pRc = SQLITE_NOMEM; + } + } + + return pRet; +} + +/* +** Free an allocation made by cbMalloc(). +*/ +static void cbFree(void *p){ + sqlite3_free(p); +} + +/* +** Return the hash bucket that pEntry belongs in. +*/ +static int cbHash(sqlite3_changebatch *p, BatchIndexEntry *pEntry){ + unsigned int iHash = (unsigned int)pEntry->iIdxId; + unsigned char *pEnd = (unsigned char*)&pEntry->aRecord[pEntry->szRecord]; + unsigned char *pIter; + + for(pIter=(unsigned char*)pEntry->aRecord; pIternHash); +} + +/* +** Resize the hash table. +*/ +static int cbHashResize(sqlite3_changebatch *p){ + int rc = SQLITE_OK; + BatchIndexEntry **apNew; + int nNew = (p->nHash ? p->nHash*2 : 512); + int i; + + apNew = cbMalloc(&rc, sizeof(BatchIndexEntry*) * nNew); + if( rc==SQLITE_OK ){ + int nHash = p->nHash; + p->nHash = nNew; + for(i=0; iapHash[i])!=0 ){ + int iHash = cbHash(p, pEntry); + p->apHash[i] = pEntry->pNext; + pEntry->pNext = apNew[iHash]; + apNew[iHash] = pEntry; + } + } + + cbFree(p->apHash); + p->apHash = apNew; + } + + return rc; +} + + +/* +** Allocate a new sqlite3_changebatch object. +*/ +int sqlite3changebatch_new(sqlite3 *db, sqlite3_changebatch **pp){ + sqlite3_changebatch *pRet; + int rc = SQLITE_OK; + *pp = pRet = (sqlite3_changebatch*)cbMalloc(&rc, sizeof(sqlite3_changebatch)); + if( pRet ){ + pRet->db = db; + } + return rc; +} + +/* +** Add a BatchIndex entry for index zIdx to table pTab. +*/ +static int cbAddIndex( + sqlite3_changebatch *p, + BatchTable *pTab, + const char *zIdx, + int bPk +){ + int nCol = 0; + sqlite3_stmt *pIndexInfo = 0; + BatchIndex *pNew = 0; + int rc; + char *zIndexInfo; + + zIndexInfo = (char*)sqlite3_mprintf("PRAGMA main.index_info = %Q", zIdx); + if( zIndexInfo ){ + rc = sqlite3_prepare_v2(p->db, zIndexInfo, -1, &pIndexInfo, 0); + sqlite3_free(zIndexInfo); + }else{ + rc = SQLITE_NOMEM; + } + + if( rc==SQLITE_OK ){ + while( SQLITE_ROW==sqlite3_step(pIndexInfo) ){ nCol++; } + rc = sqlite3_reset(pIndexInfo); + } + + pNew = (BatchIndex*)cbMalloc(&rc, sizeof(BatchIndex) + sizeof(int) * nCol); + if( rc==SQLITE_OK ){ + pNew->nCol = nCol; + pNew->bPk = bPk; + pNew->aiCol = (int*)&pNew[1]; + pNew->iId = p->iNextIdxId++; + while( SQLITE_ROW==sqlite3_step(pIndexInfo) ){ + int i = sqlite3_column_int(pIndexInfo, 0); + int j = sqlite3_column_int(pIndexInfo, 1); + pNew->aiCol[i] = j; + } + rc = sqlite3_reset(pIndexInfo); + } + + if( rc==SQLITE_OK ){ + pNew->pNext = pTab->pIdx; + pTab->pIdx = pNew; + }else{ + cbFree(pNew); + } + sqlite3_finalize(pIndexInfo); + + return rc; +} + +/* +** Free the object passed as the first argument. +*/ +static void cbFreeTable(BatchTable *pTab){ + BatchIndex *pIdx; + BatchIndex *pIdxNext; + for(pIdx=pTab->pIdx; pIdx; pIdx=pIdxNext){ + pIdxNext = pIdx->pNext; + cbFree(pIdx); + } + cbFree(pTab); +} + +/* +** Find or create the BatchTable object named zTab. +*/ +static int cbFindTable( + sqlite3_changebatch *p, + const char *zTab, + BatchTable **ppTab +){ + BatchTable *pRet = 0; + int rc = SQLITE_OK; + + for(pRet=p->pTab; pRet; pRet=pRet->pNext){ + if( 0==sqlite3_stricmp(zTab, pRet->zTab) ) break; + } + + if( pRet==0 ){ + int nTab = strlen(zTab); + pRet = (BatchTable*)cbMalloc(&rc, nTab + sizeof(BatchTable)); + if( pRet ){ + sqlite3_stmt *pIndexList = 0; + char *zIndexList = 0; + int rc2; + memcpy(pRet->zTab, zTab, nTab); + + zIndexList = sqlite3_mprintf("PRAGMA main.index_list = %Q", zTab); + if( zIndexList==0 ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v2(p->db, zIndexList, -1, &pIndexList, 0); + sqlite3_free(zIndexList); + } + + while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pIndexList) ){ + if( sqlite3_column_int(pIndexList, 2) ){ + const char *zIdx = (const char*)sqlite3_column_text(pIndexList, 1); + const char *zTyp = (const char*)sqlite3_column_text(pIndexList, 3); + rc = cbAddIndex(p, pRet, zIdx, (zTyp[0]=='p')); + } + } + rc2 = sqlite3_finalize(pIndexList); + if( rc==SQLITE_OK ) rc = rc2; + + if( rc==SQLITE_OK ){ + pRet->pNext = p->pTab; + p->pTab = pRet; + }else{ + cbFreeTable(pRet); + pRet = 0; + } + } + } + + *ppTab = pRet; + return rc; +} + +/* +** Extract value iVal from the changeset iterator passed as the first +** argument. Set *ppVal to point to the value before returning. +** +** This function attempts to extract the value using function xVal +** (which is always either sqlite3changeset_new or sqlite3changeset_old). +** If the call returns SQLITE_OK but does not supply an sqlite3_value* +** pointer, an attempt to extract the value is made using the xFallback +** function. +*/ +static int cbGetChangesetValue( + sqlite3_changeset_iter *pIter, + int (*xVal)(sqlite3_changeset_iter*,int,sqlite3_value**), + int (*xFallback)(sqlite3_changeset_iter*,int,sqlite3_value**), + int iVal, + sqlite3_value **ppVal +){ + int rc = xVal(pIter, iVal, ppVal); + if( rc==SQLITE_OK && *ppVal==0 && xFallback ){ + rc = xFallback(pIter, iVal, ppVal); + } + return rc; +} + +static int cbAddToHash( + sqlite3_changebatch *p, + sqlite3_changeset_iter *pIter, + BatchIndex *pIdx, + int (*xVal)(sqlite3_changeset_iter*,int,sqlite3_value**), + int (*xFallback)(sqlite3_changeset_iter*,int,sqlite3_value**), + int *pbConf +){ + BatchIndexEntry *pNew; + int sz = pIdx->nCol; + int i; + int iOut = 0; + int rc = SQLITE_OK; + + for(i=0; rc==SQLITE_OK && inCol; i++){ + sqlite3_value *pVal; + rc = cbGetChangesetValue(pIter, xVal, xFallback, pIdx->aiCol[i], &pVal); + if( rc==SQLITE_OK ){ + int eType = 0; + if( pVal ) eType = sqlite3_value_type(pVal); + switch( eType ){ + case 0: + case SQLITE_NULL: + return SQLITE_OK; + + case SQLITE_INTEGER: + sz += 8; + break; + case SQLITE_FLOAT: + sz += 8; + break; + + default: + assert( eType==SQLITE_TEXT || eType==SQLITE_BLOB ); + sz += sqlite3_value_bytes(pVal); + break; + } + } + } + + pNew = cbMalloc(&rc, sizeof(BatchIndexEntry) + sz); + if( pNew ){ + pNew->iChangesetId = p->iChangesetId; + pNew->iIdxId = pIdx->iId; + pNew->szRecord = sz; + + for(i=0; inCol; i++){ + int eType; + sqlite3_value *pVal; + rc = cbGetChangesetValue(pIter, xVal, xFallback, pIdx->aiCol[i], &pVal); + if( rc!=SQLITE_OK ) break; /* coverage: condition is never true */ + eType = sqlite3_value_type(pVal); + pNew->aRecord[iOut++] = eType; + switch( eType ){ + case SQLITE_INTEGER: { + sqlite3_int64 i64 = sqlite3_value_int64(pVal); + memcpy(&pNew->aRecord[iOut], &i64, 8); + iOut += 8; + break; + } + case SQLITE_FLOAT: { + double d64 = sqlite3_value_double(pVal); + memcpy(&pNew->aRecord[iOut], &d64, sizeof(double)); + iOut += sizeof(double); + break; + } + + default: { + int nByte = sqlite3_value_bytes(pVal); + const char *z = (const char*)sqlite3_value_blob(pVal); + memcpy(&pNew->aRecord[iOut], z, nByte); + iOut += nByte; + break; + } + } + } + } + + if( rc==SQLITE_OK && p->nEntry>=(p->nHash/2) ){ + rc = cbHashResize(p); + } + + if( rc==SQLITE_OK ){ + BatchIndexEntry *pIter; + int iHash = cbHash(p, pNew); + + assert( iHash>=0 && iHashnHash ); + for(pIter=p->apHash[iHash]; pIter; pIter=pIter->pNext){ + if( pNew->szRecord==pIter->szRecord + && 0==memcmp(pNew->aRecord, pIter->aRecord, pNew->szRecord) + ){ + if( pNew->iChangesetId!=pIter->iChangesetId ){ + *pbConf = 1; + } + cbFree(pNew); + pNew = 0; + break; + } + } + + if( pNew ){ + pNew->pNext = p->apHash[iHash]; + p->apHash[iHash] = pNew; + p->nEntry++; + } + }else{ + cbFree(pNew); + } + + return rc; +} + + +/* +** Add a changeset to the current batch. +*/ +int sqlite3changebatch_add(sqlite3_changebatch *p, void *pBuf, int nBuf){ + sqlite3_changeset_iter *pIter; /* Iterator opened on pBuf/nBuf */ + int rc; /* Return code */ + int bConf = 0; /* Conflict was detected */ + + rc = sqlite3changeset_start(&pIter, nBuf, pBuf); + if( rc==SQLITE_OK ){ + int rc2; + for(rc2 = sqlite3changeset_next(pIter); + rc2==SQLITE_ROW; + rc2 = sqlite3changeset_next(pIter) + ){ + BatchTable *pTab; + BatchIndex *pIdx; + const char *zTab; /* Table this change applies to */ + int nCol; /* Number of columns in table */ + int op; /* UPDATE, INSERT or DELETE */ + + sqlite3changeset_op(pIter, &zTab, &nCol, &op, 0); + assert( op==SQLITE_INSERT || op==SQLITE_UPDATE || op==SQLITE_DELETE ); + + rc = cbFindTable(p, zTab, &pTab); + assert( pTab || rc!=SQLITE_OK ); + if( pTab ){ + for(pIdx=pTab->pIdx; pIdx && rc==SQLITE_OK; pIdx=pIdx->pNext){ + if( op==SQLITE_UPDATE && pIdx->bPk ) continue; + if( op==SQLITE_UPDATE || op==SQLITE_DELETE ){ + rc = cbAddToHash(p, pIter, pIdx, sqlite3changeset_old, 0, &bConf); + } + if( op==SQLITE_UPDATE || op==SQLITE_INSERT ){ + rc = cbAddToHash(p, pIter, pIdx, + sqlite3changeset_new, sqlite3changeset_old, &bConf + ); + } + } + } + if( rc!=SQLITE_OK ) break; + } + + rc2 = sqlite3changeset_finalize(pIter); + if( rc==SQLITE_OK ) rc = rc2; + } + + if( rc==SQLITE_OK && bConf ){ + rc = SQLITE_CONSTRAINT; + } + p->iChangesetId++; + return rc; +} + +/* +** Zero an existing changebatch object. +*/ +void sqlite3changebatch_zero(sqlite3_changebatch *p){ + int i; + for(i=0; inHash; i++){ + BatchIndexEntry *pEntry; + BatchIndexEntry *pNext; + for(pEntry=p->apHash[i]; pEntry; pEntry=pNext){ + pNext = pEntry->pNext; + cbFree(pEntry); + } + } + cbFree(p->apHash); + p->nHash = 0; + p->apHash = 0; +} + +/* +** Delete a changebatch object. +*/ +void sqlite3changebatch_delete(sqlite3_changebatch *p){ + BatchTable *pTab; + BatchTable *pTabNext; + + sqlite3changebatch_zero(p); + for(pTab=p->pTab; pTab; pTab=pTabNext){ + pTabNext = pTab->pNext; + cbFreeTable(pTab); + } + cbFree(p); +} + +/* +** Return the db handle. +*/ +sqlite3 *sqlite3changebatch_db(sqlite3_changebatch *p){ + return p->db; +} + +#endif /* SQLITE_ENABLE_SESSION && SQLITE_ENABLE_PREUPDATE_HOOK */ ADDED ext/session/sqlite3changebatch.h Index: ext/session/sqlite3changebatch.h ================================================================== --- /dev/null +++ ext/session/sqlite3changebatch.h @@ -0,0 +1,82 @@ + +#if !defined(SQLITECHANGEBATCH_H_) +#define SQLITECHANGEBATCH_H_ 1 + +typedef struct sqlite3_changebatch sqlite3_changebatch; + +/* +** Create a new changebatch object for detecting conflicts between +** changesets associated with a schema equivalent to that of the "main" +** database of the open database handle db passed as the first +** parameter. It is the responsibility of the caller to ensure that +** the database handle is not closed until after the changebatch +** object has been deleted. +** +** A changebatch object is used to detect batches of non-conflicting +** changesets. Changesets that do not conflict may be applied to the +** target database in any order without affecting the final state of +** the database. +** +** The changebatch object only works reliably if PRIMARY KEY and UNIQUE +** constraints on tables affected by the changesets use collation +** sequences that are equivalent to built-in collation sequence +** BINARY for the == operation. +** +** If successful, SQLITE_OK is returned and (*pp) set to point to +** the new changebatch object. If an error occurs, an SQLite error +** code is returned and the final value of (*pp) is undefined. +*/ +int sqlite3changebatch_new(sqlite3 *db, sqlite3_changebatch **pp); + +/* +** Argument p points to a buffer containing a changeset n bytes in +** size. Assuming no error occurs, this function returns SQLITE_OK +** if the changeset does not conflict with any changeset passed +** to an sqlite3changebatch_add() call made on the same +** sqlite3_changebatch* handle since the most recent call to +** sqlite3changebatch_zero(). If the changeset does conflict with +** an earlier such changeset, SQLITE_CONSTRAINT is returned. Or, +** if an error occurs, some other SQLite error code may be returned. +** +** One changeset is said to conflict with another if +** either: +** +** * the two changesets contain operations (INSERT, UPDATE or +** DELETE) on the same row, identified by primary key, or +** +** * the two changesets contain operations (INSERT, UPDATE or +** DELETE) on rows with identical values in any combination +** of fields constrained by a UNIQUE constraint. +** +** Even if this function returns SQLITE_CONFLICT, the current +** changeset is added to the internal data structures - so future +** calls to this function may conflict with it. If this function +** returns any result code other than SQLITE_OK or SQLITE_CONFLICT, +** the result of any future call to sqlite3changebatch_add() is +** undefined. +** +** Only changesets may be passed to this function. Passing a +** patchset to this function results in an SQLITE_MISUSE error. +*/ +int sqlite3changebatch_add(sqlite3_changebatch*, void *p, int n); + +/* +** Zero a changebatch object. This causes the records of all earlier +** calls to sqlite3changebatch_add() to be discarded. +*/ +void sqlite3changebatch_zero(sqlite3_changebatch*); + +/* +** Return a copy of the first argument passed to the sqlite3changebatch_new() +** call used to create the changebatch object passed as the only argument +** to this function. +*/ +sqlite3 *sqlite3changebatch_db(sqlite3_changebatch*); + +/* +** Delete a changebatch object. +*/ +void sqlite3changebatch_delete(sqlite3_changebatch*); + +#endif /* !defined(SQLITECHANGEBATCH_H_) */ + Index: ext/session/sqlite3session.c ================================================================== --- ext/session/sqlite3session.c +++ ext/session/sqlite3session.c @@ -25,10 +25,17 @@ # endif #endif #define SESSIONS_ROWID "_rowid_" +/* +** The three different types of changesets generated. +*/ +#define SESSIONS_PATCHSET 0 +#define SESSIONS_CHANGESET 1 +#define SESSIONS_FULLCHANGESET 2 + static int sessions_strm_chunk_size = SESSIONS_STRM_CHUNK_SIZE; typedef struct SessionHook SessionHook; struct SessionHook { void *pCtx; @@ -2666,11 +2673,11 @@ ** original values of any fields that have been modified. The new.* record ** contains the new values of only those fields that have been modified. */ static int sessionAppendUpdate( SessionBuffer *pBuf, /* Buffer to append to */ - int bPatchset, /* True for "patchset", 0 for "changeset" */ + int ePatchset, /* True for "patchset", 0 for "changeset" */ sqlite3_stmt *pStmt, /* Statement handle pointing at new row */ SessionChange *p, /* Object containing old values */ u8 *abPK /* Boolean array - true for PK columns */ ){ int rc = SQLITE_OK; @@ -2730,21 +2737,21 @@ /* If at least one field has been modified, this is not a no-op. */ if( bChanged ) bNoop = 0; /* Add a field to the old.* record. This is omitted if this module is ** currently generating a patchset. */ - if( bPatchset==0 ){ - if( bChanged || abPK[i] ){ + if( ePatchset!=SESSIONS_PATCHSET ){ + if( ePatchset==SESSIONS_FULLCHANGESET || bChanged || abPK[i] ){ sessionAppendBlob(pBuf, pCsr, nAdvance, &rc); }else{ sessionAppendByte(pBuf, 0, &rc); } } /* Add a field to the new.* record. Or the only record if currently ** generating a patchset. */ - if( bChanged || (bPatchset && abPK[i]) ){ + if( bChanged || (ePatchset==SESSIONS_PATCHSET && abPK[i]) ){ sessionAppendCol(&buf2, pStmt, i, &rc); }else{ sessionAppendByte(&buf2, 0, &rc); } @@ -2766,21 +2773,21 @@ ** the changeset format if argument bPatchset is zero, or the patchset ** format otherwise. */ static int sessionAppendDelete( SessionBuffer *pBuf, /* Buffer to append to */ - int bPatchset, /* True for "patchset", 0 for "changeset" */ + int eChangeset, /* One of SESSIONS_CHANGESET etc. */ SessionChange *p, /* Object containing old values */ int nCol, /* Number of columns in table */ u8 *abPK /* Boolean array - true for PK columns */ ){ int rc = SQLITE_OK; sessionAppendByte(pBuf, SQLITE_DELETE, &rc); sessionAppendByte(pBuf, p->bIndirect, &rc); - if( bPatchset==0 ){ + if( eChangeset!=SESSIONS_PATCHSET ){ sessionAppendBlob(pBuf, p->aRecord, p->nRecord, &rc); }else{ int i; u8 *a = p->aRecord; for(i=0; inCol, pRc); sessionAppendBlob(pBuf, pTab->abPK, pTab->nCol, pRc); sessionAppendBlob(pBuf, (u8 *)pTab->zName, (int)strlen(pTab->zName)+1, pRc); } @@ -3033,11 +3040,11 @@ ** occurs, an SQLite error code is returned and both output variables set ** to 0. */ static int sessionGenerateChangeset( sqlite3_session *pSession, /* Session object */ - int bPatchset, /* True for patchset, false for changeset */ + int ePatchset, /* One of SESSIONS_CHANGESET etc. */ int (*xOutput)(void *pOut, const void *pData, int nData), void *pOut, /* First argument for xOutput */ int *pnChangeset, /* OUT: Size of buffer at *ppChangeset */ void **ppChangeset /* OUT: Buffer containing changeset */ ){ @@ -3078,11 +3085,11 @@ if( rc==SQLITE_OK && pTab->nCol!=nOldCol ){ rc = sessionUpdateChanges(pSession, pTab); } /* Write a table header */ - sessionAppendTableHdr(&buf, bPatchset, pTab, &rc); + sessionAppendTableHdr(&buf, ePatchset, pTab, &rc); /* Build and compile a statement to execute: */ if( rc==SQLITE_OK ){ rc = sessionSelectStmt(db, 0, pSession->zDb, zName, pTab->bRowid, pTab->nCol, pTab->azCol, pTab->abPK, &pSel @@ -3104,14 +3111,14 @@ for(iCol=0; iColnCol; iCol++){ sessionAppendCol(&buf, pSel, iCol, &rc); } }else{ assert( pTab->abPK!=0 ); - rc = sessionAppendUpdate(&buf, bPatchset, pSel, p, pTab->abPK); + rc = sessionAppendUpdate(&buf, ePatchset, pSel, p, pTab->abPK); } }else if( p->op!=SQLITE_INSERT ){ - rc = sessionAppendDelete(&buf, bPatchset, p, pTab->nCol,pTab->abPK); + rc = sessionAppendDelete(&buf, ePatchset, p, pTab->nCol,pTab->abPK); } if( rc==SQLITE_OK ){ rc = sqlite3_reset(pSel); } @@ -3166,11 +3173,12 @@ void **ppChangeset /* OUT: Buffer containing changeset */ ){ int rc; if( pnChangeset==0 || ppChangeset==0 ) return SQLITE_MISUSE; - rc = sessionGenerateChangeset(pSession, 0, 0, 0, pnChangeset, ppChangeset); + rc = sessionGenerateChangeset( + pSession, SESSIONS_CHANGESET, 0, 0, pnChangeset, ppChangeset); assert( rc || pnChangeset==0 || pSession->bEnableSize==0 || *pnChangeset<=pSession->nMaxChangesetSize ); return rc; } @@ -3182,11 +3190,12 @@ sqlite3_session *pSession, int (*xOutput)(void *pOut, const void *pData, int nData), void *pOut ){ if( xOutput==0 ) return SQLITE_MISUSE; - return sessionGenerateChangeset(pSession, 0, xOutput, pOut, 0, 0); + return sessionGenerateChangeset( + pSession, SESSIONS_CHANGESET, xOutput, pOut, 0, 0); } /* ** Streaming version of sqlite3session_patchset(). */ @@ -3194,11 +3203,12 @@ sqlite3_session *pSession, int (*xOutput)(void *pOut, const void *pData, int nData), void *pOut ){ if( xOutput==0 ) return SQLITE_MISUSE; - return sessionGenerateChangeset(pSession, 1, xOutput, pOut, 0, 0); + return sessionGenerateChangeset( + pSession, SESSIONS_PATCHSET, xOutput, pOut, 0, 0); } /* ** Obtain a patchset object containing all changes recorded by the ** session object passed as the first argument. @@ -3210,12 +3220,23 @@ sqlite3_session *pSession, /* Session object */ int *pnPatchset, /* OUT: Size of buffer at *ppChangeset */ void **ppPatchset /* OUT: Buffer containing changeset */ ){ if( pnPatchset==0 || ppPatchset==0 ) return SQLITE_MISUSE; - return sessionGenerateChangeset(pSession, 1, 0, 0, pnPatchset, ppPatchset); + return sessionGenerateChangeset( + pSession, SESSIONS_PATCHSET, 0, 0, pnPatchset, ppPatchset); } + +int sqlite3session_fullchangeset( + sqlite3_session *pSession, /* Session object */ + int *pnChangeset, /* OUT: Size of buffer at *ppChangeset */ + void **ppChangeset /* OUT: Buffer containing changeset */ +){ + return sessionGenerateChangeset( + pSession, SESSIONS_FULLCHANGESET, 0, 0, pnChangeset, ppChangeset); +} + /* ** Enable or disable the session object passed as the first argument. */ int sqlite3session_enable(sqlite3_session *pSession, int bEnable){ @@ -6019,14 +6040,15 @@ /* Create the serialized output changeset based on the contents of the ** hash tables attached to the SessionTable objects in list p->pList. */ for(pTab=pGrp->pList; rc==SQLITE_OK && pTab; pTab=pTab->pNext){ + int eChangeset = pGrp->bPatch ? SESSIONS_PATCHSET : SESSIONS_CHANGESET; int i; if( pTab->nEntry==0 ) continue; - sessionAppendTableHdr(&buf, pGrp->bPatch, pTab, &rc); + sessionAppendTableHdr(&buf, eChangeset, pTab, &rc); for(i=0; inChange; i++){ SessionChange *p; for(p=pTab->apChange[i]; p; p=p->pNext){ sessionAppendByte(&buf, p->op, &rc); sessionAppendByte(&buf, p->bIndirect, &rc); Index: ext/session/sqlite3session.h ================================================================== --- ext/session/sqlite3session.h +++ ext/session/sqlite3session.h @@ -362,10 +362,23 @@ ** then another field of the same row is updated while the session is disabled, ** the resulting changeset will contain an UPDATE change that updates both ** fields. */ int sqlite3session_changeset( + sqlite3_session *pSession, /* Session object */ + int *pnChangeset, /* OUT: Size of buffer at *ppChangeset */ + void **ppChangeset /* OUT: Buffer containing changeset */ +); + +/* +** CAPI3REF: Generate A Full Changeset From A Session Object +** +** This function is similar to sqlite3session_changeset(), except that for +** each row affected by an UPDATE statement, all old.* values are recorded +** as part of the changeset, not just those modified. +*/ +int sqlite3session_fullchangeset( sqlite3_session *pSession, /* Session object */ int *pnChangeset, /* OUT: Size of buffer at *ppChangeset */ void **ppChangeset /* OUT: Buffer containing changeset */ ); Index: ext/session/test_session.c ================================================================== --- ext/session/test_session.c +++ ext/session/test_session.c @@ -227,10 +227,11 @@ ** $session delete ** $session enable BOOL ** $session indirect INTEGER ** $session patchset ** $session table_filter SCRIPT +** $session fullchangeset */ static int SQLITE_TCLAPI test_session_cmd( void *clientData, Tcl_Interp *interp, int objc, @@ -240,24 +241,24 @@ sqlite3_session *pSession = p->pSession; static struct SessionSubcmd { const char *zSub; int nArg; const char *zMsg; - int iSub; } aSub[] = { - { "attach", 1, "TABLE", }, /* 0 */ - { "changeset", 0, "", }, /* 1 */ - { "delete", 0, "", }, /* 2 */ - { "enable", 1, "BOOL", }, /* 3 */ - { "indirect", 1, "BOOL", }, /* 4 */ - { "isempty", 0, "", }, /* 5 */ - { "table_filter", 1, "SCRIPT", }, /* 6 */ + { "attach", 1, "TABLE" }, /* 0 */ + { "changeset", 0, "" }, /* 1 */ + { "delete", 0, "" }, /* 2 */ + { "enable", 1, "BOOL" }, /* 3 */ + { "indirect", 1, "BOOL" }, /* 4 */ + { "isempty", 0, "" }, /* 5 */ + { "table_filter", 1, "SCRIPT" }, /* 6 */ { "patchset", 0, "", }, /* 7 */ - { "diff", 2, "FROMDB TBL", }, /* 8 */ - { "memory_used", 0, "", }, /* 9 */ - { "changeset_size", 0, "", }, /* 10 */ - { "object_config", 2, "OPTION INTEGER", }, /* 11 */ + { "diff", 2, "FROMDB TBL" }, /* 8 */ + { "fullchangeset",0, "" }, /* 9 */ + { "memory_used", 0, "", }, /* 10 */ + { "changeset_size", 0, "", }, /* 11 */ + { "object_config", 2, "OPTION INTEGER", }, /* 12 */ { 0 } }; int iSub; int rc; @@ -283,23 +284,26 @@ return test_session_error(interp, rc, 0); } break; } + case 9: /* fullchangeset */ case 7: /* patchset */ case 1: { /* changeset */ TestSessionsBlob o = {0, 0}; - if( test_tcl_integer(interp, SESSION_STREAM_TCL_VAR) ){ + if( iSub!=9 && test_tcl_integer(interp, SESSION_STREAM_TCL_VAR) ){ void *pCtx = (void*)&o; if( iSub==7 ){ rc = sqlite3session_patchset_strm(pSession, testStreamOutput, pCtx); }else{ rc = sqlite3session_changeset_strm(pSession, testStreamOutput, pCtx); } }else{ if( iSub==7 ){ rc = sqlite3session_patchset(pSession, &o.n, &o.p); + }else if( iSub==9 ){ + rc = sqlite3session_fullchangeset(pSession, &o.n, &o.p); }else{ rc = sqlite3session_changeset(pSession, &o.n, &o.p); } } if( rc==SQLITE_OK ){ @@ -310,10 +314,11 @@ if( rc!=SQLITE_OK ){ return test_session_error(interp, rc, 0); } break; } + case 2: /* delete */ Tcl_DeleteCommand(interp, Tcl_GetString(objv[0])); break; @@ -361,32 +366,31 @@ return test_session_error(interp, rc, zErr); } break; } - case 9: { /* memory_used */ + case 10: { /* memory_used */ sqlite3_int64 nMalloc = sqlite3session_memory_used(pSession); Tcl_SetObjResult(interp, Tcl_NewWideIntObj(nMalloc)); break; } - case 10: { + case 11: { sqlite3_int64 nSize = sqlite3session_changeset_size(pSession); Tcl_SetObjResult(interp, Tcl_NewWideIntObj(nSize)); break; } - case 11: { /* object_config */ + case 12: { /* object_config */ struct ObjConfOpt { const char *zName; int opt; } aOpt[] = { { "size", SQLITE_SESSION_OBJCONFIG_SIZE }, { "rowid", SQLITE_SESSION_OBJCONFIG_ROWID }, { 0, 0 } }; int sz = (int)sizeof(aOpt[0]); - int iArg; Tcl_Size iOpt; if( Tcl_GetIndexFromObjStruct(interp,objv[2],aOpt,sz,"option",0,&iOpt) ){ return TCL_ERROR; } @@ -539,11 +543,11 @@ Tcl_BackgroundError(interp); } Tcl_DecrRefCount(pEval); return res; -} +} static int test_conflict_handler( void *pCtx, /* Pointer to TestConflictHandler structure */ int eConf, /* DATA, MISSING, CONFLICT, CONSTRAINT */ sqlite3_changeset_iter *pIter /* Handle describing change and conflict */ @@ -1186,10 +1190,131 @@ return test_session_error(interp, rc, 0); } return TCL_OK; } + +#include "sqlite3changebatch.h" + +typedef struct TestChangebatch TestChangebatch; +struct TestChangebatch { + sqlite3_changebatch *pChangebatch; +}; + +/* +** Tclcmd: $changebatch add BLOB +** $changebatch zero +** $changebatch delete +*/ +static int SQLITE_TCLAPI test_changebatch_cmd( + void *clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + TestChangebatch *p = (TestChangebatch*)clientData; + sqlite3_changebatch *pChangebatch = p->pChangebatch; + struct SessionSubcmd { + const char *zSub; + int nArg; + const char *zMsg; + int iSub; + } aSub[] = { + { "add", 1, "CHANGESET", }, /* 0 */ + { "zero", 0, "", }, /* 1 */ + { "delete", 0, "", }, /* 2 */ + { 0 } + }; + int iSub; + int rc; + + if( objc<2 ){ + Tcl_WrongNumArgs(interp, 1, objv, "SUBCOMMAND ..."); + return TCL_ERROR; + } + rc = Tcl_GetIndexFromObjStruct(interp, + objv[1], aSub, sizeof(aSub[0]), "sub-command", 0, &iSub + ); + if( rc!=TCL_OK ) return rc; + if( objc!=2+aSub[iSub].nArg ){ + Tcl_WrongNumArgs(interp, 2, objv, aSub[iSub].zMsg); + return TCL_ERROR; + } + + switch( iSub ){ + case 0: { /* add */ + Tcl_Size nArg; + unsigned char *pArg = Tcl_GetByteArrayFromObj(objv[2], &nArg); + rc = sqlite3changebatch_add(pChangebatch, pArg, (int)nArg); + if( rc!=SQLITE_OK && rc!=SQLITE_CONSTRAINT ){ + return test_session_error(interp, rc, 0); + }else{ + extern const char *sqlite3ErrName(int); + Tcl_SetObjResult(interp, Tcl_NewStringObj(sqlite3ErrName(rc), -1)); + } + break; + } + + case 1: { /* zero */ + sqlite3changebatch_zero(pChangebatch); + break; + } + + case 2: /* delete */ + Tcl_DeleteCommand(interp, Tcl_GetString(objv[0])); + break; + } + + return TCL_OK; +} + +static void SQLITE_TCLAPI test_changebatch_del(void *clientData){ + TestChangebatch *p = (TestChangebatch*)clientData; + sqlite3changebatch_delete(p->pChangebatch); + ckfree((char*)p); +} + +/* +** Tclcmd: sqlite3changebatch CMD DB-HANDLE +*/ +static int SQLITE_TCLAPI test_sqlite3changebatch( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + sqlite3 *db; + Tcl_CmdInfo info; + int rc; /* sqlite3session_create() return code */ + TestChangebatch *p; /* New wrapper object */ + + if( objc!=3 ){ + Tcl_WrongNumArgs(interp, 1, objv, "CMD DB-HANDLE"); + return TCL_ERROR; + } + + if( 0==Tcl_GetCommandInfo(interp, Tcl_GetString(objv[2]), &info) ){ + Tcl_AppendResult(interp, "no such handle: ", Tcl_GetString(objv[2]), 0); + return TCL_ERROR; + } + db = *(sqlite3 **)info.objClientData; + + p = (TestChangebatch*)ckalloc(sizeof(TestChangebatch)); + memset(p, 0, sizeof(TestChangebatch)); + rc = sqlite3changebatch_new(db, &p->pChangebatch); + if( rc!=SQLITE_OK ){ + ckfree((char*)p); + return test_session_error(interp, rc, 0); + } + + Tcl_CreateObjCommand( + interp, Tcl_GetString(objv[1]), test_changebatch_cmd, (ClientData)p, + test_changebatch_del + ); + Tcl_SetObjResult(interp, objv[1]); + return TCL_OK; +} /* ** tclcmd: CMD configure REBASE-BLOB ** tclcmd: CMD rebase CHANGESET ** tclcmd: CMD delete @@ -1741,9 +1866,13 @@ for(i=0; izCmd, p->xProc, 0, 0); } + + Tcl_CreateObjCommand( + interp, "sqlite3changebatch", test_sqlite3changebatch, 0, 0 + ); return TCL_OK; } #endif /* SQLITE_TEST && SQLITE_SESSION && SQLITE_PREUPDATE_HOOK */ Index: ext/wasm/api/sqlite3-worker1-promiser.c-pp.js ================================================================== --- ext/wasm/api/sqlite3-worker1-promiser.c-pp.js +++ ext/wasm/api/sqlite3-worker1-promiser.c-pp.js @@ -333,12 +333,12 @@ //#if target=es6-module /** When built as a module, we export sqlite3Worker1Promiser.v2() instead of sqlite3Worker1Promise() because (A) its interface is more - conventional for ESM usage and (B) the ESM export option for this - API did not exist until v2 was created, so there's no backwards + conventional for ESM usage and (B) the ESM option export option for + this API did not exist until v2 was created, so there's no backwards incompatibility. */ export default sqlite3Worker1Promiser.v2; //#endif /* target=es6-module */ //#else Index: ext/wasm/demo-worker1-promiser.c-pp.js ================================================================== --- ext/wasm/demo-worker1-promiser.c-pp.js +++ ext/wasm/demo-worker1-promiser.c-pp.js @@ -113,21 +113,18 @@ await wtest('exec',{ sql: ["create table t(a,b)", "insert into t(a,b) values(1,2),(3,4),(5,6)" ].join(';'), resultRows: [], columnNames: [], - lastInsertRowId: true, countChanges: sqConfig.bigIntEnabled ? 64 : true }, function(ev){ ev = ev.result; T.assert(0===ev.resultRows.length) .assert(0===ev.columnNames.length) .assert(sqConfig.bigIntEnabled ? (3n===ev.changeCount) - : (3===ev.changeCount)) - .assert('bigint'===typeof ev.lastInsertRowId) - .assert(ev.lastInsertRowId>=3); + : (3===ev.changeCount)); }); await wtest('exec',{ sql: 'select a a, b b from t order by a', resultRows: [], columnNames: [], Index: main.mk ================================================================== --- main.mk +++ main.mk @@ -776,10 +776,11 @@ $(TOP)/src/test_window.c \ $(TOP)/src/test_wsd.c \ $(TOP)/ext/fts3/fts3_term.c \ $(TOP)/ext/fts3/fts3_test.c \ $(TOP)/ext/session/test_session.c \ + $(TOP)/ext/session/sqlite3changebatch.c \ $(TOP)/ext/recover/sqlite3recover.c \ $(TOP)/ext/recover/dbdata.c \ $(TOP)/ext/recover/test_recover.c \ $(TOP)/ext/intck/test_intck.c \ $(TOP)/ext/intck/sqlite3intck.c \ @@ -875,10 +876,11 @@ $(TOP)/ext/fts3/fts3_expr.c \ $(TOP)/ext/fts3/fts3_tokenizer.c \ $(TOP)/ext/fts3/fts3_write.c \ $(TOP)/ext/session/sqlite3session.c \ $(TOP)/ext/misc/stmt.c \ + $(TOP)/ext/session/test_session.c \ fts5.c # Header files used by all library source files. # HDR = \ Index: src/bitvec.c ================================================================== --- src/bitvec.c +++ src/bitvec.c @@ -169,10 +169,16 @@ int sqlite3BitvecSet(Bitvec *p, u32 i){ u32 h; if( p==0 ) return SQLITE_OK; assert( i>0 ); assert( i<=p->iSize ); + if( i>p->iSize || i==0 ){ + sqlite3_log(SQLITE_ERROR, + "Bitvec: setting bit %d of bitvec size %d\n", (int)i, (int)p->iSize + ); + abort(); + } i--; while((p->iSize > BITVEC_NBIT) && p->iDivisor) { u32 bin = i/p->iDivisor; i = i%p->iDivisor; if( p->u.apSub[bin]==0 ){ Index: src/btree.c ================================================================== --- src/btree.c +++ src/btree.c @@ -526,10 +526,243 @@ } } #endif /* SQLITE_OMIT_SHARED_CACHE */ +#ifndef SQLITE_OMIT_CONCURRENT +/* +** The following structure - BtreePtrmap - stores the in-memory pointer map +** used for newly allocated pages in CONCURRENT transactions. Such pages are +** always allocated in a contiguous block (from the end of the file) starting +** with page BtreePtrmap.iFirst. +*/ +typedef struct RollbackEntry RollbackEntry; +typedef struct PtrmapEntry PtrmapEntry; +struct PtrmapEntry { + Pgno parent; + u8 eType; +}; +struct RollbackEntry { + Pgno pgno; + Pgno parent; + u8 eType; +}; +struct BtreePtrmap { + Pgno iFirst; /* First new page number aPtr[0] */ + + int nPtrAlloc; /* Allocated size of aPtr[] array */ + PtrmapEntry *aPtr; /* Array of parent page numbers */ + + int nSvpt; /* Used size of aSvpt[] array */ + int nSvptAlloc; /* Allocated size of aSvpt[] */ + int *aSvpt; /* First aRollback[] entry for savepoint i */ + + int nRollback; /* Used size of aRollback[] array */ + int nRollbackAlloc; /* Allocated size of aRollback[] array */ + RollbackEntry *aRollback; /* Array of rollback entries */ +}; + +/* !defined(SQLITE_OMIT_CONCURRENT) +** +** If page number pgno is greater than or equal to BtreePtrmap.iFirst, +** store an entry for it in the pointer-map structure. +*/ +static int btreePtrmapStore( + BtShared *pBt, + Pgno pgno, + u8 eType, + Pgno parent +){ + BtreePtrmap *pMap = pBt->pMap; + if( pgno>=pMap->iFirst ){ + int iEntry = pgno - pMap->iFirst; + + /* Grow the aPtr[] array as required */ + while( iEntry>=pMap->nPtrAlloc ){ + int nNew = pMap->nPtrAlloc ? pMap->nPtrAlloc*2 : 16; + PtrmapEntry *aNew = (PtrmapEntry*)sqlite3_realloc( + pMap->aPtr, nNew*sizeof(PtrmapEntry) + ); + if( aNew==0 ){ + return SQLITE_NOMEM; + }else{ + int nByte = (nNew-pMap->nPtrAlloc)*sizeof(PtrmapEntry); + memset(&aNew[pMap->nPtrAlloc], 0, nByte); + pMap->aPtr = aNew; + pMap->nPtrAlloc = nNew; + } + } + + /* Add an entry to the rollback log if required */ + if( pMap->nSvpt>0 && pMap->aPtr[iEntry].parent ){ + if( pMap->nRollback>=pMap->nRollbackAlloc ){ + int nNew = pMap->nRollback ? pMap->nRollback*2 : 16; + RollbackEntry *aNew = (RollbackEntry*)sqlite3_realloc( + pMap->aRollback, nNew*sizeof(RollbackEntry) + ); + if( aNew==0 ){ + return SQLITE_NOMEM; + }else{ + pMap->aRollback = aNew; + pMap->nRollbackAlloc = nNew; + } + } + + pMap->aRollback[pMap->nRollback].pgno = pgno; + pMap->aRollback[pMap->nRollback].parent = pMap->aPtr[iEntry].parent; + pMap->aRollback[pMap->nRollback].eType = pMap->aPtr[iEntry].eType; + pMap->nRollback++; + } + + /* Update the aPtr[] array */ + pMap->aPtr[iEntry].parent = parent; + pMap->aPtr[iEntry].eType = eType; + } + + return SQLITE_OK; +} + +/* !defined(SQLITE_OMIT_CONCURRENT) +** +** Open savepoint iSavepoint, if it is not already open. +*/ +static int btreePtrmapBegin(BtShared *pBt, int nSvpt){ + BtreePtrmap *pMap = pBt->pMap; + if( pMap && nSvpt>pMap->nSvpt ){ + int i; + if( nSvpt>=pMap->nSvptAlloc ){ + int nNew = pMap->nSvptAlloc ? pMap->nSvptAlloc*2 : 16; + int *aNew = sqlite3_realloc(pMap->aSvpt, sizeof(int) * nNew); + if( aNew==0 ){ + return SQLITE_NOMEM; + }else{ + pMap->aSvpt = aNew; + pMap->nSvptAlloc = nNew; + } + } + + for(i=pMap->nSvpt; iaSvpt[i] = pMap->nRollback; + } + pMap->nSvpt = nSvpt; + } + + return SQLITE_OK; +} + +/* !defined(SQLITE_OMIT_CONCURRENT) +** +** Rollback (if op==SAVEPOINT_ROLLBACK) or release (if op==SAVEPOINT_RELEASE) +** savepoint iSvpt. +*/ +static void btreePtrmapEnd(BtShared *pBt, int op, int iSvpt){ + BtreePtrmap *pMap = pBt->pMap; + if( pMap ){ + assert( op==SAVEPOINT_ROLLBACK || op==SAVEPOINT_RELEASE ); + assert( iSvpt>=0 || (iSvpt==-1 && op==SAVEPOINT_ROLLBACK) ); + if( iSvpt<0 ){ + pMap->nSvpt = 0; + pMap->nRollback = 0; + memset(pMap->aPtr, 0, sizeof(Pgno) * pMap->nPtrAlloc); + }else if( iSvptnSvpt ){ + if( op==SAVEPOINT_ROLLBACK ){ + int ii; + for(ii=pMap->nRollback-1; ii>=pMap->aSvpt[iSvpt]; ii--){ + RollbackEntry *p = &pMap->aRollback[ii]; + PtrmapEntry *pEntry = &pMap->aPtr[p->pgno - pMap->iFirst]; + pEntry->parent = p->parent; + pEntry->eType = p->eType; + } + } + pMap->nSvpt = iSvpt + (op==SAVEPOINT_ROLLBACK); + pMap->nRollback = pMap->aSvpt[iSvpt]; + } + } +} + +/* !defined(SQLITE_OMIT_CONCURRENT) +** +** This function is called after an CONCURRENT transaction is opened on the +** database. It allocates the BtreePtrmap structure used to track pointers +** to allocated pages and zeroes the nFree/iTrunk fields in the database +** header on page 1. +*/ +static int btreePtrmapAllocate(BtShared *pBt){ + int rc = SQLITE_OK; + if( pBt->pMap==0 ){ + BtreePtrmap *pMap = sqlite3_malloc(sizeof(BtreePtrmap)); + if( pMap==0 ){ + rc = SQLITE_NOMEM; + }else{ + memset(&pBt->pPage1->aData[32], 0, sizeof(u32)*2); + memset(pMap, 0, sizeof(BtreePtrmap)); + pMap->iFirst = pBt->nPage + 1; + pBt->pMap = pMap; + } + } + return rc; +} + +/* !defined(SQLITE_OMIT_CONCURRENT) +** +** Free any BtreePtrmap structure allocated by an earlier call to +** btreePtrmapAllocate(). +*/ +static void btreePtrmapDelete(BtShared *pBt){ + BtreePtrmap *pMap = pBt->pMap; + if( pMap ){ + sqlite3_free(pMap->aRollback); + sqlite3_free(pMap->aPtr); + sqlite3_free(pMap->aSvpt); + sqlite3_free(pMap); + pBt->pMap = 0; + } +} + +/* +** Check that the pointer-map does not contain any entries with a parent +** page of 0. Call sqlite3_log() multiple times to output the entire +** data structure if it does. +*/ +static void btreePtrmapCheck(BtShared *pBt, Pgno nPage){ + Pgno i; + int bProblem = 0; + BtreePtrmap *p = pBt->pMap; + + for(i=p->iFirst; i<=nPage; i++){ + PtrmapEntry *pEntry = &p->aPtr[i-p->iFirst]; + if( pEntry->eType==PTRMAP_OVERFLOW1 + || pEntry->eType==PTRMAP_OVERFLOW2 + || pEntry->eType==PTRMAP_BTREE + ){ + if( pEntry->parent==0 ){ + bProblem = 1; + break; + } + } + } + + if( bProblem ){ + for(i=p->iFirst; i<=nPage; i++){ + PtrmapEntry *pEntry = &p->aPtr[i-p->iFirst]; + sqlite3_log(SQLITE_CORRUPT, + "btreePtrmapCheck: pgno=%d eType=%d parent=%d", + (int)i, (int)pEntry->eType, (int)pEntry->parent + ); + } + abort(); + } +} + +#else /* SQLITE_OMIT_CONCURRENT */ +# define btreePtrmapAllocate(x) SQLITE_OK +# define btreePtrmapDelete(x) +# define btreePtrmapBegin(x,y) SQLITE_OK +# define btreePtrmapEnd(x,y,z) +# define btreePtrmapCheck(y,z) +#endif /* SQLITE_OMIT_CONCURRENT */ + static void releasePage(MemPage *pPage); /* Forward reference */ static void releasePageOne(MemPage *pPage); /* Forward reference */ static void releasePageNotNull(MemPage *pPage); /* Forward reference */ /* @@ -1067,10 +1300,17 @@ if( *pRC ) return; assert( sqlite3_mutex_held(pBt->mutex) ); /* The super-journal page number must never be used as a pointer map page */ assert( 0==PTRMAP_ISPAGE(pBt, PENDING_BYTE_PAGE(pBt)) ); + +#ifndef SQLITE_OMIT_CONCURRENT + if( pBt->pMap ){ + *pRC = btreePtrmapStore(pBt, key, eType, parent); + return; + } +#endif assert( pBt->autoVacuum ); if( key==0 ){ *pRC = SQLITE_CORRUPT_BKPT; return; @@ -2406,10 +2646,21 @@ assert( pPage->aData==sqlite3PagerGetData(pDbPage) ); *ppPage = pPage; return SQLITE_OK; } +#ifndef SQLITE_OMIT_CONCURRENT +/* +** Set the value of the MemPage.pgnoRoot variable, if it exists. +*/ +static void setMempageRoot(MemPage *pPg, u32 pgnoRoot){ + pPg->pgnoRoot = pgnoRoot; +} +#else +# define setMempageRoot(x,y) +#endif + /* ** Release a MemPage. This should be called once for each prior ** call to btreeGetPage. ** ** Page1 is a special case and must be released using releasePageOne(). @@ -3592,10 +3843,11 @@ int *pSchemaVersion /* Put schema version number here, if not NULL */ ){ BtShared *pBt = p->pBt; Pager *pPager = pBt->pPager; int rc = SQLITE_OK; + int bConcurrent = (p->db->eConcurrent && !ISAUTOVACUUM(pBt)); sqlite3BtreeEnter(p); btreeIntegrity(p); /* If the btree is already in a write-transaction, or it @@ -3679,11 +3931,12 @@ if( rc==SQLITE_OK && wrflag ){ if( (pBt->btsFlags & BTS_READ_ONLY)!=0 ){ rc = SQLITE_READONLY; }else{ - rc = sqlite3PagerBegin(pPager, wrflag>1, sqlite3TempInMemory(p->db)); + int exFlag = bConcurrent ? -1 : (wrflag>1); + rc = sqlite3PagerBegin(pPager, exFlag, sqlite3TempInMemory(p->db)); if( rc==SQLITE_OK ){ rc = newDatabase(pBt); }else if( rc==SQLITE_BUSY_SNAPSHOT && pBt->inTransaction==TRANS_NONE ){ /* if there was no transaction opened when this function was ** called and SQLITE_BUSY_SNAPSHOT is returned, change the error @@ -3743,20 +3996,33 @@ } } } trans_begun: +#ifndef SQLITE_OMIT_CONCURRENT + if( bConcurrent && rc==SQLITE_OK && sqlite3PagerIsWal(pBt->pPager) ){ + rc = sqlite3PagerBeginConcurrent(pBt->pPager); + if( rc==SQLITE_OK && wrflag ){ + rc = btreePtrmapAllocate(pBt); + } + } +#endif + if( rc==SQLITE_OK ){ if( pSchemaVersion ){ *pSchemaVersion = get4byte(&pBt->pPage1->aData[40]); } if( wrflag ){ /* This call makes sure that the pager has the correct number of ** open savepoints. If the second parameter is greater than 0 and ** the sub-journal is not already open, then it will be opened here. */ - rc = sqlite3PagerOpenSavepoint(pPager, p->db->nSavepoint); + int nSavepoint = p->db->nSavepoint; + rc = sqlite3PagerOpenSavepoint(pPager, nSavepoint); + if( rc==SQLITE_OK && nSavepoint ){ + rc = btreePtrmapBegin(pBt, nSavepoint); + } } } btreeIntegrity(p); sqlite3BtreeLeave(p); @@ -3765,10 +4031,19 @@ int sqlite3BtreeBeginTrans(Btree *p, int wrflag, int *pSchemaVersion){ BtShared *pBt; if( p->sharable || p->inTrans==TRANS_NONE || (p->inTrans==TRANS_READ && wrflag!=0) +#ifndef SQLITE_OMIT_CONCURRENT + /* Always use the full version for "BEGIN CONCURRENT" transactions. This + ** is to ensure that any required calls to btreePtrmapBegin() are made. + ** These calls are not present on trunk (they're part of the + ** begin-concurrent patch), and so they are not present in the fast path + ** below. And it's easier just to call the full version every time than + ** to complicate the code below by adding btreePtrmapBegin() calls. */ + || p->db->eConcurrent!=CONCURRENT_NONE +#endif ){ return btreeBeginTrans(p,wrflag,pSchemaVersion); } pBt = p->pBt; if( pSchemaVersion ){ @@ -4242,10 +4517,193 @@ #else /* ifndef SQLITE_OMIT_AUTOVACUUM */ # define setChildPtrmaps(x) SQLITE_OK #endif +#ifndef SQLITE_OMIT_CONCURRENT +/* +** This function is called as part of merging an CONCURRENT transaction with +** the snapshot at the head of the wal file. It relocates all pages in the +** range iFirst..iLast, inclusive. It is assumed that the BtreePtrmap +** structure at BtShared.pMap contains the location of the pointers to each +** page in the range. +** +** If pnCurrent is NULL, then all pages in the range are moved to currently +** free locations (i.e. free-list entries) within the database file before page +** iFirst. +** +** Or, if pnCurrent is not NULL, then it points to a value containing the +** current size of the database file in pages. In this case, all pages are +** relocated to the end of the database file - page iFirst is relocated to +** page (*pnCurrent+1), page iFirst+1 to page (*pnCurrent+2), and so on. +** Value *pnCurrent is set to the new size of the database before this +** function returns. +** +** If no error occurs, SQLITE_OK is returned. Otherwise, an SQLite error code. +*/ +static int btreeRelocateRange( + BtShared *pBt, /* B-tree handle */ + Pgno iFirst, /* First page to relocate */ + Pgno iLast, /* Last page to relocate */ + Pgno *pnCurrent /* If not NULL, IN/OUT: Database size */ +){ + int rc = SQLITE_OK; + BtreePtrmap *pMap = pBt->pMap; + Pgno iPg; + + for(iPg=iFirst; iPg<=iLast && rc==SQLITE_OK; iPg++){ + MemPage *pFree = 0; /* Page allocated from free-list */ + MemPage *pPg = 0; + Pgno iNew; /* New page number for pPg */ + PtrmapEntry *pEntry; /* Pointer map entry for page iPg */ + + if( iPg==PENDING_BYTE_PAGE(pBt) ) continue; + pEntry = &pMap->aPtr[iPg - pMap->iFirst]; + + if( pEntry->eType==PTRMAP_FREEPAGE ){ + Pgno dummy; + rc = allocateBtreePage(pBt, &pFree, &dummy, iPg, BTALLOC_EXACT); + if( pFree ){ + assert( sqlite3PagerPageRefcount(pFree->pDbPage)==1 ); + sqlite3PcacheDrop(pFree->pDbPage); + } + assert( rc!=SQLITE_OK || dummy==iPg ); + }else if( pnCurrent ){ + btreeGetPage(pBt, iPg, &pPg, 0); + assert( sqlite3PagerIswriteable(pPg->pDbPage) ); + assert( sqlite3PagerPageRefcount(pPg->pDbPage)==1 ); + iNew = ++(*pnCurrent); + if( iNew==PENDING_BYTE_PAGE(pBt) ) iNew = ++(*pnCurrent); + rc = relocatePage(pBt, pPg, pEntry->eType, pEntry->parent, iNew, 1); + releasePageNotNull(pPg); + }else if( pEntry->eType!=0 ){ + + /* Allocate a new page from the free-list to move page iPg to. + ** Except - if the page allocated is within the range being relocated + ** (i.e. pgno>=iFirst), then discard it and allocate another. */ + do { + rc = allocateBtreePage(pBt, &pFree, &iNew, 0, 0); + if( iNew>=iFirst ){ + assert( sqlite3PagerPageRefcount(pFree->pDbPage)==1 ); + assert( iNew>iPg ); + sqlite3PcacheDrop(pFree->pDbPage); + pMap->aPtr[iNew - pMap->iFirst].eType = 0; + pFree = 0; + } + }while( pFree==0 ); + + assert( rc!=SQLITE_OK || iNeweType, pEntry->parent,iNew,1); + releasePage(pPg); + } + } + } + return rc; +} + +/* !defined(SQLITE_OMIT_CONCURRENT) +** +** The b-tree handle passed as the only argument is about to commit an +** CONCURRENT transaction. At this point it is guaranteed that this is +** possible - the wal WRITER lock is held and it is known that there are +** no conflicts with committed transactions. +*/ +static int btreeFixUnlocked(Btree *p){ + BtShared *pBt = p->pBt; + MemPage *pPage1 = pBt->pPage1; + u8 *p1 = pPage1->aData; + Pager *pPager = pBt->pPager; + int rc = SQLITE_OK; + + /* If page 1 of the database is not writable, then no pages were allocated + ** or freed by this transaction. In this case no special handling is + ** required. Otherwise, if page 1 is dirty, proceed. */ + BtreePtrmap *pMap = pBt->pMap; + Pgno iTrunk = get4byte(&p1[32]); + Pgno nPage = btreePagecount(pBt); + u32 nFree = get4byte(&p1[36]); + + assert( pBt->pMap ); + rc = sqlite3PagerUpgradeSnapshot(pPager, pPage1->pDbPage); + assert( p1==pPage1->aData ); + + if( rc==SQLITE_OK ){ + Pgno nHPage = get4byte(&p1[28]); + Pgno nFin = nHPage; /* Size of db after transaction merge */ + + if( sqlite3PagerIswriteable(pPage1->pDbPage) ){ + Pgno iHTrunk = get4byte(&p1[32]); + u32 nHFree = get4byte(&p1[36]); + + btreePtrmapCheck(pBt, nPage); + + /* Attach the head database free list to the end of the current + ** transactions free-list (if any). */ + if( iTrunk!=0 ){ + put4byte(&p1[36], nHFree + nFree); + put4byte(&p1[32], iTrunk); + while( iTrunk ){ + DbPage *pTrunk = sqlite3PagerLookup(pPager, iTrunk); + iTrunk = get4byte((u8*)pTrunk->pData); + if( iTrunk==0 ){ + put4byte((u8*)pTrunk->pData, iHTrunk); + } + sqlite3PagerUnref(pTrunk); + }; + } + + if( nHPage<(pMap->iFirst-1) ){ + /* The database consisted of (pMap->iFirst-1) pages when the current + ** concurrent transaction was opened. And an concurrent transaction may + ** not be executed on an auto-vacuum database - so the db should + ** not have shrunk since the transaction was opened. Therefore nHPage + ** should be set to (pMap->iFirst-1) or greater. */ + rc = SQLITE_CORRUPT_BKPT; + }else{ + /* The current transaction allocated pages pMap->iFirst through + ** nPage (inclusive) at the end of the database file. Meanwhile, + ** other transactions have allocated (iFirst..nHPage). So move + ** pages (iFirst..MIN(nPage,nHPage)) to (MAX(nPage,nHPage)+1). */ + Pgno iLast = MIN(nPage, nHPage); /* Last page to move */ + Pgno nCurrent; /* Current size of db */ + + nCurrent = MAX(nPage, nHPage); + pBt->nPage = nCurrent; + rc = btreeRelocateRange(pBt, pMap->iFirst, iLast, &nCurrent); + + /* There are now no collisions with the snapshot at the head of the + ** database file. So at this point it would be possible to write + ** the transaction out to disk. Before doing so though, attempt to + ** relocate some of the new pages to free locations within the body + ** of the database file (i.e. free-list entries). */ + if( rc==SQLITE_OK ){ + assert( nCurrent!=PENDING_BYTE_PAGE(pBt) ); + sqlite3PagerSetDbsize(pBt->pPager, nCurrent); + nFree = get4byte(&p1[36]); + nFin = nCurrent-nFree; + if( nCurrent>PENDING_BYTE_PAGE(pBt) && nFin<=PENDING_BYTE_PAGE(pBt) ){ + nFin--; + } + nFin = MAX(nFin, nHPage); + rc = btreeRelocateRange(pBt, nFin+1, nCurrent, 0); + } + + put4byte(&p1[28], nFin); + } + } + sqlite3PagerSetDbsize(pPager, nFin); + } + + return rc; +} +#else +# define btreeFixUnlocked(X) SQLITE_OK +#endif /* SQLITE_OMIT_CONCURRENT */ + /* ** This routine does the first phase of a two-phase commit. This routine ** causes a rollback journal to be created (if it does not already exist) ** and populated with enough information so that if a power loss occurs ** the database can be restored to its original state by playing back @@ -4275,10 +4733,11 @@ if( p->inTrans==TRANS_WRITE ){ BtShared *pBt = p->pBt; sqlite3BtreeEnter(p); #ifndef SQLITE_OMIT_AUTOVACUUM if( pBt->autoVacuum ){ + assert( ISCONCURRENT==0 ); rc = autoVacuumCommit(p); if( rc!=SQLITE_OK ){ sqlite3BtreeLeave(p); return rc; } @@ -4285,11 +4744,16 @@ } if( pBt->bDoTruncate ){ sqlite3PagerTruncateImage(pBt->pPager, pBt->nPage); } #endif - rc = sqlite3PagerCommitPhaseOne(pBt->pPager, zSuperJrnl, 0); + if( rc==SQLITE_OK && ISCONCURRENT && p->db->eConcurrent==CONCURRENT_OPEN ){ + rc = btreeFixUnlocked(p); + } + if( rc==SQLITE_OK ){ + rc = sqlite3PagerCommitPhaseOne(pBt->pPager, zSuperJrnl, 0); + } sqlite3BtreeLeave(p); } return rc; } @@ -4328,10 +4792,15 @@ ** pager if this call closed the only read or write transaction. */ p->inTrans = TRANS_NONE; unlockBtreeIfUnused(pBt); } + /* If this was an CONCURRENT transaction, delete the pBt->pMap object. + ** Also call PagerEndConcurrent() to ensure that the pager has discarded + ** the record of all pages read within the transaction. */ + btreePtrmapDelete(pBt); + sqlite3PagerEndConcurrent(pBt->pPager); btreeIntegrity(p); } /* ** Commit the transaction currently in progress. @@ -4557,10 +5026,13 @@ ** an index greater than all savepoints created explicitly using ** SQL statements. It is illegal to open, release or rollback any ** such savepoints while the statement transaction savepoint is active. */ rc = sqlite3PagerOpenSavepoint(pBt->pPager, iStatement); + if( rc==SQLITE_OK ){ + rc = btreePtrmapBegin(pBt, iStatement); + } sqlite3BtreeLeave(p); return rc; } /* @@ -4580,10 +5052,11 @@ if( p && p->inTrans==TRANS_WRITE ){ BtShared *pBt = p->pBt; assert( op==SAVEPOINT_RELEASE || op==SAVEPOINT_ROLLBACK ); assert( iSavepoint>=0 || (iSavepoint==-1 && op==SAVEPOINT_ROLLBACK) ); sqlite3BtreeEnter(p); + btreePtrmapEnd(pBt, op, iSavepoint); if( op==SAVEPOINT_ROLLBACK ){ rc = saveAllCursors(pBt, 0, 0); } if( rc==SQLITE_OK ){ rc = sqlite3PagerSavepoint(pBt->pPager, op, iSavepoint); @@ -5240,10 +5713,12 @@ sqlite3_file *fd = sqlite3PagerFile(pBt->pPager); u8 aSave[4]; u8 *aWrite = &pBuf[-4]; assert( aWrite>=pBufStart ); /* due to (6) */ memcpy(aSave, aWrite, 4); + rc = sqlite3PagerUsePage(pBt->pPager, nextPage); + if( rc!=SQLITE_OK ) break; rc = sqlite3OsRead(fd, aWrite, a+4, (i64)pBt->pageSize*(nextPage-1)); nextPage = get4byte(aWrite); memcpy(aWrite, aSave, 4); }else #endif @@ -5252,10 +5727,13 @@ DbPage *pDbPage; rc = sqlite3PagerGet(pBt->pPager, nextPage, &pDbPage, (eOp==0 ? PAGER_GET_READONLY : 0) ); if( rc==SQLITE_OK ){ + setMempageRoot( + (MemPage*)sqlite3PagerGetExtra(pDbPage), pCur->pgnoRoot + ); aPayload = sqlite3PagerGetData(pDbPage); nextPage = get4byte(aPayload); rc = copyPayload(&aPayload[offset+4], pBuf, a, eOp, pDbPage); sqlite3PagerUnref(pDbPage); offset = 0; @@ -5402,10 +5880,11 @@ ** the new child page does not match the flags field of the parent (i.e. ** if an intkey page appears to be the parent of a non-intkey page, or ** vice-versa). */ static int moveToChild(BtCursor *pCur, u32 newPgno){ + BtShared *pBt = pCur->pBt; int rc; assert( cursorOwnsBtShared(pCur) ); assert( pCur->eState==CURSOR_VALID ); assert( pCur->iPageiPage>=0 ); @@ -5416,17 +5895,18 @@ pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); pCur->aiIdx[pCur->iPage] = pCur->ix; pCur->apPage[pCur->iPage] = pCur->pPage; pCur->ix = 0; pCur->iPage++; - rc = getAndInitPage(pCur->pBt, newPgno, &pCur->pPage, pCur->curPagerFlags); - assert( pCur->pPage!=0 || rc!=SQLITE_OK ); - if( rc==SQLITE_OK - && (pCur->pPage->nCell<1 || pCur->pPage->intKey!=pCur->curIntKey) - ){ - releasePage(pCur->pPage); - rc = SQLITE_CORRUPT_PGNO(newPgno); + rc = getAndInitPage(pBt, newPgno, &pCur->pPage, pCur->curPagerFlags); + if( rc==SQLITE_OK ){ + assert( pCur->pPage!=0 ); + setMempageRoot(pCur->pPage, pCur->pgnoRoot); + if( pCur->pPage->nCell<1 || pCur->pPage->intKey!=pCur->curIntKey ){ + releasePage(pCur->pPage); + rc = SQLITE_CORRUPT_PGNO(newPgno); + } } if( rc ){ pCur->pPage = pCur->apPage[--pCur->iPage]; } return rc; @@ -5539,10 +6019,11 @@ pCur->curPagerFlags); if( rc!=SQLITE_OK ){ pCur->eState = CURSOR_INVALID; return rc; } + setMempageRoot(pCur->pPage, pCur->pgnoRoot); pCur->iPage = 0; pCur->curIntKey = pCur->pPage->intKey; } pRoot = pCur->pPage; assert( pRoot->pgno==pCur->pgnoRoot || CORRUPT_DB ); @@ -6181,10 +6662,11 @@ } if( rc ){ pCur->pPage = pCur->apPage[--pCur->iPage]; break; } + setMempageRoot(pCur->pPage, pCur->pgnoRoot); /* ***** End of in-lined moveToChild() call */ } moveto_index_finish: pCur->info.nSize = 0; @@ -6451,20 +6933,29 @@ MemPage *pTrunk = 0; MemPage *pPrevTrunk = 0; Pgno mxPage; /* Total size of the database file */ assert( sqlite3_mutex_held(pBt->mutex) ); - assert( eMode==BTALLOC_ANY || (nearby>0 && IfNotOmitAV(pBt->autoVacuum)) ); + assert( eMode==BTALLOC_ANY || (nearby>0 && REQUIRE_PTRMAP ) ); pPage1 = pBt->pPage1; mxPage = btreePagecount(pBt); /* EVIDENCE-OF: R-21003-45125 The 4-byte big-endian integer at offset 36 ** stores the total number of pages on the freelist. */ n = get4byte(&pPage1->aData[36]); testcase( n==mxPage-1 ); if( n>=mxPage ){ return SQLITE_CORRUPT_BKPT; } + + /* Ensure page 1 is writable. This function will either change the number + ** of pages in the free-list or the size of the database file. Since both + ** of these operations involve modifying page 1 header fields, page 1 + ** will definitely be written by this transaction. If this is an CONCURRENT + ** transaction, ensure the BtreePtrmap structure has been allocated. */ + rc = sqlite3PagerWrite(pPage1->pDbPage); + if( rc ) return rc; + if( n>0 ){ /* There are pages on the freelist. Reuse one of those pages. */ Pgno iTrunk; u8 searchList = 0; /* If the free-list must be searched for 'nearby' */ u32 nSearch = 0; /* Count of the number of search attempts */ @@ -6471,32 +6962,33 @@ /* If eMode==BTALLOC_EXACT and a query of the pointer-map ** shows that the page 'nearby' is somewhere on the free-list, then ** the entire-list will be searched for that page. */ -#ifndef SQLITE_OMIT_AUTOVACUUM if( eMode==BTALLOC_EXACT ){ - if( nearby<=mxPage ){ - u8 eType; - assert( nearby>0 ); - assert( pBt->autoVacuum ); - rc = ptrmapGet(pBt, nearby, &eType, 0); - if( rc ) return rc; - if( eType==PTRMAP_FREEPAGE ){ - searchList = 1; - } + assert( ISAUTOVACUUM(pBt)!=ISCONCURRENT ); + if( ISAUTOVACUUM(pBt) ){ + if( nearby<=mxPage ){ + u8 eType; + assert( nearby>0 ); + assert( pBt->autoVacuum ); + rc = ptrmapGet(pBt, nearby, &eType, 0); + if( rc ) return rc; + if( eType==PTRMAP_FREEPAGE ){ + searchList = 1; + } + } + }else{ + searchList = 1; } }else if( eMode==BTALLOC_LE ){ searchList = 1; } -#endif /* Decrement the free-list count by 1. Set iTrunk to the index of the ** first free-list trunk page. iPrevTrunk is initially 1. */ - rc = sqlite3PagerWrite(pPage1->pDbPage); - if( rc ) return rc; put4byte(&pPage1->aData[36], n-1); /* The code within this loop is run only once if the 'searchList' variable ** is not true. Otherwise, it runs once for each trunk-page on the ** free-list until the page 'nearby' is located (eMode==BTALLOC_EXACT) @@ -6800,11 +7292,11 @@ } /* If the database supports auto-vacuum, write an entry in the pointer-map ** to indicate that the page is free. */ - if( ISAUTOVACUUM(pBt) ){ + if( REQUIRE_PTRMAP ){ ptrmapPut(pBt, iPage, PTRMAP_FREEPAGE, 0, &rc); if( rc ) goto freepage_out; } /* Now manipulate the actual database free-list structure. There are two @@ -7134,11 +7626,11 @@ PTRMAP_ISPAGE(pBt, pgnoOvfl) || pgnoOvfl==PENDING_BYTE_PAGE(pBt) ); } #endif rc = allocateBtreePage(pBt, &pOvfl, &pgnoOvfl, pgnoOvfl, 0); -#ifndef SQLITE_OMIT_AUTOVACUUM + /* If the database supports auto-vacuum, and the second or subsequent ** overflow page is being allocated, add an entry to the pointer-map ** for that page now. ** ** If this is the first overflow page, then write a partial entry @@ -7145,18 +7637,17 @@ ** to the pointer-map. If we write nothing to this pointer-map slot, ** then the optimistic overflow chain processing in clearCell() ** may misinterpret the uninitialized values and delete the ** wrong pages from the database. */ - if( pBt->autoVacuum && rc==SQLITE_OK ){ + if( REQUIRE_PTRMAP && rc==SQLITE_OK ){ u8 eType = (pgnoPtrmap?PTRMAP_OVERFLOW2:PTRMAP_OVERFLOW1); ptrmapPut(pBt, pgnoOvfl, eType, pgnoPtrmap, &rc); if( rc ){ releasePage(pOvfl); } } -#endif if( rc ){ releasePage(pToRelease); return rc; } @@ -7296,10 +7787,11 @@ ** balancing, and the dividers are adjacent and sorted. */ assert( j==0 || pPage->aiOvfl[j-1]<(u16)i ); /* Overflows in sorted order */ assert( j==0 || i==pPage->aiOvfl[j-1]+1 ); /* Overflows are sequential */ }else{ + BtShared *pBt = pPage->pBt; int rc = sqlite3PagerWrite(pPage->pDbPage); if( NEVER(rc!=SQLITE_OK) ){ return rc; } assert( sqlite3PagerIswriteable(pPage->pDbPage) ); @@ -7325,20 +7817,18 @@ put2byte(pIns, idx); pPage->nCell++; /* increment the cell count */ if( (++data[pPage->hdrOffset+4])==0 ) data[pPage->hdrOffset+3]++; assert( get2byte(&data[pPage->hdrOffset+3])==pPage->nCell || CORRUPT_DB ); -#ifndef SQLITE_OMIT_AUTOVACUUM - if( pPage->pBt->autoVacuum ){ + if( REQUIRE_PTRMAP ){ int rc2 = SQLITE_OK; /* The cell may contain a pointer to an overflow page. If so, write ** the entry for the overflow page into the pointer map. */ ptrmapPutOvflPtr(pPage, pPage, pCell, &rc2); if( rc2 ) return rc2; } -#endif } return SQLITE_OK; } /* @@ -7386,10 +7876,11 @@ ** balancing, and the dividers are adjacent and sorted. */ assert( j==0 || pPage->aiOvfl[j-1]<(u16)i ); /* Overflows in sorted order */ assert( j==0 || i==pPage->aiOvfl[j-1]+1 ); /* Overflows are sequential */ }else{ + BtShared *pBt = pPage->pBt; int rc = sqlite3PagerWrite(pPage->pDbPage); if( rc!=SQLITE_OK ){ return rc; } assert( sqlite3PagerIswriteable(pPage->pDbPage) ); @@ -7399,30 +7890,28 @@ if( rc ){ return rc; } /* The allocateSpace() routine guarantees the following properties ** if it returns successfully */ assert( idx >= 0 ); assert( idx >= pPage->cellOffset+2*pPage->nCell+2 || CORRUPT_DB ); - assert( idx+sz <= (int)pPage->pBt->usableSize ); + assert( idx+sz <= (int)pBt->usableSize ); pPage->nFree -= (u16)(2 + sz); memcpy(&data[idx], pCell, sz); pIns = pPage->aCellIdx + i*2; memmove(pIns+2, pIns, 2*(pPage->nCell - i)); put2byte(pIns, idx); pPage->nCell++; /* increment the cell count */ if( (++data[pPage->hdrOffset+4])==0 ) data[pPage->hdrOffset+3]++; assert( get2byte(&data[pPage->hdrOffset+3])==pPage->nCell || CORRUPT_DB ); -#ifndef SQLITE_OMIT_AUTOVACUUM - if( pPage->pBt->autoVacuum ){ + if( REQUIRE_PTRMAP ){ int rc2 = SQLITE_OK; /* The cell may contain a pointer to an overflow page. If so, write ** the entry for the overflow page into the pointer map. */ ptrmapPutOvflPtr(pPage, pPage, pCell, &rc2); if( rc2 ) return rc2; } -#endif } return SQLITE_OK; } /* @@ -7982,11 +8471,11 @@ ** of the parent page are still manipulated by the code below. ** That is Ok, at this point the parent page is guaranteed to ** be marked as dirty. Returning an error code will cause a ** rollback, undoing any changes made to the parent page. */ - if( ISAUTOVACUUM(pBt) ){ + if( REQUIRE_PTRMAP ){ ptrmapPut(pBt, pgnoNew, PTRMAP_BTREE, pParent->pgno, &rc); if( szCell>pNew->minLocal ){ ptrmapPutOvflPtr(pNew, pNew, pCell, &rc); } } @@ -8120,11 +8609,11 @@ } /* If this is an auto-vacuum database, update the pointer-map entries ** for any b-tree or overflow pages that pTo now contains the pointers to. */ - if( ISAUTOVACUUM(pBt) ){ + if( REQUIRE_PTRMAP ){ *pRC = setChildPtrmaps(pTo); } } } @@ -8171,11 +8660,12 @@ static int balance_nonroot( MemPage *pParent, /* Parent page of siblings being balanced */ int iParentIdx, /* Index of "the page" in pParent */ u8 *aOvflSpace, /* page-size bytes of space for parent ovfl */ int isRoot, /* True if pParent is a root-page */ - int bBulk /* True if this call is part of a bulk load */ + int bBulk, /* True if this call is part of a bulk load */ + Pgno pgnoRoot /* Root page of b-tree being balanced */ ){ BtShared *pBt; /* The whole database */ int nMaxCells = 0; /* Allocated size of apCell, szCell, aFrom. */ int nNew = 0; /* Number of pages in apNew[] */ int nOld; /* Number of pages in apOld[] */ @@ -8261,10 +8751,11 @@ } if( rc ){ memset(apOld, 0, (i+1)*sizeof(MemPage*)); goto balance_cleanup; } + setMempageRoot(apOld[i], pgnoRoot); if( apOld[i]->nFree<0 ){ rc = btreeComputeFreeSpace(apOld[i]); if( rc ){ memset(apOld, 0, (i)*sizeof(MemPage*)); goto balance_cleanup; @@ -8610,11 +9101,11 @@ apNew[i] = pNew; nNew++; cntOld[i] = b.nCell; /* Set the pointer-map entry for the new sibling page. */ - if( ISAUTOVACUUM(pBt) ){ + if( REQUIRE_PTRMAP ){ ptrmapPut(pBt, pNew->pgno, PTRMAP_BTREE, pParent->pgno, &rc); if( rc!=SQLITE_OK ){ goto balance_cleanup; } } @@ -8703,11 +9194,11 @@ ** If the sibling pages are not leaves, then the pointer map entry ** associated with the right-child of each sibling may also need to be ** updated. This happens below, after the sibling pages have been ** populated, not here. */ - if( ISAUTOVACUUM(pBt) ){ + if( REQUIRE_PTRMAP ){ MemPage *pOld; MemPage *pNew = pOld = apNew[0]; int cntOldNext = pNew->nCell + pNew->nOverflow; int iNew = 0; int iOld = 0; @@ -8899,11 +9390,11 @@ - apNew[0]->nCell*2) || rc!=SQLITE_OK ); copyNodeContent(apNew[0], pParent, &rc); freePage(apNew[0], &rc); - }else if( ISAUTOVACUUM(pBt) && !leafCorrection ){ + }else if( REQUIRE_PTRMAP && !leafCorrection ){ /* Fix the pointer map entries associated with the right-child of each ** sibling page. All other pointer map entries have already been taken ** care of. */ for(i=0; iaData[8]); @@ -8982,11 +9473,11 @@ */ rc = sqlite3PagerWrite(pRoot->pDbPage); if( rc==SQLITE_OK ){ rc = allocateBtreePage(pBt,&pChild,&pgnoChild,pRoot->pgno,0); copyNodeContent(pRoot, pChild, &rc); - if( ISAUTOVACUUM(pBt) ){ + if( REQUIRE_PTRMAP ){ ptrmapPut(pBt, pgnoChild, PTRMAP_BTREE, pRoot->pgno, &rc); } } if( rc ){ *ppChild = 0; @@ -9145,11 +9636,11 @@ ** copied either into the body of a database page or into the new ** pSpace buffer passed to the latter call to balance_nonroot(). */ u8 *pSpace = sqlite3PageMalloc(pCur->pBt->pageSize); rc = balance_nonroot(pParent, iIdx, pSpace, iPage==1, - pCur->hints&BTREE_BULKLOAD); + pCur->hints&BTREE_BULKLOAD, pCur->pgnoRoot); if( pFree ){ /* If pFree is not NULL, it points to the pSpace buffer used ** by a previous call to balance_nonroot(). Its contents are ** now stored either on real database pages or within the ** new pSpace buffer, so it may be safely freed here. */ @@ -9253,10 +9744,11 @@ pBt = pPage->pBt; ovflPageSize = pBt->usableSize - 4; do{ rc = btreeGetPage(pBt, ovflPgno, &pPage, 0); if( rc ) return rc; + setMempageRoot(pPage, pCur->pgnoRoot); if( sqlite3PagerPageRefcount(pPage->pDbPage)!=1 || pPage->isInit ){ rc = SQLITE_CORRUPT_PAGE(pPage); }else{ if( iOffset+ovflPageSize<(u32)nTotal ){ ovflPgno = get4byte(pPage->aData); @@ -9526,10 +10018,11 @@ assert( szNew <= MX_CELL_SIZE(p->pBt) ); idx = pCur->ix; pCur->info.nSize = 0; if( loc==0 ){ CellInfo info; + BtShared *pBt = p->pBt; assert( idx>=0 ); if( idx>=pPage->nCell ){ return SQLITE_CORRUPT_PAGE(pPage); } rc = sqlite3PagerWrite(pPage->pDbPage); @@ -9542,11 +10035,11 @@ } BTREE_CLEAR_CELL(rc, pPage, oldCell, info); testcase( pCur->curFlags & BTCF_ValidOvfl ); invalidateOverflowCache(pCur); if( info.nSize==szNew && info.nLocal==info.nPayload - && (!ISAUTOVACUUM(p->pBt) || szNewminLocal) + && (!REQUIRE_PTRMAP || szNewminLocal) ){ /* Overwrite the old cell with the new if they are the same size. ** We could also try to do this if the old cell is smaller, then add ** the leftover space to the free list. But experiments show that ** doing that is no faster then skipping this optimization and just @@ -9707,10 +10200,13 @@ if( nOut>0 ){ sqlite3PagerUnref(pPageIn); pPageIn = 0; rc = sqlite3PagerGet(pSrcPager, ovflIn, &pPageIn, PAGER_GET_READONLY); if( rc==SQLITE_OK ){ + setMempageRoot( + (MemPage*)sqlite3PagerGetExtra(pPageIn), pSrc->pgnoRoot + ); aIn = (const u8*)sqlite3PagerGetData(pPageIn); ovflIn = get4byte(aIn); aIn += 4; nIn = pSrc->pBt->usableSize - 4; } @@ -10131,11 +10627,12 @@ */ static int clearDatabasePage( BtShared *pBt, /* The BTree that contains the table */ Pgno pgno, /* Page number to clear */ int freePageFlag, /* Deallocate page if true */ - i64 *pnChange /* Add number of Cells freed to this counter */ + i64 *pnChange, /* Add number of Cells freed to this counter */ + Pgno pgnoRoot ){ MemPage *pPage; int rc; unsigned char *pCell; int i; @@ -10146,10 +10643,11 @@ if( pgno>btreePagecount(pBt) ){ return SQLITE_CORRUPT_PGNO(pgno); } rc = getAndInitPage(pBt, pgno, &pPage, 0); if( rc ) return rc; + setMempageRoot(pPage, pgnoRoot); if( (pBt->openFlags & BTREE_SINGLE)==0 && sqlite3PagerPageRefcount(pPage->pDbPage) != (1 + (pgno==1)) ){ rc = SQLITE_CORRUPT_PAGE(pPage); goto cleardatabasepage_out; @@ -10156,18 +10654,20 @@ } hdr = pPage->hdrOffset; for(i=0; inCell; i++){ pCell = findCell(pPage, i); if( !pPage->leaf ){ - rc = clearDatabasePage(pBt, get4byte(pCell), 1, pnChange); + rc = clearDatabasePage(pBt, get4byte(pCell), 1, pnChange, pgnoRoot); if( rc ) goto cleardatabasepage_out; } BTREE_CLEAR_CELL(rc, pPage, pCell, info); if( rc ) goto cleardatabasepage_out; } if( !pPage->leaf ){ - rc = clearDatabasePage(pBt, get4byte(&pPage->aData[hdr+8]), 1, pnChange); + rc = clearDatabasePage( + pBt, get4byte(&pPage->aData[hdr+8]), 1, pnChange, pgnoRoot + ); if( rc ) goto cleardatabasepage_out; if( pPage->intKey ) pnChange = 0; } if( pnChange ){ testcase( !pPage->intKey ); @@ -10209,11 +10709,11 @@ ** is the root of a table b-tree - if it is not, the following call is ** a no-op). */ if( p->hasIncrblobCur ){ invalidateIncrblobCursors(p, (Pgno)iTable, 0, 1); } - rc = clearDatabasePage(pBt, (Pgno)iTable, 0, pnChange); + rc = clearDatabasePage(pBt, (Pgno)iTable, 0, pnChange, (Pgno)iTable); } sqlite3BtreeLeave(p); return rc; } @@ -11165,14 +11665,16 @@ } sqlite3MemSetArrayInt64(aCnt, i, sCheck.nRow); } pBt->db->flags = savedDbFlags; - /* Make sure every page in the file is referenced - */ if( !bPartial ){ - for(i=1; i<=sCheck.nCkPage && sCheck.mxErr; i++){ + /* Make sure every page in the file is referenced. Skip this if the + ** database is currently being written by a CONCURRENT transaction (it + ** may fail as pages that were part of the free-list when the transaction + ** was opened cannot be counted). */ + for(i=1; ISCONCURRENT==0 && i<=sCheck.nCkPage && sCheck.mxErr; i++){ #ifdef SQLITE_OMIT_AUTOVACUUM if( getPageReferenced(&sCheck, i)==0 ){ checkAppendMsg(&sCheck, "Page %u: never used", i); } #else @@ -11469,10 +11971,106 @@ /* ** Return the size of the header added to each page by this module. */ int sqlite3HeaderSizeBtree(void){ return ROUND8(sizeof(MemPage)); } + +/* +** This function is called to ensure that all locks required to commit the +** current write-transaction to the database file are held. If the db is +** in rollback mode, this means the EXCLUSIVE lock on the database file. +** +** Or, if this is an CONCURRENT transaction on a wal-mode database, the WRITER +** lock on the wal file. In this case this function also checks that the +** CONCURRENT transaction can be safely committed (does not commit with any +** other transaction committed since it was opened). +** +** SQLITE_OK is returned if successful. SQLITE_BUSY if the required locks +** cannot be obtained due to a conflicting lock. If the locks cannot be +** obtained for an CONCURRENT transaction due to a conflict with an already +** committed transaction, SQLITE_BUSY_SNAPSHOT is returned. Otherwise, if +** some other error (OOM, IO, etc.) occurs, the relevant SQLite error code +** is returned. +*/ +int sqlite3BtreeExclusiveLock(Btree *p){ + int rc; + Pgno pgno = 0; + BtShared *pBt = p->pBt; + assert( p->inTrans==TRANS_WRITE && pBt->pPage1 ); + sqlite3BtreeEnter(p); + rc = sqlite3PagerExclusiveLock(pBt->pPager, + (p->db->eConcurrent==CONCURRENT_SCHEMA) ? 0 : pBt->pPage1->pDbPage, + &pgno + ); +#ifdef SQLITE_OMIT_CONCURRENT + assert( pgno==0 ); +#else + if( rc==SQLITE_BUSY_SNAPSHOT && pgno ){ + PgHdr *pPg = 0; + int rc2 = sqlite3PagerGet(pBt->pPager, pgno, &pPg, 0); + if( rc2==SQLITE_OK ){ + int bWrite = -1; + const char *zObj = 0; + const char *zTab = 0; + char zContent[17]; + + if( pPg ){ + Pgno pgnoRoot = 0; + HashElem *pE; + Schema *pSchema; + u8 *aData = (u8*)sqlite3PagerGetData(pPg); + int i; + for(i=0; i<8; i++){ + static const char hexdigits[] = { + '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' + }; + zContent[i*2] = hexdigits[(aData[i] >> 4)]; + zContent[i*2+1] = hexdigits[(aData[i] & 0xF)]; + } + zContent[16] = '\0'; + + pgnoRoot = ((MemPage*)sqlite3PagerGetExtra(pPg))->pgnoRoot; + bWrite = sqlite3PagerIswriteable(pPg); + sqlite3PagerUnref(pPg); + + pSchema = sqlite3SchemaGet(p->db, p); + if( pSchema ){ + for(pE=sqliteHashFirst(&pSchema->tblHash); pE; pE=sqliteHashNext(pE)){ + Table *pTab = (Table *)sqliteHashData(pE); + if( pTab->tnum==pgnoRoot ){ + zObj = pTab->zName; + zTab = 0; + }else{ + Index *pIdx; + for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ + if( pIdx->tnum==pgnoRoot ){ + zObj = pIdx->zName; + zTab = pTab->zName; + } + } + } + } + } + } + + sqlite3_log(SQLITE_OK, + "cannot commit CONCURRENT transaction " + "- conflict at page %d " + "(%s page; part of db %s %s%s%s; content=%s...)", + (int)pgno, + (bWrite==0?"read-only":(bWrite>0?"read/write":"unknown")), + (zTab ? "index" : "table"), + (zTab ? zTab : ""), (zTab ? "." : ""), (zObj ? zObj : "UNKNOWN"), + zContent + ); + } + } +#endif + sqlite3BtreeLeave(p); + return rc; +} /* ** If no transaction is active and the database is not a temp-db, clear ** the in-memory pager cache. */ Index: src/btree.h ================================================================== --- src/btree.h +++ src/btree.h @@ -357,10 +357,12 @@ #ifdef SQLITE_DEBUG sqlite3_uint64 sqlite3BtreeSeekCount(Btree*); #else # define sqlite3BtreeSeekCount(X) 0 #endif + +int sqlite3BtreeExclusiveLock(Btree *pBt); #ifndef NDEBUG int sqlite3BtreeCursorIsValid(BtCursor*); #endif int sqlite3BtreeCursorIsValidNN(BtCursor*); Index: src/btreeInt.h ================================================================== --- src/btreeInt.h +++ src/btreeInt.h @@ -230,10 +230,11 @@ /* Forward declarations */ typedef struct MemPage MemPage; typedef struct BtLock BtLock; typedef struct CellInfo CellInfo; +typedef struct BtreePtrmap BtreePtrmap; /* ** This is a magic string that appears at the beginning of every ** SQLite database in order to identify the file as a real database. ** @@ -273,10 +274,13 @@ struct MemPage { u8 isInit; /* True if previously initialized. MUST BE FIRST! */ u8 intKey; /* True if table b-trees. False for index b-trees */ u8 intKeyLeaf; /* True if the leaf of an intKey table */ Pgno pgno; /* Page number for this page */ +#ifndef SQLITE_OMIT_CONCURRENT + Pgno pgnoRoot; /* Root page of b-tree that this page belongs to */ +#endif /* Only the first 8 bytes (above) are zeroed by pager.c when a new page ** is allocated. All fields that follow must be initialized before use */ u8 leaf; /* True if a leaf page */ u8 hdrOffset; /* 100 for page 1. 0 otherwise */ u8 childPtrSize; /* 0 if leaf==1. 4 if leaf==0 */ @@ -454,10 +458,13 @@ BtShared *pNext; /* Next on a list of sharable BtShared structs */ BtLock *pLock; /* List of locks held on this shared-btree struct */ Btree *pWriter; /* Btree with currently open write transaction */ #endif u8 *pTmpSpace; /* Temp space sufficient to hold a single cell */ +#ifndef SQLITE_OMIT_CONCURRENT + BtreePtrmap *pMap; +#endif int nPreformatSize; /* Size of last cell written by TransferRow() */ }; /* ** Allowed values for BtShared.btsFlags @@ -677,16 +684,23 @@ ** if the database supports auto-vacuum or not. Because it is used ** within an expression that is an argument to another macro ** (sqliteMallocRaw), it is not possible to use conditional compilation. ** So, this macro is defined instead. */ -#ifndef SQLITE_OMIT_AUTOVACUUM +#ifdef SQLITE_OMIT_AUTOVACUUM +#define ISAUTOVACUUM(pBt) 0 +#else #define ISAUTOVACUUM(pBt) (pBt->autoVacuum) +#endif + +#ifdef SQLITE_OMIT_CONCURRENT +# define ISCONCURRENT 0 #else -#define ISAUTOVACUUM(pBt) 0 +# define ISCONCURRENT (pBt->pMap!=0) #endif +#define REQUIRE_PTRMAP (ISAUTOVACUUM(pBt) || ISCONCURRENT) /* ** This structure is passed around through all the PRAGMA integrity_check ** checking routines in order to keep track of some global state information. ** Index: src/build.c ================================================================== --- src/build.c +++ src/build.c @@ -88,12 +88,14 @@ int iDb, /* Index of the database containing the table to lock */ Pgno iTab, /* Root page number of the table to be locked */ u8 isWriteLock, /* True for a write lock */ const char *zName /* Name of the table to be locked */ ){ +#ifdef SQLITE_OMIT_CONCURRENT if( iDb==1 ) return; if( !sqlite3BtreeSharable(pParse->db->aDb[iDb].pBt) ) return; +#endif lockTable(pParse, iDb, iTab, isWriteLock, zName); } /* ** Code an OP_TableLock instruction for each table locked by the @@ -5229,11 +5231,11 @@ if( sqlite3AuthCheck(pParse, SQLITE_TRANSACTION, "BEGIN", 0, 0) ){ return; } v = sqlite3GetVdbe(pParse); if( !v ) return; - if( type!=TK_DEFERRED ){ + if( type==TK_IMMEDIATE || type==TK_EXCLUSIVE ){ for(i=0; inDb; i++){ int eTxnType; Btree *pBt = db->aDb[i].pBt; if( pBt && sqlite3BtreeIsReadonly(pBt) ){ eTxnType = 0; /* Read txn */ @@ -5244,11 +5246,11 @@ } sqlite3VdbeAddOp2(v, OP_Transaction, i, eTxnType); sqlite3VdbeUsesBtree(v, i); } } - sqlite3VdbeAddOp0(v, OP_AutoCommit); + sqlite3VdbeAddOp3(v, OP_AutoCommit, 0, 0, (type==TK_CONCURRENT)); } /* ** Generate VDBE code for a COMMIT or ROLLBACK statement. ** Code for ROLLBACK is generated if eType==TK_ROLLBACK. Otherwise Index: src/func.c ================================================================== --- src/func.c +++ src/func.c @@ -557,12 +557,13 @@ sqlite3_context *context, int NotUsed, sqlite3_value **NotUsed2 ){ sqlite_int64 r; + sqlite3 *db = sqlite3_context_db_handle(context); UNUSED_PARAMETER2(NotUsed, NotUsed2); - sqlite3_randomness(sizeof(r), &r); + sqlite3FastRandomness(&db->sPrng, sizeof(r), &r); if( r<0 ){ /* We need to prevent a random number of 0x8000000000000000 ** (or -9223372036854775808) since when you do abs() of that ** number of you get the same value back again. To do this ** in a way that is testable, mask the sign bit off of negative @@ -584,19 +585,20 @@ int argc, sqlite3_value **argv ){ sqlite3_int64 n; unsigned char *p; + sqlite3 *db = sqlite3_context_db_handle(context); assert( argc==1 ); UNUSED_PARAMETER(argc); n = sqlite3_value_int64(argv[0]); if( n<1 ){ n = 1; } p = contextMalloc(context, n); if( p ){ - sqlite3_randomness(n, p); + sqlite3FastRandomness(&db->sPrng, n, p); sqlite3_result_blob(context, (char*)p, n, sqlite3_free); } } /* Index: src/main.c ================================================================== --- src/main.c +++ src/main.c @@ -3357,11 +3357,11 @@ db->nDb = 2; db->eOpenState = SQLITE_STATE_BUSY; db->aDb = db->aDbStatic; db->lookaside.bDisable = 1; db->lookaside.sz = 0; - + sqlite3FastPrngInit(&db->sPrng); assert( sizeof(db->aLimit)==sizeof(aHardLimit) ); memcpy(db->aLimit, aHardLimit, sizeof(db->aLimit)); db->aLimit[SQLITE_LIMIT_WORKER_THREADS] = SQLITE_DEFAULT_WORKER_THREADS; db->autoCommit = 1; db->nextAutovac = -1; @@ -5075,10 +5075,39 @@ */ void sqlite3_snapshot_free(sqlite3_snapshot *pSnapshot){ sqlite3_free(pSnapshot); } #endif /* SQLITE_ENABLE_SNAPSHOT */ + +SQLITE_EXPERIMENTAL int sqlite3_wal_info( + sqlite3 *db, const char *zDb, + unsigned int *pnPrior, unsigned int *pnFrame +){ + int rc = SQLITE_OK; + +#ifndef SQLITE_OMIT_WAL + Btree *pBt; + int iDb; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( !sqlite3SafetyCheckOk(db) ){ + return SQLITE_MISUSE_BKPT; + } +#endif + + sqlite3_mutex_enter(db->mutex); + iDb = sqlite3FindDbName(db, zDb); + if( iDb<0 ){ + return SQLITE_ERROR; + } + pBt = db->aDb[iDb].pBt; + rc = sqlite3PagerWalInfo(sqlite3BtreePager(pBt), pnPrior, pnFrame); + sqlite3_mutex_leave(db->mutex); +#endif /* SQLITE_OMIT_WAL */ + + return rc; +} #ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS /* ** Given the name of a compile-time option, return true if that option ** was used and false if not. Index: src/os_unix.c ================================================================== --- src/os_unix.c +++ src/os_unix.c @@ -1223,10 +1223,14 @@ #endif #if OS_VXWORKS sem_t *pSem; /* Named POSIX semaphore */ char aSemName[MAX_PATHNAME+2]; /* Name of that semaphore */ #endif +#ifdef SQLITE_SHARED_MAPPING + sqlite3_int64 nSharedMapping; /* Size of mapped region in bytes */ + void *pSharedMapping; /* Memory mapped region */ +#endif }; /* ** A lists of all unixInodeInfo objects. ** @@ -1379,10 +1383,17 @@ assert( unixFileMutexNotheld(pFile) ); if( ALWAYS(pInode) ){ pInode->nRef--; if( pInode->nRef==0 ){ assert( pInode->pShmNode==0 ); +#ifdef SQLITE_SHARED_MAPPING + if( pInode->pSharedMapping ){ + osMunmap(pInode->pSharedMapping, pInode->nSharedMapping); + pInode->pSharedMapping = 0; + pInode->nSharedMapping = 0; + } +#endif sqlite3_mutex_enter(pInode->pLockMutex); closePendingFds(pFile); sqlite3_mutex_leave(pInode->pLockMutex); if( pInode->pPrev ){ assert( pInode->pPrev->pNext==pInode ); @@ -2249,10 +2260,18 @@ /* ** Close the file. */ static int nolockClose(sqlite3_file *id) { +#ifdef SQLITE_SHARED_MAPPING + unixFile *pFd = (unixFile*)id; + if( pFd->pInode ){ + unixEnterMutex(); + releaseInodeInfo(pFd); + unixLeaveMutex(); + } +#endif return closeUnixFile(id); } /******************* End of the no-op lock implementation ********************* ******************************************************************************/ @@ -4079,10 +4098,13 @@ } *(i64*)pArg = pFile->mmapSizeMax; if( newLimit>=0 && newLimit!=pFile->mmapSizeMax && pFile->nFetchOut==0 ){ pFile->mmapSizeMax = newLimit; +#ifdef SQLITE_SHARED_MAPPING + if( pFile->pInode==0 ) +#endif if( pFile->mmapSize>0 ){ unixUnmapfile(pFile); rc = unixMapfile(pFile, -1); } } @@ -5278,10 +5300,13 @@ /* ** If it is currently memory mapped, unmap file pFd. */ static void unixUnmapfile(unixFile *pFd){ assert( pFd->nFetchOut==0 ); +#ifdef SQLITE_SHARED_MAPPING + if( pFd->pInode ) return; +#endif if( pFd->pMapRegion ){ osMunmap(pFd->pMapRegion, pFd->mmapSizeActual); pFd->pMapRegion = 0; pFd->mmapSize = 0; pFd->mmapSizeActual = 0; @@ -5408,10 +5433,32 @@ nMap = statbuf.st_size; } if( nMap>pFd->mmapSizeMax ){ nMap = pFd->mmapSizeMax; } + +#ifdef SQLITE_SHARED_MAPPING + if( pFd->pInode ){ + unixInodeInfo *pInode = pFd->pInode; + if( pFd->pMapRegion ) return SQLITE_OK; + unixEnterMutex(); + if( pInode->pSharedMapping==0 ){ + u8 *pNew = osMmap(0, nMap, PROT_READ, MAP_SHARED, pFd->h, 0); + if( pNew==MAP_FAILED ){ + unixLogError(SQLITE_OK, "mmap", pFd->zPath); + pFd->mmapSizeMax = 0; + }else{ + pInode->pSharedMapping = pNew; + pInode->nSharedMapping = nMap; + } + } + pFd->pMapRegion = pInode->pSharedMapping; + pFd->mmapSizeActual = pFd->mmapSize = pInode->nSharedMapping; + unixLeaveMutex(); + return SQLITE_OK; + } +#endif assert( nMap>0 || (pFd->mmapSize==0 && pFd->pMapRegion==0) ); if( nMap!=pFd->mmapSize ){ unixRemapfile(pFd, nMap); } @@ -5852,10 +5899,13 @@ if( pLockingStyle == &posixIoMethods #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE || pLockingStyle == &nfsIoMethods #endif +#ifdef SQLITE_SHARED_MAPPING + || pLockingStyle == &nolockIoMethods +#endif ){ unixEnterMutex(); rc = findInodeInfo(pNew, &pNew->pInode); if( rc!=SQLITE_OK ){ /* If an error occurred in findInodeInfo(), close the file descriptor Index: src/pager.c ================================================================== --- src/pager.c +++ src/pager.c @@ -656,10 +656,13 @@ int errCode; /* One of several kinds of errors */ int nRec; /* Pages journalled since last j-header written */ u32 cksumInit; /* Quasi-random value added to every checksum */ u32 nSubRec; /* Number of records written to sub-journal */ Bitvec *pInJournal; /* One bit for each page in the database file */ +#ifndef SQLITE_OMIT_CONCURRENT + Bitvec *pAllRead; /* Pages read within current CONCURRENT trans. */ +#endif sqlite3_file *fd; /* File descriptor for database */ sqlite3_file *jfd; /* File descriptor for main journal */ sqlite3_file *sjfd; /* File descriptor for sub-journal */ i64 journalOff; /* Current write offset in the journal file */ i64 journalHdr; /* Byte offset to previous journal header */ @@ -906,11 +909,13 @@ assert( p->eLock!=UNKNOWN_LOCK ); assert( pPager->errCode==SQLITE_OK ); if( !pagerUseWal(pPager) ){ assert( p->eLock>=RESERVED_LOCK ); } - assert( pPager->dbSize==pPager->dbOrigSize ); +#ifndef SQLITE_OMIT_CONCURRENT + assert( pPager->dbSize==pPager->dbOrigSize || pPager->pAllRead ); +#endif assert( pPager->dbOrigSize==pPager->dbFileSize ); assert( pPager->dbOrigSize==pPager->dbHintSize ); assert( pPager->setSuper==0 ); break; @@ -1813,10 +1818,57 @@ assert( rc==SQLITE_OK || rc==SQLITE_NOMEM ); } } return rc; } + +#ifndef SQLITE_OMIT_CONCURRENT +/* +** If they are not already, begin recording all pages read from the pager layer +** by the b-tree layer This is used by concurrent transactions. Return +** SQLITE_OK if successful, or an SQLite error code (SQLITE_NOMEM) if an error +** occurs. +*/ +int sqlite3PagerBeginConcurrent(Pager *pPager){ + int rc = SQLITE_OK; + if( pPager->pAllRead==0 ){ + pPager->pAllRead = sqlite3BitvecCreate(pPager->dbSize); + pPager->dbOrigSize = pPager->dbSize; + if( pPager->pAllRead==0 ){ + rc = SQLITE_NOMEM; + } + } + return rc; +} + +/* !defined(SQLITE_OMIT_CONCURRENT) +** +** Stop recording all pages read from the pager layer by the b-tree layer +** and discard any current records. +*/ +void sqlite3PagerEndConcurrent(Pager *pPager){ + sqlite3BitvecDestroy(pPager->pAllRead); + pPager->pAllRead = 0; +} + +/* !defined(SQLITE_OMIT_CONCURRENT) +** +** Return true if the database is in wal mode. False otherwise. +*/ +int sqlite3PagerIsWal(Pager *pPager){ + return pPager->pWal!=0; +} +#endif /* SQLITE_OMIT_CONCURRENT */ + +/* +** Free the Pager.pInJournal and Pager.pAllRead bitvec objects. +*/ +static void pagerFreeBitvecs(Pager *pPager){ + sqlite3BitvecDestroy(pPager->pInJournal); + pPager->pInJournal = 0; + sqlite3PagerEndConcurrent(pPager); +} /* ** This function is a no-op if the pager is in exclusive mode and not ** in the ERROR state. Otherwise, it switches the pager to PAGER_OPEN ** state. @@ -1838,12 +1890,11 @@ assert( pPager->eState==PAGER_READER || pPager->eState==PAGER_OPEN || pPager->eState==PAGER_ERROR ); - sqlite3BitvecDestroy(pPager->pInJournal); - pPager->pInJournal = 0; + pagerFreeBitvecs(pPager); releaseAllSavepoints(pPager); if( pagerUseWal(pPager) ){ assert( !isOpen(pPager->jfd) ); if( pPager->eState==PAGER_ERROR ){ @@ -2115,12 +2166,11 @@ sqlite3PagerUnrefNotNull(p); } } #endif - sqlite3BitvecDestroy(pPager->pInJournal); - pPager->pInJournal = 0; + pagerFreeBitvecs(pPager); pPager->nRec = 0; if( rc==SQLITE_OK ){ if( MEMDB || pagerFlushOnCommit(pPager, bCommit) ){ sqlite3PcacheCleanAll(pPager->pPCache); }else{ @@ -3149,12 +3199,28 @@ ** ** + Discard the cached page (if refcount==0), or ** + Reload page content from the database (if refcount>0). */ pPager->dbSize = pPager->dbOrigSize; - rc = sqlite3WalUndo(pPager->pWal, pagerUndoCallback, (void *)pPager); + rc = sqlite3WalUndo(pPager->pWal, pagerUndoCallback, (void *)pPager, +#ifdef SQLITE_OMIT_CONCURRENT + 0 +#else + pPager->pAllRead!=0 +#endif + ); pList = sqlite3PcacheDirtyList(pPager->pPCache); + +#ifndef SQLITE_OMIT_CONCURRENT + /* If this is an CONCURRENT transaction, then page 1 must be reread from + ** the db file, even if it is not dirty. This is because the b-tree layer + ** may have already zeroed the nFree and iTrunk header fields. */ + if( rc==SQLITE_OK && (pList==0 || pList->pgno!=1) && pPager->pAllRead ){ + rc = pagerUndoCallback((void*)pPager, 1); + } +#endif + while( pList && rc==SQLITE_OK ){ PgHdr *pNext = pList->pDirty; rc = pagerUndoCallback((void *)pPager, pList->pgno); pList = pNext; } @@ -3200,10 +3266,12 @@ nList = 0; for(p=pList; (*ppNext = p)!=0; p=p->pDirty){ if( p->pgno<=nTruncate ){ ppNext = &p->pDirty; nList++; + PAGERTRACE(("TO-WAL %d page %d hash(%08x)\n", + PAGERID(pPager), p->pgno, pager_pagehash(p))); } } assert( pList ); }else{ nList = 1; @@ -4270,11 +4338,11 @@ || pPager->eState==PAGER_WRITER_DBMOD ); assert( assert_pager_state(pPager) ); assert( !pagerUseWal(pPager) ); - rc = sqlite3PagerExclusiveLock(pPager); + rc = sqlite3PagerExclusiveLock(pPager, 0, 0); if( rc!=SQLITE_OK ) return rc; if( !pPager->noSync ){ assert( !pPager->tempFile ); if( isOpen(pPager->jfd) && pPager->journalMode!=PAGER_JOURNALMODE_MEMORY ){ @@ -4621,10 +4689,16 @@ } pPager->aStat[PAGER_STAT_SPILL]++; pPg->pDirty = 0; if( pagerUseWal(pPager) ){ +#ifndef SQLITE_OMIT_CONCURRENT + /* If the transaction is a "BEGIN CONCURRENT" transaction, the page + ** cannot be flushed to disk. Return early in this case. */ + if( pPager->pAllRead ) return SQLITE_OK; +#endif + /* Write a single frame for this page to the log. */ rc = subjournalPageIfRequired(pPg); if( rc==SQLITE_OK ){ rc = pagerWalFrames(pPager, pPg, 0, 0); } @@ -5453,10 +5527,27 @@ assert( pPager->nMmapOut==0 ); /* because page1 is never memory mapped */ pagerUnlockAndRollback(pPager); } } +#ifndef SQLITE_OMIT_CONCURRENT +/* +** If this pager is currently in a concurrent transaction (pAllRead!=0), +** then set the bit in the pAllRead vector to indicate that the transaction +** read from page pgno. Return SQLITE_OK if successful, or an SQLite error +** code (i.e. SQLITE_NOMEM) if an error occurs. +*/ +int sqlite3PagerUsePage(Pager *pPager, Pgno pgno){ + int rc = SQLITE_OK; + if( pPager->pAllRead && pgno<=pPager->dbOrigSize ){ + PAGERTRACE(("USING page %d\n", pgno)); + rc = sqlite3BitvecSet(pPager->pAllRead, pgno); + } + return rc; +} +#endif + /* ** The page getter methods each try to acquire a reference to a ** page with page number pgno. If the requested reference is ** successfully obtained, it is copied to *ppPage and SQLITE_OK returned. ** @@ -5525,10 +5616,18 @@ assert( pPager->errCode==SQLITE_OK ); assert( pPager->eState>=PAGER_READER ); assert( assert_pager_state(pPager) ); assert( pPager->hasHeldSharedLock==1 ); + + /* If this is an CONCURRENT transaction and the page being read was + ** present in the database file when the transaction was opened, + ** mark it as read in the pAllRead vector. */ + if( (rc = sqlite3PagerUsePage(pPager, pgno))!=SQLITE_OK ){ + pPg = 0; + goto pager_acquire_err; + } if( pgno==0 ) return SQLITE_CORRUPT_BKPT; pBase = sqlite3PcacheFetch(pPager->pPCache, pgno, 3); if( pBase==0 ){ pPg = 0; @@ -5884,14 +5983,17 @@ /* ** Begin a write-transaction on the specified pager object. If a ** write-transaction has already been opened, this function is a no-op. ** -** If the exFlag argument is false, then acquire at least a RESERVED -** lock on the database file. If exFlag is true, then acquire at least +** If the exFlag argument is 0, then acquire at least a RESERVED +** lock on the database file. If exFlag is >0, then acquire at least ** an EXCLUSIVE lock. If such a lock is already held, no locking ** functions need be called. +** +** If (exFlag<0) and the database is in WAL mode, do not take any locks. +** The transaction will run in CONCURRENT mode instead. ** ** If the subjInMemory argument is non-zero, then any sub-journal opened ** within this transaction will be opened as an in-memory file. This ** has no effect if the sub-journal is already opened (as it may be when ** running in exclusive mode) or if the transaction does not require a @@ -5906,11 +6008,10 @@ assert( pPager->eState>=PAGER_READER && pPager->eStatesubjInMemory = (u8)subjInMemory; if( pPager->eState==PAGER_READER ){ assert( pPager->pInJournal==0 ); - if( pagerUseWal(pPager) ){ /* If the pager is configured to use locking_mode=exclusive, and an ** exclusive lock on the database is not already held, obtain it now. */ if( pPager->exclusiveMode && sqlite3WalExclusiveMode(pPager->pWal, -1) ){ @@ -5922,21 +6023,22 @@ } /* Grab the write lock on the log file. If successful, upgrade to ** PAGER_RESERVED state. Otherwise, return an error code to the caller. ** The busy-handler is not invoked if another connection already - ** holds the write-lock. If possible, the upper layer will call it. - */ - rc = sqlite3WalBeginWriteTransaction(pPager->pWal); + ** holds the write-lock. If possible, the upper layer will call it. */ + if( exFlag>=0 ){ + rc = sqlite3WalBeginWriteTransaction(pPager->pWal); + } }else{ /* Obtain a RESERVED lock on the database file. If the exFlag parameter ** is true, then immediately upgrade this to an EXCLUSIVE lock. The ** busy-handler callback can be used when upgrading to the EXCLUSIVE ** lock, but not when obtaining the RESERVED lock. */ rc = pagerLockDb(pPager, RESERVED_LOCK); - if( rc==SQLITE_OK && exFlag ){ + if( rc==SQLITE_OK && exFlag>0 ){ rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); } } if( rc==SQLITE_OK ){ @@ -6232,11 +6334,11 @@ /* ** Return TRUE if the page given in the argument was previously passed ** to sqlite3PagerWrite(). In other words, return TRUE if it is ok ** to change the content of the page. */ -#ifndef NDEBUG +#if !defined(SQLITE_OMIT_CONCURRENT) || !defined(NDEBUG) int sqlite3PagerIswriteable(DbPage *pPg){ return pPg->flags & PGHDR_WRITEABLE; } #endif @@ -6388,21 +6490,30 @@ } return rc; } /* -** This function may only be called while a write-transaction is active in -** rollback. If the connection is in WAL mode, this call is a no-op. -** Otherwise, if the connection does not already have an EXCLUSIVE lock on -** the database file, an attempt is made to obtain one. +** This function is called to ensure that all locks required to commit the +** current write-transaction to the database file are held. If the db is +** in rollback mode, this means the EXCLUSIVE lock on the database file. +** +** Or, if this is a non-CONCURRENT transaction on a wal-mode database, this +** function is a no-op. +** +** If this is an CONCURRENT transaction on a wal-mode database, this function +** attempts to obtain the WRITER lock on the wal file and also checks to +** see that the transaction can be safely committed (does not commit with +** any other transaction committed since it was opened). ** -** If the EXCLUSIVE lock is already held or the attempt to obtain it is -** successful, or the connection is in WAL mode, SQLITE_OK is returned. -** Otherwise, either SQLITE_BUSY or an SQLITE_IOERR_XXX error code is -** returned. +** If the required locks are already held or successfully obtained and +** the transaction can be committed, SQLITE_OK is returned. If a required lock +** cannot be obtained, SQLITE_BUSY is returned. Or, if the current transaction +** is CONCURRENT and cannot be committed due to a conflict, SQLITE_BUSY_SNAPSHOT +** is returned. Otherwise, if some other error occurs (IO error, OOM etc.), +** and SQLite error code is returned. */ -int sqlite3PagerExclusiveLock(Pager *pPager){ +int sqlite3PagerExclusiveLock(Pager *pPager, PgHdr *pPage1, Pgno *piConflict){ int rc = pPager->errCode; assert( assert_pager_state(pPager) ); if( rc==SQLITE_OK ){ assert( pPager->eState==PAGER_WRITER_CACHEMOD || pPager->eState==PAGER_WRITER_DBMOD @@ -6410,13 +6521,76 @@ ); assert( assert_pager_state(pPager) ); if( 0==pagerUseWal(pPager) ){ rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); } +#ifndef SQLITE_OMIT_CONCURRENT + else{ + if( pPager->pAllRead ){ + /* This is an CONCURRENT transaction. Attempt to lock the wal database + ** here. If SQLITE_BUSY (but not SQLITE_BUSY_SNAPSHOT) is returned, + ** invoke the busy-handler and try again for as long as it returns + ** non-zero. */ + do { + rc = sqlite3WalLockForCommit( + pPager->pWal, pPage1, pPager->pAllRead, piConflict + ); + }while( rc==SQLITE_BUSY + && pPager->xBusyHandler(pPager->pBusyHandlerArg) + ); + } + } +#endif /* SQLITE_OMIT_CONCURRENT */ + } + return rc; +} + +#ifndef SQLITE_OMIT_CONCURRENT +/* +** This function is called as part of committing an CONCURRENT transaction. +** At this point the wal WRITER lock is held, and all pages in the cache +** except for page 1 are compatible with the snapshot at the head of the +** wal file. +** +** This function updates the in-memory data structures and reloads the +** contents of page 1 so that the client is operating on the snapshot +** at the head of the wal file. +** +** SQLITE_OK is returned if successful, or an SQLite error code otherwise. +*/ +int sqlite3PagerUpgradeSnapshot(Pager *pPager, DbPage *pPage1){ + int rc; + + assert( pPager->pWal && pPager->pAllRead ); + rc = sqlite3WalUpgradeSnapshot(pPager->pWal); + if( rc==SQLITE_OK ){ + rc = readDbPage(pPage1); } + return rc; } + +/* !defined(SQLITE_OMIT_CONCURRENT) +** +** Set the in-memory cache of the database file size to nSz pages. +*/ +void sqlite3PagerSetDbsize(Pager *pPager, Pgno nSz){ + pPager->dbSize = nSz; +} + +/* !defined(SQLITE_OMIT_CONCURRENT) +** +** If this is a WAL mode connection and the WRITER lock is currently held, +** relinquish it. +*/ +void sqlite3PagerDropExclusiveLock(Pager *pPager){ + if( pagerUseWal(pPager) ){ + sqlite3WalEndWriteTransaction(pPager->pWal); + } +} +#endif /* SQLITE_OMIT_CONCURRENT */ + /* ** Sync the database file for the pager pPager. zSuper points to the name ** of a super-journal file that should be written into the individual ** journal file. zSuper may be NULL, which is interpreted as no @@ -7781,10 +7955,15 @@ assert( pPager->pWal ); sqlite3WalSnapshotUnlock(pPager->pWal); } #endif /* SQLITE_ENABLE_SNAPSHOT */ + +int sqlite3PagerWalInfo(Pager *pPager, u32 *pnPrior, u32 *pnFrame){ + return sqlite3WalInfo(pPager->pWal, pnPrior, pnFrame); +} + #endif /* !SQLITE_OMIT_WAL */ #ifdef SQLITE_ENABLE_ZIPVFS /* ** A read-lock must be held on the pager when this function is called. If Index: src/pager.h ================================================================== --- src/pager.h +++ src/pager.h @@ -177,11 +177,11 @@ /* Functions used to manage pager transactions and savepoints. */ void sqlite3PagerPagecount(Pager*, int*); int sqlite3PagerBegin(Pager*, int exFlag, int); int sqlite3PagerCommitPhaseOne(Pager*,const char *zSuper, int); -int sqlite3PagerExclusiveLock(Pager*); +int sqlite3PagerExclusiveLock(Pager*, DbPage *pPage1, Pgno*); int sqlite3PagerSync(Pager *pPager, const char *zSuper); int sqlite3PagerCommitPhaseTwo(Pager*); int sqlite3PagerRollback(Pager*); int sqlite3PagerOpenSavepoint(Pager *pPager, int n); int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint); @@ -239,14 +239,32 @@ /* Functions used to truncate the database file. */ void sqlite3PagerTruncateImage(Pager*,Pgno); void sqlite3PagerRekey(DbPage*, Pgno, u16); +#ifndef SQLITE_OMIT_CONCURRENT +int sqlite3PagerUsePage(Pager*, Pgno); +void sqlite3PagerEndConcurrent(Pager*); +int sqlite3PagerBeginConcurrent(Pager*); +void sqlite3PagerDropExclusiveLock(Pager*); +int sqlite3PagerUpgradeSnapshot(Pager *pPager, DbPage*); +void sqlite3PagerSetDbsize(Pager *pPager, Pgno); +int sqlite3PagerIsWal(Pager*); +#else +# define sqlite3PagerEndConcurrent(x) +# define sqlite3PagerUsePage(x, y) SQLITE_OK +#endif + +#if defined(SQLITE_DEBUG) || !defined(SQLITE_OMIT_CONCURRENT) +int sqlite3PagerIswriteable(DbPage*); +#endif + +int sqlite3PagerWalInfo(Pager*, u32 *pnPrior, u32 *pnFrame); + /* Functions to support testing and debugging. */ #if !defined(NDEBUG) || defined(SQLITE_TEST) Pgno sqlite3PagerPagenumber(DbPage*); - int sqlite3PagerIswriteable(DbPage*); #endif #ifdef SQLITE_TEST int *sqlite3PagerStats(Pager*); void sqlite3PagerRefdump(Pager*); void disable_simulated_io_errors(void); Index: src/parse.y ================================================================== --- src/parse.y +++ src/parse.y @@ -182,11 +182,20 @@ trans_opt ::= TRANSACTION nm. %type transtype {int} transtype(A) ::= . {A = TK_DEFERRED;} transtype(A) ::= DEFERRED(X). {A = @X; /*A-overwrites-X*/} transtype(A) ::= IMMEDIATE(X). {A = @X; /*A-overwrites-X*/} -transtype(A) ::= EXCLUSIVE(X). {A = @X; /*A-overwrites-X*/} +transtype(A) ::= ID(X). { + Token *p = &X; + if( p->n==9 && sqlite3_strnicmp(p->z,"exclusive",9)==0 ){ + A = TK_EXCLUSIVE; + }else if( p->n==10 && sqlite3_strnicmp(p->z,"concurrent",10)==0 ){ + A = TK_CONCURRENT; /*A-overwrites-X*/ + }else{ + parserSyntaxError(pParse, p); + } +} cmd ::= COMMIT|END(X) trans_opt. {sqlite3EndTransaction(pParse,@X);} cmd ::= ROLLBACK(X) trans_opt. {sqlite3EndTransaction(pParse,@X);} savepoint_opt ::= SAVEPOINT. savepoint_opt ::= . @@ -320,11 +329,10 @@ // An IDENTIFIER can be a generic identifier, or one of several // keywords. Any non-standard keyword can also be an identifier. // %token_class id ID|INDEXED. - // And "ids" is an identifier-or-string. // %token_class ids ID|STRING. // An identifier or a join-keyword @@ -2048,10 +2056,11 @@ FUNCTION /* A function invocation */ UPLUS /* Unary plus */ UMINUS /* Unary minus */ TRUTH /* IS TRUE or IS FALSE or IS NOT TRUE or IS NOT FALSE */ REGISTER /* Reference to a VDBE register */ + CONCURRENT /* BEGIN CONCURRENT */ VECTOR /* Vector */ SELECT_COLUMN /* Choose a single column from a multi-column SELECT */ IF_NULL_ROW /* the if-null-row operator */ ASTERISK /* The "*" in count(*) and similar */ SPAN /* The span operator */ Index: src/random.c ================================================================== --- src/random.c +++ src/random.c @@ -126,10 +126,37 @@ chacha_block((u32*)wsdPrng.out, wsdPrng.s); wsdPrng.n = 64; } sqlite3_mutex_leave(mutex); } + +/* +** Initialize a fast PRNG. A Fast PRNG is called "fast" because it does +** not need a mutex to operate, though it does use a mutex to initialize. +** The quality of the randomness is not as good as the global PRNG. +*/ +void sqlite3FastPrngInit(FastPrng *pPrng){ + sqlite3_randomness(sizeof(*pPrng), pPrng); + pPrng->x |= 1; +} + +/* +** Generate N bytes of pseudo-randomness using a FastPrng +*/ +void sqlite3FastRandomness(FastPrng *pPrng, int N, void *P){ + unsigned char *pOut = (unsigned char*)P; + while( N-->0 ){ + /* "x" is a variant of LFSR called "Xorshift" by George Marsaglia */ + pPrng->x ^= pPrng->x <<13; + pPrng->x ^= pPrng->x >>7; + pPrng->x ^= pPrng->x <<17; + /* "y" is a LCG using Don Kunth's constants from MMIX */ + pPrng->y = (pPrng->y)*6364136223846793005LL + 1442695040888963407LL; + /* XOR the two streams together to give the final result */ + *(pOut++) = (pPrng->x ^ pPrng->y) & 0xff; + } +} #ifndef SQLITE_UNTESTABLE /* ** For testing purposes, we sometimes want to preserve the state of ** PRNG and restore the PRNG to its saved state at a later time, or Index: src/select.c ================================================================== --- src/select.c +++ src/select.c @@ -2305,11 +2305,11 @@ if( zName[j]==':' ) nName = j; } zName = sqlite3MPrintf(db, "%.*z:%u", nName, zName, ++cnt); sqlite3ProgressCheck(pParse); if( cnt>3 ){ - sqlite3_randomness(sizeof(cnt), &cnt); + sqlite3FastRandomness(&db->sPrng, sizeof(cnt), &cnt); } } pCol->zCnName = zName; pCol->hName = sqlite3StrIHash(zName); if( pX->fg.bNoExpand ){ Index: src/sqlite.h.in ================================================================== --- src/sqlite.h.in +++ src/sqlite.h.in @@ -10905,10 +10905,35 @@ ** ** This interface is only available if SQLite is compiled with the ** [SQLITE_ENABLE_SNAPSHOT] option. */ SQLITE_EXPERIMENTAL int sqlite3_snapshot_recover(sqlite3 *db, const char *zDb); + +/* +** CAPI3REF: Wal related information regarding the most recent COMMIT +** EXPERIMENTAL +** +** This function reports on the state of the wal file (if any) for database +** zDb, which should be "main", "temp", or the name of the attached database. +** Its results - the values written to the output parameters - are only +** defined if the most recent SQL command on the connection was a successful +** COMMIT that wrote data to wal-mode database zDb. +** +** Assuming the above conditions are met, output parameter (*pnFrame) is set +** to the total number of frames in the wal file. Parameter (*pnPrior) is +** set to the number of frames that were present in the wal file before the +** most recent transaction was committed. So that the number of frames written +** by the most recent transaction is (*pnFrame)-(*pnPrior). +** +** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code. It +** is not an error if this function is called at a time when the results +** are undefined. +*/ +SQLITE_EXPERIMENTAL int sqlite3_wal_info( + sqlite3 *db, const char *zDb, + unsigned int *pnPrior, unsigned int *pnFrame +); /* ** CAPI3REF: Serialize a database ** ** The sqlite3_serialize(D,S,P,F) interface returns a pointer to Index: src/sqliteInt.h ================================================================== --- src/sqliteInt.h +++ src/sqliteInt.h @@ -1331,10 +1331,11 @@ typedef struct DbClientData DbClientData; typedef struct DbFixer DbFixer; typedef struct Schema Schema; typedef struct Expr Expr; typedef struct ExprList ExprList; +typedef struct FastPrng FastPrng; typedef struct FKey FKey; typedef struct FpDecode FpDecode; typedef struct FuncDestructor FuncDestructor; typedef struct FuncDef FuncDef; typedef struct FuncDefHash FuncDefHash; @@ -1458,10 +1459,18 @@ # define SQLITE_DEFAULT_SYNCHRONOUS 2 #endif #ifndef SQLITE_DEFAULT_WAL_SYNCHRONOUS # define SQLITE_DEFAULT_WAL_SYNCHRONOUS SQLITE_DEFAULT_SYNCHRONOUS #endif + +/* +** State of a simple PRNG used for the per-connection and per-pager +** pseudo-random number generators. +*/ +struct FastPrng { + sqlite3_uint64 x, y; +}; /* ** Each database file to be accessed by the system is an instance ** of the following structure. There are normally two of these structures ** in the sqlite.aDb[] array. aDb[0] is the main database file and @@ -1674,10 +1683,11 @@ int errMask; /* & result codes with this before returning */ int iSysErrno; /* Errno value from last system error */ u32 dbOptFlags; /* Flags to enable/disable optimizations */ u8 enc; /* Text encoding */ u8 autoCommit; /* The auto-commit flag. */ + u8 eConcurrent; /* CONCURRENT_* value */ u8 temp_store; /* 1: file 2: memory 0: default */ u8 mallocFailed; /* True if we have seen a malloc failure */ u8 bBenignMalloc; /* Do not require OOMs if true */ u8 dfltLockMode; /* Default locking-mode for attached dbs */ signed char nextAutovac; /* Autovac setting after VACUUM if >=0 */ @@ -1687,10 +1697,11 @@ u8 mTrace; /* zero or more SQLITE_TRACE flags */ u8 noSharedCache; /* True if no shared-cache backends */ u8 nSqlExec; /* Number of pending OP_SqlExec opcodes */ u8 eOpenState; /* Current condition of the connection */ int nextPagesize; /* Pagesize after VACUUM if >0 */ + FastPrng sPrng; /* State of the per-connection PRNG */ i64 nChange; /* Value returned by sqlite3_changes() */ i64 nTotalChange; /* Value returned by sqlite3_total_changes() */ int aLimit[SQLITE_N_LIMIT]; /* Limits */ int nMaxSorterMmap; /* Maximum size of regions mapped by sorter */ struct sqlite3InitInfo { /* Information used during initialization */ @@ -1798,10 +1809,17 @@ void (*xUnlockNotify)(void **, int); /* Unlock notify callback */ sqlite3 *pNextBlocked; /* Next in list of all blocked connections */ #endif }; +/* +** Candidate values for sqlite3.eConcurrent +*/ +#define CONCURRENT_NONE 0 +#define CONCURRENT_OPEN 1 +#define CONCURRENT_SCHEMA 2 + /* ** A macro to discover the encoding of a database. */ #define SCHEMA_ENC(db) ((db)->aDb[0].pSchema->enc) #define ENC(db) ((db)->enc) @@ -1863,10 +1881,13 @@ #define SQLITE_FkNoAction HI(0x00008) /* Treat all FK as NO ACTION */ #define SQLITE_AttachCreate HI(0x00010) /* ATTACH allowed to create new dbs */ #define SQLITE_AttachWrite HI(0x00020) /* ATTACH allowed to open for write */ #define SQLITE_Comments HI(0x00040) /* Enable SQL comments */ +/* Flags used by the Pragma noop_update enhancement */ +#define SQLITE_NoopUpdate HI(0x0001000) /* UPDATE operations are no-ops */ + /* Flags used only if debugging */ #ifdef SQLITE_DEBUG #define SQLITE_SqlTrace HI(0x0100000) /* Debug print SQL as it executes */ #define SQLITE_VdbeListing HI(0x0200000) /* Debug listings of VDBE progs */ #define SQLITE_VdbeTrace HI(0x0400000) /* True to trace VDBE execution */ @@ -5134,10 +5155,12 @@ Vdbe *sqlite3GetVdbe(Parse*); #ifndef SQLITE_UNTESTABLE void sqlite3PrngSaveState(void); void sqlite3PrngRestoreState(void); #endif +void sqlite3FastPrngInit(FastPrng*); +void sqlite3FastRandomness(FastPrng*, int N, void *P); void sqlite3RollbackAll(sqlite3*,int); void sqlite3CodeVerifySchema(Parse*, int); void sqlite3CodeVerifyNamedSchema(Parse*, const char *zDb); void sqlite3BeginTransaction(Parse*, int); void sqlite3EndTransaction(Parse*,int); Index: src/test1.c ================================================================== --- src/test1.c +++ src/test1.c @@ -8539,10 +8539,45 @@ rc = sqlite3_db_config(db, SQLITE_DBCONFIG_MAINDBNAME, "icecube"); Tcl_SetObjResult(interp, Tcl_NewIntObj(rc)); return TCL_OK; } } + +/* +** Usage: sqlite3_wal_info DB DBNAME +*/ +static int SQLITE_TCLAPI test_wal_info( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + int rc; + sqlite3 *db; + char *zName; + unsigned int nPrior; + unsigned int nFrame; + + if( objc!=3 ){ + Tcl_WrongNumArgs(interp, 1, objv, "DB DBNAME"); + return TCL_ERROR; + } + if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ) return TCL_ERROR; + zName = Tcl_GetString(objv[2]); + + rc = sqlite3_wal_info(db, zName, &nPrior, &nFrame); + if( rc!=SQLITE_OK ){ + Tcl_SetObjResult(interp, Tcl_NewStringObj(sqlite3ErrName(rc), -1)); + return TCL_ERROR; + }else{ + Tcl_Obj *pNew = Tcl_NewObj(); + Tcl_ListObjAppendElement(interp, pNew, Tcl_NewWideIntObj((i64)nPrior)); + Tcl_ListObjAppendElement(interp, pNew, Tcl_NewWideIntObj((i64)nFrame)); + Tcl_SetObjResult(interp, pNew); + } + return TCL_OK; +} /* ** Usage: sqlite3_mmap_warm DB DBNAME */ static int SQLITE_TCLAPI test_mmap_warm( @@ -9129,10 +9164,11 @@ { "sqlite3_snapshot_get_blob", test_snapshot_get_blob, 0 }, { "sqlite3_snapshot_open_blob", test_snapshot_open_blob, 0 }, { "sqlite3_snapshot_cmp_blob", test_snapshot_cmp_blob, 0 }, #endif { "sqlite3_delete_database", test_delete_database, 0 }, + { "sqlite3_wal_info", test_wal_info, 0 }, { "atomic_batch_write", test_atomic_batch_write, 0 }, { "sqlite3_mmap_warm", test_mmap_warm, 0 }, { "sqlite3_config_sorterref", test_config_sorterref, 0 }, { "sqlite3_autovacuum_pages", test_autovacuum_pages, 0 }, { "decode_hexdb", test_decode_hexdb, 0 }, Index: src/test_config.c ================================================================== --- src/test_config.c +++ src/test_config.c @@ -688,10 +688,16 @@ #ifdef SQLITE_OMIT_TRUNCATE_OPTIMIZATION Tcl_SetVar2(interp, "sqlite_options", "truncate_opt", "0", TCL_GLOBAL_ONLY); #else Tcl_SetVar2(interp, "sqlite_options", "truncate_opt", "1", TCL_GLOBAL_ONLY); #endif + +#ifndef SQLITE_OMIT_CONCURRENT + Tcl_SetVar2(interp, "sqlite_options", "concurrent", "1", TCL_GLOBAL_ONLY); +#else + Tcl_SetVar2(interp, "sqlite_options", "concurrent", "0", TCL_GLOBAL_ONLY); +#endif #ifdef SQLITE_OMIT_UTF16 Tcl_SetVar2(interp, "sqlite_options", "utf16", "0", TCL_GLOBAL_ONLY); #else Tcl_SetVar2(interp, "sqlite_options", "utf16", "1", TCL_GLOBAL_ONLY); Index: src/update.c ================================================================== --- src/update.c +++ src/update.c @@ -463,10 +463,21 @@ ** column to be updated, make sure we have authorization to change ** that column. */ chngRowid = chngPk = 0; for(i=0; inExpr; i++){ +#if defined(SQLITE_ENABLE_NOOP_UPDATE) && !defined(SQLITE_OMIT_FLAG_PRAGMAS) + if( db->flags & SQLITE_NoopUpdate ){ + Token x; + sqlite3ExprDelete(db, pChanges->a[i].pExpr); + x.z = pChanges->a[i].zEName; + x.n = sqlite3Strlen30(x.z); + pChanges->a[i].pExpr = + sqlite3PExpr(pParse, TK_UPLUS, sqlite3ExprAlloc(db, TK_ID, &x, 0), 0); + if( db->mallocFailed ) goto update_cleanup; + } +#endif /* If this is an UPDATE with a FROM clause, do not resolve expressions ** here. The call to sqlite3Select() below will do that. */ if( nChangeFrom==0 && sqlite3ResolveExprNames(&sNC, pChanges->a[i].pExpr) ){ goto update_cleanup; } Index: src/vacuum.c ================================================================== --- src/vacuum.c +++ src/vacuum.c @@ -399,10 +399,11 @@ ** by manually setting the autoCommit flag to true and detaching the ** vacuum database. The vacuum_db journal file is deleted when the pager ** is closed by the DETACH. */ db->autoCommit = 1; + assert( db->eConcurrent==0 ); if( pDb ){ sqlite3BtreeClose(pDb->pBt); pDb->pBt = 0; pDb->pSchema = 0; Index: src/vdbe.c ================================================================== --- src/vdbe.c +++ src/vdbe.c @@ -3849,10 +3849,11 @@ /* Determine whether or not this is a transaction savepoint. If so, ** and this is a RELEASE command, then the current transaction ** is committed. */ int isTransaction = pSavepoint->pNext==0 && db->isTransactionSavepoint; + assert( db->eConcurrent==0 || db->isTransactionSavepoint==0 ); if( isTransaction && p1==SAVEPOINT_RELEASE ){ if( (rc = sqlite3VdbeCheckFk(p, 1))!=SQLITE_OK ){ goto vdbe_return; } db->autoCommit = 1; @@ -3935,54 +3936,73 @@ goto vdbe_return; } break; } -/* Opcode: AutoCommit P1 P2 * * * +/* Opcode: AutoCommit P1 P2 P3 * * ** ** Set the database auto-commit flag to P1 (1 or 0). If P2 is true, roll ** back any currently active btree transactions. If there are any active ** VMs (apart from this one), then a ROLLBACK fails. A COMMIT fails if ** there are active writing VMs or active VMs that use shared cache. +** +** If P3 is non-zero, then this instruction is being executed as part of +** a "BEGIN CONCURRENT" command. ** ** This instruction causes the VM to halt. */ case OP_AutoCommit: { int desiredAutoCommit; int iRollback; + int bConcurrent; + int hrc; desiredAutoCommit = pOp->p1; iRollback = pOp->p2; + bConcurrent = pOp->p3; assert( desiredAutoCommit==1 || desiredAutoCommit==0 ); assert( desiredAutoCommit==1 || iRollback==0 ); + assert( desiredAutoCommit==0 || bConcurrent==0 ); + assert( db->autoCommit==0 || db->eConcurrent==CONCURRENT_NONE ); assert( db->nVdbeActive>0 ); /* At least this one VM is active */ assert( p->bIsReader ); if( desiredAutoCommit!=db->autoCommit ){ if( iRollback ){ assert( desiredAutoCommit==1 ); sqlite3RollbackAll(db, SQLITE_ABORT_ROLLBACK); db->autoCommit = 1; - }else if( desiredAutoCommit && db->nVdbeWrite>0 ){ - /* If this instruction implements a COMMIT and other VMs are writing - ** return an error indicating that the other VMs must complete first. - */ + db->eConcurrent = CONCURRENT_NONE; + }else if( desiredAutoCommit + && (db->nVdbeWrite>0 || (db->eConcurrent && db->nVdbeActive>1)) ){ + /* A transaction may only be committed if there are no other active + ** writer VMs. If the transaction is CONCURRENT, then it may only be + ** committed if there are no active VMs at all (readers or writers). + ** + ** If this instruction is a COMMIT and the transaction may not be + ** committed due to one of the conditions above, return an error + ** indicating that other VMs must complete before the COMMIT can + ** be processed. */ sqlite3VdbeError(p, "cannot commit transaction - " "SQL statements in progress"); rc = SQLITE_BUSY; goto abort_due_to_error; }else if( (rc = sqlite3VdbeCheckFk(p, 1))!=SQLITE_OK ){ goto vdbe_return; }else{ db->autoCommit = (u8)desiredAutoCommit; } - if( sqlite3VdbeHalt(p)==SQLITE_BUSY ){ + hrc = sqlite3VdbeHalt(p); + if( (hrc & 0xFF)==SQLITE_BUSY ){ p->pc = (int)(pOp - aOp); db->autoCommit = (u8)(1-desiredAutoCommit); - p->rc = rc = SQLITE_BUSY; + p->rc = hrc; + rc = SQLITE_BUSY; goto vdbe_return; } + assert( bConcurrent==CONCURRENT_NONE || bConcurrent==CONCURRENT_OPEN ); + db->eConcurrent = (u8)bConcurrent; sqlite3CloseSavepoints(db); if( p->rc==SQLITE_OK ){ rc = SQLITE_DONE; }else{ rc = SQLITE_ERROR; @@ -4191,10 +4211,21 @@ assert( DbMaskTest(p->btreeMask, pOp->p1) ); assert( p->readOnly==0 ); pDb = &db->aDb[pOp->p1]; assert( pDb->pBt!=0 ); assert( sqlite3SchemaMutexHeld(db, pOp->p1, 0) ); +#ifndef SQLITE_OMIT_CONCURRENT + if( db->eConcurrent + && (pOp->p2==BTREE_USER_VERSION || pOp->p2==BTREE_APPLICATION_ID) + ){ + rc = SQLITE_ERROR; + sqlite3VdbeError(p, "cannot modify %s within CONCURRENT transaction", + pOp->p2==BTREE_USER_VERSION ? "user_version" : "application_id" + ); + goto abort_due_to_error; + } +#endif /* See note about index shifting on OP_ReadCookie */ rc = sqlite3BtreeUpdateMeta(pDb->pBt, pOp->p2, pOp->p3); if( pOp->p2==BTREE_SCHEMA_VERSION ){ /* When the schema cookie changes, record the new cookie internally */ *(u32*)&pDb->pSchema->schema_cookie = *(u32*)&pOp->p3 - pOp->p5; @@ -4340,10 +4371,15 @@ assert( DbMaskTest(p->btreeMask, iDb) ); pDb = &db->aDb[iDb]; pX = pDb->pBt; assert( pX!=0 ); if( pOp->opcode==OP_OpenWrite ){ +#ifndef SQLITE_OMIT_CONCURRENT + if( db->eConcurrent==CONCURRENT_OPEN && p2==1 && iDb!=1 ){ + db->eConcurrent = CONCURRENT_SCHEMA; + } +#endif assert( OPFLAG_FORDELETE==BTREE_FORDELETE ); wrFlag = BTREE_WRCSR | (pOp->p5 & OPFLAG_FORDELETE); assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); if( pDb->pSchema->file_format < p->minWriteFileFormat ){ p->minWriteFileFormat = pDb->pSchema->file_format; @@ -8112,10 +8148,15 @@ ** P4 contains a pointer to the name of the table being locked. This is only ** used to generate an error message if the lock cannot be obtained. */ case OP_TableLock: { u8 isWriteLock = (u8)pOp->p3; +#ifndef SQLITE_OMIT_CONCURRENT + if( isWriteLock && db->eConcurrent && pOp->p2==1 && pOp->p1!=1 ){ + db->eConcurrent = CONCURRENT_SCHEMA; + } +#endif if( isWriteLock || 0==(db->flags&SQLITE_ReadUncommit) ){ int p1 = pOp->p1; assert( p1>=0 && p1nDb ); assert( DbMaskTest(p->btreeMask, p1) ); assert( isWriteLock==0 || isWriteLock==1 ); Index: src/vdbeaux.c ================================================================== --- src/vdbeaux.c +++ src/vdbeaux.c @@ -2961,14 +2961,31 @@ && sqlite3PagerIsMemdb(pPager)==0 ){ assert( i!=1 ); nTrans++; } - rc = sqlite3PagerExclusiveLock(pPager); + rc = sqlite3BtreeExclusiveLock(pBt); sqlite3BtreeLeave(pBt); } } + +#ifndef SQLITE_OMIT_CONCURRENT + if( db->eConcurrent && (rc & 0xFF)==SQLITE_BUSY ){ + /* An SQLITE_BUSY or SQLITE_BUSY_SNAPSHOT was encountered while + ** attempting to take the WRITER lock on a wal file. Release the + ** WRITER locks on all wal files and return early. */ + for(i=0; inDb; i++){ + Btree *pBt = db->aDb[i].pBt; + if( sqlite3BtreeTxnState(pBt)==SQLITE_TXN_WRITE ){ + sqlite3BtreeEnter(pBt); + sqlite3PagerDropExclusiveLock(sqlite3BtreePager(pBt)); + sqlite3BtreeLeave(pBt); + } + } + } +#endif + if( rc!=SQLITE_OK ){ return rc; } /* If there are any write-transactions at all, invoke the commit hook */ @@ -3366,10 +3383,11 @@ ** so, abort any other statements this handle currently has active. */ sqlite3RollbackAll(db, SQLITE_ABORT_ROLLBACK); sqlite3CloseSavepoints(db); db->autoCommit = 1; + db->eConcurrent = CONCURRENT_NONE; p->nChange = 0; } } } @@ -3404,13 +3422,13 @@ ** or hit an 'OR FAIL' constraint and there are no deferred foreign ** key constraints to hold up the transaction. This means a commit ** is required. */ rc = vdbeCommit(db, p); } - if( rc==SQLITE_BUSY && p->readOnly ){ + if( (rc & 0xFF)==SQLITE_BUSY && p->readOnly ){ sqlite3VdbeLeave(p); - return SQLITE_BUSY; + return rc; }else if( rc!=SQLITE_OK ){ sqlite3SystemError(db, rc); p->rc = rc; sqlite3RollbackAll(db, SQLITE_OK); p->nChange = 0; @@ -3434,10 +3452,11 @@ eStatementOp = SAVEPOINT_ROLLBACK; }else{ sqlite3RollbackAll(db, SQLITE_ABORT_ROLLBACK); sqlite3CloseSavepoints(db); db->autoCommit = 1; + db->eConcurrent = CONCURRENT_NONE; p->nChange = 0; } } /* If eStatementOp is non-zero, then a statement transaction needs to @@ -3455,10 +3474,11 @@ p->zErrMsg = 0; } sqlite3RollbackAll(db, SQLITE_ABORT_ROLLBACK); sqlite3CloseSavepoints(db); db->autoCommit = 1; + db->eConcurrent = CONCURRENT_NONE; p->nChange = 0; } } /* If this was an INSERT, UPDATE or DELETE and no statement transaction Index: src/wal.c ================================================================== --- src/wal.c +++ src/wal.c @@ -529,12 +529,14 @@ u8 padToSectorBoundary; /* Pad transactions out to the next sector */ u8 bShmUnreliable; /* SHM content is read-only and unreliable */ WalIndexHdr hdr; /* Wal-index header for current transaction */ u32 minFrame; /* Ignore wal frames before this one */ u32 iReCksum; /* On commit, recalculate checksums from here */ + u32 nPriorFrame; /* For sqlite3WalInfo() */ const char *zWalName; /* Name of WAL file */ u32 nCkpt; /* Checkpoint sequence counter in the wal-header */ + FastPrng sPrng; /* Random number generator */ #ifdef SQLITE_USE_SEH u32 lockMask; /* Mask of locks held */ void *pFree; /* Pointer to sqlite3_free() if exception thrown */ u32 *pWiValue; /* Value to write into apWiData[iWiPg] */ int iWiPg; /* Write pWiValue into apWiData[iWiPg] */ @@ -1070,11 +1072,11 @@ /* ** Set or release locks on the WAL. Locks are either shared or exclusive. ** A lock cannot be moved directly between shared and exclusive - it must go -** through the unlocked state first. +** through the concurrent state first. ** ** In locking_mode=EXCLUSIVE, all of these routines become no-ops. */ static int walLockShared(Wal *pWal, int lockIdx){ int rc; @@ -1388,11 +1390,11 @@ int iLock; /* Lock offset to lock for checkpoint */ /* Obtain an exclusive lock on all byte in the locking range not already ** locked by the caller. The caller is guaranteed to have locked the ** WAL_WRITE_LOCK byte, and may have also locked the WAL_CKPT_LOCK byte. - ** If successful, the same bytes that are locked here are unlocked before + ** If successful, the same bytes that are locked here are concurrent before ** this function returns. */ assert( pWal->ckptLock==1 || pWal->ckptLock==0 ); assert( WAL_ALL_BUT_WRITE==WAL_WRITE_LOCK+1 ); assert( WAL_CKPT_LOCK==WAL_ALL_BUT_WRITE ); @@ -1712,10 +1714,11 @@ pRet->mxWalSize = mxWalSize; pRet->zWalName = zWalName; pRet->syncHeader = 1; pRet->padToSectorBoundary = 1; pRet->exclusiveMode = (bNoShm ? WAL_HEAPMEMORY_MODE: WAL_NORMAL_MODE); + sqlite3FastPrngInit(&pRet->sPrng); /* Open file handle on the write-ahead log file. */ flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_WAL); rc = sqlite3OsOpen(pVfs, zWalName, pRet->pWalFd, flags, &flags); if( rc==SQLITE_OK && flags&SQLITE_OPEN_READONLY ){ @@ -2340,11 +2343,11 @@ SEH_INJECT_FAULT; if( pInfo->nBackfillhdr.mxFrame ){ rc = SQLITE_BUSY; }else if( eMode>=SQLITE_CHECKPOINT_RESTART ){ u32 salt1; - sqlite3_randomness(4, &salt1); + sqlite3FastRandomness(&pWal->sPrng, 4, &salt1); assert( pInfo->nBackfill==pWal->hdr.mxFrame ); rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(1), WAL_NREADER-1); if( rc==SQLITE_OK ){ if( eMode==SQLITE_CHECKPOINT_TRUNCATE ){ /* IMPLEMENTATION-OF: R-44699-57140 This mode works the same way as @@ -2547,10 +2550,53 @@ sqlite3_free((void *)pWal->apWiData); sqlite3_free(pWal); } return rc; } + +/* +** Try to copy the wal-index header from shared-memory into (*pHdr). Return +** zero if successful or non-zero otherwise. If the header is corrupted +** (either because the two copies are inconsistent or because the checksum +** values are incorrect), the read fails and non-zero is returned. +*/ +static int walIndexLoadHdr(Wal *pWal, WalIndexHdr *pHdr){ + u32 aCksum[2]; /* Checksum on the header content */ + WalIndexHdr h2; /* Second copy of the header content */ + WalIndexHdr volatile *aHdr; /* Header in shared memory */ + + /* The first page of the wal-index must be mapped at this point. */ + assert( pWal->nWiData>0 && pWal->apWiData[0] ); + + /* Read the header. This might happen concurrently with a write to the + ** same area of shared memory on a different CPU in a SMP, + ** meaning it is possible that an inconsistent snapshot is read + ** from the file. If this happens, return non-zero. + ** + ** There are two copies of the header at the beginning of the wal-index. + ** When reading, read [0] first then [1]. Writes are in the reverse order. + ** Memory barriers are used to prevent the compiler or the hardware from + ** reordering the reads and writes. + */ + aHdr = walIndexHdr(pWal); + memcpy(pHdr, (void *)&aHdr[0], sizeof(h2)); + walShmBarrier(pWal); + memcpy(&h2, (void *)&aHdr[1], sizeof(h2)); + + if( memcmp(&h2, pHdr, sizeof(h2))!=0 ){ + return 1; /* Dirty read */ + } + if( h2.isInit==0 ){ + return 1; /* Malformed header - probably all zeros */ + } + walChecksumBytes(1, (u8*)&h2, sizeof(h2)-sizeof(h2.aCksum), 0, aCksum); + if( aCksum[0]!=h2.aCksum[0] || aCksum[1]!=h2.aCksum[1] ){ + return 1; /* Checksum does not match */ + } + + return 0; +} /* ** Try to read the wal-index header. Return 0 on success and 1 if ** there is a problem. ** @@ -2566,47 +2612,14 @@ ** ** If the checksum cannot be verified return non-zero. If the header ** is read successfully and the checksum verified, return zero. */ static SQLITE_NO_TSAN int walIndexTryHdr(Wal *pWal, int *pChanged){ - u32 aCksum[2]; /* Checksum on the header content */ - WalIndexHdr h1, h2; /* Two copies of the header content */ - WalIndexHdr volatile *aHdr; /* Header in shared memory */ - - /* The first page of the wal-index must be mapped at this point. */ - assert( pWal->nWiData>0 && pWal->apWiData[0] ); - - /* Read the header. This might happen concurrently with a write to the - ** same area of shared memory on a different CPU in a SMP, - ** meaning it is possible that an inconsistent snapshot is read - ** from the file. If this happens, return non-zero. - ** - ** tag-20200519-1: - ** There are two copies of the header at the beginning of the wal-index. - ** When reading, read [0] first then [1]. Writes are in the reverse order. - ** Memory barriers are used to prevent the compiler or the hardware from - ** reordering the reads and writes. TSAN and similar tools can sometimes - ** give false-positive warnings about these accesses because the tools do not - ** account for the double-read and the memory barrier. The use of mutexes - ** here would be problematic as the memory being accessed is potentially - ** shared among multiple processes and not all mutex implementations work - ** reliably in that environment. - */ - aHdr = walIndexHdr(pWal); - memcpy(&h1, (void *)&aHdr[0], sizeof(h1)); /* Possible TSAN false-positive */ - walShmBarrier(pWal); - memcpy(&h2, (void *)&aHdr[1], sizeof(h2)); - - if( memcmp(&h1, &h2, sizeof(h1))!=0 ){ - return 1; /* Dirty read */ - } - if( h1.isInit==0 ){ - return 1; /* Malformed header - probably all zeros */ - } - walChecksumBytes(1, (u8*)&h1, sizeof(h1)-sizeof(h1.aCksum), 0, aCksum); - if( aCksum[0]!=h1.aCksum[0] || aCksum[1]!=h1.aCksum[1] ){ - return 1; /* Checksum does not match */ + WalIndexHdr h1; /* Copy of the header content */ + + if( walIndexLoadHdr(pWal, &h1) ){ + return 1; } if( memcmp(&pWal->hdr, &h1, sizeof(WalIndexHdr)) ){ *pChanged = 1; memcpy(&pWal->hdr, &h1, sizeof(WalIndexHdr)); @@ -3393,10 +3406,11 @@ testcase( (rc&0xff)==SQLITE_BUSY ); testcase( (rc&0xff)==SQLITE_IOERR ); testcase( rc==SQLITE_PROTOCOL ); testcase( rc==SQLITE_OK ); + pWal->nPriorFrame = pWal->hdr.mxFrame; #ifdef SQLITE_ENABLE_SNAPSHOT if( rc==SQLITE_OK ){ if( pSnapshot && memcmp(pSnapshot, &pWal->hdr, sizeof(WalIndexHdr))!=0 ){ /* At this point the client has a lock on an aReadMark[] slot holding ** a value equal to or smaller than pSnapshot->mxFrame, but pWal->hdr @@ -3656,10 +3670,39 @@ return pWal->hdr.nPage; } return 0; } +/* +** Take the WRITER lock on the WAL file. Return SQLITE_OK if successful, +** or an SQLite error code otherwise. This routine does not invoke any +** busy-handler callbacks, that is done at a higher level. +*/ +static int walWriteLock(Wal *pWal){ + int rc; + + /* Cannot start a write transaction without first holding a read lock */ + assert( pWal->readLock>=0 ); + assert( pWal->writeLock==0 ); + assert( pWal->iReCksum==0 ); + + /* If this is a read-only connection, obtaining a write-lock is not + ** possible. In this case return SQLITE_READONLY. Otherwise, attempt + ** to grab the WRITER lock. Set Wal.writeLock to true and return + ** SQLITE_OK if successful, or leave Wal.writeLock clear and return + ** an SQLite error code (possibly SQLITE_BUSY) otherwise. */ + if( pWal->readOnly ){ + rc = SQLITE_READONLY; + }else{ + rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1); + if( rc==SQLITE_OK ){ + pWal->writeLock = 1; + } + } + + return rc; +} /* ** This function starts a write transaction on the WAL. ** ** A read transaction must have already been started by a prior call @@ -3682,46 +3725,241 @@ if( pWal->writeLock ){ assert( !memcmp(&pWal->hdr,(void*)pWal->apWiData[0],sizeof(WalIndexHdr)) ); return SQLITE_OK; } #endif - - /* Cannot start a write transaction without first holding a read - ** transaction. */ - assert( pWal->readLock>=0 ); - assert( pWal->writeLock==0 && pWal->iReCksum==0 ); - - if( pWal->readOnly ){ - return SQLITE_READONLY; - } - - /* Only one writer allowed at a time. Get the write lock. Return - ** SQLITE_BUSY if unable. - */ - rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1); - if( rc ){ - return rc; - } - pWal->writeLock = 1; - - /* If another connection has written to the database file since the - ** time the read transaction on this connection was started, then - ** the write is disallowed. - */ - SEH_TRY { - if( memcmp(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr))!=0 ){ + + rc = walWriteLock(pWal); + if( rc==SQLITE_OK ){ + /* If another connection has written to the database file since the + ** time the read transaction on this connection was started, then + ** the write is disallowed. Release the WRITER lock and return + ** SQLITE_BUSY_SNAPSHOT in this case. */ + SEH_TRY { + if( memcmp(&pWal->hdr, (void*)walIndexHdr(pWal),sizeof(WalIndexHdr))!=0 ){ + rc = SQLITE_BUSY_SNAPSHOT; + } + } + SEH_EXCEPT( rc = SQLITE_IOERR_IN_PAGE; ) + if( rc!=SQLITE_OK ){ + walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); + pWal->writeLock = 0; + } + } + return rc; +} + +/* +** This function is called by a writer that has a read-lock on aReadmark[0] +** (pWal->readLock==0). This function relinquishes that lock and takes a +** lock on a different aReadmark[] slot. +** +** SQLITE_OK is returned if successful, or an SQLite error code otherwise. +*/ +static int walUpgradeReadlock(Wal *pWal){ + int cnt; + int rc; + assert( pWal->writeLock && pWal->readLock==0 ); + walUnlockShared(pWal, WAL_READ_LOCK(0)); + pWal->readLock = -1; + cnt = 0; + do{ + int notUsed; + rc = walTryBeginRead(pWal, ¬Used, 1, &cnt); + }while( rc==WAL_RETRY ); + assert( (rc&0xff)!=SQLITE_BUSY ); /* BUSY not possible when useWal==1 */ + testcase( (rc&0xff)==SQLITE_IOERR ); + testcase( rc==SQLITE_PROTOCOL ); + testcase( rc==SQLITE_OK ); + return rc; +} + + +#ifndef SQLITE_OMIT_CONCURRENT +/* +** This function does the work of sqlite3WalLockForCommit(). The difference +** between this function and sqlite3WalLockForCommit() is that the latter +** encloses everything in a SEH_TRY {} block. +*/ +static int walLockForCommit( + Wal *pWal, + PgHdr *pPg1, + Bitvec *pAllRead, + Pgno *piConflict +){ + int rc = walWriteLock(pWal); + + /* If the database has been modified since this transaction was started, + ** check if it is still possible to commit. The transaction can be + ** committed if: + ** + ** a) None of the pages in pList have been modified since the + ** transaction opened, and + ** + ** b) The database schema cookie has not been modified since the + ** transaction was started. + */ + if( rc==SQLITE_OK ){ + WalIndexHdr head; + + if( walIndexLoadHdr(pWal, &head) ){ + /* This branch is taken if the wal-index header is corrupted. This + ** occurs if some other writer has crashed while committing a + ** transaction to this database since the current concurrent transaction + ** was opened. */ rc = SQLITE_BUSY_SNAPSHOT; - } - } - SEH_EXCEPT( rc = SQLITE_IOERR_IN_PAGE; ) - - if( rc!=SQLITE_OK ){ - walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); - pWal->writeLock = 0; - } + }else if( memcmp(&pWal->hdr, (void*)&head, sizeof(WalIndexHdr))!=0 ){ + int iHash; + int iLast = walFramePage(head.mxFrame); + u32 iFirst = pWal->hdr.mxFrame+1; /* First wal frame to check */ + if( memcmp(pWal->hdr.aSalt, (u32*)head.aSalt, sizeof(u32)*2) ){ + assert( pWal->readLock==0 ); + iFirst = 1; + } + if( pPg1==0 ){ + /* If pPg1==0, then the current transaction modified the database + ** schema. This means it conflicts with all other transactions. */ + *piConflict = 1; + rc = SQLITE_BUSY_SNAPSHOT; + } + for(iHash=walFramePage(iFirst); rc==SQLITE_OK && iHash<=iLast; iHash++){ + WalHashLoc sLoc; + + rc = walHashGet(pWal, iHash, &sLoc); + if( rc==SQLITE_OK ){ + u32 i, iMin, iMax; + assert( head.mxFrame>=sLoc.iZero ); + iMin = (sLoc.iZero >= iFirst) ? 1 : (iFirst - sLoc.iZero); + iMax = (iHash==0) ? HASHTABLE_NPAGE_ONE : HASHTABLE_NPAGE; + if( iMax>(head.mxFrame-sLoc.iZero) ) iMax = (head.mxFrame-sLoc.iZero); + for(i=iMin; rc==SQLITE_OK && i<=iMax; i++){ + PgHdr *pPg; + if( sLoc.aPgno[i-1]==1 ){ + /* Check that the schema cookie has not been modified. If + ** it has not, the commit can proceed. */ + u8 aNew[4]; + u8 *aOld = &((u8*)pPg1->pData)[40]; + int sz; + i64 iOffset; + sz = pWal->hdr.szPage; + sz = (sz&0xfe00) + ((sz&0x0001)<<16); + iOffset = walFrameOffset(i+sLoc.iZero, sz) + WAL_FRAME_HDRSIZE+40; + rc = sqlite3OsRead(pWal->pWalFd, aNew, sizeof(aNew), iOffset); + if( rc==SQLITE_OK && memcmp(aOld, aNew, sizeof(aNew)) ){ + rc = SQLITE_BUSY_SNAPSHOT; + } + }else if( sqlite3BitvecTestNotNull(pAllRead, sLoc.aPgno[i-1]) ){ + *piConflict = sLoc.aPgno[i-1]; + rc = SQLITE_BUSY_SNAPSHOT; + }else + if( (pPg = sqlite3PagerLookup(pPg1->pPager, sLoc.aPgno[i-1])) ){ + /* Page aPgno[i-1], which is present in the pager cache, has been + ** modified since the current CONCURRENT transaction was started. + ** However it was not read by the current transaction, so is not + ** a conflict. There are two possibilities: (a) the page was + ** allocated at the of the file by the current transaction or + ** (b) was present in the cache at the start of the transaction. + ** + ** For case (a), do nothing. This page will be moved within the + ** database file by the commit code to avoid the conflict. The + ** call to PagerUnref() is to release the reference grabbed by + ** the sqlite3PagerLookup() above. + ** + ** In case (b), drop the page from the cache - otherwise + ** following the snapshot upgrade the cache would be inconsistent + ** with the database as stored on disk. */ + if( sqlite3PagerIswriteable(pPg) ){ + sqlite3PagerUnref(pPg); + }else{ + sqlite3PcacheDrop(pPg); + } + } + } + } + } + } + } + + pWal->nPriorFrame = pWal->hdr.mxFrame; + return rc; +} + +/* +** This function is only ever called when committing a "BEGIN CONCURRENT" +** transaction. It may be assumed that no frames have been written to +** the wal file. The second parameter is a pointer to the in-memory +** representation of page 1 of the database (which may or may not be +** dirty). The third is a bitvec with a bit set for each page in the +** database file that was read by the current concurrent transaction. +** +** This function performs three tasks: +** +** 1) It obtains the WRITER lock on the wal file, +** +** 2) It checks that there are no conflicts between the current +** transaction and any transactions committed to the wal file since +** it was opened, and +** +** 3) It ejects any non-dirty pages from the page-cache that have been +** written by another client since the CONCURRENT transaction was started +** (so as to avoid ending up with an inconsistent cache after the +** current transaction is committed). +** +** If no error occurs and the caller may proceed with committing the +** transaction, SQLITE_OK is returned. SQLITE_BUSY is returned if the WRITER +** lock cannot be obtained. Or, if the WRITER lock can be obtained but there +** are conflicts with a committed transaction, SQLITE_BUSY_SNAPSHOT. Finally, +** if an error (i.e. an OOM condition or IO error), an SQLite error code +** is returned. +*/ +int sqlite3WalLockForCommit( + Wal *pWal, + PgHdr *pPg1, + Bitvec *pAllRead, + Pgno *piConflict +){ + int rc = SQLITE_OK; + SEH_TRY { + rc = walLockForCommit(pWal, pPg1, pAllRead, piConflict); + } SEH_EXCEPT( rc = SQLITE_IOERR_IN_PAGE; ) + return rc; +} + +/* !defined(SQLITE_OMIT_CONCURRENT) +** +** This function is called as part of committing an CONCURRENT transaction. +** It is assumed that sqlite3WalLockForCommit() has already been successfully +** called and so (a) the WRITER lock is held and (b) it is known that the +** wal-index-header stored in shared memory is not corrupt. +** +** Before returning, this function upgrades the client so that it is +** operating on the database snapshot currently at the head of the wal file +** (even if the CONCURRENT transaction ran against an older snapshot). +** +** SQLITE_OK is returned if successful, or an SQLite error code otherwise. +*/ +int sqlite3WalUpgradeSnapshot(Wal *pWal){ + int rc = SQLITE_OK; + assert( pWal->writeLock ); + + SEH_TRY { + assert( pWal->szPage==pWal->hdr.szPage ); + memcpy(&pWal->hdr, (void*)walIndexHdr(pWal), sizeof(WalIndexHdr)); + assert( pWal->szPage==pWal->hdr.szPage || pWal->szPage==0 ); + pWal->szPage = pWal->hdr.szPage; + + /* If this client has its read-lock on slot aReadmark[0] and the entire + ** wal has not been checkpointed, switch it to a different slot. Otherwise + ** any reads performed between now and committing the transaction will + ** read from the old snapshot - not the one just upgraded to. */ + if( pWal->readLock==0 && pWal->hdr.mxFrame!=walCkptInfo(pWal)->nBackfill ){ + rc = walUpgradeReadlock(pWal); + } + } SEH_EXCEPT( rc = SQLITE_IOERR_IN_PAGE; ) return rc; } +#endif /* SQLITE_OMIT_CONCURRENT */ /* ** End a write transaction. The commit has already been done. This ** routine merely releases the lock. */ @@ -3745,22 +3983,33 @@ ** returned to the caller. ** ** Otherwise, if the callback function does not return an error, this ** function returns SQLITE_OK. */ -int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){ +int sqlite3WalUndo( + Wal *pWal, + int (*xUndo)(void *, Pgno), + void *pUndoCtx, + int bConcurrent /* True if this is a CONCURRENT transaction */ +){ int rc = SQLITE_OK; - if( ALWAYS(pWal->writeLock) ){ + if( pWal->writeLock ){ Pgno iMax = pWal->hdr.mxFrame; Pgno iFrame; SEH_TRY { /* Restore the clients cache of the wal-index header to the state it ** was in before the client began writing to the database. */ memcpy(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr)); - +#ifndef SQLITE_OMIT_CONCURRENT + if( bConcurrent ){ + pWal->hdr.aCksum[0]++; + } +#else + UNUSED_PARAMETER(bConcurrent); +#endif for(iFrame=pWal->hdr.mxFrame+1; ALWAYS(rc==SQLITE_OK) && iFrame<=iMax; iFrame++ ){ /* This call cannot fail. Unless the page for which the page number @@ -3789,11 +4038,10 @@ ** values. This function populates the array with values required to ** "rollback" the write position of the WAL handle back to the current ** point in the event of a savepoint rollback (via WalSavepointUndo()). */ void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData){ - assert( pWal->writeLock ); aWalData[0] = pWal->hdr.mxFrame; aWalData[1] = pWal->hdr.aFrameCksum[0]; aWalData[2] = pWal->hdr.aFrameCksum[1]; aWalData[3] = pWal->nCkpt; } @@ -3805,11 +4053,11 @@ ** by a call to WalSavepoint(). */ int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){ int rc = SQLITE_OK; - assert( pWal->writeLock ); + assert( pWal->writeLock || aWalData[0]==pWal->hdr.mxFrame ); assert( aWalData[3]!=pWal->nCkpt || aWalData[0]<=pWal->hdr.mxFrame ); if( aWalData[3]!=pWal->nCkpt ){ /* This savepoint was opened immediately after the write-transaction ** was started. Right after that, the writer decided to wrap around @@ -3844,18 +4092,17 @@ ** or not pWal->hdr.mxFrame is modified). An SQLite error code is returned ** if an error occurs. */ static int walRestartLog(Wal *pWal){ int rc = SQLITE_OK; - int cnt; if( pWal->readLock==0 ){ volatile WalCkptInfo *pInfo = walCkptInfo(pWal); assert( pInfo->nBackfill==pWal->hdr.mxFrame ); if( pInfo->nBackfill>0 ){ u32 salt1; - sqlite3_randomness(4, &salt1); + sqlite3FastRandomness(&pWal->sPrng, 4, &salt1); rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); if( rc==SQLITE_OK ){ /* If all readers are using WAL_READ_LOCK(0) (in other words if no ** readers are currently using the WAL), then the transactions ** frames will overwrite the start of the existing log. Update the @@ -3865,25 +4112,25 @@ ** at this point. But updating the actual wal-index header is also ** safe and means there is no special case for sqlite3WalUndo() ** to handle if this transaction is rolled back. */ walRestartHdr(pWal, salt1); walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); + pWal->nPriorFrame = 0; }else if( rc!=SQLITE_BUSY ){ return rc; } } - walUnlockShared(pWal, WAL_READ_LOCK(0)); - pWal->readLock = -1; - cnt = 0; - do{ - int notUsed; - rc = walTryBeginRead(pWal, ¬Used, 1, &cnt); - }while( rc==WAL_RETRY ); - assert( (rc&0xff)!=SQLITE_BUSY ); /* BUSY not possible when useWal==1 */ - testcase( (rc&0xff)==SQLITE_IOERR ); - testcase( rc==SQLITE_PROTOCOL ); - testcase( rc==SQLITE_OK ); + + /* Regardless of whether or not the wal file was restarted, change the + ** read-lock held by this client to a slot other than aReadmark[0]. + ** Clients with a lock on aReadmark[0] read from the database file + ** only - never from the wal file. This means that if a writer holding + ** a lock on aReadmark[0] were to commit a transaction but not close the + ** read-transaction, subsequent read operations would read directly from + ** the database file - ignoring the new pages just appended + ** to the wal file. */ + rc = walUpgradeReadlock(pWal); } return rc; } /* @@ -4063,11 +4310,11 @@ sqlite3Put4byte(&aWalHdr[0], (WAL_MAGIC | SQLITE_BIGENDIAN)); sqlite3Put4byte(&aWalHdr[4], WAL_MAX_VERSION); sqlite3Put4byte(&aWalHdr[8], szPage); sqlite3Put4byte(&aWalHdr[12], pWal->nCkpt); - if( pWal->nCkpt==0 ) sqlite3_randomness(8, pWal->hdr.aSalt); + if( pWal->nCkpt==0 ) sqlite3FastRandomness(&pWal->sPrng, 8, pWal->hdr.aSalt); memcpy(&aWalHdr[16], pWal->hdr.aSalt, 8); walChecksumBytes(1, aWalHdr, WAL_HDRSIZE-2*4, 0, aCksum); sqlite3Put4byte(&aWalHdr[24], aCksum[0]); sqlite3Put4byte(&aWalHdr[28], aCksum[1]); @@ -4356,11 +4603,11 @@ /* Copy data from the log to the database file. */ if( rc==SQLITE_OK ){ if( pWal->hdr.mxFrame && walPagesize(pWal)!=nBuf ){ rc = SQLITE_CORRUPT_BKPT; }else{ - rc = walCheckpoint(pWal, db, eMode2, xBusy2, pBusyArg, sync_flags,zBuf); + rc = walCheckpoint(pWal, db, eMode2, xBusy2, pBusyArg, sync_flags, zBuf); } /* If no error occurred, set the output variables. */ if( rc==SQLITE_OK || rc==SQLITE_BUSY ){ if( pnLog ) *pnLog = (int)pWal->hdr.mxFrame; @@ -4605,7 +4852,19 @@ /* Return the sqlite3_file object for the WAL file */ sqlite3_file *sqlite3WalFile(Wal *pWal){ return pWal->pWalFd; } + +/* +** Return the values required by sqlite3_wal_info(). +*/ +int sqlite3WalInfo(Wal *pWal, u32 *pnPrior, u32 *pnFrame){ + int rc = SQLITE_OK; + if( pWal ){ + *pnFrame = pWal->hdr.mxFrame; + *pnPrior = pWal->nPriorFrame; + } + return rc; +} #endif /* #ifndef SQLITE_OMIT_WAL */ Index: src/wal.h ================================================================== --- src/wal.h +++ src/wal.h @@ -32,11 +32,11 @@ # define sqlite3WalBeginReadTransaction(y,z) 0 # define sqlite3WalEndReadTransaction(z) # define sqlite3WalDbsize(y) 0 # define sqlite3WalBeginWriteTransaction(y) 0 # define sqlite3WalEndWriteTransaction(x) 0 -# define sqlite3WalUndo(x,y,z) 0 +# define sqlite3WalUndo(w,x,y,z) 0 # define sqlite3WalSavepoint(y,z) # define sqlite3WalSavepointUndo(y,z) 0 # define sqlite3WalFrames(u,v,w,x,y,z) 0 # define sqlite3WalCheckpoint(q,r,s,t,u,v,w,x,y,z) 0 # define sqlite3WalCallback(z) 0 @@ -82,11 +82,11 @@ /* Obtain or release the WRITER lock. */ int sqlite3WalBeginWriteTransaction(Wal *pWal); int sqlite3WalEndWriteTransaction(Wal *pWal); /* Undo any frames written (but not committed) to the log */ -int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx); +int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx, int); /* Return an integer that records the current (uncommitted) write ** position in the WAL */ void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData); @@ -135,10 +135,19 @@ int sqlite3WalSnapshotRecover(Wal *pWal); int sqlite3WalSnapshotCheck(Wal *pWal, sqlite3_snapshot *pSnapshot); void sqlite3WalSnapshotUnlock(Wal *pWal); #endif +#ifndef SQLITE_OMIT_CONCURRENT +/* Tell the wal layer that we want to commit a concurrent transaction */ +int sqlite3WalLockForCommit(Wal *pWal, PgHdr *pPg, Bitvec *pRead, Pgno*); + +/* Upgrade the state of the client to take into account changes written +** by other connections */ +int sqlite3WalUpgradeSnapshot(Wal *pWal); +#endif /* SQLITE_OMIT_CONCURRENT */ + #ifdef SQLITE_ENABLE_ZIPVFS /* If the WAL file is not empty, return the number of bytes of content ** stored in each frame (i.e. the db page-size when the WAL was created). */ int sqlite3WalFramesize(Wal *pWal); @@ -154,7 +163,10 @@ #ifdef SQLITE_USE_SEH int sqlite3WalSystemErrno(Wal*); #endif +/* sqlite3_wal_info() data */ +int sqlite3WalInfo(Wal *pWal, u32 *pnPrior, u32 *pnFrame); + #endif /* ifndef SQLITE_OMIT_WAL */ #endif /* SQLITE_WAL_H */ ADDED test/bc_test1.c Index: test/bc_test1.c ================================================================== --- /dev/null +++ test/bc_test1.c @@ -0,0 +1,556 @@ +/* +** 2016-05-07 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +*/ + + +#include +#include +#include +#include "tt3_core.c" + +#ifdef USE_OSINST +# include "../src/test_osinst.c" +#else +# define vfslog_time() 0 +#endif + +typedef struct Config Config; +typedef struct ThreadCtx ThreadCtx; + +#define THREAD_TIME_INSERT 0 +#define THREAD_TIME_COMMIT 1 +#define THREAD_TIME_ROLLBACK 2 +#define THREAD_TIME_WRITER 3 +#define THREAD_TIME_CKPT 4 + +struct ThreadCtx { + Config *pConfig; + Sqlite *pDb; + Error *pErr; + sqlite3_int64 aTime[5]; +}; + +struct Config { + int nIPT; /* --inserts-per-transaction */ + int nThread; /* --threads */ + int nSecond; /* --seconds */ + int bMutex; /* --mutex */ + int nAutoCkpt; /* --autockpt */ + int bRm; /* --rm */ + int bClearCache; /* --clear-cache */ + int nMmap; /* mmap limit in MB */ + char *zFile; + int bOsinst; /* True to use osinst */ + + ThreadCtx *aCtx; /* Array of size nThread */ + + pthread_cond_t cond; + pthread_mutex_t mutex; + int nCondWait; /* Number of threads waiting on hCond */ + sqlite3_vfs *pVfs; +}; + + +typedef struct VfsWrapperFd VfsWrapperFd; +struct VfsWrapperFd { + sqlite3_file base; /* Base class */ + int bWriter; /* True if holding shm WRITER lock */ + int iTid; + Config *pConfig; + sqlite3_file *pFd; /* Underlying file descriptor */ +}; + +/* Methods of the wrapper VFS */ +static int vfsWrapOpen(sqlite3_vfs*, const char*, sqlite3_file*, int, int*); +static int vfsWrapDelete(sqlite3_vfs*, const char*, int); +static int vfsWrapAccess(sqlite3_vfs*, const char*, int, int*); +static int vfsWrapFullPathname(sqlite3_vfs*, const char *, int, char*); +static void *vfsWrapDlOpen(sqlite3_vfs*, const char*); +static void vfsWrapDlError(sqlite3_vfs*, int, char*); +static void (*vfsWrapDlSym(sqlite3_vfs*,void*, const char*))(void); +static void vfsWrapDlClose(sqlite3_vfs*, void*); +static int vfsWrapRandomness(sqlite3_vfs*, int, char*); +static int vfsWrapSleep(sqlite3_vfs*, int); +static int vfsWrapCurrentTime(sqlite3_vfs*, double*); +static int vfsWrapGetLastError(sqlite3_vfs*, int, char*); +static int vfsWrapCurrentTimeInt64(sqlite3_vfs*, sqlite3_int64*); +static int vfsWrapSetSystemCall(sqlite3_vfs*, const char*, sqlite3_syscall_ptr); +static sqlite3_syscall_ptr vfsWrapGetSystemCall(sqlite3_vfs*, const char*); +static const char *vfsWrapNextSystemCall(sqlite3_vfs*, const char*); + +/* Methods of wrapper sqlite3_io_methods object (see vfsWrapOpen()) */ +static int vfsWrapClose(sqlite3_file*); +static int vfsWrapRead(sqlite3_file*, void*, int iAmt, sqlite3_int64 iOfst); +static int vfsWrapWrite(sqlite3_file*, const void*, int iAmt, sqlite3_int64); +static int vfsWrapTruncate(sqlite3_file*, sqlite3_int64 size); +static int vfsWrapSync(sqlite3_file*, int flags); +static int vfsWrapFileSize(sqlite3_file*, sqlite3_int64 *pSize); +static int vfsWrapLock(sqlite3_file*, int); +static int vfsWrapUnlock(sqlite3_file*, int); +static int vfsWrapCheckReservedLock(sqlite3_file*, int *pResOut); +static int vfsWrapFileControl(sqlite3_file*, int op, void *pArg); +static int vfsWrapSectorSize(sqlite3_file*); +static int vfsWrapDeviceCharacteristics(sqlite3_file*); +static int vfsWrapShmMap(sqlite3_file*, int iPg, int, int, void volatile**); +static int vfsWrapShmLock(sqlite3_file*, int offset, int n, int flags); +static void vfsWrapShmBarrier(sqlite3_file*); +static int vfsWrapShmUnmap(sqlite3_file*, int deleteFlag); +static int vfsWrapFetch(sqlite3_file*, sqlite3_int64 iOfst, int iAmt, void **); +static int vfsWrapUnfetch(sqlite3_file*, sqlite3_int64 iOfst, void *p); + +static int vfsWrapOpen( + sqlite3_vfs *pVfs, + const char *zName, + sqlite3_file *pFd, + int flags, + int *fout +){ + static sqlite3_io_methods methods = { + 3, + vfsWrapClose, vfsWrapRead, vfsWrapWrite, + vfsWrapTruncate, vfsWrapSync, vfsWrapFileSize, + vfsWrapLock, vfsWrapUnlock, vfsWrapCheckReservedLock, + vfsWrapFileControl, vfsWrapSectorSize, vfsWrapDeviceCharacteristics, + vfsWrapShmMap, vfsWrapShmLock, vfsWrapShmBarrier, + vfsWrapShmUnmap, vfsWrapFetch, vfsWrapUnfetch + }; + + Config *pConfig = (Config*)pVfs->pAppData; + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + int rc; + + memset(pWrapper, 0, sizeof(VfsWrapperFd)); + if( flags & SQLITE_OPEN_MAIN_DB ){ + pWrapper->iTid = (int)sqlite3_uri_int64(zName, "tid", 0); + } + + pWrapper->pFd = (sqlite3_file*)&pWrapper[1]; + pWrapper->pConfig = pConfig; + rc = pConfig->pVfs->xOpen(pConfig->pVfs, zName, pWrapper->pFd, flags, fout); + if( rc==SQLITE_OK ){ + pWrapper->base.pMethods = &methods; + } + return rc; +} + +static int vfsWrapDelete(sqlite3_vfs *pVfs, const char *a, int b){ + Config *pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xDelete(pConfig->pVfs, a, b); +} +static int vfsWrapAccess(sqlite3_vfs *pVfs, const char *a, int b, int *c){ + Config *pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xAccess(pConfig->pVfs, a, b, c); +} +static int vfsWrapFullPathname(sqlite3_vfs *pVfs, const char *a, int b, char*c){ + Config *pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xFullPathname(pConfig->pVfs, a, b, c); +} +static void *vfsWrapDlOpen(sqlite3_vfs *pVfs, const char *a){ + Config *pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xDlOpen(pConfig->pVfs, a); +} +static void vfsWrapDlError(sqlite3_vfs *pVfs, int a, char *b){ + Config *pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xDlError(pConfig->pVfs, a, b); +} +static void (*vfsWrapDlSym(sqlite3_vfs *pVfs, void *a, const char *b))(void){ + Config *pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xDlSym(pConfig->pVfs, a, b); +} +static void vfsWrapDlClose(sqlite3_vfs *pVfs, void *a){ + Config *pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xDlClose(pConfig->pVfs, a); +} +static int vfsWrapRandomness(sqlite3_vfs *pVfs, int a, char *b){ + Config *pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xRandomness(pConfig->pVfs, a, b); +} +static int vfsWrapSleep(sqlite3_vfs *pVfs, int a){ + Config *pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xSleep(pConfig->pVfs, a); +} +static int vfsWrapCurrentTime(sqlite3_vfs *pVfs, double *a){ + Config *pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xCurrentTime(pConfig->pVfs, a); +} +static int vfsWrapGetLastError(sqlite3_vfs *pVfs, int a, char *b){ + Config *pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xGetLastError(pConfig->pVfs, a, b); +} +static int vfsWrapCurrentTimeInt64(sqlite3_vfs *pVfs, sqlite3_int64 *a){ + Config *pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xCurrentTimeInt64(pConfig->pVfs, a); +} +static int vfsWrapSetSystemCall( + sqlite3_vfs *pVfs, + const char *a, + sqlite3_syscall_ptr b +){ + Config *pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xSetSystemCall(pConfig->pVfs, a, b); +} +static sqlite3_syscall_ptr vfsWrapGetSystemCall( + sqlite3_vfs *pVfs, + const char *a +){ + Config *pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xGetSystemCall(pConfig->pVfs, a); +} +static const char *vfsWrapNextSystemCall(sqlite3_vfs *pVfs, const char *a){ + Config *pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xNextSystemCall(pConfig->pVfs, a); +} + +static int vfsWrapClose(sqlite3_file *pFd){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + pWrapper->pFd->pMethods->xClose(pWrapper->pFd); + pWrapper->pFd = 0; + return SQLITE_OK; +} +static int vfsWrapRead(sqlite3_file *pFd, void *a, int b, sqlite3_int64 c){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xRead(pWrapper->pFd, a, b, c); +} +static int vfsWrapWrite( + sqlite3_file *pFd, + const void *a, int b, + sqlite3_int64 c +){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xWrite(pWrapper->pFd, a, b, c); +} +static int vfsWrapTruncate(sqlite3_file *pFd, sqlite3_int64 a){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xTruncate(pWrapper->pFd, a); +} +static int vfsWrapSync(sqlite3_file *pFd, int a){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xSync(pWrapper->pFd, a); +} +static int vfsWrapFileSize(sqlite3_file *pFd, sqlite3_int64 *a){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xFileSize(pWrapper->pFd, a); +} +static int vfsWrapLock(sqlite3_file *pFd, int a){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xLock(pWrapper->pFd, a); +} +static int vfsWrapUnlock(sqlite3_file *pFd, int a){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xUnlock(pWrapper->pFd, a); +} +static int vfsWrapCheckReservedLock(sqlite3_file *pFd, int *a){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xCheckReservedLock(pWrapper->pFd, a); +} +static int vfsWrapFileControl(sqlite3_file *pFd, int a, void *b){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xFileControl(pWrapper->pFd, a, b); +} +static int vfsWrapSectorSize(sqlite3_file *pFd){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xSectorSize(pWrapper->pFd); +} +static int vfsWrapDeviceCharacteristics(sqlite3_file *pFd){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xDeviceCharacteristics(pWrapper->pFd); +} +static int vfsWrapShmMap( + sqlite3_file *pFd, + int a, int b, int c, + void volatile **d +){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xShmMap(pWrapper->pFd, a, b, c, d); +} +static int vfsWrapShmLock(sqlite3_file *pFd, int offset, int n, int flags){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + Config *pConfig = pWrapper->pConfig; + int bMutex = 0; + int rc; + + if( (offset==0 && n==1) + && (flags & SQLITE_SHM_LOCK) && (flags & SQLITE_SHM_EXCLUSIVE) + ){ + pthread_mutex_lock(&pConfig->mutex); + pWrapper->bWriter = 1; + bMutex = 1; + if( pWrapper->iTid ){ + sqlite3_int64 t = vfslog_time(); + pConfig->aCtx[pWrapper->iTid-1].aTime[THREAD_TIME_WRITER] -= t; + } + } + + rc = pWrapper->pFd->pMethods->xShmLock(pWrapper->pFd, offset, n, flags); + + if( (rc!=SQLITE_OK && bMutex) + || (offset==0 && (flags & SQLITE_SHM_UNLOCK) && pWrapper->bWriter) + ){ + assert( pWrapper->bWriter ); + pthread_mutex_unlock(&pConfig->mutex); + pWrapper->bWriter = 0; + if( pWrapper->iTid ){ + sqlite3_int64 t = vfslog_time(); + pConfig->aCtx[pWrapper->iTid-1].aTime[THREAD_TIME_WRITER] += t; + } + } + + return rc; +} +static void vfsWrapShmBarrier(sqlite3_file *pFd){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xShmBarrier(pWrapper->pFd); +} +static int vfsWrapShmUnmap(sqlite3_file *pFd, int a){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xShmUnmap(pWrapper->pFd, a); +} +static int vfsWrapFetch(sqlite3_file *pFd, sqlite3_int64 a, int b, void **c){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xFetch(pWrapper->pFd, a, b, c); +} +static int vfsWrapUnfetch(sqlite3_file *pFd, sqlite3_int64 a, void *b){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xUnfetch(pWrapper->pFd, a, b); +} + +static void create_vfs(Config *pConfig){ + static sqlite3_vfs vfs = { + 3, 0, 0, 0, "wrapper", 0, + vfsWrapOpen, vfsWrapDelete, vfsWrapAccess, + vfsWrapFullPathname, vfsWrapDlOpen, vfsWrapDlError, + vfsWrapDlSym, vfsWrapDlClose, vfsWrapRandomness, + vfsWrapSleep, vfsWrapCurrentTime, vfsWrapGetLastError, + vfsWrapCurrentTimeInt64, vfsWrapSetSystemCall, vfsWrapGetSystemCall, + vfsWrapNextSystemCall + }; + sqlite3_vfs *pVfs; + + pVfs = sqlite3_vfs_find(0); + vfs.mxPathname = pVfs->mxPathname; + vfs.szOsFile = pVfs->szOsFile + sizeof(VfsWrapperFd); + vfs.pAppData = (void*)pConfig; + pConfig->pVfs = pVfs; + + sqlite3_vfs_register(&vfs, 1); +} + + +/* +** Wal hook used by connections in thread_main(). +*/ +static int thread_wal_hook( + void *pArg, /* Pointer to ThreadCtx object */ + sqlite3 *db, + const char *zDb, + int nFrame +){ + ThreadCtx *pCtx = (ThreadCtx*)pArg; + Config *pConfig = pCtx->pConfig; + + if( pConfig->nAutoCkpt && nFrame>=pConfig->nAutoCkpt ){ + pCtx->aTime[THREAD_TIME_CKPT] -= vfslog_time(); + pthread_mutex_lock(&pConfig->mutex); + if( pConfig->nCondWait>=0 ){ + pConfig->nCondWait++; + if( pConfig->nCondWait==pConfig->nThread ){ + execsql(pCtx->pErr, pCtx->pDb, "PRAGMA wal_checkpoint"); + pthread_cond_broadcast(&pConfig->cond); + }else{ + pthread_cond_wait(&pConfig->cond, &pConfig->mutex); + } + pConfig->nCondWait--; + } + pthread_mutex_unlock(&pConfig->mutex); + pCtx->aTime[THREAD_TIME_CKPT] += vfslog_time(); + } + + return SQLITE_OK; +} + + +static char *thread_main(int iTid, void *pArg){ + Config *pConfig = (Config*)pArg; + Error err = {0}; /* Error code and message */ + Sqlite db = {0}; /* SQLite database connection */ + int nAttempt = 0; /* Attempted transactions */ + int nCommit = 0; /* Successful transactions */ + int j; + ThreadCtx *pCtx = &pConfig->aCtx[iTid-1]; + char *zUri = 0; + +#ifdef USE_OSINST + char *zOsinstName = 0; + char *zLogName = 0; + if( pConfig->bOsinst ){ + zOsinstName = sqlite3_mprintf("osinst%d", iTid); + zLogName = sqlite3_mprintf("bc_test1.log.%d.%d", (int)getpid(), iTid); + zUri = sqlite3_mprintf( + "file:%s?vfs=%s&tid=%d", pConfig->zFile, zOsinstName, iTid + ); + sqlite3_vfslog_new(zOsinstName, 0, zLogName); + opendb(&err, &db, zUri, 0); + }else +#endif + { + zUri = sqlite3_mprintf("file:%s?tid=%d", pConfig->zFile, iTid); + opendb(&err, &db, zUri, 0); + } + + sqlite3_busy_handler(db.db, 0, 0); + sql_script_printf(&err, &db, + "PRAGMA wal_autocheckpoint = 0;" + "PRAGMA synchronous = 0;" + "PRAGMA mmap_size = %lld;", + (i64)(pConfig->nMmap) * 1024 * 1024 + ); + + pCtx->pConfig = pConfig; + pCtx->pErr = &err; + pCtx->pDb = &db; + sqlite3_wal_hook(db.db, thread_wal_hook, (void*)pCtx); + + while( !timetostop(&err) ){ + execsql(&err, &db, "BEGIN CONCURRENT"); + + pCtx->aTime[THREAD_TIME_INSERT] -= vfslog_time(); + for(j=0; jnIPT; j++){ + execsql(&err, &db, + "INSERT INTO t1 VALUES" + "(randomblob(10), randomblob(20), randomblob(30), randomblob(200))" + ); + } + pCtx->aTime[THREAD_TIME_INSERT] += vfslog_time(); + + pCtx->aTime[THREAD_TIME_COMMIT] -= vfslog_time(); + execsql(&err, &db, "COMMIT"); + pCtx->aTime[THREAD_TIME_COMMIT] += vfslog_time(); + + pCtx->aTime[THREAD_TIME_ROLLBACK] -= vfslog_time(); + nAttempt++; + if( err.rc==SQLITE_OK ){ + nCommit++; + }else{ + clear_error(&err, SQLITE_BUSY); + execsql(&err, &db, "ROLLBACK"); + } + pCtx->aTime[THREAD_TIME_ROLLBACK] += vfslog_time(); + + if( pConfig->bClearCache ){ + sqlite3_db_release_memory(db.db); + } + } + + closedb(&err, &db); + +#ifdef USE_OSINST + if( pConfig->bOsinst ){ + sqlite3_vfslog_finalize(zOsinstName); + sqlite3_free(zOsinstName); + sqlite3_free(zLogName); + } +#endif + sqlite3_free(zUri); + + pthread_mutex_lock(&pConfig->mutex); + pConfig->nCondWait = -1; + pthread_cond_broadcast(&pConfig->cond); + pthread_mutex_unlock(&pConfig->mutex); + + return sqlite3_mprintf("commits: %d/%d insert: %dms" + " commit: %dms" + " rollback: %dms" + " writer: %dms" + " checkpoint: %dms", + nCommit, nAttempt, + (int)(pCtx->aTime[THREAD_TIME_INSERT]/1000), + (int)(pCtx->aTime[THREAD_TIME_COMMIT]/1000), + (int)(pCtx->aTime[THREAD_TIME_ROLLBACK]/1000), + (int)(pCtx->aTime[THREAD_TIME_WRITER]/1000), + (int)(pCtx->aTime[THREAD_TIME_CKPT]/1000) + ); +} + +int main(int argc, const char **argv){ + Error err = {0}; /* Error code and message */ + Sqlite db = {0}; /* SQLite database connection */ + Threadset threads = {0}; /* Test threads */ + Config conf = {5, 3, 5}; + int i; + + CmdlineArg apArg[] = { + { "-seconds", CMDLINE_INT, offsetof(Config, nSecond) }, + { "-inserts", CMDLINE_INT, offsetof(Config, nIPT) }, + { "-threads", CMDLINE_INT, offsetof(Config, nThread) }, + { "-mutex", CMDLINE_BOOL, offsetof(Config, bMutex) }, + { "-rm", CMDLINE_BOOL, offsetof(Config, bRm) }, + { "-autockpt",CMDLINE_INT, offsetof(Config, nAutoCkpt) }, + { "-mmap", CMDLINE_INT, offsetof(Config, nMmap) }, + { "-clear-cache", CMDLINE_BOOL, offsetof(Config, bClearCache) }, + { "-file", CMDLINE_STRING, offsetof(Config, zFile) }, + { "-osinst", CMDLINE_BOOL, offsetof(Config, bOsinst) }, + { 0, 0, 0 } + }; + + conf.nAutoCkpt = 1000; + cmdline_process(apArg, argc, argv, (void*)&conf); + if( err.rc==SQLITE_OK ){ + char *z = cmdline_construct(apArg, (void*)&conf); + printf("With: %s\n", z); + sqlite3_free(z); + } + if( conf.zFile==0 ){ + conf.zFile = "xyz.db"; + } + + /* Create the special VFS - "wrapper". And the mutex and condition + ** variable. */ + create_vfs(&conf); + pthread_mutex_init(&conf.mutex, 0); + pthread_cond_init(&conf.cond, 0); + + conf.aCtx = sqlite3_malloc(sizeof(ThreadCtx) * conf.nThread); + memset(conf.aCtx, 0, sizeof(ThreadCtx) * conf.nThread); + + /* Ensure the schema has been created */ + opendb(&err, &db, conf.zFile, conf.bRm); + sql_script(&err, &db, + "PRAGMA journal_mode = wal;" + "CREATE TABLE IF NOT EXISTS t1(a PRIMARY KEY, b, c, d) WITHOUT ROWID;" + "CREATE INDEX IF NOT EXISTS t1b ON t1(b);" + "CREATE INDEX IF NOT EXISTS t1c ON t1(c);" + ); + + setstoptime(&err, conf.nSecond*1000); + if( conf.nThread==1 ){ + char *z = thread_main(1, (void*)&conf); + printf("Thread 0 says: %s\n", (z==0 ? "..." : z)); + fflush(stdout); + }else{ + for(i=0; i$nPg + } {1} + + do_test 1.$tn.7 { + sql2 { PRAGMA integrity_check } + } {ok} + + do_test 1.$tn.8 { + sql1 { + BEGIN CONCURRENT; + CREATE TABLE t4(a, b); + } + sql2 { + INSERT INTO t1 VALUES(2, 2); + } + list [catch { sql1 COMMIT } msg] $msg + } {1 {database is locked}} + sql1 ROLLBACK + + do_test 1.$tn.9 { + sql1 { + BEGIN CONCURRENT; + CREATE TEMP TABLE t5(a, b); + INSERT INTO t2 VALUES('x', 'x'); + } + sql2 { + INSERT INTO t1 VALUES(3, 3); + CREATE TEMP TABLE t1(x, y); + } + sql1 COMMIT + } {} +} + + + +finish_test + + ADDED test/concurrent9.test Index: test/concurrent9.test ================================================================== --- /dev/null +++ test/concurrent9.test @@ -0,0 +1,120 @@ +# 2023 January 12 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file implements regression tests for SQLite library. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix concurrent9 + +do_execsql_test 1.0 { + CREATE TABLE t1(x); + INSERT INTO t1 VALUES(1), (2); + CREATE TABLE t2(y); + INSERT INTO t2 VALUES('a'), ('b'); + PRAGMA journal_mode = wal; +} {wal} + +db close + +#------------------------------------------------------------------------- +# Fix a problem that may occur if a BEGIN CONCURRENT transaction is +# started when the wal file is completely empty and committed after +# it has been initialized by some other connection. +# +sqlite3 db test.db +sqlite3 db2 test.db + +do_execsql_test -db db 1.1 { + BEGIN CONCURRENT; + INSERT INTO t2 VALUES('c'); +} + +do_execsql_test -db db2 1.2 { + INSERT INTO t1 VALUES(3); +} + +do_execsql_test -db db 1.3 { + COMMIT; +} + +do_execsql_test -db db2 1.4 { + SELECT * FROM t1; + SELECT * FROM t2; +} {1 2 3 a b c} + +db2 close + +#------------------------------------------------------------------------- +reset_db + +do_execsql_test 2.1 { + CREATE TABLE t1(a INTEGER PRIMARY KEY, b); + PRAGMA journal_mode = wal; + WITH s(i) AS ( + SELECT 1 UNION SELECT i+1 FROM s WHERE i<500 + ) + INSERT INTO t1(b) SELECT hex(randomblob(200)) FROM s; + PRAGMA page_count; +} {wal 255} + +sqlite3 db2 test.db +do_execsql_test -db db2 2.2 { + DELETE FROM t1 WHERE a<100; + PRAGMA freelist_count; +} {49} + +do_execsql_test 2.3 { + BEGIN CONCURRENT; + WITH s(i) AS ( + SELECT 1 UNION SELECT i+1 FROM s WHERE i<100 + ) + INSERT INTO t1(b) SELECT hex(randomblob(200)) FROM s; +} + +sqlite3_db_status db CACHE_MISS 1 +do_execsql_test 2.4.1 { + COMMIT; +} + +do_test 2.4.2 { + lindex [sqlite3_db_status db CACHE_MISS 0] 1 +} {1} + +do_execsql_test -db db2 2.5 { + DELETE FROM t1 WHERE a<200; + PRAGMA freelist_count; +} {50} + +do_execsql_test 2.6 { + BEGIN CONCURRENT; + WITH s(i) AS ( + SELECT 1 UNION SELECT i+1 FROM s WHERE i<100 + ) + INSERT INTO t1(b) SELECT hex(randomblob(200)) FROM s; + DELETE FROM t1 WHERE rowid BETWEEN 600 AND 680; +} + +sqlite3_db_status db CACHE_MISS 1 +do_execsql_test 2.7.1 { + COMMIT; +} +do_test 2.7.2 { + lindex [sqlite3_db_status db CACHE_MISS 0] 1 +} {1} + +do_execsql_test 2.8 { + PRAGMA integrity_check; +} {ok} + +finish_test + + Index: test/fts3corrupt4.test ================================================================== --- test/fts3corrupt4.test +++ test/fts3corrupt4.test @@ -4396,10 +4396,11 @@ do_catchsql_test 25.4 { WITH RECURSIVE c(x) AS (VALUES(1) UNION ALL SELECT x%1 FROM c WHERE 599237 +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* +** The "Set Error Line" macro. +*/ +#define SEL(e) ((e)->iLine = ((e)->rc ? (e)->iLine : __LINE__)) + +/* Database functions */ +#define opendb(w,x,y,z) (SEL(w), opendb_x(w,x,y,z)) +#define closedb(y,z) (SEL(y), closedb_x(y,z)) + +/* Functions to execute SQL */ +#define sql_script(x,y,z) (SEL(x), sql_script_x(x,y,z)) +#define integrity_check(x,y) (SEL(x), integrity_check_x(x,y)) +#define execsql_i64(x,y,...) (SEL(x), execsql_i64_x(x,y,__VA_ARGS__)) +#define execsql_text(x,y,z,...) (SEL(x), execsql_text_x(x,y,z,__VA_ARGS__)) +#define execsql(x,y,...) (SEL(x), (void)execsql_i64_x(x,y,__VA_ARGS__)) +#define sql_script_printf(x,y,z,...) ( \ + SEL(x), sql_script_printf_x(x,y,z,__VA_ARGS__) \ +) + +/* Thread functions */ +#define launch_thread(w,x,y,z) (SEL(w), launch_thread_x(w,x,y,z)) +#define join_all_threads(y,z) (SEL(y), join_all_threads_x(y,z)) + +/* Timer functions */ +#define setstoptime(y,z) (SEL(y), setstoptime_x(y,z)) +#define timetostop(z) (SEL(z), timetostop_x(z)) + +/* Report/clear errors. */ +#define test_error(z, ...) test_error_x(z, sqlite3_mprintf(__VA_ARGS__)) +#define clear_error(y,z) clear_error_x(y, z) + +/* File-system operations */ +#define filesize(y,z) (SEL(y), filesize_x(y,z)) +#define filecopy(x,y,z) (SEL(x), filecopy_x(x,y,z)) + +#define PTR2INT(x) ((int)((intptr_t)x)) +#define INT2PTR(x) ((void*)((intptr_t)x)) + +/* +** End of test code/infrastructure interface macros. +*************************************************************************/ + + +/************************************************************************ +** Start of command line processing utilities. +*/ +#define CMDLINE_INT 1 +#define CMDLINE_BOOL 2 +#define CMDLINE_STRING 3 + +typedef struct CmdlineArg CmdlineArg; +struct CmdlineArg { + const char *zSwitch; + int eType; + int iOffset; +}; + +static void cmdline_error(const char *zFmt, ...){ + va_list ap; /* ... arguments */ + char *zMsg = 0; + va_start(ap, zFmt); + zMsg = sqlite3_vmprintf(zFmt, ap); + fprintf(stderr, "%s\n", zMsg); + sqlite3_free(zMsg); + va_end(ap); + exit(-1); +} + +static void cmdline_usage(const char *zPrg, CmdlineArg *apArg){ + int i; + fprintf(stderr, "Usage: %s SWITCHES\n", zPrg); + fprintf(stderr, "\n"); + fprintf(stderr, "where switches are\n"); + for(i=0; apArg[i].zSwitch; i++){ + const char *zExtra = ""; + switch( apArg[i].eType ){ + case CMDLINE_STRING: zExtra = "STRING"; break; + case CMDLINE_INT: zExtra = "N"; break; + case CMDLINE_BOOL: zExtra = ""; break; + default: + zExtra = "???"; + break; + } + fprintf(stderr, " %s %s\n", apArg[i].zSwitch, zExtra); + } + fprintf(stderr, "\n"); + exit(-2); +} + +static char *cmdline_construct(CmdlineArg *apArg, void *pObj){ + unsigned char *p = (unsigned char*)pObj; + char *zRet = 0; + int iArg; + + for(iArg=0; apArg[iArg].zSwitch; iArg++){ + const char *zSpace = (zRet ? " " : ""); + CmdlineArg *pArg = &apArg[iArg]; + + switch( pArg->eType ){ + case CMDLINE_STRING: { + char *zVal = *(char**)(p + pArg->iOffset); + if( zVal ){ + zRet = sqlite3_mprintf("%z%s%s %s", zRet, zSpace, pArg->zSwitch,zVal); + } + break; + }; + + case CMDLINE_INT: { + zRet = sqlite3_mprintf("%z%s%s %d", zRet, zSpace, pArg->zSwitch, + *(int*)(p + pArg->iOffset) + ); + break; + }; + + case CMDLINE_BOOL: + if( *(int*)(p + pArg->iOffset) ){ + zRet = sqlite3_mprintf("%z%s%s", zRet, zSpace, pArg->zSwitch); + } + break; + + default: + zRet = sqlite3_mprintf("%z%s%s ???", zRet, zSpace, pArg->zSwitch); + } + } + + return zRet; +} + +static void cmdline_process( + CmdlineArg *apArg, + int argc, + const char **argv, + void *pObj +){ + int i; + int iArg; + unsigned char *p = (unsigned char*)pObj; + + for(i=1; i=0 ){ + cmdline_error("ambiguous switch: %s", z); + } + iOpt = iArg; + switch( apArg[iArg].eType ){ + case CMDLINE_INT: + i++; + if( i==argc ){ + cmdline_error("option requires an argument: %s", z); + } + *(int*)(p + apArg[iArg].iOffset) = atoi(argv[i]); + break; + + case CMDLINE_STRING: + i++; + if( i==argc ){ + cmdline_error("option requires an argument: %s", z); + } + *(char**)(p + apArg[iArg].iOffset) = sqlite3_mprintf("%s", argv[i]); + break; + + case CMDLINE_BOOL: + *(int*)(p + apArg[iArg].iOffset) = 1; + break; + + default: + assert( 0 ); + cmdline_error("internal error"); + return; + } + } + } + + if( iOpt<0 ){ + cmdline_usage(argv[0], apArg); + } + } +} + +/* +** End of command line processing utilities. +*************************************************************************/ + + +/* + * This code implements the MD5 message-digest algorithm. + * The algorithm is due to Ron Rivest. This code was + * written by Colin Plumb in 1993, no copyright is claimed. + * This code is in the public domain; do with it what you wish. + * + * Equivalent code is available from RSA Data Security, Inc. + * This code has been tested against that, and is equivalent, + * except that you don't need to include two pages of legalese + * with every copy. + * + * To compute the message digest of a chunk of bytes, declare an + * MD5Context structure, pass it to MD5Init, call MD5Update as + * needed on buffers full of bytes, and then call MD5Final, which + * will fill a supplied 16-byte array with the digest. + */ + +/* + * If compiled on a machine that doesn't have a 32-bit integer, + * you just set "uint32" to the appropriate datatype for an + * unsigned 32-bit integer. For example: + * + * cc -Duint32='unsigned long' md5.c + * + */ +#ifndef uint32 +# define uint32 unsigned int +#endif + +struct MD5Context { + int isInit; + uint32 buf[4]; + uint32 bits[2]; + union { + unsigned char in[64]; + uint32 in32[16]; + } u; +}; +typedef struct MD5Context MD5Context; + +/* + * Note: this code is harmless on little-endian machines. + */ +static void byteReverse (unsigned char *buf, unsigned longs){ + uint32 t; + do { + t = (uint32)((unsigned)buf[3]<<8 | buf[2]) << 16 | + ((unsigned)buf[1]<<8 | buf[0]); + *(uint32 *)buf = t; + buf += 4; + } while (--longs); +} +/* The four core functions - F1 is optimized somewhat */ + +/* #define F1(x, y, z) (x & y | ~x & z) */ +#define F1(x, y, z) (z ^ (x & (y ^ z))) +#define F2(x, y, z) F1(z, x, y) +#define F3(x, y, z) (x ^ y ^ z) +#define F4(x, y, z) (y ^ (x | ~z)) + +/* This is the central step in the MD5 algorithm. */ +#define MD5STEP(f, w, x, y, z, data, s) \ + ( w += f(x, y, z) + data, w = w<>(32-s), w += x ) + +/* + * The core of the MD5 algorithm, this alters an existing MD5 hash to + * reflect the addition of 16 longwords of new data. MD5Update blocks + * the data and converts bytes into longwords for this routine. + */ +static void MD5Transform(uint32 buf[4], const uint32 in[16]){ + register uint32 a, b, c, d; + + a = buf[0]; + b = buf[1]; + c = buf[2]; + d = buf[3]; + + MD5STEP(F1, a, b, c, d, in[ 0]+0xd76aa478, 7); + MD5STEP(F1, d, a, b, c, in[ 1]+0xe8c7b756, 12); + MD5STEP(F1, c, d, a, b, in[ 2]+0x242070db, 17); + MD5STEP(F1, b, c, d, a, in[ 3]+0xc1bdceee, 22); + MD5STEP(F1, a, b, c, d, in[ 4]+0xf57c0faf, 7); + MD5STEP(F1, d, a, b, c, in[ 5]+0x4787c62a, 12); + MD5STEP(F1, c, d, a, b, in[ 6]+0xa8304613, 17); + MD5STEP(F1, b, c, d, a, in[ 7]+0xfd469501, 22); + MD5STEP(F1, a, b, c, d, in[ 8]+0x698098d8, 7); + MD5STEP(F1, d, a, b, c, in[ 9]+0x8b44f7af, 12); + MD5STEP(F1, c, d, a, b, in[10]+0xffff5bb1, 17); + MD5STEP(F1, b, c, d, a, in[11]+0x895cd7be, 22); + MD5STEP(F1, a, b, c, d, in[12]+0x6b901122, 7); + MD5STEP(F1, d, a, b, c, in[13]+0xfd987193, 12); + MD5STEP(F1, c, d, a, b, in[14]+0xa679438e, 17); + MD5STEP(F1, b, c, d, a, in[15]+0x49b40821, 22); + + MD5STEP(F2, a, b, c, d, in[ 1]+0xf61e2562, 5); + MD5STEP(F2, d, a, b, c, in[ 6]+0xc040b340, 9); + MD5STEP(F2, c, d, a, b, in[11]+0x265e5a51, 14); + MD5STEP(F2, b, c, d, a, in[ 0]+0xe9b6c7aa, 20); + MD5STEP(F2, a, b, c, d, in[ 5]+0xd62f105d, 5); + MD5STEP(F2, d, a, b, c, in[10]+0x02441453, 9); + MD5STEP(F2, c, d, a, b, in[15]+0xd8a1e681, 14); + MD5STEP(F2, b, c, d, a, in[ 4]+0xe7d3fbc8, 20); + MD5STEP(F2, a, b, c, d, in[ 9]+0x21e1cde6, 5); + MD5STEP(F2, d, a, b, c, in[14]+0xc33707d6, 9); + MD5STEP(F2, c, d, a, b, in[ 3]+0xf4d50d87, 14); + MD5STEP(F2, b, c, d, a, in[ 8]+0x455a14ed, 20); + MD5STEP(F2, a, b, c, d, in[13]+0xa9e3e905, 5); + MD5STEP(F2, d, a, b, c, in[ 2]+0xfcefa3f8, 9); + MD5STEP(F2, c, d, a, b, in[ 7]+0x676f02d9, 14); + MD5STEP(F2, b, c, d, a, in[12]+0x8d2a4c8a, 20); + + MD5STEP(F3, a, b, c, d, in[ 5]+0xfffa3942, 4); + MD5STEP(F3, d, a, b, c, in[ 8]+0x8771f681, 11); + MD5STEP(F3, c, d, a, b, in[11]+0x6d9d6122, 16); + MD5STEP(F3, b, c, d, a, in[14]+0xfde5380c, 23); + MD5STEP(F3, a, b, c, d, in[ 1]+0xa4beea44, 4); + MD5STEP(F3, d, a, b, c, in[ 4]+0x4bdecfa9, 11); + MD5STEP(F3, c, d, a, b, in[ 7]+0xf6bb4b60, 16); + MD5STEP(F3, b, c, d, a, in[10]+0xbebfbc70, 23); + MD5STEP(F3, a, b, c, d, in[13]+0x289b7ec6, 4); + MD5STEP(F3, d, a, b, c, in[ 0]+0xeaa127fa, 11); + MD5STEP(F3, c, d, a, b, in[ 3]+0xd4ef3085, 16); + MD5STEP(F3, b, c, d, a, in[ 6]+0x04881d05, 23); + MD5STEP(F3, a, b, c, d, in[ 9]+0xd9d4d039, 4); + MD5STEP(F3, d, a, b, c, in[12]+0xe6db99e5, 11); + MD5STEP(F3, c, d, a, b, in[15]+0x1fa27cf8, 16); + MD5STEP(F3, b, c, d, a, in[ 2]+0xc4ac5665, 23); + + MD5STEP(F4, a, b, c, d, in[ 0]+0xf4292244, 6); + MD5STEP(F4, d, a, b, c, in[ 7]+0x432aff97, 10); + MD5STEP(F4, c, d, a, b, in[14]+0xab9423a7, 15); + MD5STEP(F4, b, c, d, a, in[ 5]+0xfc93a039, 21); + MD5STEP(F4, a, b, c, d, in[12]+0x655b59c3, 6); + MD5STEP(F4, d, a, b, c, in[ 3]+0x8f0ccc92, 10); + MD5STEP(F4, c, d, a, b, in[10]+0xffeff47d, 15); + MD5STEP(F4, b, c, d, a, in[ 1]+0x85845dd1, 21); + MD5STEP(F4, a, b, c, d, in[ 8]+0x6fa87e4f, 6); + MD5STEP(F4, d, a, b, c, in[15]+0xfe2ce6e0, 10); + MD5STEP(F4, c, d, a, b, in[ 6]+0xa3014314, 15); + MD5STEP(F4, b, c, d, a, in[13]+0x4e0811a1, 21); + MD5STEP(F4, a, b, c, d, in[ 4]+0xf7537e82, 6); + MD5STEP(F4, d, a, b, c, in[11]+0xbd3af235, 10); + MD5STEP(F4, c, d, a, b, in[ 2]+0x2ad7d2bb, 15); + MD5STEP(F4, b, c, d, a, in[ 9]+0xeb86d391, 21); + + buf[0] += a; + buf[1] += b; + buf[2] += c; + buf[3] += d; +} + +/* + * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious + * initialization constants. + */ +static void MD5Init(MD5Context *ctx){ + ctx->isInit = 1; + ctx->buf[0] = 0x67452301; + ctx->buf[1] = 0xefcdab89; + ctx->buf[2] = 0x98badcfe; + ctx->buf[3] = 0x10325476; + ctx->bits[0] = 0; + ctx->bits[1] = 0; +} + +/* + * Update context to reflect the concatenation of another buffer full + * of bytes. + */ +static +void MD5Update(MD5Context *ctx, const unsigned char *buf, unsigned int len){ + uint32 t; + + /* Update bitcount */ + + t = ctx->bits[0]; + if ((ctx->bits[0] = t + ((uint32)len << 3)) < t) + ctx->bits[1]++; /* Carry from low to high */ + ctx->bits[1] += len >> 29; + + t = (t >> 3) & 0x3f; /* Bytes already in shsInfo->data */ + + /* Handle any leading odd-sized chunks */ + + if ( t ) { + unsigned char *p = (unsigned char *)ctx->u.in + t; + + t = 64-t; + if (len < t) { + memcpy(p, buf, len); + return; + } + memcpy(p, buf, t); + byteReverse(ctx->u.in, 16); + MD5Transform(ctx->buf, (uint32 *)ctx->u.in); + buf += t; + len -= t; + } + + /* Process data in 64-byte chunks */ + + while (len >= 64) { + memcpy(ctx->u.in, buf, 64); + byteReverse(ctx->u.in, 16); + MD5Transform(ctx->buf, (uint32 *)ctx->u.in); + buf += 64; + len -= 64; + } + + /* Handle any remaining bytes of data. */ + + memcpy(ctx->u.in, buf, len); +} + +/* + * Final wrapup - pad to 64-byte boundary with the bit pattern + * 1 0* (64-bit count of bits processed, MSB-first) + */ +static void MD5Final(unsigned char digest[16], MD5Context *ctx){ + unsigned count; + unsigned char *p; + + /* Compute number of bytes mod 64 */ + count = (ctx->bits[0] >> 3) & 0x3F; + + /* Set the first char of padding to 0x80. This is safe since there is + always at least one byte free */ + p = ctx->u.in + count; + *p++ = 0x80; + + /* Bytes of padding needed to make 64 bytes */ + count = 64 - 1 - count; + + /* Pad out to 56 mod 64 */ + if (count < 8) { + /* Two lots of padding: Pad the first block to 64 bytes */ + memset(p, 0, count); + byteReverse(ctx->u.in, 16); + MD5Transform(ctx->buf, (uint32 *)ctx->u.in); + + /* Now fill the next block with 56 bytes */ + memset(ctx->u.in, 0, 56); + } else { + /* Pad block to 56 bytes */ + memset(p, 0, count-8); + } + byteReverse(ctx->u.in, 14); + + /* Append length in bits and transform */ + ctx->u.in32[14] = ctx->bits[0]; + ctx->u.in32[15] = ctx->bits[1]; + + MD5Transform(ctx->buf, (uint32 *)ctx->u.in); + byteReverse((unsigned char *)ctx->buf, 4); + memcpy(digest, ctx->buf, 16); + memset(ctx, 0, sizeof(*ctx)); /* In case it is sensitive */ +} + +/* +** Convert a 128-bit MD5 digest into a 32-digit base-16 number. +*/ +static void MD5DigestToBase16(unsigned char *digest, char *zBuf){ + static char const zEncode[] = "0123456789abcdef"; + int i, j; + + for(j=i=0; i<16; i++){ + int a = digest[i]; + zBuf[j++] = zEncode[(a>>4)&0xf]; + zBuf[j++] = zEncode[a & 0xf]; + } + zBuf[j] = 0; +} + +/* +** During testing, the special md5sum() aggregate function is available. +** inside SQLite. The following routines implement that function. +*/ +static void md5step(sqlite3_context *context, int argc, sqlite3_value **argv){ + MD5Context *p; + int i; + if( argc<1 ) return; + p = sqlite3_aggregate_context(context, sizeof(*p)); + if( p==0 ) return; + if( !p->isInit ){ + MD5Init(p); + } + for(i=0; izErr); + p->zErr = 0; + p->rc = 0; +} + +static void print_err(Error *p){ + if( p->rc!=SQLITE_OK ){ + int isWarn = 0; + if( p->rc==SQLITE_SCHEMA ) isWarn = 1; + if( sqlite3_strglob("* - no such table: *",p->zErr)==0 ) isWarn = 1; + printf("%s: (%d) \"%s\" at line %d\n", isWarn ? "Warning" : "Error", + p->rc, p->zErr, p->iLine); + if( !isWarn ) nGlobalErr++; + fflush(stdout); + } +} + +static void print_and_free_err(Error *p){ + print_err(p); + free_err(p); +} + +static void system_error(Error *pErr, int iSys){ + pErr->rc = iSys; + pErr->zErr = (char *)sqlite3_malloc(512); + strerror_r(iSys, pErr->zErr, 512); + pErr->zErr[511] = '\0'; +} + +static void sqlite_error( + Error *pErr, + Sqlite *pDb, + const char *zFunc +){ + pErr->rc = sqlite3_errcode(pDb->db); + pErr->zErr = sqlite3_mprintf( + "sqlite3_%s() - %s (%d)", zFunc, sqlite3_errmsg(pDb->db), + sqlite3_extended_errcode(pDb->db) + ); +} + +static void test_error_x( + Error *pErr, + char *zErr +){ + if( pErr->rc==SQLITE_OK ){ + pErr->rc = 1; + pErr->zErr = zErr; + }else{ + sqlite3_free(zErr); + } +} + +static void clear_error_x( + Error *pErr, + int rc +){ + if( pErr->rc==rc ){ + pErr->rc = SQLITE_OK; + sqlite3_free(pErr->zErr); + pErr->zErr = 0; + } +} + +static int busyhandler(void *pArg, int n){ + usleep(10*1000); + return 1; +} + +static void opendb_x( + Error *pErr, /* IN/OUT: Error code */ + Sqlite *pDb, /* OUT: Database handle */ + const char *zFile, /* Database file name */ + int bDelete /* True to delete db file before opening */ +){ + if( pErr->rc==SQLITE_OK ){ + int rc; + int flags = SQLITE_OPEN_CREATE | SQLITE_OPEN_READWRITE | SQLITE_OPEN_URI; + if( bDelete ) unlink(zFile); + rc = sqlite3_open_v2(zFile, &pDb->db, flags, 0); + if( rc ){ + sqlite_error(pErr, pDb, "open"); + sqlite3_close(pDb->db); + pDb->db = 0; + }else{ + sqlite3_create_function( + pDb->db, "md5sum", -1, SQLITE_UTF8, 0, 0, md5step, md5finalize + ); + sqlite3_busy_handler(pDb->db, busyhandler, 0); + sqlite3_exec(pDb->db, "PRAGMA synchronous=OFF", 0, 0, 0); + } + } +} + +static void closedb_x( + Error *pErr, /* IN/OUT: Error code */ + Sqlite *pDb /* OUT: Database handle */ +){ + int rc; + int i; + Statement *pIter; + Statement *pNext; + for(pIter=pDb->pCache; pIter; pIter=pNext){ + pNext = pIter->pNext; + sqlite3_finalize(pIter->pStmt); + sqlite3_free(pIter); + } + for(i=0; inText; i++){ + sqlite3_free(pDb->aText[i]); + } + sqlite3_free(pDb->aText); + rc = sqlite3_close(pDb->db); + if( rc && pErr->rc==SQLITE_OK ){ + pErr->zErr = sqlite3_mprintf("%s", sqlite3_errmsg(pDb->db)); + } + memset(pDb, 0, sizeof(Sqlite)); +} + +static void sql_script_x( + Error *pErr, /* IN/OUT: Error code */ + Sqlite *pDb, /* Database handle */ + const char *zSql /* SQL script to execute */ +){ + if( pErr->rc==SQLITE_OK ){ + pErr->rc = sqlite3_exec(pDb->db, zSql, 0, 0, &pErr->zErr); + } +} + +static void sql_script_printf_x( + Error *pErr, /* IN/OUT: Error code */ + Sqlite *pDb, /* Database handle */ + const char *zFormat, /* SQL printf format string */ + ... /* Printf args */ +){ + va_list ap; /* ... printf arguments */ + va_start(ap, zFormat); + if( pErr->rc==SQLITE_OK ){ + char *zSql = sqlite3_vmprintf(zFormat, ap); + pErr->rc = sqlite3_exec(pDb->db, zSql, 0, 0, &pErr->zErr); + sqlite3_free(zSql); + } + va_end(ap); +} + +static Statement *getSqlStatement( + Error *pErr, /* IN/OUT: Error code */ + Sqlite *pDb, /* Database handle */ + const char *zSql /* SQL statement */ +){ + Statement *pRet; + int rc; + + for(pRet=pDb->pCache; pRet; pRet=pRet->pNext){ + if( 0==strcmp(sqlite3_sql(pRet->pStmt), zSql) ){ + return pRet; + } + } + + pRet = sqlite3_malloc(sizeof(Statement)); + rc = sqlite3_prepare_v2(pDb->db, zSql, -1, &pRet->pStmt, 0); + if( rc!=SQLITE_OK ){ + sqlite_error(pErr, pDb, "prepare_v2"); + return 0; + } + assert( 0==strcmp(sqlite3_sql(pRet->pStmt), zSql) ); + + pRet->pNext = pDb->pCache; + pDb->pCache = pRet; + return pRet; +} + +static sqlite3_stmt *getAndBindSqlStatement( + Error *pErr, /* IN/OUT: Error code */ + Sqlite *pDb, /* Database handle */ + va_list ap /* SQL followed by parameters */ +){ + Statement *pStatement; /* The SQLite statement wrapper */ + sqlite3_stmt *pStmt; /* The SQLite statement to return */ + int i; /* Used to iterate through parameters */ + + pStatement = getSqlStatement(pErr, pDb, va_arg(ap, const char *)); + if( !pStatement ) return 0; + pStmt = pStatement->pStmt; + for(i=1; i<=sqlite3_bind_parameter_count(pStmt); i++){ + const char *zName = sqlite3_bind_parameter_name(pStmt, i); + void * pArg = va_arg(ap, void*); + + switch( zName[1] ){ + case 'i': + sqlite3_bind_int64(pStmt, i, *(i64 *)pArg); + break; + + default: + pErr->rc = 1; + pErr->zErr = sqlite3_mprintf("Cannot discern type: \"%s\"", zName); + pStmt = 0; + break; + } + } + + return pStmt; +} + +static i64 execsql_i64_x( + Error *pErr, /* IN/OUT: Error code */ + Sqlite *pDb, /* Database handle */ + ... /* SQL and pointers to parameter values */ +){ + i64 iRet = 0; + if( pErr->rc==SQLITE_OK ){ + sqlite3_stmt *pStmt; /* SQL statement to execute */ + va_list ap; /* ... arguments */ + va_start(ap, pDb); + pStmt = getAndBindSqlStatement(pErr, pDb, ap); + if( pStmt ){ + int first = 1; + while( SQLITE_ROW==sqlite3_step(pStmt) ){ + if( first && sqlite3_column_count(pStmt)>0 ){ + iRet = sqlite3_column_int64(pStmt, 0); + } + first = 0; + } + if( SQLITE_OK!=sqlite3_reset(pStmt) ){ + sqlite_error(pErr, pDb, "reset"); + } + } + va_end(ap); + } + return iRet; +} + +static char * execsql_text_x( + Error *pErr, /* IN/OUT: Error code */ + Sqlite *pDb, /* Database handle */ + int iSlot, /* Db handle slot to store text in */ + ... /* SQL and pointers to parameter values */ +){ + char *zRet = 0; + + if( iSlot>=pDb->nText ){ + int nByte = sizeof(char *)*(iSlot+1); + pDb->aText = (char **)sqlite3_realloc(pDb->aText, nByte); + memset(&pDb->aText[pDb->nText], 0, sizeof(char*)*(iSlot+1-pDb->nText)); + pDb->nText = iSlot+1; + } + + if( pErr->rc==SQLITE_OK ){ + sqlite3_stmt *pStmt; /* SQL statement to execute */ + va_list ap; /* ... arguments */ + va_start(ap, iSlot); + pStmt = getAndBindSqlStatement(pErr, pDb, ap); + if( pStmt ){ + int first = 1; + while( SQLITE_ROW==sqlite3_step(pStmt) ){ + if( first && sqlite3_column_count(pStmt)>0 ){ + zRet = sqlite3_mprintf("%s", sqlite3_column_text(pStmt, 0)); + sqlite3_free(pDb->aText[iSlot]); + pDb->aText[iSlot] = zRet; + } + first = 0; + } + if( SQLITE_OK!=sqlite3_reset(pStmt) ){ + sqlite_error(pErr, pDb, "reset"); + } + } + va_end(ap); + } + + return zRet; +} + +static void integrity_check_x( + Error *pErr, /* IN/OUT: Error code */ + Sqlite *pDb /* Database handle */ +){ + if( pErr->rc==SQLITE_OK ){ + Statement *pStatement; /* Statement to execute */ + char *zErr = 0; /* Integrity check error */ + + pStatement = getSqlStatement(pErr, pDb, "PRAGMA integrity_check"); + if( pStatement ){ + sqlite3_stmt *pStmt = pStatement->pStmt; + while( SQLITE_ROW==sqlite3_step(pStmt) ){ + const char *z = (const char*)sqlite3_column_text(pStmt, 0); + if( strcmp(z, "ok") ){ + if( zErr==0 ){ + zErr = sqlite3_mprintf("%s", z); + }else{ + zErr = sqlite3_mprintf("%z\n%s", zErr, z); + } + } + } + sqlite3_reset(pStmt); + + if( zErr ){ + pErr->zErr = zErr; + pErr->rc = 1; + } + } + } +} + +static void *launch_thread_main(void *pArg){ + Thread *p = (Thread *)pArg; + return (void *)p->xProc(p->iTid, p->pArg); +} + +static void launch_thread_x( + Error *pErr, /* IN/OUT: Error code */ + Threadset *pThreads, /* Thread set */ + char *(*xProc)(int, void*), /* Proc to run */ + void *pArg /* Argument passed to thread proc */ +){ + if( pErr->rc==SQLITE_OK ){ + int iTid = ++pThreads->iMaxTid; + Thread *p; + int rc; + + p = (Thread *)sqlite3_malloc(sizeof(Thread)); + memset(p, 0, sizeof(Thread)); + p->iTid = iTid; + p->pArg = pArg; + p->xProc = xProc; + + rc = pthread_create(&p->tid, NULL, launch_thread_main, (void *)p); + if( rc!=0 ){ + system_error(pErr, rc); + sqlite3_free(p); + }else{ + p->pNext = pThreads->pThread; + pThreads->pThread = p; + } + } +} + +static void join_all_threads_x( + Error *pErr, /* IN/OUT: Error code */ + Threadset *pThreads /* Thread set */ +){ + Thread *p; + Thread *pNext; + for(p=pThreads->pThread; p; p=pNext){ + void *ret; + pNext = p->pNext; + int rc; + rc = pthread_join(p->tid, &ret); + if( rc!=0 ){ + if( pErr->rc==SQLITE_OK ) system_error(pErr, rc); + }else{ + printf("Thread %d says: %s\n", p->iTid, (ret==0 ? "..." : (char *)ret)); + fflush(stdout); + } + sqlite3_free(p); + } + pThreads->pThread = 0; +} + +static i64 filesize_x( + Error *pErr, + const char *zFile +){ + i64 iRet = 0; + if( pErr->rc==SQLITE_OK ){ + struct stat sStat; + if( stat(zFile, &sStat) ){ + iRet = -1; + }else{ + iRet = sStat.st_size; + } + } + return iRet; +} + +static void filecopy_x( + Error *pErr, + const char *zFrom, + const char *zTo +){ + if( pErr->rc==SQLITE_OK ){ + i64 nByte = filesize_x(pErr, zFrom); + if( nByte<0 ){ + test_error_x(pErr, sqlite3_mprintf("no such file: %s", zFrom)); + }else{ + i64 iOff; + char aBuf[1024]; + int fd1; + int fd2; + unlink(zTo); + + fd1 = open(zFrom, O_RDONLY); + if( fd1<0 ){ + system_error(pErr, errno); + return; + } + fd2 = open(zTo, O_RDWR|O_CREAT|O_EXCL, 0644); + if( fd2<0 ){ + system_error(pErr, errno); + close(fd1); + return; + } + + iOff = 0; + while( iOffnByte ){ + nCopy = nByte - iOff; + } + if( nCopy!=read(fd1, aBuf, nCopy) ){ + system_error(pErr, errno); + break; + } + if( nCopy!=write(fd2, aBuf, nCopy) ){ + system_error(pErr, errno); + break; + } + iOff += nCopy; + } + + close(fd1); + close(fd2); + } + } +} + +/* +** Used by setstoptime() and timetostop(). +*/ +static double timelimit = 0.0; + +static double currentTime(void){ + double t; + static sqlite3_vfs *pTimelimitVfs = 0; + if( pTimelimitVfs==0 ) pTimelimitVfs = sqlite3_vfs_find(0); + if( pTimelimitVfs->iVersion>=2 && pTimelimitVfs->xCurrentTimeInt64!=0 ){ + sqlite3_int64 tm; + pTimelimitVfs->xCurrentTimeInt64(pTimelimitVfs, &tm); + t = tm/86400000.0; + }else{ + pTimelimitVfs->xCurrentTime(pTimelimitVfs, &t); + } + return t; +} + +static void setstoptime_x( + Error *pErr, /* IN/OUT: Error code */ + int nMs /* Milliseconds until "stop time" */ +){ + if( pErr->rc==SQLITE_OK ){ + double t = currentTime(); + timelimit = t + ((double)nMs)/(1000.0*60.0*60.0*24.0); + } +} + +static int timetostop_x( + Error *pErr /* IN/OUT: Error code */ +){ + int ret = 1; + if( pErr->rc==SQLITE_OK ){ + double t = currentTime(); + ret = (t >= timelimit); + } + return ret; +} + Index: tool/mkpragmatab.tcl ================================================================== --- tool/mkpragmatab.tcl +++ tool/mkpragmatab.tcl @@ -113,10 +113,16 @@ TYPE: FLAG ARG: SQLITE_VdbeEQP IF: !defined(SQLITE_OMIT_FLAG_PRAGMAS) IF: defined(SQLITE_DEBUG) + NAME: noop_update + TYPE: FLAG + ARG: SQLITE_NoopUpdate + IF: !defined(SQLITE_OMIT_FLAG_PRAGMAS) + IF: defined(SQLITE_ENABLE_NOOP_UPDATE) + NAME: ignore_check_constraints TYPE: FLAG ARG: SQLITE_IgnoreChecks IF: !defined(SQLITE_OMIT_FLAG_PRAGMAS) IF: !defined(SQLITE_OMIT_CHECK)