Index: ext/fts5/fts5_index.c ================================================================== --- ext/fts5/fts5_index.c +++ ext/fts5/fts5_index.c @@ -3460,13 +3460,10 @@ fts5BufferSet(&p->rc, &pPg->term, ss.term.n, ss.term.p); pgno = ss.iChild; fts5NodeIterFree(&ss); } } - if( pSeg->nHeight==1 ){ - pWriter->nEmpty = pSeg->pgnoLast-1; - } assert( p->rc!=SQLITE_OK || (pgno+pWriter->nEmpty)==pSeg->pgnoLast ); pWriter->bFirstTermInPage = 1; assert( pWriter->aWriter[0].term.n==0 ); } } @@ -4049,12 +4046,14 @@ int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){ Fts5Structure *pStruct; pStruct = fts5StructureRead(p); - fts5IndexMerge(p, &pStruct, nMerge); - fts5StructureWrite(p, pStruct); + if( pStruct && pStruct->nLevel ){ + fts5IndexMerge(p, &pStruct, nMerge); + fts5StructureWrite(p, pStruct); + } fts5StructureRelease(pStruct); return fts5IndexReturn(p); } @@ -4531,25 +4530,25 @@ if( sqlite3Fts5BufferGrow(&p->rc, &buf, nToken+1)==0 ){ memcpy(&buf.p[1], pToken, nToken); } +#ifdef SQLITE_DEBUG + if( flags & FTS5INDEX_QUERY_TEST_NOIDX ){ + assert( flags & FTS5INDEX_QUERY_PREFIX ); + iIdx = 1+pConfig->nPrefix; + }else +#endif if( flags & FTS5INDEX_QUERY_PREFIX ){ - if( flags & FTS5INDEX_QUERY_TEST_NOIDX ){ - iIdx = 1+pConfig->nPrefix; - }else{ - int nChar = fts5IndexCharlen(pToken, nToken); - for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){ - if( pConfig->aPrefix[iIdx-1]==nChar ) break; - } + int nChar = fts5IndexCharlen(pToken, nToken); + for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){ + if( pConfig->aPrefix[iIdx-1]==nChar ) break; } } pRet = (Fts5IndexIter*)sqlite3Fts5MallocZero(&p->rc, sizeof(Fts5IndexIter)); if( pRet ){ - memset(pRet, 0, sizeof(Fts5IndexIter)); - pRet->pIndex = p; if( iIdx<=pConfig->nPrefix ){ buf.p[0] = FTS5_MAIN_PREFIX + iIdx; pRet->pStruct = fts5StructureRead(p); if( pRet->pStruct ){ @@ -4888,20 +4887,20 @@ } sqlite3_free(pIter->aLvl); fts5BufferFree(&pIter->term); } +#ifdef SQLITE_DEBUG /* ** This function is purely an internal test. It does not contribute to ** FTS functionality, or even the integrity-check, in any way. ** ** Instead, it tests that the same set of pgno/rowid combinations are ** visited regardless of whether the doclist-index identified by parameters ** iSegid/iLeaf is iterated in forwards or reverse order. */ -#ifdef SQLITE_DEBUG -static void fts5DlidxIterTestReverse( +static void fts5TestDlidxReverse( Fts5Index *p, int iSegid, /* Segment id to load from */ int iLeaf /* Load doclist-index for this leaf */ ){ Fts5DlidxIter *pDlidx = 0; @@ -4932,12 +4931,111 @@ fts5DlidxIterFree(pDlidx); pDlidx = 0; if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT; } + +static int fts5QueryCksum( + Fts5Index *p, /* Fts5 index object */ + int iIdx, + const char *z, /* Index key to query for */ + int n, /* Size of index key in bytes */ + int flags, /* Flags for Fts5IndexQuery */ + u64 *pCksum /* IN/OUT: Checksum value */ +){ + u64 cksum = *pCksum; + Fts5IndexIter *pIdxIter = 0; + int rc = sqlite3Fts5IndexQuery(p, z, n, flags, &pIdxIter); + + while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){ + const u8 *pPos; + int nPos; + i64 rowid = sqlite3Fts5IterRowid(pIdxIter); + rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos); + if( rc==SQLITE_OK ){ + Fts5PoslistReader sReader; + for(sqlite3Fts5PoslistReaderInit(-1, pPos, nPos, &sReader); + sReader.bEof==0; + sqlite3Fts5PoslistReaderNext(&sReader) + ){ + int iCol = FTS5_POS2COLUMN(sReader.iPos); + int iOff = FTS5_POS2OFFSET(sReader.iPos); + cksum ^= fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n); + } + rc = sqlite3Fts5IterNext(pIdxIter); + } + } + sqlite3Fts5IterClose(pIdxIter); + + *pCksum = cksum; + return rc; +} + + +/* +** This function is also purely an internal test. It does not contribute to +** FTS functionality, or even the integrity-check, in any way. +*/ +static void fts5TestTerm( + Fts5Index *p, + Fts5Buffer *pPrev, /* Previous term */ + const char *z, int n, /* Possibly new term to test */ + u64 expected, + u64 *pCksum +){ + int rc = p->rc; + if( pPrev->n==0 ){ + fts5BufferSet(&rc, pPrev, n, (const u8*)z); + }else + if( rc==SQLITE_OK && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){ + u32 cksum3 = *pCksum; + const char *zTerm = &pPrev->p[1]; /* The term without the prefix-byte */ + int nTerm = pPrev->n-1; /* Size of zTerm in bytes */ + int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX); + int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX); + int rc; + u64 ck1 = 0; + u64 ck2 = 0; + + /* Check that the results returned for ASC and DESC queries are + ** the same. If not, call this corruption. */ + rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1); + if( rc==SQLITE_OK ){ + int f = flags|FTS5INDEX_QUERY_DESC; + rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); + } + if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; + + /* If this is a prefix query, check that the results returned if the + ** the index is disabled are the same. In both ASC and DESC order. */ + if( iIdx>0 && rc==SQLITE_OK ){ + int f = flags|FTS5INDEX_QUERY_TEST_NOIDX; + ck2 = 0; + rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); + if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; + } + if( iIdx>0 && rc==SQLITE_OK ){ + int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC; + ck2 = 0; + rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); + if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; + } + + cksum3 ^= ck1; + fts5BufferSet(&rc, pPrev, n, (const u8*)z); + + if( rc==SQLITE_OK && cksum3!=expected ){ + rc = FTS5_CORRUPT; + } + *pCksum = cksum3; + } + p->rc = rc; +} + #else -# define fts5DlidxIterTestReverse(x,y,z) +# define fts5TestDlidxReverse(x,y,z) +# define fts5TestTerm(u,v,w,x,y,z) #endif static void fts5IndexIntegrityCheckSegment( Fts5Index *p, /* FTS5 backend object */ Fts5StructureSegment *pSeg /* Segment to check internal consistency */ @@ -5044,64 +5142,23 @@ fts5DataRelease(pLeaf); } } fts5DlidxIterFree(pDlidx); - fts5DlidxIterTestReverse(p, iSegid, iter.iLeaf); + fts5TestDlidxReverse(p, iSegid, iter.iLeaf); } } - /* Either iter.iLeaf must be the rightmost leaf-page in the segment, or - ** else the segment has been completely emptied by an ongoing merge - ** operation. */ - if( p->rc==SQLITE_OK - && iter.iLeaf!=pSeg->pgnoLast - && (pSeg->pgnoFirst || pSeg->pgnoLast) - ){ + /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */ + if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){ p->rc = FTS5_CORRUPT; } fts5BtreeIterFree(&iter); } -static int fts5QueryCksum( - Fts5Index *p, /* Fts5 index object */ - int iIdx, - const char *z, /* Index key to query for */ - int n, /* Size of index key in bytes */ - int flags, /* Flags for Fts5IndexQuery */ - u64 *pCksum /* IN/OUT: Checksum value */ -){ - u64 cksum = *pCksum; - Fts5IndexIter *pIdxIter = 0; - int rc = sqlite3Fts5IndexQuery(p, z, n, flags, &pIdxIter); - - while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){ - const u8 *pPos; - int nPos; - i64 rowid = sqlite3Fts5IterRowid(pIdxIter); - rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos); - if( rc==SQLITE_OK ){ - Fts5PoslistReader sReader; - for(sqlite3Fts5PoslistReaderInit(-1, pPos, nPos, &sReader); - sReader.bEof==0; - sqlite3Fts5PoslistReaderNext(&sReader) - ){ - int iCol = FTS5_POS2COLUMN(sReader.iPos); - int iOff = FTS5_POS2OFFSET(sReader.iPos); - cksum ^= fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n); - } - rc = sqlite3Fts5IterNext(pIdxIter); - } - } - sqlite3Fts5IterClose(pIdxIter); - - *pCksum = cksum; - return rc; -} - /* ** Run internal checks to ensure that the FTS index (a) is internally ** consistent and (b) contains entries for which the XOR of the checksums ** as calculated by fts5IndexEntryCksum() is cksum. ** @@ -5110,15 +5167,17 @@ ** error, or some other SQLite error code if another error (e.g. OOM) ** occurs. */ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ u64 cksum2 = 0; /* Checksum based on contents of indexes */ - u64 cksum3 = 0; /* Checksum based on contents of indexes */ - Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */ Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */ Fts5MultiSegIter *pIter; /* Used to iterate through entire index */ Fts5Structure *pStruct; /* Index structure */ + + /* Used by extra internal tests only run if NDEBUG is not defined */ + u64 cksum3 = 0; /* Checksum based on contents of indexes */ + Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */ /* Load the FTS index structure */ pStruct = fts5StructureRead(p); /* Check that the internal nodes of each segment match the leaves */ @@ -5162,52 +5221,16 @@ int iTokOff = FTS5_POS2OFFSET(iPos); cksum2 ^= fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n); } /* If this is a new term, query for it. Update cksum3 with the results. */ - if( p->rc==SQLITE_OK && (term.n!=n || memcmp(term.p, z, n)) ){ - const char *zTerm = &z[1]; /* The term without the prefix-byte */ - int nTerm = n-1; /* Size of zTerm in bytes */ - int iIdx = (z[0] - FTS5_MAIN_PREFIX); - int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX); - int rc; - u64 ck1 = 0; - u64 ck2 = 0; - - /* Check that the results returned for ASC and DESC queries are - ** the same. If not, call this corruption. */ - rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1); - if( rc==SQLITE_OK ){ - int f = flags|FTS5INDEX_QUERY_DESC; - rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); - } - if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; - - /* If this is a prefix query, check that the results returned if the - ** the index is disabled are the same. In both ASC and DESC order. */ - if( iIdx>0 && rc==SQLITE_OK ){ - int f = flags|FTS5INDEX_QUERY_TEST_NOIDX; - ck2 = 0; - rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); - if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; - } - if( iIdx>0 && rc==SQLITE_OK ){ - int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC; - ck2 = 0; - rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); - if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; - } - - cksum3 ^= ck1; - fts5BufferSet(&rc, &term, n, (const u8*)z); - p->rc = rc; - } - } + fts5TestTerm(p, &term, z, n, cksum2, &cksum3); + } + fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3); + fts5MultiIterFree(p, pIter); - if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT; - if( p->rc==SQLITE_OK && cksum!=cksum3 ) p->rc = FTS5_CORRUPT; fts5StructureRelease(pStruct); fts5BufferFree(&term); fts5BufferFree(&poslist); return fts5IndexReturn(p); Index: ext/fts5/test/fts5corrupt2.test ================================================================== --- ext/fts5/test/fts5corrupt2.test +++ ext/fts5/test/fts5corrupt2.test @@ -113,12 +113,10 @@ ROLLBACK; INSERT INTO t1(t1) VALUES('integrity-check'); } {} } -} - #------------------------------------------------------------------------- # Test that corruption in leaf page headers is detected by queries that use # doclist-indexes. # set doc "A B C D E F G H I J " @@ -205,10 +203,44 @@ execsql ROLLBACK } do_test 4.$tn.x { expr $nCorrupt>0 } 1 } + +} + +set doc [string repeat "A B C " 1000] +do_execsql_test 4.0 { + CREATE VIRTUAL TABLE x5 USING fts5(tt); + INSERT INTO x5(x5, rank) VALUES('pgsz', 32); + WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<10) + INSERT INTO x5 SELECT $doc FROM ii; +} + +foreach {tn hdr} { + 1 "\x00\x01" +} { + set tn2 0 + set nCorrupt 0 + foreach rowid [db eval {SELECT rowid FROM x5_data WHERE rowid>10}] { + if {$rowid & $mask} continue + incr tn2 + do_test 4.$tn.$tn2 { + execsql BEGIN + + set fd [db incrblob main x5_data block $rowid] + fconfigure $fd -encoding binary -translation binary + puts -nonewline $fd $hdr + close $fd + + catchsql { INSERT INTO x5(x5) VALUES('integrity-check') } + set {} {} + } {} + + execsql ROLLBACK + } +} sqlite3_fts5_may_be_corrupt 0 finish_test ADDED ext/fts5/test/fts5corrupt3.test Index: ext/fts5/test/fts5corrupt3.test ================================================================== --- /dev/null +++ ext/fts5/test/fts5corrupt3.test @@ -0,0 +1,57 @@ +# 2015 Apr 24 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# This file tests that FTS5 handles corrupt databases (i.e. internal +# inconsistencies in the backing tables) correctly. In this case +# "correctly" means without crashing. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5corrupt3 +sqlite3_fts5_may_be_corrupt 1 + +# Create a simple FTS5 table containing 100 documents. Each document +# contains 10 terms, each of which start with the character "x". +# +expr srand(0) +db func rnddoc fts5_rnddoc +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE t1 USING fts5(x); + INSERT INTO t1(t1, rank) VALUES('pgsz', 64); + WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<100) + INSERT INTO t1 SELECT rnddoc(10) FROM ii; +} +set mask [expr 31 << 31] + +do_test 1.1 { + # Pick out the rowid of the right-most b-tree leaf in the new segment. + set rowid [db one { + SELECT max(rowid) FROM t1_data WHERE ((rowid>>31) & 0x0F)==1 + }] + set L [db one {SELECT length(block) FROM t1_data WHERE rowid = $rowid}] + set {} {} +} {} + +for {set i 0} {$i < $L} {incr i} { + do_test 1.2.$i { + catchsql { + BEGIN; + UPDATE t1_data SET block = substr(block, 1, $i) WHERE id = $rowid; + INSERT INTO t1(t1) VALUES('integrity-check'); + } + } {1 {database disk image is malformed}} + catchsql ROLLBACK +} + + +sqlite3_fts5_may_be_corrupt 0 +finish_test + Index: ext/fts5/test/fts5merge.test ================================================================== --- ext/fts5/test/fts5merge.test +++ ext/fts5/test/fts5merge.test @@ -132,8 +132,57 @@ while {[not_merged x8]} { execsql { INSERT INTO x8(x8, rank) VALUES('merge', 1) } } fts5_level_segs x8 } {0 1} + +#------------------------------------------------------------------------- +# +proc mydoc {} { + set x [lindex {a b c d e f g h i j} [expr int(rand()*10)]] + return [string repeat "$x " 30] +} +db func mydoc mydoc + +proc mycount {} { + set res [list] + foreach x {a b c d e f g h i j} { + lappend res [db one {SELECT count(*) FROM x8 WHERE x8 MATCH $x}] + } + set res +} + + #1 32 +foreach {tn pgsz} { + 2 1000 +} { + do_execsql_test 4.$tn.1 { + DROP TABLE IF EXISTS x8; + CREATE VIRTUAL TABLE x8 USING fts5(i); + INSERT INTO x8(x8, rank) VALUES('pgsz', $pgsz); + } + + do_execsql_test 4.$tn.2 { + INSERT INTO x8(x8, rank) VALUES('merge', 1); + } + + do_execsql_test 4.$tn.3 { + WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100) + INSERT INTO x8 SELECT mydoc() FROM ii; + WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100) + INSERT INTO x8 SELECT mydoc() FROM ii; + INSERT INTO x8(x8, rank) VALUES('automerge', 2); + } + + set expect [mycount] + for {set i 0} {$i < 20} {incr i} { + do_test 4.$tn.4.$i { + execsql { INSERT INTO x8(x8, rank) VALUES('merge', 1); } + mycount + } $expect + break + } + db eval {SELECT fts5_decode(rowid, block) AS r FROM x8_data} { puts $r } +} finish_test