/ Check-in [41449f7a]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add extra tests for corrupt database handling in fts5.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: 41449f7a0b5da6332eef48386c91ef63382c4783
User & Date: dan 2015-04-24 15:56:09
Context
2015-04-24
19:41
Add the "unindexed" column option to fts5. check-in: 86309961 user: dan tags: fts5
15:56
Add extra tests for corrupt database handling in fts5. check-in: 41449f7a user: dan tags: fts5
06:02
Fix an fts5 build problem in main.mk. check-in: 60045ced user: dan tags: fts5
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5Int.h.

    35     35   #ifdef SQLITE_DEBUG
    36     36   # define FTS5_CORRUPT sqlite3Fts5Corrupt()
    37     37   int sqlite3Fts5Corrupt(void);
    38     38   #else
    39     39   # define FTS5_CORRUPT SQLITE_CORRUPT_VTAB
    40     40   #endif
    41     41   
           42  +/*
           43  +** The assert_nc() macro is similar to the assert() macro, except that it
           44  +** is used for assert() conditions that are true only if it can be 
           45  +** guaranteed that the database is not corrupt.
           46  +*/
           47  +#ifdef SQLITE_TEST
           48  +extern int sqlite3_fts5_may_be_corrupt;
           49  +# define assert_nc(x) assert(sqlite3_fts5_may_be_corrupt || (x))
           50  +#else
           51  +# define assert_nc(x) assert(x)
           52  +#endif
           53  +
    42     54   /**************************************************************************
    43     55   ** Interface to code in fts5.c. 
    44     56   */
    45     57   typedef struct Fts5Global Fts5Global;
    46     58   
    47     59   int sqlite3Fts5GetTokenizer(
    48     60     Fts5Global*, 

Changes to ext/fts5/fts5_index.c.

   748    748     if( p->pReader ){
   749    749       sqlite3_blob *pReader = p->pReader;
   750    750       p->pReader = 0;
   751    751       sqlite3_blob_close(pReader);
   752    752     }
   753    753   }
   754    754   
          755  +/*
          756  +** Check if row iRowid exists in the %_data table, and that it contains
          757  +** a blob value. If so, return SQLITE_ERROR (yes - SQLITE_ERROR, not 
          758  +** SQLITE_OK). If not, return SQLITE_CORRUPT_VTAB.
          759  +**
          760  +** If an error occurs (e.g. OOM or IOERR), return the relevant error code.
          761  +**
          762  +** This function does not need to be efficient. It is part of very rarely
          763  +** invoked error handling code only.
          764  +*/
          765  +#if 0
          766  +static int fts5CheckMissingRowid(Fts5Index *p, i64 iRowid){
          767  +  const char *zFmt = "SELECT typeof(block)=='blob' FROM '%q'.%Q WHERE id=%lld";
          768  +  int bOk = 0;
          769  +  int rc;
          770  +  char *zSql;
          771  +
          772  +  zSql = sqlite3_mprintf(zFmt, p->pConfig->zDb, p->zDataTbl, iRowid);
          773  +  if( zSql==0 ){
          774  +    rc = SQLITE_NOMEM;
          775  +  }else{
          776  +    sqlite3_stmt *pStmt;
          777  +    rc = sqlite3_prepare_v2(p->pConfig->db, zSql, -1, &pStmt, 0);
          778  +    if( rc==SQLITE_OK ){
          779  +      if( SQLITE_ROW==sqlite3_step(pStmt) ){
          780  +        bOk = sqlite3_column_int(pStmt, 0);
          781  +      }
          782  +      rc = sqlite3_finalize(pStmt);
          783  +    }
          784  +    sqlite3_free(zSql);
          785  +  }
          786  +
          787  +  if( rc==SQLITE_OK ){
          788  +    rc = bOk ? SQLITE_ERROR : FTS5_CORRUPT;
          789  +  }
          790  +
          791  +  return rc;
          792  +}
          793  +#endif
          794  +
   755    795   static Fts5Data *fts5DataReadOrBuffer(
   756    796     Fts5Index *p, 
   757    797     Fts5Buffer *pBuf, 
   758    798     i64 iRowid
   759    799   ){
   760    800     Fts5Data *pRet = 0;
   761    801     if( p->rc==SQLITE_OK ){
   762    802       int rc = SQLITE_OK;
   763    803   
   764         -#if 0
   765         -Fts5Buffer buf = {0,0,0};
   766         -fts5DebugRowid(&rc, &buf, iRowid);
   767         -fprintf(stdout, "read: %s\n", buf.p);
   768         -fflush(stdout);
   769         -sqlite3_free(buf.p);
   770         -#endif
   771    804       if( p->pReader ){
   772    805         /* This call may return SQLITE_ABORT if there has been a savepoint
   773    806         ** rollback since it was last used. In this case a new blob handle
   774    807         ** is required.  */
   775    808         rc = sqlite3_blob_reopen(p->pReader, iRowid);
   776    809         if( rc==SQLITE_ABORT ){
   777    810           fts5CloseReader(p);
................................................................................
   783    816       ** the blob_reopen() API to reseek the existing blob handle.  */
   784    817       if( p->pReader==0 ){
   785    818         Fts5Config *pConfig = p->pConfig;
   786    819         rc = sqlite3_blob_open(pConfig->db, 
   787    820             pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader
   788    821         );
   789    822       }
          823  +
          824  +    /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls
          825  +    ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead.
          826  +    ** All the reasons those functions might return SQLITE_ERROR - missing
          827  +    ** table, missing row, non-blob/text in block column - indicate 
          828  +    ** backing store corruption.  */
          829  +    if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT;
   790    830   
   791    831       if( rc==SQLITE_OK ){
   792    832         u8 *aOut;                   /* Read blob data into this buffer */
   793    833         int nByte = sqlite3_blob_bytes(p->pReader);
   794    834         if( pBuf ){
   795    835           fts5BufferZero(pBuf);
   796    836           fts5BufferGrow(&rc, pBuf, nByte);
................................................................................
  1559   1599   ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the 
  1560   1600   ** position list content (if any).
  1561   1601   */
  1562   1602   static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
  1563   1603     if( p->rc==SQLITE_OK ){
  1564   1604       const u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset];
  1565   1605       int iOff = pIter->iLeafOffset;  /* Offset to read at */
  1566         -    pIter->iLeafOffset += fts5GetPoslistSize(a, &pIter->nPos,&pIter->bDel);
         1606  +    pIter->iLeafOffset += fts5GetPoslistSize(a, &pIter->nPos, &pIter->bDel);
  1567   1607     }
  1568   1608   }
  1569   1609   
  1570   1610   /*
  1571   1611   ** Fts5SegIter.iLeafOffset currently points to the first byte of the 
  1572   1612   ** "nSuffix" field of a term. Function parameter nKeep contains the value
  1573   1613   ** of the "nPrefix" field (if there was one - it is passed 0 if this is
  1574   1614   ** the first term in the segment).
  1575   1615   **
  1576   1616   ** This function populates:
  1577   1617   **
  1578   1618   **   Fts5SegIter.term
  1579   1619   **   Fts5SegIter.rowid
  1580         -**   Fts5SegIter.nPos
  1581         -**   Fts5SegIter.bDel
  1582   1620   **
  1583   1621   ** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of
  1584   1622   ** the first position list. The position list belonging to document 
  1585   1623   ** (Fts5SegIter.iRowid).
  1586   1624   */
  1587   1625   static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
  1588   1626     u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
................................................................................
  3908   3946         }
  3909   3947       }
  3910   3948     }
  3911   3949   
  3912   3950     pIter->nEmpty = pIter->aLvl[0].s.nEmpty;
  3913   3951     pIter->bDlidx = pIter->aLvl[0].s.bDlidx;
  3914   3952     pIter->iLeaf = pIter->aLvl[0].s.iChild;
  3915         -  assert( p->rc==SQLITE_OK || pIter->bEof );
  3916   3953   }
  3917   3954   
  3918   3955   static void fts5BtreeIterFree(Fts5BtreeIter *pIter){
  3919   3956     int i;
  3920   3957     for(i=0; i<pIter->nLvl; i++){
  3921   3958       Fts5BtreeIterLevel *pLvl = &pIter->aLvl[i];
  3922   3959       fts5NodeIterFree(&pLvl->s);
................................................................................
  3981   4018     int iIdx,                       /* Index that pSeg is a part of */
  3982   4019     Fts5StructureSegment *pSeg      /* Segment to check internal consistency */
  3983   4020   ){
  3984   4021     Fts5BtreeIter iter;             /* Used to iterate through b-tree hierarchy */
  3985   4022   
  3986   4023     /* Iterate through the b-tree hierarchy.  */
  3987   4024     for(fts5BtreeIterInit(p, iIdx, pSeg, &iter);
  3988         -      iter.bEof==0;
         4025  +      p->rc==SQLITE_OK && iter.bEof==0;
  3989   4026         fts5BtreeIterNext(&iter)
  3990   4027     ){
  3991   4028       i64 iRow;                     /* Rowid for this leaf */
  3992   4029       Fts5Data *pLeaf;              /* Data for this leaf */
  3993   4030       int iOff;                     /* Offset of first term on leaf */
  3994   4031       int i;                        /* Used to iterate through empty leaves */
  3995   4032   

Changes to ext/fts5/fts5_tcl.c.

    18     18   
    19     19   #ifdef SQLITE_ENABLE_FTS5
    20     20   
    21     21   #include "fts5.h"
    22     22   #include <string.h>
    23     23   #include <assert.h>
    24     24   
           25  +/*
           26  +** This variable is set to true when running corruption tests. Otherwise
           27  +** false. If it is false, extra assert() conditions in the fts5 code are
           28  +** activated - conditions that are only true if it is guaranteed that the
           29  +** fts5 database is not corrupt.
           30  +*/
           31  +int sqlite3_fts5_may_be_corrupt = 0;
           32  +
    25     33   /*************************************************************************
    26     34   ** This is a copy of the first part of the SqliteDb structure in 
    27     35   ** tclsqlite.c.  We need it here so that the get_sqlite_pointer routine
    28     36   ** can extract the sqlite3* pointer from an existing Tcl SQLite
    29     37   ** connection.
    30     38   */
    31     39   struct SqliteDb {
................................................................................
   825    833   
   826    834     return TCL_OK;
   827    835   }
   828    836   
   829    837   static void xF5tFree(ClientData clientData){
   830    838     ckfree(clientData);
   831    839   }
          840  +
          841  +/*
          842  +**      sqlite3_fts5_may_be_corrupt BOOLEAN
          843  +**
          844  +** Set or clear the global "may-be-corrupt" flag. Return the old value.
          845  +*/
          846  +static int f5tMayBeCorrupt(
          847  +  void * clientData,
          848  +  Tcl_Interp *interp,
          849  +  int objc,
          850  +  Tcl_Obj *CONST objv[]
          851  +){
          852  +  int bOld = sqlite3_fts5_may_be_corrupt;
          853  +
          854  +  if( objc!=2 && objc!=1 ){
          855  +    Tcl_WrongNumArgs(interp, 1, objv, "?BOOLEAN?");
          856  +    return TCL_ERROR;
          857  +  }
          858  +  if( objc==2 ){
          859  +    int bNew;
          860  +    if( Tcl_GetBooleanFromObj(interp, objv[1], &bNew) ) return TCL_ERROR;
          861  +    sqlite3_fts5_may_be_corrupt = bNew;
          862  +  }
          863  +
          864  +  Tcl_SetObjResult(interp, Tcl_NewIntObj(bOld));
          865  +  return TCL_OK;
          866  +}
   832    867   
   833    868   /*
   834    869   ** Entry point.
   835    870   */
   836    871   int Fts5tcl_Init(Tcl_Interp *interp){
   837    872     static struct Cmd {
   838    873       char *zName;
   839    874       Tcl_ObjCmdProc *xProc;
   840    875       int bTokenizeCtx;
   841    876     } aCmd[] = {
   842    877       { "sqlite3_fts5_create_tokenizer", f5tCreateTokenizer, 1 },
   843    878       { "sqlite3_fts5_token",            f5tTokenizerReturn, 1 },
   844    879       { "sqlite3_fts5_tokenize",         f5tTokenize, 0 },
   845         -    { "sqlite3_fts5_create_function",  f5tCreateFunction, 0 }
          880  +    { "sqlite3_fts5_create_function",  f5tCreateFunction, 0 },
          881  +    { "sqlite3_fts5_may_be_corrupt",   f5tMayBeCorrupt, 0 }
   846    882     };
   847    883     int i;
   848    884     F5tTokenizerContext *pContext;
   849    885   
   850    886     pContext = ckalloc(sizeof(F5tTokenizerContext));
   851    887     memset(pContext, 0, sizeof(*pContext));
   852    888   

Changes to ext/fts5/test/fts5corrupt.test.

     5      5   #
     6      6   #    May you do good and not evil.
     7      7   #    May you find forgiveness for yourself and forgive others.
     8      8   #    May you share freely, never taking more than you give.
     9      9   #
    10     10   #***********************************************************************
    11     11   #
           12  +# This file tests that the FTS5 'integrity-check' command detects 
           13  +# inconsistencies (corruption) in the on-disk backing tables.
    12     14   #
    13     15   
    14     16   source [file join [file dirname [info script]] fts5_common.tcl]
    15     17   set testprefix fts5corrupt
    16     18   
    17     19   do_execsql_test 1.0 {
    18     20     CREATE VIRTUAL TABLE t1 USING fts5(x);
................................................................................
    34     36   set segid [lindex [fts5_level_segids t1] 0]
    35     37   
    36     38   do_test 1.3 {
    37     39     execsql {
    38     40       DELETE FROM t1_data WHERE rowid = fts5_rowid('segment', 0, $segid, 0, 4);
    39     41     }
    40     42     catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
    41         -} {1 {SQL logic error or missing database}}
           43  +} {1 {database disk image is malformed}}
    42     44   
    43     45   do_test 1.4 {
    44     46     db_restore_and_reopen
    45     47     execsql {
    46     48       UPDATE t1_data set block = X'00000000' || substr(block, 5) WHERE
    47     49       rowid = fts5_rowid('segment', 0, $segid, 0, 4);
    48     50     }

Added ext/fts5/test/fts5corrupt2.test.

            1  +# 2015 Apr 24
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#***********************************************************************
           11  +#
           12  +# This file tests that FTS5 handles corrupt databases (i.e. internal
           13  +# inconsistencies in the backing tables) correctly. In this case 
           14  +# "correctly" means without crashing.
           15  +#
           16  +
           17  +source [file join [file dirname [info script]] fts5_common.tcl]
           18  +set testprefix fts5corrupt2
           19  +
           20  +# Create a simple FTS5 table containing 100 documents. Each document 
           21  +# contains 10 terms, each of which starts with the character "x".
           22  +#
           23  +expr srand(0)
           24  +db func rnddoc fts5_rnddoc
           25  +do_execsql_test 1.0 {
           26  +  CREATE VIRTUAL TABLE t1 USING fts5(x);
           27  +  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
           28  +  WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<100)
           29  +  INSERT INTO t1 SELECT rnddoc(10) FROM ii;
           30  +}
           31  +
           32  +set mask [expr 31 << 31]
           33  +
           34  +# Test 1:
           35  +#
           36  +#   For each page in the t1_data table, open a transaction and DELETE
           37  +#   the t1_data entry. Then run:
           38  +#
           39  +#     * an integrity-check, and
           40  +#     * unless the deleted block was a b-tree node, a query for "t1 MATCH 'x*'"
           41  +#
           42  +#   and check that the corruption is detected in both cases. Then
           43  +#   roll back the transaction.
           44  +#
           45  +# Test 2:
           46  +#
           47  +#   Same thing, except instead of deleting a row from t1_data, replace its
           48  +#   blob content with integer value 14.
           49  +#
           50  +foreach {tno stmt} {
           51  +  1 { DELETE FROM t1_data WHERE rowid=$rowid }
           52  +  2 { UPDATE t1_data SET block=14 WHERE rowid=$rowid }
           53  +} {
           54  +  break
           55  +  set tn 0
           56  +  foreach rowid [db eval {SELECT rowid FROM t1_data WHERE rowid>10}] {
           57  +    incr tn
           58  +    #if {$tn!=224} continue
           59  +  
           60  +    do_test 1.$tno.$tn.1.$rowid {
           61  +      execsql { BEGIN }
           62  +      execsql $stmt
           63  +      catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
           64  +    } {1 {database disk image is malformed}}
           65  +  
           66  +    if {($rowid & $mask)==0} {
           67  +      # Node is a leaf node, not a b-tree node.
           68  +      do_catchsql_test 1.$tno.$tn.2.$rowid {
           69  +        SELECT rowid FROM t1 WHERE t1 MATCH 'x*'
           70  +      } {1 {database disk image is malformed}}
           71  +    }
           72  +  
           73  +    do_execsql_test 1.$tno.$tn.3.$rowid {
           74  +      ROLLBACK;
           75  +      INSERT INTO t1(t1) VALUES('integrity-check');
           76  +    } {}
           77  +  }
           78  +}
           79  +
           80  +# Run N-1 tests, where N is the number of bytes in the rightmost leaf page
           81  +# of the fts index. For test $i, truncate the rightmost leaf page to $i
           82  +# bytes. Then test that the integrity-check detects the corruption.
           83  +#
           84  +# Also tested is that "MATCH 'x*'" does not crash and sometimes reports
           85  +# corruption. It may not report the db as corrupt because truncating the
           86  +# final leaf to some sizes may create a valid leaf page.
           87  +#
           88  +set lrowid [db one {SELECT max(rowid) FROM t1_data WHERE (rowid & $mask)=0}] 
           89  +set nbyte [db one {SELECT length(block) FROM t1_data WHERE rowid=$lrowid}]
           90  +set all [db eval {SELECT rowid FROM t1}]
           91  +for {set i [expr $nbyte-2]} {$i>=0} {incr i -1} {
           92  +  do_execsql_test 2.$i.1 {
           93  +    BEGIN;
           94  +      UPDATE t1_data SET block = substr(block, 1, $i) WHERE rowid=$lrowid;
           95  +  }
           96  +
           97  +  do_catchsql_test 2.$i.2 {
           98  +    INSERT INTO t1(t1) VALUES('integrity-check');
           99  +  } {1 {database disk image is malformed}}
          100  +
          101  +  do_test 2.$i.3 {
          102  +    set res [catchsql {SELECT rowid FROM t1 WHERE t1 MATCH 'x*'}]
          103  +    expr {
          104  +        $res=="1 {database disk image is malformed}" 
          105  +     || $res=="0 {$all}" 
          106  +    }
          107  +  } 1
          108  +
          109  +  do_execsql_test 2.$i.4 {
          110  +    ROLLBACK;
          111  +    INSERT INTO t1(t1) VALUES('integrity-check');
          112  +  } {}
          113  +}
          114  +
          115  +finish_test
          116  +

Changes to ext/fts5/test/fts5rebuild.test.

    35     35   
    36     36   do_execsql_test 1.5 {
    37     37     DELETE FROM f1_data;
    38     38   } {}
    39     39   
    40     40   do_catchsql_test 1.6 {
    41     41     INSERT INTO f1(f1) VALUES('integrity-check');
    42         -} {1 {SQL logic error or missing database}}
           42  +} {1 {database disk image is malformed}}
    43     43   
    44     44   do_execsql_test 1.7 {
    45     45     INSERT INTO f1(f1) VALUES('rebuild');
    46     46     INSERT INTO f1(f1) VALUES('integrity-check');
    47     47   } {}
    48     48   
    49     49   finish_test
    50     50