/ Check-in [8b872e42]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Delete all fts3 index data the table becomes empty. Previously, deleting all rows from an fts3 table would leave a bunch of index data describing the terms of the original data, plus deletions of those terms, perhaps with some amount of it merged together so the deletions knocked out the originals. Even when all rows were deleted that original data would hang out, though eventually it would mostly be overwritten if new data contained the same set of terms. (CVS 5413)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 8b872e426091d9ef108e52dbec0d968ed7452907
User & Date: shess 2008-07-14 20:43:15
Context
2008-07-15
00:27
Increased test coverage. (CVS 5414) check-in: 7cf91e08 user: drh tags: trunk
2008-07-14
20:43
Delete all fts3 index data the table becomes empty. Previously, deleting all rows from an fts3 table would leave a bunch of index data describing the terms of the original data, plus deletions of those terms, perhaps with some amount of it merged together so the deletions knocked out the originals. Even when all rows were deleted that original data would hang out, though eventually it would mostly be overwritten if new data contained the same set of terms. (CVS 5413) check-in: 8b872e42 user: shess tags: trunk
19:39
Add a new test script to exercise the deadlock avoidance code in the btree mutex logic. (CVS 5412) check-in: 7d5e1c43 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

  1901   1901   } QueryType;
  1902   1902   
  1903   1903   typedef enum fulltext_statement {
  1904   1904     CONTENT_INSERT_STMT,
  1905   1905     CONTENT_SELECT_STMT,
  1906   1906     CONTENT_UPDATE_STMT,
  1907   1907     CONTENT_DELETE_STMT,
         1908  +  CONTENT_EXISTS_STMT,
  1908   1909   
  1909   1910     BLOCK_INSERT_STMT,
  1910   1911     BLOCK_SELECT_STMT,
  1911   1912     BLOCK_DELETE_STMT,
         1913  +  BLOCK_DELETE_ALL_STMT,
  1912   1914   
  1913   1915     SEGDIR_MAX_INDEX_STMT,
  1914   1916     SEGDIR_SET_STMT,
  1915   1917     SEGDIR_SELECT_LEVEL_STMT,
  1916   1918     SEGDIR_SPAN_STMT,
  1917   1919     SEGDIR_DELETE_STMT,
  1918   1920     SEGDIR_SELECT_SEGMENT_STMT,
  1919   1921     SEGDIR_SELECT_ALL_STMT,
         1922  +  SEGDIR_DELETE_ALL_STMT,
  1920   1923   
  1921   1924     MAX_STMT                     /* Always at end! */
  1922   1925   } fulltext_statement;
  1923   1926   
  1924   1927   /* These must exactly match the enum above. */
  1925   1928   /* TODO(shess): Is there some risk that a statement will be used in two
  1926   1929   ** cursors at once, e.g.  if a query joins a virtual table to itself?
................................................................................
  1927   1930   ** If so perhaps we should move some of these to the cursor object.
  1928   1931   */
  1929   1932   static const char *const fulltext_zStatement[MAX_STMT] = {
  1930   1933     /* CONTENT_INSERT */ NULL,  /* generated in contentInsertStatement() */
  1931   1934     /* CONTENT_SELECT */ NULL,  /* generated in contentSelectStatement() */
  1932   1935     /* CONTENT_UPDATE */ NULL,  /* generated in contentUpdateStatement() */
  1933   1936     /* CONTENT_DELETE */ "delete from %_content where docid = ?",
         1937  +  /* CONTENT_EXISTS */ "select docid from %_content limit 1",
  1934   1938   
  1935   1939     /* BLOCK_INSERT */
  1936   1940     "insert into %_segments (blockid, block) values (null, ?)",
  1937   1941     /* BLOCK_SELECT */ "select block from %_segments where blockid = ?",
  1938   1942     /* BLOCK_DELETE */ "delete from %_segments where blockid between ? and ?",
         1943  +  /* BLOCK_DELETE_ALL */ "delete from %_segments",
  1939   1944   
  1940   1945     /* SEGDIR_MAX_INDEX */ "select max(idx) from %_segdir where level = ?",
  1941   1946     /* SEGDIR_SET */ "insert into %_segdir values (?, ?, ?, ?, ?, ?)",
  1942   1947     /* SEGDIR_SELECT_LEVEL */
  1943   1948     "select start_block, leaves_end_block, root from %_segdir "
  1944   1949     " where level = ? order by idx",
  1945   1950     /* SEGDIR_SPAN */
................................................................................
  1952   1957     */
  1953   1958     /* SEGDIR_SELECT_SEGMENT */
  1954   1959     "select start_block, leaves_end_block, root from %_segdir "
  1955   1960     " where level = ? and idx = ?",
  1956   1961     /* SEGDIR_SELECT_ALL */
  1957   1962     "select start_block, leaves_end_block, root from %_segdir "
  1958   1963     " order by level desc, idx asc",
  1959         -
         1964  +  /* SEGDIR_DELETE_ALL */ "delete from %_segdir",
  1960   1965   };
  1961   1966   
  1962   1967   /*
  1963   1968   ** A connection to a fulltext index is an instance of the following
  1964   1969   ** structure.  The xCreate and xConnect methods create an instance
  1965   1970   ** of this structure and xDestroy and xDisconnect free that instance.
  1966   1971   ** All other methods receive a pointer to the structure as one of their
................................................................................
  2245   2250     if( rc!=SQLITE_OK ) return rc;
  2246   2251   
  2247   2252     rc = sqlite3_bind_int64(s, 1, iDocid);
  2248   2253     if( rc!=SQLITE_OK ) return rc;
  2249   2254   
  2250   2255     return sql_single_step(s);
  2251   2256   }
         2257  +
         2258  +/* Returns SQLITE_ROW if any rows exist in %_content, SQLITE_DONE if
         2259  +** no rows exist, and any error in case of failure.
         2260  +*/
         2261  +static int content_exists(fulltext_vtab *v){
         2262  +  sqlite3_stmt *s;
         2263  +  int rc = sql_get_statement(v, CONTENT_EXISTS_STMT, &s);
         2264  +  if( rc!=SQLITE_OK ) return rc;
         2265  +
         2266  +  rc = sqlite3_step(s);
         2267  +  if( rc!=SQLITE_ROW ) return rc;
         2268  +
         2269  +  /* We expect only one row.  We must execute another sqlite3_step()
         2270  +   * to complete the iteration; otherwise the table will remain locked. */
         2271  +  rc = sqlite3_step(s);
         2272  +  if( rc==SQLITE_DONE ) return SQLITE_ROW;
         2273  +  if( rc==SQLITE_ROW ) return SQLITE_ERROR;
         2274  +  return rc;
         2275  +}
  2252   2276   
  2253   2277   /* insert into %_segments values ([pData])
  2254   2278   **   returns assigned blockid in *piBlockid
  2255   2279   */
  2256   2280   static int block_insert(fulltext_vtab *v, const char *pData, int nData,
  2257   2281                           sqlite_int64 *piBlockid){
  2258   2282     sqlite3_stmt *s;
................................................................................
  2417   2441     /* Delete the segment directory itself. */
  2418   2442     rc = sql_get_statement(v, SEGDIR_DELETE_STMT, &s);
  2419   2443     if( rc!=SQLITE_OK ) return rc;
  2420   2444   
  2421   2445     rc = sqlite3_bind_int64(s, 1, iLevel);
  2422   2446     if( rc!=SQLITE_OK ) return rc;
  2423   2447   
         2448  +  return sql_single_step(s);
         2449  +}
         2450  +
         2451  +/* Delete entire fts index, SQLITE_OK on success, relevant error on
         2452  +** failure.
         2453  +*/
         2454  +static int segdir_delete_all(fulltext_vtab *v){
         2455  +  sqlite3_stmt *s;
         2456  +  int rc = sql_get_statement(v, SEGDIR_DELETE_ALL_STMT, &s);
         2457  +  if( rc!=SQLITE_OK ) return rc;
         2458  +
         2459  +  rc = sql_single_step(s);
         2460  +  if( rc!=SQLITE_OK ) return rc;
         2461  +
         2462  +  rc = sql_get_statement(v, BLOCK_DELETE_ALL_STMT, &s);
         2463  +  if( rc!=SQLITE_OK ) return rc;
         2464  +
  2424   2465     return sql_single_step(s);
  2425   2466   }
  2426   2467   
  2427   2468   /* TODO(shess) clearPendingTerms() is far down the file because
  2428   2469   ** writeZeroSegment() is far down the file because LeafWriter is far
  2429   2470   ** down the file.  Consider refactoring the code to move the non-vtab
  2430   2471   ** code above the vtab code so that we don't need this forward
................................................................................
  6108   6149     fulltext_vtab *v = (fulltext_vtab *) pVtab;
  6109   6150     int rc;
  6110   6151   
  6111   6152     FTSTRACE(("FTS3 Update %p\n", pVtab));
  6112   6153   
  6113   6154     if( nArg<2 ){
  6114   6155       rc = index_delete(v, sqlite3_value_int64(ppArg[0]));
         6156  +    if( rc==SQLITE_OK ){
         6157  +      /* If we just deleted the last row in the table, clear out the
         6158  +      ** index data.
         6159  +      */
         6160  +      rc = content_exists(v);
         6161  +      if( rc==SQLITE_ROW ){
         6162  +        rc = SQLITE_OK;
         6163  +      }else if( rc==SQLITE_DONE ){
         6164  +        /* Clear the pending terms so we don't flush a useless level-0
         6165  +        ** segment when the transaction closes.
         6166  +        */
         6167  +        rc = clearPendingTerms(v);
         6168  +        if( rc==SQLITE_OK ){
         6169  +          rc = segdir_delete_all(v);
         6170  +        }
         6171  +      }
         6172  +    }
  6115   6173     } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){
  6116   6174       /* An update:
  6117   6175        * ppArg[0] = old rowid
  6118   6176        * ppArg[1] = new rowid
  6119   6177        * ppArg[2..2+v->nColumn-1] = values
  6120   6178        * ppArg[2+v->nColumn] = value for magic column (we ignore this)
  6121   6179        * ppArg[2+v->nColumn+1] = value for docid

Added test/fts3d.test.

            1  +# 2008 June 26
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#*************************************************************************
           11  +# This file implements regression tests for SQLite library.  The focus
           12  +# of this script is testing the FTS3 module's optimize() function.
           13  +#
           14  +# $Id: fts3d.test,v 1.1 2008/07/14 20:43:15 shess Exp $
           15  +#
           16  +
           17  +set testdir [file dirname $argv0]
           18  +source $testdir/tester.tcl
           19  +
           20  +# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
           21  +ifcapable !fts3 {
           22  +  finish_test
           23  +  return
           24  +}
           25  +
           26  +#*************************************************************************
           27  +# Probe to see if support for the FTS3 dump_* functions is compiled in.
           28  +# TODO(shess): Change main.mk to do the right thing and remove this test.
           29  +db eval {
           30  +  DROP TABLE IF EXISTS t1;
           31  +  CREATE VIRTUAL TABLE t1 USING fts3(c);
           32  +  INSERT INTO t1 (docid, c) VALUES (1, 'x');
           33  +}
           34  +
           35  +set s {SELECT dump_terms(t1, 1) FROM t1 LIMIT 1}
           36  +set r {1 {unable to use function dump_terms in the requested context}}
           37  +if {[catchsql $s]==$r} {
           38  +  finish_test
           39  +  return
           40  +}
           41  +
           42  +#*************************************************************************
           43  +# Utility function to check for the expected terms in the segment
           44  +# level/index.  _all version does same but for entire index.
           45  +proc check_terms {test level index terms} {
           46  +  # TODO(shess): Figure out why uplevel in do_test can't catch
           47  +  # $level and $index directly.
           48  +  set ::level $level
           49  +  set ::index $index
           50  +  do_test $test.terms {
           51  +    execsql {
           52  +      SELECT dump_terms(t1, $::level, $::index) FROM t1 LIMIT 1;
           53  +    }
           54  +  } [list $terms]
           55  +}
           56  +proc check_terms_all {test terms} {
           57  +  do_test $test.terms {
           58  +    execsql {
           59  +      SELECT dump_terms(t1) FROM t1 LIMIT 1;
           60  +    }
           61  +  } [list $terms]
           62  +}
           63  +
           64  +# Utility function to check for the expected doclist for the term in
           65  +# segment level/index.  _all version does same for entire index.
           66  +proc check_doclist {test level index term doclist} {
           67  +  # TODO(shess): Again, why can't the non-:: versions work?
           68  +  set ::term $term
           69  +  set ::level $level
           70  +  set ::index $index
           71  +  do_test $test {
           72  +    execsql {
           73  +      SELECT dump_doclist(t1, $::term, $::level, $::index) FROM t1 LIMIT 1;
           74  +    }
           75  +  } [list $doclist]
           76  +}
           77  +proc check_doclist_all {test term doclist} {
           78  +  set ::term $term
           79  +  do_test $test {
           80  +    execsql {
           81  +      SELECT dump_doclist(t1, $::term) FROM t1 LIMIT 1;
           82  +    }
           83  +  } [list $doclist]
           84  +}
           85  +
           86  +#*************************************************************************
           87  +# Test results when all rows are deleted and one is added back.
           88  +# Previously older segments would continue to exist, but now the index
           89  +# should be dropped when the table is empty.  The results should look
           90  +# exactly like we never added the earlier rows in the first place.
           91  +db eval {
           92  +  DROP TABLE IF EXISTS t1;
           93  +  CREATE VIRTUAL TABLE t1 USING fts3(c);
           94  +  INSERT INTO t1 (docid, c) VALUES (1, 'This is a test');
           95  +  INSERT INTO t1 (docid, c) VALUES (2, 'That was a test');
           96  +  INSERT INTO t1 (docid, c) VALUES (3, 'This is a test');
           97  +  DELETE FROM t1 WHERE 1=1; -- Delete each row rather than dropping table.
           98  +  INSERT INTO t1 (docid, c) VALUES (1, 'This is a test');
           99  +}
          100  +
          101  +# Should be a single initial segment.
          102  +do_test fts3d-1.segments {
          103  +  execsql {
          104  +    SELECT level, idx FROM t1_segdir ORDER BY level, idx;
          105  +  }
          106  +} {0 0}
          107  +do_test fts3d-1.matches {
          108  +  execsql {
          109  +    SELECT OFFSETS(t1) FROM t1
          110  +     WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid;
          111  +  }
          112  +} {{0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4}}
          113  +
          114  +check_terms_all fts3d-1.1 {a is test this}
          115  +check_doclist_all fts3d-1.1.1 a {[1 0[2]]}
          116  +check_doclist_all fts3d-1.1.2 is {[1 0[1]]}
          117  +check_doclist_all fts3d-1.1.3 test {[1 0[3]]}
          118  +check_doclist_all fts3d-1.1.4 this {[1 0[0]]}
          119  +
          120  +check_terms   fts3d-1.2   0 0 {a is test this}
          121  +check_doclist fts3d-1.2.1 0 0 a {[1 0[2]]}
          122  +check_doclist fts3d-1.2.2 0 0 is {[1 0[1]]}
          123  +check_doclist fts3d-1.2.3 0 0 test {[1 0[3]]}
          124  +check_doclist fts3d-1.2.4 0 0 this {[1 0[0]]}
          125  +
          126  +# TODO(shess): optimize() tests here.
          127  +
          128  +finish_test