/ Check-in [6eb2d74a]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix fts3 to not have the VACUUM bug from fts2. %_content.docid is an alias to fix the rowid for documents, %_segments.blockid is an alias to fix the rowid for segment blocks. Unit test for the problem. (CVS 4280)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 6eb2d74a8cfce322930f05c97d4ec255f3711efb
User & Date: shess 2007-08-23 20:23:37
Context
2007-08-23
20:28
Make comments and variable naming more consistent WRT rowid versus docid/blockid. This should have no code impact. (CVS 4281) check-in: 76f1e18e user: shess tags: trunk
20:23
Fix fts3 to not have the VACUUM bug from fts2. %_content.docid is an alias to fix the rowid for documents, %_segments.blockid is an alias to fix the rowid for segment blocks. Unit test for the problem. (CVS 4280) check-in: 6eb2d74a user: shess tags: trunk
16:27
Add some warm-body tests for rollback journal i/o enhancements. (CVS 4279) check-in: ff3770f8 user: danielk1977 tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779

1780
1781
1782
1783
1784
1785
1786
1787
1788
....
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
....
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880












1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
....
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
....
1909
1910
1911
1912
1913
1914
1915


1916
1917
1918
1919
1920
1921
1922
....
2802
2803
2804
2805
2806
2807
2808

2809
2810
2811
2812
2813
2814
2815
2816




2817
2818
2819
2820
2821
2822
2823
....
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670



3671
3672
3673

3674
3675
3676
3677
3678
3679
3680
/* These must exactly match the enum above. */
/* TODO(shess): Is there some risk that a statement will be used in two
** cursors at once, e.g.  if a query joins a virtual table to itself?
** If so perhaps we should move some of these to the cursor object.
*/
static const char *const fulltext_zStatement[MAX_STMT] = {
  /* CONTENT_INSERT */ NULL,  /* generated in contentInsertStatement() */
  /* CONTENT_SELECT */ "select * from %_content where rowid = ?",
  /* CONTENT_UPDATE */ NULL,  /* generated in contentUpdateStatement() */
  /* CONTENT_DELETE */ "delete from %_content where rowid = ?",

  /* BLOCK_INSERT */ "insert into %_segments values (?)",

  /* BLOCK_SELECT */ "select block from %_segments where rowid = ?",
  /* BLOCK_DELETE */ "delete from %_segments where rowid between ? and ?",

  /* SEGDIR_MAX_INDEX */ "select max(idx) from %_segdir where level = ?",
  /* SEGDIR_SET */ "insert into %_segdir values (?, ?, ?, ?, ?, ?)",
  /* SEGDIR_SELECT */
  "select start_block, leaves_end_block, root from %_segdir "
  " where level = ? order by idx",
  /* SEGDIR_SPAN */
................................................................................

  /* Precompiled statements used for segment merges.  We run a
  ** separate select across the leaf level of each tree being merged.
  */
  sqlite3_stmt *pLeafSelectStmts[MERGE_COUNT];
  /* The statement used to prepare pLeafSelectStmts. */
#define LEAF_SELECT \
  "select block from %_segments where rowid between ? and ? order by rowid"

  /* These buffer pending index updates during transactions.
  ** nPendingData estimates the memory size of the pending data.  It
  ** doesn't include the hash-bucket overhead, nor any malloc
  ** overhead.  When nPendingData exceeds kPendingThreshold, the
  ** buffer is flushed even before the transaction closes.
  ** pendingTerms stores the data, and is only valid when nPendingData
................................................................................
/* Return the fulltext_vtab that cursor c was opened on, obtained by
** casting the pVtab pointer held in the cursor's base member.
*/
static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
  fulltext_vtab *pOwner = (fulltext_vtab *) c->base.pVtab;
  return pOwner;
}

static const sqlite3_module fts3Module;   /* forward declaration */

/* Build and return the SQL text used to insert one row into the
 * content table:
 *   insert into %_content (rowid, <content columns>) values (?, ?, ...)
 *
 * The values list holds one "?" for rowid followed by one "?" per
 * entry in v->azContentColumn (v->nColumn of them).  The StringBuffer
 * is not destroyed here; its data pointer is handed back to the caller.
 */
static const char *contentInsertStatement(fulltext_vtab *v){
  StringBuffer sql;
  int nLeft;

  initStringBuffer(&sql);
  append(&sql, "insert into %_content (rowid, ");
  appendList(&sql, v->nColumn, v->azContentColumn);

  /* First placeholder binds rowid, the rest bind the content columns. */
  append(&sql, ") values (?");
  nLeft = v->nColumn;
  while( nLeft>0 ){
    append(&sql, ", ?");
    nLeft--;
  }
  append(&sql, ")");

  return stringBufferData(&sql);
}













/* Return a dynamically generated statement of the form
 *   update %_content set [col_0] = ?, [col_1] = ?, ...
 *                    where rowid = ?
 */
static const char *contentUpdateStatement(fulltext_vtab *v){
  StringBuffer sb;
  int i;

  initStringBuffer(&sb);
  append(&sb, "update %_content set ");
................................................................................
  for(i=0; i<v->nColumn; ++i) {
    if( i>0 ){
      append(&sb, ", ");
    }
    append(&sb, v->azContentColumn[i]);
    append(&sb, " = ?");
  }
  append(&sb, " where rowid = ?");
  return stringBufferData(&sb);
}

/* Puts a freshly-prepared statement determined by iStmt in *ppStmt.
** If the indicated statement has never been prepared, it is prepared
** and cached, otherwise the cached version is reset.
*/
................................................................................
  assert( iStmt<MAX_STMT );
  if( v->pFulltextStatements[iStmt]==NULL ){
    const char *zStmt;
    int rc;
    switch( iStmt ){
      case CONTENT_INSERT_STMT:
        zStmt = contentInsertStatement(v); break;


      case CONTENT_UPDATE_STMT:
        zStmt = contentUpdateStatement(v); break;
      default:
        zStmt = fulltext_zStatement[iStmt];
    }
    rc = sql_prepare(v->db, v->zDb, v->zName, &v->pFulltextStatements[iStmt],
                         zStmt);
................................................................................
  TRACE(("FTS3 Create\n"));

  rc = parseSpec(&spec, argc, argv, pzErr);
  if( rc!=SQLITE_OK ) return rc;

  initStringBuffer(&schema);
  append(&schema, "CREATE TABLE %_content(");

  appendList(&schema, spec.nColumn, spec.azContentColumn);
  append(&schema, ")");
  rc = sql_exec(db, spec.zDb, spec.zName, stringBufferData(&schema));
  stringBufferDestroy(&schema);
  if( rc!=SQLITE_OK ) goto out;

  rc = sql_exec(db, spec.zDb, spec.zName,
                "create table %_segments(block blob);");




  if( rc!=SQLITE_OK ) goto out;

  rc = sql_exec(db, spec.zDb, spec.zName,
                "create table %_segdir("
                "  level integer,"
                "  idx integer,"
                "  start_block integer,"
................................................................................
  sqlite3_vtab_cursor *pCursor,     /* The cursor used for this query */
  int idxNum, const char *idxStr,   /* Which indexing scheme to use */
  int argc, sqlite3_value **argv    /* Arguments for the indexing scheme */
){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  fulltext_vtab *v = cursor_vtab(c);
  int rc;
  char *zSql;

  TRACE(("FTS3 Filter %p\n",pCursor));

  zSql = sqlite3_mprintf("select rowid, * from %%_content %s",
                          idxNum==QUERY_GENERIC ? "" : "where rowid=?");



  sqlite3_finalize(c->pStmt);
  rc = sql_prepare(v->db, v->zDb, v->zName, &c->pStmt, zSql);
  sqlite3_free(zSql);

  if( rc!=SQLITE_OK ) return rc;

  c->iCursorType = idxNum;
  switch( idxNum ){
    case QUERY_GENERIC:
      break;








|

|

|
>
|
|







 







|







 







|






|







>
>
>
>
>
>
>
>
>
>
>
>



|







 







|







 







>
>







 







>







|
>
>
>
>







 







|



|
|
>
>
>

|
<
>







1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
....
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
....
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
....
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
....
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
....
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
....
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695

3696
3697
3698
3699
3700
3701
3702
3703
/* These must exactly match the enum above. */
/* TODO(shess): Is there some risk that a statement will be used in two
** cursors at once, e.g.  if a query joins a virtual table to itself?
** If so perhaps we should move some of these to the cursor object.
*/
static const char *const fulltext_zStatement[MAX_STMT] = {
  /* CONTENT_INSERT */ NULL,  /* generated in contentInsertStatement() */
  /* CONTENT_SELECT */ NULL,  /* generated in contentSelectStatement() */
  /* CONTENT_UPDATE */ NULL,  /* generated in contentUpdateStatement() */
  /* CONTENT_DELETE */ "delete from %_content where docid = ?",

  /* BLOCK_INSERT */
  "insert into %_segments (blockid, block) values (null, ?)",
  /* BLOCK_SELECT */ "select block from %_segments where blockid = ?",
  /* BLOCK_DELETE */ "delete from %_segments where blockid between ? and ?",

  /* SEGDIR_MAX_INDEX */ "select max(idx) from %_segdir where level = ?",
  /* SEGDIR_SET */ "insert into %_segdir values (?, ?, ?, ?, ?, ?)",
  /* SEGDIR_SELECT */
  "select start_block, leaves_end_block, root from %_segdir "
  " where level = ? order by idx",
  /* SEGDIR_SPAN */
................................................................................

  /* Precompiled statements used for segment merges.  We run a
  ** separate select across the leaf level of each tree being merged.
  */
  sqlite3_stmt *pLeafSelectStmts[MERGE_COUNT];
  /* The statement used to prepare pLeafSelectStmts. */
#define LEAF_SELECT \
  "select block from %_segments where blockid between ? and ? order by blockid"

  /* These buffer pending index updates during transactions.
  ** nPendingData estimates the memory size of the pending data.  It
  ** doesn't include the hash-bucket overhead, nor any malloc
  ** overhead.  When nPendingData exceeds kPendingThreshold, the
  ** buffer is flushed even before the transaction closes.
  ** pendingTerms stores the data, and is only valid when nPendingData
................................................................................
/* Return the fulltext_vtab that cursor c was opened on, obtained by
** casting the pVtab pointer held in the cursor's base member.
*/
static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
  fulltext_vtab *pOwner = (fulltext_vtab *) c->base.pVtab;
  return pOwner;
}

static const sqlite3_module fts3Module;   /* forward declaration */

/* Build and return the SQL text used to insert one row into the
 * content table:
 *   insert into %_content (docid, <content columns>) values (?, ?, ...)
 *
 * The values list holds one "?" for docid followed by one "?" per
 * entry in v->azContentColumn (v->nColumn of them).  The StringBuffer
 * is not destroyed here; its data pointer is handed back to the caller.
 */
static const char *contentInsertStatement(fulltext_vtab *v){
  StringBuffer sql;
  int nLeft;

  initStringBuffer(&sql);
  append(&sql, "insert into %_content (docid, ");
  appendList(&sql, v->nColumn, v->azContentColumn);

  /* First placeholder binds docid, the rest bind the content columns. */
  append(&sql, ") values (?");
  nLeft = v->nColumn;
  while( nLeft>0 ){
    append(&sql, ", ?");
    nLeft--;
  }
  append(&sql, ")");

  return stringBufferData(&sql);
}

/* Build and return the SQL text used to fetch one document's content
 * columns by docid:
 *   SELECT <content columns> FROM %_content WHERE docid = ?
 *
 * The columns are listed explicitly from v->azContentColumn, so docid
 * itself is not part of the result set.  The StringBuffer is not
 * destroyed here; its data pointer is handed back to the caller.
 */
static const char *contentSelectStatement(fulltext_vtab *v){
  StringBuffer sql;

  initStringBuffer(&sql);

  append(&sql, "SELECT ");
  appendList(&sql, v->nColumn, v->azContentColumn);
  append(&sql, " FROM %_content WHERE docid = ?");

  return stringBufferData(&sql);
}

/* Return a dynamically generated statement of the form
 *   update %_content set [col_0] = ?, [col_1] = ?, ...
 *                    where docid = ?
 */
static const char *contentUpdateStatement(fulltext_vtab *v){
  StringBuffer sb;
  int i;

  initStringBuffer(&sb);
  append(&sb, "update %_content set ");
................................................................................
  for(i=0; i<v->nColumn; ++i) {
    if( i>0 ){
      append(&sb, ", ");
    }
    append(&sb, v->azContentColumn[i]);
    append(&sb, " = ?");
  }
  append(&sb, " where docid = ?");
  return stringBufferData(&sb);
}

/* Puts a freshly-prepared statement determined by iStmt in *ppStmt.
** If the indicated statement has never been prepared, it is prepared
** and cached, otherwise the cached version is reset.
*/
................................................................................
  assert( iStmt<MAX_STMT );
  if( v->pFulltextStatements[iStmt]==NULL ){
    const char *zStmt;
    int rc;
    switch( iStmt ){
      case CONTENT_INSERT_STMT:
        zStmt = contentInsertStatement(v); break;
      case CONTENT_SELECT_STMT:
        zStmt = contentSelectStatement(v); break;
      case CONTENT_UPDATE_STMT:
        zStmt = contentUpdateStatement(v); break;
      default:
        zStmt = fulltext_zStatement[iStmt];
    }
    rc = sql_prepare(v->db, v->zDb, v->zName, &v->pFulltextStatements[iStmt],
                         zStmt);
................................................................................
  TRACE(("FTS3 Create\n"));

  rc = parseSpec(&spec, argc, argv, pzErr);
  if( rc!=SQLITE_OK ) return rc;

  initStringBuffer(&schema);
  append(&schema, "CREATE TABLE %_content(");
  append(&schema, "  docid INTEGER PRIMARY KEY,");
  appendList(&schema, spec.nColumn, spec.azContentColumn);
  append(&schema, ")");
  rc = sql_exec(db, spec.zDb, spec.zName, stringBufferData(&schema));
  stringBufferDestroy(&schema);
  if( rc!=SQLITE_OK ) goto out;

  rc = sql_exec(db, spec.zDb, spec.zName,
                "create table %_segments("
                "  blockid INTEGER PRIMARY KEY,"
                "  block blob"
                ");"
                );
  if( rc!=SQLITE_OK ) goto out;

  rc = sql_exec(db, spec.zDb, spec.zName,
                "create table %_segdir("
                "  level integer,"
                "  idx integer,"
                "  start_block integer,"
................................................................................
  sqlite3_vtab_cursor *pCursor,     /* The cursor used for this query */
  int idxNum, const char *idxStr,   /* Which indexing scheme to use */
  int argc, sqlite3_value **argv    /* Arguments for the indexing scheme */
){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  fulltext_vtab *v = cursor_vtab(c);
  int rc;
  StringBuffer sb;

  TRACE(("FTS3 Filter %p\n",pCursor));

  initStringBuffer(&sb);
  append(&sb, "SELECT docid, ");
  appendList(&sb, v->nColumn, v->azContentColumn);
  append(&sb, " FROM %_content");
  if( idxNum!=QUERY_GENERIC ) append(&sb, " WHERE docid = ?");
  sqlite3_finalize(c->pStmt);
  rc = sql_prepare(v->db, v->zDb, v->zName, &c->pStmt, stringBufferData(&sb));

  stringBufferDestroy(&sb);
  if( rc!=SQLITE_OK ) return rc;

  c->iCursorType = idxNum;
  switch( idxNum ){
    case QUERY_GENERIC:
      break;

Added test/fts3b.test.





















































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# 2007 August 20
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  This
# script tests for the fts2 rowid-versus-vacuum problem (ticket #2566).
#
# $Id: fts3b.test,v 1.1 2007/08/23 20:23:37 shess Exp $
#

# Load the shared test harness from the directory containing this script.
set testdir [file dirname $argv0]
source $testdir/tester.tcl

# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
ifcapable !fts3 {
  finish_test
  return
}

# Part 1: document ids must survive VACUUM.
#
# Insert three documents and delete the middle one, so the surviving
# documents (rowids 1 and 3, per the expected results below) leave a
# gap in the id sequence.
db eval {
  CREATE VIRTUAL TABLE t1 USING fts3(c);
  INSERT INTO t1 (c) VALUES('this is a test');
  INSERT INTO t1 (c) VALUES('that was a test');
  INSERT INTO t1 (c) VALUES('this is fun');
  DELETE FROM t1 WHERE c = 'that was a test';
}

# Baseline test: both remaining documents match 'this' before VACUUM.
do_test fts3b-1.1 {
  execsql {
    SELECT rowid FROM t1 WHERE c MATCH 'this';
  }
} {1 3}

db eval {VACUUM}

# The VACUUM renumbered the t1_content table in fts2, which breaks
# this.  The same query must still report rowids 1 and 3.
do_test fts3b-1.2 {
  execsql {
    SELECT rowid FROM t1 WHERE c MATCH 'this';
  }
} {1 3}

# Part 2: segment block ids must survive VACUUM.
#
# The t2 table is unfortunately pretty contrived.  We need documents
# that are bigger than ROOT_MAX (1024) to force segments out of the
# segdir and into %_segments.  We also need to force segment merging
# to generate a hole in the %_segments table, which needs more than 16
# docs.  Beyond that, to test correct operation of BLOCK_SELECT_STMT,
# we need to merge a multi-level tree, which is where the 10,000 comes
# from.  Which is slow, thus the set of transactions, with the 500
# being a number such that 10,000/500 > 16.
set text {
  Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Maecenas
  iaculis mollis ipsum. Praesent rhoncus placerat justo. Duis non quam
  sed turpis posuere placerat. Curabitur et lorem in lorem porttitor
  aliquet. Pellentesque bibendum tincidunt diam. Vestibulum blandit
  ante nec elit. In sapien diam, facilisis eget, dictum sed, viverra
  at, felis. Vestibulum magna. Sed magna dolor, vestibulum rhoncus,
  ornare vel, vulputate sit amet, felis. Integer malesuada, tellus at
  luctus gravida, diam nunc porta nibh, nec imperdiet massa metus eu
  lectus. Aliquam nisi. Nunc fringilla nulla at lectus. Suspendisse
  potenti. Cum sociis natoque penatibus et magnis dis parturient
  montes, nascetur ridiculus mus. Pellentesque odio nulla, feugiat eu,
  suscipit nec, consequat quis, risus.
}
# Double the passage by appending it to itself.
append text $text

# Insert 10,000 copies of the document.  $res accumulates the expected
# rowids 1..10000 in insertion order.  The transaction is committed and
# reopened whenever $ii is a multiple of 500 (including $ii == 0).
db eval {CREATE VIRTUAL TABLE t2 USING fts3(c)}
set res {}
db eval {BEGIN}
for {set ii 0} {$ii<10000} {incr ii} {
  db eval {INSERT INTO t2 (c) VALUES ($text)}
  lappend res [expr {$ii+1}]
  if {($ii%500)==0} {
    db eval {
      COMMIT;
      BEGIN;
    }
  }
}
db eval {COMMIT}

# Baseline test: every document matches 'lorem' before VACUUM.
do_test fts3b-2.1 {
  execsql {
    SELECT rowid FROM t2 WHERE c MATCH 'lorem';
  }
} $res

db eval {VACUUM}

# The VACUUM renumbered the t2_segments table in fts2, which would
# break the following.
do_test fts3b-2.2 {
  execsql {
    SELECT rowid FROM t2 WHERE c MATCH 'lorem';
  }
} $res

finish_test