SQLite

Check-in [cddf69dbc4]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Update fts3/4 so that the 'merge=X,0' command merges X pages from all segments of the first level in the fts index that contains 2 or more segments.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: cddf69dbc46f10ee7e87538dd850e086386b544c
User & Date: dan 2016-03-07 20:14:27.338
Context
2016-03-08
00:39
Changes so that some assert()s in the virtual table query planner are correct even following an OOM error. (check-in: 9805f6f852 user: drh tags: trunk)
2016-03-07
20:48
Merge recent enhancements from trunk. (check-in: 84f09f00f8 user: drh tags: apple-osx)
20:14
Update fts3/4 so that the 'merge=X,0' command merges X pages from all segments of the first level in the fts index that contains 2 or more segments. (check-in: cddf69dbc4 user: dan tags: trunk)
19:08
Avoid a NULL pointer dereference following an OOM while generating code for IN operators on virtual tables. (check-in: c924008692 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts3/fts3_write.c.
329
330
331
332
333
334
335

336
337
338
339
340
341
342
343
/* 27 */ "SELECT ? UNION SELECT level / (1024 * ?) FROM %Q.'%q_segdir'",

/* This statement is used to determine which level to read the input from
** when performing an incremental merge. It returns the absolute level number
** of the oldest level in the db that contains at least ? segments. Or,
** if no level in the FTS index contains more than ? segments, the statement
** returns zero rows.  */

/* 28 */ "SELECT level FROM %Q.'%q_segdir' GROUP BY level HAVING count(*)>=?"
         "  ORDER BY (level %% 1024) ASC LIMIT 1",

/* Estimate the upper limit on the number of leaf nodes in a new segment
** created by merging the oldest :2 segments from absolute level :1. See 
** function sqlite3Fts3Incrmerge() for details.  */
/* 29 */ "SELECT 2 * total(1 + leaves_end_block - start_block) "
         "  FROM %Q.'%q_segdir' WHERE level = ? AND idx < ?",







>
|







329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
/* 27 */ "SELECT ? UNION SELECT level / (1024 * ?) FROM %Q.'%q_segdir'",

/* This statement is used to determine which level to read the input from
** when performing an incremental merge. It returns the absolute level number
** of the oldest level in the db that contains at least ? segments. Or,
** if no level in the FTS index contains more than ? segments, the statement
** returns zero rows.  */
/* 28 */ "SELECT level, count(*) AS cnt FROM %Q.'%q_segdir' "
         "  GROUP BY level HAVING cnt>=?"
         "  ORDER BY (level %% 1024) ASC LIMIT 1",

/* Estimate the upper limit on the number of leaf nodes in a new segment
** created by merging the oldest :2 segments from absolute level :1. See 
** function sqlite3Fts3Incrmerge() for details.  */
/* 29 */ "SELECT 2 * total(1 + leaves_end_block - start_block) "
         "  FROM %Q.'%q_segdir' WHERE level = ? AND idx < ?",
4832
4833
4834
4835
4836
4837
4838
4839
4840
4841



4842

4843
4844
4845
4846
4847
4848
4849
    /* Search the %_segdir table for the absolute level with the smallest
    ** relative level number that contains at least nMin segments, if any.
    ** If one is found, set iAbsLevel to the absolute level number and
    ** nSeg to nMin. If no level with at least nMin segments can be found, 
    ** set nSeg to -1.
    */
    rc = fts3SqlStmt(p, SQL_FIND_MERGE_LEVEL, &pFindLevel, 0);
    sqlite3_bind_int(pFindLevel, 1, nMin);
    if( sqlite3_step(pFindLevel)==SQLITE_ROW ){
      iAbsLevel = sqlite3_column_int64(pFindLevel, 0);



      nSeg = nMin;

    }else{
      nSeg = -1;
    }
    rc = sqlite3_reset(pFindLevel);

    /* If the hint read from the %_stat table is not empty, check if the
    ** last entry in it specifies a relative level smaller than or equal







|


>
>
>
|
>







4833
4834
4835
4836
4837
4838
4839
4840
4841
4842
4843
4844
4845
4846
4847
4848
4849
4850
4851
4852
4853
4854
    /* Search the %_segdir table for the absolute level with the smallest
    ** relative level number that contains at least nMin segments, if any.
    ** If one is found, set iAbsLevel to the absolute level number and
    ** nSeg to nMin. If no level with at least nMin segments can be found, 
    ** set nSeg to -1.
    */
    rc = fts3SqlStmt(p, SQL_FIND_MERGE_LEVEL, &pFindLevel, 0);
    sqlite3_bind_int(pFindLevel, 1, MAX(2, nMin));
    if( sqlite3_step(pFindLevel)==SQLITE_ROW ){
      iAbsLevel = sqlite3_column_int64(pFindLevel, 0);
      if( nMin<2 ){
        nSeg = sqlite3_column_int(pFindLevel, 1);
      }else{
        nSeg = nMin;
      }
    }else{
      nSeg = -1;
    }
    rc = sqlite3_reset(pFindLevel);

    /* If the hint read from the %_stat table is not empty, check if the
    ** last entry in it specifies a relative level smaller than or equal
4987
4988
4989
4990
4991
4992
4993
4994
4995
4996
4997
4998
4999
5000
5001
  /* If the first integer value is followed by a ',',  read the second
  ** integer value. */
  if( z[0]==',' && z[1]!='\0' ){
    z++;
    nMin = fts3Getint(&z);
  }

  if( z[0]!='\0' || nMin<2 ){
    rc = SQLITE_ERROR;
  }else{
    rc = SQLITE_OK;
    if( !p->bHasStat ){
      assert( p->bFts4==0 );
      sqlite3Fts3CreateStatTable(&rc, p);
    }







|







4992
4993
4994
4995
4996
4997
4998
4999
5000
5001
5002
5003
5004
5005
5006
  /* If the first integer value is followed by a ',',  read the second
  ** integer value. */
  if( z[0]==',' && z[1]!='\0' ){
    z++;
    nMin = fts3Getint(&z);
  }

  if( z[0]!='\0' || nMin<0 || nMin==1 ){
    rc = SQLITE_ERROR;
  }else{
    rc = SQLITE_OK;
    if( !p->bHasStat ){
      assert( p->bFts4==0 );
      sqlite3Fts3CreateStatTable(&rc, p);
    }
Changes to test/fts4merge.test.
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
    1   {merge=abc}
    2   {merge=%%%}
    3   {merge=,}
    4   {merge=5,}
    5   {merge=6,%}
    6   {merge=6,six}
    7   {merge=6,1}
    8   {merge=6,0}
  } {
    do_catchsql_test 2.$tn { 
      INSERT INTO t2(t2) VALUES($arg);
    } {1 {SQL logic error or missing database}}
  }
  
  #-------------------------------------------------------------------------







<







85
86
87
88
89
90
91

92
93
94
95
96
97
98
    1   {merge=abc}
    2   {merge=%%%}
    3   {merge=,}
    4   {merge=5,}
    5   {merge=6,%}
    6   {merge=6,six}
    7   {merge=6,1}

  } {
    do_catchsql_test 2.$tn { 
      INSERT INTO t2(t2) VALUES($arg);
    } {1 {SQL logic error or missing database}}
  }
  
  #-------------------------------------------------------------------------
Added test/fts4opt.test.


















































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# 2016 March 8
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl
source $testdir/fts3_common.tcl
set ::testprefix fts4opt

# If SQLITE_ENABLE_FTS3 is defined, omit this file.
ifcapable !fts3 {
  finish_test
  return
}

# Create the fts_kjv_genesis procedure which fills and FTS3/4 table 
# with the complete text of the Book of Genesis.
#
source $testdir/genesis.tcl

do_execsql_test 1.0 { CREATE TABLE t1(docid, words) }
fts_kjv_genesis

#-------------------------------------------------------------------------
# Argument $db is an open database handle. $tbl is the name of an FTS3/4
# table with the database. This command rearranges the contents of the
# %_segdir table so that all segments within each index are on the same
# level. This means that the 'merge' command can then be used for an
# incremental optimize routine.
#
proc prepare_for_optimize {db tbl} {
  $db eval [string map [list % $tbl] {
    BEGIN;
      CREATE TEMP TABLE tmp_segdir(
        level, idx, start_block, leaves_end_block, end_block, root
      );

      INSERT INTO temp.tmp_segdir 
        SELECT 
        1024*(o.level / 1024) + 32,                                -- level
        sum(o.level<i.level OR (o.level=i.level AND o.idx>i.idx)), -- idx
        o.start_block, o.leaves_end_block, o.end_block, o.root     -- other
        FROM %_segdir o, %_segdir i 
        WHERE (o.level / 1024) = (i.level / 1024)
        GROUP BY o.level, o.idx;
  
      DELETE FROM %_segdir;
      INSERT INTO %_segdir SELECT * FROM temp.tmp_segdir;
      DROP TABLE temp.tmp_segdir;
  
    COMMIT;
  }]
}

do_test 1.1 {
  execsql { CREATE VIRTUAL TABLE t2 USING fts4(words, prefix="1,2,3") }
  foreach {docid words} [db eval { SELECT * FROM t1 }] {
    execsql { INSERT INTO t2(docid, words) VALUES($docid, $words) }
  }
} {}

do_execsql_test 1.2 {
  SELECT level, count(*) FROM t2_segdir GROUP BY level
} {
  0    13    1 15    2 5 
  1024 13 1025 15 1026 5 
  2048 13 2049 15 2050 5 
  3072 13 3073 15 3074 5
}

do_execsql_test 1.3 { INSERT INTO t2(t2) VALUES('integrity-check') }
prepare_for_optimize db t2
do_execsql_test 1.4 { INSERT INTO t2(t2) VALUES('integrity-check') }

do_execsql_test 1.5 {
  SELECT level, count(*) FROM t2_segdir GROUP BY level
} {
  32   33 
  1056 33 
  2080 33 
  3104 33
}

do_test 1.6 {
  while 1 {
    set tc1 [db total_changes]
    execsql { INSERT INTO t2(t2) VALUES('merge=5,0') }
    set tc2 [db total_changes]
    if {($tc2 - $tc1) < 2} break
  }
  execsql { SELECT level, count(*) FROM t2_segdir GROUP BY level }
} {33 1 1057 1 2081 1 3105 1}
do_execsql_test 1.7 { INSERT INTO t2(t2) VALUES('integrity-check') }

do_execsql_test 1.8 {
  INSERT INTO t2(words) SELECT words FROM t1;
  SELECT level, count(*) FROM t2_segdir GROUP BY level;
} {0 2 1024 2 2048 2 3072 2}

#-------------------------------------------------------------------------

do_execsql_test 2.0 {
  DELETE FROM t2;
}
do_test 2.1 {
  foreach {docid words} [db eval { SELECT * FROM t1 }] {
    execsql { INSERT INTO t2(docid, words) VALUES($docid, $words) }
  }

  set i 0
  foreach {docid words} [db eval { SELECT * FROM t1 }] {
    if {[incr i] % 2} { execsql { DELETE FROM t2 WHERE docid = $docid } }
  }

  set i 0
  foreach {docid words} [db eval { SELECT * FROM t1 }] {
    if {[incr i] % 3} {
      execsql { INSERT OR REPLACE INTO t2(docid, words) VALUES($docid, $words) }
    }
  }
} {}

do_execsql_test 2.2 {
  SELECT level, count(*) FROM t2_segdir GROUP BY level
} {
  0    10    1 15    2 12 
  1024 10 1025 15 1026 12 
  2048 10 2049 15 2050 12 
  3072 10 3073 15 3074 12
}

do_execsql_test 2.3 { INSERT INTO t2(t2) VALUES('integrity-check') }
prepare_for_optimize db t2
do_execsql_test 2.4 { INSERT INTO t2(t2) VALUES('integrity-check') }

do_execsql_test 2.5 {
  SELECT level, count(*) FROM t2_segdir GROUP BY level
} {
    32 37 
  1056 37 
  2080 37 
  3104 37
}

do_test 2.6 {
  while 1 {
    set tc1 [db total_changes]
    execsql { INSERT INTO t2(t2) VALUES('merge=5,0') }
    set tc2 [db total_changes]
    if {($tc2 - $tc1) < 2} break
  }
  execsql { SELECT level, count(*) FROM t2_segdir GROUP BY level }
} {33 1 1057 1 2081 1 3105 1}
do_execsql_test 2.7 { INSERT INTO t2(t2) VALUES('integrity-check') }

do_execsql_test 2.8 {
  INSERT INTO t2(words) SELECT words FROM t1;
  SELECT level, count(*) FROM t2_segdir GROUP BY level;
} {0 2 1024 2 2048 2 3072 2}

finish_test