SQLite

Check-in [d27d9965b5]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Merge fts5 branch into trunk.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: d27d9965b5404cd32be1113215fd9feeb5b66acc
User & Date: dan 2015-06-26 20:25:59.799
Context
2015-06-26
20:45
Treat compilation of FTS5 for the loadable extension specially with MSVC. (check-in: 7c610276bb user: mistachkin tags: trunk)
20:25
Merge fts5 branch into trunk. (check-in: d27d9965b5 user: dan tags: trunk)
20:14
Merge latest trunk with this branch. (Closed-Leaf check-in: 8671b9e137 user: dan tags: fts5)
19:43
Small size reduction and performance increase on the OP_IdxInsert opcode. (check-in: b6bedc2e9c user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to Makefile.in.
169
170
171
172
173
174
175

176
177
178
179
180
181
182
         backup.lo bitvec.lo btmutex.lo btree.lo build.lo \
         callback.lo complete.lo ctime.lo date.lo dbstat.lo delete.lo \
         expr.lo fault.lo fkey.lo \
         fts3.lo fts3_aux.lo fts3_expr.lo fts3_hash.lo fts3_icu.lo \
         fts3_porter.lo fts3_snippet.lo fts3_tokenizer.lo fts3_tokenizer1.lo \
         fts3_tokenize_vtab.lo \
         fts3_unicode.lo fts3_unicode2.lo fts3_write.lo \

         func.lo global.lo hash.lo \
         icu.lo insert.lo journal.lo legacy.lo loadext.lo \
         main.lo malloc.lo mem0.lo mem1.lo mem2.lo mem3.lo mem5.lo \
         memjournal.lo \
         mutex.lo mutex_noop.lo mutex_unix.lo mutex_w32.lo \
         notify.lo opcodes.lo os.lo os_unix.lo os_win.lo \
         pager.lo parse.lo pcache.lo pcache1.lo pragma.lo prepare.lo printf.lo \







>







169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
         backup.lo bitvec.lo btmutex.lo btree.lo build.lo \
         callback.lo complete.lo ctime.lo date.lo dbstat.lo delete.lo \
         expr.lo fault.lo fkey.lo \
         fts3.lo fts3_aux.lo fts3_expr.lo fts3_hash.lo fts3_icu.lo \
         fts3_porter.lo fts3_snippet.lo fts3_tokenizer.lo fts3_tokenizer1.lo \
         fts3_tokenize_vtab.lo \
         fts3_unicode.lo fts3_unicode2.lo fts3_write.lo \
	 fts5.lo \
         func.lo global.lo hash.lo \
         icu.lo insert.lo journal.lo legacy.lo loadext.lo \
         main.lo malloc.lo mem0.lo mem1.lo mem2.lo mem3.lo mem5.lo \
         memjournal.lo \
         mutex.lo mutex_noop.lo mutex_unix.lo mutex_w32.lo \
         notify.lo opcodes.lo os.lo os_unix.lo os_win.lo \
         pager.lo parse.lo pcache.lo pcache1.lo pragma.lo prepare.lo printf.lo \
409
410
411
412
413
414
415

416
417
418
419
420
421
422
#
TESTSRC += \
  $(TOP)/ext/misc/amatch.c \
  $(TOP)/ext/misc/closure.c \
  $(TOP)/ext/misc/eval.c \
  $(TOP)/ext/misc/fileio.c \
  $(TOP)/ext/misc/fuzzer.c \

  $(TOP)/ext/misc/ieee754.c \
  $(TOP)/ext/misc/nextchar.c \
  $(TOP)/ext/misc/percentile.c \
  $(TOP)/ext/misc/regexp.c \
  $(TOP)/ext/misc/spellfix.c \
  $(TOP)/ext/misc/totype.c \
  $(TOP)/ext/misc/wholenumber.c







>







410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
#
TESTSRC += \
  $(TOP)/ext/misc/amatch.c \
  $(TOP)/ext/misc/closure.c \
  $(TOP)/ext/misc/eval.c \
  $(TOP)/ext/misc/fileio.c \
  $(TOP)/ext/misc/fuzzer.c \
  $(TOP)/ext/fts5/fts5_tcl.c \
  $(TOP)/ext/misc/ieee754.c \
  $(TOP)/ext/misc/nextchar.c \
  $(TOP)/ext/misc/percentile.c \
  $(TOP)/ext/misc/regexp.c \
  $(TOP)/ext/misc/spellfix.c \
  $(TOP)/ext/misc/totype.c \
  $(TOP)/ext/misc/wholenumber.c
970
971
972
973
974
975
976





































977
978
979
980
981
982
983

fts3_write.lo:	$(TOP)/ext/fts3/fts3_write.c $(HDR) $(EXTHDR)
	$(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_write.c

rtree.lo:	$(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR)
	$(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c







































# Rules to build the 'testfixture' application.
#
# If using the amalgamation, use sqlite3.c directly to build the test
# fixture.  Otherwise link against libsqlite3.la.  (This distinction is
# necessary because the test fixture requires non-API symbols which are
# hidden when the library is built via the amalgamation).







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022

fts3_write.lo:	$(TOP)/ext/fts3/fts3_write.c $(HDR) $(EXTHDR)
	$(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_write.c

rtree.lo:	$(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR)
	$(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c

# FTS5 things
#
FTS5_SRC = \
   $(TOP)/ext/fts5/fts5.h \
   $(TOP)/ext/fts5/fts5Int.h \
   $(TOP)/ext/fts5/fts5_aux.c \
   $(TOP)/ext/fts5/fts5_buffer.c \
   $(TOP)/ext/fts5/fts5_main.c \
   $(TOP)/ext/fts5/fts5_config.c \
   $(TOP)/ext/fts5/fts5_expr.c \
   $(TOP)/ext/fts5/fts5_hash.c \
   $(TOP)/ext/fts5/fts5_index.c \
   fts5parse.c fts5parse.h \
   $(TOP)/ext/fts5/fts5_storage.c \
   $(TOP)/ext/fts5/fts5_tokenize.c \
   $(TOP)/ext/fts5/fts5_unicode2.c \
   $(TOP)/ext/fts5/fts5_varint.c \
   $(TOP)/ext/fts5/fts5_vocab.c  \

fts5parse.c:	$(TOP)/ext/fts5/fts5parse.y lemon 
	cp $(TOP)/ext/fts5/fts5parse.y .
	rm -f fts5parse.h
	./lemon $(OPTS) fts5parse.y
	mv fts5parse.c fts5parse.c.orig
	echo "#ifdef SQLITE_ENABLE_FTS5" > fts5parse.c
	cat fts5parse.c.orig | sed 's/yy/fts5yy/g' | sed 's/YY/fts5YY/g' \
		| sed 's/TOKEN/FTS5TOKEN/g' >> fts5parse.c
	echo "#endif /* SQLITE_ENABLE_FTS5 */" >> fts5parse.c

fts5parse.h: fts5parse.c

fts5.c: $(FTS5_SRC)
	$(TCLSH_CMD) $(TOP)/ext/fts5/tool/mkfts5c.tcl

fts5.lo:	fts5.c $(HDR) $(EXTHDR)
	$(LTCOMPILE) -DSQLITE_CORE -c fts5.c


# Rules to build the 'testfixture' application.
#
# If using the amalgamation, use sqlite3.c directly to build the test
# fixture.  Otherwise link against libsqlite3.la.  (This distinction is
# necessary because the test fixture requires non-API symbols which are
# hidden when the library is built via the amalgamation).
Changes to Makefile.msc.
823
824
825
826
827
828
829

830
831
832
833
834
835
836
LIBOBJS0 = vdbe.lo parse.lo alter.lo analyze.lo attach.lo auth.lo \
         backup.lo bitvec.lo btmutex.lo btree.lo build.lo \
         callback.lo complete.lo ctime.lo date.lo dbstat.lo delete.lo \
         expr.lo fault.lo fkey.lo \
         fts3.lo fts3_aux.lo fts3_expr.lo fts3_hash.lo fts3_icu.lo \
         fts3_porter.lo fts3_snippet.lo fts3_tokenizer.lo fts3_tokenizer1.lo \
         fts3_tokenize_vtab.lo fts3_unicode.lo fts3_unicode2.lo fts3_write.lo \

         func.lo global.lo hash.lo \
         icu.lo insert.lo journal.lo legacy.lo loadext.lo \
         main.lo malloc.lo mem0.lo mem1.lo mem2.lo mem3.lo mem5.lo \
         memjournal.lo \
         mutex.lo mutex_noop.lo mutex_unix.lo mutex_w32.lo \
         notify.lo opcodes.lo os.lo os_unix.lo os_win.lo \
         pager.lo pcache.lo pcache1.lo pragma.lo prepare.lo printf.lo \







>







823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
LIBOBJS0 = vdbe.lo parse.lo alter.lo analyze.lo attach.lo auth.lo \
         backup.lo bitvec.lo btmutex.lo btree.lo build.lo \
         callback.lo complete.lo ctime.lo date.lo dbstat.lo delete.lo \
         expr.lo fault.lo fkey.lo \
         fts3.lo fts3_aux.lo fts3_expr.lo fts3_hash.lo fts3_icu.lo \
         fts3_porter.lo fts3_snippet.lo fts3_tokenizer.lo fts3_tokenizer1.lo \
         fts3_tokenize_vtab.lo fts3_unicode.lo fts3_unicode2.lo fts3_write.lo \
         fts5.lo \
         func.lo global.lo hash.lo \
         icu.lo insert.lo journal.lo legacy.lo loadext.lo \
         main.lo malloc.lo mem0.lo mem1.lo mem2.lo mem3.lo mem5.lo \
         memjournal.lo \
         mutex.lo mutex_noop.lo mutex_unix.lo mutex_w32.lo \
         notify.lo opcodes.lo os.lo os_unix.lo os_win.lo \
         pager.lo pcache.lo pcache1.lo pragma.lo prepare.lo printf.lo \
1074
1075
1076
1077
1078
1079
1080


1081
1082
1083
1084
1085
1086
1087
#
TESTEXT = \
  $(TOP)\ext\misc\amatch.c \
  $(TOP)\ext\misc\closure.c \
  $(TOP)\ext\misc\eval.c \
  $(TOP)\ext\misc\fileio.c \
  $(TOP)\ext\misc\fuzzer.c \


  $(TOP)\ext\misc\ieee754.c \
  $(TOP)\ext\misc\nextchar.c \
  $(TOP)\ext\misc\percentile.c \
  $(TOP)\ext\misc\regexp.c \
  $(TOP)\ext\misc\spellfix.c \
  $(TOP)\ext\misc\totype.c \
  $(TOP)\ext\misc\wholenumber.c







>
>







1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
#
TESTEXT = \
  $(TOP)\ext\misc\amatch.c \
  $(TOP)\ext\misc\closure.c \
  $(TOP)\ext\misc\eval.c \
  $(TOP)\ext\misc\fileio.c \
  $(TOP)\ext\misc\fuzzer.c \
  fts5.c \
  $(TOP)\ext\fts5\fts5_tcl.c \
  $(TOP)\ext\misc\ieee754.c \
  $(TOP)\ext\misc\nextchar.c \
  $(TOP)\ext\misc\percentile.c \
  $(TOP)\ext\misc\regexp.c \
  $(TOP)\ext\misc\spellfix.c \
  $(TOP)\ext\misc\totype.c \
  $(TOP)\ext\misc\wholenumber.c
1649
1650
1651
1652
1653
1654
1655









































1656
1657
1658
1659
1660
1661
1662

fts3_write.lo:	$(TOP)\ext\fts3\fts3_write.c $(HDR) $(EXTHDR)
	$(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\fts3\fts3_write.c

rtree.lo:	$(TOP)\ext\rtree\rtree.c $(HDR) $(EXTHDR)
	$(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\rtree\rtree.c











































# Rules to build the 'testfixture' application.
#
# If using the amalgamation, use sqlite3.c directly to build the test
# fixture.  Otherwise link against libsqlite3.lib.  (This distinction is
# necessary because the test fixture requires non-API symbols which are
# hidden when the library is built via the amalgamation).







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706

fts3_write.lo:	$(TOP)\ext\fts3\fts3_write.c $(HDR) $(EXTHDR)
	$(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\fts3\fts3_write.c

rtree.lo:	$(TOP)\ext\rtree\rtree.c $(HDR) $(EXTHDR)
	$(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\rtree\rtree.c

# FTS5 things
#
FTS5_SRC = \
   $(TOP)\ext\fts5\fts5.h \
   $(TOP)\ext\fts5\fts5Int.h \
   $(TOP)\ext\fts5\fts5_aux.c \
   $(TOP)\ext\fts5\fts5_buffer.c \
   $(TOP)\ext\fts5\fts5_main.c \
   $(TOP)\ext\fts5\fts5_config.c \
   $(TOP)\ext\fts5\fts5_expr.c \
   $(TOP)\ext\fts5\fts5_hash.c \
   $(TOP)\ext\fts5\fts5_index.c \
   fts5parse.c fts5parse.h \
   $(TOP)\ext\fts5\fts5_storage.c \
   $(TOP)\ext\fts5\fts5_tokenize.c \
   $(TOP)\ext\fts5\fts5_unicode2.c \
   $(TOP)\ext\fts5\fts5_varint.c \
   $(TOP)\ext\fts5\fts5_vocab.c

fts5parse.c:	$(TOP)\ext\fts5\fts5parse.y lemon.exe
	copy $(TOP)\ext\fts5\fts5parse.y .
	del /Q fts5parse.h 2>NUL
	.\lemon.exe $(REQ_FEATURE_FLAGS) $(OPT_FEATURE_FLAGS) $(OPTS) fts5parse.y
	move fts5parse.c fts5parse.c.orig
	echo #ifdef SQLITE_ENABLE_FTS5 > $@
	type fts5parse.c.orig \
		| $(NAWK) "/.*/ { gsub(/yy/,\"fts5yy\");print }" \
		| $(NAWK) "/.*/ { gsub(/YY/,\"fts5YY\");print }" \
		| $(NAWK) "/.*/ { gsub(/TOKEN/,\"FTS5TOKEN\");print }" >> $@
	echo #endif /* SQLITE_ENABLE_FTS5 */ >> $@

fts5parse.h: fts5parse.c

fts5.c: $(FTS5_SRC)
	$(TCLSH_CMD) $(TOP)\ext\fts5\tool\mkfts5c.tcl

fts5.lo:	fts5.c $(HDR) $(EXTHDR)
	$(LTCOMPILE) $(NO_WARN) -DSQLITE_ENABLE_FTS5 -c fts5.c

fts5.dll:	fts5.lo
	$(LD) $(LDFLAGS) $(LTLINKOPTS) $(LTLIBPATHS) /DLL /OUT:$@ fts5.lo

# Rules to build the 'testfixture' application.
#
# If using the amalgamation, use sqlite3.c directly to build the test
# fixture.  Otherwise link against libsqlite3.lib.  (This distinction is
# necessary because the test fixture requires non-API symbols which are
# hidden when the library is built via the amalgamation).
1788
1789
1790
1791
1792
1793
1794

1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
	del /Q sqlite3.exe sqlite3.dll sqlite3.def 2>NUL
	del /Q sqlite3.c sqlite3-*.c 2>NUL
	del /Q sqlite3rc.h 2>NUL
	del /Q shell.c sqlite3ext.h 2>NUL
	del /Q sqlite3_analyzer.exe sqlite3_analyzer.c 2>NUL
	del /Q sqlite-*-output.vsix 2>NUL
	del /Q fuzzershell.exe fuzzcheck.exe sqldiff.exe 2>NUL


# Dynamic link library section.
#
dll: sqlite3.dll

sqlite3.def: libsqlite3.lib
	echo EXPORTS > sqlite3.def
	dumpbin /all libsqlite3.lib \
		| $(NAWK) "/ 1 _?sqlite3_/ { sub(/^.* _?/,\"\");print }" \
		| sort >> sqlite3.def

sqlite3.dll: $(LIBOBJ) $(LIBRESOBJS) $(CORE_LINK_DEP)
	$(LD) $(LDFLAGS) $(LTLINKOPTS) $(LTLIBPATHS) /DLL $(CORE_LINK_OPTS) /OUT:$@ $(LIBOBJ) $(LIBRESOBJS) $(LTLIBS) $(TLIBS)







>













1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
	del /Q sqlite3.exe sqlite3.dll sqlite3.def 2>NUL
	del /Q sqlite3.c sqlite3-*.c 2>NUL
	del /Q sqlite3rc.h 2>NUL
	del /Q shell.c sqlite3ext.h 2>NUL
	del /Q sqlite3_analyzer.exe sqlite3_analyzer.c 2>NUL
	del /Q sqlite-*-output.vsix 2>NUL
	del /Q fuzzershell.exe fuzzcheck.exe sqldiff.exe 2>NUL
	del /Q fts5.c fts5parse.* 2>NUL

# Dynamic link library section.
#
dll: sqlite3.dll

sqlite3.def: libsqlite3.lib
	echo EXPORTS > sqlite3.def
	dumpbin /all libsqlite3.lib \
		| $(NAWK) "/ 1 _?sqlite3_/ { sub(/^.* _?/,\"\");print }" \
		| sort >> sqlite3.def

sqlite3.dll: $(LIBOBJ) $(LIBRESOBJS) $(CORE_LINK_DEP)
	$(LD) $(LDFLAGS) $(LTLINKOPTS) $(LTLIBPATHS) /DLL $(CORE_LINK_OPTS) /OUT:$@ $(LIBOBJ) $(LIBRESOBJS) $(LTLIBS) $(TLIBS)
Changes to ext/fts3/unicode/mkunicode.tcl.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81

#
# Parameter $zName must be a path to the file UnicodeData.txt. This command
# reads the file and returns a list of mappings required to remove all
# diacritical marks from a unicode string. Each mapping is itself a list
# consisting of two elements - the unicode codepoint and the single ASCII
# character that it should be replaced with, or an empty string if the 
# codepoint should simply be removed from the input. Examples:
#
#   { 224 a  }     (replace codepoint 224 to "a")
#   { 769 "" }     (remove codepoint 769 from input)
#
# Mappings are only returned for non-upper case codepoints. It is assumed
# that the input has already been folded to lower case.
#
proc rd_load_unicodedata_text {zName} {
  global tl_lookup_table

  set fd [open $zName]
  set lField {
    code
    character_name
    general_category
    canonical_combining_classes
    bidirectional_category
    character_decomposition_mapping
    decimal_digit_value
    digit_value
    numeric_value
    mirrored
    unicode_1_name
    iso10646_comment_field
    uppercase_mapping
    lowercase_mapping
    titlecase_mapping
  }
  set lRet [list]

  while { ![eof $fd] } {
    set line [gets $fd]
    if {$line == ""} continue

    set fields [split $line ";"]
    if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
    foreach $lField $fields {}
    if { [llength $character_decomposition_mapping]!=2
      || [string is xdigit [lindex $character_decomposition_mapping 0]]==0
    } {
      continue
    }

    set iCode  [expr "0x$code"]
    set iAscii [expr "0x[lindex $character_decomposition_mapping 0]"]
    set iDia   [expr "0x[lindex $character_decomposition_mapping 1]"]

    if {[info exists tl_lookup_table($iCode)]} continue

    if { ($iAscii >= 97 && $iAscii <= 122)
      || ($iAscii >= 65 && $iAscii <= 90)
    } {
      lappend lRet [list $iCode [string tolower [format %c $iAscii]]]
      set dia($iDia) 1
    }
  }

  foreach d [array names dia] {
    lappend lRet [list $d ""]
  }
  set lRet [lsort -integer -index 0 $lRet]

  close $fd
  set lRet
}


proc print_rd {map} {
  global tl_lookup_table
  set aChar [list]
  set lRange [list]

  set nRange 1

<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







1
















2
























































3
4
5
6
7
8
9

















source [file join [file dirname [info script]] parseunicode.tcl]

























































proc print_rd {map} {
  global tl_lookup_table
  set aChar [list]
  set lRange [list]

  set nRange 1
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
  puts "** If the argument is a codepoint corresponding to a lowercase letter"
  puts "** in the ASCII range with a diacritic added, return the codepoint"
  puts "** of the ASCII letter only. For example, if passed 235 - \"LATIN"
  puts "** SMALL LETTER E WITH DIAERESIS\" - return 65 (\"LATIN SMALL LETTER"
  puts "** E\"). The resuls of passing a codepoint that corresponds to an"
  puts "** uppercase letter are undefined."
  puts "*/"
  puts "static int remove_diacritic(int c)\{"
  puts "  unsigned short aDia\[\] = \{"
  puts -nonewline "        0, "
  set i 1
  foreach r $lRange {
    foreach {iCode nRange} $r {}
    if {($i % 8)==0} {puts "" ; puts -nonewline "    " }
    incr i







|







41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
  puts "** If the argument is a codepoint corresponding to a lowercase letter"
  puts "** in the ASCII range with a diacritic added, return the codepoint"
  puts "** of the ASCII letter only. For example, if passed 235 - \"LATIN"
  puts "** SMALL LETTER E WITH DIAERESIS\" - return 65 (\"LATIN SMALL LETTER"
  puts "** E\"). The resuls of passing a codepoint that corresponds to an"
  puts "** uppercase letter are undefined."
  puts "*/"
  puts "static int ${::remove_diacritic}(int c)\{"
  puts "  unsigned short aDia\[\] = \{"
  puts -nonewline "        0, "
  set i 1
  foreach r $lRange {
    foreach {iCode nRange} $r {}
    if {($i % 8)==0} {puts "" ; puts -nonewline "    " }
    incr i
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
  puts "      (mask1 & (1 << (c-$iFirst-32)));"
  puts "\}"
}


#-------------------------------------------------------------------------

# Parameter $zName must be a path to the file UnicodeData.txt. This command
# reads the file and returns a list of codepoints (integers). The list
# contains all codepoints in the UnicodeData.txt assigned to any "General
# Category" that is not a "Letter" or "Number".
#
proc an_load_unicodedata_text {zName} {
  set fd [open $zName]
  set lField {
    code
    character_name
    general_category
    canonical_combining_classes
    bidirectional_category
    character_decomposition_mapping
    decimal_digit_value
    digit_value
    numeric_value
    mirrored
    unicode_1_name
    iso10646_comment_field
    uppercase_mapping
    lowercase_mapping
    titlecase_mapping
  }
  set lRet [list]

  while { ![eof $fd] } {
    set line [gets $fd]
    if {$line == ""} continue

    set fields [split $line ";"]
    if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
    foreach $lField $fields {}

    set iCode [expr "0x$code"]
    set bAlnum [expr {
         [lsearch {L N} [string range $general_category 0 0]] >= 0
      || $general_category=="Co"
    }]

    if { !$bAlnum } { lappend lRet $iCode }
  }

  close $fd
  set lRet
}

proc an_load_separator_ranges {} {
  global unicodedata.txt
  set lSep [an_load_unicodedata_text ${unicodedata.txt}]
  unset -nocomplain iFirst 
  unset -nocomplain nRange 
  set lRange [list]
  foreach sep $lSep {







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







128
129
130
131
132
133
134















































135
136
137
138
139
140
141
  puts "      (mask1 & (1 << (c-$iFirst-32)));"
  puts "\}"
}


#-------------------------------------------------------------------------
















































proc an_load_separator_ranges {} {
  global unicodedata.txt
  set lSep [an_load_unicodedata_text ${unicodedata.txt}]
  unset -nocomplain iFirst 
  unset -nocomplain nRange 
  set lRange [list]
  foreach sep $lSep {
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
  }]
  puts "  return 0;"
  puts "\}"
}

#-------------------------------------------------------------------------

proc tl_load_casefolding_txt {zName} {
  global tl_lookup_table

  set fd [open $zName]
  while { ![eof $fd] } {
    set line [gets $fd]
    if {[string range $line 0 0] == "#"} continue
    if {$line == ""} continue

    foreach x {a b c d} {unset -nocomplain $x}
    foreach {a b c d} [split $line ";"] {}

    set a2 [list]
    set c2 [list]
    foreach elem $a { lappend a2 [expr "0x[string trim $elem]"] }
    foreach elem $c { lappend c2 [expr "0x[string trim $elem]"] }
    set b [string trim $b]
    set d [string trim $d]

    if {$b=="C" || $b=="S"} { set tl_lookup_table($a2) $c2 }
  }
}

proc tl_create_records {} {
  global tl_lookup_table

  set iFirst ""
  set nOff 0
  set nRange 0
  set nIncr 0







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







317
318
319
320
321
322
323























324
325
326
327
328
329
330
  }]
  puts "  return 0;"
  puts "\}"
}

#-------------------------------------------------------------------------
























proc tl_create_records {} {
  global tl_lookup_table

  set iFirst ""
  set nOff 0
  set nRange 0
  set nIncr 0
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637

638
639
640
641

642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
    if {[tl_print_table_entry toggle $entry $liOff]} { 
      lappend lHigh $entry 
    } 
  }
  tl_print_table_footer toggle
  tl_print_ioff_table $liOff

  puts {
  int ret = c;

  assert( c>=0 );
  assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );

  if( c<128 ){
    if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
  }else if( c<65536 ){

    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
    int iLo = 0;
    int iRes = -1;


    while( iHi>=iLo ){
      int iTest = (iHi + iLo) / 2;
      int cmp = (c - aEntry[iTest].iCode);
      if( cmp>=0 ){
        iRes = iTest;
        iLo = iTest+1;
      }else{
        iHi = iTest-1;
      }
    }
    assert( iRes<0 || c>=aEntry[iRes].iCode );

    if( iRes>=0 ){
      const struct TableEntry *p = &aEntry[iRes];
      if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
        ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
        assert( ret>0 );
      }
    }

    if( bRemoveDiacritic ) ret = remove_diacritic(ret);
  }
  }

  foreach entry $lHigh {
    tl_print_if_entry $entry
  }

  puts ""
  puts "  return ret;"







|


<





>




>










<

|
|
|
|
|
|
|
<
|

|







480
481
482
483
484
485
486
487
488
489

490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510

511
512
513
514
515
516
517
518

519
520
521
522
523
524
525
526
527
528
    if {[tl_print_table_entry toggle $entry $liOff]} { 
      lappend lHigh $entry 
    } 
  }
  tl_print_table_footer toggle
  tl_print_ioff_table $liOff

  puts [subst -nocommands {
  int ret = c;


  assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );

  if( c<128 ){
    if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
  }else if( c<65536 ){
    const struct TableEntry *p;
    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
    int iLo = 0;
    int iRes = -1;

    assert( c>aEntry[0].iCode );
    while( iHi>=iLo ){
      int iTest = (iHi + iLo) / 2;
      int cmp = (c - aEntry[iTest].iCode);
      if( cmp>=0 ){
        iRes = iTest;
        iLo = iTest+1;
      }else{
        iHi = iTest-1;
      }
    }


    assert( iRes>=0 && c>=aEntry[iRes].iCode );
    p = &aEntry[iRes];
    if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
      ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
      assert( ret>0 );
    }


    if( bRemoveDiacritic ) ret = ${::remove_diacritic}(ret);
  }
  }]

  foreach entry $lHigh {
    tl_print_if_entry $entry
  }

  puts ""
  puts "  return ret;"
728
729
730
731
732
733
734



735
736

737
738
739
740
741
742
743
*/

/*
** DO NOT EDIT THIS MACHINE GENERATED FILE.
*/
  }]
  puts ""



  puts "#ifndef SQLITE_DISABLE_FTS3_UNICODE"
  puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)"

  puts ""
  puts "#include <assert.h>"
  puts ""
}

proc print_test_main {} {
  puts ""







>
>
>
|
|
>







585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
*/

/*
** DO NOT EDIT THIS MACHINE GENERATED FILE.
*/
  }]
  puts ""
  if {$::generate_fts5_code} {
    puts "#if defined(SQLITE_ENABLE_FTS5)"
  } else {
    puts "#ifndef SQLITE_DISABLE_FTS3_UNICODE"
    puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)"
  }
  puts ""
  puts "#include <assert.h>"
  puts ""
}

proc print_test_main {} {
  puts ""
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770


771

















772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809



810
811

  puts "\}"
}

# Proces the command line arguments. Exit early if they are not to
# our liking.
#
proc usage {} {
  puts -nonewline stderr "Usage: $::argv0 ?-test? "
  puts            stderr "<CaseFolding.txt file> <UnicodeData.txt file>"
  exit 1
}
if {[llength $argv]!=2 && [llength $argv]!=3} usage
if {[llength $argv]==3 && [lindex $argv 0]!="-test"} usage
set unicodedata.txt [lindex $argv end]
set casefolding.txt [lindex $argv end-1]


set generate_test_code [expr {[llength $argv]==3}]


















print_fileheader

# Print the isalnum() function to stdout.
#
set lRange [an_load_separator_ranges]
print_isalnum sqlite3FtsUnicodeIsalnum $lRange

# Leave a gap between the two generated C functions.
#
puts ""
puts ""

# Load the fold data. This is used by the [rd_XXX] commands
# as well as [print_fold].
tl_load_casefolding_txt ${casefolding.txt}

set mappings [rd_load_unicodedata_text ${unicodedata.txt}]
print_rd $mappings
puts ""
puts ""
print_isdiacritic sqlite3FtsUnicodeIsdiacritic $mappings
puts ""
puts ""

# Print the fold() function to stdout.
#
print_fold sqlite3FtsUnicodeFold

# Print the test routines and main() function to stdout, if -test 
# was specified.
#
if {$::generate_test_code} {
  print_test_isalnum sqlite3FtsUnicodeIsalnum $lRange
  print_fold_test sqlite3FtsUnicodeFold $mappings
  print_test_main 
}




puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */"
puts "#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */"








|



|
<


>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>






|














|





|





|
|



>
>
>
|
|
>
617
618
619
620
621
622
623
624
625
626
627
628

629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
  puts "\}"
}

# Proces the command line arguments. Exit early if they are not to
# our liking.
#
proc usage {} {
  puts -nonewline stderr "Usage: $::argv0 ?-test? ?-fts5? "
  puts            stderr "<CaseFolding.txt file> <UnicodeData.txt file>"
  exit 1
}
if {[llength $argv]<2} usage

set unicodedata.txt [lindex $argv end]
set casefolding.txt [lindex $argv end-1]

set remove_diacritic remove_diacritic
set generate_test_code 0
set generate_fts5_code 0
set function_prefix "sqlite3Fts"
for {set i 0} {$i < [llength $argv]-2} {incr i} {
  switch -- [lindex $argv $i] {
    -test {
      set generate_test_code 1
    }
    -fts5 {
      set function_prefix sqlite3Fts5
      set generate_fts5_code 1
      set remove_diacritic fts5_remove_diacritic
    }
    default {
      usage
    }
  }
}

print_fileheader

# Print the isalnum() function to stdout.
#
set lRange [an_load_separator_ranges]
print_isalnum ${function_prefix}UnicodeIsalnum $lRange

# Leave a gap between the two generated C functions.
#
puts ""
puts ""

# Load the fold data. This is used by the [rd_XXX] commands
# as well as [print_fold].
tl_load_casefolding_txt ${casefolding.txt}

set mappings [rd_load_unicodedata_text ${unicodedata.txt}]
print_rd $mappings
puts ""
puts ""
print_isdiacritic ${function_prefix}UnicodeIsdiacritic $mappings
puts ""
puts ""

# Print the fold() function to stdout.
#
print_fold ${function_prefix}UnicodeFold

# Print the test routines and main() function to stdout, if -test 
# was specified.
#
if {$::generate_test_code} {
  print_test_isalnum ${function_prefix}UnicodeIsalnum $lRange
  print_fold_test ${function_prefix}UnicodeFold $mappings
  print_test_main 
}

if {$generate_fts5_code} {
  puts "#endif /* defined(SQLITE_ENABLE_FTS5) */"
} else {
  puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */"
  puts "#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */"
}
Added ext/fts3/unicode/parseunicode.tcl.




































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146

#--------------------------------------------------------------------------
# Parameter $zName must be a path to the file UnicodeData.txt. This command
# reads the file and returns a list of mappings required to remove all
# diacritical marks from a unicode string. Each mapping is itself a list
# consisting of two elements - the unicode codepoint and the single ASCII
# character that it should be replaced with, or an empty string if the 
# codepoint should simply be removed from the input. Examples:
#
#   { 224 a  }     (replace codepoint 224 to "a")
#   { 769 "" }     (remove codepoint 769 from input)
#
# Mappings are only returned for non-upper case codepoints. It is assumed
# that the input has already been folded to lower case.
#
proc rd_load_unicodedata_text {zName} {
  global tl_lookup_table

  set fd [open $zName]
  set lField {
    code
    character_name
    general_category
    canonical_combining_classes
    bidirectional_category
    character_decomposition_mapping
    decimal_digit_value
    digit_value
    numeric_value
    mirrored
    unicode_1_name
    iso10646_comment_field
    uppercase_mapping
    lowercase_mapping
    titlecase_mapping
  }
  set lRet [list]

  while { ![eof $fd] } {
    set line [gets $fd]
    if {$line == ""} continue

    set fields [split $line ";"]
    if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
    foreach $lField $fields {}
    if { [llength $character_decomposition_mapping]!=2
      || [string is xdigit [lindex $character_decomposition_mapping 0]]==0
    } {
      continue
    }

    set iCode  [expr "0x$code"]
    set iAscii [expr "0x[lindex $character_decomposition_mapping 0]"]
    set iDia   [expr "0x[lindex $character_decomposition_mapping 1]"]

    if {[info exists tl_lookup_table($iCode)]} continue

    if { ($iAscii >= 97 && $iAscii <= 122)
      || ($iAscii >= 65 && $iAscii <= 90)
    } {
      lappend lRet [list $iCode [string tolower [format %c $iAscii]]]
      set dia($iDia) 1
    }
  }

  foreach d [array names dia] {
    lappend lRet [list $d ""]
  }
  set lRet [lsort -integer -index 0 $lRet]

  close $fd
  set lRet
}

#-------------------------------------------------------------------------
# Parameter $zName must be a path to the file UnicodeData.txt. This command
# reads the file and returns a list of codepoints (integers). The list
# contains all codepoints in the UnicodeData.txt assigned to any "General
# Category" that is not a "Letter" or "Number".
#
proc an_load_unicodedata_text {zName} {
  set fd [open $zName]
  set lField {
    code
    character_name
    general_category
    canonical_combining_classes
    bidirectional_category
    character_decomposition_mapping
    decimal_digit_value
    digit_value
    numeric_value
    mirrored
    unicode_1_name
    iso10646_comment_field
    uppercase_mapping
    lowercase_mapping
    titlecase_mapping
  }
  set lRet [list]

  while { ![eof $fd] } {
    set line [gets $fd]
    if {$line == ""} continue

    set fields [split $line ";"]
    if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
    foreach $lField $fields {}

    set iCode [expr "0x$code"]
    set bAlnum [expr {
         [lsearch {L N} [string range $general_category 0 0]] >= 0
      || $general_category=="Co"
    }]

    if { !$bAlnum } { lappend lRet $iCode }
  }

  close $fd
  set lRet
}

proc tl_load_casefolding_txt {zName} {
  global tl_lookup_table

  set fd [open $zName]
  while { ![eof $fd] } {
    set line [gets $fd]
    if {[string range $line 0 0] == "#"} continue
    if {$line == ""} continue

    foreach x {a b c d} {unset -nocomplain $x}
    foreach {a b c d} [split $line ";"] {}

    set a2 [list]
    set c2 [list]
    foreach elem $a { lappend a2 [expr "0x[string trim $elem]"] }
    foreach elem $c { lappend c2 [expr "0x[string trim $elem]"] }
    set b [string trim $b]
    set d [string trim $d]

    if {$b=="C" || $b=="S"} { set tl_lookup_table($a2) $c2 }
  }
}


Added ext/fts5/extract_api_docs.tcl.


























































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
#
# 2014 August 24
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#--------------------------------------------------------------------------
#
# This script extracts the documentation for the API used by fts5 auxiliary 
# functions from header file fts5.h. It outputs html text on stdout that
# is included in the documentation on the web.
# 

set ::fts5_docs_output ""
if {[info commands hd_putsnl]==""} {
  if {[llength $argv]>0} { set ::extract_api_docs_mode [lindex $argv 0] }
  proc output {text} {
    puts $text
  }
} else {
  proc output {text} {
    append ::fts5_docs_output "$text\n"
  }
}
if {[info exists ::extract_api_docs_mode]==0} {set ::extract_api_docs_mode api}


set input_file [file join [file dir [info script]] fts5.h]
set fd [open $input_file]
set data [read $fd]
close $fd


# Argument $data is the entire text of the fts5.h file. This function 
# extracts the definition of the Fts5ExtensionApi structure from it and
# returns a key/value list of structure member names and definitions. i.e.
#
#   iVersion {int iVersion} xUserData {void *(*xUserData)(Fts5Context*)} ...
#
proc get_struct_members {data} {

  # Extract the structure definition from the fts5.h file.
  regexp "struct Fts5ExtensionApi {(.*?)};" $data -> defn

  # Remove all comments from the structure definition
  regsub -all {/[*].*?[*]/} $defn {} defn2

  set res [list]
  foreach member [split $defn2 {;}] {

    set member [string trim $member]
    if {$member!=""} { 
      catch { set name [lindex $member end] }
      regexp {.*?[(][*]([^)]*)[)]} $member -> name
      lappend res $name $member
    }
  }

  set res
}

proc get_struct_docs {data names} {
  # Extract the structure definition from the fts5.h file.
  regexp {EXTENSION API FUNCTIONS(.*?)[*]/} $data -> docs

  set current_doc    ""
  set current_header ""

  foreach line [split $docs "\n"] {
    regsub {[*]*} $line {} line
    if {[regexp {^  } $line]} {
      append current_doc "$line\n"
    } elseif {[string trim $line]==""} {
      if {$current_header!=""} { append current_doc "\n" }
    } else {
      if {$current_doc != ""} {
        lappend res $current_header $current_doc
        set current_doc ""
      }
      set subject n/a
      regexp {^ *([[:alpha:]]*)} $line -> subject
      if {[lsearch $names $subject]>=0} {
        set current_header $subject
      } else {
        set current_header [string trim $line]
      }
    }
  }

  if {$current_doc != ""} {
    lappend res $current_header $current_doc
  }

  set res
}

proc get_tokenizer_docs {data} {
  regexp {(xCreate:.*?)[*]/} $data -> docs

  set res "<dl>\n"
  foreach line [split [string trim $docs] "\n"] {
    regexp {[*][*](.*)} $line -> line
    if {[regexp {^ ?x.*:} $line]} {
      append res "<dt><b>$line</b></dt><dd><p style=margin-top:0>\n"
      continue
    }
    if {[string trim $line] == ""} {
      append res "<p>\n"
    } else {
      append res "$line\n"
    }
  }
  append res "</dl>\n"

  set res
}

proc get_api_docs {data} {
  # Initialize global array M as a map from Fts5StructureApi member name
  # to member definition. i.e.
  #
  #   iVersion  -> {int iVersion}
  #   xUserData -> {void *(*xUserData)(Fts5Context*)}
  #   ...
  #
  array set M [get_struct_members $data]
  
  # Initialize global list D as a map from section name to documentation
  # text. Most (all?) section names are structure member names.
  #
  set D [get_struct_docs $data [array names M]]
  
  foreach {sub docs} $D {
    if {[info exists M($sub)]} {
      set hdr $M($sub)
      set link " id=$sub"
    } else {
      set link ""
    }

    output "<hr color=#eeeee style=\"margin:1em 8.4ex 0 8.4ex;\"$link>"
    set style "padding-left:6ex;font-size:1.4em;display:block"
    output "<h style=\"$style\"><pre>$hdr</pre></h>"
  
    set mode ""
    set bEmpty 1
    foreach line [split [string trim $docs] "\n"] {
      if {[string trim $line]==""} {
        if {$mode != ""} {output "</$mode>"}
        set mode ""
      } elseif {$mode == ""} {
        if {[regexp {^     } $line]} {
          set mode codeblock
        } else {
          set mode p
        }
        output "<$mode>"
      }
      output $line
    }
    if {$mode != ""} {output "</$mode>"}
  }
}

proc get_fts5_struct {data start end} {
  set res ""
  set bOut 0
  foreach line [split $data "\n"] {
    if {$bOut==0} {
      if {[regexp $start $line]} {
        set bOut 1
      }
    }

    if {$bOut} {
      append res "$line\n"
    }

    if {$bOut} {
      if {[regexp $end $line]} {
        set bOut 0
      }
    }
  }

  set map [list /* <i>/* */ */</i>]
  string map $map $res
}

proc main {data} {
  switch $::extract_api_docs_mode {
    fts5_api {
      output [get_fts5_struct $data "typedef struct fts5_api" "^\};"]
    }

    fts5_tokenizer {
      output [get_fts5_struct $data "typedef struct Fts5Tokenizer" "^\};"]
    }

    fts5_extension {
      output [get_fts5_struct $data "typedef.*Fts5ExtensionApi" "^.;"]
    }

    Fts5ExtensionApi {
      set struct [get_fts5_struct $data "^struct Fts5ExtensionApi" "^.;"]
      set map [list]
      foreach {k v} [get_struct_members $data] {
        if {[string match x* $k]==0} continue
        lappend map $k "<a href=#$k>$k</a>"
      }
      output [string map $map $struct]
    }

    api {
      get_api_docs $data
    }

    tokenizer_api {
      output [get_tokenizer_docs $data]
    }

    default {
    }
  }
}
main $data

set ::fts5_docs_output





Added ext/fts5/fts5.h.










































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
/*
** 2014 May 31
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
** Interfaces to extend FTS5. Using the interfaces defined in this file, 
** FTS5 may be extended with:
**
**     * custom tokenizers, and
**     * custom auxiliary functions.
*/


#ifndef _FTS5_H
#define _FTS5_H

#include "sqlite3.h"

/*************************************************************************
** CUSTOM AUXILIARY FUNCTIONS
**
** Virtual table implementations may overload SQL functions by implementing
** the sqlite3_module.xFindFunction() method.
*/

typedef struct Fts5ExtensionApi Fts5ExtensionApi;
typedef struct Fts5Context Fts5Context;

typedef void (*fts5_extension_function)(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
);

/*
** EXTENSION API FUNCTIONS
**
** xUserData(pFts):
**   Return a copy of the context pointer the extension function was 
**   registered with.
**
** xColumnTotalSize(pFts, iCol, pnToken):
**   If parameter iCol is less than zero, set output variable *pnToken
**   to the total number of tokens in the FTS5 table. Or, if iCol is
**   non-negative but less than the number of columns in the table, return
**   the total number of tokens in column iCol, considering all rows in 
**   the FTS5 table.
**
**   If parameter iCol is greater than or equal to the number of columns
**   in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
**   an OOM condition or IO error), an appropriate SQLite error code is 
**   returned.
**
** xColumnCount:
**   Returns the number of columns in the FTS5 table.
**
** xColumnSize:
**   Reports the size in tokens of a column value from the current row.
**
** xColumnText:
**   This function attempts to retrieve the text of column iCol of the
**   current document. If successful, (*pz) is set to point to a buffer
**   containing the text in utf-8 encoding, (*pn) is set to the size in bytes
**   (not characters) of the buffer and SQLITE_OK is returned. Otherwise,
**   if an error occurs, an SQLite error code is returned and the final values
**   of (*pz) and (*pn) are undefined.
**
** xPhraseCount:
**   Returns the number of phrases in the current query expression.
**
** xPhraseSize:
**   Returns the number of tokens in phrase iPhrase of the query. Phrases
**   are numbered starting from zero.
**
** xInstCount:
**   Set *pnInst to the total number of occurrences of all phrases within
**   the query within the current row. Return SQLITE_OK if successful, or
**   an error code (i.e. SQLITE_NOMEM) if an error occurs.
**
** xInst:
**   Query for the details of phrase match iIdx within the current row.
**   Phrase matches are numbered starting from zero, so the iIdx argument
**   should be greater than or equal to zero and smaller than the value
**   output by xInstCount().
**
**   Returns SQLITE_OK if successful, or an error code (i.e. SQLITE_NOMEM) 
**   if an error occurs.
**
** xRowid:
**   Returns the rowid of the current row.
**
** xTokenize:
**   Tokenize text using the tokenizer belonging to the FTS5 table.
**
** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback):
**   This API function is used to query the FTS table for phrase iPhrase
**   of the current query. Specifically, a query equivalent to:
**
**       ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid
**
**   with $p set to a phrase equivalent to the phrase iPhrase of the
**   current query is executed. For each row visited, the callback function
**   passed as the fourth argument is invoked. The context and API objects 
**   passed to the callback function may be used to access the properties of
**   each matched row. Invoking Api.xUserData() returns a copy of the pointer
**   passed as the third argument to pUserData.
**
**   If the callback function returns any value other than SQLITE_OK, the
**   query is abandoned and the xQueryPhrase function returns immediately.
**   If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK.
**   Otherwise, the error code is propagated upwards.
**
**   If the query runs to completion without incident, SQLITE_OK is returned.
**   Or, if some error occurs before the query completes or is aborted by
**   the callback, an SQLite error code is returned.
**
**
** xSetAuxdata(pFts5, pAux, xDelete)
**
**   Save the pointer passed as the second argument as the extension functions 
**   "auxiliary data". The pointer may then be retrieved by the current or any
**   future invocation of the same fts5 extension function made as part of
**   of the same MATCH query using the xGetAuxdata() API.
**
**   Each extension function is allocated a single auxiliary data slot for
**   each FTS query (MATCH expression). If the extension function is invoked 
**   more than once for a single FTS query, then all invocations share a 
**   single auxiliary data context.
**
**   If there is already an auxiliary data pointer when this function is
**   invoked, then it is replaced by the new pointer. If an xDelete callback
**   was specified along with the original pointer, it is invoked at this
**   point.
**
**   The xDelete callback, if one is specified, is also invoked on the
**   auxiliary data pointer after the FTS5 query has finished.
**
**   If an error (e.g. an OOM condition) occurs within this function, an
**   the auxiliary data is set to NULL and an error code returned. If the
**   xDelete parameter was not NULL, it is invoked on the auxiliary data
**   pointer before returning.
**
**
** xGetAuxdata(pFts5, bClear)
**
**   Returns the current auxiliary data pointer for the fts5 extension 
**   function. See the xSetAuxdata() method for details.
**
**   If the bClear argument is non-zero, then the auxiliary data is cleared
**   (set to NULL) before this function returns. In this case the xDelete,
**   if any, is not invoked.
**
**
** xRowCount(pFts5, pnRow)
**
**   This function is used to retrieve the total number of rows in the table.
**   In other words, the same value that would be returned by:
**
**        SELECT count(*) FROM ftstable;
*/
struct Fts5ExtensionApi {
  int iVersion;                   /* Currently always set to 1 */

  void *(*xUserData)(Fts5Context*);

  int (*xColumnCount)(Fts5Context*);
  int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow);
  int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken);

  int (*xTokenize)(Fts5Context*, 
    const char *pText, int nText, /* Text to tokenize */
    void *pCtx,                   /* Context passed to xToken() */
    int (*xToken)(void*, const char*, int, int, int)       /* Callback */
  );

  int (*xPhraseCount)(Fts5Context*);
  int (*xPhraseSize)(Fts5Context*, int iPhrase);

  int (*xInstCount)(Fts5Context*, int *pnInst);
  int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff);

  sqlite3_int64 (*xRowid)(Fts5Context*);
  int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn);
  int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken);

  int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData,
    int(*)(const Fts5ExtensionApi*,Fts5Context*,void*)
  );
  int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*));
  void *(*xGetAuxdata)(Fts5Context*, int bClear);
};

/* 
** CUSTOM AUXILIARY FUNCTIONS
*************************************************************************/

/*************************************************************************
** CUSTOM TOKENIZERS
**
** Applications may also register custom tokenizer types. A tokenizer 
** is registered by providing fts5 with a populated instance of the 
** following structure. All structure methods must be defined, setting
** any member of the fts5_tokenizer struct to NULL leads to undefined
** behaviour. The structure methods are expected to function as follows:
**
** xCreate:
**   This function is used to allocate and inititalize a tokenizer instance.
**   A tokenizer instance is required to actually tokenize text.
**
**   The first argument passed to this function is a copy of the (void*)
**   pointer provided by the application when the fts5_tokenizer object
**   was registered with FTS5 (the third argument to xCreateTokenizer()). 
**   The second and third arguments are an array of nul-terminated strings
**   containing the tokenizer arguments, if any, specified following the
**   tokenizer name as part of the CREATE VIRTUAL TABLE statement used
**   to create the FTS5 table.
**
**   The final argument is an output variable. If successful, (*ppOut) 
**   should be set to point to the new tokenizer handle and SQLITE_OK
**   returned. If an error occurs, some value other than SQLITE_OK should
**   be returned. In this case, fts5 assumes that the final value of *ppOut 
**   is undefined.
**
** xDelete:
**   This function is invoked to delete a tokenizer handle previously
**   allocated using xCreate(). Fts5 guarantees that this function will
**   be invoked exactly once for each successful call to xCreate().
**
** xTokenize:
**   This function is expected to tokenize the nText byte string indicated 
**   by argument pText. pText may not be nul-terminated. The first argument
**   passed to this function is a pointer to an Fts5Tokenizer object returned 
**   by an earlier call to xCreate().
**
**   For each token in the input string, the supplied callback xToken() must
**   be invoked. The first argument to it should be a copy of the pointer
**   passed as the second argument to xTokenize(). The next two arguments
**   are a pointer to a buffer containing the token text, and the size of
**   the token in bytes. The 4th and 5th arguments are the byte offsets of
**   the first byte of and first byte immediately following the text from 
**   which the token is derived within the input.
**
**   FTS5 assumes the xToken() callback is invoked for each token in the 
**   order that they occur within the input text.
**
**   If an xToken() callback returns any value other than SQLITE_OK, then
**   the tokenization should be abandoned and the xTokenize() method should
**   immediately return a copy of the xToken() return value. Or, if the
**   input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally,
**   if an error occurs with the xTokenize() implementation itself, it
**   may abandon the tokenization and return any error code other than
**   SQLITE_OK or SQLITE_DONE.
**
*/
typedef struct Fts5Tokenizer Fts5Tokenizer;
typedef struct fts5_tokenizer fts5_tokenizer;
struct fts5_tokenizer {
  int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut);
  void (*xDelete)(Fts5Tokenizer*);
  int (*xTokenize)(Fts5Tokenizer*, 
      void *pCtx,
      const char *pText, int nText, 
      int (*xToken)(
        void *pCtx,         /* Copy of 2nd argument to xTokenize() */
        const char *pToken, /* Pointer to buffer containing token */
        int nToken,         /* Size of token in bytes */
        int iStart,         /* Byte offset of token within input text */
        int iEnd            /* Byte offset of end of token within input text */
      )
  );
};

/*
** END OF CUSTOM TOKENIZERS
*************************************************************************/

/*************************************************************************
** FTS5 EXTENSION REGISTRATION API
*/
typedef struct fts5_api fts5_api;
struct fts5_api {
  int iVersion;                   /* Currently always set to 1 */

  /* Create a new tokenizer */
  int (*xCreateTokenizer)(
    fts5_api *pApi,
    const char *zName,
    void *pContext,
    fts5_tokenizer *pTokenizer,
    void (*xDestroy)(void*)
  );

  /* Find an existing tokenizer */
  int (*xFindTokenizer)(
    fts5_api *pApi,
    const char *zName,
    void **ppContext,
    fts5_tokenizer *pTokenizer
  );

  /* Create a new auxiliary function */
  int (*xCreateFunction)(
    fts5_api *pApi,
    const char *zName,
    void *pContext,
    fts5_extension_function xFunction,
    void (*xDestroy)(void*)
  );
};

/*
** END OF REGISTRATION API
*************************************************************************/

#endif /* _FTS5_H */

Added ext/fts5/fts5Int.h.
















































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
/*
** 2014 May 31
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
*/
#ifndef _FTS5INT_H
#define _FTS5INT_H

#ifdef SQLITE_ENABLE_FTS5

#include "fts5.h"
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT1

#include <string.h>
#include <assert.h>

#ifndef SQLITE_AMALGAMATION

typedef unsigned char  u8;
typedef unsigned int   u32;
typedef unsigned short u16;
typedef sqlite3_int64 i64;
typedef sqlite3_uint64 u64;

#define ArraySize(x) (sizeof(x) / sizeof(x[0]))

#define testcase(x)
#define ALWAYS(x) 1
#define NEVER(x) 0

#define MIN(x,y) (((x) < (y)) ? (x) : (y))

#endif


/*
** Maximum number of prefix indexes on single FTS5 table. This must be
** less than 32. If it is set to anything large than that, an #error
** directive in fts5_index.c will cause the build to fail.
*/
#define FTS5_MAX_PREFIX_INDEXES 31

#define FTS5_DEFAULT_NEARDIST 10
#define FTS5_DEFAULT_RANK     "bm25"

/* Name of rank and rowid columns */
#define FTS5_RANK_NAME "rank"
#define FTS5_ROWID_NAME "rowid"

#ifdef SQLITE_DEBUG
# define FTS5_CORRUPT sqlite3Fts5Corrupt()
int sqlite3Fts5Corrupt(void);
#else
# define FTS5_CORRUPT SQLITE_CORRUPT_VTAB
#endif

/*
** The assert_nc() macro is similar to the assert() macro, except that it
** is used for assert() conditions that are true only if it can be 
** guranteed that the database is not corrupt.
*/
#ifdef SQLITE_DEBUG
extern int sqlite3_fts5_may_be_corrupt;
# define assert_nc(x) assert(sqlite3_fts5_may_be_corrupt || (x))
#else
# define assert_nc(x) assert(x)
#endif

typedef struct Fts5Global Fts5Global;

/**************************************************************************
** Interface to code in fts5_config.c. fts5_config.c contains contains code
** to parse the arguments passed to the CREATE VIRTUAL TABLE statement.
*/

typedef struct Fts5Config Fts5Config;

/*
** An instance of the following structure encodes all information that can
** be gleaned from the CREATE VIRTUAL TABLE statement.
**
** And all information loaded from the %_config table.
**
** nAutomerge:
**   The minimum number of segments that an auto-merge operation should
**   attempt to merge together. A value of 1 sets the object to use the 
**   compile time default. Zero disables auto-merge altogether.
**
** zContent:
**
** zContentRowid:
**   The value of the content_rowid= option, if one was specified. Or 
**   the string "rowid" otherwise. This text is not quoted - if it is
**   used as part of an SQL statement it needs to be quoted appropriately.
**
** zContentExprlist:
**
** pzErrmsg:
**   This exists in order to allow the fts5_index.c module to return a 
**   decent error message if it encounters a file-format version it does
**   not understand.
**
** bColumnsize:
**   True if the %_docsize table is created.
**
*/
struct Fts5Config {
  sqlite3 *db;                    /* Database handle */
  char *zDb;                      /* Database holding FTS index (e.g. "main") */
  char *zName;                    /* Name of FTS index */
  int nCol;                       /* Number of columns */
  char **azCol;                   /* Column names */
  u8 *abUnindexed;                /* True for unindexed columns */
  int nPrefix;                    /* Number of prefix indexes */
  int *aPrefix;                   /* Sizes in bytes of nPrefix prefix indexes */
  int eContent;                   /* An FTS5_CONTENT value */
  char *zContent;                 /* content table */ 
  char *zContentRowid;            /* "content_rowid=" option value */ 
  int bColumnsize;                /* "columnsize=" option value (dflt==1) */
  char *zContentExprlist;
  Fts5Tokenizer *pTok;
  fts5_tokenizer *pTokApi;

  /* Values loaded from the %_config table */
  int iCookie;                    /* Incremented when %_config is modified */
  int pgsz;                       /* Approximate page size used in %_data */
  int nAutomerge;                 /* 'automerge' setting */
  int nCrisisMerge;               /* Maximum allowed segments per level */
  char *zRank;                    /* Name of rank function */
  char *zRankArgs;                /* Arguments to rank function */

  /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */
  char **pzErrmsg;
};

/* Current expected value of %_config table 'version' field */
#define FTS5_CURRENT_VERSION 2

#define FTS5_CONTENT_NORMAL   0
#define FTS5_CONTENT_NONE     1
#define FTS5_CONTENT_EXTERNAL 2




int sqlite3Fts5ConfigParse(
    Fts5Global*, sqlite3*, int, const char **, Fts5Config**, char**
);
void sqlite3Fts5ConfigFree(Fts5Config*);

int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig);

int sqlite3Fts5Tokenize(
  Fts5Config *pConfig,            /* FTS5 Configuration object */
  const char *pText, int nText,   /* Text to tokenize */
  void *pCtx,                     /* Context passed to xToken() */
  int (*xToken)(void*, const char*, int, int, int)    /* Callback */
);

void sqlite3Fts5Dequote(char *z);

/* Load the contents of the %_config table */
int sqlite3Fts5ConfigLoad(Fts5Config*, int);

/* Set the value of a single config attribute */
int sqlite3Fts5ConfigSetValue(Fts5Config*, const char*, sqlite3_value*, int*);

int sqlite3Fts5ConfigParseRank(const char*, char**, char**);

/*
** End of interface to code in fts5_config.c.
**************************************************************************/

/**************************************************************************
** Interface to code in fts5_buffer.c.
*/

/*
** Buffer object for the incremental building of string data.
*/
typedef struct Fts5Buffer Fts5Buffer;
struct Fts5Buffer {
  u8 *p;
  int n;
  int nSpace;
};

int sqlite3Fts5BufferGrow(int*, Fts5Buffer*, int);
void sqlite3Fts5BufferAppendVarint(int*, Fts5Buffer*, i64);
void sqlite3Fts5BufferAppendBlob(int*, Fts5Buffer*, int, const u8*);
void sqlite3Fts5BufferAppendString(int *, Fts5Buffer*, const char*);
void sqlite3Fts5BufferFree(Fts5Buffer*);
void sqlite3Fts5BufferZero(Fts5Buffer*);
void sqlite3Fts5BufferSet(int*, Fts5Buffer*, int, const u8*);
void sqlite3Fts5BufferAppendPrintf(int *, Fts5Buffer*, char *zFmt, ...);
void sqlite3Fts5BufferAppend32(int*, Fts5Buffer*, int);

char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...);

#define fts5BufferZero(x)             sqlite3Fts5BufferZero(x)
#define fts5BufferGrow(a,b,c)         sqlite3Fts5BufferGrow(a,b,c)
#define fts5BufferAppendVarint(a,b,c) sqlite3Fts5BufferAppendVarint(a,b,c)
#define fts5BufferFree(a)             sqlite3Fts5BufferFree(a)
#define fts5BufferAppendBlob(a,b,c,d) sqlite3Fts5BufferAppendBlob(a,b,c,d)
#define fts5BufferSet(a,b,c,d)        sqlite3Fts5BufferSet(a,b,c,d)
#define fts5BufferAppend32(a,b,c)     sqlite3Fts5BufferAppend32(a,b,c)

/* Write and decode big-endian 32-bit integer values */
void sqlite3Fts5Put32(u8*, int);
int sqlite3Fts5Get32(const u8*);

#define FTS5_POS2COLUMN(iPos) (int)(iPos >> 32)
#define FTS5_POS2OFFSET(iPos) (int)(iPos & 0xFFFFFFFF)

typedef struct Fts5PoslistReader Fts5PoslistReader;
struct Fts5PoslistReader {
  /* Variables used only by sqlite3Fts5PoslistIterXXX() functions. */
  int iCol;                       /* If (iCol>=0), this column only */
  const u8 *a;                    /* Position list to iterate through */
  int n;                          /* Size of buffer at a[] in bytes */
  int i;                          /* Current offset in a[] */

  /* Output variables */
  int bEof;                       /* Set to true at EOF */
  i64 iPos;                       /* (iCol<<32) + iPos */
};
int sqlite3Fts5PoslistReaderInit(
  int iCol,                       /* If (iCol>=0), this column only */
  const u8 *a, int n,             /* Poslist buffer to iterate through */
  Fts5PoslistReader *pIter        /* Iterator object to initialize */
);
int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader*);

typedef struct Fts5PoslistWriter Fts5PoslistWriter;
struct Fts5PoslistWriter {
  i64 iPrev;
};
int sqlite3Fts5PoslistWriterAppend(Fts5Buffer*, Fts5PoslistWriter*, i64);

int sqlite3Fts5PoslistNext64(
  const u8 *a, int n,             /* Buffer containing poslist */
  int *pi,                        /* IN/OUT: Offset within a[] */
  i64 *piOff                      /* IN/OUT: Current offset */
);

/* Malloc utility */
void *sqlite3Fts5MallocZero(int *pRc, int nByte);
char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn);

/* Character set tests (like isspace(), isalpha() etc.) */
int sqlite3Fts5IsBareword(char t);

/*
** End of interface to code in fts5_buffer.c.
**************************************************************************/

/**************************************************************************
** Interface to code in fts5_index.c. fts5_index.c contains contains code
** to access the data stored in the %_data table.
*/

typedef struct Fts5Index Fts5Index;
typedef struct Fts5IndexIter Fts5IndexIter;

/*
** Values used as part of the flags argument passed to IndexQuery().
*/
#define FTS5INDEX_QUERY_PREFIX     0x0001   /* Prefix query */
#define FTS5INDEX_QUERY_DESC       0x0002   /* Docs in descending rowid order */
#define FTS5INDEX_QUERY_TEST_NOIDX 0x0004   /* Do not use prefix index */
#define FTS5INDEX_QUERY_SCAN       0x0008   /* Scan query (fts5vocab) */

/*
** Create/destroy an Fts5Index object.
*/
int sqlite3Fts5IndexOpen(Fts5Config *pConfig, int bCreate, Fts5Index**, char**);
int sqlite3Fts5IndexClose(Fts5Index *p);

/*
** for(
**   pIter = sqlite3Fts5IndexQuery(p, "token", 5, 0);
**   0==sqlite3Fts5IterEof(pIter);
**   sqlite3Fts5IterNext(pIter)
** ){
**   i64 iRowid = sqlite3Fts5IterRowid(pIter);
** }
*/

/*
** Open a new iterator to iterate though all docids that match the 
** specified token or token prefix.
*/
int sqlite3Fts5IndexQuery(
  Fts5Index *p,                   /* FTS index to query */
  const char *pToken, int nToken, /* Token (or prefix) to query for */
  int flags,                      /* Mask of FTS5INDEX_QUERY_X flags */
  Fts5IndexIter **ppIter
);

/*
** The various operations on open token or token prefix iterators opened
** using sqlite3Fts5IndexQuery().
*/
int sqlite3Fts5IterEof(Fts5IndexIter*);
int sqlite3Fts5IterNext(Fts5IndexIter*);
int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch);
i64 sqlite3Fts5IterRowid(Fts5IndexIter*);
int sqlite3Fts5IterPoslist(Fts5IndexIter*, const u8 **pp, int *pn, i64 *pi);
int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf);

/*
** Close an iterator opened by sqlite3Fts5IndexQuery().
*/
void sqlite3Fts5IterClose(Fts5IndexIter*);

/*
** This interface is used by the fts5vocab module.
*/
const char *sqlite3Fts5IterTerm(Fts5IndexIter*, int*);
int sqlite3Fts5IterNextScan(Fts5IndexIter*);


/*
** Insert or remove data to or from the index. Each time a document is 
** added to or removed from the index, this function is called one or more
** times.
**
** For an insert, it must be called once for each token in the new document.
** If the operation is a delete, it must be called (at least) once for each
** unique token in the document with an iCol value less than zero. The iPos
** argument is ignored for a delete.
*/
int sqlite3Fts5IndexWrite(
  Fts5Index *p,                   /* Index to write to */
  int iCol,                       /* Column token appears in (-ve -> delete) */
  int iPos,                       /* Position of token within column */
  const char *pToken, int nToken  /* Token to add or remove to or from index */
);

/*
** Indicate that subsequent calls to sqlite3Fts5IndexWrite() pertain to
** document iDocid.
*/
int sqlite3Fts5IndexBeginWrite(
  Fts5Index *p,                   /* Index to write to */
  i64 iDocid                      /* Docid to add or remove data from */
);

/*
** Flush any data stored in the in-memory hash tables to the database.
** If the bCommit flag is true, also close any open blob handles.
*/
int sqlite3Fts5IndexSync(Fts5Index *p, int bCommit);

/*
** Discard any data stored in the in-memory hash tables. Do not write it
** to the database. Additionally, assume that the contents of the %_data
** table may have changed on disk. So any in-memory caches of %_data 
** records must be invalidated.
*/
int sqlite3Fts5IndexRollback(Fts5Index *p);

/*
** Retrieve and clear the current error code, respectively.
*/
int sqlite3Fts5IndexErrcode(Fts5Index*);
void sqlite3Fts5IndexReset(Fts5Index*);

/*
** Get or set the "averages" record.
*/
int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf);
int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int);

/*
** Functions called by the storage module as part of integrity-check.
*/
u64 sqlite3Fts5IndexCksum(Fts5Config*,i64,int,int,const char*,int);
int sqlite3Fts5IndexIntegrityCheck(Fts5Index*, u64 cksum);

/* 
** Called during virtual module initialization to register UDF 
** fts5_decode() with SQLite 
*/
int sqlite3Fts5IndexInit(sqlite3*);

int sqlite3Fts5IndexSetCookie(Fts5Index*, int);

/*
** Return the total number of entries read from the %_data table by 
** this connection since it was created.
*/
int sqlite3Fts5IndexReads(Fts5Index *p);

int sqlite3Fts5IndexReinit(Fts5Index *p);
int sqlite3Fts5IndexOptimize(Fts5Index *p);
int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge);

int sqlite3Fts5IndexLoadConfig(Fts5Index *p);

/*
** End of interface to code in fts5_index.c.
**************************************************************************/

/**************************************************************************
** Interface to code in fts5_varint.c. 
*/
int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v);
int sqlite3Fts5GetVarintLen(u32 iVal);
u8 sqlite3Fts5GetVarint(const unsigned char*, u64*);
int sqlite3Fts5PutVarint(unsigned char *p, u64 v);

#define fts5GetVarint32(a,b) sqlite3Fts5GetVarint32(a,(u32*)&b)
#define fts5GetVarint    sqlite3Fts5GetVarint

/*
** End of interface to code in fts5_varint.c.
**************************************************************************/


/**************************************************************************
** Interface to code in fts5.c. 
*/

int sqlite3Fts5GetTokenizer(
  Fts5Global*, 
  const char **azArg,
  int nArg,
  Fts5Tokenizer**,
  fts5_tokenizer**,
  char **pzErr
);

Fts5Index *sqlite3Fts5IndexFromCsrid(Fts5Global*, i64, int*);

/*
** End of interface to code in fts5.c.
**************************************************************************/

/**************************************************************************
** Interface to code in fts5_hash.c. 
*/
typedef struct Fts5Hash Fts5Hash;

/*
** Create a hash table, free a hash table.
*/
int sqlite3Fts5HashNew(Fts5Hash**, int *pnSize);
void sqlite3Fts5HashFree(Fts5Hash*);

int sqlite3Fts5HashWrite(
  Fts5Hash*,
  i64 iRowid,                     /* Rowid for this entry */
  int iCol,                       /* Column token appears in (-ve -> delete) */
  int iPos,                       /* Position of token within column */
  char bByte,
  const char *pToken, int nToken  /* Token to add or remove to or from index */
);

/*
** Empty (but do not delete) a hash table.
*/
void sqlite3Fts5HashClear(Fts5Hash*);

int sqlite3Fts5HashQuery(
  Fts5Hash*,                      /* Hash table to query */
  const char *pTerm, int nTerm,   /* Query term */
  const u8 **ppDoclist,           /* OUT: Pointer to doclist for pTerm */
  int *pnDoclist                  /* OUT: Size of doclist in bytes */
);

int sqlite3Fts5HashScanInit(
  Fts5Hash*,                      /* Hash table to query */
  const char *pTerm, int nTerm    /* Query prefix */
);
void sqlite3Fts5HashScanNext(Fts5Hash*);
int sqlite3Fts5HashScanEof(Fts5Hash*);
void sqlite3Fts5HashScanEntry(Fts5Hash *,
  const char **pzTerm,            /* OUT: term (nul-terminated) */
  const u8 **ppDoclist,           /* OUT: pointer to doclist */
  int *pnDoclist                  /* OUT: size of doclist in bytes */
);


/*
** End of interface to code in fts5_hash.c.
**************************************************************************/

/**************************************************************************
** Interface to code in fts5_storage.c. fts5_storage.c contains contains 
** code to access the data stored in the %_content and %_docsize tables.
*/

#define FTS5_STMT_SCAN_ASC  0     /* SELECT rowid, * FROM ... ORDER BY 1 ASC */
#define FTS5_STMT_SCAN_DESC 1     /* SELECT rowid, * FROM ... ORDER BY 1 DESC */
#define FTS5_STMT_LOOKUP    2     /* SELECT rowid, * FROM ... WHERE rowid=? */

typedef struct Fts5Storage Fts5Storage;

int sqlite3Fts5StorageOpen(Fts5Config*, Fts5Index*, int, Fts5Storage**, char**);
int sqlite3Fts5StorageClose(Fts5Storage *p);
int sqlite3Fts5StorageRename(Fts5Storage*, const char *zName);

int sqlite3Fts5DropAll(Fts5Config*);
int sqlite3Fts5CreateTable(Fts5Config*, const char*, const char*, int, char **);

int sqlite3Fts5StorageDelete(Fts5Storage *p, i64);
int sqlite3Fts5StorageInsert(Fts5Storage *p, sqlite3_value **apVal, int, i64*);

int sqlite3Fts5StorageIntegrity(Fts5Storage *p);

int sqlite3Fts5StorageStmt(Fts5Storage *p, int eStmt, sqlite3_stmt**, char**);
void sqlite3Fts5StorageStmtRelease(Fts5Storage *p, int eStmt, sqlite3_stmt*);

int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol);
int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnAvg);
int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow);

int sqlite3Fts5StorageSync(Fts5Storage *p, int bCommit);
int sqlite3Fts5StorageRollback(Fts5Storage *p);

int sqlite3Fts5StorageConfigValue(
    Fts5Storage *p, const char*, sqlite3_value*, int
);

int sqlite3Fts5StorageSpecialDelete(Fts5Storage *p, i64 iDel, sqlite3_value**);

int sqlite3Fts5StorageDeleteAll(Fts5Storage *p);
int sqlite3Fts5StorageRebuild(Fts5Storage *p);
int sqlite3Fts5StorageOptimize(Fts5Storage *p);
int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge);

/*
** End of interface to code in fts5_storage.c.
**************************************************************************/


/**************************************************************************
** Interface to code in fts5_expr.c. 
*/
typedef struct Fts5Expr Fts5Expr;
typedef struct Fts5ExprNode Fts5ExprNode;
typedef struct Fts5Parse Fts5Parse;
typedef struct Fts5Token Fts5Token;
typedef struct Fts5ExprPhrase Fts5ExprPhrase;
typedef struct Fts5ExprNearset Fts5ExprNearset;
typedef struct Fts5ExprColset Fts5ExprColset;

struct Fts5Token {
  const char *p;                  /* Token text (not NULL terminated) */
  int n;                          /* Size of buffer p in bytes */
};

/* Parse a MATCH expression. */
int sqlite3Fts5ExprNew(
  Fts5Config *pConfig, 
  const char *zExpr,
  Fts5Expr **ppNew, 
  char **pzErr
);

/*
** for(rc = sqlite3Fts5ExprFirst(pExpr, pIdx, bDesc);
**     rc==SQLITE_OK && 0==sqlite3Fts5ExprEof(pExpr);
**     rc = sqlite3Fts5ExprNext(pExpr)
** ){
**   // The document with rowid iRowid matches the expression!
**   i64 iRowid = sqlite3Fts5ExprRowid(pExpr);
** }
*/
int sqlite3Fts5ExprFirst(Fts5Expr*, Fts5Index *pIdx, i64 iMin, int bDesc);
int sqlite3Fts5ExprNext(Fts5Expr*, i64 iMax);
int sqlite3Fts5ExprEof(Fts5Expr*);
i64 sqlite3Fts5ExprRowid(Fts5Expr*);

void sqlite3Fts5ExprFree(Fts5Expr*);

/* Called during startup to register a UDF with SQLite */
int sqlite3Fts5ExprInit(Fts5Global*, sqlite3*);

int sqlite3Fts5ExprPhraseCount(Fts5Expr*);
int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase);
int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **);

int sqlite3Fts5ExprPhraseExpr(Fts5Config*, Fts5Expr*, int, Fts5Expr**);

/*******************************************
** The fts5_expr.c API above this point is used by the other hand-written
** C code in this module. The interfaces below this point are called by
** the parser code in fts5parse.y.  */

void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...);

Fts5ExprNode *sqlite3Fts5ParseNode(
  Fts5Parse *pParse,
  int eType,
  Fts5ExprNode *pLeft,
  Fts5ExprNode *pRight,
  Fts5ExprNearset *pNear
);

Fts5ExprPhrase *sqlite3Fts5ParseTerm(
  Fts5Parse *pParse, 
  Fts5ExprPhrase *pPhrase, 
  Fts5Token *pToken,
  int bPrefix
);

Fts5ExprNearset *sqlite3Fts5ParseNearset(
  Fts5Parse*, 
  Fts5ExprNearset*,
  Fts5ExprPhrase* 
);

Fts5ExprColset *sqlite3Fts5ParseColset(
  Fts5Parse*, 
  Fts5ExprColset*, 
  Fts5Token *
);

void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase*);
void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset*);
void sqlite3Fts5ParseNodeFree(Fts5ExprNode*);

void sqlite3Fts5ParseSetDistance(Fts5Parse*, Fts5ExprNearset*, Fts5Token*);
void sqlite3Fts5ParseSetColset(Fts5Parse*, Fts5ExprNearset*, Fts5ExprColset*);
void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p);
void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token*);

/*
** End of interface to code in fts5_expr.c.
**************************************************************************/



/**************************************************************************
** Interface to code in fts5_aux.c. 
*/

int sqlite3Fts5AuxInit(fts5_api*);
/*
** End of interface to code in fts5_aux.c.
**************************************************************************/

/**************************************************************************
** Interface to code in fts5_tokenizer.c. 
*/

int sqlite3Fts5TokenizerInit(fts5_api*);
/*
** End of interface to code in fts5_tokenizer.c.
**************************************************************************/

/**************************************************************************
** Interface to code in fts5_sorter.c. 
*/
typedef struct Fts5Sorter Fts5Sorter;

int sqlite3Fts5SorterNew(Fts5Expr *pExpr, Fts5Sorter **pp);

/*
** End of interface to code in fts5_sorter.c.
**************************************************************************/

/**************************************************************************
** Interface to code in fts5_vocab.c. 
*/

int sqlite3Fts5VocabInit(Fts5Global*, sqlite3*);

/*
** End of interface to code in fts5_vocab.c.
**************************************************************************/


/**************************************************************************
** Interface to automatically generated code in fts5_unicode2.c. 
*/
int sqlite3Fts5UnicodeIsalnum(int c);
int sqlite3Fts5UnicodeIsdiacritic(int c);
int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic);
/*
** End of interface to code in fts5_unicode2.c.
**************************************************************************/

#endif
#endif
Added ext/fts5/fts5_aux.c.


























































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
/*
** 2014 May 31
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
*/

#ifdef SQLITE_ENABLE_FTS5

#include "fts5Int.h"
#include <math.h>

/*
** Object used to iterate through all "coalesced phrase instances" in 
** a single column of the current row. If the phrase instances in the
** column being considered do not overlap, this object simply iterates
** through them. Or, if they do overlap (share one or more tokens in
** common), each set of overlapping instances is treated as a single
** match. See documentation for the highlight() auxiliary function for
** details.
**
** Usage is:
**
**   for(rc = fts5CInstIterNext(pApi, pFts, iCol, &iter);
**      (rc==SQLITE_OK && 0==fts5CInstIterEof(&iter);
**      rc = fts5CInstIterNext(&iter)
**   ){
**     printf("instance starts at %d, ends at %d\n", iter.iStart, iter.iEnd);
**   }
**
*/
typedef struct CInstIter CInstIter;
struct CInstIter {
  const Fts5ExtensionApi *pApi;   /* API offered by current FTS version */
  Fts5Context *pFts;              /* First arg to pass to pApi functions */
  int iCol;                       /* Column to search */
  int iInst;                      /* Next phrase instance index */
  int nInst;                      /* Total number of phrase instances */

  /* Output variables */
  int iStart;                     /* First token in coalesced phrase instance */
  int iEnd;                       /* Last token in coalesced phrase instance */
};

/*
** Advance the iterator to the next coalesced phrase instance. Return
** an SQLite error code if an error occurs, or SQLITE_OK otherwise.
*/
static int fts5CInstIterNext(CInstIter *pIter){
  int rc = SQLITE_OK;
  pIter->iStart = -1;
  pIter->iEnd = -1;

  while( rc==SQLITE_OK && pIter->iInst<pIter->nInst ){
    int ip; int ic; int io;
    rc = pIter->pApi->xInst(pIter->pFts, pIter->iInst, &ip, &ic, &io);
    if( rc==SQLITE_OK ){
      if( ic==pIter->iCol ){
        int iEnd = io - 1 + pIter->pApi->xPhraseSize(pIter->pFts, ip);
        if( pIter->iStart<0 ){
          pIter->iStart = io;
          pIter->iEnd = iEnd;
        }else if( io<=pIter->iEnd ){
          if( iEnd>pIter->iEnd ) pIter->iEnd = iEnd;
        }else{
          break;
        }
      }
      pIter->iInst++;
    }
  }

  return rc;
}

/*
** Initialize the iterator object indicated by the final parameter to 
** iterate through coalesced phrase instances in column iCol.
*/
static int fts5CInstIterInit(
  const Fts5ExtensionApi *pApi,
  Fts5Context *pFts,
  int iCol,
  CInstIter *pIter
){
  int rc;

  memset(pIter, 0, sizeof(CInstIter));
  pIter->pApi = pApi;
  pIter->pFts = pFts;
  pIter->iCol = iCol;
  rc = pApi->xInstCount(pFts, &pIter->nInst);

  if( rc==SQLITE_OK ){
    rc = fts5CInstIterNext(pIter);
  }

  return rc;
}



/*************************************************************************
** Start of highlight() implementation.
*/
typedef struct HighlightContext HighlightContext;
struct HighlightContext {
  CInstIter iter;                 /* Coalesced Instance Iterator */
  int iPos;                       /* Current token offset in zIn[] */
  int iRangeStart;                /* First token to include */
  int iRangeEnd;                  /* If non-zero, last token to include */
  const char *zOpen;              /* Opening highlight */
  const char *zClose;             /* Closing highlight */
  const char *zIn;                /* Input text */
  int nIn;                        /* Size of input text in bytes */
  int iOff;                       /* Current offset within zIn[] */
  char *zOut;                     /* Output value */
};

/*
** Append text to the HighlightContext output string - p->zOut. Argument
** z points to a buffer containing n bytes of text to append. If n is 
** negative, everything up until the first '\0' is appended to the output.
**
** If *pRc is set to any value other than SQLITE_OK when this function is 
** called, it is a no-op. If an error (i.e. an OOM condition) is encountered, 
** *pRc is set to an error code before returning. 
*/
static void fts5HighlightAppend(
  int *pRc, 
  HighlightContext *p, 
  const char *z, int n
){
  if( *pRc==SQLITE_OK ){
    if( n<0 ) n = strlen(z);
    p->zOut = sqlite3_mprintf("%z%.*s", p->zOut, n, z);
    if( p->zOut==0 ) *pRc = SQLITE_NOMEM;
  }
}

/*
** Tokenizer callback used by implementation of highlight() function.
*/
static int fts5HighlightCb(
  void *pContext,                 /* Pointer to HighlightContext object */
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStartOff,                  /* Start offset of token */
  int iEndOff                     /* End offset of token */
){
  HighlightContext *p = (HighlightContext*)pContext;
  int rc = SQLITE_OK;
  int iPos = p->iPos++;

  if( p->iRangeEnd>0 ){
    if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK;
    if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff;
  }

  if( iPos==p->iter.iStart ){
    fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iStartOff - p->iOff);
    fts5HighlightAppend(&rc, p, p->zOpen, -1);
    p->iOff = iStartOff;
  }

  if( iPos==p->iter.iEnd ){
    if( p->iRangeEnd && p->iter.iStart<p->iRangeStart ){
      fts5HighlightAppend(&rc, p, p->zOpen, -1);
    }
    fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
    fts5HighlightAppend(&rc, p, p->zClose, -1);
    p->iOff = iEndOff;
    if( rc==SQLITE_OK ){
      rc = fts5CInstIterNext(&p->iter);
    }
  }

  if( p->iRangeEnd>0 && iPos==p->iRangeEnd ){
    fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
    p->iOff = iEndOff;
    if( iPos<p->iter.iEnd ){
      fts5HighlightAppend(&rc, p, p->zClose, -1);
    }
  }

  return rc;
}

/*
** Implementation of highlight() function.
*/
static void fts5HighlightFunction(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  HighlightContext ctx;
  int rc;
  int iCol;

  if( nVal!=3 ){
    const char *zErr = "wrong number of arguments to function highlight()";
    sqlite3_result_error(pCtx, zErr, -1);
    return;
  }

  iCol = sqlite3_value_int(apVal[0]);
  memset(&ctx, 0, sizeof(HighlightContext));
  ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]);
  ctx.zClose = (const char*)sqlite3_value_text(apVal[2]);
  rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn);

  if( ctx.zIn ){
    if( rc==SQLITE_OK ){
      rc = fts5CInstIterInit(pApi, pFts, iCol, &ctx.iter);
    }

    if( rc==SQLITE_OK ){
      rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb);
    }
    fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff);

    if( rc==SQLITE_OK ){
      sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT);
    }
    sqlite3_free(ctx.zOut);
  }
  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx, rc);
  }
}
/*
** End of highlight() implementation.
**************************************************************************/

/*
** Implementation of snippet() function.
*/
static void fts5SnippetFunction(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  HighlightContext ctx;
  int rc = SQLITE_OK;             /* Return code */
  int iCol;                       /* 1st argument to snippet() */
  const char *zEllips;            /* 4th argument to snippet() */
  int nToken;                     /* 5th argument to snippet() */
  int nInst;                      /* Number of instance matches this row */
  int i;                          /* Used to iterate through instances */
  int nPhrase;                    /* Number of phrases in query */
  unsigned char *aSeen;           /* Array of "seen instance" flags */
  int iBestCol;                   /* Column containing best snippet */
  int iBestStart = 0;             /* First token of best snippet */
  int iBestLast;                  /* Last token of best snippet */
  int nBestScore = 0;             /* Score of best snippet */
  int nColSize;                   /* Total size of iBestCol in tokens */

  if( nVal!=5 ){
    const char *zErr = "wrong number of arguments to function snippet()";
    sqlite3_result_error(pCtx, zErr, -1);
    return;
  }

  memset(&ctx, 0, sizeof(HighlightContext));
  iCol = sqlite3_value_int(apVal[0]);
  ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]);
  ctx.zClose = (const char*)sqlite3_value_text(apVal[2]);
  zEllips = (const char*)sqlite3_value_text(apVal[3]);
  nToken = sqlite3_value_int(apVal[4]);
  iBestLast = nToken-1;

  iBestCol = (iCol>=0 ? iCol : 0);
  nPhrase = pApi->xPhraseCount(pFts);
  aSeen = sqlite3_malloc(nPhrase);
  if( aSeen==0 ){
    rc = SQLITE_NOMEM;
  }

  if( rc==SQLITE_OK ){
    rc = pApi->xInstCount(pFts, &nInst);
  }
  for(i=0; rc==SQLITE_OK && i<nInst; i++){
    int ip, iSnippetCol, iStart;
    memset(aSeen, 0, nPhrase);
    rc = pApi->xInst(pFts, i, &ip, &iSnippetCol, &iStart);
    if( rc==SQLITE_OK && (iCol<0 || iSnippetCol==iCol) ){
      int nScore = 1000;
      int iLast = iStart - 1 + pApi->xPhraseSize(pFts, ip);
      int j;
      aSeen[ip] = 1;

      for(j=i+1; rc==SQLITE_OK && j<nInst; j++){
        int ic; int io; int iFinal;
        rc = pApi->xInst(pFts, j, &ip, &ic, &io);
        iFinal = io + pApi->xPhraseSize(pFts, ip) - 1;
        if( rc==SQLITE_OK && ic==iSnippetCol && iLast<iStart+nToken ){
          nScore += aSeen[ip] ? 1000 : 1;
          aSeen[ip] = 1;
          if( iFinal>iLast ) iLast = iFinal;
        }
      }

      if( rc==SQLITE_OK && nScore>nBestScore ){
        iBestCol = iSnippetCol;
        iBestStart = iStart;
        iBestLast = iLast;
        nBestScore = nScore;
      }
    }
  }

  if( rc==SQLITE_OK ){
    rc = pApi->xColumnSize(pFts, iBestCol, &nColSize);
  }
  if( rc==SQLITE_OK ){
    rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn);
  }
  if( ctx.zIn ){
    if( rc==SQLITE_OK ){
      rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter);
    }

    if( (iBestStart+nToken-1)>iBestLast ){
      iBestStart -= (iBestStart+nToken-1-iBestLast) / 2;
    }
    if( iBestStart+nToken>nColSize ){
      iBestStart = nColSize - nToken;
    }
    if( iBestStart<0 ) iBestStart = 0;

    ctx.iRangeStart = iBestStart;
    ctx.iRangeEnd = iBestStart + nToken - 1;

    if( iBestStart>0 ){
      fts5HighlightAppend(&rc, &ctx, zEllips, -1);
    }
    if( rc==SQLITE_OK ){
      rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb);
    }
    if( ctx.iRangeEnd>=(nColSize-1) ){
      fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff);
    }else{
      fts5HighlightAppend(&rc, &ctx, zEllips, -1);
    }

    if( rc==SQLITE_OK ){
      sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT);
    }else{
      sqlite3_result_error_code(pCtx, rc);
    }
    sqlite3_free(ctx.zOut);
  }
  sqlite3_free(aSeen);
}

/************************************************************************/

/*
** The first time the bm25() function is called for a query, an instance
** of the following structure is allocated and populated.
*/
typedef struct Fts5Bm25Data Fts5Bm25Data;
struct Fts5Bm25Data {
  int nPhrase;                    /* Number of phrases in query */
  double avgdl;                   /* Average number of tokens in each row */
  double *aIDF;                   /* IDF for each phrase */
  double *aFreq;                  /* Array used to calculate phrase freq. */
};

/*
** Callback used by fts5Bm25GetData() to count the number of rows in the
** table matched by each individual phrase within the query.
*/
static int fts5CountCb(
  const Fts5ExtensionApi *pApi, 
  Fts5Context *pFts,
  void *pUserData                 /* Pointer to sqlite3_int64 variable */
){
  sqlite3_int64 *pn = (sqlite3_int64*)pUserData;
  (*pn)++;
  return SQLITE_OK;
}

/*
** Set *ppData to point to the Fts5Bm25Data object for the current query. 
** If the object has not already been allocated, allocate and populate it
** now.
*/
static int fts5Bm25GetData(
  const Fts5ExtensionApi *pApi, 
  Fts5Context *pFts,
  Fts5Bm25Data **ppData           /* OUT: bm25-data object for this query */
){
  int rc = SQLITE_OK;             /* Return code */
  Fts5Bm25Data *p;                /* Object to return */

  p = pApi->xGetAuxdata(pFts, 0);
  if( p==0 ){
    int nPhrase;                  /* Number of phrases in query */
    sqlite3_int64 nRow;           /* Number of rows in table */
    sqlite3_int64 nToken;         /* Number of tokens in table */
    int nByte;                    /* Bytes of space to allocate */
    int i;

    /* Allocate the Fts5Bm25Data object */
    nPhrase = pApi->xPhraseCount(pFts);
    nByte = sizeof(Fts5Bm25Data) + nPhrase*2*sizeof(double);
    p = (Fts5Bm25Data*)sqlite3_malloc(nByte);
    if( p==0 ){
      rc = SQLITE_NOMEM;
    }else{
      memset(p, 0, nByte);
      p->nPhrase = nPhrase;
      p->aIDF = (double*)&p[1];
      p->aFreq = &p->aIDF[nPhrase];
    }

    /* Calculate the average document length for this FTS5 table */
    if( rc==SQLITE_OK ) rc = pApi->xRowCount(pFts, &nRow);
    if( rc==SQLITE_OK ) rc = pApi->xColumnTotalSize(pFts, -1, &nToken);
    if( rc==SQLITE_OK ) p->avgdl = (double)nToken  / (double)nRow;

    /* Calculate an IDF for each phrase in the query */
    for(i=0; rc==SQLITE_OK && i<nPhrase; i++){
      sqlite3_int64 nHit = 0;
      rc = pApi->xQueryPhrase(pFts, i, (void*)&nHit, fts5CountCb);
      if( rc==SQLITE_OK ){
        /* Calculate the IDF (Inverse Document Frequency) for phrase i.
        ** This is done using the standard BM25 formula as found on wikipedia:
        **
        **   IDF = log( (N - nHit + 0.5) / (nHit + 0.5) )
        **
        ** where "N" is the total number of documents in the set and nHit
        ** is the number that contain at least one instance of the phrase
        ** under consideration.
        **
        ** The problem with this is that if (N < 2*nHit), the IDF is 
        ** negative. Which is undesirable. So the mimimum allowable IDF is
        ** (1e-6) - roughly the same as a term that appears in just over
        ** half of set of 5,000,000 documents.  */
        double idf = log( (nRow - nHit + 0.5) / (nHit + 0.5) );
        if( idf<=0.0 ) idf = 1e-6;
        p->aIDF[i] = idf;
      }
    }

    if( rc!=SQLITE_OK ){
      sqlite3_free(p);
    }else{
      rc = pApi->xSetAuxdata(pFts, p, sqlite3_free);
    }
    if( rc!=SQLITE_OK ) p = 0;
  }
  *ppData = p;
  return rc;
}

/*
** Implementation of bm25() function.
*/
static void fts5Bm25Function(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  const double k1 = 1.2;          /* Constant "k1" from BM25 formula */
  const double b = 0.75;          /* Constant "b" from BM25 formula */
  int rc = SQLITE_OK;             /* Error code */
  double score = 0.0;             /* SQL function return value */
  Fts5Bm25Data *pData;            /* Values allocated/calculated once only */
  int i;                          /* Iterator variable */
  int nInst;                      /* Value returned by xInstCount() */
  double D;                       /* Total number of tokens in row */
  double *aFreq;                  /* Array of phrase freq. for current row */

  /* Calculate the phrase frequency (symbol "f(qi,D)" in the documentation)
  ** for each phrase in the query for the current row. */
  rc = fts5Bm25GetData(pApi, pFts, &pData);
  if( rc==SQLITE_OK ){
    aFreq = pData->aFreq;
    memset(aFreq, 0, sizeof(double) * pData->nPhrase);
    rc = pApi->xInstCount(pFts, &nInst);
  }
  for(i=0; rc==SQLITE_OK && i<nInst; i++){
    int ip; int ic; int io;
    rc = pApi->xInst(pFts, i, &ip, &ic, &io);
    if( rc==SQLITE_OK ){
      double w = (nVal > ic) ? sqlite3_value_double(apVal[ic]) : 1.0;
      aFreq[ip] += w;
    }
  }

  /* Figure out the total size of the current row in tokens. */
  if( rc==SQLITE_OK ){
    int nTok;
    rc = pApi->xColumnSize(pFts, -1, &nTok);
    D = (double)nTok;
  }

  /* Determine the BM25 score for the current row. */
  for(i=0; rc==SQLITE_OK && i<pData->nPhrase; i++){
    score += pData->aIDF[i] * (
      ( aFreq[i] * (k1 + 1.0) ) / 
      ( aFreq[i] + k1 * (1 - b + b * D / pData->avgdl) )
    );
  }
  
  /* If no error has occurred, return the calculated score. Otherwise,
  ** throw an SQL exception.  */
  if( rc==SQLITE_OK ){
    sqlite3_result_double(pCtx, -1.0 * score);
  }else{
    sqlite3_result_error_code(pCtx, rc);
  }
}

int sqlite3Fts5AuxInit(fts5_api *pApi){
  struct Builtin {
    const char *zFunc;            /* Function name (nul-terminated) */
    void *pUserData;              /* User-data pointer */
    fts5_extension_function xFunc;/* Callback function */
    void (*xDestroy)(void*);      /* Destructor function */
  } aBuiltin [] = {
    { "snippet",   0, fts5SnippetFunction, 0 },
    { "highlight", 0, fts5HighlightFunction, 0 },
    { "bm25",      0, fts5Bm25Function,    0 },
  };
  int rc = SQLITE_OK;             /* Return code */
  int i;                          /* To iterate through builtin functions */

  for(i=0; rc==SQLITE_OK && i<sizeof(aBuiltin)/sizeof(aBuiltin[0]); i++){
    rc = pApi->xCreateFunction(pApi,
        aBuiltin[i].zFunc,
        aBuiltin[i].pUserData,
        aBuiltin[i].xFunc,
        aBuiltin[i].xDestroy
    );
  }

  return rc;
}
#endif /* SQLITE_ENABLE_FTS5 */


Added ext/fts5/fts5_buffer.c.










































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
/*
** 2014 May 31
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
*/


#ifdef SQLITE_ENABLE_FTS5

#include "fts5Int.h"

int sqlite3Fts5BufferGrow(int *pRc, Fts5Buffer *pBuf, int nByte){
  /* A no-op if an error has already occurred */
  if( *pRc ) return 1;

  if( (pBuf->n + nByte) > pBuf->nSpace ){
    u8 *pNew;
    int nNew = pBuf->nSpace ? pBuf->nSpace*2 : 64;
    while( nNew<(pBuf->n + nByte) ){
      nNew = nNew * 2;
    }
    pNew = sqlite3_realloc(pBuf->p, nNew);
    if( pNew==0 ){
      *pRc = SQLITE_NOMEM;
      return 1;
    }else{
      pBuf->nSpace = nNew;
      pBuf->p = pNew;
    }
  }
  return 0;
}

/*
** Encode value iVal as an SQLite varint and append it to the buffer object
** pBuf. If an OOM error occurs, set the error code in p.
*/
void sqlite3Fts5BufferAppendVarint(int *pRc, Fts5Buffer *pBuf, i64 iVal){
  if( sqlite3Fts5BufferGrow(pRc, pBuf, 9) ) return;
  pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iVal);
}

void sqlite3Fts5Put32(u8 *aBuf, int iVal){
  aBuf[0] = (iVal>>24) & 0x00FF;
  aBuf[1] = (iVal>>16) & 0x00FF;
  aBuf[2] = (iVal>> 8) & 0x00FF;
  aBuf[3] = (iVal>> 0) & 0x00FF;
}

int sqlite3Fts5Get32(const u8 *aBuf){
  return (aBuf[0] << 24) + (aBuf[1] << 16) + (aBuf[2] << 8) + aBuf[3];
}

void sqlite3Fts5BufferAppend32(int *pRc, Fts5Buffer *pBuf, int iVal){
  if( sqlite3Fts5BufferGrow(pRc, pBuf, 4) ) return;
  sqlite3Fts5Put32(&pBuf->p[pBuf->n], iVal);
  pBuf->n += 4;
}

/*
** Append buffer nData/pData to buffer pBuf. If an OOM error occurs, set 
** the error code in p. If an error has already occurred when this function
** is called, it is a no-op.
*/
void sqlite3Fts5BufferAppendBlob(
  int *pRc,
  Fts5Buffer *pBuf, 
  int nData, 
  const u8 *pData
){
  assert( *pRc || nData>=0 );
  if( sqlite3Fts5BufferGrow(pRc, pBuf, nData) ) return;
  memcpy(&pBuf->p[pBuf->n], pData, nData);
  pBuf->n += nData;
}

/*
** Append the nul-terminated string zStr to the buffer pBuf. This function
** ensures that the byte following the buffer data is set to 0x00, even 
** though this byte is not included in the pBuf->n count.
*/
void sqlite3Fts5BufferAppendString(
  int *pRc,
  Fts5Buffer *pBuf, 
  const char *zStr
){
  int nStr = strlen(zStr);
  sqlite3Fts5BufferAppendBlob(pRc, pBuf, nStr+1, (const u8*)zStr);
  pBuf->n--;
}

/*
** Argument zFmt is a printf() style format string. This function performs
** the printf() style processing, then appends the results to buffer pBuf.
**
** Like sqlite3Fts5BufferAppendString(), this function ensures that the byte 
** following the buffer data is set to 0x00, even though this byte is not
** included in the pBuf->n count.
*/ 
void sqlite3Fts5BufferAppendPrintf(
  int *pRc,
  Fts5Buffer *pBuf, 
  char *zFmt, ...
){
  if( *pRc==SQLITE_OK ){
    char *zTmp;
    va_list ap;
    va_start(ap, zFmt);
    zTmp = sqlite3_vmprintf(zFmt, ap);
    va_end(ap);

    if( zTmp==0 ){
      *pRc = SQLITE_NOMEM;
    }else{
      sqlite3Fts5BufferAppendString(pRc, pBuf, zTmp);
      sqlite3_free(zTmp);
    }
  }
}

char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...){
  char *zRet = 0;
  if( *pRc==SQLITE_OK ){
    va_list ap;
    va_start(ap, zFmt);
    zRet = sqlite3_vmprintf(zFmt, ap);
    va_end(ap);
    if( zRet==0 ){
      *pRc = SQLITE_NOMEM; 
    }
  }
  return zRet;
}
 

/*
** Free any buffer allocated by pBuf. Zero the structure before returning.
*/
void sqlite3Fts5BufferFree(Fts5Buffer *pBuf){
  sqlite3_free(pBuf->p);
  memset(pBuf, 0, sizeof(Fts5Buffer));
}

/*
** Zero the contents of the buffer object. But do not free the associated 
** memory allocation.
*/
void sqlite3Fts5BufferZero(Fts5Buffer *pBuf){
  pBuf->n = 0;
}

/*
** Set the buffer to contain nData/pData. If an OOM error occurs, leave an
** the error code in p. If an error has already occurred when this function
** is called, it is a no-op.
*/
void sqlite3Fts5BufferSet(
  int *pRc,
  Fts5Buffer *pBuf, 
  int nData, 
  const u8 *pData
){
  pBuf->n = 0;
  sqlite3Fts5BufferAppendBlob(pRc, pBuf, nData, pData);
}

int sqlite3Fts5PoslistNext64(
  const u8 *a, int n,             /* Buffer containing poslist */
  int *pi,                        /* IN/OUT: Offset within a[] */
  i64 *piOff                      /* IN/OUT: Current offset */
){
  int i = *pi;
  if( i>=n ){
    /* EOF */
    *piOff = -1;
    return 1;  
  }else{
    i64 iOff = *piOff;
    int iVal;
    i += fts5GetVarint32(&a[i], iVal);
    if( iVal==1 ){
      i += fts5GetVarint32(&a[i], iVal);
      iOff = ((i64)iVal) << 32;
      i += fts5GetVarint32(&a[i], iVal);
    }
    *piOff = iOff + (iVal-2);
    *pi = i;
    return 0;
  }
}


/*
** Advance the iterator object passed as the only argument. Return true
** if the iterator reaches EOF, or false otherwise.
*/
int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader *pIter){
  if( sqlite3Fts5PoslistNext64(pIter->a, pIter->n, &pIter->i, &pIter->iPos) 
   || (pIter->iCol>=0 && (pIter->iPos >> 32) > pIter->iCol)
  ){
    pIter->bEof = 1;
  }
  return pIter->bEof;
}

int sqlite3Fts5PoslistReaderInit(
  int iCol,                       /* If (iCol>=0), this column only */
  const u8 *a, int n,             /* Poslist buffer to iterate through */
  Fts5PoslistReader *pIter        /* Iterator object to initialize */
){
  memset(pIter, 0, sizeof(*pIter));
  pIter->a = a;
  pIter->n = n;
  pIter->iCol = iCol;
  do {
    sqlite3Fts5PoslistReaderNext(pIter);
  }while( pIter->bEof==0 && (pIter->iPos >> 32)<iCol );
  return pIter->bEof;
}

int sqlite3Fts5PoslistWriterAppend(
  Fts5Buffer *pBuf, 
  Fts5PoslistWriter *pWriter,
  i64 iPos
){
  static const i64 colmask = ((i64)(0x7FFFFFFF)) << 32;
  int rc = SQLITE_OK;
  if( (iPos & colmask) != (pWriter->iPrev & colmask) ){
    fts5BufferAppendVarint(&rc, pBuf, 1);
    fts5BufferAppendVarint(&rc, pBuf, (iPos >> 32));
    pWriter->iPrev = (iPos & colmask);
  }
  fts5BufferAppendVarint(&rc, pBuf, (iPos - pWriter->iPrev) + 2);
  pWriter->iPrev = iPos;
  return rc;
}

void *sqlite3Fts5MallocZero(int *pRc, int nByte){
  void *pRet = 0;
  if( *pRc==SQLITE_OK ){
    pRet = sqlite3_malloc(nByte);
    if( pRet==0 && nByte>0 ){
      *pRc = SQLITE_NOMEM;
    }else{
      memset(pRet, 0, nByte);
    }
  }
  return pRet;
}

/*
** Return a nul-terminated copy of the string indicated by pIn. If nIn
** is non-negative, then it is the length of the string in bytes. Otherwise,
** the length of the string is determined using strlen().
**
** It is the responsibility of the caller to eventually free the returned
** buffer using sqlite3_free(). If an OOM error occurs, NULL is returned. 
*/
char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn){
  char *zRet = 0;
  if( *pRc==SQLITE_OK ){
    if( nIn<0 ){
      nIn = strlen(pIn);
    }
    zRet = (char*)sqlite3_malloc(nIn+1);
    if( zRet ){
      memcpy(zRet, pIn, nIn);
      zRet[nIn] = '\0';
    }else{
      *pRc = SQLITE_NOMEM;
    }
  }
  return zRet;
}


/*
** Return true if character 't' may be part of an FTS5 bareword, or false
** otherwise. Characters that may be part of barewords:
**
**   * All non-ASCII characters,
**   * The 52 upper and lower case ASCII characters, and
**   * The 10 integer ASCII characters.
**   * The underscore character "_" (0x5F).
*/
int sqlite3Fts5IsBareword(char t){
  u8 aBareword[128] = {
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x00 .. 0x0F */
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x10 .. 0x1F */
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x20 .. 0x2F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 0, 0, 0, 0, 0, 0,   /* 0x30 .. 0x3F */
    0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,   /* 0x40 .. 0x4F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 0, 1,   /* 0x50 .. 0x5F */
    0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,   /* 0x60 .. 0x6F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 0, 0    /* 0x70 .. 0x7F */
  };

  return (t & 0x80) || aBareword[(int)t];
}

#endif /* SQLITE_ENABLE_FTS5 */

Added ext/fts5/fts5_config.c.
































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
/*
** 2014 Jun 09
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
** This is an SQLite module implementing full-text search.
*/

#ifdef SQLITE_ENABLE_FTS5


#include "fts5Int.h"

#define FTS5_DEFAULT_PAGE_SIZE   1000
#define FTS5_DEFAULT_AUTOMERGE      4
#define FTS5_DEFAULT_CRISISMERGE   16

/* Maximum allowed page size */
#define FTS5_MAX_PAGE_SIZE (128*1024)

static int fts5_iswhitespace(char x){
  return (x==' ');
}

static int fts5_isopenquote(char x){
  return (x=='"' || x=='\'' || x=='[' || x=='`');
}

/*
** Argument pIn points to a character that is part of a nul-terminated 
** string. Return a pointer to the first character following *pIn in 
** the string that is not a white-space character.
*/
static const char *fts5ConfigSkipWhitespace(const char *pIn){
  const char *p = pIn;
  if( p ){
    while( fts5_iswhitespace(*p) ){ p++; }
  }
  return p;
}

/*
** Argument pIn points to a character that is part of a nul-terminated 
** string. Return a pointer to the first character following *pIn in 
** the string that is not a "bareword" character.
*/
static const char *fts5ConfigSkipBareword(const char *pIn){
  const char *p = pIn;
  while ( sqlite3Fts5IsBareword(*p) ) p++;
  if( p==pIn ) p = 0;
  return p;
}

static int fts5_isdigit(char a){
  return (a>='0' && a<='9');
}



static const char *fts5ConfigSkipLiteral(const char *pIn){
  const char *p = pIn;
  switch( *p ){
    case 'n': case 'N':
      if( sqlite3_strnicmp("null", p, 4)==0 ){
        p = &p[4];
      }else{
        p = 0;
      }
      break;

    case 'x': case 'X':
      p++;
      if( *p=='\'' ){
        p++;
        while( (*p>='a' && *p<='f') 
            || (*p>='A' && *p<='F') 
            || (*p>='0' && *p<='9') 
            ){
          p++;
        }
        if( *p=='\'' && 0==((p-pIn)%2) ){
          p++;
        }else{
          p = 0;
        }
      }else{
        p = 0;
      }
      break;

    case '\'':
      p++;
      while( p ){
        if( *p=='\'' ){
          p++;
          if( *p!='\'' ) break;
        }
        p++;
        if( *p==0 ) p = 0;
      }
      break;

    default:
      /* maybe a number */
      if( *p=='+' || *p=='-' ) p++;
      while( fts5_isdigit(*p) ) p++;

      /* At this point, if the literal was an integer, the parse is 
      ** finished. Or, if it is a floating point value, it may continue
      ** with either a decimal point or an 'E' character. */
      if( *p=='.' && fts5_isdigit(p[1]) ){
        p += 2;
        while( fts5_isdigit(*p) ) p++;
      }
      if( p==pIn ) p = 0;

      break;
  }

  return p;
}

/*
** The first character of the string pointed to by argument z is guaranteed
** to be an open-quote character (see function fts5_isopenquote()).
**
** This function searches for the corresponding close-quote character within
** the string and, if found, dequotes the string in place and adds a new
** nul-terminator byte.
**
** If the close-quote is found, the value returned is the byte offset of
** the character immediately following it. Or, if the close-quote is not 
** found, -1 is returned. If -1 is returned, the buffer is left in an 
** undefined state.
*/
static int fts5Dequote(char *z){
  char q;
  int iIn = 1;
  int iOut = 0;
  q = z[0];

  /* Set stack variable q to the close-quote character */
  assert( q=='[' || q=='\'' || q=='"' || q=='`' );
  if( q=='[' ) q = ']';  

  while( ALWAYS(z[iIn]) ){
    if( z[iIn]==q ){
      if( z[iIn+1]!=q ){
        /* Character iIn was the close quote. */
        iIn++;
        break;
      }else{
        /* Character iIn and iIn+1 form an escaped quote character. Skip
        ** the input cursor past both and copy a single quote character 
        ** to the output buffer. */
        iIn += 2;
        z[iOut++] = q;
      }
    }else{
      z[iOut++] = z[iIn++];
    }
  }

  z[iOut] = '\0';
  return iIn;
}

/*
** Convert an SQL-style quoted string into a normal string by removing
** the quote characters.  The conversion is done in-place.  If the
** input does not begin with a quote character, then this routine
** is a no-op.
**
** Examples:
**
**     "abc"   becomes   abc
**     'xyz'   becomes   xyz
**     [pqr]   becomes   pqr
**     `mno`   becomes   mno
*/
void sqlite3Fts5Dequote(char *z){
  char quote;                     /* Quote character (if any ) */

  assert( 0==fts5_iswhitespace(z[0]) );
  quote = z[0];
  if( quote=='[' || quote=='\'' || quote=='"' || quote=='`' ){
    fts5Dequote(z);
  }
}

/*
** Parse a "special" CREATE VIRTUAL TABLE directive and update
** configuration object pConfig as appropriate.
**
** If successful, object pConfig is updated and SQLITE_OK returned. If
** an error occurs, an SQLite error code is returned and an error message
** may be left in *pzErr. It is the responsibility of the caller to
** eventually free any such error message using sqlite3_free().
*/
static int fts5ConfigParseSpecial(
  Fts5Global *pGlobal,
  Fts5Config *pConfig,            /* Configuration object to update */
  const char *zCmd,               /* Special command to parse */
  const char *zArg,               /* Argument to parse */
  char **pzErr                    /* OUT: Error message */
){
  int rc = SQLITE_OK;
  int nCmd = strlen(zCmd);
  if( sqlite3_strnicmp("prefix", zCmd, nCmd)==0 ){
    const int nByte = sizeof(int) * FTS5_MAX_PREFIX_INDEXES;
    const char *p;
    if( pConfig->aPrefix ){
      *pzErr = sqlite3_mprintf("multiple prefix=... directives");
      rc = SQLITE_ERROR;
    }else{
      pConfig->aPrefix = sqlite3Fts5MallocZero(&rc, nByte);
    }
    p = zArg;
    while( rc==SQLITE_OK && p[0] ){
      int nPre = 0;
      while( p[0]==' ' ) p++;
      while( p[0]>='0' && p[0]<='9' && nPre<1000 ){
        nPre = nPre*10 + (p[0] - '0');
        p++;
      }
      while( p[0]==' ' ) p++;
      if( p[0]==',' ){
        p++;
      }else if( p[0] ){
        *pzErr = sqlite3_mprintf("malformed prefix=... directive");
        rc = SQLITE_ERROR;
      }
      if( rc==SQLITE_OK && (nPre==0 || nPre>=1000) ){
        *pzErr = sqlite3_mprintf("prefix length out of range: %d", nPre);
        rc = SQLITE_ERROR;
      }
      pConfig->aPrefix[pConfig->nPrefix] = nPre;
      pConfig->nPrefix++;
    }
    return rc;
  }

  if( sqlite3_strnicmp("tokenize", zCmd, nCmd)==0 ){
    const char *p = (const char*)zArg;
    int nArg = strlen(zArg) + 1;
    char **azArg = sqlite3Fts5MallocZero(&rc, sizeof(char*) * nArg);
    char *pDel = sqlite3Fts5MallocZero(&rc, nArg * 2);
    char *pSpace = pDel;

    if( azArg && pSpace ){
      if( pConfig->pTok ){
        *pzErr = sqlite3_mprintf("multiple tokenize=... directives");
        rc = SQLITE_ERROR;
      }else{
        for(nArg=0; p && *p; nArg++){
          const char *p2 = fts5ConfigSkipWhitespace(p);
          if( *p2=='\'' ){
            p = fts5ConfigSkipLiteral(p2);
          }else{
            p = fts5ConfigSkipBareword(p2);
          }
          if( p ){
            memcpy(pSpace, p2, p-p2);
            azArg[nArg] = pSpace;
            sqlite3Fts5Dequote(pSpace);
            pSpace += (p - p2) + 1;
            p = fts5ConfigSkipWhitespace(p);
          }
        }
        if( p==0 ){
          *pzErr = sqlite3_mprintf("parse error in tokenize directive");
          rc = SQLITE_ERROR;
        }else{
          rc = sqlite3Fts5GetTokenizer(pGlobal, 
              (const char**)azArg, nArg, &pConfig->pTok, &pConfig->pTokApi,
              pzErr
          );
        }
      }
    }

    sqlite3_free(azArg);
    sqlite3_free(pDel);
    return rc;
  }

  if( sqlite3_strnicmp("content", zCmd, nCmd)==0 ){
    if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){
      *pzErr = sqlite3_mprintf("multiple content=... directives");
      rc = SQLITE_ERROR;
    }else{
      if( zArg[0] ){
        pConfig->eContent = FTS5_CONTENT_EXTERNAL;
        pConfig->zContent = sqlite3Fts5Mprintf(&rc, "%Q.%Q", pConfig->zDb,zArg);
      }else{
        pConfig->eContent = FTS5_CONTENT_NONE;
      }
    }
    return rc;
  }

  if( sqlite3_strnicmp("content_rowid", zCmd, nCmd)==0 ){
    if( pConfig->zContentRowid ){
      *pzErr = sqlite3_mprintf("multiple content_rowid=... directives");
      rc = SQLITE_ERROR;
    }else{
      pConfig->zContentRowid = sqlite3Fts5Strndup(&rc, zArg, -1);
    }
    return rc;
  }

  if( sqlite3_strnicmp("columnsize", zCmd, nCmd)==0 ){
    if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
      *pzErr = sqlite3_mprintf("malformed columnsize=... directive");
      rc = SQLITE_ERROR;
    }else{
      pConfig->bColumnsize = (zArg[0]=='1');
    }
    return rc;
  }

  *pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd);
  return SQLITE_ERROR;
}

/*
** Allocate an instance of the default tokenizer ("simple") at 
** Fts5Config.pTokenizer. Return SQLITE_OK if successful, or an SQLite error
** code if an error occurs.
*/
static int fts5ConfigDefaultTokenizer(Fts5Global *pGlobal, Fts5Config *pConfig){
  assert( pConfig->pTok==0 && pConfig->pTokApi==0 );
  return sqlite3Fts5GetTokenizer(
      pGlobal, 0, 0, &pConfig->pTok, &pConfig->pTokApi, 0
  );
}

/*
** Gobble up the first bareword or quoted word from the input buffer zIn.
** Return a pointer to the character immediately following the last in
** the gobbled word if successful, or a NULL pointer otherwise (failed
** to find close-quote character).
**
** Before returning, set pzOut to point to a new buffer containing a
** nul-terminated, dequoted copy of the gobbled word. If the word was
** quoted, *pbQuoted is also set to 1 before returning.
**
** If *pRc is other than SQLITE_OK when this function is called, it is
** a no-op (NULL is returned). Otherwise, if an OOM occurs within this
** function, *pRc is set to SQLITE_NOMEM before returning. *pRc is *not*
** set if a parse error (failed to find close quote) occurs.
*/
static const char *fts5ConfigGobbleWord(
  int *pRc,                       /* IN/OUT: Error code */
  const char *zIn,                /* Buffer to gobble string/bareword from */
  char **pzOut,                   /* OUT: malloc'd buffer containing str/bw */
  int *pbQuoted                   /* OUT: Set to true if dequoting required */
){
  const char *zRet = 0;

  int nIn = strlen(zIn);
  char *zOut = sqlite3_malloc(nIn+1);

  assert( *pRc==SQLITE_OK );
  *pbQuoted = 0;
  *pzOut = 0;

  if( zOut==0 ){
    *pRc = SQLITE_NOMEM;
  }else{
    memcpy(zOut, zIn, nIn+1);
    if( fts5_isopenquote(zOut[0]) ){
      int ii = fts5Dequote(zOut);
      zRet = &zIn[ii];
      *pbQuoted = 1;
    }else{
      zRet = fts5ConfigSkipBareword(zIn);
      zOut[zRet-zIn] = '\0';
    }
  }

  if( zRet==0 ){
    sqlite3_free(zOut);
  }else{
    *pzOut = zOut;
  }

  return zRet;
}

static int fts5ConfigParseColumn(
  Fts5Config *p, 
  char *zCol, 
  char *zArg, 
  char **pzErr
){
  int rc = SQLITE_OK;
  if( 0==sqlite3_stricmp(zCol, FTS5_RANK_NAME) 
   || 0==sqlite3_stricmp(zCol, FTS5_ROWID_NAME) 
  ){
    *pzErr = sqlite3_mprintf("reserved fts5 column name: %s", zCol);
    rc = SQLITE_ERROR;
  }else if( zArg ){
    if( 0==sqlite3_stricmp(zArg, "unindexed") ){
      p->abUnindexed[p->nCol] = 1;
    }else{
      *pzErr = sqlite3_mprintf("unrecognized column option: %s", zArg);
      rc = SQLITE_ERROR;
    }
  }

  p->azCol[p->nCol++] = zCol;
  return rc;
}

/*
** Populate the Fts5Config.zContentExprlist string.
*/
static int fts5ConfigMakeExprlist(Fts5Config *p){
  int i;
  int rc = SQLITE_OK;
  Fts5Buffer buf = {0, 0, 0};

  sqlite3Fts5BufferAppendPrintf(&rc, &buf, "T.%Q", p->zContentRowid);
  if( p->eContent!=FTS5_CONTENT_NONE ){
    for(i=0; i<p->nCol; i++){
      if( p->eContent==FTS5_CONTENT_EXTERNAL ){
        sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.%Q", p->azCol[i]);
      }else{
        sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.c%d", i);
      }
    }
  }

  assert( p->zContentExprlist==0 );
  p->zContentExprlist = (char*)buf.p;
  return rc;
}

/*
** Arguments nArg/azArg contain the string arguments passed to the xCreate
** or xConnect method of the virtual table. This function attempts to 
** allocate an instance of Fts5Config containing the results of parsing
** those arguments.
**
** If successful, SQLITE_OK is returned and *ppOut is set to point to the
** new Fts5Config object. If an error occurs, an SQLite error code is 
** returned, *ppOut is set to NULL and an error message may be left in
** *pzErr. It is the responsibility of the caller to eventually free any 
** such error message using sqlite3_free().
*/
int sqlite3Fts5ConfigParse(
  Fts5Global *pGlobal,
  sqlite3 *db,
  int nArg,                       /* Number of arguments */
  const char **azArg,             /* Array of nArg CREATE VIRTUAL TABLE args */
  Fts5Config **ppOut,             /* OUT: Results of parse */
  char **pzErr                    /* OUT: Error message */
){
  int rc = SQLITE_OK;             /* Return code */
  Fts5Config *pRet;               /* New object to return */
  int i;
  int nByte;

  *ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config));
  if( pRet==0 ) return SQLITE_NOMEM;
  memset(pRet, 0, sizeof(Fts5Config));
  pRet->db = db;
  pRet->iCookie = -1;

  nByte = nArg * (sizeof(char*) + sizeof(u8));
  pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte);
  pRet->abUnindexed = (u8*)&pRet->azCol[nArg];
  pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1);
  pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1);
  pRet->bColumnsize = 1;
  if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){
    *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName);
    rc = SQLITE_ERROR;
  }

  for(i=3; rc==SQLITE_OK && i<nArg; i++){
    const char *zOrig = azArg[i];
    const char *z;
    char *zOne = 0;
    char *zTwo = 0;
    int bOption = 0;
    int bMustBeCol = 0;

    z = fts5ConfigGobbleWord(&rc, zOrig, &zOne, &bMustBeCol);
    z = fts5ConfigSkipWhitespace(z);
    if( z && *z=='=' ){
      bOption = 1;
      z++;
      if( bMustBeCol ) z = 0;
    }
    z = fts5ConfigSkipWhitespace(z);
    if( z && z[0] ){
      int bDummy;
      z = fts5ConfigGobbleWord(&rc, z, &zTwo, &bDummy);
      if( z && z[0] ) z = 0;
    }

    if( rc==SQLITE_OK ){
      if( z==0 ){
        *pzErr = sqlite3_mprintf("parse error in \"%s\"", zOrig);
        rc = SQLITE_ERROR;
      }else{
        if( bOption ){
          rc = fts5ConfigParseSpecial(pGlobal, pRet, zOne, zTwo?zTwo:"", pzErr);
        }else{
          rc = fts5ConfigParseColumn(pRet, zOne, zTwo, pzErr);
          zOne = 0;
        }
      }
    }

    sqlite3_free(zOne);
    sqlite3_free(zTwo);
  }

  /* If a tokenizer= option was successfully parsed, the tokenizer has
  ** already been allocated. Otherwise, allocate an instance of the default
  ** tokenizer (unicode61) now.  */
  if( rc==SQLITE_OK && pRet->pTok==0 ){
    rc = fts5ConfigDefaultTokenizer(pGlobal, pRet);
  }

  /* If no zContent option was specified, fill in the default values. */
  if( rc==SQLITE_OK && pRet->zContent==0 ){
    const char *zTail = 0;
    assert( pRet->eContent==FTS5_CONTENT_NORMAL 
         || pRet->eContent==FTS5_CONTENT_NONE 
    );
    if( pRet->eContent==FTS5_CONTENT_NORMAL ){
      zTail = "content";
    }else if( pRet->bColumnsize ){
      zTail = "docsize";
    }

    if( zTail ){
      pRet->zContent = sqlite3Fts5Mprintf(
          &rc, "%Q.'%q_%s'", pRet->zDb, pRet->zName, zTail
      );
    }
  }

  if( rc==SQLITE_OK && pRet->zContentRowid==0 ){
    pRet->zContentRowid = sqlite3Fts5Strndup(&rc, "rowid", -1);
  }

  /* Formulate the zContentExprlist text */
  if( rc==SQLITE_OK ){
    rc = fts5ConfigMakeExprlist(pRet);
  }

  if( rc!=SQLITE_OK ){
    sqlite3Fts5ConfigFree(pRet);
    *ppOut = 0;
  }
  return rc;
}

/*
** Free the configuration object passed as the only argument.
*/
void sqlite3Fts5ConfigFree(Fts5Config *pConfig){
  if( pConfig ){
    int i;
    if( pConfig->pTok ){
      pConfig->pTokApi->xDelete(pConfig->pTok);
    }
    sqlite3_free(pConfig->zDb);
    sqlite3_free(pConfig->zName);
    for(i=0; i<pConfig->nCol; i++){
      sqlite3_free(pConfig->azCol[i]);
    }
    sqlite3_free(pConfig->azCol);
    sqlite3_free(pConfig->aPrefix);
    sqlite3_free(pConfig->zRank);
    sqlite3_free(pConfig->zRankArgs);
    sqlite3_free(pConfig->zContent);
    sqlite3_free(pConfig->zContentRowid);
    sqlite3_free(pConfig->zContentExprlist);
    sqlite3_free(pConfig);
  }
}

/*
** Call sqlite3_declare_vtab() based on the contents of the configuration
** object passed as the only argument. Return SQLITE_OK if successful, or
** an SQLite error code if an error occurs.
*/
int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig){
  int i;
  int rc = SQLITE_OK;
  char *zSql;

  zSql = sqlite3Fts5Mprintf(&rc, "CREATE TABLE x(");
  for(i=0; zSql && i<pConfig->nCol; i++){
    const char *zSep = (i==0?"":", ");
    zSql = sqlite3Fts5Mprintf(&rc, "%z%s%Q", zSql, zSep, pConfig->azCol[i]);
  }
  zSql = sqlite3Fts5Mprintf(&rc, "%z, %Q HIDDEN, %s HIDDEN)", 
      zSql, pConfig->zName, FTS5_RANK_NAME
  );

  assert( zSql || rc==SQLITE_NOMEM );
  if( zSql ){
    rc = sqlite3_declare_vtab(pConfig->db, zSql);
    sqlite3_free(zSql);
  }
  
  return rc;
}

/*
** Tokenize the text passed via the second and third arguments.
**
** The callback is invoked once for each token in the input text. The
** arguments passed to it are, in order:
**
**     void *pCtx          // Copy of 4th argument to sqlite3Fts5Tokenize()
**     const char *pToken  // Pointer to buffer containing token
**     int nToken          // Size of token in bytes
**     int iStart          // Byte offset of start of token within input text
**     int iEnd            // Byte offset of end of token within input text
**     int iPos            // Position of token in input (first token is 0)
**
** If the callback returns a non-zero value the tokenization is abandoned
** and no further callbacks are issued. 
**
** This function returns SQLITE_OK if successful or an SQLite error code
** if an error occurs. If the tokenization was abandoned early because
** the callback returned SQLITE_DONE, this is not an error and this function
** still returns SQLITE_OK. Or, if the tokenization was abandoned early
** because the callback returned another non-zero value, it is assumed
** to be an SQLite error code and returned to the caller.
*/
int sqlite3Fts5Tokenize(
  Fts5Config *pConfig,            /* FTS5 Configuration object */
  const char *pText, int nText,   /* Text to tokenize */
  void *pCtx,                     /* Context passed to xToken() */
  int (*xToken)(void*, const char*, int, int, int)    /* Callback */
){
  if( pText==0 ) return SQLITE_OK;
  return pConfig->pTokApi->xTokenize(pConfig->pTok, pCtx, pText, nText, xToken);
}

/*
** Argument pIn points to the first character in what is expected to be
** a comma-separated list of SQL literals followed by a ')' character.
** If it actually is this, return a pointer to the ')'. Otherwise, return
** NULL to indicate a parse error.
*/
static const char *fts5ConfigSkipArgs(const char *pIn){
  const char *p = pIn;
  
  while( 1 ){
    p = fts5ConfigSkipWhitespace(p);
    p = fts5ConfigSkipLiteral(p);
    p = fts5ConfigSkipWhitespace(p);
    if( p==0 || *p==')' ) break;
    if( *p!=',' ){
      p = 0;
      break;
    }
    p++;
  }

  return p;
}

/*
** Parameter zIn contains a rank() function specification. The format of 
** this is:
**
**   + Bareword (function name)
**   + Open parenthesis - "("
**   + Zero or more SQL literals in a comma separated list
**   + Close parenthesis - ")"
*/
int sqlite3Fts5ConfigParseRank(
  const char *zIn,                /* Input string */
  char **pzRank,                  /* OUT: Rank function name */
  char **pzRankArgs               /* OUT: Rank function arguments */
){
  const char *p = zIn;
  const char *pRank;
  char *zRank = 0;
  char *zRankArgs = 0;
  int rc = SQLITE_OK;

  *pzRank = 0;
  *pzRankArgs = 0;

  p = fts5ConfigSkipWhitespace(p);
  pRank = p;
  p = fts5ConfigSkipBareword(p);

  if( p ){
    zRank = sqlite3Fts5MallocZero(&rc, 1 + p - pRank);
    if( zRank ) memcpy(zRank, pRank, p-pRank);
  }else{
    rc = SQLITE_ERROR;
  }

  if( rc==SQLITE_OK ){
    p = fts5ConfigSkipWhitespace(p);
    if( *p!='(' ) rc = SQLITE_ERROR;
    p++;
  }
  if( rc==SQLITE_OK ){
    const char *pArgs; 
    p = fts5ConfigSkipWhitespace(p);
    pArgs = p;
    if( *p!=')' ){
      p = fts5ConfigSkipArgs(p);
      if( p==0 ){
        rc = SQLITE_ERROR;
      }else{
        zRankArgs = sqlite3Fts5MallocZero(&rc, 1 + p - pArgs);
        if( zRankArgs ) memcpy(zRankArgs, pArgs, p-pArgs);
      }
    }
  }

  if( rc!=SQLITE_OK ){
    sqlite3_free(zRank);
    assert( zRankArgs==0 );
  }else{
    *pzRank = zRank;
    *pzRankArgs = zRankArgs;
  }
  return rc;
}

int sqlite3Fts5ConfigSetValue(
  Fts5Config *pConfig, 
  const char *zKey, 
  sqlite3_value *pVal,
  int *pbBadkey
){
  int rc = SQLITE_OK;

  if( 0==sqlite3_stricmp(zKey, "pgsz") ){
    int pgsz = 0;
    if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
      pgsz = sqlite3_value_int(pVal);
    }
    if( pgsz<=0 || pgsz>FTS5_MAX_PAGE_SIZE ){
      *pbBadkey = 1;
    }else{
      pConfig->pgsz = pgsz;
    }
  }

  else if( 0==sqlite3_stricmp(zKey, "automerge") ){
    int nAutomerge = -1;
    if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
      nAutomerge = sqlite3_value_int(pVal);
    }
    if( nAutomerge<0 || nAutomerge>64 ){
      *pbBadkey = 1;
    }else{
      if( nAutomerge==1 ) nAutomerge = FTS5_DEFAULT_AUTOMERGE;
      pConfig->nAutomerge = nAutomerge;
    }
  }

  else if( 0==sqlite3_stricmp(zKey, "crisismerge") ){
    int nCrisisMerge = -1;
    if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
      nCrisisMerge = sqlite3_value_int(pVal);
    }
    if( nCrisisMerge<0 ){
      *pbBadkey = 1;
    }else{
      if( nCrisisMerge<=1 ) nCrisisMerge = FTS5_DEFAULT_CRISISMERGE;
      pConfig->nCrisisMerge = nCrisisMerge;
    }
  }

  else if( 0==sqlite3_stricmp(zKey, "rank") ){
    const char *zIn = (const char*)sqlite3_value_text(pVal);
    char *zRank;
    char *zRankArgs;
    rc = sqlite3Fts5ConfigParseRank(zIn, &zRank, &zRankArgs);
    if( rc==SQLITE_OK ){
      sqlite3_free(pConfig->zRank);
      sqlite3_free(pConfig->zRankArgs);
      pConfig->zRank = zRank;
      pConfig->zRankArgs = zRankArgs;
    }else if( rc==SQLITE_ERROR ){
      rc = SQLITE_OK;
      *pbBadkey = 1;
    }
  }else{
    *pbBadkey = 1;
  }
  return rc;
}

/*
** Load the contents of the %_config table into memory.
*/
int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){
  const char *zSelect = "SELECT k, v FROM %Q.'%q_config'";
  char *zSql;
  sqlite3_stmt *p = 0;
  int rc = SQLITE_OK;
  int iVersion = 0;

  /* Set default values */
  pConfig->pgsz = FTS5_DEFAULT_PAGE_SIZE;
  pConfig->nAutomerge = FTS5_DEFAULT_AUTOMERGE;
  pConfig->nCrisisMerge = FTS5_DEFAULT_CRISISMERGE;

  zSql = sqlite3Fts5Mprintf(&rc, zSelect, pConfig->zDb, pConfig->zName);
  if( zSql ){
    rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p, 0);
    sqlite3_free(zSql);
  }

  assert( rc==SQLITE_OK || p==0 );
  if( rc==SQLITE_OK ){
    while( SQLITE_ROW==sqlite3_step(p) ){
      const char *zK = (const char*)sqlite3_column_text(p, 0);
      sqlite3_value *pVal = sqlite3_column_value(p, 1);
      if( 0==sqlite3_stricmp(zK, "version") ){
        iVersion = sqlite3_value_int(pVal);
      }else{
        int bDummy = 0;
        sqlite3Fts5ConfigSetValue(pConfig, zK, pVal, &bDummy);
      }
    }
    rc = sqlite3_finalize(p);
  }
  
  if( rc==SQLITE_OK && iVersion!=FTS5_CURRENT_VERSION ){
    rc = SQLITE_ERROR;
    if( pConfig->pzErrmsg ){
      assert( 0==*pConfig->pzErrmsg );
      *pConfig->pzErrmsg = sqlite3_mprintf(
          "invalid fts5 file format (found %d, expected %d) - run 'rebuild'",
          iVersion, FTS5_CURRENT_VERSION
      );
    }
  }

  if( rc==SQLITE_OK ){
    pConfig->iCookie = iCookie;
  }
  return rc;
}

#endif /* SQLITE_ENABLE_FTS5 */
Added ext/fts5/fts5_expr.c.








































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
/*
** 2014 May 31
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
*/

#ifdef SQLITE_ENABLE_FTS5


#include "fts5Int.h"
#include "fts5parse.h"

/*
** All token types in the generated fts5parse.h file are greater than 0.
*/
#define FTS5_EOF 0

typedef struct Fts5ExprTerm Fts5ExprTerm;

/*
** Functions generated by lemon from fts5parse.y.
*/
void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(u64));
void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*));
void sqlite3Fts5Parser(void*, int, Fts5Token, Fts5Parse*);

struct Fts5Expr {
  Fts5Index *pIndex;
  Fts5ExprNode *pRoot;
  int bDesc;                      /* Iterate in descending docid order */
  int nPhrase;                    /* Number of phrases in expression */
  Fts5ExprPhrase **apExprPhrase;  /* Pointers to phrase objects */
};

/*
** eType:
**   Expression node type. Always one of:
**
**       FTS5_AND                 (nChild, apChild valid)
**       FTS5_OR                  (nChild, apChild valid)
**       FTS5_NOT                 (nChild, apChild valid)
**       FTS5_STRING              (pNear valid)
**       FTS5_TERM                (pNear valid)
*/
struct Fts5ExprNode {
  int eType;                      /* Node type */
  int bEof;                       /* True at EOF */
  int bNomatch;                   /* True if entry is not a match */

  i64 iRowid;                     /* Current rowid */
  Fts5ExprNearset *pNear;         /* For FTS5_STRING - cluster of phrases */

  /* Child nodes. For a NOT node, this array always contains 2 entries. For 
  ** AND or OR nodes, it contains 2 or more entries.  */
  int nChild;                     /* Number of child nodes */
  Fts5ExprNode *apChild[0];       /* Array of child nodes */
};

#define Fts5NodeIsString(p) ((p)->eType==FTS5_TERM || (p)->eType==FTS5_STRING)

/*
** An instance of the following structure represents a single search term
** or term prefix.
*/
struct Fts5ExprTerm {
  int bPrefix;                    /* True for a prefix term */
  char *zTerm;                    /* nul-terminated term */
  Fts5IndexIter *pIter;           /* Iterator for this term */
};

/*
** A phrase. One or more terms that must appear in a contiguous sequence
** within a document for it to match.
*/
struct Fts5ExprPhrase {
  Fts5ExprNode *pNode;            /* FTS5_STRING node this phrase is part of */
  Fts5Buffer poslist;             /* Current position list */
  int nTerm;                      /* Number of entries in aTerm[] */
  Fts5ExprTerm aTerm[0];          /* Terms that make up this phrase */
};

/*
** If a NEAR() clump may only match a specific set of columns, then
** Fts5ExprNearset.pColset points to an object of the following type.
** Each entry in the aiCol[] array
*/
struct Fts5ExprColset {
  int nCol;
  int aiCol[1];
};

/*
** One or more phrases that must appear within a certain token distance of
** each other within each matching document.
*/
struct Fts5ExprNearset {
  int nNear;                      /* NEAR parameter */
  Fts5ExprColset *pColset;        /* Columns to search (NULL -> all columns) */
  int nPhrase;                    /* Number of entries in aPhrase[] array */
  Fts5ExprPhrase *apPhrase[0];    /* Array of phrase pointers */
};


/*
** Parse context.
*/
struct Fts5Parse {
  Fts5Config *pConfig;
  char *zErr;
  int rc;
  int nPhrase;                    /* Size of apPhrase array */
  Fts5ExprPhrase **apPhrase;      /* Array of all phrases */
  Fts5ExprNode *pExpr;            /* Result of a successful parse */
};

void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){
  va_list ap;
  va_start(ap, zFmt);
  if( pParse->rc==SQLITE_OK ){
    pParse->zErr = sqlite3_vmprintf(zFmt, ap);
    pParse->rc = SQLITE_ERROR;
  }
  va_end(ap);
}

static int fts5ExprIsspace(char t){
  return t==' ' || t=='\t' || t=='\n' || t=='\r';
}

/*
** Read the first token from the nul-terminated string at *pz.
*/
static int fts5ExprGetToken(
  Fts5Parse *pParse, 
  const char **pz,                /* IN/OUT: Pointer into buffer */
  Fts5Token *pToken
){
  const char *z = *pz;
  int tok;

  /* Skip past any whitespace */
  while( fts5ExprIsspace(*z) ) z++;

  pToken->p = z;
  pToken->n = 1;
  switch( *z ){
    case '(':  tok = FTS5_LP;    break;
    case ')':  tok = FTS5_RP;    break;
    case '{':  tok = FTS5_LCP;   break;
    case '}':  tok = FTS5_RCP;   break;
    case ':':  tok = FTS5_COLON; break;
    case ',':  tok = FTS5_COMMA; break;
    case '+':  tok = FTS5_PLUS;  break;
    case '*':  tok = FTS5_STAR;  break;
    case '\0': tok = FTS5_EOF;   break;

    case '"': {
      const char *z2;
      tok = FTS5_STRING;

      for(z2=&z[1]; 1; z2++){
        if( z2[0]=='"' ){
          z2++;
          if( z2[0]!='"' ) break;
        }
        if( z2[0]=='\0' ){
          sqlite3Fts5ParseError(pParse, "unterminated string");
          return FTS5_EOF;
        }
      }
      pToken->n = (z2 - z);
      break;
    }

    default: {
      const char *z2;
      tok = FTS5_STRING;
      for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++);
      pToken->n = (z2 - z);
      if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 )  tok = FTS5_OR;
      if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT;
      if( pToken->n==3 && memcmp(pToken->p, "AND", 3)==0 ) tok = FTS5_AND;
      break;
    }
  }

  *pz = &pToken->p[pToken->n];
  return tok;
}

static void *fts5ParseAlloc(u64 t){ return sqlite3_malloc((int)t); }
static void fts5ParseFree(void *p){ sqlite3_free(p); }

int sqlite3Fts5ExprNew(
  Fts5Config *pConfig,            /* FTS5 Configuration */
  const char *zExpr,              /* Expression text */
  Fts5Expr **ppNew, 
  char **pzErr
){
  Fts5Parse sParse;
  Fts5Token token;
  const char *z = zExpr;
  int t;                          /* Next token type */
  void *pEngine;
  Fts5Expr *pNew;

  *ppNew = 0;
  *pzErr = 0;
  memset(&sParse, 0, sizeof(sParse));
  pEngine = sqlite3Fts5ParserAlloc(fts5ParseAlloc);
  if( pEngine==0 ){ return SQLITE_NOMEM; }
  sParse.pConfig = pConfig;

  do {
    t = fts5ExprGetToken(&sParse, &z, &token);
    sqlite3Fts5Parser(pEngine, t, token, &sParse);
  }while( sParse.rc==SQLITE_OK && t!=FTS5_EOF );
  sqlite3Fts5ParserFree(pEngine, fts5ParseFree);

  assert( sParse.rc!=SQLITE_OK || sParse.zErr==0 );
  if( sParse.rc==SQLITE_OK ){
    *ppNew = pNew = sqlite3_malloc(sizeof(Fts5Expr));
    if( pNew==0 ){
      sParse.rc = SQLITE_NOMEM;
      sqlite3Fts5ParseNodeFree(sParse.pExpr);
    }else{
      pNew->pRoot = sParse.pExpr;
      pNew->pIndex = 0;
      pNew->apExprPhrase = sParse.apPhrase;
      pNew->nPhrase = sParse.nPhrase;
      sParse.apPhrase = 0;
    }
  }

  sqlite3_free(sParse.apPhrase);
  *pzErr = sParse.zErr;
  return sParse.rc;
}

/*
** Create a new FTS5 expression by cloning phrase iPhrase of the
** expression passed as the second argument.
*/
int sqlite3Fts5ExprPhraseExpr(
  Fts5Config *pConfig,
  Fts5Expr *pExpr, 
  int iPhrase, 
  Fts5Expr **ppNew
){
  int rc = SQLITE_OK;             /* Return code */
  Fts5ExprPhrase *pOrig;          /* The phrase extracted from pExpr */
  Fts5ExprPhrase *pCopy;          /* Copy of pOrig */
  Fts5Expr *pNew = 0;             /* Expression to return via *ppNew */

  pOrig = pExpr->apExprPhrase[iPhrase];
  pCopy = (Fts5ExprPhrase*)sqlite3Fts5MallocZero(&rc, 
      sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * pOrig->nTerm
  );
  if( pCopy ){
    int i;                          /* Used to iterate through phrase terms */
    Fts5ExprPhrase **apPhrase;
    Fts5ExprNode *pNode;
    Fts5ExprNearset *pNear;

    pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr));
    apPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc, 
        sizeof(Fts5ExprPhrase*)
    );
    pNode = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprNode));
    pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc, 
        sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*)
    );

    for(i=0; i<pOrig->nTerm; i++){
      pCopy->aTerm[i].zTerm = sqlite3Fts5Strndup(&rc, pOrig->aTerm[i].zTerm,-1);
      pCopy->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix;
    }

    if( rc==SQLITE_OK ){
      /* All the allocations succeeded. Put the expression object together. */
      pNew->pIndex = pExpr->pIndex;
      pNew->pRoot = pNode;
      pNew->nPhrase = 1;
      pNew->apExprPhrase = apPhrase;
      pNew->apExprPhrase[0] = pCopy;

      pNode->eType = (pOrig->nTerm==1 ? FTS5_TERM : FTS5_STRING);
      pNode->pNear = pNear;

      pNear->nPhrase = 1;
      pNear->apPhrase[0] = pCopy;

      pCopy->nTerm = pOrig->nTerm;
      pCopy->pNode = pNode;
    }else{
      /* At least one allocation failed. Free them all. */
      for(i=0; i<pOrig->nTerm; i++){
        sqlite3_free(pCopy->aTerm[i].zTerm);
      }
      sqlite3_free(pCopy);
      sqlite3_free(pNear);
      sqlite3_free(pNode);
      sqlite3_free(apPhrase);
      sqlite3_free(pNew);
      pNew = 0;
    }
  }

  *ppNew = pNew;
  return rc;
}

/*
** Free the expression node object passed as the only argument.
*/
void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){
  if( p ){
    int i;
    for(i=0; i<p->nChild; i++){
      sqlite3Fts5ParseNodeFree(p->apChild[i]);
    }
    sqlite3Fts5ParseNearsetFree(p->pNear);
    sqlite3_free(p);
  }
}

/*
** Free the expression object passed as the only argument.
*/
void sqlite3Fts5ExprFree(Fts5Expr *p){
  if( p ){
    sqlite3Fts5ParseNodeFree(p->pRoot);
    sqlite3_free(p->apExprPhrase);
    sqlite3_free(p);
  }
}

static int fts5ExprColsetTest(Fts5ExprColset *pColset, int iCol){
  int i;
  for(i=0; i<pColset->nCol; i++){
    if( pColset->aiCol[i]==iCol ) return 1;
  }
  return 0;
}

/*
** All individual term iterators in pPhrase are guaranteed to be valid and
** pointing to the same rowid when this function is called. This function 
** checks if the current rowid really is a match, and if so populates
** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch
** is set to true if this is really a match, or false otherwise.
**
** SQLITE_OK is returned if an error occurs, or an SQLite error code 
** otherwise. It is not considered an error code if the current rowid is 
** not a match.
*/
static int fts5ExprPhraseIsMatch(
  Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
  Fts5ExprColset *pColset,        /* Restrict matches to these columns */
  Fts5ExprPhrase *pPhrase,        /* Phrase object to initialize */
  int *pbMatch                    /* OUT: Set to true if really a match */
){
  Fts5PoslistWriter writer = {0};
  Fts5PoslistReader aStatic[4];
  Fts5PoslistReader *aIter = aStatic;
  int i;
  int rc = SQLITE_OK;
  int iCol = -1;
  
  if( pColset && pColset->nCol==1 ){
    iCol = pColset->aiCol[0];
    pColset = 0;
  }

  fts5BufferZero(&pPhrase->poslist);

  /* If the aStatic[] array is not large enough, allocate a large array
  ** using sqlite3_malloc(). This approach could be improved upon. */
  if( pPhrase->nTerm>(sizeof(aStatic) / sizeof(aStatic[0])) ){
    int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm;
    aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte);
    if( !aIter ) return SQLITE_NOMEM;
  }

  /* Initialize a term iterator for each term in the phrase */
  for(i=0; i<pPhrase->nTerm; i++){
    i64 dummy;
    int n;
    const u8 *a;
    rc = sqlite3Fts5IterPoslist(pPhrase->aTerm[i].pIter, &a, &n, &dummy);
    if( rc || sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]) ){
      goto ismatch_out;
    }
  }

  while( 1 ){
    int bMatch;
    i64 iPos = aIter[0].iPos;
    do {
      bMatch = 1;
      for(i=0; i<pPhrase->nTerm; i++){
        Fts5PoslistReader *pPos = &aIter[i];
        i64 iAdj = iPos + i;
        if( pPos->iPos!=iAdj ){
          bMatch = 0;
          while( pPos->iPos<iAdj ){
            if( sqlite3Fts5PoslistReaderNext(pPos) ) goto ismatch_out;
          }
          if( pPos->iPos>iAdj ) iPos = pPos->iPos-i;
        }
      }
    }while( bMatch==0 );

    if( pColset==0 || fts5ExprColsetTest(pColset, FTS5_POS2COLUMN(iPos)) ){
      /* Append position iPos to the output */
      rc = sqlite3Fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos);
      if( rc!=SQLITE_OK ) goto ismatch_out;
    }

    for(i=0; i<pPhrase->nTerm; i++){
      if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out;
    }
  }

 ismatch_out:
  *pbMatch = (pPhrase->poslist.n>0);
  if( aIter!=aStatic ) sqlite3_free(aIter);
  return rc;
}

typedef struct Fts5LookaheadReader Fts5LookaheadReader;
struct Fts5LookaheadReader {
  const u8 *a;                    /* Buffer containing position list */
  int n;                          /* Size of buffer a[] in bytes */
  int i;                          /* Current offset in position list */
  i64 iPos;                       /* Current position */
  i64 iLookahead;                 /* Next position */
};

#define FTS5_LOOKAHEAD_EOF (((i64)1) << 62)

static int fts5LookaheadReaderNext(Fts5LookaheadReader *p){
  p->iPos = p->iLookahead;
  if( sqlite3Fts5PoslistNext64(p->a, p->n, &p->i, &p->iLookahead) ){
    p->iLookahead = FTS5_LOOKAHEAD_EOF;
  }
  return (p->iPos==FTS5_LOOKAHEAD_EOF);
}

static int fts5LookaheadReaderInit(
  const u8 *a, int n,             /* Buffer to read position list from */
  Fts5LookaheadReader *p          /* Iterator object to initialize */
){
  memset(p, 0, sizeof(Fts5LookaheadReader));
  p->a = a;
  p->n = n;
  fts5LookaheadReaderNext(p);
  return fts5LookaheadReaderNext(p);
}

#if 0
static int fts5LookaheadReaderEof(Fts5LookaheadReader *p){
  return (p->iPos==FTS5_LOOKAHEAD_EOF);
}
#endif

typedef struct Fts5NearTrimmer Fts5NearTrimmer;
struct Fts5NearTrimmer {
  Fts5LookaheadReader reader;     /* Input iterator */
  Fts5PoslistWriter writer;       /* Writer context */
  Fts5Buffer *pOut;               /* Output poslist */
};

/*
** The near-set object passed as the first argument contains more than
** one phrase. All phrases currently point to the same row. The
** Fts5ExprPhrase.poslist buffers are populated accordingly. This function
** tests if the current row contains instances of each phrase sufficiently
** close together to meet the NEAR constraint. Non-zero is returned if it
** does, or zero otherwise.
**
** If in/out parameter (*pRc) is set to other than SQLITE_OK when this
** function is called, it is a no-op. Or, if an error (e.g. SQLITE_NOMEM)
** occurs within this function (*pRc) is set accordingly before returning.
** The return value is undefined in both these cases.
** 
** If no error occurs and non-zero (a match) is returned, the position-list
** of each phrase object is edited to contain only those entries that
** meet the constraint before returning.
*/
static int fts5ExprNearIsMatch(int *pRc, Fts5ExprNearset *pNear){
  Fts5NearTrimmer aStatic[4];
  Fts5NearTrimmer *a = aStatic;
  Fts5ExprPhrase **apPhrase = pNear->apPhrase;

  int i;
  int rc = *pRc;
  int bMatch;

  assert( pNear->nPhrase>1 );

  /* If the aStatic[] array is not large enough, allocate a large array
  ** using sqlite3_malloc(). This approach could be improved upon. */
  if( pNear->nPhrase>(sizeof(aStatic) / sizeof(aStatic[0])) ){
    int nByte = sizeof(Fts5NearTrimmer) * pNear->nPhrase;
    a = (Fts5NearTrimmer*)sqlite3Fts5MallocZero(&rc, nByte);
  }else{
    memset(aStatic, 0, sizeof(aStatic));
  }
  if( rc!=SQLITE_OK ){
    *pRc = rc;
    return 0;
  }

  /* Initialize a lookahead iterator for each phrase. After passing the
  ** buffer and buffer size to the lookaside-reader init function, zero
  ** the phrase poslist buffer. The new poslist for the phrase (containing
  ** the same entries as the original with some entries removed on account 
  ** of the NEAR constraint) is written over the original even as it is
  ** being read. This is safe as the entries for the new poslist are a
  ** subset of the old, so it is not possible for data yet to be read to
  ** be overwritten.  */
  for(i=0; i<pNear->nPhrase; i++){
    Fts5Buffer *pPoslist = &apPhrase[i]->poslist;
    fts5LookaheadReaderInit(pPoslist->p, pPoslist->n, &a[i].reader);
    pPoslist->n = 0;
    a[i].pOut = pPoslist;
  }

  while( 1 ){
    int iAdv;
    i64 iMin;
    i64 iMax;

    /* This block advances the phrase iterators until they point to a set of
    ** entries that together comprise a match.  */
    iMax = a[0].reader.iPos;
    do {
      bMatch = 1;
      for(i=0; i<pNear->nPhrase; i++){
        Fts5LookaheadReader *pPos = &a[i].reader;
        iMin = iMax - pNear->apPhrase[i]->nTerm - pNear->nNear;
        if( pPos->iPos<iMin || pPos->iPos>iMax ){
          bMatch = 0;
          while( pPos->iPos<iMin ){
            if( fts5LookaheadReaderNext(pPos) ) goto ismatch_out;
          }
          if( pPos->iPos>iMax ) iMax = pPos->iPos;
        }
      }
    }while( bMatch==0 );

    /* Add an entry to each output position list */
    for(i=0; i<pNear->nPhrase; i++){
      i64 iPos = a[i].reader.iPos;
      Fts5PoslistWriter *pWriter = &a[i].writer;
      if( a[i].pOut->n==0 || iPos!=pWriter->iPrev ){
        sqlite3Fts5PoslistWriterAppend(a[i].pOut, pWriter, iPos);
      }
    }

    iAdv = 0;
    iMin = a[0].reader.iLookahead;
    for(i=0; i<pNear->nPhrase; i++){
      if( a[i].reader.iLookahead < iMin ){
        iMin = a[i].reader.iLookahead;
        iAdv = i;
      }
    }
    if( fts5LookaheadReaderNext(&a[iAdv].reader) ) goto ismatch_out;
  }

  ismatch_out: {
    int bRet = a[0].pOut->n>0;
    *pRc = rc;
    if( a!=aStatic ) sqlite3_free(a);
    return bRet;
  }
}

/*
** Advance the first term iterator in the first phrase of pNear. Set output
** variable *pbEof to true if it reaches EOF or if an error occurs.
**
** Return SQLITE_OK if successful, or an SQLite error code if an error
** occurs.
*/
static int fts5ExprNearAdvanceFirst(
  Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
  Fts5ExprNode *pNode,            /* FTS5_STRING or FTS5_TERM node */
  int bFromValid,
  i64 iFrom 
){
  Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter;
  int rc;

  assert( Fts5NodeIsString(pNode) );
  if( bFromValid ){
    rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
  }else{
    rc = sqlite3Fts5IterNext(pIter);
  }

  pNode->bEof = (rc || sqlite3Fts5IterEof(pIter));
  return rc;
}

/*
** Advance iterator pIter until it points to a value equal to or laster
** than the initial value of *piLast. If this means the iterator points
** to a value laster than *piLast, update *piLast to the new lastest value.
**
** If the iterator reaches EOF, set *pbEof to true before returning. If
** an error occurs, set *pRc to an error code. If either *pbEof or *pRc
** are set, return a non-zero value. Otherwise, return zero.
*/
static int fts5ExprAdvanceto(
  Fts5IndexIter *pIter,           /* Iterator to advance */
  int bDesc,                      /* True if iterator is "rowid DESC" */
  i64 *piLast,                    /* IN/OUT: Lastest rowid seen so far */
  int *pRc,                       /* OUT: Error code */
  int *pbEof                      /* OUT: Set to true if EOF */
){
  i64 iLast = *piLast;
  i64 iRowid;

  iRowid = sqlite3Fts5IterRowid(pIter);
  if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){
    int rc = sqlite3Fts5IterNextFrom(pIter, iLast);
    if( rc || sqlite3Fts5IterEof(pIter) ){
      *pRc = rc;
      *pbEof = 1;
      return 1;
    }
    iRowid = sqlite3Fts5IterRowid(pIter);
    assert( (bDesc==0 && iRowid>=iLast) || (bDesc==1 && iRowid<=iLast) );
  }
  *piLast = iRowid;

  return 0;
}

/*
** IN/OUT parameter (*pa) points to a position list n bytes in size. If
** the position list contains entries for column iCol, then (*pa) is set
** to point to the sub-position-list for that column and the number of
** bytes in it returned. Or, if the argument position list does not
** contain any entries for column iCol, return 0.
*/
static int fts5ExprExtractCol(
  const u8 **pa,                  /* IN/OUT: Pointer to poslist */
  int n,                          /* IN: Size of poslist in bytes */
  int iCol                        /* Column to extract from poslist */
){
  int iCurrent = 0;
  const u8 *p = *pa;
  const u8 *pEnd = &p[n];         /* One byte past end of position list */
  u8 prev = 0;

  while( iCol!=iCurrent ){
    /* Advance pointer p until it points to pEnd or an 0x01 byte that is
    ** not part of a varint */
    while( (prev & 0x80) || *p!=0x01 ){
      prev = *p++;
      if( p==pEnd ) return 0;
    }
    *pa = p++;
    p += fts5GetVarint32(p, iCurrent);
  }

  /* Advance pointer p until it points to pEnd or an 0x01 byte that is
  ** not part of a varint */
  assert( (prev & 0x80)==0 );
  while( p<pEnd && ((prev & 0x80) || *p!=0x01) ){
    prev = *p++;
  }
  return p - (*pa);
}

static int fts5ExprExtractColset (
  Fts5ExprColset *pColset,        /* Colset to filter on */
  const u8 *pPos, int nPos,       /* Position list */
  Fts5Buffer *pBuf                /* Output buffer */
){
  int rc = SQLITE_OK;
  int i;

  fts5BufferZero(pBuf);
  for(i=0; i<pColset->nCol; i++){
    const u8 *pSub = pPos;
    int nSub = fts5ExprExtractCol(&pSub, nPos, pColset->aiCol[i]);
    if( nSub ){
      fts5BufferAppendBlob(&rc, pBuf, nSub, pSub);
    }
  }
  return rc;
}

static int fts5ExprNearTest(
  int *pRc,
  Fts5Expr *pExpr,                /* Expression that pNear is a part of */
  Fts5ExprNode *pNode             /* The "NEAR" node (FTS5_STRING) */
){
  Fts5ExprNearset *pNear = pNode->pNear;
  int rc = *pRc;
  int i;

  /* Check that each phrase in the nearset matches the current row.
  ** Populate the pPhrase->poslist buffers at the same time. If any
  ** phrase is not a match, break out of the loop early.  */
  for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
    Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
    if( pPhrase->nTerm>1 || pNear->pColset ){
      int bMatch = 0;
      rc = fts5ExprPhraseIsMatch(pExpr, pNear->pColset, pPhrase, &bMatch);
      if( bMatch==0 ) break;
    }else{
      rc = sqlite3Fts5IterPoslistBuffer(
          pPhrase->aTerm[0].pIter, &pPhrase->poslist
      );
    }
  }

  *pRc = rc;
  if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){
    return 1;
  }

  return 0;
}

static int fts5ExprTokenTest(
  Fts5Expr *pExpr,                /* Expression that pNear is a part of */
  Fts5ExprNode *pNode             /* The "NEAR" node (FTS5_TERM) */
){
  /* As this "NEAR" object is actually a single phrase that consists 
  ** of a single term only, grab pointers into the poslist managed by the
  ** fts5_index.c iterator object. This is much faster than synthesizing 
  ** a new poslist the way we have to for more complicated phrase or NEAR
  ** expressions.  */
  Fts5ExprNearset *pNear = pNode->pNear;
  Fts5ExprPhrase *pPhrase = pNear->apPhrase[0];
  Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
  Fts5ExprColset *pColset = pNear->pColset;
  const u8 *pPos;
  int nPos;
  int rc;

  assert( pNode->eType==FTS5_TERM );
  assert( pNear->nPhrase==1 && pPhrase->nTerm==1 );

  rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid);

  /* If the term may match any column, then this must be a match. 
  ** Return immediately in this case. Otherwise, try to find the
  ** part of the poslist that corresponds to the required column.
  ** If it can be found, return. If it cannot, the next iteration
  ** of the loop will test the next rowid in the database for this
  ** term.  */
  if( pColset==0 ){
    assert( pPhrase->poslist.nSpace==0 );
    pPhrase->poslist.p = (u8*)pPos;
    pPhrase->poslist.n = nPos;
  }else if( pColset->nCol==1 ){
    assert( pPhrase->poslist.nSpace==0 );
    pPhrase->poslist.n = fts5ExprExtractCol(&pPos, nPos, pColset->aiCol[0]);
    pPhrase->poslist.p = (u8*)pPos;
  }else if( rc==SQLITE_OK ){
    rc = fts5ExprExtractColset(pColset, pPos, nPos, &pPhrase->poslist);
  }

  pNode->bNomatch = (pPhrase->poslist.n==0);
  return rc;
}

/*
** All individual term iterators in pNear are guaranteed to be valid when
** this function is called. This function checks if all term iterators
** point to the same rowid, and if not, advances them until they do.
** If an EOF is reached before this happens, *pbEof is set to true before
** returning.
**
** SQLITE_OK is returned if an error occurs, or an SQLite error code 
** otherwise. It is not considered an error code if an iterator reaches
** EOF.
*/
static int fts5ExprNearNextMatch(
  Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
  Fts5ExprNode *pNode
){
  Fts5ExprNearset *pNear = pNode->pNear;
  Fts5ExprPhrase *pLeft = pNear->apPhrase[0];
  int rc = SQLITE_OK;
  i64 iLast;                      /* Lastest rowid any iterator points to */
  int i, j;                       /* Phrase and token index, respectively */
  int bMatch;                     /* True if all terms are at the same rowid */

  assert( pNear->nPhrase>1 || pNear->apPhrase[0]->nTerm>1 );

  /* Initialize iLast, the "lastest" rowid any iterator points to. If the
  ** iterator skips through rowids in the default ascending order, this means
  ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it
  ** means the minimum rowid.  */
  iLast = sqlite3Fts5IterRowid(pLeft->aTerm[0].pIter);

  do {
    bMatch = 1;
    for(i=0; i<pNear->nPhrase; i++){
      Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
      for(j=0; j<pPhrase->nTerm; j++){
        Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter;
        i64 iRowid = sqlite3Fts5IterRowid(pIter);
        if( iRowid!=iLast ) bMatch = 0;
        if( fts5ExprAdvanceto(pIter, pExpr->bDesc, &iLast,&rc,&pNode->bEof) ){
          return rc;
        }
      }
    }
  }while( bMatch==0 );

  pNode->bNomatch = (0==fts5ExprNearTest(&rc, pExpr, pNode));
  pNode->iRowid = iLast;

  return rc;
}

/*
** Initialize all term iterators in the pNear object. If any term is found
** to match no documents at all, set *pbEof to true and return immediately,
** without initializing any further iterators.
*/
static int fts5ExprNearInitAll(
  Fts5Expr *pExpr,
  Fts5ExprNode *pNode
){
  Fts5ExprNearset *pNear = pNode->pNear;
  Fts5ExprTerm *pTerm;
  Fts5ExprPhrase *pPhrase;
  int i, j;
  int rc = SQLITE_OK;

  for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
    pPhrase = pNear->apPhrase[i];
    for(j=0; j<pPhrase->nTerm; j++){
      pTerm = &pPhrase->aTerm[j];
      if( pTerm->pIter ){
        sqlite3Fts5IterClose(pTerm->pIter);
        pTerm->pIter = 0;
      }
      rc = sqlite3Fts5IndexQuery(
          pExpr->pIndex, pTerm->zTerm, strlen(pTerm->zTerm),
          (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) |
          (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0),
          &pTerm->pIter
      );
      assert( rc==SQLITE_OK || pTerm->pIter==0 );
      if( pTerm->pIter==0 || sqlite3Fts5IterEof(pTerm->pIter) ){
        pNode->bEof = 1;
        break;
      }
    }
  }

  return rc;
}

/* fts5ExprNodeNext() calls fts5ExprNodeNextMatch(). And vice-versa. */
static int fts5ExprNodeNextMatch(Fts5Expr*, Fts5ExprNode*);


/*
** If pExpr is an ASC iterator, this function returns a value with the
** same sign as:
**
**   (iLhs - iRhs)
**
** Otherwise, if this is a DESC iterator, the opposite is returned:
**
**   (iRhs - iLhs)
*/
static int fts5RowidCmp(
  Fts5Expr *pExpr,
  i64 iLhs,
  i64 iRhs
){
  assert( pExpr->bDesc==0 || pExpr->bDesc==1 );
  if( pExpr->bDesc==0 ){
    if( iLhs<iRhs ) return -1;
    return (iLhs > iRhs);
  }else{
    if( iLhs>iRhs ) return -1;
    return (iLhs < iRhs);
  }
}

static void fts5ExprSetEof(Fts5ExprNode *pNode){
  int i;
  pNode->bEof = 1;
  for(i=0; i<pNode->nChild; i++){
    fts5ExprSetEof(pNode->apChild[i]);
  }
}

static void fts5ExprNodeZeroPoslist(Fts5ExprNode *pNode){
  if( pNode->eType==FTS5_STRING || pNode->eType==FTS5_TERM ){
    Fts5ExprNearset *pNear = pNode->pNear;
    int i;
    for(i=0; i<pNear->nPhrase; i++){
      Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
      pPhrase->poslist.n = 0;
    }
  }else{
    int i;
    for(i=0; i<pNode->nChild; i++){
      fts5ExprNodeZeroPoslist(pNode->apChild[i]);
    }
  }
}


static int fts5ExprNodeNext(Fts5Expr*, Fts5ExprNode*, int, i64);

/*
** Argument pNode is an FTS5_AND node.
*/
static int fts5ExprAndNextRowid(
  Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
  Fts5ExprNode *pAnd              /* FTS5_AND node to advance */
){
  int iChild;
  i64 iLast = pAnd->iRowid;
  int rc = SQLITE_OK;
  int bMatch;

  assert( pAnd->bEof==0 );
  do {
    pAnd->bNomatch = 0;
    bMatch = 1;
    for(iChild=0; iChild<pAnd->nChild; iChild++){
      Fts5ExprNode *pChild = pAnd->apChild[iChild];
      if( 0 && pChild->eType==FTS5_STRING ){
        /* TODO */
      }else{
        int cmp = fts5RowidCmp(pExpr, iLast, pChild->iRowid);
        if( cmp>0 ){
          /* Advance pChild until it points to iLast or laster */
          rc = fts5ExprNodeNext(pExpr, pChild, 1, iLast);
          if( rc!=SQLITE_OK ) return rc;
        }
      }

      /* If the child node is now at EOF, so is the parent AND node. Otherwise,
      ** the child node is guaranteed to have advanced at least as far as
      ** rowid iLast. So if it is not at exactly iLast, pChild->iRowid is the
      ** new lastest rowid seen so far.  */
      assert( pChild->bEof || fts5RowidCmp(pExpr, iLast, pChild->iRowid)<=0 );
      if( pChild->bEof ){
        fts5ExprSetEof(pAnd);
        bMatch = 1;
        break;
      }else if( iLast!=pChild->iRowid ){
        bMatch = 0;
        iLast = pChild->iRowid;
      }

      if( pChild->bNomatch ){
        pAnd->bNomatch = 1;
      }
    }
  }while( bMatch==0 );

  if( pAnd->bNomatch && pAnd!=pExpr->pRoot ){
    fts5ExprNodeZeroPoslist(pAnd);
  }
  pAnd->iRowid = iLast;
  return SQLITE_OK;
}


/*
** Compare the values currently indicated by the two nodes as follows:
**
**    res = (*p1) - (*p2)
**
** Nodes that point to values that come later in the iteration order are
** considered to be larger. Nodes at EOF are the largest of all.
**
** This means that if the iteration order is ASC, then numerically larger
** rowids are considered larger. Or if it is the default DESC, numerically
** smaller rowids are larger.
*/
static int fts5NodeCompare(
  Fts5Expr *pExpr,
  Fts5ExprNode *p1, 
  Fts5ExprNode *p2
){
  if( p2->bEof ) return -1;
  if( p1->bEof ) return +1;
  return fts5RowidCmp(pExpr, p1->iRowid, p2->iRowid);
}

/*
** Advance node iterator pNode, part of expression pExpr. If argument
** bFromValid is zero, then pNode is advanced exactly once. Or, if argument
** bFromValid is non-zero, then pNode is advanced until it is at or past
** rowid value iFrom. Whether "past" means "less than" or "greater than"
** depends on whether this is an ASC or DESC iterator.
*/
static int fts5ExprNodeNext(
  Fts5Expr *pExpr, 
  Fts5ExprNode *pNode,
  int bFromValid,
  i64 iFrom
){
  int rc = SQLITE_OK;

  if( pNode->bEof==0 ){
    switch( pNode->eType ){
      case FTS5_STRING: {
        rc = fts5ExprNearAdvanceFirst(pExpr, pNode, bFromValid, iFrom);
        break;
      };

      case FTS5_TERM: {
        rc = fts5ExprNearAdvanceFirst(pExpr, pNode, bFromValid, iFrom);
        if( pNode->bEof==0 ){
          assert( rc==SQLITE_OK );
          rc = fts5ExprTokenTest(pExpr, pNode);
        }
        return rc;
      };

      case FTS5_AND: {
        Fts5ExprNode *pLeft = pNode->apChild[0];
        rc = fts5ExprNodeNext(pExpr, pLeft, bFromValid, iFrom);
        break;
      }

      case FTS5_OR: {
        int i;
        i64 iLast = pNode->iRowid;

        for(i=0; rc==SQLITE_OK && i<pNode->nChild; i++){
          Fts5ExprNode *p1 = pNode->apChild[i];
          assert( p1->bEof || fts5RowidCmp(pExpr, p1->iRowid, iLast)>=0 );
          if( p1->bEof==0 ){
            if( (p1->iRowid==iLast) 
             || (bFromValid && fts5RowidCmp(pExpr, p1->iRowid, iFrom)<0)
            ){
              rc = fts5ExprNodeNext(pExpr, p1, bFromValid, iFrom);
            }
          }
        }

        break;
      }

      default: assert( pNode->eType==FTS5_NOT ); {
        assert( pNode->nChild==2 );
        rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom);
        break;
      }
    }

    if( rc==SQLITE_OK ){
      rc = fts5ExprNodeNextMatch(pExpr, pNode);
    }
  }

  /* Assert that if bFromValid was true, either:
  **
  **   a) an error occurred, or
  **   b) the node is now at EOF, or
  **   c) the node is now at or past rowid iFrom.
  */
  assert( bFromValid==0 
      || rc!=SQLITE_OK                                                  /* a */
      || pNode->bEof                                                    /* b */
      || pNode->iRowid==iFrom || pExpr->bDesc==(pNode->iRowid<iFrom)    /* c */
  );

  return rc;
}


/*
** If pNode currently points to a match, this function returns SQLITE_OK
** without modifying it. Otherwise, pNode is advanced until it does point
** to a match or EOF is reached.
*/
static int fts5ExprNodeNextMatch(
  Fts5Expr *pExpr,                /* Expression of which pNode is a part */
  Fts5ExprNode *pNode             /* Expression node to test */
){
  int rc = SQLITE_OK;
  if( pNode->bEof==0 ){
    switch( pNode->eType ){

      case FTS5_STRING: {
        /* Advance the iterators until they all point to the same rowid */
        rc = fts5ExprNearNextMatch(pExpr, pNode);
        break;
      }

      case FTS5_TERM: {
        rc = fts5ExprTokenTest(pExpr, pNode);
        break;
      }

      case FTS5_AND: {
        rc = fts5ExprAndNextRowid(pExpr, pNode);
        break;
      }

      case FTS5_OR: {
        Fts5ExprNode *pNext = pNode->apChild[0];
        int i;

        for(i=1; i<pNode->nChild; i++){
          Fts5ExprNode *pChild = pNode->apChild[i];
          int cmp = fts5NodeCompare(pExpr, pNext, pChild);
          if( cmp>0 || (cmp==0 && pChild->bNomatch==0) ){
            pNext = pChild;
          }
        }
        pNode->iRowid = pNext->iRowid;
        pNode->bEof = pNext->bEof;
        pNode->bNomatch = pNext->bNomatch;
        break;
      }

      default: assert( pNode->eType==FTS5_NOT ); {
        Fts5ExprNode *p1 = pNode->apChild[0];
        Fts5ExprNode *p2 = pNode->apChild[1];
        assert( pNode->nChild==2 );

        while( rc==SQLITE_OK && p1->bEof==0 ){
          int cmp = fts5NodeCompare(pExpr, p1, p2);
          if( cmp>0 ){
            rc = fts5ExprNodeNext(pExpr, p2, 1, p1->iRowid);
            cmp = fts5NodeCompare(pExpr, p1, p2);
          }
          assert( rc!=SQLITE_OK || cmp<=0 );
          if( cmp || p2->bNomatch ) break;
          rc = fts5ExprNodeNext(pExpr, p1, 0, 0);
        }
        pNode->bEof = p1->bEof;
        pNode->iRowid = p1->iRowid;
        break;
      }
    }
  }
  return rc;
}

 
/*
** Set node pNode, which is part of expression pExpr, to point to the first
** match. If there are no matches, set the Node.bEof flag to indicate EOF.
**
** Return an SQLite error code if an error occurs, or SQLITE_OK otherwise.
** It is not an error if there are no matches.
*/
static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){
  int rc = SQLITE_OK;
  pNode->bEof = 0;

  if( Fts5NodeIsString(pNode) ){
    /* Initialize all term iterators in the NEAR object. */
    rc = fts5ExprNearInitAll(pExpr, pNode);
  }else{
    int i;
    for(i=0; i<pNode->nChild && rc==SQLITE_OK; i++){
      rc = fts5ExprNodeFirst(pExpr, pNode->apChild[i]);
    }
    pNode->iRowid = pNode->apChild[0]->iRowid;
  }

  if( rc==SQLITE_OK ){
    rc = fts5ExprNodeNextMatch(pExpr, pNode);
  }
  return rc;
}


/*
** Begin iterating through the set of documents in index pIdx matched by
** the MATCH expression passed as the first argument. If the "bDesc" 
** parameter is passed a non-zero value, iteration is in descending rowid 
** order. Or, if it is zero, in ascending order.
**
** If iterating in ascending rowid order (bDesc==0), the first document
** visited is that with the smallest rowid that is larger than or equal
** to parameter iFirst. Or, if iterating in ascending order (bDesc==1),
** then the first document visited must have a rowid smaller than or
** equal to iFirst.
**
** Return SQLITE_OK if successful, or an SQLite error code otherwise. It
** is not considered an error if the query does not match any documents.
*/
int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, i64 iFirst, int bDesc){
  Fts5ExprNode *pRoot = p->pRoot;
  int rc = SQLITE_OK;
  if( pRoot ){
    p->pIndex = pIdx;
    p->bDesc = bDesc;
    rc = fts5ExprNodeFirst(p, pRoot);

    /* If not at EOF but the current rowid occurs earlier than iFirst in
    ** the iteration order, move to document iFirst or later. */
    if( pRoot->bEof==0 && fts5RowidCmp(p, pRoot->iRowid, iFirst)<0 ){
      rc = fts5ExprNodeNext(p, pRoot, 1, iFirst);
    }

    /* If the iterator is not at a real match, skip forward until it is. */
    while( pRoot->bNomatch && rc==SQLITE_OK && pRoot->bEof==0 ){
      rc = fts5ExprNodeNext(p, pRoot, 0, 0);
    }
  }
  return rc;
}

/*
** Move to the next document 
**
** Return SQLITE_OK if successful, or an SQLite error code otherwise. It
** is not considered an error if the query does not match any documents.
*/
int sqlite3Fts5ExprNext(Fts5Expr *p, i64 iLast){
  int rc;
  Fts5ExprNode *pRoot = p->pRoot;
  do {
    rc = fts5ExprNodeNext(p, pRoot, 0, 0);
  }while( pRoot->bNomatch && pRoot->bEof==0 && rc==SQLITE_OK );
  if( fts5RowidCmp(p, pRoot->iRowid, iLast)>0 ){
    pRoot->bEof = 1;
  }
  return rc;
}

int sqlite3Fts5ExprEof(Fts5Expr *p){
  return (p->pRoot==0 || p->pRoot->bEof);
}

i64 sqlite3Fts5ExprRowid(Fts5Expr *p){
  return p->pRoot->iRowid;
}

static int fts5ParseStringFromToken(Fts5Token *pToken, char **pz){
  int rc = SQLITE_OK;
  *pz = sqlite3Fts5Strndup(&rc, pToken->p, pToken->n);
  return rc;
}

/*
** Free the phrase object passed as the only argument.
*/
static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){
  if( pPhrase ){
    int i;
    for(i=0; i<pPhrase->nTerm; i++){
      Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
      sqlite3_free(pTerm->zTerm);
      if( pTerm->pIter ){
        sqlite3Fts5IterClose(pTerm->pIter);
      }
    }
    if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist);
    sqlite3_free(pPhrase);
  }
}

/*
** If argument pNear is NULL, then a new Fts5ExprNearset object is allocated
** and populated with pPhrase. Or, if pNear is not NULL, phrase pPhrase is
** appended to it and the results returned.
**
** If an OOM error occurs, both the pNear and pPhrase objects are freed and
** NULL returned.
*/
Fts5ExprNearset *sqlite3Fts5ParseNearset(
  Fts5Parse *pParse,              /* Parse context */
  Fts5ExprNearset *pNear,         /* Existing nearset, or NULL */
  Fts5ExprPhrase *pPhrase         /* Recently parsed phrase */
){
  const int SZALLOC = 8;
  Fts5ExprNearset *pRet = 0;

  if( pParse->rc==SQLITE_OK ){
    if( pPhrase==0 ){
      return pNear;
    }
    if( pNear==0 ){
      int nByte = sizeof(Fts5ExprNearset) + SZALLOC * sizeof(Fts5ExprPhrase*);
      pRet = sqlite3_malloc(nByte);
      if( pRet==0 ){
        pParse->rc = SQLITE_NOMEM;
      }else{
        memset(pRet, 0, nByte);
      }
    }else if( (pNear->nPhrase % SZALLOC)==0 ){
      int nNew = pNear->nPhrase + SZALLOC;
      int nByte = sizeof(Fts5ExprNearset) + nNew * sizeof(Fts5ExprPhrase*);

      pRet = (Fts5ExprNearset*)sqlite3_realloc(pNear, nByte);
      if( pRet==0 ){
        pParse->rc = SQLITE_NOMEM;
      }
    }else{
      pRet = pNear;
    }
  }

  if( pRet==0 ){
    assert( pParse->rc!=SQLITE_OK );
    sqlite3Fts5ParseNearsetFree(pNear);
    sqlite3Fts5ParsePhraseFree(pPhrase);
  }else{
    pRet->apPhrase[pRet->nPhrase++] = pPhrase;
  }
  return pRet;
}

typedef struct TokenCtx TokenCtx;
struct TokenCtx {
  Fts5ExprPhrase *pPhrase;
};

/*
** Callback for tokenizing terms used by ParseTerm().
*/
static int fts5ParseTokenize(
  void *pContext,                 /* Pointer to Fts5InsertCtx object */
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStart,                     /* Start offset of token */
  int iEnd                        /* End offset of token */
){
  int rc = SQLITE_OK;
  const int SZALLOC = 8;
  TokenCtx *pCtx = (TokenCtx*)pContext;
  Fts5ExprPhrase *pPhrase = pCtx->pPhrase;
  Fts5ExprTerm *pTerm;

  if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
    Fts5ExprPhrase *pNew;
    int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);

    pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, 
        sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew
    );
    if( pNew==0 ) return SQLITE_NOMEM;
    if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
    pCtx->pPhrase = pPhrase = pNew;
    pNew->nTerm = nNew - SZALLOC;
  }

  pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
  memset(pTerm, 0, sizeof(Fts5ExprTerm));
  pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);

  return rc;
}


/*
** Free the phrase object passed as the only argument.
*/
void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase *pPhrase){
  fts5ExprPhraseFree(pPhrase);
}

/*
** Free the phrase object passed as the second argument.
*/
void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset *pNear){
  if( pNear ){
    int i;
    for(i=0; i<pNear->nPhrase; i++){
      fts5ExprPhraseFree(pNear->apPhrase[i]);
    }
    sqlite3_free(pNear->pColset);
    sqlite3_free(pNear);
  }
}

void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p){
  assert( pParse->pExpr==0 );
  pParse->pExpr = p;
}

/*
** This function is called by the parser to process a string token. The
** string may or may not be quoted. In any case it is tokenized and a
** phrase object consisting of all tokens returned.
*/
Fts5ExprPhrase *sqlite3Fts5ParseTerm(
  Fts5Parse *pParse,              /* Parse context */
  Fts5ExprPhrase *pAppend,        /* Phrase to append to */
  Fts5Token *pToken,              /* String to tokenize */
  int bPrefix                     /* True if there is a trailing "*" */
){
  Fts5Config *pConfig = pParse->pConfig;
  TokenCtx sCtx;                  /* Context object passed to callback */
  int rc;                         /* Tokenize return code */
  char *z = 0;

  memset(&sCtx, 0, sizeof(TokenCtx));
  sCtx.pPhrase = pAppend;

  rc = fts5ParseStringFromToken(pToken, &z);
  if( rc==SQLITE_OK ){
    sqlite3Fts5Dequote(z);
    rc = sqlite3Fts5Tokenize(pConfig, z, strlen(z), &sCtx, fts5ParseTokenize);
  }
  sqlite3_free(z);
  if( rc ){
    pParse->rc = rc;
    fts5ExprPhraseFree(sCtx.pPhrase);
    sCtx.pPhrase = 0;
  }else if( sCtx.pPhrase ){

    if( pAppend==0 ){
      if( (pParse->nPhrase % 8)==0 ){
        int nByte = sizeof(Fts5ExprPhrase*) * (pParse->nPhrase + 8);
        Fts5ExprPhrase **apNew;
        apNew = (Fts5ExprPhrase**)sqlite3_realloc(pParse->apPhrase, nByte);
        if( apNew==0 ){
          pParse->rc = SQLITE_NOMEM;
          fts5ExprPhraseFree(sCtx.pPhrase);
          return 0;
        }
        pParse->apPhrase = apNew;
      }
      pParse->nPhrase++;
    }

    pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase;
    assert( sCtx.pPhrase->nTerm>0 );
    sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = bPrefix;
  }

  return sCtx.pPhrase;
}

/*
** Token pTok has appeared in a MATCH expression where the NEAR operator
** is expected. If token pTok does not contain "NEAR", store an error
** in the pParse object.
*/
void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){
  if( pTok->n!=4 || memcmp("NEAR", pTok->p, 4) ){
    sqlite3Fts5ParseError(
        pParse, "fts5: syntax error near \"%.*s\"", pTok->n, pTok->p
    );
  }
}

void sqlite3Fts5ParseSetDistance(
  Fts5Parse *pParse, 
  Fts5ExprNearset *pNear,
  Fts5Token *p
){
  int nNear = 0;
  int i;
  if( p->n ){
    for(i=0; i<p->n; i++){
      char c = (char)p->p[i];
      if( c<'0' || c>'9' ){
        sqlite3Fts5ParseError(
            pParse, "expected integer, got \"%.*s\"", p->n, p->p
        );
        return;
      }
      nNear = nNear * 10 + (p->p[i] - '0');
    }
  }else{
    nNear = FTS5_DEFAULT_NEARDIST;
  }
  pNear->nNear = nNear;
}

/*
** The second argument passed to this function may be NULL, or it may be
** an existing Fts5ExprColset object. This function returns a pointer to
** a new colset object containing the contents of (p) with new value column
** number iCol appended. 
**
** If an OOM error occurs, store an error code in pParse and return NULL.
** The old colset object (if any) is not freed in this case.
*/
static Fts5ExprColset *fts5ParseColset(
  Fts5Parse *pParse,              /* Store SQLITE_NOMEM here if required */
  Fts5ExprColset *p,              /* Existing colset object */
  int iCol                        /* New column to add to colset object */
){
  int nCol = p ? p->nCol : 0;     /* Num. columns already in colset object */
  Fts5ExprColset *pNew;           /* New colset object to return */

  assert( pParse->rc==SQLITE_OK );
  assert( iCol>=0 && iCol<pParse->pConfig->nCol );

  pNew = sqlite3_realloc(p, sizeof(Fts5ExprColset) + sizeof(int)*nCol);
  if( pNew==0 ){
    pParse->rc = SQLITE_NOMEM;
  }else{
    int *aiCol = pNew->aiCol;
    int i, j;
    for(i=0; i<nCol; i++){
      if( aiCol[i]==iCol ) return pNew;
      if( aiCol[i]>iCol ) break;
    }
    for(j=nCol; j>i; j--){
      aiCol[j] = aiCol[j-1];
    }
    aiCol[i] = iCol;
    pNew->nCol = nCol+1;

#ifndef NDEBUG
    /* Check that the array is in order and contains no duplicate entries. */
    for(i=1; i<pNew->nCol; i++) assert( pNew->aiCol[i]>pNew->aiCol[i-1] );
#endif
  }

  return pNew;
}

Fts5ExprColset *sqlite3Fts5ParseColset(
  Fts5Parse *pParse,              /* Store SQLITE_NOMEM here if required */
  Fts5ExprColset *pColset,        /* Existing colset object */
  Fts5Token *p
){
  Fts5ExprColset *pRet = 0;
  int iCol;
  char *z;                        /* Dequoted copy of token p */

  z = sqlite3Fts5Strndup(&pParse->rc, p->p, p->n);
  if( pParse->rc==SQLITE_OK ){
    Fts5Config *pConfig = pParse->pConfig;
    sqlite3Fts5Dequote(z);
    for(iCol=0; iCol<pConfig->nCol; iCol++){
      if( 0==sqlite3_stricmp(pConfig->azCol[iCol], z) ) break;
    }
    if( iCol==pConfig->nCol ){
      sqlite3Fts5ParseError(pParse, "no such column: %s", z);
    }else{
      pRet = fts5ParseColset(pParse, pColset, iCol);
    }
    sqlite3_free(z);
  }

  if( pRet==0 ){
    assert( pParse->rc!=SQLITE_OK );
    sqlite3_free(pColset);
  }

  return pRet;
}

void sqlite3Fts5ParseSetColset(
  Fts5Parse *pParse, 
  Fts5ExprNearset *pNear, 
  Fts5ExprColset *pColset 
){
  if( pNear ){
    pNear->pColset = pColset;
  }else{
    sqlite3_free(pColset);
  }
}

static void fts5ExprAddChildren(Fts5ExprNode *p, Fts5ExprNode *pSub){
  if( p->eType!=FTS5_NOT && pSub->eType==p->eType ){
    int nByte = sizeof(Fts5ExprNode*) * pSub->nChild;
    memcpy(&p->apChild[p->nChild], pSub->apChild, nByte);
    p->nChild += pSub->nChild;
    sqlite3_free(pSub);
  }else{
    p->apChild[p->nChild++] = pSub;
  }
}

/*
** Allocate and return a new expression object. If anything goes wrong (i.e.
** OOM error), leave an error code in pParse and return NULL.
*/
Fts5ExprNode *sqlite3Fts5ParseNode(
  Fts5Parse *pParse,              /* Parse context */
  int eType,                      /* FTS5_STRING, AND, OR or NOT */
  Fts5ExprNode *pLeft,            /* Left hand child expression */
  Fts5ExprNode *pRight,           /* Right hand child expression */
  Fts5ExprNearset *pNear          /* For STRING expressions, the near cluster */
){
  Fts5ExprNode *pRet = 0;

  if( pParse->rc==SQLITE_OK ){
    int nChild = 0;               /* Number of children of returned node */
    int nByte;                    /* Bytes of space to allocate for this node */
 
    assert( (eType!=FTS5_STRING && !pNear)
         || (eType==FTS5_STRING && !pLeft && !pRight)
    );
    if( eType==FTS5_STRING && pNear==0 ) return 0;
    if( eType!=FTS5_STRING && pLeft==0 ) return pRight;
    if( eType!=FTS5_STRING && pRight==0 ) return pLeft;

    if( eType==FTS5_NOT ){
      nChild = 2;
    }else if( eType==FTS5_AND || eType==FTS5_OR ){
      nChild = 2;
      if( pLeft->eType==eType ) nChild += pLeft->nChild-1;
      if( pRight->eType==eType ) nChild += pRight->nChild-1;
    }

    nByte = sizeof(Fts5ExprNode) + sizeof(Fts5ExprNode*)*nChild;
    pRet = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte);

    if( pRet ){
      pRet->eType = eType;
      pRet->pNear = pNear;
      if( eType==FTS5_STRING ){
        int iPhrase;
        for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){
          pNear->apPhrase[iPhrase]->pNode = pRet;
        }
        if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 ){
          pRet->eType = FTS5_TERM;
        }
      }else{
        fts5ExprAddChildren(pRet, pLeft);
        fts5ExprAddChildren(pRet, pRight);
      }
    }
  }

  if( pRet==0 ){
    assert( pParse->rc!=SQLITE_OK );
    sqlite3Fts5ParseNodeFree(pLeft);
    sqlite3Fts5ParseNodeFree(pRight);
    sqlite3Fts5ParseNearsetFree(pNear);
  }
  return pRet;
}

static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){
  char *zQuoted = sqlite3_malloc(strlen(pTerm->zTerm) * 2 + 3 + 2);
  if( zQuoted ){
    int i = 0;
    char *zIn = pTerm->zTerm;
    zQuoted[i++] = '"';
    while( *zIn ){
      if( *zIn=='"' ) zQuoted[i++] = '"';
      zQuoted[i++] = *zIn++;
    }
    zQuoted[i++] = '"';
    if( pTerm->bPrefix ){
      zQuoted[i++] = ' ';
      zQuoted[i++] = '*';
    }
    zQuoted[i++] = '\0';
  }
  return zQuoted;
}

static char *fts5PrintfAppend(char *zApp, const char *zFmt, ...){
  char *zNew;
  va_list ap;
  va_start(ap, zFmt);
  zNew = sqlite3_vmprintf(zFmt, ap);
  va_end(ap);
  if( zApp && zNew ){
    char *zNew2 = sqlite3_mprintf("%s%s", zApp, zNew);
    sqlite3_free(zNew);
    zNew = zNew2;
  }
  sqlite3_free(zApp);
  return zNew;
}

/*
** Compose a tcl-readable representation of expression pExpr. Return a 
** pointer to a buffer containing that representation. It is the 
** responsibility of the caller to at some point free the buffer using 
** sqlite3_free().
*/
static char *fts5ExprPrintTcl(
  Fts5Config *pConfig, 
  const char *zNearsetCmd,
  Fts5ExprNode *pExpr
){
  char *zRet = 0;
  if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){
    Fts5ExprNearset *pNear = pExpr->pNear;
    int i; 
    int iTerm;

    zRet = fts5PrintfAppend(zRet, "%s ", zNearsetCmd);
    if( zRet==0 ) return 0;
    if( pNear->pColset ){
      int *aiCol = pNear->pColset->aiCol;
      int nCol = pNear->pColset->nCol;
      if( nCol==1 ){
        zRet = fts5PrintfAppend(zRet, "-col %d ", aiCol[0]);
      }else{
        zRet = fts5PrintfAppend(zRet, "-col {%d", aiCol[0]);
        for(i=1; i<pNear->pColset->nCol; i++){
          zRet = fts5PrintfAppend(zRet, " %d", aiCol[i]);
        }
        zRet = fts5PrintfAppend(zRet, "} ");
      }
      if( zRet==0 ) return 0;
    }

    if( pNear->nPhrase>1 ){
      zRet = fts5PrintfAppend(zRet, "-near %d ", pNear->nNear);
      if( zRet==0 ) return 0;
    }

    zRet = fts5PrintfAppend(zRet, "--");
    if( zRet==0 ) return 0;

    for(i=0; i<pNear->nPhrase; i++){
      Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];

      zRet = fts5PrintfAppend(zRet, " {");
      for(iTerm=0; zRet && iTerm<pPhrase->nTerm; iTerm++){
        char *zTerm = pPhrase->aTerm[iTerm].zTerm;
        zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" ", zTerm);
      }

      if( zRet ) zRet = fts5PrintfAppend(zRet, "}");
      if( zRet==0 ) return 0;
    }

  }else{
    char const *zOp = 0;
    int i;
    switch( pExpr->eType ){
      case FTS5_AND: zOp = "AND"; break;
      case FTS5_NOT: zOp = "NOT"; break;
      default: 
        assert( pExpr->eType==FTS5_OR );
        zOp = "OR"; 
        break;
    }

    zRet = sqlite3_mprintf("%s", zOp);
    for(i=0; zRet && i<pExpr->nChild; i++){
      char *z = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->apChild[i]);
      if( !z ){
        sqlite3_free(zRet);
        zRet = 0;
      }else{
        zRet = fts5PrintfAppend(zRet, " [%z]", z);
      }
    }
  }

  return zRet;
}

static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){
  char *zRet = 0;
  if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){
    Fts5ExprNearset *pNear = pExpr->pNear;
    int i; 
    int iTerm;

    if( pNear->pColset ){
      int iCol = pNear->pColset->aiCol[0];
      zRet = fts5PrintfAppend(zRet, "%s : ", pConfig->azCol[iCol]);
      if( zRet==0 ) return 0;
    }

    if( pNear->nPhrase>1 ){
      zRet = fts5PrintfAppend(zRet, "NEAR(");
      if( zRet==0 ) return 0;
    }

    for(i=0; i<pNear->nPhrase; i++){
      Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
      if( i!=0 ){
        zRet = fts5PrintfAppend(zRet, " ");
        if( zRet==0 ) return 0;
      }
      for(iTerm=0; iTerm<pPhrase->nTerm; iTerm++){
        char *zTerm = fts5ExprTermPrint(&pPhrase->aTerm[iTerm]);
        if( zTerm ){
          zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" + ", zTerm);
          sqlite3_free(zTerm);
        }
        if( zTerm==0 || zRet==0 ){
          sqlite3_free(zRet);
          return 0;
        }
      }
    }

    if( pNear->nPhrase>1 ){
      zRet = fts5PrintfAppend(zRet, ", %d)", pNear->nNear);
      if( zRet==0 ) return 0;
    }

  }else{
    char const *zOp = 0;
    int i;

    switch( pExpr->eType ){
      case FTS5_AND: zOp = " AND "; break;
      case FTS5_NOT: zOp = " NOT "; break;
      default:  
        assert( pExpr->eType==FTS5_OR );
        zOp = " OR "; 
        break;
    }

    for(i=0; i<pExpr->nChild; i++){
      char *z = fts5ExprPrint(pConfig, pExpr->apChild[i]);
      if( z==0 ){
        sqlite3_free(zRet);
        zRet = 0;
      }else{
        int e = pExpr->apChild[i]->eType;
        int b = (e!=FTS5_STRING && e!=FTS5_TERM);
        zRet = fts5PrintfAppend(zRet, "%s%s%z%s", 
            (i==0 ? "" : zOp),
            (b?"(":""), z, (b?")":"")
        );
      }
      if( zRet==0 ) break;
    }
  }

  return zRet;
}

/*
** The implementation of user-defined scalar functions fts5_expr() (bTcl==0)
** and fts5_expr_tcl() (bTcl!=0).
*/
static void fts5ExprFunction(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args */
  sqlite3_value **apVal,          /* Function arguments */
  int bTcl
){
  Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_data(pCtx);
  sqlite3 *db = sqlite3_context_db_handle(pCtx);
  const char *zExpr = 0;
  char *zErr = 0;
  Fts5Expr *pExpr = 0;
  int rc;
  int i;

  const char **azConfig;          /* Array of arguments for Fts5Config */
  const char *zNearsetCmd = "nearset";
  int nConfig;                    /* Size of azConfig[] */
  Fts5Config *pConfig = 0;
  int iArg = 1;

  if( bTcl && nArg>1 ){
    zNearsetCmd = (const char*)sqlite3_value_text(apVal[1]);
    iArg = 2;
  }

  nConfig = 3 + (nArg-iArg);
  azConfig = (const char**)sqlite3_malloc(sizeof(char*) * nConfig);
  if( azConfig==0 ){
    sqlite3_result_error_nomem(pCtx);
    return;
  }
  azConfig[0] = 0;
  azConfig[1] = "main";
  azConfig[2] = "tbl";
  for(i=3; iArg<nArg; iArg++){
    azConfig[i++] = (const char*)sqlite3_value_text(apVal[iArg]);
  }

  zExpr = (const char*)sqlite3_value_text(apVal[0]);

  rc = sqlite3Fts5ConfigParse(pGlobal, db, nConfig, azConfig, &pConfig, &zErr);
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts5ExprNew(pConfig, zExpr, &pExpr, &zErr);
  }
  if( rc==SQLITE_OK ){
    char *zText;
    if( pExpr->pRoot==0 ){
      zText = sqlite3_mprintf("");
    }else if( bTcl ){
      zText = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRoot);
    }else{
      zText = fts5ExprPrint(pConfig, pExpr->pRoot);
    }
    if( zText==0 ){
      rc = SQLITE_NOMEM;
    }else{
      sqlite3_result_text(pCtx, zText, -1, SQLITE_TRANSIENT);
      sqlite3_free(zText);
    }
  }

  if( rc!=SQLITE_OK ){
    if( zErr ){
      sqlite3_result_error(pCtx, zErr, -1);
      sqlite3_free(zErr);
    }else{
      sqlite3_result_error_code(pCtx, rc);
    }
  }
  sqlite3_free((void *)azConfig);
  sqlite3Fts5ConfigFree(pConfig);
  sqlite3Fts5ExprFree(pExpr);
}

static void fts5ExprFunctionHr(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args */
  sqlite3_value **apVal           /* Function arguments */
){
  fts5ExprFunction(pCtx, nArg, apVal, 0);
}
static void fts5ExprFunctionTcl(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args */
  sqlite3_value **apVal           /* Function arguments */
){
  fts5ExprFunction(pCtx, nArg, apVal, 1);
}

/*
** The implementation of an SQLite user-defined-function that accepts a
** single integer as an argument. If the integer is an alpha-numeric 
** unicode code point, 1 is returned. Otherwise 0.
*/
static void fts5ExprIsAlnum(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args */
  sqlite3_value **apVal           /* Function arguments */
){
  int iCode;
  if( nArg!=1 ){
    sqlite3_result_error(pCtx, 
        "wrong number of arguments to function fts5_isalnum", -1
    );
    return;
  }
  iCode = sqlite3_value_int(apVal[0]);
  sqlite3_result_int(pCtx, sqlite3Fts5UnicodeIsalnum(iCode));
}

static void fts5ExprFold(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args */
  sqlite3_value **apVal           /* Function arguments */
){
  if( nArg!=1 && nArg!=2 ){
    sqlite3_result_error(pCtx, 
        "wrong number of arguments to function fts5_fold", -1
    );
  }else{
    int iCode;
    int bRemoveDiacritics = 0;
    iCode = sqlite3_value_int(apVal[0]);
    if( nArg==2 ) bRemoveDiacritics = sqlite3_value_int(apVal[1]);
    sqlite3_result_int(pCtx, sqlite3Fts5UnicodeFold(iCode, bRemoveDiacritics));
  }
}

/*
** This is called during initialization to register the fts5_expr() scalar
** UDF with the SQLite handle passed as the only argument.
*/
int sqlite3Fts5ExprInit(Fts5Global *pGlobal, sqlite3 *db){
  struct Fts5ExprFunc {
    const char *z;
    void (*x)(sqlite3_context*,int,sqlite3_value**);
  } aFunc[] = {
    { "fts5_expr",     fts5ExprFunctionHr },
    { "fts5_expr_tcl", fts5ExprFunctionTcl },
    { "fts5_isalnum",  fts5ExprIsAlnum },
    { "fts5_fold",     fts5ExprFold },
  };
  int i;
  int rc = SQLITE_OK;
  void *pCtx = (void*)pGlobal;

  for(i=0; rc==SQLITE_OK && i<(sizeof(aFunc) / sizeof(aFunc[0])); i++){
    struct Fts5ExprFunc *p = &aFunc[i];
    rc = sqlite3_create_function(db, p->z, -1, SQLITE_UTF8, pCtx, p->x, 0, 0);
  }

  return rc;
}

/*
** Return the number of phrases in expression pExpr.
*/
int sqlite3Fts5ExprPhraseCount(Fts5Expr *pExpr){
  return pExpr->nPhrase;
}

/*
** Return the number of terms in the iPhrase'th phrase in pExpr.
*/
int sqlite3Fts5ExprPhraseSize(Fts5Expr *pExpr, int iPhrase){
  if( iPhrase<0 || iPhrase>=pExpr->nPhrase ) return 0;
  return pExpr->apExprPhrase[iPhrase]->nTerm;
}

/*
** This function is used to access the current position list for phrase
** iPhrase.
*/
int sqlite3Fts5ExprPoslist(Fts5Expr *pExpr, int iPhrase, const u8 **pa){
  int nRet;
  Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase];
  Fts5ExprNode *pNode = pPhrase->pNode;
  if( pNode->bEof==0 && pNode->iRowid==pExpr->pRoot->iRowid ){
    *pa = pPhrase->poslist.p;
    nRet = pPhrase->poslist.n;
  }else{
    *pa = 0;
    nRet = 0;
  }
  return nRet;
}

#endif /* SQLITE_ENABLE_FTS5 */
Added ext/fts5/fts5_hash.c.




































































































































































































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
/*
** 2014 August 11
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
*/

#ifdef SQLITE_ENABLE_FTS5


#include "fts5Int.h"

typedef struct Fts5HashEntry Fts5HashEntry;

/*
** This file contains the implementation of an in-memory hash table used
** to accumuluate "term -> doclist" content before it is flused to a level-0
** segment.
*/


struct Fts5Hash {
  int *pnByte;                    /* Pointer to bytes counter */
  int nEntry;                     /* Number of entries currently in hash */
  int nSlot;                      /* Size of aSlot[] array */
  Fts5HashEntry *pScan;           /* Current ordered scan item */
  Fts5HashEntry **aSlot;          /* Array of hash slots */
};

/*
** Each entry in the hash table is represented by an object of the 
** following type. Each object, its key (zKey[]) and its current data
** are stored in a single memory allocation. The position list data 
** immediately follows the key data in memory.
**
** The data that follows the key is in a similar, but not identical format
** to the doclist data stored in the database. It is:
**
**   * Rowid, as a varint
**   * Position list, without 0x00 terminator.
**   * Size of previous position list and rowid, as a 4 byte
**     big-endian integer.
**
** iRowidOff:
**   Offset of last rowid written to data area. Relative to first byte of
**   structure.
**
** nData:
**   Bytes of data written since iRowidOff.
*/
struct Fts5HashEntry {
  Fts5HashEntry *pHashNext;       /* Next hash entry with same hash-key */
  Fts5HashEntry *pScanNext;       /* Next entry in sorted order */
  
  int nAlloc;                     /* Total size of allocation */
  int iSzPoslist;                 /* Offset of space for 4-byte poslist size */
  int nData;                      /* Total bytes of data (incl. structure) */
  u8 bDel;                        /* Set delete-flag @ iSzPoslist */

  int iCol;                       /* Column of last value written */
  int iPos;                       /* Position of last value written */
  i64 iRowid;                     /* Rowid of last value written */
  char zKey[0];                   /* Nul-terminated entry key */
};

/*
** Allocate a new hash table.
*/
int sqlite3Fts5HashNew(Fts5Hash **ppNew, int *pnByte){
  int rc = SQLITE_OK;
  Fts5Hash *pNew;

  *ppNew = pNew = (Fts5Hash*)sqlite3_malloc(sizeof(Fts5Hash));
  if( pNew==0 ){
    rc = SQLITE_NOMEM;
  }else{
    int nByte;
    memset(pNew, 0, sizeof(Fts5Hash));
    pNew->pnByte = pnByte;

    pNew->nSlot = 1024;
    nByte = sizeof(Fts5HashEntry*) * pNew->nSlot;
    pNew->aSlot = (Fts5HashEntry**)sqlite3_malloc(nByte);
    if( pNew->aSlot==0 ){
      sqlite3_free(pNew);
      *ppNew = 0;
      rc = SQLITE_NOMEM;
    }else{
      memset(pNew->aSlot, 0, nByte);
    }
  }
  return rc;
}

/*
** Free a hash table object.
*/
void sqlite3Fts5HashFree(Fts5Hash *pHash){
  if( pHash ){
    sqlite3Fts5HashClear(pHash);
    sqlite3_free(pHash->aSlot);
    sqlite3_free(pHash);
  }
}

/*
** Empty (but do not delete) a hash table.
*/
void sqlite3Fts5HashClear(Fts5Hash *pHash){
  int i;
  for(i=0; i<pHash->nSlot; i++){
    Fts5HashEntry *pNext;
    Fts5HashEntry *pSlot;
    for(pSlot=pHash->aSlot[i]; pSlot; pSlot=pNext){
      pNext = pSlot->pHashNext;
      sqlite3_free(pSlot);
    }
  }
  memset(pHash->aSlot, 0, pHash->nSlot * sizeof(Fts5HashEntry*));
  pHash->nEntry = 0;
}

static unsigned int fts5HashKey(int nSlot, const char *p, int n){
  int i;
  unsigned int h = 13;
  for(i=n-1; i>=0; i--){
    h = (h << 3) ^ h ^ p[i];
  }
  return (h % nSlot);
}

static unsigned int fts5HashKey2(int nSlot, char b, const char *p, int n){
  int i;
  unsigned int h = 13;
  for(i=n-1; i>=0; i--){
    h = (h << 3) ^ h ^ p[i];
  }
  h = (h << 3) ^ h ^ b;
  return (h % nSlot);
}

/*
** Resize the hash table by doubling the number of slots.
*/
static int fts5HashResize(Fts5Hash *pHash){
  int nNew = pHash->nSlot*2;
  int i;
  Fts5HashEntry **apNew;
  Fts5HashEntry **apOld = pHash->aSlot;

  apNew = (Fts5HashEntry**)sqlite3_malloc(nNew*sizeof(Fts5HashEntry*));
  if( !apNew ) return SQLITE_NOMEM;
  memset(apNew, 0, nNew*sizeof(Fts5HashEntry*));

  for(i=0; i<pHash->nSlot; i++){
    while( apOld[i] ){
      int iHash;
      Fts5HashEntry *p = apOld[i];
      apOld[i] = p->pHashNext;
      iHash = fts5HashKey(nNew, p->zKey, strlen(p->zKey));
      p->pHashNext = apNew[iHash];
      apNew[iHash] = p;
    }
  }

  sqlite3_free(apOld);
  pHash->nSlot = nNew;
  pHash->aSlot = apNew;
  return SQLITE_OK;
}

static void fts5HashAddPoslistSize(Fts5HashEntry *p){
  if( p->iSzPoslist ){
    u8 *pPtr = (u8*)p;
    int nSz = (p->nData - p->iSzPoslist - 1);         /* Size in bytes */
    int nPos = nSz*2 + p->bDel;                       /* Value of nPos field */

    assert( p->bDel==0 || p->bDel==1 );
    if( nPos<=127 ){
      pPtr[p->iSzPoslist] = nPos;
    }else{
      int nByte = sqlite3Fts5GetVarintLen((u32)nPos);
      memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz);
      sqlite3Fts5PutVarint(&pPtr[p->iSzPoslist], nPos);
      p->nData += (nByte-1);
    }
    p->bDel = 0;
    p->iSzPoslist = 0;
  }
}

int sqlite3Fts5HashWrite(
  Fts5Hash *pHash,
  i64 iRowid,                     /* Rowid for this entry */
  int iCol,                       /* Column token appears in (-ve -> delete) */
  int iPos,                       /* Position of token within column */
  char bByte,                     /* First byte of token */
  const char *pToken, int nToken  /* Token to add or remove to or from index */
){
  unsigned int iHash = fts5HashKey2(pHash->nSlot, bByte, pToken, nToken);
  Fts5HashEntry *p;
  u8 *pPtr;
  int nIncr = 0;                  /* Amount to increment (*pHash->pnByte) by */

  /* Attempt to locate an existing hash entry */
  for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
    if( p->zKey[0]==bByte 
     && memcmp(&p->zKey[1], pToken, nToken)==0 
     && p->zKey[nToken+1]==0 
    ){
      break;
    }
  }

  /* If an existing hash entry cannot be found, create a new one. */
  if( p==0 ){
    int nByte = sizeof(Fts5HashEntry) + (nToken+1) + 1 + 64;
    if( nByte<128 ) nByte = 128;

    if( (pHash->nEntry*2)>=pHash->nSlot ){
      int rc = fts5HashResize(pHash);
      if( rc!=SQLITE_OK ) return rc;
      iHash = fts5HashKey2(pHash->nSlot, bByte, pToken, nToken);
    }

    p = (Fts5HashEntry*)sqlite3_malloc(nByte);
    if( !p ) return SQLITE_NOMEM;
    memset(p, 0, sizeof(Fts5HashEntry));
    p->nAlloc = nByte;
    p->zKey[0] = bByte;
    memcpy(&p->zKey[1], pToken, nToken);
    assert( iHash==fts5HashKey(pHash->nSlot, p->zKey, nToken+1) );
    p->zKey[nToken+1] = '\0';
    p->nData = nToken+1 + 1 + sizeof(Fts5HashEntry);
    p->nData += sqlite3Fts5PutVarint(&((u8*)p)[p->nData], iRowid);
    p->iSzPoslist = p->nData;
    p->nData += 1;
    p->iRowid = iRowid;
    p->pHashNext = pHash->aSlot[iHash];
    pHash->aSlot[iHash] = p;
    pHash->nEntry++;
    nIncr += p->nData;
  }

  /* Check there is enough space to append a new entry. Worst case scenario
  ** is:
  **
  **     + 9 bytes for a new rowid,
  **     + 4 byte reserved for the "poslist size" varint.
  **     + 1 byte for a "new column" byte,
  **     + 3 bytes for a new column number (16-bit max) as a varint,
  **     + 5 bytes for the new position offset (32-bit max).
  */
  if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){
    int nNew = p->nAlloc * 2;
    Fts5HashEntry *pNew;
    Fts5HashEntry **pp;
    pNew = (Fts5HashEntry*)sqlite3_realloc(p, nNew);
    if( pNew==0 ) return SQLITE_NOMEM;
    pNew->nAlloc = nNew;
    for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pHashNext);
    *pp = pNew;
    p = pNew;
  }
  pPtr = (u8*)p;
  nIncr -= p->nData;

  /* If this is a new rowid, append the 4-byte size field for the previous
  ** entry, and the new rowid for this entry.  */
  if( iRowid!=p->iRowid ){
    fts5HashAddPoslistSize(p);
    p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iRowid - p->iRowid);
    p->iSzPoslist = p->nData;
    p->nData += 1;
    p->iCol = 0;
    p->iPos = 0;
    p->iRowid = iRowid;
  }

  if( iCol>=0 ){
    /* Append a new column value, if necessary */
    assert( iCol>=p->iCol );
    if( iCol!=p->iCol ){
      pPtr[p->nData++] = 0x01;
      p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iCol);
      p->iCol = iCol;
      p->iPos = 0;
    }

    /* Append the new position offset */
    p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iPos - p->iPos + 2);
    p->iPos = iPos;
  }else{
    /* This is a delete. Set the delete flag. */
    p->bDel = 1;
  }
  nIncr += p->nData;

  *pHash->pnByte += nIncr;
  return SQLITE_OK;
}


/*
** Arguments pLeft and pRight point to linked-lists of hash-entry objects,
** each sorted in key order. This function merges the two lists into a
** single list and returns a pointer to its first element.
*/
static Fts5HashEntry *fts5HashEntryMerge(
  Fts5HashEntry *pLeft,
  Fts5HashEntry *pRight
){
  Fts5HashEntry *p1 = pLeft;
  Fts5HashEntry *p2 = pRight;
  Fts5HashEntry *pRet = 0;
  Fts5HashEntry **ppOut = &pRet;

  while( p1 || p2 ){
    if( p1==0 ){
      *ppOut = p2;
      p2 = 0;
    }else if( p2==0 ){
      *ppOut = p1;
      p1 = 0;
    }else{
      int i = 0;
      while( p1->zKey[i]==p2->zKey[i] ) i++;

      if( ((u8)p1->zKey[i])>((u8)p2->zKey[i]) ){
        /* p2 is smaller */
        *ppOut = p2;
        ppOut = &p2->pScanNext;
        p2 = p2->pScanNext;
      }else{
        /* p1 is smaller */
        *ppOut = p1;
        ppOut = &p1->pScanNext;
        p1 = p1->pScanNext;
      }
      *ppOut = 0;
    }
  }

  return pRet;
}

/*
** Extract all tokens from hash table iHash and link them into a list
** in sorted order. The hash table is cleared before returning. It is
** the responsibility of the caller to free the elements of the returned
** list.
*/
static int fts5HashEntrySort(
  Fts5Hash *pHash, 
  const char *pTerm, int nTerm,   /* Query prefix, if any */
  Fts5HashEntry **ppSorted
){
  const int nMergeSlot = 32;
  Fts5HashEntry **ap;
  Fts5HashEntry *pList;
  int iSlot;
  int i;

  *ppSorted = 0;
  ap = sqlite3_malloc(sizeof(Fts5HashEntry*) * nMergeSlot);
  if( !ap ) return SQLITE_NOMEM;
  memset(ap, 0, sizeof(Fts5HashEntry*) * nMergeSlot);

  for(iSlot=0; iSlot<pHash->nSlot; iSlot++){
    Fts5HashEntry *pIter;
    for(pIter=pHash->aSlot[iSlot]; pIter; pIter=pIter->pHashNext){
      if( pTerm==0 || 0==memcmp(pIter->zKey, pTerm, nTerm) ){
        Fts5HashEntry *pEntry = pIter;
        pEntry->pScanNext = 0;
        for(i=0; ap[i]; i++){
          pEntry = fts5HashEntryMerge(pEntry, ap[i]);
          ap[i] = 0;
        }
        ap[i] = pEntry;
      }
    }
  }

  pList = 0;
  for(i=0; i<nMergeSlot; i++){
    pList = fts5HashEntryMerge(pList, ap[i]);
  }

  pHash->nEntry = 0;
  sqlite3_free(ap);
  *ppSorted = pList;
  return SQLITE_OK;
}

/*
** Query the hash table for a doclist associated with term pTerm/nTerm.
*/
int sqlite3Fts5HashQuery(
  Fts5Hash *pHash,                /* Hash table to query */
  const char *pTerm, int nTerm,   /* Query term */
  const u8 **ppDoclist,           /* OUT: Pointer to doclist for pTerm */
  int *pnDoclist                  /* OUT: Size of doclist in bytes */
){
  unsigned int iHash = fts5HashKey(pHash->nSlot, pTerm, nTerm);
  Fts5HashEntry *p;

  for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
    if( memcmp(p->zKey, pTerm, nTerm)==0 && p->zKey[nTerm]==0 ) break;
  }

  if( p ){
    fts5HashAddPoslistSize(p);
    *ppDoclist = (const u8*)&p->zKey[nTerm+1];
    *pnDoclist = p->nData - (sizeof(*p) + nTerm + 1);
  }else{
    *ppDoclist = 0;
    *pnDoclist = 0;
  }

  return SQLITE_OK;
}

int sqlite3Fts5HashScanInit(
  Fts5Hash *p,                    /* Hash table to query */
  const char *pTerm, int nTerm    /* Query prefix */
){
  return fts5HashEntrySort(p, pTerm, nTerm, &p->pScan);
}

void sqlite3Fts5HashScanNext(Fts5Hash *p){
  assert( !sqlite3Fts5HashScanEof(p) );
  p->pScan = p->pScan->pScanNext;
}

int sqlite3Fts5HashScanEof(Fts5Hash *p){
  return (p->pScan==0);
}

void sqlite3Fts5HashScanEntry(
  Fts5Hash *pHash,
  const char **pzTerm,            /* OUT: term (nul-terminated) */
  const u8 **ppDoclist,           /* OUT: pointer to doclist */
  int *pnDoclist                  /* OUT: size of doclist in bytes */
){
  Fts5HashEntry *p;
  if( (p = pHash->pScan) ){
    int nTerm = strlen(p->zKey);
    fts5HashAddPoslistSize(p);
    *pzTerm = p->zKey;
    *ppDoclist = (const u8*)&p->zKey[nTerm+1];
    *pnDoclist = p->nData - (sizeof(*p) + nTerm + 1);
  }else{
    *pzTerm = 0;
    *ppDoclist = 0;
    *pnDoclist = 0;
  }
}

#endif /* SQLITE_ENABLE_FTS5 */
Added ext/fts5/fts5_index.c.






































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669
3670
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927
3928
3929
3930
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105
4106
4107
4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136
4137
4138
4139
4140
4141
4142
4143
4144
4145
4146
4147
4148
4149
4150
4151
4152
4153
4154
4155
4156
4157
4158
4159
4160
4161
4162
4163
4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
4174
4175
4176
4177
4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222
4223
4224
4225
4226
4227
4228
4229
4230
4231
4232
4233
4234
4235
4236
4237
4238
4239
4240
4241
4242
4243
4244
4245
4246
4247
4248
4249
4250
4251
4252
4253
4254
4255
4256
4257
4258
4259
4260
4261
4262
4263
4264
4265
4266
4267
4268
4269
4270
4271
4272
4273
4274
4275
4276
4277
4278
4279
4280
4281
4282
4283
4284
4285
4286
4287
4288
4289
4290
4291
4292
4293
4294
4295
4296
4297
4298
4299
4300
4301
4302
4303
4304
4305
4306
4307
4308
4309
4310
4311
4312
4313
4314
4315
4316
4317
4318
4319
4320
4321
4322
4323
4324
4325
4326
4327
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
4360
4361
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
4376
4377
4378
4379
4380
4381
4382
4383
4384
4385
4386
4387
4388
4389
4390
4391
4392
4393
4394
4395
4396
4397
4398
4399
4400
4401
4402
4403
4404
4405
4406
4407
4408
4409
4410
4411
4412
4413
4414
4415
4416
4417
4418
4419
4420
4421
4422
4423
4424
4425
4426
4427
4428
4429
4430
4431
4432
4433
4434
4435
4436
4437
4438
4439
4440
4441
4442
4443
4444
4445
4446
4447
4448
4449
4450
4451
4452
4453
4454
4455
4456
4457
4458
4459
4460
4461
4462
4463
4464
4465
4466
4467
4468
4469
4470
4471
4472
4473
4474
4475
4476
4477
4478
4479
4480
4481
4482
4483
4484
4485
4486
4487
4488
4489
4490
4491
4492
4493
4494
4495
4496
4497
4498
4499
4500
4501
4502
4503
4504
4505
4506
4507
4508
4509
4510
4511
4512
4513
4514
4515
4516
4517
4518
4519
4520
4521
4522
4523
4524
4525
4526
4527
4528
4529
4530
4531
4532
4533
4534
4535
4536
4537
4538
4539
4540
4541
4542
4543
4544
4545
4546
4547
4548
4549
4550
4551
4552
4553
4554
4555
4556
4557
4558
4559
4560
4561
4562
4563
4564
4565
4566
4567
4568
4569
4570
4571
4572
4573
4574
4575
4576
4577
4578
4579
4580
4581
4582
4583
4584
4585
4586
4587
4588
4589
4590
4591
4592
4593
4594
4595
4596
4597
4598
4599
4600
4601
4602
4603
4604
4605
4606
4607
4608
4609
4610
4611
4612
4613
4614
4615
4616
4617
4618
4619
4620
4621
4622
4623
4624
4625
4626
4627
4628
4629
4630
4631
4632
4633
4634
4635
4636
4637
4638
4639
4640
4641
4642
4643
4644
4645
4646
4647
4648
4649
4650
4651
4652
4653
4654
4655
4656
4657
4658
4659
4660
4661
4662
4663
4664
4665
4666
4667
4668
4669
4670
4671
4672
4673
4674
4675
4676
4677
4678
4679
4680
4681
4682
4683
4684
4685
4686
4687
4688
4689
4690
4691
4692
4693
4694
4695
4696
4697
4698
4699
4700
4701
4702
4703
4704
4705
4706
4707
4708
4709
4710
4711
4712
4713
4714
4715
4716
4717
4718
4719
4720
4721
4722
4723
4724
4725
4726
4727
4728
4729
4730
4731
4732
4733
4734
4735
4736
4737
4738
4739
4740
4741
4742
4743
4744
4745
4746
4747
4748
4749
4750
4751
4752
4753
4754
4755
4756
4757
4758
4759
4760
4761
4762
4763
4764
4765
4766
4767
4768
4769
4770
4771
4772
4773
4774
4775
4776
4777
4778
4779
4780
4781
4782
4783
4784
4785
4786
4787
4788
4789
4790
4791
4792
4793
4794
4795
4796
4797
4798
4799
4800
4801
4802
4803
4804
4805
4806
4807
4808
4809
4810
4811
4812
4813
4814
4815
4816
4817
4818
4819
4820
4821
4822
4823
4824
4825
4826
4827
4828
4829
4830
4831
4832
4833
4834
4835
4836
4837
4838
4839
4840
4841
4842
4843
4844
4845
4846
4847
4848
4849
4850
4851
4852
4853
4854
4855
4856
4857
4858
4859
4860
4861
4862
4863
4864
4865
4866
4867
4868
4869
4870
4871
4872
4873
4874
4875
4876
4877
4878
4879
4880
4881
4882
4883
4884
4885
4886
4887
4888
4889
4890
4891
4892
4893
4894
4895
4896
4897
4898
4899
4900
4901
4902
4903
4904
4905
4906
4907
4908
4909
4910
4911
4912
4913
4914
4915
4916
4917
4918
4919
4920
4921
4922
4923
4924
4925
4926
4927
4928
4929
4930
4931
4932
4933
4934
4935
4936
4937
4938
4939
4940
4941
4942
4943
4944
4945
4946
4947
4948
4949
4950
4951
4952
4953
4954
4955
4956
4957
4958
4959
4960
4961
4962
4963
4964
4965
4966
4967
4968
4969
4970
4971
4972
4973
4974
4975
4976
4977
4978
4979
4980
4981
4982
4983
4984
4985
4986
4987
4988
4989
4990
4991
4992
4993
4994
4995
4996
4997
4998
4999
5000
5001
5002
5003
5004
5005
5006
5007
5008
5009
5010
5011
5012
5013
5014
5015
5016
5017
5018
5019
5020
5021
5022
5023
5024
5025
5026
5027
5028
5029
5030
5031
5032
5033
5034
5035
5036
5037
5038
5039
5040
5041
5042
5043
5044
5045
5046
5047
5048
5049
5050
5051
5052
5053
5054
5055
5056
5057
5058
5059
5060
5061
5062
5063
5064
5065
5066
5067
5068
5069
5070
5071
5072
5073
5074
5075
5076
5077
5078
5079
5080
5081
5082
5083
5084
5085
5086
5087
5088
5089
5090
5091
5092
5093
5094
5095
5096
5097
5098
5099
5100
5101
5102
5103
5104
5105
5106
5107
5108
5109
5110
5111
5112
5113
5114
5115
5116
5117
5118
5119
5120
5121
5122
5123
5124
5125
5126
5127
5128
5129
5130
5131
5132
5133
5134
5135
5136
5137
5138
5139
5140
5141
5142
5143
5144
5145
5146
5147
5148
5149
5150
5151
5152
5153
5154
5155
5156
5157
5158
5159
5160
5161
5162
5163
5164
5165
5166
5167
5168
5169
5170
5171
5172
5173
5174
5175
5176
5177
5178
5179
5180
5181
5182
5183
5184
5185
5186
5187
5188
5189
5190
5191
5192
5193
5194
5195
5196
5197
5198
5199
5200
5201
5202
5203
5204
5205
5206
5207
5208
5209
5210
5211
5212
5213
5214
5215
5216
5217
5218
5219
5220
5221
5222
5223
5224
5225
5226
5227
5228
5229
5230
5231
5232
5233
5234
5235
5236
5237
5238
5239
5240
5241
5242
5243
5244
5245
5246
5247
5248
5249
5250
5251
5252
5253
5254
5255
5256
5257
5258
5259
5260
5261
5262
5263
5264
5265
5266
5267
5268
5269
5270
5271
5272
5273
5274
5275
5276
5277
5278
5279
5280
5281
5282
5283
5284
5285
5286
5287
5288
5289
5290
5291
5292
5293
5294
5295
5296
5297
5298
5299
5300
5301
5302
5303
5304
5305
5306
5307
5308
5309
5310
5311
5312
5313
5314
5315
5316
5317
5318
5319
5320
5321
5322
5323
5324
5325
5326
5327
5328
5329
5330
5331
5332
5333
5334
5335
5336
5337
5338
5339
5340
5341
5342
5343
5344
5345
5346
5347
5348
5349
5350
5351
5352
5353
5354
5355
5356
5357
5358
5359
5360
5361
5362
5363
5364
5365
5366
5367
5368
5369
5370
5371
5372
5373
5374
5375
5376
5377
5378
5379
5380
5381
5382
5383
5384
5385
5386
5387
5388
5389
5390
5391
5392
5393
5394
5395
5396
5397
5398
5399
5400
5401
5402
5403
5404
5405
5406
5407
5408
5409
5410
5411
/*
** 2014 May 31
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
** Low level access to the FTS index stored in the database file. The 
** routines in this file file implement all read and write access to the
** %_data table. Other parts of the system access this functionality via
** the interface defined in fts5Int.h.
*/

#ifdef SQLITE_ENABLE_FTS5

#include "fts5Int.h"

/*
** Overview:
**
** The %_data table contains all the FTS indexes for an FTS5 virtual table.
** As well as the main term index, there may be up to 31 prefix indexes.
** The format is similar to FTS3/4, except that:
**
**   * all segment b-tree leaf data is stored in fixed size page records 
**     (e.g. 1000 bytes). A single doclist may span multiple pages. Care is 
**     taken to ensure it is possible to iterate in either direction through 
**     the entries in a doclist, or to seek to a specific entry within a 
**     doclist, without loading it into memory.
**
**   * large doclists that span many pages have associated "doclist index"
**     records that contain a copy of the first docid on each page spanned by
**     the doclist. This is used to speed up seek operations, and merges of
**     large doclists with very small doclists.
**
**   * extra fields in the "structure record" record the state of ongoing
**     incremental merge operations.
**
*/


#define FTS5_OPT_WORK_UNIT  1000  /* Number of leaf pages per optimize step */
#define FTS5_WORK_UNIT      64    /* Number of leaf pages in unit of work */

#define FTS5_MIN_DLIDX_SIZE 4     /* Add dlidx if this many empty pages */

#define FTS5_MAIN_PREFIX '0'

#if FTS5_MAX_PREFIX_INDEXES > 31
# error "FTS5_MAX_PREFIX_INDEXES is too large"
#endif

/*
** Details:
**
** The %_data table managed by this module,
**
**     CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB);
**
** , contains the following 5 types of records. See the comments surrounding
** the FTS5_*_ROWID macros below for a description of how %_data rowids are 
** assigned to each fo them.
**
** 1. Structure Records:
**
**   The set of segments that make up an index - the index structure - are
**   recorded in a single record within the %_data table. The record consists
**   of a single 32-bit configuration cookie value followed by a list of 
**   SQLite varints. If the FTS table features more than one index (because
**   there are one or more prefix indexes), it is guaranteed that all share
**   the same cookie value.
**
**   Immediately following the configuration cookie, the record begins with
**   three varints:
**
**     + number of levels,
**     + total number of segments on all levels,
**     + value of write counter.
**
**   Then, for each level from 0 to nMax:
**
**     + number of input segments in ongoing merge.
**     + total number of segments in level.
**     + for each segment from oldest to newest:
**         + segment id (always > 0)
**         + b-tree height (1 -> root is leaf, 2 -> root is parent of leaf etc.)
**         + first leaf page number (often 1, always greater than 0)
**         + final leaf page number
**
** 2. The Averages Record:
**
**   A single record within the %_data table. The data is a list of varints.
**   The first value is the number of rows in the index. Then, for each column
**   from left to right, the total number of tokens in the column for all 
**   rows of the table.
**
** 3. Segment leaves:
**
**   TERM DOCLIST FORMAT:
**
**     Most of each segment leaf is taken up by term/doclist data. The 
**     general format of the term/doclist data is:
**
**         varint : size of first term
**         blob:    first term data
**         doclist: first doclist
**         zero-or-more {
**           varint:  number of bytes in common with previous term
**           varint:  number of bytes of new term data (nNew)
**           blob:    nNew bytes of new term data
**           doclist: next doclist
**         }
**
**     doclist format:
**
**         varint:  first rowid
**         poslist: first poslist
**         zero-or-more {
**           varint:  rowid delta (always > 0)
**           poslist: next poslist
**         }
**         0x00 byte
**
**     poslist format:
**
**         varint: size of poslist in bytes multiplied by 2, not including
**                 this field. Plus 1 if this entry carries the "delete" flag.
**         collist: collist for column 0
**         zero-or-more {
**           0x01 byte
**           varint: column number (I)
**           collist: collist for column I
**         }
**
**     collist format:
**
**         varint: first offset + 2
**         zero-or-more {
**           varint: offset delta + 2
**         }
**
**   PAGINATION
**
**     The format described above is only accurate if the entire term/doclist
**     data fits on a single leaf page. If this is not the case, the format
**     is changed in two ways:
**
**       + if the first rowid on a page occurs before the first term, it
**         is stored as a literal value:
**
**             varint:  first rowid
**
**       + the first term on each page is stored in the same way as the
**         very first term of the segment:
**
**             varint : size of first term
**             blob:    first term data
**
**     Each leaf page begins with:
**
**       + 2-byte unsigned containing offset to first rowid (or 0).
**       + 2-byte unsigned containing offset to first term (or 0).
**
**   Followed by term/doclist data.
**
** 4. Segment interior nodes:
**
**   The interior nodes turn the list of leaves into a b+tree. 
**
**   Each interior node begins with a varint - the page number of the left
**   most child node. Following this, for each leaf page except the first,
**   the interior nodes contain:
**
**     a) If the leaf page contains at least one term, then a term-prefix that
**        is greater than all previous terms, and less than or equal to the
**        first term on the leaf page.
**
**     b) If the leaf page no terms, a record indicating how many consecutive
**        leaves contain no terms, and whether or not there is an associated
**        by-rowid index record.
**
**   By definition, there is never more than one type (b) record in a row.
**   Type (b) records only ever appear on height=1 pages - immediate parents
**   of leaves. Only type (a) records are pushed to higher levels.
**
**   Term format:
**
**     * Number of bytes in common with previous term plus 2, as a varint.
**     * Number of bytes of new term data, as a varint.
**     * new term data.
**
**   No-term format:
**
**     * either an 0x00 or 0x01 byte. If the value 0x01 is used, then there 
**       is an associated index-by-rowid record.
**     * the number of zero-term leaves as a varint.
**
** 5. Segment doclist indexes:
**
**   Doclist indexes are themselves b-trees, however they usually consist of
**   a single leaf record only. The format of each doclist index leaf page 
**   is:
**
**     * Flags byte. Bits are:
**         0x01: Clear if leaf is also the root page, otherwise set.
**
**     * Page number of fts index leaf page. As a varint.
**
**     * First docid on page indicated by previous field. As a varint.
**
**     * A list of varints, one for each subsequent termless page. A 
**       positive delta if the termless page contains at least one docid, 
**       or an 0x00 byte otherwise.
**
**   Internal doclist index nodes are:
**
**     * Flags byte. Bits are:
**         0x01: Clear for root page, otherwise set.
**
**     * Page number of first child page. As a varint.
**
**     * Copy of first docid on page indicated by previous field. As a varint.
**
**     * A list of delta-encoded varints - the first docid on each subsequent
**       child page. 
**
*/

/*
** Rowids for the averages and structure records in the %_data table.
*/
#define FTS5_AVERAGES_ROWID     1    /* Rowid used for the averages record */
#define FTS5_STRUCTURE_ROWID   10    /* The structure record */

/*
** Macros determining the rowids used by segment nodes. All nodes in all
** segments for all indexes (the regular FTS index and any prefix indexes)
** are stored in the %_data table with large positive rowids.
**
** The %_data table may contain up to (1<<FTS5_SEGMENT_INDEX_BITS) 
** indexes - one regular term index and zero or more prefix indexes.
**
** Each segment in an index has a unique id greater than zero.
**
** Each node in a segment b-tree is assigned a "page number" that is unique
** within nodes of its height within the segment (leaf nodes have a height 
** of 0, parents 1, etc.). Page numbers are allocated sequentially so that
** a nodes page number is always one more than its left sibling.
**
** The rowid for a node is then found using the FTS5_SEGMENT_ROWID() macro
** below. The FTS5_SEGMENT_*_BITS macros define the number of bits used
** to encode the three FTS5_SEGMENT_ROWID() arguments. This module returns
** SQLITE_FULL and fails the current operation if they ever prove too small.
*/
#define FTS5_DATA_ID_B     16     /* Max seg id number 65535 */
#define FTS5_DATA_DLI_B     1     /* Doclist-index flag (1 bit) */
#define FTS5_DATA_HEIGHT_B  5     /* Max b-tree height of 32 */
#define FTS5_DATA_PAGE_B   31     /* Max page number of 2147483648 */

#define fts5_dri(segid, dlidx, height, pgno) (                                 \
 ((i64)(segid)  << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) +    \
 ((i64)(dlidx)  << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) +                  \
 ((i64)(height) << (FTS5_DATA_PAGE_B)) +                                       \
 ((i64)(pgno))                                                                 \
)

#define FTS5_SEGMENT_ROWID(segid, height, pgno) fts5_dri(segid, 0, height, pgno)
#define FTS5_DLIDX_ROWID(segid, height, pgno)   fts5_dri(segid, 1, height, pgno)

/*
** Maximum segments permitted in a single index 
*/
#define FTS5_MAX_SEGMENT 2000

#ifdef SQLITE_DEBUG
int sqlite3Fts5Corrupt() { return SQLITE_CORRUPT_VTAB; }
#endif


/*
** Each time a blob is read from the %_data table, it is padded with this
** many zero bytes. This makes it easier to decode the various record formats
** without overreading if the records are corrupt.
*/
#define FTS5_DATA_ZERO_PADDING 8

typedef struct Fts5BtreeIter Fts5BtreeIter;
typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel;
typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5DlidxLvl Fts5DlidxLvl;
typedef struct Fts5DlidxWriter Fts5DlidxWriter;
typedef struct Fts5MultiSegIter Fts5MultiSegIter;
typedef struct Fts5NodeIter Fts5NodeIter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;

struct Fts5Data {
  u8 *p;                          /* Pointer to buffer containing record */
  int n;                          /* Size of record in bytes */
};

/*
** One object per %_data table.
*/
struct Fts5Index {
  Fts5Config *pConfig;            /* Virtual table configuration */
  char *zDataTbl;                 /* Name of %_data table */
  int nWorkUnit;                  /* Leaf pages in a "unit" of work */

  /*
  ** Variables related to the accumulation of tokens and doclists within the
  ** in-memory hash tables before they are flushed to disk.
  */
  Fts5Hash *pHash;                /* Hash table for in-memory data */
  int nMaxPendingData;            /* Max pending data before flush to disk */
  int nPendingData;               /* Current bytes of pending data */
  i64 iWriteRowid;                /* Rowid for current doc being written */
  Fts5Buffer scratch;

  /* Error state. */
  int rc;                         /* Current error code */

  /* State used by the fts5DataXXX() functions. */
  sqlite3_blob *pReader;          /* RO incr-blob open on %_data table */
  sqlite3_stmt *pWriter;          /* "INSERT ... %_data VALUES(?,?)" */
  sqlite3_stmt *pDeleter;         /* "DELETE FROM %_data ... id>=? AND id<=?" */
  int nRead;                      /* Total number of blocks read */
};

struct Fts5DoclistIter {
  u8 *a;
  int n;
  int i;

  /* Output variables. aPoslist==0 at EOF */
  i64 iRowid;
  u8 *aPoslist;
  int nPoslist;
};

/*
** Each iterator used by external modules is an instance of this type.
*/
struct Fts5IndexIter {
  Fts5Index *pIndex;
  Fts5Structure *pStruct;
  Fts5MultiSegIter *pMulti;
  Fts5Buffer poslist;             /* Buffer containing current poslist */
};

/*
** The contents of the "structure" record for each index are represented
** using an Fts5Structure record in memory. Which uses instances of the 
** other Fts5StructureXXX types as components.
*/
struct Fts5StructureSegment {
  int iSegid;                     /* Segment id */
  int nHeight;                    /* Height of segment b-tree */
  int pgnoFirst;                  /* First leaf page number in segment */
  int pgnoLast;                   /* Last leaf page number in segment */
};
struct Fts5StructureLevel {
  int nMerge;                     /* Number of segments in incr-merge */
  int nSeg;                       /* Total number of segments on level */
  Fts5StructureSegment *aSeg;     /* Array of segments. aSeg[0] is oldest. */
};
struct Fts5Structure {
  u64 nWriteCounter;              /* Total leaves written to level 0 */
  int nSegment;                   /* Total segments in this structure */
  int nLevel;                     /* Number of levels in this index */
  Fts5StructureLevel aLevel[0];   /* Array of nLevel level objects */
};

/*
** An object of type Fts5SegWriter is used to write to segments.
*/
struct Fts5PageWriter {
  int pgno;                       /* Page number for this page */
  Fts5Buffer buf;                 /* Buffer containing page data */
  Fts5Buffer term;                /* Buffer containing previous term on page */
};
struct Fts5DlidxWriter {
  int pgno;                       /* Page number for this page */
  int bPrevValid;                 /* True if iPrev is valid */
  i64 iPrev;                      /* Previous docid value written to page */
  Fts5Buffer buf;                 /* Buffer containing page data */
};
struct Fts5SegWriter {
  int iSegid;                     /* Segid to write to */
  int nWriter;                    /* Number of entries in aWriter */
  Fts5PageWriter *aWriter;        /* Array of PageWriter objects */
  i64 iPrevRowid;                 /* Previous docid written to current leaf */
  u8 bFirstRowidInDoclist;        /* True if next rowid is first in doclist */
  u8 bFirstRowidInPage;           /* True if next rowid is first in page */
  u8 bFirstTermInPage;            /* True if next term will be first in leaf */
  int nLeafWritten;               /* Number of leaf pages written */
  int nEmpty;                     /* Number of contiguous term-less nodes */

  int nDlidx;                     /* Allocated size of aDlidx[] array */
  Fts5DlidxWriter *aDlidx;        /* Array of Fts5DlidxWriter objects */
};

/*
** Object for iterating through the merged results of one or more segments,
** visiting each term/docid pair in the merged data.
**
** nSeg is always a power of two greater than or equal to the number of
** segments that this object is merging data from. Both the aSeg[] and
** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded
** with zeroed objects - these are handled as if they were iterators opened
** on empty segments.
**
** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an
** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the 
** comparison in this context is the index of the iterator that currently
** points to the smaller term/rowid combination. Iterators at EOF are
** considered to be greater than all other iterators.
**
** aFirst[1] contains the index in aSeg[] of the iterator that points to
** the smallest key overall. aFirst[0] is unused. 
*/

typedef struct Fts5CResult Fts5CResult;
struct Fts5CResult {
  u16 iFirst;                     /* aSeg[] index of firstest iterator */
  u8 bTermEq;                     /* True if the terms are equal */
};

struct Fts5MultiSegIter {
  int nSeg;                       /* Size of aSeg[] array */
  int bRev;                       /* True to iterate in reverse order */
  int bSkipEmpty;                 /* True to skip deleted entries */
  Fts5SegIter *aSeg;              /* Array of segment iterators */
  Fts5CResult *aFirst;            /* Current merge state (see above) */
};

/*
** Object for iterating through a single segment, visiting each term/docid
** pair in the segment.
**
** pSeg:
**   The segment to iterate through.
**
** iLeafPgno:
**   Current leaf page number within segment.
**
** iLeafOffset:
**   Byte offset within the current leaf that is the first byte of the 
**   position list data (one byte passed the position-list size field).
**   rowid field of the current entry. Usually this is the size field of the
**   position list data. The exception is if the rowid for the current entry 
**   is the last thing on the leaf page.
**
** pLeaf:
**   Buffer containing current leaf page data. Set to NULL at EOF.
**
** iTermLeafPgno, iTermLeafOffset:
**   Leaf page number containing the last term read from the segment. And
**   the offset immediately following the term data.
**
** flags:
**   Mask of FTS5_SEGITER_XXX values. Interpreted as follows:
**
**   FTS5_SEGITER_ONETERM:
**     If set, set the iterator to point to EOF after the current doclist 
**     has been exhausted. Do not proceed to the next term in the segment.
**
**   FTS5_SEGITER_REVERSE:
**     This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If
**     it is set, iterate through docids in descending order instead of the
**     default ascending order.
**
** iRowidOffset/nRowidOffset/aRowidOffset:
**     These are used if the FTS5_SEGITER_REVERSE flag is set.
**
**     For each rowid on the page corresponding to the current term, the
**     corresponding aRowidOffset[] entry is set to the byte offset of the
**     start of the "position-list-size" field within the page.
*/
struct Fts5SegIter {
  Fts5StructureSegment *pSeg;     /* Segment to iterate through */
  int flags;                      /* Mask of configuration flags */
  int iLeafPgno;                  /* Current leaf page number */
  Fts5Data *pLeaf;                /* Current leaf data */
  Fts5Data *pNextLeaf;            /* Leaf page (iLeafPgno+1) */
  int iLeafOffset;                /* Byte offset within current leaf */

  /* The page and offset from which the current term was read. The offset 
  ** is the offset of the first rowid in the current doclist.  */
  int iTermLeafPgno;
  int iTermLeafOffset;

  /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
  int iRowidOffset;               /* Current entry in aRowidOffset[] */
  int nRowidOffset;               /* Allocated size of aRowidOffset[] array */
  int *aRowidOffset;              /* Array of offset to rowid fields */

  Fts5DlidxIter *pDlidx;          /* If there is a doclist-index */

  /* Variables populated based on current entry. */
  Fts5Buffer term;                /* Current term */
  i64 iRowid;                     /* Current rowid */
  int nPos;                       /* Number of bytes in current position list */
  int bDel;                       /* True if the delete flag is set */
};

#define FTS5_SEGITER_ONETERM 0x01
#define FTS5_SEGITER_REVERSE 0x02


/*
** Object for iterating through the conents of a single internal node in 
** memory.
*/
struct Fts5NodeIter {
  /* Internal. Set and managed by fts5NodeIterXXX() functions. Except, 
  ** the EOF test for the iterator is (Fts5NodeIter.aData==0).  */
  const u8 *aData;
  int nData;
  int iOff;

  /* Output variables */
  Fts5Buffer term;
  int nEmpty;
  int iChild;
  int bDlidx;
};

/*
** An instance of the following type is used to iterate through the contents
** of a doclist-index record.
**
** pData:
**   Record containing the doclist-index data.
**
** bEof:
**   Set to true once iterator has reached EOF.
**
** iOff:
**   Set to the current offset within record pData.
*/
struct Fts5DlidxLvl {
  Fts5Data *pData;              /* Data for current page of this level */
  int iOff;                     /* Current offset into pData */
  int bEof;                     /* At EOF already */
  int iFirstOff;                /* Used by reverse iterators */

  /* Output variables */
  int iLeafPgno;                /* Page number of current leaf page */
  i64 iRowid;                   /* First rowid on leaf iLeafPgno */
};
struct Fts5DlidxIter {
  int nLvl;
  int iSegid;
  Fts5DlidxLvl aLvl[1];
};



/*
** An Fts5BtreeIter object is used to iterate through all entries in the
** b-tree hierarchy belonging to a single fts5 segment. In this case the
** "b-tree hierarchy" is all b-tree nodes except leaves. Each entry in the
** b-tree hierarchy consists of the following:
**
**   iLeaf:  The page number of the leaf page the entry points to.
**
**   term:   A split-key that all terms on leaf page $iLeaf must be greater
**           than or equal to. The "term" associated with the first b-tree
**           hierarchy entry (the one that points to leaf page 1) is always 
**           an empty string.
**
**   nEmpty: The number of empty (termless) leaf pages that immediately
**           following iLeaf.
**
** The Fts5BtreeIter object is only used as part of the integrity-check code.
*/
struct Fts5BtreeIterLevel {
  Fts5NodeIter s;                 /* Iterator for the current node */
  Fts5Data *pData;                /* Data for the current node */
};
struct Fts5BtreeIter {
  Fts5Index *p;                   /* FTS5 backend object */
  Fts5StructureSegment *pSeg;     /* Iterate through this segment's b-tree */
  int nLvl;                       /* Size of aLvl[] array */
  Fts5BtreeIterLevel *aLvl;       /* Level for each tier of b-tree */

  /* Output variables */
  Fts5Buffer term;                /* Current term */
  int iLeaf;                      /* Leaf containing terms >= current term */
  int nEmpty;                     /* Number of "empty" leaves following iLeaf */
  int bEof;                       /* Set to true at EOF */
  int bDlidx;                     /* True if there exists a dlidx */
};


static void fts5PutU16(u8 *aOut, u16 iVal){
  aOut[0] = (iVal>>8);
  aOut[1] = (iVal&0xFF);
}

static u16 fts5GetU16(const u8 *aIn){
  return ((u16)aIn[0] << 8) + aIn[1];
} 

/*
** Allocate and return a buffer at least nByte bytes in size.
**
** If an OOM error is encountered, return NULL and set the error code in
** the Fts5Index handle passed as the first argument.
*/
static void *fts5IdxMalloc(Fts5Index *p, int nByte){
  return sqlite3Fts5MallocZero(&p->rc, nByte);
}

/*
** Compare the contents of the pLeft buffer with the pRight/nRight blob.
**
** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
** +ve if pRight is smaller than pLeft. In other words:
**
**     res = *pLeft - *pRight
*/
static int fts5BufferCompareBlob(
  Fts5Buffer *pLeft,              /* Left hand side of comparison */
  const u8 *pRight, int nRight    /* Right hand side of comparison */
){
  int nCmp = MIN(pLeft->n, nRight);
  int res = memcmp(pLeft->p, pRight, nCmp);
  return (res==0 ? (pLeft->n - nRight) : res);
}


/*
** Compare the contents of the two buffers using memcmp(). If one buffer
** is a prefix of the other, it is considered the lesser.
**
** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
** +ve if pRight is smaller than pLeft. In other words:
**
**     res = *pLeft - *pRight
*/
static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){
  int nCmp = MIN(pLeft->n, pRight->n);
  int res = memcmp(pLeft->p, pRight->p, nCmp);
  return (res==0 ? (pLeft->n - pRight->n) : res);
}

#ifdef SQLITE_DEBUG
static int fts5BlobCompare(
  const u8 *pLeft, int nLeft, 
  const u8 *pRight, int nRight
){
  int nCmp = MIN(nLeft, nRight);
  int res = memcmp(pLeft, pRight, nCmp);
  return (res==0 ? (nLeft - nRight) : res);
}
#endif


/*
** Close the read-only blob handle, if it is open.
*/
static void fts5CloseReader(Fts5Index *p){
  if( p->pReader ){
    sqlite3_blob *pReader = p->pReader;
    p->pReader = 0;
    sqlite3_blob_close(pReader);
  }
}

static Fts5Data *fts5DataReadOrBuffer(
  Fts5Index *p, 
  Fts5Buffer *pBuf, 
  i64 iRowid
){
  Fts5Data *pRet = 0;
  if( p->rc==SQLITE_OK ){
    int rc = SQLITE_OK;

    if( p->pReader ){
      /* This call may return SQLITE_ABORT if there has been a savepoint
      ** rollback since it was last used. In this case a new blob handle
      ** is required.  */
      sqlite3_blob *pBlob = p->pReader;
      p->pReader = 0;
      rc = sqlite3_blob_reopen(pBlob, iRowid);
      assert( p->pReader==0 );
      p->pReader = pBlob;
      if( rc!=SQLITE_OK ){
        fts5CloseReader(p);
      }
      if( rc==SQLITE_ABORT ) rc = SQLITE_OK;
    }

    /* If the blob handle is not yet open, open and seek it. Otherwise, use
    ** the blob_reopen() API to reseek the existing blob handle.  */
    if( p->pReader==0 && rc==SQLITE_OK ){
      Fts5Config *pConfig = p->pConfig;
      rc = sqlite3_blob_open(pConfig->db, 
          pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader
      );
    }

    /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls
    ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead.
    ** All the reasons those functions might return SQLITE_ERROR - missing
    ** table, missing row, non-blob/text in block column - indicate 
    ** backing store corruption.  */
    if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT;

    if( rc==SQLITE_OK ){
      u8 *aOut;                   /* Read blob data into this buffer */
      int nByte = sqlite3_blob_bytes(p->pReader);
      if( pBuf ){
        fts5BufferZero(pBuf);
        fts5BufferGrow(&rc, pBuf, nByte);
        aOut = pBuf->p;
        pBuf->n = nByte;
      }else{
        int nSpace = nByte + FTS5_DATA_ZERO_PADDING;
        pRet = (Fts5Data*)sqlite3Fts5MallocZero(&rc, nSpace+sizeof(Fts5Data));
        if( pRet ){
          pRet->n = nByte;
          aOut = pRet->p = (u8*)&pRet[1];
        }
      }

      if( rc==SQLITE_OK ){
        rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0);
      }
      if( rc!=SQLITE_OK ){
        sqlite3_free(pRet);
        pRet = 0;
      }
    }
    p->rc = rc;
    p->nRead++;
  }

  return pRet;
}

/*
** Retrieve a record from the %_data table.
**
** If an error occurs, NULL is returned and an error left in the 
** Fts5Index object.
*/
static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
  Fts5Data *pRet = fts5DataReadOrBuffer(p, 0, iRowid);
  assert( (pRet==0)==(p->rc!=SQLITE_OK) );
  return pRet;
}

/*
** Read a record from the %_data table into the buffer supplied as the
** second argument.
**
** If an error occurs, an error is left in the Fts5Index object. If an
** error has already occurred when this function is called, it is a 
** no-op.
*/
static void fts5DataBuffer(Fts5Index *p, Fts5Buffer *pBuf, i64 iRowid){
  (void)fts5DataReadOrBuffer(p, pBuf, iRowid);
}

/*
** Release a reference to data record returned by an earlier call to
** fts5DataRead().
*/
static void fts5DataRelease(Fts5Data *pData){
  sqlite3_free(pData);
}

/*
** INSERT OR REPLACE a record into the %_data table.
*/
static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){
  if( p->rc!=SQLITE_OK ) return;

  if( p->pWriter==0 ){
    int rc = SQLITE_OK;
    Fts5Config *pConfig = p->pConfig;
    char *zSql = sqlite3Fts5Mprintf(&rc,
        "REPLACE INTO '%q'.%Q(id, block) VALUES(?,?)", pConfig->zDb, p->zDataTbl
    );
    if( zSql ){
      rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p->pWriter, 0);
      sqlite3_free(zSql);
    }
    if( rc!=SQLITE_OK ){
      p->rc = rc;
      return;
    }
  }

  sqlite3_bind_int64(p->pWriter, 1, iRowid);
  sqlite3_bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC);
  sqlite3_step(p->pWriter);
  p->rc = sqlite3_reset(p->pWriter);
}

/*
** Execute the following SQL:
**
**     DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast
*/
static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){
  if( p->rc!=SQLITE_OK ) return;

  if( p->pDeleter==0 ){
    int rc;
    Fts5Config *pConfig = p->pConfig;
    char *zSql = sqlite3_mprintf(
        "DELETE FROM '%q'.%Q WHERE id>=? AND id<=?", pConfig->zDb, p->zDataTbl
    );
    if( zSql==0 ){
      rc = SQLITE_NOMEM;
    }else{
      rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p->pDeleter, 0);
      sqlite3_free(zSql);
    }
    if( rc!=SQLITE_OK ){
      p->rc = rc;
      return;
    }
  }

  sqlite3_bind_int64(p->pDeleter, 1, iFirst);
  sqlite3_bind_int64(p->pDeleter, 2, iLast);
  sqlite3_step(p->pDeleter);
  p->rc = sqlite3_reset(p->pDeleter);
}

/*
** Remove all records associated with segment iSegid.
*/
static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){
  i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0, 0);
  i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0, 0)-1;
  fts5DataDelete(p, iFirst, iLast);
}

/*
** Release a reference to an Fts5Structure object returned by an earlier 
** call to fts5StructureRead() or fts5StructureDecode().
*/
static void fts5StructureRelease(Fts5Structure *pStruct){
  if( pStruct ){
    int i;
    for(i=0; i<pStruct->nLevel; i++){
      sqlite3_free(pStruct->aLevel[i].aSeg);
    }
    sqlite3_free(pStruct);
  }
}

/*
** Deserialize and return the structure record currently stored in serialized
** form within buffer pData/nData.
**
** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
** are over-allocated by one slot. This allows the structure contents
** to be more easily edited.
**
** If an error occurs, *ppOut is set to NULL and an SQLite error code
** returned. Otherwise, *ppOut is set to point to the new object and
** SQLITE_OK returned.
*/
static int fts5StructureDecode(
  const u8 *pData,                /* Buffer containing serialized structure */
  int nData,                      /* Size of buffer pData in bytes */
  int *piCookie,                  /* Configuration cookie value */
  Fts5Structure **ppOut           /* OUT: Deserialized object */
){
  int rc = SQLITE_OK;
  int i = 0;
  int iLvl;
  int nLevel = 0;
  int nSegment = 0;
  int nByte;                      /* Bytes of space to allocate at pRet */
  Fts5Structure *pRet = 0;        /* Structure object to return */

  /* Grab the cookie value */
  if( piCookie ) *piCookie = sqlite3Fts5Get32(pData);
  i = 4;

  /* Read the total number of levels and segments from the start of the
  ** structure record.  */
  i += fts5GetVarint32(&pData[i], nLevel);
  i += fts5GetVarint32(&pData[i], nSegment);
  nByte = (
      sizeof(Fts5Structure) +                    /* Main structure */
      sizeof(Fts5StructureLevel) * (nLevel)      /* aLevel[] array */
  );
  pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte);

  if( pRet ){
    pRet->nLevel = nLevel;
    pRet->nSegment = nSegment;
    i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter);

    for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){
      Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl];
      int nTotal;
      int iSeg;

      i += fts5GetVarint32(&pData[i], pLvl->nMerge);
      i += fts5GetVarint32(&pData[i], nTotal);
      assert( nTotal>=pLvl->nMerge );
      pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc, 
          nTotal * sizeof(Fts5StructureSegment)
      );

      if( rc==SQLITE_OK ){
        pLvl->nSeg = nTotal;
        for(iSeg=0; iSeg<nTotal; iSeg++){
          i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].iSegid);
          i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].nHeight);
          i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst);
          i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast);
        }
      }else{
        fts5StructureRelease(pRet);
        pRet = 0;
      }
    }
  }

  *ppOut = pRet;
  return rc;
}

/*
**
*/
static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){
  if( *pRc==SQLITE_OK ){
    Fts5Structure *pStruct = *ppStruct;
    int nLevel = pStruct->nLevel;
    int nByte = (
        sizeof(Fts5Structure) +                  /* Main structure */
        sizeof(Fts5StructureLevel) * (nLevel+1)  /* aLevel[] array */
    );

    pStruct = sqlite3_realloc(pStruct, nByte);
    if( pStruct ){
      memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel));
      pStruct->nLevel++;
      *ppStruct = pStruct;
    }else{
      *pRc = SQLITE_NOMEM;
    }
  }
}

/*
** Extend level iLvl so that there is room for at least nExtra more
** segments.
*/
static void fts5StructureExtendLevel(
  int *pRc, 
  Fts5Structure *pStruct, 
  int iLvl, 
  int nExtra, 
  int bInsert
){
  if( *pRc==SQLITE_OK ){
    Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
    Fts5StructureSegment *aNew;
    int nByte;

    nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment);
    aNew = sqlite3_realloc(pLvl->aSeg, nByte);
    if( aNew ){
      if( bInsert==0 ){
        memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra);
      }else{
        int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment);
        memmove(&aNew[nExtra], aNew, nMove);
        memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra);
      }
      pLvl->aSeg = aNew;
    }else{
      *pRc = SQLITE_NOMEM;
    }
  }
}

/*
** Read, deserialize and return the structure record.
**
** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
** are over-allocated as described for function fts5StructureDecode() 
** above.
**
** If an error occurs, NULL is returned and an error code left in the
** Fts5Index handle. If an error has already occurred when this function
** is called, it is a no-op.
*/
static Fts5Structure *fts5StructureRead(Fts5Index *p){
  Fts5Config *pConfig = p->pConfig;
  Fts5Structure *pRet = 0;        /* Object to return */
  Fts5Data *pData;                /* %_data entry containing structure record */
  int iCookie;                    /* Configuration cookie */

  pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
  if( !pData ) return 0;
  p->rc = fts5StructureDecode(pData->p, pData->n, &iCookie, &pRet);

  if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){
    p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
  }

  fts5DataRelease(pData);
  if( p->rc!=SQLITE_OK ){
    fts5StructureRelease(pRet);
    pRet = 0;
  }
  return pRet;
}

/*
** Return the total number of segments in index structure pStruct. This
** function is only ever used as part of assert() conditions.
*/
#ifdef SQLITE_DEBUG
static int fts5StructureCountSegments(Fts5Structure *pStruct){
  int nSegment = 0;               /* Total number of segments */
  if( pStruct ){
    int iLvl;                     /* Used to iterate through levels */
    for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
      nSegment += pStruct->aLevel[iLvl].nSeg;
    }
  }

  return nSegment;
}
#endif

/*
** Serialize and store the "structure" record.
**
** If an error occurs, leave an error code in the Fts5Index object. If an
** error has already occurred, this function is a no-op.
*/
static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){
  if( p->rc==SQLITE_OK ){
    Fts5Buffer buf;               /* Buffer to serialize record into */
    int iLvl;                     /* Used to iterate through levels */
    int iCookie;                  /* Cookie value to store */

    assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
    memset(&buf, 0, sizeof(Fts5Buffer));

    /* Append the current configuration cookie */
    iCookie = p->pConfig->iCookie;
    if( iCookie<0 ) iCookie = 0;
    fts5BufferAppend32(&p->rc, &buf, iCookie);

    fts5BufferAppendVarint(&p->rc, &buf, pStruct->nLevel);
    fts5BufferAppendVarint(&p->rc, &buf, pStruct->nSegment);
    fts5BufferAppendVarint(&p->rc, &buf, (i64)pStruct->nWriteCounter);

    for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
      int iSeg;                     /* Used to iterate through segments */
      Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
      fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge);
      fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg);
      assert( pLvl->nMerge<=pLvl->nSeg );

      for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
        fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid);
        fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].nHeight);
        fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst);
        fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast);
      }
    }

    fts5DataWrite(p, FTS5_STRUCTURE_ROWID, buf.p, buf.n);
    fts5BufferFree(&buf);
  }
}

#if 0
static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*);
static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){
  int rc = SQLITE_OK;
  Fts5Buffer buf;
  memset(&buf, 0, sizeof(buf));
  fts5DebugStructure(&rc, &buf, pStruct);
  fprintf(stdout, "%s: %s\n", zCaption, buf.p);
  fflush(stdout);
  fts5BufferFree(&buf);
}
#else
# define fts5PrintStructure(x,y)
#endif

static int fts5SegmentSize(Fts5StructureSegment *pSeg){
  return 1 + pSeg->pgnoLast - pSeg->pgnoFirst;
}

/*
** Return a copy of index structure pStruct. Except, promote as many 
** segments as possible to level iPromote. If an OOM occurs, NULL is 
** returned.
*/
static void fts5StructurePromoteTo(
  Fts5Index *p,
  int iPromote,
  int szPromote,
  Fts5Structure *pStruct
){
  int il, is;
  Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote];

  if( pOut->nMerge==0 ){
    for(il=iPromote+1; il<pStruct->nLevel; il++){
      Fts5StructureLevel *pLvl = &pStruct->aLevel[il];
      if( pLvl->nMerge ) return;
      for(is=pLvl->nSeg-1; is>=0; is--){
        int sz = fts5SegmentSize(&pLvl->aSeg[is]);
        if( sz>szPromote ) return;
        fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1);
        if( p->rc ) return;
        memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment));
        pOut->nSeg++;
        pLvl->nSeg--;
      }
    }
  }
}

/*
** A new segment has just been written to level iLvl of index structure
** pStruct. This function determines if any segments should be promoted
** as a result. Segments are promoted in two scenarios:
**
**   a) If the segment just written is smaller than one or more segments
**      within the previous populated level, it is promoted to the previous
**      populated level.
**
**   b) If the segment just written is larger than the newest segment on
**      the next populated level, then that segment, and any other adjacent
**      segments that are also smaller than the one just written, are 
**      promoted. 
**
** If one or more segments are promoted, the structure object is updated
** to reflect this.
*/
static void fts5StructurePromote(
  Fts5Index *p,                   /* FTS5 backend object */
  int iLvl,                       /* Index level just updated */
  Fts5Structure *pStruct          /* Index structure */
){
  if( p->rc==SQLITE_OK ){
    int iTst;
    int iPromote = -1;
    int szPromote;                /* Promote anything this size or smaller */
    Fts5StructureSegment *pSeg;   /* Segment just written */
    int szSeg;                    /* Size of segment just written */


    pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1];
    szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst);

    /* Check for condition (a) */
    for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--);
    if( iTst>=0 ){
      int i;
      int szMax = 0;
      Fts5StructureLevel *pTst = &pStruct->aLevel[iTst];
      assert( pTst->nMerge==0 );
      for(i=0; i<pTst->nSeg; i++){
        int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1;
        if( sz>szMax ) szMax = sz;
      }
      if( szMax>=szSeg ){
        /* Condition (a) is true. Promote the newest segment on level 
        ** iLvl to level iTst.  */
        iPromote = iTst;
        szPromote = szMax;
      }
    }

    /* If condition (a) is not met, assume (b) is true. StructurePromoteTo()
    ** is a no-op if it is not.  */
    if( iPromote<0 ){
      iPromote = iLvl;
      szPromote = szSeg;
    }
    fts5StructurePromoteTo(p, iPromote, szPromote, pStruct);
  }
}


/*
** If the pIter->iOff offset currently points to an entry indicating one
** or more term-less nodes, advance past it and set pIter->nEmpty to
** the number of empty child nodes.
*/
static void fts5NodeIterGobbleNEmpty(Fts5NodeIter *pIter){
  if( pIter->iOff<pIter->nData && 0==(pIter->aData[pIter->iOff] & 0xfe) ){
    pIter->bDlidx = pIter->aData[pIter->iOff] & 0x01;
    pIter->iOff++;
    pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], pIter->nEmpty);
  }else{
    pIter->nEmpty = 0;
    pIter->bDlidx = 0;
  }
}

/*
** Advance to the next entry within the node.
*/
static void fts5NodeIterNext(int *pRc, Fts5NodeIter *pIter){
  if( pIter->iOff>=pIter->nData ){
    pIter->aData = 0;
    pIter->iChild += pIter->nEmpty;
  }else{
    int nPre, nNew;
    pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], nPre);
    pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], nNew);
    pIter->term.n = nPre-2;
    fts5BufferAppendBlob(pRc, &pIter->term, nNew, pIter->aData+pIter->iOff);
    pIter->iOff += nNew;
    pIter->iChild += (1 + pIter->nEmpty);
    fts5NodeIterGobbleNEmpty(pIter);
    if( *pRc ) pIter->aData = 0;
  }
}


/*
** Initialize the iterator object pIter to iterate through the internal
** segment node in pData.
*/
static void fts5NodeIterInit(const u8 *aData, int nData, Fts5NodeIter *pIter){
  memset(pIter, 0, sizeof(*pIter));
  pIter->aData = aData;
  pIter->nData = nData;
  pIter->iOff = fts5GetVarint32(aData, pIter->iChild);
  fts5NodeIterGobbleNEmpty(pIter);
}

/*
** Free any memory allocated by the iterator object.
*/
static void fts5NodeIterFree(Fts5NodeIter *pIter){
  fts5BufferFree(&pIter->term);
}

/*
** Advance the iterator passed as the only argument. If the end of the 
** doclist-index page is reached, return non-zero.
*/
static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){
  Fts5Data *pData = pLvl->pData;

  if( pLvl->iOff==0 ){
    assert( pLvl->bEof==0 );
    pLvl->iOff = 1;
    pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno);
    pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid);
    pLvl->iFirstOff = pLvl->iOff;
  }else{
    int iOff;
    for(iOff=pLvl->iOff; iOff<pData->n; iOff++){
      if( pData->p[iOff] ) break; 
    }

    if( iOff<pData->n ){
      i64 iVal;
      pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1;
      iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal);
      pLvl->iRowid += iVal;
      pLvl->iOff = iOff;
    }else{
      pLvl->bEof = 1;
    }
  }

  return pLvl->bEof;
}

/*
** Advance the iterator passed as the only argument.
*/
static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
  Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];

  assert( iLvl<pIter->nLvl );
  if( fts5DlidxLvlNext(pLvl) ){
    if( (iLvl+1) < pIter->nLvl ){
      fts5DlidxIterNextR(p, pIter, iLvl+1);
      if( pLvl[1].bEof==0 ){
        fts5DataRelease(pLvl->pData);
        memset(pLvl, 0, sizeof(Fts5DlidxLvl));
        pLvl->pData = fts5DataRead(p, 
            FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
        );
        if( pLvl->pData ) fts5DlidxLvlNext(pLvl);
      }
    }
  }

  return pIter->aLvl[0].bEof;
}
static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){
  return fts5DlidxIterNextR(p, pIter, 0);
}

/*
** The iterator passed as the first argument has the following fields set
** as follows. This function sets up the rest of the iterator so that it
** points to the first rowid in the doclist-index.
**
**   pData:
**     pointer to doclist-index record, 
**
** When this function is called pIter->iLeafPgno is the page number the
** doclist is associated with (the one featuring the term).
*/
static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
  int i;
  for(i=0; i<pIter->nLvl; i++){
    fts5DlidxLvlNext(&pIter->aLvl[i]);
  }
  return pIter->aLvl[0].bEof;
}


static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){
  return p->rc!=SQLITE_OK || pIter->aLvl[0].bEof;
}

static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){
  int i;

  /* Advance each level to the last entry on the last page */
  for(i=pIter->nLvl-1; p->rc==SQLITE_OK && i>=0; i--){
    Fts5DlidxLvl *pLvl = &pIter->aLvl[i];
    while( fts5DlidxLvlNext(pLvl)==0 );
    pLvl->bEof = 0;

    if( i>0 ){
      Fts5DlidxLvl *pChild = &pLvl[-1];
      fts5DataRelease(pChild->pData);
      memset(pChild, 0, sizeof(Fts5DlidxLvl));
      pChild->pData = fts5DataRead(p, 
          FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno)
      );
    }
  }
}

/*
** Move the iterator passed as the only argument to the previous entry.
*/
static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){
  int iOff = pLvl->iOff;

  assert( pLvl->bEof==0 );
  if( iOff<=pLvl->iFirstOff ){
    pLvl->bEof = 1;
  }else{
    u8 *a = pLvl->pData->p;
    i64 iVal;
    int iLimit;
    int ii;
    int nZero = 0;

    /* Currently iOff points to the first byte of a varint. This block 
    ** decrements iOff until it points to the first byte of the previous 
    ** varint. Taking care not to read any memory locations that occur
    ** before the buffer in memory.  */
    iLimit = (iOff>9 ? iOff-9 : 0);
    for(iOff--; iOff>iLimit; iOff--){
      if( (a[iOff-1] & 0x80)==0 ) break;
    }

    fts5GetVarint(&a[iOff], (u64*)&iVal);
    pLvl->iRowid -= iVal;
    pLvl->iLeafPgno--;

    /* Skip backwards past any 0x00 varints. */
    for(ii=iOff-1; ii>=pLvl->iFirstOff && a[ii]==0x00; ii--){
      nZero++;
    }
    if( ii>=pLvl->iFirstOff && (a[ii] & 0x80) ){
      /* The byte immediately before the last 0x00 byte has the 0x80 bit
      ** set. So the last 0x00 is only a varint 0 if there are 8 more 0x80
      ** bytes before a[ii]. */
      int bZero = 0;              /* True if last 0x00 counts */
      if( (ii-8)>=pLvl->iFirstOff ){
        int j;
        for(j=1; j<=8 && (a[ii-j] & 0x80); j++);
        bZero = (j>8);
      }
      if( bZero==0 ) nZero--;
    }
    pLvl->iLeafPgno -= nZero;
    pLvl->iOff = iOff - nZero;
  }

  return pLvl->bEof;
}

static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
  Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];

  assert( iLvl<pIter->nLvl );
  if( fts5DlidxLvlPrev(pLvl) ){
    if( (iLvl+1) < pIter->nLvl ){
      fts5DlidxIterPrevR(p, pIter, iLvl+1);
      if( pLvl[1].bEof==0 ){
        fts5DataRelease(pLvl->pData);
        memset(pLvl, 0, sizeof(Fts5DlidxLvl));
        pLvl->pData = fts5DataRead(p, 
            FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
        );
        if( pLvl->pData ){
          while( fts5DlidxLvlNext(pLvl)==0 );
          pLvl->bEof = 0;
        }
      }
    }
  }

  return pIter->aLvl[0].bEof;
}
static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){
  return fts5DlidxIterPrevR(p, pIter, 0);
}

/*
** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
*/
static void fts5DlidxIterFree(Fts5DlidxIter *pIter){
  if( pIter ){
    int i;
    for(i=0; i<pIter->nLvl; i++){
      fts5DataRelease(pIter->aLvl[i].pData);
    }
    sqlite3_free(pIter);
  }
}

static Fts5DlidxIter *fts5DlidxIterInit(
  Fts5Index *p,                   /* Fts5 Backend to iterate within */
  int bRev,                       /* True for ORDER BY ASC */
  int iSegid,                     /* Segment id */
  int iLeafPg                     /* Leaf page number to load dlidx for */
){
  Fts5DlidxIter *pIter = 0;
  int i;
  int bDone = 0;

  for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
    int nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl);
    Fts5DlidxIter *pNew;

    pNew = (Fts5DlidxIter*)sqlite3_realloc(pIter, nByte);
    if( pNew==0 ){
      p->rc = SQLITE_NOMEM;
    }else{
      i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg);
      Fts5DlidxLvl *pLvl = &pNew->aLvl[i];
      pIter = pNew;
      memset(pLvl, 0, sizeof(Fts5DlidxLvl));
      pLvl->pData = fts5DataRead(p, iRowid);
      if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){
        bDone = 1;
      }
      pIter->nLvl = i+1;
    }
  }

  if( p->rc==SQLITE_OK ){
    pIter->iSegid = iSegid;
    if( bRev==0 ){
      fts5DlidxIterFirst(pIter);
    }else{
      fts5DlidxIterLast(p, pIter);
    }
  }

  if( p->rc!=SQLITE_OK ){
    fts5DlidxIterFree(pIter);
    pIter = 0;
  }

  return pIter;
}

static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){
  return pIter->aLvl[0].iRowid;
}
static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){
  return pIter->aLvl[0].iLeafPgno;
}

static void fts5LeafHeader(Fts5Data *pLeaf, int *piRowid, int *piTerm){
  *piRowid = (int)fts5GetU16(&pLeaf->p[0]);
  *piTerm = (int)fts5GetU16(&pLeaf->p[2]);
}

/*
** Load the next leaf page into the segment iterator.
*/
static void fts5SegIterNextPage(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter              /* Iterator to advance to next page */
){
  Fts5StructureSegment *pSeg = pIter->pSeg;
  fts5DataRelease(pIter->pLeaf);
  pIter->iLeafPgno++;
  if( pIter->pNextLeaf ){
    assert( pIter->iLeafPgno<=pSeg->pgnoLast );
    pIter->pLeaf = pIter->pNextLeaf;
    pIter->pNextLeaf = 0;
  }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
    pIter->pLeaf = fts5DataRead(p, 
        FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, pIter->iLeafPgno)
    );
  }else{
    pIter->pLeaf = 0;
  }
}

/*
** Argument p points to a buffer containing a varint to be interpreted as a
** position list size field. Read the varint and return the number of bytes
** read. Before returning, set *pnSz to the number of bytes in the position
** list, and *pbDel to true if the delete flag is set, or false otherwise.
*/
static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
  int nSz;
  int n = fts5GetVarint32(p, nSz);
  assert_nc( nSz>=0 );
  *pnSz = nSz/2;
  *pbDel = nSz & 0x0001;
  return n;
}

/*
** Fts5SegIter.iLeafOffset currently points to the first byte of a
** position-list size field. Read the value of the field and store it
** in the following variables:
**
**   Fts5SegIter.nPos
**   Fts5SegIter.bDel
**
** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the 
** position list content (if any).
*/
static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
  if( p->rc==SQLITE_OK ){
    int iOff = pIter->iLeafOffset;  /* Offset to read at */
    if( iOff>=pIter->pLeaf->n ){
      p->rc = FTS5_CORRUPT;
    }else{
      const u8 *a = &pIter->pLeaf->p[iOff];
      pIter->iLeafOffset += fts5GetPoslistSize(a, &pIter->nPos, &pIter->bDel);
    }
  }
}

/*
** Fts5SegIter.iLeafOffset currently points to the first byte of the 
** "nSuffix" field of a term. Function parameter nKeep contains the value
** of the "nPrefix" field (if there was one - it is passed 0 if this is
** the first term in the segment).
**
** This function populates:
**
**   Fts5SegIter.term
**   Fts5SegIter.rowid
**
** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of
** the first position list. The position list belonging to document 
** (Fts5SegIter.iRowid).
*/
static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
  u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
  int iOff = pIter->iLeafOffset;  /* Offset to read at */
  int nNew;                       /* Bytes of new data */

  iOff += fts5GetVarint32(&a[iOff], nNew);
  pIter->term.n = nKeep;
  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
  iOff += nNew;
  pIter->iTermLeafOffset = iOff;
  pIter->iTermLeafPgno = pIter->iLeafPgno;
  if( iOff>=pIter->pLeaf->n ){
    fts5SegIterNextPage(p, pIter);
    if( pIter->pLeaf==0 ){
      if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
      return;
    }
    iOff = 4;
    a = pIter->pLeaf->p;
  }
  iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
  pIter->iLeafOffset = iOff;
}

/*
** Initialize the iterator object pIter to iterate through the entries in
** segment pSeg. The iterator is left pointing to the first entry when 
** this function returns.
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. If 
** an error has already occurred when this function is called, it is a no-op.
*/
static void fts5SegIterInit(
  Fts5Index *p,                   /* FTS index object */
  Fts5StructureSegment *pSeg,     /* Description of segment */
  Fts5SegIter *pIter              /* Object to populate */
){
  if( pSeg->pgnoFirst==0 ){
    /* This happens if the segment is being used as an input to an incremental
    ** merge and all data has already been "trimmed". See function
    ** fts5TrimSegments() for details. In this case leave the iterator empty.
    ** The caller will see the (pIter->pLeaf==0) and assume the iterator is
    ** at EOF already. */
    assert( pIter->pLeaf==0 );
    return;
  }

  if( p->rc==SQLITE_OK ){
    memset(pIter, 0, sizeof(*pIter));
    pIter->pSeg = pSeg;
    pIter->iLeafPgno = pSeg->pgnoFirst-1;
    fts5SegIterNextPage(p, pIter);
  }

  if( p->rc==SQLITE_OK ){
    u8 *a = pIter->pLeaf->p;
    pIter->iLeafOffset = fts5GetU16(&a[2]);
    fts5SegIterLoadTerm(p, pIter, 0);
    fts5SegIterLoadNPos(p, pIter);
  }
}

/*
** This function is only ever called on iterators created by calls to
** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set.
**
** The iterator is in an unusual state when this function is called: the
** Fts5SegIter.iLeafOffset variable is set to the offset of the start of
** the position-list size field for the first relevant rowid on the page.
** Fts5SegIter.rowid is set, but nPos and bDel are not.
**
** This function advances the iterator so that it points to the last 
** relevant rowid on the page and, if necessary, initializes the 
** aRowidOffset[] and iRowidOffset variables. At this point the iterator
** is in its regular state - Fts5SegIter.iLeafOffset points to the first
** byte of the position list content associated with said rowid.
*/
static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
  int n = pIter->pLeaf->n;
  int i = pIter->iLeafOffset;
  u8 *a = pIter->pLeaf->p;
  int iRowidOffset = 0;

  while( 1 ){
    i64 iDelta = 0;
    int nPos;
    int bDummy;

    i += fts5GetPoslistSize(&a[i], &nPos, &bDummy);
    i += nPos;
    if( i>=n ) break;
    i += fts5GetVarint(&a[i], (u64*)&iDelta);
    if( iDelta==0 ) break;
    pIter->iRowid += iDelta;

    if( iRowidOffset>=pIter->nRowidOffset ){
      int nNew = pIter->nRowidOffset + 8;
      int *aNew = (int*)sqlite3_realloc(pIter->aRowidOffset, nNew*sizeof(int));
      if( aNew==0 ){
        p->rc = SQLITE_NOMEM;
        break;
      }
      pIter->aRowidOffset = aNew;
      pIter->nRowidOffset = nNew;
    }

    pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset;
    pIter->iLeafOffset = i;
  }
  pIter->iRowidOffset = iRowidOffset;
  fts5SegIterLoadNPos(p, pIter);
}

/*
**
*/
static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){
  assert( pIter->flags & FTS5_SEGITER_REVERSE );
  assert( pIter->flags & FTS5_SEGITER_ONETERM );

  fts5DataRelease(pIter->pLeaf);
  pIter->pLeaf = 0;
  while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){
    Fts5Data *pNew;
    pIter->iLeafPgno--;
    pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
          pIter->pSeg->iSegid, 0, pIter->iLeafPgno
    ));
    if( pNew ){
      if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
        if( pIter->iTermLeafOffset<pNew->n ){
          pIter->pLeaf = pNew;
          pIter->iLeafOffset = pIter->iTermLeafOffset;
        }
      }else{
        int iRowidOff, dummy;
        fts5LeafHeader(pNew, &iRowidOff, &dummy);
        if( iRowidOff ){
          pIter->pLeaf = pNew;
          pIter->iLeafOffset = iRowidOff;
        }
      }

      if( pIter->pLeaf ){
        u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset];
        pIter->iLeafOffset += fts5GetVarint(a, (u64*)&pIter->iRowid);
        break;
      }else{
        fts5DataRelease(pNew);
      }
    }
  }

  if( pIter->pLeaf ){
    fts5SegIterReverseInitPage(p, pIter);
  }
}

/*
** Return true if the iterator passed as the second argument currently
** points to a delete marker. A delete marker is an entry with a 0 byte
** position-list.
*/
static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5MultiSegIter *pIter){
  Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
  return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0);
}

/*
** Advance iterator pIter to the next entry. 
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. It 
** is not considered an error if the iterator reaches EOF. If an error has 
** already occurred when this function is called, it is a no-op.
*/
static void fts5SegIterNext(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  int *pbNewTerm                  /* OUT: Set for new term */
){
  assert( pbNewTerm==0 || *pbNewTerm==0 );
  if( p->rc==SQLITE_OK ){
    if( pIter->flags & FTS5_SEGITER_REVERSE ){
      assert( pIter->pNextLeaf==0 );
      if( pIter->iRowidOffset>0 ){
        u8 *a = pIter->pLeaf->p;
        int iOff;
        int nPos;
        int bDummy;
        i64 iDelta;

        pIter->iRowidOffset--;
        pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset];
        iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy);
        iOff += nPos;
        fts5GetVarint(&a[iOff], (u64*)&iDelta);
        pIter->iRowid -= iDelta;
        fts5SegIterLoadNPos(p, pIter);
      }else{
        fts5SegIterReverseNewPage(p, pIter);
      }
    }else{
      Fts5Data *pLeaf = pIter->pLeaf;
      int iOff;
      int bNewTerm = 0;
      int nKeep = 0;

      /* Search for the end of the position list within the current page. */
      u8 *a = pLeaf->p;
      int n = pLeaf->n;

      iOff = pIter->iLeafOffset + pIter->nPos;

      if( iOff<n ){
        /* The next entry is on the current page */
        u64 iDelta;
        iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
        pIter->iLeafOffset = iOff;
        if( iDelta==0 ){
          bNewTerm = 1;
          if( iOff>=n ){
            fts5SegIterNextPage(p, pIter);
            pIter->iLeafOffset = 4;
          }else if( iOff!=fts5GetU16(&a[2]) ){
            pIter->iLeafOffset += fts5GetVarint32(&a[iOff], nKeep);
          }
        }else{
          pIter->iRowid += iDelta;
        }
      }else if( pIter->pSeg==0 ){
        const u8 *pList = 0;
        const char *zTerm;
        int nList;
        if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){
          sqlite3Fts5HashScanNext(p->pHash);
          sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
        }
        if( pList==0 ){
          fts5DataRelease(pIter->pLeaf);
          pIter->pLeaf = 0;
        }else{
          pIter->pLeaf->p = (u8*)pList;
          pIter->pLeaf->n = nList;
          sqlite3Fts5BufferSet(&p->rc, &pIter->term, strlen(zTerm), (u8*)zTerm);
          pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
        }
      }else{
        iOff = 0;
        /* Next entry is not on the current page */
        while( iOff==0 ){
          fts5SegIterNextPage(p, pIter);
          pLeaf = pIter->pLeaf;
          if( pLeaf==0 ) break;
          if( (iOff = fts5GetU16(&pLeaf->p[0])) && iOff<pLeaf->n ){
            iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
            pIter->iLeafOffset = iOff;
          }
          else if( (iOff = fts5GetU16(&pLeaf->p[2])) ){
            pIter->iLeafOffset = iOff;
            bNewTerm = 1;
          }
          if( iOff>=pLeaf->n ){
            p->rc = FTS5_CORRUPT;
            return;
          }
        }
      }

      /* Check if the iterator is now at EOF. If so, return early. */
      if( pIter->pLeaf ){
        if( bNewTerm ){
          if( pIter->flags & FTS5_SEGITER_ONETERM ){
            fts5DataRelease(pIter->pLeaf);
            pIter->pLeaf = 0;
          }else{
            fts5SegIterLoadTerm(p, pIter, nKeep);
            fts5SegIterLoadNPos(p, pIter);
            if( pbNewTerm ) *pbNewTerm = 1;
          }
        }else{
          fts5SegIterLoadNPos(p, pIter);
        }
      }
    }
  }
}

#define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; }

/*
** Iterator pIter currently points to the first rowid in a doclist. This
** function sets the iterator up so that iterates in reverse order through
** the doclist.
*/
static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
  Fts5DlidxIter *pDlidx = pIter->pDlidx;
  Fts5Data *pLast = 0;
  int pgnoLast = 0;

  if( pDlidx ){
    int iSegid = pIter->pSeg->iSegid;
    pgnoLast = fts5DlidxIterPgno(pDlidx);
    pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, 0, pgnoLast));
  }else{
    int iOff;                               /* Byte offset within pLeaf */
    Fts5Data *pLeaf = pIter->pLeaf;         /* Current leaf data */

    /* Currently, Fts5SegIter.iLeafOffset (and iOff) points to the first 
    ** byte of position-list content for the current rowid. Back it up
    ** so that it points to the start of the position-list size field. */
    pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel);
    iOff = pIter->iLeafOffset;
    assert( iOff>=4 );

    /* Search for a new term within the current leaf. If one can be found,
    ** then this page contains the largest rowid for the current term. */
    while( iOff<pLeaf->n ){
      int nPos;
      i64 iDelta;
      int bDummy;

      /* Read the position-list size field */
      iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy);
      iOff += nPos;
      if( iOff>=pLeaf->n ) break;

      /* Rowid delta. Or, if 0x00, the end of doclist marker. */
      nPos = fts5GetVarint(&pLeaf->p[iOff], (u64*)&iDelta);
      if( iDelta==0 ) break;
      iOff += nPos;
    }

    /* If this condition is true then the largest rowid for the current
    ** term may not be stored on the current page. So search forward to
    ** see where said rowid really is.  */
    if( iOff>=pLeaf->n ){
      int pgno;
      Fts5StructureSegment *pSeg = pIter->pSeg;

      /* The last rowid in the doclist may not be on the current page. Search
      ** forward to find the page containing the last rowid.  */
      for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){
        i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, pgno);
        Fts5Data *pNew = fts5DataRead(p, iAbs);
        if( pNew ){
          int iRowid, iTerm;
          fts5LeafHeader(pNew, &iRowid, &iTerm);
          if( iRowid ){
            SWAPVAL(Fts5Data*, pNew, pLast);
            pgnoLast = pgno;
          }
          fts5DataRelease(pNew);
          if( iTerm ) break;
        }
      }
    }
  }

  /* If pLast is NULL at this point, then the last rowid for this doclist
  ** lies on the page currently indicated by the iterator. In this case 
  ** pIter->iLeafOffset is already set to point to the position-list size
  ** field associated with the first relevant rowid on the page.
  **
  ** Or, if pLast is non-NULL, then it is the page that contains the last
  ** rowid. In this case configure the iterator so that it points to the
  ** first rowid on this page.
  */
  if( pLast ){
    int dummy;
    int iOff;
    fts5DataRelease(pIter->pLeaf);
    pIter->pLeaf = pLast;
    pIter->iLeafPgno = pgnoLast;
    fts5LeafHeader(pLast, &iOff, &dummy);
    iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
    pIter->iLeafOffset = iOff;
  }

  fts5SegIterReverseInitPage(p, pIter);
}

/*
** Iterator pIter currently points to the first rowid of a doclist.
** There is a doclist-index associated with the final term on the current 
** page. If the current term is the last term on the page, load the 
** doclist-index from disk and initialize an iterator at (pIter->pDlidx).
*/
static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){
  int iSeg = pIter->pSeg->iSegid;
  int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
  Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */

  assert( pIter->flags & FTS5_SEGITER_ONETERM );
  assert( pIter->pDlidx==0 );

  /* Check if the current doclist ends on this page. If it does, return
  ** early without loading the doclist-index (as it belongs to a different
  ** term. */
  if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
    int iOff = pIter->iLeafOffset + pIter->nPos;
    while( iOff<pLeaf->n ){
      int bDummy;
      int nPos;
      i64 iDelta;

      /* iOff is currently the offset of the start of position list data */
      iOff += fts5GetVarint(&pLeaf->p[iOff], (u64*)&iDelta);
      if( iDelta==0 ) return;
      assert_nc( iOff<pLeaf->n );
      iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy);
      iOff += nPos;
    }
  }

  pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
}

/*
** Initialize the object pIter to point to term pTerm/nTerm within segment
** pSeg. If there is no such term in the index, the iterator is set to EOF.
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. If 
** an error has already occurred when this function is called, it is a no-op.
*/
static void fts5SegIterSeekInit(
  Fts5Index *p,                   /* FTS5 backend */
  const u8 *pTerm, int nTerm,     /* Term to seek to */
  int flags,                      /* Mask of FTS5INDEX_XXX flags */
  Fts5StructureSegment *pSeg,     /* Description of segment */
  Fts5SegIter *pIter              /* Object to populate */
){
  int iPg = 1;
  int h;
  int bGe = (flags & FTS5INDEX_QUERY_SCAN);
  int bDlidx = 0;                 /* True if there is a doclist-index */
  Fts5Data *pLeaf;

  assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 );
  assert( pTerm && nTerm );
  memset(pIter, 0, sizeof(*pIter));
  pIter->pSeg = pSeg;

  /* This block sets stack variable iPg to the leaf page number that may
  ** contain term (pTerm/nTerm), if it is present in the segment. */
  for(h=pSeg->nHeight-1; h>0; h--){
    Fts5NodeIter node;              /* For iterating through internal nodes */
    i64 iRowid = FTS5_SEGMENT_ROWID(pSeg->iSegid, h, iPg);
    Fts5Data *pNode = fts5DataRead(p, iRowid);
    if( pNode==0 ) break;

    fts5NodeIterInit(pNode->p, pNode->n, &node);
    assert( node.term.n==0 );

    iPg = node.iChild;
    bDlidx = node.bDlidx;
    for(fts5NodeIterNext(&p->rc, &node);
        node.aData && fts5BufferCompareBlob(&node.term, pTerm, nTerm)<=0;
        fts5NodeIterNext(&p->rc, &node)
    ){
      iPg = node.iChild;
      bDlidx = node.bDlidx;
    }
    fts5NodeIterFree(&node);
    fts5DataRelease(pNode);
  }

  if( iPg<pSeg->pgnoFirst ){
    iPg = pSeg->pgnoFirst;
    bDlidx = 0;
  }

  pIter->iLeafPgno = iPg - 1;
  fts5SegIterNextPage(p, pIter);

  if( (pLeaf = pIter->pLeaf) ){
    int res;
    pIter->iLeafOffset = fts5GetU16(&pLeaf->p[2]);
    if( pIter->iLeafOffset<4 || pIter->iLeafOffset>=pLeaf->n ){
      p->rc = FTS5_CORRUPT;
    }else{
      fts5SegIterLoadTerm(p, pIter, 0);
      fts5SegIterLoadNPos(p, pIter);
      do {
        res = fts5BufferCompareBlob(&pIter->term, pTerm, nTerm);
        if( res>=0 ) break;
        fts5SegIterNext(p, pIter, 0);
      }while( pIter->pLeaf && p->rc==SQLITE_OK );

      if( bGe==0 && res ){
        /* Set iterator to point to EOF */
        fts5DataRelease(pIter->pLeaf);
        pIter->pLeaf = 0;
      }
    }
  }

  if( p->rc==SQLITE_OK && bGe==0 ){
    pIter->flags |= FTS5_SEGITER_ONETERM;
    if( pIter->pLeaf ){
      if( flags & FTS5INDEX_QUERY_DESC ){
        pIter->flags |= FTS5_SEGITER_REVERSE;
      }
      if( bDlidx ){
        fts5SegIterLoadDlidx(p, pIter);
      }
      if( flags & FTS5INDEX_QUERY_DESC ){
        fts5SegIterReverse(p, pIter);
      }
    }
  }
}

/*
** Initialize the object pIter to point to term pTerm/nTerm within the
** in-memory hash table. If there is no such term in the hash-table, the 
** iterator is set to EOF.
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. If 
** an error has already occurred when this function is called, it is a no-op.
*/
static void fts5SegIterHashInit(
  Fts5Index *p,                   /* FTS5 backend */
  const u8 *pTerm, int nTerm,     /* Term to seek to */
  int flags,                      /* Mask of FTS5INDEX_XXX flags */
  Fts5SegIter *pIter              /* Object to populate */
){
  const u8 *pList = 0;
  int nList = 0;
  const u8 *z = 0;
  int n = 0;

  assert( p->pHash );
  assert( p->rc==SQLITE_OK );

  if( pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN) ){
    p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm);
    sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &pList, &nList);
    n = (z ? strlen((const char*)z) : 0);
  }else{
    pIter->flags |= FTS5_SEGITER_ONETERM;
    sqlite3Fts5HashQuery(p->pHash, (const char*)pTerm, nTerm, &pList, &nList);
    z = pTerm;
    n = nTerm;
  }

  if( pList ){
    Fts5Data *pLeaf;
    sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z);
    pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data));
    if( pLeaf==0 ) return;
    pLeaf->p = (u8*)pList;
    pLeaf->n = nList;
    pIter->pLeaf = pLeaf;
    pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid);

    if( flags & FTS5INDEX_QUERY_DESC ){
      pIter->flags |= FTS5_SEGITER_REVERSE;
      fts5SegIterReverseInitPage(p, pIter);
    }else{
      fts5SegIterLoadNPos(p, pIter);
    }
  }
}

/*
** Zero the iterator passed as the only argument.
*/
static void fts5SegIterClear(Fts5SegIter *pIter){
  fts5BufferFree(&pIter->term);
  fts5DataRelease(pIter->pLeaf);
  fts5DataRelease(pIter->pNextLeaf);
  fts5DlidxIterFree(pIter->pDlidx);
  sqlite3_free(pIter->aRowidOffset);
  memset(pIter, 0, sizeof(Fts5SegIter));
}

#ifdef SQLITE_DEBUG

/*
** This function is used as part of the big assert() procedure implemented by
** fts5AssertMultiIterSetup(). It ensures that the result currently stored
** in *pRes is the correct result of comparing the current positions of the
** two iterators.
*/
static void fts5AssertComparisonResult(
  Fts5MultiSegIter *pIter, 
  Fts5SegIter *p1,
  Fts5SegIter *p2,
  Fts5CResult *pRes
){
  int i1 = p1 - pIter->aSeg;
  int i2 = p2 - pIter->aSeg;

  if( p1->pLeaf || p2->pLeaf ){
    if( p1->pLeaf==0 ){
      assert( pRes->iFirst==i2 );
    }else if( p2->pLeaf==0 ){
      assert( pRes->iFirst==i1 );
    }else{
      int nMin = MIN(p1->term.n, p2->term.n);
      int res = memcmp(p1->term.p, p2->term.p, nMin);
      if( res==0 ) res = p1->term.n - p2->term.n;

      if( res==0 ){
        assert( pRes->bTermEq==1 );
        assert( p1->iRowid!=p2->iRowid );
        res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1;
      }else{
        assert( pRes->bTermEq==0 );
      }

      if( res<0 ){
        assert( pRes->iFirst==i1 );
      }else{
        assert( pRes->iFirst==i2 );
      }
    }
  }
}

/*
** This function is a no-op unless SQLITE_DEBUG is defined when this module
** is compiled. In that case, this function is essentially an assert() 
** statement used to verify that the contents of the pIter->aFirst[] array
** are correct.
*/
static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5MultiSegIter *pIter){
  if( p->rc==SQLITE_OK ){
    int i;
    for(i=0; i<pIter->nSeg; i+=2){
      Fts5SegIter *p1 = &pIter->aSeg[i];
      Fts5SegIter *p2 = &pIter->aSeg[i+1];
      Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2];
      fts5AssertComparisonResult(pIter, p1, p2, pRes);
    }

    for(i=1; i<(pIter->nSeg / 2); i+=2){
      Fts5CResult *pRes = &pIter->aFirst[i];
      Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ];
      Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ];

      fts5AssertComparisonResult(pIter, p1, p2, pRes);
    }
  }
}
#else
# define fts5AssertMultiIterSetup(x,y)
#endif

/*
** Do the comparison necessary to populate pIter->aFirst[iOut].
**
** If the returned value is non-zero, then it is the index of an entry
** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing
** to a key that is a duplicate of another, higher priority, 
** segment-iterator in the pSeg->aSeg[] array.
*/
static int fts5MultiIterDoCompare(Fts5MultiSegIter *pIter, int iOut){
  int i1;                         /* Index of left-hand Fts5SegIter */
  int i2;                         /* Index of right-hand Fts5SegIter */
  int iRes;
  Fts5SegIter *p1;                /* Left-hand Fts5SegIter */
  Fts5SegIter *p2;                /* Right-hand Fts5SegIter */
  Fts5CResult *pRes = &pIter->aFirst[iOut];

  assert( iOut<pIter->nSeg && iOut>0 );
  assert( pIter->bRev==0 || pIter->bRev==1 );

  if( iOut>=(pIter->nSeg/2) ){
    i1 = (iOut - pIter->nSeg/2) * 2;
    i2 = i1 + 1;
  }else{
    i1 = pIter->aFirst[iOut*2].iFirst;
    i2 = pIter->aFirst[iOut*2+1].iFirst;
  }
  p1 = &pIter->aSeg[i1];
  p2 = &pIter->aSeg[i2];

  pRes->bTermEq = 0;
  if( p1->pLeaf==0 ){           /* If p1 is at EOF */
    iRes = i2;
  }else if( p2->pLeaf==0 ){     /* If p2 is at EOF */
    iRes = i1;
  }else{
    int res = fts5BufferCompare(&p1->term, &p2->term);
    if( res==0 ){
      assert( i2>i1 );
      assert( i2!=0 );
      pRes->bTermEq = 1;
      if( p1->iRowid==p2->iRowid ){
        p1->bDel = p2->bDel;
        return i2;
      }
      res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1;
    }
    assert( res!=0 );
    if( res<0 ){
      iRes = i1;
    }else{
      iRes = i2;
    }
  }

  pRes->iFirst = iRes;
  return 0;
}

/*
** Move the seg-iter so that it points to the first rowid on page iLeafPgno.
** It is an error if leaf iLeafPgno does not exist or contains no rowids.
*/
static void fts5SegIterGotoPage(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  int iLeafPgno
){
  assert( iLeafPgno>pIter->iLeafPgno );

  if( iLeafPgno>pIter->pSeg->pgnoLast ){
    p->rc = FTS5_CORRUPT;
  }else{
    fts5DataRelease(pIter->pNextLeaf);
    pIter->pNextLeaf = 0;
    pIter->iLeafPgno = iLeafPgno-1;
    fts5SegIterNextPage(p, pIter);
    assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno );

    if( p->rc==SQLITE_OK ){
      int iOff;
      u8 *a = pIter->pLeaf->p;
      int n = pIter->pLeaf->n;

      iOff = fts5GetU16(&a[0]);
      if( iOff<4 || iOff>=n ){
        p->rc = FTS5_CORRUPT;
      }else{
        iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
        pIter->iLeafOffset = iOff;
        fts5SegIterLoadNPos(p, pIter);
      }
    }
  }
}

/*
** Advance the iterator passed as the second argument until it is at or 
** past rowid iFrom. Regardless of the value of iFrom, the iterator is
** always advanced at least once.
*/
static void fts5SegIterNextFrom(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  i64 iMatch                      /* Advance iterator at least this far */
){
  int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
  Fts5DlidxIter *pDlidx = pIter->pDlidx;
  int iLeafPgno = pIter->iLeafPgno;
  int bMove = 1;

  assert( pIter->flags & FTS5_SEGITER_ONETERM );
  assert( pIter->pDlidx );
  assert( pIter->pLeaf );

  if( bRev==0 ){
    while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){
      iLeafPgno = fts5DlidxIterPgno(pDlidx);
      fts5DlidxIterNext(p, pDlidx);
    }
    assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc );
    if( iLeafPgno>pIter->iLeafPgno ){
      fts5SegIterGotoPage(p, pIter, iLeafPgno);
      bMove = 0;
    }
  }else{
    assert( pIter->pNextLeaf==0 );
    assert( iMatch<pIter->iRowid );
    while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){
      fts5DlidxIterPrev(p, pDlidx);
    }
    iLeafPgno = fts5DlidxIterPgno(pDlidx);

    assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno );

    if( iLeafPgno<pIter->iLeafPgno ){
      pIter->iLeafPgno = iLeafPgno+1;
      fts5SegIterReverseNewPage(p, pIter);
      bMove = 0;
    }
  }

  while( p->rc==SQLITE_OK ){
    if( bMove ) fts5SegIterNext(p, pIter, 0);
    if( pIter->pLeaf==0 ) break;
    if( bRev==0 && pIter->iRowid>=iMatch ) break;
    if( bRev!=0 && pIter->iRowid<=iMatch ) break;
    bMove = 1;
  }
}


/*
** Free the iterator object passed as the second argument.
*/
static void fts5MultiIterFree(Fts5Index *p, Fts5MultiSegIter *pIter){
  if( pIter ){
    int i;
    for(i=0; i<pIter->nSeg; i++){
      fts5SegIterClear(&pIter->aSeg[i]);
    }
    sqlite3_free(pIter);
  }
}

static void fts5MultiIterAdvanced(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  Fts5MultiSegIter *pIter,        /* Iterator to update aFirst[] array for */
  int iChanged,                   /* Index of sub-iterator just advanced */
  int iMinset                     /* Minimum entry in aFirst[] to set */
){
  int i;
  for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){
    int iEq;
    if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){
      fts5SegIterNext(p, &pIter->aSeg[iEq], 0);
      i = pIter->nSeg + iEq;
    }
  }
}

static int fts5MultiIterAdvanceRowid(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  Fts5MultiSegIter *pIter,        /* Iterator to update aFirst[] array for */
  int iChanged                    /* Index of sub-iterator just advanced */
){
  int i;
  Fts5SegIter *pNew = &pIter->aSeg[iChanged];
  Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001];

  for(i=(pIter->nSeg+iChanged)/2; p->rc==SQLITE_OK; i=i/2){
    Fts5CResult *pRes = &pIter->aFirst[i];

    assert( pNew->pLeaf );
    assert( pRes->bTermEq==0 || pOther->pLeaf );
    
    if( pRes->bTermEq ){
      if( pNew->iRowid==pOther->iRowid ){
        return 1;
      }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){
        pNew = pOther;
      }
    }
    pRes->iFirst = (pNew - pIter->aSeg);
    if( i==1 ) break;

    pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ];
  }

  return 0;
}

/*
** Move the iterator to the next entry. 
**
** If an error occurs, an error code is left in Fts5Index.rc. It is not 
** considered an error if the iterator reaches EOF, or if it is already at 
** EOF when this function is called.
*/
static void fts5MultiIterNext(
  Fts5Index *p, 
  Fts5MultiSegIter *pIter,
  int bFrom,                      /* True if argument iFrom is valid */
  i64 iFrom                       /* Advance at least as far as this */
){
  if( p->rc==SQLITE_OK ){
    int bUseFrom = bFrom;
    do {
      int iFirst = pIter->aFirst[1].iFirst;
      int bNewTerm = 0;
      Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
      assert( p->rc==SQLITE_OK );
      if( bUseFrom && pSeg->pDlidx ){
        fts5SegIterNextFrom(p, pSeg, iFrom);
      }else{
        fts5SegIterNext(p, pSeg, &bNewTerm);
      }

      if( pSeg->pLeaf==0 || bNewTerm 
       || fts5MultiIterAdvanceRowid(p, pIter, iFirst)
      ){
        fts5MultiIterAdvanced(p, pIter, iFirst, 1);
      }
      fts5AssertMultiIterSetup(p, pIter);

      bUseFrom = 0;
    }while( pIter->bSkipEmpty && fts5MultiIterIsEmpty(p, pIter) );
  }
}

static Fts5MultiSegIter *fts5MultiIterAlloc(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  int nSeg
){
  Fts5MultiSegIter *pNew;
  int nSlot;                      /* Power of two >= nSeg */

  for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2);
  pNew = fts5IdxMalloc(p, 
      sizeof(Fts5MultiSegIter) +          /* pNew */
      sizeof(Fts5SegIter) * nSlot +       /* pNew->aSeg[] */
      sizeof(Fts5CResult) * nSlot         /* pNew->aFirst[] */
  );
  if( pNew ){
    pNew->nSeg = nSlot;
    pNew->aSeg = (Fts5SegIter*)&pNew[1];
    pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot];
  }
  return pNew;
}

/*
** Allocate a new Fts5MultiSegIter object.
**
** The new object will be used to iterate through data in structure pStruct.
** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel
** is zero or greater, data from the first nSegment segments on level iLevel
** is merged.
**
** The iterator initially points to the first term/rowid entry in the 
** iterated data.
*/
static void fts5MultiIterNew(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  Fts5Structure *pStruct,         /* Structure of specific index */
  int bSkipEmpty,                 /* True to ignore delete-keys */
  int flags,                      /* FTS5INDEX_QUERY_XXX flags */
  const u8 *pTerm, int nTerm,     /* Term to seek to (or NULL/0) */
  int iLevel,                     /* Level to iterate (-1 for all) */
  int nSegment,                   /* Number of segments to merge (iLevel>=0) */
  Fts5MultiSegIter **ppOut        /* New object */
){
  int nSeg = 0;                   /* Number of segment-iters in use */
  int iIter = 0;                  /* */
  int iSeg;                       /* Used to iterate through segments */
  Fts5StructureLevel *pLvl;
  Fts5MultiSegIter *pNew;

  assert( (pTerm==0 && nTerm==0) || iLevel<0 );

  /* Allocate space for the new multi-seg-iterator. */
  if( p->rc==SQLITE_OK ){
    if( iLevel<0 ){
      assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
      nSeg = pStruct->nSegment;
      nSeg += (p->pHash ? 1 : 0);
    }else{
      nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment);
    }
  }
  *ppOut = pNew = fts5MultiIterAlloc(p, nSeg);
  if( pNew==0 ) return;
  pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC));
  pNew->bSkipEmpty = bSkipEmpty;

  /* Initialize each of the component segment iterators. */
  if( iLevel<0 ){
    Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel];
    if( p->pHash ){
      /* Add a segment iterator for the current contents of the hash table. */
      Fts5SegIter *pIter = &pNew->aSeg[iIter++];
      fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter);
    }
    for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){
      for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){
        Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
        Fts5SegIter *pIter = &pNew->aSeg[iIter++];
        if( pTerm==0 ){
          fts5SegIterInit(p, pSeg, pIter);
        }else{
          fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter);
        }
      }
    }
  }else{
    pLvl = &pStruct->aLevel[iLevel];
    for(iSeg=nSeg-1; iSeg>=0; iSeg--){
      fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]);
    }
  }
  assert( iIter==nSeg );

  /* If the above was successful, each component iterators now points 
  ** to the first entry in its segment. In this case initialize the 
  ** aFirst[] array. Or, if an error has occurred, free the iterator
  ** object and set the output variable to NULL.  */
  if( p->rc==SQLITE_OK ){
    for(iIter=pNew->nSeg-1; iIter>0; iIter--){
      int iEq;
      if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){
        fts5SegIterNext(p, &pNew->aSeg[iEq], 0);
        fts5MultiIterAdvanced(p, pNew, iEq, iIter);
      }
    }
    fts5AssertMultiIterSetup(p, pNew);

    if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){
      fts5MultiIterNext(p, pNew, 0, 0);
    }
  }else{
    fts5MultiIterFree(p, pNew);
    *ppOut = 0;
  }
}

/*
** Create an Fts5MultiSegIter that iterates through the doclist provided
** as the second argument.
*/
static void fts5MultiIterNew2(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  Fts5Data *pData,                /* Doclist to iterate through */
  int bDesc,                      /* True for descending rowid order */
  Fts5MultiSegIter **ppOut        /* New object */
){
  Fts5MultiSegIter *pNew;
  pNew = fts5MultiIterAlloc(p, 2);
  if( pNew ){
    Fts5SegIter *pIter = &pNew->aSeg[1];

    pIter->flags = FTS5_SEGITER_ONETERM;
    if( pData->n>0 ){
      pIter->pLeaf = pData;
      pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid);
      pNew->aFirst[1].iFirst = 1;
      if( bDesc ){
        pNew->bRev = 1;
        pIter->flags |= FTS5_SEGITER_REVERSE;
        fts5SegIterReverseInitPage(p, pIter);
      }else{
        fts5SegIterLoadNPos(p, pIter);
      }
      pData = 0;
    }

    *ppOut = pNew;
  }

  fts5DataRelease(pData);
}

/*
** Return true if the iterator is at EOF or if an error has occurred. 
** False otherwise.
*/
static int fts5MultiIterEof(Fts5Index *p, Fts5MultiSegIter *pIter){
  return (p->rc || pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0);
}

/*
** Return the rowid of the entry that the iterator currently points
** to. If the iterator points to EOF when this function is called the
** results are undefined.
*/
static i64 fts5MultiIterRowid(Fts5MultiSegIter *pIter){
  assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf );
  return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid;
}

/*
** Move the iterator to the next entry at or following iMatch.
*/
static void fts5MultiIterNextFrom(
  Fts5Index *p, 
  Fts5MultiSegIter *pIter, 
  i64 iMatch
){
  while( 1 ){
    i64 iRowid;
    fts5MultiIterNext(p, pIter, 1, iMatch);
    if( fts5MultiIterEof(p, pIter) ) break;
    iRowid = fts5MultiIterRowid(pIter);
    if( pIter->bRev==0 && iRowid>=iMatch ) break;
    if( pIter->bRev!=0 && iRowid<=iMatch ) break;
  }
}

/*
** Return a pointer to a buffer containing the term associated with the 
** entry that the iterator currently points to.
*/
static const u8 *fts5MultiIterTerm(Fts5MultiSegIter *pIter, int *pn){
  Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
  *pn = p->term.n;
  return p->term.p;
}

static void fts5ChunkIterate(
  Fts5Index *p,                   /* Index object */
  Fts5SegIter *pSeg,              /* Poslist of this iterator */
  void *pCtx,                     /* Context pointer for xChunk callback */
  void (*xChunk)(Fts5Index*, void*, const u8*, int)
){
  int nRem = pSeg->nPos;          /* Number of bytes still to come */
  Fts5Data *pData = 0;
  u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
  int nChunk = MIN(nRem, pSeg->pLeaf->n - pSeg->iLeafOffset);
  int pgno = pSeg->iLeafPgno;
  int pgnoSave = 0;

  if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){
    pgnoSave = pgno+1;
  }

  while( 1 ){
    xChunk(p, pCtx, pChunk, nChunk);
    nRem -= nChunk;
    fts5DataRelease(pData);
    if( nRem<=0 ){
      break;
    }else{
      pgno++;
      pData = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, 0, pgno));
      if( pData==0 ) break;
      pChunk = &pData->p[4];
      nChunk = MIN(nRem, pData->n - 4);
      if( pgno==pgnoSave ){
        assert( pSeg->pNextLeaf==0 );
        pSeg->pNextLeaf = pData;
        pData = 0;
      }
    }
  }
}



/*
** Allocate a new segment-id for the structure pStruct. The new segment
** id must be between 1 and 65335 inclusive, and must not be used by 
** any currently existing segment. If a free segment id cannot be found,
** SQLITE_FULL is returned.
**
** If an error has already occurred, this function is a no-op. 0 is 
** returned in this case.
*/
static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){
  u32 iSegid = 0;

  if( p->rc==SQLITE_OK ){
    if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){
      p->rc = SQLITE_FULL;
    }else{
      while( iSegid==0 ){
        int iLvl, iSeg;
        sqlite3_randomness(sizeof(u32), (void*)&iSegid);
        iSegid = (iSegid % ((1 << FTS5_DATA_ID_B) - 2)) + 1;
        assert( iSegid>0 && iSegid<=65535 );
        for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
          for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
            if( iSegid==pStruct->aLevel[iLvl].aSeg[iSeg].iSegid ){
              iSegid = 0;
            }
          }
        }
      }
    }
  }

  return (int)iSegid;
}

/*
** Discard all data currently cached in the hash-tables.
*/
static void fts5IndexDiscardData(Fts5Index *p){
  assert( p->pHash || p->nPendingData==0 );
  if( p->pHash ){
    sqlite3Fts5HashClear(p->pHash);
    p->nPendingData = 0;
  }
}

/*
** Return the size of the prefix, in bytes, that buffer (nNew/pNew) shares
** with buffer (nOld/pOld).
*/
static int fts5PrefixCompress(
  int nOld, const u8 *pOld,
  int nNew, const u8 *pNew
){
  int i;
  assert( fts5BlobCompare(pOld, nOld, pNew, nNew)<0 );
  for(i=0; i<nOld; i++){
    if( pOld[i]!=pNew[i] ) break;
  }
  return i;
}

static void fts5WriteDlidxClear(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,
  int bFlush                      /* If true, write dlidx to disk */
){
  int i;
  assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) );
  for(i=0; i<pWriter->nDlidx; i++){
    Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
    if( pDlidx->buf.n==0 ) break;
    if( bFlush ){
      assert( pDlidx->pgno!=0 );
      fts5DataWrite(p, 
          FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
          pDlidx->buf.p, pDlidx->buf.n
      );
    }
    sqlite3Fts5BufferZero(&pDlidx->buf);
    pDlidx->bPrevValid = 0;
  }
}

/*
** Grow the pWriter->aDlidx[] array to at least nLvl elements in size.
** Any new array elements are zeroed before returning.
*/
static int fts5WriteDlidxGrow(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  int nLvl
){
  if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){
    Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc(
        pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl
    );
    if( aDlidx==0 ){
      p->rc = SQLITE_NOMEM;
    }else{
      int nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx);
      memset(&aDlidx[pWriter->nDlidx], 0, nByte);
      pWriter->aDlidx = aDlidx;
      pWriter->nDlidx = nLvl;
    }
  }
  return p->rc;
}

/*
** If an "nEmpty" record must be written to the b-tree before the next
** term, write it now. 
*/
static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){
  if( pWriter->nEmpty ){
    int bFlag = 0;
    Fts5PageWriter *pPg;
    pPg = &pWriter->aWriter[1];

    /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written
    ** to the database, also write the doclist-index to disk.  */
    if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
      bFlag = 1;
    }
    fts5WriteDlidxClear(p, pWriter, bFlag);
    fts5BufferAppendVarint(&p->rc, &pPg->buf, bFlag);
    fts5BufferAppendVarint(&p->rc, &pPg->buf, pWriter->nEmpty);
    pWriter->nEmpty = 0;
  }else{
    fts5WriteDlidxClear(p, pWriter, 0);
  }

  assert( pWriter->nDlidx==0 || pWriter->aDlidx[0].buf.n==0 );
  assert( pWriter->nDlidx==0 || pWriter->aDlidx[0].bPrevValid==0 );
}

static void fts5WriteBtreeGrow(Fts5Index *p, Fts5SegWriter *pWriter){
  if( p->rc==SQLITE_OK ){
    Fts5PageWriter *aNew;
    Fts5PageWriter *pNew;
    int nNew = sizeof(Fts5PageWriter) * (pWriter->nWriter+1);

    aNew = (Fts5PageWriter*)sqlite3_realloc(pWriter->aWriter, nNew);
    if( aNew==0 ){
      p->rc = SQLITE_NOMEM;
      return;
    }

    pNew = &aNew[pWriter->nWriter];
    memset(pNew, 0, sizeof(Fts5PageWriter));
    pNew->pgno = 1;
    fts5BufferAppendVarint(&p->rc, &pNew->buf, 1);

    pWriter->nWriter++;
    pWriter->aWriter = aNew;
  }
}

/*
** This is called once for each leaf page except the first that contains
** at least one term. Argument (nTerm/pTerm) is the split-key - a term that
** is larger than all terms written to earlier leaves, and equal to or
** smaller than the first term on the new leaf.
**
** If an error occurs, an error code is left in Fts5Index.rc. If an error
** has already occurred when this function is called, it is a no-op.
*/
static void fts5WriteBtreeTerm(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegWriter *pWriter,         /* Writer object */
  int nTerm, const u8 *pTerm      /* First term on new page */
){
  int iHeight;
  for(iHeight=1; 1; iHeight++){
    Fts5PageWriter *pPage;

    if( iHeight>=pWriter->nWriter ){
      fts5WriteBtreeGrow(p, pWriter);
      if( p->rc ) return;
    }
    pPage = &pWriter->aWriter[iHeight];

    fts5WriteBtreeNEmpty(p, pWriter);

    if( pPage->buf.n>=p->pConfig->pgsz ){
      /* pPage will be written to disk. The term will be written into the
      ** parent of pPage.  */
      i64 iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, iHeight, pPage->pgno);
      fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);
      fts5BufferZero(&pPage->buf);
      fts5BufferZero(&pPage->term);
      fts5BufferAppendVarint(&p->rc, &pPage->buf, pPage[-1].pgno);
      pPage->pgno++;
    }else{
      int nPre = fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm);
      fts5BufferAppendVarint(&p->rc, &pPage->buf, nPre+2);
      fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm-nPre);
      fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm-nPre, pTerm+nPre);
      fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
      break;
    }
  }
}

/*
** This function is called when flushing a leaf page that contains no
** terms at all to disk.
*/
static void fts5WriteBtreeNoTerm(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegWriter *pWriter          /* Writer object */
){
  /* If there were no rowids on the leaf page either and the doclist-index
  ** has already been started, append an 0x00 byte to it.  */
  if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){
    Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0];
    assert( pDlidx->bPrevValid );
    sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0);
  }

  /* Increment the "number of sequential leaves without a term" counter. */
  pWriter->nEmpty++;
}

static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){
  i64 iRowid;
  int iOff;

  iOff = 1 + fts5GetVarint(&pBuf->p[1], (u64*)&iRowid);
  fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid);
  return iRowid;
}

/*
** Rowid iRowid has just been appended to the current leaf page. It is the
** first on the page. This function appends an appropriate entry to the current
** doclist-index.
*/
static void fts5WriteDlidxAppend(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
  i64 iRowid
){
  int i;
  int bDone = 0;

  for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
    i64 iVal;
    Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];

    if( pDlidx->buf.n>=p->pConfig->pgsz ){
      /* The current doclist-index page is full. Write it to disk and push
      ** a copy of iRowid (which will become the first rowid on the next
      ** doclist-index leaf page) up into the next level of the b-tree 
      ** hierarchy. If the node being flushed is currently the root node,
      ** also push its first rowid upwards. */
      pDlidx->buf.p[0] = 0x01;    /* Not the root node */
      fts5DataWrite(p, 
          FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
          pDlidx->buf.p, pDlidx->buf.n
      );
      fts5WriteDlidxGrow(p, pWriter, i+2);
      pDlidx = &pWriter->aDlidx[i];
      if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){
        i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf);

        /* This was the root node. Push its first rowid up to the new root. */
        pDlidx[1].pgno = pDlidx->pgno;
        sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0);
        sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno);
        sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst);
        pDlidx[1].bPrevValid = 1;
        pDlidx[1].iPrev = iFirst;
      }

      sqlite3Fts5BufferZero(&pDlidx->buf);
      pDlidx->bPrevValid = 0;
      pDlidx->pgno++;
    }else{
      bDone = 1;
    }

    if( pDlidx->bPrevValid ){
      iVal = iRowid - pDlidx->iPrev;
    }else{
      i64 iPgno = (i==0 ? pWriter->aWriter[0].pgno : pDlidx[-1].pgno);
      assert( pDlidx->buf.n==0 );
      sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone);
      sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno);
      iVal = iRowid;
    }

    sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal);
    pDlidx->bPrevValid = 1;
    pDlidx->iPrev = iRowid;
  }
}

static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
  static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
  Fts5PageWriter *pPage = &pWriter->aWriter[0];
  i64 iRowid;

  if( pWriter->bFirstTermInPage ){
    /* No term was written to this page. */
    assert( 0==fts5GetU16(&pPage->buf.p[2]) );
    fts5WriteBtreeNoTerm(p, pWriter);
  }

  /* Write the current page to the db. */
  iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, 0, pPage->pgno);
  fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);

  /* Initialize the next page. */
  fts5BufferZero(&pPage->buf);
  fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
  pPage->pgno++;

  /* Increase the leaves written counter */
  pWriter->nLeafWritten++;

  /* The new leaf holds no terms or rowids */
  pWriter->bFirstTermInPage = 1;
  pWriter->bFirstRowidInPage = 1;
}

/*
** Append term pTerm/nTerm to the segment being written by the writer passed
** as the second argument.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has 
** already occurred, this function is a no-op.
*/
static void fts5WriteAppendTerm(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,
  int nTerm, const u8 *pTerm 
){
  int nPrefix;                    /* Bytes of prefix compression for term */
  Fts5PageWriter *pPage = &pWriter->aWriter[0];

  assert( pPage->buf.n==0 || pPage->buf.n>4 );
  if( pPage->buf.n==0 ){
    /* Zero the first term and first docid fields */
    static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
    fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
    assert( pWriter->bFirstTermInPage );
  }
  if( p->rc ) return;
  
  if( pWriter->bFirstTermInPage ){
    /* Update the "first term" field of the page header. */
    assert( pPage->buf.p[2]==0 && pPage->buf.p[3]==0 );
    fts5PutU16(&pPage->buf.p[2], pPage->buf.n);
    nPrefix = 0;
    if( pPage->pgno!=1 ){
      /* This is the first term on a leaf that is not the leftmost leaf in
      ** the segment b-tree. In this case it is necessary to add a term to
      ** the b-tree hierarchy that is (a) larger than the largest term 
      ** already written to the segment and (b) smaller than or equal to
      ** this term. In other words, a prefix of (pTerm/nTerm) that is one
      ** byte longer than the longest prefix (pTerm/nTerm) shares with the
      ** previous term. 
      **
      ** Usually, the previous term is available in pPage->term. The exception
      ** is if this is the first term written in an incremental-merge step.
      ** In this case the previous term is not available, so just write a
      ** copy of (pTerm/nTerm) into the parent node. This is slightly
      ** inefficient, but still correct.  */
      int n = nTerm;
      if( pPage->term.n ){
        n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm);
      }
      fts5WriteBtreeTerm(p, pWriter, n, pTerm);
      pPage = &pWriter->aWriter[0];
    }
  }else{
    nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm);
    fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
  }

  /* Append the number of bytes of new data, then the term data itself
  ** to the page. */
  fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix);
  fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]);

  /* Update the Fts5PageWriter.term field. */
  fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
  pWriter->bFirstTermInPage = 0;

  pWriter->bFirstRowidInPage = 0;
  pWriter->bFirstRowidInDoclist = 1;

  assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
  pWriter->aDlidx[0].pgno = pPage->pgno;

  /* If the current leaf page is full, flush it to disk. */
  if( pPage->buf.n>=p->pConfig->pgsz ){
    fts5WriteFlushLeaf(p, pWriter);
  }
}

/*
** Append a docid and position-list size field to the writers output. 
*/
static void fts5WriteAppendRowid(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,
  i64 iRowid,
  int nPos
){
  if( p->rc==SQLITE_OK ){
    Fts5PageWriter *pPage = &pWriter->aWriter[0];

    /* If this is to be the first docid written to the page, set the 
    ** docid-pointer in the page-header. Also append a value to the dlidx
    ** buffer, in case a doclist-index is required.  */
    if( pWriter->bFirstRowidInPage ){
      fts5PutU16(pPage->buf.p, pPage->buf.n);
      fts5WriteDlidxAppend(p, pWriter, iRowid);
    }

    /* Write the docid. */
    if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){
      fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid);
    }else{
      assert( p->rc || iRowid>pWriter->iPrevRowid );
      fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid);
    }
    pWriter->iPrevRowid = iRowid;
    pWriter->bFirstRowidInDoclist = 0;
    pWriter->bFirstRowidInPage = 0;

    fts5BufferAppendVarint(&p->rc, &pPage->buf, nPos);

    if( pPage->buf.n>=p->pConfig->pgsz ){
      fts5WriteFlushLeaf(p, pWriter);
    }
  }
}

static void fts5WriteAppendPoslistData(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
  const u8 *aData, 
  int nData
){
  Fts5PageWriter *pPage = &pWriter->aWriter[0];
  const u8 *a = aData;
  int n = nData;
  
  assert( p->pConfig->pgsz>0 );
  while( p->rc==SQLITE_OK && (pPage->buf.n + n)>=p->pConfig->pgsz ){
    int nReq = p->pConfig->pgsz - pPage->buf.n;
    int nCopy = 0;
    while( nCopy<nReq ){
      i64 dummy;
      nCopy += fts5GetVarint(&a[nCopy], (u64*)&dummy);
    }
    fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a);
    a += nCopy;
    n -= nCopy;
    fts5WriteFlushLeaf(p, pWriter);
  }
  if( n>0 ){
    fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a);
  }
}

static void fts5WriteAppendZerobyte(Fts5Index *p, Fts5SegWriter *pWriter){
  fts5BufferAppendVarint(&p->rc, &pWriter->aWriter[0].buf, 0);
}

/*
** Flush any data cached by the writer object to the database. Free any
** allocations associated with the writer.
*/
static void fts5WriteFinish(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,         /* Writer object */
  int *pnHeight,                  /* OUT: Height of the b-tree */
  int *pnLeaf                     /* OUT: Number of leaf pages in b-tree */
){
  int i;
  if( p->rc==SQLITE_OK ){
    Fts5PageWriter *pLeaf = &pWriter->aWriter[0];
    if( pLeaf->pgno==1 && pLeaf->buf.n==0 ){
      *pnLeaf = 0;
      *pnHeight = 0;
    }else{
      if( pLeaf->buf.n>4 ){
        fts5WriteFlushLeaf(p, pWriter);
      }
      *pnLeaf = pLeaf->pgno-1;
      if( pWriter->nWriter==1 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
        fts5WriteBtreeGrow(p, pWriter);
      }
      if( pWriter->nWriter>1 ){
        fts5WriteBtreeNEmpty(p, pWriter);
      }
      *pnHeight = pWriter->nWriter;

      for(i=1; i<pWriter->nWriter; i++){
        Fts5PageWriter *pPg = &pWriter->aWriter[i];
        fts5DataWrite(p, 
            FTS5_SEGMENT_ROWID(pWriter->iSegid, i, pPg->pgno), 
            pPg->buf.p, pPg->buf.n
        );
      }
    }
  }
  for(i=0; i<pWriter->nWriter; i++){
    Fts5PageWriter *pPg = &pWriter->aWriter[i];
    fts5BufferFree(&pPg->term);
    fts5BufferFree(&pPg->buf);
  }
  sqlite3_free(pWriter->aWriter);

  for(i=0; i<pWriter->nDlidx; i++){
    sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf);
  }
  sqlite3_free(pWriter->aDlidx);
}

static void fts5WriteInit(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
  int iSegid
){
  memset(pWriter, 0, sizeof(Fts5SegWriter));
  pWriter->iSegid = iSegid;

  pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, sizeof(Fts5PageWriter));
  if( fts5WriteDlidxGrow(p, pWriter, 1) ) return;
  pWriter->nWriter = 1;
  pWriter->nDlidx = 1;
  pWriter->aWriter[0].pgno = 1;
  pWriter->bFirstTermInPage = 1;
}

static void fts5WriteInitForAppend(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegWriter *pWriter,         /* Writer to initialize */
  Fts5StructureSegment *pSeg      /* Segment object to append to */
){
  int nByte = pSeg->nHeight * sizeof(Fts5PageWriter);
  memset(pWriter, 0, sizeof(Fts5SegWriter));
  pWriter->iSegid = pSeg->iSegid;
  pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, nByte);
  pWriter->aDlidx = (Fts5DlidxWriter*)fts5IdxMalloc(p, sizeof(Fts5DlidxWriter));

  if( p->rc==SQLITE_OK ){
    int pgno = 1;
    int i;
    pWriter->nDlidx = 1;
    pWriter->nWriter = pSeg->nHeight;
    pWriter->aWriter[0].pgno = pSeg->pgnoLast+1;
    for(i=pSeg->nHeight-1; i>0; i--){
      i64 iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, i, pgno);
      Fts5PageWriter *pPg = &pWriter->aWriter[i];
      pPg->pgno = pgno;
      fts5DataBuffer(p, &pPg->buf, iRowid);
      if( p->rc==SQLITE_OK ){
        Fts5NodeIter ss;
        fts5NodeIterInit(pPg->buf.p, pPg->buf.n, &ss);
        while( ss.aData ) fts5NodeIterNext(&p->rc, &ss);
        fts5BufferSet(&p->rc, &pPg->term, ss.term.n, ss.term.p);
        pgno = ss.iChild;
        fts5NodeIterFree(&ss);
      }
    }
    assert( p->rc!=SQLITE_OK || (pgno+pWriter->nEmpty)==pSeg->pgnoLast );
    pWriter->bFirstTermInPage = 1;
    assert( pWriter->aWriter[0].term.n==0 );
  }
}

/*
** Iterator pIter was used to iterate through the input segments of on an
** incremental merge operation. This function is called if the incremental
** merge step has finished but the input has not been completely exhausted.
*/
static void fts5TrimSegments(Fts5Index *p, Fts5MultiSegIter *pIter){
  int i;
  Fts5Buffer buf;
  memset(&buf, 0, sizeof(Fts5Buffer));
  for(i=0; i<pIter->nSeg; i++){
    Fts5SegIter *pSeg = &pIter->aSeg[i];
    if( pSeg->pSeg==0 ){
      /* no-op */
    }else if( pSeg->pLeaf==0 ){
      /* All keys from this input segment have been transfered to the output.
      ** Set both the first and last page-numbers to 0 to indicate that the
      ** segment is now empty. */
      pSeg->pSeg->pgnoLast = 0;
      pSeg->pSeg->pgnoFirst = 0;
    }else{
      int iOff = pSeg->iTermLeafOffset;     /* Offset on new first leaf page */
      i64 iLeafRowid;
      Fts5Data *pData;
      int iId = pSeg->pSeg->iSegid;
      u8 aHdr[4] = {0x00, 0x00, 0x00, 0x04};

      iLeafRowid = FTS5_SEGMENT_ROWID(iId, 0, pSeg->iTermLeafPgno);
      pData = fts5DataRead(p, iLeafRowid);
      if( pData ){
        fts5BufferZero(&buf);
        fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr);
        fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n);
        fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p);
        fts5BufferAppendBlob(&p->rc, &buf, pData->n - iOff, &pData->p[iOff]);
        fts5DataRelease(pData);
        pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
        fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 0, 1),iLeafRowid);
        fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
      }
    }
  }
  fts5BufferFree(&buf);
}

static void fts5MergeChunkCallback(
  Fts5Index *p, 
  void *pCtx, 
  const u8 *pChunk, int nChunk
){
  Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx;
  fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk);
}

/*
**
*/
static void fts5IndexMergeLevel(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Structure **ppStruct,       /* IN/OUT: Stucture of index */
  int iLvl,                       /* Level to read input from */
  int *pnRem                      /* Write up to this many output leaves */
){
  Fts5Structure *pStruct = *ppStruct;
  Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
  Fts5StructureLevel *pLvlOut;
  Fts5MultiSegIter *pIter = 0;    /* Iterator to read input data */
  int nRem = pnRem ? *pnRem : 0;  /* Output leaf pages left to write */
  int nInput;                     /* Number of input segments */
  Fts5SegWriter writer;           /* Writer object */
  Fts5StructureSegment *pSeg;     /* Output segment */
  Fts5Buffer term;
  int bRequireDoclistTerm = 0;    /* Doclist terminator (0x00) required */
  int bOldest;                    /* True if the output segment is the oldest */

  assert( iLvl<pStruct->nLevel );
  assert( pLvl->nMerge<=pLvl->nSeg );

  memset(&writer, 0, sizeof(Fts5SegWriter));
  memset(&term, 0, sizeof(Fts5Buffer));
  if( pLvl->nMerge ){
    pLvlOut = &pStruct->aLevel[iLvl+1];
    assert( pLvlOut->nSeg>0 );
    nInput = pLvl->nMerge;
    fts5WriteInitForAppend(p, &writer, &pLvlOut->aSeg[pLvlOut->nSeg-1]);
    pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1];
  }else{
    int iSegid = fts5AllocateSegid(p, pStruct);

    /* Extend the Fts5Structure object as required to ensure the output
    ** segment exists. */
    if( iLvl==pStruct->nLevel-1 ){
      fts5StructureAddLevel(&p->rc, ppStruct);
      pStruct = *ppStruct;
    }
    fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0);
    if( p->rc ) return;
    pLvl = &pStruct->aLevel[iLvl];
    pLvlOut = &pStruct->aLevel[iLvl+1];

    fts5WriteInit(p, &writer, iSegid);

    /* Add the new segment to the output level */
    pSeg = &pLvlOut->aSeg[pLvlOut->nSeg];
    pLvlOut->nSeg++;
    pSeg->pgnoFirst = 1;
    pSeg->iSegid = iSegid;
    pStruct->nSegment++;

    /* Read input from all segments in the input level */
    nInput = pLvl->nSeg;
  }
  bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2);

  assert( iLvl>=0 );
  for(fts5MultiIterNew(p, pStruct, 0, 0, 0, 0, iLvl, nInput, &pIter);
      fts5MultiIterEof(p, pIter)==0;
      fts5MultiIterNext(p, pIter, 0, 0)
  ){
    Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
    int nPos;                     /* position-list size field value */
    int nTerm;
    const u8 *pTerm;

    /* Check for key annihilation. */
    if( pSeg->nPos==0 && (bOldest || pSeg->bDel==0) ) continue;

    pTerm = fts5MultiIterTerm(pIter, &nTerm);
    if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){
      if( pnRem && writer.nLeafWritten>nRem ){
        break;
      }

      /* This is a new term. Append a term to the output segment. */
      if( bRequireDoclistTerm ){
        fts5WriteAppendZerobyte(p, &writer);
      }
      fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
      fts5BufferSet(&p->rc, &term, nTerm, pTerm);
      bRequireDoclistTerm = 1;
    }

    /* Append the rowid to the output */
    /* WRITEPOSLISTSIZE */
    nPos = pSeg->nPos*2 + pSeg->bDel;
    fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter), nPos);

    /* Append the position-list data to the output */
    fts5ChunkIterate(p, pSeg, (void*)&writer, fts5MergeChunkCallback);
  }

  /* Flush the last leaf page to disk. Set the output segment b-tree height
  ** and last leaf page number at the same time.  */
  fts5WriteFinish(p, &writer, &pSeg->nHeight, &pSeg->pgnoLast);

  if( fts5MultiIterEof(p, pIter) ){
    int i;

    /* Remove the redundant segments from the %_data table */
    for(i=0; i<nInput; i++){
      fts5DataRemoveSegment(p, pLvl->aSeg[i].iSegid);
    }

    /* Remove the redundant segments from the input level */
    if( pLvl->nSeg!=nInput ){
      int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment);
      memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove);
    }
    pStruct->nSegment -= nInput;
    pLvl->nSeg -= nInput;
    pLvl->nMerge = 0;
    if( pSeg->pgnoLast==0 ){
      pLvlOut->nSeg--;
      pStruct->nSegment--;
    }
  }else{
    assert( pSeg->nHeight>0 && pSeg->pgnoLast>0 );
    fts5TrimSegments(p, pIter);
    pLvl->nMerge = nInput;
  }

  fts5MultiIterFree(p, pIter);
  fts5BufferFree(&term);
  if( pnRem ) *pnRem -= writer.nLeafWritten;
}

/*
** Do up to nPg pages of automerge work on the index.
*/
static void fts5IndexMerge(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Structure **ppStruct,       /* IN/OUT: Current structure of index */
  int nPg                         /* Pages of work to do */
){
  int nRem = nPg;
  Fts5Structure *pStruct = *ppStruct;
  while( nRem>0 && p->rc==SQLITE_OK ){
    int iLvl;                   /* To iterate through levels */
    int iBestLvl = 0;           /* Level offering the most input segments */
    int nBest = 0;              /* Number of input segments on best level */

    /* Set iBestLvl to the level to read input segments from. */
    assert( pStruct->nLevel>0 );
    for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
      Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
      if( pLvl->nMerge ){
        if( pLvl->nMerge>nBest ){
          iBestLvl = iLvl;
          nBest = pLvl->nMerge;
        }
        break;
      }
      if( pLvl->nSeg>nBest ){
        nBest = pLvl->nSeg;
        iBestLvl = iLvl;
      }
    }

    /* If nBest is still 0, then the index must be empty. */
#ifdef SQLITE_DEBUG
    for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){
      assert( pStruct->aLevel[iLvl].nSeg==0 );
    }
#endif

    if( nBest<p->pConfig->nAutomerge 
        && pStruct->aLevel[iBestLvl].nMerge==0 
      ){
      break;
    }
    fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem);
    if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){
      fts5StructurePromote(p, iBestLvl+1, pStruct);
    }
  }
  *ppStruct = pStruct;
}

/*
** A total of nLeaf leaf pages of data has just been flushed to a level-0
** segment. This function updates the write-counter accordingly and, if
** necessary, performs incremental merge work.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has 
** already occurred, this function is a no-op.
*/
static void fts5IndexAutomerge(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Structure **ppStruct,       /* IN/OUT: Current structure of index */
  int nLeaf                       /* Number of output leaves just written */
){
  if( p->rc==SQLITE_OK && p->pConfig->nAutomerge>0 ){
    Fts5Structure *pStruct = *ppStruct;
    u64 nWrite;                   /* Initial value of write-counter */
    int nWork;                    /* Number of work-quanta to perform */
    int nRem;                     /* Number of leaf pages left to write */

    /* Update the write-counter. While doing so, set nWork. */
    nWrite = pStruct->nWriteCounter;
    nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit));
    pStruct->nWriteCounter += nLeaf;
    nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel);

    fts5IndexMerge(p, ppStruct, nRem);
  }
}

static void fts5IndexCrisismerge(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Structure **ppStruct        /* IN/OUT: Current structure of index */
){
  const int nCrisis = p->pConfig->nCrisisMerge;
  Fts5Structure *pStruct = *ppStruct;
  int iLvl = 0;

  assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 );
  while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){
    fts5IndexMergeLevel(p, &pStruct, iLvl, 0);
    fts5StructurePromote(p, iLvl+1, pStruct);
    iLvl++;
  }
  *ppStruct = pStruct;
}

static int fts5IndexReturn(Fts5Index *p){
  int rc = p->rc;
  p->rc = SQLITE_OK;
  return rc;
}

typedef struct Fts5FlushCtx Fts5FlushCtx;
struct Fts5FlushCtx {
  Fts5Index *pIdx;
  Fts5SegWriter writer; 
};

/*
** Buffer aBuf[] contains a list of varints, all small enough to fit
** in a 32-bit integer. Return the size of the largest prefix of this 
** list nMax bytes or less in size.
*/
static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
  int ret;
  u32 dummy;
  ret = fts5GetVarint32(aBuf, dummy);
  while( 1 ){
    int i = fts5GetVarint32(&aBuf[ret], dummy);
    if( (ret + i) > nMax ) break;
    ret += i;
  }
  return ret;
}

#define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) { \
  assert( pBuf->nSpace>=(pBuf->n+nBlob) );             \
  memcpy(&pBuf->p[pBuf->n], pBlob, nBlob);             \
  pBuf->n += nBlob;                                    \
}

/*
** Flush the contents of in-memory hash table iHash to a new level-0 
** segment on disk. Also update the corresponding structure record.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has 
** already occurred, this function is a no-op.
*/
static void fts5FlushOneHash(Fts5Index *p){
  Fts5Hash *pHash = p->pHash;
  Fts5Structure *pStruct;
  int iSegid;
  int pgnoLast = 0;                 /* Last leaf page number in segment */

  /* Obtain a reference to the index structure and allocate a new segment-id
  ** for the new level-0 segment.  */
  pStruct = fts5StructureRead(p);
  iSegid = fts5AllocateSegid(p, pStruct);

  if( iSegid ){
    const int pgsz = p->pConfig->pgsz;

    Fts5StructureSegment *pSeg;   /* New segment within pStruct */
    int nHeight;                  /* Height of new segment b-tree */
    Fts5Buffer *pBuf;             /* Buffer in which to assemble leaf page */
    const u8 *zPrev = 0;

    Fts5SegWriter writer;
    fts5WriteInit(p, &writer, iSegid);

    /* Pre-allocate the buffer used to assemble leaf pages to the target
    ** page size.  */
    assert( pgsz>0 );
    pBuf = &writer.aWriter[0].buf;
    fts5BufferGrow(&p->rc, pBuf, pgsz + 20);

    /* Begin scanning through hash table entries. This loop runs once for each
    ** term/doclist currently stored within the hash table. */
    if( p->rc==SQLITE_OK ){
      memset(pBuf->p, 0, 4);
      pBuf->n = 4;
      p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
    }
    while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
      const char *zTerm;          /* Buffer containing term */
      int nTerm;                  /* Size of zTerm in bytes */
      const u8 *pDoclist;         /* Pointer to doclist for this term */
      int nDoclist;               /* Size of doclist in bytes */
      int nSuffix;                /* Size of term suffix */

      sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
      nTerm = strlen(zTerm);

      /* Decide if the term will fit on the current leaf. If it will not, 
      ** flush the leaf to disk here.  */
      if( (pBuf->n + nTerm + 2) > pgsz ){
        fts5WriteFlushLeaf(p, &writer);
        pBuf = &writer.aWriter[0].buf;
        if( (nTerm + 32) > pBuf->nSpace ){
          fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n);
          if( p->rc ) break;
        }
      }

      /* Write the term to the leaf. And if it is the first on the leaf, and
      ** the leaf is not page number 1, push it up into the b-tree hierarchy 
      ** as well.  */
      if( writer.bFirstTermInPage==0 ){
        int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm);
        pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], nPre);
        nSuffix = nTerm - nPre;
      }else{
        fts5PutU16(&pBuf->p[2], pBuf->n);
        writer.bFirstTermInPage = 0;
        if( writer.aWriter[0].pgno!=1 ){
          int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm);
          fts5WriteBtreeTerm(p, &writer, nPre+1, (const u8*)zTerm);
          pBuf = &writer.aWriter[0].buf;
          assert( nPre<nTerm );
        }
        nSuffix = nTerm;
      }
      pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], nSuffix);
      fts5BufferSafeAppendBlob(pBuf, (const u8*)&zTerm[nTerm-nSuffix], nSuffix);

      /* We just wrote a term into page writer.aWriter[0].pgno. If a 
      ** doclist-index is to be generated for this doclist, it will be
      ** associated with this page. */
      assert( writer.nDlidx>0 && writer.aDlidx[0].buf.n==0 );
      writer.aDlidx[0].pgno = writer.aWriter[0].pgno;

      if( pgsz>=(pBuf->n + nDoclist + 1) ){
        /* The entire doclist will fit on the current leaf. */
        fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
      }else{
        i64 iRowid = 0;
        i64 iDelta = 0;
        int iOff = 0;

        writer.bFirstRowidInPage = 0;

        /* The entire doclist will not fit on this leaf. The following 
        ** loop iterates through the poslists that make up the current 
        ** doclist.  */
        while( p->rc==SQLITE_OK && iOff<nDoclist ){
          int nPos;
          int nCopy;
          int bDummy;
          iOff += fts5GetVarint(&pDoclist[iOff], (u64*)&iDelta);
          nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy);
          nCopy += nPos;
          iRowid += iDelta;
          
          if( writer.bFirstRowidInPage ){
            fts5PutU16(&pBuf->p[0], pBuf->n);   /* first docid on page */
            pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid);
            writer.bFirstRowidInPage = 0;
            fts5WriteDlidxAppend(p, &writer, iRowid);
          }else{
            pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta);
          }
          assert( pBuf->n<=pBuf->nSpace );

          if( (pBuf->n + nCopy) <= pgsz ){
            /* The entire poslist will fit on the current leaf. So copy
            ** it in one go. */
            fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
          }else{
            /* The entire poslist will not fit on this leaf. So it needs
            ** to be broken into sections. The only qualification being
            ** that each varint must be stored contiguously.  */
            const u8 *pPoslist = &pDoclist[iOff];
            int iPos = 0;
            while( p->rc==SQLITE_OK ){
              int nSpace = pgsz - pBuf->n;
              int n = 0;
              if( (nCopy - iPos)<=nSpace ){
                n = nCopy - iPos;
              }else{
                n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
              }
              assert( n>0 );
              fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
              iPos += n;
              if( pBuf->n>=pgsz ){
                fts5WriteFlushLeaf(p, &writer);
                pBuf = &writer.aWriter[0].buf;
              }
              if( iPos>=nCopy ) break;
            }
          }
          iOff += nCopy;
        }
      }

      pBuf->p[pBuf->n++] = '\0';
      assert( pBuf->n<=pBuf->nSpace );
      zPrev = (const u8*)zTerm;
      sqlite3Fts5HashScanNext(pHash);
    }
    sqlite3Fts5HashClear(pHash);
    fts5WriteFinish(p, &writer, &nHeight, &pgnoLast);

    /* Update the Fts5Structure. It is written back to the database by the
    ** fts5StructureRelease() call below.  */
    if( pStruct->nLevel==0 ){
      fts5StructureAddLevel(&p->rc, &pStruct);
    }
    fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
    if( p->rc==SQLITE_OK ){
      pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
      pSeg->iSegid = iSegid;
      pSeg->nHeight = nHeight;
      pSeg->pgnoFirst = 1;
      pSeg->pgnoLast = pgnoLast;
      pStruct->nSegment++;
    }
    fts5StructurePromote(p, 0, pStruct);
  }


  fts5IndexAutomerge(p, &pStruct, pgnoLast);
  fts5IndexCrisismerge(p, &pStruct);
  fts5StructureWrite(p, pStruct);
  fts5StructureRelease(pStruct);
}

/*
** Flush any data stored in the in-memory hash tables to the database.
*/
static void fts5IndexFlush(Fts5Index *p){
  /* Unless it is empty, flush the hash table to disk */
  if( p->nPendingData ){
    assert( p->pHash );
    p->nPendingData = 0;
    fts5FlushOneHash(p);
  }
}


int sqlite3Fts5IndexOptimize(Fts5Index *p){
  Fts5Structure *pStruct;
  Fts5Structure *pNew = 0;
  int nSeg = 0;

  assert( p->rc==SQLITE_OK );
  fts5IndexFlush(p);
  pStruct = fts5StructureRead(p);

  if( pStruct ){
    assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
    nSeg = pStruct->nSegment;
    if( nSeg>1 ){
      int nByte = sizeof(Fts5Structure);
      nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel);
      pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte);
    }
  }
  if( pNew ){
    Fts5StructureLevel *pLvl;
    int nByte = nSeg * sizeof(Fts5StructureSegment);
    pNew->nLevel = pStruct->nLevel+1;
    pNew->nWriteCounter = pStruct->nWriteCounter;
    pLvl = &pNew->aLevel[pStruct->nLevel];
    pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte);
    if( pLvl->aSeg ){
      int iLvl, iSeg;
      int iSegOut = 0;
      for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
        for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
          pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg];
          iSegOut++;
        }
      }
      pNew->nSegment = pLvl->nSeg = nSeg;
    }else{
      sqlite3_free(pNew);
      pNew = 0;
    }
  }

  if( pNew ){
    int iLvl = pNew->nLevel-1;
    while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){
      int nRem = FTS5_OPT_WORK_UNIT;
      fts5IndexMergeLevel(p, &pNew, iLvl, &nRem);
    }

    fts5StructureWrite(p, pNew);
    fts5StructureRelease(pNew);
  }

  fts5StructureRelease(pStruct);
  return fts5IndexReturn(p); 
}

int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){
  Fts5Structure *pStruct;

  pStruct = fts5StructureRead(p);
  if( pStruct && pStruct->nLevel ){
    fts5IndexMerge(p, &pStruct, nMerge);
    fts5StructureWrite(p, pStruct);
  }
  fts5StructureRelease(pStruct);

  return fts5IndexReturn(p);
}

static void fts5PoslistCallback(
  Fts5Index *p, 
  void *pCtx, 
  const u8 *pChunk, int nChunk
){
  fts5BufferAppendBlob(&p->rc, (Fts5Buffer*)pCtx, nChunk, pChunk);
}

/*
** Iterator pIter currently points to a valid entry (not EOF). This
** function appends the position list data for the current entry to
** buffer pBuf. It does not make a copy of the position-list size
** field.
*/
static void fts5SegiterPoslist(
  Fts5Index *p,
  Fts5SegIter *pSeg,
  Fts5Buffer *pBuf
){
  fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
}

/*
** Iterator pMulti currently points to a valid entry (not EOF). This
** function appends a copy of the position-list of the entry pMulti 
** currently points to to buffer pBuf.
**
** If an error occurs, an error code is left in p->rc. It is assumed
** no error has already occurred when this function is called.
*/
static void fts5MultiIterPoslist(
  Fts5Index *p,
  Fts5MultiSegIter *pMulti,
  int bSz,                        /* Append a size field before the data */
  Fts5Buffer *pBuf
){
  if( p->rc==SQLITE_OK ){
    Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ];
    assert( fts5MultiIterEof(p, pMulti)==0 );

    if( bSz ){
      /* WRITEPOSLISTSIZE */
      fts5BufferAppendVarint(&p->rc, pBuf, pSeg->nPos*2);
    }
    fts5SegiterPoslist(p, pSeg, pBuf);
  }
}

static void fts5DoclistIterNext(Fts5DoclistIter *pIter){
  if( pIter->i<pIter->n ){
    int bDummy;
    if( pIter->i ){
      i64 iDelta;
      pIter->i += fts5GetVarint(&pIter->a[pIter->i], (u64*)&iDelta);
      pIter->iRowid += iDelta;
    }else{
      pIter->i += fts5GetVarint(&pIter->a[pIter->i], (u64*)&pIter->iRowid);
    }
    pIter->i += fts5GetPoslistSize(
        &pIter->a[pIter->i], &pIter->nPoslist, &bDummy
    );
    pIter->aPoslist = &pIter->a[pIter->i];
    pIter->i += pIter->nPoslist;
  }else{
    pIter->aPoslist = 0;
  }
}

static void fts5DoclistIterInit(
  Fts5Buffer *pBuf, 
  Fts5DoclistIter *pIter
){
  memset(pIter, 0, sizeof(*pIter));
  pIter->a = pBuf->p;
  pIter->n = pBuf->n;
  fts5DoclistIterNext(pIter);
}

/*
** Append a doclist to buffer pBuf.
*/
static void fts5MergeAppendDocid(
  int *pRc,                       /* IN/OUT: Error code */
  Fts5Buffer *pBuf,               /* Buffer to write to */
  i64 *piLastRowid,               /* IN/OUT: Previous rowid written (if any) */
  i64 iRowid                      /* Rowid to append */
){
  if( pBuf->n==0 ){
    fts5BufferAppendVarint(pRc, pBuf, iRowid);
  }else{
    fts5BufferAppendVarint(pRc, pBuf, iRowid - *piLastRowid);
  }
  *piLastRowid = iRowid;
}

/*
** Buffers p1 and p2 contain doclists. This function merges the content
** of the two doclists together and sets buffer p1 to the result before
** returning.
**
** If an error occurs, an error code is left in p->rc. If an error has
** already occurred, this function is a no-op.
*/
static void fts5MergePrefixLists(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Buffer *p1,                 /* First list to merge */
  Fts5Buffer *p2                  /* Second list to merge */
){
  if( p2->n ){
    i64 iLastRowid = 0;
    Fts5DoclistIter i1;
    Fts5DoclistIter i2;
    Fts5Buffer out;
    Fts5Buffer tmp;
    memset(&out, 0, sizeof(out));
    memset(&tmp, 0, sizeof(tmp));

    fts5DoclistIterInit(p1, &i1);
    fts5DoclistIterInit(p2, &i2);
    while( p->rc==SQLITE_OK && (i1.aPoslist!=0 || i2.aPoslist!=0) ){
      if( i2.aPoslist==0 || (i1.aPoslist && i1.iRowid<i2.iRowid) ){
        /* Copy entry from i1 */
        fts5MergeAppendDocid(&p->rc, &out, &iLastRowid, i1.iRowid);
        /* WRITEPOSLISTSIZE */
        fts5BufferAppendVarint(&p->rc, &out, i1.nPoslist * 2);
        fts5BufferAppendBlob(&p->rc, &out, i1.nPoslist, i1.aPoslist);
        fts5DoclistIterNext(&i1);
      }
      else if( i1.aPoslist==0 || i2.iRowid!=i1.iRowid ){
        /* Copy entry from i2 */
        fts5MergeAppendDocid(&p->rc, &out, &iLastRowid, i2.iRowid);
        /* WRITEPOSLISTSIZE */
        fts5BufferAppendVarint(&p->rc, &out, i2.nPoslist * 2);
        fts5BufferAppendBlob(&p->rc, &out, i2.nPoslist, i2.aPoslist);
        fts5DoclistIterNext(&i2);
      }
      else{
        Fts5PoslistReader r1;
        Fts5PoslistReader r2;
        Fts5PoslistWriter writer;

        memset(&writer, 0, sizeof(writer));

        /* Merge the two position lists. */ 
        fts5MergeAppendDocid(&p->rc, &out, &iLastRowid, i2.iRowid);
        fts5BufferZero(&tmp);
        sqlite3Fts5PoslistReaderInit(-1, i1.aPoslist, i1.nPoslist, &r1);
        sqlite3Fts5PoslistReaderInit(-1, i2.aPoslist, i2.nPoslist, &r2);
        while( p->rc==SQLITE_OK && (r1.bEof==0 || r2.bEof==0) ){
          i64 iNew;
          if( r2.bEof || (r1.bEof==0 && r1.iPos<r2.iPos) ){
            iNew = r1.iPos;
            sqlite3Fts5PoslistReaderNext(&r1);
          }else{
            iNew = r2.iPos;
            sqlite3Fts5PoslistReaderNext(&r2);
            if( r1.iPos==r2.iPos ) sqlite3Fts5PoslistReaderNext(&r1);
          }
          p->rc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, iNew);
        }

        /* WRITEPOSLISTSIZE */
        fts5BufferAppendVarint(&p->rc, &out, tmp.n * 2);
        fts5BufferAppendBlob(&p->rc, &out, tmp.n, tmp.p);
        fts5DoclistIterNext(&i1);
        fts5DoclistIterNext(&i2);
      }
    }

    fts5BufferSet(&p->rc, p1, out.n, out.p);
    fts5BufferFree(&tmp);
    fts5BufferFree(&out);
  }
}

static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){
  Fts5Buffer tmp = *p1;
  *p1 = *p2;
  *p2 = tmp;
}

static void fts5SetupPrefixIter(
  Fts5Index *p,                   /* Index to read from */
  int bDesc,                      /* True for "ORDER BY rowid DESC" */
  const u8 *pToken,               /* Buffer containing prefix to match */
  int nToken,                     /* Size of buffer pToken in bytes */
  Fts5IndexIter *pIter            /* Populate this object */
){
  Fts5Structure *pStruct;
  Fts5Buffer *aBuf;
  const int nBuf = 32;

  aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf);
  pStruct = fts5StructureRead(p);

  if( aBuf && pStruct ){
    const int flags = FTS5INDEX_QUERY_SCAN;
    int i;
    i64 iLastRowid = 0;
    Fts5MultiSegIter *p1 = 0;     /* Iterator used to gather data from index */
    Fts5Data *pData;
    Fts5Buffer doclist;

    memset(&doclist, 0, sizeof(doclist));
    for(fts5MultiIterNew(p, pStruct, 1, flags, pToken, nToken, -1, 0, &p1);
        fts5MultiIterEof(p, p1)==0;
        fts5MultiIterNext(p, p1, 0, 0)
    ){
      i64 iRowid = fts5MultiIterRowid(p1);
      int nTerm;
      const u8 *pTerm = fts5MultiIterTerm(p1, &nTerm);
      assert( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 );
      if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break;

      if( doclist.n>0 && iRowid<=iLastRowid ){
        for(i=0; p->rc==SQLITE_OK && doclist.n; i++){
          assert( i<nBuf );
          if( aBuf[i].n==0 ){
            fts5BufferSwap(&doclist, &aBuf[i]);
            fts5BufferZero(&doclist);
          }else{
            fts5MergePrefixLists(p, &doclist, &aBuf[i]);
            fts5BufferZero(&aBuf[i]);
          }
        }
      }

      fts5MergeAppendDocid(&p->rc, &doclist, &iLastRowid, iRowid);
      fts5MultiIterPoslist(p, p1, 1, &doclist);
    }

    for(i=0; i<nBuf; i++){
      fts5MergePrefixLists(p, &doclist, &aBuf[i]);
      fts5BufferFree(&aBuf[i]);
    }
    fts5MultiIterFree(p, p1);

    pData = fts5IdxMalloc(p, sizeof(Fts5Data) + doclist.n);
    if( pData ){
      pData->p = (u8*)&pData[1];
      pData->n = doclist.n;
      memcpy(pData->p, doclist.p, doclist.n);
      fts5MultiIterNew2(p, pData, bDesc, &pIter->pMulti);
    }
    fts5BufferFree(&doclist);
  }

  fts5StructureRelease(pStruct);
  sqlite3_free(aBuf);
}


/*
** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
** to the document with rowid iRowid.
*/
int sqlite3Fts5IndexBeginWrite(Fts5Index *p, i64 iRowid){
  assert( p->rc==SQLITE_OK );

  /* Allocate the hash table if it has not already been allocated */
  if( p->pHash==0 ){
    p->rc = sqlite3Fts5HashNew(&p->pHash, &p->nPendingData);
  }

  /* Flush the hash table to disk if required */
  if( iRowid<=p->iWriteRowid || (p->nPendingData > p->nMaxPendingData) ){
    fts5IndexFlush(p);
  }
  p->iWriteRowid = iRowid;
  return fts5IndexReturn(p);
}

/*
** Commit data to disk.
*/
int sqlite3Fts5IndexSync(Fts5Index *p, int bCommit){
  assert( p->rc==SQLITE_OK );
  fts5IndexFlush(p);
  if( bCommit ) fts5CloseReader(p);
  return fts5IndexReturn(p);
}

/*
** Discard any data stored in the in-memory hash tables. Do not write it
** to the database. Additionally, assume that the contents of the %_data
** table may have changed on disk. So any in-memory caches of %_data 
** records must be invalidated.
*/
int sqlite3Fts5IndexRollback(Fts5Index *p){
  fts5CloseReader(p);
  fts5IndexDiscardData(p);
  assert( p->rc==SQLITE_OK );
  return SQLITE_OK;
}

/*
** The %_data table is completely empty when this function is called. This
** function populates it with the initial structure objects for each index,
** and the initial version of the "averages" record (a zero-byte blob).
*/
int sqlite3Fts5IndexReinit(Fts5Index *p){
  Fts5Structure s;

  assert( p->rc==SQLITE_OK );
  p->rc = sqlite3Fts5IndexSetAverages(p, (const u8*)"", 0);

  memset(&s, 0, sizeof(Fts5Structure));
  fts5StructureWrite(p, &s);

  return fts5IndexReturn(p);
}

/*
** Open a new Fts5Index handle. If the bCreate argument is true, create
** and initialize the underlying %_data table.
**
** If successful, set *pp to point to the new object and return SQLITE_OK.
** Otherwise, set *pp to NULL and return an SQLite error code.
*/
int sqlite3Fts5IndexOpen(
  Fts5Config *pConfig, 
  int bCreate, 
  Fts5Index **pp,
  char **pzErr
){
  int rc = SQLITE_OK;
  Fts5Index *p;                   /* New object */

  *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index));
  if( rc==SQLITE_OK ){
    p->pConfig = pConfig;
    p->nWorkUnit = FTS5_WORK_UNIT;
    p->nMaxPendingData = 1024*1024;
    p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName);
    if( p->zDataTbl && bCreate ){
      rc = sqlite3Fts5CreateTable(
          pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr
      );
      if( rc==SQLITE_OK ){
        rc = sqlite3Fts5IndexReinit(p);
      }
    }
  }

  assert( rc!=SQLITE_OK || p->rc==SQLITE_OK );
  if( rc ){
    sqlite3Fts5IndexClose(p);
    *pp = 0;
  }
  return rc;
}

/*
** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen().
*/
int sqlite3Fts5IndexClose(Fts5Index *p){
  int rc = SQLITE_OK;
  if( p ){
    assert( p->pReader==0 );
    sqlite3_finalize(p->pWriter);
    sqlite3_finalize(p->pDeleter);
    sqlite3Fts5HashFree(p->pHash);
    sqlite3Fts5BufferFree(&p->scratch);
    sqlite3_free(p->zDataTbl);
    sqlite3_free(p);
  }
  return rc;
}

/*
** Argument p points to a buffer containing utf-8 text that is n bytes in 
** size. Return the number of bytes in the nChar character prefix of the
** buffer, or 0 if there are less than nChar characters in total.
*/
static int fts5IndexCharlenToBytelen(const char *p, int nByte, int nChar){
  int n = 0;
  int i;
  for(i=0; i<nChar; i++){
    if( n>=nByte ) return 0;      /* Input contains fewer than nChar chars */
    if( (unsigned char)p[n++]>=0xc0 ){
      while( (p[n] & 0xc0)==0x80 ) n++;
    }
  }
  return n;
}

/*
** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of
** unicode characters in the string.
*/
static int fts5IndexCharlen(const char *pIn, int nIn){
  int nChar = 0;            
  int i = 0;
  while( i<nIn ){
    if( (unsigned char)pIn[i++]>=0xc0 ){
      while( i<nIn && (pIn[i] & 0xc0)==0x80 ) i++;
    }
    nChar++;
  }
  return nChar;
}

/*
** Insert or remove data to or from the index. Each time a document is 
** added to or removed from the index, this function is called one or more
** times.
**
** For an insert, it must be called once for each token in the new document.
** If the operation is a delete, it must be called (at least) once for each
** unique token in the document with an iCol value less than zero. The iPos
** argument is ignored for a delete.
*/
int sqlite3Fts5IndexWrite(
  Fts5Index *p,                   /* Index to write to */
  int iCol,                       /* Column token appears in (-ve -> delete) */
  int iPos,                       /* Position of token within column */
  const char *pToken, int nToken  /* Token to add or remove to or from index */
){
  int i;                          /* Used to iterate through indexes */
  int rc = SQLITE_OK;             /* Return code */
  Fts5Config *pConfig = p->pConfig;

  assert( p->rc==SQLITE_OK );

  /* Add the entry to the main terms index. */
  rc = sqlite3Fts5HashWrite(
      p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken
  );

  for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){
    int nByte = fts5IndexCharlenToBytelen(pToken, nToken, pConfig->aPrefix[i]);
    if( nByte ){
      rc = sqlite3Fts5HashWrite(p->pHash, 
          p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX+i+1, pToken, nByte
      );
    }
  }

  return rc;
}

/*
** Open a new iterator to iterate though all docids that match the 
** specified token or token prefix.
*/
int sqlite3Fts5IndexQuery(
  Fts5Index *p,                   /* FTS index to query */
  const char *pToken, int nToken, /* Token (or prefix) to query for */
  int flags,                      /* Mask of FTS5INDEX_QUERY_X flags */
  Fts5IndexIter **ppIter          /* OUT: New iterator object */
){
  Fts5Config *pConfig = p->pConfig;
  Fts5IndexIter *pRet;
  int iIdx = 0;
  Fts5Buffer buf = {0, 0, 0};

  /* If the QUERY_SCAN flag is set, all other flags must be clear. */
  assert( (flags & FTS5INDEX_QUERY_SCAN)==0
       || (flags & FTS5INDEX_QUERY_SCAN)==FTS5INDEX_QUERY_SCAN
  );

  if( sqlite3Fts5BufferGrow(&p->rc, &buf, nToken+1)==0 ){
    memcpy(&buf.p[1], pToken, nToken);
  }

#ifdef SQLITE_DEBUG
  if( flags & FTS5INDEX_QUERY_TEST_NOIDX ){
    assert( flags & FTS5INDEX_QUERY_PREFIX );
    iIdx = 1+pConfig->nPrefix;
  }else
#endif
  if( flags & FTS5INDEX_QUERY_PREFIX ){
    int nChar = fts5IndexCharlen(pToken, nToken);
    for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){
      if( pConfig->aPrefix[iIdx-1]==nChar ) break;
    }
  }

  pRet = (Fts5IndexIter*)sqlite3Fts5MallocZero(&p->rc, sizeof(Fts5IndexIter));
  if( pRet ){
    pRet->pIndex = p;
    if( iIdx<=pConfig->nPrefix ){
      buf.p[0] = FTS5_MAIN_PREFIX + iIdx;
      pRet->pStruct = fts5StructureRead(p);
      if( pRet->pStruct ){
        fts5MultiIterNew(
            p, pRet->pStruct, 1, flags, buf.p, nToken+1, -1, 0, &pRet->pMulti
        );
      }
    }else{
      int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0;
      buf.p[0] = FTS5_MAIN_PREFIX;
      fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, pRet);
    }
  }

  if( p->rc ){
    sqlite3Fts5IterClose(pRet);
    pRet = 0;
  }
  *ppIter = pRet;
  sqlite3Fts5BufferFree(&buf);
  return fts5IndexReturn(p);
}

/*
** Return true if the iterator passed as the only argument is at EOF.
*/
int sqlite3Fts5IterEof(Fts5IndexIter *pIter){
  assert( pIter->pIndex->rc==SQLITE_OK );
  return fts5MultiIterEof(pIter->pIndex, pIter->pMulti);
}

/*
** Move to the next matching rowid. 
*/
int sqlite3Fts5IterNext(Fts5IndexIter *pIter){
  assert( pIter->pIndex->rc==SQLITE_OK );
  fts5MultiIterNext(pIter->pIndex, pIter->pMulti, 0, 0);
  return fts5IndexReturn(pIter->pIndex);
}

/*
** Move to the next matching term/rowid. Used by the fts5vocab module.
*/
int sqlite3Fts5IterNextScan(Fts5IndexIter *pIter){
  Fts5Index *p = pIter->pIndex;
  Fts5MultiSegIter *pMulti = pIter->pMulti;

  assert( pIter->pIndex->rc==SQLITE_OK );
  assert( pMulti );

  fts5MultiIterNext(p, pMulti, 0, 0);
  if( p->rc==SQLITE_OK ){
    Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ];
    if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){
      fts5DataRelease(pSeg->pLeaf);
      pSeg->pLeaf = 0;
    }
  }

  return fts5IndexReturn(pIter->pIndex);
}

/*
** Move to the next matching rowid that occurs at or after iMatch. The
** definition of "at or after" depends on whether this iterator iterates
** in ascending or descending rowid order.
*/
int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIter, i64 iMatch){
  fts5MultiIterNextFrom(pIter->pIndex, pIter->pMulti, iMatch);
  return fts5IndexReturn(pIter->pIndex);
}

/*
** Return the current rowid.
*/
i64 sqlite3Fts5IterRowid(Fts5IndexIter *pIter){
  return fts5MultiIterRowid(pIter->pMulti);
}

/*
** Return the current term.
*/
const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIter, int *pn){
  int n;
  const char *z = (const char*)fts5MultiIterTerm(pIter->pMulti, &n);
  *pn = n-1;
  return &z[1];
}


/*
** Return a pointer to a buffer containing a copy of the position list for
** the current entry. Output variable *pn is set to the size of the buffer 
** in bytes before returning.
**
** The returned position list does not include the "number of bytes" varint
** field that starts the position list on disk.
*/
int sqlite3Fts5IterPoslist(
  Fts5IndexIter *pIter, 
  const u8 **pp,                  /* OUT: Pointer to position-list data */
  int *pn,                        /* OUT: Size of position-list in bytes */
  i64 *piRowid                    /* OUT: Current rowid */
){
  Fts5MultiSegIter *pMulti = pIter->pMulti;
  Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ];
  assert( pIter->pIndex->rc==SQLITE_OK );
  *piRowid = pSeg->iRowid;
  *pn = pSeg->nPos;
  if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->n ){
    *pp = &pSeg->pLeaf->p[pSeg->iLeafOffset];
  }else{
    fts5BufferZero(&pIter->poslist);
    fts5SegiterPoslist(pIter->pIndex, pSeg, &pIter->poslist);
    *pp = pIter->poslist.p;
  }
  return fts5IndexReturn(pIter->pIndex);
}

/*
** This function is similar to sqlite3Fts5IterPoslist(), except that it
** copies the position list into the buffer supplied as the second 
** argument.
*/
int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf){
  Fts5Index *p = pIter->pIndex;
  Fts5MultiSegIter *pMulti = pIter->pMulti;

  assert( p->rc==SQLITE_OK );
  fts5BufferZero(pBuf);
  fts5MultiIterPoslist(p, pMulti, 0, pBuf);
  return fts5IndexReturn(p);
}

/*
** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
*/
void sqlite3Fts5IterClose(Fts5IndexIter *pIter){
  if( pIter ){
    fts5MultiIterFree(pIter->pIndex, pIter->pMulti);
    fts5StructureRelease(pIter->pStruct);
    fts5BufferFree(&pIter->poslist);
    fts5CloseReader(pIter->pIndex);
    sqlite3_free(pIter);
  }
}

/*
** Read the "averages" record into the buffer supplied as the second 
** argument. Return SQLITE_OK if successful, or an SQLite error code
** if an error occurs.
*/
int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf){
  assert( p->rc==SQLITE_OK );
  fts5DataReadOrBuffer(p, pBuf, FTS5_AVERAGES_ROWID);
  return fts5IndexReturn(p);
}

/*
** Replace the current "averages" record with the contents of the buffer 
** supplied as the second argument.
*/
int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){
  assert( p->rc==SQLITE_OK );
  fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData);
  return fts5IndexReturn(p);
}

/*
** Return the total number of blocks this module has read from the %_data
** table since it was created.
*/
int sqlite3Fts5IndexReads(Fts5Index *p){
  return p->nRead;
}

/*
** Set the 32-bit cookie value stored at the start of all structure 
** records to the value passed as the second argument.
**
** Return SQLITE_OK if successful, or an SQLite error code if an error
** occurs.
*/
int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){
  int rc;                              /* Return code */
  Fts5Config *pConfig = p->pConfig;    /* Configuration object */
  u8 aCookie[4];                       /* Binary representation of iNew */
  sqlite3_blob *pBlob = 0;

  assert( p->rc==SQLITE_OK );
  sqlite3Fts5Put32(aCookie, iNew);

  rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl, 
      "block", FTS5_STRUCTURE_ROWID, 1, &pBlob
  );
  if( rc==SQLITE_OK ){
    sqlite3_blob_write(pBlob, aCookie, 4, 0);
    rc = sqlite3_blob_close(pBlob);
  }

  return rc;
}

int sqlite3Fts5IndexLoadConfig(Fts5Index *p){
  Fts5Structure *pStruct;
  pStruct = fts5StructureRead(p);
  fts5StructureRelease(pStruct);
  return fts5IndexReturn(p);
}


/*************************************************************************
**************************************************************************
** Below this point is the implementation of the integrity-check 
** functionality.
*/

/*
** Return a simple checksum value based on the arguments.
*/
static u64 fts5IndexEntryCksum(
  i64 iRowid, 
  int iCol, 
  int iPos, 
  int iIdx,
  const char *pTerm,
  int nTerm
){
  int i;
  u64 ret = iRowid;
  ret += (ret<<3) + iCol;
  ret += (ret<<3) + iPos;
  if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX + iIdx);
  for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i];
  return ret;
}

static void fts5BtreeIterInit(
  Fts5Index *p, 
  Fts5StructureSegment *pSeg, 
  Fts5BtreeIter *pIter
){
  int nByte;
  int i;
  nByte = sizeof(pIter->aLvl[0]) * (pSeg->nHeight-1);
  memset(pIter, 0, sizeof(*pIter));
  if( nByte ){
    pIter->aLvl = (Fts5BtreeIterLevel*)fts5IdxMalloc(p, nByte);
  }
  if( p->rc==SQLITE_OK ){
    pIter->nLvl = pSeg->nHeight-1;
    pIter->p = p;
    pIter->pSeg = pSeg;
  }
  for(i=0; p->rc==SQLITE_OK && i<pIter->nLvl; i++){
    i64 iRowid = FTS5_SEGMENT_ROWID(pSeg->iSegid, i+1, 1);
    Fts5Data *pData;
    pIter->aLvl[i].pData = pData = fts5DataRead(p, iRowid);
    if( pData ){
      fts5NodeIterInit(pData->p, pData->n, &pIter->aLvl[i].s);
    }
  }

  if( pIter->nLvl==0 || p->rc ){
    pIter->bEof = 1;
    pIter->iLeaf = pSeg->pgnoLast;
  }else{
    pIter->nEmpty = pIter->aLvl[0].s.nEmpty;
    pIter->iLeaf = pIter->aLvl[0].s.iChild;
    pIter->bDlidx = pIter->aLvl[0].s.bDlidx;
  }
}

static void fts5BtreeIterNext(Fts5BtreeIter *pIter){
  Fts5Index *p = pIter->p;
  int i;

  assert( pIter->bEof==0 && pIter->aLvl[0].s.aData );
  for(i=0; i<pIter->nLvl && p->rc==SQLITE_OK; i++){
    Fts5BtreeIterLevel *pLvl = &pIter->aLvl[i];
    fts5NodeIterNext(&p->rc, &pLvl->s);
    if( pLvl->s.aData ){
      fts5BufferSet(&p->rc, &pIter->term, pLvl->s.term.n, pLvl->s.term.p);
      break;
    }else{
      fts5NodeIterFree(&pLvl->s);
      fts5DataRelease(pLvl->pData);
      pLvl->pData = 0;
    }
  }
  if( i==pIter->nLvl || p->rc ){
    pIter->bEof = 1;
  }else{
    int iSegid = pIter->pSeg->iSegid;
    for(i--; i>=0; i--){
      Fts5BtreeIterLevel *pLvl = &pIter->aLvl[i];
      i64 iRowid = FTS5_SEGMENT_ROWID(iSegid, i+1, pLvl[1].s.iChild);
      pLvl->pData = fts5DataRead(p, iRowid);
      if( pLvl->pData ){
        fts5NodeIterInit(pLvl->pData->p, pLvl->pData->n, &pLvl->s);
      }
    }
  }

  pIter->nEmpty = pIter->aLvl[0].s.nEmpty;
  pIter->bDlidx = pIter->aLvl[0].s.bDlidx;
  pIter->iLeaf = pIter->aLvl[0].s.iChild;
}

static void fts5BtreeIterFree(Fts5BtreeIter *pIter){
  int i;
  for(i=0; i<pIter->nLvl; i++){
    Fts5BtreeIterLevel *pLvl = &pIter->aLvl[i];
    fts5NodeIterFree(&pLvl->s);
    if( pLvl->pData ){
      fts5DataRelease(pLvl->pData);
      pLvl->pData = 0;
    }
  }
  sqlite3_free(pIter->aLvl);
  fts5BufferFree(&pIter->term);
}

#ifdef SQLITE_DEBUG
/*
** This function is purely an internal test. It does not contribute to 
** FTS functionality, or even the integrity-check, in any way.
**
** Instead, it tests that the same set of pgno/rowid combinations are 
** visited regardless of whether the doclist-index identified by parameters
** iSegid/iLeaf is iterated in forwards or reverse order.
*/
static void fts5TestDlidxReverse(
  Fts5Index *p, 
  int iSegid,                     /* Segment id to load from */
  int iLeaf                       /* Load doclist-index for this leaf */
){
  Fts5DlidxIter *pDlidx = 0;
  u64 cksum1 = 13;
  u64 cksum2 = 13;

  for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf);
      fts5DlidxIterEof(p, pDlidx)==0;
      fts5DlidxIterNext(p, pDlidx)
  ){
    i64 iRowid = fts5DlidxIterRowid(pDlidx);
    int pgno = fts5DlidxIterPgno(pDlidx);
    assert( pgno>iLeaf );
    cksum1 += iRowid + ((i64)pgno<<32);
  }
  fts5DlidxIterFree(pDlidx);
  pDlidx = 0;

  for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf);
      fts5DlidxIterEof(p, pDlidx)==0;
      fts5DlidxIterPrev(p, pDlidx)
  ){
    i64 iRowid = fts5DlidxIterRowid(pDlidx);
    int pgno = fts5DlidxIterPgno(pDlidx);
    assert( fts5DlidxIterPgno(pDlidx)>iLeaf );
    cksum2 += iRowid + ((i64)pgno<<32);
  }
  fts5DlidxIterFree(pDlidx);
  pDlidx = 0;

  if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT;
}

static int fts5QueryCksum(
  Fts5Index *p,                   /* Fts5 index object */
  int iIdx,
  const char *z,                  /* Index key to query for */
  int n,                          /* Size of index key in bytes */
  int flags,                      /* Flags for Fts5IndexQuery */
  u64 *pCksum                     /* IN/OUT: Checksum value */
){
  u64 cksum = *pCksum;
  Fts5IndexIter *pIdxIter = 0;
  int rc = sqlite3Fts5IndexQuery(p, z, n, flags, &pIdxIter);

  while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){
    i64 dummy;
    const u8 *pPos;
    int nPos;
    i64 rowid = sqlite3Fts5IterRowid(pIdxIter);
    rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos, &dummy);
    if( rc==SQLITE_OK ){
      Fts5PoslistReader sReader;
      for(sqlite3Fts5PoslistReaderInit(-1, pPos, nPos, &sReader);
          sReader.bEof==0;
          sqlite3Fts5PoslistReaderNext(&sReader)
      ){
        int iCol = FTS5_POS2COLUMN(sReader.iPos);
        int iOff = FTS5_POS2OFFSET(sReader.iPos);
        cksum ^= fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
      }
      rc = sqlite3Fts5IterNext(pIdxIter);
    }
  }
  sqlite3Fts5IterClose(pIdxIter);

  *pCksum = cksum;
  return rc;
}


/*
** This function is also purely an internal test. It does not contribute to 
** FTS functionality, or even the integrity-check, in any way.
*/
static void fts5TestTerm(
  Fts5Index *p, 
  Fts5Buffer *pPrev,              /* Previous term */
  const char *z, int n,           /* Possibly new term to test */
  u64 expected,
  u64 *pCksum
){
  int rc = p->rc;
  if( pPrev->n==0 ){
    fts5BufferSet(&rc, pPrev, n, (const u8*)z);
  }else
  if( rc==SQLITE_OK && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){
    u64 cksum3 = *pCksum;
    const char *zTerm = (const char*)&pPrev->p[1];  /* term sans prefix-byte */
    int nTerm = pPrev->n-1;            /* Size of zTerm in bytes */
    int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX);
    int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX);
    int rc;
    u64 ck1 = 0;
    u64 ck2 = 0;

    /* Check that the results returned for ASC and DESC queries are
    ** the same. If not, call this corruption.  */
    rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1);
    if( rc==SQLITE_OK ){
      int f = flags|FTS5INDEX_QUERY_DESC;
      rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
    }
    if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;

    /* If this is a prefix query, check that the results returned if the
    ** the index is disabled are the same. In both ASC and DESC order. */
    if( iIdx>0 && rc==SQLITE_OK ){
      int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
      ck2 = 0;
      rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
      if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
    }
    if( iIdx>0 && rc==SQLITE_OK ){
      int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC;
      ck2 = 0;
      rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
      if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
    }

    cksum3 ^= ck1;
    fts5BufferSet(&rc, pPrev, n, (const u8*)z);

    if( rc==SQLITE_OK && cksum3!=expected ){
      rc = FTS5_CORRUPT;
    }
    *pCksum = cksum3;
  }
  p->rc = rc;
}
 
#else
# define fts5TestDlidxReverse(x,y,z)
# define fts5TestTerm(u,v,w,x,y,z)
#endif

static void fts5IndexIntegrityCheckSegment(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5StructureSegment *pSeg      /* Segment to check internal consistency */
){
  Fts5BtreeIter iter;             /* Used to iterate through b-tree hierarchy */

  if( pSeg->pgnoFirst==0 ) return;

  /* Iterate through the b-tree hierarchy.  */
  for(fts5BtreeIterInit(p, pSeg, &iter);
      p->rc==SQLITE_OK && iter.bEof==0;
      fts5BtreeIterNext(&iter)
  ){
    i64 iRow;                     /* Rowid for this leaf */
    Fts5Data *pLeaf;              /* Data for this leaf */
    int iOff;                     /* Offset of first term on leaf */
    int i;                        /* Used to iterate through empty leaves */

    /* If the leaf in question has already been trimmed from the segment, 
    ** ignore this b-tree entry. Otherwise, load it into memory. */
    if( iter.iLeaf<pSeg->pgnoFirst ) continue;
    iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, iter.iLeaf);
    pLeaf = fts5DataRead(p, iRow);
    if( pLeaf==0 ) break;

    /* Check that the leaf contains at least one term, and that it is equal
    ** to or larger than the split-key in iter.term.  Also check that if there
    ** is also a rowid pointer within the leaf page header, it points to a
    ** location before the term.  */
    iOff = fts5GetU16(&pLeaf->p[2]);
    if( iOff==0 ){
      p->rc = FTS5_CORRUPT;
    }else{
      int iRowidOff;
      int nTerm;                  /* Size of term on leaf in bytes */
      int res;                    /* Comparison of term and split-key */

      iRowidOff = fts5GetU16(&pLeaf->p[0]);
      if( iRowidOff>=iOff ){
        p->rc = FTS5_CORRUPT;
      }else{
        iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm);
        res = memcmp(&pLeaf->p[iOff], iter.term.p, MIN(nTerm, iter.term.n));
        if( res==0 ) res = nTerm - iter.term.n;
        if( res<0 ) p->rc = FTS5_CORRUPT;
      }
    }
    fts5DataRelease(pLeaf);
    if( p->rc ) break;


    /* Now check that the iter.nEmpty leaves following the current leaf
    ** (a) exist and (b) contain no terms. */
    for(i=1; p->rc==SQLITE_OK && i<=iter.nEmpty; i++){
      pLeaf = fts5DataRead(p, iRow+i);
      if( pLeaf && 0!=fts5GetU16(&pLeaf->p[2]) ){
        p->rc = FTS5_CORRUPT;
      }
      fts5DataRelease(pLeaf);
    }

    /* If there is a doclist-index, check that it looks right. */
    if( iter.bDlidx ){
      Fts5DlidxIter *pDlidx = 0;  /* For iterating through doclist index */
      int iPrevLeaf = iter.iLeaf;
      int iSegid = pSeg->iSegid;
      int iPg;
      i64 iKey;

      for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iter.iLeaf);
          fts5DlidxIterEof(p, pDlidx)==0;
          fts5DlidxIterNext(p, pDlidx)
      ){

        /* Check any rowid-less pages that occur before the current leaf. */
        for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){
          iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPg);
          pLeaf = fts5DataRead(p, iKey);
          if( pLeaf ){
            if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT;
            fts5DataRelease(pLeaf);
          }
        }
        iPrevLeaf = fts5DlidxIterPgno(pDlidx);

        /* Check that the leaf page indicated by the iterator really does
        ** contain the rowid suggested by the same. */
        iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPrevLeaf);
        pLeaf = fts5DataRead(p, iKey);
        if( pLeaf ){
          i64 iRowid;
          int iRowidOff = fts5GetU16(&pLeaf->p[0]);
          if( iRowidOff>=pLeaf->n ){
            p->rc = FTS5_CORRUPT;
          }else{
            fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
            if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT;
          }
          fts5DataRelease(pLeaf);
        }
      }

      for(iPg=iPrevLeaf+1; iPg<=(iter.iLeaf + iter.nEmpty); iPg++){
        iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPg);
        pLeaf = fts5DataRead(p, iKey);
        if( pLeaf ){
          if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT;
          fts5DataRelease(pLeaf);
        }
      }

      fts5DlidxIterFree(pDlidx);
      fts5TestDlidxReverse(p, iSegid, iter.iLeaf);
    }
  }

  /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */
  if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
    p->rc = FTS5_CORRUPT;
  }

  fts5BtreeIterFree(&iter);
}


/*
** Run internal checks to ensure that the FTS index (a) is internally 
** consistent and (b) contains entries for which the XOR of the checksums
** as calculated by fts5IndexEntryCksum() is cksum.
**
** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
** checksum does not match. Return SQLITE_OK if all checks pass without
** error, or some other SQLite error code if another error (e.g. OOM)
** occurs.
*/
int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
  u64 cksum2 = 0;                 /* Checksum based on contents of indexes */
  Fts5Buffer poslist = {0,0,0};   /* Buffer used to hold a poslist */
  Fts5MultiSegIter *pIter;        /* Used to iterate through entire index */
  Fts5Structure *pStruct;         /* Index structure */

  /* Used by extra internal tests only run if NDEBUG is not defined */
  u64 cksum3 = 0;                 /* Checksum based on contents of indexes */
  Fts5Buffer term = {0,0,0};      /* Buffer used to hold most recent term */
  
  /* Load the FTS index structure */
  pStruct = fts5StructureRead(p);

  /* Check that the internal nodes of each segment match the leaves */
  if( pStruct ){
    int iLvl, iSeg;
    for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
      for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
        Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
        fts5IndexIntegrityCheckSegment(p, pSeg);
      }
    }
  }

  /* The cksum argument passed to this function is a checksum calculated
  ** based on all expected entries in the FTS index (including prefix index
  ** entries). This block checks that a checksum calculated based on the
  ** actual contents of FTS index is identical.
  **
  ** Two versions of the same checksum are calculated. The first (stack
  ** variable cksum2) based on entries extracted from the full-text index
  ** while doing a linear scan of each individual index in turn. 
  **
  ** As each term visited by the linear scans, a separate query for the
  ** same term is performed. cksum3 is calculated based on the entries
  ** extracted by these queries.
  */
  for(fts5MultiIterNew(p, pStruct, 0, 0, 0, 0, -1, 0, &pIter);
      fts5MultiIterEof(p, pIter)==0;
      fts5MultiIterNext(p, pIter, 0, 0)
  ){
    int n;                      /* Size of term in bytes */
    i64 iPos = 0;               /* Position read from poslist */
    int iOff = 0;               /* Offset within poslist */
    i64 iRowid = fts5MultiIterRowid(pIter);
    char *z = (char*)fts5MultiIterTerm(pIter, &n);

    poslist.n = 0;
    fts5MultiIterPoslist(p, pIter, 0, &poslist);
    while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
      int iCol = FTS5_POS2COLUMN(iPos);
      int iTokOff = FTS5_POS2OFFSET(iPos);
      cksum2 ^= fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
    }

    /* If this is a new term, query for it. Update cksum3 with the results. */
    fts5TestTerm(p, &term, z, n, cksum2, &cksum3);
  }
  fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3);

  fts5MultiIterFree(p, pIter);
  if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;

  fts5StructureRelease(pStruct);
  fts5BufferFree(&term);
  fts5BufferFree(&poslist);
  return fts5IndexReturn(p);
}


/*
** Calculate and return a checksum that is the XOR of the index entry
** checksum of all entries that would be generated by the token specified
** by the final 5 arguments.
*/
u64 sqlite3Fts5IndexCksum(
  Fts5Config *pConfig,            /* Configuration object */
  i64 iRowid,                     /* Document term appears in */
  int iCol,                       /* Column term appears in */
  int iPos,                       /* Position term appears in */
  const char *pTerm, int nTerm    /* Term at iPos */
){
  u64 ret = 0;                    /* Return value */
  int iIdx;                       /* For iterating through indexes */

  ret = fts5IndexEntryCksum(iRowid, iCol, iPos, 0, pTerm, nTerm);

  for(iIdx=0; iIdx<pConfig->nPrefix; iIdx++){
    int nByte = fts5IndexCharlenToBytelen(pTerm, nTerm, pConfig->aPrefix[iIdx]);
    if( nByte ){
      ret ^= fts5IndexEntryCksum(iRowid, iCol, iPos, iIdx+1, pTerm, nByte);
    }
  }

  return ret;
}

/*************************************************************************
**************************************************************************
** Below this point is the implementation of the fts5_decode() scalar
** function only.
*/

/*
** Decode a segment-data rowid from the %_data table. This function is
** the opposite of macro FTS5_SEGMENT_ROWID().
*/
static void fts5DecodeRowid(
  i64 iRowid,                     /* Rowid from %_data table */
  int *piSegid,                   /* OUT: Segment id */
  int *pbDlidx,                   /* OUT: Dlidx flag */
  int *piHeight,                  /* OUT: Height */
  int *piPgno                     /* OUT: Page number */
){
  *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1));
  iRowid >>= FTS5_DATA_PAGE_B;

  *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1));
  iRowid >>= FTS5_DATA_HEIGHT_B;

  *pbDlidx = (int)(iRowid & 0x0001);
  iRowid >>= FTS5_DATA_DLI_B;

  *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1));
}

static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){
  int iSegid, iHeight, iPgno, bDlidx;       /* Rowid compenents */
  fts5DecodeRowid(iKey, &iSegid, &bDlidx, &iHeight, &iPgno);

  if( iSegid==0 ){
    if( iKey==FTS5_AVERAGES_ROWID ){
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(averages) ");
    }else{
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(structure)");
    }
  }
  else{
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(%ssegid=%d h=%d pgno=%d)",
        bDlidx ? "dlidx " : "", iSegid, iHeight, iPgno
    );
  }
}

static void fts5DebugStructure(
  int *pRc,                       /* IN/OUT: error code */
  Fts5Buffer *pBuf,
  Fts5Structure *p
){
  int iLvl, iSeg;                 /* Iterate through levels, segments */

  for(iLvl=0; iLvl<p->nLevel; iLvl++){
    Fts5StructureLevel *pLvl = &p->aLevel[iLvl];
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, 
        " {lvl=%d nMerge=%d", iLvl, pLvl->nMerge
    );
    for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
      Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, 
          " {id=%d h=%d leaves=%d..%d}", pSeg->iSegid, pSeg->nHeight, 
          pSeg->pgnoFirst, pSeg->pgnoLast
      );
    }
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
  }
}

/*
** This is part of the fts5_decode() debugging aid.
**
** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This
** function appends a human-readable representation of the same object
** to the buffer passed as the second argument. 
*/
static void fts5DecodeStructure(
  int *pRc,                       /* IN/OUT: error code */
  Fts5Buffer *pBuf,
  const u8 *pBlob, int nBlob
){
  int rc;                         /* Return code */
  Fts5Structure *p = 0;           /* Decoded structure object */

  rc = fts5StructureDecode(pBlob, nBlob, 0, &p);
  if( rc!=SQLITE_OK ){
    *pRc = rc;
    return;
  }

  fts5DebugStructure(pRc, pBuf, p);
  fts5StructureRelease(p);
}

/*
** Buffer (a/n) is assumed to contain a list of serialized varints. Read
** each varint and append its string representation to buffer pBuf. Return
** after either the input buffer is exhausted or a 0 value is read.
**
** The return value is the number of bytes read from the input buffer.
*/
static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
  int iOff = 0;
  while( iOff<n ){
    int iVal;
    iOff += fts5GetVarint32(&a[iOff], iVal);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal);
  }
  return iOff;
}

/*
** The start of buffer (a/n) contains the start of a doclist. The doclist
** may or may not finish within the buffer. This function appends a text
** representation of the part of the doclist that is present to buffer
** pBuf. 
**
** The return value is the number of bytes read from the input buffer.
*/
static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
  i64 iDocid;
  int iOff = 0;

  iOff = sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDocid);
  sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid);
  while( iOff<n ){
    int nPos;
    int bDummy;
    iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy);
    iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos));
    if( iOff<n ){
      i64 iDelta;
      iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta);
      if( iDelta==0 ) return iOff;
      iDocid += iDelta;
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid);
    }
  }

  return iOff;
}

/*
** The implementation of user-defined scalar function fts5_decode().
*/
static void fts5DecodeFunction(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args (always 2) */
  sqlite3_value **apVal           /* Function arguments */
){
  i64 iRowid;                     /* Rowid for record being decoded */
  int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */
  const u8 *aBlob; int n;         /* Record to decode */
  u8 *a = 0;
  Fts5Buffer s;                   /* Build up text to return here */
  int rc = SQLITE_OK;             /* Return code */
  int nSpace = 0;

  assert( nArg==2 );
  memset(&s, 0, sizeof(Fts5Buffer));
  iRowid = sqlite3_value_int64(apVal[0]);
  n = sqlite3_value_bytes(apVal[1]);
  aBlob = sqlite3_value_blob(apVal[1]);

  nSpace = n + FTS5_DATA_ZERO_PADDING;
  a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
  if( a==0 ) goto decode_out;
  memcpy(a, aBlob, n);
  fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno);

  fts5DebugRowid(&rc, &s, iRowid);
  if( bDlidx ){
    Fts5Data dlidx;
    Fts5DlidxLvl lvl;

    dlidx.p = a;
    dlidx.n = n;

    memset(&lvl, 0, sizeof(Fts5DlidxLvl));
    lvl.pData = &dlidx;
    lvl.iLeafPgno = iPgno;

    for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){
      sqlite3Fts5BufferAppendPrintf(&rc, &s, 
          " %d(%lld)", lvl.iLeafPgno, lvl.iRowid
      );
    }
  }else if( iSegid==0 ){
    if( iRowid==FTS5_AVERAGES_ROWID ){
      /* todo */
    }else{
      fts5DecodeStructure(&rc, &s, a, n);
    }
  }else{

    Fts5Buffer term;
    memset(&term, 0, sizeof(Fts5Buffer));

    if( iHeight==0 ){
      int iTermOff = 0;
      int iRowidOff = 0;
      int iOff;
      int nKeep = 0;

      if( n>=4 ){
        iRowidOff = fts5GetU16(&a[0]);
        iTermOff = fts5GetU16(&a[2]);
      }else{
        sqlite3Fts5BufferSet(&rc, &s, 8, (const u8*)"corrupt");
        goto decode_out;
      }

      if( iRowidOff ){
        iOff = iRowidOff;
      }else if( iTermOff ){
        iOff = iTermOff;
      }else{
        iOff = n;
      }
      fts5DecodePoslist(&rc, &s, &a[4], iOff-4);

      assert( iRowidOff==0 || iOff==iRowidOff );
      if( iRowidOff ){
        iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff);
      }

      assert( iTermOff==0 || iOff==iTermOff );
      while( iOff<n ){
        int nByte;
        iOff += fts5GetVarint32(&a[iOff], nByte);
        term.n= nKeep;
        fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
        iOff += nByte;

        sqlite3Fts5BufferAppendPrintf(
            &rc, &s, " term=%.*s", term.n, (const char*)term.p
        );
        iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff);
        if( iOff<n ){
          iOff += fts5GetVarint32(&a[iOff], nKeep);
        }
      }
      fts5BufferFree(&term);
    }else{
      Fts5NodeIter ss;
      for(fts5NodeIterInit(a, n, &ss); ss.aData; fts5NodeIterNext(&rc, &ss)){
        if( ss.term.n==0 ){
          sqlite3Fts5BufferAppendPrintf(&rc, &s, " left=%d", ss.iChild);
        }else{
          sqlite3Fts5BufferAppendPrintf(&rc,&s, " \"%.*s\"", 
              ss.term.n, ss.term.p
          );
        }
        if( ss.nEmpty ){
          sqlite3Fts5BufferAppendPrintf(&rc, &s, " empty=%d%s", ss.nEmpty,
              ss.bDlidx ? "*" : ""
          );
        }
      }
      fts5NodeIterFree(&ss);
    }
  }
  
 decode_out:
  sqlite3_free(a);
  if( rc==SQLITE_OK ){
    sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT);
  }else{
    sqlite3_result_error_code(pCtx, rc);
  }
  fts5BufferFree(&s);
}

/*
** The implementation of user-defined scalar function fts5_rowid().
*/
static void fts5RowidFunction(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args (always 2) */
  sqlite3_value **apVal           /* Function arguments */
){
  const char *zArg;
  if( nArg==0 ){
    sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1);
  }else{
    zArg = (const char*)sqlite3_value_text(apVal[0]);
    if( 0==sqlite3_stricmp(zArg, "segment") ){
      i64 iRowid;
      int segid, height, pgno;
      if( nArg!=4 ){
        sqlite3_result_error(pCtx, 
            "should be: fts5_rowid('segment', segid, height, pgno))", -1
        );
      }else{
        segid = sqlite3_value_int(apVal[1]);
        height = sqlite3_value_int(apVal[2]);
        pgno = sqlite3_value_int(apVal[3]);
        iRowid = FTS5_SEGMENT_ROWID(segid, height, pgno);
        sqlite3_result_int64(pCtx, iRowid);
      }
    }else {
      sqlite3_result_error(pCtx, 
        "first arg to fts5_rowid() must be 'segment' "
        "or 'start-of-index'"
        , -1
      );
    }
  }
}

/*
** This is called as part of registering the FTS5 module with database
** connection db. It registers several user-defined scalar functions useful
** with FTS5.
**
** If successful, SQLITE_OK is returned. If an error occurs, some other
** SQLite error code is returned instead.
*/
int sqlite3Fts5IndexInit(sqlite3 *db){
  int rc = sqlite3_create_function(
      db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0
  );
  if( rc==SQLITE_OK ){
    rc = sqlite3_create_function(
        db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0
    );
  }
  return rc;
}

#endif /* SQLITE_ENABLE_FTS5 */
Added ext/fts5/fts5_main.c.










































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
/*
** 2014 Jun 09
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
** This is an SQLite module implementing full-text search.
*/

#if defined(SQLITE_ENABLE_FTS5)

#include "fts5Int.h"

/*
** This variable is set to false when running tests for which the on disk
** structures should not be corrupt. Otherwise, true. If it is false, extra
** assert() conditions in the fts5 code are activated - conditions that are
** only true if it is guaranteed that the fts5 database is not corrupt.
*/
int sqlite3_fts5_may_be_corrupt = 1;


typedef struct Fts5Table Fts5Table;
typedef struct Fts5Cursor Fts5Cursor;
typedef struct Fts5Global Fts5Global;
typedef struct Fts5Auxiliary Fts5Auxiliary;
typedef struct Fts5Auxdata Fts5Auxdata;

typedef struct Fts5TokenizerModule Fts5TokenizerModule;

/*
** NOTES ON TRANSACTIONS: 
**
** SQLite invokes the following virtual table methods as transactions are 
** opened and closed by the user:
**
**     xBegin():    Start of a new transaction.
**     xSync():     Initial part of two-phase commit.
**     xCommit():   Final part of two-phase commit.
**     xRollback(): Rollback the transaction.
**
** Anything that is required as part of a commit that may fail is performed
** in the xSync() callback. Current versions of SQLite ignore any errors 
** returned by xCommit().
**
** And as sub-transactions are opened/closed:
**
**     xSavepoint(int S):  Open savepoint S.
**     xRelease(int S):    Commit and close savepoint S.
**     xRollbackTo(int S): Rollback to start of savepoint S.
**
** During a write-transaction the fts5_index.c module may cache some data 
** in-memory. It is flushed to disk whenever xSync(), xRelease() or
** xSavepoint() is called. And discarded whenever xRollback() or xRollbackTo() 
** is called.
**
** Additionally, if SQLITE_DEBUG is defined, an instance of the following
** structure is used to record the current transaction state. This information
** is not required, but it is used in the assert() statements executed by
** function fts5CheckTransactionState() (see below).
*/
struct Fts5TransactionState {
  int eState;                     /* 0==closed, 1==open, 2==synced */
  int iSavepoint;                 /* Number of open savepoints (0 -> none) */
};

/*
** A single object of this type is allocated when the FTS5 module is 
** registered with a database handle. It is used to store pointers to
** all registered FTS5 extensions - tokenizers and auxiliary functions.
*/
struct Fts5Global {
  fts5_api api;                   /* User visible part of object (see fts5.h) */
  sqlite3 *db;                    /* Associated database connection */ 
  i64 iNextId;                    /* Used to allocate unique cursor ids */
  Fts5Auxiliary *pAux;            /* First in list of all aux. functions */
  Fts5TokenizerModule *pTok;      /* First in list of all tokenizer modules */
  Fts5TokenizerModule *pDfltTok;  /* Default tokenizer module */
  Fts5Cursor *pCsr;               /* First in list of all open cursors */
};

/*
** Each auxiliary function registered with the FTS5 module is represented
** by an object of the following type. All such objects are stored as part
** of the Fts5Global.pAux list.
*/
struct Fts5Auxiliary {
  Fts5Global *pGlobal;            /* Global context for this function */
  char *zFunc;                    /* Function name (nul-terminated) */
  void *pUserData;                /* User-data pointer */
  fts5_extension_function xFunc;  /* Callback function */
  void (*xDestroy)(void*);        /* Destructor function */
  Fts5Auxiliary *pNext;           /* Next registered auxiliary function */
};

/*
** Each tokenizer module registered with the FTS5 module is represented
** by an object of the following type. All such objects are stored as part
** of the Fts5Global.pTok list.
*/
struct Fts5TokenizerModule {
  char *zName;                    /* Name of tokenizer */
  void *pUserData;                /* User pointer passed to xCreate() */
  fts5_tokenizer x;               /* Tokenizer functions */
  void (*xDestroy)(void*);        /* Destructor function */
  Fts5TokenizerModule *pNext;     /* Next registered tokenizer module */
};

/*
** Virtual-table object.
*/
struct Fts5Table {
  sqlite3_vtab base;              /* Base class used by SQLite core */
  Fts5Config *pConfig;            /* Virtual table configuration */
  Fts5Index *pIndex;              /* Full-text index */
  Fts5Storage *pStorage;          /* Document store */
  Fts5Global *pGlobal;            /* Global (connection wide) data */
  Fts5Cursor *pSortCsr;           /* Sort data from this cursor */
#ifdef SQLITE_DEBUG
  struct Fts5TransactionState ts;
#endif
};

struct Fts5MatchPhrase {
  Fts5Buffer *pPoslist;           /* Pointer to current poslist */
  int nTerm;                      /* Size of phrase in terms */
};

/*
** pStmt:
**   SELECT rowid, <fts> FROM <fts> ORDER BY +rank;
**
** aIdx[]:
**   There is one entry in the aIdx[] array for each phrase in the query,
**   the value of which is the offset within aPoslist[] following the last 
**   byte of the position list for the corresponding phrase.
*/
struct Fts5Sorter {
  sqlite3_stmt *pStmt;
  i64 iRowid;                     /* Current rowid */
  const u8 *aPoslist;             /* Position lists for current row */
  int nIdx;                       /* Number of entries in aIdx[] */
  int aIdx[0];                    /* Offsets into aPoslist for current row */
};


/*
** Virtual-table cursor object.
**
** iSpecial:
**   If this is a 'special' query (refer to function fts5SpecialMatch()), 
**   then this variable contains the result of the query. 
**
** iFirstRowid, iLastRowid:
**   These variables are only used for FTS5_PLAN_MATCH cursors. Assuming the
**   cursor iterates in ascending order of rowids, iFirstRowid is the lower
**   limit of rowids to return, and iLastRowid the upper. In other words, the
**   WHERE clause in the user's query might have been:
**
**       <tbl> MATCH <expr> AND rowid BETWEEN $iFirstRowid AND $iLastRowid
**
**   If the cursor iterates in descending order of rowid, iFirstRowid
**   is the upper limit (i.e. the "first" rowid visited) and iLastRowid
**   the lower.
*/
struct Fts5Cursor {
  sqlite3_vtab_cursor base;       /* Base class used by SQLite core */
  int ePlan;                      /* FTS5_PLAN_XXX value */
  int bDesc;                      /* True for "ORDER BY rowid DESC" queries */
  i64 iFirstRowid;                /* Return no rowids earlier than this */
  i64 iLastRowid;                 /* Return no rowids later than this */
  sqlite3_stmt *pStmt;            /* Statement used to read %_content */
  Fts5Expr *pExpr;                /* Expression for MATCH queries */
  Fts5Sorter *pSorter;            /* Sorter for "ORDER BY rank" queries */
  int csrflags;                   /* Mask of cursor flags (see below) */
  Fts5Cursor *pNext;              /* Next cursor in Fts5Cursor.pCsr list */
  i64 iSpecial;                   /* Result of special query */

  /* "rank" function. Populated on demand from vtab.xColumn(). */
  char *zRank;                    /* Custom rank function */
  char *zRankArgs;                /* Custom rank function args */
  Fts5Auxiliary *pRank;           /* Rank callback (or NULL) */
  int nRankArg;                   /* Number of trailing arguments for rank() */
  sqlite3_value **apRankArg;      /* Array of trailing arguments */
  sqlite3_stmt *pRankArgStmt;     /* Origin of objects in apRankArg[] */

  /* Variables used by auxiliary functions */
  i64 iCsrId;                     /* Cursor id */
  Fts5Auxiliary *pAux;            /* Currently executing extension function */
  Fts5Auxdata *pAuxdata;          /* First in linked list of saved aux-data */
  int *aColumnSize;               /* Values for xColumnSize() */

  /* Cache used by auxiliary functions xInst() and xInstCount() */
  int nInstCount;                 /* Number of phrase instances */
  int *aInst;                     /* 3 integers per phrase instance */
};

/*
** Bits that make up the "idxNum" parameter passed indirectly by 
** xBestIndex() to xFilter().
*/
#define FTS5_BI_MATCH        0x0001         /* <tbl> MATCH ? */
#define FTS5_BI_RANK         0x0002         /* rank MATCH ? */
#define FTS5_BI_ROWID_EQ     0x0004         /* rowid == ? */
#define FTS5_BI_ROWID_LE     0x0008         /* rowid <= ? */
#define FTS5_BI_ROWID_GE     0x0010         /* rowid >= ? */

#define FTS5_BI_ORDER_RANK   0x0020
#define FTS5_BI_ORDER_ROWID  0x0040
#define FTS5_BI_ORDER_DESC   0x0080

/*
** Values for Fts5Cursor.csrflags
*/
#define FTS5CSR_REQUIRE_CONTENT   0x01
#define FTS5CSR_REQUIRE_DOCSIZE   0x02
#define FTS5CSR_EOF               0x04
#define FTS5CSR_FREE_ZRANK        0x08
#define FTS5CSR_REQUIRE_RESEEK    0x10

#define BitFlagAllTest(x,y) (((x) & (y))==(y))
#define BitFlagTest(x,y)    (((x) & (y))!=0)

/*
** Constants for the largest and smallest possible 64-bit signed integers.
** These are copied from sqliteInt.h.
*/
#ifndef SQLITE_AMALGAMATION
# define LARGEST_INT64  (0xffffffff|(((i64)0x7fffffff)<<32))
# define SMALLEST_INT64 (((i64)-1) - LARGEST_INT64)
#endif

/*
** Macros to Set(), Clear() and Test() cursor flags.
*/
#define CsrFlagSet(pCsr, flag)   ((pCsr)->csrflags |= (flag))
#define CsrFlagClear(pCsr, flag) ((pCsr)->csrflags &= ~(flag))
#define CsrFlagTest(pCsr, flag)  ((pCsr)->csrflags & (flag))

struct Fts5Auxdata {
  Fts5Auxiliary *pAux;            /* Extension to which this belongs */
  void *pPtr;                     /* Pointer value */
  void(*xDelete)(void*);          /* Destructor */
  Fts5Auxdata *pNext;             /* Next object in linked list */
};

#ifdef SQLITE_DEBUG
#define FTS5_BEGIN      1
#define FTS5_SYNC       2
#define FTS5_COMMIT     3
#define FTS5_ROLLBACK   4
#define FTS5_SAVEPOINT  5
#define FTS5_RELEASE    6
#define FTS5_ROLLBACKTO 7
static void fts5CheckTransactionState(Fts5Table *p, int op, int iSavepoint){
  switch( op ){
    case FTS5_BEGIN:
      assert( p->ts.eState==0 );
      p->ts.eState = 1;
      p->ts.iSavepoint = -1;
      break;

    case FTS5_SYNC:
      assert( p->ts.eState==1 );
      p->ts.eState = 2;
      break;

    case FTS5_COMMIT:
      assert( p->ts.eState==2 );
      p->ts.eState = 0;
      break;

    case FTS5_ROLLBACK:
      assert( p->ts.eState==1 || p->ts.eState==2 || p->ts.eState==0 );
      p->ts.eState = 0;
      break;

    case FTS5_SAVEPOINT:
      assert( p->ts.eState==1 );
      assert( iSavepoint>=0 );
      assert( iSavepoint>p->ts.iSavepoint );
      p->ts.iSavepoint = iSavepoint;
      break;
      
    case FTS5_RELEASE:
      assert( p->ts.eState==1 );
      assert( iSavepoint>=0 );
      assert( iSavepoint<=p->ts.iSavepoint );
      p->ts.iSavepoint = iSavepoint-1;
      break;

    case FTS5_ROLLBACKTO:
      assert( p->ts.eState==1 );
      assert( iSavepoint>=0 );
      assert( iSavepoint<=p->ts.iSavepoint );
      p->ts.iSavepoint = iSavepoint;
      break;
  }
}
#else
# define fts5CheckTransactionState(x,y,z)
#endif

/*
** Return true if pTab is a contentless table.
*/
static int fts5IsContentless(Fts5Table *pTab){
  return pTab->pConfig->eContent==FTS5_CONTENT_NONE;
}

/*
** Delete a virtual table handle allocated by fts5InitVtab(). 
*/
static void fts5FreeVtab(Fts5Table *pTab){
  if( pTab ){
    sqlite3Fts5IndexClose(pTab->pIndex);
    sqlite3Fts5StorageClose(pTab->pStorage);
    sqlite3Fts5ConfigFree(pTab->pConfig);
    sqlite3_free(pTab);
  }
}

/*
** The xDisconnect() virtual table method.
*/
static int fts5DisconnectMethod(sqlite3_vtab *pVtab){
  fts5FreeVtab((Fts5Table*)pVtab);
  return SQLITE_OK;
}

/*
** The xDestroy() virtual table method.
*/
static int fts5DestroyMethod(sqlite3_vtab *pVtab){
  Fts5Table *pTab = (Fts5Table*)pVtab;
  int rc = sqlite3Fts5DropAll(pTab->pConfig);
  if( rc==SQLITE_OK ){
    fts5FreeVtab((Fts5Table*)pVtab);
  }
  return rc;
}

/*
** This function is the implementation of both the xConnect and xCreate
** methods of the FTS3 virtual table.
**
** The argv[] array contains the following:
**
**   argv[0]   -> module name  ("fts5")
**   argv[1]   -> database name
**   argv[2]   -> table name
**   argv[...] -> "column name" and other module argument fields.
*/
static int fts5InitVtab(
  int bCreate,                    /* True for xCreate, false for xConnect */
  sqlite3 *db,                    /* The SQLite database connection */
  void *pAux,                     /* Hash table containing tokenizers */
  int argc,                       /* Number of elements in argv array */
  const char * const *argv,       /* xCreate/xConnect argument array */
  sqlite3_vtab **ppVTab,          /* Write the resulting vtab structure here */
  char **pzErr                    /* Write any error message here */
){
  Fts5Global *pGlobal = (Fts5Global*)pAux;
  const char **azConfig = (const char**)argv;
  int rc = SQLITE_OK;             /* Return code */
  Fts5Config *pConfig;            /* Results of parsing argc/argv */
  Fts5Table *pTab = 0;            /* New virtual table object */

  /* Allocate the new vtab object and parse the configuration */
  pTab = (Fts5Table*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Table));
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts5ConfigParse(pGlobal, db, argc, azConfig, &pConfig, pzErr);
    assert( (rc==SQLITE_OK && *pzErr==0) || pConfig==0 );
  }
  if( rc==SQLITE_OK ){
    pTab->pConfig = pConfig;
    pTab->pGlobal = pGlobal;
  }

  /* Open the index sub-system */
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts5IndexOpen(pConfig, bCreate, &pTab->pIndex, pzErr);
  }

  /* Open the storage sub-system */
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts5StorageOpen(
        pConfig, pTab->pIndex, bCreate, &pTab->pStorage, pzErr
    );
  }

  /* Call sqlite3_declare_vtab() */
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts5ConfigDeclareVtab(pConfig);
  }

  if( rc!=SQLITE_OK ){
    fts5FreeVtab(pTab);
    pTab = 0;
  }else if( bCreate ){
    fts5CheckTransactionState(pTab, FTS5_BEGIN, 0);
  }
  *ppVTab = (sqlite3_vtab*)pTab;
  return rc;
}

/*
** The xConnect() and xCreate() methods for the virtual table. All the
** work is done in function fts5InitVtab().
*/
static int fts5ConnectMethod(
  sqlite3 *db,                    /* Database connection */
  void *pAux,                     /* Pointer to tokenizer hash table */
  int argc,                       /* Number of elements in argv array */
  const char * const *argv,       /* xCreate/xConnect argument array */
  sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
  char **pzErr                    /* OUT: sqlite3_malloc'd error message */
){
  return fts5InitVtab(0, db, pAux, argc, argv, ppVtab, pzErr);
}
static int fts5CreateMethod(
  sqlite3 *db,                    /* Database connection */
  void *pAux,                     /* Pointer to tokenizer hash table */
  int argc,                       /* Number of elements in argv array */
  const char * const *argv,       /* xCreate/xConnect argument array */
  sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
  char **pzErr                    /* OUT: sqlite3_malloc'd error message */
){
  return fts5InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr);
}

/*
** The different query plans.
*/
#define FTS5_PLAN_SCAN           1       /* No usable constraint */
#define FTS5_PLAN_MATCH          2       /* (<tbl> MATCH ?) */
#define FTS5_PLAN_SORTED_MATCH   3       /* (<tbl> MATCH ? ORDER BY rank) */
#define FTS5_PLAN_ROWID          4       /* (rowid = ?) */
#define FTS5_PLAN_SOURCE         5       /* A source cursor for SORTED_MATCH */
#define FTS5_PLAN_SPECIAL        6       /* An internal query */

/*
** Implementation of the xBestIndex method for FTS5 tables. Within the 
** WHERE constraint, it searches for the following:
**
**   1. A MATCH constraint against the special column.
**   2. A MATCH constraint against the "rank" column.
**   3. An == constraint against the rowid column.
**   4. A < or <= constraint against the rowid column.
**   5. A > or >= constraint against the rowid column.
**
** Within the ORDER BY, either:
**
**   5. ORDER BY rank [ASC|DESC]
**   6. ORDER BY rowid [ASC|DESC]
**
** Costs are assigned as follows:
**
**  a) If an unusable MATCH operator is present in the WHERE clause, the
**     cost is unconditionally set to 1e50 (a really big number).
**
**  a) If a MATCH operator is present, the cost depends on the other
**     constraints also present. As follows:
**
**       * No other constraints:         cost=1000.0
**       * One rowid range constraint:   cost=750.0
**       * Both rowid range constraints: cost=500.0
**       * An == rowid constraint:       cost=100.0
**
**  b) Otherwise, if there is no MATCH:
**
**       * No other constraints:         cost=1000000.0
**       * One rowid range constraint:   cost=750000.0
**       * Both rowid range constraints: cost=250000.0
**       * An == rowid constraint:       cost=10.0
**
** Costs are not modified by the ORDER BY clause.
*/
static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
  Fts5Table *pTab = (Fts5Table*)pVTab;
  Fts5Config *pConfig = pTab->pConfig;
  int idxFlags = 0;               /* Parameter passed through to xFilter() */
  int bHasMatch;
  int iNext;
  int i;

  struct Constraint {
    int op;                       /* Mask against sqlite3_index_constraint.op */
    int fts5op;                   /* FTS5 mask for idxFlags */
    int iCol;                     /* 0==rowid, 1==tbl, 2==rank */
    int omit;                     /* True to omit this if found */
    int iConsIndex;               /* Index in pInfo->aConstraint[] */
  } aConstraint[] = {
    {SQLITE_INDEX_CONSTRAINT_MATCH, FTS5_BI_MATCH,    1, 1, -1},
    {SQLITE_INDEX_CONSTRAINT_MATCH, FTS5_BI_RANK,     2, 1, -1},
    {SQLITE_INDEX_CONSTRAINT_EQ,    FTS5_BI_ROWID_EQ, 0, 0, -1},
    {SQLITE_INDEX_CONSTRAINT_LT|SQLITE_INDEX_CONSTRAINT_LE, 
                                    FTS5_BI_ROWID_LE, 0, 0, -1},
    {SQLITE_INDEX_CONSTRAINT_GT|SQLITE_INDEX_CONSTRAINT_GE, 
                                    FTS5_BI_ROWID_GE, 0, 0, -1},
  };

  int aColMap[3];
  aColMap[0] = -1;
  aColMap[1] = pConfig->nCol;
  aColMap[2] = pConfig->nCol+1;

  /* Set idxFlags flags for all WHERE clause terms that will be used. */
  for(i=0; i<pInfo->nConstraint; i++){
    struct sqlite3_index_constraint *p = &pInfo->aConstraint[i];
    int j;
    for(j=0; j<sizeof(aConstraint)/sizeof(aConstraint[0]); j++){
      struct Constraint *pC = &aConstraint[j];
      if( p->iColumn==aColMap[pC->iCol] && p->op & pC->op ){
        if( p->usable ){
          pC->iConsIndex = i;
          idxFlags |= pC->fts5op;
        }else if( j==0 ){
          /* As there exists an unusable MATCH constraint this is an 
          ** unusable plan. Set a prohibitively high cost. */
          pInfo->estimatedCost = 1e50;
          return SQLITE_OK;
        }
      }
    }
  }

  /* Set idxFlags flags for the ORDER BY clause */
  if( pInfo->nOrderBy==1 ){
    int iSort = pInfo->aOrderBy[0].iColumn;
    if( iSort==(pConfig->nCol+1) && BitFlagTest(idxFlags, FTS5_BI_MATCH) ){
      idxFlags |= FTS5_BI_ORDER_RANK;
    }else if( iSort==-1 ){
      idxFlags |= FTS5_BI_ORDER_ROWID;
    }
    if( BitFlagTest(idxFlags, FTS5_BI_ORDER_RANK|FTS5_BI_ORDER_ROWID) ){
      pInfo->orderByConsumed = 1;
      if( pInfo->aOrderBy[0].desc ){
        idxFlags |= FTS5_BI_ORDER_DESC;
      }
    }
  }

  /* Calculate the estimated cost based on the flags set in idxFlags. */
  bHasMatch = BitFlagTest(idxFlags, FTS5_BI_MATCH);
  if( BitFlagTest(idxFlags, FTS5_BI_ROWID_EQ) ){
    pInfo->estimatedCost = bHasMatch ? 100.0 : 10.0;
  }else if( BitFlagAllTest(idxFlags, FTS5_BI_ROWID_LE|FTS5_BI_ROWID_GE) ){
    pInfo->estimatedCost = bHasMatch ? 500.0 : 250000.0;
  }else if( BitFlagTest(idxFlags, FTS5_BI_ROWID_LE|FTS5_BI_ROWID_GE) ){
    pInfo->estimatedCost = bHasMatch ? 750.0 : 750000.0;
  }else{
    pInfo->estimatedCost = bHasMatch ? 1000.0 : 1000000.0;
  }

  /* Assign argvIndex values to each constraint in use. */
  iNext = 1;
  for(i=0; i<sizeof(aConstraint)/sizeof(aConstraint[0]); i++){
    struct Constraint *pC = &aConstraint[i];
    if( pC->iConsIndex>=0 ){
      pInfo->aConstraintUsage[pC->iConsIndex].argvIndex = iNext++;
      pInfo->aConstraintUsage[pC->iConsIndex].omit = pC->omit;
    }
  }

  pInfo->idxNum = idxFlags;
  return SQLITE_OK;
}

/*
** Implementation of xOpen method.
*/
static int fts5OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
  Fts5Table *pTab = (Fts5Table*)pVTab;
  Fts5Config *pConfig = pTab->pConfig;
  Fts5Cursor *pCsr;               /* New cursor object */
  int nByte;                      /* Bytes of space to allocate */
  int rc = SQLITE_OK;             /* Return code */

  nByte = sizeof(Fts5Cursor) + pConfig->nCol * sizeof(int);
  pCsr = (Fts5Cursor*)sqlite3_malloc(nByte);
  if( pCsr ){
    Fts5Global *pGlobal = pTab->pGlobal;
    memset(pCsr, 0, nByte);
    pCsr->aColumnSize = (int*)&pCsr[1];
    pCsr->pNext = pGlobal->pCsr;
    pGlobal->pCsr = pCsr;
    pCsr->iCsrId = ++pGlobal->iNextId;
  }else{
    rc = SQLITE_NOMEM;
  }
  *ppCsr = (sqlite3_vtab_cursor*)pCsr;
  return rc;
}

static int fts5StmtType(Fts5Cursor *pCsr){
  if( pCsr->ePlan==FTS5_PLAN_SCAN ){
    return (pCsr->bDesc) ? FTS5_STMT_SCAN_DESC : FTS5_STMT_SCAN_ASC;
  }
  return FTS5_STMT_LOOKUP;
}

/*
** This function is called after the cursor passed as the only argument
** is moved to point at a different row. It clears all cached data 
** specific to the previous row stored by the cursor object.
*/
static void fts5CsrNewrow(Fts5Cursor *pCsr){
  CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE );
  sqlite3_free(pCsr->aInst);
  pCsr->aInst = 0;
  pCsr->nInstCount = 0;
}

/*
** Close the cursor.  For additional information see the documentation
** on the xClose method of the virtual table interface.
*/
static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){
  if( pCursor ){
    Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab);
    Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
    Fts5Cursor **pp;
    Fts5Auxdata *pData;
    Fts5Auxdata *pNext;

    fts5CsrNewrow(pCsr);
    if( pCsr->pStmt ){
      int eStmt = fts5StmtType(pCsr);
      sqlite3Fts5StorageStmtRelease(pTab->pStorage, eStmt, pCsr->pStmt);
    }
    if( pCsr->pSorter ){
      Fts5Sorter *pSorter = pCsr->pSorter;
      sqlite3_finalize(pSorter->pStmt);
      sqlite3_free(pSorter);
    }

    if( pCsr->ePlan!=FTS5_PLAN_SOURCE ){
      sqlite3Fts5ExprFree(pCsr->pExpr);
    }

    for(pData=pCsr->pAuxdata; pData; pData=pNext){
      pNext = pData->pNext;
      if( pData->xDelete ) pData->xDelete(pData->pPtr);
      sqlite3_free(pData);
    }

    /* Remove the cursor from the Fts5Global.pCsr list */
    for(pp=&pTab->pGlobal->pCsr; (*pp)!=pCsr; pp=&(*pp)->pNext);
    *pp = pCsr->pNext;

    sqlite3_finalize(pCsr->pRankArgStmt);
    sqlite3_free(pCsr->apRankArg);

    if( CsrFlagTest(pCsr, FTS5CSR_FREE_ZRANK) ){
      sqlite3_free(pCsr->zRank);
      sqlite3_free(pCsr->zRankArgs);
    }
    sqlite3_free(pCsr);
  }
  return SQLITE_OK;
}

static int fts5SorterNext(Fts5Cursor *pCsr){
  Fts5Sorter *pSorter = pCsr->pSorter;
  int rc;

  rc = sqlite3_step(pSorter->pStmt);
  if( rc==SQLITE_DONE ){
    rc = SQLITE_OK;
    CsrFlagSet(pCsr, FTS5CSR_EOF);
  }else if( rc==SQLITE_ROW ){
    const u8 *a;
    const u8 *aBlob;
    int nBlob;
    int i;
    int iOff = 0;
    rc = SQLITE_OK;

    pSorter->iRowid = sqlite3_column_int64(pSorter->pStmt, 0);
    nBlob = sqlite3_column_bytes(pSorter->pStmt, 1);
    aBlob = a = sqlite3_column_blob(pSorter->pStmt, 1);

    for(i=0; i<(pSorter->nIdx-1); i++){
      int iVal;
      a += fts5GetVarint32(a, iVal);
      iOff += iVal;
      pSorter->aIdx[i] = iOff;
    }
    pSorter->aIdx[i] = &aBlob[nBlob] - a;

    pSorter->aPoslist = a;
    fts5CsrNewrow(pCsr);
  }

  return rc;
}


/*
** Set the FTS5CSR_REQUIRE_RESEEK flag on all FTS5_PLAN_MATCH cursors 
** open on table pTab.
*/
static void fts5TripCursors(Fts5Table *pTab){
  Fts5Cursor *pCsr;
  for(pCsr=pTab->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){
    if( pCsr->ePlan==FTS5_PLAN_MATCH
     && pCsr->base.pVtab==(sqlite3_vtab*)pTab 
    ){
      CsrFlagSet(pCsr, FTS5CSR_REQUIRE_RESEEK);
    }
  }
}

/*
** If the REQUIRE_RESEEK flag is set on the cursor passed as the first
** argument, close and reopen all Fts5IndexIter iterators that the cursor 
** is using. Then attempt to move the cursor to a rowid equal to or laster
** (in the cursors sort order - ASC or DESC) than the current rowid. 
**
** If the new rowid is not equal to the old, set output parameter *pbSkip
** to 1 before returning. Otherwise, leave it unchanged.
**
** Return SQLITE_OK if successful or if no reseek was required, or an 
** error code if an error occurred.
*/
static int fts5CursorReseek(Fts5Cursor *pCsr, int *pbSkip){
  int rc = SQLITE_OK;
  assert( *pbSkip==0 );
  if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_RESEEK) ){
    Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
    int bDesc = pCsr->bDesc;
    i64 iRowid = sqlite3Fts5ExprRowid(pCsr->pExpr);

    rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->pIndex, iRowid, bDesc);
    if( rc==SQLITE_OK && iRowid!=sqlite3Fts5ExprRowid(pCsr->pExpr) ){
      *pbSkip = 1;
    }

    CsrFlagClear(pCsr, FTS5CSR_REQUIRE_RESEEK);
    fts5CsrNewrow(pCsr);
    if( sqlite3Fts5ExprEof(pCsr->pExpr) ){
      CsrFlagSet(pCsr, FTS5CSR_EOF);
    }
  }
  return rc;
}


/*
** Advance the cursor to the next row in the table that matches the 
** search criteria.
**
** Return SQLITE_OK if nothing goes wrong.  SQLITE_OK is returned
** even if we reach end-of-file.  The fts5EofMethod() will be called
** subsequently to determine whether or not an EOF was hit.
*/
static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
  int ePlan = pCsr->ePlan;
  int bSkip = 0;
  int rc;

  if( (rc = fts5CursorReseek(pCsr, &bSkip)) || bSkip ) return rc;

  switch( ePlan ){
    case FTS5_PLAN_MATCH:
    case FTS5_PLAN_SOURCE:
      rc = sqlite3Fts5ExprNext(pCsr->pExpr, pCsr->iLastRowid);
      if( sqlite3Fts5ExprEof(pCsr->pExpr) ){
        CsrFlagSet(pCsr, FTS5CSR_EOF);
      }
      fts5CsrNewrow(pCsr);
      break;

    case FTS5_PLAN_SPECIAL: {
      CsrFlagSet(pCsr, FTS5CSR_EOF);
      break;
    }

    case FTS5_PLAN_SORTED_MATCH: {
      rc = fts5SorterNext(pCsr);
      break;
    }

    default:
      rc = sqlite3_step(pCsr->pStmt);
      if( rc!=SQLITE_ROW ){
        CsrFlagSet(pCsr, FTS5CSR_EOF);
        rc = sqlite3_reset(pCsr->pStmt);
      }else{
        rc = SQLITE_OK;
      }
      break;
  }
  
  return rc;
}

static int fts5CursorFirstSorted(Fts5Table *pTab, Fts5Cursor *pCsr, int bDesc){
  Fts5Config *pConfig = pTab->pConfig;
  Fts5Sorter *pSorter;
  int nPhrase;
  int nByte;
  int rc = SQLITE_OK;
  char *zSql;
  const char *zRank = pCsr->zRank;
  const char *zRankArgs = pCsr->zRankArgs;
  
  nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
  nByte = sizeof(Fts5Sorter) + sizeof(int) * nPhrase;
  pSorter = (Fts5Sorter*)sqlite3_malloc(nByte);
  if( pSorter==0 ) return SQLITE_NOMEM;
  memset(pSorter, 0, nByte);
  pSorter->nIdx = nPhrase;

  /* TODO: It would be better to have some system for reusing statement
  ** handles here, rather than preparing a new one for each query. But that
  ** is not possible as SQLite reference counts the virtual table objects.
  ** And since the statement required here reads from this very virtual 
  ** table, saving it creates a circular reference.
  **
  ** If SQLite a built-in statement cache, this wouldn't be a problem. */
  zSql = sqlite3Fts5Mprintf(&rc, 
      "SELECT rowid, rank FROM %Q.%Q ORDER BY %s(%s%s%s) %s",
      pConfig->zDb, pConfig->zName, zRank, pConfig->zName,
      (zRankArgs ? ", " : ""),
      (zRankArgs ? zRankArgs : ""),
      bDesc ? "DESC" : "ASC"
  );
  if( zSql ){
    rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pSorter->pStmt, 0);
    sqlite3_free(zSql);
  }

  pCsr->pSorter = pSorter;
  if( rc==SQLITE_OK ){
    assert( pTab->pSortCsr==0 );
    pTab->pSortCsr = pCsr;
    rc = fts5SorterNext(pCsr);
    pTab->pSortCsr = 0;
  }

  if( rc!=SQLITE_OK ){
    sqlite3_finalize(pSorter->pStmt);
    sqlite3_free(pSorter);
    pCsr->pSorter = 0;
  }

  return rc;
}

static int fts5CursorFirst(Fts5Table *pTab, Fts5Cursor *pCsr, int bDesc){
  int rc;
  Fts5Expr *pExpr = pCsr->pExpr;
  rc = sqlite3Fts5ExprFirst(pExpr, pTab->pIndex, pCsr->iFirstRowid, bDesc);
  if( sqlite3Fts5ExprEof(pExpr) ){
    CsrFlagSet(pCsr, FTS5CSR_EOF);
  }
  fts5CsrNewrow(pCsr);
  return rc;
}

/*
** Process a "special" query. A special query is identified as one with a
** MATCH expression that begins with a '*' character. The remainder of
** the text passed to the MATCH operator are used as  the special query
** parameters.
*/
static int fts5SpecialMatch(
  Fts5Table *pTab, 
  Fts5Cursor *pCsr, 
  const char *zQuery
){
  int rc = SQLITE_OK;             /* Return code */
  const char *z = zQuery;         /* Special query text */
  int n;                          /* Number of bytes in text at z */

  while( z[0]==' ' ) z++;
  for(n=0; z[n] && z[n]!=' '; n++);

  assert( pTab->base.zErrMsg==0 );
  pCsr->ePlan = FTS5_PLAN_SPECIAL;

  if( 0==sqlite3_strnicmp("reads", z, n) ){
    pCsr->iSpecial = sqlite3Fts5IndexReads(pTab->pIndex);
  }
  else if( 0==sqlite3_strnicmp("id", z, n) ){
    pCsr->iSpecial = pCsr->iCsrId;
  }
  else{
    /* An unrecognized directive. Return an error message. */
    pTab->base.zErrMsg = sqlite3_mprintf("unknown special query: %.*s", n, z);
    rc = SQLITE_ERROR;
  }

  return rc;
}

/*
** Search for an auxiliary function named zName that can be used with table
** pTab. If one is found, return a pointer to the corresponding Fts5Auxiliary
** structure. Otherwise, if no such function exists, return NULL.
*/
static Fts5Auxiliary *fts5FindAuxiliary(Fts5Table *pTab, const char *zName){
  Fts5Auxiliary *pAux;

  for(pAux=pTab->pGlobal->pAux; pAux; pAux=pAux->pNext){
    if( sqlite3_stricmp(zName, pAux->zFunc)==0 ) return pAux;
  }

  /* No function of the specified name was found. Return 0. */
  return 0;
}


static int fts5FindRankFunction(Fts5Cursor *pCsr){
  Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
  Fts5Config *pConfig = pTab->pConfig;
  int rc = SQLITE_OK;
  Fts5Auxiliary *pAux = 0;
  const char *zRank = pCsr->zRank;
  const char *zRankArgs = pCsr->zRankArgs;

  if( zRankArgs ){
    char *zSql = sqlite3Fts5Mprintf(&rc, "SELECT %s", zRankArgs);
    if( zSql ){
      sqlite3_stmt *pStmt = 0;
      rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pStmt, 0);
      sqlite3_free(zSql);
      assert( rc==SQLITE_OK || pCsr->pRankArgStmt==0 );
      if( rc==SQLITE_OK ){
        if( SQLITE_ROW==sqlite3_step(pStmt) ){
          int nByte;
          pCsr->nRankArg = sqlite3_column_count(pStmt);
          nByte = sizeof(sqlite3_value*)*pCsr->nRankArg;
          pCsr->apRankArg = (sqlite3_value**)sqlite3Fts5MallocZero(&rc, nByte);
          if( rc==SQLITE_OK ){
            int i;
            for(i=0; i<pCsr->nRankArg; i++){
              pCsr->apRankArg[i] = sqlite3_column_value(pStmt, i);
            }
          }
          pCsr->pRankArgStmt = pStmt;
        }else{
          rc = sqlite3_finalize(pStmt);
          assert( rc!=SQLITE_OK );
        }
      }
    }
  }

  if( rc==SQLITE_OK ){
    pAux = fts5FindAuxiliary(pTab, zRank);
    if( pAux==0 ){
      assert( pTab->base.zErrMsg==0 );
      pTab->base.zErrMsg = sqlite3_mprintf("no such function: %s", zRank);
      rc = SQLITE_ERROR;
    }
  }

  pCsr->pRank = pAux;
  return rc;
}


static int fts5CursorParseRank(
  Fts5Config *pConfig,
  Fts5Cursor *pCsr, 
  sqlite3_value *pRank
){
  int rc = SQLITE_OK;
  if( pRank ){
    const char *z = (const char*)sqlite3_value_text(pRank);
    char *zRank = 0;
    char *zRankArgs = 0;

    if( z==0 ){
      if( sqlite3_value_type(pRank)==SQLITE_NULL ) rc = SQLITE_ERROR;
    }else{
      rc = sqlite3Fts5ConfigParseRank(z, &zRank, &zRankArgs);
    }
    if( rc==SQLITE_OK ){
      pCsr->zRank = zRank;
      pCsr->zRankArgs = zRankArgs;
      CsrFlagSet(pCsr, FTS5CSR_FREE_ZRANK);
    }else if( rc==SQLITE_ERROR ){
      pCsr->base.pVtab->zErrMsg = sqlite3_mprintf(
          "parse error in rank function: %s", z
      );
    }
  }else{
    if( pConfig->zRank ){
      pCsr->zRank = (char*)pConfig->zRank;
      pCsr->zRankArgs = (char*)pConfig->zRankArgs;
    }else{
      pCsr->zRank = (char*)FTS5_DEFAULT_RANK;
      pCsr->zRankArgs = 0;
    }
  }
  return rc;
}

static i64 fts5GetRowidLimit(sqlite3_value *pVal, i64 iDefault){
  if( pVal ){
    int eType = sqlite3_value_numeric_type(pVal);
    if( eType==SQLITE_INTEGER ){
      return sqlite3_value_int64(pVal);
    }
  }
  return iDefault;
}

/*
** This is the xFilter interface for the virtual table.  See
** the virtual table xFilter method documentation for additional
** information.
** 
** There are three possible query strategies:
**
**   1. Full-text search using a MATCH operator.
**   2. A by-rowid lookup.
**   3. A full-table scan.
*/
static int fts5FilterMethod(
  sqlite3_vtab_cursor *pCursor,   /* The cursor used for this query */
  int idxNum,                     /* Strategy index */
  const char *idxStr,             /* Unused */
  int nVal,                       /* Number of elements in apVal */
  sqlite3_value **apVal           /* Arguments for the indexing scheme */
){
  Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab);
  Fts5Config *pConfig = pTab->pConfig;
  Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
  int rc = SQLITE_OK;             /* Error code */
  int iVal = 0;                   /* Counter for apVal[] */
  int bDesc;                      /* True if ORDER BY [rank|rowid] DESC */
  int bOrderByRank;               /* True if ORDER BY rank */
  sqlite3_value *pMatch = 0;      /* <tbl> MATCH ? expression (or NULL) */
  sqlite3_value *pRank = 0;       /* rank MATCH ? expression (or NULL) */
  sqlite3_value *pRowidEq = 0;    /* rowid = ? expression (or NULL) */
  sqlite3_value *pRowidLe = 0;    /* rowid <= ? expression (or NULL) */
  sqlite3_value *pRowidGe = 0;    /* rowid >= ? expression (or NULL) */
  char **pzErrmsg = pConfig->pzErrmsg;

  assert( pCsr->pStmt==0 );
  assert( pCsr->pExpr==0 );
  assert( pCsr->csrflags==0 );
  assert( pCsr->pRank==0 );
  assert( pCsr->zRank==0 );
  assert( pCsr->zRankArgs==0 );

  assert( pzErrmsg==0 || pzErrmsg==&pTab->base.zErrMsg );
  pConfig->pzErrmsg = &pTab->base.zErrMsg;

  /* Decode the arguments passed through to this function.
  **
  ** Note: The following set of if(...) statements must be in the same
  ** order as the corresponding entries in the struct at the top of
  ** fts5BestIndexMethod().  */
  if( BitFlagTest(idxNum, FTS5_BI_MATCH) ) pMatch = apVal[iVal++];
  if( BitFlagTest(idxNum, FTS5_BI_RANK) ) pRank = apVal[iVal++];
  if( BitFlagTest(idxNum, FTS5_BI_ROWID_EQ) ) pRowidEq = apVal[iVal++];
  if( BitFlagTest(idxNum, FTS5_BI_ROWID_LE) ) pRowidLe = apVal[iVal++];
  if( BitFlagTest(idxNum, FTS5_BI_ROWID_GE) ) pRowidGe = apVal[iVal++];
  assert( iVal==nVal );
  bOrderByRank = ((idxNum & FTS5_BI_ORDER_RANK) ? 1 : 0);
  pCsr->bDesc = bDesc = ((idxNum & FTS5_BI_ORDER_DESC) ? 1 : 0);

  /* Set the cursor upper and lower rowid limits. Only some strategies 
  ** actually use them. This is ok, as the xBestIndex() method leaves the
  ** sqlite3_index_constraint.omit flag clear for range constraints
  ** on the rowid field.  */
  if( pRowidEq ){
    pRowidLe = pRowidGe = pRowidEq;
  }
  if( bDesc ){
    pCsr->iFirstRowid = fts5GetRowidLimit(pRowidLe, LARGEST_INT64);
    pCsr->iLastRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64);
  }else{
    pCsr->iLastRowid = fts5GetRowidLimit(pRowidLe, LARGEST_INT64);
    pCsr->iFirstRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64);
  }

  if( pTab->pSortCsr ){
    /* If pSortCsr is non-NULL, then this call is being made as part of 
    ** processing for a "... MATCH <expr> ORDER BY rank" query (ePlan is
    ** set to FTS5_PLAN_SORTED_MATCH). pSortCsr is the cursor that will
    ** return results to the user for this query. The current cursor 
    ** (pCursor) is used to execute the query issued by function 
    ** fts5CursorFirstSorted() above.  */
    assert( pRowidEq==0 && pRowidLe==0 && pRowidGe==0 && pRank==0 );
    assert( nVal==0 && pMatch==0 && bOrderByRank==0 && bDesc==0 );
    assert( pCsr->iLastRowid==LARGEST_INT64 );
    assert( pCsr->iFirstRowid==SMALLEST_INT64 );
    pCsr->ePlan = FTS5_PLAN_SOURCE;
    pCsr->pExpr = pTab->pSortCsr->pExpr;
    rc = fts5CursorFirst(pTab, pCsr, bDesc);
  }else if( pMatch ){
    const char *zExpr = (const char*)sqlite3_value_text(apVal[0]);

    rc = fts5CursorParseRank(pConfig, pCsr, pRank);
    if( rc==SQLITE_OK ){
      if( zExpr[0]=='*' ){
        /* The user has issued a query of the form "MATCH '*...'". This
        ** indicates that the MATCH expression is not a full text query,
        ** but a request for an internal parameter.  */
        rc = fts5SpecialMatch(pTab, pCsr, &zExpr[1]);
      }else{
        char **pzErr = &pTab->base.zErrMsg;
        rc = sqlite3Fts5ExprNew(pConfig, zExpr, &pCsr->pExpr, pzErr);
        if( rc==SQLITE_OK ){
          if( bOrderByRank ){
            pCsr->ePlan = FTS5_PLAN_SORTED_MATCH;
            rc = fts5CursorFirstSorted(pTab, pCsr, bDesc);
          }else{
            pCsr->ePlan = FTS5_PLAN_MATCH;
            rc = fts5CursorFirst(pTab, pCsr, bDesc);
          }
        }
      }
    }
  }else if( pConfig->zContent==0 ){
    *pConfig->pzErrmsg = sqlite3_mprintf(
        "%s: table does not support scanning", pConfig->zName
    );
    rc = SQLITE_ERROR;
  }else{
    /* This is either a full-table scan (ePlan==FTS5_PLAN_SCAN) or a lookup
    ** by rowid (ePlan==FTS5_PLAN_ROWID).  */
    pCsr->ePlan = (pRowidEq ? FTS5_PLAN_ROWID : FTS5_PLAN_SCAN);
    rc = sqlite3Fts5StorageStmt(
        pTab->pStorage, fts5StmtType(pCsr), &pCsr->pStmt, &pTab->base.zErrMsg
    );
    if( rc==SQLITE_OK ){
      if( pCsr->ePlan==FTS5_PLAN_ROWID ){
        sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]);
      }else{
        sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iFirstRowid);
        sqlite3_bind_int64(pCsr->pStmt, 2, pCsr->iLastRowid);
      }
      rc = fts5NextMethod(pCursor);
    }
  }

  pConfig->pzErrmsg = pzErrmsg;
  return rc;
}

/* 
** This is the xEof method of the virtual table. SQLite calls this 
** routine to find out if it has reached the end of a result set.
*/
static int fts5EofMethod(sqlite3_vtab_cursor *pCursor){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
  return (CsrFlagTest(pCsr, FTS5CSR_EOF) ? 1 : 0);
}

/*
** Return the rowid that the cursor currently points to.
*/
static i64 fts5CursorRowid(Fts5Cursor *pCsr){
  assert( pCsr->ePlan==FTS5_PLAN_MATCH 
       || pCsr->ePlan==FTS5_PLAN_SORTED_MATCH 
       || pCsr->ePlan==FTS5_PLAN_SOURCE 
  );
  if( pCsr->pSorter ){
    return pCsr->pSorter->iRowid;
  }else{
    return sqlite3Fts5ExprRowid(pCsr->pExpr);
  }
}

/* 
** This is the xRowid method. The SQLite core calls this routine to
** retrieve the rowid for the current row of the result set. fts5
** exposes %_content.docid as the rowid for the virtual table. The
** rowid should be written to *pRowid.
*/
static int fts5RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
  int ePlan = pCsr->ePlan;
  
  assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 );
  switch( ePlan ){
    case FTS5_PLAN_SPECIAL:
      *pRowid = 0;
      break;

    case FTS5_PLAN_SOURCE:
    case FTS5_PLAN_MATCH:
    case FTS5_PLAN_SORTED_MATCH:
      *pRowid = fts5CursorRowid(pCsr);
      break;

    default:
      *pRowid = sqlite3_column_int64(pCsr->pStmt, 0);
      break;
  }

  return SQLITE_OK;
}

/*
** If the cursor requires seeking (bSeekRequired flag is set), seek it.
** Return SQLITE_OK if no error occurs, or an SQLite error code otherwise.
**
** If argument bErrormsg is true and an error occurs, an error message may
** be left in sqlite3_vtab.zErrMsg.
*/
static int fts5SeekCursor(Fts5Cursor *pCsr, int bErrormsg){
  int rc = SQLITE_OK;

  /* If the cursor does not yet have a statement handle, obtain one now. */ 
  if( pCsr->pStmt==0 ){
    Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
    int eStmt = fts5StmtType(pCsr);
    rc = sqlite3Fts5StorageStmt(
        pTab->pStorage, eStmt, &pCsr->pStmt, (bErrormsg?&pTab->base.zErrMsg:0)
    );
    assert( rc!=SQLITE_OK || pTab->base.zErrMsg==0 );
    assert( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) );
  }

  if( rc==SQLITE_OK && CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) ){
    assert( pCsr->pExpr );
    sqlite3_reset(pCsr->pStmt);
    sqlite3_bind_int64(pCsr->pStmt, 1, fts5CursorRowid(pCsr));
    rc = sqlite3_step(pCsr->pStmt);
    if( rc==SQLITE_ROW ){
      rc = SQLITE_OK;
      CsrFlagClear(pCsr, FTS5CSR_REQUIRE_CONTENT);
    }else{
      rc = sqlite3_reset(pCsr->pStmt);
      if( rc==SQLITE_OK ){
        rc = FTS5_CORRUPT;
      }
    }
  }
  return rc;
}

static void fts5SetVtabError(Fts5Table *p, const char *zFormat, ...){
  va_list ap;                     /* ... printf arguments */
  va_start(ap, zFormat);
  assert( p->base.zErrMsg==0 );
  p->base.zErrMsg = sqlite3_vmprintf(zFormat, ap);
  va_end(ap);
}

/*
** This function is called to handle an FTS INSERT command. In other words,
** an INSERT statement of the form:
**
**     INSERT INTO fts(fts) VALUES($pCmd)
**     INSERT INTO fts(fts, rank) VALUES($pCmd, $pVal)
**
** Argument pVal is the value assigned to column "fts" by the INSERT 
** statement. This function returns SQLITE_OK if successful, or an SQLite
** error code if an error occurs.
**
** The commands implemented by this function are documented in the "Special
** INSERT Directives" section of the documentation. It should be updated if
** more commands are added to this function.
*/
static int fts5SpecialInsert(
  Fts5Table *pTab,                /* Fts5 table object */
  sqlite3_value *pCmd,            /* Value inserted into special column */
  sqlite3_value *pVal             /* Value inserted into rank column */
){
  Fts5Config *pConfig = pTab->pConfig;
  const char *z = (const char*)sqlite3_value_text(pCmd);
  int rc = SQLITE_OK;
  int bError = 0;

  if( 0==sqlite3_stricmp("delete-all", z) ){
    if( pConfig->eContent==FTS5_CONTENT_NORMAL ){
      fts5SetVtabError(pTab, 
          "'delete-all' may only be used with a "
          "contentless or external content fts5 table"
      );
      rc = SQLITE_ERROR;
    }else{
      rc = sqlite3Fts5StorageDeleteAll(pTab->pStorage);
    }
  }else if( 0==sqlite3_stricmp("rebuild", z) ){
    if( pConfig->eContent==FTS5_CONTENT_NONE ){
      fts5SetVtabError(pTab, 
          "'rebuild' may not be used with a contentless fts5 table"
      );
      rc = SQLITE_ERROR;
    }else{
      rc = sqlite3Fts5StorageRebuild(pTab->pStorage);
    }
  }else if( 0==sqlite3_stricmp("optimize", z) ){
    rc = sqlite3Fts5StorageOptimize(pTab->pStorage);
  }else if( 0==sqlite3_stricmp("merge", z) ){
    int nMerge = sqlite3_value_int(pVal);
    rc = sqlite3Fts5StorageMerge(pTab->pStorage, nMerge);
  }else if( 0==sqlite3_stricmp("integrity-check", z) ){
    rc = sqlite3Fts5StorageIntegrity(pTab->pStorage);
  }else{
    rc = sqlite3Fts5IndexLoadConfig(pTab->pIndex);
    if( rc==SQLITE_OK ){
      rc = sqlite3Fts5ConfigSetValue(pTab->pConfig, z, pVal, &bError);
    }
    if( rc==SQLITE_OK ){
      if( bError ){
        rc = SQLITE_ERROR;
      }else{
        rc = sqlite3Fts5StorageConfigValue(pTab->pStorage, z, pVal, 0);
      }
    }
  }
  return rc;
}

static int fts5SpecialDelete(
  Fts5Table *pTab, 
  sqlite3_value **apVal, 
  sqlite3_int64 *piRowid
){
  int rc = SQLITE_OK;
  int eType1 = sqlite3_value_type(apVal[1]);
  if( eType1==SQLITE_INTEGER ){
    sqlite3_int64 iDel = sqlite3_value_int64(apVal[1]);
    rc = sqlite3Fts5StorageSpecialDelete(pTab->pStorage, iDel, &apVal[2]);
  }
  return rc;
}

/* 
** This function is the implementation of the xUpdate callback used by 
** FTS3 virtual tables. It is invoked by SQLite each time a row is to be
** inserted, updated or deleted.
*/
static int fts5UpdateMethod(
  sqlite3_vtab *pVtab,            /* Virtual table handle */
  int nArg,                       /* Size of argument array */
  sqlite3_value **apVal,          /* Array of arguments */
  sqlite_int64 *pRowid            /* OUT: The affected (or effected) rowid */
){
  Fts5Table *pTab = (Fts5Table*)pVtab;
  Fts5Config *pConfig = pTab->pConfig;
  int eType0;                     /* value_type() of apVal[0] */
  int eConflict;                  /* ON CONFLICT for this DML */
  int rc = SQLITE_OK;             /* Return code */

  /* A transaction must be open when this is called. */
  assert( pTab->ts.eState==1 );

  assert( pTab->pConfig->pzErrmsg==0 );
  pTab->pConfig->pzErrmsg = &pTab->base.zErrMsg;

  /* A delete specifies a single argument - the rowid of the row to remove.
  ** Update and insert operations pass:
  **
  **   1. The "old" rowid, or NULL.
  **   2. The "new" rowid.
  **   3. Values for each of the nCol matchable columns.
  **   4. Values for the two hidden columns (<tablename> and "rank").
  */

  eType0 = sqlite3_value_type(apVal[0]);
  eConflict = sqlite3_vtab_on_conflict(pConfig->db);

  assert( eType0==SQLITE_INTEGER || eType0==SQLITE_NULL );
  assert( pVtab->zErrMsg==0 );
  assert( (nArg==1 && eType0==SQLITE_INTEGER) || nArg==(2+pConfig->nCol+2) );

  fts5TripCursors(pTab);
  if( eType0==SQLITE_INTEGER ){
    if( fts5IsContentless(pTab) ){
      pTab->base.zErrMsg = sqlite3_mprintf(
          "cannot %s contentless fts5 table: %s", 
          (nArg>1 ? "UPDATE" : "DELETE from"), pConfig->zName
      );
      rc = SQLITE_ERROR;
    }else{
      i64 iDel = sqlite3_value_int64(apVal[0]);  /* Rowid to delete */
      rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel);
    }
  }else{
    sqlite3_value *pCmd = apVal[2 + pConfig->nCol];
    assert( nArg>1 );
    if( SQLITE_NULL!=sqlite3_value_type(pCmd) ){
      const char *z = (const char*)sqlite3_value_text(pCmd);
      if( pConfig->eContent!=FTS5_CONTENT_NORMAL 
       && 0==sqlite3_stricmp("delete", z) 
      ){
        rc = fts5SpecialDelete(pTab, apVal, pRowid);
      }else{
        rc = fts5SpecialInsert(pTab, pCmd, apVal[2 + pConfig->nCol + 1]);
      }
      goto update_method_out;
    }
  }


  if( rc==SQLITE_OK && nArg>1 ){
    rc = sqlite3Fts5StorageInsert(pTab->pStorage, apVal, eConflict, pRowid);
  }

 update_method_out:
  pTab->pConfig->pzErrmsg = 0;
  return rc;
}

/*
** Implementation of xSync() method. 
*/
static int fts5SyncMethod(sqlite3_vtab *pVtab){
  int rc;
  Fts5Table *pTab = (Fts5Table*)pVtab;
  fts5CheckTransactionState(pTab, FTS5_SYNC, 0);
  pTab->pConfig->pzErrmsg = &pTab->base.zErrMsg;
  fts5TripCursors(pTab);
  rc = sqlite3Fts5StorageSync(pTab->pStorage, 1);
  pTab->pConfig->pzErrmsg = 0;
  return rc;
}

/*
** Implementation of xBegin() method. 
*/
static int fts5BeginMethod(sqlite3_vtab *pVtab){
  fts5CheckTransactionState((Fts5Table*)pVtab, FTS5_BEGIN, 0);
  return SQLITE_OK;
}

/*
** Implementation of xCommit() method. This is a no-op. The contents of
** the pending-terms hash-table have already been flushed into the database
** by fts5SyncMethod().
*/
static int fts5CommitMethod(sqlite3_vtab *pVtab){
  fts5CheckTransactionState((Fts5Table*)pVtab, FTS5_COMMIT, 0);
  return SQLITE_OK;
}

/*
** Implementation of xRollback(). Discard the contents of the pending-terms
** hash-table. Any changes made to the database are reverted by SQLite.
*/
static int fts5RollbackMethod(sqlite3_vtab *pVtab){
  int rc;
  Fts5Table *pTab = (Fts5Table*)pVtab;
  fts5CheckTransactionState(pTab, FTS5_ROLLBACK, 0);
  rc = sqlite3Fts5StorageRollback(pTab->pStorage);
  return rc;
}

static void *fts5ApiUserData(Fts5Context *pCtx){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  return pCsr->pAux->pUserData;
}

static int fts5ApiColumnCount(Fts5Context *pCtx){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  return ((Fts5Table*)(pCsr->base.pVtab))->pConfig->nCol;
}

static int fts5ApiColumnTotalSize(
  Fts5Context *pCtx, 
  int iCol, 
  sqlite3_int64 *pnToken
){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
  return sqlite3Fts5StorageSize(pTab->pStorage, iCol, pnToken);
}

static int fts5ApiRowCount(Fts5Context *pCtx, i64 *pnRow){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
  return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow);
}

static int fts5ApiTokenize(
  Fts5Context *pCtx, 
  const char *pText, int nText, 
  void *pUserData,
  int (*xToken)(void*, const char*, int, int, int)
){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
  return sqlite3Fts5Tokenize(pTab->pConfig, pText, nText, pUserData, xToken);
}

static int fts5ApiPhraseCount(Fts5Context *pCtx){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  return sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
}

static int fts5ApiPhraseSize(Fts5Context *pCtx, int iPhrase){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  return sqlite3Fts5ExprPhraseSize(pCsr->pExpr, iPhrase);
}

static int fts5CsrPoslist(Fts5Cursor *pCsr, int iPhrase, const u8 **pa){
  int n;
  if( pCsr->pSorter ){
    Fts5Sorter *pSorter = pCsr->pSorter;
    int i1 = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]);
    n = pSorter->aIdx[iPhrase] - i1;
    *pa = &pSorter->aPoslist[i1];
  }else{
    n = sqlite3Fts5ExprPoslist(pCsr->pExpr, iPhrase, pa);
  }
  return n;
}

/*
** Ensure that the Fts5Cursor.nInstCount and aInst[] variables are populated
** correctly for the current view. Return SQLITE_OK if successful, or an
** SQLite error code otherwise.
*/
static int fts5CacheInstArray(Fts5Cursor *pCsr){
  int rc = SQLITE_OK;
  if( pCsr->aInst==0 ){
    Fts5PoslistReader *aIter;     /* One iterator for each phrase */
    int nIter;                    /* Number of iterators/phrases */
    int nByte;
    
    nIter = sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
    nByte = sizeof(Fts5PoslistReader) * nIter;
    aIter = (Fts5PoslistReader*)sqlite3Fts5MallocZero(&rc, nByte);
    if( aIter ){
      Fts5Buffer buf = {0, 0, 0}; /* Build up aInst[] here */
      int nInst = 0;              /* Number instances seen so far */
      int i;

      /* Initialize all iterators */
      for(i=0; i<nIter; i++){
        const u8 *a;
        int n = fts5CsrPoslist(pCsr, i, &a);
        sqlite3Fts5PoslistReaderInit(-1, a, n, &aIter[i]);
      }

      while( 1 ){
        int *aInst;
        int iBest = -1;
        for(i=0; i<nIter; i++){
          if( (aIter[i].bEof==0) 
           && (iBest<0 || aIter[i].iPos<aIter[iBest].iPos) 
          ){
            iBest = i;
          }
        }

        if( iBest<0 ) break;
        nInst++;
        if( sqlite3Fts5BufferGrow(&rc, &buf, nInst * sizeof(int) * 3) ) break;

        aInst = &((int*)buf.p)[3 * (nInst-1)];
        aInst[0] = iBest;
        aInst[1] = FTS5_POS2COLUMN(aIter[iBest].iPos);
        aInst[2] = FTS5_POS2OFFSET(aIter[iBest].iPos);
        sqlite3Fts5PoslistReaderNext(&aIter[iBest]);
      }

      pCsr->aInst = (int*)buf.p;
      pCsr->nInstCount = nInst;
      sqlite3_free(aIter);
    }
  }
  return rc;
}

static int fts5ApiInstCount(Fts5Context *pCtx, int *pnInst){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  int rc;
  if( SQLITE_OK==(rc = fts5CacheInstArray(pCsr)) ){
    *pnInst = pCsr->nInstCount;
  }
  return rc;
}

static int fts5ApiInst(
  Fts5Context *pCtx, 
  int iIdx, 
  int *piPhrase, 
  int *piCol, 
  int *piOff
){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  int rc;
  if( SQLITE_OK==(rc = fts5CacheInstArray(pCsr)) ){
    if( iIdx<0 || iIdx>=pCsr->nInstCount ){
      rc = SQLITE_RANGE;
    }else{
      *piPhrase = pCsr->aInst[iIdx*3];
      *piCol = pCsr->aInst[iIdx*3 + 1];
      *piOff = pCsr->aInst[iIdx*3 + 2];
    }
  }
  return rc;
}

static sqlite3_int64 fts5ApiRowid(Fts5Context *pCtx){
  return fts5CursorRowid((Fts5Cursor*)pCtx);
}

static int fts5ApiColumnText(
  Fts5Context *pCtx, 
  int iCol, 
  const char **pz, 
  int *pn
){
  int rc = SQLITE_OK;
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  if( fts5IsContentless((Fts5Table*)(pCsr->base.pVtab)) ){
    *pz = 0;
    *pn = 0;
  }else{
    rc = fts5SeekCursor(pCsr, 0);
    if( rc==SQLITE_OK ){
      *pz = (const char*)sqlite3_column_text(pCsr->pStmt, iCol+1);
      *pn = sqlite3_column_bytes(pCsr->pStmt, iCol+1);
    }
  }
  return rc;
}

static int fts5ColumnSizeCb(
  void *pContext,                 /* Pointer to int */
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStart,                     /* Start offset of token */
  int iEnd                        /* End offset of token */
){
  int *pCnt = (int*)pContext;
  *pCnt = *pCnt + 1;
  return SQLITE_OK;
}

static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
  Fts5Config *pConfig = pTab->pConfig;
  int rc = SQLITE_OK;

  if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_DOCSIZE) ){
    if( pConfig->bColumnsize ){
      i64 iRowid = fts5CursorRowid(pCsr);
      rc = sqlite3Fts5StorageDocsize(pTab->pStorage, iRowid, pCsr->aColumnSize);
    }else if( pConfig->zContent==0 ){
      int i;
      for(i=0; i<pConfig->nCol; i++){
        if( pConfig->abUnindexed[i]==0 ){
          pCsr->aColumnSize[i] = -1;
        }
      }
    }else{
      int i;
      for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
        if( pConfig->abUnindexed[i]==0 ){
          const char *z; int n;
          void *p = (void*)(&pCsr->aColumnSize[i]);
          pCsr->aColumnSize[i] = 0;
          rc = fts5ApiColumnText(pCtx, i, &z, &n);
          if( rc==SQLITE_OK ){
            rc = sqlite3Fts5Tokenize(pConfig, z, n, p, fts5ColumnSizeCb);
          }
        }
      }
    }
    CsrFlagClear(pCsr, FTS5CSR_REQUIRE_DOCSIZE);
  }
  if( iCol<0 ){
    int i;
    *pnToken = 0;
    for(i=0; i<pConfig->nCol; i++){
      *pnToken += pCsr->aColumnSize[i];
    }
  }else if( iCol<pConfig->nCol ){
    *pnToken = pCsr->aColumnSize[iCol];
  }else{
    *pnToken = 0;
    rc = SQLITE_RANGE;
  }
  return rc;
}

/*
** Implementation of the xSetAuxdata() method.
*/
static int fts5ApiSetAuxdata(
  Fts5Context *pCtx,              /* Fts5 context */
  void *pPtr,                     /* Pointer to save as auxdata */
  void(*xDelete)(void*)           /* Destructor for pPtr (or NULL) */
){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  Fts5Auxdata *pData;

  /* Search through the cursors list of Fts5Auxdata objects for one that
  ** corresponds to the currently executing auxiliary function.  */
  for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){
    if( pData->pAux==pCsr->pAux ) break;
  }

  if( pData ){
    if( pData->xDelete ){
      pData->xDelete(pData->pPtr);
    }
  }else{
    int rc = SQLITE_OK;
    pData = (Fts5Auxdata*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Auxdata));
    if( pData==0 ){
      if( xDelete ) xDelete(pPtr);
      return rc;
    }
    pData->pAux = pCsr->pAux;
    pData->pNext = pCsr->pAuxdata;
    pCsr->pAuxdata = pData;
  }

  pData->xDelete = xDelete;
  pData->pPtr = pPtr;
  return SQLITE_OK;
}

static void *fts5ApiGetAuxdata(Fts5Context *pCtx, int bClear){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  Fts5Auxdata *pData;
  void *pRet = 0;

  for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){
    if( pData->pAux==pCsr->pAux ) break;
  }

  if( pData ){
    pRet = pData->pPtr;
    if( bClear ){
      pData->pPtr = 0;
      pData->xDelete = 0;
    }
  }

  return pRet;
}

static int fts5ApiQueryPhrase(Fts5Context*, int, void*, 
    int(*)(const Fts5ExtensionApi*, Fts5Context*, void*)
);

static const Fts5ExtensionApi sFts5Api = {
  1,                            /* iVersion */
  fts5ApiUserData,
  fts5ApiColumnCount,
  fts5ApiRowCount,
  fts5ApiColumnTotalSize,
  fts5ApiTokenize,
  fts5ApiPhraseCount,
  fts5ApiPhraseSize,
  fts5ApiInstCount,
  fts5ApiInst,
  fts5ApiRowid,
  fts5ApiColumnText,
  fts5ApiColumnSize,
  fts5ApiQueryPhrase,
  fts5ApiSetAuxdata,
  fts5ApiGetAuxdata,
};


/*
** Implementation of API function xQueryPhrase().
*/
static int fts5ApiQueryPhrase(
  Fts5Context *pCtx, 
  int iPhrase, 
  void *pUserData,
  int(*xCallback)(const Fts5ExtensionApi*, Fts5Context*, void*)
){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
  int rc;
  Fts5Cursor *pNew = 0;

  rc = fts5OpenMethod(pCsr->base.pVtab, (sqlite3_vtab_cursor**)&pNew);
  if( rc==SQLITE_OK ){
    Fts5Config *pConf = pTab->pConfig;
    pNew->ePlan = FTS5_PLAN_MATCH;
    pNew->iFirstRowid = SMALLEST_INT64;
    pNew->iLastRowid = LARGEST_INT64;
    pNew->base.pVtab = (sqlite3_vtab*)pTab;
    rc = sqlite3Fts5ExprPhraseExpr(pConf, pCsr->pExpr, iPhrase, &pNew->pExpr);
  }

  if( rc==SQLITE_OK ){
    for(rc = fts5CursorFirst(pTab, pNew, 0);
        rc==SQLITE_OK && CsrFlagTest(pNew, FTS5CSR_EOF)==0;
        rc = fts5NextMethod((sqlite3_vtab_cursor*)pNew)
    ){
      rc = xCallback(&sFts5Api, (Fts5Context*)pNew, pUserData);
      if( rc!=SQLITE_OK ){
        if( rc==SQLITE_DONE ) rc = SQLITE_OK;
        break;
      }
    }
  }

  fts5CloseMethod((sqlite3_vtab_cursor*)pNew);
  return rc;
}

static void fts5ApiInvoke(
  Fts5Auxiliary *pAux,
  Fts5Cursor *pCsr,
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  assert( pCsr->pAux==0 );
  pCsr->pAux = pAux;
  pAux->xFunc(&sFts5Api, (Fts5Context*)pCsr, context, argc, argv);
  pCsr->pAux = 0;
}

static Fts5Cursor *fts5CursorFromCsrid(Fts5Global *pGlobal, i64 iCsrId){
  Fts5Cursor *pCsr;
  for(pCsr=pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){
    if( pCsr->iCsrId==iCsrId ) break;
  }
  return pCsr;
}

static void fts5ApiCallback(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){

  Fts5Auxiliary *pAux;
  Fts5Cursor *pCsr;
  i64 iCsrId;

  assert( argc>=1 );
  pAux = (Fts5Auxiliary*)sqlite3_user_data(context);
  iCsrId = sqlite3_value_int64(argv[0]);

  pCsr = fts5CursorFromCsrid(pAux->pGlobal, iCsrId);
  if( pCsr==0 ){
    char *zErr = sqlite3_mprintf("no such cursor: %lld", iCsrId);
    sqlite3_result_error(context, zErr, -1);
    sqlite3_free(zErr);
  }else{
    fts5ApiInvoke(pAux, pCsr, context, argc-1, &argv[1]);
  }
}


/*
** Given cursor id iId, return a pointer to the corresponding Fts5Index 
** object. Or NULL If the cursor id does not exist.
**
** If successful, set *pnCol to the number of indexed columns in the
** table before returning.
*/
Fts5Index *sqlite3Fts5IndexFromCsrid(
  Fts5Global *pGlobal, 
  i64 iCsrId, 
  int *pnCol
){
  Fts5Cursor *pCsr;
  Fts5Table *pTab;

  pCsr = fts5CursorFromCsrid(pGlobal, iCsrId);
  pTab = (Fts5Table*)pCsr->base.pVtab;
  *pnCol = pTab->pConfig->nCol;

  return pTab->pIndex;
}

/*
** Return a "position-list blob" corresponding to the current position of
** cursor pCsr via sqlite3_result_blob(). A position-list blob contains
** the current position-list for each phrase in the query associated with
** cursor pCsr.
**
** A position-list blob begins with (nPhrase-1) varints, where nPhrase is
** the number of phrases in the query. Following the varints are the
** concatenated position lists for each phrase, in order.
**
** The first varint (if it exists) contains the size of the position list
** for phrase 0. The second (same disclaimer) contains the size of position
** list 1. And so on. There is no size field for the final position list,
** as it can be derived from the total size of the blob.
*/
static int fts5PoslistBlob(sqlite3_context *pCtx, Fts5Cursor *pCsr){
  int i;
  int rc = SQLITE_OK;
  int nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
  Fts5Buffer val;

  memset(&val, 0, sizeof(Fts5Buffer));

  /* Append the varints */
  for(i=0; i<(nPhrase-1); i++){
    const u8 *dummy;
    int nByte = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &dummy);
    sqlite3Fts5BufferAppendVarint(&rc, &val, nByte);
  }

  /* Append the position lists */
  for(i=0; i<nPhrase; i++){
    const u8 *pPoslist;
    int nPoslist;
    nPoslist = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &pPoslist);
    sqlite3Fts5BufferAppendBlob(&rc, &val, nPoslist, pPoslist);
  }

  sqlite3_result_blob(pCtx, val.p, val.n, sqlite3_free);
  return rc;
}

/* 
** This is the xColumn method, called by SQLite to request a value from
** the row that the supplied cursor currently points to.
*/
static int fts5ColumnMethod(
  sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
  sqlite3_context *pCtx,          /* Context for sqlite3_result_xxx() calls */
  int iCol                        /* Index of column to read value from */
){
  Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab);
  Fts5Config *pConfig = pTab->pConfig;
  Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
  int rc = SQLITE_OK;
  
  assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 );

  if( pCsr->ePlan==FTS5_PLAN_SPECIAL ){
    if( iCol==pConfig->nCol ){
      sqlite3_result_int64(pCtx, pCsr->iSpecial);
    }
  }else

  if( iCol==pConfig->nCol ){
    /* User is requesting the value of the special column with the same name
    ** as the table. Return the cursor integer id number. This value is only
    ** useful in that it may be passed as the first argument to an FTS5
    ** auxiliary function.  */
    sqlite3_result_int64(pCtx, pCsr->iCsrId);
  }else if( iCol==pConfig->nCol+1 ){

    /* The value of the "rank" column. */
    if( pCsr->ePlan==FTS5_PLAN_SOURCE ){
      fts5PoslistBlob(pCtx, pCsr);
    }else if( 
        pCsr->ePlan==FTS5_PLAN_MATCH
     || pCsr->ePlan==FTS5_PLAN_SORTED_MATCH
    ){
      if( pCsr->pRank || SQLITE_OK==(rc = fts5FindRankFunction(pCsr)) ){
        fts5ApiInvoke(pCsr->pRank, pCsr, pCtx, pCsr->nRankArg, pCsr->apRankArg);
      }
    }
  }else if( !fts5IsContentless(pTab) ){
    rc = fts5SeekCursor(pCsr, 1);
    if( rc==SQLITE_OK ){
      sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1));
    }
  }
  return rc;
}


/*
** This routine implements the xFindFunction method for the FTS3
** virtual table.
*/
static int fts5FindFunctionMethod(
  sqlite3_vtab *pVtab,            /* Virtual table handle */
  int nArg,                       /* Number of SQL function arguments */
  const char *zName,              /* Name of SQL function */
  void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */
  void **ppArg                    /* OUT: User data for *pxFunc */
){
  Fts5Table *pTab = (Fts5Table*)pVtab;
  Fts5Auxiliary *pAux;

  pAux = fts5FindAuxiliary(pTab, zName);
  if( pAux ){
    *pxFunc = fts5ApiCallback;
    *ppArg = (void*)pAux;
    return 1;
  }

  /* No function of the specified name was found. Return 0. */
  return 0;
}

/*
** Implementation of FTS5 xRename method. Rename an fts5 table.
*/
static int fts5RenameMethod(
  sqlite3_vtab *pVtab,            /* Virtual table handle */
  const char *zName               /* New name of table */
){
  Fts5Table *pTab = (Fts5Table*)pVtab;
  return sqlite3Fts5StorageRename(pTab->pStorage, zName);
}

/*
** The xSavepoint() method.
**
** Flush the contents of the pending-terms table to disk.
*/
static int fts5SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){
  Fts5Table *pTab = (Fts5Table*)pVtab;
  fts5CheckTransactionState(pTab, FTS5_SAVEPOINT, iSavepoint);
  fts5TripCursors(pTab);
  return sqlite3Fts5StorageSync(pTab->pStorage, 0);
}

/*
** The xRelease() method.
**
** This is a no-op.
*/
static int fts5ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){
  Fts5Table *pTab = (Fts5Table*)pVtab;
  fts5CheckTransactionState(pTab, FTS5_RELEASE, iSavepoint);
  fts5TripCursors(pTab);
  return sqlite3Fts5StorageSync(pTab->pStorage, 0);
}

/*
** The xRollbackTo() method.
**
** Discard the contents of the pending terms table.
*/
static int fts5RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){
  Fts5Table *pTab = (Fts5Table*)pVtab;
  fts5CheckTransactionState(pTab, FTS5_ROLLBACKTO, iSavepoint);
  fts5TripCursors(pTab);
  return sqlite3Fts5StorageRollback(pTab->pStorage);
}

/*
** Register a new auxiliary function with global context pGlobal.
*/
static int fts5CreateAux(
  fts5_api *pApi,                 /* Global context (one per db handle) */
  const char *zName,              /* Name of new function */
  void *pUserData,                /* User data for aux. function */
  fts5_extension_function xFunc,  /* Aux. function implementation */
  void(*xDestroy)(void*)          /* Destructor for pUserData */
){
  Fts5Global *pGlobal = (Fts5Global*)pApi;
  int rc = sqlite3_overload_function(pGlobal->db, zName, -1);
  if( rc==SQLITE_OK ){
    Fts5Auxiliary *pAux;
    int nByte;                      /* Bytes of space to allocate */

    nByte = sizeof(Fts5Auxiliary) + strlen(zName) + 1;
    pAux = (Fts5Auxiliary*)sqlite3_malloc(nByte);
    if( pAux ){
      memset(pAux, 0, nByte);
      pAux->zFunc = (char*)&pAux[1];
      strcpy(pAux->zFunc, zName);
      pAux->pGlobal = pGlobal;
      pAux->pUserData = pUserData;
      pAux->xFunc = xFunc;
      pAux->xDestroy = xDestroy;
      pAux->pNext = pGlobal->pAux;
      pGlobal->pAux = pAux;
    }else{
      rc = SQLITE_NOMEM;
    }
  }

  return rc;
}

/*
** Register a new tokenizer. This is the implementation of the 
** fts5_api.xCreateTokenizer() method.
*/
static int fts5CreateTokenizer(
  fts5_api *pApi,                 /* Global context (one per db handle) */
  const char *zName,              /* Name of new function */
  void *pUserData,                /* User data for aux. function */
  fts5_tokenizer *pTokenizer,     /* Tokenizer implementation */
  void(*xDestroy)(void*)          /* Destructor for pUserData */
){
  Fts5Global *pGlobal = (Fts5Global*)pApi;
  Fts5TokenizerModule *pNew;
  int nByte;                      /* Bytes of space to allocate */
  int rc = SQLITE_OK;

  nByte = sizeof(Fts5TokenizerModule) + strlen(zName) + 1;
  pNew = (Fts5TokenizerModule*)sqlite3_malloc(nByte);
  if( pNew ){
    memset(pNew, 0, nByte);
    pNew->zName = (char*)&pNew[1];
    strcpy(pNew->zName, zName);
    pNew->pUserData = pUserData;
    pNew->x = *pTokenizer;
    pNew->xDestroy = xDestroy;
    pNew->pNext = pGlobal->pTok;
    pGlobal->pTok = pNew;
    if( pNew->pNext==0 ){
      pGlobal->pDfltTok = pNew;
    }
  }else{
    rc = SQLITE_NOMEM;
  }

  return rc;
}

static Fts5TokenizerModule *fts5LocateTokenizer(
  Fts5Global *pGlobal, 
  const char *zName
){
  Fts5TokenizerModule *pMod = 0;

  if( zName==0 ){
    pMod = pGlobal->pDfltTok;
  }else{
    for(pMod=pGlobal->pTok; pMod; pMod=pMod->pNext){
      if( sqlite3_stricmp(zName, pMod->zName)==0 ) break;
    }
  }

  return pMod;
}

/*
** Find a tokenizer. This is the implementation of the 
** fts5_api.xFindTokenizer() method.
*/
static int fts5FindTokenizer(
  fts5_api *pApi,                 /* Global context (one per db handle) */
  const char *zName,              /* Name of new function */
  void **ppUserData,
  fts5_tokenizer *pTokenizer      /* Populate this object */
){
  int rc = SQLITE_OK;
  Fts5TokenizerModule *pMod;

  pMod = fts5LocateTokenizer((Fts5Global*)pApi, zName);
  if( pMod ){
    *pTokenizer = pMod->x;
    *ppUserData = pMod->pUserData;
  }else{
    memset(pTokenizer, 0, sizeof(fts5_tokenizer));
    rc = SQLITE_ERROR;
  }

  return rc;
}

int sqlite3Fts5GetTokenizer(
  Fts5Global *pGlobal, 
  const char **azArg,
  int nArg,
  Fts5Tokenizer **ppTok,
  fts5_tokenizer **ppTokApi,
  char **pzErr
){
  Fts5TokenizerModule *pMod;
  int rc = SQLITE_OK;

  pMod = fts5LocateTokenizer(pGlobal, nArg==0 ? 0 : azArg[0]);
  if( pMod==0 ){
    assert( nArg>0 );
    rc = SQLITE_ERROR;
    *pzErr = sqlite3_mprintf("no such tokenizer: %s", azArg[0]);
  }else{
    rc = pMod->x.xCreate(pMod->pUserData, &azArg[1], (nArg?nArg-1:0), ppTok);
    *ppTokApi = &pMod->x;
    if( rc!=SQLITE_OK && pzErr ){
      *pzErr = sqlite3_mprintf("error in tokenizer constructor");
    }
  }

  if( rc!=SQLITE_OK ){
    *ppTokApi = 0;
    *ppTok = 0;
  }

  return rc;
}

static void fts5ModuleDestroy(void *pCtx){
  Fts5TokenizerModule *pTok, *pNextTok;
  Fts5Auxiliary *pAux, *pNextAux;
  Fts5Global *pGlobal = (Fts5Global*)pCtx;

  for(pAux=pGlobal->pAux; pAux; pAux=pNextAux){
    pNextAux = pAux->pNext;
    if( pAux->xDestroy ) pAux->xDestroy(pAux->pUserData);
    sqlite3_free(pAux);
  }

  for(pTok=pGlobal->pTok; pTok; pTok=pNextTok){
    pNextTok = pTok->pNext;
    if( pTok->xDestroy ) pTok->xDestroy(pTok->pUserData);
    sqlite3_free(pTok);
  }

  sqlite3_free(pGlobal);
}

static void fts5Fts5Func(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args */
  sqlite3_value **apVal           /* Function arguments */
){
  Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_data(pCtx);
  char buf[8];
  assert( nArg==0 );
  assert( sizeof(buf)>=sizeof(pGlobal) );
  memcpy(buf, (void*)&pGlobal, sizeof(pGlobal));
  sqlite3_result_blob(pCtx, buf, sizeof(pGlobal), SQLITE_TRANSIENT);
}

#ifdef _WIN32
__declspec(dllexport)
#endif
int sqlite3_fts5_init(
  sqlite3 *db,
  char **pzErrMsg,
  const sqlite3_api_routines *pApi
){
  static const sqlite3_module fts5Mod = {
    /* iVersion      */ 2,
    /* xCreate       */ fts5CreateMethod,
    /* xConnect      */ fts5ConnectMethod,
    /* xBestIndex    */ fts5BestIndexMethod,
    /* xDisconnect   */ fts5DisconnectMethod,
    /* xDestroy      */ fts5DestroyMethod,
    /* xOpen         */ fts5OpenMethod,
    /* xClose        */ fts5CloseMethod,
    /* xFilter       */ fts5FilterMethod,
    /* xNext         */ fts5NextMethod,
    /* xEof          */ fts5EofMethod,
    /* xColumn       */ fts5ColumnMethod,
    /* xRowid        */ fts5RowidMethod,
    /* xUpdate       */ fts5UpdateMethod,
    /* xBegin        */ fts5BeginMethod,
    /* xSync         */ fts5SyncMethod,
    /* xCommit       */ fts5CommitMethod,
    /* xRollback     */ fts5RollbackMethod,
    /* xFindFunction */ fts5FindFunctionMethod,
    /* xRename       */ fts5RenameMethod,
    /* xSavepoint    */ fts5SavepointMethod,
    /* xRelease      */ fts5ReleaseMethod,
    /* xRollbackTo   */ fts5RollbackToMethod,
  };

  int rc;
  Fts5Global *pGlobal = 0;

  SQLITE_EXTENSION_INIT2(pApi);

  pGlobal = (Fts5Global*)sqlite3_malloc(sizeof(Fts5Global));
  if( pGlobal==0 ){
    rc = SQLITE_NOMEM;
  }else{
    void *p = (void*)pGlobal;
    memset(pGlobal, 0, sizeof(Fts5Global));
    pGlobal->db = db;
    pGlobal->api.iVersion = 1;
    pGlobal->api.xCreateFunction = fts5CreateAux;
    pGlobal->api.xCreateTokenizer = fts5CreateTokenizer;
    pGlobal->api.xFindTokenizer = fts5FindTokenizer;
    rc = sqlite3_create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy);
    if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db);
    if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(pGlobal, db);
    if( rc==SQLITE_OK ) rc = sqlite3Fts5AuxInit(&pGlobal->api);
    if( rc==SQLITE_OK ) rc = sqlite3Fts5TokenizerInit(&pGlobal->api);
    if( rc==SQLITE_OK ) rc = sqlite3Fts5VocabInit(pGlobal, db);
    if( rc==SQLITE_OK ){
      rc = sqlite3_create_function(
          db, "fts5", 0, SQLITE_UTF8, p, fts5Fts5Func, 0, 0
      );
    }
  }
  return rc;
}
#endif /* defined(SQLITE_ENABLE_FTS5) */


Added ext/fts5/fts5_storage.c.














































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
/*
** 2014 May 31
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
*/

#ifdef SQLITE_ENABLE_FTS5


#include "fts5Int.h"

struct Fts5Storage {
  Fts5Config *pConfig;
  Fts5Index *pIndex;
  int bTotalsValid;               /* True if nTotalRow/aTotalSize[] are valid */
  i64 nTotalRow;                  /* Total number of rows in FTS table */
  i64 *aTotalSize;                /* Total sizes of each column */ 
  sqlite3_stmt *aStmt[11];
};


#if FTS5_STMT_SCAN_ASC!=0 
# error "FTS5_STMT_SCAN_ASC mismatch" 
#endif
#if FTS5_STMT_SCAN_DESC!=1 
# error "FTS5_STMT_SCAN_DESC mismatch" 
#endif
#if FTS5_STMT_LOOKUP!=2
# error "FTS5_STMT_LOOKUP mismatch" 
#endif

#define FTS5_STMT_INSERT_CONTENT  3
#define FTS5_STMT_REPLACE_CONTENT 4
#define FTS5_STMT_DELETE_CONTENT  5
#define FTS5_STMT_REPLACE_DOCSIZE  6
#define FTS5_STMT_DELETE_DOCSIZE  7
#define FTS5_STMT_LOOKUP_DOCSIZE  8
#define FTS5_STMT_REPLACE_CONFIG 9
#define FTS5_STMT_SCAN 10

/*
** Prepare the two insert statements - Fts5Storage.pInsertContent and
** Fts5Storage.pInsertDocsize - if they have not already been prepared.
** Return SQLITE_OK if successful, or an SQLite error code if an error
** occurs.
*/
static int fts5StorageGetStmt(
  Fts5Storage *p,                 /* Storage handle */
  int eStmt,                      /* FTS5_STMT_XXX constant */
  sqlite3_stmt **ppStmt,          /* OUT: Prepared statement handle */
  char **pzErrMsg                 /* OUT: Error message (if any) */
){
  int rc = SQLITE_OK;

  /* If there is no %_docsize table, there should be no requests for 
  ** statements to operate on it.  */
  assert( p->pConfig->bColumnsize || (
        eStmt!=FTS5_STMT_REPLACE_DOCSIZE 
     && eStmt!=FTS5_STMT_DELETE_DOCSIZE 
     && eStmt!=FTS5_STMT_LOOKUP_DOCSIZE 
  ));

  assert( eStmt>=0 && eStmt<ArraySize(p->aStmt) );
  if( p->aStmt[eStmt]==0 ){
    const char *azStmt[] = {
      "SELECT %s FROM %s T WHERE T.%Q >= ? AND T.%Q <= ? ORDER BY T.%Q ASC",
      "SELECT %s FROM %s T WHERE T.%Q <= ? AND T.%Q >= ? ORDER BY T.%Q DESC",
      "SELECT %s FROM %s T WHERE T.%Q=?",               /* LOOKUP  */

      "INSERT INTO %Q.'%q_content' VALUES(%s)",         /* INSERT_CONTENT  */
      "REPLACE INTO %Q.'%q_content' VALUES(%s)",        /* REPLACE_CONTENT */
      "DELETE FROM %Q.'%q_content' WHERE id=?",         /* DELETE_CONTENT  */
      "REPLACE INTO %Q.'%q_docsize' VALUES(?,?)",       /* REPLACE_DOCSIZE  */
      "DELETE FROM %Q.'%q_docsize' WHERE id=?",         /* DELETE_DOCSIZE  */

      "SELECT sz FROM %Q.'%q_docsize' WHERE id=?",      /* LOOKUP_DOCSIZE  */

      "REPLACE INTO %Q.'%q_config' VALUES(?,?)",        /* REPLACE_CONFIG */
      "SELECT %s FROM %s AS T",                         /* SCAN */
    };
    Fts5Config *pC = p->pConfig;
    char *zSql = 0;

    switch( eStmt ){
      case FTS5_STMT_SCAN:
        zSql = sqlite3_mprintf(azStmt[eStmt], 
            pC->zContentExprlist, pC->zContent
        );
        break;

      case FTS5_STMT_SCAN_ASC:
      case FTS5_STMT_SCAN_DESC:
        zSql = sqlite3_mprintf(azStmt[eStmt], pC->zContentExprlist, 
            pC->zContent, pC->zContentRowid, pC->zContentRowid,
            pC->zContentRowid
        );
        break;

      case FTS5_STMT_LOOKUP:
        zSql = sqlite3_mprintf(azStmt[eStmt], 
            pC->zContentExprlist, pC->zContent, pC->zContentRowid
        );
        break;

      case FTS5_STMT_INSERT_CONTENT: 
      case FTS5_STMT_REPLACE_CONTENT: {
        int nCol = pC->nCol + 1;
        char *zBind;
        int i;

        zBind = sqlite3_malloc(1 + nCol*2);
        if( zBind ){
          for(i=0; i<nCol; i++){
            zBind[i*2] = '?';
            zBind[i*2 + 1] = ',';
          }
          zBind[i*2-1] = '\0';
          zSql = sqlite3_mprintf(azStmt[eStmt], pC->zDb, pC->zName, zBind);
          sqlite3_free(zBind);
        }
        break;
      }

      default:
        zSql = sqlite3_mprintf(azStmt[eStmt], pC->zDb, pC->zName);
        break;
    }

    if( zSql==0 ){
      rc = SQLITE_NOMEM;
    }else{
      rc = sqlite3_prepare_v2(pC->db, zSql, -1, &p->aStmt[eStmt], 0);
      sqlite3_free(zSql);
      if( rc!=SQLITE_OK && pzErrMsg ){
        *pzErrMsg = sqlite3_mprintf("%s", sqlite3_errmsg(pC->db));
      }
    }
  }

  *ppStmt = p->aStmt[eStmt];
  return rc;
}


static int fts5ExecPrintf(
  sqlite3 *db,
  char **pzErr,
  const char *zFormat,
  ...
){
  int rc;
  va_list ap;                     /* ... printf arguments */
  char *zSql;

  va_start(ap, zFormat);
  zSql = sqlite3_vmprintf(zFormat, ap);

  if( zSql==0 ){
    rc = SQLITE_NOMEM;
  }else{
    rc = sqlite3_exec(db, zSql, 0, 0, pzErr);
    sqlite3_free(zSql);
  }

  va_end(ap);
  return rc;
}

/*
** Drop all shadow tables. Return SQLITE_OK if successful or an SQLite error
** code otherwise.
*/
int sqlite3Fts5DropAll(Fts5Config *pConfig){
  int rc = fts5ExecPrintf(pConfig->db, 0, 
      "DROP TABLE IF EXISTS %Q.'%q_data';"
      "DROP TABLE IF EXISTS %Q.'%q_config';",
      pConfig->zDb, pConfig->zName,
      pConfig->zDb, pConfig->zName
  );
  if( rc==SQLITE_OK && pConfig->bColumnsize ){
    rc = fts5ExecPrintf(pConfig->db, 0, 
        "DROP TABLE IF EXISTS %Q.'%q_docsize';",
        pConfig->zDb, pConfig->zName
    );
  }
  if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){
    rc = fts5ExecPrintf(pConfig->db, 0, 
        "DROP TABLE IF EXISTS %Q.'%q_content';",
        pConfig->zDb, pConfig->zName
    );
  }
  return rc;
}

static void fts5StorageRenameOne(
  Fts5Config *pConfig,            /* Current FTS5 configuration */
  int *pRc,                       /* IN/OUT: Error code */
  const char *zTail,              /* Tail of table name e.g. "data", "config" */
  const char *zName               /* New name of FTS5 table */
){
  if( *pRc==SQLITE_OK ){
    *pRc = fts5ExecPrintf(pConfig->db, 0, 
        "ALTER TABLE %Q.'%q_%s' RENAME TO '%q_%s';",
        pConfig->zDb, pConfig->zName, zTail, zName, zTail
    );
  }
}

int sqlite3Fts5StorageRename(Fts5Storage *pStorage, const char *zName){
  Fts5Config *pConfig = pStorage->pConfig;
  int rc = sqlite3Fts5StorageSync(pStorage, 1);

  fts5StorageRenameOne(pConfig, &rc, "data", zName);
  fts5StorageRenameOne(pConfig, &rc, "config", zName);
  if( pConfig->bColumnsize ){
    fts5StorageRenameOne(pConfig, &rc, "docsize", zName);
  }
  if( pConfig->eContent==FTS5_CONTENT_NORMAL ){
    fts5StorageRenameOne(pConfig, &rc, "content", zName);
  }
  return rc;
}

/*
** Create the shadow table named zPost, with definition zDefn. Return
** SQLITE_OK if successful, or an SQLite error code otherwise.
*/
int sqlite3Fts5CreateTable(
  Fts5Config *pConfig,            /* FTS5 configuration */
  const char *zPost,              /* Shadow table to create (e.g. "content") */
  const char *zDefn,              /* Columns etc. for shadow table */
  int bWithout,                   /* True for without rowid */
  char **pzErr                    /* OUT: Error message */
){
  int rc;
  char *zErr = 0;

  rc = fts5ExecPrintf(pConfig->db, &zErr, "CREATE TABLE %Q.'%q_%q'(%s)%s",
      pConfig->zDb, pConfig->zName, zPost, zDefn, bWithout?" WITHOUT ROWID":""
  );
  if( zErr ){
    *pzErr = sqlite3_mprintf(
        "fts5: error creating shadow table %q_%s: %s", 
        pConfig->zName, zPost, zErr
    );
    sqlite3_free(zErr);
  }

  return rc;
}

/*
** Open a new Fts5Index handle. If the bCreate argument is true, create
** and initialize the underlying tables 
**
** If successful, set *pp to point to the new object and return SQLITE_OK.
** Otherwise, set *pp to NULL and return an SQLite error code.
*/
int sqlite3Fts5StorageOpen(
  Fts5Config *pConfig, 
  Fts5Index *pIndex, 
  int bCreate, 
  Fts5Storage **pp,
  char **pzErr                    /* OUT: Error message */
){
  int rc = SQLITE_OK;
  Fts5Storage *p;                 /* New object */
  int nByte;                      /* Bytes of space to allocate */

  nByte = sizeof(Fts5Storage)               /* Fts5Storage object */
        + pConfig->nCol * sizeof(i64);      /* Fts5Storage.aTotalSize[] */
  *pp = p = (Fts5Storage*)sqlite3_malloc(nByte);
  if( !p ) return SQLITE_NOMEM;

  memset(p, 0, nByte);
  p->aTotalSize = (i64*)&p[1];
  p->pConfig = pConfig;
  p->pIndex = pIndex;

  if( bCreate ){
    if( pConfig->eContent==FTS5_CONTENT_NORMAL ){
      int nDefn = 32 + pConfig->nCol*10;
      char *zDefn = sqlite3_malloc(32 + pConfig->nCol * 10);
      if( zDefn==0 ){
        rc = SQLITE_NOMEM;
      }else{
        int i;
        int iOff;
        sqlite3_snprintf(nDefn, zDefn, "id INTEGER PRIMARY KEY");
        iOff = strlen(zDefn);
        for(i=0; i<pConfig->nCol; i++){
          sqlite3_snprintf(nDefn-iOff, &zDefn[iOff], ", c%d", i);
          iOff += strlen(&zDefn[iOff]);
        }
        rc = sqlite3Fts5CreateTable(pConfig, "content", zDefn, 0, pzErr);
      }
      sqlite3_free(zDefn);
    }

    if( rc==SQLITE_OK && pConfig->bColumnsize ){
      rc = sqlite3Fts5CreateTable(
          pConfig, "docsize", "id INTEGER PRIMARY KEY, sz BLOB", 0, pzErr
      );
    }
    if( rc==SQLITE_OK ){
      rc = sqlite3Fts5CreateTable(
          pConfig, "config", "k PRIMARY KEY, v", 1, pzErr
      );
    }
    if( rc==SQLITE_OK ){
      rc = sqlite3Fts5StorageConfigValue(p, "version", 0, FTS5_CURRENT_VERSION);
    }
  }

  if( rc ){
    sqlite3Fts5StorageClose(p);
    *pp = 0;
  }
  return rc;
}

/*
** Close a handle opened by an earlier call to sqlite3Fts5StorageOpen().
*/
int sqlite3Fts5StorageClose(Fts5Storage *p){
  int rc = SQLITE_OK;
  if( p ){
    int i;

    /* Finalize all SQL statements */
    for(i=0; i<ArraySize(p->aStmt); i++){
      sqlite3_finalize(p->aStmt[i]);
    }

    sqlite3_free(p);
  }
  return rc;
}

typedef struct Fts5InsertCtx Fts5InsertCtx;
struct Fts5InsertCtx {
  Fts5Storage *pStorage;
  int iCol;
  int szCol;                      /* Size of column value in tokens */
};

/*
** Tokenization callback used when inserting tokens into the FTS index.
*/
static int fts5StorageInsertCallback(
  void *pContext,                 /* Pointer to Fts5InsertCtx object */
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStart,                     /* Start offset of token */
  int iEnd                        /* End offset of token */
){
  Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext;
  Fts5Index *pIdx = pCtx->pStorage->pIndex;
  int iPos = pCtx->szCol++;
  return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, iPos, pToken, nToken);
}

/*
** If a row with rowid iDel is present in the %_content table, add the
** delete-markers to the FTS index necessary to delete it. Do not actually
** remove the %_content row at this time though.
*/
static int fts5StorageDeleteFromIndex(Fts5Storage *p, i64 iDel){
  Fts5Config *pConfig = p->pConfig;
  sqlite3_stmt *pSeek;            /* SELECT to read row iDel from %_data */
  int rc;                         /* Return code */

  rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP, &pSeek, 0);
  if( rc==SQLITE_OK ){
    int rc2;
    sqlite3_bind_int64(pSeek, 1, iDel);
    if( sqlite3_step(pSeek)==SQLITE_ROW ){
      int iCol;
      Fts5InsertCtx ctx;
      ctx.pStorage = p;
      ctx.iCol = -1;
      rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel);
      for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){
        if( pConfig->abUnindexed[iCol-1] ) continue;
        ctx.szCol = 0;
        rc = sqlite3Fts5Tokenize(pConfig, 
            (const char*)sqlite3_column_text(pSeek, iCol),
            sqlite3_column_bytes(pSeek, iCol),
            (void*)&ctx,
            fts5StorageInsertCallback
        );
        p->aTotalSize[iCol-1] -= (i64)ctx.szCol;
      }
      p->nTotalRow--;
    }
    rc2 = sqlite3_reset(pSeek);
    if( rc==SQLITE_OK ) rc = rc2;
  }

  return rc;
}


/*
** Insert a record into the %_docsize table. Specifically, do:
**
**   INSERT OR REPLACE INTO %_docsize(id, sz) VALUES(iRowid, pBuf);
**
** If there is no %_docsize table (as happens if the columnsize=0 option
** is specified when the FTS5 table is created), this function is a no-op.
*/
static int fts5StorageInsertDocsize(
  Fts5Storage *p,                 /* Storage module to write to */
  i64 iRowid,                     /* id value */
  Fts5Buffer *pBuf                /* sz value */
){
  int rc = SQLITE_OK;
  if( p->pConfig->bColumnsize ){
    sqlite3_stmt *pReplace = 0;
    rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, &pReplace, 0);
    if( rc==SQLITE_OK ){
      sqlite3_bind_int64(pReplace, 1, iRowid);
      sqlite3_bind_blob(pReplace, 2, pBuf->p, pBuf->n, SQLITE_STATIC);
      sqlite3_step(pReplace);
      rc = sqlite3_reset(pReplace);
    }
  }
  return rc;
}

/*
** Load the contents of the "averages" record from disk into the 
** p->nTotalRow and p->aTotalSize[] variables. If successful, and if
** argument bCache is true, set the p->bTotalsValid flag to indicate
** that the contents of aTotalSize[] and nTotalRow are valid until
** further notice.
**
** Return SQLITE_OK if successful, or an SQLite error code if an error
** occurs.
*/
static int fts5StorageLoadTotals(Fts5Storage *p, int bCache){
  int rc = SQLITE_OK;
  if( p->bTotalsValid==0 ){
    int nCol = p->pConfig->nCol;
    Fts5Buffer buf;
    memset(&buf, 0, sizeof(buf));

    memset(p->aTotalSize, 0, sizeof(i64) * nCol);
    p->nTotalRow = 0;
    rc = sqlite3Fts5IndexGetAverages(p->pIndex, &buf);
    if( rc==SQLITE_OK && buf.n ){
      int i = 0;
      int iCol;
      i += fts5GetVarint(&buf.p[i], (u64*)&p->nTotalRow);
      for(iCol=0; i<buf.n && iCol<nCol; iCol++){
        i += fts5GetVarint(&buf.p[i], (u64*)&p->aTotalSize[iCol]);
      }
    }
    sqlite3_free(buf.p);
    p->bTotalsValid = bCache;
  }
  return rc;
}

/*
** Store the current contents of the p->nTotalRow and p->aTotalSize[] 
** variables in the "averages" record on disk.
**
** Return SQLITE_OK if successful, or an SQLite error code if an error
** occurs.
*/
static int fts5StorageSaveTotals(Fts5Storage *p){
  int nCol = p->pConfig->nCol;
  int i;
  Fts5Buffer buf;
  int rc = SQLITE_OK;
  memset(&buf, 0, sizeof(buf));

  sqlite3Fts5BufferAppendVarint(&rc, &buf, p->nTotalRow);
  for(i=0; i<nCol; i++){
    sqlite3Fts5BufferAppendVarint(&rc, &buf, p->aTotalSize[i]);
  }
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts5IndexSetAverages(p->pIndex, buf.p, buf.n);
  }
  sqlite3_free(buf.p);

  return rc;
}

/*
** Remove a row from the FTS table.
*/
int sqlite3Fts5StorageDelete(Fts5Storage *p, i64 iDel){
  Fts5Config *pConfig = p->pConfig;
  int rc;
  sqlite3_stmt *pDel;

  rc = fts5StorageLoadTotals(p, 1);

  /* Delete the index records */
  if( rc==SQLITE_OK ){
    rc = fts5StorageDeleteFromIndex(p, iDel);
  }

  /* Delete the %_docsize record */
  if( rc==SQLITE_OK && pConfig->bColumnsize ){
    rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_DOCSIZE, &pDel, 0);
    if( rc==SQLITE_OK ){
      sqlite3_bind_int64(pDel, 1, iDel);
      sqlite3_step(pDel);
      rc = sqlite3_reset(pDel);
    }
  }

  /* Delete the %_content record */
  if( rc==SQLITE_OK ){
    rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_CONTENT, &pDel, 0);
  }
  if( rc==SQLITE_OK ){
    sqlite3_bind_int64(pDel, 1, iDel);
    sqlite3_step(pDel);
    rc = sqlite3_reset(pDel);
  }

  /* Write the averages record */
  if( rc==SQLITE_OK ){
    rc = fts5StorageSaveTotals(p);
  }

  return rc;
}

int sqlite3Fts5StorageSpecialDelete(
  Fts5Storage *p, 
  i64 iDel, 
  sqlite3_value **apVal
){
  Fts5Config *pConfig = p->pConfig;
  int rc;
  sqlite3_stmt *pDel;

  assert( pConfig->eContent!=FTS5_CONTENT_NORMAL );
  rc = fts5StorageLoadTotals(p, 1);

  /* Delete the index records */
  if( rc==SQLITE_OK ){
    int iCol;
    Fts5InsertCtx ctx;
    ctx.pStorage = p;
    ctx.iCol = -1;

    rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel);
    for(iCol=0; rc==SQLITE_OK && iCol<pConfig->nCol; iCol++){
      if( pConfig->abUnindexed[iCol] ) continue;
      ctx.szCol = 0;
      rc = sqlite3Fts5Tokenize(pConfig, 
        (const char*)sqlite3_value_text(apVal[iCol]),
        sqlite3_value_bytes(apVal[iCol]),
        (void*)&ctx,
        fts5StorageInsertCallback
      );
      p->aTotalSize[iCol] -= (i64)ctx.szCol;
    }
    p->nTotalRow--;
  }

  /* Delete the %_docsize record */
  if( pConfig->bColumnsize ){
    if( rc==SQLITE_OK ){
      rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_DOCSIZE, &pDel, 0);
    }
    if( rc==SQLITE_OK ){
      sqlite3_bind_int64(pDel, 1, iDel);
      sqlite3_step(pDel);
      rc = sqlite3_reset(pDel);
    }
  }

  /* Write the averages record */
  if( rc==SQLITE_OK ){
    rc = fts5StorageSaveTotals(p);
  }

  return rc;
}

/*
** Delete all entries in the FTS5 index.
*/
int sqlite3Fts5StorageDeleteAll(Fts5Storage *p){
  Fts5Config *pConfig = p->pConfig;
  int rc;

  /* Delete the contents of the %_data and %_docsize tables. */
  rc = fts5ExecPrintf(pConfig->db, 0,
      "DELETE FROM %Q.'%q_data';",
      pConfig->zDb, pConfig->zName
  );
  if( rc==SQLITE_OK && pConfig->bColumnsize ){
    rc = fts5ExecPrintf(pConfig->db, 0,
        "DELETE FROM %Q.'%q_docsize';",
        pConfig->zDb, pConfig->zName
    );
  }

  /* Reinitialize the %_data table. This call creates the initial structure
  ** and averages records.  */
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts5IndexReinit(p->pIndex);
  }
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts5StorageConfigValue(p, "version", 0, FTS5_CURRENT_VERSION);
  }
  return rc;
}

int sqlite3Fts5StorageRebuild(Fts5Storage *p){
  Fts5Buffer buf = {0,0,0};
  Fts5Config *pConfig = p->pConfig;
  sqlite3_stmt *pScan = 0;
  Fts5InsertCtx ctx;
  int rc;

  memset(&ctx, 0, sizeof(Fts5InsertCtx));
  ctx.pStorage = p;
  rc = sqlite3Fts5StorageDeleteAll(p);
  if( rc==SQLITE_OK ){
    rc = fts5StorageLoadTotals(p, 1);
  }

  if( rc==SQLITE_OK ){
    rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN, &pScan, 0);
  }

  while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pScan) ){
    i64 iRowid = sqlite3_column_int64(pScan, 0);

    sqlite3Fts5BufferZero(&buf);
    rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iRowid);
    for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
      ctx.szCol = 0;
      if( pConfig->abUnindexed[ctx.iCol]==0 ){
        rc = sqlite3Fts5Tokenize(pConfig, 
            (const char*)sqlite3_column_text(pScan, ctx.iCol+1),
            sqlite3_column_bytes(pScan, ctx.iCol+1),
            (void*)&ctx,
            fts5StorageInsertCallback
        );
      }
      sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
      p->aTotalSize[ctx.iCol] += (i64)ctx.szCol;
    }
    p->nTotalRow++;

    if( rc==SQLITE_OK ){
      rc = fts5StorageInsertDocsize(p, iRowid, &buf);
    }
  }
  sqlite3_free(buf.p);

  /* Write the averages record */
  if( rc==SQLITE_OK ){
    rc = fts5StorageSaveTotals(p);
  }
  return rc;
}

int sqlite3Fts5StorageOptimize(Fts5Storage *p){
  return sqlite3Fts5IndexOptimize(p->pIndex);
}

int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge){
  return sqlite3Fts5IndexMerge(p->pIndex, nMerge);
}

/*
** Allocate a new rowid. This is used for "external content" tables when
** a NULL value is inserted into the rowid column. The new rowid is allocated
** by inserting a dummy row into the %_docsize table. The dummy will be
** overwritten later.
**
** If the %_docsize table does not exist, SQLITE_MISMATCH is returned. In
** this case the user is required to provide a rowid explicitly.
*/
static int fts5StorageNewRowid(Fts5Storage *p, i64 *piRowid){
  int rc = SQLITE_MISMATCH;
  if( p->pConfig->bColumnsize ){
    sqlite3_stmt *pReplace = 0;
    rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, &pReplace, 0);
    if( rc==SQLITE_OK ){
      sqlite3_bind_null(pReplace, 1);
      sqlite3_bind_null(pReplace, 2);
      sqlite3_step(pReplace);
      rc = sqlite3_reset(pReplace);
    }
    if( rc==SQLITE_OK ){
      *piRowid = sqlite3_last_insert_rowid(p->pConfig->db);
    }
  }
  return rc;
}

/*
** Insert a new row into the FTS table.
*/
int sqlite3Fts5StorageInsert(
  Fts5Storage *p,                 /* Storage module to write to */
  sqlite3_value **apVal,          /* Array of values passed to xUpdate() */
  int eConflict,                  /* on conflict clause */
  i64 *piRowid                    /* OUT: rowid of new record */
){
  Fts5Config *pConfig = p->pConfig;
  int rc = SQLITE_OK;             /* Return code */
  sqlite3_stmt *pInsert;          /* Statement used to write %_content table */
  int eStmt = 0;                  /* Type of statement used on %_content */
  int i;                          /* Counter variable */
  Fts5InsertCtx ctx;              /* Tokenization callback context object */
  Fts5Buffer buf;                 /* Buffer used to build up %_docsize blob */

  memset(&buf, 0, sizeof(Fts5Buffer));
  rc = fts5StorageLoadTotals(p, 1);

  /* Insert the new row into the %_content table. */
  if( rc==SQLITE_OK ){
    if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){
      if( sqlite3_value_type(apVal[1])==SQLITE_INTEGER ){
        *piRowid = sqlite3_value_int64(apVal[1]);
      }else{
        rc = fts5StorageNewRowid(p, piRowid);
      }
    }else{
      if( eConflict==SQLITE_REPLACE ){
        eStmt = FTS5_STMT_REPLACE_CONTENT;
        rc = fts5StorageDeleteFromIndex(p, sqlite3_value_int64(apVal[1]));
      }else{
        eStmt = FTS5_STMT_INSERT_CONTENT;
      }
      if( rc==SQLITE_OK ){
        rc = fts5StorageGetStmt(p, eStmt, &pInsert, 0);
      }
      for(i=1; rc==SQLITE_OK && i<=pConfig->nCol+1; i++){
        rc = sqlite3_bind_value(pInsert, i, apVal[i]);
      }
      if( rc==SQLITE_OK ){
        sqlite3_step(pInsert);
        rc = sqlite3_reset(pInsert);
      }
      *piRowid = sqlite3_last_insert_rowid(pConfig->db);
    }
  }

  /* Add new entries to the FTS index */
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts5IndexBeginWrite(p->pIndex, *piRowid);
    ctx.pStorage = p;
  }
  for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
    ctx.szCol = 0;
    if( pConfig->abUnindexed[ctx.iCol]==0 ){
      rc = sqlite3Fts5Tokenize(pConfig, 
          (const char*)sqlite3_value_text(apVal[ctx.iCol+2]),
          sqlite3_value_bytes(apVal[ctx.iCol+2]),
          (void*)&ctx,
          fts5StorageInsertCallback
      );
    }
    sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
    p->aTotalSize[ctx.iCol] += (i64)ctx.szCol;
  }
  p->nTotalRow++;

  /* Write the %_docsize record */
  if( rc==SQLITE_OK ){
    rc = fts5StorageInsertDocsize(p, *piRowid, &buf);
  }
  sqlite3_free(buf.p);

  /* Write the averages record */
  if( rc==SQLITE_OK ){
    rc = fts5StorageSaveTotals(p);
  }

  return rc;
}

static int fts5StorageCount(Fts5Storage *p, const char *zSuffix, i64 *pnRow){
  Fts5Config *pConfig = p->pConfig;
  char *zSql;
  int rc;

  zSql = sqlite3_mprintf("SELECT count(*) FROM %Q.'%q_%s'", 
      pConfig->zDb, pConfig->zName, zSuffix
  );
  if( zSql==0 ){
    rc = SQLITE_NOMEM;
  }else{
    sqlite3_stmt *pCnt = 0;
    rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pCnt, 0);
    if( rc==SQLITE_OK ){
      if( SQLITE_ROW==sqlite3_step(pCnt) ){
        *pnRow = sqlite3_column_int64(pCnt, 0);
      }
      rc = sqlite3_finalize(pCnt);
    }
  }

  sqlite3_free(zSql);
  return rc;
}

/*
** Context object used by sqlite3Fts5StorageIntegrity().
*/
typedef struct Fts5IntegrityCtx Fts5IntegrityCtx;
struct Fts5IntegrityCtx {
  i64 iRowid;
  int iCol;
  int szCol;
  u64 cksum;
  Fts5Config *pConfig;
};

/*
** Tokenization callback used by integrity check.
*/
static int fts5StorageIntegrityCallback(
  void *pContext,                 /* Pointer to Fts5InsertCtx object */
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStart,                     /* Start offset of token */
  int iEnd                        /* End offset of token */
){
  Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext;
  int iPos = pCtx->szCol++;
  pCtx->cksum ^= sqlite3Fts5IndexCksum(
      pCtx->pConfig, pCtx->iRowid, pCtx->iCol, iPos, pToken, nToken
  );
  return SQLITE_OK;
}

/*
** Check that the contents of the FTS index match that of the %_content
** table. Return SQLITE_OK if they do, or SQLITE_CORRUPT if not. Return
** some other SQLite error code if an error occurs while attempting to
** determine this.
*/
int sqlite3Fts5StorageIntegrity(Fts5Storage *p){
  Fts5Config *pConfig = p->pConfig;
  int rc;                         /* Return code */
  int *aColSize;                  /* Array of size pConfig->nCol */
  i64 *aTotalSize;                /* Array of size pConfig->nCol */
  Fts5IntegrityCtx ctx;
  sqlite3_stmt *pScan;

  memset(&ctx, 0, sizeof(Fts5IntegrityCtx));
  ctx.pConfig = p->pConfig;
  aTotalSize = (i64*)sqlite3_malloc(pConfig->nCol * (sizeof(int)+sizeof(i64)));
  if( !aTotalSize ) return SQLITE_NOMEM;
  aColSize = (int*)&aTotalSize[pConfig->nCol];
  memset(aTotalSize, 0, sizeof(i64) * pConfig->nCol);

  /* Generate the expected index checksum based on the contents of the
  ** %_content table. This block stores the checksum in ctx.cksum. */
  rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN, &pScan, 0);
  if( rc==SQLITE_OK ){
    int rc2;
    while( SQLITE_ROW==sqlite3_step(pScan) ){
      int i;
      ctx.iRowid = sqlite3_column_int64(pScan, 0);
      ctx.szCol = 0;
      rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize);
      for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
        if( pConfig->abUnindexed[i] ) continue;
        ctx.iCol = i;
        ctx.szCol = 0;
        rc = sqlite3Fts5Tokenize(
            pConfig, 
            (const char*)sqlite3_column_text(pScan, i+1),
            sqlite3_column_bytes(pScan, i+1),
            (void*)&ctx,
            fts5StorageIntegrityCallback
        );
        if( ctx.szCol!=aColSize[i] ) rc = FTS5_CORRUPT;
        aTotalSize[i] += ctx.szCol;
      }
      if( rc!=SQLITE_OK ) break;
    }
    rc2 = sqlite3_reset(pScan);
    if( rc==SQLITE_OK ) rc = rc2;
  }

  /* Test that the "totals" (sometimes called "averages") record looks Ok */
  if( rc==SQLITE_OK ){
    int i;
    rc = fts5StorageLoadTotals(p, 0);
    for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
      if( p->aTotalSize[i]!=aTotalSize[i] ) rc = FTS5_CORRUPT;
    }
  }

  /* Check that the %_docsize and %_content tables contain the expected
  ** number of rows.  */
  if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){
    i64 nRow;
    rc = fts5StorageCount(p, "content", &nRow);
    if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
  }
  if( rc==SQLITE_OK ){
    i64 nRow;
    rc = fts5StorageCount(p, "docsize", &nRow);
    if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
  }

  /* Pass the expected checksum down to the FTS index module. It will
  ** verify, amongst other things, that it matches the checksum generated by
  ** inspecting the index itself.  */
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts5IndexIntegrityCheck(p->pIndex, ctx.cksum);
  }

  sqlite3_free(aTotalSize);
  return rc;
}

/*
** Obtain an SQLite statement handle that may be used to read data from the
** %_content table.
*/
int sqlite3Fts5StorageStmt(
  Fts5Storage *p, 
  int eStmt, 
  sqlite3_stmt **pp, 
  char **pzErrMsg
){
  int rc;
  assert( eStmt==FTS5_STMT_SCAN_ASC 
       || eStmt==FTS5_STMT_SCAN_DESC
       || eStmt==FTS5_STMT_LOOKUP
  );
  rc = fts5StorageGetStmt(p, eStmt, pp, pzErrMsg);
  if( rc==SQLITE_OK ){
    assert( p->aStmt[eStmt]==*pp );
    p->aStmt[eStmt] = 0;
  }
  return rc;
}

/*
** Release an SQLite statement handle obtained via an earlier call to
** sqlite3Fts5StorageStmt(). The eStmt parameter passed to this function
** must match that passed to the sqlite3Fts5StorageStmt() call.
*/
void sqlite3Fts5StorageStmtRelease(
  Fts5Storage *p, 
  int eStmt, 
  sqlite3_stmt *pStmt
){
  assert( eStmt==FTS5_STMT_SCAN_ASC
       || eStmt==FTS5_STMT_SCAN_DESC
       || eStmt==FTS5_STMT_LOOKUP
  );
  if( p->aStmt[eStmt]==0 ){
    sqlite3_reset(pStmt);
    p->aStmt[eStmt] = pStmt;
  }else{
    sqlite3_finalize(pStmt);
  }
}

static int fts5StorageDecodeSizeArray(
  int *aCol, int nCol,            /* Array to populate */
  const u8 *aBlob, int nBlob      /* Record to read varints from */
){
  int i;
  int iOff = 0;
  for(i=0; i<nCol; i++){
    if( iOff>=nBlob ) return 1;
    iOff += fts5GetVarint32(&aBlob[iOff], aCol[i]);
  }
  return (iOff!=nBlob);
}

/*
** Argument aCol points to an array of integers containing one entry for
** each table column. This function reads the %_docsize record for the
** specified rowid and populates aCol[] with the results.
**
** An SQLite error code is returned if an error occurs, or SQLITE_OK
** otherwise.
*/
int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){
  int nCol = p->pConfig->nCol;
  sqlite3_stmt *pLookup = 0;
  int rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE, &pLookup, 0);
  if( rc==SQLITE_OK ){
    int bCorrupt = 1;
    sqlite3_bind_int64(pLookup, 1, iRowid);
    if( SQLITE_ROW==sqlite3_step(pLookup) ){
      const u8 *aBlob = sqlite3_column_blob(pLookup, 0);
      int nBlob = sqlite3_column_bytes(pLookup, 0);
      if( 0==fts5StorageDecodeSizeArray(aCol, nCol, aBlob, nBlob) ){
        bCorrupt = 0;
      }
    }
    rc = sqlite3_reset(pLookup);
    if( bCorrupt && rc==SQLITE_OK ){
      rc = FTS5_CORRUPT;
    }
  }

  return rc;
}

int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnToken){
  int rc = fts5StorageLoadTotals(p, 0);
  if( rc==SQLITE_OK ){
    *pnToken = 0;
    if( iCol<0 ){
      int i;
      for(i=0; i<p->pConfig->nCol; i++){
        *pnToken += p->aTotalSize[i];
      }
    }else if( iCol<p->pConfig->nCol ){
      *pnToken = p->aTotalSize[iCol];
    }else{
      rc = SQLITE_RANGE;
    }
  }
  return rc;
}

int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow){
  int rc = fts5StorageLoadTotals(p, 0);
  if( rc==SQLITE_OK ){
    *pnRow = p->nTotalRow;
  }
  return rc;
}

/*
** Flush any data currently held in-memory to disk.
*/
int sqlite3Fts5StorageSync(Fts5Storage *p, int bCommit){
  if( bCommit && p->bTotalsValid ){
    int rc = fts5StorageSaveTotals(p);
    p->bTotalsValid = 0;
    if( rc!=SQLITE_OK ) return rc;
  }
  return sqlite3Fts5IndexSync(p->pIndex, bCommit);
}

int sqlite3Fts5StorageRollback(Fts5Storage *p){
  p->bTotalsValid = 0;
  return sqlite3Fts5IndexRollback(p->pIndex);
}

int sqlite3Fts5StorageConfigValue(
  Fts5Storage *p, 
  const char *z,
  sqlite3_value *pVal,
  int iVal
){
  sqlite3_stmt *pReplace = 0;
  int rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_CONFIG, &pReplace, 0);
  if( rc==SQLITE_OK ){
    sqlite3_bind_text(pReplace, 1, z, -1, SQLITE_STATIC);
    if( pVal ){
      sqlite3_bind_value(pReplace, 2, pVal);
    }else{
      sqlite3_bind_int(pReplace, 2, iVal);
    }
    sqlite3_step(pReplace);
    rc = sqlite3_reset(pReplace);
  }
  if( rc==SQLITE_OK && pVal ){
    int iNew = p->pConfig->iCookie + 1;
    rc = sqlite3Fts5IndexSetCookie(p->pIndex, iNew);
    if( rc==SQLITE_OK ){
      p->pConfig->iCookie = iNew;
    }
  }
  return rc;
}


#endif /* SQLITE_ENABLE_FTS5 */
Added ext/fts5/fts5_tcl.c.


















































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
/*
** 2014 Dec 01
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
*/


#ifdef SQLITE_TEST
#include <tcl.h>

#ifdef SQLITE_ENABLE_FTS5

#include "fts5.h"
#include <string.h>
#include <assert.h>

extern int sqlite3_fts5_may_be_corrupt;

/*************************************************************************
** This is a copy of the first part of the SqliteDb structure in 
** tclsqlite.c.  We need it here so that the get_sqlite_pointer routine
** can extract the sqlite3* pointer from an existing Tcl SQLite
** connection.
*/

extern const char *sqlite3ErrName(int);

struct SqliteDb {
  sqlite3 *db;
};

/*
** Decode a pointer to an sqlite3 object.
*/
static int f5tDbPointer(Tcl_Interp *interp, Tcl_Obj *pObj, sqlite3 **ppDb){
  struct SqliteDb *p;
  Tcl_CmdInfo cmdInfo;
  char *z = Tcl_GetString(pObj);
  if( Tcl_GetCommandInfo(interp, z, &cmdInfo) ){
    p = (struct SqliteDb*)cmdInfo.objClientData;
    *ppDb = p->db;
    return TCL_OK;
  }
  return TCL_ERROR;
}

/* End of code that accesses the SqliteDb struct.
**************************************************************************/

static int f5tResultToErrorCode(const char *zRes){
  struct ErrorCode {
    int rc;
    const char *zError;
  } aErr[] = {
    { SQLITE_DONE,  "SQLITE_DONE" },
    { SQLITE_ERROR, "SQLITE_ERROR" },
    { SQLITE_OK,    "SQLITE_OK" },
    { SQLITE_OK,    "" },
  };
  int i;

  for(i=0; i<sizeof(aErr)/sizeof(aErr[0]); i++){
    if( 0==sqlite3_stricmp(zRes, aErr[i].zError) ){
      return aErr[i].rc;
    }
  }

  return SQLITE_ERROR;
}

static int f5tDbAndApi(
  Tcl_Interp *interp, 
  Tcl_Obj *pObj, 
  sqlite3 **ppDb, 
  fts5_api **ppApi
){
  sqlite3 *db = 0;
  int rc = f5tDbPointer(interp, pObj, &db);
  if( rc!=TCL_OK ){
    return TCL_ERROR;
  }else{
    sqlite3_stmt *pStmt = 0;
    fts5_api *pApi = 0;

    rc = sqlite3_prepare_v2(db, "SELECT fts5()", -1, &pStmt, 0);
    if( rc!=SQLITE_OK ){
      Tcl_AppendResult(interp, "error: ", sqlite3_errmsg(db), 0);
      return TCL_ERROR;
    }

    if( SQLITE_ROW==sqlite3_step(pStmt) ){
      const void *pPtr = sqlite3_column_blob(pStmt, 0);
      memcpy((void*)&pApi, pPtr, sizeof(pApi));
    }

    if( sqlite3_finalize(pStmt)!=SQLITE_OK ){
      Tcl_AppendResult(interp, "error: ", sqlite3_errmsg(db), 0);
      return TCL_ERROR;
    }

    *ppDb = db;
    *ppApi = pApi;
  }

  return TCL_OK;
}

typedef struct F5tFunction F5tFunction;
struct F5tFunction {
  Tcl_Interp *interp;
  Tcl_Obj *pScript;
};

typedef struct F5tApi F5tApi;
struct F5tApi {
  const Fts5ExtensionApi *pApi;
  Fts5Context *pFts;
};

/*
** An object of this type is used with the xSetAuxdata() and xGetAuxdata()
** API test wrappers. The tcl interface allows a single tcl value to be 
** saved using xSetAuxdata(). Instead of simply storing a pointer to the
** tcl object, the code in this file wraps it in an sqlite3_malloc'd 
** instance of the following struct so that if the destructor is not 
** correctly invoked it will be reported as an SQLite memory leak.
*/
typedef struct F5tAuxData F5tAuxData;
struct F5tAuxData {
  Tcl_Obj *pObj;
};

static int xTokenizeCb(
  void *pCtx, 
  const char *zToken, int nToken, 
  int iStart, int iEnd
){
  F5tFunction *p = (F5tFunction*)pCtx;
  Tcl_Obj *pEval = Tcl_DuplicateObj(p->pScript);
  int rc;

  Tcl_IncrRefCount(pEval);
  Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewStringObj(zToken, nToken));
  Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewIntObj(iStart));
  Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewIntObj(iEnd));

  rc = Tcl_EvalObjEx(p->interp, pEval, 0);
  Tcl_DecrRefCount(pEval);
  if( rc==TCL_OK ){
    rc = f5tResultToErrorCode(Tcl_GetStringResult(p->interp));
  }

  return rc;
}

static int xF5tApi(void*, Tcl_Interp*, int, Tcl_Obj *CONST []);

static int xQueryPhraseCb(
  const Fts5ExtensionApi *pApi, 
  Fts5Context *pFts, 
  void *pCtx
){
  F5tFunction *p = (F5tFunction*)pCtx;
  static sqlite3_int64 iCmd = 0;
  Tcl_Obj *pEval;
  int rc;

  char zCmd[64];
  F5tApi sApi;

  sApi.pApi = pApi;
  sApi.pFts = pFts;
  sprintf(zCmd, "f5t_2_%lld", iCmd++);
  Tcl_CreateObjCommand(p->interp, zCmd, xF5tApi, &sApi, 0);

  pEval = Tcl_DuplicateObj(p->pScript);
  Tcl_IncrRefCount(pEval);
  Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewStringObj(zCmd, -1));
  rc = Tcl_EvalObjEx(p->interp, pEval, 0);
  Tcl_DecrRefCount(pEval);
  Tcl_DeleteCommand(p->interp, zCmd);

  if( rc==TCL_OK ){
    rc = f5tResultToErrorCode(Tcl_GetStringResult(p->interp));
  }

  return rc;
}

static void xSetAuxdataDestructor(void *p){
  F5tAuxData *pData = (F5tAuxData*)p;
  Tcl_DecrRefCount(pData->pObj);
  sqlite3_free(pData);
}

/*
**      api sub-command...
**
** Description...
*/
static int xF5tApi(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  struct Sub {
    const char *zName;
    int nArg;
    const char *zMsg;
  } aSub[] = {
    { "xColumnCount",      0, "" },                   /*  0 */
    { "xRowCount",         0, "" },                   /*  1 */
    { "xColumnTotalSize",  1, "COL" },                /*  2 */
    { "xTokenize",         2, "TEXT SCRIPT" },        /*  3 */
    { "xPhraseCount",      0, "" },                   /*  4 */
    { "xPhraseSize",       1, "PHRASE" },             /*  5 */
    { "xInstCount",        0, "" },                   /*  6 */
    { "xInst",             1, "IDX" },                /*  7 */
    { "xRowid",            0, "" },                   /*  8 */
    { "xColumnText",       1, "COL" },                /*  9 */
    { "xColumnSize",       1, "COL" },                /* 10 */
    { "xQueryPhrase",      2, "PHRASE SCRIPT" },      /* 11 */
    { "xSetAuxdata",       1, "VALUE" },              /* 12 */
    { "xGetAuxdata",       1, "CLEAR" },              /* 13 */
    { "xSetAuxdataInt",    1, "INTEGER" },            /* 14 */
    { "xGetAuxdataInt",    1, "CLEAR" },              /* 15 */
    { 0, 0, 0}
  };

  int rc;
  int iSub = 0;
  F5tApi *p = (F5tApi*)clientData;

  if( objc<2 ){
    Tcl_WrongNumArgs(interp, 1, objv, "SUB-COMMAND");
    return TCL_ERROR;
  }

  rc = Tcl_GetIndexFromObjStruct(
      interp, objv[1], aSub, sizeof(aSub[0]), "SUB-COMMAND", 0, &iSub
  );
  if( rc!=TCL_OK ) return rc;
  if( aSub[iSub].nArg!=objc-2 ){
    Tcl_WrongNumArgs(interp, 1, objv, aSub[iSub].zMsg);
    return TCL_ERROR;
  }

#define CASE(i,str) case i: assert( strcmp(aSub[i].zName, str)==0 );
  switch( iSub ){
    CASE(0, "xColumnCount") {
      int nCol;
      nCol = p->pApi->xColumnCount(p->pFts);
      if( rc==SQLITE_OK ){
        Tcl_SetObjResult(interp, Tcl_NewIntObj(nCol));
      }
      break;
    }
    CASE(1, "xRowCount") {
      sqlite3_int64 nRow;
      rc = p->pApi->xRowCount(p->pFts, &nRow);
      if( rc==SQLITE_OK ){
        Tcl_SetObjResult(interp, Tcl_NewWideIntObj(nRow));
      }
      break;
    }
    CASE(2, "xColumnTotalSize") {
      int iCol;
      sqlite3_int64 nSize;
      if( Tcl_GetIntFromObj(interp, objv[2], &iCol) ) return TCL_ERROR;
      rc = p->pApi->xColumnTotalSize(p->pFts, iCol, &nSize);
      if( rc==SQLITE_OK ){
        Tcl_SetObjResult(interp, Tcl_NewWideIntObj(nSize));
      }
      break;
    }
    CASE(3, "xTokenize") {
      int nText;
      char *zText = Tcl_GetStringFromObj(objv[2], &nText);
      F5tFunction ctx;
      ctx.interp = interp;
      ctx.pScript = objv[3];
      rc = p->pApi->xTokenize(p->pFts, zText, nText, &ctx, xTokenizeCb);
      if( rc==SQLITE_OK ){
        Tcl_ResetResult(interp);
      }
      return rc;
    }
    CASE(4, "xPhraseCount") {
      int nPhrase;
      nPhrase = p->pApi->xPhraseCount(p->pFts);
      if( rc==SQLITE_OK ){
        Tcl_SetObjResult(interp, Tcl_NewIntObj(nPhrase));
      }
      break;
    }
    CASE(5, "xPhraseSize") {
      int iPhrase;
      int sz;
      if( Tcl_GetIntFromObj(interp, objv[2], &iPhrase) ){
        return TCL_ERROR;
      }
      sz = p->pApi->xPhraseSize(p->pFts, iPhrase);
      if( rc==SQLITE_OK ){
        Tcl_SetObjResult(interp, Tcl_NewIntObj(sz));
      }
      break;
    }
    CASE(6, "xInstCount") {
      int nInst;
      rc = p->pApi->xInstCount(p->pFts, &nInst);
      if( rc==SQLITE_OK ){
        Tcl_SetObjResult(interp, Tcl_NewIntObj(nInst));
      }
      break;
    }
    CASE(7, "xInst") {
      int iIdx, ip, ic, io;
      if( Tcl_GetIntFromObj(interp, objv[2], &iIdx) ){
        return TCL_ERROR;
      }
      rc = p->pApi->xInst(p->pFts, iIdx, &ip, &ic, &io);
      if( rc==SQLITE_OK ){
        Tcl_Obj *pList = Tcl_NewObj();
        Tcl_ListObjAppendElement(interp, pList, Tcl_NewIntObj(ip));
        Tcl_ListObjAppendElement(interp, pList, Tcl_NewIntObj(ic));
        Tcl_ListObjAppendElement(interp, pList, Tcl_NewIntObj(io));
        Tcl_SetObjResult(interp, pList);
      }
      break;
    }
    CASE(8, "xRowid") {
      sqlite3_int64 iRowid = p->pApi->xRowid(p->pFts);
      Tcl_SetObjResult(interp, Tcl_NewWideIntObj(iRowid));
      break;
    }
    CASE(9, "xColumnText") {
      const char *z = 0;
      int n = 0;
      int iCol;
      if( Tcl_GetIntFromObj(interp, objv[2], &iCol) ){
        return TCL_ERROR;
      }
      rc = p->pApi->xColumnText(p->pFts, iCol, &z, &n);
      if( rc==SQLITE_OK ){
        Tcl_SetObjResult(interp, Tcl_NewStringObj(z, n));
      }
      break;
    }
    CASE(10, "xColumnSize") {
      int n = 0;
      int iCol;
      if( Tcl_GetIntFromObj(interp, objv[2], &iCol) ){
        return TCL_ERROR;
      }
      rc = p->pApi->xColumnSize(p->pFts, iCol, &n);
      if( rc==SQLITE_OK ){
        Tcl_SetObjResult(interp, Tcl_NewIntObj(n));
      }
      break;
    }
    CASE(11, "xQueryPhrase") {
      int iPhrase;
      F5tFunction ctx;
      if( Tcl_GetIntFromObj(interp, objv[2], &iPhrase) ){
        return TCL_ERROR;
      }
      ctx.interp = interp;
      ctx.pScript = objv[3];
      rc = p->pApi->xQueryPhrase(p->pFts, iPhrase, &ctx, xQueryPhraseCb);
      if( rc==SQLITE_OK ){
        Tcl_ResetResult(interp);
      }
      break;
    }
    CASE(12, "xSetAuxdata") {
      F5tAuxData *pData = (F5tAuxData*)sqlite3_malloc(sizeof(F5tAuxData));
      if( pData==0 ){
        Tcl_AppendResult(interp, "out of memory", 0);
        return TCL_ERROR;
      }
      pData->pObj = objv[2];
      Tcl_IncrRefCount(pData->pObj);
      rc = p->pApi->xSetAuxdata(p->pFts, pData, xSetAuxdataDestructor);
      break;
    }
    CASE(13, "xGetAuxdata") {
      F5tAuxData *pData;
      int bClear;
      if( Tcl_GetBooleanFromObj(interp, objv[2], &bClear) ){
        return TCL_ERROR;
      }
      pData = (F5tAuxData*)p->pApi->xGetAuxdata(p->pFts, bClear);
      if( pData==0 ){
        Tcl_ResetResult(interp);
      }else{
        Tcl_SetObjResult(interp, pData->pObj);
        if( bClear ){
          xSetAuxdataDestructor((void*)pData);
        }
      }
      break;
    }

    /* These two - xSetAuxdataInt and xGetAuxdataInt - are similar to the
    ** xSetAuxdata and xGetAuxdata methods implemented above. The difference
    ** is that they may only save an integer value as auxiliary data, and
    ** do not specify a destructor function.  */
    CASE(14, "xSetAuxdataInt") {
      int iVal;
      if( Tcl_GetIntFromObj(interp, objv[2], &iVal) ) return TCL_ERROR;
      rc = p->pApi->xSetAuxdata(p->pFts, (void*)iVal, 0);
      break;
    }
    CASE(15, "xGetAuxdataInt") {
      int iVal;
      int bClear;
      if( Tcl_GetBooleanFromObj(interp, objv[2], &bClear) ) return TCL_ERROR;
      iVal = (int)p->pApi->xGetAuxdata(p->pFts, bClear);
      Tcl_SetObjResult(interp, Tcl_NewIntObj(iVal));
      break;
    }

    default: 
      assert( 0 );
      break;
  }
#undef CASE

  if( rc!=SQLITE_OK ){
    Tcl_SetResult(interp, (char*)sqlite3ErrName(rc), TCL_VOLATILE);
    return TCL_ERROR;
  }

  return TCL_OK;
}

static void xF5tFunction(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  F5tFunction *p = (F5tFunction*)pApi->xUserData(pFts);
  Tcl_Obj *pEval;                 /* Script to evaluate */
  int i;
  int rc;

  static sqlite3_int64 iCmd = 0;
  char zCmd[64];
  F5tApi sApi;
  sApi.pApi = pApi;
  sApi.pFts = pFts;

  sprintf(zCmd, "f5t_%lld", iCmd++);
  Tcl_CreateObjCommand(p->interp, zCmd, xF5tApi, &sApi, 0);
  pEval = Tcl_DuplicateObj(p->pScript);
  Tcl_IncrRefCount(pEval);
  Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewStringObj(zCmd, -1));

  for(i=0; i<nVal; i++){
    Tcl_Obj *pObj = 0;
    switch( sqlite3_value_type(apVal[i]) ){
      case SQLITE_TEXT:
        pObj = Tcl_NewStringObj((const char*)sqlite3_value_text(apVal[i]), -1);
        break;
      case SQLITE_BLOB:
        pObj = Tcl_NewByteArrayObj(
            sqlite3_value_blob(apVal[i]), sqlite3_value_bytes(apVal[i])
        );
        break;
      case SQLITE_INTEGER:
        pObj = Tcl_NewWideIntObj(sqlite3_value_int64(apVal[i]));
        break;
      case SQLITE_FLOAT:
        pObj = Tcl_NewDoubleObj(sqlite3_value_double(apVal[i]));
        break;
      default:
        pObj = Tcl_NewObj();
        break;
    }
    Tcl_ListObjAppendElement(p->interp, pEval, pObj);
  }

  rc = Tcl_EvalObjEx(p->interp, pEval, TCL_GLOBAL_ONLY);
  Tcl_DecrRefCount(pEval);
  Tcl_DeleteCommand(p->interp, zCmd);

  if( rc!=TCL_OK ){
    sqlite3_result_error(pCtx, Tcl_GetStringResult(p->interp), -1);
  }else{
    Tcl_Obj *pVar = Tcl_GetObjResult(p->interp);
    int n;
    const char *zType = (pVar->typePtr ? pVar->typePtr->name : "");
    char c = zType[0];
    if( c=='b' && strcmp(zType,"bytearray")==0 && pVar->bytes==0 ){
      /* Only return a BLOB type if the Tcl variable is a bytearray and
      ** has no string representation. */
      unsigned char *data = Tcl_GetByteArrayFromObj(pVar, &n);
      sqlite3_result_blob(pCtx, data, n, SQLITE_TRANSIENT);
    }else if( c=='b' && strcmp(zType,"boolean")==0 ){
      Tcl_GetIntFromObj(0, pVar, &n);
      sqlite3_result_int(pCtx, n);
    }else if( c=='d' && strcmp(zType,"double")==0 ){
      double r;
      Tcl_GetDoubleFromObj(0, pVar, &r);
      sqlite3_result_double(pCtx, r);
    }else if( (c=='w' && strcmp(zType,"wideInt")==0) ||
          (c=='i' && strcmp(zType,"int")==0) ){
      Tcl_WideInt v;
      Tcl_GetWideIntFromObj(0, pVar, &v);
      sqlite3_result_int64(pCtx, v);
    }else{
      unsigned char *data = (unsigned char *)Tcl_GetStringFromObj(pVar, &n);
      sqlite3_result_text(pCtx, (char *)data, n, SQLITE_TRANSIENT);
    }
  }
}

static void xF5tDestroy(void *pCtx){
  F5tFunction *p = (F5tFunction*)pCtx;
  Tcl_DecrRefCount(p->pScript);
  ckfree((char *)p);
}

/*
**      sqlite3_fts5_create_function DB NAME SCRIPT
**
** Description...
*/
static int f5tCreateFunction(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  char *zName;
  Tcl_Obj *pScript;
  sqlite3 *db = 0;
  fts5_api *pApi = 0;
  F5tFunction *pCtx = 0;
  int rc;

  if( objc!=4 ){
    Tcl_WrongNumArgs(interp, 1, objv, "DB NAME SCRIPT");
    return TCL_ERROR;
  }
  if( f5tDbAndApi(interp, objv[1], &db, &pApi) ) return TCL_ERROR;

  zName = Tcl_GetString(objv[2]);
  pScript = objv[3];
  pCtx = (F5tFunction*)ckalloc(sizeof(F5tFunction));
  pCtx->interp = interp;
  pCtx->pScript = pScript;
  Tcl_IncrRefCount(pScript);

  rc = pApi->xCreateFunction(
      pApi, zName, (void*)pCtx, xF5tFunction, xF5tDestroy
  );
  if( rc!=SQLITE_OK ){
    Tcl_AppendResult(interp, "error: ", sqlite3_errmsg(db), 0);
    return TCL_ERROR;
  }

  return TCL_OK;
}

typedef struct F5tTokenizeCtx F5tTokenizeCtx;
struct F5tTokenizeCtx {
  Tcl_Obj *pRet;
  int bSubst;
  const char *zInput;
};

static int xTokenizeCb2(
  void *pCtx, 
  const char *zToken, int nToken, 
  int iStart, int iEnd
){
  F5tTokenizeCtx *p = (F5tTokenizeCtx*)pCtx;
  if( p->bSubst ){
    Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewStringObj(zToken, nToken));
    Tcl_ListObjAppendElement(
        0, p->pRet, Tcl_NewStringObj(&p->zInput[iStart], iEnd-iStart)
    );
  }else{
    Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewStringObj(zToken, nToken));
    Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iStart));
    Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iEnd));
  }
  return SQLITE_OK;
}


/*
**      sqlite3_fts5_tokenize DB TOKENIZER TEXT
**
** Description...
*/
static int f5tTokenize(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  char *zText;
  int nText;
  sqlite3 *db = 0;
  fts5_api *pApi = 0;
  Fts5Tokenizer *pTok = 0;
  fts5_tokenizer tokenizer;
  Tcl_Obj *pRet = 0;
  void *pUserdata;
  int rc;

  int nArg;
  const char **azArg;
  F5tTokenizeCtx ctx;

  if( objc!=4 && objc!=5 ){
    Tcl_WrongNumArgs(interp, 1, objv, "?-subst? DB NAME TEXT");
    return TCL_ERROR;
  }
  if( objc==5 ){
    char *zOpt = Tcl_GetString(objv[1]);
    if( strcmp("-subst", zOpt) ){
      Tcl_AppendResult(interp, "unrecognized option: ", zOpt, 0);
      return TCL_ERROR;
    }
  }
  if( f5tDbAndApi(interp, objv[objc-3], &db, &pApi) ) return TCL_ERROR;
  if( Tcl_SplitList(interp, Tcl_GetString(objv[objc-2]), &nArg, &azArg) ){
    return TCL_ERROR;
  }
  if( nArg==0 ){
    Tcl_AppendResult(interp, "no such tokenizer: ", 0);
    Tcl_Free((void*)azArg);
    return TCL_ERROR;
  }
  zText = Tcl_GetStringFromObj(objv[objc-1], &nText);

  rc = pApi->xFindTokenizer(pApi, azArg[0], &pUserdata, &tokenizer);
  if( rc!=SQLITE_OK ){
    Tcl_AppendResult(interp, "no such tokenizer: ", azArg[0], 0);
    return TCL_ERROR;
  }

  rc = tokenizer.xCreate(pUserdata, &azArg[1], nArg-1, &pTok);
  if( rc!=SQLITE_OK ){
    Tcl_AppendResult(interp, "error in tokenizer.xCreate()", 0);
    return TCL_ERROR;
  }

  pRet = Tcl_NewObj();
  Tcl_IncrRefCount(pRet);
  ctx.bSubst = (objc==5);
  ctx.pRet = pRet;
  ctx.zInput = zText;
  rc = tokenizer.xTokenize(pTok, (void*)&ctx, zText, nText, xTokenizeCb2);
  tokenizer.xDelete(pTok);
  if( rc!=SQLITE_OK ){
    Tcl_AppendResult(interp, "error in tokenizer.xTokenize()", 0);
    Tcl_DecrRefCount(pRet);
    return TCL_ERROR;
  }


  Tcl_Free((void*)azArg);
  Tcl_SetObjResult(interp, pRet);
  Tcl_DecrRefCount(pRet);
  return TCL_OK;
}

/*************************************************************************
** Start of tokenizer wrapper.
*/

typedef struct F5tTokenizerContext F5tTokenizerContext;
typedef struct F5tTokenizerCb F5tTokenizerCb;
typedef struct F5tTokenizerModule F5tTokenizerModule;
typedef struct F5tTokenizerModule F5tTokenizerInstance;

struct F5tTokenizerContext {
  void *pCtx;
  int (*xToken)(void*, const char*, int, int, int);
};

struct F5tTokenizerModule {
  Tcl_Interp *interp;
  Tcl_Obj *pScript;
  F5tTokenizerContext *pContext;
};

static int f5tTokenizerCreate(
  void *pCtx, 
  const char **azArg, 
  int nArg, 
  Fts5Tokenizer **ppOut
){
  F5tTokenizerModule *pMod = (F5tTokenizerModule*)pCtx;
  Tcl_Obj *pEval;
  int rc = TCL_OK;
  int i;

  pEval = Tcl_DuplicateObj(pMod->pScript);
  Tcl_IncrRefCount(pEval);
  for(i=0; rc==TCL_OK && i<nArg; i++){
    Tcl_Obj *pObj = Tcl_NewStringObj(azArg[i], -1);
    rc = Tcl_ListObjAppendElement(pMod->interp, pEval, pObj);
  }

  if( rc==TCL_OK ){
    rc = Tcl_EvalObjEx(pMod->interp, pEval, TCL_GLOBAL_ONLY);
  }
  Tcl_DecrRefCount(pEval);

  if( rc==TCL_OK ){
    F5tTokenizerInstance *pInst;
    pInst = (F5tTokenizerInstance*)ckalloc(sizeof(F5tTokenizerInstance));
    memset(pInst, 0, sizeof(F5tTokenizerInstance));
    pInst->interp = pMod->interp;
    pInst->pScript = Tcl_GetObjResult(pMod->interp);
    pInst->pContext = pMod->pContext;
    Tcl_IncrRefCount(pInst->pScript);
    *ppOut = (Fts5Tokenizer*)pInst;
  }

  return rc;
}


static void f5tTokenizerDelete(Fts5Tokenizer *p){
  F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p;
  Tcl_DecrRefCount(pInst->pScript);
  ckfree((char *)pInst);
}

static int f5tTokenizerTokenize(
  Fts5Tokenizer *p, 
  void *pCtx,
  const char *pText, int nText, 
  int (*xToken)(void*, const char*, int, int, int)
){
  F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p;
  void *pOldCtx;
  int (*xOldToken)(void*, const char*, int, int, int);
  Tcl_Obj *pEval;
  int rc;

  pOldCtx = pInst->pContext->pCtx;
  xOldToken = pInst->pContext->xToken;

  pEval = Tcl_DuplicateObj(pInst->pScript);
  Tcl_IncrRefCount(pEval);
  rc = Tcl_ListObjAppendElement(
      pInst->interp, pEval, Tcl_NewStringObj(pText, nText)
  );
  if( rc==TCL_OK ){
    rc = Tcl_EvalObjEx(pInst->interp, pEval, TCL_GLOBAL_ONLY);
  }
  Tcl_DecrRefCount(pEval);

  pInst->pContext->pCtx = pOldCtx;
  pInst->pContext->xToken = xOldToken;
  return rc;
}

/*
** sqlite3_fts5_token TEXT START END POS
*/
static int f5tTokenizerReturn(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  F5tTokenizerContext *p = (F5tTokenizerContext*)clientData;
  int iStart;
  int iEnd;
  int nToken;
  char *zToken;
  int rc;

  assert( p );
  if( objc!=4 ){
    Tcl_WrongNumArgs(interp, 1, objv, "TEXT START END");
    return TCL_ERROR;
  }
  if( p->xToken==0 ){
    Tcl_AppendResult(interp, 
        "sqlite3_fts5_token may only be used by tokenizer callback", 0
    );
    return TCL_ERROR;
  }

  zToken = Tcl_GetStringFromObj(objv[1], &nToken);
  if( Tcl_GetIntFromObj(interp, objv[2], &iStart) 
   || Tcl_GetIntFromObj(interp, objv[3], &iEnd) 
  ){
    return TCL_ERROR;
  }

  rc = p->xToken(p->pCtx, zToken, nToken, iStart, iEnd);
  Tcl_SetResult(interp, (char*)sqlite3ErrName(rc), TCL_VOLATILE);
  return TCL_OK;
}

static void f5tDelTokenizer(void *pCtx){
  F5tTokenizerModule *pMod = (F5tTokenizerModule*)pCtx;
  Tcl_DecrRefCount(pMod->pScript);
  ckfree((char *)pMod);
}

/*
**      sqlite3_fts5_create_tokenizer DB NAME SCRIPT
**
** Register a tokenizer named NAME implemented by script SCRIPT. When
** a tokenizer instance is created (fts5_tokenizer.xCreate), any tokenizer
** arguments are appended to SCRIPT and the result executed.
**
** The value returned by (SCRIPT + args) is itself a tcl script. This 
** script - call it SCRIPT2 - is executed to tokenize text using the
** tokenizer instance "returned" by SCRIPT. Specifically, to tokenize
** text SCRIPT2 is invoked with a single argument appended to it - the
** text to tokenize.
**
** SCRIPT2 should invoke the [sqlite3_fts5_token] command once for each
** token within the tokenized text.
*/
static int f5tCreateTokenizer(
  ClientData clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  F5tTokenizerContext *pContext = (F5tTokenizerContext*)clientData;
  sqlite3 *db;
  fts5_api *pApi;
  char *zName;
  Tcl_Obj *pScript;
  fts5_tokenizer t;
  F5tTokenizerModule *pMod;
  int rc;

  if( objc!=4 ){
    Tcl_WrongNumArgs(interp, 1, objv, "DB NAME SCRIPT");
    return TCL_ERROR;
  }
  if( f5tDbAndApi(interp, objv[1], &db, &pApi) ){
    return TCL_ERROR;
  }
  zName = Tcl_GetString(objv[2]);
  pScript = objv[3];

  t.xCreate = f5tTokenizerCreate;
  t.xTokenize = f5tTokenizerTokenize;
  t.xDelete = f5tTokenizerDelete;

  pMod = (F5tTokenizerModule*)ckalloc(sizeof(F5tTokenizerModule));
  pMod->interp = interp;
  pMod->pScript = pScript;
  pMod->pContext = pContext;
  Tcl_IncrRefCount(pScript);
  rc = pApi->xCreateTokenizer(pApi, zName, (void*)pMod, &t, f5tDelTokenizer);
  if( rc!=SQLITE_OK ){
    Tcl_AppendResult(interp, "error in fts5_api.xCreateTokenizer()", 0);
    return TCL_ERROR;
  }

  return TCL_OK;
}

static void xF5tFree(ClientData clientData){
  ckfree(clientData);
}

/*
**      sqlite3_fts5_may_be_corrupt BOOLEAN
**
** Set or clear the global "may-be-corrupt" flag. Return the old value.
*/
static int f5tMayBeCorrupt(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  int bOld = sqlite3_fts5_may_be_corrupt;

  if( objc!=2 && objc!=1 ){
    Tcl_WrongNumArgs(interp, 1, objv, "?BOOLEAN?");
    return TCL_ERROR;
  }
  if( objc==2 ){
    int bNew;
    if( Tcl_GetBooleanFromObj(interp, objv[1], &bNew) ) return TCL_ERROR;
    sqlite3_fts5_may_be_corrupt = bNew;
  }

  Tcl_SetObjResult(interp, Tcl_NewIntObj(bOld));
  return TCL_OK;
}


static unsigned int f5t_fts5HashKey(int nSlot, const char *p, int n){
  int i;
  unsigned int h = 13;
  for(i=n-1; i>=0; i--){
    h = (h << 3) ^ h ^ p[i];
  }
  return (h % nSlot);
}

static int f5tTokenHash(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  int bOld = sqlite3_fts5_may_be_corrupt;
  char *z;
  int n;
  unsigned int iVal;
  int nSlot;

  if( objc!=3 ){
    Tcl_WrongNumArgs(interp, 1, objv, "NSLOT TOKEN");
    return TCL_ERROR;
  }
  if( Tcl_GetIntFromObj(interp, objv[1], &nSlot) ){
    return TCL_ERROR;
  }
  z = Tcl_GetStringFromObj(objv[2], &n);

  iVal = f5t_fts5HashKey(nSlot, z, n);
  Tcl_SetObjResult(interp, Tcl_NewIntObj(iVal));
  return TCL_OK;
}

/*
** Entry point.
*/
int Fts5tcl_Init(Tcl_Interp *interp){
  static struct Cmd {
    char *zName;
    Tcl_ObjCmdProc *xProc;
    int bTokenizeCtx;
  } aCmd[] = {
    { "sqlite3_fts5_create_tokenizer", f5tCreateTokenizer, 1 },
    { "sqlite3_fts5_token",            f5tTokenizerReturn, 1 },
    { "sqlite3_fts5_tokenize",         f5tTokenize, 0 },
    { "sqlite3_fts5_create_function",  f5tCreateFunction, 0 },
    { "sqlite3_fts5_may_be_corrupt",   f5tMayBeCorrupt, 0 },
    { "sqlite3_fts5_token_hash",       f5tTokenHash, 0 }
  };
  int i;
  F5tTokenizerContext *pContext;

  pContext = (F5tTokenizerContext*)ckalloc(sizeof(F5tTokenizerContext));
  memset(pContext, 0, sizeof(*pContext));

  for(i=0; i<sizeof(aCmd)/sizeof(aCmd[0]); i++){
    struct Cmd *p = &aCmd[i];
    void *pCtx = 0;
    if( p->bTokenizeCtx ) pCtx = (void*)pContext;
    Tcl_CreateObjCommand(interp, p->zName, p->xProc, pCtx, (i ? 0 : xF5tFree));
  }

  return TCL_OK;
}
#else  /* SQLITE_ENABLE_FTS5 */
int Fts5tcl_Init(Tcl_Interp *interp){
  return TCL_OK;
}
#endif /* SQLITE_ENABLE_FTS5 */
#endif /* SQLITE_TEST */
Added ext/fts5/fts5_tokenize.c.






























































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
/*
** 2014 May 31
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
*/

#if defined(SQLITE_ENABLE_FTS5)

#include "fts5Int.h"

/**************************************************************************
** Start of ascii tokenizer implementation.
*/

/*
** For tokenizers with no "unicode" modifier, the set of token characters
** is the same as the set of ASCII range alphanumeric characters. 
*/
static unsigned char aAsciiTokenChar[128] = {
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,   /* 0x00..0x0F */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,   /* 0x10..0x1F */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,   /* 0x20..0x2F */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 0, 0, 0, 0, 0, 0,   /* 0x30..0x3F */
  0, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,   /* 0x40..0x4F */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 0, 0, 0,   /* 0x50..0x5F */
  0, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,   /* 0x60..0x6F */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 0, 0, 0,   /* 0x70..0x7F */
};

typedef struct AsciiTokenizer AsciiTokenizer;
struct AsciiTokenizer {
  unsigned char aTokenChar[128];
};

static void fts5AsciiAddExceptions(
  AsciiTokenizer *p, 
  const char *zArg, 
  int bTokenChars
){
  int i;
  for(i=0; zArg[i]; i++){
    if( (zArg[i] & 0x80)==0 ){
      p->aTokenChar[(int)zArg[i]] = (unsigned char)bTokenChars;
    }
  }
}

/*
** Delete a "ascii" tokenizer.
*/
static void fts5AsciiDelete(Fts5Tokenizer *p){
  sqlite3_free(p);
}

/*
** Create an "ascii" tokenizer.
*/
static int fts5AsciiCreate(
  void *pCtx, 
  const char **azArg, int nArg,
  Fts5Tokenizer **ppOut
){
  int rc = SQLITE_OK;
  AsciiTokenizer *p = 0;
  if( nArg%2 ){
    rc = SQLITE_ERROR;
  }else{
    p = sqlite3_malloc(sizeof(AsciiTokenizer));
    if( p==0 ){
      rc = SQLITE_NOMEM;
    }else{
      int i;
      memset(p, 0, sizeof(AsciiTokenizer));
      memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar));
      for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
        const char *zArg = azArg[i+1];
        if( 0==sqlite3_stricmp(azArg[i], "tokenchars") ){
          fts5AsciiAddExceptions(p, zArg, 1);
        }else
        if( 0==sqlite3_stricmp(azArg[i], "separators") ){
          fts5AsciiAddExceptions(p, zArg, 0);
        }else{
          rc = SQLITE_ERROR;
        }
      }
      if( rc!=SQLITE_OK ){
        fts5AsciiDelete((Fts5Tokenizer*)p);
        p = 0;
      }
    }
  }

  *ppOut = (Fts5Tokenizer*)p;
  return rc;
}


static void asciiFold(char *aOut, const char *aIn, int nByte){
  int i;
  for(i=0; i<nByte; i++){
    char c = aIn[i];
    if( c>='A' && c<='Z' ) c += 32;
    aOut[i] = c;
  }
}

/*
** Tokenize some text using the ascii tokenizer.
*/
static int fts5AsciiTokenize(
  Fts5Tokenizer *pTokenizer,
  void *pCtx,
  const char *pText, int nText,
  int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd)
){
  AsciiTokenizer *p = (AsciiTokenizer*)pTokenizer;
  int rc = SQLITE_OK;
  int ie;
  int is = 0;

  char aFold[64];
  int nFold = sizeof(aFold);
  char *pFold = aFold;
  unsigned char *a = p->aTokenChar;

  while( is<nText && rc==SQLITE_OK ){
    int nByte;

    /* Skip any leading divider characters. */
    while( is<nText && ((pText[is]&0x80)==0 && a[(int)pText[is]]==0) ){
      is++;
    }
    if( is==nText ) break;

    /* Count the token characters */
    ie = is+1;
    while( ie<nText && ((pText[ie]&0x80) || a[(int)pText[ie]] ) ){
      ie++;
    }

    /* Fold to lower case */
    nByte = ie-is;
    if( nByte>nFold ){
      if( pFold!=aFold ) sqlite3_free(pFold);
      pFold = sqlite3_malloc(nByte*2);
      if( pFold==0 ){
        rc = SQLITE_NOMEM;
        break;
      }
      nFold = nByte*2;
    }
    asciiFold(pFold, &pText[is], nByte);

    /* Invoke the token callback */
    rc = xToken(pCtx, pFold, nByte, is, ie);
    is = ie+1;
  }
  
  if( pFold!=aFold ) sqlite3_free(pFold);
  if( rc==SQLITE_DONE ) rc = SQLITE_OK;
  return rc;
}

/**************************************************************************
** Start of unicode61 tokenizer implementation.
*/


/*
** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied
** from the sqlite3 source file utf.c. If this file is compiled as part
** of the amalgamation, they are not required.
*/
#ifndef SQLITE_AMALGAMATION

static const unsigned char sqlite3Utf8Trans1[] = {
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
};

#define READ_UTF8(zIn, zTerm, c)                           \
  c = *(zIn++);                                            \
  if( c>=0xc0 ){                                           \
    c = sqlite3Utf8Trans1[c-0xc0];                         \
    while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){            \
      c = (c<<6) + (0x3f & *(zIn++));                      \
    }                                                      \
    if( c<0x80                                             \
        || (c&0xFFFFF800)==0xD800                          \
        || (c&0xFFFFFFFE)==0xFFFE ){  c = 0xFFFD; }        \
  }


#define WRITE_UTF8(zOut, c) {                          \
  if( c<0x00080 ){                                     \
    *zOut++ = (unsigned char)(c&0xFF);                 \
  }                                                    \
  else if( c<0x00800 ){                                \
    *zOut++ = 0xC0 + (unsigned char)((c>>6)&0x1F);     \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F);        \
  }                                                    \
  else if( c<0x10000 ){                                \
    *zOut++ = 0xE0 + (unsigned char)((c>>12)&0x0F);    \
    *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F);   \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F);        \
  }else{                                               \
    *zOut++ = 0xF0 + (unsigned char)((c>>18) & 0x07);  \
    *zOut++ = 0x80 + (unsigned char)((c>>12) & 0x3F);  \
    *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F);   \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F);        \
  }                                                    \
}

#endif /* ifndef SQLITE_AMALGAMATION */

typedef struct Unicode61Tokenizer Unicode61Tokenizer;
struct Unicode61Tokenizer {
  unsigned char aTokenChar[128];  /* ASCII range token characters */
  char *aFold;                    /* Buffer to fold text into */
  int nFold;                      /* Size of aFold[] in bytes */
  int bRemoveDiacritic;           /* True if remove_diacritics=1 is set */
  int nException;
  int *aiException;
};

static int fts5UnicodeAddExceptions(
  Unicode61Tokenizer *p,          /* Tokenizer object */
  const char *z,                  /* Characters to treat as exceptions */
  int bTokenChars                 /* 1 for 'tokenchars', 0 for 'separators' */
){
  int rc = SQLITE_OK;
  int n = strlen(z);
  int *aNew;

  if( n>0 ){
    aNew = (int*)sqlite3_realloc(p->aiException, (n+p->nException)*sizeof(int));
    if( aNew ){
      int nNew = p->nException;
      const unsigned char *zCsr = (const unsigned char*)z;
      const unsigned char *zTerm = (const unsigned char*)&z[n];
      while( zCsr<zTerm ){
        int iCode;
        int bToken;
        READ_UTF8(zCsr, zTerm, iCode);
        if( iCode<128 ){
          p->aTokenChar[iCode] = bTokenChars;
        }else{
          bToken = sqlite3Fts5UnicodeIsalnum(iCode);
          assert( (bToken==0 || bToken==1) ); 
          assert( (bTokenChars==0 || bTokenChars==1) );
          if( bToken!=bTokenChars && sqlite3Fts5UnicodeIsdiacritic(iCode)==0 ){
            int i;
            for(i=0; i<nNew; i++){
              if( aNew[i]>iCode ) break;
            }
            memmove(&aNew[i+1], &aNew[i], (nNew-i)*sizeof(int));
            aNew[i] = iCode;
            nNew++;
          }
        }
      }
      p->aiException = aNew;
      p->nException = nNew;
    }else{
      rc = SQLITE_NOMEM;
    }
  }

  return rc;
}

/*
** Return true if the p->aiException[] array contains the value iCode.
*/
static int fts5UnicodeIsException(Unicode61Tokenizer *p, int iCode){
  if( p->nException>0 ){
    int *a = p->aiException;
    int iLo = 0;
    int iHi = p->nException-1;

    while( iHi>=iLo ){
      int iTest = (iHi + iLo) / 2;
      if( iCode==a[iTest] ){
        return 1;
      }else if( iCode>a[iTest] ){
        iLo = iTest+1;
      }else{
        iHi = iTest-1;
      }
    }
  }

  return 0;
}

/*
** Delete a "unicode61" tokenizer.
*/
static void fts5UnicodeDelete(Fts5Tokenizer *pTok){
  if( pTok ){
    Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTok;
    sqlite3_free(p->aiException);
    sqlite3_free(p->aFold);
    sqlite3_free(p);
  }
  return;
}

/*
** Create a "unicode61" tokenizer.
*/
static int fts5UnicodeCreate(
  void *pCtx, 
  const char **azArg, int nArg,
  Fts5Tokenizer **ppOut
){
  int rc = SQLITE_OK;             /* Return code */
  Unicode61Tokenizer *p = 0;      /* New tokenizer object */ 

  if( nArg%2 ){
    rc = SQLITE_ERROR;
  }else{
    p = (Unicode61Tokenizer*)sqlite3_malloc(sizeof(Unicode61Tokenizer));
    if( p ){
      int i;
      memset(p, 0, sizeof(Unicode61Tokenizer));
      memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar));
      p->bRemoveDiacritic = 1;
      p->nFold = 64;
      p->aFold = sqlite3_malloc(p->nFold * sizeof(char));
      if( p->aFold==0 ){
        rc = SQLITE_NOMEM;
      }
      for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
        const char *zArg = azArg[i+1];
        if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){
          if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1] ){
            rc = SQLITE_ERROR;
          }
          p->bRemoveDiacritic = (zArg[0]=='1');
        }else
        if( 0==sqlite3_stricmp(azArg[i], "tokenchars") ){
          rc = fts5UnicodeAddExceptions(p, zArg, 1);
        }else
        if( 0==sqlite3_stricmp(azArg[i], "separators") ){
          rc = fts5UnicodeAddExceptions(p, zArg, 0);
        }else{
          rc = SQLITE_ERROR;
        }
      }
    }else{
      rc = SQLITE_NOMEM;
    }
    if( rc!=SQLITE_OK ){
      fts5UnicodeDelete((Fts5Tokenizer*)p);
      p = 0;
    }
    *ppOut = (Fts5Tokenizer*)p;
  }
  return rc;
}

/*
** Return true if, for the purposes of tokenizing with the tokenizer
** passed as the first argument, codepoint iCode is considered a token 
** character (not a separator).
*/
static int fts5UnicodeIsAlnum(Unicode61Tokenizer *p, int iCode){
  assert( (sqlite3Fts5UnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 );
  return sqlite3Fts5UnicodeIsalnum(iCode) ^ fts5UnicodeIsException(p, iCode);
}

static int fts5UnicodeTokenize(
  Fts5Tokenizer *pTokenizer,
  void *pCtx,
  const char *pText, int nText,
  int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd)
){
  Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer;
  int rc = SQLITE_OK;
  unsigned char *a = p->aTokenChar;

  unsigned char *zTerm = (unsigned char*)&pText[nText];
  unsigned char *zCsr = (unsigned char *)pText;

  /* Output buffer */
  char *aFold = p->aFold;
  int nFold = p->nFold;
  const char *pEnd = &aFold[nFold-6];

  /* Each iteration of this loop gobbles up a contiguous run of separators,
  ** then the next token.  */
  while( rc==SQLITE_OK ){
    int iCode;                    /* non-ASCII codepoint read from input */
    char *zOut = aFold;
    int is;
    int ie;

    /* Skip any separator characters. */
    while( 1 ){
      if( zCsr>=zTerm ) goto tokenize_done;
      if( *zCsr & 0x80 ) {
        /* A character outside of the ascii range. Skip past it if it is
        ** a separator character. Or break out of the loop if it is not. */
        is = zCsr - (unsigned char*)pText;
        READ_UTF8(zCsr, zTerm, iCode);
        if( fts5UnicodeIsAlnum(p, iCode) ){
          goto non_ascii_tokenchar;
        }
      }else{
        if( a[*zCsr] ){
          is = zCsr - (unsigned char*)pText;
          goto ascii_tokenchar;
        }
        zCsr++;
      }
    }

    /* Run through the tokenchars. Fold them into the output buffer along
    ** the way.  */
    while( zCsr<zTerm ){

      /* Grow the output buffer so that there is sufficient space to fit the
      ** largest possible utf-8 character.  */
      if( zOut>pEnd ){
        aFold = sqlite3_malloc(nFold*2);
        if( aFold==0 ){
          rc = SQLITE_NOMEM;
          goto tokenize_done;
        }
        zOut = &aFold[zOut - p->aFold];
        memcpy(aFold, p->aFold, nFold);
        sqlite3_free(p->aFold);
        p->aFold = aFold;
        p->nFold = nFold = nFold*2;
        pEnd = &aFold[nFold-6];
      }

      if( *zCsr & 0x80 ){
        /* An non-ascii-range character. Fold it into the output buffer if
        ** it is a token character, or break out of the loop if it is not. */
        READ_UTF8(zCsr, zTerm, iCode);
        if( fts5UnicodeIsAlnum(p,iCode)||sqlite3Fts5UnicodeIsdiacritic(iCode) ){
 non_ascii_tokenchar:
          iCode = sqlite3Fts5UnicodeFold(iCode, p->bRemoveDiacritic);
          if( iCode ) WRITE_UTF8(zOut, iCode);
        }else{
          break;
        }
      }else if( a[*zCsr]==0 ){
        /* An ascii-range separator character. End of token. */
        break; 
      }else{
 ascii_tokenchar:
        if( *zCsr>='A' && *zCsr<='Z' ){
          *zOut++ = *zCsr + 32;
        }else{
          *zOut++ = *zCsr;
        }
        zCsr++;
      }
      ie = zCsr - (unsigned char*)pText;
    }

    /* Invoke the token callback */
    rc = xToken(pCtx, aFold, zOut-aFold, is, ie);
  }
  
 tokenize_done:
  if( rc==SQLITE_DONE ) rc = SQLITE_OK;
  return rc;
}

/**************************************************************************
** Start of porter stemmer implementation.
*/

/* Any tokens larger than this (in bytes) are passed through without
** stemming. */
#define FTS5_PORTER_MAX_TOKEN 64

typedef struct PorterTokenizer PorterTokenizer;
struct PorterTokenizer {
  fts5_tokenizer tokenizer;       /* Parent tokenizer module */
  Fts5Tokenizer *pTokenizer;      /* Parent tokenizer instance */
  char aBuf[FTS5_PORTER_MAX_TOKEN + 64];
};

/*
** Delete a "porter" tokenizer.
*/
static void fts5PorterDelete(Fts5Tokenizer *pTok){
  if( pTok ){
    PorterTokenizer *p = (PorterTokenizer*)pTok;
    if( p->pTokenizer ){
      p->tokenizer.xDelete(p->pTokenizer);
    }
    sqlite3_free(p);
  }
}

/*
** Create a "porter" tokenizer.
*/
static int fts5PorterCreate(
  void *pCtx, 
  const char **azArg, int nArg,
  Fts5Tokenizer **ppOut
){
  fts5_api *pApi = (fts5_api*)pCtx;
  int rc = SQLITE_OK;
  PorterTokenizer *pRet;
  void *pUserdata = 0;
  const char *zBase = "unicode61";

  if( nArg>0 ){
    zBase = azArg[0];
  }

  pRet = (PorterTokenizer*)sqlite3_malloc(sizeof(PorterTokenizer));
  if( pRet ){
    memset(pRet, 0, sizeof(PorterTokenizer));
    rc = pApi->xFindTokenizer(pApi, zBase, &pUserdata, &pRet->tokenizer);
  }else{
    rc = SQLITE_NOMEM;
  }
  if( rc==SQLITE_OK ){
    rc = pRet->tokenizer.xCreate(pUserdata, 0, 0, &pRet->pTokenizer);
  }

  if( rc!=SQLITE_OK ){
    fts5PorterDelete((Fts5Tokenizer*)pRet);
    pRet = 0;
  }
  *ppOut = (Fts5Tokenizer*)pRet;
  return rc;
}

typedef struct PorterContext PorterContext;
struct PorterContext {
  void *pCtx;
  int (*xToken)(void*, const char*, int, int, int);
  char *aBuf;
};

typedef struct PorterRule PorterRule;
struct PorterRule {
  const char *zSuffix;
  int nSuffix;
  int (*xCond)(char *zStem, int nStem);
  const char *zOutput;
  int nOutput;
};

#if 0
static int fts5PorterApply(char *aBuf, int *pnBuf, PorterRule *aRule){
  int ret = -1;
  int nBuf = *pnBuf;
  PorterRule *p;

  for(p=aRule; p->zSuffix; p++){
    assert( strlen(p->zSuffix)==p->nSuffix );
    assert( strlen(p->zOutput)==p->nOutput );
    if( nBuf<p->nSuffix ) continue;
    if( 0==memcmp(&aBuf[nBuf - p->nSuffix], p->zSuffix, p->nSuffix) ) break;
  }

  if( p->zSuffix ){
    int nStem = nBuf - p->nSuffix;
    if( p->xCond==0 || p->xCond(aBuf, nStem) ){
      memcpy(&aBuf[nStem], p->zOutput, p->nOutput);
      *pnBuf = nStem + p->nOutput;
      ret = p - aRule;
    }
  }

  return ret;
}
#endif

static int fts5PorterIsVowel(char c, int bYIsVowel){
  return (
      c=='a' || c=='e' || c=='i' || c=='o' || c=='u' || (bYIsVowel && c=='y')
  );
}

static int fts5PorterGobbleVC(char *zStem, int nStem, int bPrevCons){
  int i;
  int bCons = bPrevCons;

  /* Scan for a vowel */
  for(i=0; i<nStem; i++){
    if( 0==(bCons = !fts5PorterIsVowel(zStem[i], bCons)) ) break;
  }

  /* Scan for a consonent */
  for(i++; i<nStem; i++){
    if( (bCons = !fts5PorterIsVowel(zStem[i], bCons)) ) return i+1;
  }
  return 0;
}

/* porter rule condition: (m > 0) */
static int fts5Porter_MGt0(char *zStem, int nStem){
  return !!fts5PorterGobbleVC(zStem, nStem, 0);
}

/* porter rule condition: (m > 1) */
static int fts5Porter_MGt1(char *zStem, int nStem){
  int n;
  n = fts5PorterGobbleVC(zStem, nStem, 0);
  if( n && fts5PorterGobbleVC(&zStem[n], nStem-n, 1) ){
    return 1;
  }
  return 0;
}

/* porter rule condition: (m = 1) */
static int fts5Porter_MEq1(char *zStem, int nStem){
  int n;
  n = fts5PorterGobbleVC(zStem, nStem, 0);
  if( n && 0==fts5PorterGobbleVC(&zStem[n], nStem-n, 1) ){
    return 1;
  }
  return 0;
}

/* porter rule condition: (*o) */
static int fts5Porter_Ostar(char *zStem, int nStem){
  if( zStem[nStem-1]=='w' || zStem[nStem-1]=='x' || zStem[nStem-1]=='y' ){
    return 0;
  }else{
    int i;
    int mask = 0;
    int bCons = 0;
    for(i=0; i<nStem; i++){
      bCons = !fts5PorterIsVowel(zStem[i], bCons);
      assert( bCons==0 || bCons==1 );
      mask = (mask << 1) + bCons;
    }
    return ((mask & 0x0007)==0x0005);
  }
}

/* porter rule condition: (m > 1 and (*S or *T)) */
static int fts5Porter_MGt1_and_S_or_T(char *zStem, int nStem){
  assert( nStem>0 );
  return (zStem[nStem-1]=='s' || zStem[nStem-1]=='t') 
      && fts5Porter_MGt1(zStem, nStem);
}

/* porter rule condition: (*v*) */
static int fts5Porter_Vowel(char *zStem, int nStem){
  int i;
  for(i=0; i<nStem; i++){
    if( fts5PorterIsVowel(zStem[i], i>0) ){
      return 1;
    }
  }
  return 0;
}


/**************************************************************************
***************************************************************************
** GENERATED CODE STARTS HERE (mkportersteps.tcl)
*/

static int fts5PorterStep4(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;
  switch( aBuf[nBuf-2] ){
    
    case 'a': 
      if( nBuf>2 && 0==memcmp("al", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;
  
    case 'c': 
      if( nBuf>4 && 0==memcmp("ance", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }else if( nBuf>4 && 0==memcmp("ence", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }
      break;
  
    case 'e': 
      if( nBuf>2 && 0==memcmp("er", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;
  
    case 'i': 
      if( nBuf>2 && 0==memcmp("ic", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;
  
    case 'l': 
      if( nBuf>4 && 0==memcmp("able", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }else if( nBuf>4 && 0==memcmp("ible", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }
      break;
  
    case 'n': 
      if( nBuf>3 && 0==memcmp("ant", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }else if( nBuf>5 && 0==memcmp("ement", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt1(aBuf, nBuf-5) ){
          *pnBuf = nBuf - 5;
        }
      }else if( nBuf>4 && 0==memcmp("ment", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }else if( nBuf>3 && 0==memcmp("ent", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;
  
    case 'o': 
      if( nBuf>3 && 0==memcmp("ion", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1_and_S_or_T(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }else if( nBuf>2 && 0==memcmp("ou", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;
  
    case 's': 
      if( nBuf>3 && 0==memcmp("ism", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;
  
    case 't': 
      if( nBuf>3 && 0==memcmp("ate", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }else if( nBuf>3 && 0==memcmp("iti", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;
  
    case 'u': 
      if( nBuf>3 && 0==memcmp("ous", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;
  
    case 'v': 
      if( nBuf>3 && 0==memcmp("ive", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;
  
    case 'z': 
      if( nBuf>3 && 0==memcmp("ize", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;
  
  }
  return ret;
}
  

static int fts5PorterStep1B2(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;
  switch( aBuf[nBuf-2] ){
    
    case 'a': 
      if( nBuf>2 && 0==memcmp("at", &aBuf[nBuf-2], 2) ){
        memcpy(&aBuf[nBuf-2], "ate", 3);
        *pnBuf = nBuf - 2 + 3;
        ret = 1;
      }
      break;
  
    case 'b': 
      if( nBuf>2 && 0==memcmp("bl", &aBuf[nBuf-2], 2) ){
        memcpy(&aBuf[nBuf-2], "ble", 3);
        *pnBuf = nBuf - 2 + 3;
        ret = 1;
      }
      break;
  
    case 'i': 
      if( nBuf>2 && 0==memcmp("iz", &aBuf[nBuf-2], 2) ){
        memcpy(&aBuf[nBuf-2], "ize", 3);
        *pnBuf = nBuf - 2 + 3;
        ret = 1;
      }
      break;
  
  }
  return ret;
}
  

static int fts5PorterStep2(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;
  switch( aBuf[nBuf-2] ){
    
    case 'a': 
      if( nBuf>7 && 0==memcmp("ational", &aBuf[nBuf-7], 7) ){
        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
          memcpy(&aBuf[nBuf-7], "ate", 3);
          *pnBuf = nBuf - 7 + 3;
        }
      }else if( nBuf>6 && 0==memcmp("tional", &aBuf[nBuf-6], 6) ){
        if( fts5Porter_MGt0(aBuf, nBuf-6) ){
          memcpy(&aBuf[nBuf-6], "tion", 4);
          *pnBuf = nBuf - 6 + 4;
        }
      }
      break;
  
    case 'c': 
      if( nBuf>4 && 0==memcmp("enci", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "ence", 4);
          *pnBuf = nBuf - 4 + 4;
        }
      }else if( nBuf>4 && 0==memcmp("anci", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "ance", 4);
          *pnBuf = nBuf - 4 + 4;
        }
      }
      break;
  
    case 'e': 
      if( nBuf>4 && 0==memcmp("izer", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "ize", 3);
          *pnBuf = nBuf - 4 + 3;
        }
      }
      break;
  
    case 'g': 
      if( nBuf>4 && 0==memcmp("logi", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "log", 3);
          *pnBuf = nBuf - 4 + 3;
        }
      }
      break;
  
    case 'l': 
      if( nBuf>3 && 0==memcmp("bli", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt0(aBuf, nBuf-3) ){
          memcpy(&aBuf[nBuf-3], "ble", 3);
          *pnBuf = nBuf - 3 + 3;
        }
      }else if( nBuf>4 && 0==memcmp("alli", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "al", 2);
          *pnBuf = nBuf - 4 + 2;
        }
      }else if( nBuf>5 && 0==memcmp("entli", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ent", 3);
          *pnBuf = nBuf - 5 + 3;
        }
      }else if( nBuf>3 && 0==memcmp("eli", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt0(aBuf, nBuf-3) ){
          memcpy(&aBuf[nBuf-3], "e", 1);
          *pnBuf = nBuf - 3 + 1;
        }
      }else if( nBuf>5 && 0==memcmp("ousli", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ous", 3);
          *pnBuf = nBuf - 5 + 3;
        }
      }
      break;
  
    case 'o': 
      if( nBuf>7 && 0==memcmp("ization", &aBuf[nBuf-7], 7) ){
        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
          memcpy(&aBuf[nBuf-7], "ize", 3);
          *pnBuf = nBuf - 7 + 3;
        }
      }else if( nBuf>5 && 0==memcmp("ation", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ate", 3);
          *pnBuf = nBuf - 5 + 3;
        }
      }else if( nBuf>4 && 0==memcmp("ator", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "ate", 3);
          *pnBuf = nBuf - 4 + 3;
        }
      }
      break;
  
    case 's': 
      if( nBuf>5 && 0==memcmp("alism", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "al", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }else if( nBuf>7 && 0==memcmp("iveness", &aBuf[nBuf-7], 7) ){
        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
          memcpy(&aBuf[nBuf-7], "ive", 3);
          *pnBuf = nBuf - 7 + 3;
        }
      }else if( nBuf>7 && 0==memcmp("fulness", &aBuf[nBuf-7], 7) ){
        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
          memcpy(&aBuf[nBuf-7], "ful", 3);
          *pnBuf = nBuf - 7 + 3;
        }
      }else if( nBuf>7 && 0==memcmp("ousness", &aBuf[nBuf-7], 7) ){
        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
          memcpy(&aBuf[nBuf-7], "ous", 3);
          *pnBuf = nBuf - 7 + 3;
        }
      }
      break;
  
    case 't': 
      if( nBuf>5 && 0==memcmp("aliti", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "al", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }else if( nBuf>5 && 0==memcmp("iviti", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ive", 3);
          *pnBuf = nBuf - 5 + 3;
        }
      }else if( nBuf>6 && 0==memcmp("biliti", &aBuf[nBuf-6], 6) ){
        if( fts5Porter_MGt0(aBuf, nBuf-6) ){
          memcpy(&aBuf[nBuf-6], "ble", 3);
          *pnBuf = nBuf - 6 + 3;
        }
      }
      break;
  
  }
  return ret;
}
  

static int fts5PorterStep3(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;
  switch( aBuf[nBuf-2] ){
    
    case 'a': 
      if( nBuf>4 && 0==memcmp("ical", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "ic", 2);
          *pnBuf = nBuf - 4 + 2;
        }
      }
      break;
  
    case 's': 
      if( nBuf>4 && 0==memcmp("ness", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }
      break;
  
    case 't': 
      if( nBuf>5 && 0==memcmp("icate", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ic", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }else if( nBuf>5 && 0==memcmp("iciti", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ic", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }
      break;
  
    case 'u': 
      if( nBuf>3 && 0==memcmp("ful", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt0(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;
  
    case 'v': 
      if( nBuf>5 && 0==memcmp("ative", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          *pnBuf = nBuf - 5;
        }
      }
      break;
  
    case 'z': 
      if( nBuf>5 && 0==memcmp("alize", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "al", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }
      break;
  
  }
  return ret;
}
  

static int fts5PorterStep1B(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;
  switch( aBuf[nBuf-2] ){
    
    case 'e': 
      if( nBuf>3 && 0==memcmp("eed", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt0(aBuf, nBuf-3) ){
          memcpy(&aBuf[nBuf-3], "ee", 2);
          *pnBuf = nBuf - 3 + 2;
        }
      }else if( nBuf>2 && 0==memcmp("ed", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_Vowel(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
          ret = 1;
        }
      }
      break;
  
    case 'n': 
      if( nBuf>3 && 0==memcmp("ing", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_Vowel(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
          ret = 1;
        }
      }
      break;
  
  }
  return ret;
}
  
/* 
** GENERATED CODE ENDS HERE (mkportersteps.tcl)
***************************************************************************
**************************************************************************/

static void fts5PorterStep1A(char *aBuf, int *pnBuf){
  int nBuf = *pnBuf;
  if( aBuf[nBuf-1]=='s' ){
    if( aBuf[nBuf-2]=='e' ){
      if( (nBuf>4 && aBuf[nBuf-4]=='s' && aBuf[nBuf-3]=='s') 
       || (nBuf>3 && aBuf[nBuf-3]=='i' )
      ){
        *pnBuf = nBuf-2;
      }else{
        *pnBuf = nBuf-1;
      }
    }
    else if( aBuf[nBuf-2]!='s' ){
      *pnBuf = nBuf-1;
    }
  }
}

static int fts5PorterCb(
  void *pCtx, 
  const char *pToken, 
  int nToken, 
  int iStart, 
  int iEnd
){
  PorterContext *p = (PorterContext*)pCtx;

  char *aBuf;
  int nBuf;

  if( nToken>FTS5_PORTER_MAX_TOKEN || nToken<3 ) goto pass_through;
  aBuf = p->aBuf;
  nBuf = nToken;
  memcpy(aBuf, pToken, nBuf);

  /* Step 1. */
  fts5PorterStep1A(aBuf, &nBuf);
  if( fts5PorterStep1B(aBuf, &nBuf) ){
    if( fts5PorterStep1B2(aBuf, &nBuf)==0 ){
      char c = aBuf[nBuf-1];
      if( fts5PorterIsVowel(c, 0)==0 
       && c!='l' && c!='s' && c!='z' && c==aBuf[nBuf-2] 
      ){
        nBuf--;
      }else if( fts5Porter_MEq1(aBuf, nBuf) && fts5Porter_Ostar(aBuf, nBuf) ){
        aBuf[nBuf++] = 'e';
      }
    }
  }

  /* Step 1C. */
  if( aBuf[nBuf-1]=='y' && fts5Porter_Vowel(aBuf, nBuf-1) ){
    aBuf[nBuf-1] = 'i';
  }

  /* Steps 2 through 4. */
  fts5PorterStep2(aBuf, &nBuf);
  fts5PorterStep3(aBuf, &nBuf);
  fts5PorterStep4(aBuf, &nBuf);

  /* Step 5a. */
  assert( nBuf>0 );
  if( aBuf[nBuf-1]=='e' ){
    if( fts5Porter_MGt1(aBuf, nBuf-1) 
     || (fts5Porter_MEq1(aBuf, nBuf-1) && !fts5Porter_Ostar(aBuf, nBuf-1))
    ){
      nBuf--;
    }
  }

  /* Step 5b. */
  if( nBuf>1 && aBuf[nBuf-1]=='l' 
   && aBuf[nBuf-2]=='l' && fts5Porter_MGt1(aBuf, nBuf-1) 
  ){
    nBuf--;
  }

  return p->xToken(p->pCtx, aBuf, nBuf, iStart, iEnd);

 pass_through:
  return p->xToken(p->pCtx, pToken, nToken, iStart, iEnd);
}

/*
** Tokenize using the porter tokenizer.
*/
static int fts5PorterTokenize(
  Fts5Tokenizer *pTokenizer,
  void *pCtx,
  const char *pText, int nText,
  int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd)
){
  PorterTokenizer *p = (PorterTokenizer*)pTokenizer;
  PorterContext sCtx;
  sCtx.xToken = xToken;
  sCtx.pCtx = pCtx;
  sCtx.aBuf = p->aBuf;
  return p->tokenizer.xTokenize(
      p->pTokenizer, (void*)&sCtx, pText, nText, fts5PorterCb
  );
}

/*
** Register all built-in tokenizers with FTS5.
*/
int sqlite3Fts5TokenizerInit(fts5_api *pApi){
  struct BuiltinTokenizer {
    const char *zName;
    fts5_tokenizer x;
  } aBuiltin[] = {
    { "unicode61", {fts5UnicodeCreate, fts5UnicodeDelete, fts5UnicodeTokenize}},
    { "ascii",     {fts5AsciiCreate, fts5AsciiDelete, fts5AsciiTokenize }},
    { "porter",    {fts5PorterCreate, fts5PorterDelete, fts5PorterTokenize }},
  };
  
  int rc = SQLITE_OK;             /* Return code */
  int i;                          /* To iterate through builtin functions */

  for(i=0; rc==SQLITE_OK && i<sizeof(aBuiltin)/sizeof(aBuiltin[0]); i++){
    rc = pApi->xCreateTokenizer(pApi,
        aBuiltin[i].zName,
        (void*)pApi,
        &aBuiltin[i].x,
        0
    );
  }

  return SQLITE_OK;
}
#endif /* defined(SQLITE_ENABLE_FTS5) */


Added ext/fts5/fts5_unicode2.c.




















































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
/*
** 2012 May 25
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
*/

/*
** DO NOT EDIT THIS MACHINE GENERATED FILE.
*/

#if defined(SQLITE_ENABLE_FTS5)

#include <assert.h>

/*
** Return true if the argument corresponds to a unicode codepoint
** classified as either a letter or a number. Otherwise false.
**
** The results are undefined if the value passed to this function
** is less than zero.
*/
int sqlite3Fts5UnicodeIsalnum(int c){
  /* Each unsigned integer in the following array corresponds to a contiguous
  ** range of unicode codepoints that are not either letters or numbers (i.e.
  ** codepoints for which this function should return 0).
  **
  ** The most significant 22 bits in each 32-bit value contain the first 
  ** codepoint in the range. The least significant 10 bits are used to store
  ** the size of the range (always at least 1). In other words, the value 
  ** ((C<<22) + N) represents a range of N codepoints starting with codepoint 
  ** C. It is not possible to represent a range larger than 1023 codepoints 
  ** using this format.
  */
  static const unsigned int aEntry[] = {
    0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07,
    0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01,
    0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401,
    0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01,
    0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01,
    0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802,
    0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F,
    0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401,
    0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804,
    0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403,
    0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812,
    0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001,
    0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802,
    0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805,
    0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401,
    0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03,
    0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807,
    0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001,
    0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01,
    0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804,
    0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001,
    0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802,
    0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01,
    0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06,
    0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007,
    0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006,
    0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417,
    0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14,
    0x00412806, 0x00415804, 0x00417803, 0x00418803, 0x00419C07,
    0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01,
    0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001,
    0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802,
    0x005DC802, 0x005ED023, 0x005F6004, 0x005F7401, 0x0060000F,
    0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002,
    0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802,
    0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006,
    0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D,
    0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802,
    0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027,
    0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403,
    0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805,
    0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04,
    0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401,
    0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005,
    0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B,
    0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A,
    0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001,
    0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59,
    0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807,
    0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01,
    0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E,
    0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100,
    0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10,
    0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402,
    0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804,
    0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012,
    0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004,
    0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002,
    0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803,
    0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07,
    0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02,
    0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802,
    0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, 0x03F95013,
    0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, 0x03FCEC06,
    0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, 0x04040003,
    0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, 0x040E7C01,
    0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, 0x04280403,
    0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, 0x04294009,
    0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, 0x04420003,
    0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, 0x04460003,
    0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, 0x05BD442E,
    0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, 0x07480046,
    0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401,
    0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401,
    0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F,
    0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, 0x07C4C03C,
    0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, 0x07C94002,
    0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025,
    0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6,
    0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46,
    0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060,
    0x380400F0,
  };
  static const unsigned int aAscii[4] = {
    0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001,
  };

  if( c<128 ){
    return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 );
  }else if( c<(1<<22) ){
    unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
    int iRes = 0;
    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
    int iLo = 0;
    while( iHi>=iLo ){
      int iTest = (iHi + iLo) / 2;
      if( key >= aEntry[iTest] ){
        iRes = iTest;
        iLo = iTest+1;
      }else{
        iHi = iTest-1;
      }
    }
    assert( aEntry[0]<key );
    assert( key>=aEntry[iRes] );
    return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));
  }
  return 1;
}


/*
** If the argument is a codepoint corresponding to a lowercase letter
** in the ASCII range with a diacritic added, return the codepoint
** of the ASCII letter only. For example, if passed 235 - "LATIN
** SMALL LETTER E WITH DIAERESIS" - return 65 ("LATIN SMALL LETTER
** E"). The resuls of passing a codepoint that corresponds to an
** uppercase letter are undefined.
*/
static int fts5_remove_diacritic(int c){
  unsigned short aDia[] = {
        0,  1797,  1848,  1859,  1891,  1928,  1940,  1995, 
     2024,  2040,  2060,  2110,  2168,  2206,  2264,  2286, 
     2344,  2383,  2472,  2488,  2516,  2596,  2668,  2732, 
     2782,  2842,  2894,  2954,  2984,  3000,  3028,  3336, 
     3456,  3696,  3712,  3728,  3744,  3896,  3912,  3928, 
     3968,  4008,  4040,  4106,  4138,  4170,  4202,  4234, 
     4266,  4296,  4312,  4344,  4408,  4424,  4472,  4504, 
     6148,  6198,  6264,  6280,  6360,  6429,  6505,  6529, 
    61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726, 
    61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122, 
    62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536, 
    62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730, 
    62924, 63050, 63082, 63274, 63390, 
  };
  char aChar[] = {
    '\0', 'a',  'c',  'e',  'i',  'n',  'o',  'u',  'y',  'y',  'a',  'c',  
    'd',  'e',  'e',  'g',  'h',  'i',  'j',  'k',  'l',  'n',  'o',  'r',  
    's',  't',  'u',  'u',  'w',  'y',  'z',  'o',  'u',  'a',  'i',  'o',  
    'u',  'g',  'k',  'o',  'j',  'g',  'n',  'a',  'e',  'i',  'o',  'r',  
    'u',  's',  't',  'h',  'a',  'e',  'o',  'y',  '\0', '\0', '\0', '\0', 
    '\0', '\0', '\0', '\0', 'a',  'b',  'd',  'd',  'e',  'f',  'g',  'h',  
    'h',  'i',  'k',  'l',  'l',  'm',  'n',  'p',  'r',  'r',  's',  't',  
    'u',  'v',  'w',  'w',  'x',  'y',  'z',  'h',  't',  'w',  'y',  'a',  
    'e',  'i',  'o',  'u',  'y',  
  };

  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;
  while( iHi>=iLo ){
    int iTest = (iHi + iLo) / 2;
    if( key >= aDia[iTest] ){
      iRes = iTest;
      iLo = iTest+1;
    }else{
      iHi = iTest-1;
    }
  }
  assert( key>=aDia[iRes] );
  return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);
}


/*
** Return true if the argument interpreted as a unicode codepoint
** is a diacritical modifier character.
*/
int sqlite3Fts5UnicodeIsdiacritic(int c){
  unsigned int mask0 = 0x08029FDF;
  unsigned int mask1 = 0x000361F8;
  if( c<768 || c>817 ) return 0;
  return (c < 768+32) ?
      (mask0 & (1 << (c-768))) :
      (mask1 & (1 << (c-768-32)));
}


/*
** Interpret the argument as a unicode codepoint. If the codepoint
** is an upper case character that has a lower case equivalent,
** return the codepoint corresponding to the lower case version.
** Otherwise, return a copy of the argument.
**
** The results are undefined if the value passed to this function
** is less than zero.
*/
int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic){
  /* Each entry in the following array defines a rule for folding a range
  ** of codepoints to lower case. The rule applies to a range of nRange
  ** codepoints starting at codepoint iCode.
  **
  ** If the least significant bit in flags is clear, then the rule applies
  ** to all nRange codepoints (i.e. all nRange codepoints are upper case and
  ** need to be folded). Or, if it is set, then the rule only applies to
  ** every second codepoint in the range, starting with codepoint C.
  **
  ** The 7 most significant bits in flags are an index into the aiOff[]
  ** array. If a specific codepoint C does require folding, then its lower
  ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).
  **
  ** The contents of this array are generated by parsing the CaseFolding.txt
  ** file distributed as part of the "Unicode Character Database". See
  ** http://www.unicode.org for details.
  */
  static const struct TableEntry {
    unsigned short iCode;
    unsigned char flags;
    unsigned char nRange;
  } aEntry[] = {
    {65, 14, 26},          {181, 64, 1},          {192, 14, 23},
    {216, 14, 7},          {256, 1, 48},          {306, 1, 6},
    {313, 1, 16},          {330, 1, 46},          {376, 116, 1},
    {377, 1, 6},           {383, 104, 1},         {385, 50, 1},
    {386, 1, 4},           {390, 44, 1},          {391, 0, 1},
    {393, 42, 2},          {395, 0, 1},           {398, 32, 1},
    {399, 38, 1},          {400, 40, 1},          {401, 0, 1},
    {403, 42, 1},          {404, 46, 1},          {406, 52, 1},
    {407, 48, 1},          {408, 0, 1},           {412, 52, 1},
    {413, 54, 1},          {415, 56, 1},          {416, 1, 6},
    {422, 60, 1},          {423, 0, 1},           {425, 60, 1},
    {428, 0, 1},           {430, 60, 1},          {431, 0, 1},
    {433, 58, 2},          {435, 1, 4},           {439, 62, 1},
    {440, 0, 1},           {444, 0, 1},           {452, 2, 1},
    {453, 0, 1},           {455, 2, 1},           {456, 0, 1},
    {458, 2, 1},           {459, 1, 18},          {478, 1, 18},
    {497, 2, 1},           {498, 1, 4},           {502, 122, 1},
    {503, 134, 1},         {504, 1, 40},          {544, 110, 1},
    {546, 1, 18},          {570, 70, 1},          {571, 0, 1},
    {573, 108, 1},         {574, 68, 1},          {577, 0, 1},
    {579, 106, 1},         {580, 28, 1},          {581, 30, 1},
    {582, 1, 10},          {837, 36, 1},          {880, 1, 4},
    {886, 0, 1},           {902, 18, 1},          {904, 16, 3},
    {908, 26, 1},          {910, 24, 2},          {913, 14, 17},
    {931, 14, 9},          {962, 0, 1},           {975, 4, 1},
    {976, 140, 1},         {977, 142, 1},         {981, 146, 1},
    {982, 144, 1},         {984, 1, 24},          {1008, 136, 1},
    {1009, 138, 1},        {1012, 130, 1},        {1013, 128, 1},
    {1015, 0, 1},          {1017, 152, 1},        {1018, 0, 1},
    {1021, 110, 3},        {1024, 34, 16},        {1040, 14, 32},
    {1120, 1, 34},         {1162, 1, 54},         {1216, 6, 1},
    {1217, 1, 14},         {1232, 1, 88},         {1329, 22, 38},
    {4256, 66, 38},        {4295, 66, 1},         {4301, 66, 1},
    {7680, 1, 150},        {7835, 132, 1},        {7838, 96, 1},
    {7840, 1, 96},         {7944, 150, 8},        {7960, 150, 6},
    {7976, 150, 8},        {7992, 150, 8},        {8008, 150, 6},
    {8025, 151, 8},        {8040, 150, 8},        {8072, 150, 8},
    {8088, 150, 8},        {8104, 150, 8},        {8120, 150, 2},
    {8122, 126, 2},        {8124, 148, 1},        {8126, 100, 1},
    {8136, 124, 4},        {8140, 148, 1},        {8152, 150, 2},
    {8154, 120, 2},        {8168, 150, 2},        {8170, 118, 2},
    {8172, 152, 1},        {8184, 112, 2},        {8186, 114, 2},
    {8188, 148, 1},        {8486, 98, 1},         {8490, 92, 1},
    {8491, 94, 1},         {8498, 12, 1},         {8544, 8, 16},
    {8579, 0, 1},          {9398, 10, 26},        {11264, 22, 47},
    {11360, 0, 1},         {11362, 88, 1},        {11363, 102, 1},
    {11364, 90, 1},        {11367, 1, 6},         {11373, 84, 1},
    {11374, 86, 1},        {11375, 80, 1},        {11376, 82, 1},
    {11378, 0, 1},         {11381, 0, 1},         {11390, 78, 2},
    {11392, 1, 100},       {11499, 1, 4},         {11506, 0, 1},
    {42560, 1, 46},        {42624, 1, 24},        {42786, 1, 14},
    {42802, 1, 62},        {42873, 1, 4},         {42877, 76, 1},
    {42878, 1, 10},        {42891, 0, 1},         {42893, 74, 1},
    {42896, 1, 4},         {42912, 1, 10},        {42922, 72, 1},
    {65313, 14, 26},       
  };
  static const unsigned short aiOff[] = {
   1,     2,     8,     15,    16,    26,    28,    32,    
   37,    38,    40,    48,    63,    64,    69,    71,    
   79,    80,    116,   202,   203,   205,   206,   207,   
   209,   210,   211,   213,   214,   217,   218,   219,   
   775,   7264,  10792, 10795, 23228, 23256, 30204, 54721, 
   54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274, 
   57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406, 
   65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462, 
   65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511, 
   65514, 65521, 65527, 65528, 65529, 
  };

  int ret = c;

  assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );

  if( c<128 ){
    if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
  }else if( c<65536 ){
    const struct TableEntry *p;
    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
    int iLo = 0;
    int iRes = -1;

    assert( c>aEntry[0].iCode );
    while( iHi>=iLo ){
      int iTest = (iHi + iLo) / 2;
      int cmp = (c - aEntry[iTest].iCode);
      if( cmp>=0 ){
        iRes = iTest;
        iLo = iTest+1;
      }else{
        iHi = iTest-1;
      }
    }

    assert( iRes>=0 && c>=aEntry[iRes].iCode );
    p = &aEntry[iRes];
    if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
      ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
      assert( ret>0 );
    }

    if( bRemoveDiacritic ) ret = fts5_remove_diacritic(ret);
  }
  
  else if( c>=66560 && c<66600 ){
    ret = c + 40;
  }

  return ret;
}
#endif /* defined(SQLITE_ENABLE_FTS5) */
Added ext/fts5/fts5_varint.c.
















































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
/*
** 2015 May 30
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
** Routines for varint serialization and deserialization.
*/

#ifdef SQLITE_ENABLE_FTS5

#include "fts5Int.h"

/*
** This is a copy of the sqlite3GetVarint32() routine from the SQLite core.
** Except, this version does handle the single byte case that the core
** version depends on being handled before its function is called.
*/
int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v){
  u32 a,b;

  /* The 1-byte case. Overwhelmingly the most common. */
  a = *p;
  /* a: p0 (unmasked) */
  if (!(a&0x80))
  {
    /* Values between 0 and 127 */
    *v = a;
    return 1;
  }

  /* The 2-byte case */
  p++;
  b = *p;
  /* b: p1 (unmasked) */
  if (!(b&0x80))
  {
    /* Values between 128 and 16383 */
    a &= 0x7f;
    a = a<<7;
    *v = a | b;
    return 2;
  }

  /* The 3-byte case */
  p++;
  a = a<<14;
  a |= *p;
  /* a: p0<<14 | p2 (unmasked) */
  if (!(a&0x80))
  {
    /* Values between 16384 and 2097151 */
    a &= (0x7f<<14)|(0x7f);
    b &= 0x7f;
    b = b<<7;
    *v = a | b;
    return 3;
  }

  /* A 32-bit varint is used to store size information in btrees.
  ** Objects are rarely larger than 2MiB limit of a 3-byte varint.
  ** A 3-byte varint is sufficient, for example, to record the size
  ** of a 1048569-byte BLOB or string.
  **
  ** We only unroll the first 1-, 2-, and 3- byte cases.  The very
  ** rare larger cases can be handled by the slower 64-bit varint
  ** routine.
  */
  {
    u64 v64;
    u8 n;
    p -= 2;
    n = sqlite3Fts5GetVarint(p, &v64);
    *v = (u32)v64;
    assert( n>3 && n<=9 );
    return n;
  }
}


/*
** Bitmasks used by sqlite3GetVarint().  These precomputed constants
** are defined here rather than simply putting the constant expressions
** inline in order to work around bugs in the RVT compiler.
**
** SLOT_2_0     A mask for  (0x7f<<14) | 0x7f
**
** SLOT_4_2_0   A mask for  (0x7f<<28) | SLOT_2_0
*/
#define SLOT_2_0     0x001fc07f
#define SLOT_4_2_0   0xf01fc07f

/*
** Read a 64-bit variable-length integer from memory starting at p[0].
** Return the number of bytes read.  The value is stored in *v.
*/
u8 sqlite3Fts5GetVarint(const unsigned char *p, u64 *v){
  u32 a,b,s;

  a = *p;
  /* a: p0 (unmasked) */
  if (!(a&0x80))
  {
    *v = a;
    return 1;
  }

  p++;
  b = *p;
  /* b: p1 (unmasked) */
  if (!(b&0x80))
  {
    a &= 0x7f;
    a = a<<7;
    a |= b;
    *v = a;
    return 2;
  }

  /* Verify that constants are precomputed correctly */
  assert( SLOT_2_0 == ((0x7f<<14) | (0x7f)) );
  assert( SLOT_4_2_0 == ((0xfU<<28) | (0x7f<<14) | (0x7f)) );

  p++;
  a = a<<14;
  a |= *p;
  /* a: p0<<14 | p2 (unmasked) */
  if (!(a&0x80))
  {
    a &= SLOT_2_0;
    b &= 0x7f;
    b = b<<7;
    a |= b;
    *v = a;
    return 3;
  }

  /* CSE1 from below */
  a &= SLOT_2_0;
  p++;
  b = b<<14;
  b |= *p;
  /* b: p1<<14 | p3 (unmasked) */
  if (!(b&0x80))
  {
    b &= SLOT_2_0;
    /* moved CSE1 up */
    /* a &= (0x7f<<14)|(0x7f); */
    a = a<<7;
    a |= b;
    *v = a;
    return 4;
  }

  /* a: p0<<14 | p2 (masked) */
  /* b: p1<<14 | p3 (unmasked) */
  /* 1:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
  /* moved CSE1 up */
  /* a &= (0x7f<<14)|(0x7f); */
  b &= SLOT_2_0;
  s = a;
  /* s: p0<<14 | p2 (masked) */

  p++;
  a = a<<14;
  a |= *p;
  /* a: p0<<28 | p2<<14 | p4 (unmasked) */
  if (!(a&0x80))
  {
    /* we can skip these cause they were (effectively) done above in calc'ing s */
    /* a &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
    /* b &= (0x7f<<14)|(0x7f); */
    b = b<<7;
    a |= b;
    s = s>>18;
    *v = ((u64)s)<<32 | a;
    return 5;
  }

  /* 2:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
  s = s<<7;
  s |= b;
  /* s: p0<<21 | p1<<14 | p2<<7 | p3 (masked) */

  p++;
  b = b<<14;
  b |= *p;
  /* b: p1<<28 | p3<<14 | p5 (unmasked) */
  if (!(b&0x80))
  {
    /* we can skip this cause it was (effectively) done above in calc'ing s */
    /* b &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
    a &= SLOT_2_0;
    a = a<<7;
    a |= b;
    s = s>>18;
    *v = ((u64)s)<<32 | a;
    return 6;
  }

  p++;
  a = a<<14;
  a |= *p;
  /* a: p2<<28 | p4<<14 | p6 (unmasked) */
  if (!(a&0x80))
  {
    a &= SLOT_4_2_0;
    b &= SLOT_2_0;
    b = b<<7;
    a |= b;
    s = s>>11;
    *v = ((u64)s)<<32 | a;
    return 7;
  }

  /* CSE2 from below */
  a &= SLOT_2_0;
  p++;
  b = b<<14;
  b |= *p;
  /* b: p3<<28 | p5<<14 | p7 (unmasked) */
  if (!(b&0x80))
  {
    b &= SLOT_4_2_0;
    /* moved CSE2 up */
    /* a &= (0x7f<<14)|(0x7f); */
    a = a<<7;
    a |= b;
    s = s>>4;
    *v = ((u64)s)<<32 | a;
    return 8;
  }

  p++;
  a = a<<15;
  a |= *p;
  /* a: p4<<29 | p6<<15 | p8 (unmasked) */

  /* moved CSE2 up */
  /* a &= (0x7f<<29)|(0x7f<<15)|(0xff); */
  b &= SLOT_2_0;
  b = b<<8;
  a |= b;

  s = s<<4;
  b = p[-4];
  b &= 0x7f;
  b = b>>3;
  s |= b;

  *v = ((u64)s)<<32 | a;

  return 9;
}

/*
** The variable-length integer encoding is as follows:
**
** KEY:
**         A = 0xxxxxxx    7 bits of data and one flag bit
**         B = 1xxxxxxx    7 bits of data and one flag bit
**         C = xxxxxxxx    8 bits of data
**
**  7 bits - A
** 14 bits - BA
** 21 bits - BBA
** 28 bits - BBBA
** 35 bits - BBBBA
** 42 bits - BBBBBA
** 49 bits - BBBBBBA
** 56 bits - BBBBBBBA
** 64 bits - BBBBBBBBC
*/

#ifdef SQLITE_NOINLINE
# define FTS5_NOINLINE SQLITE_NOINLINE
#else
# define FTS5_NOINLINE
#endif

/*
** Write a 64-bit variable-length integer to memory starting at p[0].
** The length of data write will be between 1 and 9 bytes.  The number
** of bytes written is returned.
**
** A variable-length integer consists of the lower 7 bits of each byte
** for all bytes that have the 8th bit set and one byte with the 8th
** bit clear.  Except, if we get to the 9th byte, it stores the full
** 8 bits and is the last byte.
*/
static int FTS5_NOINLINE fts5PutVarint64(unsigned char *p, u64 v){
  int i, j, n;
  u8 buf[10];
  if( v & (((u64)0xff000000)<<32) ){
    p[8] = (u8)v;
    v >>= 8;
    for(i=7; i>=0; i--){
      p[i] = (u8)((v & 0x7f) | 0x80);
      v >>= 7;
    }
    return 9;
  }    
  n = 0;
  do{
    buf[n++] = (u8)((v & 0x7f) | 0x80);
    v >>= 7;
  }while( v!=0 );
  buf[0] &= 0x7f;
  assert( n<=9 );
  for(i=0, j=n-1; j>=0; j--, i++){
    p[i] = buf[j];
  }
  return n;
}

int sqlite3Fts5PutVarint(unsigned char *p, u64 v){
  if( v<=0x7f ){
    p[0] = v&0x7f;
    return 1;
  }
  if( v<=0x3fff ){
    p[0] = ((v>>7)&0x7f)|0x80;
    p[1] = v&0x7f;
    return 2;
  }
  return fts5PutVarint64(p,v);
}


int sqlite3Fts5GetVarintLen(u32 iVal){
  if( iVal<(1 << 7 ) ) return 1;
  if( iVal<(1 << 14) ) return 2;
  if( iVal<(1 << 21) ) return 3;
  if( iVal<(1 << 28) ) return 4;
  return 5;
}

#endif /* SQLITE_ENABLE_FTS5 */
Added ext/fts5/fts5_vocab.c.






















































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
/*
** 2015 May 08
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
** This is an SQLite virtual table module implementing direct access to an
** existing FTS5 index. The module may create several different types of 
** tables:
**
** col:
**     CREATE TABLE vocab(term, col, doc, cnt, PRIMARY KEY(term, col));
**
**   One row for each term/column combination. The value of $doc is set to
**   the number of fts5 rows that contain at least one instance of term
**   $term within column $col. Field $cnt is set to the total number of 
**   instances of term $term in column $col (in any row of the fts5 table). 
**
** row:
**     CREATE TABLE vocab(term, doc, cnt, PRIMARY KEY(term));
**
**   One row for each term in the database. The value of $doc is set to
**   the number of fts5 rows that contain at least one instance of term
**   $term. Field $cnt is set to the total number of instances of term 
**   $term in the database.
*/

#if defined(SQLITE_ENABLE_FTS5)

#include "fts5Int.h"


typedef struct Fts5VocabTable Fts5VocabTable;
typedef struct Fts5VocabCursor Fts5VocabCursor;

struct Fts5VocabTable {
  sqlite3_vtab base;
  char *zFts5Tbl;                 /* Name of fts5 table */
  char *zFts5Db;                  /* Db containing fts5 table */
  sqlite3 *db;                    /* Database handle */
  Fts5Global *pGlobal;            /* FTS5 global object for this database */
  int eType;                      /* FTS5_VOCAB_COL or ROW */
};

struct Fts5VocabCursor {
  sqlite3_vtab_cursor base;
  sqlite3_stmt *pStmt;            /* Statement holding lock on pIndex */
  Fts5Index *pIndex;              /* Associated FTS5 index */

  int bEof;                       /* True if this cursor is at EOF */
  Fts5IndexIter *pIter;           /* Term/rowid iterator object */

  /* These are used by 'col' tables only */
  int nCol;
  int iCol;
  i64 *aCnt;
  i64 *aDoc;

  /* Output values */
  i64 rowid;                      /* This table's current rowid value */
  Fts5Buffer term;                /* Current value of 'term' column */
  i64 aVal[3];                    /* Up to three columns left of 'term' */
};

#define FTS5_VOCAB_COL    0
#define FTS5_VOCAB_ROW    1

#define FTS5_VOCAB_COL_SCHEMA  "term, col, doc, cnt"
#define FTS5_VOCAB_ROW_SCHEMA  "term, doc, cnt"

/*
** Translate a string containing an fts5vocab table type to an 
** FTS5_VOCAB_XXX constant. If successful, set *peType to the output
** value and return SQLITE_OK. Otherwise, set *pzErr to an error message
** and return SQLITE_ERROR.
*/
static int fts5VocabTableType(const char *zType, char **pzErr, int *peType){
  int rc = SQLITE_OK;
  char *zCopy = sqlite3Fts5Strndup(&rc, zType, -1);
  if( rc==SQLITE_OK ){
    sqlite3Fts5Dequote(zCopy);
    if( sqlite3_stricmp(zCopy, "col")==0 ){
      *peType = FTS5_VOCAB_COL;
    }else

    if( sqlite3_stricmp(zCopy, "row")==0 ){
      *peType = FTS5_VOCAB_ROW;
    }else
    {
      *pzErr = sqlite3_mprintf("fts5vocab: unknown table type: %Q", zCopy);
      rc = SQLITE_ERROR;
    }
    sqlite3_free(zCopy);
  }

  return rc;
}


/*
** The xDisconnect() virtual table method.
*/
static int fts5VocabDisconnectMethod(sqlite3_vtab *pVtab){
  Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab;
  sqlite3_free(pTab);
  return SQLITE_OK;
}

/*
** The xDestroy() virtual table method.
*/
static int fts5VocabDestroyMethod(sqlite3_vtab *pVtab){
  Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab;
  sqlite3_free(pTab);
  return SQLITE_OK;
}

/*
** This function is the implementation of both the xConnect and xCreate
** methods of the FTS3 virtual table.
**
** The argv[] array contains the following:
**
**   argv[0]   -> module name  ("fts5vocab")
**   argv[1]   -> database name
**   argv[2]   -> table name
**
** then:
**
**   argv[3]   -> name of fts5 table
**   argv[4]   -> type of fts5vocab table
**
** or, for tables in the TEMP schema only.
**
**   argv[3]   -> name of fts5 tables database
**   argv[4]   -> name of fts5 table
**   argv[5]   -> type of fts5vocab table
*/
static int fts5VocabInitVtab(
  sqlite3 *db,                    /* The SQLite database connection */
  void *pAux,                     /* Pointer to Fts5Global object */
  int argc,                       /* Number of elements in argv array */
  const char * const *argv,       /* xCreate/xConnect argument array */
  sqlite3_vtab **ppVTab,          /* Write the resulting vtab structure here */
  char **pzErr                    /* Write any error message here */
){
  const char *azSchema[] = { 
    "CREATE TABlE vocab(" FTS5_VOCAB_COL_SCHEMA  ")", 
    "CREATE TABlE vocab(" FTS5_VOCAB_ROW_SCHEMA  ")"
  };

  Fts5VocabTable *pRet = 0;
  int rc = SQLITE_OK;             /* Return code */
  int bDb;

  bDb = (argc==6 && strlen(argv[1])==4 && memcmp("temp", argv[1], 4)==0);

  if( argc!=5 && bDb==0 ){
    *pzErr = sqlite3_mprintf("wrong number of vtable arguments");
    rc = SQLITE_ERROR;
  }else{
    int nByte;                      /* Bytes of space to allocate */
    const char *zDb = bDb ? argv[3] : argv[1];
    const char *zTab = bDb ? argv[4] : argv[3];
    const char *zType = bDb ? argv[5] : argv[4];
    int nDb = strlen(zDb)+1; 
    int nTab = strlen(zTab)+1;
    int eType;
    
    rc = fts5VocabTableType(zType, pzErr, &eType);
    if( rc==SQLITE_OK ){
      assert( eType>=0 && eType<sizeof(azSchema)/sizeof(azSchema[0]) );
      rc = sqlite3_declare_vtab(db, azSchema[eType]);
    }

    nByte = sizeof(Fts5VocabTable) + nDb + nTab;
    pRet = sqlite3Fts5MallocZero(&rc, nByte);
    if( pRet ){
      pRet->pGlobal = (Fts5Global*)pAux;
      pRet->eType = eType;
      pRet->db = db;
      pRet->zFts5Tbl = (char*)&pRet[1];
      pRet->zFts5Db = &pRet->zFts5Tbl[nTab];
      memcpy(pRet->zFts5Tbl, zTab, nTab);
      memcpy(pRet->zFts5Db, zDb, nDb);
      sqlite3Fts5Dequote(pRet->zFts5Tbl);
      sqlite3Fts5Dequote(pRet->zFts5Db);
    }
  }

  *ppVTab = (sqlite3_vtab*)pRet;
  return rc;
}


/*
** The xConnect() and xCreate() methods for the virtual table. All the
** work is done in function fts5VocabInitVtab().
*/
static int fts5VocabConnectMethod(
  sqlite3 *db,                    /* Database connection */
  void *pAux,                     /* Pointer to tokenizer hash table */
  int argc,                       /* Number of elements in argv array */
  const char * const *argv,       /* xCreate/xConnect argument array */
  sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
  char **pzErr                    /* OUT: sqlite3_malloc'd error message */
){
  return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr);
}
static int fts5VocabCreateMethod(
  sqlite3 *db,                    /* Database connection */
  void *pAux,                     /* Pointer to tokenizer hash table */
  int argc,                       /* Number of elements in argv array */
  const char * const *argv,       /* xCreate/xConnect argument array */
  sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
  char **pzErr                    /* OUT: sqlite3_malloc'd error message */
){
  return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr);
}

/* 
** Implementation of the xBestIndex method.
*/
static int fts5VocabBestIndexMethod(
  sqlite3_vtab *pVTab, 
  sqlite3_index_info *pInfo
){
  return SQLITE_OK;
}

/*
** Implementation of xOpen method.
*/
static int fts5VocabOpenMethod(
  sqlite3_vtab *pVTab, 
  sqlite3_vtab_cursor **ppCsr
){
  Fts5VocabTable *pTab = (Fts5VocabTable*)pVTab;
  Fts5Index *pIndex = 0;
  int nCol = 0;
  Fts5VocabCursor *pCsr = 0;
  int rc = SQLITE_OK;
  sqlite3_stmt *pStmt = 0;
  char *zSql = 0;
  int nByte;

  zSql = sqlite3Fts5Mprintf(&rc,
      "SELECT t.%Q FROM %Q.%Q AS t WHERE t.%Q MATCH '*id'",
      pTab->zFts5Tbl, pTab->zFts5Db, pTab->zFts5Tbl, pTab->zFts5Tbl
  );
  if( zSql ){
    rc = sqlite3_prepare_v2(pTab->db, zSql, -1, &pStmt, 0);
  }
  sqlite3_free(zSql);
  assert( rc==SQLITE_OK || pStmt==0 );
  if( rc==SQLITE_ERROR ) rc = SQLITE_OK;

  if( pStmt && sqlite3_step(pStmt)==SQLITE_ROW ){
    i64 iId = sqlite3_column_int64(pStmt, 0);
    pIndex = sqlite3Fts5IndexFromCsrid(pTab->pGlobal, iId, &nCol);
  }

  if( rc==SQLITE_OK && pIndex==0 ){
    rc = sqlite3_finalize(pStmt);
    pStmt = 0;
    if( rc==SQLITE_OK ){
      pVTab->zErrMsg = sqlite3_mprintf(
          "no such fts5 table: %s.%s", pTab->zFts5Db, pTab->zFts5Tbl
      );
      rc = SQLITE_ERROR;
    }
  }

  nByte = nCol * sizeof(i64) * 2 + sizeof(Fts5VocabCursor);
  pCsr = (Fts5VocabCursor*)sqlite3Fts5MallocZero(&rc, nByte);
  if( pCsr ){
    pCsr->pIndex = pIndex;
    pCsr->pStmt = pStmt;
    pCsr->nCol = nCol;
    pCsr->aCnt = (i64*)&pCsr[1];
    pCsr->aDoc = &pCsr->aCnt[nCol];
  }else{
    sqlite3_finalize(pStmt);
  }

  *ppCsr = (sqlite3_vtab_cursor*)pCsr;
  return rc;
}

static void fts5VocabResetCursor(Fts5VocabCursor *pCsr){
  pCsr->rowid = 0;
  sqlite3Fts5IterClose(pCsr->pIter);
  pCsr->pIter = 0;
}

/*
** Close the cursor.  For additional information see the documentation
** on the xClose method of the virtual table interface.
*/
static int fts5VocabCloseMethod(sqlite3_vtab_cursor *pCursor){
  Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
  fts5VocabResetCursor(pCsr);
  sqlite3Fts5BufferFree(&pCsr->term);
  sqlite3_finalize(pCsr->pStmt);
  sqlite3_free(pCsr);
  return SQLITE_OK;
}


/*
** Advance the cursor to the next row in the table.
*/
static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){
  Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
  Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab;
  int rc = SQLITE_OK;

  pCsr->rowid++;

  if( pTab->eType==FTS5_VOCAB_COL ){
    for(pCsr->iCol++; pCsr->iCol<pCsr->nCol; pCsr->iCol++){
      if( pCsr->aCnt[pCsr->iCol] ) break;
    }
  }

  if( pTab->eType==FTS5_VOCAB_ROW || pCsr->iCol>=pCsr->nCol ){
    if( sqlite3Fts5IterEof(pCsr->pIter) ){
      pCsr->bEof = 1;
    }else{
      const char *zTerm;
      int nTerm;

      zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
      sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm);
      memset(pCsr->aVal, 0, sizeof(pCsr->aVal));
      memset(pCsr->aCnt, 0, pCsr->nCol * sizeof(i64));
      memset(pCsr->aDoc, 0, pCsr->nCol * sizeof(i64));
      pCsr->iCol = 0;

      assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW );
      while( rc==SQLITE_OK ){
        i64 dummy;
        const u8 *pPos; int nPos;   /* Position list */
        i64 iPos = 0;               /* 64-bit position read from poslist */
        int iOff = 0;               /* Current offset within position list */

        rc = sqlite3Fts5IterPoslist(pCsr->pIter, &pPos, &nPos, &dummy);
        if( rc==SQLITE_OK ){
          if( pTab->eType==FTS5_VOCAB_ROW ){
            while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
              pCsr->aVal[1]++;
            }
            pCsr->aVal[0]++;
          }else{
            int iCol = -1;
            while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
              int ii = FTS5_POS2COLUMN(iPos);
              pCsr->aCnt[ii]++;
              if( iCol!=ii ){
                pCsr->aDoc[ii]++;
                iCol = ii;
              }
            }
          }
          rc = sqlite3Fts5IterNextScan(pCsr->pIter);
        }
        if( rc==SQLITE_OK ){
          zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
          if( nTerm!=pCsr->term.n || memcmp(zTerm, pCsr->term.p, nTerm) ) break;
          if( sqlite3Fts5IterEof(pCsr->pIter) ) break;
        }
      }
    }
  }

  if( pCsr->bEof==0 && pTab->eType==FTS5_VOCAB_COL ){
    while( pCsr->aCnt[pCsr->iCol]==0 ) pCsr->iCol++;
    pCsr->aVal[0] = pCsr->iCol;
    pCsr->aVal[1] = pCsr->aDoc[pCsr->iCol];
    pCsr->aVal[2] = pCsr->aCnt[pCsr->iCol];
  }
  return rc;
}

/*
** This is the xFilter implementation for the virtual table.
*/
static int fts5VocabFilterMethod(
  sqlite3_vtab_cursor *pCursor,   /* The cursor used for this query */
  int idxNum,                     /* Strategy index */
  const char *idxStr,             /* Unused */
  int nVal,                       /* Number of elements in apVal */
  sqlite3_value **apVal           /* Arguments for the indexing scheme */
){
  Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
  int rc;
  const int flags = FTS5INDEX_QUERY_SCAN;

  fts5VocabResetCursor(pCsr);
  rc = sqlite3Fts5IndexQuery(pCsr->pIndex, 0, 0, flags, &pCsr->pIter);
  if( rc==SQLITE_OK ){
    rc = fts5VocabNextMethod(pCursor);
  }

  return rc;
}

/* 
** This is the xEof method of the virtual table. SQLite calls this 
** routine to find out if it has reached the end of a result set.
*/
static int fts5VocabEofMethod(sqlite3_vtab_cursor *pCursor){
  Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
  return pCsr->bEof;
}

static int fts5VocabColumnMethod(
  sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
  sqlite3_context *pCtx,          /* Context for sqlite3_result_xxx() calls */
  int iCol                        /* Index of column to read value from */
){
  Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
  switch( iCol ){
    case 0: /* term */
      sqlite3_result_text(
          pCtx, (const char*)pCsr->term.p, pCsr->term.n, SQLITE_TRANSIENT
      );
      break;

    default:
      assert( iCol<4 && iCol>0 );
      sqlite3_result_int64(pCtx, pCsr->aVal[iCol-1]);
      break;
  }
  return SQLITE_OK;
}

/* 
** This is the xRowid method. The SQLite core calls this routine to
** retrieve the rowid for the current row of the result set. fts5
** exposes %_content.docid as the rowid for the virtual table. The
** rowid should be written to *pRowid.
*/
static int fts5VocabRowidMethod(
  sqlite3_vtab_cursor *pCursor, 
  sqlite_int64 *pRowid
){
  Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
  *pRowid = pCsr->rowid;
  return SQLITE_OK;
}

int sqlite3Fts5VocabInit(Fts5Global *pGlobal, sqlite3 *db){
  static const sqlite3_module fts5Vocab = {
    /* iVersion      */ 2,
    /* xCreate       */ fts5VocabCreateMethod,
    /* xConnect      */ fts5VocabConnectMethod,
    /* xBestIndex    */ fts5VocabBestIndexMethod,
    /* xDisconnect   */ fts5VocabDisconnectMethod,
    /* xDestroy      */ fts5VocabDestroyMethod,
    /* xOpen         */ fts5VocabOpenMethod,
    /* xClose        */ fts5VocabCloseMethod,
    /* xFilter       */ fts5VocabFilterMethod,
    /* xNext         */ fts5VocabNextMethod,
    /* xEof          */ fts5VocabEofMethod,
    /* xColumn       */ fts5VocabColumnMethod,
    /* xRowid        */ fts5VocabRowidMethod,
    /* xUpdate       */ 0,
    /* xBegin        */ 0,
    /* xSync         */ 0,
    /* xCommit       */ 0,
    /* xRollback     */ 0,
    /* xFindFunction */ 0,
    /* xRename       */ 0,
    /* xSavepoint    */ 0,
    /* xRelease      */ 0,
    /* xRollbackTo   */ 0,
  };
  void *p = (void*)pGlobal;

  return sqlite3_create_module_v2(db, "fts5vocab", &fts5Vocab, p, 0);
}
#endif /* defined(SQLITE_ENABLE_FTS5) */


Added ext/fts5/fts5parse.y.


























































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
/*
** 2014 May 31
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
*/


// All token codes are small integers with #defines that begin with "TK_"
%token_prefix FTS5_

// The type of the data attached to each token is Token.  This is also the
// default type for non-terminals.
//
%token_type {Fts5Token}
%default_type {Fts5Token}

// The generated parser function takes a 4th argument as follows:
%extra_argument {Fts5Parse *pParse}

// This code runs whenever there is a syntax error
//
%syntax_error {
  sqlite3Fts5ParseError(
    pParse, "fts5: syntax error near \"%.*s\"",TOKEN.n,TOKEN.p
  );
}
%stack_overflow {
  assert( 0 );
}

// The name of the generated procedure that implements the parser
// is as follows:
%name sqlite3Fts5Parser

// The following text is included near the beginning of the C source
// code file that implements the parser.
//
%include {
#include "fts5Int.h"
#include "fts5parse.h"

/*
** Disable all error recovery processing in the parser push-down
** automaton.
*/
#define YYNOERRORRECOVERY 1

/*
** Make yytestcase() the same as testcase()
*/
#define yytestcase(X) testcase(X)

} // end %include

%left OR.
%left AND.
%left NOT.
%left TERM.
%left COLON.

input ::= expr(X). { sqlite3Fts5ParseFinished(pParse, X); }

%type cnearset    {Fts5ExprNode*}
%type expr        {Fts5ExprNode*}
%type exprlist    {Fts5ExprNode*}
%destructor cnearset { sqlite3Fts5ParseNodeFree($$); }
%destructor expr     { sqlite3Fts5ParseNodeFree($$); }
%destructor exprlist { sqlite3Fts5ParseNodeFree($$); }

expr(A) ::= expr(X) AND expr(Y). {
  A = sqlite3Fts5ParseNode(pParse, FTS5_AND, X, Y, 0);
}
expr(A) ::= expr(X) OR expr(Y). {
  A = sqlite3Fts5ParseNode(pParse, FTS5_OR, X, Y, 0);
}
expr(A) ::= expr(X) NOT expr(Y). {
  A = sqlite3Fts5ParseNode(pParse, FTS5_NOT, X, Y, 0);
}

expr(A) ::= LP expr(X) RP. {A = X;}
expr(A) ::= exprlist(X).   {A = X;}

exprlist(A) ::= cnearset(X). {A = X;}
exprlist(A) ::= exprlist(X) cnearset(Y). {
  A = sqlite3Fts5ParseNode(pParse, FTS5_AND, X, Y, 0);
}

cnearset(A) ::= nearset(X). { 
  A = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, X); 
}
cnearset(A) ::= colset(X) COLON nearset(Y). { 
  sqlite3Fts5ParseSetColset(pParse, Y, X);
  A = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, Y); 
}

%type colset {Fts5ExprColset*}
%destructor colset { sqlite3_free($$); }
%type colsetlist {Fts5ExprColset*}
%destructor colsetlist { sqlite3_free($$); }

colset(A) ::= LCP colsetlist(X) RCP. { A = X; }
colset(A) ::= STRING(X). {
  A = sqlite3Fts5ParseColset(pParse, 0, &X);
}

colsetlist(A) ::= colsetlist(Y) STRING(X). { 
  A = sqlite3Fts5ParseColset(pParse, Y, &X); }
colsetlist(A) ::= STRING(X). { 
  A = sqlite3Fts5ParseColset(pParse, 0, &X); 
}


%type nearset     {Fts5ExprNearset*}
%type nearphrases {Fts5ExprNearset*}
%destructor nearset { sqlite3Fts5ParseNearsetFree($$); }
%destructor nearphrases { sqlite3Fts5ParseNearsetFree($$); }

nearset(A) ::= phrase(X). { A = sqlite3Fts5ParseNearset(pParse, 0, X); }
nearset(A) ::= STRING(X) LP nearphrases(Y) neardist_opt(Z) RP. {
  sqlite3Fts5ParseNear(pParse, &X);
  sqlite3Fts5ParseSetDistance(pParse, Y, &Z);
  A = Y;
}

nearphrases(A) ::= phrase(X). { 
  A = sqlite3Fts5ParseNearset(pParse, 0, X); 
}
nearphrases(A) ::= nearphrases(X) phrase(Y). {
  A = sqlite3Fts5ParseNearset(pParse, X, Y);
}

/*
** The optional ", <integer>" at the end of the NEAR() arguments.
*/
neardist_opt(A) ::= . { A.p = 0; A.n = 0; }
neardist_opt(A) ::= COMMA STRING(X). { A = X; }

/*
** A phrase. A set of primitives connected by "+" operators. Examples:
**
**     "the" + "quick brown" + fo *
**     "the quick brown fo" *
**     the+quick+brown+fo*
*/
%type phrase {Fts5ExprPhrase*}
%destructor phrase { sqlite3Fts5ParsePhraseFree($$); }

phrase(A) ::= phrase(X) PLUS STRING(Y) star_opt(Z). { 
  A = sqlite3Fts5ParseTerm(pParse, X, &Y, Z);
}
phrase(A) ::= STRING(Y) star_opt(Z). { 
  A = sqlite3Fts5ParseTerm(pParse, 0, &Y, Z);
}

/*
** Optional "*" character.
*/
%type star_opt {int}

star_opt(A) ::= STAR. { A = 1; }
star_opt(A) ::= . { A = 0; }




Added ext/fts5/mkportersteps.tcl.




























































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
#
# 2014 Jun 09
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#-------------------------------------------------------------------------
#
# This script generates the implementations of the following C functions,
# which are part of the porter tokenizer implementation:
#
#   static int fts5PorterStep1B(char *aBuf, int *pnBuf);
#   static int fts5PorterStep1B2(char *aBuf, int *pnBuf);
#   static int fts5PorterStep2(char *aBuf, int *pnBuf);
#   static int fts5PorterStep3(char *aBuf, int *pnBuf);
#   static int fts5PorterStep4(char *aBuf, int *pnBuf);
#

set O(Step1B2) {
  { at  {} ate 1 }
  { bl  {} ble 1 }
  { iz  {} ize 1 }
}

set O(Step1B) {
  { "eed"  fts5Porter_MGt0  "ee" 0 }
  { "ed"   fts5Porter_Vowel ""   1 }
  { "ing"  fts5Porter_Vowel ""   1 }
}

set O(Step2) {
  { "ational" fts5Porter_MGt0 "ate" } 
  { "tional"  fts5Porter_MGt0 "tion" } 
  { "enci"    fts5Porter_MGt0 "ence" } 
  { "anci"    fts5Porter_MGt0 "ance" } 
  { "izer"    fts5Porter_MGt0 "ize" } 
  { "logi"    fts5Porter_MGt0 "log" }
  { "bli"     fts5Porter_MGt0 "ble" }
  { "alli"    fts5Porter_MGt0 "al" } 
  { "entli"   fts5Porter_MGt0 "ent" } 
  { "eli"     fts5Porter_MGt0 "e" } 
  { "ousli"   fts5Porter_MGt0 "ous" } 
  { "ization" fts5Porter_MGt0 "ize" } 
  { "ation"   fts5Porter_MGt0 "ate" } 
  { "ator"    fts5Porter_MGt0 "ate" } 
  { "alism"   fts5Porter_MGt0 "al" } 
  { "iveness" fts5Porter_MGt0 "ive" } 
  { "fulness" fts5Porter_MGt0 "ful" } 
  { "ousness" fts5Porter_MGt0 "ous" } 
  { "aliti"   fts5Porter_MGt0 "al" } 
  { "iviti"   fts5Porter_MGt0 "ive" } 
  { "biliti"  fts5Porter_MGt0 "ble" } 
}

set O(Step3) {
  { "icate" fts5Porter_MGt0 "ic" } 
  { "ative" fts5Porter_MGt0 "" } 
  { "alize" fts5Porter_MGt0 "al" } 
  { "iciti" fts5Porter_MGt0 "ic" } 
  { "ical" fts5Porter_MGt0 "ic" } 
  { "ful" fts5Porter_MGt0 "" } 
  { "ness" fts5Porter_MGt0 "" } 
}

set O(Step4) {
  { "al" fts5Porter_MGt1 "" } 
  { "ance" fts5Porter_MGt1 "" } 
  { "ence" fts5Porter_MGt1 "" } 
  { "er" fts5Porter_MGt1 "" } 
  { "ic" fts5Porter_MGt1 "" } 
  { "able" fts5Porter_MGt1 "" } 
  { "ible" fts5Porter_MGt1 "" } 
  { "ant" fts5Porter_MGt1 "" } 
  { "ement" fts5Porter_MGt1 "" } 
  { "ment" fts5Porter_MGt1 "" } 
  { "ent" fts5Porter_MGt1 "" } 
  { "ion" fts5Porter_MGt1_and_S_or_T "" } 
  { "ou"  fts5Porter_MGt1 "" } 
  { "ism" fts5Porter_MGt1 "" } 
  { "ate" fts5Porter_MGt1 "" } 
  { "iti" fts5Porter_MGt1 "" } 
  { "ous" fts5Porter_MGt1 "" } 
  { "ive" fts5Porter_MGt1 "" } 
  { "ize" fts5Porter_MGt1 "" } 
}

proc sort_cb {lhs rhs} {
  set L [string range [lindex $lhs 0] end-1 end-1]
  set R [string range [lindex $rhs 0] end-1 end-1]
  string compare $L $R
}

proc create_step_function {name data} {

  set T(function) {
static int fts5Porter${name}(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;
  switch( aBuf[nBuf-2] ){
    ${switchbody}
  }
  return ret;
}
  }

  set T(case) {
    case '${k}': 
      ${ifstmts}
      break;
  }

  set T(if_0_0_0) {
      if( ${match} ){
        *pnBuf = nBuf - $n;
      }
  }
  set T(if_1_0_0) {
      if( ${match} ){
        if( ${cond} ){
          *pnBuf = nBuf - $n;
        }
      }
  }
  set T(if_0_1_0) {
      if( ${match} ){
        ${memcpy}
        *pnBuf = nBuf - $n + $nRep;
      }
  }
  set T(if_1_1_0) {
      if( ${match} ){
        if( ${cond} ){
          ${memcpy}
          *pnBuf = nBuf - $n + $nRep;
        }
      }
  }
  set T(if_1_0_1) {
      if( ${match} ){
        if( ${cond} ){
          *pnBuf = nBuf - $n;
          ret = 1;
        }
      }
  }
  set T(if_0_1_1) {
      if( ${match} ){
        ${memcpy}
        *pnBuf = nBuf - $n + $nRep;
        ret = 1;
      }
  }
  set T(if_1_1_1) {
      if( ${match} ){
        if( ${cond} ){
          ${memcpy}
          *pnBuf = nBuf - $n + $nRep;
          ret = 1;
        }
      }
  }

  set switchbody ""

  foreach I $data {
    set k [string range [lindex $I 0] end-1 end-1]
    lappend aCase($k) $I
  }
  foreach k [lsort [array names aCase]] {
    set ifstmts ""
    foreach I $aCase($k) {
      set zSuffix [lindex $I 0]         ;# Suffix text for this rule
      set zRep [lindex $I 2]            ;# Replacement text for rule 
      set xCond [lindex $I 1]           ;# Condition callback (or "")

      set n [string length $zSuffix]
      set nRep [string length $zRep]

      set match "nBuf>$n && 0==memcmp(\"$zSuffix\", &aBuf\[nBuf-$n\], $n)"
      set memcpy "memcpy(&aBuf\[nBuf-$n\], \"$zRep\", $nRep);"
      set cond "${xCond}(aBuf, nBuf-$n)"

      set bMemcpy [expr {$nRep>0}]
      set bCond [expr {$xCond!=""}]
      set bRet [expr {[llength $I]>3 && [lindex $I 3]}]

      set t $T(if_${bCond}_${bMemcpy}_${bRet})
      lappend ifstmts [string trim [subst -nocommands $t]]
    }

    set ifstmts [join $ifstmts "else "]

    append switchbody [subst -nocommands $T(case)]
  }


  puts [subst -nocommands $T(function)]
}


puts [string trim {
/**************************************************************************
***************************************************************************
** GENERATED CODE STARTS HERE (mkportersteps.tcl)
*/
}]
foreach step [array names O] {
  create_step_function $step $O($step)
}
puts [string trim {
/* 
** GENERATED CODE ENDS HERE (mkportersteps.tcl)
***************************************************************************
**************************************************************************/
}]



Added ext/fts5/test/fts5_common.tcl.








































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
# 2014 Dec 19
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#

if {![info exists testdir]} {
  set testdir [file join [file dirname [info script]] .. .. .. test]
}
source $testdir/tester.tcl

catch { 
  sqlite3_fts5_may_be_corrupt 0 
  append G(perm:dbconfig) "; load_static_extension \$::dbhandle fts5"
  reset_db
}

proc fts5_test_poslist {cmd} {
  set res [list]
  for {set i 0} {$i < [$cmd xInstCount]} {incr i} {
    lappend res [string map {{ } .} [$cmd xInst $i]]
  }
  set res
}

proc fts5_test_columnsize {cmd} {
  set res [list]
  for {set i 0} {$i < [$cmd xColumnCount]} {incr i} {
    lappend res [$cmd xColumnSize $i]
  }
  set res
}

proc fts5_test_columntext {cmd} {
  set res [list]
  for {set i 0} {$i < [$cmd xColumnCount]} {incr i} {
    lappend res [$cmd xColumnText $i]
  }
  set res
}

proc fts5_test_columntotalsize {cmd} {
  set res [list]
  for {set i 0} {$i < [$cmd xColumnCount]} {incr i} {
    lappend res [$cmd xColumnTotalSize $i]
  }
  set res
}

proc test_append_token {varname token iStart iEnd} {
  upvar $varname var
  lappend var $token
  return "SQLITE_OK"
}
proc fts5_test_tokenize {cmd} {
  set res [list]
  for {set i 0} {$i < [$cmd xColumnCount]} {incr i} {
    set tokens [list]
    $cmd xTokenize [$cmd xColumnText $i] [list test_append_token tokens]
    lappend res $tokens
  }
  set res
}

proc fts5_test_rowcount {cmd} {
  $cmd xRowCount
}

proc test_queryphrase_cb {cnt cmd} {
  upvar $cnt L 
  for {set i 0} {$i < [$cmd xInstCount]} {incr i} {
    foreach {ip ic io} [$cmd xInst $i] break
    set A($ic) 1
  }
  foreach ic [array names A] {
    lset L $ic [expr {[lindex $L $ic] + 1}]
  }
}
proc fts5_test_queryphrase {cmd} {
  set res [list]
  for {set i 0} {$i < [$cmd xPhraseCount]} {incr i} {
    set cnt [list]
    for {set j 0} {$j < [$cmd xColumnCount]} {incr j} { lappend cnt 0 }
    $cmd xQueryPhrase $i [list test_queryphrase_cb cnt]
    lappend res $cnt
  }
  set res
}

proc fts5_test_all {cmd} {
  set res [list]
  lappend res columnsize      [fts5_test_columnsize $cmd]
  lappend res columntext      [fts5_test_columntext $cmd]
  lappend res columntotalsize [fts5_test_columntotalsize $cmd]
  lappend res poslist         [fts5_test_poslist $cmd]
  lappend res tokenize        [fts5_test_tokenize $cmd]
  lappend res rowcount        [fts5_test_rowcount $cmd]
  set res
}

proc fts5_aux_test_functions {db} {
  foreach f {
    fts5_test_columnsize
    fts5_test_columntext
    fts5_test_columntotalsize
    fts5_test_poslist
    fts5_test_tokenize
    fts5_test_rowcount
    fts5_test_all

    fts5_test_queryphrase
  } {
    sqlite3_fts5_create_function $db $f $f
  }
}

proc fts5_level_segs {tbl} {
  set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10"
  set ret [list]
  foreach L [lrange [db one $sql] 1 end] {
    lappend ret [expr [llength $L] - 2]
  }
  set ret
} 

proc fts5_level_segids {tbl} {
  set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10"
  set ret [list]
  foreach L [lrange [db one $sql] 1 end] {
    set lvl [list]
    foreach S [lrange $L 2 end] {
      regexp {id=([1234567890]*)} $S -> segid
      lappend lvl $segid
    }
    lappend ret $lvl
  }
  set ret
}

proc fts5_rnddoc {n} {
  set map [list 0 a  1 b  2 c  3 d  4 e  5 f  6 g  7 h  8 i  9 j]
  set doc [list]
  for {set i 0} {$i < $n} {incr i} {
    lappend doc "x[string map $map [format %.3d [expr int(rand()*1000)]]]"
  }
  set doc
}

#-------------------------------------------------------------------------
# Usage:
#
#   nearset aCol ?-pc VARNAME? ?-near N? ?-col C? -- phrase1 phrase2...
#
# This command is used to test if a document (set of column values) matches
# the logical equivalent of a single FTS5 NEAR() clump and, if so, return
# the equivalent of an FTS5 position list.
#
# Parameter $aCol is passed a list of the column values for the document
# to test. Parameters $phrase1 and so on are the phrases.
#
# The result is a list of phrase hits. Each phrase hit is formatted as
# three integers separated by "." characters, in the following format:
#
#   <phrase number> . <column number> . <token offset>
#
# Options:
#
#   -near N        (NEAR distance. Default 10)
#   -col  C        (List of column indexes to match against)
#   -pc   VARNAME  (variable in caller frame to use for phrase numbering)
#
proc nearset {aCol args} {
  set O(-near) 10
  set O(-col)  {}
  set O(-pc)   ""

  set nOpt [lsearch -exact $args --]
  if {$nOpt<0} { error "no -- option" }

  foreach {k v} [lrange $args 0 [expr $nOpt-1]] {
    if {[info exists O($k)]==0} { error "unrecognized option $k" }
    set O($k) $v
  }

  if {$O(-pc) == ""} {
    set counter 0
  } else {
    upvar $O(-pc) counter
  }

  # Set $phraselist to be a list of phrases. $nPhrase its length.
  set phraselist [lrange $args [expr $nOpt+1] end]
  set nPhrase [llength $phraselist]

  for {set j 0} {$j < [llength $aCol]} {incr j} {
    for {set i 0} {$i < $nPhrase} {incr i} { 
      set A($j,$i) [list]
    }
  }

  set iCol -1
  foreach col $aCol {
    incr iCol
    if {$O(-col)!="" && [lsearch $O(-col) $iCol]<0} continue
    set nToken [llength $col]

    set iFL [expr $O(-near) >= $nToken ? $nToken - 1 : $O(-near)]
    for { } {$iFL < $nToken} {incr iFL} {
      for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
        set B($iPhrase) [list]
      }
      
      for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
        set p [lindex $phraselist $iPhrase]
        set nPm1 [expr {[llength $p] - 1}]
        set iFirst [expr $iFL - $O(-near) - [llength $p]]

        for {set i $iFirst} {$i <= $iFL} {incr i} {
          if {[lrange $col $i [expr $i+$nPm1]] == $p} { lappend B($iPhrase) $i }
        }
        if {[llength $B($iPhrase)] == 0} break
      }

      if {$iPhrase==$nPhrase} {
        for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
          set A($iCol,$iPhrase) [concat $A($iCol,$iPhrase) $B($iPhrase)]
          set A($iCol,$iPhrase) [lsort -integer -uniq $A($iCol,$iPhrase)]
        }
      }
    }
  }

  set res [list]
  #puts [array names A]

  for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
    for {set iCol 0} {$iCol < [llength $aCol]} {incr iCol} {
      foreach a $A($iCol,$iPhrase) {
        lappend res "$counter.$iCol.$a"
      }
    }
    incr counter
  }

  #puts $res
  sort_poslist $res
}

#-------------------------------------------------------------------------
# Usage:
#
#   sort_poslist LIST
#
# Sort a position list of the type returned by command [nearset]
#
proc sort_poslist {L} {
  lsort -command instcompare $L
}
proc instcompare {lhs rhs} {
  foreach {p1 c1 o1} [split $lhs .] {}
  foreach {p2 c2 o2} [split $rhs .] {}

  set res [expr $c1 - $c2]
  if {$res==0} { set res [expr $o1 - $o2] }
  if {$res==0} { set res [expr $p1 - $p2] }

  return $res
}

#-------------------------------------------------------------------------
# Logical operators used by the commands returned by fts5_tcl_expr().
#
proc AND {args} {
  foreach a $args {
    if {[llength $a]==0} { return [list] }
  }
  sort_poslist [concat {*}$args]
}
proc OR {args} {
  sort_poslist [concat {*}$args]
}
proc NOT {a b} {
  if {[llength $b]>0} { return [list] }
  return $a
}

Added ext/fts5/test/fts5aa.test.










































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
# 2014 June 17
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the FTS5 module.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5aa

# If SQLITE_ENABLE_FTS3 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(a, b, c);
  SELECT name, sql FROM sqlite_master;
} {
  t1 {CREATE VIRTUAL TABLE t1 USING fts5(a, b, c)}
  t1_data {CREATE TABLE 't1_data'(id INTEGER PRIMARY KEY, block BLOB)}
  t1_content {CREATE TABLE 't1_content'(id INTEGER PRIMARY KEY, c0, c1, c2)}
  t1_docsize {CREATE TABLE 't1_docsize'(id INTEGER PRIMARY KEY, sz BLOB)}
  t1_config {CREATE TABLE 't1_config'(k PRIMARY KEY, v) WITHOUT ROWID}
}

do_execsql_test 1.1 {
  DROP TABLE t1;
  SELECT name, sql FROM sqlite_master;
} {
}

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x,y);
}
do_execsql_test 2.1 {
  INSERT INTO t1 VALUES('a b c', 'd e f');
}
do_test 2.2 {
  execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 }
} {/{\(structure\) {lvl=0 nMerge=0 {id=[0123456789]* h=1 leaves=1..1}}}/}
do_execsql_test 2.3 {
  INSERT INTO t1(t1) VALUES('integrity-check');
}

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 3.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x,y);
}
foreach {i x y} {
   1  {g f d b f} {h h e i a}
   2  {f i g j e} {i j c f f}
   3  {e e i f a} {e h f d f}
   4  {h j f j i} {h a c f j}
   5  {d b j c g} {f e i b e}
   6  {a j a e e} {j d f d e}
   7  {g i j c h} {j d h c a}
   8  {j j i d d} {e e d f b}
   9  {c j j d c} {h j i f g}
   10 {b f h i a} {c f b b j}
} {
  do_execsql_test 3.$i.1 { INSERT INTO t1 VALUES($x, $y) }
  do_execsql_test 3.$i.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
  if {[set_test_counter errors]} break
}

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 4.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x,y);
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
foreach {i x y} {
   1  {g f d b f} {h h e i a}
   2  {f i g j e} {i j c f f}
   3  {e e i f a} {e h f d f}
   4  {h j f j i} {h a c f j}
   5  {d b j c g} {f e i b e}
   6  {a j a e e} {j d f d e}
   7  {g i j c h} {j d h c a}
   8  {j j i d d} {e e d f b}
   9  {c j j d c} {h j i f g}
   10 {b f h i a} {c f b b j}
} {
  do_execsql_test 4.$i.1 { INSERT INTO t1 VALUES($x, $y) }
  do_execsql_test 4.$i.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
  if {[set_test_counter errors]} break
}

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 5.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x,y);
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}
foreach {i x y} {
   1  {dd abc abc abc abcde} {aaa dd ddd ddd aab}
   2  {dd aab d aaa b} {abcde c aaa aaa aaa}
   3  {abcde dd b b dd} {abc abc d abc ddddd}
   4  {aaa abcde dddd dddd abcde} {abc b b abcde abc}
   5  {aab dddd d dddd c} {ddd abcde dddd abcde c}
   6  {ddd dd b aab abcde} {d ddddd dddd c abc}
   7  {d ddddd ddd c abcde} {c aab d abcde ddd}
   8  {abcde aaa aab c c} {ddd c dddd b aaa}
   9  {abcde aab ddddd c aab} {dddd dddd b c dd}
   10 {ddd abcde dddd dd c} {dddd c c d abcde}
} {
  do_execsql_test 5.$i.1 { INSERT INTO t1 VALUES($x, $y) }
  do_execsql_test 5.$i.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
  if {[set_test_counter errors]} break
}

#-------------------------------------------------------------------------
#
breakpoint
reset_db
do_execsql_test 6.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x,y);
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}

do_execsql_test 6.1 {
  INSERT  INTO t1(rowid, x, y) VALUES(22, 'a b c', 'c b a');
  REPLACE INTO t1(rowid, x, y) VALUES(22, 'd e f', 'f e d');
}

do_execsql_test 6.2 {
  INSERT INTO t1(t1) VALUES('integrity-check') 
}

do_execsql_test 6.3 {
  REPLACE INTO t1(rowid, x, y) VALUES('22', 'l l l', 'l l l');
}

do_execsql_test 6.4 {
  INSERT INTO t1(t1) VALUES('integrity-check') 
}

#-------------------------------------------------------------------------
#
reset_db
expr srand(0)
do_execsql_test 7.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x,y,z);
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}

proc doc {} {
  set v [list aaa aab abc abcde b c d dd ddd dddd ddddd]
  set ret [list]
  for {set j 0} {$j < 20} {incr j} {
    lappend ret [lindex $v [expr int(rand()*[llength $v])]]
  }
  return $ret
}

proc dump_structure {} {
  db eval {SELECT fts5_decode(id, block) AS t FROM t1_data WHERE id=10} {
    foreach lvl [lrange $t 1 end] {
      set seg [string repeat . [expr [llength $lvl]-2]]
      puts "[lrange $lvl 0 1] $seg"
    }
  }
}

for {set i 1} {$i <= 10} {incr i} {
  do_test 7.$i {
    for {set j 0} {$j < 10} {incr j} {
      set x [doc]
      set y [doc]
      set z [doc]
      set rowid [expr int(rand() * 100)]
      execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) }
    }
    execsql { INSERT INTO t1(t1) VALUES('integrity-check'); }
  } {}
}
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}
#exit

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 8.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x, prefix="1,2,3");
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}

do_execsql_test 8.1 {
  INSERT INTO t1 VALUES('the quick brown fox');
  INSERT INTO t1(t1) VALUES('integrity-check');
}


#-------------------------------------------------------------------------
#
reset_db

expr srand(0)

do_execsql_test 9.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x,y,z, prefix="1,2,3");
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}

proc doc {} {
  set v [list aaa aab abc abcde b c d dd ddd dddd ddddd]
  set ret [list]
  for {set j 0} {$j < 20} {incr j} {
    lappend ret [lindex $v [expr int(rand()*[llength $v])]]
  }
  return $ret
}

proc dump_structure {} {
  db eval {SELECT fts5_decode(id, block) AS t FROM t1_data WHERE id=10} {
    foreach lvl [lrange $t 1 end] {
      set seg [string repeat . [expr [llength $lvl]-2]]
      puts "[lrange $lvl 0 1] $seg"
    }
  }
}

for {set i 1} {$i <= 10} {incr i} {
  do_test 9.$i {
    for {set j 0} {$j < 100} {incr j} {
      set x [doc]
      set y [doc]
      set z [doc]
      set rowid [expr int(rand() * 100)]
      execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) }
    }
    execsql { INSERT INTO t1(t1) VALUES('integrity-check'); }
  } {}
  if {[set_test_counter errors]} break
}


#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 10.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x,y);
}
set d10 {
   1  {g f d b f} {h h e i a}
   2  {f i g j e} {i j c f f}
   3  {e e i f a} {e h f d f}
   4  {h j f j i} {h a c f j}
   5  {d b j c g} {f e i b e}
   6  {a j a e e} {j d f d e}
   7  {g i j c h} {j d h c a}
   8  {j j i d d} {e e d f b}
   9  {c j j d c} {h j i f g}
  10  {b f h i a} {c f b b j}
}
foreach {rowid x y} $d10 {
  do_execsql_test 10.1.$rowid.1 { INSERT INTO t1 VALUES($x, $y) }
  do_execsql_test 10.1.$rowid.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
}
foreach rowid {5 9 8 1 2 4 10 7 3 5 6} {
  do_execsql_test 10.2.$rowid.1 { DELETE FROM t1 WHERE rowid = $rowid }
  do_execsql_test 10.2.$rowid.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
}
foreach {rowid x y} $d10 {
  do_execsql_test 10.3.$rowid.1 { INSERT INTO t1 VALUES($x, $y) }
  do_execsql_test 10.3.$rowid.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
}

do_execsql_test 10.4.1 { DELETE FROM t1 }
do_execsql_test 10.4.2 { INSERT INTO t1(t1) VALUES('integrity-check') }

#-------------------------------------------------------------------------
#
do_catchsql_test 11.1 {
  CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rank);
} {1 {reserved fts5 column name: rank}}
do_catchsql_test 11.2 {
  CREATE VIRTUAL TABLE rank USING fts5(a, b, c);
} {1 {reserved fts5 table name: rank}}
do_catchsql_test 11.3 {
  CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rowid);
} {1 {reserved fts5 column name: rowid}}

#-------------------------------------------------------------------------
#
do_execsql_test 12.1 {
  CREATE VIRTUAL TABLE t2 USING fts5(x,y);
} {}

do_catchsql_test 12.2 {
  SELECT t2 FROM t2 WHERE t2 MATCH '*stuff'
} {1 {unknown special query: stuff}}

do_test 12.3 {
  set res [db eval { SELECT t2 FROM t2 WHERE t2 MATCH '* reads ' }]
  string is integer $res
} {1}

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 13.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  INSERT INTO t1(rowid, x) VALUES(1, 'o n e'), (2, 't w o');
} {}

do_execsql_test 13.2 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'o';
} {1 2}

do_execsql_test 13.4 {
  DELETE FROM t1 WHERE rowid=2;
} {}

do_execsql_test 13.5 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'o';
} {1}

do_execsql_test 13.6 {
  SELECT rowid FROM t1 WHERE t1 MATCH '.';
} {}

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 14.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(x, y);
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
  WITH d(x,y) AS (
    SELECT NULL, 'xyz xyz xyz xyz xyz xyz'
    UNION ALL 
    SELECT NULL, 'xyz xyz xyz xyz xyz xyz' FROM d
  )
  INSERT INTO t1 SELECT * FROM d LIMIT 200;
}

do_test 14.2 {
  set nRow 0
  db eval { SELECT * FROM t1 WHERE t1 MATCH 'xyz' } {
    db eval {
      BEGIN;
        CREATE TABLE t2(a, b);
      ROLLBACK;
    }
    incr nRow
  }
  set nRow
} {200}

do_test 14.3 {
  set nRow 0
  db eval { BEGIN; }
  db eval { SELECT * FROM t1 WHERE t1 MATCH 'xyz' } {
    db eval {
      SAVEPOINT aaa;
        CREATE TABLE t2(a, b);
      ROLLBACK TO aaa;
      RELEASE aaa;
    }
    incr nRow
  }
  set nRow
} {200}

do_execsql_test 15.0 {
  INSERT INTO t1(t1) VALUES('integrity-check');
}
do_execsql_test 15.1 {
  UPDATE t1_content SET c1 = 'xyz xyz xyz xyz xyz abc' WHERE rowid = 1;
}
do_catchsql_test 15.2 {
  INSERT INTO t1(t1) VALUES('integrity-check');
} {1 {database disk image is malformed}}

#-------------------------------------------------------------------------
#
do_execsql_test 16.1 {
  CREATE VIRTUAL TABLE n1 USING fts5(a);
  INSERT INTO n1 VALUES('a b c d');
}

proc funk {} {
  set fd [db incrblob main n1_data block 10]
  fconfigure $fd -encoding binary -translation binary
  puts -nonewline $fd "\x44\x45"
  close $fd
  db eval { UPDATE n1_config SET v=50 WHERE k='version' }
}
db func funk funk

do_catchsql_test 16.2 {
  SELECT funk(), bm25(n1), funk() FROM n1 WHERE n1 MATCH 'a+b+c+d'
} {1 {SQL logic error or missing database}}

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 17.1 {
  CREATE VIRTUAL TABLE b2 USING fts5(x);
  INSERT INTO b2 VALUES('a');
  INSERT INTO b2 VALUES('b');
  INSERT INTO b2 VALUES('c');
}

do_test 17.2 {
  set res [list]
  db eval { SELECT * FROM b2 ORDER BY rowid ASC } {
    lappend res [execsql { SELECT * FROM b2 ORDER BY rowid ASC }]
  }
  set res
} {{a b c} {a b c} {a b c}}

reset_db
do_execsql_test 18.1 {
  CREATE VIRTUAL TABLE c2 USING fts5(x, y);
  INSERT INTO c2 VALUES('x x x', 'x x x');
  SELECT rowid FROM c2 WHERE c2 MATCH 'y:x';
} {1}

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 17.1 {
  CREATE VIRTUAL TABLE uio USING fts5(ttt);
  INSERT INTO uio VALUES(NULL);
  INSERT INTO uio SELECT NULL FROM uio;
  INSERT INTO uio SELECT NULL FROM uio;
  INSERT INTO uio SELECT NULL FROM uio;
  INSERT INTO uio SELECT NULL FROM uio;
  INSERT INTO uio SELECT NULL FROM uio;
  INSERT INTO uio SELECT NULL FROM uio;
  INSERT INTO uio SELECT NULL FROM uio;
  INSERT INTO uio SELECT NULL FROM uio;
  SELECT count(*) FROM uio;
} {256}

do_execsql_test 17.2 {
  SELECT count(*) FROM uio WHERE rowid BETWEEN 8 AND 17
} {10}
do_execsql_test 17.3 {
  SELECT rowid FROM uio WHERE rowid BETWEEN 8 AND 17
} {8 9 10 11 12 13 14 15 16 17}
do_execsql_test 17.4 {
  SELECT rowid FROM uio WHERE rowid BETWEEN 8 AND 17 ORDER BY rowid DESC
} {17 16 15 14 13 12 11 10 9 8}
do_execsql_test 17.5 {
  SELECT count(*) FROM uio
} {256}

do_execsql_test 17.6 {
  INSERT INTO uio(rowid) VALUES(9223372036854775807);
  INSERT INTO uio(rowid) VALUES(-9223372036854775808);
  SELECT count(*) FROM uio;
} {258}
do_execsql_test 17.7 {
  SELECT min(rowid), max(rowid) FROM uio;
} {-9223372036854775808 9223372036854775807}

do_execsql_test 17.8 {
  INSERT INTO uio DEFAULT VALUES;
  SELECT min(rowid), max(rowid), count(*) FROM uio;
} {-9223372036854775808 9223372036854775807 259}

do_execsql_test 17.9 {
  SELECT min(rowid), max(rowid), count(*) FROM uio WHERE rowid < 10;
} {-9223372036854775808 9 10}

#--------------------------------------------------------------------
#
do_execsql_test 18.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(a, b);
  CREATE VIRTUAL TABLE t2 USING fts5(c, d);
  INSERT INTO t1 VALUES('abc*', NULL);
  INSERT INTO t2 VALUES(1, 'abcdefg');
}
do_execsql_test 18.2 {
  SELECT t1.rowid, t2.rowid FROM t1, t2 WHERE t2 MATCH t1.a AND t1.rowid = t2.c
} {1 1}
do_execsql_test 18.3 {
  SELECT t1.rowid, t2.rowid FROM t2, t1 WHERE t2 MATCH t1.a AND t1.rowid = t2.c
} {1 1}

finish_test


Added ext/fts5/test/fts5ab.test.


































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
# 2014 June 17
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the FTS5 module.
#
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ab

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(a, b);
  INSERT INTO t1 VALUES('hello', 'world');
  INSERT INTO t1 VALUES('one two', 'three four');
  INSERT INTO t1(rowid, a, b) VALUES(45, 'forty', 'five');
}

do_execsql_test 1.1 {
  SELECT * FROM t1 ORDER BY rowid DESC;
} { forty five {one two} {three four} hello world }

do_execsql_test 1.2 {
  SELECT rowid FROM t1 ORDER BY rowid DESC;
} {45 2 1}

do_execsql_test 1.3 {
  SELECT rowid FROM t1 ORDER BY rowid ASC;
} {1 2 45}

do_execsql_test 1.4 {
  SELECT * FROM t1 WHERE rowid=2;
} {{one two} {three four}}

do_execsql_test 1.5 {
  SELECT * FROM t1 WHERE rowid=2.01;
} {}

do_execsql_test 1.6 {
  SELECT * FROM t1 WHERE rowid=1.99;
} {}

#-------------------------------------------------------------------------

reset_db
do_execsql_test 2.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
  INSERT INTO t1 VALUES('one');
  INSERT INTO t1 VALUES('two');
  INSERT INTO t1 VALUES('three');
}

do_catchsql_test 2.2 {
  SELECT rowid, * FROM t1 WHERE t1 MATCH 'AND AND'
} {1 {fts5: syntax error near "AND"}}

do_execsql_test 2.3 { SELECT rowid, * FROM t1 WHERE t1 MATCH 'two' } {2 two}
do_execsql_test 2.4 { SELECT rowid, * FROM t1 WHERE t1 MATCH 'three' } {3 three}
do_execsql_test 2.5 { SELECT rowid, * FROM t1 WHERE t1 MATCH 'one' } {1 one}

do_execsql_test 2.6 {
  INSERT INTO t1 VALUES('a b c d e f g');
  INSERT INTO t1 VALUES('b d e a a a i');
  INSERT INTO t1 VALUES('x y z b c c c');
}

foreach {tn expr res} {
  1  a    {5 4}
  2  b    {6 5 4}
  3  c    {6 4}
  4  d    {5 4}
  5  e    {5 4}
  6  f    {4}
  7  g    {4}
  8  x    {6}
  9  y    {6}
  10 z    {6}
} {
  do_execsql_test 2.7.$tn.1 { 
    SELECT rowid FROM t1 WHERE t1 MATCH $expr ORDER BY rowid DESC
  } $res
  do_execsql_test 2.7.$tn.2 { 
    SELECT rowid FROM t1 WHERE t1 MATCH $expr ORDER BY rowid ASC
  } [lsort -integer $res]
}

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 3.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(a,b);
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}

foreach {tn a b} {
   1 {abashed abandons abase abash abaft} {abases abased}
   2 {abasing abases abaft abated abandons} {abases abandoned}
   3 {abatement abash abash abated abase} {abasements abashing}
   4 {abaft abasements abase abasement abasing} {abasement abases}
   5 {abaft abashing abatement abash abasements} {abandons abandoning}
   6 {aback abate abasements abashes abandoned} {abasement abased}
   7 {abandons abated abased aback abandoning} {abases abandoned}
   8 {abashing abases abasement abaft abashing} {abashed abate}
   9 {abash abase abate abashing abashed} {abandon abandoned}
   10 {abate abandoning abandons abasement aback} {abandon abandoning}
} {
  do_execsql_test 3.1.$tn.1 { INSERT INTO t1 VALUES($a, $b) } 
  do_execsql_test 3.1.$tn.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
}

foreach {tn expr res} {
  1 {abash} {9 5 3 1}
  2 {abase} {9 4 3 1}
  3 {abase + abash} {1}
  4 {abash + abase} {9}
  5 {abaft + abashing} {8 5}
  6 {abandon + abandoning} {10}
  7 {"abashing abases abasement abaft abashing"} {8}
} {
  do_execsql_test 3.2.$tn {
    SELECT rowid FROM t1 WHERE t1 MATCH $expr ORDER BY rowid DESC
  } $res
}

do_execsql_test 3.3 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'NEAR(aback abate, 2)'
} {6}

foreach {tn expr res} {
  1 {abash} {1 3 5 9}
  2 {abase} {1 3 4 9}
  3 {abase + abash} {1}
  4 {abash + abase} {9}
  5 {abaft + abashing} {5 8}
  6 {abandon + abandoning} {10}
  7 {"abashing abases abasement abaft abashing"} {8}
} {
  do_execsql_test 3.4.$tn {
    SELECT rowid FROM t1 WHERE t1 MATCH $expr
  } $res
}

#-------------------------------------------------------------------------
# Documents with more than 2M tokens.
#

do_execsql_test 4.0 {
  CREATE VIRTUAL TABLE s1 USING fts5(x);
}
foreach {tn doc} [list \
  1 [string repeat {a x } 1500000]       \
  2 "[string repeat {a a } 1500000] x"   \
] {
  do_execsql_test 4.$tn { INSERT INTO s1 VALUES($doc) }
}

do_execsql_test 4.3 {
  SELECT rowid FROM s1 WHERE s1 MATCH 'x'
} {1 2}

do_execsql_test 4.4 {
  SELECT rowid FROM s1 WHERE s1 MATCH '"a x"'
} {1 2}

#-------------------------------------------------------------------------
# Check that a special case of segment promotion works. The case is where
# a new segment is written to level L, but the oldest segment within level
# (L-2) is larger than it.
#
do_execsql_test 5.0 {
  CREATE VIRTUAL TABLE s2 USING fts5(x);
  INSERT INTO s2(s2, rank) VALUES('pgsz', 32);
  INSERT INTO s2(s2, rank) VALUES('automerge', 0);
}

proc rnddoc {n} {
  set map [list 0 a  1 b  2 c  3 d  4 e  5 f  6 g  7 h  8 i  9 j]
  set doc [list]
  for {set i 0} {$i < $n} {incr i} {
    lappend doc [string map $map [format %.3d [expr int(rand()*1000)]]]
  }
  set doc
}
db func rnddoc rnddoc

do_test 5.1 {
  for {set i 1} {$i <= 65} {incr i} {
    execsql { INSERT INTO s2 VALUES(rnddoc(10)) }
  }
  for {set i 1} {$i <= 63} {incr i} {
    execsql { DELETE FROM s2 WHERE rowid = $i }
  }
  fts5_level_segs s2
} {0 8}

do_test 5.2 {
  execsql {
    INSERT INTO s2(s2, rank) VALUES('automerge', 8);
  }
  for {set i 0} {$i < 7} {incr i} {
    execsql { INSERT INTO s2 VALUES(rnddoc(50)) }
  }
  fts5_level_segs s2
} {8 0 0}

# Test also the other type of segment promotion - when a new segment is written
# that is larger than segments immediately following it.
do_test 5.3 {
  execsql {
    DROP TABLE s2;
    CREATE VIRTUAL TABLE s2 USING fts5(x);
    INSERT INTO s2(s2, rank) VALUES('pgsz', 32);
    INSERT INTO s2(s2, rank) VALUES('automerge', 0);
  }

  for {set i 1} {$i <= 16} {incr i} {
    execsql { INSERT INTO s2 VALUES(rnddoc(5)) }
  }
  fts5_level_segs s2
} {0 1}

do_test 5.4 {
  execsql { INSERT INTO s2 VALUES(rnddoc(160)) }
  fts5_level_segs s2
} {2 0}

#-------------------------------------------------------------------------
#
do_execsql_test 6.0 {
  CREATE VIRTUAL TABLE s3 USING fts5(x);
  BEGIN;
    INSERT INTO s3 VALUES('a b c');
    INSERT INTO s3 VALUES('A B C');
}

do_execsql_test 6.1.1 {
  SELECT rowid FROM s3 WHERE s3 MATCH 'a'
} {1 2}

do_execsql_test 6.1.2 {
  SELECT rowid FROM s3 WHERE s3 MATCH 'a' ORDER BY rowid DESC
} {2 1}

do_execsql_test 6.2 {
  COMMIT;
}

do_execsql_test 6.3 {
  SELECT rowid FROM s3 WHERE s3 MATCH 'a'
} {1 2}

do_test 6.4 {
  db close
  sqlite3 db test.db
  execsql {
    BEGIN;
      INSERT INTO s3(s3) VALUES('optimize');
    ROLLBACK;
  }
} {}

#-------------------------------------------------------------------------
#
set doc [string repeat "a b c " 500]
breakpoint
do_execsql_test 7.0 {
  CREATE VIRTUAL TABLE x1 USING fts5(x);
  INSERT INTO x1(x1, rank) VALUES('pgsz', 32);
  INSERT INTO x1 VALUES($doc);
}



finish_test

Added ext/fts5/test/fts5ac.test.












































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
# 2014 June 17
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the FTS5 module.
#
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ac

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

set data {
    0   {p o q e z k z p n f y u z y n y}   {l o o l v v k}
    1   {p k h h p y l l h i p v n}         {p p l u r i f a j g e r r x w}
    2   {l s z j k i m p s}                 {l w e j t j e e i t w r o p o}
    3   {x g y m y m h p}                   {k j j b r e y y a k y}
    4   {q m a i y i z}                     {o w a g k x g j m w e u k}
    5   {k o a w y b s z}                   {s g l m m l m g p}
    6   {d a q i z h b l c p k j g k}       {p x u j x t v c z}
    7   {f d a g o c t i}                   {w f c x l d r k i j}
    8   {y g w u b q p o m j y b p a e k}   {r i d k y w o z q m a t p}
    9   {r k o m c c j s x m x m x m q r}   {y r c a q d z k n x n}
    10  {k j q m g q a j d}                 {d d e z g w h c d o o g x d}
    11  {j z u m o y q j f w e e w t r j w} {g m o r x n t n w i f g l z f}
    12  {s y w a w d o h x m k}             {c w k z b p o r a}
    13  {u t h x e g s k n g i}             {f j w g c s r}
    14  {b f i c s u z t k}                 {c k q s j u i z o}
    15  {n a f n u s w h y n s i q e w}     {x g e g a s s h n}
    16  {k s q e j n p}                     {t r j f t o e k k l m i}
    17  {g d t u w r o p m n m n p h b o u} {h s w o s l j e}
    18  {f l q y q q g e e x j r}           {n b r r g e i r t x q k}
    19  {f i r g o a w e p i l o a w}       {e k r z t d g h g i b d i e m}
    20  {l d u u f p y}                     {g o m m u x m g l j t t x x u}
    21  {m c d k x i c z l}                 {m i a i e u h}
    22  {w b f o c g x y j}                 {z d w x d f h i p}
    23  {w u i u x t c h k i b}             {b y k h b v r t g j}
    24  {h f d j s w s b a p k}             {a q y u z e y m m j q r}
    25  {d i x y x x k i y f s d j h z p n} {l l q m e t c w g y h t s v g}
    26  {g s q w t d k x g f m j p k y}     {r m b x e l t d}
    27  {j l s q u g y v e c l o}           {m f l m m m h g x x l n c}
    28  {c t j g v r s b z j}               {l c f y d t q n}
    29  {e x z y w i h l}                   {b n b x e y q e n u m}
    30  {g y y h j b w r}                   {q b q f u s k c k g r}
    31  {g u l x l b r c m z b u c}         {k g t b x k x n t e z d h o}
    32  {w g v l z f b z h p s c v h}       {g e w v m h k r g w a r f q}
    33  {c g n f u d o y o b}               {e y o h x x y y i z s b h a j}
    34  {v y h c q u u s q y x x k s q}     {d n r m y k n t i r n w e}
    35  {o u c x l e b t a}                 {y b a x y f z x r}
    36  {x p h l j a a u u j h}             {x o f s z m b c q p}
    37  {k q t i c a q n m v v}             {v r z e f m y o}
    38  {r w t t t t r v v o e p g h}       {l w x a g a u h y}
    39  {o p v g v b a g o}                 {j t q c r b b g y z}
    40  {f s o r o d t h q f x l}           {r d b m k i f s t d l m y x j w}
    41  {t m o t m f m f}                   {i p i q j v n v m b q}
    42  {t x w a r l w d t b c o d o}       {a h f h w z d n s}
    43  {t u q c d g p q x j o l c x c}     {m n t o z z j a y}
    44  {v d i i k b f s z r v r z y}       {g n q y s x x m b x c l w}
    45  {p v v a c s z y e o l}             {m v t u d k m k q b d c v z r}
    46  {f y k l d r q w r s t r e}         {h m v r r l r r t f q e x y}
    47  {w l n l t y x}                     {n h s l a f c h u f l x x m v n o}
    48  {t n v i k e b p z p d j j l i o}   {i v z p g u e j s i k n h w d c}
    49  {z v x p n l t a j c}               {e j l e n c e t a d}
    50  {w u b x u i v h a i y m m r p m s} {s r h d o g z y f f x e}
    51  {d c c x b c a x g}                 {p r a j v u y}
    52  {f w g r c o d l t u e z h i}       {j l l s s b j m}
    53  {p m t f k i x}                     {u v y a z g w v v m x h i}
    54  {l c z g l o j i c d e b}           {b f v y w u i b e i y}
    55  {r h c x f x a d s}                 {z x y k f l r b q c v}
    56  {v x x c y h z x b g m o q n c}     {h n b i t g h a q b c o r u}
    57  {d g l o h t b s b r}               {n u e p t i m u}
    58  {t d y e t d c w u o s w x f c h}   {i o s v y b r d r}
    59  {l b a p q n d r}                   {k d c c d n y q h g a o p e x}
    60  {f r z v m p k r}                   {x x r i s b a g f c}
    61  {s a z i e r f i w c n y v z t k s} {y y i r y n l s b w i e k n}
    62  {n x p r e x q r m v i b y}         {f o o z n b s r q j}
    63  {y j s u j x o n r q t f}           {f v k n v x u s o a d e f e}
    64  {u s i l y c x q}                   {r k c h p c h b o s s u s p b}
    65  {m p i o s h o}                     {s w h u n d m n q t y k b w c}
    66  {l d f g m x x x o}                 {s w d d f b y j j h h t i y p j o}
    67  {c b m h f n v w n h}               {i r w i e x r w l z p x u g u l s}
    68  {y a h u h i m a y q}               {d d r x h e v q n z y c j}
    69  {c x f d x o n p o b r t b l p l}   {m i t k b x v f p t m l l y r o}
    70  {u t l w w m s}                     {m f m o l t k o p e}
    71  {f g q e l n d m z x q}             {z s i i i m f w w f n g p e q}
    72  {n l h a v u o d f j d e x}         {v v s l f g d g r a j x i f z x}
    73  {x v m v f i g q e w}               {r y s j i k m j j e d g r n o i f}
    74  {g d y n o h p s y q z j d w n h w} {x o d l t j i b r d o r y}
    75  {p g b i u r b e q d v o a g w m k} {q y z s f q o h}
    76  {u z a q u f i f f b}               {b s p b a a d x r r i q f}
    77  {w h h z t h p o a h h e e}         {h w r p h k z v y f r x}
    78  {c a r k i a p u x}                 {f w l p t e m l}
    79  {q q u k o t r k z}                 {f b m c w p s s o z}
    80  {t i g v y q s r x m r x z e f}     {x o j w a u e y s j c b u p p r o}
    81  {n j n h r l a r e o z w e}         {v o r r j a v b}
    82  {i f i d k w d n h}                 {o i d z i z l m w s b q v u}
    83  {m d g q q b k b w f q q p p}       {j m q f b y c i z k y q p l e a}
    84  {m x o n y f g}                     {y c n x n q j i y c l h b r q z}
    85  {v o z l n p c}                     {g n j n t b b x n c l d a g j v}
    86  {z n a y f b t k k t d b z a v}     {r p c n r u k u}
    87  {b q t x z e c w}                   {q a o a l o a h i m j r}
    88  {j f h o x x a z g b a f a m i b}   {j z c z y x e x w t}
    89  {t c t p r s u c q n}               {z x l i k n f q l n t}
    90  {w t d q j g m r f k n}             {l e w f w w a l y q k i q t p c t}
    91  {c b o k l i c b s j n m b l}       {y f p q o w g}
    92  {f y d j o q t c c q m f j s t}     {f h e d y m o k}
    93  {k x j r m a d o i z j}             {r t t t f e b r x i v j v g o}
    94  {s f e a e t i h h d q p z t q}     {b k m k w h c}
    95  {h b n j t k i h o q u}             {w n g i t o k c a m y p f l x c p}
    96  {f c x p y r b m o l m o a}         {p c a q s u n n x d c f a o}
    97  {u h h k m n k}                     {u b v n u a o c}
    98  {s p e t c z d f n w f}             {l s f j b l c e s h}
    99  {r c v w i v h a t a c v c r e}     {h h u m g o f b a e o}
}

# Argument $expr is an FTS5 match expression designed to be executed against
# an FTS5 table with the following schema:
# 
#   CREATE VIRTUAL TABLE xy USING fts5(x, y);
#
# Assuming the table contains the same records as stored int the global 
# $::data array (see above), this function returns a list containing one
# element for each match in the dataset. The elements are themselves lists
# formatted as follows:
#
#   <rowid> {<phrase 0 matches> <phrase 1 matches>...}
#
# where each <phrase X matches> element is a list of phrase matches in the
# same form as returned by auxiliary scalar function fts5_test().
#
proc matchdata {bPos expr {bAsc 1}} {

  set tclexpr [db one {
    SELECT fts5_expr_tcl($expr, 'nearset $cols -pc ::pc', 'x', 'y')
  }]
  set res [list]

  #puts $tclexpr
  foreach {id x y} $::data {
    set cols [list $x $y]
    set ::pc 0
    #set hits [lsort -command instcompare [eval $tclexpr]]
    set hits [eval $tclexpr]
    if {[llength $hits]>0} {
      if {$bPos} {
        lappend res [list $id $hits]
      } else {
        lappend res $id
      }
    }
  }

  if {$bAsc} {
    set res [lsort -integer -increasing -index 0 $res]
  } else {
    set res [lsort -integer -decreasing -index 0 $res]
  }

  return [concat {*}$res]
}

#
# End of test code
#-------------------------------------------------------------------------

proc fts5_test_poslist {cmd} {
  set res [list]
  for {set i 0} {$i < [$cmd xInstCount]} {incr i} {
    lappend res [string map {{ } .} [$cmd xInst $i]]
  }
  set res
}


foreach {tn2 sql} {
  1  {}
  2  {BEGIN}
} {
  reset_db
  sqlite3_fts5_create_function db fts5_test_poslist fts5_test_poslist

  do_execsql_test 1.0 {
    CREATE VIRTUAL TABLE xx USING fts5(x,y);
    INSERT INTO xx(xx, rank) VALUES('pgsz', 32);
  }

  execsql $sql

  do_test $tn2.1.1 {
    foreach {id x y} $data {
      execsql { INSERT INTO xx(rowid, x, y) VALUES($id, $x, $y) }
    }
    execsql { INSERT INTO xx(xx) VALUES('integrity-check') }
  } {}

  #-------------------------------------------------------------------------
  # Test phrase queries.
  #
  foreach {tn phrase} {
    1 "o"
    2 "b q"
    3 "e a e"
    4 "m d g q q b k b w f q q p p"
    5 "l o o l v v k"
    6 "a"
    7 "b"
    8 "c"
    9 "no"
    10 "L O O L V V K"
  } {
    set expr "\"$phrase\""
    set res [matchdata 1 $expr]

    do_execsql_test $tn2.1.2.$tn.[llength $res] { 
      SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr
    } $res
  }

  #-------------------------------------------------------------------------
  # Test some AND and OR queries.
  #
  foreach {tn expr} {
    1.1 "a   AND b"
    1.2 "a+b AND c"
    1.3 "d+c AND u"
    1.4 "d+c AND u+d"

    2.1 "a   OR b"
    2.2 "a+b OR c"
    2.3 "d+c OR u"
    2.4 "d+c OR u+d"

    3.1 { a AND b AND c }
  } {
    set res [matchdata 1 $expr]
    do_execsql_test $tn2.2.$tn.[llength $res] { 
      SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr
    } $res
  }

  #-------------------------------------------------------------------------
  # Queries on a specific column.
  #
  foreach {tn expr} {
    1.1 "x:a"
    1.2 "y:a"
    1.3 "x:b"
    1.4 "y:b"
    2.1 "{x}:a"
    2.2 "{y}:a"
    2.3 "{x}:b"
    2.4 "{y}:b"

    3.1 "{x y}:a"
    3.2 "{y x}:a"
    3.3 "{x x}:b"
    3.4 "{y y}:b"

    4.1 {{"x" "y"}:a}
    4.2 {{"y" x}:a}
    4.3 {{x "x"}:b}
    4.4 {{"y" y}:b}
  } {
    set res [matchdata 1 $expr]
    do_execsql_test $tn2.3.$tn.[llength $res] { 
      SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr
    } $res
  }

  #-------------------------------------------------------------------------
  # Some NEAR queries.
  #
  foreach {tn expr} {
    1 "NEAR(a b)"
    2 "NEAR(r c)"
    2 { NEAR(r c, 5) }
    3 { NEAR(r c, 3) }
    4 { NEAR(r c, 2) }
    5 { NEAR(r c, 0) }
    6 { NEAR(a b c) }
    7 { NEAR(a b c, 8) }
    8  { x : NEAR(r c) }
    9  { y : NEAR(r c) }
  } {
    set res [matchdata 1 $expr]
    do_execsql_test $tn2.4.1.$tn.[llength $res] { 
      SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr
    } $res
  }

  do_test $tn2.4.1  { nearset {{a b c}} -- a } {0.0.0}
  do_test $tn2.4.2  { nearset {{a b c}} -- c } {0.0.2}

  foreach {tn expr tclexpr} {
    1 {a b} {AND [N $x -- {a}] [N $x -- {b}]}
  } {
    do_execsql_test $tn2.5.$tn {
      SELECT fts5_expr_tcl($expr, 'N $x')
    } [list $tclexpr]
  }

  #-------------------------------------------------------------------------
  #
  do_execsql_test $tn2.6.integrity {
    INSERT INTO xx(xx) VALUES('integrity-check');
  }
  #db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM xx_data} {puts $r}
  foreach {bAsc sql} {
    1 {SELECT rowid FROM xx WHERE xx MATCH $expr}
    0 {SELECT rowid FROM xx WHERE xx MATCH $expr ORDER BY rowid DESC}
  } {
    foreach {tn expr} {
      0.1 x
      1 { NEAR(r c) }
      2 { NEAR(r c, 5) }
      3 { NEAR(r c, 3) }
      4 { NEAR(r c, 2) }
      5 { NEAR(r c, 0) }
      6 { NEAR(a b c) }
      7 { NEAR(a b c, 8) }
      8  { x : NEAR(r c) }
      9  { y : NEAR(r c) }
      10 { x : "r c" }
      11 { y : "r c" }
      12 { a AND b }
      13 { a AND b AND c }
      14a { a }
      14b { a OR b }
      15 { a OR b AND c }
      16 { c AND b OR a }
      17 { c AND (b OR a) }
      18 { c NOT (b OR a) }
      19 { c NOT b OR a AND d }
    } {
      set res [matchdata 0 $expr $bAsc]
      do_execsql_test $tn2.6.$bAsc.$tn.[llength $res] $sql $res
    }
  }
}

do_execsql_test 3.1 {
  SELECT fts5_expr_tcl('a AND b');
} {{AND [nearset -- {a}] [nearset -- {b}]}}

finish_test

Added ext/fts5/test/fts5ad.test.






















































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
# 2014 June 17
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the FTS5 module.
#
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ad

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE yy USING fts5(x, y);
  INSERT INTO yy VALUES('Changes the result to be', 'the list of all matching');
  INSERT INTO yy VALUES('indices (or all  matching', 'values if -inline is');
  INSERT INTO yy VALUES('specified as  well.) If', 'indices are returned, the');
} {}

foreach {tn match res} {
  1 {c*} {1}
  2 {i*} {3 2}
  3 {t*} {3 1}
  4 {r*} {3 1}
} {
  do_execsql_test 1.$tn {
    SELECT rowid FROM yy WHERE yy MATCH $match ORDER BY rowid DESC
  } $res
}

foreach {tn match res} {
  5 {c*} {1}
  6 {i*} {2 3}
  7 {t*} {1 3}
  8 {r*} {1 3}
} {
  do_execsql_test 1.$tn {
    SELECT rowid FROM yy WHERE yy MATCH $match
  } $res
}

foreach {T create} {
  2 {
    CREATE VIRTUAL TABLE t1 USING fts5(a, b);
    INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
  }
  
  3 {
    CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix=1,2,3,4,5);
    INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
  }

  4 {
    CREATE VIRTUAL TABLE t1 USING fts5(a, b);
    INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
    BEGIN;
  }
  
  5 {
    CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix=1,2,3,4,5);
    INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
    BEGIN;
  }

} {

  do_test $T.1 { 
    execsql { DROP TABLE IF EXISTS t1 }
    execsql $create
  } {}
  
  do_test $T.1 {
    foreach {rowid a b} {
      0   {fghij uvwxyz klmn pq uvwx}         {klmn f fgh uv fghij klmno}
      1   {uv f abcd abcd fghi}               {pq klm uv uv fgh uv a}
      2   {klmn klm pqrs fghij uv}            {f k uvw ab abcd pqr uv}
      3   {ab pqrst a fghi ab pqr fg}         {k klmno a fg abcd}
      4   {abcd pqrst uvwx a fgh}             {f klmno fghij kl pqrst}
      5   {uvwxyz k abcde u a}                {uv k k kl klmn}
      6   {uvwxyz k klmn pqrst uv}            {fghi pqrs abcde u k}
      7   {uvwxy klmn u p pqrst fgh}          {p f fghi abcd uvw kl uv}
      8   {f klmno pqrst uvwxy pqrst}         {uv abcde klm pq pqr}
      9   {f abcde a uvwxyz pqrst}            {fghij abc k uvwx pqr fghij uvwxy}
      10  {ab uv f fg pqrst uvwxy}            {fgh p uv k abc klm uvw}
      11  {pq klmno a uvw abcde uvwxyz}       {fghij pq uvwxyz pqr fghi}
      12  {fgh u pq fgh uvw}                  {uvw pqr f uvwxy uvwx}
      13  {uvwx klmn f fgh abcd pqr}          {uvw k fg uv klm abcd}
      14  {ab uvwx pqrst pqr uvwxyz pqrs}     {uvwxyz abcde ab ab uvw abcde}
      15  {abc abcde uvwxyz abc kl k pqr}     {klm k k klmno u fgh}
      16  {fghi abcd fghij uv uvwxyz ab uv}   {klmn pqr a uvw fghi}
      17  {abc pqrst fghi uvwx uvw klmn fghi} {ab fg pqr pqrs p}
      18  {pqr kl a fghij fgh fg kl}          {pqr uvwxyz uvw abcd uvwxyz}
      19  {fghi fghi pqr kl fghi f}           {klmn u u klmno klmno}
      20  {abc pqrst klmno kl pq uvwxy}       {abc k fghi pqrs klm}
      21  {a pqr uvwxyz uv fghi a fgh}        {abc pqrs pqrst pq klm}
      22  {klm abc uvwxyz klm pqrst}          {fghij k pq pqr u klm fghij}
      23  {p klm uv p a a}                    {uvwxy klmn uvw abcde pq}
      24  {uv fgh fg pq uvwxy u uvwxy}        {pqrs a uvw p uvwx uvwxyz fg}
      25  {fghij fghi klmn abcd pq kl}        {fghi abcde pqrs abcd fgh uvwxy}
      26  {pq fgh a abc klmno klmn}           {fgh p k p fg fghij}
      27  {fg pq kl uvwx fghij pqrst klmn}    {abcd uvw abcd fghij f fghij}
      28  {uvw fghi p fghij pq fgh uvwx}      {k fghij abcd uvwx pqr fghi}
      29  {klm pq abcd pq f uvwxy}            {pqrst p fghij pqr p}
      30  {ab uvwx fg uvwx klmn klm}          {klmn klmno fghij klmn klm}
      31  {pq k pqr abcd a pqrs}              {abcd abcd uvw a abcd klmno ab}
      32  {pqrst u abc pq klm}                {abc kl uvwxyz fghij u fghi p}
      33  {f uvwxy u k f uvw uvwx}            {pqrs uvw fghi fg pqrst klm}
      34  {pqrs pq fghij uvwxyz pqr}          {ab abc abc uvw f pq f}
      35  {uvwxy ab uvwxy klmno kl pqrs}      {abcde uvw pqrs uvwx k k}
      36  {uvwxyz k ab abcde abc uvw}         {uvw abcde uvw klmn uv klmn}
      37  {k kl uv abcde uvwx fg u}           {u abc uvwxy k fg abcd}
      38  {fghi pqrst fghi pqr pqrst uvwx}    {u uv uvwx fghi abcde}
      39  {k pqrst k uvw fg pqrst fghij}      {uvwxy ab kl klmn uvwxyz abcde}
      40  {fg uvwxy pqrs klmn uvwxyz klm p}   {k uv ab fghij fgh k pqrs}
      41  {uvwx abc f pq uvwxy k}             {ab uvwxyz abc f fghij}
      42  {uvwxy klmno uvwxyz uvwxyz pqrst}   {uv kl kl klmno k f abcde}
      43  {abcde ab pqrs fg f fgh}            {abc fghij fghi k k}
      44  {uvw abcd a ab pqrst klmn fg}       {pqrst u uvwx pqrst fghij f pqrst}
      45  {uvwxy p kl uvwxyz ab pqrst fghi}   {abc f pqr fg a k}
      46  {u p f a fgh}                       {a kl pq uv f}
      47  {pqrs abc fghij fg abcde ab a}      {p ab uv pqrs kl fghi abcd}
      48  {abcde uvwxy pqrst uv abc pqr uvwx} {uvwxy klm uvwxy uvwx k}
      49  {fgh klm abcde klmno u}             {a f fghij f uvwxyz abc u}
      50  {uv uvw uvwxyz uvwxyz uv ab}        {uvwx pq fg u k uvwxy}
      51  {uvwxy pq p kl fghi}                {pqrs fghi pqrs abcde uvwxyz ab}
      52  {pqr p uvwxy kl pqrs klmno fghij}   {ab abcde abc pqrst pqrs uv}
      53  {fgh pqrst p a klmno}               {ab ab pqrst pqr kl pqrst}
      54  {abcd klm ab uvw a fg u}            {f pqr f abcd uv}
      55  {u fg uvwxyz k uvw}                 {abc pqrs f fghij fg pqrs uvwxy}
      56  {klm fg p fghi fg a}                {uv a fghi uvwxyz a fghi}
      57  {uvwxy k abcde fgh f fghi}          {f kl klmn f fghi klm}
      58  {klm k fgh uvw fgh fghi}            {klmno uvwx u pqrst u}
      59  {fghi pqr pqrst p uvw fghij}        {uv pqrst pqrs pq fghij klm}
      60  {uvwx klm uvwxy uv klmn}            {p a a abc klmn ab k}
      61  {uvwxy uvwx klm uvwx klm}           {pqrs ab ab uvwxyz fg}
      62  {kl uv uv uvw fg kl k}              {abcde uvw fgh uvwxy klm}
      63  {a abc fgh u klm abcd}              {fgh pqr uv klmn fghij}
      64  {klmn k klmn klmno pqrs pqr}        {fg kl abcde klmno uvwxy kl pq}
      65  {uvwxyz klm fghi abc abcde kl}      {uvwxy uvw uvwxyz uvwxyz pq pqrst}
      66  {pq klm abc pqrst fgh f}            {u abcde pqrst abcde fg}
      67  {u pqrst kl u uvw klmno}            {u pqr pqrs fgh u p}
      68  {abc fghi uvwxy fgh k pq}           {uv p uvwx uvwxyz ab}
      69  {klmno f uvwxyz uvwxy klmn fg ab}   {fgh kl a pqr abcd pqr}
      70  {fghi pqrst pqrst uv a}             {uvwxy k p uvw uvwx a}
      71  {a fghij f p uvw}                   {klm fg abcd abcde klmno pqrs}
      72  {uv uvwx uvwx uvw klm}              {uv fghi klmno uvwxy uvw}
      73  {kl uvwxy ab f pq klm u}            {uvwxy klmn klm abcd pq fg k}
      74  {uvw pqrst abcd uvwxyz ab}          {fgh fgh klmn abc pq}
      75  {uvwxyz klm pq abcd klmno pqr uvwxyz} {kl f a fg pqr klmn}
      76  {uvw uvwxy pqr k pqrst kl}          {uvwxy abc uvw uvw u}
      77  {fgh klm u uvwxyz f uvwxy abcde}    {uv abcde klmno u u ab}
      78  {klmno abc pq pqr fgh}              {p uv abcd fgh abc u k}
      79  {fg pqr uvw pq uvwx}                {uv uvw fghij pqrs fg p}
      80  {abcd pqrs uvwx uvwxy uvwx}         {u uvw pqrst pqr abcde pqrs kl}
      81  {uvwxyz klm pq uvwxy fghij}         {p pq klm fghij u a a}
      82  {uvwx k uvwxyz klmno pqrst kl}      {abcde p f pqrst abcd uvwxyz p}
      83  {abcd abcde klm pqrst uvwxyz}       {uvw pqrst u p uvwxyz a pqrs}
      84  {k klm abc uv uvwxy klm klmn}       {k abc pqr a abc p kl}
      85  {klmn abcd pqrs p pq klm a}         {klmn kl ab uvw pq}
      86  {klmn a pqrs abc uvw pqrst}         {a pqr kl klm a k f}
      87  {pqrs ab uvwx uvwxy a pqr f}        {fg klm uvwx pqr pqr}
      88  {klmno ab k kl u uvwxyz}            {uv kl uvw fghi uv uvw}
      89  {pq fghi pqrst klmn uvwxy abc pqrs} {fg f f fg abc abcde klm}
      90  {kl a k fghi uvwx fghi u}           {ab uvw pqr fg a p abc}
      91  {uvwx pqrs klmno ab fgh uvwx}       {pqr uvwx abc kl f klmno kl}
      92  {fghij pq pqrs fghij f pqrst}       {u abcde fg pq pqr fgh k}
      93  {fgh u pqrs abcde klmno abc}        {abc fg pqrst pqr abcde}
      94  {uvwx p abc f pqr p}                {k pqrs kl klm abc fghi klm}
      95  {kl p klmno uvwxyz klmn}            {fghi ab a fghi pqrs kl}
      96  {pqr fgh pq uvwx a}                 {uvw klm klmno fg uvwxy uvwx}
      97  {fg abc uvwxyz fghi pqrst pq}       {abc k a ab abcde f}
      98  {uvwxy fghi uvwxy u abcde abcde uvw} {klmn uvwx pqrs uvw uvwxy abcde}
      99  {pq fg fghi uvwx uvwx fghij uvwxy}  {klmn klmn f abc fg a}
    } {
      execsql {
        INSERT INTO t1(rowid, a, b) VALUES($rowid, $a, $b);
      }
    }
  } {}
  
  proc prefix_query {prefixlist} {
    set ret [list]
    db eval {SELECT rowid, a, b FROM t1 ORDER BY rowid DESC} {
      set bMatch 1
      foreach pref $prefixlist {
        if { [lsearch -glob $a $pref]<0 && [lsearch -glob $b $pref]<0 } {
          set bMatch 0
          break
        }
      }
      if {$bMatch} { lappend ret $rowid }
    }
    return $ret
  }
  
  foreach {bAsc sql} {
    0 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix ORDER BY rowid DESC}
    1 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix}
  } {
    foreach {tn prefix} {
      1  {a*} 2 {ab*} 3 {abc*} 4 {abcd*} 5 {abcde*} 
      6  {f*} 7 {fg*} 8 {fgh*} 9 {fghi*} 10 {fghij*}
      11 {k*} 12 {kl*} 13 {klm*} 14 {klmn*} 15 {klmno*}
      16 {p*} 17 {pq*} 18 {pqr*} 19 {pqrs*} 20 {pqrst*}
      21 {u*} 22 {uv*} 23 {uvw*} 24 {uvwx*} 25 {uvwxy*} 26 {uvwxyz*}
      27 {x*}
      28 {a f*} 29 {a* f*} 30 {a* fghij*}
    } {
      set res [prefix_query $prefix]
      if {$bAsc} {
        set res [lsort -integer -increasing $res]
      }
      set n [llength $res]
      if {$T==5} breakpoint 
      do_execsql_test $T.$bAsc.$tn.$n $sql $res
    }
  }

  catchsql COMMIT
}

finish_test

Added ext/fts5/test/fts5ae.test.


















































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
# 2014 June 17
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the FTS5 module.
#
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ae

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(a, b);
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}

do_execsql_test 1.1 {
  INSERT INTO t1 VALUES('hello', 'world');
  SELECT rowid FROM t1 WHERE t1 MATCH 'hello' ORDER BY rowid ASC;
} {1}

do_execsql_test 1.2 {
  INSERT INTO t1 VALUES('world', 'hello');
  SELECT rowid FROM t1 WHERE t1 MATCH 'hello' ORDER BY rowid ASC;
} {1 2}

do_execsql_test 1.3 {
  INSERT INTO t1 VALUES('world', 'world');
  SELECT rowid FROM t1 WHERE t1 MATCH 'hello' ORDER BY rowid ASC;
} {1 2}

do_execsql_test 1.4.1 {
  INSERT INTO t1 VALUES('hello', 'hello');
}

do_execsql_test 1.4.2 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'hello' ORDER BY rowid ASC;
} {1 2 4}

fts5_aux_test_functions db

#-------------------------------------------------------------------------
# 
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE t2 USING fts5(x, y);
  INSERT INTO t2 VALUES('u t l w w m s', 'm f m o l t k o p e');
  INSERT INTO t2 VALUES('f g q e l n d m z x q', 'z s i i i m f w w f n g p');
}

do_execsql_test 2.1 {
  SELECT rowid, fts5_test_poslist(t2) FROM t2 
  WHERE t2 MATCH 'm' ORDER BY rowid;
} {
  1 {0.0.5 0.1.0 0.1.2} 
  2 {0.0.7 0.1.5}
}

do_execsql_test 2.2 {
  SELECT rowid, fts5_test_poslist(t2) FROM t2 
  WHERE t2 MATCH 'u OR q' ORDER BY rowid;
} {
  1 {0.0.0}
  2 {1.0.2 1.0.10}
}

do_execsql_test 2.3 {
  SELECT rowid, fts5_test_poslist(t2) FROM t2 
  WHERE t2 MATCH 'y:o' ORDER BY rowid;
} {
  1 {0.1.3 0.1.7}
}

#-------------------------------------------------------------------------
# 
do_execsql_test 3.0 {
  CREATE VIRTUAL TABLE t3 USING fts5(x, y);
  INSERT INTO t3 VALUES( 'j f h o x x a z g b a f a m i b', 'j z c z y x w t');
  INSERT INTO t3 VALUES( 'r c', '');
}

do_execsql_test 3.1 {
  SELECT rowid, fts5_test_poslist(t3) FROM t3 WHERE t3 MATCH 'NEAR(a b)';
} {
  1 {0.0.6 1.0.9 0.0.10 0.0.12 1.0.15}
}

do_execsql_test 3.2 {
  SELECT rowid, fts5_test_poslist(t3) FROM t3 WHERE t3 MATCH 'NEAR(r c)';
} {
  2 {0.0.0 1.0.1}
}

do_execsql_test 3.3 {
  INSERT INTO t3 
  VALUES('k x j r m a d o i z j', 'r t t t f e b r x i v j v g o');
  SELECT rowid, fts5_test_poslist(t3) 
  FROM t3 WHERE t3 MATCH 'a OR b AND c';
} {
  1 {0.0.6 1.0.9 0.0.10 0.0.12 1.0.15 2.1.2}
  3 0.0.5 
}

#-------------------------------------------------------------------------
# 
do_execsql_test 4.0 {
  CREATE VIRTUAL TABLE t4 USING fts5(x, y);
  INSERT INTO t4 
  VALUES('k x j r m a d o i z j', 'r t t t f e b r x i v j v g o');
}

do_execsql_test 4.1 {
  SELECT rowid, fts5_test_poslist(t4) FROM t4 WHERE t4 MATCH 'a OR b AND c';
} {
  1 0.0.5
}

#-------------------------------------------------------------------------
# Test that the xColumnSize() and xColumnAvgsize() APIs work.
#
reset_db
fts5_aux_test_functions db

do_execsql_test 5.1 {
  CREATE VIRTUAL TABLE t5 USING fts5(x, y);
  INSERT INTO t5 VALUES('a b c d', 'e f g h i j');
  INSERT INTO t5 VALUES('', 'a');
  INSERT INTO t5 VALUES('a', '');
}
do_execsql_test 5.2 {
  SELECT rowid, fts5_test_columnsize(t5) FROM t5 WHERE t5 MATCH 'a'
  ORDER BY rowid DESC;
} {
  3 {1 0}
  2 {0 1}
  1 {4 6}
}

do_execsql_test 5.3 {
  SELECT rowid, fts5_test_columntext(t5) FROM t5 WHERE t5 MATCH 'a'
  ORDER BY rowid DESC;
} {
  3 {a {}}
  2 {{} a}
  1 {{a b c d} {e f g h i j}}
}

do_execsql_test 5.4 {
  SELECT rowid, fts5_test_columntotalsize(t5) FROM t5 WHERE t5 MATCH 'a'
  ORDER BY rowid DESC;
} {
  3 {5 7}
  2 {5 7}
  1 {5 7}
}

do_execsql_test 5.5 {
  INSERT INTO t5 VALUES('x y z', 'v w x y z');
  SELECT rowid, fts5_test_columntotalsize(t5) FROM t5 WHERE t5 MATCH 'a'
  ORDER BY rowid DESC;
} {
  3 {8 12}
  2 {8 12}
  1 {8 12}
}

#-------------------------------------------------------------------------
# Test the xTokenize() API
#
reset_db
fts5_aux_test_functions db
do_execsql_test 6.1 {
  CREATE VIRTUAL TABLE t6 USING fts5(x, y);
  INSERT INTO t6 VALUES('There are more', 'things in heaven and earth');
  INSERT INTO t6 VALUES(', Horatio, Than are', 'dreamt of in your philosophy.');
}

do_execsql_test 6.2 {
  SELECT rowid, fts5_test_tokenize(t6) FROM t6 WHERE t6 MATCH 't*'
} {
  1 {{there are more} {things in heaven and earth}}
  2 {{horatio than are} {dreamt of in your philosophy}}
}

#-------------------------------------------------------------------------
# Test the xQueryPhrase() API
#
reset_db
fts5_aux_test_functions db
do_execsql_test 7.1 {
  CREATE VIRTUAL TABLE t7 USING fts5(x, y);
}
do_test 7.2 {
  foreach {x y} {
    {q i b w s a a e l o} {i b z a l f p t e u}
    {b a z t a l o x d i} {b p a d b f h d w y}
    {z m h n p p u i e g} {v h d v b x j j c z}
    {a g i m v a u c b i} {p k s o t l r t b m}
    {v v c j o d a s c p} {f f v o k p o f o g}
  } {
    execsql {INSERT INTO t7 VALUES($x, $y)}
  }
  execsql { SELECT count(*) FROM t7 }
} {5}

foreach {tn q res} {
  1 a {{4 2}}
  2 b {{3 4}}
  3 c {{2 1}}
  4 d {{2 2}}
  5 {a AND b} {{4 2} {3 4}}
  6 {a OR b OR c OR d} {{4 2} {3 4} {2 1} {2 2}}
} {
  do_execsql_test 7.3.$tn { 
    SELECT fts5_test_queryphrase(t7) FROM t7 WHERE t7 MATCH $q LIMIT 1
  } [list $res]
}

do_execsql_test 7.4 {
  SELECT fts5_test_rowcount(t7) FROM t7 WHERE t7 MATCH 'a';
} {5 5 5 5}

#do_execsql_test 7.4 {
#  SELECT rowid, bm25debug(t7) FROM t7 WHERE t7 MATCH 'a';
#} {5 5 5 5}
#

#-------------------------------------------------------------------------
#
do_test 8.1 {
  execsql { CREATE VIRTUAL TABLE t8 USING fts5(x, y) }
  foreach {rowid x y} {
     0 {A o}   {o o o C o o o o o o o o}
     1 {o o B} {o o o C C o o o o o o o}
     2 {A o o} {o o o o D D o o o o o o}
     3 {o B}   {o o o o o D o o o o o o}
     4 {E o G} {H o o o o o o o o o o o}
     5 {F o G} {I o J o o o o o o o o o}
     6 {E o o} {H o J o o o o o o o o o}
     7 {o o o} {o o o o o o o o o o o o}
     9 {o o o} {o o o o o o o o o o o o}
  } {
    execsql { INSERT INTO t8(rowid, x, y) VALUES($rowid, $x, $y) }
  }
} {}

foreach {tn q res} {
  1 {a} {0 2}
  2 {b} {3 1}
  3 {c} {1 0}
  4 {d} {2 3}
  5 {g AND (e OR f)} {5 4}
  6 {j AND (h OR i)} {5 6}
} {
  do_execsql_test 8.2.$tn.1 {
    SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY bm25(t8);
  } $res

  do_execsql_test 8.2.$tn.2 {
    SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY +rank;
  } $res

  do_execsql_test 8.2.$tn.3 {
    SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY rank;
  } $res
}

finish_test

Added ext/fts5/test/fts5af.test.
































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# 2014 June 17
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the FTS5 module.
# 
# More specifically, the tests in this file focus on the built-in 
# snippet() function.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5af

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}


do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x, y);
}

proc do_snippet_test {tn doc match res} {

  uplevel #0 [list set v1 $doc]
  uplevel #0 [list set v2 $match]

  do_execsql_test $tn.1 {
    DELETE FROM t1;
    INSERT INTO t1 VALUES($v1, NULL);
    SELECT snippet(t1, -1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2;
  } [list $res]

  do_execsql_test $tn.2 {
    DELETE FROM t1;
    INSERT INTO t1 VALUES(NULL, $v1);
    SELECT snippet(t1, -1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2;
  } [list $res]

  do_execsql_test $tn.3 {
    DELETE FROM t1;
    INSERT INTO t1 VALUES($v1, NULL);
    SELECT snippet(t1, -1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2
    ORDER BY rank DESC;
  } [list $res]


}


foreach {tn doc res} {

  1.1 {X o o o o o o} {[X] o o o o o o}
  1.2 {o X o o o o o} {o [X] o o o o o}
  1.3 {o o X o o o o} {o o [X] o o o o}
  1.4 {o o o X o o o} {o o o [X] o o o}
  1.5 {o o o o X o o} {o o o o [X] o o}
  1.6 {o o o o o X o} {o o o o o [X] o}
  1.7 {o o o o o o X} {o o o o o o [X]}

  2.1 {X o o o o o o o} {[X] o o o o o o...}
  2.2 {o X o o o o o o} {o [X] o o o o o...}
  2.3 {o o X o o o o o} {o o [X] o o o o...}
  2.4 {o o o X o o o o} {o o o [X] o o o...}
  2.5 {o o o o X o o o} {...o o o [X] o o o}
  2.6 {o o o o o X o o} {...o o o o [X] o o}
  2.7 {o o o o o o X o} {...o o o o o [X] o}
  2.8 {o o o o o o o X} {...o o o o o o [X]}

  3.1 {X o o o o o o o o} {[X] o o o o o o...}
  3.2 {o X o o o o o o o} {o [X] o o o o o...}
  3.3 {o o X o o o o o o} {o o [X] o o o o...}
  3.4 {o o o X o o o o o} {o o o [X] o o o...}
  3.5 {o o o o X o o o o} {...o o o [X] o o o...}
  3.6 {o o o o o X o o o} {...o o o [X] o o o}
  3.7 {o o o o o o X o o} {...o o o o [X] o o}
  3.8 {o o o o o o o X o} {...o o o o o [X] o}
  3.9 {o o o o o o o o X} {...o o o o o o [X]}

  4.1 {X o o o o o X o o} {[X] o o o o o [X]...}
  4.2 {o X o o o o o X o} {...[X] o o o o o [X]...}
  4.3 {o o X o o o o o X} {...[X] o o o o o [X]}

  5.1 {X o o o o X o o o} {[X] o o o o [X] o...}
  5.2 {o X o o o o X o o} {...[X] o o o o [X] o...}
  5.3 {o o X o o o o X o} {...[X] o o o o [X] o}
  5.4 {o o o X o o o o X} {...o [X] o o o o [X]}

  6.1 {X o o o X o o o} {[X] o o o [X] o o...}
  6.2 {o X o o o X o o o} {o [X] o o o [X] o...}
  6.3 {o o X o o o X o o} {...o [X] o o o [X] o...}
  6.4 {o o o X o o o X o} {...o [X] o o o [X] o}
  6.5 {o o o o X o o o X} {...o o [X] o o o [X]}

  7.1 {X o o X o o o o o} {[X] o o [X] o o o...}
  7.2 {o X o o X o o o o} {o [X] o o [X] o o...}
  7.3 {o o X o o X o o o} {...o [X] o o [X] o o...}
  7.4 {o o o X o o X o o} {...o [X] o o [X] o o}
  7.5 {o o o o X o o X o} {...o o [X] o o [X] o}
  7.6 {o o o o o X o o X} {...o o o [X] o o [X]}
} {
  do_snippet_test 1.$tn $doc X $res
}

foreach {tn doc res} {
  1.1 {X Y o o o o o} {[X Y] o o o o o}
  1.2 {o X Y o o o o} {o [X Y] o o o o}
  1.3 {o o X Y o o o} {o o [X Y] o o o}
  1.4 {o o o X Y o o} {o o o [X Y] o o}
  1.5 {o o o o X Y o} {o o o o [X Y] o}
  1.6 {o o o o o X Y} {o o o o o [X Y]}

  2.1 {X Y o o o o o o} {[X Y] o o o o o...}
  2.2 {o X Y o o o o o} {o [X Y] o o o o...}
  2.3 {o o X Y o o o o} {o o [X Y] o o o...}
  2.4 {o o o X Y o o o} {...o o [X Y] o o o}
  2.5 {o o o o X Y o o} {...o o o [X Y] o o}
  2.6 {o o o o o X Y o} {...o o o o [X Y] o}
  2.7 {o o o o o o X Y} {...o o o o o [X Y]}

  3.1 {X Y o o o o o o o} {[X Y] o o o o o...}
  3.2 {o X Y o o o o o o} {o [X Y] o o o o...}
  3.3 {o o X Y o o o o o} {o o [X Y] o o o...}
  3.4 {o o o X Y o o o o} {...o o [X Y] o o o...}
  3.5 {o o o o X Y o o o} {...o o [X Y] o o o}
  3.6 {o o o o o X Y o o} {...o o o [X Y] o o}
  3.7 {o o o o o o X Y o} {...o o o o [X Y] o}
  3.8 {o o o o o o o X Y} {...o o o o o [X Y]}

} {
  do_snippet_test 2.$tn $doc "X + Y" $res
}

finish_test

Added ext/fts5/test/fts5ag.test.




















































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# 2014 June 17
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the FTS5 module.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ag

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

#-------------------------------------------------------------------------
# This file attempts to verify that the extension APIs work with 
# "ORDER BY rank" queries. This is done by comparing the results of
# the fts5_test() function when run with queries of the form:
#
#      ... WHERE fts MATCH ? ORDER BY bm25(fts) [ASC|DESC]
#
# and
#
#      ... WHERE fts MATCH ? ORDER BY rank [ASC|DESC]
#

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x, y, z);
}

do_test 1.1 {
  foreach {x y z} {
    {j s m y m r n l u k} {z k f u z g h s w g} {r n o s s b v n w w}
    {m v g n d x q r r s} {q t d a q a v l h j} {s k l f s i n v q v}
    {m f f d h h s o h a} {y e v r q i u m h d} {b c k q m z l z h n}
    {j e m v k p e c j m} {m p v z d x l n i a} {v p u p m t p q i f}
    {v r w l e e t d z p} {c s b w k m n k o u} {w g y f v w v w v p}
    {k d g o u j p z n o} {t g e q l z i g b j} {f i q q j y h b g h}
    {j s w x o t j b t m} {v a v v r t x c q a} {r t k x w u l h a g}
    {j y b i u d e m d w} {y s o j h i n a u p} {n a g b u c w e b m}
    {b c k s c w j p w b} {m o c o w o b d q q} {n t y o y z y r z e}
    {p n q l e l h z q c} {n s e i h c v b b u} {m p d i t a o o f f}
    {k c o n v e z l b m} {s m n i n s d e s u} {t a u e q d a o u c}
    {h d t o i a g b b p} {k x c i g f g b b k} {x f i v n a n n j i}
    {f z k r b u s k z e} {n z v z w l e r h t} {t i s v v a v p n s}
    {k f e c t z r e f d} {f m g r c w q k b v} {v y s y f r b f e f}
    {z r c t d q q h x b} {u c g z n z u v s s} {y t n f f x b f d x}
    {u n p n u t i m e j} {p j j d m f k p m z} {d o l v c o e a h w}
    {h o q w t f v i c y} {c q u n r z s l l q} {z x a q w s b w s y}
    {y m s x k i m n x c} {b i a n v h z n k a} {w l q p b h h g d y}
    {z v s j f p v l f w} {c s b i z e k i g c} {x b v d w j f e d z}
    {r k k j e o m k g b} {h b d c h m y b t u} {u j s h k z c u d y}
    {v h i v s y z i k l} {d t m w q w c a z p} {r s e s x v d w k b}
    {u r e q j y h o o s} {x x z r x y t f j s} {k n h x i i u e c v}
    {q l f d a p w l q o} {y z q w j o p b o v} {s u h z h f d f n l}
    {q o e o x x l g q i} {j g m h q q w c d b} {o m d h w a g b f n}
    {m x k t s s y l v a} {j x t c a u w b w g} {n f j b v x y p u t}
    {u w k a q b u w k w} {a h j u o w f s k p} {j o f s h y t j h g}
    {x v b l m t l m h l} {t p y i y i q b q a} {k o o z w a c h c f}
    {j g c d k w b d t v} {a k v c m a v h v p} {i c a i j g h l j h}
    {l m v l c z j b p b} {z p z f l n k i b a} {j v q k g i x g i b}
    {m c i w u z m i s z} {i z r f n l q z k w} {x n b p b q r g i z}
    {d g i o o x l f x d} {r t m f b n q y c b} {i u g k w x n m p o}
    {t o s i q d z x d t} {v a k s q z j c o o} {z f n n r l y w v v}
    {w k h d t l j g n n} {r z m v y b l n c u} {v b v s c l n k g v}
    {m a g r a b u u n z} {u y l h v w v k b f} {x l p g i s j f x v}
    {v s g x k z a k a r} {l t g v j q l k p l} {f h n a x t v s t y}
    {z u v u x p s j y t} {g b q e e g l n w g} {e n p j i g j f u r}
    {q z l t w o l m p e} {t s g h r p r o t z} {y b f a o n u m z g}
    {d t w n y b o g f o} {d a j e r l g g s h} {d z e l w q l t h f}
    {f l u w q v x j a h} {f n u l l d m h h w} {d x c c e r o d q j}
    {b y f q s q f u l g} {u z w l f d b i a g} {m v q b g u o z e z}
    {h z p t s e x i v m} {l h q m e o x x x j} {e e d n p r m g j f}
    {k h s g o n s d a x} {u d t t s j o v h a} {z r b a e u v o e s}
    {m b b g a f c p a t} {w c m j o d b l g e} {f p j p m o s y v j}
    {c r n h d w c a b l} {s g e u s d n j b g} {b o n a x a b x y l}
    {r h u x f c d z n o} {x y l g u m i i w d} {t f h b z v r s r g}
    {t i o r b v g g p a} {d x l u q k m o s u} {j f h t u n z u k m}
    {g j t y d c n j y g} {w e s k v c w i g t} {g a h r g v g h r o}
    {e j l a q j g i n h} {d z k c u p n u p p} {t u e e v z v r r g}
    {l j s g k j k h z l} {p v d a t x d e q u} {r l u z b m g k s j}
    {i e y d u x d i n l} {p f z k m m w p u l} {z l p m r q w n d a}
  } {
    execsql { INSERT INTO t1 VALUES($x, $y, $z) }
  }
  set {} {}
} {}

fts5_aux_test_functions db

proc do_fts5ag_test {tn E} {
  set q1 {SELECT fts5_test_all(t1) FROM t1 WHERE t1 MATCH $E ORDER BY rank}
  set q2 {SELECT fts5_test_all(t1) FROM t1 WHERE t1 MATCH $E ORDER BY bm25(t1)}

  set res [execsql $q1]
  set expected [execsql $q2]
  uplevel [list do_test $tn.1 [list set {} $res] $expected]

  append q1 " DESC"
  append q2 " DESC"

  set res [execsql $q1]
  set expected [execsql $q2]
  uplevel [list do_test $tn.2 [list set {} $res] $expected]
}

foreach {tn expr} {
  2.1 a
  2.2 b
  2.3 c
  2.4 d

  2.5 {"m m"}
  2.6 {e + s}

  3.0 {a AND b}
  3.1 {a OR b}
  3.2 {b OR c AND d}
  3.3 {NEAR(c d)}
} {
  do_fts5ag_test $tn $expr

  if {[set_test_counter errors]} break
}



finish_test

Added ext/fts5/test/fts5ah.test.












































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# 2014 June 17
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the FTS5 module.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ah

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

#-------------------------------------------------------------------------
# This file contains tests for very large doclists.
#

do_test 1.0 {
  execsql { CREATE VIRTUAL TABLE t1 USING fts5(a) }
  execsql { INSERT INTO t1(t1, rank) VALUES('pgsz', 128) }
  set v {w w w w w w w w w w w w w w w w w w w w}
  execsql { INSERT INTO t1(rowid, a) VALUES(0, $v) }
  for {set i 1} {$i <= 10000} {incr i} {
    set v {x x x x x x x x x x x x x x x x x x x x}
    if {($i % 2139)==0} {lset v 3 Y ; lappend Y $i}
    if {($i % 1577)==0} {lset v 5 W ; lappend W $i}
    execsql { INSERT INTO t1 VALUES($v) }
  }
  set v {w w w w w w w w w w w w w w w w w w w w}
  execsql { INSERT INTO t1 VALUES($v) }
} {}

do_execsql_test 1.1.1 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'x AND w'
} [lsort -integer -incr $W]

do_execsql_test 1.1.2 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'x* AND w*'
} [lsort -integer -incr $W]

do_execsql_test 1.2 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'y AND x'
} [lsort -integer -incr $Y]

do_execsql_test 1.3 {
  INSERT INTO t1(t1) VALUES('integrity-check');
}

proc reads {} {
  db one {SELECT t1 FROM t1 WHERE t1 MATCH '*reads'}
}

proc execsql_reads {sql} {
  set nRead [reads]
  execsql $sql
  expr [reads] - $nRead
}

do_test 1.4 {
  set nRead [reads]
  execsql { SELECT rowid FROM t1 WHERE t1 MATCH 'x' }
  set nReadX [expr [reads] - $nRead]
  expr $nReadX>1000
} {1}

do_test 1.5 {
  set fwd [execsql_reads {SELECT rowid FROM t1 WHERE t1 MATCH 'x' }]
  set bwd [execsql_reads {
    SELECT rowid FROM t1 WHERE t1 MATCH 'x' ORDER BY 1 ASC 
  }]
  expr {$bwd < $fwd + 12}
} {1}

foreach {tn q res} "
  1 { SELECT rowid FROM t1 WHERE t1 MATCH 'w + x'   }  [list $W]
  2 { SELECT rowid FROM t1 WHERE t1 MATCH 'x + w'   }  [list $W]
  3 { SELECT rowid FROM t1 WHERE t1 MATCH 'x AND w' }  [list $W]
  4 { SELECT rowid FROM t1 WHERE t1 MATCH 'y AND x' }  [list $Y]
" {

  do_test 1.6.$tn.1 {
    set n [execsql_reads $q]
    puts -nonewline "(n=$n nReadX=$nReadX)"
    expr {$n < ($nReadX / 8)}
  } {1}

  do_test 1.6.$tn.2 {
    set n [execsql_reads "$q ORDER BY rowid DESC"]
    puts -nonewline "(n=$n nReadX=$nReadX)"
    expr {$n < ($nReadX / 8)}
  } {1}

  do_execsql_test 1.6.$tn.3 $q [lsort -int -incr $res]
  do_execsql_test 1.6.$tn.4 "$q ORDER BY rowid DESC" [lsort -int -decr $res]
}

#-------------------------------------------------------------------------
# Now test that adding range constraints on the rowid field reduces the
# number of pages loaded from disk.
#
foreach {tn fraction tail cnt} {
  1 0.6 {rowid > 5000} 5000
  2 0.2 {rowid > 9000} 1000
  3 0.2 {rowid < 1000}  999
  4 0.2 {rowid BETWEEN 4000 AND 5000}  1001
  5 0.6 {rowid >= 5000} 5001
  6 0.2 {rowid >= 9000} 1001
  7 0.2 {rowid <= 1000} 1000
  8 0.6 {rowid > '5000'} 5000
  9 0.2 {rowid > '9000'} 1000
  10 0.1 {rowid = 444} 1
} {
  set q "SELECT rowid FROM t1 WHERE t1 MATCH 'x' AND $tail"
  set n [execsql_reads $q]
  set ret [llength [execsql $q]]

  do_test "1.7.$tn.asc.(n=$n ret=$ret)" {
    expr {$n < ($fraction*$nReadX) && $ret==$cnt}
  } {1}

  set q "SELECT rowid FROM t1 WHERE t1 MATCH 'x' AND $tail ORDER BY rowid DESC"
  set n [execsql_reads $q]
  set ret [llength [execsql $q]]
  do_test "1.7.$tn.desc.(n=$n ret=$ret)" {
    expr {$n < 2*$fraction*$nReadX && $ret==$cnt}
  } {1}
}

do_execsql_test 1.8.1 {
  SELECT count(*) FROM t1 WHERE t1 MATCH 'x' AND +rowid < 'text';
} {10000}
do_execsql_test 1.8.2 {
  SELECT count(*) FROM t1 WHERE t1 MATCH 'x' AND rowid < 'text';
} {10000}


#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}

finish_test

Added ext/fts5/test/fts5ai.test.














































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# 2014 June 17
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the FTS5 module.
#
# Specifically, it tests transactions and savepoints
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ai

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(a);
} {}

do_execsql_test 1.1 {
  BEGIN;
    INSERT INTO t1 VALUES('a b c');
    INSERT INTO t1 VALUES('d e f');
    SAVEPOINT one;
      INSERT INTO t1 VALUES('g h i');
      SAVEPOINT two;
        INSERT INTO t1 VALUES('j k l');
    ROLLBACK TO one;
      INSERT INTO t1 VALUES('m n o');
        SAVEPOINT two;
        INSERT INTO t1 VALUES('p q r');
    RELEASE one;
    SAVEPOINT one;
      INSERT INTO t1 VALUES('s t u');
    ROLLBACK TO one;
  COMMIT;
}

do_execsql_test 1.2 {
  INSERT INTO t1(t1) VALUES('integrity-check');
}


finish_test

Added ext/fts5/test/fts5aj.test.










































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# 2014 June 17
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the FTS5 module.
#
# Specifically, this tests that, provided the amount of data remains 
# constant, the FTS index does not grow indefinitely as rows are inserted 
# and deleted,
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5aj

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

proc doc {} {
  set dict [list a b c d e f g h i j k l m n o p q r s t u v w x y z]
  set res [list]
  for {set i 0} {$i < 20} {incr i} {
    lappend res [lindex $dict [expr int(rand() * 26)]]
  }
  set res
}

proc structure {} {
  set val [db one {SELECT fts5_decode(rowid,block) FROM t1_data WHERE rowid=10}]
  foreach lvl [lrange $val 1 end] {
    lappend res [expr [llength $lvl]-2]
  }
  set res
}

expr srand(0)
do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  INSERT INTO t1(t1, rank) VALUES('pgsz', 64);
}

for {set iTest 0} {$iTest < 50000} {incr iTest} {
  if {$iTest > 1000} { execsql { DELETE FROM t1 WHERE rowid=($iTest-1000) } }
  set new [doc]
  execsql { INSERT INTO t1 VALUES($new) }
  if {$iTest==10000} { set sz1 [db one {SELECT count(*) FROM t1_data}] }
  if {0==($iTest % 1000)} {
    set sz [db one {SELECT count(*) FROM t1_data}]
    set s [structure]
    do_execsql_test 1.$iTest.$sz.{$s} {
      INSERT INTO t1(t1) VALUES('integrity-check') 
    }
  }
}

do_execsql_test 2.0 { INSERT INTO t1(t1) VALUES('integrity-check') }


finish_test

Added ext/fts5/test/fts5ak.test.






























































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# 2014 November 24
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the FTS5 module.
#
# Specifically, the auxiliary function "highlight".
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ak

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

do_execsql_test 1.1 {
  CREATE VIRTUAL TABLE ft1 USING fts5(x);
  INSERT INTO ft1 VALUES('i d d a g i b g d d');
  INSERT INTO ft1 VALUES('h d b j c c g a c a');
  INSERT INTO ft1 VALUES('e j a e f h b f h h');
  INSERT INTO ft1 VALUES('j f h d g h i b d f');
  INSERT INTO ft1 VALUES('d c j d c j b c g e');
  INSERT INTO ft1 VALUES('i a d e g j g d a a');
  INSERT INTO ft1 VALUES('j f c e d a h j d b');
  INSERT INTO ft1 VALUES('i c c f a d g h j e');
  INSERT INTO ft1 VALUES('i d i g c d c h b f');
  INSERT INTO ft1 VALUES('g d a e h a b c f j');
}

do_execsql_test 1.2 {
  SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'e';
} {
  {[e] j a [e] f h b f h h}
  {d c j d c j b c g [e]}
  {i a d [e] g j g d a a}
  {j f c [e] d a h j d b}
  {i c c f a d g h j [e]}
  {g d a [e] h a b c f j}
}

do_execsql_test 1.3 {
  SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'h + d';
} {
  {[h d] b j c c g a c a}
  {j f [h d] g h i b d f} 
}

do_execsql_test 1.4 {
  SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'd + d';
} {
  {i [d d] a g i b g [d d]}
}

do_execsql_test 1.5 {
  SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'e e e'
} {
  {[e] j a [e] f h b f h h}
  {d c j d c j b c g [e]}
  {i a d [e] g j g d a a}
  {j f c [e] d a h j d b}
  {i c c f a d g h j [e]}
  {g d a [e] h a b c f j}
}

do_execsql_test 1.6 {
  SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'd + d d + d';
} {
  {i [d d] a g i b g [d d]}
}

do_execsql_test 2.1 {
  CREATE VIRTUAL TABLE ft2 USING fts5(x);
  INSERT INTO ft2 VALUES('a b c d e f g h i j');
}

do_execsql_test 2.2 {
  SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'b+c+d c+d+e'
} {{a [b c d e] f g h i j}}

do_execsql_test 2.3 {
  SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'b+c+d e+f+g'
} {
  {a [b c d] [e f g] h i j}
}

do_execsql_test 2.4 {
  SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'b+c+d c'
} {
  {a [b c d] e f g h i j}
}

do_execsql_test 2.5 {
  SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'b+c c+d+e'
} {
  {a [b c d e] f g h i j}
}

do_execsql_test 2.6.1 {
  SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'f d'
} {
  {a b c [d] e [f] g h i j}
}

do_execsql_test 2.6.2 {
  SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'd f'
} {
  {a b c [d] e [f] g h i j}
}

#-------------------------------------------------------------------------
# The example from the docs.
#
do_execsql_test 3.1 {
  -- Assuming this:
  CREATE VIRTUAL TABLE ft USING fts5(a);
  INSERT INTO ft VALUES('a b c x c d e');
  INSERT INTO ft VALUES('a b c c d e');
  INSERT INTO ft VALUES('a b c d e');

  -- The following SELECT statement returns these three rows:
  --   '[a b c] x [c d e]'
  --   '[a b c] [c d e]'
  --   '[a b c d e]'
  SELECT highlight(ft, 0, '[', ']') FROM ft WHERE ft MATCH 'a+b+c AND c+d+e';
} {
  {[a b c] x [c d e]}
  {[a b c] [c d e]}
  {[a b c d e]}
}


finish_test

Added ext/fts5/test/fts5al.test.


















































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
# 2014 November 24
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the FTS5 module.
#
# Specifically, this function tests the %_config table.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5al

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

do_execsql_test 1.1 {
  CREATE VIRTUAL TABLE ft1 USING fts5(x);
  SELECT * FROM ft1_config;
} {version 2}

do_execsql_test 1.2 {
  INSERT INTO ft1(ft1, rank) VALUES('pgsz', 32);
  SELECT * FROM ft1_config;
} {pgsz 32 version 2}

do_execsql_test 1.3 {
  INSERT INTO ft1(ft1, rank) VALUES('pgsz', 64);
  SELECT * FROM ft1_config;
} {pgsz 64 version 2}

#--------------------------------------------------------------------------
# Test the logic for parsing the rank() function definition.
#
foreach {tn defn} {
  1 "fname()"
  2 "fname(1)"
  3 "fname(1,2)"
  4 "fname(null,NULL,nUlL)"
  5 "  fname    (   null  ,   NULL  ,  nUlL  )  "
  6 "fname('abc')"
  7 "fname('a''bc')"
  8 "fname('''abc')"
  9 "fname('abc''')"

  7 "fname(  'a''bc'  )"
  8 "fname('''abc'  )"
  9 "fname(  'abc''' )"

  10 "fname(X'1234ab')"

  11 "myfunc(1.2)"
  12 "myfunc(-1.0)"
  13 "myfunc(.01,'abc')"
} {
  do_execsql_test 2.1.$tn {
    INSERT INTO ft1(ft1, rank) VALUES('rank', $defn);
  }
}

foreach {tn defn} {
  1 ""
  2 "fname"
  3 "fname(X'234ab')"
  4 "myfunc(-1.,'abc')"
} {
  do_test 2.2.$tn {
    catchsql { INSERT INTO ft1(ft1, rank) VALUES('rank', $defn) }
  } {1 {SQL logic error or missing database}}
}

#-------------------------------------------------------------------------
# Assorted tests of the tcl interface for creating extension functions.
#

do_execsql_test 3.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  INSERT INTO t1 VALUES('q w e r t y');
  INSERT INTO t1 VALUES('y t r e w q');
}

proc argtest {cmd args} { return $args }
sqlite3_fts5_create_function db argtest argtest

do_execsql_test 3.2.1 {
  SELECT argtest(t1, 123) FROM t1 WHERE t1 MATCH 'q'
} {123 123}

do_execsql_test 3.2.2 {
  SELECT argtest(t1, 123, 456) FROM t1 WHERE t1 MATCH 'q'
} {{123 456} {123 456}}

proc rowidtest {cmd} { $cmd xRowid }
sqlite3_fts5_create_function db rowidtest rowidtest

do_execsql_test 3.3.1 {
  SELECT rowidtest(t1) FROM t1 WHERE t1 MATCH 'q'
} {1 2}

proc insttest {cmd} {
  set res [list]
  for {set i 0} {$i < [$cmd xInstCount]} {incr i} {
    lappend res [$cmd xInst $i]
  }
  set res
}
sqlite3_fts5_create_function db insttest insttest

do_execsql_test 3.4.1 {
  SELECT insttest(t1) FROM t1 WHERE t1 MATCH 'q'
} {
  {{0 0 0}}
  {{0 0 5}} 
}

do_execsql_test 3.4.2 {
  SELECT insttest(t1) FROM t1 WHERE t1 MATCH 'r+e OR w'
} {
  {{1 0 1}}
  {{0 0 2} {1 0 4}} 
}

proc coltest {cmd} {
  list [$cmd xColumnSize 0] [$cmd xColumnText 0]
}
sqlite3_fts5_create_function db coltest coltest

do_execsql_test 3.5.1 {
  SELECT coltest(t1) FROM t1 WHERE t1 MATCH 'q'
} {
  {6 {q w e r t y}}
  {6 {y t r e w q}} 
}

#-------------------------------------------------------------------------
# Tests for remapping the "rank" column.
#
#   4.1.*: Mapped to a function with no arguments.
#   4.2.*: Mapped to a function with one or more arguments.
#

do_execsql_test 4.0 {
  CREATE VIRTUAL TABLE t2 USING fts5(a, b);
  INSERT INTO t2 VALUES('a s h g s b j m r h', 's b p a d b b a o e');
  INSERT INTO t2 VALUES('r h n t a g r d d i', 'l d n j r c f t o q');
  INSERT INTO t2 VALUES('q k n i k c a a e m', 'c h n j p g s c i t');
  INSERT INTO t2 VALUES('h j g t r e l s g s', 'k q k c i i c k n s');
  INSERT INTO t2 VALUES('b l k h d n n n m i', 'p t i a r b t q o l');
  INSERT INTO t2 VALUES('k r i l j b g i p a', 't q c h a i m g n l');
  INSERT INTO t2 VALUES('a e c q n m o m d g', 'l c t g i s q g q e');
  INSERT INTO t2 VALUES('b o j h f o g b p e', 'r t l h s b g i c p');
  INSERT INTO t2 VALUES('s q k f q b j g h f', 'n m a o p e i e k t');
  INSERT INTO t2 VALUES('o q g g q c o k a b', 'r t k p t f t h p c');
}

proc firstinst {cmd} { 
  foreach {p c o} [$cmd xInst 0] {}
  expr $c*100 + $o
}
sqlite3_fts5_create_function db firstinst firstinst

do_execsql_test 4.1.1 {
  SELECT rowid, firstinst(t2) FROM t2 WHERE t2 MATCH 'a' ORDER BY rowid ASC
} {
  1 0 2 4 3 6   5  103
  6 9 7 0 9 102 10 8
}

do_execsql_test 4.1.2 {
  SELECT rowid, rank FROM t2 
  WHERE t2 MATCH 'a' AND rank MATCH 'firstinst()' 
  ORDER BY rowid ASC
} {
  1 0 2 4 3 6   5  103
  6 9 7 0 9 102 10 8
}

do_execsql_test 4.1.3 {
  SELECT rowid, rank FROM t2 
  WHERE t2 MATCH 'a' AND rank MATCH 'firstinst()'
  ORDER BY rank DESC
} {
  5 103  9 102  6 9  10 8  3 6  2 4  1 0  7 0  
}

do_execsql_test 4.1.4 {
  INSERT INTO t2(t2, rank) VALUES('rank', 'firstinst()');
  SELECT rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rowid ASC
} {
  1 0 2 4 3 6   5  103
  6 9 7 0 9 102 10 8
}

do_execsql_test 4.1.5 {
  SELECT rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rank DESC
} {
  5 103  9 102  6 9  10 8  3 6  2 4  1 0  7 0  
}

do_execsql_test 4.1.6 {
  INSERT INTO t2(t2, rank) VALUES('rank', 'firstinst (    ) ');
  SELECT rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rank DESC
} {
  5 103  9 102  6 9  10 8  3 6  2 4   1 0  7 0  
}

proc rowidplus {cmd ival} { 
  expr [$cmd xRowid] + $ival
}
sqlite3_fts5_create_function db rowidplus rowidplus

do_execsql_test 4.2.1 {
  INSERT INTO t2(t2, rank) VALUES('rank', 'rowidplus(100) ');
  SELECT rowid, rank FROM t2 WHERE t2 MATCH 'o + q + g'
} {
  10 110
}
do_execsql_test 4.2.2 {
  INSERT INTO t2(t2, rank) VALUES('rank', 'rowidplus(111) ');
  SELECT rowid, rank FROM t2 WHERE t2 MATCH 'o + q + g'
} {
  10 121
}

do_execsql_test 4.2.3 {
  SELECT rowid, rank FROM t2 
  WHERE t2 MATCH 'o + q + g' AND rank MATCH 'rowidplus(112)'
} {
  10 122
}

proc rowidmod {cmd imod} { 
  expr [$cmd xRowid] % $imod
}
sqlite3_fts5_create_function db rowidmod rowidmod
do_execsql_test 4.3.1 {
  CREATE VIRTUAL TABLE t3 USING fts5(x);
  INSERT INTO t3 VALUES('a one');
  INSERT INTO t3 VALUES('a two');
  INSERT INTO t3 VALUES('a three');
  INSERT INTO t3 VALUES('a four');
  INSERT INTO t3 VALUES('a five');
  INSERT INTO t3(t3, rank) VALUES('rank', 'bm25()');
}
breakpoint

do_execsql_test 4.3.2 {
  SELECT * FROM t3
  WHERE t3 MATCH 'a' AND rank MATCH 'rowidmod(4)' 
  ORDER BY rank ASC
} {
  {a four} {a one} {a five} {a two} {a three}
}
do_execsql_test 4.3.3 {
  SELECT *, rank FROM t3
  WHERE t3 MATCH 'a' AND rank MATCH 'rowidmod(3)' 
  ORDER BY rank ASC
} {
  {a three} 0 {a one} 1 {a four} 1 {a two} 2 {a five} 2 
}

do_catchsql_test 4.4.3 {
  SELECT *, rank FROM t3 WHERE t3 MATCH 'a' AND rank MATCH 'xyz(3)' 
} {1 {no such function: xyz}}
do_catchsql_test 4.4.4 {
  SELECT *, rank FROM t3 WHERE t3 MATCH 'a' AND rank MATCH NULL
} {1 {parse error in rank function: }}



finish_test

Added ext/fts5/test/fts5alter.test.












































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# 2015 Jun 10
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# The tests in this file focus on renaming FTS5 tables using the
# "ALTER TABLE ... RENAME TO ..." command
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5alter

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

#-------------------------------------------------------------------------
# Test renaming regular, contentless and columnsize=0 FTS5 tables.
#
do_execsql_test 1.1.0 {
  CREATE VIRTUAL TABLE "a x" USING fts5(a, x);
  INSERT INTO "a x" VALUES('a a a', 'x x x');
  ALTER TABLE "a x" RENAME TO "x y";
}
do_execsql_test 1.1.1 {
  SELECT * FROM "x y";
  SELECT rowid FROM "x y" WHERE "x y" MATCH 'a'
} {{a a a} {x x x} 1}

do_execsql_test 1.2.0 {
  CREATE VIRTUAL TABLE "one/two" USING fts5(one, columnsize=0);
  INSERT INTO "one/two"(rowid, one) VALUES(456, 'd d d');
  ALTER TABLE "one/two" RENAME TO "three/four";
}
do_execsql_test 1.2.1 {
  SELECT * FROM "three/four";
  SELECT rowid FROM "three/four" WHERE "three/four" MATCH 'd'
} {{d d d} 456}

do_execsql_test 1.3.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(val, content='');
  INSERT INTO t1(rowid, val) VALUES(-1, 'drop table');
  INSERT INTO t1(rowid, val) VALUES(-2, 'drop view');
  ALTER TABLE t1 RENAME TO t2;
}
do_execsql_test 1.3.1 {
  SELECT rowid, * FROM t2;
  SELECT rowid FROM t2 WHERE t2 MATCH 'table'
} {-2 {} -1 {} -1}

#-------------------------------------------------------------------------
# Test renaming an FTS5 table within a transaction.
#
do_execsql_test 2.1 {
  CREATE VIRTUAL TABLE zz USING fts5(a);
  INSERT INTO zz(rowid, a) VALUES(-56, 'a b c');
  BEGIN;
    INSERT INTO zz(rowid, a) VALUES(-22, 'a b c');
    ALTER TABLE zz RENAME TO yy;
    SELECT rowid FROM yy WHERE yy MATCH 'a + b + c';
  COMMIT;
} {-56 -22}

do_execsql_test 2.2 {
  BEGIN;
    ALTER TABLE yy RENAME TO ww;
    INSERT INTO ww(rowid, a) VALUES(-11, 'a b c');
    SELECT rowid FROM ww WHERE ww MATCH 'a + b + c';
} {-56 -22 -11}

do_execsql_test 2.3 {
  ROLLBACK;
  SELECT rowid FROM yy WHERE yy MATCH 'a + b + c';
} {-56 -22}


finish_test

Added ext/fts5/test/fts5auto.test.






















































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
# 2015 May 30
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
#
# This file contains automatically generated tests for various types
# of MATCH expressions.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5auto

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}


set data {
    -4026076
    {n x w k b p x b n t t d s}     {f j j s p j o}               
    {w v i y r}                     {i p y s}                     
    {a o q v e n q r}               {q v g u c y a z y}           
    3995120
    {c}                             {e e w d t}                   
    {x c p f w r s m l r b f d}     {g g u e}                     
    {s n u t d v p d}               {b k v p m f}                 
    -2913881
    {k m}                           {a}                           
    {w r j z n s l}                 {m j i w d t w e l}           
    {z n c}                         {v f b m}                     
    174082
    {j}                             {q l w u k e q v r i}         
    {j l}                           {u v w r s p e l}             
    {p i k j k q c t g u s}         {g u y s m h q k g t e s o r} 
    3207399
    {e t}                           {}                            
    {p}                             {y v r b e k h d e v}         
    {t m w z b g q t s d d h}       {o n v u i t o y k j}         
    182399
    {}                              {m o s o x d y f a x j z}     
    {x n z r c d}                   {n r x i r}                   
    {s v s}                         {a u}                         
    768994
    {e u t q v z q k j p u f j p}   {y c b}                       
    {p s d}                         {k n w p m p p}               
    {u o x s d}                     {f s g r d b d r m m m z y}   
    3931037
    {c j p x e}                     {c n k t h z o i}             
    {}                              {r r p j k x w q}             
    {o r d z d}                     {x}                           
    3105748
    {p x r u}                       {x i s w o t o g x m z i w}   
    {q x m z}                       {h c j w b l y w x c o}       
    {m b k v}                       {t v q i s a d x}             
    -2501642
    {o u d n w o m o o s n t r h}   {k p e u y p e z d j r y g}   
    {v b b h d d q y j q j}         {a m w d t}                   
    {y e f n}                       {a k x i x}                   
    -1745680
    {z u w j f d b f}               {j w i c g u d w e}           
    {m f p v m a s p v c o s}       {s c r z o t w l b e a q}     
    {m k q}                         {k b a v o}                   
    -932328
    {r v i u m q d r}               {f z u v h c m r f g}         
    {r x r}                         {k p i d h h w h z u a x}     
    {k m j p}                       {h l j a e u c i q x x f x g} 
    -3923818
    {t t p b n u i h e c k}         {m z}                         
    {v u d c}                       {v y y j s g}                 
    {o a f k k q p h g x e n z x}   {h d w c o l}                 
    -2145922
    {z z l f a l g e d c d h}       {j b j p k o o u b q}         
    {d i g q t f d r h k}           {n w g j c x r p t y f l c t} 
    {d o c u k f o}                 {r y s x z s p p h g t p y c} 
    4552917
    {j w j y h l k u}               {n a}                         
    {y h w c n k}                   {b}                           
    {w}                             {z l r t s i m v c y}         
    2292008
    {q v q j w y y x u t}           {r q z n h a b o}             
    {d q y}                         {y v o e j}                   
    {}                              {a b h c d l p d x}           
    1407892
    {n j j u q d o a u c f}         {r d b w o q n g}             
    {d e v w s}                     {v d v o u o x s l s j z y}   
    {j y w h i f g i h m}           {v n z b n y}                 
    -4412544
    {g h h r s}                     {h e r e}                     
    {n q s}                         {o p z r m l l t}             
    {p}                             {f s u o b j}                 
    1209110
    {o a a z t t u h j}             {z z i r k r}                 
    {i c x q w g v o x z i z p}     {q o g k i n z x e d v w v}   
    {p f v b g f e d n p u c y k}   {q z z a i p a a s r e z}     
    3448977
    {i v}                           {l u x t b o k}               
    {f h u v p}                     {k a o y j}                   
    {d m k c j}                     {v c e r u e f i t}           
    -4703774
    {d h v w u z r e h x o l t}     {p s f y w y r q d a m w}     
    {c h g c g j j f t b i c q}     {s e}                         
    {c t q j g f}                   {v n r w y r a g e j d}       
    2414151
    {s o o s d s k q b f q v p e}   {j r o b t o p d l o o x}     
    {d d k t v e}                   {}                            
    {t v o d w}                     {w e q w h y c y y i j b a m} 
    -3342407
    {m c h n e p d o c r w n t}     {j d k s p q l}               
    {t g s r w x j l r z r}         {h}                           
    {r q v x i r a n h s}           {m y p b v w r a u o g q r}   
    -993951
    {l n p u o j d x t u u c o j}   {k r n a r e k v i t o e}     
    {q f t t a a c z v f}           {o n m p v f o e n}           
    {h z h i p s b j z h}           {i t w m k c u g n i}         
    1575251
    {}                              {z s i j d o x j a r t}       
    {h g j u j n v e n z}           {p z j n n f}                 
    {s q q f d w r l y i z d o m}   {b a n d h t b y g h d}       
    4263668
    {q g t h f s}                   {s g x p f q z i s o f l i}   
    {q k}                           {w v h a x n a r b}           
    {m j a h o b i x k r w z q u}   {m t r g j o e q t m p u l}   
    2487819
    {m w g x r n e u t s r}         {b x a t u u j c r n}         
    {j}                             {w f j r e e y l p}           
    {o u h b}                       {o c a c a b v}               
    167966
    {o d b s d o a u m o x y}       {c}                           
    {r w d o b v}                   {z e b}                       
    {i n z a f g z o}               {m u b a g}                   
    1948599
    {n r g q d j s}                 {n k}                         
    {l b p d v t k h y y}           {u m k e c}                   
    {t b n y o t b}                 {j w c i r x x}               
    2941631
    {l d p l b g f}                 {e k e}                       
    {p j}                           {m c s w t b k n l d x}       
    {f o v y v l}                   {c w p s w j w c u t y}       
    3561104
    {d r j j r j i g p}             {u}                           
    {g r j q}                       {z l p d s n f c h t d c v z} 
    {w r c f s x z y}               {g f o k g g}                 
    -2223281
    {y e t j j z f p o m m z}       {h k o g o}                   
    {m x a t}                       {l q x l}                     
    {r w k d l s y b}               {q g k b}                     
    -4502874
    {k k b x k l f}                 {r}                           
    {}                              {q m z b k h k u n e z}       
    {z q g y m y u}                 {}                            
    1757599
    {d p z j y u r}                 {z p l q w j t j}             
    {n i r x r y j}                 {}                            
    {h}                             {w t d q c x z z x e e}       
    -4809589
    {}                              {z p x u h i i n g}           
    {w q s u d b f x n}             {l y k b b r x t i}           
    {n d v j q o t o d p z e}       {u r y u v u c}               
    1068408
    {y e}                           {e g s k e w t p v o b k}     
    {z c m s}                       {r u r u h n h b p q g b}     
    {j k b l}                       {m c d t s r s q a d b o f}   
    -1972554
    {m s w}                         {d k v s a r k p a r i v}     
    {g j z k p}                     {y k c v r e u o q f i b a}   
    {i p i}                         {c z w c y b n z i v}         
    -2052385
    {}                              {x e u f f g n c i x n e i e} 
    {}                              {p s w d x p g}               
    {}                              {s j a h n}                   
    2805981
    {m x g c w o e}                 {k g u y r y i u e g g}       
    {f k j v t x p h x k u}         {w i}                         
    {b l f z f v t n}               {i u d o d p h s m u}         
    2507621
    {}                              {u b n l x f n j t}           
    {u r x l h}                     {h r l m r}                   
    {d y e n b s q v t k n q q}     {x l t v w h a s k}           
    -3138375
    {e o f j y x u w v e w z}       {r d q g k n n v r c z n e w} 
    {l y i q z k j p u f q s k}     {c i l l i m a a g a z r x f} 
    {a v k h m q z b y n z}         {q g w c y r r o a}           
    -457971
    {j x a w e c s h f l f}         {q}                           
    {j f v j u m d q r v v}         {x n v a w}                   
    {i e h d h f u w t t z}         {v s u l s v o v i k n e}     
    2265221
    {z t c y w n y r t}             {n b a x s}                   
    {q w a v}                       {a b s d x i g w t e z h}     
    {t l}                           {j k r w f f y j o k u}       
    -3941280
    {r x t o z}                     {f j n z k}                   
    {t x e b t d b k w i s}         {j t y h i h}                 
    {y q g n g s u v c z j z n g}   {n n g t l p h}               
    2084745
    {z d z d}                       {j}                           
    {o e k t b k a z l w}           {o p i h k c x}               
    {c r b t i j f}                 {z e n m}                     
    1265843
    {}                              {j s g j j x u y}             
    {u q t f}                       {g o g}                       
    {w o j e d}                     {w q n a c t q x j}           
    -2941116
    {i n c u o}                     {f b}                         
    {o m s q d o z a q}             {f s v o b b}                 
    {o a z c h r}                   {j e w h b f z}               
    -1265441
    {p g z q v a o a x a}           {s t h}                       
    {w i p o c}                     {s n d g f z w q o d v v l j} 
    {y f b i a s v}                 {u m o z k k s t s d p b l p} 
    -1989158
    {r i c n}                       {r e w w i n z}               
    {q u s y b w u g y g f o}       {y}                           
    {d}                             {j x i b x u y d c p v a h}   
    2391989
    {b n w x w f q h p i}           {e u b b i n a i o c d g}     
    {v a z o i e n l x l r}         {r u f o r k w m d w}         
    {k s}                           {r f e j q p w}               
}

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE tt USING fts5(a, b, c, d, e, f);
} {}

fts5_aux_test_functions db

proc matchdata {expr tbl collist {order ASC}} {

  set cols ""
  foreach e $collist {
    append cols ", '$e'"
  }

  set tclexpr [db one [subst -novar {
    SELECT fts5_expr_tcl(
      $expr, 'nearset $cols -pc ::pc' [set cols]
    )
  }]]
  set res [list]

  db eval "SELECT rowid, * FROM $tbl ORDER BY rowid $order" x {
    set cols [list]
    foreach col $x(*) {
      if {$col != "rowid"} { lappend cols $x($col) }
    }
    # set cols [list $a $b $c $d $e $f]
    set ::pc 0
    set rowdata [eval $tclexpr]
    if {$rowdata != ""} { lappend res $x(rowid) $rowdata }
  }

  set res
}

proc do_auto_test {tn tbl cols expr} { 
  foreach order {asc desc} {
    set res [matchdata $expr $tbl $cols $order]
    set testname "$tn.[string range $order 0 0].rows=[expr [llength $res]/2]"

    set ::autotest_expr $expr
    do_execsql_test $testname [subst -novar {
      SELECT rowid, fts5_test_poslist([set tbl]) FROM [set tbl] 
      WHERE [set tbl] MATCH $::autotest_expr ORDER BY rowid [set order]
    }] $res
  }


}

#-------------------------------------------------------------------------
#

for {set fold 0} {$fold < 3} {incr fold} {
  switch $fold {
    0 { set map {} }
    1 { set map {
      a a  b a  c b  d b  e c  f c  g d  h d  
      i e  j e  k f  l f  m g  g g  o h  p h
      q i  r i  s j  t j  u k  v k  w l  x l
      y m  z m
    }}

    2 { set map {
      a a  b a  c a  d a  e a  f a  g a  h a  
      i b  j b  k b  l b  m b  g b  o b  p b
      q c  r c  s c  t c  u c  v c  w c  x c
    }}
  }

  execsql {
    BEGIN;
    DELETE FROM tt;
  }
  foreach {rowid a b c d e f} [string map $map $data] {
  if {$rowid==-4703774} {
    execsql {
      INSERT INTO tt(rowid, a, b, c, d, e, f) 
      VALUES($rowid, $a, $b, $c, $d, $e, $f)
    }
    }
  }
  execsql COMMIT


  foreach {tn expr} {
    A.1 { {a} : x }
    A.2 { {a b} : x }
    A.3 { {a b f} : x }
    A.4 { {f a b} : x }
    A.5 { {f a b} : x y }
    A.6 { {f a b} : x + y }
    A.7 { {c a b} : x + c }
    A.8 { {c d} : "l m" }
    A.9 { {c e} : "l m" }
    A.10 { {a b c a b c a b c f f e} : "l m" }

    B.1 { a NOT b }
    B.2 { a NOT a:b }
    B.3 { a OR (b AND c) }
    B.4 { a OR (b AND {a b c}:c) }
    B.5 { a OR "b c" }
    B.6 { a OR b OR c }

    C.1 { a OR (b AND "b c") }
    C.2 { a OR (b AND "z c") }
  } {
    do_auto_test 3.$fold.$tn tt {a b c d e f} $expr
  }
}

proc replace_elems {list args} {
  set ret $list
  foreach {idx elem} $args {
    set ret [lreplace $ret $idx $idx $elem]
  }
  set ret
}

#-------------------------------------------------------------------------
#
set bigdoc [string trim [string repeat "a " 1000]]
do_test 4.0 {
  set a [replace_elems $bigdoc  50 x  950 x]
  set b [replace_elems $bigdoc  20 y   21 x  887 x 888 y]
  set c [replace_elems $bigdoc   1 z  444 z  789 z]
  execsql {
    CREATE VIRTUAL TABLE yy USING fts5(c1, c2, c3);
    INSERT INTO yy(rowid, c1, c2, c3) VALUES(-56789, $a, $b, $c);
    INSERT INTO yy(rowid, c1, c2, c3) VALUES(250, $a, $b, $c);
  }
} {}

foreach {tn expr} {
  1 x    
  2 y    
  3 z

  4 {c1 : x} 5 {c2 : x} 6 {c3 : x}
  7 {c1 : y} 8 {c2 : y} 9 {c3 : y}
  10 {c1 : z} 11 {c2 : z} 12 {c3 : z}


} {
breakpoint
  do_auto_test 4.$tn yy {c1 c2 c3} $expr
}



finish_test

Added ext/fts5/test/fts5aux.test.




















































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
# 2014 Dec 20
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on the auxiliary function APIs.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5aux

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

proc inst {cmd i} { 
  $cmd xInst $i
}
sqlite3_fts5_create_function db inst inst

proc colsize {cmd i} { 
  $cmd xColumnSize $i
}
sqlite3_fts5_create_function db colsize colsize

proc totalsize {cmd i} { 
  $cmd xColumnTotalSize $i
}
sqlite3_fts5_create_function db totalsize totalsize

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE f1 USING fts5(a, b);
  INSERT INTO f1 VALUES('one two', 'two one zero');
  INSERT INTO f1 VALUES('one one', 'one one one');
}

do_catchsql_test 1.1 {
  SELECT inst(f1, -1) FROM f1 WHERE f1 MATCH 'two';
} {1 SQLITE_RANGE}
do_catchsql_test 1.2 {
  SELECT inst(f1, 0) FROM f1 WHERE f1 MATCH 'two';
} {0 {{0 0 1}}}
do_catchsql_test 1.3 {
  SELECT inst(f1, 1) FROM f1 WHERE f1 MATCH 'two';
} {0 {{0 1 0}}}
do_catchsql_test 1.4 {
  SELECT inst(f1, 2) FROM f1 WHERE f1 MATCH 'two';
} {1 SQLITE_RANGE}

do_catchsql_test 2.1 {
  SELECT colsize(f1, 2) FROM f1 WHERE f1 MATCH 'two';
} {1 SQLITE_RANGE}
do_execsql_test 2.2 {
  SELECT colsize(f1, 0), colsize(f1, 1) FROM f1 WHERE f1 MATCH 'zero';
} {2 3}
do_execsql_test 2.3 {
  SELECT colsize(f1, -1) FROM f1 WHERE f1 MATCH 'zero';
} {5}

do_execsql_test 2.4.1 {
  SELECT totalsize(f1, -1) FROM f1 WHERE f1 MATCH 'zero';
} {10}
do_execsql_test 2.4.2 {
  SELECT totalsize(f1, 0) FROM f1 WHERE f1 MATCH 'zero';
} {4}
do_execsql_test 2.4.3 {
  SELECT totalsize(f1, 1) FROM f1 WHERE f1 MATCH 'zero';
} {6}
do_catchsql_test 2.4.4 {
  SELECT totalsize(f1, 2) FROM f1 WHERE f1 MATCH 'zero';
} {1 SQLITE_RANGE}

#-------------------------------------------------------------------------
# Test the xSet and xGetAuxdata APIs with a NULL destructor.
#
proc prevrowid {add cmd} {
  set res [$cmd xGetAuxdataInt 0]
  set r [$cmd xRowid]
  $cmd xSetAuxdataInt $r
  return [expr $res + $add]
}
sqlite3_fts5_create_function db prevrowid  [list prevrowid 0]
sqlite3_fts5_create_function db prevrowid1 [list prevrowid 1]

do_execsql_test 3.0 {
  CREATE VIRTUAL TABLE e5 USING fts5(x);
  INSERT INTO e5 VALUES('a b c');
  INSERT INTO e5 VALUES('d e f');
  INSERT INTO e5 VALUES('a b c');
  INSERT INTO e5 VALUES('d e f');
  INSERT INTO e5 VALUES('a b c');
}

do_execsql_test 3.1 {
  SELECT prevrowid(e5) || '+' || rowid FROM e5 WHERE e5 MATCH 'c'
} {0+1   1+3   3+5}

do_execsql_test 3.2 {
  SELECT prevrowid(e5) || '+' || prevrowid1(e5) || '+' || rowid 
  FROM e5 WHERE e5 MATCH 'e'
} {0+1+2    2+3+4}

#-------------------------------------------------------------------------
# Test that if the xQueryPhrase callback returns other than SQLITE_OK,
# the query is abandoned. And that if it returns an error code other than 
# SQLITE_DONE, the error is propagated back to the caller.
#
do_execsql_test 4.0 {
  CREATE VIRTUAL TABLE e7 USING fts5(x);
  INSERT INTO e7 VALUES('a x a');
  INSERT INTO e7 VALUES('b x b');
  INSERT INTO e7 VALUES('c x c');
  INSERT INTO e7 VALUES('d x d');
  INSERT INTO e7 VALUES('e x e');
}

proc xCallback {rowid code cmd} {
  set r [$cmd xRowid]
  lappend ::cb $r
  if {$r==$rowid} { return $code }
  return ""
}

proc phrasequery {cmd code} {
  set ::cb [list]
  $cmd xQueryPhrase 1 [list xCallback [$cmd xRowid] $code]
  set ::cb
}

sqlite3_fts5_create_function db phrasequery phrasequery

do_execsql_test 4.1 {
  SELECT phrasequery(e7, 'SQLITE_OK') FROM e7 WHERE e7 MATCH 'c x'
} {{1 2 3 4 5}}

do_execsql_test 4.2 {
  SELECT phrasequery(e7, 'SQLITE_DONE') FROM e7 WHERE e7 MATCH 'c x'
} {{1 2 3}}

do_catchsql_test 4.3 {
  SELECT phrasequery(e7, 'SQLITE_ERROR') FROM e7 WHERE e7 MATCH 'c x'
} {1 SQLITE_ERROR}

#-------------------------------------------------------------------------
# Auxiliary function calls with many cursors in the global cursor list.
#
do_execsql_test 5.0 {
  CREATE VIRTUAL TABLE e9 USING fts5(y);
  INSERT INTO e9(rowid, y) VALUES(1, 'i iii');
  INSERT INTO e9(rowid, y) VALUES(2, 'ii iv');
  INSERT INTO e9(rowid, y) VALUES(3, 'ii');
  INSERT INTO e9(rowid, y) VALUES(4, 'i iv');
  INSERT INTO e9(rowid, y) VALUES(5, 'iii');
}

proc my_rowid {cmd} { $cmd xRowid }
sqlite3_fts5_create_function db my_rowid my_rowid

foreach {var q} {
  s1 i
  s2 ii
  s3 iii
  s4 iv
} {
  set sql "SELECT my_rowid(e9) FROM e9 WHERE e9 MATCH '$q'"
  set $var [sqlite3_prepare db $sql -1 dummy]
}

do_test 5.1.1 { sqlite3_step $s1 ; sqlite3_column_int $s1 0 } 1
do_test 5.1.2 { sqlite3_step $s2 ; sqlite3_column_int $s2 0 } 2
do_test 5.1.3 { sqlite3_step $s3 ; sqlite3_column_int $s3 0 } 1
do_test 5.1.4 { sqlite3_step $s4 ; sqlite3_column_int $s4 0 } 2

do_test 5.2.1 { sqlite3_step $s1 ; sqlite3_column_int $s1 0 } 4
do_test 5.2.2 { sqlite3_step $s2 ; sqlite3_column_int $s2 0 } 3
do_test 5.2.3 { sqlite3_step $s3 ; sqlite3_column_int $s3 0 } 5
do_test 5.2.4 { sqlite3_step $s4 ; sqlite3_column_int $s4 0 } 4

sqlite3_finalize $s1
sqlite3_finalize $s2
sqlite3_finalize $s3
sqlite3_finalize $s4

#-------------------------------------------------------------------------
# Passing an invalid first argument to an auxiliary function is detected.
#
do_execsql_test 6.0 {
  CREATE VIRTUAL TABLE e11 USING fts5(y, z);
  INSERT INTO e11(rowid, y, z) VALUES(1, 'a b', 45);
  INSERT INTO e11(rowid, y, z) VALUES(2, 'b c', 46);
}

do_catchsql_test 6.1 {
  SELECT my_rowid(z) FROM e11 WHERE e11 MATCH 'b'
} {1 {no such cursor: 45}}

do_catchsql_test 6.2 {
  SELECT my_rowid(y) FROM e11 WHERE e11 MATCH 'b'
} {1 {no such cursor: 0}}

#-------------------------------------------------------------------------
# Test passing an out-of-range phrase number to xPhraseSize (should 
# return 0).
#
proc my_phrasesize {cmd iPhrase} { $cmd xPhraseSize $iPhrase }
sqlite3_fts5_create_function db my_phrasesize my_phrasesize

do_execsql_test 7.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(a);
  INSERT INTO t1 VALUES('a b c');
}
do_execsql_test 7.2 {
  SELECT 
    my_phrasesize(t1, -1),
    my_phrasesize(t1, 0),
    my_phrasesize(t1, 1),
    my_phrasesize(t1, 2)
  FROM t1 WHERE t1 MATCH 'a OR b+c'
} {0 1 2 0}

#-------------------------------------------------------------------------
#
do_execsql_test 8.0 {
  CREATE VIRTUAL TABLE x1 USING fts5(a);
}

foreach {tn lRow res} {
  4  {"a a a" "b" "a d"} {"[a] [a] [a]" "[a] d"}
  1  {"b d" "a b"}       {"[b] [d]" "[a] b"}
  2  {"d b" "a d"}       {"[d] [b]" "[a] d"}
  3  {"a a d"}           {"[a] [a] d"}
} {
  execsql { DELETE FROM x1 }
  foreach row $lRow { execsql { INSERT INTO x1 VALUES($row) } }
  breakpoint
  do_execsql_test 8.$tn {
    SELECT highlight(x1, 0, '[', ']') FROM x1 WHERE x1 MATCH 'a OR (b AND d)';
  } $res
}

finish_test

Added ext/fts5/test/fts5auxdata.test.






































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# 2014 Dec 20
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on the fts5 xSetAuxdata() and xGetAuxdata() APIs.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5auxdata

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE f1 USING fts5(a, b);
  INSERT INTO f1(rowid, a, b) VALUES(1, 'a', 'b1');
  INSERT INTO f1(rowid, a, b) VALUES(2, 'a', 'b2');
  INSERT INTO f1(rowid, a, b) VALUES(3, 'a', 'b3');
  INSERT INTO f1(rowid, a, b) VALUES(4, 'a', 'b4');
  INSERT INTO f1(rowid, a, b) VALUES(5, 'a', 'b5');
}

proc aux_function_1 {cmd tn} {
  switch [$cmd xRowid] {
    1 {
      do_test $tn.1 [list $cmd xGetAuxdata 0 ] {}
      $cmd xSetAuxdata "one"
    }

    2 {
      do_test $tn.2 [list $cmd xGetAuxdata 0 ] {one}
      $cmd xSetAuxdata "two"
    }

    3 {
      do_test $tn.3 [list $cmd xGetAuxdata 0 ] {two}
    }

    4 {
      do_test $tn.4 [list $cmd xGetAuxdata 1 ] {two}
    }

    5 {
      do_test $tn.5 [list $cmd xGetAuxdata 0 ] {}
    }
  }
}

sqlite3_fts5_create_function db aux_function_1 aux_function_1
db eval { 
  SELECT aux_function_1(f1, 1) FROM f1 WHERE f1 MATCH 'a'
  ORDER BY rowid ASC
}

proc aux_function_2 {cmd tn inst} {
  if {$inst == "A"} {
    switch [$cmd xRowid] {
      1 {
        do_test $tn.1.$inst [list $cmd xGetAuxdata 0 ] {}
        $cmd xSetAuxdata "one $inst"
      }
      2 {
        do_test $tn.2.$inst [list $cmd xGetAuxdata 0 ] "one $inst"
        $cmd xSetAuxdata "two $inst"
      }
      3 {
        do_test $tn.3.$inst [list $cmd xGetAuxdata 0 ] "two $inst"
      }
      4 {
        do_test $tn.4.$inst [list $cmd xGetAuxdata 1 ] "two $inst"
      }
      5 {
        do_test $tn.5.$inst [list $cmd xGetAuxdata 0 ] {}
      }
    }
  } else {
    switch [$cmd xRowid] {
      1 {
        do_test $tn.1.$inst [list $cmd xGetAuxdata 0 ] "one A"
      }
      2 {
        do_test $tn.2.$inst [list $cmd xGetAuxdata 0 ] "two A"
      }
      3 {
        do_test $tn.3.$inst [list $cmd xGetAuxdata 0 ] "two A"
      }
      4 {
        do_test $tn.4.$inst [list $cmd xGetAuxdata 0 ] {}
      }
      5 {
        do_test $tn.5.$inst [list $cmd xGetAuxdata 0 ] {}
      }
    }
  }
}

sqlite3_fts5_create_function db aux_function_2 aux_function_2
db eval { 
  SELECT aux_function_2(f1, 2, 'A'), aux_function_2(f1, 2, 'B') 
  FROM f1 WHERE f1 MATCH 'a'
  ORDER BY rowid ASC
}

finish_test

Added ext/fts5/test/fts5bigpl.test.
































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# 2015 April 21
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This test is focused on really large position lists. Those that require
# 4 or 5 byte position-list size varints. Because of the amount of memory
# required, these tests only run on 64-bit platforms.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5bigpl

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

if { $tcl_platform(wordSize)<8 } {
  finish_test
  return
}

do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(x) }

do_test 1.1 {
  foreach t {a b c d e f g h i j} {
    set doc [string repeat "$t " 1200000]
    execsql { INSERT INTO t1 VALUES($doc) }
  }
  execsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {}

do_test 1.2 {
  execsql { DELETE FROM t1 }
  foreach t {"a b" "b a" "c d" "d c"} {
    set doc [string repeat "$t " 600000]
    execsql { INSERT INTO t1 VALUES($doc) }
  }
  execsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {}


# 5-byte varint. This test takes 30 seconds or so on a 2014 workstation.
# The generated database is roughly 635MiB.
#
do_test 2.1...slow {
  execsql { DELETE FROM t1 }
  foreach t {a} {
    set doc [string repeat "$t " 150000000]
    execsql { INSERT INTO t1 VALUES($doc) }
  }
  execsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {}

finish_test

Added ext/fts5/test/fts5columnsize.test.




















































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# 2015 Jun 10
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on fts5 tables with the columnsize=0 option.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5columnsize

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

#-------------------------------------------------------------------------
# Check that the option can be parsed and that the %_docsize table is
# only created if it is set to true.
#
foreach {tn outcome stmt} {
  1 0 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=0) }
  2 1 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=1) }
  3 0 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize='0') }
  4 1 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize='1') }
  5 2 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize='') }
  6 2 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=2) }
  7 1 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=0, columnsize=1) }
  8 1 { CREATE VIRTUAL TABLE t1 USING fts5(x) }
  9 2 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=11) }
} {
  execsql { 
    DROP TABLE IF EXISTS t1;
  }
  if {$outcome==2} {
    do_catchsql_test 1.$tn.1 $stmt {1 {malformed columnsize=... directive}}
  } else {
    do_execsql_test 1.$tn.2 $stmt
    do_execsql_test 1.$tn.3 {
      SELECT count(*) FROM sqlite_master WHERE name = 't1_docsize'
    } $outcome
  }
}

#-------------------------------------------------------------------------
# Run tests on a table with no %_content or %_docsize backing store.
#
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE t2 USING fts5(x, columnsize=0, content='');
}
do_catchsql_test 2.1 {
  INSERT INTO t2 VALUES('a b c d e f');
} {1 {datatype mismatch}}
do_execsql_test 2.2 {
  INSERT INTO t2(rowid, x) VALUES(1, 'c d e f');
  INSERT INTO t2(rowid, x) VALUES(2, 'c d e f g h');
  INSERT INTO t2(rowid, x) VALUES(3, 'a b c d e f g h');
} {}
do_execsql_test 2.3 {
  SELECT rowid FROM t2 WHERE t2 MATCH 'b'; SELECT '::';
  SELECT rowid FROM t2 WHERE t2 MATCH 'e'; SELECT '::';
  SELECT rowid FROM t2 WHERE t2 MATCH 'h'; 
} {3 :: 1 2 3 :: 2 3}
do_execsql_test 2.4 {
  INSERT INTO t2(t2, rowid, x) VALUES('delete', 2, 'c d e f g h');
  SELECT rowid FROM t2 WHERE t2 MATCH 'b'; SELECT '::';
  SELECT rowid FROM t2 WHERE t2 MATCH 'e'; SELECT '::';
  SELECT rowid FROM t2 WHERE t2 MATCH 'h'; 
} {3 :: 1 3 :: 3}
do_execsql_test 2.5 {
  INSERT INTO t2(t2) VALUES('delete-all');
  SELECT rowid FROM t2 WHERE t2 MATCH 'b'; SELECT '::';
  SELECT rowid FROM t2 WHERE t2 MATCH 'e'; SELECT '::';
  SELECT rowid FROM t2 WHERE t2 MATCH 'h'; 
} {:: ::}
do_execsql_test 2.6 {
  INSERT INTO t2(rowid, x) VALUES(1, 'o t t f');
  INSERT INTO t2(rowid, x) VALUES(2, 'f s s e');
  INSERT INTO t2(rowid, x) VALUES(3, 'n t e t');
}

do_catchsql_test 2.7.1 {
  SELECT rowid FROM t2
} {1 {t2: table does not support scanning}}
do_catchsql_test 2.7.2 {
  SELECT rowid FROM t2 WHERE rowid=2
} {1 {t2: table does not support scanning}}
do_catchsql_test 2.7.3 {
  SELECT rowid FROM t2 WHERE rowid BETWEEN 1 AND 3
} {1 {t2: table does not support scanning}}

do_execsql_test 2.X {
  DROP TABLE t2
}

#-------------------------------------------------------------------------
# Test the xColumnSize() API
#
fts5_aux_test_functions db

do_execsql_test 3.1.0 {
  CREATE VIRTUAL TABLE t3 USING fts5(x, y UNINDEXED, z, columnsize=0);
  INSERT INTO t3 VALUES('a a', 'b b b', 'c');
  INSERT INTO t3 VALUES('x a x', 'b b b y', '');
}
do_execsql_test 3.1.1 {
  SELECT rowid, fts5_test_columnsize(t3) FROM t3 WHERE t3 MATCH 'a'
} {
  1 {2 0 1} 2 {3 0 0}
}
do_execsql_test 3.1.2 {
  INSERT INTO t3 VALUES(NULL, NULL, 'a a a a');
  DELETE FROM t3 WHERE rowid = 1;
  SELECT rowid, fts5_test_columnsize(t3) FROM t3 WHERE t3 MATCH 'a'
} {
  2 {3 0 0} 3 {0 0 4}
}

do_execsql_test 3.2.0 {
  CREATE VIRTUAL TABLE t4 USING fts5(x, y UNINDEXED, z, columnsize=0, content='');
  INSERT INTO t4(rowid, x, y, z) VALUES(1, 'a a', 'b b b', 'c');
  INSERT INTO t4(rowid, x, y, z) VALUES(2, 'x a x', 'b b b y', '');
}
do_execsql_test 3.2.1 {
  SELECT rowid, fts5_test_columnsize(t4) FROM t4 WHERE t4 MATCH 'a'
} {
  1 {-1 0 -1} 2 {-1 0 -1}
}


finish_test
Added ext/fts5/test/fts5config.test.
































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
# 2015 Jan 13
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file focuses on the code in fts5_config.c, which is largely concerned
# with parsing the various configuration and CREATE TABLE options.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5config

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

#-------------------------------------------------------------------------
# Try different types of quote characters.
#
do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5('a', "b", [c], `d`);
  PRAGMA table_info = t1;
} {
  0 a {} 0 {} 0 
  1 b {} 0 {} 0 
  2 c {} 0 {} 0 
  3 d {} 0 {} 0
}

#-------------------------------------------------------------------------
# Syntax errors in the prefix= option.
#
foreach {tn opt} {
  1 {prefix=x}  
  2 {prefix='x'}
  3 {prefix='$'}
} {
  set res [list 1 {malformed prefix=... directive}]
  do_catchsql_test 2.$tn "CREATE VIRTUAL TABLE f1 USING fts5(x, $opt)" $res
}

#-------------------------------------------------------------------------
# Syntax errors in the 'rank' option.
#
foreach {tn val} {
  1 "f1(xyz)"
  2 "f1(zyx)"
  3 "f1(nzz)"
  4 "f1(x'!!')"
  5 "f1(x':;')"
  6 "f1(x'[]')"
  7 "f1(x'{}')"
  8 "f1('abc)"
} {
  do_catchsql_test 3.$tn {
    INSERT INTO t1(t1, rank) VALUES('rank', $val);
  } {1 {SQL logic error or missing database}}
}

#-------------------------------------------------------------------------
# The parsing of SQL literals specified as part of 'rank' options.
#
do_execsql_test 4.0 {
  CREATE VIRTUAL TABLE zzz USING fts5(one);
  INSERT INTO zzz VALUES('a b c');
}
proc first {cmd A} { return $A }
sqlite3_fts5_create_function db first first

foreach {tn arg} {
  1 "123"
  2 "'01234567890ABCDEF'"
  3 "x'0123'"
  4 "x'ABCD'"
  5 "x'0123456789ABCDEF'"
  6 "x'0123456789abcdef'"
  7 "22.5"
  8 "-91.5"
  9 "-.5"
  10 "''''"
  11 "+.5"
} {
  set func [string map {' ''} "first($arg)"]
  do_execsql_test 4.1.$tn "
    INSERT INTO zzz(zzz, rank) VALUES('rank', '$func');
    SELECT rank IS $arg FROM zzz WHERE zzz MATCH 'a + b + c'
  " 1
}

do_execsql_test 4.2 {
  INSERT INTO zzz(zzz, rank) VALUES('rank', 'f1()');
} {}

#-------------------------------------------------------------------------
# Misquoting in tokenize= and other options. 
#
do_catchsql_test 5.1 {
  CREATE VIRTUAL TABLE xx USING fts5(x, tokenize="porter 'ascii");
} {1 {parse error in tokenize directive}} 

breakpoint
do_catchsql_test 5.2 {
  CREATE VIRTUAL TABLE xx USING fts5(x, [y[]);
} {0 {}}

do_catchsql_test 5.3 {
  CREATE VIRTUAL TABLE yy USING fts5(x, [y]]);
} {1 {unrecognized token: "]"}}

#-------------------------------------------------------------------------
# Errors in prefix= directives.
#
do_catchsql_test 6.1 {
  CREATE VIRTUAL TABLE abc USING fts5(a, prefix=1, prefix=2);
} {1 {multiple prefix=... directives}}
do_catchsql_test 6.2 {
  CREATE VIRTUAL TABLE abc USING fts5(a, prefix='1, 2, 1001');
} {1 {prefix length out of range: 1001}}
do_catchsql_test 6.3 {
  CREATE VIRTUAL TAbLE abc USING fts5(a, prefix='1, 2, 0000');
} {1 {prefix length out of range: 0}}
do_catchsql_test 6.4 {
  CREATE VIRTUAL TABLE abc USING fts5(a, prefix='1  , 1000000');
} {1 {malformed prefix=... directive}}

#-------------------------------------------------------------------------
# Duplicate tokenize= and other options.
#
do_catchsql_test 7.1 {
  CREATE VIRTUAL TABLE abc USING fts5(a, tokenize=porter, tokenize=ascii);
} {1 {multiple tokenize=... directives}}
do_catchsql_test 7.2 {
  CREATE VIRTUAL TABLE abc USING fts5(a, content=porter, content=ascii);
} {1 {multiple content=... directives}}
do_catchsql_test 7.3 {
  CREATE VIRTUAL TABLE abc USING fts5(a, content_rowid=porter, content_rowid=a);
} {1 {multiple content_rowid=... directives}}

#-------------------------------------------------------------------------
# Unrecognized option.
#
do_catchsql_test 8.0 {
  CREATE VIRTUAL TABLE abc USING fts5(a, nosuchoption=123);
} {1 {unrecognized option: "nosuchoption"}}
do_catchsql_test 8.1 {
  CREATE VIRTUAL TABLE abc USING fts5(a, "nosuchoption"=123);
} {1 {parse error in ""nosuchoption"=123"}}

#-------------------------------------------------------------------------
# Errors in:
#
#   9.1.* 'pgsz' options.
#   9.2.* 'automerge' options.
#   9.3.* 'crisismerge' options.
#
do_execsql_test 9.0 {
  CREATE VIRTUAL TABLE abc USING fts5(a, b);
} {}
do_catchsql_test 9.1.1 {
  INSERT INTO abc(abc, rank) VALUES('pgsz', -5);
} {1 {SQL logic error or missing database}}
do_catchsql_test 9.1.2 {
  INSERT INTO abc(abc, rank) VALUES('pgsz', 50000000);
} {1 {SQL logic error or missing database}}
do_catchsql_test 9.1.3 {
  INSERT INTO abc(abc, rank) VALUES('pgsz', 66.67);
} {1 {SQL logic error or missing database}}

do_catchsql_test 9.2.1 {
  INSERT INTO abc(abc, rank) VALUES('automerge', -5);
} {1 {SQL logic error or missing database}}
do_catchsql_test 9.2.2 {
  INSERT INTO abc(abc, rank) VALUES('automerge', 50000000);
} {1 {SQL logic error or missing database}}
do_catchsql_test 9.2.3 {
  INSERT INTO abc(abc, rank) VALUES('automerge', 66.67);
} {1 {SQL logic error or missing database}}
do_execsql_test 9.2.4 {
  INSERT INTO abc(abc, rank) VALUES('automerge', 1);
} {}

do_catchsql_test 9.3.1 {
  INSERT INTO abc(abc, rank) VALUES('crisismerge', -5);
} {1 {SQL logic error or missing database}}
do_catchsql_test 9.3.2 {
  INSERT INTO abc(abc, rank) VALUES('crisismerge', 66.67);
} {1 {SQL logic error or missing database}}
do_execsql_test 9.3.3 {
  INSERT INTO abc(abc, rank) VALUES('crisismerge', 1);
} {}
do_execsql_test 9.3.4 {
  INSERT INTO abc(abc, rank) VALUES('crisismerge', 50000000);
} {}

do_catchsql_test 9.4.1 {
  INSERT INTO abc(abc, rank) VALUES('nosuchoption', 1);
} {1 {SQL logic error or missing database}}

finish_test

Added ext/fts5/test/fts5content.test.




































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
# 2014 Dec 20
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file contains tests for the content= and content_rowid= options.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5content

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

#-------------------------------------------------------------------------
# Contentless tables
#
do_execsql_test 1.1 {
  CREATE VIRTUAL TABLE f1 USING fts5(a, b, content='');
  INSERT INTO f1(rowid, a, b) VALUES(1, 'one',   'o n e');
  INSERT INTO f1(rowid, a, b) VALUES(2, 'two',   't w o');
  INSERT INTO f1(rowid, a, b) VALUES(3, 'three', 't h r e e');
}

do_execsql_test 1.2 {
  SELECT rowid FROM f1 WHERE f1 MATCH 'o';
} {1 2}

do_execsql_test 1.3 {
  INSERT INTO f1(a, b) VALUES('four',   'f o u r');
  SELECT rowid FROM f1 WHERE f1 MATCH 'o';
} {1 2 4}

do_execsql_test 1.4 {
  SELECT rowid, a, b FROM f1 WHERE f1 MATCH 'o';
} {1 {} {} 2 {} {} 4 {} {}}

do_execsql_test 1.5 {
  SELECT rowid, highlight(f1, 0, '[', ']') FROM f1 WHERE f1 MATCH 'o';
} {1 {} 2 {} 4 {}}

do_execsql_test 1.6 {
  SELECT rowid, highlight(f1, 0, '[', ']') IS NULL FROM f1 WHERE f1 MATCH 'o';
} {1 1 2 1 4 1}

do_execsql_test 1.7 {
  SELECT rowid, snippet(f1, -1, '[', ']', '...', 5) IS NULL 
  FROM f1 WHERE f1 MATCH 'o';
} {1 1 2 1 4 1}

do_execsql_test 1.8 {
  SELECT rowid, snippet(f1, 1, '[', ']', '...', 5) IS NULL 
  FROM f1 WHERE f1 MATCH 'o';
} {1 1 2 1 4 1}

do_execsql_test 1.9 {
  SELECT rowid FROM f1;
} {1 2 3 4}

do_execsql_test 1.10 {
  SELECT * FROM f1;
} {{} {}  {} {}  {} {}  {} {}}

do_execsql_test 1.11 {
  SELECT rowid, a, b FROM f1 ORDER BY rowid ASC;
} {1 {} {}  2 {} {}  3 {} {}  4 {} {}}

do_execsql_test 1.12 {
  SELECT a IS NULL FROM f1;
} {1 1 1 1}

do_catchsql_test 1.13 {
  DELETE FROM f1 WHERE rowid = 2;
} {1 {cannot DELETE from contentless fts5 table: f1}}

do_catchsql_test 1.14 {
  UPDATE f1 SET a = 'a b c' WHERE rowid = 2;
} {1 {cannot UPDATE contentless fts5 table: f1}}

do_execsql_test 1.15 {
  INSERT INTO f1(f1, rowid, a, b) VALUES('delete', 2, 'two', 't w o');
} {}

do_execsql_test 1.16 {
  SELECT rowid FROM f1 WHERE f1 MATCH 'o';
} {1 4}

do_execsql_test 1.17 {
  SELECT rowid FROM f1;
} {1 3 4}

#-------------------------------------------------------------------------
# External content tables
#
reset_db
do_execsql_test 2.1 {
  -- Create a table. And an external content fts5 table to index it.
  CREATE TABLE tbl(a INTEGER PRIMARY KEY, b, c);
  CREATE VIRTUAL TABLE fts_idx USING fts5(b, c, content='tbl', content_rowid='a');

  -- Triggers to keep the FTS index up to date.
  CREATE TRIGGER tbl_ai AFTER INSERT ON tbl BEGIN
    INSERT INTO fts_idx(rowid, b, c) VALUES (new.a, new.b, new.c);
  END;
  CREATE TRIGGER tbl_ad AFTER DELETE ON tbl BEGIN
    INSERT INTO fts_idx(fts_idx, rowid, b, c) 
        VALUES('delete', old.a, old.b, old.c);
  END;
  CREATE TRIGGER tbl_au AFTER UPDATE ON tbl BEGIN
    INSERT INTO fts_idx(fts_idx, rowid, b, c) 
        VALUES('delete', old.a, old.b, old.c);
    INSERT INTO fts_idx(rowid, b, c) VALUES (new.a, new.b, new.c);
  END;
}

do_execsql_test 2.2 {
  INSERT INTO tbl VALUES(1, 'one', 'o n e');
  INSERT INTO tbl VALUES(NULL, 'two', 't w o');
  INSERT INTO tbl VALUES(3, 'three', 't h r e e');
}

do_execsql_test 2.3 {
  INSERT INTO fts_idx(fts_idx) VALUES('integrity-check');
}

do_execsql_test 2.4 {
  DELETE FROM tbl WHERE rowid=2;
  INSERT INTO fts_idx(fts_idx) VALUES('integrity-check');
}

do_execsql_test 2.5 {
  UPDATE tbl SET c = c || ' x y z';
  INSERT INTO fts_idx(fts_idx) VALUES('integrity-check');
}

do_execsql_test 2.6 {
  SELECT * FROM fts_idx WHERE fts_idx MATCH 't AND x';
} {three {t h r e e x y z}}

do_execsql_test 2.7 {
  SELECT highlight(fts_idx, 1, '[', ']') FROM fts_idx 
  WHERE fts_idx MATCH 't AND x';
} {{[t] h r e e [x] y z}}

#-------------------------------------------------------------------------
# Quick tests of the 'delete-all' command.
#
do_execsql_test 3.1 {
  CREATE VIRTUAL TABLE t3 USING fts5(x, content='');
  INSERT INTO t3 VALUES('a b c');
  INSERT INTO t3 VALUES('d e f');
}

do_execsql_test 3.2 {
  SELECT count(*) FROM t3_docsize;
  SELECT count(*) FROM t3_data;
} {2 4}

do_execsql_test 3.3 {
  INSERT INTO t3(t3) VALUES('delete-all');
  SELECT count(*) FROM t3_docsize;
  SELECT count(*) FROM t3_data;
} {0 2}

do_execsql_test 3.4 {
  INSERT INTO t3 VALUES('a b c');
  INSERT INTO t3 VALUES('d e f');
  SELECT rowid FROM t3 WHERE t3 MATCH 'e';
} {2}

do_execsql_test 3.5 {
  SELECT rowid FROM t3 WHERE t3 MATCH 'c';
} {1}

do_execsql_test 3.6 {
  SELECT count(*) FROM t3_docsize;
  SELECT count(*) FROM t3_data;
} {2 4}

do_execsql_test 3.7 {
  CREATE VIRTUAL TABLE t4 USING fts5(x);
} {}
do_catchsql_test 3.8 {
  INSERT INTO t4(t4) VALUES('delete-all');
} {1 {'delete-all' may only be used with a contentless or external content fts5 table}}

#-------------------------------------------------------------------------
# Test an external content table with a more interesting schema.
#
do_execsql_test 4.1 {
  CREATE TABLE x2(a, "key col" PRIMARY KEY, b, c) WITHOUT ROWID;
  INSERT INTO x2 VALUES('a b',   1, 'c d' , 'e f');
  INSERT INTO x2 VALUES('x y', -40, 'z z' , 'y x');

  CREATE VIRTUAL TABLE t2 USING fts5(a, c, content=x2, content_rowid='key col');
  INSERT INTO t2(t2) VALUES('rebuild');
}

do_execsql_test 4.2 { SELECT rowid FROM t2 } {-40 1}
do_execsql_test 4.3 { SELECT rowid FROM t2 WHERE t2 MATCH 'c'} {}
do_execsql_test 4.4 { SELECT rowid FROM t2 WHERE t2 MATCH 'a'} {1}
do_execsql_test 4.5 { SELECT rowid FROM t2 WHERE t2 MATCH 'x'} {-40}

do_execsql_test 4.6 { INSERT INTO t2(t2) VALUES('integrity-check') } {}

do_execsql_test 4.7 { 
  DELETE FROM x2 WHERE "key col" = 1;
  INSERT INTO t2(t2, rowid, a, c) VALUES('delete', 1, 'a b', 'e f');
  INSERT INTO t2(t2) VALUES('integrity-check');
}

do_execsql_test 4.8 { SELECT rowid FROM t2 WHERE t2 MATCH 'b'} {}
do_execsql_test 4.9 { SELECT rowid FROM t2 WHERE t2 MATCH 'y'} {-40}

#-------------------------------------------------------------------------
# Test that if the 'rowid' field of a 'delete' is not an integer, no
# changes are made to the FTS index.
#
do_execsql_test 5.0 {
  CREATE VIRTUAL TABLE t5 USING fts5(a, b, content=);
  INSERT INTO t5(rowid, a, b) VALUES(-1, 'one',   'two');
  INSERT INTO t5(rowid, a, b) VALUES( 0, 'three', 'four');
  INSERT INTO t5(rowid, a, b) VALUES( 1, 'five',  'six');
}

set ::checksum [execsql {SELECT md5sum(id, block) FROM t5_data}]

do_execsql_test 5.1 {
  INSERT INTO t5(t5, rowid, a, b) VALUES('delete', NULL, 'three', 'four');
  SELECT md5sum(id, block) FROM t5_data;
} $::checksum


#-------------------------------------------------------------------------
# Check that a contentless table can be dropped.
#
reset_db
do_execsql_test 6.1 {
  CREATE VIRTUAL TABLE xx USING fts5(x, y, content="");
  SELECT name FROM sqlite_master;
} {xx xx_data xx_docsize xx_config}
do_execsql_test 6.2 {
  DROP TABLE xx;
  SELECT name FROM sqlite_master;
} {}


finish_test

Added ext/fts5/test/fts5corrupt.test.






































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# 2014 Dec 20
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file tests that the FTS5 'integrity-check' command detects 
# inconsistencies (corruption) in the on-disk backing tables.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5corrupt

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}

do_test 1.1 {
  db transaction {
    for {set i 1} {$i < 200} {incr i} {
      set doc [list [string repeat x $i] [string repeat y $i]]
      execsql { INSERT INTO t1(rowid, x) VALUES($i, $doc) }
    }
  }
  fts5_level_segs t1
} {1}
db_save

do_execsql_test 1.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
set segid [lindex [fts5_level_segids t1] 0]

do_test 1.3 {
  execsql {
    DELETE FROM t1_data WHERE rowid = fts5_rowid('segment', $segid, 0, 4);
  }
  catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {1 {database disk image is malformed}}

do_test 1.4 {
  db_restore_and_reopen
  execsql {
    UPDATE t1_data set block = X'00000000' || substr(block, 5) WHERE
    rowid = fts5_rowid('segment', $segid, 0, 4);
  }
  catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {1 {database disk image is malformed}}

db_restore_and_reopen
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}


#--------------------------------------------------------------------
#
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE t2 USING fts5(x);
  INSERT INTO t2(t2, rank) VALUES('pgsz', 64);
}
db func rnddoc fts5_rnddoc
do_test 2.1 {
  for {set i 0} {$i < 500} {incr i} {
    execsql { INSERT INTO t2 VALUES(rnddoc(50)) }
  }
  execsql { INSERT INTO t2(t2) VALUES('integrity-check') }
} {}

#--------------------------------------------------------------------
# A mundane test - missing row in the %_content table.
#
do_execsql_test 3.0 {
  CREATE VIRTUAL TABLE t3 USING fts5(x);
  INSERT INTO t3 VALUES('one o');
  INSERT INTO t3 VALUES('two e');
  INSERT INTO t3 VALUES('three o');
  INSERT INTO t3 VALUES('four e');
  INSERT INTO t3 VALUES('five o');
}
do_execsql_test 3.1 {
  SELECT * FROM t3 WHERE t3 MATCH 'o'
} {{one o} {three o} {five o}}

do_catchsql_test 3.1 {
  DELETE FROM t3_content WHERE rowid = 3;
  SELECT * FROM t3 WHERE t3 MATCH 'o';
} {1 {database disk image is malformed}}

finish_test

Added ext/fts5/test/fts5corrupt2.test.
































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
# 2015 Apr 24
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file tests that FTS5 handles corrupt databases (i.e. internal
# inconsistencies in the backing tables) correctly. In this case 
# "correctly" means without crashing.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5corrupt2

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}
sqlite3_fts5_may_be_corrupt 1

# Create a simple FTS5 table containing 100 documents. Each document 
# contains 10 terms, each of which start with the character "x".
#
expr srand(0)
db func rnddoc fts5_rnddoc
do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
  WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<100)
  INSERT INTO t1 SELECT rnddoc(10) FROM ii;
}
set mask [expr 31 << 31]

if 1 {

# Test 1:
#
#   For each page in the t1_data table, open a transaction and DELETE
#   the t1_data entry. Then run:
#
#     * an integrity-check, and
#     * unless the deleted block was a b-tree node, a query for "t1 MATCH 'x*'"
#
#   and check that the corruption is detected in both cases. The 
#   rollback the transaction.
#
# Test 2:
#
#   Same thing, except instead of deleting a row from t1_data, replace its
#   blob content with integer value 14.
#
foreach {tno stmt} {
  1 { DELETE FROM t1_data WHERE rowid=$rowid }
  2 { UPDATE t1_data SET block=14 WHERE rowid=$rowid }
} {
  set tn 0
  foreach rowid [db eval {SELECT rowid FROM t1_data WHERE rowid>10}] {
    incr tn
    #if {$tn!=224} continue
  
    do_test 1.$tno.$tn.1.$rowid {
      execsql { BEGIN }
      execsql $stmt
      catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
    } {1 {database disk image is malformed}}
  
    if {($rowid & $mask)==0} {
      # Node is a leaf node, not a b-tree node.
      do_catchsql_test 1.$tno.$tn.2.$rowid {
        SELECT rowid FROM t1 WHERE t1 MATCH 'x*'
      } {1 {database disk image is malformed}}
    }
  
    do_execsql_test 1.$tno.$tn.3.$rowid {
      ROLLBACK;
      INSERT INTO t1(t1) VALUES('integrity-check');
    } {}
  }
}

# Using the same database as the 1.* tests.
#
# Run N-1 tests, where N is the number of bytes in the rightmost leaf page
# of the fts index. For test $i, truncate the rightmost leafpage to $i
# bytes. Then test both the integrity-check detects the corruption.
#
# Also tested is that "MATCH 'x*'" does not crash and sometimes reports
# corruption. It may not report the db as corrupt because truncating the
# final leaf to some sizes may create a valid leaf page.
#
set lrowid [db one {SELECT max(rowid) FROM t1_data WHERE (rowid & $mask)=0}] 
set nbyte [db one {SELECT length(block) FROM t1_data WHERE rowid=$lrowid}]
set all [db eval {SELECT rowid FROM t1}]
for {set i [expr $nbyte-2]} {$i>=0} {incr i -1} {
  do_execsql_test 2.$i.1 {
    BEGIN;
      UPDATE t1_data SET block = substr(block, 1, $i) WHERE rowid=$lrowid;
  }

  do_catchsql_test 2.$i.2 {
    INSERT INTO t1(t1) VALUES('integrity-check');
  } {1 {database disk image is malformed}}

  do_test 2.$i.3 {
    set res [catchsql {SELECT rowid FROM t1 WHERE t1 MATCH 'x*'}]
    expr {
        $res=="1 {database disk image is malformed}" 
     || $res=="0 {$all}" 
    }
  } 1

  do_execsql_test 2.$i.4 {
    ROLLBACK;
    INSERT INTO t1(t1) VALUES('integrity-check');
  } {}
}

#-------------------------------------------------------------------------
# Test that corruption in leaf page headers is detected by queries that use
# doclist-indexes.
#
set doc "A B C D E F G H I J "
do_execsql_test 3.0 {
  CREATE VIRTUAL TABLE x3 USING fts5(tt);
  INSERT INTO x3(x3, rank) VALUES('pgsz', 32);
  WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<1000) 
  INSERT INTO x3 
  SELECT ($doc || CASE WHEN (i%50)==0 THEN 'X' ELSE 'Y' END) FROM ii;
}

foreach {tn hdr} {
  1 "\x00\x00\x00\x00"
  2 "\xFF\xFF\xFF\xFF"
  3 "\x44\x45"
} {
  set tn2 0
  set nCorrupt 0
  set nCorrupt2 0
  foreach rowid [db eval {SELECT rowid FROM x3_data WHERE rowid>10}] {
    if {$rowid & $mask} continue
    incr tn2
    do_test 3.$tn.$tn2.1 {
      execsql BEGIN

      set fd [db incrblob main x3_data block $rowid]
      fconfigure $fd -encoding binary -translation binary
      set existing [read $fd [string length $hdr]]
      seek $fd 0
      puts -nonewline $fd $hdr
      close $fd

      set res [catchsql {SELECT rowid FROM x3 WHERE x3 MATCH 'x AND a'}]
      if {$res == "1 {database disk image is malformed}"} {incr nCorrupt}
      set {} 1
    } {1}

    if {($tn2 % 10)==0 && $existing != $hdr} {
      do_test 3.$tn.$tn2.2 {
        catchsql { INSERT INTO x3(x3) VALUES('integrity-check') }
      } {1 {database disk image is malformed}}
    }

    execsql ROLLBACK
  }

  do_test 3.$tn.x { expr $nCorrupt>0 } 1
}

#--------------------------------------------------------------------
#
set doc "A B C D E F G H I J "
do_execsql_test 4.0 {
  CREATE VIRTUAL TABLE x4 USING fts5(tt);
  INSERT INTO x4(x4, rank) VALUES('pgsz', 32);
  WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<10) 
  INSERT INTO x4 
  SELECT ($doc || CASE WHEN (i%50)==0 THEN 'X' ELSE 'Y' END) FROM ii;
}

foreach {tn nCut} {
  1 1
  2 10
} {
  set tn2 0
  set nCorrupt 0
  foreach rowid [db eval {SELECT rowid FROM x4_data WHERE rowid>10}] {
    if {$rowid & $mask} continue
    incr tn2
    do_test 4.$tn.$tn2 {
      execsql {
        BEGIN;
          UPDATE x4_data SET block = substr(block, 1, length(block)-$nCut) 
          WHERE id = $rowid;
      }

      set res [catchsql {
        SELECT rowid FROM x4 WHERE x4 MATCH 'a' ORDER BY 1 DESC
      }]
      if {$res == "1 {database disk image is malformed}"} {incr nCorrupt}
      set {} 1
    } {1}

    execsql ROLLBACK
  }

  do_test 4.$tn.x { expr $nCorrupt>0 } 1
}

}

set doc [string repeat "A B C " 1000]
do_execsql_test 4.0 {
  CREATE VIRTUAL TABLE x5 USING fts5(tt);
  INSERT INTO x5(x5, rank) VALUES('pgsz', 32);
  WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<10) 
  INSERT INTO x5 SELECT $doc FROM ii;
}

foreach {tn hdr} {
  1 "\x00\x01"
} {
  set tn2 0
  set nCorrupt 0
  foreach rowid [db eval {SELECT rowid FROM x5_data WHERE rowid>10}] {
    if {$rowid & $mask} continue
    incr tn2
    do_test 4.$tn.$tn2 {
      execsql BEGIN

      set fd [db incrblob main x5_data block $rowid]
      fconfigure $fd -encoding binary -translation binary
      puts -nonewline $fd $hdr
      close $fd

      catchsql { INSERT INTO x5(x5) VALUES('integrity-check') }
      set {} {}
    } {}

    execsql ROLLBACK
  }
}

#--------------------------------------------------------------------
reset_db
do_execsql_test 5.1 {
  CREATE VIRTUAL TABLE x5 USING fts5(tt);
  INSERT INTO x5 VALUES('a');
  INSERT INTO x5 VALUES('a a');
  INSERT INTO x5 VALUES('a a a');
  INSERT INTO x5 VALUES('a a a a');

  UPDATE x5_docsize SET sz = X'' WHERE id=3;
}
proc colsize {cmd i} { 
  $cmd xColumnSize $i
}
sqlite3_fts5_create_function db colsize colsize

do_catchsql_test 5.2 {
  SELECT colsize(x5, 0) FROM x5 WHERE x5 MATCH 'a'
} {1 SQLITE_CORRUPT_VTAB}


sqlite3_fts5_may_be_corrupt 0
finish_test

Added ext/fts5/test/fts5corrupt3.test.
































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# 2015 Apr 24
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file tests that FTS5 handles corrupt databases (i.e. internal
# inconsistencies in the backing tables) correctly. In this case 
# "correctly" means without crashing.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5corrupt3

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}
sqlite3_fts5_may_be_corrupt 1

# Create a simple FTS5 table containing 100 documents. Each document 
# contains 10 terms, each of which start with the character "x".
#
expr srand(0)
db func rnddoc fts5_rnddoc
do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  INSERT INTO t1(t1, rank) VALUES('pgsz', 64);
  WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<100)
  INSERT INTO t1 SELECT rnddoc(10) FROM ii;
}
set mask [expr 31 << 31]

do_test 1.1 {
  # Pick out the rowid of the right-most b-tree leaf in the new segment.
  set rowid [db one {
    SELECT max(rowid) FROM t1_data WHERE ((rowid>>31) & 0x0F)==1
  }]
  set L [db one {SELECT length(block) FROM t1_data WHERE rowid = $rowid}]
  set {} {}
} {} 

for {set i 0} {$i < $L} {incr i} {
  do_test 1.2.$i {
    catchsql {
      BEGIN;
      UPDATE t1_data SET block = substr(block, 1, $i) WHERE id = $rowid;
      INSERT INTO t1(t1) VALUES('integrity-check');
    }
  } {1 {database disk image is malformed}}
  catchsql ROLLBACK
}
 
#-------------------------------------------------------------------------
# Test that trailing bytes appended to the averages record are ignored.
#
do_execsql_test 2.1 {
  CREATE VIRTUAL TABLE t2 USING fts5(x);
  INSERT INTO t2 VALUES(rnddoc(10));
  INSERT INTO t2 VALUES(rnddoc(10));
  SELECT length(block) FROM t2_data WHERE id=1;
} {2}
do_execsql_test 2.2 {
  UPDATE t2_data SET block = block || 'abcd' WHERE id=1;
  SELECT length(block) FROM t2_data WHERE id=1;
} {6}
do_execsql_test 2.2 {
  INSERT INTO t2 VALUES(rnddoc(10));
  SELECT length(block) FROM t2_data WHERE id=1;
} {2}

sqlite3_fts5_may_be_corrupt 0
finish_test

Added ext/fts5/test/fts5dlidx.test.








































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# 2015 April 21
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This test is focused on uses of doclist-index records.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5dlidx

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

if { $tcl_platform(wordSize)<8 } {
  finish_test
  return
}

proc do_fb_test {tn sql res} {
  set res2 [lsort -integer -decr $res]
  uplevel [list do_execsql_test $tn.1 $sql $res]
  uplevel [list do_execsql_test $tn.2 "$sql ORDER BY rowid DESC" $res2]
}

# This test populates the FTS5 table containing $nEntry entries. Rows are 
# numbered from 0 to ($nEntry-1). The rowid for row $i is:
#
#   ($iFirst + $i*$nStep)
#
# Each document is of the form "a b c a b c a b c...". If the row number ($i)
# is an integer multiple of $spc1, then an "x" token is appended to the
# document. If it is *also* a multiple of $spc2, a "y" token is also appended.
#
proc do_dlidx_test1 {tn spc1 spc2 nEntry iFirst nStep} {

  do_execsql_test $tn.0 { DELETE FROM t1 }

  set xdoc [list]
  set ydoc [list]

  execsql BEGIN
  for {set i 0} {$i < $nEntry} {incr i} {
    set rowid [expr $i * $nStep]
    set doc [string trim [string repeat "a b c " 100]]
    if {($i % $spc1)==0} {
      lappend xdoc $rowid
      append doc " x" 
      if {($i % $spc2)==0} { 
        lappend ydoc $rowid
        append doc " y" 
      }
    }
    execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $doc) }
  }
  execsql COMMIT

  breakpoint
  do_test $tn.1 {
    execsql { INSERT INTO t1(t1) VALUES('integrity-check') }
  } {}
  
  do_fb_test $tn.3.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND x' } $xdoc
  do_fb_test $tn.3.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'x AND a' } $xdoc
  
  do_fb_test $tn.4.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND y' } $ydoc
  do_fb_test $tn.4.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'y AND a' } $ydoc
  
  do_fb_test $tn.5.1 { 
    SELECT rowid FROM t1 WHERE t1 MATCH 'a + b + c + x' } $xdoc
  do_fb_test $tn.5.2 { 
    SELECT rowid FROM t1 WHERE t1 MATCH 'b + c + x + y' } $ydoc
}


foreach {tn pgsz} {
  1 32
  2 200
} {
  do_execsql_test $tn.0 { 
    DROP TABLE IF EXISTS t1;
    CREATE VIRTUAL TABLE t1 USING fts5(x);
    INSERT INTO t1(t1, rank) VALUES('pgsz', $pgsz);
  }

  do_dlidx_test1 1.$tn.1     10 100 10000 0 1000
  do_dlidx_test1 1.$tn.2     10 10  10000 0 128
  do_dlidx_test1 1.$tn.3     10 10  66    0 36028797018963970
  do_dlidx_test1 1.$tn.4     10 10  50    0 150000000000000000
  do_dlidx_test1 1.$tn.5     10 10  200   0 [expr 1<<55]
  do_dlidx_test1 1.$tn.6      10 10  30    0 [expr 1<<58]
}

proc do_dlidx_test2 {tn nEntry iFirst nStep} {
  set str [string repeat "a " 500]
  execsql {
    BEGIN;
    DROP TABLE IF EXISTS t1;
    CREATE VIRTUAL TABLE t1 USING fts5(x);
    INSERT INTO t1(t1, rank) VALUES('pgsz', 64);
    INSERT INTO t1 VALUES('b a');

    WITH iii(ii, i) AS (
      SELECT 1,     $iFirst UNION ALL 
      SELECT ii+1, i+$nStep FROM iii WHERE ii<$nEntry
    )
    INSERT INTO t1(rowid,x) SELECT i, $str FROM iii;
    COMMIT;
  }

  do_execsql_test $tn.1 {
    SELECT rowid FROM t1 WHERE t1 MATCH 'b AND a'
  } {1}
  breakpoint
  do_execsql_test $tn.2 {
    SELECT rowid FROM t1 WHERE t1 MATCH 'b AND a' ORDER BY rowid DESC
  } {1}
}

do_dlidx_test2 2.1 [expr 20] [expr 1<<57] [expr (1<<57) + 128]

finish_test

Added ext/fts5/test/fts5doclist.test.






























































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# 2015 April 21
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This test is focused on edge cases in the doclist format.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5doclist

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}


#-------------------------------------------------------------------------
# Create a table with 1000 columns. Then add some large documents to it.
# All text is in the right most column of the table.
#
do_test 1.0 {
  set cols [list]
  for {set i 0} {$i < 900} {incr i} { lappend cols "x$i" }
  execsql "CREATE VIRTUAL TABLE ccc USING fts5([join $cols ,])"
} {}

db func rnddoc fts5_rnddoc 
do_execsql_test 1.1 {
  WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<100)
  INSERT INTO ccc(x899) SELECT rnddoc(500) FROM ii;
}

do_execsql_test 1.2 {
  INSERT INTO ccc(ccc) VALUES('integrity-check');
}


finish_test

Added ext/fts5/test/fts5ea.test.


























































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# 2014 June 17
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
#
# Test the fts5 expression parser directly using the fts5_expr() SQL
# test function.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ea

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

proc do_syntax_error_test {tn expr err} {
  set ::se_expr $expr
  do_catchsql_test $tn {SELECT fts5_expr($se_expr)} [list 1 $err]
}

proc do_syntax_test {tn expr res} {
  set ::se_expr $expr
  do_execsql_test $tn {SELECT fts5_expr($se_expr)} [list $res]
}

foreach {tn expr res} {
  1  {abc}                           {"abc"}
  2  {abc def}                       {"abc" AND "def"}
  3  {abc*}                          {"abc" *}
  4  {"abc def ghi" *}               {"abc" + "def" + "ghi" *}
  5  {one AND two}                   {"one" AND "two"}
  6  {one+two}                       {"one" + "two"}
  7  {one AND two OR three}          {("one" AND "two") OR "three"}
  8  {one OR two AND three}          {"one" OR ("two" AND "three")}
  9  {NEAR(one two)}                 {NEAR("one" "two", 10)}
  10 {NEAR("one three"* two, 5)}     {NEAR("one" + "three" * "two", 5)}
  11 {a OR b NOT c}                  {"a" OR ("b" NOT "c")}
  12 "\x20one\x20two\x20three"       {"one" AND "two" AND "three"}
  13 "\x09one\x0Atwo\x0Dthree"       {"one" AND "two" AND "three"}
  14 {"abc""def"}                    {"abc" + "def"}
} {
  do_execsql_test 1.$tn {SELECT fts5_expr($expr)} [list $res]
}

foreach {tn expr res} {
  1 {c1:abc}                           
    {c1 : "abc"}
  2 {c2 : NEAR(one two) c1:"hello world"} 
    {c2 : NEAR("one" "two", 10) AND c1 : "hello" + "world"}
} {
  do_execsql_test 2.$tn {SELECT fts5_expr($expr, 'c1', 'c2')} [list $res]
}

foreach {tn expr err} {
  1 {AND}                          {fts5: syntax error near "AND"}
  2 {abc def AND}                  {fts5: syntax error near ""}
  3 {abc OR AND}                   {fts5: syntax error near "AND"}
  4 {(a OR b) abc}                 {fts5: syntax error near "abc"}
  5 {NEaR (a b)}                   {fts5: syntax error near "NEaR"}
  6 {NEa (a b)}                    {fts5: syntax error near "NEa"}
  7 {(a OR b) NOT c)}              {fts5: syntax error near ")"}
  8 {nosuch: a nosuch2: b}         {no such column: nosuch}
  9 {addr: a nosuch2: b}           {no such column: nosuch2}
  10 {NOT}                          {fts5: syntax error near "NOT"}
  11 {a AND "abc}                  {unterminated string}

  12 {NEAR(a b, xyz)}              {expected integer, got "xyz"}
  13 {NEAR(a b, // )}              {fts5: syntax error near "/"}
  14 {NEAR(a b, "xyz" )}           {expected integer, got ""xyz""}
} {
  do_catchsql_test 3.$tn {SELECT fts5_expr($expr, 'name', 'addr')} [list 1 $err]
}

#-------------------------------------------------------------------------
# Experiment with a tokenizer that considers " to be a token character.
#
do_execsql_test 4.0 {
  SELECT fts5_expr('a AND """"', 'x', 'tokenize="unicode61 tokenchars ''""''"');
} {{"a" AND """"}}




finish_test
Added ext/fts5/test/fts5eb.test.










































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# 2014 June 17
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5eb

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

proc do_syntax_error_test {tn expr err} {
  set ::se_expr $expr
  do_catchsql_test $tn {SELECT fts5_expr($se_expr)} [list 1 $err]
}

proc do_syntax_test {tn expr res} {
  set ::se_expr $expr
  do_execsql_test $tn {SELECT fts5_expr($se_expr)} [list $res]
}

foreach {tn expr res} {
  1  {abc}                           {"abc"}
  2  {abc .}                         {"abc"}
  3  {.}                             {}
  4  {abc OR .}                      {"abc"}
  5  {abc NOT .}                     {"abc"}
  6  {abc AND .}                     {"abc"}
  7  {. OR abc}                      {"abc"}
  8  {. NOT abc}                     {"abc"}
  9  {. AND abc}                     {"abc"}
  10 {abc + . + def}                 {"abc" + "def"}
  11 {abc . def}                     {"abc" AND "def"}
  12 {r+e OR w}                      {"r" + "e" OR "w"}
} {
  do_execsql_test 1.$tn {SELECT fts5_expr($expr)} [list $res]
}


finish_test



Added ext/fts5/test/fts5fault1.test.


































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
# 2014 June 17
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the FTS5 module.
#

source [file join [file dirname [info script]] fts5_common.tcl]
source $testdir/malloc_common.tcl
set testprefix fts5fault1

# If SQLITE_ENABLE_FTS3 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

# Simple tests:
#
#   1: CREATE VIRTUAL TABLE
#   2: INSERT statement
#   3: DELETE statement
#   4: MATCH expressions
#
#

faultsim_save_and_close
do_faultsim_test 1 -faults ioerr-t* -prep {
  faultsim_restore_and_reopen
} -body {
  execsql { CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix='1, 2, 3') }
} -test {
  faultsim_test_result {0 {}} {1 {vtable constructor failed: t1}}
}

reset_db
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix='1, 2, 3');
}
faultsim_save_and_close
do_faultsim_test 2 -prep {
  faultsim_restore_and_reopen
} -body {
  execsql { 
    INSERT INTO t1 VALUES('a b c', 'a bc def ghij klmno');
  }
} -test {
  faultsim_test_result {0 {}} 
}

reset_db
do_execsql_test 3.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix='1, 2, 3');
  INSERT INTO t1 VALUES('a b c', 'a bc def ghij klmno');
}
faultsim_save_and_close
do_faultsim_test 3 -prep {
  faultsim_restore_and_reopen
} -body {
  execsql { DELETE FROM t1 }
} -test {
  faultsim_test_result {0 {}} 
}

reset_db
do_execsql_test 4.0 {
  CREATE VIRTUAL TABLE t2 USING fts5(a, b);
  INSERT INTO t2 VALUES('m f a jj th q jr ar',   'hj n h h sg j i m');
  INSERT INTO t2 VALUES('nr s t g od j kf h',    'sb h aq rg op rb n nl');
  INSERT INTO t2 VALUES('do h h pb p p q fr',    'c rj qs or cr a l i');
  INSERT INTO t2 VALUES('lk gp t i lq mq qm p',  'h mr g f op ld aj h');
  INSERT INTO t2 VALUES('ct d sq kc qi k f j',   'sn gh c of g s qt q');
  INSERT INTO t2 VALUES('d ea d d om mp s ab',   'dm hg l df cm ft pa c');
  INSERT INTO t2 VALUES('tc dk c jn n t sr ge',  'a a kn bc n i af h');
  INSERT INTO t2 VALUES('ie ii d i b sa qo rf',  'a h m aq i b m fn');
  INSERT INTO t2 VALUES('gs r fo a er m h li',   'tm c p gl eb ml q r');
  INSERT INTO t2 VALUES('k fe fd rd a gi ho kk', 'ng m c r d ml rm r');
}
faultsim_save_and_close

foreach {tn expr res} {
  1 { dk  }           7
  2 { m f }           1
  3 { f*  }           {1 3 4 5 6 8 9 10}
  4 { m OR f }        {1 4 5 8 9 10}
  5 { sn + gh }       {5}
  6 { "sn gh" }       {5}
  7 { NEAR(r a, 5) }  {9}
  8 { m* f* }         {1 4 6 8 9 10}
  9 { m* + f* }       {1 8}
} {
  do_faultsim_test 4.$tn -prep {
    faultsim_restore_and_reopen
  } -body "
    execsql { SELECT rowid FROM t2 WHERE t2 MATCH '$expr' }
  " -test "
    faultsim_test_result {[list 0 $res]}
  "
}


#-------------------------------------------------------------------------
# The following tests use a larger database populated with random data.
#
# The database page size is set to 512 bytes and the FTS5 page size left
# at the default 1000 bytes. This means that reading a node may require
# pulling an overflow page from disk, which is an extra opportunity for
# an error to occur.
#
reset_db
do_execsql_test 5.0.1 { 
  PRAGMA main.page_size = 512;
  CREATE VIRTUAL TABLE x1 USING fts5(a, b);
  PRAGMA main.page_size;
} {512}

proc rnddoc {n} {
  set map [list 0 a  1 b  2 c  3 d  4 e  5 f  6 g  7 h  8 i  9 j]
  set doc [list]
  for {set i 0} {$i < $n} {incr i} {
    lappend doc [string map $map [format %.3d [expr int(rand()*1000)]]]
  }
  set doc
}
db func rnddoc rnddoc

do_execsql_test 5.0.2 {
  WITH r(a, b) AS (
    SELECT rnddoc(6), rnddoc(6) UNION ALL
    SELECT rnddoc(6), rnddoc(6) FROM r
  )
  INSERT INTO x1 SELECT * FROM r LIMIT 10000;
}

set res [db one {
  SELECT count(*) FROM x1 WHERE x1.a LIKE '%abc%' OR x1.b LIKE '%abc%'}
]

do_faultsim_test 5.1 -faults oom* -body {
  execsql { SELECT count(*) FROM x1 WHERE x1 MATCH 'abc' }
} -test {
  faultsim_test_result [list 0 $::res]
}
do_faultsim_test 5.2 -faults oom* -body {
  execsql { SELECT count(*) FROM x1 WHERE x1 MATCH 'abcd' }
} -test {
  faultsim_test_result [list 0 0]
}

proc test_astar {a b} {
  return [expr { [regexp {a[^ ][^ ]} $a] || [regexp {a[^ ][^ ]} $b] }]
}
db func test_astar test_astar

set res [db one { SELECT count(*) FROM x1 WHERE test_astar(a, b) } ]
do_faultsim_test 5.3 -faults oom* -body {
  execsql { SELECT count(*) FROM x1 WHERE x1 MATCH 'a*' }
} -test {
  faultsim_test_result [list 0 $::res]
}

do_faultsim_test 5.4 -faults oom* -prep {
  db close
  sqlite3 db test.db
} -body {
  execsql { INSERT INTO x1 VALUES('a b c d', 'e f g h') }
} -test {
  faultsim_test_result [list 0 {}]
}

do_faultsim_test 5.5.1 -faults oom* -body {
  execsql { 
    SELECT count(fts5_decode(rowid, block)) FROM x1_data WHERE rowid=1
  }
} -test {
  faultsim_test_result [list 0 1]
}
do_faultsim_test 5.5.2 -faults oom* -body {
  execsql { 
    SELECT count(fts5_decode(rowid, block)) FROM x1_data WHERE rowid=10
  }
} -test {
  faultsim_test_result [list 0 1]
}
do_faultsim_test 5.5.3 -faults oom* -body {
  execsql { 
    SELECT count(fts5_decode(rowid, block)) FROM x1_data WHERE rowid = (
      SELECT min(rowid) FROM x1_data WHERE rowid>20
    )
  }
} -test {
  faultsim_test_result [list 0 1]
}
do_faultsim_test 5.5.4 -faults oom* -body {
  execsql { 
    SELECT count(fts5_decode(rowid, block)) FROM x1_data WHERE rowid = (
      SELECT max(rowid) FROM x1_data 
    )
  }
} -test {
  faultsim_test_result [list 0 1]
}

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 6.0 {
  CREATE VIRTUAL TABLE x1 USING fts5(x);
  INSERT INTO x1(x1, rank) VALUES('automerge', 0);

  INSERT INTO x1 VALUES('a b c'); -- 1
  INSERT INTO x1 VALUES('a b c'); -- 2
  INSERT INTO x1 VALUES('a b c'); -- 3
  INSERT INTO x1 VALUES('a b c'); -- 4
  INSERT INTO x1 VALUES('a b c'); -- 5
  INSERT INTO x1 VALUES('a b c'); -- 6
  INSERT INTO x1 VALUES('a b c'); -- 7
  INSERT INTO x1 VALUES('a b c'); -- 8
  INSERT INTO x1 VALUES('a b c'); -- 9
  INSERT INTO x1 VALUES('a b c'); -- 10
  INSERT INTO x1 VALUES('a b c'); -- 11
  INSERT INTO x1 VALUES('a b c'); -- 12
  INSERT INTO x1 VALUES('a b c'); -- 13
  INSERT INTO x1 VALUES('a b c'); -- 14
  INSERT INTO x1 VALUES('a b c'); -- 15

  SELECT count(*) FROM x1_data;
} {17}

faultsim_save_and_close

do_faultsim_test 6.1 -faults oom* -prep {
  faultsim_restore_and_reopen
} -body {
  execsql { INSERT INTO x1 VALUES('d e f') }
} -test {
  faultsim_test_result [list 0 {}]
  if {$testrc==0} {
    set nCnt [db one {SELECT count(*) FROM x1_data}]
    if {$nCnt!=3} { error "expected 3 entries but there are $nCnt" }
  }
}

do_faultsim_test 6.2 -faults oom* -prep {
  faultsim_restore_and_reopen
} -body {
  execsql { INSERT INTO x1(x1, rank) VALUES('pgsz', 32) }
} -test {
  faultsim_test_result [list 0 {}]
}

do_faultsim_test 6.3 -faults oom-* -prep {
  faultsim_restore_and_reopen
} -body {
  execsql { INSERT INTO x1(x1) VALUES('integrity-check') }
} -test {
  faultsim_test_result [list 0 {}]
}

do_faultsim_test 6.4 -faults oom-* -prep {
  faultsim_restore_and_reopen
} -body {
  execsql { INSERT INTO x1(x1) VALUES('optimize') }
} -test {
  faultsim_test_result [list 0 {}]
}

#-------------------------------------------------------------------------
#
do_faultsim_test 7.0 -faults oom* -prep {
  catch { db close }
} -body {
  sqlite3 db test.db
} -test {
  faultsim_test_result [list 0 {}] {1 {}} {1 {initialization of fts5 failed: }}
}

#-------------------------------------------------------------------------
# A prefix query against a large document set.
#
proc rnddoc {n} {
  set map [list 0 a  1 b  2 c  3 d  4 e  5 f  6 g  7 h  8 i  9 j]
  set doc [list]
  for {set i 0} {$i < $n} {incr i} {
    lappend doc "x[string map $map [format %.3d [expr int(rand()*1000)]]]"
  }
  set doc
}

reset_db
db func rnddoc rnddoc

do_test 8.0 {
  execsql { CREATE VIRTUAL TABLE x1 USING fts5(a) }
  set ::res [list]
  for {set i 1} {$i<100} {incr i 1} {
    execsql { INSERT INTO x1 VALUES( rnddoc(50) ) }
    lappend ::res $i
  }
} {}

do_faultsim_test 8.1 -faults oom* -prep {
} -body {
  execsql { 
    SELECT rowid FROM x1 WHERE x1 MATCH 'x*'
  }
} -test {
  faultsim_test_result [list 0 $::res]
}

#-------------------------------------------------------------------------
# Segment promotion.
#
do_test 9.0 {
  reset_db
  db func rnddoc fts5_rnddoc
  execsql {
    CREATE VIRTUAL TABLE s2 USING fts5(x);
    INSERT INTO s2(s2, rank) VALUES('pgsz', 32);
    INSERT INTO s2(s2, rank) VALUES('automerge', 0);
  }

  for {set i 1} {$i <= 16} {incr i} {
    execsql { INSERT INTO s2 VALUES(rnddoc(5)) }
  }
  fts5_level_segs s2
} {0 1}
set insert_doc [db one {SELECT rnddoc(160)}]
faultsim_save_and_close

do_faultsim_test 9.1 -faults oom-* -prep {
  faultsim_restore_and_reopen
} -body {
  execsql { INSERT INTO s2 VALUES($::insert_doc) }
} -test {
  faultsim_test_result {0 {}}
  if {$testrc==0} {
    set ls [fts5_level_segs s2]
    if {$ls != "2 0"} { error "fts5_level_segs says {$ls}" }
  }
}



finish_test

Added ext/fts5/test/fts5fault2.test.
























































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# 2014 June 17
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
#
# This file is focused on OOM errors.
#

source [file join [file dirname [info script]] fts5_common.tcl]
source $testdir/malloc_common.tcl
set testprefix fts5fault2

# If SQLITE_ENABLE_FTS3 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

set doc [string trim [string repeat "x y z " 200]]
do_execsql_test 1.0 {
  CREATE TABLE t1(a INTEGER PRIMARY KEY, x);
  CREATE VIRTUAL TABLE x1 USING fts5(x, content='t1', content_rowid='a');
  INSERT INTO x1(x1, rank) VALUES('pgsz', 32);
  WITH input(a,b) AS (
    SELECT 1, $doc UNION ALL
    SELECT a+1, ($doc || CASE WHEN (a+1)%100 THEN '' ELSE ' xyz' END) 
    FROM input WHERE a < 1000
  )
  INSERT INTO t1 SELECT * FROM input;

  INSERT INTO x1(x1) VALUES('rebuild');
}

do_faultsim_test 1.1 -faults oom-* -prep {
} -body {
  execsql { SELECT rowid FROM x1 WHERE x1 MATCH 'z AND xyz' }
} -test {
  faultsim_test_result {0 {100 200 300 400 500 600 700 800 900 1000}}
}

do_faultsim_test 1.2 -faults oom-* -prep {
} -body {
  execsql { SELECT rowid FROM x1 WHERE x1 MATCH 'z + xyz' ORDER BY 1 DESC}
} -test {
  faultsim_test_result {0 {1000 900 800 700 600 500 400 300 200 100}}
}

#-------------------------------------------------------------------------
# OOM within a query that accesses the in-memory hash table. 
#
reset_db 
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE "a b c" USING fts5(a, b, c);
  INSERT INTO "a b c" VALUES('one two', 'x x x', 'three four');
  INSERT INTO "a b c" VALUES('nine ten', 'y y y', 'two two');
}

do_faultsim_test 2.1 -faults oom-trans* -prep {
  execsql {
    BEGIN;
      INSERT INTO "a b c" VALUES('one one', 'z z z', 'nine ten');
  }
} -body {
  execsql { SELECT rowid FROM "a b c" WHERE "a b c" MATCH 'one' }
} -test {
  faultsim_test_result {0 {1 3}}
  catchsql { ROLLBACK }
}

#-------------------------------------------------------------------------
# OOM within an 'optimize' operation that writes multiple pages to disk.
#
reset_db 
do_execsql_test 3.0 {
  CREATE VIRTUAL TABLE zzz USING fts5(z);
  INSERT INTO zzz(zzz, rank) VALUES('pgsz', 32);
  INSERT INTO zzz VALUES('a b c d');
  INSERT INTO zzz SELECT 'c d e f' FROM zzz;
  INSERT INTO zzz SELECT 'e f g h' FROM zzz;
  INSERT INTO zzz SELECT 'i j k l' FROM zzz;
  INSERT INTO zzz SELECT 'l k m n' FROM zzz;
  INSERT INTO zzz SELECT 'o p q r' FROM zzz;
}
faultsim_save_and_close

do_faultsim_test 3.1 -faults oom-trans* -prep {
  faultsim_restore_and_reopen
  execsql { SELECT rowid FROM zzz }
} -body {
  execsql { INSERT INTO zzz(zzz) VALUES('optimize') }
} -test {
  faultsim_test_result {0 {}}
}

#-------------------------------------------------------------------------
# OOM within an 'integrity-check' operation.
#
reset_db 
db func rnddoc fts5_rnddoc
do_execsql_test 4.0 {
  CREATE VIRTUAL TABLE zzz USING fts5(z);
  INSERT INTO zzz(zzz, rank) VALUES('pgsz', 32);
  WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<10)
  INSERT INTO zzz SELECT rnddoc(10) || ' xccc' FROM ii;
}

do_faultsim_test 4.1 -faults oom-trans* -prep {
} -body {
  execsql { INSERT INTO zzz(zzz) VALUES('integrity-check') }
} -test {
  faultsim_test_result {0 {}}
}

#-------------------------------------------------------------------------
# OOM while parsing a tokenize=option
#
reset_db
faultsim_save_and_close
do_faultsim_test 5.0 -faults oom-* -prep {
  faultsim_restore_and_reopen
} -body {
  execsql { 
    CREATE VIRTUAL TABLE uio USING fts5(a, b, 
      tokenize="porter 'ascii'",
      content="another table",
      content_rowid="somecolumn"
    );
  }
} -test {
  faultsim_test_result {0 {}}
}

finish_test

Added ext/fts5/test/fts5fault3.test.


































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# 2014 June 17
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
#
# This file is focused on OOM errors.
#

source [file join [file dirname [info script]] fts5_common.tcl]
source $testdir/malloc_common.tcl
set testprefix fts5fault3

# If SQLITE_ENABLE_FTS3 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

#-------------------------------------------------------------------------
# An OOM while resuming a partially completed segment merge.
#
db func rnddoc fts5_rnddoc 
do_test 1.0 {
  expr srand(0)
  execsql {
    CREATE VIRTUAL TABLE xx USING fts5(x);
    INSERT INTO xx(xx, rank) VALUES('pgsz', 32);
    INSERT INTO xx(xx, rank) VALUES('automerge', 16);
  }
  for {set i 0} {$i < 10} {incr i} {
    execsql {
      BEGIN;
        INSERT INTO xx(x) VALUES(rnddoc(20));
        INSERT INTO xx(x) VALUES(rnddoc(20));
        INSERT INTO xx(x) VALUES(rnddoc(20));
      COMMIT
    }
  }

  execsql {
    INSERT INTO xx(xx, rank) VALUES('automerge', 2);
    INSERT INTO xx(xx, rank) VALUES('merge', 50);
  }
} {}
faultsim_save_and_close

do_faultsim_test 1 -faults oom-* -prep {
  faultsim_restore_and_reopen
} -body {
  execsql { INSERT INTO xx(xx, rank) VALUES('merge', 1) }
} -test {
  faultsim_test_result [list 0 {}]
}

#-------------------------------------------------------------------------
# An OOM while flushing an unusually large term to disk.
#
reset_db
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE xx USING fts5(x);
  INSERT INTO xx(xx, rank) VALUES('pgsz', 32);
}
faultsim_save_and_close

set    doc "a long term abcdefghijklmnopqrstuvwxyz "
append doc "and then abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz "
append doc [string repeat "abcdefghijklmnopqrstuvwxyz" 10]

do_faultsim_test 2 -faults oom-* -prep {
  faultsim_restore_and_reopen
} -body {
  execsql { INSERT INTO xx(x) VALUES ($::doc) }
} -test {
  faultsim_test_result [list 0 {}]
}

#-------------------------------------------------------------------------
# An OOM while flushing an unusually large term to disk.
#
reset_db
do_execsql_test 3.0 {
  CREATE VIRTUAL TABLE xx USING fts5(x);
}
faultsim_save_and_close

set doc [fts5_rnddoc 1000]
do_faultsim_test 3.1 -faults oom-* -prep {
  faultsim_restore_and_reopen
} -body {
  execsql { INSERT INTO xx(x) VALUES ($::doc) }
} -test {
  faultsim_test_result [list 0 {}]
}

set doc [string repeat "abc " 100]
do_faultsim_test 3.2 -faults oom-* -prep {
  faultsim_restore_and_reopen
} -body {
  execsql { INSERT INTO xx(x) VALUES ($::doc) }
} -test {
  faultsim_test_result [list 0 {}]
}



finish_test

Added ext/fts5/test/fts5fault4.test.






































































































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
# 2014 June 17
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
#
# This file is focused on OOM errors.
#

source [file join [file dirname [info script]] fts5_common.tcl]
source $testdir/malloc_common.tcl
set testprefix fts5fault4

# If SQLITE_ENABLE_FTS3 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

#-------------------------------------------------------------------------
# An OOM while dropping an fts5 table.
#
db func rnddoc fts5_rnddoc 
do_test 1.0 {
  execsql { CREATE VIRTUAL TABLE xx USING fts5(x) }
} {}
faultsim_save_and_close

do_faultsim_test 1 -faults oom-* -prep {
  faultsim_restore_and_reopen
  execsql { SELECT * FROM xx }
} -body {
  execsql { DROP TABLE xx }
} -test {
  faultsim_test_result [list 0 {}]
}

#-------------------------------------------------------------------------
# An OOM within an "ORDER BY rank" query.
#
db func rnddoc fts5_rnddoc 
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE xx USING fts5(x);
  INSERT INTO xx VALUES ('abc ' || rnddoc(10));
  INSERT INTO xx VALUES ('abc abc' || rnddoc(9));
  INSERT INTO xx VALUES ('abc abc abc' || rnddoc(8));
} {}
faultsim_save_and_close

do_faultsim_test 2 -faults oom-* -prep {
  faultsim_restore_and_reopen
  execsql { SELECT * FROM xx }
} -body {
  execsql { SELECT rowid FROM xx WHERE xx MATCH 'abc' ORDER BY rank }
} -test {
  faultsim_test_result [list 0 {3 2 1}]
}

#-------------------------------------------------------------------------
# An OOM while "reseeking" an FTS cursor.
#
do_execsql_test 3.0 {
  CREATE VIRTUAL TABLE jj USING fts5(j);
  INSERT INTO jj(rowid, j) VALUES(101, 'm t w t f s s');
  INSERT INTO jj(rowid, j) VALUES(202, 't w t f s');
  INSERT INTO jj(rowid, j) VALUES(303, 'w t f');
  INSERT INTO jj(rowid, j) VALUES(404, 't');
}
faultsim_save_and_close

do_faultsim_test 3 -faults oom-* -prep {
  faultsim_restore_and_reopen
  execsql { SELECT * FROM jj }
} -body {
  set res [list]
  db eval { SELECT rowid FROM jj WHERE jj MATCH 't' } {
    lappend res $rowid
    if {$rowid==303} {
      execsql { DELETE FROM jj WHERE rowid=404 }
    }
  }
  set res
} -test {
  faultsim_test_result [list 0 {101 202 303}]
}

#-------------------------------------------------------------------------
# An OOM within a special "*reads" query.
#
reset_db
db func rnddoc fts5_rnddoc
do_execsql_test 4.0 {
  CREATE VIRTUAL TABLE x1 USING fts5(x);
  INSERT INTO x1(x1, rank) VALUES('pgsz', 32);

  WITH ii(i) AS ( SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<10 )
  INSERT INTO x1 SELECT rnddoc(5) FROM ii;
}

set ::res [db eval {SELECT rowid, x1 FROM x1 WHERE x1 MATCH '*reads'}]

do_faultsim_test 4 -faults oom-* -body {
  db eval {SELECT rowid, x, x1 FROM x1 WHERE x1 MATCH '*reads'}
} -test {
  faultsim_test_result {0 {0 {} 3}}
}

#-------------------------------------------------------------------------
# An OOM within a query that uses a custom rank function.
#
reset_db
do_execsql_test 5.0 {
  PRAGMA encoding='utf16';
  CREATE VIRTUAL TABLE x2 USING fts5(x);
  INSERT INTO x2(rowid, x) VALUES(10, 'a b c'); -- 3
  INSERT INTO x2(rowid, x) VALUES(20, 'a b c'); -- 6
  INSERT INTO x2(rowid, x) VALUES(30, 'a b c'); -- 2
  INSERT INTO x2(rowid, x) VALUES(40, 'a b c'); -- 5
  INSERT INTO x2(rowid, x) VALUES(50, 'a b c'); -- 1
}

proc rowidmod {cmd mod} { 
  set row [$cmd xRowid]
  expr {$row % $mod}
}
sqlite3_fts5_create_function db rowidmod rowidmod

do_faultsim_test 5.1 -faults oom-* -body {
  db eval {
    SELECT rowid || '-' || rank FROM x2 WHERE x2 MATCH 'b' AND 
    rank MATCH "rowidmod('7')" ORDER BY rank
  }
} -test {
  faultsim_test_result {0 {50-1 30-2 10-3 40-5 20-6}}
}

proc rowidprefix {cmd prefix} { 
  set row [$cmd xRowid]
  set {} "${row}-${prefix}"
}
sqlite3_fts5_create_function db rowidprefix rowidprefix

set str [string repeat abcdefghijklmnopqrstuvwxyz 10]
do_faultsim_test 5.2 -faults oom-* -body {
  db eval "
    SELECT rank, x FROM x2 WHERE x2 MATCH 'b' AND 
    rank MATCH 'rowidprefix(''$::str'')'
    LIMIT 1
  "
} -test {
  faultsim_test_result "0 {10-$::str {a b c}}"
}


#-------------------------------------------------------------------------
# OOM errors within auxiliary functions.
#
reset_db
do_execsql_test 6.0 {
  CREATE VIRTUAL TABLE x3 USING fts5(xxx);
  INSERT INTO x3 VALUES('a b c d c b a');
  INSERT INTO x3 VALUES('a a a a a a a');
  INSERT INTO x3 VALUES('a a a a a a a');
}

do_faultsim_test 6.1 -faults oom-t* -body {
  db eval { SELECT highlight(x3, 0, '*', '*') FROM x3 WHERE x3 MATCH 'c' }
} -test {
  faultsim_test_result {0 {{a b *c* d *c* b a}}}
}

proc firstinst {cmd} { 
  foreach {p c o} [$cmd xInst 0] {}
  expr $c*100 + $o
}
sqlite3_fts5_create_function db firstinst firstinst

do_faultsim_test 6.2 -faults oom-t* -body {
  db eval { SELECT firstinst(x3) FROM x3 WHERE x3 MATCH 'c' }
} -test {
  faultsim_test_result {0 2} {1 SQLITE_NOMEM}
}

proc previc {cmd} {
  set res [$cmd xGetAuxdataInt 0]
  $cmd xSetAuxdataInt [$cmd xInstCount]
  return $res
}
sqlite3_fts5_create_function db previc  previc

do_faultsim_test 6.2 -faults oom-t* -body {
  db eval { SELECT previc(x3) FROM x3 WHERE x3 MATCH 'a' }
} -test {
  faultsim_test_result {0 {0 2 7}} {1 SQLITE_NOMEM}
}

#-------------------------------------------------------------------------
# OOM error when querying for a phrase with many tokens.
#
reset_db
do_execsql_test 7.0 {
  CREATE VIRTUAL TABLE tt USING fts5(x, y);
  INSERT INTO tt VALUES('f b g b c b', 'f a d c c b');  -- 1
  INSERT INTO tt VALUES('d a e f e d', 'f b b d e e');  -- 2
  INSERT INTO tt VALUES('f b g a d c', 'e f c f a d');  -- 3
  INSERT INTO tt VALUES('f f c d g f', 'f a e b g b');  -- 4
  INSERT INTO tt VALUES('a g b d a g', 'e g a e a c');  -- 5
  INSERT INTO tt VALUES('c d b d e f', 'f g e g e e');  -- 6
  INSERT INTO tt VALUES('e g f f b c', 'f c e f g f');  -- 7
  INSERT INTO tt VALUES('e g c f c e', 'f e e a f g');  -- 8
  INSERT INTO tt VALUES('e a e b e e', 'd c c f f f');  -- 9
  INSERT INTO tt VALUES('f a g g c c', 'e g d g c e');  -- 10
  INSERT INTO tt VALUES('c d b a e f', 'f g e h e e');  -- 11

  CREATE VIRTUAL TABLE tt2 USING fts5(o);
  INSERT INTO tt2(rowid, o) SELECT rowid, x||' '||y FROM tt;
  INSERT INTO tt2(rowid, o) VALUES(12, 'a b c d e f g h i j k l');
}

do_faultsim_test 7.2 -faults oom-* -body {
  db eval { SELECT rowid FROM tt WHERE tt MATCH 'f+g+e+g+e+e' }
} -test {
  faultsim_test_result {0 6} {1 SQLITE_NOMEM}
}

do_faultsim_test 7.3 -faults oom-* -body {
  db eval { SELECT rowid FROM tt WHERE tt MATCH 'NEAR(a b c d e f)' }
} -test {
  faultsim_test_result {0 11} {1 SQLITE_NOMEM}
}

do_faultsim_test 7.4 -faults oom-t* -body {
  db eval { SELECT rowid FROM tt2 WHERE tt2 MATCH '"g c f c e f e e a f"' }
} -test {
  faultsim_test_result {0 8} {1 SQLITE_NOMEM}
}

do_faultsim_test 7.5 -faults oom-* -body {
  db eval {SELECT rowid FROM tt2 WHERE tt2 MATCH 'NEAR(a b c d e f g h i j k)'}
} -test {
  faultsim_test_result {0 12} {1 SQLITE_NOMEM}
}

do_faultsim_test 7.6 -faults oom-* -body {
  db eval {SELECT rowid FROM tt WHERE tt MATCH 'y: "c c"'}
} -test {
  faultsim_test_result {0 {1 9}} {1 SQLITE_NOMEM}
}

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 8.0 {
  CREATE VIRTUAL TABLE tt USING fts5(x);
  INSERT INTO tt(tt, rank) VALUES('pgsz', 32);
  BEGIN;
    INSERT INTO tt(rowid, x) VALUES(1, 'a b c d x x');
    WITH ii(i) AS (SELECT 2 UNION ALL SELECT i+1 FROM ii WHERE i<99)
      INSERT INTO tt(rowid, x) SELECT i, 'a b c x x d' FROM ii;
    INSERT INTO tt(rowid, x) VALUES(100, 'a b c d x x');
  COMMIT;
}

do_faultsim_test 8.1 -faults oom-t* -body {
  db eval { SELECT rowid FROM tt WHERE tt MATCH 'NEAR(a b c d, 2)' }
} -test {
  faultsim_test_result {0 {1 100}} {1 SQLITE_NOMEM}
}

do_faultsim_test 8.2 -faults oom-t* -body {
  db eval { SELECT count(*) FROM tt WHERE tt MATCH 'a OR d' }
} -test {
  faultsim_test_result {0 100} {1 SQLITE_NOMEM}
}


#-------------------------------------------------------------------------
# Fault in NOT query.
#
reset_db
do_execsql_test 9.0 {
  CREATE VIRTUAL TABLE tt USING fts5(x);
  INSERT INTO tt(tt, rank) VALUES('pgsz', 32);
  BEGIN;
    WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<200)
      INSERT INTO tt(rowid, x) 
      SELECT i, CASE WHEN (i%50)==0 THEN 'a a a a a a' ELSE 'a x a x a x' END 
      FROM ii;
  COMMIT;
}

do_faultsim_test 9.1 -faults oom-* -body {
  db eval { SELECT rowid FROM tt WHERE tt MATCH 'a NOT x' }
} -test {
  faultsim_test_result {0 {50 100 150 200}} {1 SQLITE_NOMEM}
}

#-------------------------------------------------------------------------
# OOM in fts5_expr() SQL function.
#
do_faultsim_test 10.1 -faults oom-t* -body {
  db one { SELECT fts5_expr('a AND b NEAR(a b)') }
} -test {
  faultsim_test_result {0 {"a" AND "b" AND NEAR("a" "b", 10)}} 
}

do_faultsim_test 10.2 -faults oom-t* -body {
  db one { SELECT fts5_expr_tcl('x:"a b c" AND b NEAR(a b)', 'ns', 'x') }
} -test {
  set res {AND [ns -col 0 -- {a b c}] [ns -- {b}] [ns -near 10 -- {a} {b}]}
  faultsim_test_result [list 0 $res]
}

do_faultsim_test 10.3 -faults oom-t* -body {
  db one { SELECT fts5_expr('x:a', 'x') }
} -test {
  faultsim_test_result {0 {x : "a"}}
}

#-------------------------------------------------------------------------
# OOM while configuring 'rank' option.
#
reset_db
do_execsql_test 11.0 {
  CREATE VIRTUAL TABLE ft USING fts5(x);
}
do_faultsim_test 11.1 -faults oom-t* -body {
  db eval { INSERT INTO ft(ft, rank) VALUES('rank', 'bm25(10.0, 5.0)') }
} -test {
  faultsim_test_result {0 {}} {1 {disk I/O error}}
}

#-------------------------------------------------------------------------
# OOM while creating an fts5vocab table.
#
reset_db
do_execsql_test 12.0 {
  CREATE VIRTUAL TABLE ft USING fts5(x);
}
faultsim_save_and_close
do_faultsim_test 12.1 -faults oom-t* -prep {
  faultsim_restore_and_reopen
  db eval { SELECT * FROM sqlite_master }
} -body {
  db eval { CREATE VIRTUAL TABLE vv USING fts5vocab(ft, 'row') }
} -test {
  faultsim_test_result {0 {}} 
}

#-------------------------------------------------------------------------
# OOM while querying an fts5vocab table.
#
reset_db
do_execsql_test 13.0 {
  CREATE VIRTUAL TABLE ft USING fts5(x);
  INSERT INTO ft VALUES('a b');
  CREATE VIRTUAL TABLE vv USING fts5vocab(ft, 'row');
}
faultsim_save_and_close
do_faultsim_test 13.1 -faults oom-t* -prep {
  faultsim_restore_and_reopen
  db eval { SELECT * FROM vv }
} -body {
  db eval { SELECT * FROM vv }
} -test {
  faultsim_test_result {0 {a 1 1 b 1 1}} 
}

#-------------------------------------------------------------------------
# OOM in multi-column token query.
#
reset_db
do_execsql_test 13.0 {
  CREATE VIRTUAL TABLE ft USING fts5(x, y, z);
  INSERT INTO ft(ft, rank) VALUES('pgsz', 32);
  INSERT INTO ft VALUES(
      'x x x x x x x x x x x x x x x x',
      'y y y y y y y y y y y y y y y y',
      'z z z z z z z z x x x x x x x x'
  );
  INSERT INTO ft SELECT * FROM ft;
  INSERT INTO ft SELECT * FROM ft;
  INSERT INTO ft SELECT * FROM ft;
  INSERT INTO ft SELECT * FROM ft;
}
faultsim_save_and_close
do_faultsim_test 13.1 -faults oom-t* -prep {
  faultsim_restore_and_reopen
  db eval { SELECT * FROM ft }
} -body {
  db eval { SELECT rowid FROM ft WHERE ft MATCH '{x z}: x' }
} -test {
  faultsim_test_result {0 {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16}}
}

#-------------------------------------------------------------------------
# OOM in an "ALTER TABLE RENAME TO"
#
reset_db
do_execsql_test 14.0 {
  CREATE VIRTUAL TABLE "tbl one" USING fts5(x, y, z);
}
faultsim_save_and_close
do_faultsim_test 14.1 -faults oom-t* -prep {
  faultsim_restore_and_reopen
  db eval { SELECT * FROM "tbl one" }
} -body {
  db eval { ALTER TABLE "tbl one" RENAME TO "tbl two" }
} -test {
  faultsim_test_result {0 {}}
}

finish_test

Added ext/fts5/test/fts5fault5.test.
































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# 2014 June 17
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
#
# This file is focused on OOM errors.
#

source [file join [file dirname [info script]] fts5_common.tcl]
source $testdir/malloc_common.tcl
set testprefix fts5fault5

# If SQLITE_ENABLE_FTS3 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

#-------------------------------------------------------------------------
# OOM while creating an FTS5 table.
#
do_faultsim_test 1.1 -faults oom-t* -prep {
  db eval { DROP TABLE IF EXISTS abc }
} -body {
  db eval { CREATE VIRTUAL TABLE abc USING fts5(x,y) }
} -test {
  faultsim_test_result {0 {}}
}


#-------------------------------------------------------------------------
# OOM while writing a multi-tier doclist-index. And while running
# integrity-check on the same.
#
reset_db
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE tt USING fts5(x);
  INSERT INTO tt(tt, rank) VALUES('pgsz', 32);
}
faultsim_save_and_close

do_faultsim_test 2.1 -faults oom-t* -prep {
  faultsim_restore_and_reopen
  db eval { SELECT * FROM tt }
} -body {
  set str [string repeat "abc " 50]
  db eval {
    WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100)
      INSERT INTO tt(rowid, x) SELECT i, $str FROM ii;
  }
} -test {
  faultsim_test_result {0 {}}
}

do_faultsim_test 2.2 -faults oom-t* -body {
  db eval { INSERT INTO tt(tt) VALUES('integrity-check') }
} -test {
  faultsim_test_result {0 {}}
}

#-------------------------------------------------------------------------
# OOM while scanning an fts5vocab table.
#
reset_db
do_test 3.0 {
  execsql {
    CREATE VIRTUAL TABLE tt USING fts5(x);
    CREATE VIRTUAL TABLE tv USING fts5vocab(tt, 'row');
    INSERT INTO tt(tt, rank) VALUES('pgsz', 32);
    BEGIN;
  }
  for {set i 0} {$i < 20} {incr i} {
    set str [string repeat "$i " 50]
    execsql { INSERT INTO tt VALUES($str) }
  }
  execsql COMMIT
} {}

do_faultsim_test 3.1 -faults oom-t* -body {
  db eval {
    SELECT term FROM tv;
  }
} -test {
  faultsim_test_result {0 {0 1 10 11 12 13 14 15 16 17 18 19 2 3 4 5 6 7 8 9}}
}



finish_test

Added ext/fts5/test/fts5fault6.test.
















































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# 2014 June 17
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
#
# This file is focused on OOM errors.
#

source [file join [file dirname [info script]] fts5_common.tcl]
source $testdir/malloc_common.tcl
set testprefix fts5fault6

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

#-------------------------------------------------------------------------
# OOM while rebuilding an FTS5 table.
#
do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE tt USING fts5(a, b);
  INSERT INTO tt VALUES('c d c g g f', 'a a a d g a');
  INSERT INTO tt VALUES('c d g b f d', 'b g e c g c');
  INSERT INTO tt VALUES('c c f d e d', 'c e g d b c');
  INSERT INTO tt VALUES('e a f c e f', 'g b a c d g');
  INSERT INTO tt VALUES('c g f b b d', 'g c d c f g');
  INSERT INTO tt VALUES('d a g a b b', 'g c g g c e');
  INSERT INTO tt VALUES('e f a b c e', 'f d c d c c');
  INSERT INTO tt VALUES('e c a g c d', 'b b g f f b');
  INSERT INTO tt VALUES('g b d d e b', 'f f b d a c');
  INSERT INTO tt VALUES('e a d a e d', 'c e a e f g');
}
faultsim_save_and_close

do_faultsim_test 1.1 -faults oom-t* -prep {
  faultsim_restore_and_reopen
} -body {
  db eval { INSERT INTO tt(tt) VALUES('rebuild') }
} -test {
  faultsim_test_result {0 {}}
}

do_faultsim_test 1.2 -faults oom-t* -prep {
  faultsim_restore_and_reopen
} -body {
  db eval { REPLACE INTO tt(rowid, a, b) VALUES(6, 'x y z', 'l l l'); }
} -test {
  faultsim_test_result {0 {}}
}


#-------------------------------------------------------------------------
# OOM within a special delete.
#
reset_db
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE tt USING fts5(a, content="");
  INSERT INTO tt VALUES('c d c g g f');
  INSERT INTO tt VALUES('c d g b f d');
  INSERT INTO tt VALUES('c c f d e d');
  INSERT INTO tt VALUES('e a f c e f');
  INSERT INTO tt VALUES('c g f b b d');
  INSERT INTO tt VALUES('d a g a b b');
  INSERT INTO tt VALUES('e f a b c e');
  INSERT INTO tt VALUES('e c a g c d');
  INSERT INTO tt VALUES('g b d d e b');
  INSERT INTO tt VALUES('e a d a e d');
}
faultsim_save_and_close

do_faultsim_test 2.1 -faults oom-t* -prep {
  faultsim_restore_and_reopen
} -body {
  db eval { INSERT INTO tt(tt, rowid, a) VALUES('delete', 3, 'c d g b f d'); }
} -test {
  faultsim_test_result {0 {}}
}

do_faultsim_test 2.2 -faults oom-t* -prep {
  faultsim_restore_and_reopen
} -body {
  db eval { INSERT INTO tt(tt) VALUES('delete-all') }
} -test {
  faultsim_test_result {0 {}}
}

do_faultsim_test 2.3 -faults oom-t* -prep {
  faultsim_restore_and_reopen
} -body {
  db eval { INSERT INTO tt VALUES('x y z') }
} -test {
  faultsim_test_result {0 {}}
}

#-------------------------------------------------------------------------
# OOM in the ASCII tokenizer with very large tokens. 
#
# Also the unicode tokenizer.
#
set t1 [string repeat wxyz 20]
set t2 [string repeat wxyz 200]
set t3 [string repeat wxyz 2000]
set doc "$t1 $t2 $t3"
do_execsql_test 3.0 {
  CREATE VIRTUAL TABLE xyz USING fts5(c, tokenize=ascii, content="");
  CREATE VIRTUAL TABLE xyz2 USING fts5(c, content="");
}
faultsim_save_and_close

do_faultsim_test 3.1 -faults oom-t* -prep {
  faultsim_restore_and_reopen
  db eval { SELECT * FROM xyz }
} -body {
  db eval { INSERT INTO xyz VALUES($::doc) }
} -test {
  faultsim_test_result {0 {}}
}

do_faultsim_test 3.2 -faults oom-t* -prep {
  faultsim_restore_and_reopen
  db eval { SELECT * FROM xyz2 }
} -body {
  db eval { INSERT INTO xyz2 VALUES($::doc) }
} -test {
  faultsim_test_result {0 {}}
}

#-------------------------------------------------------------------------
# OOM while initializing a unicode61 tokenizer.
#
reset_db
faultsim_save_and_close
do_faultsim_test 4.1 -faults oom-t* -prep {
  faultsim_restore_and_reopen
} -body {
  db eval { 
    CREATE VIRTUAL TABLE yu USING fts5(x, tokenize="unicode61 separators abc");
  }
} -test {
  faultsim_test_result {0 {}}
}

finish_test

Added ext/fts5/test/fts5full.test.






















































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# 2014 Dec 20
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Test that SQLITE_FULL is returned if the FTS5 table cannot find a free 
# segid to use. In practice this can only really happen when automerge and
# crisismerge are both disabled.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5full

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE x8 USING fts5(i);
  INSERT INTO x8(x8, rank) VALUES('automerge', 0);
  INSERT INTO x8(x8, rank) VALUES('crisismerge', 100000);
}

db func rnddoc fts5_rnddoc
do_test 1.1 {
  list [catch {
    for {set i 0} {$i < 2500} {incr i} {
      execsql { INSERT INTO x8 VALUES( rnddoc(5) ); }
    }
  } msg] $msg
} {1 {database or disk is full}}


finish_test

Added ext/fts5/test/fts5hash.test.
























































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# 2015 April 21
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# The tests in this file are focused on the code in fts5_hash.c.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5hash

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

#-------------------------------------------------------------------------
# Return a list of tokens (a vocabulary) that all share the same hash
# key value. This can be used to test hash collisions.
#
proc build_vocab1 {args} {

  set O(-nslot) 1024
  set O(-nword)   20
  set O(-hash)    88
  set O(-prefix)  ""

  if {[llength $args] % 2} { error "bad args" }
  array set O2 $args
  foreach {k v} $args {
    if {[info exists O($k)]==0} { error "bad option: $k" }
    set O($k) $v
  }

  set L [list]
  while {[llength $L] < $O(-nword)} {
    set t "$O(-prefix)[random_token]"
    set h [sqlite3_fts5_token_hash $O(-nslot) $t]
    if {$O(-hash)==$h} { lappend L $t }
  }
  return $L
}

proc random_token {} {
  set map [list 0 a  1 b  2 c  3 d  4 e  5 f  6 g  7 h  8 i  9 j]
  set iVal [expr int(rand() * 2000000)]
  return [string map $map $iVal]
}

proc random_doc {vocab nWord} {
  set doc ""
  set nVocab [llength $vocab]
  for {set i 0} {$i<$nWord} {incr i} {
    set j [expr {int(rand() * $nVocab)}]
    lappend doc [lindex $vocab $j]
  }
  return $doc
}

set vocab [build_vocab1]
db func r random_doc 

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE eee USING fts5(e, ee);
  BEGIN;
    WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100)
    INSERT INTO eee SELECT r($vocab, 5), r($vocab, 7) FROM ii;
    INSERT INTO eee(eee) VALUES('integrity-check');
  COMMIT;
  INSERT INTO eee(eee) VALUES('integrity-check');
}

set hash [sqlite3_fts5_token_hash 1024 xyz]
set vocab [build_vocab1 -prefix xyz -hash $hash]
lappend vocab xyz

do_execsql_test 1.1 {
  CREATE VIRTUAL TABLE vocab USING fts5vocab(eee, 'row'); 
  BEGIN;
    WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100)
    INSERT INTO eee SELECT r($vocab, 5), r($vocab, 7) FROM ii;
    INSERT INTO eee(eee) VALUES('integrity-check');
}

do_test 1.2 {
  db eval { SELECT term, doc FROM vocab } {
    set nRow [db one {SELECT count(*) FROM eee WHERE eee MATCH $term}]
    if {$nRow != $doc} {
      error "term=$term fts5vocab=$doc cnt=$nRow"
    }
  }
  set {} {}
} {}

do_execsql_test 1.3 {
  COMMIT;
  INSERT INTO eee(eee) VALUES('integrity-check');
}

finish_test

Added ext/fts5/test/fts5integrity.test.






















































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# 2015 Jan 13
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file containst tests focused on the integrity-check procedure.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5integrity

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE xx USING fts5(x);
  INSERT INTO xx VALUES('term');
}
do_execsql_test 1.1 {
  INSERT INTO xx(xx) VALUES('integrity-check');
}

do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE yy USING fts5(x, prefix=1);
  INSERT INTO yy VALUES('term');
}
do_execsql_test 2.1 {
  INSERT INTO yy(yy) VALUES('integrity-check');
}

#--------------------------------------------------------------------
#
do_execsql_test 3.0 {
  CREATE VIRTUAL TABLE zz USING fts5(z);
  INSERT INTO zz(zz, rank) VALUES('pgsz', 32);
  INSERT INTO zz VALUES('b b b b b b b b b b b b b b');
  INSERT INTO zz SELECT z FROM zz;
  INSERT INTO zz SELECT z FROM zz;
  INSERT INTO zz SELECT z FROM zz;
  INSERT INTO zz SELECT z FROM zz;
  INSERT INTO zz SELECT z FROM zz;
  INSERT INTO zz SELECT z FROM zz;
  INSERT INTO zz(zz) VALUES('optimize');
}

do_execsql_test 3.1 { INSERT INTO zz(zz) VALUES('integrity-check'); }

#--------------------------------------------------------------------
# Mess around with a docsize record. And the averages record. Then
# check that integrity-check picks it up.
#
do_execsql_test 4.0 {
  CREATE VIRTUAL TABLE aa USING fts5(zz);
  INSERT INTO aa(zz) VALUES('a b c d e');
  INSERT INTO aa(zz) VALUES('a b c d');
  INSERT INTO aa(zz) VALUES('a b c');
  INSERT INTO aa(zz) VALUES('a b');
  INSERT INTO aa(zz) VALUES('a');
  SELECT length(sz) FROM aa_docsize;
} {1 1 1 1 1}
do_execsql_test 4.1 { 
  INSERT INTO aa(aa) VALUES('integrity-check'); 
}

do_catchsql_test 4.2 { 
  BEGIN;
    UPDATE aa_docsize SET sz = X'44' WHERE rowid = 3;
    INSERT INTO aa(aa) VALUES('integrity-check'); 
} {1 {database disk image is malformed}}

do_catchsql_test 4.3 { 
  ROLLBACK;
  BEGIN;
    UPDATE aa_data SET block = X'44' WHERE rowid = 1;
    INSERT INTO aa(aa) VALUES('integrity-check'); 
} {1 {database disk image is malformed}}

do_catchsql_test 4.4 { 
  ROLLBACK;
  BEGIN;
    INSERT INTO aa_docsize VALUES(23, X'04');
    INSERT INTO aa(aa) VALUES('integrity-check'); 
} {1 {database disk image is malformed}}

do_catchsql_test 4.5 { 
  ROLLBACK;
  BEGIN;
    INSERT INTO aa_docsize VALUES(23, X'00');
    INSERT INTO aa_content VALUES(23, '');
    INSERT INTO aa(aa) VALUES('integrity-check'); 
} {1 {database disk image is malformed}}

#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM zz_data} {puts $r}
#exit


finish_test

Added ext/fts5/test/fts5merge.test.




































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
# 2014 Dec 20
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Test that focus on incremental merges of segments.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5merge

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

db func repeat [list string repeat]

#-------------------------------------------------------------------------
# Create an fts index so that:
#
#   * the index consists of two top-level segments
#   * each segment contains records related to $nRowPerSeg rows
#   * all rows consist of tokens "x" and "y" only.
#
# Then run ('merge', 1) until everything is completely merged.
#
proc do_merge1_test {testname nRowPerSeg} {
  set ::nRowPerSeg [expr $nRowPerSeg]
  do_execsql_test $testname.0 {
    DROP TABLE IF EXISTS x8;
    CREATE VIRTUAL TABLE x8 USING fts5(i);
    INSERT INTO x8(x8, rank) VALUES('pgsz', 32);

    WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<$::nRowPerSeg)
      INSERT INTO x8 SELECT repeat('x y ', i % 16) FROM ii;

    WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<$::nRowPerSeg)
      INSERT INTO x8 SELECT repeat('x y ', i % 16) FROM ii;

    INSERT INTO x8(x8, rank) VALUES('automerge', 2);
  }

  for {set tn 1} {[lindex [fts5_level_segs x8] 0]>0} {incr tn} {
    do_execsql_test $testname.$tn {
      INSERT INTO x8(x8, rank) VALUES('merge', 1);
      INSERT INTO x8(x8) VALUES('integrity-check');
    }
    if {$tn>5} break
  }

  do_test $testname.x [list expr "$tn < 5"] 1
}

do_merge1_test 1.1   1
do_merge1_test 1.2   2
do_merge1_test 1.3   3
do_merge1_test 1.4   4
do_merge1_test 1.5  10
do_merge1_test 1.6  20
do_merge1_test 1.7 100

#-------------------------------------------------------------------------
#
proc do_merge2_test {testname nRow} {
  db func rnddoc fts5_rnddoc

  do_execsql_test $testname.0 {
    DROP TABLE IF EXISTS x8;
    CREATE VIRTUAL TABLE x8 USING fts5(i);
    INSERT INTO x8(x8, rank) VALUES('pgsz', 32);
  }

  set ::nRow $nRow
  do_test $testname.1 {
    for {set i 0} {$i < $::nRow} {incr i} {
      execsql { INSERT INTO x8 VALUES( rnddoc(($i%16) + 5) ) }
      while {[not_merged x8]} {
        execsql {
          INSERT INTO x8(x8, rank) VALUES('automerge', 2);
          INSERT INTO x8(x8, rank) VALUES('merge', 1);
          INSERT INTO x8(x8, rank) VALUES('automerge', 16);
          INSERT INTO x8(x8) VALUES('integrity-check');
        }
      }
    }
  } {}
}
proc not_merged {tbl} {
  set segs [fts5_level_segs $tbl]
  foreach s $segs { if {$s>1} { return 1 } }
  return 0
}

do_merge2_test 2.1    5
do_merge2_test 2.2   10
do_merge2_test 2.3   20

#-------------------------------------------------------------------------
# Test that an auto-merge will complete any merge that has already been
# started, even if the number of input segments is less than the current
# value of the 'automerge' configuration parameter.
#
db func rnddoc fts5_rnddoc

do_execsql_test 3.1 {
  DROP TABLE IF EXISTS x8;
  CREATE VIRTUAL TABLE x8 USING fts5(i);
  INSERT INTO x8(x8, rank) VALUES('pgsz', 32);
  INSERT INTO x8 VALUES(rnddoc(100));
  INSERT INTO x8 VALUES(rnddoc(100));
}
do_test 3.2 {
  execsql {
    INSERT INTO x8(x8, rank) VALUES('automerge', 4);
    INSERT INTO x8(x8, rank) VALUES('merge', 1);
  }
  fts5_level_segs x8
} {2}

do_test 3.3 {
  execsql {
    INSERT INTO x8(x8, rank) VALUES('automerge', 2);
    INSERT INTO x8(x8, rank) VALUES('merge', 1);
  }
  fts5_level_segs x8
} {2 1}

do_test 3.4 {
  execsql { INSERT INTO x8(x8, rank) VALUES('automerge', 4) }
  while {[not_merged x8]} {
    execsql { INSERT INTO x8(x8, rank) VALUES('merge', 1) }
  }
  fts5_level_segs x8
} {0 1}

#-------------------------------------------------------------------------
#
proc mydoc {} {
  set x [lindex {a b c d e f g h i j} [expr int(rand()*10)]]
  return [string repeat "$x " 30]
}
db func mydoc mydoc

proc mycount {} {
  set res [list]
  foreach x {a b c d e f g h i j} {
    lappend res [db one {SELECT count(*) FROM x8 WHERE x8 MATCH $x}]
  }
  set res
}

  #1 32
foreach {tn pgsz} {
  2 1000
} {
  do_execsql_test 4.$tn.1 {
    DROP TABLE IF EXISTS x8;
    CREATE VIRTUAL TABLE x8 USING fts5(i);
    INSERT INTO x8(x8, rank) VALUES('pgsz', $pgsz);
  }

  do_execsql_test 4.$tn.2 {
    INSERT INTO x8(x8, rank) VALUES('merge', 1);
  }

  do_execsql_test 4.$tn.3 {
    WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100)
      INSERT INTO x8 SELECT mydoc() FROM ii;
    WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100)
      INSERT INTO x8 SELECT mydoc() FROM ii;
    INSERT INTO x8(x8, rank) VALUES('automerge', 2);
  }

  set expect [mycount]
    for {set i 0} {$i < 20} {incr i} {
      do_test 4.$tn.4.$i {
        execsql { INSERT INTO x8(x8, rank) VALUES('merge', 1); }
        mycount
      } $expect
      break
    }
#  db eval {SELECT fts5_decode(rowid, block) AS r FROM x8_data} { puts $r }
}

finish_test

Added ext/fts5/test/fts5near.test.














































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# 2014 Jan 08
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focused on the NEAR operator.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5near

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

proc do_near_test {tn doc near res} {
  uplevel [list do_execsql_test $tn "
    DELETE FROM t1;
    INSERT INTO t1 VALUES('$doc');
    SELECT count(*) FROM t1 WHERE t1 MATCH '$near';
  " $res]
}

execsql { 
  CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = "ascii tokenchars '.'") 
}

do_near_test 1.1 ". . a . . . b . ." { NEAR(a b, 5) } 1
do_near_test 1.2 ". . a . . . b . ." { NEAR(a b, 4) } 1
do_near_test 1.3 ". . a . . . b . ." { NEAR(a b, 3) } 1
do_near_test 1.4 ". . a . . . b . ." { NEAR(a b, 2) } 0

do_near_test 1.5 ". . a . . . b . ." { NEAR(b a, 5) } 1
do_near_test 1.6 ". . a . . . b . ." { NEAR(b a, 4) } 1
do_near_test 1.7 ". . a . . . b . ." { NEAR(b a, 3) } 1
do_near_test 1.8 ". . a . . . b . ." { NEAR(b a, 2) } 0

do_near_test 1.9  ". a b . . . c . ." { NEAR("a b" c, 3) } 1
do_near_test 1.10 ". a b . . . c . ." { NEAR("a b" c, 2) } 0
do_near_test 1.11 ". a b . . . c . ." { NEAR(c "a b", 3) } 1
do_near_test 1.12 ". a b . . . c . ." { NEAR(c "a b", 2) } 0

do_near_test 1.13 ". a b . . . c d ." { NEAR(a+b c+d, 3) } 1
do_near_test 1.14 ". a b . . . c d ." { NEAR(a+b c+d, 2) } 0
do_near_test 1.15 ". a b . . . c d ." { NEAR(c+d a+b, 3) } 1
do_near_test 1.16 ". a b . . . c d ." { NEAR(c+d a+b, 2) } 0

do_near_test 1.17 ". a b . . . c d ." { NEAR(a b c d, 5) } 1
do_near_test 1.18 ". a b . . . c d ." { NEAR(a b c d, 4) } 0
do_near_test 1.19 ". a b . . . c d ." { NEAR(a+b c d, 4) } 1

do_near_test 1.20 "a b c d e f g h i" { NEAR(b+c a+b+c+d i, 5) } 1
do_near_test 1.21 "a b c d e f g h i" { NEAR(b+c a+b+c+d i, 4) } 0

do_near_test 1.22 "a b c d e f g h i" { NEAR(a+b+c+d i b+c, 5) } 1
do_near_test 1.23 "a b c d e f g h i" { NEAR(a+b+c+d i b+c, 4) } 0

do_near_test 1.24 "a b c d e f g h i" { NEAR(i a+b+c+d b+c, 5) } 1
do_near_test 1.25 "a b c d e f g h i" { NEAR(i a+b+c+d b+c, 4) } 0


finish_test

Added ext/fts5/test/fts5optimize.test.




































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# 2014 Dec 20
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5optimize

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

proc rnddoc {nWord} {
  set vocab {a b c d e f g h i j k l m n o p q r s t u v w x y z}
  set nVocab [llength $vocab]
  set ret [list]
  for {set i 0} {$i < $nWord} {incr i} {
    lappend ret [lindex $vocab [expr {int(rand() * $nVocab)}]]
  }
  return $ret
}


foreach {tn nStep} {
  1 2
  2 10
  3 50
  4 500
} {
if {$tn!=4} continue
  reset_db
  db func rnddoc rnddoc
  do_execsql_test 1.$tn.1 {
    CREATE VIRTUAL TABLE t1 USING fts5(x, y);
  }
  do_test 1.$tn.2 {
    for {set i 0} {$i < $nStep} {incr i} {
      execsql { INSERT INTO t1 VALUES( rnddoc(5), rnddoc(5) ) }
    }
  } {}

  do_execsql_test 1.$tn.3 {
    INSERT INTO t1(t1) VALUES('integrity-check');
  }

  do_execsql_test 1.$tn.4 {
    INSERT INTO t1(t1) VALUES('optimize');
  }

  do_execsql_test 1.$tn.5 {
    INSERT INTO t1(t1) VALUES('integrity-check');
  }
}

finish_test

Added ext/fts5/test/fts5plan.test.






































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# 2014 Dec 20
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file focuses on testing the planner (xBestIndex function).
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5plan

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

do_execsql_test 1.0 {
  CREATE TABLE t1(x, y);
  CREATE VIRTUAL TABLE f1 USING fts5(ff);
}

do_eqp_test 1.1 {
  SELECT * FROM t1, f1 WHERE f1 MATCH t1.x
} {
  0 0 0 {SCAN TABLE t1} 
  0 1 1 {SCAN TABLE f1 VIRTUAL TABLE INDEX 1:}
}

do_eqp_test 1.2 {
  SELECT * FROM t1, f1 WHERE f1 > t1.x
} {
  0 0 1 {SCAN TABLE f1 VIRTUAL TABLE INDEX 0:}
  0 1 0 {SCAN TABLE t1} 
}

do_eqp_test 1.3 {
  SELECT * FROM f1 WHERE f1 MATCH ? ORDER BY ff
} {
  0 0 0 {SCAN TABLE f1 VIRTUAL TABLE INDEX 1:}
  0 0 0 {USE TEMP B-TREE FOR ORDER BY}
}

do_eqp_test 1.4 {
  SELECT * FROM f1 ORDER BY rank
} {
  0 0 0 {SCAN TABLE f1 VIRTUAL TABLE INDEX 0:}
  0 0 0 {USE TEMP B-TREE FOR ORDER BY}
}

do_eqp_test 1.5 {
  SELECT * FROM f1 WHERE rank MATCH ?
} {
  0 0 0 {SCAN TABLE f1 VIRTUAL TABLE INDEX 2:}
}




finish_test

Added ext/fts5/test/fts5porter.test.

more than 10,000 changes

Added ext/fts5/test/fts5porter2.test.












































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# 2014 Dec 20
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on the fts5 porter stemmer implementation.
#
# These are extra tests added to those in fts5porter.test in order to
# improve test coverage of the porter stemmer implementation.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5porter2

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

set test_vocab {
  tion          tion
  ation         ation
  vation        vation
  avation       avat
  vion          vion
  ion           ion
  relational    relat
  relation      relat
  relate        relat
  zzz           zzz
  ii            ii
  iiing         ii
  xtional       xtional
  xenci         xenci
  xlogi         xlogi
  realization   realiz
  realize       realiz
  xization      xizat
  capitalism    capit
  talism        talism
  xiveness      xive
  xfulness      xful
  xousness      xous
  xical         xical
  xicate        xicat
  xicity        xiciti
  ies           ie
  eed           e
  eing           e
  s             s
}

set i 0
foreach {in out} $test_vocab {
  do_test "1.$i.($in -> $out)" {
    lindex [sqlite3_fts5_tokenize db porter $in] 0
  } $out
  incr i
}


finish_test

Added ext/fts5/test/fts5prefix.test.






































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# 2015 Jan 13
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file containst tests focused on prefix indexes.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5prefix

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE xx USING fts5(x, prefix=1);
  INSERT INTO xx VALUES('one two three');
  INSERT INTO xx VALUES('four five six');
  INSERT INTO xx VALUES('seven eight nine ten');
}

do_execsql_test 1.1 {
  SELECT rowid FROM xx WHERE xx MATCH 't*'
} {1 3}


#-------------------------------------------------------------------------
# Check that prefix indexes really do index n-character prefixes, not 
# n-byte prefixes. Use the ascii tokenizer so as not to be confused by
# diacritic removal.
#
do_execsql_test 2.0 { 
  CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = ascii, prefix = 2) 
}

do_test 2.1 {
  foreach {rowid string} {
    1 "\xCA\xCB\xCC\xCD"
    2 "\u1234\u5678\u4321\u8765"
  } {
    execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $string) }
  }
} {}

do_execsql_test 2.2 {
  INSERT INTO t1(t1) VALUES('integrity-check');
}

foreach {tn q res} {
  1 "SELECT rowid FROM t1 WHERE t1 MATCH '\xCA\xCB*'" 1
  2 "SELECT rowid FROM t1 WHERE t1 MATCH '\u1234\u5678*'" 2
} {
  do_execsql_test 2.3.$tn $q $res
}


finish_test

Added ext/fts5/test/fts5rank.test.


























































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# 2014 Dec 20
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file focuses on testing queries that use the "rank" column.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5rank

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}


#-------------------------------------------------------------------------
# "ORDER BY rank" + highlight() + large poslists.
#
do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE xyz USING fts5(z);
}
do_test 1.1 {
  set doc [string trim [string repeat "x y " 500]]
  execsql { INSERT INTO xyz VALUES($doc) }
} {}
do_execsql_test 1.2 {
  SELECT highlight(xyz, 0, '[', ']') FROM xyz WHERE xyz MATCH 'x' ORDER BY rank
} [list [string map {x [x]} $doc]]

do_execsql_test 1.3 {
  SELECT highlight(xyz, 0, '[', ']') FROM xyz
  WHERE xyz MATCH 'x AND y' ORDER BY rank
} [list [string map {x [x] y [y]} $doc]]

finish_test

Added ext/fts5/test/fts5rebuild.test.






































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# 2014 Dec 20
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5rebuild

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

do_execsql_test 1.1 {
  CREATE VIRTUAL TABLE f1 USING fts5(a, b);
  INSERT INTO f1(a, b) VALUES('one',   'o n e');
  INSERT INTO f1(a, b) VALUES('two',   't w o');
  INSERT INTO f1(a, b) VALUES('three', 't h r e e');
}

do_execsql_test 1.2 {
  INSERT INTO f1(f1) VALUES('integrity-check');
} {}

do_execsql_test 1.3 {
  INSERT INTO f1(f1) VALUES('rebuild');
} {}

do_execsql_test 1.4 {
  INSERT INTO f1(f1) VALUES('integrity-check');
} {}

do_execsql_test 1.5 {
  DELETE FROM f1_data;
} {}

do_catchsql_test 1.6 {
  INSERT INTO f1(f1) VALUES('integrity-check');
} {1 {database disk image is malformed}}

do_execsql_test 1.7 {
  INSERT INTO f1(f1) VALUES('rebuild');
  INSERT INTO f1(f1) VALUES('integrity-check');
} {}


#-------------------------------------------------------------------------
# Check that 'rebuild' may not be used with a contentless table.
#
do_execsql_test 2.1 {
  CREATE VIRTUAL TABLE nc USING fts5(doc, content=);
}

do_catchsql_test 2.2 {
  INSERT INTO nc(nc) VALUES('rebuild');
} {1 {'rebuild' may not be used with a contentless fts5 table}}
finish_test

Added ext/fts5/test/fts5restart.test.
















































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# 2015 April 28
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file focuses on testing the planner (xBestIndex function).
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5restart

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE f1 USING fts5(ff);
}

#-------------------------------------------------------------------------
# Run the 'optimize' command. Check that it does not disturb ongoing
# full-text queries.
#
do_test 1.1 {
  for {set i 1} {$i < 1000} {incr i} {
    execsql { INSERT INTO f1 VALUES('a b c d e') }
    lappend lRowid $i
  }
} {}

do_execsql_test 1.2 {
  SELECT rowid FROM f1 WHERE f1 MATCH 'c';
} $lRowid

do_test 1.3 {
  set res [list]
  db eval { SELECT rowid FROM f1 WHERE f1 MATCH 'c' } {
    if {$rowid == 100} {
      execsql { INSERT INTO f1(f1) VALUES('optimize') }
    }
    lappend res $rowid
  }
  set res
} $lRowid

do_test 1.4.1 {
  sqlite3 db2 test.db
  set res [list]
  db2 eval { SELECT rowid FROM f1 WHERE f1 MATCH 'c' } {
    if {$rowid == 100} {
      set cres [catchsql { INSERT INTO f1(f1) VALUES('optimize') }]
    }
    lappend res $rowid
  }
  set res
} $lRowid

do_test 1.4.2 {
  db2 close
  set cres
} {1 {database is locked}}

#-------------------------------------------------------------------------
# Open a couple of cursors. Then close them in the same order.
#
do_test 2.1 {
  set ::s1 [sqlite3_prepare db "SELECT rowid FROM f1 WHERE f1 MATCH 'b'" -1 X]
  set ::s2 [sqlite3_prepare db "SELECT rowid FROM f1 WHERE f1 MATCH 'c'" -1 X]

  sqlite3_step $::s1
} {SQLITE_ROW}
do_test 2.2 {
  sqlite3_step $::s2
} {SQLITE_ROW}

do_test 2.1 {
  sqlite3_finalize $::s1
  sqlite3_finalize $::s2
} {SQLITE_OK}

#-------------------------------------------------------------------------
# Copy data between two FTS5 tables.
#
do_execsql_test 3.1 {
  CREATE VIRTUAL TABLE f2 USING fts5(gg);
  INSERT INTO f2 SELECT ff FROM f1 WHERE f1 MATCH 'b+c+d';
}
do_execsql_test 3.2 {
  SELECT rowid FROM f2 WHERE f2 MATCH 'a+b+c+d+e'
} $lRowid

#-------------------------------------------------------------------------
# Remove the row that an FTS5 cursor is currently pointing to. And 
# various other similar things. Check that this does not disturb 
# ongoing scans.
#
do_execsql_test 4.0 {
  CREATE VIRTUAL TABLE n4 USING fts5(n);
  INSERT INTO n4(rowid, n) VALUES(100, '1 2 3 4 5');
  INSERT INTO n4(rowid, n) VALUES(200, '1 2 3 4');
  INSERT INTO n4(rowid, n) VALUES(300, '2 3 4');
  INSERT INTO n4(rowid, n) VALUES(400, '2 3');
  INSERT INTO n4(rowid, n) VALUES(500, '3');
}

do_test 4.1 {
  set res [list]
  db eval { SELECT rowid FROM n4 WHERE n4 MATCH '3' } {
    if {$rowid==300} {
      execsql { DELETE FROM n4 WHERE rowid=300 }
    }
    lappend res $rowid
  }
  set res
} {100 200 300 400 500}

do_test 4.2 {
  execsql { INSERT INTO n4(rowid, n) VALUES(300, '2 3 4') }
  set res [list]
  db eval { SELECT rowid FROM n4 WHERE n4 MATCH '3' ORDER BY rowid DESC} {
    if {$rowid==300} {
      execsql { DELETE FROM n4 WHERE rowid=300 }
    }
    lappend res $rowid
  }
  set res
} {500 400 300 200 100}

do_test 4.3 {
  execsql { INSERT INTO n4(rowid, n) VALUES(300, '2 3 4') }
  set res [list]
  db eval { SELECT rowid FROM n4 WHERE n4 MATCH '3' ORDER BY rowid DESC} {
    if {$rowid==300} {
      execsql { DELETE FROM n4  }
    }
    lappend res $rowid
  }
  set res
} {500 400 300}



finish_test

Added ext/fts5/test/fts5rowid.test.










































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# 2014 Dec 20
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests of the scalar fts5_rowid() and fts5_decode() functions.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5rowid

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

do_catchsql_test 1.1 {
  SELECT fts5_rowid()
} {1 {should be: fts5_rowid(subject, ....)}}

do_catchsql_test 1.2 {
  SELECT fts5_rowid('segment')
} {1 {should be: fts5_rowid('segment', segid, height, pgno))}}

do_execsql_test 1.3 {
  SELECT fts5_rowid('segment', 1, 1, 1)
} {139586437121}

do_catchsql_test 1.4 {
  SELECT fts5_rowid('nosucharg');
} {1 {first arg to fts5_rowid() must be 'segment' or 'start-of-index'}} 


#-------------------------------------------------------------------------
# Tests of the fts5_decode() function.
#
reset_db
do_execsql_test 2.1 { 
  CREATE VIRTUAL TABLE x1 USING fts5(a, b);
  INSERT INTO x1(x1, rank) VALUES('pgsz', 32);
} {}

proc rnddoc {n} {
  set map [list 0 a  1 b  2 c  3 d  4 e  5 f  6 g  7 h  8 i  9 j]
  set doc [list]
  for {set i 0} {$i < $n} {incr i} {
    lappend doc [string map $map [format %.3d [expr int(rand()*100)]]]
  }
  set doc
}
db func rnddoc rnddoc

do_execsql_test 2.2 {
  WITH r(a, b) AS (
    SELECT rnddoc(6), rnddoc(6) UNION ALL
    SELECT rnddoc(6), rnddoc(6) FROM r
  )
  INSERT INTO x1 SELECT * FROM r LIMIT 10000;
}

set res [db one {SELECT count(*) FROM x1_data}]
do_execsql_test 2.3 {
  SELECT count(fts5_decode(rowid, block)) FROM x1_data;
} $res
do_execsql_test 2.4 {
  UPDATE x1_data SET block = X'';
  SELECT count(fts5_decode(rowid, block)) FROM x1_data;
} $res

do_execsql_test 2.5 {
  INSERT INTO x1(x1, rank) VALUES('pgsz', 1024);
  INSERT INTO x1(x1) VALUES('rebuild');
}

set res [db one {SELECT count(*) FROM x1_data}]
do_execsql_test 2.6 {
  SELECT count(fts5_decode(rowid, block)) FROM x1_data;
} $res
do_execsql_test 2.7 {
  UPDATE x1_data SET block = X'';
  SELECT count(fts5_decode(rowid, block)) FROM x1_data;
} $res

#-------------------------------------------------------------------------
# Tests with very large tokens.
#
set strlist [list \
  "[string repeat x 400]"                       \
  "[string repeat x 300][string repeat w 100]"  \
  "[string repeat x 300][string repeat y 100]"  \
  "[string repeat x 300][string repeat z 600]"  \
]
do_test 3.0 {
  execsql {
    BEGIN;
    CREATE VIRTUAL TABLE x2 USING fts5(a);
  }
  foreach str $strlist { execsql { INSERT INTO x2 VALUES($str) } }
  execsql COMMIT
} {}

for {set tn 0} {$tn<[llength $strlist]} {incr tn} {
  set str [lindex $strlist $tn]
  do_execsql_test 3.1.$tn {
    SELECT rowid FROM x2 WHERE x2 MATCH $str
  } [expr $tn+1]
}

set res [db one {SELECT count(*) FROM x2_data}]
do_execsql_test 3.2 {
  SELECT count(fts5_decode(rowid, block)) FROM x2_data;
} $res

#-------------------------------------------------------------------------
# Leaf pages with no terms or rowids at all.
#
set strlist [list \
  "[string repeat {w } 400]"  \
  "[string repeat {x } 400]"  \
  "[string repeat {y } 400]"  \
  "[string repeat {z } 400]"  \
]
do_test 4.0 {
  execsql {
    BEGIN;
    CREATE VIRTUAL TABLE x3 USING fts5(a);
    INSERT INTO x3(x3, rank) VALUES('pgsz', 32);
  }
  foreach str $strlist { execsql { INSERT INTO x3 VALUES($str) } }
  execsql COMMIT
} {}

for {set tn 0} {$tn<[llength $strlist]} {incr tn} {
  set str [lindex $strlist $tn]
  do_execsql_test 4.1.$tn {
    SELECT rowid FROM x3 WHERE x3 MATCH $str
  } [expr $tn+1]
}

set res [db one {SELECT count(*) FROM x3_data}]
do_execsql_test 4.2 {
  SELECT count(fts5_decode(rowid, block)) FROM x3_data;
} $res

#-------------------------------------------------------------------------
# Position lists with large values.
#
set strlist [list \
  "[string repeat {w } 400]a"  \
  "[string repeat {x } 400]a"  \
  "[string repeat {y } 400]a"  \
  "[string repeat {z } 400]a"  \
]
do_test 5.0 {
  execsql {
    BEGIN;
    CREATE VIRTUAL TABLE x4 USING fts5(a);
    INSERT INTO x4(x4, rank) VALUES('pgsz', 32);
  }
  foreach str $strlist { execsql { INSERT INTO x4 VALUES($str) } }
  execsql COMMIT
} {}

do_execsql_test 5.1 {
  SELECT rowid FROM x4 WHERE x4 MATCH 'a'
} {1 2 3 4}

set res [db one {SELECT count(*) FROM x4_data}]
do_execsql_test 5.2 {
  SELECT count(fts5_decode(rowid, block)) FROM x4_data;
} $res

finish_test

Added ext/fts5/test/fts5tokenizer.test.






















































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
# 2014 Dec 20
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on the built-in fts5 tokenizers. 
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5tokenizer

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}


do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize=porter);
  DROP TABLE ft1;
}
do_execsql_test 1.1 {
  CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize='porter');
  DROP TABLE ft1;
}
do_execsql_test 1.2 {
  CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = porter);
  DROP TABLE ft1;
}
do_execsql_test 1.3 {
  CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = 'porter');
  DROP TABLE ft1;
}
do_execsql_test 1.4 {
  CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = 'porter ascii');
  DROP TABLE ft1;
}

do_catchsql_test 1.5 {
  CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = 'nosuch');
} {1 {no such tokenizer: nosuch}}

do_catchsql_test 1.6 {
  CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = 'porter nosuch');
} {1 {error in tokenizer constructor}}

do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize=porter);
  INSERT INTO ft1 VALUES('embedded databases');
}
do_execsql_test 2.1 { SELECT rowid FROM ft1 WHERE ft1 MATCH 'embedding' } 1
do_execsql_test 2.2 { SELECT rowid FROM ft1 WHERE ft1 MATCH 'database' } 1
do_execsql_test 2.3 { 
  SELECT rowid FROM ft1 WHERE ft1 MATCH 'database embedding' 
} 1

proc tcl_create {args} { 
  set ::targs $args
  error "failed" 
}
sqlite3_fts5_create_tokenizer db tcl tcl_create

foreach {tn directive expected} {
  1 {tokenize='tcl a b c'}             {a b c}
  2 {tokenize='tcl ''d'' ''e'' ''f'''} {d e f}
  3 {tokenize="tcl 'g' 'h' 'i'"}       {g h i}
  4 {tokenize = tcl}                   {}
} {
  do_catchsql_test 3.$tn.1 "
    CREATE VIRTUAL TABLE ft2 USING fts5(x, $directive)
  " {1 {error in tokenizer constructor}}
  do_test 3.$tn.2 { set ::targs } $expected
}

do_catchsql_test 4.1 {
  CREATE VIRTUAL TABLE ft2 USING fts5(x, tokenize = tcl abc);
} {1 {parse error in "tokenize = tcl abc"}}
do_catchsql_test 4.2 {
  CREATE VIRTUAL TABLE ft2 USING fts5(x y)
} {1 {unrecognized column option: y}}

#-------------------------------------------------------------------------
# Test the "separators" and "tokenchars" options a bit.
#
foreach {tn tokenizer} {1 ascii 2 unicode61} {
  reset_db
  set T "$tokenizer tokenchars ',.:' separators 'xyz'"
  execsql "CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = \"$T\")"
  do_execsql_test 5.$tn.1 {
    INSERT INTO t1 VALUES('abcxdefyghizjkl.mno,pqr:stu/vwx+yz');
  }
  foreach {tn2 token res} {
    1 abc 1     2 def 1     3 ghi 1    4 jkl {}
    5 mno {}    6 pqr {}    7 stu {}   8 jkl.mno,pqr:stu 1
    9 vw  1
  } {
    do_execsql_test 5.$tn.2.$tn2 "
      SELECT rowid FROM t1 WHERE t1 MATCH '\"$token\"'
    " $res
  }
}

#-------------------------------------------------------------------------
# Miscellaneous tests for the ascii tokenizer.
#
# 5.1.*: Test that the ascii tokenizer ignores non-ASCII characters in the
#        'separators' option. But unicode61 does not.
#
# 5.2.*: An option without an argument is an error.
#

do_test 5.1.1 {
  execsql "
    CREATE VIRTUAL TABLE a1 USING fts5(x, tokenize=`ascii separators '\u1234'`);
    INSERT INTO a1 VALUES('abc\u1234def');
  "
  execsql { SELECT rowid FROM a1 WHERE a1 MATCH 'def' } 
} {}

do_test 5.1.2 {
  execsql "
    CREATE VIRTUAL TABLE a2 USING fts5(
        x, tokenize=`unicode61 separators '\u1234'`);
    INSERT INTO a2 VALUES('abc\u1234def');
  "
  execsql { SELECT rowid FROM a2 WHERE a2 MATCH 'def' } 
} {1}

do_catchsql_test 5.2 {
  CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'ascii tokenchars');
} {1 {error in tokenizer constructor}}
do_catchsql_test 5.3 {
  CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'ascii opt arg');
} {1 {error in tokenizer constructor}}

#-------------------------------------------------------------------------
# Test that the ASCII and unicode61 tokenizers both handle SQLITE_DONE 
# correctly.
#

proc test_token_cb {varname token iStart iEnd} {
  upvar $varname var
  lappend var $token
  if {[llength $var]==3} { return "SQLITE_DONE" }
  return "SQLITE_OK"
}

proc tokenize {cmd} {
  set res [list]
  $cmd xTokenize [$cmd xColumnText 0] [list test_token_cb res]
  set res
}
sqlite3_fts5_create_function db tokenize tokenize

do_execsql_test 6.0 {
  CREATE VIRTUAL TABLE x1 USING fts5(a, tokenize=ascii);
  INSERT INTO x1 VALUES('q w e r t y');
  INSERT INTO x1 VALUES('y t r e w q');
  SELECT tokenize(x1) FROM x1 WHERE x1 MATCH 'e AND r';
} {
  {q w e} {y t r}
}

do_execsql_test 6.1 {
  CREATE VIRTUAL TABLE x2 USING fts5(a, tokenize=unicode61);
  INSERT INTO x2 VALUES('q w e r t y');
  INSERT INTO x2 VALUES('y t r e w q');
  SELECT tokenize(x2) FROM x2 WHERE x2 MATCH 'e AND r';
} {
  {q w e} {y t r}
}


#-------------------------------------------------------------------------
# Miscellaneous tests for the unicode tokenizer.
#
do_catchsql_test 6.1 {
  CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'unicode61 tokenchars');
} {1 {error in tokenizer constructor}}
do_catchsql_test 6.2 {
  CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'unicode61 a b');
} {1 {error in tokenizer constructor}}
do_catchsql_test 6.3 {
  CREATE VIRTUAL TABLE a3 USING fts5(
    x, y, tokenize = 'unicode61 remove_diacritics 2'
  );
} {1 {error in tokenizer constructor}}
do_catchsql_test 6.4 {
  CREATE VIRTUAL TABLE a3 USING fts5(
    x, y, tokenize = 'unicode61 remove_diacritics 10'
  );
} {1 {error in tokenizer constructor}}

#-------------------------------------------------------------------------
# Porter tokenizer with very large tokens.
#
set a [string repeat a 100]
set b [string repeat b 500]
set c [string repeat c 1000]
do_execsql_test 7.0 {
  CREATE VIRTUAL TABLE e5 USING fts5(x, tokenize=porter);
  INSERT INTO e5 VALUES($a || ' ' || $b);
  INSERT INTO e5 VALUES($b || ' ' || $c);
  INSERT INTO e5 VALUES($c || ' ' || $a);
}

do_execsql_test 7.1 {SELECT rowid FROM e5 WHERE e5 MATCH $a} { 1 3 }
do_execsql_test 7.2 {SELECT rowid FROM e5 WHERE e5 MATCH $b} { 1 2 }
do_execsql_test 7.3 {SELECT rowid FROM e5 WHERE e5 MATCH $c} { 2 3 }

#-------------------------------------------------------------------------
# Test the 'separators' option with the unicode61 tokenizer.
#
do_execsql_test 8.1 {
  BEGIN;
  CREATE VIRTUAL TABLE e6 USING fts5(x,
    tokenize="unicode61 separators ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  );
  INSERT INTO e6 VALUES('theAquickBbrownCfoxDjumpedWoverXtheYlazyZdog');
  CREATE VIRTUAL TABLE e7 USING fts5vocab(e6, 'row');
  SELECT term FROM e7;
  ROLLBACK;
} {
  brown dog fox jumped lazy over quick the
}

do_execsql_test 8.2 [subst {
  BEGIN;
  CREATE VIRTUAL TABLE e6 USING fts5(x,
    tokenize="unicode61 separators '\u0E01\u0E02\u0E03\u0E04\u0E05\u0E06\u0E07'"
  );
  INSERT INTO e6 VALUES('the\u0E01quick\u0E01brown\u0E01fox\u0E01' 
                     || 'jumped\u0E01over\u0E01the\u0E01lazy\u0E01dog'
  );
  INSERT INTO e6 VALUES('\u0E08\u0E07\u0E09');
  CREATE VIRTUAL TABLE e7 USING fts5vocab(e6, 'row');
  SELECT term FROM e7;
  ROLLBACK;
}] [subst {
  brown dog fox jumped lazy over quick the \u0E08 \u0E09
}]

finish_test

Added ext/fts5/test/fts5unicode.test.




























































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# 2014 Dec 20
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on the fts5 tokenizers
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5unicode

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

proc tokenize_test {tn tokenizer input output} {
  uplevel [list do_test $tn [subst -nocommands {
    set ret {}
    foreach {z s e} [sqlite3_fts5_tokenize db {$tokenizer} {$input}] {
      lappend ret [set z]
    }
    set ret
  }] [list {*}$output]]
}

foreach {tn t} {1 ascii 2 unicode61} {
  tokenize_test 1.$tn.0 $t {A B C D} {a b c d}
  tokenize_test 1.$tn.1 $t {May you share freely,} {may you share freely}
  tokenize_test 1.$tn.2 $t {..May...you.shAre.freely} {may you share freely}
  tokenize_test 1.$tn.3 $t {} {}
}

#-------------------------------------------------------------------------
# Check that "unicode61" really is the default tokenizer.
#

do_execsql_test 2.0 "
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  CREATE VIRTUAL TABLE t2 USING fts5(x, tokenize = unicode61);
  CREATE VIRTUAL TABLE t3 USING fts5(x, tokenize = ascii);
  INSERT INTO t1 VALUES('\xC0\xC8\xCC');
  INSERT INTO t2 VALUES('\xC0\xC8\xCC');
  INSERT INTO t3 VALUES('\xC0\xC8\xCC');
"
breakpoint
do_execsql_test 2.1 "
  SELECT 't1' FROM t1 WHERE t1 MATCH '\xE0\xE8\xEC';
  SELECT 't2' FROM t2 WHERE t2 MATCH '\xE0\xE8\xEC';
  SELECT 't3' FROM t3 WHERE t3 MATCH '\xE0\xE8\xEC';
" {t1 t2}


finish_test

Added ext/fts5/test/fts5unicode2.test.


























































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
# 2012 May 25
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
#
# The tests in this file focus on testing the "unicode" FTS tokenizer.
#
# This is a modified copy of FTS4 test file "fts4_unicode.test".
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5unicode2

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

proc do_unicode_token_test {tn input res} {
  uplevel [list do_test $tn [list \
    sqlite3_fts5_tokenize -subst db "unicode61 remove_diacritics 0" $input
  ] [list {*}$res]]
}

proc do_unicode_token_test2 {tn input res} {
  uplevel [list do_test $tn [list \
    sqlite3_fts5_tokenize -subst db "unicode61" $input
  ] [list {*}$res]]
}

proc do_unicode_token_test3 {tn args} {
  set tokenizer [concat unicode61 {*}[lrange $args 0 end-2]]
  set input [lindex $args end-1]
  set res [lindex $args end]
  uplevel [list do_test $tn [list \
    sqlite3_fts5_tokenize -subst db $tokenizer $input
  ] [list {*}$res]]
}

do_unicode_token_test 1.0 {a B c D} {a a b B c c d D}

do_unicode_token_test 1.1 "\uC4 \uD6 \uDC" \
    "\uE4 \uC4 \uF6 \uD6 \uFC \uDC"

do_unicode_token_test 1.2 "x\uC4x x\uD6x x\uDCx" \
    "x\uE4x x\uC4x x\uF6x x\uD6x x\uFCx x\uDCx"

# 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s.
do_unicode_token_test 1.3 "\uDF" "\uDF \uDF"
do_unicode_token_test 1.4 "\u1E9E" "\uDF \u1E9E"

do_unicode_token_test 1.5 "The quick brown fox" {
  the The quick quick brown brown fox fox
}
do_unicode_token_test 1.6 "The\u00bfquick\u224ebrown\u2263fox" {
  the The quick quick brown brown fox fox
}

do_unicode_token_test2 1.7  {a B c D} {a a b B c c d D}
do_unicode_token_test2 1.8  "\uC4 \uD6 \uDC" "a \uC4 o \uD6 u \uDC"

do_unicode_token_test2 1.9  "x\uC4x x\uD6x x\uDCx" \
    "xax x\uC4x xox x\uD6x xux x\uDCx"

# Check that diacritics are removed if remove_diacritics=1 is specified.
# And that they do not break tokens.
do_unicode_token_test2 1.10 "xx\u0301xx" "xxxx xx\u301xx"

# Title-case mappings work
do_unicode_token_test 1.11 "\u01c5" "\u01c6 \u01c5"

do_unicode_token_test 1.12 "\u00C1abc\u00C2 \u00D1def\u00C3" \
    "\u00E1abc\u00E2 \u00C1abc\u00C2 \u00F1def\u00E3 \u00D1def\u00C3"

do_unicode_token_test 1.13 "\u00A2abc\u00A3 \u00A4def\u00A5" \
    "abc abc def def"

#-------------------------------------------------------------------------
#
set docs [list {
  Enhance the INSERT syntax to allow multiple rows to be inserted via the
  VALUES clause.
} {
  Enhance the CREATE VIRTUAL TABLE command to support the IF NOT EXISTS clause.
} {
  Added the sqlite3_stricmp() interface as a counterpart to sqlite3_strnicmp().
} {
  Added the sqlite3_db_readonly() interface.
} {
  Added the SQLITE_FCNTL_PRAGMA file control, giving VFS implementations the
  ability to add new PRAGMA statements or to override built-in PRAGMAs.  
} {
  Queries of the form: "SELECT max(x), y FROM table" returns the value of y on
  the same row that contains the maximum x value.
} {
  Added support for the FTS4 languageid option.
} {
  Documented support for the FTS4 content option. This feature has actually
  been in the code since version 3.7.9 but is only now considered to be
  officially supported.  
} {
  Pending statements no longer block ROLLBACK. Instead, the pending statement
  will return SQLITE_ABORT upon next access after the ROLLBACK.  
} {
  Improvements to the handling of CSV inputs in the command-line shell
} {
  Fix a bug introduced in version 3.7.10 that might cause a LEFT JOIN to be
  incorrectly converted into an INNER JOIN if the WHERE clause indexable terms
  connected by OR.  
}]

set map(a) [list "\u00C4" "\u00E4"]  ; # LATIN LETTER A WITH DIAERESIS
set map(e) [list "\u00CB" "\u00EB"]  ; # LATIN LETTER E WITH DIAERESIS
set map(i) [list "\u00CF" "\u00EF"]  ; # LATIN LETTER I WITH DIAERESIS
set map(o) [list "\u00D6" "\u00F6"]  ; # LATIN LETTER O WITH DIAERESIS
set map(u) [list "\u00DC" "\u00FC"]  ; # LATIN LETTER U WITH DIAERESIS
set map(y) [list "\u0178" "\u00FF"]  ; # LATIN LETTER Y WITH DIAERESIS
set map(h) [list "\u1E26" "\u1E27"]  ; # LATIN LETTER H WITH DIAERESIS
set map(w) [list "\u1E84" "\u1E85"]  ; # LATIN LETTER W WITH DIAERESIS
set map(x) [list "\u1E8C" "\u1E8D"]  ; # LATIN LETTER X WITH DIAERESIS
foreach k [array names map] {
  lappend mappings [string toupper $k] [lindex $map($k) 0] 
  lappend mappings $k [lindex $map($k) 1]
}
proc mapdoc {doc} { 
  set doc [regsub -all {[[:space:]]+} $doc " "]
  string map $::mappings [string trim $doc] 
}

do_test 2.0 {
  execsql { CREATE VIRTUAL TABLE t2 USING fts5(tokenize=unicode61, x); }
  foreach doc $docs {
    set d [mapdoc $doc]
    execsql { INSERT INTO t2 VALUES($d) }
  }
} {}

do_test 2.1 {
  set q [mapdoc "row"]
  execsql { SELECT * FROM t2 WHERE t2 MATCH $q }
} [list [mapdoc {
  Queries of the form: "SELECT max(x), y FROM table" returns the value of y on
  the same row that contains the maximum x value.
}]]

foreach {tn query snippet} {
  2 "row" {
     ...returns the value of y on the same [row] that contains 
     the maximum x value.
  }
  3 "ROW" {
     ...returns the value of y on the same [row] that contains 
     the maximum x value.
  }
  4 "rollback" {
     ...[ROLLBACK]. Instead, the pending statement
     will return SQLITE_ABORT upon next access after the [ROLLBACK].
  }
  5 "rOllback" {
     ...[ROLLBACK]. Instead, the pending statement
     will return SQLITE_ABORT upon next access after the [ROLLBACK].
  }
  6 "lang*" {
     Added support for the FTS4 [languageid] option.
  }
} {
  do_test 2.$tn {
    set q [mapdoc $query]
    execsql { 
      SELECT snippet(t2, -1, '[', ']', '...', 15) FROM t2 WHERE t2 MATCH $q 
    }
  } [list [mapdoc $snippet]]
}

#-------------------------------------------------------------------------
# Make sure the unicode61 tokenizer does not crash if it is passed a 
# NULL pointer.
reset_db
do_execsql_test 3.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(tokenize=unicode61, x, y);
  INSERT INTO t1 VALUES(NULL, 'a b c');
}

do_execsql_test 3.2 {
  SELECT snippet(t1, -1, '[', ']', '...', 15) FROM t1 WHERE t1 MATCH 'b'
} {{a [b] c}}

do_execsql_test 3.3 {
  BEGIN;
  DELETE FROM t1;
  INSERT INTO t1 VALUES('b b b b b b b b b b b', 'b b b b b b b b b b b b b');
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 VALUES('a b c', NULL);
  INSERT INTO t1 VALUES('a x c', NULL);
  COMMIT;
}

do_execsql_test 3.4 {
  SELECT * FROM t1 WHERE t1 MATCH 'a b';
} {{a b c} {}}

#-------------------------------------------------------------------------
#
reset_db

do_test 4.1 {
  set a "abc\uFFFEdef"
  set b "abc\uD800def"
  set c "\uFFFEdef"
  set d "\uD800def"
  execsql {
    CREATE VIRTUAL TABLE t1 USING fts5(tokenize=unicode61, x);
    INSERT INTO t1 VALUES($a);
    INSERT INTO t1 VALUES($b);
    INSERT INTO t1 VALUES($c);
    INSERT INTO t1 VALUES($d);
  }

  execsql "CREATE VIRTUAL TABLE t8 USING fts5(
      a, b, tokenize=\"unicode61 separators '\uFFFE\uD800\u00BF'\"
  )"
} {}

do_test 4.2 {
  set a [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0x62}]
  set b [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0x62}]
  set c [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
  set d [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
  execsql {
    INSERT INTO t1 VALUES($a);
    INSERT INTO t1 VALUES($b);
    INSERT INTO t1 VALUES($c);
    INSERT INTO t1 VALUES($d);
  }
} {}

do_test 4.3 {
  set a [binary format c* {0xF7 0xBF 0xBF 0xBF}]
  set b [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF}]
  set c [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF}]
  set d [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF}]
  execsql {
    INSERT INTO t1 VALUES($a);
    INSERT INTO t1 VALUES($b);
    INSERT INTO t1 VALUES($c);
    INSERT INTO t1 VALUES($d);
  }
} {}

do_test 4.4 {
  sqlite3_exec_hex db {
    CREATE VIRTUAL TABLE t9 USING fts5(a, b, 
      tokenize="unicode61 separators '%C09004'"
    );
    INSERT INTO t9(a) VALUES('abc%88def %89ghi%90');
  }
} {0 {}}


#-------------------------------------------------------------------------

breakpoint
do_unicode_token_test3 5.1 {tokenchars {}} {
  sqlite3_reset sqlite3_column_int
} {
  sqlite3 sqlite3 
  reset reset 
  sqlite3 sqlite3 
  column column 
  int int
}

do_unicode_token_test3 5.2 {tokenchars _} {
  sqlite3_reset sqlite3_column_int
} {
  sqlite3_reset sqlite3_reset 
  sqlite3_column_int sqlite3_column_int
}

do_unicode_token_test3 5.3 {separators xyz} {
  Laotianxhorseyrunszfast
} {
  laotian Laotian
  horse horse
  runs runs
  fast fast
}

do_unicode_token_test3 5.4 {tokenchars xyz} {
  Laotianxhorseyrunszfast
} {
  laotianxhorseyrunszfast Laotianxhorseyrunszfast
}

do_unicode_token_test3 5.5 {tokenchars _} {separators zyx} {
  sqlite3_resetxsqlite3_column_intyhonda_phantom
} {
  sqlite3_reset sqlite3_reset 
  sqlite3_column_int sqlite3_column_int
  honda_phantom honda_phantom
}

do_unicode_token_test3 5.6 "separators \u05D1" "abc\u05D1def" {
  abc abc def def
}

do_unicode_token_test3 5.7                             \
  "tokenchars \u2444\u2445"                            \
  "separators \u05D0\u05D1\u05D2"                      \
  "\u2444fre\u2445sh\u05D0water\u05D2fish.\u2445timer" \
  [list                                                \
    \u2444fre\u2445sh \u2444fre\u2445sh              \
    water water                                      \
    fish fish                                        \
    \u2445timer \u2445timer                          \
  ]

# Check that it is not possible to add a standalone diacritic codepoint 
# to either separators or tokenchars.
do_unicode_token_test3 5.8 "separators \u0301" \
  "hello\u0301world \u0301helloworld"          \
  "helloworld hello\u0301world helloworld helloworld"

do_unicode_token_test3 5.9 "tokenchars \u0301" \
  "hello\u0301world \u0301helloworld"          \
  "helloworld hello\u0301world helloworld helloworld"

do_unicode_token_test3 5.10 "separators \u0301" \
  "remove_diacritics 0"                        \
  "hello\u0301world \u0301helloworld"          \
  "hello\u0301world hello\u0301world helloworld helloworld"

do_unicode_token_test3 5.11 "tokenchars \u0301" \
  "remove_diacritics 0"                         \
  "hello\u0301world \u0301helloworld"           \
  "hello\u0301world hello\u0301world helloworld helloworld"

#-------------------------------------------------------------------------

proc do_tokenize {tokenizer txt} {
  set res [list]
  foreach {b c} [sqlite3_fts5_tokenize -subst db $tokenizer $txt] {
    lappend res $b
  }
  set res
}

# Argument $lCodepoint must be a list of codepoints (integers) that 
# correspond to whitespace characters. This command creates a string
# $W from the codepoints, then tokenizes "${W}hello{$W}world${W}" 
# using tokenizer $tokenizer. The test passes if the tokenizer successfully
# extracts the two 5 character tokens.
#
proc do_isspace_test {tn tokenizer lCp} {
  set whitespace [format [string repeat %c [llength $lCp]] {*}$lCp] 
  set txt "${whitespace}hello${whitespace}world${whitespace}"
  uplevel [list do_test $tn [list do_tokenize $tokenizer $txt] {hello world}]
}

set tokenizers [list unicode61]
ifcapable icu { lappend tokenizers icu }

# Some tests to check that the tokenizers can both identify white-space 
# codepoints. All codepoints tested below are of type "Zs" in the
# UnicodeData.txt file.
foreach T $tokenizers {
  do_isspace_test 6.$T.1 $T    32
  do_isspace_test 6.$T.2 $T    160
  do_isspace_test 6.$T.3 $T    5760
  do_isspace_test 6.$T.4 $T    6158
  do_isspace_test 6.$T.5 $T    8192
  do_isspace_test 6.$T.6 $T    8193
  do_isspace_test 6.$T.7 $T    8194
  do_isspace_test 6.$T.8 $T    8195
  do_isspace_test 6.$T.9 $T    8196
  do_isspace_test 6.$T.10 $T    8197
  do_isspace_test 6.$T.11 $T    8198
  do_isspace_test 6.$T.12 $T    8199
  do_isspace_test 6.$T.13 $T    8200
  do_isspace_test 6.$T.14 $T    8201
  do_isspace_test 6.$T.15 $T    8202
  do_isspace_test 6.$T.16 $T    8239
  do_isspace_test 6.$T.17 $T    8287
  do_isspace_test 6.$T.18 $T   12288

  do_isspace_test 6.$T.19 $T   {32 160 5760 6158}
  do_isspace_test 6.$T.20 $T   {8192 8193 8194 8195}
  do_isspace_test 6.$T.21 $T   {8196 8197 8198 8199}
  do_isspace_test 6.$T.22 $T   {8200 8201 8202 8239}
  do_isspace_test 6.$T.23 $T   {8287 12288}
}


#-------------------------------------------------------------------------
# Test that the private use ranges are treated as alphanumeric.
#
foreach {tn1 c} {
  1 \ue000 2 \ue001 3 \uf000 4 \uf8fe 5 \uf8ff
} {
  foreach {tn2 config res} {
    1 ""             "hello*world hello*world"
    2 "separators *" "hello hello world world"
  } {
    set config [string map [list * $c] $config]
    set input  [string map [list * $c] "hello*world"]
    set output [string map [list * $c] $res]
    do_unicode_token_test3 7.$tn1.$tn2 {*}$config $input $output
  }
}

#-------------------------------------------------------------------------
# Cursory test of remove_diacritics=0.
#
# 00C4;LATIN CAPITAL LETTER A WITH DIAERESIS
# 00D6;LATIN CAPITAL LETTER O WITH DIAERESIS
# 00E4;LATIN SMALL LETTER A WITH DIAERESIS
# 00F6;LATIN SMALL LETTER O WITH DIAERESIS
#
do_execsql_test 8.1.1 "
  CREATE VIRTUAL TABLE t3 USING fts5(
    content, tokenize='unicode61 remove_diacritics 1'
  );
  INSERT INTO t3 VALUES('o');
  INSERT INTO t3 VALUES('a');
  INSERT INTO t3 VALUES('O');
  INSERT INTO t3 VALUES('A');
  INSERT INTO t3 VALUES('\xD6');
  INSERT INTO t3 VALUES('\xC4');
  INSERT INTO t3 VALUES('\xF6');
  INSERT INTO t3 VALUES('\xE4');
"
do_execsql_test 8.1.2 {
  SELECT rowid FROM t3 WHERE t3 MATCH 'o' ORDER BY rowid ASC;
} {1 3 5 7}
do_execsql_test 8.1.3 {
  SELECT rowid FROM t3 WHERE t3 MATCH 'a' ORDER BY rowid ASC;
} {2 4 6 8}
do_execsql_test 8.2.1 {
  CREATE VIRTUAL TABLE t4 USING fts5(
    content, tokenize='unicode61 remove_diacritics 0'
  );
  INSERT INTO t4 SELECT * FROM t3 ORDER BY rowid ASC;
}
do_execsql_test 8.2.2 {
  SELECT rowid FROM t4 WHERE t4 MATCH 'o' ORDER BY rowid ASC;
} {1 3}
do_execsql_test 8.2.3 {
  SELECT rowid FROM t4 WHERE t4 MATCH 'a' ORDER BY rowid ASC;
} {2 4}

#-------------------------------------------------------------------------
#
if 0 {
foreach {tn sql} {
  1 {
    CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 [tokenchars= .]);
    CREATE VIRTUAL TABLE t6 USING fts4(
        tokenize=unicode61 [tokenchars=="] "tokenchars=[]");
    CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 [separators=x\xC4]);
  }
  2 {
    CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 "tokenchars= .");
    CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 "tokenchars=[=""]");
    CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 "separators=x\xC4");
  }
  3 {
    CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 'tokenchars= .');
    CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 'tokenchars=="[]');
    CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 'separators=x\xC4');
  }
  4 {
    CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 `tokenchars= .`);
    CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 `tokenchars=[="]`);
    CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 `separators=x\xC4`);
  }
} {
  do_execsql_test 9.$tn.0 { 
    DROP TABLE IF EXISTS t5;
    DROP TABLE IF EXISTS t5aux;
    DROP TABLE IF EXISTS t6;
    DROP TABLE IF EXISTS t6aux;
    DROP TABLE IF EXISTS t7;
    DROP TABLE IF EXISTS t7aux;
  }
  do_execsql_test 9.$tn.1 $sql

  do_execsql_test 9.$tn.2 {
    CREATE VIRTUAL TABLE t5aux USING fts4aux(t5);
    INSERT INTO t5 VALUES('one two three/four.five.six');
    SELECT * FROM t5aux;
  } {
    four.five.six   * 1 1 four.five.six   0 1 1 
    {one two three} * 1 1 {one two three} 0 1 1
  }

  do_execsql_test 9.$tn.3 {
    CREATE VIRTUAL TABLE t6aux USING fts4aux(t6);
    INSERT INTO t6 VALUES('alpha=beta"gamma/delta[epsilon]zeta');
    SELECT * FROM t6aux;
  } {
    {alpha=beta"gamma}   * 1 1 {alpha=beta"gamma} 0 1 1 
    {delta[epsilon]zeta} * 1 1 {delta[epsilon]zeta} 0 1 1
  }

  do_execsql_test 9.$tn.4 {
    CREATE VIRTUAL TABLE t7aux USING fts4aux(t7);
    INSERT INTO t7 VALUES('alephxbeth\xC4gimel');
    SELECT * FROM t7aux;
  } {
    aleph * 1 1 aleph 0 1 1 
    beth  * 1 1 beth  0 1 1 
    gimel * 1 1 gimel 0 1 1
  }
}

# Check that multiple options are handled correctly.
#
do_execsql_test 10.1 {
  DROP TABLE IF EXISTS t1;
  CREATE VIRTUAL TABLE t1 USING fts4(tokenize=unicode61
    "tokenchars=xyz" "tokenchars=.=" "separators=.=" "separators=xy"
    "separators=a" "separators=a" "tokenchars=a" "tokenchars=a"
  );

  INSERT INTO t1 VALUES('oneatwoxthreeyfour');
  INSERT INTO t1 VALUES('a.single=word');
  CREATE VIRTUAL TABLE t1aux USING fts4aux(t1);
  SELECT * FROM t1aux;
} {
  .single=word * 1 1 .single=word 0 1 1 
  four         * 1 1 four         0 1 1 
  one          * 1 1 one          0 1 1 
  three        * 1 1 three        0 1 1 
  two          * 1 1 two          0 1 1
}

# Test that case folding happens after tokenization, not before.
#
do_execsql_test 10.2 {
  DROP TABLE IF EXISTS t2;
  CREATE VIRTUAL TABLE t2 USING fts4(tokenize=unicode61 "separators=aB");
  INSERT INTO t2 VALUES('oneatwoBthree');
  INSERT INTO t2 VALUES('onebtwoAthree');
  CREATE VIRTUAL TABLE t2aux USING fts4aux(t2);
  SELECT * FROM t2aux;
} {
  one           * 1 1 one           0 1 1 
  onebtwoathree * 1 1 onebtwoathree 0 1 1 
  three         * 1 1 three         0 1 1 
  two           * 1 1 two           0 1 1
}

# Test that the tokenchars and separators options work with the 
# fts3tokenize table.
#
do_execsql_test 11.1 {
  CREATE VIRTUAL TABLE ft1 USING fts3tokenize(
    "unicode61", "tokenchars=@.", "separators=1234567890"
  );
  SELECT token FROM ft1 WHERE input = 'berlin@street123sydney.road';
} {
  berlin@street sydney.road
}

}

finish_test
Added ext/fts5/test/fts5unicode3.test.


































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# 2014 Dec 20
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on the fts5 tokenizers
#

source [file join [file dirname [info script]] fts5_common.tcl]

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

proc fts3_unicode_path {file} {
  file join [file dirname [info script]] .. .. fts3 unicode $file
}

source [fts3_unicode_path parseunicode.tcl]
set testprefix fts5unicode3

set CF [fts3_unicode_path CaseFolding.txt]
set UD [fts3_unicode_path UnicodeData.txt]

tl_load_casefolding_txt $CF
foreach x [an_load_unicodedata_text $UD] {
  set aNotAlnum($x) 1
}

foreach {y} [rd_load_unicodedata_text $UD] {
  foreach {code ascii} $y {}
  if {$ascii==""} {
    set int 0
  } else {
    binary scan $ascii c int
  }
  set aDiacritic($code) $int
}

proc tcl_fold {i {bRemoveDiacritic 0}} {
  global tl_lookup_table
  global aDiacritic

  if {[info exists tl_lookup_table($i)]} {
    set i $tl_lookup_table($i)
  }
  if {$bRemoveDiacritic && [info exists aDiacritic($i)]} {
    set i $aDiacritic($i)
  }
  expr $i
}
db func tcl_fold tcl_fold

proc tcl_isalnum {i} {
  global aNotAlnum
  expr {![info exists aNotAlnum($i)]}
}
db func tcl_isalnum tcl_isalnum


do_catchsql_test 1.0.1 {
  SELECT fts5_isalnum(1, 2, 3);
} {1 {wrong number of arguments to function fts5_isalnum}}
do_catchsql_test 1.0.2 {
  SELECT fts5_fold();
} {1 {wrong number of arguments to function fts5_fold}}
do_catchsql_test 1.0.3 {
  SELECT fts5_fold(1,2,3);
} {1 {wrong number of arguments to function fts5_fold}}

do_execsql_test 1.1 {
  WITH ii(i) AS (
    SELECT -1
    UNION ALL
    SELECT i+1 FROM ii WHERE i<100000
  )
  SELECT count(*), min(i) FROM ii WHERE fts5_fold(i)!=CAST(tcl_fold(i) AS int);
} {0 {}}

do_execsql_test 1.2 {
  WITH ii(i) AS (
    SELECT -1
    UNION ALL
    SELECT i+1 FROM ii WHERE i<100000
  )
  SELECT count(*), min(i) FROM ii 
  WHERE fts5_fold(i,1)!=CAST(tcl_fold(i,1) AS int);
} {0 {}}

do_execsql_test 1.3 {
  WITH ii(i) AS (
    SELECT -1
    UNION ALL
    SELECT i+1 FROM ii WHERE i<100000
  )
  SELECT count(*), min(i) FROM ii 
  WHERE fts5_isalnum(i)!=CAST(tcl_isalnum(i) AS int);
} {0 {}}

do_test 1.4 {
  set str {CREATE VIRTUAL TABLE f3 USING fts5(a, tokenize=}
  append str {"unicode61 separators '}
  for {set i 700} {$i<900} {incr i} {
    append str [format %c $i]
  }
  append str {'");}
  execsql $str
} {}
do_test 1.5 {
  set str {CREATE VIRTUAL TABLE f5 USING fts5(a, tokenize=}
  append str {"unicode61 tokenchars '}
  for {set i 700} {$i<900} {incr i} {
    append str [format %c $i]
  }
  append str {'");}
  execsql $str
} {}


finish_test

Added ext/fts5/test/fts5unindexed.test.






























































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# 2015 Apr 24
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# The tests in this file focus on "unindexed" columns.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5unindexed

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}


do_execsql_test 1.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(a, b UNINDEXED);
  INSERT INTO t1 VALUES('a b c', 'd e f');
  INSERT INTO t1 VALUES('g h i', 'j k l');
} {}

do_execsql_test 1.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'b' } {1}
do_execsql_test 1.3 { SELECT rowid FROM t1 WHERE t1 MATCH 'e' } {}

do_execsql_test 1.4 { INSERT INTO t1(t1) VALUES('integrity-check') } {}
do_execsql_test 1.5 { INSERT INTO t1(t1) VALUES('rebuild') } {}
do_execsql_test 1.6 { INSERT INTO t1(t1) VALUES('integrity-check') } {}

do_execsql_test 1.7 { SELECT rowid FROM t1 WHERE t1 MATCH 'b' } {1}
do_execsql_test 1.8 { SELECT rowid FROM t1 WHERE t1 MATCH 'e' } {}

do_execsql_test 1.9 { DELETE FROM t1 WHERE t1 MATCH 'b' } {}

do_execsql_test 1.10 { INSERT INTO t1(t1) VALUES('integrity-check') } {}
do_execsql_test 1.11 { INSERT INTO t1(t1) VALUES('rebuild') } {}
do_execsql_test 1.12 { INSERT INTO t1(t1) VALUES('integrity-check') } {}

do_execsql_test 1.13 { SELECT rowid FROM t1 WHERE t1 MATCH 'i' } {2}
do_execsql_test 1.14 { SELECT rowid FROM t1 WHERE t1 MATCH 'l' } {}

do_execsql_test 2.1 {
  CREATE VIRTUAL TABLE t2 USING fts5(a UNINDEXED, b UNINDEXED);
  INSERT INTO t1 VALUES('a b c', 'd e f');
  INSERT INTO t1 VALUES('g h i', 'j k l');
  SELECT rowid FROM t2_data;
} {1 10}
do_execsql_test 2.2 {
  INSERT INTO t2(t2) VALUES('rebuild');
  INSERT INTO t2(t2) VALUES('integrity-check');
  SELECT rowid FROM t2_data;
} {1 10}

do_execsql_test 3.1 {
  CREATE TABLE x4(i INTEGER PRIMARY KEY, a, b, c);
  CREATE VIRTUAL TABLE t4 USING fts5(a, b UNINDEXED, c, content=x4);
  INSERT INTO x4 VALUES(10, 'a b c', 'd e f', 'g h i');
  INSERT INTO x4 VALUES(20, 'j k l', 'm n o', 'p q r');
  INSERT INTO t4(t4) VALUES('rebuild');
  INSERT INTO t4(t4) VALUES('integrity-check');
} {}

do_execsql_test 3.2 {
  INSERT INTO t4(t4, rowid, a, b, c) VALUES('delete', 20, 'j k l', '', 'p q r');
  DELETE FROM x4 WHERE rowid=20;
  INSERT INTO t4(t4) VALUES('integrity-check');
} {}


finish_test

Added ext/fts5/test/fts5version.test.


































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# 2015 Apr 24
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# The tests in this file focus on testing that unrecognized file-format
# versions are detected and reported.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5version

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}


do_execsql_test 1.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(one);
  INSERT INTO t1 VALUES('a b c d');
} {}

do_execsql_test 1.2 {
  SELECT * FROM t1_config WHERE k='version'
} {version 2}

do_execsql_test 1.3 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'a';
} {1}

do_execsql_test 1.4 {
  UPDATE t1_config set v=3 WHERE k='version';
} 

do_test 1.5 {
  db close
  sqlite3 db test.db
  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' }
} {1 {invalid fts5 file format (found 3, expected 2) - run 'rebuild'}}

breakpoint
do_test 1.6 {
  db close
  sqlite3 db test.db
  catchsql { INSERT INTO t1 VALUES('x y z') }
} {1 {invalid fts5 file format (found 3, expected 2) - run 'rebuild'}}

do_test 1.7 {
  execsql { DELETE FROM t1_config WHERE k='version' }
  db close
  sqlite3 db test.db
  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' }
} {1 {invalid fts5 file format (found 0, expected 2) - run 'rebuild'}}


finish_test

Added ext/fts5/test/fts5vocab.test.


















































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
# 2015 Apr 24
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# The tests in this file focus on testing the fts5vocab module.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5vocab

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}


do_execsql_test 1.1.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(one, prefix=1);
  CREATE VIRTUAL TABLE v1 USING fts5vocab(t1, 'row');
  PRAGMA table_info = v1;
} {
  0 term {} 0 {} 0
  1 doc {} 0 {} 0
  2 cnt {} 0 {} 0
}

do_execsql_test 1.1.2 {
  CREATE VIRTUAL TABLE v2 USING fts5vocab(t1, 'col');
  PRAGMA table_info = v2;
} {
  0 term {} 0 {} 0
  1 col {} 0 {} 0
  2 doc {} 0 {} 0
  3 cnt {} 0 {} 0
}

do_execsql_test 1.2.1 { SELECT * FROM v1 } { }
do_execsql_test 1.2.2 { SELECT * FROM v2 } { }

do_execsql_test 1.3 {
  INSERT INTO t1 VALUES('x y z');
  INSERT INTO t1 VALUES('x x x');
}

do_execsql_test 1.4.1 {
  SELECT * FROM v1;
} {x 2 4  y 1 1  z 1 1}

do_execsql_test 1.4.2 {
  SELECT * FROM v2;
} {x 0 2 4  y 0 1 1  z 0 1 1}

do_execsql_test 1.5.1 {
  BEGIN;
    INSERT INTO t1 VALUES('a b c');
    SELECT * FROM v1 WHERE term<'d';
} {a 1 1   b 1 1   c 1 1}

do_execsql_test 1.5.2 {
    SELECT * FROM v2 WHERE term<'d';
  COMMIT;
} {a 0 1 1  b 0 1 1  c 0 1 1}

do_execsql_test 1.6 {
  DELETE FROM t1 WHERE one = 'a b c';
  SELECT * FROM v1;
} {x 2 4  y 1 1  z 1 1}

#-------------------------------------------------------------------------
#
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE tt USING fts5(a, b);
  INSERT INTO tt VALUES('d g b f d f', 'f c e c d a');
  INSERT INTO tt VALUES('f a e a a b', 'e d c f d d');
  INSERT INTO tt VALUES('b c a a a b', 'f f c c b c');
  INSERT INTO tt VALUES('f d c a c e', 'd g d e g d');
  INSERT INTO tt VALUES('g d e f a g x', 'f f d a a b');
  INSERT INTO tt VALUES('g c f b c g', 'a g f d c b');
  INSERT INTO tt VALUES('c e c f g b', 'f e d b g a');
  INSERT INTO tt VALUES('g d e f d e', 'a c d b a g');
  INSERT INTO tt VALUES('e f a c c b', 'b f e a f d y');
  INSERT INTO tt VALUES('c c a a c f', 'd g a e b g');
}

set res_col {
  a 0 6 11    a 1 7 9
  b 0 6 7     b 1 7 7 
  c 0 6 12    c 1 5 8 
  d 0 4 6     d 1 9 13 
  e 0 6 7     e 1 6 6 
  f 0 9 10    f 1 7 10 
  g 0 5 7     g 1 5 7
  x 0 1 1     y 1 1 1
}
set res_row {
  a 10 20   b 9 14   c 9 20   d 9 19   
  e 8 13   f 10 20   g 7 14   x 1 1   
  y 1 1
}

foreach {tn tbl resname} {
  1 "fts5vocab(tt, 'col')" res_col
  2 "fts5vocab(tt, 'row')" res_row
  3 "fts5vocab(tt, \"row\")" res_row
  4 "fts5vocab(tt, [row])" res_row
  5 "fts5vocab(tt, `row`)" res_row

  6 "fts5vocab('tt', 'row')" res_row
  7 "fts5vocab(\"tt\", \"row\")" res_row
  8 "fts5vocab([tt], [row])" res_row
  9 "fts5vocab(`tt`, `row`)" res_row
} {
  do_execsql_test 2.$tn "
    DROP TABLE IF EXISTS tv;
    CREATE VIRTUAL TABLE tv USING $tbl;
    SELECT * FROM tv;
  " [set $resname]
}

#-------------------------------------------------------------------------
# Test errors in the CREATE VIRTUAL TABLE statement.
#
foreach {tn sql} {
  1 { CREATE VIRTUAL TABLE aa USING fts5vocab() }
  2 { CREATE VIRTUAL TABLE aa USING fts5vocab(x) }
  3 { CREATE VIRTUAL TABLE aa USING fts5vocab(x,y,z) }
  4 { CREATE VIRTUAL TABLE temp.aa USING fts5vocab(x,y,z,y) }
} {
  do_catchsql_test 3.$tn $sql {1 {wrong number of vtable arguments}}
}

do_catchsql_test 4.0 {
  CREATE VIRTUAL TABLE cc USING fts5vocab(tbl, unknown);
} {1 {fts5vocab: unknown table type: 'unknown'}}

do_catchsql_test 4.1 {
  ATTACH 'test.db' AS aux;
  CREATE VIRTUAL TABLE aux.cc USING fts5vocab(main, tbl, row);
} {1 {wrong number of vtable arguments}}

#-------------------------------------------------------------------------
# Test fts5vocab tables created in the temp schema. 
#
reset_db
forcedelete test.db2
do_execsql_test 5.0 {
  ATTACH 'test.db2' AS aux;
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  CREATE VIRTUAL TABLE temp.t1 USING fts5(x);
  CREATE VIRTUAL TABLE aux.t1 USING fts5(x);

  INSERT INTO main.t1 VALUES('a b c');
  INSERT INTO main.t1 VALUES('d e f');
  INSERT INTO main.t1 VALUES('a e c');

  INSERT INTO temp.t1 VALUES('1 2 3');
  INSERT INTO temp.t1 VALUES('4 5 6');
  INSERT INTO temp.t1 VALUES('1 5 3');

  INSERT INTO aux.t1 VALUES('x y z');
  INSERT INTO aux.t1 VALUES('m n o');
  INSERT INTO aux.t1 VALUES('x n z');
}

breakpoint
do_execsql_test 5.1 {
  CREATE VIRTUAL TABLE temp.vm  USING fts5vocab(main, t1, row);
  CREATE VIRTUAL TABLE temp.vt1 USING fts5vocab(t1, row);
  CREATE VIRTUAL TABLE temp.vt2 USING fts5vocab(temp, t1, row);
  CREATE VIRTUAL TABLE temp.va  USING fts5vocab(aux, t1, row);
}

do_execsql_test 5.2 { SELECT * FROM vm } {
  a 2 2 b 1 1 c 2 2 d 1 1 e 2 2 f 1 1
}
do_execsql_test 5.3 { SELECT * FROM vt1 } {
  1 2 2 2 1 1 3 2 2 4 1 1 5 2 2 6 1 1
}
do_execsql_test 5.4 { SELECT * FROM vt2 } {
  1 2 2 2 1 1 3 2 2 4 1 1 5 2 2 6 1 1
}
do_execsql_test 5.5 { SELECT * FROM va } {
  m 1 1 n 2 2 o 1 1 x 2 2 y 1 1 z 2 2
}

#-------------------------------------------------------------------------
#
do_execsql_test 6.0 {
  CREATE TABLE iii(iii);
  CREATE TABLE jjj(x);
}

do_catchsql_test 6.1 {
  CREATE VIRTUAL TABLE vocab1 USING fts5vocab(iii, row);
  SELECT * FROM vocab1;
} {1 {no such fts5 table: main.iii}}

do_catchsql_test 6.2 {
  CREATE VIRTUAL TABLE vocab2 USING fts5vocab(jjj, row);
  SELECT * FROM vocab2;
} {1 {no such fts5 table: main.jjj}}

do_catchsql_test 6.2 {
  CREATE VIRTUAL TABLE vocab3 USING fts5vocab(lll, row);
  SELECT * FROM vocab3;
} {1 {no such fts5 table: main.lll}}

finish_test

Added ext/fts5/tool/loadfts5.tcl.






































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131


proc loadfile {f} {
  set fd [open $f]
  set data [read $fd]
  close $fd
  return $data
}

set ::nRow 0
set ::nRowPerDot 1000

proc load_hierachy {dir} {
  foreach f [glob -nocomplain -dir $dir *] {
    if {$::O(limit) && $::nRow>=$::O(limit)} break
    if {[file isdir $f]} {
      load_hierachy $f
    } else {
      db eval { INSERT INTO t1 VALUES($f, loadfile($f)) }
      incr ::nRow

      if {($::nRow % $::nRowPerDot)==0} {
        puts -nonewline .
        if {($::nRow % (65*$::nRowPerDot))==0} { puts "" }
        flush stdout
      }

    }
  }
}

proc usage {} {
  puts stderr "Usage: $::argv0 ?SWITCHES? DATABASE PATH"
  puts stderr ""
  puts stderr "Switches are:"
  puts stderr "  -fts4        (use fts4 instead of fts5)"
  puts stderr "  -fts5        (use fts5)"
  puts stderr "  -porter      (use porter tokenizer)"
  puts stderr "  -delete      (delete the database file before starting)"
  puts stderr "  -limit N     (load no more than N documents)"
  puts stderr "  -automerge N (set the automerge parameter to N)"
  puts stderr "  -crisismerge N (set the crisismerge parameter to N)"
  puts stderr "  -prefix PREFIX (comma separated prefix= argument)"
  exit 1
}

set O(vtab)       fts5
set O(tok)        ""
set O(limit)      0
set O(delete)     0
set O(automerge)  -1
set O(crisismerge)  -1
set O(prefix)     ""

if {[llength $argv]<2} usage
set nOpt [expr {[llength $argv]-2}]
for {set i 0} {$i < $nOpt} {incr i} {
  set arg [lindex $argv $i]
  switch -- [lindex $argv $i] {
    -fts4 {
      set O(vtab) fts4
    }

    -fts5 {
      set O(vtab) fts5
    }

    -porter {
      set O(tok) ", tokenize=porter"
    }

    -delete {
      set O(delete) 1
    }

    -limit {
      if { [incr i]>=$nOpt } usage
      set O(limit) [lindex $argv $i]
    }
    
    -automerge {
      if { [incr i]>=$nOpt } usage
      set O(automerge) [lindex $argv $i]
    }

    -crisismerge {
      if { [incr i]>=$nOpt } usage
      set O(crisismerge) [lindex $argv $i]
    }

    -prefix {
      if { [incr i]>=$nOpt } usage
      set O(prefix) [lindex $argv $i]
    }

    default {
      usage
    }
  }
}

set dbfile [lindex $argv end-1]
if {$O(delete)} { file delete -force $dbfile }
sqlite3 db $dbfile
db func loadfile loadfile

db transaction {
  set pref ""
  if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" }
  catch {
    db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)"
    db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);"
  }
  if {$O(automerge)>=0} {
    if {$O(vtab) == "fts5"} {
      db eval { INSERT INTO t1(t1, rank) VALUES('automerge', $O(automerge)) }
    } else {
      db eval { INSERT INTO t1(t1) VALUES('automerge=' || $O(automerge)) }
    }
  }
  if {$O(crisismerge)>=0} {
    if {$O(vtab) == "fts5"} {
      db eval {INSERT INTO t1(t1, rank) VALUES('crisismerge', $O(crisismerge))}
    } else {
    }
  }
  load_hierachy [lindex $argv end]
}



Added ext/fts5/tool/mkfts5c.tcl.




























































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/bin/sh
# restart with tclsh \
exec tclsh "$0" "$@"

set srcdir [file dirname [file dirname [info script]]]
set G(src) [string map [list %dir% $srcdir] {
  %dir%/fts5.h
  %dir%/fts5Int.h
  fts5parse.h
  %dir%/fts5_aux.c
  %dir%/fts5_buffer.c
  %dir%/fts5_config.c
  %dir%/fts5_expr.c
  %dir%/fts5_hash.c
  %dir%/fts5_index.c
  %dir%/fts5_main.c
  %dir%/fts5_storage.c
  %dir%/fts5_tokenize.c
  %dir%/fts5_unicode2.c
  %dir%/fts5_varint.c
  %dir%/fts5_vocab.c
  fts5parse.c
}]

set G(hdr) {

#if !defined(NDEBUG) && !defined(SQLITE_DEBUG) 
# define NDEBUG 1
#endif
#if defined(NDEBUG) && defined(SQLITE_DEBUG)
# undef NDEBUG
#endif

}

proc readfile {zFile} {
  set fd [open $zFile]
  set data [read $fd]
  close $fd
  return $data
}

proc fts5c_init {zOut} {
  global G
  set G(fd) stdout
  set G(fd) [open $zOut w]

  puts -nonewline $G(fd) $G(hdr)
}

proc fts5c_printfile {zIn} {
  global G
  set data [readfile $zIn]
  puts $G(fd) "#line 1 \"[file tail $zIn]\""
  foreach line [split $data "\n"] {
    if {[regexp {^#include.*fts5} $line]} continue
    if {[regexp {^(const )?[a-zA-Z][a-zA-Z0-9]* [*]?sqlite3Fts5} $line]} {
      set line "static $line"
    }
    puts $G(fd) $line
  }
}

proc fts5c_close {} {
  global G
  if {$G(fd)!="stdout"} {
    close $G(fd)
  }
}


fts5c_init fts5.c
foreach f $G(src) { fts5c_printfile $f }
fts5c_close




Added ext/fts5/tool/showfts5.tcl.






























































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31


proc usage {} {
  puts stderr "usage: $::argv0 database table"
  puts stderr ""
  exit 1
}

set o(vtab)       fts5
set o(tok)        ""
set o(limit)      0
set o(automerge)  -1
set o(crisismerge)  -1

if {[llength $argv]!=2} usage

set database [lindex $argv 0]
set tbl [lindex $argv 1]

sqlite3 db $database

db eval "SELECT fts5_decode(rowid, block) AS d FROM ${tbl}_data WHERE id=10" {
  foreach lvl [lrange $d 1 end] {
    puts $lvl
  }
}





Changes to main.mk.
68
69
70
71
72
73
74


75
76
77
78
79
80
81
         random.o resolve.o rowset.o rtree.o select.o sqlite3ota.o status.o \
         table.o threads.o tokenize.o treeview.o trigger.o \
         update.o userauth.o util.o vacuum.o \
         vdbeapi.o vdbeaux.o vdbeblob.o vdbemem.o vdbesort.o \
	 vdbetrace.o wal.o walker.o where.o wherecode.o whereexpr.o \
         utf.o vtab.o





# All of the source code files.
#
SRC = \
  $(TOP)/src/alter.c \
  $(TOP)/src/analyze.c \







>
>







68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
         random.o resolve.o rowset.o rtree.o select.o sqlite3ota.o status.o \
         table.o threads.o tokenize.o treeview.o trigger.o \
         update.o userauth.o util.o vacuum.o \
         vdbeapi.o vdbeaux.o vdbeblob.o vdbemem.o vdbesort.o \
	 vdbetrace.o wal.o walker.o where.o wherecode.o whereexpr.o \
         utf.o vtab.o

LIBOBJ += fts5.o



# All of the source code files.
#
SRC = \
  $(TOP)/src/alter.c \
  $(TOP)/src/analyze.c \
220
221
222
223
224
225
226
227

228
229
230

231
232
233
234
235
236
237
  $(TOP)/ext/icu/icu.c
SRC += \
  $(TOP)/ext/rtree/sqlite3rtree.h \
  $(TOP)/ext/rtree/rtree.h \
  $(TOP)/ext/rtree/rtree.c
SRC += \
  $(TOP)/ext/userauth/userauth.c \
  $(TOP)/ext/userauth/sqlite3userauth.h

SRC += \
  $(TOP)/ext/ota/sqlite3ota.c \
  $(TOP)/ext/ota/sqlite3ota.h


# Generated source code files
#
SRC += \
  keywordhash.h \
  opcodes.c \
  opcodes.h \







|
>



>







222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
  $(TOP)/ext/icu/icu.c
SRC += \
  $(TOP)/ext/rtree/sqlite3rtree.h \
  $(TOP)/ext/rtree/rtree.h \
  $(TOP)/ext/rtree/rtree.c
SRC += \
  $(TOP)/ext/userauth/userauth.c \
  $(TOP)/ext/userauth/sqlite3userauth.h 

SRC += \
  $(TOP)/ext/ota/sqlite3ota.c \
  $(TOP)/ext/ota/sqlite3ota.h


# Generated source code files
#
SRC += \
  keywordhash.h \
  opcodes.c \
  opcodes.h \
298
299
300
301
302
303
304
305

306
307
308
309
310
311
312
  $(TOP)/ext/misc/ieee754.c \
  $(TOP)/ext/misc/nextchar.c \
  $(TOP)/ext/misc/percentile.c \
  $(TOP)/ext/misc/regexp.c \
  $(TOP)/ext/misc/spellfix.c \
  $(TOP)/ext/misc/totype.c \
  $(TOP)/ext/misc/wholenumber.c \
  $(TOP)/ext/misc/vfslog.c



#TESTSRC += $(TOP)/ext/fts2/fts2_tokenizer.c
#TESTSRC += $(TOP)/ext/fts3/fts3_tokenizer.c

TESTSRC2 = \
  $(TOP)/src/attach.c \







|
>







302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
  $(TOP)/ext/misc/ieee754.c \
  $(TOP)/ext/misc/nextchar.c \
  $(TOP)/ext/misc/percentile.c \
  $(TOP)/ext/misc/regexp.c \
  $(TOP)/ext/misc/spellfix.c \
  $(TOP)/ext/misc/totype.c \
  $(TOP)/ext/misc/wholenumber.c \
  $(TOP)/ext/misc/vfslog.c \
  $(TOP)/ext/fts5/fts5_tcl.c 


#TESTSRC += $(TOP)/ext/fts2/fts2_tokenizer.c
#TESTSRC += $(TOP)/ext/fts3/fts3_tokenizer.c

TESTSRC2 = \
  $(TOP)/src/attach.c \
394
395
396
397
398
399
400




401
402
403
404
405
406
407
  $(TOP)/ext/fts3/fts3Int.h \
  $(TOP)/ext/fts3/fts3_hash.h \
  $(TOP)/ext/fts3/fts3_tokenizer.h
EXTHDR += \
  $(TOP)/ext/rtree/rtree.h
EXTHDR += \
  $(TOP)/ext/icu/sqliteicu.h




EXTHDR += \
  $(TOP)/ext/userauth/sqlite3userauth.h

# executables needed for testing
#
TESTPROGS = \
  testfixture$(EXE) \







>
>
>
>







399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
  $(TOP)/ext/fts3/fts3Int.h \
  $(TOP)/ext/fts3/fts3_hash.h \
  $(TOP)/ext/fts3/fts3_tokenizer.h
EXTHDR += \
  $(TOP)/ext/rtree/rtree.h
EXTHDR += \
  $(TOP)/ext/icu/sqliteicu.h
EXTHDR += \
  $(TOP)/ext/fts5/fts5Int.h  \
  fts5parse.h                \
  $(TOP)/ext/fts5/fts5.h 
EXTHDR += \
  $(TOP)/ext/userauth/sqlite3userauth.h

# executables needed for testing
#
TESTPROGS = \
  testfixture$(EXE) \
620
621
622
623
624
625
626







































627
628
629
630
631
632
633
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_unicode2.c

fts3_write.o:	$(TOP)/ext/fts3/fts3_write.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_write.c

rtree.o:	$(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c








































userauth.o:	$(TOP)/ext/userauth/userauth.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/userauth/userauth.c

sqlite3ota.o:	$(TOP)/ext/ota/sqlite3ota.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/ota/sqlite3ota.c








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_unicode2.c

fts3_write.o:	$(TOP)/ext/fts3/fts3_write.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_write.c

rtree.o:	$(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c

# FTS5 things
#
FTS5_SRC = \
   $(TOP)/ext/fts5/fts5.h \
   $(TOP)/ext/fts5/fts5Int.h \
   $(TOP)/ext/fts5/fts5_aux.c \
   $(TOP)/ext/fts5/fts5_buffer.c \
   $(TOP)/ext/fts5/fts5_main.c \
   $(TOP)/ext/fts5/fts5_config.c \
   $(TOP)/ext/fts5/fts5_expr.c \
   $(TOP)/ext/fts5/fts5_hash.c \
   $(TOP)/ext/fts5/fts5_index.c \
   fts5parse.c fts5parse.h \
   $(TOP)/ext/fts5/fts5_storage.c \
   $(TOP)/ext/fts5/fts5_tokenize.c \
   $(TOP)/ext/fts5/fts5_unicode2.c \
   $(TOP)/ext/fts5/fts5_varint.c \
   $(TOP)/ext/fts5/fts5_vocab.c  \

fts5parse.c:	$(TOP)/ext/fts5/fts5parse.y lemon 
	cp $(TOP)/ext/fts5/fts5parse.y .
	rm -f fts5parse.h
	./lemon $(OPTS) fts5parse.y
	mv fts5parse.c fts5parse.c.orig
	echo "#ifdef SQLITE_ENABLE_FTS5" > fts5parse.c
	cat fts5parse.c.orig | sed 's/yy/fts5yy/g' | sed 's/YY/fts5YY/g' \
		| sed 's/TOKEN/FTS5TOKEN/g' >> fts5parse.c
	echo "#endif /* SQLITE_ENABLE_FTS5 */" >> fts5parse.c

fts5parse.h: fts5parse.c

fts5.c: $(FTS5_SRC)
	tclsh $(TOP)/ext/fts5/tool/mkfts5c.tcl

fts5.o:	fts5.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c fts5.c



userauth.o:	$(TOP)/ext/userauth/userauth.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/userauth/userauth.c

sqlite3ota.o:	$(TOP)/ext/ota/sqlite3ota.c $(HDR) $(EXTHDR)
	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/ota/sqlite3ota.c

656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
TESTFIXTURE_FLAGS += -DSQLITE_SERVER=1 -DSQLITE_PRIVATE="" -DSQLITE_CORE 

testfixture$(EXE): $(TESTSRC2) libsqlite3.a $(TESTSRC) $(TOP)/src/tclsqlite.c
	$(TCCX) $(TCL_FLAGS) -DTCLSH=1 $(TESTFIXTURE_FLAGS)                  \
		$(TESTSRC) $(TESTSRC2) $(TOP)/src/tclsqlite.c                \
		-o testfixture$(EXE) $(LIBTCL) libsqlite3.a $(THREADLIB)

amalgamation-testfixture$(EXE): sqlite3.c $(TESTSRC) $(TOP)/src/tclsqlite.c
	$(TCCX) $(TCL_FLAGS) -DTCLSH=1 $(TESTFIXTURE_FLAGS)                  \
		$(TESTSRC) $(TOP)/src/tclsqlite.c sqlite3.c                  \
		-o testfixture$(EXE) $(LIBTCL) $(THREADLIB)

fts3-testfixture$(EXE): sqlite3.c fts3amal.c $(TESTSRC) $(TOP)/src/tclsqlite.c
	$(TCCX) $(TCL_FLAGS) -DTCLSH=1 $(TESTFIXTURE_FLAGS)                  \
	-DSQLITE_ENABLE_FTS3=1                                               \
		$(TESTSRC) $(TOP)/src/tclsqlite.c sqlite3.c fts3amal.c       \
		-o testfixture$(EXE) $(LIBTCL) $(THREADLIB)







|

|







704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
TESTFIXTURE_FLAGS += -DSQLITE_SERVER=1 -DSQLITE_PRIVATE="" -DSQLITE_CORE 

testfixture$(EXE): $(TESTSRC2) libsqlite3.a $(TESTSRC) $(TOP)/src/tclsqlite.c
	$(TCCX) $(TCL_FLAGS) -DTCLSH=1 $(TESTFIXTURE_FLAGS)                  \
		$(TESTSRC) $(TESTSRC2) $(TOP)/src/tclsqlite.c                \
		-o testfixture$(EXE) $(LIBTCL) libsqlite3.a $(THREADLIB)

amalgamation-testfixture$(EXE): sqlite3.c fts5.c $(TESTSRC) $(TOP)/src/tclsqlite.c
	$(TCCX) $(TCL_FLAGS) -DTCLSH=1 $(TESTFIXTURE_FLAGS)                  \
		$(TESTSRC) $(TOP)/src/tclsqlite.c sqlite3.c fts5.c           \
		-o testfixture$(EXE) $(LIBTCL) $(THREADLIB)

fts3-testfixture$(EXE): sqlite3.c fts3amal.c $(TESTSRC) $(TOP)/src/tclsqlite.c
	$(TCCX) $(TCL_FLAGS) -DTCLSH=1 $(TESTFIXTURE_FLAGS)                  \
	-DSQLITE_ENABLE_FTS3=1                                               \
		$(TESTSRC) $(TOP)/src/tclsqlite.c sqlite3.c fts3amal.c       \
		-o testfixture$(EXE) $(LIBTCL) $(THREADLIB)
773
774
775
776
777
778
779



780
781
782
783
784
785
786
speedtest1$(EXE):	$(TOP)/test/speedtest1.c sqlite3.o
	$(TCC) -I. $(OTAFLAGS) -o speedtest1$(EXE) $(TOP)/test/speedtest1.c sqlite3.o $(THREADLIB) 

ota$(EXE): $(TOP)/ext/ota/ota.c $(TOP)/ext/ota/sqlite3ota.c sqlite3.o 
	$(TCC) -I. -o ota$(EXE) $(TOP)/ext/ota/ota.c sqlite3.o \
	  $(THREADLIB)




# This target will fail if the SQLite amalgamation contains any exported
# symbols that do not begin with "sqlite3_". It is run as part of the
# releasetest.tcl script.
#
checksymbols: sqlite3.o
	nm -g --defined-only sqlite3.o | grep -v " sqlite3_" ; test $$? -ne 0








>
>
>







821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
speedtest1$(EXE):	$(TOP)/test/speedtest1.c sqlite3.o
	$(TCC) -I. $(OTAFLAGS) -o speedtest1$(EXE) $(TOP)/test/speedtest1.c sqlite3.o $(THREADLIB) 

ota$(EXE): $(TOP)/ext/ota/ota.c $(TOP)/ext/ota/sqlite3ota.c sqlite3.o 
	$(TCC) -I. -o ota$(EXE) $(TOP)/ext/ota/ota.c sqlite3.o \
	  $(THREADLIB)

loadfts: $(TOP)/tool/loadfts.c libsqlite3.a
	$(TCC) $(TOP)/tool/loadfts.c libsqlite3.a -o loadfts $(THREADLIB)

# This target will fail if the SQLite amalgamation contains any exported
# symbols that do not begin with "sqlite3_". It is run as part of the
# releasetest.tcl script.
#
checksymbols: sqlite3.o
	nm -g --defined-only sqlite3.o | grep -v " sqlite3_" ; test $$? -ne 0

Changes to src/tclsqlite.c.
3756
3757
3758
3759
3760
3761
3762

3763
3764
3765
3766
3767
3768
3769
3770
3771
    extern int Sqlitetestintarray_Init(Tcl_Interp*);
    extern int Sqlitetestvfs_Init(Tcl_Interp *);
    extern int Sqlitetestrtree_Init(Tcl_Interp*);
    extern int Sqlitequota_Init(Tcl_Interp*);
    extern int Sqlitemultiplex_Init(Tcl_Interp*);
    extern int SqliteSuperlock_Init(Tcl_Interp*);
    extern int SqlitetestSyscall_Init(Tcl_Interp*);

    extern int SqliteOta_Init(Tcl_Interp*);

#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)
    extern int Sqlitetestfts3_Init(Tcl_Interp *interp);
#endif

#ifdef SQLITE_ENABLE_ZIPVFS
    extern int Zipvfs_Init(Tcl_Interp*);
    Zipvfs_Init(interp);







>

<







3756
3757
3758
3759
3760
3761
3762
3763
3764

3765
3766
3767
3768
3769
3770
3771
    extern int Sqlitetestintarray_Init(Tcl_Interp*);
    extern int Sqlitetestvfs_Init(Tcl_Interp *);
    extern int Sqlitetestrtree_Init(Tcl_Interp*);
    extern int Sqlitequota_Init(Tcl_Interp*);
    extern int Sqlitemultiplex_Init(Tcl_Interp*);
    extern int SqliteSuperlock_Init(Tcl_Interp*);
    extern int SqlitetestSyscall_Init(Tcl_Interp*);
    extern int Fts5tcl_Init(Tcl_Interp *);
    extern int SqliteOta_Init(Tcl_Interp*);

#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)
    extern int Sqlitetestfts3_Init(Tcl_Interp *interp);
#endif

#ifdef SQLITE_ENABLE_ZIPVFS
    extern int Zipvfs_Init(Tcl_Interp*);
    Zipvfs_Init(interp);
3800
3801
3802
3803
3804
3805
3806

3807
3808
3809
3810
3811
3812
3813
    Sqlitetestintarray_Init(interp);
    Sqlitetestvfs_Init(interp);
    Sqlitetestrtree_Init(interp);
    Sqlitequota_Init(interp);
    Sqlitemultiplex_Init(interp);
    SqliteSuperlock_Init(interp);
    SqlitetestSyscall_Init(interp);

    SqliteOta_Init(interp);

#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)
    Sqlitetestfts3_Init(interp);
#endif

    Tcl_CreateObjCommand(







>







3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
    Sqlitetestintarray_Init(interp);
    Sqlitetestvfs_Init(interp);
    Sqlitetestrtree_Init(interp);
    Sqlitequota_Init(interp);
    Sqlitemultiplex_Init(interp);
    SqliteSuperlock_Init(interp);
    SqlitetestSyscall_Init(interp);
    Fts5tcl_Init(interp);
    SqliteOta_Init(interp);

#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)
    Sqlitetestfts3_Init(interp);
#endif

    Tcl_CreateObjCommand(
Changes to src/test1.c.
6339
6340
6341
6342
6343
6344
6345

6346
6347
6348
6349
6350
6351
6352



6353
6354
6355
6356
6357
6358
6359
  extern int sqlite3_ieee_init(sqlite3*,char**,const sqlite3_api_routines*);
  extern int sqlite3_nextchar_init(sqlite3*,char**,const sqlite3_api_routines*);
  extern int sqlite3_percentile_init(sqlite3*,char**,const sqlite3_api_routines*);
  extern int sqlite3_regexp_init(sqlite3*,char**,const sqlite3_api_routines*);
  extern int sqlite3_spellfix_init(sqlite3*,char**,const sqlite3_api_routines*);
  extern int sqlite3_totype_init(sqlite3*,char**,const sqlite3_api_routines*);
  extern int sqlite3_wholenumber_init(sqlite3*,char**,const sqlite3_api_routines*);

  static const struct {
    const char *zExtName;
    int (*pInit)(sqlite3*,char**,const sqlite3_api_routines*);
  } aExtension[] = {
    { "amatch",                sqlite3_amatch_init               },
    { "closure",               sqlite3_closure_init              },
    { "eval",                  sqlite3_eval_init                 },



    { "fileio",                sqlite3_fileio_init               },
    { "fuzzer",                sqlite3_fuzzer_init               },
    { "ieee754",               sqlite3_ieee_init                 },
    { "nextchar",              sqlite3_nextchar_init             },
    { "percentile",            sqlite3_percentile_init           },
    { "regexp",                sqlite3_regexp_init               },
    { "spellfix",              sqlite3_spellfix_init             },







>







>
>
>







6339
6340
6341
6342
6343
6344
6345
6346
6347
6348
6349
6350
6351
6352
6353
6354
6355
6356
6357
6358
6359
6360
6361
6362
6363
  extern int sqlite3_ieee_init(sqlite3*,char**,const sqlite3_api_routines*);
  extern int sqlite3_nextchar_init(sqlite3*,char**,const sqlite3_api_routines*);
  extern int sqlite3_percentile_init(sqlite3*,char**,const sqlite3_api_routines*);
  extern int sqlite3_regexp_init(sqlite3*,char**,const sqlite3_api_routines*);
  extern int sqlite3_spellfix_init(sqlite3*,char**,const sqlite3_api_routines*);
  extern int sqlite3_totype_init(sqlite3*,char**,const sqlite3_api_routines*);
  extern int sqlite3_wholenumber_init(sqlite3*,char**,const sqlite3_api_routines*);
  extern int sqlite3_fts5_init(sqlite3*,char**,const sqlite3_api_routines*);
  static const struct {
    const char *zExtName;
    int (*pInit)(sqlite3*,char**,const sqlite3_api_routines*);
  } aExtension[] = {
    { "amatch",                sqlite3_amatch_init               },
    { "closure",               sqlite3_closure_init              },
    { "eval",                  sqlite3_eval_init                 },
#ifdef SQLITE_ENABLE_FTS5
    { "fts5",                  sqlite3_fts5_init                 },
#endif
    { "fileio",                sqlite3_fileio_init               },
    { "fuzzer",                sqlite3_fuzzer_init               },
    { "ieee754",               sqlite3_ieee_init                 },
    { "nextchar",              sqlite3_nextchar_init             },
    { "percentile",            sqlite3_percentile_init           },
    { "regexp",                sqlite3_regexp_init               },
    { "spellfix",              sqlite3_spellfix_init             },
Changes to src/test_config.c.
335
336
337
338
339
340
341






342
343
344
345
346
347
348
#endif

#ifdef SQLITE_ENABLE_FTS3
  Tcl_SetVar2(interp, "sqlite_options", "fts3", "1", TCL_GLOBAL_ONLY);
#else
  Tcl_SetVar2(interp, "sqlite_options", "fts3", "0", TCL_GLOBAL_ONLY);
#endif







#if defined(SQLITE_ENABLE_FTS3) && !defined(SQLITE_DISABLE_FTS3_UNICODE)
  Tcl_SetVar2(interp, "sqlite_options", "fts3_unicode", "1", TCL_GLOBAL_ONLY);
#else
  Tcl_SetVar2(interp, "sqlite_options", "fts3_unicode", "0", TCL_GLOBAL_ONLY);
#endif








>
>
>
>
>
>







335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
#endif

#ifdef SQLITE_ENABLE_FTS3
  Tcl_SetVar2(interp, "sqlite_options", "fts3", "1", TCL_GLOBAL_ONLY);
#else
  Tcl_SetVar2(interp, "sqlite_options", "fts3", "0", TCL_GLOBAL_ONLY);
#endif

#ifdef SQLITE_ENABLE_FTS5
  Tcl_SetVar2(interp, "sqlite_options", "fts5", "1", TCL_GLOBAL_ONLY);
#else
  Tcl_SetVar2(interp, "sqlite_options", "fts5", "0", TCL_GLOBAL_ONLY);
#endif

#if defined(SQLITE_ENABLE_FTS3) && !defined(SQLITE_DISABLE_FTS3_UNICODE)
  Tcl_SetVar2(interp, "sqlite_options", "fts3_unicode", "1", TCL_GLOBAL_ONLY);
#else
  Tcl_SetVar2(interp, "sqlite_options", "fts3_unicode", "0", TCL_GLOBAL_ONLY);
#endif

Changes to src/vtab.c.
833
834
835
836
837
838
839


840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
** the offset of the method to call in the sqlite3_module structure.
**
** The array is cleared after invoking the callbacks. 
*/
static void callFinaliser(sqlite3 *db, int offset){
  int i;
  if( db->aVTrans ){


    for(i=0; i<db->nVTrans; i++){
      VTable *pVTab = db->aVTrans[i];
      sqlite3_vtab *p = pVTab->pVtab;
      if( p ){
        int (*x)(sqlite3_vtab *);
        x = *(int (**)(sqlite3_vtab *))((char *)p->pModule + offset);
        if( x ) x(p);
      }
      pVTab->iSavepoint = 0;
      sqlite3VtabUnlock(pVTab);
    }
    sqlite3DbFree(db, db->aVTrans);
    db->nVTrans = 0;
    db->aVTrans = 0;
  }
}

/*
** Invoke the xSync method of all virtual tables in the sqlite3.aVTrans
** array. Return the error code for the first error that occurs, or
** SQLITE_OK if all xSync operations are successful.







>
>

|









|

<







833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854

855
856
857
858
859
860
861
** the offset of the method to call in the sqlite3_module structure.
**
** The array is cleared after invoking the callbacks. 
*/
static void callFinaliser(sqlite3 *db, int offset){
  int i;
  if( db->aVTrans ){
    VTable **aVTrans = db->aVTrans;
    db->aVTrans = 0;
    for(i=0; i<db->nVTrans; i++){
      VTable *pVTab = aVTrans[i];
      sqlite3_vtab *p = pVTab->pVtab;
      if( p ){
        int (*x)(sqlite3_vtab *);
        x = *(int (**)(sqlite3_vtab *))((char *)p->pModule + offset);
        if( x ) x(p);
      }
      pVTab->iSavepoint = 0;
      sqlite3VtabUnlock(pVTab);
    }
    sqlite3DbFree(db, aVTrans);
    db->nVTrans = 0;

  }
}

/*
** Invoke the xSync method of all virtual tables in the sqlite3.aVTrans
** array. Return the error code for the first error that occurs, or
** SQLITE_OK if all xSync operations are successful.
Changes to test/malloc_common.tcl.
125
126
127
128
129
130
131


132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149

150
151
152
153
154
155
156
    if {$n != "interrupt"} {lappend DEFAULT(-faults) $n}
  }
  set DEFAULT(-prep)          ""
  set DEFAULT(-body)          ""
  set DEFAULT(-test)          ""
  set DEFAULT(-install)       ""
  set DEFAULT(-uninstall)     ""



  fix_testname name

  array set O [array get DEFAULT]
  array set O $args
  foreach o [array names O] {
    if {[info exists DEFAULT($o)]==0} { error "unknown option: $o" }
  }

  set faultlist [list]
  foreach f $O(-faults) {
    set flist [array names FAULTSIM $f]
    if {[llength $flist]==0} { error "unknown fault: $f" }
    set faultlist [concat $faultlist $flist]
  }

  set testspec [list -prep $O(-prep) -body $O(-body) \
      -test $O(-test) -install $O(-install) -uninstall $O(-uninstall)

  ]
  foreach f [lsort -unique $faultlist] {
    eval do_one_faultsim_test "$name-$f" $FAULTSIM($f) $testspec
  }
}









>
>

















|
>







125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
    if {$n != "interrupt"} {lappend DEFAULT(-faults) $n}
  }
  set DEFAULT(-prep)          ""
  set DEFAULT(-body)          ""
  set DEFAULT(-test)          ""
  set DEFAULT(-install)       ""
  set DEFAULT(-uninstall)     ""
  set DEFAULT(-start)          1
  set DEFAULT(-end)            0

  fix_testname name

  array set O [array get DEFAULT]
  array set O $args
  foreach o [array names O] {
    if {[info exists DEFAULT($o)]==0} { error "unknown option: $o" }
  }

  set faultlist [list]
  foreach f $O(-faults) {
    set flist [array names FAULTSIM $f]
    if {[llength $flist]==0} { error "unknown fault: $f" }
    set faultlist [concat $faultlist $flist]
  }

  set testspec [list -prep $O(-prep) -body $O(-body) \
      -test $O(-test) -install $O(-install) -uninstall $O(-uninstall) \
      -start $O(-start) -end $O(-end)
  ]
  foreach f [lsort -unique $faultlist] {
    eval do_one_faultsim_test "$name-$f" $FAULTSIM($f) $testspec
  }
}


285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
# [faultsim_test_result] command created by [do_faultsim_test] and used
# by -test scripts.
#
proc faultsim_test_result_int {args} {
  upvar testrc testrc testresult testresult testnfail testnfail
  set t [list $testrc $testresult]
  set r $args
  if { ($testnfail==0 && $t != [lindex $r 0]) || [lsearch $r $t]<0 } {
    error "nfail=$testnfail rc=$testrc result=$testresult list=$r"
  }
}

#--------------------------------------------------------------------------
# Usage do_one_faultsim_test NAME ?OPTIONS...? 
#







|







288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
# [faultsim_test_result] command created by [do_faultsim_test] and used
# by -test scripts.
#
proc faultsim_test_result_int {args} {
  upvar testrc testrc testresult testresult testnfail testnfail
  set t [list $testrc $testresult]
  set r $args
  if { ($testnfail==0 && $t != [lindex $r 0]) || [lsearch -exact $r $t]<0 } {
    error "nfail=$testnfail rc=$testrc result=$testresult list=$r"
  }
}

#--------------------------------------------------------------------------
# Usage do_one_faultsim_test NAME ?OPTIONS...? 
#
314
315
316
317
318
319
320


321
322
323
324
325
326
327
328
329
330
331
332


333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349



350
351
352
353
354
355
356
#
#     -prep             Script to execute before -body.
#
#     -body             Script to execute (with fault injection).
#
#     -test             Script to execute after -body.
#


proc do_one_faultsim_test {testname args} {

  set DEFAULT(-injectstart)     "expr"
  set DEFAULT(-injectstop)      "expr 0"
  set DEFAULT(-injecterrlist)   [list]
  set DEFAULT(-injectinstall)   ""
  set DEFAULT(-injectuninstall) ""
  set DEFAULT(-prep)            ""
  set DEFAULT(-body)            ""
  set DEFAULT(-test)            ""
  set DEFAULT(-install)         ""
  set DEFAULT(-uninstall)       ""



  array set O [array get DEFAULT]
  array set O $args
  foreach o [array names O] {
    if {[info exists DEFAULT($o)]==0} { error "unknown option: $o" }
  }

  proc faultsim_test_proc {testrc testresult testnfail} $O(-test)
  proc faultsim_test_result {args} "
    uplevel faultsim_test_result_int \$args [list $O(-injecterrlist)]
  "

  eval $O(-injectinstall)
  eval $O(-install)

  set stop 0
  for {set iFail 1} {!$stop} {incr iFail} {




    # Evaluate the -prep script.
    #
    eval $O(-prep)

    # Start the fault-injection. Run the -body script. Stop the fault
    # injection. Local var $nfail is set to the total number of faults 







>
>












>
>
















|
>
>
>







317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
#
#     -prep             Script to execute before -body.
#
#     -body             Script to execute (with fault injection).
#
#     -test             Script to execute after -body.
#
#     -start            Index of first fault to inject (default 1)
#
proc do_one_faultsim_test {testname args} {

  set DEFAULT(-injectstart)     "expr"
  set DEFAULT(-injectstop)      "expr 0"
  set DEFAULT(-injecterrlist)   [list]
  set DEFAULT(-injectinstall)   ""
  set DEFAULT(-injectuninstall) ""
  set DEFAULT(-prep)            ""
  set DEFAULT(-body)            ""
  set DEFAULT(-test)            ""
  set DEFAULT(-install)         ""
  set DEFAULT(-uninstall)       ""
  set DEFAULT(-start)           1
  set DEFAULT(-end)             0

  array set O [array get DEFAULT]
  array set O $args
  foreach o [array names O] {
    if {[info exists DEFAULT($o)]==0} { error "unknown option: $o" }
  }

  proc faultsim_test_proc {testrc testresult testnfail} $O(-test)
  proc faultsim_test_result {args} "
    uplevel faultsim_test_result_int \$args [list $O(-injecterrlist)]
  "

  eval $O(-injectinstall)
  eval $O(-install)

  set stop 0
  for {set iFail $O(-start)}                        \
      {!$stop && ($O(-end)==0 || $iFail<=$O(-end))} \
      {incr iFail}                                  \
  {

    # Evaluate the -prep script.
    #
    eval $O(-prep)

    # Start the fault-injection. Run the -body script. Stop the fault
    # injection. Local var $nfail is set to the total number of faults 
Changes to test/permutations.test.
244
245
246
247
248
249
250












251
252
253
254
255
256
257
  fts4aa.test fts4content.test
  fts3conf.test fts3prefix.test fts3fault2.test fts3corrupt.test
  fts3corrupt2.test fts3first.test fts4langid.test fts4merge.test
  fts4check.test fts4unicode.test fts4noti.test
  fts3varint.test
  fts4growth.test fts4growth2.test
}













test_suite "nofaultsim" -prefix "" -description {
  "Very" quick test suite. Runs in less than 5 minutes on a workstation. 
  This test suite is the same as the "quick" tests, except that some files
  that test malloc and IO errors are omitted.
} -files [
  test_set $allquicktests -exclude *malloc* *ioerr* *fault*







>
>
>
>
>
>
>
>
>
>
>
>







244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
  fts4aa.test fts4content.test
  fts3conf.test fts3prefix.test fts3fault2.test fts3corrupt.test
  fts3corrupt2.test fts3first.test fts4langid.test fts4merge.test
  fts4check.test fts4unicode.test fts4noti.test
  fts3varint.test
  fts4growth.test fts4growth2.test
}

test_suite "fts5" -prefix "" -description {
  All FTS5 tests.
} -files [glob -nocomplain $::testdir/../ext/fts5/test/*.test]

test_suite "fts5-light" -prefix "" -description {
  All FTS5 tests.
} -files [
  test_set \
      [glob -nocomplain $::testdir/../ext/fts5/test/*.test] \
      -exclude *corrupt* *fault* *big* *fts5aj*
]

test_suite "nofaultsim" -prefix "" -description {
  "Very" quick test suite. Runs in less than 5 minutes on a workstation. 
  This test suite is the same as the "quick" tests, except that some files
  that test malloc and IO errors are omitted.
} -files [
  test_set $allquicktests -exclude *malloc* *ioerr* *fault*
Added tool/loadfts.c.




























































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
/*
** 2013-06-10
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
*/

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <assert.h>
#include <string.h>
#include <errno.h>
#include <dirent.h>
#include "sqlite3.h"

/*
** Implementation of the "readtext(X)" SQL function.  The entire content
** of the file named X is read and returned as a TEXT value. It is assumed
** the file contains UTF-8 text. NULL is returned if the file does not 
** exist or is unreadable.
*/
static void readfileFunc(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  const char *zName;
  FILE *in;
  long nIn;
  void *pBuf;

  zName = (const char*)sqlite3_value_text(argv[0]);
  if( zName==0 ) return;
  in = fopen(zName, "rb");
  if( in==0 ) return;
  fseek(in, 0, SEEK_END);
  nIn = ftell(in);
  rewind(in);
  pBuf = sqlite3_malloc( nIn );
  if( pBuf && 1==fread(pBuf, nIn, 1, in) ){
    sqlite3_result_text(context, pBuf, nIn, sqlite3_free);
  }else{
    sqlite3_free(pBuf);
  }
  fclose(in);
}

/*
** Print usage text for this program and exit.
*/
static void showHelp(const char *zArgv0){
  printf("\n"
"Usage: %s SWITCHES... DB\n"
"\n"
"  This program opens the database named on the command line and attempts to\n"
"  create an FTS table named \"fts\" with a single column. If successful, it\n"
"  recursively traverses the directory named by the -dir option and inserts\n"
"  the contents of each file into the fts table. All files are assumed to\n"
"  contain UTF-8 text.\n"
"\n"
"Switches are:\n"
"  -fts [345]       FTS version to use (default=5)\n"
"  -idx [01]        Create a mapping from filename to rowid (default=0)\n"
"  -dir <path>      Root of directory tree to load data from (default=.)\n"
"  -trans <integer> Number of inserts per transaction (default=1)\n"
, zArgv0
);
  exit(1);
}

/*
** Exit with a message based on the argument and the current value of errno.
*/
static void error_out(const char *zText){
  fprintf(stderr, "%s: %s\n", zText, strerror(errno));
  exit(-1);
}

/*
** Exit with a message based on the first argument and the error message
** currently stored in database handle db.
*/
static void sqlite_error_out(const char *zText, sqlite3 *db){
  fprintf(stderr, "%s: %s\n", zText, sqlite3_errmsg(db));
  exit(-1);
}

/*
** Context object for visit_file().
*/
typedef struct VisitContext VisitContext;
struct VisitContext {
  int nRowPerTrans;
  sqlite3 *db;                    /* Database handle */
  sqlite3_stmt *pInsert;          /* INSERT INTO fts VALUES(readtext(:1)) */
};

/*
** Callback used with traverse(). The first argument points to an object
** of type VisitContext. This function inserts the contents of the text
** file zPath into the FTS table.
*/
void visit_file(void *pCtx, const char *zPath){
  int rc;
  VisitContext *p = (VisitContext*)pCtx;
  /* printf("%s\n", zPath); */
  sqlite3_bind_text(p->pInsert, 1, zPath, -1, SQLITE_STATIC);
  sqlite3_step(p->pInsert);
  rc = sqlite3_reset(p->pInsert);
  if( rc!=SQLITE_OK ){
    sqlite_error_out("insert", p->db);
  }else if( p->nRowPerTrans>0 
         && (sqlite3_last_insert_rowid(p->db) % p->nRowPerTrans)==0 
  ){
    sqlite3_exec(p->db, "COMMIT ; BEGIN", 0, 0, 0);
  }
}

/*
** Recursively traverse directory zDir. For each file that is not a 
** directory, invoke the supplied callback with its path.
*/
static void traverse(
  const char *zDir,               /* Directory to traverse */
  void *pCtx,                     /* First argument passed to callback */
  void (*xCallback)(void*, const char *zPath)
){
  DIR *d;
  struct dirent *e;

  d = opendir(zDir);
  if( d==0 ) error_out("opendir()");

  for(e=readdir(d); e; e=readdir(d)){
    if( strcmp(e->d_name, ".")==0 || strcmp(e->d_name, "..")==0 ) continue;
    char *zPath = sqlite3_mprintf("%s/%s", zDir, e->d_name);
    if (e->d_type & DT_DIR) {
      traverse(zPath, pCtx, xCallback);
    }else{
      xCallback(pCtx, zPath);
    }
    sqlite3_free(zPath);
  }

  closedir(d);
}

int main(int argc, char **argv){
  int iFts = 5;                   /* Value of -fts option */
  int bMap = 0;                   /* True to create mapping table */
  const char *zDir = ".";         /* Directory to scan */
  int i;
  int rc;
  int nRowPerTrans = 0;
  sqlite3 *db;
  char *zSql;
  VisitContext sCtx;

  int nCmd = 0;
  char **aCmd = 0;

  if( argc % 2 ) showHelp(argv[0]);

  for(i=1; i<(argc-1); i+=2){
    char *zOpt = argv[i];
    char *zArg = argv[i+1];
    if( strcmp(zOpt, "-fts")==0 ){
      iFts = atoi(zArg);
      if( iFts!=3 && iFts!=4 && iFts!= 5) showHelp(argv[0]);
    }
    else if( strcmp(zOpt, "-trans")==0 ){
      nRowPerTrans = atoi(zArg);
    }
    else if( strcmp(zOpt, "-idx")==0 ){
      bMap = atoi(zArg);
      if( bMap!=0 && bMap!=1 ) showHelp(argv[0]);
    }
    else if( strcmp(zOpt, "-dir")==0 ){
      zDir = zArg;
    }
    else if( strcmp(zOpt, "-special")==0 ){
      nCmd++;
      aCmd = sqlite3_realloc(aCmd, sizeof(char*) * nCmd);
      aCmd[nCmd-1] = zArg;
    }
    else{
      showHelp(argv[0]);
    }
  }

  /* Open the database file */
  rc = sqlite3_open(argv[argc-1], &db);
  if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_open()", db);

  rc = sqlite3_create_function(db, "readtext", 1, SQLITE_UTF8, 0,
                               readfileFunc, 0, 0);
  if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_create_function()", db);

  /* Create the FTS table */
  zSql = sqlite3_mprintf("CREATE VIRTUAL TABLE fts USING fts%d(content)", iFts);
  rc = sqlite3_exec(db, zSql, 0, 0, 0);
  if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_exec(1)", db);
  sqlite3_free(zSql);

  for(i=0; i<nCmd; i++){
    zSql = sqlite3_mprintf("INSERT INTO fts(fts) VALUES(%Q)", aCmd[i]);
    rc = sqlite3_exec(db, zSql, 0, 0, 0);
    if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_exec(1)", db);
    sqlite3_free(zSql);
  }

  /* Compile the INSERT statement to write data to the FTS table. */
  memset(&sCtx, 0, sizeof(VisitContext));
  sCtx.db = db;
  sCtx.nRowPerTrans = nRowPerTrans;
  rc = sqlite3_prepare_v2(db, 
      "INSERT INTO fts VALUES(readtext(?))", -1, &sCtx.pInsert, 0
  );
  if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_prepare_v2(1)", db);

  /* Load all files in the directory hierarchy into the FTS table. */
  if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "BEGIN", 0, 0, 0);
  traverse(zDir, (void*)&sCtx, visit_file);
  if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "COMMIT", 0, 0, 0);

  /* Clean up and exit. */
  sqlite3_finalize(sCtx.pInsert);
  sqlite3_close(db);
  sqlite3_free(aCmd);
  return 0;
}