Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Merge fts5 branch into trunk. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
d27d9965b5404cd32be1113215fd9fee |
User & Date: | dan 2015-06-26 20:25:59.799 |
Context
2015-06-26
| ||
20:45 | Treat compilation of FTS5 for the loadable extension specially with MSVC. (check-in: 7c610276bb user: mistachkin tags: trunk) | |
20:25 | Merge fts5 branch into trunk. (check-in: d27d9965b5 user: dan tags: trunk) | |
20:14 | Merge latest trunk with this branch. (Closed-Leaf check-in: 8671b9e137 user: dan tags: fts5) | |
19:43 | Small size reduction and performance increase on the OP_IdxInsert opcode. (check-in: b6bedc2e9c user: drh tags: trunk) | |
Changes
Changes to Makefile.in.
︙ | ︙ | |||
169 170 171 172 173 174 175 176 177 178 179 180 181 182 | backup.lo bitvec.lo btmutex.lo btree.lo build.lo \ callback.lo complete.lo ctime.lo date.lo dbstat.lo delete.lo \ expr.lo fault.lo fkey.lo \ fts3.lo fts3_aux.lo fts3_expr.lo fts3_hash.lo fts3_icu.lo \ fts3_porter.lo fts3_snippet.lo fts3_tokenizer.lo fts3_tokenizer1.lo \ fts3_tokenize_vtab.lo \ fts3_unicode.lo fts3_unicode2.lo fts3_write.lo \ func.lo global.lo hash.lo \ icu.lo insert.lo journal.lo legacy.lo loadext.lo \ main.lo malloc.lo mem0.lo mem1.lo mem2.lo mem3.lo mem5.lo \ memjournal.lo \ mutex.lo mutex_noop.lo mutex_unix.lo mutex_w32.lo \ notify.lo opcodes.lo os.lo os_unix.lo os_win.lo \ pager.lo parse.lo pcache.lo pcache1.lo pragma.lo prepare.lo printf.lo \ | > | 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 | backup.lo bitvec.lo btmutex.lo btree.lo build.lo \ callback.lo complete.lo ctime.lo date.lo dbstat.lo delete.lo \ expr.lo fault.lo fkey.lo \ fts3.lo fts3_aux.lo fts3_expr.lo fts3_hash.lo fts3_icu.lo \ fts3_porter.lo fts3_snippet.lo fts3_tokenizer.lo fts3_tokenizer1.lo \ fts3_tokenize_vtab.lo \ fts3_unicode.lo fts3_unicode2.lo fts3_write.lo \ fts5.lo \ func.lo global.lo hash.lo \ icu.lo insert.lo journal.lo legacy.lo loadext.lo \ main.lo malloc.lo mem0.lo mem1.lo mem2.lo mem3.lo mem5.lo \ memjournal.lo \ mutex.lo mutex_noop.lo mutex_unix.lo mutex_w32.lo \ notify.lo opcodes.lo os.lo os_unix.lo os_win.lo \ pager.lo parse.lo pcache.lo pcache1.lo pragma.lo prepare.lo printf.lo \ |
︙ | ︙ | |||
409 410 411 412 413 414 415 416 417 418 419 420 421 422 | # TESTSRC += \ $(TOP)/ext/misc/amatch.c \ $(TOP)/ext/misc/closure.c \ $(TOP)/ext/misc/eval.c \ $(TOP)/ext/misc/fileio.c \ $(TOP)/ext/misc/fuzzer.c \ $(TOP)/ext/misc/ieee754.c \ $(TOP)/ext/misc/nextchar.c \ $(TOP)/ext/misc/percentile.c \ $(TOP)/ext/misc/regexp.c \ $(TOP)/ext/misc/spellfix.c \ $(TOP)/ext/misc/totype.c \ $(TOP)/ext/misc/wholenumber.c | > | 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 | # TESTSRC += \ $(TOP)/ext/misc/amatch.c \ $(TOP)/ext/misc/closure.c \ $(TOP)/ext/misc/eval.c \ $(TOP)/ext/misc/fileio.c \ $(TOP)/ext/misc/fuzzer.c \ $(TOP)/ext/fts5/fts5_tcl.c \ $(TOP)/ext/misc/ieee754.c \ $(TOP)/ext/misc/nextchar.c \ $(TOP)/ext/misc/percentile.c \ $(TOP)/ext/misc/regexp.c \ $(TOP)/ext/misc/spellfix.c \ $(TOP)/ext/misc/totype.c \ $(TOP)/ext/misc/wholenumber.c |
︙ | ︙ | |||
970 971 972 973 974 975 976 977 978 979 980 981 982 983 | fts3_write.lo: $(TOP)/ext/fts3/fts3_write.c $(HDR) $(EXTHDR) $(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_write.c rtree.lo: $(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR) $(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c # Rules to build the 'testfixture' application. # # If using the amalgamation, use sqlite3.c directly to build the test # fixture. Otherwise link against libsqlite3.la. (This distinction is # necessary because the test fixture requires non-API symbols which are # hidden when the library is built via the amalgamation). | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 | fts3_write.lo: $(TOP)/ext/fts3/fts3_write.c $(HDR) $(EXTHDR) $(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_write.c rtree.lo: $(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR) $(LTCOMPILE) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c # FTS5 things # FTS5_SRC = \ $(TOP)/ext/fts5/fts5.h \ $(TOP)/ext/fts5/fts5Int.h \ $(TOP)/ext/fts5/fts5_aux.c \ $(TOP)/ext/fts5/fts5_buffer.c \ $(TOP)/ext/fts5/fts5_main.c \ $(TOP)/ext/fts5/fts5_config.c \ $(TOP)/ext/fts5/fts5_expr.c \ $(TOP)/ext/fts5/fts5_hash.c \ $(TOP)/ext/fts5/fts5_index.c \ fts5parse.c fts5parse.h \ $(TOP)/ext/fts5/fts5_storage.c \ $(TOP)/ext/fts5/fts5_tokenize.c \ $(TOP)/ext/fts5/fts5_unicode2.c \ $(TOP)/ext/fts5/fts5_varint.c \ $(TOP)/ext/fts5/fts5_vocab.c \ fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon cp $(TOP)/ext/fts5/fts5parse.y . 
rm -f fts5parse.h ./lemon $(OPTS) fts5parse.y mv fts5parse.c fts5parse.c.orig echo "#ifdef SQLITE_ENABLE_FTS5" > fts5parse.c cat fts5parse.c.orig | sed 's/yy/fts5yy/g' | sed 's/YY/fts5YY/g' \ | sed 's/TOKEN/FTS5TOKEN/g' >> fts5parse.c echo "#endif /* SQLITE_ENABLE_FTS5 */" >> fts5parse.c fts5parse.h: fts5parse.c fts5.c: $(FTS5_SRC) $(TCLSH_CMD) $(TOP)/ext/fts5/tool/mkfts5c.tcl fts5.lo: fts5.c $(HDR) $(EXTHDR) $(LTCOMPILE) -DSQLITE_CORE -c fts5.c # Rules to build the 'testfixture' application. # # If using the amalgamation, use sqlite3.c directly to build the test # fixture. Otherwise link against libsqlite3.la. (This distinction is # necessary because the test fixture requires non-API symbols which are # hidden when the library is built via the amalgamation). |
︙ | ︙ |
Changes to Makefile.msc.
︙ | ︙ | |||
823 824 825 826 827 828 829 830 831 832 833 834 835 836 | LIBOBJS0 = vdbe.lo parse.lo alter.lo analyze.lo attach.lo auth.lo \ backup.lo bitvec.lo btmutex.lo btree.lo build.lo \ callback.lo complete.lo ctime.lo date.lo dbstat.lo delete.lo \ expr.lo fault.lo fkey.lo \ fts3.lo fts3_aux.lo fts3_expr.lo fts3_hash.lo fts3_icu.lo \ fts3_porter.lo fts3_snippet.lo fts3_tokenizer.lo fts3_tokenizer1.lo \ fts3_tokenize_vtab.lo fts3_unicode.lo fts3_unicode2.lo fts3_write.lo \ func.lo global.lo hash.lo \ icu.lo insert.lo journal.lo legacy.lo loadext.lo \ main.lo malloc.lo mem0.lo mem1.lo mem2.lo mem3.lo mem5.lo \ memjournal.lo \ mutex.lo mutex_noop.lo mutex_unix.lo mutex_w32.lo \ notify.lo opcodes.lo os.lo os_unix.lo os_win.lo \ pager.lo pcache.lo pcache1.lo pragma.lo prepare.lo printf.lo \ | > | 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 | LIBOBJS0 = vdbe.lo parse.lo alter.lo analyze.lo attach.lo auth.lo \ backup.lo bitvec.lo btmutex.lo btree.lo build.lo \ callback.lo complete.lo ctime.lo date.lo dbstat.lo delete.lo \ expr.lo fault.lo fkey.lo \ fts3.lo fts3_aux.lo fts3_expr.lo fts3_hash.lo fts3_icu.lo \ fts3_porter.lo fts3_snippet.lo fts3_tokenizer.lo fts3_tokenizer1.lo \ fts3_tokenize_vtab.lo fts3_unicode.lo fts3_unicode2.lo fts3_write.lo \ fts5.lo \ func.lo global.lo hash.lo \ icu.lo insert.lo journal.lo legacy.lo loadext.lo \ main.lo malloc.lo mem0.lo mem1.lo mem2.lo mem3.lo mem5.lo \ memjournal.lo \ mutex.lo mutex_noop.lo mutex_unix.lo mutex_w32.lo \ notify.lo opcodes.lo os.lo os_unix.lo os_win.lo \ pager.lo pcache.lo pcache1.lo pragma.lo prepare.lo printf.lo \ |
︙ | ︙ | |||
1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 | # TESTEXT = \ $(TOP)\ext\misc\amatch.c \ $(TOP)\ext\misc\closure.c \ $(TOP)\ext\misc\eval.c \ $(TOP)\ext\misc\fileio.c \ $(TOP)\ext\misc\fuzzer.c \ $(TOP)\ext\misc\ieee754.c \ $(TOP)\ext\misc\nextchar.c \ $(TOP)\ext\misc\percentile.c \ $(TOP)\ext\misc\regexp.c \ $(TOP)\ext\misc\spellfix.c \ $(TOP)\ext\misc\totype.c \ $(TOP)\ext\misc\wholenumber.c | > > | 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 | # TESTEXT = \ $(TOP)\ext\misc\amatch.c \ $(TOP)\ext\misc\closure.c \ $(TOP)\ext\misc\eval.c \ $(TOP)\ext\misc\fileio.c \ $(TOP)\ext\misc\fuzzer.c \ fts5.c \ $(TOP)\ext\fts5\fts5_tcl.c \ $(TOP)\ext\misc\ieee754.c \ $(TOP)\ext\misc\nextchar.c \ $(TOP)\ext\misc\percentile.c \ $(TOP)\ext\misc\regexp.c \ $(TOP)\ext\misc\spellfix.c \ $(TOP)\ext\misc\totype.c \ $(TOP)\ext\misc\wholenumber.c |
︙ | ︙ | |||
1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 | fts3_write.lo: $(TOP)\ext\fts3\fts3_write.c $(HDR) $(EXTHDR) $(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\fts3\fts3_write.c rtree.lo: $(TOP)\ext\rtree\rtree.c $(HDR) $(EXTHDR) $(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\rtree\rtree.c # Rules to build the 'testfixture' application. # # If using the amalgamation, use sqlite3.c directly to build the test # fixture. Otherwise link against libsqlite3.lib. (This distinction is # necessary because the test fixture requires non-API symbols which are # hidden when the library is built via the amalgamation). | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 | fts3_write.lo: $(TOP)\ext\fts3\fts3_write.c $(HDR) $(EXTHDR) $(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\fts3\fts3_write.c rtree.lo: $(TOP)\ext\rtree\rtree.c $(HDR) $(EXTHDR) $(LTCOMPILE) $(CORE_COMPILE_OPTS) $(NO_WARN) -DSQLITE_CORE -c $(TOP)\ext\rtree\rtree.c # FTS5 things # FTS5_SRC = \ $(TOP)\ext\fts5\fts5.h \ $(TOP)\ext\fts5\fts5Int.h \ $(TOP)\ext\fts5\fts5_aux.c \ $(TOP)\ext\fts5\fts5_buffer.c \ $(TOP)\ext\fts5\fts5_main.c \ $(TOP)\ext\fts5\fts5_config.c \ $(TOP)\ext\fts5\fts5_expr.c \ $(TOP)\ext\fts5\fts5_hash.c \ $(TOP)\ext\fts5\fts5_index.c \ fts5parse.c fts5parse.h \ $(TOP)\ext\fts5\fts5_storage.c \ $(TOP)\ext\fts5\fts5_tokenize.c \ $(TOP)\ext\fts5\fts5_unicode2.c \ $(TOP)\ext\fts5\fts5_varint.c \ $(TOP)\ext\fts5\fts5_vocab.c fts5parse.c: $(TOP)\ext\fts5\fts5parse.y lemon.exe copy $(TOP)\ext\fts5\fts5parse.y . 
del /Q fts5parse.h 2>NUL .\lemon.exe $(REQ_FEATURE_FLAGS) $(OPT_FEATURE_FLAGS) $(OPTS) fts5parse.y move fts5parse.c fts5parse.c.orig echo #ifdef SQLITE_ENABLE_FTS5 > $@ type fts5parse.c.orig \ | $(NAWK) "/.*/ { gsub(/yy/,\"fts5yy\");print }" \ | $(NAWK) "/.*/ { gsub(/YY/,\"fts5YY\");print }" \ | $(NAWK) "/.*/ { gsub(/TOKEN/,\"FTS5TOKEN\");print }" >> $@ echo #endif /* SQLITE_ENABLE_FTS5 */ >> $@ fts5parse.h: fts5parse.c fts5.c: $(FTS5_SRC) $(TCLSH_CMD) $(TOP)\ext\fts5\tool\mkfts5c.tcl fts5.lo: fts5.c $(HDR) $(EXTHDR) $(LTCOMPILE) $(NO_WARN) -DSQLITE_ENABLE_FTS5 -c fts5.c fts5.dll: fts5.lo $(LD) $(LDFLAGS) $(LTLINKOPTS) $(LTLIBPATHS) /DLL /OUT:$@ fts5.lo # Rules to build the 'testfixture' application. # # If using the amalgamation, use sqlite3.c directly to build the test # fixture. Otherwise link against libsqlite3.lib. (This distinction is # necessary because the test fixture requires non-API symbols which are # hidden when the library is built via the amalgamation). |
︙ | ︙ | |||
1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 | del /Q sqlite3.exe sqlite3.dll sqlite3.def 2>NUL del /Q sqlite3.c sqlite3-*.c 2>NUL del /Q sqlite3rc.h 2>NUL del /Q shell.c sqlite3ext.h 2>NUL del /Q sqlite3_analyzer.exe sqlite3_analyzer.c 2>NUL del /Q sqlite-*-output.vsix 2>NUL del /Q fuzzershell.exe fuzzcheck.exe sqldiff.exe 2>NUL # Dynamic link library section. # dll: sqlite3.dll sqlite3.def: libsqlite3.lib echo EXPORTS > sqlite3.def dumpbin /all libsqlite3.lib \ | $(NAWK) "/ 1 _?sqlite3_/ { sub(/^.* _?/,\"\");print }" \ | sort >> sqlite3.def sqlite3.dll: $(LIBOBJ) $(LIBRESOBJS) $(CORE_LINK_DEP) $(LD) $(LDFLAGS) $(LTLINKOPTS) $(LTLIBPATHS) /DLL $(CORE_LINK_OPTS) /OUT:$@ $(LIBOBJ) $(LIBRESOBJS) $(LTLIBS) $(TLIBS) | > | 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 | del /Q sqlite3.exe sqlite3.dll sqlite3.def 2>NUL del /Q sqlite3.c sqlite3-*.c 2>NUL del /Q sqlite3rc.h 2>NUL del /Q shell.c sqlite3ext.h 2>NUL del /Q sqlite3_analyzer.exe sqlite3_analyzer.c 2>NUL del /Q sqlite-*-output.vsix 2>NUL del /Q fuzzershell.exe fuzzcheck.exe sqldiff.exe 2>NUL del /Q fts5.c fts5parse.* 2>NUL # Dynamic link library section. # dll: sqlite3.dll sqlite3.def: libsqlite3.lib echo EXPORTS > sqlite3.def dumpbin /all libsqlite3.lib \ | $(NAWK) "/ 1 _?sqlite3_/ { sub(/^.* _?/,\"\");print }" \ | sort >> sqlite3.def sqlite3.dll: $(LIBOBJ) $(LIBRESOBJS) $(CORE_LINK_DEP) $(LD) $(LDFLAGS) $(LTLINKOPTS) $(LTLIBPATHS) /DLL $(CORE_LINK_OPTS) /OUT:$@ $(LIBOBJ) $(LIBRESOBJS) $(LTLIBS) $(TLIBS) |
Changes to ext/fts3/unicode/mkunicode.tcl.
1 |
| < < < < < < < < < < < < < < < < | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 1 2 3 4 5 6 7 8 9 | source [file join [file dirname [info script]] parseunicode.tcl] proc print_rd {map} { global tl_lookup_table set aChar [list] set lRange [list] set nRange 1 |
︙ | ︙ | |||
113 114 115 116 117 118 119 | puts "** If the argument is a codepoint corresponding to a lowercase letter" puts "** in the ASCII range with a diacritic added, return the codepoint" puts "** of the ASCII letter only. For example, if passed 235 - \"LATIN" puts "** SMALL LETTER E WITH DIAERESIS\" - return 65 (\"LATIN SMALL LETTER" puts "** E\"). The resuls of passing a codepoint that corresponds to an" puts "** uppercase letter are undefined." puts "*/" | | | 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 | puts "** If the argument is a codepoint corresponding to a lowercase letter" puts "** in the ASCII range with a diacritic added, return the codepoint" puts "** of the ASCII letter only. For example, if passed 235 - \"LATIN" puts "** SMALL LETTER E WITH DIAERESIS\" - return 65 (\"LATIN SMALL LETTER" puts "** E\"). The resuls of passing a codepoint that corresponds to an" puts "** uppercase letter are undefined." puts "*/" puts "static int ${::remove_diacritic}(int c)\{" puts " unsigned short aDia\[\] = \{" puts -nonewline " 0, " set i 1 foreach r $lRange { foreach {iCode nRange} $r {} if {($i % 8)==0} {puts "" ; puts -nonewline " " } incr i |
︙ | ︙ | |||
200 201 202 203 204 205 206 | puts " (mask1 & (1 << (c-$iFirst-32)));" puts "\}" } #------------------------------------------------------------------------- | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 128 129 130 131 132 133 134 135 136 137 138 139 140 141 | puts " (mask1 & (1 << (c-$iFirst-32)));" puts "\}" } #------------------------------------------------------------------------- proc an_load_separator_ranges {} { global unicodedata.txt set lSep [an_load_unicodedata_text ${unicodedata.txt}] unset -nocomplain iFirst unset -nocomplain nRange set lRange [list] foreach sep $lSep { |
︙ | ︙ | |||
436 437 438 439 440 441 442 | }] puts " return 0;" puts "\}" } #------------------------------------------------------------------------- | < < < < < < < < < < < < < < < < < < < < < < < | 317 318 319 320 321 322 323 324 325 326 327 328 329 330 | }] puts " return 0;" puts "\}" } #------------------------------------------------------------------------- proc tl_create_records {} { global tl_lookup_table set iFirst "" set nOff 0 set nRange 0 set nIncr 0 |
︙ | ︙ | |||
622 623 624 625 626 627 628 | if {[tl_print_table_entry toggle $entry $liOff]} { lappend lHigh $entry } } tl_print_table_footer toggle tl_print_ioff_table $liOff | | < > > < | | | | | | | < | | | 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 | if {[tl_print_table_entry toggle $entry $liOff]} { lappend lHigh $entry } } tl_print_table_footer toggle tl_print_ioff_table $liOff puts [subst -nocommands { int ret = c; assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 ); if( c<128 ){ if( c>='A' && c<='Z' ) ret = c + ('a' - 'A'); }else if( c<65536 ){ const struct TableEntry *p; int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; int iLo = 0; int iRes = -1; assert( c>aEntry[0].iCode ); while( iHi>=iLo ){ int iTest = (iHi + iLo) / 2; int cmp = (c - aEntry[iTest].iCode); if( cmp>=0 ){ iRes = iTest; iLo = iTest+1; }else{ iHi = iTest-1; } } assert( iRes>=0 && c>=aEntry[iRes].iCode ); p = &aEntry[iRes]; if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; assert( ret>0 ); } if( bRemoveDiacritic ) ret = ${::remove_diacritic}(ret); } }] foreach entry $lHigh { tl_print_if_entry $entry } puts "" puts " return ret;" |
︙ | ︙ | |||
728 729 730 731 732 733 734 | */ /* ** DO NOT EDIT THIS MACHINE GENERATED FILE. */ }] puts "" | > > > | | > | 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 | */ /* ** DO NOT EDIT THIS MACHINE GENERATED FILE. */ }] puts "" if {$::generate_fts5_code} { puts "#if defined(SQLITE_ENABLE_FTS5)" } else { puts "#ifndef SQLITE_DISABLE_FTS3_UNICODE" puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)" } puts "" puts "#include <assert.h>" puts "" } proc print_test_main {} { puts "" |
︙ | ︙ | |||
756 757 758 759 760 761 762 | puts "\}" } # Proces the command line arguments. Exit early if they are not to # our liking. # proc usage {} { | | | < > > | > > > > > > > > > > > > > > > > > | | | | | > > > | | > | 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 | puts "\}" } # Proces the command line arguments. Exit early if they are not to # our liking. # proc usage {} { puts -nonewline stderr "Usage: $::argv0 ?-test? ?-fts5? " puts stderr "<CaseFolding.txt file> <UnicodeData.txt file>" exit 1 } if {[llength $argv]<2} usage set unicodedata.txt [lindex $argv end] set casefolding.txt [lindex $argv end-1] set remove_diacritic remove_diacritic set generate_test_code 0 set generate_fts5_code 0 set function_prefix "sqlite3Fts" for {set i 0} {$i < [llength $argv]-2} {incr i} { switch -- [lindex $argv $i] { -test { set generate_test_code 1 } -fts5 { set function_prefix sqlite3Fts5 set generate_fts5_code 1 set remove_diacritic fts5_remove_diacritic } default { usage } } } print_fileheader # Print the isalnum() function to stdout. # set lRange [an_load_separator_ranges] print_isalnum ${function_prefix}UnicodeIsalnum $lRange # Leave a gap between the two generated C functions. # puts "" puts "" # Load the fold data. This is used by the [rd_XXX] commands # as well as [print_fold]. tl_load_casefolding_txt ${casefolding.txt} set mappings [rd_load_unicodedata_text ${unicodedata.txt}] print_rd $mappings puts "" puts "" print_isdiacritic ${function_prefix}UnicodeIsdiacritic $mappings puts "" puts "" # Print the fold() function to stdout. # print_fold ${function_prefix}UnicodeFold # Print the test routines and main() function to stdout, if -test # was specified. 
# if {$::generate_test_code} { print_test_isalnum ${function_prefix}UnicodeIsalnum $lRange print_fold_test ${function_prefix}UnicodeFold $mappings print_test_main } if {$generate_fts5_code} { puts "#endif /* defined(SQLITE_ENABLE_FTS5) */" } else { puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */" puts "#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */" } |
Added ext/fts3/unicode/parseunicode.tcl.

#--------------------------------------------------------------------------
# Parameter $zName must be a path to the file UnicodeData.txt. This command
# reads the file and returns a list of mappings required to remove all
# diacritical marks from a unicode string. Each mapping is itself a list
# consisting of two elements - the unicode codepoint and the single ASCII
# character that it should be replaced with, or an empty string if the
# codepoint should simply be removed from the input. Examples:
#
#    { 224 a  }     (replace codepoint 224 to "a")
#    { 769 "" }     (remove codepoint 769 from input)
#
# Mappings are only returned for non-upper case codepoints. It is assumed
# that the input has already been folded to lower case.
#
# Codepoints that already have an entry in the global array tl_lookup_table
# are skipped. The returned list is sorted by codepoint.
#
# Raises "parse error: <line>" if a non-empty line does not have exactly
# 15 ';'-separated fields. The file is closed before the error is raised.
#
proc rd_load_unicodedata_text {zName} {
  global tl_lookup_table

  set fd [open $zName]
  set lField {
    code
    character_name
    general_category
    canonical_combining_classes
    bidirectional_category
    character_decomposition_mapping
    decimal_digit_value
    digit_value
    numeric_value
    mirrored
    unicode_1_name
    iso10646_comment_field
    uppercase_mapping
    lowercase_mapping
    titlecase_mapping
  }
  set lRet [list]

  while { ![eof $fd] } {
    set line [gets $fd]
    if {$line == ""} continue

    set fields [split $line ";"]
    if {[llength $fields] != [llength $lField]} {
      # Do not leak the channel when bailing out on malformed input.
      close $fd
      error "parse error: $line"
    }

    # Assign each field to the local variable of the same name.
    foreach $lField $fields {}

    # Only two-element decompositions whose first element is a hex
    # codepoint (no "<compat>"-style tag) are of interest.
    if { [llength $character_decomposition_mapping]!=2
      || [string is xdigit [lindex $character_decomposition_mapping 0]]==0
    } {
      continue
    }

    set iCode  [expr "0x$code"]
    set iAscii [expr "0x[lindex $character_decomposition_mapping 0]"]
    set iDia   [expr "0x[lindex $character_decomposition_mapping 1]"]

    if {[info exists tl_lookup_table($iCode)]} continue

    # The base character must be an ASCII letter (a-z or A-Z). It is
    # always recorded in lower case.
    if { ($iAscii >= 97 && $iAscii <= 122)
      || ($iAscii >= 65 && $iAscii <= 90)
    } {
      lappend lRet [list $iCode [string tolower [format %c $iAscii]]]
      set dia($iDia) 1
    }
  }
  close $fd

  # Each diacritic seen above maps to the empty string (i.e. is removed).
  foreach d [array names dia] {
    lappend lRet [list $d ""]
  }

  lsort -integer -index 0 $lRet
}

#-------------------------------------------------------------------------
# Parameter $zName must be a path to the file UnicodeData.txt. This command
# reads the file and returns a list of codepoints (integers). The list
# contains all codepoints in the UnicodeData.txt assigned to any "General
# Category" that is not a "Letter" or "Number". Codepoints in category
# "Co" are also treated as alphanumeric and so are omitted from the list.
#
# Raises "parse error: <line>" if a non-empty line does not have exactly
# 15 ';'-separated fields. The file is closed before the error is raised.
#
proc an_load_unicodedata_text {zName} {
  set fd [open $zName]
  set lField {
    code
    character_name
    general_category
    canonical_combining_classes
    bidirectional_category
    character_decomposition_mapping
    decimal_digit_value
    digit_value
    numeric_value
    mirrored
    unicode_1_name
    iso10646_comment_field
    uppercase_mapping
    lowercase_mapping
    titlecase_mapping
  }
  set lRet [list]

  while { ![eof $fd] } {
    set line [gets $fd]
    if {$line == ""} continue

    set fields [split $line ";"]
    if {[llength $fields] != [llength $lField]} {
      # Do not leak the channel when bailing out on malformed input.
      close $fd
      error "parse error: $line"
    }

    # Assign each field to the local variable of the same name.
    foreach $lField $fields {}

    set iCode [expr "0x$code"]
    set bAlnum [expr {
         [lsearch {L N} [string range $general_category 0 0]] >= 0
      || $general_category=="Co"
    }]

    if { !$bAlnum } { lappend lRet $iCode }
  }

  close $fd
  set lRet
}

#-------------------------------------------------------------------------
# Parameter $zName must be a path to the file CaseFolding.txt. This command
# reads the file and populates the global array tl_lookup_table. For each
# line whose status field is "C" or "S", the array entry keyed by the
# source codepoint(s) is set to the folded codepoint(s), both as decimal
# integers. Blank lines and lines starting with "#" are ignored.
#
proc tl_load_casefolding_txt {zName} {
  global tl_lookup_table

  set fd [open $zName]
  while { ![eof $fd] } {
    set line [gets $fd]

    if {[string range $line 0 0] == "#"} continue
    if {$line == ""} continue

    # Fields are: <code>; <status>; <mapping>; # <name>
    foreach x {a b c d} {unset -nocomplain $x}
    foreach {a b c d} [split $line ";"] {}

    set a2 [list]
    set c2 [list]
    foreach elem $a { lappend a2 [expr "0x[string trim $elem]"] }
    foreach elem $c { lappend c2 [expr "0x[string trim $elem]"] }
    set b [string trim $b]
    set d [string trim $d]

    if {$b=="C" || $b=="S"} { set tl_lookup_table($a2) $c2 }
  }

  # The file was previously left open here; release the channel.
  close $fd
}
Added ext/fts5/extract_api_docs.tcl.
#
# 2014 August 24
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#--------------------------------------------------------------------------
#
# This script extracts the documentation for the API used by fts5 auxiliary
# functions from header file fts5.h. It outputs html text on stdout that
# is included in the documentation on the web.
#

# When the command [hd_putsnl] does not exist, output goes directly to
# stdout and the mode may be taken from the first command line argument.
# Otherwise output is accumulated in ::fts5_docs_output, which is the
# result of the script.
set ::fts5_docs_output ""
if {[info commands hd_putsnl]==""} {
  if {[llength $argv]>0} { set ::extract_api_docs_mode [lindex $argv 0] }
  proc output {text} {
    puts $text
  }
} else {
  proc output {text} {
    append ::fts5_docs_output "$text\n"
  }
}
if {[info exists ::extract_api_docs_mode]==0} {set ::extract_api_docs_mode api}


# Read the entire fts5.h file, located in the same directory as this script.
set input_file [file join [file dir [info script]] fts5.h]
set fd [open $input_file]
set data [read $fd]
close $fd


# Argument $data is the entire text of the fts5.h file. This function
# extracts the definition of the Fts5ExtensionApi structure from it and
# returns a key/value list of structure member names and definitions. i.e.
#
#     iVersion {int iVersion} xUserData {void *(*xUserData)(Fts5Context*)} ...
#
proc get_struct_members {data} {

  # Extract the structure definition from the fts5.h file.
  regexp "struct Fts5ExtensionApi {(.*?)};" $data -> defn

  # Remove all comments from the structure definition
  regsub -all {/[*].*?[*]/} $defn {} defn2

  set res [list]
  foreach member [split $defn2 {;}] {

    set member [string trim $member]
    if {$member!=""} {
      # Default to the last word of the declaration. For function pointer
      # members the regexp below overrides this with the name in "(*name)".
      catch { set name [lindex $member end] }
      regexp {.*?[(][*]([^)]*)[)]} $member -> name
      lappend res $name $member
    }
  }

  set res
}

# Argument $data is the entire text of the fts5.h file and $names a list of
# Fts5ExtensionApi member names. This function extracts the comment that
# follows the text "EXTENSION API FUNCTIONS" and splits it into sections.
# It returns a key/value list mapping each section header to its
# documentation text. If the first word of a header line is one of $names,
# that name alone is used as the key. Otherwise, the trimmed header line is.
#
proc get_struct_docs {data names} {

  # Extract the documentation comment from the fts5.h file.
  regexp {EXTENSION API FUNCTIONS(.*?)[*]/} $data -> docs

  # Initialized so that an empty list is returned if no section is found.
  set res [list]
  set current_doc    ""
  set current_header ""

  foreach line [split $docs "\n"] {
    # Strip the leading run of "*" characters, if any.
    regsub {[*]*} $line {} line

    # NOTE(review): the number of spaces in this pattern may have been
    # collapsed when this text was extracted - confirm against upstream.
    if {[regexp {^ } $line]} {
      append current_doc "$line\n"
    } elseif {[string trim $line]==""} {
      if {$current_header!=""} { append current_doc "\n" }
    } else {
      # A new header line. Flush the section accumulated so far.
      if {$current_doc != ""} {
        lappend res $current_header $current_doc
        set current_doc ""
      }
      set subject n/a
      regexp {^ *([[:alpha:]]*)} $line -> subject
      if {[lsearch $names $subject]>=0} {
        set current_header $subject
      } else {
        set current_header [string trim $line]
      }
    }
  }

  if {$current_doc != ""} {
    lappend res $current_header $current_doc
  }

  set res
}

# Argument $data is the entire text of the fts5.h file. This function
# extracts the comment text beginning at "xCreate:" and returns it
# formatted as an html <dl> list, with one <dt> element for each line
# that matches the pattern for a method name followed by a colon.
#
proc get_tokenizer_docs {data} {
  regexp {(xCreate:.*?)[*]/} $data -> docs

  set res "<dl>\n"
  foreach line [split [string trim $docs] "\n"] {
    # Strip everything up to and including the leading "**".
    regexp {[*][*](.*)} $line -> line
    if {[regexp {^ ?x.*:} $line]} {
      append res "<dt><b>$line</b></dt><dd><p style=margin-top:0>\n"
      continue
    }
    if {[string trim $line] == ""} {
      append res "<p>\n"
    } else {
      append res "$line\n"
    }
  }
  append res "</dl>\n"

  set res
}

# Argument $data is the entire text of the fts5.h file. This function
# outputs html documentation for each section returned by get_struct_docs.
#
proc get_api_docs {data} {
  # Initialize array M as a map from Fts5ExtensionApi member name
  # to member definition. i.e.
  #
  #   iVersion  -> {int iVersion}
  #   xUserData -> {void *(*xUserData)(Fts5Context*)}
  #   ...
  #
  array set M [get_struct_members $data]

  # Initialize list D as a map from section name to documentation
  # text. Most (all?) section names are structure member names.
  #
  set D [get_struct_docs $data [array names M]]

  foreach {sub docs} $D {
    if {[info exists M($sub)]} {
      set hdr $M($sub)
      set link " id=$sub"
    } else {
      # Not a structure member. Use the section name itself as the
      # heading, instead of an unset or stale value of hdr.
      set hdr $sub
      set link ""
    }

    output "<hr color=#eeeee style=\"margin:1em 8.4ex 0 8.4ex;\"$link>"
    set style "padding-left:6ex;font-size:1.4em;display:block"
    output "<h style=\"$style\"><pre>$hdr</pre></h>"

    # Variable mode is the name of the currently open html element
    # ("p" or "codeblock"), or an empty string if there is none.
    set mode ""
    foreach line [split [string trim $docs] "\n"] {
      if {[string trim $line]==""} {
        if {$mode != ""} {output "</$mode>"}
        set mode ""
      } elseif {$mode == ""} {
        # NOTE(review): the number of spaces in this pattern may have been
        # collapsed when this text was extracted - confirm against upstream.
        if {[regexp {^ } $line]} {
          set mode codeblock
        } else {
          set mode p
        }
        output "<$mode>"
      }
      output $line
    }
    if {$mode != ""} {output "</$mode>"}
  }
}

# Return the lines of $data from the first line matching regular
# expression $start up to and including the next line matching $end, for
# every such range. C comments in the result are wrapped in <i>...</i>.
#
proc get_fts5_struct {data start end} {
  set res ""
  set bOut 0
  foreach line [split $data "\n"] {
    if {$bOut==0} {
      if {[regexp $start $line]} {
        set bOut 1
      }
    }

    if {$bOut} {
      append res "$line\n"
    }

    if {$bOut} {
      if {[regexp $end $line]} {
        set bOut 0
      }
    }
  }

  set map [list /* <i>/* */ */</i>]
  string map $map $res
}

# Generate the output selected by ::extract_api_docs_mode. Unrecognized
# modes produce no output.
#
proc main {data} {
  switch -- $::extract_api_docs_mode {
    fts5_api {
      output [get_fts5_struct $data "typedef struct fts5_api" "^\};"]
    }

    fts5_tokenizer {
      output [get_fts5_struct $data "typedef struct Fts5Tokenizer" "^\};"]
    }

    fts5_extension {
      output [get_fts5_struct $data "typedef.*Fts5ExtensionApi" "^.;"]
    }

    Fts5ExtensionApi {
      set struct [get_fts5_struct $data "^struct Fts5ExtensionApi" "^.;"]
      # Turn the name of each method (member names beginning with "x")
      # into a link to its documentation section.
      set map [list]
      foreach {k v} [get_struct_members $data] {
        if {[string match x* $k]==0} continue
        lappend map $k "<a href=#$k>$k</a>"
      }
      output [string map $map $struct]
    }

    api {
      get_api_docs $data
    }

    tokenizer_api {
      output [get_tokenizer_docs $data]
    }

    default {
    }
  }
}
main $data

set ::fts5_docs_output
Added ext/fts5/fts5.h.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 | /* ** 2014 May 31 ** ** The author disclaims copyright to this source code. 
In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** ** Interfaces to extend FTS5. Using the interfaces defined in this file, ** FTS5 may be extended with: ** ** * custom tokenizers, and ** * custom auxiliary functions. */ #ifndef _FTS5_H #define _FTS5_H #include "sqlite3.h" /************************************************************************* ** CUSTOM AUXILIARY FUNCTIONS ** ** Virtual table implementations may overload SQL functions by implementing ** the sqlite3_module.xFindFunction() method. */ typedef struct Fts5ExtensionApi Fts5ExtensionApi; typedef struct Fts5Context Fts5Context; typedef void (*fts5_extension_function)( const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ Fts5Context *pFts, /* First arg to pass to pApi functions */ sqlite3_context *pCtx, /* Context for returning result/error */ int nVal, /* Number of values in apVal[] array */ sqlite3_value **apVal /* Array of trailing arguments */ ); /* ** EXTENSION API FUNCTIONS ** ** xUserData(pFts): ** Return a copy of the context pointer the extension function was ** registered with. ** ** xColumnTotalSize(pFts, iCol, pnToken): ** If parameter iCol is less than zero, set output variable *pnToken ** to the total number of tokens in the FTS5 table. Or, if iCol is ** non-negative but less than the number of columns in the table, return ** the total number of tokens in column iCol, considering all rows in ** the FTS5 table. ** ** If parameter iCol is greater than or equal to the number of columns ** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g. ** an OOM condition or IO error), an appropriate SQLite error code is ** returned. ** ** xColumnCount: ** Returns the number of columns in the FTS5 table. 
** ** xColumnSize: ** Reports the size in tokens of a column value from the current row. ** ** xColumnText: ** This function attempts to retrieve the text of column iCol of the ** current document. If successful, (*pz) is set to point to a buffer ** containing the text in utf-8 encoding, (*pn) is set to the size in bytes ** (not characters) of the buffer and SQLITE_OK is returned. Otherwise, ** if an error occurs, an SQLite error code is returned and the final values ** of (*pz) and (*pn) are undefined. ** ** xPhraseCount: ** Returns the number of phrases in the current query expression. ** ** xPhraseSize: ** Returns the number of tokens in phrase iPhrase of the query. Phrases ** are numbered starting from zero. ** ** xInstCount: ** Set *pnInst to the total number of occurrences of all phrases within ** the query within the current row. Return SQLITE_OK if successful, or ** an error code (i.e. SQLITE_NOMEM) if an error occurs. ** ** xInst: ** Query for the details of phrase match iIdx within the current row. ** Phrase matches are numbered starting from zero, so the iIdx argument ** should be greater than or equal to zero and smaller than the value ** output by xInstCount(). ** ** Returns SQLITE_OK if successful, or an error code (i.e. SQLITE_NOMEM) ** if an error occurs. ** ** xRowid: ** Returns the rowid of the current row. ** ** xTokenize: ** Tokenize text using the tokenizer belonging to the FTS5 table. ** ** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback): ** This API function is used to query the FTS table for phrase iPhrase ** of the current query. Specifically, a query equivalent to: ** ** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid ** ** with $p set to a phrase equivalent to the phrase iPhrase of the ** current query is executed. For each row visited, the callback function ** passed as the fourth argument is invoked. The context and API objects ** passed to the callback function may be used to access the properties of ** each matched row. 
Invoking Api.xUserData() returns a copy of the pointer ** passed as the third argument to pUserData. ** ** If the callback function returns any value other than SQLITE_OK, the ** query is abandoned and the xQueryPhrase function returns immediately. ** If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK. ** Otherwise, the error code is propagated upwards. ** ** If the query runs to completion without incident, SQLITE_OK is returned. ** Or, if some error occurs before the query completes or is aborted by ** the callback, an SQLite error code is returned. ** ** ** xSetAuxdata(pFts5, pAux, xDelete) ** ** Save the pointer passed as the second argument as the extension function's ** "auxiliary data". The pointer may then be retrieved by the current or any ** future invocation of the same fts5 extension function made as part ** of the same MATCH query using the xGetAuxdata() API. ** ** Each extension function is allocated a single auxiliary data slot for ** each FTS query (MATCH expression). If the extension function is invoked ** more than once for a single FTS query, then all invocations share a ** single auxiliary data context. ** ** If there is already an auxiliary data pointer when this function is ** invoked, then it is replaced by the new pointer. If an xDelete callback ** was specified along with the original pointer, it is invoked at this ** point. ** ** The xDelete callback, if one is specified, is also invoked on the ** auxiliary data pointer after the FTS5 query has finished. ** ** If an error (e.g. an OOM condition) occurs within this function, ** the auxiliary data is set to NULL and an error code returned. If the ** xDelete parameter was not NULL, it is invoked on the auxiliary data ** pointer before returning. ** ** ** xGetAuxdata(pFts5, bClear) ** ** Returns the current auxiliary data pointer for the fts5 extension ** function. See the xSetAuxdata() method for details. 
** ** If the bClear argument is non-zero, then the auxiliary data is cleared ** (set to NULL) before this function returns. In this case the xDelete, ** if any, is not invoked. ** ** ** xRowCount(pFts5, pnRow) ** ** This function is used to retrieve the total number of rows in the table. ** In other words, the same value that would be returned by: ** ** SELECT count(*) FROM ftstable; */ struct Fts5ExtensionApi { int iVersion; /* Currently always set to 1 */ void *(*xUserData)(Fts5Context*); int (*xColumnCount)(Fts5Context*); int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow); int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken); int (*xTokenize)(Fts5Context*, const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ int (*xToken)(void*, const char*, int, int, int) /* Callback */ ); int (*xPhraseCount)(Fts5Context*); int (*xPhraseSize)(Fts5Context*, int iPhrase); int (*xInstCount)(Fts5Context*, int *pnInst); int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff); sqlite3_int64 (*xRowid)(Fts5Context*); int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn); int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken); int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData, int(*)(const Fts5ExtensionApi*,Fts5Context*,void*) ); int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*)); void *(*xGetAuxdata)(Fts5Context*, int bClear); }; /* ** CUSTOM AUXILIARY FUNCTIONS *************************************************************************/ /************************************************************************* ** CUSTOM TOKENIZERS ** ** Applications may also register custom tokenizer types. A tokenizer ** is registered by providing fts5 with a populated instance of the ** following structure. All structure methods must be defined, setting ** any member of the fts5_tokenizer struct to NULL leads to undefined ** behaviour. 
The structure methods are expected to function as follows: ** ** xCreate: ** This function is used to allocate and initialize a tokenizer instance. ** A tokenizer instance is required to actually tokenize text. ** ** The first argument passed to this function is a copy of the (void*) ** pointer provided by the application when the fts5_tokenizer object ** was registered with FTS5 (the third argument to xCreateTokenizer()). ** The second and third arguments are an array of nul-terminated strings ** containing the tokenizer arguments, if any, specified following the ** tokenizer name as part of the CREATE VIRTUAL TABLE statement used ** to create the FTS5 table. ** ** The final argument is an output variable. If successful, (*ppOut) ** should be set to point to the new tokenizer handle and SQLITE_OK ** returned. If an error occurs, some value other than SQLITE_OK should ** be returned. In this case, fts5 assumes that the final value of *ppOut ** is undefined. ** ** xDelete: ** This function is invoked to delete a tokenizer handle previously ** allocated using xCreate(). Fts5 guarantees that this function will ** be invoked exactly once for each successful call to xCreate(). ** ** xTokenize: ** This function is expected to tokenize the nText byte string indicated ** by argument pText. pText may or may not be nul-terminated. The first argument ** passed to this function is a pointer to an Fts5Tokenizer object returned ** by an earlier call to xCreate(). ** ** For each token in the input string, the supplied callback xToken() must ** be invoked. The first argument to it should be a copy of the pointer ** passed as the second argument to xTokenize(). The next two arguments ** are a pointer to a buffer containing the token text, and the size of ** the token in bytes. The 4th and 5th arguments are the byte offsets of ** the first byte of and first byte immediately following the text from ** which the token is derived within the input. 
** ** FTS5 assumes the xToken() callback is invoked for each token in the ** order that they occur within the input text. ** ** If an xToken() callback returns any value other than SQLITE_OK, then ** the tokenization should be abandoned and the xTokenize() method should ** immediately return a copy of the xToken() return value. Or, if the ** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally, ** if an error occurs with the xTokenize() implementation itself, it ** may abandon the tokenization and return any error code other than ** SQLITE_OK or SQLITE_DONE. ** */ typedef struct Fts5Tokenizer Fts5Tokenizer; typedef struct fts5_tokenizer fts5_tokenizer; struct fts5_tokenizer { int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut); void (*xDelete)(Fts5Tokenizer*); int (*xTokenize)(Fts5Tokenizer*, void *pCtx, const char *pText, int nText, int (*xToken)( void *pCtx, /* Copy of 2nd argument to xTokenize() */ const char *pToken, /* Pointer to buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Byte offset of token within input text */ int iEnd /* Byte offset of end of token within input text */ ) ); }; /* ** END OF CUSTOM TOKENIZERS *************************************************************************/ /************************************************************************* ** FTS5 EXTENSION REGISTRATION API */ typedef struct fts5_api fts5_api; struct fts5_api { int iVersion; /* Currently always set to 1 */ /* Create a new tokenizer */ int (*xCreateTokenizer)( fts5_api *pApi, const char *zName, void *pContext, fts5_tokenizer *pTokenizer, void (*xDestroy)(void*) ); /* Find an existing tokenizer */ int (*xFindTokenizer)( fts5_api *pApi, const char *zName, void **ppContext, fts5_tokenizer *pTokenizer ); /* Create a new auxiliary function */ int (*xCreateFunction)( fts5_api *pApi, const char *zName, void *pContext, fts5_extension_function xFunction, void (*xDestroy)(void*) ); }; /* ** END OF 
REGISTRATION API *************************************************************************/ #endif /* _FTS5_H */ |
Added ext/fts5/fts5Int.h.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 
179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 
679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 | /* ** 2014 May 31 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** */ #ifndef _FTS5INT_H #define _FTS5INT_H #ifdef SQLITE_ENABLE_FTS5 #include "fts5.h" #include "sqlite3ext.h" SQLITE_EXTENSION_INIT1 #include <string.h> #include <assert.h> #ifndef SQLITE_AMALGAMATION typedef unsigned char u8; typedef unsigned int u32; typedef unsigned short u16; typedef sqlite3_int64 i64; typedef sqlite3_uint64 u64; #define ArraySize(x) (sizeof(x) / sizeof(x[0])) #define testcase(x) #define ALWAYS(x) 1 #define NEVER(x) 0 #define MIN(x,y) (((x) < (y)) ? (x) : (y)) #endif /* ** Maximum number of prefix indexes on a single FTS5 table. This must be ** less than 32. If it is set to anything larger than that, an #error ** directive in fts5_index.c will cause the build to fail. */ #define FTS5_MAX_PREFIX_INDEXES 31 #define FTS5_DEFAULT_NEARDIST 10 #define FTS5_DEFAULT_RANK "bm25" /* Name of rank and rowid columns */ #define FTS5_RANK_NAME "rank" #define FTS5_ROWID_NAME "rowid" #ifdef SQLITE_DEBUG # define FTS5_CORRUPT sqlite3Fts5Corrupt() int sqlite3Fts5Corrupt(void); #else # define FTS5_CORRUPT SQLITE_CORRUPT_VTAB #endif /* ** The assert_nc() macro is similar to the assert() macro, except that it ** is used for assert() conditions that are true only if it can be ** guaranteed that the database is not corrupt. 
*/ #ifdef SQLITE_DEBUG extern int sqlite3_fts5_may_be_corrupt; # define assert_nc(x) assert(sqlite3_fts5_may_be_corrupt || (x)) #else # define assert_nc(x) assert(x) #endif typedef struct Fts5Global Fts5Global; /************************************************************************** ** Interface to code in fts5_config.c. fts5_config.c contains code ** to parse the arguments passed to the CREATE VIRTUAL TABLE statement. */ typedef struct Fts5Config Fts5Config; /* ** An instance of the following structure encodes all information that can ** be gleaned from the CREATE VIRTUAL TABLE statement. ** ** And all information loaded from the %_config table. ** ** nAutomerge: ** The minimum number of segments that an auto-merge operation should ** attempt to merge together. A value of 1 sets the object to use the ** compile time default. Zero disables auto-merge altogether. ** ** zContent: ** ** zContentRowid: ** The value of the content_rowid= option, if one was specified. Or ** the string "rowid" otherwise. This text is not quoted - if it is ** used as part of an SQL statement it needs to be quoted appropriately. ** ** zContentExprlist: ** ** pzErrmsg: ** This exists in order to allow the fts5_index.c module to return a ** decent error message if it encounters a file-format version it does ** not understand. ** ** bColumnsize: ** True if the %_docsize table is created. ** */ struct Fts5Config { sqlite3 *db; /* Database handle */ char *zDb; /* Database holding FTS index (e.g. 
"main") */ char *zName; /* Name of FTS index */ int nCol; /* Number of columns */ char **azCol; /* Column names */ u8 *abUnindexed; /* True for unindexed columns */ int nPrefix; /* Number of prefix indexes */ int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */ int eContent; /* An FTS5_CONTENT value */ char *zContent; /* content table */ char *zContentRowid; /* "content_rowid=" option value */ int bColumnsize; /* "columnsize=" option value (dflt==1) */ char *zContentExprlist; Fts5Tokenizer *pTok; fts5_tokenizer *pTokApi; /* Values loaded from the %_config table */ int iCookie; /* Incremented when %_config is modified */ int pgsz; /* Approximate page size used in %_data */ int nAutomerge; /* 'automerge' setting */ int nCrisisMerge; /* Maximum allowed segments per level */ char *zRank; /* Name of rank function */ char *zRankArgs; /* Arguments to rank function */ /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */ char **pzErrmsg; }; /* Current expected value of %_config table 'version' field */ #define FTS5_CURRENT_VERSION 2 #define FTS5_CONTENT_NORMAL 0 #define FTS5_CONTENT_NONE 1 #define FTS5_CONTENT_EXTERNAL 2 int sqlite3Fts5ConfigParse( Fts5Global*, sqlite3*, int, const char **, Fts5Config**, char** ); void sqlite3Fts5ConfigFree(Fts5Config*); int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig); int sqlite3Fts5Tokenize( Fts5Config *pConfig, /* FTS5 Configuration object */ const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ int (*xToken)(void*, const char*, int, int, int) /* Callback */ ); void sqlite3Fts5Dequote(char *z); /* Load the contents of the %_config table */ int sqlite3Fts5ConfigLoad(Fts5Config*, int); /* Set the value of a single config attribute */ int sqlite3Fts5ConfigSetValue(Fts5Config*, const char*, sqlite3_value*, int*); int sqlite3Fts5ConfigParseRank(const char*, char**, char**); /* ** End of interface to code in fts5_config.c. 
**************************************************************************/ /************************************************************************** ** Interface to code in fts5_buffer.c. */ /* ** Buffer object for the incremental building of string data. */ typedef struct Fts5Buffer Fts5Buffer; struct Fts5Buffer { u8 *p; int n; int nSpace; }; int sqlite3Fts5BufferGrow(int*, Fts5Buffer*, int); void sqlite3Fts5BufferAppendVarint(int*, Fts5Buffer*, i64); void sqlite3Fts5BufferAppendBlob(int*, Fts5Buffer*, int, const u8*); void sqlite3Fts5BufferAppendString(int *, Fts5Buffer*, const char*); void sqlite3Fts5BufferFree(Fts5Buffer*); void sqlite3Fts5BufferZero(Fts5Buffer*); void sqlite3Fts5BufferSet(int*, Fts5Buffer*, int, const u8*); void sqlite3Fts5BufferAppendPrintf(int *, Fts5Buffer*, char *zFmt, ...); void sqlite3Fts5BufferAppend32(int*, Fts5Buffer*, int); char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...); #define fts5BufferZero(x) sqlite3Fts5BufferZero(x) #define fts5BufferGrow(a,b,c) sqlite3Fts5BufferGrow(a,b,c) #define fts5BufferAppendVarint(a,b,c) sqlite3Fts5BufferAppendVarint(a,b,c) #define fts5BufferFree(a) sqlite3Fts5BufferFree(a) #define fts5BufferAppendBlob(a,b,c,d) sqlite3Fts5BufferAppendBlob(a,b,c,d) #define fts5BufferSet(a,b,c,d) sqlite3Fts5BufferSet(a,b,c,d) #define fts5BufferAppend32(a,b,c) sqlite3Fts5BufferAppend32(a,b,c) /* Write and decode big-endian 32-bit integer values */ void sqlite3Fts5Put32(u8*, int); int sqlite3Fts5Get32(const u8*); #define FTS5_POS2COLUMN(iPos) (int)(iPos >> 32) #define FTS5_POS2OFFSET(iPos) (int)(iPos & 0xFFFFFFFF) typedef struct Fts5PoslistReader Fts5PoslistReader; struct Fts5PoslistReader { /* Variables used only by sqlite3Fts5PoslistIterXXX() functions. 
*/ int iCol; /* If (iCol>=0), this column only */ const u8 *a; /* Position list to iterate through */ int n; /* Size of buffer at a[] in bytes */ int i; /* Current offset in a[] */ /* Output variables */ int bEof; /* Set to true at EOF */ i64 iPos; /* (iCol<<32) + iPos */ }; int sqlite3Fts5PoslistReaderInit( int iCol, /* If (iCol>=0), this column only */ const u8 *a, int n, /* Poslist buffer to iterate through */ Fts5PoslistReader *pIter /* Iterator object to initialize */ ); int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader*); typedef struct Fts5PoslistWriter Fts5PoslistWriter; struct Fts5PoslistWriter { i64 iPrev; }; int sqlite3Fts5PoslistWriterAppend(Fts5Buffer*, Fts5PoslistWriter*, i64); int sqlite3Fts5PoslistNext64( const u8 *a, int n, /* Buffer containing poslist */ int *pi, /* IN/OUT: Offset within a[] */ i64 *piOff /* IN/OUT: Current offset */ ); /* Malloc utility */ void *sqlite3Fts5MallocZero(int *pRc, int nByte); char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn); /* Character set tests (like isspace(), isalpha() etc.) */ int sqlite3Fts5IsBareword(char t); /* ** End of interface to code in fts5_buffer.c. **************************************************************************/ /************************************************************************** ** Interface to code in fts5_index.c. fts5_index.c contains code ** to access the data stored in the %_data table. */ typedef struct Fts5Index Fts5Index; typedef struct Fts5IndexIter Fts5IndexIter; /* ** Values used as part of the flags argument passed to IndexQuery(). */ #define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */ #define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */ #define FTS5INDEX_QUERY_TEST_NOIDX 0x0004 /* Do not use prefix index */ #define FTS5INDEX_QUERY_SCAN 0x0008 /* Scan query (fts5vocab) */ /* ** Create/destroy an Fts5Index object. 
*/ int sqlite3Fts5IndexOpen(Fts5Config *pConfig, int bCreate, Fts5Index**, char**); int sqlite3Fts5IndexClose(Fts5Index *p); /* ** for( ** pIter = sqlite3Fts5IndexQuery(p, "token", 5, 0); ** 0==sqlite3Fts5IterEof(pIter); ** sqlite3Fts5IterNext(pIter) ** ){ ** i64 iRowid = sqlite3Fts5IterRowid(pIter); ** } */ /* ** Open a new iterator to iterate through all docids that match the ** specified token or token prefix. */ int sqlite3Fts5IndexQuery( Fts5Index *p, /* FTS index to query */ const char *pToken, int nToken, /* Token (or prefix) to query for */ int flags, /* Mask of FTS5INDEX_QUERY_X flags */ Fts5IndexIter **ppIter ); /* ** The various operations on open token or token prefix iterators opened ** using sqlite3Fts5IndexQuery(). */ int sqlite3Fts5IterEof(Fts5IndexIter*); int sqlite3Fts5IterNext(Fts5IndexIter*); int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch); i64 sqlite3Fts5IterRowid(Fts5IndexIter*); int sqlite3Fts5IterPoslist(Fts5IndexIter*, const u8 **pp, int *pn, i64 *pi); int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf); /* ** Close an iterator opened by sqlite3Fts5IndexQuery(). */ void sqlite3Fts5IterClose(Fts5IndexIter*); /* ** This interface is used by the fts5vocab module. */ const char *sqlite3Fts5IterTerm(Fts5IndexIter*, int*); int sqlite3Fts5IterNextScan(Fts5IndexIter*); /* ** Insert or remove data to or from the index. Each time a document is ** added to or removed from the index, this function is called one or more ** times. ** ** For an insert, it must be called once for each token in the new document. ** If the operation is a delete, it must be called (at least) once for each ** unique token in the document with an iCol value less than zero. The iPos ** argument is ignored for a delete. 
*/ int sqlite3Fts5IndexWrite( Fts5Index *p, /* Index to write to */ int iCol, /* Column token appears in (-ve -> delete) */ int iPos, /* Position of token within column */ const char *pToken, int nToken /* Token to add or remove to or from index */ ); /* ** Indicate that subsequent calls to sqlite3Fts5IndexWrite() pertain to ** document iDocid. */ int sqlite3Fts5IndexBeginWrite( Fts5Index *p, /* Index to write to */ i64 iDocid /* Docid to add or remove data from */ ); /* ** Flush any data stored in the in-memory hash tables to the database. ** If the bCommit flag is true, also close any open blob handles. */ int sqlite3Fts5IndexSync(Fts5Index *p, int bCommit); /* ** Discard any data stored in the in-memory hash tables. Do not write it ** to the database. Additionally, assume that the contents of the %_data ** table may have changed on disk. So any in-memory caches of %_data ** records must be invalidated. */ int sqlite3Fts5IndexRollback(Fts5Index *p); /* ** Retrieve and clear the current error code, respectively. */ int sqlite3Fts5IndexErrcode(Fts5Index*); void sqlite3Fts5IndexReset(Fts5Index*); /* ** Get or set the "averages" record. */ int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf); int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int); /* ** Functions called by the storage module as part of integrity-check. */ u64 sqlite3Fts5IndexCksum(Fts5Config*,i64,int,int,const char*,int); int sqlite3Fts5IndexIntegrityCheck(Fts5Index*, u64 cksum); /* ** Called during virtual module initialization to register UDF ** fts5_decode() with SQLite */ int sqlite3Fts5IndexInit(sqlite3*); int sqlite3Fts5IndexSetCookie(Fts5Index*, int); /* ** Return the total number of entries read from the %_data table by ** this connection since it was created. 
*/ int sqlite3Fts5IndexReads(Fts5Index *p); int sqlite3Fts5IndexReinit(Fts5Index *p); int sqlite3Fts5IndexOptimize(Fts5Index *p); int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge); int sqlite3Fts5IndexLoadConfig(Fts5Index *p); /* ** End of interface to code in fts5_index.c. **************************************************************************/ /************************************************************************** ** Interface to code in fts5_varint.c. */ int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v); int sqlite3Fts5GetVarintLen(u32 iVal); u8 sqlite3Fts5GetVarint(const unsigned char*, u64*); int sqlite3Fts5PutVarint(unsigned char *p, u64 v); #define fts5GetVarint32(a,b) sqlite3Fts5GetVarint32(a,(u32*)&b) #define fts5GetVarint sqlite3Fts5GetVarint /* ** End of interface to code in fts5_varint.c. **************************************************************************/ /************************************************************************** ** Interface to code in fts5.c. */ int sqlite3Fts5GetTokenizer( Fts5Global*, const char **azArg, int nArg, Fts5Tokenizer**, fts5_tokenizer**, char **pzErr ); Fts5Index *sqlite3Fts5IndexFromCsrid(Fts5Global*, i64, int*); /* ** End of interface to code in fts5.c. **************************************************************************/ /************************************************************************** ** Interface to code in fts5_hash.c. */ typedef struct Fts5Hash Fts5Hash; /* ** Create a hash table, free a hash table. */ int sqlite3Fts5HashNew(Fts5Hash**, int *pnSize); void sqlite3Fts5HashFree(Fts5Hash*); int sqlite3Fts5HashWrite( Fts5Hash*, i64 iRowid, /* Rowid for this entry */ int iCol, /* Column token appears in (-ve -> delete) */ int iPos, /* Position of token within column */ char bByte, const char *pToken, int nToken /* Token to add or remove to or from index */ ); /* ** Empty (but do not delete) a hash table. 
*/ void sqlite3Fts5HashClear(Fts5Hash*); int sqlite3Fts5HashQuery( Fts5Hash*, /* Hash table to query */ const char *pTerm, int nTerm, /* Query term */ const u8 **ppDoclist, /* OUT: Pointer to doclist for pTerm */ int *pnDoclist /* OUT: Size of doclist in bytes */ ); int sqlite3Fts5HashScanInit( Fts5Hash*, /* Hash table to query */ const char *pTerm, int nTerm /* Query prefix */ ); void sqlite3Fts5HashScanNext(Fts5Hash*); int sqlite3Fts5HashScanEof(Fts5Hash*); void sqlite3Fts5HashScanEntry(Fts5Hash *, const char **pzTerm, /* OUT: term (nul-terminated) */ const u8 **ppDoclist, /* OUT: pointer to doclist */ int *pnDoclist /* OUT: size of doclist in bytes */ ); /* ** End of interface to code in fts5_hash.c. **************************************************************************/ /************************************************************************** ** Interface to code in fts5_storage.c. fts5_storage.c contains ** code to access the data stored in the %_content and %_docsize tables. */ #define FTS5_STMT_SCAN_ASC 0 /* SELECT rowid, * FROM ... ORDER BY 1 ASC */ #define FTS5_STMT_SCAN_DESC 1 /* SELECT rowid, * FROM ... ORDER BY 1 DESC */ #define FTS5_STMT_LOOKUP 2 /* SELECT rowid, * FROM ... WHERE rowid=? 
*/ typedef struct Fts5Storage Fts5Storage; int sqlite3Fts5StorageOpen(Fts5Config*, Fts5Index*, int, Fts5Storage**, char**); int sqlite3Fts5StorageClose(Fts5Storage *p); int sqlite3Fts5StorageRename(Fts5Storage*, const char *zName); int sqlite3Fts5DropAll(Fts5Config*); int sqlite3Fts5CreateTable(Fts5Config*, const char*, const char*, int, char **); int sqlite3Fts5StorageDelete(Fts5Storage *p, i64); int sqlite3Fts5StorageInsert(Fts5Storage *p, sqlite3_value **apVal, int, i64*); int sqlite3Fts5StorageIntegrity(Fts5Storage *p); int sqlite3Fts5StorageStmt(Fts5Storage *p, int eStmt, sqlite3_stmt**, char**); void sqlite3Fts5StorageStmtRelease(Fts5Storage *p, int eStmt, sqlite3_stmt*); int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol); int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnAvg); int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow); int sqlite3Fts5StorageSync(Fts5Storage *p, int bCommit); int sqlite3Fts5StorageRollback(Fts5Storage *p); int sqlite3Fts5StorageConfigValue( Fts5Storage *p, const char*, sqlite3_value*, int ); int sqlite3Fts5StorageSpecialDelete(Fts5Storage *p, i64 iDel, sqlite3_value**); int sqlite3Fts5StorageDeleteAll(Fts5Storage *p); int sqlite3Fts5StorageRebuild(Fts5Storage *p); int sqlite3Fts5StorageOptimize(Fts5Storage *p); int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge); /* ** End of interface to code in fts5_storage.c. **************************************************************************/ /************************************************************************** ** Interface to code in fts5_expr.c. 
*/ typedef struct Fts5Expr Fts5Expr; typedef struct Fts5ExprNode Fts5ExprNode; typedef struct Fts5Parse Fts5Parse; typedef struct Fts5Token Fts5Token; typedef struct Fts5ExprPhrase Fts5ExprPhrase; typedef struct Fts5ExprNearset Fts5ExprNearset; typedef struct Fts5ExprColset Fts5ExprColset; struct Fts5Token { const char *p; /* Token text (not NULL terminated) */ int n; /* Size of buffer p in bytes */ }; /* Parse a MATCH expression. */ int sqlite3Fts5ExprNew( Fts5Config *pConfig, const char *zExpr, Fts5Expr **ppNew, char **pzErr ); /* ** for(rc = sqlite3Fts5ExprFirst(pExpr, pIdx, bDesc); ** rc==SQLITE_OK && 0==sqlite3Fts5ExprEof(pExpr); ** rc = sqlite3Fts5ExprNext(pExpr) ** ){ ** // The document with rowid iRowid matches the expression! ** i64 iRowid = sqlite3Fts5ExprRowid(pExpr); ** } */ int sqlite3Fts5ExprFirst(Fts5Expr*, Fts5Index *pIdx, i64 iMin, int bDesc); int sqlite3Fts5ExprNext(Fts5Expr*, i64 iMax); int sqlite3Fts5ExprEof(Fts5Expr*); i64 sqlite3Fts5ExprRowid(Fts5Expr*); void sqlite3Fts5ExprFree(Fts5Expr*); /* Called during startup to register a UDF with SQLite */ int sqlite3Fts5ExprInit(Fts5Global*, sqlite3*); int sqlite3Fts5ExprPhraseCount(Fts5Expr*); int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase); int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **); int sqlite3Fts5ExprPhraseExpr(Fts5Config*, Fts5Expr*, int, Fts5Expr**); /******************************************* ** The fts5_expr.c API above this point is used by the other hand-written ** C code in this module. The interfaces below this point are called by ** the parser code in fts5parse.y. 
*/ void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...); Fts5ExprNode *sqlite3Fts5ParseNode( Fts5Parse *pParse, int eType, Fts5ExprNode *pLeft, Fts5ExprNode *pRight, Fts5ExprNearset *pNear ); Fts5ExprPhrase *sqlite3Fts5ParseTerm( Fts5Parse *pParse, Fts5ExprPhrase *pPhrase, Fts5Token *pToken, int bPrefix ); Fts5ExprNearset *sqlite3Fts5ParseNearset( Fts5Parse*, Fts5ExprNearset*, Fts5ExprPhrase* ); Fts5ExprColset *sqlite3Fts5ParseColset( Fts5Parse*, Fts5ExprColset*, Fts5Token * ); void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase*); void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset*); void sqlite3Fts5ParseNodeFree(Fts5ExprNode*); void sqlite3Fts5ParseSetDistance(Fts5Parse*, Fts5ExprNearset*, Fts5Token*); void sqlite3Fts5ParseSetColset(Fts5Parse*, Fts5ExprNearset*, Fts5ExprColset*); void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p); void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token*); /* ** End of interface to code in fts5_expr.c. **************************************************************************/ /************************************************************************** ** Interface to code in fts5_aux.c. */ int sqlite3Fts5AuxInit(fts5_api*); /* ** End of interface to code in fts5_aux.c. **************************************************************************/ /************************************************************************** ** Interface to code in fts5_tokenizer.c. */ int sqlite3Fts5TokenizerInit(fts5_api*); /* ** End of interface to code in fts5_tokenizer.c. **************************************************************************/ /************************************************************************** ** Interface to code in fts5_sorter.c. */ typedef struct Fts5Sorter Fts5Sorter; int sqlite3Fts5SorterNew(Fts5Expr *pExpr, Fts5Sorter **pp); /* ** End of interface to code in fts5_sorter.c. 
**************************************************************************/ /************************************************************************** ** Interface to code in fts5_vocab.c. */ int sqlite3Fts5VocabInit(Fts5Global*, sqlite3*); /* ** End of interface to code in fts5_vocab.c. **************************************************************************/ /************************************************************************** ** Interface to automatically generated code in fts5_unicode2.c. */ int sqlite3Fts5UnicodeIsalnum(int c); int sqlite3Fts5UnicodeIsdiacritic(int c); int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic); /* ** End of interface to code in fts5_unicode2.c. **************************************************************************/ #endif #endif |
Added ext/fts5/fts5_aux.c.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 
249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 | /* ** 2014 May 31 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** */ #ifdef SQLITE_ENABLE_FTS5 #include "fts5Int.h" #include <math.h> /* ** Object used to iterate through all "coalesced phrase instances" in ** a single column of the current row. If the phrase instances in the ** column being considered do not overlap, this object simply iterates ** through them. 
Or, if they do overlap (share one or more tokens in ** common), each set of overlapping instances is treated as a single ** match. See documentation for the highlight() auxiliary function for ** details. ** ** Usage is: ** ** for(rc = fts5CInstIterNext(pApi, pFts, iCol, &iter); ** (rc==SQLITE_OK && 0==fts5CInstIterEof(&iter); ** rc = fts5CInstIterNext(&iter) ** ){ ** printf("instance starts at %d, ends at %d\n", iter.iStart, iter.iEnd); ** } ** */ typedef struct CInstIter CInstIter; struct CInstIter { const Fts5ExtensionApi *pApi; /* API offered by current FTS version */ Fts5Context *pFts; /* First arg to pass to pApi functions */ int iCol; /* Column to search */ int iInst; /* Next phrase instance index */ int nInst; /* Total number of phrase instances */ /* Output variables */ int iStart; /* First token in coalesced phrase instance */ int iEnd; /* Last token in coalesced phrase instance */ }; /* ** Advance the iterator to the next coalesced phrase instance. Return ** an SQLite error code if an error occurs, or SQLITE_OK otherwise. */ static int fts5CInstIterNext(CInstIter *pIter){ int rc = SQLITE_OK; pIter->iStart = -1; pIter->iEnd = -1; while( rc==SQLITE_OK && pIter->iInst<pIter->nInst ){ int ip; int ic; int io; rc = pIter->pApi->xInst(pIter->pFts, pIter->iInst, &ip, &ic, &io); if( rc==SQLITE_OK ){ if( ic==pIter->iCol ){ int iEnd = io - 1 + pIter->pApi->xPhraseSize(pIter->pFts, ip); if( pIter->iStart<0 ){ pIter->iStart = io; pIter->iEnd = iEnd; }else if( io<=pIter->iEnd ){ if( iEnd>pIter->iEnd ) pIter->iEnd = iEnd; }else{ break; } } pIter->iInst++; } } return rc; } /* ** Initialize the iterator object indicated by the final parameter to ** iterate through coalesced phrase instances in column iCol. 
*/ static int fts5CInstIterInit( const Fts5ExtensionApi *pApi, Fts5Context *pFts, int iCol, CInstIter *pIter ){ int rc; memset(pIter, 0, sizeof(CInstIter)); pIter->pApi = pApi; pIter->pFts = pFts; pIter->iCol = iCol; rc = pApi->xInstCount(pFts, &pIter->nInst); if( rc==SQLITE_OK ){ rc = fts5CInstIterNext(pIter); } return rc; } /************************************************************************* ** Start of highlight() implementation. */ typedef struct HighlightContext HighlightContext; struct HighlightContext { CInstIter iter; /* Coalesced Instance Iterator */ int iPos; /* Current token offset in zIn[] */ int iRangeStart; /* First token to include */ int iRangeEnd; /* If non-zero, last token to include */ const char *zOpen; /* Opening highlight */ const char *zClose; /* Closing highlight */ const char *zIn; /* Input text */ int nIn; /* Size of input text in bytes */ int iOff; /* Current offset within zIn[] */ char *zOut; /* Output value */ }; /* ** Append text to the HighlightContext output string - p->zOut. Argument ** z points to a buffer containing n bytes of text to append. If n is ** negative, everything up until the first '\0' is appended to the output. ** ** If *pRc is set to any value other than SQLITE_OK when this function is ** called, it is a no-op. If an error (i.e. an OOM condition) is encountered, ** *pRc is set to an error code before returning. */ static void fts5HighlightAppend( int *pRc, HighlightContext *p, const char *z, int n ){ if( *pRc==SQLITE_OK ){ if( n<0 ) n = strlen(z); p->zOut = sqlite3_mprintf("%z%.*s", p->zOut, n, z); if( p->zOut==0 ) *pRc = SQLITE_NOMEM; } } /* ** Tokenizer callback used by implementation of highlight() function. 
**
** Invoked once for each token in the input text, with pContext pointing
** to the HighlightContext. Text from p->zIn[] is copied to the output,
** with p->zOpen and p->zClose inserted around each coalesced phrase
** instance reported by p->iter. If p->iRangeEnd is greater than zero,
** only tokens iRangeStart..iRangeEnd (inclusive) contribute to the output.
*/
static int fts5HighlightCb(
  void *pContext,                 /* Pointer to HighlightContext object */
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStartOff,                  /* Start offset of token */
  int iEndOff                     /* End offset of token */
){
  HighlightContext *p = (HighlightContext*)pContext;
  int rc = SQLITE_OK;
  int iPos = p->iPos++;           /* Index of this token; counter advances */

  /* If a token range is in effect, ignore tokens that lie outside of it.
  ** When the first token of a range that does not begin at token 0 is
  ** reached, skip the input text that precedes it. */
  if( p->iRangeEnd>0 ){
    if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK;
    if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff;
  }

  /* First token of a phrase instance: flush the text that precedes the
  ** token, then emit the opening markup. */
  if( iPos==p->iter.iStart ){
    fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iStartOff - p->iOff);
    fts5HighlightAppend(&rc, p, p->zOpen, -1);
    p->iOff = iStartOff;
  }

  /* Last token of a phrase instance: emit text up to the end of the token
  ** followed by the closing markup, then advance to the next instance. */
  if( iPos==p->iter.iEnd ){
    if( p->iRangeEnd && p->iter.iStart<p->iRangeStart ){
      /* The instance began before the start of the range, so the opening
      ** markup has not been emitted yet. */
      fts5HighlightAppend(&rc, p, p->zOpen, -1);
    }
    fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
    fts5HighlightAppend(&rc, p, p->zClose, -1);
    p->iOff = iEndOff;
    if( rc==SQLITE_OK ){
      rc = fts5CInstIterNext(&p->iter);
    }
  }

  /* Last token of the range: flush text up to the end of the token. If the
  ** current instance extends past the range, close the markup here. */
  if( p->iRangeEnd>0 && iPos==p->iRangeEnd ){
    fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
    p->iOff = iEndOff;
    if( iPos<p->iter.iEnd ){
      fts5HighlightAppend(&rc, p, p->zClose, -1);
    }
  }

  return rc;
}

/*
** Implementation of highlight() function.
*/ static void fts5HighlightFunction( const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ Fts5Context *pFts, /* First arg to pass to pApi functions */ sqlite3_context *pCtx, /* Context for returning result/error */ int nVal, /* Number of values in apVal[] array */ sqlite3_value **apVal /* Array of trailing arguments */ ){ HighlightContext ctx; int rc; int iCol; if( nVal!=3 ){ const char *zErr = "wrong number of arguments to function highlight()"; sqlite3_result_error(pCtx, zErr, -1); return; } iCol = sqlite3_value_int(apVal[0]); memset(&ctx, 0, sizeof(HighlightContext)); ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]); ctx.zClose = (const char*)sqlite3_value_text(apVal[2]); rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn); if( ctx.zIn ){ if( rc==SQLITE_OK ){ rc = fts5CInstIterInit(pApi, pFts, iCol, &ctx.iter); } if( rc==SQLITE_OK ){ rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb); } fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); if( rc==SQLITE_OK ){ sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT); } sqlite3_free(ctx.zOut); } if( rc!=SQLITE_OK ){ sqlite3_result_error_code(pCtx, rc); } } /* ** End of highlight() implementation. **************************************************************************/ /* ** Implementation of snippet() function. 
*/ static void fts5SnippetFunction( const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ Fts5Context *pFts, /* First arg to pass to pApi functions */ sqlite3_context *pCtx, /* Context for returning result/error */ int nVal, /* Number of values in apVal[] array */ sqlite3_value **apVal /* Array of trailing arguments */ ){ HighlightContext ctx; int rc = SQLITE_OK; /* Return code */ int iCol; /* 1st argument to snippet() */ const char *zEllips; /* 4th argument to snippet() */ int nToken; /* 5th argument to snippet() */ int nInst; /* Number of instance matches this row */ int i; /* Used to iterate through instances */ int nPhrase; /* Number of phrases in query */ unsigned char *aSeen; /* Array of "seen instance" flags */ int iBestCol; /* Column containing best snippet */ int iBestStart = 0; /* First token of best snippet */ int iBestLast; /* Last token of best snippet */ int nBestScore = 0; /* Score of best snippet */ int nColSize; /* Total size of iBestCol in tokens */ if( nVal!=5 ){ const char *zErr = "wrong number of arguments to function snippet()"; sqlite3_result_error(pCtx, zErr, -1); return; } memset(&ctx, 0, sizeof(HighlightContext)); iCol = sqlite3_value_int(apVal[0]); ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]); ctx.zClose = (const char*)sqlite3_value_text(apVal[2]); zEllips = (const char*)sqlite3_value_text(apVal[3]); nToken = sqlite3_value_int(apVal[4]); iBestLast = nToken-1; iBestCol = (iCol>=0 ? 
iCol : 0); nPhrase = pApi->xPhraseCount(pFts); aSeen = sqlite3_malloc(nPhrase); if( aSeen==0 ){ rc = SQLITE_NOMEM; } if( rc==SQLITE_OK ){ rc = pApi->xInstCount(pFts, &nInst); } for(i=0; rc==SQLITE_OK && i<nInst; i++){ int ip, iSnippetCol, iStart; memset(aSeen, 0, nPhrase); rc = pApi->xInst(pFts, i, &ip, &iSnippetCol, &iStart); if( rc==SQLITE_OK && (iCol<0 || iSnippetCol==iCol) ){ int nScore = 1000; int iLast = iStart - 1 + pApi->xPhraseSize(pFts, ip); int j; aSeen[ip] = 1; for(j=i+1; rc==SQLITE_OK && j<nInst; j++){ int ic; int io; int iFinal; rc = pApi->xInst(pFts, j, &ip, &ic, &io); iFinal = io + pApi->xPhraseSize(pFts, ip) - 1; if( rc==SQLITE_OK && ic==iSnippetCol && iLast<iStart+nToken ){ nScore += aSeen[ip] ? 1000 : 1; aSeen[ip] = 1; if( iFinal>iLast ) iLast = iFinal; } } if( rc==SQLITE_OK && nScore>nBestScore ){ iBestCol = iSnippetCol; iBestStart = iStart; iBestLast = iLast; nBestScore = nScore; } } } if( rc==SQLITE_OK ){ rc = pApi->xColumnSize(pFts, iBestCol, &nColSize); } if( rc==SQLITE_OK ){ rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn); } if( ctx.zIn ){ if( rc==SQLITE_OK ){ rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter); } if( (iBestStart+nToken-1)>iBestLast ){ iBestStart -= (iBestStart+nToken-1-iBestLast) / 2; } if( iBestStart+nToken>nColSize ){ iBestStart = nColSize - nToken; } if( iBestStart<0 ) iBestStart = 0; ctx.iRangeStart = iBestStart; ctx.iRangeEnd = iBestStart + nToken - 1; if( iBestStart>0 ){ fts5HighlightAppend(&rc, &ctx, zEllips, -1); } if( rc==SQLITE_OK ){ rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb); } if( ctx.iRangeEnd>=(nColSize-1) ){ fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); }else{ fts5HighlightAppend(&rc, &ctx, zEllips, -1); } if( rc==SQLITE_OK ){ sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT); }else{ sqlite3_result_error_code(pCtx, rc); } sqlite3_free(ctx.zOut); } sqlite3_free(aSeen); } 
/************************************************************************/ /* ** The first time the bm25() function is called for a query, an instance ** of the following structure is allocated and populated. */ typedef struct Fts5Bm25Data Fts5Bm25Data; struct Fts5Bm25Data { int nPhrase; /* Number of phrases in query */ double avgdl; /* Average number of tokens in each row */ double *aIDF; /* IDF for each phrase */ double *aFreq; /* Array used to calculate phrase freq. */ }; /* ** Callback used by fts5Bm25GetData() to count the number of rows in the ** table matched by each individual phrase within the query. */ static int fts5CountCb( const Fts5ExtensionApi *pApi, Fts5Context *pFts, void *pUserData /* Pointer to sqlite3_int64 variable */ ){ sqlite3_int64 *pn = (sqlite3_int64*)pUserData; (*pn)++; return SQLITE_OK; } /* ** Set *ppData to point to the Fts5Bm25Data object for the current query. ** If the object has not already been allocated, allocate and populate it ** now. */ static int fts5Bm25GetData( const Fts5ExtensionApi *pApi, Fts5Context *pFts, Fts5Bm25Data **ppData /* OUT: bm25-data object for this query */ ){ int rc = SQLITE_OK; /* Return code */ Fts5Bm25Data *p; /* Object to return */ p = pApi->xGetAuxdata(pFts, 0); if( p==0 ){ int nPhrase; /* Number of phrases in query */ sqlite3_int64 nRow; /* Number of rows in table */ sqlite3_int64 nToken; /* Number of tokens in table */ int nByte; /* Bytes of space to allocate */ int i; /* Allocate the Fts5Bm25Data object */ nPhrase = pApi->xPhraseCount(pFts); nByte = sizeof(Fts5Bm25Data) + nPhrase*2*sizeof(double); p = (Fts5Bm25Data*)sqlite3_malloc(nByte); if( p==0 ){ rc = SQLITE_NOMEM; }else{ memset(p, 0, nByte); p->nPhrase = nPhrase; p->aIDF = (double*)&p[1]; p->aFreq = &p->aIDF[nPhrase]; } /* Calculate the average document length for this FTS5 table */ if( rc==SQLITE_OK ) rc = pApi->xRowCount(pFts, &nRow); if( rc==SQLITE_OK ) rc = pApi->xColumnTotalSize(pFts, -1, &nToken); if( rc==SQLITE_OK ) p->avgdl = 
(double)nToken / (double)nRow; /* Calculate an IDF for each phrase in the query */ for(i=0; rc==SQLITE_OK && i<nPhrase; i++){ sqlite3_int64 nHit = 0; rc = pApi->xQueryPhrase(pFts, i, (void*)&nHit, fts5CountCb); if( rc==SQLITE_OK ){ /* Calculate the IDF (Inverse Document Frequency) for phrase i. ** This is done using the standard BM25 formula as found on wikipedia: ** ** IDF = log( (N - nHit + 0.5) / (nHit + 0.5) ) ** ** where "N" is the total number of documents in the set and nHit ** is the number that contain at least one instance of the phrase ** under consideration. ** ** The problem with this is that if (N < 2*nHit), the IDF is ** negative. Which is undesirable. So the mimimum allowable IDF is ** (1e-6) - roughly the same as a term that appears in just over ** half of set of 5,000,000 documents. */ double idf = log( (nRow - nHit + 0.5) / (nHit + 0.5) ); if( idf<=0.0 ) idf = 1e-6; p->aIDF[i] = idf; } } if( rc!=SQLITE_OK ){ sqlite3_free(p); }else{ rc = pApi->xSetAuxdata(pFts, p, sqlite3_free); } if( rc!=SQLITE_OK ) p = 0; } *ppData = p; return rc; } /* ** Implementation of bm25() function. */ static void fts5Bm25Function( const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ Fts5Context *pFts, /* First arg to pass to pApi functions */ sqlite3_context *pCtx, /* Context for returning result/error */ int nVal, /* Number of values in apVal[] array */ sqlite3_value **apVal /* Array of trailing arguments */ ){ const double k1 = 1.2; /* Constant "k1" from BM25 formula */ const double b = 0.75; /* Constant "b" from BM25 formula */ int rc = SQLITE_OK; /* Error code */ double score = 0.0; /* SQL function return value */ Fts5Bm25Data *pData; /* Values allocated/calculated once only */ int i; /* Iterator variable */ int nInst; /* Value returned by xInstCount() */ double D; /* Total number of tokens in row */ double *aFreq; /* Array of phrase freq. 
for current row */ /* Calculate the phrase frequency (symbol "f(qi,D)" in the documentation) ** for each phrase in the query for the current row. */ rc = fts5Bm25GetData(pApi, pFts, &pData); if( rc==SQLITE_OK ){ aFreq = pData->aFreq; memset(aFreq, 0, sizeof(double) * pData->nPhrase); rc = pApi->xInstCount(pFts, &nInst); } for(i=0; rc==SQLITE_OK && i<nInst; i++){ int ip; int ic; int io; rc = pApi->xInst(pFts, i, &ip, &ic, &io); if( rc==SQLITE_OK ){ double w = (nVal > ic) ? sqlite3_value_double(apVal[ic]) : 1.0; aFreq[ip] += w; } } /* Figure out the total size of the current row in tokens. */ if( rc==SQLITE_OK ){ int nTok; rc = pApi->xColumnSize(pFts, -1, &nTok); D = (double)nTok; } /* Determine the BM25 score for the current row. */ for(i=0; rc==SQLITE_OK && i<pData->nPhrase; i++){ score += pData->aIDF[i] * ( ( aFreq[i] * (k1 + 1.0) ) / ( aFreq[i] + k1 * (1 - b + b * D / pData->avgdl) ) ); } /* If no error has occurred, return the calculated score. Otherwise, ** throw an SQL exception. */ if( rc==SQLITE_OK ){ sqlite3_result_double(pCtx, -1.0 * score); }else{ sqlite3_result_error_code(pCtx, rc); } } int sqlite3Fts5AuxInit(fts5_api *pApi){ struct Builtin { const char *zFunc; /* Function name (nul-terminated) */ void *pUserData; /* User-data pointer */ fts5_extension_function xFunc;/* Callback function */ void (*xDestroy)(void*); /* Destructor function */ } aBuiltin [] = { { "snippet", 0, fts5SnippetFunction, 0 }, { "highlight", 0, fts5HighlightFunction, 0 }, { "bm25", 0, fts5Bm25Function, 0 }, }; int rc = SQLITE_OK; /* Return code */ int i; /* To iterate through builtin functions */ for(i=0; rc==SQLITE_OK && i<sizeof(aBuiltin)/sizeof(aBuiltin[0]); i++){ rc = pApi->xCreateFunction(pApi, aBuiltin[i].zFunc, aBuiltin[i].pUserData, aBuiltin[i].xFunc, aBuiltin[i].xDestroy ); } return rc; } #endif /* SQLITE_ENABLE_FTS5 */ |
Added ext/fts5/fts5_buffer.c.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 | /* ** 2014 May 31 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. 
** May you share freely, never taking more than you give. ** ****************************************************************************** */ #ifdef SQLITE_ENABLE_FTS5 #include "fts5Int.h" int sqlite3Fts5BufferGrow(int *pRc, Fts5Buffer *pBuf, int nByte){ /* A no-op if an error has already occurred */ if( *pRc ) return 1; if( (pBuf->n + nByte) > pBuf->nSpace ){ u8 *pNew; int nNew = pBuf->nSpace ? pBuf->nSpace*2 : 64; while( nNew<(pBuf->n + nByte) ){ nNew = nNew * 2; } pNew = sqlite3_realloc(pBuf->p, nNew); if( pNew==0 ){ *pRc = SQLITE_NOMEM; return 1; }else{ pBuf->nSpace = nNew; pBuf->p = pNew; } } return 0; } /* ** Encode value iVal as an SQLite varint and append it to the buffer object ** pBuf. If an OOM error occurs, set the error code in p. */ void sqlite3Fts5BufferAppendVarint(int *pRc, Fts5Buffer *pBuf, i64 iVal){ if( sqlite3Fts5BufferGrow(pRc, pBuf, 9) ) return; pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iVal); } void sqlite3Fts5Put32(u8 *aBuf, int iVal){ aBuf[0] = (iVal>>24) & 0x00FF; aBuf[1] = (iVal>>16) & 0x00FF; aBuf[2] = (iVal>> 8) & 0x00FF; aBuf[3] = (iVal>> 0) & 0x00FF; } int sqlite3Fts5Get32(const u8 *aBuf){ return (aBuf[0] << 24) + (aBuf[1] << 16) + (aBuf[2] << 8) + aBuf[3]; } void sqlite3Fts5BufferAppend32(int *pRc, Fts5Buffer *pBuf, int iVal){ if( sqlite3Fts5BufferGrow(pRc, pBuf, 4) ) return; sqlite3Fts5Put32(&pBuf->p[pBuf->n], iVal); pBuf->n += 4; } /* ** Append buffer nData/pData to buffer pBuf. If an OOM error occurs, set ** the error code in p. If an error has already occurred when this function ** is called, it is a no-op. */ void sqlite3Fts5BufferAppendBlob( int *pRc, Fts5Buffer *pBuf, int nData, const u8 *pData ){ assert( *pRc || nData>=0 ); if( sqlite3Fts5BufferGrow(pRc, pBuf, nData) ) return; memcpy(&pBuf->p[pBuf->n], pData, nData); pBuf->n += nData; } /* ** Append the nul-terminated string zStr to the buffer pBuf. 
This function ** ensures that the byte following the buffer data is set to 0x00, even ** though this byte is not included in the pBuf->n count. */ void sqlite3Fts5BufferAppendString( int *pRc, Fts5Buffer *pBuf, const char *zStr ){ int nStr = strlen(zStr); sqlite3Fts5BufferAppendBlob(pRc, pBuf, nStr+1, (const u8*)zStr); pBuf->n--; } /* ** Argument zFmt is a printf() style format string. This function performs ** the printf() style processing, then appends the results to buffer pBuf. ** ** Like sqlite3Fts5BufferAppendString(), this function ensures that the byte ** following the buffer data is set to 0x00, even though this byte is not ** included in the pBuf->n count. */ void sqlite3Fts5BufferAppendPrintf( int *pRc, Fts5Buffer *pBuf, char *zFmt, ... ){ if( *pRc==SQLITE_OK ){ char *zTmp; va_list ap; va_start(ap, zFmt); zTmp = sqlite3_vmprintf(zFmt, ap); va_end(ap); if( zTmp==0 ){ *pRc = SQLITE_NOMEM; }else{ sqlite3Fts5BufferAppendString(pRc, pBuf, zTmp); sqlite3_free(zTmp); } } } char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...){ char *zRet = 0; if( *pRc==SQLITE_OK ){ va_list ap; va_start(ap, zFmt); zRet = sqlite3_vmprintf(zFmt, ap); va_end(ap); if( zRet==0 ){ *pRc = SQLITE_NOMEM; } } return zRet; } /* ** Free any buffer allocated by pBuf. Zero the structure before returning. */ void sqlite3Fts5BufferFree(Fts5Buffer *pBuf){ sqlite3_free(pBuf->p); memset(pBuf, 0, sizeof(Fts5Buffer)); } /* ** Zero the contents of the buffer object. But do not free the associated ** memory allocation. */ void sqlite3Fts5BufferZero(Fts5Buffer *pBuf){ pBuf->n = 0; } /* ** Set the buffer to contain nData/pData. If an OOM error occurs, leave an ** the error code in p. If an error has already occurred when this function ** is called, it is a no-op. 
*/ void sqlite3Fts5BufferSet( int *pRc, Fts5Buffer *pBuf, int nData, const u8 *pData ){ pBuf->n = 0; sqlite3Fts5BufferAppendBlob(pRc, pBuf, nData, pData); } int sqlite3Fts5PoslistNext64( const u8 *a, int n, /* Buffer containing poslist */ int *pi, /* IN/OUT: Offset within a[] */ i64 *piOff /* IN/OUT: Current offset */ ){ int i = *pi; if( i>=n ){ /* EOF */ *piOff = -1; return 1; }else{ i64 iOff = *piOff; int iVal; i += fts5GetVarint32(&a[i], iVal); if( iVal==1 ){ i += fts5GetVarint32(&a[i], iVal); iOff = ((i64)iVal) << 32; i += fts5GetVarint32(&a[i], iVal); } *piOff = iOff + (iVal-2); *pi = i; return 0; } } /* ** Advance the iterator object passed as the only argument. Return true ** if the iterator reaches EOF, or false otherwise. */ int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader *pIter){ if( sqlite3Fts5PoslistNext64(pIter->a, pIter->n, &pIter->i, &pIter->iPos) || (pIter->iCol>=0 && (pIter->iPos >> 32) > pIter->iCol) ){ pIter->bEof = 1; } return pIter->bEof; } int sqlite3Fts5PoslistReaderInit( int iCol, /* If (iCol>=0), this column only */ const u8 *a, int n, /* Poslist buffer to iterate through */ Fts5PoslistReader *pIter /* Iterator object to initialize */ ){ memset(pIter, 0, sizeof(*pIter)); pIter->a = a; pIter->n = n; pIter->iCol = iCol; do { sqlite3Fts5PoslistReaderNext(pIter); }while( pIter->bEof==0 && (pIter->iPos >> 32)<iCol ); return pIter->bEof; } int sqlite3Fts5PoslistWriterAppend( Fts5Buffer *pBuf, Fts5PoslistWriter *pWriter, i64 iPos ){ static const i64 colmask = ((i64)(0x7FFFFFFF)) << 32; int rc = SQLITE_OK; if( (iPos & colmask) != (pWriter->iPrev & colmask) ){ fts5BufferAppendVarint(&rc, pBuf, 1); fts5BufferAppendVarint(&rc, pBuf, (iPos >> 32)); pWriter->iPrev = (iPos & colmask); } fts5BufferAppendVarint(&rc, pBuf, (iPos - pWriter->iPrev) + 2); pWriter->iPrev = iPos; return rc; } void *sqlite3Fts5MallocZero(int *pRc, int nByte){ void *pRet = 0; if( *pRc==SQLITE_OK ){ pRet = sqlite3_malloc(nByte); if( pRet==0 && nByte>0 ){ *pRc = SQLITE_NOMEM; 
}else{ memset(pRet, 0, nByte); } } return pRet; } /* ** Return a nul-terminated copy of the string indicated by pIn. If nIn ** is non-negative, then it is the length of the string in bytes. Otherwise, ** the length of the string is determined using strlen(). ** ** It is the responsibility of the caller to eventually free the returned ** buffer using sqlite3_free(). If an OOM error occurs, NULL is returned. */ char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn){ char *zRet = 0; if( *pRc==SQLITE_OK ){ if( nIn<0 ){ nIn = strlen(pIn); } zRet = (char*)sqlite3_malloc(nIn+1); if( zRet ){ memcpy(zRet, pIn, nIn); zRet[nIn] = '\0'; }else{ *pRc = SQLITE_NOMEM; } } return zRet; } /* ** Return true if character 't' may be part of an FTS5 bareword, or false ** otherwise. Characters that may be part of barewords: ** ** * All non-ASCII characters, ** * The 52 upper and lower case ASCII characters, and ** * The 10 integer ASCII characters. ** * The underscore character "_" (0x5F). */ int sqlite3Fts5IsBareword(char t){ u8 aBareword[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00 .. 0x0F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10 .. 0x1F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 .. 0x2F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30 .. 0x3F */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40 .. 0x4F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x50 .. 0x5F */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60 .. 0x6F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 /* 0x70 .. 0x7F */ }; return (t & 0x80) || aBareword[(int)t]; } #endif /* SQLITE_ENABLE_FTS5 */ |
Added ext/fts5/fts5_config.c.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 
94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 
595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 | /* ** 2014 Jun 09 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** ** This is an SQLite module implementing full-text search. */ #ifdef SQLITE_ENABLE_FTS5 #include "fts5Int.h" #define FTS5_DEFAULT_PAGE_SIZE 1000 #define FTS5_DEFAULT_AUTOMERGE 4 #define FTS5_DEFAULT_CRISISMERGE 16 /* Maximum allowed page size */ #define FTS5_MAX_PAGE_SIZE (128*1024) static int fts5_iswhitespace(char x){ return (x==' '); } static int fts5_isopenquote(char x){ return (x=='"' || x=='\'' || x=='[' || x=='`'); } /* ** Argument pIn points to a character that is part of a nul-terminated ** string. 
Return a pointer to the first character following *pIn in ** the string that is not a white-space character. */ static const char *fts5ConfigSkipWhitespace(const char *pIn){ const char *p = pIn; if( p ){ while( fts5_iswhitespace(*p) ){ p++; } } return p; } /* ** Argument pIn points to a character that is part of a nul-terminated ** string. Return a pointer to the first character following *pIn in ** the string that is not a "bareword" character. */ static const char *fts5ConfigSkipBareword(const char *pIn){ const char *p = pIn; while ( sqlite3Fts5IsBareword(*p) ) p++; if( p==pIn ) p = 0; return p; } static int fts5_isdigit(char a){ return (a>='0' && a<='9'); } static const char *fts5ConfigSkipLiteral(const char *pIn){ const char *p = pIn; switch( *p ){ case 'n': case 'N': if( sqlite3_strnicmp("null", p, 4)==0 ){ p = &p[4]; }else{ p = 0; } break; case 'x': case 'X': p++; if( *p=='\'' ){ p++; while( (*p>='a' && *p<='f') || (*p>='A' && *p<='F') || (*p>='0' && *p<='9') ){ p++; } if( *p=='\'' && 0==((p-pIn)%2) ){ p++; }else{ p = 0; } }else{ p = 0; } break; case '\'': p++; while( p ){ if( *p=='\'' ){ p++; if( *p!='\'' ) break; } p++; if( *p==0 ) p = 0; } break; default: /* maybe a number */ if( *p=='+' || *p=='-' ) p++; while( fts5_isdigit(*p) ) p++; /* At this point, if the literal was an integer, the parse is ** finished. Or, if it is a floating point value, it may continue ** with either a decimal point or an 'E' character. */ if( *p=='.' && fts5_isdigit(p[1]) ){ p += 2; while( fts5_isdigit(*p) ) p++; } if( p==pIn ) p = 0; break; } return p; } /* ** The first character of the string pointed to by argument z is guaranteed ** to be an open-quote character (see function fts5_isopenquote()). ** ** This function searches for the corresponding close-quote character within ** the string and, if found, dequotes the string in place and adds a new ** nul-terminator byte. 
** ** If the close-quote is found, the value returned is the byte offset of ** the character immediately following it. Or, if the close-quote is not ** found, -1 is returned. If -1 is returned, the buffer is left in an ** undefined state. */ static int fts5Dequote(char *z){ char q; int iIn = 1; int iOut = 0; q = z[0]; /* Set stack variable q to the close-quote character */ assert( q=='[' || q=='\'' || q=='"' || q=='`' ); if( q=='[' ) q = ']'; while( ALWAYS(z[iIn]) ){ if( z[iIn]==q ){ if( z[iIn+1]!=q ){ /* Character iIn was the close quote. */ iIn++; break; }else{ /* Character iIn and iIn+1 form an escaped quote character. Skip ** the input cursor past both and copy a single quote character ** to the output buffer. */ iIn += 2; z[iOut++] = q; } }else{ z[iOut++] = z[iIn++]; } } z[iOut] = '\0'; return iIn; } /* ** Convert an SQL-style quoted string into a normal string by removing ** the quote characters. The conversion is done in-place. If the ** input does not begin with a quote character, then this routine ** is a no-op. ** ** Examples: ** ** "abc" becomes abc ** 'xyz' becomes xyz ** [pqr] becomes pqr ** `mno` becomes mno */ void sqlite3Fts5Dequote(char *z){ char quote; /* Quote character (if any ) */ assert( 0==fts5_iswhitespace(z[0]) ); quote = z[0]; if( quote=='[' || quote=='\'' || quote=='"' || quote=='`' ){ fts5Dequote(z); } } /* ** Parse a "special" CREATE VIRTUAL TABLE directive and update ** configuration object pConfig as appropriate. ** ** If successful, object pConfig is updated and SQLITE_OK returned. If ** an error occurs, an SQLite error code is returned and an error message ** may be left in *pzErr. It is the responsibility of the caller to ** eventually free any such error message using sqlite3_free(). 
*/ static int fts5ConfigParseSpecial( Fts5Global *pGlobal, Fts5Config *pConfig, /* Configuration object to update */ const char *zCmd, /* Special command to parse */ const char *zArg, /* Argument to parse */ char **pzErr /* OUT: Error message */ ){ int rc = SQLITE_OK; int nCmd = strlen(zCmd); if( sqlite3_strnicmp("prefix", zCmd, nCmd)==0 ){ const int nByte = sizeof(int) * FTS5_MAX_PREFIX_INDEXES; const char *p; if( pConfig->aPrefix ){ *pzErr = sqlite3_mprintf("multiple prefix=... directives"); rc = SQLITE_ERROR; }else{ pConfig->aPrefix = sqlite3Fts5MallocZero(&rc, nByte); } p = zArg; while( rc==SQLITE_OK && p[0] ){ int nPre = 0; while( p[0]==' ' ) p++; while( p[0]>='0' && p[0]<='9' && nPre<1000 ){ nPre = nPre*10 + (p[0] - '0'); p++; } while( p[0]==' ' ) p++; if( p[0]==',' ){ p++; }else if( p[0] ){ *pzErr = sqlite3_mprintf("malformed prefix=... directive"); rc = SQLITE_ERROR; } if( rc==SQLITE_OK && (nPre==0 || nPre>=1000) ){ *pzErr = sqlite3_mprintf("prefix length out of range: %d", nPre); rc = SQLITE_ERROR; } pConfig->aPrefix[pConfig->nPrefix] = nPre; pConfig->nPrefix++; } return rc; } if( sqlite3_strnicmp("tokenize", zCmd, nCmd)==0 ){ const char *p = (const char*)zArg; int nArg = strlen(zArg) + 1; char **azArg = sqlite3Fts5MallocZero(&rc, sizeof(char*) * nArg); char *pDel = sqlite3Fts5MallocZero(&rc, nArg * 2); char *pSpace = pDel; if( azArg && pSpace ){ if( pConfig->pTok ){ *pzErr = sqlite3_mprintf("multiple tokenize=... 
directives"); rc = SQLITE_ERROR; }else{ for(nArg=0; p && *p; nArg++){ const char *p2 = fts5ConfigSkipWhitespace(p); if( *p2=='\'' ){ p = fts5ConfigSkipLiteral(p2); }else{ p = fts5ConfigSkipBareword(p2); } if( p ){ memcpy(pSpace, p2, p-p2); azArg[nArg] = pSpace; sqlite3Fts5Dequote(pSpace); pSpace += (p - p2) + 1; p = fts5ConfigSkipWhitespace(p); } } if( p==0 ){ *pzErr = sqlite3_mprintf("parse error in tokenize directive"); rc = SQLITE_ERROR; }else{ rc = sqlite3Fts5GetTokenizer(pGlobal, (const char**)azArg, nArg, &pConfig->pTok, &pConfig->pTokApi, pzErr ); } } } sqlite3_free(azArg); sqlite3_free(pDel); return rc; } if( sqlite3_strnicmp("content", zCmd, nCmd)==0 ){ if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){ *pzErr = sqlite3_mprintf("multiple content=... directives"); rc = SQLITE_ERROR; }else{ if( zArg[0] ){ pConfig->eContent = FTS5_CONTENT_EXTERNAL; pConfig->zContent = sqlite3Fts5Mprintf(&rc, "%Q.%Q", pConfig->zDb,zArg); }else{ pConfig->eContent = FTS5_CONTENT_NONE; } } return rc; } if( sqlite3_strnicmp("content_rowid", zCmd, nCmd)==0 ){ if( pConfig->zContentRowid ){ *pzErr = sqlite3_mprintf("multiple content_rowid=... directives"); rc = SQLITE_ERROR; }else{ pConfig->zContentRowid = sqlite3Fts5Strndup(&rc, zArg, -1); } return rc; } if( sqlite3_strnicmp("columnsize", zCmd, nCmd)==0 ){ if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){ *pzErr = sqlite3_mprintf("malformed columnsize=... directive"); rc = SQLITE_ERROR; }else{ pConfig->bColumnsize = (zArg[0]=='1'); } return rc; } *pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd); return SQLITE_ERROR; } /* ** Allocate an instance of the default tokenizer ("simple") at ** Fts5Config.pTokenizer. Return SQLITE_OK if successful, or an SQLite error ** code if an error occurs. 
*/ static int fts5ConfigDefaultTokenizer(Fts5Global *pGlobal, Fts5Config *pConfig){ assert( pConfig->pTok==0 && pConfig->pTokApi==0 ); return sqlite3Fts5GetTokenizer( pGlobal, 0, 0, &pConfig->pTok, &pConfig->pTokApi, 0 ); } /* ** Gobble up the first bareword or quoted word from the input buffer zIn. ** Return a pointer to the character immediately following the last in ** the gobbled word if successful, or a NULL pointer otherwise (failed ** to find close-quote character). ** ** Before returning, set pzOut to point to a new buffer containing a ** nul-terminated, dequoted copy of the gobbled word. If the word was ** quoted, *pbQuoted is also set to 1 before returning. ** ** If *pRc is other than SQLITE_OK when this function is called, it is ** a no-op (NULL is returned). Otherwise, if an OOM occurs within this ** function, *pRc is set to SQLITE_NOMEM before returning. *pRc is *not* ** set if a parse error (failed to find close quote) occurs. */ static const char *fts5ConfigGobbleWord( int *pRc, /* IN/OUT: Error code */ const char *zIn, /* Buffer to gobble string/bareword from */ char **pzOut, /* OUT: malloc'd buffer containing str/bw */ int *pbQuoted /* OUT: Set to true if dequoting required */ ){ const char *zRet = 0; int nIn = strlen(zIn); char *zOut = sqlite3_malloc(nIn+1); assert( *pRc==SQLITE_OK ); *pbQuoted = 0; *pzOut = 0; if( zOut==0 ){ *pRc = SQLITE_NOMEM; }else{ memcpy(zOut, zIn, nIn+1); if( fts5_isopenquote(zOut[0]) ){ int ii = fts5Dequote(zOut); zRet = &zIn[ii]; *pbQuoted = 1; }else{ zRet = fts5ConfigSkipBareword(zIn); zOut[zRet-zIn] = '\0'; } } if( zRet==0 ){ sqlite3_free(zOut); }else{ *pzOut = zOut; } return zRet; } static int fts5ConfigParseColumn( Fts5Config *p, char *zCol, char *zArg, char **pzErr ){ int rc = SQLITE_OK; if( 0==sqlite3_stricmp(zCol, FTS5_RANK_NAME) || 0==sqlite3_stricmp(zCol, FTS5_ROWID_NAME) ){ *pzErr = sqlite3_mprintf("reserved fts5 column name: %s", zCol); rc = SQLITE_ERROR; }else if( zArg ){ if( 0==sqlite3_stricmp(zArg, 
"unindexed") ){ p->abUnindexed[p->nCol] = 1; }else{ *pzErr = sqlite3_mprintf("unrecognized column option: %s", zArg); rc = SQLITE_ERROR; } } p->azCol[p->nCol++] = zCol; return rc; } /* ** Populate the Fts5Config.zContentExprlist string. */ static int fts5ConfigMakeExprlist(Fts5Config *p){ int i; int rc = SQLITE_OK; Fts5Buffer buf = {0, 0, 0}; sqlite3Fts5BufferAppendPrintf(&rc, &buf, "T.%Q", p->zContentRowid); if( p->eContent!=FTS5_CONTENT_NONE ){ for(i=0; i<p->nCol; i++){ if( p->eContent==FTS5_CONTENT_EXTERNAL ){ sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.%Q", p->azCol[i]); }else{ sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.c%d", i); } } } assert( p->zContentExprlist==0 ); p->zContentExprlist = (char*)buf.p; return rc; } /* ** Arguments nArg/azArg contain the string arguments passed to the xCreate ** or xConnect method of the virtual table. This function attempts to ** allocate an instance of Fts5Config containing the results of parsing ** those arguments. ** ** If successful, SQLITE_OK is returned and *ppOut is set to point to the ** new Fts5Config object. If an error occurs, an SQLite error code is ** returned, *ppOut is set to NULL and an error message may be left in ** *pzErr. It is the responsibility of the caller to eventually free any ** such error message using sqlite3_free(). 
*/ int sqlite3Fts5ConfigParse( Fts5Global *pGlobal, sqlite3 *db, int nArg, /* Number of arguments */ const char **azArg, /* Array of nArg CREATE VIRTUAL TABLE args */ Fts5Config **ppOut, /* OUT: Results of parse */ char **pzErr /* OUT: Error message */ ){ int rc = SQLITE_OK; /* Return code */ Fts5Config *pRet; /* New object to return */ int i; int nByte; *ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config)); if( pRet==0 ) return SQLITE_NOMEM; memset(pRet, 0, sizeof(Fts5Config)); pRet->db = db; pRet->iCookie = -1; nByte = nArg * (sizeof(char*) + sizeof(u8)); pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte); pRet->abUnindexed = (u8*)&pRet->azCol[nArg]; pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1); pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1); pRet->bColumnsize = 1; if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){ *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName); rc = SQLITE_ERROR; } for(i=3; rc==SQLITE_OK && i<nArg; i++){ const char *zOrig = azArg[i]; const char *z; char *zOne = 0; char *zTwo = 0; int bOption = 0; int bMustBeCol = 0; z = fts5ConfigGobbleWord(&rc, zOrig, &zOne, &bMustBeCol); z = fts5ConfigSkipWhitespace(z); if( z && *z=='=' ){ bOption = 1; z++; if( bMustBeCol ) z = 0; } z = fts5ConfigSkipWhitespace(z); if( z && z[0] ){ int bDummy; z = fts5ConfigGobbleWord(&rc, z, &zTwo, &bDummy); if( z && z[0] ) z = 0; } if( rc==SQLITE_OK ){ if( z==0 ){ *pzErr = sqlite3_mprintf("parse error in \"%s\"", zOrig); rc = SQLITE_ERROR; }else{ if( bOption ){ rc = fts5ConfigParseSpecial(pGlobal, pRet, zOne, zTwo?zTwo:"", pzErr); }else{ rc = fts5ConfigParseColumn(pRet, zOne, zTwo, pzErr); zOne = 0; } } } sqlite3_free(zOne); sqlite3_free(zTwo); } /* If a tokenizer= option was successfully parsed, the tokenizer has ** already been allocated. Otherwise, allocate an instance of the default ** tokenizer (unicode61) now. 
*/ if( rc==SQLITE_OK && pRet->pTok==0 ){ rc = fts5ConfigDefaultTokenizer(pGlobal, pRet); } /* If no zContent option was specified, fill in the default values. */ if( rc==SQLITE_OK && pRet->zContent==0 ){ const char *zTail = 0; assert( pRet->eContent==FTS5_CONTENT_NORMAL || pRet->eContent==FTS5_CONTENT_NONE ); if( pRet->eContent==FTS5_CONTENT_NORMAL ){ zTail = "content"; }else if( pRet->bColumnsize ){ zTail = "docsize"; } if( zTail ){ pRet->zContent = sqlite3Fts5Mprintf( &rc, "%Q.'%q_%s'", pRet->zDb, pRet->zName, zTail ); } } if( rc==SQLITE_OK && pRet->zContentRowid==0 ){ pRet->zContentRowid = sqlite3Fts5Strndup(&rc, "rowid", -1); } /* Formulate the zContentExprlist text */ if( rc==SQLITE_OK ){ rc = fts5ConfigMakeExprlist(pRet); } if( rc!=SQLITE_OK ){ sqlite3Fts5ConfigFree(pRet); *ppOut = 0; } return rc; } /* ** Free the configuration object passed as the only argument. */ void sqlite3Fts5ConfigFree(Fts5Config *pConfig){ if( pConfig ){ int i; if( pConfig->pTok ){ pConfig->pTokApi->xDelete(pConfig->pTok); } sqlite3_free(pConfig->zDb); sqlite3_free(pConfig->zName); for(i=0; i<pConfig->nCol; i++){ sqlite3_free(pConfig->azCol[i]); } sqlite3_free(pConfig->azCol); sqlite3_free(pConfig->aPrefix); sqlite3_free(pConfig->zRank); sqlite3_free(pConfig->zRankArgs); sqlite3_free(pConfig->zContent); sqlite3_free(pConfig->zContentRowid); sqlite3_free(pConfig->zContentExprlist); sqlite3_free(pConfig); } } /* ** Call sqlite3_declare_vtab() based on the contents of the configuration ** object passed as the only argument. Return SQLITE_OK if successful, or ** an SQLite error code if an error occurs. 
*/ int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig){ int i; int rc = SQLITE_OK; char *zSql; zSql = sqlite3Fts5Mprintf(&rc, "CREATE TABLE x("); for(i=0; zSql && i<pConfig->nCol; i++){ const char *zSep = (i==0?"":", "); zSql = sqlite3Fts5Mprintf(&rc, "%z%s%Q", zSql, zSep, pConfig->azCol[i]); } zSql = sqlite3Fts5Mprintf(&rc, "%z, %Q HIDDEN, %s HIDDEN)", zSql, pConfig->zName, FTS5_RANK_NAME ); assert( zSql || rc==SQLITE_NOMEM ); if( zSql ){ rc = sqlite3_declare_vtab(pConfig->db, zSql); sqlite3_free(zSql); } return rc; } /* ** Tokenize the text passed via the second and third arguments. ** ** The callback is invoked once for each token in the input text. The ** arguments passed to it are, in order: ** ** void *pCtx // Copy of 4th argument to sqlite3Fts5Tokenize() ** const char *pToken // Pointer to buffer containing token ** int nToken // Size of token in bytes ** int iStart // Byte offset of start of token within input text ** int iEnd // Byte offset of end of token within input text ** int iPos // Position of token in input (first token is 0) ** ** If the callback returns a non-zero value the tokenization is abandoned ** and no further callbacks are issued. ** ** This function returns SQLITE_OK if successful or an SQLite error code ** if an error occurs. If the tokenization was abandoned early because ** the callback returned SQLITE_DONE, this is not an error and this function ** still returns SQLITE_OK. Or, if the tokenization was abandoned early ** because the callback returned another non-zero value, it is assumed ** to be an SQLite error code and returned to the caller. 
*/ int sqlite3Fts5Tokenize( Fts5Config *pConfig, /* FTS5 Configuration object */ const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ int (*xToken)(void*, const char*, int, int, int) /* Callback */ ){ if( pText==0 ) return SQLITE_OK; return pConfig->pTokApi->xTokenize(pConfig->pTok, pCtx, pText, nText, xToken); } /* ** Argument pIn points to the first character in what is expected to be ** a comma-separated list of SQL literals followed by a ')' character. ** If it actually is this, return a pointer to the ')'. Otherwise, return ** NULL to indicate a parse error. */ static const char *fts5ConfigSkipArgs(const char *pIn){ const char *p = pIn; while( 1 ){ p = fts5ConfigSkipWhitespace(p); p = fts5ConfigSkipLiteral(p); p = fts5ConfigSkipWhitespace(p); if( p==0 || *p==')' ) break; if( *p!=',' ){ p = 0; break; } p++; } return p; } /* ** Parameter zIn contains a rank() function specification. The format of ** this is: ** ** + Bareword (function name) ** + Open parenthesis - "(" ** + Zero or more SQL literals in a comma separated list ** + Close parenthesis - ")" */ int sqlite3Fts5ConfigParseRank( const char *zIn, /* Input string */ char **pzRank, /* OUT: Rank function name */ char **pzRankArgs /* OUT: Rank function arguments */ ){ const char *p = zIn; const char *pRank; char *zRank = 0; char *zRankArgs = 0; int rc = SQLITE_OK; *pzRank = 0; *pzRankArgs = 0; p = fts5ConfigSkipWhitespace(p); pRank = p; p = fts5ConfigSkipBareword(p); if( p ){ zRank = sqlite3Fts5MallocZero(&rc, 1 + p - pRank); if( zRank ) memcpy(zRank, pRank, p-pRank); }else{ rc = SQLITE_ERROR; } if( rc==SQLITE_OK ){ p = fts5ConfigSkipWhitespace(p); if( *p!='(' ) rc = SQLITE_ERROR; p++; } if( rc==SQLITE_OK ){ const char *pArgs; p = fts5ConfigSkipWhitespace(p); pArgs = p; if( *p!=')' ){ p = fts5ConfigSkipArgs(p); if( p==0 ){ rc = SQLITE_ERROR; }else{ zRankArgs = sqlite3Fts5MallocZero(&rc, 1 + p - pArgs); if( zRankArgs ) memcpy(zRankArgs, pArgs, p-pArgs); } } } if( 
rc!=SQLITE_OK ){ sqlite3_free(zRank); assert( zRankArgs==0 ); }else{ *pzRank = zRank; *pzRankArgs = zRankArgs; } return rc; } int sqlite3Fts5ConfigSetValue( Fts5Config *pConfig, const char *zKey, sqlite3_value *pVal, int *pbBadkey ){ int rc = SQLITE_OK; if( 0==sqlite3_stricmp(zKey, "pgsz") ){ int pgsz = 0; if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ pgsz = sqlite3_value_int(pVal); } if( pgsz<=0 || pgsz>FTS5_MAX_PAGE_SIZE ){ *pbBadkey = 1; }else{ pConfig->pgsz = pgsz; } } else if( 0==sqlite3_stricmp(zKey, "automerge") ){ int nAutomerge = -1; if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ nAutomerge = sqlite3_value_int(pVal); } if( nAutomerge<0 || nAutomerge>64 ){ *pbBadkey = 1; }else{ if( nAutomerge==1 ) nAutomerge = FTS5_DEFAULT_AUTOMERGE; pConfig->nAutomerge = nAutomerge; } } else if( 0==sqlite3_stricmp(zKey, "crisismerge") ){ int nCrisisMerge = -1; if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ nCrisisMerge = sqlite3_value_int(pVal); } if( nCrisisMerge<0 ){ *pbBadkey = 1; }else{ if( nCrisisMerge<=1 ) nCrisisMerge = FTS5_DEFAULT_CRISISMERGE; pConfig->nCrisisMerge = nCrisisMerge; } } else if( 0==sqlite3_stricmp(zKey, "rank") ){ const char *zIn = (const char*)sqlite3_value_text(pVal); char *zRank; char *zRankArgs; rc = sqlite3Fts5ConfigParseRank(zIn, &zRank, &zRankArgs); if( rc==SQLITE_OK ){ sqlite3_free(pConfig->zRank); sqlite3_free(pConfig->zRankArgs); pConfig->zRank = zRank; pConfig->zRankArgs = zRankArgs; }else if( rc==SQLITE_ERROR ){ rc = SQLITE_OK; *pbBadkey = 1; } }else{ *pbBadkey = 1; } return rc; } /* ** Load the contents of the %_config table into memory. 
*/ int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){ const char *zSelect = "SELECT k, v FROM %Q.'%q_config'"; char *zSql; sqlite3_stmt *p = 0; int rc = SQLITE_OK; int iVersion = 0; /* Set default values */ pConfig->pgsz = FTS5_DEFAULT_PAGE_SIZE; pConfig->nAutomerge = FTS5_DEFAULT_AUTOMERGE; pConfig->nCrisisMerge = FTS5_DEFAULT_CRISISMERGE; zSql = sqlite3Fts5Mprintf(&rc, zSelect, pConfig->zDb, pConfig->zName); if( zSql ){ rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p, 0); sqlite3_free(zSql); } assert( rc==SQLITE_OK || p==0 ); if( rc==SQLITE_OK ){ while( SQLITE_ROW==sqlite3_step(p) ){ const char *zK = (const char*)sqlite3_column_text(p, 0); sqlite3_value *pVal = sqlite3_column_value(p, 1); if( 0==sqlite3_stricmp(zK, "version") ){ iVersion = sqlite3_value_int(pVal); }else{ int bDummy = 0; sqlite3Fts5ConfigSetValue(pConfig, zK, pVal, &bDummy); } } rc = sqlite3_finalize(p); } if( rc==SQLITE_OK && iVersion!=FTS5_CURRENT_VERSION ){ rc = SQLITE_ERROR; if( pConfig->pzErrmsg ){ assert( 0==*pConfig->pzErrmsg ); *pConfig->pzErrmsg = sqlite3_mprintf( "invalid fts5 file format (found %d, expected %d) - run 'rebuild'", iVersion, FTS5_CURRENT_VERSION ); } } if( rc==SQLITE_OK ){ pConfig->iCookie = iCookie; } return rc; } #endif /* SQLITE_ENABLE_FTS5 */ |
Added ext/fts5/fts5_expr.c.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > 
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > 
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 
509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 
1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 
1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 
1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 | /* ** 2014 May 31 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** */ #ifdef SQLITE_ENABLE_FTS5 #include "fts5Int.h" #include "fts5parse.h" /* ** All token types in the generated fts5parse.h file are greater than 0. */ #define FTS5_EOF 0 typedef struct Fts5ExprTerm Fts5ExprTerm; /* ** Functions generated by lemon from fts5parse.y. 
*/
void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(u64));
void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*));
void sqlite3Fts5Parser(void*, int, Fts5Token, Fts5Parse*);

/*
** A complete expression object, as returned by sqlite3Fts5ExprNew() and
** released by sqlite3Fts5ExprFree().
*/
struct Fts5Expr {
  Fts5Index *pIndex;              /* Index passed to sqlite3Fts5IndexQuery() */
  Fts5ExprNode *pRoot;            /* Root node of the expression tree */
  int bDesc;                      /* Iterate in descending docid order */
  int nPhrase;                    /* Number of phrases in expression */
  Fts5ExprPhrase **apExprPhrase;  /* Pointers to phrase objects */
};

/*
** eType:
**   Expression node type. Always one of:
**
**       FTS5_AND                 (nChild, apChild valid)
**       FTS5_OR                  (nChild, apChild valid)
**       FTS5_NOT                 (nChild, apChild valid)
**       FTS5_STRING              (pNear valid)
**       FTS5_TERM                (pNear valid)
*/
struct Fts5ExprNode {
  int eType;                      /* Node type */
  int bEof;                       /* True at EOF */
  int bNomatch;                   /* True if entry is not a match */

  i64 iRowid;                     /* Current rowid */
  Fts5ExprNearset *pNear;         /* For FTS5_STRING - cluster of phrases */

  /* Child nodes. For a NOT node, this array always contains 2 entries. For 
  ** AND or OR nodes, it contains 2 or more entries.  */
  int nChild;                     /* Number of child nodes */
  Fts5ExprNode *apChild[0];       /* Array of child nodes */
};

/* True if node p is a leaf node, i.e. of type FTS5_TERM or FTS5_STRING. */
#define Fts5NodeIsString(p) ((p)->eType==FTS5_TERM || (p)->eType==FTS5_STRING)

/*
** An instance of the following structure represents a single search term
** or term prefix.
*/
struct Fts5ExprTerm {
  int bPrefix;                    /* True for a prefix term */
  char *zTerm;                    /* nul-terminated term */
  Fts5IndexIter *pIter;           /* Iterator for this term */
};

/*
** A phrase. One or more terms that must appear in a contiguous sequence
** within a document for it to match.
**
** aTerm[] is a trailing variable-length array: the object is allocated
** with room for nTerm entries following the fixed-size header.
*/
struct Fts5ExprPhrase {
  Fts5ExprNode *pNode;            /* FTS5_STRING node this phrase is part of */
  Fts5Buffer poslist;             /* Current position list */
  int nTerm;                      /* Number of entries in aTerm[] */
  Fts5ExprTerm aTerm[0];          /* Terms that make up this phrase */
};

/*
** If a NEAR() clump may only match a specific set of columns, then
** Fts5ExprNearset.pColset points to an object of the following type.
** Each entry in the aiCol[] array */ struct Fts5ExprColset { int nCol; int aiCol[1]; }; /* ** One or more phrases that must appear within a certain token distance of ** each other within each matching document. */ struct Fts5ExprNearset { int nNear; /* NEAR parameter */ Fts5ExprColset *pColset; /* Columns to search (NULL -> all columns) */ int nPhrase; /* Number of entries in aPhrase[] array */ Fts5ExprPhrase *apPhrase[0]; /* Array of phrase pointers */ }; /* ** Parse context. */ struct Fts5Parse { Fts5Config *pConfig; char *zErr; int rc; int nPhrase; /* Size of apPhrase array */ Fts5ExprPhrase **apPhrase; /* Array of all phrases */ Fts5ExprNode *pExpr; /* Result of a successful parse */ }; void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){ va_list ap; va_start(ap, zFmt); if( pParse->rc==SQLITE_OK ){ pParse->zErr = sqlite3_vmprintf(zFmt, ap); pParse->rc = SQLITE_ERROR; } va_end(ap); } static int fts5ExprIsspace(char t){ return t==' ' || t=='\t' || t=='\n' || t=='\r'; } /* ** Read the first token from the nul-terminated string at *pz. 
*/ static int fts5ExprGetToken( Fts5Parse *pParse, const char **pz, /* IN/OUT: Pointer into buffer */ Fts5Token *pToken ){ const char *z = *pz; int tok; /* Skip past any whitespace */ while( fts5ExprIsspace(*z) ) z++; pToken->p = z; pToken->n = 1; switch( *z ){ case '(': tok = FTS5_LP; break; case ')': tok = FTS5_RP; break; case '{': tok = FTS5_LCP; break; case '}': tok = FTS5_RCP; break; case ':': tok = FTS5_COLON; break; case ',': tok = FTS5_COMMA; break; case '+': tok = FTS5_PLUS; break; case '*': tok = FTS5_STAR; break; case '\0': tok = FTS5_EOF; break; case '"': { const char *z2; tok = FTS5_STRING; for(z2=&z[1]; 1; z2++){ if( z2[0]=='"' ){ z2++; if( z2[0]!='"' ) break; } if( z2[0]=='\0' ){ sqlite3Fts5ParseError(pParse, "unterminated string"); return FTS5_EOF; } } pToken->n = (z2 - z); break; } default: { const char *z2; tok = FTS5_STRING; for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++); pToken->n = (z2 - z); if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 ) tok = FTS5_OR; if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT; if( pToken->n==3 && memcmp(pToken->p, "AND", 3)==0 ) tok = FTS5_AND; break; } } *pz = &pToken->p[pToken->n]; return tok; } static void *fts5ParseAlloc(u64 t){ return sqlite3_malloc((int)t); } static void fts5ParseFree(void *p){ sqlite3_free(p); } int sqlite3Fts5ExprNew( Fts5Config *pConfig, /* FTS5 Configuration */ const char *zExpr, /* Expression text */ Fts5Expr **ppNew, char **pzErr ){ Fts5Parse sParse; Fts5Token token; const char *z = zExpr; int t; /* Next token type */ void *pEngine; Fts5Expr *pNew; *ppNew = 0; *pzErr = 0; memset(&sParse, 0, sizeof(sParse)); pEngine = sqlite3Fts5ParserAlloc(fts5ParseAlloc); if( pEngine==0 ){ return SQLITE_NOMEM; } sParse.pConfig = pConfig; do { t = fts5ExprGetToken(&sParse, &z, &token); sqlite3Fts5Parser(pEngine, t, token, &sParse); }while( sParse.rc==SQLITE_OK && t!=FTS5_EOF ); sqlite3Fts5ParserFree(pEngine, fts5ParseFree); assert( sParse.rc!=SQLITE_OK || sParse.zErr==0 ); if( 
sParse.rc==SQLITE_OK ){ *ppNew = pNew = sqlite3_malloc(sizeof(Fts5Expr)); if( pNew==0 ){ sParse.rc = SQLITE_NOMEM; sqlite3Fts5ParseNodeFree(sParse.pExpr); }else{ pNew->pRoot = sParse.pExpr; pNew->pIndex = 0; pNew->apExprPhrase = sParse.apPhrase; pNew->nPhrase = sParse.nPhrase; sParse.apPhrase = 0; } } sqlite3_free(sParse.apPhrase); *pzErr = sParse.zErr; return sParse.rc; } /* ** Create a new FTS5 expression by cloning phrase iPhrase of the ** expression passed as the second argument. */ int sqlite3Fts5ExprPhraseExpr( Fts5Config *pConfig, Fts5Expr *pExpr, int iPhrase, Fts5Expr **ppNew ){ int rc = SQLITE_OK; /* Return code */ Fts5ExprPhrase *pOrig; /* The phrase extracted from pExpr */ Fts5ExprPhrase *pCopy; /* Copy of pOrig */ Fts5Expr *pNew = 0; /* Expression to return via *ppNew */ pOrig = pExpr->apExprPhrase[iPhrase]; pCopy = (Fts5ExprPhrase*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * pOrig->nTerm ); if( pCopy ){ int i; /* Used to iterate through phrase terms */ Fts5ExprPhrase **apPhrase; Fts5ExprNode *pNode; Fts5ExprNearset *pNear; pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr)); apPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprPhrase*) ); pNode = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprNode)); pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*) ); for(i=0; i<pOrig->nTerm; i++){ pCopy->aTerm[i].zTerm = sqlite3Fts5Strndup(&rc, pOrig->aTerm[i].zTerm,-1); pCopy->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix; } if( rc==SQLITE_OK ){ /* All the allocations succeeded. Put the expression object together. */ pNew->pIndex = pExpr->pIndex; pNew->pRoot = pNode; pNew->nPhrase = 1; pNew->apExprPhrase = apPhrase; pNew->apExprPhrase[0] = pCopy; pNode->eType = (pOrig->nTerm==1 ? 
FTS5_TERM : FTS5_STRING); pNode->pNear = pNear; pNear->nPhrase = 1; pNear->apPhrase[0] = pCopy; pCopy->nTerm = pOrig->nTerm; pCopy->pNode = pNode; }else{ /* At least one allocation failed. Free them all. */ for(i=0; i<pOrig->nTerm; i++){ sqlite3_free(pCopy->aTerm[i].zTerm); } sqlite3_free(pCopy); sqlite3_free(pNear); sqlite3_free(pNode); sqlite3_free(apPhrase); sqlite3_free(pNew); pNew = 0; } } *ppNew = pNew; return rc; } /* ** Free the expression node object passed as the only argument. */ void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){ if( p ){ int i; for(i=0; i<p->nChild; i++){ sqlite3Fts5ParseNodeFree(p->apChild[i]); } sqlite3Fts5ParseNearsetFree(p->pNear); sqlite3_free(p); } } /* ** Free the expression object passed as the only argument. */ void sqlite3Fts5ExprFree(Fts5Expr *p){ if( p ){ sqlite3Fts5ParseNodeFree(p->pRoot); sqlite3_free(p->apExprPhrase); sqlite3_free(p); } } static int fts5ExprColsetTest(Fts5ExprColset *pColset, int iCol){ int i; for(i=0; i<pColset->nCol; i++){ if( pColset->aiCol[i]==iCol ) return 1; } return 0; } /* ** All individual term iterators in pPhrase are guaranteed to be valid and ** pointing to the same rowid when this function is called. This function ** checks if the current rowid really is a match, and if so populates ** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch ** is set to true if this is really a match, or false otherwise. ** ** SQLITE_OK is returned if an error occurs, or an SQLite error code ** otherwise. It is not considered an error code if the current rowid is ** not a match. 
*/
static int fts5ExprPhraseIsMatch(
  Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
  Fts5ExprColset *pColset,        /* Restrict matches to these columns */
  Fts5ExprPhrase *pPhrase,        /* Phrase object to initialize */
  int *pbMatch                    /* OUT: Set to true if really a match */
){
  Fts5PoslistWriter writer = {0};
  Fts5PoslistReader aStatic[4];
  Fts5PoslistReader *aIter = aStatic;
  int i;
  int rc = SQLITE_OK;
  int iCol = -1;
  
  /* A single-column colset is passed to the poslist readers as iCol
  ** rather than being tested per position further below. */
  if( pColset && pColset->nCol==1 ){
    iCol = pColset->aiCol[0];
    pColset = 0;
  }

  fts5BufferZero(&pPhrase->poslist);

  /* If the aStatic[] array is not large enough, allocate a large array
  ** using sqlite3_malloc(). This approach could be improved upon. */
  if( pPhrase->nTerm>(sizeof(aStatic) / sizeof(aStatic[0])) ){
    int nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm;
    aIter = (Fts5PoslistReader*)sqlite3_malloc(nByte);
    /* Note: *pbMatch is not written on this early-return path */
    if( !aIter ) return SQLITE_NOMEM;
  }

  /* Initialize a term iterator for each term in the phrase */
  for(i=0; i<pPhrase->nTerm; i++){
    i64 dummy;
    int n;
    const u8 *a;
    rc = sqlite3Fts5IterPoslist(pPhrase->aTerm[i].pIter, &a, &n, &dummy);
    if( rc || sqlite3Fts5PoslistReaderInit(iCol, a, n, &aIter[i]) ){
      goto ismatch_out;
    }
  }

  while( 1 ){
    int bMatch;
    i64 iPos = aIter[0].iPos;

    /* Advance the readers until reader i is at position (iPos+i) for
    ** every term i, i.e. the terms appear consecutively starting at iPos.
    ** If any reader is exhausted first, there are no further matches. */
    do {
      bMatch = 1;
      for(i=0; i<pPhrase->nTerm; i++){
        Fts5PoslistReader *pPos = &aIter[i];
        i64 iAdj = iPos + i;
        if( pPos->iPos!=iAdj ){
          bMatch = 0;
          while( pPos->iPos<iAdj ){
            if( sqlite3Fts5PoslistReaderNext(pPos) ) goto ismatch_out;
          }
          if( pPos->iPos>iAdj ) iPos = pPos->iPos-i;
        }
      }
    }while( bMatch==0 );

    if( pColset==0 || fts5ExprColsetTest(pColset, FTS5_POS2COLUMN(iPos)) ){
      /* Append position iPos to the output */
      rc = sqlite3Fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos);
      if( rc!=SQLITE_OK ) goto ismatch_out;
    }

    /* Step every reader past the match just processed */
    for(i=0; i<pPhrase->nTerm; i++){
      if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out;
    }
  }

 ismatch_out:
  *pbMatch = (pPhrase->poslist.n>0);
  if( aIter!=aStatic ) sqlite3_free(aIter);
  return rc;
}

/*
** A position-list reader that exposes both the current position (iPos)
** and the one that follows it (iLookahead).
*/
typedef struct Fts5LookaheadReader Fts5LookaheadReader;
struct Fts5LookaheadReader {
  const u8 *a;                    /* Buffer containing position list */
  int n;                          /* Size of buffer a[] in bytes */
  int i;                          /* Current offset in position list */
  i64 iPos;                       /* Current position */
  i64 iLookahead;                 /* Next position */
};

/* Value stored in iPos/iLookahead once the end of the list is passed */
#define FTS5_LOOKAHEAD_EOF (((i64)1) << 62)

/*
** Advance the reader by one entry: the lookahead value becomes the current
** position and a new lookahead value is read from the buffer. If
** sqlite3Fts5PoslistNext64() returns non-zero, the lookahead is set to
** FTS5_LOOKAHEAD_EOF. Returns true if the (new) current position is
** FTS5_LOOKAHEAD_EOF.
*/
static int fts5LookaheadReaderNext(Fts5LookaheadReader *p){
  p->iPos = p->iLookahead;
  if( sqlite3Fts5PoslistNext64(p->a, p->n, &p->i, &p->iLookahead) ){
    p->iLookahead = FTS5_LOOKAHEAD_EOF;
  }
  return (p->iPos==FTS5_LOOKAHEAD_EOF);
}

/*
** Initialize reader p to iterate through the n byte position list at a[].
** The reader is advanced twice so that both iPos and iLookahead are
** populated. Returns the result of the second advance, i.e. true if the
** current position is already FTS5_LOOKAHEAD_EOF.
*/
static int fts5LookaheadReaderInit(
  const u8 *a, int n,             /* Buffer to read position list from */
  Fts5LookaheadReader *p          /* Iterator object to initialize */
){
  memset(p, 0, sizeof(Fts5LookaheadReader));
  p->a = a;
  p->n = n;
  fts5LookaheadReaderNext(p);
  return fts5LookaheadReaderNext(p);
}

#if 0
static int fts5LookaheadReaderEof(Fts5LookaheadReader *p){
  return (p->iPos==FTS5_LOOKAHEAD_EOF);
}
#endif

/*
** Per-phrase state used by fts5ExprNearIsMatch(): a reader over the
** phrase's original poslist and a writer producing the trimmed one.
*/
typedef struct Fts5NearTrimmer Fts5NearTrimmer;
struct Fts5NearTrimmer {
  Fts5LookaheadReader reader;     /* Input iterator */
  Fts5PoslistWriter writer;       /* Writer context */
  Fts5Buffer *pOut;               /* Output poslist */
};

/*
** The near-set object passed as the first argument contains more than
** one phrase. All phrases currently point to the same row. The
** Fts5ExprPhrase.poslist buffers are populated accordingly. This function
** tests if the current row contains instances of each phrase sufficiently
** close together to meet the NEAR constraint. Non-zero is returned if it
** does, or zero otherwise.
**
** If in/out parameter (*pRc) is set to other than SQLITE_OK when this
** function is called, it is a no-op. Or, if an error (e.g. SQLITE_NOMEM)
** occurs within this function (*pRc) is set accordingly before returning.
** The return value is undefined in both these cases.
** 
** If no error occurs and non-zero (a match) is returned, the position-list
** of each phrase object is edited to contain only those entries that
** meet the constraint before returning.
*/
static int fts5ExprNearIsMatch(int *pRc, Fts5ExprNearset *pNear){
  Fts5NearTrimmer aStatic[4];
  Fts5NearTrimmer *a = aStatic;
  Fts5ExprPhrase **apPhrase = pNear->apPhrase;

  int i;
  int rc = *pRc;
  int bMatch;

  assert( pNear->nPhrase>1 );

  /* If the aStatic[] array is not large enough, allocate a large array
  ** using sqlite3_malloc(). This approach could be improved upon. */
  if( pNear->nPhrase>(sizeof(aStatic) / sizeof(aStatic[0])) ){
    int nByte = sizeof(Fts5NearTrimmer) * pNear->nPhrase;
    a = (Fts5NearTrimmer*)sqlite3Fts5MallocZero(&rc, nByte);
  }else{
    memset(aStatic, 0, sizeof(aStatic));
  }
  /* Covers both an error passed in via *pRc and a failed allocation */
  if( rc!=SQLITE_OK ){
    *pRc = rc;
    return 0;
  }

  /* Initialize a lookahead iterator for each phrase. After passing the
  ** buffer and buffer size to the lookahead-reader init function, zero
  ** the phrase poslist buffer. The new poslist for the phrase (containing
  ** the same entries as the original with some entries removed on account 
  ** of the NEAR constraint) is written over the original even as it is
  ** being read. This is safe as the entries for the new poslist are a
  ** subset of the old, so it is not possible for data yet to be read to
  ** be overwritten.  */
  for(i=0; i<pNear->nPhrase; i++){
    Fts5Buffer *pPoslist = &apPhrase[i]->poslist;
    fts5LookaheadReaderInit(pPoslist->p, pPoslist->n, &a[i].reader);
    pPoslist->n = 0;
    a[i].pOut = pPoslist;
  }

  while( 1 ){
    int iAdv;
    i64 iMin;
    i64 iMax;

    /* This block advances the phrase iterators until they point to a set of
    ** entries that together comprise a match.  */
    iMax = a[0].reader.iPos;
    do {
      bMatch = 1;
      for(i=0; i<pNear->nPhrase; i++){
        Fts5LookaheadReader *pPos = &a[i].reader;
        /* Smallest position of phrase i that is close enough to iMax */
        iMin = iMax - pNear->apPhrase[i]->nTerm - pNear->nNear;
        if( pPos->iPos<iMin || pPos->iPos>iMax ){
          bMatch = 0;
          while( pPos->iPos<iMin ){
            if( fts5LookaheadReaderNext(pPos) ) goto ismatch_out;
          }
          if( pPos->iPos>iMax ) iMax = pPos->iPos;
        }
      }
    }while( bMatch==0 );

    /* Add an entry to each output position list. The iPrev test avoids
    ** writing the same position twice in a row. The return value of the
    ** append call is not checked here. */
    for(i=0; i<pNear->nPhrase; i++){
      i64 iPos = a[i].reader.iPos;
      Fts5PoslistWriter *pWriter = &a[i].writer;
      if( a[i].pOut->n==0 || iPos!=pWriter->iPrev ){
        sqlite3Fts5PoslistWriterAppend(a[i].pOut, pWriter, iPos);
      }
    }

    /* Advance the reader whose next position is the smallest. If that
    ** reader is at EOF, there are no more matches. */
    iAdv = 0;
    iMin = a[0].reader.iLookahead;
    for(i=0; i<pNear->nPhrase; i++){
      if( a[i].reader.iLookahead < iMin ){
        iMin = a[i].reader.iLookahead;
        iAdv = i;
      }
    }
    if( fts5LookaheadReaderNext(&a[iAdv].reader) ) goto ismatch_out;
  }

  ismatch_out: {
    /* The row matches if at least one position was written for phrase 0 */
    int bRet = a[0].pOut->n>0;
    *pRc = rc;
    if( a!=aStatic ) sqlite3_free(a);
    return bRet;
  }
}

/*
** Advance the first term iterator in the first phrase of pNear. Set 
** pNode->bEof to true if it reaches EOF or if an error occurs.
**
** If bFromValid is true the iterator is advanced using
** sqlite3Fts5IterNextFrom() with iFrom. Otherwise it is advanced by a
** single call to sqlite3Fts5IterNext().
**
** Return SQLITE_OK if successful, or an SQLite error code if an error
** occurs.
*/
static int fts5ExprNearAdvanceFirst(
  Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
  Fts5ExprNode *pNode,            /* FTS5_STRING or FTS5_TERM node */
  int bFromValid,
  i64 iFrom 
){
  Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter;
  int rc;

  assert( Fts5NodeIsString(pNode) );
  if( bFromValid ){
    rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
  }else{
    rc = sqlite3Fts5IterNext(pIter);
  }

  pNode->bEof = (rc || sqlite3Fts5IterEof(pIter));
  return rc;
}

/*
** Advance iterator pIter until it points to a value equal to or laster
** than the initial value of *piLast. If this means the iterator points
** to a value laster than *piLast, update *piLast to the new lastest value.
**
** If the iterator reaches EOF, set *pbEof to true before returning. If
** an error occurs, set *pRc to an error code.
If either *pbEof or *pRc ** are set, return a non-zero value. Otherwise, return zero. */ static int fts5ExprAdvanceto( Fts5IndexIter *pIter, /* Iterator to advance */ int bDesc, /* True if iterator is "rowid DESC" */ i64 *piLast, /* IN/OUT: Lastest rowid seen so far */ int *pRc, /* OUT: Error code */ int *pbEof /* OUT: Set to true if EOF */ ){ i64 iLast = *piLast; i64 iRowid; iRowid = sqlite3Fts5IterRowid(pIter); if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){ int rc = sqlite3Fts5IterNextFrom(pIter, iLast); if( rc || sqlite3Fts5IterEof(pIter) ){ *pRc = rc; *pbEof = 1; return 1; } iRowid = sqlite3Fts5IterRowid(pIter); assert( (bDesc==0 && iRowid>=iLast) || (bDesc==1 && iRowid<=iLast) ); } *piLast = iRowid; return 0; } /* ** IN/OUT parameter (*pa) points to a position list n bytes in size. If ** the position list contains entries for column iCol, then (*pa) is set ** to point to the sub-position-list for that column and the number of ** bytes in it returned. Or, if the argument position list does not ** contain any entries for column iCol, return 0. 
*/
static int fts5ExprExtractCol(
  const u8 **pa,                  /* IN/OUT: Pointer to poslist */
  int n,                          /* IN: Size of poslist in bytes */
  int iCol                        /* Column to extract from poslist */
){
  int iCurrent = 0;
  const u8 *p = *pa;
  const u8 *pEnd = &p[n];         /* One byte past end of position list */
  u8 prev = 0;

  /* Scan forward one column marker at a time until column iCol is found.
  ** NOTE(review): the inner loop reads *p before comparing p with pEnd,
  ** so this appears to assume n>0 - confirm against callers. */
  while( iCol!=iCurrent ){
    /* Advance pointer p until it points to pEnd or an 0x01 byte that is
    ** not part of a varint */
    while( (prev & 0x80) || *p!=0x01 ){
      prev = *p++;
      if( p==pEnd ) return 0;
    }
    /* p points at an 0x01 marker. The varint following it is read into
    ** iCurrent. *pa is left pointing at the marker byte itself. */
    *pa = p++;
    p += fts5GetVarint32(p, iCurrent);
  }

  /* Advance pointer p until it points to pEnd or an 0x01 byte that is
  ** not part of a varint */
  assert( (prev & 0x80)==0 );
  while( p<pEnd && ((prev & 0x80) || *p!=0x01) ){
    prev = *p++;
  }
  return p - (*pa);
}

/*
** Set buffer pBuf to contain the concatenation of the sub-position-lists
** extracted from the nPos byte position list pPos for each column listed
** in pColset, visited in the order they appear in pColset->aiCol[].
**
** Returns the rc value as left by fts5BufferAppendBlob() (SQLITE_OK if it
** is never modified).
*/
static int fts5ExprExtractColset (
  Fts5ExprColset *pColset,        /* Colset to filter on */
  const u8 *pPos, int nPos,       /* Position list */
  Fts5Buffer *pBuf                /* Output buffer */
){
  int rc = SQLITE_OK;
  int i;

  fts5BufferZero(pBuf);
  for(i=0; i<pColset->nCol; i++){
    const u8 *pSub = pPos;
    int nSub = fts5ExprExtractCol(&pSub, nPos, pColset->aiCol[i]);
    if( nSub ){
      fts5BufferAppendBlob(&rc, pBuf, nSub, pSub);
    }
  }
  return rc;
}

/*
** Test whether or not the current row is a match for the nearset of node
** pNode, populating the poslist buffer of each phrase in the process.
** Returns 1 if every phrase matches and (when there is more than one
** phrase) the NEAR constraint is met, or 0 otherwise. Any error code is
** stored in *pRc.
*/
static int fts5ExprNearTest(
  int *pRc,
  Fts5Expr *pExpr,                /* Expression that pNear is a part of */
  Fts5ExprNode *pNode             /* The "NEAR" node (FTS5_STRING) */
){
  Fts5ExprNearset *pNear = pNode->pNear;
  int rc = *pRc;
  int i;

  /* Check that each phrase in the nearset matches the current row.
  ** Populate the pPhrase->poslist buffers at the same time. If any
  ** phrase is not a match, break out of the loop early.  */
  for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
    Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
    if( pPhrase->nTerm>1 || pNear->pColset ){
      int bMatch = 0;
      rc = fts5ExprPhraseIsMatch(pExpr, pNear->pColset, pPhrase, &bMatch);
      if( bMatch==0 ) break;
    }else{
      /* Single term, no column filter: use the term's poslist as is */
      rc = sqlite3Fts5IterPoslistBuffer(
          pPhrase->aTerm[0].pIter, &pPhrase->poslist
      );
    }
  }

  *pRc = rc;
  /* i==pNear->nPhrase only if the loop above ran to completion */
  if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){
    return 1;
  }

  return 0;
}

/*
** Load the position list for the current row of the single-term node
** pNode, restricted to the node's column-set if it has one, and set
** pNode->bNomatch according to whether the resulting list is empty.
** pNode->iRowid is passed to sqlite3Fts5IterPoslist() as an output.
**
** Returns the error code from sqlite3Fts5IterPoslist() or
** fts5ExprExtractColset().
*/
static int fts5ExprTokenTest(
  Fts5Expr *pExpr,                /* Expression that pNear is a part of */
  Fts5ExprNode *pNode             /* The "NEAR" node (FTS5_TERM) */
){
  /* As this "NEAR" object is actually a single phrase that consists 
  ** of a single term only, grab pointers into the poslist managed by the
  ** fts5_index.c iterator object. This is much faster than synthesizing 
  ** a new poslist the way we have to for more complicated phrase or NEAR
  ** expressions.  */
  Fts5ExprNearset *pNear = pNode->pNear;
  Fts5ExprPhrase *pPhrase = pNear->apPhrase[0];
  Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
  Fts5ExprColset *pColset = pNear->pColset;
  const u8 *pPos;
  int nPos;
  int rc;

  assert( pNode->eType==FTS5_TERM );
  assert( pNear->nPhrase==1 && pPhrase->nTerm==1 );

  rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid);

  /* If the term may match any column, then this must be a match.
  ** Return immediately in this case. Otherwise, try to find the
  ** part of the poslist that corresponds to the required column.
  ** If it can be found, return. If it cannot, the next iteration
  ** of the loop will test the next rowid in the database for this
  ** term.
  **
  ** NOTE(review): only the final branch checks rc before using pPos and
  ** nPos. The first two branches presumably rely on those outputs being
  ** set even when sqlite3Fts5IterPoslist() fails - confirm.  */
  if( pColset==0 ){
    assert( pPhrase->poslist.nSpace==0 );
    pPhrase->poslist.p = (u8*)pPos;
    pPhrase->poslist.n = nPos;
  }else if( pColset->nCol==1 ){
    assert( pPhrase->poslist.nSpace==0 );
    pPhrase->poslist.n = fts5ExprExtractCol(&pPos, nPos, pColset->aiCol[0]);
    pPhrase->poslist.p = (u8*)pPos;
  }else if( rc==SQLITE_OK ){
    rc = fts5ExprExtractColset(pColset, pPos, nPos, &pPhrase->poslist);
  }

  pNode->bNomatch = (pPhrase->poslist.n==0);
  return rc;
}

/*
** All individual term iterators in pNear are guaranteed to be valid when
** this function is called. This function checks if all term iterators
** point to the same rowid, and if not, advances them until they do.
** If an EOF is reached before this happens, pNode->bEof is set to true 
** before returning.
**
** SQLITE_OK is returned if no error occurs, or an SQLite error code 
** otherwise. It is not considered an error code if an iterator reaches
** EOF.
*/
static int fts5ExprNearNextMatch(
  Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
  Fts5ExprNode *pNode
){
  Fts5ExprNearset *pNear = pNode->pNear;
  Fts5ExprPhrase *pLeft = pNear->apPhrase[0];
  int rc = SQLITE_OK;
  i64 iLast;                      /* Lastest rowid any iterator points to */
  int i, j;                       /* Phrase and token index, respectively */
  int bMatch;                     /* True if all terms are at the same rowid */

  assert( pNear->nPhrase>1 || pNear->apPhrase[0]->nTerm>1 );

  /* Initialize iLast, the "lastest" rowid any iterator points to. If the
  ** iterator skips through rowids in the default ascending order, this means
  ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it
  ** means the minimum rowid.  */
  iLast = sqlite3Fts5IterRowid(pLeft->aTerm[0].pIter);

  /* Repeat until a full pass finds every iterator already at iLast */
  do {
    bMatch = 1;
    for(i=0; i<pNear->nPhrase; i++){
      Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
      for(j=0; j<pPhrase->nTerm; j++){
        Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter;
        i64 iRowid = sqlite3Fts5IterRowid(pIter);
        if( iRowid!=iLast ) bMatch = 0;
        if( fts5ExprAdvanceto(pIter, pExpr->bDesc, &iLast,&rc,&pNode->bEof) ){
          return rc;
        }
      }
    }
  }while( bMatch==0 );

  pNode->bNomatch = (0==fts5ExprNearTest(&rc, pExpr, pNode));
  pNode->iRowid = iLast;

  return rc;
}

/*
** Initialize all term iterators in the pNear object. If any term is found
** to match no documents at all, set pNode->bEof to true.
**
** NOTE(review): the "break" below exits only the inner (per-term) loop.
** If rc is still SQLITE_OK the outer loop goes on to initialize the
** iterators of the following phrases, so this function does not return
** immediately on EOF.
*/
static int fts5ExprNearInitAll(
  Fts5Expr *pExpr,
  Fts5ExprNode *pNode
){
  Fts5ExprNearset *pNear = pNode->pNear;
  Fts5ExprTerm *pTerm;
  Fts5ExprPhrase *pPhrase;
  int i, j;
  int rc = SQLITE_OK;

  for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
    pPhrase = pNear->apPhrase[i];
    for(j=0; j<pPhrase->nTerm; j++){
      pTerm = &pPhrase->aTerm[j];
      /* Discard any iterator left over from an earlier query */
      if( pTerm->pIter ){
        sqlite3Fts5IterClose(pTerm->pIter);
        pTerm->pIter = 0;
      }
      rc = sqlite3Fts5IndexQuery(
          pExpr->pIndex, pTerm->zTerm, strlen(pTerm->zTerm),
          (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) |
          (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0),
          &pTerm->pIter
      );
      assert( rc==SQLITE_OK || pTerm->pIter==0 );
      if( pTerm->pIter==0 || sqlite3Fts5IterEof(pTerm->pIter) ){
        pNode->bEof = 1;
        break;
      }
    }
  }

  return rc;
}

/* fts5ExprNodeNext() calls fts5ExprNodeNextMatch(). And vice-versa.
*/ static int fts5ExprNodeNextMatch(Fts5Expr*, Fts5ExprNode*); /* ** If pExpr is an ASC iterator, this function returns a value with the ** same sign as: ** ** (iLhs - iRhs) ** ** Otherwise, if this is a DESC iterator, the opposite is returned: ** ** (iRhs - iLhs) */ static int fts5RowidCmp( Fts5Expr *pExpr, i64 iLhs, i64 iRhs ){ assert( pExpr->bDesc==0 || pExpr->bDesc==1 ); if( pExpr->bDesc==0 ){ if( iLhs<iRhs ) return -1; return (iLhs > iRhs); }else{ if( iLhs>iRhs ) return -1; return (iLhs < iRhs); } } static void fts5ExprSetEof(Fts5ExprNode *pNode){ int i; pNode->bEof = 1; for(i=0; i<pNode->nChild; i++){ fts5ExprSetEof(pNode->apChild[i]); } } static void fts5ExprNodeZeroPoslist(Fts5ExprNode *pNode){ if( pNode->eType==FTS5_STRING || pNode->eType==FTS5_TERM ){ Fts5ExprNearset *pNear = pNode->pNear; int i; for(i=0; i<pNear->nPhrase; i++){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; pPhrase->poslist.n = 0; } }else{ int i; for(i=0; i<pNode->nChild; i++){ fts5ExprNodeZeroPoslist(pNode->apChild[i]); } } } static int fts5ExprNodeNext(Fts5Expr*, Fts5ExprNode*, int, i64); /* ** Argument pNode is an FTS5_AND node. */ static int fts5ExprAndNextRowid( Fts5Expr *pExpr, /* Expression pPhrase belongs to */ Fts5ExprNode *pAnd /* FTS5_AND node to advance */ ){ int iChild; i64 iLast = pAnd->iRowid; int rc = SQLITE_OK; int bMatch; assert( pAnd->bEof==0 ); do { pAnd->bNomatch = 0; bMatch = 1; for(iChild=0; iChild<pAnd->nChild; iChild++){ Fts5ExprNode *pChild = pAnd->apChild[iChild]; if( 0 && pChild->eType==FTS5_STRING ){ /* TODO */ }else{ int cmp = fts5RowidCmp(pExpr, iLast, pChild->iRowid); if( cmp>0 ){ /* Advance pChild until it points to iLast or laster */ rc = fts5ExprNodeNext(pExpr, pChild, 1, iLast); if( rc!=SQLITE_OK ) return rc; } } /* If the child node is now at EOF, so is the parent AND node. Otherwise, ** the child node is guaranteed to have advanced at least as far as ** rowid iLast. 
So if it is not at exactly iLast, pChild->iRowid is the ** new lastest rowid seen so far. */ assert( pChild->bEof || fts5RowidCmp(pExpr, iLast, pChild->iRowid)<=0 ); if( pChild->bEof ){ fts5ExprSetEof(pAnd); bMatch = 1; break; }else if( iLast!=pChild->iRowid ){ bMatch = 0; iLast = pChild->iRowid; } if( pChild->bNomatch ){ pAnd->bNomatch = 1; } } }while( bMatch==0 ); if( pAnd->bNomatch && pAnd!=pExpr->pRoot ){ fts5ExprNodeZeroPoslist(pAnd); } pAnd->iRowid = iLast; return SQLITE_OK; } /* ** Compare the values currently indicated by the two nodes as follows: ** ** res = (*p1) - (*p2) ** ** Nodes that point to values that come later in the iteration order are ** considered to be larger. Nodes at EOF are the largest of all. ** ** This means that if the iteration order is ASC, then numerically larger ** rowids are considered larger. Or if it is the default DESC, numerically ** smaller rowids are larger. */ static int fts5NodeCompare( Fts5Expr *pExpr, Fts5ExprNode *p1, Fts5ExprNode *p2 ){ if( p2->bEof ) return -1; if( p1->bEof ) return +1; return fts5RowidCmp(pExpr, p1->iRowid, p2->iRowid); } /* ** Advance node iterator pNode, part of expression pExpr. If argument ** bFromValid is zero, then pNode is advanced exactly once. Or, if argument ** bFromValid is non-zero, then pNode is advanced until it is at or past ** rowid value iFrom. Whether "past" means "less than" or "greater than" ** depends on whether this is an ASC or DESC iterator. 
*/
/* Implementation note: the node is first stepped according to its type and
** then, for every type except FTS5_TERM (which returns early), settled on a
** match by fts5ExprNodeNextMatch(). A node that is already at EOF is left
** untouched and SQLITE_OK is returned. */
static int fts5ExprNodeNext(
  Fts5Expr *pExpr,
  Fts5ExprNode *pNode,
  int bFromValid,
  i64 iFrom
){
  int rc = SQLITE_OK;

  if( pNode->bEof==0 ){
    switch( pNode->eType ){
      case FTS5_STRING: {
        rc = fts5ExprNearAdvanceFirst(pExpr, pNode, bFromValid, iFrom);
        break;
      };

      case FTS5_TERM: {
        rc = fts5ExprNearAdvanceFirst(pExpr, pNode, bFromValid, iFrom);
        if( pNode->bEof==0 ){
          assert( rc==SQLITE_OK );
          rc = fts5ExprTokenTest(pExpr, pNode);
        }
        /* Early return: this path skips both the fts5ExprNodeNextMatch()
        ** call and the assert() at the bottom of this function. */
        return rc;
      };

      case FTS5_AND: {
        /* Only the first child is stepped here. The other children are
        ** brought into line by the fts5ExprNodeNextMatch() call below. */
        Fts5ExprNode *pLeft = pNode->apChild[0];
        rc = fts5ExprNodeNext(pExpr, pLeft, bFromValid, iFrom);
        break;
      }

      case FTS5_OR: {
        int i;
        i64 iLast = pNode->iRowid;

        /* Step every live child that either sits on the rowid this node
        ** currently reports, or (when bFromValid is set) is still before
        ** iFrom in iteration order. */
        for(i=0; rc==SQLITE_OK && i<pNode->nChild; i++){
          Fts5ExprNode *p1 = pNode->apChild[i];
          assert( p1->bEof || fts5RowidCmp(pExpr, p1->iRowid, iLast)>=0 );
          if( p1->bEof==0 ){
            if( (p1->iRowid==iLast)
             || (bFromValid && fts5RowidCmp(pExpr, p1->iRowid, iFrom)<0)
            ){
              rc = fts5ExprNodeNext(pExpr, p1, bFromValid, iFrom);
            }
          }
        }

        break;
      }

      default: assert( pNode->eType==FTS5_NOT ); {
        /* For a NOT node only the left-hand child is stepped here. */
        assert( pNode->nChild==2 );
        rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom);
        break;
      }
    }

    if( rc==SQLITE_OK ){
      rc = fts5ExprNodeNextMatch(pExpr, pNode);
    }
  }

  /* Assert that if bFromValid was true, either:
  **
  **   a) an error occurred, or
  **   b) the node is now at EOF, or
  **   c) the node is now at or past rowid iFrom.
  */
  assert( bFromValid==0
      || rc!=SQLITE_OK                                                  /* a */
      || pNode->bEof                                                    /* b */
      || pNode->iRowid==iFrom || pExpr->bDesc==(pNode->iRowid<iFrom)    /* c */
  );

  return rc;
}

/*
** If pNode currently points to a match, this function returns SQLITE_OK
** without modifying it. Otherwise, pNode is advanced until it does point
** to a match or EOF is reached.
*/ static int fts5ExprNodeNextMatch( Fts5Expr *pExpr, /* Expression of which pNode is a part */ Fts5ExprNode *pNode /* Expression node to test */ ){ int rc = SQLITE_OK; if( pNode->bEof==0 ){ switch( pNode->eType ){ case FTS5_STRING: { /* Advance the iterators until they all point to the same rowid */ rc = fts5ExprNearNextMatch(pExpr, pNode); break; } case FTS5_TERM: { rc = fts5ExprTokenTest(pExpr, pNode); break; } case FTS5_AND: { rc = fts5ExprAndNextRowid(pExpr, pNode); break; } case FTS5_OR: { Fts5ExprNode *pNext = pNode->apChild[0]; int i; for(i=1; i<pNode->nChild; i++){ Fts5ExprNode *pChild = pNode->apChild[i]; int cmp = fts5NodeCompare(pExpr, pNext, pChild); if( cmp>0 || (cmp==0 && pChild->bNomatch==0) ){ pNext = pChild; } } pNode->iRowid = pNext->iRowid; pNode->bEof = pNext->bEof; pNode->bNomatch = pNext->bNomatch; break; } default: assert( pNode->eType==FTS5_NOT ); { Fts5ExprNode *p1 = pNode->apChild[0]; Fts5ExprNode *p2 = pNode->apChild[1]; assert( pNode->nChild==2 ); while( rc==SQLITE_OK && p1->bEof==0 ){ int cmp = fts5NodeCompare(pExpr, p1, p2); if( cmp>0 ){ rc = fts5ExprNodeNext(pExpr, p2, 1, p1->iRowid); cmp = fts5NodeCompare(pExpr, p1, p2); } assert( rc!=SQLITE_OK || cmp<=0 ); if( cmp || p2->bNomatch ) break; rc = fts5ExprNodeNext(pExpr, p1, 0, 0); } pNode->bEof = p1->bEof; pNode->iRowid = p1->iRowid; break; } } } return rc; } /* ** Set node pNode, which is part of expression pExpr, to point to the first ** match. If there are no matches, set the Node.bEof flag to indicate EOF. ** ** Return an SQLite error code if an error occurs, or SQLITE_OK otherwise. ** It is not an error if there are no matches. */ static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ int rc = SQLITE_OK; pNode->bEof = 0; if( Fts5NodeIsString(pNode) ){ /* Initialize all term iterators in the NEAR object. 
*/ rc = fts5ExprNearInitAll(pExpr, pNode); }else{ int i; for(i=0; i<pNode->nChild && rc==SQLITE_OK; i++){ rc = fts5ExprNodeFirst(pExpr, pNode->apChild[i]); } pNode->iRowid = pNode->apChild[0]->iRowid; } if( rc==SQLITE_OK ){ rc = fts5ExprNodeNextMatch(pExpr, pNode); } return rc; } /* ** Begin iterating through the set of documents in index pIdx matched by ** the MATCH expression passed as the first argument. If the "bDesc" ** parameter is passed a non-zero value, iteration is in descending rowid ** order. Or, if it is zero, in ascending order. ** ** If iterating in ascending rowid order (bDesc==0), the first document ** visited is that with the smallest rowid that is larger than or equal ** to parameter iFirst. Or, if iterating in ascending order (bDesc==1), ** then the first document visited must have a rowid smaller than or ** equal to iFirst. ** ** Return SQLITE_OK if successful, or an SQLite error code otherwise. It ** is not considered an error if the query does not match any documents. */ int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, i64 iFirst, int bDesc){ Fts5ExprNode *pRoot = p->pRoot; int rc = SQLITE_OK; if( pRoot ){ p->pIndex = pIdx; p->bDesc = bDesc; rc = fts5ExprNodeFirst(p, pRoot); /* If not at EOF but the current rowid occurs earlier than iFirst in ** the iteration order, move to document iFirst or later. */ if( pRoot->bEof==0 && fts5RowidCmp(p, pRoot->iRowid, iFirst)<0 ){ rc = fts5ExprNodeNext(p, pRoot, 1, iFirst); } /* If the iterator is not at a real match, skip forward until it is. */ while( pRoot->bNomatch && rc==SQLITE_OK && pRoot->bEof==0 ){ rc = fts5ExprNodeNext(p, pRoot, 0, 0); } } return rc; } /* ** Move to the next document ** ** Return SQLITE_OK if successful, or an SQLite error code otherwise. It ** is not considered an error if the query does not match any documents. 
*/ int sqlite3Fts5ExprNext(Fts5Expr *p, i64 iLast){ int rc; Fts5ExprNode *pRoot = p->pRoot; do { rc = fts5ExprNodeNext(p, pRoot, 0, 0); }while( pRoot->bNomatch && pRoot->bEof==0 && rc==SQLITE_OK ); if( fts5RowidCmp(p, pRoot->iRowid, iLast)>0 ){ pRoot->bEof = 1; } return rc; } int sqlite3Fts5ExprEof(Fts5Expr *p){ return (p->pRoot==0 || p->pRoot->bEof); } i64 sqlite3Fts5ExprRowid(Fts5Expr *p){ return p->pRoot->iRowid; } static int fts5ParseStringFromToken(Fts5Token *pToken, char **pz){ int rc = SQLITE_OK; *pz = sqlite3Fts5Strndup(&rc, pToken->p, pToken->n); return rc; } /* ** Free the phrase object passed as the only argument. */ static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){ if( pPhrase ){ int i; for(i=0; i<pPhrase->nTerm; i++){ Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; sqlite3_free(pTerm->zTerm); if( pTerm->pIter ){ sqlite3Fts5IterClose(pTerm->pIter); } } if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist); sqlite3_free(pPhrase); } } /* ** If argument pNear is NULL, then a new Fts5ExprNearset object is allocated ** and populated with pPhrase. Or, if pNear is not NULL, phrase pPhrase is ** appended to it and the results returned. ** ** If an OOM error occurs, both the pNear and pPhrase objects are freed and ** NULL returned. 
*/ Fts5ExprNearset *sqlite3Fts5ParseNearset( Fts5Parse *pParse, /* Parse context */ Fts5ExprNearset *pNear, /* Existing nearset, or NULL */ Fts5ExprPhrase *pPhrase /* Recently parsed phrase */ ){ const int SZALLOC = 8; Fts5ExprNearset *pRet = 0; if( pParse->rc==SQLITE_OK ){ if( pPhrase==0 ){ return pNear; } if( pNear==0 ){ int nByte = sizeof(Fts5ExprNearset) + SZALLOC * sizeof(Fts5ExprPhrase*); pRet = sqlite3_malloc(nByte); if( pRet==0 ){ pParse->rc = SQLITE_NOMEM; }else{ memset(pRet, 0, nByte); } }else if( (pNear->nPhrase % SZALLOC)==0 ){ int nNew = pNear->nPhrase + SZALLOC; int nByte = sizeof(Fts5ExprNearset) + nNew * sizeof(Fts5ExprPhrase*); pRet = (Fts5ExprNearset*)sqlite3_realloc(pNear, nByte); if( pRet==0 ){ pParse->rc = SQLITE_NOMEM; } }else{ pRet = pNear; } } if( pRet==0 ){ assert( pParse->rc!=SQLITE_OK ); sqlite3Fts5ParseNearsetFree(pNear); sqlite3Fts5ParsePhraseFree(pPhrase); }else{ pRet->apPhrase[pRet->nPhrase++] = pPhrase; } return pRet; } typedef struct TokenCtx TokenCtx; struct TokenCtx { Fts5ExprPhrase *pPhrase; }; /* ** Callback for tokenizing terms used by ParseTerm(). */ static int fts5ParseTokenize( void *pContext, /* Pointer to Fts5InsertCtx object */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ int iEnd /* End offset of token */ ){ int rc = SQLITE_OK; const int SZALLOC = 8; TokenCtx *pCtx = (TokenCtx*)pContext; Fts5ExprPhrase *pPhrase = pCtx->pPhrase; Fts5ExprTerm *pTerm; if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){ Fts5ExprPhrase *pNew; int nNew = SZALLOC + (pPhrase ? 
pPhrase->nTerm : 0); pNew = (Fts5ExprPhrase*)sqlite3_realloc(pPhrase, sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew ); if( pNew==0 ) return SQLITE_NOMEM; if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase)); pCtx->pPhrase = pPhrase = pNew; pNew->nTerm = nNew - SZALLOC; } pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; memset(pTerm, 0, sizeof(Fts5ExprTerm)); pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken); return rc; } /* ** Free the phrase object passed as the only argument. */ void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase *pPhrase){ fts5ExprPhraseFree(pPhrase); } /* ** Free the phrase object passed as the second argument. */ void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset *pNear){ if( pNear ){ int i; for(i=0; i<pNear->nPhrase; i++){ fts5ExprPhraseFree(pNear->apPhrase[i]); } sqlite3_free(pNear->pColset); sqlite3_free(pNear); } } void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p){ assert( pParse->pExpr==0 ); pParse->pExpr = p; } /* ** This function is called by the parser to process a string token. The ** string may or may not be quoted. In any case it is tokenized and a ** phrase object consisting of all tokens returned. 
*/
Fts5ExprPhrase *sqlite3Fts5ParseTerm(
  Fts5Parse *pParse,              /* Parse context */
  Fts5ExprPhrase *pAppend,        /* Phrase to append to */
  Fts5Token *pToken,              /* String to tokenize */
  int bPrefix                     /* True if there is a trailing "*" */
){
  Fts5Config *pConfig = pParse->pConfig;
  TokenCtx sCtx;                  /* Context object passed to callback */
  int rc;                         /* Tokenize return code */
  char *z = 0;

  /* The tokenizer callback appends terms to sCtx.pPhrase. Starting from
  ** pAppend means new terms extend the existing phrase, if there is one. */
  memset(&sCtx, 0, sizeof(TokenCtx));
  sCtx.pPhrase = pAppend;

  /* Work on a private, dequoted copy of the token text. */
  rc = fts5ParseStringFromToken(pToken, &z);
  if( rc==SQLITE_OK ){
    sqlite3Fts5Dequote(z);
    rc = sqlite3Fts5Tokenize(pConfig, z, strlen(z), &sCtx, fts5ParseTokenize);
  }
  sqlite3_free(z);
  if( rc ){
    /* On error the phrase (including one passed in as pAppend) is freed
    ** and NULL is returned, with the error code left in pParse->rc. */
    pParse->rc = rc;
    fts5ExprPhraseFree(sCtx.pPhrase);
    sCtx.pPhrase = 0;
  }else if( sCtx.pPhrase ){

    if( pAppend==0 ){
      /* A brand new phrase: reserve a slot for it in pParse->apPhrase[].
      ** The array is grown 8 slots at a time. */
      if( (pParse->nPhrase % 8)==0 ){
        int nByte = sizeof(Fts5ExprPhrase*) * (pParse->nPhrase + 8);
        Fts5ExprPhrase **apNew;
        apNew = (Fts5ExprPhrase**)sqlite3_realloc(pParse->apPhrase, nByte);
        if( apNew==0 ){
          pParse->rc = SQLITE_NOMEM;
          fts5ExprPhraseFree(sCtx.pPhrase);
          return 0;
        }
        pParse->apPhrase = apNew;
      }
      pParse->nPhrase++;
    }

    /* The last slot is (re)written in the append case too, so that it
    ** always holds the current value of sCtx.pPhrase. */
    pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase;
    assert( sCtx.pPhrase->nTerm>0 );
    /* The prefix flag is applied to the final term of the phrase. */
    sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = bPrefix;
  }

  return sCtx.pPhrase;
}

/*
** Token pTok has appeared in a MATCH expression where the NEAR operator
** is expected. If token pTok does not contain "NEAR", store an error
** in the pParse object.
*/ void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){ if( pTok->n!=4 || memcmp("NEAR", pTok->p, 4) ){ sqlite3Fts5ParseError( pParse, "fts5: syntax error near \"%.*s\"", pTok->n, pTok->p ); } } void sqlite3Fts5ParseSetDistance( Fts5Parse *pParse, Fts5ExprNearset *pNear, Fts5Token *p ){ int nNear = 0; int i; if( p->n ){ for(i=0; i<p->n; i++){ char c = (char)p->p[i]; if( c<'0' || c>'9' ){ sqlite3Fts5ParseError( pParse, "expected integer, got \"%.*s\"", p->n, p->p ); return; } nNear = nNear * 10 + (p->p[i] - '0'); } }else{ nNear = FTS5_DEFAULT_NEARDIST; } pNear->nNear = nNear; } /* ** The second argument passed to this function may be NULL, or it may be ** an existing Fts5ExprColset object. This function returns a pointer to ** a new colset object containing the contents of (p) with new value column ** number iCol appended. ** ** If an OOM error occurs, store an error code in pParse and return NULL. ** The old colset object (if any) is not freed in this case. */ static Fts5ExprColset *fts5ParseColset( Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */ Fts5ExprColset *p, /* Existing colset object */ int iCol /* New column to add to colset object */ ){ int nCol = p ? p->nCol : 0; /* Num. columns already in colset object */ Fts5ExprColset *pNew; /* New colset object to return */ assert( pParse->rc==SQLITE_OK ); assert( iCol>=0 && iCol<pParse->pConfig->nCol ); pNew = sqlite3_realloc(p, sizeof(Fts5ExprColset) + sizeof(int)*nCol); if( pNew==0 ){ pParse->rc = SQLITE_NOMEM; }else{ int *aiCol = pNew->aiCol; int i, j; for(i=0; i<nCol; i++){ if( aiCol[i]==iCol ) return pNew; if( aiCol[i]>iCol ) break; } for(j=nCol; j>i; j--){ aiCol[j] = aiCol[j-1]; } aiCol[i] = iCol; pNew->nCol = nCol+1; #ifndef NDEBUG /* Check that the array is in order and contains no duplicate entries. 
*/ for(i=1; i<pNew->nCol; i++) assert( pNew->aiCol[i]>pNew->aiCol[i-1] ); #endif } return pNew; } Fts5ExprColset *sqlite3Fts5ParseColset( Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */ Fts5ExprColset *pColset, /* Existing colset object */ Fts5Token *p ){ Fts5ExprColset *pRet = 0; int iCol; char *z; /* Dequoted copy of token p */ z = sqlite3Fts5Strndup(&pParse->rc, p->p, p->n); if( pParse->rc==SQLITE_OK ){ Fts5Config *pConfig = pParse->pConfig; sqlite3Fts5Dequote(z); for(iCol=0; iCol<pConfig->nCol; iCol++){ if( 0==sqlite3_stricmp(pConfig->azCol[iCol], z) ) break; } if( iCol==pConfig->nCol ){ sqlite3Fts5ParseError(pParse, "no such column: %s", z); }else{ pRet = fts5ParseColset(pParse, pColset, iCol); } sqlite3_free(z); } if( pRet==0 ){ assert( pParse->rc!=SQLITE_OK ); sqlite3_free(pColset); } return pRet; } void sqlite3Fts5ParseSetColset( Fts5Parse *pParse, Fts5ExprNearset *pNear, Fts5ExprColset *pColset ){ if( pNear ){ pNear->pColset = pColset; }else{ sqlite3_free(pColset); } } static void fts5ExprAddChildren(Fts5ExprNode *p, Fts5ExprNode *pSub){ if( p->eType!=FTS5_NOT && pSub->eType==p->eType ){ int nByte = sizeof(Fts5ExprNode*) * pSub->nChild; memcpy(&p->apChild[p->nChild], pSub->apChild, nByte); p->nChild += pSub->nChild; sqlite3_free(pSub); }else{ p->apChild[p->nChild++] = pSub; } } /* ** Allocate and return a new expression object. If anything goes wrong (i.e. ** OOM error), leave an error code in pParse and return NULL. 
*/ Fts5ExprNode *sqlite3Fts5ParseNode( Fts5Parse *pParse, /* Parse context */ int eType, /* FTS5_STRING, AND, OR or NOT */ Fts5ExprNode *pLeft, /* Left hand child expression */ Fts5ExprNode *pRight, /* Right hand child expression */ Fts5ExprNearset *pNear /* For STRING expressions, the near cluster */ ){ Fts5ExprNode *pRet = 0; if( pParse->rc==SQLITE_OK ){ int nChild = 0; /* Number of children of returned node */ int nByte; /* Bytes of space to allocate for this node */ assert( (eType!=FTS5_STRING && !pNear) || (eType==FTS5_STRING && !pLeft && !pRight) ); if( eType==FTS5_STRING && pNear==0 ) return 0; if( eType!=FTS5_STRING && pLeft==0 ) return pRight; if( eType!=FTS5_STRING && pRight==0 ) return pLeft; if( eType==FTS5_NOT ){ nChild = 2; }else if( eType==FTS5_AND || eType==FTS5_OR ){ nChild = 2; if( pLeft->eType==eType ) nChild += pLeft->nChild-1; if( pRight->eType==eType ) nChild += pRight->nChild-1; } nByte = sizeof(Fts5ExprNode) + sizeof(Fts5ExprNode*)*nChild; pRet = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte); if( pRet ){ pRet->eType = eType; pRet->pNear = pNear; if( eType==FTS5_STRING ){ int iPhrase; for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){ pNear->apPhrase[iPhrase]->pNode = pRet; } if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 ){ pRet->eType = FTS5_TERM; } }else{ fts5ExprAddChildren(pRet, pLeft); fts5ExprAddChildren(pRet, pRight); } } } if( pRet==0 ){ assert( pParse->rc!=SQLITE_OK ); sqlite3Fts5ParseNodeFree(pLeft); sqlite3Fts5ParseNodeFree(pRight); sqlite3Fts5ParseNearsetFree(pNear); } return pRet; } static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){ char *zQuoted = sqlite3_malloc(strlen(pTerm->zTerm) * 2 + 3 + 2); if( zQuoted ){ int i = 0; char *zIn = pTerm->zTerm; zQuoted[i++] = '"'; while( *zIn ){ if( *zIn=='"' ) zQuoted[i++] = '"'; zQuoted[i++] = *zIn++; } zQuoted[i++] = '"'; if( pTerm->bPrefix ){ zQuoted[i++] = ' '; zQuoted[i++] = '*'; } zQuoted[i++] = '\0'; } return zQuoted; } static char *fts5PrintfAppend(char *zApp, 
const char *zFmt, ...){ char *zNew; va_list ap; va_start(ap, zFmt); zNew = sqlite3_vmprintf(zFmt, ap); va_end(ap); if( zApp && zNew ){ char *zNew2 = sqlite3_mprintf("%s%s", zApp, zNew); sqlite3_free(zNew); zNew = zNew2; } sqlite3_free(zApp); return zNew; } /* ** Compose a tcl-readable representation of expression pExpr. Return a ** pointer to a buffer containing that representation. It is the ** responsibility of the caller to at some point free the buffer using ** sqlite3_free(). */ static char *fts5ExprPrintTcl( Fts5Config *pConfig, const char *zNearsetCmd, Fts5ExprNode *pExpr ){ char *zRet = 0; if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){ Fts5ExprNearset *pNear = pExpr->pNear; int i; int iTerm; zRet = fts5PrintfAppend(zRet, "%s ", zNearsetCmd); if( zRet==0 ) return 0; if( pNear->pColset ){ int *aiCol = pNear->pColset->aiCol; int nCol = pNear->pColset->nCol; if( nCol==1 ){ zRet = fts5PrintfAppend(zRet, "-col %d ", aiCol[0]); }else{ zRet = fts5PrintfAppend(zRet, "-col {%d", aiCol[0]); for(i=1; i<pNear->pColset->nCol; i++){ zRet = fts5PrintfAppend(zRet, " %d", aiCol[i]); } zRet = fts5PrintfAppend(zRet, "} "); } if( zRet==0 ) return 0; } if( pNear->nPhrase>1 ){ zRet = fts5PrintfAppend(zRet, "-near %d ", pNear->nNear); if( zRet==0 ) return 0; } zRet = fts5PrintfAppend(zRet, "--"); if( zRet==0 ) return 0; for(i=0; i<pNear->nPhrase; i++){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; zRet = fts5PrintfAppend(zRet, " {"); for(iTerm=0; zRet && iTerm<pPhrase->nTerm; iTerm++){ char *zTerm = pPhrase->aTerm[iTerm].zTerm; zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" ", zTerm); } if( zRet ) zRet = fts5PrintfAppend(zRet, "}"); if( zRet==0 ) return 0; } }else{ char const *zOp = 0; int i; switch( pExpr->eType ){ case FTS5_AND: zOp = "AND"; break; case FTS5_NOT: zOp = "NOT"; break; default: assert( pExpr->eType==FTS5_OR ); zOp = "OR"; break; } zRet = sqlite3_mprintf("%s", zOp); for(i=0; zRet && i<pExpr->nChild; i++){ char *z = fts5ExprPrintTcl(pConfig, 
zNearsetCmd, pExpr->apChild[i]); if( !z ){ sqlite3_free(zRet); zRet = 0; }else{ zRet = fts5PrintfAppend(zRet, " [%z]", z); } } } return zRet; } static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){ char *zRet = 0; if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){ Fts5ExprNearset *pNear = pExpr->pNear; int i; int iTerm; if( pNear->pColset ){ int iCol = pNear->pColset->aiCol[0]; zRet = fts5PrintfAppend(zRet, "%s : ", pConfig->azCol[iCol]); if( zRet==0 ) return 0; } if( pNear->nPhrase>1 ){ zRet = fts5PrintfAppend(zRet, "NEAR("); if( zRet==0 ) return 0; } for(i=0; i<pNear->nPhrase; i++){ Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; if( i!=0 ){ zRet = fts5PrintfAppend(zRet, " "); if( zRet==0 ) return 0; } for(iTerm=0; iTerm<pPhrase->nTerm; iTerm++){ char *zTerm = fts5ExprTermPrint(&pPhrase->aTerm[iTerm]); if( zTerm ){ zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" + ", zTerm); sqlite3_free(zTerm); } if( zTerm==0 || zRet==0 ){ sqlite3_free(zRet); return 0; } } } if( pNear->nPhrase>1 ){ zRet = fts5PrintfAppend(zRet, ", %d)", pNear->nNear); if( zRet==0 ) return 0; } }else{ char const *zOp = 0; int i; switch( pExpr->eType ){ case FTS5_AND: zOp = " AND "; break; case FTS5_NOT: zOp = " NOT "; break; default: assert( pExpr->eType==FTS5_OR ); zOp = " OR "; break; } for(i=0; i<pExpr->nChild; i++){ char *z = fts5ExprPrint(pConfig, pExpr->apChild[i]); if( z==0 ){ sqlite3_free(zRet); zRet = 0; }else{ int e = pExpr->apChild[i]->eType; int b = (e!=FTS5_STRING && e!=FTS5_TERM); zRet = fts5PrintfAppend(zRet, "%s%s%z%s", (i==0 ? "" : zOp), (b?"(":""), z, (b?")":"") ); } if( zRet==0 ) break; } } return zRet; } /* ** The implementation of user-defined scalar functions fts5_expr() (bTcl==0) ** and fts5_expr_tcl() (bTcl!=0). 
*/ static void fts5ExprFunction( sqlite3_context *pCtx, /* Function call context */ int nArg, /* Number of args */ sqlite3_value **apVal, /* Function arguments */ int bTcl ){ Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_data(pCtx); sqlite3 *db = sqlite3_context_db_handle(pCtx); const char *zExpr = 0; char *zErr = 0; Fts5Expr *pExpr = 0; int rc; int i; const char **azConfig; /* Array of arguments for Fts5Config */ const char *zNearsetCmd = "nearset"; int nConfig; /* Size of azConfig[] */ Fts5Config *pConfig = 0; int iArg = 1; if( bTcl && nArg>1 ){ zNearsetCmd = (const char*)sqlite3_value_text(apVal[1]); iArg = 2; } nConfig = 3 + (nArg-iArg); azConfig = (const char**)sqlite3_malloc(sizeof(char*) * nConfig); if( azConfig==0 ){ sqlite3_result_error_nomem(pCtx); return; } azConfig[0] = 0; azConfig[1] = "main"; azConfig[2] = "tbl"; for(i=3; iArg<nArg; iArg++){ azConfig[i++] = (const char*)sqlite3_value_text(apVal[iArg]); } zExpr = (const char*)sqlite3_value_text(apVal[0]); rc = sqlite3Fts5ConfigParse(pGlobal, db, nConfig, azConfig, &pConfig, &zErr); if( rc==SQLITE_OK ){ rc = sqlite3Fts5ExprNew(pConfig, zExpr, &pExpr, &zErr); } if( rc==SQLITE_OK ){ char *zText; if( pExpr->pRoot==0 ){ zText = sqlite3_mprintf(""); }else if( bTcl ){ zText = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRoot); }else{ zText = fts5ExprPrint(pConfig, pExpr->pRoot); } if( zText==0 ){ rc = SQLITE_NOMEM; }else{ sqlite3_result_text(pCtx, zText, -1, SQLITE_TRANSIENT); sqlite3_free(zText); } } if( rc!=SQLITE_OK ){ if( zErr ){ sqlite3_result_error(pCtx, zErr, -1); sqlite3_free(zErr); }else{ sqlite3_result_error_code(pCtx, rc); } } sqlite3_free((void *)azConfig); sqlite3Fts5ConfigFree(pConfig); sqlite3Fts5ExprFree(pExpr); } static void fts5ExprFunctionHr( sqlite3_context *pCtx, /* Function call context */ int nArg, /* Number of args */ sqlite3_value **apVal /* Function arguments */ ){ fts5ExprFunction(pCtx, nArg, apVal, 0); } static void fts5ExprFunctionTcl( sqlite3_context *pCtx, /* Function call 
context */ int nArg, /* Number of args */ sqlite3_value **apVal /* Function arguments */ ){ fts5ExprFunction(pCtx, nArg, apVal, 1); } /* ** The implementation of an SQLite user-defined-function that accepts a ** single integer as an argument. If the integer is an alpha-numeric ** unicode code point, 1 is returned. Otherwise 0. */ static void fts5ExprIsAlnum( sqlite3_context *pCtx, /* Function call context */ int nArg, /* Number of args */ sqlite3_value **apVal /* Function arguments */ ){ int iCode; if( nArg!=1 ){ sqlite3_result_error(pCtx, "wrong number of arguments to function fts5_isalnum", -1 ); return; } iCode = sqlite3_value_int(apVal[0]); sqlite3_result_int(pCtx, sqlite3Fts5UnicodeIsalnum(iCode)); } static void fts5ExprFold( sqlite3_context *pCtx, /* Function call context */ int nArg, /* Number of args */ sqlite3_value **apVal /* Function arguments */ ){ if( nArg!=1 && nArg!=2 ){ sqlite3_result_error(pCtx, "wrong number of arguments to function fts5_fold", -1 ); }else{ int iCode; int bRemoveDiacritics = 0; iCode = sqlite3_value_int(apVal[0]); if( nArg==2 ) bRemoveDiacritics = sqlite3_value_int(apVal[1]); sqlite3_result_int(pCtx, sqlite3Fts5UnicodeFold(iCode, bRemoveDiacritics)); } } /* ** This is called during initialization to register the fts5_expr() scalar ** UDF with the SQLite handle passed as the only argument. */ int sqlite3Fts5ExprInit(Fts5Global *pGlobal, sqlite3 *db){ struct Fts5ExprFunc { const char *z; void (*x)(sqlite3_context*,int,sqlite3_value**); } aFunc[] = { { "fts5_expr", fts5ExprFunctionHr }, { "fts5_expr_tcl", fts5ExprFunctionTcl }, { "fts5_isalnum", fts5ExprIsAlnum }, { "fts5_fold", fts5ExprFold }, }; int i; int rc = SQLITE_OK; void *pCtx = (void*)pGlobal; for(i=0; rc==SQLITE_OK && i<(sizeof(aFunc) / sizeof(aFunc[0])); i++){ struct Fts5ExprFunc *p = &aFunc[i]; rc = sqlite3_create_function(db, p->z, -1, SQLITE_UTF8, pCtx, p->x, 0, 0); } return rc; } /* ** Return the number of phrases in expression pExpr. 
*/ int sqlite3Fts5ExprPhraseCount(Fts5Expr *pExpr){ return pExpr->nPhrase; } /* ** Return the number of terms in the iPhrase'th phrase in pExpr. */ int sqlite3Fts5ExprPhraseSize(Fts5Expr *pExpr, int iPhrase){ if( iPhrase<0 || iPhrase>=pExpr->nPhrase ) return 0; return pExpr->apExprPhrase[iPhrase]->nTerm; } /* ** This function is used to access the current position list for phrase ** iPhrase. */ int sqlite3Fts5ExprPoslist(Fts5Expr *pExpr, int iPhrase, const u8 **pa){ int nRet; Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase]; Fts5ExprNode *pNode = pPhrase->pNode; if( pNode->bEof==0 && pNode->iRowid==pExpr->pRoot->iRowid ){ *pa = pPhrase->poslist.p; nRet = pPhrase->poslist.n; }else{ *pa = 0; nRet = 0; } return nRet; } #endif /* SQLITE_ENABLE_FTS5 */ |
Added ext/fts5/fts5_hash.c.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 
294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 | /* ** 2014 August 11 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** */ #ifdef SQLITE_ENABLE_FTS5 #include "fts5Int.h" typedef struct Fts5HashEntry Fts5HashEntry; /* ** This file contains the implementation of an in-memory hash table used ** to accumuluate "term -> doclist" content before it is flused to a level-0 ** segment. */ struct Fts5Hash { int *pnByte; /* Pointer to bytes counter */ int nEntry; /* Number of entries currently in hash */ int nSlot; /* Size of aSlot[] array */ Fts5HashEntry *pScan; /* Current ordered scan item */ Fts5HashEntry **aSlot; /* Array of hash slots */ }; /* ** Each entry in the hash table is represented by an object of the ** following type. Each object, its key (zKey[]) and its current data ** are stored in a single memory allocation. The position list data ** immediately follows the key data in memory. ** ** The data that follows the key is in a similar, but not identical format ** to the doclist data stored in the database. 
It is: ** ** * Rowid, as a varint ** * Position list, without 0x00 terminator. ** * Size of previous position list and rowid, as a 4 byte ** big-endian integer. ** ** iRowidOff: ** Offset of last rowid written to data area. Relative to first byte of ** structure. ** ** nData: ** Bytes of data written since iRowidOff. */ struct Fts5HashEntry { Fts5HashEntry *pHashNext; /* Next hash entry with same hash-key */ Fts5HashEntry *pScanNext; /* Next entry in sorted order */ int nAlloc; /* Total size of allocation */ int iSzPoslist; /* Offset of space for 4-byte poslist size */ int nData; /* Total bytes of data (incl. structure) */ u8 bDel; /* Set delete-flag @ iSzPoslist */ int iCol; /* Column of last value written */ int iPos; /* Position of last value written */ i64 iRowid; /* Rowid of last value written */ char zKey[0]; /* Nul-terminated entry key */ }; /* ** Allocate a new hash table. */ int sqlite3Fts5HashNew(Fts5Hash **ppNew, int *pnByte){ int rc = SQLITE_OK; Fts5Hash *pNew; *ppNew = pNew = (Fts5Hash*)sqlite3_malloc(sizeof(Fts5Hash)); if( pNew==0 ){ rc = SQLITE_NOMEM; }else{ int nByte; memset(pNew, 0, sizeof(Fts5Hash)); pNew->pnByte = pnByte; pNew->nSlot = 1024; nByte = sizeof(Fts5HashEntry*) * pNew->nSlot; pNew->aSlot = (Fts5HashEntry**)sqlite3_malloc(nByte); if( pNew->aSlot==0 ){ sqlite3_free(pNew); *ppNew = 0; rc = SQLITE_NOMEM; }else{ memset(pNew->aSlot, 0, nByte); } } return rc; } /* ** Free a hash table object. */ void sqlite3Fts5HashFree(Fts5Hash *pHash){ if( pHash ){ sqlite3Fts5HashClear(pHash); sqlite3_free(pHash->aSlot); sqlite3_free(pHash); } } /* ** Empty (but do not delete) a hash table. 
*/
void sqlite3Fts5HashClear(Fts5Hash *pHash){
  int i;
  for(i=0; i<pHash->nSlot; i++){
    Fts5HashEntry *pNext;
    Fts5HashEntry *pSlot;
    /* Free every entry on this slot's collision chain. */
    for(pSlot=pHash->aSlot[i]; pSlot; pSlot=pNext){
      pNext = pSlot->pHashNext;
      sqlite3_free(pSlot);
    }
  }
  memset(pHash->aSlot, 0, pHash->nSlot * sizeof(Fts5HashEntry*));
  pHash->nEntry = 0;
}

/*
** Return the slot number, in the range 0..nSlot-1, for the n byte key p.
** Bytes are mixed in from the last to the first.
*/
static unsigned int fts5HashKey(int nSlot, const char *p, int n){
  int i;
  unsigned int h = 13;
  for(i=n-1; i>=0; i--){
    h = (h << 3) ^ h ^ p[i];
  }
  return (h % nSlot);
}

/*
** As fts5HashKey(), but for a key made up of the single byte b followed
** by the n bytes at p. Byte b is mixed in last, so the result equals that
** of fts5HashKey() applied to the concatenated key.
*/
static unsigned int fts5HashKey2(int nSlot, char b, const char *p, int n){
  int i;
  unsigned int h = 13;
  for(i=n-1; i>=0; i--){
    h = (h << 3) ^ h ^ p[i];
  }
  h = (h << 3) ^ h ^ b;
  return (h % nSlot);
}

/*
** Resize the hash table by doubling the number of slots.
**
** Returns SQLITE_OK, or SQLITE_NOMEM if the new slot array cannot be
** allocated, in which case the table is left unchanged.
*/
static int fts5HashResize(Fts5Hash *pHash){
  int nNew = pHash->nSlot*2;
  int i;
  Fts5HashEntry **apNew;
  Fts5HashEntry **apOld = pHash->aSlot;

  apNew = (Fts5HashEntry**)sqlite3_malloc(nNew*sizeof(Fts5HashEntry*));
  if( !apNew ) return SQLITE_NOMEM;
  memset(apNew, 0, nNew*sizeof(Fts5HashEntry*));

  /* Unlink each entry from its old chain and push it on to the front of
  ** its new chain. The key length is recovered with strlen(). */
  for(i=0; i<pHash->nSlot; i++){
    while( apOld[i] ){
      int iHash;
      Fts5HashEntry *p = apOld[i];
      apOld[i] = p->pHashNext;
      iHash = fts5HashKey(nNew, p->zKey, strlen(p->zKey));
      p->pHashNext = apNew[iHash];
      apNew[iHash] = p;
    }
  }

  sqlite3_free(apOld);
  pHash->nSlot = nNew;
  pHash->aSlot = apNew;
  return SQLITE_OK;
}

/*
** If entry p has a poslist-size byte reserved (p->iSzPoslist!=0), fill it
** in. The value written is (size of data after the reserved byte)*2, plus
** the delete flag. Values up to 127 fit in the reserved byte. Larger values
** are written as a multi-byte varint after moving the data up to make room.
** The delete flag and p->iSzPoslist are then reset to zero.
*/
static void fts5HashAddPoslistSize(Fts5HashEntry *p){
  if( p->iSzPoslist ){
    u8 *pPtr = (u8*)p;
    int nSz = (p->nData - p->iSzPoslist - 1);         /* Size in bytes */
    int nPos = nSz*2 + p->bDel;                       /* Value of nPos field */

    assert( p->bDel==0 || p->bDel==1 );
    if( nPos<=127 ){
      pPtr[p->iSzPoslist] = nPos;
    }else{
      int nByte = sqlite3Fts5GetVarintLen((u32)nPos);
      memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz);
      sqlite3Fts5PutVarint(&pPtr[p->iSzPoslist], nPos);
      p->nData += (nByte-1);
    }
    p->bDel = 0;
    p->iSzPoslist = 0;
  }
}

/*
** Record one token occurrence, or a delete marker when iCol is negative,
** in the hash table. The key is byte bByte followed by the nToken bytes
** at pToken. The entry for the key is created if it does not exist.
**
** *pHash->pnByte is increased by the number of bytes added to the entry.
** Returns SQLITE_OK, or SQLITE_NOMEM if an allocation fails.
*/
int sqlite3Fts5HashWrite(
  Fts5Hash *pHash,
  i64 iRowid,                     /* Rowid for this entry */
  int iCol,                       /* Column token appears in (-ve -> delete) */
  int iPos,                       /* Position of token within column */
  char bByte,                     /* First byte of token */
  const char *pToken, int nToken  /* Token to add or remove to or from index */
){
  unsigned int iHash = fts5HashKey2(pHash->nSlot, bByte, pToken, nToken);
  Fts5HashEntry *p;
  u8 *pPtr;
  int nIncr = 0;                  /* Amount to increment (*pHash->pnByte) by */

  /* Attempt to locate an existing hash entry */
  for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
    if( p->zKey[0]==bByte
     && memcmp(&p->zKey[1], pToken, nToken)==0
     && p->zKey[nToken+1]==0
    ){
      break;
    }
  }

  /* If an existing hash entry cannot be found, create a new one. */
  if( p==0 ){
    int nByte = sizeof(Fts5HashEntry) + (nToken+1) + 1 + 64;
    if( nByte<128 ) nByte = 128;

    /* Grow the table first if needed. That changes the slot count, so the
    ** slot index must be recomputed. */
    if( (pHash->nEntry*2)>=pHash->nSlot ){
      int rc = fts5HashResize(pHash);
      if( rc!=SQLITE_OK ) return rc;
      iHash = fts5HashKey2(pHash->nSlot, bByte, pToken, nToken);
    }

    p = (Fts5HashEntry*)sqlite3_malloc(nByte);
    if( !p ) return SQLITE_NOMEM;
    memset(p, 0, sizeof(Fts5HashEntry));
    p->nAlloc = nByte;
    p->zKey[0] = bByte;
    memcpy(&p->zKey[1], pToken, nToken);
    assert( iHash==fts5HashKey(pHash->nSlot, p->zKey, nToken+1) );
    p->zKey[nToken+1] = '\0';
    /* Data starts after the structure and the nul-terminated key: the
    ** rowid varint first, then one byte reserved for the poslist size. */
    p->nData = nToken+1 + 1 + sizeof(Fts5HashEntry);
    p->nData += sqlite3Fts5PutVarint(&((u8*)p)[p->nData], iRowid);
    p->iSzPoslist = p->nData;
    p->nData += 1;
    p->iRowid = iRowid;
    p->pHashNext = pHash->aSlot[iHash];
    pHash->aSlot[iHash] = p;
    pHash->nEntry++;
    nIncr += p->nData;
  }

  /* Check there is enough space to append a new entry. Worst case scenario
  ** is:
  **
  **     + 9 bytes for a new rowid,
  **     + 4 byte reserved for the "poslist size" varint.
  **     + 1 byte for a "new column" byte,
  **     + 3 bytes for a new column number (16-bit max) as a varint,
  **     + 5 bytes for the new position offset (32-bit max).
  */
  if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){
    int nNew = p->nAlloc * 2;
    Fts5HashEntry *pNew;
    Fts5HashEntry **pp;
    pNew = (Fts5HashEntry*)sqlite3_realloc(p, nNew);
    if( pNew==0 ) return SQLITE_NOMEM;
    pNew->nAlloc = nNew;
    /* The entry may have moved: find the link that pointed at the old
    ** address and point it at the new one. */
    for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pHashNext);
    *pp = pNew;
    p = pNew;
  }
  pPtr = (u8*)p;
  /* Subtract the size before appending; the final size is added back at
  ** the end, so nIncr ends up as the number of bytes added. */
  nIncr -= p->nData;

  /* If this is a new rowid, fill in the poslist-size field for the previous
  ** rowid, then append the new rowid (as a delta from the previous one)
  ** and reserve a poslist-size byte for it. */
  if( iRowid!=p->iRowid ){
    fts5HashAddPoslistSize(p);
    p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iRowid - p->iRowid);
    p->iSzPoslist = p->nData;
    p->nData += 1;
    p->iCol = 0;
    p->iPos = 0;
    p->iRowid = iRowid;
  }

  if( iCol>=0 ){
    /* Append a new column value, if necessary */
    assert( iCol>=p->iCol );
    if( iCol!=p->iCol ){
      pPtr[p->nData++] = 0x01;
      p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iCol);
      p->iCol = iCol;
      p->iPos = 0;
    }

    /* Append the new position offset */
    p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iPos - p->iPos + 2);
    p->iPos = iPos;
  }else{
    /* This is a delete. Set the delete flag. */
    p->bDel = 1;
  }
  nIncr += p->nData;

  *pHash->pnByte += nIncr;
  return SQLITE_OK;
}

/*
** Arguments pLeft and pRight point to linked-lists of hash-entry objects,
** each sorted in key order. This function merges the two lists into a
** single list and returns a pointer to its first element.
*/ static Fts5HashEntry *fts5HashEntryMerge( Fts5HashEntry *pLeft, Fts5HashEntry *pRight ){ Fts5HashEntry *p1 = pLeft; Fts5HashEntry *p2 = pRight; Fts5HashEntry *pRet = 0; Fts5HashEntry **ppOut = &pRet; while( p1 || p2 ){ if( p1==0 ){ *ppOut = p2; p2 = 0; }else if( p2==0 ){ *ppOut = p1; p1 = 0; }else{ int i = 0; while( p1->zKey[i]==p2->zKey[i] ) i++; if( ((u8)p1->zKey[i])>((u8)p2->zKey[i]) ){ /* p2 is smaller */ *ppOut = p2; ppOut = &p2->pScanNext; p2 = p2->pScanNext; }else{ /* p1 is smaller */ *ppOut = p1; ppOut = &p1->pScanNext; p1 = p1->pScanNext; } *ppOut = 0; } } return pRet; } /* ** Extract all tokens from hash table iHash and link them into a list ** in sorted order. The hash table is cleared before returning. It is ** the responsibility of the caller to free the elements of the returned ** list. */ static int fts5HashEntrySort( Fts5Hash *pHash, const char *pTerm, int nTerm, /* Query prefix, if any */ Fts5HashEntry **ppSorted ){ const int nMergeSlot = 32; Fts5HashEntry **ap; Fts5HashEntry *pList; int iSlot; int i; *ppSorted = 0; ap = sqlite3_malloc(sizeof(Fts5HashEntry*) * nMergeSlot); if( !ap ) return SQLITE_NOMEM; memset(ap, 0, sizeof(Fts5HashEntry*) * nMergeSlot); for(iSlot=0; iSlot<pHash->nSlot; iSlot++){ Fts5HashEntry *pIter; for(pIter=pHash->aSlot[iSlot]; pIter; pIter=pIter->pHashNext){ if( pTerm==0 || 0==memcmp(pIter->zKey, pTerm, nTerm) ){ Fts5HashEntry *pEntry = pIter; pEntry->pScanNext = 0; for(i=0; ap[i]; i++){ pEntry = fts5HashEntryMerge(pEntry, ap[i]); ap[i] = 0; } ap[i] = pEntry; } } } pList = 0; for(i=0; i<nMergeSlot; i++){ pList = fts5HashEntryMerge(pList, ap[i]); } pHash->nEntry = 0; sqlite3_free(ap); *ppSorted = pList; return SQLITE_OK; } /* ** Query the hash table for a doclist associated with term pTerm/nTerm. 
*/ int sqlite3Fts5HashQuery( Fts5Hash *pHash, /* Hash table to query */ const char *pTerm, int nTerm, /* Query term */ const u8 **ppDoclist, /* OUT: Pointer to doclist for pTerm */ int *pnDoclist /* OUT: Size of doclist in bytes */ ){ unsigned int iHash = fts5HashKey(pHash->nSlot, pTerm, nTerm); Fts5HashEntry *p; for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ if( memcmp(p->zKey, pTerm, nTerm)==0 && p->zKey[nTerm]==0 ) break; } if( p ){ fts5HashAddPoslistSize(p); *ppDoclist = (const u8*)&p->zKey[nTerm+1]; *pnDoclist = p->nData - (sizeof(*p) + nTerm + 1); }else{ *ppDoclist = 0; *pnDoclist = 0; } return SQLITE_OK; } int sqlite3Fts5HashScanInit( Fts5Hash *p, /* Hash table to query */ const char *pTerm, int nTerm /* Query prefix */ ){ return fts5HashEntrySort(p, pTerm, nTerm, &p->pScan); } void sqlite3Fts5HashScanNext(Fts5Hash *p){ assert( !sqlite3Fts5HashScanEof(p) ); p->pScan = p->pScan->pScanNext; } int sqlite3Fts5HashScanEof(Fts5Hash *p){ return (p->pScan==0); } void sqlite3Fts5HashScanEntry( Fts5Hash *pHash, const char **pzTerm, /* OUT: term (nul-terminated) */ const u8 **ppDoclist, /* OUT: pointer to doclist */ int *pnDoclist /* OUT: size of doclist in bytes */ ){ Fts5HashEntry *p; if( (p = pHash->pScan) ){ int nTerm = strlen(p->zKey); fts5HashAddPoslistSize(p); *pzTerm = p->zKey; *ppDoclist = (const u8*)&p->zKey[nTerm+1]; *pnDoclist = p->nData - (sizeof(*p) + nTerm + 1); }else{ *pzTerm = 0; *ppDoclist = 0; *pnDoclist = 0; } } #endif /* SQLITE_ENABLE_FTS5 */ |
Added ext/fts5/fts5_index.c.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > 
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > 
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > 
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > 
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > 
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 
322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 
822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 
1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 
1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 
2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 
2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 
2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 
3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 
3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 
4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 
4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 
4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 
5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 | /* ** 2014 May 31 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** ** Low level access to the FTS index stored in the database file. The ** routines in this file file implement all read and write access to the ** %_data table. Other parts of the system access this functionality via ** the interface defined in fts5Int.h. */ #ifdef SQLITE_ENABLE_FTS5 #include "fts5Int.h" /* ** Overview: ** ** The %_data table contains all the FTS indexes for an FTS5 virtual table. ** As well as the main term index, there may be up to 31 prefix indexes. ** The format is similar to FTS3/4, except that: ** ** * all segment b-tree leaf data is stored in fixed size page records ** (e.g. 1000 bytes). A single doclist may span multiple pages. Care is ** taken to ensure it is possible to iterate in either direction through ** the entries in a doclist, or to seek to a specific entry within a ** doclist, without loading it into memory. 
**
**   * large doclists that span many pages have associated "doclist index"
**     records that contain a copy of the first docid on each page spanned by
**     the doclist. This is used to speed up seek operations, and merges of
**     large doclists with very small doclists.
**
**   * extra fields in the "structure record" record the state of ongoing
**     incremental merge operations.
**
*/


#define FTS5_OPT_WORK_UNIT  1000  /* Number of leaf pages per optimize step */
#define FTS5_WORK_UNIT      64    /* Number of leaf pages in unit of work */

#define FTS5_MIN_DLIDX_SIZE 4     /* Add dlidx if this many empty pages */

/* NOTE(review): presumably the byte that tags terms belonging to the main
** (non-prefix) index - confirm against the code that writes terms. */
#define FTS5_MAIN_PREFIX '0'

/* The overview comment above allows for at most 31 prefix indexes; refuse
** to build if the configured maximum is larger than that. */
#if FTS5_MAX_PREFIX_INDEXES > 31
# error "FTS5_MAX_PREFIX_INDEXES is too large"
#endif

/*
** Details:
**
** The %_data table managed by this module,
**
**     CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB);
**
** , contains the following 5 types of records. See the comments surrounding
** the FTS5_*_ROWID macros below for a description of how %_data rowids are
** assigned to each of them.
**
** 1. Structure Records:
**
**   The set of segments that make up an index - the index structure - are
**   recorded in a single record within the %_data table. The record consists
**   of a single 32-bit configuration cookie value followed by a list of
**   SQLite varints. If the FTS table features more than one index (because
**   there are one or more prefix indexes), it is guaranteed that all share
**   the same cookie value.
**
**   Immediately following the configuration cookie, the record begins with
**   three varints:
**
**     + number of levels,
**     + total number of segments on all levels,
**     + value of write counter.
**
**   Then, for each level from 0 to nMax:
**
**     + number of input segments in ongoing merge.
**     + total number of segments in level.
**     + for each segment from oldest to newest:
**         + segment id (always > 0)
**         + b-tree height (1 -> root is leaf, 2 -> root is parent of leaf etc.)
** + first leaf page number (often 1, always greater than 0) ** + final leaf page number ** ** 2. The Averages Record: ** ** A single record within the %_data table. The data is a list of varints. ** The first value is the number of rows in the index. Then, for each column ** from left to right, the total number of tokens in the column for all ** rows of the table. ** ** 3. Segment leaves: ** ** TERM DOCLIST FORMAT: ** ** Most of each segment leaf is taken up by term/doclist data. The ** general format of the term/doclist data is: ** ** varint : size of first term ** blob: first term data ** doclist: first doclist ** zero-or-more { ** varint: number of bytes in common with previous term ** varint: number of bytes of new term data (nNew) ** blob: nNew bytes of new term data ** doclist: next doclist ** } ** ** doclist format: ** ** varint: first rowid ** poslist: first poslist ** zero-or-more { ** varint: rowid delta (always > 0) ** poslist: next poslist ** } ** 0x00 byte ** ** poslist format: ** ** varint: size of poslist in bytes multiplied by 2, not including ** this field. Plus 1 if this entry carries the "delete" flag. ** collist: collist for column 0 ** zero-or-more { ** 0x01 byte ** varint: column number (I) ** collist: collist for column I ** } ** ** collist format: ** ** varint: first offset + 2 ** zero-or-more { ** varint: offset delta + 2 ** } ** ** PAGINATION ** ** The format described above is only accurate if the entire term/doclist ** data fits on a single leaf page. If this is not the case, the format ** is changed in two ways: ** ** + if the first rowid on a page occurs before the first term, it ** is stored as a literal value: ** ** varint: first rowid ** ** + the first term on each page is stored in the same way as the ** very first term of the segment: ** ** varint : size of first term ** blob: first term data ** ** Each leaf page begins with: ** ** + 2-byte unsigned containing offset to first rowid (or 0). 
** + 2-byte unsigned containing offset to first term (or 0). ** ** Followed by term/doclist data. ** ** 4. Segment interior nodes: ** ** The interior nodes turn the list of leaves into a b+tree. ** ** Each interior node begins with a varint - the page number of the left ** most child node. Following this, for each leaf page except the first, ** the interior nodes contain: ** ** a) If the leaf page contains at least one term, then a term-prefix that ** is greater than all previous terms, and less than or equal to the ** first term on the leaf page. ** ** b) If the leaf page contains no terms, a record indicating how many consecutive ** leaves contain no terms, and whether or not there is an associated ** by-rowid index record. ** ** By definition, there is never more than one type (b) record in a row. ** Type (b) records only ever appear on height=1 pages - immediate parents ** of leaves. Only type (a) records are pushed to higher levels. ** ** Term format: ** ** * Number of bytes in common with previous term plus 2, as a varint. ** * Number of bytes of new term data, as a varint. ** * new term data. ** ** No-term format: ** ** * either an 0x00 or 0x01 byte. If the value 0x01 is used, then there ** is an associated index-by-rowid record. ** * the number of zero-term leaves as a varint. ** ** 5. Segment doclist indexes: ** ** Doclist indexes are themselves b-trees, however they usually consist of ** a single leaf record only. The format of each doclist index leaf page ** is: ** ** * Flags byte. Bits are: ** 0x01: Clear if leaf is also the root page, otherwise set. ** ** * Page number of fts index leaf page. As a varint. ** ** * First docid on page indicated by previous field. As a varint. ** ** * A list of varints, one for each subsequent termless page. A ** positive delta if the termless page contains at least one docid, ** or an 0x00 byte otherwise. ** ** Internal doclist index nodes are: ** ** * Flags byte. Bits are: ** 0x01: Clear for root page, otherwise set. 
**
**     * Page number of first child page. As a varint.
**
**     * Copy of first docid on page indicated by previous field. As a varint.
**
**     * A list of delta-encoded varints - the first docid on each subsequent
**       child page.
**
*/

/*
** Rowids for the averages and structure records in the %_data table.
*/
#define FTS5_AVERAGES_ROWID     1    /* Rowid used for the averages record */
#define FTS5_STRUCTURE_ROWID   10    /* The structure record */

/*
** Macros determining the rowids used by segment nodes. All nodes in all
** segments for all indexes (the regular FTS index and any prefix indexes)
** are stored in the %_data table with large positive rowids.
**
** The %_data table may contain up to (1<<FTS5_SEGMENT_INDEX_BITS)
** indexes - one regular term index and zero or more prefix indexes.
**
** NOTE(review): FTS5_SEGMENT_INDEX_BITS and the FTS5_SEGMENT_*_BITS names
** mentioned in this comment are not defined in the visible part of this
** file; the field widths defined immediately below are the FTS5_DATA_*_B
** macros. Confirm and update the wording.
**
** Each segment in an index has a unique id greater than zero.
**
** Each node in a segment b-tree is assigned a "page number" that is unique
** within nodes of its height within the segment (leaf nodes have a height
** of 0, parents 1, etc.). Page numbers are allocated sequentially so that
** a node's page number is always one more than its left sibling.
**
** The rowid for a node is then found using the FTS5_SEGMENT_ROWID() macro
** below. The FTS5_SEGMENT_*_BITS macros define the number of bits used
** to encode the three FTS5_SEGMENT_ROWID() arguments. This module returns
** SQLITE_FULL and fails the current operation if they ever prove too small.
*/ #define FTS5_DATA_ID_B 16 /* Max seg id number 65535 */ #define FTS5_DATA_DLI_B 1 /* Doclist-index flag (1 bit) */ #define FTS5_DATA_HEIGHT_B 5 /* Max b-tree height of 32 */ #define FTS5_DATA_PAGE_B 31 /* Max page number of 2147483648 */ #define fts5_dri(segid, dlidx, height, pgno) ( \ ((i64)(segid) << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) + \ ((i64)(dlidx) << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) + \ ((i64)(height) << (FTS5_DATA_PAGE_B)) + \ ((i64)(pgno)) \ ) #define FTS5_SEGMENT_ROWID(segid, height, pgno) fts5_dri(segid, 0, height, pgno) #define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno) /* ** Maximum segments permitted in a single index */ #define FTS5_MAX_SEGMENT 2000 #ifdef SQLITE_DEBUG int sqlite3Fts5Corrupt() { return SQLITE_CORRUPT_VTAB; } #endif /* ** Each time a blob is read from the %_data table, it is padded with this ** many zero bytes. This makes it easier to decode the various record formats ** without overreading if the records are corrupt. */ #define FTS5_DATA_ZERO_PADDING 8 typedef struct Fts5BtreeIter Fts5BtreeIter; typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel; typedef struct Fts5Data Fts5Data; typedef struct Fts5DlidxIter Fts5DlidxIter; typedef struct Fts5DlidxLvl Fts5DlidxLvl; typedef struct Fts5DlidxWriter Fts5DlidxWriter; typedef struct Fts5MultiSegIter Fts5MultiSegIter; typedef struct Fts5NodeIter Fts5NodeIter; typedef struct Fts5PageWriter Fts5PageWriter; typedef struct Fts5SegIter Fts5SegIter; typedef struct Fts5DoclistIter Fts5DoclistIter; typedef struct Fts5SegWriter Fts5SegWriter; typedef struct Fts5Structure Fts5Structure; typedef struct Fts5StructureLevel Fts5StructureLevel; typedef struct Fts5StructureSegment Fts5StructureSegment; struct Fts5Data { u8 *p; /* Pointer to buffer containing record */ int n; /* Size of record in bytes */ }; /* ** One object per %_data table. 
*/
struct Fts5Index {
  Fts5Config *pConfig;            /* Virtual table configuration */
  char *zDataTbl;                 /* Name of %_data table */
  int nWorkUnit;                  /* Leaf pages in a "unit" of work */

  /*
  ** Variables related to the accumulation of tokens and doclists within the
  ** in-memory hash tables before they are flushed to disk.
  */
  Fts5Hash *pHash;                /* Hash table for in-memory data */
  int nMaxPendingData;            /* Max pending data before flush to disk */
  int nPendingData;               /* Current bytes of pending data */
  i64 iWriteRowid;                /* Rowid for current doc being written */
  Fts5Buffer scratch;             /* NOTE(review): presumably a reusable
                                  ** temporary buffer - confirm at use sites */

  /* Error state. */
  int rc;                         /* Current error code */

  /* State used by the fts5DataXXX() functions. */
  sqlite3_blob *pReader;          /* RO incr-blob open on %_data table */
  sqlite3_stmt *pWriter;          /* "INSERT ... %_data VALUES(?,?)" */
  sqlite3_stmt *pDeleter;         /* "DELETE FROM %_data ... id>=? AND id<=?" */
  int nRead;                      /* Total number of blocks read */
};

/*
** Iterator over a doclist held in memory.
** NOTE(review): a/n/i look like the buffer, its size in bytes and the
** current offset into it - confirm against the fts5DoclistIterXXX() code.
*/
struct Fts5DoclistIter {
  u8 *a;
  int n;
  int i;

  /* Output variables. aPoslist==0 at EOF */
  i64 iRowid;
  u8 *aPoslist;
  int nPoslist;
};

/*
** Each iterator used by external modules is an instance of this type.
*/
struct Fts5IndexIter {
  Fts5Index *pIndex;
  Fts5Structure *pStruct;
  Fts5MultiSegIter *pMulti;
  Fts5Buffer poslist;             /* Buffer containing current poslist */
};

/*
** The contents of the "structure" record for each index are represented
** using an Fts5Structure record in memory. Which uses instances of the 
** other Fts5StructureXXX types as components.
*/
struct Fts5StructureSegment {
  int iSegid;                     /* Segment id */
  int nHeight;                    /* Height of segment b-tree */
  int pgnoFirst;                  /* First leaf page number in segment */
  int pgnoLast;                   /* Last leaf page number in segment */
};
struct Fts5StructureLevel {
  int nMerge;                     /* Number of segments in incr-merge */
  int nSeg;                       /* Total number of segments on level */
  Fts5StructureSegment *aSeg;     /* Array of segments. aSeg[0] is oldest.
*/
};
struct Fts5Structure {
  u64 nWriteCounter;              /* Total leaves written to level 0 */
  int nSegment;                   /* Total segments in this structure */
  int nLevel;                     /* Number of levels in this index */
  /* NOTE(review): aLevel[0] is a zero-length array, which is a compiler
  ** extension rather than ISO C. Any change to the declared length also
  ** changes sizeof(Fts5Structure), so code that computes the allocation
  ** size must be updated together with it. */
  Fts5StructureLevel aLevel[0];   /* Array of nLevel level objects */
};

/*
** An object of type Fts5SegWriter is used to write to segments.
*/
struct Fts5PageWriter {
  int pgno;                       /* Page number for this page */
  Fts5Buffer buf;                 /* Buffer containing page data */
  Fts5Buffer term;                /* Buffer containing previous term on page */
};
struct Fts5DlidxWriter {
  int pgno;                       /* Page number for this page */
  int bPrevValid;                 /* True if iPrev is valid */
  i64 iPrev;                      /* Previous docid value written to page */
  Fts5Buffer buf;                 /* Buffer containing page data */
};
struct Fts5SegWriter {
  int iSegid;                     /* Segid to write to */
  int nWriter;                    /* Number of entries in aWriter */
  Fts5PageWriter *aWriter;        /* Array of PageWriter objects */
  i64 iPrevRowid;                 /* Previous docid written to current leaf */
  u8 bFirstRowidInDoclist;        /* True if next rowid is first in doclist */
  u8 bFirstRowidInPage;           /* True if next rowid is first in page */
  u8 bFirstTermInPage;            /* True if next term will be first in leaf */
  int nLeafWritten;               /* Number of leaf pages written */
  int nEmpty;                     /* Number of contiguous term-less nodes */

  int nDlidx;                     /* Allocated size of aDlidx[] array */
  Fts5DlidxWriter *aDlidx;        /* Array of Fts5DlidxWriter objects */
};

/*
** Object for iterating through the merged results of one or more segments,
** visiting each term/docid pair in the merged data.
**
** nSeg is always a power of two greater than or equal to the number of
** segments that this object is merging data from. Both the aSeg[] and
** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded
** with zeroed objects - these are handled as if they were iterators opened
** on empty segments.
**
** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an
** even number, is stored in aFirst[(nSeg+N)/2].
The "result" of the
** comparison in this context is the index of the iterator that currently
** points to the smaller term/rowid combination. Iterators at EOF are
** considered to be greater than all other iterators.
**
** aFirst[1] contains the index in aSeg[] of the iterator that points to
** the smallest key overall. aFirst[0] is unused.
*/

typedef struct Fts5CResult Fts5CResult;
struct Fts5CResult {
  u16 iFirst;                     /* aSeg[] index of first (smallest) iterator */
  u8 bTermEq;                     /* True if the terms are equal */
};

struct Fts5MultiSegIter {
  int nSeg;                       /* Size of aSeg[] array */
  int bRev;                       /* True to iterate in reverse order */
  int bSkipEmpty;                 /* True to skip deleted entries */
  Fts5SegIter *aSeg;              /* Array of segment iterators */
  Fts5CResult *aFirst;            /* Current merge state (see above) */
};

/*
** Object for iterating through a single segment, visiting each term/docid
** pair in the segment.
**
** pSeg:
**   The segment to iterate through.
**
** iLeafPgno:
**   Current leaf page number within segment.
**
** iLeafOffset:
**   Byte offset within the current leaf that is the first byte of the
**   position list data (one byte past the position-list size field).
**   rowid field of the current entry. Usually this is the size field of the
**   position list data. The exception is if the rowid for the current entry
**   is the last thing on the leaf page.
**
**   NOTE(review): the text from "rowid field of the current entry" onwards
**   looks like the remains of an older description and disagrees with the
**   first sentence about where the offset points - confirm which is right.
**
** pLeaf:
**   Buffer containing current leaf page data. Set to NULL at EOF.
**
** iTermLeafPgno, iTermLeafOffset:
**   Leaf page number containing the last term read from the segment. And
**   the offset immediately following the term data.
**
** flags:
**   Mask of FTS5_SEGITER_XXX values. Interpreted as follows:
**
**   FTS5_SEGITER_ONETERM:
**     If set, set the iterator to point to EOF after the current doclist
**     has been exhausted. Do not proceed to the next term in the segment.
**
**   FTS5_SEGITER_REVERSE:
**     This flag is only ever set if FTS5_SEGITER_ONETERM is also set.
If
**     it is set, iterate through docids in descending order instead of the
**     default ascending order.
**
** iRowidOffset/nRowidOffset/aRowidOffset:
**     These are used if the FTS5_SEGITER_REVERSE flag is set.
**
**     For each rowid on the page corresponding to the current term, the
**     corresponding aRowidOffset[] entry is set to the byte offset of the
**     start of the "position-list-size" field within the page.
*/
struct Fts5SegIter {
  Fts5StructureSegment *pSeg;     /* Segment to iterate through */
  int flags;                      /* Mask of configuration flags */
  int iLeafPgno;                  /* Current leaf page number */
  Fts5Data *pLeaf;                /* Current leaf data */
  Fts5Data *pNextLeaf;            /* Leaf page (iLeafPgno+1) */
  int iLeafOffset;                /* Byte offset within current leaf */

  /* The page and offset from which the current term was read. The offset
  ** is the offset of the first rowid in the current doclist.  */
  int iTermLeafPgno;
  int iTermLeafOffset;

  /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
  int iRowidOffset;               /* Current entry in aRowidOffset[] */
  int nRowidOffset;               /* Allocated size of aRowidOffset[] array */
  int *aRowidOffset;              /* Array of offset to rowid fields */

  Fts5DlidxIter *pDlidx;          /* If there is a doclist-index */

  /* Variables populated based on current entry. */
  Fts5Buffer term;                /* Current term */
  i64 iRowid;                     /* Current rowid */
  int nPos;                       /* Number of bytes in current position list */
  int bDel;                       /* True if the delete flag is set */
};

/* Bit values for Fts5SegIter.flags (described in the comment above). */
#define FTS5_SEGITER_ONETERM 0x01
#define FTS5_SEGITER_REVERSE 0x02


/*
** Object for iterating through the contents of a single internal node in
** memory.
*/
struct Fts5NodeIter {
  /* Internal. Set and managed by fts5NodeIterXXX() functions. Except,
  ** the EOF test for the iterator is (Fts5NodeIter.aData==0).  */
  const u8 *aData;
  int nData;
  int iOff;

  /* Output variables */
  Fts5Buffer term;
  int nEmpty;
  int iChild;
  int bDlidx;
};

/*
** An instance of the following type is used to iterate through the contents
** of a doclist-index record.
**
** pData:
**   Record containing the doclist-index data.
**
** bEof:
**   Set to true once iterator has reached EOF.
**
** iOff:
**   Set to the current offset within record pData.
*/
struct Fts5DlidxLvl {
  Fts5Data *pData;              /* Data for current page of this level */
  int iOff;                     /* Current offset into pData */
  int bEof;                     /* At EOF already */
  int iFirstOff;                /* Used by reverse iterators */

  /* Output variables */
  int iLeafPgno;                /* Page number of current leaf page */
  i64 iRowid;                   /* First rowid on leaf iLeafPgno */
};
/*
** Iterator over a doclist index, holding one Fts5DlidxLvl per level.
** NOTE(review): aLvl[] is declared with a single element; presumably the
** object is over-allocated so that aLvl[] really has nLvl entries, and
** iSegid is the id of the segment the index belongs to - confirm against
** the code that allocates this structure.
*/
struct Fts5DlidxIter {
  int nLvl;
  int iSegid;
  Fts5DlidxLvl aLvl[1];
};



/*
** An Fts5BtreeIter object is used to iterate through all entries in the
** b-tree hierarchy belonging to a single fts5 segment. In this case the
** "b-tree hierarchy" is all b-tree nodes except leaves. Each entry in the
** b-tree hierarchy consists of the following:
**
**   iLeaf:  The page number of the leaf page the entry points to.
**
**   term:   A split-key that all terms on leaf page $iLeaf must be greater
**           than or equal to. The "term" associated with the first b-tree
**           hierarchy entry (the one that points to leaf page 1) is always
**           an empty string.
**
**   nEmpty: The number of empty (termless) leaf pages that immediately
**           follow iLeaf.
**
** The Fts5BtreeIter object is only used as part of the integrity-check code.
*/ struct Fts5BtreeIterLevel { Fts5NodeIter s; /* Iterator for the current node */ Fts5Data *pData; /* Data for the current node */ }; struct Fts5BtreeIter { Fts5Index *p; /* FTS5 backend object */ Fts5StructureSegment *pSeg; /* Iterate through this segment's b-tree */ int nLvl; /* Size of aLvl[] array */ Fts5BtreeIterLevel *aLvl; /* Level for each tier of b-tree */ /* Output variables */ Fts5Buffer term; /* Current term */ int iLeaf; /* Leaf containing terms >= current term */ int nEmpty; /* Number of "empty" leaves following iLeaf */ int bEof; /* Set to true at EOF */ int bDlidx; /* True if there exists a dlidx */ }; static void fts5PutU16(u8 *aOut, u16 iVal){ aOut[0] = (iVal>>8); aOut[1] = (iVal&0xFF); } static u16 fts5GetU16(const u8 *aIn){ return ((u16)aIn[0] << 8) + aIn[1]; } /* ** Allocate and return a buffer at least nByte bytes in size. ** ** If an OOM error is encountered, return NULL and set the error code in ** the Fts5Index handle passed as the first argument. */ static void *fts5IdxMalloc(Fts5Index *p, int nByte){ return sqlite3Fts5MallocZero(&p->rc, nByte); } /* ** Compare the contents of the pLeft buffer with the pRight/nRight blob. ** ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or ** +ve if pRight is smaller than pLeft. In other words: ** ** res = *pLeft - *pRight */ static int fts5BufferCompareBlob( Fts5Buffer *pLeft, /* Left hand side of comparison */ const u8 *pRight, int nRight /* Right hand side of comparison */ ){ int nCmp = MIN(pLeft->n, nRight); int res = memcmp(pLeft->p, pRight, nCmp); return (res==0 ? (pLeft->n - nRight) : res); } /* ** Compare the contents of the two buffers using memcmp(). If one buffer ** is a prefix of the other, it is considered the lesser. ** ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or ** +ve if pRight is smaller than pLeft. 
In other words: ** ** res = *pLeft - *pRight */ static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){ int nCmp = MIN(pLeft->n, pRight->n); int res = memcmp(pLeft->p, pRight->p, nCmp); return (res==0 ? (pLeft->n - pRight->n) : res); } #ifdef SQLITE_DEBUG static int fts5BlobCompare( const u8 *pLeft, int nLeft, const u8 *pRight, int nRight ){ int nCmp = MIN(nLeft, nRight); int res = memcmp(pLeft, pRight, nCmp); return (res==0 ? (nLeft - nRight) : res); } #endif /* ** Close the read-only blob handle, if it is open. */ static void fts5CloseReader(Fts5Index *p){ if( p->pReader ){ sqlite3_blob *pReader = p->pReader; p->pReader = 0; sqlite3_blob_close(pReader); } } static Fts5Data *fts5DataReadOrBuffer( Fts5Index *p, Fts5Buffer *pBuf, i64 iRowid ){ Fts5Data *pRet = 0; if( p->rc==SQLITE_OK ){ int rc = SQLITE_OK; if( p->pReader ){ /* This call may return SQLITE_ABORT if there has been a savepoint ** rollback since it was last used. In this case a new blob handle ** is required. */ sqlite3_blob *pBlob = p->pReader; p->pReader = 0; rc = sqlite3_blob_reopen(pBlob, iRowid); assert( p->pReader==0 ); p->pReader = pBlob; if( rc!=SQLITE_OK ){ fts5CloseReader(p); } if( rc==SQLITE_ABORT ) rc = SQLITE_OK; } /* If the blob handle is not yet open, open and seek it. Otherwise, use ** the blob_reopen() API to reseek the existing blob handle. */ if( p->pReader==0 && rc==SQLITE_OK ){ Fts5Config *pConfig = p->pConfig; rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader ); } /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead. ** All the reasons those functions might return SQLITE_ERROR - missing ** table, missing row, non-blob/text in block column - indicate ** backing store corruption. 
*/ if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT; if( rc==SQLITE_OK ){ u8 *aOut; /* Read blob data into this buffer */ int nByte = sqlite3_blob_bytes(p->pReader); if( pBuf ){ fts5BufferZero(pBuf); fts5BufferGrow(&rc, pBuf, nByte); aOut = pBuf->p; pBuf->n = nByte; }else{ int nSpace = nByte + FTS5_DATA_ZERO_PADDING; pRet = (Fts5Data*)sqlite3Fts5MallocZero(&rc, nSpace+sizeof(Fts5Data)); if( pRet ){ pRet->n = nByte; aOut = pRet->p = (u8*)&pRet[1]; } } if( rc==SQLITE_OK ){ rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0); } if( rc!=SQLITE_OK ){ sqlite3_free(pRet); pRet = 0; } } p->rc = rc; p->nRead++; } return pRet; } /* ** Retrieve a record from the %_data table. ** ** If an error occurs, NULL is returned and an error left in the ** Fts5Index object. */ static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){ Fts5Data *pRet = fts5DataReadOrBuffer(p, 0, iRowid); assert( (pRet==0)==(p->rc!=SQLITE_OK) ); return pRet; } /* ** Read a record from the %_data table into the buffer supplied as the ** second argument. ** ** If an error occurs, an error is left in the Fts5Index object. If an ** error has already occurred when this function is called, it is a ** no-op. */ static void fts5DataBuffer(Fts5Index *p, Fts5Buffer *pBuf, i64 iRowid){ (void)fts5DataReadOrBuffer(p, pBuf, iRowid); } /* ** Release a reference to data record returned by an earlier call to ** fts5DataRead(). */ static void fts5DataRelease(Fts5Data *pData){ sqlite3_free(pData); } /* ** INSERT OR REPLACE a record into the %_data table. 
*/ static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){ if( p->rc!=SQLITE_OK ) return; if( p->pWriter==0 ){ int rc = SQLITE_OK; Fts5Config *pConfig = p->pConfig; char *zSql = sqlite3Fts5Mprintf(&rc, "REPLACE INTO '%q'.%Q(id, block) VALUES(?,?)", pConfig->zDb, p->zDataTbl ); if( zSql ){ rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p->pWriter, 0); sqlite3_free(zSql); } if( rc!=SQLITE_OK ){ p->rc = rc; return; } } sqlite3_bind_int64(p->pWriter, 1, iRowid); sqlite3_bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC); sqlite3_step(p->pWriter); p->rc = sqlite3_reset(p->pWriter); } /* ** Execute the following SQL: ** ** DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast */ static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){ if( p->rc!=SQLITE_OK ) return; if( p->pDeleter==0 ){ int rc; Fts5Config *pConfig = p->pConfig; char *zSql = sqlite3_mprintf( "DELETE FROM '%q'.%Q WHERE id>=? AND id<=?", pConfig->zDb, p->zDataTbl ); if( zSql==0 ){ rc = SQLITE_NOMEM; }else{ rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p->pDeleter, 0); sqlite3_free(zSql); } if( rc!=SQLITE_OK ){ p->rc = rc; return; } } sqlite3_bind_int64(p->pDeleter, 1, iFirst); sqlite3_bind_int64(p->pDeleter, 2, iLast); sqlite3_step(p->pDeleter); p->rc = sqlite3_reset(p->pDeleter); } /* ** Remove all records associated with segment iSegid. */ static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){ i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0, 0); i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0, 0)-1; fts5DataDelete(p, iFirst, iLast); } /* ** Release a reference to an Fts5Structure object returned by an earlier ** call to fts5StructureRead() or fts5StructureDecode(). */ static void fts5StructureRelease(Fts5Structure *pStruct){ if( pStruct ){ int i; for(i=0; i<pStruct->nLevel; i++){ sqlite3_free(pStruct->aLevel[i].aSeg); } sqlite3_free(pStruct); } } /* ** Deserialize and return the structure record currently stored in serialized ** form within buffer pData/nData. 
** ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array ** are over-allocated by one slot. This allows the structure contents ** to be more easily edited. ** ** If an error occurs, *ppOut is set to NULL and an SQLite error code ** returned. Otherwise, *ppOut is set to point to the new object and ** SQLITE_OK returned. */ static int fts5StructureDecode( const u8 *pData, /* Buffer containing serialized structure */ int nData, /* Size of buffer pData in bytes */ int *piCookie, /* Configuration cookie value */ Fts5Structure **ppOut /* OUT: Deserialized object */ ){ int rc = SQLITE_OK; int i = 0; int iLvl; int nLevel = 0; int nSegment = 0; int nByte; /* Bytes of space to allocate at pRet */ Fts5Structure *pRet = 0; /* Structure object to return */ /* Grab the cookie value */ if( piCookie ) *piCookie = sqlite3Fts5Get32(pData); i = 4; /* Read the total number of levels and segments from the start of the ** structure record. */ i += fts5GetVarint32(&pData[i], nLevel); i += fts5GetVarint32(&pData[i], nSegment); nByte = ( sizeof(Fts5Structure) + /* Main structure */ sizeof(Fts5StructureLevel) * (nLevel) /* aLevel[] array */ ); pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte); if( pRet ){ pRet->nLevel = nLevel; pRet->nSegment = nSegment; i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter); for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){ Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl]; int nTotal; int iSeg; i += fts5GetVarint32(&pData[i], pLvl->nMerge); i += fts5GetVarint32(&pData[i], nTotal); assert( nTotal>=pLvl->nMerge ); pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc, nTotal * sizeof(Fts5StructureSegment) ); if( rc==SQLITE_OK ){ pLvl->nSeg = nTotal; for(iSeg=0; iSeg<nTotal; iSeg++){ i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].iSegid); i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].nHeight); i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst); i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast); } 
}else{ fts5StructureRelease(pRet); pRet = 0; } } } *ppOut = pRet; return rc; } /* ** */ static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){ if( *pRc==SQLITE_OK ){ Fts5Structure *pStruct = *ppStruct; int nLevel = pStruct->nLevel; int nByte = ( sizeof(Fts5Structure) + /* Main structure */ sizeof(Fts5StructureLevel) * (nLevel+1) /* aLevel[] array */ ); pStruct = sqlite3_realloc(pStruct, nByte); if( pStruct ){ memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel)); pStruct->nLevel++; *ppStruct = pStruct; }else{ *pRc = SQLITE_NOMEM; } } } /* ** Extend level iLvl so that there is room for at least nExtra more ** segments. */ static void fts5StructureExtendLevel( int *pRc, Fts5Structure *pStruct, int iLvl, int nExtra, int bInsert ){ if( *pRc==SQLITE_OK ){ Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; Fts5StructureSegment *aNew; int nByte; nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment); aNew = sqlite3_realloc(pLvl->aSeg, nByte); if( aNew ){ if( bInsert==0 ){ memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra); }else{ int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment); memmove(&aNew[nExtra], aNew, nMove); memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra); } pLvl->aSeg = aNew; }else{ *pRc = SQLITE_NOMEM; } } } /* ** Read, deserialize and return the structure record. ** ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array ** are over-allocated as described for function fts5StructureDecode() ** above. ** ** If an error occurs, NULL is returned and an error code left in the ** Fts5Index handle. If an error has already occurred when this function ** is called, it is a no-op. 
*/ static Fts5Structure *fts5StructureRead(Fts5Index *p){ Fts5Config *pConfig = p->pConfig; Fts5Structure *pRet = 0; /* Object to return */ Fts5Data *pData; /* %_data entry containing structure record */ int iCookie; /* Configuration cookie */ pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID); if( !pData ) return 0; p->rc = fts5StructureDecode(pData->p, pData->n, &iCookie, &pRet); if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){ p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie); } fts5DataRelease(pData); if( p->rc!=SQLITE_OK ){ fts5StructureRelease(pRet); pRet = 0; } return pRet; } /* ** Return the total number of segments in index structure pStruct. This ** function is only ever used as part of assert() conditions. */ #ifdef SQLITE_DEBUG static int fts5StructureCountSegments(Fts5Structure *pStruct){ int nSegment = 0; /* Total number of segments */ if( pStruct ){ int iLvl; /* Used to iterate through levels */ for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ nSegment += pStruct->aLevel[iLvl].nSeg; } } return nSegment; } #endif /* ** Serialize and store the "structure" record. ** ** If an error occurs, leave an error code in the Fts5Index object. If an ** error has already occurred, this function is a no-op. 
*/
static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){
  if( p->rc==SQLITE_OK ){
    Fts5Buffer buf;               /* Buffer to serialize record into */
    int iLvl;                     /* Used to iterate through levels */
    int iCookie;                  /* Cookie value to store */

    assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
    memset(&buf, 0, sizeof(Fts5Buffer));

    /* Append the current configuration cookie */
    iCookie = p->pConfig->iCookie;
    if( iCookie<0 ) iCookie = 0;
    fts5BufferAppend32(&p->rc, &buf, iCookie);

    /* Header fields, in the order fts5StructureDecode() reads them back:
    ** level count, segment count, write counter. */
    fts5BufferAppendVarint(&p->rc, &buf, pStruct->nLevel);
    fts5BufferAppendVarint(&p->rc, &buf, pStruct->nSegment);
    fts5BufferAppendVarint(&p->rc, &buf, (i64)pStruct->nWriteCounter);

    for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
      int iSeg;                     /* Used to iterate through segments */
      Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
      fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge);
      fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg);
      assert( pLvl->nMerge<=pLvl->nSeg );

      /* Four varints per segment: id, height, first page, last page */
      for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
        fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid);
        fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].nHeight);
        fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst);
        fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast);
      }
    }

    /* fts5DataWrite() is a no-op if any append above set p->rc */
    fts5DataWrite(p, FTS5_STRUCTURE_ROWID, buf.p, buf.n);
    fts5BufferFree(&buf);
  }
}

/* Debugging aid, normally compiled out: print a structure to stdout. */
#if 0
static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*);
static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){
  int rc = SQLITE_OK;
  Fts5Buffer buf;
  memset(&buf, 0, sizeof(buf));
  fts5DebugStructure(&rc, &buf, pStruct);
  fprintf(stdout, "%s: %s\n", zCaption, buf.p);
  fflush(stdout);
  fts5BufferFree(&buf);
}
#else
# define fts5PrintStructure(x,y)
#endif

/*
** Return the size of segment pSeg in pages, computed from its first and
** last page numbers.
*/
static int fts5SegmentSize(Fts5StructureSegment *pSeg){
  return 1 + pSeg->pgnoLast - pSeg->pgnoFirst;
}

/*
** Promote as many segments as possible to level iPromote of index
** structure pStruct. The structure is modified in place; nothing is
** returned.
**
** Nothing is done if level iPromote has a merge in progress (nMerge!=0).
** Otherwise levels above iPromote are visited in ascending order and the
** segments on each are visited from last to first. Each segment of
** szPromote pages or fewer is moved to the front of level iPromote.
** Processing stops at the first level that has a merge in progress or the
** first segment larger than szPromote pages.
**
** If an OOM occurs while extending level iPromote, the error code is
** left in p->rc.
*/
/* NOTE: fts5StructurePromoteTo() edits pStruct in place and returns
** nothing. Errors (OOM) are reported through p->rc only. */
static void fts5StructurePromoteTo(
  Fts5Index *p,
  int iPromote,
  int szPromote,
  Fts5Structure *pStruct
){
  int il, is;
  Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote];

  if( pOut->nMerge==0 ){
    for(il=iPromote+1; il<pStruct->nLevel; il++){
      Fts5StructureLevel *pLvl = &pStruct->aLevel[il];
      /* Stop at the first level with a merge in progress */
      if( pLvl->nMerge ) return;
      for(is=pLvl->nSeg-1; is>=0; is--){
        int sz = fts5SegmentSize(&pLvl->aSeg[is]);
        if( sz>szPromote ) return;
        /* Open one zeroed slot at the front of the output level (bInsert=1)
        ** and copy the segment into it. */
        fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1);
        if( p->rc ) return;
        memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment));
        pOut->nSeg++;
        pLvl->nSeg--;
      }
    }
  }
}

/*
** A new segment has just been written to level iLvl of index structure
** pStruct. This function determines if any segments should be promoted
** as a result. Segments are promoted in two scenarios:
**
**   a) If the segment just written is smaller than one or more segments
**      within the previous populated level, it is promoted to the previous
**      populated level.
**
**   b) If the segment just written is larger than the newest segment on
**      the next populated level, then that segment, and any other adjacent
**      segments that are also smaller than the one just written, are
**      promoted.
**
** If one or more segments are promoted, the structure object is updated
** to reflect this.
*/
static void fts5StructurePromote(
  Fts5Index *p,                   /* FTS5 backend object */
  int iLvl,                       /* Index level just updated */
  Fts5Structure *pStruct          /* Index structure */
){
  Fts5StructureLevel *pNew;       /* Level the new segment was written to */
  Fts5StructureSegment *pSeg;     /* Segment just written */
  int szSeg;                      /* Size of segment just written */
  int iPromote;                   /* Level to promote segments to */
  int szPromote;                  /* Promote anything this size or smaller */
  int iTst;                       /* Nearest populated level below iLvl */

  if( p->rc!=SQLITE_OK ) return;

  pNew = &pStruct->aLevel[iLvl];
  pSeg = &pNew->aSeg[pNew->nSeg-1];
  szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst);

  /* Start out assuming condition (b). fts5StructurePromoteTo() is a
  ** no-op if it turns out not to hold. */
  iPromote = iLvl;
  szPromote = szSeg;

  /* Check for condition (a): find the closest populated level below
  ** iLvl and the size of the largest segment on it. */
  iTst = iLvl-1;
  while( iTst>=0 && pStruct->aLevel[iTst].nSeg==0 ){
    iTst--;
  }
  if( iTst>=0 ){
    Fts5StructureLevel *pTst = &pStruct->aLevel[iTst];
    int szMax = 0;
    int i;
    assert( pTst->nMerge==0 );
    for(i=0; i<pTst->nSeg; i++){
      int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1;
      if( sz>szMax ) szMax = sz;
    }
    if( szMax>=szSeg ){
      /* Condition (a) is true. Promote the newest segment on level
      ** iLvl to level iTst.  */
      iPromote = iTst;
      szPromote = szMax;
    }
  }

  fts5StructurePromoteTo(p, iPromote, szPromote, pStruct);
}


/*
** If offset pIter->iOff currently points to an entry indicating one or
** more term-less nodes, step past that entry, setting pIter->nEmpty to
** the number of empty child nodes and pIter->bDlidx to the low bit of
** the entry's first byte. Otherwise, set both to zero.
*/
static void fts5NodeIterGobbleNEmpty(Fts5NodeIter *pIter){
  int bEntry = 0;                 /* True if such an entry is present */

  if( pIter->iOff<pIter->nData ){
    bEntry = ((pIter->aData[pIter->iOff] & 0xfe)==0);
  }
  if( !bEntry ){
    pIter->nEmpty = 0;
    pIter->bDlidx = 0;
    return;
  }

  pIter->bDlidx = pIter->aData[pIter->iOff] & 0x01;
  pIter->iOff++;
  pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], pIter->nEmpty);
}

/*
** Advance to the next entry within the node.
*/
static void fts5NodeIterNext(int *pRc, Fts5NodeIter *pIter){
  if( pIter->iOff>=pIter->nData ){
    /* No more entries: aData is set to NULL here, and also below if an
    ** error occurs. */
    pIter->aData = 0;
    pIter->iChild += pIter->nEmpty;
  }else{
    int nPre, nNew;
    /* Each entry is two varints (nPre, nNew) followed by nNew bytes of
    ** term data. The term buffer is cut back to (nPre-2) bytes and the
    ** new bytes appended to it. */
    pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], nPre);
    pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], nNew);
    pIter->term.n = nPre-2;
    fts5BufferAppendBlob(pRc, &pIter->term, nNew, pIter->aData+pIter->iOff);
    pIter->iOff += nNew;
    pIter->iChild += (1 + pIter->nEmpty);
    fts5NodeIterGobbleNEmpty(pIter);
    if( *pRc ) pIter->aData = 0;
  }
}


/*
** Initialize the iterator object pIter to iterate through the internal
** segment node in pData.
*/
static void fts5NodeIterInit(const u8 *aData, int nData, Fts5NodeIter *pIter){
  memset(pIter, 0, sizeof(*pIter));
  pIter->aData = aData;
  pIter->nData = nData;
  /* The node begins with a varint that is read into iChild */
  pIter->iOff = fts5GetVarint32(aData, pIter->iChild);
  fts5NodeIterGobbleNEmpty(pIter);
}

/*
** Free any memory allocated by the iterator object.
*/
static void fts5NodeIterFree(Fts5NodeIter *pIter){
  fts5BufferFree(&pIter->term);
}

/*
** Advance the iterator passed as the only argument. If the end of the
** doclist-index page is reached, return non-zero.
*/
static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){
  Fts5Data *pData = pLvl->pData;

  if( pLvl->iOff==0 ){
    /* First call for this page: skip the byte at offset 0, then read
    ** the initial leaf page number and rowid. */
    assert( pLvl->bEof==0 );
    pLvl->iOff = 1;
    pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno);
    pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid);
    pLvl->iFirstOff = pLvl->iOff;
  }else{
    int iOff;
    /* Each 0x00 byte skipped here advances the leaf page number by one
    ** without supplying a rowid. */
    for(iOff=pLvl->iOff; iOff<pData->n; iOff++){
      if( pData->p[iOff] ) break;
    }

    if( iOff<pData->n ){
      i64 iVal;
      pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1;
      iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal);
      pLvl->iRowid += iVal;       /* Rowids are stored as deltas */
      pLvl->iOff = iOff;
    }else{
      pLvl->bEof = 1;
    }
  }

  return pLvl->bEof;
}

/*
** Advance the iterator passed as the only argument.
*/
static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
  Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];

  assert( iLvl<pIter->nLvl );
  if( fts5DlidxLvlNext(pLvl) ){
    /* This level hit the end of its page. If there is a parent level,
    ** advance it. Unless the parent is now at EOF too, load the page it
    ** points to for this level and move to that page's first entry. */
    if( (iLvl+1) < pIter->nLvl ){
      fts5DlidxIterNextR(p, pIter, iLvl+1);
      if( pLvl[1].bEof==0 ){
        fts5DataRelease(pLvl->pData);
        memset(pLvl, 0, sizeof(Fts5DlidxLvl));
        pLvl->pData = fts5DataRead(p,
            FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
        );
        if( pLvl->pData ) fts5DlidxLvlNext(pLvl);
      }
    }
  }

  return pIter->aLvl[0].bEof;
}

/*
** Advance the doclist-index iterator to its next entry. Returns the
** EOF flag of level 0.
*/
static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){
  return fts5DlidxIterNextR(p, pIter, 0);
}

/*
** The iterator passed as the first argument has the following fields set
** as follows. This function sets up the rest of the iterator so that it
** points to the first rowid in the doclist-index.
**
**   pData:
**     pointer to doclist-index record,
**
** When this function is called pIter->iLeafPgno is the page number the
** doclist is associated with (the one featuring the term).
*/
static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
  int i;
  for(i=0; i<pIter->nLvl; i++){
    fts5DlidxLvlNext(&pIter->aLvl[i]);
  }
  return pIter->aLvl[0].bEof;
}


/*
** Return true if an error has occurred (p->rc is set) or if level 0 of
** the iterator is at EOF.
*/
static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){
  return p->rc!=SQLITE_OK || pIter->aLvl[0].bEof;
}

/*
** Move the iterator to the last entry, working from the highest level
** down to level 0. Stops early if an error occurs.
*/
static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){
  int i;

  /* Advance each level to the last entry on the last page */
  for(i=pIter->nLvl-1; p->rc==SQLITE_OK && i>=0; i--){
    Fts5DlidxLvl *pLvl = &pIter->aLvl[i];
    while( fts5DlidxLvlNext(pLvl)==0 );
    /* The loop above ran until EOF was flagged; clear the flag again */
    pLvl->bEof = 0;

    if( i>0 ){
      /* Load the page of the next level down that this entry points to */
      Fts5DlidxLvl *pChild = &pLvl[-1];
      fts5DataRelease(pChild->pData);
      memset(pChild, 0, sizeof(Fts5DlidxLvl));
      pChild->pData = fts5DataRead(p,
          FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno)
      );
    }
  }
}

/*
** Move the iterator passed as the only argument to the previous entry.
*/
static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){
  int iOff = pLvl->iOff;

  assert( pLvl->bEof==0 );
  if( iOff<=pLvl->iFirstOff ){
    /* Already at the first entry on the page */
    pLvl->bEof = 1;
  }else{
    u8 *a = pLvl->pData->p;
    i64 iVal;
    int iLimit;
    int ii;
    int nZero = 0;

    /* Currently iOff points to the first byte of a varint. This block
    ** decrements iOff until it points to the first byte of the previous
    ** varint. Taking care not to read any memory locations that occur
    ** before the buffer in memory.  */
    iLimit = (iOff>9 ? iOff-9 : 0);
    for(iOff--; iOff>iLimit; iOff--){
      if( (a[iOff-1] & 0x80)==0 ) break;
    }

    /* Undo the rowid delta and page step applied by fts5DlidxLvlNext() */
    fts5GetVarint(&a[iOff], (u64*)&iVal);
    pLvl->iRowid -= iVal;
    pLvl->iLeafPgno--;

    /* Skip backwards past any 0x00 varints. */
    for(ii=iOff-1; ii>=pLvl->iFirstOff && a[ii]==0x00; ii--){
      nZero++;
    }
    if( ii>=pLvl->iFirstOff && (a[ii] & 0x80) ){
      /* The byte immediately before the last 0x00 byte has the 0x80 bit
      ** set. So the last 0x00 is only a varint 0 if there are 8 more 0x80
      ** bytes before a[ii]. */
      int bZero = 0;              /* True if last 0x00 counts */
      if( (ii-8)>=pLvl->iFirstOff ){
        int j;
        for(j=1; j<=8 && (a[ii-j] & 0x80); j++);
        bZero = (j>8);
      }
      if( bZero==0 ) nZero--;
    }
    /* Each 0x00 varint skipped accounts for one further leaf page */
    pLvl->iLeafPgno -= nZero;
    pLvl->iOff = iOff - nZero;
  }

  return pLvl->bEof;
}

/*
** Move level iLvl of the iterator to its previous entry. If that level
** runs off the start of its page and a parent level exists, the parent is
** moved back as well. Unless the parent is then at EOF, the page it points
** to is loaded for this level and this level is positioned on that page's
** last entry. Returns the EOF flag of level 0.
*/
static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
  Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];

  assert( iLvl<pIter->nLvl );
  if( fts5DlidxLvlPrev(pLvl) ){
    if( (iLvl+1) < pIter->nLvl ){
      fts5DlidxIterPrevR(p, pIter, iLvl+1);
      if( pLvl[1].bEof==0 ){
        fts5DataRelease(pLvl->pData);
        memset(pLvl, 0, sizeof(Fts5DlidxLvl));
        pLvl->pData = fts5DataRead(p,
            FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
        );
        if( pLvl->pData ){
          /* Run forward to the end of the page, then clear the EOF flag */
          while( fts5DlidxLvlNext(pLvl)==0 );
          pLvl->bEof = 0;
        }
      }
    }
  }

  return pIter->aLvl[0].bEof;
}

/*
** Move the doclist-index iterator to its previous entry. Returns the
** EOF flag of level 0.
*/
static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){
  return fts5DlidxIterPrevR(p, pIter, 0);
}

/*
** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
*/
static void fts5DlidxIterFree(Fts5DlidxIter *pIter){
  if( pIter ){
    int i;
    for(i=0; i<pIter->nLvl; i++){
      fts5DataRelease(pIter->aLvl[i].pData);
    }
    sqlite3_free(pIter);
  }
}

/*
** Allocate and return a doclist-index iterator for the doclist-index
** identified by segment iSegid and leaf page iLeafPg.
**
** One dlidx page is loaded per level, starting at level 0, until a page
** is found whose first byte has its low bit clear. The iterator is then
** positioned on the first entry if bRev is zero, or on the last entry
** otherwise.
**
** If an error occurs, or if p->rc is already set when this function is
** called, NULL is returned and the error code is left in p->rc. The
** returned object must be freed using fts5DlidxIterFree().
*/
static Fts5DlidxIter *fts5DlidxIterInit(
  Fts5Index *p,                   /* Fts5 Backend to iterate within */
  int bRev,                       /* Non-zero to start at the last entry */
  int iSegid,                     /* Segment id */
  int iLeafPg                     /* Leaf page number to load dlidx for */
){
  Fts5DlidxIter *pIter = 0;
  int i;
  int bDone = 0;

  for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
    /* sizeof(Fts5DlidxIter) already includes one aLvl[] entry */
    int nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl);
    Fts5DlidxIter *pNew;

    pNew = (Fts5DlidxIter*)sqlite3_realloc(pIter, nByte);
    if( pNew==0 ){
      p->rc = SQLITE_NOMEM;
    }else{
      i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg);
      Fts5DlidxLvl *pLvl = &pNew->aLvl[i];
      pIter = pNew;
      memset(pLvl, 0, sizeof(Fts5DlidxLvl));
      pLvl->pData = fts5DataRead(p, iRowid);
      if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){
        bDone = 1;
      }
      /* Set nLvl even if the read failed so that fts5DlidxIterFree()
      ** visits every initialized level. */
      pIter->nLvl = i+1;
    }
  }

  if( p->rc==SQLITE_OK ){
    pIter->iSegid = iSegid;
    if( bRev==0 ){
      fts5DlidxIterFirst(pIter);
    }else{
      fts5DlidxIterLast(p, pIter);
    }
  }

  if( p->rc!=SQLITE_OK ){
    fts5DlidxIterFree(pIter);
    pIter = 0;
  }

  return pIter;
}

/*
** Return the rowid of the current doclist-index entry (level 0).
*/
static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){
  return pIter->aLvl[0].iRowid;
}

/*
** Return the leaf page number of the current doclist-index entry
** (level 0).
*/
static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){
  return pIter->aLvl[0].iLeafPgno;
}

/*
** Read the two 16-bit fields at the start of leaf page pLeaf: the field
** at byte offset 0 into *piRowid and the field at byte offset 2 into
** *piTerm.
*/
static void fts5LeafHeader(Fts5Data *pLeaf, int *piRowid, int *piTerm){
  *piRowid = (int)fts5GetU16(&pLeaf->p[0]);
  *piTerm = (int)fts5GetU16(&pLeaf->p[2]);
}

/*
** Load the next leaf page into the segment iterator.
*/
static void fts5SegIterNextPage(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter              /* Iterator to advance to next page */
){
  Fts5StructureSegment *pSeg = pIter->pSeg;
  fts5DataRelease(pIter->pLeaf);
  pIter->iLeafPgno++;
  if( pIter->pNextLeaf ){
    /* The next page has already been loaded - use it */
    assert( pIter->iLeafPgno<=pSeg->pgnoLast );
    pIter->pLeaf = pIter->pNextLeaf;
    pIter->pNextLeaf = 0;
  }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
    pIter->pLeaf = fts5DataRead(p,
        FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, pIter->iLeafPgno)
    );
  }else{
    /* Past the last page of the segment: pLeaf==0 indicates EOF */
    pIter->pLeaf = 0;
  }
}

/*
** Argument p points to a buffer containing a varint to be interpreted as a
** position list size field. Read the varint and return the number of bytes
** read. Before returning, set *pnSz to the number of bytes in the position
** list, and *pbDel to true if the delete flag is set, or false otherwise.
*/
static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
  int nSz;
  int n = fts5GetVarint32(p, nSz);
  assert_nc( nSz>=0 );
  /* The low bit is the delete flag, the remaining bits the size */
  *pnSz = nSz/2;
  *pbDel = nSz & 0x0001;
  return n;
}

/*
** Fts5SegIter.iLeafOffset currently points to the first byte of a
** position-list size field. Read the value of the field and store it
** in the following variables:
**
**   Fts5SegIter.nPos
**   Fts5SegIter.bDel
**
** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the
** position list content (if any).
**
** This is a no-op if p->rc is already set. If iLeafOffset is at or past
** the end of the page, p->rc is set to FTS5_CORRUPT.
*/
static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
  if( p->rc==SQLITE_OK ){
    int iOff = pIter->iLeafOffset;  /* Offset to read at */
    if( iOff>=pIter->pLeaf->n ){
      p->rc = FTS5_CORRUPT;
    }else{
      const u8 *a = &pIter->pLeaf->p[iOff];
      pIter->iLeafOffset += fts5GetPoslistSize(a, &pIter->nPos, &pIter->bDel);
    }
  }
}

/*
** Fts5SegIter.iLeafOffset currently points to the first byte of the
** "nSuffix" field of a term. Function parameter nKeep contains the value
** of the "nPrefix" field (if there was one - it is passed 0 if this is
** the first term in the segment).
**
** This function populates:
**
**   Fts5SegIter.term
**   Fts5SegIter.rowid
**
** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of
** the first position list. The position list belonging to document
** (Fts5SegIter.iRowid).
*/
static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
  u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
  int iOff = pIter->iLeafOffset;  /* Offset to read at */
  int nNew;                       /* Bytes of new data */

  /* Keep the first nKeep bytes of the previous term and append the nNew
  ** suffix bytes that follow the varint. */
  iOff += fts5GetVarint32(&a[iOff], nNew);
  pIter->term.n = nKeep;
  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
  iOff += nNew;
  pIter->iTermLeafOffset = iOff;
  pIter->iTermLeafPgno = pIter->iLeafPgno;
  if( iOff>=pIter->pLeaf->n ){
    /* The term ends the page, so the rowid is read from the next page,
    ** starting at byte offset 4. A missing next page means corruption. */
    fts5SegIterNextPage(p, pIter);
    if( pIter->pLeaf==0 ){
      if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
      return;
    }
    iOff = 4;
    a = pIter->pLeaf->p;
  }
  iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
  pIter->iLeafOffset = iOff;
}

/*
** Initialize the iterator object pIter to iterate through the entries in
** segment pSeg. The iterator is left pointing to the first entry when
** this function returns.
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
** an error has already occurred when this function is called, it is a no-op.
*/
static void fts5SegIterInit(
  Fts5Index *p,                   /* FTS index object */
  Fts5StructureSegment *pSeg,     /* Description of segment */
  Fts5SegIter *pIter              /* Object to populate */
){
  if( pSeg->pgnoFirst==0 ){
    /* This happens if the segment is being used as an input to an incremental
    ** merge and all data has already been "trimmed". See function
    ** fts5TrimSegments() for details. In this case leave the iterator empty.
    ** The caller will see the (pIter->pLeaf==0) and assume the iterator is
    ** at EOF already. */
    assert( pIter->pLeaf==0 );
    return;
  }

  if( p->rc==SQLITE_OK ){
    memset(pIter, 0, sizeof(*pIter));
    pIter->pSeg = pSeg;
    /* fts5SegIterNextPage() increments iLeafPgno before loading */
    pIter->iLeafPgno = pSeg->pgnoFirst-1;
    fts5SegIterNextPage(p, pIter);
  }

  if( p->rc==SQLITE_OK ){
    u8 *a = pIter->pLeaf->p;
    /* The 16-bit field at byte offset 2 of the page is used as the
    ** offset at which to start reading the term. */
    pIter->iLeafOffset = fts5GetU16(&a[2]);
    fts5SegIterLoadTerm(p, pIter, 0);
    fts5SegIterLoadNPos(p, pIter);
  }
}

/*
** This function is only ever called on iterators created by calls to
** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set.
**
** The iterator is in an unusual state when this function is called: the
** Fts5SegIter.iLeafOffset variable is set to the offset of the start of
** the position-list size field for the first relevant rowid on the page.
** Fts5SegIter.rowid is set, but nPos and bDel are not.
**
** This function advances the iterator so that it points to the last
** relevant rowid on the page and, if necessary, initializes the
** aRowidOffset[] and iRowidOffset variables. At this point the iterator
** is in its regular state - Fts5SegIter.iLeafOffset points to the first
** byte of the position list content associated with said rowid.
*/
static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
  int n = pIter->pLeaf->n;
  int i = pIter->iLeafOffset;
  u8 *a = pIter->pLeaf->p;
  int iRowidOffset = 0;

  while( 1 ){
    i64 iDelta = 0;
    int nPos;
    int bDummy;

    /* Skip over the position list of the current entry */
    i += fts5GetPoslistSize(&a[i], &nPos, &bDummy);
    i += nPos;
    if( i>=n ) break;
    /* A rowid delta of 0 ends the scan of this page */
    i += fts5GetVarint(&a[i], (u64*)&iDelta);
    if( iDelta==0 ) break;
    pIter->iRowid += iDelta;

    /* Grow aRowidOffset[] in steps of 8 entries as required */
    if( iRowidOffset>=pIter->nRowidOffset ){
      int nNew = pIter->nRowidOffset + 8;
      int *aNew = (int*)sqlite3_realloc(pIter->aRowidOffset, nNew*sizeof(int));
      if( aNew==0 ){
        p->rc = SQLITE_NOMEM;
        break;
      }
      pIter->aRowidOffset = aNew;
      pIter->nRowidOffset = nNew;
    }

    /* Record the offset of the entry being left, so that
    ** fts5SegIterNext() can return to it when iterating in reverse. */
    pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset;
    pIter->iLeafOffset = i;
  }
  pIter->iRowidOffset = iRowidOffset;
  fts5SegIterLoadNPos(p, pIter);
}

/*
** This function is called on a reverse, single-term iterator (see the
** assert() statements below) once all entries on the current page have
** been visited. The current page is released and the iterator stepped
** back through earlier leaf pages, no further than page iTermLeafPgno,
** until one that contains a rowid for the current doclist is found. If
** one is found, the iterator is left pointing at the last rowid on that
** page. Otherwise, pIter->pLeaf is left set to NULL.
*/
static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){
  assert( pIter->flags & FTS5_SEGITER_REVERSE );
  assert( pIter->flags & FTS5_SEGITER_ONETERM );

  fts5DataRelease(pIter->pLeaf);
  pIter->pLeaf = 0;
  while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){
    Fts5Data *pNew;
    pIter->iLeafPgno--;
    pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
          pIter->pSeg->iSegid, 0, pIter->iLeafPgno
    ));
    if( pNew ){
      if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
        /* This is the page holding the term itself. It is used only if
        ** there is data after the term, at offset iTermLeafOffset. */
        if( pIter->iTermLeafOffset<pNew->n ){
          pIter->pLeaf = pNew;
          pIter->iLeafOffset = pIter->iTermLeafOffset;
        }
      }else{
        /* Use this page only if its first header field, read here as
        ** iRowidOff, is non-zero. */
        int iRowidOff, dummy;
        fts5LeafHeader(pNew, &iRowidOff, &dummy);
        if( iRowidOff ){
          pIter->pLeaf = pNew;
          pIter->iLeafOffset = iRowidOff;
        }
      }

      if( pIter->pLeaf ){
        u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset];
        pIter->iLeafOffset += fts5GetVarint(a, (u64*)&pIter->iRowid);
        break;
      }else{
        fts5DataRelease(pNew);
      }
    }
  }

  if( pIter->pLeaf ){
    fts5SegIterReverseInitPage(p, pIter);
  }
}

/*
** Return true if the iterator passed as the second argument currently
** points to a delete marker. A delete marker is an entry with a 0 byte
** position-list.
*/
static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5MultiSegIter *pIter){
  Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
  return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0);
}

/*
** Advance iterator pIter to the next entry.
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. It
** is not considered an error if the iterator reaches EOF. If an error has
** already occurred when this function is called, it is a no-op.
**
** If pbNewTerm is not NULL, *pbNewTerm must be zero on entry. It is set
** to 1 if the iterator moves on to a new term.
*/
static void fts5SegIterNext(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  int *pbNewTerm                  /* OUT: Set for new term */
){
  assert( pbNewTerm==0 || *pbNewTerm==0 );
  if( p->rc==SQLITE_OK ){
    if( pIter->flags & FTS5_SEGITER_REVERSE ){
      assert( pIter->pNextLeaf==0 );
      if( pIter->iRowidOffset>0 ){
        /* Return to the previous entry on this page, using the offset
        ** saved in aRowidOffset[] by fts5SegIterReverseInitPage(). */
        u8 *a = pIter->pLeaf->p;
        int iOff;
        int nPos;
        int bDummy;
        i64 iDelta;

        pIter->iRowidOffset--;
        pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset];
        iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy);
        iOff += nPos;
        /* Subtract the delta that was added when moving forwards */
        fts5GetVarint(&a[iOff], (u64*)&iDelta);
        pIter->iRowid -= iDelta;
        fts5SegIterLoadNPos(p, pIter);
      }else{
        fts5SegIterReverseNewPage(p, pIter);
      }
    }else{
      Fts5Data *pLeaf = pIter->pLeaf;
      int iOff;
      int bNewTerm = 0;
      int nKeep = 0;

      /* Search for the end of the position list within the current page. */
      u8 *a = pLeaf->p;
      int n = pLeaf->n;

      iOff = pIter->iLeafOffset + pIter->nPos;

      if( iOff<n ){
        /* The next entry is on the current page */
        u64 iDelta;
        iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
        pIter->iLeafOffset = iOff;
        if( iDelta==0 ){
          /* A zero delta marks the end of the doclist: a new term follows,
          ** either at offset 4 of the next page or on this page. On this
          ** page a varint is read into nKeep unless iOff equals the 16-bit
          ** field at byte offset 2 of the page. */
          bNewTerm = 1;
          if( iOff>=n ){
            fts5SegIterNextPage(p, pIter);
            pIter->iLeafOffset = 4;
          }else if( iOff!=fts5GetU16(&a[2]) ){
            pIter->iLeafOffset += fts5GetVarint32(&a[iOff], nKeep);
          }
        }else{
          pIter->iRowid += iDelta;
        }
      }else if( pIter->pSeg==0 ){
        /* No segment is associated with this iterator: the data comes
        ** from the hash table p->pHash instead. */
        const u8 *pList = 0;
        const char *zTerm;
        int nList;
        if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){
          sqlite3Fts5HashScanNext(p->pHash);
          sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
        }
        if( pList==0 ){
          fts5DataRelease(pIter->pLeaf);
          pIter->pLeaf = 0;
        }else{
          pIter->pLeaf->p = (u8*)pList;
          pIter->pLeaf->n = nList;
          sqlite3Fts5BufferSet(&p->rc, &pIter->term, strlen(zTerm), (u8*)zTerm);
          pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
        }
      }else{
        iOff = 0;
        /* Next entry is not on the current page */
        while( iOff==0 ){
          fts5SegIterNextPage(p, pIter);
          pLeaf = pIter->pLeaf;
          if( pLeaf==0 ) break;
          /* Use the header field at offset 0 if it is non-zero and within
          ** the page, otherwise the field at offset 2 (a new term). If
          ** both are zero, move on to the next page. */
          if( (iOff = fts5GetU16(&pLeaf->p[0])) && iOff<pLeaf->n ){
            iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
            pIter->iLeafOffset = iOff;
          }
          else if( (iOff = fts5GetU16(&pLeaf->p[2])) ){
            pIter->iLeafOffset = iOff;
            bNewTerm = 1;
          }
          if( iOff>=pLeaf->n ){
            p->rc = FTS5_CORRUPT;
            return;
          }
        }
      }

      /* Check if the iterator is now at EOF. If so, return early. */
      if( pIter->pLeaf ){
        if( bNewTerm ){
          if( pIter->flags & FTS5_SEGITER_ONETERM ){
            /* A single-term iterator is at EOF once its term is finished */
            fts5DataRelease(pIter->pLeaf);
            pIter->pLeaf = 0;
          }else{
            fts5SegIterLoadTerm(p, pIter, nKeep);
            fts5SegIterLoadNPos(p, pIter);
            if( pbNewTerm ) *pbNewTerm = 1;
          }
        }else{
          fts5SegIterLoadNPos(p, pIter);
        }
      }
    }
  }
}

/* Exchange the values of a and b, both of which are of type T. */
#define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; }

/*
** Iterator pIter currently points to the first rowid in a doclist. This
** function sets the iterator up so that iterates in reverse order through
** the doclist.
*/
static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
  Fts5DlidxIter *pDlidx = pIter->pDlidx;
  Fts5Data *pLast = 0;            /* Page holding the last rowid, if not current */
  int pgnoLast = 0;               /* Page number of pLast */

  if( pDlidx ){
    /* A doclist-index iterator is available. Read the leaf page that it
    ** currently indicates. */
    int iSegid = pIter->pSeg->iSegid;
    pgnoLast = fts5DlidxIterPgno(pDlidx);
    pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, 0, pgnoLast));
  }else{
    int iOff;                               /* Byte offset within pLeaf */
    Fts5Data *pLeaf = pIter->pLeaf;         /* Current leaf data */

    /* Currently, Fts5SegIter.iLeafOffset (and iOff) points to the first
    ** byte of position-list content for the current rowid. Back it up
    ** so that it points to the start of the position-list size field. */
    pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel);
    iOff = pIter->iLeafOffset;
    assert( iOff>=4 );

    /* Search for a new term within the current leaf. If one can be found,
    ** then this page contains the largest rowid for the current term. */
    while( iOff<pLeaf->n ){
      int nPos;
      i64 iDelta;
      int bDummy;

      /* Read the position-list size field */
      iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy);
      iOff += nPos;
      if( iOff>=pLeaf->n ) break;

      /* Rowid delta. Or, if 0x00, the end of doclist marker. */
      nPos = fts5GetVarint(&pLeaf->p[iOff], (u64*)&iDelta);
      if( iDelta==0 ) break;
      iOff += nPos;
    }

    /* If this condition is true then the largest rowid for the current
    ** term may not be stored on the current page. So search forward to
    ** see where said rowid really is. */
    if( iOff>=pLeaf->n ){
      int pgno;
      Fts5StructureSegment *pSeg = pIter->pSeg;

      /* The last rowid in the doclist may not be on the current page. Search
      ** forward to find the page containing the last rowid. */
      for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){
        i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, pgno);
        Fts5Data *pNew = fts5DataRead(p, iAbs);
        if( pNew ){
          int iRowid, iTerm;
          fts5LeafHeader(pNew, &iRowid, &iTerm);
          if( iRowid ){
            /* Keep this page in pLast. After the swap pNew holds the
            ** previous candidate (or NULL), which is released below. */
            SWAPVAL(Fts5Data*, pNew, pLast);
            pgnoLast = pgno;
          }
          fts5DataRelease(pNew);
          if( iTerm ) break;
        }
      }
    }
  }

  /* If pLast is NULL at this point, then the last rowid for this doclist
  ** lies on the page currently indicated by the iterator. In this case 
  ** pIter->iLeafOffset is already set to point to the position-list size
  ** field associated with the first relevant rowid on the page.
  **
  ** Or, if pLast is non-NULL, then it is the page that contains the last
  ** rowid. In this case configure the iterator so that it points to the
  ** first rowid on this page.
  */
  if( pLast ){
    int dummy;
    int iOff;
    fts5DataRelease(pIter->pLeaf);
    pIter->pLeaf = pLast;
    pIter->iLeafPgno = pgnoLast;
    fts5LeafHeader(pLast, &iOff, &dummy);
    iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
    pIter->iLeafOffset = iOff;
  }

  fts5SegIterReverseInitPage(p, pIter);
}

/*
** Iterator pIter currently points to the first rowid of a doclist.
** There is a doclist-index associated with the final term on the current 
** page. If the current term is the last term on the page, load the 
** doclist-index from disk and initialize an iterator at (pIter->pDlidx).
*/
static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){
  int iSeg = pIter->pSeg->iSegid;
  int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
  Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */

  assert( pIter->flags & FTS5_SEGITER_ONETERM );
  assert( pIter->pDlidx==0 );

  /* Check if the current doclist ends on this page. If it does, return
  ** early without loading the doclist-index (as it belongs to a different
  ** term). */
  if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
    int iOff = pIter->iLeafOffset + pIter->nPos;
    while( iOff<pLeaf->n ){
      int bDummy;
      int nPos;
      i64 iDelta;

      /* iOff is currently the offset of the rowid delta (or the 0x00
      ** end-of-doclist marker) that follows the previous position list */
      iOff += fts5GetVarint(&pLeaf->p[iOff], (u64*)&iDelta);
      if( iDelta==0 ) return;
      assert_nc( iOff<pLeaf->n );
      iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy);
      iOff += nPos;
    }
  }

  pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
}

/*
** Initialize the object pIter to point to term pTerm/nTerm within segment
** pSeg. If there is no such term in the index, the iterator is set to EOF.
**
** If flag FTS5INDEX_QUERY_SCAN is set, the iterator is instead left
** pointing at the first term greater than or equal to pTerm/nTerm, and
** the ONETERM/REVERSE/doclist-index setup is skipped.
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. If 
** an error has already occurred when this function is called, it is a no-op.
*/
static void fts5SegIterSeekInit(
  Fts5Index *p,                   /* FTS5 backend */
  const u8 *pTerm, int nTerm,     /* Term to seek to */
  int flags,                      /* Mask of FTS5INDEX_XXX flags */
  Fts5StructureSegment *pSeg,     /* Description of segment */
  Fts5SegIter *pIter              /* Object to populate */
){
  int iPg = 1;
  int h;
  int bGe = (flags & FTS5INDEX_QUERY_SCAN);
  int bDlidx = 0;                 /* True if there is a doclist-index */
  Fts5Data *pLeaf;

  assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 );
  assert( pTerm && nTerm );
  memset(pIter, 0, sizeof(*pIter));
  pIter->pSeg = pSeg;

  /* This block sets stack variable iPg to the leaf page number that may
  ** contain term (pTerm/nTerm), if it is present in the segment. */
  for(h=pSeg->nHeight-1; h>0; h--){
    Fts5NodeIter node;              /* For iterating through internal nodes */
    i64 iRowid = FTS5_SEGMENT_ROWID(pSeg->iSegid, h, iPg);
    Fts5Data *pNode = fts5DataRead(p, iRowid);
    if( pNode==0 ) break;

    fts5NodeIterInit(pNode->p, pNode->n, &node);
    assert( node.term.n==0 );

    /* Follow the child pointer of the last node entry whose term is
    ** less than or equal to (pTerm/nTerm). */
    iPg = node.iChild;
    bDlidx = node.bDlidx;
    for(fts5NodeIterNext(&p->rc, &node);
        node.aData && fts5BufferCompareBlob(&node.term, pTerm, nTerm)<=0;
        fts5NodeIterNext(&p->rc, &node)
    ){
      iPg = node.iChild;
      bDlidx = node.bDlidx;
    }
    fts5NodeIterFree(&node);
    fts5DataRelease(pNode);
  }

  if( iPg<pSeg->pgnoFirst ){
    iPg = pSeg->pgnoFirst;
    bDlidx = 0;
  }

  /* Load leaf iPg by positioning on the page before it and stepping */
  pIter->iLeafPgno = iPg - 1;
  fts5SegIterNextPage(p, pIter);

  if( (pLeaf = pIter->pLeaf) ){
    int res;
    pIter->iLeafOffset = fts5GetU16(&pLeaf->p[2]);
    if( pIter->iLeafOffset<4 || pIter->iLeafOffset>=pLeaf->n ){
      p->rc = FTS5_CORRUPT;
    }else{
      fts5SegIterLoadTerm(p, pIter, 0);
      fts5SegIterLoadNPos(p, pIter);
      /* Step forward until the iterator term is >= (pTerm/nTerm), or
      ** until EOF or an error. */
      do {
        res = fts5BufferCompareBlob(&pIter->term, pTerm, nTerm);
        if( res>=0 ) break;
        fts5SegIterNext(p, pIter, 0);
      }while( pIter->pLeaf && p->rc==SQLITE_OK );

      if( bGe==0 && res ){
        /* Set iterator to point to EOF */
        fts5DataRelease(pIter->pLeaf);
        pIter->pLeaf = 0;
      }
    }
  }

  if( p->rc==SQLITE_OK && bGe==0 ){
    pIter->flags |= FTS5_SEGITER_ONETERM;
    if( pIter->pLeaf ){
      /* The REVERSE flag is set before the doclist-index is loaded, as
      ** fts5SegIterLoadDlidx() reads it from pIter->flags. */
      if( flags & FTS5INDEX_QUERY_DESC ){
        pIter->flags |= FTS5_SEGITER_REVERSE;
      }
      if( bDlidx ){
        fts5SegIterLoadDlidx(p, pIter);
      }
      if( flags & FTS5INDEX_QUERY_DESC ){
        fts5SegIterReverse(p, pIter);
      }
    }
  }
}

/*
** Initialize the object pIter to point to term pTerm/nTerm within the
** in-memory hash table. If there is no such term in the hash-table, the 
** iterator is set to EOF.
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. If 
** an error has already occurred when this function is called, it is a no-op.
*/ static void fts5SegIterHashInit( Fts5Index *p, /* FTS5 backend */ const u8 *pTerm, int nTerm, /* Term to seek to */ int flags, /* Mask of FTS5INDEX_XXX flags */ Fts5SegIter *pIter /* Object to populate */ ){ const u8 *pList = 0; int nList = 0; const u8 *z = 0; int n = 0; assert( p->pHash ); assert( p->rc==SQLITE_OK ); if( pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN) ){ p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm); sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &pList, &nList); n = (z ? strlen((const char*)z) : 0); }else{ pIter->flags |= FTS5_SEGITER_ONETERM; sqlite3Fts5HashQuery(p->pHash, (const char*)pTerm, nTerm, &pList, &nList); z = pTerm; n = nTerm; } if( pList ){ Fts5Data *pLeaf; sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z); pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data)); if( pLeaf==0 ) return; pLeaf->p = (u8*)pList; pLeaf->n = nList; pIter->pLeaf = pLeaf; pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid); if( flags & FTS5INDEX_QUERY_DESC ){ pIter->flags |= FTS5_SEGITER_REVERSE; fts5SegIterReverseInitPage(p, pIter); }else{ fts5SegIterLoadNPos(p, pIter); } } } /* ** Zero the iterator passed as the only argument. */ static void fts5SegIterClear(Fts5SegIter *pIter){ fts5BufferFree(&pIter->term); fts5DataRelease(pIter->pLeaf); fts5DataRelease(pIter->pNextLeaf); fts5DlidxIterFree(pIter->pDlidx); sqlite3_free(pIter->aRowidOffset); memset(pIter, 0, sizeof(Fts5SegIter)); } #ifdef SQLITE_DEBUG /* ** This function is used as part of the big assert() procedure implemented by ** fts5AssertMultiIterSetup(). It ensures that the result currently stored ** in *pRes is the correct result of comparing the current positions of the ** two iterators. 
*/ static void fts5AssertComparisonResult( Fts5MultiSegIter *pIter, Fts5SegIter *p1, Fts5SegIter *p2, Fts5CResult *pRes ){ int i1 = p1 - pIter->aSeg; int i2 = p2 - pIter->aSeg; if( p1->pLeaf || p2->pLeaf ){ if( p1->pLeaf==0 ){ assert( pRes->iFirst==i2 ); }else if( p2->pLeaf==0 ){ assert( pRes->iFirst==i1 ); }else{ int nMin = MIN(p1->term.n, p2->term.n); int res = memcmp(p1->term.p, p2->term.p, nMin); if( res==0 ) res = p1->term.n - p2->term.n; if( res==0 ){ assert( pRes->bTermEq==1 ); assert( p1->iRowid!=p2->iRowid ); res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1; }else{ assert( pRes->bTermEq==0 ); } if( res<0 ){ assert( pRes->iFirst==i1 ); }else{ assert( pRes->iFirst==i2 ); } } } } /* ** This function is a no-op unless SQLITE_DEBUG is defined when this module ** is compiled. In that case, this function is essentially an assert() ** statement used to verify that the contents of the pIter->aFirst[] array ** are correct. */ static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5MultiSegIter *pIter){ if( p->rc==SQLITE_OK ){ int i; for(i=0; i<pIter->nSeg; i+=2){ Fts5SegIter *p1 = &pIter->aSeg[i]; Fts5SegIter *p2 = &pIter->aSeg[i+1]; Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2]; fts5AssertComparisonResult(pIter, p1, p2, pRes); } for(i=1; i<(pIter->nSeg / 2); i+=2){ Fts5CResult *pRes = &pIter->aFirst[i]; Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ]; Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ]; fts5AssertComparisonResult(pIter, p1, p2, pRes); } } } #else # define fts5AssertMultiIterSetup(x,y) #endif /* ** Do the comparison necessary to populate pIter->aFirst[iOut]. ** ** If the returned value is non-zero, then it is the index of an entry ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing ** to a key that is a duplicate of another, higher priority, ** segment-iterator in the pSeg->aSeg[] array. 
*/
static int fts5MultiIterDoCompare(Fts5MultiSegIter *pIter, int iOut){
  Fts5CResult *pOut = &pIter->aFirst[iOut];   /* Slot to populate */
  int iLeft;                      /* Index of left-hand Fts5SegIter */
  int iRight;                     /* Index of right-hand Fts5SegIter */
  int iWinner;                    /* Value for pOut->iFirst */
  Fts5SegIter *pLeft;             /* Left-hand Fts5SegIter */
  Fts5SegIter *pRight;            /* Right-hand Fts5SegIter */

  assert( iOut<pIter->nSeg && iOut>0 );
  assert( pIter->bRev==0 || pIter->bRev==1 );

  if( iOut<(pIter->nSeg/2) ){
    /* Interior slot: compare the winners stored in the two child slots */
    iLeft = pIter->aFirst[iOut*2].iFirst;
    iRight = pIter->aFirst[iOut*2+1].iFirst;
  }else{
    /* Bottom-level slot: compare two adjacent entries of aSeg[] */
    iLeft = (iOut - pIter->nSeg/2) * 2;
    iRight = iLeft + 1;
  }
  pLeft = &pIter->aSeg[iLeft];
  pRight = &pIter->aSeg[iRight];

  pOut->bTermEq = 0;
  if( pLeft->pLeaf==0 ){
    /* Left-hand iterator is at EOF */
    iWinner = iRight;
  }else if( pRight->pLeaf==0 ){
    /* Right-hand iterator is at EOF */
    iWinner = iLeft;
  }else{
    int cmp = fts5BufferCompare(&pLeft->term, &pRight->term);
    if( cmp==0 ){
      assert( iRight>iLeft );
      assert( iRight!=0 );
      pOut->bTermEq = 1;
      if( pLeft->iRowid==pRight->iRowid ){
        /* Same term and rowid. Copy the right-hand delete flag to the
        ** left-hand iterator and report the right-hand one to the caller.
        ** pOut->iFirst is deliberately left unset in this case. */
        pLeft->bDel = pRight->bDel;
        return iRight;
      }
      cmp = ((pLeft->iRowid > pRight->iRowid)==pIter->bRev) ? -1 : +1;
    }
    assert( cmp!=0 );
    iWinner = (cmp<0) ? iLeft : iRight;
  }

  pOut->iFirst = iWinner;
  return 0;
}

/*
** Move the seg-iter so that it points to the first rowid on page iLeafPgno.
** It is an error if leaf iLeafPgno does not exist or contains no rowids.
*/
static void fts5SegIterGotoPage(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  int iLeafPgno                   /* Leaf page to move to (> current page) */
){
  assert( iLeafPgno>pIter->iLeafPgno );
  if( iLeafPgno>pIter->pSeg->pgnoLast ){
    p->rc = FTS5_CORRUPT;
  }else{
    /* Discard any cached next page, then load page iLeafPgno by
    ** positioning on the page before it and stepping forward. */
    fts5DataRelease(pIter->pNextLeaf);
    pIter->pNextLeaf = 0;
    pIter->iLeafPgno = iLeafPgno-1;
    fts5SegIterNextPage(p, pIter);
    assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno );

    if( p->rc==SQLITE_OK ){
      int iOff;
      u8 *a = pIter->pLeaf->p;
      int n = pIter->pLeaf->n;

      /* First two bytes of the page: offset of the first rowid */
      iOff = fts5GetU16(&a[0]);
      if( iOff<4 || iOff>=n ){
        p->rc = FTS5_CORRUPT;
      }else{
        iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
        pIter->iLeafOffset = iOff;
        fts5SegIterLoadNPos(p, pIter);
      }
    }
  }
}

/*
** Advance the iterator passed as the second argument until it is at or 
** past rowid iMatch. Regardless of the value of iMatch, the iterator is
** always advanced at least once.
**
** The iterator must be a ONETERM iterator with a doclist-index
** (pIter->pDlidx) and must not be at EOF.
*/
static void fts5SegIterNextFrom(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  i64 iMatch                      /* Advance iterator at least this far */
){
  int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
  Fts5DlidxIter *pDlidx = pIter->pDlidx;
  int iLeafPgno = pIter->iLeafPgno;
  int bMove = 1;                  /* True to step before testing the rowid */

  assert( pIter->flags & FTS5_SEGITER_ONETERM );
  assert( pIter->pDlidx );
  assert( pIter->pLeaf );

  if( bRev==0 ){
    /* Advance the doclist-index while its rowid is smaller than iMatch,
    ** remembering the page number seen just before each step. */
    while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){
      iLeafPgno = fts5DlidxIterPgno(pDlidx);
      fts5DlidxIterNext(p, pDlidx);
    }
    assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc );
    if( iLeafPgno>pIter->iLeafPgno ){
      fts5SegIterGotoPage(p, pIter, iLeafPgno);
      bMove = 0;
    }
  }else{
    assert( pIter->pNextLeaf==0 );
    assert( iMatch<pIter->iRowid );
    /* Step the doclist-index backwards while its rowid exceeds iMatch */
    while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){
      fts5DlidxIterPrev(p, pDlidx);
    }
    iLeafPgno = fts5DlidxIterPgno(pDlidx);

    assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno );

    if( iLeafPgno<pIter->iLeafPgno ){
      pIter->iLeafPgno = iLeafPgno+1;
      fts5SegIterReverseNewPage(p, pIter);
      bMove = 0;
    }
  }

  /* Step entry by entry until the rowid reaches iMatch, EOF is reached
  ** or an error occurs. If a page jump was made above (bMove==0), the
  ** first entry on the new page is tested before stepping. */
  while( p->rc==SQLITE_OK ){
    if( bMove ) fts5SegIterNext(p, pIter, 0);
    if( pIter->pLeaf==0 ) break;
    if( bRev==0 && pIter->iRowid>=iMatch ) break;
    if( bRev!=0 && pIter->iRowid<=iMatch ) break;
    bMove = 1;
  }
}


/*
** Free the iterator object passed as the second argument.
*/
static void fts5MultiIterFree(Fts5Index *p, Fts5MultiSegIter *pIter){
  if( pIter ){
    int i;
    for(i=0; i<pIter->nSeg; i++){
      fts5SegIterClear(&pIter->aSeg[i]);
    }
    sqlite3_free(pIter);
  }
}

/*
** Sub-iterator iChanged has just been advanced. Recompute the aFirst[]
** entries on the path from that sub-iterator towards the root, stopping
** once the slot index drops below iMinset or an error occurs.
**
** If fts5MultiIterDoCompare() returns a non-zero sub-iterator index for
** any slot, that sub-iterator is advanced and the walk restarts from its
** own bottom-level slot.
*/
static void fts5MultiIterAdvanced(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  Fts5MultiSegIter *pIter,        /* Iterator to update aFirst[] array for */
  int iChanged,                   /* Index of sub-iterator just advanced */
  int iMinset                     /* Minimum entry in aFirst[] to set */
){
  int i;
  for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){
    int iEq;
    if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){
      fts5SegIterNext(p, &pIter->aSeg[iEq], 0);
      /* The loop increment halves this, giving the bottom-level slot
      ** for sub-iterator iEq. */
      i = pIter->nSeg + iEq;
    }
  }
}

/*
** Sub-iterator iChanged has just been advanced to a new rowid within the
** same term (it must not be at EOF). Update the aFirst[].iFirst values
** on the path from that sub-iterator to the root, relying on the
** bTermEq flags already stored in aFirst[].
**
** Return 1, without finishing the update, if a sub-iterator with an equal
** term is found to have the same rowid as the current candidate. Return
** 0 otherwise.
*/
static int fts5MultiIterAdvanceRowid(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  Fts5MultiSegIter *pIter,        /* Iterator to update aFirst[] array for */
  int iChanged                    /* Index of sub-iterator just advanced */
){
  int i;
  Fts5SegIter *pNew = &pIter->aSeg[iChanged];
  Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001];

  for(i=(pIter->nSeg+iChanged)/2; p->rc==SQLITE_OK; i=i/2){
    Fts5CResult *pRes = &pIter->aFirst[i];

    assert( pNew->pLeaf );
    assert( pRes->bTermEq==0 || pOther->pLeaf );
    
    if( pRes->bTermEq ){
      if( pNew->iRowid==pOther->iRowid ){
        return 1;
      }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){
        pNew = pOther;
      }
    }
    pRes->iFirst = (pNew - pIter->aSeg);
    if( i==1 ) break;

    /* The winner stored in the sibling slot is the next opponent */
    pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ];
  }

  return 0;
}

/*
** Move the iterator to the next entry. 
**
** If an error occurs, an error code is left in Fts5Index.rc. It is not 
** considered an error if the iterator reaches EOF, or if it is already at 
** EOF when this function is called.
*/
static void fts5MultiIterNext(
  Fts5Index *p, 
  Fts5MultiSegIter *pIter,
  int bFrom,                      /* True if argument iFrom is valid */
  i64 iFrom                       /* Advance at least as far as this */
){
  int bSeek = bFrom;              /* True until the first step is done */

  if( p->rc!=SQLITE_OK ) return;

  do {
    int iHead = pIter->aFirst[1].iFirst;
    Fts5SegIter *pHead = &pIter->aSeg[iHead];
    int bNewTerm = 0;
    int bRebuild;

    assert( p->rc==SQLITE_OK );

    /* Step the sub-iterator at the head of the merge. Only the first
    ** step may use the doclist-index to jump towards iFrom. */
    if( bSeek==0 || pHead->pDlidx==0 ){
      fts5SegIterNext(p, pHead, &bNewTerm);
    }else{
      fts5SegIterNextFrom(p, pHead, iFrom);
    }

    /* A full recompare is required if the sub-iterator hit EOF or moved
    ** to a new term. Otherwise try the cheaper rowid-only update first,
    ** falling back to the full recompare if it returns non-zero. */
    bRebuild = (pHead->pLeaf==0 || bNewTerm);
    if( bRebuild==0 ){
      bRebuild = fts5MultiIterAdvanceRowid(p, pIter, iHead);
    }
    if( bRebuild ){
      fts5MultiIterAdvanced(p, pIter, iHead, 1);
    }
    fts5AssertMultiIterSetup(p, pIter);

    bSeek = 0;
  }while( pIter->bSkipEmpty && fts5MultiIterIsEmpty(p, pIter) );
}

/*
** Allocate a zeroed Fts5MultiSegIter together with its aSeg[] and
** aFirst[] arrays in a single allocation made via fts5IdxMalloc(). Both
** arrays get nSlot entries, where nSlot is the smallest power of two that
** is at least 2 and at least nSeg. Return NULL if the allocation fails.
*/
static Fts5MultiSegIter *fts5MultiIterAlloc(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  int nSeg
){
  Fts5MultiSegIter *pRet;
  int nSlot = 2;                  /* Power of two >= nSeg */

  while( nSlot<nSeg ){
    nSlot = nSlot*2;
  }

  pRet = fts5IdxMalloc(p, 
      sizeof(Fts5MultiSegIter) +          /* pRet */
      sizeof(Fts5SegIter) * nSlot +       /* pRet->aSeg[] */
      sizeof(Fts5CResult) * nSlot         /* pRet->aFirst[] */
  );
  if( pRet==0 ) return 0;

  /* aSeg[] follows the header structure, aFirst[] follows aSeg[] */
  pRet->nSeg = nSlot;
  pRet->aSeg = (Fts5SegIter*)&pRet[1];
  pRet->aFirst = (Fts5CResult*)&pRet->aSeg[nSlot];
  return pRet;
}

/*
** Allocate a new Fts5MultiSegIter object.
**
** The new object will be used to iterate through data in structure pStruct.
** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel
** is zero or greater, data from the first nSegment segments on level iLevel
** is merged.
**
** The iterator initially points to the first term/rowid entry in the 
** iterated data.
*/ static void fts5MultiIterNew( Fts5Index *p, /* FTS5 backend to iterate within */ Fts5Structure *pStruct, /* Structure of specific index */ int bSkipEmpty, /* True to ignore delete-keys */ int flags, /* FTS5INDEX_QUERY_XXX flags */ const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */ int iLevel, /* Level to iterate (-1 for all) */ int nSegment, /* Number of segments to merge (iLevel>=0) */ Fts5MultiSegIter **ppOut /* New object */ ){ int nSeg = 0; /* Number of segment-iters in use */ int iIter = 0; /* */ int iSeg; /* Used to iterate through segments */ Fts5StructureLevel *pLvl; Fts5MultiSegIter *pNew; assert( (pTerm==0 && nTerm==0) || iLevel<0 ); /* Allocate space for the new multi-seg-iterator. */ if( p->rc==SQLITE_OK ){ if( iLevel<0 ){ assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) ); nSeg = pStruct->nSegment; nSeg += (p->pHash ? 1 : 0); }else{ nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment); } } *ppOut = pNew = fts5MultiIterAlloc(p, nSeg); if( pNew==0 ) return; pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC)); pNew->bSkipEmpty = bSkipEmpty; /* Initialize each of the component segment iterators. */ if( iLevel<0 ){ Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel]; if( p->pHash ){ /* Add a segment iterator for the current contents of the hash table. */ Fts5SegIter *pIter = &pNew->aSeg[iIter++]; fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter); } for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){ for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){ Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; Fts5SegIter *pIter = &pNew->aSeg[iIter++]; if( pTerm==0 ){ fts5SegIterInit(p, pSeg, pIter); }else{ fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter); } } } }else{ pLvl = &pStruct->aLevel[iLevel]; for(iSeg=nSeg-1; iSeg>=0; iSeg--){ fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]); } } assert( iIter==nSeg ); /* If the above was successful, each component iterators now points ** to the first entry in its segment. 
In this case initialize the ** aFirst[] array. Or, if an error has occurred, free the iterator ** object and set the output variable to NULL. */ if( p->rc==SQLITE_OK ){ for(iIter=pNew->nSeg-1; iIter>0; iIter--){ int iEq; if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){ fts5SegIterNext(p, &pNew->aSeg[iEq], 0); fts5MultiIterAdvanced(p, pNew, iEq, iIter); } } fts5AssertMultiIterSetup(p, pNew); if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){ fts5MultiIterNext(p, pNew, 0, 0); } }else{ fts5MultiIterFree(p, pNew); *ppOut = 0; } } /* ** Create an Fts5MultiSegIter that iterates through the doclist provided ** as the second argument. */ static void fts5MultiIterNew2( Fts5Index *p, /* FTS5 backend to iterate within */ Fts5Data *pData, /* Doclist to iterate through */ int bDesc, /* True for descending rowid order */ Fts5MultiSegIter **ppOut /* New object */ ){ Fts5MultiSegIter *pNew; pNew = fts5MultiIterAlloc(p, 2); if( pNew ){ Fts5SegIter *pIter = &pNew->aSeg[1]; pIter->flags = FTS5_SEGITER_ONETERM; if( pData->n>0 ){ pIter->pLeaf = pData; pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid); pNew->aFirst[1].iFirst = 1; if( bDesc ){ pNew->bRev = 1; pIter->flags |= FTS5_SEGITER_REVERSE; fts5SegIterReverseInitPage(p, pIter); }else{ fts5SegIterLoadNPos(p, pIter); } pData = 0; } *ppOut = pNew; } fts5DataRelease(pData); } /* ** Return true if the iterator is at EOF or if an error has occurred. ** False otherwise. */ static int fts5MultiIterEof(Fts5Index *p, Fts5MultiSegIter *pIter){ return (p->rc || pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0); } /* ** Return the rowid of the entry that the iterator currently points ** to. If the iterator points to EOF when this function is called the ** results are undefined. */ static i64 fts5MultiIterRowid(Fts5MultiSegIter *pIter){ assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf ); return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid; } /* ** Move the iterator to the next entry at or following iMatch. 
*/ static void fts5MultiIterNextFrom( Fts5Index *p, Fts5MultiSegIter *pIter, i64 iMatch ){ while( 1 ){ i64 iRowid; fts5MultiIterNext(p, pIter, 1, iMatch); if( fts5MultiIterEof(p, pIter) ) break; iRowid = fts5MultiIterRowid(pIter); if( pIter->bRev==0 && iRowid>=iMatch ) break; if( pIter->bRev!=0 && iRowid<=iMatch ) break; } } /* ** Return a pointer to a buffer containing the term associated with the ** entry that the iterator currently points to. */ static const u8 *fts5MultiIterTerm(Fts5MultiSegIter *pIter, int *pn){ Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; *pn = p->term.n; return p->term.p; } static void fts5ChunkIterate( Fts5Index *p, /* Index object */ Fts5SegIter *pSeg, /* Poslist of this iterator */ void *pCtx, /* Context pointer for xChunk callback */ void (*xChunk)(Fts5Index*, void*, const u8*, int) ){ int nRem = pSeg->nPos; /* Number of bytes still to come */ Fts5Data *pData = 0; u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset]; int nChunk = MIN(nRem, pSeg->pLeaf->n - pSeg->iLeafOffset); int pgno = pSeg->iLeafPgno; int pgnoSave = 0; if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){ pgnoSave = pgno+1; } while( 1 ){ xChunk(p, pCtx, pChunk, nChunk); nRem -= nChunk; fts5DataRelease(pData); if( nRem<=0 ){ break; }else{ pgno++; pData = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, 0, pgno)); if( pData==0 ) break; pChunk = &pData->p[4]; nChunk = MIN(nRem, pData->n - 4); if( pgno==pgnoSave ){ assert( pSeg->pNextLeaf==0 ); pSeg->pNextLeaf = pData; pData = 0; } } } } /* ** Allocate a new segment-id for the structure pStruct. The new segment ** id must be between 1 and 65335 inclusive, and must not be used by ** any currently existing segment. If a free segment id cannot be found, ** SQLITE_FULL is returned. ** ** If an error has already occurred, this function is a no-op. 0 is ** returned in this case. 
*/ static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){ u32 iSegid = 0; if( p->rc==SQLITE_OK ){ if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){ p->rc = SQLITE_FULL; }else{ while( iSegid==0 ){ int iLvl, iSeg; sqlite3_randomness(sizeof(u32), (void*)&iSegid); iSegid = (iSegid % ((1 << FTS5_DATA_ID_B) - 2)) + 1; assert( iSegid>0 && iSegid<=65535 ); for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ if( iSegid==pStruct->aLevel[iLvl].aSeg[iSeg].iSegid ){ iSegid = 0; } } } } } } return (int)iSegid; } /* ** Discard all data currently cached in the hash-tables. */ static void fts5IndexDiscardData(Fts5Index *p){ assert( p->pHash || p->nPendingData==0 ); if( p->pHash ){ sqlite3Fts5HashClear(p->pHash); p->nPendingData = 0; } } /* ** Return the size of the prefix, in bytes, that buffer (nNew/pNew) shares ** with buffer (nOld/pOld). */ static int fts5PrefixCompress( int nOld, const u8 *pOld, int nNew, const u8 *pNew ){ int i; assert( fts5BlobCompare(pOld, nOld, pNew, nNew)<0 ); for(i=0; i<nOld; i++){ if( pOld[i]!=pNew[i] ) break; } return i; } static void fts5WriteDlidxClear( Fts5Index *p, Fts5SegWriter *pWriter, int bFlush /* If true, write dlidx to disk */ ){ int i; assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) ); for(i=0; i<pWriter->nDlidx; i++){ Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i]; if( pDlidx->buf.n==0 ) break; if( bFlush ){ assert( pDlidx->pgno!=0 ); fts5DataWrite(p, FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno), pDlidx->buf.p, pDlidx->buf.n ); } sqlite3Fts5BufferZero(&pDlidx->buf); pDlidx->bPrevValid = 0; } } /* ** Grow the pWriter->aDlidx[] array to at least nLvl elements in size. ** Any new array elements are zeroed before returning. 
*/ static int fts5WriteDlidxGrow( Fts5Index *p, Fts5SegWriter *pWriter, int nLvl ){ if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){ Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc( pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl ); if( aDlidx==0 ){ p->rc = SQLITE_NOMEM; }else{ int nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx); memset(&aDlidx[pWriter->nDlidx], 0, nByte); pWriter->aDlidx = aDlidx; pWriter->nDlidx = nLvl; } } return p->rc; } /* ** If an "nEmpty" record must be written to the b-tree before the next ** term, write it now. */ static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){ if( pWriter->nEmpty ){ int bFlag = 0; Fts5PageWriter *pPg; pPg = &pWriter->aWriter[1]; /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written ** to the database, also write the doclist-index to disk. */ if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ bFlag = 1; } fts5WriteDlidxClear(p, pWriter, bFlag); fts5BufferAppendVarint(&p->rc, &pPg->buf, bFlag); fts5BufferAppendVarint(&p->rc, &pPg->buf, pWriter->nEmpty); pWriter->nEmpty = 0; }else{ fts5WriteDlidxClear(p, pWriter, 0); } assert( pWriter->nDlidx==0 || pWriter->aDlidx[0].buf.n==0 ); assert( pWriter->nDlidx==0 || pWriter->aDlidx[0].bPrevValid==0 ); } static void fts5WriteBtreeGrow(Fts5Index *p, Fts5SegWriter *pWriter){ if( p->rc==SQLITE_OK ){ Fts5PageWriter *aNew; Fts5PageWriter *pNew; int nNew = sizeof(Fts5PageWriter) * (pWriter->nWriter+1); aNew = (Fts5PageWriter*)sqlite3_realloc(pWriter->aWriter, nNew); if( aNew==0 ){ p->rc = SQLITE_NOMEM; return; } pNew = &aNew[pWriter->nWriter]; memset(pNew, 0, sizeof(Fts5PageWriter)); pNew->pgno = 1; fts5BufferAppendVarint(&p->rc, &pNew->buf, 1); pWriter->nWriter++; pWriter->aWriter = aNew; } } /* ** This is called once for each leaf page except the first that contains ** at least one term. 
Argument (nTerm/pTerm) is the split-key - a term that ** is larger than all terms written to earlier leaves, and equal to or ** smaller than the first term on the new leaf. ** ** If an error occurs, an error code is left in Fts5Index.rc. If an error ** has already occurred when this function is called, it is a no-op. */ static void fts5WriteBtreeTerm( Fts5Index *p, /* FTS5 backend object */ Fts5SegWriter *pWriter, /* Writer object */ int nTerm, const u8 *pTerm /* First term on new page */ ){ int iHeight; for(iHeight=1; 1; iHeight++){ Fts5PageWriter *pPage; if( iHeight>=pWriter->nWriter ){ fts5WriteBtreeGrow(p, pWriter); if( p->rc ) return; } pPage = &pWriter->aWriter[iHeight]; fts5WriteBtreeNEmpty(p, pWriter); if( pPage->buf.n>=p->pConfig->pgsz ){ /* pPage will be written to disk. The term will be written into the ** parent of pPage. */ i64 iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, iHeight, pPage->pgno); fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n); fts5BufferZero(&pPage->buf); fts5BufferZero(&pPage->term); fts5BufferAppendVarint(&p->rc, &pPage->buf, pPage[-1].pgno); pPage->pgno++; }else{ int nPre = fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm); fts5BufferAppendVarint(&p->rc, &pPage->buf, nPre+2); fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm-nPre); fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm-nPre, pTerm+nPre); fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm); break; } } } /* ** This function is called when flushing a leaf page that contains no ** terms at all to disk. */ static void fts5WriteBtreeNoTerm( Fts5Index *p, /* FTS5 backend object */ Fts5SegWriter *pWriter /* Writer object */ ){ /* If there were no rowids on the leaf page either and the doclist-index ** has already been started, append an 0x00 byte to it. 
*/ if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){ Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0]; assert( pDlidx->bPrevValid ); sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0); } /* Increment the "number of sequential leaves without a term" counter. */ pWriter->nEmpty++; } static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){ i64 iRowid; int iOff; iOff = 1 + fts5GetVarint(&pBuf->p[1], (u64*)&iRowid); fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid); return iRowid; } /* ** Rowid iRowid has just been appended to the current leaf page. It is the ** first on the page. This function appends an appropriate entry to the current ** doclist-index. */ static void fts5WriteDlidxAppend( Fts5Index *p, Fts5SegWriter *pWriter, i64 iRowid ){ int i; int bDone = 0; for(i=0; p->rc==SQLITE_OK && bDone==0; i++){ i64 iVal; Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i]; if( pDlidx->buf.n>=p->pConfig->pgsz ){ /* The current doclist-index page is full. Write it to disk and push ** a copy of iRowid (which will become the first rowid on the next ** doclist-index leaf page) up into the next level of the b-tree ** hierarchy. If the node being flushed is currently the root node, ** also push its first rowid upwards. */ pDlidx->buf.p[0] = 0x01; /* Not the root node */ fts5DataWrite(p, FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno), pDlidx->buf.p, pDlidx->buf.n ); fts5WriteDlidxGrow(p, pWriter, i+2); pDlidx = &pWriter->aDlidx[i]; if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){ i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf); /* This was the root node. Push its first rowid up to the new root. 
*/ pDlidx[1].pgno = pDlidx->pgno; sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0); sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno); sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst); pDlidx[1].bPrevValid = 1; pDlidx[1].iPrev = iFirst; } sqlite3Fts5BufferZero(&pDlidx->buf); pDlidx->bPrevValid = 0; pDlidx->pgno++; }else{ bDone = 1; } if( pDlidx->bPrevValid ){ iVal = iRowid - pDlidx->iPrev; }else{ i64 iPgno = (i==0 ? pWriter->aWriter[0].pgno : pDlidx[-1].pgno); assert( pDlidx->buf.n==0 ); sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone); sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno); iVal = iRowid; } sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal); pDlidx->bPrevValid = 1; pDlidx->iPrev = iRowid; } } static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; Fts5PageWriter *pPage = &pWriter->aWriter[0]; i64 iRowid; if( pWriter->bFirstTermInPage ){ /* No term was written to this page. */ assert( 0==fts5GetU16(&pPage->buf.p[2]) ); fts5WriteBtreeNoTerm(p, pWriter); } /* Write the current page to the db. */ iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, 0, pPage->pgno); fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n); /* Initialize the next page. */ fts5BufferZero(&pPage->buf); fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero); pPage->pgno++; /* Increase the leaves written counter */ pWriter->nLeafWritten++; /* The new leaf holds no terms or rowids */ pWriter->bFirstTermInPage = 1; pWriter->bFirstRowidInPage = 1; } /* ** Append term pTerm/nTerm to the segment being written by the writer passed ** as the second argument. ** ** If an error occurs, set the Fts5Index.rc error code. If an error has ** already occurred, this function is a no-op. 
*/
static void fts5WriteAppendTerm(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  int nTerm, const u8 *pTerm
){
  int nPrefix;                    /* Bytes of prefix compression for term */
  Fts5PageWriter *pPage = &pWriter->aWriter[0];

  assert( pPage->buf.n==0 || pPage->buf.n>4 );
  if( pPage->buf.n==0 ){
    /* Zero the first term and first docid fields */
    static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
    fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
    assert( pWriter->bFirstTermInPage );
  }
  if( p->rc ) return;

  if( pWriter->bFirstTermInPage ){
    /* Update the "first term" field of the page header. */
    assert( pPage->buf.p[2]==0 && pPage->buf.p[3]==0 );
    fts5PutU16(&pPage->buf.p[2], pPage->buf.n);
    nPrefix = 0;
    if( pPage->pgno!=1 ){
      /* This is the first term on a leaf that is not the leftmost leaf in
      ** the segment b-tree. In this case it is necessary to add a term to
      ** the b-tree hierarchy that is (a) larger than the largest term
      ** already written to the segment and (b) smaller than or equal to
      ** this term. In other words, a prefix of (pTerm/nTerm) that is one
      ** byte longer than the longest prefix (pTerm/nTerm) shares with the
      ** previous term.
      **
      ** Usually, the previous term is available in pPage->term. The exception
      ** is if this is the first term written in an incremental-merge step.
      ** In this case the previous term is not available, so just write a
      ** copy of (pTerm/nTerm) into the parent node. This is slightly
      ** inefficient, but still correct.  */
      int n = nTerm;
      if( pPage->term.n ){
        n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm);
      }
      fts5WriteBtreeTerm(p, pWriter, n, pTerm);
      /* NOTE(review): the leaf writer pointer is re-fetched here, presumably
      ** because fts5WriteBtreeTerm() may reallocate pWriter->aWriter[] -
      ** confirm against that function. */
      pPage = &pWriter->aWriter[0];
    }
  }else{
    /* Not the first term on the page: store the number of leading bytes
    ** shared with the previous term ahead of the suffix. */
    nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm);
    fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
  }

  /* Append the number of bytes of new data, then the term data itself
  ** to the page. */
  fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix);
  fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]);

  /* Update the Fts5PageWriter.term field. */
  fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
  pWriter->bFirstTermInPage = 0;

  /* A new doclist starts after this term, so the next rowid appended is
  ** written in full rather than as a delta. */
  pWriter->bFirstRowidInPage = 0;
  pWriter->bFirstRowidInDoclist = 1;

  assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
  pWriter->aDlidx[0].pgno = pPage->pgno;

  /* If the current leaf page is full, flush it to disk. */
  if( pPage->buf.n>=p->pConfig->pgsz ){
    fts5WriteFlushLeaf(p, pWriter);
  }
}

/*
** Append a docid and position-list size field to the writers output.
**
** iRowid is the docid and nPos the value written as the position-list size
** field. This function is a no-op if p->rc is already set.
*/
static void fts5WriteAppendRowid(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  i64 iRowid,
  int nPos
){
  if( p->rc==SQLITE_OK ){
    Fts5PageWriter *pPage = &pWriter->aWriter[0];

    /* If this is to be the first docid written to the page, set the
    ** docid-pointer in the page-header. Also append a value to the dlidx
    ** buffer, in case a doclist-index is required.  */
    if( pWriter->bFirstRowidInPage ){
      fts5PutU16(pPage->buf.p, pPage->buf.n);
      fts5WriteDlidxAppend(p, pWriter, iRowid);
    }

    /* Write the docid. The first docid of a doclist or of a page is stored
    ** in full; any other docid is stored as the difference from the
    ** previous one. */
    if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){
      fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid);
    }else{
      assert( p->rc || iRowid>pWriter->iPrevRowid );
      fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid);
    }
    pWriter->iPrevRowid = iRowid;
    pWriter->bFirstRowidInDoclist = 0;
    pWriter->bFirstRowidInPage = 0;

    fts5BufferAppendVarint(&p->rc, &pPage->buf, nPos);

    if( pPage->buf.n>=p->pConfig->pgsz ){
      fts5WriteFlushLeaf(p, pWriter);
    }
  }
}

/*
** Append the nData bytes of position-list data in aData[] to the writers
** output.
**
** While the data would fill the current leaf up to or beyond the
** configured page size, whole varints are copied until at least the space
** remaining on the page has been consumed, then the leaf is flushed. Any
** remaining bytes are appended to the (possibly new) current leaf.
*/
static void fts5WriteAppendPoslistData(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  const u8 *aData,
  int nData
){
  Fts5PageWriter *pPage = &pWriter->aWriter[0];
  const u8 *a = aData;
  int n = nData;

  assert( p->pConfig->pgsz>0 );
  while( p->rc==SQLITE_OK && (pPage->buf.n + n)>=p->pConfig->pgsz ){
    int nReq = p->pConfig->pgsz - pPage->buf.n;
    int nCopy = 0;
    /* Advance nCopy one varint at a time so that a varint is never split
    ** between two pages. */
    while( nCopy<nReq ){
      i64 dummy;
      nCopy += fts5GetVarint(&a[nCopy], (u64*)&dummy);
    }
    fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a);
    a += nCopy;
    n -= nCopy;
    fts5WriteFlushLeaf(p, pWriter);
  }
  if( n>0 ){
    fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a);
  }
}

/*
** Append a single varint with value 0 to the current leaf page buffer.
*/
static void fts5WriteAppendZerobyte(Fts5Index *p, Fts5SegWriter *pWriter){
  fts5BufferAppendVarint(&p->rc, &pWriter->aWriter[0].buf, 0);
}

/*
** Flush any data cached by the writer object to the database. Free any
** allocations associated with the writer.
*/
static void fts5WriteFinish(
  Fts5Index *p,
  Fts5SegWriter *pWriter,         /* Writer object */
  int *pnHeight,                  /* OUT: Height of the b-tree */
  int *pnLeaf                     /* OUT: Number of leaf pages in b-tree */
){
  int i;
  if( p->rc==SQLITE_OK ){
    Fts5PageWriter *pLeaf = &pWriter->aWriter[0];
    if( pLeaf->pgno==1 && pLeaf->buf.n==0 ){
      /* Nothing was ever written to the first leaf. */
      *pnLeaf = 0;
      *pnHeight = 0;
    }else{
      /* More than just the 4 byte page header: flush the final leaf. */
      if( pLeaf->buf.n>4 ){
        fts5WriteFlushLeaf(p, pWriter);
      }
      *pnLeaf = pLeaf->pgno-1;
      if( pWriter->nWriter==1 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
        fts5WriteBtreeGrow(p, pWriter);
      }
      if( pWriter->nWriter>1 ){
        fts5WriteBtreeNEmpty(p, pWriter);
      }
      *pnHeight = pWriter->nWriter;
      /* Write out the current page of each non-leaf level. */
      for(i=1; i<pWriter->nWriter; i++){
        Fts5PageWriter *pPg = &pWriter->aWriter[i];
        fts5DataWrite(p,
            FTS5_SEGMENT_ROWID(pWriter->iSegid, i, pPg->pgno),
            pPg->buf.p, pPg->buf.n
        );
      }
    }
  }
  /* The allocations are released whether or not an error has occurred. */
  for(i=0; i<pWriter->nWriter; i++){
    Fts5PageWriter *pPg = &pWriter->aWriter[i];
    fts5BufferFree(&pPg->term);
    fts5BufferFree(&pPg->buf);
  }
  sqlite3_free(pWriter->aWriter);

  for(i=0; i<pWriter->nDlidx; i++){
    sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf);
  }
  sqlite3_free(pWriter->aDlidx);
}

/*
** Initialize writer object pWriter to write a new segment with segment-id
** iSegid. The object is zeroed, a single page-writer is allocated and, if
** fts5WriteDlidxGrow() returns zero, the writer is set up with one level
** and one doclist-index level, starting at leaf page 1.
*/
static void fts5WriteInit(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  int iSegid
){
  memset(pWriter, 0, sizeof(Fts5SegWriter));
  pWriter->iSegid = iSegid;

  pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, sizeof(Fts5PageWriter));
  if( fts5WriteDlidxGrow(p, pWriter, 1) ) return;
  pWriter->nWriter = 1;
  pWriter->nDlidx = 1;
  pWriter->aWriter[0].pgno = 1;
  pWriter->bFirstTermInPage = 1;
}

/*
** Initialize writer object pWriter so that it appends to existing segment
** pSeg. One page-writer is allocated for each level of the segment
** b-tree. For each non-leaf level, the page buffer is loaded from the
** database, the page is iterated to its final entry and the term and
** child page number of that entry are used to set up the level below.
** The leaf writer starts at page (pSeg->pgnoLast+1).
*/
static void fts5WriteInitForAppend(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegWriter *pWriter,         /* Writer to initialize */
  Fts5StructureSegment *pSeg      /* Segment object to append to */
){
  int nByte = pSeg->nHeight * sizeof(Fts5PageWriter);
  memset(pWriter, 0, sizeof(Fts5SegWriter));
  pWriter->iSegid = pSeg->iSegid;
  pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, nByte);
  pWriter->aDlidx = (Fts5DlidxWriter*)fts5IdxMalloc(p, sizeof(Fts5DlidxWriter));

  if( p->rc==SQLITE_OK ){
    int pgno = 1;
    int i;
    pWriter->nDlidx = 1;
    pWriter->nWriter = pSeg->nHeight;
    pWriter->aWriter[0].pgno = pSeg->pgnoLast+1;
    for(i=pSeg->nHeight-1; i>0; i--){
      i64 iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, i, pgno);
      Fts5PageWriter *pPg = &pWriter->aWriter[i];
      pPg->pgno = pgno;
      fts5DataBuffer(p, &pPg->buf, iRowid);
      if( p->rc==SQLITE_OK ){
        Fts5NodeIter ss;
        fts5NodeIterInit(pPg->buf.p, pPg->buf.n, &ss);
        /* Advance to the end of the node. */
        while( ss.aData ) fts5NodeIterNext(&p->rc, &ss);
        fts5BufferSet(&p->rc, &pPg->term, ss.term.n, ss.term.p);
        pgno = ss.iChild;
        fts5NodeIterFree(&ss);
      }
    }
    assert( p->rc!=SQLITE_OK || (pgno+pWriter->nEmpty)==pSeg->pgnoLast );
    pWriter->bFirstTermInPage = 1;
    assert( pWriter->aWriter[0].term.n==0 );
  }
}

/*
** Iterator pIter was used to iterate through the input segments of an
** incremental merge operation. This function is called if the incremental
** merge step has finished but the input has not been completely exhausted.
*/
static void fts5TrimSegments(Fts5Index *p, Fts5MultiSegIter *pIter){
  int i;
  Fts5Buffer buf;
  memset(&buf, 0, sizeof(Fts5Buffer));
  for(i=0; i<pIter->nSeg; i++){
    Fts5SegIter *pSeg = &pIter->aSeg[i];
    if( pSeg->pSeg==0 ){
      /* no-op */
    }else if( pSeg->pLeaf==0 ){
      /* All keys from this input segment have been transferred to the output.
      ** Set both the first and last page-numbers to 0 to indicate that the
      ** segment is now empty. */
      pSeg->pSeg->pgnoLast = 0;
      pSeg->pSeg->pgnoFirst = 0;
    }else{
      int iOff = pSeg->iTermLeafOffset;     /* Offset on new first leaf page */
      i64 iLeafRowid;
      Fts5Data *pData;
      int iId = pSeg->pSeg->iSegid;
      u8 aHdr[4] = {0x00, 0x00, 0x00, 0x04};

      iLeafRowid = FTS5_SEGMENT_ROWID(iId, 0, pSeg->iTermLeafPgno);
      pData = fts5DataRead(p, iLeafRowid);
      if( pData ){
        /* Rebuild the leaf as: the 4 byte header aHdr[], the current term
        ** in full, then the original page content from offset iOff on. */
        fts5BufferZero(&buf);
        fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr);
        fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n);
        fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p);
        fts5BufferAppendBlob(&p->rc, &buf, pData->n - iOff, &pData->p[iOff]);
        fts5DataRelease(pData);
        pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
        fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 0, 1),iLeafRowid);
        fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
      }
    }
  }
  fts5BufferFree(&buf);
}

/*
** Callback passed to fts5ChunkIterate() by fts5IndexMergeLevel(). pCtx
** is the Fts5SegWriter to which the chunk of position-list data is
** appended.
*/
static void fts5MergeChunkCallback(
  Fts5Index *p,
  void *pCtx,
  const u8 *pChunk, int nChunk
){
  Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx;
  fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk);
}

/*
** Merge input segments from level iLvl of the index into a single segment
** on level iLvl+1.
**
** If the nMerge field of the input level is non-zero, a merge is already
** in progress: data is appended to the last segment on level iLvl+1 and
** nMerge input segments are read. Otherwise, a new output segment is
** allocated (extending *ppStruct if required) and all segments on the
** input level are read.
**
** If pnRem is not NULL, the merge stops at the first new term encountered
** after more than *pnRem leaf pages have been written, and *pnRem is
** decremented by the number of leaf pages written before returning.
*/
static void fts5IndexMergeLevel(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Structure **ppStruct,       /* IN/OUT: Structure of index */
  int iLvl,                       /* Level to read input from */
  int *pnRem                      /* Write up to this many output leaves */
){
  Fts5Structure *pStruct = *ppStruct;
  Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
  Fts5StructureLevel *pLvlOut;
  Fts5MultiSegIter *pIter = 0;    /* Iterator to read input data */
  int nRem = pnRem ? *pnRem : 0;  /* Output leaf pages left to write */
  int nInput;                     /* Number of input segments */
  Fts5SegWriter writer;           /* Writer object */
  Fts5StructureSegment *pSeg;     /* Output segment */
  Fts5Buffer term;
  int bRequireDoclistTerm = 0;    /* Doclist terminator (0x00) required */
  int bOldest;                    /* True if the output segment is the oldest */

  assert( iLvl<pStruct->nLevel );
  assert( pLvl->nMerge<=pLvl->nSeg );

  memset(&writer, 0, sizeof(Fts5SegWriter));
  memset(&term, 0, sizeof(Fts5Buffer));
  if( pLvl->nMerge ){
    pLvlOut = &pStruct->aLevel[iLvl+1];
    assert( pLvlOut->nSeg>0 );
    nInput = pLvl->nMerge;
    fts5WriteInitForAppend(p, &writer, &pLvlOut->aSeg[pLvlOut->nSeg-1]);
    pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1];
  }else{
    int iSegid = fts5AllocateSegid(p, pStruct);

    /* Extend the Fts5Structure object as required to ensure the output
    ** segment exists. */
    if( iLvl==pStruct->nLevel-1 ){
      fts5StructureAddLevel(&p->rc, ppStruct);
      pStruct = *ppStruct;
    }
    fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0);
    if( p->rc ) return;
    /* Re-fetch the level pointers from pStruct, which may have been
    ** replaced via *ppStruct above. */
    pLvl = &pStruct->aLevel[iLvl];
    pLvlOut = &pStruct->aLevel[iLvl+1];

    fts5WriteInit(p, &writer, iSegid);

    /* Add the new segment to the output level */
    pSeg = &pLvlOut->aSeg[pLvlOut->nSeg];
    pLvlOut->nSeg++;
    pSeg->pgnoFirst = 1;
    pSeg->iSegid = iSegid;
    pStruct->nSegment++;

    /* Read input from all segments in the input level */
    nInput = pLvl->nSeg;
  }
  bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2);

  assert( iLvl>=0 );
  for(fts5MultiIterNew(p, pStruct, 0, 0, 0, 0, iLvl, nInput, &pIter);
      fts5MultiIterEof(p, pIter)==0;
      fts5MultiIterNext(p, pIter, 0, 0)
  ){
    /* Note: this declaration shadows the output-segment pSeg declared
    ** above for the duration of the loop body. */
    Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
    int nPos;                     /* position-list size field value */
    int nTerm;
    const u8 *pTerm;

    /* Check for key annihilation. */
    if( pSeg->nPos==0 && (bOldest || pSeg->bDel==0) ) continue;

    pTerm = fts5MultiIterTerm(pIter, &nTerm);
    if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){
      if( pnRem && writer.nLeafWritten>nRem ){
        break;
      }

      /* This is a new term. Append a term to the output segment. */
      if( bRequireDoclistTerm ){
        fts5WriteAppendZerobyte(p, &writer);
      }
      fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
      fts5BufferSet(&p->rc, &term, nTerm, pTerm);
      bRequireDoclistTerm = 1;
    }

    /* Append the rowid to the output */
    /* WRITEPOSLISTSIZE */
    nPos = pSeg->nPos*2 + pSeg->bDel;
    fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter), nPos);

    /* Append the position-list data to the output */
    fts5ChunkIterate(p, pSeg, (void*)&writer, fts5MergeChunkCallback);
  }

  /* Flush the last leaf page to disk. Set the output segment b-tree height
  ** and last leaf page number at the same time.  */
  fts5WriteFinish(p, &writer, &pSeg->nHeight, &pSeg->pgnoLast);

  if( fts5MultiIterEof(p, pIter) ){
    int i;

    /* Remove the redundant segments from the %_data table */
    for(i=0; i<nInput; i++){
      fts5DataRemoveSegment(p, pLvl->aSeg[i].iSegid);
    }

    /* Remove the redundant segments from the input level */
    if( pLvl->nSeg!=nInput ){
      int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment);
      memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove);
    }
    pStruct->nSegment -= nInput;
    pLvl->nSeg -= nInput;
    pLvl->nMerge = 0;
    /* If the output segment has no leaf pages, remove it again. */
    if( pSeg->pgnoLast==0 ){
      pLvlOut->nSeg--;
      pStruct->nSegment--;
    }
  }else{
    /* The input was not exhausted. Trim the input segments and record
    ** the number of inputs so that the merge can be resumed later. */
    assert( pSeg->nHeight>0 && pSeg->pgnoLast>0 );
    fts5TrimSegments(p, pIter);
    pLvl->nMerge = nInput;
  }

  fts5MultiIterFree(p, pIter);
  fts5BufferFree(&term);
  if( pnRem ) *pnRem -= writer.nLeafWritten;
}

/*
** Do up to nPg pages of automerge work on the index.
*/
static void fts5IndexMerge(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Structure **ppStruct,       /* IN/OUT: Current structure of index */
  int nPg                         /* Pages of work to do */
){
  int nRem = nPg;
  Fts5Structure *pStruct = *ppStruct;
  while( nRem>0 && p->rc==SQLITE_OK ){
    int iLvl;                   /* To iterate through levels */
    int iBestLvl = 0;           /* Level offering the most input segments */
    int nBest = 0;              /* Number of input segments on best level */

    /* Set iBestLvl to the level to read input segments from. The scan
    ** stops at the first level with a merge already in progress. */
    assert( pStruct->nLevel>0 );
    for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
      Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
      if( pLvl->nMerge ){
        if( pLvl->nMerge>nBest ){
          iBestLvl = iLvl;
          nBest = pLvl->nMerge;
        }
        break;
      }
      if( pLvl->nSeg>nBest ){
        nBest = pLvl->nSeg;
        iBestLvl = iLvl;
      }
    }

    /* If nBest is still 0, then the index must be empty. */
#ifdef SQLITE_DEBUG
    for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){
      assert( pStruct->aLevel[iLvl].nSeg==0 );
    }
#endif

    /* Stop if the best level has fewer than nAutomerge segments and no
    ** merge is already in progress on it. */
    if( nBest<p->pConfig->nAutomerge
     && pStruct->aLevel[iBestLvl].nMerge==0
    ){
      break;
    }
    fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem);
    if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){
      fts5StructurePromote(p, iBestLvl+1, pStruct);
    }
  }
  *ppStruct = pStruct;
}

/*
** A total of nLeaf leaf pages of data has just been flushed to a level-0
** segment. This function updates the write-counter accordingly and, if
** necessary, performs incremental merge work.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has
** already occurred, this function is a no-op.
*/
static void fts5IndexAutomerge(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Structure **ppStruct,       /* IN/OUT: Current structure of index */
  int nLeaf                       /* Number of output leaves just written */
){
  if( p->rc==SQLITE_OK && p->pConfig->nAutomerge>0 ){
    Fts5Structure *pStruct = *ppStruct;
    u64 nWrite;                   /* Initial value of write-counter */
    int nWork;                    /* Number of work-quanta to perform */
    int nRem;                     /* Number of leaf pages left to write */

    /* Update the write-counter. While doing so, set nWork. nWork is the
    ** number of multiples of p->nWorkUnit crossed by adding nLeaf to the
    ** counter. */
    nWrite = pStruct->nWriteCounter;
    nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit));
    pStruct->nWriteCounter += nLeaf;
    nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel);

    fts5IndexMerge(p, ppStruct, nRem);
  }
}

/*
** Starting at level 0, merge each level that contains at least
** pConfig->nCrisisMerge segments into the next level, moving up one level
** after each merge. Stop at the first level with fewer segments than that,
** or when an error occurs.
*/
static void fts5IndexCrisismerge(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Structure **ppStruct        /* IN/OUT: Current structure of index */
){
  const int nCrisis = p->pConfig->nCrisisMerge;
  Fts5Structure *pStruct = *ppStruct;
  int iLvl = 0;

  assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 );
  while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){
    fts5IndexMergeLevel(p, &pStruct, iLvl, 0);
    fts5StructurePromote(p, iLvl+1, pStruct);
    iLvl++;
  }
  *ppStruct = pStruct;
}

/*
** Return the error code currently stored in p->rc and reset p->rc to
** SQLITE_OK.
*/
static int fts5IndexReturn(Fts5Index *p){
  int rc = p->rc;
  p->rc = SQLITE_OK;
  return rc;
}

/* NOTE(review): Fts5FlushCtx is not referenced anywhere in the portion of
** the file visible here - confirm whether it is still used. */
typedef struct Fts5FlushCtx Fts5FlushCtx;
struct Fts5FlushCtx {
  Fts5Index *pIdx;
  Fts5SegWriter writer;
};

/*
** Buffer aBuf[] contains a list of varints, all small enough to fit
** in a 32-bit integer. Return the size of the largest prefix of this
** list nMax bytes or less in size.
*/
static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
  int ret;
  u32 dummy;
  /* The first varint is always included in the result, even if it is
  ** larger than nMax bytes. */
  ret = fts5GetVarint32(aBuf, dummy);
  while( 1 ){
    int i = fts5GetVarint32(&aBuf[ret], dummy);
    if( (ret + i) > nMax ) break;
    ret += i;
  }
  return ret;
}

/*
** Append nBlob bytes from pBlob to buffer pBuf without growing it. The
** caller must already have ensured that the buffer is large enough (see
** the assert()). Each argument is evaluated more than once.
*/
#define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) {     \
  assert( pBuf->nSpace>=(pBuf->n+nBlob) );                 \
  memcpy(&pBuf->p[pBuf->n], pBlob, nBlob);                 \
  pBuf->n += nBlob;                                        \
}

/*
** Flush the contents of in-memory hash table iHash to a new level-0
** segment on disk. Also update the corresponding structure record.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has
** already occurred, this function is a no-op.
*/
static void fts5FlushOneHash(Fts5Index *p){
  Fts5Hash *pHash = p->pHash;
  Fts5Structure *pStruct;
  int iSegid;
  int pgnoLast = 0;                 /* Last leaf page number in segment */

  /* Obtain a reference to the index structure and allocate a new segment-id
  ** for the new level-0 segment.  */
  pStruct = fts5StructureRead(p);
  iSegid = fts5AllocateSegid(p, pStruct);

  if( iSegid ){
    const int pgsz = p->pConfig->pgsz;

    Fts5StructureSegment *pSeg;   /* New segment within pStruct */
    int nHeight;                  /* Height of new segment b-tree */
    Fts5Buffer *pBuf;             /* Buffer in which to assemble leaf page */
    const u8 *zPrev = 0;          /* Previous term written, if any */

    Fts5SegWriter writer;
    fts5WriteInit(p, &writer, iSegid);

    /* Pre-allocate the buffer used to assemble leaf pages to the target
    ** page size.  */
    assert( pgsz>0 );
    pBuf = &writer.aWriter[0].buf;
    fts5BufferGrow(&p->rc, pBuf, pgsz + 20);

    /* Begin scanning through hash table entries. This loop runs once for each
    ** term/doclist currently stored within the hash table. */
    if( p->rc==SQLITE_OK ){
      /* Start the first leaf with a zeroed 4 byte page header. */
      memset(pBuf->p, 0, 4);
      pBuf->n = 4;
      p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
    }
    while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
      const char *zTerm;          /* Buffer containing term */
      int nTerm;                  /* Size of zTerm in bytes */
      const u8 *pDoclist;         /* Pointer to doclist for this term */
      int nDoclist;               /* Size of doclist in bytes */
      int nSuffix;                /* Size of term suffix */

      sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
      nTerm = strlen(zTerm);

      /* Decide if the term will fit on the current leaf. If it will not,
      ** flush the leaf to disk here.  */
      if( (pBuf->n + nTerm + 2) > pgsz ){
        fts5WriteFlushLeaf(p, &writer);
        pBuf = &writer.aWriter[0].buf;
        if( (nTerm + 32) > pBuf->nSpace ){
          fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n);
          if( p->rc ) break;
        }
      }

      /* Write the term to the leaf. And if it is the first on the leaf, and
      ** the leaf is not page number 1, push it up into the b-tree hierarchy
      ** as well.  */
      if( writer.bFirstTermInPage==0 ){
        int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm);
        pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], nPre);
        nSuffix = nTerm - nPre;
      }else{
        /* First term on this page: record its offset in bytes 2-3 of the
        ** page header and store the term without prefix compression. */
        fts5PutU16(&pBuf->p[2], pBuf->n);
        writer.bFirstTermInPage = 0;
        if( writer.aWriter[0].pgno!=1 ){
          int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm);
          fts5WriteBtreeTerm(p, &writer, nPre+1, (const u8*)zTerm);
          pBuf = &writer.aWriter[0].buf;
          assert( nPre<nTerm );
        }
        nSuffix = nTerm;
      }
      pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], nSuffix);
      fts5BufferSafeAppendBlob(pBuf, (const u8*)&zTerm[nTerm-nSuffix], nSuffix);

      /* We just wrote a term into page writer.aWriter[0].pgno. If a
      ** doclist-index is to be generated for this doclist, it will be
      ** associated with this page. */
      assert( writer.nDlidx>0 && writer.aDlidx[0].buf.n==0 );
      writer.aDlidx[0].pgno = writer.aWriter[0].pgno;

      if( pgsz>=(pBuf->n + nDoclist + 1) ){
        /* The entire doclist will fit on the current leaf. */
        fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
      }else{
        i64 iRowid = 0;
        i64 iDelta = 0;
        int iOff = 0;

        writer.bFirstRowidInPage = 0;

        /* The entire doclist will not fit on this leaf. The following
        ** loop iterates through the poslists that make up the current
        ** doclist.  */
        while( p->rc==SQLITE_OK && iOff<nDoclist ){
          int nPos;
          int nCopy;
          int bDummy;
          iOff += fts5GetVarint(&pDoclist[iOff], (u64*)&iDelta);
          /* nCopy covers the poslist-size field plus the poslist itself. */
          nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy);
          nCopy += nPos;
          iRowid += iDelta;

          if( writer.bFirstRowidInPage ){
            fts5PutU16(&pBuf->p[0], pBuf->n);   /* first docid on page */
            pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid);
            writer.bFirstRowidInPage = 0;
            fts5WriteDlidxAppend(p, &writer, iRowid);
          }else{
            pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta);
          }
          assert( pBuf->n<=pBuf->nSpace );

          if( (pBuf->n + nCopy) <= pgsz ){
            /* The entire poslist will fit on the current leaf. So copy
            ** it in one go. */
            fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
          }else{
            /* The entire poslist will not fit on this leaf. So it needs
            ** to be broken into sections. The only qualification being
            ** that each varint must be stored contiguously.  */
            const u8 *pPoslist = &pDoclist[iOff];
            int iPos = 0;
            while( p->rc==SQLITE_OK ){
              int nSpace = pgsz - pBuf->n;
              int n = 0;
              if( (nCopy - iPos)<=nSpace ){
                n = nCopy - iPos;
              }else{
                n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
              }
              assert( n>0 );
              fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
              iPos += n;
              if( pBuf->n>=pgsz ){
                fts5WriteFlushLeaf(p, &writer);
                pBuf = &writer.aWriter[0].buf;
              }
              if( iPos>=nCopy ) break;
            }
          }
          iOff += nCopy;
        }
      }

      /* Terminate the doclist with a single 0x00 byte. */
      pBuf->p[pBuf->n++] = '\0';
      assert( pBuf->n<=pBuf->nSpace );
      zPrev = (const u8*)zTerm;
      sqlite3Fts5HashScanNext(pHash);
    }
    sqlite3Fts5HashClear(pHash);
    fts5WriteFinish(p, &writer, &nHeight, &pgnoLast);

    /* Update the Fts5Structure. It is written back to the database by the
    ** fts5StructureRelease() call below.  */
    if( pStruct->nLevel==0 ){
      fts5StructureAddLevel(&p->rc, &pStruct);
    }
    fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
    if( p->rc==SQLITE_OK ){
      pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
      pSeg->iSegid = iSegid;
      pSeg->nHeight = nHeight;
      pSeg->pgnoFirst = 1;
      pSeg->pgnoLast = pgnoLast;
      pStruct->nSegment++;
    }
    fts5StructurePromote(p, 0, pStruct);
  }

  /* These calls are made even if no segment-id could be allocated; each
  ** one either tests p->rc itself or is passed p so that it can. */
  fts5IndexAutomerge(p, &pStruct, pgnoLast);
  fts5IndexCrisismerge(p, &pStruct);
  fts5StructureWrite(p, pStruct);
  fts5StructureRelease(pStruct);
}

/*
** Flush any data stored in the in-memory hash tables to the database.
*/ static void fts5IndexFlush(Fts5Index *p){ /* Unless it is empty, flush the hash table to disk */ if( p->nPendingData ){ assert( p->pHash ); p->nPendingData = 0; fts5FlushOneHash(p); } } int sqlite3Fts5IndexOptimize(Fts5Index *p){ Fts5Structure *pStruct; Fts5Structure *pNew = 0; int nSeg = 0; assert( p->rc==SQLITE_OK ); fts5IndexFlush(p); pStruct = fts5StructureRead(p); if( pStruct ){ assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) ); nSeg = pStruct->nSegment; if( nSeg>1 ){ int nByte = sizeof(Fts5Structure); nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel); pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte); } } if( pNew ){ Fts5StructureLevel *pLvl; int nByte = nSeg * sizeof(Fts5StructureSegment); pNew->nLevel = pStruct->nLevel+1; pNew->nWriteCounter = pStruct->nWriteCounter; pLvl = &pNew->aLevel[pStruct->nLevel]; pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte); if( pLvl->aSeg ){ int iLvl, iSeg; int iSegOut = 0; for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg]; iSegOut++; } } pNew->nSegment = pLvl->nSeg = nSeg; }else{ sqlite3_free(pNew); pNew = 0; } } if( pNew ){ int iLvl = pNew->nLevel-1; while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){ int nRem = FTS5_OPT_WORK_UNIT; fts5IndexMergeLevel(p, &pNew, iLvl, &nRem); } fts5StructureWrite(p, pNew); fts5StructureRelease(pNew); } fts5StructureRelease(pStruct); return fts5IndexReturn(p); } int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){ Fts5Structure *pStruct; pStruct = fts5StructureRead(p); if( pStruct && pStruct->nLevel ){ fts5IndexMerge(p, &pStruct, nMerge); fts5StructureWrite(p, pStruct); } fts5StructureRelease(pStruct); return fts5IndexReturn(p); } static void fts5PoslistCallback( Fts5Index *p, void *pCtx, const u8 *pChunk, int nChunk ){ fts5BufferAppendBlob(&p->rc, (Fts5Buffer*)pCtx, nChunk, pChunk); } /* ** Iterator pIter currently points to a 
valid entry (not EOF). This ** function appends the position list data for the current entry to ** buffer pBuf. It does not make a copy of the position-list size ** field. */ static void fts5SegiterPoslist( Fts5Index *p, Fts5SegIter *pSeg, Fts5Buffer *pBuf ){ fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback); } /* ** Iterator pMulti currently points to a valid entry (not EOF). This ** function appends a copy of the position-list of the entry pMulti ** currently points to to buffer pBuf. ** ** If an error occurs, an error code is left in p->rc. It is assumed ** no error has already occurred when this function is called. */ static void fts5MultiIterPoslist( Fts5Index *p, Fts5MultiSegIter *pMulti, int bSz, /* Append a size field before the data */ Fts5Buffer *pBuf ){ if( p->rc==SQLITE_OK ){ Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; assert( fts5MultiIterEof(p, pMulti)==0 ); if( bSz ){ /* WRITEPOSLISTSIZE */ fts5BufferAppendVarint(&p->rc, pBuf, pSeg->nPos*2); } fts5SegiterPoslist(p, pSeg, pBuf); } } static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ if( pIter->i<pIter->n ){ int bDummy; if( pIter->i ){ i64 iDelta; pIter->i += fts5GetVarint(&pIter->a[pIter->i], (u64*)&iDelta); pIter->iRowid += iDelta; }else{ pIter->i += fts5GetVarint(&pIter->a[pIter->i], (u64*)&pIter->iRowid); } pIter->i += fts5GetPoslistSize( &pIter->a[pIter->i], &pIter->nPoslist, &bDummy ); pIter->aPoslist = &pIter->a[pIter->i]; pIter->i += pIter->nPoslist; }else{ pIter->aPoslist = 0; } } static void fts5DoclistIterInit( Fts5Buffer *pBuf, Fts5DoclistIter *pIter ){ memset(pIter, 0, sizeof(*pIter)); pIter->a = pBuf->p; pIter->n = pBuf->n; fts5DoclistIterNext(pIter); } /* ** Append a doclist to buffer pBuf. 
*/ static void fts5MergeAppendDocid( int *pRc, /* IN/OUT: Error code */ Fts5Buffer *pBuf, /* Buffer to write to */ i64 *piLastRowid, /* IN/OUT: Previous rowid written (if any) */ i64 iRowid /* Rowid to append */ ){ if( pBuf->n==0 ){ fts5BufferAppendVarint(pRc, pBuf, iRowid); }else{ fts5BufferAppendVarint(pRc, pBuf, iRowid - *piLastRowid); } *piLastRowid = iRowid; } /* ** Buffers p1 and p2 contain doclists. This function merges the content ** of the two doclists together and sets buffer p1 to the result before ** returning. ** ** If an error occurs, an error code is left in p->rc. If an error has ** already occurred, this function is a no-op. */ static void fts5MergePrefixLists( Fts5Index *p, /* FTS5 backend object */ Fts5Buffer *p1, /* First list to merge */ Fts5Buffer *p2 /* Second list to merge */ ){ if( p2->n ){ i64 iLastRowid = 0; Fts5DoclistIter i1; Fts5DoclistIter i2; Fts5Buffer out; Fts5Buffer tmp; memset(&out, 0, sizeof(out)); memset(&tmp, 0, sizeof(tmp)); fts5DoclistIterInit(p1, &i1); fts5DoclistIterInit(p2, &i2); while( p->rc==SQLITE_OK && (i1.aPoslist!=0 || i2.aPoslist!=0) ){ if( i2.aPoslist==0 || (i1.aPoslist && i1.iRowid<i2.iRowid) ){ /* Copy entry from i1 */ fts5MergeAppendDocid(&p->rc, &out, &iLastRowid, i1.iRowid); /* WRITEPOSLISTSIZE */ fts5BufferAppendVarint(&p->rc, &out, i1.nPoslist * 2); fts5BufferAppendBlob(&p->rc, &out, i1.nPoslist, i1.aPoslist); fts5DoclistIterNext(&i1); } else if( i1.aPoslist==0 || i2.iRowid!=i1.iRowid ){ /* Copy entry from i2 */ fts5MergeAppendDocid(&p->rc, &out, &iLastRowid, i2.iRowid); /* WRITEPOSLISTSIZE */ fts5BufferAppendVarint(&p->rc, &out, i2.nPoslist * 2); fts5BufferAppendBlob(&p->rc, &out, i2.nPoslist, i2.aPoslist); fts5DoclistIterNext(&i2); } else{ Fts5PoslistReader r1; Fts5PoslistReader r2; Fts5PoslistWriter writer; memset(&writer, 0, sizeof(writer)); /* Merge the two position lists. 
*/ fts5MergeAppendDocid(&p->rc, &out, &iLastRowid, i2.iRowid); fts5BufferZero(&tmp); sqlite3Fts5PoslistReaderInit(-1, i1.aPoslist, i1.nPoslist, &r1); sqlite3Fts5PoslistReaderInit(-1, i2.aPoslist, i2.nPoslist, &r2); while( p->rc==SQLITE_OK && (r1.bEof==0 || r2.bEof==0) ){ i64 iNew; if( r2.bEof || (r1.bEof==0 && r1.iPos<r2.iPos) ){ iNew = r1.iPos; sqlite3Fts5PoslistReaderNext(&r1); }else{ iNew = r2.iPos; sqlite3Fts5PoslistReaderNext(&r2); if( r1.iPos==r2.iPos ) sqlite3Fts5PoslistReaderNext(&r1); } p->rc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, iNew); } /* WRITEPOSLISTSIZE */ fts5BufferAppendVarint(&p->rc, &out, tmp.n * 2); fts5BufferAppendBlob(&p->rc, &out, tmp.n, tmp.p); fts5DoclistIterNext(&i1); fts5DoclistIterNext(&i2); } } fts5BufferSet(&p->rc, p1, out.n, out.p); fts5BufferFree(&tmp); fts5BufferFree(&out); } } static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){ Fts5Buffer tmp = *p1; *p1 = *p2; *p2 = tmp; } static void fts5SetupPrefixIter( Fts5Index *p, /* Index to read from */ int bDesc, /* True for "ORDER BY rowid DESC" */ const u8 *pToken, /* Buffer containing prefix to match */ int nToken, /* Size of buffer pToken in bytes */ Fts5IndexIter *pIter /* Populate this object */ ){ Fts5Structure *pStruct; Fts5Buffer *aBuf; const int nBuf = 32; aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); pStruct = fts5StructureRead(p); if( aBuf && pStruct ){ const int flags = FTS5INDEX_QUERY_SCAN; int i; i64 iLastRowid = 0; Fts5MultiSegIter *p1 = 0; /* Iterator used to gather data from index */ Fts5Data *pData; Fts5Buffer doclist; memset(&doclist, 0, sizeof(doclist)); for(fts5MultiIterNew(p, pStruct, 1, flags, pToken, nToken, -1, 0, &p1); fts5MultiIterEof(p, p1)==0; fts5MultiIterNext(p, p1, 0, 0) ){ i64 iRowid = fts5MultiIterRowid(p1); int nTerm; const u8 *pTerm = fts5MultiIterTerm(p1, &nTerm); assert( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 ); if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break; if( doclist.n>0 && iRowid<=iLastRowid ){ 
for(i=0; p->rc==SQLITE_OK && doclist.n; i++){ assert( i<nBuf ); if( aBuf[i].n==0 ){ fts5BufferSwap(&doclist, &aBuf[i]); fts5BufferZero(&doclist); }else{ fts5MergePrefixLists(p, &doclist, &aBuf[i]); fts5BufferZero(&aBuf[i]); } } } fts5MergeAppendDocid(&p->rc, &doclist, &iLastRowid, iRowid); fts5MultiIterPoslist(p, p1, 1, &doclist); } for(i=0; i<nBuf; i++){ fts5MergePrefixLists(p, &doclist, &aBuf[i]); fts5BufferFree(&aBuf[i]); } fts5MultiIterFree(p, p1); pData = fts5IdxMalloc(p, sizeof(Fts5Data) + doclist.n); if( pData ){ pData->p = (u8*)&pData[1]; pData->n = doclist.n; memcpy(pData->p, doclist.p, doclist.n); fts5MultiIterNew2(p, pData, bDesc, &pIter->pMulti); } fts5BufferFree(&doclist); } fts5StructureRelease(pStruct); sqlite3_free(aBuf); } /* ** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain ** to the document with rowid iRowid. */ int sqlite3Fts5IndexBeginWrite(Fts5Index *p, i64 iRowid){ assert( p->rc==SQLITE_OK ); /* Allocate the hash table if it has not already been allocated */ if( p->pHash==0 ){ p->rc = sqlite3Fts5HashNew(&p->pHash, &p->nPendingData); } /* Flush the hash table to disk if required */ if( iRowid<=p->iWriteRowid || (p->nPendingData > p->nMaxPendingData) ){ fts5IndexFlush(p); } p->iWriteRowid = iRowid; return fts5IndexReturn(p); } /* ** Commit data to disk. */ int sqlite3Fts5IndexSync(Fts5Index *p, int bCommit){ assert( p->rc==SQLITE_OK ); fts5IndexFlush(p); if( bCommit ) fts5CloseReader(p); return fts5IndexReturn(p); } /* ** Discard any data stored in the in-memory hash tables. Do not write it ** to the database. Additionally, assume that the contents of the %_data ** table may have changed on disk. So any in-memory caches of %_data ** records must be invalidated. */ int sqlite3Fts5IndexRollback(Fts5Index *p){ fts5CloseReader(p); fts5IndexDiscardData(p); assert( p->rc==SQLITE_OK ); return SQLITE_OK; } /* ** The %_data table is completely empty when this function is called. 
This ** function populates it with the initial structure objects for each index, ** and the initial version of the "averages" record (a zero-byte blob). */ int sqlite3Fts5IndexReinit(Fts5Index *p){ Fts5Structure s; assert( p->rc==SQLITE_OK ); p->rc = sqlite3Fts5IndexSetAverages(p, (const u8*)"", 0); memset(&s, 0, sizeof(Fts5Structure)); fts5StructureWrite(p, &s); return fts5IndexReturn(p); } /* ** Open a new Fts5Index handle. If the bCreate argument is true, create ** and initialize the underlying %_data table. ** ** If successful, set *pp to point to the new object and return SQLITE_OK. ** Otherwise, set *pp to NULL and return an SQLite error code. */ int sqlite3Fts5IndexOpen( Fts5Config *pConfig, int bCreate, Fts5Index **pp, char **pzErr ){ int rc = SQLITE_OK; Fts5Index *p; /* New object */ *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index)); if( rc==SQLITE_OK ){ p->pConfig = pConfig; p->nWorkUnit = FTS5_WORK_UNIT; p->nMaxPendingData = 1024*1024; p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName); if( p->zDataTbl && bCreate ){ rc = sqlite3Fts5CreateTable( pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr ); if( rc==SQLITE_OK ){ rc = sqlite3Fts5IndexReinit(p); } } } assert( rc!=SQLITE_OK || p->rc==SQLITE_OK ); if( rc ){ sqlite3Fts5IndexClose(p); *pp = 0; } return rc; } /* ** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen(). */ int sqlite3Fts5IndexClose(Fts5Index *p){ int rc = SQLITE_OK; if( p ){ assert( p->pReader==0 ); sqlite3_finalize(p->pWriter); sqlite3_finalize(p->pDeleter); sqlite3Fts5HashFree(p->pHash); sqlite3Fts5BufferFree(&p->scratch); sqlite3_free(p->zDataTbl); sqlite3_free(p); } return rc; } /* ** Argument p points to a buffer containing utf-8 text that is n bytes in ** size. Return the number of bytes in the nChar character prefix of the ** buffer, or 0 if there are less than nChar characters in total. 
*/
static int fts5IndexCharlenToBytelen(const char *p, int nByte, int nChar){
  int n = 0;
  int i;
  for(i=0; i<nChar; i++){
    if( n>=nByte ) return 0;      /* Input contains fewer than nChar chars */
    if( (unsigned char)p[n++]>=0xc0 ){
      /* Lead byte of a multi-byte character: skip its continuation bytes.
      ** The buffer is nByte bytes in size and is not required to be
      ** nul-terminated, so never inspect p[nByte] or beyond. This also
      ** guarantees that the value returned never exceeds nByte. */
      while( n<nByte && (p[n] & 0xc0)==0x80 ) n++;
    }
  }
  return n;
}

/*
** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of
** unicode characters in the string.
*/
static int fts5IndexCharlen(const char *pIn, int nIn){
  int nChar = 0;
  int i = 0;
  while( i<nIn ){
    if( (unsigned char)pIn[i++]>=0xc0 ){
      while( i<nIn && (pIn[i] & 0xc0)==0x80 ) i++;
    }
    nChar++;
  }
  return nChar;
}

/*
** Insert or remove data to or from the index. Each time a document is
** added to or removed from the index, this function is called one or more
** times.
**
** For an insert, it must be called once for each token in the new document.
** If the operation is a delete, it must be called (at least) once for each
** unique token in the document with an iCol value less than zero. The iPos
** argument is ignored for a delete.
**
** The token is written to the main terms index and, for each configured
** prefix index, a prefix of the token is written if the token contains
** at least that many characters. Returns SQLITE_OK or the error code of
** the first hash-table write that fails.
*/
int sqlite3Fts5IndexWrite(
  Fts5Index *p,                   /* Index to write to */
  int iCol,                       /* Column token appears in (-ve -> delete) */
  int iPos,                       /* Position of token within column */
  const char *pToken, int nToken  /* Token to add or remove to or from index */
){
  int i;                          /* Used to iterate through indexes */
  int rc = SQLITE_OK;             /* Return code */
  Fts5Config *pConfig = p->pConfig;

  assert( p->rc==SQLITE_OK );

  /* Add the entry to the main terms index. */
  rc = sqlite3Fts5HashWrite(
      p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken
  );

  for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){
    /* nByte is 0 if the token is shorter than this prefix length, in which
    ** case nothing is added to prefix index i. */
    int nByte = fts5IndexCharlenToBytelen(pToken, nToken, pConfig->aPrefix[i]);
    if( nByte ){
      rc = sqlite3Fts5HashWrite(p->pHash,
          p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX+i+1, pToken, nByte
      );
    }
  }

  return rc;
}

/*
** Open a new iterator to iterate through all docids that match the
** specified token or token prefix.
*/ int sqlite3Fts5IndexQuery( Fts5Index *p, /* FTS index to query */ const char *pToken, int nToken, /* Token (or prefix) to query for */ int flags, /* Mask of FTS5INDEX_QUERY_X flags */ Fts5IndexIter **ppIter /* OUT: New iterator object */ ){ Fts5Config *pConfig = p->pConfig; Fts5IndexIter *pRet; int iIdx = 0; Fts5Buffer buf = {0, 0, 0}; /* If the QUERY_SCAN flag is set, all other flags must be clear. */ assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || (flags & FTS5INDEX_QUERY_SCAN)==FTS5INDEX_QUERY_SCAN ); if( sqlite3Fts5BufferGrow(&p->rc, &buf, nToken+1)==0 ){ memcpy(&buf.p[1], pToken, nToken); } #ifdef SQLITE_DEBUG if( flags & FTS5INDEX_QUERY_TEST_NOIDX ){ assert( flags & FTS5INDEX_QUERY_PREFIX ); iIdx = 1+pConfig->nPrefix; }else #endif if( flags & FTS5INDEX_QUERY_PREFIX ){ int nChar = fts5IndexCharlen(pToken, nToken); for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){ if( pConfig->aPrefix[iIdx-1]==nChar ) break; } } pRet = (Fts5IndexIter*)sqlite3Fts5MallocZero(&p->rc, sizeof(Fts5IndexIter)); if( pRet ){ pRet->pIndex = p; if( iIdx<=pConfig->nPrefix ){ buf.p[0] = FTS5_MAIN_PREFIX + iIdx; pRet->pStruct = fts5StructureRead(p); if( pRet->pStruct ){ fts5MultiIterNew( p, pRet->pStruct, 1, flags, buf.p, nToken+1, -1, 0, &pRet->pMulti ); } }else{ int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0; buf.p[0] = FTS5_MAIN_PREFIX; fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, pRet); } } if( p->rc ){ sqlite3Fts5IterClose(pRet); pRet = 0; } *ppIter = pRet; sqlite3Fts5BufferFree(&buf); return fts5IndexReturn(p); } /* ** Return true if the iterator passed as the only argument is at EOF. */ int sqlite3Fts5IterEof(Fts5IndexIter *pIter){ assert( pIter->pIndex->rc==SQLITE_OK ); return fts5MultiIterEof(pIter->pIndex, pIter->pMulti); } /* ** Move to the next matching rowid. 
*/ int sqlite3Fts5IterNext(Fts5IndexIter *pIter){ assert( pIter->pIndex->rc==SQLITE_OK ); fts5MultiIterNext(pIter->pIndex, pIter->pMulti, 0, 0); return fts5IndexReturn(pIter->pIndex); } /* ** Move to the next matching term/rowid. Used by the fts5vocab module. */ int sqlite3Fts5IterNextScan(Fts5IndexIter *pIter){ Fts5Index *p = pIter->pIndex; Fts5MultiSegIter *pMulti = pIter->pMulti; assert( pIter->pIndex->rc==SQLITE_OK ); assert( pMulti ); fts5MultiIterNext(p, pMulti, 0, 0); if( p->rc==SQLITE_OK ){ Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){ fts5DataRelease(pSeg->pLeaf); pSeg->pLeaf = 0; } } return fts5IndexReturn(pIter->pIndex); } /* ** Move to the next matching rowid that occurs at or after iMatch. The ** definition of "at or after" depends on whether this iterator iterates ** in ascending or descending rowid order. */ int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIter, i64 iMatch){ fts5MultiIterNextFrom(pIter->pIndex, pIter->pMulti, iMatch); return fts5IndexReturn(pIter->pIndex); } /* ** Return the current rowid. */ i64 sqlite3Fts5IterRowid(Fts5IndexIter *pIter){ return fts5MultiIterRowid(pIter->pMulti); } /* ** Return the current term. */ const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIter, int *pn){ int n; const char *z = (const char*)fts5MultiIterTerm(pIter->pMulti, &n); *pn = n-1; return &z[1]; } /* ** Return a pointer to a buffer containing a copy of the position list for ** the current entry. Output variable *pn is set to the size of the buffer ** in bytes before returning. ** ** The returned position list does not include the "number of bytes" varint ** field that starts the position list on disk. 
*/ int sqlite3Fts5IterPoslist( Fts5IndexIter *pIter, const u8 **pp, /* OUT: Pointer to position-list data */ int *pn, /* OUT: Size of position-list in bytes */ i64 *piRowid /* OUT: Current rowid */ ){ Fts5MultiSegIter *pMulti = pIter->pMulti; Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; assert( pIter->pIndex->rc==SQLITE_OK ); *piRowid = pSeg->iRowid; *pn = pSeg->nPos; if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->n ){ *pp = &pSeg->pLeaf->p[pSeg->iLeafOffset]; }else{ fts5BufferZero(&pIter->poslist); fts5SegiterPoslist(pIter->pIndex, pSeg, &pIter->poslist); *pp = pIter->poslist.p; } return fts5IndexReturn(pIter->pIndex); } /* ** This function is similar to sqlite3Fts5IterPoslist(), except that it ** copies the position list into the buffer supplied as the second ** argument. */ int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf){ Fts5Index *p = pIter->pIndex; Fts5MultiSegIter *pMulti = pIter->pMulti; assert( p->rc==SQLITE_OK ); fts5BufferZero(pBuf); fts5MultiIterPoslist(p, pMulti, 0, pBuf); return fts5IndexReturn(p); } /* ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery(). */ void sqlite3Fts5IterClose(Fts5IndexIter *pIter){ if( pIter ){ fts5MultiIterFree(pIter->pIndex, pIter->pMulti); fts5StructureRelease(pIter->pStruct); fts5BufferFree(&pIter->poslist); fts5CloseReader(pIter->pIndex); sqlite3_free(pIter); } } /* ** Read the "averages" record into the buffer supplied as the second ** argument. Return SQLITE_OK if successful, or an SQLite error code ** if an error occurs. */ int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf){ assert( p->rc==SQLITE_OK ); fts5DataReadOrBuffer(p, pBuf, FTS5_AVERAGES_ROWID); return fts5IndexReturn(p); } /* ** Replace the current "averages" record with the contents of the buffer ** supplied as the second argument. 
*/ int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){ assert( p->rc==SQLITE_OK ); fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData); return fts5IndexReturn(p); } /* ** Return the total number of blocks this module has read from the %_data ** table since it was created. */ int sqlite3Fts5IndexReads(Fts5Index *p){ return p->nRead; } /* ** Set the 32-bit cookie value stored at the start of all structure ** records to the value passed as the second argument. ** ** Return SQLITE_OK if successful, or an SQLite error code if an error ** occurs. */ int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){ int rc; /* Return code */ Fts5Config *pConfig = p->pConfig; /* Configuration object */ u8 aCookie[4]; /* Binary representation of iNew */ sqlite3_blob *pBlob = 0; assert( p->rc==SQLITE_OK ); sqlite3Fts5Put32(aCookie, iNew); rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl, "block", FTS5_STRUCTURE_ROWID, 1, &pBlob ); if( rc==SQLITE_OK ){ sqlite3_blob_write(pBlob, aCookie, 4, 0); rc = sqlite3_blob_close(pBlob); } return rc; } int sqlite3Fts5IndexLoadConfig(Fts5Index *p){ Fts5Structure *pStruct; pStruct = fts5StructureRead(p); fts5StructureRelease(pStruct); return fts5IndexReturn(p); } /************************************************************************* ************************************************************************** ** Below this point is the implementation of the integrity-check ** functionality. */ /* ** Return a simple checksum value based on the arguments. 
*/ static u64 fts5IndexEntryCksum( i64 iRowid, int iCol, int iPos, int iIdx, const char *pTerm, int nTerm ){ int i; u64 ret = iRowid; ret += (ret<<3) + iCol; ret += (ret<<3) + iPos; if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX + iIdx); for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i]; return ret; } static void fts5BtreeIterInit( Fts5Index *p, Fts5StructureSegment *pSeg, Fts5BtreeIter *pIter ){ int nByte; int i; nByte = sizeof(pIter->aLvl[0]) * (pSeg->nHeight-1); memset(pIter, 0, sizeof(*pIter)); if( nByte ){ pIter->aLvl = (Fts5BtreeIterLevel*)fts5IdxMalloc(p, nByte); } if( p->rc==SQLITE_OK ){ pIter->nLvl = pSeg->nHeight-1; pIter->p = p; pIter->pSeg = pSeg; } for(i=0; p->rc==SQLITE_OK && i<pIter->nLvl; i++){ i64 iRowid = FTS5_SEGMENT_ROWID(pSeg->iSegid, i+1, 1); Fts5Data *pData; pIter->aLvl[i].pData = pData = fts5DataRead(p, iRowid); if( pData ){ fts5NodeIterInit(pData->p, pData->n, &pIter->aLvl[i].s); } } if( pIter->nLvl==0 || p->rc ){ pIter->bEof = 1; pIter->iLeaf = pSeg->pgnoLast; }else{ pIter->nEmpty = pIter->aLvl[0].s.nEmpty; pIter->iLeaf = pIter->aLvl[0].s.iChild; pIter->bDlidx = pIter->aLvl[0].s.bDlidx; } } static void fts5BtreeIterNext(Fts5BtreeIter *pIter){ Fts5Index *p = pIter->p; int i; assert( pIter->bEof==0 && pIter->aLvl[0].s.aData ); for(i=0; i<pIter->nLvl && p->rc==SQLITE_OK; i++){ Fts5BtreeIterLevel *pLvl = &pIter->aLvl[i]; fts5NodeIterNext(&p->rc, &pLvl->s); if( pLvl->s.aData ){ fts5BufferSet(&p->rc, &pIter->term, pLvl->s.term.n, pLvl->s.term.p); break; }else{ fts5NodeIterFree(&pLvl->s); fts5DataRelease(pLvl->pData); pLvl->pData = 0; } } if( i==pIter->nLvl || p->rc ){ pIter->bEof = 1; }else{ int iSegid = pIter->pSeg->iSegid; for(i--; i>=0; i--){ Fts5BtreeIterLevel *pLvl = &pIter->aLvl[i]; i64 iRowid = FTS5_SEGMENT_ROWID(iSegid, i+1, pLvl[1].s.iChild); pLvl->pData = fts5DataRead(p, iRowid); if( pLvl->pData ){ fts5NodeIterInit(pLvl->pData->p, pLvl->pData->n, &pLvl->s); } } } pIter->nEmpty = pIter->aLvl[0].s.nEmpty; pIter->bDlidx = 
pIter->aLvl[0].s.bDlidx; pIter->iLeaf = pIter->aLvl[0].s.iChild; } static void fts5BtreeIterFree(Fts5BtreeIter *pIter){ int i; for(i=0; i<pIter->nLvl; i++){ Fts5BtreeIterLevel *pLvl = &pIter->aLvl[i]; fts5NodeIterFree(&pLvl->s); if( pLvl->pData ){ fts5DataRelease(pLvl->pData); pLvl->pData = 0; } } sqlite3_free(pIter->aLvl); fts5BufferFree(&pIter->term); } #ifdef SQLITE_DEBUG /* ** This function is purely an internal test. It does not contribute to ** FTS functionality, or even the integrity-check, in any way. ** ** Instead, it tests that the same set of pgno/rowid combinations are ** visited regardless of whether the doclist-index identified by parameters ** iSegid/iLeaf is iterated in forwards or reverse order. */ static void fts5TestDlidxReverse( Fts5Index *p, int iSegid, /* Segment id to load from */ int iLeaf /* Load doclist-index for this leaf */ ){ Fts5DlidxIter *pDlidx = 0; u64 cksum1 = 13; u64 cksum2 = 13; for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf); fts5DlidxIterEof(p, pDlidx)==0; fts5DlidxIterNext(p, pDlidx) ){ i64 iRowid = fts5DlidxIterRowid(pDlidx); int pgno = fts5DlidxIterPgno(pDlidx); assert( pgno>iLeaf ); cksum1 += iRowid + ((i64)pgno<<32); } fts5DlidxIterFree(pDlidx); pDlidx = 0; for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf); fts5DlidxIterEof(p, pDlidx)==0; fts5DlidxIterPrev(p, pDlidx) ){ i64 iRowid = fts5DlidxIterRowid(pDlidx); int pgno = fts5DlidxIterPgno(pDlidx); assert( fts5DlidxIterPgno(pDlidx)>iLeaf ); cksum2 += iRowid + ((i64)pgno<<32); } fts5DlidxIterFree(pDlidx); pDlidx = 0; if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT; } static int fts5QueryCksum( Fts5Index *p, /* Fts5 index object */ int iIdx, const char *z, /* Index key to query for */ int n, /* Size of index key in bytes */ int flags, /* Flags for Fts5IndexQuery */ u64 *pCksum /* IN/OUT: Checksum value */ ){ u64 cksum = *pCksum; Fts5IndexIter *pIdxIter = 0; int rc = sqlite3Fts5IndexQuery(p, z, n, flags, &pIdxIter); while( rc==SQLITE_OK && 
0==sqlite3Fts5IterEof(pIdxIter) ){ i64 dummy; const u8 *pPos; int nPos; i64 rowid = sqlite3Fts5IterRowid(pIdxIter); rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos, &dummy); if( rc==SQLITE_OK ){ Fts5PoslistReader sReader; for(sqlite3Fts5PoslistReaderInit(-1, pPos, nPos, &sReader); sReader.bEof==0; sqlite3Fts5PoslistReaderNext(&sReader) ){ int iCol = FTS5_POS2COLUMN(sReader.iPos); int iOff = FTS5_POS2OFFSET(sReader.iPos); cksum ^= fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n); } rc = sqlite3Fts5IterNext(pIdxIter); } } sqlite3Fts5IterClose(pIdxIter); *pCksum = cksum; return rc; } /* ** This function is also purely an internal test. It does not contribute to ** FTS functionality, or even the integrity-check, in any way. */ static void fts5TestTerm( Fts5Index *p, Fts5Buffer *pPrev, /* Previous term */ const char *z, int n, /* Possibly new term to test */ u64 expected, u64 *pCksum ){ int rc = p->rc; if( pPrev->n==0 ){ fts5BufferSet(&rc, pPrev, n, (const u8*)z); }else if( rc==SQLITE_OK && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){ u64 cksum3 = *pCksum; const char *zTerm = (const char*)&pPrev->p[1]; /* term sans prefix-byte */ int nTerm = pPrev->n-1; /* Size of zTerm in bytes */ int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX); int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX); int rc; u64 ck1 = 0; u64 ck2 = 0; /* Check that the results returned for ASC and DESC queries are ** the same. If not, call this corruption. */ rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1); if( rc==SQLITE_OK ){ int f = flags|FTS5INDEX_QUERY_DESC; rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); } if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; /* If this is a prefix query, check that the results returned if the ** the index is disabled are the same. In both ASC and DESC order. 
*/ if( iIdx>0 && rc==SQLITE_OK ){ int f = flags|FTS5INDEX_QUERY_TEST_NOIDX; ck2 = 0; rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; } if( iIdx>0 && rc==SQLITE_OK ){ int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC; ck2 = 0; rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; } cksum3 ^= ck1; fts5BufferSet(&rc, pPrev, n, (const u8*)z); if( rc==SQLITE_OK && cksum3!=expected ){ rc = FTS5_CORRUPT; } *pCksum = cksum3; } p->rc = rc; } #else # define fts5TestDlidxReverse(x,y,z) # define fts5TestTerm(u,v,w,x,y,z) #endif static void fts5IndexIntegrityCheckSegment( Fts5Index *p, /* FTS5 backend object */ Fts5StructureSegment *pSeg /* Segment to check internal consistency */ ){ Fts5BtreeIter iter; /* Used to iterate through b-tree hierarchy */ if( pSeg->pgnoFirst==0 ) return; /* Iterate through the b-tree hierarchy. */ for(fts5BtreeIterInit(p, pSeg, &iter); p->rc==SQLITE_OK && iter.bEof==0; fts5BtreeIterNext(&iter) ){ i64 iRow; /* Rowid for this leaf */ Fts5Data *pLeaf; /* Data for this leaf */ int iOff; /* Offset of first term on leaf */ int i; /* Used to iterate through empty leaves */ /* If the leaf in question has already been trimmed from the segment, ** ignore this b-tree entry. Otherwise, load it into memory. */ if( iter.iLeaf<pSeg->pgnoFirst ) continue; iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, iter.iLeaf); pLeaf = fts5DataRead(p, iRow); if( pLeaf==0 ) break; /* Check that the leaf contains at least one term, and that it is equal ** to or larger than the split-key in iter.term. Also check that if there ** is also a rowid pointer within the leaf page header, it points to a ** location before the term. 
*/ iOff = fts5GetU16(&pLeaf->p[2]); if( iOff==0 ){ p->rc = FTS5_CORRUPT; }else{ int iRowidOff; int nTerm; /* Size of term on leaf in bytes */ int res; /* Comparison of term and split-key */ iRowidOff = fts5GetU16(&pLeaf->p[0]); if( iRowidOff>=iOff ){ p->rc = FTS5_CORRUPT; }else{ iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm); res = memcmp(&pLeaf->p[iOff], iter.term.p, MIN(nTerm, iter.term.n)); if( res==0 ) res = nTerm - iter.term.n; if( res<0 ) p->rc = FTS5_CORRUPT; } } fts5DataRelease(pLeaf); if( p->rc ) break; /* Now check that the iter.nEmpty leaves following the current leaf ** (a) exist and (b) contain no terms. */ for(i=1; p->rc==SQLITE_OK && i<=iter.nEmpty; i++){ pLeaf = fts5DataRead(p, iRow+i); if( pLeaf && 0!=fts5GetU16(&pLeaf->p[2]) ){ p->rc = FTS5_CORRUPT; } fts5DataRelease(pLeaf); } /* If there is a doclist-index, check that it looks right. */ if( iter.bDlidx ){ Fts5DlidxIter *pDlidx = 0; /* For iterating through doclist index */ int iPrevLeaf = iter.iLeaf; int iSegid = pSeg->iSegid; int iPg; i64 iKey; for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iter.iLeaf); fts5DlidxIterEof(p, pDlidx)==0; fts5DlidxIterNext(p, pDlidx) ){ /* Check any rowid-less pages that occur before the current leaf. */ for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){ iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPg); pLeaf = fts5DataRead(p, iKey); if( pLeaf ){ if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT; fts5DataRelease(pLeaf); } } iPrevLeaf = fts5DlidxIterPgno(pDlidx); /* Check that the leaf page indicated by the iterator really does ** contain the rowid suggested by the same. 
*/ iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPrevLeaf); pLeaf = fts5DataRead(p, iKey); if( pLeaf ){ i64 iRowid; int iRowidOff = fts5GetU16(&pLeaf->p[0]); if( iRowidOff>=pLeaf->n ){ p->rc = FTS5_CORRUPT; }else{ fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT; } fts5DataRelease(pLeaf); } } for(iPg=iPrevLeaf+1; iPg<=(iter.iLeaf + iter.nEmpty); iPg++){ iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPg); pLeaf = fts5DataRead(p, iKey); if( pLeaf ){ if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT; fts5DataRelease(pLeaf); } } fts5DlidxIterFree(pDlidx); fts5TestDlidxReverse(p, iSegid, iter.iLeaf); } } /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */ if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){ p->rc = FTS5_CORRUPT; } fts5BtreeIterFree(&iter); } /* ** Run internal checks to ensure that the FTS index (a) is internally ** consistent and (b) contains entries for which the XOR of the checksums ** as calculated by fts5IndexEntryCksum() is cksum. ** ** Return SQLITE_CORRUPT if any of the internal checks fail, or if the ** checksum does not match. Return SQLITE_OK if all checks pass without ** error, or some other SQLite error code if another error (e.g. OOM) ** occurs. 
*/ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ u64 cksum2 = 0; /* Checksum based on contents of indexes */ Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */ Fts5MultiSegIter *pIter; /* Used to iterate through entire index */ Fts5Structure *pStruct; /* Index structure */ /* Used by extra internal tests only run if NDEBUG is not defined */ u64 cksum3 = 0; /* Checksum based on contents of indexes */ Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */ /* Load the FTS index structure */ pStruct = fts5StructureRead(p); /* Check that the internal nodes of each segment match the leaves */ if( pStruct ){ int iLvl, iSeg; for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; fts5IndexIntegrityCheckSegment(p, pSeg); } } } /* The cksum argument passed to this function is a checksum calculated ** based on all expected entries in the FTS index (including prefix index ** entries). This block checks that a checksum calculated based on the ** actual contents of FTS index is identical. ** ** Two versions of the same checksum are calculated. The first (stack ** variable cksum2) based on entries extracted from the full-text index ** while doing a linear scan of each individual index in turn. ** ** As each term visited by the linear scans, a separate query for the ** same term is performed. cksum3 is calculated based on the entries ** extracted by these queries. 
*/ for(fts5MultiIterNew(p, pStruct, 0, 0, 0, 0, -1, 0, &pIter); fts5MultiIterEof(p, pIter)==0; fts5MultiIterNext(p, pIter, 0, 0) ){ int n; /* Size of term in bytes */ i64 iPos = 0; /* Position read from poslist */ int iOff = 0; /* Offset within poslist */ i64 iRowid = fts5MultiIterRowid(pIter); char *z = (char*)fts5MultiIterTerm(pIter, &n); poslist.n = 0; fts5MultiIterPoslist(p, pIter, 0, &poslist); while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){ int iCol = FTS5_POS2COLUMN(iPos); int iTokOff = FTS5_POS2OFFSET(iPos); cksum2 ^= fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n); } /* If this is a new term, query for it. Update cksum3 with the results. */ fts5TestTerm(p, &term, z, n, cksum2, &cksum3); } fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3); fts5MultiIterFree(p, pIter); if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT; fts5StructureRelease(pStruct); fts5BufferFree(&term); fts5BufferFree(&poslist); return fts5IndexReturn(p); } /* ** Calculate and return a checksum that is the XOR of the index entry ** checksum of all entries that would be generated by the token specified ** by the final 5 arguments. 
*/ u64 sqlite3Fts5IndexCksum( Fts5Config *pConfig, /* Configuration object */ i64 iRowid, /* Document term appears in */ int iCol, /* Column term appears in */ int iPos, /* Position term appears in */ const char *pTerm, int nTerm /* Term at iPos */ ){ u64 ret = 0; /* Return value */ int iIdx; /* For iterating through indexes */ ret = fts5IndexEntryCksum(iRowid, iCol, iPos, 0, pTerm, nTerm); for(iIdx=0; iIdx<pConfig->nPrefix; iIdx++){ int nByte = fts5IndexCharlenToBytelen(pTerm, nTerm, pConfig->aPrefix[iIdx]); if( nByte ){ ret ^= fts5IndexEntryCksum(iRowid, iCol, iPos, iIdx+1, pTerm, nByte); } } return ret; } /************************************************************************* ************************************************************************** ** Below this point is the implementation of the fts5_decode() scalar ** function only. */ /* ** Decode a segment-data rowid from the %_data table. This function is ** the opposite of macro FTS5_SEGMENT_ROWID(). */ static void fts5DecodeRowid( i64 iRowid, /* Rowid from %_data table */ int *piSegid, /* OUT: Segment id */ int *pbDlidx, /* OUT: Dlidx flag */ int *piHeight, /* OUT: Height */ int *piPgno /* OUT: Page number */ ){ *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1)); iRowid >>= FTS5_DATA_PAGE_B; *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1)); iRowid >>= FTS5_DATA_HEIGHT_B; *pbDlidx = (int)(iRowid & 0x0001); iRowid >>= FTS5_DATA_DLI_B; *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1)); } static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){ int iSegid, iHeight, iPgno, bDlidx; /* Rowid compenents */ fts5DecodeRowid(iKey, &iSegid, &bDlidx, &iHeight, &iPgno); if( iSegid==0 ){ if( iKey==FTS5_AVERAGES_ROWID ){ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(averages) "); }else{ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(structure)"); } } else{ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(%ssegid=%d h=%d pgno=%d)", bDlidx ? 
"dlidx " : "", iSegid, iHeight, iPgno ); } } static void fts5DebugStructure( int *pRc, /* IN/OUT: error code */ Fts5Buffer *pBuf, Fts5Structure *p ){ int iLvl, iSeg; /* Iterate through levels, segments */ for(iLvl=0; iLvl<p->nLevel; iLvl++){ Fts5StructureLevel *pLvl = &p->aLevel[iLvl]; sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {lvl=%d nMerge=%d", iLvl, pLvl->nMerge ); for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){ Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d h=%d leaves=%d..%d}", pSeg->iSegid, pSeg->nHeight, pSeg->pgnoFirst, pSeg->pgnoLast ); } sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}"); } } /* ** This is part of the fts5_decode() debugging aid. ** ** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This ** function appends a human-readable representation of the same object ** to the buffer passed as the second argument. */ static void fts5DecodeStructure( int *pRc, /* IN/OUT: error code */ Fts5Buffer *pBuf, const u8 *pBlob, int nBlob ){ int rc; /* Return code */ Fts5Structure *p = 0; /* Decoded structure object */ rc = fts5StructureDecode(pBlob, nBlob, 0, &p); if( rc!=SQLITE_OK ){ *pRc = rc; return; } fts5DebugStructure(pRc, pBuf, p); fts5StructureRelease(p); } /* ** Buffer (a/n) is assumed to contain a list of serialized varints. Read ** each varint and append its string representation to buffer pBuf. Return ** after either the input buffer is exhausted or a 0 value is read. ** ** The return value is the number of bytes read from the input buffer. */ static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ int iOff = 0; while( iOff<n ){ int iVal; iOff += fts5GetVarint32(&a[iOff], iVal); sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal); } return iOff; } /* ** The start of buffer (a/n) contains the start of a doclist. The doclist ** may or may not finish within the buffer. 
This function appends a text ** representation of the part of the doclist that is present to buffer ** pBuf. ** ** The return value is the number of bytes read from the input buffer. */ static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ i64 iDocid; int iOff = 0; iOff = sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDocid); sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid); while( iOff<n ){ int nPos; int bDummy; iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy); iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos)); if( iOff<n ){ i64 iDelta; iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta); if( iDelta==0 ) return iOff; iDocid += iDelta; sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid); } } return iOff; } /* ** The implementation of user-defined scalar function fts5_decode(). */ static void fts5DecodeFunction( sqlite3_context *pCtx, /* Function call context */ int nArg, /* Number of args (always 2) */ sqlite3_value **apVal /* Function arguments */ ){ i64 iRowid; /* Rowid for record being decoded */ int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */ const u8 *aBlob; int n; /* Record to decode */ u8 *a = 0; Fts5Buffer s; /* Build up text to return here */ int rc = SQLITE_OK; /* Return code */ int nSpace = 0; assert( nArg==2 ); memset(&s, 0, sizeof(Fts5Buffer)); iRowid = sqlite3_value_int64(apVal[0]); n = sqlite3_value_bytes(apVal[1]); aBlob = sqlite3_value_blob(apVal[1]); nSpace = n + FTS5_DATA_ZERO_PADDING; a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace); if( a==0 ) goto decode_out; memcpy(a, aBlob, n); fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno); fts5DebugRowid(&rc, &s, iRowid); if( bDlidx ){ Fts5Data dlidx; Fts5DlidxLvl lvl; dlidx.p = a; dlidx.n = n; memset(&lvl, 0, sizeof(Fts5DlidxLvl)); lvl.pData = &dlidx; lvl.iLeafPgno = iPgno; for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){ sqlite3Fts5BufferAppendPrintf(&rc, &s, " %d(%lld)", lvl.iLeafPgno, lvl.iRowid ); } }else 
if( iSegid==0 ){ if( iRowid==FTS5_AVERAGES_ROWID ){ /* todo */ }else{ fts5DecodeStructure(&rc, &s, a, n); } }else{ Fts5Buffer term; memset(&term, 0, sizeof(Fts5Buffer)); if( iHeight==0 ){ int iTermOff = 0; int iRowidOff = 0; int iOff; int nKeep = 0; if( n>=4 ){ iRowidOff = fts5GetU16(&a[0]); iTermOff = fts5GetU16(&a[2]); }else{ sqlite3Fts5BufferSet(&rc, &s, 8, (const u8*)"corrupt"); goto decode_out; } if( iRowidOff ){ iOff = iRowidOff; }else if( iTermOff ){ iOff = iTermOff; }else{ iOff = n; } fts5DecodePoslist(&rc, &s, &a[4], iOff-4); assert( iRowidOff==0 || iOff==iRowidOff ); if( iRowidOff ){ iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff); } assert( iTermOff==0 || iOff==iTermOff ); while( iOff<n ){ int nByte; iOff += fts5GetVarint32(&a[iOff], nByte); term.n= nKeep; fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]); iOff += nByte; sqlite3Fts5BufferAppendPrintf( &rc, &s, " term=%.*s", term.n, (const char*)term.p ); iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff); if( iOff<n ){ iOff += fts5GetVarint32(&a[iOff], nKeep); } } fts5BufferFree(&term); }else{ Fts5NodeIter ss; for(fts5NodeIterInit(a, n, &ss); ss.aData; fts5NodeIterNext(&rc, &ss)){ if( ss.term.n==0 ){ sqlite3Fts5BufferAppendPrintf(&rc, &s, " left=%d", ss.iChild); }else{ sqlite3Fts5BufferAppendPrintf(&rc,&s, " \"%.*s\"", ss.term.n, ss.term.p ); } if( ss.nEmpty ){ sqlite3Fts5BufferAppendPrintf(&rc, &s, " empty=%d%s", ss.nEmpty, ss.bDlidx ? "*" : "" ); } } fts5NodeIterFree(&ss); } } decode_out: sqlite3_free(a); if( rc==SQLITE_OK ){ sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT); }else{ sqlite3_result_error_code(pCtx, rc); } fts5BufferFree(&s); } /* ** The implementation of user-defined scalar function fts5_rowid(). 
*/ static void fts5RowidFunction( sqlite3_context *pCtx, /* Function call context */ int nArg, /* Number of args (always 2) */ sqlite3_value **apVal /* Function arguments */ ){ const char *zArg; if( nArg==0 ){ sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1); }else{ zArg = (const char*)sqlite3_value_text(apVal[0]); if( 0==sqlite3_stricmp(zArg, "segment") ){ i64 iRowid; int segid, height, pgno; if( nArg!=4 ){ sqlite3_result_error(pCtx, "should be: fts5_rowid('segment', segid, height, pgno))", -1 ); }else{ segid = sqlite3_value_int(apVal[1]); height = sqlite3_value_int(apVal[2]); pgno = sqlite3_value_int(apVal[3]); iRowid = FTS5_SEGMENT_ROWID(segid, height, pgno); sqlite3_result_int64(pCtx, iRowid); } }else { sqlite3_result_error(pCtx, "first arg to fts5_rowid() must be 'segment' " "or 'start-of-index'" , -1 ); } } } /* ** This is called as part of registering the FTS5 module with database ** connection db. It registers several user-defined scalar functions useful ** with FTS5. ** ** If successful, SQLITE_OK is returned. If an error occurs, some other ** SQLite error code is returned instead. */ int sqlite3Fts5IndexInit(sqlite3 *db){ int rc = sqlite3_create_function( db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0 ); if( rc==SQLITE_OK ){ rc = sqlite3_create_function( db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0 ); } return rc; } #endif /* SQLITE_ENABLE_FTS5 */ |
Added ext/fts5/fts5_main.c.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > 
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > 
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 
381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 
881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 
1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 
1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 
2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 | /* ** 2014 Jun 09 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** ** This is an SQLite module implementing full-text search. */ #if defined(SQLITE_ENABLE_FTS5) #include "fts5Int.h" /* ** This variable is set to false when running tests for which the on disk ** structures should not be corrupt. Otherwise, true. If it is false, extra ** assert() conditions in the fts5 code are activated - conditions that are ** only true if it is guaranteed that the fts5 database is not corrupt. 
*/
int sqlite3_fts5_may_be_corrupt = 1;

/* Forward declarations of the object types defined later in this file. */
typedef struct Fts5Table Fts5Table;
typedef struct Fts5Cursor Fts5Cursor;
typedef struct Fts5Global Fts5Global;
typedef struct Fts5Auxiliary Fts5Auxiliary;
typedef struct Fts5Auxdata Fts5Auxdata;
typedef struct Fts5TokenizerModule Fts5TokenizerModule;

/*
** NOTES ON TRANSACTIONS:
**
** SQLite invokes the following virtual table methods as transactions are
** opened and closed by the user:
**
**     xBegin():    Start of a new transaction.
**     xSync():     Initial part of two-phase commit.
**     xCommit():   Final part of two-phase commit.
**     xRollback(): Rollback the transaction.
**
** Anything that is required as part of a commit that may fail is performed
** in the xSync() callback. Current versions of SQLite ignore any errors
** returned by xCommit().
**
** And as sub-transactions are opened/closed:
**
**     xSavepoint(int S):  Open savepoint S.
**     xRelease(int S):    Commit and close savepoint S.
**     xRollbackTo(int S): Rollback to start of savepoint S.
**
** During a write-transaction the fts5_index.c module may cache some data
** in-memory. It is flushed to disk whenever xSync(), xRelease() or
** xSavepoint() is called. And discarded whenever xRollback() or xRollbackTo()
** is called.
**
** Additionally, if SQLITE_DEBUG is defined, an instance of the following
** structure is used to record the current transaction state. This information
** is not required, but it is used in the assert() statements executed by
** function fts5CheckTransactionState() (see below).
*/
struct Fts5TransactionState {
  int eState;                     /* 0==closed, 1==open, 2==synced */
  int iSavepoint;                 /* Index of the innermost open savepoint as
                                  ** maintained by fts5CheckTransactionState():
                                  ** reset to -1 when a transaction begins */
};

/*
** A single object of this type is allocated when the FTS5 module is
** registered with a database handle. It is used to store pointers to
** all registered FTS5 extensions - tokenizers and auxiliary functions.
*/
struct Fts5Global {
  fts5_api api;                   /* User visible part of object (see fts5.h) */
  sqlite3 *db;                    /* Associated database connection */
  i64 iNextId;                    /* Used to allocate unique cursor ids.
                                  ** Pre-incremented by fts5OpenMethod() */
  Fts5Auxiliary *pAux;            /* First in list of all aux. functions */
  Fts5TokenizerModule *pTok;      /* First in list of all tokenizer modules */
  Fts5TokenizerModule *pDfltTok;  /* Default tokenizer module */
  Fts5Cursor *pCsr;               /* First in list of all open cursors.
                                  ** fts5OpenMethod() pushes new cursors on
                                  ** the front; fts5CloseMethod() unlinks */
};

/*
** Each auxiliary function registered with the FTS5 module is represented
** by an object of the following type. All such objects are stored as part
** of the Fts5Global.pAux list.
*/
struct Fts5Auxiliary {
  Fts5Global *pGlobal;            /* Global context for this function */
  char *zFunc;                    /* Function name (nul-terminated) */
  void *pUserData;                /* User-data pointer */
  fts5_extension_function xFunc;  /* Callback function */
  void (*xDestroy)(void*);        /* Destructor function */
  Fts5Auxiliary *pNext;           /* Next registered auxiliary function */
};

/*
** Each tokenizer module registered with the FTS5 module is represented
** by an object of the following type. All such objects are stored as part
** of the Fts5Global.pTok list.
*/
struct Fts5TokenizerModule {
  char *zName;                    /* Name of tokenizer */
  void *pUserData;                /* User pointer passed to xCreate() */
  fts5_tokenizer x;               /* Tokenizer functions */
  void (*xDestroy)(void*);        /* Destructor function */
  Fts5TokenizerModule *pNext;     /* Next registered tokenizer module */
};

/*
** Virtual-table object.
*/
struct Fts5Table {
  sqlite3_vtab base;              /* Base class used by SQLite core */
  Fts5Config *pConfig;            /* Virtual table configuration */
  Fts5Index *pIndex;              /* Full-text index */
  Fts5Storage *pStorage;          /* Document store */
  Fts5Global *pGlobal;            /* Global (connection wide) data */
  Fts5Cursor *pSortCsr;           /* Sort data from this cursor */
#ifdef SQLITE_DEBUG
  struct Fts5TransactionState ts; /* Debug-only transaction state, updated
                                  ** by fts5CheckTransactionState() */
#endif
};

/*
** The current position list of a phrase together with the phrase size.
*/
struct Fts5MatchPhrase {
  Fts5Buffer *pPoslist;           /* Pointer to current poslist */
  int nTerm;                      /* Size of phrase in terms */
};

/*
** pStmt:
**   SELECT rowid, <fts> FROM <fts> ORDER BY +rank;
**
** aIdx[]:
**   There is one entry in the aIdx[] array for each phrase in the query,
**   the value of which is the offset within aPoslist[] following the last
**   byte of the position list for the corresponding phrase.
**
** NOTE(review): aIdx[] is declared with zero length, which is a compiler
** extension rather than standard C. fts5CursorFirstSorted() allocates
** sizeof(int)*nPhrase bytes directly after the structure to back it.
*/
struct Fts5Sorter {
  sqlite3_stmt *pStmt;            /* Statement stepped by fts5SorterNext() */
  i64 iRowid;                     /* Current rowid */
  const u8 *aPoslist;             /* Position lists for current row */
  int nIdx;                       /* Number of entries in aIdx[] */
  int aIdx[0];                    /* Offsets into aPoslist for current row */
};


/*
** Virtual-table cursor object.
**
** iSpecial:
**   If this is a 'special' query (refer to function fts5SpecialMatch()),
**   then this variable contains the result of the query.
**
** iFirstRowid, iLastRowid:
**   These variables are only used for FTS5_PLAN_MATCH cursors. Assuming the
**   cursor iterates in ascending order of rowids, iFirstRowid is the lower
**   limit of rowids to return, and iLastRowid the upper. In other words, the
**   WHERE clause in the user's query might have been:
**
**       <tbl> MATCH <expr> AND rowid BETWEEN $iFirstRowid AND $iLastRowid
**
**   If the cursor iterates in descending order of rowid, iFirstRowid
**   is the upper limit (i.e. the "first" rowid visited) and iLastRowid
**   the lower.
*/
struct Fts5Cursor {
  sqlite3_vtab_cursor base;       /* Base class used by SQLite core */
  int ePlan;                      /* FTS5_PLAN_XXX value */
  int bDesc;                      /* True for "ORDER BY rowid DESC" queries */
  i64 iFirstRowid;                /* Return no rowids earlier than this */
  i64 iLastRowid;                 /* Return no rowids later than this */
  sqlite3_stmt *pStmt;            /* Statement used to read %_content */
  Fts5Expr *pExpr;                /* Expression for MATCH queries */
  Fts5Sorter *pSorter;            /* Sorter for "ORDER BY rank" queries */
  int csrflags;                   /* Mask of cursor flags (see below) */
  Fts5Cursor *pNext;              /* Next cursor in Fts5Global.pCsr list */
  i64 iSpecial;                   /* Result of special query */

  /* "rank" function. Populated on demand from vtab.xColumn(). */
  char *zRank;                    /* Custom rank function */
  char *zRankArgs;                /* Custom rank function args */
  Fts5Auxiliary *pRank;           /* Rank callback (or NULL) */
  int nRankArg;                   /* Number of trailing arguments for rank() */
  sqlite3_value **apRankArg;      /* Array of trailing arguments */
  sqlite3_stmt *pRankArgStmt;     /* Origin of objects in apRankArg[] */

  /* Variables used by auxiliary functions */
  i64 iCsrId;                     /* Cursor id */
  Fts5Auxiliary *pAux;            /* Currently executing extension function */
  Fts5Auxdata *pAuxdata;          /* First in linked list of saved aux-data */
  int *aColumnSize;               /* Values for xColumnSize(). Points at the
                                  ** space fts5OpenMethod() allocates directly
                                  ** after this structure */

  /* Cache used by auxiliary functions xInst() and xInstCount() */
  int nInstCount;                 /* Number of phrase instances */
  int *aInst;                     /* 3 integers per phrase instance. Released
                                  ** by fts5CsrNewrow() */
};

/*
** Bits that make up the "idxNum" parameter passed indirectly by
** xBestIndex() to xFilter().
*/
#define FTS5_BI_MATCH        0x0001         /* <tbl> MATCH ? */
#define FTS5_BI_RANK         0x0002         /* rank MATCH ? */
#define FTS5_BI_ROWID_EQ     0x0004         /* rowid == ? */
#define FTS5_BI_ROWID_LE     0x0008         /* rowid <= ? */
#define FTS5_BI_ROWID_GE     0x0010         /* rowid >= ?
*/ #define FTS5_BI_ORDER_RANK 0x0020 #define FTS5_BI_ORDER_ROWID 0x0040 #define FTS5_BI_ORDER_DESC 0x0080 /* ** Values for Fts5Cursor.csrflags */ #define FTS5CSR_REQUIRE_CONTENT 0x01 #define FTS5CSR_REQUIRE_DOCSIZE 0x02 #define FTS5CSR_EOF 0x04 #define FTS5CSR_FREE_ZRANK 0x08 #define FTS5CSR_REQUIRE_RESEEK 0x10 #define BitFlagAllTest(x,y) (((x) & (y))==(y)) #define BitFlagTest(x,y) (((x) & (y))!=0) /* ** Constants for the largest and smallest possible 64-bit signed integers. ** These are copied from sqliteInt.h. */ #ifndef SQLITE_AMALGAMATION # define LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32)) # define SMALLEST_INT64 (((i64)-1) - LARGEST_INT64) #endif /* ** Macros to Set(), Clear() and Test() cursor flags. */ #define CsrFlagSet(pCsr, flag) ((pCsr)->csrflags |= (flag)) #define CsrFlagClear(pCsr, flag) ((pCsr)->csrflags &= ~(flag)) #define CsrFlagTest(pCsr, flag) ((pCsr)->csrflags & (flag)) struct Fts5Auxdata { Fts5Auxiliary *pAux; /* Extension to which this belongs */ void *pPtr; /* Pointer value */ void(*xDelete)(void*); /* Destructor */ Fts5Auxdata *pNext; /* Next object in linked list */ }; #ifdef SQLITE_DEBUG #define FTS5_BEGIN 1 #define FTS5_SYNC 2 #define FTS5_COMMIT 3 #define FTS5_ROLLBACK 4 #define FTS5_SAVEPOINT 5 #define FTS5_RELEASE 6 #define FTS5_ROLLBACKTO 7 static void fts5CheckTransactionState(Fts5Table *p, int op, int iSavepoint){ switch( op ){ case FTS5_BEGIN: assert( p->ts.eState==0 ); p->ts.eState = 1; p->ts.iSavepoint = -1; break; case FTS5_SYNC: assert( p->ts.eState==1 ); p->ts.eState = 2; break; case FTS5_COMMIT: assert( p->ts.eState==2 ); p->ts.eState = 0; break; case FTS5_ROLLBACK: assert( p->ts.eState==1 || p->ts.eState==2 || p->ts.eState==0 ); p->ts.eState = 0; break; case FTS5_SAVEPOINT: assert( p->ts.eState==1 ); assert( iSavepoint>=0 ); assert( iSavepoint>p->ts.iSavepoint ); p->ts.iSavepoint = iSavepoint; break; case FTS5_RELEASE: assert( p->ts.eState==1 ); assert( iSavepoint>=0 ); assert( iSavepoint<=p->ts.iSavepoint ); 
p->ts.iSavepoint = iSavepoint-1; break; case FTS5_ROLLBACKTO: assert( p->ts.eState==1 ); assert( iSavepoint>=0 ); assert( iSavepoint<=p->ts.iSavepoint ); p->ts.iSavepoint = iSavepoint; break; } } #else # define fts5CheckTransactionState(x,y,z) #endif /* ** Return true if pTab is a contentless table. */ static int fts5IsContentless(Fts5Table *pTab){ return pTab->pConfig->eContent==FTS5_CONTENT_NONE; } /* ** Delete a virtual table handle allocated by fts5InitVtab(). */ static void fts5FreeVtab(Fts5Table *pTab){ if( pTab ){ sqlite3Fts5IndexClose(pTab->pIndex); sqlite3Fts5StorageClose(pTab->pStorage); sqlite3Fts5ConfigFree(pTab->pConfig); sqlite3_free(pTab); } } /* ** The xDisconnect() virtual table method. */ static int fts5DisconnectMethod(sqlite3_vtab *pVtab){ fts5FreeVtab((Fts5Table*)pVtab); return SQLITE_OK; } /* ** The xDestroy() virtual table method. */ static int fts5DestroyMethod(sqlite3_vtab *pVtab){ Fts5Table *pTab = (Fts5Table*)pVtab; int rc = sqlite3Fts5DropAll(pTab->pConfig); if( rc==SQLITE_OK ){ fts5FreeVtab((Fts5Table*)pVtab); } return rc; } /* ** This function is the implementation of both the xConnect and xCreate ** methods of the FTS3 virtual table. ** ** The argv[] array contains the following: ** ** argv[0] -> module name ("fts5") ** argv[1] -> database name ** argv[2] -> table name ** argv[...] -> "column name" and other module argument fields. 
*/
static int fts5InitVtab(
  int bCreate,                    /* True for xCreate, false for xConnect */
  sqlite3 *db,                    /* The SQLite database connection */
  void *pAux,                     /* Fts5Global object for this connection */
  int argc,                       /* Number of elements in argv array */
  const char * const *argv,       /* xCreate/xConnect argument array */
  sqlite3_vtab **ppVTab,          /* Write the resulting vtab structure here */
  char **pzErr                    /* Write any error message here */
){
  Fts5Table *pNew;                /* Virtual table object under construction */
  Fts5Config *pCfg;               /* Parsed form of argc/argv */
  int rc = SQLITE_OK;             /* Return code */

  /* Step 1: allocate the zeroed handle, then parse the configuration. */
  pNew = (Fts5Table*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Table));
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts5ConfigParse(
        (Fts5Global*)pAux, db, argc, (const char**)argv, &pCfg, pzErr
    );
    assert( (rc==SQLITE_OK && *pzErr==0) || pCfg==0 );
  }

  /* Step 2: attach the configuration and open the index sub-system. */
  if( rc==SQLITE_OK ){
    pNew->pConfig = pCfg;
    pNew->pGlobal = (Fts5Global*)pAux;
    rc = sqlite3Fts5IndexOpen(pCfg, bCreate, &pNew->pIndex, pzErr);
  }

  /* Step 3: open the storage sub-system on top of the index. */
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts5StorageOpen(
        pCfg, pNew->pIndex, bCreate, &pNew->pStorage, pzErr
    );
  }

  /* Step 4: declare the table schema (sqlite3_declare_vtab()). */
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts5ConfigDeclareVtab(pCfg);
  }

  if( rc==SQLITE_OK ){
    /* xCreate runs inside a transaction; record that for the debug checks */
    if( bCreate ){
      fts5CheckTransactionState(pNew, FTS5_BEGIN, 0);
    }
  }else{
    /* Any failure: discard whatever was built and hand back NULL */
    fts5FreeVtab(pNew);
    pNew = 0;
  }

  *ppVTab = (sqlite3_vtab*)pNew;
  return rc;
}

/*
** The xConnect() and xCreate() methods for the virtual table. All the
** work is done in function fts5InitVtab().
*/
static int fts5ConnectMethod(
  sqlite3 *db,                    /* Database connection */
  void *pAux,                     /* Module client data; fts5InitVtab() casts
                                  ** this to (Fts5Global*) */
  int argc,                       /* Number of elements in argv array */
  const char * const *argv,       /* xCreate/xConnect argument array */
  sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
  char **pzErr                    /* OUT: sqlite3_malloc'd error message */
){
  /* bCreate==0: connect to an existing table */
  return fts5InitVtab(0, db, pAux, argc, argv, ppVtab, pzErr);
}
static int fts5CreateMethod(
  sqlite3 *db,                    /* Database connection */
  void *pAux,                     /* Module client data; fts5InitVtab() casts
                                  ** this to (Fts5Global*) */
  int argc,                       /* Number of elements in argv array */
  const char * const *argv,       /* xCreate/xConnect argument array */
  sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
  char **pzErr                    /* OUT: sqlite3_malloc'd error message */
){
  /* bCreate==1: create a new table */
  return fts5InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr);
}

/*
** The different query plans.
*/
#define FTS5_PLAN_SCAN           1       /* No usable constraint */
#define FTS5_PLAN_MATCH          2       /* (<tbl> MATCH ?) */
#define FTS5_PLAN_SORTED_MATCH   3       /* (<tbl> MATCH ? ORDER BY rank) */
#define FTS5_PLAN_ROWID          4       /* (rowid = ?) */
#define FTS5_PLAN_SOURCE         5       /* A source cursor for SORTED_MATCH */
#define FTS5_PLAN_SPECIAL        6       /* An internal query */

/*
** Implementation of the xBestIndex method for FTS5 tables. Within the
** WHERE constraint, it searches for the following:
**
**   1. A MATCH constraint against the special column.
**   2. A MATCH constraint against the "rank" column.
**   3. An == constraint against the rowid column.
**   4. A < or <= constraint against the rowid column.
**   5. A > or >= constraint against the rowid column.
**
** Within the ORDER BY, either:
**
**   6. ORDER BY rank [ASC|DESC]
**   7. ORDER BY rowid [ASC|DESC]
**
** Costs are assigned as follows:
**
**  a) If an unusable MATCH operator is present in the WHERE clause, the
**     cost is unconditionally set to 1e50 (a really big number).
**
**  a) If a MATCH operator is present, the cost depends on the other
**     constraints also present.
As follows:
**
**       * No other constraints:            cost=1000.0
**       * One rowid range constraint:      cost=750.0
**       * Both rowid range constraints:    cost=500.0
**       * An == rowid constraint:          cost=100.0
**
**  b) Otherwise, if there is no MATCH:
**
**       * No other constraints:            cost=1000000.0
**       * One rowid range constraint:      cost=750000.0
**       * Both rowid range constraints:    cost=250000.0
**       * An == rowid constraint:          cost=10.0
**
** Costs are not modified by the ORDER BY clause.
*/
static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
  Fts5Config *pConfig = ((Fts5Table*)pVTab)->pConfig;
  int idxFlags = 0;               /* Parameter passed through to xFilter() */
  int bHasMatch;                  /* True if a usable <tbl> MATCH was found */
  int nRange;                     /* Number of usable rowid range bounds */
  int iArg;                       /* Next argvIndex value to hand out */
  int iCons;                      /* Index into pInfo->aConstraint[] */
  int iTerm;                      /* Index into aConstraint[] below */

  struct Constraint {
    int op;                       /* Mask against sqlite3_index_constraint.op */
    int fts5op;                   /* FTS5 mask for idxFlags */
    int iCol;                     /* 0==rowid, 1==tbl, 2==rank */
    int omit;                     /* True to omit this if found */
    int iConsIndex;               /* Index in pInfo->aConstraint[] */
  } aConstraint[] = {
    {SQLITE_INDEX_CONSTRAINT_MATCH, FTS5_BI_MATCH,    1, 1, -1},
    {SQLITE_INDEX_CONSTRAINT_MATCH, FTS5_BI_RANK,     2, 1, -1},
    {SQLITE_INDEX_CONSTRAINT_EQ,    FTS5_BI_ROWID_EQ, 0, 0, -1},
    {SQLITE_INDEX_CONSTRAINT_LT|SQLITE_INDEX_CONSTRAINT_LE,
                                    FTS5_BI_ROWID_LE, 0, 0, -1},
    {SQLITE_INDEX_CONSTRAINT_GT|SQLITE_INDEX_CONSTRAINT_GE,
                                    FTS5_BI_ROWID_GE, 0, 0, -1},
  };
  const int nTerm = (int)(sizeof(aConstraint)/sizeof(aConstraint[0]));

  /* Map Constraint.iCol (0==rowid, 1==tbl, 2==rank) to the column numbers
  ** SQLite reports in sqlite3_index_constraint.iColumn. */
  int aColMap[3];
  aColMap[0] = -1;
  aColMap[1] = pConfig->nCol;
  aColMap[2] = pConfig->nCol+1;

  /* Set idxFlags flags for all WHERE clause terms that will be used. */
  for(iCons=0; iCons<pInfo->nConstraint; iCons++){
    struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[iCons];
    for(iTerm=0; iTerm<nTerm; iTerm++){
      struct Constraint *pC = &aConstraint[iTerm];
      if( pCons->iColumn!=aColMap[pC->iCol] ) continue;
      if( (pCons->op & pC->op)==0 ) continue;

      if( pCons->usable ){
        pC->iConsIndex = iCons;
        idxFlags |= pC->fts5op;
      }else if( iTerm==0 ){
        /* As there exists an unusable MATCH constraint this is an
        ** unusable plan. Set a prohibitively high cost. */
        pInfo->estimatedCost = 1e50;
        return SQLITE_OK;
      }
    }
  }

  /* Set idxFlags flags for the ORDER BY clause. Only a single-term ORDER BY
  ** on rank (and then only alongside a MATCH) or on rowid is consumed. */
  if( pInfo->nOrderBy==1 ){
    int iSort = pInfo->aOrderBy[0].iColumn;
    int orderFlag = 0;
    if( iSort==(pConfig->nCol+1) && BitFlagTest(idxFlags, FTS5_BI_MATCH) ){
      orderFlag = FTS5_BI_ORDER_RANK;
    }else if( iSort==-1 ){
      orderFlag = FTS5_BI_ORDER_ROWID;
    }
    if( orderFlag ){
      idxFlags |= orderFlag;
      pInfo->orderByConsumed = 1;
      if( pInfo->aOrderBy[0].desc ){
        idxFlags |= FTS5_BI_ORDER_DESC;
      }
    }
  }

  /* Calculate the estimated cost based on the flags set in idxFlags. */
  bHasMatch = BitFlagTest(idxFlags, FTS5_BI_MATCH);
  if( BitFlagTest(idxFlags, FTS5_BI_ROWID_EQ) ){
    pInfo->estimatedCost = bHasMatch ? 100.0 : 10.0;
  }else{
    nRange = BitFlagTest(idxFlags, FTS5_BI_ROWID_LE)
           + BitFlagTest(idxFlags, FTS5_BI_ROWID_GE);
    switch( nRange ){
      case 2:
        pInfo->estimatedCost = bHasMatch ? 500.0 : 250000.0;
        break;
      case 1:
        pInfo->estimatedCost = bHasMatch ? 750.0 : 750000.0;
        break;
      default:
        pInfo->estimatedCost = bHasMatch ? 1000.0 : 1000000.0;
        break;
    }
  }

  /* Assign argvIndex values to each constraint in use, in the order the
  ** terms appear in aConstraint[]. */
  iArg = 1;
  for(iTerm=0; iTerm<nTerm; iTerm++){
    struct Constraint *pC = &aConstraint[iTerm];
    if( pC->iConsIndex<0 ) continue;
    pInfo->aConstraintUsage[pC->iConsIndex].argvIndex = iArg++;
    pInfo->aConstraintUsage[pC->iConsIndex].omit = pC->omit;
  }

  pInfo->idxNum = idxFlags;
  return SQLITE_OK;
}

/*
** Implementation of xOpen method.
*/
static int fts5OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
  Fts5Table *pTab = (Fts5Table*)pVTab;
  Fts5Global *pGlobal;
  Fts5Cursor *pNew;               /* New cursor object */
  int nByte;                      /* Bytes of space to allocate */

  /* The cursor and its aColumnSize[] array (one int per table column)
  ** share a single allocation. */
  nByte = sizeof(Fts5Cursor) + pTab->pConfig->nCol * sizeof(int);
  pNew = (Fts5Cursor*)sqlite3_malloc(nByte);
  if( pNew==0 ){
    *ppCsr = 0;
    return SQLITE_NOMEM;
  }

  memset(pNew, 0, nByte);
  pNew->aColumnSize = (int*)&pNew[1];

  /* Assign a fresh id and push the cursor on the front of the list of
  ** open cursors. */
  pGlobal = pTab->pGlobal;
  pNew->iCsrId = ++pGlobal->iNextId;
  pNew->pNext = pGlobal->pCsr;
  pGlobal->pCsr = pNew;

  *ppCsr = (sqlite3_vtab_cursor*)pNew;
  return SQLITE_OK;
}

/*
** Return the FTS5_STMT_XXX statement type matching the cursor's plan:
** a scan statement (direction chosen by bDesc) for FTS5_PLAN_SCAN, or
** FTS5_STMT_LOOKUP for every other plan.
*/
static int fts5StmtType(Fts5Cursor *pCsr){
  if( pCsr->ePlan!=FTS5_PLAN_SCAN ){
    return FTS5_STMT_LOOKUP;
  }
  if( pCsr->bDesc ){
    return FTS5_STMT_SCAN_DESC;
  }
  return FTS5_STMT_SCAN_ASC;
}

/*
** Invoked once the cursor has been repositioned on another row. Everything
** cached for the previous row is invalidated: the content and docsize are
** flagged as needing a reload and the phrase-instance cache is released.
*/
static void fts5CsrNewrow(Fts5Cursor *pCsr){
  sqlite3_free(pCsr->aInst);
  pCsr->aInst = 0;
  pCsr->nInstCount = 0;
  CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE);
}

/*
** Close the cursor.  For additional information see the documentation
** on the xClose method of the virtual table interface.
*/
static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){
  if( pCursor ){
    Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab);
    Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
    Fts5Cursor **pp;
    Fts5Auxdata *pData;
    Fts5Auxdata *pNext;

    /* Drop per-row caches, then hand the content statement (if any) back
    ** to the storage layer. */
    fts5CsrNewrow(pCsr);
    if( pCsr->pStmt ){
      int eStmt = fts5StmtType(pCsr);
      sqlite3Fts5StorageStmtRelease(pTab->pStorage, eStmt, pCsr->pStmt);
    }
    if( pCsr->pSorter ){
      Fts5Sorter *pSorter = pCsr->pSorter;
      sqlite3_finalize(pSorter->pStmt);
      sqlite3_free(pSorter);
    }

    /* The expression is not freed for FTS5_PLAN_SOURCE cursors */
    if( pCsr->ePlan!=FTS5_PLAN_SOURCE ){
      sqlite3Fts5ExprFree(pCsr->pExpr);
    }

    /* Run the destructor (if one was supplied) for each saved aux-data */
    for(pData=pCsr->pAuxdata; pData; pData=pNext){
      pNext = pData->pNext;
      if( pData->xDelete ) pData->xDelete(pData->pPtr);
      sqlite3_free(pData);
    }

    /* Remove the cursor from the Fts5Global.pCsr list */
    for(pp=&pTab->pGlobal->pCsr; (*pp)!=pCsr; pp=&(*pp)->pNext);
    *pp = pCsr->pNext;

    sqlite3_finalize(pCsr->pRankArgStmt);
    sqlite3_free(pCsr->apRankArg);

    /* zRank/zRankArgs are only owned by the cursor when this flag is set */
    if( CsrFlagTest(pCsr, FTS5CSR_FREE_ZRANK) ){
      sqlite3_free(pCsr->zRank);
      sqlite3_free(pCsr->zRankArgs);
    }
    sqlite3_free(pCsr);
  }
  return SQLITE_OK;
}

/*
** Advance the sorter statement of an FTS5_PLAN_SORTED_MATCH cursor by one
** row.
**
** On SQLITE_DONE the cursor is flagged FTS5CSR_EOF and SQLITE_OK returned.
** On SQLITE_ROW, column 0 is read as the rowid and column 1 as a blob made
** up of (nIdx-1) varints - the sizes of all but the last position list -
** followed by the position lists themselves. The sizes are accumulated
** into pSorter->aIdx[] as end offsets and pSorter->aPoslist is pointed at
** the first position list. Any other sqlite3_step() result is returned
** to the caller unchanged.
**
** sqlite3_column_blob() returns NULL for a zero-length value (and may do
** so following an allocation failure). In that case, or if the sorter has
** no aIdx[] entries at all, nothing is decoded: every offset is zeroed
** and aPoslist set to NULL, so that each phrase has an empty position
** list, instead of reading varints through a NULL pointer or writing
** aIdx[0] when no entries were allocated.
*/
static int fts5SorterNext(Fts5Cursor *pCsr){
  Fts5Sorter *pSorter = pCsr->pSorter;
  int rc;

  rc = sqlite3_step(pSorter->pStmt);
  if( rc==SQLITE_DONE ){
    rc = SQLITE_OK;
    CsrFlagSet(pCsr, FTS5CSR_EOF);
  }else if( rc==SQLITE_ROW ){
    const u8 *a;
    const u8 *aBlob;
    int nBlob;
    int i;
    int iOff = 0;
    rc = SQLITE_OK;

    pSorter->iRowid = sqlite3_column_int64(pSorter->pStmt, 0);
    nBlob = sqlite3_column_bytes(pSorter->pStmt, 1);
    aBlob = a = sqlite3_column_blob(pSorter->pStmt, 1);

    if( aBlob!=0 && nBlob>0 && pSorter->nIdx>0 ){
      for(i=0; i<(pSorter->nIdx-1); i++){
        int iVal;
        a += fts5GetVarint32(a, iVal);
        iOff += iVal;
        pSorter->aIdx[i] = iOff;
      }
      /* The final position list runs to the end of the blob */
      pSorter->aIdx[i] = (int)(&aBlob[nBlob] - a);
      pSorter->aPoslist = a;
    }else{
      for(i=0; i<pSorter->nIdx; i++){
        pSorter->aIdx[i] = 0;
      }
      pSorter->aPoslist = 0;
    }

    fts5CsrNewrow(pCsr);
  }

  return rc;
}


/*
** Set the FTS5CSR_REQUIRE_RESEEK flag on all FTS5_PLAN_MATCH cursors
** open on table pTab.
*/
static void fts5TripCursors(Fts5Table *pTab){
  sqlite3_vtab *pVtab = (sqlite3_vtab*)pTab;
  Fts5Cursor *p = pTab->pGlobal->pCsr;

  /* The global list holds the cursors of every table, so filter on pVtab */
  while( p ){
    if( p->ePlan==FTS5_PLAN_MATCH && p->base.pVtab==pVtab ){
      CsrFlagSet(p, FTS5CSR_REQUIRE_RESEEK);
    }
    p = p->pNext;
  }
}

/*
** If the REQUIRE_RESEEK flag is set on the cursor passed as the first
** argument, close and reopen all Fts5IndexIter iterators that the cursor
** is using. Then attempt to move the cursor to a rowid equal to or later
** (in the cursors sort order - ASC or DESC) than the current rowid.
**
** If the new rowid is not equal to the old, set output parameter *pbSkip
** to 1 before returning. Otherwise, leave it unchanged.
**
** Return SQLITE_OK if successful or if no reseek was required, or an
** error code if an error occurred.
*/
static int fts5CursorReseek(Fts5Cursor *pCsr, int *pbSkip){
  Fts5Table *pTab;
  i64 iPrev;                      /* Rowid before the reseek */
  int rc;

  assert( *pbSkip==0 );
  if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_RESEEK)==0 ){
    return SQLITE_OK;
  }

  pTab = (Fts5Table*)(pCsr->base.pVtab);
  iPrev = sqlite3Fts5ExprRowid(pCsr->pExpr);
  rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->pIndex, iPrev, pCsr->bDesc);
  if( rc==SQLITE_OK && sqlite3Fts5ExprRowid(pCsr->pExpr)!=iPrev ){
    *pbSkip = 1;
  }

  CsrFlagClear(pCsr, FTS5CSR_REQUIRE_RESEEK);
  fts5CsrNewrow(pCsr);
  if( sqlite3Fts5ExprEof(pCsr->pExpr) ){
    CsrFlagSet(pCsr, FTS5CSR_EOF);
  }
  return rc;
}

/*
** Advance the cursor to the next row in the table that matches the
** search criteria.
**
** Return SQLITE_OK if nothing goes wrong.  SQLITE_OK is returned
** even if we reach end-of-file.  The fts5EofMethod() will be called
** subsequently to determine whether or not an EOF was hit.
*/ static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){ Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; int ePlan = pCsr->ePlan; int bSkip = 0; int rc; if( (rc = fts5CursorReseek(pCsr, &bSkip)) || bSkip ) return rc; switch( ePlan ){ case FTS5_PLAN_MATCH: case FTS5_PLAN_SOURCE: rc = sqlite3Fts5ExprNext(pCsr->pExpr, pCsr->iLastRowid); if( sqlite3Fts5ExprEof(pCsr->pExpr) ){ CsrFlagSet(pCsr, FTS5CSR_EOF); } fts5CsrNewrow(pCsr); break; case FTS5_PLAN_SPECIAL: { CsrFlagSet(pCsr, FTS5CSR_EOF); break; } case FTS5_PLAN_SORTED_MATCH: { rc = fts5SorterNext(pCsr); break; } default: rc = sqlite3_step(pCsr->pStmt); if( rc!=SQLITE_ROW ){ CsrFlagSet(pCsr, FTS5CSR_EOF); rc = sqlite3_reset(pCsr->pStmt); }else{ rc = SQLITE_OK; } break; } return rc; } static int fts5CursorFirstSorted(Fts5Table *pTab, Fts5Cursor *pCsr, int bDesc){ Fts5Config *pConfig = pTab->pConfig; Fts5Sorter *pSorter; int nPhrase; int nByte; int rc = SQLITE_OK; char *zSql; const char *zRank = pCsr->zRank; const char *zRankArgs = pCsr->zRankArgs; nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr); nByte = sizeof(Fts5Sorter) + sizeof(int) * nPhrase; pSorter = (Fts5Sorter*)sqlite3_malloc(nByte); if( pSorter==0 ) return SQLITE_NOMEM; memset(pSorter, 0, nByte); pSorter->nIdx = nPhrase; /* TODO: It would be better to have some system for reusing statement ** handles here, rather than preparing a new one for each query. But that ** is not possible as SQLite reference counts the virtual table objects. ** And since the statement required here reads from this very virtual ** table, saving it creates a circular reference. ** ** If SQLite a built-in statement cache, this wouldn't be a problem. */ zSql = sqlite3Fts5Mprintf(&rc, "SELECT rowid, rank FROM %Q.%Q ORDER BY %s(%s%s%s) %s", pConfig->zDb, pConfig->zName, zRank, pConfig->zName, (zRankArgs ? ", " : ""), (zRankArgs ? zRankArgs : ""), bDesc ? 
"DESC" : "ASC" ); if( zSql ){ rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pSorter->pStmt, 0); sqlite3_free(zSql); } pCsr->pSorter = pSorter; if( rc==SQLITE_OK ){ assert( pTab->pSortCsr==0 ); pTab->pSortCsr = pCsr; rc = fts5SorterNext(pCsr); pTab->pSortCsr = 0; } if( rc!=SQLITE_OK ){ sqlite3_finalize(pSorter->pStmt); sqlite3_free(pSorter); pCsr->pSorter = 0; } return rc; } static int fts5CursorFirst(Fts5Table *pTab, Fts5Cursor *pCsr, int bDesc){ int rc; Fts5Expr *pExpr = pCsr->pExpr; rc = sqlite3Fts5ExprFirst(pExpr, pTab->pIndex, pCsr->iFirstRowid, bDesc); if( sqlite3Fts5ExprEof(pExpr) ){ CsrFlagSet(pCsr, FTS5CSR_EOF); } fts5CsrNewrow(pCsr); return rc; } /* ** Process a "special" query. A special query is identified as one with a ** MATCH expression that begins with a '*' character. The remainder of ** the text passed to the MATCH operator are used as the special query ** parameters. */ static int fts5SpecialMatch( Fts5Table *pTab, Fts5Cursor *pCsr, const char *zQuery ){ int rc = SQLITE_OK; /* Return code */ const char *z = zQuery; /* Special query text */ int n; /* Number of bytes in text at z */ while( z[0]==' ' ) z++; for(n=0; z[n] && z[n]!=' '; n++); assert( pTab->base.zErrMsg==0 ); pCsr->ePlan = FTS5_PLAN_SPECIAL; if( 0==sqlite3_strnicmp("reads", z, n) ){ pCsr->iSpecial = sqlite3Fts5IndexReads(pTab->pIndex); } else if( 0==sqlite3_strnicmp("id", z, n) ){ pCsr->iSpecial = pCsr->iCsrId; } else{ /* An unrecognized directive. Return an error message. */ pTab->base.zErrMsg = sqlite3_mprintf("unknown special query: %.*s", n, z); rc = SQLITE_ERROR; } return rc; } /* ** Search for an auxiliary function named zName that can be used with table ** pTab. If one is found, return a pointer to the corresponding Fts5Auxiliary ** structure. Otherwise, if no such function exists, return NULL. 
*/ static Fts5Auxiliary *fts5FindAuxiliary(Fts5Table *pTab, const char *zName){ Fts5Auxiliary *pAux; for(pAux=pTab->pGlobal->pAux; pAux; pAux=pAux->pNext){ if( sqlite3_stricmp(zName, pAux->zFunc)==0 ) return pAux; } /* No function of the specified name was found. Return 0. */ return 0; } static int fts5FindRankFunction(Fts5Cursor *pCsr){ Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); Fts5Config *pConfig = pTab->pConfig; int rc = SQLITE_OK; Fts5Auxiliary *pAux = 0; const char *zRank = pCsr->zRank; const char *zRankArgs = pCsr->zRankArgs; if( zRankArgs ){ char *zSql = sqlite3Fts5Mprintf(&rc, "SELECT %s", zRankArgs); if( zSql ){ sqlite3_stmt *pStmt = 0; rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pStmt, 0); sqlite3_free(zSql); assert( rc==SQLITE_OK || pCsr->pRankArgStmt==0 ); if( rc==SQLITE_OK ){ if( SQLITE_ROW==sqlite3_step(pStmt) ){ int nByte; pCsr->nRankArg = sqlite3_column_count(pStmt); nByte = sizeof(sqlite3_value*)*pCsr->nRankArg; pCsr->apRankArg = (sqlite3_value**)sqlite3Fts5MallocZero(&rc, nByte); if( rc==SQLITE_OK ){ int i; for(i=0; i<pCsr->nRankArg; i++){ pCsr->apRankArg[i] = sqlite3_column_value(pStmt, i); } } pCsr->pRankArgStmt = pStmt; }else{ rc = sqlite3_finalize(pStmt); assert( rc!=SQLITE_OK ); } } } } if( rc==SQLITE_OK ){ pAux = fts5FindAuxiliary(pTab, zRank); if( pAux==0 ){ assert( pTab->base.zErrMsg==0 ); pTab->base.zErrMsg = sqlite3_mprintf("no such function: %s", zRank); rc = SQLITE_ERROR; } } pCsr->pRank = pAux; return rc; } static int fts5CursorParseRank( Fts5Config *pConfig, Fts5Cursor *pCsr, sqlite3_value *pRank ){ int rc = SQLITE_OK; if( pRank ){ const char *z = (const char*)sqlite3_value_text(pRank); char *zRank = 0; char *zRankArgs = 0; if( z==0 ){ if( sqlite3_value_type(pRank)==SQLITE_NULL ) rc = SQLITE_ERROR; }else{ rc = sqlite3Fts5ConfigParseRank(z, &zRank, &zRankArgs); } if( rc==SQLITE_OK ){ pCsr->zRank = zRank; pCsr->zRankArgs = zRankArgs; CsrFlagSet(pCsr, FTS5CSR_FREE_ZRANK); }else if( rc==SQLITE_ERROR ){ 
pCsr->base.pVtab->zErrMsg = sqlite3_mprintf( "parse error in rank function: %s", z ); } }else{ if( pConfig->zRank ){ pCsr->zRank = (char*)pConfig->zRank; pCsr->zRankArgs = (char*)pConfig->zRankArgs; }else{ pCsr->zRank = (char*)FTS5_DEFAULT_RANK; pCsr->zRankArgs = 0; } } return rc; } static i64 fts5GetRowidLimit(sqlite3_value *pVal, i64 iDefault){ if( pVal ){ int eType = sqlite3_value_numeric_type(pVal); if( eType==SQLITE_INTEGER ){ return sqlite3_value_int64(pVal); } } return iDefault; } /* ** This is the xFilter interface for the virtual table. See ** the virtual table xFilter method documentation for additional ** information. ** ** There are three possible query strategies: ** ** 1. Full-text search using a MATCH operator. ** 2. A by-rowid lookup. ** 3. A full-table scan. */ static int fts5FilterMethod( sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ int idxNum, /* Strategy index */ const char *idxStr, /* Unused */ int nVal, /* Number of elements in apVal */ sqlite3_value **apVal /* Arguments for the indexing scheme */ ){ Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab); Fts5Config *pConfig = pTab->pConfig; Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; int rc = SQLITE_OK; /* Error code */ int iVal = 0; /* Counter for apVal[] */ int bDesc; /* True if ORDER BY [rank|rowid] DESC */ int bOrderByRank; /* True if ORDER BY rank */ sqlite3_value *pMatch = 0; /* <tbl> MATCH ? expression (or NULL) */ sqlite3_value *pRank = 0; /* rank MATCH ? expression (or NULL) */ sqlite3_value *pRowidEq = 0; /* rowid = ? expression (or NULL) */ sqlite3_value *pRowidLe = 0; /* rowid <= ? expression (or NULL) */ sqlite3_value *pRowidGe = 0; /* rowid >= ? 
expression (or NULL) */ char **pzErrmsg = pConfig->pzErrmsg; assert( pCsr->pStmt==0 ); assert( pCsr->pExpr==0 ); assert( pCsr->csrflags==0 ); assert( pCsr->pRank==0 ); assert( pCsr->zRank==0 ); assert( pCsr->zRankArgs==0 ); assert( pzErrmsg==0 || pzErrmsg==&pTab->base.zErrMsg ); pConfig->pzErrmsg = &pTab->base.zErrMsg; /* Decode the arguments passed through to this function. ** ** Note: The following set of if(...) statements must be in the same ** order as the corresponding entries in the struct at the top of ** fts5BestIndexMethod(). */ if( BitFlagTest(idxNum, FTS5_BI_MATCH) ) pMatch = apVal[iVal++]; if( BitFlagTest(idxNum, FTS5_BI_RANK) ) pRank = apVal[iVal++]; if( BitFlagTest(idxNum, FTS5_BI_ROWID_EQ) ) pRowidEq = apVal[iVal++]; if( BitFlagTest(idxNum, FTS5_BI_ROWID_LE) ) pRowidLe = apVal[iVal++]; if( BitFlagTest(idxNum, FTS5_BI_ROWID_GE) ) pRowidGe = apVal[iVal++]; assert( iVal==nVal ); bOrderByRank = ((idxNum & FTS5_BI_ORDER_RANK) ? 1 : 0); pCsr->bDesc = bDesc = ((idxNum & FTS5_BI_ORDER_DESC) ? 1 : 0); /* Set the cursor upper and lower rowid limits. Only some strategies ** actually use them. This is ok, as the xBestIndex() method leaves the ** sqlite3_index_constraint.omit flag clear for range constraints ** on the rowid field. */ if( pRowidEq ){ pRowidLe = pRowidGe = pRowidEq; } if( bDesc ){ pCsr->iFirstRowid = fts5GetRowidLimit(pRowidLe, LARGEST_INT64); pCsr->iLastRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64); }else{ pCsr->iLastRowid = fts5GetRowidLimit(pRowidLe, LARGEST_INT64); pCsr->iFirstRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64); } if( pTab->pSortCsr ){ /* If pSortCsr is non-NULL, then this call is being made as part of ** processing for a "... MATCH <expr> ORDER BY rank" query (ePlan is ** set to FTS5_PLAN_SORTED_MATCH). pSortCsr is the cursor that will ** return results to the user for this query. The current cursor ** (pCursor) is used to execute the query issued by function ** fts5CursorFirstSorted() above. 
*/ assert( pRowidEq==0 && pRowidLe==0 && pRowidGe==0 && pRank==0 ); assert( nVal==0 && pMatch==0 && bOrderByRank==0 && bDesc==0 ); assert( pCsr->iLastRowid==LARGEST_INT64 ); assert( pCsr->iFirstRowid==SMALLEST_INT64 ); pCsr->ePlan = FTS5_PLAN_SOURCE; pCsr->pExpr = pTab->pSortCsr->pExpr; rc = fts5CursorFirst(pTab, pCsr, bDesc); }else if( pMatch ){ const char *zExpr = (const char*)sqlite3_value_text(apVal[0]); rc = fts5CursorParseRank(pConfig, pCsr, pRank); if( rc==SQLITE_OK ){ if( zExpr[0]=='*' ){ /* The user has issued a query of the form "MATCH '*...'". This ** indicates that the MATCH expression is not a full text query, ** but a request for an internal parameter. */ rc = fts5SpecialMatch(pTab, pCsr, &zExpr[1]); }else{ char **pzErr = &pTab->base.zErrMsg; rc = sqlite3Fts5ExprNew(pConfig, zExpr, &pCsr->pExpr, pzErr); if( rc==SQLITE_OK ){ if( bOrderByRank ){ pCsr->ePlan = FTS5_PLAN_SORTED_MATCH; rc = fts5CursorFirstSorted(pTab, pCsr, bDesc); }else{ pCsr->ePlan = FTS5_PLAN_MATCH; rc = fts5CursorFirst(pTab, pCsr, bDesc); } } } } }else if( pConfig->zContent==0 ){ *pConfig->pzErrmsg = sqlite3_mprintf( "%s: table does not support scanning", pConfig->zName ); rc = SQLITE_ERROR; }else{ /* This is either a full-table scan (ePlan==FTS5_PLAN_SCAN) or a lookup ** by rowid (ePlan==FTS5_PLAN_ROWID). */ pCsr->ePlan = (pRowidEq ? FTS5_PLAN_ROWID : FTS5_PLAN_SCAN); rc = sqlite3Fts5StorageStmt( pTab->pStorage, fts5StmtType(pCsr), &pCsr->pStmt, &pTab->base.zErrMsg ); if( rc==SQLITE_OK ){ if( pCsr->ePlan==FTS5_PLAN_ROWID ){ sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]); }else{ sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iFirstRowid); sqlite3_bind_int64(pCsr->pStmt, 2, pCsr->iLastRowid); } rc = fts5NextMethod(pCursor); } } pConfig->pzErrmsg = pzErrmsg; return rc; } /* ** This is the xEof method of the virtual table. SQLite calls this ** routine to find out if it has reached the end of a result set. 
*/ static int fts5EofMethod(sqlite3_vtab_cursor *pCursor){ Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; return (CsrFlagTest(pCsr, FTS5CSR_EOF) ? 1 : 0); } /* ** Return the rowid that the cursor currently points to. */ static i64 fts5CursorRowid(Fts5Cursor *pCsr){ assert( pCsr->ePlan==FTS5_PLAN_MATCH || pCsr->ePlan==FTS5_PLAN_SORTED_MATCH || pCsr->ePlan==FTS5_PLAN_SOURCE ); if( pCsr->pSorter ){ return pCsr->pSorter->iRowid; }else{ return sqlite3Fts5ExprRowid(pCsr->pExpr); } } /* ** This is the xRowid method. The SQLite core calls this routine to ** retrieve the rowid for the current row of the result set. fts5 ** exposes %_content.docid as the rowid for the virtual table. The ** rowid should be written to *pRowid. */ static int fts5RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; int ePlan = pCsr->ePlan; assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 ); switch( ePlan ){ case FTS5_PLAN_SPECIAL: *pRowid = 0; break; case FTS5_PLAN_SOURCE: case FTS5_PLAN_MATCH: case FTS5_PLAN_SORTED_MATCH: *pRowid = fts5CursorRowid(pCsr); break; default: *pRowid = sqlite3_column_int64(pCsr->pStmt, 0); break; } return SQLITE_OK; } /* ** If the cursor requires seeking (bSeekRequired flag is set), seek it. ** Return SQLITE_OK if no error occurs, or an SQLite error code otherwise. ** ** If argument bErrormsg is true and an error occurs, an error message may ** be left in sqlite3_vtab.zErrMsg. */ static int fts5SeekCursor(Fts5Cursor *pCsr, int bErrormsg){ int rc = SQLITE_OK; /* If the cursor does not yet have a statement handle, obtain one now. 
*/ if( pCsr->pStmt==0 ){ Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); int eStmt = fts5StmtType(pCsr); rc = sqlite3Fts5StorageStmt( pTab->pStorage, eStmt, &pCsr->pStmt, (bErrormsg?&pTab->base.zErrMsg:0) ); assert( rc!=SQLITE_OK || pTab->base.zErrMsg==0 ); assert( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) ); } if( rc==SQLITE_OK && CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) ){ assert( pCsr->pExpr ); sqlite3_reset(pCsr->pStmt); sqlite3_bind_int64(pCsr->pStmt, 1, fts5CursorRowid(pCsr)); rc = sqlite3_step(pCsr->pStmt); if( rc==SQLITE_ROW ){ rc = SQLITE_OK; CsrFlagClear(pCsr, FTS5CSR_REQUIRE_CONTENT); }else{ rc = sqlite3_reset(pCsr->pStmt); if( rc==SQLITE_OK ){ rc = FTS5_CORRUPT; } } } return rc; } static void fts5SetVtabError(Fts5Table *p, const char *zFormat, ...){ va_list ap; /* ... printf arguments */ va_start(ap, zFormat); assert( p->base.zErrMsg==0 ); p->base.zErrMsg = sqlite3_vmprintf(zFormat, ap); va_end(ap); } /* ** This function is called to handle an FTS INSERT command. In other words, ** an INSERT statement of the form: ** ** INSERT INTO fts(fts) VALUES($pCmd) ** INSERT INTO fts(fts, rank) VALUES($pCmd, $pVal) ** ** Argument pVal is the value assigned to column "fts" by the INSERT ** statement. This function returns SQLITE_OK if successful, or an SQLite ** error code if an error occurs. ** ** The commands implemented by this function are documented in the "Special ** INSERT Directives" section of the documentation. It should be updated if ** more commands are added to this function. 
*/ static int fts5SpecialInsert( Fts5Table *pTab, /* Fts5 table object */ sqlite3_value *pCmd, /* Value inserted into special column */ sqlite3_value *pVal /* Value inserted into rank column */ ){ Fts5Config *pConfig = pTab->pConfig; const char *z = (const char*)sqlite3_value_text(pCmd); int rc = SQLITE_OK; int bError = 0; if( 0==sqlite3_stricmp("delete-all", z) ){ if( pConfig->eContent==FTS5_CONTENT_NORMAL ){ fts5SetVtabError(pTab, "'delete-all' may only be used with a " "contentless or external content fts5 table" ); rc = SQLITE_ERROR; }else{ rc = sqlite3Fts5StorageDeleteAll(pTab->pStorage); } }else if( 0==sqlite3_stricmp("rebuild", z) ){ if( pConfig->eContent==FTS5_CONTENT_NONE ){ fts5SetVtabError(pTab, "'rebuild' may not be used with a contentless fts5 table" ); rc = SQLITE_ERROR; }else{ rc = sqlite3Fts5StorageRebuild(pTab->pStorage); } }else if( 0==sqlite3_stricmp("optimize", z) ){ rc = sqlite3Fts5StorageOptimize(pTab->pStorage); }else if( 0==sqlite3_stricmp("merge", z) ){ int nMerge = sqlite3_value_int(pVal); rc = sqlite3Fts5StorageMerge(pTab->pStorage, nMerge); }else if( 0==sqlite3_stricmp("integrity-check", z) ){ rc = sqlite3Fts5StorageIntegrity(pTab->pStorage); }else{ rc = sqlite3Fts5IndexLoadConfig(pTab->pIndex); if( rc==SQLITE_OK ){ rc = sqlite3Fts5ConfigSetValue(pTab->pConfig, z, pVal, &bError); } if( rc==SQLITE_OK ){ if( bError ){ rc = SQLITE_ERROR; }else{ rc = sqlite3Fts5StorageConfigValue(pTab->pStorage, z, pVal, 0); } } } return rc; } static int fts5SpecialDelete( Fts5Table *pTab, sqlite3_value **apVal, sqlite3_int64 *piRowid ){ int rc = SQLITE_OK; int eType1 = sqlite3_value_type(apVal[1]); if( eType1==SQLITE_INTEGER ){ sqlite3_int64 iDel = sqlite3_value_int64(apVal[1]); rc = sqlite3Fts5StorageSpecialDelete(pTab->pStorage, iDel, &apVal[2]); } return rc; } /* ** This function is the implementation of the xUpdate callback used by ** FTS3 virtual tables. It is invoked by SQLite each time a row is to be ** inserted, updated or deleted. 
*/ static int fts5UpdateMethod( sqlite3_vtab *pVtab, /* Virtual table handle */ int nArg, /* Size of argument array */ sqlite3_value **apVal, /* Array of arguments */ sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */ ){ Fts5Table *pTab = (Fts5Table*)pVtab; Fts5Config *pConfig = pTab->pConfig; int eType0; /* value_type() of apVal[0] */ int eConflict; /* ON CONFLICT for this DML */ int rc = SQLITE_OK; /* Return code */ /* A transaction must be open when this is called. */ assert( pTab->ts.eState==1 ); assert( pTab->pConfig->pzErrmsg==0 ); pTab->pConfig->pzErrmsg = &pTab->base.zErrMsg; /* A delete specifies a single argument - the rowid of the row to remove. ** Update and insert operations pass: ** ** 1. The "old" rowid, or NULL. ** 2. The "new" rowid. ** 3. Values for each of the nCol matchable columns. ** 4. Values for the two hidden columns (<tablename> and "rank"). */ eType0 = sqlite3_value_type(apVal[0]); eConflict = sqlite3_vtab_on_conflict(pConfig->db); assert( eType0==SQLITE_INTEGER || eType0==SQLITE_NULL ); assert( pVtab->zErrMsg==0 ); assert( (nArg==1 && eType0==SQLITE_INTEGER) || nArg==(2+pConfig->nCol+2) ); fts5TripCursors(pTab); if( eType0==SQLITE_INTEGER ){ if( fts5IsContentless(pTab) ){ pTab->base.zErrMsg = sqlite3_mprintf( "cannot %s contentless fts5 table: %s", (nArg>1 ? 
"UPDATE" : "DELETE from"), pConfig->zName ); rc = SQLITE_ERROR; }else{ i64 iDel = sqlite3_value_int64(apVal[0]); /* Rowid to delete */ rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel); } }else{ sqlite3_value *pCmd = apVal[2 + pConfig->nCol]; assert( nArg>1 ); if( SQLITE_NULL!=sqlite3_value_type(pCmd) ){ const char *z = (const char*)sqlite3_value_text(pCmd); if( pConfig->eContent!=FTS5_CONTENT_NORMAL && 0==sqlite3_stricmp("delete", z) ){ rc = fts5SpecialDelete(pTab, apVal, pRowid); }else{ rc = fts5SpecialInsert(pTab, pCmd, apVal[2 + pConfig->nCol + 1]); } goto update_method_out; } } if( rc==SQLITE_OK && nArg>1 ){ rc = sqlite3Fts5StorageInsert(pTab->pStorage, apVal, eConflict, pRowid); } update_method_out: pTab->pConfig->pzErrmsg = 0; return rc; } /* ** Implementation of xSync() method. */ static int fts5SyncMethod(sqlite3_vtab *pVtab){ int rc; Fts5Table *pTab = (Fts5Table*)pVtab; fts5CheckTransactionState(pTab, FTS5_SYNC, 0); pTab->pConfig->pzErrmsg = &pTab->base.zErrMsg; fts5TripCursors(pTab); rc = sqlite3Fts5StorageSync(pTab->pStorage, 1); pTab->pConfig->pzErrmsg = 0; return rc; } /* ** Implementation of xBegin() method. */ static int fts5BeginMethod(sqlite3_vtab *pVtab){ fts5CheckTransactionState((Fts5Table*)pVtab, FTS5_BEGIN, 0); return SQLITE_OK; } /* ** Implementation of xCommit() method. This is a no-op. The contents of ** the pending-terms hash-table have already been flushed into the database ** by fts5SyncMethod(). */ static int fts5CommitMethod(sqlite3_vtab *pVtab){ fts5CheckTransactionState((Fts5Table*)pVtab, FTS5_COMMIT, 0); return SQLITE_OK; } /* ** Implementation of xRollback(). Discard the contents of the pending-terms ** hash-table. Any changes made to the database are reverted by SQLite. 
*/ static int fts5RollbackMethod(sqlite3_vtab *pVtab){ int rc; Fts5Table *pTab = (Fts5Table*)pVtab; fts5CheckTransactionState(pTab, FTS5_ROLLBACK, 0); rc = sqlite3Fts5StorageRollback(pTab->pStorage); return rc; } static void *fts5ApiUserData(Fts5Context *pCtx){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; return pCsr->pAux->pUserData; } static int fts5ApiColumnCount(Fts5Context *pCtx){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; return ((Fts5Table*)(pCsr->base.pVtab))->pConfig->nCol; } static int fts5ApiColumnTotalSize( Fts5Context *pCtx, int iCol, sqlite3_int64 *pnToken ){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); return sqlite3Fts5StorageSize(pTab->pStorage, iCol, pnToken); } static int fts5ApiRowCount(Fts5Context *pCtx, i64 *pnRow){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow); } static int fts5ApiTokenize( Fts5Context *pCtx, const char *pText, int nText, void *pUserData, int (*xToken)(void*, const char*, int, int, int) ){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); return sqlite3Fts5Tokenize(pTab->pConfig, pText, nText, pUserData, xToken); } static int fts5ApiPhraseCount(Fts5Context *pCtx){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; return sqlite3Fts5ExprPhraseCount(pCsr->pExpr); } static int fts5ApiPhraseSize(Fts5Context *pCtx, int iPhrase){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; return sqlite3Fts5ExprPhraseSize(pCsr->pExpr, iPhrase); } static int fts5CsrPoslist(Fts5Cursor *pCsr, int iPhrase, const u8 **pa){ int n; if( pCsr->pSorter ){ Fts5Sorter *pSorter = pCsr->pSorter; int i1 = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]); n = pSorter->aIdx[iPhrase] - i1; *pa = &pSorter->aPoslist[i1]; }else{ n = sqlite3Fts5ExprPoslist(pCsr->pExpr, iPhrase, pa); } return n; } /* ** Ensure that the Fts5Cursor.nInstCount and aInst[] variables are populated ** correctly for the current view. 
Return SQLITE_OK if successful, or an ** SQLite error code otherwise. */ static int fts5CacheInstArray(Fts5Cursor *pCsr){ int rc = SQLITE_OK; if( pCsr->aInst==0 ){ Fts5PoslistReader *aIter; /* One iterator for each phrase */ int nIter; /* Number of iterators/phrases */ int nByte; nIter = sqlite3Fts5ExprPhraseCount(pCsr->pExpr); nByte = sizeof(Fts5PoslistReader) * nIter; aIter = (Fts5PoslistReader*)sqlite3Fts5MallocZero(&rc, nByte); if( aIter ){ Fts5Buffer buf = {0, 0, 0}; /* Build up aInst[] here */ int nInst = 0; /* Number instances seen so far */ int i; /* Initialize all iterators */ for(i=0; i<nIter; i++){ const u8 *a; int n = fts5CsrPoslist(pCsr, i, &a); sqlite3Fts5PoslistReaderInit(-1, a, n, &aIter[i]); } while( 1 ){ int *aInst; int iBest = -1; for(i=0; i<nIter; i++){ if( (aIter[i].bEof==0) && (iBest<0 || aIter[i].iPos<aIter[iBest].iPos) ){ iBest = i; } } if( iBest<0 ) break; nInst++; if( sqlite3Fts5BufferGrow(&rc, &buf, nInst * sizeof(int) * 3) ) break; aInst = &((int*)buf.p)[3 * (nInst-1)]; aInst[0] = iBest; aInst[1] = FTS5_POS2COLUMN(aIter[iBest].iPos); aInst[2] = FTS5_POS2OFFSET(aIter[iBest].iPos); sqlite3Fts5PoslistReaderNext(&aIter[iBest]); } pCsr->aInst = (int*)buf.p; pCsr->nInstCount = nInst; sqlite3_free(aIter); } } return rc; } static int fts5ApiInstCount(Fts5Context *pCtx, int *pnInst){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; int rc; if( SQLITE_OK==(rc = fts5CacheInstArray(pCsr)) ){ *pnInst = pCsr->nInstCount; } return rc; } static int fts5ApiInst( Fts5Context *pCtx, int iIdx, int *piPhrase, int *piCol, int *piOff ){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; int rc; if( SQLITE_OK==(rc = fts5CacheInstArray(pCsr)) ){ if( iIdx<0 || iIdx>=pCsr->nInstCount ){ rc = SQLITE_RANGE; }else{ *piPhrase = pCsr->aInst[iIdx*3]; *piCol = pCsr->aInst[iIdx*3 + 1]; *piOff = pCsr->aInst[iIdx*3 + 2]; } } return rc; } static sqlite3_int64 fts5ApiRowid(Fts5Context *pCtx){ return fts5CursorRowid((Fts5Cursor*)pCtx); } static int fts5ApiColumnText( Fts5Context *pCtx, int iCol, 
const char **pz, int *pn ){ int rc = SQLITE_OK; Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; if( fts5IsContentless((Fts5Table*)(pCsr->base.pVtab)) ){ *pz = 0; *pn = 0; }else{ rc = fts5SeekCursor(pCsr, 0); if( rc==SQLITE_OK ){ *pz = (const char*)sqlite3_column_text(pCsr->pStmt, iCol+1); *pn = sqlite3_column_bytes(pCsr->pStmt, iCol+1); } } return rc; } static int fts5ColumnSizeCb( void *pContext, /* Pointer to int */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ int iEnd /* End offset of token */ ){ int *pCnt = (int*)pContext; *pCnt = *pCnt + 1; return SQLITE_OK; } static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); Fts5Config *pConfig = pTab->pConfig; int rc = SQLITE_OK; if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_DOCSIZE) ){ if( pConfig->bColumnsize ){ i64 iRowid = fts5CursorRowid(pCsr); rc = sqlite3Fts5StorageDocsize(pTab->pStorage, iRowid, pCsr->aColumnSize); }else if( pConfig->zContent==0 ){ int i; for(i=0; i<pConfig->nCol; i++){ if( pConfig->abUnindexed[i]==0 ){ pCsr->aColumnSize[i] = -1; } } }else{ int i; for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){ if( pConfig->abUnindexed[i]==0 ){ const char *z; int n; void *p = (void*)(&pCsr->aColumnSize[i]); pCsr->aColumnSize[i] = 0; rc = fts5ApiColumnText(pCtx, i, &z, &n); if( rc==SQLITE_OK ){ rc = sqlite3Fts5Tokenize(pConfig, z, n, p, fts5ColumnSizeCb); } } } } CsrFlagClear(pCsr, FTS5CSR_REQUIRE_DOCSIZE); } if( iCol<0 ){ int i; *pnToken = 0; for(i=0; i<pConfig->nCol; i++){ *pnToken += pCsr->aColumnSize[i]; } }else if( iCol<pConfig->nCol ){ *pnToken = pCsr->aColumnSize[iCol]; }else{ *pnToken = 0; rc = SQLITE_RANGE; } return rc; } /* ** Implementation of the xSetAuxdata() method. 
*/ static int fts5ApiSetAuxdata( Fts5Context *pCtx, /* Fts5 context */ void *pPtr, /* Pointer to save as auxdata */ void(*xDelete)(void*) /* Destructor for pPtr (or NULL) */ ){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; Fts5Auxdata *pData; /* Search through the cursors list of Fts5Auxdata objects for one that ** corresponds to the currently executing auxiliary function. */ for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){ if( pData->pAux==pCsr->pAux ) break; } if( pData ){ if( pData->xDelete ){ pData->xDelete(pData->pPtr); } }else{ int rc = SQLITE_OK; pData = (Fts5Auxdata*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Auxdata)); if( pData==0 ){ if( xDelete ) xDelete(pPtr); return rc; } pData->pAux = pCsr->pAux; pData->pNext = pCsr->pAuxdata; pCsr->pAuxdata = pData; } pData->xDelete = xDelete; pData->pPtr = pPtr; return SQLITE_OK; } static void *fts5ApiGetAuxdata(Fts5Context *pCtx, int bClear){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; Fts5Auxdata *pData; void *pRet = 0; for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){ if( pData->pAux==pCsr->pAux ) break; } if( pData ){ pRet = pData->pPtr; if( bClear ){ pData->pPtr = 0; pData->xDelete = 0; } } return pRet; } static int fts5ApiQueryPhrase(Fts5Context*, int, void*, int(*)(const Fts5ExtensionApi*, Fts5Context*, void*) ); static const Fts5ExtensionApi sFts5Api = { 1, /* iVersion */ fts5ApiUserData, fts5ApiColumnCount, fts5ApiRowCount, fts5ApiColumnTotalSize, fts5ApiTokenize, fts5ApiPhraseCount, fts5ApiPhraseSize, fts5ApiInstCount, fts5ApiInst, fts5ApiRowid, fts5ApiColumnText, fts5ApiColumnSize, fts5ApiQueryPhrase, fts5ApiSetAuxdata, fts5ApiGetAuxdata, }; /* ** Implementation of API function xQueryPhrase(). 
*/ static int fts5ApiQueryPhrase( Fts5Context *pCtx, int iPhrase, void *pUserData, int(*xCallback)(const Fts5ExtensionApi*, Fts5Context*, void*) ){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); int rc; Fts5Cursor *pNew = 0; rc = fts5OpenMethod(pCsr->base.pVtab, (sqlite3_vtab_cursor**)&pNew); if( rc==SQLITE_OK ){ Fts5Config *pConf = pTab->pConfig; pNew->ePlan = FTS5_PLAN_MATCH; pNew->iFirstRowid = SMALLEST_INT64; pNew->iLastRowid = LARGEST_INT64; pNew->base.pVtab = (sqlite3_vtab*)pTab; rc = sqlite3Fts5ExprPhraseExpr(pConf, pCsr->pExpr, iPhrase, &pNew->pExpr); } if( rc==SQLITE_OK ){ for(rc = fts5CursorFirst(pTab, pNew, 0); rc==SQLITE_OK && CsrFlagTest(pNew, FTS5CSR_EOF)==0; rc = fts5NextMethod((sqlite3_vtab_cursor*)pNew) ){ rc = xCallback(&sFts5Api, (Fts5Context*)pNew, pUserData); if( rc!=SQLITE_OK ){ if( rc==SQLITE_DONE ) rc = SQLITE_OK; break; } } } fts5CloseMethod((sqlite3_vtab_cursor*)pNew); return rc; } static void fts5ApiInvoke( Fts5Auxiliary *pAux, Fts5Cursor *pCsr, sqlite3_context *context, int argc, sqlite3_value **argv ){ assert( pCsr->pAux==0 ); pCsr->pAux = pAux; pAux->xFunc(&sFts5Api, (Fts5Context*)pCsr, context, argc, argv); pCsr->pAux = 0; } static Fts5Cursor *fts5CursorFromCsrid(Fts5Global *pGlobal, i64 iCsrId){ Fts5Cursor *pCsr; for(pCsr=pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){ if( pCsr->iCsrId==iCsrId ) break; } return pCsr; } static void fts5ApiCallback( sqlite3_context *context, int argc, sqlite3_value **argv ){ Fts5Auxiliary *pAux; Fts5Cursor *pCsr; i64 iCsrId; assert( argc>=1 ); pAux = (Fts5Auxiliary*)sqlite3_user_data(context); iCsrId = sqlite3_value_int64(argv[0]); pCsr = fts5CursorFromCsrid(pAux->pGlobal, iCsrId); if( pCsr==0 ){ char *zErr = sqlite3_mprintf("no such cursor: %lld", iCsrId); sqlite3_result_error(context, zErr, -1); sqlite3_free(zErr); }else{ fts5ApiInvoke(pAux, pCsr, context, argc-1, &argv[1]); } } /* ** Given cursor id iId, return a pointer to the corresponding Fts5Index ** object. 
Or NULL If the cursor id does not exist. ** ** If successful, set *pnCol to the number of indexed columns in the ** table before returning. */ Fts5Index *sqlite3Fts5IndexFromCsrid( Fts5Global *pGlobal, i64 iCsrId, int *pnCol ){ Fts5Cursor *pCsr; Fts5Table *pTab; pCsr = fts5CursorFromCsrid(pGlobal, iCsrId); pTab = (Fts5Table*)pCsr->base.pVtab; *pnCol = pTab->pConfig->nCol; return pTab->pIndex; } /* ** Return a "position-list blob" corresponding to the current position of ** cursor pCsr via sqlite3_result_blob(). A position-list blob contains ** the current position-list for each phrase in the query associated with ** cursor pCsr. ** ** A position-list blob begins with (nPhrase-1) varints, where nPhrase is ** the number of phrases in the query. Following the varints are the ** concatenated position lists for each phrase, in order. ** ** The first varint (if it exists) contains the size of the position list ** for phrase 0. The second (same disclaimer) contains the size of position ** list 1. And so on. There is no size field for the final position list, ** as it can be derived from the total size of the blob. */ static int fts5PoslistBlob(sqlite3_context *pCtx, Fts5Cursor *pCsr){ int i; int rc = SQLITE_OK; int nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr); Fts5Buffer val; memset(&val, 0, sizeof(Fts5Buffer)); /* Append the varints */ for(i=0; i<(nPhrase-1); i++){ const u8 *dummy; int nByte = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &dummy); sqlite3Fts5BufferAppendVarint(&rc, &val, nByte); } /* Append the position lists */ for(i=0; i<nPhrase; i++){ const u8 *pPoslist; int nPoslist; nPoslist = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &pPoslist); sqlite3Fts5BufferAppendBlob(&rc, &val, nPoslist, pPoslist); } sqlite3_result_blob(pCtx, val.p, val.n, sqlite3_free); return rc; } /* ** This is the xColumn method, called by SQLite to request a value from ** the row that the supplied cursor currently points to. 
*/
static int fts5ColumnMethod(
  sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
  sqlite3_context *pCtx,          /* Context for sqlite3_result_xxx() calls */
  int iCol                        /* Index of column to read value from */
){
  Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab);
  Fts5Config *pConfig = pTab->pConfig;
  Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
  int rc = SQLITE_OK;

  assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 );

  if( pCsr->ePlan==FTS5_PLAN_SPECIAL ){
    /* Under this plan only the column with index nCol produces a value;
    ** no result is set for any other column. */
    if( iCol==pConfig->nCol ){
      sqlite3_result_int64(pCtx, pCsr->iSpecial);
    }
  }else if( iCol==pConfig->nCol ){
    /* User is requesting the value of the special column with the same name
    ** as the table. Return the cursor integer id number. This value is only
    ** useful in that it may be passed as the first argument to an FTS5
    ** auxiliary function.  */
    sqlite3_result_int64(pCtx, pCsr->iCsrId);
  }else if( iCol==pConfig->nCol+1 ){

    /* The value of the "rank" column. */
    if( pCsr->ePlan==FTS5_PLAN_SOURCE ){
      fts5PoslistBlob(pCtx, pCsr);
    }else if(
        pCsr->ePlan==FTS5_PLAN_MATCH
     || pCsr->ePlan==FTS5_PLAN_SORTED_MATCH
    ){
      /* Look up the rank function on first use only; if the lookup fails
      ** its error code is returned and no result is set. */
      if( pCsr->pRank || SQLITE_OK==(rc = fts5FindRankFunction(pCsr)) ){
        fts5ApiInvoke(pCsr->pRank, pCsr, pCtx, pCsr->nRankArg, pCsr->apRankArg);
      }
    }
  }else if( !fts5IsContentless(pTab) ){
    /* An ordinary user column. The value is read from column iCol+1 of the
    ** cursor's statement (presumably column 0 holds the rowid - TODO
    ** confirm against the statements built in fts5_storage.c). */
    rc = fts5SeekCursor(pCsr, 1);
    if( rc==SQLITE_OK ){
      sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1));
    }
  }
  return rc;
}


/*
** This routine implements the xFindFunction method for the FTS5
** virtual table.
**
** If an auxiliary function named zName is found, *pxFunc is set to
** fts5ApiCallback, *ppArg to the Fts5Auxiliary object, and 1 is returned.
** Otherwise 0 is returned and the output parameters are left untouched.
** The nArg parameter is not consulted.
*/
static int fts5FindFunctionMethod(
  sqlite3_vtab *pVtab,            /* Virtual table handle */
  int nArg,                       /* Number of SQL function arguments */
  const char *zName,              /* Name of SQL function */
  void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */
  void **ppArg                    /* OUT: User data for *pxFunc */
){
  Fts5Table *pTab = (Fts5Table*)pVtab;
  Fts5Auxiliary *pAux;

  pAux = fts5FindAuxiliary(pTab, zName);
  if( pAux ){
    *pxFunc = fts5ApiCallback;
    *ppArg = (void*)pAux;
    return 1;
  }

  /* No function of the specified name was found. Return 0. */
  return 0;
}

/*
** Implementation of FTS5 xRename method. Rename an fts5 table.
** The work is delegated to sqlite3Fts5StorageRename().
*/
static int fts5RenameMethod(
  sqlite3_vtab *pVtab,            /* Virtual table handle */
  const char *zName               /* New name of table */
){
  Fts5Table *pTab = (Fts5Table*)pVtab;
  return sqlite3Fts5StorageRename(pTab->pStorage, zName);
}

/*
** The xSavepoint() method.
**
** Flush the contents of the pending-terms table to disk.
*/
static int fts5SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){
  Fts5Table *pTab = (Fts5Table*)pVtab;
  fts5CheckTransactionState(pTab, FTS5_SAVEPOINT, iSavepoint);
  fts5TripCursors(pTab);
  return sqlite3Fts5StorageSync(pTab->pStorage, 0);
}

/*
** The xRelease() method.
**
** Despite what earlier revisions of this comment said, this is not a
** no-op: like xSavepoint() it trips all open cursors and then syncs the
** storage layer via sqlite3Fts5StorageSync().
*/
static int fts5ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){
  Fts5Table *pTab = (Fts5Table*)pVtab;
  fts5CheckTransactionState(pTab, FTS5_RELEASE, iSavepoint);
  fts5TripCursors(pTab);
  return sqlite3Fts5StorageSync(pTab->pStorage, 0);
}

/*
** The xRollbackTo() method.
**
** Discard the contents of the pending terms table.
*/
static int fts5RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){
  Fts5Table *pTab = (Fts5Table*)pVtab;
  fts5CheckTransactionState(pTab, FTS5_ROLLBACKTO, iSavepoint);
  fts5TripCursors(pTab);
  return sqlite3Fts5StorageRollback(pTab->pStorage);
}

/*
** Register a new auxiliary function with global context pGlobal.
**
** The function name is first registered with sqlite3_overload_function().
** If that succeeds, a new Fts5Auxiliary object (with a private copy of
** zName stored directly after it in the same allocation) is pushed onto
** the front of the pGlobal->pAux list. Returns SQLITE_OK, the error code
** from sqlite3_overload_function(), or SQLITE_NOMEM.
*/
static int fts5CreateAux(
  fts5_api *pApi,                 /* Global context (one per db handle) */
  const char *zName,              /* Name of new function */
  void *pUserData,                /* User data for aux. function */
  fts5_extension_function xFunc,  /* Aux. function implementation */
  void(*xDestroy)(void*)          /* Destructor for pUserData */
){
  Fts5Global *pGlobal = (Fts5Global*)pApi;
  int rc = sqlite3_overload_function(pGlobal->db, zName, -1);
  if( rc==SQLITE_OK ){
    Fts5Auxiliary *pAux;
    int nByte;                      /* Bytes of space to allocate */

    nByte = sizeof(Fts5Auxiliary) + strlen(zName) + 1;
    pAux = (Fts5Auxiliary*)sqlite3_malloc(nByte);
    if( pAux ){
      memset(pAux, 0, nByte);
      /* The name copy lives in the tail of the same allocation. */
      pAux->zFunc = (char*)&pAux[1];
      strcpy(pAux->zFunc, zName);
      pAux->pGlobal = pGlobal;
      pAux->pUserData = pUserData;
      pAux->xFunc = xFunc;
      pAux->xDestroy = xDestroy;
      pAux->pNext = pGlobal->pAux;
      pGlobal->pAux = pAux;
    }else{
      rc = SQLITE_NOMEM;
    }
  }

  return rc;
}

/*
** Register a new tokenizer. This is the implementation of the
** fts5_api.xCreateTokenizer() method.
**
** The new module is pushed onto the front of the pGlobal->pTok list. The
** first tokenizer ever registered (the one whose pNext is NULL) also
** becomes the default tokenizer, pGlobal->pDfltTok. Returns SQLITE_OK or
** SQLITE_NOMEM.
*/
static int fts5CreateTokenizer(
  fts5_api *pApi,                 /* Global context (one per db handle) */
  const char *zName,              /* Name of new tokenizer */
  void *pUserData,                /* User data for tokenizer */
  fts5_tokenizer *pTokenizer,     /* Tokenizer implementation */
  void(*xDestroy)(void*)          /* Destructor for pUserData */
){
  Fts5Global *pGlobal = (Fts5Global*)pApi;
  Fts5TokenizerModule *pNew;
  int nByte;                      /* Bytes of space to allocate */
  int rc = SQLITE_OK;

  nByte = sizeof(Fts5TokenizerModule) + strlen(zName) + 1;
  pNew = (Fts5TokenizerModule*)sqlite3_malloc(nByte);
  if( pNew ){
    memset(pNew, 0, nByte);
    /* The name copy lives in the tail of the same allocation. */
    pNew->zName = (char*)&pNew[1];
    strcpy(pNew->zName, zName);
    pNew->pUserData = pUserData;
    pNew->x = *pTokenizer;
    pNew->xDestroy = xDestroy;
    pNew->pNext = pGlobal->pTok;
    pGlobal->pTok = pNew;
    if( pNew->pNext==0 ){
      pGlobal->pDfltTok = pNew;
    }
  }else{
    rc = SQLITE_NOMEM;
  }

  return rc;
}

/*
** Return the tokenizer module registered under zName (compared with
** sqlite3_stricmp(), i.e. case-insensitively), or the default tokenizer
** if zName is NULL. Returns NULL if no matching module exists.
*/
static Fts5TokenizerModule *fts5LocateTokenizer(
  Fts5Global *pGlobal,
  const char *zName
){
  Fts5TokenizerModule *pMod = 0;

  if( zName==0 ){
    pMod = pGlobal->pDfltTok;
  }else{
    for(pMod=pGlobal->pTok; pMod; pMod=pMod->pNext){
      if( sqlite3_stricmp(zName, pMod->zName)==0 ) break;
    }
  }

  return pMod;
}

/*
** Find a tokenizer.
This is the implementation of the ** fts5_api.xFindTokenizer() method. */ static int fts5FindTokenizer( fts5_api *pApi, /* Global context (one per db handle) */ const char *zName, /* Name of new function */ void **ppUserData, fts5_tokenizer *pTokenizer /* Populate this object */ ){ int rc = SQLITE_OK; Fts5TokenizerModule *pMod; pMod = fts5LocateTokenizer((Fts5Global*)pApi, zName); if( pMod ){ *pTokenizer = pMod->x; *ppUserData = pMod->pUserData; }else{ memset(pTokenizer, 0, sizeof(fts5_tokenizer)); rc = SQLITE_ERROR; } return rc; } int sqlite3Fts5GetTokenizer( Fts5Global *pGlobal, const char **azArg, int nArg, Fts5Tokenizer **ppTok, fts5_tokenizer **ppTokApi, char **pzErr ){ Fts5TokenizerModule *pMod; int rc = SQLITE_OK; pMod = fts5LocateTokenizer(pGlobal, nArg==0 ? 0 : azArg[0]); if( pMod==0 ){ assert( nArg>0 ); rc = SQLITE_ERROR; *pzErr = sqlite3_mprintf("no such tokenizer: %s", azArg[0]); }else{ rc = pMod->x.xCreate(pMod->pUserData, &azArg[1], (nArg?nArg-1:0), ppTok); *ppTokApi = &pMod->x; if( rc!=SQLITE_OK && pzErr ){ *pzErr = sqlite3_mprintf("error in tokenizer constructor"); } } if( rc!=SQLITE_OK ){ *ppTokApi = 0; *ppTok = 0; } return rc; } static void fts5ModuleDestroy(void *pCtx){ Fts5TokenizerModule *pTok, *pNextTok; Fts5Auxiliary *pAux, *pNextAux; Fts5Global *pGlobal = (Fts5Global*)pCtx; for(pAux=pGlobal->pAux; pAux; pAux=pNextAux){ pNextAux = pAux->pNext; if( pAux->xDestroy ) pAux->xDestroy(pAux->pUserData); sqlite3_free(pAux); } for(pTok=pGlobal->pTok; pTok; pTok=pNextTok){ pNextTok = pTok->pNext; if( pTok->xDestroy ) pTok->xDestroy(pTok->pUserData); sqlite3_free(pTok); } sqlite3_free(pGlobal); } static void fts5Fts5Func( sqlite3_context *pCtx, /* Function call context */ int nArg, /* Number of args */ sqlite3_value **apVal /* Function arguments */ ){ Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_data(pCtx); char buf[8]; assert( nArg==0 ); assert( sizeof(buf)>=sizeof(pGlobal) ); memcpy(buf, (void*)&pGlobal, sizeof(pGlobal)); sqlite3_result_blob(pCtx, 
buf, sizeof(pGlobal), SQLITE_TRANSIENT); } #ifdef _WIN32 __declspec(dllexport) #endif int sqlite3_fts5_init( sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi ){ static const sqlite3_module fts5Mod = { /* iVersion */ 2, /* xCreate */ fts5CreateMethod, /* xConnect */ fts5ConnectMethod, /* xBestIndex */ fts5BestIndexMethod, /* xDisconnect */ fts5DisconnectMethod, /* xDestroy */ fts5DestroyMethod, /* xOpen */ fts5OpenMethod, /* xClose */ fts5CloseMethod, /* xFilter */ fts5FilterMethod, /* xNext */ fts5NextMethod, /* xEof */ fts5EofMethod, /* xColumn */ fts5ColumnMethod, /* xRowid */ fts5RowidMethod, /* xUpdate */ fts5UpdateMethod, /* xBegin */ fts5BeginMethod, /* xSync */ fts5SyncMethod, /* xCommit */ fts5CommitMethod, /* xRollback */ fts5RollbackMethod, /* xFindFunction */ fts5FindFunctionMethod, /* xRename */ fts5RenameMethod, /* xSavepoint */ fts5SavepointMethod, /* xRelease */ fts5ReleaseMethod, /* xRollbackTo */ fts5RollbackToMethod, }; int rc; Fts5Global *pGlobal = 0; SQLITE_EXTENSION_INIT2(pApi); pGlobal = (Fts5Global*)sqlite3_malloc(sizeof(Fts5Global)); if( pGlobal==0 ){ rc = SQLITE_NOMEM; }else{ void *p = (void*)pGlobal; memset(pGlobal, 0, sizeof(Fts5Global)); pGlobal->db = db; pGlobal->api.iVersion = 1; pGlobal->api.xCreateFunction = fts5CreateAux; pGlobal->api.xCreateTokenizer = fts5CreateTokenizer; pGlobal->api.xFindTokenizer = fts5FindTokenizer; rc = sqlite3_create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy); if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db); if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(pGlobal, db); if( rc==SQLITE_OK ) rc = sqlite3Fts5AuxInit(&pGlobal->api); if( rc==SQLITE_OK ) rc = sqlite3Fts5TokenizerInit(&pGlobal->api); if( rc==SQLITE_OK ) rc = sqlite3Fts5VocabInit(pGlobal, db); if( rc==SQLITE_OK ){ rc = sqlite3_create_function( db, "fts5", 0, SQLITE_UTF8, p, fts5Fts5Func, 0, 0 ); } } return rc; } #endif /* defined(SQLITE_ENABLE_FTS5) */ |
Added ext/fts5/fts5_storage.c.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > 
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 
480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 
980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 |
/*
** 2014 May 31
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
*/

#ifdef SQLITE_ENABLE_FTS5

#include "fts5Int.h"

/*
** Storage handle. Wraps the shadow tables of one FTS5 table together with
** the running totals kept in the "averages" record.
*/
struct Fts5Storage {
  Fts5Config *pConfig;
  Fts5Index *pIndex;
  int bTotalsValid;               /* True if nTotalRow/aTotalSize[] are valid */
  i64 nTotalRow;                  /* Total number of rows in FTS table */
  i64 *aTotalSize;                /* Total sizes of each column */
  sqlite3_stmt *aStmt[11];        /* Cached statements, indexed by FTS5_STMT_XXX */
};


/* The first three statement ids are defined outside this file; the checks
** below make sure they still match the indexes of the azStmt[] array in
** fts5StorageGetStmt(). */
#if FTS5_STMT_SCAN_ASC!=0
# error "FTS5_STMT_SCAN_ASC mismatch"
#endif
#if FTS5_STMT_SCAN_DESC!=1
# error "FTS5_STMT_SCAN_DESC mismatch"
#endif
#if FTS5_STMT_LOOKUP!=2
# error "FTS5_STMT_LOOKUP mismatch"
#endif

#define FTS5_STMT_INSERT_CONTENT  3
#define FTS5_STMT_REPLACE_CONTENT 4

#define FTS5_STMT_DELETE_CONTENT  5
#define FTS5_STMT_REPLACE_DOCSIZE  6
#define FTS5_STMT_DELETE_DOCSIZE  7

#define FTS5_STMT_LOOKUP_DOCSIZE  8

#define FTS5_STMT_REPLACE_CONFIG 9

#define FTS5_STMT_SCAN 10

/*
** Obtain the prepared statement identified by eStmt (an FTS5_STMT_XXX
** constant). The statement is compiled the first time it is requested and
** cached in p->aStmt[eStmt]; later requests return the cached handle.
**
** *ppStmt is always set to the current value of p->aStmt[eStmt]. Return
** SQLITE_OK if successful, or an SQLite error code if an error occurs.
** If preparing the statement fails and pzErrMsg is not NULL, *pzErrMsg
** is set to a copy of the database error message allocated with
** sqlite3_mprintf().
*/
static int fts5StorageGetStmt(
  Fts5Storage *p,                 /* Storage handle */
  int eStmt,                      /* FTS5_STMT_XXX constant */
  sqlite3_stmt **ppStmt,          /* OUT: Prepared statement handle */
  char **pzErrMsg                 /* OUT: Error message (if any) */
){
  int rc = SQLITE_OK;

  /* If there is no %_docsize table, there should be no requests for
  ** statements to operate on it.  */
  assert( p->pConfig->bColumnsize || (
        eStmt!=FTS5_STMT_REPLACE_DOCSIZE
     && eStmt!=FTS5_STMT_DELETE_DOCSIZE
     && eStmt!=FTS5_STMT_LOOKUP_DOCSIZE
  ));

  assert( eStmt>=0 && eStmt<ArraySize(p->aStmt) );
  if( p->aStmt[eStmt]==0 ){
    /* SQL templates, indexed by FTS5_STMT_XXX value. */
    const char *azStmt[] = {
      "SELECT %s FROM %s T WHERE T.%Q >= ? AND T.%Q <= ? ORDER BY T.%Q ASC",
      "SELECT %s FROM %s T WHERE T.%Q <= ? AND T.%Q >= ? ORDER BY T.%Q DESC",
      "SELECT %s FROM %s T WHERE T.%Q=?",               /* LOOKUP  */

      "INSERT INTO %Q.'%q_content' VALUES(%s)",         /* INSERT_CONTENT  */
      "REPLACE INTO %Q.'%q_content' VALUES(%s)",        /* REPLACE_CONTENT */
      "DELETE FROM %Q.'%q_content' WHERE id=?",         /* DELETE_CONTENT  */
      "REPLACE INTO %Q.'%q_docsize' VALUES(?,?)",       /* REPLACE_DOCSIZE  */
      "DELETE FROM %Q.'%q_docsize' WHERE id=?",         /* DELETE_DOCSIZE  */

      "SELECT sz FROM %Q.'%q_docsize' WHERE id=?",      /* LOOKUP_DOCSIZE  */

      "REPLACE INTO %Q.'%q_config' VALUES(?,?)",        /* REPLACE_CONFIG */
      "SELECT %s FROM %s AS T",                         /* SCAN */
    };
    Fts5Config *pC = p->pConfig;
    char *zSql = 0;

    switch( eStmt ){
      case FTS5_STMT_SCAN:
        zSql = sqlite3_mprintf(azStmt[eStmt],
            pC->zContentExprlist, pC->zContent
        );
        break;

      case FTS5_STMT_SCAN_ASC:
      case FTS5_STMT_SCAN_DESC:
        zSql = sqlite3_mprintf(azStmt[eStmt], pC->zContentExprlist,
            pC->zContent, pC->zContentRowid, pC->zContentRowid,
            pC->zContentRowid
        );
        break;

      case FTS5_STMT_LOOKUP:
        zSql = sqlite3_mprintf(azStmt[eStmt],
            pC->zContentExprlist, pC->zContent, pC->zContentRowid
        );
        break;

      case FTS5_STMT_INSERT_CONTENT:
      case FTS5_STMT_REPLACE_CONTENT: {
        /* Build the "?,?,...,?" placeholder list: one parameter for the
        ** id column plus one for each of the nCol user columns. */
        int nCol = pC->nCol + 1;
        char *zBind;
        int i;

        zBind = sqlite3_malloc(1 + nCol*2);
        if( zBind ){
          for(i=0; i<nCol; i++){
            zBind[i*2] = '?';
            zBind[i*2 + 1] = ',';
          }
          /* Overwrite the final ',' with the nul-terminator. */
          zBind[i*2-1] = '\0';
          zSql = sqlite3_mprintf(azStmt[eStmt], pC->zDb, pC->zName, zBind);
          sqlite3_free(zBind);
        }
        break;
      }

      default:
        zSql = sqlite3_mprintf(azStmt[eStmt], pC->zDb, pC->zName);
        break;
    }

    /* zSql is NULL here if either allocation above failed. */
    if( zSql==0 ){
      rc = SQLITE_NOMEM;
    }else{
      rc = sqlite3_prepare_v2(pC->db, zSql, -1, &p->aStmt[eStmt], 0);
      sqlite3_free(zSql);
      if( rc!=SQLITE_OK && pzErrMsg ){
        *pzErrMsg = sqlite3_mprintf("%s", sqlite3_errmsg(pC->db));
      }
    }
  }

  *ppStmt = p->aStmt[eStmt];
  return rc;
}


/*
** Format an SQL script printf-style (using sqlite3_vmprintf(), so %Q and
** %q are available) and run it with sqlite3_exec(). Returns SQLITE_NOMEM
** if the script cannot be allocated, otherwise the sqlite3_exec() result.
** pzErr is passed straight through to sqlite3_exec() and may be NULL.
*/
static int fts5ExecPrintf(
  sqlite3 *db,
  char **pzErr,
  const char *zFormat,
  ...
){
  int rc;
  va_list ap;                     /* ... printf arguments */
  char *zSql;

  va_start(ap, zFormat);
  zSql = sqlite3_vmprintf(zFormat, ap);

  if( zSql==0 ){
    rc = SQLITE_NOMEM;
  }else{
    rc = sqlite3_exec(db, zSql, 0, 0, pzErr);
    sqlite3_free(zSql);
  }

  va_end(ap);
  return rc;
}

/*
** Drop all shadow tables. Return SQLITE_OK if successful or an SQLite error
** code otherwise.
**
** The %_docsize table is dropped only if the table was created with
** column sizes enabled, and %_content only for ordinary (non-external,
** non-contentless) tables.
*/
int sqlite3Fts5DropAll(Fts5Config *pConfig){
  int rc = fts5ExecPrintf(pConfig->db, 0,
      "DROP TABLE IF EXISTS %Q.'%q_data';"
      "DROP TABLE IF EXISTS %Q.'%q_config';",
      pConfig->zDb, pConfig->zName,
      pConfig->zDb, pConfig->zName
  );
  if( rc==SQLITE_OK && pConfig->bColumnsize ){
    rc = fts5ExecPrintf(pConfig->db, 0,
        "DROP TABLE IF EXISTS %Q.'%q_docsize';",
        pConfig->zDb, pConfig->zName
    );
  }
  if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){
    rc = fts5ExecPrintf(pConfig->db, 0,
        "DROP TABLE IF EXISTS %Q.'%q_content';",
        pConfig->zDb, pConfig->zName
    );
  }
  return rc;
}

/*
** Rename a single shadow table from <current-name>_<zTail> to
** <zName>_<zTail>. This is a no-op if *pRc already holds an error code
** when it is called; otherwise *pRc receives the result.
*/
static void fts5StorageRenameOne(
  Fts5Config *pConfig,            /* Current FTS5 configuration */
  int *pRc,                       /* IN/OUT: Error code */
  const char *zTail,              /* Tail of table name e.g. "data", "config" */
  const char *zName               /* New name of FTS5 table */
){
  if( *pRc==SQLITE_OK ){
    *pRc = fts5ExecPrintf(pConfig->db, 0,
        "ALTER TABLE %Q.'%q_%s' RENAME TO '%q_%s';",
        pConfig->zDb, pConfig->zName, zTail, zName, zTail
    );
  }
}

/*
** Rename all shadow tables belonging to this FTS5 table so that they
** match the new table name zName. The storage is synced first. Only the
** shadow tables that exist for this configuration are renamed.
*/
int sqlite3Fts5StorageRename(Fts5Storage *pStorage, const char *zName){
  Fts5Config *pConfig = pStorage->pConfig;
  int rc = sqlite3Fts5StorageSync(pStorage, 1);

  fts5StorageRenameOne(pConfig, &rc, "data", zName);
  fts5StorageRenameOne(pConfig, &rc, "config", zName);
  if( pConfig->bColumnsize ){
    fts5StorageRenameOne(pConfig, &rc, "docsize", zName);
  }
  if( pConfig->eContent==FTS5_CONTENT_NORMAL ){
    fts5StorageRenameOne(pConfig, &rc, "content", zName);
  }
  return rc;
}

/*
** Create the shadow table named zPost, with definition zDefn. Return
** SQLITE_OK if successful, or an SQLite error code otherwise.
**
** If sqlite3_exec() produces an error message, *pzErr is set to a more
** descriptive message allocated with sqlite3_mprintf(). pzErr is
** dereferenced without a NULL check, so callers must pass a valid pointer.
*/
int sqlite3Fts5CreateTable(
  Fts5Config *pConfig,            /* FTS5 configuration */
  const char *zPost,              /* Shadow table to create (e.g. "content") */
  const char *zDefn,              /* Columns etc. for shadow table */
  int bWithout,                   /* True for without rowid */
  char **pzErr                    /* OUT: Error message */
){
  int rc;
  char *zErr = 0;

  rc = fts5ExecPrintf(pConfig->db, &zErr, "CREATE TABLE %Q.'%q_%q'(%s)%s",
      pConfig->zDb, pConfig->zName, zPost, zDefn, bWithout?" WITHOUT ROWID":""
  );
  if( zErr ){
    *pzErr = sqlite3_mprintf(
        "fts5: error creating shadow table %q_%s: %s",
        pConfig->zName, zPost, zErr
    );
    sqlite3_free(zErr);
  }

  return rc;
}

/*
** Open a new Fts5Storage handle. If the bCreate argument is true, create
** and initialize the underlying tables.
**
** If successful, set *pp to point to the new object and return SQLITE_OK.
** Otherwise, set *pp to NULL and return an SQLite error code.
*/ int sqlite3Fts5StorageOpen( Fts5Config *pConfig, Fts5Index *pIndex, int bCreate, Fts5Storage **pp, char **pzErr /* OUT: Error message */ ){ int rc = SQLITE_OK; Fts5Storage *p; /* New object */ int nByte; /* Bytes of space to allocate */ nByte = sizeof(Fts5Storage) /* Fts5Storage object */ + pConfig->nCol * sizeof(i64); /* Fts5Storage.aTotalSize[] */ *pp = p = (Fts5Storage*)sqlite3_malloc(nByte); if( !p ) return SQLITE_NOMEM; memset(p, 0, nByte); p->aTotalSize = (i64*)&p[1]; p->pConfig = pConfig; p->pIndex = pIndex; if( bCreate ){ if( pConfig->eContent==FTS5_CONTENT_NORMAL ){ int nDefn = 32 + pConfig->nCol*10; char *zDefn = sqlite3_malloc(32 + pConfig->nCol * 10); if( zDefn==0 ){ rc = SQLITE_NOMEM; }else{ int i; int iOff; sqlite3_snprintf(nDefn, zDefn, "id INTEGER PRIMARY KEY"); iOff = strlen(zDefn); for(i=0; i<pConfig->nCol; i++){ sqlite3_snprintf(nDefn-iOff, &zDefn[iOff], ", c%d", i); iOff += strlen(&zDefn[iOff]); } rc = sqlite3Fts5CreateTable(pConfig, "content", zDefn, 0, pzErr); } sqlite3_free(zDefn); } if( rc==SQLITE_OK && pConfig->bColumnsize ){ rc = sqlite3Fts5CreateTable( pConfig, "docsize", "id INTEGER PRIMARY KEY, sz BLOB", 0, pzErr ); } if( rc==SQLITE_OK ){ rc = sqlite3Fts5CreateTable( pConfig, "config", "k PRIMARY KEY, v", 1, pzErr ); } if( rc==SQLITE_OK ){ rc = sqlite3Fts5StorageConfigValue(p, "version", 0, FTS5_CURRENT_VERSION); } } if( rc ){ sqlite3Fts5StorageClose(p); *pp = 0; } return rc; } /* ** Close a handle opened by an earlier call to sqlite3Fts5StorageOpen(). */ int sqlite3Fts5StorageClose(Fts5Storage *p){ int rc = SQLITE_OK; if( p ){ int i; /* Finalize all SQL statements */ for(i=0; i<ArraySize(p->aStmt); i++){ sqlite3_finalize(p->aStmt[i]); } sqlite3_free(p); } return rc; } typedef struct Fts5InsertCtx Fts5InsertCtx; struct Fts5InsertCtx { Fts5Storage *pStorage; int iCol; int szCol; /* Size of column value in tokens */ }; /* ** Tokenization callback used when inserting tokens into the FTS index. 
*/ static int fts5StorageInsertCallback( void *pContext, /* Pointer to Fts5InsertCtx object */ const char *pToken, /* Buffer containing token */ int nToken, /* Size of token in bytes */ int iStart, /* Start offset of token */ int iEnd /* End offset of token */ ){ Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext; Fts5Index *pIdx = pCtx->pStorage->pIndex; int iPos = pCtx->szCol++; return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, iPos, pToken, nToken); } /* ** If a row with rowid iDel is present in the %_content table, add the ** delete-markers to the FTS index necessary to delete it. Do not actually ** remove the %_content row at this time though. */ static int fts5StorageDeleteFromIndex(Fts5Storage *p, i64 iDel){ Fts5Config *pConfig = p->pConfig; sqlite3_stmt *pSeek; /* SELECT to read row iDel from %_data */ int rc; /* Return code */ rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP, &pSeek, 0); if( rc==SQLITE_OK ){ int rc2; sqlite3_bind_int64(pSeek, 1, iDel); if( sqlite3_step(pSeek)==SQLITE_ROW ){ int iCol; Fts5InsertCtx ctx; ctx.pStorage = p; ctx.iCol = -1; rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel); for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){ if( pConfig->abUnindexed[iCol-1] ) continue; ctx.szCol = 0; rc = sqlite3Fts5Tokenize(pConfig, (const char*)sqlite3_column_text(pSeek, iCol), sqlite3_column_bytes(pSeek, iCol), (void*)&ctx, fts5StorageInsertCallback ); p->aTotalSize[iCol-1] -= (i64)ctx.szCol; } p->nTotalRow--; } rc2 = sqlite3_reset(pSeek); if( rc==SQLITE_OK ) rc = rc2; } return rc; } /* ** Insert a record into the %_docsize table. Specifically, do: ** ** INSERT OR REPLACE INTO %_docsize(id, sz) VALUES(iRowid, pBuf); ** ** If there is no %_docsize table (as happens if the columnsize=0 option ** is specified when the FTS5 table is created), this function is a no-op. 
*/
static int fts5StorageInsertDocsize(
  Fts5Storage *p,                 /* Storage module to write to */
  i64 iRowid,                     /* id value */
  Fts5Buffer *pBuf                /* sz value */
){
  sqlite3_stmt *pReplace = 0;
  int rc;

  if( p->pConfig->bColumnsize==0 ){
    /* No %_docsize table exists for this configuration. */
    return SQLITE_OK;
  }

  rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, &pReplace, 0);
  if( rc!=SQLITE_OK ){
    return rc;
  }

  sqlite3_bind_int64(pReplace, 1, iRowid);
  sqlite3_bind_blob(pReplace, 2, pBuf->p, pBuf->n, SQLITE_STATIC);
  sqlite3_step(pReplace);

  /* sqlite3_reset() reports any error hit by the step above. */
  return sqlite3_reset(pReplace);
}

/*
** Load the contents of the "averages" record from disk into the
** p->nTotalRow and p->aTotalSize[] variables. If successful, and if
** argument bCache is true, set the p->bTotalsValid flag to indicate
** that the contents of aTotalSize[] and nTotalRow are valid until
** further notice.
**
** The record is a sequence of varints: the row count followed by one
** total per column. Values missing from a short or empty record are left
** at zero.
**
** Return SQLITE_OK if successful, or an SQLite error code if an error
** occurs.
*/
static int fts5StorageLoadTotals(Fts5Storage *p, int bCache){
  Fts5Buffer buf = {0,0,0};
  int nCol;
  int rc;

  if( p->bTotalsValid ){
    /* The cached totals are still good. */
    return SQLITE_OK;
  }

  nCol = p->pConfig->nCol;
  p->nTotalRow = 0;
  memset(p->aTotalSize, 0, sizeof(i64) * nCol);

  rc = sqlite3Fts5IndexGetAverages(p->pIndex, &buf);
  if( rc==SQLITE_OK && buf.n ){
    int iOff = 0;                 /* Current offset within buf.p */
    int iCol = 0;

    iOff += fts5GetVarint(&buf.p[iOff], (u64*)&p->nTotalRow);
    while( iOff<buf.n && iCol<nCol ){
      iOff += fts5GetVarint(&buf.p[iOff], (u64*)&p->aTotalSize[iCol]);
      iCol++;
    }
  }
  sqlite3_free(buf.p);
  p->bTotalsValid = bCache;

  return rc;
}

/*
** Store the current contents of the p->nTotalRow and p->aTotalSize[]
** variables in the "averages" record on disk.
**
** Return SQLITE_OK if successful, or an SQLite error code if an error
** occurs.
*/ static int fts5StorageSaveTotals(Fts5Storage *p){ int nCol = p->pConfig->nCol; int i; Fts5Buffer buf; int rc = SQLITE_OK; memset(&buf, 0, sizeof(buf)); sqlite3Fts5BufferAppendVarint(&rc, &buf, p->nTotalRow); for(i=0; i<nCol; i++){ sqlite3Fts5BufferAppendVarint(&rc, &buf, p->aTotalSize[i]); } if( rc==SQLITE_OK ){ rc = sqlite3Fts5IndexSetAverages(p->pIndex, buf.p, buf.n); } sqlite3_free(buf.p); return rc; } /* ** Remove a row from the FTS table. */ int sqlite3Fts5StorageDelete(Fts5Storage *p, i64 iDel){ Fts5Config *pConfig = p->pConfig; int rc; sqlite3_stmt *pDel; rc = fts5StorageLoadTotals(p, 1); /* Delete the index records */ if( rc==SQLITE_OK ){ rc = fts5StorageDeleteFromIndex(p, iDel); } /* Delete the %_docsize record */ if( rc==SQLITE_OK && pConfig->bColumnsize ){ rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_DOCSIZE, &pDel, 0); if( rc==SQLITE_OK ){ sqlite3_bind_int64(pDel, 1, iDel); sqlite3_step(pDel); rc = sqlite3_reset(pDel); } } /* Delete the %_content record */ if( rc==SQLITE_OK ){ rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_CONTENT, &pDel, 0); } if( rc==SQLITE_OK ){ sqlite3_bind_int64(pDel, 1, iDel); sqlite3_step(pDel); rc = sqlite3_reset(pDel); } /* Write the averages record */ if( rc==SQLITE_OK ){ rc = fts5StorageSaveTotals(p); } return rc; } int sqlite3Fts5StorageSpecialDelete( Fts5Storage *p, i64 iDel, sqlite3_value **apVal ){ Fts5Config *pConfig = p->pConfig; int rc; sqlite3_stmt *pDel; assert( pConfig->eContent!=FTS5_CONTENT_NORMAL ); rc = fts5StorageLoadTotals(p, 1); /* Delete the index records */ if( rc==SQLITE_OK ){ int iCol; Fts5InsertCtx ctx; ctx.pStorage = p; ctx.iCol = -1; rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel); for(iCol=0; rc==SQLITE_OK && iCol<pConfig->nCol; iCol++){ if( pConfig->abUnindexed[iCol] ) continue; ctx.szCol = 0; rc = sqlite3Fts5Tokenize(pConfig, (const char*)sqlite3_value_text(apVal[iCol]), sqlite3_value_bytes(apVal[iCol]), (void*)&ctx, fts5StorageInsertCallback ); p->aTotalSize[iCol] -= (i64)ctx.szCol; } 
p->nTotalRow--; } /* Delete the %_docsize record */ if( pConfig->bColumnsize ){ if( rc==SQLITE_OK ){ rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_DOCSIZE, &pDel, 0); } if( rc==SQLITE_OK ){ sqlite3_bind_int64(pDel, 1, iDel); sqlite3_step(pDel); rc = sqlite3_reset(pDel); } } /* Write the averages record */ if( rc==SQLITE_OK ){ rc = fts5StorageSaveTotals(p); } return rc; } /* ** Delete all entries in the FTS5 index. */ int sqlite3Fts5StorageDeleteAll(Fts5Storage *p){ Fts5Config *pConfig = p->pConfig; int rc; /* Delete the contents of the %_data and %_docsize tables. */ rc = fts5ExecPrintf(pConfig->db, 0, "DELETE FROM %Q.'%q_data';", pConfig->zDb, pConfig->zName ); if( rc==SQLITE_OK && pConfig->bColumnsize ){ rc = fts5ExecPrintf(pConfig->db, 0, "DELETE FROM %Q.'%q_docsize';", pConfig->zDb, pConfig->zName ); } /* Reinitialize the %_data table. This call creates the initial structure ** and averages records. */ if( rc==SQLITE_OK ){ rc = sqlite3Fts5IndexReinit(p->pIndex); } if( rc==SQLITE_OK ){ rc = sqlite3Fts5StorageConfigValue(p, "version", 0, FTS5_CURRENT_VERSION); } return rc; } int sqlite3Fts5StorageRebuild(Fts5Storage *p){ Fts5Buffer buf = {0,0,0}; Fts5Config *pConfig = p->pConfig; sqlite3_stmt *pScan = 0; Fts5InsertCtx ctx; int rc; memset(&ctx, 0, sizeof(Fts5InsertCtx)); ctx.pStorage = p; rc = sqlite3Fts5StorageDeleteAll(p); if( rc==SQLITE_OK ){ rc = fts5StorageLoadTotals(p, 1); } if( rc==SQLITE_OK ){ rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN, &pScan, 0); } while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pScan) ){ i64 iRowid = sqlite3_column_int64(pScan, 0); sqlite3Fts5BufferZero(&buf); rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iRowid); for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){ ctx.szCol = 0; if( pConfig->abUnindexed[ctx.iCol]==0 ){ rc = sqlite3Fts5Tokenize(pConfig, (const char*)sqlite3_column_text(pScan, ctx.iCol+1), sqlite3_column_bytes(pScan, ctx.iCol+1), (void*)&ctx, fts5StorageInsertCallback ); } sqlite3Fts5BufferAppendVarint(&rc, 
&buf, ctx.szCol); p->aTotalSize[ctx.iCol] += (i64)ctx.szCol; } p->nTotalRow++; if( rc==SQLITE_OK ){ rc = fts5StorageInsertDocsize(p, iRowid, &buf); } } sqlite3_free(buf.p); /* Write the averages record */ if( rc==SQLITE_OK ){ rc = fts5StorageSaveTotals(p); } return rc; } int sqlite3Fts5StorageOptimize(Fts5Storage *p){ return sqlite3Fts5IndexOptimize(p->pIndex); } int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge){ return sqlite3Fts5IndexMerge(p->pIndex, nMerge); } /* ** Allocate a new rowid. This is used for "external content" tables when ** a NULL value is inserted into the rowid column. The new rowid is allocated ** by inserting a dummy row into the %_docsize table. The dummy will be ** overwritten later. ** ** If the %_docsize table does not exist, SQLITE_MISMATCH is returned. In ** this case the user is required to provide a rowid explicitly. */ static int fts5StorageNewRowid(Fts5Storage *p, i64 *piRowid){ int rc = SQLITE_MISMATCH; if( p->pConfig->bColumnsize ){ sqlite3_stmt *pReplace = 0; rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, &pReplace, 0); if( rc==SQLITE_OK ){ sqlite3_bind_null(pReplace, 1); sqlite3_bind_null(pReplace, 2); sqlite3_step(pReplace); rc = sqlite3_reset(pReplace); } if( rc==SQLITE_OK ){ *piRowid = sqlite3_last_insert_rowid(p->pConfig->db); } } return rc; } /* ** Insert a new row into the FTS table. 
**
** apVal[1] is the rowid value and apVal[2..nCol+1] are the column values.
** For ordinary tables the row is first written to %_content; for other
** content types only a rowid is determined. The column text is then
** tokenized into the index, the %_docsize record is written and the
** totals are saved. Returns SQLITE_OK or the first error encountered.
*/
int sqlite3Fts5StorageInsert(
  Fts5Storage *p,                 /* Storage module to write to */
  sqlite3_value **apVal,          /* Array of values passed to xUpdate() */
  int eConflict,                  /* on conflict clause */
  i64 *piRowid                    /* OUT: rowid of new record */
){
  Fts5Config *pConfig = p->pConfig;
  int rc = SQLITE_OK;             /* Return code */
  sqlite3_stmt *pInsert;          /* Statement used to write %_content table */
  int eStmt = 0;                  /* Type of statement used on %_content */
  int i;                          /* Counter variable */
  Fts5InsertCtx ctx;              /* Tokenization callback context object */
  Fts5Buffer buf;                 /* Buffer used to build up %_docsize blob */

  memset(&buf, 0, sizeof(Fts5Buffer));
  rc = fts5StorageLoadTotals(p, 1);

  /* Insert the new row into the %_content table. */
  if( rc==SQLITE_OK ){
    if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){
      /* No %_content table to write. Use the rowid supplied by the
      ** caller, or allocate one if it is not an integer. */
      if( sqlite3_value_type(apVal[1])==SQLITE_INTEGER ){
        *piRowid = sqlite3_value_int64(apVal[1]);
      }else{
        rc = fts5StorageNewRowid(p, piRowid);
      }
    }else{
      if( eConflict==SQLITE_REPLACE ){
        /* The row being replaced (if any) must be removed from the index
        ** before the REPLACE overwrites its text in %_content. */
        eStmt = FTS5_STMT_REPLACE_CONTENT;
        rc = fts5StorageDeleteFromIndex(p, sqlite3_value_int64(apVal[1]));
      }else{
        eStmt = FTS5_STMT_INSERT_CONTENT;
      }
      if( rc==SQLITE_OK ){
        rc = fts5StorageGetStmt(p, eStmt, &pInsert, 0);
      }
      /* Parameter i of the statement takes apVal[i]: the id followed by
      ** the nCol column values. */
      for(i=1; rc==SQLITE_OK && i<=pConfig->nCol+1; i++){
        rc = sqlite3_bind_value(pInsert, i, apVal[i]);
      }
      if( rc==SQLITE_OK ){
        sqlite3_step(pInsert);
        rc = sqlite3_reset(pInsert);
      }
      *piRowid = sqlite3_last_insert_rowid(pConfig->db);
    }
  }

  /* Add new entries to the FTS index */
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts5IndexBeginWrite(p->pIndex, *piRowid);
    ctx.pStorage = p;
  }
  for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
    ctx.szCol = 0;
    if( pConfig->abUnindexed[ctx.iCol]==0 ){
      rc = sqlite3Fts5Tokenize(pConfig,
          (const char*)sqlite3_value_text(apVal[ctx.iCol+2]),
          sqlite3_value_bytes(apVal[ctx.iCol+2]),
          (void*)&ctx,
          fts5StorageInsertCallback
      );
    }
    /* Unindexed columns are recorded with a size of zero. */
    sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
    p->aTotalSize[ctx.iCol] += (i64)ctx.szCol;
  }
  p->nTotalRow++;

  /* Write the %_docsize record */
  if( rc==SQLITE_OK ){
    rc = fts5StorageInsertDocsize(p, *piRowid, &buf);
  }
  sqlite3_free(buf.p);

  /* Write the averages record */
  if( rc==SQLITE_OK ){
    rc = fts5StorageSaveTotals(p);
  }

  return rc;
}

/*
** Count the rows of shadow table <name>_<zSuffix> and store the result in
** *pnRow. Returns SQLITE_OK or an SQLite error code. *pnRow is written
** only if the count(*) query actually returns a row.
*/
static int fts5StorageCount(Fts5Storage *p, const char *zSuffix, i64 *pnRow){
  Fts5Config *pConfig = p->pConfig;
  char *zSql;
  int rc;

  zSql = sqlite3_mprintf("SELECT count(*) FROM %Q.'%q_%s'",
      pConfig->zDb, pConfig->zName, zSuffix
  );
  if( zSql==0 ){
    rc = SQLITE_NOMEM;
  }else{
    sqlite3_stmt *pCnt = 0;
    rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pCnt, 0);
    if( rc==SQLITE_OK ){
      if( SQLITE_ROW==sqlite3_step(pCnt) ){
        *pnRow = sqlite3_column_int64(pCnt, 0);
      }
      rc = sqlite3_finalize(pCnt);
    }
  }

  sqlite3_free(zSql);
  return rc;
}

/*
** Context object used by sqlite3Fts5StorageIntegrity().
*/
typedef struct Fts5IntegrityCtx Fts5IntegrityCtx;
struct Fts5IntegrityCtx {
  i64 iRowid;                     /* Rowid of the row being tokenized */
  int iCol;                       /* Column being tokenized */
  int szCol;                      /* Tokens seen so far in this column */
  u64 cksum;                      /* Running XOR of per-token checksums */
  Fts5Config *pConfig;
};

/*
** Tokenization callback used by integrity check.
**
** XORs the checksum of each (rowid, column, position, token) tuple into
** pCtx->cksum and counts the token in pCtx->szCol. Always returns
** SQLITE_OK.
*/
static int fts5StorageIntegrityCallback(
  void *pContext,                 /* Pointer to Fts5IntegrityCtx object */
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStart,                     /* Start offset of token */
  int iEnd                        /* End offset of token */
){
  Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext;
  int iPos = pCtx->szCol++;
  pCtx->cksum ^= sqlite3Fts5IndexCksum(
      pCtx->pConfig, pCtx->iRowid, pCtx->iCol, iPos, pToken, nToken
  );
  return SQLITE_OK;
}

/*
** Check that the contents of the FTS index match that of the %_content
** table. Return SQLITE_OK if they do, or FTS5_CORRUPT if not. Return
** some other SQLite error code if an error occurs while attempting to
** determine this.
*/ int sqlite3Fts5StorageIntegrity(Fts5Storage *p){ Fts5Config *pConfig = p->pConfig; int rc; /* Return code */ int *aColSize; /* Array of size pConfig->nCol */ i64 *aTotalSize; /* Array of size pConfig->nCol */ Fts5IntegrityCtx ctx; sqlite3_stmt *pScan; memset(&ctx, 0, sizeof(Fts5IntegrityCtx)); ctx.pConfig = p->pConfig; aTotalSize = (i64*)sqlite3_malloc(pConfig->nCol * (sizeof(int)+sizeof(i64))); if( !aTotalSize ) return SQLITE_NOMEM; aColSize = (int*)&aTotalSize[pConfig->nCol]; memset(aTotalSize, 0, sizeof(i64) * pConfig->nCol); /* Generate the expected index checksum based on the contents of the ** %_content table. This block stores the checksum in ctx.cksum. */ rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN, &pScan, 0); if( rc==SQLITE_OK ){ int rc2; while( SQLITE_ROW==sqlite3_step(pScan) ){ int i; ctx.iRowid = sqlite3_column_int64(pScan, 0); ctx.szCol = 0; rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize); for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){ if( pConfig->abUnindexed[i] ) continue; ctx.iCol = i; ctx.szCol = 0; rc = sqlite3Fts5Tokenize( pConfig, (const char*)sqlite3_column_text(pScan, i+1), sqlite3_column_bytes(pScan, i+1), (void*)&ctx, fts5StorageIntegrityCallback ); if( ctx.szCol!=aColSize[i] ) rc = FTS5_CORRUPT; aTotalSize[i] += ctx.szCol; } if( rc!=SQLITE_OK ) break; } rc2 = sqlite3_reset(pScan); if( rc==SQLITE_OK ) rc = rc2; } /* Test that the "totals" (sometimes called "averages") record looks Ok */ if( rc==SQLITE_OK ){ int i; rc = fts5StorageLoadTotals(p, 0); for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){ if( p->aTotalSize[i]!=aTotalSize[i] ) rc = FTS5_CORRUPT; } } /* Check that the %_docsize and %_content tables contain the expected ** number of rows. 
*/ if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){ i64 nRow; rc = fts5StorageCount(p, "content", &nRow); if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT; } if( rc==SQLITE_OK ){ i64 nRow; rc = fts5StorageCount(p, "docsize", &nRow); if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT; } /* Pass the expected checksum down to the FTS index module. It will ** verify, amongst other things, that it matches the checksum generated by ** inspecting the index itself. */ if( rc==SQLITE_OK ){ rc = sqlite3Fts5IndexIntegrityCheck(p->pIndex, ctx.cksum); } sqlite3_free(aTotalSize); return rc; } /* ** Obtain an SQLite statement handle that may be used to read data from the ** %_content table. */ int sqlite3Fts5StorageStmt( Fts5Storage *p, int eStmt, sqlite3_stmt **pp, char **pzErrMsg ){ int rc; assert( eStmt==FTS5_STMT_SCAN_ASC || eStmt==FTS5_STMT_SCAN_DESC || eStmt==FTS5_STMT_LOOKUP ); rc = fts5StorageGetStmt(p, eStmt, pp, pzErrMsg); if( rc==SQLITE_OK ){ assert( p->aStmt[eStmt]==*pp ); p->aStmt[eStmt] = 0; } return rc; } /* ** Release an SQLite statement handle obtained via an earlier call to ** sqlite3Fts5StorageStmt(). The eStmt parameter passed to this function ** must match that passed to the sqlite3Fts5StorageStmt() call. */ void sqlite3Fts5StorageStmtRelease( Fts5Storage *p, int eStmt, sqlite3_stmt *pStmt ){ assert( eStmt==FTS5_STMT_SCAN_ASC || eStmt==FTS5_STMT_SCAN_DESC || eStmt==FTS5_STMT_LOOKUP ); if( p->aStmt[eStmt]==0 ){ sqlite3_reset(pStmt); p->aStmt[eStmt] = pStmt; }else{ sqlite3_finalize(pStmt); } } static int fts5StorageDecodeSizeArray( int *aCol, int nCol, /* Array to populate */ const u8 *aBlob, int nBlob /* Record to read varints from */ ){ int i; int iOff = 0; for(i=0; i<nCol; i++){ if( iOff>=nBlob ) return 1; iOff += fts5GetVarint32(&aBlob[iOff], aCol[i]); } return (iOff!=nBlob); } /* ** Argument aCol points to an array of integers containing one entry for ** each table column. 
This function reads the %_docsize record for the ** specified rowid and populates aCol[] with the results. ** ** An SQLite error code is returned if an error occurs, or SQLITE_OK ** otherwise. */ int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){ int nCol = p->pConfig->nCol; sqlite3_stmt *pLookup = 0; int rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE, &pLookup, 0); if( rc==SQLITE_OK ){ int bCorrupt = 1; sqlite3_bind_int64(pLookup, 1, iRowid); if( SQLITE_ROW==sqlite3_step(pLookup) ){ const u8 *aBlob = sqlite3_column_blob(pLookup, 0); int nBlob = sqlite3_column_bytes(pLookup, 0); if( 0==fts5StorageDecodeSizeArray(aCol, nCol, aBlob, nBlob) ){ bCorrupt = 0; } } rc = sqlite3_reset(pLookup); if( bCorrupt && rc==SQLITE_OK ){ rc = FTS5_CORRUPT; } } return rc; } int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnToken){ int rc = fts5StorageLoadTotals(p, 0); if( rc==SQLITE_OK ){ *pnToken = 0; if( iCol<0 ){ int i; for(i=0; i<p->pConfig->nCol; i++){ *pnToken += p->aTotalSize[i]; } }else if( iCol<p->pConfig->nCol ){ *pnToken = p->aTotalSize[iCol]; }else{ rc = SQLITE_RANGE; } } return rc; } int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow){ int rc = fts5StorageLoadTotals(p, 0); if( rc==SQLITE_OK ){ *pnRow = p->nTotalRow; } return rc; } /* ** Flush any data currently held in-memory to disk. 
*/ int sqlite3Fts5StorageSync(Fts5Storage *p, int bCommit){ if( bCommit && p->bTotalsValid ){ int rc = fts5StorageSaveTotals(p); p->bTotalsValid = 0; if( rc!=SQLITE_OK ) return rc; } return sqlite3Fts5IndexSync(p->pIndex, bCommit); } int sqlite3Fts5StorageRollback(Fts5Storage *p){ p->bTotalsValid = 0; return sqlite3Fts5IndexRollback(p->pIndex); } int sqlite3Fts5StorageConfigValue( Fts5Storage *p, const char *z, sqlite3_value *pVal, int iVal ){ sqlite3_stmt *pReplace = 0; int rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_CONFIG, &pReplace, 0); if( rc==SQLITE_OK ){ sqlite3_bind_text(pReplace, 1, z, -1, SQLITE_STATIC); if( pVal ){ sqlite3_bind_value(pReplace, 2, pVal); }else{ sqlite3_bind_int(pReplace, 2, iVal); } sqlite3_step(pReplace); rc = sqlite3_reset(pReplace); } if( rc==SQLITE_OK && pVal ){ int iNew = p->pConfig->iCookie + 1; rc = sqlite3Fts5IndexSetCookie(p->pIndex, iNew); if( rc==SQLITE_OK ){ p->pConfig->iCookie = iNew; } } return rc; } #endif /* SQLITE_ENABLE_FTS5 */ |
Added ext/fts5/fts5_tcl.c.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 
534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 | /* ** 2014 Dec 01 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. 
** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** */ #ifdef SQLITE_TEST #include <tcl.h> #ifdef SQLITE_ENABLE_FTS5 #include "fts5.h" #include <string.h> #include <assert.h> extern int sqlite3_fts5_may_be_corrupt; /************************************************************************* ** This is a copy of the first part of the SqliteDb structure in ** tclsqlite.c. We need it here so that the get_sqlite_pointer routine ** can extract the sqlite3* pointer from an existing Tcl SQLite ** connection. */ extern const char *sqlite3ErrName(int); struct SqliteDb { sqlite3 *db; }; /* ** Decode a pointer to an sqlite3 object. */ static int f5tDbPointer(Tcl_Interp *interp, Tcl_Obj *pObj, sqlite3 **ppDb){ struct SqliteDb *p; Tcl_CmdInfo cmdInfo; char *z = Tcl_GetString(pObj); if( Tcl_GetCommandInfo(interp, z, &cmdInfo) ){ p = (struct SqliteDb*)cmdInfo.objClientData; *ppDb = p->db; return TCL_OK; } return TCL_ERROR; } /* End of code that accesses the SqliteDb struct. 
**************************************************************************/ static int f5tResultToErrorCode(const char *zRes){ struct ErrorCode { int rc; const char *zError; } aErr[] = { { SQLITE_DONE, "SQLITE_DONE" }, { SQLITE_ERROR, "SQLITE_ERROR" }, { SQLITE_OK, "SQLITE_OK" }, { SQLITE_OK, "" }, }; int i; for(i=0; i<sizeof(aErr)/sizeof(aErr[0]); i++){ if( 0==sqlite3_stricmp(zRes, aErr[i].zError) ){ return aErr[i].rc; } } return SQLITE_ERROR; } static int f5tDbAndApi( Tcl_Interp *interp, Tcl_Obj *pObj, sqlite3 **ppDb, fts5_api **ppApi ){ sqlite3 *db = 0; int rc = f5tDbPointer(interp, pObj, &db); if( rc!=TCL_OK ){ return TCL_ERROR; }else{ sqlite3_stmt *pStmt = 0; fts5_api *pApi = 0; rc = sqlite3_prepare_v2(db, "SELECT fts5()", -1, &pStmt, 0); if( rc!=SQLITE_OK ){ Tcl_AppendResult(interp, "error: ", sqlite3_errmsg(db), 0); return TCL_ERROR; } if( SQLITE_ROW==sqlite3_step(pStmt) ){ const void *pPtr = sqlite3_column_blob(pStmt, 0); memcpy((void*)&pApi, pPtr, sizeof(pApi)); } if( sqlite3_finalize(pStmt)!=SQLITE_OK ){ Tcl_AppendResult(interp, "error: ", sqlite3_errmsg(db), 0); return TCL_ERROR; } *ppDb = db; *ppApi = pApi; } return TCL_OK; } typedef struct F5tFunction F5tFunction; struct F5tFunction { Tcl_Interp *interp; Tcl_Obj *pScript; }; typedef struct F5tApi F5tApi; struct F5tApi { const Fts5ExtensionApi *pApi; Fts5Context *pFts; }; /* ** An object of this type is used with the xSetAuxdata() and xGetAuxdata() ** API test wrappers. The tcl interface allows a single tcl value to be ** saved using xSetAuxdata(). Instead of simply storing a pointer to the ** tcl object, the code in this file wraps it in an sqlite3_malloc'd ** instance of the following struct so that if the destructor is not ** correctly invoked it will be reported as an SQLite memory leak. 
*/ typedef struct F5tAuxData F5tAuxData; struct F5tAuxData { Tcl_Obj *pObj; }; static int xTokenizeCb( void *pCtx, const char *zToken, int nToken, int iStart, int iEnd ){ F5tFunction *p = (F5tFunction*)pCtx; Tcl_Obj *pEval = Tcl_DuplicateObj(p->pScript); int rc; Tcl_IncrRefCount(pEval); Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewStringObj(zToken, nToken)); Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewIntObj(iStart)); Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewIntObj(iEnd)); rc = Tcl_EvalObjEx(p->interp, pEval, 0); Tcl_DecrRefCount(pEval); if( rc==TCL_OK ){ rc = f5tResultToErrorCode(Tcl_GetStringResult(p->interp)); } return rc; } static int xF5tApi(void*, Tcl_Interp*, int, Tcl_Obj *CONST []); static int xQueryPhraseCb( const Fts5ExtensionApi *pApi, Fts5Context *pFts, void *pCtx ){ F5tFunction *p = (F5tFunction*)pCtx; static sqlite3_int64 iCmd = 0; Tcl_Obj *pEval; int rc; char zCmd[64]; F5tApi sApi; sApi.pApi = pApi; sApi.pFts = pFts; sprintf(zCmd, "f5t_2_%lld", iCmd++); Tcl_CreateObjCommand(p->interp, zCmd, xF5tApi, &sApi, 0); pEval = Tcl_DuplicateObj(p->pScript); Tcl_IncrRefCount(pEval); Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewStringObj(zCmd, -1)); rc = Tcl_EvalObjEx(p->interp, pEval, 0); Tcl_DecrRefCount(pEval); Tcl_DeleteCommand(p->interp, zCmd); if( rc==TCL_OK ){ rc = f5tResultToErrorCode(Tcl_GetStringResult(p->interp)); } return rc; } static void xSetAuxdataDestructor(void *p){ F5tAuxData *pData = (F5tAuxData*)p; Tcl_DecrRefCount(pData->pObj); sqlite3_free(pData); } /* ** api sub-command... ** ** Description... 
*/ static int xF5tApi( void * clientData, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[] ){ struct Sub { const char *zName; int nArg; const char *zMsg; } aSub[] = { { "xColumnCount", 0, "" }, /* 0 */ { "xRowCount", 0, "" }, /* 1 */ { "xColumnTotalSize", 1, "COL" }, /* 2 */ { "xTokenize", 2, "TEXT SCRIPT" }, /* 3 */ { "xPhraseCount", 0, "" }, /* 4 */ { "xPhraseSize", 1, "PHRASE" }, /* 5 */ { "xInstCount", 0, "" }, /* 6 */ { "xInst", 1, "IDX" }, /* 7 */ { "xRowid", 0, "" }, /* 8 */ { "xColumnText", 1, "COL" }, /* 9 */ { "xColumnSize", 1, "COL" }, /* 10 */ { "xQueryPhrase", 2, "PHRASE SCRIPT" }, /* 11 */ { "xSetAuxdata", 1, "VALUE" }, /* 12 */ { "xGetAuxdata", 1, "CLEAR" }, /* 13 */ { "xSetAuxdataInt", 1, "INTEGER" }, /* 14 */ { "xGetAuxdataInt", 1, "CLEAR" }, /* 15 */ { 0, 0, 0} }; int rc; int iSub = 0; F5tApi *p = (F5tApi*)clientData; if( objc<2 ){ Tcl_WrongNumArgs(interp, 1, objv, "SUB-COMMAND"); return TCL_ERROR; } rc = Tcl_GetIndexFromObjStruct( interp, objv[1], aSub, sizeof(aSub[0]), "SUB-COMMAND", 0, &iSub ); if( rc!=TCL_OK ) return rc; if( aSub[iSub].nArg!=objc-2 ){ Tcl_WrongNumArgs(interp, 1, objv, aSub[iSub].zMsg); return TCL_ERROR; } #define CASE(i,str) case i: assert( strcmp(aSub[i].zName, str)==0 ); switch( iSub ){ CASE(0, "xColumnCount") { int nCol; nCol = p->pApi->xColumnCount(p->pFts); if( rc==SQLITE_OK ){ Tcl_SetObjResult(interp, Tcl_NewIntObj(nCol)); } break; } CASE(1, "xRowCount") { sqlite3_int64 nRow; rc = p->pApi->xRowCount(p->pFts, &nRow); if( rc==SQLITE_OK ){ Tcl_SetObjResult(interp, Tcl_NewWideIntObj(nRow)); } break; } CASE(2, "xColumnTotalSize") { int iCol; sqlite3_int64 nSize; if( Tcl_GetIntFromObj(interp, objv[2], &iCol) ) return TCL_ERROR; rc = p->pApi->xColumnTotalSize(p->pFts, iCol, &nSize); if( rc==SQLITE_OK ){ Tcl_SetObjResult(interp, Tcl_NewWideIntObj(nSize)); } break; } CASE(3, "xTokenize") { int nText; char *zText = Tcl_GetStringFromObj(objv[2], &nText); F5tFunction ctx; ctx.interp = interp; ctx.pScript = objv[3]; rc = 
p->pApi->xTokenize(p->pFts, zText, nText, &ctx, xTokenizeCb); if( rc==SQLITE_OK ){ Tcl_ResetResult(interp); } return rc; } CASE(4, "xPhraseCount") { int nPhrase; nPhrase = p->pApi->xPhraseCount(p->pFts); if( rc==SQLITE_OK ){ Tcl_SetObjResult(interp, Tcl_NewIntObj(nPhrase)); } break; } CASE(5, "xPhraseSize") { int iPhrase; int sz; if( Tcl_GetIntFromObj(interp, objv[2], &iPhrase) ){ return TCL_ERROR; } sz = p->pApi->xPhraseSize(p->pFts, iPhrase); if( rc==SQLITE_OK ){ Tcl_SetObjResult(interp, Tcl_NewIntObj(sz)); } break; } CASE(6, "xInstCount") { int nInst; rc = p->pApi->xInstCount(p->pFts, &nInst); if( rc==SQLITE_OK ){ Tcl_SetObjResult(interp, Tcl_NewIntObj(nInst)); } break; } CASE(7, "xInst") { int iIdx, ip, ic, io; if( Tcl_GetIntFromObj(interp, objv[2], &iIdx) ){ return TCL_ERROR; } rc = p->pApi->xInst(p->pFts, iIdx, &ip, &ic, &io); if( rc==SQLITE_OK ){ Tcl_Obj *pList = Tcl_NewObj(); Tcl_ListObjAppendElement(interp, pList, Tcl_NewIntObj(ip)); Tcl_ListObjAppendElement(interp, pList, Tcl_NewIntObj(ic)); Tcl_ListObjAppendElement(interp, pList, Tcl_NewIntObj(io)); Tcl_SetObjResult(interp, pList); } break; } CASE(8, "xRowid") { sqlite3_int64 iRowid = p->pApi->xRowid(p->pFts); Tcl_SetObjResult(interp, Tcl_NewWideIntObj(iRowid)); break; } CASE(9, "xColumnText") { const char *z = 0; int n = 0; int iCol; if( Tcl_GetIntFromObj(interp, objv[2], &iCol) ){ return TCL_ERROR; } rc = p->pApi->xColumnText(p->pFts, iCol, &z, &n); if( rc==SQLITE_OK ){ Tcl_SetObjResult(interp, Tcl_NewStringObj(z, n)); } break; } CASE(10, "xColumnSize") { int n = 0; int iCol; if( Tcl_GetIntFromObj(interp, objv[2], &iCol) ){ return TCL_ERROR; } rc = p->pApi->xColumnSize(p->pFts, iCol, &n); if( rc==SQLITE_OK ){ Tcl_SetObjResult(interp, Tcl_NewIntObj(n)); } break; } CASE(11, "xQueryPhrase") { int iPhrase; F5tFunction ctx; if( Tcl_GetIntFromObj(interp, objv[2], &iPhrase) ){ return TCL_ERROR; } ctx.interp = interp; ctx.pScript = objv[3]; rc = p->pApi->xQueryPhrase(p->pFts, iPhrase, &ctx, xQueryPhraseCb); 
if( rc==SQLITE_OK ){ Tcl_ResetResult(interp); } break; } CASE(12, "xSetAuxdata") { F5tAuxData *pData = (F5tAuxData*)sqlite3_malloc(sizeof(F5tAuxData)); if( pData==0 ){ Tcl_AppendResult(interp, "out of memory", 0); return TCL_ERROR; } pData->pObj = objv[2]; Tcl_IncrRefCount(pData->pObj); rc = p->pApi->xSetAuxdata(p->pFts, pData, xSetAuxdataDestructor); break; } CASE(13, "xGetAuxdata") { F5tAuxData *pData; int bClear; if( Tcl_GetBooleanFromObj(interp, objv[2], &bClear) ){ return TCL_ERROR; } pData = (F5tAuxData*)p->pApi->xGetAuxdata(p->pFts, bClear); if( pData==0 ){ Tcl_ResetResult(interp); }else{ Tcl_SetObjResult(interp, pData->pObj); if( bClear ){ xSetAuxdataDestructor((void*)pData); } } break; } /* These two - xSetAuxdataInt and xGetAuxdataInt - are similar to the ** xSetAuxdata and xGetAuxdata methods implemented above. The difference ** is that they may only save an integer value as auxiliary data, and ** do not specify a destructor function. */ CASE(14, "xSetAuxdataInt") { int iVal; if( Tcl_GetIntFromObj(interp, objv[2], &iVal) ) return TCL_ERROR; rc = p->pApi->xSetAuxdata(p->pFts, (void*)iVal, 0); break; } CASE(15, "xGetAuxdataInt") { int iVal; int bClear; if( Tcl_GetBooleanFromObj(interp, objv[2], &bClear) ) return TCL_ERROR; iVal = (int)p->pApi->xGetAuxdata(p->pFts, bClear); Tcl_SetObjResult(interp, Tcl_NewIntObj(iVal)); break; } default: assert( 0 ); break; } #undef CASE if( rc!=SQLITE_OK ){ Tcl_SetResult(interp, (char*)sqlite3ErrName(rc), TCL_VOLATILE); return TCL_ERROR; } return TCL_OK; } static void xF5tFunction( const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ Fts5Context *pFts, /* First arg to pass to pApi functions */ sqlite3_context *pCtx, /* Context for returning result/error */ int nVal, /* Number of values in apVal[] array */ sqlite3_value **apVal /* Array of trailing arguments */ ){ F5tFunction *p = (F5tFunction*)pApi->xUserData(pFts); Tcl_Obj *pEval; /* Script to evaluate */ int i; int rc; static sqlite3_int64 iCmd = 0; 
char zCmd[64]; F5tApi sApi; sApi.pApi = pApi; sApi.pFts = pFts; sprintf(zCmd, "f5t_%lld", iCmd++); Tcl_CreateObjCommand(p->interp, zCmd, xF5tApi, &sApi, 0); pEval = Tcl_DuplicateObj(p->pScript); Tcl_IncrRefCount(pEval); Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewStringObj(zCmd, -1)); for(i=0; i<nVal; i++){ Tcl_Obj *pObj = 0; switch( sqlite3_value_type(apVal[i]) ){ case SQLITE_TEXT: pObj = Tcl_NewStringObj((const char*)sqlite3_value_text(apVal[i]), -1); break; case SQLITE_BLOB: pObj = Tcl_NewByteArrayObj( sqlite3_value_blob(apVal[i]), sqlite3_value_bytes(apVal[i]) ); break; case SQLITE_INTEGER: pObj = Tcl_NewWideIntObj(sqlite3_value_int64(apVal[i])); break; case SQLITE_FLOAT: pObj = Tcl_NewDoubleObj(sqlite3_value_double(apVal[i])); break; default: pObj = Tcl_NewObj(); break; } Tcl_ListObjAppendElement(p->interp, pEval, pObj); } rc = Tcl_EvalObjEx(p->interp, pEval, TCL_GLOBAL_ONLY); Tcl_DecrRefCount(pEval); Tcl_DeleteCommand(p->interp, zCmd); if( rc!=TCL_OK ){ sqlite3_result_error(pCtx, Tcl_GetStringResult(p->interp), -1); }else{ Tcl_Obj *pVar = Tcl_GetObjResult(p->interp); int n; const char *zType = (pVar->typePtr ? pVar->typePtr->name : ""); char c = zType[0]; if( c=='b' && strcmp(zType,"bytearray")==0 && pVar->bytes==0 ){ /* Only return a BLOB type if the Tcl variable is a bytearray and ** has no string representation. 
*/ unsigned char *data = Tcl_GetByteArrayFromObj(pVar, &n); sqlite3_result_blob(pCtx, data, n, SQLITE_TRANSIENT); }else if( c=='b' && strcmp(zType,"boolean")==0 ){ Tcl_GetIntFromObj(0, pVar, &n); sqlite3_result_int(pCtx, n); }else if( c=='d' && strcmp(zType,"double")==0 ){ double r; Tcl_GetDoubleFromObj(0, pVar, &r); sqlite3_result_double(pCtx, r); }else if( (c=='w' && strcmp(zType,"wideInt")==0) || (c=='i' && strcmp(zType,"int")==0) ){ Tcl_WideInt v; Tcl_GetWideIntFromObj(0, pVar, &v); sqlite3_result_int64(pCtx, v); }else{ unsigned char *data = (unsigned char *)Tcl_GetStringFromObj(pVar, &n); sqlite3_result_text(pCtx, (char *)data, n, SQLITE_TRANSIENT); } } } static void xF5tDestroy(void *pCtx){ F5tFunction *p = (F5tFunction*)pCtx; Tcl_DecrRefCount(p->pScript); ckfree((char *)p); } /* ** sqlite3_fts5_create_function DB NAME SCRIPT ** ** Description... */ static int f5tCreateFunction( void * clientData, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[] ){ char *zName; Tcl_Obj *pScript; sqlite3 *db = 0; fts5_api *pApi = 0; F5tFunction *pCtx = 0; int rc; if( objc!=4 ){ Tcl_WrongNumArgs(interp, 1, objv, "DB NAME SCRIPT"); return TCL_ERROR; } if( f5tDbAndApi(interp, objv[1], &db, &pApi) ) return TCL_ERROR; zName = Tcl_GetString(objv[2]); pScript = objv[3]; pCtx = (F5tFunction*)ckalloc(sizeof(F5tFunction)); pCtx->interp = interp; pCtx->pScript = pScript; Tcl_IncrRefCount(pScript); rc = pApi->xCreateFunction( pApi, zName, (void*)pCtx, xF5tFunction, xF5tDestroy ); if( rc!=SQLITE_OK ){ Tcl_AppendResult(interp, "error: ", sqlite3_errmsg(db), 0); return TCL_ERROR; } return TCL_OK; } typedef struct F5tTokenizeCtx F5tTokenizeCtx; struct F5tTokenizeCtx { Tcl_Obj *pRet; int bSubst; const char *zInput; }; static int xTokenizeCb2( void *pCtx, const char *zToken, int nToken, int iStart, int iEnd ){ F5tTokenizeCtx *p = (F5tTokenizeCtx*)pCtx; if( p->bSubst ){ Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewStringObj(zToken, nToken)); Tcl_ListObjAppendElement( 0, p->pRet, 
Tcl_NewStringObj(&p->zInput[iStart], iEnd-iStart) ); }else{ Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewStringObj(zToken, nToken)); Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iStart)); Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iEnd)); } return SQLITE_OK; } /* ** sqlite3_fts5_tokenize DB TOKENIZER TEXT ** ** Description... */ static int f5tTokenize( void * clientData, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[] ){ char *zText; int nText; sqlite3 *db = 0; fts5_api *pApi = 0; Fts5Tokenizer *pTok = 0; fts5_tokenizer tokenizer; Tcl_Obj *pRet = 0; void *pUserdata; int rc; int nArg; const char **azArg; F5tTokenizeCtx ctx; if( objc!=4 && objc!=5 ){ Tcl_WrongNumArgs(interp, 1, objv, "?-subst? DB NAME TEXT"); return TCL_ERROR; } if( objc==5 ){ char *zOpt = Tcl_GetString(objv[1]); if( strcmp("-subst", zOpt) ){ Tcl_AppendResult(interp, "unrecognized option: ", zOpt, 0); return TCL_ERROR; } } if( f5tDbAndApi(interp, objv[objc-3], &db, &pApi) ) return TCL_ERROR; if( Tcl_SplitList(interp, Tcl_GetString(objv[objc-2]), &nArg, &azArg) ){ return TCL_ERROR; } if( nArg==0 ){ Tcl_AppendResult(interp, "no such tokenizer: ", 0); Tcl_Free((void*)azArg); return TCL_ERROR; } zText = Tcl_GetStringFromObj(objv[objc-1], &nText); rc = pApi->xFindTokenizer(pApi, azArg[0], &pUserdata, &tokenizer); if( rc!=SQLITE_OK ){ Tcl_AppendResult(interp, "no such tokenizer: ", azArg[0], 0); return TCL_ERROR; } rc = tokenizer.xCreate(pUserdata, &azArg[1], nArg-1, &pTok); if( rc!=SQLITE_OK ){ Tcl_AppendResult(interp, "error in tokenizer.xCreate()", 0); return TCL_ERROR; } pRet = Tcl_NewObj(); Tcl_IncrRefCount(pRet); ctx.bSubst = (objc==5); ctx.pRet = pRet; ctx.zInput = zText; rc = tokenizer.xTokenize(pTok, (void*)&ctx, zText, nText, xTokenizeCb2); tokenizer.xDelete(pTok); if( rc!=SQLITE_OK ){ Tcl_AppendResult(interp, "error in tokenizer.xTokenize()", 0); Tcl_DecrRefCount(pRet); return TCL_ERROR; } Tcl_Free((void*)azArg); Tcl_SetObjResult(interp, pRet); Tcl_DecrRefCount(pRet); return 
TCL_OK; } /************************************************************************* ** Start of tokenizer wrapper. */ typedef struct F5tTokenizerContext F5tTokenizerContext; typedef struct F5tTokenizerCb F5tTokenizerCb; typedef struct F5tTokenizerModule F5tTokenizerModule; typedef struct F5tTokenizerModule F5tTokenizerInstance; struct F5tTokenizerContext { void *pCtx; int (*xToken)(void*, const char*, int, int, int); }; struct F5tTokenizerModule { Tcl_Interp *interp; Tcl_Obj *pScript; F5tTokenizerContext *pContext; }; static int f5tTokenizerCreate( void *pCtx, const char **azArg, int nArg, Fts5Tokenizer **ppOut ){ F5tTokenizerModule *pMod = (F5tTokenizerModule*)pCtx; Tcl_Obj *pEval; int rc = TCL_OK; int i; pEval = Tcl_DuplicateObj(pMod->pScript); Tcl_IncrRefCount(pEval); for(i=0; rc==TCL_OK && i<nArg; i++){ Tcl_Obj *pObj = Tcl_NewStringObj(azArg[i], -1); rc = Tcl_ListObjAppendElement(pMod->interp, pEval, pObj); } if( rc==TCL_OK ){ rc = Tcl_EvalObjEx(pMod->interp, pEval, TCL_GLOBAL_ONLY); } Tcl_DecrRefCount(pEval); if( rc==TCL_OK ){ F5tTokenizerInstance *pInst; pInst = (F5tTokenizerInstance*)ckalloc(sizeof(F5tTokenizerInstance)); memset(pInst, 0, sizeof(F5tTokenizerInstance)); pInst->interp = pMod->interp; pInst->pScript = Tcl_GetObjResult(pMod->interp); pInst->pContext = pMod->pContext; Tcl_IncrRefCount(pInst->pScript); *ppOut = (Fts5Tokenizer*)pInst; } return rc; } static void f5tTokenizerDelete(Fts5Tokenizer *p){ F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p; Tcl_DecrRefCount(pInst->pScript); ckfree((char *)pInst); } static int f5tTokenizerTokenize( Fts5Tokenizer *p, void *pCtx, const char *pText, int nText, int (*xToken)(void*, const char*, int, int, int) ){ F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p; void *pOldCtx; int (*xOldToken)(void*, const char*, int, int, int); Tcl_Obj *pEval; int rc; pOldCtx = pInst->pContext->pCtx; xOldToken = pInst->pContext->xToken; pEval = Tcl_DuplicateObj(pInst->pScript); Tcl_IncrRefCount(pEval); rc = 
Tcl_ListObjAppendElement( pInst->interp, pEval, Tcl_NewStringObj(pText, nText) ); if( rc==TCL_OK ){ rc = Tcl_EvalObjEx(pInst->interp, pEval, TCL_GLOBAL_ONLY); } Tcl_DecrRefCount(pEval); pInst->pContext->pCtx = pOldCtx; pInst->pContext->xToken = xOldToken; return rc; } /* ** sqlite3_fts5_token TEXT START END POS */ static int f5tTokenizerReturn( void * clientData, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[] ){ F5tTokenizerContext *p = (F5tTokenizerContext*)clientData; int iStart; int iEnd; int nToken; char *zToken; int rc; assert( p ); if( objc!=4 ){ Tcl_WrongNumArgs(interp, 1, objv, "TEXT START END"); return TCL_ERROR; } if( p->xToken==0 ){ Tcl_AppendResult(interp, "sqlite3_fts5_token may only be used by tokenizer callback", 0 ); return TCL_ERROR; } zToken = Tcl_GetStringFromObj(objv[1], &nToken); if( Tcl_GetIntFromObj(interp, objv[2], &iStart) || Tcl_GetIntFromObj(interp, objv[3], &iEnd) ){ return TCL_ERROR; } rc = p->xToken(p->pCtx, zToken, nToken, iStart, iEnd); Tcl_SetResult(interp, (char*)sqlite3ErrName(rc), TCL_VOLATILE); return TCL_OK; } static void f5tDelTokenizer(void *pCtx){ F5tTokenizerModule *pMod = (F5tTokenizerModule*)pCtx; Tcl_DecrRefCount(pMod->pScript); ckfree((char *)pMod); } /* ** sqlite3_fts5_create_tokenizer DB NAME SCRIPT ** ** Register a tokenizer named NAME implemented by script SCRIPT. When ** a tokenizer instance is created (fts5_tokenizer.xCreate), any tokenizer ** arguments are appended to SCRIPT and the result executed. ** ** The value returned by (SCRIPT + args) is itself a tcl script. This ** script - call it SCRIPT2 - is executed to tokenize text using the ** tokenizer instance "returned" by SCRIPT. Specifically, to tokenize ** text SCRIPT2 is invoked with a single argument appended to it - the ** text to tokenize. ** ** SCRIPT2 should invoke the [sqlite3_fts5_token] command once for each ** token within the tokenized text. 
*/
static int f5tCreateTokenizer(
  ClientData clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  F5tTokenizerContext *pShared = (F5tTokenizerContext*)clientData;
  F5tTokenizerModule *pModule;    /* Module object passed to fts5 */
  fts5_tokenizer sMethods;        /* Methods of the Tcl wrapper */
  sqlite3 *db;
  fts5_api *pApi;
  char *zName;
  Tcl_Obj *pScript;

  if( objc!=4 ){
    Tcl_WrongNumArgs(interp, 1, objv, "DB NAME SCRIPT");
    return TCL_ERROR;
  }
  if( f5tDbAndApi(interp, objv[1], &db, &pApi)!=TCL_OK ){
    return TCL_ERROR;
  }
  zName = Tcl_GetString(objv[2]);
  pScript = objv[3];

  sMethods.xCreate = f5tTokenizerCreate;
  sMethods.xTokenize = f5tTokenizerTokenize;
  sMethods.xDelete = f5tTokenizerDelete;

  /* The module holds a reference to the script. It is released by
  ** f5tDelTokenizer(), which is registered as the destructor below. */
  pModule = (F5tTokenizerModule*)ckalloc(sizeof(F5tTokenizerModule));
  pModule->interp = interp;
  pModule->pScript = pScript;
  pModule->pContext = pShared;
  Tcl_IncrRefCount(pScript);

  if( SQLITE_OK!=pApi->xCreateTokenizer(
        pApi, zName, (void*)pModule, &sMethods, f5tDelTokenizer
  )){
    Tcl_AppendResult(interp, "error in fts5_api.xCreateTokenizer()", 0);
    return TCL_ERROR;
  }

  return TCL_OK;
}

/*
** Tcl command-delete callback. Frees the client-data with ckfree().
*/
static void xF5tFree(ClientData clientData){
  ckfree(clientData);
}

/*
**      sqlite3_fts5_may_be_corrupt BOOLEAN
**
** Set or clear the global "may-be-corrupt" flag. Return the old value.
*/ static int f5tMayBeCorrupt( void * clientData, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[] ){ int bOld = sqlite3_fts5_may_be_corrupt; if( objc!=2 && objc!=1 ){ Tcl_WrongNumArgs(interp, 1, objv, "?BOOLEAN?"); return TCL_ERROR; } if( objc==2 ){ int bNew; if( Tcl_GetBooleanFromObj(interp, objv[1], &bNew) ) return TCL_ERROR; sqlite3_fts5_may_be_corrupt = bNew; } Tcl_SetObjResult(interp, Tcl_NewIntObj(bOld)); return TCL_OK; } static unsigned int f5t_fts5HashKey(int nSlot, const char *p, int n){ int i; unsigned int h = 13; for(i=n-1; i>=0; i--){ h = (h << 3) ^ h ^ p[i]; } return (h % nSlot); } static int f5tTokenHash( void * clientData, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[] ){ int bOld = sqlite3_fts5_may_be_corrupt; char *z; int n; unsigned int iVal; int nSlot; if( objc!=3 ){ Tcl_WrongNumArgs(interp, 1, objv, "NSLOT TOKEN"); return TCL_ERROR; } if( Tcl_GetIntFromObj(interp, objv[1], &nSlot) ){ return TCL_ERROR; } z = Tcl_GetStringFromObj(objv[2], &n); iVal = f5t_fts5HashKey(nSlot, z, n); Tcl_SetObjResult(interp, Tcl_NewIntObj(iVal)); return TCL_OK; } /* ** Entry point. */ int Fts5tcl_Init(Tcl_Interp *interp){ static struct Cmd { char *zName; Tcl_ObjCmdProc *xProc; int bTokenizeCtx; } aCmd[] = { { "sqlite3_fts5_create_tokenizer", f5tCreateTokenizer, 1 }, { "sqlite3_fts5_token", f5tTokenizerReturn, 1 }, { "sqlite3_fts5_tokenize", f5tTokenize, 0 }, { "sqlite3_fts5_create_function", f5tCreateFunction, 0 }, { "sqlite3_fts5_may_be_corrupt", f5tMayBeCorrupt, 0 }, { "sqlite3_fts5_token_hash", f5tTokenHash, 0 } }; int i; F5tTokenizerContext *pContext; pContext = (F5tTokenizerContext*)ckalloc(sizeof(F5tTokenizerContext)); memset(pContext, 0, sizeof(*pContext)); for(i=0; i<sizeof(aCmd)/sizeof(aCmd[0]); i++){ struct Cmd *p = &aCmd[i]; void *pCtx = 0; if( p->bTokenizeCtx ) pCtx = (void*)pContext; Tcl_CreateObjCommand(interp, p->zName, p->xProc, pCtx, (i ? 
0 : xF5tFree)); } return TCL_OK; } #else /* SQLITE_ENABLE_FTS5 */ int Fts5tcl_Init(Tcl_Interp *interp){ return TCL_OK; } #endif /* SQLITE_ENABLE_FTS5 */ #endif /* SQLITE_TEST */ |
Added ext/fts5/fts5_tokenize.c.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > 
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 
412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 
912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 | /* ** 2014 May 31 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. 
** ****************************************************************************** */ #if defined(SQLITE_ENABLE_FTS5) #include "fts5Int.h" /************************************************************************** ** Start of ascii tokenizer implementation. */ /* ** For tokenizers with no "unicode" modifier, the set of token characters ** is the same as the set of ASCII range alphanumeric characters. */ static unsigned char aAsciiTokenChar[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00..0x0F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10..0x1F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20..0x2F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30..0x3F */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40..0x4F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x50..0x5F */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60..0x6F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70..0x7F */ }; typedef struct AsciiTokenizer AsciiTokenizer; struct AsciiTokenizer { unsigned char aTokenChar[128]; }; static void fts5AsciiAddExceptions( AsciiTokenizer *p, const char *zArg, int bTokenChars ){ int i; for(i=0; zArg[i]; i++){ if( (zArg[i] & 0x80)==0 ){ p->aTokenChar[(int)zArg[i]] = (unsigned char)bTokenChars; } } } /* ** Delete a "ascii" tokenizer. */ static void fts5AsciiDelete(Fts5Tokenizer *p){ sqlite3_free(p); } /* ** Create an "ascii" tokenizer. 
*/
/*
** azArg[]/nArg hold the tokenizer arguments as name/value pairs, so nArg
** must be even. Recognized names (case-insensitive) are "tokenchars" and
** "separators"; each value lists ASCII characters to reclassify. Any
** other name, or an odd nArg, yields SQLITE_ERROR. On success *ppOut is
** set to the new tokenizer; on any error it is set to NULL.
*/
static int fts5AsciiCreate(
  void *pCtx, 
  const char **azArg, int nArg,
  Fts5Tokenizer **ppOut
){
  int rc = SQLITE_OK;
  AsciiTokenizer *p = 0;
  if( nArg%2 ){
    rc = SQLITE_ERROR;
  }else{
    p = sqlite3_malloc(sizeof(AsciiTokenizer));
    if( p==0 ){
      rc = SQLITE_NOMEM;
    }else{
      int i;
      memset(p, 0, sizeof(AsciiTokenizer));
      /* Start from the default table, then apply the arguments in order */
      memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar));
      for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
        const char *zArg = azArg[i+1];
        if( 0==sqlite3_stricmp(azArg[i], "tokenchars") ){
          fts5AsciiAddExceptions(p, zArg, 1);
        }else
        if( 0==sqlite3_stricmp(azArg[i], "separators") ){
          fts5AsciiAddExceptions(p, zArg, 0);
        }else{
          rc = SQLITE_ERROR;
        }
      }
      if( rc!=SQLITE_OK ){
        fts5AsciiDelete((Fts5Tokenizer*)p);
        p = 0;
      }
    }
  }

  *ppOut = (Fts5Tokenizer*)p;
  return rc;
}

/*
** Copy nByte bytes from aIn to aOut, converting ASCII upper case letters
** ('A'..'Z') to lower case. All other bytes are copied unchanged.
*/
static void asciiFold(char *aOut, const char *aIn, int nByte){
  int i;
  for(i=0; i<nByte; i++){
    char c = aIn[i];
    if( c>='A' && c<='Z' ) c += 32;
    aOut[i] = c;
  }
}

/*
** Tokenize some text using the ascii tokenizer.
**
** A token is a maximal run of bytes that are either marked as token
** characters in the tokenizer's table or have the high bit set. Each
** token is folded with asciiFold() and passed to xToken() along with its
** start and end byte offsets within pText. Tokenization stops as soon as
** xToken() returns non-zero; SQLITE_DONE is reported to the caller as
** SQLITE_OK, any other value is returned as is.
*/
static int fts5AsciiTokenize(
  Fts5Tokenizer *pTokenizer,
  void *pCtx,
  const char *pText, int nText,
  int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd)
){
  AsciiTokenizer *p = (AsciiTokenizer*)pTokenizer;
  int rc = SQLITE_OK;
  int ie;
  int is = 0;

  /* Folded tokens are built in aFold[] unless a token is too large, in
  ** which case pFold is switched to a heap buffer. */
  char aFold[64];
  int nFold = sizeof(aFold);
  char *pFold = aFold;
  unsigned char *a = p->aTokenChar;

  while( is<nText && rc==SQLITE_OK ){
    int nByte;

    /* Skip any leading divider characters. */
    while( is<nText && ((pText[is]&0x80)==0 && a[(int)pText[is]]==0) ){
      is++;
    }
    if( is==nText ) break;

    /* Count the token characters */
    ie = is+1;
    while( ie<nText && ((pText[ie]&0x80) || a[(int)pText[ie]] ) ){
      ie++;
    }

    /* Fold to lower case */
    nByte = ie-is;
    if( nByte>nFold ){
      if( pFold!=aFold ) sqlite3_free(pFold);
      pFold = sqlite3_malloc(nByte*2);
      if( pFold==0 ){
        rc = SQLITE_NOMEM;
        break;
      }
      nFold = nByte*2;
    }
    asciiFold(pFold, &pText[is], nByte);

    /* Invoke the token callback */
    rc = xToken(pCtx, pFold, nByte, is, ie);
    /* pText[ie] is a separator (or the end of input), so skip past it */
    is = ie+1;
  }
  
  if( pFold!=aFold ) sqlite3_free(pFold);
  if( rc==SQLITE_DONE ) rc = SQLITE_OK;
  return rc;
}

/**************************************************************************
** Start of unicode61 tokenizer implementation.
*/


/*
** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied
** from the sqlite3 source file utf.c. If this file is compiled as part
** of the amalgamation, they are not required.
**
** READ_UTF8(zIn, zTerm, c) decodes one character starting at zIn into c
** and advances zIn, never reading at or beyond zTerm for continuation
** bytes. Decoded values below 0x80, in the range 0xD800..0xDFFF, or equal
** to 0xFFFE/0xFFFF are replaced by 0xFFFD. WRITE_UTF8(zOut, c) encodes c
** as 1 to 4 bytes at zOut and advances zOut. Both macros evaluate their
** arguments more than once.
*/
#ifndef SQLITE_AMALGAMATION

static const unsigned char sqlite3Utf8Trans1[] = {
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
};

#define READ_UTF8(zIn, zTerm, c)                           \
  c = *(zIn++);                                            \
  if( c>=0xc0 ){                                           \
    c = sqlite3Utf8Trans1[c-0xc0];                         \
    while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){            \
      c = (c<<6) + (0x3f & *(zIn++));                      \
    }                                                      \
    if( c<0x80                                             \
        || (c&0xFFFFF800)==0xD800                          \
        || (c&0xFFFFFFFE)==0xFFFE ){  c = 0xFFFD; }        \
  }

#define WRITE_UTF8(zOut, c) {                          \
  if( c<0x00080 ){                                     \
    *zOut++ = (unsigned char)(c&0xFF);                 \
  }                                                    \
  else if( c<0x00800 ){                                \
    *zOut++ = 0xC0 + (unsigned char)((c>>6)&0x1F);     \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F);        \
  }                                                    \
  else if( c<0x10000 ){                                \
    *zOut++ = 0xE0 + (unsigned char)((c>>12)&0x0F);    \
    *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F);   \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F);        \
  }else{                                               \
    *zOut++ = 0xF0 + (unsigned char)((c>>18) & 0x07);  \
    *zOut++ = 0x80 + (unsigned char)((c>>12) & 0x3F);  \
    *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F);   \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F);        \
  }                                                    \
}

#endif /* ifndef SQLITE_AMALGAMATION */

/*
** A "unicode61" tokenizer instance. aiException[] is a sorted array of
** nException non-ASCII codepoints whose classification is the opposite
** of that reported by sqlite3Fts5UnicodeIsalnum().
*/
typedef struct Unicode61Tokenizer Unicode61Tokenizer;
struct Unicode61Tokenizer {
  unsigned char aTokenChar[128];  /* ASCII range token characters */
  char *aFold;                    /* Buffer to fold text into */
  int nFold;                      /* Size of aFold[] in bytes */
  int bRemoveDiacritic;           /* True if remove_diacritics=1 is set */
  int nException;
  int *aiException;
};

/*
** Apply a "tokenchars" or "separators" argument to tokenizer p.
**
** ASCII characters in z are written straight into p->aTokenChar[]. A
** non-ASCII codepoint is inserted into the sorted p->aiException[] array
** only if its default classification differs from bTokenChars and it is
** not a diacritic. Returns SQLITE_OK, or SQLITE_NOMEM if the exception
** array cannot be grown (in which case p is left unmodified).
*/
static int fts5UnicodeAddExceptions(
  Unicode61Tokenizer *p,          /* Tokenizer object */
  const char *z,                  /* Characters to treat as exceptions */
  int bTokenChars                 /* 1 for 'tokenchars', 0 for 'separators' */
){
  int rc = SQLITE_OK;
  int n = strlen(z);
  int *aNew;

  if( n>0 ){
    /* z holds at most n codepoints, so n extra slots are always enough */
    aNew = (int*)sqlite3_realloc(p->aiException, (n+p->nException)*sizeof(int));
    if( aNew ){
      int nNew = p->nException;
      const unsigned char *zCsr = (const unsigned char*)z;
      const unsigned char *zTerm = (const unsigned char*)&z[n];
      while( zCsr<zTerm ){
        int iCode;
        int bToken;
        READ_UTF8(zCsr, zTerm, iCode);
        if( iCode<128 ){
          p->aTokenChar[iCode] = bTokenChars;
        }else{
          bToken = sqlite3Fts5UnicodeIsalnum(iCode);
          assert( (bToken==0 || bToken==1) ); 
          assert( (bTokenChars==0 || bTokenChars==1) );
          if( bToken!=bTokenChars && sqlite3Fts5UnicodeIsdiacritic(iCode)==0 ){
            int i;
            /* Find the insertion point that keeps aNew[] sorted */
            for(i=0; i<nNew; i++){
              if( aNew[i]>iCode ) break;
            }
            memmove(&aNew[i+1], &aNew[i], (nNew-i)*sizeof(int));
            aNew[i] = iCode;
            nNew++;
          }
        }
      }
      p->aiException = aNew;
      p->nException = nNew;
    }else{
      rc = SQLITE_NOMEM;
    }
  }

  return rc;
}

/*
** Return true if the p->aiException[] array contains the value iCode.
*/
static int fts5UnicodeIsException(Unicode61Tokenizer *p, int iCode){
  if( p->nException>0 ){
    int *a = p->aiException;
    int iLo = 0;
    int iHi = p->nException-1;

    /* Binary search of the sorted exception array */
    while( iHi>=iLo ){
      int iTest = (iHi + iLo) / 2;
      if( iCode==a[iTest] ){
        return 1;
      }else if( iCode>a[iTest] ){
        iLo = iTest+1;
      }else{
        iHi = iTest-1;
      }
    }
  }

  return 0;
}

/*
** Delete a "unicode61" tokenizer. It is safe to pass a NULL pointer.
*/
static void fts5UnicodeDelete(Fts5Tokenizer *pTok){
  if( pTok ){
    Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTok;
    sqlite3_free(p->aiException);
    sqlite3_free(p->aFold);
    sqlite3_free(p);
  }
  return;
}

/*
** Create a "unicode61" tokenizer.
**
** azArg[]/nArg hold name/value pairs, so nArg must be even. Recognized
** names (case-insensitive) are:
**
**   remove_diacritics   value must be exactly "0" or "1" (default 1)
**   tokenchars          extra characters to treat as token characters
**   separators          extra characters to treat as separators
**
** Returns SQLITE_OK and sets *ppOut to the new tokenizer on success.
** Returns SQLITE_ERROR for an odd nArg, an unknown name or a bad
** remove_diacritics value, and SQLITE_NOMEM on allocation failure. In
** every error case *ppOut is set to NULL, matching fts5AsciiCreate().
*/
static int fts5UnicodeCreate(
  void *pCtx, 
  const char **azArg, int nArg,
  Fts5Tokenizer **ppOut
){
  int rc = SQLITE_OK;             /* Return code */
  Unicode61Tokenizer *p = 0;      /* New tokenizer object */ 

  if( nArg%2 ){
    rc = SQLITE_ERROR;
  }else{
    p = (Unicode61Tokenizer*)sqlite3_malloc(sizeof(Unicode61Tokenizer));
    if( p ){
      int i;
      memset(p, 0, sizeof(Unicode61Tokenizer));
      memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar));
      p->bRemoveDiacritic = 1;
      p->nFold = 64;
      p->aFold = sqlite3_malloc(p->nFold * sizeof(char));
      if( p->aFold==0 ){
        rc = SQLITE_NOMEM;
      }
      for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
        const char *zArg = azArg[i+1];
        if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){
          if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1] ){
            rc = SQLITE_ERROR;
          }
          p->bRemoveDiacritic = (zArg[0]=='1');
        }else
        if( 0==sqlite3_stricmp(azArg[i], "tokenchars") ){
          rc = fts5UnicodeAddExceptions(p, zArg, 1);
        }else
        if( 0==sqlite3_stricmp(azArg[i], "separators") ){
          rc = fts5UnicodeAddExceptions(p, zArg, 0);
        }else{
          rc = SQLITE_ERROR;
        }
      }
    }else{
      rc = SQLITE_NOMEM;
    }
    if( rc!=SQLITE_OK ){
      fts5UnicodeDelete((Fts5Tokenizer*)p);
      p = 0;
    }
  }

  /* Always write the output parameter, so that a caller never sees an
  ** uninitialized pointer after an error. */
  *ppOut = (Fts5Tokenizer*)p;
  return rc;
}

/*
** Return true if, for the purposes of tokenizing with the tokenizer
** passed as the first argument, codepoint iCode is considered a token 
** character (not a separator).
*/
static int fts5UnicodeIsAlnum(Unicode61Tokenizer *p, int iCode){
  assert( (sqlite3Fts5UnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 );
  /* An entry in the exception array inverts the default classification */
  return sqlite3Fts5UnicodeIsalnum(iCode) ^ fts5UnicodeIsException(p, iCode);
}

/*
** Tokenize some text using the unicode61 tokenizer.
**
** Each token is folded (sqlite3Fts5UnicodeFold() for non-ASCII
** codepoints, 'A'..'Z' to lower case for ASCII) into the p->aFold buffer,
** which is grown as required, and then passed to xToken() together with
** byte offsets into pText. Tokenization stops as soon as xToken() returns
** non-zero; SQLITE_DONE is reported to the caller as SQLITE_OK.
**
** Note that the separator-skipping loop jumps directly to labels inside
** the token loop once it has found (and, for a non-ASCII character,
** already decoded) the first character of a token.
*/
static int fts5UnicodeTokenize(
  Fts5Tokenizer *pTokenizer,
  void *pCtx,
  const char *pText, int nText,
  int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd)
){
  Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer;
  int rc = SQLITE_OK;
  unsigned char *a = p->aTokenChar;

  unsigned char *zTerm = (unsigned char*)&pText[nText];
  unsigned char *zCsr = (unsigned char *)pText;

  /* Output buffer */
  char *aFold = p->aFold;
  int nFold = p->nFold;
  const char *pEnd = &aFold[nFold-6];

  /* Each iteration of this loop gobbles up a contiguous run of separators,
  ** then the next token.  */
  while( rc==SQLITE_OK ){
    int iCode;                    /* non-ASCII codepoint read from input */
    char *zOut = aFold;
    int is;
    int ie;

    /* Skip any separator characters. */
    while( 1 ){
      if( zCsr>=zTerm ) goto tokenize_done;
      if( *zCsr & 0x80 ) {
        /* A character outside of the ascii range. Skip past it if it is
        ** a separator character. Or break out of the loop if it is not. */
        is = zCsr - (unsigned char*)pText;
        READ_UTF8(zCsr, zTerm, iCode);
        if( fts5UnicodeIsAlnum(p, iCode) ){
          goto non_ascii_tokenchar;
        }
      }else{
        if( a[*zCsr] ){
          is = zCsr - (unsigned char*)pText;
          goto ascii_tokenchar;
        }
        zCsr++;
      }
    }

    /* Run through the tokenchars. Fold them into the output buffer along
    ** the way.  */
    while( zCsr<zTerm ){

      /* Grow the output buffer so that there is sufficient space to fit the
      ** largest possible utf-8 character.  */
      if( zOut>pEnd ){
        aFold = sqlite3_malloc(nFold*2);
        if( aFold==0 ){
          rc = SQLITE_NOMEM;
          goto tokenize_done;
        }
        zOut = &aFold[zOut - p->aFold];
        memcpy(aFold, p->aFold, nFold);
        sqlite3_free(p->aFold);
        p->aFold = aFold;
        p->nFold = nFold = nFold*2;
        pEnd = &aFold[nFold-6];
      }

      if( *zCsr & 0x80 ){
        /* A non-ascii-range character. Fold it into the output buffer if
        ** it is a token character, or break out of the loop if it is not. */
        READ_UTF8(zCsr, zTerm, iCode);
        if( fts5UnicodeIsAlnum(p,iCode)||sqlite3Fts5UnicodeIsdiacritic(iCode) ){
 non_ascii_tokenchar:
          iCode = sqlite3Fts5UnicodeFold(iCode, p->bRemoveDiacritic);
          if( iCode ) WRITE_UTF8(zOut, iCode);
        }else{
          break;
        }
      }else if( a[*zCsr]==0 ){
        /* An ascii-range separator character. End of token. */
        break; 
      }else{
 ascii_tokenchar:
        if( *zCsr>='A' && *zCsr<='Z' ){
          *zOut++ = *zCsr + 32;
        }else{
          *zOut++ = *zCsr;
        }
        zCsr++;
      }
      /* ie tracks the offset just past the last token character consumed */
      ie = zCsr - (unsigned char*)pText;
    }

    /* Invoke the token callback */
    rc = xToken(pCtx, aFold, zOut-aFold, is, ie); 
  }
  
 tokenize_done:
  if( rc==SQLITE_DONE ) rc = SQLITE_OK;
  return rc;
}

/**************************************************************************
** Start of porter stemmer implementation.
*/

/* Any tokens larger than this (in bytes) are passed through without
** stemming. */
#define FTS5_PORTER_MAX_TOKEN 64

/*
** A "porter" tokenizer instance. It wraps an instance of another
** ("parent") tokenizer; aBuf[] is scratch space in which each token
** produced by the parent is stemmed.
*/
typedef struct PorterTokenizer PorterTokenizer;
struct PorterTokenizer {
  fts5_tokenizer tokenizer;       /* Parent tokenizer module */
  Fts5Tokenizer *pTokenizer;      /* Parent tokenizer instance */
  char aBuf[FTS5_PORTER_MAX_TOKEN + 64];
};

/*
** Delete a "porter" tokenizer.
*/
static void fts5PorterDelete(Fts5Tokenizer *pTok){
  if( pTok ){
    PorterTokenizer *p = (PorterTokenizer*)pTok;
    /* The parent instance may not exist if creation failed part-way */
    if( p->pTokenizer ){
      p->tokenizer.xDelete(p->pTokenizer);
    }
    sqlite3_free(p);
  }
}

/*
** Create a "porter" tokenizer.
*/ static int fts5PorterCreate( void *pCtx, const char **azArg, int nArg, Fts5Tokenizer **ppOut ){ fts5_api *pApi = (fts5_api*)pCtx; int rc = SQLITE_OK; PorterTokenizer *pRet; void *pUserdata = 0; const char *zBase = "unicode61"; if( nArg>0 ){ zBase = azArg[0]; } pRet = (PorterTokenizer*)sqlite3_malloc(sizeof(PorterTokenizer)); if( pRet ){ memset(pRet, 0, sizeof(PorterTokenizer)); rc = pApi->xFindTokenizer(pApi, zBase, &pUserdata, &pRet->tokenizer); }else{ rc = SQLITE_NOMEM; } if( rc==SQLITE_OK ){ rc = pRet->tokenizer.xCreate(pUserdata, 0, 0, &pRet->pTokenizer); } if( rc!=SQLITE_OK ){ fts5PorterDelete((Fts5Tokenizer*)pRet); pRet = 0; } *ppOut = (Fts5Tokenizer*)pRet; return rc; } typedef struct PorterContext PorterContext; struct PorterContext { void *pCtx; int (*xToken)(void*, const char*, int, int, int); char *aBuf; }; typedef struct PorterRule PorterRule; struct PorterRule { const char *zSuffix; int nSuffix; int (*xCond)(char *zStem, int nStem); const char *zOutput; int nOutput; }; #if 0 static int fts5PorterApply(char *aBuf, int *pnBuf, PorterRule *aRule){ int ret = -1; int nBuf = *pnBuf; PorterRule *p; for(p=aRule; p->zSuffix; p++){ assert( strlen(p->zSuffix)==p->nSuffix ); assert( strlen(p->zOutput)==p->nOutput ); if( nBuf<p->nSuffix ) continue; if( 0==memcmp(&aBuf[nBuf - p->nSuffix], p->zSuffix, p->nSuffix) ) break; } if( p->zSuffix ){ int nStem = nBuf - p->nSuffix; if( p->xCond==0 || p->xCond(aBuf, nStem) ){ memcpy(&aBuf[nStem], p->zOutput, p->nOutput); *pnBuf = nStem + p->nOutput; ret = p - aRule; } } return ret; } #endif static int fts5PorterIsVowel(char c, int bYIsVowel){ return ( c=='a' || c=='e' || c=='i' || c=='o' || c=='u' || (bYIsVowel && c=='y') ); } static int fts5PorterGobbleVC(char *zStem, int nStem, int bPrevCons){ int i; int bCons = bPrevCons; /* Scan for a vowel */ for(i=0; i<nStem; i++){ if( 0==(bCons = !fts5PorterIsVowel(zStem[i], bCons)) ) break; } /* Scan for a consonent */ for(i++; i<nStem; i++){ if( (bCons = !fts5PorterIsVowel(zStem[i], 
bCons)) ) return i+1; } return 0; } /* porter rule condition: (m > 0) */ static int fts5Porter_MGt0(char *zStem, int nStem){ return !!fts5PorterGobbleVC(zStem, nStem, 0); } /* porter rule condition: (m > 1) */ static int fts5Porter_MGt1(char *zStem, int nStem){ int n; n = fts5PorterGobbleVC(zStem, nStem, 0); if( n && fts5PorterGobbleVC(&zStem[n], nStem-n, 1) ){ return 1; } return 0; } /* porter rule condition: (m = 1) */ static int fts5Porter_MEq1(char *zStem, int nStem){ int n; n = fts5PorterGobbleVC(zStem, nStem, 0); if( n && 0==fts5PorterGobbleVC(&zStem[n], nStem-n, 1) ){ return 1; } return 0; } /* porter rule condition: (*o) */ static int fts5Porter_Ostar(char *zStem, int nStem){ if( zStem[nStem-1]=='w' || zStem[nStem-1]=='x' || zStem[nStem-1]=='y' ){ return 0; }else{ int i; int mask = 0; int bCons = 0; for(i=0; i<nStem; i++){ bCons = !fts5PorterIsVowel(zStem[i], bCons); assert( bCons==0 || bCons==1 ); mask = (mask << 1) + bCons; } return ((mask & 0x0007)==0x0005); } } /* porter rule condition: (m > 1 and (*S or *T)) */ static int fts5Porter_MGt1_and_S_or_T(char *zStem, int nStem){ assert( nStem>0 ); return (zStem[nStem-1]=='s' || zStem[nStem-1]=='t') && fts5Porter_MGt1(zStem, nStem); } /* porter rule condition: (*v*) */ static int fts5Porter_Vowel(char *zStem, int nStem){ int i; for(i=0; i<nStem; i++){ if( fts5PorterIsVowel(zStem[i], i>0) ){ return 1; } } return 0; } /************************************************************************** *************************************************************************** ** GENERATED CODE STARTS HERE (mkportersteps.tcl) */ static int fts5PorterStep4(char *aBuf, int *pnBuf){ int ret = 0; int nBuf = *pnBuf; switch( aBuf[nBuf-2] ){ case 'a': if( nBuf>2 && 0==memcmp("al", &aBuf[nBuf-2], 2) ){ if( fts5Porter_MGt1(aBuf, nBuf-2) ){ *pnBuf = nBuf - 2; } } break; case 'c': if( nBuf>4 && 0==memcmp("ance", &aBuf[nBuf-4], 4) ){ if( fts5Porter_MGt1(aBuf, nBuf-4) ){ *pnBuf = nBuf - 4; } }else if( nBuf>4 && 0==memcmp("ence", 
&aBuf[nBuf-4], 4) ){ if( fts5Porter_MGt1(aBuf, nBuf-4) ){ *pnBuf = nBuf - 4; } } break; case 'e': if( nBuf>2 && 0==memcmp("er", &aBuf[nBuf-2], 2) ){ if( fts5Porter_MGt1(aBuf, nBuf-2) ){ *pnBuf = nBuf - 2; } } break; case 'i': if( nBuf>2 && 0==memcmp("ic", &aBuf[nBuf-2], 2) ){ if( fts5Porter_MGt1(aBuf, nBuf-2) ){ *pnBuf = nBuf - 2; } } break; case 'l': if( nBuf>4 && 0==memcmp("able", &aBuf[nBuf-4], 4) ){ if( fts5Porter_MGt1(aBuf, nBuf-4) ){ *pnBuf = nBuf - 4; } }else if( nBuf>4 && 0==memcmp("ible", &aBuf[nBuf-4], 4) ){ if( fts5Porter_MGt1(aBuf, nBuf-4) ){ *pnBuf = nBuf - 4; } } break; case 'n': if( nBuf>3 && 0==memcmp("ant", &aBuf[nBuf-3], 3) ){ if( fts5Porter_MGt1(aBuf, nBuf-3) ){ *pnBuf = nBuf - 3; } }else if( nBuf>5 && 0==memcmp("ement", &aBuf[nBuf-5], 5) ){ if( fts5Porter_MGt1(aBuf, nBuf-5) ){ *pnBuf = nBuf - 5; } }else if( nBuf>4 && 0==memcmp("ment", &aBuf[nBuf-4], 4) ){ if( fts5Porter_MGt1(aBuf, nBuf-4) ){ *pnBuf = nBuf - 4; } }else if( nBuf>3 && 0==memcmp("ent", &aBuf[nBuf-3], 3) ){ if( fts5Porter_MGt1(aBuf, nBuf-3) ){ *pnBuf = nBuf - 3; } } break; case 'o': if( nBuf>3 && 0==memcmp("ion", &aBuf[nBuf-3], 3) ){ if( fts5Porter_MGt1_and_S_or_T(aBuf, nBuf-3) ){ *pnBuf = nBuf - 3; } }else if( nBuf>2 && 0==memcmp("ou", &aBuf[nBuf-2], 2) ){ if( fts5Porter_MGt1(aBuf, nBuf-2) ){ *pnBuf = nBuf - 2; } } break; case 's': if( nBuf>3 && 0==memcmp("ism", &aBuf[nBuf-3], 3) ){ if( fts5Porter_MGt1(aBuf, nBuf-3) ){ *pnBuf = nBuf - 3; } } break; case 't': if( nBuf>3 && 0==memcmp("ate", &aBuf[nBuf-3], 3) ){ if( fts5Porter_MGt1(aBuf, nBuf-3) ){ *pnBuf = nBuf - 3; } }else if( nBuf>3 && 0==memcmp("iti", &aBuf[nBuf-3], 3) ){ if( fts5Porter_MGt1(aBuf, nBuf-3) ){ *pnBuf = nBuf - 3; } } break; case 'u': if( nBuf>3 && 0==memcmp("ous", &aBuf[nBuf-3], 3) ){ if( fts5Porter_MGt1(aBuf, nBuf-3) ){ *pnBuf = nBuf - 3; } } break; case 'v': if( nBuf>3 && 0==memcmp("ive", &aBuf[nBuf-3], 3) ){ if( fts5Porter_MGt1(aBuf, nBuf-3) ){ *pnBuf = nBuf - 3; } } break; case 'z': if( nBuf>3 && 0==memcmp("ize", 
&aBuf[nBuf-3], 3) ){ if( fts5Porter_MGt1(aBuf, nBuf-3) ){ *pnBuf = nBuf - 3; } } break; } return ret; } static int fts5PorterStep1B2(char *aBuf, int *pnBuf){ int ret = 0; int nBuf = *pnBuf; switch( aBuf[nBuf-2] ){ case 'a': if( nBuf>2 && 0==memcmp("at", &aBuf[nBuf-2], 2) ){ memcpy(&aBuf[nBuf-2], "ate", 3); *pnBuf = nBuf - 2 + 3; ret = 1; } break; case 'b': if( nBuf>2 && 0==memcmp("bl", &aBuf[nBuf-2], 2) ){ memcpy(&aBuf[nBuf-2], "ble", 3); *pnBuf = nBuf - 2 + 3; ret = 1; } break; case 'i': if( nBuf>2 && 0==memcmp("iz", &aBuf[nBuf-2], 2) ){ memcpy(&aBuf[nBuf-2], "ize", 3); *pnBuf = nBuf - 2 + 3; ret = 1; } break; } return ret; } static int fts5PorterStep2(char *aBuf, int *pnBuf){ int ret = 0; int nBuf = *pnBuf; switch( aBuf[nBuf-2] ){ case 'a': if( nBuf>7 && 0==memcmp("ational", &aBuf[nBuf-7], 7) ){ if( fts5Porter_MGt0(aBuf, nBuf-7) ){ memcpy(&aBuf[nBuf-7], "ate", 3); *pnBuf = nBuf - 7 + 3; } }else if( nBuf>6 && 0==memcmp("tional", &aBuf[nBuf-6], 6) ){ if( fts5Porter_MGt0(aBuf, nBuf-6) ){ memcpy(&aBuf[nBuf-6], "tion", 4); *pnBuf = nBuf - 6 + 4; } } break; case 'c': if( nBuf>4 && 0==memcmp("enci", &aBuf[nBuf-4], 4) ){ if( fts5Porter_MGt0(aBuf, nBuf-4) ){ memcpy(&aBuf[nBuf-4], "ence", 4); *pnBuf = nBuf - 4 + 4; } }else if( nBuf>4 && 0==memcmp("anci", &aBuf[nBuf-4], 4) ){ if( fts5Porter_MGt0(aBuf, nBuf-4) ){ memcpy(&aBuf[nBuf-4], "ance", 4); *pnBuf = nBuf - 4 + 4; } } break; case 'e': if( nBuf>4 && 0==memcmp("izer", &aBuf[nBuf-4], 4) ){ if( fts5Porter_MGt0(aBuf, nBuf-4) ){ memcpy(&aBuf[nBuf-4], "ize", 3); *pnBuf = nBuf - 4 + 3; } } break; case 'g': if( nBuf>4 && 0==memcmp("logi", &aBuf[nBuf-4], 4) ){ if( fts5Porter_MGt0(aBuf, nBuf-4) ){ memcpy(&aBuf[nBuf-4], "log", 3); *pnBuf = nBuf - 4 + 3; } } break; case 'l': if( nBuf>3 && 0==memcmp("bli", &aBuf[nBuf-3], 3) ){ if( fts5Porter_MGt0(aBuf, nBuf-3) ){ memcpy(&aBuf[nBuf-3], "ble", 3); *pnBuf = nBuf - 3 + 3; } }else if( nBuf>4 && 0==memcmp("alli", &aBuf[nBuf-4], 4) ){ if( fts5Porter_MGt0(aBuf, nBuf-4) ){ 
memcpy(&aBuf[nBuf-4], "al", 2); *pnBuf = nBuf - 4 + 2; } }else if( nBuf>5 && 0==memcmp("entli", &aBuf[nBuf-5], 5) ){ if( fts5Porter_MGt0(aBuf, nBuf-5) ){ memcpy(&aBuf[nBuf-5], "ent", 3); *pnBuf = nBuf - 5 + 3; } }else if( nBuf>3 && 0==memcmp("eli", &aBuf[nBuf-3], 3) ){ if( fts5Porter_MGt0(aBuf, nBuf-3) ){ memcpy(&aBuf[nBuf-3], "e", 1); *pnBuf = nBuf - 3 + 1; } }else if( nBuf>5 && 0==memcmp("ousli", &aBuf[nBuf-5], 5) ){ if( fts5Porter_MGt0(aBuf, nBuf-5) ){ memcpy(&aBuf[nBuf-5], "ous", 3); *pnBuf = nBuf - 5 + 3; } } break; case 'o': if( nBuf>7 && 0==memcmp("ization", &aBuf[nBuf-7], 7) ){ if( fts5Porter_MGt0(aBuf, nBuf-7) ){ memcpy(&aBuf[nBuf-7], "ize", 3); *pnBuf = nBuf - 7 + 3; } }else if( nBuf>5 && 0==memcmp("ation", &aBuf[nBuf-5], 5) ){ if( fts5Porter_MGt0(aBuf, nBuf-5) ){ memcpy(&aBuf[nBuf-5], "ate", 3); *pnBuf = nBuf - 5 + 3; } }else if( nBuf>4 && 0==memcmp("ator", &aBuf[nBuf-4], 4) ){ if( fts5Porter_MGt0(aBuf, nBuf-4) ){ memcpy(&aBuf[nBuf-4], "ate", 3); *pnBuf = nBuf - 4 + 3; } } break; case 's': if( nBuf>5 && 0==memcmp("alism", &aBuf[nBuf-5], 5) ){ if( fts5Porter_MGt0(aBuf, nBuf-5) ){ memcpy(&aBuf[nBuf-5], "al", 2); *pnBuf = nBuf - 5 + 2; } }else if( nBuf>7 && 0==memcmp("iveness", &aBuf[nBuf-7], 7) ){ if( fts5Porter_MGt0(aBuf, nBuf-7) ){ memcpy(&aBuf[nBuf-7], "ive", 3); *pnBuf = nBuf - 7 + 3; } }else if( nBuf>7 && 0==memcmp("fulness", &aBuf[nBuf-7], 7) ){ if( fts5Porter_MGt0(aBuf, nBuf-7) ){ memcpy(&aBuf[nBuf-7], "ful", 3); *pnBuf = nBuf - 7 + 3; } }else if( nBuf>7 && 0==memcmp("ousness", &aBuf[nBuf-7], 7) ){ if( fts5Porter_MGt0(aBuf, nBuf-7) ){ memcpy(&aBuf[nBuf-7], "ous", 3); *pnBuf = nBuf - 7 + 3; } } break; case 't': if( nBuf>5 && 0==memcmp("aliti", &aBuf[nBuf-5], 5) ){ if( fts5Porter_MGt0(aBuf, nBuf-5) ){ memcpy(&aBuf[nBuf-5], "al", 2); *pnBuf = nBuf - 5 + 2; } }else if( nBuf>5 && 0==memcmp("iviti", &aBuf[nBuf-5], 5) ){ if( fts5Porter_MGt0(aBuf, nBuf-5) ){ memcpy(&aBuf[nBuf-5], "ive", 3); *pnBuf = nBuf - 5 + 3; } }else if( nBuf>6 && 0==memcmp("biliti", 
&aBuf[nBuf-6], 6) ){ if( fts5Porter_MGt0(aBuf, nBuf-6) ){ memcpy(&aBuf[nBuf-6], "ble", 3); *pnBuf = nBuf - 6 + 3; } } break; } return ret; } static int fts5PorterStep3(char *aBuf, int *pnBuf){ int ret = 0; int nBuf = *pnBuf; switch( aBuf[nBuf-2] ){ case 'a': if( nBuf>4 && 0==memcmp("ical", &aBuf[nBuf-4], 4) ){ if( fts5Porter_MGt0(aBuf, nBuf-4) ){ memcpy(&aBuf[nBuf-4], "ic", 2); *pnBuf = nBuf - 4 + 2; } } break; case 's': if( nBuf>4 && 0==memcmp("ness", &aBuf[nBuf-4], 4) ){ if( fts5Porter_MGt0(aBuf, nBuf-4) ){ *pnBuf = nBuf - 4; } } break; case 't': if( nBuf>5 && 0==memcmp("icate", &aBuf[nBuf-5], 5) ){ if( fts5Porter_MGt0(aBuf, nBuf-5) ){ memcpy(&aBuf[nBuf-5], "ic", 2); *pnBuf = nBuf - 5 + 2; } }else if( nBuf>5 && 0==memcmp("iciti", &aBuf[nBuf-5], 5) ){ if( fts5Porter_MGt0(aBuf, nBuf-5) ){ memcpy(&aBuf[nBuf-5], "ic", 2); *pnBuf = nBuf - 5 + 2; } } break; case 'u': if( nBuf>3 && 0==memcmp("ful", &aBuf[nBuf-3], 3) ){ if( fts5Porter_MGt0(aBuf, nBuf-3) ){ *pnBuf = nBuf - 3; } } break; case 'v': if( nBuf>5 && 0==memcmp("ative", &aBuf[nBuf-5], 5) ){ if( fts5Porter_MGt0(aBuf, nBuf-5) ){ *pnBuf = nBuf - 5; } } break; case 'z': if( nBuf>5 && 0==memcmp("alize", &aBuf[nBuf-5], 5) ){ if( fts5Porter_MGt0(aBuf, nBuf-5) ){ memcpy(&aBuf[nBuf-5], "al", 2); *pnBuf = nBuf - 5 + 2; } } break; } return ret; } static int fts5PorterStep1B(char *aBuf, int *pnBuf){ int ret = 0; int nBuf = *pnBuf; switch( aBuf[nBuf-2] ){ case 'e': if( nBuf>3 && 0==memcmp("eed", &aBuf[nBuf-3], 3) ){ if( fts5Porter_MGt0(aBuf, nBuf-3) ){ memcpy(&aBuf[nBuf-3], "ee", 2); *pnBuf = nBuf - 3 + 2; } }else if( nBuf>2 && 0==memcmp("ed", &aBuf[nBuf-2], 2) ){ if( fts5Porter_Vowel(aBuf, nBuf-2) ){ *pnBuf = nBuf - 2; ret = 1; } } break; case 'n': if( nBuf>3 && 0==memcmp("ing", &aBuf[nBuf-3], 3) ){ if( fts5Porter_Vowel(aBuf, nBuf-3) ){ *pnBuf = nBuf - 3; ret = 1; } } break; } return ret; } /* ** GENERATED CODE ENDS HERE (mkportersteps.tcl) *************************************************************************** 
**************************************************************************/

/*
** Porter stemmer step 1A: strip a plural suffix from the nBuf-byte token
** in aBuf by reducing *pnBuf. The bytes of aBuf are not modified.
**
**   "...sses" -> "...ss"   (only when the token is longer than 4 bytes)
**   "...ies"  -> "...i"    (only when the token is longer than 3 bytes)
**   "...es"   -> "...e"    (any other token ending in "es")
**   "...ss"   -> unchanged
**   "...s"    -> "..."
**
** aBuf[nBuf-2] is read without a length check, so the token must be at
** least 2 bytes long. fts5PorterCb() below only calls this function for
** tokens of 3 bytes or more.
*/
static void fts5PorterStep1A(char *aBuf, int *pnBuf){
  int nBuf = *pnBuf;
  if( aBuf[nBuf-1]=='s' ){
    if( aBuf[nBuf-2]=='e' ){
      if( (nBuf>4 && aBuf[nBuf-4]=='s' && aBuf[nBuf-3]=='s') 
       || (nBuf>3 && aBuf[nBuf-3]=='i' )
      ){
        /* "sses" or "ies": drop the trailing "es" */
        *pnBuf = nBuf-2;
      }else{
        /* Any other "es": drop only the trailing "s" */
        *pnBuf = nBuf-1;
      }
    }
    else if( aBuf[nBuf-2]!='s' ){
      /* A single trailing "s" that is not part of "ss" */
      *pnBuf = nBuf-1;
    }
  }
}

/*
** Token callback that applies the stemming steps to a single token.
** pCtx must point to a PorterContext.
**
** Tokens longer than FTS5_PORTER_MAX_TOKEN bytes or shorter than 3 bytes
** are forwarded to p->xToken() unchanged. Any other token is copied into
** the scratch buffer p->aBuf, shortened/rewritten in place by the steps
** below, and the stemmed bytes are forwarded instead. iStart and iEnd are
** always passed through unmodified.
**
** Returns whatever p->xToken() returns.
*/
static int fts5PorterCb(
  void *pCtx, 
  const char *pToken, 
  int nToken, 
  int iStart, 
  int iEnd
){
  PorterContext *p = (PorterContext*)pCtx;

  char *aBuf;
  int nBuf;

  if( nToken>FTS5_PORTER_MAX_TOKEN || nToken<3 ) goto pass_through;
  aBuf = p->aBuf;
  nBuf = nToken;
  memcpy(aBuf, pToken, nBuf);

  /* Step 1. */
  fts5PorterStep1A(aBuf, &nBuf);
  if( fts5PorterStep1B(aBuf, &nBuf) ){
    /* Step 1B removed "ed" or "ing". If the follow-up step 1B2 makes no
    ** change, either drop one letter of a doubled final consonant (other
    ** than l, s or z) or append an 'e'. The MEq1/Ostar helpers are defined
    ** outside this view; presumably they test the "m=1" and "*o"
    ** conditions of the Porter algorithm - TODO confirm. */
    if( fts5PorterStep1B2(aBuf, &nBuf)==0 ){
      char c = aBuf[nBuf-1];
      if( fts5PorterIsVowel(c, 0)==0 
       && c!='l' && c!='s' && c!='z' && c==aBuf[nBuf-2] 
      ){
        nBuf--;
      }else if( fts5Porter_MEq1(aBuf, nBuf) && fts5Porter_Ostar(aBuf, nBuf) ){
        aBuf[nBuf++] = 'e';
      }
    }
  }

  /* Step 1C. Replace a trailing 'y' with 'i' when fts5Porter_Vowel()
  ** accepts the part of the token that precedes it. */
  if( aBuf[nBuf-1]=='y' && fts5Porter_Vowel(aBuf, nBuf-1) ){
    aBuf[nBuf-1] = 'i';
  }

  /* Steps 2 through 4. */
  fts5PorterStep2(aBuf, &nBuf);
  fts5PorterStep3(aBuf, &nBuf);
  fts5PorterStep4(aBuf, &nBuf);

  /* Step 5a. Drop a trailing 'e' depending on the MGt1/MEq1/Ostar tests
  ** applied to the token without that 'e'. */
  assert( nBuf>0 );
  if( aBuf[nBuf-1]=='e' ){
    if( fts5Porter_MGt1(aBuf, nBuf-1) 
     || (fts5Porter_MEq1(aBuf, nBuf-1) && !fts5Porter_Ostar(aBuf, nBuf-1))
    ){
      nBuf--;
    }
  }

  /* Step 5b. Reduce a trailing "ll" to "l" when the MGt1 test passes. */
  if( nBuf>1 && aBuf[nBuf-1]=='l' 
   && aBuf[nBuf-2]=='l' && fts5Porter_MGt1(aBuf, nBuf-1) 
  ){
    nBuf--;
  }

  return p->xToken(p->pCtx, aBuf, nBuf, iStart, iEnd);

 pass_through:
  return p->xToken(p->pCtx, pToken, nToken, iStart, iEnd);
}

/*
** Tokenize using the porter tokenizer.
*/ static int fts5PorterTokenize( Fts5Tokenizer *pTokenizer, void *pCtx, const char *pText, int nText, int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd) ){ PorterTokenizer *p = (PorterTokenizer*)pTokenizer; PorterContext sCtx; sCtx.xToken = xToken; sCtx.pCtx = pCtx; sCtx.aBuf = p->aBuf; return p->tokenizer.xTokenize( p->pTokenizer, (void*)&sCtx, pText, nText, fts5PorterCb ); } /* ** Register all built-in tokenizers with FTS5. */ int sqlite3Fts5TokenizerInit(fts5_api *pApi){ struct BuiltinTokenizer { const char *zName; fts5_tokenizer x; } aBuiltin[] = { { "unicode61", {fts5UnicodeCreate, fts5UnicodeDelete, fts5UnicodeTokenize}}, { "ascii", {fts5AsciiCreate, fts5AsciiDelete, fts5AsciiTokenize }}, { "porter", {fts5PorterCreate, fts5PorterDelete, fts5PorterTokenize }}, }; int rc = SQLITE_OK; /* Return code */ int i; /* To iterate through builtin functions */ for(i=0; rc==SQLITE_OK && i<sizeof(aBuiltin)/sizeof(aBuiltin[0]); i++){ rc = pApi->xCreateTokenizer(pApi, aBuiltin[i].zName, (void*)pApi, &aBuiltin[i].x, 0 ); } return SQLITE_OK; } #endif /* defined(SQLITE_ENABLE_FTS5) */ |
Added ext/fts5/fts5_unicode2.c.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 
346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 | /* ** 2012 May 25 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** */ /* ** DO NOT EDIT THIS MACHINE GENERATED FILE. */ #if defined(SQLITE_ENABLE_FTS5) #include <assert.h> /* ** Return true if the argument corresponds to a unicode codepoint ** classified as either a letter or a number. Otherwise false. ** ** The results are undefined if the value passed to this function ** is less than zero. */ int sqlite3Fts5UnicodeIsalnum(int c){ /* Each unsigned integer in the following array corresponds to a contiguous ** range of unicode codepoints that are not either letters or numbers (i.e. ** codepoints for which this function should return 0). ** ** The most significant 22 bits in each 32-bit value contain the first ** codepoint in the range. The least significant 10 bits are used to store ** the size of the range (always at least 1). In other words, the value ** ((C<<22) + N) represents a range of N codepoints starting with codepoint ** C. It is not possible to represent a range larger than 1023 codepoints ** using this format. 
*/ static const unsigned int aEntry[] = { 0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07, 0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01, 0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401, 0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01, 0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01, 0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802, 0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F, 0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401, 0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804, 0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403, 0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812, 0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001, 0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802, 0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805, 0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401, 0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03, 0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807, 0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001, 0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01, 0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804, 0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001, 0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802, 0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01, 0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06, 0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007, 0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006, 0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417, 0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14, 0x00412806, 0x00415804, 0x00417803, 0x00418803, 0x00419C07, 0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01, 0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001, 0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802, 0x005DC802, 0x005ED023, 0x005F6004, 
0x005F7401, 0x0060000F, 0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002, 0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802, 0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006, 0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D, 0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802, 0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027, 0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403, 0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805, 0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04, 0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401, 0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005, 0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B, 0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A, 0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001, 0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59, 0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807, 0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01, 0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E, 0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100, 0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10, 0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402, 0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804, 0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012, 0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004, 0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002, 0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803, 0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07, 0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02, 0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802, 0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, 0x03F95013, 0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, 0x03FCEC06, 0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, 0x04040003, 0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, 
0x040E7C01, 0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, 0x04280403, 0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, 0x04294009, 0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, 0x04420003, 0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, 0x04460003, 0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, 0x05BD442E, 0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, 0x07480046, 0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401, 0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401, 0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F, 0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, 0x07C4C03C, 0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, 0x07C94002, 0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025, 0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6, 0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46, 0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060, 0x380400F0, }; static const unsigned int aAscii[4] = { 0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001, }; if( c<128 ){ return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 ); }else if( c<(1<<22) ){ unsigned int key = (((unsigned int)c)<<10) | 0x000003FF; int iRes = 0; int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; int iLo = 0; while( iHi>=iLo ){ int iTest = (iHi + iLo) / 2; if( key >= aEntry[iTest] ){ iRes = iTest; iLo = iTest+1; }else{ iHi = iTest-1; } } assert( aEntry[0]<key ); assert( key>=aEntry[iRes] ); return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF))); } return 1; } /* ** If the argument is a codepoint corresponding to a lowercase letter ** in the ASCII range with a diacritic added, return the codepoint ** of the ASCII letter only. For example, if passed 235 - "LATIN ** SMALL LETTER E WITH DIAERESIS" - return 65 ("LATIN SMALL LETTER ** E"). The resuls of passing a codepoint that corresponds to an ** uppercase letter are undefined. 
*/
/*
** Implementation notes for fts5_remove_diacritic():
**
** Each aDia[] entry packs (first codepoint << 3) in its upper bits and, in
** its low 3 bits, the number of further codepoints that belong to the same
** run. aChar[i] is the plain letter that every codepoint in run i maps to.
** For example 235 falls in the run encoded by 1859 (232..235) and therefore
** maps to 'e' (101). Codepoints outside every run are returned unchanged.
*/
static int fts5_remove_diacritic(int c){
  /* The tables are constant, so keep them in static storage rather than
  ** rebuilding them on the stack for every call. */
  static const unsigned short aDia[] = {
        0,  1797,  1848,  1859,  1891,  1928,  1940,  1995, 
     2024,  2040,  2060,  2110,  2168,  2206,  2264,  2286, 
     2344,  2383,  2472,  2488,  2516,  2596,  2668,  2732, 
     2782,  2842,  2894,  2954,  2984,  3000,  3028,  3336, 
     3456,  3696,  3712,  3728,  3744,  3896,  3912,  3928, 
     3968,  4008,  4040,  4106,  4138,  4170,  4202,  4234, 
     4266,  4296,  4312,  4344,  4408,  4424,  4472,  4504, 
     6148,  6198,  6264,  6280,  6360,  6429,  6505,  6529, 
    61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726, 
    61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122, 
    62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536, 
    62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730, 
    62924, 63050, 63082, 63274, 63390, 
  };
  static const char aChar[] = {
    '\0', 'a',  'c',  'e',  'i',  'n',  'o',  'u',  
    'y',  'y',  'a',  'c',  'd',  'e',  'e',  'g',  
    'h',  'i',  'j',  'k',  'l',  'n',  'o',  'r',  
    's',  't',  'u',  'u',  'w',  'y',  'z',  'o',  
    'u',  'a',  'i',  'o',  'u',  'g',  'k',  'o',  
    'j',  'g',  'n',  'a',  'e',  'i',  'o',  'r',  
    'u',  's',  't',  'h',  'a',  'e',  'o',  'y',  
    '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', 
    'a',  'b',  'd',  'd',  'e',  'f',  'g',  'h',  
    'h',  'i',  'k',  'l',  'l',  'm',  'n',  'p',  
    'r',  'r',  's',  't',  'u',  'v',  'w',  'w',  
    'x',  'y',  'z',  'h',  't',  'w',  'y',  'a',  
    'e',  'i',  'o',  'u',  'y',  
  };

  /* Setting the low 3 bits makes key compare >= every entry whose first
  ** codepoint is <= c. */
  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;

  /* Binary search for the last run that starts at or before c. */
  while( iHi>=iLo ){
    int iTest = (iHi + iLo) / 2;
    if( key >= aDia[iTest] ){
      iRes = iTest;
      iLo = iTest+1;
    }else{
      iHi = iTest-1;
    }
  }
  assert( key>=aDia[iRes] );

  /* Past the end of the run: not a mapped codepoint, return it unchanged. */
  return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);
}


/*
** Return true if the argument interpreted as a unicode codepoint
** is a diacritical modifier character.
**
** Only codepoints 768..817 are considered. Bit (c-768) of the 64-bit map
** formed by mask0 (low 32 bits) and mask1 (high bits) selects the result.
** The return value is non-zero (not necessarily 1) for a match.
*/
int sqlite3Fts5UnicodeIsdiacritic(int c){
  unsigned int mask0 = 0x08029FDF;
  unsigned int mask1 = 0x000361F8;
  if( c<768 || c>817 ) return 0;

  /* Shift an unsigned 1: for c==799 the shift count is 31, and shifting a
  ** signed 1 into the sign bit is undefined behaviour. */
  return (c < 768+32) ?
      (mask0 & ((unsigned int)1 << (c-768))) :
      (mask1 & ((unsigned int)1 << (c-768-32)));
}


/*
** Interpret the argument as a unicode codepoint.
If the codepoint
** is an upper case character that has a lower case equivalent,
** return the codepoint corresponding to the lower case version.
** Otherwise, return a copy of the argument.
**
** If bRemoveDiacritic is non-zero and c is in the range 128..65535, the
** (possibly folded) codepoint is additionally passed through
** fts5_remove_diacritic() before it is returned. bRemoveDiacritic has no
** effect on codepoints outside that range.
**
** The results are undefined if the value passed to this function
** is less than zero.
*/
int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic){
  /* Each entry in the following array defines a rule for folding a range
  ** of codepoints to lower case. The rule applies to a range of nRange
  ** codepoints starting at codepoint iCode.
  **
  ** If the least significant bit in flags is clear, then the rule applies
  ** to all nRange codepoints (i.e. all nRange codepoints are upper case and
  ** need to be folded). Or, if it is set, then the rule only applies to
  ** every second codepoint in the range, starting with codepoint iCode.
  **
  ** The 7 most significant bits in flags are an index into the aiOff[]
  ** array. If a specific codepoint C does require folding, then its lower
  ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).
  **
  ** The contents of this array are generated by parsing the CaseFolding.txt
  ** file distributed as part of the "Unicode Character Database". See
  ** http://www.unicode.org for details.
  */
  static const struct TableEntry {
    unsigned short iCode;
    unsigned char flags;
    unsigned char nRange;
  } aEntry[] = {
    {65, 14, 26},          {181, 64, 1},          {192, 14, 23},
    {216, 14, 7},          {256, 1, 48},          {306, 1, 6},
    {313, 1, 16},          {330, 1, 46},          {376, 116, 1},
    {377, 1, 6},           {383, 104, 1},         {385, 50, 1},
    {386, 1, 4},           {390, 44, 1},          {391, 0, 1},
    {393, 42, 2},          {395, 0, 1},           {398, 32, 1},
    {399, 38, 1},          {400, 40, 1},          {401, 0, 1},
    {403, 42, 1},          {404, 46, 1},          {406, 52, 1},
    {407, 48, 1},          {408, 0, 1},           {412, 52, 1},
    {413, 54, 1},          {415, 56, 1},          {416, 1, 6},
    {422, 60, 1},          {423, 0, 1},           {425, 60, 1},
    {428, 0, 1},           {430, 60, 1},          {431, 0, 1},
    {433, 58, 2},          {435, 1, 4},           {439, 62, 1},
    {440, 0, 1},           {444, 0, 1},           {452, 2, 1},
    {453, 0, 1},           {455, 2, 1},           {456, 0, 1},
    {458, 2, 1},           {459, 1, 18},          {478, 1, 18},
    {497, 2, 1},           {498, 1, 4},           {502, 122, 1},
    {503, 134, 1},         {504, 1, 40},          {544, 110, 1},
    {546, 1, 18},          {570, 70, 1},          {571, 0, 1},
    {573, 108, 1},         {574, 68, 1},          {577, 0, 1},
    {579, 106, 1},         {580, 28, 1},          {581, 30, 1},
    {582, 1, 10},          {837, 36, 1},          {880, 1, 4},
    {886, 0, 1},           {902, 18, 1},          {904, 16, 3},
    {908, 26, 1},          {910, 24, 2},          {913, 14, 17},
    {931, 14, 9},          {962, 0, 1},           {975, 4, 1},
    {976, 140, 1},         {977, 142, 1},         {981, 146, 1},
    {982, 144, 1},         {984, 1, 24},          {1008, 136, 1},
    {1009, 138, 1},        {1012, 130, 1},        {1013, 128, 1},
    {1015, 0, 1},          {1017, 152, 1},        {1018, 0, 1},
    {1021, 110, 3},        {1024, 34, 16},        {1040, 14, 32},
    {1120, 1, 34},         {1162, 1, 54},         {1216, 6, 1},
    {1217, 1, 14},         {1232, 1, 88},         {1329, 22, 38},
    {4256, 66, 38},        {4295, 66, 1},         {4301, 66, 1},
    {7680, 1, 150},        {7835, 132, 1},        {7838, 96, 1},
    {7840, 1, 96},         {7944, 150, 8},        {7960, 150, 6},
    {7976, 150, 8},        {7992, 150, 8},        {8008, 150, 6},
    {8025, 151, 8},        {8040, 150, 8},        {8072, 150, 8},
    {8088, 150, 8},        {8104, 150, 8},        {8120, 150, 2},
    {8122, 126, 2},        {8124, 148, 1},        {8126, 100, 1},
    {8136, 124, 4},        {8140, 148, 1},        {8152, 150, 2},
    {8154, 120, 2},        {8168, 150, 2},        {8170, 118, 2},
    {8172, 152, 1},        {8184, 112, 2},        {8186, 114, 2},
    {8188, 148, 1},        {8486, 98, 1},         {8490, 92, 1},
    {8491, 94, 1},         {8498, 12, 1},         {8544, 8, 16},
    {8579, 0, 1},          {9398, 10, 26},        {11264, 22, 47},
    {11360, 0, 1},         {11362, 88, 1},        {11363, 102, 1},
    {11364, 90, 1},        {11367, 1, 6},         {11373, 84, 1},
    {11374, 86, 1},        {11375, 80, 1},        {11376, 82, 1},
    {11378, 0, 1},         {11381, 0, 1},         {11390, 78, 2},
    {11392, 1, 100},       {11499, 1, 4},         {11506, 0, 1},
    {42560, 1, 46},        {42624, 1, 24},        {42786, 1, 14},
    {42802, 1, 62},        {42873, 1, 4},         {42877, 76, 1},
    {42878, 1, 10},        {42891, 0, 1},         {42893, 74, 1},
    {42896, 1, 4},         {42912, 1, 10},        {42922, 72, 1},
    {65313, 14, 26},       
  };
  /* Offsets selected by (flags>>1); added to the codepoint modulo 65536. */
  static const unsigned short aiOff[] = {
   1,     2,     8,     15,    16,    26,    28,    32,    
   37,    38,    40,    48,    63,    64,    69,    71,    
   79,    80,    116,   202,   203,   205,   206,   207,   
   209,   210,   211,   213,   214,   217,   218,   219,   
   775,   7264,  10792, 10795, 23228, 23256, 30204, 54721, 
   54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274, 
   57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406, 
   65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462, 
   65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511, 
   65514, 65521, 65527, 65528, 65529, 
  };

  int ret = c;

  assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );

  if( c<128 ){
    /* ASCII: only 'A'..'Z' are folded. */
    if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
  }else if( c<65536 ){
    const struct TableEntry *p;
    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
    int iLo = 0;
    int iRes = -1;

    /* Binary search for the last rule whose iCode is <= c. */
    assert( c>aEntry[0].iCode );
    while( iHi>=iLo ){
      int iTest = (iHi + iLo) / 2;
      int cmp = (c - aEntry[iTest].iCode);
      if( cmp>=0 ){
        iRes = iTest;
        iLo = iTest+1;
      }else{
        iHi = iTest-1;
      }
    }

    assert( iRes>=0 && c>=aEntry[iRes].iCode );
    p = &aEntry[iRes];
    /* The rule applies if c is inside its range and, when bit 0 of flags
    ** is set, c has the same parity as iCode (every second codepoint). */
    if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
      ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
      assert( ret>0 );
    }

    if( bRemoveDiacritic ) ret = fts5_remove_diacritic(ret);
  }
  
  /* Codepoints 66560..66599 are folded by adding 40. */
  else if( c>=66560 && c<66600 ){
    ret = c + 40;
  }

  return ret;
}

#endif /* defined(SQLITE_ENABLE_FTS5) */ |
Added ext/fts5/fts5_varint.c.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 | /* ** 2015 May 30 ** ** The author 
disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
** Routines for varint serialization and deserialization.
*/

#ifdef SQLITE_ENABLE_FTS5

#include "fts5Int.h"

/*
** This is a copy of the sqlite3GetVarint32() routine from the SQLite core.
** Except, this version does handle the single byte case that the core
** version depends on being handled before its function is called.
**
** Decode the varint starting at p[0], store its value in *v and return the
** number of bytes consumed. The 1-, 2- and 3-byte encodings are decoded
** inline; anything longer is decoded by sqlite3Fts5GetVarint() and the
** 64-bit result is truncated to 32 bits by the (u32) cast.
*/
int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v){
  u32 a,b;

  /* The 1-byte case.  Overwhelmingly the most common. */
  a = *p;
  /* a: p0 (unmasked) */
  if (!(a&0x80))
  {
    /* Values between 0 and 127 */
    *v = a;
    return 1;
  }

  /* The 2-byte case */
  p++;
  b = *p;
  /* b: p1 (unmasked) */
  if (!(b&0x80))
  {
    /* Values between 128 and 16383 */
    a &= 0x7f;
    a = a<<7;
    *v = a | b;
    return 2;
  }

  /* The 3-byte case */
  p++;
  a = a<<14;
  a |= *p;
  /* a: p0<<14 | p2 (unmasked) */
  if (!(a&0x80))
  {
    /* Values between 16384 and 2097151 */
    a &= (0x7f<<14)|(0x7f);
    b &= 0x7f;
    b = b<<7;
    *v = a | b;
    return 3;
  }

  /* A 32-bit varint is used to store size information in btrees.
  ** Objects are rarely larger than 2MiB limit of a 3-byte varint.
  ** A 3-byte varint is sufficient, for example, to record the size
  ** of a 1048569-byte BLOB or string.
  **
  ** We only unroll the first 1-, 2-, and 3- byte cases.  The very
  ** rare larger cases can be handled by the slower 64-bit varint
  ** routine.
  */
  {
    u64 v64;
    u8 n;

    /* p was advanced twice above; rewind to the first byte of the varint. */
    p -= 2;
    n = sqlite3Fts5GetVarint(p, &v64);
    *v = (u32)v64;
    assert( n>3 && n<=9 );
    return n;
  }
}


/*
** Bitmasks used by sqlite3GetVarint().  These precomputed constants
** are defined here rather than simply putting the constant expressions
** inline in order to work around bugs in the RVT compiler.
**
** SLOT_2_0     A mask for  (0x7f<<14) | 0x7f
**
** SLOT_4_2_0   A mask for  (0xf<<28) | SLOT_2_0, i.e. (0x7f<<28) | SLOT_2_0
**              limited to the 32 bits that fit in a u32
*/
#define SLOT_2_0     0x001fc07f
#define SLOT_4_2_0   0xf01fc07f

/*
** Read a 64-bit variable-length integer from memory starting at p[0].
** Return the number of bytes read.  The value is stored in *v.
**
** The decoder is fully unrolled. Registers a and b alternately accumulate
** the even and odd input bytes (see the "a:" / "b:" comments), and s holds
** the bits that end up in the upper 32 bits of the result. The statement
** order is significant: several masks are applied early ("CSE") so that
** later branches can reuse them.
*/
u8 sqlite3Fts5GetVarint(const unsigned char *p, u64 *v){
  u32 a,b,s;

  a = *p;
  /* a: p0 (unmasked) */
  if (!(a&0x80))
  {
    *v = a;
    return 1;
  }

  p++;
  b = *p;
  /* b: p1 (unmasked) */
  if (!(b&0x80))
  {
    a &= 0x7f;
    a = a<<7;
    a |= b;
    *v = a;
    return 2;
  }

  /* Verify that constants are precomputed correctly */
  assert( SLOT_2_0 == ((0x7f<<14) | (0x7f)) );
  assert( SLOT_4_2_0 == ((0xfU<<28) | (0x7f<<14) | (0x7f)) );

  p++;
  a = a<<14;
  a |= *p;
  /* a: p0<<14 | p2 (unmasked) */
  if (!(a&0x80))
  {
    a &= SLOT_2_0;
    b &= 0x7f;
    b = b<<7;
    a |= b;
    *v = a;
    return 3;
  }

  /* CSE1 from below */
  a &= SLOT_2_0;
  p++;
  b = b<<14;
  b |= *p;
  /* b: p1<<14 | p3 (unmasked) */
  if (!(b&0x80))
  {
    b &= SLOT_2_0;
    /* moved CSE1 up */
    /* a &= (0x7f<<14)|(0x7f); */
    a = a<<7;
    a |= b;
    *v = a;
    return 4;
  }

  /* a: p0<<14 | p2 (masked) */
  /* b: p1<<14 | p3 (unmasked) */
  /* 1:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
  /* moved CSE1 up */
  /* a &= (0x7f<<14)|(0x7f); */
  b &= SLOT_2_0;
  s = a;
  /* s: p0<<14 | p2 (masked) */

  p++;
  a = a<<14;
  a |= *p;
  /* a: p0<<28 | p2<<14 | p4 (unmasked) */
  if (!(a&0x80))
  {
    /* we can skip these cause they were (effectively) done above in
    ** calc'ing s */
    /* a &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
    /* b &= (0x7f<<14)|(0x7f); */
    b = b<<7;
    a |= b;
    s = s>>18;
    *v = ((u64)s)<<32 | a;
    return 5;
  }

  /* 2:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
  s = s<<7;
  s |= b;
  /* s: p0<<21 | p1<<14 | p2<<7 | p3 (masked) */

  p++;
  b = b<<14;
  b |= *p;
  /* b: p1<<28 | p3<<14 | p5 (unmasked) */
  if (!(b&0x80))
  {
    /* we can skip this cause it was (effectively) done above in calc'ing s */
    /* b &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
    a &= SLOT_2_0;
    a = a<<7;
    a |= b;
    s = s>>18;
    *v = ((u64)s)<<32 | a;
    return 6;
  }

  p++;
  a = a<<14;
  a |= *p;
  /* a: p2<<28 | p4<<14 | p6 (unmasked) */
  if (!(a&0x80))
  {
    a &= SLOT_4_2_0;
    b &= SLOT_2_0;
    b = b<<7;
    a |= b;
    s = s>>11;
    *v = ((u64)s)<<32 | a;
    return 7;
  }

  /* CSE2 from below */
  a &= SLOT_2_0;
  p++;
  b = b<<14;
  b |= *p;
  /* b: p3<<28 | p5<<14 | p7 (unmasked) */
  if (!(b&0x80))
  {
    b &= SLOT_4_2_0;
    /* moved CSE2 up */
    /* a &= (0x7f<<14)|(0x7f); */
    a = a<<7;
    a |= b;
    s = s>>4;
    *v = ((u64)s)<<32 | a;
    return 8;
  }

  /* Nine-byte case: the final byte contributes all 8 of its bits. */
  p++;
  a = a<<15;
  a |= *p;
  /* a: p4<<29 | p6<<15 | p8 (unmasked) */

  /* moved CSE2 up */
  /* a &= (0x7f<<29)|(0x7f<<15)|(0xff); */
  b &= SLOT_2_0;
  b = b<<8;
  a |= b;

  /* p now points at the ninth byte, so p[-4] is the fifth byte (p4); its
  ** upper bits belong in the high word s. */
  s = s<<4;
  b = p[-4];
  b &= 0x7f;
  b = b>>3;
  s |= b;

  *v = ((u64)s)<<32 | a;

  return 9;
}

/*
** The variable-length integer encoding is as follows:
**
** KEY:
**         A = 0xxxxxxx    7 bits of data and one flag bit
**         B = 1xxxxxxx    7 bits of data and one flag bit
**         C = xxxxxxxx    8 bits of data
**
**  7 bits - A
** 14 bits - BA
** 21 bits - BBA
** 28 bits - BBBA
** 35 bits - BBBBA
** 42 bits - BBBBBA
** 49 bits - BBBBBBA
** 56 bits - BBBBBBBA
** 64 bits - BBBBBBBBC
*/

/* FTS5_NOINLINE expands to SQLITE_NOINLINE when that macro is defined and
** to nothing otherwise. */
#ifdef SQLITE_NOINLINE
# define FTS5_NOINLINE SQLITE_NOINLINE
#else
# define FTS5_NOINLINE
#endif

/*
** Write a 64-bit variable-length integer to memory starting at p[0].
** The length of data write will be between 1 and 9 bytes.  The number
** of bytes written is returned.
**
** A variable-length integer consists of the lower 7 bits of each byte
** for all bytes that have the 8th bit set and one byte with the 8th
** bit clear.  Except, if we get to the 9th byte, it stores the full
** 8 bits and is the last byte.
*/ static int FTS5_NOINLINE fts5PutVarint64(unsigned char *p, u64 v){ int i, j, n; u8 buf[10]; if( v & (((u64)0xff000000)<<32) ){ p[8] = (u8)v; v >>= 8; for(i=7; i>=0; i--){ p[i] = (u8)((v & 0x7f) | 0x80); v >>= 7; } return 9; } n = 0; do{ buf[n++] = (u8)((v & 0x7f) | 0x80); v >>= 7; }while( v!=0 ); buf[0] &= 0x7f; assert( n<=9 ); for(i=0, j=n-1; j>=0; j--, i++){ p[i] = buf[j]; } return n; } int sqlite3Fts5PutVarint(unsigned char *p, u64 v){ if( v<=0x7f ){ p[0] = v&0x7f; return 1; } if( v<=0x3fff ){ p[0] = ((v>>7)&0x7f)|0x80; p[1] = v&0x7f; return 2; } return fts5PutVarint64(p,v); } int sqlite3Fts5GetVarintLen(u32 iVal){ if( iVal<(1 << 7 ) ) return 1; if( iVal<(1 << 14) ) return 2; if( iVal<(1 << 21) ) return 3; if( iVal<(1 << 28) ) return 4; return 5; } #endif /* SQLITE_ENABLE_FTS5 */ |
Added ext/fts5/fts5_vocab.c.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 
282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 | /* ** 2015 May 08 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** ** This is an SQLite virtual table module implementing direct access to an ** existing FTS5 index. The module may create several different types of ** tables: ** ** col: ** CREATE TABLE vocab(term, col, doc, cnt, PRIMARY KEY(term, col)); ** ** One row for each term/column combination. The value of $doc is set to ** the number of fts5 rows that contain at least one instance of term ** $term within column $col. Field $cnt is set to the total number of ** instances of term $term in column $col (in any row of the fts5 table). ** ** row: ** CREATE TABLE vocab(term, doc, cnt, PRIMARY KEY(term)); ** ** One row for each term in the database. The value of $doc is set to ** the number of fts5 rows that contain at least one instance of term ** $term. 
Field $cnt is set to the total number of instances of term ** $term in the database. */ #if defined(SQLITE_ENABLE_FTS5) #include "fts5Int.h" typedef struct Fts5VocabTable Fts5VocabTable; typedef struct Fts5VocabCursor Fts5VocabCursor; struct Fts5VocabTable { sqlite3_vtab base; char *zFts5Tbl; /* Name of fts5 table */ char *zFts5Db; /* Db containing fts5 table */ sqlite3 *db; /* Database handle */ Fts5Global *pGlobal; /* FTS5 global object for this database */ int eType; /* FTS5_VOCAB_COL or ROW */ }; struct Fts5VocabCursor { sqlite3_vtab_cursor base; sqlite3_stmt *pStmt; /* Statement holding lock on pIndex */ Fts5Index *pIndex; /* Associated FTS5 index */ int bEof; /* True if this cursor is at EOF */ Fts5IndexIter *pIter; /* Term/rowid iterator object */ /* These are used by 'col' tables only */ int nCol; int iCol; i64 *aCnt; i64 *aDoc; /* Output values */ i64 rowid; /* This table's current rowid value */ Fts5Buffer term; /* Current value of 'term' column */ i64 aVal[3]; /* Up to three columns left of 'term' */ }; #define FTS5_VOCAB_COL 0 #define FTS5_VOCAB_ROW 1 #define FTS5_VOCAB_COL_SCHEMA "term, col, doc, cnt" #define FTS5_VOCAB_ROW_SCHEMA "term, doc, cnt" /* ** Translate a string containing an fts5vocab table type to an ** FTS5_VOCAB_XXX constant. If successful, set *peType to the output ** value and return SQLITE_OK. Otherwise, set *pzErr to an error message ** and return SQLITE_ERROR. */ static int fts5VocabTableType(const char *zType, char **pzErr, int *peType){ int rc = SQLITE_OK; char *zCopy = sqlite3Fts5Strndup(&rc, zType, -1); if( rc==SQLITE_OK ){ sqlite3Fts5Dequote(zCopy); if( sqlite3_stricmp(zCopy, "col")==0 ){ *peType = FTS5_VOCAB_COL; }else if( sqlite3_stricmp(zCopy, "row")==0 ){ *peType = FTS5_VOCAB_ROW; }else { *pzErr = sqlite3_mprintf("fts5vocab: unknown table type: %Q", zCopy); rc = SQLITE_ERROR; } sqlite3_free(zCopy); } return rc; } /* ** The xDisconnect() virtual table method. 
*/ static int fts5VocabDisconnectMethod(sqlite3_vtab *pVtab){ Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab; sqlite3_free(pTab); return SQLITE_OK; } /* ** The xDestroy() virtual table method. */ static int fts5VocabDestroyMethod(sqlite3_vtab *pVtab){ Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab; sqlite3_free(pTab); return SQLITE_OK; } /* ** This function is the implementation of both the xConnect and xCreate ** methods of the FTS3 virtual table. ** ** The argv[] array contains the following: ** ** argv[0] -> module name ("fts5vocab") ** argv[1] -> database name ** argv[2] -> table name ** ** then: ** ** argv[3] -> name of fts5 table ** argv[4] -> type of fts5vocab table ** ** or, for tables in the TEMP schema only. ** ** argv[3] -> name of fts5 tables database ** argv[4] -> name of fts5 table ** argv[5] -> type of fts5vocab table */ static int fts5VocabInitVtab( sqlite3 *db, /* The SQLite database connection */ void *pAux, /* Pointer to Fts5Global object */ int argc, /* Number of elements in argv array */ const char * const *argv, /* xCreate/xConnect argument array */ sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */ char **pzErr /* Write any error message here */ ){ const char *azSchema[] = { "CREATE TABlE vocab(" FTS5_VOCAB_COL_SCHEMA ")", "CREATE TABlE vocab(" FTS5_VOCAB_ROW_SCHEMA ")" }; Fts5VocabTable *pRet = 0; int rc = SQLITE_OK; /* Return code */ int bDb; bDb = (argc==6 && strlen(argv[1])==4 && memcmp("temp", argv[1], 4)==0); if( argc!=5 && bDb==0 ){ *pzErr = sqlite3_mprintf("wrong number of vtable arguments"); rc = SQLITE_ERROR; }else{ int nByte; /* Bytes of space to allocate */ const char *zDb = bDb ? argv[3] : argv[1]; const char *zTab = bDb ? argv[4] : argv[3]; const char *zType = bDb ? 
argv[5] : argv[4]; int nDb = strlen(zDb)+1; int nTab = strlen(zTab)+1; int eType; rc = fts5VocabTableType(zType, pzErr, &eType); if( rc==SQLITE_OK ){ assert( eType>=0 && eType<sizeof(azSchema)/sizeof(azSchema[0]) ); rc = sqlite3_declare_vtab(db, azSchema[eType]); } nByte = sizeof(Fts5VocabTable) + nDb + nTab; pRet = sqlite3Fts5MallocZero(&rc, nByte); if( pRet ){ pRet->pGlobal = (Fts5Global*)pAux; pRet->eType = eType; pRet->db = db; pRet->zFts5Tbl = (char*)&pRet[1]; pRet->zFts5Db = &pRet->zFts5Tbl[nTab]; memcpy(pRet->zFts5Tbl, zTab, nTab); memcpy(pRet->zFts5Db, zDb, nDb); sqlite3Fts5Dequote(pRet->zFts5Tbl); sqlite3Fts5Dequote(pRet->zFts5Db); } } *ppVTab = (sqlite3_vtab*)pRet; return rc; } /* ** The xConnect() and xCreate() methods for the virtual table. All the ** work is done in function fts5VocabInitVtab(). */ static int fts5VocabConnectMethod( sqlite3 *db, /* Database connection */ void *pAux, /* Pointer to tokenizer hash table */ int argc, /* Number of elements in argv array */ const char * const *argv, /* xCreate/xConnect argument array */ sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ char **pzErr /* OUT: sqlite3_malloc'd error message */ ){ return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr); } static int fts5VocabCreateMethod( sqlite3 *db, /* Database connection */ void *pAux, /* Pointer to tokenizer hash table */ int argc, /* Number of elements in argv array */ const char * const *argv, /* xCreate/xConnect argument array */ sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ char **pzErr /* OUT: sqlite3_malloc'd error message */ ){ return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr); } /* ** Implementation of the xBestIndex method. */ static int fts5VocabBestIndexMethod( sqlite3_vtab *pVTab, sqlite3_index_info *pInfo ){ return SQLITE_OK; } /* ** Implementation of xOpen method. 
*/ static int fts5VocabOpenMethod( sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr ){ Fts5VocabTable *pTab = (Fts5VocabTable*)pVTab; Fts5Index *pIndex = 0; int nCol = 0; Fts5VocabCursor *pCsr = 0; int rc = SQLITE_OK; sqlite3_stmt *pStmt = 0; char *zSql = 0; int nByte; zSql = sqlite3Fts5Mprintf(&rc, "SELECT t.%Q FROM %Q.%Q AS t WHERE t.%Q MATCH '*id'", pTab->zFts5Tbl, pTab->zFts5Db, pTab->zFts5Tbl, pTab->zFts5Tbl ); if( zSql ){ rc = sqlite3_prepare_v2(pTab->db, zSql, -1, &pStmt, 0); } sqlite3_free(zSql); assert( rc==SQLITE_OK || pStmt==0 ); if( rc==SQLITE_ERROR ) rc = SQLITE_OK; if( pStmt && sqlite3_step(pStmt)==SQLITE_ROW ){ i64 iId = sqlite3_column_int64(pStmt, 0); pIndex = sqlite3Fts5IndexFromCsrid(pTab->pGlobal, iId, &nCol); } if( rc==SQLITE_OK && pIndex==0 ){ rc = sqlite3_finalize(pStmt); pStmt = 0; if( rc==SQLITE_OK ){ pVTab->zErrMsg = sqlite3_mprintf( "no such fts5 table: %s.%s", pTab->zFts5Db, pTab->zFts5Tbl ); rc = SQLITE_ERROR; } } nByte = nCol * sizeof(i64) * 2 + sizeof(Fts5VocabCursor); pCsr = (Fts5VocabCursor*)sqlite3Fts5MallocZero(&rc, nByte); if( pCsr ){ pCsr->pIndex = pIndex; pCsr->pStmt = pStmt; pCsr->nCol = nCol; pCsr->aCnt = (i64*)&pCsr[1]; pCsr->aDoc = &pCsr->aCnt[nCol]; }else{ sqlite3_finalize(pStmt); } *ppCsr = (sqlite3_vtab_cursor*)pCsr; return rc; } static void fts5VocabResetCursor(Fts5VocabCursor *pCsr){ pCsr->rowid = 0; sqlite3Fts5IterClose(pCsr->pIter); pCsr->pIter = 0; } /* ** Close the cursor. For additional information see the documentation ** on the xClose method of the virtual table interface. */ static int fts5VocabCloseMethod(sqlite3_vtab_cursor *pCursor){ Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; fts5VocabResetCursor(pCsr); sqlite3Fts5BufferFree(&pCsr->term); sqlite3_finalize(pCsr->pStmt); sqlite3_free(pCsr); return SQLITE_OK; } /* ** Advance the cursor to the next row in the table. 
*/
static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){
  Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
  Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab;
  int rc = SQLITE_OK;

  pCsr->rowid++;

  /* For a 'col' table, first try to move to the next column of the
  ** current term that has a non-zero instance count. */
  if( pTab->eType==FTS5_VOCAB_COL ){
    for(pCsr->iCol++; pCsr->iCol<pCsr->nCol; pCsr->iCol++){
      if( pCsr->aCnt[pCsr->iCol] ) break;
    }
  }

  /* Otherwise (or always, for a 'row' table) load the next term from the
  ** index iterator and accumulate its totals. */
  if( pTab->eType==FTS5_VOCAB_ROW || pCsr->iCol>=pCsr->nCol ){
    if( sqlite3Fts5IterEof(pCsr->pIter) ){
      pCsr->bEof = 1;
    }else{
      const char *zTerm;
      int nTerm;

      zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
      sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm);
      memset(pCsr->aVal, 0, sizeof(pCsr->aVal));
      memset(pCsr->aCnt, 0, pCsr->nCol * sizeof(i64));
      memset(pCsr->aDoc, 0, pCsr->nCol * sizeof(i64));
      pCsr->iCol = 0;

      assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW );
      /* Each pass handles one iterator entry. The loop ends when the
      ** iterator moves on to a different term, reaches EOF, or fails. */
      while( rc==SQLITE_OK ){
        i64 dummy;
        const u8 *pPos; int nPos;   /* Position list */
        i64 iPos = 0;               /* 64-bit position read from poslist */
        int iOff = 0;               /* Current offset within position list */

        rc = sqlite3Fts5IterPoslist(pCsr->pIter, &pPos, &nPos, &dummy);
        if( rc==SQLITE_OK ){
          if( pTab->eType==FTS5_VOCAB_ROW ){
            /* aVal[1] ("cnt") counts positions, aVal[0] ("doc") entries. */
            while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
              pCsr->aVal[1]++;
            }
            pCsr->aVal[0]++;
          }else{
            int iCol = -1;          /* Column of the previous position */
            while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
              int ii = FTS5_POS2COLUMN(iPos);
              /* The column number comes from index data. Never use it to
              ** write outside the nCol-entry arrays. */
              if( ii<0 || ii>=pCsr->nCol ){
                rc = SQLITE_CORRUPT_VTAB;
                break;
              }
              pCsr->aCnt[ii]++;
              if( iCol!=ii ){
                pCsr->aDoc[ii]++;
                iCol = ii;
              }
            }
          }
          if( rc==SQLITE_OK ){
            rc = sqlite3Fts5IterNextScan(pCsr->pIter);
          }
        }
        if( rc==SQLITE_OK ){
          /* Test for EOF before looking at the iterator's term. */
          if( sqlite3Fts5IterEof(pCsr->pIter) ) break;
          zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
          if( nTerm!=pCsr->term.n || memcmp(zTerm, pCsr->term.p, nTerm) ) break;
        }
      }
    }
  }

  /* Publish the values for the current column of a 'col' table. This is
  ** skipped after an error: the counts may then be all zero, and an
  ** unbounded search for a non-zero entry would run off the end of
  ** aCnt[]. The search is bounded by nCol for the same reason. */
  if( rc==SQLITE_OK && pCsr->bEof==0 && pTab->eType==FTS5_VOCAB_COL ){
    while( pCsr->iCol<pCsr->nCol && pCsr->aCnt[pCsr->iCol]==0 ) pCsr->iCol++;
    if( pCsr->iCol<pCsr->nCol ){
      pCsr->aVal[0] = pCsr->iCol;
      pCsr->aVal[1] = pCsr->aDoc[pCsr->iCol];
      pCsr->aVal[2] = pCsr->aCnt[pCsr->iCol];
    }
  }
  return rc;
}

/*
** This is the xFilter implementation for the virtual table.
*/ static int fts5VocabFilterMethod( sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ int idxNum, /* Strategy index */ const char *idxStr, /* Unused */ int nVal, /* Number of elements in apVal */ sqlite3_value **apVal /* Arguments for the indexing scheme */ ){ Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; int rc; const int flags = FTS5INDEX_QUERY_SCAN; fts5VocabResetCursor(pCsr); rc = sqlite3Fts5IndexQuery(pCsr->pIndex, 0, 0, flags, &pCsr->pIter); if( rc==SQLITE_OK ){ rc = fts5VocabNextMethod(pCursor); } return rc; } /* ** This is the xEof method of the virtual table. SQLite calls this ** routine to find out if it has reached the end of a result set. */ static int fts5VocabEofMethod(sqlite3_vtab_cursor *pCursor){ Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; return pCsr->bEof; } static int fts5VocabColumnMethod( sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ int iCol /* Index of column to read value from */ ){ Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; switch( iCol ){ case 0: /* term */ sqlite3_result_text( pCtx, (const char*)pCsr->term.p, pCsr->term.n, SQLITE_TRANSIENT ); break; default: assert( iCol<4 && iCol>0 ); sqlite3_result_int64(pCtx, pCsr->aVal[iCol-1]); break; } return SQLITE_OK; } /* ** This is the xRowid method. The SQLite core calls this routine to ** retrieve the rowid for the current row of the result set. fts5 ** exposes %_content.docid as the rowid for the virtual table. The ** rowid should be written to *pRowid. 
*/ static int fts5VocabRowidMethod( sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid ){ Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; *pRowid = pCsr->rowid; return SQLITE_OK; } int sqlite3Fts5VocabInit(Fts5Global *pGlobal, sqlite3 *db){ static const sqlite3_module fts5Vocab = { /* iVersion */ 2, /* xCreate */ fts5VocabCreateMethod, /* xConnect */ fts5VocabConnectMethod, /* xBestIndex */ fts5VocabBestIndexMethod, /* xDisconnect */ fts5VocabDisconnectMethod, /* xDestroy */ fts5VocabDestroyMethod, /* xOpen */ fts5VocabOpenMethod, /* xClose */ fts5VocabCloseMethod, /* xFilter */ fts5VocabFilterMethod, /* xNext */ fts5VocabNextMethod, /* xEof */ fts5VocabEofMethod, /* xColumn */ fts5VocabColumnMethod, /* xRowid */ fts5VocabRowidMethod, /* xUpdate */ 0, /* xBegin */ 0, /* xSync */ 0, /* xCommit */ 0, /* xRollback */ 0, /* xFindFunction */ 0, /* xRename */ 0, /* xSavepoint */ 0, /* xRelease */ 0, /* xRollbackTo */ 0, }; void *p = (void*)pGlobal; return sqlite3_create_module_v2(db, "fts5vocab", &fts5Vocab, p, 0); } #endif /* defined(SQLITE_ENABLE_FTS5) */ |
Added ext/fts5/fts5parse.y.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 | /* ** 2014 May 31 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** */ // All token codes are small integers with #defines that begin with "TK_" %token_prefix FTS5_ // The type of the data attached to each token is Token. This is also the // default type for non-terminals. 
//
%token_type {Fts5Token}
%default_type {Fts5Token}

// The generated parser function takes a 4th argument as follows:
%extra_argument {Fts5Parse *pParse}

// This code runs whenever there is a syntax error
//
%syntax_error {
  sqlite3Fts5ParseError(
    pParse, "fts5: syntax error near \"%.*s\"",TOKEN.n,TOKEN.p
  );
}

// This code runs if the parser stack overflows. Report it as an ordinary
// parse error so that the condition is also detected in builds where
// assert() is compiled out.
//
%stack_overflow {
  sqlite3Fts5ParseError(pParse, "fts5: parser stack overflow");
}

// The name of the generated procedure that implements the parser
// is as follows:
%name sqlite3Fts5Parser

// The following text is included near the beginning of the C source
// code file that implements the parser.
//
%include {
#include "fts5Int.h"
#include "fts5parse.h"

/*
** Disable all error recovery processing in the parser push-down
** automaton.
*/
#define YYNOERRORRECOVERY 1

/*
** Make yytestcase() the same as testcase()
*/
#define yytestcase(X) testcase(X)

} // end %include

// Operator precedence, lowest first.
%left OR.
%left AND.
%left NOT.
%left TERM.
%left COLON.

// The start symbol. The finished expression tree is handed to
// sqlite3Fts5ParseFinished().
input ::= expr(X). { sqlite3Fts5ParseFinished(pParse, X); }

%type cnearset    {Fts5ExprNode*}
%type expr        {Fts5ExprNode*}
%type exprlist    {Fts5ExprNode*}
%destructor cnearset { sqlite3Fts5ParseNodeFree($$); }
%destructor expr     { sqlite3Fts5ParseNodeFree($$); }
%destructor exprlist { sqlite3Fts5ParseNodeFree($$); }

expr(A) ::= expr(X) AND expr(Y). {
  A = sqlite3Fts5ParseNode(pParse, FTS5_AND, X, Y, 0);
}
expr(A) ::= expr(X) OR expr(Y). {
  A = sqlite3Fts5ParseNode(pParse, FTS5_OR, X, Y, 0);
}
expr(A) ::= expr(X) NOT expr(Y). {
  A = sqlite3Fts5ParseNode(pParse, FTS5_NOT, X, Y, 0);
}

expr(A) ::= LP expr(X) RP. {A = X;}
expr(A) ::= exprlist(X).   {A = X;}

// Adjacent near-sets with no explicit operator are combined with AND.
exprlist(A) ::= cnearset(X). {A = X;}
exprlist(A) ::= exprlist(X) cnearset(Y). {
  A = sqlite3Fts5ParseNode(pParse, FTS5_AND, X, Y, 0);
}

// A near-set, optionally prefixed by "<column-set> :".
cnearset(A) ::= nearset(X). {
  A = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, X);
}
cnearset(A) ::= colset(X) COLON nearset(Y). {
  sqlite3Fts5ParseSetColset(pParse, Y, X);
  A = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, Y);
}

// A column set: either a single name, or a list of names between the
// LCP and RCP tokens.
%type colset {Fts5ExprColset*}
%destructor colset { sqlite3_free($$); }
%type colsetlist {Fts5ExprColset*}
%destructor colsetlist { sqlite3_free($$); }

colset(A) ::= LCP colsetlist(X) RCP. { A = X; }
colset(A) ::= STRING(X). {
  A = sqlite3Fts5ParseColset(pParse, 0, &X);
}

colsetlist(A) ::= colsetlist(Y) STRING(X). {
  A = sqlite3Fts5ParseColset(pParse, Y, &X); }
colsetlist(A) ::= STRING(X). {
  A = sqlite3Fts5ParseColset(pParse, 0, &X);
}


%type nearset     {Fts5ExprNearset*}
%type nearphrases {Fts5ExprNearset*}
%destructor nearset { sqlite3Fts5ParseNearsetFree($$); }
%destructor nearphrases { sqlite3Fts5ParseNearsetFree($$); }

nearset(A) ::= phrase(X). { A = sqlite3Fts5ParseNearset(pParse, 0, X); }
nearset(A) ::= STRING(X) LP nearphrases(Y) neardist_opt(Z) RP. {
  sqlite3Fts5ParseNear(pParse, &X);
  sqlite3Fts5ParseSetDistance(pParse, Y, &Z);
  A = Y;
}

nearphrases(A) ::= phrase(X). {
  A = sqlite3Fts5ParseNearset(pParse, 0, X);
}
nearphrases(A) ::= nearphrases(X) phrase(Y). {
  A = sqlite3Fts5ParseNearset(pParse, X, Y);
}

/*
** The optional ", <integer>" at the end of the NEAR() arguments.
*/
neardist_opt(A) ::= . { A.p = 0; A.n = 0; }
neardist_opt(A) ::= COMMA STRING(X). { A = X; }

/*
** A phrase. A set of primitives connected by "+" operators. Examples:
**
**     "the" + "quick brown" + fo *
**     "the quick brown fo" *
**     the+quick+brown+fo*
*/
%type phrase {Fts5ExprPhrase*}
%destructor phrase { sqlite3Fts5ParsePhraseFree($$); }

phrase(A) ::= phrase(X) PLUS STRING(Y) star_opt(Z). {
  A = sqlite3Fts5ParseTerm(pParse, X, &Y, Z);
}
phrase(A) ::= STRING(Y) star_opt(Z). {
  A = sqlite3Fts5ParseTerm(pParse, 0, &Y, Z);
}

/*
** Optional "*" character.
*/
%type star_opt {int}

star_opt(A) ::= STAR. { A = 1; }
star_opt(A) ::= . { A = 0; }
Added ext/fts5/mkportersteps.tcl.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 | # # 2014 Jun 09 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. 
#
#-------------------------------------------------------------------------
#
# This script generates the implementations of the following C functions,
# which are part of the porter tokenizer implementation:
#
#     static int fts5PorterStep1B(char *aBuf, int *pnBuf);
#     static int fts5PorterStep1B2(char *aBuf, int *pnBuf);
#     static int fts5PorterStep2(char *aBuf, int *pnBuf);
#     static int fts5PorterStep3(char *aBuf, int *pnBuf);
#     static int fts5PorterStep4(char *aBuf, int *pnBuf);
#
# Each element of the O() array is a list of rules. A rule is a list of:
#
#     <suffix> <condition function or ""> <replacement> ?<return flag>?
#
# If the optional fourth element is present and true, the generated code
# sets "ret = 1" when the rule is applied.
#

set O(Step1B2) {
  { at  {} ate 1 }
  { bl  {} ble 1 }
  { iz  {} ize 1 }
}

set O(Step1B) {
  { "eed" fts5Porter_MGt0  "ee" 0 }
  { "ed"  fts5Porter_Vowel ""   1 }
  { "ing" fts5Porter_Vowel ""   1 }
}

set O(Step2) {
  { "ational" fts5Porter_MGt0 "ate" }
  { "tional"  fts5Porter_MGt0 "tion" }
  { "enci"    fts5Porter_MGt0 "ence" }
  { "anci"    fts5Porter_MGt0 "ance" }
  { "izer"    fts5Porter_MGt0 "ize" }
  { "logi"    fts5Porter_MGt0 "log" }
  { "bli"     fts5Porter_MGt0 "ble" }
  { "alli"    fts5Porter_MGt0 "al" }
  { "entli"   fts5Porter_MGt0 "ent" }
  { "eli"     fts5Porter_MGt0 "e" }
  { "ousli"   fts5Porter_MGt0 "ous" }
  { "ization" fts5Porter_MGt0 "ize" }
  { "ation"   fts5Porter_MGt0 "ate" }
  { "ator"    fts5Porter_MGt0 "ate" }
  { "alism"   fts5Porter_MGt0 "al" }
  { "iveness" fts5Porter_MGt0 "ive" }
  { "fulness" fts5Porter_MGt0 "ful" }
  { "ousness" fts5Porter_MGt0 "ous" }
  { "aliti"   fts5Porter_MGt0 "al" }
  { "iviti"   fts5Porter_MGt0 "ive" }
  { "biliti"  fts5Porter_MGt0 "ble" }
}

set O(Step3) {
  { "icate" fts5Porter_MGt0 "ic" }
  { "ative" fts5Porter_MGt0 "" }
  { "alize" fts5Porter_MGt0 "al" }
  { "iciti" fts5Porter_MGt0 "ic" }
  { "ical"  fts5Porter_MGt0 "ic" }
  { "ful"   fts5Porter_MGt0 "" }
  { "ness"  fts5Porter_MGt0 "" }
}

set O(Step4) {
  { "al"    fts5Porter_MGt1 "" }
  { "ance"  fts5Porter_MGt1 "" }
  { "ence"  fts5Porter_MGt1 "" }
  { "er"    fts5Porter_MGt1 "" }
  { "ic"    fts5Porter_MGt1 "" }
  { "able"  fts5Porter_MGt1 "" }
  { "ible"  fts5Porter_MGt1 "" }
  { "ant"   fts5Porter_MGt1 "" }
  { "ement" fts5Porter_MGt1 "" }
  { "ment"  fts5Porter_MGt1 "" }
  { "ent"   fts5Porter_MGt1 "" }
  { "ion"   fts5Porter_MGt1_and_S_or_T "" }
  { "ou"    fts5Porter_MGt1 "" }
  { "ism"   fts5Porter_MGt1 "" }
  { "ate"   fts5Porter_MGt1 "" }
  { "iti"   fts5Porter_MGt1 "" }
  { "ous"   fts5Porter_MGt1 "" }
  { "ive"   fts5Porter_MGt1 "" }
  { "ize"   fts5Porter_MGt1 "" }
}

# Comparison callback: orders two rules by the second-to-last character
# of their suffix. Not referenced by the rest of this script.
#
proc sort_cb {lhs rhs} {
  set L [string range [lindex $lhs 0] end-1 end-1]
  set R [string range [lindex $rhs 0] end-1 end-1]
  string compare $L $R
}

# Write to stdout the C function fts5Porter${name}() implementing the
# list of rules $data.
#
# The generated function switches on the second-to-last character of the
# buffer. Rules are grouped into "case" blocks by the second-to-last
# character of their suffix, and the rules within one case are chained
# together with "else".
#
proc create_step_function {name data} {

  # Code templates. ${...} and $... references are expanded with [subst].
  set T(function) { static int fts5Porter${name}(char *aBuf, int *pnBuf){ int ret = 0; int nBuf = *pnBuf; switch( aBuf[nBuf-2] ){ ${switchbody} } return ret; } }

  set T(case) { case '${k}': ${ifstmts} break; }

  # The if_<bCond>_<bMemcpy>_<bRet> templates cover the combinations of:
  #
  #   bCond:   rule has a condition function
  #   bMemcpy: rule has a non-empty replacement string
  #   bRet:    rule sets the return value to 1
  #
  set T(if_0_0_0) { if( ${match} ){ *pnBuf = nBuf - $n; } }
  set T(if_1_0_0) { if( ${match} ){ if( ${cond} ){ *pnBuf = nBuf - $n; } } }
  set T(if_0_1_0) { if( ${match} ){ ${memcpy} *pnBuf = nBuf - $n + $nRep; } }
  set T(if_1_1_0) { if( ${match} ){ if( ${cond} ){ ${memcpy} *pnBuf = nBuf - $n + $nRep; } } }
  set T(if_1_0_1) { if( ${match} ){ if( ${cond} ){ *pnBuf = nBuf - $n; ret = 1; } } }
  set T(if_0_1_1) { if( ${match} ){ ${memcpy} *pnBuf = nBuf - $n + $nRep; ret = 1; } }
  set T(if_1_1_1) { if( ${match} ){ if( ${cond} ){ ${memcpy} *pnBuf = nBuf - $n + $nRep; ret = 1; } } }

  set switchbody ""

  # Group the rules by the second-to-last character of the suffix.
  foreach I $data {
    set k [string range [lindex $I 0] end-1 end-1]
    lappend aCase($k) $I
  }

  # Emit one "case" per group, in sorted order of the key character.
  foreach k [lsort [array names aCase]] {
    set ifstmts ""
    foreach I $aCase($k) {
      set zSuffix [lindex $I 0]         ;# Suffix text for this rule
      set zRep [lindex $I 2]            ;# Replacement text for rule
      set xCond [lindex $I 1]           ;# Condition callback (or "")

      set n [string length $zSuffix]
      set nRep [string length $zRep]

      # C fragments substituted into the selected template.
      set match "nBuf>$n && 0==memcmp(\"$zSuffix\", &aBuf\[nBuf-$n\], $n)"
      set memcpy "memcpy(&aBuf\[nBuf-$n\], \"$zRep\", $nRep);"
      set cond "${xCond}(aBuf, nBuf-$n)"

      set bMemcpy [expr {$nRep>0}]
      set bCond [expr {$xCond!=""}]
      set bRet [expr {[llength $I]>3 && [lindex $I 3]}]

      set t $T(if_${bCond}_${bMemcpy}_${bRet})
      lappend ifstmts [string trim [subst -nocommands $t]]
    }

    set ifstmts [join $ifstmts "else "]

    append switchbody [subst -nocommands $T(case)]
  }

  puts [subst -nocommands $T(function)]
}


puts [string trim { /************************************************************************** *************************************************************************** ** GENERATED CODE STARTS HERE (mkportersteps.tcl) */ }]

# NOTE(review): [array names] does not promise any particular order, so
# the order in which the step functions are written may vary.
foreach step [array names O] {
  create_step_function $step $O($step)
}

puts [string trim { /* ** GENERATED CODE ENDS HERE (mkportersteps.tcl) *************************************************************************** **************************************************************************/ }]
Added ext/fts5/test/fts5_common.tcl.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 | # 2014 Dec 19 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. 
#
#***********************************************************************
#
# Shared set-up and helper procs for the fts5 test scripts.
#

if {![info exists testdir]} {
  set testdir [file join [file dirname [info script]] .. .. .. test]
}
source $testdir/tester.tcl

# Errors from any of these commands are deliberately ignored.
catch {
  sqlite3_fts5_may_be_corrupt 0
  append G(perm:dbconfig) "; load_static_extension \$::dbhandle fts5"
  reset_db
}

# Return a list with one element for each instance reported by
# [$cmd xInst], with the spaces in each element replaced by "."
# characters.
#
proc fts5_test_poslist {cmd} {
  set res [list]
  for {set i 0} {$i < [$cmd xInstCount]} {incr i} {
    lappend res [string map {{ } .} [$cmd xInst $i]]
  }
  set res
}

# Return a list of the [$cmd xColumnSize] values for each column.
#
proc fts5_test_columnsize {cmd} {
  set res [list]
  for {set i 0} {$i < [$cmd xColumnCount]} {incr i} {
    lappend res [$cmd xColumnSize $i]
  }
  set res
}

# Return a list of the [$cmd xColumnText] values for each column.
#
proc fts5_test_columntext {cmd} {
  set res [list]
  for {set i 0} {$i < [$cmd xColumnCount]} {incr i} {
    lappend res [$cmd xColumnText $i]
  }
  set res
}

# Return a list of the [$cmd xColumnTotalSize] values for each column.
#
proc fts5_test_columntotalsize {cmd} {
  set res [list]
  for {set i 0} {$i < [$cmd xColumnCount]} {incr i} {
    lappend res [$cmd xColumnTotalSize $i]
  }
  set res
}

# Tokenizer callback used by [fts5_test_tokenize]. Appends $token to the
# list variable named $varname in the caller's frame. The iStart and iEnd
# arguments are ignored.
#
proc test_append_token {varname token iStart iEnd} {
  upvar $varname var
  lappend var $token
  return "SQLITE_OK"
}

# Return a list with one element per column, each of which is the list of
# tokens produced by running [$cmd xTokenize] on that column's text.
#
proc fts5_test_tokenize {cmd} {
  set res [list]
  for {set i 0} {$i < [$cmd xColumnCount]} {incr i} {
    set tokens [list]
    $cmd xTokenize [$cmd xColumnText $i] [list test_append_token tokens]
    lappend res $tokens
  }
  set res
}

# Return the value of [$cmd xRowCount].
#
proc fts5_test_rowcount {cmd} {
  $cmd xRowCount
}

# Callback used by [fts5_test_queryphrase]. $cnt is the name of a list
# variable in the caller's frame. For each distinct column number that
# appears in the [$cmd xInst] results, the list element at that index is
# incremented by one.
#
proc test_queryphrase_cb {cnt cmd} {
  upvar $cnt L
  for {set i 0} {$i < [$cmd xInstCount]} {incr i} {
    foreach {ip ic io} [$cmd xInst $i] break
    set A($ic) 1
  }
  foreach ic [array names A] {
    lset L $ic [expr {[lindex $L $ic] + 1}]
  }
}

# Return a list with one element per phrase. Each element is a list with
# one counter per column, filled in by [test_queryphrase_cb] through
# [$cmd xQueryPhrase].
#
proc fts5_test_queryphrase {cmd} {
  set res [list]
  for {set i 0} {$i < [$cmd xPhraseCount]} {incr i} {
    set cnt [list]
    for {set j 0} {$j < [$cmd xColumnCount]} {incr j} { lappend cnt 0 }
    $cmd xQueryPhrase $i [list test_queryphrase_cb cnt]
    lappend res $cnt
  }
  set res
}

# Return a name/value list combining the results of the other
# fts5_test_xxx procs (except fts5_test_queryphrase).
#
proc fts5_test_all {cmd} {
  set res [list]
  lappend res columnsize      [fts5_test_columnsize $cmd]
  lappend res columntext      [fts5_test_columntext $cmd]
  lappend res columntotalsize [fts5_test_columntotalsize $cmd]
  lappend res poslist         [fts5_test_poslist $cmd]
  lappend res tokenize        [fts5_test_tokenize $cmd]
  lappend res rowcount        [fts5_test_rowcount $cmd]
  set res
}

# Register each fts5_test_xxx proc with database $db, under its own name,
# using sqlite3_fts5_create_function.
#
proc fts5_aux_test_functions {db} {
  foreach f {
    fts5_test_columnsize
    fts5_test_columntext
    fts5_test_columntotalsize
    fts5_test_poslist
    fts5_test_tokenize
    fts5_test_rowcount
    fts5_test_all

    fts5_test_queryphrase
  } {
    sqlite3_fts5_create_function $db $f $f
  }
}

# Decode the record with rowid 10 of table ${tbl}_data using
# fts5_decode(). Return a list containing, for each element of the
# decoded value after the first, its list length minus two.
#
proc fts5_level_segs {tbl} {
  set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10"
  set ret [list]
  foreach L [lrange [db one $sql] 1 end] {
    lappend ret [expr [llength $L] - 2]
  }
  set ret
}

# As [fts5_level_segs], but instead of a count return, for each element,
# the list of "id=" values found in its entries from index 2 onwards.
#
proc fts5_level_segids {tbl} {
  set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10"
  set ret [list]
  foreach L [lrange [db one $sql] 1 end] {
    set lvl [list]
    foreach S [lrange $L 2 end] {
      regexp {id=([1234567890]*)} $S -> segid
      lappend lvl $segid
    }
    lappend ret $lvl
  }
  set ret
}

# Return a list of $n pseudo-random tokens. Each token is "x" followed by
# three letters in the range a-j (a zero-padded random number below 1000
# with its digits mapped to letters).
#
proc fts5_rnddoc {n} {
  set map [list 0 a  1 b  2 c  3 d  4 e  5 f  6 g  7 h  8 i  9 j]
  set doc [list]
  for {set i 0} {$i < $n} {incr i} {
    lappend doc "x[string map $map [format %.3d [expr int(rand()*1000)]]]"
  }
  set doc
}

#-------------------------------------------------------------------------
# Usage:
#
#   nearset aCol ?-pc VARNAME? ?-near N? ?-col C? -- phrase1 phrase2...
#
# This command is used to test if a document (set of column values) matches
# the logical equivalent of a single FTS5 NEAR() clump and, if so, return
# the equivalent of an FTS5 position list.
#
# Parameter $aCol is passed a list of the column values for the document
# to test. Parameters $phrase1 and so on are the phrases.
#
# The result is a list of phrase hits. Each phrase hit is formatted as
# three integers separated by "." characters, in the following format:
#
#   <phrase number> . <column number> . <token offset>
#
# Options:
#
#   -near N        (NEAR distance.
#                  Default 10)
#   -col  C        (List of column indexes to match against)
#   -pc   VARNAME  (variable in caller frame to use for phrase numbering)
#
proc nearset {aCol args} {
  # Option defaults.
  set O(-near) 10
  set O(-col)  {}
  set O(-pc)   ""

  # Everything before the "--" argument is an option/value pair.
  set nOpt [lsearch -exact $args --]
  if {$nOpt<0} { error "no -- option" }

  foreach {k v} [lrange $args 0 [expr $nOpt-1]] {
    if {[info exists O($k)]==0} { error "unrecognized option $k" }
    set O($k) $v
  }

  # Phrase numbers start at 0 unless a counter variable was supplied.
  if {$O(-pc) == ""} {
    set counter 0
  } else {
    upvar $O(-pc) counter
  }

  # Set $phraselist to be a list of phrases. $nPhrase its length.
  set phraselist [lrange $args [expr $nOpt+1] end]
  set nPhrase [llength $phraselist]

  # A($iCol,$iPhrase) accumulates the token offsets at which phrase
  # $iPhrase was found in column $iCol.
  for {set j 0} {$j < [llength $aCol]} {incr j} {
    for {set i 0} {$i < $nPhrase} {incr i} {
      set A($j,$i) [list]
    }
  }

  set iCol -1
  foreach col $aCol {
    incr iCol
    if {$O(-col)!="" && [lsearch $O(-col) $iCol]<0} continue
    set nToken [llength $col]

    # $iFL is the last token offset considered in each pass of the loop
    # below.
    set iFL [expr $O(-near) >= $nToken ? $nToken - 1 : $O(-near)]
    for { } {$iFL < $nToken} {incr iFL} {
      for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
        set B($iPhrase) [list]
      }

      # Collect in B() the offsets of each phrase between $iFirst and
      # $iFL. Stop at the first phrase that has none.
      for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
        set p [lindex $phraselist $iPhrase]
        set nPm1 [expr {[llength $p] - 1}]
        set iFirst [expr $iFL - $O(-near) - [llength $p]]

        for {set i $iFirst} {$i <= $iFL} {incr i} {
          if {[lrange $col $i [expr $i+$nPm1]] == $p} { lappend B($iPhrase) $i }
        }
        if {[llength $B($iPhrase)] == 0} break
      }

      # Only if every phrase was found are the offsets merged into A().
      if {$iPhrase==$nPhrase} {
        for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
          set A($iCol,$iPhrase) [concat $A($iCol,$iPhrase) $B($iPhrase)]
          set A($iCol,$iPhrase) [lsort -integer -uniq $A($iCol,$iPhrase)]
        }
      }
    }
  }

  set res [list]
  #puts [array names A]

  # Format the hits. $counter is incremented once per phrase.
  for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
    for {set iCol 0} {$iCol < [llength $aCol]} {incr iCol} {
      foreach a $A($iCol,$iPhrase) {
        lappend res "$counter.$iCol.$a"
      }
    }
    incr counter
  }

  #puts $res
  sort_poslist $res
}

#-------------------------------------------------------------------------
# Usage:
#
#   sort_poslist LIST
#
# Sort a position list of the
type returned by command [nearset] # proc sort_poslist {L} { lsort -command instcompare $L } proc instcompare {lhs rhs} { foreach {p1 c1 o1} [split $lhs .] {} foreach {p2 c2 o2} [split $rhs .] {} set res [expr $c1 - $c2] if {$res==0} { set res [expr $o1 - $o2] } if {$res==0} { set res [expr $p1 - $p2] } return $res } #------------------------------------------------------------------------- # Logical operators used by the commands returned by fts5_expr_tcl(). # proc AND {args} { foreach a $args { if {[llength $a]==0} { return [list] } } sort_poslist [concat {*}$args] } proc OR {args} { sort_poslist [concat {*}$args] } proc NOT {a b} { if {[llength $b]>0} { return [list] } return $a } |
Added ext/fts5/test/fts5aa.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 
277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 | # 2014 June 17 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5aa # If SQLITE_ENABLE_FTS5 is not defined, omit this file. 
ifcapable !fts5 { finish_test return } do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(a, b, c); SELECT name, sql FROM sqlite_master; } { t1 {CREATE VIRTUAL TABLE t1 USING fts5(a, b, c)} t1_data {CREATE TABLE 't1_data'(id INTEGER PRIMARY KEY, block BLOB)} t1_content {CREATE TABLE 't1_content'(id INTEGER PRIMARY KEY, c0, c1, c2)} t1_docsize {CREATE TABLE 't1_docsize'(id INTEGER PRIMARY KEY, sz BLOB)} t1_config {CREATE TABLE 't1_config'(k PRIMARY KEY, v) WITHOUT ROWID} } do_execsql_test 1.1 { DROP TABLE t1; SELECT name, sql FROM sqlite_master; } { } #------------------------------------------------------------------------- # reset_db do_execsql_test 2.0 { CREATE VIRTUAL TABLE t1 USING fts5(x,y); } do_execsql_test 2.1 { INSERT INTO t1 VALUES('a b c', 'd e f'); } do_test 2.2 { execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 } } {/{\(structure\) {lvl=0 nMerge=0 {id=[0123456789]* h=1 leaves=1..1}}}/} do_execsql_test 2.3 { INSERT INTO t1(t1) VALUES('integrity-check'); } #------------------------------------------------------------------------- # reset_db do_execsql_test 3.0 { CREATE VIRTUAL TABLE t1 USING fts5(x,y); } foreach {i x y} { 1 {g f d b f} {h h e i a} 2 {f i g j e} {i j c f f} 3 {e e i f a} {e h f d f} 4 {h j f j i} {h a c f j} 5 {d b j c g} {f e i b e} 6 {a j a e e} {j d f d e} 7 {g i j c h} {j d h c a} 8 {j j i d d} {e e d f b} 9 {c j j d c} {h j i f g} 10 {b f h i a} {c f b b j} } { do_execsql_test 3.$i.1 { INSERT INTO t1 VALUES($x, $y) } do_execsql_test 3.$i.2 { INSERT INTO t1(t1) VALUES('integrity-check') } if {[set_test_counter errors]} break } #------------------------------------------------------------------------- # reset_db do_execsql_test 4.0 { CREATE VIRTUAL TABLE t1 USING fts5(x,y); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } foreach {i x y} { 1 {g f d b f} {h h e i a} 2 {f i g j e} {i j c f f} 3 {e e i f a} {e h f d f} 4 {h j f j i} {h a c f j} 5 {d b j c g} {f e i b e} 6 {a j a e e} {j d f d e} 7 {g i j c h} {j d h c 
a} 8 {j j i d d} {e e d f b} 9 {c j j d c} {h j i f g} 10 {b f h i a} {c f b b j} } { do_execsql_test 4.$i.1 { INSERT INTO t1 VALUES($x, $y) } do_execsql_test 4.$i.2 { INSERT INTO t1(t1) VALUES('integrity-check') } if {[set_test_counter errors]} break } #------------------------------------------------------------------------- # reset_db do_execsql_test 5.0 { CREATE VIRTUAL TABLE t1 USING fts5(x,y); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } foreach {i x y} { 1 {dd abc abc abc abcde} {aaa dd ddd ddd aab} 2 {dd aab d aaa b} {abcde c aaa aaa aaa} 3 {abcde dd b b dd} {abc abc d abc ddddd} 4 {aaa abcde dddd dddd abcde} {abc b b abcde abc} 5 {aab dddd d dddd c} {ddd abcde dddd abcde c} 6 {ddd dd b aab abcde} {d ddddd dddd c abc} 7 {d ddddd ddd c abcde} {c aab d abcde ddd} 8 {abcde aaa aab c c} {ddd c dddd b aaa} 9 {abcde aab ddddd c aab} {dddd dddd b c dd} 10 {ddd abcde dddd dd c} {dddd c c d abcde} } { do_execsql_test 5.$i.1 { INSERT INTO t1 VALUES($x, $y) } do_execsql_test 5.$i.2 { INSERT INTO t1(t1) VALUES('integrity-check') } if {[set_test_counter errors]} break } #------------------------------------------------------------------------- # breakpoint reset_db do_execsql_test 6.0 { CREATE VIRTUAL TABLE t1 USING fts5(x,y); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } do_execsql_test 6.1 { INSERT INTO t1(rowid, x, y) VALUES(22, 'a b c', 'c b a'); REPLACE INTO t1(rowid, x, y) VALUES(22, 'd e f', 'f e d'); } do_execsql_test 6.2 { INSERT INTO t1(t1) VALUES('integrity-check') } do_execsql_test 6.3 { REPLACE INTO t1(rowid, x, y) VALUES('22', 'l l l', 'l l l'); } do_execsql_test 6.4 { INSERT INTO t1(t1) VALUES('integrity-check') } #------------------------------------------------------------------------- # reset_db expr srand(0) do_execsql_test 7.0 { CREATE VIRTUAL TABLE t1 USING fts5(x,y,z); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } proc doc {} { set v [list aaa aab abc abcde b c d dd ddd dddd ddddd] set ret [list] for {set j 0} {$j < 20} {incr j} { lappend ret 
[lindex $v [expr int(rand()*[llength $v])]] } return $ret } proc dump_structure {} { db eval {SELECT fts5_decode(id, block) AS t FROM t1_data WHERE id=10} { foreach lvl [lrange $t 1 end] { set seg [string repeat . [expr [llength $lvl]-2]] puts "[lrange $lvl 0 1] $seg" } } } for {set i 1} {$i <= 10} {incr i} { do_test 7.$i { for {set j 0} {$j < 10} {incr j} { set x [doc] set y [doc] set z [doc] set rowid [expr int(rand() * 100)] execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) } } execsql { INSERT INTO t1(t1) VALUES('integrity-check'); } } {} } #db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r} #exit #------------------------------------------------------------------------- # reset_db do_execsql_test 8.0 { CREATE VIRTUAL TABLE t1 USING fts5(x, prefix="1,2,3"); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } do_execsql_test 8.1 { INSERT INTO t1 VALUES('the quick brown fox'); INSERT INTO t1(t1) VALUES('integrity-check'); } #------------------------------------------------------------------------- # reset_db expr srand(0) do_execsql_test 9.0 { CREATE VIRTUAL TABLE t1 USING fts5(x,y,z, prefix="1,2,3"); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } proc doc {} { set v [list aaa aab abc abcde b c d dd ddd dddd ddddd] set ret [list] for {set j 0} {$j < 20} {incr j} { lappend ret [lindex $v [expr int(rand()*[llength $v])]] } return $ret } proc dump_structure {} { db eval {SELECT fts5_decode(id, block) AS t FROM t1_data WHERE id=10} { foreach lvl [lrange $t 1 end] { set seg [string repeat . 
[expr [llength $lvl]-2]] puts "[lrange $lvl 0 1] $seg" } } } for {set i 1} {$i <= 10} {incr i} { do_test 9.$i { for {set j 0} {$j < 100} {incr j} { set x [doc] set y [doc] set z [doc] set rowid [expr int(rand() * 100)] execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) } } execsql { INSERT INTO t1(t1) VALUES('integrity-check'); } } {} if {[set_test_counter errors]} break } #------------------------------------------------------------------------- # reset_db do_execsql_test 10.0 { CREATE VIRTUAL TABLE t1 USING fts5(x,y); } set d10 { 1 {g f d b f} {h h e i a} 2 {f i g j e} {i j c f f} 3 {e e i f a} {e h f d f} 4 {h j f j i} {h a c f j} 5 {d b j c g} {f e i b e} 6 {a j a e e} {j d f d e} 7 {g i j c h} {j d h c a} 8 {j j i d d} {e e d f b} 9 {c j j d c} {h j i f g} 10 {b f h i a} {c f b b j} } foreach {rowid x y} $d10 { do_execsql_test 10.1.$rowid.1 { INSERT INTO t1 VALUES($x, $y) } do_execsql_test 10.1.$rowid.2 { INSERT INTO t1(t1) VALUES('integrity-check') } } foreach rowid {5 9 8 1 2 4 10 7 3 5 6} { do_execsql_test 10.2.$rowid.1 { DELETE FROM t1 WHERE rowid = $rowid } do_execsql_test 10.2.$rowid.2 { INSERT INTO t1(t1) VALUES('integrity-check') } } foreach {rowid x y} $d10 { do_execsql_test 10.3.$rowid.1 { INSERT INTO t1 VALUES($x, $y) } do_execsql_test 10.3.$rowid.2 { INSERT INTO t1(t1) VALUES('integrity-check') } } do_execsql_test 10.4.1 { DELETE FROM t1 } do_execsql_test 10.4.2 { INSERT INTO t1(t1) VALUES('integrity-check') } #------------------------------------------------------------------------- # do_catchsql_test 11.1 { CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rank); } {1 {reserved fts5 column name: rank}} do_catchsql_test 11.2 { CREATE VIRTUAL TABLE rank USING fts5(a, b, c); } {1 {reserved fts5 table name: rank}} do_catchsql_test 11.3 { CREATE VIRTUAL TABLE t2 USING fts5(a, b, c, rowid); } {1 {reserved fts5 column name: rowid}} #------------------------------------------------------------------------- # do_execsql_test 12.1 { CREATE VIRTUAL 
TABLE t2 USING fts5(x,y); } {} do_catchsql_test 12.2 { SELECT t2 FROM t2 WHERE t2 MATCH '*stuff' } {1 {unknown special query: stuff}} do_test 12.3 { set res [db eval { SELECT t2 FROM t2 WHERE t2 MATCH '* reads ' }] string is integer $res } {1} #------------------------------------------------------------------------- # reset_db do_execsql_test 13.1 { CREATE VIRTUAL TABLE t1 USING fts5(x); INSERT INTO t1(rowid, x) VALUES(1, 'o n e'), (2, 't w o'); } {} do_execsql_test 13.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'o'; } {1 2} do_execsql_test 13.4 { DELETE FROM t1 WHERE rowid=2; } {} do_execsql_test 13.5 { SELECT rowid FROM t1 WHERE t1 MATCH 'o'; } {1} do_execsql_test 13.6 { SELECT rowid FROM t1 WHERE t1 MATCH '.'; } {} #------------------------------------------------------------------------- # reset_db do_execsql_test 14.1 { CREATE VIRTUAL TABLE t1 USING fts5(x, y); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); WITH d(x,y) AS ( SELECT NULL, 'xyz xyz xyz xyz xyz xyz' UNION ALL SELECT NULL, 'xyz xyz xyz xyz xyz xyz' FROM d ) INSERT INTO t1 SELECT * FROM d LIMIT 200; } do_test 14.2 { set nRow 0 db eval { SELECT * FROM t1 WHERE t1 MATCH 'xyz' } { db eval { BEGIN; CREATE TABLE t2(a, b); ROLLBACK; } incr nRow } set nRow } {200} do_test 14.3 { set nRow 0 db eval { BEGIN; } db eval { SELECT * FROM t1 WHERE t1 MATCH 'xyz' } { db eval { SAVEPOINT aaa; CREATE TABLE t2(a, b); ROLLBACK TO aaa; RELEASE aaa; } incr nRow } set nRow } {200} do_execsql_test 15.0 { INSERT INTO t1(t1) VALUES('integrity-check'); } do_execsql_test 15.1 { UPDATE t1_content SET c1 = 'xyz xyz xyz xyz xyz abc' WHERE rowid = 1; } do_catchsql_test 15.2 { INSERT INTO t1(t1) VALUES('integrity-check'); } {1 {database disk image is malformed}} #------------------------------------------------------------------------- # do_execsql_test 16.1 { CREATE VIRTUAL TABLE n1 USING fts5(a); INSERT INTO n1 VALUES('a b c d'); } proc funk {} { set fd [db incrblob main n1_data block 10] fconfigure $fd -encoding binary -translation 
binary puts -nonewline $fd "\x44\x45" close $fd db eval { UPDATE n1_config SET v=50 WHERE k='version' } } db func funk funk do_catchsql_test 16.2 { SELECT funk(), bm25(n1), funk() FROM n1 WHERE n1 MATCH 'a+b+c+d' } {1 {SQL logic error or missing database}} #------------------------------------------------------------------------- # reset_db do_execsql_test 17.1 { CREATE VIRTUAL TABLE b2 USING fts5(x); INSERT INTO b2 VALUES('a'); INSERT INTO b2 VALUES('b'); INSERT INTO b2 VALUES('c'); } do_test 17.2 { set res [list] db eval { SELECT * FROM b2 ORDER BY rowid ASC } { lappend res [execsql { SELECT * FROM b2 ORDER BY rowid ASC }] } set res } {{a b c} {a b c} {a b c}} reset_db do_execsql_test 18.1 { CREATE VIRTUAL TABLE c2 USING fts5(x, y); INSERT INTO c2 VALUES('x x x', 'x x x'); SELECT rowid FROM c2 WHERE c2 MATCH 'y:x'; } {1} #------------------------------------------------------------------------- # reset_db do_execsql_test 17.1 { CREATE VIRTUAL TABLE uio USING fts5(ttt); INSERT INTO uio VALUES(NULL); INSERT INTO uio SELECT NULL FROM uio; INSERT INTO uio SELECT NULL FROM uio; INSERT INTO uio SELECT NULL FROM uio; INSERT INTO uio SELECT NULL FROM uio; INSERT INTO uio SELECT NULL FROM uio; INSERT INTO uio SELECT NULL FROM uio; INSERT INTO uio SELECT NULL FROM uio; INSERT INTO uio SELECT NULL FROM uio; SELECT count(*) FROM uio; } {256} do_execsql_test 17.2 { SELECT count(*) FROM uio WHERE rowid BETWEEN 8 AND 17 } {10} do_execsql_test 17.3 { SELECT rowid FROM uio WHERE rowid BETWEEN 8 AND 17 } {8 9 10 11 12 13 14 15 16 17} do_execsql_test 17.4 { SELECT rowid FROM uio WHERE rowid BETWEEN 8 AND 17 ORDER BY rowid DESC } {17 16 15 14 13 12 11 10 9 8} do_execsql_test 17.5 { SELECT count(*) FROM uio } {256} do_execsql_test 17.6 { INSERT INTO uio(rowid) VALUES(9223372036854775807); INSERT INTO uio(rowid) VALUES(-9223372036854775808); SELECT count(*) FROM uio; } {258} do_execsql_test 17.7 { SELECT min(rowid), max(rowid) FROM uio; } {-9223372036854775808 9223372036854775807} 
do_execsql_test 17.8 { INSERT INTO uio DEFAULT VALUES; SELECT min(rowid), max(rowid), count(*) FROM uio; } {-9223372036854775808 9223372036854775807 259} do_execsql_test 17.9 { SELECT min(rowid), max(rowid), count(*) FROM uio WHERE rowid < 10; } {-9223372036854775808 9 10} #-------------------------------------------------------------------- # do_execsql_test 18.1 { CREATE VIRTUAL TABLE t1 USING fts5(a, b); CREATE VIRTUAL TABLE t2 USING fts5(c, d); INSERT INTO t1 VALUES('abc*', NULL); INSERT INTO t2 VALUES(1, 'abcdefg'); } do_execsql_test 18.2 { SELECT t1.rowid, t2.rowid FROM t1, t2 WHERE t2 MATCH t1.a AND t1.rowid = t2.c } {1 1} do_execsql_test 18.3 { SELECT t1.rowid, t2.rowid FROM t2, t1 WHERE t2 MATCH t1.a AND t1.rowid = t2.c } {1 1} finish_test |
Added ext/fts5/test/fts5ab.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 | # 2014 June 17 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. 
# #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5ab # If SQLITE_ENABLE_FTS5 is not defined, omit this file. ifcapable !fts5 { finish_test return } do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(a, b); INSERT INTO t1 VALUES('hello', 'world'); INSERT INTO t1 VALUES('one two', 'three four'); INSERT INTO t1(rowid, a, b) VALUES(45, 'forty', 'five'); } do_execsql_test 1.1 { SELECT * FROM t1 ORDER BY rowid DESC; } { forty five {one two} {three four} hello world } do_execsql_test 1.2 { SELECT rowid FROM t1 ORDER BY rowid DESC; } {45 2 1} do_execsql_test 1.3 { SELECT rowid FROM t1 ORDER BY rowid ASC; } {1 2 45} do_execsql_test 1.4 { SELECT * FROM t1 WHERE rowid=2; } {{one two} {three four}} do_execsql_test 1.5 { SELECT * FROM t1 WHERE rowid=2.01; } {} do_execsql_test 1.6 { SELECT * FROM t1 WHERE rowid=1.99; } {} #------------------------------------------------------------------------- reset_db do_execsql_test 2.1 { CREATE VIRTUAL TABLE t1 USING fts5(x); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); INSERT INTO t1 VALUES('one'); INSERT INTO t1 VALUES('two'); INSERT INTO t1 VALUES('three'); } do_catchsql_test 2.2 { SELECT rowid, * FROM t1 WHERE t1 MATCH 'AND AND' } {1 {fts5: syntax error near "AND"}} do_execsql_test 2.3 { SELECT rowid, * FROM t1 WHERE t1 MATCH 'two' } {2 two} do_execsql_test 2.4 { SELECT rowid, * FROM t1 WHERE t1 MATCH 'three' } {3 three} do_execsql_test 2.5 { SELECT rowid, * FROM t1 WHERE t1 MATCH 'one' } {1 one} do_execsql_test 2.6 { INSERT INTO t1 VALUES('a b c d e f g'); INSERT INTO t1 VALUES('b d e a a a i'); INSERT INTO t1 VALUES('x y z b c c c'); } foreach {tn expr res} { 1 a {5 4} 2 b {6 5 4} 3 c {6 4} 4 d {5 4} 5 e {5 4} 6 f {4} 7 g {4} 8 x {6} 9 y {6} 10 z {6} } { do_execsql_test 2.7.$tn.1 { SELECT rowid FROM t1 WHERE 
t1 MATCH $expr ORDER BY rowid DESC } $res do_execsql_test 2.7.$tn.2 { SELECT rowid FROM t1 WHERE t1 MATCH $expr ORDER BY rowid ASC } [lsort -integer $res] } #------------------------------------------------------------------------- # reset_db do_execsql_test 3.0 { CREATE VIRTUAL TABLE t1 USING fts5(a,b); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } foreach {tn a b} { 1 {abashed abandons abase abash abaft} {abases abased} 2 {abasing abases abaft abated abandons} {abases abandoned} 3 {abatement abash abash abated abase} {abasements abashing} 4 {abaft abasements abase abasement abasing} {abasement abases} 5 {abaft abashing abatement abash abasements} {abandons abandoning} 6 {aback abate abasements abashes abandoned} {abasement abased} 7 {abandons abated abased aback abandoning} {abases abandoned} 8 {abashing abases abasement abaft abashing} {abashed abate} 9 {abash abase abate abashing abashed} {abandon abandoned} 10 {abate abandoning abandons abasement aback} {abandon abandoning} } { do_execsql_test 3.1.$tn.1 { INSERT INTO t1 VALUES($a, $b) } do_execsql_test 3.1.$tn.2 { INSERT INTO t1(t1) VALUES('integrity-check') } } foreach {tn expr res} { 1 {abash} {9 5 3 1} 2 {abase} {9 4 3 1} 3 {abase + abash} {1} 4 {abash + abase} {9} 5 {abaft + abashing} {8 5} 6 {abandon + abandoning} {10} 7 {"abashing abases abasement abaft abashing"} {8} } { do_execsql_test 3.2.$tn { SELECT rowid FROM t1 WHERE t1 MATCH $expr ORDER BY rowid DESC } $res } do_execsql_test 3.3 { SELECT rowid FROM t1 WHERE t1 MATCH 'NEAR(aback abate, 2)' } {6} foreach {tn expr res} { 1 {abash} {1 3 5 9} 2 {abase} {1 3 4 9} 3 {abase + abash} {1} 4 {abash + abase} {9} 5 {abaft + abashing} {5 8} 6 {abandon + abandoning} {10} 7 {"abashing abases abasement abaft abashing"} {8} } { do_execsql_test 3.4.$tn { SELECT rowid FROM t1 WHERE t1 MATCH $expr } $res } #------------------------------------------------------------------------- # Documents with more than 2M tokens. 
# do_execsql_test 4.0 { CREATE VIRTUAL TABLE s1 USING fts5(x); } foreach {tn doc} [list \ 1 [string repeat {a x } 1500000] \ 2 "[string repeat {a a } 1500000] x" \ ] { do_execsql_test 4.$tn { INSERT INTO s1 VALUES($doc) } } do_execsql_test 4.3 { SELECT rowid FROM s1 WHERE s1 MATCH 'x' } {1 2} do_execsql_test 4.4 { SELECT rowid FROM s1 WHERE s1 MATCH '"a x"' } {1 2} #------------------------------------------------------------------------- # Check that a special case of segment promotion works. The case is where # a new segment is written to level L, but the oldest segment within level # (L-2) is larger than it. # do_execsql_test 5.0 { CREATE VIRTUAL TABLE s2 USING fts5(x); INSERT INTO s2(s2, rank) VALUES('pgsz', 32); INSERT INTO s2(s2, rank) VALUES('automerge', 0); } proc rnddoc {n} { set map [list 0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j] set doc [list] for {set i 0} {$i < $n} {incr i} { lappend doc [string map $map [format %.3d [expr int(rand()*1000)]]] } set doc } db func rnddoc rnddoc do_test 5.1 { for {set i 1} {$i <= 65} {incr i} { execsql { INSERT INTO s2 VALUES(rnddoc(10)) } } for {set i 1} {$i <= 63} {incr i} { execsql { DELETE FROM s2 WHERE rowid = $i } } fts5_level_segs s2 } {0 8} do_test 5.2 { execsql { INSERT INTO s2(s2, rank) VALUES('automerge', 8); } for {set i 0} {$i < 7} {incr i} { execsql { INSERT INTO s2 VALUES(rnddoc(50)) } } fts5_level_segs s2 } {8 0 0} # Test also the other type of segment promotion - when a new segment is written # that is larger than segments immediately following it. 
do_test 5.3 { execsql { DROP TABLE s2; CREATE VIRTUAL TABLE s2 USING fts5(x); INSERT INTO s2(s2, rank) VALUES('pgsz', 32); INSERT INTO s2(s2, rank) VALUES('automerge', 0); } for {set i 1} {$i <= 16} {incr i} { execsql { INSERT INTO s2 VALUES(rnddoc(5)) } } fts5_level_segs s2 } {0 1} do_test 5.4 { execsql { INSERT INTO s2 VALUES(rnddoc(160)) } fts5_level_segs s2 } {2 0} #------------------------------------------------------------------------- # do_execsql_test 6.0 { CREATE VIRTUAL TABLE s3 USING fts5(x); BEGIN; INSERT INTO s3 VALUES('a b c'); INSERT INTO s3 VALUES('A B C'); } do_execsql_test 6.1.1 { SELECT rowid FROM s3 WHERE s3 MATCH 'a' } {1 2} do_execsql_test 6.1.2 { SELECT rowid FROM s3 WHERE s3 MATCH 'a' ORDER BY rowid DESC } {2 1} do_execsql_test 6.2 { COMMIT; } do_execsql_test 6.3 { SELECT rowid FROM s3 WHERE s3 MATCH 'a' } {1 2} do_test 6.4 { db close sqlite3 db test.db execsql { BEGIN; INSERT INTO s3(s3) VALUES('optimize'); ROLLBACK; } } {} #------------------------------------------------------------------------- # set doc [string repeat "a b c " 500] breakpoint do_execsql_test 7.0 { CREATE VIRTUAL TABLE x1 USING fts5(x); INSERT INTO x1(x1, rank) VALUES('pgsz', 32); INSERT INTO x1 VALUES($doc); } finish_test |
Added ext/fts5/test/fts5ac.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 
348 349 350 351 352 353 354 355 356 357 358 | # 2014 June 17 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5ac # If SQLITE_ENABLE_FTS5 is not defined, omit this file. ifcapable !fts5 { finish_test return } set data { 0 {p o q e z k z p n f y u z y n y} {l o o l v v k} 1 {p k h h p y l l h i p v n} {p p l u r i f a j g e r r x w} 2 {l s z j k i m p s} {l w e j t j e e i t w r o p o} 3 {x g y m y m h p} {k j j b r e y y a k y} 4 {q m a i y i z} {o w a g k x g j m w e u k} 5 {k o a w y b s z} {s g l m m l m g p} 6 {d a q i z h b l c p k j g k} {p x u j x t v c z} 7 {f d a g o c t i} {w f c x l d r k i j} 8 {y g w u b q p o m j y b p a e k} {r i d k y w o z q m a t p} 9 {r k o m c c j s x m x m x m q r} {y r c a q d z k n x n} 10 {k j q m g q a j d} {d d e z g w h c d o o g x d} 11 {j z u m o y q j f w e e w t r j w} {g m o r x n t n w i f g l z f} 12 {s y w a w d o h x m k} {c w k z b p o r a} 13 {u t h x e g s k n g i} {f j w g c s r} 14 {b f i c s u z t k} {c k q s j u i z o} 15 {n a f n u s w h y n s i q e w} {x g e g a s s h n} 16 {k s q e j n p} {t r j f t o e k k l m i} 17 {g d t u w r o p m n m n p h b o u} {h s w o s l j e} 18 {f l q y q q g e e x j r} {n b r r g e i r t x q k} 19 {f i r g o a w e p i l o a w} {e k r z t d g h g i b d i e m} 20 {l d u u f p y} {g o m m u x m g l j t t x x u} 21 {m c d k x i c z l} {m i a i e u h} 22 {w b f o c g x y j} {z d w x d f h i p} 23 {w u i u x t c h k i b} {b y k h b v r t g j} 24 {h f d j s w s b a p k} {a q y u z e y m m j q r} 25 {d i x y 
x x k i y f s d j h z p n} {l l q m e t c w g y h t s v g} 26 {g s q w t d k x g f m j p k y} {r m b x e l t d} 27 {j l s q u g y v e c l o} {m f l m m m h g x x l n c} 28 {c t j g v r s b z j} {l c f y d t q n} 29 {e x z y w i h l} {b n b x e y q e n u m} 30 {g y y h j b w r} {q b q f u s k c k g r} 31 {g u l x l b r c m z b u c} {k g t b x k x n t e z d h o} 32 {w g v l z f b z h p s c v h} {g e w v m h k r g w a r f q} 33 {c g n f u d o y o b} {e y o h x x y y i z s b h a j} 34 {v y h c q u u s q y x x k s q} {d n r m y k n t i r n w e} 35 {o u c x l e b t a} {y b a x y f z x r} 36 {x p h l j a a u u j h} {x o f s z m b c q p} 37 {k q t i c a q n m v v} {v r z e f m y o} 38 {r w t t t t r v v o e p g h} {l w x a g a u h y} 39 {o p v g v b a g o} {j t q c r b b g y z} 40 {f s o r o d t h q f x l} {r d b m k i f s t d l m y x j w} 41 {t m o t m f m f} {i p i q j v n v m b q} 42 {t x w a r l w d t b c o d o} {a h f h w z d n s} 43 {t u q c d g p q x j o l c x c} {m n t o z z j a y} 44 {v d i i k b f s z r v r z y} {g n q y s x x m b x c l w} 45 {p v v a c s z y e o l} {m v t u d k m k q b d c v z r} 46 {f y k l d r q w r s t r e} {h m v r r l r r t f q e x y} 47 {w l n l t y x} {n h s l a f c h u f l x x m v n o} 48 {t n v i k e b p z p d j j l i o} {i v z p g u e j s i k n h w d c} 49 {z v x p n l t a j c} {e j l e n c e t a d} 50 {w u b x u i v h a i y m m r p m s} {s r h d o g z y f f x e} 51 {d c c x b c a x g} {p r a j v u y} 52 {f w g r c o d l t u e z h i} {j l l s s b j m} 53 {p m t f k i x} {u v y a z g w v v m x h i} 54 {l c z g l o j i c d e b} {b f v y w u i b e i y} 55 {r h c x f x a d s} {z x y k f l r b q c v} 56 {v x x c y h z x b g m o q n c} {h n b i t g h a q b c o r u} 57 {d g l o h t b s b r} {n u e p t i m u} 58 {t d y e t d c w u o s w x f c h} {i o s v y b r d r} 59 {l b a p q n d r} {k d c c d n y q h g a o p e x} 60 {f r z v m p k r} {x x r i s b a g f c} 61 {s a z i e r f i w c n y v z t k s} {y y i r y n l s b w i e k n} 62 {n x p r e x 
q r m v i b y} {f o o z n b s r q j} 63 {y j s u j x o n r q t f} {f v k n v x u s o a d e f e} 64 {u s i l y c x q} {r k c h p c h b o s s u s p b} 65 {m p i o s h o} {s w h u n d m n q t y k b w c} 66 {l d f g m x x x o} {s w d d f b y j j h h t i y p j o} 67 {c b m h f n v w n h} {i r w i e x r w l z p x u g u l s} 68 {y a h u h i m a y q} {d d r x h e v q n z y c j} 69 {c x f d x o n p o b r t b l p l} {m i t k b x v f p t m l l y r o} 70 {u t l w w m s} {m f m o l t k o p e} 71 {f g q e l n d m z x q} {z s i i i m f w w f n g p e q} 72 {n l h a v u o d f j d e x} {v v s l f g d g r a j x i f z x} 73 {x v m v f i g q e w} {r y s j i k m j j e d g r n o i f} 74 {g d y n o h p s y q z j d w n h w} {x o d l t j i b r d o r y} 75 {p g b i u r b e q d v o a g w m k} {q y z s f q o h} 76 {u z a q u f i f f b} {b s p b a a d x r r i q f} 77 {w h h z t h p o a h h e e} {h w r p h k z v y f r x} 78 {c a r k i a p u x} {f w l p t e m l} 79 {q q u k o t r k z} {f b m c w p s s o z} 80 {t i g v y q s r x m r x z e f} {x o j w a u e y s j c b u p p r o} 81 {n j n h r l a r e o z w e} {v o r r j a v b} 82 {i f i d k w d n h} {o i d z i z l m w s b q v u} 83 {m d g q q b k b w f q q p p} {j m q f b y c i z k y q p l e a} 84 {m x o n y f g} {y c n x n q j i y c l h b r q z} 85 {v o z l n p c} {g n j n t b b x n c l d a g j v} 86 {z n a y f b t k k t d b z a v} {r p c n r u k u} 87 {b q t x z e c w} {q a o a l o a h i m j r} 88 {j f h o x x a z g b a f a m i b} {j z c z y x e x w t} 89 {t c t p r s u c q n} {z x l i k n f q l n t} 90 {w t d q j g m r f k n} {l e w f w w a l y q k i q t p c t} 91 {c b o k l i c b s j n m b l} {y f p q o w g} 92 {f y d j o q t c c q m f j s t} {f h e d y m o k} 93 {k x j r m a d o i z j} {r t t t f e b r x i v j v g o} 94 {s f e a e t i h h d q p z t q} {b k m k w h c} 95 {h b n j t k i h o q u} {w n g i t o k c a m y p f l x c p} 96 {f c x p y r b m o l m o a} {p c a q s u n n x d c f a o} 97 {u h h k m n k} {u b v n u a o c} 98 {s p e t c z d f 
n w f} {l s f j b l c e s h} 99 {r c v w i v h a t a c v c r e} {h h u m g o f b a e o} } # Argument $expr is an FTS5 match expression designed to be executed against # an FTS5 table with the following schema: # # CREATE VIRTUAL TABLE xy USING fts5(x, y); # # Assuming the table contains the same records as stored int the global # $::data array (see above), this function returns a list containing one # element for each match in the dataset. The elements are themselves lists # formatted as follows: # # <rowid> {<phrase 0 matches> <phrase 1 matches>...} # # where each <phrase X matches> element is a list of phrase matches in the # same form as returned by auxiliary scalar function fts5_test(). # proc matchdata {bPos expr {bAsc 1}} { set tclexpr [db one { SELECT fts5_expr_tcl($expr, 'nearset $cols -pc ::pc', 'x', 'y') }] set res [list] #puts $tclexpr foreach {id x y} $::data { set cols [list $x $y] set ::pc 0 #set hits [lsort -command instcompare [eval $tclexpr]] set hits [eval $tclexpr] if {[llength $hits]>0} { if {$bPos} { lappend res [list $id $hits] } else { lappend res $id } } } if {$bAsc} { set res [lsort -integer -increasing -index 0 $res] } else { set res [lsort -integer -decreasing -index 0 $res] } return [concat {*}$res] } # # End of test code #------------------------------------------------------------------------- proc fts5_test_poslist {cmd} { set res [list] for {set i 0} {$i < [$cmd xInstCount]} {incr i} { lappend res [string map {{ } .} [$cmd xInst $i]] } set res } foreach {tn2 sql} { 1 {} 2 {BEGIN} } { reset_db sqlite3_fts5_create_function db fts5_test_poslist fts5_test_poslist do_execsql_test 1.0 { CREATE VIRTUAL TABLE xx USING fts5(x,y); INSERT INTO xx(xx, rank) VALUES('pgsz', 32); } execsql $sql do_test $tn2.1.1 { foreach {id x y} $data { execsql { INSERT INTO xx(rowid, x, y) VALUES($id, $x, $y) } } execsql { INSERT INTO xx(xx) VALUES('integrity-check') } } {} #------------------------------------------------------------------------- # Test phrase 
queries. # foreach {tn phrase} { 1 "o" 2 "b q" 3 "e a e" 4 "m d g q q b k b w f q q p p" 5 "l o o l v v k" 6 "a" 7 "b" 8 "c" 9 "no" 10 "L O O L V V K" } { set expr "\"$phrase\"" set res [matchdata 1 $expr] do_execsql_test $tn2.1.2.$tn.[llength $res] { SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr } $res } #------------------------------------------------------------------------- # Test some AND and OR queries. # foreach {tn expr} { 1.1 "a AND b" 1.2 "a+b AND c" 1.3 "d+c AND u" 1.4 "d+c AND u+d" 2.1 "a OR b" 2.2 "a+b OR c" 2.3 "d+c OR u" 2.4 "d+c OR u+d" 3.1 { a AND b AND c } } { set res [matchdata 1 $expr] do_execsql_test $tn2.2.$tn.[llength $res] { SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr } $res } #------------------------------------------------------------------------- # Queries on a specific column. # foreach {tn expr} { 1.1 "x:a" 1.2 "y:a" 1.3 "x:b" 1.4 "y:b" 2.1 "{x}:a" 2.2 "{y}:a" 2.3 "{x}:b" 2.4 "{y}:b" 3.1 "{x y}:a" 3.2 "{y x}:a" 3.3 "{x x}:b" 3.4 "{y y}:b" 4.1 {{"x" "y"}:a} 4.2 {{"y" x}:a} 4.3 {{x "x"}:b} 4.4 {{"y" y}:b} } { set res [matchdata 1 $expr] do_execsql_test $tn2.3.$tn.[llength $res] { SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr } $res } #------------------------------------------------------------------------- # Some NEAR queries. 
# foreach {tn expr} { 1 "NEAR(a b)" 2 "NEAR(r c)" 2 { NEAR(r c, 5) } 3 { NEAR(r c, 3) } 4 { NEAR(r c, 2) } 5 { NEAR(r c, 0) } 6 { NEAR(a b c) } 7 { NEAR(a b c, 8) } 8 { x : NEAR(r c) } 9 { y : NEAR(r c) } } { set res [matchdata 1 $expr] do_execsql_test $tn2.4.1.$tn.[llength $res] { SELECT rowid, fts5_test_poslist(xx) FROM xx WHERE xx match $expr } $res } do_test $tn2.4.1 { nearset {{a b c}} -- a } {0.0.0} do_test $tn2.4.2 { nearset {{a b c}} -- c } {0.0.2} foreach {tn expr tclexpr} { 1 {a b} {AND [N $x -- {a}] [N $x -- {b}]} } { do_execsql_test $tn2.5.$tn { SELECT fts5_expr_tcl($expr, 'N $x') } [list $tclexpr] } #------------------------------------------------------------------------- # do_execsql_test $tn2.6.integrity { INSERT INTO xx(xx) VALUES('integrity-check'); } #db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM xx_data} {puts $r} foreach {bAsc sql} { 1 {SELECT rowid FROM xx WHERE xx MATCH $expr} 0 {SELECT rowid FROM xx WHERE xx MATCH $expr ORDER BY rowid DESC} } { foreach {tn expr} { 0.1 x 1 { NEAR(r c) } 2 { NEAR(r c, 5) } 3 { NEAR(r c, 3) } 4 { NEAR(r c, 2) } 5 { NEAR(r c, 0) } 6 { NEAR(a b c) } 7 { NEAR(a b c, 8) } 8 { x : NEAR(r c) } 9 { y : NEAR(r c) } 10 { x : "r c" } 11 { y : "r c" } 12 { a AND b } 13 { a AND b AND c } 14a { a } 14b { a OR b } 15 { a OR b AND c } 16 { c AND b OR a } 17 { c AND (b OR a) } 18 { c NOT (b OR a) } 19 { c NOT b OR a AND d } } { set res [matchdata 0 $expr $bAsc] do_execsql_test $tn2.6.$bAsc.$tn.[llength $res] $sql $res } } } do_execsql_test 3.1 { SELECT fts5_expr_tcl('a AND b'); } {{AND [nearset -- {a}] [nearset -- {b}]}} finish_test |
Added ext/fts5/test/fts5ad.test.
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
# More specifically, the tests in this file run prefix queries ("abc*")
# and check the rowids returned in both ascending and descending rowid
# order.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ad

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

#-------------------------------------------------------------------------
# Prefix queries against a small three-row table, first in descending
# and then in default (ascending) rowid order.
#
do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE yy USING fts5(x, y);
  INSERT INTO yy VALUES('Changes the result to be', 'the list of all matching');
  INSERT INTO yy VALUES('indices (or all matching', 'values if -inline is');
  INSERT INTO yy VALUES('specified as well.) If', 'indices are returned, the');
} {}

foreach {tn match res} {
  1 {c*} {1}
  2 {i*} {3 2}
  3 {t*} {3 1}
  4 {r*} {3 1}
} {
  do_execsql_test 1.$tn {
    SELECT rowid FROM yy WHERE yy MATCH $match ORDER BY rowid DESC
  } $res
}

foreach {tn match res} {
  5 {c*} {1}
  6 {i*} {2 3}
  7 {t*} {1 3}
  8 {r*} {1 3}
} {
  do_execsql_test 1.$tn {
    SELECT rowid FROM yy WHERE yy MATCH $match
  } $res
}

#-------------------------------------------------------------------------
# Reference implementation used to compute the expected result of a
# prefix query against table t1.
#
# Argument $prefixlist is a list of glob patterns (for example "a* f*").
# A row matches if, for every pattern in the list, at least one token in
# column a or column b matches that pattern. The return value is the list
# of matching rowids in descending rowid order.
#
proc prefix_query {prefixlist} {
  set ret [list]
  db eval {SELECT rowid, a, b FROM t1 ORDER BY rowid DESC} {
    set bMatch 1
    foreach pref $prefixlist {
      if { [lsearch -glob $a $pref]<0 && [lsearch -glob $b $pref]<0 } {
        set bMatch 0
        break
      }
    }
    if {$bMatch} { lappend ret $rowid }
  }
  return $ret
}

#-------------------------------------------------------------------------
# Run the same set of prefix queries against four configurations of
# table t1:
#
#   2: no prefix= option
#   3: with a prefix= option
#   4: as 2, but the rows are inserted and queried inside an open
#      transaction (the setup script ends with BEGIN)
#   5: as 3, but inside an open transaction
#
# NOTE(review): the 'pgsz' setting of 32 is presumably there to force the
# index to span many small pages - confirm against the FTS5 documentation.
#
foreach {T create} {
  2 {
    CREATE VIRTUAL TABLE t1 USING fts5(a, b);
    INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
  }

  3 {
    CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix=1,2,3,4,5);
    INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
  }

  4 {
    CREATE VIRTUAL TABLE t1 USING fts5(a, b);
    INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
    BEGIN;
  }

  5 {
    CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix=1,2,3,4,5);
    INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
    BEGIN;
  }
} {

  # Step 1: (re)create the table for this configuration.
  do_test $T.1 {
    execsql { DROP TABLE IF EXISTS t1 }
    execsql $create
  } {}

  # Step 2: populate the table. This step has its own test label so that
  # a failure here can be told apart from a failure in step 1.
  do_test $T.2 {
    foreach {rowid a b} {
      0   {fghij uvwxyz klmn pq uvwx}          {klmn f fgh uv fghij klmno}
      1   {uv f abcd abcd fghi}                {pq klm uv uv fgh uv a}
      2   {klmn klm pqrs fghij uv}             {f k uvw ab abcd pqr uv}
      3   {ab pqrst a fghi ab pqr fg}          {k klmno a fg abcd}
      4   {abcd pqrst uvwx a fgh}              {f klmno fghij kl pqrst}
      5   {uvwxyz k abcde u a}                 {uv k k kl klmn}
      6   {uvwxyz k klmn pqrst uv}             {fghi pqrs abcde u k}
      7   {uvwxy klmn u p pqrst fgh}           {p f fghi abcd uvw kl uv}
      8   {f klmno pqrst uvwxy pqrst}          {uv abcde klm pq pqr}
      9   {f abcde a uvwxyz pqrst}             {fghij abc k uvwx pqr fghij uvwxy}
      10  {ab uv f fg pqrst uvwxy}             {fgh p uv k abc klm uvw}
      11  {pq klmno a uvw abcde uvwxyz}        {fghij pq uvwxyz pqr fghi}
      12  {fgh u pq fgh uvw}                   {uvw pqr f uvwxy uvwx}
      13  {uvwx klmn f fgh abcd pqr}           {uvw k fg uv klm abcd}
      14  {ab uvwx pqrst pqr uvwxyz pqrs}      {uvwxyz abcde ab ab uvw abcde}
      15  {abc abcde uvwxyz abc kl k pqr}      {klm k k klmno u fgh}
      16  {fghi abcd fghij uv uvwxyz ab uv}    {klmn pqr a uvw fghi}
      17  {abc pqrst fghi uvwx uvw klmn fghi}  {ab fg pqr pqrs p}
      18  {pqr kl a fghij fgh fg kl}           {pqr uvwxyz uvw abcd uvwxyz}
      19  {fghi fghi pqr kl fghi f}            {klmn u u klmno klmno}
      20  {abc pqrst klmno kl pq uvwxy}        {abc k fghi pqrs klm}
      21  {a pqr uvwxyz uv fghi a fgh}         {abc pqrs pqrst pq klm}
      22  {klm abc uvwxyz klm pqrst}           {fghij k pq pqr u klm fghij}
      23  {p klm uv p a a}                     {uvwxy klmn uvw abcde pq}
      24  {uv fgh fg pq uvwxy u uvwxy}         {pqrs a uvw p uvwx uvwxyz fg}
      25  {fghij fghi klmn abcd pq kl}         {fghi abcde pqrs abcd fgh uvwxy}
      26  {pq fgh a abc klmno klmn}            {fgh p k p fg fghij}
      27  {fg pq kl uvwx fghij pqrst klmn}     {abcd uvw abcd fghij f fghij}
      28  {uvw fghi p fghij pq fgh uvwx}       {k fghij abcd uvwx pqr fghi}
      29  {klm pq abcd pq f uvwxy}             {pqrst p fghij pqr p}
      30  {ab uvwx fg uvwx klmn klm}           {klmn klmno fghij klmn klm}
      31  {pq k pqr abcd a pqrs}               {abcd abcd uvw a abcd klmno ab}
      32  {pqrst u abc pq klm}                 {abc kl uvwxyz fghij u fghi p}
      33  {f uvwxy u k f uvw uvwx}             {pqrs uvw fghi fg pqrst klm}
      34  {pqrs pq fghij uvwxyz pqr}           {ab abc abc uvw f pq f}
      35  {uvwxy ab uvwxy klmno kl pqrs}       {abcde uvw pqrs uvwx k k}
      36  {uvwxyz k ab abcde abc uvw}          {uvw abcde uvw klmn uv klmn}
      37  {k kl uv abcde uvwx fg u}            {u abc uvwxy k fg abcd}
      38  {fghi pqrst fghi pqr pqrst uvwx}     {u uv uvwx fghi abcde}
      39  {k pqrst k uvw fg pqrst fghij}       {uvwxy ab kl klmn uvwxyz abcde}
      40  {fg uvwxy pqrs klmn uvwxyz klm p}    {k uv ab fghij fgh k pqrs}
      41  {uvwx abc f pq uvwxy k}              {ab uvwxyz abc f fghij}
      42  {uvwxy klmno uvwxyz uvwxyz pqrst}    {uv kl kl klmno k f abcde}
      43  {abcde ab pqrs fg f fgh}             {abc fghij fghi k k}
      44  {uvw abcd a ab pqrst klmn fg}        {pqrst u uvwx pqrst fghij f pqrst}
      45  {uvwxy p kl uvwxyz ab pqrst fghi}    {abc f pqr fg a k}
      46  {u p f a fgh}                        {a kl pq uv f}
      47  {pqrs abc fghij fg abcde ab a}       {p ab uv pqrs kl fghi abcd}
      48  {abcde uvwxy pqrst uv abc pqr uvwx}  {uvwxy klm uvwxy uvwx k}
      49  {fgh klm abcde klmno u}              {a f fghij f uvwxyz abc u}
      50  {uv uvw uvwxyz uvwxyz uv ab}         {uvwx pq fg u k uvwxy}
      51  {uvwxy pq p kl fghi}                 {pqrs fghi pqrs abcde uvwxyz ab}
      52  {pqr p uvwxy kl pqrs klmno fghij}    {ab abcde abc pqrst pqrs uv}
      53  {fgh pqrst p a klmno}                {ab ab pqrst pqr kl pqrst}
      54  {abcd klm ab uvw a fg u}             {f pqr f abcd uv}
      55  {u fg uvwxyz k uvw}                  {abc pqrs f fghij fg pqrs uvwxy}
      56  {klm fg p fghi fg a}                 {uv a fghi uvwxyz a fghi}
      57  {uvwxy k abcde fgh f fghi}           {f kl klmn f fghi klm}
      58  {klm k fgh uvw fgh fghi}             {klmno uvwx u pqrst u}
      59  {fghi pqr pqrst p uvw fghij}         {uv pqrst pqrs pq fghij klm}
      60  {uvwx klm uvwxy uv klmn}             {p a a abc klmn ab k}
      61  {uvwxy uvwx klm uvwx klm}            {pqrs ab ab uvwxyz fg}
      62  {kl uv uv uvw fg kl k}               {abcde uvw fgh uvwxy klm}
      63  {a abc fgh u klm abcd}               {fgh pqr uv klmn fghij}
      64  {klmn k klmn klmno pqrs pqr}         {fg kl abcde klmno uvwxy kl pq}
      65  {uvwxyz klm fghi abc abcde kl}       {uvwxy uvw uvwxyz uvwxyz pq pqrst}
      66  {pq klm abc pqrst fgh f}             {u abcde pqrst abcde fg}
      67  {u pqrst kl u uvw klmno}             {u pqr pqrs fgh u p}
      68  {abc fghi uvwxy fgh k pq}            {uv p uvwx uvwxyz ab}
      69  {klmno f uvwxyz uvwxy klmn fg ab}    {fgh kl a pqr abcd pqr}
      70  {fghi pqrst pqrst uv a}              {uvwxy k p uvw uvwx a}
      71  {a fghij f p uvw}                    {klm fg abcd abcde klmno pqrs}
      72  {uv uvwx uvwx uvw klm}               {uv fghi klmno uvwxy uvw}
      73  {kl uvwxy ab f pq klm u}             {uvwxy klmn klm abcd pq fg k}
      74  {uvw pqrst abcd uvwxyz ab}           {fgh fgh klmn abc pq}
      75  {uvwxyz klm pq abcd klmno pqr uvwxyz} {kl f a fg pqr klmn}
      76  {uvw uvwxy pqr k pqrst kl}           {uvwxy abc uvw uvw u}
      77  {fgh klm u uvwxyz f uvwxy abcde}     {uv abcde klmno u u ab}
      78  {klmno abc pq pqr fgh}               {p uv abcd fgh abc u k}
      79  {fg pqr uvw pq uvwx}                 {uv uvw fghij pqrs fg p}
      80  {abcd pqrs uvwx uvwxy uvwx}          {u uvw pqrst pqr abcde pqrs kl}
      81  {uvwxyz klm pq uvwxy fghij}          {p pq klm fghij u a a}
      82  {uvwx k uvwxyz klmno pqrst kl}       {abcde p f pqrst abcd uvwxyz p}
      83  {abcd abcde klm pqrst uvwxyz}        {uvw pqrst u p uvwxyz a pqrs}
      84  {k klm abc uv uvwxy klm klmn}        {k abc pqr a abc p kl}
      85  {klmn abcd pqrs p pq klm a}          {klmn kl ab uvw pq}
      86  {klmn a pqrs abc uvw pqrst}          {a pqr kl klm a k f}
      87  {pqrs ab uvwx uvwxy a pqr f}         {fg klm uvwx pqr pqr}
      88  {klmno ab k kl u uvwxyz}             {uv kl uvw fghi uv uvw}
      89  {pq fghi pqrst klmn uvwxy abc pqrs}  {fg f f fg abc abcde klm}
      90  {kl a k fghi uvwx fghi u}            {ab uvw pqr fg a p abc}
      91  {uvwx pqrs klmno ab fgh uvwx}        {pqr uvwx abc kl f klmno kl}
      92  {fghij pq pqrs fghij f pqrst}        {u abcde fg pq pqr fgh k}
      93  {fgh u pqrs abcde klmno abc}         {abc fg pqrst pqr abcde}
      94  {uvwx p abc f pqr p}                 {k pqrs kl klm abc fghi klm}
      95  {kl p klmno uvwxyz klmn}             {fghi ab a fghi pqrs kl}
      96  {pqr fgh pq uvwx a}                  {uvw klm klmno fg uvwxy uvwx}
      97  {fg abc uvwxyz fghi pqrst pq}        {abc k a ab abcde f}
      98  {uvwxy fghi uvwxy u abcde abcde uvw} {klmn uvwx pqrs uvw uvwxy abcde}
      99  {pq fg fghi uvwx uvwx fghij uvwxy}   {klmn klmn f abc fg a}
    } {
      execsql {
        INSERT INTO t1(rowid, a, b) VALUES($rowid, $a, $b);
      }
    }
  } {}

  # Step 3: run each prefix query in descending (bAsc==0) and ascending
  # (bAsc==1) rowid order and compare against prefix_query. The number of
  # expected rows is appended to the test label.
  foreach {bAsc sql} {
    0 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix ORDER BY rowid DESC}
    1 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix}
  } {
    foreach {tn prefix} {
      1  {a*}      2  {ab*}     3  {abc*}    4  {abcd*}   5  {abcde*}
      6  {f*}      7  {fg*}     8  {fgh*}    9  {fghi*}   10 {fghij*}
      11 {k*}      12 {kl*}     13 {klm*}    14 {klmn*}   15 {klmno*}
      16 {p*}      17 {pq*}     18 {pqr*}    19 {pqrs*}   20 {pqrst*}
      21 {u*}      22 {uv*}     23 {uvw*}    24 {uvwx*}   25 {uvwxy*}
      26 {uvwxyz*}
      27 {x*}
      28 {a f*}    29 {a* f*}   30 {a* fghij*}
    } {
      # prefix_query returns rowids in descending order; re-sort them for
      # the ascending-order query.
      set res [prefix_query $prefix]
      if {$bAsc} {
        set res [lsort -integer -increasing $res]
      }
      set n [llength $res]
      do_execsql_test $T.$bAsc.$tn.$n $sql $res
    }
  }

  # Close the transaction opened by configurations 4 and 5. catchsql is
  # used so that the error raised when no transaction is open
  # (configurations 2 and 3) is ignored.
  catchsql COMMIT
}

finish_test
Added ext/fts5/test/fts5ae.test.
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
# More specifically, the tests in this file exercise the auxiliary test
# functions registered by [fts5_aux_test_functions] (fts5_test_poslist,
# fts5_test_columnsize, fts5_test_tokenize and so on) and ordering by
# bm25() / rank.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ae

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

#-------------------------------------------------------------------------
# Check that a single-term query sees each row as it is inserted, and
# that a row which does not contain the term (rowid 3) is not returned.
#
do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(a, b);
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}

do_execsql_test 1.1 {
  INSERT INTO t1 VALUES('hello', 'world');
  SELECT rowid FROM t1 WHERE t1 MATCH 'hello' ORDER BY rowid ASC;
} {1}

do_execsql_test 1.2 {
  INSERT INTO t1 VALUES('world', 'hello');
  SELECT rowid FROM t1 WHERE t1 MATCH 'hello' ORDER BY rowid ASC;
} {1 2}

do_execsql_test 1.3 {
  INSERT INTO t1 VALUES('world', 'world');
  SELECT rowid FROM t1 WHERE t1 MATCH 'hello' ORDER BY rowid ASC;
} {1 2}

do_execsql_test 1.4.1 {
  INSERT INTO t1 VALUES('hello', 'hello');
}

do_execsql_test 1.4.2 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'hello' ORDER BY rowid ASC;
} {1 2 4}

fts5_aux_test_functions db

#-------------------------------------------------------------------------
# Test fts5_test_poslist() with a single term, an OR expression and a
# column filter. As the expected values below show, each element of the
# result has the form <phrase>.<column>.<token offset>.
#
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE t2 USING fts5(x, y);
  INSERT INTO t2 VALUES('u t l w w m s', 'm f m o l t k o p e');
  INSERT INTO t2 VALUES('f g q e l n d m z x q', 'z s i i i m f w w f n g p');
}

do_execsql_test 2.1 {
  SELECT rowid, fts5_test_poslist(t2) FROM t2
  WHERE t2 MATCH 'm' ORDER BY rowid;
} {
  1 {0.0.5 0.1.0 0.1.2}
  2 {0.0.7 0.1.5}
}

do_execsql_test 2.2 {
  SELECT rowid, fts5_test_poslist(t2) FROM t2
  WHERE t2 MATCH 'u OR q' ORDER BY rowid;
} {
  1 {0.0.0}
  2 {1.0.2 1.0.10}
}

do_execsql_test 2.3 {
  SELECT rowid, fts5_test_poslist(t2) FROM t2
  WHERE t2 MATCH 'y:o' ORDER BY rowid;
} {
  1 {0.1.3 0.1.7}
}

#-------------------------------------------------------------------------
# Test fts5_test_poslist() with NEAR queries and with an expression that
# mixes OR and AND.
#
do_execsql_test 3.0 {
  CREATE VIRTUAL TABLE t3 USING fts5(x, y);
  INSERT INTO t3 VALUES( 'j f h o x x a z g b a f a m i b', 'j z c z y x w t');
  INSERT INTO t3 VALUES( 'r c', '');
}

do_execsql_test 3.1 {
  SELECT rowid, fts5_test_poslist(t3) FROM t3 WHERE t3 MATCH 'NEAR(a b)';
} {
  1 {0.0.6 1.0.9 0.0.10 0.0.12 1.0.15}
}

do_execsql_test 3.2 {
  SELECT rowid, fts5_test_poslist(t3) FROM t3 WHERE t3 MATCH 'NEAR(r c)';
} {
  2 {0.0.0 1.0.1}
}

do_execsql_test 3.3 {
  INSERT INTO t3
  VALUES('k x j r m a d o i z j', 'r t t t f e b r x i v j v g o');
  SELECT rowid, fts5_test_poslist(t3)
  FROM t3 WHERE t3 MATCH 'a OR b AND c';
} {
  1 {0.0.6 1.0.9 0.0.10 0.0.12 1.0.15 2.1.2}
  3 0.0.5
}

#-------------------------------------------------------------------------
# Run 'a OR b AND c' against a table holding a single row. The row
# contains "a" and "b" but not "c", and the expected position list holds
# only the "a" hit.
#
do_execsql_test 4.0 {
  CREATE VIRTUAL TABLE t4 USING fts5(x, y);
  INSERT INTO t4
  VALUES('k x j r m a d o i z j', 'r t t t f e b r x i v j v g o');
}

do_execsql_test 4.1 {
  SELECT rowid, fts5_test_poslist(t4) FROM t4 WHERE t4 MATCH 'a OR b AND c';
} {
  1 0.0.5
}

#-------------------------------------------------------------------------
# Test that the xColumnSize() and xColumnAvgsize() APIs work.
#
# These tests go through the fts5_test_columnsize(),
# fts5_test_columntext() and fts5_test_columntotalsize() test functions.
# NOTE(review): no function named "avgsize" is called here - the
# xColumnAvgsize() name above may be stale; confirm against the FTS5
# extension API.
#
reset_db
fts5_aux_test_functions db

do_execsql_test 5.1 {
  CREATE VIRTUAL TABLE t5 USING fts5(x, y);
  INSERT INTO t5 VALUES('a b c d', 'e f g h i j');
  INSERT INTO t5 VALUES('', 'a');
  INSERT INTO t5 VALUES('a', '');
}

do_execsql_test 5.2 {
  SELECT rowid, fts5_test_columnsize(t5) FROM t5 WHERE t5 MATCH 'a'
  ORDER BY rowid DESC;
} {
  3 {1 0}
  2 {0 1}
  1 {4 6}
}

do_execsql_test 5.3 {
  SELECT rowid, fts5_test_columntext(t5) FROM t5 WHERE t5 MATCH 'a'
  ORDER BY rowid DESC;
} {
  3 {a {}}
  2 {{} a}
  1 {{a b c d} {e f g h i j}}
}

do_execsql_test 5.4 {
  SELECT rowid, fts5_test_columntotalsize(t5) FROM t5 WHERE t5 MATCH 'a'
  ORDER BY rowid DESC;
} {
  3 {5 7}
  2 {5 7}
  1 {5 7}
}

do_execsql_test 5.5 {
  INSERT INTO t5 VALUES('x y z', 'v w x y z');
  SELECT rowid, fts5_test_columntotalsize(t5) FROM t5 WHERE t5 MATCH 'a'
  ORDER BY rowid DESC;
} {
  3 {8 12}
  2 {8 12}
  1 {8 12}
}

#-------------------------------------------------------------------------
# Test the xTokenize() API
#
reset_db
fts5_aux_test_functions db
do_execsql_test 6.1 {
  CREATE VIRTUAL TABLE t6 USING fts5(x, y);
  INSERT INTO t6 VALUES('There are more', 'things in heaven and earth');
  INSERT INTO t6 VALUES(', Horatio, Than are', 'dreamt of in your philosophy.');
}

do_execsql_test 6.2 {
  SELECT rowid, fts5_test_tokenize(t6) FROM t6 WHERE t6 MATCH 't*'
} {
  1 {{there are more} {things in heaven and earth}}
  2 {{horatio than are} {dreamt of in your philosophy}}
}

#-------------------------------------------------------------------------
# Test the xQueryPhrase() API
#
reset_db
fts5_aux_test_functions db
do_execsql_test 7.1 {
  CREATE VIRTUAL TABLE t7 USING fts5(x, y);
}
do_test 7.2 {
  foreach {x y} {
    {q i b w s a a e l o} {i b z a l f p t e u}
    {b a z t a l o x d i} {b p a d b f h d w y}
    {z m h n p p u i e g} {v h d v b x j j c z}
    {a g i m v a u c b i} {p k s o t l r t b m}
    {v v c j o d a s c p} {f f v o k p o f o g}
  } {
    execsql {INSERT INTO t7 VALUES($x, $y)}
  }
  execsql { SELECT count(*) FROM t7 }
} {5}

# Each expected value holds one two-element list per phrase in the query.
foreach {tn q res} {
  1 a                   {{4 2}}
  2 b                   {{3 4}}
  3 c                   {{2 1}}
  4 d                   {{2 2}}
  5 {a AND b}           {{4 2} {3 4}}
  6 {a OR b OR c OR d}  {{4 2} {3 4} {2 1} {2 2}}
} {
  do_execsql_test 7.3.$tn {
    SELECT fts5_test_queryphrase(t7) FROM t7 WHERE t7 MATCH $q LIMIT 1
  } [list $res]
}

# The table holds five rows (see test 7.2); four of them match 'a', and
# the row count is reported once per matching row.
do_execsql_test 7.4 {
  SELECT fts5_test_rowcount(t7) FROM t7 WHERE t7 MATCH 'a';
} {5 5 5 5}

#do_execsql_test 7.4 {
#  SELECT rowid, bm25debug(t7) FROM t7 WHERE t7 MATCH 'a';
#} {5 5 5 5}
#

#-------------------------------------------------------------------------
# Test that ordering by bm25(t8), by "+rank" and by "rank" all return
# the same rowid order for each query.
#
do_test 8.1 {
  execsql { CREATE VIRTUAL TABLE t8 USING fts5(x, y) }
  foreach {rowid x y} {
    0 {A o}   {o o o C o o o o o o o o}
    1 {o o B} {o o o C C o o o o o o o}
    2 {A o o} {o o o o D D o o o o o o}
    3 {o B}   {o o o o o D o o o o o o}
    4 {E o G} {H o o o o o o o o o o o}
    5 {F o G} {I o J o o o o o o o o o}
    6 {E o o} {H o J o o o o o o o o o}
    7 {o o o} {o o o o o o o o o o o o}
    9 {o o o} {o o o o o o o o o o o o}
  } {
    execsql { INSERT INTO t8(rowid, x, y) VALUES($rowid, $x, $y) }
  }
} {}

foreach {tn q res} {
  1 {a} {0 2}
  2 {b} {3 1}
  3 {c} {1 0}
  4 {d} {2 3}
  5 {g AND (e OR f)} {5 4}
  6 {j AND (h OR i)} {5 6}
} {
  do_execsql_test 8.2.$tn.1 {
    SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY bm25(t8);
  } $res

  do_execsql_test 8.2.$tn.2 {
    SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY +rank;
  } $res

  do_execsql_test 8.2.$tn.3 {
    SELECT rowid FROM t8 WHERE t8 MATCH $q ORDER BY rank;
  } $res
}

finish_test
Added ext/fts5/test/fts5af.test.
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
# More specifically, the tests in this file focus on the built-in
# snippet() function.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5af

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x, y);
}

# Run three snippet() tests, named $tn.1, $tn.2 and $tn.3, against a
# table t1 that holds a single row:
#
#   $tn.1: document $doc stored in column x (column y is NULL)
#   $tn.2: document $doc stored in column y (column x is NULL)
#   $tn.3: as $tn.1, but the query adds "ORDER BY rank DESC"
#
# In each case the row is matched with FTS5 expression $match and the
# value of snippet(t1, -1, '[', ']', '...', 7) is compared with $res.
#
# $doc and $match are copied into global variables v1 and v2 because the
# SQL scripts below refer to them as $v1 and $v2.
#
proc do_snippet_test {tn doc match res} {

  uplevel #0 [list set v1 $doc]
  uplevel #0 [list set v2 $match]

  do_execsql_test $tn.1 {
    DELETE FROM t1;
    INSERT INTO t1 VALUES($v1, NULL);
    SELECT snippet(t1, -1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2;
  } [list $res]

  do_execsql_test $tn.2 {
    DELETE FROM t1;
    INSERT INTO t1 VALUES(NULL, $v1);
    SELECT snippet(t1, -1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2;
  } [list $res]

  do_execsql_test $tn.3 {
    DELETE FROM t1;
    INSERT INTO t1 VALUES($v1, NULL);
    SELECT snippet(t1, -1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2
    ORDER BY rank DESC;
  } [list $res]

}

#-------------------------------------------------------------------------
# Single-token matches. Each document contains one or two instances of
# "X" among "o" filler tokens. The snippet is seven tokens long; "..." in
# the expected value marks text omitted before or after the snippet.
#
foreach {tn doc res} {

  1.1 {X o o o o o o} {[X] o o o o o o}
  1.2 {o X o o o o o} {o [X] o o o o o}
  1.3 {o o X o o o o} {o o [X] o o o o}
  1.4 {o o o X o o o} {o o o [X] o o o}
  1.5 {o o o o X o o} {o o o o [X] o o}
  1.6 {o o o o o X o} {o o o o o [X] o}
  1.7 {o o o o o o X} {o o o o o o [X]}

  2.1 {X o o o o o o o} {[X] o o o o o o...}
  2.2 {o X o o o o o o} {o [X] o o o o o...}
  2.3 {o o X o o o o o} {o o [X] o o o o...}
  2.4 {o o o X o o o o} {o o o [X] o o o...}
  2.5 {o o o o X o o o} {...o o o [X] o o o}
  2.6 {o o o o o X o o} {...o o o o [X] o o}
  2.7 {o o o o o o X o} {...o o o o o [X] o}
  2.8 {o o o o o o o X} {...o o o o o o [X]}

  3.1 {X o o o o o o o o} {[X] o o o o o o...}
  3.2 {o X o o o o o o o} {o [X] o o o o o...}
  3.3 {o o X o o o o o o} {o o [X] o o o o...}
  3.4 {o o o X o o o o o} {o o o [X] o o o...}
  3.5 {o o o o X o o o o} {...o o o [X] o o o...}
  3.6 {o o o o o X o o o} {...o o o [X] o o o}
  3.7 {o o o o o o X o o} {...o o o o [X] o o}
  3.8 {o o o o o o o X o} {...o o o o o [X] o}
  3.9 {o o o o o o o o X} {...o o o o o o [X]}

  4.1 {X o o o o o X o o} {[X] o o o o o [X]...}
  4.2 {o X o o o o o X o} {...[X] o o o o o [X]...}
  4.3 {o o X o o o o o X} {...[X] o o o o o [X]}

  5.1 {X o o o o X o o o} {[X] o o o o [X] o...}
  5.2 {o X o o o o X o o} {...[X] o o o o [X] o...}
  5.3 {o o X o o o o X o} {...[X] o o o o [X] o}
  5.4 {o o o X o o o o X} {...o [X] o o o o [X]}

  6.1 {X o o o X o o o} {[X] o o o [X] o o...}
  6.2 {o X o o o X o o o} {o [X] o o o [X] o...}
  6.3 {o o X o o o X o o} {...o [X] o o o [X] o...}
  6.4 {o o o X o o o X o} {...o [X] o o o [X] o}
  6.5 {o o o o X o o o X} {...o o [X] o o o [X]}

  7.1 {X o o X o o o o o} {[X] o o [X] o o o...}
  7.2 {o X o o X o o o o} {o [X] o o [X] o o...}
  7.3 {o o X o o X o o o} {...o [X] o o [X] o o...}
  7.4 {o o o X o o X o o} {...o [X] o o [X] o o}
  7.5 {o o o o X o o X o} {...o o [X] o o [X] o}
  7.6 {o o o o o X o o X} {...o o o [X] o o [X]}
} {
  do_snippet_test 1.$tn $doc X $res
}

#-------------------------------------------------------------------------
# Two-token phrase matches. The query is "X + Y" and the expected values
# show the whole phrase highlighted as a single [X Y] span.
#
foreach {tn doc res} {
  1.1 {X Y o o o o o} {[X Y] o o o o o}
  1.2 {o X Y o o o o} {o [X Y] o o o o}
  1.3 {o o X Y o o o} {o o [X Y] o o o}
  1.4 {o o o X Y o o} {o o o [X Y] o o}
  1.5 {o o o o X Y o} {o o o o [X Y] o}
  1.6 {o o o o o X Y} {o o o o o [X Y]}

  2.1 {X Y o o o o o o} {[X Y] o o o o o...}
  2.2 {o X Y o o o o o} {o [X Y] o o o o...}
  2.3 {o o X Y o o o o} {o o [X Y] o o o...}
  2.4 {o o o X Y o o o} {...o o [X Y] o o o}
  2.5 {o o o o X Y o o} {...o o o [X Y] o o}
  2.6 {o o o o o X Y o} {...o o o o [X Y] o}
  2.7 {o o o o o o X Y} {...o o o o o [X Y]}

  3.1 {X Y o o o o o o o} {[X Y] o o o o o...}
  3.2 {o X Y o o o o o o} {o [X Y] o o o o...}
  3.3 {o o X Y o o o o o} {o o [X Y] o o o...}
  3.4 {o o o X Y o o o o} {...o o [X Y] o o o...}
  3.5 {o o o o X Y o o o} {...o o [X Y] o o o}
  3.6 {o o o o o X Y o o} {...o o o [X Y] o o}
  3.7 {o o o o o o X Y o} {...o o o o [X Y] o}
  3.8 {o o o o o o o X Y} {...o o o o o [X Y]}

} {
  do_snippet_test 2.$tn $doc "X + Y" $res
}

finish_test
Added ext/fts5/test/fts5ag.test.
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ag

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

#-------------------------------------------------------------------------
# This file attempts to verify that the extension APIs work with
# "ORDER BY rank" queries. This is done by comparing the results of
# the fts5_test_all() function when run with queries of the form:
#
#     ... WHERE fts MATCH ? ORDER BY bm25(fts) [ASC|DESC]
#
# and
#
#     ... WHERE fts MATCH ? ORDER BY rank [ASC|DESC]
#

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x, y, z);
}

# Populate t1 with 50 rows of three columns each. The trailing
# "set {} {}" makes the test script evaluate to an empty string, which is
# the expected result.
do_test 1.1 {
  foreach {x y z} {
    {j s m y m r n l u k} {z k f u z g h s w g} {r n o s s b v n w w}
    {m v g n d x q r r s} {q t d a q a v l h j} {s k l f s i n v q v}
    {m f f d h h s o h a} {y e v r q i u m h d} {b c k q m z l z h n}
    {j e m v k p e c j m} {m p v z d x l n i a} {v p u p m t p q i f}
    {v r w l e e t d z p} {c s b w k m n k o u} {w g y f v w v w v p}
    {k d g o u j p z n o} {t g e q l z i g b j} {f i q q j y h b g h}
    {j s w x o t j b t m} {v a v v r t x c q a} {r t k x w u l h a g}
    {j y b i u d e m d w} {y s o j h i n a u p} {n a g b u c w e b m}
    {b c k s c w j p w b} {m o c o w o b d q q} {n t y o y z y r z e}
    {p n q l e l h z q c} {n s e i h c v b b u} {m p d i t a o o f f}
    {k c o n v e z l b m} {s m n i n s d e s u} {t a u e q d a o u c}
    {h d t o i a g b b p} {k x c i g f g b b k} {x f i v n a n n j i}
    {f z k r b u s k z e} {n z v z w l e r h t} {t i s v v a v p n s}
    {k f e c t z r e f d} {f m g r c w q k b v} {v y s y f r b f e f}
    {z r c t d q q h x b} {u c g z n z u v s s} {y t n f f x b f d x}
    {u n p n u t i m e j} {p j j d m f k p m z} {d o l v c o e a h w}
    {h o q w t f v i c y} {c q u n r z s l l q} {z x a q w s b w s y}
    {y m s x k i m n x c} {b i a n v h z n k a} {w l q p b h h g d y}
    {z v s j f p v l f w} {c s b i z e k i g c} {x b v d w j f e d z}
    {r k k j e o m k g b} {h b d c h m y b t u} {u j s h k z c u d y}
    {v h i v s y z i k l} {d t m w q w c a z p} {r s e s x v d w k b}
    {u r e q j y h o o s} {x x z r x y t f j s} {k n h x i i u e c v}
    {q l f d a p w l q o} {y z q w j o p b o v} {s u h z h f d f n l}
    {q o e o x x l g q i} {j g m h q q w c d b} {o m d h w a g b f n}
    {m x k t s s y l v a} {j x t c a u w b w g} {n f j b v x y p u t}
    {u w k a q b u w k w} {a h j u o w f s k p} {j o f s h y t j h g}
    {x v b l m t l m h l} {t p y i y i q b q a} {k o o z w a c h c f}
    {j g c d k w b d t v} {a k v c m a v h v p} {i c a i j g h l j h}
    {l m v l c z j b p b} {z p z f l n k i b a} {j v q k g i x g i b}
    {m c i w u z m i s z} {i z r f n l q z k w} {x n b p b q r g i z}
    {d g i o o x l f x d} {r t m f b n q y c b} {i u g k w x n m p o}
    {t o s i q d z x d t} {v a k s q z j c o o} {z f n n r l y w v v}
    {w k h d t l j g n n} {r z m v y b l n c u} {v b v s c l n k g v}
    {m a g r a b u u n z} {u y l h v w v k b f} {x l p g i s j f x v}
    {v s g x k z a k a r} {l t g v j q l k p l} {f h n a x t v s t y}
    {z u v u x p s j y t} {g b q e e g l n w g} {e n p j i g j f u r}
    {q z l t w o l m p e} {t s g h r p r o t z} {y b f a o n u m z g}
    {d t w n y b o g f o} {d a j e r l g g s h} {d z e l w q l t h f}
    {f l u w q v x j a h} {f n u l l d m h h w} {d x c c e r o d q j}
    {b y f q s q f u l g} {u z w l f d b i a g} {m v q b g u o z e z}
    {h z p t s e x i v m} {l h q m e o x x x j} {e e d n p r m g j f}
    {k h s g o n s d a x} {u d t t s j o v h a} {z r b a e u v o e s}
    {m b b g a f c p a t} {w c m j o d b l g e} {f p j p m o s y v j}
    {c r n h d w c a b l} {s g e u s d n j b g} {b o n a x a b x y l}
    {r h u x f c d z n o} {x y l g u m i i w d} {t f h b z v r s r g}
    {t i o r b v g g p a} {d x l u q k m o s u} {j f h t u n z u k m}
    {g j t y d c n j y g} {w e s k v c w i g t} {g a h r g v g h r o}
    {e j l a q j g i n h} {d z k c u p n u p p} {t u e e v z v r r g}
    {l j s g k j k h z l} {p v d a t x d e q u} {r l u z b m g k s j}
    {i e y d u x d i n l} {p f z k m m w p u l} {z l p m r q w n d a}
  } {
    execsql { INSERT INTO t1 VALUES($x, $y, $z) }
  }
  set {} {}
} {}

fts5_aux_test_functions db

# Run FTS5 expression $E against t1 twice - once ordered by "rank" and
# once ordered by "bm25(t1)" - and check that fts5_test_all(t1) returns
# the same values in the same order for both. Test $tn.1 uses the default
# sort direction and test $tn.2 appends DESC to both queries.
#
# Both queries are executed here, before do_test is invoked; the do_test
# script itself just returns the already-computed "rank" result so that
# it can be compared with the "bm25" result.
#
proc do_fts5ag_test {tn E} {
  set q1 {SELECT fts5_test_all(t1) FROM t1 WHERE t1 MATCH $E ORDER BY rank}
  set q2 {SELECT fts5_test_all(t1) FROM t1 WHERE t1 MATCH $E ORDER BY bm25(t1)}

  set res [execsql $q1]
  set expected [execsql $q2]
  uplevel [list do_test $tn.1 [list set {} $res] $expected]

  append q1 " DESC"
  append q2 " DESC"

  set res [execsql $q1]
  set expected [execsql $q2]
  uplevel [list do_test $tn.2 [list set {} $res] $expected]
}

# Single terms, phrases, boolean expressions and a NEAR query. The loop
# stops at the first expression for which the error counter is non-zero.
foreach {tn expr} {
  2.1 a
  2.2 b
  2.3 c
  2.4 d

  2.5 {"m m"}
  2.6 {e + s}

  3.0 {a AND b}
  3.1 {a OR b}
  3.2 {b OR c AND d}
  3.3 {NEAR(c d)}
} {
  do_fts5ag_test $tn $expr

  if {[set_test_counter errors]} break
}

finish_test
Added ext/fts5/test/fts5ah.test.
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ah

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

#-------------------------------------------------------------------------
# This file contains tests for very large doclists.
#

# Populate t1 with one all-"w" row (rowid 0), 10000 rows of 20 "x" tokens
# and a trailing all-"w" row. Every 2139th row has token 3 replaced by "Y"
# and every 1577th row has token 5 replaced by "W"; the rowids of those
# rows are collected in the lists Y and W for use as expected results.
do_test 1.0 {
  execsql { CREATE VIRTUAL TABLE t1 USING fts5(a) }
  execsql { INSERT INTO t1(t1, rank) VALUES('pgsz', 128) }
  set v {w w w w w w w w w w w w w w w w w w w w}
  execsql { INSERT INTO t1(rowid, a) VALUES(0, $v) }

  # Start from empty lists so that values left behind by an earlier script
  # run in the same interpreter cannot leak into the expected results.
  set W [list]
  set Y [list]
  for {set i 1} {$i <= 10000} {incr i} {
    set v {x x x x x x x x x x x x x x x x x x x x}
    if {($i % 2139)==0} {lset v 3 Y ; lappend Y $i}
    if {($i % 1577)==0} {lset v 5 W ; lappend W $i}
    execsql { INSERT INTO t1 VALUES($v) }
  }
  set v {w w w w w w w w w w w w w w w w w w w w}
  execsql { INSERT INTO t1 VALUES($v) }
} {}

do_execsql_test 1.1.1 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'x AND w'
} [lsort -integer -incr $W]

do_execsql_test 1.1.2 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'x* AND w*'
} [lsort -integer -incr $W]

do_execsql_test 1.2 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'y AND x'
} [lsort -integer -incr $Y]

do_execsql_test 1.3 {
  INSERT INTO t1(t1) VALUES('integrity-check');
}

# Return the current value reported by the special '*reads' query on t1.
# The tests below only ever use the difference between two calls.
# NOTE(review): presumably this is a running count of index pages read;
# confirm against the FTS5 implementation.
proc reads {} {
  db one {SELECT t1 FROM t1 WHERE t1 MATCH '*reads'}
}

# Execute $sql and return by how much the value returned by [reads]
# increased while it ran.
proc execsql_reads {sql} {
  set nRead [reads]
  execsql $sql
  expr {[reads] - $nRead}
}

# A full scan of the "x" doclist must cost more than 1000 reads. The
# result is kept in nReadX as the baseline for the tests that follow.
do_test 1.4 {
  set nRead [reads]
  execsql { SELECT rowid FROM t1 WHERE t1 MATCH 'x' }
  set nReadX [expr {[reads] - $nRead}]
  expr {$nReadX>1000}
} {1}

# NOTE(review): the second query is stored in "bwd" but sorts ASC; confirm
# whether a DESC scan was intended here. Behavior is left unchanged.
do_test 1.5 {
  set fwd [execsql_reads {SELECT rowid FROM t1 WHERE t1 MATCH 'x' }]
  set bwd [execsql_reads {
    SELECT rowid FROM t1 WHERE t1 MATCH 'x' ORDER BY 1 ASC
  }]
  expr {$bwd < $fwd + 12}
} {1}

# Queries that combine the huge "x" doclist with a tiny one must need
# fewer than 1/8th of the reads of the full "x" scan, in both directions,
# and must still return the expected rowids.
foreach {tn q res} "
  1 { SELECT rowid FROM t1 WHERE t1 MATCH 'w + x' }   [list $W]
  2 { SELECT rowid FROM t1 WHERE t1 MATCH 'x + w' }   [list $W]
  3 { SELECT rowid FROM t1 WHERE t1 MATCH 'x AND w' } [list $W]
  4 { SELECT rowid FROM t1 WHERE t1 MATCH 'y AND x' } [list $Y]
" {

  do_test 1.6.$tn.1 {
    set n [execsql_reads $q]
    puts -nonewline "(n=$n nReadX=$nReadX)"
    expr {$n < ($nReadX / 8)}
  } {1}

  do_test 1.6.$tn.2 {
    set n [execsql_reads "$q ORDER BY rowid DESC"]
    puts -nonewline "(n=$n nReadX=$nReadX)"
    expr {$n < ($nReadX / 8)}
  } {1}

  do_execsql_test 1.6.$tn.3 $q [lsort -int -incr $res]
  do_execsql_test 1.6.$tn.4 "$q ORDER BY rowid DESC" [lsort -int -decr $res]
}

#-------------------------------------------------------------------------
# Now test that adding range constraints on the rowid field reduces the
# number of pages loaded from disk.
#
# Each entry is: test number, maximum allowed fraction of nReadX for the
# ascending query (the descending query is allowed twice that), the rowid
# constraint, and the expected number of matching rows.
#
foreach {tn fraction tail cnt} {
  1  0.6 {rowid > 5000}                 5000
  2  0.2 {rowid > 9000}                 1000
  3  0.2 {rowid < 1000}                  999
  4  0.2 {rowid BETWEEN 4000 AND 5000}  1001
  5  0.6 {rowid >= 5000}                5001
  6  0.2 {rowid >= 9000}                1001
  7  0.2 {rowid <= 1000}                1000
  8  0.6 {rowid > '5000'}               5000
  9  0.2 {rowid > '9000'}               1000
  10 0.1 {rowid = 444}                     1
} {
  set q "SELECT rowid FROM t1 WHERE t1 MATCH 'x' AND $tail"
  set n [execsql_reads $q]
  set ret [llength [execsql $q]]

  do_test "1.7.$tn.asc.(n=$n ret=$ret)" {
    expr {$n < ($fraction*$nReadX) && $ret==$cnt}
  } {1}

  set q "SELECT rowid FROM t1 WHERE t1 MATCH 'x' AND $tail ORDER BY rowid DESC"
  set n [execsql_reads $q]
  set ret [llength [execsql $q]]
  do_test "1.7.$tn.desc.(n=$n ret=$ret)" {
    expr {$n < 2*$fraction*$nReadX && $ret==$cnt}
  } {1}
}

# Comparing rowid against a non-numeric string must not filter out any of
# the 10000 rows that contain "x", with or without the unary "+".
do_execsql_test 1.8.1 {
  SELECT count(*) FROM t1 WHERE t1 MATCH 'x' AND +rowid < 'text';
} {10000}
do_execsql_test 1.8.2 {
  SELECT count(*) FROM t1 WHERE t1 MATCH 'x' AND rowid < 'text';
} {10000}

#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}

finish_test
Added ext/fts5/test/fts5ai.test.
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
# Specifically, it tests transactions and savepoints
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ai

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(a);
} {}

# Exercise nested savepoints inside a single transaction:
#   * "ROLLBACK TO one" discards 'g h i' and 'j k l' (and savepoint two),
#   * savepoint "two" is then re-created and "RELEASE one" keeps 'm n o'
#     and 'p q r',
#   * savepoint "one" is re-created and rolled back, discarding 's t u'.
# The statements are order-dependent; do not reorder them.
do_execsql_test 1.1 {
  BEGIN;
    INSERT INTO t1 VALUES('a b c');
    INSERT INTO t1 VALUES('d e f');
    SAVEPOINT one;
      INSERT INTO t1 VALUES('g h i');
      SAVEPOINT two;
        INSERT INTO t1 VALUES('j k l');
    ROLLBACK TO one;
      INSERT INTO t1 VALUES('m n o');
      SAVEPOINT two;
        INSERT INTO t1 VALUES('p q r');
    RELEASE one;
    SAVEPOINT one;
      INSERT INTO t1 VALUES('s t u');
    ROLLBACK TO one;
  COMMIT;
}

# The index must still be consistent with the table content after the
# mixture of rollbacks and releases above.
do_execsql_test 1.2 {
  INSERT INTO t1(t1) VALUES('integrity-check');
}

finish_test
Added ext/fts5/test/fts5aj.test.
# 2014 June 17
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
# Specifically, this tests that, provided the amount of data remains
# constant, the FTS index does not grow indefinitely as rows are inserted
# and deleted.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5aj

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

# Return a document made of 20 tokens, each a lowercase letter chosen
# with rand().
proc doc {} {
  set dict [list a b c d e f g h i j k l m n o p q r s t u v w x y z]
  set res [list]
  for {set i 0} {$i < 20} {incr i} {
    lappend res [lindex $dict [expr {int(rand() * 26)}]]
  }
  set res
}

# Decode the t1_data record with rowid=10 using fts5_decode() and return a
# list holding, for each element after the first, its list length minus 2.
# NOTE(review): presumably rowid 10 is the index "structure" record and
# each value is the number of segments on one level; confirm against the
# fts5_decode() output format.
proc structure {} {
  set val [db one {SELECT fts5_decode(rowid,block) FROM t1_data WHERE rowid=10}]
  # Initialise the result so that a record with no such elements yields an
  # empty list instead of a "no such variable" error.
  set res [list]
  foreach lvl [lrange $val 1 end] {
    lappend res [expr {[llength $lvl]-2}]
  }
  set res
}

# Seed the random number generator so that every run inserts the same data.
expr {srand(0)}

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  INSERT INTO t1(t1, rank) VALUES('pgsz', 64);
}

# Insert 50000 random documents. Once more than 1000 iterations have run,
# each iteration first deletes the row inserted 1000 iterations earlier, so
# the table size stays roughly constant. Every 1000 iterations run an
# integrity-check; the row count of t1_data and the output of [structure]
# are embedded in the test name so they show up in the test log.
for {set iTest 0} {$iTest < 50000} {incr iTest} {
  if {$iTest > 1000} { execsql { DELETE FROM t1 WHERE rowid=($iTest-1000) } }
  set new [doc]
  execsql { INSERT INTO t1 VALUES($new) }
  if {$iTest==10000} {
    # NOTE(review): sz1 is recorded but never compared against later sizes
    # in this script; confirm whether a size assertion was intended.
    set sz1 [db one {SELECT count(*) FROM t1_data}]
  }
  if {0==($iTest % 1000)} {
    set sz [db one {SELECT count(*) FROM t1_data}]
    set s [structure]
    do_execsql_test 1.$iTest.$sz.{$s} {
      INSERT INTO t1(t1) VALUES('integrity-check')
    }
  }
}

do_execsql_test 2.0 { INSERT INTO t1(t1) VALUES('integrity-check') }

finish_test
Added ext/fts5/test/fts5ak.test.
# 2014 November 24
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
# Specifically, the auxiliary function "highlight".
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5ak

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

#-------------------------------------------------------------------------
# 1.*: highlight() over a ten-row table, using '[' and ']' as the open and
#      close markers.
#
do_execsql_test 1.1 {
  CREATE VIRTUAL TABLE ft1 USING fts5(x);
  INSERT INTO ft1 VALUES('i d d a g i b g d d');
  INSERT INTO ft1 VALUES('h d b j c c g a c a');
  INSERT INTO ft1 VALUES('e j a e f h b f h h');
  INSERT INTO ft1 VALUES('j f h d g h i b d f');
  INSERT INTO ft1 VALUES('d c j d c j b c g e');
  INSERT INTO ft1 VALUES('i a d e g j g d a a');
  INSERT INTO ft1 VALUES('j f c e d a h j d b');
  INSERT INTO ft1 VALUES('i c c f a d g h j e');
  INSERT INTO ft1 VALUES('i d i g c d c h b f');
  INSERT INTO ft1 VALUES('g d a e h a b c f j');
}

# Single-token query: every instance of the token is marked.
do_execsql_test 1.2 {
  SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'e';
} {
  {[e] j a [e] f h b f h h}
  {d c j d c j b c g [e]}
  {i a d [e] g j g d a a}
  {j f c [e] d a h j d b}
  {i c c f a d g h j [e]}
  {g d a [e] h a b c f j}
}

# Phrase query: the whole phrase is wrapped in one pair of markers.
do_execsql_test 1.3 {
  SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'h + d';
} {
  {[h d] b j c c g a c a}
  {j f [h d] g h i b d f}
}

do_execsql_test 1.4 {
  SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'd + d';
} {
  {i [d d] a g i b g [d d]}
}

# Repeating the same token or phrase in the query gives the same output as
# querying for it once (compare with 1.2 and 1.4).
do_execsql_test 1.5 {
  SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'e e e'
} {
  {[e] j a [e] f h b f h h}
  {d c j d c j b c g [e]}
  {i a d [e] g j g d a a}
  {j f c [e] d a h j d b}
  {i c c f a d g h j [e]}
  {g d a [e] h a b c f j}
}

do_execsql_test 1.6 {
  SELECT highlight(ft1, 0, '[', ']') FROM ft1 WHERE ft1 MATCH 'd + d d + d';
} {
  {i [d d] a g i b g [d d]}
}

#-------------------------------------------------------------------------
# 2.*: Overlapping and adjacent phrase matches within a single row.
#      Overlapping matches are merged into one highlighted range (2.2,
#      2.4, 2.5); adjacent but non-overlapping matches stay separate (2.3).
#
do_execsql_test 2.1 {
  CREATE VIRTUAL TABLE ft2 USING fts5(x);
  INSERT INTO ft2 VALUES('a b c d e f g h i j');
}

do_execsql_test 2.2 {
  SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'b+c+d c+d+e'
} {{a [b c d e] f g h i j}}

do_execsql_test 2.3 {
  SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'b+c+d e+f+g'
} {
  {a [b c d] [e f g] h i j}
}

do_execsql_test 2.4 {
  SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'b+c+d c'
} {
  {a [b c d] e f g h i j}
}

do_execsql_test 2.5 {
  SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'b+c c+d+e'
} {
  {a [b c d e] f g h i j}
}

# The order of the terms in the query does not affect the output.
do_execsql_test 2.6.1 {
  SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'f d'
} {
  {a b c [d] e [f] g h i j}
}

do_execsql_test 2.6.2 {
  SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'd f'
} {
  {a b c [d] e [f] g h i j}
}

#-------------------------------------------------------------------------
# The example from the docs.
#
do_execsql_test 3.1 {
  -- Assuming this:
  CREATE VIRTUAL TABLE ft USING fts5(a);
  INSERT INTO ft VALUES('a b c x c d e');
  INSERT INTO ft VALUES('a b c c d e');
  INSERT INTO ft VALUES('a b c d e');

  -- The following SELECT statement returns these three rows:
  -- '[a b c] x [c d e]'
  -- '[a b c] [c d e]'
  -- '[a b c d e]'
  SELECT highlight(ft, 0, '[', ']') FROM ft WHERE ft MATCH 'a+b+c AND c+d+e';
} {
  {[a b c] x [c d e]}
  {[a b c] [c d e]}
  {[a b c d e]}
}

finish_test
Added ext/fts5/test/fts5al.test.
# 2014 November 24
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The
# focus of this script is testing the FTS5 module.
#
# Specifically, this file tests the %_config table.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5al

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

# 1.*: Setting 'pgsz' adds (and then replaces) a row in the %_config table.
do_execsql_test 1.1 {
  CREATE VIRTUAL TABLE ft1 USING fts5(x);
  SELECT * FROM ft1_config;
} {version 2}

do_execsql_test 1.2 {
  INSERT INTO ft1(ft1, rank) VALUES('pgsz', 32);
  SELECT * FROM ft1_config;
} {pgsz 32 version 2}

do_execsql_test 1.3 {
  INSERT INTO ft1(ft1, rank) VALUES('pgsz', 64);
  SELECT * FROM ft1_config;
} {pgsz 64 version 2}

#--------------------------------------------------------------------------
# Test the logic for parsing the rank() function definition.
#
# 2.1.*: Definitions that must be accepted. Cases 10-12 repeat cases 7-9
#        with extra whitespace inside the parentheses; they have their own
#        test numbers so that every test name in this file is unique.
# 2.2.*: Definitions that must be rejected.
#
foreach {tn defn} {
  1 "fname()"
  2 "fname(1)"
  3 "fname(1,2)"
  4 "fname(null,NULL,nUlL)"
  5 " fname ( null , NULL , nUlL ) "
  6 "fname('abc')"
  7 "fname('a''bc')"
  8 "fname('''abc')"
  9 "fname('abc''')"

  10 "fname( 'a''bc' )"
  11 "fname('''abc' )"
  12 "fname( 'abc''' )"

  13 "fname(X'1234ab')"

  14 "myfunc(1.2)"
  15 "myfunc(-1.0)"
  16 "myfunc(.01,'abc')"
} {
  do_execsql_test 2.1.$tn {
    INSERT INTO ft1(ft1, rank) VALUES('rank', $defn);
  }
}

foreach {tn defn} {
  1 ""
  2 "fname"
  3 "fname(X'234ab')"
  4 "myfunc(-1.,'abc')"
} {
  do_test 2.2.$tn {
    catchsql { INSERT INTO ft1(ft1, rank) VALUES('rank', $defn) }
  } {1 {SQL logic error or missing database}}
}

#-------------------------------------------------------------------------
# Assorted tests of the tcl interface for creating extension functions.
#
# Each proc below is registered with sqlite3_fts5_create_function. Its
# first argument (cmd) is a command prefix used to call the extension API
# methods (xRowid, xInstCount, xInst, xColumnSize, xColumnText); any SQL
# arguments after the table name follow it.
#

do_execsql_test 3.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  INSERT INTO t1 VALUES('q w e r t y');
  INSERT INTO t1 VALUES('y t r e w q');
}

# Return the SQL arguments passed after the table name, as a list.
proc argtest {cmd args} { return $args }
sqlite3_fts5_create_function db argtest argtest

do_execsql_test 3.2.1 {
  SELECT argtest(t1, 123) FROM t1 WHERE t1 MATCH 'q'
} {123 123}

do_execsql_test 3.2.2 {
  SELECT argtest(t1, 123, 456) FROM t1 WHERE t1 MATCH 'q'
} {{123 456} {123 456}}

# Return the rowid of the current row.
proc rowidtest {cmd} { $cmd xRowid }
sqlite3_fts5_create_function db rowidtest rowidtest

do_execsql_test 3.3.1 {
  SELECT rowidtest(t1) FROM t1 WHERE t1 MATCH 'q'
} {1 2}

# Return the list of all phrase instances (xInst results) in the current
# row.
proc insttest {cmd} {
  set res [list]
  for {set i 0} {$i < [$cmd xInstCount]} {incr i} {
    lappend res [$cmd xInst $i]
  }
  set res
}
sqlite3_fts5_create_function db insttest insttest

do_execsql_test 3.4.1 {
  SELECT insttest(t1) FROM t1 WHERE t1 MATCH 'q'
} {
  {{0 0 0}}
  {{0 0 5}}
}

do_execsql_test 3.4.2 {
  SELECT insttest(t1) FROM t1 WHERE t1 MATCH 'r+e OR w'
} {
  {{1 0 1}}
  {{0 0 2} {1 0 4}}
}

# Return the size and the text of column 0 of the current row.
proc coltest {cmd} {
  list [$cmd xColumnSize 0] [$cmd xColumnText 0]
}
sqlite3_fts5_create_function db coltest coltest

do_execsql_test 3.5.1 {
  SELECT coltest(t1) FROM t1 WHERE t1 MATCH 'q'
} {
  {6 {q w e r t y}}
  {6 {y t r e w q}}
}

#-------------------------------------------------------------------------
# Tests for remapping the "rank" column.
#
#   4.1.*: Mapped to a function with no arguments.
#   4.2.*: Mapped to a function with one or more arguments.
#   4.3.*: A "rank MATCH" constraint overrides the configured rank
#          function for a single query.
#   4.4.*: Error cases for the "rank MATCH" constraint.
#

do_execsql_test 4.0 {
  CREATE VIRTUAL TABLE t2 USING fts5(a, b);
  INSERT INTO t2 VALUES('a s h g s b j m r h', 's b p a d b b a o e');
  INSERT INTO t2 VALUES('r h n t a g r d d i', 'l d n j r c f t o q');
  INSERT INTO t2 VALUES('q k n i k c a a e m', 'c h n j p g s c i t');
  INSERT INTO t2 VALUES('h j g t r e l s g s', 'k q k c i i c k n s');
  INSERT INTO t2 VALUES('b l k h d n n n m i', 'p t i a r b t q o l');
  INSERT INTO t2 VALUES('k r i l j b g i p a', 't q c h a i m g n l');
  INSERT INTO t2 VALUES('a e c q n m o m d g', 'l c t g i s q g q e');
  INSERT INTO t2 VALUES('b o j h f o g b p e', 'r t l h s b g i c p');
  INSERT INTO t2 VALUES('s q k f q b j g h f', 'n m a o p e i e k t');
  INSERT INTO t2 VALUES('o q g g q c o k a b', 'r t k p t f t h p c');
}

# Return (column * 100 + offset) of the first phrase instance in the
# current row.
proc firstinst {cmd} {
  foreach {p c o} [$cmd xInst 0] {}
  expr {$c*100 + $o}
}
sqlite3_fts5_create_function db firstinst firstinst

do_execsql_test 4.1.1 {
  SELECT rowid, firstinst(t2) FROM t2 WHERE t2 MATCH 'a' ORDER BY rowid ASC
} {
  1 0 2 4 3 6 5 103
  6 9 7 0 9 102 10 8
}

do_execsql_test 4.1.2 {
  SELECT rowid, rank FROM t2
  WHERE t2 MATCH 'a' AND rank MATCH 'firstinst()'
  ORDER BY rowid ASC
} {
  1 0 2 4 3 6 5 103
  6 9 7 0 9 102 10 8
}

do_execsql_test 4.1.3 {
  SELECT rowid, rank FROM t2
  WHERE t2 MATCH 'a' AND rank MATCH 'firstinst()'
  ORDER BY rank DESC
} {
  5 103 9 102 6 9 10 8 3 6 2 4 1 0 7 0
}

do_execsql_test 4.1.4 {
  INSERT INTO t2(t2, rank) VALUES('rank', 'firstinst()');
  SELECT rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rowid ASC
} {
  1 0 2 4 3 6 5 103
  6 9 7 0 9 102 10 8
}

do_execsql_test 4.1.5 {
  SELECT rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rank DESC
} {
  5 103 9 102 6 9 10 8 3 6 2 4 1 0 7 0
}

do_execsql_test 4.1.6 {
  INSERT INTO t2(t2, rank) VALUES('rank', 'firstinst ( ) ');
  SELECT rowid, rank FROM t2 WHERE t2 MATCH 'a' ORDER BY rank DESC
} {
  5 103 9 102 6 9 10 8 3 6 2 4 1 0 7 0
}

# Return the rowid of the current row plus $ival.
proc rowidplus {cmd ival} {
  expr {[$cmd xRowid] + $ival}
}
sqlite3_fts5_create_function db rowidplus rowidplus

do_execsql_test 4.2.1 {
  INSERT INTO t2(t2, rank) VALUES('rank', 'rowidplus(100) ');
  SELECT rowid, rank FROM t2 WHERE t2 MATCH 'o + q + g'
} {
  10 110
}
do_execsql_test 4.2.2 {
  INSERT INTO t2(t2, rank) VALUES('rank', 'rowidplus(111) ');
  SELECT rowid, rank FROM t2 WHERE t2 MATCH 'o + q + g'
} {
  10 121
}

do_execsql_test 4.2.3 {
  SELECT rowid, rank FROM t2
  WHERE t2 MATCH 'o + q + g' AND rank MATCH 'rowidplus(112)'
} {
  10 122
}

# Return the rowid of the current row modulo $imod.
proc rowidmod {cmd imod} {
  expr {[$cmd xRowid] % $imod}
}
sqlite3_fts5_create_function db rowidmod rowidmod
do_execsql_test 4.3.1 {
  CREATE VIRTUAL TABLE t3 USING fts5(x);
  INSERT INTO t3 VALUES('a one');
  INSERT INTO t3 VALUES('a two');
  INSERT INTO t3 VALUES('a three');
  INSERT INTO t3 VALUES('a four');
  INSERT INTO t3 VALUES('a five');
  INSERT INTO t3(t3, rank) VALUES('rank', 'bm25()');
}

do_execsql_test 4.3.2 {
  SELECT * FROM t3
  WHERE t3 MATCH 'a' AND rank MATCH 'rowidmod(4)'
  ORDER BY rank ASC
} {
  {a four} {a one} {a five} {a two} {a three}
}

do_execsql_test 4.3.3 {
  SELECT *, rank FROM t3
  WHERE t3 MATCH 'a' AND rank MATCH 'rowidmod(3)'
  ORDER BY rank ASC
} {
  {a three} 0 {a one} 1 {a four} 1 {a two} 2 {a five} 2
}

do_catchsql_test 4.4.3 {
  SELECT *, rank FROM t3 WHERE t3 MATCH 'a' AND rank MATCH 'xyz(3)'
} {1 {no such function: xyz}}
do_catchsql_test 4.4.4 {
  SELECT *, rank FROM t3 WHERE t3 MATCH 'a' AND rank MATCH NULL
} {1 {parse error in rank function: }}

finish_test
Added ext/fts5/test/fts5alter.test.
# 2015 Jun 10
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# The tests in this file focus on renaming FTS5 tables using the
# "ALTER TABLE ... RENAME TO ..." command
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5alter

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

#-------------------------------------------------------------------------
# Test renaming regular, contentless and columnsize=0 FTS5 tables.
#
# In each case the table is populated, renamed, and then both the stored
# content and a full-text query are checked under the new name.
#

# 1.1.*: A regular table. Both names contain a space, so they must be
#        quoted.
do_execsql_test 1.1.0 {
  CREATE VIRTUAL TABLE "a x" USING fts5(a, x);
  INSERT INTO "a x" VALUES('a a a', 'x x x');
  ALTER TABLE "a x" RENAME TO "x y";
}
do_execsql_test 1.1.1 {
  SELECT * FROM "x y";
  SELECT rowid FROM "x y" WHERE "x y" MATCH 'a'
} {{a a a} {x x x} 1}

# 1.2.*: A columnsize=0 table. Both names contain a '/'.
do_execsql_test 1.2.0 {
  CREATE VIRTUAL TABLE "one/two" USING fts5(one, columnsize=0);
  INSERT INTO "one/two"(rowid, one) VALUES(456, 'd d d');
  ALTER TABLE "one/two" RENAME TO "three/four";
}
do_execsql_test 1.2.1 {
  SELECT * FROM "three/four";
  SELECT rowid FROM "three/four" WHERE "three/four" MATCH 'd'
} {{d d d} 456}

# 1.3.*: A contentless table (content=''). The column reads back as an
#        empty value, but the full-text index still finds the row.
do_execsql_test 1.3.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(val, content='');
  INSERT INTO t1(rowid, val) VALUES(-1, 'drop table');
  INSERT INTO t1(rowid, val) VALUES(-2, 'drop view');
  ALTER TABLE t1 RENAME TO t2;
}
do_execsql_test 1.3.1 {
  SELECT rowid, * FROM t2;
  SELECT rowid FROM t2 WHERE t2 MATCH 'table'
} {-2 {} -1 {} -1}

#-------------------------------------------------------------------------
# Test renaming an FTS5 table within a transaction.
#

# 2.1: Rename after an uncommitted INSERT, then query and commit.
do_execsql_test 2.1 {
  CREATE VIRTUAL TABLE zz USING fts5(a);
  INSERT INTO zz(rowid, a) VALUES(-56, 'a b c');
  BEGIN;
    INSERT INTO zz(rowid, a) VALUES(-22, 'a b c');
    ALTER TABLE zz RENAME TO yy;
    SELECT rowid FROM yy WHERE yy MATCH 'a + b + c';
  COMMIT;
} {-56 -22}

# 2.2: Rename and insert inside a transaction that is deliberately left
#      open; test 2.3 rolls it back.
do_execsql_test 2.2 {
  BEGIN;
    ALTER TABLE yy RENAME TO ww;
    INSERT INTO ww(rowid, a) VALUES(-11, 'a b c');
    SELECT rowid FROM ww WHERE ww MATCH 'a + b + c';
} {-56 -22 -11}

# 2.3: After ROLLBACK the table is called "yy" again and the row inserted
#      in 2.2 is gone.
do_execsql_test 2.3 {
  ROLLBACK;
  SELECT rowid FROM yy WHERE yy MATCH 'a + b + c';
} {-56 -22}

finish_test
Added ext/fts5/test/fts5auto.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 
338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 | # 2015 May 30 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # # This file contains automatically generated tests for various types # of MATCH expressions. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5auto # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } set data { -4026076 {n x w k b p x b n t t d s} {f j j s p j o} {w v i y r} {i p y s} {a o q v e n q r} {q v g u c y a z y} 3995120 {c} {e e w d t} {x c p f w r s m l r b f d} {g g u e} {s n u t d v p d} {b k v p m f} -2913881 {k m} {a} {w r j z n s l} {m j i w d t w e l} {z n c} {v f b m} 174082 {j} {q l w u k e q v r i} {j l} {u v w r s p e l} {p i k j k q c t g u s} {g u y s m h q k g t e s o r} 3207399 {e t} {} {p} {y v r b e k h d e v} {t m w z b g q t s d d h} {o n v u i t o y k j} 182399 {} {m o s o x d y f a x j z} {x n z r c d} {n r x i r} {s v s} {a u} 768994 {e u t q v z q k j p u f j p} {y c b} {p s d} {k n w p m p p} {u o x s d} {f s g r d b d r m m m z y} 3931037 {c j p x e} {c n k t h z o i} {} {r r p j k x w q} {o r d z d} {x} 3105748 {p x r u} {x i s w o t o g x m z i w} {q x m z} {h c j w b l y w x c o} {m b k v} {t v q i s a d x} -2501642 {o u d n w o m o o s n t r h} {k p e u y p e z d j r y g} {v b b h d d q y j q j} {a m w d t} {y e f n} {a k x i x} -1745680 {z u w j f d b f} {j w i c g u d w e} {m f p v m a s p v c o s} {s c r z o t w l b e a q} {m k q} {k b a v o} -932328 {r v i u m q d r} {f z u v h c m r f g} {r x r} {k p i d h h w h z u a x} {k m j p} {h l j a 
e u c i q x x f x g} -3923818 {t t p b n u i h e c k} {m z} {v u d c} {v y y j s g} {o a f k k q p h g x e n z x} {h d w c o l} -2145922 {z z l f a l g e d c d h} {j b j p k o o u b q} {d i g q t f d r h k} {n w g j c x r p t y f l c t} {d o c u k f o} {r y s x z s p p h g t p y c} 4552917 {j w j y h l k u} {n a} {y h w c n k} {b} {w} {z l r t s i m v c y} 2292008 {q v q j w y y x u t} {r q z n h a b o} {d q y} {y v o e j} {} {a b h c d l p d x} 1407892 {n j j u q d o a u c f} {r d b w o q n g} {d e v w s} {v d v o u o x s l s j z y} {j y w h i f g i h m} {v n z b n y} -4412544 {g h h r s} {h e r e} {n q s} {o p z r m l l t} {p} {f s u o b j} 1209110 {o a a z t t u h j} {z z i r k r} {i c x q w g v o x z i z p} {q o g k i n z x e d v w v} {p f v b g f e d n p u c y k} {q z z a i p a a s r e z} 3448977 {i v} {l u x t b o k} {f h u v p} {k a o y j} {d m k c j} {v c e r u e f i t} -4703774 {d h v w u z r e h x o l t} {p s f y w y r q d a m w} {c h g c g j j f t b i c q} {s e} {c t q j g f} {v n r w y r a g e j d} 2414151 {s o o s d s k q b f q v p e} {j r o b t o p d l o o x} {d d k t v e} {} {t v o d w} {w e q w h y c y y i j b a m} -3342407 {m c h n e p d o c r w n t} {j d k s p q l} {t g s r w x j l r z r} {h} {r q v x i r a n h s} {m y p b v w r a u o g q r} -993951 {l n p u o j d x t u u c o j} {k r n a r e k v i t o e} {q f t t a a c z v f} {o n m p v f o e n} {h z h i p s b j z h} {i t w m k c u g n i} 1575251 {} {z s i j d o x j a r t} {h g j u j n v e n z} {p z j n n f} {s q q f d w r l y i z d o m} {b a n d h t b y g h d} 4263668 {q g t h f s} {s g x p f q z i s o f l i} {q k} {w v h a x n a r b} {m j a h o b i x k r w z q u} {m t r g j o e q t m p u l} 2487819 {m w g x r n e u t s r} {b x a t u u j c r n} {j} {w f j r e e y l p} {o u h b} {o c a c a b v} 167966 {o d b s d o a u m o x y} {c} {r w d o b v} {z e b} {i n z a f g z o} {m u b a g} 1948599 {n r g q d j s} {n k} {l b p d v t k h y y} {u m k e c} {t b n y o t b} {j w c i r x x} 2941631 {l d p l b g 
f} {e k e} {p j} {m c s w t b k n l d x} {f o v y v l} {c w p s w j w c u t y} 3561104 {d r j j r j i g p} {u} {g r j q} {z l p d s n f c h t d c v z} {w r c f s x z y} {g f o k g g} -2223281 {y e t j j z f p o m m z} {h k o g o} {m x a t} {l q x l} {r w k d l s y b} {q g k b} -4502874 {k k b x k l f} {r} {} {q m z b k h k u n e z} {z q g y m y u} {} 1757599 {d p z j y u r} {z p l q w j t j} {n i r x r y j} {} {h} {w t d q c x z z x e e} -4809589 {} {z p x u h i i n g} {w q s u d b f x n} {l y k b b r x t i} {n d v j q o t o d p z e} {u r y u v u c} 1068408 {y e} {e g s k e w t p v o b k} {z c m s} {r u r u h n h b p q g b} {j k b l} {m c d t s r s q a d b o f} -1972554 {m s w} {d k v s a r k p a r i v} {g j z k p} {y k c v r e u o q f i b a} {i p i} {c z w c y b n z i v} -2052385 {} {x e u f f g n c i x n e i e} {} {p s w d x p g} {} {s j a h n} 2805981 {m x g c w o e} {k g u y r y i u e g g} {f k j v t x p h x k u} {w i} {b l f z f v t n} {i u d o d p h s m u} 2507621 {} {u b n l x f n j t} {u r x l h} {h r l m r} {d y e n b s q v t k n q q} {x l t v w h a s k} -3138375 {e o f j y x u w v e w z} {r d q g k n n v r c z n e w} {l y i q z k j p u f q s k} {c i l l i m a a g a z r x f} {a v k h m q z b y n z} {q g w c y r r o a} -457971 {j x a w e c s h f l f} {q} {j f v j u m d q r v v} {x n v a w} {i e h d h f u w t t z} {v s u l s v o v i k n e} 2265221 {z t c y w n y r t} {n b a x s} {q w a v} {a b s d x i g w t e z h} {t l} {j k r w f f y j o k u} -3941280 {r x t o z} {f j n z k} {t x e b t d b k w i s} {j t y h i h} {y q g n g s u v c z j z n g} {n n g t l p h} 2084745 {z d z d} {j} {o e k t b k a z l w} {o p i h k c x} {c r b t i j f} {z e n m} 1265843 {} {j s g j j x u y} {u q t f} {g o g} {w o j e d} {w q n a c t q x j} -2941116 {i n c u o} {f b} {o m s q d o z a q} {f s v o b b} {o a z c h r} {j e w h b f z} -1265441 {p g z q v a o a x a} {s t h} {w i p o c} {s n d g f z w q o d v v l j} {y f b i a s v} {u m o z k k s t s d p b l p} -1989158 {r i c n} {r e 
w w i n z} {q u s y b w u g y g f o} {y} {d} {j x i b x u y d c p v a h} 2391989 {b n w x w f q h p i} {e u b b i n a i o c d g} {v a z o i e n l x l r} {r u f o r k w m d w} {k s} {r f e j q p w} } do_execsql_test 1.0 { CREATE VIRTUAL TABLE tt USING fts5(a, b, c, d, e, f); } {} fts5_aux_test_functions db proc matchdata {expr tbl collist {order ASC}} { set cols "" foreach e $collist { append cols ", '$e'" } set tclexpr [db one [subst -novar { SELECT fts5_expr_tcl( $expr, 'nearset $cols -pc ::pc' [set cols] ) }]] set res [list] db eval "SELECT rowid, * FROM $tbl ORDER BY rowid $order" x { set cols [list] foreach col $x(*) { if {$col != "rowid"} { lappend cols $x($col) } } # set cols [list $a $b $c $d $e $f] set ::pc 0 set rowdata [eval $tclexpr] if {$rowdata != ""} { lappend res $x(rowid) $rowdata } } set res } proc do_auto_test {tn tbl cols expr} { foreach order {asc desc} { set res [matchdata $expr $tbl $cols $order] set testname "$tn.[string range $order 0 0].rows=[expr [llength $res]/2]" set ::autotest_expr $expr do_execsql_test $testname [subst -novar { SELECT rowid, fts5_test_poslist([set tbl]) FROM [set tbl] WHERE [set tbl] MATCH $::autotest_expr ORDER BY rowid [set order] }] $res } } #------------------------------------------------------------------------- # for {set fold 0} {$fold < 3} {incr fold} { switch $fold { 0 { set map {} } 1 { set map { a a b a c b d b e c f c g d h d i e j e k f l f m g g g o h p h q i r i s j t j u k v k w l x l y m z m }} 2 { set map { a a b a c a d a e a f a g a h a i b j b k b l b m b g b o b p b q c r c s c t c u c v c w c x c }} } execsql { BEGIN; DELETE FROM tt; } foreach {rowid a b c d e f} [string map $map $data] { if {$rowid==-4703774} { execsql { INSERT INTO tt(rowid, a, b, c, d, e, f) VALUES($rowid, $a, $b, $c, $d, $e, $f) } } } execsql COMMIT foreach {tn expr} { A.1 { {a} : x } A.2 { {a b} : x } A.3 { {a b f} : x } A.4 { {f a b} : x } A.5 { {f a b} : x y } A.6 { {f a b} : x + y } A.7 { {c a b} : x + c } A.8 { {c d} : 
"l m" } A.9 { {c e} : "l m" } A.10 { {a b c a b c a b c f f e} : "l m" } B.1 { a NOT b } B.2 { a NOT a:b } B.3 { a OR (b AND c) } B.4 { a OR (b AND {a b c}:c) } B.5 { a OR "b c" } B.6 { a OR b OR c } C.1 { a OR (b AND "b c") } C.2 { a OR (b AND "z c") } } { do_auto_test 3.$fold.$tn tt {a b c d e f} $expr } } proc replace_elems {list args} { set ret $list foreach {idx elem} $args { set ret [lreplace $ret $idx $idx $elem] } set ret } #------------------------------------------------------------------------- # set bigdoc [string trim [string repeat "a " 1000]] do_test 4.0 { set a [replace_elems $bigdoc 50 x 950 x] set b [replace_elems $bigdoc 20 y 21 x 887 x 888 y] set c [replace_elems $bigdoc 1 z 444 z 789 z] execsql { CREATE VIRTUAL TABLE yy USING fts5(c1, c2, c3); INSERT INTO yy(rowid, c1, c2, c3) VALUES(-56789, $a, $b, $c); INSERT INTO yy(rowid, c1, c2, c3) VALUES(250, $a, $b, $c); } } {} foreach {tn expr} { 1 x 2 y 3 z 4 {c1 : x} 5 {c2 : x} 6 {c3 : x} 7 {c1 : y} 8 {c2 : y} 9 {c3 : y} 10 {c1 : z} 11 {c2 : z} 12 {c3 : z} } { breakpoint do_auto_test 4.$tn yy {c1 c2 c3} $expr } finish_test |
Added ext/fts5/test/fts5aux.test.
# 2014 Dec 20
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on the auxiliary function APIs.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5aux

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

#-------------------------------------------------------------------------
# Thin wrappers around the xInst, xColumnSize and xColumnTotalSize methods
# of the fts5 API command passed as $cmd. Each one is made available to SQL
# under the same name as the Tcl proc.
#
proc inst {cmd i} {
  $cmd xInst $i
}
sqlite3_fts5_create_function db inst inst

proc colsize {cmd i} {
  $cmd xColumnSize $i
}
sqlite3_fts5_create_function db colsize colsize

proc totalsize {cmd i} {
  $cmd xColumnTotalSize $i
}
sqlite3_fts5_create_function db totalsize totalsize

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE f1 USING fts5(a, b);
  INSERT INTO f1 VALUES('one two', 'two one zero');
  INSERT INTO f1 VALUES('one one', 'one one one');
}

# 1.*: xInst with in-range and out-of-range instance indexes.
do_catchsql_test 1.1 {
  SELECT inst(f1, -1) FROM f1 WHERE f1 MATCH 'two';
} {1 SQLITE_RANGE}
do_catchsql_test 1.2 {
  SELECT inst(f1, 0) FROM f1 WHERE f1 MATCH 'two';
} {0 {{0 0 1}}}
do_catchsql_test 1.3 {
  SELECT inst(f1, 1) FROM f1 WHERE f1 MATCH 'two';
} {0 {{0 1 0}}}
do_catchsql_test 1.4 {
  SELECT inst(f1, 2) FROM f1 WHERE f1 MATCH 'two';
} {1 SQLITE_RANGE}

# 2.*: xColumnSize and xColumnTotalSize. The table has two columns, so
# column index 2 is expected to fail with SQLITE_RANGE.
do_catchsql_test 2.1 {
  SELECT colsize(f1, 2) FROM f1 WHERE f1 MATCH 'two';
} {1 SQLITE_RANGE}

do_execsql_test 2.2 {
  SELECT colsize(f1, 0), colsize(f1, 1) FROM f1 WHERE f1 MATCH 'zero';
} {2 3}

do_execsql_test 2.3 {
  SELECT colsize(f1, -1) FROM f1 WHERE f1 MATCH 'zero';
} {5}

do_execsql_test 2.4.1 {
  SELECT totalsize(f1, -1) FROM f1 WHERE f1 MATCH 'zero';
} {10}
do_execsql_test 2.4.2 {
  SELECT totalsize(f1, 0) FROM f1 WHERE f1 MATCH 'zero';
} {4}
do_execsql_test 2.4.3 {
  SELECT totalsize(f1, 1) FROM f1 WHERE f1 MATCH 'zero';
} {6}
do_catchsql_test 2.4.4 {
  SELECT totalsize(f1, 2) FROM f1 WHERE f1 MATCH 'zero';
} {1 SQLITE_RANGE}

#-------------------------------------------------------------------------
# Test the xSet and xGetAuxdata APIs with a NULL destructor.
#
# prevrowid reads the integer auxdata left by the previous invocation,
# replaces it with the rowid of the current row, and returns the value
# that was read plus $add.
#
proc prevrowid {add cmd} {
  set res [$cmd xGetAuxdataInt 0]
  set r [$cmd xRowid]
  $cmd xSetAuxdataInt $r
  # Braced so the operands are substituted once, by expr itself.
  return [expr {$res + $add}]
}
sqlite3_fts5_create_function db prevrowid  [list prevrowid 0]
sqlite3_fts5_create_function db prevrowid1 [list prevrowid 1]

do_execsql_test 3.0 {
  CREATE VIRTUAL TABLE e5 USING fts5(x);
  INSERT INTO e5 VALUES('a b c');
  INSERT INTO e5 VALUES('d e f');
  INSERT INTO e5 VALUES('a b c');
  INSERT INTO e5 VALUES('d e f');
  INSERT INTO e5 VALUES('a b c');
}

do_execsql_test 3.1 {
  SELECT prevrowid(e5) || '+' || rowid FROM e5 WHERE e5 MATCH 'c'
} {0+1 1+3 3+5}

do_execsql_test 3.2 {
  SELECT prevrowid(e5) || '+' || prevrowid1(e5) || '+' || rowid
  FROM e5 WHERE e5 MATCH 'e'
} {0+1+2 2+3+4}

#-------------------------------------------------------------------------
# Test that if the xQueryPhrase callback returns other than SQLITE_OK,
# the query is abandoned. And that if it returns an error code other than
# SQLITE_DONE, the error is propagated back to the caller.
#
do_execsql_test 4.0 {
  CREATE VIRTUAL TABLE e7 USING fts5(x);
  INSERT INTO e7 VALUES('a x a');
  INSERT INTO e7 VALUES('b x b');
  INSERT INTO e7 VALUES('c x c');
  INSERT INTO e7 VALUES('d x d');
  INSERT INTO e7 VALUES('e x e');
}

# xQueryPhrase callback: records each visited rowid in ::cb and returns
# $code once the row with rowid $rowid is reached (an empty string
# otherwise).
proc xCallback {rowid code cmd} {
  set r [$cmd xRowid]
  lappend ::cb $r
  if {$r==$rowid} { return $code }
  return ""
}

# Runs xQueryPhrase on phrase 1 with xCallback bound to the current rowid
# and $code, then returns the list of rowids the callback visited.
proc phrasequery {cmd code} {
  set ::cb [list]
  $cmd xQueryPhrase 1 [list xCallback [$cmd xRowid] $code]
  set ::cb
}

sqlite3_fts5_create_function db phrasequery phrasequery

do_execsql_test 4.1 {
  SELECT phrasequery(e7, 'SQLITE_OK') FROM e7 WHERE e7 MATCH 'c x'
} {{1 2 3 4 5}}

do_execsql_test 4.2 {
  SELECT phrasequery(e7, 'SQLITE_DONE') FROM e7 WHERE e7 MATCH 'c x'
} {{1 2 3}}

do_catchsql_test 4.3 {
  SELECT phrasequery(e7, 'SQLITE_ERROR') FROM e7 WHERE e7 MATCH 'c x'
} {1 SQLITE_ERROR}

#-------------------------------------------------------------------------
# Auxiliary function calls with many cursors in the global cursor list.
#
do_execsql_test 5.0 {
  CREATE VIRTUAL TABLE e9 USING fts5(y);
  INSERT INTO e9(rowid, y) VALUES(1, 'i iii');
  INSERT INTO e9(rowid, y) VALUES(2, 'ii iv');
  INSERT INTO e9(rowid, y) VALUES(3, 'ii');
  INSERT INTO e9(rowid, y) VALUES(4, 'i iv');
  INSERT INTO e9(rowid, y) VALUES(5, 'iii');
}

proc my_rowid {cmd} { $cmd xRowid }
sqlite3_fts5_create_function db my_rowid my_rowid

# Prepare four statements (s1..s4) that stay open at the same time and
# are stepped in an interleaved order below.
foreach {var q} {
  s1 i
  s2 ii
  s3 iii
  s4 iv
} {
  set sql "SELECT my_rowid(e9) FROM e9 WHERE e9 MATCH '$q'"
  set $var [sqlite3_prepare db $sql -1 dummy]
}

do_test 5.1.1 { sqlite3_step $s1 ; sqlite3_column_int $s1 0 } 1
do_test 5.1.2 { sqlite3_step $s2 ; sqlite3_column_int $s2 0 } 2
do_test 5.1.3 { sqlite3_step $s3 ; sqlite3_column_int $s3 0 } 1
do_test 5.1.4 { sqlite3_step $s4 ; sqlite3_column_int $s4 0 } 2

do_test 5.2.1 { sqlite3_step $s1 ; sqlite3_column_int $s1 0 } 4
do_test 5.2.2 { sqlite3_step $s2 ; sqlite3_column_int $s2 0 } 3
do_test 5.2.3 { sqlite3_step $s3 ; sqlite3_column_int $s3 0 } 5
do_test 5.2.4 { sqlite3_step $s4 ; sqlite3_column_int $s4 0 } 4

sqlite3_finalize $s1
sqlite3_finalize $s2
sqlite3_finalize $s3
sqlite3_finalize $s4

#-------------------------------------------------------------------------
# Passing an invalid first argument to an auxiliary function is detected.
#
do_execsql_test 6.0 {
  CREATE VIRTUAL TABLE e11 USING fts5(y, z);
  INSERT INTO e11(rowid, y, z) VALUES(1, 'a b', 45);
  INSERT INTO e11(rowid, y, z) VALUES(2, 'b c', 46);
}

do_catchsql_test 6.1 {
  SELECT my_rowid(z) FROM e11 WHERE e11 MATCH 'b'
} {1 {no such cursor: 45}}

do_catchsql_test 6.2 {
  SELECT my_rowid(y) FROM e11 WHERE e11 MATCH 'b'
} {1 {no such cursor: 0}}

#-------------------------------------------------------------------------
# Test passing an out-of-range phrase number to xPhraseSize (should
# return 0).
#
proc my_phrasesize {cmd iPhrase} { $cmd xPhraseSize $iPhrase }
sqlite3_fts5_create_function db my_phrasesize my_phrasesize

do_execsql_test 7.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(a);
  INSERT INTO t1 VALUES('a b c');
}

do_execsql_test 7.2 {
  SELECT
    my_phrasesize(t1, -1),
    my_phrasesize(t1, 0),
    my_phrasesize(t1, 1),
    my_phrasesize(t1, 2)
  FROM t1 WHERE t1 MATCH 'a OR b+c'
} {0 1 2 0}

#-------------------------------------------------------------------------
# Test highlight() with the query 'a OR (b AND d)' against several
# different sets of rows.
#
do_execsql_test 8.0 {
  CREATE VIRTUAL TABLE x1 USING fts5(a);
}

foreach {tn lRow res} {
  4  {"a a a" "b" "a d"}      {"[a] [a] [a]" "[a] d"}
  1  {"b d" "a b"}            {"[b] [d]" "[a] b"}
  2  {"d b" "a d"}            {"[d] [b]" "[a] d"}
  3  {"a a d"}                {"[a] [a] d"}
} {
  execsql { DELETE FROM x1 }
  foreach row $lRow { execsql { INSERT INTO x1 VALUES($row) } }
  do_execsql_test 8.$tn {
    SELECT highlight(x1, 0, '[', ']') FROM x1 WHERE x1 MATCH 'a OR (b AND d)';
  } $res
}

finish_test
Added ext/fts5/test/fts5auxdata.test.
# 2014 Dec 20
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on the fts5 xSetAuxdata() and xGetAuxdata() APIs.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5auxdata

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE f1 USING fts5(a, b);
  INSERT INTO f1(rowid, a, b) VALUES(1, 'a', 'b1');
  INSERT INTO f1(rowid, a, b) VALUES(2, 'a', 'b2');
  INSERT INTO f1(rowid, a, b) VALUES(3, 'a', 'b3');
  INSERT INTO f1(rowid, a, b) VALUES(4, 'a', 'b4');
  INSERT INTO f1(rowid, a, b) VALUES(5, 'a', 'b5');
}

# Auxiliary function invoked once per matching row. Depending on the
# rowid it checks the auxdata left by earlier rows and, for rows 1 and 2,
# stores a new value.
#
# NOTE(review): the argument to xGetAuxdata looks like a "clear" flag -
# row 4 reads with 1 and row 5 then expects an empty value. Confirm
# against the xGetAuxdata documentation.
proc aux_function_1 {cmd tn} {
  switch [$cmd xRowid] {
    1 {
      do_test $tn.1 [list $cmd xGetAuxdata 0 ] {}
      $cmd xSetAuxdata "one"
    }

    2 {
      do_test $tn.2 [list $cmd xGetAuxdata 0 ] {one}
      $cmd xSetAuxdata "two"
    }

    3 {
      do_test $tn.3 [list $cmd xGetAuxdata 0 ] {two}
    }

    4 {
      do_test $tn.4 [list $cmd xGetAuxdata 1 ] {two}
    }

    5 {
      do_test $tn.5 [list $cmd xGetAuxdata 0 ] {}
    }
  }
}

sqlite3_fts5_create_function db aux_function_1 aux_function_1
db eval {
  SELECT aux_function_1(f1, 1) FROM f1 WHERE f1 MATCH 'a'
  ORDER BY rowid ASC
}

# Same idea with two invocations of one function per row. Instance "A"
# reads and writes the auxdata exactly as aux_function_1 does; instance
# "B" only reads, and expects to see the values written by "A".
proc aux_function_2 {cmd tn inst} {
  if {$inst == "A"} {
    switch [$cmd xRowid] {
      1 {
        do_test $tn.1.$inst [list $cmd xGetAuxdata 0 ] {}
        $cmd xSetAuxdata "one $inst"
      }
      2 {
        do_test $tn.2.$inst [list $cmd xGetAuxdata 0 ] "one $inst"
        $cmd xSetAuxdata "two $inst"
      }
      3 {
        do_test $tn.3.$inst [list $cmd xGetAuxdata 0 ] "two $inst"
      }
      4 {
        do_test $tn.4.$inst [list $cmd xGetAuxdata 1 ] "two $inst"
      }
      5 {
        do_test $tn.5.$inst [list $cmd xGetAuxdata 0 ] {}
      }
    }
  } else {
    switch [$cmd xRowid] {
      1 { do_test $tn.1.$inst [list $cmd xGetAuxdata 0 ] "one A" }
      2 { do_test $tn.2.$inst [list $cmd xGetAuxdata 0 ] "two A" }
      3 { do_test $tn.3.$inst [list $cmd xGetAuxdata 0 ] "two A" }
      4 { do_test $tn.4.$inst [list $cmd xGetAuxdata 0 ] {} }
      5 { do_test $tn.5.$inst [list $cmd xGetAuxdata 0 ] {} }
    }
  }
}

sqlite3_fts5_create_function db aux_function_2 aux_function_2
db eval {
  SELECT aux_function_2(f1, 2, 'A'), aux_function_2(f1, 2, 'B')
  FROM f1 WHERE f1 MATCH 'a'
  ORDER BY rowid ASC
}

finish_test
Added ext/fts5/test/fts5bigpl.test.
# 2015 April 21
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This test is focused on really large position lists. Those that require
# 4 or 5 byte position-list size varints. Because of the amount of memory
# required, these tests only run on 64-bit platforms.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5bigpl

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

# Skip the whole file when the platform word size is below 8 bytes.
if { $tcl_platform(wordSize)<8 } {
  finish_test
  return
}

do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(x) }

# Ten documents, each a single token repeated 1200000 times.
do_test 1.1 {
  foreach t {a b c d e f g h i j} {
    set doc [string repeat "$t " 1200000]
    execsql { INSERT INTO t1 VALUES($doc) }
  }
  execsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {}

# Four documents, each a two-token pair repeated 600000 times.
do_test 1.2 {
  execsql { DELETE FROM t1 }
  foreach t {"a b" "b a" "c d" "d c"} {
    set doc [string repeat "$t " 600000]
    execsql { INSERT INTO t1 VALUES($doc) }
  }
  execsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {}


# 5-byte varint. This test takes 30 seconds or so on a 2014 workstation.
# The generated database is roughly 635MiB.
#
do_test 2.1...slow {
  execsql { DELETE FROM t1 }
  foreach t {a} {
    set doc [string repeat "$t " 150000000]
    execsql { INSERT INTO t1 VALUES($doc) }
  }
  execsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {}

finish_test
Added ext/fts5/test/fts5columnsize.test.
# 2015 Jun 10
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on fts5 tables with the columnsize=0 option.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5columnsize

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

#-------------------------------------------------------------------------
# Check that the option can be parsed and that the %_docsize table is
# only created if it is set to true.
#
# In the table below, an outcome of 2 means the CREATE statement must fail
# with a "malformed columnsize=..." error. Outcomes 0 and 1 are the number
# of sqlite_master rows named 't1_docsize' expected after a successful
# CREATE.
#
foreach {tn outcome stmt} {
  1 0 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=0) }
  2 1 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=1) }
  3 0 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize='0') }
  4 1 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize='1') }
  5 2 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize='') }
  6 2 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=2) }
  7 1 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=0, columnsize=1) }
  8 1 { CREATE VIRTUAL TABLE t1 USING fts5(x) }
  9 2 { CREATE VIRTUAL TABLE t1 USING fts5(x, columnsize=11) }
} {
  execsql {
    DROP TABLE IF EXISTS t1;
  }
  if {$outcome==2} {
    do_catchsql_test 1.$tn.1 $stmt {1 {malformed columnsize=... directive}}
  } else {
    do_execsql_test 1.$tn.2 $stmt
    do_execsql_test 1.$tn.3 {
      SELECT count(*) FROM sqlite_master WHERE name = 't1_docsize'
    } $outcome
  }
}

#-------------------------------------------------------------------------
# Run tests on a table with no %_content or %_docsize backing store.
#
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE t2 USING fts5(x, columnsize=0, content='');
}
do_catchsql_test 2.1 {
  INSERT INTO t2 VALUES('a b c d e f');
} {1 {datatype mismatch}}

do_execsql_test 2.2 {
  INSERT INTO t2(rowid, x) VALUES(1, 'c d e f');
  INSERT INTO t2(rowid, x) VALUES(2, 'c d e f g h');
  INSERT INTO t2(rowid, x) VALUES(3, 'a b c d e f g h');
} {}

do_execsql_test 2.3 {
  SELECT rowid FROM t2 WHERE t2 MATCH 'b'; SELECT '::';
  SELECT rowid FROM t2 WHERE t2 MATCH 'e'; SELECT '::';
  SELECT rowid FROM t2 WHERE t2 MATCH 'h';
} {3 :: 1 2 3 :: 2 3}

do_execsql_test 2.4 {
  INSERT INTO t2(t2, rowid, x) VALUES('delete', 2, 'c d e f g h');
  SELECT rowid FROM t2 WHERE t2 MATCH 'b'; SELECT '::';
  SELECT rowid FROM t2 WHERE t2 MATCH 'e'; SELECT '::';
  SELECT rowid FROM t2 WHERE t2 MATCH 'h';
} {3 :: 1 3 :: 3}

do_execsql_test 2.5 {
  INSERT INTO t2(t2) VALUES('delete-all');
  SELECT rowid FROM t2 WHERE t2 MATCH 'b'; SELECT '::';
  SELECT rowid FROM t2 WHERE t2 MATCH 'e'; SELECT '::';
  SELECT rowid FROM t2 WHERE t2 MATCH 'h';
} {:: ::}

do_execsql_test 2.6 {
  INSERT INTO t2(rowid, x) VALUES(1, 'o t t f');
  INSERT INTO t2(rowid, x) VALUES(2, 'f s s e');
  INSERT INTO t2(rowid, x) VALUES(3, 'n t e t');
}

# 2.7.*: queries without a MATCH constraint are expected to fail.
do_catchsql_test 2.7.1 {
  SELECT rowid FROM t2
} {1 {t2: table does not support scanning}}
do_catchsql_test 2.7.2 {
  SELECT rowid FROM t2 WHERE rowid=2
} {1 {t2: table does not support scanning}}
do_catchsql_test 2.7.3 {
  SELECT rowid FROM t2 WHERE rowid BETWEEN 1 AND 3
} {1 {t2: table does not support scanning}}

do_execsql_test 2.X {
  DROP TABLE t2
}

#-------------------------------------------------------------------------
# Test the xColumnSize() API
#
fts5_aux_test_functions db

do_execsql_test 3.1.0 {
  CREATE VIRTUAL TABLE t3 USING fts5(x, y UNINDEXED, z, columnsize=0);
  INSERT INTO t3 VALUES('a a', 'b b b', 'c');
  INSERT INTO t3 VALUES('x a x', 'b b b y', '');
}
do_execsql_test 3.1.1 {
  SELECT rowid, fts5_test_columnsize(t3) FROM t3 WHERE t3 MATCH 'a'
} {
  1 {2 0 1} 2 {3 0 0}
}
do_execsql_test 3.1.2 {
  INSERT INTO t3 VALUES(NULL, NULL, 'a a a a');
  DELETE FROM t3 WHERE rowid = 1;
  SELECT rowid, fts5_test_columnsize(t3) FROM t3 WHERE t3 MATCH 'a'
} {
  2 {3 0 0} 3 {0 0 4}
}

# Same schema, but contentless as well as columnsize=0.
do_execsql_test 3.2.0 {
  CREATE VIRTUAL TABLE t4 USING fts5(x, y UNINDEXED, z, columnsize=0, content='');
  INSERT INTO t4(rowid, x, y, z) VALUES(1, 'a a', 'b b b', 'c');
  INSERT INTO t4(rowid, x, y, z) VALUES(2, 'x a x', 'b b b y', '');
}
do_execsql_test 3.2.1 {
  SELECT rowid, fts5_test_columnsize(t4) FROM t4 WHERE t4 MATCH 'a'
} {
  1 {-1 0 -1} 2 {-1 0 -1}
}

finish_test
Added ext/fts5/test/fts5config.test.
# 2015 Jan 13
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file focuses on the code in fts5_config.c, which is largely concerned
# with parsing the various configuration and CREATE TABLE options.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5config

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

#-------------------------------------------------------------------------
# Try different types of quote characters.
#
do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5('a', "b", [c], `d`);
  PRAGMA table_info = t1;
} {
  0 a {} 0 {} 0
  1 b {} 0 {} 0
  2 c {} 0 {} 0
  3 d {} 0 {} 0
}

#-------------------------------------------------------------------------
# Syntax errors in the prefix= option.
#
foreach {tn opt} {
  1 {prefix=x}
  2 {prefix='x'}
  3 {prefix='$'}
} {
  set res [list 1 {malformed prefix=... directive}]
  do_catchsql_test 2.$tn "CREATE VIRTUAL TABLE f1 USING fts5(x, $opt)" $res
}

#-------------------------------------------------------------------------
# Syntax errors in the 'rank' option.
#
foreach {tn val} {
  1 "f1(xyz)"
  2 "f1(zyx)"
  3 "f1(nzz)"
  4 "f1(x'!!')"
  5 "f1(x':;')"
  6 "f1(x'[]')"
  7 "f1(x'{}')"
  8 "f1('abc)"
} {
  do_catchsql_test 3.$tn {
    INSERT INTO t1(t1, rank) VALUES('rank', $val);
  } {1 {SQL logic error or missing database}}
}

#-------------------------------------------------------------------------
# The parsing of SQL literals specified as part of 'rank' options.
#
do_execsql_test 4.0 {
  CREATE VIRTUAL TABLE zzz USING fts5(one);
  INSERT INTO zzz VALUES('a b c');
}

# Auxiliary function that returns its first user argument unchanged, so
# that the literal configured in the rank option can be read back via the
# "rank" column.
proc first {cmd A} { return $A }
sqlite3_fts5_create_function db first first

foreach {tn arg} {
  1 "123"
  2 "'01234567890ABCDEF'"
  3 "x'0123'"
  4 "x'ABCD'"
  5 "x'0123456789ABCDEF'"
  6 "x'0123456789abcdef'"
  7 "22.5"
  8 "-91.5"
  9 "-.5"
  10 "''''"
  11 "+.5"
} {
  # Double up single quotes so the function call can be embedded in an
  # SQL string literal.
  set func [string map {' ''} "first($arg)"]
  do_execsql_test 4.1.$tn "
    INSERT INTO zzz(zzz, rank) VALUES('rank', '$func');
    SELECT rank IS $arg FROM zzz WHERE zzz MATCH 'a + b + c'
  " 1
}

do_execsql_test 4.2 {
  INSERT INTO zzz(zzz, rank) VALUES('rank', 'f1()');
} {}

#-------------------------------------------------------------------------
# Misquoting in tokenize= and other options.
#
do_catchsql_test 5.1 {
  CREATE VIRTUAL TABLE xx USING fts5(x, tokenize="porter 'ascii");
} {1 {parse error in tokenize directive}}

do_catchsql_test 5.2 {
  CREATE VIRTUAL TABLE xx USING fts5(x, [y[]);
} {0 {}}

do_catchsql_test 5.3 {
  CREATE VIRTUAL TABLE yy USING fts5(x, [y]]);
} {1 {unrecognized token: "]"}}

#-------------------------------------------------------------------------
# Errors in prefix= directives.
#
do_catchsql_test 6.1 {
  CREATE VIRTUAL TABLE abc USING fts5(a, prefix=1, prefix=2);
} {1 {multiple prefix=... directives}}
do_catchsql_test 6.2 {
  CREATE VIRTUAL TABLE abc USING fts5(a, prefix='1, 2, 1001');
} {1 {prefix length out of range: 1001}}
do_catchsql_test 6.3 {
  CREATE VIRTUAL TAbLE abc USING fts5(a, prefix='1, 2, 0000');
} {1 {prefix length out of range: 0}}
do_catchsql_test 6.4 {
  CREATE VIRTUAL TABLE abc USING fts5(a, prefix='1 , 1000000');
} {1 {malformed prefix=... directive}}

#-------------------------------------------------------------------------
# Duplicate tokenize= and other options.
#
do_catchsql_test 7.1 {
  CREATE VIRTUAL TABLE abc USING fts5(a, tokenize=porter, tokenize=ascii);
} {1 {multiple tokenize=... directives}}
do_catchsql_test 7.2 {
  CREATE VIRTUAL TABLE abc USING fts5(a, content=porter, content=ascii);
} {1 {multiple content=... directives}}
do_catchsql_test 7.3 {
  CREATE VIRTUAL TABLE abc USING fts5(a, content_rowid=porter, content_rowid=a);
} {1 {multiple content_rowid=... directives}}

#-------------------------------------------------------------------------
# Unrecognized option.
#
do_catchsql_test 8.0 {
  CREATE VIRTUAL TABLE abc USING fts5(a, nosuchoption=123);
} {1 {unrecognized option: "nosuchoption"}}
do_catchsql_test 8.1 {
  CREATE VIRTUAL TABLE abc USING fts5(a, "nosuchoption"=123);
} {1 {parse error in ""nosuchoption"=123"}}

#-------------------------------------------------------------------------
# Errors in:
#
#   9.1.* 'pgsz' options.
#   9.2.* 'automerge' options.
#   9.3.* 'crisismerge' options.
#   9.4.* an unrecognized option name.
#
do_execsql_test 9.0 {
  CREATE VIRTUAL TABLE abc USING fts5(a, b);
} {}
do_catchsql_test 9.1.1 {
  INSERT INTO abc(abc, rank) VALUES('pgsz', -5);
} {1 {SQL logic error or missing database}}
do_catchsql_test 9.1.2 {
  INSERT INTO abc(abc, rank) VALUES('pgsz', 50000000);
} {1 {SQL logic error or missing database}}
do_catchsql_test 9.1.3 {
  INSERT INTO abc(abc, rank) VALUES('pgsz', 66.67);
} {1 {SQL logic error or missing database}}

do_catchsql_test 9.2.1 {
  INSERT INTO abc(abc, rank) VALUES('automerge', -5);
} {1 {SQL logic error or missing database}}
do_catchsql_test 9.2.2 {
  INSERT INTO abc(abc, rank) VALUES('automerge', 50000000);
} {1 {SQL logic error or missing database}}
do_catchsql_test 9.2.3 {
  INSERT INTO abc(abc, rank) VALUES('automerge', 66.67);
} {1 {SQL logic error or missing database}}
do_execsql_test 9.2.4 {
  INSERT INTO abc(abc, rank) VALUES('automerge', 1);
} {}

do_catchsql_test 9.3.1 {
  INSERT INTO abc(abc, rank) VALUES('crisismerge', -5);
} {1 {SQL logic error or missing database}}
do_catchsql_test 9.3.2 {
  INSERT INTO abc(abc, rank) VALUES('crisismerge', 66.67);
} {1 {SQL logic error or missing database}}
do_execsql_test 9.3.3 {
  INSERT INTO abc(abc, rank) VALUES('crisismerge', 1);
} {}
do_execsql_test 9.3.4 {
  INSERT INTO abc(abc, rank) VALUES('crisismerge', 50000000);
} {}

do_catchsql_test 9.4.1 {
  INSERT INTO abc(abc, rank) VALUES('nosuchoption', 1);
} {1 {SQL logic error or missing database}}

finish_test
Added ext/fts5/test/fts5content.test.
# 2014 Dec 20
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file contains tests for the content= and content_rowid= options.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5content

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

#-------------------------------------------------------------------------
# Contentless tables
#
do_execsql_test 1.1 {
  CREATE VIRTUAL TABLE f1 USING fts5(a, b, content='');
  INSERT INTO f1(rowid, a, b) VALUES(1, 'one', 'o n e');
  INSERT INTO f1(rowid, a, b) VALUES(2, 'two', 't w o');
  INSERT INTO f1(rowid, a, b) VALUES(3, 'three', 't h r e e');
}

do_execsql_test 1.2 {
  SELECT rowid FROM f1 WHERE f1 MATCH 'o';
} {1 2}

do_execsql_test 1.3 {
  INSERT INTO f1(a, b) VALUES('four', 'f o u r');
  SELECT rowid FROM f1 WHERE f1 MATCH 'o';
} {1 2 4}

# 1.4 - 1.12: column values, highlight() and snippet() all read back as
# NULL from the contentless table.
do_execsql_test 1.4 {
  SELECT rowid, a, b FROM f1 WHERE f1 MATCH 'o';
} {1 {} {} 2 {} {} 4 {} {}}

do_execsql_test 1.5 {
  SELECT rowid, highlight(f1, 0, '[', ']') FROM f1 WHERE f1 MATCH 'o';
} {1 {} 2 {} 4 {}}

do_execsql_test 1.6 {
  SELECT rowid, highlight(f1, 0, '[', ']') IS NULL FROM f1 WHERE f1 MATCH 'o';
} {1 1 2 1 4 1}

do_execsql_test 1.7 {
  SELECT rowid, snippet(f1, -1, '[', ']', '...', 5) IS NULL
  FROM f1 WHERE f1 MATCH 'o';
} {1 1 2 1 4 1}

do_execsql_test 1.8 {
  SELECT rowid, snippet(f1, 1, '[', ']', '...', 5) IS NULL
  FROM f1 WHERE f1 MATCH 'o';
} {1 1 2 1 4 1}

do_execsql_test 1.9 {
  SELECT rowid FROM f1;
} {1 2 3 4}

do_execsql_test 1.10 {
  SELECT * FROM f1;
} {{} {} {} {} {} {} {} {}}

do_execsql_test 1.11 {
  SELECT rowid, a, b FROM f1 ORDER BY rowid ASC;
} {1 {} {} 2 {} {} 3 {} {} 4 {} {}}

do_execsql_test 1.12 {
  SELECT a IS NULL FROM f1;
} {1 1 1 1}

# 1.13 - 1.17: DELETE and UPDATE are rejected; the 'delete' command, which
# is passed the original column values, removes the row instead.
do_catchsql_test 1.13 {
  DELETE FROM f1 WHERE rowid = 2;
} {1 {cannot DELETE from contentless fts5 table: f1}}

do_catchsql_test 1.14 {
  UPDATE f1 SET a = 'a b c' WHERE rowid = 2;
} {1 {cannot UPDATE contentless fts5 table: f1}}

do_execsql_test 1.15 {
  INSERT INTO f1(f1, rowid, a, b) VALUES('delete', 2, 'two', 't w o');
} {}

do_execsql_test 1.16 {
  SELECT rowid FROM f1 WHERE f1 MATCH 'o';
} {1 4}

do_execsql_test 1.17 {
  SELECT rowid FROM f1;
} {1 3 4}

#-------------------------------------------------------------------------
# External content tables
#
reset_db
do_execsql_test 2.1 {
  -- Create a table. And an external content fts5 table to index it.
  CREATE TABLE tbl(a INTEGER PRIMARY KEY, b, c);
  CREATE VIRTUAL TABLE fts_idx USING fts5(b, c, content='tbl', content_rowid='a');

  -- Triggers to keep the FTS index up to date.
  CREATE TRIGGER tbl_ai AFTER INSERT ON tbl BEGIN
    INSERT INTO fts_idx(rowid, b, c) VALUES (new.a, new.b, new.c);
  END;
  CREATE TRIGGER tbl_ad AFTER DELETE ON tbl BEGIN
    INSERT INTO fts_idx(fts_idx, rowid, b, c)
        VALUES('delete', old.a, old.b, old.c);
  END;
  CREATE TRIGGER tbl_au AFTER UPDATE ON tbl BEGIN
    INSERT INTO fts_idx(fts_idx, rowid, b, c)
        VALUES('delete', old.a, old.b, old.c);
    INSERT INTO fts_idx(rowid, b, c) VALUES (new.a, new.b, new.c);
  END;
}

do_execsql_test 2.2 {
  INSERT INTO tbl VALUES(1, 'one', 'o n e');
  INSERT INTO tbl VALUES(NULL, 'two', 't w o');
  INSERT INTO tbl VALUES(3, 'three', 't h r e e');
}

do_execsql_test 2.3 {
  INSERT INTO fts_idx(fts_idx) VALUES('integrity-check');
}

do_execsql_test 2.4 {
  DELETE FROM tbl WHERE rowid=2;
  INSERT INTO fts_idx(fts_idx) VALUES('integrity-check');
}

do_execsql_test 2.5 {
  UPDATE tbl SET c = c || ' x y z';
  INSERT INTO fts_idx(fts_idx) VALUES('integrity-check');
}

do_execsql_test 2.6 {
  SELECT * FROM fts_idx WHERE fts_idx MATCH 't AND x';
} {three {t h r e e x y z}}

do_execsql_test 2.7 {
  SELECT highlight(fts_idx, 1, '[', ']') FROM fts_idx
  WHERE fts_idx MATCH 't AND x';
} {{[t] h r e e [x] y z}}

#-------------------------------------------------------------------------
# Quick tests of the 'delete-all' command.
#
do_execsql_test 3.1 {
  CREATE VIRTUAL TABLE t3 USING fts5(x, content='');
  INSERT INTO t3 VALUES('a b c');
  INSERT INTO t3 VALUES('d e f');
}

do_execsql_test 3.2 {
  SELECT count(*) FROM t3_docsize;
  SELECT count(*) FROM t3_data;
} {2 4}

do_execsql_test 3.3 {
  INSERT INTO t3(t3) VALUES('delete-all');
  SELECT count(*) FROM t3_docsize;
  SELECT count(*) FROM t3_data;
} {0 2}

do_execsql_test 3.4 {
  INSERT INTO t3 VALUES('a b c');
  INSERT INTO t3 VALUES('d e f');
  SELECT rowid FROM t3 WHERE t3 MATCH 'e';
} {2}

do_execsql_test 3.5 {
  SELECT rowid FROM t3 WHERE t3 MATCH 'c';
} {1}

do_execsql_test 3.6 {
  SELECT count(*) FROM t3_docsize;
  SELECT count(*) FROM t3_data;
} {2 4}

do_execsql_test 3.7 {
  CREATE VIRTUAL TABLE t4 USING fts5(x);
} {}
do_catchsql_test 3.8 {
  INSERT INTO t4(t4) VALUES('delete-all');
} {1 {'delete-all' may only be used with a contentless or external content fts5 table}}

#-------------------------------------------------------------------------
# Test an external content table with a more interesting schema.
# do_execsql_test 4.1 { CREATE TABLE x2(a, "key col" PRIMARY KEY, b, c) WITHOUT ROWID; INSERT INTO x2 VALUES('a b', 1, 'c d' , 'e f'); INSERT INTO x2 VALUES('x y', -40, 'z z' , 'y x'); CREATE VIRTUAL TABLE t2 USING fts5(a, c, content=x2, content_rowid='key col'); INSERT INTO t2(t2) VALUES('rebuild'); } do_execsql_test 4.2 { SELECT rowid FROM t2 } {-40 1} do_execsql_test 4.3 { SELECT rowid FROM t2 WHERE t2 MATCH 'c'} {} do_execsql_test 4.4 { SELECT rowid FROM t2 WHERE t2 MATCH 'a'} {1} do_execsql_test 4.5 { SELECT rowid FROM t2 WHERE t2 MATCH 'x'} {-40} do_execsql_test 4.6 { INSERT INTO t2(t2) VALUES('integrity-check') } {} do_execsql_test 4.7 { DELETE FROM x2 WHERE "key col" = 1; INSERT INTO t2(t2, rowid, a, c) VALUES('delete', 1, 'a b', 'e f'); INSERT INTO t2(t2) VALUES('integrity-check'); } do_execsql_test 4.8 { SELECT rowid FROM t2 WHERE t2 MATCH 'b'} {} do_execsql_test 4.9 { SELECT rowid FROM t2 WHERE t2 MATCH 'y'} {-40} #------------------------------------------------------------------------- # Test that if the 'rowid' field of a 'delete' is not an integer, no # changes are made to the FTS index. # do_execsql_test 5.0 { CREATE VIRTUAL TABLE t5 USING fts5(a, b, content=); INSERT INTO t5(rowid, a, b) VALUES(-1, 'one', 'two'); INSERT INTO t5(rowid, a, b) VALUES( 0, 'three', 'four'); INSERT INTO t5(rowid, a, b) VALUES( 1, 'five', 'six'); } set ::checksum [execsql {SELECT md5sum(id, block) FROM t5_data}] do_execsql_test 5.1 { INSERT INTO t5(t5, rowid, a, b) VALUES('delete', NULL, 'three', 'four'); SELECT md5sum(id, block) FROM t5_data; } $::checksum #------------------------------------------------------------------------- # Check that a contentless table can be dropped. # reset_db do_execsql_test 6.1 { CREATE VIRTUAL TABLE xx USING fts5(x, y, content=""); SELECT name FROM sqlite_master; } {xx xx_data xx_docsize xx_config} do_execsql_test 6.2 { DROP TABLE xx; SELECT name FROM sqlite_master; } {} finish_test |
Added ext/fts5/test/fts5corrupt.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 | # 2014 Dec 20 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # This file tests that the FTS5 'integrity-check' command detects # inconsistencies (corruption) in the on-disk backing tables. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5corrupt # If SQLITE_ENABLE_FTS5 is defined, omit this file. 
ifcapable !fts5 {
  # The build has no fts5 capability: nothing in this file can run.
  finish_test
  return
}

#--------------------------------------------------------------------
# 1.*: Build a single-segment index using a small page size (pgsz=32),
# then damage one record of that segment in the %_data table and check
# that 'integrity-check' reports the damage.
#
do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}

do_test 1.1 {
  # All 199 rows are written inside one transaction.
  db transaction {
    for {set i 1} {$i < 200} {incr i} {
      set doc [list [string repeat x $i] [string repeat y $i]]
      execsql { INSERT INTO t1(rowid, x) VALUES($i, $doc) }
    }
  }
  fts5_level_segs t1
} {1}

# NOTE(review): db_save / db_restore_and_reopen are harness helpers not
# visible here; presumably they snapshot and restore the database file so
# that each corruption below starts from the intact index - confirm.
db_save

# The undamaged index must pass the integrity-check.
do_execsql_test 1.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
set segid [lindex [fts5_level_segids t1] 0]

# Corruption 1: delete one record of the segment outright.
do_test 1.3 {
  execsql {
    DELETE FROM t1_data WHERE
    rowid = fts5_rowid('segment', $segid, 0, 4);
  }
  catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {1 {database disk image is malformed}}

# Corruption 2: overwrite the first four bytes of the same record with
# zeros, leaving the rest of the blob untouched.
do_test 1.4 {
  db_restore_and_reopen
  execsql {
    UPDATE t1_data set block = X'00000000' || substr(block, 5) WHERE
    rowid = fts5_rowid('segment', $segid, 0, 4);
  }
  catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {1 {database disk image is malformed}}

db_restore_and_reopen
# Debugging aid - uncomment to dump the decoded contents of t1_data:
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}

#--------------------------------------------------------------------
# 2.*: Sanity check - an index built from 500 random documents with
# pgsz=64 passes the integrity-check.
#
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE t2 USING fts5(x);
  INSERT INTO t2(t2, rank) VALUES('pgsz', 64);
}
db func rnddoc fts5_rnddoc
do_test 2.1 {
  for {set i 0} {$i < 500} {incr i} {
    execsql { INSERT INTO t2 VALUES(rnddoc(50)) }
  }
  execsql { INSERT INTO t2(t2) VALUES('integrity-check') }
} {}

#--------------------------------------------------------------------
# A mundane test - missing row in the %_content table.
#
do_execsql_test 3.0 {
  CREATE VIRTUAL TABLE t3 USING fts5(x);
  INSERT INTO t3 VALUES('one o');
  INSERT INTO t3 VALUES('two e');
  INSERT INTO t3 VALUES('three o');
  INSERT INTO t3 VALUES('four e');
  INSERT INTO t3 VALUES('five o');
}
do_execsql_test 3.1 {
  SELECT * FROM t3 WHERE t3 MATCH 'o'
} {{one o} {three o} {five o}}

# This case previously reused the id "3.1", which made a failure report
# ambiguous between the query above and the corruption check below.
do_catchsql_test 3.2 {
  DELETE FROM t3_content WHERE rowid = 3;
  SELECT * FROM t3 WHERE t3 MATCH 'o';
} {1 {database disk image is malformed}}

finish_test
Added ext/fts5/test/fts5corrupt2.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 | # 2015 Apr 24 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # This file tests that FTS5 handles corrupt databases (i.e. internal # inconsistencies in the backing tables) correctly. 
In this case # "correctly" means without crashing. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5corrupt2 # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } sqlite3_fts5_may_be_corrupt 1 # Create a simple FTS5 table containing 100 documents. Each document # contains 10 terms, each of which start with the character "x". # expr srand(0) db func rnddoc fts5_rnddoc do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(x); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<100) INSERT INTO t1 SELECT rnddoc(10) FROM ii; } set mask [expr 31 << 31] if 1 { # Test 1: # # For each page in the t1_data table, open a transaction and DELETE # the t1_data entry. Then run: # # * an integrity-check, and # * unless the deleted block was a b-tree node, a query for "t1 MATCH 'x*'" # # and check that the corruption is detected in both cases. The # rollback the transaction. # # Test 2: # # Same thing, except instead of deleting a row from t1_data, replace its # blob content with integer value 14. # foreach {tno stmt} { 1 { DELETE FROM t1_data WHERE rowid=$rowid } 2 { UPDATE t1_data SET block=14 WHERE rowid=$rowid } } { set tn 0 foreach rowid [db eval {SELECT rowid FROM t1_data WHERE rowid>10}] { incr tn #if {$tn!=224} continue do_test 1.$tno.$tn.1.$rowid { execsql { BEGIN } execsql $stmt catchsql { INSERT INTO t1(t1) VALUES('integrity-check') } } {1 {database disk image is malformed}} if {($rowid & $mask)==0} { # Node is a leaf node, not a b-tree node. do_catchsql_test 1.$tno.$tn.2.$rowid { SELECT rowid FROM t1 WHERE t1 MATCH 'x*' } {1 {database disk image is malformed}} } do_execsql_test 1.$tno.$tn.3.$rowid { ROLLBACK; INSERT INTO t1(t1) VALUES('integrity-check'); } {} } } # Using the same database as the 1.* tests. # # Run N-1 tests, where N is the number of bytes in the rightmost leaf page # of the fts index. 
For test $i, truncate the rightmost leafpage to $i # bytes. Then test both the integrity-check detects the corruption. # # Also tested is that "MATCH 'x*'" does not crash and sometimes reports # corruption. It may not report the db as corrupt because truncating the # final leaf to some sizes may create a valid leaf page. # set lrowid [db one {SELECT max(rowid) FROM t1_data WHERE (rowid & $mask)=0}] set nbyte [db one {SELECT length(block) FROM t1_data WHERE rowid=$lrowid}] set all [db eval {SELECT rowid FROM t1}] for {set i [expr $nbyte-2]} {$i>=0} {incr i -1} { do_execsql_test 2.$i.1 { BEGIN; UPDATE t1_data SET block = substr(block, 1, $i) WHERE rowid=$lrowid; } do_catchsql_test 2.$i.2 { INSERT INTO t1(t1) VALUES('integrity-check'); } {1 {database disk image is malformed}} do_test 2.$i.3 { set res [catchsql {SELECT rowid FROM t1 WHERE t1 MATCH 'x*'}] expr { $res=="1 {database disk image is malformed}" || $res=="0 {$all}" } } 1 do_execsql_test 2.$i.4 { ROLLBACK; INSERT INTO t1(t1) VALUES('integrity-check'); } {} } #------------------------------------------------------------------------- # Test that corruption in leaf page headers is detected by queries that use # doclist-indexes. 
# set doc "A B C D E F G H I J " do_execsql_test 3.0 { CREATE VIRTUAL TABLE x3 USING fts5(tt); INSERT INTO x3(x3, rank) VALUES('pgsz', 32); WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<1000) INSERT INTO x3 SELECT ($doc || CASE WHEN (i%50)==0 THEN 'X' ELSE 'Y' END) FROM ii; } foreach {tn hdr} { 1 "\x00\x00\x00\x00" 2 "\xFF\xFF\xFF\xFF" 3 "\x44\x45" } { set tn2 0 set nCorrupt 0 set nCorrupt2 0 foreach rowid [db eval {SELECT rowid FROM x3_data WHERE rowid>10}] { if {$rowid & $mask} continue incr tn2 do_test 3.$tn.$tn2.1 { execsql BEGIN set fd [db incrblob main x3_data block $rowid] fconfigure $fd -encoding binary -translation binary set existing [read $fd [string length $hdr]] seek $fd 0 puts -nonewline $fd $hdr close $fd set res [catchsql {SELECT rowid FROM x3 WHERE x3 MATCH 'x AND a'}] if {$res == "1 {database disk image is malformed}"} {incr nCorrupt} set {} 1 } {1} if {($tn2 % 10)==0 && $existing != $hdr} { do_test 3.$tn.$tn2.2 { catchsql { INSERT INTO x3(x3) VALUES('integrity-check') } } {1 {database disk image is malformed}} } execsql ROLLBACK } do_test 3.$tn.x { expr $nCorrupt>0 } 1 } #-------------------------------------------------------------------- # set doc "A B C D E F G H I J " do_execsql_test 4.0 { CREATE VIRTUAL TABLE x4 USING fts5(tt); INSERT INTO x4(x4, rank) VALUES('pgsz', 32); WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<10) INSERT INTO x4 SELECT ($doc || CASE WHEN (i%50)==0 THEN 'X' ELSE 'Y' END) FROM ii; } foreach {tn nCut} { 1 1 2 10 } { set tn2 0 set nCorrupt 0 foreach rowid [db eval {SELECT rowid FROM x4_data WHERE rowid>10}] { if {$rowid & $mask} continue incr tn2 do_test 4.$tn.$tn2 { execsql { BEGIN; UPDATE x4_data SET block = substr(block, 1, length(block)-$nCut) WHERE id = $rowid; } set res [catchsql { SELECT rowid FROM x4 WHERE x4 MATCH 'a' ORDER BY 1 DESC }] if {$res == "1 {database disk image is malformed}"} {incr nCorrupt} set {} 1 } {1} execsql ROLLBACK } do_test 4.$tn.x { expr $nCorrupt>0 } 1 } } set 
doc [string repeat "A B C " 1000] do_execsql_test 4.0 { CREATE VIRTUAL TABLE x5 USING fts5(tt); INSERT INTO x5(x5, rank) VALUES('pgsz', 32); WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<10) INSERT INTO x5 SELECT $doc FROM ii; } foreach {tn hdr} { 1 "\x00\x01" } { set tn2 0 set nCorrupt 0 foreach rowid [db eval {SELECT rowid FROM x5_data WHERE rowid>10}] { if {$rowid & $mask} continue incr tn2 do_test 4.$tn.$tn2 { execsql BEGIN set fd [db incrblob main x5_data block $rowid] fconfigure $fd -encoding binary -translation binary puts -nonewline $fd $hdr close $fd catchsql { INSERT INTO x5(x5) VALUES('integrity-check') } set {} {} } {} execsql ROLLBACK } } #-------------------------------------------------------------------- reset_db do_execsql_test 5.1 { CREATE VIRTUAL TABLE x5 USING fts5(tt); INSERT INTO x5 VALUES('a'); INSERT INTO x5 VALUES('a a'); INSERT INTO x5 VALUES('a a a'); INSERT INTO x5 VALUES('a a a a'); UPDATE x5_docsize SET sz = X'' WHERE id=3; } proc colsize {cmd i} { $cmd xColumnSize $i } sqlite3_fts5_create_function db colsize colsize do_catchsql_test 5.2 { SELECT colsize(x5, 0) FROM x5 WHERE x5 MATCH 'a' } {1 SQLITE_CORRUPT_VTAB} sqlite3_fts5_may_be_corrupt 0 finish_test |
Added ext/fts5/test/fts5corrupt3.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 | # 2015 Apr 24 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # This file tests that FTS5 handles corrupt databases (i.e. internal # inconsistencies in the backing tables) correctly. In this case # "correctly" means without crashing. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5corrupt3 # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } sqlite3_fts5_may_be_corrupt 1 # Create a simple FTS5 table containing 100 documents. Each document # contains 10 terms, each of which start with the character "x". # expr srand(0) db func rnddoc fts5_rnddoc do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(x); INSERT INTO t1(t1, rank) VALUES('pgsz', 64); WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<100) INSERT INTO t1 SELECT rnddoc(10) FROM ii; } set mask [expr 31 << 31] do_test 1.1 { # Pick out the rowid of the right-most b-tree leaf in the new segment. 
set rowid [db one { SELECT max(rowid) FROM t1_data WHERE ((rowid>>31) & 0x0F)==1 }] set L [db one {SELECT length(block) FROM t1_data WHERE rowid = $rowid}] set {} {} } {} for {set i 0} {$i < $L} {incr i} { do_test 1.2.$i { catchsql { BEGIN; UPDATE t1_data SET block = substr(block, 1, $i) WHERE id = $rowid; INSERT INTO t1(t1) VALUES('integrity-check'); } } {1 {database disk image is malformed}} catchsql ROLLBACK } #------------------------------------------------------------------------- # Test that trailing bytes appended to the averages record are ignored. # do_execsql_test 2.1 { CREATE VIRTUAL TABLE t2 USING fts5(x); INSERT INTO t2 VALUES(rnddoc(10)); INSERT INTO t2 VALUES(rnddoc(10)); SELECT length(block) FROM t2_data WHERE id=1; } {2} do_execsql_test 2.2 { UPDATE t2_data SET block = block || 'abcd' WHERE id=1; SELECT length(block) FROM t2_data WHERE id=1; } {6} do_execsql_test 2.2 { INSERT INTO t2 VALUES(rnddoc(10)); SELECT length(block) FROM t2_data WHERE id=1; } {2} sqlite3_fts5_may_be_corrupt 0 finish_test |
Added ext/fts5/test/fts5dlidx.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 | # 2015 April 21 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # This test is focused on uses of doclist-index records. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5dlidx # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } if { $tcl_platform(wordSize)<8 } { finish_test return } proc do_fb_test {tn sql res} { set res2 [lsort -integer -decr $res] uplevel [list do_execsql_test $tn.1 $sql $res] uplevel [list do_execsql_test $tn.2 "$sql ORDER BY rowid DESC" $res2] } # This test populates the FTS5 table containing $nEntry entries. Rows are # numbered from 0 to ($nEntry-1). The rowid for row $i is: # # ($iFirst + $i*$nStep) # # Each document is of the form "a b c a b c a b c...". If the row number ($i) # is an integer multiple of $spc1, then an "x" token is appended to the # document. If it is *also* a multiple of $spc2, a "y" token is also appended. 
#
proc do_dlidx_test1 {tn spc1 spc2 nEntry iFirst nStep} {
  # Arguments:
  #   tn      prefix used for the names of all test cases run here
  #   spc1    rows whose number is a multiple of $spc1 get an "x" token
  #   spc2    of those rows, multiples of $spc2 also get a "y" token
  #   nEntry  number of rows to insert
  #   iFirst  rowid assigned to row 0
  #   nStep   distance between the rowids of consecutive rows

  do_execsql_test $tn.0 { DELETE FROM t1 }

  # Expected rowids of the rows containing "x" and "y" respectively.
  set xdoc [list]
  set ydoc [list]

  execsql BEGIN
  for {set i 0} {$i < $nEntry} {incr i} {
    # The rowid honours $iFirst, as the description of this proc states.
    # Earlier the offset was ignored and rowids always started at 0; all
    # existing callers pass iFirst=0, so their results are unchanged.
    set rowid [expr {$iFirst + $i * $nStep}]
    set doc [string trim [string repeat "a b c " 100]]
    if {($i % $spc1)==0} {
      lappend xdoc $rowid
      append doc " x"
      if {($i % $spc2)==0} {
        lappend ydoc $rowid
        append doc " y"
      }
    }
    execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $doc) }
  }
  execsql COMMIT

  do_test $tn.1 {
    execsql { INSERT INTO t1(t1) VALUES('integrity-check') }
  } {}

  # Each query is run by do_fb_test, which checks it in ascending order
  # and again with "ORDER BY rowid DESC".
  do_fb_test $tn.3.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND x' } $xdoc
  do_fb_test $tn.3.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'x AND a' } $xdoc

  do_fb_test $tn.4.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND y' } $ydoc
  do_fb_test $tn.4.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'y AND a' } $ydoc

  do_fb_test $tn.5.1 {
    SELECT rowid FROM t1 WHERE t1 MATCH 'a + b + c + x' } $xdoc
  do_fb_test $tn.5.2 {
    SELECT rowid FROM t1 WHERE t1 MATCH 'b + c + x + y' } $ydoc
}

# Run the do_dlidx_test1 scenarios once per FTS5 page size, covering both
# small and very large gaps between consecutive rowids.
foreach {tn pgsz} {
  1 32
  2 200
} {
  do_execsql_test $tn.0 {
    DROP TABLE IF EXISTS t1;
    CREATE VIRTUAL TABLE t1 USING fts5(x);
    INSERT INTO t1(t1, rank) VALUES('pgsz', $pgsz);
  }

  do_dlidx_test1 1.$tn.1 10 100 10000 0 1000
  do_dlidx_test1 1.$tn.2 10 10 10000 0 128
  do_dlidx_test1 1.$tn.3 10 10 66 0 36028797018963970
  do_dlidx_test1 1.$tn.4 10 10 50 0 150000000000000000
  do_dlidx_test1 1.$tn.5 10 10 200 0 [expr {1<<55}]
  do_dlidx_test1 1.$tn.6 10 10 30 0 [expr {1<<58}]
}

# Recreate table t1 (pgsz=64) holding one row 'b a' followed by $nEntry
# rows of repeated "a" tokens whose rowids start at $iFirst and increase
# by $nStep. Then check that 'b AND a' matches only the first row, in
# both ascending and descending rowid order.
proc do_dlidx_test2 {tn nEntry iFirst nStep} {
  set str [string repeat "a " 500]
  execsql {
    BEGIN;
    DROP TABLE IF EXISTS t1;
    CREATE VIRTUAL TABLE t1 USING fts5(x);
    INSERT INTO t1(t1, rank) VALUES('pgsz', 64);
    INSERT INTO t1 VALUES('b a');

    WITH iii(ii, i) AS (
      SELECT 1, $iFirst UNION ALL
      SELECT ii+1, i+$nStep FROM iii WHERE ii<$nEntry
    )
    INSERT INTO t1(rowid,x) SELECT i, $str FROM iii;
    COMMIT;
  }

  do_execsql_test $tn.1 {
    SELECT rowid FROM t1 WHERE t1 MATCH 'b AND a'
  } {1}
  do_execsql_test $tn.2 {
    SELECT rowid FROM t1 WHERE t1 MATCH 'b AND a' ORDER BY rowid DESC
  } {1}
}

do_dlidx_test2 2.1 20 [expr {1<<57}] [expr {(1<<57) + 128}]

finish_test
Added ext/fts5/test/fts5doclist.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 | # 2015 April 21 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # This test is focused on edge cases in the doclist format. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5doclist # If SQLITE_ENABLE_FTS5 is not defined, omit this file. ifcapable !fts5 { finish_test return } #------------------------------------------------------------------------- # Create a table with 900 columns. Then add some large documents to it. # All text is in the right most column of the table. # do_test 1.0 { set cols [list] for {set i 0} {$i < 900} {incr i} { lappend cols "x$i" } execsql "CREATE VIRTUAL TABLE ccc USING fts5([join $cols ,])" } {} db func rnddoc fts5_rnddoc do_execsql_test 1.1 { WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<100) INSERT INTO ccc(x899) SELECT rnddoc(500) FROM ii; } do_execsql_test 1.2 { INSERT INTO ccc(ccc) VALUES('integrity-check'); } finish_test |
Added ext/fts5/test/fts5ea.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 | # 2014 June 17 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # # Test the fts5 expression parser directly using the fts5_expr() SQL # test function. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5ea # If SQLITE_ENABLE_FTS5 is defined, omit this file. 
ifcapable !fts5 { finish_test return } proc do_syntax_error_test {tn expr err} { set ::se_expr $expr do_catchsql_test $tn {SELECT fts5_expr($se_expr)} [list 1 $err] } proc do_syntax_test {tn expr res} { set ::se_expr $expr do_execsql_test $tn {SELECT fts5_expr($se_expr)} [list $res] } foreach {tn expr res} { 1 {abc} {"abc"} 2 {abc def} {"abc" AND "def"} 3 {abc*} {"abc" *} 4 {"abc def ghi" *} {"abc" + "def" + "ghi" *} 5 {one AND two} {"one" AND "two"} 6 {one+two} {"one" + "two"} 7 {one AND two OR three} {("one" AND "two") OR "three"} 8 {one OR two AND three} {"one" OR ("two" AND "three")} 9 {NEAR(one two)} {NEAR("one" "two", 10)} 10 {NEAR("one three"* two, 5)} {NEAR("one" + "three" * "two", 5)} 11 {a OR b NOT c} {"a" OR ("b" NOT "c")} 12 "\x20one\x20two\x20three" {"one" AND "two" AND "three"} 13 "\x09one\x0Atwo\x0Dthree" {"one" AND "two" AND "three"} 14 {"abc""def"} {"abc" + "def"} } { do_execsql_test 1.$tn {SELECT fts5_expr($expr)} [list $res] } foreach {tn expr res} { 1 {c1:abc} {c1 : "abc"} 2 {c2 : NEAR(one two) c1:"hello world"} {c2 : NEAR("one" "two", 10) AND c1 : "hello" + "world"} } { do_execsql_test 2.$tn {SELECT fts5_expr($expr, 'c1', 'c2')} [list $res] } foreach {tn expr err} { 1 {AND} {fts5: syntax error near "AND"} 2 {abc def AND} {fts5: syntax error near ""} 3 {abc OR AND} {fts5: syntax error near "AND"} 4 {(a OR b) abc} {fts5: syntax error near "abc"} 5 {NEaR (a b)} {fts5: syntax error near "NEaR"} 6 {NEa (a b)} {fts5: syntax error near "NEa"} 7 {(a OR b) NOT c)} {fts5: syntax error near ")"} 8 {nosuch: a nosuch2: b} {no such column: nosuch} 9 {addr: a nosuch2: b} {no such column: nosuch2} 10 {NOT} {fts5: syntax error near "NOT"} 11 {a AND "abc} {unterminated string} 12 {NEAR(a b, xyz)} {expected integer, got "xyz"} 13 {NEAR(a b, // )} {fts5: syntax error near "/"} 14 {NEAR(a b, "xyz" )} {expected integer, got ""xyz""} } { do_catchsql_test 3.$tn {SELECT fts5_expr($expr, 'name', 'addr')} [list 1 $err] } 
#------------------------------------------------------------------------- # Experiment with a tokenizer that considers " to be a token character. # do_execsql_test 4.0 { SELECT fts5_expr('a AND """"', 'x', 'tokenize="unicode61 tokenchars ''""''"'); } {{"a" AND """"}} finish_test |
Added ext/fts5/test/fts5eb.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 | # 2014 June 17 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5eb # If SQLITE_ENABLE_FTS5 is not defined, omit this file. ifcapable !fts5 { finish_test return } proc do_syntax_error_test {tn expr err} { set ::se_expr $expr do_catchsql_test $tn {SELECT fts5_expr($se_expr)} [list 1 $err] } proc do_syntax_test {tn expr res} { set ::se_expr $expr do_execsql_test $tn {SELECT fts5_expr($se_expr)} [list $res] } foreach {tn expr res} { 1 {abc} {"abc"} 2 {abc .} {"abc"} 3 {.} {} 4 {abc OR .} {"abc"} 5 {abc NOT .} {"abc"} 6 {abc AND .} {"abc"} 7 {. OR abc} {"abc"} 8 {. NOT abc} {"abc"} 9 {. AND abc} {"abc"} 10 {abc + . + def} {"abc" + "def"} 11 {abc . def} {"abc" AND "def"} 12 {r+e OR w} {"r" + "e" OR "w"} } { do_execsql_test 1.$tn {SELECT fts5_expr($expr)} [list $res] } finish_test |
Added ext/fts5/test/fts5fault1.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 
351 352 353 | # 2014 June 17 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # This file implements regression tests for SQLite library. The # focus of this script is testing the FTS5 module. # source [file join [file dirname [info script]] fts5_common.tcl] source $testdir/malloc_common.tcl set testprefix fts5fault1 # If SQLITE_ENABLE_FTS3 is defined, omit this file. ifcapable !fts5 { finish_test return } # Simple tests: # # 1: CREATE VIRTUAL TABLE # 2: INSERT statement # 3: DELETE statement # 4: MATCH expressions # # faultsim_save_and_close do_faultsim_test 1 -faults ioerr-t* -prep { faultsim_restore_and_reopen } -body { execsql { CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix='1, 2, 3') } } -test { faultsim_test_result {0 {}} {1 {vtable constructor failed: t1}} } reset_db do_execsql_test 2.0 { CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix='1, 2, 3'); } faultsim_save_and_close do_faultsim_test 2 -prep { faultsim_restore_and_reopen } -body { execsql { INSERT INTO t1 VALUES('a b c', 'a bc def ghij klmno'); } } -test { faultsim_test_result {0 {}} } reset_db do_execsql_test 3.0 { CREATE VIRTUAL TABLE t1 USING fts5(a, b, prefix='1, 2, 3'); INSERT INTO t1 VALUES('a b c', 'a bc def ghij klmno'); } faultsim_save_and_close do_faultsim_test 3 -prep { faultsim_restore_and_reopen } -body { execsql { DELETE FROM t1 } } -test { faultsim_test_result {0 {}} } reset_db do_execsql_test 4.0 { CREATE VIRTUAL TABLE t2 USING fts5(a, b); INSERT INTO t2 VALUES('m f a jj th q jr ar', 'hj n h h sg j i m'); INSERT INTO t2 VALUES('nr s t g od j kf h', 'sb h aq rg op rb n nl'); INSERT INTO t2 VALUES('do h h pb p p q fr', 'c rj qs or cr a l i'); INSERT INTO t2 VALUES('lk gp t i lq mq qm p', 'h mr g f op ld aj 
h'); INSERT INTO t2 VALUES('ct d sq kc qi k f j', 'sn gh c of g s qt q'); INSERT INTO t2 VALUES('d ea d d om mp s ab', 'dm hg l df cm ft pa c'); INSERT INTO t2 VALUES('tc dk c jn n t sr ge', 'a a kn bc n i af h'); INSERT INTO t2 VALUES('ie ii d i b sa qo rf', 'a h m aq i b m fn'); INSERT INTO t2 VALUES('gs r fo a er m h li', 'tm c p gl eb ml q r'); INSERT INTO t2 VALUES('k fe fd rd a gi ho kk', 'ng m c r d ml rm r'); } faultsim_save_and_close foreach {tn expr res} { 1 { dk } 7 2 { m f } 1 3 { f* } {1 3 4 5 6 8 9 10} 4 { m OR f } {1 4 5 8 9 10} 5 { sn + gh } {5} 6 { "sn gh" } {5} 7 { NEAR(r a, 5) } {9} 8 { m* f* } {1 4 6 8 9 10} 9 { m* + f* } {1 8} } { do_faultsim_test 4.$tn -prep { faultsim_restore_and_reopen } -body " execsql { SELECT rowid FROM t2 WHERE t2 MATCH '$expr' } " -test " faultsim_test_result {[list 0 $res]} " } #------------------------------------------------------------------------- # The following tests use a larger database populated with random data. # # The database page size is set to 512 bytes and the FTS5 page size left # at the default 1000 bytes. This means that reading a node may require # pulling an overflow page from disk, which is an extra opportunity for # an error to occur. 
# reset_db do_execsql_test 5.0.1 { PRAGMA main.page_size = 512; CREATE VIRTUAL TABLE x1 USING fts5(a, b); PRAGMA main.page_size; } {512} proc rnddoc {n} { set map [list 0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j] set doc [list] for {set i 0} {$i < $n} {incr i} { lappend doc [string map $map [format %.3d [expr int(rand()*1000)]]] } set doc } db func rnddoc rnddoc do_execsql_test 5.0.2 { WITH r(a, b) AS ( SELECT rnddoc(6), rnddoc(6) UNION ALL SELECT rnddoc(6), rnddoc(6) FROM r ) INSERT INTO x1 SELECT * FROM r LIMIT 10000; } set res [db one { SELECT count(*) FROM x1 WHERE x1.a LIKE '%abc%' OR x1.b LIKE '%abc%'} ] do_faultsim_test 5.1 -faults oom* -body { execsql { SELECT count(*) FROM x1 WHERE x1 MATCH 'abc' } } -test { faultsim_test_result [list 0 $::res] } do_faultsim_test 5.2 -faults oom* -body { execsql { SELECT count(*) FROM x1 WHERE x1 MATCH 'abcd' } } -test { faultsim_test_result [list 0 0] } proc test_astar {a b} { return [expr { [regexp {a[^ ][^ ]} $a] || [regexp {a[^ ][^ ]} $b] }] } db func test_astar test_astar set res [db one { SELECT count(*) FROM x1 WHERE test_astar(a, b) } ] do_faultsim_test 5.3 -faults oom* -body { execsql { SELECT count(*) FROM x1 WHERE x1 MATCH 'a*' } } -test { faultsim_test_result [list 0 $::res] } do_faultsim_test 5.4 -faults oom* -prep { db close sqlite3 db test.db } -body { execsql { INSERT INTO x1 VALUES('a b c d', 'e f g h') } } -test { faultsim_test_result [list 0 {}] } do_faultsim_test 5.5.1 -faults oom* -body { execsql { SELECT count(fts5_decode(rowid, block)) FROM x1_data WHERE rowid=1 } } -test { faultsim_test_result [list 0 1] } do_faultsim_test 5.5.2 -faults oom* -body { execsql { SELECT count(fts5_decode(rowid, block)) FROM x1_data WHERE rowid=10 } } -test { faultsim_test_result [list 0 1] } do_faultsim_test 5.5.3 -faults oom* -body { execsql { SELECT count(fts5_decode(rowid, block)) FROM x1_data WHERE rowid = ( SELECT min(rowid) FROM x1_data WHERE rowid>20 ) } } -test { faultsim_test_result [list 0 1] } do_faultsim_test 
5.5.4 -faults oom* -body { execsql { SELECT count(fts5_decode(rowid, block)) FROM x1_data WHERE rowid = ( SELECT max(rowid) FROM x1_data ) } } -test { faultsim_test_result [list 0 1] } #------------------------------------------------------------------------- # reset_db do_execsql_test 6.0 { CREATE VIRTUAL TABLE x1 USING fts5(x); INSERT INTO x1(x1, rank) VALUES('automerge', 0); INSERT INTO x1 VALUES('a b c'); -- 1 INSERT INTO x1 VALUES('a b c'); -- 2 INSERT INTO x1 VALUES('a b c'); -- 3 INSERT INTO x1 VALUES('a b c'); -- 4 INSERT INTO x1 VALUES('a b c'); -- 5 INSERT INTO x1 VALUES('a b c'); -- 6 INSERT INTO x1 VALUES('a b c'); -- 7 INSERT INTO x1 VALUES('a b c'); -- 8 INSERT INTO x1 VALUES('a b c'); -- 9 INSERT INTO x1 VALUES('a b c'); -- 10 INSERT INTO x1 VALUES('a b c'); -- 11 INSERT INTO x1 VALUES('a b c'); -- 12 INSERT INTO x1 VALUES('a b c'); -- 13 INSERT INTO x1 VALUES('a b c'); -- 14 INSERT INTO x1 VALUES('a b c'); -- 15 SELECT count(*) FROM x1_data; } {17} faultsim_save_and_close do_faultsim_test 6.1 -faults oom* -prep { faultsim_restore_and_reopen } -body { execsql { INSERT INTO x1 VALUES('d e f') } } -test { faultsim_test_result [list 0 {}] if {$testrc==0} { set nCnt [db one {SELECT count(*) FROM x1_data}] if {$nCnt!=3} { error "expected 3 entries but there are $nCnt" } } } do_faultsim_test 6.2 -faults oom* -prep { faultsim_restore_and_reopen } -body { execsql { INSERT INTO x1(x1, rank) VALUES('pgsz', 32) } } -test { faultsim_test_result [list 0 {}] } do_faultsim_test 6.3 -faults oom-* -prep { faultsim_restore_and_reopen } -body { execsql { INSERT INTO x1(x1) VALUES('integrity-check') } } -test { faultsim_test_result [list 0 {}] } do_faultsim_test 6.4 -faults oom-* -prep { faultsim_restore_and_reopen } -body { execsql { INSERT INTO x1(x1) VALUES('optimize') } } -test { faultsim_test_result [list 0 {}] } #------------------------------------------------------------------------- # do_faultsim_test 7.0 -faults oom* -prep { catch { db close } } -body { 
sqlite3 db test.db } -test { faultsim_test_result [list 0 {}] {1 {}} {1 {initialization of fts5 failed: }} } #------------------------------------------------------------------------- # A prefix query against a large document set. # proc rnddoc {n} { set map [list 0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j] set doc [list] for {set i 0} {$i < $n} {incr i} { lappend doc "x[string map $map [format %.3d [expr int(rand()*1000)]]]" } set doc } reset_db db func rnddoc rnddoc do_test 8.0 { execsql { CREATE VIRTUAL TABLE x1 USING fts5(a) } set ::res [list] for {set i 1} {$i<100} {incr i 1} { execsql { INSERT INTO x1 VALUES( rnddoc(50) ) } lappend ::res $i } } {} do_faultsim_test 8.1 -faults oom* -prep { } -body { execsql { SELECT rowid FROM x1 WHERE x1 MATCH 'x*' } } -test { faultsim_test_result [list 0 $::res] } #------------------------------------------------------------------------- # Segment promotion. # do_test 9.0 { reset_db db func rnddoc fts5_rnddoc execsql { CREATE VIRTUAL TABLE s2 USING fts5(x); INSERT INTO s2(s2, rank) VALUES('pgsz', 32); INSERT INTO s2(s2, rank) VALUES('automerge', 0); } for {set i 1} {$i <= 16} {incr i} { execsql { INSERT INTO s2 VALUES(rnddoc(5)) } } fts5_level_segs s2 } {0 1} set insert_doc [db one {SELECT rnddoc(160)}] faultsim_save_and_close do_faultsim_test 9.1 -faults oom-* -prep { faultsim_restore_and_reopen } -body { execsql { INSERT INTO s2 VALUES($::insert_doc) } } -test { faultsim_test_result {0 {}} if {$testrc==0} { set ls [fts5_level_segs s2] if {$ls != "2 0"} { error "fts5_level_segs says {$ls}" } } } finish_test |
Added ext/fts5/test/fts5fault2.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 | # 2014 June 17 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # # This file is focused on OOM errors. # source [file join [file dirname [info script]] fts5_common.tcl] source $testdir/malloc_common.tcl set testprefix fts5fault2 # If SQLITE_ENABLE_FTS3 is defined, omit this file. 
ifcapable !fts5 { finish_test return } set doc [string trim [string repeat "x y z " 200]] do_execsql_test 1.0 { CREATE TABLE t1(a INTEGER PRIMARY KEY, x); CREATE VIRTUAL TABLE x1 USING fts5(x, content='t1', content_rowid='a'); INSERT INTO x1(x1, rank) VALUES('pgsz', 32); WITH input(a,b) AS ( SELECT 1, $doc UNION ALL SELECT a+1, ($doc || CASE WHEN (a+1)%100 THEN '' ELSE ' xyz' END) FROM input WHERE a < 1000 ) INSERT INTO t1 SELECT * FROM input; INSERT INTO x1(x1) VALUES('rebuild'); } do_faultsim_test 1.1 -faults oom-* -prep { } -body { execsql { SELECT rowid FROM x1 WHERE x1 MATCH 'z AND xyz' } } -test { faultsim_test_result {0 {100 200 300 400 500 600 700 800 900 1000}} } do_faultsim_test 1.2 -faults oom-* -prep { } -body { execsql { SELECT rowid FROM x1 WHERE x1 MATCH 'z + xyz' ORDER BY 1 DESC} } -test { faultsim_test_result {0 {1000 900 800 700 600 500 400 300 200 100}} } #------------------------------------------------------------------------- # OOM within a query that accesses the in-memory hash table. # reset_db do_execsql_test 2.0 { CREATE VIRTUAL TABLE "a b c" USING fts5(a, b, c); INSERT INTO "a b c" VALUES('one two', 'x x x', 'three four'); INSERT INTO "a b c" VALUES('nine ten', 'y y y', 'two two'); } do_faultsim_test 2.1 -faults oom-trans* -prep { execsql { BEGIN; INSERT INTO "a b c" VALUES('one one', 'z z z', 'nine ten'); } } -body { execsql { SELECT rowid FROM "a b c" WHERE "a b c" MATCH 'one' } } -test { faultsim_test_result {0 {1 3}} catchsql { ROLLBACK } } #------------------------------------------------------------------------- # OOM within an 'optimize' operation that writes multiple pages to disk. 
# reset_db do_execsql_test 3.0 { CREATE VIRTUAL TABLE zzz USING fts5(z); INSERT INTO zzz(zzz, rank) VALUES('pgsz', 32); INSERT INTO zzz VALUES('a b c d'); INSERT INTO zzz SELECT 'c d e f' FROM zzz; INSERT INTO zzz SELECT 'e f g h' FROM zzz; INSERT INTO zzz SELECT 'i j k l' FROM zzz; INSERT INTO zzz SELECT 'l k m n' FROM zzz; INSERT INTO zzz SELECT 'o p q r' FROM zzz; } faultsim_save_and_close do_faultsim_test 3.1 -faults oom-trans* -prep { faultsim_restore_and_reopen execsql { SELECT rowid FROM zzz } } -body { execsql { INSERT INTO zzz(zzz) VALUES('optimize') } } -test { faultsim_test_result {0 {}} } #------------------------------------------------------------------------- # OOM within an 'integrity-check' operation. # reset_db db func rnddoc fts5_rnddoc do_execsql_test 4.0 { CREATE VIRTUAL TABLE zzz USING fts5(z); INSERT INTO zzz(zzz, rank) VALUES('pgsz', 32); WITH ii(i) AS (SELECT 1 UNION SELECT i+1 FROM ii WHERE i<10) INSERT INTO zzz SELECT rnddoc(10) || ' xccc' FROM ii; } do_faultsim_test 4.1 -faults oom-trans* -prep { } -body { execsql { INSERT INTO zzz(zzz) VALUES('integrity-check') } } -test { faultsim_test_result {0 {}} } #------------------------------------------------------------------------- # OOM while parsing a tokenize=option # reset_db faultsim_save_and_close do_faultsim_test 5.0 -faults oom-* -prep { faultsim_restore_and_reopen } -body { execsql { CREATE VIRTUAL TABLE uio USING fts5(a, b, tokenize="porter 'ascii'", content="another table", content_rowid="somecolumn" ); } } -test { faultsim_test_result {0 {}} } finish_test |
Added ext/fts5/test/fts5fault3.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 | # 2014 June 17 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # # This file is focused on OOM errors. # source [file join [file dirname [info script]] fts5_common.tcl] source $testdir/malloc_common.tcl set testprefix fts5fault3 # If SQLITE_ENABLE_FTS3 is defined, omit this file. ifcapable !fts5 { finish_test return } #------------------------------------------------------------------------- # An OOM while resuming a partially completed segment merge. 
# db func rnddoc fts5_rnddoc do_test 1.0 { expr srand(0) execsql { CREATE VIRTUAL TABLE xx USING fts5(x); INSERT INTO xx(xx, rank) VALUES('pgsz', 32); INSERT INTO xx(xx, rank) VALUES('automerge', 16); } for {set i 0} {$i < 10} {incr i} { execsql { BEGIN; INSERT INTO xx(x) VALUES(rnddoc(20)); INSERT INTO xx(x) VALUES(rnddoc(20)); INSERT INTO xx(x) VALUES(rnddoc(20)); COMMIT } } execsql { INSERT INTO xx(xx, rank) VALUES('automerge', 2); INSERT INTO xx(xx, rank) VALUES('merge', 50); } } {} faultsim_save_and_close do_faultsim_test 1 -faults oom-* -prep { faultsim_restore_and_reopen } -body { execsql { INSERT INTO xx(xx, rank) VALUES('merge', 1) } } -test { faultsim_test_result [list 0 {}] } #------------------------------------------------------------------------- # An OOM while flushing an unusually large term to disk. # reset_db do_execsql_test 2.0 { CREATE VIRTUAL TABLE xx USING fts5(x); INSERT INTO xx(xx, rank) VALUES('pgsz', 32); } faultsim_save_and_close set doc "a long term abcdefghijklmnopqrstuvwxyz " append doc "and then abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz " append doc [string repeat "abcdefghijklmnopqrstuvwxyz" 10] do_faultsim_test 2 -faults oom-* -prep { faultsim_restore_and_reopen } -body { execsql { INSERT INTO xx(x) VALUES ($::doc) } } -test { faultsim_test_result [list 0 {}] } #------------------------------------------------------------------------- # An OOM while flushing an unusually large term to disk. # reset_db do_execsql_test 3.0 { CREATE VIRTUAL TABLE xx USING fts5(x); } faultsim_save_and_close set doc [fts5_rnddoc 1000] do_faultsim_test 3.1 -faults oom-* -prep { faultsim_restore_and_reopen } -body { execsql { INSERT INTO xx(x) VALUES ($::doc) } } -test { faultsim_test_result [list 0 {}] } set doc [string repeat "abc " 100] do_faultsim_test 3.2 -faults oom-* -prep { faultsim_restore_and_reopen } -body { execsql { INSERT INTO xx(x) VALUES ($::doc) } } -test { faultsim_test_result [list 0 {}] } finish_test |
Added ext/fts5/test/fts5fault4.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 
318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 | # 2014 June 17 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # # This file is focused on OOM errors. # source [file join [file dirname [info script]] fts5_common.tcl] source $testdir/malloc_common.tcl set testprefix fts5fault4 # If SQLITE_ENABLE_FTS3 is defined, omit this file. ifcapable !fts5 { finish_test return } #------------------------------------------------------------------------- # An OOM while dropping an fts5 table. # db func rnddoc fts5_rnddoc do_test 1.0 { execsql { CREATE VIRTUAL TABLE xx USING fts5(x) } } {} faultsim_save_and_close do_faultsim_test 1 -faults oom-* -prep { faultsim_restore_and_reopen execsql { SELECT * FROM xx } } -body { execsql { DROP TABLE xx } } -test { faultsim_test_result [list 0 {}] } #------------------------------------------------------------------------- # An OOM within an "ORDER BY rank" query. 
# db func rnddoc fts5_rnddoc do_execsql_test 2.0 { CREATE VIRTUAL TABLE xx USING fts5(x); INSERT INTO xx VALUES ('abc ' || rnddoc(10)); INSERT INTO xx VALUES ('abc abc' || rnddoc(9)); INSERT INTO xx VALUES ('abc abc abc' || rnddoc(8)); } {} faultsim_save_and_close do_faultsim_test 2 -faults oom-* -prep { faultsim_restore_and_reopen execsql { SELECT * FROM xx } } -body { execsql { SELECT rowid FROM xx WHERE xx MATCH 'abc' ORDER BY rank } } -test { faultsim_test_result [list 0 {3 2 1}] } #------------------------------------------------------------------------- # An OOM while "reseeking" an FTS cursor. # do_execsql_test 3.0 { CREATE VIRTUAL TABLE jj USING fts5(j); INSERT INTO jj(rowid, j) VALUES(101, 'm t w t f s s'); INSERT INTO jj(rowid, j) VALUES(202, 't w t f s'); INSERT INTO jj(rowid, j) VALUES(303, 'w t f'); INSERT INTO jj(rowid, j) VALUES(404, 't'); } faultsim_save_and_close do_faultsim_test 3 -faults oom-* -prep { faultsim_restore_and_reopen execsql { SELECT * FROM jj } } -body { set res [list] db eval { SELECT rowid FROM jj WHERE jj MATCH 't' } { lappend res $rowid if {$rowid==303} { execsql { DELETE FROM jj WHERE rowid=404 } } } set res } -test { faultsim_test_result [list 0 {101 202 303}] } #------------------------------------------------------------------------- # An OOM within a special "*reads" query. # reset_db db func rnddoc fts5_rnddoc do_execsql_test 4.0 { CREATE VIRTUAL TABLE x1 USING fts5(x); INSERT INTO x1(x1, rank) VALUES('pgsz', 32); WITH ii(i) AS ( SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<10 ) INSERT INTO x1 SELECT rnddoc(5) FROM ii; } set ::res [db eval {SELECT rowid, x1 FROM x1 WHERE x1 MATCH '*reads'}] do_faultsim_test 4 -faults oom-* -body { db eval {SELECT rowid, x, x1 FROM x1 WHERE x1 MATCH '*reads'} } -test { faultsim_test_result {0 {0 {} 3}} } #------------------------------------------------------------------------- # An OOM within a query that uses a custom rank function. 
# reset_db do_execsql_test 5.0 { PRAGMA encoding='utf16'; CREATE VIRTUAL TABLE x2 USING fts5(x); INSERT INTO x2(rowid, x) VALUES(10, 'a b c'); -- 3 INSERT INTO x2(rowid, x) VALUES(20, 'a b c'); -- 6 INSERT INTO x2(rowid, x) VALUES(30, 'a b c'); -- 2 INSERT INTO x2(rowid, x) VALUES(40, 'a b c'); -- 5 INSERT INTO x2(rowid, x) VALUES(50, 'a b c'); -- 1 } proc rowidmod {cmd mod} { set row [$cmd xRowid] expr {$row % $mod} } sqlite3_fts5_create_function db rowidmod rowidmod do_faultsim_test 5.1 -faults oom-* -body { db eval { SELECT rowid || '-' || rank FROM x2 WHERE x2 MATCH 'b' AND rank MATCH "rowidmod('7')" ORDER BY rank } } -test { faultsim_test_result {0 {50-1 30-2 10-3 40-5 20-6}} } proc rowidprefix {cmd prefix} { set row [$cmd xRowid] set {} "${row}-${prefix}" } sqlite3_fts5_create_function db rowidprefix rowidprefix set str [string repeat abcdefghijklmnopqrstuvwxyz 10] do_faultsim_test 5.2 -faults oom-* -body { db eval " SELECT rank, x FROM x2 WHERE x2 MATCH 'b' AND rank MATCH 'rowidprefix(''$::str'')' LIMIT 1 " } -test { faultsim_test_result "0 {10-$::str {a b c}}" } #------------------------------------------------------------------------- # OOM errors within auxiliary functions. 
# reset_db do_execsql_test 6.0 { CREATE VIRTUAL TABLE x3 USING fts5(xxx); INSERT INTO x3 VALUES('a b c d c b a'); INSERT INTO x3 VALUES('a a a a a a a'); INSERT INTO x3 VALUES('a a a a a a a'); } do_faultsim_test 6.1 -faults oom-t* -body { db eval { SELECT highlight(x3, 0, '*', '*') FROM x3 WHERE x3 MATCH 'c' } } -test { faultsim_test_result {0 {{a b *c* d *c* b a}}} } proc firstinst {cmd} { foreach {p c o} [$cmd xInst 0] {} expr $c*100 + $o } sqlite3_fts5_create_function db firstinst firstinst do_faultsim_test 6.2 -faults oom-t* -body { db eval { SELECT firstinst(x3) FROM x3 WHERE x3 MATCH 'c' } } -test { faultsim_test_result {0 2} {1 SQLITE_NOMEM} } proc previc {cmd} { set res [$cmd xGetAuxdataInt 0] $cmd xSetAuxdataInt [$cmd xInstCount] return $res } sqlite3_fts5_create_function db previc previc do_faultsim_test 6.2 -faults oom-t* -body { db eval { SELECT previc(x3) FROM x3 WHERE x3 MATCH 'a' } } -test { faultsim_test_result {0 {0 2 7}} {1 SQLITE_NOMEM} } #------------------------------------------------------------------------- # OOM error when querying for a phrase with many tokens. 
# reset_db do_execsql_test 7.0 { CREATE VIRTUAL TABLE tt USING fts5(x, y); INSERT INTO tt VALUES('f b g b c b', 'f a d c c b'); -- 1 INSERT INTO tt VALUES('d a e f e d', 'f b b d e e'); -- 2 INSERT INTO tt VALUES('f b g a d c', 'e f c f a d'); -- 3 INSERT INTO tt VALUES('f f c d g f', 'f a e b g b'); -- 4 INSERT INTO tt VALUES('a g b d a g', 'e g a e a c'); -- 5 INSERT INTO tt VALUES('c d b d e f', 'f g e g e e'); -- 6 INSERT INTO tt VALUES('e g f f b c', 'f c e f g f'); -- 7 INSERT INTO tt VALUES('e g c f c e', 'f e e a f g'); -- 8 INSERT INTO tt VALUES('e a e b e e', 'd c c f f f'); -- 9 INSERT INTO tt VALUES('f a g g c c', 'e g d g c e'); -- 10 INSERT INTO tt VALUES('c d b a e f', 'f g e h e e'); -- 11 CREATE VIRTUAL TABLE tt2 USING fts5(o); INSERT INTO tt2(rowid, o) SELECT rowid, x||' '||y FROM tt; INSERT INTO tt2(rowid, o) VALUES(12, 'a b c d e f g h i j k l'); } do_faultsim_test 7.2 -faults oom-* -body { db eval { SELECT rowid FROM tt WHERE tt MATCH 'f+g+e+g+e+e' } } -test { faultsim_test_result {0 6} {1 SQLITE_NOMEM} } do_faultsim_test 7.3 -faults oom-* -body { db eval { SELECT rowid FROM tt WHERE tt MATCH 'NEAR(a b c d e f)' } } -test { faultsim_test_result {0 11} {1 SQLITE_NOMEM} } do_faultsim_test 7.4 -faults oom-t* -body { db eval { SELECT rowid FROM tt2 WHERE tt2 MATCH '"g c f c e f e e a f"' } } -test { faultsim_test_result {0 8} {1 SQLITE_NOMEM} } do_faultsim_test 7.5 -faults oom-* -body { db eval {SELECT rowid FROM tt2 WHERE tt2 MATCH 'NEAR(a b c d e f g h i j k)'} } -test { faultsim_test_result {0 12} {1 SQLITE_NOMEM} } do_faultsim_test 7.6 -faults oom-* -body { db eval {SELECT rowid FROM tt WHERE tt MATCH 'y: "c c"'} } -test { faultsim_test_result {0 {1 9}} {1 SQLITE_NOMEM} } #------------------------------------------------------------------------- # reset_db do_execsql_test 8.0 { CREATE VIRTUAL TABLE tt USING fts5(x); INSERT INTO tt(tt, rank) VALUES('pgsz', 32); BEGIN; INSERT INTO tt(rowid, x) VALUES(1, 'a b c d x x'); WITH ii(i) AS (SELECT 2 
UNION ALL SELECT i+1 FROM ii WHERE i<99) INSERT INTO tt(rowid, x) SELECT i, 'a b c x x d' FROM ii; INSERT INTO tt(rowid, x) VALUES(100, 'a b c d x x'); COMMIT; } do_faultsim_test 8.1 -faults oom-t* -body { db eval { SELECT rowid FROM tt WHERE tt MATCH 'NEAR(a b c d, 2)' } } -test { faultsim_test_result {0 {1 100}} {1 SQLITE_NOMEM} } do_faultsim_test 8.2 -faults oom-t* -body { db eval { SELECT count(*) FROM tt WHERE tt MATCH 'a OR d' } } -test { faultsim_test_result {0 100} {1 SQLITE_NOMEM} } #------------------------------------------------------------------------- # Fault in NOT query. # reset_db do_execsql_test 9.0 { CREATE VIRTUAL TABLE tt USING fts5(x); INSERT INTO tt(tt, rank) VALUES('pgsz', 32); BEGIN; WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<200) INSERT INTO tt(rowid, x) SELECT i, CASE WHEN (i%50)==0 THEN 'a a a a a a' ELSE 'a x a x a x' END FROM ii; COMMIT; } do_faultsim_test 9.1 -faults oom-* -body { db eval { SELECT rowid FROM tt WHERE tt MATCH 'a NOT x' } } -test { faultsim_test_result {0 {50 100 150 200}} {1 SQLITE_NOMEM} } #------------------------------------------------------------------------- # OOM in fts5_expr() SQL function. # do_faultsim_test 10.1 -faults oom-t* -body { db one { SELECT fts5_expr('a AND b NEAR(a b)') } } -test { faultsim_test_result {0 {"a" AND "b" AND NEAR("a" "b", 10)}} } do_faultsim_test 10.2 -faults oom-t* -body { db one { SELECT fts5_expr_tcl('x:"a b c" AND b NEAR(a b)', 'ns', 'x') } } -test { set res {AND [ns -col 0 -- {a b c}] [ns -- {b}] [ns -near 10 -- {a} {b}]} faultsim_test_result [list 0 $res] } do_faultsim_test 10.3 -faults oom-t* -body { db one { SELECT fts5_expr('x:a', 'x') } } -test { faultsim_test_result {0 {x : "a"}} } #------------------------------------------------------------------------- # OOM while configuring 'rank' option. 
# reset_db do_execsql_test 11.0 { CREATE VIRTUAL TABLE ft USING fts5(x); } do_faultsim_test 11.1 -faults oom-t* -body { db eval { INSERT INTO ft(ft, rank) VALUES('rank', 'bm25(10.0, 5.0)') } } -test { faultsim_test_result {0 {}} {1 {disk I/O error}} } #------------------------------------------------------------------------- # OOM while creating an fts5vocab table. # reset_db do_execsql_test 12.0 { CREATE VIRTUAL TABLE ft USING fts5(x); } faultsim_save_and_close do_faultsim_test 12.1 -faults oom-t* -prep { faultsim_restore_and_reopen db eval { SELECT * FROM sqlite_master } } -body { db eval { CREATE VIRTUAL TABLE vv USING fts5vocab(ft, 'row') } } -test { faultsim_test_result {0 {}} } #------------------------------------------------------------------------- # OOM while querying an fts5vocab table. # reset_db do_execsql_test 13.0 { CREATE VIRTUAL TABLE ft USING fts5(x); INSERT INTO ft VALUES('a b'); CREATE VIRTUAL TABLE vv USING fts5vocab(ft, 'row'); } faultsim_save_and_close do_faultsim_test 13.1 -faults oom-t* -prep { faultsim_restore_and_reopen db eval { SELECT * FROM vv } } -body { db eval { SELECT * FROM vv } } -test { faultsim_test_result {0 {a 1 1 b 1 1}} } #------------------------------------------------------------------------- # OOM in multi-column token query. 
# reset_db do_execsql_test 13.0 { CREATE VIRTUAL TABLE ft USING fts5(x, y, z); INSERT INTO ft(ft, rank) VALUES('pgsz', 32); INSERT INTO ft VALUES( 'x x x x x x x x x x x x x x x x', 'y y y y y y y y y y y y y y y y', 'z z z z z z z z x x x x x x x x' ); INSERT INTO ft SELECT * FROM ft; INSERT INTO ft SELECT * FROM ft; INSERT INTO ft SELECT * FROM ft; INSERT INTO ft SELECT * FROM ft; } faultsim_save_and_close do_faultsim_test 13.1 -faults oom-t* -prep { faultsim_restore_and_reopen db eval { SELECT * FROM ft } } -body { db eval { SELECT rowid FROM ft WHERE ft MATCH '{x z}: x' } } -test { faultsim_test_result {0 {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16}} } #------------------------------------------------------------------------- # OOM in an "ALTER TABLE RENAME TO" # reset_db do_execsql_test 14.0 { CREATE VIRTUAL TABLE "tbl one" USING fts5(x, y, z); } faultsim_save_and_close do_faultsim_test 14.1 -faults oom-t* -prep { faultsim_restore_and_reopen db eval { SELECT * FROM "tbl one" } } -body { db eval { ALTER TABLE "tbl one" RENAME TO "tbl two" } } -test { faultsim_test_result {0 {}} } finish_test |
Added ext/fts5/test/fts5fault5.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 | # 2014 June 17 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # # This file is focused on OOM errors. # source [file join [file dirname [info script]] fts5_common.tcl] source $testdir/malloc_common.tcl set testprefix fts5fault5 # If SQLITE_ENABLE_FTS3 is defined, omit this file. ifcapable !fts5 { finish_test return } #------------------------------------------------------------------------- # OOM while creating an FTS5 table. # do_faultsim_test 1.1 -faults oom-t* -prep { db eval { DROP TABLE IF EXISTS abc } } -body { db eval { CREATE VIRTUAL TABLE abc USING fts5(x,y) } } -test { faultsim_test_result {0 {}} } #------------------------------------------------------------------------- # OOM while writing a multi-tier doclist-index. And while running # integrity-check on the same. 
# reset_db do_execsql_test 2.0 { CREATE VIRTUAL TABLE tt USING fts5(x); INSERT INTO tt(tt, rank) VALUES('pgsz', 32); } faultsim_save_and_close do_faultsim_test 2.1 -faults oom-t* -prep { faultsim_restore_and_reopen db eval { SELECT * FROM tt } } -body { set str [string repeat "abc " 50] db eval { WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100) INSERT INTO tt(rowid, x) SELECT i, $str FROM ii; } } -test { faultsim_test_result {0 {}} } do_faultsim_test 2.2 -faults oom-t* -body { db eval { INSERT INTO tt(tt) VALUES('integrity-check') } } -test { faultsim_test_result {0 {}} } #------------------------------------------------------------------------- # OOM while scanning an fts5vocab table. # reset_db do_test 3.0 { execsql { CREATE VIRTUAL TABLE tt USING fts5(x); CREATE VIRTUAL TABLE tv USING fts5vocab(tt, 'row'); INSERT INTO tt(tt, rank) VALUES('pgsz', 32); BEGIN; } for {set i 0} {$i < 20} {incr i} { set str [string repeat "$i " 50] execsql { INSERT INTO tt VALUES($str) } } execsql COMMIT } {} do_faultsim_test 3.1 -faults oom-t* -body { db eval { SELECT term FROM tv; } } -test { faultsim_test_result {0 {0 1 10 11 12 13 14 15 16 17 18 19 2 3 4 5 6 7 8 9}} } finish_test |
Added ext/fts5/test/fts5fault6.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 | # 2014 June 17 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # # This file is focused on OOM errors. # source [file join [file dirname [info script]] fts5_common.tcl] source $testdir/malloc_common.tcl set testprefix fts5fault6 # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } #------------------------------------------------------------------------- # OOM while rebuilding an FTS5 table. 
# do_execsql_test 1.0 { CREATE VIRTUAL TABLE tt USING fts5(a, b); INSERT INTO tt VALUES('c d c g g f', 'a a a d g a'); INSERT INTO tt VALUES('c d g b f d', 'b g e c g c'); INSERT INTO tt VALUES('c c f d e d', 'c e g d b c'); INSERT INTO tt VALUES('e a f c e f', 'g b a c d g'); INSERT INTO tt VALUES('c g f b b d', 'g c d c f g'); INSERT INTO tt VALUES('d a g a b b', 'g c g g c e'); INSERT INTO tt VALUES('e f a b c e', 'f d c d c c'); INSERT INTO tt VALUES('e c a g c d', 'b b g f f b'); INSERT INTO tt VALUES('g b d d e b', 'f f b d a c'); INSERT INTO tt VALUES('e a d a e d', 'c e a e f g'); } faultsim_save_and_close do_faultsim_test 1.1 -faults oom-t* -prep { faultsim_restore_and_reopen } -body { db eval { INSERT INTO tt(tt) VALUES('rebuild') } } -test { faultsim_test_result {0 {}} } do_faultsim_test 1.2 -faults oom-t* -prep { faultsim_restore_and_reopen } -body { db eval { REPLACE INTO tt(rowid, a, b) VALUES(6, 'x y z', 'l l l'); } } -test { faultsim_test_result {0 {}} } #------------------------------------------------------------------------- # OOM within a special delete. 
# reset_db do_execsql_test 2.0 { CREATE VIRTUAL TABLE tt USING fts5(a, content=""); INSERT INTO tt VALUES('c d c g g f'); INSERT INTO tt VALUES('c d g b f d'); INSERT INTO tt VALUES('c c f d e d'); INSERT INTO tt VALUES('e a f c e f'); INSERT INTO tt VALUES('c g f b b d'); INSERT INTO tt VALUES('d a g a b b'); INSERT INTO tt VALUES('e f a b c e'); INSERT INTO tt VALUES('e c a g c d'); INSERT INTO tt VALUES('g b d d e b'); INSERT INTO tt VALUES('e a d a e d'); } faultsim_save_and_close do_faultsim_test 2.1 -faults oom-t* -prep { faultsim_restore_and_reopen } -body { db eval { INSERT INTO tt(tt, rowid, a) VALUES('delete', 3, 'c d g b f d'); } } -test { faultsim_test_result {0 {}} } do_faultsim_test 2.2 -faults oom-t* -prep { faultsim_restore_and_reopen } -body { db eval { INSERT INTO tt(tt) VALUES('delete-all') } } -test { faultsim_test_result {0 {}} } do_faultsim_test 2.3 -faults oom-t* -prep { faultsim_restore_and_reopen } -body { db eval { INSERT INTO tt VALUES('x y z') } } -test { faultsim_test_result {0 {}} } #------------------------------------------------------------------------- # OOM in the ASCII tokenizer with very large tokens. # # Also the unicode tokenizer. # set t1 [string repeat wxyz 20] set t2 [string repeat wxyz 200] set t3 [string repeat wxyz 2000] set doc "$t1 $t2 $t3" do_execsql_test 3.0 { CREATE VIRTUAL TABLE xyz USING fts5(c, tokenize=ascii, content=""); CREATE VIRTUAL TABLE xyz2 USING fts5(c, content=""); } faultsim_save_and_close do_faultsim_test 3.1 -faults oom-t* -prep { faultsim_restore_and_reopen db eval { SELECT * FROM xyz } } -body { db eval { INSERT INTO xyz VALUES($::doc) } } -test { faultsim_test_result {0 {}} } do_faultsim_test 3.2 -faults oom-t* -prep { faultsim_restore_and_reopen db eval { SELECT * FROM xyz2 } } -body { db eval { INSERT INTO xyz2 VALUES($::doc) } } -test { faultsim_test_result {0 {}} } #------------------------------------------------------------------------- # OOM while initializing a unicode61 tokenizer. 
# reset_db faultsim_save_and_close do_faultsim_test 4.1 -faults oom-t* -prep { faultsim_restore_and_reopen } -body { db eval { CREATE VIRTUAL TABLE yu USING fts5(x, tokenize="unicode61 separators abc"); } } -test { faultsim_test_result {0 {}} } finish_test |
Added ext/fts5/test/fts5full.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 | # 2014 Dec 20 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # Test that SQLITE_FULL is returned if the FTS5 table cannot find a free # segid to use. In practice this can only really happen when automerge and # crisismerge are both disabled. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5full # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } do_execsql_test 1.0 { CREATE VIRTUAL TABLE x8 USING fts5(i); INSERT INTO x8(x8, rank) VALUES('automerge', 0); INSERT INTO x8(x8, rank) VALUES('crisismerge', 100000); } db func rnddoc fts5_rnddoc do_test 1.1 { list [catch { for {set i 0} {$i < 2500} {incr i} { execsql { INSERT INTO x8 VALUES( rnddoc(5) ); } } } msg] $msg } {1 {database or disk is full}} finish_test |
Added ext/fts5/test/fts5hash.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 | # 2015 April 21 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # The tests in this file are focused on the code in fts5_hash.c. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5hash # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } #------------------------------------------------------------------------- # Return a list of tokens (a vocabulary) that all share the same hash # key value. This can be used to test hash collisions. 
# proc build_vocab1 {args} { set O(-nslot) 1024 set O(-nword) 20 set O(-hash) 88 set O(-prefix) "" if {[llength $args] % 2} { error "bad args" } array set O2 $args foreach {k v} $args { if {[info exists O($k)]==0} { error "bad option: $k" } set O($k) $v } set L [list] while {[llength $L] < $O(-nword)} { set t "$O(-prefix)[random_token]" set h [sqlite3_fts5_token_hash $O(-nslot) $t] if {$O(-hash)==$h} { lappend L $t } } return $L } proc random_token {} { set map [list 0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j] set iVal [expr int(rand() * 2000000)] return [string map $map $iVal] } proc random_doc {vocab nWord} { set doc "" set nVocab [llength $vocab] for {set i 0} {$i<$nWord} {incr i} { set j [expr {int(rand() * $nVocab)}] lappend doc [lindex $vocab $j] } return $doc } set vocab [build_vocab1] db func r random_doc do_execsql_test 1.0 { CREATE VIRTUAL TABLE eee USING fts5(e, ee); BEGIN; WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100) INSERT INTO eee SELECT r($vocab, 5), r($vocab, 7) FROM ii; INSERT INTO eee(eee) VALUES('integrity-check'); COMMIT; INSERT INTO eee(eee) VALUES('integrity-check'); } set hash [sqlite3_fts5_token_hash 1024 xyz] set vocab [build_vocab1 -prefix xyz -hash $hash] lappend vocab xyz do_execsql_test 1.1 { CREATE VIRTUAL TABLE vocab USING fts5vocab(eee, 'row'); BEGIN; WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100) INSERT INTO eee SELECT r($vocab, 5), r($vocab, 7) FROM ii; INSERT INTO eee(eee) VALUES('integrity-check'); } do_test 1.2 { db eval { SELECT term, doc FROM vocab } { set nRow [db one {SELECT count(*) FROM eee WHERE eee MATCH $term}] if {$nRow != $doc} { error "term=$term fts5vocab=$doc cnt=$nRow" } } set {} {} } {} do_execsql_test 1.3 { COMMIT; INSERT INTO eee(eee) VALUES('integrity-check'); } finish_test |
Added ext/fts5/test/fts5integrity.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 | # 2015 Jan 13 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # This file contains tests focused on the integrity-check procedure. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5integrity # If SQLITE_ENABLE_FTS5 is not defined, omit this file. 
ifcapable !fts5 { finish_test return } do_execsql_test 1.0 { CREATE VIRTUAL TABLE xx USING fts5(x); INSERT INTO xx VALUES('term'); } do_execsql_test 1.1 { INSERT INTO xx(xx) VALUES('integrity-check'); } do_execsql_test 2.0 { CREATE VIRTUAL TABLE yy USING fts5(x, prefix=1); INSERT INTO yy VALUES('term'); } do_execsql_test 2.1 { INSERT INTO yy(yy) VALUES('integrity-check'); } #-------------------------------------------------------------------- # do_execsql_test 3.0 { CREATE VIRTUAL TABLE zz USING fts5(z); INSERT INTO zz(zz, rank) VALUES('pgsz', 32); INSERT INTO zz VALUES('b b b b b b b b b b b b b b'); INSERT INTO zz SELECT z FROM zz; INSERT INTO zz SELECT z FROM zz; INSERT INTO zz SELECT z FROM zz; INSERT INTO zz SELECT z FROM zz; INSERT INTO zz SELECT z FROM zz; INSERT INTO zz SELECT z FROM zz; INSERT INTO zz(zz) VALUES('optimize'); } do_execsql_test 3.1 { INSERT INTO zz(zz) VALUES('integrity-check'); } #-------------------------------------------------------------------- # Mess around with a docsize record. And the averages record. Then # check that integrity-check picks it up. 
# do_execsql_test 4.0 { CREATE VIRTUAL TABLE aa USING fts5(zz); INSERT INTO aa(zz) VALUES('a b c d e'); INSERT INTO aa(zz) VALUES('a b c d'); INSERT INTO aa(zz) VALUES('a b c'); INSERT INTO aa(zz) VALUES('a b'); INSERT INTO aa(zz) VALUES('a'); SELECT length(sz) FROM aa_docsize; } {1 1 1 1 1} do_execsql_test 4.1 { INSERT INTO aa(aa) VALUES('integrity-check'); } do_catchsql_test 4.2 { BEGIN; UPDATE aa_docsize SET sz = X'44' WHERE rowid = 3; INSERT INTO aa(aa) VALUES('integrity-check'); } {1 {database disk image is malformed}} do_catchsql_test 4.3 { ROLLBACK; BEGIN; UPDATE aa_data SET block = X'44' WHERE rowid = 1; INSERT INTO aa(aa) VALUES('integrity-check'); } {1 {database disk image is malformed}} do_catchsql_test 4.4 { ROLLBACK; BEGIN; INSERT INTO aa_docsize VALUES(23, X'04'); INSERT INTO aa(aa) VALUES('integrity-check'); } {1 {database disk image is malformed}} do_catchsql_test 4.5 { ROLLBACK; BEGIN; INSERT INTO aa_docsize VALUES(23, X'00'); INSERT INTO aa_content VALUES(23, ''); INSERT INTO aa(aa) VALUES('integrity-check'); } {1 {database disk image is malformed}} #db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM zz_data} {puts $r} #exit finish_test |
Added ext/fts5/test/fts5merge.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 | # 2014 Dec 20 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # Test that focus on incremental merges of segments. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5merge # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } db func repeat [list string repeat] #------------------------------------------------------------------------- # Create an fts index so that: # # * the index consists of two top-level segments # * each segment contains records related to $nRowPerSeg rows # * all rows consist of tokens "x" and "y" only. 
# # Then run ('merge', 1) until everything is completely merged. # proc do_merge1_test {testname nRowPerSeg} { set ::nRowPerSeg [expr $nRowPerSeg] do_execsql_test $testname.0 { DROP TABLE IF EXISTS x8; CREATE VIRTUAL TABLE x8 USING fts5(i); INSERT INTO x8(x8, rank) VALUES('pgsz', 32); WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<$::nRowPerSeg) INSERT INTO x8 SELECT repeat('x y ', i % 16) FROM ii; WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<$::nRowPerSeg) INSERT INTO x8 SELECT repeat('x y ', i % 16) FROM ii; INSERT INTO x8(x8, rank) VALUES('automerge', 2); } for {set tn 1} {[lindex [fts5_level_segs x8] 0]>0} {incr tn} { do_execsql_test $testname.$tn { INSERT INTO x8(x8, rank) VALUES('merge', 1); INSERT INTO x8(x8) VALUES('integrity-check'); } if {$tn>5} break } do_test $testname.x [list expr "$tn < 5"] 1 } do_merge1_test 1.1 1 do_merge1_test 1.2 2 do_merge1_test 1.3 3 do_merge1_test 1.4 4 do_merge1_test 1.5 10 do_merge1_test 1.6 20 do_merge1_test 1.7 100 #------------------------------------------------------------------------- # proc do_merge2_test {testname nRow} { db func rnddoc fts5_rnddoc do_execsql_test $testname.0 { DROP TABLE IF EXISTS x8; CREATE VIRTUAL TABLE x8 USING fts5(i); INSERT INTO x8(x8, rank) VALUES('pgsz', 32); } set ::nRow $nRow do_test $testname.1 { for {set i 0} {$i < $::nRow} {incr i} { execsql { INSERT INTO x8 VALUES( rnddoc(($i%16) + 5) ) } while {[not_merged x8]} { execsql { INSERT INTO x8(x8, rank) VALUES('automerge', 2); INSERT INTO x8(x8, rank) VALUES('merge', 1); INSERT INTO x8(x8, rank) VALUES('automerge', 16); INSERT INTO x8(x8) VALUES('integrity-check'); } } } } {} } proc not_merged {tbl} { set segs [fts5_level_segs $tbl] foreach s $segs { if {$s>1} { return 1 } } return 0 } do_merge2_test 2.1 5 do_merge2_test 2.2 10 do_merge2_test 2.3 20 #------------------------------------------------------------------------- # Test that an auto-merge will complete any merge that has already been # started, even if 
the number of input segments is less than the current # value of the 'automerge' configuration parameter. # db func rnddoc fts5_rnddoc do_execsql_test 3.1 { DROP TABLE IF EXISTS x8; CREATE VIRTUAL TABLE x8 USING fts5(i); INSERT INTO x8(x8, rank) VALUES('pgsz', 32); INSERT INTO x8 VALUES(rnddoc(100)); INSERT INTO x8 VALUES(rnddoc(100)); } do_test 3.2 { execsql { INSERT INTO x8(x8, rank) VALUES('automerge', 4); INSERT INTO x8(x8, rank) VALUES('merge', 1); } fts5_level_segs x8 } {2} do_test 3.3 { execsql { INSERT INTO x8(x8, rank) VALUES('automerge', 2); INSERT INTO x8(x8, rank) VALUES('merge', 1); } fts5_level_segs x8 } {2 1} do_test 3.4 { execsql { INSERT INTO x8(x8, rank) VALUES('automerge', 4) } while {[not_merged x8]} { execsql { INSERT INTO x8(x8, rank) VALUES('merge', 1) } } fts5_level_segs x8 } {0 1} #------------------------------------------------------------------------- # proc mydoc {} { set x [lindex {a b c d e f g h i j} [expr int(rand()*10)]] return [string repeat "$x " 30] } db func mydoc mydoc proc mycount {} { set res [list] foreach x {a b c d e f g h i j} { lappend res [db one {SELECT count(*) FROM x8 WHERE x8 MATCH $x}] } set res } #1 32 foreach {tn pgsz} { 2 1000 } { do_execsql_test 4.$tn.1 { DROP TABLE IF EXISTS x8; CREATE VIRTUAL TABLE x8 USING fts5(i); INSERT INTO x8(x8, rank) VALUES('pgsz', $pgsz); } do_execsql_test 4.$tn.2 { INSERT INTO x8(x8, rank) VALUES('merge', 1); } do_execsql_test 4.$tn.3 { WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100) INSERT INTO x8 SELECT mydoc() FROM ii; WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100) INSERT INTO x8 SELECT mydoc() FROM ii; INSERT INTO x8(x8, rank) VALUES('automerge', 2); } set expect [mycount] for {set i 0} {$i < 20} {incr i} { do_test 4.$tn.4.$i { execsql { INSERT INTO x8(x8, rank) VALUES('merge', 1); } mycount } $expect break } # db eval {SELECT fts5_decode(rowid, block) AS r FROM x8_data} { puts $r } } finish_test |
Added ext/fts5/test/fts5near.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 | # 2014 Jan 08 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # Tests focused on the NEAR operator. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5near # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } proc do_near_test {tn doc near res} { uplevel [list do_execsql_test $tn " DELETE FROM t1; INSERT INTO t1 VALUES('$doc'); SELECT count(*) FROM t1 WHERE t1 MATCH '$near'; " $res] } execsql { CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = "ascii tokenchars '.'") } do_near_test 1.1 ". . a . . . b . ." { NEAR(a b, 5) } 1 do_near_test 1.2 ". . a . . . b . ." { NEAR(a b, 4) } 1 do_near_test 1.3 ". . a . . . b . ." { NEAR(a b, 3) } 1 do_near_test 1.4 ". . a . . . b . ." { NEAR(a b, 2) } 0 do_near_test 1.5 ". . a . . . b . ." { NEAR(b a, 5) } 1 do_near_test 1.6 ". . a . . . b . ." { NEAR(b a, 4) } 1 do_near_test 1.7 ". . a . . . b . ." { NEAR(b a, 3) } 1 do_near_test 1.8 ". . a . . . b . ." { NEAR(b a, 2) } 0 do_near_test 1.9 ". a b . . . c . ." { NEAR("a b" c, 3) } 1 do_near_test 1.10 ". a b . . . c . ." { NEAR("a b" c, 2) } 0 do_near_test 1.11 ". a b . . . c . ." { NEAR(c "a b", 3) } 1 do_near_test 1.12 ". a b . . . c . ." { NEAR(c "a b", 2) } 0 do_near_test 1.13 ". a b . . . c d ." { NEAR(a+b c+d, 3) } 1 do_near_test 1.14 ". a b . . . c d ." 
{ NEAR(a+b c+d, 2) } 0 do_near_test 1.15 ". a b . . . c d ." { NEAR(c+d a+b, 3) } 1 do_near_test 1.16 ". a b . . . c d ." { NEAR(c+d a+b, 2) } 0 do_near_test 1.17 ". a b . . . c d ." { NEAR(a b c d, 5) } 1 do_near_test 1.18 ". a b . . . c d ." { NEAR(a b c d, 4) } 0 do_near_test 1.19 ". a b . . . c d ." { NEAR(a+b c d, 4) } 1 do_near_test 1.20 "a b c d e f g h i" { NEAR(b+c a+b+c+d i, 5) } 1 do_near_test 1.21 "a b c d e f g h i" { NEAR(b+c a+b+c+d i, 4) } 0 do_near_test 1.22 "a b c d e f g h i" { NEAR(a+b+c+d i b+c, 5) } 1 do_near_test 1.23 "a b c d e f g h i" { NEAR(a+b+c+d i b+c, 4) } 0 do_near_test 1.24 "a b c d e f g h i" { NEAR(i a+b+c+d b+c, 5) } 1 do_near_test 1.25 "a b c d e f g h i" { NEAR(i a+b+c+d b+c, 4) } 0 finish_test |
Added ext/fts5/test/fts5optimize.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 | # 2014 Dec 20 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5optimize # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } proc rnddoc {nWord} { set vocab {a b c d e f g h i j k l m n o p q r s t u v w x y z} set nVocab [llength $vocab] set ret [list] for {set i 0} {$i < $nWord} {incr i} { lappend ret [lindex $vocab [expr {int(rand() * $nVocab)}]] } return $ret } foreach {tn nStep} { 1 2 2 10 3 50 4 500 } { if {$tn!=4} continue reset_db db func rnddoc rnddoc do_execsql_test 1.$tn.1 { CREATE VIRTUAL TABLE t1 USING fts5(x, y); } do_test 1.$tn.2 { for {set i 0} {$i < $nStep} {incr i} { execsql { INSERT INTO t1 VALUES( rnddoc(5), rnddoc(5) ) } } } {} do_execsql_test 1.$tn.3 { INSERT INTO t1(t1) VALUES('integrity-check'); } do_execsql_test 1.$tn.4 { INSERT INTO t1(t1) VALUES('optimize'); } do_execsql_test 1.$tn.5 { INSERT INTO t1(t1) VALUES('integrity-check'); } } finish_test |
Added ext/fts5/test/fts5plan.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 | # 2014 Dec 20 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # This file focuses on testing the planner (xBestIndex function). # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5plan # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } do_execsql_test 1.0 { CREATE TABLE t1(x, y); CREATE VIRTUAL TABLE f1 USING fts5(ff); } do_eqp_test 1.1 { SELECT * FROM t1, f1 WHERE f1 MATCH t1.x } { 0 0 0 {SCAN TABLE t1} 0 1 1 {SCAN TABLE f1 VIRTUAL TABLE INDEX 1:} } do_eqp_test 1.2 { SELECT * FROM t1, f1 WHERE f1 > t1.x } { 0 0 1 {SCAN TABLE f1 VIRTUAL TABLE INDEX 0:} 0 1 0 {SCAN TABLE t1} } do_eqp_test 1.3 { SELECT * FROM f1 WHERE f1 MATCH ? ORDER BY ff } { 0 0 0 {SCAN TABLE f1 VIRTUAL TABLE INDEX 1:} 0 0 0 {USE TEMP B-TREE FOR ORDER BY} } do_eqp_test 1.4 { SELECT * FROM f1 ORDER BY rank } { 0 0 0 {SCAN TABLE f1 VIRTUAL TABLE INDEX 0:} 0 0 0 {USE TEMP B-TREE FOR ORDER BY} } do_eqp_test 1.5 { SELECT * FROM f1 WHERE rank MATCH ? } { 0 0 0 {SCAN TABLE f1 VIRTUAL TABLE INDEX 2:} } finish_test |
Added ext/fts5/test/fts5porter.test.
more than 10,000 changes
Added ext/fts5/test/fts5porter2.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 | # 2014 Dec 20 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # Tests focusing on the fts5 porter stemmer implementation. # # These are extra tests added to those in fts5porter.test in order to # improve test coverage of the porter stemmer implementation. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5porter2 # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } set test_vocab { tion tion ation ation vation vation avation avat vion vion ion ion relational relat relation relat relate relat zzz zzz ii ii iiing ii xtional xtional xenci xenci xlogi xlogi realization realiz realize realiz xization xizat capitalism capit talism talism xiveness xive xfulness xful xousness xous xical xical xicate xicat xicity xiciti ies ie eed e eing e s s } set i 0 foreach {in out} $test_vocab { do_test "1.$i.($in -> $out)" { lindex [sqlite3_fts5_tokenize db porter $in] 0 } $out incr i } finish_test |
Added ext/fts5/test/fts5prefix.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 | # 2015 Jan 13 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # This file contains tests focused on prefix indexes. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5prefix # If SQLITE_ENABLE_FTS5 is not defined, omit this file. ifcapable !fts5 { finish_test return } do_execsql_test 1.0 { CREATE VIRTUAL TABLE xx USING fts5(x, prefix=1); INSERT INTO xx VALUES('one two three'); INSERT INTO xx VALUES('four five six'); INSERT INTO xx VALUES('seven eight nine ten'); } do_execsql_test 1.1 { SELECT rowid FROM xx WHERE xx MATCH 't*' } {1 3} #------------------------------------------------------------------------- # Check that prefix indexes really do index n-character prefixes, not # n-byte prefixes. Use the ascii tokenizer so as not to be confused by # diacritic removal. # do_execsql_test 2.0 { CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = ascii, prefix = 2) } do_test 2.1 { foreach {rowid string} { 1 "\xCA\xCB\xCC\xCD" 2 "\u1234\u5678\u4321\u8765" } { execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $string) } } } {} do_execsql_test 2.2 { INSERT INTO t1(t1) VALUES('integrity-check'); } foreach {tn q res} { 1 "SELECT rowid FROM t1 WHERE t1 MATCH '\xCA\xCB*'" 1 2 "SELECT rowid FROM t1 WHERE t1 MATCH '\u1234\u5678*'" 2 } { do_execsql_test 2.3.$tn $q $res } finish_test |
Added ext/fts5/test/fts5rank.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 | # 2014 Dec 20 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # This file focuses on testing queries that use the "rank" column. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5rank # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } #------------------------------------------------------------------------- # "ORDER BY rank" + highlight() + large poslists. # do_execsql_test 1.0 { CREATE VIRTUAL TABLE xyz USING fts5(z); } do_test 1.1 { set doc [string trim [string repeat "x y " 500]] execsql { INSERT INTO xyz VALUES($doc) } } {} do_execsql_test 1.2 { SELECT highlight(xyz, 0, '[', ']') FROM xyz WHERE xyz MATCH 'x' ORDER BY rank } [list [string map {x [x]} $doc]] do_execsql_test 1.3 { SELECT highlight(xyz, 0, '[', ']') FROM xyz WHERE xyz MATCH 'x AND y' ORDER BY rank } [list [string map {x [x] y [y]} $doc]] finish_test |
Added ext/fts5/test/fts5rebuild.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 | # 2014 Dec 20 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5rebuild # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } do_execsql_test 1.1 { CREATE VIRTUAL TABLE f1 USING fts5(a, b); INSERT INTO f1(a, b) VALUES('one', 'o n e'); INSERT INTO f1(a, b) VALUES('two', 't w o'); INSERT INTO f1(a, b) VALUES('three', 't h r e e'); } do_execsql_test 1.2 { INSERT INTO f1(f1) VALUES('integrity-check'); } {} do_execsql_test 1.3 { INSERT INTO f1(f1) VALUES('rebuild'); } {} do_execsql_test 1.4 { INSERT INTO f1(f1) VALUES('integrity-check'); } {} do_execsql_test 1.5 { DELETE FROM f1_data; } {} do_catchsql_test 1.6 { INSERT INTO f1(f1) VALUES('integrity-check'); } {1 {database disk image is malformed}} do_execsql_test 1.7 { INSERT INTO f1(f1) VALUES('rebuild'); INSERT INTO f1(f1) VALUES('integrity-check'); } {} #------------------------------------------------------------------------- # Check that 'rebuild' may not be used with a contentless table. # do_execsql_test 2.1 { CREATE VIRTUAL TABLE nc USING fts5(doc, content=); } do_catchsql_test 2.2 { INSERT INTO nc(nc) VALUES('rebuild'); } {1 {'rebuild' may not be used with a contentless fts5 table}} finish_test |
Added ext/fts5/test/fts5restart.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 | # 2015 April 28 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # This file focuses on testing the planner (xBestIndex function). # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5restart # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } do_execsql_test 1.0 { CREATE VIRTUAL TABLE f1 USING fts5(ff); } #------------------------------------------------------------------------- # Run the 'optimize' command. Check that it does not disturb ongoing # full-text queries. 
#

# 1.1: insert 999 identical rows into f1 and collect the loop counter
# values 1..999 in $lRowid. The tests below all expect their queries to
# return exactly that list of rowids.
do_test 1.1 {
  for {set i 1} {$i < 1000} {incr i} {
    execsql { INSERT INTO f1 VALUES('a b c d e') }
    lappend lRowid $i
  }
} {}

do_execsql_test 1.2 {
  SELECT rowid FROM f1 WHERE f1 MATCH 'c';
} $lRowid

# 1.3: issue the 'optimize' command while the scan is positioned on
# rowid 100. The scan must still visit every rowid.
do_test 1.3 {
  set res [list]
  db eval { SELECT rowid FROM f1 WHERE f1 MATCH 'c' } {
    if {$rowid == 100} {
      execsql { INSERT INTO f1(f1) VALUES('optimize') }
    }
    lappend res $rowid
  }
  set res
} $lRowid

# 1.4.1: same scan, but run on a second handle (db2). The 'optimize'
# issued through catchsql (no handle argument is passed here) has its
# outcome saved in $cres for test 1.4.2.
do_test 1.4.1 {
  sqlite3 db2 test.db
  set res [list]
  db2 eval { SELECT rowid FROM f1 WHERE f1 MATCH 'c' } {
    if {$rowid == 100} {
      set cres [catchsql { INSERT INTO f1(f1) VALUES('optimize') }]
    }
    lappend res $rowid
  }
  set res
} $lRowid

# 1.4.2: the concurrent 'optimize' attempted in 1.4.1 is expected to have
# failed with a lock error.
do_test 1.4.2 {
  db2 close
  set cres
} {1 {database is locked}}

#-------------------------------------------------------------------------
# Open a couple of cursors. Then close them in the same order.
#
do_test 2.1 {
  set ::s1 [sqlite3_prepare db "SELECT rowid FROM f1 WHERE f1 MATCH 'b'" -1 X]
  set ::s2 [sqlite3_prepare db "SELECT rowid FROM f1 WHERE f1 MATCH 'c'" -1 X]
  sqlite3_step $::s1
} {SQLITE_ROW}
do_test 2.2 {
  sqlite3_step $::s2
} {SQLITE_ROW}

# This step was previously also labelled 2.1, which made a failure here
# indistinguishable from a failure of the first step in test reports.
do_test 2.3 {
  sqlite3_finalize $::s1
  sqlite3_finalize $::s2
} {SQLITE_OK}

#-------------------------------------------------------------------------
# Copy data between two FTS5 tables.
#
do_execsql_test 3.1 {
  CREATE VIRTUAL TABLE f2 USING fts5(gg);
  INSERT INTO f2 SELECT ff FROM f1 WHERE f1 MATCH 'b+c+d';
}
do_execsql_test 3.2 {
  SELECT rowid FROM f2 WHERE f2 MATCH 'a+b+c+d+e'
} $lRowid

#-------------------------------------------------------------------------
# Remove the row that an FTS5 cursor is currently pointing to. And
# various other similar things. Check that this does not disturb
# ongoing scans.
#

# Fixture: five rows with explicit rowids 100..500, each containing the
# token '3'.
do_execsql_test 4.0 {
  CREATE VIRTUAL TABLE n4 USING fts5(n);
  INSERT INTO n4(rowid, n) VALUES(100, '1 2 3 4 5');
  INSERT INTO n4(rowid, n) VALUES(200, '1 2 3 4');
  INSERT INTO n4(rowid, n) VALUES(300, '2 3 4');
  INSERT INTO n4(rowid, n) VALUES(400, '2 3');
  INSERT INTO n4(rowid, n) VALUES(500, '3');
}

# 4.1: ascending scan; delete the current row (300) part-way through.
# All five rowids are still expected.
do_test 4.1 {
  set visited {}
  db eval { SELECT rowid FROM n4 WHERE n4 MATCH '3' } {
    if {$rowid == 300} {
      execsql { DELETE FROM n4 WHERE rowid=300 }
    }
    lappend visited $rowid
  }
  set visited
} {100 200 300 400 500}

# 4.2: restore row 300, then repeat the experiment with a descending scan.
do_test 4.2 {
  execsql { INSERT INTO n4(rowid, n) VALUES(300, '2 3 4') }
  set visited {}
  db eval { SELECT rowid FROM n4 WHERE n4 MATCH '3' ORDER BY rowid DESC} {
    if {$rowid == 300} {
      execsql { DELETE FROM n4 WHERE rowid=300 }
    }
    lappend visited $rowid
  }
  set visited
} {500 400 300 200 100}

# 4.3: restore row 300 again, then empty the whole table while the
# descending scan sits on row 300. The expected result stops at 300.
do_test 4.3 {
  execsql { INSERT INTO n4(rowid, n) VALUES(300, '2 3 4') }
  set visited {}
  db eval { SELECT rowid FROM n4 WHERE n4 MATCH '3' ORDER BY rowid DESC} {
    if {$rowid == 300} {
      execsql { DELETE FROM n4 }
    }
    lappend visited $rowid
  }
  set visited
} {500 400 300}

# NOTE(review): the trailing `|` is table-border residue from the diff page
# this text was captured from; it is not part of the Tcl script.
finish_test |
Added ext/fts5/test/fts5rowid.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 | # 2014 Dec 20 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # Tests of the scalar fts5_rowid() and fts5_decode() functions. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5rowid # If SQLITE_ENABLE_FTS5 is defined, omit this file. 
# Note the negation: the body runs (ending the script early) when the
# fts5 capability is NOT available.
ifcapable !fts5 {
  finish_test
  return
}

# 1.x: argument checking of the fts5_rowid() scalar function. The expected
# strings are the literal error messages and must match exactly.
do_catchsql_test 1.1 {
  SELECT fts5_rowid()
} {1 {should be: fts5_rowid(subject, ....)}}

do_catchsql_test 1.2 {
  SELECT fts5_rowid('segment')
} {1 {should be: fts5_rowid('segment', segid, height, pgno))}}

do_execsql_test 1.3 {
  SELECT fts5_rowid('segment', 1, 1, 1)
} {139586437121}

do_catchsql_test 1.4 {
  SELECT fts5_rowid('nosucharg');
} {1 {first arg to fts5_rowid() must be 'segment' or 'start-of-index'}}

#-------------------------------------------------------------------------
# Tests of the fts5_decode() function.
#
reset_db
do_execsql_test 2.1 {
  CREATE VIRTUAL TABLE x1 USING fts5(a, b);
  INSERT INTO x1(x1, rank) VALUES('pgsz', 32);
} {}

# Return a list of $n random three-letter words. Each word is a random
# integer in 0..99, zero-padded to three digits, with the digits 0-9 mapped
# to the letters a-j.
proc rnddoc {n} {
  set map [list 0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j]
  set doc [list]
  for {set i 0} {$i < $n} {incr i} {
    lappend doc [string map $map [format %.3d [expr int(rand()*100)]]]
  }
  set doc
}
# Make rnddoc callable from SQL.
db func rnddoc rnddoc

# 2.2: the recursive CTE has no terminating condition of its own; the
# LIMIT on the INSERT ... SELECT caps it at 10000 rows.
do_execsql_test 2.2 {
  WITH r(a, b) AS (
    SELECT rnddoc(6), rnddoc(6) UNION ALL
    SELECT rnddoc(6), rnddoc(6) FROM r
  )
  INSERT INTO x1 SELECT * FROM r LIMIT 10000;
}

# 2.3: fts5_decode() is expected to return a non-NULL value for every row
# of x1_data, so the count equals the number of rows.
set res [db one {SELECT count(*) FROM x1_data}]
do_execsql_test 2.3 {
  SELECT count(fts5_decode(rowid, block)) FROM x1_data;
} $res
# 2.4: the same holds after every block has been replaced by an empty blob.
do_execsql_test 2.4 {
  UPDATE x1_data SET block = X'';
  SELECT count(fts5_decode(rowid, block)) FROM x1_data;
} $res

# 2.5-2.7: change the 'pgsz' setting to 1024, rebuild the index, and repeat
# the two checks above.
do_execsql_test 2.5 {
  INSERT INTO x1(x1, rank) VALUES('pgsz', 1024);
  INSERT INTO x1(x1) VALUES('rebuild');
}

set res [db one {SELECT count(*) FROM x1_data}]
do_execsql_test 2.6 {
  SELECT count(fts5_decode(rowid, block)) FROM x1_data;
} $res
do_execsql_test 2.7 {
  UPDATE x1_data SET block = X'';
  SELECT count(fts5_decode(rowid, block)) FROM x1_data;
} $res

#-------------------------------------------------------------------------
# Tests with very large tokens.
#

# Four single-token documents: 400 x's, then 300 x's followed by a run of
# w, y or z.
set strlist [list [string repeat x 400]]
foreach {ch n} {w 100 y 100 z 600} {
  lappend strlist "[string repeat x 300][string repeat $ch $n]"
}

# 3.0: load the documents inside a single transaction.
do_test 3.0 {
  execsql {
    BEGIN;
    CREATE VIRTUAL TABLE x2 USING fts5(a);
  }
  foreach str $strlist {
    execsql { INSERT INTO x2 VALUES($str) }
  }
  execsql COMMIT
} {}

# 3.1.N: document N (0-based) must match the row with rowid N+1 only.
# The SQL refers to the Tcl variable $str, so that name must not change.
set tn 0
foreach str $strlist {
  do_execsql_test 3.1.$tn {
    SELECT rowid FROM x2 WHERE x2 MATCH $str
  } [expr {$tn + 1}]
  incr tn
}

# 3.2: every row of x2_data must be decodable.
set nBlock [db one {SELECT count(*) FROM x2_data}]
do_execsql_test 3.2 {
  SELECT count(fts5_decode(rowid, block)) FROM x2_data;
} $nBlock

#-------------------------------------------------------------------------
# Leaf pages with no terms or rowids at all.
#
set strlist [list]
foreach ch {w x y z} {
  lappend strlist [string repeat "$ch " 400]
}

do_test 4.0 {
  execsql {
    BEGIN;
    CREATE VIRTUAL TABLE x3 USING fts5(a);
    INSERT INTO x3(x3, rank) VALUES('pgsz', 32);
  }
  foreach str $strlist {
    execsql { INSERT INTO x3 VALUES($str) }
  }
  execsql COMMIT
} {}

set tn 0
foreach str $strlist {
  do_execsql_test 4.1.$tn {
    SELECT rowid FROM x3 WHERE x3 MATCH $str
  } [expr {$tn + 1}]
  incr tn
}

set nBlock [db one {SELECT count(*) FROM x3_data}]
do_execsql_test 4.2 {
  SELECT count(fts5_decode(rowid, block)) FROM x3_data;
} $nBlock

#-------------------------------------------------------------------------
# Position lists with large values.
#

# Each document is 400 copies of one letter followed by a final token 'a',
# so 'a' sits at a large token offset in every row.
set strlist [list]
foreach ch {w x y z} {
  lappend strlist "[string repeat "$ch " 400]a"
}

do_test 5.0 {
  execsql {
    BEGIN;
    CREATE VIRTUAL TABLE x4 USING fts5(a);
    INSERT INTO x4(x4, rank) VALUES('pgsz', 32);
  }
  foreach str $strlist {
    execsql { INSERT INTO x4 VALUES($str) }
  }
  execsql COMMIT
} {}

# 5.1: all four rows contain 'a'.
do_execsql_test 5.1 {
  SELECT rowid FROM x4 WHERE x4 MATCH 'a'
} {1 2 3 4}

# 5.2: every row of x4_data must be decodable.
set nBlock [db one {SELECT count(*) FROM x4_data}]
do_execsql_test 5.2 {
  SELECT count(fts5_decode(rowid, block)) FROM x4_data;
} $nBlock

# NOTE(review): the trailing `|` is table-border residue from the diff page
# this text was captured from; it is not part of the Tcl script.
finish_test |
Added ext/fts5/test/fts5tokenizer.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 | # 2014 Dec 20 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # Tests focusing on the built-in fts5 tokenizers. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5tokenizer # If SQLITE_ENABLE_FTS5 is defined, omit this file. 
# Note the negation: the body runs (ending the script early) when the
# fts5 capability is NOT available.
ifcapable !fts5 {
  finish_test
  return
}

# 1.0-1.4: accepted spellings of the tokenize= option (bare word, quoted,
# with or without spaces around '=', and with a chained tokenizer name).
do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize=porter);
  DROP TABLE ft1;
}
do_execsql_test 1.1 {
  CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize='porter');
  DROP TABLE ft1;
}
do_execsql_test 1.2 {
  CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = porter);
  DROP TABLE ft1;
}
do_execsql_test 1.3 {
  CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = 'porter');
  DROP TABLE ft1;
}
do_execsql_test 1.4 {
  CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = 'porter ascii');
  DROP TABLE ft1;
}

# 1.5-1.6: unknown tokenizer names are rejected. Note the two different
# messages: one for an unknown top-level tokenizer, one for an unknown
# tokenizer named as the argument of porter.
do_catchsql_test 1.5 {
  CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = 'nosuch');
} {1 {no such tokenizer: nosuch}}

do_catchsql_test 1.6 {
  CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize = 'porter nosuch');
} {1 {error in tokenizer constructor}}

# 2.x: with the porter tokenizer, queries for differently-inflected forms
# of the stored words are expected to match row 1.
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE ft1 USING fts5(x, tokenize=porter);
  INSERT INTO ft1 VALUES('embedded databases');
}
do_execsql_test 2.1 {
  SELECT rowid FROM ft1 WHERE ft1 MATCH 'embedding'
} 1
do_execsql_test 2.2 {
  SELECT rowid FROM ft1 WHERE ft1 MATCH 'database'
} 1
do_execsql_test 2.3 {
  SELECT rowid FROM ft1 WHERE ft1 MATCH 'database embedding'
} 1

# Tokenizer constructor that records its arguments in ::targs and then
# always fails. Used below to observe how the tokenize= directive is
# split into constructor arguments.
proc tcl_create {args} {
  set ::targs $args
  error "failed"
}
sqlite3_fts5_create_tokenizer db tcl tcl_create

# 3.N.1: creating the table fails because tcl_create raises an error.
# 3.N.2: the arguments that reached tcl_create are the expected ones.
foreach {tn directive expected} {
  1 {tokenize='tcl a b c'}                {a b c}
  2 {tokenize='tcl ''d'' ''e'' ''f'''}    {d e f}
  3 {tokenize="tcl 'g' 'h' 'i'"}          {g h i}
  4 {tokenize = tcl}                      {}
} {
  do_catchsql_test 3.$tn.1 "
    CREATE VIRTUAL TABLE ft2 USING fts5(x, $directive)
  " {1 {error in tokenizer constructor}}
  do_test 3.$tn.2 { set ::targs } $expected
}

# 4.x: malformed directives are reported as parse / option errors.
do_catchsql_test 4.1 {
  CREATE VIRTUAL TABLE ft2 USING fts5(x, tokenize = tcl abc);
} {1 {parse error in "tokenize = tcl abc"}}
do_catchsql_test 4.2 {
  CREATE VIRTUAL TABLE ft2 USING fts5(x y)
} {1 {unrecognized column option: y}}

#-------------------------------------------------------------------------
# Test the "separators" and "tokenchars" options a bit.
#
# The ids generated by this loop live under 5.0.* so that they cannot
# collide with the hand-numbered 5.1.* / 5.2 / 5.3 tests further down
# (the loop used to emit "5.1.1" for tn=1, duplicating the id of the
# first ascii test below).
#
# With ',.:' as token characters and 'xyz' as separators, the inserted
# string is expected to split into: abc def ghi jkl.mno,pqr:stu vw
foreach {tn tokenizer} {1 ascii 2 unicode61} {
  reset_db
  set T "$tokenizer tokenchars ',.:' separators 'xyz'"
  execsql "CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = \"$T\")"
  do_execsql_test 5.0.$tn.1 {
    INSERT INTO t1 VALUES('abcxdefyghizjkl.mno,pqr:stu/vwx+yz');
  }
  # Columns: sub-test number, token to query for, expected rowid list.
  foreach {tn2 token res} {
    1 abc 1
    2 def 1
    3 ghi 1
    4 jkl {}
    5 mno {}
    6 pqr {}
    7 stu {}
    8 jkl.mno,pqr:stu 1
    9 vw 1
  } {
    do_execsql_test 5.0.$tn.2.$tn2 "
      SELECT rowid FROM t1 WHERE t1 MATCH '\"$token\"'
    " $res
  }
}

#-------------------------------------------------------------------------
# Miscellaneous tests for the ascii tokenizer.
#
# 5.1.*: Test that the ascii tokenizer ignores non-ASCII characters in the
#        'separators' option. But unicode61 does not.
#
# 5.2.*: An option without an argument is an error.
#
do_test 5.1.1 {
  execsql "
    CREATE VIRTUAL TABLE a1 USING fts5(x, tokenize=`ascii separators '\u1234'`);
    INSERT INTO a1 VALUES('abc\u1234def');
  "
  execsql { SELECT rowid FROM a1 WHERE a1 MATCH 'def' }
} {}

do_test 5.1.2 {
  execsql "
    CREATE VIRTUAL TABLE a2 USING fts5(
      x, tokenize=`unicode61 separators '\u1234'`);
    INSERT INTO a2 VALUES('abc\u1234def');
  "
  execsql { SELECT rowid FROM a2 WHERE a2 MATCH 'def' }
} {1}

do_catchsql_test 5.2 {
  CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'ascii tokenchars');
} {1 {error in tokenizer constructor}}
do_catchsql_test 5.3 {
  CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'ascii opt arg');
} {1 {error in tokenizer constructor}}

#-------------------------------------------------------------------------
# Test that the ASCII and unicode61 tokenizers both handle SQLITE_DONE
# correctly.
#

# Token callback: append $token to the caller's variable named $varname.
# Returns "SQLITE_DONE" once three tokens have been collected, otherwise
# "SQLITE_OK". iStart and iEnd are accepted but unused.
proc test_token_cb {varname token iStart iEnd} {
  upvar $varname var
  lappend var $token
  if {[llength $var]==3} { return "SQLITE_DONE" }
  return "SQLITE_OK"
}

# FTS5 auxiliary function: tokenize the text of column 0 of the current
# row through the API command $cmd and return the tokens gathered by
# test_token_cb (at most three, since the callback then asks to stop).
proc tokenize {cmd} {
  set res [list]
  $cmd xTokenize [$cmd xColumnText 0] [list test_token_cb res]
  set res
}
sqlite3_fts5_create_function db tokenize tokenize

# 6.0.*: each document has six tokens but only the first three may be
# reported. These two tests were previously numbered 6.0 and 6.1; the
# latter duplicated the id of the first unicode61 error test below.
do_execsql_test 6.0.1 {
  CREATE VIRTUAL TABLE x1 USING fts5(a, tokenize=ascii);
  INSERT INTO x1 VALUES('q w e r t y');
  INSERT INTO x1 VALUES('y t r e w q');
  SELECT tokenize(x1) FROM x1 WHERE x1 MATCH 'e AND r';
} {
  {q w e} {y t r}
}

do_execsql_test 6.0.2 {
  CREATE VIRTUAL TABLE x2 USING fts5(a, tokenize=unicode61);
  INSERT INTO x2 VALUES('q w e r t y');
  INSERT INTO x2 VALUES('y t r e w q');
  SELECT tokenize(x2) FROM x2 WHERE x2 MATCH 'e AND r';
} {
  {q w e} {y t r}
}

#-------------------------------------------------------------------------
# Miscellaneous tests for the unicode tokenizer.
#
do_catchsql_test 6.1 {
  CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'unicode61 tokenchars');
} {1 {error in tokenizer constructor}}
do_catchsql_test 6.2 {
  CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'unicode61 a b');
} {1 {error in tokenizer constructor}}
do_catchsql_test 6.3 {
  CREATE VIRTUAL TABLE a3 USING fts5(
    x, y, tokenize = 'unicode61 remove_diacritics 2'
  );
} {1 {error in tokenizer constructor}}
do_catchsql_test 6.4 {
  CREATE VIRTUAL TABLE a3 USING fts5(
    x, y, tokenize = 'unicode61 remove_diacritics 10'
  );
} {1 {error in tokenizer constructor}}

#-------------------------------------------------------------------------
# Porter tokenizer with very large tokens.
#
# Three single-letter tokens of 100, 500 and 1000 characters. The SQL
# below refers to them as $a, $b and $c.
set a [string repeat a 100]
set b [string repeat b 500]
set c [string repeat c 1000]
# 7.0: each row holds two of the three tokens.
do_execsql_test 7.0 {
  CREATE VIRTUAL TABLE e5 USING fts5(x, tokenize=porter);
  INSERT INTO e5 VALUES($a || ' ' || $b);
  INSERT INTO e5 VALUES($b || ' ' || $c);
  INSERT INTO e5 VALUES($c || ' ' || $a);
}
# 7.1-7.3: each token must match exactly the two rows that contain it.
do_execsql_test 7.1 {SELECT rowid FROM e5 WHERE e5 MATCH $a} { 1 3 }
do_execsql_test 7.2 {SELECT rowid FROM e5 WHERE e5 MATCH $b} { 1 2 }
do_execsql_test 7.3 {SELECT rowid FROM e5 WHERE e5 MATCH $c} { 2 3 }

#-------------------------------------------------------------------------
# Test the 'separators' option with the unicode61 tokenizer.
#
# 8.1: every upper-case ASCII letter is declared a separator; the
# vocabulary table e7 is used to list the resulting terms. The whole test
# runs inside a transaction that is rolled back.
do_execsql_test 8.1 {
  BEGIN;
    CREATE VIRTUAL TABLE e6 USING fts5(x,
      tokenize="unicode61 separators ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    );
    INSERT INTO e6 VALUES('theAquickBbrownCfoxDjumpedWoverXtheYlazyZdog');
    CREATE VIRTUAL TABLE e7 USING fts5vocab(e6, 'row');
    SELECT term FROM e7;
  ROLLBACK;
} {
  brown dog fox jumped lazy over quick the
}

# 8.2: same idea with non-ASCII separators U+0E01..U+0E07. [subst] is
# applied to both the SQL and the expected result so that the \uXXXX
# escapes become real characters.
do_execsql_test 8.2 [subst {
  BEGIN;
    CREATE VIRTUAL TABLE e6 USING fts5(x,
      tokenize="unicode61 separators '\u0E01\u0E02\u0E03\u0E04\u0E05\u0E06\u0E07'"
    );
    INSERT INTO e6 VALUES('the\u0E01quick\u0E01brown\u0E01fox\u0E01'
      || 'jumped\u0E01over\u0E01the\u0E01lazy\u0E01dog'
    );
    INSERT INTO e6 VALUES('\u0E08\u0E07\u0E09');
    CREATE VIRTUAL TABLE e7 USING fts5vocab(e6, 'row');
    SELECT term FROM e7;
  ROLLBACK;
}] [subst {
  brown dog fox jumped lazy over quick the \u0E08 \u0E09
}]

# NOTE(review): the trailing `|` is table-border residue from the diff page
# this text was captured from; it is not part of the Tcl script.
finish_test |
Added ext/fts5/test/fts5unicode.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 | # 2014 Dec 20 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # Tests focusing on the fts5 tokenizers # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5unicode # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { finish_test return } proc tokenize_test {tn tokenizer input output} { uplevel [list do_test $tn [subst -nocommands { set ret {} foreach {z s e} [sqlite3_fts5_tokenize db {$tokenizer} {$input}] { lappend ret [set z] } set ret }] [list {*}$output]] } foreach {tn t} {1 ascii 2 unicode61} { tokenize_test 1.$tn.0 $t {A B C D} {a b c d} tokenize_test 1.$tn.1 $t {May you share freely,} {may you share freely} tokenize_test 1.$tn.2 $t {..May...you.shAre.freely} {may you share freely} tokenize_test 1.$tn.3 $t {} {} } #------------------------------------------------------------------------- # Check that "unicode61" really is the default tokenizer. 
#
# 2.0: t1 uses the default tokenizer, t2 names unicode61 explicitly and
# t3 uses ascii. Each receives the same three upper-case accented Latin-1
# characters (U+00C0 U+00C8 U+00CC) as a single token.
do_execsql_test 2.0 "
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  CREATE VIRTUAL TABLE t2 USING fts5(x, tokenize = unicode61);
  CREATE VIRTUAL TABLE t3 USING fts5(x, tokenize = ascii);
  INSERT INTO t1 VALUES('\xC0\xC8\xCC');
  INSERT INTO t2 VALUES('\xC0\xC8\xCC');
  INSERT INTO t3 VALUES('\xC0\xC8\xCC');
"

# 2.1: query with the lower-case forms (U+00E0 U+00E8 U+00EC). Only t1 and
# t2 are expected to match, i.e. the default tokenizer behaves like
# unicode61 and not like ascii.
# (A leftover `breakpoint` debugging call that preceded this test has been
# removed.)
do_execsql_test 2.1 "
  SELECT 't1' FROM t1 WHERE t1 MATCH '\xE0\xE8\xEC';
  SELECT 't2' FROM t2 WHERE t2 MATCH '\xE0\xE8\xEC';
  SELECT 't3' FROM t3 WHERE t3 MATCH '\xE0\xE8\xEC';
" {t1 t2}

# NOTE(review): the trailing `|` is table-border residue from the diff page
# this text was captured from; it is not part of the Tcl script.
finish_test |
Added ext/fts5/test/fts5unicode2.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 
233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 | # 2012 May 25 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #************************************************************************* # # The tests in this file focus on testing the "unicode" FTS tokenizer. # # This is a modified copy of FTS4 test file "fts4_unicode.test". 
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5unicode2

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

# Run test $tn in the caller's scope: tokenize $input with
# "unicode61 remove_diacritics 0" and compare the output with $res
# (normalised to a canonical list).
# NOTE(review): the expected lists below pair each folded token with its
# original text; the exact meaning of -subst is defined by the test
# fixture - confirm there.
proc do_unicode_token_test {tn input res} {
  uplevel [list do_test $tn [list \
    sqlite3_fts5_tokenize -subst db "unicode61 remove_diacritics 0" $input
  ] [list {*}$res]]
}

# As do_unicode_token_test, but with the tokenizer "unicode61" and no
# explicit options.
proc do_unicode_token_test2 {tn input res} {
  uplevel [list do_test $tn [list \
    sqlite3_fts5_tokenize -subst db "unicode61" $input
  ] [list {*}$res]]
}

# Usage: do_unicode_token_test3 TN ?OPTION VALUE ...? INPUT RESULT
#
# The last two arguments are the input text and the expected output; all
# arguments before them are appended to "unicode61" to form the tokenizer
# specification.
proc do_unicode_token_test3 {tn args} {
  set tokenizer [concat unicode61 {*}[lrange $args 0 end-2]]
  set input [lindex $args end-1]
  set res [lindex $args end]
  uplevel [list do_test $tn [list \
    sqlite3_fts5_tokenize -subst db $tokenizer $input
  ] [list {*}$res]]
}

# 1.0-1.6: case folding with diacritics preserved.
do_unicode_token_test 1.0 {a B c D} {a a b B c c d D}

do_unicode_token_test 1.1 "\uC4 \uD6 \uDC" \
    "\uE4 \uC4 \uF6 \uD6 \uFC \uDC"

do_unicode_token_test 1.2 "x\uC4x x\uD6x x\uDCx" \
    "x\uE4x x\uC4x x\uF6x x\uD6x x\uFCx x\uDCx"

# 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s.
do_unicode_token_test 1.3 "\uDF" "\uDF \uDF"
do_unicode_token_test 1.4 "\u1E9E" "\uDF \u1E9E"

do_unicode_token_test 1.5 "The quick brown fox" {
  the The quick quick brown brown fox fox
}
do_unicode_token_test 1.6 "The\u00bfquick\u224ebrown\u2263fox" {
  the The quick quick brown brown fox fox
}

# 1.7-1.9: the same inputs as 1.0-1.2 with default options; here the
# expected tokens have their diacritics stripped.
do_unicode_token_test2 1.7  {a B c D} {a a b B c c d D}
do_unicode_token_test2 1.8  "\uC4 \uD6 \uDC" "a \uC4 o \uD6 u \uDC"

do_unicode_token_test2 1.9  "x\uC4x x\uD6x x\uDCx" \
    "xax x\uC4x xox x\uD6x xux x\uDCx"

# Check that diacritics are removed if remove_diacritics=1 is specified.
# And that they do not break tokens.
do_unicode_token_test2 1.10 "xx\u0301xx" "xxxx xx\u301xx"

# Title-case mappings work
do_unicode_token_test 1.11 "\u01c5" "\u01c6 \u01c5"

do_unicode_token_test 1.12 "\u00C1abc\u00C2 \u00D1def\u00C3" \
    "\u00E1abc\u00E2 \u00C1abc\u00C2 \u00F1def\u00E3 \u00D1def\u00C3"

do_unicode_token_test 1.13 "\u00A2abc\u00A3 \u00A4def\u00A5" \
    "abc abc def def"

#-------------------------------------------------------------------------
#
# Sample documents. They are passed through [mapdoc] before use, which
# collapses all whitespace runs, so the layout inside the braces is free.
set docs [list {
  Enhance the INSERT syntax to allow multiple rows to be inserted via the
  VALUES clause.
} {
  Enhance the CREATE VIRTUAL TABLE command to support the IF NOT EXISTS clause.
} {
  Added the sqlite3_stricmp() interface as a counterpart to sqlite3_strnicmp().
} {
  Added the sqlite3_db_readonly() interface.
} {
  Added the SQLITE_FCNTL_PRAGMA file control, giving VFS implementations the
  ability to add new PRAGMA statements or to override built-in PRAGMAs.
} {
  Queries of the form: "SELECT max(x), y FROM table" returns the value of y on
  the same row that contains the maximum x value.
} {
  Added support for the FTS4 languageid option.
} {
  Documented support for the FTS4 content option. This feature has actually
  been in the code since version 3.7.9 but is only now considered to be
  officially supported.
} {
  Pending statements no longer block ROLLBACK. Instead, the pending statement
  will return SQLITE_ABORT upon next access after the ROLLBACK.
} {
  Improvements to the handling of CSV inputs in the command-line shell
} {
  Fix a bug introduced in version 3.7.10 that might cause a LEFT JOIN to be
  incorrectly converted into an INNER JOIN if the WHERE clause indexable terms
  connected by OR.
}]

# map(L) holds the upper-case and lower-case "with diaeresis" variants of
# the ASCII letter L.
set map(a) [list "\u00C4" "\u00E4"]    ; # LATIN LETTER A WITH DIAERESIS
set map(e) [list "\u00CB" "\u00EB"]    ; # LATIN LETTER E WITH DIAERESIS
set map(i) [list "\u00CF" "\u00EF"]    ; # LATIN LETTER I WITH DIAERESIS
set map(o) [list "\u00D6" "\u00F6"]    ; # LATIN LETTER O WITH DIAERESIS
set map(u) [list "\u00DC" "\u00FC"]    ; # LATIN LETTER U WITH DIAERESIS
set map(y) [list "\u0178" "\u00FF"]    ; # LATIN LETTER Y WITH DIAERESIS
set map(h) [list "\u1E26" "\u1E27"]    ; # LATIN LETTER H WITH DIAERESIS
set map(w) [list "\u1E84" "\u1E85"]    ; # LATIN LETTER W WITH DIAERESIS
set map(x) [list "\u1E8C" "\u1E8D"]    ; # LATIN LETTER X WITH DIAERESIS

# Flatten the array into a [string map] table covering both cases.
foreach k [array names map] {
  lappend mappings [string toupper $k] [lindex $map($k) 0]
  lappend mappings $k [lindex $map($k) 1]
}

# Normalise $doc: collapse every whitespace run to one space, trim, then
# replace each mapped ASCII letter by its diaeresis variant.
proc mapdoc {doc} {
  set doc [regsub -all {[[:space:]]+} $doc " "]
  string map $::mappings [string trim $doc]
}

# 2.0: index the mapped form of every sample document.
do_test 2.0 {
  execsql { CREATE VIRTUAL TABLE t2 USING fts5(tokenize=unicode61, x); }
  foreach doc $docs {
    set d [mapdoc $doc]
    execsql { INSERT INTO t2 VALUES($d) }
  }
} {}

# 2.1: a mapped query term finds the (mapped) document containing it.
do_test 2.1 {
  set q [mapdoc "row"]
  execsql { SELECT * FROM t2 WHERE t2 MATCH $q }
} [list [mapdoc {
  Queries of the form: "SELECT max(x), y FROM table" returns the value of y on
  the same row that contains the maximum x value.
}]]

# 2.2-2.6: snippet() output for queries in various letter cases and for a
# prefix query. Both query and expected snippet go through [mapdoc].
foreach {tn query snippet} {
  2 "row" {
     ...returns the value of y on the same [row] that contains
     the maximum x value.
  }
  3 "ROW" {
     ...returns the value of y on the same [row] that contains
     the maximum x value.
  }
  4 "rollback" {
     ...[ROLLBACK]. Instead, the pending statement
     will return SQLITE_ABORT upon next access after the [ROLLBACK].
  }
  5 "rOllback" {
     ...[ROLLBACK]. Instead, the pending statement
     will return SQLITE_ABORT upon next access after the [ROLLBACK].
  }
  6 "lang*" {
     Added support for the FTS4 [languageid] option.
  }
} {
  do_test 2.$tn {
    set q [mapdoc $query]
    execsql {
      SELECT snippet(t2, -1, '[', ']', '...', 15) FROM t2 WHERE t2 MATCH $q
    }
  } [list [mapdoc $snippet]]
}

#-------------------------------------------------------------------------
# Make sure the unicode61 tokenizer does not crash if it is passed a
# NULL pointer.
reset_db

do_execsql_test 3.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(tokenize=unicode61, x, y);
  INSERT INTO t1 VALUES(NULL, 'a b c');
}

do_execsql_test 3.2 {
  SELECT snippet(t1, -1, '[', ']', '...', 15) FROM t1 WHERE t1 MATCH 'b'
} {{a [b] c}}

# 3.3: one seed row doubled sixteen times, followed by two rows whose
# second column is NULL.
do_execsql_test 3.3 {
  BEGIN;
  DELETE FROM t1;
  INSERT INTO t1 VALUES('b b b b b b b b b b b', 'b b b b b b b b b b b b b');
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 VALUES('a b c', NULL);
  INSERT INTO t1 VALUES('a x c', NULL);
  COMMIT;
}

do_execsql_test 3.4 {
  SELECT * FROM t1 WHERE t1 MATCH 'a b';
} {{a b c} {}}

#-------------------------------------------------------------------------
#
# 4.x: unusual input. Each test only requires that the statements complete
# and return the expected (empty) result.
reset_db

# 4.1: text and separator lists containing U+FFFE and the lone surrogate
# U+D800.
do_test 4.1 {
  set a "abc\uFFFEdef"
  set b "abc\uD800def"
  set c "\uFFFEdef"
  set d "\uD800def"
  execsql {
    CREATE VIRTUAL TABLE t1 USING fts5(tokenize=unicode61, x);
    INSERT INTO t1 VALUES($a);
    INSERT INTO t1 VALUES($b);
    INSERT INTO t1 VALUES($c);
    INSERT INTO t1 VALUES($d);
  }

  execsql "CREATE VIRTUAL TABLE t8 USING fts5(
      a, b, tokenize=\"unicode61 separators '\uFFFE\uD800\u00BF'\"
  )"
} {}

# 4.2: byte strings holding 0xF7 followed by 3..6 0xBF bytes, embedded
# between the ASCII letters 'a' and 'b'.
do_test 4.2 {
  set a [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0x62}]
  set b [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0x62}]
  set c [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
  set d [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
  execsql {
    INSERT INTO t1 VALUES($a);
    INSERT INTO t1 VALUES($b);
    INSERT INTO t1 VALUES($c);
    INSERT INTO t1 VALUES($d);
  }
} {}

# 4.3: the same byte sequences without the surrounding ASCII letters.
do_test 4.3 {
  set a [binary format c* {0xF7 0xBF 0xBF 0xBF}]
  set b [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF}]
  set c [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF}]
  set d [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF}]
  execsql {
    INSERT INTO t1 VALUES($a);
    INSERT INTO t1 VALUES($b);
    INSERT INTO t1 VALUES($c);
    INSERT INTO t1 VALUES($d);
  }
} {}

# 4.4: SQL run through sqlite3_exec_hex.
# NOTE(review): presumably each %XX in the SQL stands for one raw byte -
# confirm against the test fixture.
do_test 4.4 {
  sqlite3_exec_hex db {
    CREATE VIRTUAL TABLE t9 USING fts5(a, b,
      tokenize="unicode61 separators '%C09004'"
    );
    INSERT INTO t9(a) VALUES('abc%88def %89ghi%90');
  }
} {0 {}}


#-------------------------------------------------------------------------

# 5.x: the tokenchars / separators options. (A leftover `breakpoint`
# debugging call that preceded test 5.1 has been removed.)
do_unicode_token_test3 5.1 {tokenchars {}} {
  sqlite3_reset sqlite3_column_int
} {
  sqlite3 sqlite3
  reset reset
  sqlite3 sqlite3
  column column
  int int
}

do_unicode_token_test3 5.2 {tokenchars _} {
  sqlite3_reset sqlite3_column_int
} {
  sqlite3_reset sqlite3_reset
  sqlite3_column_int sqlite3_column_int
}

do_unicode_token_test3 5.3 {separators xyz} {
  Laotianxhorseyrunszfast
} {
  laotian Laotian
  horse horse
  runs runs
  fast fast
}

do_unicode_token_test3 5.4 {tokenchars xyz} {
  Laotianxhorseyrunszfast
} {
  laotianxhorseyrunszfast Laotianxhorseyrunszfast
}

do_unicode_token_test3 5.5 {tokenchars _} {separators zyx} {
  sqlite3_resetxsqlite3_column_intyhonda_phantom
} {
  sqlite3_reset sqlite3_reset
  sqlite3_column_int sqlite3_column_int
  honda_phantom honda_phantom
}

do_unicode_token_test3 5.6 "separators \u05D1" "abc\u05D1def" {
  abc abc def def
}

do_unicode_token_test3 5.7                             \
    "tokenchars \u2444\u2445"                          \
    "separators \u05D0\u05D1\u05D2"                    \
    "\u2444fre\u2445sh\u05D0water\u05D2fish.\u2445timer" \
    [list                                              \
      \u2444fre\u2445sh \u2444fre\u2445sh              \
      water water                                      \
      fish fish                                        \
      \u2445timer \u2445timer                          \
    ]

# Check that it is not possible to add a standalone diacritic codepoint
# to either separators or tokenchars.
do_unicode_token_test3 5.8 "separators \u0301" \
    "hello\u0301world \u0301helloworld"        \
    "helloworld hello\u0301world helloworld helloworld"

do_unicode_token_test3 5.9 "tokenchars \u0301" \
    "hello\u0301world \u0301helloworld"        \
    "helloworld hello\u0301world helloworld helloworld"

do_unicode_token_test3 5.10 "separators \u0301" \
    "remove_diacritics 0"                        \
    "hello\u0301world \u0301helloworld"          \
    "hello\u0301world hello\u0301world helloworld helloworld"

do_unicode_token_test3 5.11 "tokenchars \u0301" \
    "remove_diacritics 0"                         \
    "hello\u0301world \u0301helloworld"           \
    "hello\u0301world hello\u0301world helloworld helloworld"

#-------------------------------------------------------------------------

# Tokenize $txt with $tokenizer and return only the first element of each
# pair in the tokenizer output.
proc do_tokenize {tokenizer txt} {
  set res [list]
  foreach {b c} [sqlite3_fts5_tokenize -subst db $tokenizer $txt] {
    lappend res $b
  }
  set res
}

# Argument $lCp must be a list of codepoints (integers) that
# correspond to whitespace characters. This command creates a string
# $whitespace from the codepoints, then tokenizes
# "${whitespace}hello${whitespace}world${whitespace}" using tokenizer
# $tokenizer. The test passes if the tokenizer successfully extracts the
# two 5 character tokens.
#
proc do_isspace_test {tn tokenizer lCp} {
  set whitespace [format [string repeat %c [llength $lCp]] {*}$lCp]
  set txt "${whitespace}hello${whitespace}world${whitespace}"
  uplevel [list do_test $tn [list do_tokenize $tokenizer $txt] {hello world}]
}

# unicode61 is always tested; icu only when the build provides it.
set tokenizers [list unicode61]
ifcapable icu { lappend tokenizers icu }

# Some tests to check that the tokenizers can both identify white-space
# codepoints. All codepoints tested below are of type "Zs" in the
# UnicodeData.txt file.
foreach T $tokenizers {
  # Each single "Zs" codepoint on its own.
  do_isspace_test 6.$T.1 $T    32
  do_isspace_test 6.$T.2 $T    160
  do_isspace_test 6.$T.3 $T    5760
  do_isspace_test 6.$T.4 $T    6158
  do_isspace_test 6.$T.5 $T    8192
  do_isspace_test 6.$T.6 $T    8193
  do_isspace_test 6.$T.7 $T    8194
  do_isspace_test 6.$T.8 $T    8195
  do_isspace_test 6.$T.9 $T    8196
  do_isspace_test 6.$T.10 $T   8197
  do_isspace_test 6.$T.11 $T   8198
  do_isspace_test 6.$T.12 $T   8199
  do_isspace_test 6.$T.13 $T   8200
  do_isspace_test 6.$T.14 $T   8201
  do_isspace_test 6.$T.15 $T   8202
  do_isspace_test 6.$T.16 $T   8239
  do_isspace_test 6.$T.17 $T   8287
  do_isspace_test 6.$T.18 $T   12288

  # Runs of several whitespace codepoints in a row.
  do_isspace_test 6.$T.19 $T   {32 160 5760 6158}
  do_isspace_test 6.$T.20 $T   {8192 8193 8194 8195}
  do_isspace_test 6.$T.21 $T   {8196 8197 8198 8199}
  do_isspace_test 6.$T.22 $T   {8200 8201 8202 8239}
  do_isspace_test 6.$T.23 $T   {8287 12288}
}

#-------------------------------------------------------------------------
# Test that the private use ranges are treated as alphanumeric.
#
foreach {tn1 c} {
  1 \ue000 2 \ue001 3 \uf000 4 \uf8fe 5 \uf8ff
} {
  foreach {tn2 config res} {
    1 ""             "hello*world hello*world"
    2 "separators *" "hello hello world world"
  } {
    # The "*" placeholder is replaced by the codepoint under test.
    set config [string map [list * $c] $config]
    set input  [string map [list * $c] "hello*world"]
    set output [string map [list * $c] $res]
    do_unicode_token_test3 7.$tn1.$tn2 {*}$config $input $output
  }
}

#-------------------------------------------------------------------------
# Cursory test of remove_diacritics=0.
#
# 00C4;LATIN CAPITAL LETTER A WITH DIAERESIS
# 00D6;LATIN CAPITAL LETTER O WITH DIAERESIS
# 00E4;LATIN SMALL LETTER A WITH DIAERESIS
# 00F6;LATIN SMALL LETTER O WITH DIAERESIS
#
do_execsql_test 8.1.1 "
  CREATE VIRTUAL TABLE t3 USING fts5(
    content, tokenize='unicode61 remove_diacritics 1'
  );
  INSERT INTO t3 VALUES('o');
  INSERT INTO t3 VALUES('a');
  INSERT INTO t3 VALUES('O');
  INSERT INTO t3 VALUES('A');
  INSERT INTO t3 VALUES('\xD6');
  INSERT INTO t3 VALUES('\xC4');
  INSERT INTO t3 VALUES('\xF6');
  INSERT INTO t3 VALUES('\xE4');
"
do_execsql_test 8.1.2 {
  SELECT rowid FROM t3 WHERE t3 MATCH 'o' ORDER BY rowid ASC;
} {1 3 5 7}
do_execsql_test 8.1.3 {
  SELECT rowid FROM t3 WHERE t3 MATCH 'a' ORDER BY rowid ASC;
} {2 4 6 8}
do_execsql_test 8.2.1 {
  CREATE VIRTUAL TABLE t4 USING fts5(
    content, tokenize='unicode61 remove_diacritics 0'
  );
  INSERT INTO t4 SELECT * FROM t3 ORDER BY rowid ASC;
}
do_execsql_test 8.2.2 {
  SELECT rowid FROM t4 WHERE t4 MATCH 'o' ORDER BY rowid ASC;
} {1 3}
do_execsql_test 8.2.3 {
  SELECT rowid FROM t4 WHERE t4 MATCH 'a' ORDER BY rowid ASC;
} {2 4}

#-------------------------------------------------------------------------
# NOTE(review): everything inside the "if 0" block below is disabled. The
# statements use fts4, fts4aux and fts3tokenize rather than fts5, so they
# were presumably carried over from the fts3 test suite and still need to
# be ported - confirm before enabling or deleting.
#
if 0 {
foreach {tn sql} {
  1 {
    CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 [tokenchars= .]);
    CREATE VIRTUAL TABLE t6 USING fts4(
        tokenize=unicode61 [tokenchars=="] "tokenchars=[]");
    CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 [separators=x\xC4]);
  }
  2 {
    CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 "tokenchars= .");
    CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 "tokenchars=[=""]");
    CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 "separators=x\xC4");
  }
  3 {
    CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 'tokenchars= .');
    CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 'tokenchars=="[]');
    CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 'separators=x\xC4');
  }
  4 {
    CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 `tokenchars= .`);
    CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 `tokenchars=[="]`);
    CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 `separators=x\xC4`);
  }
} {
  do_execsql_test 9.$tn.0 {
    DROP TABLE IF EXISTS t5;
    DROP TABLE IF EXISTS t5aux;
    DROP TABLE IF EXISTS t6;
    DROP TABLE IF EXISTS t6aux;
    DROP TABLE IF EXISTS t7;
    DROP TABLE IF EXISTS t7aux;
  }
  do_execsql_test 9.$tn.1 $sql

  do_execsql_test 9.$tn.2 {
    CREATE VIRTUAL TABLE t5aux USING fts4aux(t5);
    INSERT INTO t5 VALUES('one two three/four.five.six');
    SELECT * FROM t5aux;
  } {
    four.five.six   * 1 1 four.five.six   0 1 1
    {one two three} * 1 1 {one two three} 0 1 1
  }

  do_execsql_test 9.$tn.3 {
    CREATE VIRTUAL TABLE t6aux USING fts4aux(t6);
    INSERT INTO t6 VALUES('alpha=beta"gamma/delta[epsilon]zeta');
    SELECT * FROM t6aux;
  } {
    {alpha=beta"gamma}   * 1 1 {alpha=beta"gamma} 0 1 1
    {delta[epsilon]zeta} * 1 1 {delta[epsilon]zeta} 0 1 1
  }

  do_execsql_test 9.$tn.4 {
    CREATE VIRTUAL TABLE t7aux USING fts4aux(t7);
    INSERT INTO t7 VALUES('alephxbeth\xC4gimel');
    SELECT * FROM t7aux;
  } {
    aleph * 1 1 aleph 0 1 1
    beth  * 1 1 beth  0 1 1
    gimel * 1 1 gimel 0 1 1
  }
}

# Check that multiple options are handled correctly.
#
do_execsql_test 10.1 {
  DROP TABLE IF EXISTS t1;
  CREATE VIRTUAL TABLE t1 USING fts4(tokenize=unicode61
    "tokenchars=xyz" "tokenchars=.=" "separators=.=" "separators=xy"
    "separators=a" "separators=a" "tokenchars=a" "tokenchars=a"
  );

  INSERT INTO t1 VALUES('oneatwoxthreeyfour');
  INSERT INTO t1 VALUES('a.single=word');
  CREATE VIRTUAL TABLE t1aux USING fts4aux(t1);
  SELECT * FROM t1aux;
} {
  .single=word * 1 1 .single=word 0 1 1
  four         * 1 1 four         0 1 1
  one          * 1 1 one          0 1 1
  three        * 1 1 three        0 1 1
  two          * 1 1 two          0 1 1
}

# Test that case folding happens after tokenization, not before.
#
do_execsql_test 10.2 {
  DROP TABLE IF EXISTS t2;
  CREATE VIRTUAL TABLE t2 USING fts4(tokenize=unicode61 "separators=aB");
  INSERT INTO t2 VALUES('oneatwoBthree');
  INSERT INTO t2 VALUES('onebtwoAthree');
  CREATE VIRTUAL TABLE t2aux USING fts4aux(t2);
  SELECT * FROM t2aux;
} {
  one           * 1 1 one           0 1 1
  onebtwoathree * 1 1 onebtwoathree 0 1 1
  three         * 1 1 three         0 1 1
  two           * 1 1 two           0 1 1
}

# Test that the tokenchars and separators options work with the
# fts3tokenize table.
#
do_execsql_test 11.1 {
  CREATE VIRTUAL TABLE ft1 USING fts3tokenize(
    "unicode61", "tokenchars=@.", "separators=1234567890"
  );
  SELECT token FROM ft1 WHERE input = 'berlin@street123sydney.road';
} {
  berlin@street sydney.road
}

}

finish_test
Added ext/fts5/test/fts5unicode3.test.
# 2014 Dec 20
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on the fts5 tokenizers
#

source [file join [file dirname [info script]] fts5_common.tcl]

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

# Return the path of $file within the fts3 "unicode" directory, which is
# located relative to the directory containing this script.
proc fts3_unicode_path {file} {
  file join [file dirname [info script]] .. .. fts3 unicode $file
}

source [fts3_unicode_path parseunicode.tcl]
set testprefix fts5unicode3

set CF [fts3_unicode_path CaseFolding.txt]
set UD [fts3_unicode_path UnicodeData.txt]

# Build the Tcl-side reference tables: the case-folding lookup table, the
# set of codepoints that are not alphanumeric (aNotAlnum) and the mapping
# used when removing diacritics (aDiacritic).
tl_load_casefolding_txt $CF
foreach x [an_load_unicodedata_text $UD] {
  set aNotAlnum($x) 1
}

foreach {y} [rd_load_unicodedata_text $UD] {
  foreach {code ascii} $y {}
  if {$ascii==""} {
    set int 0
  } else {
    binary scan $ascii c int
  }
  set aDiacritic($code) $int
}

# Tcl reference implementation that fts5_fold() is compared against.
# Codepoint $i is mapped through tl_lookup_table if it has an entry there
# and then, if $bRemoveDiacritic is true, through aDiacritic. The result is
# returned as a number.
proc tcl_fold {i {bRemoveDiacritic 0}} {
  global tl_lookup_table
  global aDiacritic

  if {[info exists tl_lookup_table($i)]} {
    set i $tl_lookup_table($i)
  }
  if {$bRemoveDiacritic && [info exists aDiacritic($i)]} {
    set i $aDiacritic($i)
  }
  # Braced so that the value, which originates from SQL, is substituted
  # only once rather than being re-parsed as an expression.
  expr {$i}
}
db func tcl_fold tcl_fold

# Tcl reference implementation that fts5_isalnum() is compared against.
# Returns true unless codepoint $i is listed in aNotAlnum.
proc tcl_isalnum {i} {
  global aNotAlnum
  expr {![info exists aNotAlnum($i)]}
}
db func tcl_isalnum tcl_isalnum


do_catchsql_test 1.0.1 {
  SELECT fts5_isalnum(1, 2, 3);
} {1 {wrong number of arguments to function fts5_isalnum}}
do_catchsql_test 1.0.2 {
  SELECT fts5_fold();
} {1 {wrong number of arguments to function fts5_fold}}
do_catchsql_test 1.0.3 {
  SELECT fts5_fold(1,2,3);
} {1 {wrong number of arguments to function fts5_fold}}

# Compare the built-in functions with the Tcl reference implementations
# for every value from -1 to 100000. No mismatch is expected.
do_execsql_test 1.1 {
  WITH ii(i) AS (
    SELECT -1
    UNION ALL
    SELECT i+1 FROM ii WHERE i<100000
  )
  SELECT count(*), min(i) FROM ii WHERE fts5_fold(i)!=CAST(tcl_fold(i) AS int);
} {0 {}}

do_execsql_test 1.2 {
  WITH ii(i) AS (
    SELECT -1
    UNION ALL
    SELECT i+1 FROM ii WHERE i<100000
  )
  SELECT count(*), min(i) FROM ii
  WHERE fts5_fold(i,1)!=CAST(tcl_fold(i,1) AS int);
} {0 {}}

do_execsql_test 1.3 {
  WITH ii(i) AS (
    SELECT -1
    UNION ALL
    SELECT i+1 FROM ii WHERE i<100000
  )
  SELECT count(*), min(i) FROM ii
  WHERE fts5_isalnum(i)!=CAST(tcl_isalnum(i) AS int);
} {0 {}}

# Create tables whose "separators" and "tokenchars" options contain every
# codepoint from 700 to 899.
do_test 1.4 {
  set str {CREATE VIRTUAL TABLE f3 USING fts5(a, tokenize=}
  append str {"unicode61 separators '}
  for {set i 700} {$i<900} {incr i} {
    append str [format %c $i]
  }
  append str {'");}
  execsql $str
} {}
do_test 1.5 {
  set str {CREATE VIRTUAL TABLE f5 USING fts5(a, tokenize=}
  append str {"unicode61 tokenchars '}
  for {set i 700} {$i<900} {incr i} {
    append str [format %c $i]
  }
  append str {'");}
  execsql $str
} {}


finish_test
Added ext/fts5/test/fts5unindexed.test.
# 2015 Apr 24
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# The tests in this file focus on "unindexed" columns.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5unindexed

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}


do_execsql_test 1.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(a, b UNINDEXED);
  INSERT INTO t1 VALUES('a b c', 'd e f');
  INSERT INTO t1 VALUES('g h i', 'j k l');
} {}

# Tokens of indexed column "a" match; tokens of UNINDEXED column "b" do not.
do_execsql_test 1.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'b' } {1}
do_execsql_test 1.3 { SELECT rowid FROM t1 WHERE t1 MATCH 'e' } {}

do_execsql_test 1.4 { INSERT INTO t1(t1) VALUES('integrity-check') } {}
do_execsql_test 1.5 { INSERT INTO t1(t1) VALUES('rebuild') } {}
do_execsql_test 1.6 { INSERT INTO t1(t1) VALUES('integrity-check') } {}

do_execsql_test 1.7 { SELECT rowid FROM t1 WHERE t1 MATCH 'b' } {1}
do_execsql_test 1.8 { SELECT rowid FROM t1 WHERE t1 MATCH 'e' } {}

do_execsql_test 1.9 { DELETE FROM t1 WHERE t1 MATCH 'b' } {}

do_execsql_test 1.10 { INSERT INTO t1(t1) VALUES('integrity-check') } {}
do_execsql_test 1.11 { INSERT INTO t1(t1) VALUES('rebuild') } {}
do_execsql_test 1.12 { INSERT INTO t1(t1) VALUES('integrity-check') } {}

do_execsql_test 1.13 { SELECT rowid FROM t1 WHERE t1 MATCH 'i' } {2}
do_execsql_test 1.14 { SELECT rowid FROM t1 WHERE t1 MATCH 'l' } {}

# NOTE(review): the two INSERT statements in test 2.1 target t1 even though
# t2 has just been created. This looks like a copy/paste slip, but the
# expected result {1 10} may rely on t2 being empty - confirm before
# changing the statements.
do_execsql_test 2.1 {
  CREATE VIRTUAL TABLE t2 USING fts5(a UNINDEXED, b UNINDEXED);
  INSERT INTO t1 VALUES('a b c', 'd e f');
  INSERT INTO t1 VALUES('g h i', 'j k l');
  SELECT rowid FROM t2_data;
} {1 10}
do_execsql_test 2.2 {
  INSERT INTO t2(t2) VALUES('rebuild');
  INSERT INTO t2(t2) VALUES('integrity-check');
  SELECT rowid FROM t2_data;
} {1 10}

# An external-content table (content=x4) with an UNINDEXED column.
do_execsql_test 3.1 {
  CREATE TABLE x4(i INTEGER PRIMARY KEY, a, b, c);
  CREATE VIRTUAL TABLE t4 USING fts5(a, b UNINDEXED, c, content=x4);
  INSERT INTO x4 VALUES(10, 'a b c', 'd e f', 'g h i');
  INSERT INTO x4 VALUES(20, 'j k l', 'm n o', 'p q r');
  INSERT INTO t4(t4) VALUES('rebuild');
  INSERT INTO t4(t4) VALUES('integrity-check');
} {}

do_execsql_test 3.2 {
  INSERT INTO t4(t4, rowid, a, b, c) VALUES('delete', 20, 'j k l', '', 'p q r');
  DELETE FROM x4 WHERE rowid=20;
  INSERT INTO t4(t4) VALUES('integrity-check');
} {}


finish_test
Added ext/fts5/test/fts5version.test.
# 2015 Apr 24
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# The tests in this file focus on testing that unrecognized file-format
# versions are detected and reported.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5version

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}


do_execsql_test 1.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(one);
  INSERT INTO t1 VALUES('a b c d');
} {}

do_execsql_test 1.2 {
  SELECT * FROM t1_config WHERE k='version'
} {version 2}

do_execsql_test 1.3 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'a';
} {1}

# Overwrite the stored version with a value other than the expected 2.
do_execsql_test 1.4 {
  UPDATE t1_config set v=3 WHERE k='version';
}

# The database is closed and reopened before each check below.
do_test 1.5 {
  db close
  sqlite3 db test.db
  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' }
} {1 {invalid fts5 file format (found 3, expected 2) - run 'rebuild'}}

do_test 1.6 {
  db close
  sqlite3 db test.db
  catchsql { INSERT INTO t1 VALUES('x y z') }
} {1 {invalid fts5 file format (found 3, expected 2) - run 'rebuild'}}

# With the version row deleted altogether, the version is reported as 0.
do_test 1.7 {
  execsql { DELETE FROM t1_config WHERE k='version' }
  db close
  sqlite3 db test.db
  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' }
} {1 {invalid fts5 file format (found 0, expected 2) - run 'rebuild'}}


finish_test
Added ext/fts5/test/fts5vocab.test.
# 2015 Apr 24
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# The tests in this file focus on testing the fts5vocab module.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5vocab

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}


do_execsql_test 1.1.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(one, prefix=1);
  CREATE VIRTUAL TABLE v1 USING fts5vocab(t1, 'row');
  PRAGMA table_info = v1;
} {
  0 term {} 0 {} 0
  1 doc {} 0 {} 0
  2 cnt {} 0 {} 0
}

do_execsql_test 1.1.2 {
  CREATE VIRTUAL TABLE v2 USING fts5vocab(t1, 'col');
  PRAGMA table_info = v2;
} {
  0 term {} 0 {} 0
  1 col {} 0 {} 0
  2 doc {} 0 {} 0
  3 cnt {} 0 {} 0
}

do_execsql_test 1.2.1 { SELECT * FROM v1 } { }
do_execsql_test 1.2.2 { SELECT * FROM v2 } { }

do_execsql_test 1.3 {
  INSERT INTO t1 VALUES('x y z');
  INSERT INTO t1 VALUES('x x x');
}

do_execsql_test 1.4.1 {
  SELECT * FROM v1;
} {x 2 4  y 1 1  z 1 1}

do_execsql_test 1.4.2 {
  SELECT * FROM v2;
} {x 0 2 4  y 0 1 1  z 0 1 1}

# The vocab tables are queried while a write transaction is still open.
do_execsql_test 1.5.1 {
  BEGIN;
    INSERT INTO t1 VALUES('a b c');
    SELECT * FROM v1 WHERE term<'d';
} {a 1 1  b 1 1  c 1 1}

do_execsql_test 1.5.2 {
    SELECT * FROM v2 WHERE term<'d';
  COMMIT;
} {a 0 1 1  b 0 1 1  c 0 1 1}

do_execsql_test 1.6 {
  DELETE FROM t1 WHERE one = 'a b c';
  SELECT * FROM v1;
} {x 2 4  y 1 1  z 1 1}

#-------------------------------------------------------------------------
#
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE tt USING fts5(a, b);
  INSERT INTO tt VALUES('d g b f d f', 'f c e c d a');
  INSERT INTO tt VALUES('f a e a a b', 'e d c f d d');
  INSERT INTO tt VALUES('b c a a a b', 'f f c c b c');
  INSERT INTO tt VALUES('f d c a c e', 'd g d e g d');
  INSERT INTO tt VALUES('g d e f a g x', 'f f d a a b');
  INSERT INTO tt VALUES('g c f b c g', 'a g f d c b');
  INSERT INTO tt VALUES('c e c f g b', 'f e d b g a');
  INSERT INTO tt VALUES('g d e f d e', 'a c d b a g');
  INSERT INTO tt VALUES('e f a c c b', 'b f e a f d y');
  INSERT INTO tt VALUES('c c a a c f', 'd g a e b g');
}

# Expected contents of a 'col' vocab table on tt (term, col, doc, cnt).
set res_col {
  a 0 6 11    a 1 7 9
  b 0 6 7     b 1 7 7
  c 0 6 12    c 1 5 8
  d 0 4 6     d 1 9 13
  e 0 6 7     e 1 6 6
  f 0 9 10    f 1 7 10
  g 0 5 7     g 1 5 7
  x 0 1 1     y 1 1 1
}
# Expected contents of a 'row' vocab table on tt (term, doc, cnt).
set res_row {
  a 10 20   b 9 14   c 9 20   d 9 19
  e 8 13   f 10 20   g 7 14   x 1 1
  y 1 1
}

# Each supported quoting style for the fts5vocab arguments must produce
# the same table.
foreach {tn tbl resname} {
  1 "fts5vocab(tt, 'col')" res_col
  2 "fts5vocab(tt, 'row')" res_row
  3 "fts5vocab(tt, \"row\")" res_row
  4 "fts5vocab(tt, [row])" res_row
  5 "fts5vocab(tt, `row`)" res_row

  6 "fts5vocab('tt', 'row')" res_row
  7 "fts5vocab(\"tt\", \"row\")" res_row
  8 "fts5vocab([tt], [row])" res_row
  9 "fts5vocab(`tt`, `row`)" res_row
} {
  do_execsql_test 2.$tn "
    DROP TABLE IF EXISTS tv;
    CREATE VIRTUAL TABLE tv USING $tbl;
    SELECT * FROM tv;
  " [set $resname]
}

#-------------------------------------------------------------------------
# Test errors in the CREATE VIRTUAL TABLE statement.
#
foreach {tn sql} {
  1 { CREATE VIRTUAL TABLE aa USING fts5vocab() }
  2 { CREATE VIRTUAL TABLE aa USING fts5vocab(x) }
  3 { CREATE VIRTUAL TABLE aa USING fts5vocab(x,y,z) }
  4 { CREATE VIRTUAL TABLE temp.aa USING fts5vocab(x,y,z,y) }
} {
  do_catchsql_test 3.$tn $sql {1 {wrong number of vtable arguments}}
}

do_catchsql_test 4.0 {
  CREATE VIRTUAL TABLE cc USING fts5vocab(tbl, unknown);
} {1 {fts5vocab: unknown table type: 'unknown'}}

do_catchsql_test 4.1 {
  ATTACH 'test.db' AS aux;
  CREATE VIRTUAL TABLE aux.cc USING fts5vocab(main, tbl, row);
} {1 {wrong number of vtable arguments}}

#-------------------------------------------------------------------------
# Test fts5vocab tables created in the temp schema.
#
reset_db
forcedelete test.db2
do_execsql_test 5.0 {
  ATTACH 'test.db2' AS aux;
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  CREATE VIRTUAL TABLE temp.t1 USING fts5(x);
  CREATE VIRTUAL TABLE aux.t1 USING fts5(x);

  INSERT INTO main.t1 VALUES('a b c');
  INSERT INTO main.t1 VALUES('d e f');
  INSERT INTO main.t1 VALUES('a e c');

  INSERT INTO temp.t1 VALUES('1 2 3');
  INSERT INTO temp.t1 VALUES('4 5 6');
  INSERT INTO temp.t1 VALUES('1 5 3');

  INSERT INTO aux.t1 VALUES('x y z');
  INSERT INTO aux.t1 VALUES('m n o');
  INSERT INTO aux.t1 VALUES('x n z');
}

do_execsql_test 5.1 {
  CREATE VIRTUAL TABLE temp.vm  USING fts5vocab(main, t1, row);
  CREATE VIRTUAL TABLE temp.vt1 USING fts5vocab(t1, row);
  CREATE VIRTUAL TABLE temp.vt2 USING fts5vocab(temp, t1, row);
  CREATE VIRTUAL TABLE temp.va  USING fts5vocab(aux, t1, row);
}

do_execsql_test 5.2 { SELECT * FROM vm } {
  a 2 2 b 1 1 c 2 2 d 1 1 e 2 2 f 1 1
}
do_execsql_test 5.3 { SELECT * FROM vt1 } {
  1 2 2 2 1 1 3 2 2 4 1 1 5 2 2 6 1 1
}
do_execsql_test 5.4 { SELECT * FROM vt2 } {
  1 2 2 2 1 1 3 2 2 4 1 1 5 2 2 6 1 1
}
do_execsql_test 5.5 { SELECT * FROM va } {
  m 1 1 n 2 2 o 1 1 x 2 2 y 1 1 z 2 2
}

#-------------------------------------------------------------------------
# An fts5vocab table that names something other than an fts5 table (here:
# two ordinary tables and a table that does not exist) can be created, but
# fails when it is queried.
#
do_execsql_test 6.0 {
  CREATE TABLE iii(iii);
  CREATE TABLE jjj(x);
}

do_catchsql_test 6.1 {
  CREATE VIRTUAL TABLE vocab1 USING fts5vocab(iii, row);
  SELECT * FROM vocab1;
} {1 {no such fts5 table: main.iii}}

do_catchsql_test 6.2 {
  CREATE VIRTUAL TABLE vocab2 USING fts5vocab(jjj, row);
  SELECT * FROM vocab2;
} {1 {no such fts5 table: main.jjj}}

do_catchsql_test 6.3 {
  CREATE VIRTUAL TABLE vocab3 USING fts5vocab(lll, row);
  SELECT * FROM vocab3;
} {1 {no such fts5 table: main.lll}}

finish_test
Added ext/fts5/tool/loadfts5.tcl.
# Load every file found beneath a directory into a full-text table "t1"
# (columns: path, content) of an SQLite database.
#
# Usage: loadfts5.tcl ?SWITCHES? DATABASE PATH

# Return the entire contents of file $f. Registered below as the SQL
# function loadfile().
proc loadfile {f} {
  set fd [open $f]
  set data [read $fd]
  close $fd
  return $data
}

# Number of documents loaded so far, and the number of documents
# represented by each progress dot.
set ::nRow 0
set ::nRowPerDot 1000

# Recursively insert each regular file beneath directory $dir into t1.
# Loading stops once ::O(limit) documents have been inserted (a limit of 0
# means no limit). One dot is printed per ::nRowPerDot documents, with a
# newline after every 65 dots.
proc load_hierachy {dir} {
  foreach f [glob -nocomplain -dir $dir *] {
    if {$::O(limit) && $::nRow>=$::O(limit)} break
    if {[file isdir $f]} {
      load_hierachy $f
    } else {
      db eval { INSERT INTO t1 VALUES($f, loadfile($f)) }
      incr ::nRow

      if {($::nRow % $::nRowPerDot)==0} {
        puts -nonewline .
        if {($::nRow % (65*$::nRowPerDot))==0} { puts "" }
        flush stdout
      }
    }
  }
}

# Print a usage message to stderr and exit with status 1.
proc usage {} {
  puts stderr "Usage: $::argv0 ?SWITCHES? DATABASE PATH"
  puts stderr ""
  puts stderr "Switches are:"
  puts stderr "  -fts4        (use fts4 instead of fts5)"
  puts stderr "  -fts5        (use fts5)"
  puts stderr "  -porter      (use porter tokenizer)"
  puts stderr "  -delete      (delete the database file before starting)"
  puts stderr "  -limit N     (load no more than N documents)"
  puts stderr "  -automerge N (set the automerge parameter to N)"
  puts stderr "  -crisismerge N (set the crisismerge parameter to N)"
  puts stderr "  -prefix PREFIX (comma separated prefix= argument)"
  exit 1
}

# Default option values. A merge setting of -1 means "leave unchanged".
set O(vtab)       fts5
set O(tok)        ""
set O(limit)      0
set O(delete)     0
set O(automerge)  -1
set O(crisismerge)  -1
set O(prefix)     ""

# The final two arguments are DATABASE and PATH. Everything before them is
# parsed as switches.
if {[llength $argv]<2} usage
set nOpt [expr {[llength $argv]-2}]
for {set i 0} {$i < $nOpt} {incr i} {
  set arg [lindex $argv $i]
  switch -- $arg {
    -fts4 {
      set O(vtab) fts4
    }

    -fts5 {
      set O(vtab) fts5
    }

    -porter {
      set O(tok) ", tokenize=porter"
    }

    -delete {
      set O(delete) 1
    }

    -limit {
      if { [incr i]>=$nOpt } usage
      set O(limit) [lindex $argv $i]
    }

    -automerge {
      if { [incr i]>=$nOpt } usage
      set O(automerge) [lindex $argv $i]
    }

    -crisismerge {
      if { [incr i]>=$nOpt } usage
      set O(crisismerge) [lindex $argv $i]
    }

    -prefix {
      if { [incr i]>=$nOpt } usage
      set O(prefix) [lindex $argv $i]
    }

    default {
      usage
    }
  }
}

set dbfile [lindex $argv end-1]
if {$O(delete)} { file delete -force $dbfile }
sqlite3 db $dbfile
db func loadfile loadfile

db transaction {
  set pref ""
  if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" }

  # Errors from these two statements are deliberately ignored.
  # NOTE(review): presumably so that an existing t1 can be reused and so
  # that the 'pgsz' setting does not abort an fts4 run - confirm.
  catch {
    db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)"
    db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);"
  }

  if {$O(automerge)>=0} {
    if {$O(vtab) == "fts5"} {
      db eval { INSERT INTO t1(t1, rank) VALUES('automerge', $O(automerge)) }
    } else {
      db eval { INSERT INTO t1(t1) VALUES('automerge=' || $O(automerge)) }
    }
  }

  # The crisismerge setting is applied for fts5 only. With -fts4 the switch
  # is accepted but has no effect.
  if {$O(crisismerge)>=0 && $O(vtab) == "fts5"} {
    db eval {INSERT INTO t1(t1, rank) VALUES('crisismerge', $O(crisismerge))}
  }

  load_hierachy [lindex $argv end]
}
Added ext/fts5/tool/mkfts5c.tcl.
#!/bin/sh
# restart with tclsh \
exec tclsh "$0" "$@"

# Concatenate the fts5 source files into a single file named fts5.c in
# the current directory.

# The fts5 source directory is the parent of the directory holding this
# script.
set srcdir [file dirname [file dirname [info script]]]

# Files to concatenate, in output order. fts5parse.h and fts5parse.c are
# read from the current directory, all others from $srcdir.
set G(src) [string map [list %dir% $srcdir] {
  %dir%/fts5.h
  %dir%/fts5Int.h
  fts5parse.h
  %dir%/fts5_aux.c
  %dir%/fts5_buffer.c
  %dir%/fts5_config.c
  %dir%/fts5_expr.c
  %dir%/fts5_hash.c
  %dir%/fts5_index.c
  %dir%/fts5_main.c
  %dir%/fts5_storage.c
  %dir%/fts5_tokenize.c
  %dir%/fts5_unicode2.c
  %dir%/fts5_varint.c
  %dir%/fts5_vocab.c
  fts5parse.c
}]

# Text written at the very start of the output file. It defines NDEBUG
# unless SQLITE_DEBUG is defined, and undefines NDEBUG if both are defined.
set G(hdr) {

#if !defined(NDEBUG) && !defined(SQLITE_DEBUG)
# define NDEBUG 1
#endif
#if defined(NDEBUG) && defined(SQLITE_DEBUG)
# undef NDEBUG
#endif

}

# Return the entire contents of file $zFile.
proc readfile {zFile} {
  set fd [open $zFile]
  set data [read $fd]
  close $fd
  return $data
}

# Open output file $zOut for writing, store the channel in G(fd) and write
# the header text to it.
proc fts5c_init {zOut} {
  global G
  set G(fd) [open $zOut w]
  puts -nonewline $G(fd) $G(hdr)
}

# Append the contents of source file $zIn to the output file, preceded by
# a #line directive naming the file. Two transformations are applied:
#
#   * lines that #include an fts5 header are dropped, and
#   * lines that start with an (optionally const) type name followed by an
#     identifier beginning with "sqlite3Fts5" are prefixed with "static".
#
proc fts5c_printfile {zIn} {
  global G
  set data [readfile $zIn]
  puts $G(fd) "#line 1 \"[file tail $zIn]\""
  foreach line [split $data "\n"] {
    if {[regexp {^#include.*fts5} $line]} continue
    if {[regexp {^(const )?[a-zA-Z][a-zA-Z0-9]* [*]?sqlite3Fts5} $line]} {
      set line "static $line"
    }
    puts $G(fd) $line
  }
}

# Close the output file opened by fts5c_init.
proc fts5c_close {} {
  global G
  close $G(fd)
}


fts5c_init fts5.c
foreach f $G(src) { fts5c_printfile $f }
fts5c_close
Added ext/fts5/tool/showfts5.tcl.
# Print a decoded record from the %_data table of an fts5 table.
#
# Usage: showfts5.tcl database table

# Print a usage message to stderr and exit with status 1.
proc usage {} {
  puts stderr "usage: $::argv0 database table"
  puts stderr ""
  exit 1
}

if {[llength $argv]!=2} usage

set database [lindex $argv 0]
set tbl [lindex $argv 1]

sqlite3 db $database

# Decode the record with id 10 and print every element of the decoded list
# except the first, one per line.
# NOTE(review): id 10 is presumably the fts5 structure record - confirm
# against fts5_index.c.
db eval "SELECT fts5_decode(rowid, block) AS d FROM ${tbl}_data WHERE id=10" {
  foreach lvl [lrange $d 1 end] {
    puts $lvl
  }
}
Changes to main.mk.
︙ | ︙ | |||
68 69 70 71 72 73 74 75 76 77 78 79 80 81 | random.o resolve.o rowset.o rtree.o select.o sqlite3ota.o status.o \ table.o threads.o tokenize.o treeview.o trigger.o \ update.o userauth.o util.o vacuum.o \ vdbeapi.o vdbeaux.o vdbeblob.o vdbemem.o vdbesort.o \ vdbetrace.o wal.o walker.o where.o wherecode.o whereexpr.o \ utf.o vtab.o # All of the source code files. # SRC = \ $(TOP)/src/alter.c \ $(TOP)/src/analyze.c \ | > > | 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 | random.o resolve.o rowset.o rtree.o select.o sqlite3ota.o status.o \ table.o threads.o tokenize.o treeview.o trigger.o \ update.o userauth.o util.o vacuum.o \ vdbeapi.o vdbeaux.o vdbeblob.o vdbemem.o vdbesort.o \ vdbetrace.o wal.o walker.o where.o wherecode.o whereexpr.o \ utf.o vtab.o LIBOBJ += fts5.o # All of the source code files. # SRC = \ $(TOP)/src/alter.c \ $(TOP)/src/analyze.c \ |
︙ | ︙ | |||
220 221 222 223 224 225 226 | $(TOP)/ext/icu/icu.c SRC += \ $(TOP)/ext/rtree/sqlite3rtree.h \ $(TOP)/ext/rtree/rtree.h \ $(TOP)/ext/rtree/rtree.c SRC += \ $(TOP)/ext/userauth/userauth.c \ | | > > | 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 | $(TOP)/ext/icu/icu.c SRC += \ $(TOP)/ext/rtree/sqlite3rtree.h \ $(TOP)/ext/rtree/rtree.h \ $(TOP)/ext/rtree/rtree.c SRC += \ $(TOP)/ext/userauth/userauth.c \ $(TOP)/ext/userauth/sqlite3userauth.h SRC += \ $(TOP)/ext/ota/sqlite3ota.c \ $(TOP)/ext/ota/sqlite3ota.h # Generated source code files # SRC += \ keywordhash.h \ opcodes.c \ opcodes.h \ |
︙ | ︙ | |||
298 299 300 301 302 303 304 | $(TOP)/ext/misc/ieee754.c \ $(TOP)/ext/misc/nextchar.c \ $(TOP)/ext/misc/percentile.c \ $(TOP)/ext/misc/regexp.c \ $(TOP)/ext/misc/spellfix.c \ $(TOP)/ext/misc/totype.c \ $(TOP)/ext/misc/wholenumber.c \ | | > | 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 | $(TOP)/ext/misc/ieee754.c \ $(TOP)/ext/misc/nextchar.c \ $(TOP)/ext/misc/percentile.c \ $(TOP)/ext/misc/regexp.c \ $(TOP)/ext/misc/spellfix.c \ $(TOP)/ext/misc/totype.c \ $(TOP)/ext/misc/wholenumber.c \ $(TOP)/ext/misc/vfslog.c \ $(TOP)/ext/fts5/fts5_tcl.c #TESTSRC += $(TOP)/ext/fts2/fts2_tokenizer.c #TESTSRC += $(TOP)/ext/fts3/fts3_tokenizer.c TESTSRC2 = \ $(TOP)/src/attach.c \ |
︙ | ︙ | |||
394 395 396 397 398 399 400 401 402 403 404 405 406 407 | $(TOP)/ext/fts3/fts3Int.h \ $(TOP)/ext/fts3/fts3_hash.h \ $(TOP)/ext/fts3/fts3_tokenizer.h EXTHDR += \ $(TOP)/ext/rtree/rtree.h EXTHDR += \ $(TOP)/ext/icu/sqliteicu.h EXTHDR += \ $(TOP)/ext/userauth/sqlite3userauth.h # executables needed for testing # TESTPROGS = \ testfixture$(EXE) \ | > > > > | 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 | $(TOP)/ext/fts3/fts3Int.h \ $(TOP)/ext/fts3/fts3_hash.h \ $(TOP)/ext/fts3/fts3_tokenizer.h EXTHDR += \ $(TOP)/ext/rtree/rtree.h EXTHDR += \ $(TOP)/ext/icu/sqliteicu.h EXTHDR += \ $(TOP)/ext/fts5/fts5Int.h \ fts5parse.h \ $(TOP)/ext/fts5/fts5.h EXTHDR += \ $(TOP)/ext/userauth/sqlite3userauth.h # executables needed for testing # TESTPROGS = \ testfixture$(EXE) \ |
︙ | ︙ | |||
620 621 622 623 624 625 626 627 628 629 630 631 632 633 | $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_unicode2.c fts3_write.o: $(TOP)/ext/fts3/fts3_write.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_write.c rtree.o: $(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c userauth.o: $(TOP)/ext/userauth/userauth.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/userauth/userauth.c sqlite3ota.o: $(TOP)/ext/ota/sqlite3ota.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/ota/sqlite3ota.c | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 | $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_unicode2.c fts3_write.o: $(TOP)/ext/fts3/fts3_write.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_write.c rtree.o: $(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c # FTS5 things # FTS5_SRC = \ $(TOP)/ext/fts5/fts5.h \ $(TOP)/ext/fts5/fts5Int.h \ $(TOP)/ext/fts5/fts5_aux.c \ $(TOP)/ext/fts5/fts5_buffer.c \ $(TOP)/ext/fts5/fts5_main.c \ $(TOP)/ext/fts5/fts5_config.c \ $(TOP)/ext/fts5/fts5_expr.c \ $(TOP)/ext/fts5/fts5_hash.c \ $(TOP)/ext/fts5/fts5_index.c \ fts5parse.c fts5parse.h \ $(TOP)/ext/fts5/fts5_storage.c \ $(TOP)/ext/fts5/fts5_tokenize.c \ $(TOP)/ext/fts5/fts5_unicode2.c \ $(TOP)/ext/fts5/fts5_varint.c \ $(TOP)/ext/fts5/fts5_vocab.c \ fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon cp $(TOP)/ext/fts5/fts5parse.y . 
rm -f fts5parse.h ./lemon $(OPTS) fts5parse.y mv fts5parse.c fts5parse.c.orig echo "#ifdef SQLITE_ENABLE_FTS5" > fts5parse.c cat fts5parse.c.orig | sed 's/yy/fts5yy/g' | sed 's/YY/fts5YY/g' \ | sed 's/TOKEN/FTS5TOKEN/g' >> fts5parse.c echo "#endif /* SQLITE_ENABLE_FTS5 */" >> fts5parse.c fts5parse.h: fts5parse.c fts5.c: $(FTS5_SRC) tclsh $(TOP)/ext/fts5/tool/mkfts5c.tcl fts5.o: fts5.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c fts5.c userauth.o: $(TOP)/ext/userauth/userauth.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/userauth/userauth.c sqlite3ota.o: $(TOP)/ext/ota/sqlite3ota.c $(HDR) $(EXTHDR) $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/ota/sqlite3ota.c |
︙ | ︙ | |||
656 657 658 659 660 661 662 | TESTFIXTURE_FLAGS += -DSQLITE_SERVER=1 -DSQLITE_PRIVATE="" -DSQLITE_CORE testfixture$(EXE): $(TESTSRC2) libsqlite3.a $(TESTSRC) $(TOP)/src/tclsqlite.c $(TCCX) $(TCL_FLAGS) -DTCLSH=1 $(TESTFIXTURE_FLAGS) \ $(TESTSRC) $(TESTSRC2) $(TOP)/src/tclsqlite.c \ -o testfixture$(EXE) $(LIBTCL) libsqlite3.a $(THREADLIB) | | | | 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 | TESTFIXTURE_FLAGS += -DSQLITE_SERVER=1 -DSQLITE_PRIVATE="" -DSQLITE_CORE testfixture$(EXE): $(TESTSRC2) libsqlite3.a $(TESTSRC) $(TOP)/src/tclsqlite.c $(TCCX) $(TCL_FLAGS) -DTCLSH=1 $(TESTFIXTURE_FLAGS) \ $(TESTSRC) $(TESTSRC2) $(TOP)/src/tclsqlite.c \ -o testfixture$(EXE) $(LIBTCL) libsqlite3.a $(THREADLIB) amalgamation-testfixture$(EXE): sqlite3.c fts5.c $(TESTSRC) $(TOP)/src/tclsqlite.c $(TCCX) $(TCL_FLAGS) -DTCLSH=1 $(TESTFIXTURE_FLAGS) \ $(TESTSRC) $(TOP)/src/tclsqlite.c sqlite3.c fts5.c \ -o testfixture$(EXE) $(LIBTCL) $(THREADLIB) fts3-testfixture$(EXE): sqlite3.c fts3amal.c $(TESTSRC) $(TOP)/src/tclsqlite.c $(TCCX) $(TCL_FLAGS) -DTCLSH=1 $(TESTFIXTURE_FLAGS) \ -DSQLITE_ENABLE_FTS3=1 \ $(TESTSRC) $(TOP)/src/tclsqlite.c sqlite3.c fts3amal.c \ -o testfixture$(EXE) $(LIBTCL) $(THREADLIB) |
︙ | ︙ | |||
773 774 775 776 777 778 779 780 781 782 783 784 785 786 | speedtest1$(EXE): $(TOP)/test/speedtest1.c sqlite3.o $(TCC) -I. $(OTAFLAGS) -o speedtest1$(EXE) $(TOP)/test/speedtest1.c sqlite3.o $(THREADLIB) ota$(EXE): $(TOP)/ext/ota/ota.c $(TOP)/ext/ota/sqlite3ota.c sqlite3.o $(TCC) -I. -o ota$(EXE) $(TOP)/ext/ota/ota.c sqlite3.o \ $(THREADLIB) # This target will fail if the SQLite amalgamation contains any exported # symbols that do not begin with "sqlite3_". It is run as part of the # releasetest.tcl script. # checksymbols: sqlite3.o nm -g --defined-only sqlite3.o | grep -v " sqlite3_" ; test $$? -ne 0 | > > > | 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 | speedtest1$(EXE): $(TOP)/test/speedtest1.c sqlite3.o $(TCC) -I. $(OTAFLAGS) -o speedtest1$(EXE) $(TOP)/test/speedtest1.c sqlite3.o $(THREADLIB) ota$(EXE): $(TOP)/ext/ota/ota.c $(TOP)/ext/ota/sqlite3ota.c sqlite3.o $(TCC) -I. -o ota$(EXE) $(TOP)/ext/ota/ota.c sqlite3.o \ $(THREADLIB) loadfts: $(TOP)/tool/loadfts.c libsqlite3.a $(TCC) $(TOP)/tool/loadfts.c libsqlite3.a -o loadfts $(THREADLIB) # This target will fail if the SQLite amalgamation contains any exported # symbols that do not begin with "sqlite3_". It is run as part of the # releasetest.tcl script. # checksymbols: sqlite3.o nm -g --defined-only sqlite3.o | grep -v " sqlite3_" ; test $$? -ne 0 |
︙ | ︙ |
Changes to src/tclsqlite.c.
︙ | ︙ | |||
3756 3757 3758 3759 3760 3761 3762 3763 | extern int Sqlitetestintarray_Init(Tcl_Interp*); extern int Sqlitetestvfs_Init(Tcl_Interp *); extern int Sqlitetestrtree_Init(Tcl_Interp*); extern int Sqlitequota_Init(Tcl_Interp*); extern int Sqlitemultiplex_Init(Tcl_Interp*); extern int SqliteSuperlock_Init(Tcl_Interp*); extern int SqlitetestSyscall_Init(Tcl_Interp*); extern int SqliteOta_Init(Tcl_Interp*); | > < | 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 | extern int Sqlitetestintarray_Init(Tcl_Interp*); extern int Sqlitetestvfs_Init(Tcl_Interp *); extern int Sqlitetestrtree_Init(Tcl_Interp*); extern int Sqlitequota_Init(Tcl_Interp*); extern int Sqlitemultiplex_Init(Tcl_Interp*); extern int SqliteSuperlock_Init(Tcl_Interp*); extern int SqlitetestSyscall_Init(Tcl_Interp*); extern int Fts5tcl_Init(Tcl_Interp *); extern int SqliteOta_Init(Tcl_Interp*); #if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) extern int Sqlitetestfts3_Init(Tcl_Interp *interp); #endif #ifdef SQLITE_ENABLE_ZIPVFS extern int Zipvfs_Init(Tcl_Interp*); Zipvfs_Init(interp); |
︙ | ︙ | |||
3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 | Sqlitetestintarray_Init(interp); Sqlitetestvfs_Init(interp); Sqlitetestrtree_Init(interp); Sqlitequota_Init(interp); Sqlitemultiplex_Init(interp); SqliteSuperlock_Init(interp); SqlitetestSyscall_Init(interp); SqliteOta_Init(interp); #if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) Sqlitetestfts3_Init(interp); #endif Tcl_CreateObjCommand( | > | 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 | Sqlitetestintarray_Init(interp); Sqlitetestvfs_Init(interp); Sqlitetestrtree_Init(interp); Sqlitequota_Init(interp); Sqlitemultiplex_Init(interp); SqliteSuperlock_Init(interp); SqlitetestSyscall_Init(interp); Fts5tcl_Init(interp); SqliteOta_Init(interp); #if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) Sqlitetestfts3_Init(interp); #endif Tcl_CreateObjCommand( |
︙ | ︙ |
Changes to src/test1.c.
︙ | ︙ | |||
6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 | extern int sqlite3_ieee_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_nextchar_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_percentile_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_regexp_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_spellfix_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_totype_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_wholenumber_init(sqlite3*,char**,const sqlite3_api_routines*); static const struct { const char *zExtName; int (*pInit)(sqlite3*,char**,const sqlite3_api_routines*); } aExtension[] = { { "amatch", sqlite3_amatch_init }, { "closure", sqlite3_closure_init }, { "eval", sqlite3_eval_init }, { "fileio", sqlite3_fileio_init }, { "fuzzer", sqlite3_fuzzer_init }, { "ieee754", sqlite3_ieee_init }, { "nextchar", sqlite3_nextchar_init }, { "percentile", sqlite3_percentile_init }, { "regexp", sqlite3_regexp_init }, { "spellfix", sqlite3_spellfix_init }, | > > > > | 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 | extern int sqlite3_ieee_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_nextchar_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_percentile_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_regexp_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_spellfix_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_totype_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_wholenumber_init(sqlite3*,char**,const sqlite3_api_routines*); extern int sqlite3_fts5_init(sqlite3*,char**,const sqlite3_api_routines*); static const struct { const char *zExtName; int (*pInit)(sqlite3*,char**,const 
sqlite3_api_routines*); } aExtension[] = { { "amatch", sqlite3_amatch_init }, { "closure", sqlite3_closure_init }, { "eval", sqlite3_eval_init }, #ifdef SQLITE_ENABLE_FTS5 { "fts5", sqlite3_fts5_init }, #endif { "fileio", sqlite3_fileio_init }, { "fuzzer", sqlite3_fuzzer_init }, { "ieee754", sqlite3_ieee_init }, { "nextchar", sqlite3_nextchar_init }, { "percentile", sqlite3_percentile_init }, { "regexp", sqlite3_regexp_init }, { "spellfix", sqlite3_spellfix_init }, |
︙ | ︙ |
Changes to src/test_config.c.
︙ | ︙ | |||
335 336 337 338 339 340 341 342 343 344 345 346 347 348 | #endif #ifdef SQLITE_ENABLE_FTS3 Tcl_SetVar2(interp, "sqlite_options", "fts3", "1", TCL_GLOBAL_ONLY); #else Tcl_SetVar2(interp, "sqlite_options", "fts3", "0", TCL_GLOBAL_ONLY); #endif #if defined(SQLITE_ENABLE_FTS3) && !defined(SQLITE_DISABLE_FTS3_UNICODE) Tcl_SetVar2(interp, "sqlite_options", "fts3_unicode", "1", TCL_GLOBAL_ONLY); #else Tcl_SetVar2(interp, "sqlite_options", "fts3_unicode", "0", TCL_GLOBAL_ONLY); #endif | > > > > > > | 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 | #endif #ifdef SQLITE_ENABLE_FTS3 Tcl_SetVar2(interp, "sqlite_options", "fts3", "1", TCL_GLOBAL_ONLY); #else Tcl_SetVar2(interp, "sqlite_options", "fts3", "0", TCL_GLOBAL_ONLY); #endif #ifdef SQLITE_ENABLE_FTS5 Tcl_SetVar2(interp, "sqlite_options", "fts5", "1", TCL_GLOBAL_ONLY); #else Tcl_SetVar2(interp, "sqlite_options", "fts5", "0", TCL_GLOBAL_ONLY); #endif #if defined(SQLITE_ENABLE_FTS3) && !defined(SQLITE_DISABLE_FTS3_UNICODE) Tcl_SetVar2(interp, "sqlite_options", "fts3_unicode", "1", TCL_GLOBAL_ONLY); #else Tcl_SetVar2(interp, "sqlite_options", "fts3_unicode", "0", TCL_GLOBAL_ONLY); #endif |
︙ | ︙ |
Changes to src/vtab.c.
︙ | ︙ | |||
833 834 835 836 837 838 839 840 | ** the offset of the method to call in the sqlite3_module structure. ** ** The array is cleared after invoking the callbacks. */ static void callFinaliser(sqlite3 *db, int offset){ int i; if( db->aVTrans ){ for(i=0; i<db->nVTrans; i++){ | > > | | < | 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 | ** the offset of the method to call in the sqlite3_module structure. ** ** The array is cleared after invoking the callbacks. */ static void callFinaliser(sqlite3 *db, int offset){ int i; if( db->aVTrans ){ VTable **aVTrans = db->aVTrans; db->aVTrans = 0; for(i=0; i<db->nVTrans; i++){ VTable *pVTab = aVTrans[i]; sqlite3_vtab *p = pVTab->pVtab; if( p ){ int (*x)(sqlite3_vtab *); x = *(int (**)(sqlite3_vtab *))((char *)p->pModule + offset); if( x ) x(p); } pVTab->iSavepoint = 0; sqlite3VtabUnlock(pVTab); } sqlite3DbFree(db, aVTrans); db->nVTrans = 0; } } /* ** Invoke the xSync method of all virtual tables in the sqlite3.aVTrans ** array. Return the error code for the first error that occurs, or ** SQLITE_OK if all xSync operations are successful. |
︙ | ︙ |
Changes to test/malloc_common.tcl.
︙ | ︙ | |||
125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 | if {$n != "interrupt"} {lappend DEFAULT(-faults) $n} } set DEFAULT(-prep) "" set DEFAULT(-body) "" set DEFAULT(-test) "" set DEFAULT(-install) "" set DEFAULT(-uninstall) "" fix_testname name array set O [array get DEFAULT] array set O $args foreach o [array names O] { if {[info exists DEFAULT($o)]==0} { error "unknown option: $o" } } set faultlist [list] foreach f $O(-faults) { set flist [array names FAULTSIM $f] if {[llength $flist]==0} { error "unknown fault: $f" } set faultlist [concat $faultlist $flist] } set testspec [list -prep $O(-prep) -body $O(-body) \ | > > | > | 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 | if {$n != "interrupt"} {lappend DEFAULT(-faults) $n} } set DEFAULT(-prep) "" set DEFAULT(-body) "" set DEFAULT(-test) "" set DEFAULT(-install) "" set DEFAULT(-uninstall) "" set DEFAULT(-start) 1 set DEFAULT(-end) 0 fix_testname name array set O [array get DEFAULT] array set O $args foreach o [array names O] { if {[info exists DEFAULT($o)]==0} { error "unknown option: $o" } } set faultlist [list] foreach f $O(-faults) { set flist [array names FAULTSIM $f] if {[llength $flist]==0} { error "unknown fault: $f" } set faultlist [concat $faultlist $flist] } set testspec [list -prep $O(-prep) -body $O(-body) \ -test $O(-test) -install $O(-install) -uninstall $O(-uninstall) \ -start $O(-start) -end $O(-end) ] foreach f [lsort -unique $faultlist] { eval do_one_faultsim_test "$name-$f" $FAULTSIM($f) $testspec } } |
︙ | ︙ | |||
285 286 287 288 289 290 291 | # [faultsim_test_result] command created by [do_faultsim_test] and used # by -test scripts. # proc faultsim_test_result_int {args} { upvar testrc testrc testresult testresult testnfail testnfail set t [list $testrc $testresult] set r $args | | | 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 | # [faultsim_test_result] command created by [do_faultsim_test] and used # by -test scripts. # proc faultsim_test_result_int {args} { upvar testrc testrc testresult testresult testnfail testnfail set t [list $testrc $testresult] set r $args if { ($testnfail==0 && $t != [lindex $r 0]) || [lsearch -exact $r $t]<0 } { error "nfail=$testnfail rc=$testrc result=$testresult list=$r" } } #-------------------------------------------------------------------------- # Usage do_one_faultsim_test NAME ?OPTIONS...? # |
︙ | ︙ | |||
314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 | # # -prep Script to execute before -body. # # -body Script to execute (with fault injection). # # -test Script to execute after -body. # proc do_one_faultsim_test {testname args} { set DEFAULT(-injectstart) "expr" set DEFAULT(-injectstop) "expr 0" set DEFAULT(-injecterrlist) [list] set DEFAULT(-injectinstall) "" set DEFAULT(-injectuninstall) "" set DEFAULT(-prep) "" set DEFAULT(-body) "" set DEFAULT(-test) "" set DEFAULT(-install) "" set DEFAULT(-uninstall) "" array set O [array get DEFAULT] array set O $args foreach o [array names O] { if {[info exists DEFAULT($o)]==0} { error "unknown option: $o" } } proc faultsim_test_proc {testrc testresult testnfail} $O(-test) proc faultsim_test_result {args} " uplevel faultsim_test_result_int \$args [list $O(-injecterrlist)] " eval $O(-injectinstall) eval $O(-install) set stop 0 | > > > > | > > > | 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 | # # -prep Script to execute before -body. # # -body Script to execute (with fault injection). # # -test Script to execute after -body. 
# # -start Index of first fault to inject (default 1) # proc do_one_faultsim_test {testname args} { set DEFAULT(-injectstart) "expr" set DEFAULT(-injectstop) "expr 0" set DEFAULT(-injecterrlist) [list] set DEFAULT(-injectinstall) "" set DEFAULT(-injectuninstall) "" set DEFAULT(-prep) "" set DEFAULT(-body) "" set DEFAULT(-test) "" set DEFAULT(-install) "" set DEFAULT(-uninstall) "" set DEFAULT(-start) 1 set DEFAULT(-end) 0 array set O [array get DEFAULT] array set O $args foreach o [array names O] { if {[info exists DEFAULT($o)]==0} { error "unknown option: $o" } } proc faultsim_test_proc {testrc testresult testnfail} $O(-test) proc faultsim_test_result {args} " uplevel faultsim_test_result_int \$args [list $O(-injecterrlist)] " eval $O(-injectinstall) eval $O(-install) set stop 0 for {set iFail $O(-start)} \ {!$stop && ($O(-end)==0 || $iFail<=$O(-end))} \ {incr iFail} \ { # Evaluate the -prep script. # eval $O(-prep) # Start the fault-injection. Run the -body script. Stop the fault # injection. Local var $nfail is set to the total number of faults |
︙ | ︙ |
Changes to test/permutations.test.
︙ | ︙ | |||
244 245 246 247 248 249 250 251 252 253 254 255 256 257 | fts4aa.test fts4content.test fts3conf.test fts3prefix.test fts3fault2.test fts3corrupt.test fts3corrupt2.test fts3first.test fts4langid.test fts4merge.test fts4check.test fts4unicode.test fts4noti.test fts3varint.test fts4growth.test fts4growth2.test } test_suite "nofaultsim" -prefix "" -description { "Very" quick test suite. Runs in less than 5 minutes on a workstation. This test suite is the same as the "quick" tests, except that some files that test malloc and IO errors are omitted. } -files [ test_set $allquicktests -exclude *malloc* *ioerr* *fault* | > > > > > > > > > > > > | 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 | fts4aa.test fts4content.test fts3conf.test fts3prefix.test fts3fault2.test fts3corrupt.test fts3corrupt2.test fts3first.test fts4langid.test fts4merge.test fts4check.test fts4unicode.test fts4noti.test fts3varint.test fts4growth.test fts4growth2.test } test_suite "fts5" -prefix "" -description { All FTS5 tests. } -files [glob -nocomplain $::testdir/../ext/fts5/test/*.test] test_suite "fts5-light" -prefix "" -description { All FTS5 tests. } -files [ test_set \ [glob -nocomplain $::testdir/../ext/fts5/test/*.test] \ -exclude *corrupt* *fault* *big* *fts5aj* ] test_suite "nofaultsim" -prefix "" -description { "Very" quick test suite. Runs in less than 5 minutes on a workstation. This test suite is the same as the "quick" tests, except that some files that test malloc and IO errors are omitted. } -files [ test_set $allquicktests -exclude *malloc* *ioerr* *fault* |
︙ | ︙ |
Added tool/loadfts.c.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 | /* ** 2013-06-10 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* */ #include <stdio.h> #include <stdlib.h> #include <ctype.h> #include <assert.h> #include <string.h> #include <errno.h> #include <dirent.h> #include "sqlite3.h" /* ** Implementation of the "readtext(X)" SQL function. The entire content ** of the file named X is read and returned as a TEXT value. 
It is assumed ** the file contains UTF-8 text. NULL is returned if the file does not ** exist or is unreadable. */ static void readfileFunc( sqlite3_context *context, int argc, sqlite3_value **argv ){ const char *zName; FILE *in; long nIn; void *pBuf; zName = (const char*)sqlite3_value_text(argv[0]); if( zName==0 ) return; in = fopen(zName, "rb"); if( in==0 ) return; fseek(in, 0, SEEK_END); nIn = ftell(in); rewind(in); pBuf = sqlite3_malloc( nIn ); if( pBuf && 1==fread(pBuf, nIn, 1, in) ){ sqlite3_result_text(context, pBuf, nIn, sqlite3_free); }else{ sqlite3_free(pBuf); } fclose(in); } /* ** Print usage text for this program and exit. */ static void showHelp(const char *zArgv0){ printf("\n" "Usage: %s SWITCHES... DB\n" "\n" " This program opens the database named on the command line and attempts to\n" " create an FTS table named \"fts\" with a single column. If successful, it\n" " recursively traverses the directory named by the -dir option and inserts\n" " the contents of each file into the fts table. All files are assumed to\n" " contain UTF-8 text.\n" "\n" "Switches are:\n" " -fts [345] FTS version to use (default=5)\n" " -idx [01] Create a mapping from filename to rowid (default=0)\n" " -dir <path> Root of directory tree to load data from (default=.)\n" " -trans <integer> Number of inserts per transaction (default=1)\n" , zArgv0 ); exit(1); } /* ** Exit with a message based on the argument and the current value of errno. */ static void error_out(const char *zText){ fprintf(stderr, "%s: %s\n", zText, strerror(errno)); exit(-1); } /* ** Exit with a message based on the first argument and the error message ** currently stored in database handle db. */ static void sqlite_error_out(const char *zText, sqlite3 *db){ fprintf(stderr, "%s: %s\n", zText, sqlite3_errmsg(db)); exit(-1); } /* ** Context object for visit_file(). 
*/ typedef struct VisitContext VisitContext; struct VisitContext { int nRowPerTrans; sqlite3 *db; /* Database handle */ sqlite3_stmt *pInsert; /* INSERT INTO fts VALUES(readtext(:1)) */ }; /* ** Callback used with traverse(). The first argument points to an object ** of type VisitContext. This function inserts the contents of the text ** file zPath into the FTS table. */ void visit_file(void *pCtx, const char *zPath){ int rc; VisitContext *p = (VisitContext*)pCtx; /* printf("%s\n", zPath); */ sqlite3_bind_text(p->pInsert, 1, zPath, -1, SQLITE_STATIC); sqlite3_step(p->pInsert); rc = sqlite3_reset(p->pInsert); if( rc!=SQLITE_OK ){ sqlite_error_out("insert", p->db); }else if( p->nRowPerTrans>0 && (sqlite3_last_insert_rowid(p->db) % p->nRowPerTrans)==0 ){ sqlite3_exec(p->db, "COMMIT ; BEGIN", 0, 0, 0); } } /* ** Recursively traverse directory zDir. For each file that is not a ** directory, invoke the supplied callback with its path. */ static void traverse( const char *zDir, /* Directory to traverse */ void *pCtx, /* First argument passed to callback */ void (*xCallback)(void*, const char *zPath) ){ DIR *d; struct dirent *e; d = opendir(zDir); if( d==0 ) error_out("opendir()"); for(e=readdir(d); e; e=readdir(d)){ if( strcmp(e->d_name, ".")==0 || strcmp(e->d_name, "..")==0 ) continue; char *zPath = sqlite3_mprintf("%s/%s", zDir, e->d_name); if (e->d_type & DT_DIR) { traverse(zPath, pCtx, xCallback); }else{ xCallback(pCtx, zPath); } sqlite3_free(zPath); } closedir(d); } int main(int argc, char **argv){ int iFts = 5; /* Value of -fts option */ int bMap = 0; /* True to create mapping table */ const char *zDir = "."; /* Directory to scan */ int i; int rc; int nRowPerTrans = 0; sqlite3 *db; char *zSql; VisitContext sCtx; int nCmd = 0; char **aCmd = 0; if( argc % 2 ) showHelp(argv[0]); for(i=1; i<(argc-1); i+=2){ char *zOpt = argv[i]; char *zArg = argv[i+1]; if( strcmp(zOpt, "-fts")==0 ){ iFts = atoi(zArg); if( iFts!=3 && iFts!=4 && iFts!= 5) showHelp(argv[0]); } else if( 
strcmp(zOpt, "-trans")==0 ){ nRowPerTrans = atoi(zArg); } else if( strcmp(zOpt, "-idx")==0 ){ bMap = atoi(zArg); if( bMap!=0 && bMap!=1 ) showHelp(argv[0]); } else if( strcmp(zOpt, "-dir")==0 ){ zDir = zArg; } else if( strcmp(zOpt, "-special")==0 ){ nCmd++; aCmd = sqlite3_realloc(aCmd, sizeof(char*) * nCmd); aCmd[nCmd-1] = zArg; } else{ showHelp(argv[0]); } } /* Open the database file */ rc = sqlite3_open(argv[argc-1], &db); if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_open()", db); rc = sqlite3_create_function(db, "readtext", 1, SQLITE_UTF8, 0, readfileFunc, 0, 0); if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_create_function()", db); /* Create the FTS table */ zSql = sqlite3_mprintf("CREATE VIRTUAL TABLE fts USING fts%d(content)", iFts); rc = sqlite3_exec(db, zSql, 0, 0, 0); if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_exec(1)", db); sqlite3_free(zSql); for(i=0; i<nCmd; i++){ zSql = sqlite3_mprintf("INSERT INTO fts(fts) VALUES(%Q)", aCmd[i]); rc = sqlite3_exec(db, zSql, 0, 0, 0); if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_exec(1)", db); sqlite3_free(zSql); } /* Compile the INSERT statement to write data to the FTS table. */ memset(&sCtx, 0, sizeof(VisitContext)); sCtx.db = db; sCtx.nRowPerTrans = nRowPerTrans; rc = sqlite3_prepare_v2(db, "INSERT INTO fts VALUES(readtext(?))", -1, &sCtx.pInsert, 0 ); if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_prepare_v2(1)", db); /* Load all files in the directory hierarchy into the FTS table. */ if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "BEGIN", 0, 0, 0); traverse(zDir, (void*)&sCtx, visit_file); if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "COMMIT", 0, 0, 0); /* Clean up and exit. */ sqlite3_finalize(sCtx.pInsert); sqlite3_close(db); sqlite3_free(aCmd); return 0; } |