/ Check-in [4f90ba20]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add tests for fts5 tokenizers.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: 4f90ba20e2be6ec5755fe894938ac97342d6fbf6
User & Date: dan 2015-05-19 19:37:09
Context
2015-05-20
09:27
Improve test coverage of fts5_tokenize.c. check-in: 0e91a6a5 user: dan tags: fts5
2015-05-19
19:37
Add tests for fts5 tokenizers. check-in: 4f90ba20 user: dan tags: fts5
11:38
Update Makefile.in so that the amalgamation files built by "make sqlite3.c" include fts5. check-in: 2870a805 user: dan tags: fts5
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts5/test/fts5fault6.test.

92
93
94
95
96
97
98
99
100
101
102
















































103
104
  faultsim_test_result {0 {}}
}

do_faultsim_test 2.3 -faults oom-t* -prep {
  faultsim_restore_and_reopen
} -body {
  db eval { INSERT INTO tt VALUES('x y z') }
} -test {
  faultsim_test_result {0 {}}
}

















































finish_test












>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
  faultsim_test_result {0 {}}
}

do_faultsim_test 2.3 -faults oom-t* -prep {
  faultsim_restore_and_reopen
} -body {
  db eval { INSERT INTO tt VALUES('x y z') }
} -test {
  faultsim_test_result {0 {}}
}

#-------------------------------------------------------------------------
# OOM in the ASCII tokenizer with very large tokens. 
#
# Also the unicode tokenizer.
#
set t1 [string repeat wxyz 20]
set t2 [string repeat wxyz 200]
set t3 [string repeat wxyz 2000]
set doc "$t1 $t2 $t3"
do_execsql_test 3.0 {
  CREATE VIRTUAL TABLE xyz USING fts5(c, tokenize=ascii, content="");
  CREATE VIRTUAL TABLE xyz2 USING fts5(c, content="");
}
faultsim_save_and_close

do_faultsim_test 3.1 -faults oom-t* -prep {
  faultsim_restore_and_reopen
  db eval { SELECT * FROM xyz }
} -body {
  db eval { INSERT INTO xyz VALUES($::doc) }
} -test {
  faultsim_test_result {0 {}}
}

do_faultsim_test 3.2 -faults oom-t* -prep {
  faultsim_restore_and_reopen
  db eval { SELECT * FROM xyz2 }
} -body {
  db eval { INSERT INTO xyz2 VALUES($::doc) }
} -test {
  faultsim_test_result {0 {}}
}

#-------------------------------------------------------------------------
# OOM while initializing a unicode61 tokenizer.
#
reset_db
faultsim_save_and_close
do_faultsim_test 4.1 -faults oom-t* -prep {
  faultsim_restore_and_reopen
} -body {
  db eval { 
    CREATE VIRTUAL TABLE yu USING fts5(x, tokenize="unicode61 separators abc");
  }
} -test {
  faultsim_test_result {0 {}}
}

finish_test

Changes to ext/fts5/test/fts5tokenizer.test.

129
130
131
132
133
134
135
136












































































137
138

do_catchsql_test 5.2 {
  CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'ascii tokenchars');
} {1 {error in tokenizer constructor}}
do_catchsql_test 5.3 {
  CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'ascii opt arg');
} {1 {error in tokenizer constructor}}













































































finish_test









>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214

do_catchsql_test 5.2 {
  CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'ascii tokenchars');
} {1 {error in tokenizer constructor}}
do_catchsql_test 5.3 {
  CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'ascii opt arg');
} {1 {error in tokenizer constructor}}

#-------------------------------------------------------------------------
# Test that the ASCII and unicode61 tokenizers both handle SQLITE_DONE 
# correctly.
#

proc test_token_cb {varname token iStart iEnd} {
  upvar $varname var
  lappend var $token
  if {[llength $var]==3} { return "SQLITE_DONE" }
  return "SQLITE_OK"
}

proc tokenize {cmd} {
  set res [list]
  $cmd xTokenize [$cmd xColumnText 0] [list test_token_cb res]
  set res
}
sqlite3_fts5_create_function db tokenize tokenize

do_execsql_test 6.0 {
  CREATE VIRTUAL TABLE x1 USING fts5(a, tokenize=ascii);
  INSERT INTO x1 VALUES('q w e r t y');
  INSERT INTO x1 VALUES('y t r e w q');
  SELECT tokenize(x1) FROM x1 WHERE x1 MATCH 'e AND r';
} {
  {q w e} {y t r}
}

do_execsql_test 6.1 {
  CREATE VIRTUAL TABLE x2 USING fts5(a, tokenize=unicode61);
  INSERT INTO x2 VALUES('q w e r t y');
  INSERT INTO x2 VALUES('y t r e w q');
  SELECT tokenize(x2) FROM x2 WHERE x2 MATCH 'e AND r';
} {
  {q w e} {y t r}
}


#-------------------------------------------------------------------------
# Miscellaneous tests for the unicode tokenizer.
#
do_catchsql_test 6.1 {
  CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'unicode61 tokenchars');
} {1 {error in tokenizer constructor}}
do_catchsql_test 6.2 {
  CREATE VIRTUAL TABLE a3 USING fts5(x, y, tokenize = 'unicode61 a b');
} {1 {error in tokenizer constructor}}
do_catchsql_test 6.3 {
  CREATE VIRTUAL TABLE a3 USING fts5(
    x, y, tokenize = 'unicode61 remove_diacritics 2'
  );
} {1 {error in tokenizer constructor}}
do_catchsql_test 6.4 {
  CREATE VIRTUAL TABLE a3 USING fts5(
    x, y, tokenize = 'unicode61 remove_diacritics 10'
  );
} {1 {error in tokenizer constructor}}

#-------------------------------------------------------------------------
# Porter tokenizer with very large tokens.
#
set a [string repeat a 100]
set b [string repeat b 500]
set c [string repeat c 1000]
do_execsql_test 7.0 {
  CREATE VIRTUAL TABLE e5 USING fts5(x, tokenize=porter);
  INSERT INTO e5 VALUES($a || ' ' || $b);
  INSERT INTO e5 VALUES($b || ' ' || $c);
  INSERT INTO e5 VALUES($c || ' ' || $a);
}

do_execsql_test 7.1 {SELECT rowid FROM e5 WHERE e5 MATCH $a} { 1 3 }
do_execsql_test 7.2 {SELECT rowid FROM e5 WHERE e5 MATCH $b} { 1 2 }
do_execsql_test 7.3 {SELECT rowid FROM e5 WHERE e5 MATCH $c} { 2 3 }


finish_test