SQLite

Check-in [baec1b96cb]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Allow the subsitute character (codepoint 26 - 0x1A) to appear in fts5 barewords.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: baec1b96cb64a6a6317143a8df841e502afe3914
User & Date: dan 2015-10-07 17:01:22.560
Context
2015-10-07
17:06
Fix harmless compiler warning in FTS5. (check-in: 13adcd038f user: mistachkin tags: trunk)
17:01
Allow the subsitute character (codepoint 26 - 0x1A) to appear in fts5 barewords. (check-in: baec1b96cb user: dan tags: trunk)
16:14
Adjustments to spellfix2.test so that it works reliably on mac. (check-in: d591e860d3 user: drh tags: trunk)
Changes
Unified Diff Show Whitespace Changes Patch
Changes to ext/fts5/fts5_buffer.c.
288
289
290
291
292
293
294

295
296
297
298
299
300
301
302
303
304
305
306
** Return true if character 't' may be part of an FTS5 bareword, or false
** otherwise. Characters that may be part of barewords:
**
**   * All non-ASCII characters,
**   * The 52 upper and lower case ASCII characters, and
**   * The 10 integer ASCII characters.
**   * The underscore character "_" (0x5F).

*/
int sqlite3Fts5IsBareword(char t){
  u8 aBareword[128] = {
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x00 .. 0x0F */
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x10 .. 0x1F */
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x20 .. 0x2F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 0, 0, 0, 0, 0, 0,   /* 0x30 .. 0x3F */
    0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,   /* 0x40 .. 0x4F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 0, 1,   /* 0x50 .. 0x5F */
    0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,   /* 0x60 .. 0x6F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 0, 0    /* 0x70 .. 0x7F */
  };







>




|







288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
** Return true if character 't' may be part of an FTS5 bareword, or false
** otherwise. Characters that may be part of barewords:
**
**   * All non-ASCII characters,
**   * The 52 upper and lower case ASCII characters, and
**   * The 10 integer ASCII characters.
**   * The underscore character "_" (0x5F).
**   * The unicode "subsitute" character (0x1A).
*/
int sqlite3Fts5IsBareword(char t){
  u8 aBareword[128] = {
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x00 .. 0x0F */
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 1, 0, 0, 0, 0, 0,   /* 0x10 .. 0x1F */
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x20 .. 0x2F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 0, 0, 0, 0, 0, 0,   /* 0x30 .. 0x3F */
    0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,   /* 0x40 .. 0x4F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 0, 1,   /* 0x50 .. 0x5F */
    0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,   /* 0x60 .. 0x6F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 0, 0    /* 0x70 .. 0x7F */
  };
Changes to ext/fts5/fts5_index.c.
4683
4684
4685
4686
4687
4688
4689
4690
4691
4692
4693
4694
4695
4696
4697
4698
4699
4700
/*
** This function is similar to sqlite3Fts5IterPoslist(), except that it
** copies the position list into the buffer supplied as the second 
** argument.
*/
int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf){
  Fts5Index *p = pIter->pIndex;

  assert( p->rc==SQLITE_OK );
  fts5BufferZero(pBuf);
  fts5MultiIterPoslist(p, pIter, 0, 0, pBuf);
  return fts5IndexReturn(p);
}

/*
** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
*/
void sqlite3Fts5IterClose(Fts5IndexIter *pIter){







|


|







4683
4684
4685
4686
4687
4688
4689
4690
4691
4692
4693
4694
4695
4696
4697
4698
4699
4700
/*
** This function is similar to sqlite3Fts5IterPoslist(), except that it
** copies the position list into the buffer supplied as the second 
** argument.
*/
int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf){
  Fts5Index *p = pIter->pIndex;
  Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
  assert( p->rc==SQLITE_OK );
  fts5BufferZero(pBuf);
  fts5SegiterPoslist(p, pSeg, 0, pBuf);
  return fts5IndexReturn(p);
}

/*
** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
*/
void sqlite3Fts5IterClose(Fts5IndexIter *pIter){
Changes to ext/fts5/test/fts5simple.test.
246
247
248
249
250
251
252






























253
254
255
256
  INSERT INTO t3 VALUES('bac aab bab', 'c bac c', 'acb aba abb'); -- 1
  INSERT INTO t3 VALUES('bab abc c', 'acb c abb', 'c aaa c');     -- 2
}

do_execsql_test 10.1 {
  SELECT rowid FROM t3('c: c*');
} {2}
































finish_test








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>




246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
  INSERT INTO t3 VALUES('bac aab bab', 'c bac c', 'acb aba abb'); -- 1
  INSERT INTO t3 VALUES('bab abc c', 'acb c abb', 'c aaa c');     -- 2
}

do_execsql_test 10.1 {
  SELECT rowid FROM t3('c: c*');
} {2}

#-------------------------------------------------------------------------
# Test that character 0x1A is allowed in fts5 barewords.
#
do_test 11.0 {
  execsql "CREATE VIRTUAL TABLE t4 USING fts5(x, tokenize=\"ascii tokenchars '\x1A'\")"
  execsql "
    INSERT INTO t4 VALUES('a b c \x1A');
    INSERT INTO t4 VALUES('a b c d\x1A');
    INSERT INTO t4 VALUES('a b c \x1Ad');
    INSERT INTO t4 VALUES('a b c d');
  "
} {}

do_test 11.1 {
  execsql "SELECT rowid FROM t4('\x1A')"
} {1}
do_test 11.2 {
  execsql "SELECT rowid FROM t4('\x1A*')"
} {1 3}
do_test 11.3 {
  execsql "SELECT rowid FROM t4('d\x1A')"
} {2}

do_test 11.4 {
  catchsql "SELECT rowid FROM t4('d\x1B')"
} {/fts5: syntax error/}
do_test 11.5 {
  catchsql "SELECT rowid FROM t4('d\x19')"
} {/fts5: syntax error/}


finish_test