/ Check-in [baec1b96]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Allow the substitute character (codepoint 26 - 0x1A) to appear in fts5 barewords.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: baec1b96cb64a6a6317143a8df841e502afe3914
User & Date: dan 2015-10-07 17:01:22
Context
2015-10-07
17:06
Fix harmless compiler warning in FTS5. check-in: 13adcd03 user: mistachkin tags: trunk
17:01
Allow the substitute character (codepoint 26 - 0x1A) to appear in fts5 barewords. check-in: baec1b96 user: dan tags: trunk
16:14
Adjustments to spellfix2.test so that it works reliably on mac. check-in: d591e860 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5_buffer.c.

   288    288   ** Return true if character 't' may be part of an FTS5 bareword, or false
   289    289   ** otherwise. Characters that may be part of barewords:
   290    290   **
   291    291   **   * All non-ASCII characters,
   292    292   **   * The 52 upper and lower case ASCII characters, and
   293    293   **   * The 10 integer ASCII characters.
   294    294   **   * The underscore character "_" (0x5F).
          295  +**   * The unicode "substitute" character (0x1A).
   295    296   */
   296    297   int sqlite3Fts5IsBareword(char t){
   297    298     u8 aBareword[128] = {
   298    299       0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x00 .. 0x0F */
   299         -    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x10 .. 0x1F */
          300  +    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 1, 0, 0, 0, 0, 0,   /* 0x10 .. 0x1F */
   300    301       0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x20 .. 0x2F */
   301    302       1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 0, 0, 0, 0, 0, 0,   /* 0x30 .. 0x3F */
   302    303       0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,   /* 0x40 .. 0x4F */
   303    304       1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 0, 1,   /* 0x50 .. 0x5F */
   304    305       0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,   /* 0x60 .. 0x6F */
   305    306       1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 0, 0    /* 0x70 .. 0x7F */
   306    307     };
   307    308   
   308    309     return (t & 0x80) || aBareword[(int)t];
   309    310   }
   310    311   
   311    312   

Changes to ext/fts5/fts5_index.c.

  4683   4683   /*
  4684   4684   ** This function is similar to sqlite3Fts5IterPoslist(), except that it
  4685   4685   ** copies the position list into the buffer supplied as the second 
  4686   4686   ** argument.
  4687   4687   */
  4688   4688   int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf){
  4689   4689     Fts5Index *p = pIter->pIndex;
  4690         -
         4690  +  Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
  4691   4691     assert( p->rc==SQLITE_OK );
  4692   4692     fts5BufferZero(pBuf);
  4693         -  fts5MultiIterPoslist(p, pIter, 0, 0, pBuf);
         4693  +  fts5SegiterPoslist(p, pSeg, 0, pBuf);
  4694   4694     return fts5IndexReturn(p);
  4695   4695   }
  4696   4696   
  4697   4697   /*
  4698   4698   ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
  4699   4699   */
  4700   4700   void sqlite3Fts5IterClose(Fts5IndexIter *pIter){

Changes to ext/fts5/test/fts5simple.test.

   246    246     INSERT INTO t3 VALUES('bac aab bab', 'c bac c', 'acb aba abb'); -- 1
   247    247     INSERT INTO t3 VALUES('bab abc c', 'acb c abb', 'c aaa c');     -- 2
   248    248   }
   249    249   
   250    250   do_execsql_test 10.1 {
   251    251     SELECT rowid FROM t3('c: c*');
   252    252   } {2}
          253  +
          254  +#-------------------------------------------------------------------------
          255  +# Test that character 0x1A is allowed in fts5 barewords.
          256  +#
          257  +do_test 11.0 {
          258  +  execsql "CREATE VIRTUAL TABLE t4 USING fts5(x, tokenize=\"ascii tokenchars '\x1A'\")"
          259  +  execsql "
          260  +    INSERT INTO t4 VALUES('a b c \x1A');
          261  +    INSERT INTO t4 VALUES('a b c d\x1A');
          262  +    INSERT INTO t4 VALUES('a b c \x1Ad');
          263  +    INSERT INTO t4 VALUES('a b c d');
          264  +  "
          265  +} {}
          266  +
          267  +do_test 11.1 {
          268  +  execsql "SELECT rowid FROM t4('\x1A')"
          269  +} {1}
          270  +do_test 11.2 {
          271  +  execsql "SELECT rowid FROM t4('\x1A*')"
          272  +} {1 3}
          273  +do_test 11.3 {
          274  +  execsql "SELECT rowid FROM t4('d\x1A')"
          275  +} {2}
          276  +
          277  +do_test 11.4 {
          278  +  catchsql "SELECT rowid FROM t4('d\x1B')"
          279  +} {/fts5: syntax error/}
          280  +do_test 11.5 {
          281  +  catchsql "SELECT rowid FROM t4('d\x19')"
          282  +} {/fts5: syntax error/}
   253    283   
   254    284   
   255    285   finish_test
   256    286