SQLite

Check-in [00714b39]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Fix a problem with the fts5 trigram tokenizer and LIKE or GLOB patterns that contain runs of 2 or fewer non-wildcard characters that are 3 or more bytes when encoded as utf-8.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 00714b39b39c51519edbc0194f98c7275fecf96763a06fd95db6e1d81bb9f1f1
User & Date: dan 2023-02-10 17:17:04
Context
2023-02-10
21:53
Do a better job of detecting when a WHERE clause term might be useful to an expression index. Fix for performance regression reported by forum thread e65800d8cb. (check-in: 44200596 user: drh tags: trunk)
17:17
Fix a problem with the fts5 trigram tokenizer and LIKE or GLOB patterns that contain runs of 2 or fewer non-wildcard characters that are 3 or more bytes when encoded as utf-8. (check-in: 00714b39 user: dan tags: trunk)
14:20
Ensure that the valueFromFunction() routine does not clear a prior parser error. dbsqlfuzz 6fa816f20cf5b62260d635d110b88f38e29d8fe1. (check-in: 73476645 user: drh tags: trunk)
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5_expr.c.

284
285
286
287
288
289
290













291
292
293
294
295
296
297
    sqlite3Fts5ParseNodeFree(sParse.pExpr);
  }

  sqlite3_free(sParse.apPhrase);
  *pzErr = sParse.zErr;
  return sParse.rc;
}














/*
** This function is only called when using the special 'trigram' tokenizer.
** Argument zText contains the text of a LIKE or GLOB pattern matched
** against column iCol. This function creates and compiles an FTS5 MATCH
** expression that will match a superset of the rows matched by the LIKE or
** GLOB. If successful, SQLITE_OK is returned. Otherwise, an SQLite error







>
>
>
>
>
>
>
>
>
>
>
>
>







284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
    sqlite3Fts5ParseNodeFree(sParse.pExpr);
  }

  sqlite3_free(sParse.apPhrase);
  *pzErr = sParse.zErr;
  return sParse.rc;
}

/*
** Return the number of utf-8 characters held in the first nByte bytes of
** buffer z. The caller guarantees that z is at least nByte bytes in size
** and that it contains valid utf-8.
**
** Every character contributes exactly one byte that is not a continuation
** byte (continuation bytes have the bit pattern 10xxxxxx), so counting the
** non-continuation bytes yields the character count. If nByte is zero or
** negative, no bytes are examined and 0 is returned.
*/
static int fts5ExprCountChar(const char *z, int nByte){
  int nChar = 0;
  int iByte = 0;
  while( iByte<nByte ){
    unsigned char c = (unsigned char)z[iByte++];
    /* The top two bits are "10" only for continuation bytes. */
    if( (c>>6)!=0x02 ){
      nChar += 1;
    }
  }
  return nChar;
}

/*
** This function is only called when using the special 'trigram' tokenizer.
** Argument zText contains the text of a LIKE or GLOB pattern matched
** against column iCol. This function creates and compiles an FTS5 MATCH
** expression that will match a superset of the rows matched by the LIKE or
** GLOB. If successful, SQLITE_OK is returned. Otherwise, an SQLite error
322
323
324
325
326
327
328
329

330
331
332
333
334
335
336
      aSpec[2] = '[';
    }

    while( i<=nText ){
      if( i==nText 
       || zText[i]==aSpec[0] || zText[i]==aSpec[1] || zText[i]==aSpec[2] 
      ){
        if( i-iFirst>=3 ){

          int jj;
          zExpr[iOut++] = '"';
          for(jj=iFirst; jj<i; jj++){
            zExpr[iOut++] = zText[jj];
            if( zText[jj]=='"' ) zExpr[iOut++] = '"';
          }
          zExpr[iOut++] = '"';







|
>







335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
      aSpec[2] = '[';
    }

    while( i<=nText ){
      if( i==nText 
       || zText[i]==aSpec[0] || zText[i]==aSpec[1] || zText[i]==aSpec[2] 
      ){

        if( fts5ExprCountChar(&zText[iFirst], i-iFirst)>=3 ){
          int jj;
          zExpr[iOut++] = '"';
          for(jj=iFirst; jj<i; jj++){
            zExpr[iOut++] = zText[jj];
            if( zText[jj]=='"' ) zExpr[iOut++] = '"';
          }
          zExpr[iOut++] = '"';

Changes to ext/fts5/test/fts5trigram.test.

51
52
53
54
55
56
57

58
59
60
61
62
63
64
  2 {cDef%}    {}
  3 {%f%}      1
  4 {%f_h%}    1
  5 {%f_g%}    {}
  6 {abc%klm}  1
  7 {ABCDEFG%} 1
  8 {%รุงเ%}    2

} {
  do_execsql_test 1.3.$tn {
    SELECT rowid FROM t1 WHERE y LIKE $like
  } $res
}

#-------------------------------------------------------------------------







>







51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
  2 {cDef%}    {}
  3 {%f%}      1
  4 {%f_h%}    1
  5 {%f_g%}    {}
  6 {abc%klm}  1
  7 {ABCDEFG%} 1
  8 {%รุงเ%}    2
  9 {%งเ%}     2
} {
  do_execsql_test 1.3.$tn {
    SELECT rowid FROM t1 WHERE y LIKE $like
  } $res
}

#-------------------------------------------------------------------------
192
193
194
195
196
197
198
199

















200
} {VIRTUAL TABLE INDEX 0:G0}
do_eqp_test 6.3 {
  SELECT * FROM ci1 WHERE x LIKE ?
} {{SCAN ci1 VIRTUAL TABLE INDEX 0:}}
do_eqp_test 6.4 {
  SELECT * FROM ci1 WHERE x GLOB ?
} {VIRTUAL TABLE INDEX 0:G0}


















finish_test








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
} {VIRTUAL TABLE INDEX 0:G0}
do_eqp_test 6.3 {
  SELECT * FROM ci1 WHERE x LIKE ?
} {{SCAN ci1 VIRTUAL TABLE INDEX 0:}}
do_eqp_test 6.4 {
  SELECT * FROM ci1 WHERE x GLOB ?
} {VIRTUAL TABLE INDEX 0:G0}

# Test group 7.*: GLOB against a trigram-tokenized fts5 table where the
# literal part of the pattern ('ир') is only two characters long but four
# bytes when encoded as utf-8. Only rowid 20 ("жираф.png") contains that
# substring, so both queries below expect {20}.
reset_db
do_execsql_test 7.0 {
  CREATE VIRTUAL TABLE f USING FTS5(filename, tokenize="trigram");
  INSERT INTO f (rowid, filename) VALUES 
      (10, "giraffe.png"), 
      (20, "жираф.png"), 
      (30, "cat.png"), 
      (40, "кот.png"), 
      (50, "misic-🎵-.mp3");
}
# 7.1 applies unary "+" to the column. NOTE(review): presumably this keeps
# the GLOB term from being handed to the fts5 index, so the result acts as
# a baseline for 7.2 - confirm.
do_execsql_test 7.1 {
  SELECT rowid FROM f WHERE +filename GLOB '*ир*';
} {20}
# 7.2 runs the same pattern directly against the column and must return
# the same row as 7.1.
do_execsql_test 7.2 {
  SELECT rowid FROM f WHERE filename GLOB '*ир*';
} {20}

finish_test