Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Fix a problem with the fts5 trigram tokenizer and LIKE or GLOB patterns that contain runs of 2 or fewer non-wildcard characters that are 3 or more bytes when encoded as utf-8. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: |
00714b39b39c51519edbc0194f98c727 |
User & Date: | dan 2023-02-10 17:17:04 |
Context
2023-02-10
| ||
21:53 | Do a better job of detecting when a WHERE clause term might be useful to an expression index. Fix for performance regression reported by forum thread e65800d8cb. (check-in: 44200596 user: drh tags: trunk) | |
17:17 | Fix a problem with the fts5 trigram tokenizer and LIKE or GLOB patterns that contain runs of 2 or fewer non-wildcard characters that are 3 or more bytes when encoded as utf-8. (check-in: 00714b39 user: dan tags: trunk) | |
14:20 | Ensure that the valueFromFunction() routine does not clear a prior parser error. dbsqlfuzz 6fa816f20cf5b62260d635d110b88f38e29d8fe1. (check-in: 73476645 user: drh tags: trunk) | |
Changes
Changes to ext/fts5/fts5_expr.c.
︙ | ︙ | |||
284 285 286 287 288 289 290 291 292 293 294 295 296 297 | sqlite3Fts5ParseNodeFree(sParse.pExpr); } sqlite3_free(sParse.apPhrase); *pzErr = sParse.zErr; return sParse.rc; } /* ** This function is only called when using the special 'trigram' tokenizer. ** Argument zText contains the text of a LIKE or GLOB pattern matched ** against column iCol. This function creates and compiles an FTS5 MATCH ** expression that will match a superset of the rows matched by the LIKE or ** GLOB. If successful, SQLITE_OK is returned. Otherwise, an SQLite error | > > > > > > > > > > > > > | 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 | sqlite3Fts5ParseNodeFree(sParse.pExpr); } sqlite3_free(sParse.apPhrase); *pzErr = sParse.zErr; return sParse.rc; } /* ** Assuming that buffer z is at least nByte bytes in size and contains a ** valid utf-8 string, return the number of characters in the string. */ static int fts5ExprCountChar(const char *z, int nByte){ int nRet = 0; int ii; for(ii=0; ii<nByte; ii++){ if( (z[ii] & 0xC0)!=0x80 ) nRet++; } return nRet; } /* ** This function is only called when using the special 'trigram' tokenizer. ** Argument zText contains the text of a LIKE or GLOB pattern matched ** against column iCol. This function creates and compiles an FTS5 MATCH ** expression that will match a superset of the rows matched by the LIKE or ** GLOB. If successful, SQLITE_OK is returned. Otherwise, an SQLite error |
︙ | ︙ | |||
322 323 324 325 326 327 328 | aSpec[2] = '['; } while( i<=nText ){ if( i==nText || zText[i]==aSpec[0] || zText[i]==aSpec[1] || zText[i]==aSpec[2] ){ | | > | 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 | aSpec[2] = '['; } while( i<=nText ){ if( i==nText || zText[i]==aSpec[0] || zText[i]==aSpec[1] || zText[i]==aSpec[2] ){ if( fts5ExprCountChar(&zText[iFirst], i-iFirst)>=3 ){ int jj; zExpr[iOut++] = '"'; for(jj=iFirst; jj<i; jj++){ zExpr[iOut++] = zText[jj]; if( zText[jj]=='"' ) zExpr[iOut++] = '"'; } zExpr[iOut++] = '"'; |
︙ | ︙ |
Changes to ext/fts5/test/fts5trigram.test.
︙ | ︙ | |||
51 52 53 54 55 56 57 58 59 60 61 62 63 64 | 2 {cDef%} {} 3 {%f%} 1 4 {%f_h%} 1 5 {%f_g%} {} 6 {abc%klm} 1 7 {ABCDEFG%} 1 8 {%รุงเ%} 2 } { do_execsql_test 1.3.$tn { SELECT rowid FROM t1 WHERE y LIKE $like } $res } #------------------------------------------------------------------------- | > | 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 | 2 {cDef%} {} 3 {%f%} 1 4 {%f_h%} 1 5 {%f_g%} {} 6 {abc%klm} 1 7 {ABCDEFG%} 1 8 {%รุงเ%} 2 9 {%งเ%} 2 } { do_execsql_test 1.3.$tn { SELECT rowid FROM t1 WHERE y LIKE $like } $res } #------------------------------------------------------------------------- |
︙ | ︙ | |||
192 193 194 195 196 197 198 199 200 | } {VIRTUAL TABLE INDEX 0:G0} do_eqp_test 6.3 { SELECT * FROM ci1 WHERE x LIKE ? } {{SCAN ci1 VIRTUAL TABLE INDEX 0:}} do_eqp_test 6.4 { SELECT * FROM ci1 WHERE x GLOB ? } {VIRTUAL TABLE INDEX 0:G0} finish_test | > > > > > > > > > > > > > > > > > | 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 | } {VIRTUAL TABLE INDEX 0:G0} do_eqp_test 6.3 { SELECT * FROM ci1 WHERE x LIKE ? } {{SCAN ci1 VIRTUAL TABLE INDEX 0:}} do_eqp_test 6.4 { SELECT * FROM ci1 WHERE x GLOB ? } {VIRTUAL TABLE INDEX 0:G0} reset_db do_execsql_test 7.0 { CREATE VIRTUAL TABLE f USING FTS5(filename, tokenize="trigram"); INSERT INTO f (rowid, filename) VALUES (10, "giraffe.png"), (20, "жираф.png"), (30, "cat.png"), (40, "кот.png"), (50, "misic-🎵-.mp3"); } do_execsql_test 7.1 { SELECT rowid FROM f WHERE +filename GLOB '*ир*'; } {20} do_execsql_test 7.2 { SELECT rowid FROM f WHERE filename GLOB '*ир*'; } {20} finish_test |