Many hyperlinks are disabled. Use anonymous login to enable hyperlinks.
Overview
Comment: | Fix a problem with LIKE and GLOB processing in utf-16be databases in cases where the utf-8 encoding of a character ends with the byte 0xBF. Forum post d7b90d92ffbfc61f |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: | 4b4f33d791fe4318c4597bee7d2f9e48 |
User & Date: | dan 2025-01-31 14:52:36 |
Original Comment: | Fix a problem with LIKE and GLOB processing in utf-16be databases in cases where the utf-8 encoding of a character ends with the byte 0xBF. |
References
2025-02-14
| ||
10:28 | Fix test cases added for [4b4f33d791fe4318] so that they also work when compiling with ICU support. Forum post 2ca8a09a7e (check-in: 5964616d user: drh tags: trunk) | |
2025-01-31
| ||
20:18 | Tweaks to [4b4f33d791fe4318] to make it easier to test. (check-in: 2567298f user: drh tags: trunk) | |
Context
2025-01-31
| ||
18:33 | Remove an assert() that is not true if a trace-callback is deregistered while there are active statements. Forum post 3547aa1078510686. (check-in: 0ecfc4d0 user: dan tags: trunk) | |
14:58 | Fix a problem with LIKE and GLOB processing in utf-16be databases in cases where the utf-8 encoding of a character ends with the byte 0xBF. (check-in: 9d59f8a5 user: drh tags: branch-3.48) | |
14:52 | Fix a problem with LIKE and GLOB processing in utf-16be databases in cases where the utf-8 encoding of a character ends with the byte 0xBF. Forum post d7b90d92ffbfc61f (check-in: 4b4f33d7 user: dan tags: trunk) | |
14:52 | If any errors occur while processing sqlite_dbpage changes, cancel pending truncate operations. (check-in: 6138e0dc user: drh tags: trunk) | |
Changes
Changes to src/whereexpr.c.
︙ | ︙ | |||
215 216 217 218 219 220 221 | sqlite3VdbeSetVarmask(pParse->pVdbe, iCol); assert( pRight->op==TK_VARIABLE || pRight->op==TK_REGISTER ); }else if( op==TK_STRING ){ assert( !ExprHasProperty(pRight, EP_IntValue) ); z = (u8*)pRight->u.zToken; } if( z ){ | | | | | | | | > > | 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 | sqlite3VdbeSetVarmask(pParse->pVdbe, iCol); assert( pRight->op==TK_VARIABLE || pRight->op==TK_REGISTER ); }else if( op==TK_STRING ){ assert( !ExprHasProperty(pRight, EP_IntValue) ); z = (u8*)pRight->u.zToken; } if( z ){ /* Count the number of prefix bytes prior to the first wildcard, ** U+fffd character, or malformed utf-8. If the underlying database ** has a UTF16LE encoding, then only consider ASCII characters. Note that ** the encoding of z[] is UTF8 - we are dealing with only UTF8 here in this ** code, but the database engine itself might be processing content using a ** different encoding. */ cnt = 0; while( (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2] ){ cnt++; if( c==wc[3] && z[cnt]>0 && z[cnt]<0x80 ){ cnt++; }else if( c>=0x80 ){ const u8 *z2 = z+cnt-1; if( sqlite3Utf8Read(&z2)==0xfffd || c==0xFF /* bad utf-8 */ || ENC(db)==SQLITE_UTF16LE ){ cnt--; break; }else{ cnt = (int)(z2-z); } } } |
︙ | ︙ | |||
1380 1381 1382 1383 1384 1385 1386 | for(i=0; (c = pStr1->u.zToken[i])!=0; i++){ pStr1->u.zToken[i] = sqlite3Toupper(c); pStr2->u.zToken[i] = sqlite3Tolower(c); } } if( !db->mallocFailed ){ | | < | | > > > | > > > > | 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 | for(i=0; (c = pStr1->u.zToken[i])!=0; i++){ pStr1->u.zToken[i] = sqlite3Toupper(c); pStr2->u.zToken[i] = sqlite3Tolower(c); } } if( !db->mallocFailed ){ u8 *pC; /* Last character before the first wildcard */ pC = (u8*)&pStr2->u.zToken[sqlite3Strlen30(pStr2->u.zToken)-1]; if( noCase ){ /* The point is to increment the last character before the first ** wildcard. But if we increment '@', that will push it into the ** alphabetic range where case conversions will mess up the ** inequality. To avoid this, make sure to also run the full ** LIKE on all candidate expressions by clearing the isComplete flag */ if( *pC=='A'-1 ) isComplete = 0; *pC = sqlite3UpperToLower[*pC]; } /* Increment the value of the last utf8 character in the prefix. */ while( *pC==0xBF && pC>(u8*)pStr2->u.zToken ){ *pC = 0x80; pC--; } assert( *pC!=0xFF ); /* isLikeOrGlob() guarantees this */ (*pC)++; } zCollSeqName = noCase ? "NOCASE" : sqlite3StrBINARY; pNewExpr1 = sqlite3ExprDup(db, pLeft, 0); pNewExpr1 = sqlite3PExpr(pParse, TK_GE, sqlite3ExprAddCollateString(pParse,pNewExpr1,zCollSeqName), pStr1); transferJoinMarkings(pNewExpr1, pExpr); |
︙ | ︙ |
Changes to test/like3.test.
︙ | ︙ | |||
270 271 272 273 274 275 276 277 278 | do_eqp_test like3-6.240 { SELECT * FROM t2 WHERE path LIKE 'a%' ESCAPE '\'; } { QUERY PLAN `--SEARCH t2 USING INDEX t2path2 (path>? AND path<?) } } finish_test | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 | do_eqp_test like3-6.240 { SELECT * FROM t2 WHERE path LIKE 'a%' ESCAPE '\'; } { QUERY PLAN `--SEARCH t2 USING INDEX t2path2 (path>? AND path<?) } } #------------------------------------------------------------------------- ifcapable utf16 { reset_db do_execsql_test like3-7.0 { PRAGMA encoding = 'UTF-16be'; CREATE TABLE Example(word TEXT NOT NULL); CREATE INDEX Example_word on Example(word); INSERT INTO Example VALUES(char(0x307F)); } do_execsql_test like3-7.1 { SELECT char(0x307F)=='み'; } {1} do_execsql_test like3-7.1 { SELECT * FROM Example WHERE word GLOB 'み*' } {み} do_execsql_test like3-7.2 { SELECT * FROM Example WHERE word >= char(0x307F) AND word < char(0x3080); } {み} } #------------------------------------------------------------------------- reset_db foreach enc { UTF-8 UTF-16le UTF-16be } { foreach {tn expr} { 1 "CAST (X'FF' AS TEXT)" 2 "CAST (X'FFBF' AS TEXT)" 3 "CAST (X'FFBFBF' AS TEXT)" 4 "CAST (X'FFBFBFBF' AS TEXT)" 5 "'abc' || CAST (X'FF' AS TEXT)" 6 "'def' || CAST (X'FFBF' AS TEXT)" 7 "'ghi' || CAST (X'FFBFBF' AS TEXT)" 8 "'jkl' || CAST (X'FFBFBFBF' AS TEXT)" } { reset_db execsql "PRAGMA encoding = '$enc'" do_execsql_test like3-8.$tn.0 { CREATE TABLE t1(x); } do_execsql_test like3-8.$tn.1 { PRAGMA encoding } $enc do_execsql_test like3-8.$tn.1 " 
INSERT INTO t1 VALUES( $expr ) " do_execsql_test like3-8.$tn.2 { SELECT typeof(x) FROM t1 } {text} set x [db one {SELECT x || '%' FROM t1}] do_execsql_test like3-8.$tn.3 { SELECT rowid FROM t1 WHERE x LIKE $x } 1 do_execsql_test like3-8.$tn.4 { CREATE INDEX i1 ON t1(x); } do_execsql_test like3-8.$tn.5 { SELECT rowid FROM t1 WHERE x LIKE $x } 1 } } finish_test |