Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Make sure that UTF16 to UTF8 conversions do not read past the end of the UTF16 input buffer if the last two bytes of the UTF16 happen to be the first half of a surrogate pair. Ticket [3fe897352e] |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
19064d7cea838e1a93fe63743ed247f4 |
User & Date: | drh 2009-10-23 18:15:46.000 |
References
2009-10-24
| ||
01:55 | Check-in [19064d7cea] broke the pTail return on sqlite3_prepare16() when the SQL contained surrogates. This check-in restores correct function. Part of ticket [3fe897352e]. (check-in: 65b1e3a4c3 user: drh tags: trunk) | |
01:48 | • Ticket [3fe897352e] Malformed UTF16 leads to a 2-byte buffer overread status still Fixed with 1 other change (artifact: 1b24ca61f0 user: shane) | |
Context
2009-10-24
| ||
01:55 | Check-in [19064d7cea] broke the pTail return on sqlite3_prepare16() when the SQL contained surrogates. This check-in restores correct function. Part of ticket [3fe897352e]. (check-in: 65b1e3a4c3 user: drh tags: trunk) | |
2009-10-23
| ||
18:15 | Make sure that UTF16 to UTF8 conversions do not read past the end of the UTF16 input buffer if the last two bytes of the UTF16 happen to be the first half of a surrogate pair. Ticket [3fe897352e] (check-in: 19064d7cea user: drh tags: trunk) | |
01:27 | In shell, ensure that do_meta_command() returns consistent error values. Adjusted the text of some error message to be more consistent. Ticket [beb2dd69ad]. (check-in: 1ebac9eddd user: shane tags: trunk) | |
Changes
Changes to src/test_func.c.
︙ | ︙ | |||
308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 | assert( pStmt==0 ); zErr = sqlite3_mprintf("sqlite3_prepare_v2() error: %s",sqlite3_errmsg(db)); sqlite3_result_text(pCtx, zErr, -1, sqlite3_free); sqlite3_result_error_code(pCtx, rc); } } static int registerTestFunctions(sqlite3 *db){ static const struct { char *zName; signed char nArg; unsigned char eTextRep; /* 1: UTF-16. 0: UTF-8 */ void (*xFunc)(sqlite3_context*,int,sqlite3_value **); } aFuncs[] = { { "randstr", 2, SQLITE_UTF8, randStr }, { "test_destructor", 1, SQLITE_UTF8, test_destructor}, #ifndef SQLITE_OMIT_UTF16 { "test_destructor16", 1, SQLITE_UTF8, test_destructor16}, #endif { "test_destructor_count", 0, SQLITE_UTF8, test_destructor_count}, { "test_auxdata", -1, SQLITE_UTF8, test_auxdata}, { "test_error", 1, SQLITE_UTF8, test_error}, { "test_error", 2, SQLITE_UTF8, test_error}, { "test_eval", 1, SQLITE_UTF8, test_eval}, { "test_isolation", 2, SQLITE_UTF8, test_isolation}, { "test_counter", 1, SQLITE_UTF8, counterFunc}, | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 | assert( pStmt==0 ); zErr = sqlite3_mprintf("sqlite3_prepare_v2() error: %s",sqlite3_errmsg(db)); sqlite3_result_text(pCtx, zErr, -1, sqlite3_free); sqlite3_result_error_code(pCtx, rc); } } /* ** 
convert one character from hex to binary */ static int testHexChar(char c){ if( c>='0' && c<='9' ){ return c - '0'; }else if( c>='a' && c<='f' ){ return c - 'a' + 10; }else if( c>='A' && c<='F' ){ return c - 'A' + 10; } return 0; } /* ** Convert hex to binary. */ static void testHexToBin(const char *zIn, char *zOut){ while( zIn[0] && zIn[1] ){ *(zOut++) = (testHexChar(zIn[0])<<4) + testHexChar(zIn[1]); zIn += 2; } } /* ** hex_to_utf16be(HEX) ** ** Convert the input string from HEX into binary. Then return the ** result using sqlite3_result_text16le(). */ static void testHexToUtf16be( sqlite3_context *pCtx, int nArg, sqlite3_value **argv ){ int n; const char *zIn; char *zOut; assert( nArg==1 ); n = sqlite3_value_bytes(argv[0]); zIn = (const char*)sqlite3_value_text(argv[0]); zOut = sqlite3_malloc( n/2 ); if( zOut==0 ){ sqlite3_result_error_nomem(pCtx); }else{ testHexToBin(zIn, zOut); sqlite3_result_text16be(pCtx, zOut, n/2, sqlite3_free); } } /* ** hex_to_utf8(HEX) ** ** Convert the input string from HEX into binary. Then return the ** result using sqlite3_result_text16le(). */ static void testHexToUtf8( sqlite3_context *pCtx, int nArg, sqlite3_value **argv ){ int n; const char *zIn; char *zOut; assert( nArg==1 ); n = sqlite3_value_bytes(argv[0]); zIn = (const char*)sqlite3_value_text(argv[0]); zOut = sqlite3_malloc( n/2 ); if( zOut==0 ){ sqlite3_result_error_nomem(pCtx); }else{ testHexToBin(zIn, zOut); sqlite3_result_text(pCtx, zOut, n/2, sqlite3_free); } } /* ** hex_to_utf16le(HEX) ** ** Convert the input string from HEX into binary. Then return the ** result using sqlite3_result_text16le(). 
*/ static void testHexToUtf16le( sqlite3_context *pCtx, int nArg, sqlite3_value **argv ){ int n; const char *zIn; char *zOut; assert( nArg==1 ); n = sqlite3_value_bytes(argv[0]); zIn = (const char*)sqlite3_value_text(argv[0]); zOut = sqlite3_malloc( n/2 ); if( zOut==0 ){ sqlite3_result_error_nomem(pCtx); }else{ testHexToBin(zIn, zOut); sqlite3_result_text16le(pCtx, zOut, n/2, sqlite3_free); } } static int registerTestFunctions(sqlite3 *db){ static const struct { char *zName; signed char nArg; unsigned char eTextRep; /* 1: UTF-16. 0: UTF-8 */ void (*xFunc)(sqlite3_context*,int,sqlite3_value **); } aFuncs[] = { { "randstr", 2, SQLITE_UTF8, randStr }, { "test_destructor", 1, SQLITE_UTF8, test_destructor}, #ifndef SQLITE_OMIT_UTF16 { "test_destructor16", 1, SQLITE_UTF8, test_destructor16}, { "hex_to_utf16be", 1, SQLITE_UTF8, testHexToUtf16be}, { "hex_to_utf16le", 1, SQLITE_UTF8, testHexToUtf16le}, #endif { "hex_to_utf8", 1, SQLITE_UTF8, testHexToUtf8}, { "test_destructor_count", 0, SQLITE_UTF8, test_destructor_count}, { "test_auxdata", -1, SQLITE_UTF8, test_auxdata}, { "test_error", 1, SQLITE_UTF8, test_error}, { "test_error", 2, SQLITE_UTF8, test_error}, { "test_eval", 1, SQLITE_UTF8, test_eval}, { "test_isolation", 2, SQLITE_UTF8, test_isolation}, { "test_counter", 1, SQLITE_UTF8, counterFunc}, |
︙ | ︙ | |||
442 443 444 445 446 447 448 | return TCL_OK; abuse_err: Tcl_AppendResult(interp, "sqlite3_create_function abused test failed", (char*)0); return TCL_ERROR; } | < < | 547 548 549 550 551 552 553 554 555 556 557 558 559 560 | return TCL_OK; abuse_err: Tcl_AppendResult(interp, "sqlite3_create_function abused test failed", (char*)0); return TCL_ERROR; } /* ** Register commands with the TCL interpreter. */ int Sqlitetest_func_Init(Tcl_Interp *interp){ static struct { char *zName; |
︙ | ︙ |
Changes to src/utf.c.
︙ | ︙ | |||
103 104 105 106 107 108 109 | *zOut++ = (u8)(0x00D8 + (((c-0x10000)>>18)&0x03)); \ *zOut++ = (u8)(((c>>10)&0x003F) + (((c-0x10000)>>10)&0x00C0)); \ *zOut++ = (u8)(0x00DC + ((c>>8)&0x03)); \ *zOut++ = (u8)(c&0x00FF); \ } \ } | | | | | | 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 | *zOut++ = (u8)(0x00D8 + (((c-0x10000)>>18)&0x03)); \ *zOut++ = (u8)(((c>>10)&0x003F) + (((c-0x10000)>>10)&0x00C0)); \ *zOut++ = (u8)(0x00DC + ((c>>8)&0x03)); \ *zOut++ = (u8)(c&0x00FF); \ } \ } #define READ_UTF16LE(zIn, zTerm, c){ \ c = (*zIn++); \ c += ((*zIn++)<<8); \ if( c>=0xD800 && c<0xE000 && zIn<zTerm ){ \ int c2 = (*zIn++); \ c2 += ((*zIn++)<<8); \ c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10); \ } \ } #define READ_UTF16BE(zIn, zTerm, c){ \ c = ((*zIn++)<<8); \ c += (*zIn++); \ if( c>=0xD800 && c<0xE000 && zIn<zTerm ){ \ int c2 = ((*zIn++)<<8); \ c2 += (*zIn++); \ c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10); \ } \ } /* |
︙ | ︙ | |||
301 302 303 304 305 306 307 | pMem->n = (int)(z - zOut); *z++ = 0; }else{ assert( desiredEnc==SQLITE_UTF8 ); if( pMem->enc==SQLITE_UTF16LE ){ /* UTF-16 Little-endian -> UTF-8 */ while( zIn<zTerm ){ | | | | 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 | pMem->n = (int)(z - zOut); *z++ = 0; }else{ assert( desiredEnc==SQLITE_UTF8 ); if( pMem->enc==SQLITE_UTF16LE ){ /* UTF-16 Little-endian -> UTF-8 */ while( zIn<zTerm ){ READ_UTF16LE(zIn, zTerm, c); WRITE_UTF8(z, c); } }else{ /* UTF-16 Big-endian -> UTF-8 */ while( zIn<zTerm ){ READ_UTF16BE(zIn, zTerm, c); WRITE_UTF8(z, c); } } pMem->n = (int)(z - zOut); } *z = 0; assert( (pMem->n+(desiredEnc==SQLITE_UTF8?1:2))<=len ); |
︙ | ︙ | |||
484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 | ** pZ is a UTF-16 encoded unicode string at least nChar characters long. ** Return the number of bytes in the first nChar unicode characters ** in pZ. nChar must be non-negative. */ int sqlite3Utf16ByteLen(const void *zIn, int nChar){ int c; unsigned char const *z = zIn; int n = 0; if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){ /* Using an "if (SQLITE_UTF16NATIVE==SQLITE_UTF16BE)" construct here ** and in other parts of this file means that at one branch will ** not be covered by coverage testing on any single host. But coverage ** will be complete if the tests are run on both a little-endian and ** big-endian host. Because both the UTF16NATIVE and SQLITE_UTF16BE ** macros are constant at compile time the compiler can determine ** which branch will be followed. It is therefore assumed that no runtime ** penalty is paid for this "if" statement. */ while( n<nChar ){ | > | | | 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 | ** pZ is a UTF-16 encoded unicode string at least nChar characters long. ** Return the number of bytes in the first nChar unicode characters ** in pZ. nChar must be non-negative. */ int sqlite3Utf16ByteLen(const void *zIn, int nChar){ int c; unsigned char const *z = zIn; unsigned char const *zTerm = &z[nChar]; int n = 0; if( SQLITE_UTF16NATIVE==SQLITE_UTF16BE ){ /* Using an "if (SQLITE_UTF16NATIVE==SQLITE_UTF16BE)" construct here ** and in other parts of this file means that at one branch will ** not be covered by coverage testing on any single host. But coverage ** will be complete if the tests are run on both a little-endian and ** big-endian host. Because both the UTF16NATIVE and SQLITE_UTF16BE ** macros are constant at compile time the compiler can determine ** which branch will be followed. It is therefore assumed that no runtime ** penalty is paid for this "if" statement. 
*/ while( n<nChar ){ READ_UTF16BE(z, zTerm, c); n++; } }else{ while( n<nChar ){ READ_UTF16LE(z, zTerm, c); n++; } } return (int)(z-(unsigned char const *)zIn); } #if defined(SQLITE_TEST) |
︙ | ︙ | |||
543 544 545 546 547 548 549 | if( i>=0xD800 && i<0xE000 ) continue; z = zBuf; WRITE_UTF16LE(z, i); n = (int)(z-zBuf); assert( n>0 && n<=4 ); z[0] = 0; z = zBuf; | | | | 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 | if( i>=0xD800 && i<0xE000 ) continue; z = zBuf; WRITE_UTF16LE(z, i); n = (int)(z-zBuf); assert( n>0 && n<=4 ); z[0] = 0; z = zBuf; READ_UTF16LE(z, &zBuf[n], c); assert( c==i ); assert( (z-zBuf)==n ); } for(i=0; i<0x00110000; i++){ if( i>=0xD800 && i<0xE000 ) continue; z = zBuf; WRITE_UTF16BE(z, i); n = (int)(z-zBuf); assert( n>0 && n<=4 ); z[0] = 0; z = zBuf; READ_UTF16BE(z, &zBuf[n], c); assert( c==i ); assert( (z-zBuf)==n ); } } #endif /* SQLITE_TEST */ #endif /* SQLITE_OMIT_UTF16 */ |
Added test/tkt-3fe897352e.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 | # 2009 October 23 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # This file implements regression tests for SQLite library. # # This file implements tests to verify that ticket [3fe897352e8d8] has been # fixed. # set testdir [file dirname $argv0] source $testdir/tester.tcl do_test tkt-3fe89-1.1 { db close sqlite3 db :memory: db eval { PRAGMA encoding=UTF8; CREATE TABLE t1(x); INSERT INTO t1 VALUES(hex_to_utf16be('D800')); SELECT hex(x) FROM t1; } } {EDA080} do_test tkt-3fe89-1.2 { db eval { DELETE FROM t1; INSERT INTO t1 VALUES(hex_to_utf16le('00D8')); SELECT hex(x) FROM t1; } } {EDA080} do_test tkt-3fe89-1.3 { db eval { DELETE FROM t1; INSERT INTO t1 VALUES(hex_to_utf16be('DFFF')); SELECT hex(x) FROM t1; } } {EDBFBF} do_test tkt-3fe89-1.4 { db eval { DELETE FROM t1; INSERT INTO t1 VALUES(hex_to_utf16le('FFDF')); SELECT hex(x) FROM t1; } } {EDBFBF} finish_test |