/ Check-in [0595319c]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add some code to MemTranslate() to prevent the READ_UTF8() macro from overreading a buffer. (CVS 4033)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 0595319cd716611bb5a12eb952e395283ea749bb
User & Date: danielk1977 2007-05-23 16:23:09
Context
2007-05-24
07:22
Fix an assert() failure occurring in corrupt.test. (CVS 4034) check-in: 3c54cddf user: danielk1977 tags: trunk
2007-05-23
16:23
Add some code to MemTranslate() to prevent the READ_UTF8() macro from overreading a buffer. (CVS 4033) check-in: 0595319c user: danielk1977 tags: trunk
13:50
Make sure the database schema has been read before compiling an incremental_vacuum pragma. (CVS 4032) check-in: efd7bcb3 user: danielk1977 tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/utf.c.

8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
...
215
216
217
218
219
220
221
























































222
223
224
225
226
227



228
229
230
231
232
233
234



235
236
237
238
239
240
241
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used to translate between UTF-8, 
** UTF-16, UTF-16BE, and UTF-16LE.
**
** $Id: utf.c,v 1.50 2007/05/16 18:23:05 danielk1977 Exp $
**
** Notes on UTF-8:
**
**   Byte-0    Byte-1    Byte-2    Byte-3    Value
**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
................................................................................
    if( !zOut ) return SQLITE_NOMEM;
  }else{
    zOut = zShort;
  }
  z = zOut;

  if( pMem->enc==SQLITE_UTF8 ){
























































    if( desiredEnc==SQLITE_UTF16LE ){
      /* UTF-8 -> UTF-16 Little-endian */
      while( zIn<zTerm ){
        SQLITE_READ_UTF8(zIn, c); 
        WRITE_UTF16LE(z, c);
      }



    }else{
      assert( desiredEnc==SQLITE_UTF16BE );
      /* UTF-8 -> UTF-16 Big-endian */
      while( zIn<zTerm ){
        SQLITE_READ_UTF8(zIn, c); 
        WRITE_UTF16BE(z, c);
      }



    }
    pMem->n = z - zOut;
    *z++ = 0;
  }else{
    assert( desiredEnc==SQLITE_UTF8 );
    if( pMem->enc==SQLITE_UTF16LE ){
      /* UTF-16 Little-endian -> UTF-8 */







|







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>






>
>
>







>
>
>







8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
...
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used to translate between UTF-8, 
** UTF-16, UTF-16BE, and UTF-16LE.
**
** $Id: utf.c,v 1.51 2007/05/23 16:23:09 danielk1977 Exp $
**
** Notes on UTF-8:
**
**   Byte-0    Byte-1    Byte-2    Byte-3    Value
**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
................................................................................
    if( !zOut ) return SQLITE_NOMEM;
  }else{
    zOut = zShort;
  }
  z = zOut;

  if( pMem->enc==SQLITE_UTF8 ){
    unsigned int iExtra = 0xD800;

    if( 0==(pMem->flags&MEM_Term) && zTerm>zIn && (zTerm[-1]&0x80) ){
      /* This UTF8 string is not nul-terminated, and the last byte is
      ** not a character in the ASCII range (codepoints 0..127). This
      ** means the SQLITE_READ_UTF8() macro might read past the end
      ** of the allocated buffer.
      **
      ** There are four possibilities:
      **
      **   1. The last byte is the first byte of a non-ASCII character,
      **
      **   2. The final N bytes of the input string are continuation bytes
      **      and immediately preceding them is the first byte of a 
      **      non-ASCII character.
      **
      **   3. The final N bytes of the input string are continuation bytes
      **      and immediately preceding them is a byte that encodes a 
      **      character in the ASCII range.
      **
      **   4. The entire string consists of continuation characters.
      **
      ** Cases (3) and (4) require no special handling. The SQLITE_READ_UTF8()
      ** macro will not overread the buffer in these cases.
      */
      unsigned char *zExtra = &zTerm[-1];
      while( zExtra>zIn && (zExtra[0]&0xC0)==0x80 ){
        zExtra--;
      }

      if( (zExtra[0]&0xC0)==0xC0 ){
        /* Make a copy of the last character encoding in the input string.
        ** Then make sure it is nul-terminated and use SQLITE_READ_UTF8()
        ** to decode the codepoint. Store the codepoint in variable iExtra,
        ** it will be appended to the output string later.
        */
        unsigned char *zFree = 0;
        unsigned char zBuf[16];
        int nExtra = (pMem->n+zIn-zExtra);
        zTerm = zExtra;
        if( nExtra>15 ){
          zExtra = sqliteMallocRaw(nExtra+1);
          if( !zExtra ){
            return SQLITE_NOMEM;
          }
          zFree = zExtra;
        }else{
          zExtra = zBuf;
        }
        memcpy(zExtra, zTerm, nExtra);
        zExtra[nExtra] = '\0';
        SQLITE_READ_UTF8(zExtra, iExtra);
        sqliteFree(zFree);
      }
    }

    if( desiredEnc==SQLITE_UTF16LE ){
      /* UTF-8 -> UTF-16 Little-endian */
      while( zIn<zTerm ){
        SQLITE_READ_UTF8(zIn, c); 
        WRITE_UTF16LE(z, c);
      }
      if( iExtra!=0xD800 ){
        WRITE_UTF16LE(z, iExtra);
      }
    }else{
      assert( desiredEnc==SQLITE_UTF16BE );
      /* UTF-8 -> UTF-16 Big-endian */
      while( zIn<zTerm ){
        SQLITE_READ_UTF8(zIn, c); 
        WRITE_UTF16BE(z, c);
      }
      if( iExtra!=0xD800 ){
        WRITE_UTF16BE(z, iExtra);
      }
    }
    pMem->n = z - zOut;
    *z++ = 0;
  }else{
    assert( desiredEnc==SQLITE_UTF8 );
    if( pMem->enc==SQLITE_UTF16LE ){
      /* UTF-16 Little-endian -> UTF-8 */

Changes to test/enc.test.

9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
...
144
145
146
147
148
149
150
151
152
153



















154
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The focus of
# this file is testing the SQLite routines used for converting between the
# various supported Unicode encodings (UTF-8, UTF-16, UTF-16le and
# UTF-16be).
#
# $Id: enc.test,v 1.6 2007/05/16 18:11:41 danielk1977 Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Skip this test if the build does not support multiple encodings.
#
ifcapable {!utf16} {
................................................................................
test_conversion enc-X "\u0100"
test_conversion enc-4 "\u1234"
test_conversion enc-5 "\u4321abc"
test_conversion enc-6 "\u4321\u1234"
test_conversion enc-7 [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100]
test_conversion enc-8 [string repeat "\u007E\u007F\u0080\u0081" 100]
test_conversion enc-9 [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100]

test_conversion enc-10 [string repeat "\uE000" 100]




















finish_test







|







 







<


>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
...
144
145
146
147
148
149
150

151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The focus of
# this file is testing the SQLite routines used for converting between the
# various supported Unicode encodings (UTF-8, UTF-16, UTF-16le and
# UTF-16be).
#
# $Id: enc.test,v 1.7 2007/05/23 16:23:09 danielk1977 Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Skip this test if the build does not support multiple encodings.
#
ifcapable {!utf16} {
................................................................................
test_conversion enc-X "\u0100"
test_conversion enc-4 "\u1234"
test_conversion enc-5 "\u4321abc"
test_conversion enc-6 "\u4321\u1234"
test_conversion enc-7 [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100]
test_conversion enc-8 [string repeat "\u007E\u007F\u0080\u0081" 100]
test_conversion enc-9 [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100]

test_conversion enc-10 [string repeat "\uE000" 100]

# Collation callback used by the enc-11.* tests.
#
# Arguments:
#   enc     Encoding label supplied by the caller; not consulted here.
#   zLeft   Left-hand string of the comparison.
#   zRight  Right-hand string of the comparison.
#
# Returns the result of [string compare]: -1, 0 or 1 depending on whether
# zLeft sorts before, equal to, or after zRight.
proc test_collate {enc zLeft zRight} {
  set order [string compare $zLeft $zRight]
  return $order
}
# Register the test_collate collation on the test database handle.
# NOTE(review): the meaning of the three flag arguments (0 0 1) is defined
# by the test fixture's add_test_collate command, which is not shown here;
# presumably they select the text encodings the collation is registered
# for -- confirm against the fixture.
add_test_collate $::DB 0 0 1

# enc-11.1: build a table whose first column uses the test_collate
# collation and insert two text values built from raw bytes via CAST:
#   * X'C388' is the two-byte UTF-8 encoding of U+00C8.
#   * The second value is a 0xC0 lead byte followed by a long run of 0x80
#     continuation bytes and ending in 0x83 0x88.
#     NOTE(review): presumably this is long enough to exceed the 15-byte
#     on-stack buffer used by the UTF-8 translation code, forcing the
#     heap-allocation path -- confirm.
# An index is then created on the collated column.
do_test enc-11.1 {
  execsql {
    CREATE TABLE ab(a COLLATE test_collate, b);
    INSERT INTO ab VALUES(CAST (X'C388' AS TEXT), X'888800');
    INSERT INTO ab VALUES(CAST (X'C0808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808388' AS TEXT), X'888800');
    CREATE INDEX ab_i ON ab(a, b);
  }
} {}

# enc-11.2: both rows inserted above are expected to compare equal to
# U+00C8 (decimal code point 200) under test_collate, hence the expected
# count of 2.
do_test enc-11.2 {
  set cp200 "\u00C8"
  execsql {
    SELECT count(*) FROM ab WHERE a = $::cp200;
  }
} {2}

finish_test

Changes to test/ioerr2.test.

11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
..
99
100
101
102
103
104
105
106
107
108
109
110
# This file implements regression tests for SQLite library.  The
# focus of this file is testing for correct handling of I/O errors
# such as writes failing because the disk is full.
# 
# The tests in this file use special facilities that are only
# available in the SQLite test fixture.
#
# $Id: ioerr2.test,v 1.4 2007/04/13 02:14:30 drh Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

do_test ioerr2-1.1 {
  execsql {
    PRAGMA cache_size = 10;
................................................................................
  set ::go 1
  for {set ::N 1} {$::go} {incr ::N} {
    set ::sqlite_io_error_hit 0
    set ::sqlite_io_error_persist $bPersist
    set ::sqlite_io_error_pending $::N

    foreach {::go res} [catchsql $sql] {}
    check_db ioerr2-3.[expr {$bPersist+2}].$::N
  }
}

finish_test







|







 







|




11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
..
99
100
101
102
103
104
105
106
107
108
109
110
# This file implements regression tests for SQLite library.  The
# focus of this file is testing for correct handling of I/O errors
# such as writes failing because the disk is full.
# 
# The tests in this file use special facilities that are only
# available in the SQLite test fixture.
#
# $Id: ioerr2.test,v 1.5 2007/05/23 16:23:09 danielk1977 Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

do_test ioerr2-1.1 {
  execsql {
    PRAGMA cache_size = 10;
................................................................................
  set ::go 1
  for {set ::N 1} {$::go} {incr ::N} {
    set ::sqlite_io_error_hit 0
    set ::sqlite_io_error_persist $bPersist
    set ::sqlite_io_error_pending $::N

    foreach {::go res} [catchsql $sql] {}
    check_db ioerr2-4.[expr {$bPersist+2}].$::N
  }
}

finish_test