Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Fix the ICU extension LIKE function so that it does not read past the end of a buffer if it it passed malformed utf-8. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
424b7aee3310b9782bd312589dc3d9f2 |
User & Date: | dan 2016-02-26 16:03:29 |
Context
2016-02-26
| ||
21:03 | Further refinements to the MSVC batch build process. (check-in: cf4e4fbd user: mistachkin tags: trunk) | |
16:14 | Merge all recent enhancements from trunk, and in particular the SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER change. (check-in: 4fb4aee8 user: drh tags: apple-osx) | |
16:03 | Fix the ICU extension LIKE function so that it does not read past the end of a buffer if it it passed malformed utf-8. (check-in: 424b7aee user: dan tags: trunk) | |
15:38 | Provide the new SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER option to sqlite3_db_config() that can be used to activate the two-argument version of fts3_tokenizer() for a specific database connection at run-time. (check-in: 374b5108 user: drh tags: trunk) | |
Changes
Changes to ext/icu/icu.c.
︙ | ︙ | |||
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 | /* ** Version of sqlite3_free() that is always a function, never a macro. */ static void xFree(void *p){ sqlite3_free(p); } /* ** Compare two UTF-8 strings for equality where the first string is ** a "LIKE" expression. Return true (1) if they are the same and ** false (0) if they are different. */ static int icuLikeCompare( const uint8_t *zPattern, /* LIKE pattern */ const uint8_t *zString, /* The UTF-8 string to compare against */ const UChar32 uEsc /* The escape character */ ){ static const int MATCH_ONE = (UChar32)'_'; static const int MATCH_ALL = (UChar32)'%'; | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > < < < | | > | | | | | | | | | | | | | 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 | /* ** Version of sqlite3_free() that is always a function, never a macro. */ static void xFree(void *p){ sqlite3_free(p); } /* ** This lookup table is used to help decode the first byte of ** a multi-byte UTF8 character. It is copied here from SQLite source ** code file utf8.c. */ static const unsigned char icuUtf8Trans1[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00, }; #define SQLITE_ICU_READ_UTF8(zIn, c) \ c = *(zIn++); \ if( c>=0xc0 ){ \ c = icuUtf8Trans1[c-0xc0]; \ while( (*zIn & 0xc0)==0x80 ){ \ c = (c<<6) + (0x3f & *(zIn++)); \ } \ } #define SQLITE_ICU_SKIP_UTF8(zIn) \ assert( *zIn ); \ if( *(zIn++)>=0xc0 ){ \ while( (*zIn & 0xc0)==0x80 ){zIn++;} \ } /* ** Compare two UTF-8 strings for equality where the first string is ** a "LIKE" expression. Return true (1) if they are the same and ** false (0) if they are different. */ static int icuLikeCompare( const uint8_t *zPattern, /* LIKE pattern */ const uint8_t *zString, /* The UTF-8 string to compare against */ const UChar32 uEsc /* The escape character */ ){ static const int MATCH_ONE = (UChar32)'_'; static const int MATCH_ALL = (UChar32)'%'; int prevEscape = 0; /* True if the previous character was uEsc */ while( 1 ){ /* Read (and consume) the next character from the input pattern. */ UChar32 uPattern; SQLITE_ICU_READ_UTF8(zPattern, uPattern); if( uPattern==0 ) break; /* There are now 4 possibilities: ** ** 1. uPattern is an unescaped match-all character "%", ** 2. uPattern is an unescaped match-one character "_", ** 3. uPattern is an unescaped escape character, or ** 4. uPattern is to be handled as an ordinary character */ if( !prevEscape && uPattern==MATCH_ALL ){ /* Case 1. */ uint8_t c; /* Skip any MATCH_ALL or MATCH_ONE characters that follow a ** MATCH_ALL. For each MATCH_ONE, skip one character in the ** test string. */ while( (c=*zPattern) == MATCH_ALL || c == MATCH_ONE ){ if( c==MATCH_ONE ){ if( *zString==0 ) return 0; SQLITE_ICU_SKIP_UTF8(zString); } zPattern++; } if( *zPattern==0 ) return 1; while( *zString ){ if( icuLikeCompare(zPattern, zString, uEsc) ){ return 1; } SQLITE_ICU_SKIP_UTF8(zString); } return 0; }else if( !prevEscape && uPattern==MATCH_ONE ){ /* Case 2. */ if( *zString==0 ) return 0; SQLITE_ICU_SKIP_UTF8(zString); }else if( !prevEscape && uPattern==uEsc){ /* Case 3. */ prevEscape = 1; }else{ /* Case 4. */ UChar32 uString; SQLITE_ICU_READ_UTF8(zString, uString); uString = u_foldCase(uString, U_FOLD_CASE_DEFAULT); uPattern = u_foldCase(uPattern, U_FOLD_CASE_DEFAULT); if( uString!=uPattern ){ return 0; } prevEscape = 0; } } return *zString==0; } /* ** Implementation of the like() SQL function. This function implements ** the build-in LIKE operator. The first argument to the function is the ** pattern and the second argument is the string. So, the SQL statements: ** |
︙ | ︙ |