/ Check-in [77f01578]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Use only unsigned values in the implementation of LIKE and GLOB so that values won't overflow to negative when dealing with malformed UTF8.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 77f01578bb565d1bc884b374b68bae10ce34a084
User & Date: drh 2011-06-13 12:19:21
Context
2011-06-14
11:50
Merge fts3-prefix-search branch with trunk. check-in: b1f9c1e0 user: dan tags: trunk
07:22
Merge recent trunk changes into fts3-prefix-search branch. check-in: 135ce30f user: dan tags: fts3-prefix-search
2011-06-13
12:19
Use only unsigned values in the implementation of LIKE and GLOB so that values won't overflow to negative when dealing with malformed UTF8. check-in: 77f01578 user: drh tags: trunk
2011-06-10
18:33
When updating a field that requires foreign key constraints be checked, ensure that the indexes and tables are consistent when the FK logic is run. Otherwise, it may detect the inconsistency and report database corruption. check-in: 2b3d9996 user: dan tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/func.c.

   502    502   /*
   503    503   ** For LIKE and GLOB matching on EBCDIC machines, assume that every
   504    504   ** character is exactly one byte in size.  Also, all characters are
   505    505   ** able to participate in upper-case-to-lower-case mappings in EBCDIC
   506    506   ** whereas only characters less than 0x80 do in ASCII.
   507    507   */
   508    508   #if defined(SQLITE_EBCDIC)
   509         -# define sqlite3Utf8Read(A,C)    (*(A++))
   510         -# define GlogUpperToLower(A)     A = sqlite3UpperToLower[A]
          509  +# define sqlite3Utf8Read(A,C)  (*(A++))
          510  +# define GlogUpperToLower(A)   A = sqlite3UpperToLower[A]
   511    511   #else
   512         -# define GlogUpperToLower(A)     if( A<0x80 ){ A = sqlite3UpperToLower[A]; }
          512  +# define GlogUpperToLower(A)   if( !((A)&~0x7f) ){ A = sqlite3UpperToLower[A]; }
   513    513   #endif
   514    514   
   515    515   static const struct compareInfo globInfo = { '*', '?', '[', 0 };
   516    516   /* The correct SQL-92 behavior is for the LIKE operator to ignore
   517    517   ** case.  Thus  'a' LIKE 'A' would be true. */
   518    518   static const struct compareInfo likeInfoNorm = { '%', '_',   0, 1 };
   519    519   /* If SQLITE_CASE_SENSITIVE_LIKE is defined, then the LIKE operator
................................................................................
   548    548   **
   549    549   **         abc[*]xyz        Matches "abc*xyz" only
   550    550   */
   551    551   static int patternCompare(
   552    552     const u8 *zPattern,              /* The glob pattern */
   553    553     const u8 *zString,               /* The string to compare against the glob */
   554    554     const struct compareInfo *pInfo, /* Information about how to do the compare */
   555         -  const int esc                    /* The escape character */
          555  +  u32 esc                          /* The escape character */
   556    556   ){
   557         -  int c, c2;
          557  +  u32 c, c2;
   558    558     int invert;
   559    559     int seen;
   560    560     u8 matchOne = pInfo->matchOne;
   561    561     u8 matchAll = pInfo->matchAll;
   562    562     u8 matchSet = pInfo->matchSet;
   563    563     u8 noCase = pInfo->noCase; 
   564    564     int prevEscape = 0;     /* True if the previous character was 'escape' */
................................................................................
   680    680   */
   681    681   static void likeFunc(
   682    682     sqlite3_context *context, 
   683    683     int argc, 
   684    684     sqlite3_value **argv
   685    685   ){
   686    686     const unsigned char *zA, *zB;
   687         -  int escape = 0;
          687  +  u32 escape = 0;
   688    688     int nPat;
   689    689     sqlite3 *db = sqlite3_context_db_handle(context);
   690    690   
   691    691     zB = sqlite3_value_text(argv[0]);
   692    692     zA = sqlite3_value_text(argv[1]);
   693    693   
   694    694     /* Limit the length of the LIKE or GLOB pattern to avoid problems

Changes to src/sqliteInt.h.

  2875   2875   int sqlite3FixExprList(DbFixer*, ExprList*);
  2876   2876   int sqlite3FixTriggerStep(DbFixer*, TriggerStep*);
  2877   2877   int sqlite3AtoF(const char *z, double*, int, u8);
  2878   2878   int sqlite3GetInt32(const char *, int*);
  2879   2879   int sqlite3Atoi(const char*);
  2880   2880   int sqlite3Utf16ByteLen(const void *pData, int nChar);
  2881   2881   int sqlite3Utf8CharLen(const char *pData, int nByte);
  2882         -int sqlite3Utf8Read(const u8*, const u8**);
         2882  +u32 sqlite3Utf8Read(const u8*, const u8**);
  2883   2883   
  2884   2884   /*
  2885   2885   ** Routines to read and write variable-length integers.  These used to
  2886   2886   ** be defined locally, but now we use the varint routines in the util.c
  2887   2887   ** file.  Code should use the MACRO forms below, as the Varint32 versions
  2888   2888   ** are coded to assume the single byte case is already handled (which 
  2889   2889   ** the MACRO form does).

Changes to src/utf.c.

   159    159       while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){            \
   160    160         c = (c<<6) + (0x3f & *(zIn++));                      \
   161    161       }                                                      \
   162    162       if( c<0x80                                             \
   163    163           || (c&0xFFFFF800)==0xD800                          \
   164    164           || (c&0xFFFFFFFE)==0xFFFE ){  c = 0xFFFD; }        \
   165    165     }
   166         -int sqlite3Utf8Read(
          166  +u32 sqlite3Utf8Read(
   167    167     const unsigned char *zIn,       /* First byte of UTF-8 character */
   168    168     const unsigned char **pzNext    /* Write first byte past UTF-8 char here */
   169    169   ){
   170    170     unsigned int c;
   171    171   
   172    172     /* Same as READ_UTF8() above but without the zTerm parameter.
   173    173     ** For this routine, we assume the UTF8 string is always zero-terminated.