/ Check-in [5b22053f]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix text-to-numeric type casting so that it works correctly on UTF16 strings that contain characters where the LSB is numeric but the MSB is non-zero. Ticket [689137afb6da41]
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 5b22053f918d16f593227a432a5d5b4c195bb0b5
User & Date: drh 2013-03-20 12:04:29
Context
2013-03-21
21:20
Many spelling fixes in comments. No changes to code. check-in: 6f6e2d50 user: mistachkin tags: trunk
2013-03-20
12:04
Fix text-to-numeric type casting so that it works correctly on UTF16 strings that contain characters where the LSB is numeric but the MSB is non-zero. Ticket [689137afb6da41] check-in: 5b22053f user: drh tags: trunk
2013-03-19
16:12
Bring makefiles and build scripts into alignment with the sessions branch. No changes to code. check-in: d1f41089 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/util.c.

   257    257   **
   258    258   ** If some prefix of the input string is a valid number, this routine
   259    259   ** returns FALSE but it still converts the prefix and writes the result
   260    260   ** into *pResult.
   261    261   */
   262    262   int sqlite3AtoF(const char *z, double *pResult, int length, u8 enc){
   263    263   #ifndef SQLITE_OMIT_FLOATING_POINT
   264         -  int incr = (enc==SQLITE_UTF8?1:2);
          264  +  int incr;
   265    265     const char *zEnd = z + length;
   266    266     /* sign * significand * (10 ^ (esign * exponent)) */
   267    267     int sign = 1;    /* sign of significand */
   268    268     i64 s = 0;       /* significand */
   269    269     int d = 0;       /* adjust exponent for shifting decimal point */
   270    270     int esign = 1;   /* sign of exponent */
   271    271     int e = 0;       /* exponent */
   272    272     int eValid = 1;  /* True if exponent is either not used or is well-formed */
   273    273     double result;
   274    274     int nDigits = 0;
          275  +  int nonNum = 0;
   275    276   
          277  +  assert( enc==SQLITE_UTF8 || enc==SQLITE_UTF16LE || enc==SQLITE_UTF16BE );
   276    278     *pResult = 0.0;   /* Default return value, in case of an error */
   277    279   
   278         -  if( enc==SQLITE_UTF16BE ) z++;
          280  +  if( enc==SQLITE_UTF8 ){
          281  +    incr = 1;
          282  +  }else{
          283  +    int i;
          284  +    incr = 2;
          285  +    assert( SQLITE_UTF16LE==2 && SQLITE_UTF16BE==3 );
          286  +    for(i=3-enc; i<length && z[i]==0; i+=2){}
          287  +    nonNum = i<length;
          288  +    zEnd = z+i+enc-3;
          289  +    z += (enc&1);
          290  +  }
   279    291   
   280    292     /* skip leading spaces */
   281    293     while( z<zEnd && sqlite3Isspace(*z) ) z+=incr;
   282    294     if( z>=zEnd ) return 0;
   283    295   
   284    296     /* get sign of significand */
   285    297     if( *z=='-' ){
................................................................................
   404    416       }
   405    417     }
   406    418   
   407    419     /* store the result */
   408    420     *pResult = result;
   409    421   
   410    422     /* return true if number and no extra non-whitespace characters after */
   411         -  return z>=zEnd && nDigits>0 && eValid;
          423  +  return z>=zEnd && nDigits>0 && eValid && nonNum==0;
   412    424   #else
   413    425     return !sqlite3Atoi64(z, pResult, length, enc);
   414    426   #endif /* SQLITE_OMIT_FLOATING_POINT */
   415    427   }
   416    428   
   417    429   /*
   418    430   ** Compare the 19-character string zNum against the text representation
................................................................................
   453    465   ** integer, then write that value into *pNum and return 0.
   454    466   **
   455    467   ** If zNum is exactly 9223372036854775808, return 2.  This special
   456    468   ** case is broken out because while 9223372036854775808 cannot be a 
   457    469   ** signed 64-bit integer, its negative -9223372036854775808 can be.
   458    470   **
   459    471   ** If zNum is too big for a 64-bit integer and is not
   460         -** 9223372036854775808 then return 1.
          472  +** 9223372036854775808  or if zNum contains any non-numeric text,
          473  +** then return 1.
   461    474   **
   462    475   ** length is the number of bytes in the string (bytes, not characters).
   463    476   ** The string is not necessarily zero-terminated.  The encoding is
   464    477   ** given by enc.
   465    478   */
   466    479   int sqlite3Atoi64(const char *zNum, i64 *pNum, int length, u8 enc){
   467         -  int incr = (enc==SQLITE_UTF8?1:2);
          480  +  int incr;
   468    481     u64 u = 0;
   469    482     int neg = 0; /* assume positive */
   470    483     int i;
   471    484     int c = 0;
          485  +  int nonNum = 0;
   472    486     const char *zStart;
   473    487     const char *zEnd = zNum + length;
   474         -  if( enc==SQLITE_UTF16BE ) zNum++;
          488  +  assert( enc==SQLITE_UTF8 || enc==SQLITE_UTF16LE || enc==SQLITE_UTF16BE );
          489  +  if( enc==SQLITE_UTF8 ){
          490  +    incr = 1;
          491  +  }else{
          492  +    incr = 2;
          493  +    assert( SQLITE_UTF16LE==2 && SQLITE_UTF16BE==3 );
          494  +    for(i=3-enc; i<length && zNum[i]==0; i+=2){}
          495  +    nonNum = i<length;
          496  +    zEnd = zNum+i+enc-3;
          497  +    zNum += (enc&1);
          498  +  }
   475    499     while( zNum<zEnd && sqlite3Isspace(*zNum) ) zNum+=incr;
   476    500     if( zNum<zEnd ){
   477    501       if( *zNum=='-' ){
   478    502         neg = 1;
   479    503         zNum+=incr;
   480    504       }else if( *zNum=='+' ){
   481    505         zNum+=incr;
................................................................................
   492    516       *pNum = -(i64)u;
   493    517     }else{
   494    518       *pNum = (i64)u;
   495    519     }
   496    520     testcase( i==18 );
   497    521     testcase( i==19 );
   498    522     testcase( i==20 );
   499         -  if( (c!=0 && &zNum[i]<zEnd) || (i==0 && zStart==zNum) || i>19*incr ){
          523  +  if( (c+nonNum!=0 && &zNum[i]<zEnd) || (i==0 && zStart==zNum) || i>19*incr ){
   500    524       /* zNum is empty or contains non-numeric text or is longer
   501    525       ** than 19 digits (thus guaranteeing that it is too large) */
   502    526       return 1;
   503    527     }else if( i<19*incr ){
   504    528       /* Less than 19 digits, so we know that it fits in 64 bits */
   505    529       assert( u<=LARGEST_INT64 );
   506    530       return 0;

Added test/numcast.test.

            1  +# 2013 March 20
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#***********************************************************************
           11  +# This file implements regression tests for SQLite library. 
           12  +# This particular file does testing of casting strings into numeric
           13  +# values.
           14  +#
           15  +
           16  +set testdir [file dirname $argv0]
           17  +source $testdir/tester.tcl
           18  +
           19  +foreach enc {utf8 utf16le utf16be} {
           20  +  do_test numcast-$enc.0 {
           21  +    db close
           22  +    sqlite3 db :memory:
           23  +    db eval "PRAGMA encoding='$enc'"
           24  +    set x [db eval {PRAGMA encoding}]
           25  +    string map {- {}} [string tolower $x]
           26  +  } $enc
           27  +  foreach {idx str rval ival} {
           28  +     1 12345.0       12345.0    12345
           29  +     2 12345.0e0     12345.0    12345
           30  +     3 -12345.0e0   -12345.0   -12345
           31  +     4 -12345.25    -12345.25  -12345
           32  +     5 { -12345.0}  -12345.0   -12345
           33  +     6 { 876xyz}       876.0      876
           34  +     7 { 456ķ89}       456.0      456
           35  +     8 { Ġ 321.5}        0.0        0
           36  +  } {
           37  +    do_test numcast-$enc.$idx.1 {
           38  +      db eval {SELECT CAST($str AS real)}
           39  +    } $rval
           40  +    do_test numcast-$enc.$idx.2 {
           41  +      db eval {SELECT CAST($str AS integer)}
           42  +    } $ival
           43  +  }
           44  +}
           45  +
           46  +finish_test