/ Check-in [9a059cb6]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Rework the UTF8 reader logic in order to avoid the use of malloc(). Ticket #2523. (CVS 4175)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 9a059cb6bced5cdc950f7816602ac92d89a899be
User & Date: drh 2007-07-23 19:12:42
Context
2007-07-23
19:26
Check the return value of sqlite3PagerWrite() when autovacuuming. Ticket #2524. (CVS 4176) check-in: b4a5c62b user: drh tags: trunk
19:12
Rework the UTF8 reader logic in order to avoid the use of malloc(). Ticket #2523. (CVS 4175) check-in: 9a059cb6 user: drh tags: trunk
2007-07-22
19:10
Fix a bad sizeof in vdbe.c. Ticket #2522. (CVS 4174) check-in: 77ebc3fe user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/func.c.

12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27

28
29
30
31
32
33
34
...
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
...
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454

455
456
457
458
459
460
461

462
463
464
465

466
467
468
469

470
471
472
473
474
475


476
477
478
479



480
481

482

483
484
485
486
487
488
489

490
491
492

493
494
495
496
497
498
499
500
501



502
503
504

505
506

507
508
509

510
511

512
513

514
515
516
517
518

519
520
521
522
523


524
525
526

527
528
529



530
531
532
533
534
535
536
537
538
539
540
...
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
** This file contains the C functions that implement various SQL
** functions of SQLite.  
**
** There is only one exported symbol in this file - the function
** sqliteRegisterBuildinFunctions() found at the bottom of the file.
** All other code has file scope.
**
** $Id: func.c,v 1.161 2007/06/22 15:21:16 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>
/* #include <math.h> */
#include <stdlib.h>
#include <assert.h>
#include "vdbeInt.h"
#include "os.h"


/*
** Return the collating function associated with a function.
*/
static CollSeq *sqlite3GetFuncCollSeq(sqlite3_context *context){
  return context->pColl;
}
................................................................................
/* The correct SQL-92 behavior is for the LIKE operator to ignore
** case.  Thus  'a' LIKE 'A' would be true. */
static const struct compareInfo likeInfoNorm = { '%', '_',   0, 1 };
/* If SQLITE_CASE_SENSITIVE_LIKE is defined, then the LIKE operator
** is case sensitive causing 'a' LIKE 'A' to be false */
static const struct compareInfo likeInfoAlt = { '%', '_',   0, 0 };

/*
** Read a single UTF-8 character and return its value.
*/
u32 sqlite3ReadUtf8(const unsigned char *z){
  u32 c;
  SQLITE_READ_UTF8(z, c);
  return c;
}

/*
** Compare two UTF-8 strings for equality where the first string can
** potentially be a "glob" expression.  Return true (1) if they
** are the same and false (0) if they are different.
**
** Globbing rules:
**
................................................................................
*/
static int patternCompare(
  const u8 *zPattern,              /* The glob pattern */
  const u8 *zString,               /* The string to compare against the glob */
  const struct compareInfo *pInfo, /* Information about how to do the compare */
  const int esc                    /* The escape character */
){
  register int c;
  int invert;
  int seen;
  int c2;
  u8 matchOne = pInfo->matchOne;
  u8 matchAll = pInfo->matchAll;
  u8 matchSet = pInfo->matchSet;
  u8 noCase = pInfo->noCase; 
  int prevEscape = 0;     /* True if the previous character was 'escape' */

  while( (c = *zPattern)!=0 ){
    if( !prevEscape && c==matchAll ){

      while( (c=zPattern[1]) == matchAll || c == matchOne ){
        if( c==matchOne ){
          if( *zString==0 ) return 0;
          SQLITE_SKIP_UTF8(zString);
        }
        zPattern++;
      }

      if( c && esc && sqlite3ReadUtf8(&zPattern[1])==esc ){
        u8 const *zTemp = &zPattern[1];
        SQLITE_SKIP_UTF8(zTemp);
        c = *zTemp;

      }
      if( c==0 ) return 1;
      if( c==matchSet ){
        assert( esc==0 );   /* This is GLOB, not LIKE */

        while( *zString && patternCompare(&zPattern[1],zString,pInfo,esc)==0 ){
          SQLITE_SKIP_UTF8(zString);
        }
        return *zString!=0;
      }else{
        while( (c2 = *zString)!=0 ){


          if( noCase ){
            c2 = sqlite3UpperToLower[c2];
            c = sqlite3UpperToLower[c];
            while( c2 != 0 && c2 != c ){ c2 = sqlite3UpperToLower[*++zString]; }



          }else{
            while( c2 != 0 && c2 != c ){ c2 = *++zString; }

          }

          if( c2==0 ) return 0;
          if( patternCompare(&zPattern[1],zString,pInfo,esc) ) return 1;
          SQLITE_SKIP_UTF8(zString);
        }
        return 0;
      }
    }else if( !prevEscape && c==matchOne ){

      if( *zString==0 ) return 0;
      SQLITE_SKIP_UTF8(zString);
      zPattern++;

    }else if( c==matchSet ){
      int prior_c = 0;
      assert( esc==0 );    /* This only occurs for GLOB, not LIKE */
      seen = 0;
      invert = 0;
      c = sqlite3ReadUtf8(zString);
      if( c==0 ) return 0;
      c2 = *++zPattern;
      if( c2=='^' ){ invert = 1; c2 = *++zPattern; }



      if( c2==']' ){
        if( c==']' ) seen = 1;
        c2 = *++zPattern;

      }
      while( (c2 = sqlite3ReadUtf8(zPattern))!=0 && c2!=']' ){

        if( c2=='-' && zPattern[1]!=']' && zPattern[1]!=0 && prior_c>0 ){
          zPattern++;
          c2 = sqlite3ReadUtf8(zPattern);

          if( c>=prior_c && c<=c2 ) seen = 1;
          prior_c = 0;

        }else if( c==c2 ){
          seen = 1;

          prior_c = c2;
        }else{
          prior_c = c2;
        }
        SQLITE_SKIP_UTF8(zPattern);

      }
      if( c2==0 || (seen ^ invert)==0 ) return 0;
      SQLITE_SKIP_UTF8(zString);
      zPattern++;
    }else if( esc && !prevEscape && sqlite3ReadUtf8(zPattern)==esc){


      prevEscape = 1;
      SQLITE_SKIP_UTF8(zPattern);
    }else{

      if( noCase ){
        if( sqlite3UpperToLower[c] != sqlite3UpperToLower[*zString] ) return 0;
      }else{



        if( c != *zString ) return 0;
      }
      zPattern++;
      zString++;
      prevEscape = 0;
    }
  }
  return *zString==0;
}

/*
................................................................................
    const unsigned char *zEsc = sqlite3_value_text(argv[2]);
    if( zEsc==0 ) return;
    if( sqlite3Utf8CharLen((char*)zEsc, -1)!=1 ){
      sqlite3_result_error(context, 
          "ESCAPE expression must be a single character", -1);
      return;
    }
    escape = sqlite3ReadUtf8(zEsc);
  }
  if( zA && zB ){
    struct compareInfo *pInfo = sqlite3_user_data(context);
#ifdef SQLITE_TEST
    sqlite3_like_count++;
#endif
    







|








>







 







<
<
<
<
<
<
<
<
<







 







|


<






|

>
|
|
|
<

<

>
|
|
|
|
>
|
<
|
|
>
|



<
<
>
>
|
|
|
|
>
>
>
|
|
>

>
|
|
<
|
|
<

>
|
<
<
>





|

|
|
>
>
>


<
>

<
>
|
<
<
>


>
|
|
>

<
<

<
>

|
<
<
|
>
>

<

>

|
<
>
>
>
|

<
<







 







|







12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
...
394
395
396
397
398
399
400









401
402
403
404
405
406
407
...
428
429
430
431
432
433
434
435
436
437

438
439
440
441
442
443
444
445
446
447
448
449

450

451
452
453
454
455
456
457
458

459
460
461
462
463
464
465


466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481

482
483

484
485
486


487
488
489
490
491
492
493
494
495
496
497
498
499
500
501

502
503

504
505


506
507
508
509
510
511
512
513


514

515
516
517


518
519
520
521

522
523
524
525

526
527
528
529
530


531
532
533
534
535
536
537
...
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
** This file contains the C functions that implement various SQL
** functions of SQLite.  
**
** There is only one exported symbol in this file - the function
** sqliteRegisterBuildinFunctions() found at the bottom of the file.
** All other code has file scope.
**
** $Id: func.c,v 1.162 2007/07/23 19:12:42 drh Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>
/* #include <math.h> */
#include <stdlib.h>
#include <assert.h>
#include "vdbeInt.h"
#include "os.h"


/*
** Return the collating function associated with a function.
*/
static CollSeq *sqlite3GetFuncCollSeq(sqlite3_context *context){
  return context->pColl;
}
................................................................................
/* The correct SQL-92 behavior is for the LIKE operator to ignore
** case.  Thus  'a' LIKE 'A' would be true. */
static const struct compareInfo likeInfoNorm = { '%', '_',   0, 1 };
/* If SQLITE_CASE_SENSITIVE_LIKE is defined, then the LIKE operator
** is case sensitive causing 'a' LIKE 'A' to be false */
static const struct compareInfo likeInfoAlt = { '%', '_',   0, 0 };










/*
** Compare two UTF-8 strings for equality where the first string can
** potentially be a "glob" expression.  Return true (1) if they
** are the same and false (0) if they are different.
**
** Globbing rules:
**
................................................................................
*/
static int patternCompare(
  const u8 *zPattern,              /* The glob pattern */
  const u8 *zString,               /* The string to compare against the glob */
  const struct compareInfo *pInfo, /* Information about how to do the compare */
  const int esc                    /* The escape character */
){
  int c, c2;
  int invert;
  int seen;

  u8 matchOne = pInfo->matchOne;
  u8 matchAll = pInfo->matchAll;
  u8 matchSet = pInfo->matchSet;
  u8 noCase = pInfo->noCase; 
  int prevEscape = 0;     /* True if the previous character was 'escape' */

  while( (c = sqlite3Utf8Read(zPattern,0,&zPattern))!=0 ){
    if( !prevEscape && c==matchAll ){
      while( (c=sqlite3Utf8Read(zPattern,0,&zPattern)) == matchAll
               || c == matchOne ){
        if( c==matchOne && sqlite3Utf8Read(zString, 0, &zString)==0 ){
          return 0;

        }

      }
      if( c==0 ){
        return 1;
      }else if( c==esc ){
        c = sqlite3Utf8Read(zPattern, 0, &zPattern);
        if( c==0 ){
          return 0;
        }

      }else if( c==matchSet ){
        assert( esc==0 );         /* This is GLOB, not LIKE */
        assert( matchSet<0x80 );  /* '[' is a single-byte character */
        while( *zString && patternCompare(&zPattern[-1],zString,pInfo,esc)==0 ){
          SQLITE_SKIP_UTF8(zString);
        }
        return *zString!=0;


      }
      while( (c2 = sqlite3Utf8Read(zString,0,&zString))!=0 ){
        if( noCase ){
          c2 = c2<0x80 ? sqlite3UpperToLower[c2] : c2;
          c = c<0x80 ? sqlite3UpperToLower[c] : c;
          while( c2 != 0 && c2 != c ){
            c2 = sqlite3Utf8Read(zString, 0, &zString);
            if( c2<0x80 ) c2 = sqlite3UpperToLower[c2];
          }
        }else{
          while( c2 != 0 && c2 != c ){
            c2 = sqlite3Utf8Read(zString, 0, &zString);
          }
        }
        if( c2==0 ) return 0;
        if( patternCompare(zPattern,zString,pInfo,esc) ) return 1;

      }
      return 0;

    }else if( !prevEscape && c==matchOne ){
      if( sqlite3Utf8Read(zString, 0, &zString)==0 ){
        return 0;


      }
    }else if( c==matchSet ){
      int prior_c = 0;
      assert( esc==0 );    /* This only occurs for GLOB, not LIKE */
      seen = 0;
      invert = 0;
      c = sqlite3Utf8Read(zString, 0, &zString);
      if( c==0 ) return 0;
      c2 = sqlite3Utf8Read(zPattern, 0, &zPattern);
      if( c2=='^' ){
        invert = 1;
        c2 = sqlite3Utf8Read(zPattern, 0, &zPattern);
      }
      if( c2==']' ){
        if( c==']' ) seen = 1;

        c2 = sqlite3Utf8Read(zPattern, 0, &zPattern);
      }

      while( c2 && c2!=']' ){
        if( c2=='-' && zPattern[0]!=']' && zPattern[0]!=0 && prior_c>0 ){


          c2 = sqlite3Utf8Read(zPattern, 0, &zPattern);
          if( c>=prior_c && c<=c2 ) seen = 1;
          prior_c = 0;
        }else{
          if( c==c2 ){
            seen = 1;
          }
          prior_c = c2;


        }

        c2 = sqlite3Utf8Read(zPattern, 0, &zPattern);
      }
      if( c2==0 || (seen ^ invert)==0 ){


        return 0;
      }
    }else if( esc==c && !prevEscape ){
      prevEscape = 1;

    }else{
      c2 = sqlite3Utf8Read(zString, 0, &zString);
      if( noCase ){
        c = c<0x80 ? sqlite3UpperToLower[c] : c;

        c2 = c2<0x80 ? sqlite3UpperToLower[c2] : c2;
      }
      if( c!=c2 ){
        return 0;
      }


      prevEscape = 0;
    }
  }
  return *zString==0;
}

/*
................................................................................
    const unsigned char *zEsc = sqlite3_value_text(argv[2]);
    if( zEsc==0 ) return;
    if( sqlite3Utf8CharLen((char*)zEsc, -1)!=1 ){
      sqlite3_result_error(context, 
          "ESCAPE expression must be a single character", -1);
      return;
    }
    escape = sqlite3Utf8Read(zEsc, 0, &zEsc);
  }
  if( zA && zB ){
    struct compareInfo *pInfo = sqlite3_user_data(context);
#ifdef SQLITE_TEST
    sqlite3_like_count++;
#endif
    

Changes to src/sqliteInt.h.

7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
....
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
....
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Internal interface definitions for SQLite.
**
** @(#) $Id: sqliteInt.h,v 1.578 2007/06/26 10:38:55 danielk1977 Exp $
*/
#ifndef _SQLITEINT_H_
#define _SQLITEINT_H_
#include "sqliteLimit.h"


#if defined(SQLITE_TCL) || defined(TCLSH)
................................................................................
 * This global flag is set for performance testing of triggers. When it is set
 * SQLite will perform the overhead of building new and old trigger references 
 * even when no triggers exist
 */
extern int sqlite3_always_code_trigger_setup;

/*
** A lookup table used by the SQLITE_READ_UTF8 macro.  The definition
** is in utf.c.
*/
extern const unsigned char sqlite3UtfTrans1[];

/*
** Macros for reading UTF8 characters.
**
** SQLITE_READ_UTF8(x,c) reads a single UTF8 value out of x and writes
** that value into c.  The type of x must be unsigned char*.  The type
** of c must be unsigned int.
**
** SQLITE_SKIP_UTF8(x) advances x forward by one character.  The type of
** x must be unsigned char*.
**
** Notes On Invalid UTF-8:
**
**  *  These macros never allow a 7-bit character (0x00 through 0x7f) to
**     be encoded as a multi-byte character.  Any multi-byte character that
**     attempts to encode a value between 0x00 and 0x7f is rendered as 0xfffd.
**
**  *  These macros never allow a UTF16 surrogate value to be encoded.
**     If a multi-byte character attempts to encode a value between
**     0xd800 and 0xe000 then it is rendered as 0xfffd.
**
**  *  Bytes in the range of 0x80 through 0xbf which occur as the first
**     byte of a character are interpreted as single-byte characters
**     and rendered as themselves even though they are technically
**     invalid characters.
**
**  *  These routines accept an infinite number of different UTF8 encodings
**     for unicode values 0x80 and greater.  They do not change over-length
**     encodings to 0xfffd as some systems recommend.
** 
*/
#define SQLITE_READ_UTF8(zIn, c) {                     \
  c = *(zIn++);                                        \
  if( c>=0xc0 ){                                       \
    c = sqlite3UtfTrans1[c-0xc0];                      \
    while( (*zIn & 0xc0)==0x80 ){                      \
      c = (c<<6) + (0x3f & *(zIn++));                  \
    }                                                  \
    if( c<0x80                                         \
        || (c&0xFFFFF800)==0xD800                      \
        || (c&0xFFFFFFFE)==0xFFFE ){  c = 0xFFFD; }    \
  }                                                    \
}
#define SQLITE_SKIP_UTF8(zIn) {                        \
  if( (*(zIn++))>=0xc0 ){                              \
    while( (*zIn & 0xc0)==0x80 ){ zIn++; }             \
  }                                                    \
}




/*
** The SQLITE_CORRUPT_BKPT macro can be either a constant (for production
** builds) or a function call (for debugging).  If it is a function call,
** it allows the operator to set a breakpoint at the spot where database
** corruption is first detected.
*/
................................................................................
int sqlite3FixTriggerStep(DbFixer*, TriggerStep*);
int sqlite3AtoF(const char *z, double*);
char *sqlite3_snprintf(int,char*,const char*,...);
int sqlite3GetInt32(const char *, int*);
int sqlite3FitsIn64Bits(const char *);
int sqlite3Utf16ByteLen(const void *pData, int nChar);
int sqlite3Utf8CharLen(const char *pData, int nByte);
u32 sqlite3ReadUtf8(const unsigned char *);
int sqlite3PutVarint(unsigned char *, u64);
int sqlite3GetVarint(const unsigned char *, u64 *);
int sqlite3GetVarint32(const unsigned char *, u32 *);
int sqlite3VarintLen(u64 v);
void sqlite3IndexAffinityStr(Vdbe *, Index *);
void sqlite3TableAffinityStr(Vdbe *, Table *);
char sqlite3CompareAffinity(Expr *pExpr, char aff2);







|







 







|
|

<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<





<
<
<







 







|







7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
....
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561












































1562
1563
1564
1565
1566



1567
1568
1569
1570
1571
1572
1573
....
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Internal interface definitions for SQLite.
**
** @(#) $Id: sqliteInt.h,v 1.579 2007/07/23 19:12:42 drh Exp $
*/
#ifndef _SQLITEINT_H_
#define _SQLITEINT_H_
#include "sqliteLimit.h"


#if defined(SQLITE_TCL) || defined(TCLSH)
................................................................................
 * This global flag is set for performance testing of triggers. When it is set
 * SQLite will perform the overhead of building new and old trigger references 
 * even when no triggers exist
 */
extern int sqlite3_always_code_trigger_setup;

/*
** Assuming zIn points to the first byte of a UTF-8 character,
** advance zIn to point to the first byte of the next UTF-8 character.
*/












































#define SQLITE_SKIP_UTF8(zIn) {                        \
  if( (*(zIn++))>=0xc0 ){                              \
    while( (*zIn & 0xc0)==0x80 ){ zIn++; }             \
  }                                                    \
}




/*
** The SQLITE_CORRUPT_BKPT macro can be either a constant (for production
** builds) or a function call (for debugging).  If it is a function call,
** it allows the operator to set a breakpoint at the spot where database
** corruption is first detected.
*/
................................................................................
int sqlite3FixTriggerStep(DbFixer*, TriggerStep*);
int sqlite3AtoF(const char *z, double*);
char *sqlite3_snprintf(int,char*,const char*,...);
int sqlite3GetInt32(const char *, int*);
int sqlite3FitsIn64Bits(const char *);
int sqlite3Utf16ByteLen(const void *pData, int nChar);
int sqlite3Utf8CharLen(const char *pData, int nByte);
int sqlite3Utf8Read(const u8*, const u8*, const u8**);
int sqlite3PutVarint(unsigned char *, u64);
int sqlite3GetVarint(const unsigned char *, u64 *);
int sqlite3GetVarint32(const unsigned char *, u32 *);
int sqlite3VarintLen(u64 v);
void sqlite3IndexAffinityStr(Vdbe *, Index *);
void sqlite3TableAffinityStr(Vdbe *, Table *);
char sqlite3CompareAffinity(Expr *pExpr, char aff2);

Changes to src/utf.c.

8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
..
55
56
57
58
59
60
61

62
63
64
65
66
67
68
...
121
122
123
124
125
126
127
















































128
129
130
131
132
133
134
...
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281

282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
...
473
474
475
476
477
478
479

480
481
482
483
484
485
486
487
488
489
490
491
...
497
498
499
500
501
502
503

504
505
506
507
508
509
510
511

512
513

514
515
516
517
518
519
520
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used to translate between UTF-8, 
** UTF-16, UTF-16BE, and UTF-16LE.
**
** $Id: utf.c,v 1.51 2007/05/23 16:23:09 danielk1977 Exp $
**
** Notes on UTF-8:
**
**   Byte-0    Byte-1    Byte-2    Byte-3    Value
**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
................................................................................
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
};


#define WRITE_UTF8(zOut, c) {                          \
  if( c<0x00080 ){                                     \
    *zOut++ = (c&0xFF);                                \
  }                                                    \
  else if( c<0x00800 ){                                \
    *zOut++ = 0xC0 + ((c>>6)&0x1F);                    \
................................................................................
  if( c>=0xD800 && c<0xE000 ){                                       \
    int c2 = ((*zIn++)<<8);                                           \
    c2 += (*zIn++);                                                   \
    c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10);   \
    if( (c & 0xFFFF0000)==0 ) c = 0xFFFD;                             \
  }                                                                   \
}

















































/*
** If the TRANSLATE_TRACE macro is defined, the value of each Mem is
** printed on stderr on the way into and out of sqlite3VdbeMemTranslate().
*/ 
/* #define TRANSLATE_TRACE 1 */

................................................................................
    if( !zOut ) return SQLITE_NOMEM;
  }else{
    zOut = zShort;
  }
  z = zOut;

  if( pMem->enc==SQLITE_UTF8 ){
    unsigned int iExtra = 0xD800;

    if( 0==(pMem->flags&MEM_Term) && zTerm>zIn && (zTerm[-1]&0x80) ){
      /* This UTF8 string is not nul-terminated, and the last byte is
      ** not a character in the ascii range (codpoints 0..127). This
      ** means the SQLITE_READ_UTF8() macro might read past the end
      ** of the allocated buffer.
      **
      ** There are four possibilities:
      **
      **   1. The last byte is the first byte of a non-ASCII character,
      **
      **   2. The final N bytes of the input string are continuation bytes
      **      and immediately preceding them is the first byte of a 
      **      non-ASCII character.
      **
      **   3. The final N bytes of the input string are continuation bytes
      **      and immediately preceding them is a byte that encodes a 
      **      character in the ASCII range.
      **
      **   4. The entire string consists of continuation characters.
      **
      ** Cases (3) and (4) require no special handling. The SQLITE_READ_UTF8()
      ** macro will not overread the buffer in these cases.
      */
      unsigned char *zExtra = &zTerm[-1];
      while( zExtra>zIn && (zExtra[0]&0xC0)==0x80 ){
        zExtra--;
      }

      if( (zExtra[0]&0xC0)==0xC0 ){
        /* Make a copy of the last character encoding in the input string.
        ** Then make sure it is nul-terminated and use SQLITE_READ_UTF8()
        ** to decode the codepoint. Store the codepoint in variable iExtra,
        ** it will be appended to the output string later.
        */
        unsigned char *zFree = 0;
        unsigned char zBuf[16];
        int nExtra = (pMem->n+zIn-zExtra);
        zTerm = zExtra;
        if( nExtra>15 ){
          zExtra = sqliteMallocRaw(nExtra+1);
          if( !zExtra ){
            return SQLITE_NOMEM;
          }
          zFree = zExtra;
        }else{
          zExtra = zBuf;
        }
        memcpy(zExtra, zTerm, nExtra);
        zExtra[nExtra] = '\0';
        SQLITE_READ_UTF8(zExtra, iExtra);
        sqliteFree(zFree);
      }
    }

    if( desiredEnc==SQLITE_UTF16LE ){
      /* UTF-8 -> UTF-16 Little-endian */
      while( zIn<zTerm ){
        SQLITE_READ_UTF8(zIn, c); 

        WRITE_UTF16LE(z, c);
      }
      if( iExtra!=0xD800 ){
        WRITE_UTF16LE(z, iExtra);
      }
    }else{
      assert( desiredEnc==SQLITE_UTF16BE );
      /* UTF-8 -> UTF-16 Big-endian */
      while( zIn<zTerm ){
        SQLITE_READ_UTF8(zIn, c); 
        WRITE_UTF16BE(z, c);
      }
      if( iExtra!=0xD800 ){
        WRITE_UTF16BE(z, iExtra);
      }
    }
    pMem->n = z - zOut;
    *z++ = 0;
  }else{
    assert( desiredEnc==SQLITE_UTF8 );
    if( pMem->enc==SQLITE_UTF16LE ){
      /* UTF-16 Little-endian -> UTF-8 */
................................................................................
**
** The translation is done in-place (since it is impossible for the
** correct UTF-8 encoding to be longer than a malformed encoding).
*/
int sqlite3Utf8To8(unsigned char *zIn){
  unsigned char *zOut = zIn;
  unsigned char *zStart = zIn;

  int c;

  while(1){
    SQLITE_READ_UTF8(zIn, c);
    if( c==0 ) break;
    if( c!=0xfffd ){
      WRITE_UTF8(zOut, c);
    }
  }
  *zOut = 0;
  return zOut - zStart;
}
................................................................................
** It checks that the primitives for serializing and deserializing
** characters in each encoding are inverses of each other.
*/
void sqlite3UtfSelfTest(){
  unsigned int i, t;
  unsigned char zBuf[20];
  unsigned char *z;

  int n;
  unsigned int c;

  for(i=0; i<0x00110000; i++){
    z = zBuf;
    WRITE_UTF8(z, i);
    n = z-zBuf;
    z[0] = 0;

    z = zBuf;
    SQLITE_READ_UTF8(z, c);

    t = i;
    if( i>=0xD800 && i<=0xDFFF ) t = 0xFFFD;
    if( (i&0xFFFFFFFE)==0xFFFE ) t = 0xFFFD;
    assert( c==t );
    assert( (z-zBuf)==n );
  }
  for(i=0; i<0x00110000; i++){







|







 







>







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<



<
>


<
<
<




|


<
<
<







 







>
|

|
<
|







 







>








>

<
>







8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
..
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
...
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
...
264
265
266
267
268
269
270
























































271
272
273

274
275
276



277
278
279
280
281
282
283



284
285
286
287
288
289
290
...
460
461
462
463
464
465
466
467
468
469
470

471
472
473
474
475
476
477
478
...
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501

502
503
504
505
506
507
508
509
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains routines used to translate between UTF-8, 
** UTF-16, UTF-16BE, and UTF-16LE.
**
** $Id: utf.c,v 1.52 2007/07/23 19:12:42 drh Exp $
**
** Notes on UTF-8:
**
**   Byte-0    Byte-1    Byte-2    Byte-3    Value
**  0xxxxxxx                                 00000000 00000000 0xxxxxxx
**  110yyyyy  10xxxxxx                       00000000 00000yyy yyxxxxxx
**  1110zzzz  10yyyyyy  10xxxxxx             00000000 zzzzyyyy yyxxxxxx
................................................................................
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
};


#define WRITE_UTF8(zOut, c) {                          \
  if( c<0x00080 ){                                     \
    *zOut++ = (c&0xFF);                                \
  }                                                    \
  else if( c<0x00800 ){                                \
    *zOut++ = 0xC0 + ((c>>6)&0x1F);                    \
................................................................................
  if( c>=0xD800 && c<0xE000 ){                                       \
    int c2 = ((*zIn++)<<8);                                           \
    c2 += (*zIn++);                                                   \
    c = (c2&0x03FF) + ((c&0x003F)<<10) + (((c&0x03C0)+0x0040)<<10);   \
    if( (c & 0xFFFF0000)==0 ) c = 0xFFFD;                             \
  }                                                                   \
}

/*
** Translate a single UTF-8 character.  Return the unicode value.
**
** During translation, assume that the byte that zTerm points
** is a 0x00.
**
** Write a pointer to the next unread byte back into *pzNext.
**
** Notes On Invalid UTF-8:
**
**  *  This routine never allows a 7-bit character (0x00 through 0x7f) to
**     be encoded as a multi-byte character.  Any multi-byte character that
**     attempts to encode a value between 0x00 and 0x7f is rendered as 0xfffd.
**
**  *  This routine never allows a UTF16 surrogate value to be encoded.
**     If a multi-byte character attempts to encode a value between
**     0xd800 and 0xe000 then it is rendered as 0xfffd.
**
**  *  Bytes in the range of 0x80 through 0xbf which occur as the first
**     byte of a character are interpreted as single-byte characters
**     and rendered as themselves even though they are technically
**     invalid characters.
**
**  *  This routine accepts an infinite number of different UTF8 encodings
**     for unicode values 0x80 and greater.  It do not change over-length
**     encodings to 0xfffd as some systems recommend.
*/
int sqlite3Utf8Read(
  const unsigned char *z,         /* First byte of UTF-8 character */
  const unsigned char *zTerm,     /* Pretend this byte is 0x00 */
  const unsigned char **pzNext    /* Write first byte past UTF-8 char here */
){
  int c = *(z++);
  if( c>=0xc0 ){
    c = sqlite3UtfTrans1[c-0xc0];
    while( z!=zTerm && (*z & 0xc0)==0x80 ){
      c = (c<<6) + (0x3f & *(z++));
    }
    if( c<0x80
        || (c&0xFFFFF800)==0xD800
        || (c&0xFFFFFFFE)==0xFFFE ){  c = 0xFFFD; }
  }
  *pzNext = z;
  return c;
}



/*
** If the TRANSLATE_TRACE macro is defined, the value of each Mem is
** printed on stderr on the way into and out of sqlite3VdbeMemTranslate().
*/ 
/* #define TRANSLATE_TRACE 1 */

................................................................................
    if( !zOut ) return SQLITE_NOMEM;
  }else{
    zOut = zShort;
  }
  z = zOut;

  if( pMem->enc==SQLITE_UTF8 ){
























































    if( desiredEnc==SQLITE_UTF16LE ){
      /* UTF-8 -> UTF-16 Little-endian */
      while( zIn<zTerm ){

        c = sqlite3Utf8Read(zIn, zTerm, (const u8**)&zIn);
        WRITE_UTF16LE(z, c);
      }



    }else{
      assert( desiredEnc==SQLITE_UTF16BE );
      /* UTF-8 -> UTF-16 Big-endian */
      while( zIn<zTerm ){
        c = sqlite3Utf8Read(zIn, zTerm, (const u8**)&zIn);
        WRITE_UTF16BE(z, c);
      }



    }
    pMem->n = z - zOut;
    *z++ = 0;
  }else{
    assert( desiredEnc==SQLITE_UTF8 );
    if( pMem->enc==SQLITE_UTF16LE ){
      /* UTF-16 Little-endian -> UTF-8 */
................................................................................
**
** The translation is done in-place (since it is impossible for the
** correct UTF-8 encoding to be longer than a malformed encoding).
*/
int sqlite3Utf8To8(unsigned char *zIn){
  unsigned char *zOut = zIn;
  unsigned char *zStart = zIn;
  unsigned char *zTerm;
  u32 c;

  while( zIn[0] ){

    c = sqlite3Utf8Read(zIn, zTerm, (const u8**)&zIn);
    if( c!=0xfffd ){
      WRITE_UTF8(zOut, c);
    }
  }
  *zOut = 0;
  return zOut - zStart;
}
................................................................................
** It checks that the primitives for serializing and deserializing
** characters in each encoding are inverses of each other.
*/
void sqlite3UtfSelfTest(){
  unsigned int i, t;
  unsigned char zBuf[20];
  unsigned char *z;
  unsigned char *zTerm;
  int n;
  unsigned int c;

  for(i=0; i<0x00110000; i++){
    z = zBuf;
    WRITE_UTF8(z, i);
    n = z-zBuf;
    z[0] = 0;
    zTerm = z;
    z = zBuf;

    c = sqlite3Utf8Read(z, zTerm, (const u8**)&z);
    t = i;
    if( i>=0xD800 && i<=0xDFFF ) t = 0xFFFD;
    if( (i&0xFFFFFFFE)==0xFFFE ) t = 0xFFFD;
    assert( c==t );
    assert( (z-zBuf)==n );
  }
  for(i=0; i<0x00110000; i++){