SQLite4
Check-in [211c1baef7]
Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add tests and minor fixes for the sqlite4_translate() API.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 211c1baef7e6f079a66b73a54d1f4265e9c1bdd6
User & Date: dan 2013-06-11 15:18:06
Context
2013-06-11
16:48
Remove legacy API functions: complete16(), errmsg16(), column_name16(), column_database_name16(), column_table_name16(), column_origin_name16(), column_decltype16(), create_function16() and collation_needed16(). check-in: c7c533dddc user: dan tags: trunk
15:18
Add tests and minor fixes for the sqlite4_translate() API. check-in: 211c1baef7 user: dan tags: trunk
2013-06-10
19:52
Add the sqlite4_translate() API, for translating between utf-8 and utf-16 text encodings. Also the sqlite4_buffer object. check-in: f56bc22c9e user: dan tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/malloc.c.

92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
  return sqlite4_mm_msize(pEnv->pMM, p);
}
int sqlite4DbMallocSize(sqlite4 *db, void *p){
  assert( db==0 || sqlite4_mutex_held(db->mutex) );
  if( db && isLookaside(db, p) ){
    return db->lookaside.sz;
  }else{
    return sqlite4MallocSize(db->pEnv, p);
  }
}

/*
** Free memory previously obtained from sqlite4Malloc().
*/
void sqlite4_free(sqlite4_env *pEnv, void *p){







|







92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
  return sqlite4_mm_msize(pEnv->pMM, p);
}
/*
** Return the size of the allocation at p.
**
** If db is not NULL and isLookaside() reports that p belongs to the
** lookaside pool of db, the result is db->lookaside.sz. In every other
** case (including db==0) the request is passed on to sqlite4MallocSize()
** using the environment returned by sqlite4_db_env(db).
*/
int sqlite4DbMallocSize(sqlite4 *db, void *p){
  assert( db==0 || sqlite4_mutex_held(db->mutex) );

  /* Lookaside allocations all share one size. */
  if( db!=0 && isLookaside(db, p) ) return db->lookaside.sz;

  return sqlite4MallocSize(sqlite4_db_env(db), p);
}

/*
** Free memory previously obtained from sqlite4Malloc().
*/
void sqlite4_free(sqlite4_env *pEnv, void *p){

Changes to src/utf.c.

271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
...
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
      zIn++;
      *zIn++ = temp;
    }
    pMem->enc = desiredEnc;
  }else{
    int eTrans;
    sqlite4_buffer buf;
    sqlite4_mm *pMM = pMem->db->pEnv->pMM;

    switch( pMem->enc ){
      case SQLITE4_UTF8:
        if( desiredEnc==SQLITE4_UTF16BE ){
          eTrans = SQLITE4_TRANSLATE_UTF8_UTF16BE;
        }else{
          eTrans = SQLITE4_TRANSLATE_UTF8_UTF16LE;
................................................................................
/*
** This routine is called from the TCL test function "translate_selftest".
** It checks that the primitives for serializing and deserializing
** characters in each encoding are inverses of each other.
*/
void sqlite4UtfSelfTest(void){
  unsigned int i, t;
  char zBuf[20];
  char *z;
  int n;
  unsigned int c;

  for(i=0; i<0x00110000; i++){
    z = zBuf;
    WRITE_UTF8(z, i);
    n = (int)(z-zBuf);
    assert( n>0 && n<=4 );
    z[0] = 0;
    z = zBuf;
    c = sqlite4Utf8Read(z, (const char **)&z);
    t = i;
    if( i>=0xD800 && i<=0xDFFF ) t = 0xFFFD;
    if( (i&0xFFFFFFFE)==0xFFFE ) t = 0xFFFD;
    assert( c==t );
    assert( (z-zBuf)==n );
  }
  for(i=0; i<0x00110000; i++){







|







 







|
|










|







271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
...
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
      zIn++;
      *zIn++ = temp;
    }
    pMem->enc = desiredEnc;
  }else{
    int eTrans;
    sqlite4_buffer buf;
    sqlite4_mm *pMM = sqlite4_db_env(pMem->db)->pMM;

    switch( pMem->enc ){
      case SQLITE4_UTF8:
        if( desiredEnc==SQLITE4_UTF16BE ){
          eTrans = SQLITE4_TRANSLATE_UTF8_UTF16BE;
        }else{
          eTrans = SQLITE4_TRANSLATE_UTF8_UTF16LE;
................................................................................
/*
** This routine is called from the TCL test function "translate_selftest".
** It checks that the primitives for serializing and deserializing
** characters in each encoding are inverses of each other.
*/
void sqlite4UtfSelfTest(void){
  unsigned int i, t;
  u8 zBuf[20];
  u8 *z;
  int n;
  unsigned int c;

  for(i=0; i<0x00110000; i++){
    z = zBuf;
    WRITE_UTF8(z, i);
    n = (int)(z-zBuf);
    assert( n>0 && n<=4 );
    z[0] = 0;
    z = zBuf;
    c = sqlite4Utf8Read((char *)z, (const char **)&z);
    t = i;
    if( i>=0xD800 && i<=0xDFFF ) t = 0xFFFD;
    if( (i&0xFFFFFFFE)==0xFFFE ) t = 0xFFFD;
    assert( c==t );
    assert( (z-zBuf)==n );
  }
  for(i=0; i<0x00110000; i++){

Changes to test/enc.test.

13
14
15
16
17
18
19

20
21
22
23
24
25
26
..
32
33
34
35
36
37
38

39
40
41
42
43
44
45
46
47
48
49





50
51
52
53
54
55
56
...
131
132
133
134
135
136
137
138

139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154


























155
156
157
158
159
160
161
162





163
164
165
166
167
168


169
170



171








































172
# various suported unicode encodings (UTF-8, UTF-16, UTF-16le and
# UTF-16be).
#
# $Id: enc.test,v 1.7 2007/05/23 16:23:09 danielk1977 Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl


# Skip this test if the build does not support multiple encodings.
#
ifcapable {!utf16} {
  finish_test
  return
}
................................................................................
}

# $utf16 is a UTF-16 encoded string. Swap each pair of bytes around
# to change the byte-order of the string.
proc swap_byte_order {utf16} {
  binary scan $utf16 \c* ints


  foreach {a b} $ints {
    lappend ints2 $b
    lappend ints2 $a
  }

  return [binary format \c* $ints2]
}

#
# Test that the SQLite routines for converting between UTF encodings
# produce the same results as their TCL counterparts.





#
# $testname is the prefix to be used for the test names.
# $str is a string to use for testing (encoded in UTF-8, as normal for TCL).
#
# The test procedure is:
# 1. Convert the string from UTF-8 to UTF-16le and check that the TCL and
#    SQLite routines produce the same results.
................................................................................
  set utf16_sqlite4 [test_translate $utf16be_bom UTF16 UTF16LE]
  do_bincmp_test $testname.5.be.le $utf16_sqlite4 $utf16le

  # Step 5 (little endian to big endian).
  set utf16_sqlite4 [test_translate $utf16le_bom UTF16 UTF16BE]
  do_bincmp_test $testname.5.le.be $utf16_sqlite4 $utf16be
}


translate_selftest

test_conversion enc-1 "hello world"
test_conversion enc-2 "sqlite"
test_conversion enc-3 ""
test_conversion enc-X "\u0100"
test_conversion enc-4 "\u1234"
test_conversion enc-5 "\u4321abc"
test_conversion enc-6 "\u4321\u1234"
test_conversion enc-7 [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100]
test_conversion enc-8 [string repeat "\u007E\u007F\u0080\u0081" 100]
test_conversion enc-9 [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100]
test_conversion enc-10 [string repeat "\uE000" 100]

proc test_collate {enc zLeft zRight} {
  return [string compare $zLeft $zRight]


























}
add_test_collate $::DB 0 0 1
do_test enc-11.1 {
  execsql {
    CREATE TABLE ab(a COLLATE test_collate, b);
    INSERT INTO ab VALUES(CAST (X'C388' AS TEXT), X'888800');
    INSERT INTO ab VALUES(CAST (X'C0808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808388' AS TEXT), X'888800');
    CREATE INDEX ab_i ON ab(a, b);





  }
} {}
do_test enc-11.2 {
  set cp200 "\u00C8"
  execsql {
    SELECT count(*) FROM ab WHERE a = $::cp200;


  }
} {2}












































finish_test







>







 







>








|

|
>
>
>
>
>







 








>


|
|
|
|
|
|
|
|
|
|
|

<
<
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
<
<
<
<
<
<
<
>
>
>
>
>

<
<
<
<
<
>
>
|
<
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
..
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
...
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160


161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187







188
189
190
191
192
193





194
195
196

197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
# various supported unicode encodings (UTF-8, UTF-16, UTF-16le and
# UTF-16be).
#
# $Id: enc.test,v 1.7 2007/05/23 16:23:09 danielk1977 Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl
set ::testprefix enc

# Skip this test if the build does not support multiple encodings.
#
ifcapable {!utf16} {
  finish_test
  return
}
................................................................................
}

# $utf16 is a UTF-16 encoded string. Swap each pair of bytes around
# to change the byte-order of the string.
proc swap_byte_order {utf16} {
  # Unpack the input into a list of 8-bit integers, one per byte.
  binary scan $utf16 \c* bytes

  # Walk the list two bytes at a time, emitting each pair in the
  # opposite order.
  set swapped [list]
  foreach {first second} $bytes {
    lappend swapped $second $first
  }

  # Pack the reordered integers back into a binary string.
  return [binary format \c* $swapped]
}

#---------------------------------------------------------------------
# Test that the SQLite routines for converting between UTF encodings
# produce the same results as their TCL counterparts. The difference
# between this command and the test_sqlite4_translate command below
# is that this one uses calls to private library functions to test
# conversions on values stored within vdbe Mem* objects. The
# test_sqlite4_translate command below tests the public sqlite4_translate
# interface.
#
# $testname is the prefix to be used for the test names.
# $str is a string to use for testing (encoded in UTF-8, as normal for TCL).
#
# The test procedure is:
# 1. Convert the string from UTF-8 to UTF-16le and check that the TCL and
#    SQLite routines produce the same results.
................................................................................
  set utf16_sqlite4 [test_translate $utf16be_bom UTF16 UTF16LE]
  do_bincmp_test $testname.5.be.le $utf16_sqlite4 $utf16le

  # Step 5 (little endian to big endian).
  set utf16_sqlite4 [test_translate $utf16le_bom UTF16 UTF16BE]
  do_bincmp_test $testname.5.le.be $utf16_sqlite4 $utf16be
}


# Run the C-level self-test of the UTF read/write primitives before
# exercising any conversions from the script level.
translate_selftest

# Plain ASCII input, including the empty string.
test_conversion 1.1 "hello world"
test_conversion 1.2 "sqlite"
test_conversion 1.3 ""
# Single and mixed non-ASCII code points from the Basic Multilingual Plane.
test_conversion 1.X "\u0100"
test_conversion 1.4 "\u1234"
test_conversion 1.5 "\u4321abc"
test_conversion 1.6 "\u4321\u1234"
# Longer inputs built by repetition. 1.8 straddles the 1-byte/2-byte UTF-8
# boundary (U+007F/U+0080) and 1.9 the 2-byte/3-byte boundary
# (U+07FF/U+0800). 1.10 uses U+E000, the first code point above the
# surrogate range.
test_conversion 1.7 [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100]
test_conversion 1.8 [string repeat "\u007E\u007F\u0080\u0081" 100]
test_conversion 1.9 [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100]
test_conversion 1.10 [string repeat "\uE000" 100]



#----------------------------------------------------------------------
# This command is used to test translation between text encodings using
# the public sqlite4_translate() interface.
#
# The test procedure is:
#
# 1.1 Convert the string from UTF-8 to UTF-16le and check that the TCL and
#     SQLite routines produce the same results.
#
# 1.2 Convert the UTF-16le back to UTF-8 and test that the result is 
#     identical to the original string.
#
# 2.1 Convert the string from UTF-8 to UTF-16be and check that the TCL and
#     SQLite routines produce the same results.
#
# 2.2 Convert the UTF-16be back to UTF-8 and test that the result is
#     identical to the original string.
#
# 3.1 Convert the string from UTF-8 to UTF-16 and check that the result
#     matches the result in step 1.1 (for a little-endian host) or 2.1
#     (for a big-endian host).
#
# 3.2 Convert the UTF-16 from 3.1 back to UTF-8 and test that the result 
#     is identical to the original string.
#
proc test_sqlite4_translate {testname str} {
  # Reference results come from TCL itself. The byte-order handling below
  # treats the output of [encoding convertto unicode] as UTF-16 in the
  # host's byte order, so exactly one of the two fixed-order references
  # is obtained by swapping bytes.
  set native  [encoding convertto unicode $str]
  set swapped [swap_byte_order $native]
  if { $::tcl_platform(byteOrder)=="littleEndian" } {
    set utf16le $native
    set utf16be $swapped
    set host le
  } else {
    set utf16le $swapped
    set utf16be $native
    set host be
  }

  # The original string as raw UTF-8 bytes, without a terminator.
  set utf8_expect [binarize -noterm $str]

  # Steps 1.1/1.2 use utf16le and steps 2.1/2.2 use utf16be. In each case
  # translate the UTF-8 input with sqlite4_translate and compare it with
  # the TCL reference, then translate the reference back to UTF-8 and
  # compare that with the original bytes.
  foreach {step enc ref} [list 1 utf16le $utf16le 2 utf16be $utf16be] {
    set out [sqlite4_translate $str utf8_$enc]
    do_bincmp_test $testname.$step.1 $out $ref

    set out [sqlite4_translate $ref ${enc}_utf8]
    do_bincmp_test $testname.$step.2 $out $utf8_expect
  }

  # Step 3.1. The translation to unqualified UTF-16 must agree with the
  # reference for the byte order of the host.
  set utf16 [sqlite4_translate $str utf8_utf16]
  do_bincmp_test $testname.3.1.$host $utf16 $native

  # Step 3.2. Translating that result back must reproduce the input.
  set utf8 [sqlite4_translate $utf16 utf16_utf8]
  do_bincmp_test $testname.3.2 $utf8 $utf8_expect
}

# Run the public-API translation checks over the same input strings that
# are passed to test_conversion: plain ASCII, the empty string, individual
# BMP code points, and longer repeated strings that straddle the UTF-8
# length boundaries at U+0080 and U+0800.
test_sqlite4_translate 2.1 "hello world"
test_sqlite4_translate 2.2 "sqlite"
test_sqlite4_translate 2.3 ""
test_sqlite4_translate 2.X "\u0100"
test_sqlite4_translate 2.4 "\u1234"
test_sqlite4_translate 2.5 "\u4321abc"
test_sqlite4_translate 2.6 "\u4321\u1234"
test_sqlite4_translate 2.7 [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100]
test_sqlite4_translate 2.8 [string repeat "\u007E\u007F\u0080\u0081" 100]
set str [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100]
test_sqlite4_translate 2.9 $str
# U+E000 is the first code point above the surrogate range.
test_sqlite4_translate 2.10 [string repeat "\uE000" 100]

finish_test

Changes to test/permutations.test.

158
159
160
161
162
163
164

165
166
167
168
169
170
171
172
173
174
175
176
  conflict.test 
  count.test
  cse.test
  ctime.test
  date.test
  delete.test delete2.test
  distinct.test distinctagg.test

  exists.test
  e_droptrigger.test e_dropview.test
  e_resolve.test e_dropview.test
  e_select2.test
  enc4.test
  fkey1.test fkey2.test fkey3.test fkey4.test
  func.test func2.test func3.test 
  fuzz2.test 
  in.test in4.test
  index2.test index3.test index4.test 
  insert.test insert2.test insert3.test insert5.test
  join.test join2.test join3.test join4.test join5.test join6.test







>




<







158
159
160
161
162
163
164
165
166
167
168
169

170
171
172
173
174
175
176
  conflict.test 
  count.test
  cse.test
  ctime.test
  date.test
  delete.test delete2.test
  distinct.test distinctagg.test
  enc.test enc4.test
  exists.test
  e_droptrigger.test e_dropview.test
  e_resolve.test e_dropview.test
  e_select2.test

  fkey1.test fkey2.test fkey3.test fkey4.test
  func.test func2.test func3.test 
  fuzz2.test 
  in.test in4.test
  index2.test index3.test index4.test 
  insert.test insert2.test insert3.test insert5.test
  join.test join2.test join3.test join4.test join5.test join6.test

Changes to test/test_utf.c.

29
30
31
32
33
34
35

36
37





38

39

40
41
42
43
44
45
46
47
48
...
115
116
117
118
119
120
121


























































122
123
124
125
126
127
128
...
208
209
210
211
212
213
214

215
216
217
218
219
220
221
222
static int binarize(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  int len;

  char *bytes;
  Tcl_Obj *pRet;





  assert(objc==2);



  bytes = Tcl_GetStringFromObj(objv[1], &len);
  pRet = Tcl_NewByteArrayObj((u8*)bytes, len+1);
  Tcl_SetObjResult(interp, pRet);
  return TCL_OK;
}

/*
** Usage: test_value_overhead <repeat-count> <do-calls>.
**
................................................................................
  }
  return pEnc->enc;
}

static void freeStr(void *pEnv, void *pStr){
  sqlite4_free((sqlite4_env*)pEnv, pStr);
}



























































/*
** Usage:   test_translate <string/blob> <from enc> <to enc> ?<transient>?
**
*/
static int test_translate(
  void * clientData,
................................................................................
  static struct {
    char *zName;
    Tcl_ObjCmdProc *xProc;
  } aCmd[] = {
    { "binarize",                (Tcl_ObjCmdProc*)binarize },
    { "test_value_overhead",     (Tcl_ObjCmdProc*)test_value_overhead },
    { "test_translate",          (Tcl_ObjCmdProc*)test_translate     },

    { "translate_selftest",      (Tcl_ObjCmdProc*)test_translate_selftest},
  };
  int i;
  for(i=0; i<sizeof(aCmd)/sizeof(aCmd[0]); i++){
    Tcl_CreateObjCommand(interp, aCmd[i].zName, aCmd[i].xProc, 0, 0);
  }
  return SQLITE4_OK;
}







>


>
>
>
>
>
|
>
|
>
|
|







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>








29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
...
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
...
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
/*
** Usage: binarize ?-noterm? STR
**
** Set the interpreter result to a byte-array object built from the
** string representation of STR as returned by Tcl_GetStringFromObj().
** By default the byte array is one byte longer than the reported string
** length, so that it also includes the byte that terminates the string.
** If the -noterm switch is present, that extra byte is left out.
**
** Returns TCL_OK on success. Returns TCL_ERROR, leaving a message in the
** interpreter, if the argument count is wrong or if the optional first
** argument is anything other than "-noterm".
*/
static int binarize(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  /* Static because Tcl may cache a pointer to the table in objv[1]. */
  static const char *azSwitch[] = { "-noterm", 0 };

  int len;                        /* Length of the string rep in bytes */
  int bNoterm = 0;                /* True to omit the terminating byte */
  char *bytes;                    /* String representation of STR */
  Tcl_Obj *pRet;                  /* Byte-array object to return */

  if( objc!=2 && objc!=3 ){
    Tcl_WrongNumArgs(interp, 1, objv, "?-noterm? STR");
    return TCL_ERROR;
  }
  if( objc==3 ){
    /* Reject unknown switches instead of treating any word as -noterm. */
    int iSwitch;
    int rc = Tcl_GetIndexFromObj(
        interp, objv[1], azSwitch, "switch", TCL_EXACT, &iSwitch
    );
    if( rc!=TCL_OK ) return rc;
    bNoterm = 1;
  }

  /* STR is always the final argument, with or without the switch. */
  bytes = Tcl_GetStringFromObj(objv[objc-1], &len);
  pRet = Tcl_NewByteArrayObj((u8*)bytes, len+1-bNoterm);
  Tcl_SetObjResult(interp, pRet);
  return TCL_OK;
}

/*
** Usage: test_value_overhead <repeat-count> <do-calls>.
**
................................................................................
  }
  return pEnc->enc;
}

static void freeStr(void *pEnv, void *pStr){
  sqlite4_free((sqlite4_env*)pEnv, pStr);
}

/*
** Usage:   sqlite4_translate <string/blob> <translation>
*/
static int test_sqlite4_translate(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  /*
  ** Table mapping each TRANSLATION keyword to the constant passed to
  ** sqlite4_translate(). The table has static storage because
  ** Tcl_GetIndexFromObjStruct() may retain a pointer to it inside the
  ** internal representation of objv[2]; a stack-allocated table would
  ** leave that pointer dangling once this function returns.
  */
  static const struct Translation {
    const char *zTrans;           /* Keyword accepted as TRANSLATION */
    int eTrans;                   /* Matching SQLITE4_TRANSLATE_* value */
    int isUtf8;                   /* True if the input VALUE is UTF-8 text */
  } aTrans[] = {
    { "utf8_utf16",   SQLITE4_TRANSLATE_UTF8_UTF16,   1 },
    { "utf8_utf16le", SQLITE4_TRANSLATE_UTF8_UTF16LE, 1 },
    { "utf8_utf16be", SQLITE4_TRANSLATE_UTF8_UTF16BE, 1 },
    { "utf16_utf8",   SQLITE4_TRANSLATE_UTF16_UTF8,   0 },
    { "utf16le_utf8", SQLITE4_TRANSLATE_UTF16LE_UTF8, 0 },
    { "utf16be_utf8", SQLITE4_TRANSLATE_UTF16BE_UTF8, 0 },
    { 0, 0, 0 }
  };

  int rc;                         /* Return code */
  int iOpt;                       /* Index into aTrans[] array */

  if( objc!=3 ){
    Tcl_WrongNumArgs(interp, 1, objv, "VALUE TRANSLATION");
    rc = TCL_ERROR;
  }else{
    rc = Tcl_GetIndexFromObjStruct(
        interp, objv[2], aTrans, sizeof(aTrans[0]), "translation", 0, &iOpt
    );
  }

  if( rc==TCL_OK ){
    sqlite4_buffer buf;           /* Receives the translated text */
    void *p;                      /* Input bytes */
    int n;                        /* Size of input in bytes */
    int isUtf8 = aTrans[iOpt].isUtf8;
    int eTrans = aTrans[iOpt].eTrans;

    /* UTF-8 input is taken from the string representation of VALUE.
    ** UTF-16 input is taken from its byte-array representation. */
    if( isUtf8 ){
      p = Tcl_GetStringFromObj(objv[1], &n);
    }else{
      p = Tcl_GetByteArrayFromObj(objv[1], &n);
    }

    /* Translate into a temporary buffer, copy the output into the
    ** interpreter result as a byte array, then release the buffer. */
    sqlite4_buffer_init(&buf, 0);
    sqlite4_translate(&buf, p, n, eTrans);
    Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(buf.p, buf.n));
    sqlite4_buffer_clear(&buf);
  }

  return rc;
}

/*
** Usage:   test_translate <string/blob> <from enc> <to enc> ?<transient>?
**
*/
static int test_translate(
  void * clientData,
................................................................................
  static struct {
    char *zName;
    Tcl_ObjCmdProc *xProc;
  } aCmd[] = {
    { "binarize",                (Tcl_ObjCmdProc*)binarize },
    { "test_value_overhead",     (Tcl_ObjCmdProc*)test_value_overhead },
    { "test_translate",          (Tcl_ObjCmdProc*)test_translate     },
    { "sqlite4_translate",       (Tcl_ObjCmdProc*)test_sqlite4_translate  },
    { "translate_selftest",      (Tcl_ObjCmdProc*)test_translate_selftest},
  };
  int i;
  for(i=0; i<sizeof(aCmd)/sizeof(aCmd[0]); i++){
    Tcl_CreateObjCommand(interp, aCmd[i].zName, aCmd[i].xProc, 0, 0);
  }
  return SQLITE4_OK;
}