/ Check-in [49ab4794]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Correctly handle the situation where a collation sequence is available, but not in the preferred encoding. (CVS 1565)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 49ab4794e1b5be5cbb3b87a65477659762487cf8
User & Date: danielk1977 2004-06-10 14:01:08
Context
2004-06-10
22:51
Add new contributed logo TIFF. (CVS 1566) check-in: 86744c9a user: drh tags: trunk
14:01
Correctly handle the situation where a collation sequence is available, but not in the preferred encoding. (CVS 1565) check-in: 49ab4794 user: danielk1977 tags: trunk
10:51
Add the sqlite3_collation_needed() API and fix some error handling cases involving unknown collation sequences. (CVS 1564) check-in: 67500546 user: danielk1977 tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/build.c.

19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
...
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
**     DROP INDEX
**     creating ID lists
**     BEGIN TRANSACTION
**     COMMIT
**     ROLLBACK
**     PRAGMA
**
** $Id: build.c,v 1.215 2004/06/10 10:50:08 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>

/*
** This routine is called when a new SQL statement is beginning to
** be parsed.  Check to see if the schema for the database needs
................................................................................
  int create
){
  CollSeq *pColl = findCollSeqEntry(db, zName, nName, create);
  if( pColl ) switch( enc ){
    case TEXT_Utf8:
      break;
    case TEXT_Utf16le:
      pColl = &pColl[2];
      break;
    case TEXT_Utf16be:
      pColl = &pColl[1];
      break;
    default: 
      assert(!"Cannot happen");
  }
  return pColl;
}








|







 







|


|







19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
...
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
**     DROP INDEX
**     creating ID lists
**     BEGIN TRANSACTION
**     COMMIT
**     ROLLBACK
**     PRAGMA
**
** $Id: build.c,v 1.216 2004/06/10 14:01:08 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>

/*
** This routine is called when a new SQL statement is beginning to
** be parsed.  Check to see if the schema for the database needs
................................................................................
  int create
){
  CollSeq *pColl = findCollSeqEntry(db, zName, nName, create);
  if( pColl ) switch( enc ){
    case TEXT_Utf8:
      break;
    case TEXT_Utf16le:
      pColl = &pColl[1];
      break;
    case TEXT_Utf16be:
      pColl = &pColl[2];
      break;
    default: 
      assert(!"Cannot happen");
  }
  return pColl;
}

Changes to src/sqliteInt.h.

7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
....
1397
1398
1399
1400
1401
1402
1403







**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Internal interface definitions for SQLite.
**
** @(#) $Id: sqliteInt.h,v 1.280 2004/06/10 10:50:32 danielk1977 Exp $
*/
#include "config.h"
#include "sqlite3.h"
#include "hash.h"
#include "parse.h"
#include <stdio.h>
#include <stdlib.h>
................................................................................
int sqlite3ReadUniChar(const char *zStr, int *pOffset, u8 *pEnc, int fold);
int sqlite3ReadSchema(sqlite *db, char **);
CollSeq *sqlite3FindCollSeq(sqlite *,u8 enc, const char *,int,int);
CollSeq *sqlite3LocateCollSeq(Parse *pParse, const char *zName, int nName);
CollSeq *sqlite3ExprCollSeq(Parse *pParse, Expr *pExpr);
int sqlite3CheckCollSeq(Parse *, CollSeq *);
int sqlite3CheckIndexCollSeq(Parse *, Index *);














|







 







>
>
>
>
>
>
>
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
....
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Internal interface definitions for SQLite.
**
** @(#) $Id: sqliteInt.h,v 1.281 2004/06/10 14:01:08 danielk1977 Exp $
*/
#include "config.h"
#include "sqlite3.h"
#include "hash.h"
#include "parse.h"
#include <stdio.h>
#include <stdlib.h>
................................................................................
int sqlite3ReadUniChar(const char *zStr, int *pOffset, u8 *pEnc, int fold);
int sqlite3ReadSchema(sqlite *db, char **);
CollSeq *sqlite3FindCollSeq(sqlite *,u8 enc, const char *,int,int);
CollSeq *sqlite3LocateCollSeq(Parse *pParse, const char *zName, int nName);
CollSeq *sqlite3ExprCollSeq(Parse *pParse, Expr *pExpr);
int sqlite3CheckCollSeq(Parse *, CollSeq *);
int sqlite3CheckIndexCollSeq(Parse *, Index *);

const void *sqlite3ValueText(sqlite3_value*, u8);
int sqlite3ValueBytes(sqlite3_value*, u8);
void sqlite3ValueSetStr(sqlite3_value*, int, const void *,u8);
void sqlite3ValueFree(sqlite3_value*);
sqlite3_value *sqlite3ValueNew();

Changes to src/test1.c.

9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
...
856
857
858
859
860
861
862






































































































863
864
865
866
867
868
869
....
1864
1865
1866
1867
1868
1869
1870

1871
1872
1873
1874
1875
1876
1877
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Code for testing the printf() interface to SQLite.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library.
**
** $Id: test1.c,v 1.74 2004/06/09 17:37:28 drh Exp $
*/
#include "sqliteInt.h"
#include "tcl.h"
#include "os.h"
#include <stdlib.h>
#include <string.h>

................................................................................
    sprintf(zBuf, "(%d) ", rc);
    Tcl_AppendResult(interp, zBuf, sqlite3ErrStr(rc), 0);
    return TCL_ERROR;
  }
  return TCL_OK;
}







































































































/*
** Usage:    breakpoint
**
** This routine exists for one purpose - to provide a place to put a
** breakpoint with GDB that can be triggered using TCL code.  The use
** for this is when a particular test fails on (say) the 1485th iteration.
** In the TCL test script, we can add code like this:
................................................................................
     { "sqlite3_column_name16", test_stmt_utf16,    sqlite3_column_name16},

     /* Functions from os.h */
     { "sqlite3OsOpenReadWrite",test_sqlite3OsOpenReadWrite, 0 },
     { "sqlite3OsClose",        test_sqlite3OsClose, 0 },
     { "sqlite3OsLock",         test_sqlite3OsLock, 0 },
     { "sqlite3OsUnlock",       test_sqlite3OsUnlock, 0 },


  };
  int i;
  extern int sqlite3_os_trace;

  for(i=0; i<sizeof(aCmd)/sizeof(aCmd[0]); i++){
    Tcl_CreateCommand(interp, aCmd[i].zName, aCmd[i].xProc, 0, 0);







|







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>







9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
...
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
....
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Code for testing the printf() interface to SQLite.  This code
** is not included in the SQLite library.  It is used for automated
** testing of the SQLite library.
**
** $Id: test1.c,v 1.75 2004/06/10 14:01:08 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "tcl.h"
#include "os.h"
#include <stdlib.h>
#include <string.h>

................................................................................
    sprintf(zBuf, "(%d) ", rc);
    Tcl_AppendResult(interp, zBuf, sqlite3ErrStr(rc), 0);
    return TCL_ERROR;
  }
  return TCL_OK;
}


/*
** Usage: add_test_collate <db ptr> <utf8> <utf16le> <utf16be>
**
** This function is used to test that SQLite selects the correct collation
** sequence callback when multiple versions (for different text encodings)
** are available.
**
** Calling this routine registers the collation sequence "test_collate"
** with database handle <db>. The second argument must be a list of three
** boolean values. If the first is true, then a version of test_collate is
** registered for UTF-8, if the second is true, a version is registered for
** UTF-16le, if the third is true, a UTF-16be version is available.
** Previous versions of test_collate are deleted.
**
** The collation sequence test_collate is implemented by calling the
** following TCL script:
**
**   "test_collate <enc> <lhs> <rhs>"
**
** The <lhs> and <rhs> are the two values being compared, encoded in UTF-8.
** The <enc> parameter is the encoding of the collation function that
** SQLite selected to call. The TCL test script implements the
** "test_collate" proc.
**
** Note that this will only work with one intepreter at a time, as the
** interp pointer to use when evaluating the TCL script is stored in
** pTestCollateInterp.
*/
static Tcl_Interp* pTestCollateInterp;
static int test_collate_func(
  void *pCtx, 
  int nA, const void *zA,
  int nB, const void *zB
){
  Tcl_Interp *i = pTestCollateInterp;
  int encin = (int)pCtx;
  int res;

  sqlite3_value *pVal;
  Tcl_Obj *pX;

  pX = Tcl_NewStringObj("test_collate", -1);
  Tcl_IncrRefCount(pX);

  switch( encin ){
    case SQLITE_UTF8:
      Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj("UTF-8",-1));
      break;
    case SQLITE_UTF16LE:
      Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj("UTF-16LE",-1));
      break;
    case SQLITE_UTF16BE:
      Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj("UTF-16BE",-1));
      break;
    default:
      assert(0);
  }

  pVal = sqlite3ValueNew();
  sqlite3ValueSetStr(pVal, nA, zA, encin);
  Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj(sqlite3_value_text(pVal),-1));
  sqlite3ValueSetStr(pVal, nB, zB, encin);
  Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj(sqlite3_value_text(pVal),-1));
  sqlite3ValueFree(pVal);

  Tcl_EvalObjEx(i, pX, 0);
  Tcl_DecrRefCount(pX);
  Tcl_GetIntFromObj(i, Tcl_GetObjResult(i), &res);
  return res;
}
static int test_collate(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  sqlite3 *db;
  int val;

  if( objc!=5 ) goto bad_args;
  pTestCollateInterp = interp;
  if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ) return TCL_ERROR;

  if( TCL_OK!=Tcl_GetBooleanFromObj(interp, objv[2], &val) ) return TCL_ERROR;
  sqlite3_create_collation(db, "test_collate", SQLITE_UTF8, 
        (void *)SQLITE_UTF8, val?test_collate_func:0);
  if( TCL_OK!=Tcl_GetBooleanFromObj(interp, objv[3], &val) ) return TCL_ERROR;
  sqlite3_create_collation(db, "test_collate", SQLITE_UTF16LE, 
        (void *)SQLITE_UTF16LE, val?test_collate_func:0);
  if( TCL_OK!=Tcl_GetBooleanFromObj(interp, objv[4], &val) ) return TCL_ERROR;
  sqlite3_create_collation(db, "test_collate", SQLITE_UTF16BE, 
        (void *)SQLITE_UTF16BE, val?test_collate_func:0);
  
  return TCL_OK;

bad_args:
  Tcl_AppendResult(interp, "wrong # args: should be \"",
      Tcl_GetStringFromObj(objv[0], 0), " <DB> <utf8> <utf16le> <utf16be>", 0);
  return TCL_ERROR;
}

/*
** Usage:    breakpoint
**
** This routine exists for one purpose - to provide a place to put a
** breakpoint with GDB that can be triggered using TCL code.  The use
** for this is when a particular test fails on (say) the 1485th iteration.
** In the TCL test script, we can add code like this:
................................................................................
     { "sqlite3_column_name16", test_stmt_utf16,    sqlite3_column_name16},

     /* Functions from os.h */
     { "sqlite3OsOpenReadWrite",test_sqlite3OsOpenReadWrite, 0 },
     { "sqlite3OsClose",        test_sqlite3OsClose, 0 },
     { "sqlite3OsLock",         test_sqlite3OsLock, 0 },
     { "sqlite3OsUnlock",       test_sqlite3OsUnlock, 0 },
     { "add_test_collate",      test_collate, 0         },

  };
  int i;
  extern int sqlite3_os_trace;

  for(i=0; i<sizeof(aCmd)/sizeof(aCmd[0]); i++){
    Tcl_CreateCommand(interp, aCmd[i].zName, aCmd[i].xProc, 0, 0);

Changes to src/vdbeapi.c.

54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
}
long long int sqlite3_value_int64(sqlite3_value *pVal){
  Mem *pMem = (Mem *)pVal;
  sqlite3VdbeMemIntegerify(pMem);
  return pVal->i;
}
const unsigned char *sqlite3_value_text(sqlite3_value *pVal){
  if( pVal->flags&MEM_Null ){
    /* For a NULL return a NULL Pointer */
    return 0;
  }

  if( pVal->flags&MEM_Str ){
    /* If there is already a string representation, make sure it is in
    ** encoded in UTF-8.
    */
    sqlite3VdbeChangeEncoding(pVal, TEXT_Utf8);
  }else if( !(pVal->flags&MEM_Blob) ){
    /* Otherwise, unless this is a blob, convert it to a UTF-8 string */
    sqlite3VdbeMemStringify(pVal, TEXT_Utf8);
  }

  return pVal->z;
}
const void *sqlite3_value_text16(sqlite3_value* pVal){
  if( pVal->flags&MEM_Null ){
    /* For a NULL return a NULL Pointer */
    return 0;
  }

  if( pVal->flags&MEM_Str ){
    /* If there is already a string representation, make sure it is in
    ** encoded in UTF-16 machine byte order.
    */
    sqlite3VdbeChangeEncoding(pVal, TEXT_Utf16);
  }else if( !(pVal->flags&MEM_Blob) ){
    /* Otherwise, unless this is a blob, convert it to a UTF-16 string */
    sqlite3VdbeMemStringify(pVal, TEXT_Utf16);
  }

  return (const void *)(pVal->z);
}
int sqlite3_value_type(sqlite3_value* pVal){
  return pVal->type;
}

/**************************** sqlite3_result_  *******************************
** The following routines are used by user-defined functions to specify







|
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<

<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<







54
55
56
57
58
59
60
61


62













63












64



65
66
67
68
69
70
71
}
long long int sqlite3_value_int64(sqlite3_value *pVal){
  Mem *pMem = (Mem *)pVal;
  sqlite3VdbeMemIntegerify(pMem);
  return pVal->i;
}
const unsigned char *sqlite3_value_text(sqlite3_value *pVal){
  return (const char *)sqlite3ValueText(pVal, TEXT_Utf8);


}













const void *sqlite3_value_text16(sqlite3_value* pVal){












  return sqlite3ValueText(pVal, TEXT_Utf16);



}
int sqlite3_value_type(sqlite3_value* pVal){
  return pVal->type;
}

/**************************** sqlite3_result_  *******************************
** The following routines are used by user-defined functions to specify

Changes to src/vdbemem.c.

43
44
45
46
47
48
49
50
51

52
53
54


55
56
57
58
59
60
61
62
...
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
...
584
585
586
587
588
589
590






































































  if( pMem->enc==TEXT_Utf8 || desiredEnc==TEXT_Utf8 ){
    /* If the current encoding does not match the desired encoding, then
    ** we will need to do some translation between encodings.
    */
    char *z;
    int n;
    int rc = sqlite3utfTranslate(pMem->z, pMem->n, pMem->enc,
                                 (void **)&z, &n, desiredEnc);

    if( rc!=SQLITE_OK ){
      return rc;
    }


  
    /* Result of sqlite3utfTranslate is currently always dynamically
    ** allocated and nul terminated. This might be altered as a performance
    ** enhancement later.
    */
    pMem->z = z;
    pMem->n = n;
    pMem->flags &= ~(MEM_Ephem | MEM_Short | MEM_Static);
................................................................................
    */
    assert( !pColl || pColl->xCmp );

    if( pColl ){
      if( pMem1->enc==pColl->enc ){
        return pColl->xCmp(pColl->pUser,pMem1->n,pMem1->z,pMem2->n,pMem2->z);
      }else{
        switch( pColl->enc ){
          case SQLITE_UTF8:
            return pColl->xCmp(
              pColl->pUser,
              sqlite3_value_bytes((sqlite3_value *)pMem1),
              sqlite3_value_text((sqlite3_value *)pMem1),
              sqlite3_value_bytes((sqlite3_value *)pMem2),
              sqlite3_value_text((sqlite3_value *)pMem2)
            );
          case SQLITE_UTF16LE:
          case SQLITE_UTF16BE:
            /* FIX ME: Handle non-native UTF-16 properly instead of
            ** assuming it is always native. */
            return pColl->xCmp(
              pColl->pUser,
              sqlite3_value_bytes16((sqlite3_value *)pMem1),
              sqlite3_value_text16((sqlite3_value *)pMem1),
              sqlite3_value_bytes16((sqlite3_value *)pMem2),
              sqlite3_value_text16((sqlite3_value *)pMem2)
            );
          default:
            assert(!"Cannot happen");
        }
      }
    }
    /* If a NULL pointer was passed as the collate function, fall through
    ** to the blob case and use memcmp().
    */
  }
 
  /* Both values must be blobs.  Compare using memcmp().
  */
  rc = memcmp(pMem1->z, pMem2->z, (pMem1->n>pMem2->n)?pMem2->n:pMem1->n);
  if( rc==0 ){
    rc = pMem1->n - pMem2->n;
  }
  return rc;
}

................................................................................
    assert( (pMem->flags & (MEM_Static|MEM_Dyn|MEM_Ephem|MEM_Short))==0 );
  }
  /* MEM_Null excludes all other types */
  assert( (pMem->flags&(MEM_Str|MEM_Int|MEM_Real|MEM_Blob))==0
          || (pMem->flags&MEM_Null)==0 );
}
#endif












































































|
|
>



>
>
|







 







<
<
|
|
|
|
|
|
|
<
<
<
<
<
<
<
<
<
<
<
<
<
|
|
<

|
<


|
<







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
...
443
444
445
446
447
448
449


450
451
452
453
454
455
456













457
458

459
460

461
462
463

464
465
466
467
468
469
470
...
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644

  if( pMem->enc==TEXT_Utf8 || desiredEnc==TEXT_Utf8 ){
    /* If the current encoding does not match the desired encoding, then
    ** we will need to do some translation between encodings.
    */
    char *z;
    int n;
    int rc;

    rc = sqlite3utfTranslate(pMem->z, pMem->n, pMem->enc, &z, &n, desiredEnc);
    if( rc!=SQLITE_OK ){
      return rc;
    }
    if( pMem->flags&MEM_Dyn ){
      sqliteFree(pMem->z);
    }
    /* Result of sqlite3utfTranslate is currently always dynamically
    ** allocated and nul terminated. This might be altered as a performance
    ** enhancement later.
    */
    pMem->z = z;
    pMem->n = n;
    pMem->flags &= ~(MEM_Ephem | MEM_Short | MEM_Static);
................................................................................
    */
    assert( !pColl || pColl->xCmp );

    if( pColl ){
      if( pMem1->enc==pColl->enc ){
        return pColl->xCmp(pColl->pUser,pMem1->n,pMem1->z,pMem2->n,pMem2->z);
      }else{


        return pColl->xCmp(
          pColl->pUser,
          sqlite3ValueBytes((sqlite3_value*)pMem1, pColl->enc),
          sqlite3ValueText((sqlite3_value*)pMem1, pColl->enc),
          sqlite3ValueBytes((sqlite3_value*)pMem2, pColl->enc),
          sqlite3ValueText((sqlite3_value*)pMem2, pColl->enc)
        );













      }
    }

    /* If a NULL pointer was passed as the collate function, fall through
    ** to the blob case and use memcmp().  */

  }
 
  /* Both values must be blobs.  Compare using memcmp().  */

  rc = memcmp(pMem1->z, pMem2->z, (pMem1->n>pMem2->n)?pMem2->n:pMem1->n);
  if( rc==0 ){
    rc = pMem1->n - pMem2->n;
  }
  return rc;
}

................................................................................
    assert( (pMem->flags & (MEM_Static|MEM_Dyn|MEM_Ephem|MEM_Short))==0 );
  }
  /* MEM_Null excludes all other types */
  assert( (pMem->flags&(MEM_Str|MEM_Int|MEM_Real|MEM_Blob))==0
          || (pMem->flags&MEM_Null)==0 );
}
#endif

/* This function is only available internally, it is not part of the
** external API. It works in a similar way to sqlite3_value_text(),
** except the data returned is in the encoding specified by the second
** parameter, which must be one of SQLITE_UTF16BE, SQLITE_UTF16LE or
** SQLITE_UTF8.
*/
const void *sqlite3ValueText(sqlite3_value* pVal, u8 enc){
  assert( enc==SQLITE_UTF16LE || enc==SQLITE_UTF16BE || enc==SQLITE_UTF8);
  if( pVal->flags&MEM_Null ){
    /* For a NULL return a NULL Pointer */
    return 0;
  }

  if( pVal->flags&MEM_Str ){
    /* If there is already a string representation, make sure it is in
    ** encoded in the required UTF-16 byte order.
    */
    sqlite3VdbeChangeEncoding(pVal, enc);
  }else if( !(pVal->flags&MEM_Blob) ){
    /* Otherwise, unless this is a blob, convert it to a UTF-16 string */
    sqlite3VdbeMemStringify(pVal, enc);
  }

  return (const void *)(pVal->z);
}

sqlite3_value* sqlite3ValueNew(){
  Mem *p = sqliteMalloc(sizeof(*p));
  if( p ){
    p->flags = MEM_Null;
    p->type = SQLITE_NULL;
  }
  return p;
}

void sqlite3ValueSetStr(sqlite3_value *v, int n, const void *z, u8 enc){
  Mem *p = (Mem *)v;
  if( p->z && p->flags&MEM_Dyn ){
    sqliteFree(p->z);
  }
  p->z = (char *)z;
  p->n = n;
  p->enc = enc;
  p->type = SQLITE_TEXT;
  p->flags = (MEM_Str|MEM_Static);

  if( p->n<0 ){
    if( enc==SQLITE_UTF8 ){
      p->n = strlen(p->z);
    }else{
      p->n = sqlite3utf16ByteLen(p->z, -1);
    }
  }
  return;
}

void sqlite3ValueFree(sqlite3_value *v){
  sqlite3ValueSetStr(v, 0, 0, SQLITE_UTF8);
  sqliteFree(v);
}

int sqlite3ValueBytes(sqlite3_value *pVal, u8 enc){
  Mem *p = (Mem*)pVal;
  if( (p->flags & MEM_Blob)!=0 || sqlite3ValueText(pVal, enc) ){
    return p->n;
  }
  return 0;
}

Changes to test/enc2.test.

9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
...
139
140
141
142
143
144
145

146







































































































147



#
#***********************************************************************
# This file implements regression tests for SQLite library.  The focus of
# this file is testing the SQLite routines used for converting between the
# various suported unicode encodings (UTF-8, UTF-16, UTF-16le and
# UTF-16be).
#
# $Id: enc2.test,v 1.7 2004/06/10 05:59:25 danielk1977 Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

db close

# Return the UTF-8 representation of the supplied UTF-16 string $str. 
................................................................................
do_test enc2-4.3 {
  catchsql {
    ATTACH 'test2.db' as aux;
  }
} {1 {attached databases must use the same text encoding as main database}}

db2 close









































































































finish_test










|







 







>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

>
>
>
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
...
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The focus of
# this file is testing the SQLite routines used for converting between the
# various suported unicode encodings (UTF-8, UTF-16, UTF-16le and
# UTF-16be).
#
# $Id: enc2.test,v 1.8 2004/06/10 14:01:08 danielk1977 Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

db close

# Return the UTF-8 representation of the supplied UTF-16 string $str. 
................................................................................
do_test enc2-4.3 {
  catchsql {
    ATTACH 'test2.db' as aux;
  }
} {1 {attached databases must use the same text encoding as main database}}

db2 close
db close

# The following tests - enc2-5.* - test that SQLite selects the correct
# collation sequence when more than one is available.

set ::values [list one two three four five]
set ::test_collate_enc INVALID
proc test_collate {enc lhs rhs} {
  set ::test_collate_enc $enc
  set l [lsearch -exact $::values $lhs]
  set r [lsearch -exact $::values $rhs]
  set res [expr $l - $r]
  # puts "test_collate $enc $lhs $rhs -> $res"
  return $res
}

file delete -force test.db
set DB [sqlite db test.db]
do_test enc2-5.0 {
  execsql {
    CREATE TABLE t5(a);
    INSERT INTO t5 VALUES('one');
    INSERT INTO t5 VALUES('two');
    INSERT INTO t5 VALUES('five');
    INSERT INTO t5 VALUES('three');
    INSERT INTO t5 VALUES('four');
  }
} {}
do_test enc2-5.1 {
  add_test_collate $DB 1 1 1
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-8}
do_test enc2-5.2 {
  add_test_collate $DB 0 1 0
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16LE}
breakpoint
do_test enc2-5.3 {
  add_test_collate $DB 0 0 1
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16BE}

file delete -force test.db
set DB [sqlite db test.db]
execsql {pragma encoding = 'UTF-16LE'}
do_test enc2-5.4 {
  execsql {
    CREATE TABLE t5(a);
    INSERT INTO t5 VALUES('one');
    INSERT INTO t5 VALUES('two');
    INSERT INTO t5 VALUES('five');
    INSERT INTO t5 VALUES('three');
    INSERT INTO t5 VALUES('four');
  }
} {}
do_test enc2-5.5 {
  add_test_collate $DB 1 1 1
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16LE}
do_test enc2-5.6 {
  add_test_collate $DB 1 0 1
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16BE}
breakpoint
do_test enc2-5.7 {
  add_test_collate $DB 1 0 0
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-8}

file delete -force test.db
set DB [sqlite db test.db]
execsql {pragma encoding = 'UTF-16BE'}
do_test enc2-5.8 {
  execsql {
    CREATE TABLE t5(a);
    INSERT INTO t5 VALUES('one');
    INSERT INTO t5 VALUES('two');
    INSERT INTO t5 VALUES('five');
    INSERT INTO t5 VALUES('three');
    INSERT INTO t5 VALUES('four');
  }
} {}
do_test enc2-5.9 {
  add_test_collate $DB 1 1 1
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16BE}
do_test enc2-5.10 {
  add_test_collate $DB 1 1 0
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16LE}
breakpoint
do_test enc2-5.11 {
  add_test_collate $DB 1 0 0
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-8}

finish_test