/ Check-in [3e96105c]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add a version of the LIKE operator to the icu extension. Requires optimisation. (CVS 3939)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 3e96105c1f084a4ab4dad4de6f4759e43fc497f7
User & Date: danielk1977 2007-05-07 16:58:02
Context
2007-05-07
19:31
Fix an NULL deref in the randomblob() function following a malloc failure. (CVS 3940) check-in: 011e7db2 user: drh tags: trunk
16:58
Add a version of the LIKE operator to the icu extension. Requires optimisation. (CVS 3939) check-in: 3e96105c user: danielk1977 tags: trunk
14:58
Change the name of create_collation_x() to create_collation_v2(). Also add some tests for it. (CVS 3938) check-in: ddc4e479 user: danielk1977 tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/icu/icu.c.

     1      1   
     2      2   /*
     3      3   ** This file implements an integration between the ICU library 
     4      4   ** ("International Components for Unicode", an open-source library 
     5      5   ** for handling unicode data) and SQLite. The integration uses 
     6      6   ** ICU to provide the following to SQLite:
            7  +**
            8  +**   * An implementation of the SQL regexp() function (and hence REGEXP
            9  +**     operator) using the ICU uregex_XX() APIs.
     7     10   **
     8     11   **   * Implementations of the SQL scalar upper() and lower() 
     9         -**     functions for case mapping, 
           12  +**     functions for case mapping.
    10     13   **
    11     14   **   * Collation sequences
    12     15   **
    13         -**   * Implementation of the SQL regexp() function (and hence REGEXP
    14         -**     operator) using the ICU uregex_XX() APIs.
    15         -**
    16     16   **   * LIKE
    17     17   */
    18     18   
    19     19   #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU)
    20     20   
    21     21   #include <unicode/utypes.h>
    22     22   #include <unicode/uregex.h>
................................................................................
    43     43   ** Version of sqlite3_free() that is always a function, never a macro.
    44     44   */
    45     45   static void xFree(void *p){
    46     46     sqlite3_free(p);
    47     47   }
    48     48   
    49     49   /*
    50         -** LIKE operator.
           50  +** Compare two UTF-8 strings for equality where the first string is
           51  +** a "LIKE" expression. Return true (1) if they are the same and 
           52  +** false (0) if they are different.
           53  +*/
           54  +static int icuLikeCompare(
           55  +  const uint8_t *zPattern,   /* The UTF-8 LIKE pattern */
           56  +  const uint8_t *zString,    /* The UTF-8 string to compare against */
           57  +  const UChar32 uEsc         /* The escape character */
           58  +){
           59  +  static const int MATCH_ONE = (UChar32)'_';
           60  +  static const int MATCH_ALL = (UChar32)'%';
           61  +
           62  +  int iPattern = 0;       /* Current byte index in zPattern */
           63  +  int iString = 0;        /* Current byte index in zString */
           64  +
           65  +  int prevEscape = 0;     /* True if the previous character was uEsc */
           66  +
           67  +  while( zPattern[iPattern]!=0 ){
           68  +
           69  +    /* Read (and consume) the next character from the input pattern. */
           70  +    UChar32 uPattern;
           71  +    U8_NEXT_UNSAFE(zPattern, iPattern, uPattern);
           72  +    assert(uPattern!=0);
           73  +
           74  +    /* There are now 4 possibilities:
           75  +    **
           76  +    **     1. uPattern is an unescaped match-all character "%",
           77  +    **     2. uPattern is an unescaped match-one character "_",
           78  +    **     3. uPattern is an unescaped escape character, or
           79  +    **     4. uPattern is to be handled as an ordinary character
           80  +    */
           81  +    if( !prevEscape && uPattern==MATCH_ALL ){
           82  +      /* Case 1. */
           83  +      uint8_t c;
           84  +
           85  +      /* Skip any MATCH_ALL or MATCH_ONE characters that follow a
           86  +      ** MATCH_ALL. For each MATCH_ONE, skip one character in the 
           87  +      ** test string.
           88  +      */
           89  +      while( (c=zPattern[iPattern]) == MATCH_ALL || c == MATCH_ONE ){
           90  +        if( c==MATCH_ONE ){
           91  +          if( zString[iString]==0 ) return 0;
           92  +          U8_FWD_1_UNSAFE(zString, iString);
           93  +        }
           94  +        iPattern++;
           95  +      }
           96  +
           97  +      if( zPattern[iPattern]==0 ) return 1;
           98  +
           99  +      while( zString[iString] ){
          100  +        if( icuLikeCompare(&zPattern[iPattern], &zString[iString], uEsc) ){
          101  +          return 1;
          102  +        }
          103  +        U8_FWD_1_UNSAFE(zString, iString);
          104  +      }
          105  +      return 0;
          106  +
          107  +    }else if( !prevEscape && uPattern==MATCH_ONE ){
          108  +      /* Case 2. */
          109  +      if( zString[iString]==0 ) return 0;
          110  +      U8_FWD_1_UNSAFE(zString, iString);
          111  +
          112  +    }else if( !prevEscape && uPattern==uEsc){
          113  +      /* Case 3. */
          114  +      prevEscape = 1;
          115  +
          116  +    }else{
          117  +      /* Case 4. */
          118  +      UChar32 uString;
          119  +      U8_NEXT_UNSAFE(zString, iString, uString);
          120  +      uString = u_foldCase(uString, U_FOLD_CASE_DEFAULT);
          121  +      uPattern = u_foldCase(uPattern, U_FOLD_CASE_DEFAULT);
          122  +      if( uString!=uPattern ){
          123  +        return 0;
          124  +      }
          125  +      prevEscape = 0;
          126  +    }
          127  +  }
          128  +
          129  +  return zString[iString]==0;
          130  +}
          131  +
          132  +/*
          133  +** Implementation of the like() SQL function.  This function implements
          134  +** the build-in LIKE operator.  The first argument to the function is the
          135  +** pattern and the second argument is the string.  So, the SQL statements:
    51    136   **
    52         -** http://unicode.org/reports/tr21/tr21-5.html#Caseless_Matching
          137  +**       A LIKE B
          138  +**
          139  +** is implemented as like(B, A). If there is an escape character E, 
          140  +**
          141  +**       A LIKE B ESCAPE E
          142  +**
          143  +** is mapped to like(B, A, E).
    53    144   */
          145  +static void icuLikeFunc(
          146  +  sqlite3_context *context, 
          147  +  int argc, 
          148  +  sqlite3_value **argv
          149  +){
          150  +  const unsigned char *zA = sqlite3_value_text(argv[0]);
          151  +  const unsigned char *zB = sqlite3_value_text(argv[1]);
          152  +  UChar32 uEsc = 0;
          153  +
          154  +  if( argc==3 ){
          155  +    /* The escape character string must consist of a single UTF-8 character.
          156  +    ** Otherwise, return an error.
          157  +    */
          158  +    int nE= sqlite3_value_bytes(argv[2]);
          159  +    const unsigned char *zE = sqlite3_value_text(argv[2]);
          160  +    int i = 0;
          161  +    if( zE==0 ) return;
          162  +    U8_NEXT(zE, i, nE, uEsc);
          163  +    if( i!=nE){
          164  +      sqlite3_result_error(context, 
          165  +          "ESCAPE expression must be a single character", -1);
          166  +      return;
          167  +    }
          168  +  }
          169  +
          170  +  if( zA && zB ){
          171  +    sqlite3_result_int(context, icuLikeCompare(zA, zB, uEsc));
          172  +  }
          173  +}
    54    174   
    55    175   /*
    56    176   ** This function is called when an ICU function called from within
    57    177   ** the implementation of an SQL scalar function returns an error.
    58    178   **
    59    179   ** The scalar function context passed as the first argument is 
    60    180   ** loaded with an error message based on the following two args.
................................................................................
   190    310   
   191    311     assert(nArg==1 || nArg==2);
   192    312     if( nArg==2 ){
   193    313       zLocale = (const char *)sqlite3_value_text(apArg[1]);
   194    314     }
   195    315   
   196    316     zInput = sqlite3_value_text16(apArg[0]);
          317  +  if( !zInput ){
          318  +    return;
          319  +  }
   197    320     nInput = sqlite3_value_bytes16(apArg[0]);
   198    321   
   199    322     nOutput = nInput * 2 + 2;
   200    323     zOutput = sqlite3_malloc(nInput*2+2);
   201    324     if( !zOutput ){
   202    325       return;
   203    326     }
................................................................................
   240    363     UCollator *p = (UCollator *)pCtx;
   241    364     res = ucol_strcoll(p, (UChar *)zLeft, nLeft/2, (UChar *)zRight, nRight/2);
   242    365     switch( res ){
   243    366       case UCOL_LESS:    return -1;
   244    367       case UCOL_GREATER: return +1;
   245    368       case UCOL_EQUAL:   return 0;
   246    369     }
   247         -  assert(!"Bad return value from ucol_strcoll()");
          370  +  assert(!"Unexpected return value from ucol_strcoll()");
   248    371     return 0;
   249    372   }
   250    373   
   251    374   /*
   252    375   ** Implementation of the scalar function icu_load_collation().
   253    376   **
   254    377   ** This scalar function is used to add ICU collation based collation 
................................................................................
   284    407     pUCollator = ucol_open(zLocale, &status);
   285    408     if( !U_SUCCESS(status) ){
   286    409       icuFunctionError(p, "ucol_open", status);
   287    410       return;
   288    411     }
   289    412     assert(p);
   290    413   
   291         -  rc = sqlite3_create_collation_x(db, zName, SQLITE_UTF16, (void *)pUCollator, 
          414  +  rc = sqlite3_create_collation_v2(db, zName, SQLITE_UTF16, (void *)pUCollator, 
   292    415         icuCollationColl, icuCollationDel
   293    416     );
   294    417     if( rc!=SQLITE_OK ){
   295    418       ucol_close(pUCollator);
   296    419       sqlite3_result_error(p, "Error registering collation function", -1);
   297    420     }
   298    421   }
................................................................................
   304    427     struct IcuScalar {
   305    428       const char *zName;                        /* Function name */
   306    429       int nArg;                                 /* Number of arguments */
   307    430       int enc;                                  /* Optimal text encoding */
   308    431       void *pContext;                           /* sqlite3_user_data() context */
   309    432       void (*xFunc)(sqlite3_context*,int,sqlite3_value**);
   310    433     } scalars[] = {
   311         -    {"regexp", 2, SQLITE_ANY,          0, icuRegexpFunc},
          434  +    {"regexp",-1, SQLITE_ANY,          0, icuRegexpFunc},
   312    435   
   313    436       {"lower",  1, SQLITE_UTF16,        0, icuCaseFunc16},
   314    437       {"lower",  2, SQLITE_UTF16,        0, icuCaseFunc16},
   315    438       {"upper",  1, SQLITE_UTF16, (void*)1, icuCaseFunc16},
   316    439       {"upper",  2, SQLITE_UTF16, (void*)1, icuCaseFunc16},
   317    440   
   318    441       {"lower",  1, SQLITE_UTF8,         0, icuCaseFunc16},
   319    442       {"lower",  2, SQLITE_UTF8,         0, icuCaseFunc16},
   320    443       {"upper",  1, SQLITE_UTF8,  (void*)1, icuCaseFunc16},
   321    444       {"upper",  2, SQLITE_UTF8,  (void*)1, icuCaseFunc16},
          445  +
          446  +    {"like",   2, SQLITE_UTF8,         0, icuLikeFunc},
          447  +    {"like",   3, SQLITE_UTF8,         0, icuLikeFunc},
   322    448   
   323    449       {"icu_load_collation",  2, SQLITE_UTF8, (void*)db, icuLoadCollation},
   324    450     };
   325    451   
   326    452     int rc = SQLITE_OK;
   327    453     int i;
   328    454   

Changes to test/func.test.

     7      7   #    May you find forgiveness for yourself and forgive others.
     8      8   #    May you share freely, never taking more than you give.
     9      9   #
    10     10   #***********************************************************************
    11     11   # This file implements regression tests for SQLite library.  The
    12     12   # focus of this file is testing built-in functions.
    13     13   #
    14         -# $Id: func.test,v 1.63 2007/05/02 15:36:02 drh Exp $
           14  +# $Id: func.test,v 1.64 2007/05/07 16:58:02 danielk1977 Exp $
    15     15   
    16     16   set testdir [file dirname $argv0]
    17     17   source $testdir/tester.tcl
    18     18   
    19     19   # Create a table to work with.
    20     20   #
    21     21   do_test func-0.0 {
................................................................................
   216    216   } {THIS PROGRAM IS FREE SOFTWARE}
   217    217   do_test func-5.2 {
   218    218     execsql {SELECT lower(upper(t1)) FROM tbl1}
   219    219   } {this program is free software}
   220    220   do_test func-5.3 {
   221    221     execsql {SELECT upper(a), lower(a) FROM t2}
   222    222   } {1 1 {} {} 345 345 {} {} 67890 67890}
   223         -do_test func-5.4 {
   224         -  catchsql {SELECT upper(a,5) FROM t2}
   225         -} {1 {wrong number of arguments to function upper()}}
          223  +ifcapable !icu {
          224  +  do_test func-5.4 {
          225  +    catchsql {SELECT upper(a,5) FROM t2}
          226  +  } {1 {wrong number of arguments to function upper()}}
          227  +}
   226    228   do_test func-5.5 {
   227    229     catchsql {SELECT upper(*) FROM t2}
   228    230   } {1 {wrong number of arguments to function upper()}}
   229    231   
   230    232   # Test the coalesce() and nullif() functions
   231    233   #
   232    234   do_test func-6.1 {