/ Check-in [df817e70]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Merge the unicode61 tokenizer and the shared-cache-memory database changes into the sessions branch.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | sessions
Files: files | file ages | folders
SHA1: df817e70afc3f41e680d8f84dfa5772d5b3ae4d9
User & Date: drh 2012-05-28 18:22:41
Context
2012-05-29
00:48
Merge the 64-to-32-bit RTree rounding fixes from trunk into the sessions branch. check-in: 7eff45c2 user: drh tags: sessions
2012-05-28
18:22
Merge the unicode61 tokenizer and the shared-cache-memory database changes into the sessions branch. check-in: df817e70 user: drh tags: sessions
17:51
Updates regarding URI query parameters and shared cache in the documentation derived from comments in sqlite.h.in. No changes to code. check-in: bcc72d41 user: drh tags: trunk
2012-05-22
13:01
Version 3.7.12.1 check-in: d07b7b67 user: drh tags: sessions
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to Makefile.in.

   161    161   
   162    162   # Object files for the SQLite library (non-amalgamation).
   163    163   #
   164    164   LIBOBJS0 = alter.lo analyze.lo attach.lo auth.lo \
   165    165            backup.lo bitvec.lo btmutex.lo btree.lo build.lo \
   166    166            callback.lo complete.lo ctime.lo date.lo delete.lo \
   167    167            expr.lo fault.lo fkey.lo \
   168         -         fts3.lo fts3_aux.lo fts3_expr.lo fts3_hash.lo fts3_icu.lo fts3_porter.lo \
   169         -         fts3_snippet.lo fts3_tokenizer.lo fts3_tokenizer1.lo fts3_write.lo \
          168  +         fts3.lo fts3_aux.lo fts3_expr.lo fts3_hash.lo fts3_icu.lo \
          169  +         fts3_porter.lo fts3_snippet.lo fts3_tokenizer.lo fts3_tokenizer1.lo \
          170  +         fts3_unicode.lo fts3_unicode2.lo fts3_write.lo \
   170    171            func.lo global.lo hash.lo \
   171    172            icu.lo insert.lo journal.lo legacy.lo loadext.lo \
   172    173            main.lo malloc.lo mem0.lo mem1.lo mem2.lo mem3.lo mem5.lo \
   173    174            memjournal.lo \
   174    175            mutex.lo mutex_noop.lo mutex_os2.lo mutex_unix.lo mutex_w32.lo \
   175    176            notify.lo opcodes.lo os.lo os_os2.lo os_unix.lo os_win.lo \
   176    177            pager.lo parse.lo pcache.lo pcache1.lo pragma.lo prepare.lo printf.lo \
................................................................................
   313    314     $(TOP)/ext/fts3/fts3_hash.h \
   314    315     $(TOP)/ext/fts3/fts3_icu.c \
   315    316     $(TOP)/ext/fts3/fts3_porter.c \
   316    317     $(TOP)/ext/fts3/fts3_snippet.c \
   317    318     $(TOP)/ext/fts3/fts3_tokenizer.h \
   318    319     $(TOP)/ext/fts3/fts3_tokenizer.c \
   319    320     $(TOP)/ext/fts3/fts3_tokenizer1.c \
          321  +  $(TOP)/ext/fts3/fts3_unicode.c \
          322  +  $(TOP)/ext/fts3/fts3_unicode2.c \
   320    323     $(TOP)/ext/fts3/fts3_write.c
   321    324   SRC += \
   322    325     $(TOP)/ext/icu/sqliteicu.h \
   323    326     $(TOP)/ext/icu/icu.c
   324    327   SRC += \
   325    328     $(TOP)/ext/rtree/rtree.h \
   326    329     $(TOP)/ext/rtree/rtree.c

Changes to VERSION.

     1         -3.7.12.1
            1  +3.7.13

Changes to configure.

     1      1   #! /bin/sh
     2      2   # Guess values for system-dependent variables and create Makefiles.
     3         -# Generated by GNU Autoconf 2.62 for sqlite 3.7.12.1.
            3  +# Generated by GNU Autoconf 2.62 for sqlite 3.7.13.
     4      4   #
     5      5   # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
     6      6   # 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
     7      7   # This configure script is free software; the Free Software Foundation
     8      8   # gives unlimited permission to copy, distribute and modify it.
     9      9   ## --------------------- ##
    10     10   ## M4sh Initialization.  ##
................................................................................
   739    739   MFLAGS=
   740    740   MAKEFLAGS=
   741    741   SHELL=${CONFIG_SHELL-/bin/sh}
   742    742   
   743    743   # Identity of this package.
   744    744   PACKAGE_NAME='sqlite'
   745    745   PACKAGE_TARNAME='sqlite'
   746         -PACKAGE_VERSION='3.7.12.1'
   747         -PACKAGE_STRING='sqlite 3.7.12.1'
          746  +PACKAGE_VERSION='3.7.13'
          747  +PACKAGE_STRING='sqlite 3.7.13'
   748    748   PACKAGE_BUGREPORT=''
   749    749   
   750    750   # Factoring default headers for most tests.
   751    751   ac_includes_default="\
   752    752   #include <stdio.h>
   753    753   #ifdef HAVE_SYS_TYPES_H
   754    754   # include <sys/types.h>
................................................................................
  1481   1481   #
  1482   1482   # Report the --help message.
  1483   1483   #
  1484   1484   if test "$ac_init_help" = "long"; then
  1485   1485     # Omit some internal or obsolete options to make the list less imposing.
  1486   1486     # This message is too long to be a string in the A/UX 3.1 sh.
  1487   1487     cat <<_ACEOF
  1488         -\`configure' configures sqlite 3.7.12.1 to adapt to many kinds of systems.
         1488  +\`configure' configures sqlite 3.7.13 to adapt to many kinds of systems.
  1489   1489   
  1490   1490   Usage: $0 [OPTION]... [VAR=VALUE]...
  1491   1491   
  1492   1492   To assign environment variables (e.g., CC, CFLAGS...), specify them as
  1493   1493   VAR=VALUE.  See below for descriptions of some of the useful variables.
  1494   1494   
  1495   1495   Defaults for the options are specified in brackets.
................................................................................
  1546   1546     --build=BUILD     configure for building on BUILD [guessed]
  1547   1547     --host=HOST       cross-compile to build programs to run on HOST [BUILD]
  1548   1548   _ACEOF
  1549   1549   fi
  1550   1550   
  1551   1551   if test -n "$ac_init_help"; then
  1552   1552     case $ac_init_help in
  1553         -     short | recursive ) echo "Configuration of sqlite 3.7.12.1:";;
         1553  +     short | recursive ) echo "Configuration of sqlite 3.7.13:";;
  1554   1554      esac
  1555   1555     cat <<\_ACEOF
  1556   1556   
  1557   1557   Optional Features:
  1558   1558     --disable-option-checking  ignore unrecognized --enable/--with options
  1559   1559     --disable-FEATURE       do not include FEATURE (same as --enable-FEATURE=no)
  1560   1560     --enable-FEATURE[=ARG]  include FEATURE [ARG=yes]
................................................................................
  1662   1662       cd "$ac_pwd" || { ac_status=$?; break; }
  1663   1663     done
  1664   1664   fi
  1665   1665   
  1666   1666   test -n "$ac_init_help" && exit $ac_status
  1667   1667   if $ac_init_version; then
  1668   1668     cat <<\_ACEOF
  1669         -sqlite configure 3.7.12.1
         1669  +sqlite configure 3.7.13
  1670   1670   generated by GNU Autoconf 2.62
  1671   1671   
  1672   1672   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
  1673   1673   2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
  1674   1674   This configure script is free software; the Free Software Foundation
  1675   1675   gives unlimited permission to copy, distribute and modify it.
  1676   1676   _ACEOF
  1677   1677     exit
  1678   1678   fi
  1679   1679   cat >config.log <<_ACEOF
  1680   1680   This file contains any messages produced by compilers while
  1681   1681   running configure, to aid debugging if configure makes a mistake.
  1682   1682   
  1683         -It was created by sqlite $as_me 3.7.12.1, which was
         1683  +It was created by sqlite $as_me 3.7.13, which was
  1684   1684   generated by GNU Autoconf 2.62.  Invocation command line was
  1685   1685   
  1686   1686     $ $0 $@
  1687   1687   
  1688   1688   _ACEOF
  1689   1689   exec 5>>config.log
  1690   1690   {
................................................................................
 14028  14028   
 14029  14029   exec 6>&1
 14030  14030   
 14031  14031   # Save the log message, to keep $[0] and so on meaningful, and to
 14032  14032   # report actual input values of CONFIG_FILES etc. instead of their
 14033  14033   # values after options handling.
 14034  14034   ac_log="
 14035         -This file was extended by sqlite $as_me 3.7.12.1, which was
        14035  +This file was extended by sqlite $as_me 3.7.13, which was
 14036  14036   generated by GNU Autoconf 2.62.  Invocation command line was
 14037  14037   
 14038  14038     CONFIG_FILES    = $CONFIG_FILES
 14039  14039     CONFIG_HEADERS  = $CONFIG_HEADERS
 14040  14040     CONFIG_LINKS    = $CONFIG_LINKS
 14041  14041     CONFIG_COMMANDS = $CONFIG_COMMANDS
 14042  14042     $ $0 $@
................................................................................
 14081  14081   $config_commands
 14082  14082   
 14083  14083   Report bugs to <bug-autoconf@gnu.org>."
 14084  14084   
 14085  14085   _ACEOF
 14086  14086   cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 14087  14087   ac_cs_version="\\
 14088         -sqlite config.status 3.7.12.1
        14088  +sqlite config.status 3.7.13
 14089  14089   configured by $0, generated by GNU Autoconf 2.62,
 14090  14090     with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
 14091  14091   
 14092  14092   Copyright (C) 2008 Free Software Foundation, Inc.
 14093  14093   This config.status script is free software; the Free Software Foundation
 14094  14094   gives unlimited permission to copy, distribute and modify it."
 14095  14095   

Changes to ext/fts3/README.tokenizers.

     7      7     statement:
     8      8   
     9      9       CREATE VIRTUAL TABLE <table-name> USING fts3(
    10     10         <columns ...> [, tokenize <tokenizer-name> [<tokenizer-args>]]
    11     11       );
    12     12   
    13     13     The built-in tokenizers (valid values to pass as <tokenizer name>) are
    14         -  "simple" and "porter".
           14  +  "simple", "porter" and "unicode61".
    15     15   
    16     16     <tokenizer-args> should consist of zero or more white-space separated
    17     17     arguments to pass to the selected tokenizer implementation. The 
    18     18     interpretation of the arguments, if any, depends on the individual 
    19     19     tokenizer.
    20     20   
    21     21   2. Custom Tokenizers

Changes to ext/fts3/fts3.c.

  3550   3550   **
  3551   3551   ** Calling sqlite3Fts3SimpleTokenizerModule() sets the value pointed
  3552   3552   ** to by the argument to point to the "simple" tokenizer implementation.
  3553   3553   ** And so on.
  3554   3554   */
  3555   3555   void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
  3556   3556   void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
         3557  +#ifndef SQLITE_DISABLE_FTS3_UNICODE
         3558  +void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const**ppModule);
         3559  +#endif
  3557   3560   #ifdef SQLITE_ENABLE_ICU
  3558   3561   void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule);
  3559   3562   #endif
  3560   3563   
  3561   3564   /*
  3562   3565   ** Initialise the fts3 extension. If this extension is built as part
  3563   3566   ** of the sqlite library, then this function is called directly by
................................................................................
  3565   3568   ** function is called by the sqlite3_extension_init() entry point.
  3566   3569   */
  3567   3570   int sqlite3Fts3Init(sqlite3 *db){
  3568   3571     int rc = SQLITE_OK;
  3569   3572     Fts3Hash *pHash = 0;
  3570   3573     const sqlite3_tokenizer_module *pSimple = 0;
  3571   3574     const sqlite3_tokenizer_module *pPorter = 0;
         3575  +#ifndef SQLITE_DISABLE_FTS3_UNICODE
         3576  +  const sqlite3_tokenizer_module *pUnicode = 0;
         3577  +#endif
  3572   3578   
  3573   3579   #ifdef SQLITE_ENABLE_ICU
  3574   3580     const sqlite3_tokenizer_module *pIcu = 0;
  3575   3581     sqlite3Fts3IcuTokenizerModule(&pIcu);
  3576   3582   #endif
         3583  +
         3584  +#ifndef SQLITE_DISABLE_FTS3_UNICODE
         3585  +  sqlite3Fts3UnicodeTokenizer(&pUnicode);
         3586  +#endif
  3577   3587   
  3578   3588   #ifdef SQLITE_TEST
  3579   3589     rc = sqlite3Fts3InitTerm(db);
  3580   3590     if( rc!=SQLITE_OK ) return rc;
  3581   3591   #endif
  3582   3592   
  3583   3593     rc = sqlite3Fts3InitAux(db);
................................................................................
  3594   3604       sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1);
  3595   3605     }
  3596   3606   
  3597   3607     /* Load the built-in tokenizers into the hash table */
  3598   3608     if( rc==SQLITE_OK ){
  3599   3609       if( sqlite3Fts3HashInsert(pHash, "simple", 7, (void *)pSimple)
  3600   3610        || sqlite3Fts3HashInsert(pHash, "porter", 7, (void *)pPorter) 
         3611  +
         3612  +#ifndef SQLITE_DISABLE_FTS3_UNICODE
         3613  +     || sqlite3Fts3HashInsert(pHash, "unicode61", 10, (void *)pUnicode) 
         3614  +#endif
  3601   3615   #ifdef SQLITE_ENABLE_ICU
  3602   3616        || (pIcu && sqlite3Fts3HashInsert(pHash, "icu", 4, (void *)pIcu))
  3603   3617   #endif
  3604   3618       ){
  3605   3619         rc = SQLITE_NOMEM;
  3606   3620       }
  3607   3621     }

Changes to ext/fts3/fts3Int.h.

   536    536   int sqlite3Fts3MsrIncrNext(
   537    537       Fts3Table *, Fts3MultiSegReader *, sqlite3_int64 *, char **, int *);
   538    538   int sqlite3Fts3EvalPhrasePoslist(Fts3Cursor *, Fts3Expr *, int iCol, char **); 
   539    539   int sqlite3Fts3MsrOvfl(Fts3Cursor *, Fts3MultiSegReader *, int *);
   540    540   int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr);
   541    541   
   542    542   int sqlite3Fts3DeferredTokenList(Fts3DeferredToken *, char **, int *);
          543  +
          544  +/* fts3_unicode2.c (functions generated by parsing unicode text files) */
          545  +int sqlite3FtsUnicodeTolower(int);
          546  +int sqlite3FtsUnicodeIsalnum(int);
   543    547   
   544    548   #endif /* !SQLITE_CORE || SQLITE_ENABLE_FTS3 */
   545    549   #endif /* _FTSINT_H */

Added ext/fts3/fts3_unicode.c.

            1  +/*
            2  +** 2012 May 24
            3  +**
            4  +** The author disclaims copyright to this source code.  In place of
            5  +** a legal notice, here is a blessing:
            6  +**
            7  +**    May you do good and not evil.
            8  +**    May you find forgiveness for yourself and forgive others.
            9  +**    May you share freely, never taking more than you give.
           10  +**
           11  +******************************************************************************
           12  +**
           13  +** Implementation of the "unicode61" full-text-search tokenizer.
           14  +*/
           15  +
           16  +#ifndef SQLITE_DISABLE_FTS3_UNICODE
           17  +
           18  +#include "fts3Int.h"
           19  +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
           20  +
           21  +#include <assert.h>
           22  +#include <stdlib.h>
           23  +#include <stdio.h>
           24  +#include <string.h>
           25  +
           26  +#include "fts3_tokenizer.h"
           27  +
           28  +/*
           29  +** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied
           30  +** from the sqlite3 source file utf.c. If this file is compiled as part
           31  +** of the amalgamation, they are not required.
           32  +*/
           33  +#ifndef SQLITE_AMALGAMATION
           34  +
           35  +static const unsigned char sqlite3Utf8Trans1[] = {
           36  +  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
           37  +  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
           38  +  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
           39  +  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
           40  +  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
           41  +  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
           42  +  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
           43  +  0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
           44  +};
           45  +
           46  +#define READ_UTF8(zIn, zTerm, c)                           \
           47  +  c = *(zIn++);                                            \
           48  +  if( c>=0xc0 ){                                           \
           49  +    c = sqlite3Utf8Trans1[c-0xc0];                         \
           50  +    while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){            \
           51  +      c = (c<<6) + (0x3f & *(zIn++));                      \
           52  +    }                                                      \
           53  +    if( c<0x80                                             \
           54  +        || (c&0xFFFFF800)==0xD800                          \
           55  +        || (c&0xFFFFFFFE)==0xFFFE ){  c = 0xFFFD; }        \
           56  +  }
           57  +
           58  +#define WRITE_UTF8(zOut, c) {                          \
           59  +  if( c<0x00080 ){                                     \
           60  +    *zOut++ = (u8)(c&0xFF);                            \
           61  +  }                                                    \
           62  +  else if( c<0x00800 ){                                \
           63  +    *zOut++ = 0xC0 + (u8)((c>>6)&0x1F);                \
           64  +    *zOut++ = 0x80 + (u8)(c & 0x3F);                   \
           65  +  }                                                    \
           66  +  else if( c<0x10000 ){                                \
           67  +    *zOut++ = 0xE0 + (u8)((c>>12)&0x0F);               \
           68  +    *zOut++ = 0x80 + (u8)((c>>6) & 0x3F);              \
           69  +    *zOut++ = 0x80 + (u8)(c & 0x3F);                   \
           70  +  }else{                                               \
           71  +    *zOut++ = 0xF0 + (u8)((c>>18) & 0x07);             \
           72  +    *zOut++ = 0x80 + (u8)((c>>12) & 0x3F);             \
           73  +    *zOut++ = 0x80 + (u8)((c>>6) & 0x3F);              \
           74  +    *zOut++ = 0x80 + (u8)(c & 0x3F);                   \
           75  +  }                                                    \
           76  +}
           77  +
           78  +#endif /* ifndef SQLITE_AMALGAMATION */
           79  +
           80  +typedef struct unicode_tokenizer unicode_tokenizer;
           81  +typedef struct unicode_cursor unicode_cursor;
           82  +
           83  +struct unicode_tokenizer {
           84  +  sqlite3_tokenizer base;
           85  +};
           86  +
           87  +struct unicode_cursor {
           88  +  sqlite3_tokenizer_cursor base;
           89  +  const unsigned char *aInput;    /* Input text being tokenized */
           90  +  int nInput;                     /* Size of aInput[] in bytes */
           91  +  int iOff;                       /* Current offset within aInput[] */
           92  +  int iToken;                     /* Index of next token to be returned */
           93  +  char *zToken;                   /* storage for current token */
           94  +  int nAlloc;                     /* space allocated at zToken */
           95  +};
           96  +
           97  +/*
           98  +** Create a new tokenizer instance.
           99  +*/
          100  +static int unicodeCreate(
          101  +  int nArg,                       /* Size of array azArg[] */
          102  +  const char * const *azArg,      /* Tokenizer creation arguments */
          103  +  sqlite3_tokenizer **pp          /* OUT: New tokenizer handle */
          104  +){
          105  +  unicode_tokenizer *pNew;        /* New tokenizer object */
          106  +  pNew = (unicode_tokenizer *) sqlite3_malloc(sizeof(unicode_tokenizer));
          107  +  if( pNew==NULL ){
          108  +    return SQLITE_NOMEM;
          109  +  }
          110  +  memset(pNew, 0, sizeof(unicode_tokenizer));
          111  +  *pp = &pNew->base;
          112  +  return SQLITE_OK;
          113  +}
          114  +
          115  +/*
          116  +** Destroy a tokenizer allocated by unicodeCreate().
          117  +*/
          118  +static int unicodeDestroy(sqlite3_tokenizer *pTokenizer){
          119  +  sqlite3_free(pTokenizer);
          120  +  return SQLITE_OK;
          121  +}
          122  +
          123  +/*
          124  +** Prepare to begin tokenizing a particular string.  The input
          125  +** string to be tokenized is aInput[0..nInput-1].  A cursor
          126  +** used to incrementally tokenize this string is returned in 
          127  +** *pp.
          128  +*/
          129  +static int unicodeOpen(
          130  +  sqlite3_tokenizer *p,           /* The tokenizer */
          131  +  const char *aInput,             /* Input string */
          132  +  int nInput,                     /* Size of string aInput in bytes */
          133  +  sqlite3_tokenizer_cursor **pp   /* OUT: New cursor object */
          134  +){
          135  +  unicode_cursor *pCsr;
          136  +
          137  +  pCsr = (unicode_cursor *)sqlite3_malloc(sizeof(unicode_cursor));
          138  +  if( pCsr==0 ){
          139  +    return SQLITE_NOMEM;
          140  +  }
          141  +  memset(pCsr, 0, sizeof(unicode_cursor));
          142  +
          143  +  pCsr->aInput = (const unsigned char *)aInput;
          144  +  if( aInput==0 ){
          145  +    pCsr->nInput = 0;
          146  +  }else if( nInput<0 ){
          147  +    pCsr->nInput = (int)strlen(aInput);
          148  +  }else{
          149  +    pCsr->nInput = nInput;
          150  +  }
          151  +
          152  +  *pp = &pCsr->base;
          153  +  UNUSED_PARAMETER(p);
          154  +  return SQLITE_OK;
          155  +}
          156  +
          157  +/*
          158  +** Close a tokenization cursor previously opened by a call to
          159  +** unicodeOpen() above.
          160  +*/
          161  +static int unicodeClose(sqlite3_tokenizer_cursor *pCursor){
          162  +  unicode_cursor *pCsr = (unicode_cursor *) pCursor;
          163  +  sqlite3_free(pCsr->zToken);
          164  +  sqlite3_free(pCsr);
          165  +  return SQLITE_OK;
          166  +}
          167  +
          168  +/*
          169  +** Extract the next token from a tokenization cursor.  The cursor must
          170  +** have been opened by a prior call to unicodeOpen().
          171  +*/
          172  +static int unicodeNext(
          173  +  sqlite3_tokenizer_cursor *p,    /* Cursor returned by unicodeOpen */
          174  +  const char **paToken,           /* OUT: Token text */
          175  +  int *pnToken,                   /* OUT: Number of bytes at *paToken */
          176  +  int *piStart,                   /* OUT: Starting offset of token */
          177  +  int *piEnd,                     /* OUT: Ending offset of token */
          178  +  int *piPos                      /* OUT: Position integer of token */
          179  +){
          180  +  unicode_cursor *pCsr = (unicode_cursor *)p;
          181  +  int iCode;
          182  +  char *zOut;
          183  +  const unsigned char *z = &pCsr->aInput[pCsr->iOff];
          184  +  const unsigned char *zStart = z;
          185  +  const unsigned char *zEnd;
          186  +  const unsigned char *zTerm = &pCsr->aInput[pCsr->nInput];
          187  +
          188  +  /* Scan past any delimiter characters before the start of the next token.
          189  +  ** Return SQLITE_DONE early if this takes us all the way to the end of 
          190  +  ** the input.  */
          191  +  while( z<zTerm ){
          192  +    READ_UTF8(z, zTerm, iCode);
          193  +    if( sqlite3FtsUnicodeIsalnum(iCode) ) break;
          194  +    zStart = z;
          195  +  }
          196  +  if( zStart>=zTerm ) return SQLITE_DONE;
          197  +
          198  +  zOut = pCsr->zToken;
          199  +  do {
          200  +    /* Grow the output buffer if required. */
          201  +    if( (zOut-pCsr->zToken)>=(pCsr->nAlloc-4) ){
          202  +      char *zNew = sqlite3_realloc(pCsr->zToken, pCsr->nAlloc+64);
          203  +      if( !zNew ) return SQLITE_NOMEM;
          204  +      zOut = &zNew[zOut - pCsr->zToken];
          205  +      pCsr->zToken = zNew;
          206  +      pCsr->nAlloc += 64;
          207  +    }
          208  +
          209  +    /* Write the folded case of the last character read to the output */
          210  +    zEnd = z;
          211  +    WRITE_UTF8(zOut, sqlite3FtsUnicodeTolower(iCode));
          212  +
          213  +    /* If the cursor is not at EOF, read the next character */
          214  +    if( z>=zTerm ) break;
          215  +    READ_UTF8(z, zTerm, iCode);
          216  +  }while( sqlite3FtsUnicodeIsalnum(iCode) );
          217  +
          218  +  /* Set the output variables and return. */
          219  +  pCsr->iOff = (z - pCsr->aInput);
          220  +  *paToken = pCsr->zToken;
          221  +  *pnToken = zOut - pCsr->zToken;
          222  +  *piStart = (zStart - pCsr->aInput);
          223  +  *piEnd = (zEnd - pCsr->aInput);
          224  +  *piPos = pCsr->iToken++;
          225  +  return SQLITE_OK;
          226  +}
          227  +
          228  +/*
          229  +** Set *ppModule to a pointer to the sqlite3_tokenizer_module 
          230  +** structure for the unicode tokenizer.
          231  +*/
          232  +void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const **ppModule){
          233  +  static const sqlite3_tokenizer_module module = {
          234  +    0,
          235  +    unicodeCreate,
          236  +    unicodeDestroy,
          237  +    unicodeOpen,
          238  +    unicodeClose,
          239  +    unicodeNext,
          240  +    0,
          241  +  };
          242  +  *ppModule = &module;
          243  +}
          244  +
          245  +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
          246  +#endif /* ifndef SQLITE_DISABLE_FTS3_UNICODE */

Added ext/fts3/fts3_unicode2.c.

            1  +/*
            2  +** 2012 May 25
            3  +**
            4  +** The author disclaims copyright to this source code.  In place of
            5  +** a legal notice, here is a blessing:
            6  +**
            7  +**    May you do good and not evil.
            8  +**    May you find forgiveness for yourself and forgive others.
            9  +**    May you share freely, never taking more than you give.
           10  +**
           11  +******************************************************************************
           12  +*/
           13  +
           14  +/*
           15  +** DO NOT EDIT THIS MACHINE GENERATED FILE.
           16  +*/
           17  +
           18  +#if !defined(SQLITE_DISABLE_FTS3_UNICODE)
           19  +#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)
           20  +
           21  +#include <assert.h>
           22  +
           23  +/*
           24  +** Return true if the argument corresponds to a unicode codepoint
           25  +** classified as either a letter or a number. Otherwise false.
           26  +**
           27  +** The results are undefined if the value passed to this function
           28  +** is less than zero.
           29  +*/
           30  +int sqlite3FtsUnicodeIsalnum(int c){
           31  +  /* Each unsigned integer in the following array corresponds to a contiguous
           32  +  ** range of unicode codepoints that are not either letters or numbers (i.e.
           33  +  ** codepoints for which this function should return 0).
           34  +  **
           35  +  ** The most significant 22 bits in each 32-bit value contain the first 
           36  +  ** codepoint in the range. The least significant 10 bits are used to store
           37  +  ** the size of the range (always at least 1). In other words, the value 
           38  +  ** ((C<<10) + N) represents a range of N codepoints starting with codepoint 
           39  +  ** C. It is not possible to represent a range larger than 1023 codepoints 
           40  +  ** using this format.
           41  +  */
           42  +  const static unsigned int aEntry[] = {
           43  +    0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07,
           44  +    0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01,
           45  +    0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401,
           46  +    0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01,
           47  +    0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01,
           48  +    0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802,
           49  +    0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F,
           50  +    0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401,
           51  +    0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804,
           52  +    0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403,
           53  +    0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812,
           54  +    0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001,
           55  +    0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802,
           56  +    0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805,
           57  +    0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401,
           58  +    0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03,
           59  +    0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807,
           60  +    0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001,
           61  +    0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01,
           62  +    0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804,
           63  +    0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001,
           64  +    0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802,
           65  +    0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01,
           66  +    0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06,
           67  +    0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007,
           68  +    0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006,
           69  +    0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417,
           70  +    0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14,
           71  +    0x00412806, 0x00415804, 0x00417803, 0x00418803, 0x00419C07,
           72  +    0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01,
           73  +    0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001,
           74  +    0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802,
           75  +    0x005DC802, 0x005ED023, 0x005F6004, 0x005F7401, 0x0060000F,
           76  +    0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002,
           77  +    0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802,
           78  +    0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006,
           79  +    0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D,
           80  +    0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802,
           81  +    0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027,
           82  +    0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403,
           83  +    0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805,
           84  +    0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04,
           85  +    0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401,
           86  +    0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005,
           87  +    0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B,
           88  +    0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A,
           89  +    0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001,
           90  +    0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59,
           91  +    0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807,
           92  +    0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01,
           93  +    0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E,
           94  +    0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100,
           95  +    0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10,
           96  +    0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402,
           97  +    0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804,
           98  +    0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012,
           99  +    0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004,
          100  +    0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002,
          101  +    0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803,
          102  +    0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07,
          103  +    0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02,
          104  +    0x037FFC02, 0x03E3FC01, 0x03EC7801, 0x03ECA401, 0x03EEC810,
          105  +    0x03F4F802, 0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023,
          106  +    0x03F95013, 0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807,
          107  +    0x03FCEC06, 0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405,
          108  +    0x04040003, 0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E,
          109  +    0x040E7C01, 0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01,
          110  +    0x04280403, 0x04281402, 0x04283004, 0x0428E003, 0x0428FC01,
          111  +    0x04294009, 0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016,
          112  +    0x04420003, 0x0442C012, 0x04440003, 0x04449C0E, 0x04450004,
          113  +    0x04460003, 0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004,
          114  +    0x05BD442E, 0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5,
          115  +    0x07480046, 0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01,
          116  +    0x075C5401, 0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401,
          117  +    0x075EA401, 0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064,
          118  +    0x07C2800F, 0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F,
          119  +    0x07C4C03C, 0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009,
          120  +    0x07C94002, 0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014,
          121  +    0x07CE8025, 0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001,
          122  +    0x07D108B6, 0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018,
          123  +    0x07D7EC46, 0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401,
          124  +    0x38008060, 0x380400F0, 0x3C000001, 0x3FFFF401, 0x40000001,
          125  +    0x43FFF401,
          126  +  };
          127  +  static const unsigned int aAscii[4] = {
          128  +    0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001,
          129  +  };
          130  +
          131  +  if( c<128 ){
          132  +    return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 );
          133  +  }else if( c<(1<<22) ){
          134  +    unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
          135  +    int iRes;
          136  +    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
          137  +    int iLo = 0;
          138  +    while( iHi>=iLo ){
          139  +      int iTest = (iHi + iLo) / 2;
          140  +      if( key >= aEntry[iTest] ){
          141  +        iRes = iTest;
          142  +        iLo = iTest+1;
          143  +      }else{
          144  +        iHi = iTest-1;
          145  +      }
          146  +    }
          147  +    assert( aEntry[0]<key );
          148  +    assert( key>=aEntry[iRes] );
          149  +    return (c >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));
          150  +  }
          151  +  return 1;
          152  +}
          153  +
          154  +
          155  +/*
          156  +** Interpret the argument as a unicode codepoint. If the codepoint
          157  +** is an upper case character that has a lower case equivalent,
          158  +** return the codepoint corresponding to the lower case version.
          159  +** Otherwise, return a copy of the argument.
          160  +**
          161  +** The results are undefined if the value passed to this function
          162  +** is less than zero.
          163  +*/
          164  +int sqlite3FtsUnicodeTolower(int c){
          165  +  /* Each entry in the following array defines a rule for folding a range
          166  +  ** of codepoints to lower case. The rule applies to a range of nRange
          167  +  ** codepoints starting at codepoint iCode.
          168  +  **
          169  +  ** If the least significant bit in flags is clear, then the rule applies
          170  +  ** to all nRange codepoints (i.e. all nRange codepoints are upper case and
          171  +  ** need to be folded). Or, if it is set, then the rule only applies to
          172  +  ** every second codepoint in the range, starting with codepoint iCode.
          173  +  **
          174  +  ** The 7 most significant bits in flags are an index into the aiOff[]
          175  +  ** array. If a specific codepoint C does require folding, then its lower
          176  +  ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).
          177  +  **
          178  +  ** The contents of this array are generated by parsing the CaseFolding.txt
          179  +  ** file distributed as part of the "Unicode Character Database". See
          180  +  ** http://www.unicode.org for details.
          181  +  */
          182  +  static const struct TableEntry {
          183  +    unsigned short iCode;
          184  +    unsigned char flags;
          185  +    unsigned char nRange;
          186  +  } aEntry[] = {
          187  +    {65, 14, 26},          {181, 64, 1},          {192, 14, 23},
          188  +    {216, 14, 7},          {256, 1, 48},          {306, 1, 6},
          189  +    {313, 1, 16},          {330, 1, 46},          {376, 116, 1},
          190  +    {377, 1, 6},           {383, 104, 1},         {385, 50, 1},
          191  +    {386, 1, 4},           {390, 44, 1},          {391, 0, 1},
          192  +    {393, 42, 2},          {395, 0, 1},           {398, 32, 1},
          193  +    {399, 38, 1},          {400, 40, 1},          {401, 0, 1},
          194  +    {403, 42, 1},          {404, 46, 1},          {406, 52, 1},
          195  +    {407, 48, 1},          {408, 0, 1},           {412, 52, 1},
          196  +    {413, 54, 1},          {415, 56, 1},          {416, 1, 6},
          197  +    {422, 60, 1},          {423, 0, 1},           {425, 60, 1},
          198  +    {428, 0, 1},           {430, 60, 1},          {431, 0, 1},
          199  +    {433, 58, 2},          {435, 1, 4},           {439, 62, 1},
          200  +    {440, 0, 1},           {444, 0, 1},           {452, 2, 1},
          201  +    {453, 0, 1},           {455, 2, 1},           {456, 0, 1},
          202  +    {458, 2, 1},           {459, 1, 18},          {478, 1, 18},
          203  +    {497, 2, 1},           {498, 1, 4},           {502, 122, 1},
          204  +    {503, 134, 1},         {504, 1, 40},          {544, 110, 1},
          205  +    {546, 1, 18},          {570, 70, 1},          {571, 0, 1},
          206  +    {573, 108, 1},         {574, 68, 1},          {577, 0, 1},
          207  +    {579, 106, 1},         {580, 28, 1},          {581, 30, 1},
          208  +    {582, 1, 10},          {837, 36, 1},          {880, 1, 4},
          209  +    {886, 0, 1},           {902, 18, 1},          {904, 16, 3},
          210  +    {908, 26, 1},          {910, 24, 2},          {913, 14, 17},
          211  +    {931, 14, 9},          {962, 0, 1},           {975, 4, 1},
          212  +    {976, 140, 1},         {977, 142, 1},         {981, 146, 1},
          213  +    {982, 144, 1},         {984, 1, 24},          {1008, 136, 1},
          214  +    {1009, 138, 1},        {1012, 130, 1},        {1013, 128, 1},
          215  +    {1015, 0, 1},          {1017, 152, 1},        {1018, 0, 1},
          216  +    {1021, 110, 3},        {1024, 34, 16},        {1040, 14, 32},
          217  +    {1120, 1, 34},         {1162, 1, 54},         {1216, 6, 1},
          218  +    {1217, 1, 14},         {1232, 1, 88},         {1329, 22, 38},
          219  +    {4256, 66, 38},        {4295, 66, 1},         {4301, 66, 1},
          220  +    {7680, 1, 150},        {7835, 132, 1},        {7838, 96, 1},
          221  +    {7840, 1, 96},         {7944, 150, 8},        {7960, 150, 6},
          222  +    {7976, 150, 8},        {7992, 150, 8},        {8008, 150, 6},
          223  +    {8025, 151, 8},        {8040, 150, 8},        {8072, 150, 8},
          224  +    {8088, 150, 8},        {8104, 150, 8},        {8120, 150, 2},
          225  +    {8122, 126, 2},        {8124, 148, 1},        {8126, 100, 1},
          226  +    {8136, 124, 4},        {8140, 148, 1},        {8152, 150, 2},
          227  +    {8154, 120, 2},        {8168, 150, 2},        {8170, 118, 2},
          228  +    {8172, 152, 1},        {8184, 112, 2},        {8186, 114, 2},
          229  +    {8188, 148, 1},        {8486, 98, 1},         {8490, 92, 1},
          230  +    {8491, 94, 1},         {8498, 12, 1},         {8544, 8, 16},
          231  +    {8579, 0, 1},          {9398, 10, 26},        {11264, 22, 47},
          232  +    {11360, 0, 1},         {11362, 88, 1},        {11363, 102, 1},
          233  +    {11364, 90, 1},        {11367, 1, 6},         {11373, 84, 1},
          234  +    {11374, 86, 1},        {11375, 80, 1},        {11376, 82, 1},
          235  +    {11378, 0, 1},         {11381, 0, 1},         {11390, 78, 2},
          236  +    {11392, 1, 100},       {11499, 1, 4},         {11506, 0, 1},
          237  +    {42560, 1, 46},        {42624, 1, 24},        {42786, 1, 14},
          238  +    {42802, 1, 62},        {42873, 1, 4},         {42877, 76, 1},
          239  +    {42878, 1, 10},        {42891, 0, 1},         {42893, 74, 1},
          240  +    {42896, 1, 4},         {42912, 1, 10},        {42922, 72, 1},
          241  +    {65313, 14, 26},       
          242  +  };
          243  +  static const unsigned short aiOff[] = {
          244  +   1,     2,     8,     15,    16,    26,    28,    32,    
          245  +   37,    38,    40,    48,    63,    64,    69,    71,    
          246  +   79,    80,    116,   202,   203,   205,   206,   207,   
          247  +   209,   210,   211,   213,   214,   217,   218,   219,   
          248  +   775,   7264,  10792, 10795, 23228, 23256, 30204, 54721, 
          249  +   54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274, 
          250  +   57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406, 
          251  +   65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462, 
          252  +   65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511, 
          253  +   65514, 65521, 65527, 65528, 65529, 
          254  +  };
          255  +
          256  +  int ret = c;
          257  +
          258  +  assert( c>=0 );
          259  +  assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
          260  +
          261  +  if( c<128 ){
          262  +    if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
          263  +  }else if( c<65536 ){
          264  +    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
          265  +    int iLo = 0;
          266  +    int iRes = -1;
          267  +
          268  +    while( iHi>=iLo ){
          269  +      int iTest = (iHi + iLo) / 2;
          270  +      int cmp = (c - aEntry[iTest].iCode);
          271  +      if( cmp>=0 ){
          272  +        iRes = iTest;
          273  +        iLo = iTest+1;
          274  +      }else{
          275  +        iHi = iTest-1;
          276  +      }
          277  +    }
          278  +    assert( iRes<0 || c>=aEntry[iRes].iCode );
          279  +
          280  +    if( iRes>=0 ){
          281  +      const struct TableEntry *p = &aEntry[iRes];
          282  +      if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
          283  +        ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
          284  +        assert( ret>0 );
          285  +      }
          286  +    }
          287  +  }
          288  +  
          289  +  else if( c>=66560 && c<66600 ){
          290  +    ret = c + 40;
          291  +  }
          292  +
          293  +  return ret;
          294  +}
          295  +#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */
          296  +#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */

Changes to ext/fts3/fts3_write.c.

  3170   3170     if( sqlite3_step(pStmt)==SQLITE_ROW ){
  3171   3171       fts3DecodeIntArray(nStat, a,
  3172   3172            sqlite3_column_blob(pStmt, 0),
  3173   3173            sqlite3_column_bytes(pStmt, 0));
  3174   3174     }else{
  3175   3175       memset(a, 0, sizeof(u32)*(nStat) );
  3176   3176     }
  3177         -  sqlite3_reset(pStmt);
         3177  +  rc = sqlite3_reset(pStmt);
         3178  +  if( rc!=SQLITE_OK ){
         3179  +    sqlite3_free(a);
         3180  +    *pRC = rc;
         3181  +    return;
         3182  +  }
  3178   3183     if( nChng<0 && a[0]<(u32)(-nChng) ){
  3179   3184       a[0] = 0;
  3180   3185     }else{
  3181   3186       a[0] += nChng;
  3182   3187     }
  3183   3188     for(i=0; i<p->nColumn+1; i++){
  3184   3189       u32 x = a[i+1];

Added ext/fts3/unicode/CaseFolding.txt.

            1  +# CaseFolding-6.1.0.txt
            2  +# Date: 2011-07-25, 21:21:56 GMT [MD]
            3  +#
            4  +# Unicode Character Database
            5  +# Copyright (c) 1991-2011 Unicode, Inc.
            6  +# For terms of use, see http://www.unicode.org/terms_of_use.html
            7  +# For documentation, see http://www.unicode.org/reports/tr44/
            8  +#
            9  +# Case Folding Properties
           10  +#
           11  +# This file is a supplement to the UnicodeData file.
           12  +# It provides a case folding mapping generated from the Unicode Character Database.
           13  +# If all characters are mapped according to the full mapping below, then
           14  +# case differences (according to UnicodeData.txt and SpecialCasing.txt)
           15  +# are eliminated.
           16  +#
           17  +# The data supports both implementations that require simple case foldings
           18  +# (where string lengths don't change), and implementations that allow full case folding
           19  +# (where string lengths may grow). Note that where they can be supported, the
           20  +# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match.
           21  +#
           22  +# All code points not listed in this file map to themselves.
           23  +#
           24  +# NOTE: case folding does not preserve normalization formats!
           25  +#
           26  +# For information on case folding, including how to have case folding 
           27  +# preserve normalization formats, see Section 3.13 Default Case Algorithms in
           28  +# The Unicode Standard, Version 5.0.
           29  +#
           30  +# ================================================================================
           31  +# Format
           32  +# ================================================================================
           33  +# The entries in this file are in the following machine-readable format:
           34  +#
           35  +# <code>; <status>; <mapping>; # <name>
           36  +#
           37  +# The status field is:
           38  +# C: common case folding, common mappings shared by both simple and full mappings.
           39  +# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
           40  +# S: simple case folding, mappings to single characters where different from F.
           41  +# T: special case for uppercase I and dotted uppercase I
           42  +#    - For non-Turkic languages, this mapping is normally not used.
           43  +#    - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters.
           44  +#      Note that the Turkic mappings do not maintain canonical equivalence without additional processing.
           45  +#      See the discussions of case mapping in the Unicode Standard for more information.
           46  +#
           47  +# Usage:
           48  +#  A. To do a simple case folding, use the mappings with status C + S.
           49  +#  B. To do a full case folding, use the mappings with status C + F.
           50  +#
           51  +#    The mappings with status T can be used or omitted depending on the desired case-folding
           52  +#    behavior. (The default option is to exclude them.)
           53  +#
           54  +# =================================================================
           55  +
           56  +# Property: Case_Folding
           57  +
           58  +#  All code points not explicitly listed for Case_Folding
           59  +#  have the value C for the status field, and the code point itself for the mapping field.
           60  +
           61  +# @missing: 0000..10FFFF; C; <code point>
           62  +
           63  +# =================================================================
           64  +0041; C; 0061; # LATIN CAPITAL LETTER A
           65  +0042; C; 0062; # LATIN CAPITAL LETTER B
           66  +0043; C; 0063; # LATIN CAPITAL LETTER C
           67  +0044; C; 0064; # LATIN CAPITAL LETTER D
           68  +0045; C; 0065; # LATIN CAPITAL LETTER E
           69  +0046; C; 0066; # LATIN CAPITAL LETTER F
           70  +0047; C; 0067; # LATIN CAPITAL LETTER G
           71  +0048; C; 0068; # LATIN CAPITAL LETTER H
           72  +0049; C; 0069; # LATIN CAPITAL LETTER I
           73  +0049; T; 0131; # LATIN CAPITAL LETTER I
           74  +004A; C; 006A; # LATIN CAPITAL LETTER J
           75  +004B; C; 006B; # LATIN CAPITAL LETTER K
           76  +004C; C; 006C; # LATIN CAPITAL LETTER L
           77  +004D; C; 006D; # LATIN CAPITAL LETTER M
           78  +004E; C; 006E; # LATIN CAPITAL LETTER N
           79  +004F; C; 006F; # LATIN CAPITAL LETTER O
           80  +0050; C; 0070; # LATIN CAPITAL LETTER P
           81  +0051; C; 0071; # LATIN CAPITAL LETTER Q
           82  +0052; C; 0072; # LATIN CAPITAL LETTER R
           83  +0053; C; 0073; # LATIN CAPITAL LETTER S
           84  +0054; C; 0074; # LATIN CAPITAL LETTER T
           85  +0055; C; 0075; # LATIN CAPITAL LETTER U
           86  +0056; C; 0076; # LATIN CAPITAL LETTER V
           87  +0057; C; 0077; # LATIN CAPITAL LETTER W
           88  +0058; C; 0078; # LATIN CAPITAL LETTER X
           89  +0059; C; 0079; # LATIN CAPITAL LETTER Y
           90  +005A; C; 007A; # LATIN CAPITAL LETTER Z
           91  +00B5; C; 03BC; # MICRO SIGN
           92  +00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE
           93  +00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE
           94  +00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
           95  +00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE
           96  +00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS
           97  +00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE
           98  +00C6; C; 00E6; # LATIN CAPITAL LETTER AE
           99  +00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA
          100  +00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE
          101  +00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE
          102  +00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
          103  +00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS
          104  +00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE
          105  +00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE
          106  +00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
          107  +00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS
          108  +00D0; C; 00F0; # LATIN CAPITAL LETTER ETH
          109  +00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE
          110  +00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE
          111  +00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE
          112  +00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
          113  +00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE
          114  +00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS
          115  +00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE
          116  +00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE
          117  +00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE
          118  +00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
          119  +00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS
          120  +00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE
          121  +00DE; C; 00FE; # LATIN CAPITAL LETTER THORN
          122  +00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S
          123  +0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON
          124  +0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE
          125  +0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK
          126  +0106; C; 0107; # LATIN CAPITAL LETTER C WITH ACUTE
          127  +0108; C; 0109; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
          128  +010A; C; 010B; # LATIN CAPITAL LETTER C WITH DOT ABOVE
          129  +010C; C; 010D; # LATIN CAPITAL LETTER C WITH CARON
          130  +010E; C; 010F; # LATIN CAPITAL LETTER D WITH CARON
          131  +0110; C; 0111; # LATIN CAPITAL LETTER D WITH STROKE
          132  +0112; C; 0113; # LATIN CAPITAL LETTER E WITH MACRON
          133  +0114; C; 0115; # LATIN CAPITAL LETTER E WITH BREVE
          134  +0116; C; 0117; # LATIN CAPITAL LETTER E WITH DOT ABOVE
          135  +0118; C; 0119; # LATIN CAPITAL LETTER E WITH OGONEK
          136  +011A; C; 011B; # LATIN CAPITAL LETTER E WITH CARON
          137  +011C; C; 011D; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
          138  +011E; C; 011F; # LATIN CAPITAL LETTER G WITH BREVE
          139  +0120; C; 0121; # LATIN CAPITAL LETTER G WITH DOT ABOVE
          140  +0122; C; 0123; # LATIN CAPITAL LETTER G WITH CEDILLA
          141  +0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
          142  +0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE
          143  +0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE
          144  +012A; C; 012B; # LATIN CAPITAL LETTER I WITH MACRON
          145  +012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE
          146  +012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK
          147  +0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
          148  +0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
          149  +0132; C; 0133; # LATIN CAPITAL LIGATURE IJ
          150  +0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
          151  +0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA
          152  +0139; C; 013A; # LATIN CAPITAL LETTER L WITH ACUTE
          153  +013B; C; 013C; # LATIN CAPITAL LETTER L WITH CEDILLA
          154  +013D; C; 013E; # LATIN CAPITAL LETTER L WITH CARON
          155  +013F; C; 0140; # LATIN CAPITAL LETTER L WITH MIDDLE DOT
          156  +0141; C; 0142; # LATIN CAPITAL LETTER L WITH STROKE
          157  +0143; C; 0144; # LATIN CAPITAL LETTER N WITH ACUTE
          158  +0145; C; 0146; # LATIN CAPITAL LETTER N WITH CEDILLA
          159  +0147; C; 0148; # LATIN CAPITAL LETTER N WITH CARON
          160  +0149; F; 02BC 006E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
          161  +014A; C; 014B; # LATIN CAPITAL LETTER ENG
          162  +014C; C; 014D; # LATIN CAPITAL LETTER O WITH MACRON
          163  +014E; C; 014F; # LATIN CAPITAL LETTER O WITH BREVE
          164  +0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
          165  +0152; C; 0153; # LATIN CAPITAL LIGATURE OE
          166  +0154; C; 0155; # LATIN CAPITAL LETTER R WITH ACUTE
          167  +0156; C; 0157; # LATIN CAPITAL LETTER R WITH CEDILLA
          168  +0158; C; 0159; # LATIN CAPITAL LETTER R WITH CARON
          169  +015A; C; 015B; # LATIN CAPITAL LETTER S WITH ACUTE
          170  +015C; C; 015D; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
          171  +015E; C; 015F; # LATIN CAPITAL LETTER S WITH CEDILLA
          172  +0160; C; 0161; # LATIN CAPITAL LETTER S WITH CARON
          173  +0162; C; 0163; # LATIN CAPITAL LETTER T WITH CEDILLA
          174  +0164; C; 0165; # LATIN CAPITAL LETTER T WITH CARON
          175  +0166; C; 0167; # LATIN CAPITAL LETTER T WITH STROKE
          176  +0168; C; 0169; # LATIN CAPITAL LETTER U WITH TILDE
          177  +016A; C; 016B; # LATIN CAPITAL LETTER U WITH MACRON
          178  +016C; C; 016D; # LATIN CAPITAL LETTER U WITH BREVE
          179  +016E; C; 016F; # LATIN CAPITAL LETTER U WITH RING ABOVE
          180  +0170; C; 0171; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
          181  +0172; C; 0173; # LATIN CAPITAL LETTER U WITH OGONEK
          182  +0174; C; 0175; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
          183  +0176; C; 0177; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
          184  +0178; C; 00FF; # LATIN CAPITAL LETTER Y WITH DIAERESIS
          185  +0179; C; 017A; # LATIN CAPITAL LETTER Z WITH ACUTE
          186  +017B; C; 017C; # LATIN CAPITAL LETTER Z WITH DOT ABOVE
          187  +017D; C; 017E; # LATIN CAPITAL LETTER Z WITH CARON
          188  +017F; C; 0073; # LATIN SMALL LETTER LONG S
          189  +0181; C; 0253; # LATIN CAPITAL LETTER B WITH HOOK
          190  +0182; C; 0183; # LATIN CAPITAL LETTER B WITH TOPBAR
          191  +0184; C; 0185; # LATIN CAPITAL LETTER TONE SIX
          192  +0186; C; 0254; # LATIN CAPITAL LETTER OPEN O
          193  +0187; C; 0188; # LATIN CAPITAL LETTER C WITH HOOK
          194  +0189; C; 0256; # LATIN CAPITAL LETTER AFRICAN D
          195  +018A; C; 0257; # LATIN CAPITAL LETTER D WITH HOOK
          196  +018B; C; 018C; # LATIN CAPITAL LETTER D WITH TOPBAR
          197  +018E; C; 01DD; # LATIN CAPITAL LETTER REVERSED E
          198  +018F; C; 0259; # LATIN CAPITAL LETTER SCHWA
          199  +0190; C; 025B; # LATIN CAPITAL LETTER OPEN E
          200  +0191; C; 0192; # LATIN CAPITAL LETTER F WITH HOOK
          201  +0193; C; 0260; # LATIN CAPITAL LETTER G WITH HOOK
          202  +0194; C; 0263; # LATIN CAPITAL LETTER GAMMA
          203  +0196; C; 0269; # LATIN CAPITAL LETTER IOTA
          204  +0197; C; 0268; # LATIN CAPITAL LETTER I WITH STROKE
          205  +0198; C; 0199; # LATIN CAPITAL LETTER K WITH HOOK
          206  +019C; C; 026F; # LATIN CAPITAL LETTER TURNED M
          207  +019D; C; 0272; # LATIN CAPITAL LETTER N WITH LEFT HOOK
          208  +019F; C; 0275; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
          209  +01A0; C; 01A1; # LATIN CAPITAL LETTER O WITH HORN
          210  +01A2; C; 01A3; # LATIN CAPITAL LETTER OI
          211  +01A4; C; 01A5; # LATIN CAPITAL LETTER P WITH HOOK
          212  +01A6; C; 0280; # LATIN LETTER YR
          213  +01A7; C; 01A8; # LATIN CAPITAL LETTER TONE TWO
          214  +01A9; C; 0283; # LATIN CAPITAL LETTER ESH
          215  +01AC; C; 01AD; # LATIN CAPITAL LETTER T WITH HOOK
          216  +01AE; C; 0288; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
          217  +01AF; C; 01B0; # LATIN CAPITAL LETTER U WITH HORN
          218  +01B1; C; 028A; # LATIN CAPITAL LETTER UPSILON
          219  +01B2; C; 028B; # LATIN CAPITAL LETTER V WITH HOOK
          220  +01B3; C; 01B4; # LATIN CAPITAL LETTER Y WITH HOOK
          221  +01B5; C; 01B6; # LATIN CAPITAL LETTER Z WITH STROKE
          222  +01B7; C; 0292; # LATIN CAPITAL LETTER EZH
          223  +01B8; C; 01B9; # LATIN CAPITAL LETTER EZH REVERSED
          224  +01BC; C; 01BD; # LATIN CAPITAL LETTER TONE FIVE
          225  +01C4; C; 01C6; # LATIN CAPITAL LETTER DZ WITH CARON
          226  +01C5; C; 01C6; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
          227  +01C7; C; 01C9; # LATIN CAPITAL LETTER LJ
          228  +01C8; C; 01C9; # LATIN CAPITAL LETTER L WITH SMALL LETTER J
          229  +01CA; C; 01CC; # LATIN CAPITAL LETTER NJ
          230  +01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J
          231  +01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON
          232  +01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON
          233  +01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON
          234  +01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON
          235  +01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
          236  +01D7; C; 01D8; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
          237  +01D9; C; 01DA; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
          238  +01DB; C; 01DC; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
          239  +01DE; C; 01DF; # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
          240  +01E0; C; 01E1; # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
          241  +01E2; C; 01E3; # LATIN CAPITAL LETTER AE WITH MACRON
          242  +01E4; C; 01E5; # LATIN CAPITAL LETTER G WITH STROKE
          243  +01E6; C; 01E7; # LATIN CAPITAL LETTER G WITH CARON
          244  +01E8; C; 01E9; # LATIN CAPITAL LETTER K WITH CARON
          245  +01EA; C; 01EB; # LATIN CAPITAL LETTER O WITH OGONEK
          246  +01EC; C; 01ED; # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
          247  +01EE; C; 01EF; # LATIN CAPITAL LETTER EZH WITH CARON
          248  +01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
          249  +01F1; C; 01F3; # LATIN CAPITAL LETTER DZ
          250  +01F2; C; 01F3; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
          251  +01F4; C; 01F5; # LATIN CAPITAL LETTER G WITH ACUTE
          252  +01F6; C; 0195; # LATIN CAPITAL LETTER HWAIR
          253  +01F7; C; 01BF; # LATIN CAPITAL LETTER WYNN
          254  +01F8; C; 01F9; # LATIN CAPITAL LETTER N WITH GRAVE
          255  +01FA; C; 01FB; # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
          256  +01FC; C; 01FD; # LATIN CAPITAL LETTER AE WITH ACUTE
          257  +01FE; C; 01FF; # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
          258  +0200; C; 0201; # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
          259  +0202; C; 0203; # LATIN CAPITAL LETTER A WITH INVERTED BREVE
          260  +0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
          261  +0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE
          262  +0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
          263  +020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE
          264  +020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
          265  +020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE
          266  +0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
          267  +0212; C; 0213; # LATIN CAPITAL LETTER R WITH INVERTED BREVE
          268  +0214; C; 0215; # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
          269  +0216; C; 0217; # LATIN CAPITAL LETTER U WITH INVERTED BREVE
          270  +0218; C; 0219; # LATIN CAPITAL LETTER S WITH COMMA BELOW
          271  +021A; C; 021B; # LATIN CAPITAL LETTER T WITH COMMA BELOW
          272  +021C; C; 021D; # LATIN CAPITAL LETTER YOGH
          273  +021E; C; 021F; # LATIN CAPITAL LETTER H WITH CARON
          274  +0220; C; 019E; # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
          275  +0222; C; 0223; # LATIN CAPITAL LETTER OU
          276  +0224; C; 0225; # LATIN CAPITAL LETTER Z WITH HOOK
          277  +0226; C; 0227; # LATIN CAPITAL LETTER A WITH DOT ABOVE
          278  +0228; C; 0229; # LATIN CAPITAL LETTER E WITH CEDILLA
          279  +022A; C; 022B; # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
          280  +022C; C; 022D; # LATIN CAPITAL LETTER O WITH TILDE AND MACRON
          281  +022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE
          282  +0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
          283  +0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON
          284  +023A; C; 2C65; # LATIN CAPITAL LETTER A WITH STROKE
          285  +023B; C; 023C; # LATIN CAPITAL LETTER C WITH STROKE
          286  +023D; C; 019A; # LATIN CAPITAL LETTER L WITH BAR
          287  +023E; C; 2C66; # LATIN CAPITAL LETTER T WITH DIAGONAL STROKE
          288  +0241; C; 0242; # LATIN CAPITAL LETTER GLOTTAL STOP
          289  +0243; C; 0180; # LATIN CAPITAL LETTER B WITH STROKE
          290  +0244; C; 0289; # LATIN CAPITAL LETTER U BAR
          291  +0245; C; 028C; # LATIN CAPITAL LETTER TURNED V
          292  +0246; C; 0247; # LATIN CAPITAL LETTER E WITH STROKE
          293  +0248; C; 0249; # LATIN CAPITAL LETTER J WITH STROKE
          294  +024A; C; 024B; # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL
          295  +024C; C; 024D; # LATIN CAPITAL LETTER R WITH STROKE
          296  +024E; C; 024F; # LATIN CAPITAL LETTER Y WITH STROKE
          297  +0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI
          298  +0370; C; 0371; # GREEK CAPITAL LETTER HETA
          299  +0372; C; 0373; # GREEK CAPITAL LETTER ARCHAIC SAMPI
          300  +0376; C; 0377; # GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA
          301  +0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS
          302  +0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS
          303  +0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS
          304  +038A; C; 03AF; # GREEK CAPITAL LETTER IOTA WITH TONOS
          305  +038C; C; 03CC; # GREEK CAPITAL LETTER OMICRON WITH TONOS
          306  +038E; C; 03CD; # GREEK CAPITAL LETTER UPSILON WITH TONOS
          307  +038F; C; 03CE; # GREEK CAPITAL LETTER OMEGA WITH TONOS
          308  +0390; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
          309  +0391; C; 03B1; # GREEK CAPITAL LETTER ALPHA
          310  +0392; C; 03B2; # GREEK CAPITAL LETTER BETA
          311  +0393; C; 03B3; # GREEK CAPITAL LETTER GAMMA
          312  +0394; C; 03B4; # GREEK CAPITAL LETTER DELTA
          313  +0395; C; 03B5; # GREEK CAPITAL LETTER EPSILON
          314  +0396; C; 03B6; # GREEK CAPITAL LETTER ZETA
          315  +0397; C; 03B7; # GREEK CAPITAL LETTER ETA
          316  +0398; C; 03B8; # GREEK CAPITAL LETTER THETA
          317  +0399; C; 03B9; # GREEK CAPITAL LETTER IOTA
          318  +039A; C; 03BA; # GREEK CAPITAL LETTER KAPPA
          319  +039B; C; 03BB; # GREEK CAPITAL LETTER LAMDA
          320  +039C; C; 03BC; # GREEK CAPITAL LETTER MU
          321  +039D; C; 03BD; # GREEK CAPITAL LETTER NU
          322  +039E; C; 03BE; # GREEK CAPITAL LETTER XI
          323  +039F; C; 03BF; # GREEK CAPITAL LETTER OMICRON
          324  +03A0; C; 03C0; # GREEK CAPITAL LETTER PI
          325  +03A1; C; 03C1; # GREEK CAPITAL LETTER RHO
          326  +03A3; C; 03C3; # GREEK CAPITAL LETTER SIGMA
          327  +03A4; C; 03C4; # GREEK CAPITAL LETTER TAU
          328  +03A5; C; 03C5; # GREEK CAPITAL LETTER UPSILON
          329  +03A6; C; 03C6; # GREEK CAPITAL LETTER PHI
          330  +03A7; C; 03C7; # GREEK CAPITAL LETTER CHI
          331  +03A8; C; 03C8; # GREEK CAPITAL LETTER PSI
          332  +03A9; C; 03C9; # GREEK CAPITAL LETTER OMEGA
          333  +03AA; C; 03CA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
          334  +03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
          335  +03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
          336  +03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA
          337  +03CF; C; 03D7; # GREEK CAPITAL KAI SYMBOL
          338  +03D0; C; 03B2; # GREEK BETA SYMBOL
          339  +03D1; C; 03B8; # GREEK THETA SYMBOL
          340  +03D5; C; 03C6; # GREEK PHI SYMBOL
          341  +03D6; C; 03C0; # GREEK PI SYMBOL
          342  +03D8; C; 03D9; # GREEK LETTER ARCHAIC KOPPA
          343  +03DA; C; 03DB; # GREEK LETTER STIGMA
          344  +03DC; C; 03DD; # GREEK LETTER DIGAMMA
          345  +03DE; C; 03DF; # GREEK LETTER KOPPA
          346  +03E0; C; 03E1; # GREEK LETTER SAMPI
          347  +03E2; C; 03E3; # COPTIC CAPITAL LETTER SHEI
          348  +03E4; C; 03E5; # COPTIC CAPITAL LETTER FEI
          349  +03E6; C; 03E7; # COPTIC CAPITAL LETTER KHEI
          350  +03E8; C; 03E9; # COPTIC CAPITAL LETTER HORI
          351  +03EA; C; 03EB; # COPTIC CAPITAL LETTER GANGIA
          352  +03EC; C; 03ED; # COPTIC CAPITAL LETTER SHIMA
          353  +03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI
          354  +03F0; C; 03BA; # GREEK KAPPA SYMBOL
          355  +03F1; C; 03C1; # GREEK RHO SYMBOL
          356  +03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL
          357  +03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL
          358  +03F7; C; 03F8; # GREEK CAPITAL LETTER SHO
          359  +03F9; C; 03F2; # GREEK CAPITAL LUNATE SIGMA SYMBOL
          360  +03FA; C; 03FB; # GREEK CAPITAL LETTER SAN
          361  +03FD; C; 037B; # GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL
          362  +03FE; C; 037C; # GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL
          363  +03FF; C; 037D; # GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
          364  +0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE
          365  +0401; C; 0451; # CYRILLIC CAPITAL LETTER IO
          366  +0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE
          367  +0403; C; 0453; # CYRILLIC CAPITAL LETTER GJE
          368  +0404; C; 0454; # CYRILLIC CAPITAL LETTER UKRAINIAN IE
          369  +0405; C; 0455; # CYRILLIC CAPITAL LETTER DZE
          370  +0406; C; 0456; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
          371  +0407; C; 0457; # CYRILLIC CAPITAL LETTER YI
          372  +0408; C; 0458; # CYRILLIC CAPITAL LETTER JE
          373  +0409; C; 0459; # CYRILLIC CAPITAL LETTER LJE
          374  +040A; C; 045A; # CYRILLIC CAPITAL LETTER NJE
          375  +040B; C; 045B; # CYRILLIC CAPITAL LETTER TSHE
          376  +040C; C; 045C; # CYRILLIC CAPITAL LETTER KJE
          377  +040D; C; 045D; # CYRILLIC CAPITAL LETTER I WITH GRAVE
          378  +040E; C; 045E; # CYRILLIC CAPITAL LETTER SHORT U
          379  +040F; C; 045F; # CYRILLIC CAPITAL LETTER DZHE
          380  +0410; C; 0430; # CYRILLIC CAPITAL LETTER A
          381  +0411; C; 0431; # CYRILLIC CAPITAL LETTER BE
          382  +0412; C; 0432; # CYRILLIC CAPITAL LETTER VE
          383  +0413; C; 0433; # CYRILLIC CAPITAL LETTER GHE
          384  +0414; C; 0434; # CYRILLIC CAPITAL LETTER DE
          385  +0415; C; 0435; # CYRILLIC CAPITAL LETTER IE
          386  +0416; C; 0436; # CYRILLIC CAPITAL LETTER ZHE
          387  +0417; C; 0437; # CYRILLIC CAPITAL LETTER ZE
          388  +0418; C; 0438; # CYRILLIC CAPITAL LETTER I
          389  +0419; C; 0439; # CYRILLIC CAPITAL LETTER SHORT I
          390  +041A; C; 043A; # CYRILLIC CAPITAL LETTER KA
          391  +041B; C; 043B; # CYRILLIC CAPITAL LETTER EL
          392  +041C; C; 043C; # CYRILLIC CAPITAL LETTER EM
          393  +041D; C; 043D; # CYRILLIC CAPITAL LETTER EN
          394  +041E; C; 043E; # CYRILLIC CAPITAL LETTER O
          395  +041F; C; 043F; # CYRILLIC CAPITAL LETTER PE
          396  +0420; C; 0440; # CYRILLIC CAPITAL LETTER ER
          397  +0421; C; 0441; # CYRILLIC CAPITAL LETTER ES
          398  +0422; C; 0442; # CYRILLIC CAPITAL LETTER TE
          399  +0423; C; 0443; # CYRILLIC CAPITAL LETTER U
          400  +0424; C; 0444; # CYRILLIC CAPITAL LETTER EF
          401  +0425; C; 0445; # CYRILLIC CAPITAL LETTER HA
          402  +0426; C; 0446; # CYRILLIC CAPITAL LETTER TSE
          403  +0427; C; 0447; # CYRILLIC CAPITAL LETTER CHE
          404  +0428; C; 0448; # CYRILLIC CAPITAL LETTER SHA
          405  +0429; C; 0449; # CYRILLIC CAPITAL LETTER SHCHA
          406  +042A; C; 044A; # CYRILLIC CAPITAL LETTER HARD SIGN
          407  +042B; C; 044B; # CYRILLIC CAPITAL LETTER YERU
          408  +042C; C; 044C; # CYRILLIC CAPITAL LETTER SOFT SIGN
          409  +042D; C; 044D; # CYRILLIC CAPITAL LETTER E
          410  +042E; C; 044E; # CYRILLIC CAPITAL LETTER YU
          411  +042F; C; 044F; # CYRILLIC CAPITAL LETTER YA
          412  +0460; C; 0461; # CYRILLIC CAPITAL LETTER OMEGA
          413  +0462; C; 0463; # CYRILLIC CAPITAL LETTER YAT
          414  +0464; C; 0465; # CYRILLIC CAPITAL LETTER IOTIFIED E
          415  +0466; C; 0467; # CYRILLIC CAPITAL LETTER LITTLE YUS
          416  +0468; C; 0469; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
          417  +046A; C; 046B; # CYRILLIC CAPITAL LETTER BIG YUS
          418  +046C; C; 046D; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
          419  +046E; C; 046F; # CYRILLIC CAPITAL LETTER KSI
          420  +0470; C; 0471; # CYRILLIC CAPITAL LETTER PSI
          421  +0472; C; 0473; # CYRILLIC CAPITAL LETTER FITA
          422  +0474; C; 0475; # CYRILLIC CAPITAL LETTER IZHITSA
          423  +0476; C; 0477; # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
          424  +0478; C; 0479; # CYRILLIC CAPITAL LETTER UK
          425  +047A; C; 047B; # CYRILLIC CAPITAL LETTER ROUND OMEGA
          426  +047C; C; 047D; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
          427  +047E; C; 047F; # CYRILLIC CAPITAL LETTER OT
          428  +0480; C; 0481; # CYRILLIC CAPITAL LETTER KOPPA
          429  +048A; C; 048B; # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL
          430  +048C; C; 048D; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
          431  +048E; C; 048F; # CYRILLIC CAPITAL LETTER ER WITH TICK
          432  +0490; C; 0491; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
          433  +0492; C; 0493; # CYRILLIC CAPITAL LETTER GHE WITH STROKE
          434  +0494; C; 0495; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
          435  +0496; C; 0497; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
          436  +0498; C; 0499; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
          437  +049A; C; 049B; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
          438  +049C; C; 049D; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
          439  +049E; C; 049F; # CYRILLIC CAPITAL LETTER KA WITH STROKE
          440  +04A0; C; 04A1; # CYRILLIC CAPITAL LETTER BASHKIR KA
          441  +04A2; C; 04A3; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
          442  +04A4; C; 04A5; # CYRILLIC CAPITAL LIGATURE EN GHE
          443  +04A6; C; 04A7; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
          444  +04A8; C; 04A9; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
          445  +04AA; C; 04AB; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
          446  +04AC; C; 04AD; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
          447  +04AE; C; 04AF; # CYRILLIC CAPITAL LETTER STRAIGHT U
          448  +04B0; C; 04B1; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
          449  +04B2; C; 04B3; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
          450  +04B4; C; 04B5; # CYRILLIC CAPITAL LIGATURE TE TSE
          451  +04B6; C; 04B7; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
          452  +04B8; C; 04B9; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
          453  +04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA
          454  +04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
          455  +04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
          456  +04C0; C; 04CF; # CYRILLIC LETTER PALOCHKA
          457  +04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE
          458  +04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK
          459  +04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL
          460  +04C7; C; 04C8; # CYRILLIC CAPITAL LETTER EN WITH HOOK
          461  +04C9; C; 04CA; # CYRILLIC CAPITAL LETTER EN WITH TAIL
          462  +04CB; C; 04CC; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
          463  +04CD; C; 04CE; # CYRILLIC CAPITAL LETTER EM WITH TAIL
          464  +04D0; C; 04D1; # CYRILLIC CAPITAL LETTER A WITH BREVE
          465  +04D2; C; 04D3; # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
          466  +04D4; C; 04D5; # CYRILLIC CAPITAL LIGATURE A IE
          467  +04D6; C; 04D7; # CYRILLIC CAPITAL LETTER IE WITH BREVE
          468  +04D8; C; 04D9; # CYRILLIC CAPITAL LETTER SCHWA
          469  +04DA; C; 04DB; # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS
          470  +04DC; C; 04DD; # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS
          471  +04DE; C; 04DF; # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS
          472  +04E0; C; 04E1; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
          473  +04E2; C; 04E3; # CYRILLIC CAPITAL LETTER I WITH MACRON
          474  +04E4; C; 04E5; # CYRILLIC CAPITAL LETTER I WITH DIAERESIS
          475  +04E6; C; 04E7; # CYRILLIC CAPITAL LETTER O WITH DIAERESIS
          476  +04E8; C; 04E9; # CYRILLIC CAPITAL LETTER BARRED O
          477  +04EA; C; 04EB; # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS
          478  +04EC; C; 04ED; # CYRILLIC CAPITAL LETTER E WITH DIAERESIS
          479  +04EE; C; 04EF; # CYRILLIC CAPITAL LETTER U WITH MACRON
          480  +04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS
          481  +04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE
          482  +04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS
          483  +04F6; C; 04F7; # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER
          484  +04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS
          485  +04FA; C; 04FB; # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK
          486  +04FC; C; 04FD; # CYRILLIC CAPITAL LETTER HA WITH HOOK
          487  +04FE; C; 04FF; # CYRILLIC CAPITAL LETTER HA WITH STROKE
          488  +0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE
          489  +0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE
          490  +0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE
          491  +0506; C; 0507; # CYRILLIC CAPITAL LETTER KOMI DZJE
          492  +0508; C; 0509; # CYRILLIC CAPITAL LETTER KOMI LJE
          493  +050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE
          494  +050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE
          495  +050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE
          496  +0510; C; 0511; # CYRILLIC CAPITAL LETTER REVERSED ZE
          497  +0512; C; 0513; # CYRILLIC CAPITAL LETTER EL WITH HOOK
          498  +0514; C; 0515; # CYRILLIC CAPITAL LETTER LHA
          499  +0516; C; 0517; # CYRILLIC CAPITAL LETTER RHA
          500  +0518; C; 0519; # CYRILLIC CAPITAL LETTER YAE
          501  +051A; C; 051B; # CYRILLIC CAPITAL LETTER QA
          502  +051C; C; 051D; # CYRILLIC CAPITAL LETTER WE
          503  +051E; C; 051F; # CYRILLIC CAPITAL LETTER ALEUT KA
          504  +0520; C; 0521; # CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK
          505  +0522; C; 0523; # CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK
          506  +0524; C; 0525; # CYRILLIC CAPITAL LETTER PE WITH DESCENDER
          507  +0526; C; 0527; # CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER
          508  +0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB
          509  +0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN
          510  +0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM
          511  +0534; C; 0564; # ARMENIAN CAPITAL LETTER DA
          512  +0535; C; 0565; # ARMENIAN CAPITAL LETTER ECH
          513  +0536; C; 0566; # ARMENIAN CAPITAL LETTER ZA
          514  +0537; C; 0567; # ARMENIAN CAPITAL LETTER EH
          515  +0538; C; 0568; # ARMENIAN CAPITAL LETTER ET
          516  +0539; C; 0569; # ARMENIAN CAPITAL LETTER TO
          517  +053A; C; 056A; # ARMENIAN CAPITAL LETTER ZHE
          518  +053B; C; 056B; # ARMENIAN CAPITAL LETTER INI
          519  +053C; C; 056C; # ARMENIAN CAPITAL LETTER LIWN
          520  +053D; C; 056D; # ARMENIAN CAPITAL LETTER XEH
          521  +053E; C; 056E; # ARMENIAN CAPITAL LETTER CA
          522  +053F; C; 056F; # ARMENIAN CAPITAL LETTER KEN
          523  +0540; C; 0570; # ARMENIAN CAPITAL LETTER HO
          524  +0541; C; 0571; # ARMENIAN CAPITAL LETTER JA
          525  +0542; C; 0572; # ARMENIAN CAPITAL LETTER GHAD
          526  +0543; C; 0573; # ARMENIAN CAPITAL LETTER CHEH
          527  +0544; C; 0574; # ARMENIAN CAPITAL LETTER MEN
          528  +0545; C; 0575; # ARMENIAN CAPITAL LETTER YI
          529  +0546; C; 0576; # ARMENIAN CAPITAL LETTER NOW
          530  +0547; C; 0577; # ARMENIAN CAPITAL LETTER SHA
          531  +0548; C; 0578; # ARMENIAN CAPITAL LETTER VO
          532  +0549; C; 0579; # ARMENIAN CAPITAL LETTER CHA
          533  +054A; C; 057A; # ARMENIAN CAPITAL LETTER PEH
          534  +054B; C; 057B; # ARMENIAN CAPITAL LETTER JHEH
          535  +054C; C; 057C; # ARMENIAN CAPITAL LETTER RA
          536  +054D; C; 057D; # ARMENIAN CAPITAL LETTER SEH
          537  +054E; C; 057E; # ARMENIAN CAPITAL LETTER VEW
          538  +054F; C; 057F; # ARMENIAN CAPITAL LETTER TIWN
          539  +0550; C; 0580; # ARMENIAN CAPITAL LETTER REH
          540  +0551; C; 0581; # ARMENIAN CAPITAL LETTER CO
          541  +0552; C; 0582; # ARMENIAN CAPITAL LETTER YIWN
          542  +0553; C; 0583; # ARMENIAN CAPITAL LETTER PIWR
          543  +0554; C; 0584; # ARMENIAN CAPITAL LETTER KEH
          544  +0555; C; 0585; # ARMENIAN CAPITAL LETTER OH
          545  +0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH
          546  +0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN
          547  +10A0; C; 2D00; # GEORGIAN CAPITAL LETTER AN
          548  +10A1; C; 2D01; # GEORGIAN CAPITAL LETTER BAN
          549  +10A2; C; 2D02; # GEORGIAN CAPITAL LETTER GAN
          550  +10A3; C; 2D03; # GEORGIAN CAPITAL LETTER DON
          551  +10A4; C; 2D04; # GEORGIAN CAPITAL LETTER EN
          552  +10A5; C; 2D05; # GEORGIAN CAPITAL LETTER VIN
          553  +10A6; C; 2D06; # GEORGIAN CAPITAL LETTER ZEN
          554  +10A7; C; 2D07; # GEORGIAN CAPITAL LETTER TAN
          555  +10A8; C; 2D08; # GEORGIAN CAPITAL LETTER IN
          556  +10A9; C; 2D09; # GEORGIAN CAPITAL LETTER KAN
          557  +10AA; C; 2D0A; # GEORGIAN CAPITAL LETTER LAS
          558  +10AB; C; 2D0B; # GEORGIAN CAPITAL LETTER MAN
          559  +10AC; C; 2D0C; # GEORGIAN CAPITAL LETTER NAR
          560  +10AD; C; 2D0D; # GEORGIAN CAPITAL LETTER ON
          561  +10AE; C; 2D0E; # GEORGIAN CAPITAL LETTER PAR
          562  +10AF; C; 2D0F; # GEORGIAN CAPITAL LETTER ZHAR
          563  +10B0; C; 2D10; # GEORGIAN CAPITAL LETTER RAE
          564  +10B1; C; 2D11; # GEORGIAN CAPITAL LETTER SAN
          565  +10B2; C; 2D12; # GEORGIAN CAPITAL LETTER TAR
          566  +10B3; C; 2D13; # GEORGIAN CAPITAL LETTER UN
          567  +10B4; C; 2D14; # GEORGIAN CAPITAL LETTER PHAR
          568  +10B5; C; 2D15; # GEORGIAN CAPITAL LETTER KHAR
          569  +10B6; C; 2D16; # GEORGIAN CAPITAL LETTER GHAN
          570  +10B7; C; 2D17; # GEORGIAN CAPITAL LETTER QAR
          571  +10B8; C; 2D18; # GEORGIAN CAPITAL LETTER SHIN
          572  +10B9; C; 2D19; # GEORGIAN CAPITAL LETTER CHIN
          573  +10BA; C; 2D1A; # GEORGIAN CAPITAL LETTER CAN
          574  +10BB; C; 2D1B; # GEORGIAN CAPITAL LETTER JIL
          575  +10BC; C; 2D1C; # GEORGIAN CAPITAL LETTER CIL
          576  +10BD; C; 2D1D; # GEORGIAN CAPITAL LETTER CHAR
          577  +10BE; C; 2D1E; # GEORGIAN CAPITAL LETTER XAN
          578  +10BF; C; 2D1F; # GEORGIAN CAPITAL LETTER JHAN
          579  +10C0; C; 2D20; # GEORGIAN CAPITAL LETTER HAE
          580  +10C1; C; 2D21; # GEORGIAN CAPITAL LETTER HE
          581  +10C2; C; 2D22; # GEORGIAN CAPITAL LETTER HIE
          582  +10C3; C; 2D23; # GEORGIAN CAPITAL LETTER WE
          583  +10C4; C; 2D24; # GEORGIAN CAPITAL LETTER HAR
          584  +10C5; C; 2D25; # GEORGIAN CAPITAL LETTER HOE
          585  +10C7; C; 2D27; # GEORGIAN CAPITAL LETTER YN
          586  +10CD; C; 2D2D; # GEORGIAN CAPITAL LETTER AEN
          587  +1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW
          588  +1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE
          589  +1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW
          590  +1E06; C; 1E07; # LATIN CAPITAL LETTER B WITH LINE BELOW
          591  +1E08; C; 1E09; # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
          592  +1E0A; C; 1E0B; # LATIN CAPITAL LETTER D WITH DOT ABOVE
          593  +1E0C; C; 1E0D; # LATIN CAPITAL LETTER D WITH DOT BELOW
          594  +1E0E; C; 1E0F; # LATIN CAPITAL LETTER D WITH LINE BELOW
          595  +1E10; C; 1E11; # LATIN CAPITAL LETTER D WITH CEDILLA
          596  +1E12; C; 1E13; # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
          597  +1E14; C; 1E15; # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
          598  +1E16; C; 1E17; # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
          599  +1E18; C; 1E19; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
          600  +1E1A; C; 1E1B; # LATIN CAPITAL LETTER E WITH TILDE BELOW
          601  +1E1C; C; 1E1D; # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
          602  +1E1E; C; 1E1F; # LATIN CAPITAL LETTER F WITH DOT ABOVE
          603  +1E20; C; 1E21; # LATIN CAPITAL LETTER G WITH MACRON
          604  +1E22; C; 1E23; # LATIN CAPITAL LETTER H WITH DOT ABOVE
          605  +1E24; C; 1E25; # LATIN CAPITAL LETTER H WITH DOT BELOW
          606  +1E26; C; 1E27; # LATIN CAPITAL LETTER H WITH DIAERESIS
          607  +1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA
          608  +1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW
          609  +1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW
          610  +1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
          611  +1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE
          612  +1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW
          613  +1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW
          614  +1E36; C; 1E37; # LATIN CAPITAL LETTER L WITH DOT BELOW
          615  +1E38; C; 1E39; # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
          616  +1E3A; C; 1E3B; # LATIN CAPITAL LETTER L WITH LINE BELOW
          617  +1E3C; C; 1E3D; # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
          618  +1E3E; C; 1E3F; # LATIN CAPITAL LETTER M WITH ACUTE
          619  +1E40; C; 1E41; # LATIN CAPITAL LETTER M WITH DOT ABOVE
          620  +1E42; C; 1E43; # LATIN CAPITAL LETTER M WITH DOT BELOW
          621  +1E44; C; 1E45; # LATIN CAPITAL LETTER N WITH DOT ABOVE
          622  +1E46; C; 1E47; # LATIN CAPITAL LETTER N WITH DOT BELOW
          623  +1E48; C; 1E49; # LATIN CAPITAL LETTER N WITH LINE BELOW
          624  +1E4A; C; 1E4B; # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
          625  +1E4C; C; 1E4D; # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
          626  +1E4E; C; 1E4F; # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
          627  +1E50; C; 1E51; # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
          628  +1E52; C; 1E53; # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
          629  +1E54; C; 1E55; # LATIN CAPITAL LETTER P WITH ACUTE
          630  +1E56; C; 1E57; # LATIN CAPITAL LETTER P WITH DOT ABOVE
          631  +1E58; C; 1E59; # LATIN CAPITAL LETTER R WITH DOT ABOVE
          632  +1E5A; C; 1E5B; # LATIN CAPITAL LETTER R WITH DOT BELOW
          633  +1E5C; C; 1E5D; # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
          634  +1E5E; C; 1E5F; # LATIN CAPITAL LETTER R WITH LINE BELOW
          635  +1E60; C; 1E61; # LATIN CAPITAL LETTER S WITH DOT ABOVE
          636  +1E62; C; 1E63; # LATIN CAPITAL LETTER S WITH DOT BELOW
          637  +1E64; C; 1E65; # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
          638  +1E66; C; 1E67; # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
          639  +1E68; C; 1E69; # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
          640  +1E6A; C; 1E6B; # LATIN CAPITAL LETTER T WITH DOT ABOVE
          641  +1E6C; C; 1E6D; # LATIN CAPITAL LETTER T WITH DOT BELOW
          642  +1E6E; C; 1E6F; # LATIN CAPITAL LETTER T WITH LINE BELOW
          643  +1E70; C; 1E71; # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
          644  +1E72; C; 1E73; # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
          645  +1E74; C; 1E75; # LATIN CAPITAL LETTER U WITH TILDE BELOW
          646  +1E76; C; 1E77; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
          647  +1E78; C; 1E79; # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
          648  +1E7A; C; 1E7B; # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
          649  +1E7C; C; 1E7D; # LATIN CAPITAL LETTER V WITH TILDE
          650  +1E7E; C; 1E7F; # LATIN CAPITAL LETTER V WITH DOT BELOW
          651  +1E80; C; 1E81; # LATIN CAPITAL LETTER W WITH GRAVE
          652  +1E82; C; 1E83; # LATIN CAPITAL LETTER W WITH ACUTE
          653  +1E84; C; 1E85; # LATIN CAPITAL LETTER W WITH DIAERESIS
          654  +1E86; C; 1E87; # LATIN CAPITAL LETTER W WITH DOT ABOVE
          655  +1E88; C; 1E89; # LATIN CAPITAL LETTER W WITH DOT BELOW
          656  +1E8A; C; 1E8B; # LATIN CAPITAL LETTER X WITH DOT ABOVE
          657  +1E8C; C; 1E8D; # LATIN CAPITAL LETTER X WITH DIAERESIS
          658  +1E8E; C; 1E8F; # LATIN CAPITAL LETTER Y WITH DOT ABOVE
          659  +1E90; C; 1E91; # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
          660  +1E92; C; 1E93; # LATIN CAPITAL LETTER Z WITH DOT BELOW
          661  +1E94; C; 1E95; # LATIN CAPITAL LETTER Z WITH LINE BELOW
          662  +1E96; F; 0068 0331; # LATIN SMALL LETTER H WITH LINE BELOW
          663  +1E97; F; 0074 0308; # LATIN SMALL LETTER T WITH DIAERESIS
          664  +1E98; F; 0077 030A; # LATIN SMALL LETTER W WITH RING ABOVE
          665  +1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE
          666  +1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING
          667  +1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE
          668  +1E9E; F; 0073 0073; # LATIN CAPITAL LETTER SHARP S
          669  +1E9E; S; 00DF; # LATIN CAPITAL LETTER SHARP S
          670  +1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW
          671  +1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE
          672  +1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
          673  +1EA6; C; 1EA7; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
          674  +1EA8; C; 1EA9; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
          675  +1EAA; C; 1EAB; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
          676  +1EAC; C; 1EAD; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
          677  +1EAE; C; 1EAF; # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
          678  +1EB0; C; 1EB1; # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
          679  +1EB2; C; 1EB3; # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
          680  +1EB4; C; 1EB5; # LATIN CAPITAL LETTER A WITH BREVE AND TILDE
          681  +1EB6; C; 1EB7; # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
          682  +1EB8; C; 1EB9; # LATIN CAPITAL LETTER E WITH DOT BELOW
          683  +1EBA; C; 1EBB; # LATIN CAPITAL LETTER E WITH HOOK ABOVE
          684  +1EBC; C; 1EBD; # LATIN CAPITAL LETTER E WITH TILDE
          685  +1EBE; C; 1EBF; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
          686  +1EC0; C; 1EC1; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
          687  +1EC2; C; 1EC3; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
          688  +1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
          689  +1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
          690  +1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE
          691  +1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW
          692  +1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW
          693  +1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE
          694  +1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
          695  +1ED2; C; 1ED3; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
          696  +1ED4; C; 1ED5; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
          697  +1ED6; C; 1ED7; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
          698  +1ED8; C; 1ED9; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
          699  +1EDA; C; 1EDB; # LATIN CAPITAL LETTER O WITH HORN AND ACUTE
          700  +1EDC; C; 1EDD; # LATIN CAPITAL LETTER O WITH HORN AND GRAVE
          701  +1EDE; C; 1EDF; # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
          702  +1EE0; C; 1EE1; # LATIN CAPITAL LETTER O WITH HORN AND TILDE
          703  +1EE2; C; 1EE3; # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
          704  +1EE4; C; 1EE5; # LATIN CAPITAL LETTER U WITH DOT BELOW
          705  +1EE6; C; 1EE7; # LATIN CAPITAL LETTER U WITH HOOK ABOVE
          706  +1EE8; C; 1EE9; # LATIN CAPITAL LETTER U WITH HORN AND ACUTE
          707  +1EEA; C; 1EEB; # LATIN CAPITAL LETTER U WITH HORN AND GRAVE
          708  +1EEC; C; 1EED; # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
          709  +1EEE; C; 1EEF; # LATIN CAPITAL LETTER U WITH HORN AND TILDE
          710  +1EF0; C; 1EF1; # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
          711  +1EF2; C; 1EF3; # LATIN CAPITAL LETTER Y WITH GRAVE
          712  +1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW
          713  +1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE
          714  +1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE
          715  +1EFA; C; 1EFB; # LATIN CAPITAL LETTER MIDDLE-WELSH LL
          716  +1EFC; C; 1EFD; # LATIN CAPITAL LETTER MIDDLE-WELSH V
          717  +1EFE; C; 1EFF; # LATIN CAPITAL LETTER Y WITH LOOP
          718  +1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI
          719  +1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA
          720  +1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA
          721  +1F0B; C; 1F03; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA
          722  +1F0C; C; 1F04; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA
          723  +1F0D; C; 1F05; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA
          724  +1F0E; C; 1F06; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI
          725  +1F0F; C; 1F07; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
          726  +1F18; C; 1F10; # GREEK CAPITAL LETTER EPSILON WITH PSILI
          727  +1F19; C; 1F11; # GREEK CAPITAL LETTER EPSILON WITH DASIA
          728  +1F1A; C; 1F12; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA
          729  +1F1B; C; 1F13; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA
          730  +1F1C; C; 1F14; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA
          731  +1F1D; C; 1F15; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
          732  +1F28; C; 1F20; # GREEK CAPITAL LETTER ETA WITH PSILI
          733  +1F29; C; 1F21; # GREEK CAPITAL LETTER ETA WITH DASIA
          734  +1F2A; C; 1F22; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA
          735  +1F2B; C; 1F23; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA
          736  +1F2C; C; 1F24; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA
          737  +1F2D; C; 1F25; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA
          738  +1F2E; C; 1F26; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI
          739  +1F2F; C; 1F27; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
          740  +1F38; C; 1F30; # GREEK CAPITAL LETTER IOTA WITH PSILI
          741  +1F39; C; 1F31; # GREEK CAPITAL LETTER IOTA WITH DASIA
          742  +1F3A; C; 1F32; # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA
          743  +1F3B; C; 1F33; # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA
          744  +1F3C; C; 1F34; # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA
          745  +1F3D; C; 1F35; # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA
          746  +1F3E; C; 1F36; # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI
          747  +1F3F; C; 1F37; # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI
          748  +1F48; C; 1F40; # GREEK CAPITAL LETTER OMICRON WITH PSILI
          749  +1F49; C; 1F41; # GREEK CAPITAL LETTER OMICRON WITH DASIA
          750  +1F4A; C; 1F42; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA
          751  +1F4B; C; 1F43; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA
          752  +1F4C; C; 1F44; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA
          753  +1F4D; C; 1F45; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
          754  +1F50; F; 03C5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI
          755  +1F52; F; 03C5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
          756  +1F54; F; 03C5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
          757  +1F56; F; 03C5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
          758  +1F59; C; 1F51; # GREEK CAPITAL LETTER UPSILON WITH DASIA
          759  +1F5B; C; 1F53; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
          760  +1F5D; C; 1F55; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
          761  +1F5F; C; 1F57; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
          762  +1F68; C; 1F60; # GREEK CAPITAL LETTER OMEGA WITH PSILI
          763  +1F69; C; 1F61; # GREEK CAPITAL LETTER OMEGA WITH DASIA
          764  +1F6A; C; 1F62; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA
          765  +1F6B; C; 1F63; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA
          766  +1F6C; C; 1F64; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA
          767  +1F6D; C; 1F65; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA
          768  +1F6E; C; 1F66; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI
          769  +1F6F; C; 1F67; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
          770  +1F80; F; 1F00 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
          771  +1F81; F; 1F01 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
          772  +1F82; F; 1F02 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
          773  +1F83; F; 1F03 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
          774  +1F84; F; 1F04 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
          775  +1F85; F; 1F05 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
          776  +1F86; F; 1F06 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
          777  +1F87; F; 1F07 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
          778  +1F88; F; 1F00 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
          779  +1F88; S; 1F80; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
          780  +1F89; F; 1F01 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
          781  +1F89; S; 1F81; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
          782  +1F8A; F; 1F02 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
          783  +1F8A; S; 1F82; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
          784  +1F8B; F; 1F03 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
          785  +1F8B; S; 1F83; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
          786  +1F8C; F; 1F04 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
          787  +1F8C; S; 1F84; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
          788  +1F8D; F; 1F05 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
          789  +1F8D; S; 1F85; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
          790  +1F8E; F; 1F06 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
          791  +1F8E; S; 1F86; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
          792  +1F8F; F; 1F07 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
          793  +1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
          794  +1F90; F; 1F20 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
          795  +1F91; F; 1F21 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
          796  +1F92; F; 1F22 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
          797  +1F93; F; 1F23 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
          798  +1F94; F; 1F24 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
          799  +1F95; F; 1F25 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
          800  +1F96; F; 1F26 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
          801  +1F97; F; 1F27 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
          802  +1F98; F; 1F20 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
          803  +1F98; S; 1F90; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
          804  +1F99; F; 1F21 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
          805  +1F99; S; 1F91; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
          806  +1F9A; F; 1F22 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
          807  +1F9A; S; 1F92; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
          808  +1F9B; F; 1F23 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
          809  +1F9B; S; 1F93; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
          810  +1F9C; F; 1F24 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
          811  +1F9C; S; 1F94; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
          812  +1F9D; F; 1F25 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
          813  +1F9D; S; 1F95; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
          814  +1F9E; F; 1F26 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
          815  +1F9E; S; 1F96; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
          816  +1F9F; F; 1F27 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
          817  +1F9F; S; 1F97; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
          818  +1FA0; F; 1F60 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
          819  +1FA1; F; 1F61 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
          820  +1FA2; F; 1F62 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
          821  +1FA3; F; 1F63 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
          822  +1FA4; F; 1F64 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
          823  +1FA5; F; 1F65 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
          824  +1FA6; F; 1F66 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
          825  +1FA7; F; 1F67 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
          826  +1FA8; F; 1F60 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
          827  +1FA8; S; 1FA0; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
          828  +1FA9; F; 1F61 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
          829  +1FA9; S; 1FA1; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
          830  +1FAA; F; 1F62 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
          831  +1FAA; S; 1FA2; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
          832  +1FAB; F; 1F63 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
          833  +1FAB; S; 1FA3; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
          834  +1FAC; F; 1F64 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
          835  +1FAC; S; 1FA4; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
          836  +1FAD; F; 1F65 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
          837  +1FAD; S; 1FA5; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
          838  +1FAE; F; 1F66 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
          839  +1FAE; S; 1FA6; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
          840  +1FAF; F; 1F67 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
          841  +1FAF; S; 1FA7; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
          842  +1FB2; F; 1F70 03B9; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
          843  +1FB3; F; 03B1 03B9; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
          844  +1FB4; F; 03AC 03B9; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
          845  +1FB6; F; 03B1 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI
          846  +1FB7; F; 03B1 0342 03B9; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
          847  +1FB8; C; 1FB0; # GREEK CAPITAL LETTER ALPHA WITH VRACHY
          848  +1FB9; C; 1FB1; # GREEK CAPITAL LETTER ALPHA WITH MACRON
          849  +1FBA; C; 1F70; # GREEK CAPITAL LETTER ALPHA WITH VARIA
          850  +1FBB; C; 1F71; # GREEK CAPITAL LETTER ALPHA WITH OXIA
          851  +1FBC; F; 03B1 03B9; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
          852  +1FBC; S; 1FB3; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
          853  +1FBE; C; 03B9; # GREEK PROSGEGRAMMENI
          854  +1FC2; F; 1F74 03B9; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
          855  +1FC3; F; 03B7 03B9; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
          856  +1FC4; F; 03AE 03B9; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
          857  +1FC6; F; 03B7 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI
          858  +1FC7; F; 03B7 0342 03B9; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
          859  +1FC8; C; 1F72; # GREEK CAPITAL LETTER EPSILON WITH VARIA
          860  +1FC9; C; 1F73; # GREEK CAPITAL LETTER EPSILON WITH OXIA
          861  +1FCA; C; 1F74; # GREEK CAPITAL LETTER ETA WITH VARIA
          862  +1FCB; C; 1F75; # GREEK CAPITAL LETTER ETA WITH OXIA
          863  +1FCC; F; 03B7 03B9; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
          864  +1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
          865  +1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
          866  +1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
          867  +1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI
          868  +1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
          869  +1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY
          870  +1FD9; C; 1FD1; # GREEK CAPITAL LETTER IOTA WITH MACRON
          871  +1FDA; C; 1F76; # GREEK CAPITAL LETTER IOTA WITH VARIA
          872  +1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA
          873  +1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
          874  +1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
          875  +1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI
          876  +1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
          877  +1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
          878  +1FE8; C; 1FE0; # GREEK CAPITAL LETTER UPSILON WITH VRACHY
          879  +1FE9; C; 1FE1; # GREEK CAPITAL LETTER UPSILON WITH MACRON
          880  +1FEA; C; 1F7A; # GREEK CAPITAL LETTER UPSILON WITH VARIA
          881  +1FEB; C; 1F7B; # GREEK CAPITAL LETTER UPSILON WITH OXIA
          882  +1FEC; C; 1FE5; # GREEK CAPITAL LETTER RHO WITH DASIA
          883  +1FF2; F; 1F7C 03B9; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
          884  +1FF3; F; 03C9 03B9; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
          885  +1FF4; F; 03CE 03B9; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
          886  +1FF6; F; 03C9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
          887  +1FF7; F; 03C9 0342 03B9; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
          888  +1FF8; C; 1F78; # GREEK CAPITAL LETTER OMICRON WITH VARIA
          889  +1FF9; C; 1F79; # GREEK CAPITAL LETTER OMICRON WITH OXIA
          890  +1FFA; C; 1F7C; # GREEK CAPITAL LETTER OMEGA WITH VARIA
          891  +1FFB; C; 1F7D; # GREEK CAPITAL LETTER OMEGA WITH OXIA
          892  +1FFC; F; 03C9 03B9; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
          893  +1FFC; S; 1FF3; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
          894  +2126; C; 03C9; # OHM SIGN
          895  +212A; C; 006B; # KELVIN SIGN
          896  +212B; C; 00E5; # ANGSTROM SIGN
          897  +2132; C; 214E; # TURNED CAPITAL F
          898  +2160; C; 2170; # ROMAN NUMERAL ONE
          899  +2161; C; 2171; # ROMAN NUMERAL TWO
          900  +2162; C; 2172; # ROMAN NUMERAL THREE
          901  +2163; C; 2173; # ROMAN NUMERAL FOUR
          902  +2164; C; 2174; # ROMAN NUMERAL FIVE
          903  +2165; C; 2175; # ROMAN NUMERAL SIX
          904  +2166; C; 2176; # ROMAN NUMERAL SEVEN
          905  +2167; C; 2177; # ROMAN NUMERAL EIGHT
          906  +2168; C; 2178; # ROMAN NUMERAL NINE
          907  +2169; C; 2179; # ROMAN NUMERAL TEN
          908  +216A; C; 217A; # ROMAN NUMERAL ELEVEN
          909  +216B; C; 217B; # ROMAN NUMERAL TWELVE
          910  +216C; C; 217C; # ROMAN NUMERAL FIFTY
          911  +216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED
          912  +216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED
          913  +216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND
          914  +2183; C; 2184; # ROMAN NUMERAL REVERSED ONE HUNDRED
          915  +24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A
          916  +24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B
          917  +24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C
          918  +24B9; C; 24D3; # CIRCLED LATIN CAPITAL LETTER D
          919  +24BA; C; 24D4; # CIRCLED LATIN CAPITAL LETTER E
          920  +24BB; C; 24D5; # CIRCLED LATIN CAPITAL LETTER F
          921  +24BC; C; 24D6; # CIRCLED LATIN CAPITAL LETTER G
          922  +24BD; C; 24D7; # CIRCLED LATIN CAPITAL LETTER H
          923  +24BE; C; 24D8; # CIRCLED LATIN CAPITAL LETTER I
          924  +24BF; C; 24D9; # CIRCLED LATIN CAPITAL LETTER J
          925  +24C0; C; 24DA; # CIRCLED LATIN CAPITAL LETTER K
          926  +24C1; C; 24DB; # CIRCLED LATIN CAPITAL LETTER L
          927  +24C2; C; 24DC; # CIRCLED LATIN CAPITAL LETTER M
          928  +24C3; C; 24DD; # CIRCLED LATIN CAPITAL LETTER N
          929  +24C4; C; 24DE; # CIRCLED LATIN CAPITAL LETTER O
          930  +24C5; C; 24DF; # CIRCLED LATIN CAPITAL LETTER P
          931  +24C6; C; 24E0; # CIRCLED LATIN CAPITAL LETTER Q
          932  +24C7; C; 24E1; # CIRCLED LATIN CAPITAL LETTER R
          933  +24C8; C; 24E2; # CIRCLED LATIN CAPITAL LETTER S
          934  +24C9; C; 24E3; # CIRCLED LATIN CAPITAL LETTER T
          935  +24CA; C; 24E4; # CIRCLED LATIN CAPITAL LETTER U
          936  +24CB; C; 24E5; # CIRCLED LATIN CAPITAL LETTER V
          937  +24CC; C; 24E6; # CIRCLED LATIN CAPITAL LETTER W
          938  +24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X
          939  +24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y
          940  +24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z
          941  +2C00; C; 2C30; # GLAGOLITIC CAPITAL LETTER AZU
          942  +2C01; C; 2C31; # GLAGOLITIC CAPITAL LETTER BUKY
          943  +2C02; C; 2C32; # GLAGOLITIC CAPITAL LETTER VEDE
          944  +2C03; C; 2C33; # GLAGOLITIC CAPITAL LETTER GLAGOLI
          945  +2C04; C; 2C34; # GLAGOLITIC CAPITAL LETTER DOBRO
          946  +2C05; C; 2C35; # GLAGOLITIC CAPITAL LETTER YESTU
          947  +2C06; C; 2C36; # GLAGOLITIC CAPITAL LETTER ZHIVETE
          948  +2C07; C; 2C37; # GLAGOLITIC CAPITAL LETTER DZELO
          949  +2C08; C; 2C38; # GLAGOLITIC CAPITAL LETTER ZEMLJA
          950  +2C09; C; 2C39; # GLAGOLITIC CAPITAL LETTER IZHE
          951  +2C0A; C; 2C3A; # GLAGOLITIC CAPITAL LETTER INITIAL IZHE
          952  +2C0B; C; 2C3B; # GLAGOLITIC CAPITAL LETTER I
          953  +2C0C; C; 2C3C; # GLAGOLITIC CAPITAL LETTER DJERVI
          954  +2C0D; C; 2C3D; # GLAGOLITIC CAPITAL LETTER KAKO
          955  +2C0E; C; 2C3E; # GLAGOLITIC CAPITAL LETTER LJUDIJE
          956  +2C0F; C; 2C3F; # GLAGOLITIC CAPITAL LETTER MYSLITE
          957  +2C10; C; 2C40; # GLAGOLITIC CAPITAL LETTER NASHI
          958  +2C11; C; 2C41; # GLAGOLITIC CAPITAL LETTER ONU
          959  +2C12; C; 2C42; # GLAGOLITIC CAPITAL LETTER POKOJI
          960  +2C13; C; 2C43; # GLAGOLITIC CAPITAL LETTER RITSI
          961  +2C14; C; 2C44; # GLAGOLITIC CAPITAL LETTER SLOVO
          962  +2C15; C; 2C45; # GLAGOLITIC CAPITAL LETTER TVRIDO
          963  +2C16; C; 2C46; # GLAGOLITIC CAPITAL LETTER UKU
          964  +2C17; C; 2C47; # GLAGOLITIC CAPITAL LETTER FRITU
          965  +2C18; C; 2C48; # GLAGOLITIC CAPITAL LETTER HERU
          966  +2C19; C; 2C49; # GLAGOLITIC CAPITAL LETTER OTU
          967  +2C1A; C; 2C4A; # GLAGOLITIC CAPITAL LETTER PE
          968  +2C1B; C; 2C4B; # GLAGOLITIC CAPITAL LETTER SHTA
          969  +2C1C; C; 2C4C; # GLAGOLITIC CAPITAL LETTER TSI
          970  +2C1D; C; 2C4D; # GLAGOLITIC CAPITAL LETTER CHRIVI
          971  +2C1E; C; 2C4E; # GLAGOLITIC CAPITAL LETTER SHA
          972  +2C1F; C; 2C4F; # GLAGOLITIC CAPITAL LETTER YERU
          973  +2C20; C; 2C50; # GLAGOLITIC CAPITAL LETTER YERI
          974  +2C21; C; 2C51; # GLAGOLITIC CAPITAL LETTER YATI
          975  +2C22; C; 2C52; # GLAGOLITIC CAPITAL LETTER SPIDERY HA
          976  +2C23; C; 2C53; # GLAGOLITIC CAPITAL LETTER YU
          977  +2C24; C; 2C54; # GLAGOLITIC CAPITAL LETTER SMALL YUS
          978  +2C25; C; 2C55; # GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL
          979  +2C26; C; 2C56; # GLAGOLITIC CAPITAL LETTER YO
          980  +2C27; C; 2C57; # GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS
          981  +2C28; C; 2C58; # GLAGOLITIC CAPITAL LETTER BIG YUS
          982  +2C29; C; 2C59; # GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS
          983  +2C2A; C; 2C5A; # GLAGOLITIC CAPITAL LETTER FITA
          984  +2C2B; C; 2C5B; # GLAGOLITIC CAPITAL LETTER IZHITSA
          985  +2C2C; C; 2C5C; # GLAGOLITIC CAPITAL LETTER SHTAPIC
          986  +2C2D; C; 2C5D; # GLAGOLITIC CAPITAL LETTER TROKUTASTI A
          987  +2C2E; C; 2C5E; # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
          988  +2C60; C; 2C61; # LATIN CAPITAL LETTER L WITH DOUBLE BAR
          989  +2C62; C; 026B; # LATIN CAPITAL LETTER L WITH MIDDLE TILDE
          990  +2C63; C; 1D7D; # LATIN CAPITAL LETTER P WITH STROKE
          991  +2C64; C; 027D; # LATIN CAPITAL LETTER R WITH TAIL
          992  +2C67; C; 2C68; # LATIN CAPITAL LETTER H WITH DESCENDER
          993  +2C69; C; 2C6A; # LATIN CAPITAL LETTER K WITH DESCENDER
          994  +2C6B; C; 2C6C; # LATIN CAPITAL LETTER Z WITH DESCENDER
          995  +2C6D; C; 0251; # LATIN CAPITAL LETTER ALPHA
          996  +2C6E; C; 0271; # LATIN CAPITAL LETTER M WITH HOOK
          997  +2C6F; C; 0250; # LATIN CAPITAL LETTER TURNED A
          998  +2C70; C; 0252; # LATIN CAPITAL LETTER TURNED ALPHA
          999  +2C72; C; 2C73; # LATIN CAPITAL LETTER W WITH HOOK
         1000  +2C75; C; 2C76; # LATIN CAPITAL LETTER HALF H
         1001  +2C7E; C; 023F; # LATIN CAPITAL LETTER S WITH SWASH TAIL
         1002  +2C7F; C; 0240; # LATIN CAPITAL LETTER Z WITH SWASH TAIL
         1003  +2C80; C; 2C81; # COPTIC CAPITAL LETTER ALFA
         1004  +2C82; C; 2C83; # COPTIC CAPITAL LETTER VIDA
         1005  +2C84; C; 2C85; # COPTIC CAPITAL LETTER GAMMA
         1006  +2C86; C; 2C87; # COPTIC CAPITAL LETTER DALDA
         1007  +2C88; C; 2C89; # COPTIC CAPITAL LETTER EIE
         1008  +2C8A; C; 2C8B; # COPTIC CAPITAL LETTER SOU
         1009  +2C8C; C; 2C8D; # COPTIC CAPITAL LETTER ZATA
         1010  +2C8E; C; 2C8F; # COPTIC CAPITAL LETTER HATE
         1011  +2C90; C; 2C91; # COPTIC CAPITAL LETTER THETHE
         1012  +2C92; C; 2C93; # COPTIC CAPITAL LETTER IAUDA
         1013  +2C94; C; 2C95; # COPTIC CAPITAL LETTER KAPA
         1014  +2C96; C; 2C97; # COPTIC CAPITAL LETTER LAULA
         1015  +2C98; C; 2C99; # COPTIC CAPITAL LETTER MI
         1016  +2C9A; C; 2C9B; # COPTIC CAPITAL LETTER NI
         1017  +2C9C; C; 2C9D; # COPTIC CAPITAL LETTER KSI
         1018  +2C9E; C; 2C9F; # COPTIC CAPITAL LETTER O
         1019  +2CA0; C; 2CA1; # COPTIC CAPITAL LETTER PI
         1020  +2CA2; C; 2CA3; # COPTIC CAPITAL LETTER RO
         1021  +2CA4; C; 2CA5; # COPTIC CAPITAL LETTER SIMA
         1022  +2CA6; C; 2CA7; # COPTIC CAPITAL LETTER TAU
         1023  +2CA8; C; 2CA9; # COPTIC CAPITAL LETTER UA
         1024  +2CAA; C; 2CAB; # COPTIC CAPITAL LETTER FI
         1025  +2CAC; C; 2CAD; # COPTIC CAPITAL LETTER KHI
         1026  +2CAE; C; 2CAF; # COPTIC CAPITAL LETTER PSI
         1027  +2CB0; C; 2CB1; # COPTIC CAPITAL LETTER OOU
         1028  +2CB2; C; 2CB3; # COPTIC CAPITAL LETTER DIALECT-P ALEF
         1029  +2CB4; C; 2CB5; # COPTIC CAPITAL LETTER OLD COPTIC AIN
         1030  +2CB6; C; 2CB7; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE
         1031  +2CB8; C; 2CB9; # COPTIC CAPITAL LETTER DIALECT-P KAPA
         1032  +2CBA; C; 2CBB; # COPTIC CAPITAL LETTER DIALECT-P NI
         1033  +2CBC; C; 2CBD; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI
         1034  +2CBE; C; 2CBF; # COPTIC CAPITAL LETTER OLD COPTIC OOU
         1035  +2CC0; C; 2CC1; # COPTIC CAPITAL LETTER SAMPI
         1036  +2CC2; C; 2CC3; # COPTIC CAPITAL LETTER CROSSED SHEI
         1037  +2CC4; C; 2CC5; # COPTIC CAPITAL LETTER OLD COPTIC SHEI
         1038  +2CC6; C; 2CC7; # COPTIC CAPITAL LETTER OLD COPTIC ESH
         1039  +2CC8; C; 2CC9; # COPTIC CAPITAL LETTER AKHMIMIC KHEI
         1040  +2CCA; C; 2CCB; # COPTIC CAPITAL LETTER DIALECT-P HORI
         1041  +2CCC; C; 2CCD; # COPTIC CAPITAL LETTER OLD COPTIC HORI
         1042  +2CCE; C; 2CCF; # COPTIC CAPITAL LETTER OLD COPTIC HA
         1043  +2CD0; C; 2CD1; # COPTIC CAPITAL LETTER L-SHAPED HA
         1044  +2CD2; C; 2CD3; # COPTIC CAPITAL LETTER OLD COPTIC HEI
         1045  +2CD4; C; 2CD5; # COPTIC CAPITAL LETTER OLD COPTIC HAT
         1046  +2CD6; C; 2CD7; # COPTIC CAPITAL LETTER OLD COPTIC GANGIA
         1047  +2CD8; C; 2CD9; # COPTIC CAPITAL LETTER OLD COPTIC DJA
         1048  +2CDA; C; 2CDB; # COPTIC CAPITAL LETTER OLD COPTIC SHIMA
         1049  +2CDC; C; 2CDD; # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA
         1050  +2CDE; C; 2CDF; # COPTIC CAPITAL LETTER OLD NUBIAN NGI
         1051  +2CE0; C; 2CE1; # COPTIC CAPITAL LETTER OLD NUBIAN NYI
         1052  +2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU
         1053  +2CEB; C; 2CEC; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI
         1054  +2CED; C; 2CEE; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA
         1055  +2CF2; C; 2CF3; # COPTIC CAPITAL LETTER BOHAIRIC KHEI
         1056  +A640; C; A641; # CYRILLIC CAPITAL LETTER ZEMLYA
         1057  +A642; C; A643; # CYRILLIC CAPITAL LETTER DZELO
         1058  +A644; C; A645; # CYRILLIC CAPITAL LETTER REVERSED DZE
         1059  +A646; C; A647; # CYRILLIC CAPITAL LETTER IOTA
         1060  +A648; C; A649; # CYRILLIC CAPITAL LETTER DJERV
         1061  +A64A; C; A64B; # CYRILLIC CAPITAL LETTER MONOGRAPH UK
         1062  +A64C; C; A64D; # CYRILLIC CAPITAL LETTER BROAD OMEGA
         1063  +A64E; C; A64F; # CYRILLIC CAPITAL LETTER NEUTRAL YER
         1064  +A650; C; A651; # CYRILLIC CAPITAL LETTER YERU WITH BACK YER
         1065  +A652; C; A653; # CYRILLIC CAPITAL LETTER IOTIFIED YAT
         1066  +A654; C; A655; # CYRILLIC CAPITAL LETTER REVERSED YU
         1067  +A656; C; A657; # CYRILLIC CAPITAL LETTER IOTIFIED A
         1068  +A658; C; A659; # CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS
         1069  +A65A; C; A65B; # CYRILLIC CAPITAL LETTER BLENDED YUS
         1070  +A65C; C; A65D; # CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS
         1071  +A65E; C; A65F; # CYRILLIC CAPITAL LETTER YN
         1072  +A660; C; A661; # CYRILLIC CAPITAL LETTER REVERSED TSE
         1073  +A662; C; A663; # CYRILLIC CAPITAL LETTER SOFT DE
         1074  +A664; C; A665; # CYRILLIC CAPITAL LETTER SOFT EL
         1075  +A666; C; A667; # CYRILLIC CAPITAL LETTER SOFT EM
         1076  +A668; C; A669; # CYRILLIC CAPITAL LETTER MONOCULAR O
         1077  +A66A; C; A66B; # CYRILLIC CAPITAL LETTER BINOCULAR O
         1078  +A66C; C; A66D; # CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O
         1079  +A680; C; A681; # CYRILLIC CAPITAL LETTER DWE
         1080  +A682; C; A683; # CYRILLIC CAPITAL LETTER DZWE
         1081  +A684; C; A685; # CYRILLIC CAPITAL LETTER ZHWE
         1082  +A686; C; A687; # CYRILLIC CAPITAL LETTER CCHE
         1083  +A688; C; A689; # CYRILLIC CAPITAL LETTER DZZE
         1084  +A68A; C; A68B; # CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK
         1085  +A68C; C; A68D; # CYRILLIC CAPITAL LETTER TWE
         1086  +A68E; C; A68F; # CYRILLIC CAPITAL LETTER TSWE
         1087  +A690; C; A691; # CYRILLIC CAPITAL LETTER TSSE
         1088  +A692; C; A693; # CYRILLIC CAPITAL LETTER TCHE
         1089  +A694; C; A695; # CYRILLIC CAPITAL LETTER HWE
         1090  +A696; C; A697; # CYRILLIC CAPITAL LETTER SHWE
         1091  +A722; C; A723; # LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF
         1092  +A724; C; A725; # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN
         1093  +A726; C; A727; # LATIN CAPITAL LETTER HENG
         1094  +A728; C; A729; # LATIN CAPITAL LETTER TZ
         1095  +A72A; C; A72B; # LATIN CAPITAL LETTER TRESILLO
         1096  +A72C; C; A72D; # LATIN CAPITAL LETTER CUATRILLO
         1097  +A72E; C; A72F; # LATIN CAPITAL LETTER CUATRILLO WITH COMMA
         1098  +A732; C; A733; # LATIN CAPITAL LETTER AA
         1099  +A734; C; A735; # LATIN CAPITAL LETTER AO
         1100  +A736; C; A737; # LATIN CAPITAL LETTER AU
         1101  +A738; C; A739; # LATIN CAPITAL LETTER AV
         1102  +A73A; C; A73B; # LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR
         1103  +A73C; C; A73D; # LATIN CAPITAL LETTER AY
         1104  +A73E; C; A73F; # LATIN CAPITAL LETTER REVERSED C WITH DOT
         1105  +A740; C; A741; # LATIN CAPITAL LETTER K WITH STROKE
         1106  +A742; C; A743; # LATIN CAPITAL LETTER K WITH DIAGONAL STROKE
         1107  +A744; C; A745; # LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE
         1108  +A746; C; A747; # LATIN CAPITAL LETTER BROKEN L
         1109  +A748; C; A749; # LATIN CAPITAL LETTER L WITH HIGH STROKE
         1110  +A74A; C; A74B; # LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY
         1111  +A74C; C; A74D; # LATIN CAPITAL LETTER O WITH LOOP
         1112  +A74E; C; A74F; # LATIN CAPITAL LETTER OO
         1113  +A750; C; A751; # LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER
         1114  +A752; C; A753; # LATIN CAPITAL LETTER P WITH FLOURISH
         1115  +A754; C; A755; # LATIN CAPITAL LETTER P WITH SQUIRREL TAIL
         1116  +A756; C; A757; # LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER
         1117  +A758; C; A759; # LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE
         1118  +A75A; C; A75B; # LATIN CAPITAL LETTER R ROTUNDA
         1119  +A75C; C; A75D; # LATIN CAPITAL LETTER RUM ROTUNDA
         1120  +A75E; C; A75F; # LATIN CAPITAL LETTER V WITH DIAGONAL STROKE
         1121  +A760; C; A761; # LATIN CAPITAL LETTER VY
         1122  +A762; C; A763; # LATIN CAPITAL LETTER VISIGOTHIC Z
         1123  +A764; C; A765; # LATIN CAPITAL LETTER THORN WITH STROKE
         1124  +A766; C; A767; # LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER
         1125  +A768; C; A769; # LATIN CAPITAL LETTER VEND
         1126  +A76A; C; A76B; # LATIN CAPITAL LETTER ET
         1127  +A76C; C; A76D; # LATIN CAPITAL LETTER IS
         1128  +A76E; C; A76F; # LATIN CAPITAL LETTER CON
         1129  +A779; C; A77A; # LATIN CAPITAL LETTER INSULAR D
         1130  +A77B; C; A77C; # LATIN CAPITAL LETTER INSULAR F
         1131  +A77D; C; 1D79; # LATIN CAPITAL LETTER INSULAR G
         1132  +A77E; C; A77F; # LATIN CAPITAL LETTER TURNED INSULAR G
         1133  +A780; C; A781; # LATIN CAPITAL LETTER TURNED L
         1134  +A782; C; A783; # LATIN CAPITAL LETTER INSULAR R
         1135  +A784; C; A785; # LATIN CAPITAL LETTER INSULAR S
         1136  +A786; C; A787; # LATIN CAPITAL LETTER INSULAR T
         1137  +A78B; C; A78C; # LATIN CAPITAL LETTER SALTILLO
         1138  +A78D; C; 0265; # LATIN CAPITAL LETTER TURNED H
         1139  +A790; C; A791; # LATIN CAPITAL LETTER N WITH DESCENDER
         1140  +A792; C; A793; # LATIN CAPITAL LETTER C WITH BAR
         1141  +A7A0; C; A7A1; # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE
         1142  +A7A2; C; A7A3; # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE
         1143  +A7A4; C; A7A5; # LATIN CAPITAL LETTER N WITH OBLIQUE STROKE
         1144  +A7A6; C; A7A7; # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE
         1145  +A7A8; C; A7A9; # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE
         1146  +A7AA; C; 0266; # LATIN CAPITAL LETTER H WITH HOOK
         1147  +FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF
         1148  +FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI
         1149  +FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL
         1150  +FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI
         1151  +FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL
         1152  +FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T
         1153  +FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST
         1154  +FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW
         1155  +FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH
         1156  +FB15; F; 0574 056B; # ARMENIAN SMALL LIGATURE MEN INI
         1157  +FB16; F; 057E 0576; # ARMENIAN SMALL LIGATURE VEW NOW
         1158  +FB17; F; 0574 056D; # ARMENIAN SMALL LIGATURE MEN XEH
         1159  +FF21; C; FF41; # FULLWIDTH LATIN CAPITAL LETTER A
         1160  +FF22; C; FF42; # FULLWIDTH LATIN CAPITAL LETTER B
         1161  +FF23; C; FF43; # FULLWIDTH LATIN CAPITAL LETTER C
         1162  +FF24; C; FF44; # FULLWIDTH LATIN CAPITAL LETTER D
         1163  +FF25; C; FF45; # FULLWIDTH LATIN CAPITAL LETTER E
         1164  +FF26; C; FF46; # FULLWIDTH LATIN CAPITAL LETTER F
         1165  +FF27; C; FF47; # FULLWIDTH LATIN CAPITAL LETTER G
         1166  +FF28; C; FF48; # FULLWIDTH LATIN CAPITAL LETTER H
         1167  +FF29; C; FF49; # FULLWIDTH LATIN CAPITAL LETTER I
         1168  +FF2A; C; FF4A; # FULLWIDTH LATIN CAPITAL LETTER J
         1169  +FF2B; C; FF4B; # FULLWIDTH LATIN CAPITAL LETTER K
         1170  +FF2C; C; FF4C; # FULLWIDTH LATIN CAPITAL LETTER L
         1171  +FF2D; C; FF4D; # FULLWIDTH LATIN CAPITAL LETTER M
         1172  +FF2E; C; FF4E; # FULLWIDTH LATIN CAPITAL LETTER N
         1173  +FF2F; C; FF4F; # FULLWIDTH LATIN CAPITAL LETTER O
         1174  +FF30; C; FF50; # FULLWIDTH LATIN CAPITAL LETTER P
         1175  +FF31; C; FF51; # FULLWIDTH LATIN CAPITAL LETTER Q
         1176  +FF32; C; FF52; # FULLWIDTH LATIN CAPITAL LETTER R
         1177  +FF33; C; FF53; # FULLWIDTH LATIN CAPITAL LETTER S
         1178  +FF34; C; FF54; # FULLWIDTH LATIN CAPITAL LETTER T
         1179  +FF35; C; FF55; # FULLWIDTH LATIN CAPITAL LETTER U
         1180  +FF36; C; FF56; # FULLWIDTH LATIN CAPITAL LETTER V
         1181  +FF37; C; FF57; # FULLWIDTH LATIN CAPITAL LETTER W
         1182  +FF38; C; FF58; # FULLWIDTH LATIN CAPITAL LETTER X
         1183  +FF39; C; FF59; # FULLWIDTH LATIN CAPITAL LETTER Y
         1184  +FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
         1185  +10400; C; 10428; # DESERET CAPITAL LETTER LONG I
         1186  +10401; C; 10429; # DESERET CAPITAL LETTER LONG E
         1187  +10402; C; 1042A; # DESERET CAPITAL LETTER LONG A
         1188  +10403; C; 1042B; # DESERET CAPITAL LETTER LONG AH
         1189  +10404; C; 1042C; # DESERET CAPITAL LETTER LONG O
         1190  +10405; C; 1042D; # DESERET CAPITAL LETTER LONG OO
         1191  +10406; C; 1042E; # DESERET CAPITAL LETTER SHORT I
         1192  +10407; C; 1042F; # DESERET CAPITAL LETTER SHORT E
         1193  +10408; C; 10430; # DESERET CAPITAL LETTER SHORT A
         1194  +10409; C; 10431; # DESERET CAPITAL LETTER SHORT AH
         1195  +1040A; C; 10432; # DESERET CAPITAL LETTER SHORT O
         1196  +1040B; C; 10433; # DESERET CAPITAL LETTER SHORT OO
         1197  +1040C; C; 10434; # DESERET CAPITAL LETTER AY
         1198  +1040D; C; 10435; # DESERET CAPITAL LETTER OW
         1199  +1040E; C; 10436; # DESERET CAPITAL LETTER WU
         1200  +1040F; C; 10437; # DESERET CAPITAL LETTER YEE
         1201  +10410; C; 10438; # DESERET CAPITAL LETTER H
         1202  +10411; C; 10439; # DESERET CAPITAL LETTER PEE
         1203  +10412; C; 1043A; # DESERET CAPITAL LETTER BEE
         1204  +10413; C; 1043B; # DESERET CAPITAL LETTER TEE
         1205  +10414; C; 1043C; # DESERET CAPITAL LETTER DEE
         1206  +10415; C; 1043D; # DESERET CAPITAL LETTER CHEE
         1207  +10416; C; 1043E; # DESERET CAPITAL LETTER JEE
         1208  +10417; C; 1043F; # DESERET CAPITAL LETTER KAY
         1209  +10418; C; 10440; # DESERET CAPITAL LETTER GAY
         1210  +10419; C; 10441; # DESERET CAPITAL LETTER EF
         1211  +1041A; C; 10442; # DESERET CAPITAL LETTER VEE
         1212  +1041B; C; 10443; # DESERET CAPITAL LETTER ETH
         1213  +1041C; C; 10444; # DESERET CAPITAL LETTER THEE
         1214  +1041D; C; 10445; # DESERET CAPITAL LETTER ES
         1215  +1041E; C; 10446; # DESERET CAPITAL LETTER ZEE
         1216  +1041F; C; 10447; # DESERET CAPITAL LETTER ESH
         1217  +10420; C; 10448; # DESERET CAPITAL LETTER ZHEE
         1218  +10421; C; 10449; # DESERET CAPITAL LETTER ER
         1219  +10422; C; 1044A; # DESERET CAPITAL LETTER EL
         1220  +10423; C; 1044B; # DESERET CAPITAL LETTER EM
         1221  +10424; C; 1044C; # DESERET CAPITAL LETTER EN
         1222  +10425; C; 1044D; # DESERET CAPITAL LETTER ENG
         1223  +10426; C; 1044E; # DESERET CAPITAL LETTER OI
         1224  +10427; C; 1044F; # DESERET CAPITAL LETTER EW

Added ext/fts3/unicode/UnicodeData.txt.

more than 10,000 changes

Added ext/fts3/unicode/mkunicode.tcl.

            1  +
            2  +
            3  +# Parameter $zName must be a path to the file UnicodeData.txt. This command
            4  +# reads the file and returns a list of codepoints (integers). The list
            5  +# contains all codepoints in the UnicodeData.txt assigned to any "General
            6  +# Category" that is not a "Letter" or "Number".
            7  +#
            8  +proc an_load_unicodedata_text {zName} {
            9  +  set fd [open $zName]
           10  +  set lField {
           11  +    code
           12  +    character_name
           13  +    general_category
           14  +    canonical_combining_classes
           15  +    bidirectional_category
           16  +    character_decomposition_mapping
           17  +    decimal_digit_value
           18  +    digit_value
           19  +    numeric_value
           20  +    mirrored
           21  +    unicode_1_name
           22  +    iso10646_comment_field
           23  +    uppercase_mapping
           24  +    lowercase_mapping
           25  +    titlecase_mapping
           26  +  }
           27  +  set lRet [list]
           28  +
           29  +  while { ![eof $fd] } {
           30  +    set line [gets $fd]
           31  +    if {$line == ""} continue
           32  +
           33  +    set fields [split $line ";"]
           34  +    if {[llength $fields] != [llength $lField]} { error "parse error: $line" }
           35  +    foreach $lField $fields {}
           36  +
           37  +    set iCode [expr "0x$code"]
           38  +    set bAlnum [expr {[lsearch {L N} [string range $general_category 0 0]]>=0}]
           39  +
           40  +    if { !$bAlnum } { lappend lRet $iCode }
           41  +  }
           42  +
           43  +  close $fd
           44  +  set lRet
           45  +}
           46  +
           47  +proc an_load_separator_ranges {} {
           48  +  global unicodedata.txt
           49  +  set lSep [an_load_unicodedata_text ${unicodedata.txt}]
           50  +  unset -nocomplain iFirst 
           51  +  unset -nocomplain nRange 
           52  +  set lRange [list]
           53  +  foreach sep $lSep {
           54  +    if {0==[info exists iFirst]} {
           55  +      set iFirst $sep
           56  +      set nRange 1
           57  +    } elseif { $sep == ($iFirst+$nRange) } {
           58  +      incr nRange
           59  +    } else {
           60  +      lappend lRange [list $iFirst $nRange]
           61  +      set iFirst $sep
           62  +      set nRange 1
           63  +    }
           64  +  } 
           65  +  lappend lRange [list $iFirst $nRange]
           66  +  set lRange
           67  +}
           68  +
           69  +proc an_print_range_array {lRange} {
           70  +  set iFirstMax 0
           71  +  set nRangeMax 0
           72  +  foreach range $lRange {
           73  +    foreach {iFirst nRange} $range {}
           74  +    if {$iFirst > $iFirstMax} {set iFirstMax $iFirst}
           75  +    if {$nRange > $nRangeMax} {set nRangeMax $nRange}
           76  +  }
           77  +  if {$iFirstMax >= (1<<22)} {error "first-max is too large for format"}
           78  +  if {$nRangeMax >= (1<<10)} {error "range-max is too large for format"}
           79  +
           80  +  puts -nonewline "  "
           81  +  puts [string trim {
           82  +  /* Each unsigned integer in the following array corresponds to a contiguous
           83  +  ** range of unicode codepoints that are not either letters or numbers (i.e.
           84  +  ** codepoints for which this function should return 0).
           85  +  **
           86  +  ** The most significant 22 bits in each 32-bit value contain the first 
           87  +  ** codepoint in the range. The least significant 10 bits are used to store
           88  +  ** the size of the range (always at least 1). In other words, the value 
           89  +  ** ((C<<10) + N) represents a range of N codepoints starting with codepoint 
           90  +  ** C. It is not possible to represent a range larger than 1023 codepoints 
           91  +  ** using this format.
           92  +  */
           93  +  }]
           94  +  puts -nonewline "  const static unsigned int aEntry\[\] = \{"
           95  +  set i 0
           96  +  foreach range $lRange {
           97  +    foreach {iFirst nRange} $range {}
           98  +    set u32 [format "0x%08X" [expr ($iFirst<<10) + $nRange]]
           99  +
          100  +    if {($i % 5)==0} {puts "" ; puts -nonewline "   "}
          101  +    puts -nonewline " $u32,"
          102  +    incr i
          103  +  }
          104  +  puts ""
          105  +  puts "  \};"
          106  +}
          107  +
          108  +proc an_print_ascii_bitmap {lRange} {
          109  +  foreach range $lRange {
          110  +    foreach {iFirst nRange} $range {}
          111  +    for {set i $iFirst} {$i < ($iFirst+$nRange)} {incr i} {
          112  +      if {$i<=127} { set a($i) 1 }
          113  +    }
          114  +  }
          115  +
          116  +  set aAscii [list 0 0 0 0]
          117  +  foreach key [array names a] {
          118  +    set idx [expr $key >> 5]
          119  +    lset aAscii $idx [expr [lindex $aAscii $idx] | (1 << ($key&0x001F))]
          120  +  }
          121  +
          122  +  puts "  static const unsigned int aAscii\[4\] = \{"
          123  +  puts -nonewline "   "
          124  +  foreach v $aAscii { puts -nonewline [format " 0x%08X," $v] }
          125  +  puts ""
          126  +  puts "  \};"
          127  +}
          128  +
          129  +proc print_isalnum {zFunc lRange} {
          130  +  puts "/*"
          131  +  puts "** Return true if the argument corresponds to a unicode codepoint"
          132  +  puts "** classified as either a letter or a number. Otherwise false."
          133  +  puts "**"
          134  +  puts "** The results are undefined if the value passed to this function"
          135  +  puts "** is less than zero."
          136  +  puts "*/"
          137  +  puts "int ${zFunc}\(int c)\{"
          138  +  an_print_range_array $lRange
          139  +  an_print_ascii_bitmap $lRange
          140  +  puts {
          141  +  if( c<128 ){
          142  +    return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 );
          143  +  }else if( c<(1<<22) ){
          144  +    unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
          145  +    int iRes;
          146  +    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
          147  +    int iLo = 0;
          148  +    while( iHi>=iLo ){
          149  +      int iTest = (iHi + iLo) / 2;
          150  +      if( key >= aEntry[iTest] ){
          151  +        iRes = iTest;
          152  +        iLo = iTest+1;
          153  +      }else{
          154  +        iHi = iTest-1;
          155  +      }
          156  +    }
          157  +    assert( aEntry[0]<key );
          158  +    assert( key>=aEntry[iRes] );
          159  +    return (c >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));
          160  +  }
          161  +  return 1;}
          162  +  puts "\}"
          163  +}
          164  +
          165  +proc print_test_isalnum {zFunc lRange} {
          166  +  foreach range $lRange {
          167  +    foreach {iFirst nRange} $range {}
          168  +    for {set i $iFirst} {$i < ($iFirst+$nRange)} {incr i} { set a($i) 1 }
          169  +  }
          170  +
          171  +  puts "static int isalnum_test(int *piCode)\{"
          172  +  puts -nonewline "  unsigned char aAlnum\[\] = \{"
          173  +  for {set i 0} {$i < 70000} {incr i} {
          174  +    if {($i % 32)==0} { puts "" ; puts -nonewline "    " }
          175  +    set bFlag [expr ![info exists a($i)]]
          176  +    puts -nonewline "${bFlag},"
          177  +  }
          178  +  puts ""
          179  +  puts "  \};"
          180  +
          181  +  puts -nonewline "  int aLargeSep\[\] = \{"
          182  +  set i 0
          183  +  foreach iSep [lsort -integer [array names a]] {
          184  +    if {$iSep<70000} continue
          185  +    if {($i % 8)==0} { puts "" ; puts -nonewline "   " }
          186  +    puts -nonewline " $iSep,"
          187  +    incr i
          188  +  }
          189  +  puts ""
          190  +  puts "  \};"
          191  +  puts -nonewline "  int aLargeOther\[\] = \{"
          192  +  set i 0
          193  +  foreach iSep [lsort -integer [array names a]] {
          194  +    if {$iSep<70000} continue
          195  +    if {[info exists a([expr $iSep-1])]==0} {
          196  +      if {($i % 8)==0} { puts "" ; puts -nonewline "   " }
          197  +      puts -nonewline " [expr $iSep-1],"
          198  +      incr i
          199  +    }
          200  +    if {[info exists a([expr $iSep+1])]==0} {
          201  +      if {($i % 8)==0} { puts "" ; puts -nonewline "   " }
          202  +      puts -nonewline " [expr $iSep+1],"
          203  +      incr i
          204  +    }
          205  +  }
          206  +  puts ""
          207  +  puts "  \};"
          208  +
          209  +  puts [subst -nocommands {
          210  +  int i;
          211  +  for(i=0; i<sizeof(aAlnum)/sizeof(aAlnum[0]); i++){
          212  +    if( ${zFunc}(i)!=aAlnum[i] ){
          213  +      *piCode = i;
          214  +      return 1;
          215  +    }
          216  +  }
          217  +  for(i=0; i<sizeof(aLargeSep)/sizeof(aLargeSep[0]); i++){
          218  +    if( ${zFunc}(aLargeSep[i])!=0 ){
          219  +      *piCode = aLargeSep[i];
          220  +      return 1;
          221  +    }
          222  +  }
          223  +  for(i=0; i<sizeof(aLargeOther)/sizeof(aLargeOther[0]); i++){
          224  +    if( ${zFunc}(aLargeOther[i])!=1 ){
          225  +      *piCode = aLargeOther[i];
          226  +      return 1;
          227  +    }
          228  +  }
          229  +  }]
          230  +  puts "  return 0;"
          231  +  puts "\}"
          232  +}
          233  +
          234  +#-------------------------------------------------------------------------
          235  +
          236  +proc tl_load_casefolding_txt {zName} {
          237  +  global tl_lookup_table
          238  +
          239  +  set fd [open $zName]
          240  +  while { ![eof $fd] } {
          241  +    set line [gets $fd]
          242  +    if {[string range $line 0 0] == "#"} continue
          243  +    if {$line == ""} continue
          244  +
          245  +    foreach x {a b c d} {unset -nocomplain $x}
          246  +    foreach {a b c d} [split $line ";"] {}
          247  +
          248  +    set a2 [list]
          249  +    set c2 [list]
          250  +    foreach elem $a { lappend a2 [expr "0x[string trim $elem]"] }
          251  +    foreach elem $c { lappend c2 [expr "0x[string trim $elem]"] }
          252  +    set b [string trim $b]
          253  +    set d [string trim $d]
          254  +
          255  +    if {$b=="C" || $b=="S"} { set tl_lookup_table($a2) $c2 }
          256  +  }
          257  +}
          258  +
          259  +proc tl_create_records {} {
          260  +  global tl_lookup_table
          261  +
          262  +  set iFirst ""
          263  +  set nOff 0
          264  +  set nRange 0
          265  +  set nIncr 0
          266  +
          267  +  set lRecord [list]
          268  +  foreach code [lsort -integer [array names tl_lookup_table]] {
          269  +    set mapping $tl_lookup_table($code)
          270  +    if {$iFirst == ""} {
          271  +      set iFirst $code
          272  +      set nOff   [expr $mapping - $code]
          273  +      set nRange 1
          274  +      set nIncr 1
          275  +    } else {
          276  +      set diff [expr $code - ($iFirst + ($nIncr * ($nRange - 1)))]
          277  +      if { $nRange==1 && ($diff==1 || $diff==2) } {
          278  +        set nIncr $diff
          279  +      }
          280  +
          281  +      if {$diff != $nIncr || ($mapping - $code)!=$nOff} {
          282  +        if { $nRange==1 } {set nIncr 1}
          283  +        lappend lRecord [list $iFirst $nIncr $nRange $nOff]
          284  +        set iFirst $code
          285  +        set nOff   [expr $mapping - $code]
          286  +        set nRange 1
          287  +        set nIncr 1
          288  +      } else {
          289  +        incr nRange
          290  +      }
          291  +    }
          292  +  }
          293  +
          294  +  lappend lRecord [list $iFirst $nIncr $nRange $nOff]
          295  +
          296  +  set lRecord
          297  +}
          298  +
          299  +proc tl_print_table_header {} {
          300  +  puts -nonewline "  "
          301  +  puts [string trim {
          302  +  /* Each entry in the following array defines a rule for folding a range
          303  +  ** of codepoints to lower case. The rule applies to a range of nRange
          304  +  ** codepoints starting at codepoint iCode.
          305  +  **
          306  +  ** If the least significant bit in flags is clear, then the rule applies
          307  +  ** to all nRange codepoints (i.e. all nRange codepoints are upper case and
          308  +  ** need to be folded). Or, if it is set, then the rule only applies to
          309  +  ** every second codepoint in the range, starting with codepoint iCode.
          310  +  **
          311  +  ** The 7 most significant bits in flags are an index into the aiOff[]
          312  +  ** array. If a specific codepoint C does require folding, then its lower
          313  +  ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).
          314  +  **
          315  +  ** The contents of this array are generated by parsing the CaseFolding.txt
          316  +  ** file distributed as part of the "Unicode Character Database". See
          317  +  ** http://www.unicode.org for details.
          318  +  */
          319  +  }]
          320  +  puts "  static const struct TableEntry \{"
          321  +  puts "    unsigned short iCode;"
          322  +  puts "    unsigned char flags;"
          323  +  puts "    unsigned char nRange;"
          324  +  puts "  \} aEntry\[\] = \{"
          325  +}
          326  +
          327  +proc tl_print_table_entry {togglevar entry liOff} {
          328  +  upvar $togglevar t
          329  +  foreach {iFirst nIncr nRange nOff} $entry {}
          330  +
          331  +  if {$iFirst > (1<<16)} { return 1 }
          332  +
          333  +  if {[info exists t]==0} {set t 0}
          334  +  if {$t==0} { puts -nonewline "    " }
          335  +
          336  +  set flags 0
          337  +  if {$nIncr==2} { set flags 1 ; set nRange [expr $nRange * 2]}
          338  +  if {$nOff<0}   { incr nOff [expr (1<<16)] }
          339  +
          340  +  set idx [lsearch $liOff $nOff]
          341  +  if {$idx<0} {error "malfunction generating aiOff"}
          342  +  set flags [expr $flags + $idx*2]
          343  +
          344  +  set txt "{$iFirst, $flags, $nRange},"
          345  +  if {$t==2} {
          346  +    puts $txt
          347  +  } else {
          348  +    puts -nonewline [format "% -23s" $txt]
          349  +  }
          350  +  set t [expr ($t+1)%3]
          351  +
          352  +  return 0
          353  +}
          354  +
          355  +proc tl_print_table_footer {togglevar} {
          356  +  upvar $togglevar t
          357  +  if {$t!=0} {puts ""}
          358  +  puts "  \};"
          359  +}
          360  +
          361  +proc tl_print_if_entry {entry} {
          362  +  foreach {iFirst nIncr nRange nOff} $entry {}
          363  +  if {$nIncr==2} {error "tl_print_if_entry needs improvement!"}
          364  +
          365  +  puts "  else if( c>=$iFirst && c<[expr $iFirst+$nRange] )\{"
          366  +  puts "    ret = c + $nOff;"
          367  +  puts "  \}"
          368  +}
          369  +
          370  +proc tl_generate_ioff_table {lRecord} {
          371  +  foreach entry $lRecord {
          372  +    foreach {iFirst nIncr nRange iOff} $entry {}
          373  +    if {$iOff<0}   { incr iOff [expr (1<<16)] }
          374  +    if {[info exists a($iOff)]} continue
          375  +    set a($iOff) 1
          376  +  }
          377  +
          378  +  set liOff [lsort -integer [array names a]]
          379  +  if {[llength $liOff]>128} { error "Too many distinct ioffs" }
          380  +  return $liOff
          381  +}
          382  +
          383  +proc tl_print_ioff_table {liOff} {
          384  +  puts -nonewline "  static const unsigned short aiOff\[\] = \{"
          385  +  set i 0
          386  +  foreach off $liOff {
          387  +    if {($i % 8)==0} {puts "" ; puts -nonewline "   "}
          388  +    puts -nonewline [format "% -7s" "$off,"]
          389  +    incr i
          390  +  }
          391  +  puts ""
          392  +  puts "  \};"
          393  +
          394  +}
          395  +
          396  +proc print_tolower {zFunc} {
          397  +
          398  +  set lRecord [tl_create_records]
          399  +
          400  +  set lHigh [list]
          401  +  puts "/*"
          402  +  puts "** Interpret the argument as a unicode codepoint. If the codepoint"
          403  +  puts "** is an upper case character that has a lower case equivalent,"
          404  +  puts "** return the codepoint corresponding to the lower case version."
          405  +  puts "** Otherwise, return a copy of the argument."
          406  +  puts "**"
          407  +  puts "** The results are undefined if the value passed to this function"
          408  +  puts "** is less than zero."
          409  +  puts "*/"
          410  +  puts "int ${zFunc}\(int c)\{"
          411  +
          412  +  set liOff [tl_generate_ioff_table $lRecord]
          413  +  tl_print_table_header
          414  +  foreach entry $lRecord { 
          415  +    if {[tl_print_table_entry toggle $entry $liOff]} { 
          416  +      lappend lHigh $entry 
          417  +    } 
          418  +  }
          419  +  tl_print_table_footer toggle
          420  +  tl_print_ioff_table $liOff
          421  +
          422  +  puts {
          423  +  int ret = c;
          424  +
          425  +  assert( c>=0 );
          426  +  assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
          427  +
          428  +  if( c<128 ){
          429  +    if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
          430  +  }else if( c<65536 ){
          431  +    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
          432  +    int iLo = 0;
          433  +    int iRes = -1;
          434  +
          435  +    while( iHi>=iLo ){
          436  +      int iTest = (iHi + iLo) / 2;
          437  +      int cmp = (c - aEntry[iTest].iCode);
          438  +      if( cmp>=0 ){
          439  +        iRes = iTest;
          440  +        iLo = iTest+1;
          441  +      }else{
          442  +        iHi = iTest-1;
          443  +      }
          444  +    }
          445  +    assert( iRes<0 || c>=aEntry[iRes].iCode );
          446  +
          447  +    if( iRes>=0 ){
          448  +      const struct TableEntry *p = &aEntry[iRes];
          449  +      if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
          450  +        ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
          451  +        assert( ret>0 );
          452  +      }
          453  +    }
          454  +  }
          455  +  }
          456  +
          457  +  foreach entry $lHigh {
          458  +    tl_print_if_entry $entry
          459  +  }
          460  +
          461  +  puts ""
          462  +  puts "  return ret;"
          463  +  puts "\}"
          464  +}
          465  +
          466  +proc print_tolower_test {zFunc} {
          467  +  global tl_lookup_table
          468  +
          469  +  puts "static int tolower_test(int *piCode)\{"
          470  +  puts -nonewline "  static int aLookup\[\] = \{"
          471  +  for {set i 0} {$i < 70000} {incr i} {
          472  +    set expected $i
          473  +    catch { set expected $tl_lookup_table($i) }
          474  +    if {($i % 8)==0}  { puts "" ; puts -nonewline "    " }
          475  +    puts -nonewline "$expected, "
          476  +  }
          477  +  puts "  \};"
          478  +  puts "  int i;"
          479  +  puts "  for(i=0; i<sizeof(aLookup)/sizeof(aLookup\[0\]); i++)\{"
          480  +  puts "    if( ${zFunc}\(i)!=aLookup\[i\] )\{"
          481  +  puts "      *piCode = i;"
          482  +  puts "      return 1;"
          483  +  puts "    \}"
          484  +  puts "  \}"
          485  +  puts "  return 0;"
          486  +  puts "\}"
          487  +}
          488  +
          489  +
          490  +proc print_fileheader {} {
          491  +  puts [string trim {
          492  +/*
          493  +** 2012 May 25
          494  +**
          495  +** The author disclaims copyright to this source code.  In place of
          496  +** a legal notice, here is a blessing:
          497  +**
          498  +**    May you do good and not evil.
          499  +**    May you find forgiveness for yourself and forgive others.
          500  +**    May you share freely, never taking more than you give.
          501  +**
          502  +******************************************************************************
          503  +*/
          504  +
          505  +/*
          506  +** DO NOT EDIT THIS MACHINE GENERATED FILE.
          507  +*/
          508  +  }]
          509  +  puts ""
          510  +  puts "#if !defined(SQLITE_DISABLE_FTS3_UNICODE)"
          511  +  puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)"
          512  +  puts ""
          513  +  puts "#include <assert.h>"
          514  +  puts ""
          515  +}
          516  +
          517  +proc print_test_main {} {
          518  +  puts ""
          519  +  puts "#include <stdio.h>"
          520  +  puts ""
          521  +  puts "int main(int argc, char **argv)\{"
          522  +  puts "  int r1, r2;"
          523  +  puts "  int code;"
          524  +  puts "  r1 = isalnum_test(&code);"
          525  +  puts "  if( r1 ) printf(\"isalnum(): Problem with code %d\\n\",code);"
          526  +  puts "  else printf(\"isalnum(): test passed\\n\");"
          527  +  puts "  r2 = tolower_test(&code);"
          528  +  puts "  if( r2 ) printf(\"tolower(): Problem with code %d\\n\",code);"
          529  +  puts "  else printf(\"tolower(): test passed\\n\");"
          530  +  puts "  return (r1 || r2);"
          531  +  puts "\}"
          532  +}
          533  +
          534  +# Process the command line arguments. Exit early if they are not to
          535  +# our liking.
          536  +#
          537  +proc usage {} {
          538  +  puts -nonewline stderr "Usage: $::argv0 ?-test? "
          539  +  puts            stderr "<CaseFolding.txt file> <UnicodeData.txt file>"
          540  +  exit 1
          541  +}
          542  +if {[llength $argv]!=2 && [llength $argv]!=3} usage
          543  +if {[llength $argv]==3 && [lindex $argv 0]!="-test"} usage
          544  +set unicodedata.txt [lindex $argv end]
          545  +set casefolding.txt [lindex $argv end-1]
          546  +set generate_test_code [expr {[llength $argv]==3}]
          547  +
          548  +# Print the isalnum() function to stdout.
          549  +#
          550  +print_fileheader
          551  +set lRange [an_load_separator_ranges]
          552  +print_isalnum sqlite3FtsUnicodeIsalnum $lRange
          553  +
          554  +# Leave a gap between the two generated C functions.
          555  +#
          556  +puts ""
          557  +puts ""
          558  +
          559  +# Print the tolower() function to stdout.
          560  +#
          561  +tl_load_casefolding_txt ${casefolding.txt}
          562  +print_tolower sqlite3FtsUnicodeTolower
          563  +
          564  +# Print the test routines and main() function to stdout, if -test 
          565  +# was specified.
          566  +#
          567  +if {$::generate_test_code} {
          568  +  print_test_isalnum sqlite3FtsUnicodeIsalnum $lRange
          569  +  print_tolower_test sqlite3FtsUnicodeTolower 
          570  +  print_test_main 
          571  +}
          572  +
          573  +puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */"
          574  +puts "#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */"

Changes to main.mk.

    52     52   # Object files for the SQLite library.
    53     53   #
    54     54   LIBOBJ+= alter.o analyze.o attach.o auth.o \
    55     55            backup.o bitvec.o btmutex.o btree.o build.o \
    56     56            callback.o complete.o ctime.o date.o delete.o expr.o fault.o fkey.o \
    57     57            fts3.o fts3_aux.o fts3_expr.o fts3_hash.o fts3_icu.o fts3_porter.o \
    58     58            fts3_snippet.o fts3_tokenizer.o fts3_tokenizer1.o \
           59  +	 fts3_unicode.o fts3_unicode2.o \
    59     60            fts3_write.o func.o global.o hash.o \
    60     61            icu.o insert.o journal.o legacy.o loadext.o \
    61     62            main.o malloc.o mem0.o mem1.o mem2.o mem3.o mem5.o \
    62     63            memjournal.o \
    63     64            mutex.o mutex_noop.o mutex_os2.o mutex_unix.o mutex_w32.o \
    64     65            notify.o opcodes.o os.o os_os2.o os_unix.o os_win.o \
    65     66            pager.o parse.o pcache.o pcache1.o pragma.o prepare.o printf.o \
................................................................................
   195    196     $(TOP)/ext/fts3/fts3_hash.h \
   196    197     $(TOP)/ext/fts3/fts3_icu.c \
   197    198     $(TOP)/ext/fts3/fts3_porter.c \
   198    199     $(TOP)/ext/fts3/fts3_snippet.c \
   199    200     $(TOP)/ext/fts3/fts3_tokenizer.h \
   200    201     $(TOP)/ext/fts3/fts3_tokenizer.c \
   201    202     $(TOP)/ext/fts3/fts3_tokenizer1.c \
          203  +  $(TOP)/ext/fts3/fts3_unicode.c \
          204  +  $(TOP)/ext/fts3/fts3_unicode2.c \
   202    205     $(TOP)/ext/fts3/fts3_write.c
   203    206   SRC += \
   204    207     $(TOP)/ext/icu/sqliteicu.h \
   205    208     $(TOP)/ext/icu/icu.c
   206    209   SRC += \
   207    210     $(TOP)/ext/rtree/rtree.h \
   208    211     $(TOP)/ext/rtree/rtree.c
................................................................................
   506    509   	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_porter.c
   507    510   
   508    511   fts3_tokenizer.o:	$(TOP)/ext/fts3/fts3_tokenizer.c $(HDR) $(EXTHDR)
   509    512   	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_tokenizer.c
   510    513   
   511    514   fts3_tokenizer1.o:	$(TOP)/ext/fts3/fts3_tokenizer1.c $(HDR) $(EXTHDR)
   512    515   	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_tokenizer1.c
          516  +
          517  +fts3_unicode.o:	$(TOP)/ext/fts3/fts3_unicode.c $(HDR) $(EXTHDR)
          518  +	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_unicode.c
          519  +
          520  +fts3_unicode2.o:	$(TOP)/ext/fts3/fts3_unicode2.c $(HDR) $(EXTHDR)
          521  +	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_unicode2.c
   513    522   
   514    523   fts3_write.o:	$(TOP)/ext/fts3/fts3_write.c $(HDR) $(EXTHDR)
   515    524   	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts3/fts3_write.c
   516    525   
   517    526   rtree.o:	$(TOP)/ext/rtree/rtree.c $(HDR) $(EXTHDR)
   518    527   	$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/rtree/rtree.c
   519    528   

Changes to src/btree.c.

  1717   1717     /* Set the variable isMemdb to true for an in-memory database, or 
  1718   1718     ** false for a file-based database.
  1719   1719     */
  1720   1720   #ifdef SQLITE_OMIT_MEMORYDB
  1721   1721     const int isMemdb = 0;
  1722   1722   #else
  1723   1723     const int isMemdb = (zFilename && strcmp(zFilename, ":memory:")==0)
  1724         -                       || (isTempDb && sqlite3TempInMemory(db));
         1724  +                       || (isTempDb && sqlite3TempInMemory(db))
         1725  +                       || (vfsFlags & SQLITE_OPEN_MEMORY)!=0;
  1725   1726   #endif
  1726   1727   
  1727   1728     assert( db!=0 );
  1728   1729     assert( pVfs!=0 );
  1729   1730     assert( sqlite3_mutex_held(db->mutex) );
  1730   1731     assert( (flags&0xff)==flags );   /* flags fit in 8 bits */
  1731   1732   
................................................................................
  1753   1754   #endif
  1754   1755   
  1755   1756   #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
  1756   1757     /*
  1757   1758     ** If this Btree is a candidate for shared cache, try to find an
  1758   1759     ** existing BtShared object that we can share with
  1759   1760     */
  1760         -  if( isMemdb==0 && isTempDb==0 ){
         1761  +  if( isTempDb==0 && (isMemdb==0 || (vfsFlags&SQLITE_OPEN_URI)!=0) ){
  1761   1762       if( vfsFlags & SQLITE_OPEN_SHAREDCACHE ){
  1762   1763         int nFullPathname = pVfs->mxPathname+1;
  1763   1764         char *zFullPathname = sqlite3Malloc(nFullPathname);
  1764   1765         MUTEX_LOGIC( sqlite3_mutex *mutexShared; )
  1765   1766         p->sharable = 1;
  1766   1767         if( !zFullPathname ){
  1767   1768           sqlite3_free(p);
  1768   1769           return SQLITE_NOMEM;
  1769   1770         }
  1770         -      rc = sqlite3OsFullPathname(pVfs, zFilename, nFullPathname, zFullPathname);
  1771         -      if( rc ){
  1772         -        sqlite3_free(zFullPathname);
  1773         -        sqlite3_free(p);
  1774         -        return rc;
         1771  +      if( isMemdb ){
         1772  +        memcpy(zFullPathname, zFilename, sqlite3Strlen30(zFilename)+1);
         1773  +      }else{
         1774  +        rc = sqlite3OsFullPathname(pVfs, zFilename,
         1775  +                                   nFullPathname, zFullPathname);
         1776  +        if( rc ){
         1777  +          sqlite3_free(zFullPathname);
         1778  +          sqlite3_free(p);
         1779  +          return rc;
         1780  +        }
  1775   1781         }
  1776   1782   #if SQLITE_THREADSAFE
  1777   1783         mutexOpen = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_OPEN);
  1778   1784         sqlite3_mutex_enter(mutexOpen);
  1779   1785         mutexShared = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER);
  1780   1786         sqlite3_mutex_enter(mutexShared);
  1781   1787   #endif
  1782   1788         for(pBt=GLOBAL(BtShared*,sqlite3SharedCacheList); pBt; pBt=pBt->pNext){
  1783   1789           assert( pBt->nRef>0 );
  1784         -        if( 0==strcmp(zFullPathname, sqlite3PagerFilename(pBt->pPager))
         1790  +        if( 0==strcmp(zFullPathname, sqlite3PagerFilename(pBt->pPager, 0))
  1785   1791                    && sqlite3PagerVfs(pBt->pPager)==pVfs ){
  1786   1792             int iDb;
  1787   1793             for(iDb=db->nDb-1; iDb>=0; iDb--){
  1788   1794               Btree *pExisting = db->aDb[iDb].pBt;
  1789   1795               if( pExisting && pExisting->pBt==pBt ){
  1790   1796                 sqlite3_mutex_leave(mutexShared);
  1791   1797                 sqlite3_mutex_leave(mutexOpen);
................................................................................
  8042   8048     *pnErr = sCheck.nErr;
  8043   8049     if( sCheck.nErr==0 ) sqlite3StrAccumReset(&sCheck.errMsg);
  8044   8050     return sqlite3StrAccumFinish(&sCheck.errMsg);
  8045   8051   }
  8046   8052   #endif /* SQLITE_OMIT_INTEGRITY_CHECK */
  8047   8053   
  8048   8054   /*
  8049         -** Return the full pathname of the underlying database file.
         8055  +** Return the full pathname of the underlying database file.  Return
         8056  +** an empty string if the database is in-memory or a TEMP database.
  8050   8057   **
  8051   8058   ** The pager filename is invariant as long as the pager is
  8052   8059   ** open so it is safe to access without the BtShared mutex.
  8053   8060   */
  8054   8061   const char *sqlite3BtreeGetFilename(Btree *p){
  8055   8062     assert( p->pBt->pPager!=0 );
  8056         -  return sqlite3PagerFilename(p->pBt->pPager);
         8063  +  return sqlite3PagerFilename(p->pBt->pPager, 1);
  8057   8064   }
  8058   8065   
  8059   8066   /*
  8060   8067   ** Return the pathname of the journal file for this database. The return
  8061   8068   ** value of this routine is the same regardless of whether the journal file
  8062   8069   ** has been created or not.
  8063   8070   **

Changes to src/main.c.

  2029   2029             zModeType = "cache";
  2030   2030           }
  2031   2031           if( nOpt==4 && memcmp("mode", zOpt, 4)==0 ){
  2032   2032             static struct OpenMode aOpenMode[] = {
  2033   2033               { "ro",  SQLITE_OPEN_READONLY },
  2034   2034               { "rw",  SQLITE_OPEN_READWRITE }, 
  2035   2035               { "rwc", SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE },
         2036  +            { "memory",
         2037  +                    SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE 
         2038  +                       | SQLITE_OPEN_MEMORY },
  2036   2039               { 0, 0 }
  2037   2040             };
  2038   2041   
  2039         -          mask = SQLITE_OPEN_READONLY|SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE;
         2042  +          mask = SQLITE_OPEN_READONLY | SQLITE_OPEN_READWRITE
         2043  +                   | SQLITE_OPEN_CREATE | SQLITE_OPEN_MEMORY;
  2040   2044             aMode = aOpenMode;
  2041   2045             limit = mask & flags;
  2042   2046             zModeType = "access";
  2043   2047           }
  2044   2048   
  2045   2049           if( aMode ){
  2046   2050             int i;
................................................................................
  2053   2057               }
  2054   2058             }
  2055   2059             if( mode==0 ){
  2056   2060               *pzErrMsg = sqlite3_mprintf("no such %s mode: %s", zModeType, zVal);
  2057   2061               rc = SQLITE_ERROR;
  2058   2062               goto parse_uri_out;
  2059   2063             }
  2060         -          if( mode>limit ){
         2064  +          if( (mode & ~SQLITE_OPEN_MEMORY)>limit ){
  2061   2065               *pzErrMsg = sqlite3_mprintf("%s mode not allowed: %s",
  2062   2066                                           zModeType, zVal);
  2063   2067               rc = SQLITE_PERM;
  2064   2068               goto parse_uri_out;
  2065   2069             }
  2066   2070             flags = (flags & ~mask) | mode;
  2067   2071           }
................................................................................
  2072   2076   
  2073   2077     }else{
  2074   2078       zFile = sqlite3_malloc(nUri+2);
  2075   2079       if( !zFile ) return SQLITE_NOMEM;
  2076   2080       memcpy(zFile, zUri, nUri);
  2077   2081       zFile[nUri] = '\0';
  2078   2082       zFile[nUri+1] = '\0';
         2083  +    flags &= ~SQLITE_OPEN_URI;
  2079   2084     }
  2080   2085   
  2081   2086     *ppVfs = sqlite3_vfs_find(zVfs);
  2082   2087     if( *ppVfs==0 ){
  2083   2088       *pzErrMsg = sqlite3_mprintf("no such vfs: %s", zVfs);
  2084   2089       rc = SQLITE_ERROR;
  2085   2090     }

Changes to src/pager.c.

  4356   4356   
  4357   4357     /* Set the output variable to NULL in case an error occurs. */
  4358   4358     *ppPager = 0;
  4359   4359   
  4360   4360   #ifndef SQLITE_OMIT_MEMORYDB
  4361   4361     if( flags & PAGER_MEMORY ){
  4362   4362       memDb = 1;
  4363         -    zFilename = 0;
         4363  +    if( zFilename && zFilename[0] ){
         4364  +      zPathname = sqlite3DbStrDup(0, zFilename);
         4365  +      if( zPathname==0  ) return SQLITE_NOMEM;
         4366  +      nPathname = sqlite3Strlen30(zPathname);
         4367  +      zFilename = 0;
         4368  +    }
  4364   4369     }
  4365   4370   #endif
  4366   4371   
  4367   4372     /* Compute and store the full pathname in an allocated buffer pointed
  4368   4373     ** to by zPathname, length nPathname. Or, if this is a temporary file,
  4369   4374     ** leave both nPathname and zPathname set to 0.
  4370   4375     */
................................................................................
  6292   6297     }
  6293   6298   
  6294   6299     return rc;
  6295   6300   }
  6296   6301   
  6297   6302   /*
  6298   6303   ** Return the full pathname of the database file.
         6304  +**
         6305  +** Except, if the pager is in-memory only, then return an empty string if
         6306  +** nullIfMemDb is true.  This routine is called with nullIfMemDb==1 when
         6307  +** used to report the filename to the user, for compatibility with legacy
         6308  +** behavior.  But when the Btree needs to know the filename for matching to
         6309  +** shared cache, it uses nullIfMemDb==0 so that in-memory databases can
         6310  +** participate in shared-cache.
  6299   6311   */
  6300         -const char *sqlite3PagerFilename(Pager *pPager){
  6301         -  return pPager->zFilename;
         6312  +const char *sqlite3PagerFilename(Pager *pPager, int nullIfMemDb){
         6313  +  return (nullIfMemDb && pPager->memDb) ? "" : pPager->zFilename;
  6302   6314   }
  6303   6315   
  6304   6316   /*
  6305   6317   ** Return the VFS structure for the pager.
  6306   6318   */
  6307   6319   const sqlite3_vfs *sqlite3PagerVfs(Pager *pPager){
  6308   6320     return pPager->pVfs;

Changes to src/pager.h.

   147    147     int sqlite3PagerWalFramesize(Pager *pPager);
   148    148   #endif
   149    149   
   150    150   /* Functions used to query pager state and configuration. */
   151    151   u8 sqlite3PagerIsreadonly(Pager*);
   152    152   int sqlite3PagerRefcount(Pager*);
   153    153   int sqlite3PagerMemUsed(Pager*);
   154         -const char *sqlite3PagerFilename(Pager*);
          154  +const char *sqlite3PagerFilename(Pager*, int);
   155    155   const sqlite3_vfs *sqlite3PagerVfs(Pager*);
   156    156   sqlite3_file *sqlite3PagerFile(Pager*);
   157    157   const char *sqlite3PagerJournalname(Pager*);
   158    158   int sqlite3PagerNosync(Pager*);
   159    159   void *sqlite3PagerTempSpace(Pager*);
   160    160   int sqlite3PagerIsMemdb(Pager*);
   161    161   void sqlite3PagerCacheStat(Pager *, int, int, int *);

Changes to src/sqlite.h.in.

   469    469   #define SQLITE_OPEN_READONLY         0x00000001  /* Ok for sqlite3_open_v2() */
   470    470   #define SQLITE_OPEN_READWRITE        0x00000002  /* Ok for sqlite3_open_v2() */
   471    471   #define SQLITE_OPEN_CREATE           0x00000004  /* Ok for sqlite3_open_v2() */
   472    472   #define SQLITE_OPEN_DELETEONCLOSE    0x00000008  /* VFS only */
   473    473   #define SQLITE_OPEN_EXCLUSIVE        0x00000010  /* VFS only */
   474    474   #define SQLITE_OPEN_AUTOPROXY        0x00000020  /* VFS only */
   475    475   #define SQLITE_OPEN_URI              0x00000040  /* Ok for sqlite3_open_v2() */
          476  +#define SQLITE_OPEN_MEMORY           0x00000080  /* Ok for sqlite3_open_v2() */
   476    477   #define SQLITE_OPEN_MAIN_DB          0x00000100  /* VFS only */
   477    478   #define SQLITE_OPEN_TEMP_DB          0x00000200  /* VFS only */
   478    479   #define SQLITE_OPEN_TRANSIENT_DB     0x00000400  /* VFS only */
   479    480   #define SQLITE_OPEN_MAIN_JOURNAL     0x00000800  /* VFS only */
   480    481   #define SQLITE_OPEN_TEMP_JOURNAL     0x00001000  /* VFS only */
   481    482   #define SQLITE_OPEN_SUBJOURNAL       0x00002000  /* VFS only */
   482    483   #define SQLITE_OPEN_MASTER_JOURNAL   0x00004000  /* VFS only */
................................................................................
  2566   2567   **     a VFS object that provides the operating system interface that should
  2567   2568   **     be used to access the database file on disk. ^If this option is set to
  2568   2569   **     an empty string the default VFS object is used. ^Specifying an unknown
  2569   2570   **     VFS is an error. ^If sqlite3_open_v2() is used and the vfs option is
  2570   2571   **     present, then the VFS specified by the option takes precedence over
  2571   2572   **     the value passed as the fourth parameter to sqlite3_open_v2().
  2572   2573   **
  2573         -**   <li> <b>mode</b>: ^(The mode parameter may be set to either "ro", "rw" or
  2574         -**     "rwc". Attempting to set it to any other value is an error)^. 
         2574  +**   <li> <b>mode</b>: ^(The mode parameter may be set to either "ro", "rw",
         2575  +**     "rwc", or "memory". Attempting to set it to any other value is
         2576  +**     an error)^. 
  2575   2577   **     ^If "ro" is specified, then the database is opened for read-only 
  2576   2578   **     access, just as if the [SQLITE_OPEN_READONLY] flag had been set in the 
  2577   2579   **     third argument to sqlite3_open_v2(). ^If the mode option is set to 
  2578   2580   **     "rw", then the database is opened for read-write (but not create) 
  2579   2581   **     access, as if SQLITE_OPEN_READWRITE (but not SQLITE_OPEN_CREATE) had 
  2580   2582   **     been set. ^Value "rwc" is equivalent to setting both 
  2581         -**     SQLITE_OPEN_READWRITE and SQLITE_OPEN_CREATE. ^If sqlite3_open_v2() is 
  2582         -**     used, it is an error to specify a value for the mode parameter that is 
  2583         -**     less restrictive than that specified by the flags passed as the third 
  2584         -**     parameter.
         2583  +**     SQLITE_OPEN_READWRITE and SQLITE_OPEN_CREATE.  ^If the mode option is
         2584  +**     set to "memory" then a pure [in-memory database] that never reads or
         2585  +**     writes from disk is used. ^It is an error to specify a value for
         2586  +**     the mode parameter that is less restrictive than that specified by
         2587  +**     the flags passed in the third parameter to sqlite3_open_v2().
  2585   2588   **
  2586   2589   **   <li> <b>cache</b>: ^The cache parameter may be set to either "shared" or
  2587   2590   **     "private". ^Setting it to "shared" is equivalent to setting the
  2588   2591   **     SQLITE_OPEN_SHAREDCACHE bit in the flags argument passed to
  2589   2592   **     sqlite3_open_v2(). ^Setting the cache parameter to "private" is 
  2590   2593   **     equivalent to setting the SQLITE_OPEN_PRIVATECACHE bit.
  2591   2594   **     ^If sqlite3_open_v2() is used and the "cache" parameter is present in
................................................................................
  4618   4621     sqlite3*, 
  4619   4622     void(*)(void *,int ,char const *,char const *,sqlite3_int64),
  4620   4623     void*
  4621   4624   );
  4622   4625   
  4623   4626   /*
  4624   4627   ** CAPI3REF: Enable Or Disable Shared Pager Cache
  4625         -** KEYWORDS: {shared cache}
  4626   4628   **
  4627   4629   ** ^(This routine enables or disables the sharing of the database cache
  4628   4630   ** and schema data structures between [database connection | connections]
  4629   4631   ** to the same database. Sharing is enabled if the argument is true
  4630   4632   ** and disabled if the argument is false.)^
  4631   4633   **
  4632   4634   ** ^Cache sharing is enabled and disabled for an entire process.

Changes to src/test_btree.c.

    29     29     Tcl_Obj *CONST objv[]
    30     30   ){
    31     31   #ifndef SQLITE_OMIT_SHARED_CACHE
    32     32     extern BtShared *sqlite3SharedCacheList;
    33     33     BtShared *pBt;
    34     34     Tcl_Obj *pRet = Tcl_NewObj();
    35     35     for(pBt=GLOBAL(BtShared*,sqlite3SharedCacheList); pBt; pBt=pBt->pNext){
    36         -    const char *zFile = sqlite3PagerFilename(pBt->pPager);
           36  +    const char *zFile = sqlite3PagerFilename(pBt->pPager, 1);
    37     37       Tcl_ListObjAppendElement(interp, pRet, Tcl_NewStringObj(zFile, -1));
    38     38       Tcl_ListObjAppendElement(interp, pRet, Tcl_NewIntObj(pBt->nRef));
    39     39     }
    40     40     Tcl_SetObjResult(interp, pRet);
    41     41   #endif
    42     42     return TCL_OK;
    43     43   }

Changes to src/test_config.c.

   308    308   #endif
   309    309   
   310    310   #ifdef SQLITE_ENABLE_FTS3
   311    311     Tcl_SetVar2(interp, "sqlite_options", "fts3", "1", TCL_GLOBAL_ONLY);
   312    312   #else
   313    313     Tcl_SetVar2(interp, "sqlite_options", "fts3", "0", TCL_GLOBAL_ONLY);
   314    314   #endif
          315  +
          316  +#if !defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_DISABLE_FTS3_UNICODE)
          317  +  Tcl_SetVar2(interp, "sqlite_options", "fts3_unicode", "0", TCL_GLOBAL_ONLY);
          318  +#else
          319  +  Tcl_SetVar2(interp, "sqlite_options", "fts3_unicode", "1", TCL_GLOBAL_ONLY);
          320  +#endif
   315    321   
   316    322   #ifdef SQLITE_OMIT_GET_TABLE
   317    323     Tcl_SetVar2(interp, "sqlite_options", "gettable", "0", TCL_GLOBAL_ONLY);
   318    324   #else
   319    325     Tcl_SetVar2(interp, "sqlite_options", "gettable", "1", TCL_GLOBAL_ONLY);
   320    326   #endif
   321    327   

Changes to src/vdbe.c.

  5559   5559     pBt = db->aDb[pOp->p1].pBt;
  5560   5560     pPager = sqlite3BtreePager(pBt);
  5561   5561     eOld = sqlite3PagerGetJournalMode(pPager);
  5562   5562     if( eNew==PAGER_JOURNALMODE_QUERY ) eNew = eOld;
  5563   5563     if( !sqlite3PagerOkToChangeJournalMode(pPager) ) eNew = eOld;
  5564   5564   
  5565   5565   #ifndef SQLITE_OMIT_WAL
  5566         -  zFilename = sqlite3PagerFilename(pPager);
         5566  +  zFilename = sqlite3PagerFilename(pPager, 1);
  5567   5567   
  5568   5568     /* Do not allow a transition to journal_mode=WAL for a database
  5569   5569     ** in temporary storage or if the VFS does not support shared memory 
  5570   5570     */
  5571   5571     if( eNew==PAGER_JOURNALMODE_WAL
  5572   5572      && (sqlite3Strlen30(zFilename)==0           /* Temp file */
  5573   5573          || !sqlite3PagerWalSupported(pPager))   /* No shared-memory support */

Changes to test/e_uri.test.

   250    250   foreach {tn uri error} "
   251    251     1    {file:test.db?mode=ro}    {not an error}
   252    252     2    {file:test.db?mode=rw}    {not an error}
   253    253     3    {file:test.db?mode=rwc}   {not an error}
   254    254     4    {file:test.db?mode=Ro}    {no such access mode: Ro}
   255    255     5    {file:test.db?mode=Rw}    {no such access mode: Rw}
   256    256     6    {file:test.db?mode=Rwc}   {no such access mode: Rwc}
          257  +  7    {file:test.db?mode=memory} {not an error}
          258  +  8    {file:test.db?mode=MEMORY} {no such access mode: MEMORY}
   257    259   " {
   258    260     do_test 7.$tn { open_uri_error $uri } $error
   259    261   }
   260    262   
   261    263   
   262    264   # EVIDENCE-OF: R-09651-31805 If "ro" is specified, then the database is
   263    265   # opened for read-only access, just as if the SQLITE_OPEN_READONLY flag

Changes to test/fts3fault2.test.

   126    126     faultsim_restore_and_reopen
   127    127     db eval {SELECT * FROM sqlite_master}
   128    128   } -body {
   129    129     execsql { INSERT INTO ft(ft) VALUES('rebuild') }
   130    130   } -test {
   131    131     faultsim_test_result {0 {}}
   132    132   }
          133  +
          134  +ifcapable fts3_unicode {
          135  +  do_test 5.0 {
          136  +    faultsim_delete_and_reopen
          137  +    execsql {
          138  +      CREATE VIRTUAL TABLE ft USING fts4(a, tokenize=unicode61);
          139  +    }
          140  +    faultsim_save_and_close
          141  +  } {}
          142  +  
          143  +  do_faultsim_test 5.1 -faults oom* -prep {
          144  +    faultsim_restore_and_reopen
          145  +    db eval {SELECT * FROM sqlite_master}
          146  +  } -body {
          147  +    execsql { INSERT INTO ft VALUES('the quick brown fox'); }
          148  +    execsql { INSERT INTO ft VALUES(
          149  +       'theunusuallylongtokenthatjustdragsonandonandonandthendragsonsomemoreeof'
          150  +      );
          151  +    }
          152  +    execsql { SELECT docid FROM ft WHERE ft MATCH 'th*' }
          153  +  } -test {
          154  +    faultsim_test_result {0 {1 2}}
          155  +  }
          156  +}
   133    157   
   134    158   finish_test

Added test/fts4unicode.test.

            1  +# 2012 May 25
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#*************************************************************************
           11  +#
           12  +# The tests in this file focus on testing the "unicode" FTS tokenizer.
           13  +#
           14  +
           15  +set testdir [file dirname $argv0]
           16  +source $testdir/tester.tcl
           17  +ifcapable !fts3_unicode { finish_test ; return }
           18  +set ::testprefix fts4unicode
           19  +
           20  +proc do_unicode_token_test {tn input res} {
           21  +  set input [string map {' ''} $input]
           22  +  uplevel [list do_execsql_test $tn "
           23  +    SELECT fts3_tokenizer_test('unicode61', '$input');
           24  +  " [list [list {*}$res]]]
           25  +}
           26  +
           27  +do_unicode_token_test 1.0 {a B c D} {0 a a 1 b B 2 c c 3 d D}
           28  +do_unicode_token_test 1.1 {  } {0   1   2  }
           29  +do_unicode_token_test 1.2 {xx xx xx} {0 xx xx 1 xx xx 2 xx xx}
           30  +
           31  +# 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s.
           32  +do_unicode_token_test 1.3 "\uDF" "0 \uDF \uDF"
           33  +do_unicode_token_test 1.4 "\u1E9E" "0  \u1E9E"
           34  +do_unicode_token_test 1.5 "\u1E9E" "0 \uDF \u1E9E"
           35  +
           36  +do_unicode_token_test 1.6 "The quick brown fox" {
           37  +  0 the The 1 quick quick 2 brown brown 3 fox fox
           38  +}
           39  +do_unicode_token_test 1.7 "The\u00bfquick\u224ebrown\u2263fox" {
           40  +  0 the The 1 quick quick 2 brown brown 3 fox fox
           41  +}
           42  +
           43  +#-------------------------------------------------------------------------
           44  +#
           45  +set docs [list {
           46  +  Enhance the INSERT syntax to allow multiple rows to be inserted via the
           47  +  VALUES clause.
           48  +} {
           49  +  Enhance the CREATE VIRTUAL TABLE command to support the IF NOT EXISTS clause.
           50  +} {
           51  +  Added the sqlite3_stricmp() interface as a counterpart to sqlite3_strnicmp().
           52  +} {
           53  +  Added the sqlite3_db_readonly() interface.
           54  +} {
           55  +  Added the SQLITE_FCNTL_PRAGMA file control, giving VFS implementations the
           56  +  ability to add new PRAGMA statements or to override built-in PRAGMAs.  
           57  +} {
           58  +  Queries of the form: "SELECT max(x), y FROM table" returns the value of y on
           59  +  the same row that contains the maximum x value.
           60  +} {
           61  +  Added support for the FTS4 languageid option.
           62  +} {
           63  +  Documented support for the FTS4 content option. This feature has actually
           64  +  been in the code since version 3.7.9 but is only now considered to be
           65  +  officially supported.  
           66  +} {
           67  +  Pending statements no longer block ROLLBACK. Instead, the pending statement
           68  +  will return SQLITE_ABORT upon next access after the ROLLBACK.  
           69  +} {
           70  +  Improvements to the handling of CSV inputs in the command-line shell
           71  +} {
           72  +  Fix a bug introduced in version 3.7.10 that might cause a LEFT JOIN to be
           73  +  incorrectly converted into an INNER JOIN if the WHERE clause indexable terms
           74  +  connected by OR.  
           75  +}]
           76  +
           77  +set map(a) [list "\u00C4" "\u00E4"]  ; # LATIN LETTER A WITH DIAERESIS
           78  +set map(e) [list "\u00CB" "\u00EB"]  ; # LATIN LETTER E WITH DIAERESIS
           79  +set map(i) [list "\u00CF" "\u00EF"]  ; # LATIN LETTER I WITH DIAERESIS
           80  +set map(o) [list "\u00D6" "\u00F6"]  ; # LATIN LETTER O WITH DIAERESIS
           81  +set map(u) [list "\u00DC" "\u00FC"]  ; # LATIN LETTER U WITH DIAERESIS
           82  +set map(y) [list "\u0178" "\u00FF"]  ; # LATIN LETTER Y WITH DIAERESIS
           83  +set map(h) [list "\u1E26" "\u1E27"]  ; # LATIN LETTER H WITH DIAERESIS
           84  +set map(w) [list "\u1E84" "\u1E85"]  ; # LATIN LETTER W WITH DIAERESIS
           85  +set map(x) [list "\u1E8C" "\u1E8D"]  ; # LATIN LETTER X WITH DIAERESIS
           86  +foreach k [array names map] {
           87  +  lappend mappings [string toupper $k] [lindex $map($k) 0] 
           88  +  lappend mappings $k [lindex $map($k) 1]
           89  +}
           90  +proc mapdoc {doc} { 
           91  +  set doc [regsub -all {[[:space:]]+} $doc " "]
           92  +  string map $::mappings [string trim $doc] 
           93  +}
           94  +
           95  +do_test 2.0 {
           96  +  execsql { CREATE VIRTUAL TABLE t2 USING fts4(tokenize=unicode61, x); }
           97  +  foreach doc $docs {
           98  +    set d [mapdoc $doc]
           99  +    execsql { INSERT INTO t2 VALUES($d) }
          100  +  }
          101  +} {}
          102  +
          103  +do_test 2.1 {
          104  +  set q [mapdoc "row"]
          105  +  execsql { SELECT * FROM t2 WHERE t2 MATCH $q }
          106  +} [list [mapdoc {
          107  +  Queries of the form: "SELECT max(x), y FROM table" returns the value of y on
          108  +  the same row that contains the maximum x value.
          109  +}]]
          110  +
          111  +foreach {tn query snippet} {
          112  +  2 "row" {
          113  +     ...returns the value of y on the same [row] that contains 
          114  +     the maximum x value.
          115  +  }
          116  +  3 "ROW" {
          117  +     ...returns the value of y on the same [row] that contains 
          118  +     the maximum x value.
          119  +  }
          120  +  4 "rollback" {
          121  +     ...[ROLLBACK]. Instead, the pending statement
          122  +     will return SQLITE_ABORT upon next access after the [ROLLBACK].
          123  +  }
          124  +  5 "rOllback" {
          125  +     ...[ROLLBACK]. Instead, the pending statement
          126  +     will return SQLITE_ABORT upon next access after the [ROLLBACK].
          127  +  }
          128  +  6 "lang*" {
          129  +     Added support for the FTS4 [languageid] option.
          130  +  }
          131  +} {
          132  +  do_test 2.$tn {
          133  +    set q [mapdoc $query]
          134  +    execsql { SELECT snippet(t2, '[', ']', '...') FROM t2 WHERE t2 MATCH $q }
          135  +  } [list [mapdoc $snippet]]
          136  +}
          137  +
          138  +#-------------------------------------------------------------------------
          139  +# Make sure the unicode61 tokenizer does not crash if it is passed a 
          140  +# NULL pointer.
          141  +reset_db
          142  +do_execsql_test 3.1 {
          143  +  CREATE VIRTUAL TABLE t1 USING fts4(tokenize=unicode61, x, y);
          144  +  INSERT INTO t1 VALUES(NULL, 'a b c');
          145  +}
          146  +
          147  +do_execsql_test 3.2 {
          148  +  SELECT snippet(t1, '[', ']') FROM t1 WHERE t1 MATCH 'b'
          149  +} {{a [b] c}}
          150  +
          151  +do_execsql_test 3.3 {
          152  +  BEGIN;
          153  +  DELETE FROM t1;
          154  +  INSERT INTO t1 VALUES('b b b b b b b b b b b', 'b b b b b b b b b b b b b');
          155  +  INSERT INTO t1 SELECT * FROM t1;
          156  +  INSERT INTO t1 SELECT * FROM t1;
          157  +  INSERT INTO t1 SELECT * FROM t1;
          158  +  INSERT INTO t1 SELECT * FROM t1;
          159  +  INSERT INTO t1 SELECT * FROM t1;
          160  +  INSERT INTO t1 SELECT * FROM t1;
          161  +  INSERT INTO t1 SELECT * FROM t1;
          162  +  INSERT INTO t1 SELECT * FROM t1;
          163  +  INSERT INTO t1 SELECT * FROM t1;
          164  +  INSERT INTO t1 SELECT * FROM t1;
          165  +  INSERT INTO t1 SELECT * FROM t1;
          166  +  INSERT INTO t1 SELECT * FROM t1;
          167  +  INSERT INTO t1 SELECT * FROM t1;
          168  +  INSERT INTO t1 SELECT * FROM t1;
          169  +  INSERT INTO t1 SELECT * FROM t1;
          170  +  INSERT INTO t1 SELECT * FROM t1;
          171  +  INSERT INTO t1 VALUES('a b c', NULL);
          172  +  INSERT INTO t1 VALUES('a x c', NULL);
          173  +  COMMIT;
          174  +}
          175  +
          176  +do_execsql_test 3.4 {
          177  +  SELECT * FROM t1 WHERE t1 MATCH 'a b';
          178  +} {{a b c} {}}
          179  +
          180  +#-------------------------------------------------------------------------
          181  +#
          182  +reset_db
          183  +
          184  +do_test 4.1 {
          185  +  set a "abc\uFFFEdef"
          186  +  set b "abc\uD800def"
          187  +  set c "\uFFFEdef"
          188  +  set d "\uD800def"
          189  +  execsql {
          190  +    CREATE VIRTUAL TABLE t1 USING fts4(tokenize=unicode61, x);
          191  +    INSERT INTO t1 VALUES($a);
          192  +    INSERT INTO t1 VALUES($b);
          193  +    INSERT INTO t1 VALUES($c);
          194  +    INSERT INTO t1 VALUES($d);
          195  +  }
          196  +} {}
          197  +
          198  +do_test 4.2 {
          199  +  set a [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0x62}]
          200  +  set b [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0x62}]
          201  +  set c [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
          202  +  set d [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
          203  +  execsql {
          204  +    INSERT INTO t1 VALUES($a);
          205  +    INSERT INTO t1 VALUES($b);
          206  +    INSERT INTO t1 VALUES($c);
          207  +    INSERT INTO t1 VALUES($d);
          208  +  }
          209  +} {}
          210  +
          211  +do_test 4.3 {
          212  +  set a [binary format c* {0xF7 0xBF 0xBF 0xBF}]
          213  +  set b [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF}]
          214  +  set c [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF}]
          215  +  set d [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF}]
          216  +  execsql {
          217  +    INSERT INTO t1 VALUES($a);
          218  +    INSERT INTO t1 VALUES($b);
          219  +    INSERT INTO t1 VALUES($c);
          220  +    INSERT INTO t1 VALUES($d);
          221  +  }
          222  +} {}
          223  +
          224  +
          225  +
          226  +finish_test
          227  +

Changes to test/permutations.test.

   184    184     fts3near.test fts3query.test fts3shared.test fts3snippet.test 
   185    185     fts3sort.test
   186    186     fts3fault.test fts3malloc.test fts3matchinfo.test
   187    187     fts3aux1.test fts3comp1.test fts3auto.test
   188    188     fts4aa.test fts4content.test
   189    189     fts3conf.test fts3prefix.test fts3fault2.test fts3corrupt.test
   190    190     fts3corrupt2.test fts3first.test fts4langid.test fts4merge.test
   191         -  fts4check.test
          191  +  fts4check.test fts4unicode.test
   192    192   }
   193    193   
   194    194   
   195    195   lappend ::testsuitelist xxx
   196    196   #-------------------------------------------------------------------------
   197    197   # Define the coverage related test suites:
   198    198   #

Changes to test/shared.test.

  1052   1052   } {1 2 6 8 9 12 1 2 5 11 12 14 1 2 4}
  1053   1053   do_test shared-$av-15.2 {
  1054   1054     execsql { DROP TABLE t1 } db2
  1055   1055   } {}
  1056   1056   db close
  1057   1057   db2 close
  1058   1058   
  1059         -}
         1059  +# Shared cache on a :memory: database.  This only works for URI filenames.
         1060  +#
         1061  +do_test shared-$av-16.1 {
         1062  +  sqlite3 db1 file::memory: -uri 1
         1063  +  sqlite3 db2 file::memory: -uri 1
         1064  +  db1 eval {
         1065  +    CREATE TABLE t1(x); INSERT INTO t1 VALUES(1),(2),(3);
         1066  +  }
         1067  +  db2 eval {
         1068  +    SELECT x FROM t1 ORDER BY x;
         1069  +  }
         1070  +} {1 2 3}
         1071  +do_test shared-$av-16.2 {
         1072  +  db2 eval {
         1073  +    INSERT INTO t1 VALUES(99);
         1074  +    DELETE FROM t1 WHERE x=2;
         1075  +  }
         1076  +  db1 eval {
         1077  +    SELECT x FROM t1 ORDER BY x;
         1078  +  }
         1079  +} {1 3 99}
         1080  +
         1081  +# Verify that there is no cache sharing when ordinary (non-URI) filenames are
         1082  +# used.
         1083  +#
         1084  +do_test shared-$av-16.3 {
         1085  +  db1 close
         1086  +  db2 close
         1087  +  sqlite3 db1 :memory:
         1088  +  sqlite3 db2 :memory:
         1089  +  db1 eval {
         1090  +    CREATE TABLE t1(x); INSERT INTO t1 VALUES(4),(5),(6);
         1091  +  }
         1092  +  catchsql {
         1093  +    SELECT * FROM t1;
         1094  +  } db2
         1095  +} {1 {no such table: t1}}
         1096  +
         1097  +# Shared cache on named memory databases.
         1098  +#
         1099  +do_test shared-$av-16.4 {
         1100  +  db1 close
         1101  +  db2 close
         1102  +  forcedelete test.db test.db-wal test.db-journal
         1103  +  sqlite3 db1 file:test.db?mode=memory -uri 1
         1104  +  sqlite3 db2 file:test.db?mode=memory -uri 1
         1105  +  db1 eval {
         1106  +    CREATE TABLE t1(x); INSERT INTO t1 VALUES(1),(2),(3);
         1107  +  }
         1108  +  db2 eval {
         1109  +    SELECT x FROM t1 ORDER BY x;
         1110  +  }
         1111  +} {1 2 3}
         1112  +do_test shared-$av-16.5 {
         1113  +  db2 eval {
         1114  +    INSERT INTO t1 VALUES(99);
         1115  +    DELETE FROM t1 WHERE x=2;
         1116  +  }
         1117  +  db1 eval {
         1118  +    SELECT x FROM t1 ORDER BY x;
         1119  +  }
         1120  +} {1 3 99}
         1121  +do_test shared-$av-16.6 {
         1122  +  file exists test.db
         1123  +} {0}  ;# Verify that the database is in-memory
         1124  +
         1125  +# Shared cache on named memory databases with different names.
         1126  +#
         1127  +do_test shared-$av-16.7 {
         1128  +  db1 close
         1129  +  db2 close
         1130  +  forcedelete test1.db test2.db
         1131  +  sqlite3 db1 file:test1.db?mode=memory -uri 1
         1132  +  sqlite3 db2 file:test2.db?mode=memory -uri 1
         1133  +  db1 eval {
         1134  +    CREATE TABLE t1(x); INSERT INTO t1 VALUES(1),(2),(3);
         1135  +  }
         1136  +  catchsql {
         1137  +    SELECT x FROM t1 ORDER BY x;
         1138  +  } db2
         1139  +} {1 {no such table: t1}}
         1140  +do_test shared-$av-16.8 {
         1141  +  file exists test1.db
         1142  +} {0}  ;# Verify that the database is in-memory
         1143  +
         1144  +
         1145  +db1 close
         1146  +db2 close
         1147  +
         1148  +}  ;# end of autovacuum on/off loop
  1060   1149   
  1061   1150   sqlite3_enable_shared_cache $::enable_shared_cache
  1062   1151   finish_test

Changes to tool/mksqlite3c.tcl.

   312    312      fts3_expr.c
   313    313      fts3_hash.c
   314    314      fts3_porter.c
   315    315      fts3_tokenizer.c
   316    316      fts3_tokenizer1.c
   317    317      fts3_write.c
   318    318      fts3_snippet.c
          319  +   fts3_unicode.c
          320  +   fts3_unicode2.c
   319    321   
   320    322      rtree.c
   321    323      icu.c
   322    324      fts3_icu.c
   323    325   } {
   324    326     copy_file tsrc/$file
   325    327   }
   326    328   
   327    329   close $out