/ Check-in [68677e42]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Extend fts2 so that user defined tokenizers may be added. Add a tokenizer that uses the ICU library if available. Documentation and tests to come. (CVS 4108)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 68677e420c744b39ea9d7399819e0f376748886d
User & Date: danielk1977 2007-06-22 15:21:16
Context
2007-06-22
20:04
fix compilation failure on OS/2 with the amalgamation (CVS 4109) check-in: 12327ca0 user: pweilbacher tags: trunk
15:21
Extend fts2 so that user defined tokenizers may be added. Add a tokenizer that uses the ICU library if available. Documentation and tests to come. (CVS 4108) check-in: 68677e42 user: danielk1977 tags: trunk
2007-06-21
15:25
Clarify documentation of the column metadata APIs. Make sure that the new documentation claims are tested. (CVS 4107) check-in: 2dafe08a user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts2/fts2.c.

1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
....
2757
2758
2759
2760
2761
2762
2763

2764
2765
2766
2767
2768
2769
2770
2771
2772



2773
2774
2775
2776
2777
2778
2779
....
2783
2784
2785
2786
2787
2788
2789
2790

2791
2792
2793
2794

2795



2796
2797
2798
2799

2800
2801
2802
2803
2804
2805
2806
....
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
....
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
....
5829
5830
5831
5832
5833
5834
5835
5836
5837
5838
5839
5840
5841
5842
5843
....
5851
5852
5853
5854
5855
5856
5857



























5858




































5859
5860

5861


5862
5863









5864
5865


5866

5867
5868
5869
5870
5871
5872
  DLReader reader;                 /* Result reader if result not empty */
} fulltext_cursor;

static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
  return (fulltext_vtab *) c->base.pVtab;
}

static const sqlite3_module fulltextModule;   /* forward declaration */

/* Return a dynamically generated statement of the form
 *   insert into %_content (rowid, ...) values (?, ...)
 */
static const char *contentInsertStatement(fulltext_vtab *v){
  StringBuffer sb;
  int i;
................................................................................

/*
** Build a new sqlite3_vtab structure that will describe the
** fulltext index defined by spec.
*/
static int constructVtab(
  sqlite3 *db,              /* The SQLite database connection */

  TableSpec *spec,          /* Parsed spec information from parseSpec() */
  sqlite3_vtab **ppVTab,    /* Write the resulting vtab structure here */
  char **pzErr              /* Write any error message here */
){
  int rc;
  int n;
  fulltext_vtab *v = 0;
  const sqlite3_tokenizer_module *m = NULL;
  char *schema;




  v = (fulltext_vtab *) malloc(sizeof(fulltext_vtab));
  if( v==0 ) return SQLITE_NOMEM;
  CLEAR(v);
  /* sqlite will initialize v->base */
  v->db = db;
  v->zDb = spec->zDb;       /* Freed when azColumn is freed */
................................................................................
  spec->azContentColumn = 0;
  v->azColumn = spec->azColumn;
  spec->azColumn = 0;

  if( spec->azTokenizer==0 ){
    return SQLITE_NOMEM;
  }
  /* TODO(shess) For now, add new tokenizers as else if clauses. */

  if( spec->azTokenizer[0]==0 || startsWith(spec->azTokenizer[0], "simple") ){
    sqlite3Fts2SimpleTokenizerModule(&m);
  }else if( startsWith(spec->azTokenizer[0], "porter") ){
    sqlite3Fts2PorterTokenizerModule(&m);

  }else{



    *pzErr = sqlite3_mprintf("unknown tokenizer: %s", spec->azTokenizer[0]);
    rc = SQLITE_ERROR;
    goto err;
  }

  for(n=0; spec->azTokenizer[n]; n++){}
  if( n ){
    rc = m->xCreate(n-1, (const char*const*)&spec->azTokenizer[1],
                    &v->pTokenizer);
  }else{
    rc = m->xCreate(0, 0, &v->pTokenizer);
  }
................................................................................
  sqlite3_vtab **ppVTab,
  char **pzErr
){
  TableSpec spec;
  int rc = parseSpec(&spec, argc, argv, pzErr);
  if( rc!=SQLITE_OK ) return rc;

  rc = constructVtab(db, &spec, ppVTab, pzErr);
  clearTableSpec(&spec);
  return rc;
}

/* The %_content table holds the text of each document, with
** the rowid used as the docid.
*/
................................................................................
                "  leaves_end_block integer,"
                "  end_block integer,"
                "  root blob,"
                "  primary key(level, idx)"
                ");");
  if( rc!=SQLITE_OK ) goto out;

  rc = constructVtab(db, &spec, ppVTab, pzErr);

out:
  clearTableSpec(&spec);
  return rc;
}

/* Decide how to handle an SQL query. */
................................................................................
  }else if( strcmp(zName,"offsets")==0 ){
    *pxFunc = snippetOffsetsFunc;
    return 1;
  }
  return 0;
}

static const sqlite3_module fulltextModule = {
  /* iVersion      */ 0,
  /* xCreate       */ fulltextCreate,
  /* xConnect      */ fulltextConnect,
  /* xBestIndex    */ fulltextBestIndex,
  /* xDisconnect   */ fulltextDisconnect,
  /* xDestroy      */ fulltextDestroy,
  /* xOpen         */ fulltextOpen,
................................................................................
  /* xBegin        */ fulltextBegin,
  /* xSync         */ fulltextSync,
  /* xCommit       */ fulltextCommit,
  /* xRollback     */ fulltextRollback,
  /* xFindFunction */ fulltextFindFunction,
};




























int sqlite3Fts2Init(sqlite3 *db){




































  sqlite3_overload_function(db, "snippet", -1);
  sqlite3_overload_function(db, "offsets", -1);

  return sqlite3_create_module(db, "fts2", &fulltextModule, 0);


}










#if !SQLITE_CORE
int sqlite3_extension_init(sqlite3 *db, char **pzErrMsg,


                           const sqlite3_api_routines *pApi){

  SQLITE_EXTENSION_INIT2(pApi)
  return sqlite3Fts2Init(db);
}
#endif

#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */







|







 







>









>
>
>







 







<
>
|
|
|
<
>
|
>
>
>




>







 







|







 







|







 







|







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
>
|
>
>
|

>
>
>
>
>
>
>
>
>

|
>
>
|
>






1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
....
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
....
2787
2788
2789
2790
2791
2792
2793

2794
2795
2796
2797

2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
....
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
....
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
....
5837
5838
5839
5840
5841
5842
5843
5844
5845
5846
5847
5848
5849
5850
5851
....
5859
5860
5861
5862
5863
5864
5865
5866
5867
5868
5869
5870
5871
5872
5873
5874
5875
5876
5877
5878
5879
5880
5881
5882
5883
5884
5885
5886
5887
5888
5889
5890
5891
5892
5893
5894
5895
5896
5897
5898
5899
5900
5901
5902
5903
5904
5905
5906
5907
5908
5909
5910
5911
5912
5913
5914
5915
5916
5917
5918
5919
5920
5921
5922
5923
5924
5925
5926
5927
5928
5929
5930
5931
5932
5933
5934
5935
5936
5937
5938
5939
5940
5941
5942
5943
5944
5945
5946
5947
5948
5949
5950
5951
5952
5953
5954
5955
5956
5957
5958
  DLReader reader;                 /* Result reader if result not empty */
} fulltext_cursor;

static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
  return (fulltext_vtab *) c->base.pVtab;
}

static const sqlite3_module fts2Module;   /* forward declaration */

/* Return a dynamically generated statement of the form
 *   insert into %_content (rowid, ...) values (?, ...)
 */
static const char *contentInsertStatement(fulltext_vtab *v){
  StringBuffer sb;
  int i;
................................................................................

/*
** Build a new sqlite3_vtab structure that will describe the
** fulltext index defined by spec.
*/
static int constructVtab(
  sqlite3 *db,              /* The SQLite database connection */
  fts2Hash *pHash,          /* Hash table containing tokenizers */
  TableSpec *spec,          /* Parsed spec information from parseSpec() */
  sqlite3_vtab **ppVTab,    /* Write the resulting vtab structure here */
  char **pzErr              /* Write any error message here */
){
  int rc;
  int n;
  fulltext_vtab *v = 0;
  const sqlite3_tokenizer_module *m = NULL;
  char *schema;

  char const *zTok;         /* Name of tokenizer to use for this fts table */
  int nTok;                 /* Length of zTok, including nul terminator */

  v = (fulltext_vtab *) malloc(sizeof(fulltext_vtab));
  if( v==0 ) return SQLITE_NOMEM;
  CLEAR(v);
  /* sqlite will initialize v->base */
  v->db = db;
  v->zDb = spec->zDb;       /* Freed when azColumn is freed */
................................................................................
  spec->azContentColumn = 0;
  v->azColumn = spec->azColumn;
  spec->azColumn = 0;

  if( spec->azTokenizer==0 ){
    return SQLITE_NOMEM;
  }


  zTok = spec->azTokenizer[0]; 
  if( !zTok ){
    zTok = "simple";

  }
  nTok = strlen(zTok)+1;

  m = (sqlite3_tokenizer_module *)sqlite3Fts2HashFind(pHash, zTok, nTok);
  if( !m ){
    *pzErr = sqlite3_mprintf("unknown tokenizer: %s", spec->azTokenizer[0]);
    rc = SQLITE_ERROR;
    goto err;
  }

  for(n=0; spec->azTokenizer[n]; n++){}
  if( n ){
    rc = m->xCreate(n-1, (const char*const*)&spec->azTokenizer[1],
                    &v->pTokenizer);
  }else{
    rc = m->xCreate(0, 0, &v->pTokenizer);
  }
................................................................................
  sqlite3_vtab **ppVTab,
  char **pzErr
){
  TableSpec spec;
  int rc = parseSpec(&spec, argc, argv, pzErr);
  if( rc!=SQLITE_OK ) return rc;

  rc = constructVtab(db, (fts2Hash *)pAux, &spec, ppVTab, pzErr);
  clearTableSpec(&spec);
  return rc;
}

/* The %_content table holds the text of each document, with
** the rowid used as the docid.
*/
................................................................................
                "  leaves_end_block integer,"
                "  end_block integer,"
                "  root blob,"
                "  primary key(level, idx)"
                ");");
  if( rc!=SQLITE_OK ) goto out;

  rc = constructVtab(db, (fts2Hash *)pAux, &spec, ppVTab, pzErr);

out:
  clearTableSpec(&spec);
  return rc;
}

/* Decide how to handle an SQL query. */
................................................................................
  }else if( strcmp(zName,"offsets")==0 ){
    *pxFunc = snippetOffsetsFunc;
    return 1;
  }
  return 0;
}

static const sqlite3_module fts2Module = {
  /* iVersion      */ 0,
  /* xCreate       */ fulltextCreate,
  /* xConnect      */ fulltextConnect,
  /* xBestIndex    */ fulltextBestIndex,
  /* xDisconnect   */ fulltextDisconnect,
  /* xDestroy      */ fulltextDestroy,
  /* xOpen         */ fulltextOpen,
................................................................................
  /* xBegin        */ fulltextBegin,
  /* xSync         */ fulltextSync,
  /* xCommit       */ fulltextCommit,
  /* xRollback     */ fulltextRollback,
  /* xFindFunction */ fulltextFindFunction,
};

static void hashDestroy(void *p){
  fts2Hash *pHash = (fts2Hash *)p;
  sqlite3Fts2HashClear(pHash);
  sqlite3_free(pHash);
}

/*
** The fts2 built-in tokenizers - "simple" and "porter" - are implemented
** in files fts2_tokenizer1.c and fts2_porter.c respectively. The following
** two forward declarations are for functions declared in these files
** used to retrieve the respective implementations.
**
** Calling sqlite3Fts2SimpleTokenizerModule() sets the value pointed
** to by the argument to point a the "simple" tokenizer implementation.
** Function ...PorterTokenizerModule() sets *pModule to point to the
** porter tokenizer/stemmer implementation.
*/
void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
void sqlite3Fts2PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
void sqlite3Fts2IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule);

/*
** Initialise the fts2 extension. If this extension is built as part
** of the sqlite library, then this function is called directly by
** SQLite. If fts2 is built as a dynamically loadable extension, this
** function is called by the sqlite3_extension_init() entry point.
*/
int sqlite3Fts2Init(sqlite3 *db){
  int rc = SQLITE_OK;
  fts2Hash *pHash = 0;
  const sqlite3_tokenizer_module *pSimple = 0;
  const sqlite3_tokenizer_module *pPorter = 0;
  const sqlite3_tokenizer_module *pIcu = 0;

  sqlite3Fts2SimpleTokenizerModule(&pSimple);
  sqlite3Fts2PorterTokenizerModule(&pPorter);
#ifdef SQLITE_ENABLE_ICU
  sqlite3Fts2IcuTokenizerModule(&pIcu);
#endif

  /* Allocate and initialise the hash-table used to store tokenizers. */
  pHash = sqlite3_malloc(sizeof(fts2Hash));
  if( !pHash ){
    rc = SQLITE_NOMEM;
  }else{
    sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1);
  }

  /* Load the built-in tokenizers into the hash table */
  if( rc==SQLITE_OK ){
    if( sqlite3Fts2HashInsert(pHash, "simple", 7, (void *)pSimple)
     || sqlite3Fts2HashInsert(pHash, "porter", 7, (void *)pPorter) 
     || (pIcu && sqlite3Fts2HashInsert(pHash, "icu", 4, (void *)pIcu))
    ){
      rc = SQLITE_NOMEM;
    }
  }

  /* Create the virtual table wrapper around the hash-table and overload 
  ** the two scalar functions. If this is successful, register the
  ** module with sqlite.
  */
  if( SQLITE_OK==rc 
   && SQLITE_OK==(rc = sqlite3Fts2InitHashTable(db, pHash, "fts2_tokenizer"))
   && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1))
   && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1))
  ){
    return sqlite3_create_module_v2(
        db, "fts2", &fts2Module, (void *)pHash, hashDestroy
    );
  }

  /* An error has occured. Delete the hash table and return the error code. */
  assert( rc!=SQLITE_OK );
  if( pHash ){
    sqlite3Fts2HashClear(pHash);
    sqlite3_free(pHash);
  }
  return rc;
}

#if !SQLITE_CORE
int sqlite3_extension_init(
  sqlite3 *db, 
  char **pzErrMsg,
  const sqlite3_api_routines *pApi
){
  SQLITE_EXTENSION_INIT2(pApi)
  return sqlite3Fts2Init(db);
}
#endif

#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */

Added ext/fts2/fts2_icu.c.



































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
/*
** 2007 June 22
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file implements a tokenizer for fts2 based on the ICU library.
** 
** $Id: fts2_icu.c,v 1.1 2007/06/22 15:21:16 danielk1977 Exp $
*/

#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
#ifdef SQLITE_ENABLE_ICU

#include <assert.h>
#include <string.h>
#include "fts2_tokenizer.h"

#include <unicode/ubrk.h>
#include <unicode/ucol.h>
#include <unicode/ustring.h>
#include <unicode/utf16.h>

typedef struct IcuTokenizer IcuTokenizer;
typedef struct IcuCursor IcuCursor;

struct IcuTokenizer {
  sqlite3_tokenizer base;
  char *zLocale;
};

struct IcuCursor {
  sqlite3_tokenizer_cursor base;

  UBreakIterator *pIter;      /* ICU break-iterator object */
  int nChar;                  /* Number of UChar elements in pInput */
  UChar *aChar;               /* Copy of input using utf-16 encoding */
  int *aOffset;               /* Offsets of each character in utf-8 input */

  int nBuffer;
  char *zBuffer;

  int iToken;
};

/*
** Create a new tokenizer instance.
*/
static int icuCreate(
  int argc,                            /* Number of entries in argv[] */
  const char * const *argv,            /* Tokenizer creation arguments */
  sqlite3_tokenizer **ppTokenizer      /* OUT: Created tokenizer */
){
  IcuTokenizer *p;
  int n = 0;

  if( argc>0 ){
    n = strlen(argv[0])+1;
  }
  p = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+n);
  if( !p ){
    return SQLITE_NOMEM;
  }
  memset(p, 0, sizeof(IcuTokenizer));

  if( n ){
    p->zLocale = (char *)&p[1];
    memcpy(p->zLocale, argv[0], n);
  }

  *ppTokenizer = (sqlite3_tokenizer *)p;

  return SQLITE_OK;
}

/*
** Destroy a tokenizer
*/
static int icuDestroy(sqlite3_tokenizer *pTokenizer){
  IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
  sqlite3_free(p);
  return SQLITE_OK;
}

/*
** Prepare to begin tokenizing a particular string.  The input
** string to be tokenized is pInput[0..nBytes-1].  A cursor
** used to incrementally tokenize this string is returned in 
** *ppCursor.
*/
static int icuOpen(
  sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
  const char *zInput,                    /* Input string */
  int nInput,                            /* Length of zInput in bytes */
  sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */
){
  IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
  IcuCursor *pCsr;

  const int32_t opt = U_FOLD_CASE_DEFAULT;
  UErrorCode status = U_ZERO_ERROR;
  int nChar;

  UChar32 c;
  int iInput = 0;
  int iOut = 0;

  *ppCursor = 0;

  nChar = nInput+1;
  pCsr = (IcuCursor *)sqlite3_malloc(
      sizeof(IcuCursor) +                /* IcuCursor */
      nChar * sizeof(UChar) +            /* IcuCursor.aChar[] */
      (nChar+1) * sizeof(int)            /* IcuCursor.aOffset[] */
  );
  if( !pCsr ){
    return SQLITE_NOMEM;
  }
  memset(pCsr, 0, sizeof(IcuCursor));
  pCsr->aChar = (UChar *)&pCsr[1];
  pCsr->aOffset = (int *)&pCsr->aChar[nChar];

  pCsr->aOffset[iOut] = iInput;
  U8_NEXT(zInput, iInput, nInput, c); 
  while( c>0 ){
    int isError = 0;
    c = u_foldCase(c, opt);
    U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
    if( isError ){
      sqlite3_free(pCsr);
      return SQLITE_ERROR;
    }
    pCsr->aOffset[iOut] = iInput;

    if( iInput<nInput ){
      U8_NEXT(zInput, iInput, nInput, c);
    }else{
      c = 0;
    }
  }

  pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
  if( !U_SUCCESS(status) ){
    sqlite3_free(pCsr);
    return SQLITE_ERROR;
  }
  pCsr->nChar = iOut;

  ubrk_first(pCsr->pIter);
  *ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
  return SQLITE_OK;
}

/*
** Close a tokenization cursor previously opened by a call to icuOpen().
*/
static int icuClose(sqlite3_tokenizer_cursor *pCursor){
  IcuCursor *pCsr = (IcuCursor *)pCursor;
  ubrk_close(pCsr->pIter);
  sqlite3_free(pCsr->zBuffer);
  sqlite3_free(pCsr);
  return SQLITE_OK;
}

/*
** Extract the next token from a tokenization cursor.
*/
static int icuNext(
  sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by simpleOpen */
  const char **ppToken,               /* OUT: *ppToken is the token text */
  int *pnBytes,                       /* OUT: Number of bytes in token */
  int *piStartOffset,                 /* OUT: Starting offset of token */
  int *piEndOffset,                   /* OUT: Ending offset of token */
  int *piPosition                     /* OUT: Position integer of token */
){
  IcuCursor *pCsr = (IcuCursor *)pCursor;

  int iStart = 0;
  int iEnd = 0;
  int nByte = 0;

  while( iStart==iEnd ){
    UChar32 c;

    iStart = ubrk_current(pCsr->pIter);
    iEnd = ubrk_next(pCsr->pIter);
    if( iEnd==UBRK_DONE ){
      return SQLITE_DONE;
    }

    while( iStart<iEnd ){
      int iWhite = iStart;
      U8_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
      if( u_isspace(c) ){
        iStart = iWhite;
      }else{
        break;
      }
    }
    assert(iStart<=iEnd);
  }

  do {
    UErrorCode status = U_ZERO_ERROR;
    if( nByte ){
      char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
      if( !zNew ){
        return SQLITE_NOMEM;
      }
      pCsr->zBuffer = zNew;
      pCsr->nBuffer = nByte;
    }

    u_strToUTF8(
        pCsr->zBuffer, pCsr->nBuffer, &nByte,    /* Output vars */
        &pCsr->aChar[iStart], iEnd-iStart,       /* Input vars */
        &status                                  /* Output success/failure */
    );
  } while( nByte>pCsr->nBuffer );

  *ppToken = pCsr->zBuffer;
  *pnBytes = nByte;
  *piStartOffset = pCsr->aOffset[iStart];
  *piEndOffset = pCsr->aOffset[iEnd];
  *piPosition = pCsr->iToken++;

  return SQLITE_OK;
}

/*
** The set of routines that implement the simple tokenizer
*/
static const sqlite3_tokenizer_module icuTokenizerModule = {
  0,                           /* iVersion */
  icuCreate,                   /* xCreate  */
  icuDestroy,                  /* xCreate  */
  icuOpen,                     /* xOpen    */
  icuClose,                    /* xClose   */
  icuNext,                     /* xNext    */
};

/*
** Set *ppModule to point at the implementation of the ICU tokenizer.
*/
void sqlite3Fts2IcuTokenizerModule(
  sqlite3_tokenizer_module const**ppModule
){
  *ppModule = &icuTokenizerModule;
}

#endif /* defined(SQLITE_ENABLE_ICU) */
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */

Added ext/fts2/fts2_tokenizer.c.













































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230

#include "sqlite3.h"
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT1

#include "fts2_hash.h"
#include "fts2_tokenizer.h"
#include <assert.h>

/*
** Implementation of the SQL scalar function for accessing the underlying 
** hash table. This function may be called as follows:
**
**   SELECT <function-name>(<key-name>);
**   SELECT <function-name>(<key-name>, <pointer>);
**
** where <function-name> is the name passed as the second argument
** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer').
**
** If the <pointer> argument is specified, it must be a blob value
** containing a pointer to be stored as the hash data corresponding
** to the string <key-name>. If <pointer> is not specified, then
** the string <key-name> must already exist in the has table. Otherwise,
** an error is returned.
**
** Whether or not the <pointer> argument is specified, the value returned
** is a blob containing the pointer stored as the hash data corresponding
** to string <key-name> (after the hash-table is updated, if applicable).
*/
static void scalarFunc(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  fts2Hash *pHash;
  void *pPtr = 0;
  const unsigned char *zName;
  int nName;

  assert( argc==1 || argc==2 );

  pHash = (fts2Hash *)sqlite3_user_data(context);

  zName = sqlite3_value_text(argv[0]);
  nName = sqlite3_value_bytes(argv[0])+1;

  if( argc==2 ){
    void *pOld;
    int n = sqlite3_value_bytes(argv[1]);
    if( n!=sizeof(pPtr) ){
      sqlite3_result_error(context, "argument type mismatch", -1);
      return;
    }
    pPtr = *(void **)sqlite3_value_blob(argv[1]);
    pOld = sqlite3Fts2HashInsert(pHash, (void *)zName, nName, pPtr);
    if( pOld==pPtr ){
      sqlite3_result_error(context, "out of memory", -1);
      return;
    }
  }else{
    pPtr = sqlite3Fts2HashFind(pHash, zName, nName);
    if( !pPtr ){
      char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
      sqlite3_result_error(context, zErr, -1);
      sqlite3_free(zErr);
      return;
    }
  }

  sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT);
}

#ifdef SQLITE_TEST

#include <tcl.h>

/*
** Implementation of a special SQL scalar function for testing tokenizers 
** designed to be used in concert with the Tcl testing framework. This
** function must be called with two arguments:
**
**   SELECT <function-name>(<key-name>, <input-string>);
**   SELECT <function-name>(<key-name>, <pointer>);
**
** where <function-name> is the name passed as the second argument
** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer')
** concatenated with the string '_test' (e.g. 'fts2_tokenizer_test').
**
** The return value is a string that may be interpreted as a Tcl
** list. For each token in the <input-string>, three elements are
** added to the returned list. The first is the token position, the 
** second is the token text (folded, stemmed, etc.) and the third is the
** substring of <input-string> associated with the token. For example, 
** using the built-in "simple" tokenizer:
**
**   SELECT fts_tokenizer_test('simple', 'I don't see how');
**
** will return the string:
**
**   "{0 i I 1 dont don't 2 see see 3 how how}"
**   
*/
static void testFunc(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  fts2Hash *pHash;
  sqlite3_tokenizer_module *p;
  sqlite3_tokenizer *pTokenizer = 0;
  sqlite3_tokenizer_cursor *pCsr = 0;

  const char *zErr = 0;

  const char *zName;
  int nName;
  const char *zInput;
  int nInput;

  const char *zToken;
  int nToken;
  int iStart;
  int iEnd;
  int iPos;

  Tcl_Obj *pRet;

  assert( argc==2 );

  nName = sqlite3_value_bytes(argv[0]);
  zName = (const char *)sqlite3_value_text(argv[0]);
  nInput = sqlite3_value_bytes(argv[1]);
  zInput = (const char *)sqlite3_value_text(argv[1]);

  pHash = (fts2Hash *)sqlite3_user_data(context);
  p = (sqlite3_tokenizer_module *)sqlite3Fts2HashFind(pHash, zName, nName+1);

  if( !p ){
    char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
    sqlite3_result_error(context, zErr, -1);
    sqlite3_free(zErr);
    return;
  }

  pRet = Tcl_NewObj();
  Tcl_IncrRefCount(pRet);

  if( SQLITE_OK!=p->xCreate(0, 0, &pTokenizer) ){
    zErr = "error in xCreate()";
    goto finish;
  }
  pTokenizer->pModule = p;
  if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){
    zErr = "error in xOpen()";
    goto finish;
  }
  pCsr->pTokenizer = pTokenizer;

  while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
    zToken = &zInput[iStart];
    nToken = iEnd-iStart;
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
  }

  if( SQLITE_OK!=p->xClose(pCsr) ){
    zErr = "error in xClose()";
    goto finish;
  }
  if( SQLITE_OK!=p->xDestroy(pTokenizer) ){
    zErr = "error in xDestroy()";
    goto finish;
  }

finish:
  if( zErr ){
    sqlite3_result_error(context, zErr, -1);
  }else{
    sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT);
  }
  Tcl_DecrRefCount(pRet);
}
#endif

/*
** Set up SQL objects in database db used to access the contents of
** the hash table pointed to by argument pHash. The hash table must
** been initialised to use string keys, and to take a private copy 
** of the key when a value is inserted. i.e. by a call similar to:
**
**    sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1);
**
** This function adds a scalar function (see header comment above
** scalarFunc() in this file for details) and, if ENABLE_TABLE is
** defined at compilation time, a temporary virtual table (see header 
** comment above struct HashTableVtab) to the database schema. Both 
** provide read/write access to the contents of *pHash.
**
** The third argument to this function, zName, is used as the name
** of both the scalar and, if created, the virtual table.
*/
int sqlite3Fts2InitHashTable(
  sqlite3 *db, 
  fts2Hash *pHash, 
  const char *zName
){
  int rc;
  void *p = (void *)pHash;
  const int any = SQLITE_ANY;
  char *zTest = 0;

#ifdef SQLITE_TEST
  zTest = sqlite3_mprintf("%s_test", zName);
  if( !zTest ){
    return SQLITE_NOMEM;
  }
#endif

  if( (rc = sqlite3_create_function(db, zName, 1, any, p, scalarFunc, 0, 0))
   || (rc = sqlite3_create_function(db, zName, 2, any, p, scalarFunc, 0, 0))
#ifdef SQLITE_TEST
   || (rc = sqlite3_create_function(db, zTest, 2, any, p, testFunc, 0, 0))
#endif
  );

  sqlite3_free(zTest);
  return rc;
}

Changes to ext/fts2/fts2_tokenizer.h.

71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
};

struct sqlite3_tokenizer_cursor {
  sqlite3_tokenizer *pTokenizer;       /* Tokenizer for this cursor. */
  /* Tokenizer implementations will typically add additional fields */
};

/*
** Get the module for a tokenizer which generates tokens based on a
** set of non-token characters.  The default is to break tokens at any
** non-alnum character, though the set of delimiters can also be
** specified by the first argv argument to xCreate().
*/
/* TODO(shess) This doesn't belong here.  Need some sort of
** registration process.
*/
void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
void sqlite3Fts2PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);

#endif /* _FTS2_TOKENIZER_H_ */







<
<
<
<
<
<
<
<
<
<
<
<

71
72
73
74
75
76
77












78
};

struct sqlite3_tokenizer_cursor {
  sqlite3_tokenizer *pTokenizer;       /* Tokenizer for this cursor. */
  /* Tokenizer implementations will typically add additional fields */
};













#endif /* _FTS2_TOKENIZER_H_ */

Changes to ext/icu/README.txt.

109
110
111
112
113
114
115
116

117

118




119
120
121
122
123
124
125
    Even more specifically, the value passed to the "flags" parameter
    of ICU C function uregex_open() is 0.


2  COMPILATION AND USAGE

  The easiest way to compile and use the ICU extension is to build
  and use it as a dynamically loadable SQLite extension.










3 BUGS, PROBLEMS AND SECURITY ISSUES

  3.1 The "case_sensitive_like" Pragma

    This extension does not work well with the "case_sensitive_like"







|
>

>

>
>
>
>







109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
    Even more specifically, the value passed to the "flags" parameter
    of ICU C function uregex_open() is 0.


2  COMPILATION AND USAGE

  The easiest way to compile and use the ICU extension is to build
  and use it as a dynamically loadable SQLite extension. To do this
  using gcc on *nix:

    gcc -shared icu.c `icu-config --ldflags` -o libSqliteIcu.so

  You may need to add "-I" flags so that gcc can find sqlite3ext.h
  and sqlite3.h. The resulting shared lib, libSqliteIcu.so, may be
  loaded into sqlite in the same way as any other dynamically loadable
  extension.


3 BUGS, PROBLEMS AND SECURITY ISSUES

  3.1 The "case_sensitive_like" Pragma

    This extension does not work well with the "case_sensitive_like"

Changes to ext/icu/icu.c.

5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
..
33
34
35
36
37
38
39
40
41
42
43
44


45
46
47
48
49
50
51
...
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: icu.c,v 1.5 2007/06/11 08:00:00 danielk1977 Exp $
**
** This file implements an integration between the ICU library 
** ("International Components for Unicode", an open-source library 
** for handling unicode data) and SQLite. The integration uses 
** ICU to provide the following to SQLite:
**
**   * An implementation of the SQL regexp() function (and hence REGEXP
................................................................................
/* Include ICU headers */
#include <unicode/utypes.h>
#include <unicode/uregex.h>
#include <unicode/ustring.h>
#include <unicode/ucol.h>

#include <assert.h>
#include "sqlite3.h"

#ifndef SQLITE_CORE
  #include "sqlite3ext.h"
  SQLITE_EXTENSION_INIT1


#endif

/*
** Maximum length (in bytes) of the pattern in a LIKE or GLOB
** operator.
*/
#ifndef SQLITE_MAX_LIKE_PATTERN_LENGTH
................................................................................
  zInput = sqlite3_value_text16(apArg[0]);
  if( !zInput ){
    return;
  }
  nInput = sqlite3_value_bytes16(apArg[0]);

  nOutput = nInput * 2 + 2;
  zOutput = sqlite3_malloc(nInput*2+2);
  if( !zOutput ){
    return;
  }

  if( sqlite3_user_data(p) ){
    u_strToUpper(zOutput, nOutput/2, zInput, nInput/2, zLocale, &status);
  }else{







|







 







<




>
>







 







|







5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
..
33
34
35
36
37
38
39

40
41
42
43
44
45
46
47
48
49
50
51
52
...
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: icu.c,v 1.6 2007/06/22 15:21:16 danielk1977 Exp $
**
** This file implements an integration between the ICU library 
** ("International Components for Unicode", an open-source library 
** for handling unicode data) and SQLite. The integration uses 
** ICU to provide the following to SQLite:
**
**   * An implementation of the SQL regexp() function (and hence REGEXP
................................................................................
/* Include ICU headers */
#include <unicode/utypes.h>
#include <unicode/uregex.h>
#include <unicode/ustring.h>
#include <unicode/ucol.h>

#include <assert.h>


#ifndef SQLITE_CORE
  #include "sqlite3ext.h"
  SQLITE_EXTENSION_INIT1
#else
  #include "sqlite3.h"
#endif

/*
** Maximum length (in bytes) of the pattern in a LIKE or GLOB
** operator.
*/
#ifndef SQLITE_MAX_LIKE_PATTERN_LENGTH
................................................................................
  zInput = sqlite3_value_text16(apArg[0]);
  if( !zInput ){
    return;
  }
  nInput = sqlite3_value_bytes16(apArg[0]);

  nOutput = nInput * 2 + 2;
  zOutput = sqlite3_malloc(nOutput);
  if( !zOutput ){
    return;
  }

  if( sqlite3_user_data(p) ){
    u_strToUpper(zOutput, nOutput/2, zInput, nInput/2, zLocale, &status);
  }else{

Changes to main.mk.

144
145
146
147
148
149
150

151

152
153
154
155
156
157
158

# Source code for extensions.
#
EXTSRC += -DSQLITE_CORE=1             \
  $(TOP)/ext/icu/icu.c                \
  $(TOP)/ext/fts2/fts2.c              \
  $(TOP)/ext/fts2/fts2_hash.c         \

  $(TOP)/ext/fts2/fts2_porter.c       \

  $(TOP)/ext/fts2/fts2_tokenizer1.c

# Generated source code files
#
SRC += \
  keywordhash.h \
  opcodes.c \







>

>







144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160

# Source code for extensions.
#
EXTSRC += -DSQLITE_CORE=1             \
  $(TOP)/ext/icu/icu.c                \
  $(TOP)/ext/fts2/fts2.c              \
  $(TOP)/ext/fts2/fts2_hash.c         \
  $(TOP)/ext/fts2/fts2_icu.c          \
  $(TOP)/ext/fts2/fts2_porter.c       \
  $(TOP)/ext/fts2/fts2_tokenizer.c    \
  $(TOP)/ext/fts2/fts2_tokenizer1.c

# Generated source code files
#
SRC += \
  keywordhash.h \
  opcodes.c \

Changes to src/func.c.

12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
....
1406
1407
1408
1409
1410
1411
1412

1413





1414
1415
1416
1417
1418
1419
1420
** This file contains the C functions that implement various SQL
** functions of SQLite.  
**
** There is only one exported symbol in this file - the function
** sqliteRegisterBuildinFunctions() found at the bottom of the file.
** All other code has file scope.
**
** $Id: func.c,v 1.160 2007/06/07 19:08:33 drh Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>
/* #include <math.h> */
#include <stdlib.h>
#include <assert.h>
#include "vdbeInt.h"
................................................................................
          strlen(aAggs[i].zName), aAggs[i].nArg, SQLITE_UTF8, 0);
      if( pFunc && aAggs[i].needCollSeq ){
        pFunc->needCollSeq = 1;
      }
    }
  }
  sqlite3RegisterDateTimeFunctions(db);

  sqlite3_overload_function(db, "MATCH", 2);





#ifdef SQLITE_SSE
  (void)sqlite3SseFunctions(db);
#endif
#ifdef SQLITE_CASE_SENSITIVE_LIKE
  sqlite3RegisterLikeFunctions(db, 1);
#else
  sqlite3RegisterLikeFunctions(db, 0);







|







 







>
|
>
>
>
>
>







12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
....
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
** This file contains the C functions that implement various SQL
** functions of SQLite.  
**
** There is only one exported symbol in this file - the function
** sqliteRegisterBuildinFunctions() found at the bottom of the file.
** All other code has file scope.
**
** $Id: func.c,v 1.161 2007/06/22 15:21:16 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>
/* #include <math.h> */
#include <stdlib.h>
#include <assert.h>
#include "vdbeInt.h"
................................................................................
          strlen(aAggs[i].zName), aAggs[i].nArg, SQLITE_UTF8, 0);
      if( pFunc && aAggs[i].needCollSeq ){
        pFunc->needCollSeq = 1;
      }
    }
  }
  sqlite3RegisterDateTimeFunctions(db);
  if( !sqlite3MallocFailed() ){
    int rc = sqlite3_overload_function(db, "MATCH", 2);
    assert( rc==SQLITE_NOMEM || rc==SQLITE_OK );
    if( rc==SQLITE_NOMEM ){
      sqlite3FailedMalloc();
    }
  }
#ifdef SQLITE_SSE
  (void)sqlite3SseFunctions(db);
#endif
#ifdef SQLITE_CASE_SENSITIVE_LIKE
  sqlite3RegisterLikeFunctions(db, 1);
#else
  sqlite3RegisterLikeFunctions(db, 0);

Changes to src/loadext.c.

153
154
155
156
157
158
159

160
161
162
163
164
165
166
  sqlite3_complete,
  sqlite3_complete16,
  sqlite3_create_collation,
  sqlite3_create_collation16,
  sqlite3_create_function,
  sqlite3_create_function16,
  sqlite3_create_module,

  sqlite3_data_count,
  sqlite3_db_handle,
  sqlite3_declare_vtab,
  sqlite3_enable_shared_cache,
  sqlite3_errcode,
  sqlite3_errmsg,
  sqlite3_errmsg16,







>







153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
  sqlite3_complete,
  sqlite3_complete16,
  sqlite3_create_collation,
  sqlite3_create_collation16,
  sqlite3_create_function,
  sqlite3_create_function16,
  sqlite3_create_module,
  sqlite3_create_module_v2,
  sqlite3_data_count,
  sqlite3_db_handle,
  sqlite3_declare_vtab,
  sqlite3_enable_shared_cache,
  sqlite3_errcode,
  sqlite3_errmsg,
  sqlite3_errmsg16,

Changes to src/main.c.

10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
...
190
191
192
193
194
195
196



197
198
199
200
201
202
203
...
981
982
983
984
985
986
987





988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002


1003
1004
1005


1006
1007
1008
1009
1010
1011
1012

1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023

1024
1025
1026
1027
1028
1029
1030
**
*************************************************************************
** Main file for the SQLite library.  The routines in this file
** implement the programmer interface to the library.  Routines in
** other files are for internal use by SQLite and should not be
** accessed by users of the library.
**
** $Id: main.c,v 1.376 2007/05/08 20:37:39 drh Exp $
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>

/*
** The version of the library
................................................................................
    }
    sqliteFree(pColl);
  }
  sqlite3HashClear(&db->aCollSeq);
#ifndef SQLITE_OMIT_VIRTUALTABLE
  for(i=sqliteHashFirst(&db->aModule); i; i=sqliteHashNext(i)){
    Module *pMod = (Module *)sqliteHashData(i);



    sqliteFree(pMod);
  }
  sqlite3HashClear(&db->aModule);
#endif

  sqlite3HashClear(&db->aFunc);
  sqlite3Error(db, SQLITE_OK, 0); /* Deallocates any cached error strings. */
................................................................................
  */
  db->aDb[0].zName = "main";
  db->aDb[0].safety_level = 3;
#ifndef SQLITE_OMIT_TEMPDB
  db->aDb[1].zName = "temp";
  db->aDb[1].safety_level = 1;
#endif






  /* Register all built-in functions, but do not attempt to read the
  ** database schema yet. This is delayed until the first time the database
  ** is accessed.
  */
  if( !sqlite3MallocFailed() ){
    sqlite3Error(db, SQLITE_OK, 0);
    sqlite3RegisterBuiltinFunctions(db);
  }
  db->magic = SQLITE_MAGIC_OPEN;

  /* Load automatic extensions - extensions that have been registered
  ** using the sqlite3_automatic_extension() API.
  */
  (void)sqlite3AutoLoadExtensions(db);



#ifdef SQLITE_ENABLE_FTS1
  {


    extern int sqlite3Fts1Init(sqlite3*);
    sqlite3Fts1Init(db);
  }
#endif

#ifdef SQLITE_ENABLE_FTS2
  {

    extern int sqlite3Fts2Init(sqlite3*);
    sqlite3Fts2Init(db);
  }
#endif

#ifdef SQLITE_ENABLE_ICU
  if( !sqlite3MallocFailed() ){
    extern int sqlite3IcuInit(sqlite3*);
    sqlite3IcuInit(db);
  }
#endif


  /* -DSQLITE_DEFAULT_LOCKING_MODE=1 makes EXCLUSIVE the default locking
  ** mode.  -DSQLITE_DEFAULT_LOCKING_MODE=0 make NORMAL the default locking
  ** mode.  Doing nothing at all also makes NORMAL the default.
  */
#ifdef SQLITE_DEFAULT_LOCKING_MODE
  db->dfltLockMode = SQLITE_DEFAULT_LOCKING_MODE;







|







 







>
>
>







 







>
>
>
>
>





<
|
|
<
<





>
>
|
<
|
>
>

|




<
>

|




|

|


>







10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
...
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
...
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000

1001
1002


1003
1004
1005
1006
1007
1008
1009
1010

1011
1012
1013
1014
1015
1016
1017
1018
1019

1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
**
*************************************************************************
** Main file for the SQLite library.  The routines in this file
** implement the programmer interface to the library.  Routines in
** other files are for internal use by SQLite and should not be
** accessed by users of the library.
**
** $Id: main.c,v 1.377 2007/06/22 15:21:16 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "os.h"
#include <ctype.h>

/*
** The version of the library
................................................................................
    }
    sqliteFree(pColl);
  }
  sqlite3HashClear(&db->aCollSeq);
#ifndef SQLITE_OMIT_VIRTUALTABLE
  for(i=sqliteHashFirst(&db->aModule); i; i=sqliteHashNext(i)){
    Module *pMod = (Module *)sqliteHashData(i);
    if( pMod->xDestroy ){
      pMod->xDestroy(pMod->pAux);
    }
    sqliteFree(pMod);
  }
  sqlite3HashClear(&db->aModule);
#endif

  sqlite3HashClear(&db->aFunc);
  sqlite3Error(db, SQLITE_OK, 0); /* Deallocates any cached error strings. */
................................................................................
  */
  db->aDb[0].zName = "main";
  db->aDb[0].safety_level = 3;
#ifndef SQLITE_OMIT_TEMPDB
  db->aDb[1].zName = "temp";
  db->aDb[1].safety_level = 1;
#endif

  db->magic = SQLITE_MAGIC_OPEN;
  if( sqlite3MallocFailed() ){
    goto opendb_out;
  }

  /* Register all built-in functions, but do not attempt to read the
  ** database schema yet. This is delayed until the first time the database
  ** is accessed.
  */

  sqlite3Error(db, SQLITE_OK, 0);
  sqlite3RegisterBuiltinFunctions(db);



  /* Load automatic extensions - extensions that have been registered
  ** using the sqlite3_automatic_extension() API.
  */
  (void)sqlite3AutoLoadExtensions(db);
  if( sqlite3_errcode(db)!=SQLITE_OK ){
    goto opendb_out;
  }


#ifdef SQLITE_ENABLE_FTS1
  if( !sqlite3MallocFailed() ){
    extern int sqlite3Fts1Init(sqlite3*);
    rc = sqlite3Fts1Init(db);
  }
#endif

#ifdef SQLITE_ENABLE_FTS2

  if( !sqlite3MallocFailed() && rc==SQLITE_OK ){
    extern int sqlite3Fts2Init(sqlite3*);
    rc = sqlite3Fts2Init(db);
  }
#endif

#ifdef SQLITE_ENABLE_ICU
  if( !sqlite3MallocFailed() && rc==SQLITE_OK ){
    extern int sqlite3IcuInit(sqlite3*);
    rc = sqlite3IcuInit(db);
  }
#endif
  sqlite3Error(db, rc, 0);

  /* -DSQLITE_DEFAULT_LOCKING_MODE=1 makes EXCLUSIVE the default locking
  ** mode.  -DSQLITE_DEFAULT_LOCKING_MODE=0 make NORMAL the default locking
  ** mode.  Doing nothing at all also makes NORMAL the default.
  */
#ifdef SQLITE_DEFAULT_LOCKING_MODE
  db->dfltLockMode = SQLITE_DEFAULT_LOCKING_MODE;

Changes to src/sqlite.h.in.

26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
....
2492
2493
2494
2495
2496
2497
2498













2499
2500
2501
2502
2503
2504
2505
** on how SQLite interfaces are suppose to operate.
**
** The name of this file under configuration management is "sqlite.h.in".
** The makefile makes some minor changes to this file (such as inserting
** the version number) and changes its name to "sqlite3.h" as
** part of the build process.
**
** @(#) $Id: sqlite.h.in,v 1.214 2007/06/21 15:25:05 drh Exp $
*/
#ifndef _SQLITE3_H_
#define _SQLITE3_H_
#include <stdarg.h>     /* Needed for the definition of va_list */

/*
** Make sure we can call this stuff from C++.
................................................................................
*/
int sqlite3_create_module(
  sqlite3 *db,               /* SQLite connection to register module with */
  const char *zName,         /* Name of the module */
  const sqlite3_module *,    /* Methods for the module */
  void *                     /* Client data for xCreate/xConnect */
);














/*
** Every module implementation uses a subclass of the following structure
** to describe a particular instance of the module.  Each subclass will
** be taylored to the specific needs of the module implementation.   The
** purpose of this superclass is to define certain fields that are common
** to all module implementations.







|







 







>
>
>
>
>
>
>
>
>
>
>
>
>







26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
....
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
** on how SQLite interfaces are suppose to operate.
**
** The name of this file under configuration management is "sqlite.h.in".
** The makefile makes some minor changes to this file (such as inserting
** the version number) and changes its name to "sqlite3.h" as
** part of the build process.
**
** @(#) $Id: sqlite.h.in,v 1.215 2007/06/22 15:21:16 danielk1977 Exp $
*/
#ifndef _SQLITE3_H_
#define _SQLITE3_H_
#include <stdarg.h>     /* Needed for the definition of va_list */

/*
** Make sure we can call this stuff from C++.
................................................................................
*/
int sqlite3_create_module(
  sqlite3 *db,               /* SQLite connection to register module with */
  const char *zName,         /* Name of the module */
  const sqlite3_module *,    /* Methods for the module */
  void *                     /* Client data for xCreate/xConnect */
);

/*
** This routine is identical to the sqlite3_create_module() method above,
** except that it allows a destructor function to be specified. It is
** even more experimental than the rest of the virtual tables API.
*/
int sqlite3_create_module_v2(
  sqlite3 *db,               /* SQLite connection to register module with */
  const char *zName,         /* Name of the module */
  const sqlite3_module *,    /* Methods for the module */
  void *,                    /* Client data for xCreate/xConnect */
  void(*xDestroy)(void*)     /* Module destructor function */
);

/*
** Every module implementation uses a subclass of the following structure
** to describe a particular instance of the module.  Each subclass will
** be taylored to the specific needs of the module implementation.   The
** purpose of this superclass is to define certain fields that are common
** to all module implementations.

Changes to src/sqlite3ext.h.

11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
..
72
73
74
75
76
77
78

79
80
81
82
83
84
85
...
205
206
207
208
209
210
211

212
213
214
215
216
217
218
*************************************************************************
** This header file defines the SQLite interface for use by
** shared libraries that want to be imported as extensions into
** an SQLite instance.  Shared libraries that intend to be loaded
** as extensions by SQLite should #include this file instead of 
** sqlite3.h.
**
** @(#) $Id: sqlite3ext.h,v 1.10 2007/03/29 18:46:01 drh Exp $
*/
#ifndef _SQLITE3EXT_H_
#define _SQLITE3EXT_H_
#include "sqlite3.h"

typedef struct sqlite3_api_routines sqlite3_api_routines;

................................................................................
  int  (*complete)(const char*sql);
  int  (*complete16)(const void*sql);
  int  (*create_collation)(sqlite3*,const char*,int,void*,int(*)(void*,int,const void*,int,const void*));
  int  (*create_collation16)(sqlite3*,const char*,int,void*,int(*)(void*,int,const void*,int,const void*));
  int  (*create_function)(sqlite3*,const char*,int,int,void*,void (*xFunc)(sqlite3_context*,int,sqlite3_value**),void (*xStep)(sqlite3_context*,int,sqlite3_value**),void (*xFinal)(sqlite3_context*));
  int  (*create_function16)(sqlite3*,const void*,int,int,void*,void (*xFunc)(sqlite3_context*,int,sqlite3_value**),void (*xStep)(sqlite3_context*,int,sqlite3_value**),void (*xFinal)(sqlite3_context*));
  int (*create_module)(sqlite3*,const char*,const sqlite3_module*,void*);

  int  (*data_count)(sqlite3_stmt*pStmt);
  sqlite3 * (*db_handle)(sqlite3_stmt*);
  int (*declare_vtab)(sqlite3*,const char*);
  int  (*enable_shared_cache)(int);
  int  (*errcode)(sqlite3*db);
  const char * (*errmsg)(sqlite3*);
  const void * (*errmsg16)(sqlite3*);
................................................................................
#define sqlite3_complete               sqlite3_api->complete
#define sqlite3_complete16             sqlite3_api->complete16
#define sqlite3_create_collation       sqlite3_api->create_collation
#define sqlite3_create_collation16     sqlite3_api->create_collation16
#define sqlite3_create_function        sqlite3_api->create_function
#define sqlite3_create_function16      sqlite3_api->create_function16
#define sqlite3_create_module          sqlite3_api->create_module

#define sqlite3_data_count             sqlite3_api->data_count
#define sqlite3_db_handle              sqlite3_api->db_handle
#define sqlite3_declare_vtab           sqlite3_api->declare_vtab
#define sqlite3_enable_shared_cache    sqlite3_api->enable_shared_cache
#define sqlite3_errcode                sqlite3_api->errcode
#define sqlite3_errmsg                 sqlite3_api->errmsg
#define sqlite3_errmsg16               sqlite3_api->errmsg16







|







 







>







 







>







11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
..
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
...
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
*************************************************************************
** This header file defines the SQLite interface for use by
** shared libraries that want to be imported as extensions into
** an SQLite instance.  Shared libraries that intend to be loaded
** as extensions by SQLite should #include this file instead of 
** sqlite3.h.
**
** @(#) $Id: sqlite3ext.h,v 1.11 2007/06/22 15:21:16 danielk1977 Exp $
*/
#ifndef _SQLITE3EXT_H_
#define _SQLITE3EXT_H_
#include "sqlite3.h"

typedef struct sqlite3_api_routines sqlite3_api_routines;

................................................................................
  int  (*complete)(const char*sql);
  int  (*complete16)(const void*sql);
  int  (*create_collation)(sqlite3*,const char*,int,void*,int(*)(void*,int,const void*,int,const void*));
  int  (*create_collation16)(sqlite3*,const char*,int,void*,int(*)(void*,int,const void*,int,const void*));
  int  (*create_function)(sqlite3*,const char*,int,int,void*,void (*xFunc)(sqlite3_context*,int,sqlite3_value**),void (*xStep)(sqlite3_context*,int,sqlite3_value**),void (*xFinal)(sqlite3_context*));
  int  (*create_function16)(sqlite3*,const void*,int,int,void*,void (*xFunc)(sqlite3_context*,int,sqlite3_value**),void (*xStep)(sqlite3_context*,int,sqlite3_value**),void (*xFinal)(sqlite3_context*));
  int (*create_module)(sqlite3*,const char*,const sqlite3_module*,void*);
  int (*create_module_v2)(sqlite3*,const char*,const sqlite3_module*,void*,void (*xDestroy)(void *));
  int  (*data_count)(sqlite3_stmt*pStmt);
  sqlite3 * (*db_handle)(sqlite3_stmt*);
  int (*declare_vtab)(sqlite3*,const char*);
  int  (*enable_shared_cache)(int);
  int  (*errcode)(sqlite3*db);
  const char * (*errmsg)(sqlite3*);
  const void * (*errmsg16)(sqlite3*);
................................................................................
#define sqlite3_complete               sqlite3_api->complete
#define sqlite3_complete16             sqlite3_api->complete16
#define sqlite3_create_collation       sqlite3_api->create_collation
#define sqlite3_create_collation16     sqlite3_api->create_collation16
#define sqlite3_create_function        sqlite3_api->create_function
#define sqlite3_create_function16      sqlite3_api->create_function16
#define sqlite3_create_module          sqlite3_api->create_module
#define sqlite3_create_module_v2       sqlite3_api->create_module_v2
#define sqlite3_data_count             sqlite3_api->data_count
#define sqlite3_db_handle              sqlite3_api->db_handle
#define sqlite3_declare_vtab           sqlite3_api->declare_vtab
#define sqlite3_enable_shared_cache    sqlite3_api->enable_shared_cache
#define sqlite3_errcode                sqlite3_api->errcode
#define sqlite3_errmsg                 sqlite3_api->errmsg
#define sqlite3_errmsg16               sqlite3_api->errmsg16

Changes to src/sqliteInt.h.

7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
...
580
581
582
583
584
585
586

587
588
589
590
591
592
593
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Internal interface definitions for SQLite.
**
** @(#) $Id: sqliteInt.h,v 1.574 2007/06/20 15:29:25 drh Exp $
*/
#ifndef _SQLITEINT_H_
#define _SQLITEINT_H_
#include "sqliteLimit.h"


#if defined(SQLITE_TCL) || defined(TCLSH)
................................................................................
** instance of the following structure, stored in the sqlite3.aModule
** hash table.
*/
struct Module {
  const sqlite3_module *pModule;       /* Callback pointers */
  const char *zName;                   /* Name passed to create_module() */
  void *pAux;                          /* pAux passed to create_module() */

};

/*
** Possible values for FuncDef.flags
*/
#define SQLITE_FUNC_LIKE   0x01  /* Candidate for the LIKE optimization */
#define SQLITE_FUNC_CASE   0x02  /* Case-sensitive LIKE-type function */







|







 







>







7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
...
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** Internal interface definitions for SQLite.
**
** @(#) $Id: sqliteInt.h,v 1.575 2007/06/22 15:21:16 danielk1977 Exp $
*/
#ifndef _SQLITEINT_H_
#define _SQLITEINT_H_
#include "sqliteLimit.h"


#if defined(SQLITE_TCL) || defined(TCLSH)
................................................................................
** instance of the following structure, stored in the sqlite3.aModule
** hash table.
*/
struct Module {
  const sqlite3_module *pModule;       /* Callback pointers */
  const char *zName;                   /* Name passed to create_module() */
  void *pAux;                          /* pAux passed to create_module() */
  void (*xDestroy)(void *);            /* Module destructor function */
};

/*
** Possible values for FuncDef.flags
*/
#define SQLITE_FUNC_LIKE   0x01  /* Candidate for the LIKE optimization */
#define SQLITE_FUNC_CASE   0x02  /* Case-sensitive LIKE-type function */

Changes to src/vtab.c.

7
8
9
10
11
12
13
14
15
16
17



























18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40

41
42
43
44
45
46
47
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains code used to help implement virtual tables.
**
** $Id: vtab.c,v 1.46 2007/05/04 13:15:57 drh Exp $
*/
#ifndef SQLITE_OMIT_VIRTUALTABLE
#include "sqliteInt.h"




























/*
** External API function used to create a new virtual-table module.
*/
int sqlite3_create_module(
  sqlite3 *db,                    /* Database in which module is registered */
  const char *zName,              /* Name assigned to this module */
  const sqlite3_module *pModule,  /* The definition of the module */
  void *pAux                      /* Context pointer for xCreate/xConnect */
){
  int nName = strlen(zName);
  Module *pMod = (Module *)sqliteMallocRaw(sizeof(Module) + nName + 1);
  if( pMod ){
    char *zCopy = (char *)(&pMod[1]);
    memcpy(zCopy, zName, nName+1);
    pMod->zName = zCopy;
    pMod->pModule = pModule;
    pMod->pAux = pAux;
    pMod = (Module *)sqlite3HashInsert(&db->aModule, zCopy, nName, (void*)pMod);
    sqliteFree(pMod);
    sqlite3ResetInternalSchema(db, 0);
  }
  return sqlite3ApiExit(db, SQLITE_OK);

}

/*
** Lock the virtual table so that it cannot be disconnected.
** Locks nest.  Every lock should have a corresponding unlock.
** If an unlock is omitted, resources leaks will occur.  
**







|



>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>










|
|
|
|
|
|
|
|
|
|
|
|
|
>







7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This file contains code used to help implement virtual tables.
**
** $Id: vtab.c,v 1.47 2007/06/22 15:21:16 danielk1977 Exp $
*/
#ifndef SQLITE_OMIT_VIRTUALTABLE
#include "sqliteInt.h"

static int createModule(
  sqlite3 *db,                    /* Database in which module is registered */
  const char *zName,              /* Name assigned to this module */
  const sqlite3_module *pModule,  /* The definition of the module */
  void *pAux,                     /* Context pointer for xCreate/xConnect */
  void (*xDestroy)(void *)        /* Module destructor function */
) {
  int nName = strlen(zName);
  Module *pMod = (Module *)sqliteMallocRaw(sizeof(Module) + nName + 1);
  if( pMod ){
    char *zCopy = (char *)(&pMod[1]);
    memcpy(zCopy, zName, nName+1);
    pMod->zName = zCopy;
    pMod->pModule = pModule;
    pMod->pAux = pAux;
    pMod->xDestroy = xDestroy;
    pMod = (Module *)sqlite3HashInsert(&db->aModule, zCopy, nName, (void*)pMod);
    if( pMod && pMod->xDestroy ){
      pMod->xDestroy(pMod->pAux);
    }
    sqliteFree(pMod);
    sqlite3ResetInternalSchema(db, 0);
  }
  return sqlite3ApiExit(db, SQLITE_OK);
}


/*
** External API function used to create a new virtual-table module.
*/
int sqlite3_create_module(
  sqlite3 *db,                    /* Database in which module is registered */
  const char *zName,              /* Name assigned to this module */
  const sqlite3_module *pModule,  /* The definition of the module */
  void *pAux                      /* Context pointer for xCreate/xConnect */
){
  return createModule(db, zName, pModule, pAux, 0);
}

/*
** External API function used to create a new virtual-table module.
*/
int sqlite3_create_module_v2(
  sqlite3 *db,                    /* Database in which module is registered */
  const char *zName,              /* Name assigned to this module */
  const sqlite3_module *pModule,  /* The definition of the module */
  void *pAux,                     /* Context pointer for xCreate/xConnect */
  void (*xDestroy)(void *)        /* Module destructor function */
){
  return createModule(db, zName, pModule, pAux, xDestroy);
}

/*
** Lock the virtual table so that it cannot be disconnected.
** Locks nest.  Every lock should have a corresponding unlock.
** If an unlock is omitted, resources leaks will occur.  
**

Added test/fts2token.test.























































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# 2007 June 21
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library. The focus 
# of this script is testing the pluggable tokeniser feature of the 
# FTS2 module.
#
# $Id: fts2token.test,v 1.1 2007/06/22 15:21:16 danielk1977 Exp $
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# If SQLITE_ENABLE_FTS2 is defined, omit this file.
ifcapable !fts2 {
  finish_test
  return
}

#--------------------------------------------------------------------------
# Test cases fts2token-1.* are the warm-body test for the SQL scalar
# function fts2_tokenizer(). The procedure is as follows:
#
#   1: Verify that there is no such fts2 tokenizer as 'blah'.
#
#   2: Query for the built-in tokenizer 'simple'. Insert a copy of the
#      retrieved value as tokenizer 'blah'.
#
#   3: Test that the value returned for tokenizer 'blah' is now the
#      same as that retrieved for 'simple'.
#
#   4: Test that it is now possible to create an fts2 table using 
#      tokenizer 'blah' (it was not possible in step 1).
#
#   5: Test that the table created to use tokenizer 'blah' is usable.
#
do_test fts2token-1.1 {
  catchsql {
    CREATE VIRTUAL TABLE t1 USING fts2(content, tokenize blah);
  }
} {1 {unknown tokenizer: blah}}
do_test fts2token-1.2 {
  execsql {
    SELECT fts2_tokenizer('blah', fts2_tokenizer('simple')) IS NULL;
  }
} {0}
do_test fts2token-1.3 {
  execsql {
    SELECT fts2_tokenizer('blah') == fts2_tokenizer('simple');
  }
} {1}
do_test fts2token-1.4 {
  catchsql {
    CREATE VIRTUAL TABLE t1 USING fts2(content, tokenize blah);
  }
} {0 {}}
do_test fts2token-1.5 {
  execsql {
    INSERT INTO t1(content) VALUES('There was movement at the station');
    INSERT INTO t1(content) VALUES('For the word has passed around');
    INSERT INTO t1(content) VALUES('That the colt from ol regret had got away');
    SELECT content FROM t1 WHERE content MATCH 'movement'
  }
} {{There was movement at the station}}

#--------------------------------------------------------------------------
# Test cases fts2token-2.* test error cases in the scalar function based
# API for getting and setting tokenizers.
#
do_test fts2token-2.1 {
  catchsql {
    SELECT fts2_tokenizer('nosuchtokenizer');
  }
} {1 {unknown tokenizer: nosuchtokenizer}}

#--------------------------------------------------------------------------
# Test cases fts2token-3.* test the three built-in tokenizers with a
# simple input string via the built-in test function. This is as much
# to test the test function as the tokenizer implementations.
#
do_test fts2token-3.1 {
  execsql {
    SELECT fts2_tokenizer_test('simple', 'I don''t see how');
  }
} {{0 i I 1 don don 2 t t 3 see see 4 how how}}
do_test fts2token-3.2 {
  execsql {
    SELECT fts2_tokenizer_test('porter', 'I don''t see how');
  }
} {{0 i I 1 don don 2 t t 3 see see 4 how how}}

ifcapable icu {
  do_test fts2token-3.3 {
    execsql {
      SELECT fts2_tokenizer_test('icu', 'I don''t see how');
    }
  } {{0 i I 1 don't don't 2 see see 3 how how}}
}

finish_test