SQLite

Check-in [6c51c9c6]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add experimental way to specify an alternative tokenizer when writing to or querying an fts5 table.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5-tokenize-blob
Files: files | file ages | folders
SHA3-256: 6c51c9c6a8a6a730c1d9e0119bc39edeefbbcb3b30476347a51d2e08eb91fe36
User & Date: dan 2024-04-15 20:24:50
Context
2024-04-16
14:23
Add some tests for the fts5 tokenize-blob functionality on this branch. (check-in: c2f9d125 user: dan tags: fts5-tokenize-blob)
2024-04-15
20:24
Add experimental way to specify an alternative tokenizer when writing to or querying an fts5 table. (check-in: 6c51c9c6 user: dan tags: fts5-tokenize-blob)
2024-04-12
18:46
If a build fails in testrunner.tcl, do not attempt to run the jobs that depend on that build. Instead, report those jobs as having been skipped. (check-in: b40580be user: drh tags: trunk)
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5.h.

22
23
24
25
26
27
28


29
30
31
32
33
34
35
#define _FTS5_H

#include "sqlite3.h"

#ifdef __cplusplus
extern "C" {
#endif



/*************************************************************************
** CUSTOM AUXILIARY FUNCTIONS
**
** Virtual table implementations may overload SQL functions by implementing
** the sqlite3_module.xFindFunction() method.
*/







>
>







22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#define _FTS5_H

#include "sqlite3.h"

#ifdef __cplusplus
extern "C" {
#endif

#define SQLITE_FTS5_TOKENIZE_SUBTYPE ((unsigned int)'T')

/*************************************************************************
** CUSTOM AUXILIARY FUNCTIONS
**
** Virtual table implementations may overload SQL functions by implementing
** the sqlite3_module.xFindFunction() method.
*/

Changes to ext/fts5/fts5Int.h.

139
140
141
142
143
144
145








146
147
148
149
150
151
152
/**************************************************************************
** Interface to code in fts5_config.c. fts5_config.c contains contains code
** to parse the arguments passed to the CREATE VIRTUAL TABLE statement.
*/

typedef struct Fts5Config Fts5Config;









/*
** An instance of the following structure encodes all information that can
** be gleaned from the CREATE VIRTUAL TABLE statement.
**
** And all information loaded from the %_config table.
**
** nAutomerge:







>
>
>
>
>
>
>
>







139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
/**************************************************************************
** Interface to code in fts5_config.c. fts5_config.c contains contains code
** to parse the arguments passed to the CREATE VIRTUAL TABLE statement.
*/

typedef struct Fts5Config Fts5Config;

typedef struct Fts5TokenizerInst Fts5TokenizerInst;
struct Fts5TokenizerInst {
  char *zSpec;
  Fts5Tokenizer *pTok;
  fts5_tokenizer *pTokApi;
  Fts5TokenizerInst *pNext;
};

/*
** An instance of the following structure encodes all information that can
** be gleaned from the CREATE VIRTUAL TABLE statement.
**
** And all information loaded from the %_config table.
**
** nAutomerge:
180
181
182
183
184
185
186

187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
**   are ignored. This value is configured using:
**
**       INSERT INTO tbl(tbl, rank) VALUES('prefix-index', $bPrefixIndex);
**
*/
struct Fts5Config {
  sqlite3 *db;                    /* Database handle */

  char *zDb;                      /* Database holding FTS index (e.g. "main") */
  char *zName;                    /* Name of FTS index */
  int nCol;                       /* Number of columns */
  char **azCol;                   /* Column names */
  u8 *abUnindexed;                /* True for unindexed columns */
  int nPrefix;                    /* Number of prefix indexes */
  int *aPrefix;                   /* Sizes in bytes of nPrefix prefix indexes */
  int eContent;                   /* An FTS5_CONTENT value */
  int bContentlessDelete;         /* "contentless_delete=" option (dflt==0) */
  char *zContent;                 /* content table */ 
  char *zContentRowid;            /* "content_rowid=" option value */ 
  int bColumnsize;                /* "columnsize=" option value (dflt==1) */
  int bTokendata;                 /* "tokendata=" option value (dflt==0) */
  int eDetail;                    /* FTS5_DETAIL_XXX value */
  char *zContentExprlist;
  Fts5Tokenizer *pTok;
  fts5_tokenizer *pTokApi;
  int bLock;                      /* True when table is preparing statement */
  int ePattern;                   /* FTS_PATTERN_XXX constant */

  /* Values loaded from the %_config table */
  int iVersion;                   /* fts5 file format 'version' */
  int iCookie;                    /* Incremented when %_config is modified */
  int pgsz;                       /* Approximate page size used in %_data */







>















|
<







188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211

212
213
214
215
216
217
218
**   are ignored. This value is configured using:
**
**       INSERT INTO tbl(tbl, rank) VALUES('prefix-index', $bPrefixIndex);
**
*/
struct Fts5Config {
  sqlite3 *db;                    /* Database handle */
  Fts5Global *pGlobal;            /* Database wide data */
  char *zDb;                      /* Database holding FTS index (e.g. "main") */
  char *zName;                    /* Name of FTS index */
  int nCol;                       /* Number of columns */
  char **azCol;                   /* Column names */
  u8 *abUnindexed;                /* True for unindexed columns */
  int nPrefix;                    /* Number of prefix indexes */
  int *aPrefix;                   /* Sizes in bytes of nPrefix prefix indexes */
  int eContent;                   /* An FTS5_CONTENT value */
  int bContentlessDelete;         /* "contentless_delete=" option (dflt==0) */
  char *zContent;                 /* content table */ 
  char *zContentRowid;            /* "content_rowid=" option value */ 
  int bColumnsize;                /* "columnsize=" option value (dflt==1) */
  int bTokendata;                 /* "tokendata=" option value (dflt==0) */
  int eDetail;                    /* FTS5_DETAIL_XXX value */
  char *zContentExprlist;
  Fts5TokenizerInst *pTokList;

  int bLock;                      /* True when table is preparing statement */
  int ePattern;                   /* FTS_PATTERN_XXX constant */

  /* Values loaded from the %_config table */
  int iVersion;                   /* fts5 file format 'version' */
  int iCookie;                    /* Incremented when %_config is modified */
  int pgsz;                       /* Approximate page size used in %_data */
249
250
251
252
253
254
255















256
257
258
259
260
261
262
void sqlite3Fts5ConfigFree(Fts5Config*);

int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig);

int sqlite3Fts5Tokenize(
  Fts5Config *pConfig,            /* FTS5 Configuration object */
  int flags,                      /* FTS5_TOKENIZE_* flags */















  const char *pText, int nText,   /* Text to tokenize */
  void *pCtx,                     /* Context passed to xToken() */
  int (*xToken)(void*, int, const char*, int, int, int)    /* Callback */
);

void sqlite3Fts5Dequote(char *z);








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
void sqlite3Fts5ConfigFree(Fts5Config*);

int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig);

int sqlite3Fts5Tokenize(
  Fts5Config *pConfig,            /* FTS5 Configuration object */
  int flags,                      /* FTS5_TOKENIZE_* flags */
  const char *pText, int nText,   /* Text to tokenize */
  void *pCtx,                     /* Context passed to xToken() */
  int (*xToken)(void*, int, const char*, int, int, int)    /* Callback */
);

int sqlite3Fts5ConfigFindTokenizer(
  Fts5Config *pConfig, 
  const char *z, 
  Fts5TokenizerInst **ppOut
);

int sqlite3Fts5SpecTokenize(
  Fts5Config *pConfig,            /* FTS5 Configuration object */
  const char *zSpec,              /* Tokenizer specification */
  int flags,                      /* FTS5_TOKENIZE_* flags */
  const char *pText, int nText,   /* Text to tokenize */
  void *pCtx,                     /* Context passed to xToken() */
  int (*xToken)(void*, int, const char*, int, int, int)    /* Callback */
);

void sqlite3Fts5Dequote(char *z);

594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
struct Fts5Table {
  sqlite3_vtab base;              /* Base class used by SQLite core */
  Fts5Config *pConfig;            /* Virtual table configuration */
  Fts5Index *pIndex;              /* Full-text index */
};

int sqlite3Fts5GetTokenizer(
  Fts5Global*, 
  const char **azArg,
  int nArg,
  Fts5Config*,
  char **pzErr
);

Fts5Table *sqlite3Fts5TableFromCsrid(Fts5Global*, i64);

int sqlite3Fts5FlushToDisk(Fts5Table*);

/*







<
<
<

|







617
618
619
620
621
622
623



624
625
626
627
628
629
630
631
632
struct Fts5Table {
  sqlite3_vtab base;              /* Base class used by SQLite core */
  Fts5Config *pConfig;            /* Virtual table configuration */
  Fts5Index *pIndex;              /* Full-text index */
};

int sqlite3Fts5GetTokenizer(



  Fts5Config*,
  const char *zSpec
);

Fts5Table *sqlite3Fts5TableFromCsrid(Fts5Global*, i64);

int sqlite3Fts5FlushToDisk(Fts5Table*);

/*
709
710
711
712
713
714
715








716
717
718
719
720
721
722
);

int sqlite3Fts5StorageDeleteAll(Fts5Storage *p);
int sqlite3Fts5StorageRebuild(Fts5Storage *p);
int sqlite3Fts5StorageOptimize(Fts5Storage *p);
int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge);
int sqlite3Fts5StorageReset(Fts5Storage *p);









/*
** End of interface to code in fts5_storage.c.
**************************************************************************/


/**************************************************************************







>
>
>
>
>
>
>
>







729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
);

int sqlite3Fts5StorageDeleteAll(Fts5Storage *p);
int sqlite3Fts5StorageRebuild(Fts5Storage *p);
int sqlite3Fts5StorageOptimize(Fts5Storage *p);
int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge);
int sqlite3Fts5StorageReset(Fts5Storage *p);

int sqlite3Fts5UnpackTokenizeBlob(
  Fts5Config *pConfig,
  sqlite3_value *pVal,
  Fts5TokenizerInst **ppTok,
  char **pzText,
  int *pbDel
);

/*
** End of interface to code in fts5_storage.c.
**************************************************************************/


/**************************************************************************

Changes to ext/fts5/fts5_config.c.

219
220
221
222
223
224
225



























226
227
228
229
230
231
232
      iVal = aEnum[i].eVal;
    }
  }

  *peVal = iVal;
  return iVal<0 ? SQLITE_ERROR : SQLITE_OK;
}




























/*
** Parse a "special" CREATE VIRTUAL TABLE directive and update
** configuration object pConfig as appropriate.
**
** If successful, object pConfig is updated and SQLITE_OK returned. If
** an error occurs, an SQLite error code is returned and an error message







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
      iVal = aEnum[i].eVal;
    }
  }

  *peVal = iVal;
  return iVal<0 ? SQLITE_ERROR : SQLITE_OK;
}

int sqlite3Fts5ConfigFindTokenizer(
  Fts5Config *pConfig, 
  const char *z, 
  Fts5TokenizerInst **ppOut
){
  Fts5TokenizerInst *pRet = 0;
  int rc = SQLITE_OK;

  assert( pConfig->pzErrmsg );

  /* Search for an existing tokenizer that matches this spec */
  for(pRet=pConfig->pTokList; pRet; pRet=pRet->pNext){
    if( strcmp(pRet->zSpec, z)==0 ) break;
  }

  if( pRet==0 ){
    /* No tokenizer found - create one. */
    rc = sqlite3Fts5GetTokenizer(pConfig, z);
    if( rc==SQLITE_OK ){
      pRet = pConfig->pTokList->pNext;
    }
  }

  if( ppOut ) *ppOut = pRet;
  return rc;
}

/*
** Parse a "special" CREATE VIRTUAL TABLE directive and update
** configuration object pConfig as appropriate.
**
** If successful, object pConfig is updated and SQLITE_OK returned. If
** an error occurs, an SQLite error code is returned and an error message
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
      bFirst = 0;
    }
    assert( pConfig->nPrefix<=FTS5_MAX_PREFIX_INDEXES );
    return rc;
  }

  if( sqlite3_strnicmp("tokenize", zCmd, nCmd)==0 ){
    const char *p = (const char*)zArg;
    sqlite3_int64 nArg = strlen(zArg) + 1;
    char **azArg = sqlite3Fts5MallocZero(&rc, sizeof(char*) * nArg);
    char *pDel = sqlite3Fts5MallocZero(&rc, nArg * 2);
    char *pSpace = pDel;

    if( azArg && pSpace ){
      if( pConfig->pTok ){
        *pzErr = sqlite3_mprintf("multiple tokenize=... directives");
        rc = SQLITE_ERROR;
      }else{
        for(nArg=0; p && *p; nArg++){
          const char *p2 = fts5ConfigSkipWhitespace(p);
          if( *p2=='\'' ){
            p = fts5ConfigSkipLiteral(p2);
          }else{
            p = fts5ConfigSkipBareword(p2);
          }
          if( p ){
            memcpy(pSpace, p2, p-p2);
            azArg[nArg] = pSpace;
            sqlite3Fts5Dequote(pSpace);
            pSpace += (p - p2) + 1;
            p = fts5ConfigSkipWhitespace(p);
          }
        }
        if( p==0 ){
          *pzErr = sqlite3_mprintf("parse error in tokenize directive");
          rc = SQLITE_ERROR;
        }else{
          rc = sqlite3Fts5GetTokenizer(pGlobal, 
              (const char**)azArg, (int)nArg, pConfig,
              pzErr
          );
        }
      }
    }

    sqlite3_free(azArg);
    sqlite3_free(pDel);
    return rc;
  }

  if( sqlite3_strnicmp("content", zCmd, nCmd)==0 ){
    if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){
      *pzErr = sqlite3_mprintf("multiple content=... directives");
      rc = SQLITE_ERROR;







<
<
<
<
<
<
<
|
|
|
|
<
<
<
|
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
|
<
|
<
<
<
<
<







319
320
321
322
323
324
325







326
327
328
329



330

331













332

333

334





335
336
337
338
339
340
341
      bFirst = 0;
    }
    assert( pConfig->nPrefix<=FTS5_MAX_PREFIX_INDEXES );
    return rc;
  }

  if( sqlite3_strnicmp("tokenize", zCmd, nCmd)==0 ){







    if( pConfig->pTokList ){
      *pzErr = sqlite3_mprintf("multiple tokenize=... directives");
      rc = SQLITE_ERROR;
    }else{



      assert( pConfig->pzErrmsg==0 );

      pConfig->pzErrmsg = pzErr;













      rc = sqlite3Fts5GetTokenizer(pConfig, zArg);

      pConfig->pzErrmsg = 0;

    }





    return rc;
  }

  if( sqlite3_strnicmp("content", zCmd, nCmd)==0 ){
    if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){
      *pzErr = sqlite3_mprintf("multiple content=... directives");
      rc = SQLITE_ERROR;
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
    return rc;
  }

  *pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd);
  return SQLITE_ERROR;
}

/*
** Allocate an instance of the default tokenizer ("simple") at 
** Fts5Config.pTokenizer. Return SQLITE_OK if successful, or an SQLite error
** code if an error occurs.
*/
static int fts5ConfigDefaultTokenizer(Fts5Global *pGlobal, Fts5Config *pConfig){
  assert( pConfig->pTok==0 && pConfig->pTokApi==0 );
  return sqlite3Fts5GetTokenizer(pGlobal, 0, 0, pConfig, 0);
}

/*
** Gobble up the first bareword or quoted word from the input buffer zIn.
** Return a pointer to the character immediately following the last in
** the gobbled word if successful, or a NULL pointer otherwise (failed
** to find close-quote character).
**
** Before returning, set pzOut to point to a new buffer containing a







<
<
<
<
<
<
<
<
<
<







404
405
406
407
408
409
410










411
412
413
414
415
416
417
    return rc;
  }

  *pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd);
  return SQLITE_ERROR;
}











/*
** Gobble up the first bareword or quoted word from the input buffer zIn.
** Return a pointer to the character immediately following the last in
** the gobbled word if successful, or a NULL pointer otherwise (failed
** to find close-quote character).
**
** Before returning, set pzOut to point to a new buffer containing a
551
552
553
554
555
556
557

558
559
560
561
562
563
564
  int i;
  sqlite3_int64 nByte;

  *ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config));
  if( pRet==0 ) return SQLITE_NOMEM;
  memset(pRet, 0, sizeof(Fts5Config));
  pRet->db = db;

  pRet->iCookie = -1;

  nByte = nArg * (sizeof(char*) + sizeof(u8));
  pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte);
  pRet->abUnindexed = pRet->azCol ? (u8*)&pRet->azCol[nArg] : 0;
  pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1);
  pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1);







>







537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
  int i;
  sqlite3_int64 nByte;

  *ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config));
  if( pRet==0 ) return SQLITE_NOMEM;
  memset(pRet, 0, sizeof(Fts5Config));
  pRet->db = db;
  pRet->pGlobal = pGlobal;
  pRet->iCookie = -1;

  nByte = nArg * (sizeof(char*) + sizeof(u8));
  pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte);
  pRet->abUnindexed = pRet->azCol ? (u8*)&pRet->azCol[nArg] : 0;
  pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1);
  pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1);
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
    );
    rc = SQLITE_ERROR;
  }

  /* If a tokenizer= option was successfully parsed, the tokenizer has
  ** already been allocated. Otherwise, allocate an instance of the default
  ** tokenizer (unicode61) now.  */
  if( rc==SQLITE_OK && pRet->pTok==0 ){
    rc = fts5ConfigDefaultTokenizer(pGlobal, pRet);
  }

  /* If no zContent option was specified, fill in the default values. */
  if( rc==SQLITE_OK && pRet->zContent==0 ){
    const char *zTail = 0;
    assert( pRet->eContent==FTS5_CONTENT_NORMAL 
         || pRet->eContent==FTS5_CONTENT_NONE 







|
|







626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
    );
    rc = SQLITE_ERROR;
  }

  /* If a tokenizer= option was successfully parsed, the tokenizer has
  ** already been allocated. Otherwise, allocate an instance of the default
  ** tokenizer (unicode61) now.  */
  if( rc==SQLITE_OK && pRet->pTokList==0 ){
    rc = sqlite3Fts5GetTokenizer(pRet, 0);
  }

  /* If no zContent option was specified, fill in the default values. */
  if( rc==SQLITE_OK && pRet->zContent==0 ){
    const char *zTail = 0;
    assert( pRet->eContent==FTS5_CONTENT_NORMAL 
         || pRet->eContent==FTS5_CONTENT_NONE 
677
678
679
680
681
682
683











684
685
686
687
688
689
690
691
692
693
694
695

696
697
698
699
700
701
702

  if( rc!=SQLITE_OK ){
    sqlite3Fts5ConfigFree(pRet);
    *ppOut = 0;
  }
  return rc;
}












/*
** Free the configuration object passed as the only argument.
*/
void sqlite3Fts5ConfigFree(Fts5Config *pConfig){
  if( pConfig ){
    int i;
    if( pConfig->pTok ){
      pConfig->pTokApi->xDelete(pConfig->pTok);
    }
    sqlite3_free(pConfig->zDb);
    sqlite3_free(pConfig->zName);

    for(i=0; i<pConfig->nCol; i++){
      sqlite3_free(pConfig->azCol[i]);
    }
    sqlite3_free(pConfig->azCol);
    sqlite3_free(pConfig->aPrefix);
    sqlite3_free(pConfig->zRank);
    sqlite3_free(pConfig->zRankArgs);







>
>
>
>
>
>
>
>
>
>
>







|
<
<


>







664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689


690
691
692
693
694
695
696
697
698
699

  if( rc!=SQLITE_OK ){
    sqlite3Fts5ConfigFree(pRet);
    *ppOut = 0;
  }
  return rc;
}

static void fts5ConfigFreeTokenizers(Fts5Config *pConfig){
  Fts5TokenizerInst *p = pConfig->pTokList;
  while( p ){
    Fts5TokenizerInst *pNext = p->pNext;
    p->pTokApi->xDelete(p->pTok);
    sqlite3_free(p);
    p = pNext;
  }
  pConfig->pTokList = 0;
}

/*
** Free the configuration object passed as the only argument.
*/
void sqlite3Fts5ConfigFree(Fts5Config *pConfig){
  if( pConfig ){
    int i;
    fts5ConfigFreeTokenizers(pConfig);


    sqlite3_free(pConfig->zDb);
    sqlite3_free(pConfig->zName);
    fts5ConfigFreeTokenizers(pConfig);
    for(i=0; i<pConfig->nCol; i++){
      sqlite3_free(pConfig->azCol[i]);
    }
    sqlite3_free(pConfig->azCol);
    sqlite3_free(pConfig->aPrefix);
    sqlite3_free(pConfig->zRank);
    sqlite3_free(pConfig->zRankArgs);
761
762
763
764
765
766
767
768
769
770











771









772
773
774
775
776
777
778
int sqlite3Fts5Tokenize(
  Fts5Config *pConfig,            /* FTS5 Configuration object */
  int flags,                      /* FTS5_TOKENIZE_* flags */
  const char *pText, int nText,   /* Text to tokenize */
  void *pCtx,                     /* Context passed to xToken() */
  int (*xToken)(void*, int, const char*, int, int, int)    /* Callback */
){
  if( pText==0 ) return SQLITE_OK;
  return pConfig->pTokApi->xTokenize(
      pConfig->pTok, pCtx, flags, pText, nText, xToken











  );









}

/*
** Argument pIn points to the first character in what is expected to be
** a comma-separated list of SQL literals followed by a ')' character.
** If it actually is this, return a pointer to the ')'. Otherwise, return
** NULL to indicate a parse error.







|
|
|
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>







758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
int sqlite3Fts5Tokenize(
  Fts5Config *pConfig,            /* FTS5 Configuration object */
  int flags,                      /* FTS5_TOKENIZE_* flags */
  const char *pText, int nText,   /* Text to tokenize */
  void *pCtx,                     /* Context passed to xToken() */
  int (*xToken)(void*, int, const char*, int, int, int)    /* Callback */
){
  if( pText ){
    Fts5TokenizerInst *p = pConfig->pTokList;
    return p->pTokApi->xTokenize(p->pTok, pCtx, flags, pText, nText, xToken);
  }
  return SQLITE_OK;
}

int sqlite3Fts5SpecTokenize(
  Fts5Config *pConfig,            /* FTS5 Configuration object */
  const char *zSpec,              /* Tokenizer specification */
  int flags,                      /* FTS5_TOKENIZE_* flags */
  const char *pText, int nText,   /* Text to tokenize */
  void *pCtx,                     /* Context passed to xToken() */
  int (*xToken)(void*, int, const char*, int, int, int)    /* Callback */
){
  if( pText ){
    Fts5TokenizerInst *p = pConfig->pTokList;
    if( zSpec ){
      int rc = sqlite3Fts5ConfigFindTokenizer(pConfig, zSpec, &p);
      if( rc!=SQLITE_OK ) return rc;
    }
    return p->pTokApi->xTokenize(p->pTok, pCtx, flags, pText, nText, xToken);
  }
  return SQLITE_OK;
}

/*
** Argument pIn points to the first character in what is expected to be
** a comma-separated list of SQL literals followed by a ')' character.
** If it actually is this, return a pointer to the ')'. Otherwise, return
** NULL to indicate a parse error.

Changes to ext/fts5/fts5_main.c.

111
112
113
114
115
116
117

118

119
120
121
122
123
124
125
  void (*xDestroy)(void*);        /* Destructor function */
  Fts5TokenizerModule *pNext;     /* Next registered tokenizer module */
};

struct Fts5FullTable {
  Fts5Table p;                    /* Public class members from fts5Int.h */
  Fts5Storage *pStorage;          /* Document store */

  Fts5Global *pGlobal;            /* Global (connection wide) data */

  Fts5Cursor *pSortCsr;           /* Sort data from this cursor */
  int iSavepoint;                 /* Successful xSavepoint()+1 */
  
#ifdef SQLITE_DEBUG
  struct Fts5TransactionState ts;
#endif
};







>

>







111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
  void (*xDestroy)(void*);        /* Destructor function */
  Fts5TokenizerModule *pNext;     /* Next registered tokenizer module */
};

struct Fts5FullTable {
  Fts5Table p;                    /* Public class members from fts5Int.h */
  Fts5Storage *pStorage;          /* Document store */
#if 0
  Fts5Global *pGlobal;            /* Global (connection wide) data */
#endif
  Fts5Cursor *pSortCsr;           /* Sort data from this cursor */
  int iSavepoint;                 /* Successful xSavepoint()+1 */
  
#ifdef SQLITE_DEBUG
  struct Fts5TransactionState ts;
#endif
};
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
  pTab = (Fts5FullTable*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5FullTable));
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts5ConfigParse(pGlobal, db, argc, azConfig, &pConfig, pzErr);
    assert( (rc==SQLITE_OK && *pzErr==0) || pConfig==0 );
  }
  if( rc==SQLITE_OK ){
    pTab->p.pConfig = pConfig;
    pTab->pGlobal = pGlobal;
  }

  /* Open the index sub-system */
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts5IndexOpen(pConfig, bCreate, &pTab->p.pIndex, pzErr);
  }








<







376
377
378
379
380
381
382

383
384
385
386
387
388
389
  pTab = (Fts5FullTable*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5FullTable));
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts5ConfigParse(pGlobal, db, argc, azConfig, &pConfig, pzErr);
    assert( (rc==SQLITE_OK && *pzErr==0) || pConfig==0 );
  }
  if( rc==SQLITE_OK ){
    pTab->p.pConfig = pConfig;

  }

  /* Open the index sub-system */
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts5IndexOpen(pConfig, bCreate, &pTab->p.pIndex, pzErr);
  }

689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724

  pInfo->idxNum = idxFlags;
  return SQLITE_OK;
}

static int fts5NewTransaction(Fts5FullTable *pTab){
  Fts5Cursor *pCsr;
  for(pCsr=pTab->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){
    if( pCsr->base.pVtab==(sqlite3_vtab*)pTab ) return SQLITE_OK;
  }
  return sqlite3Fts5StorageReset(pTab->pStorage);
}

/*
** Implementation of xOpen method.
*/
static int fts5OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
  Fts5FullTable *pTab = (Fts5FullTable*)pVTab;
  Fts5Config *pConfig = pTab->p.pConfig;
  Fts5Cursor *pCsr = 0;           /* New cursor object */
  sqlite3_int64 nByte;            /* Bytes of space to allocate */
  int rc;                         /* Return code */

  rc = fts5NewTransaction(pTab);
  if( rc==SQLITE_OK ){
    nByte = sizeof(Fts5Cursor) + pConfig->nCol * sizeof(int);
    pCsr = (Fts5Cursor*)sqlite3_malloc64(nByte);
    if( pCsr ){
      Fts5Global *pGlobal = pTab->pGlobal;
      memset(pCsr, 0, (size_t)nByte);
      pCsr->aColumnSize = (int*)&pCsr[1];
      pCsr->pNext = pGlobal->pCsr;
      pGlobal->pCsr = pCsr;
      pCsr->iCsrId = ++pGlobal->iNextId;
    }else{
      rc = SQLITE_NOMEM;







|




















|







690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725

  pInfo->idxNum = idxFlags;
  return SQLITE_OK;
}

static int fts5NewTransaction(Fts5FullTable *pTab){
  Fts5Cursor *pCsr;
  for(pCsr=pTab->p.pConfig->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){
    if( pCsr->base.pVtab==(sqlite3_vtab*)pTab ) return SQLITE_OK;
  }
  return sqlite3Fts5StorageReset(pTab->pStorage);
}

/*
** Implementation of xOpen method.
*/
static int fts5OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
  Fts5FullTable *pTab = (Fts5FullTable*)pVTab;
  Fts5Config *pConfig = pTab->p.pConfig;
  Fts5Cursor *pCsr = 0;           /* New cursor object */
  sqlite3_int64 nByte;            /* Bytes of space to allocate */
  int rc;                         /* Return code */

  rc = fts5NewTransaction(pTab);
  if( rc==SQLITE_OK ){
    nByte = sizeof(Fts5Cursor) + pConfig->nCol * sizeof(int);
    pCsr = (Fts5Cursor*)sqlite3_malloc64(nByte);
    if( pCsr ){
      Fts5Global *pGlobal = pConfig->pGlobal;
      memset(pCsr, 0, (size_t)nByte);
      pCsr->aColumnSize = (int*)&pCsr[1];
      pCsr->pNext = pGlobal->pCsr;
      pGlobal->pCsr = pCsr;
      pCsr->iCsrId = ++pGlobal->iNextId;
    }else{
      rc = SQLITE_NOMEM;
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
  if( pCursor ){
    Fts5FullTable *pTab = (Fts5FullTable*)(pCursor->pVtab);
    Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
    Fts5Cursor **pp;

    fts5FreeCursorComponents(pCsr);
    /* Remove the cursor from the Fts5Global.pCsr list */
    for(pp=&pTab->pGlobal->pCsr; (*pp)!=pCsr; pp=&(*pp)->pNext);
    *pp = pCsr->pNext;

    sqlite3_free(pCsr);
  }
  return SQLITE_OK;
}








|







798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
  if( pCursor ){
    Fts5FullTable *pTab = (Fts5FullTable*)(pCursor->pVtab);
    Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
    Fts5Cursor **pp;

    fts5FreeCursorComponents(pCsr);
    /* Remove the cursor from the Fts5Global.pCsr list */
    for(pp=&pTab->p.pConfig->pGlobal->pCsr; (*pp)!=pCsr; pp=&(*pp)->pNext);
    *pp = pCsr->pNext;

    sqlite3_free(pCsr);
  }
  return SQLITE_OK;
}

850
851
852
853
854
855
856
857
858
859
860
861
862
863
864

/*
** Set the FTS5CSR_REQUIRE_RESEEK flag on all FTS5_PLAN_MATCH cursors 
** open on table pTab.
*/
static void fts5TripCursors(Fts5FullTable *pTab){
  Fts5Cursor *pCsr;
  for(pCsr=pTab->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){
    if( pCsr->ePlan==FTS5_PLAN_MATCH
     && pCsr->base.pVtab==(sqlite3_vtab*)pTab 
    ){
      CsrFlagSet(pCsr, FTS5CSR_REQUIRE_RESEEK);
    }
  }
}







|







851
852
853
854
855
856
857
858
859
860
861
862
863
864
865

/*
** Set the FTS5CSR_REQUIRE_RESEEK flag on all FTS5_PLAN_MATCH cursors 
** open on table pTab.
*/
static void fts5TripCursors(Fts5FullTable *pTab){
  Fts5Cursor *pCsr;
  for(pCsr=pTab->p.pConfig->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){
    if( pCsr->ePlan==FTS5_PLAN_MATCH
     && pCsr->base.pVtab==(sqlite3_vtab*)pTab 
    ){
      CsrFlagSet(pCsr, FTS5CSR_REQUIRE_RESEEK);
    }
  }
}
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
** Search for an auxiliary function named zName that can be used with table
** pTab. If one is found, return a pointer to the corresponding Fts5Auxiliary
** structure. Otherwise, if no such function exists, return NULL.
*/
static Fts5Auxiliary *fts5FindAuxiliary(Fts5FullTable *pTab, const char *zName){
  Fts5Auxiliary *pAux;

  for(pAux=pTab->pGlobal->pAux; pAux; pAux=pAux->pNext){
    if( sqlite3_stricmp(zName, pAux->zFunc)==0 ) return pAux;
  }

  /* No function of the specified name was found. Return 0. */
  return 0;
}








|







1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
** Search for an auxiliary function named zName that can be used with table
** pTab. If one is found, return a pointer to the corresponding Fts5Auxiliary
** structure. Otherwise, if no such function exists, return NULL.
*/
static Fts5Auxiliary *fts5FindAuxiliary(Fts5FullTable *pTab, const char *zName){
  Fts5Auxiliary *pAux;

  for(pAux=pTab->p.pConfig->pGlobal->pAux; pAux; pAux=pAux->pNext){
    if( sqlite3_stricmp(zName, pAux->zFunc)==0 ) return pAux;
  }

  /* No function of the specified name was found. Return 0. */
  return 0;
}

1273
1274
1275
1276
1277
1278
1279

1280






1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292

1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303


1304
1305
1306
1307
1308
1309
1310
  /* Decode the arguments passed through to this function. */
  for(i=0; i<nVal; i++){
    switch( idxStr[iIdxStr++] ){
      case 'r':
        pRank = apVal[i];
        break;
      case 'M': {

        const char *zText = (const char*)sqlite3_value_text(apVal[i]);






        if( zText==0 ) zText = "";
        iCol = 0;
        do{
          iCol = iCol*10 + (idxStr[iIdxStr]-'0');
          iIdxStr++;
        }while( idxStr[iIdxStr]>='0' && idxStr[iIdxStr]<='9' );

        if( zText[0]=='*' ){
          /* The user has issued a query of the form "MATCH '*...'". This
          ** indicates that the MATCH expression is not a full text query,
          ** but a request for an internal parameter.  */
          rc = fts5SpecialMatch(pTab, pCsr, &zText[1]);

          goto filter_out;
        }else{
          char **pzErr = &pTab->p.base.zErrMsg;
          rc = sqlite3Fts5ExprNew(pConfig, 0, iCol, zText, &pExpr, pzErr);
          if( rc==SQLITE_OK ){
            rc = sqlite3Fts5ExprAnd(&pCsr->pExpr, pExpr);
            pExpr = 0;
          }
          if( rc!=SQLITE_OK ) goto filter_out;
        }



        break;
      }
      case 'L':
      case 'G': {
        int bGlob = (idxStr[iIdxStr-1]=='G');
        const char *zText = (const char*)sqlite3_value_text(apVal[i]);
        iCol = 0;







>
|
>
>
>
>
>
>












>








<


>
>







1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309

1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
  /* Decode the arguments passed through to this function. */
  for(i=0; i<nVal; i++){
    switch( idxStr[iIdxStr++] ){
      case 'r':
        pRank = apVal[i];
        break;
      case 'M': {
        Fts5TokenizerInst *pInst = 0;
        char *zText = 0;
        int bDel = 0;

        rc = sqlite3Fts5UnpackTokenizeBlob(
            pConfig, apVal[i], &pInst, &zText, &bDel
        );

        if( zText==0 ) zText = "";
        iCol = 0;
        do{
          iCol = iCol*10 + (idxStr[iIdxStr]-'0');
          iIdxStr++;
        }while( idxStr[iIdxStr]>='0' && idxStr[iIdxStr]<='9' );

        if( zText[0]=='*' ){
          /* The user has issued a query of the form "MATCH '*...'". This
          ** indicates that the MATCH expression is not a full text query,
          ** but a request for an internal parameter.  */
          rc = fts5SpecialMatch(pTab, pCsr, &zText[1]);
          if( bDel ) sqlite3_free(zText);
          goto filter_out;
        }else{
          char **pzErr = &pTab->p.base.zErrMsg;
          rc = sqlite3Fts5ExprNew(pConfig, 0, iCol, zText, &pExpr, pzErr);
          if( rc==SQLITE_OK ){
            rc = sqlite3Fts5ExprAnd(&pCsr->pExpr, pExpr);
            pExpr = 0;
          }

        }

        if( bDel ) sqlite3_free(zText);
        if( rc!=SQLITE_OK ) goto filter_out;
        break;
      }
      case 'L':
      case 'G': {
        int bGlob = (idxStr[iIdxStr-1]=='G');
        const char *zText = (const char*)sqlite3_value_text(apVal[i]);
        iCol = 0;
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871




2872



































2873
2874
2875
2876
2877
2878








2879
2880
2881
2882
2883

2884

2885
2886
2887
2888
2889
2890
2891
2892


2893

2894
2895


2896





2897
2898
2899
2900
2901
2902
2903
    rc = SQLITE_ERROR;
  }

  return rc;
}

int sqlite3Fts5GetTokenizer(
  Fts5Global *pGlobal, 
  const char **azArg,
  int nArg,
  Fts5Config *pConfig,
  char **pzErr
){
  Fts5TokenizerModule *pMod;
  int rc = SQLITE_OK;








































  pMod = fts5LocateTokenizer(pGlobal, nArg==0 ? 0 : azArg[0]);
  if( pMod==0 ){
    assert( nArg>0 );
    rc = SQLITE_ERROR;
    *pzErr = sqlite3_mprintf("no such tokenizer: %s", azArg[0]);
  }else{








    rc = pMod->x.xCreate(
        pMod->pUserData, (azArg?&azArg[1]:0), (nArg?nArg-1:0), &pConfig->pTok
    );
    pConfig->pTokApi = &pMod->x;
    if( rc!=SQLITE_OK ){

      if( pzErr ) *pzErr = sqlite3_mprintf("error in tokenizer constructor");

    }else{
      pConfig->ePattern = sqlite3Fts5TokenizerPattern(
          pMod->x.xCreate, pConfig->pTok
      );
    }
  }

  if( rc!=SQLITE_OK ){


    pConfig->pTokApi = 0;

    pConfig->pTok = 0;
  }








  return rc;
}

static void fts5ModuleDestroy(void *pCtx){
  Fts5TokenizerModule *pTok, *pNextTok;
  Fts5Auxiliary *pAux, *pNextAux;
  Fts5Global *pGlobal = (Fts5Global*)pCtx;







<
<
<

|

<

>
>
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
|
|
|
|
>
>
>
>
>
>
>
>
|
|
|
|
|
>
|
>
|
|
|
|
|
|
<
|
>
>
|
>
|
|
>
>
|
>
>
>
>
>







2867
2868
2869
2870
2871
2872
2873



2874
2875
2876

2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945

2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
    rc = SQLITE_ERROR;
  }

  return rc;
}

int sqlite3Fts5GetTokenizer(



  Fts5Config *pConfig,
  const char *zSpec
){

  int rc = SQLITE_OK;
  char **pzErr = pConfig->pzErrmsg;
  const char **azArg = 0;
  char *pDel = 0;
  sqlite3_int64 nArg = 0;

  assert( pzErr || (zSpec==0 && pConfig->pTokList==0) );
  if( zSpec ){
    const char *p = (const char*)zSpec;
    char *pSpace = 0;

    nArg = strlen(zSpec) + 1;
    pDel = sqlite3Fts5MallocZero(&rc, nArg * 2);
    pSpace = pDel;
    azArg = (const char**)sqlite3Fts5MallocZero(&rc, sizeof(char*) * nArg);

    if( azArg && pSpace ){
      for(nArg=0; p && *p; nArg++){
        const char *p2 = fts5ConfigSkipWhitespace(p);
        if( *p2=='\'' ){
          p = fts5ConfigSkipLiteral(p2);
        }else{
          p = fts5ConfigSkipBareword(p2);
        }
        if( p ){
          memcpy(pSpace, p2, p-p2);
          azArg[nArg] = pSpace;
          sqlite3Fts5Dequote(pSpace);
          pSpace += (p - p2) + 1;
          p = fts5ConfigSkipWhitespace(p);
        }
      }
      if( p==0 ){
        *pzErr= sqlite3_mprintf("parse error in tokenize directive");
        rc = SQLITE_ERROR;
      }
    }
  }

  if( rc==SQLITE_OK ){
    Fts5TokenizerModule *pMod;
    pMod = fts5LocateTokenizer(pConfig->pGlobal, nArg==0 ? 0 : azArg[0]);
    if( pMod==0 ){
      assert( nArg>0 );
      rc = SQLITE_ERROR;
      *pzErr = sqlite3_mprintf("no such tokenizer: %s", azArg[0]);
    }else{
      int nSpec = zSpec ? strlen(zSpec) + 1 : 0;
      int nByte = sizeof(Fts5TokenizerInst) + nSpec;
      Fts5TokenizerInst *pNew = sqlite3Fts5MallocZero(&rc, nByte);
      if( pNew ){
        if( zSpec ){
          pNew->zSpec = (char*)&pNew[1];
          memcpy(pNew->zSpec, zSpec, nSpec);
        }
        rc = pMod->x.xCreate(
            pMod->pUserData, (azArg?&azArg[1]:0), (nArg?nArg-1:0), &pNew->pTok
        );
        pNew->pTokApi = &pMod->x;
        if( rc!=SQLITE_OK ){
          if( pzErr ){
            *pzErr = sqlite3_mprintf("error in tokenizer constructor");
          }
        }else if( pConfig->pTokList==0 ){
          pConfig->ePattern = sqlite3Fts5TokenizerPattern(
              pMod->x.xCreate, pNew->pTok
          );
        }
      }

      if( rc==SQLITE_OK ){
        if( pConfig->pTokList ){
          pNew->pNext = pConfig->pTokList->pNext;
          pConfig->pTokList->pNext = pNew;
        }else{
          pConfig->pTokList = pNew;
        }
      }else{
        sqlite3_free(pNew);
      }
    }
  }

  sqlite3_free(azArg);
  sqlite3_free(pDel);
  return rc;
}

static void fts5ModuleDestroy(void *pCtx){
  Fts5TokenizerModule *pTok, *pNextTok;
  Fts5Auxiliary *pAux, *pNextAux;
  Fts5Global *pGlobal = (Fts5Global*)pCtx;

Changes to ext/fts5/fts5_storage.c.

394
395
396
397
398
399
400































401
402
403
404
405
406
407
  UNUSED_PARAM2(iUnused1, iUnused2);
  if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE;
  if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){
    pCtx->szCol++;
  }
  return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, pCtx->szCol-1, pToken, nToken);
}
































/*
** If a row with rowid iDel is present in the %_content table, add the
** delete-markers to the FTS index necessary to delete it. Do not actually
** remove the %_content row at this time though.
*/
static int fts5StorageDeleteFromIndex(







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
  UNUSED_PARAM2(iUnused1, iUnused2);
  if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE;
  if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){
    pCtx->szCol++;
  }
  return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, pCtx->szCol-1, pToken, nToken);
}

#define IS_TOKENIZE_BLOB(pVal) ( \
    sqlite3_value_subtype(pVal)==SQLITE_FTS5_TOKENIZE_SUBTYPE \
 && sqlite3_value_type(pVal)==SQLITE_BLOB \
)

static const char *fts5UnpackTokenizeBlob(
  sqlite3_value *pVal,
  const char **pzT,
  int *pnT
){
  const u8 *pBlob = sqlite3_value_blob(pVal);
  int nBlob = sqlite3_value_bytes(pVal);
  int ii;

  assert( sqlite3_value_subtype(pVal)==SQLITE_FTS5_TOKENIZE_SUBTYPE );
  assert( sqlite3_value_type(pVal)==SQLITE_BLOB );

  for(ii=0; pBlob[ii]; ii++){
    if( ii==nBlob ){
      *pzT = 0;
      *pnT = 0;
      return 0;
    }
  }

  *pzT = (const char*)&pBlob[ii+1];
  *pnT = nBlob - ii - 1;
  return (const char*)pBlob;
}


/*
** If a row with rowid iDel is present in the %_content table, add the
** delete-markers to the FTS index necessary to delete it. Do not actually
** remove the %_content row at this time though.
*/
static int fts5StorageDeleteFromIndex(
425
426
427
428
429
430
431
432

433
434
435
436
437
438
439




440
441

442
443
444
445
446
447
448
449
450
451
452
453
    }
  }

  ctx.pStorage = p;
  ctx.iCol = -1;
  for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){
    if( pConfig->abUnindexed[iCol-1]==0 ){
      const char *zText;

      int nText;
      assert( pSeek==0 || apVal==0 );
      assert( pSeek!=0 || apVal!=0 );
      if( pSeek ){
        zText = (const char*)sqlite3_column_text(pSeek, iCol);
        nText = sqlite3_column_bytes(pSeek, iCol);
      }else if( ALWAYS(apVal) ){




        zText = (const char*)sqlite3_value_text(apVal[iCol-1]);
        nText = sqlite3_value_bytes(apVal[iCol-1]);

      }else{
        continue;
      }
      ctx.szCol = 0;
      rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT, 
          zText, nText, (void*)&ctx, fts5StorageInsertCallback
      );
      p->aTotalSize[iCol-1] -= (i64)ctx.szCol;
      if( p->aTotalSize[iCol-1]<0 ){
        rc = FTS5_CORRUPT;
      }
    }







|
>
|






>
>
>
>
|
|
>




|







456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
    }
  }

  ctx.pStorage = p;
  ctx.iCol = -1;
  for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){
    if( pConfig->abUnindexed[iCol-1]==0 ){
      const char *zText = 0;
      const char *zTok = 0;
      int nText = 0;
      assert( pSeek==0 || apVal==0 );
      assert( pSeek!=0 || apVal!=0 );
      if( pSeek ){
        zText = (const char*)sqlite3_column_text(pSeek, iCol);
        nText = sqlite3_column_bytes(pSeek, iCol);
      }else if( ALWAYS(apVal) ){
        sqlite3_value *pVal = apVal[iCol-1];
        if( IS_TOKENIZE_BLOB(pVal) ){
          zTok = fts5UnpackTokenizeBlob(pVal, &zText, &nText);
        }else{
          zText = (const char*)sqlite3_value_text(apVal[iCol-1]);
          nText = sqlite3_value_bytes(apVal[iCol-1]);
        }
      }else{
        continue;
      }
      ctx.szCol = 0;
      rc = sqlite3Fts5SpecTokenize(pConfig, zTok, FTS5_TOKENIZE_DOCUMENT, 
          zText, nText, (void*)&ctx, fts5StorageInsertCallback
      );
      p->aTotalSize[iCol-1] -= (i64)ctx.szCol;
      if( p->aTotalSize[iCol-1]<0 ){
        rc = FTS5_CORRUPT;
      }
    }
747
748
749
750
751
752
753
























754
755
756
757
758
759
760
    }
    if( rc==SQLITE_OK ){
      *piRowid = sqlite3_last_insert_rowid(p->pConfig->db);
    }
  }
  return rc;
}

























/*
** Insert a new row into the FTS content table.
*/
int sqlite3Fts5StorageContentInsert(
  Fts5Storage *p, 
  sqlite3_value **apVal, 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
    }
    if( rc==SQLITE_OK ){
      *piRowid = sqlite3_last_insert_rowid(p->pConfig->db);
    }
  }
  return rc;
}

int sqlite3Fts5UnpackTokenizeBlob(
  Fts5Config *pConfig,
  sqlite3_value *pVal,
  Fts5TokenizerInst **ppTok,
  char **pzText,
  int *pbDel
){
  int rc = SQLITE_OK;
  if( IS_TOKENIZE_BLOB(pVal) ){
    const char *zTok = 0;
    const char *zText = 0;
    int nText = 0;
    zTok = fts5UnpackTokenizeBlob(pVal, &zText, &nText);
    rc = sqlite3Fts5ConfigFindTokenizer(pConfig, zTok, ppTok);
    *pzText = sqlite3Fts5Mprintf(&rc, "%.*s", nText, zText);
    *pbDel = 1;
  }else{
    *pzText = (char*)sqlite3_value_text(pVal);
    *pbDel = 0;
    *ppTok = pConfig->pTokList;
  }
  return rc;
}

/*
** Insert a new row into the FTS content table.
*/
int sqlite3Fts5StorageContentInsert(
  Fts5Storage *p, 
  sqlite3_value **apVal, 
771
772
773
774
775
776
777







778

779
780
781
782
783
784
785
      rc = fts5StorageNewRowid(p, piRowid);
    }
  }else{
    sqlite3_stmt *pInsert = 0;    /* Statement to write %_content table */
    int i;                        /* Counter variable */
    rc = fts5StorageGetStmt(p, FTS5_STMT_INSERT_CONTENT, &pInsert, 0);
    for(i=1; rc==SQLITE_OK && i<=pConfig->nCol+1; i++){







      rc = sqlite3_bind_value(pInsert, i, apVal[i]);

    }
    if( rc==SQLITE_OK ){
      sqlite3_step(pInsert);
      rc = sqlite3_reset(pInsert);
    }
    *piRowid = sqlite3_last_insert_rowid(pConfig->db);
  }







>
>
>
>
>
>
>
|
>







832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
      rc = fts5StorageNewRowid(p, piRowid);
    }
  }else{
    sqlite3_stmt *pInsert = 0;    /* Statement to write %_content table */
    int i;                        /* Counter variable */
    rc = fts5StorageGetStmt(p, FTS5_STMT_INSERT_CONTENT, &pInsert, 0);
    for(i=1; rc==SQLITE_OK && i<=pConfig->nCol+1; i++){
      sqlite3_value *pVal = apVal[i];
      if( IS_TOKENIZE_BLOB(pVal) ){
        const char *zT = 0;
        int nT = 0;
        fts5UnpackTokenizeBlob(pVal, &zT, &nT);
        rc = sqlite3_bind_text(pInsert, i, zT, nT, SQLITE_STATIC);
      }else{
        rc = sqlite3_bind_value(pInsert, i, apVal[i]);
      }
    }
    if( rc==SQLITE_OK ){
      sqlite3_step(pInsert);
      rc = sqlite3_reset(pInsert);
    }
    *piRowid = sqlite3_last_insert_rowid(pConfig->db);
  }
806
807
808
809
810
811
812








813
814

815
816
817
818
819
820
821
822
823
824
825
826

  if( rc==SQLITE_OK ){
    rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 0, iRowid);
  }
  for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
    ctx.szCol = 0;
    if( pConfig->abUnindexed[ctx.iCol]==0 ){








      const char *zText = (const char*)sqlite3_value_text(apVal[ctx.iCol+2]);
      int nText = sqlite3_value_bytes(apVal[ctx.iCol+2]);

      rc = sqlite3Fts5Tokenize(pConfig, 
          FTS5_TOKENIZE_DOCUMENT,
          zText, nText,
          (void*)&ctx,
          fts5StorageInsertCallback
      );
    }
    sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
    p->aTotalSize[ctx.iCol] += (i64)ctx.szCol;
  }
  p->nTotalRow++;








>
>
>
>
>
>
>
>
|
|
>
|
|
<
<
|







875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894


895
896
897
898
899
900
901
902

  if( rc==SQLITE_OK ){
    rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 0, iRowid);
  }
  for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
    ctx.szCol = 0;
    if( pConfig->abUnindexed[ctx.iCol]==0 ){
      sqlite3_value *pVal = apVal[ctx.iCol+2];
      const char *zText = 0;
      const char *zTok = 0;
      int nText = 0;

      if( IS_TOKENIZE_BLOB(pVal) ){
        zTok = fts5UnpackTokenizeBlob(pVal, &zText, &nText);
      }else{
        zText = (const char*)sqlite3_value_text(apVal[ctx.iCol+2]);
        nText = sqlite3_value_bytes(apVal[ctx.iCol+2]);
      }
      rc = sqlite3Fts5SpecTokenize(pConfig, 
          zTok, FTS5_TOKENIZE_DOCUMENT, zText, nText, 


          (void*)&ctx, fts5StorageInsertCallback
      );
    }
    sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
    p->aTotalSize[ctx.iCol] += (i64)ctx.szCol;
  }
  p->nTotalRow++;

Changes to ext/fts5/fts5_tcl.c.

1121
1122
1123
1124
1125
1126
1127





























1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147





1148
1149
1150
1151

1152
1153
1154
1155
1156
1157
1158
  rc = sqlite3Fts5TestRegisterMatchinfo(db);
  if( rc!=SQLITE_OK ){
    Tcl_SetResult(interp, (char*)sqlite3ErrName(rc), TCL_VOLATILE);
    return TCL_ERROR;
  }
  return TCL_OK;
}






























static int SQLITE_TCLAPI f5tRegisterTok(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  int rc;
  sqlite3 *db = 0;
  fts5_api *pApi = 0;

  if( objc!=2 ){
    Tcl_WrongNumArgs(interp, 1, objv, "DB");
    return TCL_ERROR;
  }
  if( f5tDbAndApi(interp, objv[1], &db, &pApi) ){
    return TCL_ERROR;
  }

  rc = sqlite3Fts5TestRegisterTok(db, pApi);





  if( rc!=SQLITE_OK ){
    Tcl_SetResult(interp, (char*)sqlite3ErrName(rc), TCL_VOLATILE);
    return TCL_ERROR;
  }

  return TCL_OK;
}

typedef struct OriginTextCtx OriginTextCtx;
struct OriginTextCtx {
  sqlite3 *db;
  fts5_api *pApi;







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>




















>
>
>
>
>




>







1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
  rc = sqlite3Fts5TestRegisterMatchinfo(db);
  if( rc!=SQLITE_OK ){
    Tcl_SetResult(interp, (char*)sqlite3ErrName(rc), TCL_VOLATILE);
    return TCL_ERROR;
  }
  return TCL_OK;
}

static void f5tFree(void *p) { sqlite3_free(p); }

static void f5tScalarFunc(
  sqlite3_context *ctx, 
  int nArg, 
  sqlite3_value **apArg
){
  const char *zText = (const char*)sqlite3_value_text(apArg[0]);
  int nText = sqlite3_value_bytes(apArg[0]);
  const char *zTok = (const char*)sqlite3_value_text(apArg[1]);
  int nTok = sqlite3_value_bytes(apArg[1]);
  unsigned char *aBuf = 0; 
  int nBuf = 0;

  assert( nArg==2 );

  if( zTok==0 ) zTok = "";
  nBuf = nTok + 1 + nText;
  aBuf = (unsigned char*)sqlite3_malloc(nBuf);
  if( aBuf==0 ){
    sqlite3_result_error_nomem(ctx);
  }else{
    memcpy(aBuf, zTok, nTok+1);
    memcpy(&aBuf[nTok+1], zText, nText);
    sqlite3_result_blob(ctx, aBuf, nBuf, f5tFree);
    sqlite3_result_subtype(ctx, SQLITE_FTS5_TOKENIZE_SUBTYPE);
  }
}

static int SQLITE_TCLAPI f5tRegisterTok(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  int rc;
  sqlite3 *db = 0;
  fts5_api *pApi = 0;

  if( objc!=2 ){
    Tcl_WrongNumArgs(interp, 1, objv, "DB");
    return TCL_ERROR;
  }
  if( f5tDbAndApi(interp, objv[1], &db, &pApi) ){
    return TCL_ERROR;
  }

  rc = sqlite3Fts5TestRegisterTok(db, pApi);
  if( rc==SQLITE_OK ){
    rc = sqlite3_create_function(db, "fts5tokenize", 2, SQLITE_UTF8, 0, 
        f5tScalarFunc, 0, 0
    );
  }
  if( rc!=SQLITE_OK ){
    Tcl_SetResult(interp, (char*)sqlite3ErrName(rc), TCL_VOLATILE);
    return TCL_ERROR;
  }
  
  return TCL_OK;
}

typedef struct OriginTextCtx OriginTextCtx;
struct OriginTextCtx {
  sqlite3 *db;
  fts5_api *pApi;