/ Check-in [90b85b42]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Change the fts5 tokenizer API to allow more than one token to occupy a single position within a document.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5-incompatible
Files: files | file ages | folders
SHA1: 90b85b42f2b2dd3e939b129b7df2b822a05e243d
User & Date: dan 2015-08-28 19:56:47
Context
2015-08-29
15:44
Another change to the fts5 tokenizer API. check-in: fc718684 user: dan tags: fts5-incompatible
2015-08-28
19:56
Change the fts5 tokenizer API to allow more than one token to occupy a single position within a document. check-in: 90b85b42 user: dan tags: fts5-incompatible
16:41
Fix compiler warnings in rbu code. check-in: 0fdc36fe user: dan tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5.h.

213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
...
305
306
307
308
309
310
311

312
313
314
315
316
317
318

319
320
321





322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
  int (*xColumnCount)(Fts5Context*);
  int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow);
  int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken);

  int (*xTokenize)(Fts5Context*, 
    const char *pText, int nText, /* Text to tokenize */
    void *pCtx,                   /* Context passed to xToken() */
    int (*xToken)(void*, const char*, int, int, int)       /* Callback */
  );

  int (*xPhraseCount)(Fts5Context*);
  int (*xPhraseSize)(Fts5Context*, int iPhrase);

  int (*xInstCount)(Fts5Context*, int *pnInst);
  int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff);
................................................................................
typedef struct Fts5Tokenizer Fts5Tokenizer;
typedef struct fts5_tokenizer fts5_tokenizer;
/*
** Table of function pointers describing one tokenizer implementation.
** NOTE(review): the create/delete roles below are read off the member
** names and signatures only; confirm against the FTS5 documentation.
*/
struct fts5_tokenizer {
  /* Presumably constructs a tokenizer from the nArg strings in azArg and
  ** hands the new handle back through *ppOut. */
  int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut);
  /* Presumably releases a handle obtained from xCreate. */
  void (*xDelete)(Fts5Tokenizer*);
  /* Tokenizes the nText bytes at pText, reporting tokens through xToken. */
  int (*xTokenize)(Fts5Tokenizer*, 
      void *pCtx,           /* Forwarded as the first argument of xToken() */

      const char *pText, int nText, 
      int (*xToken)(
        void *pCtx,         /* Copy of 2nd argument to xTokenize() */
        const char *pToken, /* Pointer to buffer containing token */
        int nToken,         /* Size of token in bytes */
        int iStart,         /* Byte offset of token within input text */
        int iEnd            /* Byte offset of end of token within input text */

      )
  );
};






/*
** END OF CUSTOM TOKENIZERS
*************************************************************************/

/*************************************************************************
** FTS5 EXTENSION REGISTRATION API
*/
typedef struct fts5_api fts5_api;
struct fts5_api {
  int iVersion;                   /* Currently always set to 1 */

  /* Create a new tokenizer */
  int (*xCreateTokenizer)(
    fts5_api *pApi,
    const char *zName,
    void *pContext,
    fts5_tokenizer *pTokenizer,







|







 







>






|
>



>
>
>
>
>










|







213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
...
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
  int (*xColumnCount)(Fts5Context*);
  int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow);
  int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken);

  int (*xTokenize)(Fts5Context*, 
    const char *pText, int nText, /* Text to tokenize */
    void *pCtx,                   /* Context passed to xToken() */
    int (*xToken)(void*, const char*, int, int, int, int)       /* Callback */
  );

  int (*xPhraseCount)(Fts5Context*);
  int (*xPhraseSize)(Fts5Context*, int iPhrase);

  int (*xInstCount)(Fts5Context*, int *pnInst);
  int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff);
................................................................................
typedef struct Fts5Tokenizer Fts5Tokenizer;
typedef struct fts5_tokenizer fts5_tokenizer;
/*
** Table of function pointers describing one tokenizer implementation.
** NOTE(review): the create/delete roles below are read off the member
** names and signatures only; confirm against the FTS5 documentation.
*/
struct fts5_tokenizer {
  /* Presumably constructs a tokenizer from the nArg strings in azArg and
  ** hands the new handle back through *ppOut. */
  int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut);
  /* Presumably releases a handle obtained from xCreate. */
  void (*xDelete)(Fts5Tokenizer*);
  /* Tokenizes the nText bytes at pText, reporting tokens through xToken. */
  int (*xTokenize)(Fts5Tokenizer*, 
      void *pCtx,           /* Forwarded as the first argument of xToken() */
      int flags,            /* Combination of FTS5_TOKENIZE_* bits */
      const char *pText, int nText, 
      int (*xToken)(
        void *pCtx,         /* Copy of 2nd argument to xTokenize() */
        const char *pToken, /* Pointer to buffer containing token */
        int nToken,         /* Size of token in bytes */
        int iStart,         /* Byte offset of token within input text */
        int iEnd,           /* Byte offset of end of token within input text */
        int iPos            /* Number of tokens before this one in input text */
      )
  );
};

/*
** Bit values for the "flags" argument of fts5_tokenizer.xTokenize().
** NOTE(review): the per-flag notes are inferred from the flag names and
** from the callers visible in this check-in; confirm before relying on them.
*/
#define FTS5_TOKENIZE_QUERY     0x0001  /* Passed when tokenizing query text */
#define FTS5_TOKENIZE_PREFIX    0x0002  /* Presumably: query text is a prefix */
#define FTS5_TOKENIZE_DOCUMENT  0x0004  /* Passed when tokenizing column content */
#define FTS5_TOKENIZE_AUX       0x0008  /* Passed by the extension-API tokenize paths */

/*
** END OF CUSTOM TOKENIZERS
*************************************************************************/

/*************************************************************************
** FTS5 EXTENSION REGISTRATION API
*/
typedef struct fts5_api fts5_api;
struct fts5_api {
  int iVersion;                   /* Currently always set to 2 */

  /* Create a new tokenizer */
  int (*xCreateTokenizer)(
    fts5_api *pApi,
    const char *zName,
    void *pContext,
    fts5_tokenizer *pTokenizer,

Changes to ext/fts5/fts5Int.h.

162
163
164
165
166
167
168

169
170
171
172
173
174
175
176
177
178
);
void sqlite3Fts5ConfigFree(Fts5Config*);

int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig);

int sqlite3Fts5Tokenize(
  Fts5Config *pConfig,            /* FTS5 Configuration object */

  const char *pText, int nText,   /* Text to tokenize */
  void *pCtx,                     /* Context passed to xToken() */
  int (*xToken)(void*, const char*, int, int, int)    /* Callback */
);

void sqlite3Fts5Dequote(char *z);

/* Load the contents of the %_config table */
int sqlite3Fts5ConfigLoad(Fts5Config*, int);








>


|







162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
);
void sqlite3Fts5ConfigFree(Fts5Config*);

int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig);

int sqlite3Fts5Tokenize(
  Fts5Config *pConfig,            /* FTS5 Configuration object */
  int flags,                      /* FTS5_TOKENIZE_* flags */
  const char *pText, int nText,   /* Text to tokenize */
  void *pCtx,                     /* Context passed to xToken() */
  int (*xToken)(void*, const char*, int, int, int, int)    /* Callback */
);

void sqlite3Fts5Dequote(char *z);

/* Load the contents of the %_config table */
int sqlite3Fts5ConfigLoad(Fts5Config*, int);

Changes to ext/fts5/fts5_aux.c.

147
148
149
150
151
152
153
154

155
156
157


158
159
160
161
162
163
164
165
** Tokenizer callback used by implementation of highlight() function.
*/
static int fts5HighlightCb(
  void *pContext,                 /* Pointer to HighlightContext object */
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStartOff,                  /* Start offset of token */
  int iEndOff                     /* End offset of token */

){
  HighlightContext *p = (HighlightContext*)pContext;
  int rc = SQLITE_OK;


  int iPos = p->iPos++;

  if( p->iRangeEnd>0 ){
    if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK;
    if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff;
  }

  if( iPos==p->iter.iStart ){







|
>



>
>
|







147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
** Tokenizer callback used by implementation of highlight() function.
*/
static int fts5HighlightCb(
  void *pContext,                 /* Pointer to HighlightContext object */
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStartOff,                  /* Start offset of token */
  int iEndOff,                    /* End offset of token */
  int iPos
){
  HighlightContext *p = (HighlightContext*)pContext;
  int rc = SQLITE_OK;

  if( iPos<p->iPos ) return SQLITE_OK;
  p->iPos = iPos+1;

  if( p->iRangeEnd>0 ){
    if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK;
    if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff;
  }

  if( iPos==p->iter.iStart ){

Changes to ext/fts5/fts5_config.c.

641
642
643
644
645
646
647

648
649
650
651
652
653


654
655
656
657
658
659
660
** the callback returned SQLITE_DONE, this is not an error and this function
** still returns SQLITE_OK. Or, if the tokenization was abandoned early
** because the callback returned another non-zero value, it is assumed
** to be an SQLite error code and returned to the caller.
*/
int sqlite3Fts5Tokenize(
  Fts5Config *pConfig,            /* FTS5 Configuration object */

  const char *pText, int nText,   /* Text to tokenize */
  void *pCtx,                     /* Context passed to xToken() */
  int (*xToken)(void*, const char*, int, int, int)    /* Callback */
){
  /* A NULL text pointer means there is nothing to tokenize: report success
  ** without invoking the tokenizer at all. */
  if( pText==0 ) return SQLITE_OK;
  /* Hand the request to the tokenizer handle (pTok) through the method
  ** table (pTokApi) stored in the configuration object. */
  return pConfig->pTokApi->xTokenize(pConfig->pTok, pCtx, pText, nText, xToken);


}

/*
** Argument pIn points to the first character in what is expected to be
** a comma-separated list of SQL literals followed by a ')' character.
** If it actually is this, return a pointer to the ')'. Otherwise, return
** NULL to indicate a parse error.







>


|


|
>
>







641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
** the callback returned SQLITE_DONE, this is not an error and this function
** still returns SQLITE_OK. Or, if the tokenization was abandoned early
** because the callback returned another non-zero value, it is assumed
** to be an SQLite error code and returned to the caller.
*/
int sqlite3Fts5Tokenize(
  Fts5Config *pConfig,            /* Configuration holding the tokenizer */
  int flags,                      /* FTS5_TOKENIZE_* bits, passed through */
  const char *pText, int nText,   /* Buffer to tokenize and its size */
  void *pCtx,                     /* Opaque pointer handed on to xToken() */
  int (*xToken)(void*, const char*, int, int, int, int)    /* Token callback */
){
  int rc = SQLITE_OK;

  /* Only a non-NULL buffer is given to the tokenizer; a NULL buffer is
  ** reported as success without any callback being made. */
  if( pText ){
    rc = pConfig->pTokApi->xTokenize(
        pConfig->pTok, pCtx, flags, pText, nText, xToken
    );
  }
  return rc;
}

/*
** Argument pIn points to the first character in what is expected to be
** a comma-separated list of SQL literals followed by a ')' character.
** If it actually is this, return a pointer to the ')'. Otherwise, return
** NULL to indicate a parse error.

Changes to ext/fts5/fts5_expr.c.

1337
1338
1339
1340
1341
1342
1343
1344

1345
1346
1347
1348
1349
1350
1351
....
1413
1414
1415
1416
1417
1418
1419


1420

1421
1422
1423
1424
1425
1426
1427
1428
** Callback for tokenizing terms used by ParseTerm().
*/
static int fts5ParseTokenize(
  void *pContext,                 /* Pointer to Fts5InsertCtx object */
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStart,                     /* Start offset of token */
  int iEnd                        /* End offset of token */

){
  int rc = SQLITE_OK;
  const int SZALLOC = 8;
  TokenCtx *pCtx = (TokenCtx*)pContext;
  Fts5ExprPhrase *pPhrase = pCtx->pPhrase;
  Fts5ExprTerm *pTerm;

................................................................................
  char *z = 0;

  memset(&sCtx, 0, sizeof(TokenCtx));
  sCtx.pPhrase = pAppend;

  rc = fts5ParseStringFromToken(pToken, &z);
  if( rc==SQLITE_OK ){


    sqlite3Fts5Dequote(z);

    rc = sqlite3Fts5Tokenize(pConfig, z, strlen(z), &sCtx, fts5ParseTokenize);
  }
  sqlite3_free(z);
  if( rc ){
    pParse->rc = rc;
    fts5ExprPhraseFree(sCtx.pPhrase);
    sCtx.pPhrase = 0;
  }else if( sCtx.pPhrase ){







|
>







 







>
>

>
|







1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
....
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
** Callback for tokenizing terms used by ParseTerm().
*/
static int fts5ParseTokenize(
  void *pContext,                 /* Pointer to Fts5InsertCtx object */
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStart,                     /* Start offset of token */
  int iEnd,                       /* End offset of token */
  int iPos
){
  int rc = SQLITE_OK;
  const int SZALLOC = 8;
  TokenCtx *pCtx = (TokenCtx*)pContext;
  Fts5ExprPhrase *pPhrase = pCtx->pPhrase;
  Fts5ExprTerm *pTerm;

................................................................................
  char *z = 0;

  memset(&sCtx, 0, sizeof(TokenCtx));
  sCtx.pPhrase = pAppend;

  rc = fts5ParseStringFromToken(pToken, &z);
  if( rc==SQLITE_OK ){
    /* The text is always tokenized in query mode. When bPrefix is set the
    ** PREFIX bit is added as well, so that the tokenizer can tell a prefix
    ** term from a complete one. (OR-ing in FTS5_TOKENIZE_QUERY a second
    ** time here would leave the mask unchanged and the prefix case would
    ** never be signalled.) */
    int flags = FTS5_TOKENIZE_QUERY | (bPrefix ? FTS5_TOKENIZE_PREFIX : 0);
    int n;

    /* The length is taken only after sqlite3Fts5Dequote() has been given
    ** the (mutable) buffer, so n describes the text actually tokenized. */
    sqlite3Fts5Dequote(z);
    n = (int)strlen(z);
    rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize);
  }
  sqlite3_free(z);
  if( rc ){
    pParse->rc = rc;
    fts5ExprPhraseFree(sCtx.pPhrase);
    sCtx.pPhrase = 0;
  }else if( sCtx.pPhrase ){

Changes to ext/fts5/fts5_main.c.

1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505


1506
1507
1508
1509
1510
1511
1512
....
1654
1655
1656
1657
1658
1659
1660
1661

1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
....
1687
1688
1689
1690
1691
1692
1693
1694


1695
1696
1697
1698
1699
1700
1701
....
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
  return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow);
}

/*
** Tokenize the nText bytes at pText using the configuration of the table
** that owns the cursor passed as pCtx. pUserData and xToken are forwarded
** unchanged to sqlite3Fts5Tokenize(), whose return code is returned.
** NOTE(review): presumably the implementation behind the extension API's
** xTokenize member - the signatures match, but the wiring is not visible
** in this excerpt.
*/
static int fts5ApiTokenize(
  Fts5Context *pCtx, 
  const char *pText, int nText, 
  void *pUserData,
  int (*xToken)(void*, const char*, int, int, int)
){
  /* The opaque context handle is really a cursor; its vtab is the table. */
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
  return sqlite3Fts5Tokenize(pTab->pConfig, pText, nText, pUserData, xToken);


}

static int fts5ApiPhraseCount(Fts5Context *pCtx){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  return sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
}

................................................................................
}

/*
** Tokenizer callback that counts tokens: each invocation adds one to the
** int that pContext points at. The token itself is ignored. Always
** returns SQLITE_OK.
*/
static int fts5ColumnSizeCb(
  void *pContext,                 /* Pointer to int */
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStart,                     /* Start offset of token */
  int iEnd                        /* End offset of token */

){
  int *pCnt = (int*)pContext;
  *pCnt = *pCnt + 1;
  return SQLITE_OK;
}

static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
  Fts5Config *pConfig = pTab->pConfig;
................................................................................
      for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
        if( pConfig->abUnindexed[i]==0 ){
          const char *z; int n;
          void *p = (void*)(&pCsr->aColumnSize[i]);
          pCsr->aColumnSize[i] = 0;
          rc = fts5ApiColumnText(pCtx, i, &z, &n);
          if( rc==SQLITE_OK ){
            rc = sqlite3Fts5Tokenize(pConfig, z, n, p, fts5ColumnSizeCb);


          }
        }
      }
    }
    CsrFlagClear(pCsr, FTS5CSR_REQUIRE_DOCSIZE);
  }
  if( iCol<0 ){
................................................................................
  pGlobal = (Fts5Global*)sqlite3_malloc(sizeof(Fts5Global));
  if( pGlobal==0 ){
    rc = SQLITE_NOMEM;
  }else{
    void *p = (void*)pGlobal;
    memset(pGlobal, 0, sizeof(Fts5Global));
    pGlobal->db = db;
    pGlobal->api.iVersion = 1;
    pGlobal->api.xCreateFunction = fts5CreateAux;
    pGlobal->api.xCreateTokenizer = fts5CreateTokenizer;
    pGlobal->api.xFindTokenizer = fts5FindTokenizer;
    rc = sqlite3_create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy);
    if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db);
    if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(pGlobal, db);
    if( rc==SQLITE_OK ) rc = sqlite3Fts5AuxInit(&pGlobal->api);







|



|
>
>







 







|
>


|







 







|
>
>







 







|







1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
....
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
....
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
....
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
  return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow);
}

/*
** Run the tokenizer of the table that owns cursor pCtx over the nText
** bytes at pText, tagging the request with FTS5_TOKENIZE_AUX. pUserData
** and xToken are passed straight through, and the result of
** sqlite3Fts5Tokenize() is returned as-is.
*/
static int fts5ApiTokenize(
  Fts5Context *pCtx, 
  const char *pText, int nText, 
  void *pUserData,
  int (*xToken)(void*, const char*, int, int, int, int)
){
  /* The context handle is a cursor in disguise; follow it to the table
  ** and from there to the table's configuration. */
  Fts5Table *pTab = (Fts5Table*)(((Fts5Cursor*)pCtx)->base.pVtab);
  Fts5Config *pConfig = pTab->pConfig;

  return sqlite3Fts5Tokenize(
      pConfig, FTS5_TOKENIZE_AUX, pText, nText, pUserData, xToken
  );
}

static int fts5ApiPhraseCount(Fts5Context *pCtx){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  return sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
}

................................................................................
}

/*
** Tokenizer callback that records a size derived from token positions.
** On every call the int addressed by pContext is overwritten with iPos+1;
** the token text and byte offsets are not examined. Always returns
** SQLITE_OK.
*/
static int fts5ColumnSizeCb(
  void *pContext,                 /* Address of the int to update */
  const char *pToken,             /* Token text (not used) */
  int nToken,                     /* Token size in bytes (not used) */
  int iStart,                     /* Token start offset (not used) */
  int iEnd,                       /* Token end offset (not used) */
  int iPos                        /* Position reported for this token */
){
  *(int*)pContext = iPos+1;
  return SQLITE_OK;
}

static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
  Fts5Config *pConfig = pTab->pConfig;
................................................................................
      for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
        if( pConfig->abUnindexed[i]==0 ){
          const char *z; int n;
          void *p = (void*)(&pCsr->aColumnSize[i]);
          pCsr->aColumnSize[i] = 0;
          rc = fts5ApiColumnText(pCtx, i, &z, &n);
          if( rc==SQLITE_OK ){
            rc = sqlite3Fts5Tokenize(
                pConfig, FTS5_TOKENIZE_AUX, z, n, p, fts5ColumnSizeCb
            );
          }
        }
      }
    }
    CsrFlagClear(pCsr, FTS5CSR_REQUIRE_DOCSIZE);
  }
  if( iCol<0 ){
................................................................................
  pGlobal = (Fts5Global*)sqlite3_malloc(sizeof(Fts5Global));
  if( pGlobal==0 ){
    rc = SQLITE_NOMEM;
  }else{
    void *p = (void*)pGlobal;
    memset(pGlobal, 0, sizeof(Fts5Global));
    pGlobal->db = db;
    pGlobal->api.iVersion = 2;
    pGlobal->api.xCreateFunction = fts5CreateAux;
    pGlobal->api.xCreateTokenizer = fts5CreateTokenizer;
    pGlobal->api.xFindTokenizer = fts5FindTokenizer;
    rc = sqlite3_create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy);
    if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db);
    if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(pGlobal, db);
    if( rc==SQLITE_OK ) rc = sqlite3Fts5AuxInit(&pGlobal->api);

Changes to ext/fts5/fts5_storage.c.

358
359
360
361
362
363
364
365

366
367
368

369
370
371
372
373
374
375
376
...
390
391
392
393
394
395
396

397
398
399
400
401
402
403
...
561
562
563
564
565
566
567

568
569
570
571
572
573
574
...
650
651
652
653
654
655
656

657
658
659
660
661
662
663
...
767
768
769
770
771
772
773

774
775
776
777
778
779
780
...
837
838
839
840
841
842
843
844

845
846

847
848
849
850
851
852
853
854
...
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
** Tokenization callback used when inserting tokens into the FTS index.
*/
static int fts5StorageInsertCallback(
  void *pContext,                 /* Pointer to Fts5InsertCtx object */
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStart,                     /* Start offset of token */
  int iEnd                        /* End offset of token */

){
  Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext;
  Fts5Index *pIdx = pCtx->pStorage->pIndex;

  /* The position of this token is the number of tokens seen so far; the
  ** post-increment also advances the running count kept in szCol. */
  int iPos = pCtx->szCol++;
  /* Write the token to the index for column pCtx->iCol at position iPos
  ** and return whatever sqlite3Fts5IndexWrite() reports. */
  return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, iPos, pToken, nToken);
}

/*
** If a row with rowid iDel is present in the %_content table, add the
** delete-markers to the FTS index necessary to delete it. Do not actually
** remove the %_content row at this time though.
................................................................................
      ctx.pStorage = p;
      ctx.iCol = -1;
      rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel);
      for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){
        if( pConfig->abUnindexed[iCol-1] ) continue;
        ctx.szCol = 0;
        rc = sqlite3Fts5Tokenize(pConfig, 

            (const char*)sqlite3_column_text(pSeek, iCol),
            sqlite3_column_bytes(pSeek, iCol),
            (void*)&ctx,
            fts5StorageInsertCallback
        );
        p->aTotalSize[iCol-1] -= (i64)ctx.szCol;
      }
................................................................................
    ctx.iCol = -1;

    rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel);
    for(iCol=0; rc==SQLITE_OK && iCol<pConfig->nCol; iCol++){
      if( pConfig->abUnindexed[iCol] ) continue;
      ctx.szCol = 0;
      rc = sqlite3Fts5Tokenize(pConfig, 

        (const char*)sqlite3_value_text(apVal[iCol]),
        sqlite3_value_bytes(apVal[iCol]),
        (void*)&ctx,
        fts5StorageInsertCallback
      );
      p->aTotalSize[iCol] -= (i64)ctx.szCol;
    }
................................................................................

    sqlite3Fts5BufferZero(&buf);
    rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iRowid);
    for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
      ctx.szCol = 0;
      if( pConfig->abUnindexed[ctx.iCol]==0 ){
        rc = sqlite3Fts5Tokenize(pConfig, 

            (const char*)sqlite3_column_text(pScan, ctx.iCol+1),
            sqlite3_column_bytes(pScan, ctx.iCol+1),
            (void*)&ctx,
            fts5StorageInsertCallback
        );
      }
      sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
................................................................................
    rc = sqlite3Fts5IndexBeginWrite(p->pIndex, *piRowid);
    ctx.pStorage = p;
  }
  for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
    ctx.szCol = 0;
    if( pConfig->abUnindexed[ctx.iCol]==0 ){
      rc = sqlite3Fts5Tokenize(pConfig, 

          (const char*)sqlite3_value_text(apVal[ctx.iCol+2]),
          sqlite3_value_bytes(apVal[ctx.iCol+2]),
          (void*)&ctx,
          fts5StorageInsertCallback
      );
    }
    sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
................................................................................
** Tokenization callback used by integrity check.
*/
static int fts5StorageIntegrityCallback(
  void *pContext,                 /* Pointer to Fts5IntegrityCtx object */
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStart,                     /* Start offset of token */
  int iEnd                        /* End offset of token */

){
  Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext;

  /* The position of this token is the number of tokens seen so far; the
  ** post-increment also advances the running count kept in szCol. */
  int iPos = pCtx->szCol++;
  /* XOR the per-token checksum for (rowid, column, position, token) into
  ** the accumulated checksum. */
  pCtx->cksum ^= sqlite3Fts5IndexCksum(
      pCtx->pConfig, pCtx->iRowid, pCtx->iCol, iPos, pToken, nToken
  );
  return SQLITE_OK;
}

/*
................................................................................
      ctx.iRowid = sqlite3_column_int64(pScan, 0);
      ctx.szCol = 0;
      rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize);
      for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
        if( pConfig->abUnindexed[i] ) continue;
        ctx.iCol = i;
        ctx.szCol = 0;
        rc = sqlite3Fts5Tokenize(
            pConfig, 
            (const char*)sqlite3_column_text(pScan, i+1),
            sqlite3_column_bytes(pScan, i+1),
            (void*)&ctx,
            fts5StorageIntegrityCallback
        );
        if( ctx.szCol!=aColSize[i] ) rc = FTS5_CORRUPT;
        aTotalSize[i] += ctx.szCol;







|
>



>
|







 







>







 







>







 







>







 







>







 







|
>


>
|







 







|
|







358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
...
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
...
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
...
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
...
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
...
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
...
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
** Tokenization callback used when inserting tokens into the FTS index.
*/
static int fts5StorageInsertCallback(
  void *pContext,                 /* Really an Fts5InsertCtx* */
  const char *pToken,             /* Token text */
  int nToken,                     /* Number of bytes in pToken */
  int iStart,                     /* Token start offset (not used) */
  int iEnd,                       /* Token end offset (not used) */
  int iPos                        /* Position reported for this token */
){
  Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext;

  /* Positions may repeat but are expected never to go backwards, so the
  ** size implied by this token can only match or exceed the size recorded
  ** so far. Record it as the new column size. */
  assert( iPos+1>=pCtx->szCol );
  pCtx->szCol = iPos+1;

  /* Add the token to the index for the current column at position iPos. */
  return sqlite3Fts5IndexWrite(
      pCtx->pStorage->pIndex, pCtx->iCol, iPos, pToken, nToken
  );
}

/*
** If a row with rowid iDel is present in the %_content table, add the
** delete-markers to the FTS index necessary to delete it. Do not actually
** remove the %_content row at this time though.
................................................................................
      ctx.pStorage = p;
      ctx.iCol = -1;
      rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel);
      for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){
        if( pConfig->abUnindexed[iCol-1] ) continue;
        ctx.szCol = 0;
        rc = sqlite3Fts5Tokenize(pConfig, 
            FTS5_TOKENIZE_DOCUMENT,
            (const char*)sqlite3_column_text(pSeek, iCol),
            sqlite3_column_bytes(pSeek, iCol),
            (void*)&ctx,
            fts5StorageInsertCallback
        );
        p->aTotalSize[iCol-1] -= (i64)ctx.szCol;
      }
................................................................................
    ctx.iCol = -1;

    rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iDel);
    for(iCol=0; rc==SQLITE_OK && iCol<pConfig->nCol; iCol++){
      if( pConfig->abUnindexed[iCol] ) continue;
      ctx.szCol = 0;
      rc = sqlite3Fts5Tokenize(pConfig, 
        FTS5_TOKENIZE_DOCUMENT,
        (const char*)sqlite3_value_text(apVal[iCol]),
        sqlite3_value_bytes(apVal[iCol]),
        (void*)&ctx,
        fts5StorageInsertCallback
      );
      p->aTotalSize[iCol] -= (i64)ctx.szCol;
    }
................................................................................

    sqlite3Fts5BufferZero(&buf);
    rc = sqlite3Fts5IndexBeginWrite(p->pIndex, iRowid);
    for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
      ctx.szCol = 0;
      if( pConfig->abUnindexed[ctx.iCol]==0 ){
        rc = sqlite3Fts5Tokenize(pConfig, 
            FTS5_TOKENIZE_DOCUMENT,
            (const char*)sqlite3_column_text(pScan, ctx.iCol+1),
            sqlite3_column_bytes(pScan, ctx.iCol+1),
            (void*)&ctx,
            fts5StorageInsertCallback
        );
      }
      sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
................................................................................
    rc = sqlite3Fts5IndexBeginWrite(p->pIndex, *piRowid);
    ctx.pStorage = p;
  }
  for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
    ctx.szCol = 0;
    if( pConfig->abUnindexed[ctx.iCol]==0 ){
      rc = sqlite3Fts5Tokenize(pConfig, 
          FTS5_TOKENIZE_DOCUMENT,
          (const char*)sqlite3_value_text(apVal[ctx.iCol+2]),
          sqlite3_value_bytes(apVal[ctx.iCol+2]),
          (void*)&ctx,
          fts5StorageInsertCallback
      );
    }
    sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
................................................................................
** Tokenization callback used by integrity check.
*/
static int fts5StorageIntegrityCallback(
  void *pContext,                 /* Really an Fts5IntegrityCtx* */
  const char *pToken,             /* Token text */
  int nToken,                     /* Number of bytes in pToken */
  int iStart,                     /* Token start offset (not used) */
  int iEnd,                       /* Token end offset (not used) */
  int iPos                        /* Position reported for this token */
){
  Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext;
  const int szNew = iPos+1;       /* Column size implied by this token */

  /* Positions are expected never to go backwards, so the implied size can
  ** only match or exceed the size recorded so far. */
  assert( szNew>=pCtx->szCol );
  pCtx->szCol = szNew;

  /* Fold the checksum for (rowid, column, position, token) into the
  ** running XOR checksum. */
  pCtx->cksum ^= sqlite3Fts5IndexCksum(
      pCtx->pConfig, pCtx->iRowid, pCtx->iCol, iPos, pToken, nToken
  );
  return SQLITE_OK;
}

/*
................................................................................
      ctx.iRowid = sqlite3_column_int64(pScan, 0);
      ctx.szCol = 0;
      rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize);
      for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
        if( pConfig->abUnindexed[i] ) continue;
        ctx.iCol = i;
        ctx.szCol = 0;
        rc = sqlite3Fts5Tokenize(pConfig, 
            FTS5_TOKENIZE_DOCUMENT,
            (const char*)sqlite3_column_text(pScan, i+1),
            sqlite3_column_bytes(pScan, i+1),
            (void*)&ctx,
            fts5StorageIntegrityCallback
        );
        if( ctx.szCol!=aColSize[i] ) rc = FTS5_CORRUPT;
        aTotalSize[i] += ctx.szCol;

Changes to ext/fts5/fts5_tcl.c.

138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
...
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
...
662
663
664
665
666
667
668
669


670
671
672
673
674
675
676
...
744
745
746
747
748
749
750

751
752
753
754
755
756
757
758
759
/* Container for a single Tcl object pointer. NOTE(review): how pObj is
** owned and released is not visible in this excerpt. */
struct F5tAuxData {
  Tcl_Obj *pObj;                  /* The wrapped Tcl object */
};

static int xTokenizeCb(
  void *pCtx, 
  const char *zToken, int nToken, 
  int iStart, int iEnd
){
  F5tFunction *p = (F5tFunction*)pCtx;
  Tcl_Obj *pEval = Tcl_DuplicateObj(p->pScript);
  int rc;

  Tcl_IncrRefCount(pEval);
  Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewStringObj(zToken, nToken));
................................................................................
  int bSubst;
  const char *zInput;
};

static int xTokenizeCb2(
  void *pCtx, 
  const char *zToken, int nToken, 
  int iStart, int iEnd
){
  F5tTokenizeCtx *p = (F5tTokenizeCtx*)pCtx;
  if( p->bSubst ){
    Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewStringObj(zToken, nToken));
    Tcl_ListObjAppendElement(
        0, p->pRet, Tcl_NewStringObj(&p->zInput[iStart], iEnd-iStart)
    );
................................................................................
  }

  pRet = Tcl_NewObj();
  Tcl_IncrRefCount(pRet);
  ctx.bSubst = (objc==5);
  ctx.pRet = pRet;
  ctx.zInput = zText;
  rc = tokenizer.xTokenize(pTok, (void*)&ctx, zText, nText, xTokenizeCb2);


  tokenizer.xDelete(pTok);
  if( rc!=SQLITE_OK ){
    Tcl_AppendResult(interp, "error in tokenizer.xTokenize()", 0);
    Tcl_DecrRefCount(pRet);
    return TCL_ERROR;
  }

................................................................................
  Tcl_DecrRefCount(pInst->pScript);
  ckfree((char *)pInst);
}

static int f5tTokenizerTokenize(
  Fts5Tokenizer *p, 
  void *pCtx,

  const char *pText, int nText, 
  int (*xToken)(void*, const char*, int, int, int)
){
  F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p;
  void *pOldCtx;
  int (*xOldToken)(void*, const char*, int, int, int);
  Tcl_Obj *pEval;
  int rc;








|







 







|







 







|
>
>







 







>

|







138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
...
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
...
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
...
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
/*
** Container for a single Tcl object pointer.
** NOTE(review): judging by the name this is presumably stored as FTS5
** auxiliary data; the code that allocates, ref-counts and frees pObj is
** not visible here - confirm before relying on any ownership rule.
*/
struct F5tAuxData {
  Tcl_Obj *pObj;                  /* The wrapped Tcl object */
};

static int xTokenizeCb(
  void *pCtx, 
  const char *zToken, int nToken, 
  int iStart, int iEnd, int iPos
){
  F5tFunction *p = (F5tFunction*)pCtx;
  Tcl_Obj *pEval = Tcl_DuplicateObj(p->pScript);
  int rc;

  Tcl_IncrRefCount(pEval);
  Tcl_ListObjAppendElement(p->interp, pEval, Tcl_NewStringObj(zToken, nToken));
................................................................................
  int bSubst;
  const char *zInput;
};

static int xTokenizeCb2(
  void *pCtx, 
  const char *zToken, int nToken, 
  int iStart, int iEnd, int iPos
){
  F5tTokenizeCtx *p = (F5tTokenizeCtx*)pCtx;
  if( p->bSubst ){
    Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewStringObj(zToken, nToken));
    Tcl_ListObjAppendElement(
        0, p->pRet, Tcl_NewStringObj(&p->zInput[iStart], iEnd-iStart)
    );
................................................................................
  }

  pRet = Tcl_NewObj();
  Tcl_IncrRefCount(pRet);
  ctx.bSubst = (objc==5);
  ctx.pRet = pRet;
  ctx.zInput = zText;
  rc = tokenizer.xTokenize(
      pTok, (void*)&ctx, FTS5_TOKENIZE_DOCUMENT, zText, nText, xTokenizeCb2
  );
  tokenizer.xDelete(pTok);
  if( rc!=SQLITE_OK ){
    Tcl_AppendResult(interp, "error in tokenizer.xTokenize()", 0);
    Tcl_DecrRefCount(pRet);
    return TCL_ERROR;
  }

................................................................................
  Tcl_DecrRefCount(pInst->pScript);
  ckfree((char *)pInst);
}

static int f5tTokenizerTokenize(
  Fts5Tokenizer *p, 
  void *pCtx,
  int flags,
  const char *pText, int nText, 
  int (*xToken)(void*, const char*, int, int, int, int)
){
  F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p;
  void *pOldCtx;
  int (*xOldToken)(void*, const char*, int, int, int);
  Tcl_Obj *pEval;
  int rc;

Changes to ext/fts5/fts5_tokenize.c.

112
113
114
115
116
117
118

119
120
121
122
123
124
125

126
127
128
129
130
131
132
...
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
...
381
382
383
384
385
386
387

388
389
390
391
392
393

394
395
396
397
398
399
400
...
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
...
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
....
1117
1118
1119
1120
1121
1122
1123
1124

1125
1126
1127
1128
1129
1130
1131
....
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189

1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206

/*
** Tokenize some text using the ascii tokenizer.
*/
static int fts5AsciiTokenize(
  Fts5Tokenizer *pTokenizer,
  void *pCtx,

  const char *pText, int nText,
  int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd)
){
  AsciiTokenizer *p = (AsciiTokenizer*)pTokenizer;
  int rc = SQLITE_OK;
  int ie;
  int is = 0;


  char aFold[64];
  int nFold = sizeof(aFold);
  char *pFold = aFold;
  unsigned char *a = p->aTokenChar;

  while( is<nText && rc==SQLITE_OK ){
................................................................................
        break;
      }
      nFold = nByte*2;
    }
    asciiFold(pFold, &pText[is], nByte);

    /* Invoke the token callback */
    rc = xToken(pCtx, pFold, nByte, is, ie);
    is = ie+1;
  }
  
  if( pFold!=aFold ) sqlite3_free(pFold);
  if( rc==SQLITE_DONE ) rc = SQLITE_OK;
  return rc;
}
................................................................................
  assert( (sqlite3Fts5UnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 );
  return sqlite3Fts5UnicodeIsalnum(iCode) ^ fts5UnicodeIsException(p, iCode);
}

static int fts5UnicodeTokenize(
  Fts5Tokenizer *pTokenizer,
  void *pCtx,

  const char *pText, int nText,
  int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd)
){
  Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer;
  int rc = SQLITE_OK;
  unsigned char *a = p->aTokenChar;


  unsigned char *zTerm = (unsigned char*)&pText[nText];
  unsigned char *zCsr = (unsigned char *)pText;

  /* Output buffer */
  char *aFold = p->aFold;
  int nFold = p->nFold;
................................................................................
        }
        zCsr++;
      }
      ie = zCsr - (unsigned char*)pText;
    }

    /* Invoke the token callback */
    rc = xToken(pCtx, aFold, zOut-aFold, is, ie);
  }
  
 tokenize_done:
  if( rc==SQLITE_DONE ) rc = SQLITE_OK;
  return rc;
}

................................................................................
  *ppOut = (Fts5Tokenizer*)pRet;
  return rc;
}

/*
** Context object that fts5PorterTokenize() passes to the wrapped
** tokenizer in place of the caller's own context. fts5PorterCb() casts
** its pCtx argument back to this type and forwards each token to
** xToken(pCtx, ...).
*/
typedef struct PorterContext PorterContext;
struct PorterContext {
  void *pCtx;                     /* Caller's context; first arg to xToken() */
  int (*xToken)(void*, const char*, int, int, int);  /* Caller's callback */
  char *aBuf;                     /* Copied from PorterTokenizer.aBuf;
                                  ** presumably stemming scratch space */
};

typedef struct PorterRule PorterRule;
struct PorterRule {
  const char *zSuffix;
  int nSuffix;
................................................................................
}

static int fts5PorterCb(
  void *pCtx, 
  const char *pToken, 
  int nToken, 
  int iStart, 
  int iEnd

){
  PorterContext *p = (PorterContext*)pCtx;

  char *aBuf;
  int nBuf;

  if( nToken>FTS5_PORTER_MAX_TOKEN || nToken<3 ) goto pass_through;
................................................................................
  /* Step 5b. */
  if( nBuf>1 && aBuf[nBuf-1]=='l' 
   && aBuf[nBuf-2]=='l' && fts5Porter_MGt1(aBuf, nBuf-1) 
  ){
    nBuf--;
  }

  return p->xToken(p->pCtx, aBuf, nBuf, iStart, iEnd);

 pass_through:
  return p->xToken(p->pCtx, pToken, nToken, iStart, iEnd);
}

/*
** xTokenize implementation for the porter tokenizer.
**
** The caller's callback (xToken) and context (pCtx) are packed into a
** PorterContext together with the tokenizer's aBuf buffer. The wrapped
** tokenizer is then run over pText/nText with fts5PorterCb() installed
** as its token callback, so every token it produces passes through
** fts5PorterCb() before reaching xToken.
**
** Returns whatever the wrapped tokenizer's xTokenize method returns.
*/
static int fts5PorterTokenize(
  Fts5Tokenizer *pTokenizer,
  void *pCtx,
  const char *pText, int nText,
  int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd)
){
  PorterTokenizer *pPorter = (PorterTokenizer*)pTokenizer;
  PorterContext porterCtx;

  /* Bundle everything fts5PorterCb() needs to reach the real callback. */
  porterCtx.pCtx = pCtx;
  porterCtx.xToken = xToken;
  porterCtx.aBuf = pPorter->aBuf;

  /* Delegate the actual splitting of the text to the wrapped tokenizer. */
  return pPorter->tokenizer.xTokenize(
      pPorter->pTokenizer, (void*)&porterCtx, pText, nText, fts5PorterCb
  );
}

/*
** Register all built-in tokenizers with FTS5.
*/
int sqlite3Fts5TokenizerInit(fts5_api *pApi){







>

|





>







 







|







 







>

|




>







 







|







 







|







 







|
>







 







|


|








>

|







|







112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
...
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
...
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
...
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
...
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
....
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
....
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212

/*
** Tokenize some text using the ascii tokenizer.
*/
static int fts5AsciiTokenize(
  Fts5Tokenizer *pTokenizer,
  void *pCtx,
  int flags,
  const char *pText, int nText,
  int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd, int iPos)
){
  AsciiTokenizer *p = (AsciiTokenizer*)pTokenizer;
  int rc = SQLITE_OK;
  int ie;
  int is = 0;
  int iPos = 0;

  char aFold[64];
  int nFold = sizeof(aFold);
  char *pFold = aFold;
  unsigned char *a = p->aTokenChar;

  while( is<nText && rc==SQLITE_OK ){
................................................................................
        break;
      }
      nFold = nByte*2;
    }
    asciiFold(pFold, &pText[is], nByte);

    /* Invoke the token callback */
    rc = xToken(pCtx, pFold, nByte, is, ie, iPos++);
    is = ie+1;
  }
  
  if( pFold!=aFold ) sqlite3_free(pFold);
  if( rc==SQLITE_DONE ) rc = SQLITE_OK;
  return rc;
}
................................................................................
  assert( (sqlite3Fts5UnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 );
  return sqlite3Fts5UnicodeIsalnum(iCode) ^ fts5UnicodeIsException(p, iCode);
}

static int fts5UnicodeTokenize(
  Fts5Tokenizer *pTokenizer,
  void *pCtx,
  int flags,
  const char *pText, int nText,
  int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd, int iPos)
){
  Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer;
  int rc = SQLITE_OK;
  unsigned char *a = p->aTokenChar;
  int iPos = 0;

  unsigned char *zTerm = (unsigned char*)&pText[nText];
  unsigned char *zCsr = (unsigned char *)pText;

  /* Output buffer */
  char *aFold = p->aFold;
  int nFold = p->nFold;
................................................................................
        }
        zCsr++;
      }
      ie = zCsr - (unsigned char*)pText;
    }

    /* Invoke the token callback */
    rc = xToken(pCtx, aFold, zOut-aFold, is, ie, iPos++);
  }
  
 tokenize_done:
  if( rc==SQLITE_DONE ) rc = SQLITE_OK;
  return rc;
}

................................................................................
  *ppOut = (Fts5Tokenizer*)pRet;
  return rc;
}

/*
** Context object that fts5PorterTokenize() passes to the wrapped
** tokenizer in place of the caller's own context. fts5PorterCb() casts
** its pCtx argument back to this type and forwards each token to
** xToken(pCtx, ...).
*/
typedef struct PorterContext PorterContext;
struct PorterContext {
  void *pCtx;                     /* Caller's context; first arg to xToken() */
  int (*xToken)(void*, const char*, int, int, int, int);  /* Caller's callback */
  char *aBuf;                     /* Copied from PorterTokenizer.aBuf;
                                  ** presumably stemming scratch space */
};

typedef struct PorterRule PorterRule;
struct PorterRule {
  const char *zSuffix;
  int nSuffix;
................................................................................
}

static int fts5PorterCb(
  void *pCtx, 
  const char *pToken, 
  int nToken, 
  int iStart, 
  int iEnd,
  int iPos
){
  PorterContext *p = (PorterContext*)pCtx;

  char *aBuf;
  int nBuf;

  if( nToken>FTS5_PORTER_MAX_TOKEN || nToken<3 ) goto pass_through;
................................................................................
  /* Step 5b. */
  if( nBuf>1 && aBuf[nBuf-1]=='l' 
   && aBuf[nBuf-2]=='l' && fts5Porter_MGt1(aBuf, nBuf-1) 
  ){
    nBuf--;
  }

  return p->xToken(p->pCtx, aBuf, nBuf, iStart, iEnd, iPos);

 pass_through:
  return p->xToken(p->pCtx, pToken, nToken, iStart, iEnd, iPos);
}

/*
** xTokenize implementation for the porter tokenizer.
**
** The caller's callback (xToken) and context (pCtx) are packed into a
** PorterContext together with the tokenizer's aBuf buffer. The wrapped
** tokenizer is then run over pText/nText with fts5PorterCb() installed
** as its token callback, so every token it produces passes through
** fts5PorterCb() before reaching xToken. The flags argument is handed
** to the wrapped tokenizer unchanged.
**
** Returns whatever the wrapped tokenizer's xTokenize method returns.
*/
static int fts5PorterTokenize(
  Fts5Tokenizer *pTokenizer,
  void *pCtx,
  int flags,
  const char *pText, int nText,
  int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd, int iPos)
){
  PorterTokenizer *pPorter = (PorterTokenizer*)pTokenizer;
  PorterContext porterCtx;

  /* Bundle everything fts5PorterCb() needs to reach the real callback. */
  porterCtx.pCtx = pCtx;
  porterCtx.xToken = xToken;
  porterCtx.aBuf = pPorter->aBuf;

  /* Delegate the actual splitting of the text to the wrapped tokenizer. */
  return pPorter->tokenizer.xTokenize(
      pPorter->pTokenizer, (void*)&porterCtx, flags, pText, nText, fts5PorterCb
  );
}

/*
** Register all built-in tokenizers with FTS5.
*/
int sqlite3Fts5TokenizerInit(fts5_api *pApi){

Changes to ext/fts5/test/fts5matchinfo.test.

351
352
353
354
355
356
357
358
359

360
361
362
363
364
365
366
367
368
...
428
429
430
431
432
433
434
435
436
437
438
439
440
441

442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
   GROUP BY t10.rowid
   ORDER BY 1;
} {1 1 one 2 2 two 3 3 three}
  
#---------------------------------------------------------------------------
# Test the 'y' matchinfo flag
#
set sqlite_fts3_enable_parentheses 1
reset_db

do_execsql_test 11.0 {
  CREATE VIRTUAL TABLE tt USING fts3(x, y);
  INSERT INTO tt VALUES('c d a c d d', 'e a g b d a');   -- 1
  INSERT INTO tt VALUES('c c g a e b', 'c g d g e c');   -- 2
  INSERT INTO tt VALUES('b e f d e g', 'b a c b c g');   -- 3
  INSERT INTO tt VALUES('a c f f g d', 'd b f d e g');   -- 4
  INSERT INTO tt VALUES('g a c f c f', 'd g g b c c');   -- 5
  INSERT INTO tt VALUES('g a c e b b', 'd b f b g g');   -- 6
  INSERT INTO tt VALUES('f d a a f c', 'e e a d c f');   -- 7
................................................................................
    SELECT rowid, mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH $expr
  } $r2

  do_execsql_test 11.1.$tn.2  {
    SELECT rowid, mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH $expr
  } $r2
}
set sqlite_fts3_enable_parentheses 0

#---------------------------------------------------------------------------
# Test the 'b' matchinfo flag
#
# A 50-column fts3 table is created and one row is inserted with the term
# 'abc' in columns c4 and c45 only. The expected result is a two-element
# list: 1<<4 followed by 1<<(45-32).
# NOTE(review): this suggests 'b' reports one bit per column packed into
# 32-bit words, so 50 columns need two words - confirm against the
# matchinfo documentation.
#
set sqlite_fts3_enable_parentheses 1
reset_db

# Expose the Tcl command "mit" as an SQL function of the same name. Its
# definition is outside this excerpt; presumably it decodes the matchinfo
# blob into a list of integers.
db func mit mit

# Build the column list c0..c49 and create the table with it.
do_test 12.0 {
  set cols [list]
  for {set i 0} {$i < 50} {incr i} { lappend cols "c$i" }
  execsql "CREATE VIRTUAL TABLE tt USING fts3([join $cols ,])"
} {}

# Column 4 is expected as bit 4 of the first value, column 45 as bit 13
# (45-32) of the second value.
do_execsql_test 12.1 {
  INSERT INTO tt (rowid, c4, c45) VALUES(1, 'abc', 'abc');
  SELECT mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH 'abc';
} [list [list [expr 1<<4] [expr 1<<(45-32)]]]

# Restore the setting changed at the top of this section.
set sqlite_fts3_enable_parentheses 0
finish_test








<

>

|







 







<




<

>





|







<


351
352
353
354
355
356
357

358
359
360
361
362
363
364
365
366
367
368
...
428
429
430
431
432
433
434

435
436
437
438

439
440
441
442
443
444
445
446
447
448
449
450
451
452
453

454
455
   GROUP BY t10.rowid
   ORDER BY 1;
} {1 1 one 2 2 two 3 3 three}
  
#---------------------------------------------------------------------------
# Test the 'y' matchinfo flag
#

reset_db
sqlite3_fts5_register_matchinfo db
do_execsql_test 11.0 {
  CREATE VIRTUAL TABLE tt USING fts5(x, y);
  INSERT INTO tt VALUES('c d a c d d', 'e a g b d a');   -- 1
  INSERT INTO tt VALUES('c c g a e b', 'c g d g e c');   -- 2
  INSERT INTO tt VALUES('b e f d e g', 'b a c b c g');   -- 3
  INSERT INTO tt VALUES('a c f f g d', 'd b f d e g');   -- 4
  INSERT INTO tt VALUES('g a c f c f', 'd g g b c c');   -- 5
  INSERT INTO tt VALUES('g a c e b b', 'd b f b g g');   -- 6
  INSERT INTO tt VALUES('f d a a f c', 'e e a d c f');   -- 7
................................................................................
    SELECT rowid, mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH $expr
  } $r2

  do_execsql_test 11.1.$tn.2  {
    SELECT rowid, mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH $expr
  } $r2
}


#---------------------------------------------------------------------------
# Test the 'b' matchinfo flag
#
# A 50-column fts5 table is created and one row is inserted with the term
# 'abc' in columns c4 and c45 only. The expected result is a two-element
# list: 1<<4 followed by 1<<(45-32).
# NOTE(review): this suggests 'b' reports one bit per column packed into
# 32-bit words, so 50 columns need two words - confirm against the
# matchinfo documentation.
#

reset_db
# The database was just reset, so matchinfo() is registered again.
sqlite3_fts5_register_matchinfo db
# Expose the Tcl command "mit" as an SQL function of the same name. Its
# definition is outside this excerpt; presumably it decodes the matchinfo
# blob into a list of integers.
db func mit mit

# Build the column list c0..c49 and create the table with it.
do_test 12.0 {
  set cols [list]
  for {set i 0} {$i < 50} {incr i} { lappend cols "c$i" }
  execsql "CREATE VIRTUAL TABLE tt USING fts5([join $cols ,])"
} {}

# Column 4 is expected as bit 4 of the first value, column 45 as bit 13
# (45-32) of the second value.
do_execsql_test 12.1 {
  INSERT INTO tt (rowid, c4, c45) VALUES(1, 'abc', 'abc');
  SELECT mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH 'abc';
} [list [list [expr 1<<4] [expr 1<<(45-32)]]]


finish_test