SQLite

Check-in [de77d6026e]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add further tests for fts5. Fix some problems with detail=col mode and auxiliary functions.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: de77d6026e8035c505a704e7b8cfe5af6579d35f
User & Date: dan 2016-01-16 18:58:51.767
Context
2016-01-16
20:50
Improvements to the way sqlite3VdbeAddOpList() works, resulting in a slightly smaller and faster binary. (check-in: 88ceb588bc user: drh tags: trunk)
18:58
Add further tests for fts5. Fix some problems with detail=col mode and auxiliary functions. (check-in: de77d6026e user: dan tags: trunk)
15:12
Use sqlite3VdbeAddOp4() rather than a separate sqlite3VdbeChangeP4() call, for a slightly smaller and faster binary. (check-in: a4258cd461 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts5/fts5_buffer.c.
318
319
320
321
322
323
324
325
326
327
328


329
330
331
332

333
334
335
336
337
338
339
  const char *pTerm, int nTerm, 
  int *pbPresent
){
  int rc = SQLITE_OK;
  *pbPresent = 0;
  if( p ){
    int i;
    int hash;
    Fts5TermsetEntry *pEntry;

    /* Calculate a hash value for this term */


    hash = 104 + iIdx;
    for(i=0; i<nTerm; i++){
      hash += (hash << 3) + (int)pTerm[i];
    }

    hash = hash % ArraySize(p->apHash);

    for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){
      if( pEntry->iIdx==iIdx 
          && pEntry->nTerm==nTerm 
          && memcmp(pEntry->pTerm, pTerm, nTerm)==0 
        ){







|


|
>
>
|
|
|

>







318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
  const char *pTerm, int nTerm, 
  int *pbPresent
){
  int rc = SQLITE_OK;
  *pbPresent = 0;
  if( p ){
    int i;
    int hash = 13;
    Fts5TermsetEntry *pEntry;

    /* Calculate a hash value for this term. This is the same hash checksum
    ** used by the fts5_hash.c module. This is not important for correct
    ** operation of the module, but is necessary to ensure that some tests
    ** designed to produce hash table collisions really do work.  */
    for(i=nTerm-1; i>=0; i--){
      hash = (hash << 3) ^ hash ^ pTerm[i];
    }
    hash = (hash << 3) ^ hash ^ iIdx;
    hash = hash % ArraySize(p->apHash);

    for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){
      if( pEntry->iIdx==iIdx 
          && pEntry->nTerm==nTerm 
          && memcmp(pEntry->pTerm, pTerm, nTerm)==0 
        ){
Changes to ext/fts5/fts5_config.c.
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
      }

      while( p[0]>='0' && p[0]<='9' && nPre<1000 ){
        nPre = nPre*10 + (p[0] - '0');
        p++;
      }

      if( rc==SQLITE_OK && (nPre<=0 || nPre>=1000) ){
        *pzErr = sqlite3_mprintf("prefix length out of range (max 999)");
        rc = SQLITE_ERROR;
        break;
      }

      pConfig->aPrefix[pConfig->nPrefix] = nPre;
      pConfig->nPrefix++;







|







274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
      }

      while( p[0]>='0' && p[0]<='9' && nPre<1000 ){
        nPre = nPre*10 + (p[0] - '0');
        p++;
      }

      if( nPre<=0 || nPre>=1000 ){
        *pzErr = sqlite3_mprintf("prefix length out of range (max 999)");
        rc = SQLITE_ERROR;
        break;
      }

      pConfig->aPrefix[pConfig->nPrefix] = nPre;
      pConfig->nPrefix++;
Changes to ext/fts5/fts5_expr.c.
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
      aPopulator[i].bOk = 0;
    }else{
      aPopulator[i].bOk = 1;
    }
  }

  return sqlite3Fts5Tokenize(pConfig, 
      FTS5_TOKENIZE_AUX, z, n, (void*)&sCtx, fts5ExprPopulatePoslistsCb
  );
}

static void fts5ExprClearPoslists(Fts5ExprNode *pNode){
  if( pNode->eType==FTS5_TERM || pNode->eType==FTS5_STRING ){
    pNode->pNear->apPhrase[0]->poslist.n = 0;
  }else{
    int i;
    for(i=0; i<pNode->nChild; i++){
      fts5ExprClearPoslists(pNode->apChild[i]);
    }
  }
}

static int fts5ExprCheckPoslists(Fts5ExprNode *pNode, i64 iRowid){
  if( pNode ){
    pNode->iRowid = iRowid;
    pNode->bEof = 0;
    switch( pNode->eType ){
      case FTS5_TERM:
      case FTS5_STRING:
        return (pNode->pNear->apPhrase[0]->poslist.n>0);

      case FTS5_AND: {
        int i;
        for(i=0; i<pNode->nChild; i++){
          if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid)==0 ){
            fts5ExprClearPoslists(pNode);
            return 0;
          }
        }
        break;
      }

      case FTS5_OR: {
        int i;
        int bRet = 0;
        for(i=0; i<pNode->nChild; i++){
          if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid) ){
            bRet = 1;
          }
        }
        if( bRet==0 ){
          fts5ExprClearPoslists(pNode);
        }
        return bRet;
      }

      default: {
        assert( pNode->eType==FTS5_NOT );
        if( 0==fts5ExprCheckPoslists(pNode->apChild[0], iRowid)
         || 0!=fts5ExprCheckPoslists(pNode->apChild[1], iRowid)
        ){
          fts5ExprClearPoslists(pNode);
          return 0;
        }
        break;
      }
    }
  }
  return 1;
}

void sqlite3Fts5ExprCheckPoslists(Fts5Expr *pExpr, i64 iRowid){
  fts5ExprCheckPoslists(pExpr->pRoot, iRowid);







|















<
|
|
|
|
|
|

|
|
|
|
|
|
|
|
|
|

|
|
|
|
|
|
|
|
<
<
<
|
|

|
|
|
|

|
|
|
|
<







2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414

2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440



2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452

2453
2454
2455
2456
2457
2458
2459
      aPopulator[i].bOk = 0;
    }else{
      aPopulator[i].bOk = 1;
    }
  }

  return sqlite3Fts5Tokenize(pConfig, 
      FTS5_TOKENIZE_DOCUMENT, z, n, (void*)&sCtx, fts5ExprPopulatePoslistsCb
  );
}

static void fts5ExprClearPoslists(Fts5ExprNode *pNode){
  if( pNode->eType==FTS5_TERM || pNode->eType==FTS5_STRING ){
    pNode->pNear->apPhrase[0]->poslist.n = 0;
  }else{
    int i;
    for(i=0; i<pNode->nChild; i++){
      fts5ExprClearPoslists(pNode->apChild[i]);
    }
  }
}

static int fts5ExprCheckPoslists(Fts5ExprNode *pNode, i64 iRowid){

  pNode->iRowid = iRowid;
  pNode->bEof = 0;
  switch( pNode->eType ){
    case FTS5_TERM:
    case FTS5_STRING:
      return (pNode->pNear->apPhrase[0]->poslist.n>0);

    case FTS5_AND: {
      int i;
      for(i=0; i<pNode->nChild; i++){
        if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid)==0 ){
          fts5ExprClearPoslists(pNode);
          return 0;
        }
      }
      break;
    }

    case FTS5_OR: {
      int i;
      int bRet = 0;
      for(i=0; i<pNode->nChild; i++){
        if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid) ){
          bRet = 1;
        }
      }



      return bRet;
    }

    default: {
      assert( pNode->eType==FTS5_NOT );
      if( 0==fts5ExprCheckPoslists(pNode->apChild[0], iRowid)
          || 0!=fts5ExprCheckPoslists(pNode->apChild[1], iRowid)
        ){
        fts5ExprClearPoslists(pNode);
        return 0;
      }
      break;

    }
  }
  return 1;
}

void sqlite3Fts5ExprCheckPoslists(Fts5Expr *pExpr, i64 iRowid){
  fts5ExprCheckPoslists(pExpr->pRoot, iRowid);
Changes to ext/fts5/fts5_main.c.
840
841
842
843
844
845
846
847
848
849
850
851
852
853


854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872

873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
    }
  }
  
  return rc;
}


static sqlite3_stmt *fts5PrepareStatement(
  int *pRc,
  Fts5Config *pConfig, 
  const char *zFmt,
  ...
){
  sqlite3_stmt *pRet = 0;


  va_list ap;
  va_start(ap, zFmt);

  if( *pRc==SQLITE_OK ){
    int rc;
    char *zSql = sqlite3_vmprintf(zFmt, ap);
    if( zSql==0 ){
      rc = SQLITE_NOMEM; 
    }else{
      rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pRet, 0);
      if( rc!=SQLITE_OK ){
        *pConfig->pzErrmsg = sqlite3_mprintf("%s", sqlite3_errmsg(pConfig->db));
      }
      sqlite3_free(zSql);
    }
    *pRc = rc;
  }

  va_end(ap);

  return pRet;
} 

static int fts5CursorFirstSorted(Fts5Table *pTab, Fts5Cursor *pCsr, int bDesc){
  Fts5Config *pConfig = pTab->pConfig;
  Fts5Sorter *pSorter;
  int nPhrase;
  int nByte;
  int rc = SQLITE_OK;
  const char *zRank = pCsr->zRank;
  const char *zRankArgs = pCsr->zRankArgs;
  
  nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
  nByte = sizeof(Fts5Sorter) + sizeof(int) * (nPhrase-1);
  pSorter = (Fts5Sorter*)sqlite3_malloc(nByte);
  if( pSorter==0 ) return SQLITE_NOMEM;
  memset(pSorter, 0, nByte);
  pSorter->nIdx = nPhrase;

  /* TODO: It would be better to have some system for reusing statement
  ** handles here, rather than preparing a new one for each query. But that
  ** is not possible as SQLite reference counts the virtual table objects.
  ** And since the statement required here reads from this very virtual 
  ** table, saving it creates a circular reference.
  **
  ** If SQLite a built-in statement cache, this wouldn't be a problem. */
  pSorter->pStmt = fts5PrepareStatement(&rc, pConfig,
      "SELECT rowid, rank FROM %Q.%Q ORDER BY %s(%s%s%s) %s",
      pConfig->zDb, pConfig->zName, zRank, pConfig->zName,
      (zRankArgs ? ", " : ""),
      (zRankArgs ? zRankArgs : ""),
      bDesc ? "DESC" : "ASC"
  );








|
|





>
>

<

<
|
|
|
|
|
|
|
|
|
|
|
<
|
<

>
|







|

















|







840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856

857

858
859
860
861
862
863
864
865
866
867
868

869

870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
    }
  }
  
  return rc;
}


static int fts5PrepareStatement(
  sqlite3_stmt **ppStmt,
  Fts5Config *pConfig, 
  const char *zFmt,
  ...
){
  sqlite3_stmt *pRet = 0;
  int rc;
  char *zSql;
  va_list ap;



  va_start(ap, zFmt);
  zSql = sqlite3_vmprintf(zFmt, ap);
  if( zSql==0 ){
    rc = SQLITE_NOMEM; 
  }else{
    rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pRet, 0);
    if( rc!=SQLITE_OK ){
      *pConfig->pzErrmsg = sqlite3_mprintf("%s", sqlite3_errmsg(pConfig->db));
    }
    sqlite3_free(zSql);
  }



  va_end(ap);
  *ppStmt = pRet;
  return rc;
} 

static int fts5CursorFirstSorted(Fts5Table *pTab, Fts5Cursor *pCsr, int bDesc){
  Fts5Config *pConfig = pTab->pConfig;
  Fts5Sorter *pSorter;
  int nPhrase;
  int nByte;
  int rc;
  const char *zRank = pCsr->zRank;
  const char *zRankArgs = pCsr->zRankArgs;
  
  nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
  nByte = sizeof(Fts5Sorter) + sizeof(int) * (nPhrase-1);
  pSorter = (Fts5Sorter*)sqlite3_malloc(nByte);
  if( pSorter==0 ) return SQLITE_NOMEM;
  memset(pSorter, 0, nByte);
  pSorter->nIdx = nPhrase;

  /* TODO: It would be better to have some system for reusing statement
  ** handles here, rather than preparing a new one for each query. But that
  ** is not possible as SQLite reference counts the virtual table objects.
  ** And since the statement required here reads from this very virtual 
  ** table, saving it creates a circular reference.
  **
  ** If SQLite a built-in statement cache, this wouldn't be a problem. */
  rc = fts5PrepareStatement(&pSorter->pStmt, pConfig,
      "SELECT rowid, rank FROM %Q.%Q ORDER BY %s(%s%s%s) %s",
      pConfig->zDb, pConfig->zName, zRank, pConfig->zName,
      (zRankArgs ? ", " : ""),
      (zRankArgs ? zRankArgs : ""),
      bDesc ? "DESC" : "ASC"
  );

2035
2036
2037
2038
2039
2040
2041

2042





2043

2044
2045
2046
2047
2048
2049
2050
  int *piCol
){
  int rc = SQLITE_OK;
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig;

  if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){

    int n;





    rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, iPhrase, &pIter->a, &n);

    if( rc==SQLITE_OK ){
      pIter->b = &pIter->a[n];
      *piCol = 0;
      fts5ApiPhraseNextColumn(pCtx, pIter, piCol);
    }
  }else{
    int n;







>

>
>
>
>
>
|
>







2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
  int *piCol
){
  int rc = SQLITE_OK;
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig;

  if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){
    Fts5Sorter *pSorter = pCsr->pSorter;
    int n;
    if( pSorter ){
      int i1 = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]);
      n = pSorter->aIdx[iPhrase] - i1;
      pIter->a = &pSorter->aPoslist[i1];
    }else{
      rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, iPhrase, &pIter->a, &n);
    }
    if( rc==SQLITE_OK ){
      pIter->b = &pIter->a[n];
      *piCol = 0;
      fts5ApiPhraseNextColumn(pCtx, pIter, piCol);
    }
  }else{
    int n;
Changes to ext/fts5/fts5_tcl.c.
443
444
445
446
447
448
449
450
451



452
453
454
455
456
457
458
459
460
      Tcl_Obj *pScript = objv[5];
      Fts5PhraseIter iter;

      if( Tcl_GetIntFromObj(interp, objv[2], &iPhrase) ) return TCL_ERROR;
      zColvar = Tcl_GetString(objv[3]);
      zOffvar = Tcl_GetString(objv[4]);

      for(p->pApi->xPhraseFirst(p->pFts, iPhrase, &iter, &iCol, &iOff);
          iCol>=0;



          p->pApi->xPhraseNext(p->pFts, &iter, &iCol, &iOff)
      ){
        Tcl_SetVar2Ex(interp, zColvar, 0, Tcl_NewIntObj(iCol), 0);
        Tcl_SetVar2Ex(interp, zOffvar, 0, Tcl_NewIntObj(iOff), 0);
        rc = Tcl_EvalObjEx(interp, pScript, 0);
        if( rc==TCL_CONTINUE ) rc = TCL_OK;
        if( rc!=TCL_OK ){
          if( rc==TCL_BREAK ) rc = TCL_OK;
          break;







|
|
>
>
>
|
<







443
444
445
446
447
448
449
450
451
452
453
454
455

456
457
458
459
460
461
462
      Tcl_Obj *pScript = objv[5];
      Fts5PhraseIter iter;

      if( Tcl_GetIntFromObj(interp, objv[2], &iPhrase) ) return TCL_ERROR;
      zColvar = Tcl_GetString(objv[3]);
      zOffvar = Tcl_GetString(objv[4]);

      rc = p->pApi->xPhraseFirst(p->pFts, iPhrase, &iter, &iCol, &iOff);
      if( rc!=SQLITE_OK ){
        Tcl_AppendResult(interp, sqlite3ErrName(rc), 0);
        return TCL_ERROR;
      }
      for( ;iCol>=0; p->pApi->xPhraseNext(p->pFts, &iter, &iCol, &iOff) ){

        Tcl_SetVar2Ex(interp, zColvar, 0, Tcl_NewIntObj(iCol), 0);
        Tcl_SetVar2Ex(interp, zOffvar, 0, Tcl_NewIntObj(iOff), 0);
        rc = Tcl_EvalObjEx(interp, pScript, 0);
        if( rc==TCL_CONTINUE ) rc = TCL_OK;
        if( rc!=TCL_OK ){
          if( rc==TCL_BREAK ) rc = TCL_OK;
          break;
470
471
472
473
474
475
476
477
478



479
480
481
482
483
484
485
486
487
      const char *zColvar;
      Tcl_Obj *pScript = objv[4];
      Fts5PhraseIter iter;

      if( Tcl_GetIntFromObj(interp, objv[2], &iPhrase) ) return TCL_ERROR;
      zColvar = Tcl_GetString(objv[3]);

      for(p->pApi->xPhraseFirstColumn(p->pFts, iPhrase, &iter, &iCol);
          iCol>=0;



          p->pApi->xPhraseNextColumn(p->pFts, &iter, &iCol)
      ){
        Tcl_SetVar2Ex(interp, zColvar, 0, Tcl_NewIntObj(iCol), 0);
        rc = Tcl_EvalObjEx(interp, pScript, 0);
        if( rc==TCL_CONTINUE ) rc = TCL_OK;
        if( rc!=TCL_OK ){
          if( rc==TCL_BREAK ) rc = TCL_OK;
          break;
        }







|
|
>
>
>
|
<







472
473
474
475
476
477
478
479
480
481
482
483
484

485
486
487
488
489
490
491
      const char *zColvar;
      Tcl_Obj *pScript = objv[4];
      Fts5PhraseIter iter;

      if( Tcl_GetIntFromObj(interp, objv[2], &iPhrase) ) return TCL_ERROR;
      zColvar = Tcl_GetString(objv[3]);

      rc = p->pApi->xPhraseFirstColumn(p->pFts, iPhrase, &iter, &iCol);
      if( rc!=SQLITE_OK ){
        Tcl_AppendResult(interp, sqlite3ErrName(rc), 0);
        return TCL_ERROR;
      }
      for( ; iCol>=0; p->pApi->xPhraseNextColumn(p->pFts, &iter, &iCol)){

        Tcl_SetVar2Ex(interp, zColvar, 0, Tcl_NewIntObj(iCol), 0);
        rc = Tcl_EvalObjEx(interp, pScript, 0);
        if( rc==TCL_CONTINUE ) rc = TCL_OK;
        if( rc!=TCL_OK ){
          if( rc==TCL_BREAK ) rc = TCL_OK;
          break;
        }
Changes to ext/fts5/test/fts5_common.tcl.
505
506
507
508
509
510
511
















512
513
514
515
516
517
518
  }

  foreach {rowid poslist collist} [fts5_query_data $expr $tbl $order $dict] {
    lappend res $rowid $poslist
  }
  set res
}

















#-------------------------------------------------------------------------
#

# This command will only work inside a [foreach_detail_mode] block. It tests
# whether or not expression $expr run on FTS5 table $tbl is supported by
# the current mode. If so, 1 is returned. If not, 0.







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
  }

  foreach {rowid poslist collist} [fts5_query_data $expr $tbl $order $dict] {
    lappend res $rowid $poslist
  }
  set res
}

proc fts5_collist_data {expr tbl {order ASC} {aDictVar ""}} {
  set res [list]

  if {$aDictVar!=""} {
    upvar $aDictVar aDict
    set dict aDict
  } else {
    set dict ""
  }

  foreach {rowid poslist collist} [fts5_query_data $expr $tbl $order $dict] {
    lappend res $rowid $collist
  }
  set res
}

#-------------------------------------------------------------------------
#

# This command will only work inside a [foreach_detail_mode] block. It tests
# whether or not expression $expr run on FTS5 table $tbl is supported by
# the current mode. If so, 1 is returned. If not, 0.
557
558
559
560
561
562
563



















































  nearset_rf $aCol {*}$args
  if {[lsearch $args -col]>=0} { 
    set ::expr_not_ok 1
  }
  list
}



























































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
  nearset_rf $aCol {*}$args
  if {[lsearch $args -col]>=0} { 
    set ::expr_not_ok 1
  }
  list
}


#-------------------------------------------------------------------------
# Code for a simple Tcl tokenizer that supports synonyms at query time.
#
proc tclnum_tokenize {mode tflags text} {
  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $w $iStart $iEnd
    if {$tflags == $mode && [info exists ::tclnum_syn($w)]} {
      foreach s $::tclnum_syn($w)  { sqlite3_fts5_token -colo $s $iStart $iEnd }
    }
  }
}

proc tclnum_create {args} {
  set mode query
  if {[llength $args]} {
    set mode [lindex $args 0]
  }
  if {$mode != "query" && $mode != "document"} { error "bad mode: $mode" }
  return [list tclnum_tokenize $mode]
}

proc fts5_tclnum_register {db} {
  foreach SYNDICT {
    {zero  0}
    {one   1 i}
    {two   2 ii}
    {three 3 iii}
    {four  4 iv}
    {five  5 v}
    {six   6 vi}
    {seven 7 vii}
    {eight 8 viii}
    {nine  9 ix}

    {a1 a2 a3 a4 a5 a6 a7 a8 a9}
    {b1 b2 b3 b4 b5 b6 b7 b8 b9}
    {c1 c2 c3 c4 c5 c6 c7 c8 c9}
  } {
    foreach s $SYNDICT {
      set o [list]
      foreach x $SYNDICT {if {$x!=$s} {lappend o $x}}
      set ::tclnum_syn($s) $o
    }
  }
  sqlite3_fts5_create_tokenizer db tclnum tclnum_create
}
#
# End of tokenizer code.
#-------------------------------------------------------------------------

Changes to ext/fts5/test/fts5config.test.
40
41
42
43
44
45
46


47
48
49
50
51
52
53
#
foreach {tn opt} {
  1 {prefix=x}  
  2 {prefix='x'}
  3 {prefix='$'}
  4 {prefix='1,2,'}
  5 {prefix=',1'}


} {
  set res [list 1 {malformed prefix=... directive}]
  do_catchsql_test 2.$tn "CREATE VIRTUAL TABLE f1 USING fts5(x, $opt)" $res
}

#-------------------------------------------------------------------------
# Syntax errors in the 'rank' option.







>
>







40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#
foreach {tn opt} {
  1 {prefix=x}  
  2 {prefix='x'}
  3 {prefix='$'}
  4 {prefix='1,2,'}
  5 {prefix=',1'}
  6 {prefix='1,2,3...'}
  7 {prefix='1,2,3xyz'}
} {
  set res [list 1 {malformed prefix=... directive}]
  do_catchsql_test 2.$tn "CREATE VIRTUAL TABLE f1 USING fts5(x, $opt)" $res
}

#-------------------------------------------------------------------------
# Syntax errors in the 'rank' option.
155
156
157
158
159
160
161


162
163
164
165
166
167
168

#-------------------------------------------------------------------------
# Errors in:
#
#   9.1.* 'pgsz' options.
#   9.2.* 'automerge' options.
#   9.3.* 'crisismerge' options.


#
do_execsql_test 9.0 {
  CREATE VIRTUAL TABLE abc USING fts5(a, b);
} {}
do_catchsql_test 9.1.1 {
  INSERT INTO abc(abc, rank) VALUES('pgsz', -5);
} {1 {SQL logic error or missing database}}







>
>







157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172

#-------------------------------------------------------------------------
# Errors in:
#
#   9.1.* 'pgsz' options.
#   9.2.* 'automerge' options.
#   9.3.* 'crisismerge' options.
#   9.4.* a non-existant option.
#   9.5.* 'hashsize' options.
#
do_execsql_test 9.0 {
  CREATE VIRTUAL TABLE abc USING fts5(a, b);
} {}
do_catchsql_test 9.1.1 {
  INSERT INTO abc(abc, rank) VALUES('pgsz', -5);
} {1 {SQL logic error or missing database}}
198
199
200
201
202
203
204










205
206
207
208
209
210
211
212
213
214
215















216
217
218
do_execsql_test 9.3.4 {
  INSERT INTO abc(abc, rank) VALUES('crisismerge', 50000000);
} {}

do_catchsql_test 9.4.1 {
  INSERT INTO abc(abc, rank) VALUES('nosuchoption', 1);
} {1 {SQL logic error or missing database}}











#-------------------------------------------------------------------------
# Too many prefix indexes. Maximum allowed is 31.
#
foreach {tn spec} {
  1 {prefix="1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32"}
  2 {prefix="1 2 3 4", prefix="5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32"}
} {
  set sql "CREATE VIRTUAL TABLE xyz USING fts5(x, $spec)"
  do_catchsql_test 10.$tn $sql {1 {too many prefix indexes (max 31)}}
}
















finish_test








>
>
>
>
>
>
>
>
>
>











>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
do_execsql_test 9.3.4 {
  INSERT INTO abc(abc, rank) VALUES('crisismerge', 50000000);
} {}

do_catchsql_test 9.4.1 {
  INSERT INTO abc(abc, rank) VALUES('nosuchoption', 1);
} {1 {SQL logic error or missing database}}

do_catchsql_test 9.5.1 {
  INSERT INTO abc(abc, rank) VALUES('hashsize', 'not an integer');
} {1 {SQL logic error or missing database}}
do_catchsql_test 9.5.2 {
  INSERT INTO abc(abc, rank) VALUES('hashsize', -500000);
} {1 {SQL logic error or missing database}}
do_catchsql_test 9.5.3 {
  INSERT INTO abc(abc, rank) VALUES('hashsize', 500000);
} {0 {}}

#-------------------------------------------------------------------------
# Too many prefix indexes. Maximum allowed is 31.
#
foreach {tn spec} {
  1 {prefix="1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32"}
  2 {prefix="1 2 3 4", prefix="5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32"}
} {
  set sql "CREATE VIRTUAL TABLE xyz USING fts5(x, $spec)"
  do_catchsql_test 10.$tn $sql {1 {too many prefix indexes (max 31)}}
}

#-------------------------------------------------------------------------
# errors in the detail= option.
#
foreach {tn opt} {
  1 {detail=x}  
  2 {detail='x'}
  3 {detail='$'}
  4 {detail='1,2,'}
  5 {detail=',1'}
  6 {detail=''}
} {
  set res [list 1 {malformed detail=... directive}]
  do_catchsql_test 11.$tn "CREATE VIRTUAL TABLE f1 USING fts5(x, $opt)" $res
}

finish_test

Changes to ext/fts5/test/fts5fault4.test.
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
  execsql { SELECT * FROM xx }
} -body {
  execsql { DROP TABLE xx }
} -test {
  faultsim_test_result [list 0 {}]
}

#-------------------------------------------------------------------------
# An OOM within an "ORDER BY rank" query.
#
db func rnddoc fts5_rnddoc 
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE xx USING fts5(x);
  INSERT INTO xx VALUES ('abc ' || rnddoc(10));
  INSERT INTO xx VALUES ('abc abc' || rnddoc(9));
  INSERT INTO xx VALUES ('abc abc abc' || rnddoc(8));
} {}
faultsim_save_and_close

do_faultsim_test 2 -faults oom-* -prep {
  faultsim_restore_and_reopen
  execsql { SELECT * FROM xx }
} -body {
  execsql { SELECT rowid FROM xx WHERE xx MATCH 'abc' ORDER BY rank }
} -test {
  faultsim_test_result [list 0 {3 2 1}]
}

#-------------------------------------------------------------------------
# An OOM while "reseeking" an FTS cursor.
#
do_execsql_test 3.0 {
  CREATE VIRTUAL TABLE jj USING fts5(j);
  INSERT INTO jj(rowid, j) VALUES(101, 'm t w t f s s');
  INSERT INTO jj(rowid, j) VALUES(202, 't w t f s');







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







36
37
38
39
40
41
42





















43
44
45
46
47
48
49
  execsql { SELECT * FROM xx }
} -body {
  execsql { DROP TABLE xx }
} -test {
  faultsim_test_result [list 0 {}]
}






















#-------------------------------------------------------------------------
# An OOM while "reseeking" an FTS cursor.
#
do_execsql_test 3.0 {
  CREATE VIRTUAL TABLE jj USING fts5(j);
  INSERT INTO jj(rowid, j) VALUES(101, 'm t w t f s s');
  INSERT INTO jj(rowid, j) VALUES(202, 't w t f s');
Added ext/fts5/test/fts5fault9.test.
























































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# 2015 September 3
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
#
# This file is focused on OOM errors.
#

source [file join [file dirname [info script]] fts5_common.tcl]
source $testdir/malloc_common.tcl
set testprefix fts5fault9

# If SQLITE_ENABLE_FTS3 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

foreach_detail_mode $testprefix {

fts5_aux_test_functions db

if 1 {

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(a, b, detail=%DETAIL%);
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
  WITH seq(s) AS ( SELECT 1 UNION ALL SELECT s+1 FROM seq WHERE s<50)
  INSERT INTO t1 SELECT 'x x x y y y', 'a b c d e f' FROM seq;
}

do_faultsim_test 1 -faults oom-* -body {
  execsql { SELECT count(*) FROM t1('x AND y') }
} -test {
  faultsim_test_result {0 50}
}

do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE t2 USING fts5(a, b, detail=%DETAIL%);
  INSERT INTO t2(t2, rank) VALUES('pgsz', 32);
  INSERT INTO t2 VALUES('abc cba', 'cba abc');
  INSERT INTO t2 VALUES('abc cba', 'cba abc');
  INSERT INTO t2 VALUES('abc cba', 'cba abc');

  INSERT INTO t2 VALUES('axy cyx', 'cyx axy');
  INSERT INTO t2 VALUES('axy cyx', 'cyx axy');
  INSERT INTO t2 VALUES('axy cyx', 'cyx axy');
}

do_faultsim_test 2 -faults oom-* -body {
  execsql { SELECT count(*) FROM t2('a* AND c*') }
} -test {
  faultsim_test_result {0 6}
}


do_execsql_test 3.0 {
  CREATE VIRTUAL TABLE t3 USING fts5(a, detail=%DETAIL%);
  INSERT INTO t3 VALUES('a x x a x a a a');
  INSERT INTO t3 VALUES('x a a x a x x x');
}

do_faultsim_test 3.1 -faults oom-* -body {
  execsql { SELECT highlight(t3, 0, '[', ']') FROM t3('a') }
} -test {
  faultsim_test_result {0 {{[a] x x [a] x [a] [a] [a]} {x [a] [a] x [a] x x x}}}
}

do_faultsim_test 3.2 -faults oom-t* -body {
  execsql { SELECT fts5_test_poslist2(t3) FROM t3('x') }
} -test {
  faultsim_test_result \
      {0 {{0.0.1 0.0.2 0.0.4} {0.0.0 0.0.3 0.0.5 0.0.6 0.0.7}}} \
      {1 SQLITE_NOMEM}
}

#-------------------------------------------------------------------------
# Test OOM injection with the xPhraseFirstColumn() API and a tokenizer
# uses query synonyms.
#
fts5_tclnum_register db
do_execsql_test 4.0 {
  CREATE VIRTUAL TABLE t4 USING fts5(x, y, z, detail=%DETAIL%, tokenize=tclnum);
  INSERT INTO t4 VALUES('one two three', '1 2 3', 'i ii iii');
  INSERT INTO t4 VALUES('1 2 3', 'i ii iii', 'one two three');
  INSERT INTO t4 VALUES('i ii iii', 'one two three', 'i ii iii');

  INSERT INTO t4 VALUES('a1 a2 a3', 'a4 a5 a6', 'a7 a8 a9');
  INSERT INTO t4 VALUES('b1 b2 b3', 'b4 b5 b6', 'b7 b8 b9');
  INSERT INTO t4 VALUES('c1 c2 c3', 'c4 c5 c6', 'c7 c8 c9');
}

do_faultsim_test 4.1 -faults oom-t* -body {
  execsql { SELECT rowid, fts5_test_collist(t4) FROM t4('2') }
} -test {
  faultsim_test_result \
      {0 {1 {0.0 0.1 0.2} 2 {0.0 0.1 0.2} 3 {0.0 0.1 0.2}}} {1 SQLITE_NOMEM}
}

do_faultsim_test 4.2 -faults oom-t* -body {
  execsql { SELECT rowid, fts5_test_collist(t4) FROM t4('a5 OR b5 OR c5') }
} -test {
  faultsim_test_result \
      {0 {4 {0.0 0.1 0.2} 5 {1.0 1.1 1.2} 6 {2.0 2.1 2.2}}} {1 SQLITE_NOMEM}
}

}


#-------------------------------------------------------------------------
# An OOM within an "ORDER BY rank" query.
#
db func rnddoc fts5_rnddoc 
do_execsql_test 5.0 {
  CREATE VIRTUAL TABLE xx USING fts5(x, y, detail=%DETAIL%);
  INSERT INTO xx VALUES ('def', 'abc ' || rnddoc(10));
  INSERT INTO xx VALUES ('def', 'abc abc' || rnddoc(9));
  INSERT INTO xx VALUES ('def', 'abc abc abc' || rnddoc(8));
} {}
faultsim_save_and_close

do_faultsim_test 5 -faults oom-* -prep {
  faultsim_restore_and_reopen
  execsql { SELECT * FROM xx }
} -body {
  execsql { SELECT rowid FROM xx('abc AND def') ORDER BY rank }
} -test {
  faultsim_test_result [list 0 {3 2 1}]
}

} ;# foreach_detail_mode...

finish_test

Changes to ext/fts5/test/fts5hash.test.
60
61
62
63
64
65
66


67
68
69
70
71
72
73
74
75
76
77
78
  for {set i 0} {$i<$nWord} {incr i} {
    set j [expr {int(rand() * $nVocab)}]
    lappend doc [lindex $vocab $j]
  }
  return $doc
}



set vocab [build_vocab1]
db func r random_doc 

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE eee USING fts5(e, ee);
  BEGIN;
    WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100)
    INSERT INTO eee SELECT r($vocab, 5), r($vocab, 7) FROM ii;
    INSERT INTO eee(eee) VALUES('integrity-check');
  COMMIT;
  INSERT INTO eee(eee) VALUES('integrity-check');
}







>
>




|







60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
  for {set i 0} {$i<$nWord} {incr i} {
    set j [expr {int(rand() * $nVocab)}]
    lappend doc [lindex $vocab $j]
  }
  return $doc
}

foreach_detail_mode $testprefix {

set vocab [build_vocab1]
db func r random_doc 

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE eee USING fts5(e, ee, detail=%DETAIL%);
  BEGIN;
    WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100)
    INSERT INTO eee SELECT r($vocab, 5), r($vocab, 7) FROM ii;
    INSERT INTO eee(eee) VALUES('integrity-check');
  COMMIT;
  INSERT INTO eee(eee) VALUES('integrity-check');
}
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107


108
109
110
}
do_test 1.2 {
  for {set i 1} {$i <= 100} {incr i} {
    execsql { INSERT INTO eee VALUES( r($vocab, 5), r($vocab, 7) ) }
  }
} {}
  
do_test 1.2 {
  db eval { SELECT term, doc FROM vocab } {
    set nRow [db one {SELECT count(*) FROM eee WHERE eee MATCH $term}]
    if {$nRow != $doc} {
      error "term=$term fts5vocab=$doc cnt=$nRow"
    }
  }
  set {} {}
} {}

do_execsql_test 1.3 {
  COMMIT;
  INSERT INTO eee(eee) VALUES('integrity-check');
}



finish_test








|









|



>
>



89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
}
do_test 1.2 {
  for {set i 1} {$i <= 100} {incr i} {
    execsql { INSERT INTO eee VALUES( r($vocab, 5), r($vocab, 7) ) }
  }
} {}
  
do_test 1.3 {
  db eval { SELECT term, doc FROM vocab } {
    set nRow [db one {SELECT count(*) FROM eee WHERE eee MATCH $term}]
    if {$nRow != $doc} {
      error "term=$term fts5vocab=$doc cnt=$nRow"
    }
  }
  set {} {}
} {}

do_execsql_test 1.4 {
  COMMIT;
  INSERT INTO eee(eee) VALUES('integrity-check');
}

} ;# foreach_detail_mode

finish_test

Changes to ext/fts5/test/fts5synonym.test.
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57

58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85


86
87
88
89
90
91
92
93
94
95
96
97

98
99
100
101
102
103
104

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

foreach S {
  {zero 0}
  {one 1 i}
  {two 2 ii}
  {three 3 iii}
  {four 4 iv}
  {five 5 v}
  {six 6 vi}
  {seven 7 vii}
  {eight 8 viii}
  {nine 9 ix}
} {
  foreach s $S {
    set o [list]
    foreach x $S {if {$x!=$s} {lappend o $x}}
    set ::syn($s) $o
  }
}

proc tcl_tokenize {tflags text} {
  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $w $iStart $iEnd
  }
}

proc tcl_create {args} {
  return "tcl_tokenize"
}

sqlite3_fts5_create_tokenizer db tcl tcl_create

#-------------------------------------------------------------------------
# Warm body test for the code in fts5_tcl.c.
#

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
  INSERT INTO ft VALUES('abc def ghi');
  INSERT INTO ft VALUES('jkl mno pqr');
  SELECT rowid, x FROM ft WHERE ft MATCH 'def';
  SELECT x, rowid FROM ft WHERE ft MATCH 'pqr';
} {1 {abc def ghi} {jkl mno pqr} 2}

#-------------------------------------------------------------------------
# Test a tokenizer that supports synonyms by adding extra entries to the
# FTS index.
#

proc tcl_tokenize {tflags text} {
  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $w $iStart $iEnd
    if {$tflags=="document" && [info exists ::syn($w)]} {
      foreach s $::syn($w) {
        sqlite3_fts5_token -colo $s $iStart $iEnd
      }
    }
  }
}
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create

do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);


  INSERT INTO ft VALUES('one two three');
  INSERT INTO ft VALUES('four five six');
  INSERT INTO ft VALUES('eight nine ten');
} {}

foreach {tn expr res} {
  1 "3" 1
  2 "eight OR 8 OR 5" {2 3}
  3 "10" {}
  4 "1*" {1}
  5 "1 + 2" {1}
} {

  do_execsql_test 2.1.$tn {
    SELECT rowid FROM ft WHERE ft MATCH $expr
  } $res
}

#-------------------------------------------------------------------------
# Test some broken tokenizers:







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
|
|
<




>

|










<
<
<
<
<
<
<
<
<
<
<

|


|
>
>












>







17
18
19
20
21
22
23

























24

25
26

27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43











44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}


























proc tcl_create {args} { return "tcl_tokenize" }


foreach_detail_mode $testprefix {


#-------------------------------------------------------------------------
# Warm body test for the code in fts5_tcl.c.
#
fts5_tclnum_register db
do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = "tclnum document", detail=%DETAIL%);
  INSERT INTO ft VALUES('abc def ghi');
  INSERT INTO ft VALUES('jkl mno pqr');
  SELECT rowid, x FROM ft WHERE ft MATCH 'def';
  SELECT x, rowid FROM ft WHERE ft MATCH 'pqr';
} {1 {abc def ghi} {jkl mno pqr} 2}

#-------------------------------------------------------------------------
# Test a tokenizer that supports synonyms by adding extra entries to the
# FTS index.
#











reset_db
fts5_tclnum_register db

do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE ft USING fts5(
      x, tokenize = "tclnum document", detail=%DETAIL%
  );
  INSERT INTO ft VALUES('one two three');
  INSERT INTO ft VALUES('four five six');
  INSERT INTO ft VALUES('eight nine ten');
} {}

foreach {tn expr res} {
  1 "3" 1
  2 "eight OR 8 OR 5" {2 3}
  3 "10" {}
  4 "1*" {1}
  5 "1 + 2" {1}
} {
  if {![fts5_expr_ok $expr ft]} continue
  do_execsql_test 2.1.$tn {
    SELECT rowid FROM ft WHERE ft MATCH $expr
  } $res
}

#-------------------------------------------------------------------------
# Test some broken tokenizers:
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201


202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
  SELECT rowid FROM ft WHERE ft MATCH 'one + two + two + three';
} {}

#-------------------------------------------------------------------------
# Check that expressions with synonyms can be parsed and executed.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
proc tcl_tokenize {tflags text} {
  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $w $iStart $iEnd
    if {$tflags=="query" && [info exists ::syn($w)]} {
      foreach s $::syn($w) {
        sqlite3_fts5_token -colo $s $iStart $iEnd
      }
    }
  }
}

foreach {tn expr res} {
  1  {abc}                           {"abc"}
  2  {one}                           {"one"|"i"|"1"}
  3  {3}                             {"3"|"iii"|"three"}
  4  {3*}                            {"3"|"iii"|"three" *}
} {
  do_execsql_test 4.1.$tn {SELECT fts5_expr($expr, 'tokenize=tcl')} [list $res]


}

do_execsql_test 4.2.1 {
  CREATE VIRTUAL TABLE xx USING fts5(x, tokenize=tcl);
  INSERT INTO xx VALUES('one two');
  INSERT INTO xx VALUES('three four');
}

do_execsql_test 4.2.2 {
  SELECT rowid FROM xx WHERE xx MATCH '2'
} {1}

do_execsql_test 4.2.3 {
  SELECT rowid FROM xx WHERE xx MATCH '3'
} {2}

do_test 5.0 {
  execsql { 
    CREATE VIRTUAL TABLE t1 USING fts5(a, b, tokenize=tcl)
  }
  foreach {rowid a b} {
    1 {four v 4 i three} {1 3 five five 4 one}
    2 {5 1 3 4 i} {2 2 v two 4}
    3 {5 i 5 2 four 4 1} {iii ii five two 1}
    4 {ii four 4 one 5 three five} {one 5 1 iii 4 3}
    5 {three i v i four 4 1} {ii five five five iii}







<
<
<
|
<
<
<
<
<
<
<







|
>
>



|














|







142
143
144
145
146
147
148



149







150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
  SELECT rowid FROM ft WHERE ft MATCH 'one + two + two + three';
} {}

#-------------------------------------------------------------------------
# Check that expressions with synonyms can be parsed and executed.
#
reset_db



fts5_tclnum_register db








foreach {tn expr res} {
  1  {abc}                           {"abc"}
  2  {one}                           {"one"|"i"|"1"}
  3  {3}                             {"3"|"iii"|"three"}
  4  {3*}                            {"3"|"iii"|"three" *}
} {
  do_execsql_test 4.1.$tn {
    SELECT fts5_expr($expr, 'tokenize=tclnum')
  } [list $res]
}

do_execsql_test 4.2.1 {
  CREATE VIRTUAL TABLE xx USING fts5(x, tokenize=tclnum, detail=%DETAIL%);
  INSERT INTO xx VALUES('one two');
  INSERT INTO xx VALUES('three four');
}

do_execsql_test 4.2.2 {
  SELECT rowid FROM xx WHERE xx MATCH '2'
} {1}

do_execsql_test 4.2.3 {
  SELECT rowid FROM xx WHERE xx MATCH '3'
} {2}

do_test 5.0 {
  execsql { 
    CREATE VIRTUAL TABLE t1 USING fts5(a, b, tokenize=tclnum, detail=%DETAIL%)
  }
  foreach {rowid a b} {
    1 {four v 4 i three} {1 3 five five 4 one}
    2 {5 1 3 4 i} {2 2 v two 4}
    3 {5 i 5 2 four 4 1} {iii ii five two 1}
    4 {ii four 4 one 5 three five} {one 5 1 iii 4 3}
    5 {three i v i four 4 1} {ii five five five iii}
281
282
283
284
285
286
287

288
289
290
291
292
293
294
  }

  6 {"v v"} {
    1 {four v 4 i three} {1 3 [five five] 4 one}
    5 {three i v i four 4 1} {ii [five five five] iii}
  }
} {

  do_execsql_test 5.1.$tn {
    SELECT rowid, highlight(t1, 0, '[', ']'), highlight(t1, 1, '[', ']')
    FROM t1 WHERE t1 MATCH $q
  } $res
}

# Test that the xQueryPhrase() API works with synonyms.







>







239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
  }

  6 {"v v"} {
    1 {four v 4 i three} {1 3 [five five] 4 one}
    5 {three i v i four 4 1} {ii [five five five] iii}
  }
} {
  if {![fts5_expr_ok $q t1]} continue
  do_execsql_test 5.1.$tn {
    SELECT rowid, highlight(t1, 0, '[', ']'), highlight(t1, 1, '[', ']')
    FROM t1 WHERE t1 MATCH $q
  } $res
}

# Test that the xQueryPhrase() API works with synonyms.
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340

341
342
343

344
345
346
347
348
349
350
351
352
353
354
  }
} {
  do_execsql_test 5.2.$tn {
    SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH $q
  } $res
}


#-------------------------------------------------------------------------
# Test terms with more than 4 synonyms.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
proc tcl_tokenize {tflags text} {
  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $w $iStart $iEnd
    if {$tflags=="query" && [string length $w]==1} {
      for {set i 2} {$i<=10} {incr i} {
        sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd
      }
    }
  }
}

do_execsql_test 6.0.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize=tcl);
  INSERT INTO t1 VALUES('yy xx qq');
  INSERT INTO t1 VALUES('yy xx xx');
}

do_execsql_test 6.0.2 {
  SELECT * FROM t1 WHERE t1 MATCH 'NEAR(y q)';
} {{yy xx qq}}


do_test 6.0.3 {
  execsql { 
    CREATE VIRTUAL TABLE t2 USING fts5(a, b, tokenize=tcl)
  }
  foreach {rowid a b} {
    1 {yyyy vvvvv qq oo yyyyyy vvvv eee} {ffff uu r qq aaaa}
    2 {ww oooooo bbbbb ssssss mm} {ffffff yy iiii rr s ccc qqqqq}
    3 {zzzz llll gggggg cccc uu} {hhhhhh aaaa ppppp rr ee jjjj}
    4 {r f i rrrrrr ww hhh} {aa yyy t x aaaaa ii}
    5 {fffff mm vvvv ooo ffffff kkkk tttt} {cccccc bb e zzz d n}







<

















|



>
|
|
|
>



|







271
272
273
274
275
276
277

278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
  }
} {
  do_execsql_test 5.2.$tn {
    SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH $q
  } $res
}


#-------------------------------------------------------------------------
# Test terms with more than 4 synonyms.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
proc tcl_tokenize {tflags text} {
  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $w $iStart $iEnd
    if {$tflags=="query" && [string length $w]==1} {
      for {set i 2} {$i<=10} {incr i} {
        sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd
      }
    }
  }
}

do_execsql_test 6.0.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize=tcl, detail=%DETAIL%);
  INSERT INTO t1 VALUES('yy xx qq');
  INSERT INTO t1 VALUES('yy xx xx');
}
if {[fts5_expr_ok "NEAR(y q)" t1]} {
  do_execsql_test 6.0.2 {
    SELECT * FROM t1 WHERE t1 MATCH 'NEAR(y q)';
  } {{yy xx qq}}
}

do_test 6.0.3 {
  execsql { 
    CREATE VIRTUAL TABLE t2 USING fts5(a, b, tokenize=tcl, detail=%DETAIL%)
  }
  foreach {rowid a b} {
    1 {yyyy vvvvv qq oo yyyyyy vvvv eee} {ffff uu r qq aaaa}
    2 {ww oooooo bbbbb ssssss mm} {ffffff yy iiii rr s ccc qqqqq}
    3 {zzzz llll gggggg cccc uu} {hhhhhh aaaa ppppp rr ee jjjj}
    4 {r f i rrrrrr ww hhh} {aa yyy t x aaaaa ii}
    5 {fffff mm vvvv ooo ffffff kkkk tttt} {cccccc bb e zzz d n}
383
384
385
386
387
388
389


390
391
392
393
394
395
396
  }

  4 {NEAR(q y, 20)} {
    1 {[yyyy] vvvvv [qq] oo [yyyyyy] vvvv eee} {ffff uu r qq aaaa}
    2 {ww oooooo bbbbb ssssss mm} {ffffff [yy] iiii rr s ccc [qqqqq]}
  }
} {


  do_execsql_test 6.1.$tn.asc {
    SELECT rowid, highlight(t2, 0, '[', ']'), highlight(t2, 1, '[', ']')
    FROM t2 WHERE t2 MATCH $q
  } $res

  set res2 [list]
  foreach {rowid a b} $res {







>
>







343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
  }

  4 {NEAR(q y, 20)} {
    1 {[yyyy] vvvvv [qq] oo [yyyyyy] vvvv eee} {ffff uu r qq aaaa}
    2 {ww oooooo bbbbb ssssss mm} {ffffff [yy] iiii rr s ccc [qqqqq]}
  }
} {
  if {![fts5_expr_ok $q t2]} continue

  do_execsql_test 6.1.$tn.asc {
    SELECT rowid, highlight(t2, 0, '[', ']'), highlight(t2, 1, '[', ']')
    FROM t2 WHERE t2 MATCH $q
  } $res

  set res2 [list]
  foreach {rowid a b} $res {
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457


458
459
460
        sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd
      }
    }
  }
}

do_execsql_test 7.0.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(a, b, columnsize=1, tokenize=tcl);
  INSERT INTO t1 VALUES('0 2 3', '4 5 6 7');
  INSERT INTO t1 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0');
  SELECT fts5_test_columnsize(t1) FROM t1 WHERE t1 MATCH '000 AND 00 AND 0';
} {{3 4} {2 10}}

do_execsql_test 7.0.2 {
  INSERT INTO t1(t1) VALUES('integrity-check');
}

do_execsql_test 7.1.1 {
  CREATE VIRTUAL TABLE t2 USING fts5(a, b, columnsize=0, tokenize=tcl);
  INSERT INTO t2 VALUES('0 2 3', '4 5 6 7');
  INSERT INTO t2 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0');
  SELECT fts5_test_columnsize(t2) FROM t2 WHERE t2 MATCH '000 AND 00 AND 0';
} {{3 4} {2 10}}

do_execsql_test 7.1.2 {
  INSERT INTO t2(t2) VALUES('integrity-check');
}



finish_test








|










|








>
>



393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
        sqlite3_fts5_token -colo [string repeat $w $i] $iStart $iEnd
      }
    }
  }
}

do_execsql_test 7.0.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(a, b, columnsize=1, tokenize=tcl, detail=%DETAIL%);
  INSERT INTO t1 VALUES('0 2 3', '4 5 6 7');
  INSERT INTO t1 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0');
  SELECT fts5_test_columnsize(t1) FROM t1 WHERE t1 MATCH '000 AND 00 AND 0';
} {{3 4} {2 10}}

do_execsql_test 7.0.2 {
  INSERT INTO t1(t1) VALUES('integrity-check');
}

do_execsql_test 7.1.1 {
  CREATE VIRTUAL TABLE t2 USING fts5(a, b, columnsize=0, tokenize=tcl, detail=%DETAIL%);
  INSERT INTO t2 VALUES('0 2 3', '4 5 6 7');
  INSERT INTO t2 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0');
  SELECT fts5_test_columnsize(t2) FROM t2 WHERE t2 MATCH '000 AND 00 AND 0';
} {{3 4} {2 10}}

do_execsql_test 7.1.2 {
  INSERT INTO t2(t2) VALUES('integrity-check');
}

} ;# foreach_detail_mode

finish_test

Changes to ext/fts5/test/fts5synonym2.test.
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59

60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

#-------------------------------------------------------------------------
# Code for a simple Tcl tokenizer that supports synonyms at query time.
#
foreach SYNDICT {
  {zero  0}
  {one   1 i}
  {two   2 ii}
  {three 3 iii}
  {four  4 iv}
  {five  5 v}
  {six   6 vi}
  {seven 7 vii}
  {eight 8 viii}
  {nine  9 ix}
} {
  foreach s $SYNDICT {
    set o [list]
    foreach x $SYNDICT {if {$x!=$s} {lappend o $x}}
    set ::syn($s) $o
  }
}

proc tcl_tokenize {tflags text} {
  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $w $iStart $iEnd
    if {$tflags == "query"} {
      foreach s $::syn($w)  { sqlite3_fts5_token -colo $s $iStart $iEnd }
    }
  }
}

proc tcl_create {args} {
  return "tcl_tokenize"
}

#

# End of tokenizer code.
#-------------------------------------------------------------------------

foreach_detail_mode $testprefix {

sqlite3_fts5_create_tokenizer db tcl tcl_create
fts5_aux_test_functions db

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE ss USING fts5(a, b, tokenize=tcl, detail=%DETAIL%);
  INSERT INTO ss VALUES('5 5 five seven 3 seven i', '2 1 5 0 two 1 i');
  INSERT INTO ss VALUES('six ix iii 7 i vii iii', 'one seven nine 4 9 1 vi');
  INSERT INTO ss VALUES('6 viii i five six zero seven', '5 v iii iv iv 3');
  INSERT INTO ss VALUES('9 ii six 8 1 6', 'six 4 iv iv 7');
  INSERT INTO ss VALUES('1 5 4 eight ii iv iii', 'nine 2 eight ix v vii');
  INSERT INTO ss VALUES('one 7 seven six 2 two', '1 2 four 7 4 3 4');
  INSERT INTO ss VALUES('eight iv 4 nine vii six 1', '5 6 v one zero 4');







<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
|
<
|
<
<
|
<
<
|
<
<
|
|
<
|
|
<
>
|
<
|
<
|
<
<

|
<







17
18
19
20
21
22
23



24











25



26

27


28


29


30
31

32
33

34
35

36

37


38
39

40
41
42
43
44
45
46

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}




foreach tok {query document} {











foreach_detail_mode $testprefix {





fts5_tclnum_register db


fts5_aux_test_functions db





proc fts5_rowid {cmd} { expr [$cmd xColumnText -1] }
sqlite3_fts5_create_function db fts5_rowid fts5_rowid


do_execsql_test 1.$tok.0.1 "

  CREATE VIRTUAL TABLE ss USING fts5(a, b, 
       tokenize='tclnum $tok', detail=%DETAIL%);

  INSERT INTO ss(ss, rank) VALUES('rank', 'fts5_rowid()');

"



do_execsql_test 1.$tok.0.2 {

  INSERT INTO ss VALUES('5 5 five seven 3 seven i', '2 1 5 0 two 1 i');
  INSERT INTO ss VALUES('six ix iii 7 i vii iii', 'one seven nine 4 9 1 vi');
  INSERT INTO ss VALUES('6 viii i five six zero seven', '5 v iii iv iv 3');
  INSERT INTO ss VALUES('9 ii six 8 1 6', 'six 4 iv iv 7');
  INSERT INTO ss VALUES('1 5 4 eight ii iv iii', 'nine 2 eight ix v vii');
  INSERT INTO ss VALUES('one 7 seven six 2 two', '1 2 four 7 4 3 4');
  INSERT INTO ss VALUES('eight iv 4 nine vii six 1', '5 6 v one zero 4');
133
134
135
136
137
138
139
140
141
142
143
144



145
146
147

148
149
150

151
152
153
154
  3.3 "a:one OR b:1 OR {a b} : i"

  4.1 "NEAR(one two, 2)"
  4.2 "NEAR(one two three, 2)"
  4.3 "NEAR(eight nine, 1) OR NEAR(six seven, 1)"
} {
  if {[fts5_expr_ok $expr ss]==0} {
    do_test 1.$tn.OMITTED { list } [list]
    continue
  }

  set res [fts5_query_data $expr ss ASC ::syn]



  breakpoint
  do_execsql_test 1.$tn.[llength $res].asc {
    SELECT rowid, fts5_test_poslist(ss), fts5_test_collist(ss) FROM ss($expr)

  } $res
}


}

finish_test








|



|
>
>
>
|
|

>



>




103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
  3.3 "a:one OR b:1 OR {a b} : i"

  4.1 "NEAR(one two, 2)"
  4.2 "NEAR(one two three, 2)"
  4.3 "NEAR(eight nine, 1) OR NEAR(six seven, 1)"
} {
  if {[fts5_expr_ok $expr ss]==0} {
    do_test 1.$tok.$tn.OMITTED { list } [list]
    continue
  }

  set res [fts5_query_data $expr ss ASC ::tclnum_syn]
  do_execsql_test 1.$tok.$tn.[llength $res].asc.1 {
    SELECT rowid, fts5_test_poslist(ss), fts5_test_collist(ss) FROM ss($expr)
  } $res

  do_execsql_test 1.$tok.$tn.[llength $res].asc.2 {
    SELECT rowid, fts5_test_poslist(ss), fts5_test_collist(ss) FROM ss($expr)
    ORDER BY rank ASC
  } $res
}

}
}

finish_test