SQLite

Changes On Branch fts4-languageid
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch fts4-languageid Excluding Merge-Ins

This is equivalent to a diff from 16330a2f to f8e9c445

2012-03-05
15:33
Merge the fts4-languageid branch with the trunk. (check-in: 99a9073b user: dan tags: trunk)
2012-03-03
18:46
Add the xLanguageid method to sqlite3_fts3_tokenizer versions 1 and greater. (Closed-Leaf check-in: f8e9c445 user: dan tags: fts4-languageid)
2012-03-02
19:53
Fix problems with combining content= and languageid= in a single fts4 table. (check-in: 22491e7b user: dan tags: fts4-languageid)
2012-03-01
21:30
Fix a typo in the header comment of sqlite3_commit_hook(). (check-in: 3784d147 user: drh tags: trunk)
19:44
Add the "languageid=" option to fts4. This code is still largely untested and alsmost certainly buggy. (check-in: bea257f7 user: dan tags: fts4-languageid)
18:16
Remove unused #defines from os.h. (check-in: c0891296 user: drh tags: winrt)
2012-02-28
17:57
Fix spurious errors that may occur if an empty database is opened and then initialized as a WAL database by a second connection. (check-in: 16330a2f user: dan tags: trunk)
11:52
Fix a case where an error code was being overwritten in multiplexDelete(). (check-in: c267893a user: dan tags: trunk)

Changes to ext/fts3/fts3.c.

465
466
467
468
469
470
471

472
473
474
475
476
477
478
  for(i=0; i<SizeofArray(p->aStmt); i++){
    sqlite3_finalize(p->aStmt[i]);
  }
  sqlite3_free(p->zSegmentsTbl);
  sqlite3_free(p->zReadExprlist);
  sqlite3_free(p->zWriteExprlist);
  sqlite3_free(p->zContentTbl);


  /* Invoke the tokenizer destructor to free the tokenizer. */
  p->pTokenizer->pModule->xDestroy(p->pTokenizer);

  sqlite3_free(p);
  return SQLITE_OK;
}







>







465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
  for(i=0; i<SizeofArray(p->aStmt); i++){
    sqlite3_finalize(p->aStmt[i]);
  }
  sqlite3_free(p->zSegmentsTbl);
  sqlite3_free(p->zReadExprlist);
  sqlite3_free(p->zWriteExprlist);
  sqlite3_free(p->zContentTbl);
  sqlite3_free(p->zLanguageid);

  /* Invoke the tokenizer destructor to free the tokenizer. */
  p->pTokenizer->pModule->xDestroy(p->pTokenizer);

  sqlite3_free(p);
  return SQLITE_OK;
}
541
542
543
544
545
546
547

548

549
550
551
552
553
554
555
556
557
558
559

560
561
562
563
564
565
566
*/
static void fts3DeclareVtab(int *pRc, Fts3Table *p){
  if( *pRc==SQLITE_OK ){
    int i;                        /* Iterator variable */
    int rc;                       /* Return code */
    char *zSql;                   /* SQL statement passed to declare_vtab() */
    char *zCols;                  /* List of user defined columns */



    sqlite3_vtab_config(p->db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1);

    /* Create a list of user columns for the virtual table */
    zCols = sqlite3_mprintf("%Q, ", p->azColumn[0]);
    for(i=1; zCols && i<p->nColumn; i++){
      zCols = sqlite3_mprintf("%z%Q, ", zCols, p->azColumn[i]);
    }

    /* Create the whole "CREATE TABLE" statement to pass to SQLite */
    zSql = sqlite3_mprintf(
        "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN)", zCols, p->zName

    );
    if( !zCols || !zSql ){
      rc = SQLITE_NOMEM;
    }else{
      rc = sqlite3_declare_vtab(p->db, zSql);
    }








>

>










|
>







542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
*/
static void fts3DeclareVtab(int *pRc, Fts3Table *p){
  if( *pRc==SQLITE_OK ){
    int i;                        /* Iterator variable */
    int rc;                       /* Return code */
    char *zSql;                   /* SQL statement passed to declare_vtab() */
    char *zCols;                  /* List of user defined columns */
    const char *zLanguageid;

    zLanguageid = (p->zLanguageid ? p->zLanguageid : "__langid");
    sqlite3_vtab_config(p->db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1);

    /* Create a list of user columns for the virtual table */
    zCols = sqlite3_mprintf("%Q, ", p->azColumn[0]);
    for(i=1; zCols && i<p->nColumn; i++){
      zCols = sqlite3_mprintf("%z%Q, ", zCols, p->azColumn[i]);
    }

    /* Create the whole "CREATE TABLE" statement to pass to SQLite */
    zSql = sqlite3_mprintf(
        "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN, %Q HIDDEN)", 
        zCols, p->zName, zLanguageid
    );
    if( !zCols || !zSql ){
      rc = SQLITE_NOMEM;
    }else{
      rc = sqlite3_declare_vtab(p->db, zSql);
    }

581
582
583
584
585
586
587

588
589
590
591
592
593
594
595



596
597
598
599
600
601
602
*/
static int fts3CreateTables(Fts3Table *p){
  int rc = SQLITE_OK;             /* Return code */
  int i;                          /* Iterator variable */
  sqlite3 *db = p->db;            /* The database connection */

  if( p->zContentTbl==0 ){

    char *zContentCols;           /* Columns of %_content table */

    /* Create a list of user columns for the content table */
    zContentCols = sqlite3_mprintf("docid INTEGER PRIMARY KEY");
    for(i=0; zContentCols && i<p->nColumn; i++){
      char *z = p->azColumn[i];
      zContentCols = sqlite3_mprintf("%z, 'c%d%q'", zContentCols, i, z);
    }



    if( zContentCols==0 ) rc = SQLITE_NOMEM;
  
    /* Create the content table */
    fts3DbExec(&rc, db, 
       "CREATE TABLE %Q.'%q_content'(%s)",
       p->zDb, p->zName, zContentCols
    );







>








>
>
>







585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
*/
static int fts3CreateTables(Fts3Table *p){
  int rc = SQLITE_OK;             /* Return code */
  int i;                          /* Iterator variable */
  sqlite3 *db = p->db;            /* The database connection */

  if( p->zContentTbl==0 ){
    const char *zLanguageid = p->zLanguageid;
    char *zContentCols;           /* Columns of %_content table */

    /* Create a list of user columns for the content table */
    zContentCols = sqlite3_mprintf("docid INTEGER PRIMARY KEY");
    for(i=0; zContentCols && i<p->nColumn; i++){
      char *z = p->azColumn[i];
      zContentCols = sqlite3_mprintf("%z, 'c%d%q'", zContentCols, i, z);
    }
    if( zLanguageid && zContentCols ){
      zContentCols = sqlite3_mprintf("%z, langid", zContentCols, zLanguageid);
    }
    if( zContentCols==0 ) rc = SQLITE_NOMEM;
  
    /* Create the content table */
    fts3DbExec(&rc, db, 
       "CREATE TABLE %Q.'%q_content'(%s)",
       p->zDb, p->zName, zContentCols
    );
788
789
790
791
792
793
794



795
796
797
798
799
800


801

802
803
804
805
806
807
808
809
    }else{
      zFree = zFunction = fts3QuoteId(zFunc);
    }
    fts3Appendf(pRc, &zRet, "docid");
    for(i=0; i<p->nColumn; i++){
      fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]);
    }



    sqlite3_free(zFree);
  }else{
    fts3Appendf(pRc, &zRet, "rowid");
    for(i=0; i<p->nColumn; i++){
      fts3Appendf(pRc, &zRet, ", x.'%q'", p->azColumn[i]);
    }


  }

  fts3Appendf(pRc, &zRet, "FROM '%q'.'%q%s' AS x", 
      p->zDb,
      (p->zContentTbl ? p->zContentTbl : p->zName),
      (p->zContentTbl ? "" : "_content")
  );
  return zRet;
}








>
>
>






>
>
|
>
|







796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
    }else{
      zFree = zFunction = fts3QuoteId(zFunc);
    }
    fts3Appendf(pRc, &zRet, "docid");
    for(i=0; i<p->nColumn; i++){
      fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]);
    }
    if( p->zLanguageid ){
      fts3Appendf(pRc, &zRet, ", x.%Q", "langid");
    }
    sqlite3_free(zFree);
  }else{
    fts3Appendf(pRc, &zRet, "rowid");
    for(i=0; i<p->nColumn; i++){
      fts3Appendf(pRc, &zRet, ", x.'%q'", p->azColumn[i]);
    }
    if( p->zLanguageid ){
      fts3Appendf(pRc, &zRet, ", x.%Q", p->zLanguageid);
    }
  }
  fts3Appendf(pRc, &zRet, " FROM '%q'.'%q%s' AS x", 
      p->zDb,
      (p->zContentTbl ? p->zContentTbl : p->zName),
      (p->zContentTbl ? "" : "_content")
  );
  return zRet;
}

838
839
840
841
842
843
844



845
846
847
848
849
850
851
  }else{
    zFree = zFunction = fts3QuoteId(zFunc);
  }
  fts3Appendf(pRc, &zRet, "?");
  for(i=0; i<p->nColumn; i++){
    fts3Appendf(pRc, &zRet, ",%s(?)", zFunction);
  }



  sqlite3_free(zFree);
  return zRet;
}

/*
** This function interprets the string at (*pp) as a non-negative integer
** value. It reads the integer and sets *pnOut to the value read, then 







>
>
>







852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
  }else{
    zFree = zFunction = fts3QuoteId(zFunc);
  }
  fts3Appendf(pRc, &zRet, "?");
  for(i=0; i<p->nColumn; i++){
    fts3Appendf(pRc, &zRet, ",%s(?)", zFunction);
  }
  if( p->zLanguageid ){
    fts3Appendf(pRc, &zRet, ", ?");
  }
  sqlite3_free(zFree);
  return zRet;
}

/*
** This function interprets the string at (*pp) as a non-negative integer
** value. It reads the integer and sets *pnOut to the value read, then 
1053
1054
1055
1056
1057
1058
1059

1060
1061
1062
1063
1064
1065
1066
  /* The results of parsing supported FTS4 key=value options: */
  int bNoDocsize = 0;             /* True to omit %_docsize table */
  int bDescIdx = 0;               /* True to store descending indexes */
  char *zPrefix = 0;              /* Prefix parameter value (or NULL) */
  char *zCompress = 0;            /* compress=? parameter (or NULL) */
  char *zUncompress = 0;          /* uncompress=? parameter (or NULL) */
  char *zContent = 0;             /* content=? parameter (or NULL) */


  assert( strlen(argv[0])==4 );
  assert( (sqlite3_strnicmp(argv[0], "fts4", 4)==0 && isFts4)
       || (sqlite3_strnicmp(argv[0], "fts3", 4)==0 && !isFts4)
  );

  nDb = (int)strlen(argv[1]) + 1;







>







1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
  /* The results of parsing supported FTS4 key=value options: */
  int bNoDocsize = 0;             /* True to omit %_docsize table */
  int bDescIdx = 0;               /* True to store descending indexes */
  char *zPrefix = 0;              /* Prefix parameter value (or NULL) */
  char *zCompress = 0;            /* compress=? parameter (or NULL) */
  char *zUncompress = 0;          /* uncompress=? parameter (or NULL) */
  char *zContent = 0;             /* content=? parameter (or NULL) */
  char *zLanguageid = 0;          /* languageid=? parameter (or NULL) */

  assert( strlen(argv[0])==4 );
  assert( (sqlite3_strnicmp(argv[0], "fts4", 4)==0 && isFts4)
       || (sqlite3_strnicmp(argv[0], "fts3", 4)==0 && !isFts4)
  );

  nDb = (int)strlen(argv[1]) + 1;
1102
1103
1104
1105
1106
1107
1108
1109

1110
1111
1112
1113
1114
1115
1116
        int nOpt;
      } aFts4Opt[] = {
        { "matchinfo",   9 },     /* 0 -> MATCHINFO */
        { "prefix",      6 },     /* 1 -> PREFIX */
        { "compress",    8 },     /* 2 -> COMPRESS */
        { "uncompress", 10 },     /* 3 -> UNCOMPRESS */
        { "order",       5 },     /* 4 -> ORDER */
        { "content",     7 }      /* 5 -> CONTENT */

      };

      int iOpt;
      if( !zVal ){
        rc = SQLITE_NOMEM;
      }else{
        for(iOpt=0; iOpt<SizeofArray(aFts4Opt); iOpt++){







|
>







1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
        int nOpt;
      } aFts4Opt[] = {
        { "matchinfo",   9 },     /* 0 -> MATCHINFO */
        { "prefix",      6 },     /* 1 -> PREFIX */
        { "compress",    8 },     /* 2 -> COMPRESS */
        { "uncompress", 10 },     /* 3 -> UNCOMPRESS */
        { "order",       5 },     /* 4 -> ORDER */
        { "content",     7 },     /* 5 -> CONTENT */
        { "languageid", 10 }      /* 6 -> LANGUAGEID */
      };

      int iOpt;
      if( !zVal ){
        rc = SQLITE_NOMEM;
      }else{
        for(iOpt=0; iOpt<SizeofArray(aFts4Opt); iOpt++){
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168







1169
1170
1171
1172
1173
1174
1175
              ){
                *pzErr = sqlite3_mprintf("unrecognized order: %s", zVal);
                rc = SQLITE_ERROR;
              }
              bDescIdx = (zVal[0]=='d' || zVal[0]=='D');
              break;

            default:              /* CONTENT */
              assert( iOpt==5 );
              sqlite3_free(zUncompress);
              zContent = zVal;
              zVal = 0;
              break;







          }
        }
        sqlite3_free(zVal);
      }
    }

    /* Otherwise, the argument is a column name. */







|
<
|



>
>
>
>
>
>
>







1175
1176
1177
1178
1179
1180
1181
1182

1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
              ){
                *pzErr = sqlite3_mprintf("unrecognized order: %s", zVal);
                rc = SQLITE_ERROR;
              }
              bDescIdx = (zVal[0]=='d' || zVal[0]=='D');
              break;

            case 5:              /* CONTENT */

              sqlite3_free(zContent);
              zContent = zVal;
              zVal = 0;
              break;

            case 6:              /* LANGUAGEID */
              assert( iOpt==6 );
              sqlite3_free(zLanguageid);
              zLanguageid = zVal;
              zVal = 0;
              break;
          }
        }
        sqlite3_free(zVal);
      }
    }

    /* Otherwise, the argument is a column name. */
1191
1192
1193
1194
1195
1196
1197
1198


1199










1200
1201
1202
1203
1204
1205
1206
    sqlite3_free(zUncompress); 
    zCompress = 0;
    zUncompress = 0;
    if( nCol==0 ){
      sqlite3_free((void*)aCol); 
      aCol = 0;
      rc = fts3ContentColumns(db, argv[1], zContent, &aCol, &nCol, &nString);
    }


    assert( rc!=SQLITE_OK || nCol>0 );










  }
  if( rc!=SQLITE_OK ) goto fts3_init_out;

  if( nCol==0 ){
    assert( nString==0 );
    aCol[0] = "content";
    nString = 8;







|
>
>
|
>
>
>
>
>
>
>
>
>
>







1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
    sqlite3_free(zUncompress); 
    zCompress = 0;
    zUncompress = 0;
    if( nCol==0 ){
      sqlite3_free((void*)aCol); 
      aCol = 0;
      rc = fts3ContentColumns(db, argv[1], zContent, &aCol, &nCol, &nString);

      /* If a languageid= option was specified, remove the language id
      ** column from the aCol[] array. */ 
      if( rc==SQLITE_OK && zLanguageid ){
        int j;
        for(j=0; j<nCol; j++){
          if( sqlite3_stricmp(zLanguageid, aCol[j])==0 ){
            memmove(&aCol[j], &aCol[j+1], (nCol-j) * sizeof(aCol[0]));
            nCol--;
            break;
          }
        }
      }
    }
  }
  if( rc!=SQLITE_OK ) goto fts3_init_out;

  if( nCol==0 ){
    assert( nString==0 );
    aCol[0] = "content";
    nString = 8;
1239
1240
1241
1242
1243
1244
1245

1246

1247
1248
1249
1250
1251
1252
1253
  p->azColumn = (char **)&p[1];
  p->pTokenizer = pTokenizer;
  p->nMaxPendingData = FTS3_MAX_PENDING_DATA;
  p->bHasDocsize = (isFts4 && bNoDocsize==0);
  p->bHasStat = isFts4;
  p->bDescIdx = bDescIdx;
  p->zContentTbl = zContent;

  zContent = 0;

  TESTONLY( p->inTransaction = -1 );
  TESTONLY( p->mxSavepoint = -1 );

  p->aIndex = (struct Fts3Index *)&p->azColumn[nCol];
  memcpy(p->aIndex, aIndex, sizeof(struct Fts3Index) * nIndex);
  p->nIndex = nIndex;
  for(i=0; i<nIndex; i++){







>

>







1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
  p->azColumn = (char **)&p[1];
  p->pTokenizer = pTokenizer;
  p->nMaxPendingData = FTS3_MAX_PENDING_DATA;
  p->bHasDocsize = (isFts4 && bNoDocsize==0);
  p->bHasStat = isFts4;
  p->bDescIdx = bDescIdx;
  p->zContentTbl = zContent;
  p->zLanguageid = zLanguageid;
  zContent = 0;
  zLanguageid = 0;
  TESTONLY( p->inTransaction = -1 );
  TESTONLY( p->mxSavepoint = -1 );

  p->aIndex = (struct Fts3Index *)&p->azColumn[nCol];
  memcpy(p->aIndex, aIndex, sizeof(struct Fts3Index) * nIndex);
  p->nIndex = nIndex;
  for(i=0; i<nIndex; i++){
1302
1303
1304
1305
1306
1307
1308

1309
1310
1311
1312
1313
1314
1315

fts3_init_out:
  sqlite3_free(zPrefix);
  sqlite3_free(aIndex);
  sqlite3_free(zCompress);
  sqlite3_free(zUncompress);
  sqlite3_free(zContent);

  sqlite3_free((void *)aCol);
  if( rc!=SQLITE_OK ){
    if( p ){
      fts3DisconnectMethod((sqlite3_vtab *)p);
    }else if( pTokenizer ){
      pTokenizer->pModule->xDestroy(pTokenizer);
    }







>







1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355

fts3_init_out:
  sqlite3_free(zPrefix);
  sqlite3_free(aIndex);
  sqlite3_free(zCompress);
  sqlite3_free(zUncompress);
  sqlite3_free(zContent);
  sqlite3_free(zLanguageid);
  sqlite3_free((void *)aCol);
  if( rc!=SQLITE_OK ){
    if( p ){
      fts3DisconnectMethod((sqlite3_vtab *)p);
    }else if( pTokenizer ){
      pTokenizer->pModule->xDestroy(pTokenizer);
    }
1353
1354
1355
1356
1357
1358
1359

1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371

1372
1373
1374
1375
1376
1377
1378
1379
**   2. Full-text search using a MATCH operator on a non-docid column.
**   3. Linear scan of %_content table.
*/
static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
  Fts3Table *p = (Fts3Table *)pVTab;
  int i;                          /* Iterator variable */
  int iCons = -1;                 /* Index of constraint to use */


  /* By default use a full table scan. This is an expensive option,
  ** so search through the constraints to see if a more efficient 
  ** strategy is possible.
  */
  pInfo->idxNum = FTS3_FULLSCAN_SEARCH;
  pInfo->estimatedCost = 500000;
  for(i=0; i<pInfo->nConstraint; i++){
    struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[i];
    if( pCons->usable==0 ) continue;

    /* A direct lookup on the rowid or docid column. Assign a cost of 1.0. */

    if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ 
     && (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1 )
    ){
      pInfo->idxNum = FTS3_DOCID_SEARCH;
      pInfo->estimatedCost = 1.0;
      iCons = i;
    }








>












>
|







1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
**   2. Full-text search using a MATCH operator on a non-docid column.
**   3. Linear scan of %_content table.
*/
static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
  Fts3Table *p = (Fts3Table *)pVTab;
  int i;                          /* Iterator variable */
  int iCons = -1;                 /* Index of constraint to use */
  int iLangidCons = -1;           /* Index of langid=x constraint, if present */

  /* By default use a full table scan. This is an expensive option,
  ** so search through the constraints to see if a more efficient 
  ** strategy is possible.
  */
  pInfo->idxNum = FTS3_FULLSCAN_SEARCH;
  pInfo->estimatedCost = 500000;
  for(i=0; i<pInfo->nConstraint; i++){
    struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[i];
    if( pCons->usable==0 ) continue;

    /* A direct lookup on the rowid or docid column. Assign a cost of 1.0. */
    if( iCons<0 
     && pCons->op==SQLITE_INDEX_CONSTRAINT_EQ 
     && (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1 )
    ){
      pInfo->idxNum = FTS3_DOCID_SEARCH;
      pInfo->estimatedCost = 1.0;
      iCons = i;
    }

1388
1389
1390
1391
1392
1393
1394

1395





1396
1397
1398
1399
1400
1401
1402



1403
1404
1405
1406
1407
1408
1409
    */
    if( pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH 
     && pCons->iColumn>=0 && pCons->iColumn<=p->nColumn
    ){
      pInfo->idxNum = FTS3_FULLTEXT_SEARCH + pCons->iColumn;
      pInfo->estimatedCost = 2.0;
      iCons = i;

      break;





    }
  }

  if( iCons>=0 ){
    pInfo->aConstraintUsage[iCons].argvIndex = 1;
    pInfo->aConstraintUsage[iCons].omit = 1;
  } 




  /* Regardless of the strategy selected, FTS can deliver rows in rowid (or
  ** docid) order. Both ascending and descending are possible. 
  */
  if( pInfo->nOrderBy==1 ){
    struct sqlite3_index_orderby *pOrder = &pInfo->aOrderBy[0];
    if( pOrder->iColumn<0 || pOrder->iColumn==p->nColumn+1 ){







>
|
>
>
>
>
>







>
>
>







1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
    */
    if( pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH 
     && pCons->iColumn>=0 && pCons->iColumn<=p->nColumn
    ){
      pInfo->idxNum = FTS3_FULLTEXT_SEARCH + pCons->iColumn;
      pInfo->estimatedCost = 2.0;
      iCons = i;
    }

    /* Equality constraint on the langid column */
    if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ 
     && pCons->iColumn==p->nColumn + 2
    ){
      iLangidCons = i;
    }
  }

  if( iCons>=0 ){
    pInfo->aConstraintUsage[iCons].argvIndex = 1;
    pInfo->aConstraintUsage[iCons].omit = 1;
  } 
  if( iLangidCons>=0 ){
    pInfo->aConstraintUsage[iLangidCons].argvIndex = 2;
  } 

  /* Regardless of the strategy selected, FTS can deliver rows in rowid (or
  ** docid) order. Both ascending and descending are possible. 
  */
  if( pInfo->nOrderBy==1 ){
    struct sqlite3_index_orderby *pOrder = &pInfo->aOrderBy[0];
    if( pOrder->iColumn<0 || pOrder->iColumn==p->nColumn+1 ){
2545
2546
2547
2548
2549
2550
2551

2552
2553
2554
2555
2556
2557
2558
** 8th argument.
**
** This function returns SQLITE_OK if successful, or an SQLite error code
** otherwise.
*/
static int fts3SegReaderCursor(
  Fts3Table *p,                   /* FTS3 table handle */

  int iIndex,                     /* Index to search (from 0 to p->nIndex-1) */
  int iLevel,                     /* Level of segments to scan */
  const char *zTerm,              /* Term to query for */
  int nTerm,                      /* Size of zTerm in bytes */
  int isPrefix,                   /* True for a prefix search */
  int isScan,                     /* True to scan from zTerm to EOF */
  Fts3MultiSegReader *pCsr        /* Cursor object to populate */







>







2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
** 8th argument.
**
** This function returns SQLITE_OK if successful, or an SQLite error code
** otherwise.
*/
static int fts3SegReaderCursor(
  Fts3Table *p,                   /* FTS3 table handle */
  int iLangid,                    /* Language id */
  int iIndex,                     /* Index to search (from 0 to p->nIndex-1) */
  int iLevel,                     /* Level of segments to scan */
  const char *zTerm,              /* Term to query for */
  int nTerm,                      /* Size of zTerm in bytes */
  int isPrefix,                   /* True for a prefix search */
  int isScan,                     /* True to scan from zTerm to EOF */
  Fts3MultiSegReader *pCsr        /* Cursor object to populate */
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
    if( rc==SQLITE_OK && pSeg ){
      rc = fts3SegReaderCursorAppend(pCsr, pSeg);
    }
  }

  if( iLevel!=FTS3_SEGCURSOR_PENDING ){
    if( rc==SQLITE_OK ){
      rc = sqlite3Fts3AllSegdirs(p, iIndex, iLevel, &pStmt);
    }

    while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){
      Fts3SegReader *pSeg = 0;

      /* Read the values returned by the SELECT into local variables. */
      sqlite3_int64 iStartBlock = sqlite3_column_int64(pStmt, 1);







|







2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
    if( rc==SQLITE_OK && pSeg ){
      rc = fts3SegReaderCursorAppend(pCsr, pSeg);
    }
  }

  if( iLevel!=FTS3_SEGCURSOR_PENDING ){
    if( rc==SQLITE_OK ){
      rc = sqlite3Fts3AllSegdirs(p, iLangid, iIndex, iLevel, &pStmt);
    }

    while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){
      Fts3SegReader *pSeg = 0;

      /* Read the values returned by the SELECT into local variables. */
      sqlite3_int64 iStartBlock = sqlite3_column_int64(pStmt, 1);
2618
2619
2620
2621
2622
2623
2624

2625
2626
2627
2628
2629
2630
2631

/*
** Set up a cursor object for iterating through a full-text index or a 
** single level therein.
*/
int sqlite3Fts3SegReaderCursor(
  Fts3Table *p,                   /* FTS3 table handle */

  int iIndex,                     /* Index to search (from 0 to p->nIndex-1) */
  int iLevel,                     /* Level of segments to scan */
  const char *zTerm,              /* Term to query for */
  int nTerm,                      /* Size of zTerm in bytes */
  int isPrefix,                   /* True for a prefix search */
  int isScan,                     /* True to scan from zTerm to EOF */
  Fts3MultiSegReader *pCsr       /* Cursor object to populate */







>







2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684

/*
** Set up a cursor object for iterating through a full-text index or a 
** single level therein.
*/
int sqlite3Fts3SegReaderCursor(
  Fts3Table *p,                   /* FTS3 table handle */
  int iLangid,
  int iIndex,                     /* Index to search (from 0 to p->nIndex-1) */
  int iLevel,                     /* Level of segments to scan */
  const char *zTerm,              /* Term to query for */
  int nTerm,                      /* Size of zTerm in bytes */
  int isPrefix,                   /* True for a prefix search */
  int isScan,                     /* True to scan from zTerm to EOF */
  Fts3MultiSegReader *pCsr       /* Cursor object to populate */
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660

2661
2662
2663
2664
2665


2666
2667
2668
2669
2670
2671
2672
  /* "isScan" is only set to true by the ft4aux module, an ordinary
  ** full-text tables. */
  assert( isScan==0 || p->aIndex==0 );

  memset(pCsr, 0, sizeof(Fts3MultiSegReader));

  return fts3SegReaderCursor(
      p, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr
  );
}

/*
** In addition to its current configuration, have the Fts3MultiSegReader
** passed as the 4th argument also scan the doclist for term zTerm/nTerm.
**
** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
*/
static int fts3SegReaderCursorAddZero(
  Fts3Table *p,                   /* FTS virtual table handle */

  const char *zTerm,              /* Term to scan doclist of */
  int nTerm,                      /* Number of bytes in zTerm */
  Fts3MultiSegReader *pCsr        /* Fts3MultiSegReader to modify */
){
  return fts3SegReaderCursor(p, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr);


}

/*
** Open an Fts3MultiSegReader to scan the doclist for term zTerm/nTerm. Or,
** if isPrefix is true, to scan the doclist for all terms for which 
** zTerm/nTerm is a prefix. If successful, return SQLITE_OK and write
** a pointer to the new Fts3MultiSegReader to *ppSegcsr. Otherwise, return







|











>




|
>
>







2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
  /* "isScan" is only set to true by the ft4aux module, an ordinary
  ** full-text tables. */
  assert( isScan==0 || p->aIndex==0 );

  memset(pCsr, 0, sizeof(Fts3MultiSegReader));

  return fts3SegReaderCursor(
      p, iLangid, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr
  );
}

/*
** In addition to its current configuration, have the Fts3MultiSegReader
** passed as the 4th argument also scan the doclist for term zTerm/nTerm.
**
** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
*/
static int fts3SegReaderCursorAddZero(
  Fts3Table *p,                   /* FTS virtual table handle */
  int iLangid,
  const char *zTerm,              /* Term to scan doclist of */
  int nTerm,                      /* Number of bytes in zTerm */
  Fts3MultiSegReader *pCsr        /* Fts3MultiSegReader to modify */
){
  return fts3SegReaderCursor(p, 
      iLangid, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr
  );
}

/*
** Open an Fts3MultiSegReader to scan the doclist for term zTerm/nTerm. Or,
** if isPrefix is true, to scan the doclist for all terms for which 
** zTerm/nTerm is a prefix. If successful, return SQLITE_OK and write
** a pointer to the new Fts3MultiSegReader to *ppSegcsr. Otherwise, return
2694
2695
2696
2697
2698
2699
2700
2701
2702

2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714


2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
    int bFound = 0;               /* True once an index has been found */
    Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;

    if( isPrefix ){
      for(i=1; bFound==0 && i<p->nIndex; i++){
        if( p->aIndex[i].nPrefix==nTerm ){
          bFound = 1;
          rc = sqlite3Fts3SegReaderCursor(
              p, i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pSegcsr);

          pSegcsr->bLookup = 1;
        }
      }

      for(i=1; bFound==0 && i<p->nIndex; i++){
        if( p->aIndex[i].nPrefix==nTerm+1 ){
          bFound = 1;
          rc = sqlite3Fts3SegReaderCursor(
              p, i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 1, 0, pSegcsr
          );
          if( rc==SQLITE_OK ){
            rc = fts3SegReaderCursorAddZero(p, zTerm, nTerm, pSegcsr);


          }
        }
      }
    }

    if( bFound==0 ){
      rc = sqlite3Fts3SegReaderCursor(
          p, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr
      );
      pSegcsr->bLookup = !isPrefix;
    }
  }

  *ppSegcsr = pSegcsr;
  return rc;







|
|
>







|
|


|
>
>






|
|







2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
    int bFound = 0;               /* True once an index has been found */
    Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;

    if( isPrefix ){
      for(i=1; bFound==0 && i<p->nIndex; i++){
        if( p->aIndex[i].nPrefix==nTerm ){
          bFound = 1;
          rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, 
              i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pSegcsr
          );
          pSegcsr->bLookup = 1;
        }
      }

      for(i=1; bFound==0 && i<p->nIndex; i++){
        if( p->aIndex[i].nPrefix==nTerm+1 ){
          bFound = 1;
          rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, 
              i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 1, 0, pSegcsr
          );
          if( rc==SQLITE_OK ){
            rc = fts3SegReaderCursorAddZero(
                p, pCsr->iLangid, zTerm, nTerm, pSegcsr
            );
          }
        }
      }
    }

    if( bFound==0 ){
      rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, 
          0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr
      );
      pSegcsr->bLookup = !isPrefix;
    }
  }

  *ppSegcsr = pSegcsr;
  return rc;
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
  Fts3Table *p = (Fts3Table *)pCursor->pVtab;
  Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;

  UNUSED_PARAMETER(idxStr);
  UNUSED_PARAMETER(nVal);

  assert( idxNum>=0 && idxNum<=(FTS3_FULLTEXT_SEARCH+p->nColumn) );
  assert( nVal==0 || nVal==1 );
  assert( (nVal==0)==(idxNum==FTS3_FULLSCAN_SEARCH) );
  assert( p->pSegments==0 );

  /* In case the cursor has been used before, clear it now. */
  sqlite3_finalize(pCsr->pStmt);
  sqlite3_free(pCsr->aDoclist);
  sqlite3Fts3ExprFree(pCsr->pExpr);







|







2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
  Fts3Table *p = (Fts3Table *)pCursor->pVtab;
  Fts3Cursor *pCsr = (Fts3Cursor *)pCursor;

  UNUSED_PARAMETER(idxStr);
  UNUSED_PARAMETER(nVal);

  assert( idxNum>=0 && idxNum<=(FTS3_FULLTEXT_SEARCH+p->nColumn) );
  assert( nVal==0 || nVal==1 || nVal==2 );
  assert( (nVal==0)==(idxNum==FTS3_FULLSCAN_SEARCH) );
  assert( p->pSegments==0 );

  /* In case the cursor has been used before, clear it now. */
  sqlite3_finalize(pCsr->pStmt);
  sqlite3_free(pCsr->aDoclist);
  sqlite3Fts3ExprFree(pCsr->pExpr);
2895
2896
2897
2898
2899
2900
2901



2902
2903
2904
2905
2906
2907
2908
2909
2910
    int iCol = idxNum-FTS3_FULLTEXT_SEARCH;
    const char *zQuery = (const char *)sqlite3_value_text(apVal[0]);

    if( zQuery==0 && sqlite3_value_type(apVal[0])!=SQLITE_NULL ){
      return SQLITE_NOMEM;
    }




    rc = sqlite3Fts3ExprParse(p->pTokenizer, p->azColumn, p->bHasStat, 
        p->nColumn, iCol, zQuery, -1, &pCsr->pExpr
    );
    if( rc!=SQLITE_OK ){
      if( rc==SQLITE_ERROR ){
        static const char *zErr = "malformed MATCH expression: [%s]";
        p->base.zErrMsg = sqlite3_mprintf(zErr, zQuery);
      }
      return rc;







>
>
>
|
|







2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
    int iCol = idxNum-FTS3_FULLTEXT_SEARCH;
    const char *zQuery = (const char *)sqlite3_value_text(apVal[0]);

    if( zQuery==0 && sqlite3_value_type(apVal[0])!=SQLITE_NULL ){
      return SQLITE_NOMEM;
    }

    pCsr->iLangid = 0;
    if( nVal==2 ) pCsr->iLangid = sqlite3_value_int(apVal[1]);

    rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid,
        p->azColumn, p->bHasStat, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr
    );
    if( rc!=SQLITE_OK ){
      if( rc==SQLITE_ERROR ){
        static const char *zErr = "malformed MATCH expression: [%s]";
        p->base.zErrMsg = sqlite3_mprintf(zErr, zQuery);
      }
      return rc;
2967
2968
2969
2970
2971
2972
2973







2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996


2997


2998








2999
3000

3001
3002
3003
3004
3005
3006
3007
  *pRowid = pCsr->iPrevId;
  return SQLITE_OK;
}

/* 
** This is the xColumn method, called by SQLite to request a value from
** the row that the supplied cursor currently points to.







*/
static int fts3ColumnMethod(
  sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
  sqlite3_context *pContext,      /* Context for sqlite3_result_xxx() calls */
  int iCol                        /* Index of column to read value from */
){
  int rc = SQLITE_OK;             /* Return Code */
  Fts3Cursor *pCsr = (Fts3Cursor *) pCursor;
  Fts3Table *p = (Fts3Table *)pCursor->pVtab;

  /* The column value supplied by SQLite must be in range. */
  assert( iCol>=0 && iCol<=p->nColumn+1 );

  if( iCol==p->nColumn+1 ){
    /* This call is a request for the "docid" column. Since "docid" is an 
    ** alias for "rowid", use the xRowid() method to obtain the value.
    */
    sqlite3_result_int64(pContext, pCsr->iPrevId);
  }else if( iCol==p->nColumn ){
    /* The extra column whose name is the same as the table.
    ** Return a blob which is a pointer to the cursor.
    */
    sqlite3_result_blob(pContext, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT);


  }else{


    rc = fts3CursorSeek(0, pCsr);








    if( rc==SQLITE_OK && sqlite3_data_count(pCsr->pStmt)>(iCol+1) ){
      sqlite3_result_value(pContext, sqlite3_column_value(pCsr->pStmt, iCol+1));

    }
  }

  assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
  return rc;
}








>
>
>
>
>
>
>



|







|





|


|
<
|
>
>

>
>

>
>
>
>
>
>
>
>
|
|
>







3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063

3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
  *pRowid = pCsr->iPrevId;
  return SQLITE_OK;
}

/* 
** This is the xColumn method, called by SQLite to request a value from
** the row that the supplied cursor currently points to.
**
** If:
**
**   (iCol <  p->nColumn)   -> The value of the iCol'th user column.
**   (iCol == p->nColumn)   -> Magic column with the same name as the table.
**   (iCol == p->nColumn+1) -> Docid column
**   (iCol == p->nColumn+2) -> Langid column
*/
static int fts3ColumnMethod(
  sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
  sqlite3_context *pCtx,          /* Context for sqlite3_result_xxx() calls */
  int iCol                        /* Index of column to read value from */
){
  int rc = SQLITE_OK;             /* Return Code */
  Fts3Cursor *pCsr = (Fts3Cursor *) pCursor;
  Fts3Table *p = (Fts3Table *)pCursor->pVtab;

  /* The column value supplied by SQLite must be in range. */
  assert( iCol>=0 && iCol<=p->nColumn+2 );

  if( iCol==p->nColumn+1 ){
    /* This call is a request for the "docid" column. Since "docid" is an 
    ** alias for "rowid", use the xRowid() method to obtain the value.
    */
    sqlite3_result_int64(pCtx, pCsr->iPrevId);
  }else if( iCol==p->nColumn ){
    /* The extra column whose name is the same as the table.
    ** Return a blob which is a pointer to the cursor.  */

    sqlite3_result_blob(pCtx, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT);
  }else if( iCol==p->nColumn+2 && pCsr->pExpr ){
    sqlite3_result_int64(pCtx, pCsr->iLangid);
  }else{
    /* The requested column is either a user column (one that contains 
    ** indexed data), or the language-id column.  */
    rc = fts3CursorSeek(0, pCsr);

    if( rc==SQLITE_OK ){
      if( iCol==p->nColumn+2 ){
        int iLangid = 0;
        if( p->zLanguageid ){
          iLangid = sqlite3_column_int(pCsr->pStmt, p->nColumn+1);
        }
        sqlite3_result_int(pCtx, iLangid);
      }else if( sqlite3_data_count(pCsr->pStmt)>(iCol+1) ){
        sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1));
      }
    }
  }

  assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
  return rc;
}

Changes to ext/fts3/fts3Int.h.

188
189
190
191
192
193
194

195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233

234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257

258
259
260
261
262
263
264
  sqlite3 *db;                    /* The database connection */
  const char *zDb;                /* logical database name */
  const char *zName;              /* virtual table name */
  int nColumn;                    /* number of named columns in virtual table */
  char **azColumn;                /* column names.  malloced */
  sqlite3_tokenizer *pTokenizer;  /* tokenizer for inserts and queries */
  char *zContentTbl;              /* content=xxx option, or NULL */


  /* Precompiled statements used by the implementation. Each of these 
  ** statements is run and reset within a single virtual table API call. 
  */
  sqlite3_stmt *aStmt[27];

  char *zReadExprlist;
  char *zWriteExprlist;

  int nNodeSize;                  /* Soft limit for node size */
  u8 bHasStat;                    /* True if %_stat table exists */
  u8 bHasDocsize;                 /* True if %_docsize table exists */
  u8 bDescIdx;                    /* True if doclists are in reverse order */
  int nPgsz;                      /* Page size for host database */
  char *zSegmentsTbl;             /* Name of %_segments table */
  sqlite3_blob *pSegments;        /* Blob handle open on %_segments table */

  /* TODO: Fix the first paragraph of this comment.
  **
  ** The following hash table is used to buffer pending index updates during
  ** transactions. Variable nPendingData estimates the memory size of the 
  ** pending data, including hash table overhead, but not malloc overhead. 
  ** When nPendingData exceeds nMaxPendingData, the buffer is flushed 
  ** automatically. Variable iPrevDocid is the docid of the most recently
  ** inserted record.
  **
  ** A single FTS4 table may have multiple full-text indexes. For each index
  ** there is an entry in the aIndex[] array. Index 0 is an index of all the
  ** terms that appear in the document set. Each subsequent index in aIndex[]
  ** is an index of prefixes of a specific length.
  */
  int nIndex;                     /* Size of aIndex[] */
  struct Fts3Index {
    int nPrefix;                  /* Prefix length (0 for main terms index) */
    Fts3Hash hPending;            /* Pending terms table for this index */
  } *aIndex;
  int nMaxPendingData;            /* Max pending data before flush to disk */
  int nPendingData;               /* Current bytes of pending data */
  sqlite_int64 iPrevDocid;        /* Docid of most recently inserted document */


#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST)
  /* State variables used for validating that the transaction control
  ** methods of the virtual table are called at appropriate times.  These
  ** values do not contribution to the FTS computation; they are used for
  ** verifying the SQLite core.
  */
  int inTransaction;     /* True after xBegin but before xCommit/xRollback */
  int mxSavepoint;       /* Largest valid xSavepoint integer */
#endif
};

/*
** When the core wants to read from the virtual table, it creates a
** virtual table cursor (an instance of the following structure) using
** the xOpen method. Cursors are destroyed using the xClose method.
*/
struct Fts3Cursor {
  sqlite3_vtab_cursor base;       /* Base class used by SQLite core */
  i16 eSearch;                    /* Search strategy (see below) */
  u8 isEof;                       /* True if at End Of Results */
  u8 isRequireSeek;               /* True if must seek pStmt to %_content row */
  sqlite3_stmt *pStmt;            /* Prepared statement in use by the cursor */
  Fts3Expr *pExpr;                /* Parsed MATCH query string */

  int nPhrase;                    /* Number of matchable phrases in query */
  Fts3DeferredToken *pDeferred;   /* Deferred search tokens, if any */
  sqlite3_int64 iPrevId;          /* Previous id read from aDoclist */
  char *pNextId;                  /* Pointer into the body of aDoclist */
  char *aDoclist;                 /* List of docids for full-text queries */
  int nDoclist;                   /* Size of buffer at aDoclist */
  u8 bDesc;                       /* True to sort in descending order */







>




|














|
|
|
|
|
|














>




|
|


















>







188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
  sqlite3 *db;                    /* The database connection */
  const char *zDb;                /* logical database name */
  const char *zName;              /* virtual table name */
  int nColumn;                    /* number of named columns in virtual table */
  char **azColumn;                /* column names.  malloced */
  sqlite3_tokenizer *pTokenizer;  /* tokenizer for inserts and queries */
  char *zContentTbl;              /* content=xxx option, or NULL */
  char *zLanguageid;              /* languageid=xxx option, or NULL */

  /* Precompiled statements used by the implementation. Each of these 
  ** statements is run and reset within a single virtual table API call. 
  */
  sqlite3_stmt *aStmt[28];

  char *zReadExprlist;
  char *zWriteExprlist;

  int nNodeSize;                  /* Soft limit for node size */
  u8 bHasStat;                    /* True if %_stat table exists */
  u8 bHasDocsize;                 /* True if %_docsize table exists */
  u8 bDescIdx;                    /* True if doclists are in reverse order */
  int nPgsz;                      /* Page size for host database */
  char *zSegmentsTbl;             /* Name of %_segments table */
  sqlite3_blob *pSegments;        /* Blob handle open on %_segments table */

  /* TODO: Fix the first paragraph of this comment.
  **
  ** The following array of hash tables is used to buffer pending index 
  ** updates during transactions. Variable nPendingData estimates the memory 
  ** size of the pending data, including hash table overhead, not including
  ** malloc overhead.  When nPendingData exceeds nMaxPendingData, the buffer 
  ** is flushed automatically. Variable iPrevDocid is the docid of the most 
  ** recently inserted record.
  **
  ** A single FTS4 table may have multiple full-text indexes. For each index
  ** there is an entry in the aIndex[] array. Index 0 is an index of all the
  ** terms that appear in the document set. Each subsequent index in aIndex[]
  ** is an index of prefixes of a specific length.
  */
  int nIndex;                     /* Size of aIndex[] */
  struct Fts3Index {
    int nPrefix;                  /* Prefix length (0 for main terms index) */
    Fts3Hash hPending;            /* Pending terms table for this index */
  } *aIndex;
  int nMaxPendingData;            /* Max pending data before flush to disk */
  int nPendingData;               /* Current bytes of pending data */
  sqlite_int64 iPrevDocid;        /* Docid of most recently inserted document */
  int iPrevLangid;                /* Langid of recently inserted document */

#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST)
  /* State variables used for validating that the transaction control
  ** methods of the virtual table are called at appropriate times.  These
  ** values do not contribute to FTS functionality; they are used for
  ** verifying the operation of the SQLite core.
  */
  int inTransaction;     /* True after xBegin but before xCommit/xRollback */
  int mxSavepoint;       /* Largest valid xSavepoint integer */
#endif
};

/*
** When the core wants to read from the virtual table, it creates a
** virtual table cursor (an instance of the following structure) using
** the xOpen method. Cursors are destroyed using the xClose method.
*/
struct Fts3Cursor {
  sqlite3_vtab_cursor base;       /* Base class used by SQLite core */
  i16 eSearch;                    /* Search strategy (see below) */
  u8 isEof;                       /* True if at End Of Results */
  u8 isRequireSeek;               /* True if must seek pStmt to %_content row */
  sqlite3_stmt *pStmt;            /* Prepared statement in use by the cursor */
  Fts3Expr *pExpr;                /* Parsed MATCH query string */
  int iLangid;                    /* Language being queried for */
  int nPhrase;                    /* Number of matchable phrases in query */
  Fts3DeferredToken *pDeferred;   /* Deferred search tokens, if any */
  sqlite3_int64 iPrevId;          /* Previous id read from aDoclist */
  char *pNextId;                  /* Pointer into the body of aDoclist */
  char *aDoclist;                 /* List of docids for full-text queries */
  int nDoclist;                   /* Size of buffer at aDoclist */
  u8 bDesc;                       /* True to sort in descending order */
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
void sqlite3Fts3PendingTermsClear(Fts3Table *);
int sqlite3Fts3Optimize(Fts3Table *);
int sqlite3Fts3SegReaderNew(int, int, sqlite3_int64,
  sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**);
int sqlite3Fts3SegReaderPending(
  Fts3Table*,int,const char*,int,int,Fts3SegReader**);
void sqlite3Fts3SegReaderFree(Fts3SegReader *);
int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, sqlite3_stmt **);
int sqlite3Fts3ReadLock(Fts3Table *);
int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*, int*);

int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **);
int sqlite3Fts3SelectDocsize(Fts3Table *, sqlite3_int64, sqlite3_stmt **);

void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *);
int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int);
int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *);
void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *);
void sqlite3Fts3SegmentsClose(Fts3Table *);

/* Special values interpreted by sqlite3SegReaderCursor() */
#define FTS3_SEGCURSOR_PENDING        -1
#define FTS3_SEGCURSOR_ALL            -2

int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3MultiSegReader*, Fts3SegFilter*);
int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3MultiSegReader *);
void sqlite3Fts3SegReaderFinish(Fts3MultiSegReader *);

int sqlite3Fts3SegReaderCursor(
    Fts3Table *, int, int, const char *, int, int, int, Fts3MultiSegReader *);

/* Flags allowed as part of the 4th argument to SegmentReaderIterate() */
#define FTS3_SEGMENT_REQUIRE_POS   0x00000001
#define FTS3_SEGMENT_IGNORE_EMPTY  0x00000002
#define FTS3_SEGMENT_COLUMN_FILTER 0x00000004
#define FTS3_SEGMENT_PREFIX        0x00000008
#define FTS3_SEGMENT_SCAN          0x00000010







|




















|
|







405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
void sqlite3Fts3PendingTermsClear(Fts3Table *);
int sqlite3Fts3Optimize(Fts3Table *);
int sqlite3Fts3SegReaderNew(int, int, sqlite3_int64,
  sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**);
int sqlite3Fts3SegReaderPending(
  Fts3Table*,int,const char*,int,int,Fts3SegReader**);
void sqlite3Fts3SegReaderFree(Fts3SegReader *);
int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, int, sqlite3_stmt **);
int sqlite3Fts3ReadLock(Fts3Table *);
int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*, int*);

int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **);
int sqlite3Fts3SelectDocsize(Fts3Table *, sqlite3_int64, sqlite3_stmt **);

void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *);
int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int);
int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *);
void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *);
void sqlite3Fts3SegmentsClose(Fts3Table *);

/* Special values interpreted by sqlite3SegReaderCursor() */
#define FTS3_SEGCURSOR_PENDING        -1
#define FTS3_SEGCURSOR_ALL            -2

int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3MultiSegReader*, Fts3SegFilter*);
int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3MultiSegReader *);
void sqlite3Fts3SegReaderFinish(Fts3MultiSegReader *);

int sqlite3Fts3SegReaderCursor(Fts3Table *, 
    int, int, int, const char *, int, int, int, Fts3MultiSegReader *);

/* Flags allowed as part of the 4th argument to SegmentReaderIterate() */
#define FTS3_SEGMENT_REQUIRE_POS   0x00000001
#define FTS3_SEGMENT_IGNORE_EMPTY  0x00000002
#define FTS3_SEGMENT_COLUMN_FILTER 0x00000004
#define FTS3_SEGMENT_PREFIX        0x00000008
#define FTS3_SEGMENT_SCAN          0x00000010
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505




506
507
508
509
510
511
512
void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*);
void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const char *,
  const char *, const char *, int, int
);
void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *);

/* fts3_expr.c */
int sqlite3Fts3ExprParse(sqlite3_tokenizer *, 
  char **, int, int, int, const char *, int, Fts3Expr **
);
void sqlite3Fts3ExprFree(Fts3Expr *);
#ifdef SQLITE_TEST
int sqlite3Fts3ExprInitTestInterface(sqlite3 *db);
int sqlite3Fts3InitTerm(sqlite3 *db);
#endif





/* fts3_aux.c */
int sqlite3Fts3InitAux(sqlite3 *db);

void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *);

int sqlite3Fts3MsrIncrStart(







|







>
>
>
>







494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*);
void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const char *,
  const char *, const char *, int, int
);
void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *);

/* fts3_expr.c */
int sqlite3Fts3ExprParse(sqlite3_tokenizer *, int,
  char **, int, int, int, const char *, int, Fts3Expr **
);
void sqlite3Fts3ExprFree(Fts3Expr *);
#ifdef SQLITE_TEST
int sqlite3Fts3ExprInitTestInterface(sqlite3 *db);
int sqlite3Fts3InitTerm(sqlite3 *db);
#endif

int sqlite3Fts3OpenTokenizer(sqlite3_tokenizer *, int, const char *, int,
  sqlite3_tokenizer_cursor **
);

/* fts3_aux.c */
int sqlite3Fts3InitAux(sqlite3 *db);

void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *);

int sqlite3Fts3MsrIncrStart(

Changes to ext/fts3/fts3_aux.c.

372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
  if( idxNum&FTS4AUX_LE_CONSTRAINT ){
    int iIdx = (idxNum&FTS4AUX_GE_CONSTRAINT) ? 1 : 0;
    pCsr->zStop = sqlite3_mprintf("%s", sqlite3_value_text(apVal[iIdx]));
    pCsr->nStop = sqlite3_value_bytes(apVal[iIdx]);
    if( pCsr->zStop==0 ) return SQLITE_NOMEM;
  }

  rc = sqlite3Fts3SegReaderCursor(pFts3, 0, FTS3_SEGCURSOR_ALL,
      pCsr->filter.zTerm, pCsr->filter.nTerm, 0, isScan, &pCsr->csr
  );
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter);
  }

  if( rc==SQLITE_OK ) rc = fts3auxNextMethod(pCursor);







|







372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
  if( idxNum&FTS4AUX_LE_CONSTRAINT ){
    int iIdx = (idxNum&FTS4AUX_GE_CONSTRAINT) ? 1 : 0;
    pCsr->zStop = sqlite3_mprintf("%s", sqlite3_value_text(apVal[iIdx]));
    pCsr->nStop = sqlite3_value_bytes(apVal[iIdx]);
    if( pCsr->zStop==0 ) return SQLITE_NOMEM;
  }

  rc = sqlite3Fts3SegReaderCursor(pFts3, 0, 0, FTS3_SEGCURSOR_ALL,
      pCsr->filter.zTerm, pCsr->filter.nTerm, 0, isScan, &pCsr->csr
  );
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter);
  }

  if( rc==SQLITE_OK ) rc = fts3auxNextMethod(pCursor);

Changes to ext/fts3/fts3_expr.c.

88
89
90
91
92
93
94

95
96
97
98
99
100
101
**   FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the
**   FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to
**   zero.
*/
typedef struct ParseContext ParseContext;
struct ParseContext {
  sqlite3_tokenizer *pTokenizer;      /* Tokenizer module */

  const char **azCol;                 /* Array of column names for fts3 table */
  int bFts4;                          /* True to allow FTS4-only syntax */
  int nCol;                           /* Number of entries in azCol[] */
  int iDefaultCol;                    /* Default column to query */
  int isNot;                          /* True if getNextNode() sees a unary - */
  sqlite3_context *pCtx;              /* Write error message here */
  int nNest;                          /* Number of nested brackets */







>







88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
**   FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the
**   FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to
**   zero.
*/
typedef struct ParseContext ParseContext;
struct ParseContext {
  sqlite3_tokenizer *pTokenizer;      /* Tokenizer module */
  int iLangid;                        /* Language id used with tokenizer */
  const char **azCol;                 /* Array of column names for fts3 table */
  int bFts4;                          /* True to allow FTS4-only syntax */
  int nCol;                           /* Number of entries in azCol[] */
  int iDefaultCol;                    /* Default column to query */
  int isNot;                          /* True if getNextNode() sees a unary - */
  sqlite3_context *pCtx;              /* Write error message here */
  int nNest;                          /* Number of nested brackets */
123
124
125
126
127
128
129



























130
131
132
133
134
135
136
*/
static void *fts3MallocZero(int nByte){
  void *pRet = sqlite3_malloc(nByte);
  if( pRet ) memset(pRet, 0, nByte);
  return pRet;
}





























/*
** Extract the next token from buffer z (length n) using the tokenizer
** and other information (column names etc.) in pParse. Create an Fts3Expr
** structure of type FTSQUERY_PHRASE containing a phrase consisting of this
** single token and set *ppExpr to point to it. If the end of the buffer is
** reached before a token is found, set *ppExpr to zero. It is the







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
*/
static void *fts3MallocZero(int nByte){
  void *pRet = sqlite3_malloc(nByte);
  if( pRet ) memset(pRet, 0, nByte);
  return pRet;
}

int sqlite3Fts3OpenTokenizer(
  sqlite3_tokenizer *pTokenizer,
  int iLangid,
  const char *z,
  int n,
  sqlite3_tokenizer_cursor **ppCsr
){
  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
  sqlite3_tokenizer_cursor *pCsr = 0;
  int rc;

  rc = pModule->xOpen(pTokenizer, z, n, &pCsr);
  assert( rc==SQLITE_OK || pCsr==0 );
  if( rc==SQLITE_OK ){
    pCsr->pTokenizer = pTokenizer;
    if( pModule->iVersion>=1 ){
      rc = pModule->xLanguageid(pCsr, iLangid);
      if( rc!=SQLITE_OK ){
        pModule->xClose(pCsr);
        pCsr = 0;
      }
    }
  }
  *ppCsr = pCsr;
  return rc;
}


/*
** Extract the next token from buffer z (length n) using the tokenizer
** and other information (column names etc.) in pParse. Create an Fts3Expr
** structure of type FTSQUERY_PHRASE containing a phrase consisting of this
** single token and set *ppExpr to point to it. If the end of the buffer is
** reached before a token is found, set *ppExpr to zero. It is the
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
  sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
  int rc;
  sqlite3_tokenizer_cursor *pCursor;
  Fts3Expr *pRet = 0;
  int nConsumed = 0;

  rc = pModule->xOpen(pTokenizer, z, n, &pCursor);
  if( rc==SQLITE_OK ){
    const char *zToken;
    int nToken, iStart, iEnd, iPosition;
    int nByte;                               /* total space to allocate */

    pCursor->pTokenizer = pTokenizer;
    rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);

    if( rc==SQLITE_OK ){
      nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
      pRet = (Fts3Expr *)fts3MallocZero(nByte);
      if( !pRet ){
        rc = SQLITE_NOMEM;
      }else{
        pRet->eType = FTSQUERY_PHRASE;







|





<

<







178
179
180
181
182
183
184
185
186
187
188
189
190

191

192
193
194
195
196
197
198
  sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
  int rc;
  sqlite3_tokenizer_cursor *pCursor;
  Fts3Expr *pRet = 0;
  int nConsumed = 0;

  rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, n, &pCursor);
  if( rc==SQLITE_OK ){
    const char *zToken;
    int nToken, iStart, iEnd, iPosition;
    int nByte;                               /* total space to allocate */


    rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);

    if( rc==SQLITE_OK ){
      nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
      pRet = (Fts3Expr *)fts3MallocZero(nByte);
      if( !pRet ){
        rc = SQLITE_NOMEM;
      }else{
        pRet->eType = FTSQUERY_PHRASE;
264
265
266
267
268
269
270

271
272
273
274
275
276
277
278
279
280
281
  **
  **   Buffer zTemp: Contains copies of all tokens.
  **
  ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below,
  ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase
  ** structures.
  */

  rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor);
  if( rc==SQLITE_OK ){
    int ii;
    pCursor->pTokenizer = pTokenizer;
    for(ii=0; rc==SQLITE_OK; ii++){
      const char *zByte;
      int nByte, iBegin, iEnd, iPos;
      rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos);
      if( rc==SQLITE_OK ){
        Fts3PhraseToken *pToken;








>
|


<







290
291
292
293
294
295
296
297
298
299
300

301
302
303
304
305
306
307
  **
  **   Buffer zTemp: Contains copies of all tokens.
  **
  ** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below,
  ** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase
  ** structures.
  */
  rc = sqlite3Fts3OpenTokenizer(
      pTokenizer, pParse->iLangid, zInput, nInput, &pCursor);
  if( rc==SQLITE_OK ){
    int ii;

    for(ii=0; rc==SQLITE_OK; ii++){
      const char *zByte;
      int nByte, iBegin, iEnd, iPos;
      rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos);
      if( rc==SQLITE_OK ){
        Fts3PhraseToken *pToken;

741
742
743
744
745
746
747

748
749
750
751
752
753
754
755
756
757


758

759
760
761
762
763
764
765
766
767
768
769
** that appears on the left-hand-side of the MATCH operator (the default
** column to match against for tokens for which a column name is not explicitly
** specified as part of the query string), or -1 if tokens may by default
** match any table column.
*/
int sqlite3Fts3ExprParse(
  sqlite3_tokenizer *pTokenizer,      /* Tokenizer module */

  char **azCol,                       /* Array of column names for fts3 table */
  int bFts4,                          /* True to allow FTS4-only syntax */
  int nCol,                           /* Number of entries in azCol[] */
  int iDefaultCol,                    /* Default column to query */
  const char *z, int n,               /* Text of MATCH query */
  Fts3Expr **ppExpr                   /* OUT: Parsed query structure */
){
  int nParsed;
  int rc;
  ParseContext sParse;


  sParse.pTokenizer = pTokenizer;

  sParse.azCol = (const char **)azCol;
  sParse.nCol = nCol;
  sParse.iDefaultCol = iDefaultCol;
  sParse.nNest = 0;
  sParse.bFts4 = bFts4;
  if( z==0 ){
    *ppExpr = 0;
    return SQLITE_OK;
  }
  if( n<0 ){
    n = (int)strlen(z);







>










>
>

>



<







767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791

792
793
794
795
796
797
798
** that appears on the left-hand-side of the MATCH operator (the default
** column to match against for tokens for which a column name is not explicitly
** specified as part of the query string), or -1 if tokens may by default
** match any table column.
*/
int sqlite3Fts3ExprParse(
  sqlite3_tokenizer *pTokenizer,      /* Tokenizer module */
  int iLangid,                        /* Language id for tokenizer */
  char **azCol,                       /* Array of column names for fts3 table */
  int bFts4,                          /* True to allow FTS4-only syntax */
  int nCol,                           /* Number of entries in azCol[] */
  int iDefaultCol,                    /* Default column to query */
  const char *z, int n,               /* Text of MATCH query */
  Fts3Expr **ppExpr                   /* OUT: Parsed query structure */
){
  int nParsed;
  int rc;
  ParseContext sParse;

  memset(&sParse, 0, sizeof(ParseContext));
  sParse.pTokenizer = pTokenizer;
  sParse.iLangid = iLangid;
  sParse.azCol = (const char **)azCol;
  sParse.nCol = nCol;
  sParse.iDefaultCol = iDefaultCol;

  sParse.bFts4 = bFts4;
  if( z==0 ){
    *ppExpr = 0;
    return SQLITE_OK;
  }
  if( n<0 ){
    n = (int)strlen(z);
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
    goto exprtest_out;
  }
  for(ii=0; ii<nCol; ii++){
    azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]);
  }

  rc = sqlite3Fts3ExprParse(
      pTokenizer, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
  );
  if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
    sqlite3_result_error(context, "Error parsing expression", -1);
  }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){
    sqlite3_result_error_nomem(context);
  }else{
    sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);







|







975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
    goto exprtest_out;
  }
  for(ii=0; ii<nCol; ii++){
    azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]);
  }

  rc = sqlite3Fts3ExprParse(
      pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
  );
  if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
    sqlite3_result_error(context, "Error parsing expression", -1);
  }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){
    sqlite3_result_error_nomem(context);
  }else{
    sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);

Changes to ext/fts3/fts3_snippet.c.

528
529
530
531
532
533
534

535
536
537
538
539
540
541
** This is done as part of extracting the snippet text, not when selecting
** the snippet. Snippet selection is done based on doclists only, so there
** is no way for fts3BestSnippet() to know whether or not the document 
** actually contains terms that follow the final highlighted term. 
*/
static int fts3SnippetShift(
  Fts3Table *pTab,                /* FTS3 table snippet comes from */

  int nSnippet,                   /* Number of tokens desired for snippet */
  const char *zDoc,               /* Document text to extract snippet from */
  int nDoc,                       /* Size of buffer zDoc in bytes */
  int *piPos,                     /* IN/OUT: First token of snippet */
  u64 *pHlmask                    /* IN/OUT: Mask of tokens to highlight */
){
  u64 hlmask = *pHlmask;          /* Local copy of initial highlight-mask */







>







528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
** This is done as part of extracting the snippet text, not when selecting
** the snippet. Snippet selection is done based on doclists only, so there
** is no way for fts3BestSnippet() to know whether or not the document 
** actually contains terms that follow the final highlighted term. 
*/
static int fts3SnippetShift(
  Fts3Table *pTab,                /* FTS3 table snippet comes from */
  int iLangid,                    /* Language id to use in tokenizing */
  int nSnippet,                   /* Number of tokens desired for snippet */
  const char *zDoc,               /* Document text to extract snippet from */
  int nDoc,                       /* Size of buffer zDoc in bytes */
  int *piPos,                     /* IN/OUT: First token of snippet */
  u64 *pHlmask                    /* IN/OUT: Mask of tokens to highlight */
){
  u64 hlmask = *pHlmask;          /* Local copy of initial highlight-mask */
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
      sqlite3_tokenizer_module *pMod;
      sqlite3_tokenizer_cursor *pC;
      pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;

      /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired)
      ** or more tokens in zDoc/nDoc.
      */
      rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
      if( rc!=SQLITE_OK ){
        return rc;
      }
      pC->pTokenizer = pTab->pTokenizer;
      while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){
        const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3;
        rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
      }
      pMod->xClose(pC);
      if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; }








|



<







564
565
566
567
568
569
570
571
572
573
574

575
576
577
578
579
580
581
      sqlite3_tokenizer_module *pMod;
      sqlite3_tokenizer_cursor *pC;
      pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;

      /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired)
      ** or more tokens in zDoc/nDoc.
      */
      rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC);
      if( rc!=SQLITE_OK ){
        return rc;
      }

      while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){
        const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3;
        rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
      }
      pMod->xClose(pC);
      if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; }

627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
    }
    return SQLITE_OK;
  }
  nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol);

  /* Open a token cursor on the document. */
  pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
  rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
  if( rc!=SQLITE_OK ){
    return rc;
  }
  pC->pTokenizer = pTab->pTokenizer;

  while( rc==SQLITE_OK ){
    int iBegin;                   /* Offset in zDoc of start of token */
    int iFin;                     /* Offset in zDoc of end of token */
    int isHighlight;              /* True for highlighted terms */

    rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);







|



<







627
628
629
630
631
632
633
634
635
636
637

638
639
640
641
642
643
644
    }
    return SQLITE_OK;
  }
  nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol);

  /* Open a token cursor on the document. */
  pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
  rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC);
  if( rc!=SQLITE_OK ){
    return rc;
  }


  while( rc==SQLITE_OK ){
    int iBegin;                   /* Offset in zDoc of start of token */
    int iFin;                     /* Offset in zDoc of end of token */
    int isHighlight;              /* True for highlighted terms */

    rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);
653
654
655
656
657
658
659
660


661
662
663
664
665
666
667
      }
      break;
    }
    if( iCurrent<iPos ){ continue; }

    if( !isShiftDone ){
      int n = nDoc - iBegin;
      rc = fts3SnippetShift(pTab, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask);


      isShiftDone = 1;

      /* Now that the shift has been done, check if the initial "..." are
      ** required. They are required if (a) this is not the first fragment,
      ** or (b) this fragment does not begin at position 0 of its column. 
      */
      if( rc==SQLITE_OK && (iPos>0 || iFragment>0) ){







|
>
>







652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
      }
      break;
    }
    if( iCurrent<iPos ){ continue; }

    if( !isShiftDone ){
      int n = nDoc - iBegin;
      rc = fts3SnippetShift(
          pTab, pCsr->iLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask
      );
      isShiftDone = 1;

      /* Now that the shift has been done, check if the initial "..." are
      ** required. They are required if (a) this is not the first fragment,
      ** or (b) this fragment does not begin at position 0 of its column. 
      */
      if( rc==SQLITE_OK && (iPos>0 || iFragment>0) ){
1386
1387
1388
1389
1390
1391
1392

1393

1394
1395
1396
1397
1398
1399
1400
1401
1402
        continue;
      }
      rc = SQLITE_NOMEM;
      goto offsets_out;
    }

    /* Initialize a tokenizer iterator to iterate through column iCol. */

    rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);

    if( rc!=SQLITE_OK ) goto offsets_out;
    pC->pTokenizer = pTab->pTokenizer;

    rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
    while( rc==SQLITE_OK ){
      int i;                      /* Used to loop through terms */
      int iMinPos = 0x7FFFFFFF;   /* Position of next token */
      TermOffset *pTerm = 0;      /* TermOffset associated with next token */








>
|
>

<







1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397

1398
1399
1400
1401
1402
1403
1404
        continue;
      }
      rc = SQLITE_NOMEM;
      goto offsets_out;
    }

    /* Initialize a tokenizer iterator to iterate through column iCol. */
    rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid,
        zDoc, nDoc, &pC
    );
    if( rc!=SQLITE_OK ) goto offsets_out;


    rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
    while( rc==SQLITE_OK ){
      int i;                      /* Used to loop through terms */
      int iMinPos = 0x7FFFFFFF;   /* Position of next token */
      TermOffset *pTerm = 0;      /* TermOffset associated with next token */

Changes to ext/fts3/fts3_term.c.

267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
  testcase(pCsr->filter.zTerm);
  sqlite3Fts3SegReaderFinish(&pCsr->csr);
  memset(&pCsr->csr, 0, ((u8*)&pCsr[1]) - (u8*)&pCsr->csr);

  pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
  pCsr->filter.flags |= FTS3_SEGMENT_SCAN;

  rc = sqlite3Fts3SegReaderCursor(pFts3, p->iIndex, FTS3_SEGCURSOR_ALL,
      pCsr->filter.zTerm, pCsr->filter.nTerm, 0, 1, &pCsr->csr
  );
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter);
  }
  if( rc==SQLITE_OK ){
    rc = fts3termNextMethod(pCursor);







|







267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
  testcase(pCsr->filter.zTerm);
  sqlite3Fts3SegReaderFinish(&pCsr->csr);
  memset(&pCsr->csr, 0, ((u8*)&pCsr[1]) - (u8*)&pCsr->csr);

  pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
  pCsr->filter.flags |= FTS3_SEGMENT_SCAN;

  rc = sqlite3Fts3SegReaderCursor(pFts3, 0, p->iIndex, FTS3_SEGCURSOR_ALL,
      pCsr->filter.zTerm, pCsr->filter.nTerm, 0, 1, &pCsr->csr
  );
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter);
  }
  if( rc==SQLITE_OK ){
    rc = fts3termNextMethod(pCursor);

Changes to ext/fts3/fts3_test.c.

9
10
11
12
13
14
15



16
17
18
19
20
21
22
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
** This file is not part of the production FTS code. It is only used for
** testing. It contains a Tcl command that can be used to test if a document
** matches an FTS NEAR expression.



*/

#include <tcl.h>
#include <string.h>
#include <assert.h>

#ifdef SQLITE_TEST







>
>
>







9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
** This file is not part of the production FTS code. It is only used for
** testing. It contains a Tcl command that can be used to test if a document
** matches an FTS NEAR expression.
**
** As of March 2012, it also contains a version 1 tokenizer used for testing
** that the sqlite3_tokenizer_module.xLanguage() method is invoked correctly.
*/

#include <tcl.h>
#include <string.h>
#include <assert.h>

#ifdef SQLITE_TEST
310
311
312
313
314
315
316

































































































































































































317
318
319
320
321



322
323
324

  Tcl_SetObjResult(interp, pRet);
  Tcl_DecrRefCount(pRet);
#endif
  return TCL_OK;
}


































































































































































































int Sqlitetestfts3_Init(Tcl_Interp *interp){
  Tcl_CreateObjCommand(interp, "fts3_near_match", fts3_near_match_cmd, 0, 0);
  Tcl_CreateObjCommand(interp, 
      "fts3_configure_incr_load", fts3_configure_incr_load_cmd, 0, 0
  );



  return TCL_OK;
}
#endif                  /* ifdef SQLITE_TEST */







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>





>
>
>



313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523

  Tcl_SetObjResult(interp, pRet);
  Tcl_DecrRefCount(pRet);
#endif
  return TCL_OK;
}

/**************************************************************************
** Beginning of test tokenizer code.
**
** For language 0, this tokenizer is similar to the default 'simple' 
** tokenizer. For other languages L, the following:
**
**   * Odd numbered languages are case-sensitive. Even numbered 
**     languages are not.
**
**   * Language ids 100 or greater are considered an error.
**
** The implementation assumes that the input contains only ASCII characters
** (i.e. those that may be encoded in UTF-8 using a single byte).
*/
typedef struct test_tokenizer {
  sqlite3_tokenizer base;
} test_tokenizer;

typedef struct test_tokenizer_cursor {
  sqlite3_tokenizer_cursor base;
  const char *aInput;          /* Input being tokenized */
  int nInput;                  /* Size of the input in bytes */
  int iInput;                  /* Current offset in aInput */
  int iToken;                  /* Index of next token to be returned */
  char *aBuffer;               /* Buffer containing current token */
  int nBuffer;                 /* Number of bytes allocated at pToken */
  int iLangid;                 /* Configured language id */
} test_tokenizer_cursor;

static int testTokenizerCreate(
  int argc, const char * const *argv,
  sqlite3_tokenizer **ppTokenizer
){
  test_tokenizer *pNew;

  pNew = sqlite3_malloc(sizeof(test_tokenizer));
  if( !pNew ) return SQLITE_NOMEM;
  memset(pNew, 0, sizeof(test_tokenizer));

  *ppTokenizer = (sqlite3_tokenizer *)pNew;
  return SQLITE_OK;
}

static int testTokenizerDestroy(sqlite3_tokenizer *pTokenizer){
  test_tokenizer *p = (test_tokenizer *)pTokenizer;
  sqlite3_free(p);
  return SQLITE_OK;
}

static int testTokenizerOpen(
  sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
  const char *pInput, int nBytes,        /* String to be tokenized */
  sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */
){
  int rc = SQLITE_OK;                    /* Return code */
  test_tokenizer_cursor *pCsr;           /* New cursor object */

  UNUSED_PARAMETER(pTokenizer);

  pCsr = (test_tokenizer_cursor *)sqlite3_malloc(sizeof(test_tokenizer_cursor));
  if( pCsr==0 ){
    rc = SQLITE_NOMEM;
  }else{
    memset(pCsr, 0, sizeof(test_tokenizer_cursor));
    pCsr->aInput = pInput;
    if( nBytes<0 ){
      pCsr->nInput = strlen(pInput);
    }else{
      pCsr->nInput = nBytes;
    }
  }

  *ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
  return rc;
}

static int testTokenizerClose(sqlite3_tokenizer_cursor *pCursor){
  test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
  sqlite3_free(pCsr->aBuffer);
  sqlite3_free(pCsr);
  return SQLITE_OK;
}

static int testIsTokenChar(char c){
  return (c>='a' && c<='z') || (c>='A' && c<='Z');
}
static int testTolower(char c){
  char ret = c;
  if( ret>='A' && ret<='Z') ret = ret - ('A'-'a');
  return ret;
}

static int testTokenizerNext(
  sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by testTokenizerOpen */
  const char **ppToken,               /* OUT: *ppToken is the token text */
  int *pnBytes,                       /* OUT: Number of bytes in token */
  int *piStartOffset,                 /* OUT: Starting offset of token */
  int *piEndOffset,                   /* OUT: Ending offset of token */
  int *piPosition                     /* OUT: Position integer of token */
){
  test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
  int rc = SQLITE_OK;
  const char *p;
  const char *pEnd;

  p = &pCsr->aInput[pCsr->iInput];
  pEnd = &pCsr->aInput[pCsr->nInput];

  /* Skip past any white-space */
  assert( p<=pEnd );
  while( p<pEnd && testIsTokenChar(*p)==0 ) p++;

  if( p==pEnd ){
    rc = SQLITE_DONE;
  }else{
    /* Advance to the end of the token */
    const char *pToken = p;
    int nToken;
    while( p<pEnd && testIsTokenChar(*p) ) p++;
    nToken = p-pToken;

    /* Copy the token into the buffer */
    if( nToken>pCsr->nBuffer ){
      sqlite3_free(pCsr->aBuffer);
      pCsr->aBuffer = sqlite3_malloc(nToken);
    }
    if( pCsr->aBuffer==0 ){
      rc = SQLITE_NOMEM;
    }else{
      int i;

      if( pCsr->iLangid & 0x00000001 ){
        for(i=0; i<nToken; i++) pCsr->aBuffer[i] = pToken[i];
      }else{
        for(i=0; i<nToken; i++) pCsr->aBuffer[i] = testTolower(pToken[i]);
      }
      pCsr->iToken++;
      pCsr->iInput = p - pCsr->aInput;

      *ppToken = pCsr->aBuffer;
      *pnBytes = nToken;
      *piStartOffset = pToken - pCsr->aInput;
      *piEndOffset = p - pCsr->aInput;
      *piPosition = pCsr->iToken;
    }
  }

  return rc;
}

static int testTokenizerLanguage(
  sqlite3_tokenizer_cursor *pCursor,
  int iLangid
){
  int rc = SQLITE_OK;
  test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
  pCsr->iLangid = iLangid;
  if( pCsr->iLangid>=100 ){
    rc = SQLITE_ERROR;
  }
  return rc;
}

static int fts3_test_tokenizer_cmd(
  ClientData clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  static const sqlite3_tokenizer_module testTokenizerModule = {
    1,
    testTokenizerCreate,
    testTokenizerDestroy,
    testTokenizerOpen,
    testTokenizerClose,
    testTokenizerNext,
    testTokenizerLanguage
  };
  const sqlite3_tokenizer_module *pPtr = &testTokenizerModule;
  if( objc!=1 ){
    Tcl_WrongNumArgs(interp, 1, objv, "");
    return TCL_ERROR;
  }
  Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(
    (const unsigned char *)&pPtr, sizeof(sqlite3_tokenizer_module *)
  ));
  return TCL_OK;
}

/* 
** End of tokenizer code.
**************************************************************************/ 

int Sqlitetestfts3_Init(Tcl_Interp *interp){
  Tcl_CreateObjCommand(interp, "fts3_near_match", fts3_near_match_cmd, 0, 0);
  Tcl_CreateObjCommand(interp, 
      "fts3_configure_incr_load", fts3_configure_incr_load_cmd, 0, 0
  );
  Tcl_CreateObjCommand(
      interp, "fts3_test_tokenizer", fts3_test_tokenizer_cmd, 0, 0
  );
  return TCL_OK;
}
#endif                  /* ifdef SQLITE_TEST */

Changes to ext/fts3/fts3_tokenizer.c.

284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
  Tcl_IncrRefCount(pRet);

  if( SQLITE_OK!=p->xCreate(zArg ? 1 : 0, &zArg, &pTokenizer) ){
    zErr = "error in xCreate()";
    goto finish;
  }
  pTokenizer->pModule = p;
  if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){
    zErr = "error in xOpen()";
    goto finish;
  }
  pCsr->pTokenizer = pTokenizer;

  while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
    zToken = &zInput[iStart];
    nToken = iEnd-iStart;
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));







|



<







284
285
286
287
288
289
290
291
292
293
294

295
296
297
298
299
300
301
  Tcl_IncrRefCount(pRet);

  if( SQLITE_OK!=p->xCreate(zArg ? 1 : 0, &zArg, &pTokenizer) ){
    zErr = "error in xCreate()";
    goto finish;
  }
  pTokenizer->pModule = p;
  if( sqlite3Fts3OpenTokenizer(pTokenizer, 0, zInput, nInput, &pCsr) ){
    zErr = "error in xOpen()";
    goto finish;
  }


  while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
    zToken = &zInput[iStart];
    nToken = iEnd-iStart;
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));

Changes to ext/fts3/fts3_tokenizer.h.

48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
typedef struct sqlite3_tokenizer sqlite3_tokenizer;
typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;

struct sqlite3_tokenizer_module {

  /*
  ** Structure version. Should always be set to 0.
  */
  int iVersion;

  /*
  ** Create a new tokenizer. The values in the argv[] array are the
  ** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL
  ** TABLE statement that created the fts3 table. For example, if







|







48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module;
typedef struct sqlite3_tokenizer sqlite3_tokenizer;
typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;

struct sqlite3_tokenizer_module {

  /*
  ** Structure version. Should always be set to 0 or 1.
  */
  int iVersion;

  /*
  ** Create a new tokenizer. The values in the argv[] array are the
  ** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL
  ** TABLE statement that created the fts3 table. For example, if
129
130
131
132
133
134
135









136
137
138
139
140
141
142
  int (*xNext)(
    sqlite3_tokenizer_cursor *pCursor,   /* Tokenizer cursor */
    const char **ppToken, int *pnBytes,  /* OUT: Normalized text for token */
    int *piStartOffset,  /* OUT: Byte offset of token in input buffer */
    int *piEndOffset,    /* OUT: Byte offset of end of token in input buffer */
    int *piPosition      /* OUT: Number of tokens returned before this one */
  );









};

struct sqlite3_tokenizer {
  const sqlite3_tokenizer_module *pModule;  /* The module for this tokenizer */
  /* Tokenizer implementations will typically add additional fields */
};








>
>
>
>
>
>
>
>
>







129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
  int (*xNext)(
    sqlite3_tokenizer_cursor *pCursor,   /* Tokenizer cursor */
    const char **ppToken, int *pnBytes,  /* OUT: Normalized text for token */
    int *piStartOffset,  /* OUT: Byte offset of token in input buffer */
    int *piEndOffset,    /* OUT: Byte offset of end of token in input buffer */
    int *piPosition      /* OUT: Number of tokens returned before this one */
  );

  /***********************************************************************
  ** Methods below this point are only available if iVersion>=1.
  */

  /* 
  ** Configure the language id of a tokenizer cursor.
  */
  int (*xLanguageid)(sqlite3_tokenizer_cursor *pCsr, int iLangid);
};

struct sqlite3_tokenizer {
  const sqlite3_tokenizer_module *pModule;  /* The module for this tokenizer */
  /* Tokenizer implementations will typically add additional fields */
};

Changes to ext/fts3/fts3_write.c.

228
229
230
231
232
233
234


235
236
237
238
239
240
241
#define SQL_REPLACE_DOCTOTAL          23

#define SQL_SELECT_ALL_PREFIX_LEVEL   24
#define SQL_DELETE_ALL_TERMS_SEGDIR   25

#define SQL_DELETE_SEGDIR_RANGE       26



/*
** This function is used to obtain an SQLite prepared statement handle
** for the statement identified by the second argument. If successful,
** *pp is set to the requested statement handle and SQLITE_OK returned.
** Otherwise, an SQLite error code is returned and *pp is set to 0.
**
** If argument apVal is not NULL, then it must point to an array with







>
>







228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
#define SQL_REPLACE_DOCTOTAL          23

#define SQL_SELECT_ALL_PREFIX_LEVEL   24
#define SQL_DELETE_ALL_TERMS_SEGDIR   25

#define SQL_DELETE_SEGDIR_RANGE       26

#define SQL_SELECT_ALL_LANGID         27

/*
** This function is used to obtain an SQLite prepared statement handle
** for the statement identified by the second argument. If successful,
** *pp is set to the requested statement handle and SQLITE_OK returned.
** Otherwise, an SQLite error code is returned and *pp is set to 0.
**
** If argument apVal is not NULL, then it must point to an array with
281
282
283
284
285
286
287

288
289
290
291
292
293
294
/* 21 */  "SELECT size FROM %Q.'%q_docsize' WHERE docid=?",
/* 22 */  "SELECT value FROM %Q.'%q_stat' WHERE id=0",
/* 23 */  "REPLACE INTO %Q.'%q_stat' VALUES(0,?)",
/* 24 */  "",
/* 25 */  "",

/* 26 */ "DELETE FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?",


  };
  int rc = SQLITE_OK;
  sqlite3_stmt *pStmt;

  assert( SizeofArray(azSql)==SizeofArray(p->aStmt) );
  assert( eStmt<SizeofArray(azSql) && eStmt>=0 );







>







283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
/* 21 */  "SELECT size FROM %Q.'%q_docsize' WHERE docid=?",
/* 22 */  "SELECT value FROM %Q.'%q_stat' WHERE id=0",
/* 23 */  "REPLACE INTO %Q.'%q_stat' VALUES(0,?)",
/* 24 */  "",
/* 25 */  "",

/* 26 */ "DELETE FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?",
/* 27 */ "SELECT DISTINCT level / (1024 * ?) FROM %Q.'%q_segdir'",

  };
  int rc = SQLITE_OK;
  sqlite3_stmt *pStmt;

  assert( SizeofArray(azSql)==SizeofArray(p->aStmt) );
  assert( eStmt<SizeofArray(azSql) && eStmt>=0 );
426
427
428
429
430
431
432













433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452

453
454
455
456
457
458
459
460
461
462
463
464
465
466
467

468
469

470
471
472
473
474
475
476
477
478
479
480
481
482
    }
  }else{
    rc = SQLITE_OK;
  }

  return rc;
}














/*
** Set *ppStmt to a statement handle that may be used to iterate through
** all rows in the %_segdir table, from oldest to newest. If successful,
** return SQLITE_OK. If an error occurs while preparing the statement, 
** return an SQLite error code.
**
** There is only ever one instance of this SQL statement compiled for
** each FTS3 table.
**
** The statement returns the following columns from the %_segdir table:
**
**   0: idx
**   1: start_block
**   2: leaves_end_block
**   3: end_block
**   4: root
*/
int sqlite3Fts3AllSegdirs(
  Fts3Table *p,                   /* FTS3 table */

  int iIndex,                     /* Index for p->aIndex[] */
  int iLevel,                     /* Level to select */
  sqlite3_stmt **ppStmt           /* OUT: Compiled statement */
){
  int rc;
  sqlite3_stmt *pStmt = 0;

  assert( iLevel==FTS3_SEGCURSOR_ALL || iLevel>=0 );
  assert( iLevel<FTS3_SEGDIR_MAXLEVEL );
  assert( iIndex>=0 && iIndex<p->nIndex );

  if( iLevel<0 ){
    /* "SELECT * FROM %_segdir WHERE level BETWEEN ? AND ? ORDER BY ..." */
    rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE, &pStmt, 0);
    if( rc==SQLITE_OK ){ 

      sqlite3_bind_int(pStmt, 1, iIndex*FTS3_SEGDIR_MAXLEVEL);
      sqlite3_bind_int(pStmt, 2, (iIndex+1)*FTS3_SEGDIR_MAXLEVEL-1);

    }
  }else{
    /* "SELECT * FROM %_segdir WHERE level = ? ORDER BY ..." */
    rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0);
    if( rc==SQLITE_OK ){ 
      sqlite3_bind_int(pStmt, 1, iLevel+iIndex*FTS3_SEGDIR_MAXLEVEL);
    }
  }
  *ppStmt = pStmt;
  return rc;
}









>
>
>
>
>
>
>
>
>
>
>
>
>




















>















>
|
|
>





|







429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
    }
  }else{
    rc = SQLITE_OK;
  }

  return rc;
}

static sqlite3_int64 getAbsoluteLevel(
  Fts3Table *p, 
  int iLangid, 
  int iIndex, 
  int iLevel
){
  assert( iLangid>=0 );
  assert( p->nIndex>0 );
  assert( iIndex>=0 && iIndex<p->nIndex );
  return (iLangid * p->nIndex + iIndex) * FTS3_SEGDIR_MAXLEVEL + iLevel;
}


/*
** Set *ppStmt to a statement handle that may be used to iterate through
** all rows in the %_segdir table, from oldest to newest. If successful,
** return SQLITE_OK. If an error occurs while preparing the statement, 
** return an SQLite error code.
**
** There is only ever one instance of this SQL statement compiled for
** each FTS3 table.
**
** The statement returns the following columns from the %_segdir table:
**
**   0: idx
**   1: start_block
**   2: leaves_end_block
**   3: end_block
**   4: root
*/
int sqlite3Fts3AllSegdirs(
  Fts3Table *p,                   /* FTS3 table */
  int iLangid,                    /* Language being queried */
  int iIndex,                     /* Index for p->aIndex[] */
  int iLevel,                     /* Level to select */
  sqlite3_stmt **ppStmt           /* OUT: Compiled statement */
){
  int rc;
  sqlite3_stmt *pStmt = 0;

  assert( iLevel==FTS3_SEGCURSOR_ALL || iLevel>=0 );
  assert( iLevel<FTS3_SEGDIR_MAXLEVEL );
  assert( iIndex>=0 && iIndex<p->nIndex );

  if( iLevel<0 ){
    /* "SELECT * FROM %_segdir WHERE level BETWEEN ? AND ? ORDER BY ..." */
    rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE, &pStmt, 0);
    if( rc==SQLITE_OK ){ 
      sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
      sqlite3_bind_int(pStmt, 2, 
          getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
      );
    }
  }else{
    /* "SELECT * FROM %_segdir WHERE level = ? ORDER BY ..." */
    rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0);
    if( rc==SQLITE_OK ){ 
      sqlite3_bind_int(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel));
    }
  }
  *ppStmt = pStmt;
  return rc;
}


634
635
636
637
638
639
640

641
642
643
644
645
646
647
** pending-terms hash-table. The docid used is that currently stored in
** p->iPrevDocid, and the column is specified by argument iCol.
**
** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code.
*/
static int fts3PendingTermsAdd(
  Fts3Table *p,                   /* Table into which text will be inserted */

  const char *zText,              /* Text of document to be inserted */
  int iCol,                       /* Column into which text is being inserted */
  u32 *pnWord                     /* OUT: Number of tokens inserted */
){
  int rc;
  int iStart;
  int iEnd;







>







653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
** pending-terms hash-table. The docid used is that currently stored in
** p->iPrevDocid, and the column is specified by argument iCol.
**
** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code.
*/
static int fts3PendingTermsAdd(
  Fts3Table *p,                   /* Table into which text will be inserted */
  int iLangid,                    /* Language id to use */
  const char *zText,              /* Text of document to be inserted */
  int iCol,                       /* Column into which text is being inserted */
  u32 *pnWord                     /* OUT: Number of tokens inserted */
){
  int rc;
  int iStart;
  int iEnd;
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
  ** zText==0. In this case, add zero token entries to the hash table and 
  ** return early. */
  if( zText==0 ){
    *pnWord = 0;
    return SQLITE_OK;
  }

  rc = pModule->xOpen(pTokenizer, zText, -1, &pCsr);
  if( rc!=SQLITE_OK ){
    return rc;
  }
  pCsr->pTokenizer = pTokenizer;

  xNext = pModule->xNext;
  while( SQLITE_OK==rc
      && SQLITE_OK==(rc = xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos))
  ){
    int i;
    if( iPos>=nWord ) nWord = iPos+1;







|



<







683
684
685
686
687
688
689
690
691
692
693

694
695
696
697
698
699
700
  ** zText==0. In this case, add zero token entries to the hash table and 
  ** return early. */
  if( zText==0 ){
    *pnWord = 0;
    return SQLITE_OK;
  }

  rc = sqlite3Fts3OpenTokenizer(pTokenizer, iLangid, zText, -1, &pCsr);
  if( rc!=SQLITE_OK ){
    return rc;
  }


  xNext = pModule->xNext;
  while( SQLITE_OK==rc
      && SQLITE_OK==(rc = xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos))
  ){
    int i;
    if( iPos>=nWord ) nWord = iPos+1;
710
711
712
713
714
715
716
717






718
719
720
721
722
723
724



725
726
727
728

729
730
731
732
733
734
735
}

/* 
** Calling this function indicates that subsequent calls to 
** fts3PendingTermsAdd() are to add term/position-list pairs for the
** contents of the document with docid iDocid.
*/
static int fts3PendingTermsDocid(Fts3Table *p, sqlite_int64 iDocid){






  /* TODO(shess) Explore whether partially flushing the buffer on
  ** forced-flush would provide better performance.  I suspect that if
  ** we ordered the doclists by size and flushed the largest until the
  ** buffer was half empty, that would let the less frequent terms
  ** generate longer doclists.
  */
  if( iDocid<=p->iPrevDocid || p->nPendingData>p->nMaxPendingData ){



    int rc = sqlite3Fts3PendingTermsFlush(p);
    if( rc!=SQLITE_OK ) return rc;
  }
  p->iPrevDocid = iDocid;

  return SQLITE_OK;
}

/*
** Discard the contents of the pending-terms hash tables. 
*/
void sqlite3Fts3PendingTermsClear(Fts3Table *p){







|
>
>
>
>
>
>






|
>
>
>




>







729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
}

/* 
** Calling this function indicates that subsequent calls to 
** fts3PendingTermsAdd() are to add term/position-list pairs for the
** contents of the document with docid iDocid.
*/
static int fts3PendingTermsDocid(
  Fts3Table *p,                   /* Full-text table handle */
  int iLangid,                    /* Language id of row being written */
  sqlite_int64 iDocid             /* Docid of row being written */
){
  assert( iLangid>=0 );

  /* TODO(shess) Explore whether partially flushing the buffer on
  ** forced-flush would provide better performance.  I suspect that if
  ** we ordered the doclists by size and flushed the largest until the
  ** buffer was half empty, that would let the less frequent terms
  ** generate longer doclists.
  */
  if( iDocid<=p->iPrevDocid 
   || p->iPrevLangid!=iLangid
   || p->nPendingData>p->nMaxPendingData 
  ){
    int rc = sqlite3Fts3PendingTermsFlush(p);
    if( rc!=SQLITE_OK ) return rc;
  }
  p->iPrevDocid = iDocid;
  p->iPrevLangid = iLangid;
  return SQLITE_OK;
}

/*
** Discard the contents of the pending-terms hash tables. 
*/
void sqlite3Fts3PendingTermsClear(Fts3Table *p){
750
751
752
753
754
755
756
757





758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781

782
783
784
785
786
787
788
** This function is called by the xUpdate() method as part of an INSERT
** operation. It adds entries for each term in the new record to the
** pendingTerms hash table.
**
** Argument apVal is the same as the similarly named argument passed to
** fts3InsertData(). Parameter iDocid is the docid of the new row.
*/
static int fts3InsertTerms(Fts3Table *p, sqlite3_value **apVal, u32 *aSz){





  int i;                          /* Iterator variable */
  for(i=2; i<p->nColumn+2; i++){
    const char *zText = (const char *)sqlite3_value_text(apVal[i]);
    int rc = fts3PendingTermsAdd(p, zText, i-2, &aSz[i-2]);
    if( rc!=SQLITE_OK ){
      return rc;
    }
    aSz[p->nColumn] += sqlite3_value_bytes(apVal[i]);
  }
  return SQLITE_OK;
}

/*
** This function is called by the xUpdate() method for an INSERT operation.
** The apVal parameter is passed a copy of the apVal argument passed by
** SQLite to the xUpdate() method. i.e:
**
**   apVal[0]                Not used for INSERT.
**   apVal[1]                rowid
**   apVal[2]                Left-most user-defined column
**   ...
**   apVal[p->nColumn+1]     Right-most user-defined column
**   apVal[p->nColumn+2]     Hidden column with same name as table
**   apVal[p->nColumn+3]     Hidden "docid" column (alias for rowid)

*/
static int fts3InsertData(
  Fts3Table *p,                   /* Full-text table */
  sqlite3_value **apVal,          /* Array of values to insert */
  sqlite3_int64 *piDocid          /* OUT: Docid for row just inserted */
){
  int rc;                         /* Return code */







|
>
>
>
>
>



|




















>







779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
** This function is called by the xUpdate() method as part of an INSERT
** operation. It adds entries for each term in the new record to the
** pendingTerms hash table.
**
** Argument apVal is the same as the similarly named argument passed to
** fts3InsertData(). Parameter iDocid is the docid of the new row.
*/
static int fts3InsertTerms(
  Fts3Table *p, 
  int iLangid, 
  sqlite3_value **apVal, 
  u32 *aSz
){
  int i;                          /* Iterator variable */
  for(i=2; i<p->nColumn+2; i++){
    const char *zText = (const char *)sqlite3_value_text(apVal[i]);
    int rc = fts3PendingTermsAdd(p, iLangid, zText, i-2, &aSz[i-2]);
    if( rc!=SQLITE_OK ){
      return rc;
    }
    aSz[p->nColumn] += sqlite3_value_bytes(apVal[i]);
  }
  return SQLITE_OK;
}

/*
** This function is called by the xUpdate() method for an INSERT operation.
** The apVal parameter is passed a copy of the apVal argument passed by
** SQLite to the xUpdate() method. i.e:
**
**   apVal[0]                Not used for INSERT.
**   apVal[1]                rowid
**   apVal[2]                Left-most user-defined column
**   ...
**   apVal[p->nColumn+1]     Right-most user-defined column
**   apVal[p->nColumn+2]     Hidden column with same name as table
**   apVal[p->nColumn+3]     Hidden "docid" column (alias for rowid)
**   apVal[p->nColumn+4]     Hidden languageid column
*/
static int fts3InsertData(
  Fts3Table *p,                   /* Full-text table */
  sqlite3_value **apVal,          /* Array of values to insert */
  sqlite3_int64 *piDocid          /* OUT: Docid for row just inserted */
){
  int rc;                         /* Return code */
805
806
807
808
809
810
811
812
813



814

815
816
817
818
819
820
821
  **
  **   INSERT INTO %_content VALUES(?, ?, ?, ...)
  **
  ** The statement features N '?' variables, where N is the number of user
  ** defined columns in the FTS3 table, plus one for the docid field.
  */
  rc = fts3SqlStmt(p, SQL_CONTENT_INSERT, &pContentInsert, &apVal[1]);
  if( rc!=SQLITE_OK ){
    return rc;



  }


  /* There is a quirk here. The users INSERT statement may have specified
  ** a value for the "rowid" field, for the "docid" field, or for both.
  ** Which is a problem, since "rowid" and "docid" are aliases for the
  ** same value. For example:
  **
  **   INSERT INTO fts3tbl(rowid, docid) VALUES(1, 2);







|
|
>
>
>

>







840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
  **
  **   INSERT INTO %_content VALUES(?, ?, ?, ...)
  **
  ** The statement features N '?' variables, where N is the number of user
  ** defined columns in the FTS3 table, plus one for the docid field.
  */
  rc = fts3SqlStmt(p, SQL_CONTENT_INSERT, &pContentInsert, &apVal[1]);
  if( rc==SQLITE_OK && p->zLanguageid ){
    rc = sqlite3_bind_int(
        pContentInsert, p->nColumn+2, 
        sqlite3_value_int(apVal[p->nColumn+4])
    );
  }
  if( rc!=SQLITE_OK ) return rc;

  /* There is a quirk here. The users INSERT statement may have specified
  ** a value for the "rowid" field, for the "docid" field, or for both.
  ** Which is a problem, since "rowid" and "docid" are aliases for the
  ** same value. For example:
  **
  **   INSERT INTO fts3tbl(rowid, docid) VALUES(1, 2);
866
867
868
869
870
871
872









873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892


893
894
895


896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933

934
935
936
937
938
939
940
941



942
943
944
945


946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
    fts3SqlExec(&rc, p, SQL_DELETE_ALL_DOCSIZE, 0);
  }
  if( p->bHasStat ){
    fts3SqlExec(&rc, p, SQL_DELETE_ALL_STAT, 0);
  }
  return rc;
}










/*
** The first element in the apVal[] array is assumed to contain the docid
** (an integer) of a row about to be deleted. Remove all terms from the
** full-text index.
*/
static void fts3DeleteTerms( 
  int *pRC,               /* Result code */
  Fts3Table *p,           /* The FTS table to delete from */
  sqlite3_value *pRowid,  /* The docid to be deleted */
  u32 *aSz                /* Sizes of deleted document written here */
){
  int rc;
  sqlite3_stmt *pSelect;

  if( *pRC ) return;
  rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pSelect, &pRowid);
  if( rc==SQLITE_OK ){
    if( SQLITE_ROW==sqlite3_step(pSelect) ){
      int i;


      for(i=1; i<=p->nColumn; i++){
        const char *zText = (const char *)sqlite3_column_text(pSelect, i);
        rc = fts3PendingTermsAdd(p, zText, -1, &aSz[i-1]);


        if( rc!=SQLITE_OK ){
          sqlite3_reset(pSelect);
          *pRC = rc;
          return;
        }
        aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i);
      }
    }
    rc = sqlite3_reset(pSelect);
  }else{
    sqlite3_reset(pSelect);
  }
  *pRC = rc;
}

/*
** Forward declaration to account for the circular dependency between
** functions fts3SegmentMerge() and fts3AllocateSegdirIdx().
*/
static int fts3SegmentMerge(Fts3Table *, int, int);

/* 
** This function allocates a new level iLevel index in the segdir table.
** Usually, indexes are allocated within a level sequentially starting
** with 0, so the allocated index is one greater than the value returned
** by:
**
**   SELECT max(idx) FROM %_segdir WHERE level = :iLevel
**
** However, if there are already FTS3_MERGE_COUNT indexes at the requested
** level, they are merged into a single level (iLevel+1) segment and the 
** allocated index is 0.
**
** If successful, *piIdx is set to the allocated index slot and SQLITE_OK
** returned. Otherwise, an SQLite error code is returned.
*/
static int fts3AllocateSegdirIdx(
  Fts3Table *p, 

  int iIndex,                     /* Index for p->aIndex */
  int iLevel, 
  int *piIdx
){
  int rc;                         /* Return Code */
  sqlite3_stmt *pNextIdx;         /* Query for next idx at level iLevel */
  int iNext = 0;                  /* Result of query pNextIdx */




  /* Set variable iNext to the next available segdir index at level iLevel. */
  rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pNextIdx, 0);
  if( rc==SQLITE_OK ){
    sqlite3_bind_int(pNextIdx, 1, iIndex*FTS3_SEGDIR_MAXLEVEL + iLevel);


    if( SQLITE_ROW==sqlite3_step(pNextIdx) ){
      iNext = sqlite3_column_int(pNextIdx, 0);
    }
    rc = sqlite3_reset(pNextIdx);
  }

  if( rc==SQLITE_OK ){
    /* If iNext is FTS3_MERGE_COUNT, indicating that level iLevel is already
    ** full, merge all segments in level iLevel into a single iLevel+1
    ** segment and allocate (newly freed) index 0 at level iLevel. Otherwise,
    ** if iNext is less than FTS3_MERGE_COUNT, allocate index iNext.
    */
    if( iNext>=FTS3_MERGE_COUNT ){
      rc = fts3SegmentMerge(p, iIndex, iLevel);
      *piIdx = 0;
    }else{
      *piIdx = iNext;
    }
  }

  return rc;







>
>
>
>
>
>
>
>
>




















>
>
|

|
>
>
|
|
|
|
<
<













|


















>








>
>
>



|
>
>













|







905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951


952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
    fts3SqlExec(&rc, p, SQL_DELETE_ALL_DOCSIZE, 0);
  }
  if( p->bHasStat ){
    fts3SqlExec(&rc, p, SQL_DELETE_ALL_STAT, 0);
  }
  return rc;
}

/*
**
*/
static int langidFromSelect(Fts3Table *p, sqlite3_stmt *pSelect){
  int iLangid = 0;
  if( p->zLanguageid ) iLangid = sqlite3_column_int(pSelect, p->nColumn+1);
  return iLangid;
}

/*
** The first element in the apVal[] array is assumed to contain the docid
** (an integer) of a row about to be deleted. Remove all terms from the
** full-text index.
*/
static void fts3DeleteTerms( 
  int *pRC,               /* Result code */
  Fts3Table *p,           /* The FTS table to delete from */
  sqlite3_value *pRowid,  /* The docid to be deleted */
  u32 *aSz                /* Sizes of deleted document written here */
){
  int rc;
  sqlite3_stmt *pSelect;

  if( *pRC ) return;
  rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pSelect, &pRowid);
  if( rc==SQLITE_OK ){
    if( SQLITE_ROW==sqlite3_step(pSelect) ){
      int i;
      int iLangid = langidFromSelect(p, pSelect);
      rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pSelect, 0));
      for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){
        const char *zText = (const char *)sqlite3_column_text(pSelect, i);
        rc = fts3PendingTermsAdd(p, iLangid, zText, -1, &aSz[i-1]);
        aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i);
      }
      if( rc!=SQLITE_OK ){
        sqlite3_reset(pSelect);
        *pRC = rc;
        return;


      }
    }
    rc = sqlite3_reset(pSelect);
  }else{
    sqlite3_reset(pSelect);
  }
  *pRC = rc;
}

/*
** Forward declaration to account for the circular dependency between
** functions fts3SegmentMerge() and fts3AllocateSegdirIdx().
*/
static int fts3SegmentMerge(Fts3Table *, int, int, int);

/* 
** This function allocates a new level iLevel index in the segdir table.
** Usually, indexes are allocated within a level sequentially starting
** with 0, so the allocated index is one greater than the value returned
** by:
**
**   SELECT max(idx) FROM %_segdir WHERE level = :iLevel
**
** However, if there are already FTS3_MERGE_COUNT indexes at the requested
** level, they are merged into a single level (iLevel+1) segment and the 
** allocated index is 0.
**
** If successful, *piIdx is set to the allocated index slot and SQLITE_OK
** returned. Otherwise, an SQLite error code is returned.
*/
static int fts3AllocateSegdirIdx(
  Fts3Table *p, 
  int iLangid,                    /* Language id */
  int iIndex,                     /* Index for p->aIndex */
  int iLevel, 
  int *piIdx
){
  int rc;                         /* Return Code */
  sqlite3_stmt *pNextIdx;         /* Query for next idx at level iLevel */
  int iNext = 0;                  /* Result of query pNextIdx */

  assert( iLangid>=0 );
  assert( p->nIndex>=1 );

  /* Set variable iNext to the next available segdir index at level iLevel. */
  rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pNextIdx, 0);
  if( rc==SQLITE_OK ){
    sqlite3_bind_int64(
        pNextIdx, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel)
    );
    if( SQLITE_ROW==sqlite3_step(pNextIdx) ){
      iNext = sqlite3_column_int(pNextIdx, 0);
    }
    rc = sqlite3_reset(pNextIdx);
  }

  if( rc==SQLITE_OK ){
    /* If iNext is FTS3_MERGE_COUNT, indicating that level iLevel is already
    ** full, merge all segments in level iLevel into a single iLevel+1
    ** segment and allocate (newly freed) index 0 at level iLevel. Otherwise,
    ** if iNext is less than FTS3_MERGE_COUNT, allocate index iNext.
    */
    if( iNext>=FTS3_MERGE_COUNT ){
      rc = fts3SegmentMerge(p, iLangid, iIndex, iLevel);
      *piIdx = 0;
    }else{
      *piIdx = iNext;
    }
  }

  return rc;
2175
2176
2177
2178
2179
2180
2181
2182





2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196


2197
2198
2199
2200
2201
2202
2203
** Set *pnMax to the largest segment level in the database for the index
** iIndex.
**
** Segment levels are stored in the 'level' column of the %_segdir table.
**
** Return SQLITE_OK if successful, or an SQLite error code if not.
*/
static int fts3SegmentMaxLevel(Fts3Table *p, int iIndex, int *pnMax){





  sqlite3_stmt *pStmt;
  int rc;
  assert( iIndex>=0 && iIndex<p->nIndex );

  /* Set pStmt to the compiled version of:
  **
  **   SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?
  **
  ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR).
  */
  rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0);
  if( rc!=SQLITE_OK ) return rc;
  sqlite3_bind_int(pStmt, 1, iIndex*FTS3_SEGDIR_MAXLEVEL);
  sqlite3_bind_int(pStmt, 2, (iIndex+1)*FTS3_SEGDIR_MAXLEVEL - 1);


  if( SQLITE_ROW==sqlite3_step(pStmt) ){
    *pnMax = sqlite3_column_int(pStmt, 0);
  }
  return sqlite3_reset(pStmt);
}

/*







|
>
>
>
>
>












|
|
>
>







2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
** Set *pnMax to the largest segment level in the database for the index
** iIndex.
**
** Segment levels are stored in the 'level' column of the %_segdir table.
**
** Return SQLITE_OK if successful, or an SQLite error code if not.
*/
static int fts3SegmentMaxLevel(
  Fts3Table *p, 
  int iLangid,
  int iIndex, 
  int *pnMax
){
  sqlite3_stmt *pStmt;
  int rc;
  assert( iIndex>=0 && iIndex<p->nIndex );

  /* Set pStmt to the compiled version of:
  **
  **   SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?
  **
  ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR).
  */
  rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0);
  if( rc!=SQLITE_OK ) return rc;
  sqlite3_bind_int(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
  sqlite3_bind_int(pStmt, 2, 
      getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
  );
  if( SQLITE_ROW==sqlite3_step(pStmt) ){
    *pnMax = sqlite3_column_int(pStmt, 0);
  }
  return sqlite3_reset(pStmt);
}

/*
2212
2213
2214
2215
2216
2217
2218

2219
2220
2221
2222
2223
2224
2225
**   2) deletes all %_segdir entries with level iLevel, or all %_segdir
**      entries regardless of level if (iLevel<0).
**
** SQLITE_OK is returned if successful, otherwise an SQLite error code.
*/
static int fts3DeleteSegdir(
  Fts3Table *p,                   /* Virtual table handle */

  int iIndex,                     /* Index for p->aIndex */
  int iLevel,                     /* Level of %_segdir entries to delete */
  Fts3SegReader **apSegment,      /* Array of SegReader objects */
  int nReader                     /* Size of array apSegment */
){
  int rc;                         /* Return Code */
  int i;                          /* Iterator variable */







>







2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
**   2) deletes all %_segdir entries with level iLevel, or all %_segdir
**      entries regardless of level if (iLevel<0).
**
** SQLITE_OK is returned if successful, otherwise an SQLite error code.
*/
static int fts3DeleteSegdir(
  Fts3Table *p,                   /* Virtual table handle */
  int iLangid,                    /* Language id */
  int iIndex,                     /* Index for p->aIndex */
  int iLevel,                     /* Level of %_segdir entries to delete */
  Fts3SegReader **apSegment,      /* Array of SegReader objects */
  int nReader                     /* Size of array apSegment */
){
  int rc;                         /* Return Code */
  int i;                          /* Iterator variable */
2239
2240
2241
2242
2243
2244
2245
2246
2247


2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
    return rc;
  }

  assert( iLevel>=0 || iLevel==FTS3_SEGCURSOR_ALL );
  if( iLevel==FTS3_SEGCURSOR_ALL ){
    rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_RANGE, &pDelete, 0);
    if( rc==SQLITE_OK ){
      sqlite3_bind_int(pDelete, 1, iIndex*FTS3_SEGDIR_MAXLEVEL);
      sqlite3_bind_int(pDelete, 2, (iIndex+1) * FTS3_SEGDIR_MAXLEVEL - 1);


    }
  }else{
    rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pDelete, 0);
    if( rc==SQLITE_OK ){
      sqlite3_bind_int(pDelete, 1, iIndex*FTS3_SEGDIR_MAXLEVEL + iLevel);
    }
  }

  if( rc==SQLITE_OK ){
    sqlite3_step(pDelete);
    rc = sqlite3_reset(pDelete);
  }







|
|
>
>




|







2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
    return rc;
  }

  assert( iLevel>=0 || iLevel==FTS3_SEGCURSOR_ALL );
  if( iLevel==FTS3_SEGCURSOR_ALL ){
    rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_RANGE, &pDelete, 0);
    if( rc==SQLITE_OK ){
      sqlite3_bind_int(pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
      sqlite3_bind_int(pDelete, 2, 
          getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
      );
    }
  }else{
    rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pDelete, 0);
    if( rc==SQLITE_OK ){
      sqlite3_bind_int(pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex,iLevel));
    }
  }

  if( rc==SQLITE_OK ){
    sqlite3_step(pDelete);
    rc = sqlite3_reset(pDelete);
  }
2714
2715
2716
2717
2718
2719
2720
2721





2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783


2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
** currently present in the database.
**
** If this function is called with iLevel<0, but there is only one
** segment in the database, SQLITE_DONE is returned immediately. 
** Otherwise, if successful, SQLITE_OK is returned. If an error occurs, 
** an SQLite error code is returned.
*/
static int fts3SegmentMerge(Fts3Table *p, int iIndex, int iLevel){





  int rc;                         /* Return code */
  int iIdx = 0;                   /* Index of new segment */
  int iNewLevel = 0;              /* Level/index to create new segment at */
  SegmentWriter *pWriter = 0;     /* Used to write the new, merged, segment */
  Fts3SegFilter filter;           /* Segment term filter condition */
  Fts3MultiSegReader csr;        /* Cursor to iterate through level(s) */
  int bIgnoreEmpty = 0;           /* True to ignore empty segments */

  assert( iLevel==FTS3_SEGCURSOR_ALL
       || iLevel==FTS3_SEGCURSOR_PENDING
       || iLevel>=0
  );
  assert( iLevel<FTS3_SEGDIR_MAXLEVEL );
  assert( iIndex>=0 && iIndex<p->nIndex );

  rc = sqlite3Fts3SegReaderCursor(p, iIndex, iLevel, 0, 0, 1, 0, &csr);
  if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished;

  if( iLevel==FTS3_SEGCURSOR_ALL ){
    /* This call is to merge all segments in the database to a single
    ** segment. The level of the new segment is equal to the the numerically 
    ** greatest segment level currently present in the database for this
    ** index. The idx of the new segment is always 0.  */
    if( csr.nSegment==1 ){
      rc = SQLITE_DONE;
      goto finished;
    }
    rc = fts3SegmentMaxLevel(p, iIndex, &iNewLevel);
    bIgnoreEmpty = 1;

  }else if( iLevel==FTS3_SEGCURSOR_PENDING ){
    iNewLevel = iIndex * FTS3_SEGDIR_MAXLEVEL; 
    rc = fts3AllocateSegdirIdx(p, iIndex, 0, &iIdx);
  }else{
    /* This call is to merge all segments at level iLevel. find the next
    ** available segment index at level iLevel+1. The call to
    ** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to 
    ** a single iLevel+2 segment if necessary.  */
    rc = fts3AllocateSegdirIdx(p, iIndex, iLevel+1, &iIdx);
    iNewLevel = iIndex * FTS3_SEGDIR_MAXLEVEL + iLevel+1;
  }
  if( rc!=SQLITE_OK ) goto finished;
  assert( csr.nSegment>0 );
  assert( iNewLevel>=(iIndex*FTS3_SEGDIR_MAXLEVEL) );
  assert( iNewLevel<((iIndex+1)*FTS3_SEGDIR_MAXLEVEL) );

  memset(&filter, 0, sizeof(Fts3SegFilter));
  filter.flags = FTS3_SEGMENT_REQUIRE_POS;
  filter.flags |= (bIgnoreEmpty ? FTS3_SEGMENT_IGNORE_EMPTY : 0);

  rc = sqlite3Fts3SegReaderStart(p, &csr, &filter);
  while( SQLITE_OK==rc ){
    rc = sqlite3Fts3SegReaderStep(p, &csr);
    if( rc!=SQLITE_ROW ) break;
    rc = fts3SegWriterAdd(p, &pWriter, 1, 
        csr.zTerm, csr.nTerm, csr.aDoclist, csr.nDoclist);
  }
  if( rc!=SQLITE_OK ) goto finished;
  assert( pWriter );

  if( iLevel!=FTS3_SEGCURSOR_PENDING ){
    rc = fts3DeleteSegdir(p, iIndex, iLevel, csr.apSegment, csr.nSegment);


    if( rc!=SQLITE_OK ) goto finished;
  }
  rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx);

 finished:
  fts3SegWriterFree(pWriter);
  sqlite3Fts3SegReaderFinish(&csr);
  return rc;
}


/* 
** Flush the contents of pendingTerms to level 0 segments.
*/
int sqlite3Fts3PendingTermsFlush(Fts3Table *p){
  int rc = SQLITE_OK;
  int i;
  for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
    rc = fts3SegmentMerge(p, i, FTS3_SEGCURSOR_PENDING);
    if( rc==SQLITE_DONE ) rc = SQLITE_OK;
  }
  sqlite3Fts3PendingTermsClear(p);
  return rc;
}

/*







|
>
>
>
>
>





|









|











|



|
|





|
|



|
|
















|
>
>


















|







2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
** currently present in the database.
**
** If this function is called with iLevel<0, but there is only one
** segment in the database, SQLITE_DONE is returned immediately. 
** Otherwise, if successful, SQLITE_OK is returned. If an error occurs, 
** an SQLite error code is returned.
*/
static int fts3SegmentMerge(
  Fts3Table *p, 
  int iLangid,                    /* Language id to merge */
  int iIndex,                     /* Index in p->aIndex[] to merge */
  int iLevel                      /* Level to merge */
){
  int rc;                         /* Return code */
  int iIdx = 0;                   /* Index of new segment */
  int iNewLevel = 0;              /* Level/index to create new segment at */
  SegmentWriter *pWriter = 0;     /* Used to write the new, merged, segment */
  Fts3SegFilter filter;           /* Segment term filter condition */
  Fts3MultiSegReader csr;         /* Cursor to iterate through level(s) */
  int bIgnoreEmpty = 0;           /* True to ignore empty segments */

  assert( iLevel==FTS3_SEGCURSOR_ALL
       || iLevel==FTS3_SEGCURSOR_PENDING
       || iLevel>=0
  );
  assert( iLevel<FTS3_SEGDIR_MAXLEVEL );
  assert( iIndex>=0 && iIndex<p->nIndex );

  rc = sqlite3Fts3SegReaderCursor(p, iLangid, iIndex, iLevel, 0, 0, 1, 0, &csr);
  if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished;

  if( iLevel==FTS3_SEGCURSOR_ALL ){
    /* This call is to merge all segments in the database to a single
    ** segment. The level of the new segment is equal to the the numerically 
    ** greatest segment level currently present in the database for this
    ** index. The idx of the new segment is always 0.  */
    if( csr.nSegment==1 ){
      rc = SQLITE_DONE;
      goto finished;
    }
    rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iNewLevel);
    bIgnoreEmpty = 1;

  }else if( iLevel==FTS3_SEGCURSOR_PENDING ){
    iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, 0);
    rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, 0, &iIdx);
  }else{
    /* This call is to merge all segments at level iLevel. find the next
    ** available segment index at level iLevel+1. The call to
    ** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to 
    ** a single iLevel+2 segment if necessary.  */
    rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx);
    iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, iLevel+1);
  }
  if( rc!=SQLITE_OK ) goto finished;
  assert( csr.nSegment>0 );
  assert( iNewLevel>=getAbsoluteLevel(p, iLangid, iIndex, 0) );
  assert( iNewLevel<getAbsoluteLevel(p, iLangid, iIndex,FTS3_SEGDIR_MAXLEVEL) );

  memset(&filter, 0, sizeof(Fts3SegFilter));
  filter.flags = FTS3_SEGMENT_REQUIRE_POS;
  filter.flags |= (bIgnoreEmpty ? FTS3_SEGMENT_IGNORE_EMPTY : 0);

  rc = sqlite3Fts3SegReaderStart(p, &csr, &filter);
  while( SQLITE_OK==rc ){
    rc = sqlite3Fts3SegReaderStep(p, &csr);
    if( rc!=SQLITE_ROW ) break;
    rc = fts3SegWriterAdd(p, &pWriter, 1, 
        csr.zTerm, csr.nTerm, csr.aDoclist, csr.nDoclist);
  }
  if( rc!=SQLITE_OK ) goto finished;
  assert( pWriter );

  if( iLevel!=FTS3_SEGCURSOR_PENDING ){
    rc = fts3DeleteSegdir(
        p, iLangid, iIndex, iLevel, csr.apSegment, csr.nSegment
    );
    if( rc!=SQLITE_OK ) goto finished;
  }
  rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx);

 finished:
  fts3SegWriterFree(pWriter);
  sqlite3Fts3SegReaderFinish(&csr);
  return rc;
}


/* 
** Flush the contents of pendingTerms to level 0 segments.
*/
int sqlite3Fts3PendingTermsFlush(Fts3Table *p){
  int rc = SQLITE_OK;
  int i;
  for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
    rc = fts3SegmentMerge(p, p->iPrevLangid, i, FTS3_SEGCURSOR_PENDING);
    if( rc==SQLITE_DONE ) rc = SQLITE_OK;
  }
  sqlite3Fts3PendingTermsClear(p);
  return rc;
}

/*
2950
2951
2952
2953
2954
2955
2956




2957
2958
2959
2960









2961
2962
2963
2964
2965
2966
2967





2968
2969
2970
2971
2972
2973
2974
  }
  sqlite3_bind_blob(pStmt, 1, pBlob, nBlob, SQLITE_STATIC);
  sqlite3_step(pStmt);
  *pRC = sqlite3_reset(pStmt);
  sqlite3_free(a);
}





static int fts3DoOptimize(Fts3Table *p, int bReturnDone){
  int i;
  int bSeenDone = 0;
  int rc = SQLITE_OK;









  for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
    rc = fts3SegmentMerge(p, i, FTS3_SEGCURSOR_ALL);
    if( rc==SQLITE_DONE ){
      bSeenDone = 1;
      rc = SQLITE_OK;
    }
  }





  sqlite3Fts3SegmentsClose(p);
  sqlite3Fts3PendingTermsClear(p);

  return (rc==SQLITE_OK && bReturnDone && bSeenDone) ? SQLITE_DONE : rc;
}

/*







>
>
>
>

<

|
>
>
>
>
>
>
>
>
>
|
|
|
|
|
|
|
>
>
>
>
>







3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034

3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
  }
  sqlite3_bind_blob(pStmt, 1, pBlob, nBlob, SQLITE_STATIC);
  sqlite3_step(pStmt);
  *pRC = sqlite3_reset(pStmt);
  sqlite3_free(a);
}

/*
** Merge the entire database so that there is one segment for each 
** iIndex/iLangid combination.
*/
static int fts3DoOptimize(Fts3Table *p, int bReturnDone){

  int bSeenDone = 0;
  int rc;
  sqlite3_stmt *pAllLangid = 0;

  rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0);
  if( rc==SQLITE_OK ){
    int rc2;
    sqlite3_bind_int(pAllLangid, 1, p->nIndex);
    while( sqlite3_step(pAllLangid)==SQLITE_ROW ){
      int i;
      int iLangid = sqlite3_column_int(pAllLangid, 0);
      for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
        rc = fts3SegmentMerge(p, iLangid, i, FTS3_SEGCURSOR_ALL);
        if( rc==SQLITE_DONE ){
          bSeenDone = 1;
          rc = SQLITE_OK;
        }
      }
    }
    rc2 = sqlite3_reset(pAllLangid);
    if( rc==SQLITE_OK ) rc = rc2;
  }

  sqlite3Fts3SegmentsClose(p);
  sqlite3Fts3PendingTermsClear(p);

  return (rc==SQLITE_OK && bReturnDone && bSeenDone) ? SQLITE_DONE : rc;
}

/*
3011
3012
3013
3014
3015
3016
3017

3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
        aSzIns = &aSz[p->nColumn+1];
        aSzDel = &aSzIns[p->nColumn+1];
      }
    }

    while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
      int iCol;

      rc = fts3PendingTermsDocid(p, sqlite3_column_int64(pStmt, 0));
      aSz[p->nColumn] = 0;
      for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){
        const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1);
        rc = fts3PendingTermsAdd(p, z, iCol, &aSz[iCol]);
        aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1);
      }
      if( p->bHasDocsize ){
        fts3InsertDocsize(&rc, p, aSz);
      }
      if( rc!=SQLITE_OK ){
        sqlite3_finalize(pStmt);







>
|



|







3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
        aSzIns = &aSz[p->nColumn+1];
        aSzDel = &aSzIns[p->nColumn+1];
      }
    }

    while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
      int iCol;
      int iLangid = langidFromSelect(p, pStmt);
      rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pStmt, 0));
      aSz[p->nColumn] = 0;
      for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){
        const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1);
        rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]);
        aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1);
      }
      if( p->bHasDocsize ){
        fts3InsertDocsize(&rc, p, aSz);
      }
      if( rc!=SQLITE_OK ){
        sqlite3_finalize(pStmt);
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
    assert( pCsr->isRequireSeek==0 );
    iDocid = sqlite3_column_int64(pCsr->pStmt, 0);
  
    for(i=0; i<p->nColumn && rc==SQLITE_OK; i++){
      const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1);
      sqlite3_tokenizer_cursor *pTC = 0;
  
      rc = pModule->xOpen(pT, zText, -1, &pTC);
      while( rc==SQLITE_OK ){
        char const *zToken;       /* Buffer containing token */
        int nToken;               /* Number of bytes in token */
        int iDum1, iDum2;         /* Dummy variables */
        int iPos;                 /* Position of token in zText */
  
        pTC->pTokenizer = pT;
        rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos);
        for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
          Fts3PhraseToken *pPT = pDef->pToken;
          if( (pDef->iCol>=p->nColumn || pDef->iCol==i)
           && (pPT->bFirst==0 || iPos==0)
           && (pPT->n==nToken || (pPT->isPrefix && pPT->n<nToken))
           && (0==memcmp(zToken, pPT->z, pPT->n))







|






<







3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238

3239
3240
3241
3242
3243
3244
3245
    assert( pCsr->isRequireSeek==0 );
    iDocid = sqlite3_column_int64(pCsr->pStmt, 0);
  
    for(i=0; i<p->nColumn && rc==SQLITE_OK; i++){
      const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1);
      sqlite3_tokenizer_cursor *pTC = 0;
  
      rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC);
      while( rc==SQLITE_OK ){
        char const *zToken;       /* Buffer containing token */
        int nToken;               /* Number of bytes in token */
        int iDum1, iDum2;         /* Dummy variables */
        int iPos;                 /* Position of token in zText */
  

        rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos);
        for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
          Fts3PhraseToken *pPT = pDef->pToken;
          if( (pDef->iCol>=p->nColumn || pDef->iCol==i)
           && (pPT->bFirst==0 || iPos==0)
           && (pPT->n==nToken || (pPT->isPrefix && pPT->n<nToken))
           && (0==memcmp(zToken, pPT->z, pPT->n))
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268









3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284




3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296





3297
3298
3299
3300
3301
3302
3303
    if( isEmpty ){
      /* Deleting this row means the whole table is empty. In this case
      ** delete the contents of all three tables and throw away any
      ** data in the pendingTerms hash table.  */
      rc = fts3DeleteAll(p, 1);
      *pnDoc = *pnDoc - 1;
    }else{
      sqlite3_int64 iRemove = sqlite3_value_int64(pRowid);
      rc = fts3PendingTermsDocid(p, iRemove);
      fts3DeleteTerms(&rc, p, pRowid, aSzDel);
      if( p->zContentTbl==0 ){
        fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, &pRowid);
        if( sqlite3_changes(p->db) ) *pnDoc = *pnDoc - 1;
      }else{
        *pnDoc = *pnDoc - 1;
      }
      if( p->bHasDocsize ){
        fts3SqlExec(&rc, p, SQL_DELETE_DOCSIZE, &pRowid);
      }
    }
  }

  return rc;
}

/*
** This function does the work for the xUpdate method of FTS3 virtual
** tables.









*/
int sqlite3Fts3UpdateMethod(
  sqlite3_vtab *pVtab,            /* FTS3 vtab object */
  int nArg,                       /* Size of argument array */
  sqlite3_value **apVal,          /* Array of arguments */
  sqlite_int64 *pRowid            /* OUT: The affected (or effected) rowid */
){
  Fts3Table *p = (Fts3Table *)pVtab;
  int rc = SQLITE_OK;             /* Return Code */
  int isRemove = 0;               /* True for an UPDATE or DELETE */
  u32 *aSzIns = 0;                /* Sizes of inserted documents */
  u32 *aSzDel;                    /* Sizes of deleted documents */
  int nChng = 0;                  /* Net change in number of documents */
  int bInsertDone = 0;

  assert( p->pSegments==0 );





  /* Check for a "special" INSERT operation. One of the form:
  **
  **   INSERT INTO xyz(xyz) VALUES('command');
  */
  if( nArg>1 
   && sqlite3_value_type(apVal[0])==SQLITE_NULL 
   && sqlite3_value_type(apVal[p->nColumn+2])!=SQLITE_NULL 
  ){
    rc = fts3SpecialInsert(p, apVal[p->nColumn+2]);
    goto update_out;
  }






  /* Allocate space to hold the change in document sizes */
  aSzIns = sqlite3_malloc( sizeof(aSzIns[0])*(p->nColumn+1)*2 );
  if( aSzIns==0 ){
    rc = SQLITE_NOMEM;
    goto update_out;
  }







<
<


















|
>
>
>
>
>
>
>
>
>
















>
>
>
>












>
>
>
>
>







3331
3332
3333
3334
3335
3336
3337


3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
    if( isEmpty ){
      /* Deleting this row means the whole table is empty. In this case
      ** delete the contents of all three tables and throw away any
      ** data in the pendingTerms hash table.  */
      rc = fts3DeleteAll(p, 1);
      *pnDoc = *pnDoc - 1;
    }else{


      fts3DeleteTerms(&rc, p, pRowid, aSzDel);
      if( p->zContentTbl==0 ){
        fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, &pRowid);
        if( sqlite3_changes(p->db) ) *pnDoc = *pnDoc - 1;
      }else{
        *pnDoc = *pnDoc - 1;
      }
      if( p->bHasDocsize ){
        fts3SqlExec(&rc, p, SQL_DELETE_DOCSIZE, &pRowid);
      }
    }
  }

  return rc;
}

/*
** This function does the work for the xUpdate method of FTS3 virtual
** tables. The schema of the virtual table being:
**
**     CREATE TABLE <table name>( 
**       <user COLUMns>,
**       <table name> HIDDEN, 
**       docid HIDDEN, 
**       <langid> HIDDEN
**     );
**
** 
*/
int sqlite3Fts3UpdateMethod(
  sqlite3_vtab *pVtab,            /* FTS3 vtab object */
  int nArg,                       /* Size of argument array */
  sqlite3_value **apVal,          /* Array of arguments */
  sqlite_int64 *pRowid            /* OUT: The affected (or effected) rowid */
){
  Fts3Table *p = (Fts3Table *)pVtab;
  int rc = SQLITE_OK;             /* Return Code */
  int isRemove = 0;               /* True for an UPDATE or DELETE */
  u32 *aSzIns = 0;                /* Sizes of inserted documents */
  u32 *aSzDel;                    /* Sizes of deleted documents */
  int nChng = 0;                  /* Net change in number of documents */
  int bInsertDone = 0;

  assert( p->pSegments==0 );
  assert( 
      nArg==1                     /* DELETE operations */
   || nArg==(2 + p->nColumn + 3)  /* INSERT or UPDATE operations */
  );

  /* Check for a "special" INSERT operation. One of the form:
  **
  **   INSERT INTO xyz(xyz) VALUES('command');
  */
  if( nArg>1 
   && sqlite3_value_type(apVal[0])==SQLITE_NULL 
   && sqlite3_value_type(apVal[p->nColumn+2])!=SQLITE_NULL 
  ){
    rc = fts3SpecialInsert(p, apVal[p->nColumn+2]);
    goto update_out;
  }

  if( nArg>1 && sqlite3_value_int(apVal[2 + p->nColumn + 2])<0 ){
    rc = SQLITE_CONSTRAINT;
    goto update_out;
  }

  /* Allocate space to hold the change in document sizes */
  aSzIns = sqlite3_malloc( sizeof(aSzIns[0])*(p->nColumn+1)*2 );
  if( aSzIns==0 ){
    rc = SQLITE_NOMEM;
    goto update_out;
  }
3358
3359
3360
3361
3362
3363
3364

3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
    assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER );
    rc = fts3DeleteByRowid(p, apVal[0], &nChng, aSzDel);
    isRemove = 1;
  }
  
  /* If this is an INSERT or UPDATE operation, insert the new record. */
  if( nArg>1 && rc==SQLITE_OK ){

    if( bInsertDone==0 ){
      rc = fts3InsertData(p, apVal, pRowid);
      if( rc==SQLITE_CONSTRAINT && p->zContentTbl==0 ){
        rc = FTS_CORRUPT_VTAB;
      }
    }
    if( rc==SQLITE_OK && (!isRemove || *pRowid!=p->iPrevDocid ) ){
      rc = fts3PendingTermsDocid(p, *pRowid);
    }
    if( rc==SQLITE_OK ){
      assert( p->iPrevDocid==*pRowid );
      rc = fts3InsertTerms(p, apVal, aSzIns);
    }
    if( p->bHasDocsize ){
      fts3InsertDocsize(&rc, p, aSzIns);
    }
    nChng++;
  }








>







|



|







3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
    assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER );
    rc = fts3DeleteByRowid(p, apVal[0], &nChng, aSzDel);
    isRemove = 1;
  }
  
  /* If this is an INSERT or UPDATE operation, insert the new record. */
  if( nArg>1 && rc==SQLITE_OK ){
    int iLangid = sqlite3_value_int(apVal[2 + p->nColumn + 2]);
    if( bInsertDone==0 ){
      rc = fts3InsertData(p, apVal, pRowid);
      if( rc==SQLITE_CONSTRAINT && p->zContentTbl==0 ){
        rc = FTS_CORRUPT_VTAB;
      }
    }
    if( rc==SQLITE_OK && (!isRemove || *pRowid!=p->iPrevDocid ) ){
      rc = fts3PendingTermsDocid(p, iLangid, *pRowid);
    }
    if( rc==SQLITE_OK ){
      assert( p->iPrevDocid==*pRowid );
      rc = fts3InsertTerms(p, iLangid, apVal, aSzIns);
    }
    if( p->bHasDocsize ){
      fts3InsertDocsize(&rc, p, aSzIns);
    }
    nChng++;
  }

Added test/fts4langid.test.



































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
# 2012 March 01
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the languageid=xxx FTS4 option.
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl
set ::testprefix fts4content

# If SQLITE_ENABLE_FTS3 is defined, omit this file.
ifcapable !fts3 {
  finish_test
  return
}

set ::testprefix fts4langid

#---------------------------------------------------------------------------
# Test plan:
#
#   1.* - Warm-body tests created for specific purposes during development.
#         Passing these doesn't really prove much.
#
#   2.1.* - Test that FTS queries only ever return rows associated with
#           the requested language.
#
#   2.2.* - Same as 2.1.*, after an 'optimize' command.
#
#   2.3.* - Same as 2.1.*, after a 'rebuild' command.
#
#   3.* - Tests with content= tables. Both where there is a real 
#         underlying content table and where there is not.
#
#   4.* - Test that if one is provided, the tokenizer xLanguage method
#         is called to configure the tokenizer before tokenizing query
#         or document text.
#
#   5.* - Test the fts4aux table when the associated FTS4 table contains
#         multiple languages.
#

do_execsql_test 1.1 {
  CREATE VIRTUAL TABLE t1 USING fts4(a, b, languageid=lang_id);
}

do_execsql_test 1.2 {
  SELECT sql FROM sqlite_master WHERE name = 't1_content';
} {{CREATE TABLE 't1_content'(docid INTEGER PRIMARY KEY, 'c0a', 'c1b', langid)}}

do_execsql_test 1.3 {SELECT docid FROM t1} {}
do_execsql_test 1.4 {SELECT lang_id FROM t1} {}

do_execsql_test 1.5 {INSERT INTO t1(a, b) VALUES('aaa', 'bbb')}
do_execsql_test 1.6 {SELECT lang_id FROM t1 } {0}

do_execsql_test 1.7 {INSERT INTO t1(a, b, lang_id) VALUES('aaa', 'bbb', 4)}
do_execsql_test 1.8 {SELECT lang_id FROM t1 } {0 4}

do_execsql_test 1.9  {INSERT INTO t1(a, b, lang_id) VALUES('aaa', 'bbb', 'xyz')}
do_execsql_test 1.10 {SELECT lang_id FROM t1} {0 4 0}

do_execsql_test 1.11 {
  CREATE VIRTUAL TABLE t2 USING fts4;
  INSERT INTO t2 VALUES('abc');
} 
do_execsql_test 1.12 { SELECT rowid FROM t2 WHERE content MATCH 'abc' } 1

do_execsql_test 1.13 {
  DROP TABLE t1;
  CREATE VIRTUAL TABLE t1 USING fts4(languageid=lang_id);
  INSERT INTO t1(content)          VALUES('a b c');
  INSERT INTO t1(content, lang_id) VALUES('a b c', 1);
}

do_execsql_test 1.14 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'b';
} {1}
do_execsql_test 1.15 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'b' AND lang_id = 0;
} {1}

do_execsql_test 1.16 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'b' AND lang_id = 1;
} {2}

do_catchsql_test 1.17 {
  INSERT INTO t1(content, lang_id) VALUES('123', -1);
} {1 {constraint failed}}

do_execsql_test 1.18 {
  DROP TABLE t1;
  CREATE VIRTUAL TABLE t1 USING fts4(languageid=lang_id);
  INSERT INTO t1(content, lang_id) VALUES('A', 13);
  INSERT INTO t1(content, lang_id) VALUES('B', 13);
  INSERT INTO t1(content, lang_id) VALUES('C', 13);
  INSERT INTO t1(content, lang_id) VALUES('D', 13);
  INSERT INTO t1(content, lang_id) VALUES('E', 13);
  INSERT INTO t1(content, lang_id) VALUES('F', 13);
  INSERT INTO t1(content, lang_id) VALUES('G', 13);
  INSERT INTO t1(content, lang_id) VALUES('H', 13);
  INSERT INTO t1(content, lang_id) VALUES('I', 13);
  INSERT INTO t1(content, lang_id) VALUES('J', 13);
  INSERT INTO t1(content, lang_id) VALUES('K', 13);
  INSERT INTO t1(content, lang_id) VALUES('L', 13);
  INSERT INTO t1(content, lang_id) VALUES('M', 13);
  INSERT INTO t1(content, lang_id) VALUES('N', 13);
  INSERT INTO t1(content, lang_id) VALUES('O', 13);
  INSERT INTO t1(content, lang_id) VALUES('P', 13);
  INSERT INTO t1(content, lang_id) VALUES('Q', 13);
  INSERT INTO t1(content, lang_id) VALUES('R', 13);
  INSERT INTO t1(content, lang_id) VALUES('S', 13);
  SELECT rowid FROM t1 WHERE t1 MATCH 'A';
} {}


#-------------------------------------------------------------------------
# Test cases 2.*
#
proc build_multilingual_db_1 {db} {
  $db eval { CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l) }

  set xwords [list zero one two three four five six seven eight nine ten]
  set ywords [list alpha beta gamma delta epsilon zeta eta theta iota kappa]

  for {set i 0} {$i < 1000} {incr i} {
    set iLangid [expr $i%9]
    set x ""
    set y ""

    set x [list]
    lappend x [lindex $xwords [expr ($i / 1000) % 10]]
    lappend x [lindex $xwords [expr ($i / 100)  % 10]]
    lappend x [lindex $xwords [expr ($i / 10)   % 10]]
    lappend x [lindex $xwords [expr ($i / 1)   % 10]]

    set y [list]
    lappend y [lindex $ywords [expr ($i / 1000) % 10]]
    lappend y [lindex $ywords [expr ($i / 100)  % 10]]
    lappend y [lindex $ywords [expr ($i / 10)   % 10]]
    lappend y [lindex $ywords [expr ($i / 1)   % 10]]

    $db eval { INSERT INTO t2(docid, x, y, l) VALUES($i, $x, $y, $iLangid) }
  }

  $db eval {
    CREATE TABLE data(x, y, l);
    INSERT INTO data(rowid, x, y, l) SELECT docid, x, y, l FROM t2;
  }
}

proc rowid_list_set_langid {langid} {
  set ::rowid_list_langid $langid
}
proc rowid_list {pattern} {
  set langid $::rowid_list_langid
  set res [list]
  db eval {SELECT rowid, x, y FROM data WHERE l = $langid ORDER BY rowid ASC} {
    if {[string match "*$pattern*" $x] || [string match "*$pattern*" $y]} {
      lappend res $rowid
    }
  }
  return $res
}

proc or_merge_list {list1 list2} {
  set res [list]

  set i1 0
  set i2 0

  set n1 [llength $list1]
  set n2 [llength $list2]

  while {$i1 < $n1 && $i2 < $n2} {
    set e1 [lindex $list1 $i1]
    set e2 [lindex $list2 $i2]

    if {$e1==$e2} {
      lappend res $e1
      incr i1
      incr i2
    } elseif {$e1 < $e2} {
      lappend res $e1
      incr i1
    } else {
      lappend res $e2
      incr i2
    }
  }

  concat $res [lrange $list1 $i1 end] [lrange $list2 $i2 end]
}

proc or_merge_lists {args} {
  set res [lindex $args 0]
  for {set i 1} {$i < [llength $args]} {incr i} {
    set res [or_merge_list $res [lindex $args $i]]
  }
  set res
}

proc and_merge_list {list1 list2} {
  foreach i $list2 { set a($i) 1 }
  set res [list]
  foreach i $list1 {
    if {[info exists a($i)]} {lappend res $i}
  }
  set res
}


proc and_merge_lists {args} {
  set res [lindex $args 0]
  for {set i 1} {$i < [llength $args]} {incr i} {
    set res [and_merge_list $res [lindex $args $i]]
  }
  set res
}

proc filter_list {list langid} {
  set res [list]
  foreach i $list {
    if {($i % 9) == $langid} {lappend res $i}
  }
  set res
}

do_test 2.0 { 
  reset_db
  build_multilingual_db_1 db
} {}

proc do_test_query1 {tn query res_script} {
  for {set langid 0} {$langid < 10} {incr langid} {
    rowid_list_set_langid $langid
    set res [eval $res_script]

    set actual [
      execsql {SELECT docid FROM t2 WHERE t2 MATCH $query AND l = $langid}
    ]
    do_test $tn.$langid [list set {} $actual] $res
  }
}

# Run some queries. 
do_test_query1 2.1.1  {delta}          { rowid_list delta }
do_test_query1 2.1.2  {"zero one two"} { rowid_list "zero one two" }
do_test_query1 2.1.3  {zero one two} {
  and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
}
do_test_query1 2.1.4  {"zero one" OR "one two"} {
  or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
}

# Now try the same tests as above, but after running the 'optimize'
# command on the FTS table.
#
do_execsql_test 2.2 {
  INSERT INTO t2(t2) VALUES('optimize');
  SELECT count(*) FROM t2_segdir;
} {9}
do_test_query1 2.2.1 {delta}          { rowid_list delta }
do_test_query1 2.2.2 {"zero one two"} { rowid_list "zero one two" }
do_test_query1 2.2.3 {zero one two} {
  and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
}
do_test_query1 2.2.4 {"zero one" OR "one two"} {
  or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
}

# And rebuild.
#
do_test 2.3 { 
  reset_db
  build_multilingual_db_1 db
  execsql { INSERT INTO t2(t2) VALUES('rebuild') }
} {}
do_test_query1 2.3.1 {delta}          { rowid_list delta }
do_test_query1 2.3.2 {"zero one two"} { rowid_list "zero one two" }
do_test_query1 2.3.3 {zero one two} {
  and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
}
do_test_query1 2.3.4 {"zero one" OR "one two"} {
  or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
}

#-------------------------------------------------------------------------
# Test cases 3.*
#
do_test 3.0 {
  reset_db
  build_multilingual_db_1 db
  execsql {
    CREATE TABLE t3_data(l, x, y);
    INSERT INTO t3_data(rowid, l, x, y) SELECT docid, l, x, y FROM t2;
    DROP TABLE t2;
  }
} {}
do_execsql_test 3.1 {
  CREATE VIRTUAL TABLE t2 USING fts4(content=t3_data, languageid=l);
  INSERT INTO t2(t2) VALUES('rebuild');
}

do_test_query1 3.1.1 {delta}          { rowid_list delta }
do_test_query1 3.1.2 {"zero one two"} { rowid_list "zero one two" }
do_test_query1 3.1.3 {zero one two} {
  and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
}
do_test_query1 3.1.4 {"zero one" OR "one two"} {
  or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
}

do_execsql_test 3.2.1 {
  DROP TABLE t2;
  CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l, content=nosuchtable);
}

do_execsql_test 3.2.2 {
  INSERT INTO t2(docid, x, y, l) SELECT rowid, x, y, l FROM t3_data;
}

do_execsql_test 3.2.3 {
  DROP TABLE t3_data;
}

do_test_query1 3.3.1 {delta}          { rowid_list delta }
do_test_query1 3.3.2 {"zero one two"} { rowid_list "zero one two" }
do_test_query1 3.3.3 {zero one two} {
  and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
}
do_test_query1 3.3.4 {"zero one" OR "one two"} {
  or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
}

#-------------------------------------------------------------------------
# Test cases 4.*
#
proc build_multilingual_db_2 {db} {
  $db eval {
    CREATE VIRTUAL TABLE t4 USING fts4(
        tokenize=testtokenizer, 
        languageid=lid
    );
  }
  for {set i 0} {$i < 50} {incr i} {
    execsql { 
      INSERT INTO t4(docid, content, lid) VALUES($i, 'The Quick Brown Fox', $i) 
    }
  }
}

do_test 4.1.0 {
  reset_db
  set ptr [fts3_test_tokenizer]
  execsql { SELECT fts3_tokenizer('testtokenizer', $ptr) }
  build_multilingual_db_2 db
} {}
do_execsql_test 4.1.1 {
  SELECT docid FROM t4 WHERE t4 MATCH 'quick';
} {0}
do_execsql_test 4.1.2 {
  SELECT docid FROM t4 WHERE t4 MATCH 'quick' AND lid=1;
} {}
do_execsql_test 4.1.3 {
  SELECT docid FROM t4 WHERE t4 MATCH 'Quick' AND lid=1;
} {1}
for {set i 0} {$i < 50} {incr i} {
  do_execsql_test 4.1.4.$i {
    SELECT count(*) FROM t4 WHERE t4 MATCH 'fox' AND lid=$i;
  } [expr 0==($i%2)]
}
do_catchsql_test 4.1.5 {
  INSERT INTO t4(content, lid) VALUES('hello world', 101)
} {1 {SQL logic error or missing database}}

finish_test

Changes to test/permutations.test.

180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
  fts3defer.test fts3defer2.test fts3e.test fts3expr.test fts3expr2.test 
  fts3near.test fts3query.test fts3shared.test fts3snippet.test 
  fts3sort.test
  fts3fault.test fts3malloc.test fts3matchinfo.test
  fts3aux1.test fts3comp1.test fts3auto.test
  fts4aa.test fts4content.test
  fts3conf.test fts3prefix.test fts3fault2.test fts3corrupt.test
  fts3corrupt2.test
  fts3first.test
}


lappend ::testsuitelist xxx
#-------------------------------------------------------------------------
# Define the coverage related test suites:
#







|
<







180
181
182
183
184
185
186
187

188
189
190
191
192
193
194
  fts3defer.test fts3defer2.test fts3e.test fts3expr.test fts3expr2.test 
  fts3near.test fts3query.test fts3shared.test fts3snippet.test 
  fts3sort.test
  fts3fault.test fts3malloc.test fts3matchinfo.test
  fts3aux1.test fts3comp1.test fts3auto.test
  fts4aa.test fts4content.test
  fts3conf.test fts3prefix.test fts3fault2.test fts3corrupt.test
  fts3corrupt2.test fts3first.test fts4langid.test

}


lappend ::testsuitelist xxx
#-------------------------------------------------------------------------
# Define the coverage related test suites:
#