/ Check-in [939b7a5d]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Remove some redundant code from fts5.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: 939b7a5de25e064bdf08e03864c35ab718da6f6f
User & Date: dan 2015-01-23 06:50:33
Context
2015-01-23
17:43
Fix compression of keys stored on internal segment b-tree nodes by fts5. check-in: 51444f67 user: dan tags: fts5
06:50
Remove some redundant code from fts5. check-in: 939b7a5d user: dan tags: fts5
2015-01-22
19:13
Add further tests and fixes for fts5. check-in: 5b295897 user: dan tags: fts5
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5Int.h.

    82     82     Fts5Tokenizer *pTok;
    83     83     fts5_tokenizer *pTokApi;
    84     84   
    85     85     /* Values loaded from the %_config table */
    86     86     int iCookie;                    /* Incremented when %_config is modified */
    87     87     int pgsz;                       /* Approximate page size used in %_data */
    88     88     int nAutomerge;                 /* 'automerge' setting */
           89  +  int nCrisisMerge;               /* Maximum allowed segments per level */
    89     90     char *zRank;                    /* Name of rank function */
    90     91     char *zRankArgs;                /* Arguments to rank function */
    91     92   };
    92     93   
    93     94   #define FTS5_CONTENT_NORMAL   0
    94     95   #define FTS5_CONTENT_NONE     1
    95     96   #define FTS5_CONTENT_EXTERNAL 2

Changes to ext/fts5/fts5_config.c.

    13     13   ** This is an SQLite module implementing full-text search.
    14     14   */
    15     15   
    16     16   #include "fts5Int.h"
    17     17   
    18     18   #define FTS5_DEFAULT_PAGE_SIZE   1000
    19     19   #define FTS5_DEFAULT_AUTOMERGE      4
           20  +#define FTS5_DEFAULT_CRISISMERGE   16
    20     21   
    21     22   /* Maximum allowed page size */
    22     23   #define FTS5_MAX_PAGE_SIZE (128*1024)
    23     24   
    24     25   static int fts5_iswhitespace(char x){
    25     26     return (x==' ');
    26     27   }
................................................................................
   712    713       if( nAutomerge<0 || nAutomerge>64 ){
   713    714         if( pbBadkey ) *pbBadkey = 1;
   714    715       }else{
   715    716         if( nAutomerge==1 ) nAutomerge = FTS5_DEFAULT_AUTOMERGE;
   716    717         pConfig->nAutomerge = nAutomerge;
   717    718       }
   718    719     }
          720  +
          721  +  else if( 0==sqlite3_stricmp(zKey, "crisismerge") ){
          722  +    int nCrisisMerge = -1;
          723  +    if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
          724  +      nCrisisMerge = sqlite3_value_int(pVal);
          725  +    }
          726  +    if( nCrisisMerge<0 ){
          727  +      if( pbBadkey ) *pbBadkey = 1;
          728  +    }else{
          729  +      if( nCrisisMerge<=1 ) nCrisisMerge = FTS5_DEFAULT_CRISISMERGE;
          730  +      pConfig->nCrisisMerge = nCrisisMerge;
          731  +    }
          732  +  }
   719    733   
   720    734     else if( 0==sqlite3_stricmp(zKey, "rank") ){
   721    735       const char *zIn = (const char*)sqlite3_value_text(pVal);
   722    736       char *zRank;
   723    737       char *zRankArgs;
   724    738       rc = sqlite3Fts5ConfigParseRank(zIn, &zRank, &zRankArgs);
   725    739       if( rc==SQLITE_OK ){
................................................................................
   745    759     char *zSql;
   746    760     sqlite3_stmt *p = 0;
   747    761     int rc;
   748    762   
   749    763     /* Set default values */
   750    764     pConfig->pgsz = FTS5_DEFAULT_PAGE_SIZE;
   751    765     pConfig->nAutomerge = FTS5_DEFAULT_AUTOMERGE;
          766  +  pConfig->nCrisisMerge = FTS5_DEFAULT_CRISISMERGE;
   752    767   
   753    768     zSql = sqlite3_mprintf(zSelect, pConfig->zDb, pConfig->zName);
   754    769     if( zSql==0 ){
   755    770       rc = SQLITE_NOMEM;
   756    771     }else{
   757    772       rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p, 0);
   758    773       sqlite3_free(zSql);

Changes to ext/fts5/fts5_index.c.

    39     39   **   * extra fields in the "structure record" record the state of ongoing
    40     40   **     incremental merge operations.
    41     41   **
    42     42   */
    43     43   
    44     44   #define FTS5_OPT_WORK_UNIT  1000  /* Number of leaf pages per optimize step */
    45     45   #define FTS5_WORK_UNIT      64    /* Number of leaf pages in unit of work */
    46         -#define FTS5_CRISIS_MERGE   16    /* Maximum number of segments to merge */
    47     46   
    48     47   #define FTS5_MIN_DLIDX_SIZE  4    /* Add dlidx if this many empty pages */
    49     48   
    50     49   /*
    51     50   ** Details:
    52     51   **
    53     52   ** The %_data table managed by this module,
................................................................................
   289    288   
   290    289   /*
   291    290   ** One object per %_data table.
   292    291   */
   293    292   struct Fts5Index {
   294    293     Fts5Config *pConfig;            /* Virtual table configuration */
   295    294     char *zDataTbl;                 /* Name of %_data table */
   296         -  int nCrisisMerge;               /* Maximum allowed segments per level */
   297    295     int nWorkUnit;                  /* Leaf pages in a "unit" of work */
   298    296   
   299    297     /*
   300    298     ** Variables related to the accumulation of tokens and doclists within the
   301    299     ** in-memory hash tables before they are flushed to disk.
   302    300     */
   303    301     Fts5Hash **apHash;              /* Array of hash tables */
................................................................................
  1101   1099     int iCookie;                    /* Configuration cookie */
  1102   1100   
  1103   1101     assert( iIdx<=pConfig->nPrefix );
  1104   1102     pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID(iIdx));
  1105   1103     if( !pData ) return 0;
  1106   1104     p->rc = fts5StructureDecode(pData->p, pData->n, &iCookie, &pRet);
  1107   1105   
  1108         -  if( p->rc==SQLITE_OK && p->pConfig->iCookie!=iCookie ){
  1109         -    p->rc = sqlite3Fts5ConfigLoad(p->pConfig, iCookie);
         1106  +  if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){
         1107  +    p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
  1110   1108     }
  1111   1109   
  1112   1110     fts5DataRelease(pData);
  1113   1111     if( p->rc!=SQLITE_OK ){
  1114   1112       fts5StructureRelease(pRet);
  1115   1113       pRet = 0;
  1116   1114     }
................................................................................
  1246   1244     Fts5Structure *pStruct          /* Index structure */
  1247   1245   ){
  1248   1246     if( p->rc==SQLITE_OK ){
  1249   1247       int iTst;
  1250   1248       int iPromote = -1;
  1251   1249       int szPromote;                /* Promote anything this size or smaller */
  1252   1250       Fts5StructureSegment *pSeg;   /* Segment just written */
  1253         -    Fts5StructureLevel *pTst;
  1254   1251       int szSeg;                    /* Size of segment just written */
  1255   1252   
  1256   1253   
  1257   1254       pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1];
  1258   1255       szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst);
  1259   1256   
  1260   1257       /* Check for condition (a) */
  1261   1258       for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--);
  1262         -    pTst = &pStruct->aLevel[iTst];
  1263         -    assert( pTst->nMerge==0 );
  1264   1259       if( iTst>=0 ){
  1265   1260         int i;
  1266   1261         int szMax = 0;
         1262  +      Fts5StructureLevel *pTst = &pStruct->aLevel[iTst];
         1263  +      assert( pTst->nMerge==0 );
  1267   1264         for(i=0; i<pTst->nSeg; i++){
  1268   1265           int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1;
  1269   1266           if( sz>szMax ) szMax = sz;
  1270   1267         }
  1271   1268         if( szMax>=szSeg ){
  1272   1269           /* Condition (a) is true. Promote the newest segment on level 
  1273   1270           ** iLvl to level iTst.  */
................................................................................
  2479   2476   ** Return true if the position iterator passed as the second argument is
  2480   2477   ** at EOF. Or if an error has already occurred. Otherwise, return false.
  2481   2478   */
  2482   2479   static int fts5PosIterEof(Fts5Index *p, Fts5PosIter *pIter){
  2483   2480     return (p->rc || pIter->chunk.pLeaf==0);
  2484   2481   }
  2485   2482   
  2486         -/*
  2487         -** Add an entry for (iRowid/iCol/iPos) to the doclist for (pToken/nToken)
  2488         -** in hash table for index iIdx. If iIdx is zero, this is the main terms 
  2489         -** index. Values of 1 and greater for iIdx are prefix indexes.
  2490         -**
  2491         -** If an OOM error is encountered, set the Fts5Index.rc error code 
  2492         -** accordingly.
  2493         -*/
  2494         -static void fts5AddTermToHash(
  2495         -  Fts5Index *p,                   /* Index object to write to */
  2496         -  int iIdx,                       /* Entry in p->aHash[] to update */
  2497         -  int iCol,                       /* Column token appears in (-ve -> delete) */
  2498         -  int iPos,                       /* Position of token within column */
  2499         -  const char *pToken, int nToken  /* Token to add or remove to or from index */
  2500         -){
  2501         -  if( p->rc==SQLITE_OK ){
  2502         -    p->rc = sqlite3Fts5HashWrite(
  2503         -        p->apHash[iIdx], p->iWriteRowid, iCol, iPos, pToken, nToken
  2504         -    );
  2505         -  }
  2506         -}
  2507         -
  2508   2483   /*
  2509   2484   ** Allocate a new segment-id for the structure pStruct.
  2510   2485   **
  2511   2486   ** If an error has already occurred, this function is a no-op. 0 is 
  2512   2487   ** returned in this case.
  2513   2488   */
  2514   2489   static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){
................................................................................
  3224   3199     int iIdx,                       /* Index to work on */
  3225   3200     Fts5Structure **ppStruct        /* IN/OUT: Current structure of index */
  3226   3201   ){
  3227   3202     Fts5Structure *pStruct = *ppStruct;
  3228   3203     int iLvl = 0;
  3229   3204     while( p->rc==SQLITE_OK 
  3230   3205         && iLvl<pStruct->nLevel
  3231         -      && pStruct->aLevel[iLvl].nSeg>=p->nCrisisMerge 
         3206  +      && pStruct->aLevel[iLvl].nSeg>=p->pConfig->nCrisisMerge 
  3232   3207     ){
  3233   3208       fts5IndexMergeLevel(p, iIdx, &pStruct, iLvl, 0);
  3234   3209       fts5StructurePromote(p, iLvl+1, pStruct);
  3235   3210       iLvl++;
  3236   3211     }
  3237   3212     *ppStruct = pStruct;
  3238   3213   }
................................................................................
  3996   3971   
  3997   3972   /*
  3998   3973   ** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
  3999   3974   ** to the document with rowid iRowid.
  4000   3975   */
  4001   3976   int sqlite3Fts5IndexBeginWrite(Fts5Index *p, i64 iRowid){
  4002   3977     assert( p->rc==SQLITE_OK );
         3978  +
         3979  +  /* Allocate hash tables if they have not already been allocated */
         3980  +  if( p->apHash==0 ){
         3981  +    int i;
         3982  +    int rc = SQLITE_OK;
         3983  +    int nHash = p->pConfig->nPrefix + 1;
         3984  +    Fts5Hash **apNew;
         3985  +
         3986  +    apNew = (Fts5Hash**)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Hash*)*nHash);
         3987  +    for(i=0; rc==SQLITE_OK && i<nHash; i++){
         3988  +      rc = sqlite3Fts5HashNew(&apNew[i], &p->nPendingData);
         3989  +    }
         3990  +    if( rc==SQLITE_OK ){
         3991  +      p->apHash = apNew;
         3992  +    }else{
         3993  +      for(i=0; i<nHash; i++){
         3994  +        sqlite3Fts5HashFree(apNew[i]);
         3995  +      }
         3996  +      sqlite3_free(apNew);
         3997  +      return rc;
         3998  +    }
         3999  +  }
         4000  +
  4003   4001     if( iRowid<=p->iWriteRowid || (p->nPendingData > p->nMaxPendingData) ){
  4004   4002       fts5IndexFlush(p);
  4005   4003     }
  4006   4004     p->iWriteRowid = iRowid;
  4007   4005     return fts5IndexReturn(p);
  4008   4006   }
  4009   4007   
................................................................................
  4067   4065     Fts5Index *p;                   /* New object */
  4068   4066   
  4069   4067     *pp = p = (Fts5Index*)sqlite3_malloc(sizeof(Fts5Index));
  4070   4068     if( !p ) return SQLITE_NOMEM;
  4071   4069   
  4072   4070     memset(p, 0, sizeof(Fts5Index));
  4073   4071     p->pConfig = pConfig;
  4074         -  p->nCrisisMerge = FTS5_CRISIS_MERGE;
  4075   4072     p->nWorkUnit = FTS5_WORK_UNIT;
  4076   4073     p->nMaxPendingData = 1024*1024;
  4077   4074     p->zDataTbl = sqlite3_mprintf("%s_data", pConfig->zName);
  4078   4075     if( p->zDataTbl==0 ){
  4079   4076       rc = SQLITE_NOMEM;
  4080   4077     }else if( bCreate ){
  4081   4078       rc = sqlite3Fts5CreateTable(
................................................................................
  4192   4189   int sqlite3Fts5IndexWrite(
  4193   4190     Fts5Index *p,                   /* Index to write to */
  4194   4191     int iCol,                       /* Column token appears in (-ve -> delete) */
  4195   4192     int iPos,                       /* Position of token within column */
  4196   4193     const char *pToken, int nToken  /* Token to add or remove to or from index */
  4197   4194   ){
  4198   4195     int i;                          /* Used to iterate through indexes */
         4196  +  int rc;                         /* Return code */
  4199   4197     Fts5Config *pConfig = p->pConfig;
         4198  +
  4200   4199     assert( p->rc==SQLITE_OK );
  4201   4200   
  4202         -  /* Allocate hash tables if they have not already been allocated */
  4203         -  if( p->apHash==0 ){
  4204         -    int nHash = pConfig->nPrefix + 1;
  4205         -    p->apHash = (Fts5Hash**)fts5IdxMalloc(p, sizeof(Fts5Hash*) * nHash);
  4206         -    for(i=0; p->rc==SQLITE_OK && i<nHash; i++){
  4207         -      p->rc = sqlite3Fts5HashNew(&p->apHash[i], &p->nPendingData);
  4208         -    }
  4209         -  }
  4210         -
  4211   4201     /* Add the new token to the main terms hash table. And to each of the
  4212   4202     ** prefix hash tables that it is large enough for. */
  4213         -  fts5AddTermToHash(p, 0, iCol, iPos, pToken, nToken);
  4214         -  for(i=0; i<pConfig->nPrefix; i++){
         4203  +  rc = sqlite3Fts5HashWrite(
         4204  +      p->apHash[0], p->iWriteRowid, iCol, iPos, pToken, nToken
         4205  +  );
         4206  +  for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){
  4215   4207       int nByte = fts5IndexCharlenToBytelen(pToken, nToken, pConfig->aPrefix[i]);
  4216   4208       if( nByte ){
  4217         -      fts5AddTermToHash(p, i+1, iCol, iPos, pToken, nByte);
         4209  +      rc = sqlite3Fts5HashWrite(
         4210  +          p->apHash[i+1], p->iWriteRowid, iCol, iPos, pToken, nByte
         4211  +      );
  4218   4212       }
  4219   4213     }
  4220   4214   
  4221         -  return fts5IndexReturn(p);
         4215  +  return rc;
  4222   4216   }
  4223   4217   
  4224   4218   /*
  4225   4219   ** Open a new iterator to iterate though all docids that match the 
  4226   4220   ** specified token or token prefix.
  4227   4221   */
  4228   4222   int sqlite3Fts5IndexQuery(

Changes to ext/fts5/tool/loadfts5.tcl.

    25     25     puts stderr ""
    26     26     puts stderr "Switches are:"
    27     27     puts stderr "  -fts4        (use fts4 instead of fts5)"
    28     28     puts stderr "  -fts5        (use fts5)"
    29     29     puts stderr "  -porter      (use porter tokenizer)"
    30     30     puts stderr "  -limit N     (load no more than N documents)"
    31     31     puts stderr "  -automerge N (set the automerge parameter to N)"
           32  +  puts stderr "  -crisismerge N (set the crisismerge parameter to N)"
    32     33     exit 1
    33     34   }
    34     35   
    35     36   set O(vtab)       fts5
    36     37   set O(tok)        ""
    37     38   set O(limit)      0
    38     39   set O(automerge)  -1
           40  +set O(crisismerge)  -1
    39     41   
    40     42   if {[llength $argv]<2} usage
    41     43   set nOpt [expr {[llength $argv]-2}]
    42     44   for {set i 0} {$i < $nOpt} {incr i} {
    43     45     set arg [lindex $argv $i]
    44     46     switch -- [lindex $argv $i] {
    45     47       -fts4 {
................................................................................
    59     61         set O(limit) [lindex $argv $i]
    60     62       }
    61     63       
    62     64       -automerge {
    63     65         if { [incr i]>=$nOpt } usage
    64     66         set O(automerge) [lindex $argv $i]
    65     67       }
           68  +
           69  +    -crisismerge {
           70  +      if { [incr i]>=$nOpt } usage
           71  +      set O(crisismerge) [lindex $argv $i]
           72  +    }
    66     73   
    67     74       default {
    68     75         usage
    69     76       }
    70     77     }
    71     78   }
    72     79   
................................................................................
    77     84     db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok))"
    78     85     if {$O(automerge)>=0} {
    79     86       if {$O(vtab) == "fts5"} {
    80     87         db eval { INSERT INTO t1(t1, rank) VALUES('automerge', $O(automerge)) }
    81     88       } else {
    82     89         db eval { INSERT INTO t1(t1) VALUES('automerge=' || $O(automerge)) }
    83     90       }
           91  +  }
           92  +  if {$O(crisismerge)>=0} {
           93  +    if {$O(vtab) == "fts5"} {
           94  +      db eval {INSERT INTO t1(t1, rank) VALUES('crisismerge', $O(crisismerge))}
           95  +    } else {
           96  +    }
    84     97     }
    85     98     load_hierachy [lindex $argv end]
    86     99   }
    87    100   
    88    101   
    89    102