/ Check-in [2397404e]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add an "automerge=0" mode that disables auto-merging and falls back to fts4-style crisis merges.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: 2397404e152b908d838e6491294b263b05943b3f
User & Date: dan 2014-08-18 19:30:01
Context
2014-08-25
19:58
Add documentation for tokenizer api to fts5.h. Also add a script to extract extension API docs and format them as html. check-in: e240d467 user: dan tags: fts5
2014-08-18
19:30
Add an "automerge=0" mode that disables auto-merging and falls back to fts4-style crisis merges. check-in: 2397404e user: dan tags: fts5
2014-08-12
16:07
Cache the value of the "totals" record in memory during transactions. check-in: 05dfdad4 user: dan tags: fts5
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5.c.

   868    868     }else
   869    869   
   870    870     if( n>5 && 0==sqlite3_strnicmp("pgsz=", z, 5) ){
   871    871       int pgsz = atoi(&z[5]);
   872    872       if( pgsz<32 ) pgsz = 32;
   873    873       sqlite3Fts5IndexPgsz(pTab->pIndex, pgsz);
   874    874       rc = SQLITE_OK;
          875  +  }else
          876  +  
          877  +  if( n>10 && 0==sqlite3_strnicmp("automerge=", z, 10) ){
          878  +    int nAutomerge = atoi(&z[10]);
          879  +    sqlite3Fts5IndexAutomerge(pTab->pIndex, nAutomerge);
          880  +    rc = SQLITE_OK;
   875    881     }
   876    882   
   877    883     return rc;
   878    884   }
   879    885   
   880    886   /* 
   881    887   ** This function is the implementation of the xUpdate callback used by 

Changes to ext/fts5/fts5.h.

    22     22   #define _FTS5_H
    23     23   
    24     24   #include "sqlite3.h"
    25     25   
    26     26   /*************************************************************************
    27     27   ** CUSTOM AUXILIARY FUNCTIONS
    28     28   **
    29         -** Virtual table implemenations may overload SQL functions by implementing
           29  +** Virtual table implementations may overload SQL functions by implementing
    30     30   ** the sqlite3_module.xFindFunction() method.
    31     31   */
    32     32   
    33     33   typedef struct Fts5ExtensionApi Fts5ExtensionApi;
    34     34   typedef struct Fts5Context Fts5Context;
    35     35   
    36     36   typedef void (*fts5_extension_function)(
................................................................................
    38     38     Fts5Context *pFts,              /* First arg to pass to pApi functions */
    39     39     sqlite3_context *pCtx,          /* Context for returning result/error */
    40     40     int nVal,                       /* Number of values in apVal[] array */
    41     41     sqlite3_value **apVal           /* Array of trailing arguments */
    42     42   );
    43     43   
    44     44   /*
    45         -**
    46     45   ** xUserData(pFts):
    47     46   **
    48     47   **   Return a copy of the context pointer the extension function was 
    49     48   **   registered with.
    50     49   **
    51     50   **
    52     51   ** xColumnTotalSize(pFts, iCol, pnToken):

Changes to ext/fts5/fts5Int.h.

   268    268   
   269    269   /*
   270    270   ** Set the page size to use when writing. It doesn't matter if this
   271    271   ** changes mid-transaction, or if inconsistent values are used by 
   272    272   ** multiple clients.
   273    273   */
   274    274   void sqlite3Fts5IndexPgsz(Fts5Index *p, int pgsz);
          275  +
          276  +void sqlite3Fts5IndexAutomerge(Fts5Index *p, int nMerge);
   275    277   
   276    278   /*
   277    279   ** Return the total number of entries read from the %_data table by 
   278    280   ** this connection since it was created.
   279    281   */
   280    282   int sqlite3Fts5IndexReads(Fts5Index *p);
   281    283   

Changes to ext/fts5/fts5_index.c.

    41     41   **
    42     42   */
    43     43   
    44     44   #define FTS5_DEFAULT_PAGE_SIZE   1000
    45     45   
    46     46   #define FTS5_WORK_UNIT      64    /* Number of leaf pages in unit of work */
    47     47   #define FTS5_MIN_MERGE       4    /* Minimum number of segments to merge */
           48  +#define FTS5_CRISIS_MERGE   16    /* Maximum number of segments to merge */
    48     49   
    49     50   #define FTS5_MIN_DLIDX_SIZE  4    /* Add dlidx if this many empty pages */
    50     51   
    51     52   /*
    52     53   ** Details:
    53     54   **
    54     55   ** The %_data table managed by this module,
................................................................................
   287    288   ** One object per %_data table.
   288    289   */
   289    290   struct Fts5Index {
   290    291     Fts5Config *pConfig;            /* Virtual table configuration */
   291    292     char *zDataTbl;                 /* Name of %_data table */
   292    293     int pgsz;                       /* Target page size for this index */
   293    294     int nMinMerge;                  /* Minimum input segments in a merge */
          295  +  int nCrisisMerge;               /* Maximum allowed segments per level */
   294    296     int nWorkUnit;                  /* Leaf pages in a "unit" of work */
   295    297   
   296    298     /*
   297    299     ** Variables related to the accumulation of tokens and doclists within the
   298    300     ** in-memory hash tables before they are flushed to disk.
   299    301     */
   300    302     Fts5Hash **apHash;              /* Array of hash tables */
................................................................................
  2983   2985   
  2984   2986   /*
  2985   2987   **
  2986   2988   */
  2987   2989   static void fts5IndexMergeLevel(
  2988   2990     Fts5Index *p,                   /* FTS5 backend object */
  2989   2991     int iIdx,                       /* Index to work on */
  2990         -  Fts5Structure *pStruct,         /* Stucture of index iIdx */
         2992  +  Fts5Structure **ppStruct,       /* IN/OUT: Structure of index iIdx */
  2991   2993     int iLvl,                       /* Level to read input from */
  2992   2994     int *pnRem                      /* Write up to this many output leaves */
  2993   2995   ){
         2996  +  Fts5Structure *pStruct = *ppStruct;
  2994   2997     Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
  2995         -  Fts5StructureLevel *pLvlOut = &pStruct->aLevel[iLvl+1];
         2998  +  Fts5StructureLevel *pLvlOut;
  2996   2999     Fts5MultiSegIter *pIter = 0;    /* Iterator to read input data */
  2997         -  int nRem = *pnRem;              /* Output leaf pages left to write */
         3000  +  int nRem = pnRem ? *pnRem : 0;  /* Output leaf pages left to write */
  2998   3001     int nInput;                     /* Number of input segments */
  2999   3002     Fts5SegWriter writer;           /* Writer object */
  3000   3003     Fts5StructureSegment *pSeg;     /* Output segment */
  3001   3004     Fts5Buffer term;
  3002   3005     int bRequireDoclistTerm = 0;    /* Doclist terminator (0x00) required */
  3003   3006     int bOldest;                    /* True if the output segment is the oldest */
  3004   3007   
................................................................................
  3005   3008     assert( iLvl<pStruct->nLevel );
  3006   3009     assert( pLvl->nMerge<=pLvl->nSeg );
  3007   3010   
  3008   3011     memset(&writer, 0, sizeof(Fts5SegWriter));
  3009   3012     memset(&term, 0, sizeof(Fts5Buffer));
  3010   3013     writer.iIdx = iIdx;
  3011   3014     if( pLvl->nMerge ){
         3015  +    pLvlOut = &pStruct->aLevel[iLvl+1];
  3012   3016       assert( pLvlOut->nSeg>0 );
  3013   3017       nInput = pLvl->nMerge;
  3014   3018       fts5WriteInitForAppend(p, &writer, iIdx, &pLvlOut->aSeg[pLvlOut->nSeg-1]);
  3015   3019       pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1];
  3016   3020     }else{
  3017   3021       int iSegid = fts5AllocateSegid(p, pStruct);
         3022  +
         3023  +    /* Extend the Fts5Structure object as required to ensure the output
         3024  +    ** segment exists. */
         3025  +    if( iLvl==pStruct->nLevel-1 ){
         3026  +      fts5StructureAddLevel(&p->rc, ppStruct);
         3027  +      pStruct = *ppStruct;
         3028  +    }
         3029  +    fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0);
         3030  +    pLvl = &pStruct->aLevel[iLvl];
         3031  +    pLvlOut = &pStruct->aLevel[iLvl+1];
         3032  +
  3018   3033       fts5WriteInit(p, &writer, iIdx, iSegid);
  3019   3034   
  3020   3035       /* Add the new segment to the output level */
  3021   3036       if( iLvl+1==pStruct->nLevel ) pStruct->nLevel++;
  3022   3037       pSeg = &pLvlOut->aSeg[pLvlOut->nSeg];
  3023   3038       pLvlOut->nSeg++;
  3024   3039       pSeg->pgnoFirst = 1;
................................................................................
  3045   3060       ** the position list is empty (i.e. the entry is a delete marker), no
  3046   3061       ** entry need be written to the output.  */
  3047   3062       fts5ChunkIterInit(p, pSeg, &sPos);
  3048   3063       if( bOldest==0 || sPos.nRem>0 ){
  3049   3064         int nTerm;
  3050   3065         const u8 *pTerm = fts5MultiIterTerm(pIter, &nTerm);
  3051   3066         if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){
  3052         -        if( writer.nLeafWritten>nRem ){
         3067  +        if( pnRem && writer.nLeafWritten>nRem ){
  3053   3068             fts5ChunkIterRelease(&sPos);
  3054   3069             break;
  3055   3070           }
  3056   3071   
  3057   3072           /* This is a new term. Append a term to the output segment. */
  3058   3073           if( bRequireDoclistTerm ){
  3059   3074             fts5WriteAppendZerobyte(p, &writer);
................................................................................
  3102   3117       assert( pSeg->nHeight>0 && pSeg->pgnoLast>0 );
  3103   3118       fts5TrimSegments(p, pIter);
  3104   3119       pLvl->nMerge = nInput;
  3105   3120     }
  3106   3121   
  3107   3122     fts5MultiIterFree(p, pIter);
  3108   3123     fts5BufferFree(&term);
  3109         -  *pnRem -= writer.nLeafWritten;
         3124  +  if( pnRem ) *pnRem -= writer.nLeafWritten;
  3110   3125   }
  3111   3126   
  3112   3127   /*
  3113   3128   ** A total of nLeaf leaf pages of data has just been flushed to a level-0
  3114   3129   ** segment in index iIdx with structure pStruct. This function updates the
  3115   3130   ** write-counter accordingly and, if necessary, performs incremental merge
  3116   3131   ** work.
................................................................................
  3161   3176   #ifdef SQLITE_DEBUG
  3162   3177       for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){
  3163   3178         assert( pStruct->aLevel[iLvl].nSeg==0 );
  3164   3179       }
  3165   3180   #endif
  3166   3181   
  3167   3182       if( nBest<p->nMinMerge && pStruct->aLevel[iBestLvl].nMerge==0 ) break;
  3168         -    if( iBestLvl==pStruct->nLevel-1 ){
  3169         -      fts5StructureAddLevel(&p->rc, &pStruct);
  3170         -    }
  3171         -    fts5StructureExtendLevel(&p->rc, pStruct, iBestLvl+1, 1, 0);
  3172         -    fts5IndexMergeLevel(p, iIdx, pStruct, iBestLvl, &nRem);
         3183  +    fts5IndexMergeLevel(p, iIdx, &pStruct, iBestLvl, &nRem);
  3173   3184       fts5StructurePromote(p, iBestLvl+1, pStruct);
  3174   3185       assert( nRem==0 || p->rc==SQLITE_OK );
  3175   3186       *ppStruct = pStruct;
  3176   3187     }
  3177   3188   }
         3189  +
         3190  +static void fts5IndexCrisisMerge(
         3191  +  Fts5Index *p,                   /* FTS5 backend object */
         3192  +  int iIdx,                       /* Index to work on */
         3193  +  Fts5Structure **ppStruct        /* IN/OUT: Current structure of index */
         3194  +){
         3195  +  Fts5Structure *pStruct = *ppStruct;
         3196  +  int iLvl = 0;
         3197  +  while( p->rc==SQLITE_OK 
         3198  +      && iLvl<pStruct->nLevel
         3199  +      && pStruct->aLevel[iLvl].nSeg>=p->nCrisisMerge 
         3200  +  ){
         3201  +    fts5IndexMergeLevel(p, iIdx, &pStruct, iLvl, 0);
         3202  +    fts5StructurePromote(p, iLvl+1, pStruct);
         3203  +    iLvl++;
         3204  +  }
         3205  +  *ppStruct = pStruct;
         3206  +}
  3178   3207   
  3179   3208   typedef struct Fts5FlushCtx Fts5FlushCtx;
  3180   3209   struct Fts5FlushCtx {
  3181   3210     Fts5Index *pIdx;
  3182   3211     Fts5SegWriter writer; 
  3183   3212   };
  3184   3213   
................................................................................
  3199   3228   
  3200   3229   static int fts5FlushNewEntry(
  3201   3230     void *pCtx, 
  3202   3231     i64 iRowid, 
  3203   3232     const u8 *aPoslist, 
  3204   3233     int nPoslist
  3205   3234   ){
  3206         -  Fts5Buffer *pBuf;
  3207   3235     Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx;
  3208   3236     int rc = SQLITE_OK;
  3209   3237   
  3210   3238     /* Append the rowid itself */
  3211   3239     fts5WriteAppendRowid(p->pIdx, &p->writer, iRowid);
  3212   3240   
  3213   3241     /* Append the size of the position list in bytes */
................................................................................
  3246   3274   
  3247   3275       rc = sqlite3Fts5HashIterate( p->apHash[iHash], (void*)&ctx, 
  3248   3276           fts5FlushNewTerm, fts5FlushNewEntry, fts5FlushTermDone
  3249   3277       );
  3250   3278       if( p->rc==SQLITE_OK ) p->rc = rc;
  3251   3279       fts5WriteFinish(p, &ctx.writer, &nHeight, &pgnoLast);
  3252   3280   
  3253         -    /* Edit the Fts5Structure and write it back to the database. */
         3281  +    /* Update the Fts5Structure. It is written back to the database by the
         3282  +    ** fts5StructureRelease() call below.  */
  3254   3283       if( pStruct->nLevel==0 ){
  3255   3284         fts5StructureAddLevel(&p->rc, &pStruct);
  3256   3285       }
  3257   3286       fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
  3258   3287       if( p->rc==SQLITE_OK ){
  3259   3288         pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
  3260   3289         pSeg->iSegid = iSegid;
  3261   3290         pSeg->nHeight = nHeight;
  3262   3291         pSeg->pgnoFirst = 1;
  3263   3292         pSeg->pgnoLast = pgnoLast;
  3264   3293       }
  3265   3294     }
  3266   3295   
  3267         -  fts5IndexWork(p, iHash, &pStruct, pgnoLast);
         3296  +  if( p->nMinMerge>0 ) fts5IndexWork(p, iHash, &pStruct, pgnoLast);
         3297  +  fts5IndexCrisisMerge(p, iHash, &pStruct);
  3268   3298     fts5StructureWrite(p, iHash, pStruct);
  3269   3299     fts5StructureRelease(pStruct);
  3270   3300   }
  3271   3301   
  3272   3302   /*
  3273   3303   ** Flush any data stored in the in-memory hash tables to the database.
  3274   3304   */
................................................................................
  3339   3369     *pp = p = (Fts5Index*)sqlite3_malloc(sizeof(Fts5Index));
  3340   3370     if( !p ) return SQLITE_NOMEM;
  3341   3371   
  3342   3372     memset(p, 0, sizeof(Fts5Index));
  3343   3373     p->pConfig = pConfig;
  3344   3374     p->pgsz = 1000;
  3345   3375     p->nMinMerge = FTS5_MIN_MERGE;
         3376  +  p->nCrisisMerge = FTS5_CRISIS_MERGE;
  3346   3377     p->nWorkUnit = FTS5_WORK_UNIT;
  3347   3378     p->nMaxPendingData = 1024*1024;
  3348   3379     p->zDataTbl = sqlite3_mprintf("%s_data", pConfig->zName);
  3349   3380     if( p->zDataTbl==0 ){
  3350   3381       rc = SQLITE_NOMEM;
  3351   3382     }else if( bCreate ){
  3352   3383       int i;
................................................................................
  3957   3988   
  3958   3989   /*
  3959   3990   ** Set the target page size for the index object.
  3960   3991   */
  3961   3992   void sqlite3Fts5IndexPgsz(Fts5Index *p, int pgsz){
  3962   3993     p->pgsz = pgsz;
  3963   3994   }
         3995  +
         3996  +/*
         3997  +** Set the minimum number of segments that an auto-merge operation should
         3998  +** attempt to merge together. A value of 1 sets the object to use the 
         3999  +** compile time default. Zero or less disables auto-merge altogether.
         4000  +*/
         4001  +void sqlite3Fts5IndexAutomerge(Fts5Index *p, int nMinMerge){
         4002  +  if( nMinMerge==1 ){
         4003  +    p->nMinMerge = FTS5_MIN_MERGE;
         4004  +  }else{
         4005  +    p->nMinMerge = nMinMerge;
         4006  +  }
         4007  +}
  3964   4008   
  3965   4009   /*
  3966   4010   ** Iterator pMulti currently points to a valid entry (not EOF). This
  3967   4011   ** function appends a copy of the position-list of the entry pMulti 
  3968   4012   ** currently points to to buffer pBuf.
  3969   4013   **
  3970   4014   ** If an error occurs, an error code is left in p->rc. It is assumed

Changes to tool/loadfts.c.

   159    159     int i;
   160    160     int rc;
   161    161     int nRowPerTrans = 0;
   162    162     sqlite3 *db;
   163    163     char *zSql;
   164    164     VisitContext sCtx;
   165    165   
          166  +  int nCmd = 0;
          167  +  char **aCmd = 0;
          168  +
   166    169     if( argc % 2 ) showHelp(argv[0]);
   167    170   
   168    171     for(i=1; i<(argc-1); i+=2){
   169    172       char *zOpt = argv[i];
   170    173       char *zArg = argv[i+1];
   171    174       if( strcmp(zOpt, "-fts")==0 ){
   172    175         iFts = atoi(zArg);
   173    176         if( iFts!=3 && iFts!=4 && iFts!= 5) showHelp(argv[0]);
   174    177       }
   175         -    if( strcmp(zOpt, "-trans")==0 ){
          178  +    else if( strcmp(zOpt, "-trans")==0 ){
   176    179         nRowPerTrans = atoi(zArg);
   177    180       }
   178    181       else if( strcmp(zOpt, "-idx")==0 ){
   179    182         bMap = atoi(zArg);
   180    183         if( bMap!=0 && bMap!=1 ) showHelp(argv[0]);
   181    184       }
   182    185       else if( strcmp(zOpt, "-dir")==0 ){
   183    186         zDir = zArg;
          187  +    }
          188  +    else if( strcmp(zOpt, "-special")==0 ){
          189  +      nCmd++;
          190  +      aCmd = sqlite3_realloc(aCmd, sizeof(char*) * nCmd);
          191  +      aCmd[nCmd-1] = zArg;
          192  +    }
          193  +    else{
          194  +      showHelp(argv[0]);
   184    195       }
   185    196     }
   186    197   
   187    198     /* Open the database file */
   188    199     rc = sqlite3_open(argv[argc-1], &db);
   189    200     if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_open()", db);
   190    201   
................................................................................
   193    204     if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_create_function()", db);
   194    205   
   195    206     /* Create the FTS table */
   196    207     zSql = sqlite3_mprintf("CREATE VIRTUAL TABLE fts USING fts%d(content)", iFts);
   197    208     rc = sqlite3_exec(db, zSql, 0, 0, 0);
   198    209     if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_exec(1)", db);
   199    210     sqlite3_free(zSql);
          211  +
          212  +  for(i=0; i<nCmd; i++){
          213  +    zSql = sqlite3_mprintf("INSERT INTO fts(fts) VALUES(%Q)", aCmd[i]);
          214  +    rc = sqlite3_exec(db, zSql, 0, 0, 0);
          215  +    if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_exec(1)", db);
          216  +    sqlite3_free(zSql);
          217  +  }
   200    218   
   201    219     /* Compile the INSERT statement to write data to the FTS table. */
   202    220     memset(&sCtx, 0, sizeof(VisitContext));
   203    221     sCtx.db = db;
   204    222     sCtx.nRowPerTrans = nRowPerTrans;
   205    223     rc = sqlite3_prepare_v2(db, 
   206    224         "INSERT INTO fts VALUES(readtext(?))", -1, &sCtx.pInsert, 0
................................................................................
   211    229     if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "BEGIN", 0, 0, 0);
   212    230     traverse(zDir, (void*)&sCtx, visit_file);
   213    231     if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "COMMIT", 0, 0, 0);
   214    232   
   215    233     /* Clean up and exit. */
   216    234     sqlite3_finalize(sCtx.pInsert);
   217    235     sqlite3_close(db);
          236  +  sqlite3_free(aCmd);
   218    237     return 0;
   219    238   }