Many hyperlinks are disabled. Use anonymous login to enable hyperlinks.
Overview
Comment: | Add an "automerge=0" mode that disables auto-merging and falls back to fts4-style crisis merges. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | fts5 |
Files: | files | file ages | folders |
SHA1: | 2397404e152b908d838e6491294b263b |
User & Date: | dan 2014-08-18 19:30:01 |
Context
2014-08-25
| ||
19:58 | Add documentation for tokenizer api to fts5.h. Also add a script to extract extension API docs and format them as html. check-in: e240d467 user: dan tags: fts5 | |
2014-08-18
| ||
19:30 | Add an "automerge=0" mode that disables auto-merging and falls back to fts4-style crisis merges. check-in: 2397404e user: dan tags: fts5 | |
2014-08-12
| ||
16:07 | Cache the value of the "totals" record in memory during transactions. check-in: 05dfdad4 user: dan tags: fts5 | |
Changes
Changes to ext/fts5/fts5.c.
868 868 }else 869 869 870 870 if( n>5 && 0==sqlite3_strnicmp("pgsz=", z, 5) ){ 871 871 int pgsz = atoi(&z[5]); 872 872 if( pgsz<32 ) pgsz = 32; 873 873 sqlite3Fts5IndexPgsz(pTab->pIndex, pgsz); 874 874 rc = SQLITE_OK; 875 + }else 876 + 877 + if( n>10 && 0==sqlite3_strnicmp("automerge=", z, 10) ){ 878 + int nAutomerge = atoi(&z[10]); 879 + sqlite3Fts5IndexAutomerge(pTab->pIndex, nAutomerge); 880 + rc = SQLITE_OK; 875 881 } 876 882 877 883 return rc; 878 884 } 879 885 880 886 /* 881 887 ** This function is the implementation of the xUpdate callback used by
Changes to ext/fts5/fts5.h.
22 22 #define _FTS5_H 23 23 24 24 #include "sqlite3.h" 25 25 26 26 /************************************************************************* 27 27 ** CUSTOM AUXILIARY FUNCTIONS 28 28 ** 29 -** Virtual table implemenations may overload SQL functions by implementing 29 +** Virtual table implementations may overload SQL functions by implementing 30 30 ** the sqlite3_module.xFindFunction() method. 31 31 */ 32 32 33 33 typedef struct Fts5ExtensionApi Fts5ExtensionApi; 34 34 typedef struct Fts5Context Fts5Context; 35 35 36 36 typedef void (*fts5_extension_function)( ................................................................................ 38 38 Fts5Context *pFts, /* First arg to pass to pApi functions */ 39 39 sqlite3_context *pCtx, /* Context for returning result/error */ 40 40 int nVal, /* Number of values in apVal[] array */ 41 41 sqlite3_value **apVal /* Array of trailing arguments */ 42 42 ); 43 43 44 44 /* 45 -** 46 45 ** xUserData(pFts): 47 46 ** 48 47 ** Return a copy of the context pointer the extension function was 49 48 ** registered with. 50 49 ** 51 50 ** 52 51 ** xColumnTotalSize(pFts, iCol, pnToken):
Changes to ext/fts5/fts5Int.h.
268 268 269 269 /* 270 270 ** Set the page size to use when writing. It doesn't matter if this 271 271 ** changes mid-transaction, or if inconsistent values are used by 272 272 ** multiple clients. 273 273 */ 274 274 void sqlite3Fts5IndexPgsz(Fts5Index *p, int pgsz); 275 + 276 +void sqlite3Fts5IndexAutomerge(Fts5Index *p, int nMerge); 275 277 276 278 /* 277 279 ** Return the total number of entries read from the %_data table by 278 280 ** this connection since it was created. 279 281 */ 280 282 int sqlite3Fts5IndexReads(Fts5Index *p); 281 283
Changes to ext/fts5/fts5_index.c.
41 41 ** 42 42 */ 43 43 44 44 #define FTS5_DEFAULT_PAGE_SIZE 1000 45 45 46 46 #define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */ 47 47 #define FTS5_MIN_MERGE 4 /* Minimum number of segments to merge */ 48 +#define FTS5_CRISIS_MERGE 16 /* Maximum number of segments to merge */ 48 49 49 50 #define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */ 50 51 51 52 /* 52 53 ** Details: 53 54 ** 54 55 ** The %_data table managed by this module, ................................................................................ 287 288 ** One object per %_data table. 288 289 */ 289 290 struct Fts5Index { 290 291 Fts5Config *pConfig; /* Virtual table configuration */ 291 292 char *zDataTbl; /* Name of %_data table */ 292 293 int pgsz; /* Target page size for this index */ 293 294 int nMinMerge; /* Minimum input segments in a merge */ 295 + int nCrisisMerge; /* Maximum allowed segments per level */ 294 296 int nWorkUnit; /* Leaf pages in a "unit" of work */ 295 297 296 298 /* 297 299 ** Variables related to the accumulation of tokens and doclists within the 298 300 ** in-memory hash tables before they are flushed to disk. 299 301 */ 300 302 Fts5Hash **apHash; /* Array of hash tables */ ................................................................................ 
2983 2985 2984 2986 /* 2985 2987 ** 2986 2988 */ 2987 2989 static void fts5IndexMergeLevel( 2988 2990 Fts5Index *p, /* FTS5 backend object */ 2989 2991 int iIdx, /* Index to work on */ 2990 - Fts5Structure *pStruct, /* Stucture of index iIdx */ 2992 + Fts5Structure **ppStruct, /* IN/OUT: Stucture of index iIdx */ 2991 2993 int iLvl, /* Level to read input from */ 2992 2994 int *pnRem /* Write up to this many output leaves */ 2993 2995 ){ 2996 + Fts5Structure *pStruct = *ppStruct; 2994 2997 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; 2995 - Fts5StructureLevel *pLvlOut = &pStruct->aLevel[iLvl+1]; 2998 + Fts5StructureLevel *pLvlOut; 2996 2999 Fts5MultiSegIter *pIter = 0; /* Iterator to read input data */ 2997 - int nRem = *pnRem; /* Output leaf pages left to write */ 3000 + int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */ 2998 3001 int nInput; /* Number of input segments */ 2999 3002 Fts5SegWriter writer; /* Writer object */ 3000 3003 Fts5StructureSegment *pSeg; /* Output segment */ 3001 3004 Fts5Buffer term; 3002 3005 int bRequireDoclistTerm = 0; /* Doclist terminator (0x00) required */ 3003 3006 int bOldest; /* True if the output segment is the oldest */ 3004 3007 ................................................................................ 3005 3008 assert( iLvl<pStruct->nLevel ); 3006 3009 assert( pLvl->nMerge<=pLvl->nSeg ); 3007 3010 3008 3011 memset(&writer, 0, sizeof(Fts5SegWriter)); 3009 3012 memset(&term, 0, sizeof(Fts5Buffer)); 3010 3013 writer.iIdx = iIdx; 3011 3014 if( pLvl->nMerge ){ 3015 + pLvlOut = &pStruct->aLevel[iLvl+1]; 3012 3016 assert( pLvlOut->nSeg>0 ); 3013 3017 nInput = pLvl->nMerge; 3014 3018 fts5WriteInitForAppend(p, &writer, iIdx, &pLvlOut->aSeg[pLvlOut->nSeg-1]); 3015 3019 pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1]; 3016 3020 }else{ 3017 3021 int iSegid = fts5AllocateSegid(p, pStruct); 3022 + 3023 + /* Extend the Fts5Structure object as required to ensure the output 3024 + ** segment exists. 
*/ 3025 + if( iLvl==pStruct->nLevel-1 ){ 3026 + fts5StructureAddLevel(&p->rc, ppStruct); 3027 + pStruct = *ppStruct; 3028 + } 3029 + fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0); 3030 + pLvl = &pStruct->aLevel[iLvl]; 3031 + pLvlOut = &pStruct->aLevel[iLvl+1]; 3032 + 3018 3033 fts5WriteInit(p, &writer, iIdx, iSegid); 3019 3034 3020 3035 /* Add the new segment to the output level */ 3021 3036 if( iLvl+1==pStruct->nLevel ) pStruct->nLevel++; 3022 3037 pSeg = &pLvlOut->aSeg[pLvlOut->nSeg]; 3023 3038 pLvlOut->nSeg++; 3024 3039 pSeg->pgnoFirst = 1; ................................................................................ 3045 3060 ** the position list is empty (i.e. the entry is a delete marker), no 3046 3061 ** entry need be written to the output. */ 3047 3062 fts5ChunkIterInit(p, pSeg, &sPos); 3048 3063 if( bOldest==0 || sPos.nRem>0 ){ 3049 3064 int nTerm; 3050 3065 const u8 *pTerm = fts5MultiIterTerm(pIter, &nTerm); 3051 3066 if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){ 3052 - if( writer.nLeafWritten>nRem ){ 3067 + if( pnRem && writer.nLeafWritten>nRem ){ 3053 3068 fts5ChunkIterRelease(&sPos); 3054 3069 break; 3055 3070 } 3056 3071 3057 3072 /* This is a new term. Append a term to the output segment. */ 3058 3073 if( bRequireDoclistTerm ){ 3059 3074 fts5WriteAppendZerobyte(p, &writer); ................................................................................ 3102 3117 assert( pSeg->nHeight>0 && pSeg->pgnoLast>0 ); 3103 3118 fts5TrimSegments(p, pIter); 3104 3119 pLvl->nMerge = nInput; 3105 3120 } 3106 3121 3107 3122 fts5MultiIterFree(p, pIter); 3108 3123 fts5BufferFree(&term); 3109 - *pnRem -= writer.nLeafWritten; 3124 + if( pnRem ) *pnRem -= writer.nLeafWritten; 3110 3125 } 3111 3126 3112 3127 /* 3113 3128 ** A total of nLeaf leaf pages of data has just been flushed to a level-0 3114 3129 ** segments in index iIdx with structure pStruct. 
This function updates the 3115 3130 ** write-counter accordingly and, if necessary, performs incremental merge 3116 3131 ** work. ................................................................................ 3161 3176 #ifdef SQLITE_DEBUG 3162 3177 for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){ 3163 3178 assert( pStruct->aLevel[iLvl].nSeg==0 ); 3164 3179 } 3165 3180 #endif 3166 3181 3167 3182 if( nBest<p->nMinMerge && pStruct->aLevel[iBestLvl].nMerge==0 ) break; 3168 - if( iBestLvl==pStruct->nLevel-1 ){ 3169 - fts5StructureAddLevel(&p->rc, &pStruct); 3170 - } 3171 - fts5StructureExtendLevel(&p->rc, pStruct, iBestLvl+1, 1, 0); 3172 - fts5IndexMergeLevel(p, iIdx, pStruct, iBestLvl, &nRem); 3183 + fts5IndexMergeLevel(p, iIdx, &pStruct, iBestLvl, &nRem); 3173 3184 fts5StructurePromote(p, iBestLvl+1, pStruct); 3174 3185 assert( nRem==0 || p->rc==SQLITE_OK ); 3175 3186 *ppStruct = pStruct; 3176 3187 } 3177 3188 } 3189 + 3190 +static void fts5IndexCrisisMerge( 3191 + Fts5Index *p, /* FTS5 backend object */ 3192 + int iIdx, /* Index to work on */ 3193 + Fts5Structure **ppStruct /* IN/OUT: Current structure of index */ 3194 +){ 3195 + Fts5Structure *pStruct = *ppStruct; 3196 + int iLvl = 0; 3197 + while( p->rc==SQLITE_OK 3198 + && iLvl<pStruct->nLevel 3199 + && pStruct->aLevel[iLvl].nSeg>=p->nCrisisMerge 3200 + ){ 3201 + fts5IndexMergeLevel(p, iIdx, &pStruct, iLvl, 0); 3202 + fts5StructurePromote(p, iLvl+1, pStruct); 3203 + iLvl++; 3204 + } 3205 + *ppStruct = pStruct; 3206 +} 3178 3207 3179 3208 typedef struct Fts5FlushCtx Fts5FlushCtx; 3180 3209 struct Fts5FlushCtx { 3181 3210 Fts5Index *pIdx; 3182 3211 Fts5SegWriter writer; 3183 3212 }; 3184 3213 ................................................................................ 
3199 3228 3200 3229 static int fts5FlushNewEntry( 3201 3230 void *pCtx, 3202 3231 i64 iRowid, 3203 3232 const u8 *aPoslist, 3204 3233 int nPoslist 3205 3234 ){ 3206 - Fts5Buffer *pBuf; 3207 3235 Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx; 3208 3236 int rc = SQLITE_OK; 3209 3237 3210 3238 /* Append the rowid itself */ 3211 3239 fts5WriteAppendRowid(p->pIdx, &p->writer, iRowid); 3212 3240 3213 3241 /* Append the size of the position list in bytes */ ................................................................................ 3246 3274 3247 3275 rc = sqlite3Fts5HashIterate( p->apHash[iHash], (void*)&ctx, 3248 3276 fts5FlushNewTerm, fts5FlushNewEntry, fts5FlushTermDone 3249 3277 ); 3250 3278 if( p->rc==SQLITE_OK ) p->rc = rc; 3251 3279 fts5WriteFinish(p, &ctx.writer, &nHeight, &pgnoLast); 3252 3280 3253 - /* Edit the Fts5Structure and write it back to the database. */ 3281 + /* Update the Fts5Structure. It is written back to the database by the 3282 + ** fts5StructureRelease() call below. */ 3254 3283 if( pStruct->nLevel==0 ){ 3255 3284 fts5StructureAddLevel(&p->rc, &pStruct); 3256 3285 } 3257 3286 fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0); 3258 3287 if( p->rc==SQLITE_OK ){ 3259 3288 pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ]; 3260 3289 pSeg->iSegid = iSegid; 3261 3290 pSeg->nHeight = nHeight; 3262 3291 pSeg->pgnoFirst = 1; 3263 3292 pSeg->pgnoLast = pgnoLast; 3264 3293 } 3265 3294 } 3266 3295 3267 - fts5IndexWork(p, iHash, &pStruct, pgnoLast); 3296 + if( p->nMinMerge>0 ) fts5IndexWork(p, iHash, &pStruct, pgnoLast); 3297 + fts5IndexCrisisMerge(p, iHash, &pStruct); 3268 3298 fts5StructureWrite(p, iHash, pStruct); 3269 3299 fts5StructureRelease(pStruct); 3270 3300 } 3271 3301 3272 3302 /* 3273 3303 ** Flush any data stored in the in-memory hash tables to the database. 3274 3304 */ ................................................................................ 
3339 3369 *pp = p = (Fts5Index*)sqlite3_malloc(sizeof(Fts5Index)); 3340 3370 if( !p ) return SQLITE_NOMEM; 3341 3371 3342 3372 memset(p, 0, sizeof(Fts5Index)); 3343 3373 p->pConfig = pConfig; 3344 3374 p->pgsz = 1000; 3345 3375 p->nMinMerge = FTS5_MIN_MERGE; 3376 + p->nCrisisMerge = FTS5_CRISIS_MERGE; 3346 3377 p->nWorkUnit = FTS5_WORK_UNIT; 3347 3378 p->nMaxPendingData = 1024*1024; 3348 3379 p->zDataTbl = sqlite3_mprintf("%s_data", pConfig->zName); 3349 3380 if( p->zDataTbl==0 ){ 3350 3381 rc = SQLITE_NOMEM; 3351 3382 }else if( bCreate ){ 3352 3383 int i; ................................................................................ 3957 3988 3958 3989 /* 3959 3990 ** Set the target page size for the index object. 3960 3991 */ 3961 3992 void sqlite3Fts5IndexPgsz(Fts5Index *p, int pgsz){ 3962 3993 p->pgsz = pgsz; 3963 3994 } 3995 + 3996 +/* 3997 +** Set the minimum number of segments that an auto-merge operation should 3998 +** attempt to merge together. A value of 1 sets the object to use the 3999 +** compile time default. Zero or less disables auto-merge altogether. 4000 +*/ 4001 +void sqlite3Fts5IndexAutomerge(Fts5Index *p, int nMinMerge){ 4002 + if( nMinMerge==1 ){ 4003 + p->nMinMerge = FTS5_MIN_MERGE; 4004 + }else{ 4005 + p->nMinMerge = nMinMerge; 4006 + } 4007 +} 3964 4008 3965 4009 /* 3966 4010 ** Iterator pMulti currently points to a valid entry (not EOF). This 3967 4011 ** function appends a copy of the position-list of the entry pMulti 3968 4012 ** currently points to to buffer pBuf. 3969 4013 ** 3970 4014 ** If an error occurs, an error code is left in p->rc. It is assumed
Changes to tool/loadfts.c.
159 159 int i; 160 160 int rc; 161 161 int nRowPerTrans = 0; 162 162 sqlite3 *db; 163 163 char *zSql; 164 164 VisitContext sCtx; 165 165 166 + int nCmd = 0; 167 + char **aCmd = 0; 168 + 166 169 if( argc % 2 ) showHelp(argv[0]); 167 170 168 171 for(i=1; i<(argc-1); i+=2){ 169 172 char *zOpt = argv[i]; 170 173 char *zArg = argv[i+1]; 171 174 if( strcmp(zOpt, "-fts")==0 ){ 172 175 iFts = atoi(zArg); 173 176 if( iFts!=3 && iFts!=4 && iFts!= 5) showHelp(argv[0]); 174 177 } 175 - if( strcmp(zOpt, "-trans")==0 ){ 178 + else if( strcmp(zOpt, "-trans")==0 ){ 176 179 nRowPerTrans = atoi(zArg); 177 180 } 178 181 else if( strcmp(zOpt, "-idx")==0 ){ 179 182 bMap = atoi(zArg); 180 183 if( bMap!=0 && bMap!=1 ) showHelp(argv[0]); 181 184 } 182 185 else if( strcmp(zOpt, "-dir")==0 ){ 183 186 zDir = zArg; 187 + } 188 + else if( strcmp(zOpt, "-special")==0 ){ 189 + nCmd++; 190 + aCmd = sqlite3_realloc(aCmd, sizeof(char*) * nCmd); 191 + aCmd[nCmd-1] = zArg; 192 + } 193 + else{ 194 + showHelp(argv[0]); 184 195 } 185 196 } 186 197 187 198 /* Open the database file */ 188 199 rc = sqlite3_open(argv[argc-1], &db); 189 200 if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_open()", db); 190 201 ................................................................................ 193 204 if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_create_function()", db); 194 205 195 206 /* Create the FTS table */ 196 207 zSql = sqlite3_mprintf("CREATE VIRTUAL TABLE fts USING fts%d(content)", iFts); 197 208 rc = sqlite3_exec(db, zSql, 0, 0, 0); 198 209 if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_exec(1)", db); 199 210 sqlite3_free(zSql); 211 + 212 + for(i=0; i<nCmd; i++){ 213 + zSql = sqlite3_mprintf("INSERT INTO fts(fts) VALUES(%Q)", aCmd[i]); 214 + rc = sqlite3_exec(db, zSql, 0, 0, 0); 215 + if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_exec(1)", db); 216 + sqlite3_free(zSql); 217 + } 200 218 201 219 /* Compile the INSERT statement to write data to the FTS table. 
*/ 202 220 memset(&sCtx, 0, sizeof(VisitContext)); 203 221 sCtx.db = db; 204 222 sCtx.nRowPerTrans = nRowPerTrans; 205 223 rc = sqlite3_prepare_v2(db, 206 224 "INSERT INTO fts VALUES(readtext(?))", -1, &sCtx.pInsert, 0 ................................................................................ 211 229 if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "BEGIN", 0, 0, 0); 212 230 traverse(zDir, (void*)&sCtx, visit_file); 213 231 if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "COMMIT", 0, 0, 0); 214 232 215 233 /* Clean up and exit. */ 216 234 sqlite3_finalize(sCtx.pInsert); 217 235 sqlite3_close(db); 236 + sqlite3_free(aCmd); 218 237 return 0; 219 238 }