Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Add an "automerge=0" mode that disables auto-merging and falls back to fts4-style crisis merges. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts5 |
Files: | files | file ages | folders |
SHA1: |
2397404e152b908d838e6491294b263b |
User & Date: | dan 2014-08-18 19:30:01.020 |
Context
2014-08-25
| ||
19:58 | Add documentation for tokenizer api to fts5.h. Also add a script to extract extension API docs and format them as html. (check-in: e240d467e6 user: dan tags: fts5) | |
2014-08-18
| ||
19:30 | Add an "automerge=0" mode that disables auto-merging and falls back to fts4-style crisis merges. (check-in: 2397404e15 user: dan tags: fts5) | |
2014-08-12
| ||
16:07 | Cache the value of the "totals" record in memory during transactions. (check-in: 05dfdad445 user: dan tags: fts5) | |
Changes
Changes to ext/fts5/fts5.c.
︙ | ︙ | |||
868 869 870 871 872 873 874 875 876 877 878 879 880 881 | }else if( n>5 && 0==sqlite3_strnicmp("pgsz=", z, 5) ){ int pgsz = atoi(&z[5]); if( pgsz<32 ) pgsz = 32; sqlite3Fts5IndexPgsz(pTab->pIndex, pgsz); rc = SQLITE_OK; } return rc; } /* ** This function is the implementation of the xUpdate callback used by | > > > > > > | 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 | }else if( n>5 && 0==sqlite3_strnicmp("pgsz=", z, 5) ){ int pgsz = atoi(&z[5]); if( pgsz<32 ) pgsz = 32; sqlite3Fts5IndexPgsz(pTab->pIndex, pgsz); rc = SQLITE_OK; }else if( n>10 && 0==sqlite3_strnicmp("automerge=", z, 10) ){ int nAutomerge = atoi(&z[10]); sqlite3Fts5IndexAutomerge(pTab->pIndex, nAutomerge); rc = SQLITE_OK; } return rc; } /* ** This function is the implementation of the xUpdate callback used by |
︙ | ︙ |
Changes to ext/fts5/fts5.h.
︙ | ︙ | |||
22 23 24 25 26 27 28 | #define _FTS5_H #include "sqlite3.h" /************************************************************************* ** CUSTOM AUXILIARY FUNCTIONS ** | | < | 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 | #define _FTS5_H #include "sqlite3.h" /************************************************************************* ** CUSTOM AUXILIARY FUNCTIONS ** ** Virtual table implementations may overload SQL functions by implementing ** the sqlite3_module.xFindFunction() method. */ typedef struct Fts5ExtensionApi Fts5ExtensionApi; typedef struct Fts5Context Fts5Context; typedef void (*fts5_extension_function)( const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ Fts5Context *pFts, /* First arg to pass to pApi functions */ sqlite3_context *pCtx, /* Context for returning result/error */ int nVal, /* Number of values in apVal[] array */ sqlite3_value **apVal /* Array of trailing arguments */ ); /* ** xUserData(pFts): ** ** Return a copy of the context pointer the extension function was ** registered with. ** ** ** xColumnTotalSize(pFts, iCol, pnToken): |
︙ | ︙ |
Changes to ext/fts5/fts5Int.h.
︙ | ︙ | |||
268 269 270 271 272 273 274 275 276 277 278 279 280 281 | /* ** Set the page size to use when writing. It doesn't matter if this ** changes mid-transaction, or if inconsistent values are used by ** multiple clients. */ void sqlite3Fts5IndexPgsz(Fts5Index *p, int pgsz); /* ** Return the total number of entries read from the %_data table by ** this connection since it was created. */ int sqlite3Fts5IndexReads(Fts5Index *p); | > > | 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 | /* ** Set the page size to use when writing. It doesn't matter if this ** changes mid-transaction, or if inconsistent values are used by ** multiple clients. */ void sqlite3Fts5IndexPgsz(Fts5Index *p, int pgsz); void sqlite3Fts5IndexAutomerge(Fts5Index *p, int nMerge); /* ** Return the total number of entries read from the %_data table by ** this connection since it was created. */ int sqlite3Fts5IndexReads(Fts5Index *p); |
︙ | ︙ |
Changes to ext/fts5/fts5_index.c.
︙ | ︙ | |||
41 42 43 44 45 46 47 48 49 50 51 52 53 54 | ** */ #define FTS5_DEFAULT_PAGE_SIZE 1000 #define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */ #define FTS5_MIN_MERGE 4 /* Minimum number of segments to merge */ #define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */ /* ** Details: ** ** The %_data table managed by this module, | > | 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 | ** */ #define FTS5_DEFAULT_PAGE_SIZE 1000 #define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */ #define FTS5_MIN_MERGE 4 /* Minimum number of segments to merge */ #define FTS5_CRISIS_MERGE 16 /* Maximum number of segments to merge */ #define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */ /* ** Details: ** ** The %_data table managed by this module, |
︙ | ︙ | |||
287 288 289 290 291 292 293 294 295 296 297 298 299 300 | ** One object per %_data table. */ struct Fts5Index { Fts5Config *pConfig; /* Virtual table configuration */ char *zDataTbl; /* Name of %_data table */ int pgsz; /* Target page size for this index */ int nMinMerge; /* Minimum input segments in a merge */ int nWorkUnit; /* Leaf pages in a "unit" of work */ /* ** Variables related to the accumulation of tokens and doclists within the ** in-memory hash tables before they are flushed to disk. */ Fts5Hash **apHash; /* Array of hash tables */ | > | 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 | ** One object per %_data table. */ struct Fts5Index { Fts5Config *pConfig; /* Virtual table configuration */ char *zDataTbl; /* Name of %_data table */ int pgsz; /* Target page size for this index */ int nMinMerge; /* Minimum input segments in a merge */ int nCrisisMerge; /* Maximum allowed segments per level */ int nWorkUnit; /* Leaf pages in a "unit" of work */ /* ** Variables related to the accumulation of tokens and doclists within the ** in-memory hash tables before they are flushed to disk. */ Fts5Hash **apHash; /* Array of hash tables */ |
︙ | ︙ | |||
2983 2984 2985 2986 2987 2988 2989 | /* ** */ static void fts5IndexMergeLevel( Fts5Index *p, /* FTS5 backend object */ int iIdx, /* Index to work on */ | | > | | > > > > > > > > > > > > | 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 | /* ** */ static void fts5IndexMergeLevel( Fts5Index *p, /* FTS5 backend object */ int iIdx, /* Index to work on */ Fts5Structure **ppStruct, /* IN/OUT: Stucture of index iIdx */ int iLvl, /* Level to read input from */ int *pnRem /* Write up to this many output leaves */ ){ Fts5Structure *pStruct = *ppStruct; Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; Fts5StructureLevel *pLvlOut; Fts5MultiSegIter *pIter = 0; /* Iterator to read input data */ int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */ int nInput; /* Number of input segments */ Fts5SegWriter writer; /* Writer object */ Fts5StructureSegment *pSeg; /* Output segment */ Fts5Buffer term; int bRequireDoclistTerm = 0; /* Doclist terminator (0x00) required */ int bOldest; /* True if the output segment is the oldest */ assert( iLvl<pStruct->nLevel ); assert( pLvl->nMerge<=pLvl->nSeg ); memset(&writer, 0, sizeof(Fts5SegWriter)); memset(&term, 0, sizeof(Fts5Buffer)); writer.iIdx = iIdx; if( pLvl->nMerge ){ pLvlOut = &pStruct->aLevel[iLvl+1]; assert( pLvlOut->nSeg>0 ); nInput = pLvl->nMerge; fts5WriteInitForAppend(p, &writer, iIdx, &pLvlOut->aSeg[pLvlOut->nSeg-1]); pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1]; }else{ int iSegid = fts5AllocateSegid(p, pStruct); /* Extend the Fts5Structure object as required to ensure the output ** segment exists. 
*/ if( iLvl==pStruct->nLevel-1 ){ fts5StructureAddLevel(&p->rc, ppStruct); pStruct = *ppStruct; } fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0); pLvl = &pStruct->aLevel[iLvl]; pLvlOut = &pStruct->aLevel[iLvl+1]; fts5WriteInit(p, &writer, iIdx, iSegid); /* Add the new segment to the output level */ if( iLvl+1==pStruct->nLevel ) pStruct->nLevel++; pSeg = &pLvlOut->aSeg[pLvlOut->nSeg]; pLvlOut->nSeg++; pSeg->pgnoFirst = 1; |
︙ | ︙ | |||
3045 3046 3047 3048 3049 3050 3051 | ** the position list is empty (i.e. the entry is a delete marker), no ** entry need be written to the output. */ fts5ChunkIterInit(p, pSeg, &sPos); if( bOldest==0 || sPos.nRem>0 ){ int nTerm; const u8 *pTerm = fts5MultiIterTerm(pIter, &nTerm); if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){ | | | 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 | ** the position list is empty (i.e. the entry is a delete marker), no ** entry need be written to the output. */ fts5ChunkIterInit(p, pSeg, &sPos); if( bOldest==0 || sPos.nRem>0 ){ int nTerm; const u8 *pTerm = fts5MultiIterTerm(pIter, &nTerm); if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){ if( pnRem && writer.nLeafWritten>nRem ){ fts5ChunkIterRelease(&sPos); break; } /* This is a new term. Append a term to the output segment. */ if( bRequireDoclistTerm ){ fts5WriteAppendZerobyte(p, &writer); |
︙ | ︙ | |||
3102 3103 3104 3105 3106 3107 3108 | assert( pSeg->nHeight>0 && pSeg->pgnoLast>0 ); fts5TrimSegments(p, pIter); pLvl->nMerge = nInput; } fts5MultiIterFree(p, pIter); fts5BufferFree(&term); | | | 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 | assert( pSeg->nHeight>0 && pSeg->pgnoLast>0 ); fts5TrimSegments(p, pIter); pLvl->nMerge = nInput; } fts5MultiIterFree(p, pIter); fts5BufferFree(&term); if( pnRem ) *pnRem -= writer.nLeafWritten; } /* ** A total of nLeaf leaf pages of data has just been flushed to a level-0 ** segment in index iIdx with structure pStruct. This function updates the ** write-counter accordingly and, if necessary, performs incremental merge ** work.
︙ | ︙ | |||
3161 3162 3163 3164 3165 3166 3167 | #ifdef SQLITE_DEBUG for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){ assert( pStruct->aLevel[iLvl].nSeg==0 ); } #endif if( nBest<p->nMinMerge && pStruct->aLevel[iBestLvl].nMerge==0 ) break; | < < < < | > > > > > > > > > > > > > > > > > > | 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 | #ifdef SQLITE_DEBUG for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){ assert( pStruct->aLevel[iLvl].nSeg==0 ); } #endif if( nBest<p->nMinMerge && pStruct->aLevel[iBestLvl].nMerge==0 ) break; fts5IndexMergeLevel(p, iIdx, &pStruct, iBestLvl, &nRem); fts5StructurePromote(p, iBestLvl+1, pStruct); assert( nRem==0 || p->rc==SQLITE_OK ); *ppStruct = pStruct; } } static void fts5IndexCrisisMerge( Fts5Index *p, /* FTS5 backend object */ int iIdx, /* Index to work on */ Fts5Structure **ppStruct /* IN/OUT: Current structure of index */ ){ Fts5Structure *pStruct = *ppStruct; int iLvl = 0; while( p->rc==SQLITE_OK && iLvl<pStruct->nLevel && pStruct->aLevel[iLvl].nSeg>=p->nCrisisMerge ){ fts5IndexMergeLevel(p, iIdx, &pStruct, iLvl, 0); fts5StructurePromote(p, iLvl+1, pStruct); iLvl++; } *ppStruct = pStruct; } typedef struct Fts5FlushCtx Fts5FlushCtx; struct Fts5FlushCtx { Fts5Index *pIdx; Fts5SegWriter writer; }; |
︙ | ︙ | |||
3199 3200 3201 3202 3203 3204 3205 | static int fts5FlushNewEntry( void *pCtx, i64 iRowid, const u8 *aPoslist, int nPoslist ){ | < | 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 | static int fts5FlushNewEntry( void *pCtx, i64 iRowid, const u8 *aPoslist, int nPoslist ){ Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx; int rc = SQLITE_OK; /* Append the rowid itself */ fts5WriteAppendRowid(p->pIdx, &p->writer, iRowid); /* Append the size of the position list in bytes */ |
︙ | ︙ | |||
3246 3247 3248 3249 3250 3251 3252 | rc = sqlite3Fts5HashIterate( p->apHash[iHash], (void*)&ctx, fts5FlushNewTerm, fts5FlushNewEntry, fts5FlushTermDone ); if( p->rc==SQLITE_OK ) p->rc = rc; fts5WriteFinish(p, &ctx.writer, &nHeight, &pgnoLast); | | > | > | 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 | rc = sqlite3Fts5HashIterate( p->apHash[iHash], (void*)&ctx, fts5FlushNewTerm, fts5FlushNewEntry, fts5FlushTermDone ); if( p->rc==SQLITE_OK ) p->rc = rc; fts5WriteFinish(p, &ctx.writer, &nHeight, &pgnoLast); /* Update the Fts5Structure. It is written back to the database by the ** fts5StructureRelease() call below. */ if( pStruct->nLevel==0 ){ fts5StructureAddLevel(&p->rc, &pStruct); } fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0); if( p->rc==SQLITE_OK ){ pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ]; pSeg->iSegid = iSegid; pSeg->nHeight = nHeight; pSeg->pgnoFirst = 1; pSeg->pgnoLast = pgnoLast; } } if( p->nMinMerge>0 ) fts5IndexWork(p, iHash, &pStruct, pgnoLast); fts5IndexCrisisMerge(p, iHash, &pStruct); fts5StructureWrite(p, iHash, pStruct); fts5StructureRelease(pStruct); } /* ** Flush any data stored in the in-memory hash tables to the database. */ |
︙ | ︙ | |||
3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 | *pp = p = (Fts5Index*)sqlite3_malloc(sizeof(Fts5Index)); if( !p ) return SQLITE_NOMEM; memset(p, 0, sizeof(Fts5Index)); p->pConfig = pConfig; p->pgsz = 1000; p->nMinMerge = FTS5_MIN_MERGE; p->nWorkUnit = FTS5_WORK_UNIT; p->nMaxPendingData = 1024*1024; p->zDataTbl = sqlite3_mprintf("%s_data", pConfig->zName); if( p->zDataTbl==0 ){ rc = SQLITE_NOMEM; }else if( bCreate ){ int i; | > | 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 | *pp = p = (Fts5Index*)sqlite3_malloc(sizeof(Fts5Index)); if( !p ) return SQLITE_NOMEM; memset(p, 0, sizeof(Fts5Index)); p->pConfig = pConfig; p->pgsz = 1000; p->nMinMerge = FTS5_MIN_MERGE; p->nCrisisMerge = FTS5_CRISIS_MERGE; p->nWorkUnit = FTS5_WORK_UNIT; p->nMaxPendingData = 1024*1024; p->zDataTbl = sqlite3_mprintf("%s_data", pConfig->zName); if( p->zDataTbl==0 ){ rc = SQLITE_NOMEM; }else if( bCreate ){ int i; |
︙ | ︙ | |||
3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 | /* ** Set the target page size for the index object. */ void sqlite3Fts5IndexPgsz(Fts5Index *p, int pgsz){ p->pgsz = pgsz; } /* ** Iterator pMulti currently points to a valid entry (not EOF). This ** function appends a copy of the position-list of the entry pMulti ** currently points to to buffer pBuf. ** ** If an error occurs, an error code is left in p->rc. It is assumed | > > > > > > > > > > > > > | 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 | /* ** Set the target page size for the index object. */ void sqlite3Fts5IndexPgsz(Fts5Index *p, int pgsz){ p->pgsz = pgsz; } /* ** Set the minimum number of segments that an auto-merge operation should ** attempt to merge together. A value of 1 sets the object to use the ** compile time default. Zero or less disables auto-merge altogether. */ void sqlite3Fts5IndexAutomerge(Fts5Index *p, int nMinMerge){ if( nMinMerge==1 ){ p->nMinMerge = FTS5_MIN_MERGE; }else{ p->nMinMerge = nMinMerge; } } /* ** Iterator pMulti currently points to a valid entry (not EOF). This ** function appends a copy of the position-list of the entry pMulti ** currently points to to buffer pBuf. ** ** If an error occurs, an error code is left in p->rc. It is assumed |
︙ | ︙ |
Changes to tool/loadfts.c.
︙ | ︙ | |||
159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 | int i; int rc; int nRowPerTrans = 0; sqlite3 *db; char *zSql; VisitContext sCtx; if( argc % 2 ) showHelp(argv[0]); for(i=1; i<(argc-1); i+=2){ char *zOpt = argv[i]; char *zArg = argv[i+1]; if( strcmp(zOpt, "-fts")==0 ){ iFts = atoi(zArg); if( iFts!=3 && iFts!=4 && iFts!= 5) showHelp(argv[0]); } | > > > | > > > > > > > > > > > > > > > > | 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 | int i; int rc; int nRowPerTrans = 0; sqlite3 *db; char *zSql; VisitContext sCtx; int nCmd = 0; char **aCmd = 0; if( argc % 2 ) showHelp(argv[0]); for(i=1; i<(argc-1); i+=2){ char *zOpt = argv[i]; char *zArg = argv[i+1]; if( strcmp(zOpt, "-fts")==0 ){ iFts = atoi(zArg); if( iFts!=3 && iFts!=4 && iFts!= 5) showHelp(argv[0]); } else if( strcmp(zOpt, "-trans")==0 ){ nRowPerTrans = atoi(zArg); } else if( strcmp(zOpt, "-idx")==0 ){ bMap = atoi(zArg); if( bMap!=0 && bMap!=1 ) showHelp(argv[0]); } else if( strcmp(zOpt, "-dir")==0 ){ zDir = zArg; } else if( strcmp(zOpt, "-special")==0 ){ nCmd++; aCmd = sqlite3_realloc(aCmd, sizeof(char*) * nCmd); aCmd[nCmd-1] = zArg; } else{ showHelp(argv[0]); } } /* Open the database file */ rc = sqlite3_open(argv[argc-1], &db); if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_open()", db); rc = sqlite3_create_function(db, "readtext", 1, SQLITE_UTF8, 0, readfileFunc, 0, 0); if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_create_function()", db); /* Create the FTS table */ zSql = sqlite3_mprintf("CREATE VIRTUAL TABLE fts USING fts%d(content)", iFts); rc = sqlite3_exec(db, zSql, 0, 0, 0); if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_exec(1)", db); sqlite3_free(zSql); for(i=0; i<nCmd; i++){ zSql = sqlite3_mprintf("INSERT INTO 
fts(fts) VALUES(%Q)", aCmd[i]); rc = sqlite3_exec(db, zSql, 0, 0, 0); if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_exec(1)", db); sqlite3_free(zSql); } /* Compile the INSERT statement to write data to the FTS table. */ memset(&sCtx, 0, sizeof(VisitContext)); sCtx.db = db; sCtx.nRowPerTrans = nRowPerTrans; rc = sqlite3_prepare_v2(db, "INSERT INTO fts VALUES(readtext(?))", -1, &sCtx.pInsert, 0 ); if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_prepare_v2(1)", db); /* Load all files in the directory hierarchy into the FTS table. */ if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "BEGIN", 0, 0, 0); traverse(zDir, (void*)&sCtx, visit_file); if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "COMMIT", 0, 0, 0); /* Clean up and exit. */ sqlite3_finalize(sCtx.pInsert); sqlite3_close(db); sqlite3_free(aCmd); return 0; } |