Index: ext/fts5/fts5.c ================================================================== --- ext/fts5/fts5.c +++ ext/fts5/fts5.c @@ -870,10 +870,16 @@ if( n>5 && 0==sqlite3_strnicmp("pgsz=", z, 5) ){ int pgsz = atoi(&z[5]); if( pgsz<32 ) pgsz = 32; sqlite3Fts5IndexPgsz(pTab->pIndex, pgsz); rc = SQLITE_OK; + }else + + if( n>10 && 0==sqlite3_strnicmp("automerge=", z, 10) ){ + int nAutomerge = atoi(&z[10]); + sqlite3Fts5IndexAutomerge(pTab->pIndex, nAutomerge); + rc = SQLITE_OK; } return rc; } Index: ext/fts5/fts5.h ================================================================== --- ext/fts5/fts5.h +++ ext/fts5/fts5.h @@ -24,11 +24,11 @@ #include "sqlite3.h" /************************************************************************* ** CUSTOM AUXILIARY FUNCTIONS ** -** Virtual table implemenations may overload SQL functions by implementing +** Virtual table implementations may overload SQL functions by implementing ** the sqlite3_module.xFindFunction() method. */ typedef struct Fts5ExtensionApi Fts5ExtensionApi; typedef struct Fts5Context Fts5Context; @@ -40,11 +40,10 @@ int nVal, /* Number of values in apVal[] array */ sqlite3_value **apVal /* Array of trailing arguments */ ); /* -** ** xUserData(pFts): ** ** Return a copy of the context pointer the extension function was ** registered with. ** Index: ext/fts5/fts5Int.h ================================================================== --- ext/fts5/fts5Int.h +++ ext/fts5/fts5Int.h @@ -270,10 +270,12 @@ ** Set the page size to use when writing. It doesn't matter if this ** changes mid-transaction, or if inconsistent values are used by ** multiple clients. */ void sqlite3Fts5IndexPgsz(Fts5Index *p, int pgsz); + +void sqlite3Fts5IndexAutomerge(Fts5Index *p, int nMerge); /* ** Return the total number of entries read from the %_data table by ** this connection since it was created. */ Index: ext/fts5/fts5_index.c ================================================================== --- ext/fts5/fts5_index.c +++ ext/fts5/fts5_index.c @@ -43,10 +43,11 @@ #define FTS5_DEFAULT_PAGE_SIZE 1000 #define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */ #define FTS5_MIN_MERGE 4 /* Minimum number of segments to merge */ +#define FTS5_CRISIS_MERGE 16 /* Maximum number of segments to merge */ #define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */ /* ** Details: @@ -289,10 +290,11 @@ struct Fts5Index { Fts5Config *pConfig; /* Virtual table configuration */ char *zDataTbl; /* Name of %_data table */ int pgsz; /* Target page size for this index */ int nMinMerge; /* Minimum input segments in a merge */ + int nCrisisMerge; /* Maximum allowed segments per level */ int nWorkUnit; /* Leaf pages in a "unit" of work */ /* ** Variables related to the accumulation of tokens and doclists within the ** in-memory hash tables before they are flushed to disk. @@ -2985,18 +2987,19 @@ ** */ static void fts5IndexMergeLevel( Fts5Index *p, /* FTS5 backend object */ int iIdx, /* Index to work on */ - Fts5Structure *pStruct, /* Stucture of index iIdx */ + Fts5Structure **ppStruct, /* IN/OUT: Stucture of index iIdx */ int iLvl, /* Level to read input from */ int *pnRem /* Write up to this many output leaves */ ){ + Fts5Structure *pStruct = *ppStruct; Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; - Fts5StructureLevel *pLvlOut = &pStruct->aLevel[iLvl+1]; + Fts5StructureLevel *pLvlOut; Fts5MultiSegIter *pIter = 0; /* Iterator to read input data */ - int nRem = *pnRem; /* Output leaf pages left to write */ + int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */ int nInput; /* Number of input segments */ Fts5SegWriter writer; /* Writer object */ Fts5StructureSegment *pSeg; /* Output segment */ Fts5Buffer term; int bRequireDoclistTerm = 0; /* Doclist terminator (0x00) required */ @@ -3007,16 +3010,28 @@ memset(&writer, 0, sizeof(Fts5SegWriter)); memset(&term, 0, sizeof(Fts5Buffer)); writer.iIdx = iIdx; if( pLvl->nMerge ){ + pLvlOut = &pStruct->aLevel[iLvl+1]; assert( pLvlOut->nSeg>0 ); nInput = pLvl->nMerge; fts5WriteInitForAppend(p, &writer, iIdx, &pLvlOut->aSeg[pLvlOut->nSeg-1]); pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1]; }else{ int iSegid = fts5AllocateSegid(p, pStruct); + + /* Extend the Fts5Structure object as required to ensure the output + ** segment exists. */ + if( iLvl==pStruct->nLevel-1 ){ + fts5StructureAddLevel(&p->rc, ppStruct); + pStruct = *ppStruct; + } + fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0); + pLvl = &pStruct->aLevel[iLvl]; + pLvlOut = &pStruct->aLevel[iLvl+1]; + fts5WriteInit(p, &writer, iIdx, iSegid); /* Add the new segment to the output level */ if( iLvl+1==pStruct->nLevel ) pStruct->nLevel++; pSeg = &pLvlOut->aSeg[pLvlOut->nSeg]; @@ -3047,11 +3062,11 @@ fts5ChunkIterInit(p, pSeg, &sPos); if( bOldest==0 || sPos.nRem>0 ){ int nTerm; const u8 *pTerm = fts5MultiIterTerm(pIter, &nTerm); if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){ - if( writer.nLeafWritten>nRem ){ + if( pnRem && writer.nLeafWritten>nRem ){ fts5ChunkIterRelease(&sPos); break; } /* This is a new term. Append a term to the output segment. */ @@ -3104,11 +3119,11 @@ pLvl->nMerge = nInput; } fts5MultiIterFree(p, pIter); fts5BufferFree(&term); - *pnRem -= writer.nLeafWritten; + if( pnRem ) *pnRem -= writer.nLeafWritten; } /* ** A total of nLeaf leaf pages of data has just been flushed to a level-0 ** segments in index iIdx with structure pStruct. This function updates the @@ -3163,20 +3178,34 @@ assert( pStruct->aLevel[iLvl].nSeg==0 ); } #endif if( nBestnMinMerge && pStruct->aLevel[iBestLvl].nMerge==0 ) break; - if( iBestLvl==pStruct->nLevel-1 ){ - fts5StructureAddLevel(&p->rc, &pStruct); - } - fts5StructureExtendLevel(&p->rc, pStruct, iBestLvl+1, 1, 0); - fts5IndexMergeLevel(p, iIdx, pStruct, iBestLvl, &nRem); + fts5IndexMergeLevel(p, iIdx, &pStruct, iBestLvl, &nRem); fts5StructurePromote(p, iBestLvl+1, pStruct); assert( nRem==0 || p->rc==SQLITE_OK ); *ppStruct = pStruct; } } + +static void fts5IndexCrisisMerge( + Fts5Index *p, /* FTS5 backend object */ + int iIdx, /* Index to work on */ + Fts5Structure **ppStruct /* IN/OUT: Current structure of index */ +){ + Fts5Structure *pStruct = *ppStruct; + int iLvl = 0; + while( p->rc==SQLITE_OK + && iLvlnLevel + && pStruct->aLevel[iLvl].nSeg>=p->nCrisisMerge + ){ + fts5IndexMergeLevel(p, iIdx, &pStruct, iLvl, 0); + fts5StructurePromote(p, iLvl+1, pStruct); + iLvl++; + } + *ppStruct = pStruct; +} typedef struct Fts5FlushCtx Fts5FlushCtx; struct Fts5FlushCtx { Fts5Index *pIdx; Fts5SegWriter writer; @@ -3201,11 +3230,10 @@ void *pCtx, i64 iRowid, const u8 *aPoslist, int nPoslist ){ - Fts5Buffer *pBuf; Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx; int rc = SQLITE_OK; /* Append the rowid itself */ fts5WriteAppendRowid(p->pIdx, &p->writer, iRowid); @@ -3248,11 +3276,12 @@ fts5FlushNewTerm, fts5FlushNewEntry, fts5FlushTermDone ); if( p->rc==SQLITE_OK ) p->rc = rc; fts5WriteFinish(p, &ctx.writer, &nHeight, &pgnoLast); - /* Edit the Fts5Structure and write it back to the database. */ + /* Update the Fts5Structure. It is written back to the database by the + ** fts5StructureRelease() call below. */ if( pStruct->nLevel==0 ){ fts5StructureAddLevel(&p->rc, &pStruct); } fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0); if( p->rc==SQLITE_OK ){ @@ -3262,11 +3291,12 @@ pSeg->pgnoFirst = 1; pSeg->pgnoLast = pgnoLast; } } - fts5IndexWork(p, iHash, &pStruct, pgnoLast); + if( p->nMinMerge>0 ) fts5IndexWork(p, iHash, &pStruct, pgnoLast); + fts5IndexCrisisMerge(p, iHash, &pStruct); fts5StructureWrite(p, iHash, pStruct); fts5StructureRelease(pStruct); } /* @@ -3341,10 +3371,11 @@ memset(p, 0, sizeof(Fts5Index)); p->pConfig = pConfig; p->pgsz = 1000; p->nMinMerge = FTS5_MIN_MERGE; + p->nCrisisMerge = FTS5_CRISIS_MERGE; p->nWorkUnit = FTS5_WORK_UNIT; p->nMaxPendingData = 1024*1024; p->zDataTbl = sqlite3_mprintf("%s_data", pConfig->zName); if( p->zDataTbl==0 ){ rc = SQLITE_NOMEM; @@ -3959,10 +3990,23 @@ ** Set the target page size for the index object. */ void sqlite3Fts5IndexPgsz(Fts5Index *p, int pgsz){ p->pgsz = pgsz; } + +/* +** Set the minimum number of segments that an auto-merge operation should +** attempt to merge together. A value of 1 sets the object to use the +** compile time default. Zero or less disables auto-merge altogether. +*/ +void sqlite3Fts5IndexAutomerge(Fts5Index *p, int nMinMerge){ + if( nMinMerge==1 ){ + p->nMinMerge = FTS5_MIN_MERGE; + }else{ + p->nMinMerge = nMinMerge; + } +} /* ** Iterator pMulti currently points to a valid entry (not EOF). This ** function appends a copy of the position-list of the entry pMulti ** currently points to to buffer pBuf. Index: tool/loadfts.c ================================================================== --- tool/loadfts.c +++ tool/loadfts.c @@ -161,28 +161,39 @@ int nRowPerTrans = 0; sqlite3 *db; char *zSql; VisitContext sCtx; + int nCmd = 0; + char **aCmd = 0; + if( argc % 2 ) showHelp(argv[0]); for(i=1; i<(argc-1); i+=2){ char *zOpt = argv[i]; char *zArg = argv[i+1]; if( strcmp(zOpt, "-fts")==0 ){ iFts = atoi(zArg); if( iFts!=3 && iFts!=4 && iFts!= 5) showHelp(argv[0]); } - if( strcmp(zOpt, "-trans")==0 ){ + else if( strcmp(zOpt, "-trans")==0 ){ nRowPerTrans = atoi(zArg); } else if( strcmp(zOpt, "-idx")==0 ){ bMap = atoi(zArg); if( bMap!=0 && bMap!=1 ) showHelp(argv[0]); } else if( strcmp(zOpt, "-dir")==0 ){ zDir = zArg; + } + else if( strcmp(zOpt, "-special")==0 ){ + nCmd++; + aCmd = sqlite3_realloc(aCmd, sizeof(char*) * nCmd); + aCmd[nCmd-1] = zArg; + } + else{ + showHelp(argv[0]); } } /* Open the database file */ rc = sqlite3_open(argv[argc-1], &db); @@ -195,10 +206,17 @@ /* Create the FTS table */ zSql = sqlite3_mprintf("CREATE VIRTUAL TABLE fts USING fts%d(content)", iFts); rc = sqlite3_exec(db, zSql, 0, 0, 0); if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_exec(1)", db); sqlite3_free(zSql); + + for(i=0; i0 ) sqlite3_exec(db, "COMMIT", 0, 0, 0); /* Clean up and exit. */ sqlite3_finalize(sCtx.pInsert); sqlite3_close(db); + sqlite3_free(aCmd); return 0; }