Index: ext/fts5/fts5Int.h ================================================================== --- ext/fts5/fts5Int.h +++ ext/fts5/fts5Int.h @@ -84,10 +84,11 @@ /* Values loaded from the %_config table */ int iCookie; /* Incremented when %_config is modified */ int pgsz; /* Approximate page size used in %_data */ int nAutomerge; /* 'automerge' setting */ + int nCrisisMerge; /* Maximum allowed segments per level */ char *zRank; /* Name of rank function */ char *zRankArgs; /* Arguments to rank function */ }; #define FTS5_CONTENT_NORMAL 0 Index: ext/fts5/fts5_config.c ================================================================== --- ext/fts5/fts5_config.c +++ ext/fts5/fts5_config.c @@ -15,10 +15,11 @@ #include "fts5Int.h" #define FTS5_DEFAULT_PAGE_SIZE 1000 #define FTS5_DEFAULT_AUTOMERGE 4 +#define FTS5_DEFAULT_CRISISMERGE 16 /* Maximum allowed page size */ #define FTS5_MAX_PAGE_SIZE (128*1024) static int fts5_iswhitespace(char x){ @@ -714,10 +715,23 @@ }else{ if( nAutomerge==1 ) nAutomerge = FTS5_DEFAULT_AUTOMERGE; pConfig->nAutomerge = nAutomerge; } } + + else if( 0==sqlite3_stricmp(zKey, "crisismerge") ){ + int nCrisisMerge = -1; + if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ + nCrisisMerge = sqlite3_value_int(pVal); + } + if( nCrisisMerge<0 ){ + if( pbBadkey ) *pbBadkey = 1; + }else{ + if( nCrisisMerge<=1 ) nCrisisMerge = FTS5_DEFAULT_CRISISMERGE; + pConfig->nCrisisMerge = nCrisisMerge; + } + } else if( 0==sqlite3_stricmp(zKey, "rank") ){ const char *zIn = (const char*)sqlite3_value_text(pVal); char *zRank; char *zRankArgs; @@ -747,10 +761,11 @@ int rc; /* Set default values */ pConfig->pgsz = FTS5_DEFAULT_PAGE_SIZE; pConfig->nAutomerge = FTS5_DEFAULT_AUTOMERGE; + pConfig->nCrisisMerge = FTS5_DEFAULT_CRISISMERGE; zSql = sqlite3_mprintf(zSelect, pConfig->zDb, pConfig->zName); if( zSql==0 ){ rc = SQLITE_NOMEM; }else{ Index: ext/fts5/fts5_index.c ================================================================== --- ext/fts5/fts5_index.c +++ ext/fts5/fts5_index.c @@ -41,11 +41,10 @@ ** */ #define FTS5_OPT_WORK_UNIT 1000 /* Number of leaf pages per optimize step */ #define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */ -#define FTS5_CRISIS_MERGE 16 /* Maximum number of segments to merge */ #define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */ /* ** Details: @@ -291,11 +290,10 @@ ** One object per %_data table. */ struct Fts5Index { Fts5Config *pConfig; /* Virtual table configuration */ char *zDataTbl; /* Name of %_data table */ - int nCrisisMerge; /* Maximum allowed segments per level */ int nWorkUnit; /* Leaf pages in a "unit" of work */ /* ** Variables related to the accumulation of tokens and doclists within the ** in-memory hash tables before they are flushed to disk. @@ -1103,12 +1101,12 @@ assert( iIdx<=pConfig->nPrefix ); pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID(iIdx)); if( !pData ) return 0; p->rc = fts5StructureDecode(pData->p, pData->n, &iCookie, &pRet); - if( p->rc==SQLITE_OK && p->pConfig->iCookie!=iCookie ){ - p->rc = sqlite3Fts5ConfigLoad(p->pConfig, iCookie); + if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){ + p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie); } fts5DataRelease(pData); if( p->rc!=SQLITE_OK ){ fts5StructureRelease(pRet); @@ -1248,24 +1246,23 @@ if( p->rc==SQLITE_OK ){ int iTst; int iPromote = -1; int szPromote; /* Promote anything this size or smaller */ Fts5StructureSegment *pSeg; /* Segment just written */ - Fts5StructureLevel *pTst; int szSeg; /* Size of segment just written */ pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1]; szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst); /* Check for condition (a) */ for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--); - pTst = &pStruct->aLevel[iTst]; - assert( pTst->nMerge==0 ); if( iTst>=0 ){ int i; int szMax = 0; + Fts5StructureLevel *pTst = &pStruct->aLevel[iTst]; + assert( pTst->nMerge==0 ); for(i=0; inSeg; i++){ int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1; if( sz>szMax ) szMax = sz; } if( szMax>=szSeg ){ @@ -2481,32 +2478,10 @@ */ static int fts5PosIterEof(Fts5Index *p, Fts5PosIter *pIter){ return (p->rc || pIter->chunk.pLeaf==0); } -/* -** Add an entry for (iRowid/iCol/iPos) to the doclist for (pToken/nToken) -** in hash table for index iIdx. If iIdx is zero, this is the main terms -** index. Values of 1 and greater for iIdx are prefix indexes. -** -** If an OOM error is encountered, set the Fts5Index.rc error code -** accordingly. -*/ -static void fts5AddTermToHash( - Fts5Index *p, /* Index object to write to */ - int iIdx, /* Entry in p->aHash[] to update */ - int iCol, /* Column token appears in (-ve -> delete) */ - int iPos, /* Position of token within column */ - const char *pToken, int nToken /* Token to add or remove to or from index */ -){ - if( p->rc==SQLITE_OK ){ - p->rc = sqlite3Fts5HashWrite( - p->apHash[iIdx], p->iWriteRowid, iCol, iPos, pToken, nToken - ); - } -} - /* ** Allocate a new segment-id for the structure pStruct. ** ** If an error has already occurred, this function is a no-op. 0 is ** returned in this case. @@ -3226,11 +3201,11 @@ ){ Fts5Structure *pStruct = *ppStruct; int iLvl = 0; while( p->rc==SQLITE_OK && iLvlnLevel - && pStruct->aLevel[iLvl].nSeg>=p->nCrisisMerge + && pStruct->aLevel[iLvl].nSeg>=p->pConfig->nCrisisMerge ){ fts5IndexMergeLevel(p, iIdx, &pStruct, iLvl, 0); fts5StructurePromote(p, iLvl+1, pStruct); iLvl++; } @@ -3998,10 +3973,33 @@ ** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain ** to the document with rowid iRowid. */ int sqlite3Fts5IndexBeginWrite(Fts5Index *p, i64 iRowid){ assert( p->rc==SQLITE_OK ); + + /* Allocate hash tables if they have not already been allocated */ + if( p->apHash==0 ){ + int i; + int rc = SQLITE_OK; + int nHash = p->pConfig->nPrefix + 1; + Fts5Hash **apNew; + + apNew = (Fts5Hash**)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Hash*)*nHash); + for(i=0; rc==SQLITE_OK && inPendingData); + } + if( rc==SQLITE_OK ){ + p->apHash = apNew; + }else{ + for(i=0; iiWriteRowid || (p->nPendingData > p->nMaxPendingData) ){ fts5IndexFlush(p); } p->iWriteRowid = iRowid; return fts5IndexReturn(p); @@ -4069,11 +4067,10 @@ *pp = p = (Fts5Index*)sqlite3_malloc(sizeof(Fts5Index)); if( !p ) return SQLITE_NOMEM; memset(p, 0, sizeof(Fts5Index)); p->pConfig = pConfig; - p->nCrisisMerge = FTS5_CRISIS_MERGE; p->nWorkUnit = FTS5_WORK_UNIT; p->nMaxPendingData = 1024*1024; p->zDataTbl = sqlite3_mprintf("%s_data", pConfig->zName); if( p->zDataTbl==0 ){ rc = SQLITE_NOMEM; @@ -4194,33 +4191,30 @@ int iCol, /* Column token appears in (-ve -> delete) */ int iPos, /* Position of token within column */ const char *pToken, int nToken /* Token to add or remove to or from index */ ){ int i; /* Used to iterate through indexes */ + int rc; /* Return code */ Fts5Config *pConfig = p->pConfig; - assert( p->rc==SQLITE_OK ); - - /* Allocate hash tables if they have not already been allocated */ - if( p->apHash==0 ){ - int nHash = pConfig->nPrefix + 1; - p->apHash = (Fts5Hash**)fts5IdxMalloc(p, sizeof(Fts5Hash*) * nHash); - for(i=0; p->rc==SQLITE_OK && irc = sqlite3Fts5HashNew(&p->apHash[i], &p->nPendingData); - } - } + + assert( p->rc==SQLITE_OK ); /* Add the new token to the main terms hash table. And to each of the ** prefix hash tables that it is large enough for. */ - fts5AddTermToHash(p, 0, iCol, iPos, pToken, nToken); - for(i=0; inPrefix; i++){ + rc = sqlite3Fts5HashWrite( + p->apHash[0], p->iWriteRowid, iCol, iPos, pToken, nToken + ); + for(i=0; inPrefix && rc==SQLITE_OK; i++){ int nByte = fts5IndexCharlenToBytelen(pToken, nToken, pConfig->aPrefix[i]); if( nByte ){ - fts5AddTermToHash(p, i+1, iCol, iPos, pToken, nByte); + rc = sqlite3Fts5HashWrite( + p->apHash[i+1], p->iWriteRowid, iCol, iPos, pToken, nByte + ); } } - return fts5IndexReturn(p); + return rc; } /* ** Open a new iterator to iterate though all docids that match the ** specified token or token prefix. Index: ext/fts5/tool/loadfts5.tcl ================================================================== --- ext/fts5/tool/loadfts5.tcl +++ ext/fts5/tool/loadfts5.tcl @@ -27,17 +27,19 @@ puts stderr " -fts4 (use fts4 instead of fts5)" puts stderr " -fts5 (use fts5)" puts stderr " -porter (use porter tokenizer)" puts stderr " -limit N (load no more than N documents)" puts stderr " -automerge N (set the automerge parameter to N)" + puts stderr " -crisismerge N (set the crisismerge parameter to N)" exit 1 } set O(vtab) fts5 set O(tok) "" set O(limit) 0 set O(automerge) -1 +set O(crisismerge) -1 if {[llength $argv]<2} usage set nOpt [expr {[llength $argv]-2}] for {set i 0} {$i < $nOpt} {incr i} { set arg [lindex $argv $i] @@ -61,10 +63,15 @@ -automerge { if { [incr i]>=$nOpt } usage set O(automerge) [lindex $argv $i] } + + -crisismerge { + if { [incr i]>=$nOpt } usage + set O(crisismerge) [lindex $argv $i] + } default { usage } } @@ -79,11 +86,17 @@ if {$O(vtab) == "fts5"} { db eval { INSERT INTO t1(t1, rank) VALUES('automerge', $O(automerge)) } } else { db eval { INSERT INTO t1(t1) VALUES('automerge=' || $O(automerge)) } } + } + if {$O(crisismerge)>=0} { + if {$O(vtab) == "fts5"} { + db eval {INSERT INTO t1(t1, rank) VALUES('crisismerge', $O(crisismerge))} + } else { + } } load_hierachy [lindex $argv end] }