Index: ext/fts5/fts5_hash.c ================================================================== --- ext/fts5/fts5_hash.c +++ ext/fts5/fts5_hash.c @@ -391,59 +391,10 @@ sqlite3_free(ap); *ppSorted = pList; return SQLITE_OK; } -int sqlite3Fts5HashIterate( - Fts5Hash *pHash, - void *pCtx, - int (*xTerm)(void*, const char*, int), - int (*xEntry)(void*, i64, const u8*, int), - int (*xTermDone)(void*) -){ - Fts5HashEntry *pList; - int rc; - - rc = fts5HashEntrySort(pHash, 0, 0, &pList); - if( rc==SQLITE_OK ){ - memset(pHash->aSlot, 0, sizeof(Fts5HashEntry*) * pHash->nSlot); - while( pList ){ - Fts5HashEntry *pNext = pList->pScanNext; - if( rc==SQLITE_OK ){ - const int nKey = strlen(pList->zKey); - i64 iRowid = 0; - u8 *pPtr = (u8*)pList; - int iOff = sizeof(Fts5HashEntry) + nKey + 1; - - /* Fill in the final poslist size field */ - fts5HashAddPoslistSize(pList); - - /* Issue the new-term callback */ - rc = xTerm(pCtx, pList->zKey, nKey); - - /* Issue the xEntry callbacks */ - while( rc==SQLITE_OK && iOffnData ){ - i64 iDelta; /* Rowid delta value */ - int nPoslist; /* Size of position list in bytes */ - int nVarint; - iOff += getVarint(&pPtr[iOff], (u64*)&iDelta); - iRowid += iDelta; - nVarint = fts5GetVarint32(&pPtr[iOff], nPoslist); - rc = xEntry(pCtx, iRowid, &pPtr[iOff], nPoslist+nVarint); - iOff += nVarint+nPoslist; - } - - /* Issue the term-done callback */ - if( rc==SQLITE_OK ) rc = xTermDone(pCtx); - } - sqlite3_free(pList); - pList = pNext; - } - } - return rc; -} - /* ** Query the hash table for a doclist associated with term pTerm/nTerm. */ int sqlite3Fts5HashQuery( Fts5Hash *pHash, /* Hash table to query */ @@ -476,13 +427,12 @@ ){ fts5HashEntrySort(p, pTerm, nTerm, &p->pScan); } void sqlite3Fts5HashScanNext(Fts5Hash *p){ - if( p->pScan ){ - p->pScan = p->pScan->pScanNext; - } + Fts5HashEntry *pScan = p->pScan; + if( pScan ) p->pScan = pScan->pScanNext; } int sqlite3Fts5HashScanEof(Fts5Hash *p){ return (p->pScan==0); } Index: ext/fts5/fts5_index.c ================================================================== --- ext/fts5/fts5_index.c +++ ext/fts5/fts5_index.c @@ -111,11 +111,11 @@ ** ** varint: first rowid ** poslist: first poslist ** zero-or-more { ** varint: rowid delta (always > 0) -** poslist: first poslist +** poslist: next poslist ** } ** 0x00 byte ** ** poslist format: ** @@ -2675,11 +2675,11 @@ static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){ if( pWriter->nEmpty ){ int bFlag = 0; Fts5PageWriter *pPg; pPg = &pWriter->aWriter[1]; - if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ + if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE && pWriter->cdlidx.n ){ i64 iKey = FTS5_DOCLIST_IDX_ROWID( pWriter->iIdx, pWriter->iSegid, pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty ); assert( pWriter->cdlidx.n>0 ); @@ -3002,16 +3002,19 @@ int *pnHeight, /* OUT: Height of the b-tree */ int *pnLeaf /* OUT: Number of leaf pages in b-tree */ ){ int i; if( p->rc==SQLITE_OK ){ - *pnLeaf = pWriter->aWriter[0].pgno; - if( *pnLeaf==1 && pWriter->aWriter[0].buf.n==0 ){ + Fts5PageWriter *pLeaf = &pWriter->aWriter[0]; + if( pLeaf->pgno==1 && pLeaf->buf.n==0 ){ *pnLeaf = 0; *pnHeight = 0; }else{ - fts5WriteFlushLeaf(p, pWriter); + if( pLeaf->buf.n>4 ){ + fts5WriteFlushLeaf(p, pWriter); + } + *pnLeaf = pLeaf->pgno-1; if( pWriter->nWriter==1 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ fts5WriteBtreeGrow(p, pWriter); } if( pWriter->nWriter>1 ){ fts5WriteBtreeNEmpty(p, pWriter); @@ -3379,48 +3382,24 @@ struct Fts5FlushCtx { Fts5Index *pIdx; Fts5SegWriter writer; }; -static int fts5FlushNewTerm(void *pCtx, const char *zTerm, int nTerm){ - Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx; - int rc = SQLITE_OK; - fts5WriteAppendTerm(p->pIdx, &p->writer, nTerm, (const u8*)zTerm); - return rc; -} - -static int fts5FlushTermDone(void *pCtx){ - Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx; - int rc = SQLITE_OK; - /* Write the doclist terminator */ - fts5WriteAppendZerobyte(p->pIdx, &p->writer); - return rc; -} - -static int fts5FlushNewEntry( - void *pCtx, - i64 iRowid, - const u8 *aPoslist, - int nPoslist -){ - Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx; - Fts5Index *pIdx = p->pIdx; - -#ifdef SQLITE_DEBUG - /* The poslist-size varint should already be at the start of the - ** aPoslist/nPoslist buffer. This assert verifies that. */ - int n, i; - i = fts5GetVarint32(aPoslist, n); - assert( nPoslist==(n+i) ); -#endif - - /* Append the rowid itself */ - fts5WriteAppendRowid(pIdx, &p->writer, iRowid); - - /* And the poslist data */ - fts5WriteAppendPoslistData(pIdx, &p->writer, aPoslist, nPoslist); - return pIdx->rc; +/* +** Buffer aBuf[] contains a list of varints, all small enough to fit +** in a 32-bit integer. Return the size of the largest prefix of this +** list nMax bytes or less in size. +*/ +static int fts5PoslistPrefix(const u8 *aBuf, int nMax){ + int ret = 0; + while( 1 ){ + u32 dummy; + int i = fts5GetVarint32(&aBuf[ret], dummy); + if( (ret + i) > nMax ) break; + ret += i; + } + return ret; } /* ** Flush the contents of in-memory hash table iHash to a new level-0 ** segment on disk. Also update the corresponding structure record. @@ -3427,10 +3406,11 @@ ** ** If an error occurs, set the Fts5Index.rc error code. If an error has ** already occurred, this function is a no-op. */ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ + Fts5Hash *pHash = p->apHash[iHash]; Fts5Structure *pStruct; int iSegid; int pgnoLast = 0; /* Last leaf page number in segment */ /* Obtain a reference to the index structure and allocate a new segment-id @@ -3437,23 +3417,131 @@ ** for the new level-0 segment. */ pStruct = fts5StructureRead(p, iHash); iSegid = fts5AllocateSegid(p, pStruct); if( iSegid ){ + const int pgsz = p->pConfig->pgsz; + Fts5StructureSegment *pSeg; /* New segment within pStruct */ int nHeight; /* Height of new segment b-tree */ - int rc; - Fts5FlushCtx ctx; - - fts5WriteInit(p, &ctx.writer, iHash, iSegid); - ctx.pIdx = p; - - rc = sqlite3Fts5HashIterate( p->apHash[iHash], (void*)&ctx, - fts5FlushNewTerm, fts5FlushNewEntry, fts5FlushTermDone - ); - if( p->rc==SQLITE_OK ) p->rc = rc; - fts5WriteFinish(p, &ctx.writer, &nHeight, &pgnoLast); + Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */ + + Fts5SegWriter writer; + fts5WriteInit(p, &writer, iHash, iSegid); + + /* Pre-allocate the buffer used to assemble leaf pages to the target + ** page size. */ + assert( pgsz>0 ); + pBuf = &writer.aWriter[0].buf; + fts5BufferGrow(&p->rc, pBuf, pgsz + 20); + + /* Begin scanning through hash table entries. */ + if( p->rc==SQLITE_OK ){ + memset(pBuf->p, 0, 4); + pBuf->n = 4; + sqlite3Fts5HashScanInit(pHash, 0, 0); + } + + while( 0==sqlite3Fts5HashScanEof(pHash) ){ + const char *zTerm; + int nTerm; + const u8 *pDoclist; + int nDoclist; + + sqlite3Fts5HashScanEntry(pHash, &zTerm,(const char**)&pDoclist,&nDoclist); + nTerm = strlen(zTerm); + + /* Decide if the term fits on the current leaf. If not, flush it + ** to disk. */ + if( (pBuf->n + nTerm + 2) > pgsz ){ + fts5WriteFlushLeaf(p, &writer); + pBuf = &writer.aWriter[0].buf; + if( (nTerm + 32) > pBuf->nSpace ){ + fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n); + } + } + + /* Write the term to the leaf. And push it up into the b-tree hierarchy */ + if( writer.bFirstTermInPage==0 ){ + pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], 0); + }else{ + fts5PutU16(&pBuf->p[2], pBuf->n); + writer.bFirstTermInPage = 0; + if( writer.aWriter[0].pgno!=1 ){ + fts5WriteBtreeTerm(p, &writer, nTerm, (const u8*)zTerm); + pBuf = &writer.aWriter[0].buf; + } + } + pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nTerm); + fts5BufferAppendBlob(&p->rc, pBuf, nTerm, (const u8*)zTerm); + + if( pgsz>=(pBuf->n + nDoclist + 1) ){ + /* The entire doclist will fit on the current leaf. */ + fts5BufferAppendBlob(&p->rc, pBuf, nDoclist, pDoclist); + }else{ + i64 iRowid = 0; + i64 iDelta = 0; + int iOff = 0; + int bFirstDocid = 0; + + /* The entire doclist will not fit on this leaf. The following + ** loop iterates through the poslists that make up the current + ** doclist. */ + while( iOffp[0], pBuf->n); /* first docid on page */ + pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iRowid); + bFirstDocid = 0; + }else{ + pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iDelta); + } + assert( pBuf->n<=pBuf->nSpace ); + + if( (pBuf->n + nCopy) <= pgsz ){ + /* The entire poslist will fit on the current leaf. So copy + ** it in one go. */ + fts5BufferAppendBlob(&p->rc, pBuf, nCopy, &pDoclist[iOff]); + }else{ + /* The entire poslist will not fit on this leaf. So it needs + ** to be broken into sections. The only qualification being + ** that each varint must be stored contiguously. */ + const u8 *pPoslist = &pDoclist[iOff]; + int iPos = 0; + while( 1 ){ + int nSpace = pgsz - pBuf->n; + int n; + if( (nCopy - iPos)<=nSpace ){ + n = nCopy - iPos; + }else{ + n = fts5PoslistPrefix(&pPoslist[iPos], nSpace); + } + fts5BufferAppendBlob(&p->rc, pBuf, n, &pPoslist[iPos]); + iPos += n; + if( iPos>=nCopy ) break; + fts5WriteFlushLeaf(p, &writer); + pBuf = &writer.aWriter[0].buf; + } + bFirstDocid = 1; + } + assert( pBuf->n<=pgsz ); + iOff += nCopy; + } + } + + pBuf->p[pBuf->n++] = '\0'; + assert( pBuf->n<=pBuf->nSpace ); + sqlite3Fts5HashScanNext(pHash); + } + sqlite3Fts5HashClear(pHash); + fts5WriteFinish(p, &writer, &nHeight, &pgnoLast); /* Update the Fts5Structure. It is written back to the database by the ** fts5StructureRelease() call below. */ if( pStruct->nLevel==0 ){ fts5StructureAddLevel(&p->rc, &pStruct);