Index: ext/fts2/fts2.c ================================================================== --- ext/fts2/fts2.c +++ ext/fts2/fts2.c @@ -136,17 +136,23 @@ ** itself grows too big and must be split. The format of interior ** nodes: ** ** varint iHeight; (height from leaf level, always >0) ** varint iBlockid; (block id of node's leftmost subtree) -** array { -** varint nTerm; (length of term) -** char pTerm[nTerm]; (content of term) +** optional { +** varint nTerm; (length of first term) +** char pTerm[nTerm]; (content of first term) +** array { +** (further terms are delta-encoded) +** varint nPrefix; (length of shared prefix with previous term) +** varint nSuffix; (length of unshared suffix) +** char pTermSuffix[nSuffix]; (unshared suffix of next term) +** } ** } ** -** Here, array { X } means zero or more occurrences of X, adjacent in -** memory. +** Here, optional { X } means an optional element, while array { X } +** means zero or more occurrences of X, adjacent in memory. ** ** An interior node encodes n terms separating n+1 subtrees. The ** subtree blocks are contiguous, so only the first subtree's blockid ** is encoded. The subtree at iBlockid will contain all terms less ** than the first term encoded (or all terms if no term is encoded). @@ -3688,18 +3694,39 @@ assert( n<=nData ); pData += n; nData -= n; /* Zero or more terms of positive length */ - while( nData!=0 ){ + if( nData!=0 ){ + /* First term is not delta-encoded. */ n = getVarint32(pData, &iDummy); assert( n>0 ); assert( iDummy>0 ); assert( n+iDummy>0); assert( n+iDummy<=nData ); pData += n+iDummy; nData -= n+iDummy; + + /* Following terms delta-encoded. */ + while( nData!=0 ){ + /* Length of shared prefix. */ + n = getVarint32(pData, &iDummy); + assert( n>0 ); + assert( iDummy>=0 ); + assert( n0 ); + assert( iDummy>0 ); + assert( n+iDummy>0); + assert( n+iDummy<=nData ); + pData += n+iDummy; + nData -= n+iDummy; + } } } #define ASSERT_VALID_INTERIOR_BLOCK(x) interiorBlockValidate(x) #else #define ASSERT_VALID_INTERIOR_BLOCK(x) assert( 1 ) @@ -3708,10 +3735,11 @@ typedef struct InteriorWriter { int iHeight; /* from 0 at leaves. */ InteriorBlock *first, *last; struct InteriorWriter *parentWriter; + DataBuffer term; /* Last term written to block "last". */ sqlite_int64 iOpeningChildBlock; /* First child block in block "last". */ #ifndef NDEBUG sqlite_int64 iLastChildBlock; /* for consistency checks. */ #endif } InteriorWriter; @@ -3733,39 +3761,61 @@ pWriter->iLastChildBlock = iChildBlock; #endif block = interiorBlockNew(iHeight, iChildBlock, pTerm, nTerm); pWriter->last = pWriter->first = block; ASSERT_VALID_INTERIOR_BLOCK(pWriter->last); + dataBufferInit(&pWriter->term, 0); } /* Append the child node rooted at iChildBlock to the interior node, ** with pTerm[nTerm] as the leftmost term in iChildBlock's subtree. */ static void interiorWriterAppend(InteriorWriter *pWriter, const char *pTerm, int nTerm, sqlite_int64 iChildBlock){ char c[VARINT_MAX+VARINT_MAX]; - int n = putVarint(c, nTerm); + int n, nPrefix = 0; ASSERT_VALID_INTERIOR_BLOCK(pWriter->last); + + /* The first term written into an interior node is actually + ** associated with the second child added (the first child was added + ** in interiorWriterInit, or in the if clause at the bottom of this + ** function). That term gets encoded straight up, with nPrefix left + ** at 0. + */ + if( pWriter->term.nData==0 ){ + n = putVarint(c, nTerm); + }else{ + while( nPrefixterm.nData && + pTerm[nPrefix]==pWriter->term.pData[nPrefix] ){ + nPrefix++; + } + + n = putVarint(c, nPrefix); + n += putVarint(c+n, nTerm-nPrefix); + } #ifndef NDEBUG pWriter->iLastChildBlock++; #endif assert( pWriter->iLastChildBlock==iChildBlock ); /* Overflow to a new block if the new term makes the current block ** too big, and the current block already has enough terms. */ - if( pWriter->last->data.nData+n+nTerm>INTERIOR_MAX && + if( pWriter->last->data.nData+n+nTerm-nPrefix>INTERIOR_MAX && iChildBlock-pWriter->iOpeningChildBlock>INTERIOR_MIN_TERMS ){ pWriter->last->next = interiorBlockNew(pWriter->iHeight, iChildBlock, pTerm, nTerm); pWriter->last = pWriter->last->next; pWriter->iOpeningChildBlock = iChildBlock; + dataBufferReset(&pWriter->term); }else{ - dataBufferAppend2(&pWriter->last->data, c, n, pTerm, nTerm); + dataBufferAppend2(&pWriter->last->data, c, n, + pTerm+nPrefix, nTerm-nPrefix); + dataBufferReplace(&pWriter->term, pTerm, nTerm); } ASSERT_VALID_INTERIOR_BLOCK(pWriter->last); } /* Free the space used by pWriter, including the linked-list of @@ -3783,10 +3833,11 @@ } if( pWriter->parentWriter!=NULL ){ interiorWriterDestroy(pWriter->parentWriter); free(pWriter->parentWriter); } + dataBufferDestroy(&pWriter->term); SCRAMBLE(pWriter); return SQLITE_OK; } /* If pWriter can fit entirely in ROOT_MAX, return it as the root info @@ -3839,16 +3890,17 @@ ppRootInfo, pnRootInfo, piEndBlockid); } /****************************************************************/ /* InteriorReader is used to read off the data from an interior node -** (see comment at top of file for the format). InteriorReader does -** not own its data, so interiorReaderDestroy() is a formality. +** (see comment at top of file for the format). */ typedef struct InteriorReader { const char *pData; int nData; + + DataBuffer term; /* previous term, for decoding term delta. */ sqlite_int64 iBlockid; } InteriorReader; static void interiorReaderDestroy(InteriorReader *pReader){ @@ -3855,11 +3907,11 @@ SCRAMBLE(pReader); } static void interiorReaderInit(const char *pData, int nData, InteriorReader *pReader){ - int n; + int n, nTerm; /* Require at least the leading flag byte */ assert( nData>0 ); assert( pData[0]!='\0' ); @@ -3868,41 +3920,67 @@ /* Decode the base blockid, and set the cursor to the first term. */ n = getVarint(pData+1, &pReader->iBlockid); assert( 1+n<=nData ); pReader->pData = pData+1+n; pReader->nData = nData-(1+n); + + /* A single-child interior node (such as when a leaf node was too + ** large for the segment directory) won't have any terms. + ** Otherwise, decode the first term. + */ + if( pReader->nData==0 ){ + dataBufferInit(&pReader->term, 0); + }else{ + n = getVarint32(pReader->pData, &nTerm); + dataBufferInit(&pReader->term, nTerm); + dataBufferReplace(&pReader->term, pReader->pData+n, nTerm); + assert( n+nTerm<=pReader->nData ); + pReader->pData += n+nTerm; + pReader->nData -= n+nTerm; + } } static int interiorReaderAtEnd(InteriorReader *pReader){ - return pReader->nData<=0; + return pReader->term.nData==0; } static sqlite_int64 interiorReaderCurrentBlockid(InteriorReader *pReader){ return pReader->iBlockid; } static int interiorReaderTermBytes(InteriorReader *pReader){ - int nTerm; assert( !interiorReaderAtEnd(pReader) ); - getVarint32(pReader->pData, &nTerm); - return nTerm; + return pReader->term.nData; } static const char *interiorReaderTerm(InteriorReader *pReader){ - int n, nTerm; assert( !interiorReaderAtEnd(pReader) ); - n = getVarint32(pReader->pData, &nTerm); - return pReader->pData+n; + return pReader->term.pData; } /* Step forward to the next term in the node. */ static void interiorReaderStep(InteriorReader *pReader){ - int n, nTerm; assert( !interiorReaderAtEnd(pReader) ); - n = getVarint32(pReader->pData, &nTerm); - assert( n+nTerm<=pReader->nData ); - pReader->pData += n+nTerm; - pReader->nData -= n+nTerm; + + /* If the last term has been read, signal eof, else construct the + ** next term. + */ + if( pReader->nData==0 ){ + dataBufferReset(&pReader->term); + }else{ + int n, nPrefix, nSuffix; + + n = getVarint32(pReader->pData, &nPrefix); + n += getVarint32(pReader->pData+n, &nSuffix); + + /* Truncate the current term and append suffix data. */ + pReader->term.nData = nPrefix; + dataBufferAppend(&pReader->term, pReader->pData+n, nSuffix); + + assert( n+nSuffix<=pReader->nData ); + pReader->pData += n+nSuffix; + pReader->nData -= n+nSuffix; + } pReader->iBlockid++; } /* Compare the current term to pTerm[nTerm], returning strcmp-style ** results.