Index: ext/fts5/fts5Int.h ================================================================== --- ext/fts5/fts5Int.h +++ ext/fts5/fts5Int.h @@ -380,21 +380,10 @@ /* ** Empty (but do not delete) a hash table. */ void sqlite3Fts5HashClear(Fts5Hash*); -/* -** Iterate through the contents of the hash table. -*/ -int sqlite3Fts5HashIterate( - Fts5Hash*, - void *pCtx, - int (*xTerm)(void*, const char*, int), - int (*xEntry)(void*, i64, const u8*, int), - int (*xTermDone)(void*) -); - int sqlite3Fts5HashQuery( Fts5Hash*, /* Hash table to query */ const char *pTerm, int nTerm, /* Query term */ const u8 **ppDoclist, /* OUT: Pointer to doclist for pTerm */ int *pnDoclist /* OUT: Size of doclist in bytes */ Index: ext/fts5/fts5_hash.c ================================================================== --- ext/fts5/fts5_hash.c +++ ext/fts5/fts5_hash.c @@ -165,18 +165,20 @@ return SQLITE_OK; } static void fts5HashAddPoslistSize(Fts5HashEntry *p){ if( p->iSzPoslist ){ + /* WRITEPOSLISTSIZE */ u8 *pPtr = (u8*)p; - int nSz = p->nData - p->iSzPoslist - 1; + int nSz = (p->nData - p->iSzPoslist - 1) * 2; if( nSz<=127 ){ pPtr[p->iSzPoslist] = nSz; }else{ int nByte = sqlite3Fts5GetVarintLen((u32)nSz); - memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz); + /* WRITEPOSLISTSIZE */ + memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz/2); sqlite3PutVarint(&pPtr[p->iSzPoslist], nSz); p->nData += (nByte-1); } p->iSzPoslist = 0; } Index: ext/fts5/fts5_index.c ================================================================== --- ext/fts5/fts5_index.c +++ ext/fts5/fts5_index.c @@ -40,10 +40,11 @@ ** ** * extra fields in the "structure record" record the state of ongoing ** incremental merge operations. ** */ + #define FTS5_OPT_WORK_UNIT 1000 /* Number of leaf pages per optimize step */ #define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */ #define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */ @@ -119,11 +120,12 @@ ** } ** 0x00 byte ** ** poslist format: ** -** varint: size of poslist in bytes. not including this field. +** varint: size of poslist in bytes multiplied by 2, not including +** this field. Plus 1 if this entry carries the "delete" flag. ** collist: collist for column 0 ** zero-or-more { ** 0x01 byte ** varint: column number (I) ** collist: collist for column I @@ -1627,11 +1629,11 @@ } } /* ** This function is only ever called on iterators created by calls to -** Fts5IndexQuery() with the FTS5INDEX_QUERY_ASC flag set. +** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set. ** ** When this function is called, iterator pIter points to the first rowid ** on the current leaf associated with the term being queried. This function ** advances it to point to the last such rowid and, if necessary, initializes ** the aRowidOffset[] and iRowidOffset variables. @@ -1644,12 +1646,13 @@ while( p->rc==SQLITE_OK && i=n ) break; i += getVarint(&a[i], (u64*)&iDelta); if( iDelta==0 ) break; pIter->iRowid += iDelta; @@ -1763,12 +1766,13 @@ int nPos; i64 iDelta; pIter->iRowidOffset--; pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset]; + /* READPOSLISTSIZE */ iOff += fts5GetVarint32(&a[iOff], nPos); - iOff += nPos; + iOff += (nPos / 2); getVarint(&a[iOff], (u64*)&iDelta); pIter->iRowid -= iDelta; }else{ fts5SegIterReverseNewPage(p, pIter); } @@ -1783,12 +1787,13 @@ int n = pLeaf->n; iOff = pIter->iLeafOffset; if( iOffpLeaf->p = (u8*)pList; pIter->pLeaf->n = nList; sqlite3Fts5BufferSet(&p->rc, &pIter->term, strlen(zTerm), (u8*)zTerm); pIter->iLeafOffset = getVarint(pList, (u64*)&pIter->iRowid); if( pIter->flags & FTS5_SEGITER_REVERSE ){ + assert( 0 ); fts5SegIterReverseInitPage(p, pIter); } } }else{ iOff = 0; @@ -1879,12 +1885,13 @@ while( iOffn ){ int nPos; i64 iDelta; /* Position list size in bytes */ + /* READPOSLISTSIZE */ iOff += fts5GetVarint32(&pLeaf->p[iOff], nPos); - iOff += nPos; + iOff += (nPos / 2); if( iOff>=pLeaf->n ) break; /* Rowid delta. Or, if 0x00, the end of doclist marker. */ nPos = getVarint(&pLeaf->p[iOff], (u64*)&iDelta); if( iDelta==0 ) break; @@ -1962,12 +1969,13 @@ while( iOffn ){ i64 iDelta; int nPoslist; /* iOff is currently the offset of the size field of a position list. */ + /* READPOSLISTSIZE */ iOff += fts5GetVarint32(&pLeaf->p[iOff], nPoslist); - iOff += nPoslist; + iOff += nPoslist / 2; if( iOffn ){ iOff += getVarint(&pLeaf->p[iOff], (u64*)&iDelta); if( iDelta==0 ) return; } @@ -2654,11 +2662,13 @@ if( p->rc ) return; iOff = 4; pLeaf = pIter->pLeaf; } + /* READPOSLISTSIZE */ iOff += fts5GetVarint32(&pLeaf->p[iOff], pIter->nRem); + pIter->nRem = pIter->nRem / 2; pIter->n = MIN(pLeaf->n - iOff, pIter->nRem); pIter->p = pLeaf->p + iOff; if( pIter->n==0 ){ fts5ChunkIterNext(p, pIter); @@ -3367,11 +3377,12 @@ /* Append the rowid to the output */ fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter)); /* Copy the position list from input to output */ - fts5WriteAppendPoslistInt(p, &writer, sPos.nRem); + /* WRITEPOSLISTSIZE */ + fts5WriteAppendPoslistInt(p, &writer, sPos.nRem * 2); for(/* noop */; !fts5ChunkIterEof(p, &sPos); fts5ChunkIterNext(p, &sPos)){ fts5WriteAppendPoslistData(p, &writer, sPos.p, sPos.n); } } @@ -3585,12 +3596,12 @@ int nSuffix; /* Size of term suffix */ sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist); nTerm = strlen(zTerm); - /* Decide if the term fits on the current leaf. If not, flush it - ** to disk. */ + /* Decide if the term will fit on the current leaf. If it will not, + ** flush the leaf to disk here. */ if( (pBuf->n + nTerm + 2) > pgsz ){ fts5WriteFlushLeaf(p, &writer); pBuf = &writer.aWriter[0].buf; if( (nTerm + 32) > pBuf->nSpace ){ fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n); @@ -3631,12 +3642,13 @@ ** doclist. */ while( iOffp[0], pBuf->n); /* first docid on page */ pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iRowid); @@ -4069,11 +4081,12 @@ Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; assert( fts5MultiIterEof(p, pMulti)==0 ); fts5ChunkIterInit(p, pSeg, &iter); if( fts5ChunkIterEof(p, &iter)==0 ){ if( bSz ){ - fts5BufferAppendVarint(&p->rc, pBuf, iter.nRem); + /* WRITEPOSLISTSIZE */ + fts5BufferAppendVarint(&p->rc, pBuf, iter.nRem * 2); } while( fts5ChunkIterEof(p, &iter)==0 ){ fts5BufferAppendBlob(&p->rc, pBuf, iter.n, iter.p); fts5ChunkIterNext(p, &iter); } @@ -4093,11 +4106,13 @@ pIter->iRowid += iDelta; } }else{ pIter->i += getVarint(&pIter->a[pIter->i], (u64*)&pIter->iRowid); } + /* READPOSLISTSIZE */ pIter->i += fts5GetVarint32(&pIter->a[pIter->i], pIter->nPoslist); + pIter->nPoslist = pIter->nPoslist / 2; pIter->aPoslist = &pIter->a[pIter->i]; pIter->i += pIter->nPoslist; }else{ pIter->aPoslist = 0; } @@ -4164,18 +4179,20 @@ if( i2.aPoslist==0 || (i1.aPoslist && ( (bDesc && i1.iRowid>i2.iRowid) || (!bDesc && i1.iRowidrc, bDesc, &out, &iLastRowid, i1.iRowid); - fts5BufferAppendVarint(&p->rc, &out, i1.nPoslist); + /* WRITEPOSLISTSIZE */ + fts5BufferAppendVarint(&p->rc, &out, i1.nPoslist * 2); fts5BufferAppendBlob(&p->rc, &out, i1.nPoslist, i1.aPoslist); fts5DoclistIterNext(&i1); } else if( i1.aPoslist==0 || i2.iRowid!=i1.iRowid ){ /* Copy entry from i2 */ fts5MergeAppendDocid(&p->rc, bDesc, &out, &iLastRowid, i2.iRowid); - fts5BufferAppendVarint(&p->rc, &out, i2.nPoslist); + /* WRITEPOSLISTSIZE */ + fts5BufferAppendVarint(&p->rc, &out, i2.nPoslist * 2); fts5BufferAppendBlob(&p->rc, &out, i2.nPoslist, i2.aPoslist); fts5DoclistIterNext(&i2); } else{ Fts5PoslistReader r1; @@ -4200,11 +4217,12 @@ if( r1.iPos==r2.iPos ) sqlite3Fts5PoslistReaderNext(&r1); } p->rc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, iNew); } - fts5BufferAppendVarint(&p->rc, &out, tmp.n); + /* WRITEPOSLISTSIZE */ + fts5BufferAppendVarint(&p->rc, &out, tmp.n * 2); fts5BufferAppendBlob(&p->rc, &out, tmp.n, tmp.p); fts5DoclistIterNext(&i1); fts5DoclistIterNext(&i2); } } @@ -4295,10 +4313,45 @@ } fts5StructureRelease(pStruct); sqlite3_free(aBuf); } + +static int fts5QueryCksum( + Fts5Index *p, + const char *z, + int n, + int flags, + u64 *pCksum +){ + u64 cksum = *pCksum; + Fts5IndexIter *pIdxIter = 0; + int rc = sqlite3Fts5IndexQuery(p, z, n, flags, &pIdxIter); + + while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){ + const u8 *pPos; + int nPos; + i64 rowid = sqlite3Fts5IterRowid(pIdxIter); + rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos); + if( rc==SQLITE_OK ){ + Fts5PoslistReader sReader; + for(sqlite3Fts5PoslistReaderInit(-1, pPos, nPos, &sReader); + sReader.bEof==0; + sqlite3Fts5PoslistReaderNext(&sReader) + ){ + int iCol = FTS5_POS2COLUMN(sReader.iPos); + int iOff = FTS5_POS2OFFSET(sReader.iPos); + cksum ^= fts5IndexEntryCksum(rowid, iCol, iOff, z, n); + } + rc = sqlite3Fts5IterNext(pIdxIter); + } + } + sqlite3Fts5IterClose(pIdxIter); + + *pCksum = cksum; + return rc; +} /* ** Run internal checks to ensure that the FTS index (a) is internally ** consistent and (b) contains entries for which the XOR of the checksums ** as calculated by fts5IndexEntryCksum() is cksum. @@ -4364,32 +4417,24 @@ cksum2 ^= fts5IndexEntryCksum(iRowid, sPos.iCol, sPos.iPos, z, n); } /* If this is a new term, query for it. Update cksum3 with the results. */ if( p->rc==SQLITE_OK && (term.n!=n || memcmp(term.p, z, n)) ){ - Fts5IndexIter *pIdxIter = 0; + int rc; int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX); - int rc = sqlite3Fts5IndexQuery(p, z, n, flags, &pIdxIter); - while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){ - const u8 *pPos; - int nPos; - i64 rowid = sqlite3Fts5IterRowid(pIdxIter); - rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos); - if( rc==SQLITE_OK ){ - Fts5PoslistReader sReader; - for(sqlite3Fts5PoslistReaderInit(-1, pPos, nPos, &sReader); - sReader.bEof==0; - sqlite3Fts5PoslistReaderNext(&sReader) - ){ - int iCol = FTS5_POS2COLUMN(sReader.iPos); - int iOff = FTS5_POS2OFFSET(sReader.iPos); - cksum3 ^= fts5IndexEntryCksum(rowid, iCol, iOff, z, n); - } - rc = sqlite3Fts5IterNext(pIdxIter); - } - } - sqlite3Fts5IterClose(pIdxIter); + u64 ck1 = 0; + u64 ck2 = 0; + + /* Check that the results returned for ASC and DESC queries are + ** the same. If not, call this corruption. */ + rc = fts5QueryCksum(p, z, n, flags, &ck1); + if( rc==SQLITE_OK ){ + rc = fts5QueryCksum(p, z, n, flags | FTS5INDEX_QUERY_DESC, &ck2); + } + if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; + + cksum3 ^= ck1; fts5BufferSet(&rc, &term, n, (const u8*)z); p->rc = rc; } } fts5MultiIterFree(p, pIter); @@ -4771,12 +4816,12 @@ /* ** Return a pointer to a buffer containing a copy of the position list for ** the current entry. Output variable *pn is set to the size of the buffer ** in bytes before returning. ** -** The returned buffer does not include the 0x00 terminator byte stored on -** disk. +** The returned position list does not include the "number of bytes" varint +** field that starts the position list on disk. */ int sqlite3Fts5IterPoslist(Fts5IndexIter *pIter, const u8 **pp, int *pn){ assert( pIter->pIndex->rc==SQLITE_OK ); if( pIter->pDoclist ){ *pn = pIter->pDoclist->nPoslist; @@ -5009,12 +5054,13 @@ iOff += sqlite3GetVarint(&a[iOff], (u64*)&iDocid); sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid); } while( iOff