/ Check-in [49c1e745]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Improve fts5 integrity-check so that it checks that DESC queries return the same as ASC. Change the poslist format slightly to make room for a delete-flag.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: 49c1e74522a26e5dbe6f8305bc96487279b80dfb
User & Date: dan 2015-04-11 16:23:31
Context
2015-04-11
18:25
Have fts5 integrity check verify that prefix indexes contain the same values as returned by prefix queries on the main terms index. check-in: bdb8e82a user: dan tags: fts5
16:23
Improve fts5 integrity-check so that it checks that DESC queries return the same as ASC. Change the poslist format slightly to make room for a delete-flag. check-in: 49c1e745 user: dan tags: fts5
2015-03-21
15:45
Merge trunk changes with this branch. check-in: 14274391 user: dan tags: fts5
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5Int.h.

378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
);

/*
** Empty (but do not delete) a hash table.
*/
void sqlite3Fts5HashClear(Fts5Hash*);

/*
** Iterate through the contents of the hash table.
*/
int sqlite3Fts5HashIterate(
  Fts5Hash*,
  void *pCtx,
  int (*xTerm)(void*, const char*, int),
  int (*xEntry)(void*, i64, const u8*, int),
  int (*xTermDone)(void*)
);

int sqlite3Fts5HashQuery(
  Fts5Hash*,                      /* Hash table to query */
  const char *pTerm, int nTerm,   /* Query term */
  const u8 **ppDoclist,           /* OUT: Pointer to doclist for pTerm */
  int *pnDoclist                  /* OUT: Size of doclist in bytes */
);








<
<
<
<
<
<
<
<
<
<
<







378
379
380
381
382
383
384











385
386
387
388
389
390
391
);

/*
** Empty (but do not delete) a hash table.
*/
void sqlite3Fts5HashClear(Fts5Hash*);












int sqlite3Fts5HashQuery(
  Fts5Hash*,                      /* Hash table to query */
  const char *pTerm, int nTerm,   /* Query term */
  const u8 **ppDoclist,           /* OUT: Pointer to doclist for pTerm */
  int *pnDoclist                  /* OUT: Size of doclist in bytes */
);

Changes to ext/fts5/fts5_hash.c.

163
164
165
166
167
168
169

170
171
172
173
174
175
176

177
178
179
180
181
182
183
184
  pHash->nSlot = nNew;
  pHash->aSlot = apNew;
  return SQLITE_OK;
}

static void fts5HashAddPoslistSize(Fts5HashEntry *p){
  if( p->iSzPoslist ){

    u8 *pPtr = (u8*)p;
    int nSz = p->nData - p->iSzPoslist - 1;

    if( nSz<=127 ){
      pPtr[p->iSzPoslist] = nSz;
    }else{
      int nByte = sqlite3Fts5GetVarintLen((u32)nSz);

      memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz);
      sqlite3PutVarint(&pPtr[p->iSzPoslist], nSz);
      p->nData += (nByte-1);
    }
    p->iSzPoslist = 0;
  }
}








>

|





>
|







163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
  pHash->nSlot = nNew;
  pHash->aSlot = apNew;
  return SQLITE_OK;
}

static void fts5HashAddPoslistSize(Fts5HashEntry *p){
  if( p->iSzPoslist ){
    /* WRITEPOSLISTSIZE */
    u8 *pPtr = (u8*)p;
    int nSz = (p->nData - p->iSzPoslist - 1) * 2;

    if( nSz<=127 ){
      pPtr[p->iSzPoslist] = nSz;
    }else{
      int nByte = sqlite3Fts5GetVarintLen((u32)nSz);
      /* WRITEPOSLISTSIZE */
      memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz/2);
      sqlite3PutVarint(&pPtr[p->iSzPoslist], nSz);
      p->nData += (nByte-1);
    }
    p->iSzPoslist = 0;
  }
}

Changes to ext/fts5/fts5_index.c.

38
39
40
41
42
43
44

45
46
47
48
49
50
51
...
117
118
119
120
121
122
123
124

125
126
127
128
129
130
131
....
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
....
1642
1643
1644
1645
1646
1647
1648

1649
1650
1651
1652
1653
1654
1655
1656
1657
....
1761
1762
1763
1764
1765
1766
1767

1768
1769
1770
1771
1772
1773
1774
1775
1776
....
1781
1782
1783
1784
1785
1786
1787

1788
1789
1790
1791
1792
1793
1794
1795
1796
....
1818
1819
1820
1821
1822
1823
1824

1825
1826
1827
1828
1829
1830
1831
....
1877
1878
1879
1880
1881
1882
1883

1884
1885
1886
1887
1888
1889
1890
1891
1892
....
1960
1961
1962
1963
1964
1965
1966

1967
1968
1969
1970
1971
1972
1973
1974
1975
....
2652
2653
2654
2655
2656
2657
2658

2659

2660
2661
2662
2663
2664
2665
2666
....
3365
3366
3367
3368
3369
3370
3371

3372
3373
3374
3375
3376
3377
3378
3379
....
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
....
3629
3630
3631
3632
3633
3634
3635

3636
3637
3638
3639
3640
3641
3642
3643
3644
....
4067
4068
4069
4070
4071
4072
4073

4074
4075
4076
4077
4078
4079
4080
4081
....
4091
4092
4093
4094
4095
4096
4097

4098

4099
4100
4101
4102
4103
4104
4105
....
4162
4163
4164
4165
4166
4167
4168

4169
4170
4171
4172
4173
4174
4175

4176
4177
4178
4179
4180
4181
4182
4183
....
4198
4199
4200
4201
4202
4203
4204

4205
4206
4207
4208
4209
4210
4211
4212
....
4293
4294
4295
4296
4297
4298
4299



































4300
4301
4302
4303
4304
4305
4306
....
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371
4372
4373

4374
4375
4376
4377
4378
4379
4380
4381
4382
4383
4384
4385
4386
4387

4388
4389
4390

4391
4392
4393
4394
4395
4396
4397
....
4769
4770
4771
4772
4773
4774
4775
4776
4777
4778
4779
4780
4781
4782
4783
4784
....
5007
5008
5009
5010
5011
5012
5013

5014
5015
5016
5017
5018
5019
5020
5021
5022
**     the doclist. This is used to speed up seek operations, and merges of
**     large doclists with very small doclists.
**
**   * extra fields in the "structure record" record the state of ongoing
**     incremental merge operations.
**
*/


#define FTS5_OPT_WORK_UNIT  1000  /* Number of leaf pages per optimize step */
#define FTS5_WORK_UNIT      64    /* Number of leaf pages in unit of work */

#define FTS5_MIN_DLIDX_SIZE 4     /* Add dlidx if this many empty pages */

/*
................................................................................
**           varint:  rowid delta (always > 0)
**           poslist: next poslist
**         }
**         0x00 byte
**
**     poslist format:
**
**         varint: size of poslist in bytes. not including this field.

**         collist: collist for column 0
**         zero-or-more {
**           0x01 byte
**           varint: column number (I)
**           collist: collist for column I
**         }
**
................................................................................
    pIter->iLeafOffset = fts5GetU16(&a[2]);
    fts5SegIterLoadTerm(p, pIter, 0);
  }
}

/*
** This function is only ever called on iterators created by calls to
** Fts5IndexQuery() with the FTS5INDEX_QUERY_ASC flag set.
**
** When this function is called, iterator pIter points to the first rowid
** on the current leaf associated with the term being queried. This function
** advances it to point to the last such rowid and, if necessary, initializes
** the aRowidOffset[] and iRowidOffset variables.
*/
static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
................................................................................
  u8 *a = pIter->pLeaf->p;
  int iRowidOffset = 0;

  while( p->rc==SQLITE_OK && i<n ){
    i64 iDelta = 0;
    int nPos;


    i += fts5GetVarint32(&a[i], nPos);
    i += nPos;
    if( i>=n ) break;
    i += getVarint(&a[i], (u64*)&iDelta);
    if( iDelta==0 ) break;
    pIter->iRowid += iDelta;

    if( iRowidOffset>=pIter->nRowidOffset ){
      int nNew = pIter->nRowidOffset + 8;
................................................................................
        u8 *a = pIter->pLeaf->p;
        int iOff;
        int nPos;
        i64 iDelta;
        pIter->iRowidOffset--;

        pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset];

        iOff += fts5GetVarint32(&a[iOff], nPos);
        iOff += nPos;
        getVarint(&a[iOff], (u64*)&iDelta);
        pIter->iRowid -= iDelta;
      }else{
        fts5SegIterReverseNewPage(p, pIter);
      }
    }else{
      Fts5Data *pLeaf = pIter->pLeaf;
................................................................................
      /* Search for the end of the position list within the current page. */
      u8 *a = pLeaf->p;
      int n = pLeaf->n;

      iOff = pIter->iLeafOffset;
      if( iOff<n ){
        int nPoslist;

        iOff += fts5GetVarint32(&a[iOff], nPoslist);
        iOff += nPoslist;
      }

      if( iOff<n ){
        /* The next entry is on the current page */
        u64 iDelta;
        iOff += sqlite3GetVarint(&a[iOff], &iDelta);
        pIter->iLeafOffset = iOff;
................................................................................
          pIter->pLeaf = 0;
        }else{
          pIter->pLeaf->p = (u8*)pList;
          pIter->pLeaf->n = nList;
          sqlite3Fts5BufferSet(&p->rc, &pIter->term, strlen(zTerm), (u8*)zTerm);
          pIter->iLeafOffset = getVarint(pList, (u64*)&pIter->iRowid);
          if( pIter->flags & FTS5_SEGITER_REVERSE ){

            fts5SegIterReverseInitPage(p, pIter);
          }
        }
      }else{
        iOff = 0;
        /* Next entry is not on the current page */
        while( iOff==0 ){
................................................................................
    pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, pgnoLast));
  }else{
    while( iOff<pLeaf->n ){
      int nPos;
      i64 iDelta;

      /* Position list size in bytes */

      iOff += fts5GetVarint32(&pLeaf->p[iOff], nPos);
      iOff += nPos;
      if( iOff>=pLeaf->n ) break;

      /* Rowid delta. Or, if 0x00, the end of doclist marker. */
      nPos = getVarint(&pLeaf->p[iOff], (u64*)&iDelta);
      if( iDelta==0 ) break;
      iOff += nPos;
    }
................................................................................
  ** term. */
  if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
    while( iOff<pLeaf->n ){
      i64 iDelta;
      int nPoslist;

      /* iOff is currently the offset of the size field of a position list. */

      iOff += fts5GetVarint32(&pLeaf->p[iOff], nPoslist);
      iOff += nPoslist;

      if( iOff<pLeaf->n ){
        iOff += getVarint(&pLeaf->p[iOff], (u64*)&iDelta);
        if( iDelta==0 ) return;
      }
    }
  }
................................................................................
    pIter->nRem = 1;
    fts5ChunkIterNext(p, pIter);
    if( p->rc ) return;
    iOff = 4;
    pLeaf = pIter->pLeaf;
  }


  iOff += fts5GetVarint32(&pLeaf->p[iOff], pIter->nRem);

  pIter->n = MIN(pLeaf->n - iOff, pIter->nRem);
  pIter->p = pLeaf->p + iOff;

  if( pIter->n==0 ){
    fts5ChunkIterNext(p, pIter);
  }
}
................................................................................
        bRequireDoclistTerm = 1;
      }

      /* Append the rowid to the output */
      fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));

      /* Copy the position list from input to output */

      fts5WriteAppendPoslistInt(p, &writer, sPos.nRem);
      for(/* noop */; !fts5ChunkIterEof(p, &sPos); fts5ChunkIterNext(p, &sPos)){
        fts5WriteAppendPoslistData(p, &writer, sPos.p, sPos.n);
      }
    }

    fts5ChunkIterRelease(&sPos);
  }
................................................................................
      const u8 *pDoclist;
      int nDoclist;
      int nSuffix;                /* Size of term suffix */

      sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
      nTerm = strlen(zTerm);

      /* Decide if the term fits on the current leaf. If not, flush it
      ** to disk.  */
      if( (pBuf->n + nTerm + 2) > pgsz ){
        fts5WriteFlushLeaf(p, &writer);
        pBuf = &writer.aWriter[0].buf;
        if( (nTerm + 32) > pBuf->nSpace ){
          fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n);
          if( p->rc ) break;
        }
................................................................................
        /* The entire doclist will not fit on this leaf. The following 
        ** loop iterates through the poslists that make up the current 
        ** doclist.  */
        while( iOff<nDoclist ){
          u32 nPos;
          int nCopy;
          iOff += getVarint(&pDoclist[iOff], (u64*)&iDelta);

          nCopy = fts5GetVarint32(&pDoclist[iOff], nPos);
          nCopy += nPos;
          iRowid += iDelta;
          
          if( bFirstDocid ){
            fts5PutU16(&pBuf->p[0], pBuf->n);   /* first docid on page */
            pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iRowid);
            bFirstDocid = 0;
          }else{
................................................................................
  if( p->rc==SQLITE_OK ){
    Fts5ChunkIter iter;
    Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ];
    assert( fts5MultiIterEof(p, pMulti)==0 );
    fts5ChunkIterInit(p, pSeg, &iter);
    if( fts5ChunkIterEof(p, &iter)==0 ){
      if( bSz ){

        fts5BufferAppendVarint(&p->rc, pBuf, iter.nRem);
      }
      while( fts5ChunkIterEof(p, &iter)==0 ){
        fts5BufferAppendBlob(&p->rc, pBuf, iter.n, iter.p);
        fts5ChunkIterNext(p, &iter);
      }
    }
    fts5ChunkIterRelease(&iter);
................................................................................
        pIter->iRowid -= iDelta;
      }else{
        pIter->iRowid += iDelta;
      }
    }else{
      pIter->i += getVarint(&pIter->a[pIter->i], (u64*)&pIter->iRowid);
    }

    pIter->i += fts5GetVarint32(&pIter->a[pIter->i], pIter->nPoslist);

    pIter->aPoslist = &pIter->a[pIter->i];
    pIter->i += pIter->nPoslist;
  }else{
    pIter->aPoslist = 0;
  }
}

................................................................................
    fts5DoclistIterInit(p2, bDesc, &i2);
    while( i1.aPoslist!=0 || i2.aPoslist!=0 ){
      if( i2.aPoslist==0 || (i1.aPoslist && 
           ( (bDesc && i1.iRowid>i2.iRowid) || (!bDesc && i1.iRowid<i2.iRowid) )
      )){
        /* Copy entry from i1 */
        fts5MergeAppendDocid(&p->rc, bDesc, &out, &iLastRowid, i1.iRowid);

        fts5BufferAppendVarint(&p->rc, &out, i1.nPoslist);
        fts5BufferAppendBlob(&p->rc, &out, i1.nPoslist, i1.aPoslist);
        fts5DoclistIterNext(&i1);
      }
      else if( i1.aPoslist==0 || i2.iRowid!=i1.iRowid ){
        /* Copy entry from i2 */
        fts5MergeAppendDocid(&p->rc, bDesc, &out, &iLastRowid, i2.iRowid);

        fts5BufferAppendVarint(&p->rc, &out, i2.nPoslist);
        fts5BufferAppendBlob(&p->rc, &out, i2.nPoslist, i2.aPoslist);
        fts5DoclistIterNext(&i2);
      }
      else{
        Fts5PoslistReader r1;
        Fts5PoslistReader r2;
        Fts5PoslistWriter writer;
................................................................................
            iNew = r2.iPos;
            sqlite3Fts5PoslistReaderNext(&r2);
            if( r1.iPos==r2.iPos ) sqlite3Fts5PoslistReaderNext(&r1);
          }
          p->rc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, iNew);
        }


        fts5BufferAppendVarint(&p->rc, &out, tmp.n);
        fts5BufferAppendBlob(&p->rc, &out, tmp.n, tmp.p);
        fts5DoclistIterNext(&i1);
        fts5DoclistIterNext(&i2);
      }
    }

    fts5BufferSet(&p->rc, p1, out.n, out.p);
................................................................................
      fts5DoclistIterInit(&doclist, bDesc, pIter->pDoclist);
    }
  }

  fts5StructureRelease(pStruct);
  sqlite3_free(aBuf);
}




































/*
** Run internal checks to ensure that the FTS index (a) is internally 
** consistent and (b) contains entries for which the XOR of the checksums
** as calculated by fts5IndexEntryCksum() is cksum.
**
** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
................................................................................
          fts5PosIterNext(p, &sPos)
      ){
        cksum2 ^= fts5IndexEntryCksum(iRowid, sPos.iCol, sPos.iPos, z, n);
      }

      /* If this is a new term, query for it. Update cksum3 with the results. */
      if( p->rc==SQLITE_OK && (term.n!=n || memcmp(term.p, z, n)) ){
        Fts5IndexIter *pIdxIter = 0;
        int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX);
        int rc = sqlite3Fts5IndexQuery(p, z, n, flags, &pIdxIter);
        while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){
          const u8 *pPos;

          int nPos;
          i64 rowid = sqlite3Fts5IterRowid(pIdxIter);
          rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos);
          if( rc==SQLITE_OK ){
            Fts5PoslistReader sReader;
            for(sqlite3Fts5PoslistReaderInit(-1, pPos, nPos, &sReader);
                sReader.bEof==0;
                sqlite3Fts5PoslistReaderNext(&sReader)
            ){
              int iCol = FTS5_POS2COLUMN(sReader.iPos);
              int iOff = FTS5_POS2OFFSET(sReader.iPos);
              cksum3 ^= fts5IndexEntryCksum(rowid, iCol, iOff, z, n);
            }
            rc = sqlite3Fts5IterNext(pIdxIter);

          }
        }
        sqlite3Fts5IterClose(pIdxIter);

        fts5BufferSet(&rc, &term, n, (const u8*)z);
        p->rc = rc;
      }
    }
    fts5MultiIterFree(p, pIter);
    fts5StructureRelease(pStruct);
  }
................................................................................


/*
** Return a pointer to a buffer containing a copy of the position list for
** the current entry. Output variable *pn is set to the size of the buffer 
** in bytes before returning.
**
** The returned buffer does not include the 0x00 terminator byte stored on
** disk.
*/
int sqlite3Fts5IterPoslist(Fts5IndexIter *pIter, const u8 **pp, int *pn){
  assert( pIter->pIndex->rc==SQLITE_OK );
  if( pIter->pDoclist ){
    *pn = pIter->pDoclist->nPoslist;
    *pp = pIter->pDoclist->aPoslist;
  }else{
................................................................................

  if( iOff<n ){
    iOff += sqlite3GetVarint(&a[iOff], (u64*)&iDocid);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid);
  }
  while( iOff<n ){
    int nPos;

    iOff += fts5GetVarint32(&a[iOff], nPos);
    iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos));
    if( iOff<n ){
      i64 iDelta;
      iOff += sqlite3GetVarint(&a[iOff], (u64*)&iDelta);
      if( iDelta==0 ) return iOff;
      iDocid += iDelta;
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid);
    }







>







 







|
>







 







|







 







>

|







 







>

|







 







>

|







 







>







 







>

|







 







>

|







 







>

>







 







>
|







 







|
|







 







>

|







 







>
|







 







>

>







 







>
|






>
|







 







>
|







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







|

|
|
<
>
|
|
|
|
|
<
<
<
<
<
<
<
|
<
>
|
<
<
>







 







|
|







 







>

|







38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
...
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
....
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
....
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
....
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
....
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
....
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
....
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
....
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
....
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
....
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
....
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
....
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
....
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
....
4104
4105
4106
4107
4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
....
4177
4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
....
4215
4216
4217
4218
4219
4220
4221
4222
4223
4224
4225
4226
4227
4228
4229
4230
....
4311
4312
4313
4314
4315
4316
4317
4318
4319
4320
4321
4322
4323
4324
4325
4326
4327
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
....
4415
4416
4417
4418
4419
4420
4421
4422
4423
4424
4425

4426
4427
4428
4429
4430
4431







4432

4433
4434


4435
4436
4437
4438
4439
4440
4441
4442
....
4814
4815
4816
4817
4818
4819
4820
4821
4822
4823
4824
4825
4826
4827
4828
4829
....
5052
5053
5054
5055
5056
5057
5058
5059
5060
5061
5062
5063
5064
5065
5066
5067
5068
**     the doclist. This is used to speed up seek operations, and merges of
**     large doclists with very small doclists.
**
**   * extra fields in the "structure record" record the state of ongoing
**     incremental merge operations.
**
*/


#define FTS5_OPT_WORK_UNIT  1000  /* Number of leaf pages per optimize step */
#define FTS5_WORK_UNIT      64    /* Number of leaf pages in unit of work */

#define FTS5_MIN_DLIDX_SIZE 4     /* Add dlidx if this many empty pages */

/*
................................................................................
**           varint:  rowid delta (always > 0)
**           poslist: next poslist
**         }
**         0x00 byte
**
**     poslist format:
**
**         varint: size of poslist in bytes multiplied by 2, not including
**                 this field. Plus 1 if this entry carries the "delete" flag.
**         collist: collist for column 0
**         zero-or-more {
**           0x01 byte
**           varint: column number (I)
**           collist: collist for column I
**         }
**
................................................................................
    pIter->iLeafOffset = fts5GetU16(&a[2]);
    fts5SegIterLoadTerm(p, pIter, 0);
  }
}

/*
** This function is only ever called on iterators created by calls to
** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set.
**
** When this function is called, iterator pIter points to the first rowid
** on the current leaf associated with the term being queried. This function
** advances it to point to the last such rowid and, if necessary, initializes
** the aRowidOffset[] and iRowidOffset variables.
*/
static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
................................................................................
  u8 *a = pIter->pLeaf->p;
  int iRowidOffset = 0;

  while( p->rc==SQLITE_OK && i<n ){
    i64 iDelta = 0;
    int nPos;

    /* READPOSLISTSIZE */
    i += fts5GetVarint32(&a[i], nPos);
    i += nPos / 2;
    if( i>=n ) break;
    i += getVarint(&a[i], (u64*)&iDelta);
    if( iDelta==0 ) break;
    pIter->iRowid += iDelta;

    if( iRowidOffset>=pIter->nRowidOffset ){
      int nNew = pIter->nRowidOffset + 8;
................................................................................
        u8 *a = pIter->pLeaf->p;
        int iOff;
        int nPos;
        i64 iDelta;
        pIter->iRowidOffset--;

        pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset];
        /* READPOSLISTSIZE */
        iOff += fts5GetVarint32(&a[iOff], nPos);
        iOff += (nPos / 2);
        getVarint(&a[iOff], (u64*)&iDelta);
        pIter->iRowid -= iDelta;
      }else{
        fts5SegIterReverseNewPage(p, pIter);
      }
    }else{
      Fts5Data *pLeaf = pIter->pLeaf;
................................................................................
      /* Search for the end of the position list within the current page. */
      u8 *a = pLeaf->p;
      int n = pLeaf->n;

      iOff = pIter->iLeafOffset;
      if( iOff<n ){
        int nPoslist;
        /* READPOSLISTSIZE */
        iOff += fts5GetVarint32(&a[iOff], nPoslist);
        iOff += nPoslist / 2;
      }

      if( iOff<n ){
        /* The next entry is on the current page */
        u64 iDelta;
        iOff += sqlite3GetVarint(&a[iOff], &iDelta);
        pIter->iLeafOffset = iOff;
................................................................................
          pIter->pLeaf = 0;
        }else{
          pIter->pLeaf->p = (u8*)pList;
          pIter->pLeaf->n = nList;
          sqlite3Fts5BufferSet(&p->rc, &pIter->term, strlen(zTerm), (u8*)zTerm);
          pIter->iLeafOffset = getVarint(pList, (u64*)&pIter->iRowid);
          if( pIter->flags & FTS5_SEGITER_REVERSE ){
            assert( 0 );
            fts5SegIterReverseInitPage(p, pIter);
          }
        }
      }else{
        iOff = 0;
        /* Next entry is not on the current page */
        while( iOff==0 ){
................................................................................
    pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, pgnoLast));
  }else{
    while( iOff<pLeaf->n ){
      int nPos;
      i64 iDelta;

      /* Position list size in bytes */
      /* READPOSLISTSIZE */
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nPos);
      iOff += (nPos / 2);
      if( iOff>=pLeaf->n ) break;

      /* Rowid delta. Or, if 0x00, the end of doclist marker. */
      nPos = getVarint(&pLeaf->p[iOff], (u64*)&iDelta);
      if( iDelta==0 ) break;
      iOff += nPos;
    }
................................................................................
  ** term. */
  if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
    while( iOff<pLeaf->n ){
      i64 iDelta;
      int nPoslist;

      /* iOff is currently the offset of the size field of a position list. */
      /* READPOSLISTSIZE */
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nPoslist);
      iOff += nPoslist / 2;

      if( iOff<pLeaf->n ){
        iOff += getVarint(&pLeaf->p[iOff], (u64*)&iDelta);
        if( iDelta==0 ) return;
      }
    }
  }
................................................................................
    pIter->nRem = 1;
    fts5ChunkIterNext(p, pIter);
    if( p->rc ) return;
    iOff = 4;
    pLeaf = pIter->pLeaf;
  }

  /* READPOSLISTSIZE */
  iOff += fts5GetVarint32(&pLeaf->p[iOff], pIter->nRem);
  pIter->nRem = pIter->nRem / 2;
  pIter->n = MIN(pLeaf->n - iOff, pIter->nRem);
  pIter->p = pLeaf->p + iOff;

  if( pIter->n==0 ){
    fts5ChunkIterNext(p, pIter);
  }
}
................................................................................
        bRequireDoclistTerm = 1;
      }

      /* Append the rowid to the output */
      fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));

      /* Copy the position list from input to output */
      /* WRITEPOSLISTSIZE */
      fts5WriteAppendPoslistInt(p, &writer, sPos.nRem * 2);
      for(/* noop */; !fts5ChunkIterEof(p, &sPos); fts5ChunkIterNext(p, &sPos)){
        fts5WriteAppendPoslistData(p, &writer, sPos.p, sPos.n);
      }
    }

    fts5ChunkIterRelease(&sPos);
  }
................................................................................
      const u8 *pDoclist;
      int nDoclist;
      int nSuffix;                /* Size of term suffix */

      sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
      nTerm = strlen(zTerm);

      /* Decide if the term will fit on the current leaf. If it will not, 
      ** flush the leaf to disk here.  */
      if( (pBuf->n + nTerm + 2) > pgsz ){
        fts5WriteFlushLeaf(p, &writer);
        pBuf = &writer.aWriter[0].buf;
        if( (nTerm + 32) > pBuf->nSpace ){
          fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n);
          if( p->rc ) break;
        }
................................................................................
        /* The entire doclist will not fit on this leaf. The following 
        ** loop iterates through the poslists that make up the current 
        ** doclist.  */
        while( iOff<nDoclist ){
          u32 nPos;
          int nCopy;
          iOff += getVarint(&pDoclist[iOff], (u64*)&iDelta);
          /* READPOSLISTSIZE */
          nCopy = fts5GetVarint32(&pDoclist[iOff], nPos);
          nCopy += (nPos / 2);
          iRowid += iDelta;
          
          if( bFirstDocid ){
            fts5PutU16(&pBuf->p[0], pBuf->n);   /* first docid on page */
            pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iRowid);
            bFirstDocid = 0;
          }else{
................................................................................
  if( p->rc==SQLITE_OK ){
    Fts5ChunkIter iter;
    Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ];
    assert( fts5MultiIterEof(p, pMulti)==0 );
    fts5ChunkIterInit(p, pSeg, &iter);
    if( fts5ChunkIterEof(p, &iter)==0 ){
      if( bSz ){
        /* WRITEPOSLISTSIZE */
        fts5BufferAppendVarint(&p->rc, pBuf, iter.nRem * 2);
      }
      while( fts5ChunkIterEof(p, &iter)==0 ){
        fts5BufferAppendBlob(&p->rc, pBuf, iter.n, iter.p);
        fts5ChunkIterNext(p, &iter);
      }
    }
    fts5ChunkIterRelease(&iter);
................................................................................
        pIter->iRowid -= iDelta;
      }else{
        pIter->iRowid += iDelta;
      }
    }else{
      pIter->i += getVarint(&pIter->a[pIter->i], (u64*)&pIter->iRowid);
    }
    /* READPOSLISTSIZE */
    pIter->i += fts5GetVarint32(&pIter->a[pIter->i], pIter->nPoslist);
    pIter->nPoslist = pIter->nPoslist / 2;
    pIter->aPoslist = &pIter->a[pIter->i];
    pIter->i += pIter->nPoslist;
  }else{
    pIter->aPoslist = 0;
  }
}

................................................................................
    fts5DoclistIterInit(p2, bDesc, &i2);
    while( i1.aPoslist!=0 || i2.aPoslist!=0 ){
      if( i2.aPoslist==0 || (i1.aPoslist && 
           ( (bDesc && i1.iRowid>i2.iRowid) || (!bDesc && i1.iRowid<i2.iRowid) )
      )){
        /* Copy entry from i1 */
        fts5MergeAppendDocid(&p->rc, bDesc, &out, &iLastRowid, i1.iRowid);
        /* WRITEPOSLISTSIZE */
        fts5BufferAppendVarint(&p->rc, &out, i1.nPoslist * 2);
        fts5BufferAppendBlob(&p->rc, &out, i1.nPoslist, i1.aPoslist);
        fts5DoclistIterNext(&i1);
      }
      else if( i1.aPoslist==0 || i2.iRowid!=i1.iRowid ){
        /* Copy entry from i2 */
        fts5MergeAppendDocid(&p->rc, bDesc, &out, &iLastRowid, i2.iRowid);
        /* WRITEPOSLISTSIZE */
        fts5BufferAppendVarint(&p->rc, &out, i2.nPoslist * 2);
        fts5BufferAppendBlob(&p->rc, &out, i2.nPoslist, i2.aPoslist);
        fts5DoclistIterNext(&i2);
      }
      else{
        Fts5PoslistReader r1;
        Fts5PoslistReader r2;
        Fts5PoslistWriter writer;
................................................................................
            iNew = r2.iPos;
            sqlite3Fts5PoslistReaderNext(&r2);
            if( r1.iPos==r2.iPos ) sqlite3Fts5PoslistReaderNext(&r1);
          }
          p->rc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, iNew);
        }

        /* WRITEPOSLISTSIZE */
        fts5BufferAppendVarint(&p->rc, &out, tmp.n * 2);
        fts5BufferAppendBlob(&p->rc, &out, tmp.n, tmp.p);
        fts5DoclistIterNext(&i1);
        fts5DoclistIterNext(&i2);
      }
    }

    fts5BufferSet(&p->rc, p1, out.n, out.p);
................................................................................
      fts5DoclistIterInit(&doclist, bDesc, pIter->pDoclist);
    }
  }

  fts5StructureRelease(pStruct);
  sqlite3_free(aBuf);
}

static int fts5QueryCksum(
  Fts5Index *p,
  const char *z,
  int n,
  int flags,
  u64 *pCksum
){
  u64 cksum = *pCksum;
  Fts5IndexIter *pIdxIter = 0;
  int rc = sqlite3Fts5IndexQuery(p, z, n, flags, &pIdxIter);

  while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){
    const u8 *pPos;
    int nPos;
    i64 rowid = sqlite3Fts5IterRowid(pIdxIter);
    rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos);
    if( rc==SQLITE_OK ){
      Fts5PoslistReader sReader;
      for(sqlite3Fts5PoslistReaderInit(-1, pPos, nPos, &sReader);
          sReader.bEof==0;
          sqlite3Fts5PoslistReaderNext(&sReader)
      ){
        int iCol = FTS5_POS2COLUMN(sReader.iPos);
        int iOff = FTS5_POS2OFFSET(sReader.iPos);
        cksum ^= fts5IndexEntryCksum(rowid, iCol, iOff, z, n);
      }
      rc = sqlite3Fts5IterNext(pIdxIter);
    }
  }
  sqlite3Fts5IterClose(pIdxIter);

  *pCksum = cksum;
  return rc;
}

/*
** Run internal checks to ensure that the FTS index (a) is internally 
** consistent and (b) contains entries for which the XOR of the checksums
** as calculated by fts5IndexEntryCksum() is cksum.
**
** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
................................................................................
          fts5PosIterNext(p, &sPos)
      ){
        cksum2 ^= fts5IndexEntryCksum(iRowid, sPos.iCol, sPos.iPos, z, n);
      }

      /* If this is a new term, query for it. Update cksum3 with the results. */
      if( p->rc==SQLITE_OK && (term.n!=n || memcmp(term.p, z, n)) ){
        int rc;
        int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX);
        u64 ck1 = 0;
        u64 ck2 = 0;


        /* Check that the results returned for ASC and DESC queries are
        ** the same. If not, call this corruption.  */
        rc = fts5QueryCksum(p, z, n, flags, &ck1);
        if( rc==SQLITE_OK ){
          rc = fts5QueryCksum(p, z, n, flags | FTS5INDEX_QUERY_DESC, &ck2);







        }

        if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;



        cksum3 ^= ck1;
        fts5BufferSet(&rc, &term, n, (const u8*)z);
        p->rc = rc;
      }
    }
    fts5MultiIterFree(p, pIter);
    fts5StructureRelease(pStruct);
  }
................................................................................


/*
** Return a pointer to a buffer containing a copy of the position list for
** the current entry. Output variable *pn is set to the size of the buffer 
** in bytes before returning.
**
** The returned position list does not include the "number of bytes" varint
** field that starts the position list on disk.
*/
int sqlite3Fts5IterPoslist(Fts5IndexIter *pIter, const u8 **pp, int *pn){
  assert( pIter->pIndex->rc==SQLITE_OK );
  if( pIter->pDoclist ){
    *pn = pIter->pDoclist->nPoslist;
    *pp = pIter->pDoclist->aPoslist;
  }else{
................................................................................

  if( iOff<n ){
    iOff += sqlite3GetVarint(&a[iOff], (u64*)&iDocid);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid);
  }
  while( iOff<n ){
    int nPos;
    /* READPOSLISTSIZE */
    iOff += fts5GetVarint32(&a[iOff], nPos);
    iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos / 2));
    if( iOff<n ){
      i64 iDelta;
      iOff += sqlite3GetVarint(&a[iOff], (u64*)&iDelta);
      if( iDelta==0 ) return iOff;
      iDocid += iDelta;
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid);
    }