SQLite

Check-in [49c1e74522]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Improve fts5 integrity-check so that it checks that DESC queries return the same as ASC. Change the poslist format slightly to make room for a delete-flag.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: 49c1e74522a26e5dbe6f8305bc96487279b80dfb
User & Date: dan 2015-04-11 16:23:31.390
Context
2015-04-11
18:25
Have fts5 integrity check verify that prefix indexes contain the same values as returned by prefix queries on the main terms index. (check-in: bdb8e82ab6 user: dan tags: fts5)
16:23
Improve fts5 integrity-check so that it checks that DESC queries return the same as ASC. Change the poslist format slightly to make room for a delete-flag. (check-in: 49c1e74522 user: dan tags: fts5)
2015-03-21
15:45
Merge trunk changes with this branch. (check-in: 142743918f user: dan tags: fts5)
Changes
Unified Diff Show Whitespace Changes Patch
Changes to ext/fts5/fts5Int.h.
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
);

/*
** Empty (but do not delete) a hash table.
*/
void sqlite3Fts5HashClear(Fts5Hash*);

/*
** Iterate through the contents of the hash table.
*/
int sqlite3Fts5HashIterate(
  Fts5Hash*,
  void *pCtx,
  int (*xTerm)(void*, const char*, int),
  int (*xEntry)(void*, i64, const u8*, int),
  int (*xTermDone)(void*)
);

int sqlite3Fts5HashQuery(
  Fts5Hash*,                      /* Hash table to query */
  const char *pTerm, int nTerm,   /* Query term */
  const u8 **ppDoclist,           /* OUT: Pointer to doclist for pTerm */
  int *pnDoclist                  /* OUT: Size of doclist in bytes */
);








<
<
<
<
<
<
<
<
<
<
<







378
379
380
381
382
383
384











385
386
387
388
389
390
391
);

/*
** Empty (but do not delete) a hash table.
*/
void sqlite3Fts5HashClear(Fts5Hash*);












int sqlite3Fts5HashQuery(
  Fts5Hash*,                      /* Hash table to query */
  const char *pTerm, int nTerm,   /* Query term */
  const u8 **ppDoclist,           /* OUT: Pointer to doclist for pTerm */
  int *pnDoclist                  /* OUT: Size of doclist in bytes */
);

Changes to ext/fts5/fts5_hash.c.
163
164
165
166
167
168
169

170
171
172
173
174
175
176

177
178
179
180
181
182
183
184
  pHash->nSlot = nNew;
  pHash->aSlot = apNew;
  return SQLITE_OK;
}

static void fts5HashAddPoslistSize(Fts5HashEntry *p){
  if( p->iSzPoslist ){

    u8 *pPtr = (u8*)p;
    int nSz = p->nData - p->iSzPoslist - 1;

    if( nSz<=127 ){
      pPtr[p->iSzPoslist] = nSz;
    }else{
      int nByte = sqlite3Fts5GetVarintLen((u32)nSz);

      memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz);
      sqlite3PutVarint(&pPtr[p->iSzPoslist], nSz);
      p->nData += (nByte-1);
    }
    p->iSzPoslist = 0;
  }
}








>

|





>
|







163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
  pHash->nSlot = nNew;
  pHash->aSlot = apNew;
  return SQLITE_OK;
}

static void fts5HashAddPoslistSize(Fts5HashEntry *p){
  if( p->iSzPoslist ){
    /* WRITEPOSLISTSIZE */
    u8 *pPtr = (u8*)p;
    int nSz = (p->nData - p->iSzPoslist - 1) * 2;

    if( nSz<=127 ){
      pPtr[p->iSzPoslist] = nSz;
    }else{
      int nByte = sqlite3Fts5GetVarintLen((u32)nSz);
      /* WRITEPOSLISTSIZE */
      memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz/2);
      sqlite3PutVarint(&pPtr[p->iSzPoslist], nSz);
      p->nData += (nByte-1);
    }
    p->iSzPoslist = 0;
  }
}

Changes to ext/fts5/fts5_index.c.
38
39
40
41
42
43
44

45
46
47
48
49
50
51
**     the doclist. This is used to speed up seek operations, and merges of
**     large doclists with very small doclists.
**
**   * extra fields in the "structure record" record the state of ongoing
**     incremental merge operations.
**
*/


#define FTS5_OPT_WORK_UNIT  1000  /* Number of leaf pages per optimize step */
#define FTS5_WORK_UNIT      64    /* Number of leaf pages in unit of work */

#define FTS5_MIN_DLIDX_SIZE 4     /* Add dlidx if this many empty pages */

/*







>







38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
**     the doclist. This is used to speed up seek operations, and merges of
**     large doclists with very small doclists.
**
**   * extra fields in the "structure record" record the state of ongoing
**     incremental merge operations.
**
*/


#define FTS5_OPT_WORK_UNIT  1000  /* Number of leaf pages per optimize step */
#define FTS5_WORK_UNIT      64    /* Number of leaf pages in unit of work */

#define FTS5_MIN_DLIDX_SIZE 4     /* Add dlidx if this many empty pages */

/*
117
118
119
120
121
122
123
124

125
126
127
128
129
130
131
**           varint:  rowid delta (always > 0)
**           poslist: next poslist
**         }
**         0x00 byte
**
**     poslist format:
**
**         varint: size of poslist in bytes. not including this field.

**         collist: collist for column 0
**         zero-or-more {
**           0x01 byte
**           varint: column number (I)
**           collist: collist for column I
**         }
**







|
>







118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
**           varint:  rowid delta (always > 0)
**           poslist: next poslist
**         }
**         0x00 byte
**
**     poslist format:
**
**         varint: size of poslist in bytes multiplied by 2, not including
**                 this field. Plus 1 if this entry carries the "delete" flag.
**         collist: collist for column 0
**         zero-or-more {
**           0x01 byte
**           varint: column number (I)
**           collist: collist for column I
**         }
**
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648

1649
1650
1651
1652
1653
1654
1655
1656
1657
    pIter->iLeafOffset = fts5GetU16(&a[2]);
    fts5SegIterLoadTerm(p, pIter, 0);
  }
}

/*
** This function is only ever called on iterators created by calls to
** Fts5IndexQuery() with the FTS5INDEX_QUERY_ASC flag set.
**
** When this function is called, iterator pIter points to the first rowid
** on the current leaf associated with the term being queried. This function
** advances it to point to the last such rowid and, if necessary, initializes
** the aRowidOffset[] and iRowidOffset variables.
*/
static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
  int n = pIter->pLeaf->n;
  int i = pIter->iLeafOffset;
  u8 *a = pIter->pLeaf->p;
  int iRowidOffset = 0;

  while( p->rc==SQLITE_OK && i<n ){
    i64 iDelta = 0;
    int nPos;


    i += fts5GetVarint32(&a[i], nPos);
    i += nPos;
    if( i>=n ) break;
    i += getVarint(&a[i], (u64*)&iDelta);
    if( iDelta==0 ) break;
    pIter->iRowid += iDelta;

    if( iRowidOffset>=pIter->nRowidOffset ){
      int nNew = pIter->nRowidOffset + 8;







|
















>

|







1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
    pIter->iLeafOffset = fts5GetU16(&a[2]);
    fts5SegIterLoadTerm(p, pIter, 0);
  }
}

/*
** This function is only ever called on iterators created by calls to
** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set.
**
** When this function is called, iterator pIter points to the first rowid
** on the current leaf associated with the term being queried. This function
** advances it to point to the last such rowid and, if necessary, initializes
** the aRowidOffset[] and iRowidOffset variables.
*/
static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
  int n = pIter->pLeaf->n;
  int i = pIter->iLeafOffset;
  u8 *a = pIter->pLeaf->p;
  int iRowidOffset = 0;

  while( p->rc==SQLITE_OK && i<n ){
    i64 iDelta = 0;
    int nPos;

    /* READPOSLISTSIZE */
    i += fts5GetVarint32(&a[i], nPos);
    i += nPos / 2;
    if( i>=n ) break;
    i += getVarint(&a[i], (u64*)&iDelta);
    if( iDelta==0 ) break;
    pIter->iRowid += iDelta;

    if( iRowidOffset>=pIter->nRowidOffset ){
      int nNew = pIter->nRowidOffset + 8;
1761
1762
1763
1764
1765
1766
1767

1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787

1788
1789
1790
1791
1792
1793
1794
1795
1796
        u8 *a = pIter->pLeaf->p;
        int iOff;
        int nPos;
        i64 iDelta;
        pIter->iRowidOffset--;

        pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset];

        iOff += fts5GetVarint32(&a[iOff], nPos);
        iOff += nPos;
        getVarint(&a[iOff], (u64*)&iDelta);
        pIter->iRowid -= iDelta;
      }else{
        fts5SegIterReverseNewPage(p, pIter);
      }
    }else{
      Fts5Data *pLeaf = pIter->pLeaf;
      int iOff;
      int bNewTerm = 0;
      int nKeep = 0;

      /* Search for the end of the position list within the current page. */
      u8 *a = pLeaf->p;
      int n = pLeaf->n;

      iOff = pIter->iLeafOffset;
      if( iOff<n ){
        int nPoslist;

        iOff += fts5GetVarint32(&a[iOff], nPoslist);
        iOff += nPoslist;
      }

      if( iOff<n ){
        /* The next entry is on the current page */
        u64 iDelta;
        iOff += sqlite3GetVarint(&a[iOff], &iDelta);
        pIter->iLeafOffset = iOff;







>

|


















>

|







1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
        u8 *a = pIter->pLeaf->p;
        int iOff;
        int nPos;
        i64 iDelta;
        pIter->iRowidOffset--;

        pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset];
        /* READPOSLISTSIZE */
        iOff += fts5GetVarint32(&a[iOff], nPos);
        iOff += (nPos / 2);
        getVarint(&a[iOff], (u64*)&iDelta);
        pIter->iRowid -= iDelta;
      }else{
        fts5SegIterReverseNewPage(p, pIter);
      }
    }else{
      Fts5Data *pLeaf = pIter->pLeaf;
      int iOff;
      int bNewTerm = 0;
      int nKeep = 0;

      /* Search for the end of the position list within the current page. */
      u8 *a = pLeaf->p;
      int n = pLeaf->n;

      iOff = pIter->iLeafOffset;
      if( iOff<n ){
        int nPoslist;
        /* READPOSLISTSIZE */
        iOff += fts5GetVarint32(&a[iOff], nPoslist);
        iOff += nPoslist / 2;
      }

      if( iOff<n ){
        /* The next entry is on the current page */
        u64 iDelta;
        iOff += sqlite3GetVarint(&a[iOff], &iDelta);
        pIter->iLeafOffset = iOff;
1818
1819
1820
1821
1822
1823
1824

1825
1826
1827
1828
1829
1830
1831
          pIter->pLeaf = 0;
        }else{
          pIter->pLeaf->p = (u8*)pList;
          pIter->pLeaf->n = nList;
          sqlite3Fts5BufferSet(&p->rc, &pIter->term, strlen(zTerm), (u8*)zTerm);
          pIter->iLeafOffset = getVarint(pList, (u64*)&pIter->iRowid);
          if( pIter->flags & FTS5_SEGITER_REVERSE ){

            fts5SegIterReverseInitPage(p, pIter);
          }
        }
      }else{
        iOff = 0;
        /* Next entry is not on the current page */
        while( iOff==0 ){







>







1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
          pIter->pLeaf = 0;
        }else{
          pIter->pLeaf->p = (u8*)pList;
          pIter->pLeaf->n = nList;
          sqlite3Fts5BufferSet(&p->rc, &pIter->term, strlen(zTerm), (u8*)zTerm);
          pIter->iLeafOffset = getVarint(pList, (u64*)&pIter->iRowid);
          if( pIter->flags & FTS5_SEGITER_REVERSE ){
            assert( 0 );
            fts5SegIterReverseInitPage(p, pIter);
          }
        }
      }else{
        iOff = 0;
        /* Next entry is not on the current page */
        while( iOff==0 ){
1877
1878
1879
1880
1881
1882
1883

1884
1885
1886
1887
1888
1889
1890
1891
1892
    pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, pgnoLast));
  }else{
    while( iOff<pLeaf->n ){
      int nPos;
      i64 iDelta;

      /* Position list size in bytes */

      iOff += fts5GetVarint32(&pLeaf->p[iOff], nPos);
      iOff += nPos;
      if( iOff>=pLeaf->n ) break;

      /* Rowid delta. Or, if 0x00, the end of doclist marker. */
      nPos = getVarint(&pLeaf->p[iOff], (u64*)&iDelta);
      if( iDelta==0 ) break;
      iOff += nPos;
    }







>

|







1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
    pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, pgnoLast));
  }else{
    while( iOff<pLeaf->n ){
      int nPos;
      i64 iDelta;

      /* Position list size in bytes */
      /* READPOSLISTSIZE */
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nPos);
      iOff += (nPos / 2);
      if( iOff>=pLeaf->n ) break;

      /* Rowid delta. Or, if 0x00, the end of doclist marker. */
      nPos = getVarint(&pLeaf->p[iOff], (u64*)&iDelta);
      if( iDelta==0 ) break;
      iOff += nPos;
    }
1960
1961
1962
1963
1964
1965
1966

1967
1968
1969
1970
1971
1972
1973
1974
1975
  ** term. */
  if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
    while( iOff<pLeaf->n ){
      i64 iDelta;
      int nPoslist;

      /* iOff is currently the offset of the size field of a position list. */

      iOff += fts5GetVarint32(&pLeaf->p[iOff], nPoslist);
      iOff += nPoslist;

      if( iOff<pLeaf->n ){
        iOff += getVarint(&pLeaf->p[iOff], (u64*)&iDelta);
        if( iDelta==0 ) return;
      }
    }
  }







>

|







1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
  ** term. */
  if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
    while( iOff<pLeaf->n ){
      i64 iDelta;
      int nPoslist;

      /* iOff is currently the offset of the size field of a position list. */
      /* READPOSLISTSIZE */
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nPoslist);
      iOff += nPoslist / 2;

      if( iOff<pLeaf->n ){
        iOff += getVarint(&pLeaf->p[iOff], (u64*)&iDelta);
        if( iDelta==0 ) return;
      }
    }
  }
2652
2653
2654
2655
2656
2657
2658

2659

2660
2661
2662
2663
2664
2665
2666
    pIter->nRem = 1;
    fts5ChunkIterNext(p, pIter);
    if( p->rc ) return;
    iOff = 4;
    pLeaf = pIter->pLeaf;
  }


  iOff += fts5GetVarint32(&pLeaf->p[iOff], pIter->nRem);

  pIter->n = MIN(pLeaf->n - iOff, pIter->nRem);
  pIter->p = pLeaf->p + iOff;

  if( pIter->n==0 ){
    fts5ChunkIterNext(p, pIter);
  }
}







>

>







2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
    pIter->nRem = 1;
    fts5ChunkIterNext(p, pIter);
    if( p->rc ) return;
    iOff = 4;
    pLeaf = pIter->pLeaf;
  }

  /* READPOSLISTSIZE */
  iOff += fts5GetVarint32(&pLeaf->p[iOff], pIter->nRem);
  pIter->nRem = pIter->nRem / 2;
  pIter->n = MIN(pLeaf->n - iOff, pIter->nRem);
  pIter->p = pLeaf->p + iOff;

  if( pIter->n==0 ){
    fts5ChunkIterNext(p, pIter);
  }
}
3365
3366
3367
3368
3369
3370
3371

3372
3373
3374
3375
3376
3377
3378
3379
        bRequireDoclistTerm = 1;
      }

      /* Append the rowid to the output */
      fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));

      /* Copy the position list from input to output */

      fts5WriteAppendPoslistInt(p, &writer, sPos.nRem);
      for(/* noop */; !fts5ChunkIterEof(p, &sPos); fts5ChunkIterNext(p, &sPos)){
        fts5WriteAppendPoslistData(p, &writer, sPos.p, sPos.n);
      }
    }

    fts5ChunkIterRelease(&sPos);
  }







>
|







3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
        bRequireDoclistTerm = 1;
      }

      /* Append the rowid to the output */
      fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));

      /* Copy the position list from input to output */
      /* WRITEPOSLISTSIZE */
      fts5WriteAppendPoslistInt(p, &writer, sPos.nRem * 2);
      for(/* noop */; !fts5ChunkIterEof(p, &sPos); fts5ChunkIterNext(p, &sPos)){
        fts5WriteAppendPoslistData(p, &writer, sPos.p, sPos.n);
      }
    }

    fts5ChunkIterRelease(&sPos);
  }
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
      const u8 *pDoclist;
      int nDoclist;
      int nSuffix;                /* Size of term suffix */

      sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
      nTerm = strlen(zTerm);

      /* Decide if the term fits on the current leaf. If not, flush it
      ** to disk.  */
      if( (pBuf->n + nTerm + 2) > pgsz ){
        fts5WriteFlushLeaf(p, &writer);
        pBuf = &writer.aWriter[0].buf;
        if( (nTerm + 32) > pBuf->nSpace ){
          fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n);
          if( p->rc ) break;
        }







|
|







3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
      const u8 *pDoclist;
      int nDoclist;
      int nSuffix;                /* Size of term suffix */

      sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
      nTerm = strlen(zTerm);

      /* Decide if the term will fit on the current leaf. If it will not, 
      ** flush the leaf to disk here.  */
      if( (pBuf->n + nTerm + 2) > pgsz ){
        fts5WriteFlushLeaf(p, &writer);
        pBuf = &writer.aWriter[0].buf;
        if( (nTerm + 32) > pBuf->nSpace ){
          fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n);
          if( p->rc ) break;
        }
3629
3630
3631
3632
3633
3634
3635

3636
3637
3638
3639
3640
3641
3642
3643
3644
        /* The entire doclist will not fit on this leaf. The following 
        ** loop iterates through the poslists that make up the current 
        ** doclist.  */
        while( iOff<nDoclist ){
          u32 nPos;
          int nCopy;
          iOff += getVarint(&pDoclist[iOff], (u64*)&iDelta);

          nCopy = fts5GetVarint32(&pDoclist[iOff], nPos);
          nCopy += nPos;
          iRowid += iDelta;
          
          if( bFirstDocid ){
            fts5PutU16(&pBuf->p[0], pBuf->n);   /* first docid on page */
            pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iRowid);
            bFirstDocid = 0;
          }else{







>

|







3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
        /* The entire doclist will not fit on this leaf. The following 
        ** loop iterates through the poslists that make up the current 
        ** doclist.  */
        while( iOff<nDoclist ){
          u32 nPos;
          int nCopy;
          iOff += getVarint(&pDoclist[iOff], (u64*)&iDelta);
          /* READPOSLISTSIZE */
          nCopy = fts5GetVarint32(&pDoclist[iOff], nPos);
          nCopy += (nPos / 2);
          iRowid += iDelta;
          
          if( bFirstDocid ){
            fts5PutU16(&pBuf->p[0], pBuf->n);   /* first docid on page */
            pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iRowid);
            bFirstDocid = 0;
          }else{
4067
4068
4069
4070
4071
4072
4073

4074
4075
4076
4077
4078
4079
4080
4081
  if( p->rc==SQLITE_OK ){
    Fts5ChunkIter iter;
    Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ];
    assert( fts5MultiIterEof(p, pMulti)==0 );
    fts5ChunkIterInit(p, pSeg, &iter);
    if( fts5ChunkIterEof(p, &iter)==0 ){
      if( bSz ){

        fts5BufferAppendVarint(&p->rc, pBuf, iter.nRem);
      }
      while( fts5ChunkIterEof(p, &iter)==0 ){
        fts5BufferAppendBlob(&p->rc, pBuf, iter.n, iter.p);
        fts5ChunkIterNext(p, &iter);
      }
    }
    fts5ChunkIterRelease(&iter);







>
|







4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
  if( p->rc==SQLITE_OK ){
    Fts5ChunkIter iter;
    Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ];
    assert( fts5MultiIterEof(p, pMulti)==0 );
    fts5ChunkIterInit(p, pSeg, &iter);
    if( fts5ChunkIterEof(p, &iter)==0 ){
      if( bSz ){
        /* WRITEPOSLISTSIZE */
        fts5BufferAppendVarint(&p->rc, pBuf, iter.nRem * 2);
      }
      while( fts5ChunkIterEof(p, &iter)==0 ){
        fts5BufferAppendBlob(&p->rc, pBuf, iter.n, iter.p);
        fts5ChunkIterNext(p, &iter);
      }
    }
    fts5ChunkIterRelease(&iter);
4091
4092
4093
4094
4095
4096
4097

4098

4099
4100
4101
4102
4103
4104
4105
        pIter->iRowid -= iDelta;
      }else{
        pIter->iRowid += iDelta;
      }
    }else{
      pIter->i += getVarint(&pIter->a[pIter->i], (u64*)&pIter->iRowid);
    }

    pIter->i += fts5GetVarint32(&pIter->a[pIter->i], pIter->nPoslist);

    pIter->aPoslist = &pIter->a[pIter->i];
    pIter->i += pIter->nPoslist;
  }else{
    pIter->aPoslist = 0;
  }
}








>

>







4104
4105
4106
4107
4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
        pIter->iRowid -= iDelta;
      }else{
        pIter->iRowid += iDelta;
      }
    }else{
      pIter->i += getVarint(&pIter->a[pIter->i], (u64*)&pIter->iRowid);
    }
    /* READPOSLISTSIZE */
    pIter->i += fts5GetVarint32(&pIter->a[pIter->i], pIter->nPoslist);
    pIter->nPoslist = pIter->nPoslist / 2;
    pIter->aPoslist = &pIter->a[pIter->i];
    pIter->i += pIter->nPoslist;
  }else{
    pIter->aPoslist = 0;
  }
}

4162
4163
4164
4165
4166
4167
4168

4169
4170
4171
4172
4173
4174
4175

4176
4177
4178
4179
4180
4181
4182
4183
    fts5DoclistIterInit(p2, bDesc, &i2);
    while( i1.aPoslist!=0 || i2.aPoslist!=0 ){
      if( i2.aPoslist==0 || (i1.aPoslist && 
           ( (bDesc && i1.iRowid>i2.iRowid) || (!bDesc && i1.iRowid<i2.iRowid) )
      )){
        /* Copy entry from i1 */
        fts5MergeAppendDocid(&p->rc, bDesc, &out, &iLastRowid, i1.iRowid);

        fts5BufferAppendVarint(&p->rc, &out, i1.nPoslist);
        fts5BufferAppendBlob(&p->rc, &out, i1.nPoslist, i1.aPoslist);
        fts5DoclistIterNext(&i1);
      }
      else if( i1.aPoslist==0 || i2.iRowid!=i1.iRowid ){
        /* Copy entry from i2 */
        fts5MergeAppendDocid(&p->rc, bDesc, &out, &iLastRowid, i2.iRowid);

        fts5BufferAppendVarint(&p->rc, &out, i2.nPoslist);
        fts5BufferAppendBlob(&p->rc, &out, i2.nPoslist, i2.aPoslist);
        fts5DoclistIterNext(&i2);
      }
      else{
        Fts5PoslistReader r1;
        Fts5PoslistReader r2;
        Fts5PoslistWriter writer;







>
|






>
|







4177
4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
    fts5DoclistIterInit(p2, bDesc, &i2);
    while( i1.aPoslist!=0 || i2.aPoslist!=0 ){
      if( i2.aPoslist==0 || (i1.aPoslist && 
           ( (bDesc && i1.iRowid>i2.iRowid) || (!bDesc && i1.iRowid<i2.iRowid) )
      )){
        /* Copy entry from i1 */
        fts5MergeAppendDocid(&p->rc, bDesc, &out, &iLastRowid, i1.iRowid);
        /* WRITEPOSLISTSIZE */
        fts5BufferAppendVarint(&p->rc, &out, i1.nPoslist * 2);
        fts5BufferAppendBlob(&p->rc, &out, i1.nPoslist, i1.aPoslist);
        fts5DoclistIterNext(&i1);
      }
      else if( i1.aPoslist==0 || i2.iRowid!=i1.iRowid ){
        /* Copy entry from i2 */
        fts5MergeAppendDocid(&p->rc, bDesc, &out, &iLastRowid, i2.iRowid);
        /* WRITEPOSLISTSIZE */
        fts5BufferAppendVarint(&p->rc, &out, i2.nPoslist * 2);
        fts5BufferAppendBlob(&p->rc, &out, i2.nPoslist, i2.aPoslist);
        fts5DoclistIterNext(&i2);
      }
      else{
        Fts5PoslistReader r1;
        Fts5PoslistReader r2;
        Fts5PoslistWriter writer;
4198
4199
4200
4201
4202
4203
4204

4205
4206
4207
4208
4209
4210
4211
4212
            iNew = r2.iPos;
            sqlite3Fts5PoslistReaderNext(&r2);
            if( r1.iPos==r2.iPos ) sqlite3Fts5PoslistReaderNext(&r1);
          }
          p->rc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, iNew);
        }


        fts5BufferAppendVarint(&p->rc, &out, tmp.n);
        fts5BufferAppendBlob(&p->rc, &out, tmp.n, tmp.p);
        fts5DoclistIterNext(&i1);
        fts5DoclistIterNext(&i2);
      }
    }

    fts5BufferSet(&p->rc, p1, out.n, out.p);







>
|







4215
4216
4217
4218
4219
4220
4221
4222
4223
4224
4225
4226
4227
4228
4229
4230
            iNew = r2.iPos;
            sqlite3Fts5PoslistReaderNext(&r2);
            if( r1.iPos==r2.iPos ) sqlite3Fts5PoslistReaderNext(&r1);
          }
          p->rc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, iNew);
        }

        /* WRITEPOSLISTSIZE */
        fts5BufferAppendVarint(&p->rc, &out, tmp.n * 2);
        fts5BufferAppendBlob(&p->rc, &out, tmp.n, tmp.p);
        fts5DoclistIterNext(&i1);
        fts5DoclistIterNext(&i2);
      }
    }

    fts5BufferSet(&p->rc, p1, out.n, out.p);
4293
4294
4295
4296
4297
4298
4299



































4300
4301
4302
4303
4304
4305
4306
      fts5DoclistIterInit(&doclist, bDesc, pIter->pDoclist);
    }
  }

  fts5StructureRelease(pStruct);
  sqlite3_free(aBuf);
}




































/*
** Run internal checks to ensure that the FTS index (a) is internally 
** consistent and (b) contains entries for which the XOR of the checksums
** as calculated by fts5IndexEntryCksum() is cksum.
**
** Return SQLITE_CORRUPT if any of the internal checks fail, or if the







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







4311
4312
4313
4314
4315
4316
4317
4318
4319
4320
4321
4322
4323
4324
4325
4326
4327
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
      fts5DoclistIterInit(&doclist, bDesc, pIter->pDoclist);
    }
  }

  fts5StructureRelease(pStruct);
  sqlite3_free(aBuf);
}

static int fts5QueryCksum(
  Fts5Index *p,
  const char *z,
  int n,
  int flags,
  u64 *pCksum
){
  u64 cksum = *pCksum;
  Fts5IndexIter *pIdxIter = 0;
  int rc = sqlite3Fts5IndexQuery(p, z, n, flags, &pIdxIter);

  while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){
    const u8 *pPos;
    int nPos;
    i64 rowid = sqlite3Fts5IterRowid(pIdxIter);
    rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos);
    if( rc==SQLITE_OK ){
      Fts5PoslistReader sReader;
      for(sqlite3Fts5PoslistReaderInit(-1, pPos, nPos, &sReader);
          sReader.bEof==0;
          sqlite3Fts5PoslistReaderNext(&sReader)
      ){
        int iCol = FTS5_POS2COLUMN(sReader.iPos);
        int iOff = FTS5_POS2OFFSET(sReader.iPos);
        cksum ^= fts5IndexEntryCksum(rowid, iCol, iOff, z, n);
      }
      rc = sqlite3Fts5IterNext(pIdxIter);
    }
  }
  sqlite3Fts5IterClose(pIdxIter);

  *pCksum = cksum;
  return rc;
}

/*
** Run internal checks to ensure that the FTS index (a) is internally 
** consistent and (b) contains entries for which the XOR of the checksums
** as calculated by fts5IndexEntryCksum() is cksum.
**
** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
4376
4377
4378
4379
4380
4381
4382
4383
4384
4385
4386
4387
4388
4389
4390

4391
4392
4393
4394
4395
4396
4397
          fts5PosIterNext(p, &sPos)
      ){
        cksum2 ^= fts5IndexEntryCksum(iRowid, sPos.iCol, sPos.iPos, z, n);
      }

      /* If this is a new term, query for it. Update cksum3 with the results. */
      if( p->rc==SQLITE_OK && (term.n!=n || memcmp(term.p, z, n)) ){
        Fts5IndexIter *pIdxIter = 0;
        int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX);
        int rc = sqlite3Fts5IndexQuery(p, z, n, flags, &pIdxIter);
        while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){
          const u8 *pPos;
          int nPos;
          i64 rowid = sqlite3Fts5IterRowid(pIdxIter);
          rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos);
          if( rc==SQLITE_OK ){
            Fts5PoslistReader sReader;
            for(sqlite3Fts5PoslistReaderInit(-1, pPos, nPos, &sReader);
                sReader.bEof==0;
                sqlite3Fts5PoslistReaderNext(&sReader)
            ){
              int iCol = FTS5_POS2COLUMN(sReader.iPos);
              int iOff = FTS5_POS2OFFSET(sReader.iPos);
              cksum3 ^= fts5IndexEntryCksum(rowid, iCol, iOff, z, n);
            }
            rc = sqlite3Fts5IterNext(pIdxIter);
          }
        }
        sqlite3Fts5IterClose(pIdxIter);

        fts5BufferSet(&rc, &term, n, (const u8*)z);
        p->rc = rc;
      }
    }
    fts5MultiIterFree(p, pIter);
    fts5StructureRelease(pStruct);
  }







|

|
|
|
|
|
|

<
<
<
<
<
<
<
|

|
|
<
<
>







4415
4416
4417
4418
4419
4420
4421
4422
4423
4424
4425
4426
4427
4428
4429
4430







4431
4432
4433
4434


4435
4436
4437
4438
4439
4440
4441
4442
          fts5PosIterNext(p, &sPos)
      ){
        cksum2 ^= fts5IndexEntryCksum(iRowid, sPos.iCol, sPos.iPos, z, n);
      }

      /* If this is a new term, query for it. Update cksum3 with the results. */
      if( p->rc==SQLITE_OK && (term.n!=n || memcmp(term.p, z, n)) ){
        int rc;
        int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX);
        u64 ck1 = 0;
        u64 ck2 = 0;

        /* Check that the results returned for ASC and DESC queries are
        ** the same. If not, call this corruption.  */
        rc = fts5QueryCksum(p, z, n, flags, &ck1);
          if( rc==SQLITE_OK ){







          rc = fts5QueryCksum(p, z, n, flags | FTS5INDEX_QUERY_DESC, &ck2);
            }
        if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;



        cksum3 ^= ck1;
        fts5BufferSet(&rc, &term, n, (const u8*)z);
        p->rc = rc;
      }
    }
    fts5MultiIterFree(p, pIter);
    fts5StructureRelease(pStruct);
  }
4769
4770
4771
4772
4773
4774
4775
4776
4777
4778
4779
4780
4781
4782
4783
4784


/*
** Return a pointer to a buffer containing a copy of the position list for
** the current entry. Output variable *pn is set to the size of the buffer 
** in bytes before returning.
**
** The returned buffer does not include the 0x00 terminator byte stored on
** disk.
*/
int sqlite3Fts5IterPoslist(Fts5IndexIter *pIter, const u8 **pp, int *pn){
  assert( pIter->pIndex->rc==SQLITE_OK );
  if( pIter->pDoclist ){
    *pn = pIter->pDoclist->nPoslist;
    *pp = pIter->pDoclist->aPoslist;
  }else{







|
|







4814
4815
4816
4817
4818
4819
4820
4821
4822
4823
4824
4825
4826
4827
4828
4829


/*
** Return a pointer to a buffer containing a copy of the position list for
** the current entry. Output variable *pn is set to the size of the buffer 
** in bytes before returning.
**
** The returned position list does not include the "number of bytes" varint
** field that starts the position list on disk.
*/
int sqlite3Fts5IterPoslist(Fts5IndexIter *pIter, const u8 **pp, int *pn){
  assert( pIter->pIndex->rc==SQLITE_OK );
  if( pIter->pDoclist ){
    *pn = pIter->pDoclist->nPoslist;
    *pp = pIter->pDoclist->aPoslist;
  }else{
5007
5008
5009
5010
5011
5012
5013

5014
5015
5016
5017
5018
5019
5020
5021
5022

  if( iOff<n ){
    iOff += sqlite3GetVarint(&a[iOff], (u64*)&iDocid);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid);
  }
  while( iOff<n ){
    int nPos;

    iOff += fts5GetVarint32(&a[iOff], nPos);
    iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos));
    if( iOff<n ){
      i64 iDelta;
      iOff += sqlite3GetVarint(&a[iOff], (u64*)&iDelta);
      if( iDelta==0 ) return iOff;
      iDocid += iDelta;
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid);
    }







>

|







5052
5053
5054
5055
5056
5057
5058
5059
5060
5061
5062
5063
5064
5065
5066
5067
5068

  if( iOff<n ){
    iOff += sqlite3GetVarint(&a[iOff], (u64*)&iDocid);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid);
  }
  while( iOff<n ){
    int nPos;
    /* READPOSLISTSIZE */
    iOff += fts5GetVarint32(&a[iOff], nPos);
    iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos / 2));
    if( iOff<n ){
      i64 iDelta;
      iOff += sqlite3GetVarint(&a[iOff], (u64*)&iDelta);
      if( iDelta==0 ) return iOff;
      iDocid += iDelta;
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid);
    }