SQLite

Check-in [51444f67c0]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix compression of keys stored on internal segment b-tree nodes by fts5.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: 51444f67c0cc58a3023eb1cd78e7cf889da6c80f
User & Date: dan 2015-01-23 17:43:21.454
Context
2015-01-24
19:57
Have fts5 store rowids in ascending order. Query speed is virtually the same regardless of rowid order, and ascending order makes some insert optimizations easier. (check-in: 5206ca6005 user: dan tags: fts5)
2015-01-23
17:43
Fix compression of keys stored on internal segment b-tree nodes by fts5. (check-in: 51444f67c0 user: dan tags: fts5)
06:50
Remove some redundant code from fts5. (check-in: 939b7a5de2 user: dan tags: fts5)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts5/fts5.c.
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
    rc = sqlite3_step(pCsr->pStmt);
    if( rc==SQLITE_ROW ){
      rc = SQLITE_OK;
      CsrFlagClear(pCsr, FTS5CSR_REQUIRE_CONTENT);
    }else{
      rc = sqlite3_reset(pCsr->pStmt);
      if( rc==SQLITE_OK ){
        rc = SQLITE_CORRUPT_VTAB;
      }
    }
  }
  return rc;
}

static void fts5SetVtabError(Fts5Table *p, const char *zFormat, ...){







|







1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
    rc = sqlite3_step(pCsr->pStmt);
    if( rc==SQLITE_ROW ){
      rc = SQLITE_OK;
      CsrFlagClear(pCsr, FTS5CSR_REQUIRE_CONTENT);
    }else{
      rc = sqlite3_reset(pCsr->pStmt);
      if( rc==SQLITE_OK ){
        rc = FTS5_CORRUPT;
      }
    }
  }
  return rc;
}

static void fts5SetVtabError(Fts5Table *p, const char *zFormat, ...){
Changes to ext/fts5/fts5Int.h.
28
29
30
31
32
33
34







35
36
37
38
39
40
41

#define FTS5_DEFAULT_NEARDIST 10
#define FTS5_DEFAULT_RANK     "bm25"

/* Name of rank and rowid columns */
#define FTS5_RANK_NAME "rank"
#define FTS5_ROWID_NAME "rowid"








/**************************************************************************
** Interface to code in fts5.c. 
*/
typedef struct Fts5Global Fts5Global;

int sqlite3Fts5GetTokenizer(







>
>
>
>
>
>
>







28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48

#define FTS5_DEFAULT_NEARDIST 10
#define FTS5_DEFAULT_RANK     "bm25"

/* Name of rank and rowid columns */
#define FTS5_RANK_NAME "rank"
#define FTS5_ROWID_NAME "rowid"

#ifdef SQLITE_DEBUG
# define FTS5_CORRUPT sqlite3Fts5Corrupt()
int sqlite3Fts5Corrupt(void);
#else
# define FTS5_CORRUPT SQLITE_CORRUPT_VTAB
#endif

/**************************************************************************
** Interface to code in fts5.c. 
*/
typedef struct Fts5Global Fts5Global;

int sqlite3Fts5GetTokenizer(
Changes to ext/fts5/fts5_index.c.
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
** The rowid for the doclist index associated with leaf page pgno of segment
** segid in index idx.
*/
#define FTS5_DOCLIST_IDX_ROWID(idx, segid, pgno) \
        FTS5_SEGMENT_ROWID(idx, segid, FTS5_SEGMENT_MAX_HEIGHT, pgno)

#ifdef SQLITE_DEBUG
static int fts5Corrupt() { return SQLITE_CORRUPT_VTAB; }
# define FTS5_CORRUPT fts5Corrupt()
#else
# define FTS5_CORRUPT SQLITE_CORRUPT_VTAB
#endif


/*
** Each time a blob is read from the %_data table, it is padded with this
** many zero bytes. This makes it easier to decode the various record formats
** without overreading if the records are corrupt.







|
<
<
<







252
253
254
255
256
257
258
259



260
261
262
263
264
265
266
** The rowid for the doclist index associated with leaf page pgno of segment
** segid in index idx.
*/
#define FTS5_DOCLIST_IDX_ROWID(idx, segid, pgno) \
        FTS5_SEGMENT_ROWID(idx, segid, FTS5_SEGMENT_MAX_HEIGHT, pgno)

#ifdef SQLITE_DEBUG
int sqlite3Fts5Corrupt() { return SQLITE_CORRUPT_VTAB; }



#endif


/*
** Each time a blob is read from the %_data table, it is padded with this
** many zero bytes. This makes it easier to decode the various record formats
** without overreading if the records are corrupt.
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385

386
387
388
389
390
391
392

/*
** An object of type Fts5SegWriter is used to write to segments.
*/
struct Fts5PageWriter {
  int pgno;                       /* Page number for this page */
  Fts5Buffer buf;                 /* Buffer containing page data */
  Fts5Buffer term;                /* Buffer containing previous term on page */
};
struct Fts5SegWriter {
  int iIdx;                       /* Index to write to */
  int iSegid;                     /* Segid to write to */
  int nWriter;                    /* Number of entries in aWriter */
  Fts5PageWriter *aWriter;        /* Array of PageWriter objects */
  i64 iPrevRowid;                 /* Previous docid written to current leaf */
  u8 bFirstRowidInDoclist;        /* True if next rowid is first in doclist */
  u8 bFirstRowidInPage;           /* True if next rowid is first in page */

  int nLeafWritten;               /* Number of leaf pages written */
  int nEmpty;                     /* Number of contiguous term-less nodes */
  Fts5Buffer dlidx;               /* Doclist index */
  i64 iDlidxPrev;                 /* Previous rowid appended to dlidx */
  int bDlidxPrevValid;            /* True if iDlidxPrev is valid */
};








|









>







366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390

/*
** An object of type Fts5SegWriter is used to write to segments.
*/
struct Fts5PageWriter {
  int pgno;                       /* Page number for this page */
  Fts5Buffer buf;                 /* Buffer containing page data */
  Fts5Buffer term;              /* Buffer containing previous term on page */
};
struct Fts5SegWriter {
  int iIdx;                       /* Index to write to */
  int iSegid;                     /* Segid to write to */
  int nWriter;                    /* Number of entries in aWriter */
  Fts5PageWriter *aWriter;        /* Array of PageWriter objects */
  i64 iPrevRowid;                 /* Previous docid written to current leaf */
  u8 bFirstRowidInDoclist;        /* True if next rowid is first in doclist */
  u8 bFirstRowidInPage;           /* True if next rowid is first in page */
  u8 bFirstTermInPage;            /* True if next term will be first in leaf */
  int nLeafWritten;               /* Number of leaf pages written */
  int nEmpty;                     /* Number of contiguous term-less nodes */
  Fts5Buffer dlidx;               /* Doclist index */
  i64 iDlidxPrev;                 /* Previous rowid appended to dlidx */
  int bDlidxPrevValid;            /* True if iDlidxPrev is valid */
};

2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697



2698
2699
2700
2701
2702
2703
2704
}

static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
  static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
  Fts5PageWriter *pPage = &pWriter->aWriter[0];
  i64 iRowid;

  if( pPage->term.n==0 ){
    /* No term was written to this page. */
    assert( 0==fts5GetU16(&pPage->buf.p[2]) );
    fts5WriteBtreeNoTerm(p, pWriter);
  }

  /* Write the current page to the db. */
  iRowid = FTS5_SEGMENT_ROWID(pWriter->iIdx, pWriter->iSegid, 0, pPage->pgno);
  fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);

  /* Initialize the next page. */
  fts5BufferZero(&pPage->buf);
  fts5BufferZero(&pPage->term);
  fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
  pPage->pgno++;

  /* Increase the leaves written counter */
  pWriter->nLeafWritten++;



}

/*
** Append term pTerm/nTerm to the segment being written by the writer passed
** as the second argument.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has 







|











<





>
>
>







2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689

2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
}

static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
  static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
  Fts5PageWriter *pPage = &pWriter->aWriter[0];
  i64 iRowid;

  if( pWriter->bFirstTermInPage ){
    /* No term was written to this page. */
    assert( 0==fts5GetU16(&pPage->buf.p[2]) );
    fts5WriteBtreeNoTerm(p, pWriter);
  }

  /* Write the current page to the db. */
  iRowid = FTS5_SEGMENT_ROWID(pWriter->iIdx, pWriter->iSegid, 0, pPage->pgno);
  fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);

  /* Initialize the next page. */
  fts5BufferZero(&pPage->buf);

  fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
  pPage->pgno++;

  /* Increase the leaves written counter */
  pWriter->nLeafWritten++;

  /* The new leaf holds no terms */
  pWriter->bFirstTermInPage = 1;
}

/*
** Append term pTerm/nTerm to the segment being written by the writer passed
** as the second argument.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has 
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729

















2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746

2747
2748
2749
2750
2751
2752
2753
  Fts5PageWriter *pPage = &pWriter->aWriter[0];

  assert( pPage==0 || pPage->buf.n==0 || pPage->buf.n>4 );
  if( pPage && pPage->buf.n==0 ){
    /* Zero the first term and first docid fields */
    static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
    fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
    assert( pPage->term.n==0 );
  }
  if( p->rc ) return;
  
  if( pPage->term.n==0 ){
    /* Update the "first term" field of the page header. */
    assert( pPage->buf.p[2]==0 && pPage->buf.p[3]==0 );
    fts5PutU16(&pPage->buf.p[2], pPage->buf.n);
    nPrefix = 0;
    if( pWriter->aWriter[0].pgno!=1 ){

















      fts5WriteBtreeTerm(p, pWriter, nTerm, pTerm);
      pPage = &pWriter->aWriter[0];
    }
  }else{
    nPrefix = fts5PrefixCompress(
        pPage->term.n, pPage->term.p, nTerm, pTerm
    );
    fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
  }

  /* Append the number of bytes of new data, then the term data itself
  ** to the page. */
  fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix);
  fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]);

  /* Update the Fts5PageWriter.term field. */
  fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);


  pWriter->bFirstRowidInPage = 0;
  pWriter->bFirstRowidInDoclist = 1;

  /* If the current leaf page is full, flush it to disk. */
  if( pPage->buf.n>=p->pConfig->pgsz ){
    fts5WriteFlushLeaf(p, pWriter);







|



|




|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|



|
<
<










>







2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751


2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
  Fts5PageWriter *pPage = &pWriter->aWriter[0];

  assert( pPage==0 || pPage->buf.n==0 || pPage->buf.n>4 );
  if( pPage && pPage->buf.n==0 ){
    /* Zero the first term and first docid fields */
    static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
    fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
    assert( pWriter->bFirstTermInPage );
  }
  if( p->rc ) return;
  
  if( pWriter->bFirstTermInPage ){
    /* Update the "first term" field of the page header. */
    assert( pPage->buf.p[2]==0 && pPage->buf.p[3]==0 );
    fts5PutU16(&pPage->buf.p[2], pPage->buf.n);
    nPrefix = 0;
    if( pPage->pgno!=1 ){
      /* This is the first term on a leaf that is not the leftmost leaf in
      ** the segment b-tree. In this case it is necessary to add a term to
      ** the b-tree hierarchy that is (a) larger than the largest term 
      ** already written to the segment and (b) smaller than or equal to
      ** this term. In other words, a prefix of (pTerm/nTerm) that is one
      ** byte longer than the longest prefix (pTerm/nTerm) shares with the
      ** previous term. 
      **
      ** Usually, the previous term is available in pPage->term. The exception
      ** is if this is the first term written in an incremental-merge step.
      ** In this case the previous term is not available, so just write a
      ** copy of (pTerm/nTerm) into the parent node. This is slightly
      ** inefficient, but still correct.  */
      int n = nTerm;
      if( pPage->term.n ){
        n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm);
      }
      fts5WriteBtreeTerm(p, pWriter, n, pTerm);
      pPage = &pWriter->aWriter[0];
    }
  }else{
    nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm);


    fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
  }

  /* Append the number of bytes of new data, then the term data itself
  ** to the page. */
  fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix);
  fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]);

  /* Update the Fts5PageWriter.term field. */
  fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
  pWriter->bFirstTermInPage = 0;

  pWriter->bFirstRowidInPage = 0;
  pWriter->bFirstRowidInDoclist = 1;

  /* If the current leaf page is full, flush it to disk. */
  if( pPage->buf.n>=p->pConfig->pgsz ){
    fts5WriteFlushLeaf(p, pWriter);
2896
2897
2898
2899
2900
2901
2902

2903
2904
2905
2906
2907
2908
2909
  pWriter->iIdx = iIdx;
  pWriter->iSegid = iSegid;

  pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p,sizeof(Fts5PageWriter));
  if( pWriter->aWriter==0 ) return;
  pWriter->nWriter = 1;
  pWriter->aWriter[0].pgno = 1;

}

static void fts5WriteInitForAppend(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegWriter *pWriter,         /* Writer to initialize */
  int iIdx,                       /* Index segment is a part of */
  Fts5StructureSegment *pSeg      /* Segment object to append to */







>







2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
  pWriter->iIdx = iIdx;
  pWriter->iSegid = iSegid;

  pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p,sizeof(Fts5PageWriter));
  if( pWriter->aWriter==0 ) return;
  pWriter->nWriter = 1;
  pWriter->aWriter[0].pgno = 1;
  pWriter->bFirstTermInPage = 1;
}

static void fts5WriteInitForAppend(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegWriter *pWriter,         /* Writer to initialize */
  int iIdx,                       /* Index segment is a part of */
  Fts5StructureSegment *pSeg      /* Segment object to append to */
2933
2934
2935
2936
2937
2938
2939


2940
2941
2942
2943
2944
2945
2946
        fts5NodeIterFree(&ss);
      }
    }
    if( pSeg->nHeight==1 ){
      pWriter->nEmpty = pSeg->pgnoLast-1;
    }
    assert( (pgno+pWriter->nEmpty)==pSeg->pgnoLast );


  }
}

/*
** Iterator pIter was used to iterate through the input segments of on an
** incremental merge operation. This function is called if the incremental
** merge step has finished but the input has not been completely exhausted.







>
>







2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
        fts5NodeIterFree(&ss);
      }
    }
    if( pSeg->nHeight==1 ){
      pWriter->nEmpty = pSeg->pgnoLast-1;
    }
    assert( (pgno+pWriter->nEmpty)==pSeg->pgnoLast );
    pWriter->bFirstTermInPage = 1;
    assert( pWriter->aWriter[0].term.n==0 );
  }
}

/*
** Iterator pIter was used to iterate through the input segments of on an
** incremental merge operation. This function is called if the incremental
** merge step has finished but the input has not been completely exhausted.
3913
3914
3915
3916
3917
3918
3919















3920
3921
3922
3923
3924
3925
3926
** error, or some other SQLite error code if another error (e.g. OOM)
** occurs.
*/
int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
  Fts5Config *pConfig = p->pConfig;
  int iIdx;                       /* Used to iterate through indexes */
  u64 cksum2 = 0;                 /* Checksum based on contents of indexes */
















  /* Check that the checksum of the index matches the argument checksum */
  for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){
    Fts5MultiSegIter *pIter;
    Fts5Structure *pStruct = fts5StructureRead(p, iIdx);
    for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, 0, -1, 0, &pIter);
        fts5MultiIterEof(p, pIter)==0;







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
** error, or some other SQLite error code if another error (e.g. OOM)
** occurs.
*/
int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
  Fts5Config *pConfig = p->pConfig;
  int iIdx;                       /* Used to iterate through indexes */
  u64 cksum2 = 0;                 /* Checksum based on contents of indexes */

  /* Check that the internal nodes of each segment match the leaves */
  for(iIdx=0; p->rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){
    Fts5Structure *pStruct = fts5StructureRead(p, iIdx);
    if( pStruct ){
      int iLvl, iSeg;
      for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
        for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
          Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
          fts5IndexIntegrityCheckSegment(p, iIdx, pSeg);
        }
      }
    }
    fts5StructureRelease(pStruct);
  }

  /* Check that the checksum of the index matches the argument checksum */
  for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){
    Fts5MultiSegIter *pIter;
    Fts5Structure *pStruct = fts5StructureRead(p, iIdx);
    for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, 0, 0, -1, 0, &pIter);
        fts5MultiIterEof(p, pIter)==0;
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
      }
    }
    fts5MultiIterFree(p, pIter);
    fts5StructureRelease(pStruct);
  }
  if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;

  /* Check that the internal nodes of each segment match the leaves */
  for(iIdx=0; p->rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){
    Fts5Structure *pStruct = fts5StructureRead(p, iIdx);
    if( pStruct ){
      int iLvl, iSeg;
      for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
        for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
          Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
          fts5IndexIntegrityCheckSegment(p, iIdx, pSeg);
        }
      }
    }
    fts5StructureRelease(pStruct);
  }

  return fts5IndexReturn(p);
}


/*
** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
** to the document with rowid iRowid.







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







3980
3981
3982
3983
3984
3985
3986















3987
3988
3989
3990
3991
3992
3993
      }
    }
    fts5MultiIterFree(p, pIter);
    fts5StructureRelease(pStruct);
  }
  if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;
















  return fts5IndexReturn(p);
}


/*
** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
** to the document with rowid iRowid.
3986
3987
3988
3989
3990
3991
3992

3993
3994
3995
3996

3997
3998
3999
4000
4001
4002
4003
    apNew = (Fts5Hash**)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Hash*)*nHash);
    for(i=0; rc==SQLITE_OK && i<nHash; i++){
      rc = sqlite3Fts5HashNew(&apNew[i], &p->nPendingData);
    }
    if( rc==SQLITE_OK ){
      p->apHash = apNew;
    }else{

      for(i=0; i<nHash; i++){
        sqlite3Fts5HashFree(apNew[i]);
      }
      sqlite3_free(apNew);

      return rc;
    }
  }

  if( iRowid<=p->iWriteRowid || (p->nPendingData > p->nMaxPendingData) ){
    fts5IndexFlush(p);
  }







>
|
|
|
|
>







4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
    apNew = (Fts5Hash**)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Hash*)*nHash);
    for(i=0; rc==SQLITE_OK && i<nHash; i++){
      rc = sqlite3Fts5HashNew(&apNew[i], &p->nPendingData);
    }
    if( rc==SQLITE_OK ){
      p->apHash = apNew;
    }else{
      if( apNew ){
        for(i=0; i<nHash; i++){
          sqlite3Fts5HashFree(apNew[i]);
        }
        sqlite3_free(apNew);
      }
      return rc;
    }
  }

  if( iRowid<=p->iWriteRowid || (p->nPendingData > p->nMaxPendingData) ){
    fts5IndexFlush(p);
  }
Changes to ext/fts5/fts5_storage.c.
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
        rc = sqlite3Fts5Tokenize(
            pConfig, 
            (const char*)sqlite3_column_text(pScan, i+1),
            sqlite3_column_bytes(pScan, i+1),
            (void*)&ctx,
            fts5StorageIntegrityCallback
        );
        if( ctx.szCol!=aColSize[i] ) rc = SQLITE_CORRUPT_VTAB;
        aTotalSize[i] += ctx.szCol;
      }
      if( rc!=SQLITE_OK ) break;
    }
    rc2 = sqlite3_reset(pScan);
    if( rc==SQLITE_OK ) rc = rc2;
  }

  /* Test that the "totals" (sometimes called "averages") record looks Ok */
  if( rc==SQLITE_OK ){
    int i;
    rc = fts5StorageLoadTotals(p, 0);
    for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
      if( p->aTotalSize[i]!=aTotalSize[i] ) rc = SQLITE_CORRUPT_VTAB;
    }
  }

  /* Check that the %_docsize and %_content tables contain the expected
  ** number of rows.  */
  if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){
    i64 nRow;
    rc = fts5StorageCount(p, "content", &nRow);
    if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = SQLITE_CORRUPT_VTAB;
  }
  if( rc==SQLITE_OK ){
    i64 nRow;
    rc = fts5StorageCount(p, "docsize", &nRow);
    if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = SQLITE_CORRUPT_VTAB;
  }

  /* Pass the expected checksum down to the FTS index module. It will
  ** verify, amongst other things, that it matches the checksum generated by
  ** inspecting the index itself.  */
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts5IndexIntegrityCheck(p->pIndex, ctx.cksum);







|













|








|




|







785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
        rc = sqlite3Fts5Tokenize(
            pConfig, 
            (const char*)sqlite3_column_text(pScan, i+1),
            sqlite3_column_bytes(pScan, i+1),
            (void*)&ctx,
            fts5StorageIntegrityCallback
        );
        if( ctx.szCol!=aColSize[i] ) rc = FTS5_CORRUPT;
        aTotalSize[i] += ctx.szCol;
      }
      if( rc!=SQLITE_OK ) break;
    }
    rc2 = sqlite3_reset(pScan);
    if( rc==SQLITE_OK ) rc = rc2;
  }

  /* Test that the "totals" (sometimes called "averages") record looks Ok */
  if( rc==SQLITE_OK ){
    int i;
    rc = fts5StorageLoadTotals(p, 0);
    for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
      if( p->aTotalSize[i]!=aTotalSize[i] ) rc = FTS5_CORRUPT;
    }
  }

  /* Check that the %_docsize and %_content tables contain the expected
  ** number of rows.  */
  if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){
    i64 nRow;
    rc = fts5StorageCount(p, "content", &nRow);
    if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
  }
  if( rc==SQLITE_OK ){
    i64 nRow;
    rc = fts5StorageCount(p, "docsize", &nRow);
    if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
  }

  /* Pass the expected checksum down to the FTS index module. It will
  ** verify, amongst other things, that it matches the checksum generated by
  ** inspecting the index itself.  */
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts5IndexIntegrityCheck(p->pIndex, ctx.cksum);
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
      int nBlob = sqlite3_column_bytes(pLookup, 0);
      if( 0==fts5StorageDecodeSizeArray(aCol, nCol, aBlob, nBlob) ){
        bCorrupt = 0;
      }
    }
    rc = sqlite3_reset(pLookup);
    if( bCorrupt && rc==SQLITE_OK ){
      rc = SQLITE_CORRUPT_VTAB;
    }
  }
  return rc;
}

int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnToken){
  int rc = fts5StorageLoadTotals(p, 0);







|







909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
      int nBlob = sqlite3_column_bytes(pLookup, 0);
      if( 0==fts5StorageDecodeSizeArray(aCol, nCol, aBlob, nBlob) ){
        bCorrupt = 0;
      }
    }
    rc = sqlite3_reset(pLookup);
    if( bCorrupt && rc==SQLITE_OK ){
      rc = FTS5_CORRUPT;
    }
  }
  return rc;
}

int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnToken){
  int rc = fts5StorageLoadTotals(p, 0);
Changes to ext/fts5/test/fts5_common.tcl.
118
119
120
121
122
123
124














125
126
127
128
129
130
131
  set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10"
  set ret [list]
  foreach L [lrange [db one $sql] 1 end] {
    lappend ret [expr [llength $L] - 2]
  }
  set ret
} 















proc fts5_rnddoc {n} {
  set map [list 0 a  1 b  2 c  3 d  4 e  5 f  6 g  7 h  8 i  9 j]
  set doc [list]
  for {set i 0} {$i < $n} {incr i} {
    lappend doc "x[string map $map [format %.3d [expr int(rand()*1000)]]]"
  }







>
>
>
>
>
>
>
>
>
>
>
>
>
>







118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
  set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10"
  set ret [list]
  foreach L [lrange [db one $sql] 1 end] {
    lappend ret [expr [llength $L] - 2]
  }
  set ret
} 

proc fts5_level_segids {tbl} {
  set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10"
  set ret [list]
  foreach L [lrange [db one $sql] 1 end] {
    set lvl [list]
    foreach S [lrange $L 2 end] {
      regexp {id=([1234567890]*)} $S -> segid
      lappend lvl $segid
    }
    lappend ret $lvl
  }
  set ret
}

proc fts5_rnddoc {n} {
  set map [list 0 a  1 b  2 c  3 d  4 e  5 f  6 g  7 h  8 i  9 j]
  set doc [list]
  for {set i 0} {$i < $n} {incr i} {
    lappend doc "x[string map $map [format %.3d [expr int(rand()*1000)]]]"
  }
Added ext/fts5/test/fts5corrupt.test.






















































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# 2014 Dec 20
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5corrupt

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}

do_test 1.1 {
  db transaction {
    for {set i 1} {$i < 200} {incr i} {
      set doc [list [string repeat x $i] [string repeat y $i]]
      execsql { INSERT INTO t1(rowid, x) VALUES($i, $doc) }
    }
  }
  fts5_level_segs t1
} {1}
db_save

do_execsql_test 1.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
set segid [lindex [fts5_level_segids t1] 0]

do_test 1.3 {
  execsql {
    DELETE FROM t1_data WHERE rowid = fts5_rowid('segment', 0, $segid, 0, 4);
  }
  catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {1 {SQL logic error or missing database}}

do_test 1.4 {
  db_restore_and_reopen
  execsql {
    UPDATE t1_data set block = X'00000000' || substr(block, 5) WHERE
    rowid = fts5_rowid('segment', 0, $segid, 0, 4);
  }
  catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {1 {database disk image is malformed}}

db_restore_and_reopen
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}



#--------------------------------------------------------------------
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE t2 USING fts5(x);
  INSERT INTO t2(t2, rank) VALUES('pgsz', 32);
}
do_test 2.1 {
  db transaction {
    for {set i 0} {$i < 20} {incr i} {
      execsql { INSERT INTO t2 VALUES('xxxxxxxxxx') }
    }
    for {set i 0} {$i < 20} {incr i} {
      execsql { INSERT INTO t2 VALUES('xxxxxxxxxzzzz') }
    }
  }
} {}
db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t2_data} {puts $r}

finish_test