SQLite

Changes On Branch fts5-incompatible
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch fts5-incompatible Excluding Merge-Ins

This is equivalent to a diff from 47a46a9f to 99de5e36

2015-09-10
17:23
Modify the fts5 leaf page format to permit faster seek operations. This is a file-format change. Any existing databases can be upgraded by running the fts5 'rebuild' command. (check-in: 0c0c4ae9 user: dan tags: trunk)
17:20
Create separate "path" and "root" columns in the json_each() and json_tree() virtual tables. "Root" is the 2nd parameter and is fixed. "Path" varies as json_tree() walks the hierarchy. (check-in: 127cce3e user: drh tags: trunk)
16:39
Increment the fts5 version value to indicate that the on-disk format has changed. (Closed-Leaf check-in: 99de5e36 user: dan tags: fts5-incompatible)
16:19
Fix a segfault in fts5 that could occur if the database contents were corrupt. (check-in: 4931e37d user: dan tags: fts5-incompatible)
15:52
Merge latest changes from trunk. Including fts5_expr.c fixes. (check-in: 716e7e74 user: dan tags: fts5-incompatible)
15:24
Make the sqlite3ext.h header file responsive to -DSQLITE_OMIT_LOAD_EXTENSION. (check-in: 47a46a9f user: drh tags: trunk)
15:22
Disable tests for json_each() and json_tree() on builds where virtual tables are not supported (check-in: bb8ee3b1 user: drh tags: trunk)

Changes to ext/fts5/fts5Int.h.

113
114
115
116
117
118
119






120
121
122
123
124
125
126
**   This exists in order to allow the fts5_index.c module to return a 
**   decent error message if it encounters a file-format version it does
**   not understand.
**
** bColumnsize:
**   True if the %_docsize table is created.
**






*/
struct Fts5Config {
  sqlite3 *db;                    /* Database handle */
  char *zDb;                      /* Database holding FTS index (e.g. "main") */
  char *zName;                    /* Name of FTS index */
  int nCol;                       /* Number of columns */
  char **azCol;                   /* Column names */







>
>
>
>
>
>







113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
**   This exists in order to allow the fts5_index.c module to return a 
**   decent error message if it encounters a file-format version it does
**   not understand.
**
** bColumnsize:
**   True if the %_docsize table is created.
**
** bPrefixIndex:
**   This is only used for debugging. If set to false, any prefix indexes
**   are ignored. This value is configured using:
**
**       INSERT INTO tbl(tbl, rank) VALUES('prefix-index', $bPrefixIndex);
**
*/
struct Fts5Config {
  sqlite3 *db;                    /* Database handle */
  char *zDb;                      /* Database holding FTS index (e.g. "main") */
  char *zName;                    /* Name of FTS index */
  int nCol;                       /* Number of columns */
  char **azCol;                   /* Column names */
141
142
143
144
145
146
147




148
149
150
151
152
153
154
155
156
157
158
  int nAutomerge;                 /* 'automerge' setting */
  int nCrisisMerge;               /* Maximum allowed segments per level */
  char *zRank;                    /* Name of rank function */
  char *zRankArgs;                /* Arguments to rank function */

  /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */
  char **pzErrmsg;




};

/* Current expected value of %_config table 'version' field */
#define FTS5_CURRENT_VERSION 3

#define FTS5_CONTENT_NORMAL   0
#define FTS5_CONTENT_NONE     1
#define FTS5_CONTENT_EXTERNAL 2










>
>
>
>



|







147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
  int nAutomerge;                 /* 'automerge' setting */
  int nCrisisMerge;               /* Maximum allowed segments per level */
  char *zRank;                    /* Name of rank function */
  char *zRankArgs;                /* Arguments to rank function */

  /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */
  char **pzErrmsg;

#ifdef SQLITE_DEBUG
  int bPrefixIndex;               /* True to use prefix-indexes */
#endif
};

/* Current expected value of %_config table 'version' field */
#define FTS5_CURRENT_VERSION 4

#define FTS5_CONTENT_NORMAL   0
#define FTS5_CONTENT_NONE     1
#define FTS5_CONTENT_EXTERNAL 2



Changes to ext/fts5/fts5_buffer.c.

12
13
14
15
16
17
18
19
20
21
22
23
24




25
26
27
28
29
30
31
*/



#include "fts5Int.h"

int sqlite3Fts5BufferGrow(int *pRc, Fts5Buffer *pBuf, int nByte){
  /* A no-op if an error has already occurred */
  if( *pRc ) return 1;

  if( (pBuf->n + nByte) > pBuf->nSpace ){
    u8 *pNew;
    int nNew = pBuf->nSpace ? pBuf->nSpace*2 : 64;




    while( nNew<(pBuf->n + nByte) ){
      nNew = nNew * 2;
    }
    pNew = sqlite3_realloc(pBuf->p, nNew);
    if( pNew==0 ){
      *pRc = SQLITE_NOMEM;
      return 1;







<
<




>
>
>
>







12
13
14
15
16
17
18


19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
*/



#include "fts5Int.h"

int sqlite3Fts5BufferGrow(int *pRc, Fts5Buffer *pBuf, int nByte){



  if( (pBuf->n + nByte) > pBuf->nSpace ){
    u8 *pNew;
    int nNew = pBuf->nSpace ? pBuf->nSpace*2 : 64;

    /* A no-op if an error has already occurred */
    if( *pRc ) return 1;

    while( nNew<(pBuf->n + nByte) ){
      nNew = nNew * 2;
    }
    pNew = sqlite3_realloc(pBuf->p, nNew);
    if( pNew==0 ){
      *pRc = SQLITE_NOMEM;
      return 1;

Changes to ext/fts5/fts5_config.c.

476
477
478
479
480
481
482



483
484
485
486
487
488
489

  nByte = nArg * (sizeof(char*) + sizeof(u8));
  pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte);
  pRet->abUnindexed = (u8*)&pRet->azCol[nArg];
  pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1);
  pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1);
  pRet->bColumnsize = 1;



  if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){
    *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName);
    rc = SQLITE_ERROR;
  }

  for(i=3; rc==SQLITE_OK && i<nArg; i++){
    const char *zOrig = azArg[i];







>
>
>







476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492

  nByte = nArg * (sizeof(char*) + sizeof(u8));
  pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte);
  pRet->abUnindexed = (u8*)&pRet->azCol[nArg];
  pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1);
  pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1);
  pRet->bColumnsize = 1;
#ifdef SQLITE_DEBUG
  pRet->bPrefixIndex = 1;
#endif
  if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){
    *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName);
    rc = SQLITE_ERROR;
  }

  for(i=3; rc==SQLITE_OK && i<nArg; i++){
    const char *zOrig = azArg[i];

Changes to ext/fts5/fts5_index.c.

83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106

107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147

















148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
**
**   Then, for each level from 0 to nMax:
**
**     + number of input segments in ongoing merge.
**     + total number of segments in level.
**     + for each segment from oldest to newest:
**         + segment id (always > 0)
**         + b-tree height (1 -> root is leaf, 2 -> root is parent of leaf etc.)
**         + first leaf page number (often 1, always greater than 0)
**         + final leaf page number
**
** 2. The Averages Record:
**
**   A single record within the %_data table. The data is a list of varints.
**   The first value is the number of rows in the index. Then, for each column
**   from left to right, the total number of tokens in the column for all 
**   rows of the table.
**
** 3. Segment leaves:
**
**   TERM DOCLIST FORMAT:
**
**     Most of each segment leaf is taken up by term/doclist data. The 
**     general format of the term/doclist data is:

**
**         varint : size of first term
**         blob:    first term data
**         doclist: first doclist
**         zero-or-more {
**           varint:  number of bytes in common with previous term
**           varint:  number of bytes of new term data (nNew)
**           blob:    nNew bytes of new term data
**           doclist: next doclist
**         }
**
**     doclist format:
**
**         varint:  first rowid
**         poslist: first poslist
**         zero-or-more {
**           varint:  rowid delta (always > 0)
**           poslist: next poslist
**         }
**         0x00 byte
**
**     poslist format:
**
**         varint: size of poslist in bytes multiplied by 2, not including
**                 this field. Plus 1 if this entry carries the "delete" flag.
**         collist: collist for column 0
**         zero-or-more {
**           0x01 byte
**           varint: column number (I)
**           collist: collist for column I
**         }
**
**     collist format:
**
**         varint: first offset + 2
**         zero-or-more {
**           varint: offset delta + 2
**         }
**
**   PAGINATION
**

















**     The format described above is only accurate if the entire term/doclist
**     data fits on a single leaf page. If this is not the case, the format
**     is changed in two ways:
**
**       + if the first rowid on a page occurs before the first term, it
**         is stored as a literal value:
**
**             varint:  first rowid
**
**       + the first term on each page is stored in the same way as the
**         very first term of the segment:
**
**             varint : size of first term
**             blob:    first term data
**
**     Each leaf page begins with:
**
**       + 2-byte unsigned containing offset to first rowid (or 0).
**       + 2-byte unsigned containing offset to first term (or 0).
**
**   Followed by term/doclist data.
**
** 4. Segment interior nodes:
**
**   The interior nodes turn the list of leaves into a b+tree. 
**
**   Each interior node begins with a varint - the page number of the left
**   most child node. Following this, for each leaf page except the first,
**   the interior nodes contain:
**
**     a) If the leaf page contains at least one term, then a term-prefix that
**        is greater than all previous terms, and less than or equal to the
**        first term on the leaf page.
**
**     b) If the leaf page has no terms, a record indicating how many consecutive
**        leaves contain no terms, and whether or not there is an associated
**        by-rowid index record.
**
**   By definition, there is never more than one type (b) record in a row.
**   Type (b) records only ever appear on height=1 pages - immediate parents
**   of leaves. Only type (a) records are pushed to higher levels.
**
**   Term format:
**
**     * Number of bytes in common with previous term plus 2, as a varint.
**     * Number of bytes of new term data, as a varint.
**     * new term data.
**
**   No-term format:
**
**     * either an 0x00 or 0x01 byte. If the value 0x01 is used, then there 
**       is an associated index-by-rowid record.
**     * the number of zero-term leaves as a varint.
**
** 5. Segment doclist indexes:
**
**   Doclist indexes are themselves b-trees, however they usually consist of
**   a single leaf record only. The format of each doclist index leaf page 
**   is:
**
**     * Flags byte. Bits are:







<







|




|


|
>



















<



















|

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
|












<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







83
84
85
86
87
88
89

90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125

126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178







































179
180
181
182
183
184
185
**
**   Then, for each level from 0 to nMax:
**
**     + number of input segments in ongoing merge.
**     + total number of segments in level.
**     + for each segment from oldest to newest:
**         + segment id (always > 0)

**         + first leaf page number (often 1, always greater than 0)
**         + final leaf page number
**
** 2. The Averages Record:
**
**   A single record within the %_data table. The data is a list of varints.
**   The first value is the number of rows in the index. Then, for each column
**   from left to right, the total number of tokens in the column for all
**   rows of the table.
**
** 3. Segment leaves:
**
**   TERM/DOCLIST FORMAT:
**
**     Most of each segment leaf is taken up by term/doclist data. The 
**     general format of term/doclist, starting with the first term
**     on the leaf page, is:
**
**         varint : size of first term
**         blob:    first term data
**         doclist: first doclist
**         zero-or-more {
**           varint:  number of bytes in common with previous term
**           varint:  number of bytes of new term data (nNew)
**           blob:    nNew bytes of new term data
**           doclist: next doclist
**         }
**
**     doclist format:
**
**         varint:  first rowid
**         poslist: first poslist
**         zero-or-more {
**           varint:  rowid delta (always > 0)
**           poslist: next poslist
**         }

**
**     poslist format:
**
**         varint: size of poslist in bytes multiplied by 2, not including
**                 this field. Plus 1 if this entry carries the "delete" flag.
**         collist: collist for column 0
**         zero-or-more {
**           0x01 byte
**           varint: column number (I)
**           collist: collist for column I
**         }
**
**     collist format:
**
**         varint: first offset + 2
**         zero-or-more {
**           varint: offset delta + 2
**         }
**
**   PAGE FORMAT
**
**     Each leaf page begins with a 4-byte header containing 2 16-bit 
**     unsigned integer fields in big-endian format. They are:
**
**       * The byte offset of the first rowid on the page, if it exists
**         and occurs before the first term (otherwise 0).
**
**       * The byte offset of the start of the page footer. If the page
**         footer is 0 bytes in size, then this field is the same as the
**         size of the leaf page in bytes.
**
**     The page footer consists of a single varint for each term located
**     on the page. Each varint is the byte offset of the current term
**     within the page, delta-compressed against the previous value. In
**     other words, the first varint in the footer is the byte offset of
**     the first term, the second is the byte offset of the second less that
**     of the first, and so on.
**
**     The term/doclist format described above is accurate if the entire
**     term/doclist data fits on a single leaf page. If this is not the case,
**     the format is changed in two ways:
**
**       + if the first rowid on a page occurs before the first term, it
**         is stored as a literal value:
**
**             varint:  first rowid
**
**       + the first term on each page is stored in the same way as the
**         very first term of the segment:
**
**             varint : size of first term
**             blob:    first term data
**







































** 5. Segment doclist indexes:
**
**   Doclist indexes are themselves b-trees, however they usually consist of
**   a single leaf record only. The format of each doclist index leaf page 
**   is:
**
**     * Flags byte. Bits are:
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257

258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
/*
** Rowids for the averages and structure records in the %_data table.
*/
#define FTS5_AVERAGES_ROWID     1    /* Rowid used for the averages record */
#define FTS5_STRUCTURE_ROWID   10    /* The structure record */

/*
** Macros determining the rowids used by segment nodes. All nodes in all
** segments for all indexes (the regular FTS index and any prefix indexes)
** are stored in the %_data table with large positive rowids.
**
** The %_data table may contain up to (1<<FTS5_SEGMENT_INDEX_BITS) 
** indexes - one regular term index and zero or more prefix indexes.
**
** Each segment in an index has a unique id greater than zero.
**
** Each node in a segment b-tree is assigned a "page number" that is unique
** within nodes of its height within the segment (leaf nodes have a height 
** of 0, parents 1, etc.). Page numbers are allocated sequentially so that
** a node's page number is always one more than that of its left sibling.
**
** The rowid for a node is then found using the FTS5_SEGMENT_ROWID() macro
** below. The FTS5_SEGMENT_*_BITS macros define the number of bits used
** to encode the three FTS5_SEGMENT_ROWID() arguments. This module returns
** SQLITE_FULL and fails the current operation if they ever prove too small.

*/
#define FTS5_DATA_ID_B     16     /* 16 bits: max segment id 65535 */
#define FTS5_DATA_DLI_B     1     /* Doclist-index flag (1 bit) */
#define FTS5_DATA_HEIGHT_B  5     /* 5 bits: b-tree height values 0..31 */
#define FTS5_DATA_PAGE_B   31     /* 31 bits: max page number 2147483647 */

#define fts5_dri(segid, dlidx, height, pgno) (                                 \
 ((i64)(segid)  << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) +    \
 ((i64)(dlidx)  << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) +                  \
 ((i64)(height) << (FTS5_DATA_PAGE_B)) +                                       \
 ((i64)(pgno))                                                                 \
)

#define FTS5_SEGMENT_ROWID(segid, height, pgno) fts5_dri(segid, 0, height, pgno)
#define FTS5_DLIDX_ROWID(segid, height, pgno)   fts5_dri(segid, 1, height, pgno)

/*
** Maximum segments permitted in a single index 
*/
#define FTS5_MAX_SEGMENT 2000

#ifdef SQLITE_DEBUG







|
<
|
|
<
<

|

<
<
<
<
<
|
<
|
<
>



|









|
|







210
211
212
213
214
215
216
217

218
219


220
221
222





223

224

225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
/*
** Rowids for the averages and structure records in the %_data table.
*/
#define FTS5_AVERAGES_ROWID     1    /* Rowid used for the averages record */
#define FTS5_STRUCTURE_ROWID   10    /* The structure record */

/*
** Macros determining the rowids used by segment leaves and dlidx leaves

** and nodes. All nodes and leaves are stored in the %_data table with large
** positive rowids.


**
** Each segment has a unique non-zero 16-bit id.
**





** The rowid for each segment leaf is found by passing the segment id and 

** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered

** sequentially starting from 1.
*/
#define FTS5_DATA_ID_B     16     /* 16 bits: max segment id 65535 */
#define FTS5_DATA_DLI_B     1     /* Doclist-index flag (1 bit) */
#define FTS5_DATA_HEIGHT_B  5     /* 5 bits: dlidx tree height values 0..31 */
#define FTS5_DATA_PAGE_B   31     /* 31 bits: max page number 2147483647 */

#define fts5_dri(segid, dlidx, height, pgno) (                                 \
 ((i64)(segid)  << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) +    \
 ((i64)(dlidx)  << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) +                  \
 ((i64)(height) << (FTS5_DATA_PAGE_B)) +                                       \
 ((i64)(pgno))                                                                 \
)

#define FTS5_SEGMENT_ROWID(segid, pgno)       fts5_dri(segid, 0, 0, pgno)
#define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno)

/*
** Maximum segments permitted in a single index 
*/
#define FTS5_MAX_SEGMENT 2000

#ifdef SQLITE_DEBUG
299
300
301
302
303
304
305
306

307
308
309
310
311
312
313
typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;

struct Fts5Data {
  u8 *p;                          /* Pointer to buffer containing record */
  int n;                          /* Size of record in bytes */

};

/*
** One object per %_data table.
*/
struct Fts5Index {
  Fts5Config *pConfig;            /* Virtual table configuration */







|
>







267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;

/*
** A single record loaded into memory.
**
** nn is the total size of the record in bytes. szLeaf is taken from the
** 16-bit field at byte offset 2 of the record. For a leaf page that field
** is the byte offset of the start of the page footer (the page-index), so
** p[0..szLeaf-1] holds the 4-byte header and term/doclist data, and
** p[szLeaf..nn-1] holds the page-index. If the page-index is empty,
** szLeaf==nn.
**
** NOTE(review): the reader sets szLeaf from offset 2 for every record it
** loads (marked "TODO1: Fix this" there) - szLeaf is presumably only
** meaningful for leaf pages; confirm before relying on it elsewhere.
*/
struct Fts5Data {
  u8 *p;                          /* Pointer to buffer containing record */
  int nn;                         /* Total size of record in bytes */
  int szLeaf;                     /* Size of leaf without page-index */
};

/*
** One object per %_data table.
*/
struct Fts5Index {
  Fts5Config *pConfig;            /* Virtual table configuration */
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
/*
** The contents of the "structure" record for each index are represented
** using an Fts5Structure record in memory. Which uses instances of the 
** other Fts5StructureXXX types as components.
*/
struct Fts5StructureSegment {
  int iSegid;                     /* Segment id */
  int nHeight;                    /* Height of segment b-tree */
  int pgnoFirst;                  /* First leaf page number in segment */
  int pgnoLast;                   /* Last leaf page number in segment */
};
struct Fts5StructureLevel {
  int nMerge;                     /* Number of segments in incr-merge */
  int nSeg;                       /* Total number of segments on level */
  Fts5StructureSegment *aSeg;     /* Array of segments. aSeg[0] is oldest. */







<







320
321
322
323
324
325
326

327
328
329
330
331
332
333
/*
** The contents of the "structure" record for each index are represented
** using an Fts5Structure record in memory. Which uses instances of the 
** other Fts5StructureXXX types as components.
*/
struct Fts5StructureSegment {
  int iSegid;                     /* Segment id (unique, non-zero, 16 bits) */

  int pgnoFirst;                  /* First leaf page number (often 1, >0) */
  int pgnoLast;                   /* Last leaf page number in segment */
};
struct Fts5StructureLevel {
  int nMerge;                     /* Number of segments in incr-merge */
  int nSeg;                       /* Total number of segments on level */
  Fts5StructureSegment *aSeg;     /* Array of segments. aSeg[0] is oldest. */
373
374
375
376
377
378
379

380

381
382
383
384
385
386
387
388
389
390
391
392
393
394

395
396
397
398
399
400
401
};

/*
** An object of type Fts5SegWriter is used to write to segments.
*/
struct Fts5PageWriter {
  int pgno;                       /* Page number for this page */

  Fts5Buffer buf;                 /* Buffer containing page data */

  Fts5Buffer term;                /* Buffer containing previous term on page */
};
struct Fts5DlidxWriter {
  int pgno;                       /* Page number for this page */
  int bPrevValid;                 /* True if iPrev is valid */
  i64 iPrev;                      /* Previous rowid value written to page */
  Fts5Buffer buf;                 /* Buffer containing page data */
};
struct Fts5SegWriter {
  int iSegid;                     /* Segid to write to */
  Fts5PageWriter writer;          /* PageWriter object */
  i64 iPrevRowid;                 /* Previous rowid written to current leaf */
  u8 bFirstRowidInDoclist;        /* True if next rowid is first in doclist */
  u8 bFirstRowidInPage;           /* True if next rowid is first in page */

  u8 bFirstTermInPage;            /* True if next term will be first in leaf */
  int nLeafWritten;               /* Number of leaf pages written */
  int nEmpty;                     /* Number of contiguous term-less nodes */

  int nDlidx;                     /* Allocated size of aDlidx[] array */
  Fts5DlidxWriter *aDlidx;        /* Array of Fts5DlidxWriter objects */








>
|
>














>







341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
};

/*
** An object of type Fts5SegWriter is used to write to segments.
*/
/*
** State for a leaf page that is being assembled. The leaf data and the
** page-index are accumulated in separate buffers (buf and pgidx).
**
** NOTE(review): iPrevPgidx is presumably retained so that each page-index
** entry can be delta-encoded against the previous term offset, as the
** PAGE FORMAT description requires - confirm against the writer code.
*/
struct Fts5PageWriter {
  int pgno;                       /* Page number for this page */
  int iPrevPgidx;                 /* Previous value written into pgidx */
  Fts5Buffer buf;                 /* Buffer containing leaf data */
  Fts5Buffer pgidx;               /* Buffer containing page-index */
  Fts5Buffer term;                /* Buffer containing previous term on page */
};
/*
** State for one doclist-index page that is being written. Fts5SegWriter
** holds an array of these objects (aDlidx[]).
**
** NOTE(review): presumably one array entry per level of the doclist-index
** tree - confirm against the writer code.
*/
struct Fts5DlidxWriter {
  int pgno;                       /* Page number for this page */
  int bPrevValid;                 /* True if iPrev is valid */
  i64 iPrev;                      /* Previous rowid value written to page */
  Fts5Buffer buf;                 /* Buffer containing page data */
};
struct Fts5SegWriter {
  int iSegid;                     /* Segid to write to */
  Fts5PageWriter writer;          /* PageWriter object */
  i64 iPrevRowid;                 /* Previous rowid written to current leaf */
  u8 bFirstRowidInDoclist;        /* True if next rowid is first in doclist */
  u8 bFirstRowidInPage;           /* True if next rowid is first in page */
  /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
  u8 bFirstTermInPage;            /* True if next term will be first in leaf */
  int nLeafWritten;               /* Number of leaf pages written */
  int nEmpty;                     /* Number of contiguous term-less nodes */

  int nDlidx;                     /* Allocated size of aDlidx[] array */
  Fts5DlidxWriter *aDlidx;        /* Array of Fts5DlidxWriter objects */

468
469
470
471
472
473
474



475
476
477
478
479
480
481
482
483
484
485
486
487



488
489
490
491
492
493
494
495
496
497
498
499
500
501








502
503
504
505
506











507
508
509
510
511
512
513
**
** iRowidOffset/nRowidOffset/aRowidOffset:
**     These are used if the FTS5_SEGITER_REVERSE flag is set.
**
**     For each rowid on the page corresponding to the current term, the
**     corresponding aRowidOffset[] entry is set to the byte offset of the
**     start of the "position-list-size" field within the page.



*/
struct Fts5SegIter {
  Fts5StructureSegment *pSeg;     /* Segment to iterate through */
  int flags;                      /* Mask of configuration flags */
  int iLeafPgno;                  /* Current leaf page number */
  Fts5Data *pLeaf;                /* Current leaf data */
  Fts5Data *pNextLeaf;            /* Leaf page (iLeafPgno+1) */
  int iLeafOffset;                /* Byte offset within current leaf */

  /* The page and offset from which the current term was read. The offset 
  ** is the offset of the first rowid in the current doclist.  */
  int iTermLeafPgno;
  int iTermLeafOffset;




  /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
  int iRowidOffset;               /* Current entry in aRowidOffset[] */
  int nRowidOffset;               /* Allocated size of aRowidOffset[] array */
  int *aRowidOffset;              /* Array of offset to rowid fields */

  Fts5DlidxIter *pDlidx;          /* If there is a doclist-index */

  /* Variables populated based on current entry. */
  Fts5Buffer term;                /* Current term */
  i64 iRowid;                     /* Current rowid */
  int nPos;                       /* Number of bytes in current position list */
  int bDel;                       /* True if the delete flag is set */
};









#define FTS5_SEGITER_ONETERM 0x01
#define FTS5_SEGITER_REVERSE 0x02













/*
** poslist:
**   Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
**   There is no way to tell if this is populated or not.
*/
struct Fts5IndexIter {
  Fts5Index *pIndex;              /* Index that owns this iterator */







>
>
>













>
>
>














>
>
>
>
>
>
>
>





>
>
>
>
>
>
>
>
>
>
>







439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
**
** iRowidOffset/nRowidOffset/aRowidOffset:
**     These are used if the FTS5_SEGITER_REVERSE flag is set.
**
**     For each rowid on the page corresponding to the current term, the
**     corresponding aRowidOffset[] entry is set to the byte offset of the
**     start of the "position-list-size" field within the page.
**
** iTermIdx:
**     Index of current term on iTermLeafPgno.
*/
struct Fts5SegIter {
  Fts5StructureSegment *pSeg;     /* Segment to iterate through */
  int flags;                      /* Mask of configuration flags */
  int iLeafPgno;                  /* Current leaf page number */
  Fts5Data *pLeaf;                /* Current leaf data */
  Fts5Data *pNextLeaf;            /* Leaf page (iLeafPgno+1) */
  int iLeafOffset;                /* Byte offset within current leaf */

  /* The page and offset from which the current term was read. The offset 
  ** is the offset of the first rowid in the current doclist.  */
  int iTermLeafPgno;
  int iTermLeafOffset;

  int iPgidxOff;                  /* Next offset in pgidx */
  /* NOTE(review): iEndofDoclist is undocumented. Presumably the byte offset
  ** within pLeaf at which the current doclist ends - confirm against the
  ** code that sets it. */
  int iEndofDoclist;

  /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
  int iRowidOffset;               /* Current entry in aRowidOffset[] */
  int nRowidOffset;               /* Allocated size of aRowidOffset[] array */
  int *aRowidOffset;              /* Array of offset to rowid fields */

  Fts5DlidxIter *pDlidx;          /* If there is a doclist-index */

  /* Variables populated based on current entry. */
  Fts5Buffer term;                /* Current term */
  i64 iRowid;                     /* Current rowid */
  int nPos;                       /* Number of bytes in current position list */
  int bDel;                       /* True if the delete flag is set */
};

/*
** Argument is a pointer to an Fts5Data structure that contains a 
** leaf page.
*/
#define ASSERT_SZLEAF_OK(x) assert( \
    (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \
)

#define FTS5_SEGITER_ONETERM 0x01
#define FTS5_SEGITER_REVERSE 0x02


/* 
** Argument is a pointer to an Fts5Data structure that contains a leaf
** page. This macro evaluates to true if the leaf contains no terms, or
** false if it contains at least one term.
*/
#define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)

/*
** Argument x is a pointer to an Fts5Data structure that contains a leaf
** page. Evaluates to the 16-bit value read by fts5GetU16() at byte offset
** (szLeaf + i*2) of the page - i.e. entry i of the data that follows the
** term/doclist content, treated as an array of 2-byte fields.
**
** NOTE(review): the PAGE FORMAT description says the page footer is a list
** of delta-compressed varints, and fts5LeafFirstTermOff() reads a varint
** from the same position. This macro's fixed 2-byte layout looks
** inconsistent with that - confirm whether it is still used.
*/
#define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))

/*
** Argument x is a pointer to an Fts5Data structure that contains a leaf
** page. Evaluates to the first 16-bit field of the page header, which the
** page format defines as the byte offset of the first rowid on the page
** (or 0 if there is none before the first term).
*/
#define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))

/*
** poslist:
**   Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
**   There is no way to tell if this is populated or not.
*/
struct Fts5IndexIter {
  Fts5Index *pIndex;              /* Index that owns this iterator */
614
615
616
617
618
619
620





621
622
623
624
625
626
627
){
  int nCmp = MIN(nLeft, nRight);
  int res = memcmp(pLeft, pRight, nCmp);
  return (res==0 ? (nLeft - nRight) : res);
}
#endif







/*
** Close the read-only blob handle, if it is open.
*/
static void fts5CloseReader(Fts5Index *p){
  if( p->pReader ){
    sqlite3_blob *pReader = p->pReader;







>
>
>
>
>







610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
){
  int nCmp = MIN(nLeft, nRight);
  int res = memcmp(pLeft, pRight, nCmp);
  return (res==0 ? (nLeft - nRight) : res);
}
#endif

/*
** Argument pLeaf is a leaf page. Return the value of the varint stored at
** byte offset pLeaf->szLeaf - the first entry of the page-index, which the
** page format defines as the byte offset of the first term on the page.
**
** NOTE(review): presumably only meaningful when the leaf contains at least
** one term (see fts5LeafIsTermless()) - confirm at the call sites.
*/
static int fts5LeafFirstTermOff(Fts5Data *pLeaf){
  int iTermOff;
  fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], iTermOff);
  return iTermOff;
}

/*
** Close the read-only blob handle, if it is open.
*/
static void fts5CloseReader(Fts5Index *p){
  if( p->pReader ){
    sqlite3_blob *pReader = p->pReader;
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693



694
695
696
697
698
699
700

    if( rc==SQLITE_OK ){
      u8 *aOut = 0;               /* Read blob data into this buffer */
      int nByte = sqlite3_blob_bytes(p->pReader);
      int nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING;
      pRet = (Fts5Data*)sqlite3_malloc(nAlloc);
      if( pRet ){
        pRet->n = nByte;
        aOut = pRet->p = (u8*)&pRet[1];
      }else{
        rc = SQLITE_NOMEM;
      }

      if( rc==SQLITE_OK ){
        rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0);
      }
      if( rc!=SQLITE_OK ){
        sqlite3_free(pRet);
        pRet = 0;



      }
    }
    p->rc = rc;
    p->nRead++;
  }

  assert( (pRet==0)==(p->rc!=SQLITE_OK) );







|











>
>
>







676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704

    if( rc==SQLITE_OK ){
      u8 *aOut = 0;               /* Read blob data into this buffer */
      int nByte = sqlite3_blob_bytes(p->pReader);
      int nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING;
      pRet = (Fts5Data*)sqlite3_malloc(nAlloc);
      if( pRet ){
        pRet->nn = nByte;
        aOut = pRet->p = (u8*)&pRet[1];
      }else{
        rc = SQLITE_NOMEM;
      }

      if( rc==SQLITE_OK ){
        rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0);
      }
      if( rc!=SQLITE_OK ){
        sqlite3_free(pRet);
        pRet = 0;
      }else{
        /* TODO1: Fix this */
        pRet->szLeaf = fts5GetU16(&pRet->p[2]);
      }
    }
    p->rc = rc;
    p->nRead++;
  }

  assert( (pRet==0)==(p->rc!=SQLITE_OK) );
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
  p->rc = sqlite3_reset(p->pDeleter);
}

/*
** Remove all records associated with segment iSegid.
*/
static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){
  i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0, 0);
  i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0, 0)-1;
  fts5DataDelete(p, iFirst, iLast);
  if( p->pIdxDeleter==0 ){
    Fts5Config *pConfig = p->pConfig;
    fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintf(
          "DELETE FROM '%q'.'%q_idx' WHERE segid=?",
          pConfig->zDb, pConfig->zName
    ));







|
|







785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
  p->rc = sqlite3_reset(p->pDeleter);
}

/*
** Remove all records associated with segment iSegid.
*/
static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){
  i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0);
  i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)-1;
  fts5DataDelete(p, iFirst, iLast);
  if( p->pIdxDeleter==0 ){
    Fts5Config *pConfig = p->pConfig;
    fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintf(
          "DELETE FROM '%q'.'%q_idx' WHERE segid=?",
          pConfig->zDb, pConfig->zName
    ));
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
          nTotal * sizeof(Fts5StructureSegment)
      );

      if( rc==SQLITE_OK ){
        pLvl->nSeg = nTotal;
        for(iSeg=0; iSeg<nTotal; iSeg++){
          i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].iSegid);
          i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].nHeight);
          i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst);
          i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast);
        }
      }else{
        fts5StructureRelease(pRet);
        pRet = 0;
      }







<







883
884
885
886
887
888
889

890
891
892
893
894
895
896
          nTotal * sizeof(Fts5StructureSegment)
      );

      if( rc==SQLITE_OK ){
        pLvl->nSeg = nTotal;
        for(iSeg=0; iSeg<nTotal; iSeg++){
          i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].iSegid);

          i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst);
          i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast);
        }
      }else{
        fts5StructureRelease(pRet);
        pRet = 0;
      }
970
971
972
973
974
975
976

977
978
979
980
981
982
983
984
985
  Fts5Structure *pRet = 0;        /* Object to return */
  int iCookie;                    /* Configuration cookie */
  Fts5Data *pData;
  Fts5Buffer buf = {0, 0, 0};

  pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
  if( p->rc ) return 0;

  memset(&pData->p[pData->n], 0, FTS5_DATA_PADDING);
  p->rc = fts5StructureDecode(pData->p, pData->n, &iCookie, &pRet);
  if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){
    p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
  }

  fts5DataRelease(pData);
  if( p->rc!=SQLITE_OK ){
    fts5StructureRelease(pRet);







>
|
|







973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
  Fts5Structure *pRet = 0;        /* Object to return */
  int iCookie;                    /* Configuration cookie */
  Fts5Data *pData;
  Fts5Buffer buf = {0, 0, 0};

  pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
  if( p->rc ) return 0;
  /* TODO: Do we need this if the leaf-index is appended? Probably... */
  memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING);
  p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet);
  if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){
    p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
  }

  fts5DataRelease(pData);
  if( p->rc!=SQLITE_OK ){
    fts5StructureRelease(pRet);
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
      Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
      fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge);
      fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg);
      assert( pLvl->nMerge<=pLvl->nSeg );

      for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
        fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid);
        fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].nHeight);
        fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst);
        fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast);
      }
    }

    fts5DataWrite(p, FTS5_STRUCTURE_ROWID, buf.p, buf.n);
    fts5BufferFree(&buf);







<







1039
1040
1041
1042
1043
1044
1045

1046
1047
1048
1049
1050
1051
1052
      Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
      fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge);
      fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg);
      assert( pLvl->nMerge<=pLvl->nSeg );

      for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
        fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid);

        fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst);
        fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast);
      }
    }

    fts5DataWrite(p, FTS5_STRUCTURE_ROWID, buf.p, buf.n);
    fts5BufferFree(&buf);
1124
1125
1126
1127
1128
1129
1130
1131
1132

1133
1134
1135
1136
1137
1138
1139
){
  if( p->rc==SQLITE_OK ){
    int iTst;
    int iPromote = -1;
    int szPromote = 0;            /* Promote anything this size or smaller */
    Fts5StructureSegment *pSeg;   /* Segment just written */
    int szSeg;                    /* Size of segment just written */



    pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1];
    szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst);

    /* Check for condition (a) */
    for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--);
    if( iTst>=0 ){
      int i;







|

>







1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
){
  if( p->rc==SQLITE_OK ){
    int iTst;
    int iPromote = -1;
    int szPromote = 0;            /* Promote anything this size or smaller */
    Fts5StructureSegment *pSeg;   /* Segment just written */
    int szSeg;                    /* Size of segment just written */
    int nSeg = pStruct->aLevel[iLvl].nSeg;

    if( nSeg==0 ) return;
    pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1];
    szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst);

    /* Check for condition (a) */
    for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--);
    if( iTst>=0 ){
      int i;
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
    assert( pLvl->bEof==0 );
    pLvl->iOff = 1;
    pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno);
    pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid);
    pLvl->iFirstOff = pLvl->iOff;
  }else{
    int iOff;
    for(iOff=pLvl->iOff; iOff<pData->n; iOff++){
      if( pData->p[iOff] ) break; 
    }

    if( iOff<pData->n ){
      i64 iVal;
      pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1;
      iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal);
      pLvl->iRowid += iVal;
      pLvl->iOff = iOff;
    }else{
      pLvl->bEof = 1;







|



|







1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
    assert( pLvl->bEof==0 );
    pLvl->iOff = 1;
    pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno);
    pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid);
    pLvl->iFirstOff = pLvl->iOff;
  }else{
    int iOff;
    for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){
      if( pData->p[iOff] ) break; 
    }

    if( iOff<pData->nn ){
      i64 iVal;
      pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1;
      iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal);
      pLvl->iRowid += iVal;
      pLvl->iOff = iOff;
    }else{
      pLvl->bEof = 1;
1421
1422
1423
1424
1425
1426
1427

1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440












1441
1442
1443
1444
1445
1446
1447
/*
** Advance segment iterator pIter to the leaf page that follows the
** current one. The current leaf is released and the page number is
** incremented. Then:
**
**   * if a page is waiting in pIter->pNextLeaf, it becomes the new
**     current leaf and pNextLeaf is cleared, or
**
**   * if the new page number is within the segment, the page is read
**     from the database, or
**
**   * otherwise, pIter->pLeaf is set to NULL.
*/
static void fts5SegIterNextPage(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter              /* Iterator to advance to next page */
){
  Fts5StructureSegment *pSeg = pIter->pSeg;
  Fts5Data *pNext = 0;            /* New value for pIter->pLeaf */

  fts5DataRelease(pIter->pLeaf);
  pIter->iLeafPgno++;

  if( pIter->pNextLeaf ){
    assert( pIter->iLeafPgno<=pSeg->pgnoLast );
    pNext = pIter->pNextLeaf;
    pIter->pNextLeaf = 0;
  }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
    i64 iRowid = FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, pIter->iLeafPgno);
    pNext = fts5DataRead(p, iRowid);
  }

  pIter->pLeaf = pNext;
}

/*
** Argument p points to a buffer containing a varint to be interpreted as a
** position list size field. Read the varint and return the number of bytes
** read. Before returning, set *pnSz to the number of bytes in the position







>









|



>
>
>
>
>
>
>
>
>
>
>
>







1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
/*
** Advance segment iterator pIter to the leaf page that follows the
** current one. The current leaf is released and the page number is
** incremented. The new leaf is taken from pIter->pNextLeaf if that is
** set, or read from the database if the new page number is within the
** segment. Otherwise pIter->pLeaf is set to NULL.
**
** If a new leaf was loaded, iPgidxOff and iEndofDoclist are initialized
** from it. For a termless page, iPgidxOff is set to szLeaf and
** iEndofDoclist to (nn+1). Otherwise, the varint at offset szLeaf is
** decoded into iEndofDoclist and iPgidxOff is left pointing to the byte
** immediately following that varint.
*/
static void fts5SegIterNextPage(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter              /* Iterator to advance to next page */
){
  Fts5StructureSegment *pSeg = pIter->pSeg;
  Fts5Data *pLeaf = 0;            /* New value for pIter->pLeaf */

  fts5DataRelease(pIter->pLeaf);
  pIter->iLeafPgno++;

  if( pIter->pNextLeaf ){
    assert( pIter->iLeafPgno<=pSeg->pgnoLast );
    pLeaf = pIter->pNextLeaf;
    pIter->pNextLeaf = 0;
  }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
    i64 iRowid = FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno);
    pLeaf = fts5DataRead(p, iRowid);
  }
  pIter->pLeaf = pLeaf;

  /* Nothing further to do if there is no page to point at. */
  if( pLeaf==0 ) return;

  if( fts5LeafIsTermless(pLeaf) ){
    pIter->iPgidxOff = pLeaf->szLeaf;
    pIter->iEndofDoclist = pLeaf->nn+1;
  }else{
    int iPgidx = pLeaf->szLeaf;   /* Offset of the varint to decode */
    iPgidx += fts5GetVarint32(&pLeaf->p[iPgidx], pIter->iEndofDoclist);
    pIter->iPgidxOff = iPgidx;
  }
}

/*
** Argument p points to a buffer containing a varint to be interpreted as a
** position list size field. Read the varint and return the number of bytes
** read. Before returning, set *pnSz to the number of bytes in the position
1466
1467
1468
1469
1470
1471
1472

1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485

1486
1487
1488
1489
1490
1491
1492
1493
**
** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the 
** position list content (if any).
*/
static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
  Fts5Data *pLeaf;                /* Leaf the iterator points to */
  int iSzOff;                     /* Offset of the size field to decode */

  /* This function is a no-op if an error has already occurred. */
  if( p->rc!=SQLITE_OK ) return;

  pLeaf = pIter->pLeaf;
  iSzOff = pIter->iLeafOffset;
  if( iSzOff<pLeaf->n ){
    /* Decode the size field into nPos and bDel, then step over it. */
    pIter->iLeafOffset += fts5GetPoslistSize(
        &pLeaf->p[iSzOff], &pIter->nPos, &pIter->bDel
    );
  }else{
    /* The size field would start at or beyond the end of the page. */
    p->rc = FTS5_CORRUPT;
  }
}

static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
  u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
  int iOff = pIter->iLeafOffset;


  if( iOff>=pIter->pLeaf->n ){
    fts5SegIterNextPage(p, pIter);
    if( pIter->pLeaf==0 ){
      if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
      return;
    }
    iOff = 4;
    a = pIter->pLeaf->p;







>
|












>
|







1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
**
** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the 
** position list content (if any).
*/
static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
  Fts5Data *pLeaf;                /* Leaf the iterator points to */
  int iSzOff;                     /* Offset of the size field to decode */

  /* This function is a no-op if an error has already occurred. */
  if( p->rc!=SQLITE_OK ) return;

  pLeaf = pIter->pLeaf;
  iSzOff = pIter->iLeafOffset;
  ASSERT_SZLEAF_OK(pLeaf);
  if( iSzOff<pLeaf->szLeaf ){
    /* Decode the size field into nPos and bDel, then step over it. */
    pIter->iLeafOffset += fts5GetPoslistSize(
        &pLeaf->p[iSzOff], &pIter->nPos, &pIter->bDel
    );
  }else{
    /* The size field would start at or beyond offset szLeaf. */
    p->rc = FTS5_CORRUPT;
  }
}

static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
  u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
  int iOff = pIter->iLeafOffset;

  ASSERT_SZLEAF_OK(pIter->pLeaf);
  if( iOff>=pIter->pLeaf->szLeaf ){
    fts5SegIterNextPage(p, pIter);
    if( pIter->pLeaf==0 ){
      if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
      return;
    }
    iOff = 4;
    a = pIter->pLeaf->p;
1519
1520
1521
1522
1523
1524
1525








1526
1527
1528
1529
1530
1531
1532
  iOff += fts5GetVarint32(&a[iOff], nNew);
  pIter->term.n = nKeep;
  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
  iOff += nNew;
  pIter->iTermLeafOffset = iOff;
  pIter->iTermLeafPgno = pIter->iLeafPgno;
  pIter->iLeafOffset = iOff;









  fts5SegIterLoadRowid(p, pIter);
}

/*
** Initialize the iterator object pIter to iterate through the entries in
** segment pSeg. The iterator is left pointing to the first entry when 







>
>
>
>
>
>
>
>







1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
  iOff += fts5GetVarint32(&a[iOff], nNew);
  pIter->term.n = nKeep;
  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
  iOff += nNew;
  pIter->iTermLeafOffset = iOff;
  pIter->iTermLeafPgno = pIter->iLeafPgno;
  pIter->iLeafOffset = iOff;

  if( pIter->iPgidxOff>=pIter->pLeaf->nn ){
    pIter->iEndofDoclist = pIter->pLeaf->nn+1;
  }else{
    int nExtra;
    pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra);
    pIter->iEndofDoclist += nExtra;
  }

  fts5SegIterLoadRowid(p, pIter);
}

/*
** Initialize the iterator object pIter to iterate through the entries in
** segment pSeg. The iterator is left pointing to the first entry when 
1554
1555
1556
1557
1558
1559
1560
1561
1562



1563
1564
1565
1566
1567
1568
1569
    memset(pIter, 0, sizeof(*pIter));
    pIter->pSeg = pSeg;
    pIter->iLeafPgno = pSeg->pgnoFirst-1;
    fts5SegIterNextPage(p, pIter);
  }

  if( p->rc==SQLITE_OK ){
    u8 *a = pIter->pLeaf->p;
    pIter->iLeafOffset = fts5GetU16(&a[2]);



    fts5SegIterLoadTerm(p, pIter, 0);
    fts5SegIterLoadNPos(p, pIter);
  }
}

/*
** This function is only ever called on iterators created by calls to







<
|
>
>
>







1581
1582
1583
1584
1585
1586
1587

1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
    memset(pIter, 0, sizeof(*pIter));
    pIter->pSeg = pSeg;
    pIter->iLeafPgno = pSeg->pgnoFirst-1;
    fts5SegIterNextPage(p, pIter);
  }

  if( p->rc==SQLITE_OK ){

    pIter->iLeafOffset = 4;
    assert_nc( pIter->pLeaf->nn>4 );
    assert( fts5LeafFirstTermOff(pIter->pLeaf)==4 );
    pIter->iPgidxOff = pIter->pLeaf->szLeaf+1;
    fts5SegIterLoadTerm(p, pIter, 0);
    fts5SegIterLoadNPos(p, pIter);
  }
}

/*
** This function is only ever called on iterators created by calls to
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588





1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
** This function advances the iterator so that it points to the last 
** relevant rowid on the page and, if necessary, initializes the 
** aRowidOffset[] and iRowidOffset variables. At this point the iterator
** is in its regular state - Fts5SegIter.iLeafOffset points to the first
** byte of the position list content associated with said rowid.
*/
static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
  int n = pIter->pLeaf->n;
  int i = pIter->iLeafOffset;
  u8 *a = pIter->pLeaf->p;
  int iRowidOffset = 0;






  while( 1 ){
    i64 iDelta = 0;
    int nPos;
    int bDummy;

    i += fts5GetPoslistSize(&a[i], &nPos, &bDummy);
    i += nPos;
    if( i>=n ) break;
    i += fts5GetVarint(&a[i], (u64*)&iDelta);
    if( iDelta==0 ) break;
    pIter->iRowid += iDelta;

    if( iRowidOffset>=pIter->nRowidOffset ){
      int nNew = pIter->nRowidOffset + 8;
      int *aNew = (int*)sqlite3_realloc(pIter->aRowidOffset, nNew*sizeof(int));
      if( aNew==0 ){
        p->rc = SQLITE_NOMEM;







|




>
>
>
>
>









<







1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631

1632
1633
1634
1635
1636
1637
1638
** This function advances the iterator so that it points to the last 
** relevant rowid on the page and, if necessary, initializes the 
** aRowidOffset[] and iRowidOffset variables. At this point the iterator
** is in its regular state - Fts5SegIter.iLeafOffset points to the first
** byte of the position list content associated with said rowid.
*/
static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
  int n = pIter->pLeaf->szLeaf;
  int i = pIter->iLeafOffset;
  u8 *a = pIter->pLeaf->p;
  int iRowidOffset = 0;

  if( n>pIter->iEndofDoclist ){
    n = pIter->iEndofDoclist;
  }

  ASSERT_SZLEAF_OK(pIter->pLeaf);
  while( 1 ){
    i64 iDelta = 0;
    int nPos;
    int bDummy;

    i += fts5GetPoslistSize(&a[i], &nPos, &bDummy);
    i += nPos;
    if( i>=n ) break;
    i += fts5GetVarint(&a[i], (u64*)&iDelta);

    pIter->iRowid += iDelta;

    if( iRowidOffset>=pIter->nRowidOffset ){
      int nNew = pIter->nRowidOffset + 8;
      int *aNew = (int*)sqlite3_realloc(pIter->aRowidOffset, nNew*sizeof(int));
      if( aNew==0 ){
        p->rc = SQLITE_NOMEM;
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659

1660
1661
1662
1663
1664
1665
1666

  fts5DataRelease(pIter->pLeaf);
  pIter->pLeaf = 0;
  while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){
    Fts5Data *pNew;
    pIter->iLeafPgno--;
    pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
          pIter->pSeg->iSegid, 0, pIter->iLeafPgno
    ));
    if( pNew ){
      if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
        if( pIter->iTermLeafOffset<pNew->n ){
          pIter->pLeaf = pNew;
          pIter->iLeafOffset = pIter->iTermLeafOffset;
        }
      }else{
        int iRowidOff, dummy;
        fts5LeafHeader(pNew, &iRowidOff, &dummy);
        if( iRowidOff ){
          pIter->pLeaf = pNew;
          pIter->iLeafOffset = iRowidOff;
        }
      }

      if( pIter->pLeaf ){
        u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset];
        pIter->iLeafOffset += fts5GetVarint(a, (u64*)&pIter->iRowid);
        break;
      }else{
        fts5DataRelease(pNew);
      }
    }
  }

  if( pIter->pLeaf ){

    fts5SegIterReverseInitPage(p, pIter);
  }
}

/*
** Return true if the iterator passed as the second argument currently
** points to a delete marker. A delete marker is an entry with a 0 byte







|



|




|
|

















>







1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700

  fts5DataRelease(pIter->pLeaf);
  pIter->pLeaf = 0;
  while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){
    Fts5Data *pNew;
    pIter->iLeafPgno--;
    pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
          pIter->pSeg->iSegid, pIter->iLeafPgno
    ));
    if( pNew ){
      if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
        if( pIter->iTermLeafOffset<pNew->szLeaf ){
          pIter->pLeaf = pNew;
          pIter->iLeafOffset = pIter->iTermLeafOffset;
        }
      }else{
        int iRowidOff;
        iRowidOff = fts5LeafFirstRowidOff(pNew);
        if( iRowidOff ){
          pIter->pLeaf = pNew;
          pIter->iLeafOffset = iRowidOff;
        }
      }

      if( pIter->pLeaf ){
        u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset];
        pIter->iLeafOffset += fts5GetVarint(a, (u64*)&pIter->iRowid);
        break;
      }else{
        fts5DataRelease(pNew);
      }
    }
  }

  if( pIter->pLeaf ){
    pIter->iEndofDoclist = pIter->pLeaf->nn+1;
    fts5SegIterReverseInitPage(p, pIter);
  }
}

/*
** Return true if the iterator passed as the second argument currently
** points to a delete marker. A delete marker is an entry with a 0 byte
1708
1709
1710
1711
1712
1713
1714
1715
1716

1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732


1733

1734


1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748


1749
1750
1751
1752
1753
1754
1755
1756
1757
1758

1759
1760
1761
1762





1763





1764

1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780

1781
1782
1783
1784
1785
1786
1787
      Fts5Data *pLeaf = pIter->pLeaf;
      int iOff;
      int bNewTerm = 0;
      int nKeep = 0;

      /* Search for the end of the position list within the current page. */
      u8 *a = pLeaf->p;
      int n = pLeaf->n;


      iOff = pIter->iLeafOffset + pIter->nPos;

      if( iOff<n ){
        /* The next entry is on the current page */
        u64 iDelta;
        iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
        pIter->iLeafOffset = iOff;
        if( iDelta==0 ){
          bNewTerm = 1;
          if( iOff>=n ){
            fts5SegIterNextPage(p, pIter);
            pIter->iLeafOffset = 4;
          }else if( iOff!=fts5GetU16(&a[2]) ){
            pIter->iLeafOffset += fts5GetVarint32(&a[iOff], nKeep);
          }
        }else{


          pIter->iRowid += iDelta;

        }


      }else if( pIter->pSeg==0 ){
        const u8 *pList = 0;
        const char *zTerm = 0;
        int nList = 0;
        if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){
          sqlite3Fts5HashScanNext(p->pHash);
          sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
        }
        if( pList==0 ){
          fts5DataRelease(pIter->pLeaf);
          pIter->pLeaf = 0;
        }else{
          pIter->pLeaf->p = (u8*)pList;
          pIter->pLeaf->n = nList;


          sqlite3Fts5BufferSet(&p->rc, &pIter->term, strlen(zTerm), (u8*)zTerm);
          pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
        }
      }else{
        iOff = 0;
        /* Next entry is not on the current page */
        while( iOff==0 ){
          fts5SegIterNextPage(p, pIter);
          pLeaf = pIter->pLeaf;
          if( pLeaf==0 ) break;

          if( (iOff = fts5GetU16(&pLeaf->p[0])) && iOff<pLeaf->n ){
            iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
            pIter->iLeafOffset = iOff;
          }





          else if( (iOff = fts5GetU16(&pLeaf->p[2])) ){





            pIter->iLeafOffset = iOff;

            bNewTerm = 1;
          }
          if( iOff>=pLeaf->n ){
            p->rc = FTS5_CORRUPT;
            return;
          }
        }
      }

      /* Check if the iterator is now at EOF. If so, return early. */
      if( pIter->pLeaf ){
        if( bNewTerm ){
          if( pIter->flags & FTS5_SEGITER_ONETERM ){
            fts5DataRelease(pIter->pLeaf);
            pIter->pLeaf = 0;
          }else{

            fts5SegIterLoadTerm(p, pIter, nKeep);
            fts5SegIterLoadNPos(p, pIter);
            if( pbNewTerm ) *pbNewTerm = 1;
          }
        }else{
          fts5SegIterLoadNPos(p, pIter);
        }







|

>



|
<
<
|
|

|
<
<
<
|


>
>

>

>
>













|
>
>










>
|


|
>
>
>
>
>
|
>
>
>
>
>

>


|













>







1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755


1756
1757
1758
1759



1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
      Fts5Data *pLeaf = pIter->pLeaf;
      int iOff;
      int bNewTerm = 0;
      int nKeep = 0;

      /* Search for the end of the position list within the current page. */
      u8 *a = pLeaf->p;
      int n = pLeaf->szLeaf;

      ASSERT_SZLEAF_OK(pLeaf);
      iOff = pIter->iLeafOffset + pIter->nPos;

      if( iOff<n ){
        /* The next entry is on the current page. */


        assert_nc( iOff<=pIter->iEndofDoclist );
        if( iOff>=pIter->iEndofDoclist ){
          bNewTerm = 1;
          if( iOff!=fts5LeafFirstTermOff(pLeaf) ){



            iOff += fts5GetVarint32(&a[iOff], nKeep);
          }
        }else{
          u64 iDelta;
          iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
          pIter->iRowid += iDelta;
          assert_nc( iDelta>0 );
        }
        pIter->iLeafOffset = iOff;

      }else if( pIter->pSeg==0 ){
        const u8 *pList = 0;
        const char *zTerm = 0;
        int nList = 0;
        if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){
          sqlite3Fts5HashScanNext(p->pHash);
          sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
        }
        if( pList==0 ){
          fts5DataRelease(pIter->pLeaf);
          pIter->pLeaf = 0;
        }else{
          pIter->pLeaf->p = (u8*)pList;
          pIter->pLeaf->nn = nList;
          pIter->pLeaf->szLeaf = nList;
          pIter->iEndofDoclist = nList+1;
          sqlite3Fts5BufferSet(&p->rc, &pIter->term, strlen(zTerm), (u8*)zTerm);
          pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
        }
      }else{
        iOff = 0;
        /* Next entry is not on the current page */
        while( iOff==0 ){
          fts5SegIterNextPage(p, pIter);
          pLeaf = pIter->pLeaf;
          if( pLeaf==0 ) break;
          ASSERT_SZLEAF_OK(pLeaf);
          if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
            iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
            pIter->iLeafOffset = iOff;

            if( pLeaf->nn>pLeaf->szLeaf ){
              pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
                  &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist
              );
            }

          }
          else if( pLeaf->nn>pLeaf->szLeaf ){
            pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
                &pLeaf->p[pLeaf->szLeaf], iOff
            );
            pIter->iLeafOffset = iOff;
            pIter->iEndofDoclist = iOff;
            bNewTerm = 1;
          }
          if( iOff>=pLeaf->szLeaf ){
            p->rc = FTS5_CORRUPT;
            return;
          }
        }
      }

      /* Check if the iterator is now at EOF. If so, return early. */
      if( pIter->pLeaf ){
        if( bNewTerm ){
          if( pIter->flags & FTS5_SEGITER_ONETERM ){
            fts5DataRelease(pIter->pLeaf);
            pIter->pLeaf = 0;
          }else{
            int nExtra;
            fts5SegIterLoadTerm(p, pIter, nKeep);
            fts5SegIterLoadNPos(p, pIter);
            if( pbNewTerm ) *pbNewTerm = 1;
          }
        }else{
          fts5SegIterLoadNPos(p, pIter);
        }
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852

1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881







1882
1883
1884
1885
1886
1887
1888
  Fts5DlidxIter *pDlidx = pIter->pDlidx;
  Fts5Data *pLast = 0;
  int pgnoLast = 0;

  if( pDlidx ){
    int iSegid = pIter->pSeg->iSegid;
    pgnoLast = fts5DlidxIterPgno(pDlidx);
    pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, 0, pgnoLast));
  }else{
    int iOff;                               /* Byte offset within pLeaf */
    Fts5Data *pLeaf = pIter->pLeaf;         /* Current leaf data */

    /* Currently, Fts5SegIter.iLeafOffset (and iOff) points to the first 
    ** byte of position-list content for the current rowid. Back it up
    ** so that it points to the start of the position-list size field. */
    pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel);
    iOff = pIter->iLeafOffset;
    assert( iOff>=4 );

    /* Search for a new term within the current leaf. If one can be found,
    ** then this page contains the largest rowid for the current term. */
    while( iOff<pLeaf->n ){
      int nPos;
      i64 iDelta;
      int bDummy;

      /* Read the position-list size field */
      iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy);
      iOff += nPos;
      if( iOff>=pLeaf->n ) break;

      /* Rowid delta. Or, if 0x00, the end of doclist marker. */
      nPos = fts5GetVarint(&pLeaf->p[iOff], (u64*)&iDelta);
      if( iDelta==0 ) break;
      iOff += nPos;
    }

    /* If this condition is true then the largest rowid for the current
    ** term may not be stored on the current page. So search forward to
    ** see where said rowid really is.  */
    if( iOff>=pLeaf->n ){
      int pgno;
      Fts5StructureSegment *pSeg = pIter->pSeg;

      /* The last rowid in the doclist may not be on the current page. Search
      ** forward to find the page containing the last rowid.  */
      for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){
        i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, pgno);
        Fts5Data *pNew = fts5DataRead(p, iAbs);
        if( pNew ){
          int iRowid, iTerm;
          fts5LeafHeader(pNew, &iRowid, &iTerm);

          if( iRowid ){
            SWAPVAL(Fts5Data*, pNew, pLast);
            pgnoLast = pgno;
          }
          fts5DataRelease(pNew);
          if( iTerm ) break;
        }
      }
    }
  }

  /* If pLast is NULL at this point, then the last rowid for this doclist
  ** lies on the page currently indicated by the iterator. In this case 
  ** pIter->iLeafOffset is already set to point to the position-list size
  ** field associated with the first relevant rowid on the page.
  **
  ** Or, if pLast is non-NULL, then it is the page that contains the last
  ** rowid. In this case configure the iterator so that it points to the
  ** first rowid on this page.
  */
  if( pLast ){
    int dummy;
    int iOff;
    fts5DataRelease(pIter->pLeaf);
    pIter->pLeaf = pLast;
    pIter->iLeafPgno = pgnoLast;
    fts5LeafHeader(pLast, &iOff, &dummy);
    iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
    pIter->iLeafOffset = iOff;







  }

  fts5SegIterReverseInitPage(p, pIter);
}

/*
** Iterator pIter currently points to the first rowid of a doclist.







|








<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<




|






|


|
|
>





|















<




|


>
>
>
>
>
>
>







1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866




















1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904

1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
  Fts5DlidxIter *pDlidx = pIter->pDlidx;
  Fts5Data *pLast = 0;
  int pgnoLast = 0;

  if( pDlidx ){
    int iSegid = pIter->pSeg->iSegid;
    pgnoLast = fts5DlidxIterPgno(pDlidx);
    pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast));
  }else{
    int iOff;                               /* Byte offset within pLeaf */
    Fts5Data *pLeaf = pIter->pLeaf;         /* Current leaf data */

    /* Currently, Fts5SegIter.iLeafOffset (and iOff) points to the first 
    ** byte of position-list content for the current rowid. Back it up
    ** so that it points to the start of the position-list size field. */
    pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel);





















    /* If this condition is true then the largest rowid for the current
    ** term may not be stored on the current page. So search forward to
    ** see where said rowid really is.  */
    if( pIter->iEndofDoclist>=pLeaf->szLeaf ){
      int pgno;
      Fts5StructureSegment *pSeg = pIter->pSeg;

      /* The last rowid in the doclist may not be on the current page. Search
      ** forward to find the page containing the last rowid.  */
      for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){
        i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
        Fts5Data *pNew = fts5DataRead(p, iAbs);
        if( pNew ){
          int iRowid, bTermless;
          iRowid = fts5LeafFirstRowidOff(pNew);
          bTermless = fts5LeafIsTermless(pNew);
          if( iRowid ){
            SWAPVAL(Fts5Data*, pNew, pLast);
            pgnoLast = pgno;
          }
          fts5DataRelease(pNew);
          if( bTermless==0 ) break;
        }
      }
    }
  }

  /* If pLast is NULL at this point, then the last rowid for this doclist
  ** lies on the page currently indicated by the iterator. In this case 
  ** pIter->iLeafOffset is already set to point to the position-list size
  ** field associated with the first relevant rowid on the page.
  **
  ** Or, if pLast is non-NULL, then it is the page that contains the last
  ** rowid. In this case configure the iterator so that it points to the
  ** first rowid on this page.
  */
  if( pLast ){

    int iOff;
    fts5DataRelease(pIter->pLeaf);
    pIter->pLeaf = pLast;
    pIter->iLeafPgno = pgnoLast;
    iOff = fts5LeafFirstRowidOff(pLast);
    iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
    pIter->iLeafOffset = iOff;

    if( fts5LeafIsTermless(pLast) ){
      pIter->iEndofDoclist = pLast->nn+1;
    }else{
      pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast);
    }

  }

  fts5SegIterReverseInitPage(p, pIter);
}

/*
** Iterator pIter currently points to the first rowid of a doclist.
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934

  assert( pIter->flags & FTS5_SEGITER_ONETERM );
  assert( pIter->pDlidx==0 );

  /* Check if the current doclist ends on this page. If it does, return
  ** early without loading the doclist-index (as it belongs to a different
  ** term). */
  if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
    int iOff = pIter->iLeafOffset + pIter->nPos;
    while( iOff<pLeaf->n ){
      int bDummy;
      int nPos;
      i64 iDelta;

      /* iOff is currently the offset of the start of position list data */
      iOff += fts5GetVarint(&pLeaf->p[iOff], (u64*)&iDelta);
      if( iDelta==0 ) return;
      assert_nc( iOff<pLeaf->n );
      iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy);
      iOff += nPos;
    }
  }

  pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
}

/*
** Read a varint starting at a[iOff] into nVal and advance iOff past it.
**
** A single-byte value (high bit clear) is handled inline. Otherwise iOff
** is stepped back to the first byte and decoding is handed to
** fts5GetVarint32(), whose return value is added to iOff (presumably the
** number of bytes consumed - confirm against its definition).
**
** iOff and nVal must be modifiable lvalues; both are evaluated more than
** once. The array argument is parenthesized so that an expression such as
** "p+1" may be passed without changing the meaning of the subscript.
*/
#define fts5IndexGetVarint32(a, iOff, nVal) {     \
  nVal = (a)[iOff++];                             \
  if( nVal & 0x80 ){                              \
    iOff--;                                       \
    iOff += fts5GetVarint32(&(a)[iOff], nVal);    \
  }                                               \
}

/*
** Advance iOff past one varint stored at a[iOff] without decoding it.
** Bytes are consumed while their 0x80 continuation bit is set; the first
** byte with that bit clear is consumed as well. At most 9 bytes are
** consumed.
**
** iOff must be a modifiable lvalue and is evaluated more than once. The
** array argument is parenthesized so that an expression such as "p+1" may
** be passed without changing the meaning of the subscript.
*/
#define fts5IndexSkipVarint(a, iOff) {            \
  int iEnd = iOff+9;                              \
  while( ((a)[iOff++] & 0x80) && iOff<iEnd );     \
}







|
<
|
<
<
<
|
<
<
|
<
<
<
<






|


|







1934
1935
1936
1937
1938
1939
1940
1941

1942



1943


1944




1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961

  assert( pIter->flags & FTS5_SEGITER_ONETERM );
  assert( pIter->pDlidx==0 );

  /* Check if the current doclist ends on this page. If it does, return
  ** early without loading the doclist-index (as it belongs to a different
  ** term). */
  if( pIter->iTermLeafPgno==pIter->iLeafPgno 

   && pIter->iEndofDoclist<pLeaf->szLeaf 



  ){


    return;




  }

  pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
}

/*
** Read a varint starting at a[iOff] into nVal and advance iOff past it.
**
** A single-byte value (high bit clear) is handled inline. Otherwise iOff
** is stepped back to the first byte and decoding is handed to
** fts5GetVarint32(), whose return value is added to iOff (presumably the
** number of bytes consumed - confirm against its definition).
**
** iOff and nVal must be modifiable lvalues; both are evaluated more than
** once, so arguments with side effects must not be passed.
*/
#define fts5IndexGetVarint32(a, iOff, nVal) {     \
  nVal = (a)[iOff++];                             \
  if( nVal & 0x80 ){                              \
    iOff--;                                       \
    iOff += fts5GetVarint32(&(a)[iOff], nVal);    \
  }                                               \
}

/*
** Advance iOff past one varint stored at a[iOff] without decoding it.
** Bytes are consumed while their 0x80 continuation bit is set; the first
** byte with that bit clear is consumed as well. At most 9 bytes are
** consumed.
**
** iOff must be a modifiable lvalue and is evaluated more than once. The
** array argument is parenthesized so that an expression such as "p+1" may
** be passed without changing the meaning of the subscript.
*/
#define fts5IndexSkipVarint(a, iOff) {            \
  int iEnd = iOff+9;                              \
  while( ((a)[iOff++] & 0x80) && iOff<iEnd );     \
}
1951
1952
1953
1954
1955
1956
1957

1958
1959
1960
1961
1962




1963
1964
1965
1966

1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985


1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003

2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036

2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053

2054
2055
2056
2057
2058
2059
2060









2061
2062
2063
2064
2065
2066
2067
  Fts5Index *p,                   /* Leave any error code here */
  int bGe,                        /* True for a >= search */
  Fts5SegIter *pIter,             /* Iterator to seek */
  const u8 *pTerm, int nTerm      /* Term to search for */
){
  int iOff;
  const u8 *a = pIter->pLeaf->p;

  int n = pIter->pLeaf->n;

  int nMatch = 0;
  int nKeep = 0;
  int nNew = 0;





  assert( p->rc==SQLITE_OK );
  assert( pIter->pLeaf );


  iOff = fts5GetU16(&a[2]);
  if( iOff<4 || iOff>=n ){
    p->rc = FTS5_CORRUPT;
    return;
  }

  while( 1 ){
    int i;
    int nCmp;

    /* Figure out how many new bytes are in this term */
    fts5IndexGetVarint32(a, iOff, nNew);

    if( nKeep<nMatch ){
      goto search_failed;
    }

    assert( nKeep>=nMatch );
    if( nKeep==nMatch ){


      nCmp = MIN(nNew, nTerm-nMatch);
      for(i=0; i<nCmp; i++){
        if( a[iOff+i]!=pTerm[nMatch+i] ) break;
      }
      nMatch += i;

      if( nTerm==nMatch ){
        if( i==nNew ){
          goto search_success;
        }else{
          goto search_failed;
        }
      }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
        goto search_failed;
      }
    }
    iOff += nNew;


    /* Skip past the doclist. If the end of the page is reached, bail out. */
    while( 1 ){
      int nPos;

      /* Skip past rowid delta */
      fts5IndexSkipVarint(a, iOff);

      /* Skip past position list */
      fts5IndexGetVarint32(a, iOff, nPos);
      iOff += (nPos >> 1);
      if( iOff>=(n-1) ){
        iOff = n;
        goto search_failed;
      }

      /* If this is the end of the doclist, break out of the loop */
      if( a[iOff]==0x00 ){
        iOff++;
        break;
      }
    };

    /* Read the nKeep field of the next term. */
    fts5IndexGetVarint32(a, iOff, nKeep);
  }

 search_failed:
  if( bGe==0 ){
    fts5DataRelease(pIter->pLeaf);
    pIter->pLeaf = 0;
    return;
  }else if( iOff>=n ){
    do {

      fts5SegIterNextPage(p, pIter);
      if( pIter->pLeaf==0 ) return;
      a = pIter->pLeaf->p;
      iOff = fts5GetU16(&a[2]);
      if( iOff ){
        if( iOff<4 || iOff>=n ){
          p->rc = FTS5_CORRUPT;
        }else{
          nKeep = 0;
          iOff += fts5GetVarint32(&a[iOff], nNew);
          break;
        }
      }
    }while( 1 );
  }

 search_success:

  pIter->iLeafOffset = iOff + nNew;
  pIter->iTermLeafOffset = pIter->iLeafOffset;
  pIter->iTermLeafPgno = pIter->iLeafPgno;

  fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);










  fts5SegIterLoadRowid(p, pIter);
  fts5SegIterLoadNPos(p, pIter);
}

/*
** Initialize the object pIter to point to term pTerm/nTerm within segment
** pSeg. If there is no such term in the index, the iterator is set to EOF.







>
|




>
>
>
>


<

>
|
|
<
<
|
<

<
<



<






>
>
















<

>
|
|
<
|
<
<

<
|
|
<
|
<
<
<
<
<
<
<
<
<










|

>



|
|
|











>







>
>
>
>
>
>
>
>
>







1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996

1997
1998
1999
2000


2001

2002


2003
2004
2005

2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029

2030
2031
2032
2033

2034


2035

2036
2037

2038









2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
  Fts5Index *p,                   /* Leave any error code here */
  int bGe,                        /* True for a >= search */
  Fts5SegIter *pIter,             /* Iterator to seek */
  const u8 *pTerm, int nTerm      /* Term to search for */
){
  int iOff;
  const u8 *a = pIter->pLeaf->p;
  int szLeaf = pIter->pLeaf->szLeaf;
  int n = pIter->pLeaf->nn;

  int nMatch = 0;
  int nKeep = 0;
  int nNew = 0;
  int iTerm = 0;
  int iTermOff;
  int iPgidx;                     /* Current offset in pgidx */
  int bEndOfPage = 0;

  assert( p->rc==SQLITE_OK );


  iPgidx = szLeaf;
  iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff);
  iOff = iTermOff;




  while( 1 ){



    /* Figure out how many new bytes are in this term */
    fts5IndexGetVarint32(a, iOff, nNew);

    if( nKeep<nMatch ){
      goto search_failed;
    }

    assert( nKeep>=nMatch );
    if( nKeep==nMatch ){
      int nCmp;
      int i;
      nCmp = MIN(nNew, nTerm-nMatch);
      for(i=0; i<nCmp; i++){
        if( a[iOff+i]!=pTerm[nMatch+i] ) break;
      }
      nMatch += i;

      if( nTerm==nMatch ){
        if( i==nNew ){
          goto search_success;
        }else{
          goto search_failed;
        }
      }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
        goto search_failed;
      }
    }


    if( iPgidx>=n ){
      bEndOfPage = 1;
      break;

    }




    iPgidx += fts5GetVarint32(&a[iPgidx], nKeep);
    iTermOff += nKeep;

    iOff = iTermOff;










    /* Read the nKeep field of the next term. */
    fts5IndexGetVarint32(a, iOff, nKeep);
  }

 search_failed:
  if( bGe==0 ){
    fts5DataRelease(pIter->pLeaf);
    pIter->pLeaf = 0;
    return;
  }else if( bEndOfPage ){
    do {
      iTerm = 0;
      fts5SegIterNextPage(p, pIter);
      if( pIter->pLeaf==0 ) return;
      a = pIter->pLeaf->p;
      if( fts5LeafIsTermless(pIter->pLeaf)==0 ){
        fts5GetVarint32(&pIter->pLeaf->p[pIter->pLeaf->szLeaf], iOff);
        if( iOff<4 || iOff>=pIter->pLeaf->szLeaf ){
          p->rc = FTS5_CORRUPT;
        }else{
          nKeep = 0;
          iOff += fts5GetVarint32(&a[iOff], nNew);
          break;
        }
      }
    }while( 1 );
  }

 search_success:

  pIter->iLeafOffset = iOff + nNew;
  pIter->iTermLeafOffset = pIter->iLeafOffset;
  pIter->iTermLeafPgno = pIter->iLeafPgno;

  fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);

  if( iPgidx>=n ){
    pIter->iEndofDoclist = pIter->pLeaf->nn+1;
  }else{
    int nExtra;
    iPgidx += fts5GetVarint32(&a[iPgidx], nExtra);
    pIter->iEndofDoclist = iTermOff + nExtra;
  }
  pIter->iPgidxOff = iPgidx;

  fts5SegIterLoadRowid(p, pIter);
  fts5SegIterLoadNPos(p, pIter);
}

/*
** Initialize the object pIter to point to term pTerm/nTerm within segment
** pSeg. If there is no such term in the index, the iterator is set to EOF.
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195

2196
2197
2198
2199
2200
2201
2202

  if( pList ){
    Fts5Data *pLeaf;
    sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z);
    pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data));
    if( pLeaf==0 ) return;
    pLeaf->p = (u8*)pList;
    pLeaf->n = nList;
    pIter->pLeaf = pLeaf;
    pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid);


    if( flags & FTS5INDEX_QUERY_DESC ){
      pIter->flags |= FTS5_SEGITER_REVERSE;
      fts5SegIterReverseInitPage(p, pIter);
    }else{
      fts5SegIterLoadNPos(p, pIter);
    }







|


>







2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228

  if( pList ){
    Fts5Data *pLeaf;
    sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z);
    pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data));
    if( pLeaf==0 ) return;
    pLeaf->p = (u8*)pList;
    pLeaf->nn = pLeaf->szLeaf = nList;
    pIter->pLeaf = pLeaf;
    pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid);
    pIter->iEndofDoclist = pLeaf->nn+1;

    if( flags & FTS5INDEX_QUERY_DESC ){
      pIter->flags |= FTS5_SEGITER_REVERSE;
      fts5SegIterReverseInitPage(p, pIter);
    }else{
      fts5SegIterLoadNPos(p, pIter);
    }
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
    pIter->iLeafPgno = iLeafPgno-1;
    fts5SegIterNextPage(p, pIter);
    assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno );

    if( p->rc==SQLITE_OK ){
      int iOff;
      u8 *a = pIter->pLeaf->p;
      int n = pIter->pLeaf->n;

      iOff = fts5GetU16(&a[0]);
      if( iOff<4 || iOff>=n ){
        p->rc = FTS5_CORRUPT;
      }else{
        iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
        pIter->iLeafOffset = iOff;
        fts5SegIterLoadNPos(p, pIter);
      }







|

|







2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
    pIter->iLeafPgno = iLeafPgno-1;
    fts5SegIterNextPage(p, pIter);
    assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno );

    if( p->rc==SQLITE_OK ){
      int iOff;
      u8 *a = pIter->pLeaf->p;
      int n = pIter->pLeaf->szLeaf;

      iOff = fts5LeafFirstRowidOff(pIter->pLeaf);
      if( iOff<4 || iOff>=n ){
        p->rc = FTS5_CORRUPT;
      }else{
        iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
        pIter->iLeafOffset = iOff;
        fts5SegIterLoadNPos(p, pIter);
      }
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722

2723
2724
2725
2726
2727
2728
2729
){
  Fts5IndexIter *pNew;
  pNew = fts5MultiIterAlloc(p, 2);
  if( pNew ){
    Fts5SegIter *pIter = &pNew->aSeg[1];

    pIter->flags = FTS5_SEGITER_ONETERM;
    if( pData->n>0 ){
      pIter->pLeaf = pData;
      pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid);

      pNew->aFirst[1].iFirst = 1;
      if( bDesc ){
        pNew->bRev = 1;
        pIter->flags |= FTS5_SEGITER_REVERSE;
        fts5SegIterReverseInitPage(p, pIter);
      }else{
        fts5SegIterLoadNPos(p, pIter);







|


>







2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
){
  Fts5IndexIter *pNew;
  pNew = fts5MultiIterAlloc(p, 2);
  if( pNew ){
    Fts5SegIter *pIter = &pNew->aSeg[1];

    pIter->flags = FTS5_SEGITER_ONETERM;
    if( pData->szLeaf>0 ){
      pIter->pLeaf = pData;
      pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid);
      pIter->iEndofDoclist = pData->nn;
      pNew->aFirst[1].iFirst = 1;
      if( bDesc ){
        pNew->bRev = 1;
        pIter->flags |= FTS5_SEGITER_REVERSE;
        fts5SegIterReverseInitPage(p, pIter);
      }else{
        fts5SegIterLoadNPos(p, pIter);
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
  Fts5SegIter *pSeg,              /* Poslist of this iterator */
  void *pCtx,                     /* Context pointer for xChunk callback */
  void (*xChunk)(Fts5Index*, void*, const u8*, int)
){
  int nRem = pSeg->nPos;          /* Number of bytes still to come */
  Fts5Data *pData = 0;
  u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
  int nChunk = MIN(nRem, pSeg->pLeaf->n - pSeg->iLeafOffset);
  int pgno = pSeg->iLeafPgno;
  int pgnoSave = 0;

  if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){
    pgnoSave = pgno+1;
  }

  while( 1 ){
    xChunk(p, pCtx, pChunk, nChunk);
    nRem -= nChunk;
    fts5DataRelease(pData);
    if( nRem<=0 ){
      break;
    }else{
      pgno++;
      pData = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, 0, pgno));
      if( pData==0 ) break;
      pChunk = &pData->p[4];
      nChunk = MIN(nRem, pData->n - 4);
      if( pgno==pgnoSave ){
        assert( pSeg->pNextLeaf==0 );
        pSeg->pNextLeaf = pData;
        pData = 0;
      }
    }
  }







|















|


|







2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
  Fts5SegIter *pSeg,              /* Poslist of this iterator */
  void *pCtx,                     /* Context pointer for xChunk callback */
  void (*xChunk)(Fts5Index*, void*, const u8*, int)
){
  int nRem = pSeg->nPos;          /* Number of bytes still to come */
  Fts5Data *pData = 0;
  u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
  int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
  int pgno = pSeg->iLeafPgno;
  int pgnoSave = 0;

  if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){
    pgnoSave = pgno+1;
  }

  while( 1 ){
    xChunk(p, pCtx, pChunk, nChunk);
    nRem -= nChunk;
    fts5DataRelease(pData);
    if( nRem<=0 ){
      break;
    }else{
      pgno++;
      pData = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno));
      if( pData==0 ) break;
      pChunk = &pData->p[4];
      nChunk = MIN(nRem, pData->szLeaf - 4);
      if( pgno==pgnoSave ){
        assert( pSeg->pNextLeaf==0 );
        pSeg->pNextLeaf = pData;
        pData = 0;
      }
    }
  }
3098
3099
3100
3101
3102
3103
3104






3105
3106
3107
3108



3109
3110
3111
3112
3113
3114
3115
3116

3117

3118
3119
3120
3121
3122
3123
3124
}

static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
  static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
  Fts5PageWriter *pPage = &pWriter->writer;
  i64 iRowid;







  if( pWriter->bFirstTermInPage ){
    /* No term was written to this page. */
    assert( 0==fts5GetU16(&pPage->buf.p[2]) );
    fts5WriteBtreeNoTerm(p, pWriter);



  }

  /* Write the current page to the db. */
  iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, 0, pPage->pgno);
  fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);

  /* Initialize the next page. */
  fts5BufferZero(&pPage->buf);

  fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);

  pPage->pgno++;

  /* Increase the leaves written counter */
  pWriter->nLeafWritten++;

  /* The new leaf holds no terms or rowids */
  pWriter->bFirstTermInPage = 1;







>
>
>
>
>
>


|

>
>
>


|
|




>

>







3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
}

static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
  static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
  Fts5PageWriter *pPage = &pWriter->writer;
  i64 iRowid;

  assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) );

  /* Set the szLeaf header field. */
  assert( 0==fts5GetU16(&pPage->buf.p[2]) );
  fts5PutU16(&pPage->buf.p[2], pPage->buf.n);

  if( pWriter->bFirstTermInPage ){
    /* No term was written to this page. */
    assert( pPage->pgidx.n==0 );
    fts5WriteBtreeNoTerm(p, pWriter);
  }else{
    /* Append the pgidx to the page buffer. Set the szLeaf header field. */
    fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p);
  }

  /* Write the page out to disk */
  iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno);
  fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);

  /* Initialize the next page. */
  fts5BufferZero(&pPage->buf);
  fts5BufferZero(&pPage->pgidx);
  fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
  pPage->iPrevPgidx = 0;
  pPage->pgno++;

  /* Increase the leaves written counter */
  pWriter->nLeafWritten++;

  /* The new leaf holds no terms or rowids */
  pWriter->bFirstTermInPage = 1;
3135
3136
3137
3138
3139
3140
3141

3142

3143




3144
3145
3146


3147
3148
3149
3150









3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
static void fts5WriteAppendTerm(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,
  int nTerm, const u8 *pTerm 
){
  int nPrefix;                    /* Bytes of prefix compression for term */
  Fts5PageWriter *pPage = &pWriter->writer;



  assert( pPage->buf.n==0 || pPage->buf.n>4 );




  if( pPage->buf.n==0 ){
    /* Zero the first term and first rowid fields */
    static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };


    fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
    assert( pWriter->bFirstTermInPage );
  }
  if( p->rc ) return;









  
  if( pWriter->bFirstTermInPage ){
    /* Update the "first term" field of the page header. */
    assert( pPage->buf.p[2]==0 && pPage->buf.p[3]==0 );
    fts5PutU16(&pPage->buf.p[2], pPage->buf.n);
    nPrefix = 0;
    if( pPage->pgno!=1 ){
      /* This is the first term on a leaf that is not the leftmost leaf in
      ** the segment b-tree. In this case it is necessary to add a term to
      ** the b-tree hierarchy that is (a) larger than the largest term 
      ** already written to the segment and (b) smaller than or equal to
      ** this term. In other words, a prefix of (pTerm/nTerm) that is one







>

>
|
>
>
>
>
|
<
<
>
>
|
<

|
>
>
>
>
>
>
>
>
>
|

<
<
<







3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188


3189
3190
3191

3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204



3205
3206
3207
3208
3209
3210
3211
static void fts5WriteAppendTerm(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,
  int nTerm, const u8 *pTerm 
){
  int nPrefix;                    /* Bytes of prefix compression for term */
  Fts5PageWriter *pPage = &pWriter->writer;
  Fts5Buffer *pPgidx = &pWriter->writer.pgidx;

  if( p->rc ) return;
  assert( pPage->buf.n>=4 );
  assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );

  /* If the current leaf page is full, flush it to disk. */
  if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
    if( pPage->buf.n>4 ){


      fts5WriteFlushLeaf(p, pWriter);
    }
    fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);

  }
  
  /* TODO1: Updating pgidx here. */
  pPgidx->n += sqlite3Fts5PutVarint(
      &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx
  );
  pPage->iPrevPgidx = pPage->buf.n;
#if 0
  fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
  pPgidx->n += 2;
#endif

  if( pWriter->bFirstTermInPage ){



    nPrefix = 0;
    if( pPage->pgno!=1 ){
      /* This is the first term on a leaf that is not the leftmost leaf in
      ** the segment b-tree. In this case it is necessary to add a term to
      ** the b-tree hierarchy that is (a) larger than the largest term 
      ** already written to the segment and (b) smaller than or equal to
      ** this term. In other words, a prefix of (pTerm/nTerm) that is one
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214




3215
3216
3217
3218
3219
3220
3221
  pWriter->bFirstTermInPage = 0;

  pWriter->bFirstRowidInPage = 0;
  pWriter->bFirstRowidInDoclist = 1;

  assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
  pWriter->aDlidx[0].pgno = pPage->pgno;

  /* If the current leaf page is full, flush it to disk. */
  if( pPage->buf.n>=p->pConfig->pgsz ){
    fts5WriteFlushLeaf(p, pWriter);
  }
}

/*
** Append a rowid and position-list size field to the writer's output. 
*/
static void fts5WriteAppendRowid(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,
  i64 iRowid,
  int nPos
){
  if( p->rc==SQLITE_OK ){
    Fts5PageWriter *pPage = &pWriter->writer;





    /* If this is to be the first rowid written to the page, set the 
    ** rowid-pointer in the page-header. Also append a value to the dlidx
    ** buffer, in case a doclist-index is required.  */
    if( pWriter->bFirstRowidInPage ){
      fts5PutU16(pPage->buf.p, pPage->buf.n);
      fts5WriteDlidxAppend(p, pWriter, iRowid);







<
<
<
<
<













>
>
>
>







3239
3240
3241
3242
3243
3244
3245





3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
  pWriter->bFirstTermInPage = 0;

  pWriter->bFirstRowidInPage = 0;
  pWriter->bFirstRowidInDoclist = 1;

  assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
  pWriter->aDlidx[0].pgno = pPage->pgno;





}

/*
** Append a rowid and position-list size field to the writer's output. 
*/
static void fts5WriteAppendRowid(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,
  i64 iRowid,
  int nPos
){
  if( p->rc==SQLITE_OK ){
    Fts5PageWriter *pPage = &pWriter->writer;

    if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){
      fts5WriteFlushLeaf(p, pWriter);
    }

    /* If this is to be the first rowid written to the page, set the 
    ** rowid-pointer in the page-header. Also append a value to the dlidx
    ** buffer, in case a doclist-index is required.  */
    if( pWriter->bFirstRowidInPage ){
      fts5PutU16(pPage->buf.p, pPage->buf.n);
      fts5WriteDlidxAppend(p, pWriter, iRowid);
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254


3255
3256
3257
3258
3259
3260
3261
3262
      fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid);
    }
    pWriter->iPrevRowid = iRowid;
    pWriter->bFirstRowidInDoclist = 0;
    pWriter->bFirstRowidInPage = 0;

    fts5BufferAppendVarint(&p->rc, &pPage->buf, nPos);

    if( pPage->buf.n>=p->pConfig->pgsz ){
      fts5WriteFlushLeaf(p, pWriter);
    }
  }
}

static void fts5WriteAppendPoslistData(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
  const u8 *aData, 
  int nData
){
  Fts5PageWriter *pPage = &pWriter->writer;
  const u8 *a = aData;
  int n = nData;
  
  assert( p->pConfig->pgsz>0 );
  while( p->rc==SQLITE_OK && (pPage->buf.n + n)>=p->pConfig->pgsz ){


    int nReq = p->pConfig->pgsz - pPage->buf.n;
    int nCopy = 0;
    while( nCopy<nReq ){
      i64 dummy;
      nCopy += fts5GetVarint(&a[nCopy], (u64*)&dummy);
    }
    fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a);
    a += nCopy;







<
<
<
<














|
>
>
|







3277
3278
3279
3280
3281
3282
3283




3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
      fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid);
    }
    pWriter->iPrevRowid = iRowid;
    pWriter->bFirstRowidInDoclist = 0;
    pWriter->bFirstRowidInPage = 0;

    fts5BufferAppendVarint(&p->rc, &pPage->buf, nPos);




  }
}

static void fts5WriteAppendPoslistData(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
  const u8 *aData, 
  int nData
){
  Fts5PageWriter *pPage = &pWriter->writer;
  const u8 *a = aData;
  int n = nData;
  
  assert( p->pConfig->pgsz>0 );
  while( p->rc==SQLITE_OK 
     && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz 
  ){
    int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n;
    int nCopy = 0;
    while( nCopy<nReq ){
      i64 dummy;
      nCopy += fts5GetVarint(&a[nCopy], (u64*)&dummy);
    }
    fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a);
    a += nCopy;
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302

3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315


3316
3317
3318
3319
3320
3321
3322




3323
3324
3325
3326
3327
3328
3329
3330
3331
3332







3333
3334
3335
3336
3337
3338
3339
/*
** Write out whatever the segment writer still has buffered, report the
** resulting b-tree dimensions through the OUT parameters, then release
** every allocation owned by the writer.
**
** The OUT parameters are written only if p->rc is SQLITE_OK on entry;
** *pnHeight is always reported as 0 in that case. The writer's buffers
** and doclist-index array are freed regardless of p->rc.
*/
static void fts5WriteFinish(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,         /* Writer object */
  int *pnHeight,                  /* OUT: Height of the b-tree */
  int *pnLeaf                     /* OUT: Number of leaf pages in b-tree */
){
  Fts5PageWriter *pLeaf = &pWriter->writer;
  int iDlidx = 0;

  if( p->rc==SQLITE_OK ){
    int bNothingWritten = (pLeaf->pgno==1 && pLeaf->buf.n==0);
    if( bNothingWritten ){
      *pnLeaf = 0;
    }else{
      /* More than 4 bytes buffered: the current leaf must be flushed. */
      if( pLeaf->buf.n>4 ){
        fts5WriteFlushLeaf(p, pWriter);
      }
      /* Must be read after the flush above has run. */
      *pnLeaf = pLeaf->pgno-1;
      fts5WriteFlushBtree(p, pWriter);
    }
    *pnHeight = 0;
  }

  /* Release the page-writer buffers and the b-tree term buffer. */
  fts5BufferFree(&pLeaf->term);
  fts5BufferFree(&pLeaf->buf);
  fts5BufferFree(&pWriter->btterm);

  /* Release each doclist-index buffer, then the array that holds them. */
  while( iDlidx<pWriter->nDlidx ){
    sqlite3Fts5BufferFree(&pWriter->aDlidx[iDlidx].buf);
    iDlidx++;
  }
  sqlite3_free(pWriter->aDlidx);
}

/*
** Reset pWriter to an all-zero state and prepare it to write the segment
** with id iSegid, starting at leaf page 1 and b-tree page 1.
**
** If the index has no prepared %_idx INSERT statement yet (p->pIdxWriter
** is NULL), one is prepared here. When p->rc is SQLITE_OK afterwards, the
** segment id is bound to parameter 1 of that statement.
*/
static void fts5WriteInit(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
  int iSegid
){
  memset(pWriter, 0, sizeof(*pWriter));
  pWriter->iSegid = iSegid;
  fts5WriteDlidxGrow(p, pWriter, 1);

  pWriter->iBtPage = 1;
  pWriter->bFirstTermInPage = 1;
  pWriter->writer.pgno = 1;

  if( p->pIdxWriter==0 ){
    Fts5Config *pCfg = p->pConfig;
    char *zSql = sqlite3_mprintf(
        "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)", 
        pCfg->zDb, pCfg->zName
    );
    fts5IndexPrepareStmt(p, &p->pIdxWriter, zSql);
  }

  if( p->rc!=SQLITE_OK ) return;
  sqlite3_bind_int(p->pIdxWriter, 1, iSegid);
}

/*
** Iterator pIter was used to iterate through the input segments of an
** incremental merge operation. This function is called if the incremental







<







<







<




>













>
>







>
>
>
>










>
>
>
>
>
>
>







3321
3322
3323
3324
3325
3326
3327

3328
3329
3330
3331
3332
3333
3334

3335
3336
3337
3338
3339
3340
3341

3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
/*
** Write out whatever the segment writer still has buffered, report the
** number of leaf pages through *pnLeaf, then release every allocation
** owned by the writer.
**
** *pnLeaf is written only if p->rc is SQLITE_OK on entry. The writer's
** buffers and doclist-index array are freed regardless of p->rc.
*/
static void fts5WriteFinish(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,         /* Writer object */
  int *pnLeaf                     /* OUT: Number of leaf pages in b-tree */
){
  Fts5PageWriter *pLeaf = &pWriter->writer;
  int iDlidx = 0;

  if( p->rc==SQLITE_OK ){
    int bNothingWritten = (pLeaf->pgno==1 && pLeaf->buf.n==0);
    if( bNothingWritten ){
      *pnLeaf = 0;
    }else{
      /* More than 4 bytes buffered: the current leaf must be flushed. */
      if( pLeaf->buf.n>4 ){
        fts5WriteFlushLeaf(p, pWriter);
      }
      /* Must be read after the flush above has run. */
      *pnLeaf = pLeaf->pgno-1;
      fts5WriteFlushBtree(p, pWriter);
    }
  }

  /* Release the page-writer buffers and the b-tree term buffer. */
  fts5BufferFree(&pLeaf->term);
  fts5BufferFree(&pLeaf->buf);
  fts5BufferFree(&pLeaf->pgidx);
  fts5BufferFree(&pWriter->btterm);

  /* Release each doclist-index buffer, then the array that holds them. */
  while( iDlidx<pWriter->nDlidx ){
    sqlite3Fts5BufferFree(&pWriter->aDlidx[iDlidx].buf);
    iDlidx++;
  }
  sqlite3_free(pWriter->aDlidx);
}

/*
** Initialize segment-writer pWriter so that it can be used to write a
** segment with segment-id iSegid.
**
** The writer is zeroed, positioned on leaf page 1, and its leaf and
** page-index buffers are grown to (pgsz + FTS5_DATA_PADDING) bytes. The
** shared %_idx INSERT statement (p->pIdxWriter) is prepared if it does not
** already exist. Errors are reported via p->rc.
*/
static void fts5WriteInit(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
  int iSegid
){
  Fts5Config *pConfig = p->pConfig;
  const int nAlloc = pConfig->pgsz + FTS5_DATA_PADDING;
  Fts5PageWriter *pPage = &pWriter->writer;

  memset(pWriter, 0, sizeof(*pWriter));
  pWriter->iSegid = iSegid;

  fts5WriteDlidxGrow(p, pWriter, 1);
  pPage->pgno = 1;
  pWriter->bFirstTermInPage = 1;
  pWriter->iBtPage = 1;

  /* Size both the page-index and leaf buffers to pgsz + padding bytes. */
  fts5BufferGrow(&p->rc, &pPage->pgidx, nAlloc);
  fts5BufferGrow(&p->rc, &pPage->buf, nAlloc);

  /* Prepare the statement used to add rows to %_idx, if required. */
  if( p->pIdxWriter==0 ){
    char *zSql = sqlite3_mprintf(
        "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)", 
        pConfig->zDb, pConfig->zName
    );
    fts5IndexPrepareStmt(p, &p->pIdxWriter, zSql);
  }

  if( p->rc==SQLITE_OK ){
    /* The leaf page begins with a 4-byte header, initially all zeroes. */
    memset(pPage->buf.p, 0, 4);
    pPage->buf.n = 4;

    /* The segment id is the same for every row this writer inserts into
    ** %_idx, so bind it once here instead of once per row. */
    sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
  }
}

/*
** Iterator pIter was used to iterate through the input segments of an
** incremental merge operation. This function is called if the incremental
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366

3367
3368
3369













3370




3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
      pSeg->pSeg->pgnoLast = 0;
      pSeg->pSeg->pgnoFirst = 0;
    }else{
      int iOff = pSeg->iTermLeafOffset;     /* Offset on new first leaf page */
      i64 iLeafRowid;
      Fts5Data *pData;
      int iId = pSeg->pSeg->iSegid;
      u8 aHdr[4] = {0x00, 0x00, 0x00, 0x04};

      iLeafRowid = FTS5_SEGMENT_ROWID(iId, 0, pSeg->iTermLeafPgno);
      pData = fts5DataRead(p, iLeafRowid);
      if( pData ){
        fts5BufferZero(&buf);

        fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr);
        fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n);
        fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p);













        fts5BufferAppendBlob(&p->rc, &buf, pData->n - iOff, &pData->p[iOff]);




        fts5DataRelease(pData);
        pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
        fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 0, 1), iLeafRowid);
        fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
      }
    }
  }
  fts5BufferFree(&buf);
}








|

|



>



>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>


|







3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
      pSeg->pSeg->pgnoLast = 0;
      pSeg->pSeg->pgnoFirst = 0;
    }else{
      int iOff = pSeg->iTermLeafOffset;     /* Offset on new first leaf page */
      i64 iLeafRowid;
      Fts5Data *pData;
      int iId = pSeg->pSeg->iSegid;
      u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00};

      iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno);
      pData = fts5DataRead(p, iLeafRowid);
      if( pData ){
        fts5BufferZero(&buf);
        fts5BufferGrow(&p->rc, &buf, pData->nn);
        fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr);
        fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n);
        fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p);
        fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff, &pData->p[iOff]);
        if( p->rc==SQLITE_OK ){
          /* Set the szLeaf field */
          fts5PutU16(&buf.p[2], buf.n);
        }

        /* Set up the new page-index array */
        fts5BufferAppendVarint(&p->rc, &buf, 4);
        if( pSeg->iLeafPgno==pSeg->iTermLeafPgno 
         && pSeg->iEndofDoclist<pData->szLeaf 
        ){
          int nDiff = pData->szLeaf - pSeg->iEndofDoclist;
          fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4);
          fts5BufferAppendBlob(&p->rc, &buf, 
              pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]
          );
        }

        fts5DataRelease(pData);
        pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
        fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid);
        fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
      }
    }
  }
  fts5BufferFree(&buf);
}

3466
3467
3468
3469
3470
3471
3472

3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
    pTerm = fts5MultiIterTerm(pIter, &nTerm);
    if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){
      if( pnRem && writer.nLeafWritten>nRem ){
        break;
      }

      /* This is a new term. Append a term to the output segment. */

      if( bRequireDoclistTerm ){
        fts5WriteAppendZerobyte(p, &writer);
      }
      fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
      fts5BufferSet(&p->rc, &term, nTerm, pTerm);
      bRequireDoclistTerm = 1;
    }

    /* Append the rowid to the output */
    /* WRITEPOSLISTSIZE */
    nPos = pSegIter->nPos*2 + pSegIter->bDel;
    fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter), nPos);

    /* Append the position-list data to the output */
    fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);
  }

  /* Flush the last leaf page to disk. Set the output segment b-tree height
  ** and last leaf page number at the same time.  */
  fts5WriteFinish(p, &writer, &pSeg->nHeight, &pSeg->pgnoLast);

  if( fts5MultiIterEof(p, pIter) ){
    int i;

    /* Remove the redundant segments from the %_data table */
    for(i=0; i<nInput; i++){
      fts5DataRemoveSegment(p, pLvl->aSeg[i].iSegid);







>

|

















|







3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
    pTerm = fts5MultiIterTerm(pIter, &nTerm);
    if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){
      if( pnRem && writer.nLeafWritten>nRem ){
        break;
      }

      /* This is a new term. Append a term to the output segment. */
      /* TODO2: Doclist 0x00 term */
      if( bRequireDoclistTerm ){
        /* fts5WriteAppendZerobyte(p, &writer); */
      }
      fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
      fts5BufferSet(&p->rc, &term, nTerm, pTerm);
      bRequireDoclistTerm = 1;
    }

    /* Append the rowid to the output */
    /* WRITEPOSLISTSIZE */
    nPos = pSegIter->nPos*2 + pSegIter->bDel;
    fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter), nPos);

    /* Append the position-list data to the output */
    fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);
  }

  /* Flush the last leaf page to disk. Set the output segment b-tree height
  ** and last leaf page number at the same time.  */
  fts5WriteFinish(p, &writer, &pSeg->pgnoLast);

  if( fts5MultiIterEof(p, pIter) ){
    int i;

    /* Remove the redundant segments from the %_data table */
    for(i=0; i<nInput; i++){
      fts5DataRemoveSegment(p, pLvl->aSeg[i].iSegid);
3610
3611
3612
3613
3614
3615
3616

3617
3618
3619
3620
3621
3622
3623
  const int nCrisis = p->pConfig->nCrisisMerge;
  Fts5Structure *pStruct = *ppStruct;
  int iLvl = 0;

  assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 );
  while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){
    fts5IndexMergeLevel(p, &pStruct, iLvl, 0);

    fts5StructurePromote(p, iLvl+1, pStruct);
    iLvl++;
  }
  *ppStruct = pStruct;
}

static int fts5IndexReturn(Fts5Index *p){







>







3686
3687
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
  const int nCrisis = p->pConfig->nCrisisMerge;
  Fts5Structure *pStruct = *ppStruct;
  int iLvl = 0;

  assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 );
  while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){
    fts5IndexMergeLevel(p, &pStruct, iLvl, 0);
    assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) );
    fts5StructurePromote(p, iLvl+1, pStruct);
    iLvl++;
  }
  *ppStruct = pStruct;
}

static int fts5IndexReturn(Fts5Index *p){
3637
3638
3639
3640
3641
3642
3643

3644
3645
3646
3647

3648
3649
3650
3651
3652
3653
3654
** in a 32-bit integer. Return the size of the largest prefix of this 
** list nMax bytes or less in size.
*/
static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
  int ret;                        /* Size in bytes of the prefix found so far */
  u32 dummy;                      /* Decoded varint values are discarded */

  /* The first varint is always part of the prefix, whatever nMax is. */
  ret = fts5GetVarint32(aBuf, dummy);

  /* Extend the prefix one whole varint at a time while it stays within 
  ** nMax bytes.
  **
  ** NOTE(review): the varint at aBuf[ret] is decoded before its end offset
  ** is compared against nMax, including when ret is already >= nMax. */
  while( 1 ){
    int i = fts5GetVarint32(&aBuf[ret], dummy);
    if( (ret + i) > nMax ) break;
    ret += i;

  }
  return ret;
}

#define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) { \
  assert( pBuf->nSpace>=(pBuf->n+nBlob) );             \
  memcpy(&pBuf->p[pBuf->n], pBlob, nBlob);             \







>
|
|
|
|
>







3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
** in a 32-bit integer. Return the size of the largest prefix of this 
** list nMax bytes or less in size.
*/
static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
  u32 dummy;                      /* Decoded varint values are discarded */

  /* The first varint is always included in the returned prefix. */
  int nPrefix = fts5GetVarint32(aBuf, dummy);

  /* Only look for further varints if the first one left room to spare.
  ** Then keep adding whole varints while the total stays within nMax. */
  if( nPrefix<nMax ){
    for(;;){
      int nVarint = fts5GetVarint32(&aBuf[nPrefix], dummy);
      if( (nPrefix + nVarint) > nMax ) break;
      nPrefix += nVarint;
    }
  }
  return nPrefix;
}

#define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) { \
  assert( pBuf->nSpace>=(pBuf->n+nBlob) );             \
  memcpy(&pBuf->p[pBuf->n], pBlob, nBlob);             \
3673
3674
3675
3676
3677
3678
3679
3680
3681

3682
3683
3684
3685
3686
3687
3688
3689
3690

3691




3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706

3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
3719
3720
3721
3722
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
  pStruct = fts5StructureRead(p);
  iSegid = fts5AllocateSegid(p, pStruct);

  if( iSegid ){
    const int pgsz = p->pConfig->pgsz;

    Fts5StructureSegment *pSeg;   /* New segment within pStruct */
    int nHeight;                  /* Height of new segment b-tree */
    Fts5Buffer *pBuf;             /* Buffer in which to assemble leaf page */

    const u8 *zPrev = 0;

    Fts5SegWriter writer;
    fts5WriteInit(p, &writer, iSegid);

    /* Pre-allocate the buffer used to assemble leaf pages to the target
    ** page size.  */
    assert( pgsz>0 );
    pBuf = &writer.writer.buf;

    fts5BufferGrow(&p->rc, pBuf, pgsz + 20);





    /* Begin scanning through hash table entries. This loop runs once for each
    ** term/doclist currently stored within the hash table. */
    if( p->rc==SQLITE_OK ){
      memset(pBuf->p, 0, 4);
      pBuf->n = 4;
      p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
    }
    while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
      const char *zTerm;          /* Buffer containing term */
      int nTerm;                  /* Size of zTerm in bytes */
      const u8 *pDoclist;         /* Pointer to doclist for this term */
      int nDoclist;               /* Size of doclist in bytes */
      int nSuffix;                /* Size of term suffix */


      sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
      nTerm = strlen(zTerm);

      /* Decide if the term will fit on the current leaf. If it will not, 
      ** flush the leaf to disk here.  */
      if( pBuf->n>4 && (pBuf->n + nTerm + 2) > pgsz ){
        fts5WriteFlushLeaf(p, &writer);
        pBuf = &writer.writer.buf;
        if( (nTerm + 32) > pBuf->nSpace ){
          fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n);
          if( p->rc ) break;
        }
      }

      /* Write the term to the leaf. And if it is the first on the leaf, and
      ** the leaf is not page number 1, push it up into the b-tree hierarchy 
      ** as well.  */
      if( writer.bFirstTermInPage==0 ){
        int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm);
        pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], nPre);
        nSuffix = nTerm - nPre;
      }else{
        fts5PutU16(&pBuf->p[2], pBuf->n);
        writer.bFirstTermInPage = 0;
        if( writer.writer.pgno!=1 ){
          int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm);
          fts5WriteBtreeTerm(p, &writer, nPre+1, (const u8*)zTerm);
          pBuf = &writer.writer.buf;
          assert( nPre<nTerm );
        }
        nSuffix = nTerm;
      }
      pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], nSuffix);
      fts5BufferSafeAppendBlob(pBuf, (const u8*)&zTerm[nTerm-nSuffix], nSuffix);

      /* We just wrote a term into page writer.aWriter[0].pgno. If a 
      ** doclist-index is to be generated for this doclist, it will be
      ** associated with this page. */
      assert( writer.nDlidx>0 && writer.aDlidx[0].buf.n==0 );
      writer.aDlidx[0].pgno = writer.writer.pgno;

      if( pgsz>=(pBuf->n + nDoclist + 1) ){
        /* The entire doclist will fit on the current leaf. */
        fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
      }else{
        i64 iRowid = 0;
        i64 iDelta = 0;
        int iOff = 0;

        writer.bFirstRowidInPage = 0;

        /* The entire doclist will not fit on this leaf. The following 
        ** loop iterates through the poslists that make up the current 
        ** doclist.  */
        while( p->rc==SQLITE_OK && iOff<nDoclist ){
          int nPos;
          int nCopy;







<

>





<
<
<

>
|
>
>
>
>




<
<




<




>

<
|
<
<
<
<
<
<
<
<
|
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
|
<







|







3752
3753
3754
3755
3756
3757
3758

3759
3760
3761
3762
3763
3764
3765



3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776


3777
3778
3779
3780

3781
3782
3783
3784
3785
3786

3787








3788





3789
















3790





3791

3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
  pStruct = fts5StructureRead(p);
  iSegid = fts5AllocateSegid(p, pStruct);

  if( iSegid ){
    const int pgsz = p->pConfig->pgsz;

    Fts5StructureSegment *pSeg;   /* New segment within pStruct */

    Fts5Buffer *pBuf;             /* Buffer in which to assemble leaf page */
    Fts5Buffer *pPgidx;           /* Buffer in which to assemble pgidx */
    const u8 *zPrev = 0;

    Fts5SegWriter writer;
    fts5WriteInit(p, &writer, iSegid);




    pBuf = &writer.writer.buf;
    pPgidx = &writer.writer.pgidx;

    /* fts5WriteInit() should have initialized the buffers to (most likely)
    ** the maximum space required. */
    assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) );
    assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) );

    /* Begin scanning through hash table entries. This loop runs once for each
    ** term/doclist currently stored within the hash table. */
    if( p->rc==SQLITE_OK ){


      p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
    }
    while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
      const char *zTerm;          /* Buffer containing term */

      const u8 *pDoclist;         /* Pointer to doclist for this term */
      int nDoclist;               /* Size of doclist in bytes */
      int nSuffix;                /* Size of term suffix */

      /* Write the term for this entry to disk. */
      sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);

      fts5WriteAppendTerm(p, &writer, strlen(zTerm), zTerm);














      if( writer.bFirstRowidInPage==0 
















       && pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) 





      ){

        /* The entire doclist will fit on the current leaf. */
        fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
      }else{
        i64 iRowid = 0;
        i64 iDelta = 0;
        int iOff = 0;

        /*  writer.bFirstRowidInPage = 0; */

        /* The entire doclist will not fit on this leaf. The following 
        ** loop iterates through the poslists that make up the current 
        ** doclist.  */
        while( p->rc==SQLITE_OK && iOff<nDoclist ){
          int nPos;
          int nCopy;
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811

3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
            writer.bFirstRowidInPage = 0;
            fts5WriteDlidxAppend(p, &writer, iRowid);
          }else{
            pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta);
          }
          assert( pBuf->n<=pBuf->nSpace );

          if( (pBuf->n + nCopy) <= pgsz ){
            /* The entire poslist will fit on the current leaf. So copy
            ** it in one go. */
            fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
          }else{
            /* The entire poslist will not fit on this leaf. So it needs
            ** to be broken into sections. The only qualification being
            ** that each varint must be stored contiguously.  */
            const u8 *pPoslist = &pDoclist[iOff];
            int iPos = 0;
            while( p->rc==SQLITE_OK ){
              int nSpace = pgsz - pBuf->n;
              int n = 0;
              if( (nCopy - iPos)<=nSpace ){
                n = nCopy - iPos;
              }else{
                n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
              }
              assert( n>0 );
              fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
              iPos += n;
              if( pBuf->n>=pgsz ){
                fts5WriteFlushLeaf(p, &writer);
                pBuf = &writer.writer.buf;
              }
              if( iPos>=nCopy ) break;
            }
          }
          iOff += nCopy;
        }
      }


      pBuf->p[pBuf->n++] = '\0';
      assert( pBuf->n<=pBuf->nSpace );
      zPrev = (const u8*)zTerm;
      sqlite3Fts5HashScanNext(pHash);
    }
    sqlite3Fts5HashClear(pHash);
    fts5WriteFinish(p, &writer, &nHeight, &pgnoLast);

    /* Update the Fts5Structure. It is written back to the database by the
    ** fts5StructureRelease() call below.  */
    if( pStruct->nLevel==0 ){
      fts5StructureAddLevel(&p->rc, &pStruct);
    }
    fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
    if( p->rc==SQLITE_OK ){
      pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
      pSeg->iSegid = iSegid;
      pSeg->nHeight = nHeight;
      pSeg->pgnoFirst = 1;
      pSeg->pgnoLast = pgnoLast;
      pStruct->nSegment++;
    }
    fts5StructurePromote(p, 0, pStruct);
  }








|










|









|

<








>
|





|










<







3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845

3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871

3872
3873
3874
3875
3876
3877
3878
            writer.bFirstRowidInPage = 0;
            fts5WriteDlidxAppend(p, &writer, iRowid);
          }else{
            pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta);
          }
          assert( pBuf->n<=pBuf->nSpace );

          if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){
            /* The entire poslist will fit on the current leaf. So copy
            ** it in one go. */
            fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
          }else{
            /* The entire poslist will not fit on this leaf. So it needs
            ** to be broken into sections. The only qualification being
            ** that each varint must be stored contiguously.  */
            const u8 *pPoslist = &pDoclist[iOff];
            int iPos = 0;
            while( p->rc==SQLITE_OK ){
              int nSpace = pgsz - pBuf->n - pPgidx->n;
              int n = 0;
              if( (nCopy - iPos)<=nSpace ){
                n = nCopy - iPos;
              }else{
                n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
              }
              assert( n>0 );
              fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
              iPos += n;
              if( (pBuf->n + pPgidx->n)>=pgsz ){
                fts5WriteFlushLeaf(p, &writer);

              }
              if( iPos>=nCopy ) break;
            }
          }
          iOff += nCopy;
        }
      }

      /* TODO2: Doclist terminator written here. */
      /* pBuf->p[pBuf->n++] = '\0'; */
      assert( pBuf->n<=pBuf->nSpace );
      zPrev = (const u8*)zTerm;
      sqlite3Fts5HashScanNext(pHash);
    }
    sqlite3Fts5HashClear(pHash);
    fts5WriteFinish(p, &writer, &pgnoLast);

    /* Update the Fts5Structure. It is written back to the database by the
    ** fts5StructureRelease() call below.  */
    if( pStruct->nLevel==0 ){
      fts5StructureAddLevel(&p->rc, &pStruct);
    }
    fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
    if( p->rc==SQLITE_OK ){
      pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
      pSeg->iSegid = iSegid;

      pSeg->pgnoFirst = 1;
      pSeg->pgnoLast = pgnoLast;
      pStruct->nSegment++;
    }
    fts5StructurePromote(p, 0, pStruct);
  }

3924
3925
3926
3927
3928
3929
3930


3931

3932
3933
3934
3935
3936
3937
3938
}

/*
** Chunk callback. pCtx is cast to an (Fts5Buffer*) and the nChunk bytes at
** pChunk are appended to it. Any error is stored in p->rc.
*/
static void fts5PoslistCallback(
  Fts5Index *p, 
  void *pCtx, 
  const u8 *pChunk, int nChunk
){


  fts5BufferAppendBlob(&p->rc, (Fts5Buffer*)pCtx, nChunk, pChunk);

}

/*
** Iterator pIter currently points to a valid entry (not EOF). This
** function appends the position list data for the current entry to
** buffer pBuf. It does not make a copy of the position-list size
** field.







>
>
|
>







3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
}

/*
** Chunk callback. pCtx is cast to an (Fts5Buffer*) and the nChunk bytes at
** pChunk are appended to it. Chunks that are not at least one byte in size
** are ignored. Any error is stored in p->rc.
*/
static void fts5PoslistCallback(
  Fts5Index *p, 
  void *pCtx, 
  const u8 *pChunk, int nChunk
){
  Fts5Buffer *pOut = (Fts5Buffer*)pCtx;
  assert_nc( nChunk>=0 );
  if( nChunk<=0 ) return;
  fts5BufferAppendBlob(&p->rc, pOut, nChunk, pChunk);
}

/*
** Iterator pIter currently points to a valid entry (not EOF). This
** function appends the position list data for the current entry to
** buffer pBuf. It does not make a copy of the position-list size
** field.
4159
4160
4161
4162
4163
4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
      fts5BufferFree(&aBuf[i]);
    }
    fts5MultiIterFree(p, p1);

    pData = fts5IdxMalloc(p, sizeof(Fts5Data) + doclist.n);
    if( pData ){
      pData->p = (u8*)&pData[1];
      pData->n = doclist.n;
      memcpy(pData->p, doclist.p, doclist.n);
      fts5MultiIterNew2(p, pData, bDesc, ppIter);
    }
    fts5BufferFree(&doclist);
  }

  fts5StructureRelease(pStruct);







|







4204
4205
4206
4207
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
      fts5BufferFree(&aBuf[i]);
    }
    fts5MultiIterFree(p, p1);

    pData = fts5IdxMalloc(p, sizeof(Fts5Data) + doclist.n);
    if( pData ){
      pData->p = (u8*)&pData[1];
      pData->nn = pData->szLeaf = doclist.n;
      memcpy(pData->p, doclist.p, doclist.n);
      fts5MultiIterNew2(p, pData, bDesc, ppIter);
    }
    fts5BufferFree(&doclist);
  }

  fts5StructureRelease(pStruct);
4389
4390
4391
4392
4393
4394
4395





4396
4397
4398
4399
4400
4401
4402
4403
       || (flags & FTS5INDEX_QUERY_SCAN)==FTS5INDEX_QUERY_SCAN
  );

  if( sqlite3Fts5BufferGrow(&p->rc, &buf, nToken+1)==0 ){
    memcpy(&buf.p[1], pToken, nToken);

#ifdef SQLITE_DEBUG





    if( flags & FTS5INDEX_QUERY_TEST_NOIDX ){
      assert( flags & FTS5INDEX_QUERY_PREFIX );
      iIdx = 1+pConfig->nPrefix;
    }else
#endif
    if( flags & FTS5INDEX_QUERY_PREFIX ){
      int nChar = fts5IndexCharlen(pToken, nToken);
      for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){







>
>
>
>
>
|







4434
4435
4436
4437
4438
4439
4440
4441
4442
4443
4444
4445
4446
4447
4448
4449
4450
4451
4452
4453
       || (flags & FTS5INDEX_QUERY_SCAN)==FTS5INDEX_QUERY_SCAN
  );

  if( sqlite3Fts5BufferGrow(&p->rc, &buf, nToken+1)==0 ){
    memcpy(&buf.p[1], pToken, nToken);

#ifdef SQLITE_DEBUG
    /* If the QUERY_TEST_NOIDX flag was specified, then this must be a
    ** prefix-query. Instead of using a prefix-index (if one exists), 
    ** evaluate the prefix query using the main FTS index. This is used
    ** for internal sanity checking by the integrity-check in debug 
    ** mode only.  */
    if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){
      assert( flags & FTS5INDEX_QUERY_PREFIX );
      iIdx = 1+pConfig->nPrefix;
    }else
#endif
    if( flags & FTS5INDEX_QUERY_PREFIX ){
      int nChar = fts5IndexCharlen(pToken, nToken);
      for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){
4509
4510
4511
4512
4513
4514
4515
4516
4517
4518
4519
4520
4521
4522
4523
  int *pn,                        /* OUT: Size of position-list in bytes */
  i64 *piRowid                    /* OUT: Current rowid */
){
  Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
  assert( pIter->pIndex->rc==SQLITE_OK );
  *piRowid = pSeg->iRowid;
  *pn = pSeg->nPos;
  if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->n ){
    *pp = &pSeg->pLeaf->p[pSeg->iLeafOffset];
  }else{
    fts5BufferZero(&pIter->poslist);
    fts5SegiterPoslist(pIter->pIndex, pSeg, &pIter->poslist);
    *pp = pIter->poslist.p;
  }
  return fts5IndexReturn(pIter->pIndex);







|







4559
4560
4561
4562
4563
4564
4565
4566
4567
4568
4569
4570
4571
4572
4573
  int *pn,                        /* OUT: Size of position-list in bytes */
  i64 *piRowid                    /* OUT: Current rowid */
){
  Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
  assert( pIter->pIndex->rc==SQLITE_OK );
  *piRowid = pSeg->iRowid;
  *pn = pSeg->nPos;
  if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->szLeaf ){
    *pp = &pSeg->pLeaf->p[pSeg->iLeafOffset];
  }else{
    fts5BufferZero(&pIter->poslist);
    fts5SegiterPoslist(pIter->pIndex, pSeg, &pIter->poslist);
    *pp = pIter->poslist.p;
  }
  return fts5IndexReturn(pIter->pIndex);
4557
4558
4559
4560
4561
4562
4563
4564
4565
4566
4567
4568
4569
4570
4571
4572
4573
4574
4575
int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){
  int nCol = p->pConfig->nCol;
  Fts5Data *pData;

  /* Outputs default to zero. They stay that way if the record cannot be
  ** read or is zero bytes in size. */
  *pnRow = 0;
  memset(anSize, 0, sizeof(i64) * nCol);
  pData = fts5DataRead(p, FTS5_AVERAGES_ROWID);
  if( p->rc==SQLITE_OK && pData->n ){
    int i = 0;                    /* Current offset within pData->p[] */
    int iCol;

    /* The record is decoded as one varint for *pnRow followed by one varint
    ** per column. Decoding stops at the end of the record or after nCol 
    ** columns, whichever comes first. */
    i += fts5GetVarint(&pData->p[i], (u64*)pnRow);
    for(iCol=0; i<pData->n && iCol<nCol; iCol++){
      i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]);
    }
  }

  fts5DataRelease(pData);
  return fts5IndexReturn(p);
}







|



|







4607
4608
4609
4610
4611
4612
4613
4614
4615
4616
4617
4618
4619
4620
4621
4622
4623
4624
4625
int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){
  const int nCol = p->pConfig->nCol;
  Fts5Data *pData;

  /* Outputs default to zero. They stay that way if the record cannot be
  ** read or is zero bytes in size. */
  *pnRow = 0;
  memset(anSize, 0, sizeof(i64) * nCol);

  pData = fts5DataRead(p, FTS5_AVERAGES_ROWID);
  if( p->rc==SQLITE_OK && pData->nn ){
    /* The record is decoded as one varint for *pnRow followed by one varint
    ** per column. Decoding stops at the end of the record or after nCol 
    ** columns, whichever comes first. */
    int iOff = fts5GetVarint(&pData->p[0], (u64*)pnRow);
    int iCol = 0;
    while( iOff<pData->nn && iCol<nCol ){
      iOff += fts5GetVarint(&pData->p[iOff], (u64*)&anSize[iCol]);
      iCol++;
    }
  }

  fts5DataRelease(pData);
  return fts5IndexReturn(p);
}
4766
4767
4768
4769
4770
4771
4772
4773






4774
4775
4776
4777
4778
4779
4780
4781
4782
4783
4784

4785
4786
4787
4788
4789
4790
4791
    if( rc==SQLITE_OK ){
      int f = flags|FTS5INDEX_QUERY_DESC;
      rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
    }
    if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;

    /* If this is a prefix query, check that the results returned if the
    ** index is disabled are the same. In both ASC and DESC order. */






    if( iIdx>0 && rc==SQLITE_OK ){
      int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
      ck2 = 0;
      rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
      if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
    }
    if( iIdx>0 && rc==SQLITE_OK ){
      int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC;
      ck2 = 0;
      rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
      if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;

    }

    cksum3 ^= ck1;
    fts5BufferSet(&rc, pPrev, n, (const u8*)z);

    if( rc==SQLITE_OK && cksum3!=expected ){
      rc = FTS5_CORRUPT;







|
>
>
>
>
>
>
|
|
|
|
|
|
|
|
|
|
|
>







4816
4817
4818
4819
4820
4821
4822
4823
4824
4825
4826
4827
4828
4829
4830
4831
4832
4833
4834
4835
4836
4837
4838
4839
4840
4841
4842
4843
4844
4845
4846
4847
4848
    if( rc==SQLITE_OK ){
      int f = flags|FTS5INDEX_QUERY_DESC;
      rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
    }
    if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;

    /* If this is a prefix query, check that the results returned if the
    ** index is disabled are the same. In both ASC and DESC order. 
    **
    ** This check may only be performed if the hash table is empty. This
    ** is because the hash table only supports a single scan query at
    ** a time, and the multi-iter loop from which this function is called
    ** is already performing such a scan. */
    if( p->nPendingData==0 ){
      if( iIdx>0 && rc==SQLITE_OK ){
        int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
        ck2 = 0;
        rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
        if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
      }
      if( iIdx>0 && rc==SQLITE_OK ){
        int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC;
        ck2 = 0;
        rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
        if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
      }
    }

    cksum3 ^= ck1;
    fts5BufferSet(&rc, pPrev, n, (const u8*)z);

    if( rc==SQLITE_OK && cksum3!=expected ){
      rc = FTS5_CORRUPT;
4816
4817
4818
4819
4820
4821
4822
4823
4824
4825
4826
4827
4828
4829
4830
4831



















































4832
4833
4834
4835
4836
4837
4838
  int iLast
){
  int i;

  /* Now check that the iter.nEmpty leaves following the current leaf
  ** (a) exist and (b) contain no terms. */
  for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){
    Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, i));
    if( pLeaf ){
      if( 0!=fts5GetU16(&pLeaf->p[2]) ) p->rc = FTS5_CORRUPT;
      if( i>=iNoRowid && 0!=fts5GetU16(&pLeaf->p[0]) ) p->rc = FTS5_CORRUPT;
    }
    fts5DataRelease(pLeaf);
    if( p->rc ) break;
  }
}




















































static void fts5IndexIntegrityCheckSegment(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5StructureSegment *pSeg      /* Segment to check internal consistency */
){
  Fts5Config *pConfig = p->pConfig;
  sqlite3_stmt *pStmt = 0;







|

|
|





>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







4873
4874
4875
4876
4877
4878
4879
4880
4881
4882
4883
4884
4885
4886
4887
4888
4889
4890
4891
4892
4893
4894
4895
4896
4897
4898
4899
4900
4901
4902
4903
4904
4905
4906
4907
4908
4909
4910
4911
4912
4913
4914
4915
4916
4917
4918
4919
4920
4921
4922
4923
4924
4925
4926
4927
4928
4929
4930
4931
4932
4933
4934
4935
4936
4937
4938
4939
4940
4941
4942
4943
4944
4945
4946
  int iLast
){
  int i;

  /* Now check that the iter.nEmpty leaves following the current leaf
  ** (a) exist and (b) contain no terms. */
  for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){
    Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i));
    if( pLeaf ){
      if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT;
      if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT;
    }
    fts5DataRelease(pLeaf);
    if( p->rc ) break;
  }
}

/*
** Check the consistency of the page-index stored on leaf page pLeaf in
** the bytes between offsets pLeaf->szLeaf and pLeaf->nn.
**
** That region is read as a series of varints. Each value is added to a
** running total, and the total is used as the offset of a term within
** the page. For every such offset this function verifies that:
**
**   * the offset is less than pLeaf->szLeaf,
**   * the term data found there does not extend past pLeaf->szLeaf,
**   * for each term other than the first, the count of bytes carried
**     over from the previous term (nKeep) does not exceed the size of
**     that previous term, and
**   * each term compares strictly greater than the one before it.
**
** If any check fails, p->rc is set to FTS5_CORRUPT and the scan stops.
** No entries are examined if p->rc is already set on entry.
*/
static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){
  int iTermOff = 0;               /* Running sum of the pgidx increments */
  int ii;                         /* Read offset within the page-index */

  Fts5Buffer buf1 = {0,0,0};      /* Term currently being checked */
  Fts5Buffer buf2 = {0,0,0};      /* Copy of the previously checked term */

  ii = pLeaf->szLeaf;
  while( ii<pLeaf->nn && p->rc==SQLITE_OK ){
    int iOff;                     /* Offset of term data within the page */
    int nIncr;                    /* Increment read from the page-index */

    ii += fts5GetVarint32(&pLeaf->p[ii], nIncr);
    iTermOff += nIncr;
    iOff = iTermOff;

    if( iOff>=pLeaf->szLeaf ){
      p->rc = FTS5_CORRUPT;
    }else if( iTermOff==nIncr ){
      /* The running total was zero before this entry, as is the case for
      ** the first entry. Such a term is stored as a size varint followed
      ** by the term bytes. */
      int nByte;
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
      if( (iOff+nByte)>pLeaf->szLeaf ){
        p->rc = FTS5_CORRUPT;
      }else{
        fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
      }
    }else{
      /* Any other term is stored as two varints - the number of bytes to
      ** keep from the previous term and the number of new bytes - followed
      ** by the new bytes themselves. */
      int nKeep, nByte;
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep);
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
      if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){
        p->rc = FTS5_CORRUPT;
      }else{
        buf1.n = nKeep;
        fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
      }

      /* The reconstructed term must sort strictly after the previous one. */
      if( p->rc==SQLITE_OK ){
        if( fts5BufferCompare(&buf1, &buf2)<=0 ) p->rc = FTS5_CORRUPT;
      }
    }

    /* Remember this term for comparison with the next one. */
    fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p);
  }

  fts5BufferFree(&buf1);
  fts5BufferFree(&buf2);
}

static void fts5IndexIntegrityCheckSegment(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5StructureSegment *pSeg      /* Segment to check internal consistency */
){
  Fts5Config *pConfig = p->pConfig;
  sqlite3_stmt *pStmt = 0;
4847
4848
4849
4850
4851
4852
4853
4854
4855
4856
4857
4858
4859
4860
4861
4862
4863
4864
4865
4866
4867
4868
4869
4870
4871
4872
4873
4874
4875

4876
4877
4878
4879

4880
4881
4882
4883
4884
4885
4886
4887
4888


4889
4890
4891
4892
4893
4894
4895
      pConfig->zDb, pConfig->zName, pSeg->iSegid
  ));

  /* Iterate through the b-tree hierarchy.  */
  while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
    i64 iRow;                     /* Rowid for this leaf */
    Fts5Data *pLeaf;              /* Data for this leaf */
    int iOff;                     /* Offset of first term on leaf */

    int nIdxTerm = sqlite3_column_bytes(pStmt, 1);
    const char *zIdxTerm = (const char*)sqlite3_column_text(pStmt, 1);
    int iIdxLeaf = sqlite3_column_int(pStmt, 2);
    int bIdxDlidx = sqlite3_column_int(pStmt, 3);

    /* If the leaf in question has already been trimmed from the segment, 
    ** ignore this b-tree entry. Otherwise, load it into memory. */
    if( iIdxLeaf<pSeg->pgnoFirst ) continue;
    iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, iIdxLeaf);
    pLeaf = fts5DataRead(p, iRow);
    if( pLeaf==0 ) break;

    /* Check that the leaf contains at least one term, and that it is equal
    ** to or larger than the split-key in zIdxTerm.  Also check that if there
    ** is also a rowid pointer within the leaf page header, it points to a
    ** location before the term.  */
    iOff = fts5GetU16(&pLeaf->p[2]);
    if( iOff==0 ){
      p->rc = FTS5_CORRUPT;
    }else{

      int iRowidOff;
      int nTerm;                  /* Size of term on leaf in bytes */
      int res;                    /* Comparison of term and split-key */


      iRowidOff = fts5GetU16(&pLeaf->p[0]);
      if( iRowidOff>=iOff ){
        p->rc = FTS5_CORRUPT;
      }else{
        iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm);
        res = memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm));
        if( res==0 ) res = nTerm - nIdxTerm;
        if( res<0 ) p->rc = FTS5_CORRUPT;
      }


    }
    fts5DataRelease(pLeaf);
    if( p->rc ) break;


    /* Now check that the iter.nEmpty leaves following the current leaf
    ** (a) exist and (b) contain no terms. */







<









|







|
<


>
|



>
|








>
>







4955
4956
4957
4958
4959
4960
4961

4962
4963
4964
4965
4966
4967
4968
4969
4970
4971
4972
4973
4974
4975
4976
4977
4978
4979

4980
4981
4982
4983
4984
4985
4986
4987
4988
4989
4990
4991
4992
4993
4994
4995
4996
4997
4998
4999
5000
5001
5002
5003
5004
5005
      pConfig->zDb, pConfig->zName, pSeg->iSegid
  ));

  /* Iterate through the b-tree hierarchy.  */
  while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
    i64 iRow;                     /* Rowid for this leaf */
    Fts5Data *pLeaf;              /* Data for this leaf */


    int nIdxTerm = sqlite3_column_bytes(pStmt, 1);
    const char *zIdxTerm = (const char*)sqlite3_column_text(pStmt, 1);
    int iIdxLeaf = sqlite3_column_int(pStmt, 2);
    int bIdxDlidx = sqlite3_column_int(pStmt, 3);

    /* If the leaf in question has already been trimmed from the segment, 
    ** ignore this b-tree entry. Otherwise, load it into memory. */
    if( iIdxLeaf<pSeg->pgnoFirst ) continue;
    iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf);
    pLeaf = fts5DataRead(p, iRow);
    if( pLeaf==0 ) break;

    /* Check that the leaf contains at least one term, and that it is equal
    ** to or larger than the split-key in zIdxTerm.  Also check that if there
    ** is also a rowid pointer within the leaf page header, it points to a
    ** location before the term.  */
    if( pLeaf->nn<=pLeaf->szLeaf ){

      p->rc = FTS5_CORRUPT;
    }else{
      int iOff;                   /* Offset of first term on leaf */
      int iRowidOff;              /* Offset of first rowid on leaf */
      int nTerm;                  /* Size of term on leaf in bytes */
      int res;                    /* Comparison of term and split-key */

      iOff = fts5LeafFirstTermOff(pLeaf);
      iRowidOff = fts5LeafFirstRowidOff(pLeaf);
      if( iRowidOff>=iOff ){
        p->rc = FTS5_CORRUPT;
      }else{
        iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm);
        res = memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm));
        if( res==0 ) res = nTerm - nIdxTerm;
        if( res<0 ) p->rc = FTS5_CORRUPT;
      }

      fts5IntegrityCheckPgidx(p, pLeaf);
    }
    fts5DataRelease(pLeaf);
    if( p->rc ) break;


    /* Now check that the iter.nEmpty leaves following the current leaf
    ** (a) exist and (b) contain no terms. */
4909
4910
4911
4912
4913
4914
4915
4916
4917
4918
4919
4920
4921
4922
4923
4924
4925
4926
4927
4928
4929
4930
4931

4932
4933
4934
4935
4936
4937
4938
4939
      for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf);
          fts5DlidxIterEof(p, pDlidx)==0;
          fts5DlidxIterNext(p, pDlidx)
      ){

        /* Check any rowid-less pages that occur before the current leaf. */
        for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){
          iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPg);
          pLeaf = fts5DataRead(p, iKey);
          if( pLeaf ){
            if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT;
            fts5DataRelease(pLeaf);
          }
        }
        iPrevLeaf = fts5DlidxIterPgno(pDlidx);

        /* Check that the leaf page indicated by the iterator really does
        ** contain the rowid suggested by the same. */
        iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPrevLeaf);
        pLeaf = fts5DataRead(p, iKey);
        if( pLeaf ){
          i64 iRowid;
          int iRowidOff = fts5GetU16(&pLeaf->p[0]);

          if( iRowidOff>=pLeaf->n ){
            p->rc = FTS5_CORRUPT;
          }else{
            fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
            if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT;
          }
          fts5DataRelease(pLeaf);
        }







|


|







|



|
>
|







5019
5020
5021
5022
5023
5024
5025
5026
5027
5028
5029
5030
5031
5032
5033
5034
5035
5036
5037
5038
5039
5040
5041
5042
5043
5044
5045
5046
5047
5048
5049
5050
      for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf);
          fts5DlidxIterEof(p, pDlidx)==0;
          fts5DlidxIterNext(p, pDlidx)
      ){

        /* Check any rowid-less pages that occur before the current leaf. */
        for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){
          iKey = FTS5_SEGMENT_ROWID(iSegid, iPg);
          pLeaf = fts5DataRead(p, iKey);
          if( pLeaf ){
            if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT;
            fts5DataRelease(pLeaf);
          }
        }
        iPrevLeaf = fts5DlidxIterPgno(pDlidx);

        /* Check that the leaf page indicated by the iterator really does
        ** contain the rowid suggested by the same. */
        iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf);
        pLeaf = fts5DataRead(p, iKey);
        if( pLeaf ){
          i64 iRowid;
          int iRowidOff = fts5LeafFirstRowidOff(pLeaf);
          ASSERT_SZLEAF_OK(pLeaf);
          if( iRowidOff>=pLeaf->szLeaf ){
            p->rc = FTS5_CORRUPT;
          }else{
            fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
            if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT;
          }
          fts5DataRelease(pLeaf);
        }
5126
5127
5128
5129
5130
5131
5132
5133
5134
5135
5136
5137
5138
5139
5140
5141
5142
  for(iLvl=0; iLvl<p->nLevel; iLvl++){
    Fts5StructureLevel *pLvl = &p->aLevel[iLvl];
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, 
        " {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg
    );
    for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
      Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, 
          " {id=%d h=%d leaves=%d..%d}", pSeg->iSegid, pSeg->nHeight, 
          pSeg->pgnoFirst, pSeg->pgnoLast
      );
    }
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
  }
}

/*







|
<
|







5237
5238
5239
5240
5241
5242
5243
5244

5245
5246
5247
5248
5249
5250
5251
5252
  for(iLvl=0; iLvl<p->nLevel; iLvl++){
    Fts5StructureLevel *pLvl = &p->aLevel[iLvl];
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, 
        " {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg
    );
    for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
      Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d}", 

          pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast
      );
    }
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
  }
}

/*
5189
5190
5191
5192
5193
5194
5195

5196
5197

5198
5199
5200
5201
5202
5203
5204
5205
5206
5207
5208
5209
5210
5211
5212
5213
5214
5215
**
** The return value is the number of bytes read from the input buffer.
*/
static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
  i64 iDocid = 0;                 /* Most recently decoded rowid */
  int iOff = 0;                   /* Current offset within a[] */

  /* Only decode the leading rowid if the input is non-empty. Without this
  ** guard, a call with n<=0 would read a varint from beyond the n-byte
  ** input, append a spurious " rowid=" entry to the output and return an
  ** offset larger than n. */
  if( n>0 ){
    iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid);
  }

  while( iOff<n ){
    int nPos;                     /* Size of position list in bytes */
    int bDummy;                   /* Second output of fts5GetPoslistSize() */

    /* Each rowid is followed by a position-list size field and then the
    ** position list itself. The position list is clipped to the bytes
    ** remaining in the input. */
    iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy);
    iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos));

    if( iOff<n ){
      /* The next varint is a delta to add to the previous rowid. A delta
      ** of zero ends the doclist early. */
      i64 iDelta;
      iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta);
      if( iDelta==0 ) return iOff;
      iDocid += iDelta;
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid);
    }
  }

  return iOff;
}

/*







>
|
|
>










|







5299
5300
5301
5302
5303
5304
5305
5306
5307
5308
5309
5310
5311
5312
5313
5314
5315
5316
5317
5318
5319
5320
5321
5322
5323
5324
5325
5326
5327
**
** The return value is the number of bytes read from the input buffer.
*/
static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
  /* Initialized so that the variable holds a defined value even when n<=0
  ** and the leading varint is never read. */
  i64 iDocid = 0;                 /* Most recently decoded id */
  int iOff = 0;                   /* Current offset within a[] */

  /* The doclist begins with a varint holding the first id. Skip this step
  ** if the input is empty. */
  if( n>0 ){
    iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
  }
  while( iOff<n ){
    int nPos;                     /* Size of position list in bytes */
    int bDummy;                   /* Second output of fts5GetPoslistSize() */

    /* Each id is followed by a position-list size field and then the
    ** position list itself. The position list is clipped to the bytes
    ** remaining in the input. */
    iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy);
    iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos));

    if( iOff<n ){
      /* The next varint is a delta to add to the previous id. A delta of
      ** zero ends the doclist early. */
      i64 iDelta;
      iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta);
      if( iDelta==0 ) return iOff;
      iDocid += iDelta;
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
    }
  }

  return iOff;
}

/*
5227
5228
5229
5230
5231
5232
5233




5234
5235
5236
5237
5238
5239
5240


5241
5242
5243
5244
5245
5246
5247
5248
5249
5250
5251
5252
5253
5254
5255
5256
5257
5258
5259
5260
5261
5262
5263
5264
5265
5266
5267



5268
5269
5270
5271
5272
5273
5274
5275
5276
5277
5278
5279
5280





5281
5282


5283
5284
5285
5286
5287
5288
5289
5290
5291
5292

5293
5294
5295




5296



5297
5298

5299





5300
5301


5302
5303
5304
5305
5306
5307
5308
5309
5310
5311
5312
5313
5314
5315
5316
5317
5318
5319
  Fts5Buffer s;                   /* Build up text to return here */
  int rc = SQLITE_OK;             /* Return code */
  int nSpace = 0;

  assert( nArg==2 );
  memset(&s, 0, sizeof(Fts5Buffer));
  iRowid = sqlite3_value_int64(apVal[0]);




  n = sqlite3_value_bytes(apVal[1]);
  aBlob = sqlite3_value_blob(apVal[1]);

  nSpace = n + FTS5_DATA_ZERO_PADDING;
  a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
  if( a==0 ) goto decode_out;
  memcpy(a, aBlob, n);


  fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno);

  fts5DebugRowid(&rc, &s, iRowid);
  if( bDlidx ){
    Fts5Data dlidx;
    Fts5DlidxLvl lvl;

    dlidx.p = a;
    dlidx.n = n;

    memset(&lvl, 0, sizeof(Fts5DlidxLvl));
    lvl.pData = &dlidx;
    lvl.iLeafPgno = iPgno;

    for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){
      sqlite3Fts5BufferAppendPrintf(&rc, &s, 
          " %d(%lld)", lvl.iLeafPgno, lvl.iRowid
      );
    }
  }else if( iSegid==0 ){
    if( iRowid==FTS5_AVERAGES_ROWID ){
      /* todo */
    }else{
      fts5DecodeStructure(&rc, &s, a, n);
    }
  }else{
    Fts5Buffer term;



    int iTermOff = 0;
    int iRowidOff = 0;
    int iOff;
    int nKeep = 0;

    memset(&term, 0, sizeof(Fts5Buffer));

    if( n>=4 ){
      iRowidOff = fts5GetU16(&a[0]);
      iTermOff = fts5GetU16(&a[2]);
    }else{
      sqlite3Fts5BufferSet(&rc, &s, 8, (const u8*)"corrupt");
      goto decode_out;





    }



    if( iRowidOff ){
      iOff = iRowidOff;
    }else if( iTermOff ){
      iOff = iTermOff;
    }else{
      iOff = n;
    }
    fts5DecodePoslist(&rc, &s, &a[4], iOff-4);

    assert( iRowidOff==0 || iOff==iRowidOff );

    if( iRowidOff ){
      iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff);
    }








    assert( iTermOff==0 || iOff==iTermOff );
    while( iOff<n ){

      int nByte;





      iOff += fts5GetVarint32(&a[iOff], nByte);
      term.n= nKeep;


      fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
      iOff += nByte;

      sqlite3Fts5BufferAppendPrintf(
          &rc, &s, " term=%.*s", term.n, (const char*)term.p
          );
      iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff);
      if( iOff<n ){
        iOff += fts5GetVarint32(&a[iOff], nKeep);
      }
    }
    fts5BufferFree(&term);
  }
  
 decode_out:
  sqlite3_free(a);
  if( rc==SQLITE_OK ){
    sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT);







>
>
>
>


<




>
>








|

















|
>
>
>



|



|
<
<
<


>
>
>
>
>
|
|
>
>
|

|


|



|
>
|
|
|
>
>
>
>
|
>
>
>
|
|
>
|
>
>
>
>
>
|
|
>
>





|
|
<
<
|
|







5339
5340
5341
5342
5343
5344
5345
5346
5347
5348
5349
5350
5351

5352
5353
5354
5355
5356
5357
5358
5359
5360
5361
5362
5363
5364
5365
5366
5367
5368
5369
5370
5371
5372
5373
5374
5375
5376
5377
5378
5379
5380
5381
5382
5383
5384
5385
5386
5387
5388
5389
5390
5391
5392
5393
5394
5395



5396
5397
5398
5399
5400
5401
5402
5403
5404
5405
5406
5407
5408
5409
5410
5411
5412
5413
5414
5415
5416
5417
5418
5419
5420
5421
5422
5423
5424
5425
5426
5427
5428
5429
5430
5431
5432
5433
5434
5435
5436
5437
5438
5439
5440
5441
5442
5443
5444
5445
5446
5447
5448


5449
5450
5451
5452
5453
5454
5455
5456
5457
  Fts5Buffer s;                   /* Build up text to return here */
  int rc = SQLITE_OK;             /* Return code */
  int nSpace = 0;

  assert( nArg==2 );
  memset(&s, 0, sizeof(Fts5Buffer));
  iRowid = sqlite3_value_int64(apVal[0]);

  /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[]
  ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
  ** buffer overreads even if the record is corrupt.  */
  n = sqlite3_value_bytes(apVal[1]);
  aBlob = sqlite3_value_blob(apVal[1]);

  nSpace = n + FTS5_DATA_ZERO_PADDING;
  a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
  if( a==0 ) goto decode_out;
  memcpy(a, aBlob, n);


  fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno);

  fts5DebugRowid(&rc, &s, iRowid);
  if( bDlidx ){
    Fts5Data dlidx;
    Fts5DlidxLvl lvl;

    dlidx.p = a;
    dlidx.nn = n;

    memset(&lvl, 0, sizeof(Fts5DlidxLvl));
    lvl.pData = &dlidx;
    lvl.iLeafPgno = iPgno;

    for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){
      sqlite3Fts5BufferAppendPrintf(&rc, &s, 
          " %d(%lld)", lvl.iLeafPgno, lvl.iRowid
      );
    }
  }else if( iSegid==0 ){
    if( iRowid==FTS5_AVERAGES_ROWID ){
      /* todo */
    }else{
      fts5DecodeStructure(&rc, &s, a, n);
    }
  }else{
    Fts5Buffer term;              /* Current term read from page */
    int szLeaf;                   /* Offset of pgidx in a[] */
    int iPgidxOff;
    int iPgidxPrev = 0;           /* Previous value read from pgidx */
    int iTermOff = 0;
    int iRowidOff = 0;
    int iOff;
    int nDoclist;

    memset(&term, 0, sizeof(Fts5Buffer));

    if( n<4 ){



      sqlite3Fts5BufferSet(&rc, &s, 8, (const u8*)"corrupt");
      goto decode_out;
    }else{
      iRowidOff = fts5GetU16(&a[0]);
      iPgidxOff = szLeaf = fts5GetU16(&a[2]);
      if( iPgidxOff<n ){
        fts5GetVarint32(&a[iPgidxOff], iTermOff);
      }
    }

    /* Decode the position list tail at the start of the page */
    if( iRowidOff!=0 ){
      iOff = iRowidOff;
    }else if( iTermOff!=0 ){
      iOff = iTermOff;
    }else{
      iOff = szLeaf;
    }
    fts5DecodePoslist(&rc, &s, &a[4], iOff-4);

    /* Decode any more doclist data that appears on the page before the
    ** first term. */
    nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff;
    fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist);

    while( iPgidxOff<n ){
      int bFirst = (iPgidxOff==szLeaf);     /* True for first term on page */
      int nByte;                            /* Bytes of data */
      int iEnd;
      
      iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte);
      iPgidxPrev += nByte;
      iOff = iPgidxPrev;

      if( iPgidxOff<n ){
        fts5GetVarint32(&a[iPgidxOff], nByte);
        iEnd = iPgidxPrev + nByte;
      }else{
        iEnd = szLeaf;
      }

      if( bFirst==0 ){
        iOff += fts5GetVarint32(&a[iOff], nByte);
        term.n = nByte;
      }
      iOff += fts5GetVarint32(&a[iOff], nByte);
      fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
      iOff += nByte;

      sqlite3Fts5BufferAppendPrintf(
          &rc, &s, " term=%.*s", term.n, (const char*)term.p
      );
      iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff);


    }

    fts5BufferFree(&term);
  }
  
 decode_out:
  sqlite3_free(a);
  if( rc==SQLITE_OK ){
    sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT);
5335
5336
5337
5338
5339
5340
5341
5342
5343
5344
5345
5346
5347
5348
5349
5350
5351
5352
5353
5354
5355
5356
5357
5358
5359
5360
5361
5362
5363
5364
  if( nArg==0 ){
    sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1);
  }else{
    zArg = (const char*)sqlite3_value_text(apVal[0]);
    if( 0==sqlite3_stricmp(zArg, "segment") ){
      i64 iRowid;
      int segid, height, pgno;
      if( nArg!=4 ){
        sqlite3_result_error(pCtx, 
            "should be: fts5_rowid('segment', segid, height, pgno))", -1
        );
      }else{
        segid = sqlite3_value_int(apVal[1]);
        height = sqlite3_value_int(apVal[2]);
        pgno = sqlite3_value_int(apVal[3]);
        iRowid = FTS5_SEGMENT_ROWID(segid, height, pgno);
        sqlite3_result_int64(pCtx, iRowid);
      }
    }else {
      sqlite3_result_error(pCtx, 
        "first arg to fts5_rowid() must be 'segment' "
        "or 'start-of-index'"
        , -1
      );
    }
  }
}

/*
** This is called as part of registering the FTS5 module with database







|

|



<
|
|


|

|
<
<







5473
5474
5475
5476
5477
5478
5479
5480
5481
5482
5483
5484
5485

5486
5487
5488
5489
5490
5491
5492


5493
5494
5495
5496
5497
5498
5499
  if( nArg==0 ){
    sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1);
  }else{
    zArg = (const char*)sqlite3_value_text(apVal[0]);
    if( 0==sqlite3_stricmp(zArg, "segment") ){
      i64 iRowid;
      int segid, height, pgno;
      if( nArg!=3 ){
        sqlite3_result_error(pCtx, 
            "should be: fts5_rowid('segment', segid, pgno))", -1
        );
      }else{
        segid = sqlite3_value_int(apVal[1]);

        pgno = sqlite3_value_int(apVal[2]);
        iRowid = FTS5_SEGMENT_ROWID(segid, pgno);
        sqlite3_result_int64(pCtx, iRowid);
      }
    }else{
      sqlite3_result_error(pCtx, 
        "first arg to fts5_rowid() must be 'segment'" , -1


      );
    }
  }
}

/*
** This is called as part of registering the FTS5 module with database

Changes to ext/fts5/fts5_main.c.

1313
1314
1315
1316
1317
1318
1319




1320
1321
1322
1323
1324
1325
1326
  }else if( 0==sqlite3_stricmp("optimize", z) ){
    rc = sqlite3Fts5StorageOptimize(pTab->pStorage);
  }else if( 0==sqlite3_stricmp("merge", z) ){
    int nMerge = sqlite3_value_int(pVal);
    rc = sqlite3Fts5StorageMerge(pTab->pStorage, nMerge);
  }else if( 0==sqlite3_stricmp("integrity-check", z) ){
    rc = sqlite3Fts5StorageIntegrity(pTab->pStorage);




  }else{
    rc = sqlite3Fts5IndexLoadConfig(pTab->pIndex);
    if( rc==SQLITE_OK ){
      rc = sqlite3Fts5ConfigSetValue(pTab->pConfig, z, pVal, &bError);
    }
    if( rc==SQLITE_OK ){
      if( bError ){







>
>
>
>







1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
  }else if( 0==sqlite3_stricmp("optimize", z) ){
    rc = sqlite3Fts5StorageOptimize(pTab->pStorage);
  }else if( 0==sqlite3_stricmp("merge", z) ){
    int nMerge = sqlite3_value_int(pVal);
    rc = sqlite3Fts5StorageMerge(pTab->pStorage, nMerge);
  }else if( 0==sqlite3_stricmp("integrity-check", z) ){
    rc = sqlite3Fts5StorageIntegrity(pTab->pStorage);
#ifdef SQLITE_DEBUG
  }else if( 0==sqlite3_stricmp("prefix-index", z) ){
    pConfig->bPrefixIndex = sqlite3_value_int(pVal);
#endif
  }else{
    rc = sqlite3Fts5IndexLoadConfig(pTab->pIndex);
    if( rc==SQLITE_OK ){
      rc = sqlite3Fts5ConfigSetValue(pTab->pConfig, z, pVal, &bError);
    }
    if( rc==SQLITE_OK ){
      if( bError ){

Changes to ext/fts5/test/fts5aa.test.

47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
}
do_execsql_test 2.1 {
  INSERT INTO t1 VALUES('a b c', 'd e f');
}

do_test 2.2 {
  execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 }
} {/{{structure} {lvl=0 nMerge=0 nSeg=1 {id=[0123456789]* h=0 leaves=1..1}}}/}

foreach w {a b c d e f} {
  do_execsql_test 2.3.$w.asc {
    SELECT rowid FROM t1 WHERE t1 MATCH $w;
  } {1}
  do_execsql_test 2.3.$w.desc {
    SELECT rowid FROM t1 WHERE t1 MATCH $w ORDER BY rowid DESC;







|







47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
}
do_execsql_test 2.1 {
  INSERT INTO t1 VALUES('a b c', 'd e f');
}

do_test 2.2 {
  execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 }
} {/{{structure} {lvl=0 nMerge=0 nSeg=1 {id=[0123456789]* leaves=1..1}}}/}

foreach w {a b c d e f} {
  do_execsql_test 2.3.$w.asc {
    SELECT rowid FROM t1 WHERE t1 MATCH $w;
  } {1}
  do_execsql_test 2.3.$w.desc {
    SELECT rowid FROM t1 WHERE t1 MATCH $w ORDER BY rowid DESC;
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
  do_execsql_test 5.$i.1 { INSERT INTO t1 VALUES($x, $y) }
  do_execsql_test 5.$i.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
  if {[set_test_counter errors]} break
}

#-------------------------------------------------------------------------
#
breakpoint
reset_db
do_execsql_test 6.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x,y);
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}

do_execsql_test 6.1 {







<







135
136
137
138
139
140
141

142
143
144
145
146
147
148
  do_execsql_test 5.$i.1 { INSERT INTO t1 VALUES($x, $y) }
  do_execsql_test 5.$i.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
  if {[set_test_counter errors]} break
}

#-------------------------------------------------------------------------
#

reset_db
do_execsql_test 6.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x,y);
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
}

do_execsql_test 6.1 {
197
198
199
200
201
202
203

204
205
206
207
208
209
210
      set y [doc]
      set z [doc]
      set rowid [expr int(rand() * 100)]
      execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) }
    }
    execsql { INSERT INTO t1(t1) VALUES('integrity-check'); }
  } {}

}

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 8.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x, prefix="1,2,3");







>







196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
      set y [doc]
      set z [doc]
      set rowid [expr int(rand() * 100)]
      execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) }
    }
    execsql { INSERT INTO t1(t1) VALUES('integrity-check'); }
  } {}
  if {[set_test_counter errors]} break
}

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 8.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x, prefix="1,2,3");

Changes to ext/fts5/test/fts5ad.test.

201
202
203
204
205
206
207



208
209
210
211
212
213
214
        }
      }
      if {$bMatch} { lappend ret $rowid }
    }
    return $ret
  }




  
  foreach {bAsc sql} {
    1 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix}
    0 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix ORDER BY rowid DESC}
  } {
    foreach {tn prefix} {
      1  {a*} 2 {ab*} 3 {abc*} 4 {abcd*} 5 {abcde*} 







>
>
>







201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
        }
      }
      if {$bMatch} { lappend ret $rowid }
    }
    return $ret
  }

  do_execsql_test $T.integrity {
    INSERT INTO t1(t1) VALUES('integrity-check');
  }
  
  foreach {bAsc sql} {
    1 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix}
    0 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix ORDER BY rowid DESC}
  } {
    foreach {tn prefix} {
      1  {a*} 2 {ab*} 3 {abc*} 4 {abcd*} 5 {abcde*} 

Changes to ext/fts5/test/fts5al.test.

22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
  finish_test
  return
}

do_execsql_test 1.1 {
  CREATE VIRTUAL TABLE ft1 USING fts5(x);
  SELECT * FROM ft1_config;
} {version 3}

do_execsql_test 1.2 {
  INSERT INTO ft1(ft1, rank) VALUES('pgsz', 32);
  SELECT * FROM ft1_config;
} {pgsz 32 version 3}

do_execsql_test 1.3 {
  INSERT INTO ft1(ft1, rank) VALUES('pgsz', 64);
  SELECT * FROM ft1_config;
} {pgsz 64 version 3}

#--------------------------------------------------------------------------
# Test the logic for parsing the rank() function definition.
#
foreach {tn defn} {
  1 "fname()"
  2 "fname(1)"







|




|




|







22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
  finish_test
  return
}

do_execsql_test 1.1 {
  CREATE VIRTUAL TABLE ft1 USING fts5(x);
  SELECT * FROM ft1_config;
} {version 4}

do_execsql_test 1.2 {
  INSERT INTO ft1(ft1, rank) VALUES('pgsz', 32);
  SELECT * FROM ft1_config;
} {pgsz 32 version 4}

do_execsql_test 1.3 {
  INSERT INTO ft1(ft1, rank) VALUES('pgsz', 64);
  SELECT * FROM ft1_config;
} {pgsz 64 version 4}

#--------------------------------------------------------------------------
# Test the logic for parsing the rank() function definition.
#
foreach {tn defn} {
  1 "fname()"
  2 "fname(1)"

Changes to ext/fts5/test/fts5corrupt.test.

39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
db_save

do_execsql_test 1.2 { INSERT INTO t1(t1) VALUES('integrity-check') }
set segid [lindex [fts5_level_segids t1] 0]

do_test 1.3 {
  execsql {
    DELETE FROM t1_data WHERE rowid = fts5_rowid('segment', $segid, 0, 4);
  }
  catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {1 {database disk image is malformed}}

do_test 1.4 {
  db_restore_and_reopen
  execsql {
    UPDATE t1_data set block = X'00000000' || substr(block, 5) WHERE
    rowid = fts5_rowid('segment', $segid, 0, 4);
  }
  catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {1 {database disk image is malformed}}

db_restore_and_reopen
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}








|








|







39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# NOTE(review): db_save presumably snapshots the database so that
# db_restore_and_reopen can put it back after each corruption - confirm
# against the helper's definition.
db_save

# The index passes the integrity-check before anything is damaged.
do_execsql_test 1.2 { INSERT INTO t1(t1) VALUES('integrity-check') }

# Use the first segment id reported by fts5_level_segids for table t1.
set segid [lindex [fts5_level_segids t1] 0]

# Deleting the t1_data row addressed by fts5_rowid('segment', $segid, 4)
# must make the integrity-check fail with a corruption error.
do_test 1.3 {
  execsql {
    DELETE FROM t1_data WHERE rowid = fts5_rowid('segment', $segid, 4);
  }
  catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {1 {database disk image is malformed}}

# Starting again from the restored database, overwrite the first four bytes
# of the same row's block with zeros. The integrity-check must again report
# corruption.
do_test 1.4 {
  db_restore_and_reopen
  execsql {
    UPDATE t1_data set block = X'00000000' || substr(block, 5) WHERE
    rowid = fts5_rowid('segment', $segid, 4);
  }
  catchsql { INSERT INTO t1(t1) VALUES('integrity-check') }
} {1 {database disk image is malformed}}

db_restore_and_reopen
# Debugging aid: uncomment to print the decoded form of every t1_data row.
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}

Changes to ext/fts5/test/fts5corrupt2.test.

205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
      if {$res == "1 {database disk image is malformed}"} {incr nCorrupt}
      set {} 1
    } {1}

    execsql ROLLBACK
  }

  do_test 4.$tn.x { expr $nCorrupt>0 } 1
}

}

set doc [string repeat "A B C " 1000]
do_execsql_test 4.0 {
  CREATE VIRTUAL TABLE x5 USING fts5(tt);
  INSERT INTO x5(x5, rank) VALUES('pgsz', 32);
  WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<10) 
  INSERT INTO x5 SELECT $doc FROM ii;
}

foreach {tn hdr} {
  1 "\x00\x01"
} {
  set tn2 0
  set nCorrupt 0
  foreach rowid [db eval {SELECT rowid FROM x5_data WHERE rowid>10}] {
    if {$rowid & $mask} continue
    incr tn2
    do_test 4.$tn.$tn2 {
      execsql BEGIN

      set fd [db incrblob main x5_data block $rowid]
      fconfigure $fd -encoding binary -translation binary
      puts -nonewline $fd $hdr
      close $fd

      catchsql { INSERT INTO x5(x5) VALUES('integrity-check') }
      set {} {}
    } {}

    execsql ROLLBACK
  }
}

#--------------------------------------------------------------------
reset_db
do_execsql_test 5.1 {
  CREATE VIRTUAL TABLE x5 USING fts5(tt);
  INSERT INTO x5 VALUES('a');
  INSERT INTO x5 VALUES('a a');
  INSERT INTO x5 VALUES('a a a');
  INSERT INTO x5 VALUES('a a a a');

  UPDATE x5_docsize SET sz = X'' WHERE id=3;
}
proc colsize {cmd i} { 
  $cmd xColumnSize $i
}
sqlite3_fts5_create_function db colsize colsize

do_catchsql_test 5.2 {
  SELECT colsize(x5, 0) FROM x5 WHERE x5 MATCH 'a'
} {1 SQLITE_CORRUPT_VTAB}


sqlite3_fts5_may_be_corrupt 0
finish_test








|





|














|

















|













|







205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
      if {$res == "1 {database disk image is malformed}"} {incr nCorrupt}
      set {} 1
    } {1}

    execsql ROLLBACK
  }

  # do_test 4.$tn.x { expr $nCorrupt>0 } 1
}

}

# Build table x5 with pgsz set to 32 and ten rows, each holding the text
# "A B C " repeated 1000 times.
set doc [string repeat "A B C " 1000]
do_execsql_test 5.0 {
  CREATE VIRTUAL TABLE x5 USING fts5(tt);
  INSERT INTO x5(x5, rank) VALUES('pgsz', 32);
  WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<10) 
  INSERT INTO x5 SELECT $doc FROM ii;
}

# For each header value $hdr, visit the x5_data rows with rowid>10 and
# overwrite the start of the row's block with $hdr. Each test passes as long
# as the integrity-check returns control to the script: its outcome is
# captured by catchsql and then discarded.
foreach {tn hdr} {
  1 "\x00\x01"
} {
  set tn2 0
  # NOTE(review): nCorrupt is initialised here but never read or updated
  # within this loop.
  set nCorrupt 0
  foreach rowid [db eval {SELECT rowid FROM x5_data WHERE rowid>10}] {
    # Skip rows whose rowid has any bit of $mask set. The variable mask is
    # assigned outside this part of the file.
    if {$rowid & $mask} continue
    incr tn2
    do_test 5.$tn.$tn2 {
      execsql BEGIN

      # Write $hdr over the first bytes of the block through a binary-mode
      # incremental blob channel.
      set fd [db incrblob main x5_data block $rowid]
      fconfigure $fd -encoding binary -translation binary
      puts -nonewline $fd $hdr
      close $fd

      catchsql { INSERT INTO x5(x5) VALUES('integrity-check') }
      # Make the script result the empty string whatever catchsql returned.
      set {} {}
    } {}

    # Undo the overwrite so that the next row starts from an intact index.
    execsql ROLLBACK
  }
}

#--------------------------------------------------------------------
# Create a four-row table, then replace the sz value stored in x5_docsize
# for id 3 with an empty blob.
#
reset_db
do_execsql_test 6.1 {
  CREATE VIRTUAL TABLE x5 USING fts5(tt);
  INSERT INTO x5 VALUES('a');
  INSERT INTO x5 VALUES('a a');
  INSERT INTO x5 VALUES('a a a');
  INSERT INTO x5 VALUES('a a a a');

  UPDATE x5_docsize SET sz = X'' WHERE id=3;
}
# Invoke the xColumnSize method of the command named by $cmd, passing $i as
# its argument, and return whatever that method returns.
proc colsize {cmd i} {
  return [$cmd xColumnSize $i]
}
# NOTE(review): presumably registers the Tcl proc colsize as an fts5
# auxiliary SQL function of the same name on handle db - confirm against
# the test harness.
sqlite3_fts5_create_function db colsize colsize

# Calling colsize() for each row matching 'a' must fail with
# SQLITE_CORRUPT_VTAB.
do_catchsql_test 6.2 {
  SELECT colsize(x5, 0) FROM x5 WHERE x5 MATCH 'a'
} {1 SQLITE_CORRUPT_VTAB}


sqlite3_fts5_may_be_corrupt 0
finish_test

Changes to ext/fts5/test/fts5rowid.test.

23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45

do_catchsql_test 1.1 {
  SELECT fts5_rowid()
} {1 {should be: fts5_rowid(subject, ....)}}

do_catchsql_test 1.2 {
  SELECT fts5_rowid('segment')
} {1 {should be: fts5_rowid('segment', segid, height, pgno))}}

do_execsql_test 1.3 {
  SELECT fts5_rowid('segment', 1, 1, 1)
} {139586437121}

do_catchsql_test 1.4 {
  SELECT fts5_rowid('nosucharg');
} {1 {first arg to fts5_rowid() must be 'segment' or 'start-of-index'}} 


#-------------------------------------------------------------------------
# Tests of the fts5_decode() function.
#
reset_db
do_execsql_test 2.1 { 







|


|
|



|







23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45

# With no arguments at all, fts5_rowid() reports the generic usage message.
do_catchsql_test 1.1 {
  SELECT fts5_rowid()
} {1 {should be: fts5_rowid(subject, ....)}}

# 'segment' without the segid and pgno arguments reports the usage message
# for the segment form.
do_catchsql_test 1.2 {
  SELECT fts5_rowid('segment')
} {1 {should be: fts5_rowid('segment', segid, pgno))}}

# The expected value is (1<<37) + 1, which is consistent with the segid
# being stored above bit 37 and the pgno in the low bits.
do_execsql_test 1.3 {
  SELECT fts5_rowid('segment', 1, 1)
} {137438953473}

# Any first argument other than 'segment' is rejected.
do_catchsql_test 1.4 {
  SELECT fts5_rowid('nosucharg');
} {1 {first arg to fts5_rowid() must be 'segment'}} 


#-------------------------------------------------------------------------
# Tests of the fts5_decode() function.
#
reset_db
do_execsql_test 2.1 { 

Added ext/fts5/test/fts5simple.test.



























































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# 2015 September 05
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5simple

# If SQLITE_ENABLE_FTS5 is not defined (the fts5 capability is missing),
# omit this file.
ifcapable !fts5 {
  finish_test
  return
}

if 1 {
#-------------------------------------------------------------------------
# With pgsz set to 32, insert one document made of two "x" tokens, fifty
# "y" tokens and two "z" tokens inside a single transaction. The resulting
# index must pass the integrity-check.
#
set doc "x x [string repeat {y } 50]z z"
do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
  BEGIN;
    INSERT INTO t1 VALUES($doc);
  COMMIT;
}

do_execsql_test 1.1 {
  INSERT INTO t1(t1) VALUES('integrity-check');
}

#-------------------------------------------------------------------------
# Insert two three-token documents as separate statements, run 'optimize',
# and check that the index still passes the integrity-check.
#
reset_db
do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
  INSERT INTO t1 VALUES('a b c');
  INSERT INTO t1 VALUES('d e f');
  INSERT INTO t1(t1) VALUES('optimize');
}

do_execsql_test 2.1 {
  INSERT INTO t1(t1) VALUES('integrity-check');
} {}


#-------------------------------------------------------------------------
# On a table created with prefix='1,2', a prefix query issued inside an
# open transaction must return the row inserted earlier in that transaction.
# Test 3.0 issues BEGIN without a matching COMMIT, so the integrity-check in
# 3.1 runs while that transaction is still open.
#
reset_db
do_execsql_test 3.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x, prefix='1,2');
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);
  BEGIN;
  INSERT INTO t1 VALUES('one');
  SELECT * FROM t1 WHERE t1 MATCH 'o*';
} {one}

do_execsql_test 3.1 {
  INSERT INTO t1(t1) VALUES('integrity-check');
} {}

#-------------------------------------------------------------------------
# Insert four single-token rows as separate statements, run 'optimize',
# and check that the optimized index passes the integrity-check.
#
reset_db
do_execsql_test 4.1 {
  CREATE VIRTUAL TABLE t11 USING fts5(content);
  INSERT INTO t11(t11, rank) VALUES('pgsz', 32);
  INSERT INTO t11 VALUES('another');
  INSERT INTO t11 VALUES('string');
  INSERT INTO t11 VALUES('of');
  INSERT INTO t11 VALUES('text');
}
do_test 4.2 {
  execsql { INSERT INTO t11(t11) VALUES('optimize') }
} {}
do_execsql_test 4.3 {
  INSERT INTO t11(t11) VALUES('integrity-check');
} {}

# Debugging aid: uncomment to print the decoded form of every t11_data row.
#db eval { SELECT fts5_decode(rowid, block) as x FROM t11_data } { puts $x }

#-------------------------------------------------------------------------
# Insert eight identical rows, each "x y " repeated five times, in a single
# transaction with pgsz set to 32. A query for 'y' must return all eight
# rowids, both in ascending and in descending rowid order.
#
reset_db
set doc [string repeat "x y " 5]
do_execsql_test 5.1 {
  CREATE VIRTUAL TABLE yy USING fts5(content);
  INSERT INTO yy(yy, rank) VALUES('pgsz', 32);
  BEGIN;
    INSERT INTO yy VALUES($doc);
    INSERT INTO yy VALUES($doc);
    INSERT INTO yy VALUES($doc);
    INSERT INTO yy VALUES($doc);
    INSERT INTO yy VALUES($doc);
    INSERT INTO yy VALUES($doc);
    INSERT INTO yy VALUES($doc);
    INSERT INTO yy VALUES($doc);
  COMMIT;
}

do_execsql_test 5.2 {
  SELECT rowid FROM yy WHERE yy MATCH 'y' ORDER BY rowid ASC
} {1 2 3 4 5 6 7 8}

do_execsql_test 5.3 {
  SELECT rowid FROM yy WHERE yy MATCH 'y' ORDER BY rowid DESC
} {8 7 6 5 4 3 2 1}

# Debugging aid: uncomment to print the decoded form of every yy_data row.
#db eval { SELECT fts5_decode(rowid, block) as x FROM yy_data } { puts $x }

#-------------------------------------------------------------------------
# Prefix queries ('a*') against a table with pgsz set to 32: first with a
# single matching row, then with two and with eight rows written in one
# transaction, and finally with one row holding six matching tokens.
#
# These tests are numbered 6.x so that their names do not collide with the
# 5.x tests of the preceding section. Duplicate names would make a failure
# report ambiguous.
#
reset_db
do_execsql_test 6.1 {
  CREATE VIRTUAL TABLE tt USING fts5(content);
  INSERT INTO tt(tt, rank) VALUES('pgsz', 32);
  INSERT INTO tt VALUES('aa');
}

do_execsql_test 6.2 {
  SELECT rowid FROM tt WHERE tt MATCH 'a*';
} {1}

do_execsql_test 6.3 {
  DELETE FROM tt;
  BEGIN;
    INSERT INTO tt VALUES('aa');
    INSERT INTO tt VALUES('ab');
  COMMIT;
} {}

do_execsql_test 6.4 {
  SELECT rowid FROM tt WHERE tt MATCH 'a*';
} {1 2}

}
# The brace above closes the "if 1" block opened near the top of this file.
# Tests 6.5 and 6.6 reuse table tt created by test 6.1 inside that block.

do_execsql_test 6.5 {
  DELETE FROM tt;
  BEGIN;
    INSERT INTO tt VALUES('aa');
    INSERT INTO tt VALUES('ab');
    INSERT INTO tt VALUES('aa');
    INSERT INTO tt VALUES('ab');
    INSERT INTO tt VALUES('aa');
    INSERT INTO tt VALUES('ab');
    INSERT INTO tt VALUES('aa');
    INSERT INTO tt VALUES('ab');
  COMMIT;
  SELECT rowid FROM tt WHERE tt MATCH 'a*';
} {1 2 3 4 5 6 7 8}

do_execsql_test 6.6 {
  INSERT INTO tt(tt) VALUES('integrity-check');
}

# A single row whose six tokens all match the prefix is returned once.
reset_db
do_execsql_test 6.7 {
  CREATE VIRTUAL TABLE tt USING fts5(content);
  INSERT INTO tt(tt, rank) VALUES('pgsz', 32);
  INSERT INTO tt VALUES('aa ab ac ad ae af');
}

do_execsql_test 6.8 {
  SELECT rowid FROM tt WHERE tt MATCH 'a*';
} {1}

finish_test

Changes to ext/fts5/test/fts5version.test.

26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
do_execsql_test 1.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(one);
  INSERT INTO t1 VALUES('a b c d');
} {}

do_execsql_test 1.2 {
  SELECT * FROM t1_config WHERE k='version'
} {version 3}

do_execsql_test 1.3 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'a';
} {1}

do_execsql_test 1.4 {
  UPDATE t1_config set v=4 WHERE k='version';
} 

do_test 1.5 {
  db close
  sqlite3 db test.db
  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' }
} {1 {invalid fts5 file format (found 4, expected 3) - run 'rebuild'}}

do_test 1.6 {
  db close
  sqlite3 db test.db
  catchsql { INSERT INTO t1 VALUES('x y z') }
} {1 {invalid fts5 file format (found 4, expected 3) - run 'rebuild'}}

do_test 1.7 {
  execsql { DELETE FROM t1_config WHERE k='version' }
  db close
  sqlite3 db test.db
  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' }
} {1 {invalid fts5 file format (found 0, expected 3) - run 'rebuild'}}


finish_test








|






|






|





|






|




26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Create a small fts5 table to inspect.
do_execsql_test 1.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(one);
  INSERT INTO t1 VALUES('a b c d');
} {}

# A freshly created table records file-format version 4 in its config table.
do_execsql_test 1.2 {
  SELECT * FROM t1_config WHERE k='version'
} {version 4}

# With the expected version in place, queries work.
do_execsql_test 1.3 {
  SELECT rowid FROM t1 WHERE t1 MATCH 'a';
} {1}

# Overwrite the stored version with a value other than the expected one.
do_execsql_test 1.4 {
  UPDATE t1_config set v=5 WHERE k='version';
} 

# After reopening the database, a query is refused with a message naming
# both the version found and the version expected.
do_test 1.5 {
  db close
  sqlite3 db test.db
  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' }
} {1 {invalid fts5 file format (found 5, expected 4) - run 'rebuild'}}

# The same error is raised for a write.
do_test 1.6 {
  db close
  sqlite3 db test.db
  catchsql { INSERT INTO t1 VALUES('x y z') }
} {1 {invalid fts5 file format (found 5, expected 4) - run 'rebuild'}}

# With the version row deleted altogether, the version is reported as 0.
do_test 1.7 {
  execsql { DELETE FROM t1_config WHERE k='version' }
  db close
  sqlite3 db test.db
  catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' }
} {1 {invalid fts5 file format (found 0, expected 4) - run 'rebuild'}}


finish_test

Changes to ext/fts5/tool/loadfts5.tcl.

14
15
16
17
18
19
20






21
22
23
24
25
26
27
  foreach f [glob -nocomplain -dir $dir *] {
    if {$::O(limit) && $::nRow>=$::O(limit)} break
    if {[file isdir $f]} {
      load_hierachy $f
    } else {
      db eval { INSERT INTO t1 VALUES($f, loadfile($f)) }
      incr ::nRow







      if {($::nRow % $::nRowPerDot)==0} {
        puts -nonewline .
        if {($::nRow % (65*$::nRowPerDot))==0} { puts "" }
        flush stdout
      }








>
>
>
>
>
>







14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
  foreach f [glob -nocomplain -dir $dir *] {
    if {$::O(limit) && $::nRow>=$::O(limit)} break
    if {[file isdir $f]} {
      load_hierachy $f
    } else {
      db eval { INSERT INTO t1 VALUES($f, loadfile($f)) }
      incr ::nRow

      if {$::O(trans) && ($::nRow % $::O(trans))==0} {
        db eval { COMMIT }
        db eval { INSERT INTO t1(t1) VALUES('integrity-check') }
        db eval { BEGIN }
      }

      if {($::nRow % $::nRowPerDot)==0} {
        puts -nonewline .
        if {($::nRow % (65*$::nRowPerDot))==0} { puts "" }
        flush stdout
      }

37
38
39
40
41
42
43

44
45
46
47
48
49
50
51
52
53

54
55
56
57
58
59
60
  puts stderr "  -fts5        (use fts5)"
  puts stderr "  -porter      (use porter tokenizer)"
  puts stderr "  -delete      (delete the database file before starting)"
  puts stderr "  -limit N     (load no more than N documents)"
  puts stderr "  -automerge N (set the automerge parameter to N)"
  puts stderr "  -crisismerge N (set the crisismerge parameter to N)"
  puts stderr "  -prefix PREFIX (comma separated prefix= argument)"

  exit 1
}

set O(vtab)       fts5
set O(tok)        ""
set O(limit)      0
set O(delete)     0
set O(automerge)  -1
set O(crisismerge)  -1
set O(prefix)     ""


if {[llength $argv]<2} usage
set nOpt [expr {[llength $argv]-2}]
for {set i 0} {$i < $nOpt} {incr i} {
  set arg [lindex $argv $i]
  switch -- [lindex $argv $i] {
    -fts4 {







>










>







43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
  puts stderr "  -fts5        (use fts5)"
  puts stderr "  -porter      (use porter tokenizer)"
  puts stderr "  -delete      (delete the database file before starting)"
  puts stderr "  -limit N     (load no more than N documents)"
  puts stderr "  -automerge N (set the automerge parameter to N)"
  puts stderr "  -crisismerge N (set the crisismerge parameter to N)"
  puts stderr "  -prefix PREFIX (comma separated prefix= argument)"
  puts stderr "  -trans N     (commit after N inserts - 0 == never)"
  exit 1
}

set O(vtab)       fts5
set O(tok)        ""
set O(limit)      0
set O(delete)     0
set O(automerge)  -1
set O(crisismerge)  -1
set O(prefix)     ""
set O(trans)      0

if {[llength $argv]<2} usage
set nOpt [expr {[llength $argv]-2}]
for {set i 0} {$i < $nOpt} {incr i} {
  set arg [lindex $argv $i]
  switch -- [lindex $argv $i] {
    -fts4 {
73
74
75
76
77
78
79





80
81
82
83
84
85
86
      set O(delete) 1
    }

    -limit {
      if { [incr i]>=$nOpt } usage
      set O(limit) [lindex $argv $i]
    }





    
    -automerge {
      if { [incr i]>=$nOpt } usage
      set O(automerge) [lindex $argv $i]
    }

    -crisismerge {







>
>
>
>
>







81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
      set O(delete) 1
    }

    -limit {
      if { [incr i]>=$nOpt } usage
      set O(limit) [lindex $argv $i]
    }

    -trans {
      if { [incr i]>=$nOpt } usage
      set O(trans) [lindex $argv $i]
    }
    
    -automerge {
      if { [incr i]>=$nOpt } usage
      set O(automerge) [lindex $argv $i]
    }

    -crisismerge {
100
101
102
103
104
105
106

107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129

130
131
132
}

set dbfile [lindex $argv end-1]
if {$O(delete)} { file delete -force $dbfile }
sqlite3 db $dbfile
catch { load_static_extension db fts5 }
db func loadfile loadfile


db transaction {
  set pref ""
  if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" }
  catch {
    db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)"
    db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);"
  }
  if {$O(automerge)>=0} {
    if {$O(vtab) == "fts5"} {
      db eval { INSERT INTO t1(t1, rank) VALUES('automerge', $O(automerge)) }
    } else {
      db eval { INSERT INTO t1(t1) VALUES('automerge=' || $O(automerge)) }
    }
  }
  if {$O(crisismerge)>=0} {
    if {$O(vtab) == "fts5"} {
      db eval {INSERT INTO t1(t1, rank) VALUES('crisismerge', $O(crisismerge))}
    } else {
    }
  }
  load_hierachy [lindex $argv end]
}











>

|




















<
>



113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142

143
144
145
146
}

# Main script. The last two command-line arguments are the database file
# and the path handed to load_hierachy.
set dbfile [lindex $argv end-1]
if {$O(delete)} { file delete -force $dbfile }
sqlite3 db $dbfile
# An error from load_static_extension is ignored.
catch { load_static_extension db fts5 }
db func loadfile loadfile
db eval "PRAGMA page_size=4096"

# The whole load runs between an explicit BEGIN and COMMIT.
db eval BEGIN
  set pref ""
  if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" }
  # Errors from creating the table or from setting pgsz are ignored.
  catch {
    db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)"
    db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);"
  }
  # Apply -automerge if it was given. fts5 takes the value in the rank
  # column; any other module gets the 'automerge=N' string form.
  if {$O(automerge)>=0} {
    if {$O(vtab) == "fts5"} {
      db eval { INSERT INTO t1(t1, rank) VALUES('automerge', $O(automerge)) }
    } else {
      db eval { INSERT INTO t1(t1) VALUES('automerge=' || $O(automerge)) }
    }
  }
  # Apply -crisismerge if it was given. Nothing is done for modules other
  # than fts5.
  if {$O(crisismerge)>=0} {
    if {$O(vtab) == "fts5"} {
      db eval {INSERT INTO t1(t1, rank) VALUES('crisismerge', $O(crisismerge))}
    } else {
    }
  }
  load_hierachy [lindex $argv end]

db eval COMMIT



Changes to main.mk.

328
329
330
331
332
333
334



335

336
337
338
339
340
341
342
  $(TOP)/ext/misc/series.c \
  $(TOP)/ext/misc/spellfix.c \
  $(TOP)/ext/misc/totype.c \
  $(TOP)/ext/misc/wholenumber.c \
  $(TOP)/ext/misc/vfslog.c \
  $(TOP)/ext/fts5/fts5_tcl.c \
  $(TOP)/ext/fts5/fts5_test_mi.c \



  fts5.c



#TESTSRC += $(TOP)/ext/fts2/fts2_tokenizer.c
#TESTSRC += $(TOP)/ext/fts3/fts3_tokenizer.c

TESTSRC2 = \
  $(TOP)/src/attach.c \







>
>
>
|
>







328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
  $(TOP)/ext/misc/series.c \
  $(TOP)/ext/misc/spellfix.c \
  $(TOP)/ext/misc/totype.c \
  $(TOP)/ext/misc/wholenumber.c \
  $(TOP)/ext/misc/vfslog.c \
  $(TOP)/ext/fts5/fts5_tcl.c \
  $(TOP)/ext/fts5/fts5_test_mi.c \
  $(FTS5_SRC)


#  fts5.c



#TESTSRC += $(TOP)/ext/fts2/fts2_tokenizer.c
#TESTSRC += $(TOP)/ext/fts3/fts3_tokenizer.c

TESTSRC2 = \
  $(TOP)/src/attach.c \

Changes to test/permutations.test.

254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
} -files [glob -nocomplain $::testdir/../ext/fts5/test/*.test]

test_suite "fts5-light" -prefix "" -description {
  All FTS5 tests.
} -files [
  test_set \
      [glob -nocomplain $::testdir/../ext/fts5/test/*.test] \
      -exclude *corrupt* *fault* *big* *fts5aj*
]

test_suite "nofaultsim" -prefix "" -description {
  "Very" quick test suite. Runs in less than 5 minutes on a workstation. 
  This test suite is the same as the "quick" tests, except that some files
  that test malloc and IO errors are omitted.
} -files [







|







254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
} -files [glob -nocomplain $::testdir/../ext/fts5/test/*.test]

# The "fts5-light" suite: every test script under ext/fts5/test except those
# whose file names match *corrupt*. The description below says so explicitly,
# so that it matches the -exclude list.
test_suite "fts5-light" -prefix "" -description {
  All FTS5 tests except for those in files matching *corrupt*.
} -files [
  test_set \
      [glob -nocomplain $::testdir/../ext/fts5/test/*.test] \
      -exclude *corrupt*
]

test_suite "nofaultsim" -prefix "" -description {
  "Very" quick test suite. Runs in less than 5 minutes on a workstation. 
  This test suite is the same as the "quick" tests, except that some files
  that test malloc and IO errors are omitted.
} -files [