/ Check-in [62f2ff20]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Change the position list format so that its size in bytes is stored at the start of the list itself.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: 62f2ff20418702ed0fbf708369edf5638445b51b
User & Date: dan 2014-07-01 20:45:18
Context
2014-07-02
20:18
Add support for phrase queries to fts5. check-in: 2e5652e6 user: dan tags: fts5
2014-07-01
20:45
Change the position list format so that its size in bytes is stored at the start of the list itself. check-in: 62f2ff20 user: dan tags: fts5
2014-06-26
12:31
Fix minor problems in term matching. check-in: 94eeb077 user: dan tags: fts5
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5Int.h.

71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
...
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
** Interface to code in fts5_index.c. fts5_index.c contains contains code
** to access the data stored in the %_data table.
*/

typedef struct Fts5Index Fts5Index;
typedef struct Fts5IndexIter Fts5IndexIter;


/*
** Values used as part of the flags argument passed to IndexQuery().
*/
#define FTS5INDEX_QUERY_PREFIX 0x0001       /* Prefix query */
#define FTS5INDEX_QUERY_ASC    0x0002       /* Docs in ascending rowid order */
#define FTS5INDEX_QUERY_MATCH  0x0004       /* Use the iMatch arg to Next() */

................................................................................
** Docid list iteration.
*/
int  sqlite3Fts5IterEof(Fts5IndexIter*);
void sqlite3Fts5IterNext(Fts5IndexIter*, i64 iMatch);
i64  sqlite3Fts5IterRowid(Fts5IndexIter*);

/*
** Position list iteration.
**
**   for(
**     iPos=sqlite3Fts5IterFirstPos(pIter, iCol); 
**     iPos>=0; 
**     iPos=sqlite3Fts5IterNextPos(pIter)
**   ){
**     // token appears at position iPos of column iCol of the current document
**   }
*/
// int sqlite3Fts5IterFirstPos(Fts5IndexIter*, int iCol);
// int sqlite3Fts5IterNextPos(Fts5IndexIter*);

/*
** Close an iterator opened by sqlite3Fts5IndexQuery().
*/
void sqlite3Fts5IterClose(Fts5IndexIter*);

/*







<







 







|
<
<
<
<
<
<
<
<

|
<







71
72
73
74
75
76
77

78
79
80
81
82
83
84
...
112
113
114
115
116
117
118
119








120
121

122
123
124
125
126
127
128
** Interface to code in fts5_index.c. fts5_index.c contains contains code
** to access the data stored in the %_data table.
*/

typedef struct Fts5Index Fts5Index;
typedef struct Fts5IndexIter Fts5IndexIter;


/*
** Values used as part of the flags argument passed to IndexQuery().
*/
#define FTS5INDEX_QUERY_PREFIX 0x0001       /* Prefix query */
#define FTS5INDEX_QUERY_ASC    0x0002       /* Docs in ascending rowid order */
#define FTS5INDEX_QUERY_MATCH  0x0004       /* Use the iMatch arg to Next() */

................................................................................
** Docid list iteration.
*/
int  sqlite3Fts5IterEof(Fts5IndexIter*);
void sqlite3Fts5IterNext(Fts5IndexIter*, i64 iMatch);
i64  sqlite3Fts5IterRowid(Fts5IndexIter*);

/*
** Obtain the position list that corresponds to the current position.








*/
const u8 *sqlite3Fts5IterPoslist(Fts5IndexIter*, int *pn);


/*
** Close an iterator opened by sqlite3Fts5IndexQuery().
*/
void sqlite3Fts5IterClose(Fts5IndexIter*);

/*

Changes to ext/fts5/fts5_index.c.

106
107
108
109
110
111
112

113
114
115
116
117
118
119
120
121
122
123
124
125
126
...
251
252
253
254
255
256
257

258
259
260
261
262
263
264
...
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314







315
316
317
318
319
320
321
...
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
...
449
450
451
452
453
454
455













456
457
458
459
460
461
462
463
464
465
466
467
468
469
....
1103
1104
1105
1106
1107
1108
1109




1110
1111
1112
1113
1114
1115
1116
....
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224

1225



1226
1227
1228

1229
1230
1231
1232
1233
1234
1235
....
1263
1264
1265
1266
1267
1268
1269

1270
1271




1272
1273
1274
1275
1276
1277
1278
....
1527
1528
1529
1530
1531
1532
1533
































































1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556

1557
1558
1559
1560
1561
1562
1563
1564
1565

1566
1567
1568
1569
1570
1571
1572
1573
1574
1575

1576
1577
1578
1579
1580
1581
1582
....
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597


1598

1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
....
2102
2103
2104
2105
2106
2107
2108



2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
....
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
....
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
....
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
....
2928
2929
2930
2931
2932
2933
2934


2935
2936
2937
2938
2939
2940
2941
2942
....
2985
2986
2987
2988
2989
2990
2991


2992
2993




2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
....
3111
3112
3113
3114
3115
3116
3117

3118
3119
3120
3121
3122
3123
3124
3125
3126















3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
**           varint:  rowid delta (always > 0)
**           poslist: first poslist
**         }
**         0x00 byte
**
**     poslist format:
**

**         collist: collist for column 0
**         zero-or-more {
**           0x01 byte
**           varint: column number (I)
**           collist: collist for column I
**         }
**         0x00 byte
**
**     collist format:
**
**         varint: first offset + 2
**         zero-or-more {
**           varint: offset delta + 2
**         }
................................................................................
# define FTS5_CORRUPT SQLITE_CORRUPT_VTAB
#endif


typedef struct Fts5BtreeIter Fts5BtreeIter;
typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel;
typedef struct Fts5Buffer Fts5Buffer;

typedef struct Fts5Data Fts5Data;
typedef struct Fts5MultiSegIter Fts5MultiSegIter;
typedef struct Fts5NodeIter Fts5NodeIter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5PendingDoclist Fts5PendingDoclist;
typedef struct Fts5PendingPoslist Fts5PendingPoslist;
typedef struct Fts5PosIter Fts5PosIter;
................................................................................
  int rc;                         /* Current error code */

  /* State used by the fts5DataXXX() functions. */
  sqlite3_blob *pReader;          /* RO incr-blob open on %_data table */
  sqlite3_stmt *pWriter;          /* "INSERT ... %_data VALUES(?,?)" */
  sqlite3_stmt *pDeleter;         /* "DELETE FROM %_data ... id>=? AND id<=?" */
};

struct Fts5IndexIter {
  Fts5Index *pIndex;
  Fts5Structure *pStruct;
  Fts5MultiSegIter *pMulti;
};


/*
** Buffer object for the incremental building of string data.
*/
struct Fts5Buffer {
  u8 *p;
  int n;
  int nSpace;
};








/*
** A single record read from the %_data table.
*/
struct Fts5Data {
  u8 *p;                          /* Pointer to buffer containing record */
  int n;                          /* Size of record in bytes */
................................................................................
**   The segment to iterate through.
**
** iLeafPgno:
**   Current leaf page number within segment.
**
** iLeafOffset:
**   Byte offset within the current leaf that is one byte past the end of the
**   rowid field of the current entry. Usually this is the first byte of 
**   the position list data. The exception is if the rowid for the current 
**   entry is the last thing on the leaf page.
**
** pLeaf:
**   Buffer containing current leaf page data. Set to NULL at EOF.
**
** iTermLeafPgno, iTermLeafOffset:
**   Leaf page number containing the last term read from the segment. And
**   the offset immediately following the term data.
................................................................................
  int iTermLeafOffset;

  /* Variables populated based on current entry. */
  Fts5Buffer term;                /* Current term */
  i64 iRowid;                     /* Current rowid */
};














/*
** Object for iterating through a single position list.
*/
struct Fts5PosIter {
  Fts5Data *pLeaf;                /* Current leaf data. NULL -> EOF. */
  i64 iLeafRowid;                 /* Absolute rowid of current leaf */
  int iLeafOffset;                /* Current offset within leaf */

  int iCol;
  int iPos;
};

/*
** Object for iterating through the conents of a single internal node in 
................................................................................
        FTS5_SEGMENT_ROWID(pIter->iIdx, pSeg->iSegid, 0, pIter->iLeafPgno)
    );
  }else{
    pIter->pLeaf = 0;
  }
}





static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
  u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
  int iOff = pIter->iLeafOffset;  /* Offset to read at */
  int nNew;                       /* Bytes of new data */

  iOff += getVarint32(&a[iOff], nNew);
  pIter->term.n = nKeep;
................................................................................
      int n = pIter->pLeaf->n;

      pIter->iLeafOffset = fts5GetU16(&a[2]);
      fts5SegIterLoadTerm(p, pIter, 0);

      while( (res = fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)) ){
        if( res<0 && pIter->iLeafPgno==iPg ){
          /* Search for the end of the current doclist within the current
          ** page. The end of a doclist is marked by a pair of successive
          ** 0x00 bytes.  */
          int iOff;

          for(iOff=pIter->iLeafOffset+1; iOff<n; iOff++){



            if( a[iOff]==0 && a[iOff-1]==0 ) break;
          }
          iOff++;


          /* If the iterator is not yet at the end of the next page, load
          ** the next term and jump to the next iteration of the while()
          ** loop.  */
          if( iOff<n ){
            int nKeep;
            pIter->iLeafOffset = iOff + getVarint32(&a[iOff], nKeep);
................................................................................
    int iOff;
    int bNewTerm = 0;
    int nKeep = 0;

    /* Search for the end of the position list within the current page. */
    u8 *a = pLeaf->p;
    int n = pLeaf->n;

    for(iOff=pIter->iLeafOffset; iOff<n && a[iOff]; iOff++);
    iOff++;





    if( iOff<n ){
      /* The next entry is on the current page */
      u64 iDelta;
      iOff += sqlite3GetVarint(&a[iOff], &iDelta);
      pIter->iLeafOffset = iOff;
      if( iDelta==0 ){
................................................................................
** entry that the iterator currently points to.
*/
static const u8 *fts5MultiIterTerm(Fts5MultiSegIter *pIter, int *pn){
  Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1] ];
  *pn = p->term.n;
  return p->term.p;
}

































































/*
** Read and return the next 32-bit varint from the position-list iterator 
** passed as the second argument.
**
** If an error occurs, zero is returned an an error code left in 
** Fts5Index.rc. If an error has already occurred when this function is
** called, it is a no-op.
*/
static int fts5PosIterReadVarint(Fts5Index *p, Fts5PosIter *pIter){
  int iVal = 0;
  if( p->rc==SQLITE_OK ){
    int iOff = pIter->iLeafOffset;
    if( iOff < pIter->pLeaf->n ){
      pIter->iLeafOffset += getVarint32(&pIter->pLeaf->p[iOff], iVal);
    }else{
      fts5DataRelease(pIter->pLeaf);
      pIter->iLeafRowid++;
      pIter->pLeaf = fts5DataRead(p, pIter->iLeafRowid);
      if( pIter->pLeaf ){
        pIter->iLeafOffset = 4 + getVarint32(&pIter->pLeaf->p[4], iVal);
      }
    }

  }
  return iVal;
}

/*
** Advance the position list iterator to the next entry.
*/
static void fts5PosIterNext(Fts5Index *p, Fts5PosIter *pIter){
  int iVal;

  iVal = fts5PosIterReadVarint(p, pIter);
  if( iVal==0 ){
    fts5DataRelease(pIter->pLeaf);
    pIter->pLeaf = 0;
  }
  else if( iVal==1 ){
    pIter->iCol = fts5PosIterReadVarint(p, pIter);
    pIter->iPos = fts5PosIterReadVarint(p, pIter) - 2;
  }else{
    pIter->iPos += (iVal - 2);

  }
}

/*
** Initialize the Fts5PosIter object passed as the final argument to iterate
** through the position-list associated with the index entry that iterator 
** pMulti currently points to.
................................................................................
static void fts5PosIterInit(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5MultiSegIter *pMulti,       /* Multi-seg iterator to read pos-list from */
  Fts5PosIter *pIter              /* Initialize this object */
){
  if( p->rc==SQLITE_OK ){
    Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1] ];
    int iId = pSeg->pSeg->iSegid;

    memset(pIter, 0, sizeof(*pIter));
    pIter->pLeaf = pSeg->pLeaf;
    pIter->iLeafOffset = pSeg->iLeafOffset;
    pIter->iLeafRowid = FTS5_SEGMENT_ROWID(pSeg->iIdx, iId, 0, pSeg->iLeafPgno);
    fts5DataReference(pIter->pLeaf);


    fts5PosIterNext(p, pIter);

  }
}

/*
** Return true if the position iterator passed as the second argument is
** at EOF. Or if an error has already occurred. Otherwise, return false.
*/
static int fts5PosIterEof(Fts5Index *p, Fts5PosIter *pIter){
  return (p->rc || pIter->pLeaf==0);
}


/*
** Allocate memory. The difference between this function and fts5IdxMalloc()
** is that this increments the Fts5Index.nPendingData variable by the
** number of bytes allocated. It should be used for all allocations used
................................................................................
  /* Append the position list for each rowid */
  for(pPoslist=pDoclist->pPoslist; pPoslist; pPoslist=pPoslist->pNext){
    int i = 0;

    /* Append the rowid itself */
    fts5WriteAppendRowid(p, pWriter, pPoslist->iRowid);




    /* Copy the position list to the output segment */
    while( i<pPoslist->buf.n){
      int iVal;
      i += getVarint32(&pPoslist->buf.p[i], iVal);
      fts5WriteAppendPoslistInt(p, pWriter, iVal);
    }

    /* Write the position list terminator */
    fts5WriteAppendZerobyte(p, pWriter);
  }

  /* Write the doclist terminator */
  fts5WriteAppendZerobyte(p, pWriter);
}

static void fts5WriteFinish(
................................................................................
fflush(stdout);
#endif

  for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, iLvl, nInput, &pIter);
      fts5MultiIterEof(p, pIter)==0;
      fts5MultiIterNext(p, pIter)
  ){
    Fts5PosIter sPos;             /* Used to iterate through position list */
    int iCol = 0;                 /* Current output column */
    int iPos = 0;                 /* Current output position */
    int nTerm;
    const u8 *pTerm = fts5MultiIterTerm(pIter, &nTerm);

    if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){
      if( writer.nLeafWritten>nRem ) break;

      /* This is a new term. Append a term to the output segment. */
................................................................................
      bRequireDoclistTerm = 1;
    }

    /* Append the rowid to the output */
    fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));

    /* Copy the position list from input to output */
    for(fts5PosIterInit(p, pIter, &sPos);
        fts5PosIterEof(p, &sPos)==0;
        fts5PosIterNext(p, &sPos)
    ){
      if( sPos.iCol!=iCol ){
        fts5WriteAppendPoslistInt(p, &writer, 1);
        fts5WriteAppendPoslistInt(p, &writer, sPos.iCol);
        iCol = sPos.iCol;
        iPos = 0;
      }
      fts5WriteAppendPoslistInt(p, &writer, (sPos.iPos-iPos) + 2);
      iPos = sPos.iPos;
    }
    fts5WriteAppendZerobyte(p, &writer);
  }

  /* Flush the last leaf page to disk. Set the output segment b-tree height
  ** and last leaf page number at the same time.  */
  fts5WriteFinish(p, &writer, &pSeg->nHeight, &pSeg->pgnoLast);

  if( fts5MultiIterEof(p, pIter) ){
................................................................................
*/
static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
  int iOff = 0;
  while( iOff<n ){
    int iVal;
    iOff += getVarint32(&a[iOff], iVal);
    fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal);
    if( iVal==0 ) break;
  }
  return iOff;
}

/*
** The start of buffer (a/n) contains the start of a doclist. The doclist
** may or may not finish within the buffer. This function appends a text
................................................................................
  int iOff = 0;

  if( iOff<n ){
    iOff += sqlite3GetVarint(&a[iOff], (u64*)&iDocid);
    fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid);
  }
  while( iOff<n ){


    iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], n-iOff);
    if( iOff<n ){
      i64 iDelta;
      iOff += sqlite3GetVarint(&a[iOff], (u64*)&iDelta);
      if( iDelta==0 ) return iOff;
      iDocid -= iDelta;
      fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid);
    }
................................................................................
      int iTermOff = 0;
      int iRowidOff = 0;
      int iOff;
      int nKeep = 0;

      iRowidOff = fts5GetU16(&a[0]);
      iTermOff = fts5GetU16(&a[2]);


      iOff = 4;
      if( iTermOff!=4 && iRowidOff!=4 ){




        iOff += fts5DecodePoslist(&rc, &s, &a[iOff], n-iOff);
        if( iRowidOff==0 ) iOff++;
      }

      assert( iRowidOff==0 || iOff==iRowidOff );
      if( iRowidOff ){
        iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff);
      }

      assert( iTermOff==0 || iOff==iTermOff );
................................................................................
  return fts5MultiIterEof(pIter->pIndex, pIter->pMulti);
}

/*
** Move to the next matching rowid. 
*/
void sqlite3Fts5IterNext(Fts5IndexIter *pIter, i64 iMatch){

  fts5MultiIterNext(pIter->pIndex, pIter->pMulti);
}

/*
** Return the current rowid.
*/
i64 sqlite3Fts5IterRowid(Fts5IndexIter *pIter){
  return fts5MultiIterRowid(pIter->pMulti);
}
















/*
** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
*/
void sqlite3Fts5IterClose(Fts5IndexIter *pIter){
  if( pIter ){
    fts5MultiIterFree(pIter->pIndex, pIter->pMulti);
    fts5StructureRelease(pIter->pStruct);
    fts5CloseReader(pIter->pIndex);
    sqlite3_free(pIter);
  }
}








>






<







 







>







 







<
<
<
<
<
<
<









>
>
>
>
>
>
>







 







|
|
|







 







>
>
>
>
>
>
>
>
>
>
>
>
>




|
|
<







 







>
>
>
>







 







|
|
<
|
>
|
>
>
>
|
|
<
>







 







>
|
|
>
>
>
>







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>












|
|
|
<
<
|
<
<
<
|
<
>









>

<
|
<
<
|
|
|
|
|
>







 







<
<

<
<
<
<
>
>
|
>








|







 







>
>
>






<
<
<







 







|
|
<







 







|
|
|
|
|
|
|
|
|
|
<
<
<
<







 







<







 







>
>
|







 







>
>
|
|
>
>
>
>
|
<
|







 







>









>
>
>
>
>
>
>
>
>
>
>
>
>
>
>













106
107
108
109
110
111
112
113
114
115
116
117
118
119

120
121
122
123
124
125
126
...
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
...
293
294
295
296
297
298
299







300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
...
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
...
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475

476
477
478
479
480
481
482
....
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
....
1231
1232
1233
1234
1235
1236
1237
1238
1239

1240
1241
1242
1243
1244
1245
1246
1247

1248
1249
1250
1251
1252
1253
1254
1255
....
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
....
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637


1638



1639

1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651

1652


1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
....
1667
1668
1669
1670
1671
1672
1673


1674




1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
....
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197



2198
2199
2200
2201
2202
2203
2204
....
2373
2374
2375
2376
2377
2378
2379
2380
2381

2382
2383
2384
2385
2386
2387
2388
....
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410




2411
2412
2413
2414
2415
2416
2417
....
2981
2982
2983
2984
2985
2986
2987

2988
2989
2990
2991
2992
2993
2994
....
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
....
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076

3077
3078
3079
3080
3081
3082
3083
3084
....
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
**           varint:  rowid delta (always > 0)
**           poslist: first poslist
**         }
**         0x00 byte
**
**     poslist format:
**
**         varint: size of poslist in bytes. not including this field.
**         collist: collist for column 0
**         zero-or-more {
**           0x01 byte
**           varint: column number (I)
**           collist: collist for column I
**         }

**
**     collist format:
**
**         varint: first offset + 2
**         zero-or-more {
**           varint: offset delta + 2
**         }
................................................................................
# define FTS5_CORRUPT SQLITE_CORRUPT_VTAB
#endif


typedef struct Fts5BtreeIter Fts5BtreeIter;
typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel;
typedef struct Fts5Buffer Fts5Buffer;
typedef struct Fts5ChunkIter Fts5ChunkIter;
typedef struct Fts5Data Fts5Data;
typedef struct Fts5MultiSegIter Fts5MultiSegIter;
typedef struct Fts5NodeIter Fts5NodeIter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5PendingDoclist Fts5PendingDoclist;
typedef struct Fts5PendingPoslist Fts5PendingPoslist;
typedef struct Fts5PosIter Fts5PosIter;
................................................................................
  int rc;                         /* Current error code */

  /* State used by the fts5DataXXX() functions. */
  sqlite3_blob *pReader;          /* RO incr-blob open on %_data table */
  sqlite3_stmt *pWriter;          /* "INSERT ... %_data VALUES(?,?)" */
  sqlite3_stmt *pDeleter;         /* "DELETE FROM %_data ... id>=? AND id<=?" */
};








/*
** Buffer object for the incremental building of string data.
*/
struct Fts5Buffer {
  u8 *p;
  int n;
  int nSpace;
};

struct Fts5IndexIter {
  Fts5Index *pIndex;
  Fts5Structure *pStruct;
  Fts5MultiSegIter *pMulti;
  Fts5Buffer poslist;             /* Buffer containing current poslist */
};

/*
** A single record read from the %_data table.
*/
struct Fts5Data {
  u8 *p;                          /* Pointer to buffer containing record */
  int n;                          /* Size of record in bytes */
................................................................................
**   The segment to iterate through.
**
** iLeafPgno:
**   Current leaf page number within segment.
**
** iLeafOffset:
**   Byte offset within the current leaf that is one byte past the end of the
**   rowid field of the current entry. Usually this is the size field of the
**   position list data. The exception is if the rowid for the current entry 
**   is the last thing on the leaf page.
**
** pLeaf:
**   Buffer containing current leaf page data. Set to NULL at EOF.
**
** iTermLeafPgno, iTermLeafOffset:
**   Leaf page number containing the last term read from the segment. And
**   the offset immediately following the term data.
................................................................................
  int iTermLeafOffset;

  /* Variables populated based on current entry. */
  Fts5Buffer term;                /* Current term */
  i64 iRowid;                     /* Current rowid */
};

/*
** Object for iterating through paginated data.
*/
struct Fts5ChunkIter {
  Fts5Data *pLeaf;                /* Current leaf data. NULL -> EOF. */
  i64 iLeafRowid;                 /* Absolute rowid of current leaf */
  int nRem;                       /* Remaining bytes of data to read */

  /* Output parameters */
  u8 *p;                          /* Pointer to chunk of data */
  int n;                          /* Size of buffer p in bytes */
};

/*
** Object for iterating through a single position list.
*/
struct Fts5PosIter {
  Fts5ChunkIter chunk;            /* Current chunk of data */
  int iOff;                       /* Offset within chunk data */


  int iCol;
  int iPos;
};

/*
** Object for iterating through the conents of a single internal node in 
................................................................................
        FTS5_SEGMENT_ROWID(pIter->iIdx, pSeg->iSegid, 0, pIter->iLeafPgno)
    );
  }else{
    pIter->pLeaf = 0;
  }
}

/*
** Leave pIter->iLeafOffset as the offset to the size field of the first
** position list. The position list belonging to document pIter->iRowid.
*/
static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
  u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
  int iOff = pIter->iLeafOffset;  /* Offset to read at */
  int nNew;                       /* Bytes of new data */

  iOff += getVarint32(&a[iOff], nNew);
  pIter->term.n = nKeep;
................................................................................
      int n = pIter->pLeaf->n;

      pIter->iLeafOffset = fts5GetU16(&a[2]);
      fts5SegIterLoadTerm(p, pIter, 0);

      while( (res = fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)) ){
        if( res<0 && pIter->iLeafPgno==iPg ){
          int iOff = pIter->iLeafOffset;
          while( iOff<n ){

            int nPos;
            iOff += getVarint32(&a[iOff], nPos);
            iOff += nPos;
            if( iOff<n ){
              u64 rowid;
              iOff += getVarint(&a[iOff], &rowid);
              if( rowid==0 ) break;
            }

          }

          /* If the iterator is not yet at the end of the next page, load
          ** the next term and jump to the next iteration of the while()
          ** loop.  */
          if( iOff<n ){
            int nKeep;
            pIter->iLeafOffset = iOff + getVarint32(&a[iOff], nKeep);
................................................................................
    int iOff;
    int bNewTerm = 0;
    int nKeep = 0;

    /* Search for the end of the position list within the current page. */
    u8 *a = pLeaf->p;
    int n = pLeaf->n;

    iOff = pIter->iLeafOffset;
    if( iOff<=n ){
      int nPoslist;
      iOff += getVarint32(&a[iOff], nPoslist);
      iOff += nPoslist;
    }

    if( iOff<n ){
      /* The next entry is on the current page */
      u64 iDelta;
      iOff += sqlite3GetVarint(&a[iOff], &iDelta);
      pIter->iLeafOffset = iOff;
      if( iDelta==0 ){
................................................................................
** entry that the iterator currently points to.
*/
static const u8 *fts5MultiIterTerm(Fts5MultiSegIter *pIter, int *pn){
  Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1] ];
  *pn = p->term.n;
  return p->term.p;
}

/*
** Return true if the chunk iterator passed as the second argument is
** at EOF. Or if an error has already occurred. Otherwise, return false.
*/
static int fts5ChunkIterEof(Fts5Index *p, Fts5ChunkIter *pIter){
  return (p->rc || pIter->pLeaf==0);
}

/*
** Advance the chunk-iterator to the next chunk of data to read.
*/
static void fts5ChunkIterNext(Fts5Index *p, Fts5ChunkIter *pIter){
  assert( pIter->nRem>=pIter->n );
  pIter->nRem -= pIter->n;
  fts5DataRelease(pIter->pLeaf);
  pIter->pLeaf = 0;
  pIter->p = 0;
  if( pIter->nRem>0 ){
    Fts5Data *pLeaf;
    pIter->iLeafRowid++;
    pLeaf = pIter->pLeaf = fts5DataRead(p, pIter->iLeafRowid);
    if( pLeaf ){
      pIter->n = MIN(pIter->nRem, pLeaf->n-4);
      pIter->p = pLeaf->p+4;
    }
  }
}

/*
** Intialize the chunk iterator to read the position list data for which 
** the size field is at offset iOff of leaf pLeaf. 
*/
static void fts5ChunkIterInit(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pSeg,              /* Segment iterator to read poslist from */
  Fts5ChunkIter *pIter            /* Initialize this object */
){
  int iId = pSeg->pSeg->iSegid;
  i64 rowid = FTS5_SEGMENT_ROWID(pSeg->iIdx, iId, 0, pSeg->iLeafPgno);
  Fts5Data *pLeaf = pSeg->pLeaf;
  int iOff = pSeg->iLeafOffset;

  memset(pIter, 0, sizeof(*pIter));
  pIter->iLeafRowid = rowid;
  if( iOff<pLeaf->n ){
    fts5DataReference(pLeaf);
    pIter->pLeaf = pLeaf;
  }else{
    pIter->nRem = 1;
    fts5ChunkIterNext(p, pIter);
    if( p->rc ) return;
    iOff = 4;
    pLeaf = pIter->pLeaf;
  }

  iOff += getVarint32(&pLeaf->p[iOff], pIter->nRem);
  pIter->n = MIN(pLeaf->n - iOff, pIter->nRem);
  pIter->p = pLeaf->p + iOff;

  if( pIter->n==0 ){
    fts5ChunkIterNext(p, pIter);
  }
}

/*
** Read and return the next 32-bit varint from the position-list iterator 
** passed as the second argument.
**
** If an error occurs, zero is returned an an error code left in 
** Fts5Index.rc. If an error has already occurred when this function is
** called, it is a no-op.
*/
static int fts5PosIterReadVarint(Fts5Index *p, Fts5PosIter *pIter){
  int iVal = 0;
  if( p->rc==SQLITE_OK ){
    if( pIter->iOff>=pIter->chunk.n ){
      fts5ChunkIterNext(p, &pIter->chunk);
      if( fts5ChunkIterEof(p, &pIter->chunk) ) return 0;


      pIter->iOff = 0;



    }

    pIter->iOff += getVarint32(&pIter->chunk.p[pIter->iOff], iVal);
  }
  return iVal;
}

/*
** Advance the position list iterator to the next entry.
*/
static void fts5PosIterNext(Fts5Index *p, Fts5PosIter *pIter){
  int iVal;
  assert( fts5ChunkIterEof(p, &pIter->chunk)==0 );
  iVal = fts5PosIterReadVarint(p, pIter);

  if( fts5ChunkIterEof(p, &pIter->chunk)==0 ){


    if( iVal==1 ){
      pIter->iCol = fts5PosIterReadVarint(p, pIter);
      pIter->iPos = fts5PosIterReadVarint(p, pIter) - 2;
    }else{
      pIter->iPos += (iVal - 2);
    }
  }
}

/*
** Initialize the Fts5PosIter object passed as the final argument to iterate
** through the position-list associated with the index entry that iterator 
** pMulti currently points to.
................................................................................
static void fts5PosIterInit(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5MultiSegIter *pMulti,       /* Multi-seg iterator to read pos-list from */
  Fts5PosIter *pIter              /* Initialize this object */
){
  if( p->rc==SQLITE_OK ){
    Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1] ];


    memset(pIter, 0, sizeof(*pIter));




    fts5ChunkIterInit(p, pSeg, &pIter->chunk);
    if( fts5ChunkIterEof(p, &pIter->chunk)==0 ){
      fts5PosIterNext(p, pIter);
    }
  }
}

/*
** Return true if the position iterator passed as the second argument is
** at EOF. Or if an error has already occurred. Otherwise, return false.
*/
static int fts5PosIterEof(Fts5Index *p, Fts5PosIter *pIter){
  return (p->rc || pIter->chunk.pLeaf==0);
}


/*
** Allocate memory. The difference between this function and fts5IdxMalloc()
** is that this increments the Fts5Index.nPendingData variable by the
** number of bytes allocated. It should be used for all allocations used
................................................................................
  /* Append the position list for each rowid */
  for(pPoslist=pDoclist->pPoslist; pPoslist; pPoslist=pPoslist->pNext){
    int i = 0;

    /* Append the rowid itself */
    fts5WriteAppendRowid(p, pWriter, pPoslist->iRowid);

    /* Append the size of the position list in bytes */
    fts5WriteAppendPoslistInt(p, pWriter, pPoslist->buf.n);

    /* Copy the position list to the output segment */
    while( i<pPoslist->buf.n){
      int iVal;
      i += getVarint32(&pPoslist->buf.p[i], iVal);
      fts5WriteAppendPoslistInt(p, pWriter, iVal);
    }



  }

  /* Write the doclist terminator */
  fts5WriteAppendZerobyte(p, pWriter);
}

static void fts5WriteFinish(
................................................................................
fflush(stdout);
#endif

  for(fts5MultiIterNew(p, pStruct, iIdx, 0, 0, iLvl, nInput, &pIter);
      fts5MultiIterEof(p, pIter)==0;
      fts5MultiIterNext(p, pIter)
  ){
    Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1] ];
    Fts5ChunkIter sPos;           /* Used to iterate through position list */

    int nTerm;
    const u8 *pTerm = fts5MultiIterTerm(pIter, &nTerm);

    if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){
      if( writer.nLeafWritten>nRem ) break;

      /* This is a new term. Append a term to the output segment. */
................................................................................
      bRequireDoclistTerm = 1;
    }

    /* Append the rowid to the output */
    fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));

    /* Copy the position list from input to output */
    fts5ChunkIterInit(p, pSeg, &sPos);
    fts5WriteAppendPoslistInt(p, &writer, sPos.nRem);
    for(/* noop */; fts5ChunkIterEof(p, &sPos)==0; fts5ChunkIterNext(p, &sPos)){
      int iOff = 0;
      while( iOff<sPos.n ){
        int iVal;
        iOff += getVarint32(&sPos.p[iOff], iVal);
        fts5WriteAppendPoslistInt(p, &writer, iVal);
      }
    }




  }

  /* Flush the last leaf page to disk. Set the output segment b-tree height
  ** and last leaf page number at the same time.  */
  fts5WriteFinish(p, &writer, &pSeg->nHeight, &pSeg->pgnoLast);

  if( fts5MultiIterEof(p, pIter) ){
................................................................................
*/
static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
  int iOff = 0;
  while( iOff<n ){
    int iVal;
    iOff += getVarint32(&a[iOff], iVal);
    fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal);

  }
  return iOff;
}

/*
** The start of buffer (a/n) contains the start of a doclist. The doclist
** may or may not finish within the buffer. This function appends a text
................................................................................
  int iOff = 0;

  if( iOff<n ){
    iOff += sqlite3GetVarint(&a[iOff], (u64*)&iDocid);
    fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid);
  }
  while( iOff<n ){
    int nPos;
    iOff += getVarint32(&a[iOff], nPos);
    iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos));
    if( iOff<n ){
      i64 iDelta;
      iOff += sqlite3GetVarint(&a[iOff], (u64*)&iDelta);
      if( iDelta==0 ) return iOff;
      iDocid -= iDelta;
      fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid);
    }
................................................................................
      int iTermOff = 0;
      int iRowidOff = 0;
      int iOff;
      int nKeep = 0;

      iRowidOff = fts5GetU16(&a[0]);
      iTermOff = fts5GetU16(&a[2]);

      if( iRowidOff ){
        iOff = iRowidOff;
      }else if( iTermOff ){
        iOff = iTermOff;
      }else{
        iOff = n;
      }
      fts5DecodePoslist(&rc, &s, &a[4], iOff-4);



      assert( iRowidOff==0 || iOff==iRowidOff );
      if( iRowidOff ){
        iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff);
      }

      assert( iTermOff==0 || iOff==iTermOff );
................................................................................
  return fts5MultiIterEof(pIter->pIndex, pIter->pMulti);
}

/*
** Move to the next matching rowid. 
*/
void sqlite3Fts5IterNext(Fts5IndexIter *pIter, i64 iMatch){
  fts5BufferZero(&pIter->poslist);
  fts5MultiIterNext(pIter->pIndex, pIter->pMulti);
}

/*
** Return the current rowid.
*/
i64 sqlite3Fts5IterRowid(Fts5IndexIter *pIter){
  return fts5MultiIterRowid(pIter->pMulti);
}

/*
** Return a pointer to a buffer containing a copy of the position list for
** the current entry. Output variable *pn is set to the size of the buffer 
** in bytes before returning.
**
** The returned buffer does not include the 0x00 terminator byte stored on
** disk.
*/
const u8 *sqlite3Fts5IterPoslist(Fts5IndexIter *pIter, int *pn){
  assert( sqlite3Fts5IterEof(pIter)==0 );

  *pn = pIter->poslist.n;
  return pIter->poslist.p;
}

/*
** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
*/
void sqlite3Fts5IterClose(Fts5IndexIter *pIter){
  if( pIter ){
    fts5MultiIterFree(pIter->pIndex, pIter->pMulti);
    fts5StructureRelease(pIter->pStruct);
    fts5CloseReader(pIter->pIndex);
    sqlite3_free(pIter);
  }
}