/ Check-in [7b7da1eb]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Speed up seek operations on fts5 b-tree structures.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 7b7da1eb435d321fc4283f6aa2161fa1e16f2cf3
User & Date: dan 2015-07-06 20:27:19
Context
2015-07-07
08:29
Further optimizations for fts5 b-tree seeks. check-in: f3789968 user: dan tags: trunk
2015-07-06
20:27
Speed up seek operations on fts5 b-tree structures. check-in: 7b7da1eb user: dan tags: trunk
2015-07-05
22:15
Do not allow recursive CTEs that use aggregate queries in the recursive part. check-in: 6d2999af user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5_index.c.

1575
1576
1577
1578
1579
1580
1581

















1582
1583
1584
1585
1586
1587
1588
....
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619

1620
1621
1622
1623
1624
1625
1626
....
2119
2120
2121
2122
2123
2124
2125



























































































































2126
2127
2128
2129
2130
2131
2132
....
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190

2191
2192
2193
2194
2195
2196
2197
      p->rc = FTS5_CORRUPT;
    }else{
      const u8 *a = &pIter->pLeaf->p[iOff];
      pIter->iLeafOffset += fts5GetPoslistSize(a, &pIter->nPos, &pIter->bDel);
    }
  }
}


















/*
** Fts5SegIter.iLeafOffset currently points to the first byte of the 
** "nSuffix" field of a term. Function parameter nKeep contains the value
** of the "nPrefix" field (if there was one - it is passed 0 if this is
** the first term in the segment).
**
................................................................................

  iOff += fts5GetVarint32(&a[iOff], nNew);
  pIter->term.n = nKeep;
  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
  iOff += nNew;
  pIter->iTermLeafOffset = iOff;
  pIter->iTermLeafPgno = pIter->iLeafPgno;
  if( iOff>=pIter->pLeaf->n ){
    fts5SegIterNextPage(p, pIter);
    if( pIter->pLeaf==0 ){
      if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
      return;
    }
    iOff = 4;
    a = pIter->pLeaf->p;
  }
  iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
  pIter->iLeafOffset = iOff;

}

/*
** Initialize the iterator object pIter to iterate through the entries in
** segment pSeg. The iterator is left pointing to the first entry when 
** this function returns.
**
................................................................................
    *pbDlidx = 0;
    pPtr += nNew;
  }

  fts5AssertNodeSeekOk(pNode, pTerm, nTerm, iPg, *pbDlidx);
  return iPg;
}




























































































































/*
** Initialize the object pIter to point to term pTerm/nTerm within segment
** pSeg. If there is no such term in the index, the iterator is set to EOF.
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. If 
** an error has already occurred when this function is called, it is a no-op.
................................................................................
    iPg = pSeg->pgnoFirst;
    bDlidx = 0;
  }

  pIter->iLeafPgno = iPg - 1;
  fts5SegIterNextPage(p, pIter);

  if( (pLeaf = pIter->pLeaf) ){
    int res;
    pIter->iLeafOffset = fts5GetU16(&pLeaf->p[2]);
    if( pIter->iLeafOffset<4 || pIter->iLeafOffset>=pLeaf->n ){
      p->rc = FTS5_CORRUPT;
    }else{
      fts5SegIterLoadTerm(p, pIter, 0);
      fts5SegIterLoadNPos(p, pIter);
      do {
        res = fts5BufferCompareBlob(&pIter->term, pTerm, nTerm);
        if( res>=0 ) break;
        fts5SegIterNext(p, pIter, 0);
      }while( pIter->pLeaf && p->rc==SQLITE_OK );

      if( bGe==0 && res ){
        /* Set iterator to point to EOF */
        fts5DataRelease(pIter->pLeaf);
        pIter->pLeaf = 0;
      }
    }

  }

  if( p->rc==SQLITE_OK && bGe==0 ){
    pIter->flags |= FTS5_SEGITER_ONETERM;
    if( pIter->pLeaf ){
      if( flags & FTS5INDEX_QUERY_DESC ){
        pIter->flags |= FTS5_SEGITER_REVERSE;







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







|
<
<
<
<
|
<
<
<
<
<
>







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
>







1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
....
1619
1620
1621
1622
1623
1624
1625
1626




1627





1628
1629
1630
1631
1632
1633
1634
1635
....
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
....
2296
2297
2298
2299
2300
2301
2302

















2303


2304
2305
2306
2307
2308
2309
2310
2311
      p->rc = FTS5_CORRUPT;
    }else{
      const u8 *a = &pIter->pLeaf->p[iOff];
      pIter->iLeafOffset += fts5GetPoslistSize(a, &pIter->nPos, &pIter->bDel);
    }
  }
}

static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
  u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
  int iOff = pIter->iLeafOffset;

  if( iOff>=pIter->pLeaf->n ){
    fts5SegIterNextPage(p, pIter);
    if( pIter->pLeaf==0 ){
      if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
      return;
    }
    iOff = 4;
    a = pIter->pLeaf->p;
  }
  iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
  pIter->iLeafOffset = iOff;
}

/*
** Fts5SegIter.iLeafOffset currently points to the first byte of the 
** "nSuffix" field of a term. Function parameter nKeep contains the value
** of the "nPrefix" field (if there was one - it is passed 0 if this is
** the first term in the segment).
**
................................................................................

  iOff += fts5GetVarint32(&a[iOff], nNew);
  pIter->term.n = nKeep;
  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
  iOff += nNew;
  pIter->iTermLeafOffset = iOff;
  pIter->iTermLeafPgno = pIter->iLeafPgno;
  pIter->iLeafOffset = iOff;










  fts5SegIterLoadRowid(p, pIter);
}

/*
** Initialize the iterator object pIter to iterate through the entries in
** segment pSeg. The iterator is left pointing to the first entry when 
** this function returns.
**
................................................................................
    *pbDlidx = 0;
    pPtr += nNew;
  }

  fts5AssertNodeSeekOk(pNode, pTerm, nTerm, iPg, *pbDlidx);
  return iPg;
}

/*
** The iterator object passed as the second argument currently contains
** no valid values except for the Fts5SegIter.pLeaf member variable. This
** function searches the leaf page for a term matching (pTerm/nTerm).
**
*/
static void fts5LeafSeek(
  Fts5Index *p,                   /* Leave any error code here */
  int bGe,                        /* True for a >= search */
  Fts5SegIter *pIter,             /* Iterator to seek */
  const u8 *pTerm, int nTerm      /* Term to search for */
){
  int iOff;
  const u8 *a = pIter->pLeaf->p;
  int n = pIter->pLeaf->n;

  int nMatch = 0;
  int nKeep = 0;
  int nNew = 0;

  assert( p->rc==SQLITE_OK );
  assert( pIter->pLeaf );

  iOff = fts5GetU16(&a[2]);
  if( iOff<4 || iOff>=n ){
    p->rc = FTS5_CORRUPT;
    return;
  }

  while( 1 ){
    int i;
    int nCmp;
    i64 rowid;

    /* Figure out how many new bytes are in this term */

    nNew = a[iOff++];
    if( nNew & 0x80 ){ 
      iOff--;
      iOff += fts5GetVarint32(&a[iOff], nNew);
    }

    if( nKeep<nMatch ){
      goto search_failed;
    }

    assert( nKeep>=nMatch );
    if( nKeep==nMatch ){
      nCmp = MIN(nNew, nTerm-nMatch);
      for(i=0; i<nCmp; i++){
        if( a[iOff+i]!=pTerm[nMatch+i] ) break;
      }
      nMatch += i;

      if( nTerm==nMatch ){
        if( i==nNew ){
          goto search_success;
        }else{
          goto search_failed;
        }
      }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
        goto search_failed;
      }
    }
    iOff += nNew;

    /* Skip past the doclist. If the end of the page is reached, bail out. */
    iOff += fts5GetVarint(&a[iOff], &rowid);
    while( iOff<n ){
      int nPos;

      iOff += fts5GetVarint32(&a[iOff], nPos);
      iOff += (nPos / 2);

      /* Skip past docid delta */
      iOff += fts5GetVarint(&a[iOff], &rowid);
      if( rowid==0 ) break;
    };
    if( iOff>=n ) goto search_failed;

    /* Read the nKeep field of the next term. */
    nKeep = a[iOff++];
    if( nKeep & 0x80 ){
      iOff--;
      iOff += fts5GetVarint32(&a[iOff], nKeep);
    }
  }

 search_failed:
  if( bGe==0 ){
    fts5DataRelease(pIter->pLeaf);
    pIter->pLeaf = 0;
    return;
  }else if( iOff>=n ){
    do {
      fts5SegIterNextPage(p, pIter);
      if( pIter->pLeaf==0 ) return;
      a = pIter->pLeaf->p;
      iOff = fts5GetU16(&a[2]);
      if( iOff ){
        if( iOff<4 || iOff>=n ){
          p->rc = FTS5_CORRUPT;
        }else{
          nKeep = 0;
          iOff += fts5GetVarint32(&a[iOff], nNew);
          break;
        }
      }
    }while( 1 );
  }

 search_success:
  pIter->iLeafOffset = iOff + nNew;
  pIter->iTermLeafOffset = pIter->iLeafOffset;
  pIter->iTermLeafPgno = pIter->iLeafPgno;

  fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);

  fts5SegIterLoadRowid(p, pIter);
  fts5SegIterLoadNPos(p, pIter);
}

/*
** Initialize the object pIter to point to term pTerm/nTerm within segment
** pSeg. If there is no such term in the index, the iterator is set to EOF.
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. If 
** an error has already occurred when this function is called, it is a no-op.
................................................................................
    iPg = pSeg->pgnoFirst;
    bDlidx = 0;
  }

  pIter->iLeafPgno = iPg - 1;
  fts5SegIterNextPage(p, pIter);


















  if( pIter->pLeaf ){


    fts5LeafSeek(p, bGe, pIter, pTerm, nTerm);
  }

  if( p->rc==SQLITE_OK && bGe==0 ){
    pIter->flags |= FTS5_SEGITER_ONETERM;
    if( pIter->pLeaf ){
      if( flags & FTS5INDEX_QUERY_DESC ){
        pIter->flags |= FTS5_SEGITER_REVERSE;

Changes to ext/fts5/test/fts5aa.test.

46
47
48
49
50
51
52










53
54
55
56
57
58
59
60
...
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
}
do_execsql_test 2.1 {
  INSERT INTO t1 VALUES('a b c', 'd e f');
}
do_test 2.2 {
  execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 }
} {/{\(structure\) {lvl=0 nMerge=0 {id=[0123456789]* h=1 leaves=1..1}}}/}










do_execsql_test 2.3 {
  INSERT INTO t1(t1) VALUES('integrity-check');
}

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 3.0 {
................................................................................
      set z [doc]
      set rowid [expr int(rand() * 100)]
      execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) }
    }
    execsql { INSERT INTO t1(t1) VALUES('integrity-check'); }
  } {}
}
#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}
#exit

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 8.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x, prefix="1,2,3");
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);







>
>
>
>
>
>
>
>
>
>
|







 







<
<







46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
...
196
197
198
199
200
201
202


203
204
205
206
207
208
209
}
do_execsql_test 2.1 {
  INSERT INTO t1 VALUES('a b c', 'd e f');
}
do_test 2.2 {
  execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 }
} {/{\(structure\) {lvl=0 nMerge=0 {id=[0123456789]* h=1 leaves=1..1}}}/}

foreach w {a b c d e f} {
  do_execsql_test 2.3.$w.asc {
    SELECT rowid FROM t1 WHERE t1 MATCH $w;
  } {1}
  do_execsql_test 2.3.$w.desc {
    SELECT rowid FROM t1 WHERE t1 MATCH $w ORDER BY rowid DESC;
  } {1}
}

do_execsql_test 2.4 {
  INSERT INTO t1(t1) VALUES('integrity-check');
}

#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 3.0 {
................................................................................
      set z [doc]
      set rowid [expr int(rand() * 100)]
      execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) }
    }
    execsql { INSERT INTO t1(t1) VALUES('integrity-check'); }
  } {}
}



#-------------------------------------------------------------------------
#
reset_db
do_execsql_test 8.0 {
  CREATE VIRTUAL TABLE t1 USING fts5(x, prefix="1,2,3");
  INSERT INTO t1(t1, rank) VALUES('pgsz', 32);

Changes to ext/fts5/test/fts5ac.test.

200
201
202
203
204
205
206

207
208
209
210
211
212
213

  do_test $tn2.1.1 {
    foreach {id x y} $data {
      execsql { INSERT INTO xx(rowid, x, y) VALUES($id, $x, $y) }
    }
    execsql { INSERT INTO xx(xx) VALUES('integrity-check') }
  } {}


  #-------------------------------------------------------------------------
  # Test phrase queries.
  #
  foreach {tn phrase} {
    1 "o"
    2 "b q"







>







200
201
202
203
204
205
206
207
208
209
210
211
212
213
214

  do_test $tn2.1.1 {
    foreach {id x y} $data {
      execsql { INSERT INTO xx(rowid, x, y) VALUES($id, $x, $y) }
    }
    execsql { INSERT INTO xx(xx) VALUES('integrity-check') }
  } {}


  #-------------------------------------------------------------------------
  # Test phrase queries.
  #
  foreach {tn phrase} {
    1 "o"
    2 "b q"

Changes to ext/fts5/test/fts5ad.test.

200
201
202
203
204
205
206
207

208
209
210
211
212
213
214
215
216
217
          break
        }
      }
      if {$bMatch} { lappend ret $rowid }
    }
    return $ret
  }
  

  foreach {bAsc sql} {
    0 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix ORDER BY rowid DESC}
    1 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix}
  } {
    foreach {tn prefix} {
      1  {a*} 2 {ab*} 3 {abc*} 4 {abcd*} 5 {abcde*} 
      6  {f*} 7 {fg*} 8 {fgh*} 9 {fghi*} 10 {fghij*}
      11 {k*} 12 {kl*} 13 {klm*} 14 {klmn*} 15 {klmno*}
      16 {p*} 17 {pq*} 18 {pqr*} 19 {pqrs*} 20 {pqrst*}
      21 {u*} 22 {uv*} 23 {uvw*} 24 {uvwx*} 25 {uvwxy*} 26 {uvwxyz*}







|
>

|
|







200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
          break
        }
      }
      if {$bMatch} { lappend ret $rowid }
    }
    return $ret
  }

  
  foreach {bAsc sql} {
    1 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix}
    0 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix ORDER BY rowid DESC}
  } {
    foreach {tn prefix} {
      1  {a*} 2 {ab*} 3 {abc*} 4 {abcd*} 5 {abcde*} 
      6  {f*} 7 {fg*} 8 {fgh*} 9 {fghi*} 10 {fghij*}
      11 {k*} 12 {kl*} 13 {klm*} 14 {klmn*} 15 {klmno*}
      16 {p*} 17 {pq*} 18 {pqr*} 19 {pqrs*} 20 {pqrst*}
      21 {u*} 22 {uv*} 23 {uvw*} 24 {uvwx*} 25 {uvwxy*} 26 {uvwxyz*}

Changes to ext/fts5/test/fts5alter.test.

77
78
79
80
81
82
83

















84
85
86
} {-56 -22 -11}

do_execsql_test 2.3 {
  ROLLBACK;
  SELECT rowid FROM yy WHERE yy MATCH 'a + b + c';
} {-56 -22}



















finish_test








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
} {-56 -22 -11}

do_execsql_test 2.3 {
  ROLLBACK;
  SELECT rowid FROM yy WHERE yy MATCH 'a + b + c';
} {-56 -22}

#-------------------------------------------------------------------------

do_execsql_test 3.1 {
  CREATE VIRTUAL TABLE abc USING fts5(a);
  INSERT INTO abc(rowid, a) VALUES(1, 'a');
  BEGIN;
    INSERT INTO abc(rowid, a) VALUES(2, 'a');
}
breakpoint
do_execsql_test 3.2 {
    SELECT rowid FROM abc WHERE abc MATCH 'a';
} {1 2}

do_execsql_test 3.3 {
  COMMIT;
  SELECT rowid FROM abc WHERE abc MATCH 'a';
} {1 2}

finish_test