/ Check-in [7b7da1eb]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Speed up seek operations on fts5 b-tree structures.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 7b7da1eb435d321fc4283f6aa2161fa1e16f2cf3
User & Date: dan 2015-07-06 20:27:19
Context
2015-07-07
08:29
Further optimizations for fts5 b-tree seeks. check-in: f3789968 user: dan tags: trunk
2015-07-06
20:27
Speed up seek operations on fts5 b-tree structures. check-in: 7b7da1eb user: dan tags: trunk
2015-07-05
22:15
Do not allow recursive CTEs that use aggregate queries in the recursive part. check-in: 6d2999af user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5_index.c.

  1575   1575         p->rc = FTS5_CORRUPT;
  1576   1576       }else{
  1577   1577         const u8 *a = &pIter->pLeaf->p[iOff];
  1578   1578         pIter->iLeafOffset += fts5GetPoslistSize(a, &pIter->nPos, &pIter->bDel);
  1579   1579       }
  1580   1580     }
  1581   1581   }
         1582  +
         1583  +static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
         1584  +  u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
         1585  +  int iOff = pIter->iLeafOffset;
         1586  +
         1587  +  if( iOff>=pIter->pLeaf->n ){
         1588  +    fts5SegIterNextPage(p, pIter);
         1589  +    if( pIter->pLeaf==0 ){
         1590  +      if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
         1591  +      return;
         1592  +    }
         1593  +    iOff = 4;
         1594  +    a = pIter->pLeaf->p;
         1595  +  }
         1596  +  iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
         1597  +  pIter->iLeafOffset = iOff;
         1598  +}
  1582   1599   
  1583   1600   /*
  1584   1601   ** Fts5SegIter.iLeafOffset currently points to the first byte of the 
  1585   1602   ** "nSuffix" field of a term. Function parameter nKeep contains the value
  1586   1603   ** of the "nPrefix" field (if there was one - it is passed 0 if this is
  1587   1604   ** the first term in the segment).
  1588   1605   **
................................................................................
  1602   1619   
  1603   1620     iOff += fts5GetVarint32(&a[iOff], nNew);
  1604   1621     pIter->term.n = nKeep;
  1605   1622     fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
  1606   1623     iOff += nNew;
  1607   1624     pIter->iTermLeafOffset = iOff;
  1608   1625     pIter->iTermLeafPgno = pIter->iLeafPgno;
  1609         -  if( iOff>=pIter->pLeaf->n ){
  1610         -    fts5SegIterNextPage(p, pIter);
  1611         -    if( pIter->pLeaf==0 ){
  1612         -      if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
  1613         -      return;
  1614         -    }
  1615         -    iOff = 4;
  1616         -    a = pIter->pLeaf->p;
  1617         -  }
  1618         -  iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
  1619   1626     pIter->iLeafOffset = iOff;
         1627  +
         1628  +  fts5SegIterLoadRowid(p, pIter);
  1620   1629   }
  1621   1630   
  1622   1631   /*
  1623   1632   ** Initialize the iterator object pIter to iterate through the entries in
  1624   1633   ** segment pSeg. The iterator is left pointing to the first entry when 
  1625   1634   ** this function returns.
  1626   1635   **
................................................................................
  2119   2128       *pbDlidx = 0;
  2120   2129       pPtr += nNew;
  2121   2130     }
  2122   2131   
  2123   2132     fts5AssertNodeSeekOk(pNode, pTerm, nTerm, iPg, *pbDlidx);
  2124   2133     return iPg;
  2125   2134   }
         2135  +
         2136  +/*
         2137  +** The iterator object passed as the second argument currently contains
         2138  +** no valid values except for the Fts5SegIter.pLeaf member variable. This
         2139  +** function searches the leaf page for a term matching (pTerm/nTerm).
         2140  +**
         2141  +*/
         2142  +static void fts5LeafSeek(
         2143  +  Fts5Index *p,                   /* Leave any error code here */
         2144  +  int bGe,                        /* True for a >= search */
         2145  +  Fts5SegIter *pIter,             /* Iterator to seek */
         2146  +  const u8 *pTerm, int nTerm      /* Term to search for */
         2147  +){
         2148  +  int iOff;
         2149  +  const u8 *a = pIter->pLeaf->p;
         2150  +  int n = pIter->pLeaf->n;
         2151  +
         2152  +  int nMatch = 0;
         2153  +  int nKeep = 0;
         2154  +  int nNew = 0;
         2155  +
         2156  +  assert( p->rc==SQLITE_OK );
         2157  +  assert( pIter->pLeaf );
         2158  +
         2159  +  iOff = fts5GetU16(&a[2]);
         2160  +  if( iOff<4 || iOff>=n ){
         2161  +    p->rc = FTS5_CORRUPT;
         2162  +    return;
         2163  +  }
         2164  +
         2165  +  while( 1 ){
         2166  +    int i;
         2167  +    int nCmp;
         2168  +    i64 rowid;
         2169  +
         2170  +    /* Figure out how many new bytes are in this term */
         2171  +
         2172  +    nNew = a[iOff++];
         2173  +    if( nNew & 0x80 ){ 
         2174  +      iOff--;
         2175  +      iOff += fts5GetVarint32(&a[iOff], nNew);
         2176  +    }
         2177  +
         2178  +    if( nKeep<nMatch ){
         2179  +      goto search_failed;
         2180  +    }
         2181  +
         2182  +    assert( nKeep>=nMatch );
         2183  +    if( nKeep==nMatch ){
         2184  +      nCmp = MIN(nNew, nTerm-nMatch);
         2185  +      for(i=0; i<nCmp; i++){
         2186  +        if( a[iOff+i]!=pTerm[nMatch+i] ) break;
         2187  +      }
         2188  +      nMatch += i;
         2189  +
         2190  +      if( nTerm==nMatch ){
         2191  +        if( i==nNew ){
         2192  +          goto search_success;
         2193  +        }else{
         2194  +          goto search_failed;
         2195  +        }
         2196  +      }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
         2197  +        goto search_failed;
         2198  +      }
         2199  +    }
         2200  +    iOff += nNew;
         2201  +
         2202  +    /* Skip past the doclist. If the end of the page is reached, bail out. */
         2203  +    iOff += fts5GetVarint(&a[iOff], &rowid);
         2204  +    while( iOff<n ){
         2205  +      int nPos;
         2206  +
         2207  +      iOff += fts5GetVarint32(&a[iOff], nPos);
         2208  +      iOff += (nPos / 2);
         2209  +
         2210  +      /* Skip past docid delta */
         2211  +      iOff += fts5GetVarint(&a[iOff], &rowid);
         2212  +      if( rowid==0 ) break;
         2213  +    };
         2214  +    if( iOff>=n ) goto search_failed;
         2215  +
         2216  +    /* Read the nKeep field of the next term. */
         2217  +    nKeep = a[iOff++];
         2218  +    if( nKeep & 0x80 ){
         2219  +      iOff--;
         2220  +      iOff += fts5GetVarint32(&a[iOff], nKeep);
         2221  +    }
         2222  +  }
         2223  +
         2224  + search_failed:
         2225  +  if( bGe==0 ){
         2226  +    fts5DataRelease(pIter->pLeaf);
         2227  +    pIter->pLeaf = 0;
         2228  +    return;
         2229  +  }else if( iOff>=n ){
         2230  +    do {
         2231  +      fts5SegIterNextPage(p, pIter);
         2232  +      if( pIter->pLeaf==0 ) return;
         2233  +      a = pIter->pLeaf->p;
         2234  +      iOff = fts5GetU16(&a[2]);
         2235  +      if( iOff ){
         2236  +        if( iOff<4 || iOff>=n ){
         2237  +          p->rc = FTS5_CORRUPT;
         2238  +        }else{
         2239  +          nKeep = 0;
         2240  +          iOff += fts5GetVarint32(&a[iOff], nNew);
         2241  +          break;
         2242  +        }
         2243  +      }
         2244  +    }while( 1 );
         2245  +  }
         2246  +
         2247  + search_success:
         2248  +  pIter->iLeafOffset = iOff + nNew;
         2249  +  pIter->iTermLeafOffset = pIter->iLeafOffset;
         2250  +  pIter->iTermLeafPgno = pIter->iLeafPgno;
         2251  +
         2252  +  fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
         2253  +  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
         2254  +
         2255  +  fts5SegIterLoadRowid(p, pIter);
         2256  +  fts5SegIterLoadNPos(p, pIter);
         2257  +}
  2126   2258   
  2127   2259   /*
  2128   2260   ** Initialize the object pIter to point to term pTerm/nTerm within segment
  2129   2261   ** pSeg. If there is no such term in the index, the iterator is set to EOF.
  2130   2262   **
  2131   2263   ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If 
  2132   2264   ** an error has already occurred when this function is called, it is a no-op.
................................................................................
  2164   2296       iPg = pSeg->pgnoFirst;
  2165   2297       bDlidx = 0;
  2166   2298     }
  2167   2299   
  2168   2300     pIter->iLeafPgno = iPg - 1;
  2169   2301     fts5SegIterNextPage(p, pIter);
  2170   2302   
  2171         -  if( (pLeaf = pIter->pLeaf) ){
  2172         -    int res;
  2173         -    pIter->iLeafOffset = fts5GetU16(&pLeaf->p[2]);
  2174         -    if( pIter->iLeafOffset<4 || pIter->iLeafOffset>=pLeaf->n ){
  2175         -      p->rc = FTS5_CORRUPT;
  2176         -    }else{
  2177         -      fts5SegIterLoadTerm(p, pIter, 0);
  2178         -      fts5SegIterLoadNPos(p, pIter);
  2179         -      do {
  2180         -        res = fts5BufferCompareBlob(&pIter->term, pTerm, nTerm);
  2181         -        if( res>=0 ) break;
  2182         -        fts5SegIterNext(p, pIter, 0);
  2183         -      }while( pIter->pLeaf && p->rc==SQLITE_OK );
  2184         -
  2185         -      if( bGe==0 && res ){
  2186         -        /* Set iterator to point to EOF */
  2187         -        fts5DataRelease(pIter->pLeaf);
  2188         -        pIter->pLeaf = 0;
  2189         -      }
  2190         -    }
         2303  +  if( pIter->pLeaf ){
         2304  +    fts5LeafSeek(p, bGe, pIter, pTerm, nTerm);
  2191   2305     }
  2192   2306   
  2193   2307     if( p->rc==SQLITE_OK && bGe==0 ){
  2194   2308       pIter->flags |= FTS5_SEGITER_ONETERM;
  2195   2309       if( pIter->pLeaf ){
  2196   2310         if( flags & FTS5INDEX_QUERY_DESC ){
  2197   2311           pIter->flags |= FTS5_SEGITER_REVERSE;

Changes to ext/fts5/test/fts5aa.test.

    46     46   }
    47     47   do_execsql_test 2.1 {
    48     48     INSERT INTO t1 VALUES('a b c', 'd e f');
    49     49   }
    50     50   do_test 2.2 {
    51     51     execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 }
    52     52   } {/{\(structure\) {lvl=0 nMerge=0 {id=[0123456789]* h=1 leaves=1..1}}}/}
    53         -do_execsql_test 2.3 {
           53  +
           54  +foreach w {a b c d e f} {
           55  +  do_execsql_test 2.3.$w.asc {
           56  +    SELECT rowid FROM t1 WHERE t1 MATCH $w;
           57  +  } {1}
           58  +  do_execsql_test 2.3.$w.desc {
           59  +    SELECT rowid FROM t1 WHERE t1 MATCH $w ORDER BY rowid DESC;
           60  +  } {1}
           61  +}
           62  +
           63  +do_execsql_test 2.4 {
    54     64     INSERT INTO t1(t1) VALUES('integrity-check');
    55     65   }
    56     66   
    57     67   #-------------------------------------------------------------------------
    58     68   #
    59     69   reset_db
    60     70   do_execsql_test 3.0 {
................................................................................
   186    196         set z [doc]
   187    197         set rowid [expr int(rand() * 100)]
   188    198         execsql { REPLACE INTO t1(rowid,x,y,z) VALUES($rowid, $x, $y, $z) }
   189    199       }
   190    200       execsql { INSERT INTO t1(t1) VALUES('integrity-check'); }
   191    201     } {}
   192    202   }
   193         -#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r}
   194         -#exit
   195    203   
   196    204   #-------------------------------------------------------------------------
   197    205   #
   198    206   reset_db
   199    207   do_execsql_test 8.0 {
   200    208     CREATE VIRTUAL TABLE t1 USING fts5(x, prefix="1,2,3");
   201    209     INSERT INTO t1(t1, rank) VALUES('pgsz', 32);

Changes to ext/fts5/test/fts5ac.test.

   200    200   
   201    201     do_test $tn2.1.1 {
   202    202       foreach {id x y} $data {
   203    203         execsql { INSERT INTO xx(rowid, x, y) VALUES($id, $x, $y) }
   204    204       }
   205    205       execsql { INSERT INTO xx(xx) VALUES('integrity-check') }
   206    206     } {}
          207  +
   207    208   
   208    209     #-------------------------------------------------------------------------
   209    210     # Test phrase queries.
   210    211     #
   211    212     foreach {tn phrase} {
   212    213       1 "o"
   213    214       2 "b q"

Changes to ext/fts5/test/fts5ad.test.

   200    200             break
   201    201           }
   202    202         }
   203    203         if {$bMatch} { lappend ret $rowid }
   204    204       }
   205    205       return $ret
   206    206     }
          207  +
   207    208     
   208    209     foreach {bAsc sql} {
   209         -    0 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix ORDER BY rowid DESC}
   210    210       1 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix}
          211  +    0 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix ORDER BY rowid DESC}
   211    212     } {
   212    213       foreach {tn prefix} {
   213    214         1  {a*} 2 {ab*} 3 {abc*} 4 {abcd*} 5 {abcde*} 
   214    215         6  {f*} 7 {fg*} 8 {fgh*} 9 {fghi*} 10 {fghij*}
   215    216         11 {k*} 12 {kl*} 13 {klm*} 14 {klmn*} 15 {klmno*}
   216    217         16 {p*} 17 {pq*} 18 {pqr*} 19 {pqrs*} 20 {pqrst*}
   217    218         21 {u*} 22 {uv*} 23 {uvw*} 24 {uvwx*} 25 {uvwxy*} 26 {uvwxyz*}

Changes to ext/fts5/test/fts5alter.test.

    77     77   } {-56 -22 -11}
    78     78   
    79     79   do_execsql_test 2.3 {
    80     80     ROLLBACK;
    81     81     SELECT rowid FROM yy WHERE yy MATCH 'a + b + c';
    82     82   } {-56 -22}
    83     83   
           84  +#-------------------------------------------------------------------------
           85  +
           86  +do_execsql_test 3.1 {
           87  +  CREATE VIRTUAL TABLE abc USING fts5(a);
           88  +  INSERT INTO abc(rowid, a) VALUES(1, 'a');
           89  +  BEGIN;
           90  +    INSERT INTO abc(rowid, a) VALUES(2, 'a');
           91  +}
           92  +breakpoint
           93  +do_execsql_test 3.2 {
           94  +    SELECT rowid FROM abc WHERE abc MATCH 'a';
           95  +} {1 2}
           96  +
           97  +do_execsql_test 3.3 {
           98  +  COMMIT;
           99  +  SELECT rowid FROM abc WHERE abc MATCH 'a';
          100  +} {1 2}
    84    101   
    85    102   finish_test
    86    103