/ Check-in [b9477eb0]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Merge the fts3-changes branch back into the trunk.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:b9477eb056d120826ed82b0d65e6f27b5d0c087a
User & Date: dan 2011-06-28 14:16:42
Context
2011-06-29
17:11
Pass the BTREE_UNORDERED hint into both sqlite3BtreeOpen() and into sqlite3BtreeCreateTable(). check-in: 591de898 user: drh tags: trunk
2011-06-28
14:16
Merge the fts3-changes branch back into the trunk. check-in: b9477eb0 user: dan tags: trunk
11:58
Add a fix and tests for the FTS deferred token logic. Closed-Leaf check-in: 91daea7d user: dan tags: fts3-changes
07:15
Changes to allow FTS to be compiled as a loadable module again. check-in: 29e69f38 user: dan tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

   308    308   
   309    309   #include "fts3.h"
   310    310   #ifndef SQLITE_CORE 
   311    311   # include "sqlite3ext.h"
   312    312     SQLITE_EXTENSION_INIT1
   313    313   #endif
   314    314   
          315  +static int fts3EvalNext(Fts3Cursor *pCsr);
          316  +static int fts3EvalStart(Fts3Cursor *pCsr);
          317  +static int fts3TermSegReaderCursor(
          318  +    Fts3Cursor *, const char *, int, int, Fts3MultiSegReader **);
          319  +
   315    320   /* 
   316    321   ** Write a 64-bit variable-length integer to memory starting at p[0].
   317    322   ** The length of data written will be between 1 and FTS3_VARINT_MAX bytes.
   318    323   ** The number of bytes written is returned.
   319    324   */
   320    325   int sqlite3Fts3PutVarint(char *p, sqlite_int64 v){
   321    326     unsigned char *q = (unsigned char *) p;
................................................................................
   816    821     for(i=0; i<p->nColumn; i++){
   817    822       fts3Appendf(pRc, &zRet, ",%s(?)", zFunction);
   818    823     }
   819    824     sqlite3_free(zFree);
   820    825     return zRet;
   821    826   }
   822    827   
          828  +/*
          829  +** This function interprets the string at (*pp) as a non-negative integer
          830  +** value. It reads the integer and sets *pnOut to the value read, then 
          831  +** sets *pp to point to the byte immediately following the last byte of
          832  +** the integer value.
          833  +**
          834  +** Only decimal digits ('0'..'9') may be part of an integer value. 
          835  +**
          836  +** If *pp does not being with a decimal digit SQLITE_ERROR is returned and
          837  +** the output value undefined. Otherwise SQLITE_OK is returned.
          838  +**
          839  +** This function is used when parsing the "prefix=" FTS4 parameter.
          840  +*/
   823    841   static int fts3GobbleInt(const char **pp, int *pnOut){
   824         -  const char *p = *pp;
   825         -  int nInt = 0;
          842  +  const char *p = *pp;            /* Iterator pointer */
          843  +  int nInt = 0;                   /* Output value */
          844  +
   826    845     for(p=*pp; p[0]>='0' && p[0]<='9'; p++){
   827    846       nInt = nInt * 10 + (p[0] - '0');
   828    847     }
   829    848     if( p==*pp ) return SQLITE_ERROR;
   830    849     *pnOut = nInt;
   831    850     *pp = p;
   832    851     return SQLITE_OK;
   833    852   }
   834    853   
   835         -
          854  +/*
          855  +** This function is called to allocate an array of Fts3Index structures
          856  +** representing the indexes maintained by the current FTS table. FTS tables
          857  +** always maintain the main "terms" index, but may also maintain one or
          858  +** more "prefix" indexes, depending on the value of the "prefix=" parameter
          859  +** (if any) specified as part of the CREATE VIRTUAL TABLE statement.
          860  +**
          861  +** Argument zParam is passed the value of the "prefix=" option if one was
          862  +** specified, or NULL otherwise.
          863  +**
          864  +** If no error occurs, SQLITE_OK is returned and *apIndex set to point to
          865  +** the allocated array. *pnIndex is set to the number of elements in the
          866  +** array. If an error does occur, an SQLite error code is returned.
          867  +**
          868  +** Regardless of whether or not an error is returned, it is the responsibility
          869  +** of the caller to call sqlite3_free() on the output array to free it.
          870  +*/
   836    871   static int fts3PrefixParameter(
   837    872     const char *zParam,             /* ABC in prefix=ABC parameter to parse */
   838    873     int *pnIndex,                   /* OUT: size of *apIndex[] array */
   839         -  struct Fts3Index **apIndex,     /* OUT: Array of indexes for this table */
   840         -  struct Fts3Index **apFree       /* OUT: Free this with sqlite3_free() */
          874  +  struct Fts3Index **apIndex      /* OUT: Array of indexes for this table */
   841    875   ){
   842         -  struct Fts3Index *aIndex;
   843         -  int nIndex = 1;
          876  +  struct Fts3Index *aIndex;       /* Allocated array */
          877  +  int nIndex = 1;                 /* Number of entries in array */
   844    878   
   845    879     if( zParam && zParam[0] ){
   846    880       const char *p;
   847    881       nIndex++;
   848    882       for(p=zParam; *p; p++){
   849    883         if( *p==',' ) nIndex++;
   850    884       }
   851    885     }
   852    886   
   853    887     aIndex = sqlite3_malloc(sizeof(struct Fts3Index) * nIndex);
   854         -  *apIndex = *apFree = aIndex;
          888  +  *apIndex = aIndex;
   855    889     *pnIndex = nIndex;
   856    890     if( !aIndex ){
   857    891       return SQLITE_NOMEM;
   858    892     }
   859    893   
   860    894     memset(aIndex, 0, sizeof(struct Fts3Index) * nIndex);
   861    895     if( zParam ){
................................................................................
   904    938     int nDb;                        /* Bytes required to hold database name */
   905    939     int nName;                      /* Bytes required to hold table name */
   906    940     int isFts4 = (argv[0][3]=='4'); /* True for FTS4, false for FTS3 */
   907    941     const char **aCol;              /* Array of column names */
   908    942     sqlite3_tokenizer *pTokenizer = 0;        /* Tokenizer for this table */
   909    943   
   910    944     int nIndex;                     /* Size of aIndex[] array */
   911         -  struct Fts3Index *aIndex;       /* Array of indexes for this table */
   912         -  struct Fts3Index *aFree = 0;    /* Free this before returning */
          945  +  struct Fts3Index *aIndex = 0;   /* Array of indexes for this table */
   913    946   
   914    947     /* The results of parsing supported FTS4 key=value options: */
   915    948     int bNoDocsize = 0;             /* True to omit %_docsize table */
   916    949     int bDescIdx = 0;               /* True to store descending indexes */
   917    950     char *zPrefix = 0;              /* Prefix parameter value (or NULL) */
   918    951     char *zCompress = 0;            /* compress=? parameter (or NULL) */
   919    952     char *zUncompress = 0;          /* uncompress=? parameter (or NULL) */
................................................................................
  1042   1075   
  1043   1076     if( pTokenizer==0 ){
  1044   1077       rc = sqlite3Fts3InitTokenizer(pHash, "simple", &pTokenizer, pzErr);
  1045   1078       if( rc!=SQLITE_OK ) goto fts3_init_out;
  1046   1079     }
  1047   1080     assert( pTokenizer );
  1048   1081   
  1049         -  rc = fts3PrefixParameter(zPrefix, &nIndex, &aIndex, &aFree);
         1082  +  rc = fts3PrefixParameter(zPrefix, &nIndex, &aIndex);
  1050   1083     if( rc==SQLITE_ERROR ){
  1051   1084       assert( zPrefix );
  1052   1085       *pzErr = sqlite3_mprintf("error parsing prefix parameter: %s", zPrefix);
  1053   1086     }
  1054   1087     if( rc!=SQLITE_OK ) goto fts3_init_out;
  1055   1088   
  1056   1089     /* Allocate and populate the Fts3Table structure. */
................................................................................
  1129   1162     p->nNodeSize = p->nPgsz-35;
  1130   1163   
  1131   1164     /* Declare the table schema to SQLite. */
  1132   1165     fts3DeclareVtab(&rc, p);
  1133   1166   
  1134   1167   fts3_init_out:
  1135   1168     sqlite3_free(zPrefix);
  1136         -  sqlite3_free(aFree);
         1169  +  sqlite3_free(aIndex);
  1137   1170     sqlite3_free(zCompress);
  1138   1171     sqlite3_free(zUncompress);
  1139   1172     sqlite3_free((void *)aCol);
  1140   1173     if( rc!=SQLITE_OK ){
  1141   1174       if( p ){
  1142   1175         fts3DisconnectMethod((sqlite3_vtab *)p);
  1143   1176       }else if( pTokenizer ){
................................................................................
  1720   1753     *p++ = POS_END;
  1721   1754     *pp = p;
  1722   1755     *pp1 = p1 + 1;
  1723   1756     *pp2 = p2 + 1;
  1724   1757   }
  1725   1758   
  1726   1759   /*
  1727         -** nToken==1 searches for adjacent positions.
  1728         -**
  1729   1760   ** This function is used to merge two position lists into one. When it is
  1730   1761   ** called, *pp1 and *pp2 must both point to position lists. A position-list is
  1731   1762   ** the part of a doclist that follows each document id. For example, if a row
  1732   1763   ** contains:
  1733   1764   **
  1734   1765   **     'a b c'|'x y z'|'a b b a'
  1735   1766   **
................................................................................
  1741   1772   ** byte following the 0x00 terminator of their respective position lists.
  1742   1773   **
  1743   1774   ** If isSaveLeft is 0, an entry is added to the output position list for 
  1744   1775   ** each position in *pp2 for which there exists one or more positions in
  1745   1776   ** *pp1 so that (pos(*pp2)>pos(*pp1) && pos(*pp2)-pos(*pp1)<=nToken). i.e.
  1746   1777   ** when the *pp1 token appears before the *pp2 token, but not more than nToken
  1747   1778   ** slots before it.
         1779  +**
         1780  +** e.g. nToken==1 searches for adjacent positions.
  1748   1781   */
  1749   1782   static int fts3PoslistPhraseMerge(
  1750   1783     char **pp,                      /* IN/OUT: Preallocated output buffer */
  1751   1784     int nToken,                     /* Maximum difference in token positions */
  1752   1785     int isSaveLeft,                 /* Save the left position */
  1753   1786     int isExact,                    /* If *pp1 is exactly nTokens before *pp2 */
  1754   1787     char **pp1,                     /* IN/OUT: Left input list */
................................................................................
  1907   1940       res = 0;
  1908   1941     }
  1909   1942   
  1910   1943     return res;
  1911   1944   }
  1912   1945   
  1913   1946   /* 
  1914         -** A pointer to an instance of this structure is used as the context 
  1915         -** argument to sqlite3Fts3SegReaderIterate()
         1947  +** An instance of this function is used to merge together the (potentially
         1948  +** large number of) doclists for each term that matches a prefix query.
         1949  +** See function fts3TermSelectMerge() for details.
  1916   1950   */
  1917   1951   typedef struct TermSelect TermSelect;
  1918   1952   struct TermSelect {
  1919         -  int isReqPos;
  1920         -  char *aaOutput[16];             /* Malloc'd output buffer */
  1921         -  int anOutput[16];               /* Size of output in bytes */
         1953  +  char *aaOutput[16];             /* Malloc'd output buffers */
         1954  +  int anOutput[16];               /* Size each output buffer in bytes */
  1922   1955   };
  1923   1956   
  1924         -
         1957  +/*
         1958  +** This function is used to read a single varint from a buffer. Parameter
         1959  +** pEnd points 1 byte past the end of the buffer. When this function is
         1960  +** called, if *pp points to pEnd or greater, then the end of the buffer
         1961  +** has been reached. In this case *pp is set to 0 and the function returns.
         1962  +**
         1963  +** If *pp does not point to or past pEnd, then a single varint is read
         1964  +** from *pp. *pp is then set to point 1 byte past the end of the read varint.
         1965  +**
         1966  +** If bDescIdx is false, the value read is added to *pVal before returning.
         1967  +** If it is true, the value read is subtracted from *pVal before this 
         1968  +** function returns.
         1969  +*/
  1925   1970   static void fts3GetDeltaVarint3(
  1926         -  char **pp, 
  1927         -  char *pEnd, 
  1928         -  int bDescIdx,
  1929         -  sqlite3_int64 *pVal
         1971  +  char **pp,                      /* IN/OUT: Point to read varint from */
         1972  +  char *pEnd,                     /* End of buffer */
         1973  +  int bDescIdx,                   /* True if docids are descending */
         1974  +  sqlite3_int64 *pVal             /* IN/OUT: Integer value */
  1930   1975   ){
  1931   1976     if( *pp>=pEnd ){
  1932   1977       *pp = 0;
  1933   1978     }else{
  1934   1979       sqlite3_int64 iVal;
  1935   1980       *pp += sqlite3Fts3GetVarint(*pp, &iVal);
  1936   1981       if( bDescIdx ){
................................................................................
  1937   1982         *pVal -= iVal;
  1938   1983       }else{
  1939   1984         *pVal += iVal;
  1940   1985       }
  1941   1986     }
  1942   1987   }
  1943   1988   
         1989  +/*
         1990  +** This function is used to write a single varint to a buffer. The varint
         1991  +** is written to *pp. Before returning, *pp is set to point 1 byte past the
         1992  +** end of the value written.
         1993  +**
         1994  +** If *pbFirst is zero when this function is called, the value written to
         1995  +** the buffer is that of parameter iVal. 
         1996  +**
         1997  +** If *pbFirst is non-zero when this function is called, then the value 
         1998  +** written is either (iVal-*piPrev) (if bDescIdx is zero) or (*piPrev-iVal)
         1999  +** (if bDescIdx is non-zero).
         2000  +**
         2001  +** Before returning, this function always sets *pbFirst to 1 and *piPrev
         2002  +** to the value of parameter iVal.
         2003  +*/
  1944   2004   static void fts3PutDeltaVarint3(
  1945   2005     char **pp,                      /* IN/OUT: Output pointer */
  1946   2006     int bDescIdx,                   /* True for descending docids */
  1947   2007     sqlite3_int64 *piPrev,          /* IN/OUT: Previous value written to list */
  1948   2008     int *pbFirst,                   /* IN/OUT: True after first int written */
  1949   2009     sqlite3_int64 iVal              /* Write this value to the list */
  1950   2010   ){
................................................................................
  1957   2017     assert( *pbFirst || *piPrev==0 );
  1958   2018     assert( *pbFirst==0 || iWrite>0 );
  1959   2019     *pp += sqlite3Fts3PutVarint(*pp, iWrite);
  1960   2020     *piPrev = iVal;
  1961   2021     *pbFirst = 1;
  1962   2022   }
  1963   2023   
  1964         -#define COMPARE_DOCID(i1, i2) ((bDescIdx?-1:1) * (i1-i2))
  1965   2024   
         2025  +/*
         2026  +** This macro is used by various functions that merge doclists. The two
         2027  +** arguments are 64-bit docid values. If the value of the stack variable
         2028  +** bDescDoclist is 0 when this macro is invoked, then it returns (i1-i2). 
         2029  +** Otherwise, (i2-i1).
         2030  +**
         2031  +** Using this makes it easier to write code that can merge doclists that are
         2032  +** sorted in either ascending or descending order.
         2033  +*/
         2034  +#define DOCID_CMP(i1, i2) ((bDescDoclist?-1:1) * (i1-i2))
         2035  +
         2036  +/*
         2037  +** This function does an "OR" merge of two doclists (output contains all
         2038  +** positions contained in either argument doclist). If the docids in the 
         2039  +** input doclists are sorted in ascending order, parameter bDescDoclist
         2040  +** should be false. If they are sorted in ascending order, it should be
         2041  +** passed a non-zero value.
         2042  +**
         2043  +** If no error occurs, *paOut is set to point at an sqlite3_malloc'd buffer
         2044  +** containing the output doclist and SQLITE_OK is returned. In this case
         2045  +** *pnOut is set to the number of bytes in the output doclist.
         2046  +**
         2047  +** If an error occurs, an SQLite error code is returned. The output values
         2048  +** are undefined in this case.
         2049  +*/
  1966   2050   static int fts3DoclistOrMerge(
  1967         -  int bDescIdx,                   /* True if arguments are desc */
         2051  +  int bDescDoclist,               /* True if arguments are desc */
  1968   2052     char *a1, int n1,               /* First doclist */
  1969   2053     char *a2, int n2,               /* Second doclist */
  1970   2054     char **paOut, int *pnOut        /* OUT: Malloc'd doclist */
  1971   2055   ){
  1972   2056     sqlite3_int64 i1 = 0;
  1973   2057     sqlite3_int64 i2 = 0;
  1974   2058     sqlite3_int64 iPrev = 0;
................................................................................
  1985   2069     aOut = sqlite3_malloc(n1+n2);
  1986   2070     if( !aOut ) return SQLITE_NOMEM;
  1987   2071   
  1988   2072     p = aOut;
  1989   2073     fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1);
  1990   2074     fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2);
  1991   2075     while( p1 || p2 ){
  1992         -    sqlite3_int64 iDiff = COMPARE_DOCID(i1, i2);
         2076  +    sqlite3_int64 iDiff = DOCID_CMP(i1, i2);
  1993   2077   
  1994   2078       if( p2 && p1 && iDiff==0 ){
  1995         -      fts3PutDeltaVarint3(&p, bDescIdx, &iPrev, &bFirstOut, i1);
         2079  +      fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
  1996   2080         fts3PoslistMerge(&p, &p1, &p2);
  1997         -      fts3GetDeltaVarint3(&p1, pEnd1, bDescIdx, &i1);
  1998         -      fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2);
         2081  +      fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
         2082  +      fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
  1999   2083       }else if( !p2 || (p1 && iDiff<0) ){
  2000         -      fts3PutDeltaVarint3(&p, bDescIdx, &iPrev, &bFirstOut, i1);
         2084  +      fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
  2001   2085         fts3PoslistCopy(&p, &p1);
  2002         -      fts3GetDeltaVarint3(&p1, pEnd1, bDescIdx, &i1);
         2086  +      fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
  2003   2087       }else{
  2004         -      fts3PutDeltaVarint3(&p, bDescIdx, &iPrev, &bFirstOut, i2);
         2088  +      fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i2);
  2005   2089         fts3PoslistCopy(&p, &p2);
  2006         -      fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2);
         2090  +      fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
  2007   2091       }
  2008   2092     }
  2009   2093   
  2010   2094     *paOut = aOut;
  2011   2095     *pnOut = (p-aOut);
  2012   2096     return SQLITE_OK;
  2013   2097   }
  2014   2098   
         2099  +/*
         2100  +** This function does a "phrase" merge of two doclists. In a phrase merge,
         2101  +** the output contains a copy of each position from the right-hand input
         2102  +** doclist for which there is a position in the left-hand input doclist
         2103  +** exactly nDist tokens before it.
         2104  +**
         2105  +** If the docids in the input doclists are sorted in ascending order,
         2106  +** parameter bDescDoclist should be false. If they are sorted in ascending 
         2107  +** order, it should be passed a non-zero value.
         2108  +**
         2109  +** The right-hand input doclist is overwritten by this function.
         2110  +*/
  2015   2111   static void fts3DoclistPhraseMerge(
  2016         -  int bDescIdx,                   /* True if arguments are desc */
         2112  +  int bDescDoclist,               /* True if arguments are desc */
  2017   2113     int nDist,                      /* Distance from left to right (1=adjacent) */
  2018   2114     char *aLeft, int nLeft,         /* Left doclist */
  2019   2115     char *aRight, int *pnRight      /* IN/OUT: Right/output doclist */
  2020   2116   ){
  2021   2117     sqlite3_int64 i1 = 0;
  2022   2118     sqlite3_int64 i2 = 0;
  2023   2119     sqlite3_int64 iPrev = 0;
................................................................................
  2032   2128     assert( nDist>0 );
  2033   2129   
  2034   2130     p = aOut;
  2035   2131     fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1);
  2036   2132     fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2);
  2037   2133   
  2038   2134     while( p1 && p2 ){
  2039         -    sqlite3_int64 iDiff = COMPARE_DOCID(i1, i2);
         2135  +    sqlite3_int64 iDiff = DOCID_CMP(i1, i2);
  2040   2136       if( iDiff==0 ){
  2041   2137         char *pSave = p;
  2042   2138         sqlite3_int64 iPrevSave = iPrev;
  2043   2139         int bFirstOutSave = bFirstOut;
  2044   2140   
  2045         -      fts3PutDeltaVarint3(&p, bDescIdx, &iPrev, &bFirstOut, i1);
         2141  +      fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
  2046   2142         if( 0==fts3PoslistPhraseMerge(&p, nDist, 0, 1, &p1, &p2) ){
  2047   2143           p = pSave;
  2048   2144           iPrev = iPrevSave;
  2049   2145           bFirstOut = bFirstOutSave;
  2050   2146         }
  2051         -      fts3GetDeltaVarint3(&p1, pEnd1, bDescIdx, &i1);
  2052         -      fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2);
         2147  +      fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
         2148  +      fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
  2053   2149       }else if( iDiff<0 ){
  2054   2150         fts3PoslistCopy(0, &p1);
  2055         -      fts3GetDeltaVarint3(&p1, pEnd1, bDescIdx, &i1);
         2151  +      fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
  2056   2152       }else{
  2057   2153         fts3PoslistCopy(0, &p2);
  2058         -      fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2);
         2154  +      fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
  2059   2155       }
  2060   2156     }
  2061   2157   
  2062   2158     *pnRight = p - aOut;
  2063   2159   }
  2064   2160   
  2065   2161   
................................................................................
  2068   2164   ** doclist stored in TermSelect.aaOutput[0]. If successful, delete all
  2069   2165   ** other doclists (except the aaOutput[0] one) and return SQLITE_OK.
  2070   2166   **
  2071   2167   ** If an OOM error occurs, return SQLITE_NOMEM. In this case it is
  2072   2168   ** the responsibility of the caller to free any doclists left in the
  2073   2169   ** TermSelect.aaOutput[] array.
  2074   2170   */
  2075         -static int fts3TermSelectMerge(Fts3Table *p, TermSelect *pTS){
         2171  +static int fts3TermSelectFinishMerge(Fts3Table *p, TermSelect *pTS){
  2076   2172     char *aOut = 0;
  2077   2173     int nOut = 0;
  2078   2174     int i;
  2079   2175   
  2080   2176     /* Loop through the doclists in the aaOutput[] array. Merge them all
  2081   2177     ** into a single doclist.
  2082   2178     */
................................................................................
  2109   2205   
  2110   2206     pTS->aaOutput[0] = aOut;
  2111   2207     pTS->anOutput[0] = nOut;
  2112   2208     return SQLITE_OK;
  2113   2209   }
  2114   2210   
  2115   2211   /*
  2116         -** This function is used as the sqlite3Fts3SegReaderIterate() callback when
  2117         -** querying the full-text index for a doclist associated with a term or
  2118         -** term-prefix.
         2212  +** Merge the doclist aDoclist/nDoclist into the TermSelect object passed
         2213  +** as the first argument. The merge is an "OR" merge (see function
         2214  +** fts3DoclistOrMerge() for details).
         2215  +**
         2216  +** This function is called with the doclist for each term that matches
         2217  +** a queried prefix. It merges all these doclists into one, the doclist
         2218  +** for the specified prefix. Since there can be a very large number of
         2219  +** doclists to merge, the merging is done pair-wise using the TermSelect
         2220  +** object.
         2221  +**
         2222  +** This function returns SQLITE_OK if the merge is successful, or an
         2223  +** SQLite error code (SQLITE_NOMEM) if an error occurs.
  2119   2224   */
  2120         -static int fts3TermSelectCb(
  2121         -  Fts3Table *p,                   /* Virtual table object */
  2122         -  void *pContext,                 /* Pointer to TermSelect structure */
  2123         -  char *zTerm,
  2124         -  int nTerm,
  2125         -  char *aDoclist,
  2126         -  int nDoclist
         2225  +static int fts3TermSelectMerge(
         2226  +  Fts3Table *p,                   /* FTS table handle */
         2227  +  TermSelect *pTS,                /* TermSelect object to merge into */
         2228  +  char *aDoclist,                 /* Pointer to doclist */
         2229  +  int nDoclist                    /* Size of aDoclist in bytes */
  2127   2230   ){
  2128         -  TermSelect *pTS = (TermSelect *)pContext;
  2129         -
  2130         -  UNUSED_PARAMETER(p);
  2131         -  UNUSED_PARAMETER(zTerm);
  2132         -  UNUSED_PARAMETER(nTerm);
  2133         -
  2134   2231     if( pTS->aaOutput[0]==0 ){
  2135   2232       /* If this is the first term selected, copy the doclist to the output
  2136   2233       ** buffer using memcpy(). */
  2137   2234       pTS->aaOutput[0] = sqlite3_malloc(nDoclist);
  2138   2235       pTS->anOutput[0] = nDoclist;
  2139   2236       if( pTS->aaOutput[0] ){
  2140   2237         memcpy(pTS->aaOutput[0], aDoclist, nDoclist);
................................................................................
  2197   2294       }
  2198   2295       pCsr->apSegment = apNew;
  2199   2296     }
  2200   2297     pCsr->apSegment[pCsr->nSegment++] = pNew;
  2201   2298     return SQLITE_OK;
  2202   2299   }
  2203   2300   
         2301  +/*
         2302  +** Add seg-reader objects to the Fts3MultiSegReader object passed as the
         2303  +** 8th argument.
         2304  +**
         2305  +** This function returns SQLITE_OK if successful, or an SQLite error code
         2306  +** otherwise.
         2307  +*/
  2204   2308   static int fts3SegReaderCursor(
  2205   2309     Fts3Table *p,                   /* FTS3 table handle */
  2206   2310     int iIndex,                     /* Index to search (from 0 to p->nIndex-1) */
  2207   2311     int iLevel,                     /* Level of segments to scan */
  2208   2312     const char *zTerm,              /* Term to query for */
  2209   2313     int nTerm,                      /* Size of zTerm in bytes */
  2210   2314     int isPrefix,                   /* True for a prefix search */
  2211   2315     int isScan,                     /* True to scan from zTerm to EOF */
  2212         -  Fts3MultiSegReader *pCsr       /* Cursor object to populate */
         2316  +  Fts3MultiSegReader *pCsr        /* Cursor object to populate */
  2213   2317   ){
  2214         -  int rc = SQLITE_OK;
  2215         -  int rc2;
  2216         -  sqlite3_stmt *pStmt = 0;
         2318  +  int rc = SQLITE_OK;             /* Error code */
         2319  +  sqlite3_stmt *pStmt = 0;        /* Statement to iterate through segments */
         2320  +  int rc2;                        /* Result of sqlite3_reset() */
  2217   2321   
  2218   2322     /* If iLevel is less than 0 and this is not a scan, include a seg-reader 
  2219   2323     ** for the pending-terms. If this is a scan, then this call must be being
  2220   2324     ** made by an fts4aux module, not an FTS table. In this case calling
  2221   2325     ** Fts3SegReaderPending might segfault, as the data structures used by 
  2222   2326     ** fts4aux are not completely populated. So it's easiest to filter these
  2223   2327     ** calls out here.  */
................................................................................
  2298   2402     memset(pCsr, 0, sizeof(Fts3MultiSegReader));
  2299   2403   
  2300   2404     return fts3SegReaderCursor(
  2301   2405         p, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr
  2302   2406     );
  2303   2407   }
  2304   2408   
         2409  +/*
         2410  +** In addition to its current configuration, have the Fts3MultiSegReader
         2411  +** passed as the 4th argument also scan the doclist for term zTerm/nTerm.
         2412  +**
         2413  +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
         2414  +*/
  2305   2415   static int fts3SegReaderCursorAddZero(
  2306         -  Fts3Table *p,
  2307         -  const char *zTerm,
  2308         -  int nTerm,
  2309         -  Fts3MultiSegReader *pCsr
         2416  +  Fts3Table *p,                   /* FTS virtual table handle */
         2417  +  const char *zTerm,              /* Term to scan doclist of */
         2418  +  int nTerm,                      /* Number of bytes in zTerm */
         2419  +  Fts3MultiSegReader *pCsr        /* Fts3MultiSegReader to modify */
  2310   2420   ){
  2311   2421     return fts3SegReaderCursor(p, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr);
  2312   2422   }
  2313   2423   
  2314         -
  2315         -int sqlite3Fts3TermSegReaderCursor(
         2424  +/*
         2425  +** Open an Fts3MultiSegReader to scan the doclist for term zTerm/nTerm. Or,
         2426  +** if isPrefix is true, to scan the doclist for all terms for which 
         2427  +** zTerm/nTerm is a prefix. If successful, return SQLITE_OK and write
         2428  +** a pointer to the new Fts3MultiSegReader to *ppSegcsr. Otherwise, return
         2429  +** an SQLite error code.
         2430  +**
         2431  +** It is the responsibility of the caller to free this object by eventually
         2432  +** passing it to fts3SegReaderCursorFree() 
         2433  +**
         2434  +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
         2435  +** Output parameter *ppSegcsr is set to 0 if an error occurs.
         2436  +*/
         2437  +static int fts3TermSegReaderCursor(
  2316   2438     Fts3Cursor *pCsr,               /* Virtual table cursor handle */
  2317   2439     const char *zTerm,              /* Term to query for */
  2318   2440     int nTerm,                      /* Size of zTerm in bytes */
  2319   2441     int isPrefix,                   /* True for a prefix search */
  2320   2442     Fts3MultiSegReader **ppSegcsr   /* OUT: Allocated seg-reader cursor */
  2321   2443   ){
  2322         -  Fts3MultiSegReader *pSegcsr;   /* Object to allocate and return */
         2444  +  Fts3MultiSegReader *pSegcsr;    /* Object to allocate and return */
  2323   2445     int rc = SQLITE_NOMEM;          /* Return code */
  2324   2446   
  2325   2447     pSegcsr = sqlite3_malloc(sizeof(Fts3MultiSegReader));
  2326   2448     if( pSegcsr ){
  2327   2449       int i;
  2328   2450       int bFound = 0;               /* True once an index has been found */
  2329   2451       Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
................................................................................
  2359   2481       }
  2360   2482     }
  2361   2483   
  2362   2484     *ppSegcsr = pSegcsr;
  2363   2485     return rc;
  2364   2486   }
  2365   2487   
         2488  +/*
         2489  +** Free an Fts3MultiSegReader allocated by fts3TermSegReaderCursor().
         2490  +*/
  2366   2491   static void fts3SegReaderCursorFree(Fts3MultiSegReader *pSegcsr){
  2367   2492     sqlite3Fts3SegReaderFinish(pSegcsr);
  2368   2493     sqlite3_free(pSegcsr);
  2369   2494   }
  2370   2495   
  2371   2496   /*
  2372   2497   ** This function retreives the doclist for the specified term (or term
  2373         -** prefix) from the database. 
  2374         -**
  2375         -** The returned doclist may be in one of two formats, depending on the 
  2376         -** value of parameter isReqPos. If isReqPos is zero, then the doclist is
  2377         -** a sorted list of delta-compressed docids (a bare doclist). If isReqPos
  2378         -** is non-zero, then the returned list is in the same format as is stored 
  2379         -** in the database without the found length specifier at the start of on-disk
  2380         -** doclists.
         2498  +** prefix) from the database.
  2381   2499   */
  2382   2500   static int fts3TermSelect(
  2383   2501     Fts3Table *p,                   /* Virtual table handle */
  2384   2502     Fts3PhraseToken *pTok,          /* Token to query for */
  2385   2503     int iColumn,                    /* Column to query (or -ve for all columns) */
  2386         -  int isReqPos,                   /* True to include position lists in output */
  2387   2504     int *pnOut,                     /* OUT: Size of buffer at *ppOut */
  2388   2505     char **ppOut                    /* OUT: Malloced result buffer */
  2389   2506   ){
  2390   2507     int rc;                         /* Return code */
  2391         -  Fts3MultiSegReader *pSegcsr;   /* Seg-reader cursor for this term */
  2392         -  TermSelect tsc;                 /* Context object for fts3TermSelectCb() */
         2508  +  Fts3MultiSegReader *pSegcsr;    /* Seg-reader cursor for this term */
         2509  +  TermSelect tsc;                 /* Object for pair-wise doclist merging */
  2393   2510     Fts3SegFilter filter;           /* Segment term filter configuration */
  2394   2511   
  2395   2512     pSegcsr = pTok->pSegcsr;
  2396   2513     memset(&tsc, 0, sizeof(TermSelect));
  2397         -  tsc.isReqPos = isReqPos;
  2398   2514   
  2399         -  filter.flags = FTS3_SEGMENT_IGNORE_EMPTY 
         2515  +  filter.flags = FTS3_SEGMENT_IGNORE_EMPTY | FTS3_SEGMENT_REQUIRE_POS
  2400   2516           | (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0)
  2401         -        | (isReqPos ? FTS3_SEGMENT_REQUIRE_POS : 0)
  2402   2517           | (iColumn<p->nColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0);
  2403   2518     filter.iCol = iColumn;
  2404   2519     filter.zTerm = pTok->z;
  2405   2520     filter.nTerm = pTok->n;
  2406   2521   
  2407   2522     rc = sqlite3Fts3SegReaderStart(p, pSegcsr, &filter);
  2408   2523     while( SQLITE_OK==rc
  2409   2524         && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pSegcsr)) 
  2410   2525     ){
  2411         -    rc = fts3TermSelectCb(p, (void *)&tsc, 
  2412         -        pSegcsr->zTerm, pSegcsr->nTerm, pSegcsr->aDoclist, pSegcsr->nDoclist
  2413         -    );
         2526  +    rc = fts3TermSelectMerge(p, &tsc, pSegcsr->aDoclist, pSegcsr->nDoclist);
  2414   2527     }
  2415   2528   
  2416   2529     if( rc==SQLITE_OK ){
  2417         -    rc = fts3TermSelectMerge(p, &tsc);
         2530  +    rc = fts3TermSelectFinishMerge(p, &tsc);
  2418   2531     }
  2419   2532     if( rc==SQLITE_OK ){
  2420   2533       *ppOut = tsc.aaOutput[0];
  2421   2534       *pnOut = tsc.anOutput[0];
  2422   2535     }else{
  2423   2536       int i;
  2424   2537       for(i=0; i<SizeofArray(tsc.aaOutput); i++){
................................................................................
  2436   2549   ** in buffer aList[], size nList bytes.
  2437   2550   **
  2438   2551   ** If the isPoslist argument is true, then it is assumed that the doclist
  2439   2552   ** contains a position-list following each docid. Otherwise, it is assumed
  2440   2553   ** that the doclist is simply a list of docids stored as delta encoded 
  2441   2554   ** varints.
  2442   2555   */
  2443         -static int fts3DoclistCountDocids(int isPoslist, char *aList, int nList){
         2556  +static int fts3DoclistCountDocids(char *aList, int nList){
  2444   2557     int nDoc = 0;                   /* Return value */
  2445   2558     if( aList ){
  2446   2559       char *aEnd = &aList[nList];   /* Pointer to one byte after EOF */
  2447   2560       char *p = aList;              /* Cursor */
  2448         -    if( !isPoslist ){
  2449         -      /* The number of docids in the list is the same as the number of 
  2450         -      ** varints. In FTS3 a varint consists of a single byte with the 0x80 
  2451         -      ** bit cleared and zero or more bytes with the 0x80 bit set. So to
  2452         -      ** count the varints in the buffer, just count the number of bytes
  2453         -      ** with the 0x80 bit clear.  */
  2454         -      while( p<aEnd ) nDoc += (((*p++)&0x80)==0);
  2455         -    }else{
  2456         -      while( p<aEnd ){
  2457         -        nDoc++;
  2458         -        while( (*p++)&0x80 );     /* Skip docid varint */
  2459         -        fts3PoslistCopy(0, &p);   /* Skip over position list */
  2460         -      }
         2561  +    while( p<aEnd ){
         2562  +      nDoc++;
         2563  +      while( (*p++)&0x80 );     /* Skip docid varint */
         2564  +      fts3PoslistCopy(0, &p);   /* Skip over position list */
  2461   2565       }
  2462   2566     }
  2463   2567   
  2464   2568     return nDoc;
  2465   2569   }
  2466   2570   
  2467   2571   /*
................................................................................
  2483   2587         pCsr->isEof = 1;
  2484   2588         rc = sqlite3_reset(pCsr->pStmt);
  2485   2589       }else{
  2486   2590         pCsr->iPrevId = sqlite3_column_int64(pCsr->pStmt, 0);
  2487   2591         rc = SQLITE_OK;
  2488   2592       }
  2489   2593     }else{
  2490         -    rc = sqlite3Fts3EvalNext((Fts3Cursor *)pCursor);
         2594  +    rc = fts3EvalNext((Fts3Cursor *)pCursor);
  2491   2595     }
  2492   2596     assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
  2493   2597     return rc;
  2494   2598   }
  2495   2599   
  2496   2600   /*
  2497   2601   ** This is the xFilter interface for the virtual table.  See
................................................................................
  2560   2664         }
  2561   2665         return rc;
  2562   2666       }
  2563   2667   
  2564   2668       rc = sqlite3Fts3ReadLock(p);
  2565   2669       if( rc!=SQLITE_OK ) return rc;
  2566   2670   
  2567         -    rc = sqlite3Fts3EvalStart(pCsr, pCsr->pExpr, 1);
         2671  +    rc = fts3EvalStart(pCsr);
  2568   2672   
  2569   2673       sqlite3Fts3SegmentsClose(p);
  2570   2674       if( rc!=SQLITE_OK ) return rc;
  2571   2675       pCsr->pNextId = pCsr->aDoclist;
  2572   2676       pCsr->iPrevId = 0;
  2573   2677     }
  2574   2678   
................................................................................
  2967   3071     fts3DbExec(&rc, db,
  2968   3072       "ALTER TABLE %Q.'%q_segdir'   RENAME TO '%q_segdir';",
  2969   3073       p->zDb, p->zName, zName
  2970   3074     );
  2971   3075     return rc;
  2972   3076   }
  2973   3077   
         3078  +/*
         3079  +** The xSavepoint() method.
         3080  +**
         3081  +** Flush the contents of the pending-terms table to disk.
         3082  +*/
  2974   3083   static int fts3SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){
  2975   3084     UNUSED_PARAMETER(iSavepoint);
  2976   3085     assert( ((Fts3Table *)pVtab)->inTransaction );
  2977   3086     assert( ((Fts3Table *)pVtab)->mxSavepoint < iSavepoint );
  2978   3087     TESTONLY( ((Fts3Table *)pVtab)->mxSavepoint = iSavepoint );
  2979   3088     return fts3SyncMethod(pVtab);
  2980   3089   }
         3090  +
         3091  +/*
         3092  +** The xRelease() method.
         3093  +**
         3094  +** This is a no-op.
         3095  +*/
  2981   3096   static int fts3ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){
  2982   3097     TESTONLY( Fts3Table *p = (Fts3Table*)pVtab );
  2983   3098     UNUSED_PARAMETER(iSavepoint);
  2984   3099     UNUSED_PARAMETER(pVtab);
  2985   3100     assert( p->inTransaction );
  2986   3101     assert( p->mxSavepoint >= iSavepoint );
  2987   3102     TESTONLY( p->mxSavepoint = iSavepoint-1 );
  2988   3103     return SQLITE_OK;
  2989   3104   }
         3105  +
         3106  +/*
         3107  +** The xRollbackTo() method.
         3108  +**
         3109  +** Discard the contents of the pending terms table.
         3110  +*/
  2990   3111   static int fts3RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){
  2991   3112     Fts3Table *p = (Fts3Table*)pVtab;
  2992   3113     UNUSED_PARAMETER(iSavepoint);
  2993   3114     assert( p->inTransaction );
  2994   3115     assert( p->mxSavepoint >= iSavepoint );
  2995   3116     TESTONLY( p->mxSavepoint = iSavepoint );
  2996   3117     sqlite3Fts3PendingTermsClear(p);
................................................................................
  3131   3252     assert( rc!=SQLITE_OK );
  3132   3253     if( pHash ){
  3133   3254       sqlite3Fts3HashClear(pHash);
  3134   3255       sqlite3_free(pHash);
  3135   3256     }
  3136   3257     return rc;
  3137   3258   }
  3138         -
  3139         -#if !SQLITE_CORE
  3140         -int sqlite3_extension_init(
  3141         -  sqlite3 *db, 
  3142         -  char **pzErrMsg,
  3143         -  const sqlite3_api_routines *pApi
  3144         -){
  3145         -  SQLITE_EXTENSION_INIT2(pApi)
  3146         -  return sqlite3Fts3Init(db);
  3147         -}
  3148         -#endif
  3149         -
  3150   3259   
  3151   3260   /*
  3152   3261   ** Allocate an Fts3MultiSegReader for each token in the expression headed
  3153   3262   ** by pExpr. 
  3154   3263   **
  3155   3264   ** An Fts3SegReader object is a cursor that can seek or scan a range of
  3156   3265   ** entries within a single segment b-tree. An Fts3MultiSegReader uses multiple
................................................................................
  3160   3269   ** If the allocated Fts3MultiSegReader just seeks to a single entry in a
  3161   3270   ** segment b-tree (if the term is not a prefix or it is a prefix for which
  3162   3271   ** there exists prefix b-tree of the right length) then it may be traversed
  3163   3272   ** and merged incrementally. Otherwise, it has to be merged into an in-memory 
  3164   3273   ** doclist and then traversed.
  3165   3274   */
  3166   3275   static void fts3EvalAllocateReaders(
  3167         -  Fts3Cursor *pCsr, 
  3168         -  Fts3Expr *pExpr, 
         3276  +  Fts3Cursor *pCsr,               /* FTS cursor handle */
         3277  +  Fts3Expr *pExpr,                /* Allocate readers for this expression */
  3169   3278     int *pnToken,                   /* OUT: Total number of tokens in phrase. */
  3170   3279     int *pnOr,                      /* OUT: Total number of OR nodes in expr. */
  3171         -  int *pRc
         3280  +  int *pRc                        /* IN/OUT: Error code */
  3172   3281   ){
  3173   3282     if( pExpr && SQLITE_OK==*pRc ){
  3174   3283       if( pExpr->eType==FTSQUERY_PHRASE ){
  3175   3284         int i;
  3176   3285         int nToken = pExpr->pPhrase->nToken;
  3177   3286         *pnToken += nToken;
  3178   3287         for(i=0; i<nToken; i++){
  3179   3288           Fts3PhraseToken *pToken = &pExpr->pPhrase->aToken[i];
  3180         -        int rc = sqlite3Fts3TermSegReaderCursor(pCsr, 
         3289  +        int rc = fts3TermSegReaderCursor(pCsr, 
  3181   3290               pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr
  3182   3291           );
  3183   3292           if( rc!=SQLITE_OK ){
  3184   3293             *pRc = rc;
  3185   3294             return;
  3186   3295           }
  3187   3296         }
................................................................................
  3191   3300         *pnOr += (pExpr->eType==FTSQUERY_OR);
  3192   3301         fts3EvalAllocateReaders(pCsr, pExpr->pLeft, pnToken, pnOr, pRc);
  3193   3302         fts3EvalAllocateReaders(pCsr, pExpr->pRight, pnToken, pnOr, pRc);
  3194   3303       }
  3195   3304     }
  3196   3305   }
  3197   3306   
         3307  +/*
         3308  +** Arguments pList/nList contain the doclist for token iToken of phrase p.
         3309  +** It is merged into the main doclist stored in p->doclist.aAll/nAll.
         3310  +**
         3311  +** This function assumes that pList points to a buffer allocated using
         3312  +** sqlite3_malloc(). This function takes responsibility for eventually
         3313  +** freeing the buffer.
         3314  +*/
  3198   3315   static void fts3EvalPhraseMergeToken(
  3199         -  Fts3Table *pTab,
  3200         -  Fts3Phrase *p,
  3201         -  int iToken,
  3202         -  char *pList,
  3203         -  int nList
         3316  +  Fts3Table *pTab,                /* FTS Table pointer */
         3317  +  Fts3Phrase *p,                  /* Phrase to merge pList/nList into */
         3318  +  int iToken,                     /* Token pList/nList corresponds to */
         3319  +  char *pList,                    /* Pointer to doclist */
         3320  +  int nList                       /* Number of bytes in pList */
  3204   3321   ){
  3205   3322     assert( iToken!=p->iDoclistToken );
  3206   3323   
  3207   3324     if( pList==0 ){
  3208   3325       sqlite3_free(p->doclist.aAll);
  3209   3326       p->doclist.aAll = 0;
  3210   3327       p->doclist.nAll = 0;
................................................................................
  3245   3362       p->doclist.aAll = pRight;
  3246   3363       p->doclist.nAll = nRight;
  3247   3364     }
  3248   3365   
  3249   3366     if( iToken>p->iDoclistToken ) p->iDoclistToken = iToken;
  3250   3367   }
  3251   3368   
         3369  +/*
         3370  +** Load the doclist for phrase p into p->doclist.aAll/nAll. The loaded doclist
         3371  +** does not take deferred tokens into account.
         3372  +**
         3373  +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
         3374  +*/
  3252   3375   static int fts3EvalPhraseLoad(
  3253         -  Fts3Cursor *pCsr, 
  3254         -  Fts3Phrase *p
         3376  +  Fts3Cursor *pCsr,               /* FTS Cursor handle */
         3377  +  Fts3Phrase *p                   /* Phrase object */
  3255   3378   ){
  3256   3379     Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  3257   3380     int iToken;
  3258   3381     int rc = SQLITE_OK;
  3259   3382   
  3260   3383     for(iToken=0; rc==SQLITE_OK && iToken<p->nToken; iToken++){
  3261   3384       Fts3PhraseToken *pToken = &p->aToken[iToken];
  3262   3385       assert( pToken->pDeferred==0 || pToken->pSegcsr==0 );
  3263   3386   
  3264   3387       if( pToken->pSegcsr ){
  3265   3388         int nThis = 0;
  3266   3389         char *pThis = 0;
  3267         -      rc = fts3TermSelect(pTab, pToken, p->iColumn, 1, &nThis, &pThis);
         3390  +      rc = fts3TermSelect(pTab, pToken, p->iColumn, &nThis, &pThis);
  3268   3391         if( rc==SQLITE_OK ){
  3269   3392           fts3EvalPhraseMergeToken(pTab, p, iToken, pThis, nThis);
  3270   3393         }
  3271   3394       }
  3272   3395       assert( pToken->pSegcsr==0 );
  3273   3396     }
  3274   3397   
  3275   3398     return rc;
  3276   3399   }
  3277   3400   
         3401  +/*
         3402  +** This function is called on each phrase after the position lists for
         3403  +** any deferred tokens have been loaded into memory. It updates the phrases
         3404  +** current position list to include only those positions that are really
         3405  +** instances of the phrase (after considering deferred tokens). If this
         3406  +** means that the phrase does not appear in the current row, doclist.pList
         3407  +** and doclist.nList are both zeroed.
         3408  +**
         3409  +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
         3410  +*/
  3278   3411   static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){
  3279         -  int iToken;
  3280         -  int rc = SQLITE_OK;
  3281         -
  3282         -  int nMaxUndeferred = pPhrase->iDoclistToken;
  3283         -  char *aPoslist = 0;
  3284         -  int nPoslist = 0;
  3285         -  int iPrev = -1;
         3412  +  int iToken;                     /* Used to iterate through phrase tokens */
         3413  +  int rc = SQLITE_OK;             /* Return code */
         3414  +  char *aPoslist = 0;             /* Position list for deferred tokens */
         3415  +  int nPoslist = 0;               /* Number of bytes in aPoslist */
         3416  +  int iPrev = -1;                 /* Token number of previous deferred token */
  3286   3417   
  3287   3418     assert( pPhrase->doclist.bFreeList==0 );
  3288   3419   
  3289   3420     for(iToken=0; rc==SQLITE_OK && iToken<pPhrase->nToken; iToken++){
  3290   3421       Fts3PhraseToken *pToken = &pPhrase->aToken[iToken];
  3291   3422       Fts3DeferredToken *pDeferred = pToken->pDeferred;
  3292   3423   
................................................................................
  3324   3455           }
  3325   3456         }
  3326   3457         iPrev = iToken;
  3327   3458       }
  3328   3459     }
  3329   3460   
  3330   3461     if( iPrev>=0 ){
         3462  +    int nMaxUndeferred = pPhrase->iDoclistToken;
  3331   3463       if( nMaxUndeferred<0 ){
  3332   3464         pPhrase->doclist.pList = aPoslist;
  3333   3465         pPhrase->doclist.nList = nPoslist;
  3334   3466         pPhrase->doclist.iDocid = pCsr->iPrevId;
  3335   3467         pPhrase->doclist.bFreeList = 1;
  3336   3468       }else{
  3337   3469         int nDistance;
................................................................................
  3372   3504   }
  3373   3505   
  3374   3506   /*
  3375   3507   ** This function is called for each Fts3Phrase in a full-text query 
  3376   3508   ** expression to initialize the mechanism for returning rows. Once this
  3377   3509   ** function has been called successfully on an Fts3Phrase, it may be
  3378   3510   ** used with fts3EvalPhraseNext() to iterate through the matching docids.
         3511  +**
         3512  +** If parameter bOptOk is true, then the phrase may (or may not) use the
         3513  +** incremental loading strategy. Otherwise, the entire doclist is loaded into
         3514  +** memory within this call.
         3515  +**
         3516  +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
  3379   3517   */
  3380   3518   static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){
  3381         -  int rc;
         3519  +  int rc;                         /* Error code */
  3382   3520     Fts3PhraseToken *pFirst = &p->aToken[0];
  3383   3521     Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  3384   3522   
  3385   3523     if( pCsr->bDesc==pTab->bDescIdx 
  3386   3524      && bOptOk==1 
  3387   3525      && p->nToken==1 
  3388   3526      && pFirst->pSegcsr 
................................................................................
  3402   3540   
  3403   3541     assert( rc!=SQLITE_OK || p->nToken<1 || p->aToken[0].pSegcsr==0 || p->bIncr );
  3404   3542     return rc;
  3405   3543   }
  3406   3544   
  3407   3545   /*
  3408   3546   ** This function is used to iterate backwards (from the end to start) 
  3409         -** through doclists.
         3547  +** through doclists. It is used by this module to iterate through phrase
         3548  +** doclists in reverse and by the fts3_write.c module to iterate through
         3549  +** pending-terms lists when writing to databases with "order=desc".
         3550  +**
         3551  +** The doclist may be sorted in ascending (parameter bDescIdx==0) or 
         3552  +** descending (parameter bDescIdx==1) order of docid. Regardless, this
         3553  +** function iterates from the end of the doclist to the beginning.
  3410   3554   */
  3411   3555   void sqlite3Fts3DoclistPrev(
  3412   3556     int bDescIdx,                   /* True if the doclist is desc */
  3413   3557     char *aDoclist,                 /* Pointer to entire doclist */
  3414   3558     int nDoclist,                   /* Length of aDoclist in bytes */
  3415   3559     char **ppIter,                  /* IN/OUT: Iterator pointer */
  3416   3560     sqlite3_int64 *piDocid,         /* IN/OUT: Docid pointer */
................................................................................
  3467   3611   ** SQLITE_OK.
  3468   3612   **
  3469   3613   ** If there is no "next" entry and no error occurs, then *pbEof is set to
  3470   3614   ** 1 before returning. Otherwise, if no error occurs and the iterator is
  3471   3615   ** successfully advanced, *pbEof is set to 0.
  3472   3616   */
  3473   3617   static int fts3EvalPhraseNext(
  3474         -  Fts3Cursor *pCsr, 
  3475         -  Fts3Phrase *p, 
  3476         -  u8 *pbEof
         3618  +  Fts3Cursor *pCsr,               /* FTS Cursor handle */
         3619  +  Fts3Phrase *p,                  /* Phrase object to advance to next docid */
         3620  +  u8 *pbEof                       /* OUT: Set to 1 if EOF */
  3477   3621   ){
  3478   3622     int rc = SQLITE_OK;
  3479   3623     Fts3Doclist *pDL = &p->doclist;
  3480   3624     Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  3481   3625   
  3482   3626     if( p->bIncr ){
  3483   3627       assert( p->nToken==1 );
................................................................................
  3515   3659         }
  3516   3660         pDL->pList = pIter;
  3517   3661         fts3PoslistCopy(0, &pIter);
  3518   3662         pDL->nList = (pIter - pDL->pList);
  3519   3663   
  3520   3664         /* pIter now points just past the 0x00 that terminates the position-
  3521   3665         ** list for document pDL->iDocid. However, if this position-list was
  3522         -      ** edited in place by fts3EvalNearTrim2(), then pIter may not actually
         3666  +      ** edited in place by fts3EvalNearTrim(), then pIter may not actually
  3523   3667         ** point to the start of the next docid value. The following line deals
  3524   3668         ** with this case by advancing pIter past the zero-padding added by
  3525         -      ** fts3EvalNearTrim2().  */
         3669  +      ** fts3EvalNearTrim().  */
  3526   3670         while( pIter<pEnd && *pIter==0 ) pIter++;
  3527   3671   
  3528   3672         pDL->pNextDocid = pIter;
  3529   3673         assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter );
  3530   3674         *pbEof = 0;
  3531   3675       }
  3532   3676     }
  3533   3677   
  3534   3678     return rc;
  3535   3679   }
  3536   3680   
         3681  +/*
         3682  +**
         3683  +** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
         3684  +** Otherwise, fts3EvalPhraseStart() is called on all phrases within the
         3685  +** expression. Also the Fts3Expr.bDeferred variable is set to true for any
         3686  +** expressions for which all descendent tokens are deferred.
         3687  +**
         3688  +** If parameter bOptOk is zero, then it is guaranteed that the
         3689  +** Fts3Phrase.doclist.aAll/nAll variables contain the entire doclist for
         3690  +** each phrase in the expression (subject to deferred token processing).
         3691  +** Or, if bOptOk is non-zero, then one or more tokens within the expression
         3692  +** may be loaded incrementally, meaning doclist.aAll/nAll is not available.
         3693  +**
         3694  +** If an error occurs within this function, *pRc is set to an SQLite error
         3695  +** code before returning.
         3696  +*/
  3537   3697   static void fts3EvalStartReaders(
  3538         -  Fts3Cursor *pCsr, 
  3539         -  Fts3Expr *pExpr, 
  3540         -  int bOptOk,
  3541         -  int *pRc
         3698  +  Fts3Cursor *pCsr,               /* FTS Cursor handle */
         3699  +  Fts3Expr *pExpr,                /* Expression to initialize phrases in */
         3700  +  int bOptOk,                     /* True to enable incremental loading */
         3701  +  int *pRc                        /* IN/OUT: Error code */
  3542   3702   ){
  3543   3703     if( pExpr && SQLITE_OK==*pRc ){
  3544   3704       if( pExpr->eType==FTSQUERY_PHRASE ){
  3545   3705         int i;
  3546   3706         int nToken = pExpr->pPhrase->nToken;
  3547   3707         for(i=0; i<nToken; i++){
  3548   3708           if( pExpr->pPhrase->aToken[i].pDeferred==0 ) break;
................................................................................
  3553   3713         fts3EvalStartReaders(pCsr, pExpr->pLeft, bOptOk, pRc);
  3554   3714         fts3EvalStartReaders(pCsr, pExpr->pRight, bOptOk, pRc);
  3555   3715         pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred);
  3556   3716       }
  3557   3717     }
  3558   3718   }
  3559   3719   
         3720  +/*
         3721  +** An array of the following structures is assembled as part of the process
         3722  +** of selecting tokens to defer before the query starts executing (as part
         3723  +** of the xFilter() method). There is one element in the array for each
         3724  +** token in the FTS expression.
         3725  +**
         3726  +** Tokens are divided into AND/NEAR clusters. All tokens in a cluster belong
         3727  +** to phrases that are connected only by AND and NEAR operators (not OR or
         3728  +** NOT). When determining tokens to defer, each AND/NEAR cluster is considered
         3729  +** separately. The root of a tokens AND/NEAR cluster is stored in 
         3730  +** Fts3TokenAndCost.pRoot.
         3731  +*/
  3560   3732   typedef struct Fts3TokenAndCost Fts3TokenAndCost;
  3561   3733   struct Fts3TokenAndCost {
  3562   3734     Fts3Phrase *pPhrase;            /* The phrase the token belongs to */
  3563   3735     int iToken;                     /* Position of token in phrase */
  3564   3736     Fts3PhraseToken *pToken;        /* The token itself */
  3565         -  Fts3Expr *pRoot; 
  3566         -  int nOvfl;
         3737  +  Fts3Expr *pRoot;                /* Root of NEAR/AND cluster */
         3738  +  int nOvfl;                      /* Number of overflow pages to load doclist */
  3567   3739     int iCol;                       /* The column the token must match */
  3568   3740   };
  3569   3741   
         3742  +/*
         3743  +** This function is used to populate an allocated Fts3TokenAndCost array.
         3744  +**
         3745  +** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
         3746  +** Otherwise, if an error occurs during execution, *pRc is set to an
         3747  +** SQLite error code.
         3748  +*/
  3570   3749   static void fts3EvalTokenCosts(
  3571         -  Fts3Cursor *pCsr, 
  3572         -  Fts3Expr *pRoot, 
  3573         -  Fts3Expr *pExpr, 
  3574         -  Fts3TokenAndCost **ppTC,
  3575         -  Fts3Expr ***ppOr,
  3576         -  int *pRc
         3750  +  Fts3Cursor *pCsr,               /* FTS Cursor handle */
         3751  +  Fts3Expr *pRoot,                /* Root of current AND/NEAR cluster */
         3752  +  Fts3Expr *pExpr,                /* Expression to consider */
         3753  +  Fts3TokenAndCost **ppTC,        /* Write new entries to *(*ppTC)++ */
         3754  +  Fts3Expr ***ppOr,               /* Write new OR root to *(*ppOr)++ */
         3755  +  int *pRc                        /* IN/OUT: Error code */
  3577   3756   ){
  3578   3757     if( *pRc==SQLITE_OK && pExpr ){
  3579   3758       if( pExpr->eType==FTSQUERY_PHRASE ){
  3580   3759         Fts3Phrase *pPhrase = pExpr->pPhrase;
  3581   3760         int i;
  3582   3761         for(i=0; *pRc==SQLITE_OK && i<pPhrase->nToken; i++){
  3583   3762           Fts3TokenAndCost *pTC = (*ppTC)++;
................................................................................
  3601   3780           (*ppOr)++;
  3602   3781         }
  3603   3782         fts3EvalTokenCosts(pCsr, pRoot, pExpr->pRight, ppTC, ppOr, pRc);
  3604   3783       }
  3605   3784     }
  3606   3785   }
  3607   3786   
         3787  +/*
         3788  +** Determine the average document (row) size in pages. If successful,
         3789  +** write this value to *pnPage and return SQLITE_OK. Otherwise, return
         3790  +** an SQLite error code.
         3791  +**
         3792  +** The average document size in pages is calculated by first calculating 
         3793  +** determining the average size in bytes, B. If B is less than the amount
         3794  +** of data that will fit on a single leaf page of an intkey table in
         3795  +** this database, then the average docsize is 1. Otherwise, it is 1 plus
         3796  +** the number of overflow pages consumed by a record B bytes in size.
         3797  +*/
  3608   3798   static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){
  3609   3799     if( pCsr->nRowAvg==0 ){
  3610   3800       /* The average document size, which is required to calculate the cost
  3611         -     ** of each doclist, has not yet been determined. Read the required 
  3612         -     ** data from the %_stat table to calculate it.
  3613         -     **
  3614         -     ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3 
  3615         -     ** varints, where nCol is the number of columns in the FTS3 table.
  3616         -     ** The first varint is the number of documents currently stored in
  3617         -     ** the table. The following nCol varints contain the total amount of
  3618         -     ** data stored in all rows of each column of the table, from left
  3619         -     ** to right.
  3620         -     */
         3801  +    ** of each doclist, has not yet been determined. Read the required 
         3802  +    ** data from the %_stat table to calculate it.
         3803  +    **
         3804  +    ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3 
         3805  +    ** varints, where nCol is the number of columns in the FTS3 table.
         3806  +    ** The first varint is the number of documents currently stored in
         3807  +    ** the table. The following nCol varints contain the total amount of
         3808  +    ** data stored in all rows of each column of the table, from left
         3809  +    ** to right.
         3810  +    */
  3621   3811       int rc;
  3622   3812       Fts3Table *p = (Fts3Table*)pCsr->base.pVtab;
  3623   3813       sqlite3_stmt *pStmt;
  3624   3814       sqlite3_int64 nDoc = 0;
  3625   3815       sqlite3_int64 nByte = 0;
  3626   3816       const char *pEnd;
  3627   3817       const char *a;
................................................................................
  3648   3838       if( rc!=SQLITE_OK ) return rc;
  3649   3839     }
  3650   3840   
  3651   3841     *pnPage = pCsr->nRowAvg;
  3652   3842     return SQLITE_OK;
  3653   3843   }
  3654   3844   
         3845  +/*
         3846  +** This function is called to select the tokens (if any) that will be 
         3847  +** deferred. The array aTC[] has already been populated when this is
         3848  +** called.
         3849  +**
         3850  +** This function is called once for each AND/NEAR cluster in the 
         3851  +** expression. Each invocation determines which tokens to defer within
         3852  +** the cluster with root node pRoot. See comments above the definition
         3853  +** of struct Fts3TokenAndCost for more details.
         3854  +**
         3855  +** If no error occurs, SQLITE_OK is returned and sqlite3Fts3DeferToken()
         3856  +** called on each token to defer. Otherwise, an SQLite error code is
         3857  +** returned.
         3858  +*/
  3655   3859   static int fts3EvalSelectDeferred(
  3656         -  Fts3Cursor *pCsr,
  3657         -  Fts3Expr *pRoot,
  3658         -  Fts3TokenAndCost *aTC,
  3659         -  int nTC
         3860  +  Fts3Cursor *pCsr,               /* FTS Cursor handle */
         3861  +  Fts3Expr *pRoot,                /* Consider tokens with this root node */
         3862  +  Fts3TokenAndCost *aTC,          /* Array of expression tokens and costs */
         3863  +  int nTC                         /* Number of entries in aTC[] */
  3660   3864   ){
  3661         -  int nDocSize = 0;
  3662         -  int nDocEst = 0;
  3663         -  int rc = SQLITE_OK;
  3664   3865     Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  3665         -  int ii;
         3866  +  int nDocSize = 0;               /* Number of pages per doc loaded */
         3867  +  int rc = SQLITE_OK;             /* Return code */
         3868  +  int ii;                         /* Iterator variable for various purposes */
         3869  +  int nOvfl = 0;                  /* Total overflow pages used by doclists */
         3870  +  int nToken = 0;                 /* Total number of tokens in cluster */
  3666   3871   
  3667         -  int nOvfl = 0;
  3668         -  int nTerm = 0;
         3872  +  int nMinEst = 0;                /* The minimum count for any phrase so far. */
         3873  +  int nLoad4 = 1;                 /* (Phrases that will be loaded)^4. */
  3669   3874   
         3875  +  /* Count the tokens in this AND/NEAR cluster. If none of the doclists
         3876  +  ** associated with the tokens spill onto overflow pages, or if there is
         3877  +  ** only 1 token, exit early. No tokens to defer in this case. */
  3670   3878     for(ii=0; ii<nTC; ii++){
  3671   3879       if( aTC[ii].pRoot==pRoot ){
  3672   3880         nOvfl += aTC[ii].nOvfl;
  3673         -      nTerm++;
         3881  +      nToken++;
  3674   3882       }
  3675   3883     }
  3676         -  if( nOvfl==0 || nTerm<2 ) return SQLITE_OK;
         3884  +  if( nOvfl==0 || nToken<2 ) return SQLITE_OK;
  3677   3885   
         3886  +  /* Obtain the average docsize (in pages). */
  3678   3887     rc = fts3EvalAverageDocsize(pCsr, &nDocSize);
         3888  +  assert( rc!=SQLITE_OK || nDocSize>0 );
  3679   3889   
  3680         -  for(ii=0; ii<nTerm && rc==SQLITE_OK; ii++){
  3681         -    int jj;
  3682         -    Fts3TokenAndCost *pTC = 0;
  3683   3890   
  3684         -    for(jj=0; jj<nTC; jj++){
  3685         -      if( aTC[jj].pToken && aTC[jj].pRoot==pRoot 
  3686         -       && (!pTC || aTC[jj].nOvfl<pTC->nOvfl) 
         3891  +  /* Iterate through all tokens in this AND/NEAR cluster, in ascending order 
         3892  +  ** of the number of overflow pages that will be loaded by the pager layer 
         3893  +  ** to retrieve the entire doclist for the token from the full-text index.
         3894  +  ** Load the doclists for tokens that are either:
         3895  +  **
         3896  +  **   a. The cheapest token in the entire query (i.e. the one visited by the
         3897  +  **      first iteration of this loop), or
         3898  +  **
         3899  +  **   b. Part of a multi-token phrase.
         3900  +  **
         3901  +  ** After each token doclist is loaded, merge it with the others from the
         3902  +  ** same phrase and count the number of documents that the merged doclist
         3903  +  ** contains. Set variable "nMinEst" to the smallest number of documents in 
         3904  +  ** any phrase doclist for which 1 or more token doclists have been loaded.
         3905  +  ** Let nOther be the number of other phrases for which it is certain that
         3906  +  ** one or more tokens will not be deferred.
         3907  +  **
         3908  +  ** Then, for each token, defer it if loading the doclist would result in
         3909  +  ** loading N or more overflow pages into memory, where N is computed as:
         3910  +  **
         3911  +  **    (nMinEst + 4^nOther - 1) / (4^nOther)
         3912  +  */
         3913  +  for(ii=0; ii<nToken && rc==SQLITE_OK; ii++){
         3914  +    int iTC;                      /* Used to iterate through aTC[] array. */
         3915  +    Fts3TokenAndCost *pTC = 0;    /* Set to cheapest remaining token. */
         3916  +
         3917  +    /* Set pTC to point to the cheapest remaining token. */
         3918  +    for(iTC=0; iTC<nTC; iTC++){
         3919  +      if( aTC[iTC].pToken && aTC[iTC].pRoot==pRoot 
         3920  +       && (!pTC || aTC[iTC].nOvfl<pTC->nOvfl) 
  3687   3921         ){
  3688         -        pTC = &aTC[jj];
         3922  +        pTC = &aTC[iTC];
  3689   3923         }
  3690   3924       }
  3691   3925       assert( pTC );
  3692   3926   
  3693         -    /* At this point pTC points to the cheapest remaining token. */
  3694         -    if( ii==0 ){
  3695         -      if( pTC->nOvfl ){
  3696         -        nDocEst = (pTC->nOvfl * pTab->nPgsz + pTab->nPgsz) / 10;
  3697         -      }else{
         3927  +    if( ii && pTC->nOvfl>=((nMinEst+(nLoad4/4)-1)/(nLoad4/4))*nDocSize ){
         3928  +      /* The number of overflow pages to load for this (and therefore all
         3929  +      ** subsequent) tokens is greater than the estimated number of pages 
         3930  +      ** that will be loaded if all subsequent tokens are deferred.
         3931  +      */
         3932  +      Fts3PhraseToken *pToken = pTC->pToken;
         3933  +      rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol);
         3934  +      fts3SegReaderCursorFree(pToken->pSegcsr);
         3935  +      pToken->pSegcsr = 0;
         3936  +    }else{
         3937  +      nLoad4 = nLoad4*4;
         3938  +      if( ii==0 || pTC->pPhrase->nToken>1 ){
         3939  +        /* Either this is the cheapest token in the entire query, or it is
         3940  +        ** part of a multi-token phrase. Either way, the entire doclist will
         3941  +        ** (eventually) be loaded into memory. It may as well be now. */
  3698   3942           Fts3PhraseToken *pToken = pTC->pToken;
  3699   3943           int nList = 0;
  3700   3944           char *pList = 0;
  3701         -        rc = fts3TermSelect(pTab, pToken, pTC->iCol, 1, &nList, &pList);
         3945  +        rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList);
  3702   3946           assert( rc==SQLITE_OK || pList==0 );
  3703         -
  3704   3947           if( rc==SQLITE_OK ){
  3705         -          nDocEst = fts3DoclistCountDocids(1, pList, nList);
         3948  +          int nCount;
  3706   3949             fts3EvalPhraseMergeToken(pTab, pTC->pPhrase, pTC->iToken,pList,nList);
         3950  +          nCount = fts3DoclistCountDocids(
         3951  +              pTC->pPhrase->doclist.aAll, pTC->pPhrase->doclist.nAll
         3952  +          );
         3953  +          if( ii==0 || nCount<nMinEst ) nMinEst = nCount;
  3707   3954           }
  3708   3955         }
  3709         -    }else{
  3710         -      if( pTC->nOvfl>=(nDocEst*nDocSize) ){
  3711         -        Fts3PhraseToken *pToken = pTC->pToken;
  3712         -        rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol);
  3713         -        fts3SegReaderCursorFree(pToken->pSegcsr);
  3714         -        pToken->pSegcsr = 0;
  3715         -      }
  3716         -      nDocEst = 1 + (nDocEst/4);
  3717   3956       }
  3718   3957       pTC->pToken = 0;
  3719   3958     }
  3720   3959   
  3721   3960     return rc;
  3722   3961   }
  3723   3962   
  3724         -int sqlite3Fts3EvalStart(Fts3Cursor *pCsr, Fts3Expr *pExpr, int bOptOk){
         3963  +/*
         3964  +** This function is called from within the xFilter method. It initializes
         3965  +** the full-text query currently stored in pCsr->pExpr. To iterate through
         3966  +** the results of a query, the caller does:
         3967  +**
         3968  +**    fts3EvalStart(pCsr);
         3969  +**    while( 1 ){
         3970  +**      fts3EvalNext(pCsr);
         3971  +**      if( pCsr->bEof ) break;
         3972  +**      ... return row pCsr->iPrevId to the caller ...
         3973  +**    }
         3974  +*/
         3975  +static int fts3EvalStart(Fts3Cursor *pCsr){
  3725   3976     Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  3726   3977     int rc = SQLITE_OK;
  3727   3978     int nToken = 0;
  3728   3979     int nOr = 0;
  3729   3980   
  3730   3981     /* Allocate a MultiSegReader for each token in the expression. */
  3731         -  fts3EvalAllocateReaders(pCsr, pExpr, &nToken, &nOr, &rc);
         3982  +  fts3EvalAllocateReaders(pCsr, pCsr->pExpr, &nToken, &nOr, &rc);
  3732   3983   
  3733         -  /* Call fts3EvalPhraseStart() on all phrases in the expression. TODO:
  3734         -  ** This call will eventually also be responsible for determining which
  3735         -  ** tokens are 'deferred' until the document text is loaded into memory.
  3736         -  **
  3737         -  ** Each token in each phrase is dealt with using one of the following
  3738         -  ** three strategies:
  3739         -  **
  3740         -  **   1. Entire doclist loaded into memory as part of the
  3741         -  **      fts3EvalStartReaders() call.
  3742         -  **
  3743         -  **   2. Doclist loaded into memory incrementally, as part of each
  3744         -  **      sqlite3Fts3EvalNext() call.
  3745         -  **
  3746         -  **   3. Token doclist is never loaded. Instead, documents are loaded into
  3747         -  **      memory and scanned for the token as part of the sqlite3Fts3EvalNext()
  3748         -  **      call. This is known as a "deferred" token.
  3749         -  */
  3750         -
  3751         -  /* If bOptOk is true, check if there are any tokens that should be deferred.
  3752         -  */
  3753         -  if( rc==SQLITE_OK && bOptOk && nToken>1 && pTab->bHasStat ){
         3984  +  /* Determine which, if any, tokens in the expression should be deferred. */
         3985  +  if( rc==SQLITE_OK && nToken>1 && pTab->bHasStat ){
  3754   3986       Fts3TokenAndCost *aTC;
  3755   3987       Fts3Expr **apOr;
  3756   3988       aTC = (Fts3TokenAndCost *)sqlite3_malloc(
  3757   3989           sizeof(Fts3TokenAndCost) * nToken
  3758   3990         + sizeof(Fts3Expr *) * nOr * 2
  3759   3991       );
  3760   3992       apOr = (Fts3Expr **)&aTC[nToken];
................................................................................
  3762   3994       if( !aTC ){
  3763   3995         rc = SQLITE_NOMEM;
  3764   3996       }else{
  3765   3997         int ii;
  3766   3998         Fts3TokenAndCost *pTC = aTC;
  3767   3999         Fts3Expr **ppOr = apOr;
  3768   4000   
  3769         -      fts3EvalTokenCosts(pCsr, 0, pExpr, &pTC, &ppOr, &rc);
         4001  +      fts3EvalTokenCosts(pCsr, 0, pCsr->pExpr, &pTC, &ppOr, &rc);
  3770   4002         nToken = pTC-aTC;
  3771   4003         nOr = ppOr-apOr;
  3772   4004   
  3773   4005         if( rc==SQLITE_OK ){
  3774   4006           rc = fts3EvalSelectDeferred(pCsr, 0, aTC, nToken);
  3775   4007           for(ii=0; rc==SQLITE_OK && ii<nOr; ii++){
  3776   4008             rc = fts3EvalSelectDeferred(pCsr, apOr[ii], aTC, nToken);
................................................................................
  3777   4009           }
  3778   4010         }
  3779   4011   
  3780   4012         sqlite3_free(aTC);
  3781   4013       }
  3782   4014     }
  3783   4015   
  3784         -  fts3EvalStartReaders(pCsr, pExpr, bOptOk, &rc);
         4016  +  fts3EvalStartReaders(pCsr, pCsr->pExpr, 1, &rc);
  3785   4017     return rc;
  3786   4018   }
  3787   4019   
  3788         -static void fts3EvalZeroPoslist(Fts3Phrase *pPhrase){
         4020  +/*
         4021  +** Invalidate the current position list for phrase pPhrase.
         4022  +*/
         4023  +static void fts3EvalInvalidatePoslist(Fts3Phrase *pPhrase){
  3789   4024     if( pPhrase->doclist.bFreeList ){
  3790   4025       sqlite3_free(pPhrase->doclist.pList);
  3791   4026     }
  3792   4027     pPhrase->doclist.pList = 0;
  3793   4028     pPhrase->doclist.nList = 0;
  3794   4029     pPhrase->doclist.bFreeList = 0;
  3795   4030   }
  3796   4031   
  3797         -static int fts3EvalNearTrim2(
  3798         -  int nNear,
         4032  +/*
         4033  +** This function is called to edit the position list associated with
         4034  +** the phrase object passed as the fifth argument according to a NEAR
         4035  +** condition. For example:
         4036  +**
         4037  +**     abc NEAR/5 "def ghi"
         4038  +**
         4039  +** Parameter nNear is passed the NEAR distance of the expression (5 in
         4040  +** the example above). When this function is called, *paPoslist points to
         4041  +** the position list, and *pnToken is the number of phrase tokens in, the
         4042  +** phrase on the other side of the NEAR operator to pPhrase. For example,
         4043  +** if pPhrase refers to the "def ghi" phrase, then *paPoslist points to
         4044  +** the position list associated with phrase "abc".
         4045  +**
         4046  +** All positions in the pPhrase position list that are not sufficiently
         4047  +** close to a position in the *paPoslist position list are removed. If this
         4048  +** leaves 0 positions, zero is returned. Otherwise, non-zero.
         4049  +**
         4050  +** Before returning, *paPoslist is set to point to the position lsit 
         4051  +** associated with pPhrase. And *pnToken is set to the number of tokens in
         4052  +** pPhrase.
         4053  +*/
         4054  +static int fts3EvalNearTrim(
         4055  +  int nNear,                      /* NEAR distance. As in "NEAR/nNear". */
  3799   4056     char *aTmp,                     /* Temporary space to use */
  3800   4057     char **paPoslist,               /* IN/OUT: Position list */
  3801   4058     int *pnToken,                   /* IN/OUT: Tokens in phrase of *paPoslist */
  3802   4059     Fts3Phrase *pPhrase             /* The phrase object to trim the doclist of */
  3803   4060   ){
  3804   4061     int nParam1 = nNear + pPhrase->nToken;
  3805   4062     int nParam2 = nNear + *pnToken;
................................................................................
  3823   4080       *paPoslist = pPhrase->doclist.pList;
  3824   4081       *pnToken = pPhrase->nToken;
  3825   4082     }
  3826   4083   
  3827   4084     return res;
  3828   4085   }
  3829   4086   
         4087  +/*
         4088  +** This function is a no-op if *pRc is other than SQLITE_OK when it is called.
         4089  +** Otherwise, it advances the expression passed as the second argument to
         4090  +** point to the next matching row in the database. Expressions iterate through
         4091  +** matching rows in docid order. Ascending order if Fts3Cursor.bDesc is zero,
         4092  +** or descending if it is non-zero.
         4093  +**
         4094  +** If an error occurs, *pRc is set to an SQLite error code. Otherwise, if
         4095  +** successful, the following variables in pExpr are set:
         4096  +**
         4097  +**   Fts3Expr.bEof                (non-zero if EOF - there is no next row)
         4098  +**   Fts3Expr.iDocid              (valid if bEof==0. The docid of the next row)
         4099  +**
         4100  +** If the expression is of type FTSQUERY_PHRASE, and the expression is not
         4101  +** at EOF, then the following variables are populated with the position list
         4102  +** for the phrase for the visited row:
         4103  +**
         4104  +**   FTs3Expr.pPhrase->doclist.nList        (length of pList in bytes)
         4105  +**   FTs3Expr.pPhrase->doclist.pList        (pointer to position list)
         4106  +**
         4107  +** It says above that this function advances the expression to the next
         4108  +** matching row. This is usually true, but there are the following exceptions:
         4109  +**
         4110  +**   1. Deferred tokens are not taken into account. If a phrase consists
         4111  +**      entirely of deferred tokens, it is assumed to match every row in
         4112  +**      the db. In this case the position-list is not populated at all. 
         4113  +**
         4114  +**      Or, if a phrase contains one or more deferred tokens and one or
         4115  +**      more non-deferred tokens, then the expression is advanced to the 
         4116  +**      next possible match, considering only non-deferred tokens. In other
         4117  +**      words, if the phrase is "A B C", and "B" is deferred, the expression
         4118  +**      is advanced to the next row that contains an instance of "A * C", 
         4119  +**      where "*" may match any single token. The position list in this case
         4120  +**      is populated as for "A * C" before returning.
         4121  +**
         4122  +**   2. NEAR is treated as AND. If the expression is "x NEAR y", it is 
         4123  +**      advanced to point to the next row that matches "x AND y".
         4124  +** 
         4125  +** See fts3EvalTestDeferredAndNear() for details on testing if a row is
         4126  +** really a match, taking into account deferred tokens and NEAR operators.
         4127  +*/
         4128  +static void fts3EvalNextRow(
         4129  +  Fts3Cursor *pCsr,               /* FTS Cursor handle */
         4130  +  Fts3Expr *pExpr,                /* Expr. to advance to next matching row */
         4131  +  int *pRc                        /* IN/OUT: Error code */
         4132  +){
         4133  +  if( *pRc==SQLITE_OK ){
         4134  +    int bDescDoclist = pCsr->bDesc;         /* Used by DOCID_CMP() macro */
         4135  +    assert( pExpr->bEof==0 );
         4136  +    pExpr->bStart = 1;
         4137  +
         4138  +    switch( pExpr->eType ){
         4139  +      case FTSQUERY_NEAR:
         4140  +      case FTSQUERY_AND: {
         4141  +        Fts3Expr *pLeft = pExpr->pLeft;
         4142  +        Fts3Expr *pRight = pExpr->pRight;
         4143  +        assert( !pLeft->bDeferred || !pRight->bDeferred );
         4144  +
         4145  +        if( pLeft->bDeferred ){
         4146  +          /* LHS is entirely deferred. So we assume it matches every row.
         4147  +          ** Advance the RHS iterator to find the next row visited. */
         4148  +          fts3EvalNextRow(pCsr, pRight, pRc);
         4149  +          pExpr->iDocid = pRight->iDocid;
         4150  +          pExpr->bEof = pRight->bEof;
         4151  +        }else if( pRight->bDeferred ){
         4152  +          /* RHS is entirely deferred. So we assume it matches every row.
         4153  +          ** Advance the LHS iterator to find the next row visited. */
         4154  +          fts3EvalNextRow(pCsr, pLeft, pRc);
         4155  +          pExpr->iDocid = pLeft->iDocid;
         4156  +          pExpr->bEof = pLeft->bEof;
         4157  +        }else{
         4158  +          /* Neither the RHS or LHS are deferred. */
         4159  +          fts3EvalNextRow(pCsr, pLeft, pRc);
         4160  +          fts3EvalNextRow(pCsr, pRight, pRc);
         4161  +          while( !pLeft->bEof && !pRight->bEof && *pRc==SQLITE_OK ){
         4162  +            sqlite3_int64 iDiff = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
         4163  +            if( iDiff==0 ) break;
         4164  +            if( iDiff<0 ){
         4165  +              fts3EvalNextRow(pCsr, pLeft, pRc);
         4166  +            }else{
         4167  +              fts3EvalNextRow(pCsr, pRight, pRc);
         4168  +            }
         4169  +          }
         4170  +          pExpr->iDocid = pLeft->iDocid;
         4171  +          pExpr->bEof = (pLeft->bEof || pRight->bEof);
         4172  +        }
         4173  +        break;
         4174  +      }
         4175  +  
         4176  +      case FTSQUERY_OR: {
         4177  +        Fts3Expr *pLeft = pExpr->pLeft;
         4178  +        Fts3Expr *pRight = pExpr->pRight;
         4179  +        sqlite3_int64 iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
         4180  +
         4181  +        assert( pLeft->bStart || pLeft->iDocid==pRight->iDocid );
         4182  +        assert( pRight->bStart || pLeft->iDocid==pRight->iDocid );
         4183  +
         4184  +        if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){
         4185  +          fts3EvalNextRow(pCsr, pLeft, pRc);
         4186  +        }else if( pLeft->bEof || (pRight->bEof==0 && iCmp>0) ){
         4187  +          fts3EvalNextRow(pCsr, pRight, pRc);
         4188  +        }else{
         4189  +          fts3EvalNextRow(pCsr, pLeft, pRc);
         4190  +          fts3EvalNextRow(pCsr, pRight, pRc);
         4191  +        }
         4192  +
         4193  +        pExpr->bEof = (pLeft->bEof && pRight->bEof);
         4194  +        iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
         4195  +        if( pRight->bEof || (pLeft->bEof==0 &&  iCmp<0) ){
         4196  +          pExpr->iDocid = pLeft->iDocid;
         4197  +        }else{
         4198  +          pExpr->iDocid = pRight->iDocid;
         4199  +        }
         4200  +
         4201  +        break;
         4202  +      }
         4203  +
         4204  +      case FTSQUERY_NOT: {
         4205  +        Fts3Expr *pLeft = pExpr->pLeft;
         4206  +        Fts3Expr *pRight = pExpr->pRight;
         4207  +
         4208  +        if( pRight->bStart==0 ){
         4209  +          fts3EvalNextRow(pCsr, pRight, pRc);
         4210  +          assert( *pRc!=SQLITE_OK || pRight->bStart );
         4211  +        }
         4212  +
         4213  +        fts3EvalNextRow(pCsr, pLeft, pRc);
         4214  +        if( pLeft->bEof==0 ){
         4215  +          while( !*pRc 
         4216  +              && !pRight->bEof 
         4217  +              && DOCID_CMP(pLeft->iDocid, pRight->iDocid)>0 
         4218  +          ){
         4219  +            fts3EvalNextRow(pCsr, pRight, pRc);
         4220  +          }
         4221  +        }
         4222  +        pExpr->iDocid = pLeft->iDocid;
         4223  +        pExpr->bEof = pLeft->bEof;
         4224  +        break;
         4225  +      }
         4226  +
         4227  +      default: {
         4228  +        Fts3Phrase *pPhrase = pExpr->pPhrase;
         4229  +        fts3EvalInvalidatePoslist(pPhrase);
         4230  +        *pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof);
         4231  +        pExpr->iDocid = pPhrase->doclist.iDocid;
         4232  +        break;
         4233  +      }
         4234  +    }
         4235  +  }
         4236  +}
         4237  +
         4238  +/*
         4239  +** If *pRc is not SQLITE_OK, or if pExpr is not the root node of a NEAR
         4240  +** cluster, then this function returns 1 immediately.
         4241  +**
         4242  +** Otherwise, it checks if the current row really does match the NEAR 
         4243  +** expression, using the data currently stored in the position lists 
         4244  +** (Fts3Expr->pPhrase.doclist.pList/nList) for each phrase in the expression. 
         4245  +**
         4246  +** If the current row is a match, the position list associated with each
         4247  +** phrase in the NEAR expression is edited in place to contain only those
         4248  +** phrase instances sufficiently close to their peers to satisfy all NEAR
         4249  +** constraints. In this case it returns 1. If the NEAR expression does not 
         4250  +** match the current row, 0 is returned. The position lists may or may not
         4251  +** be edited if 0 is returned.
         4252  +*/
  3830   4253   static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){
  3831   4254     int res = 1;
  3832   4255   
  3833   4256     /* The following block runs if pExpr is the root of a NEAR query.
  3834   4257     ** For example, the query:
  3835   4258     **
  3836   4259     **         "w" NEAR "x" NEAR "y" NEAR "z"
................................................................................
  3844   4267     **                     |        |
  3845   4268     **                +--NEAR--+   "y"
  3846   4269     **                |        |
  3847   4270     **               "w"      "x"
  3848   4271     **
  3849   4272     ** The right-hand child of a NEAR node is always a phrase. The 
  3850   4273     ** left-hand child may be either a phrase or a NEAR node. There are
  3851         -  ** no exceptions to this.
         4274  +  ** no exceptions to this - it's the way the parser in fts3_expr.c works.
  3852   4275     */
  3853   4276     if( *pRc==SQLITE_OK 
  3854   4277      && pExpr->eType==FTSQUERY_NEAR 
  3855   4278      && pExpr->bEof==0
  3856   4279      && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR)
  3857   4280     ){
  3858   4281       Fts3Expr *p; 
................................................................................
  3871   4294       }else{
  3872   4295         char *aPoslist = p->pPhrase->doclist.pList;
  3873   4296         int nToken = p->pPhrase->nToken;
  3874   4297   
  3875   4298         for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){
  3876   4299           Fts3Phrase *pPhrase = p->pRight->pPhrase;
  3877   4300           int nNear = p->nNear;
  3878         -        res = fts3EvalNearTrim2(nNear, aTmp, &aPoslist, &nToken, pPhrase);
         4301  +        res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase);
  3879   4302         }
  3880   4303     
  3881   4304         aPoslist = pExpr->pRight->pPhrase->doclist.pList;
  3882   4305         nToken = pExpr->pRight->pPhrase->nToken;
  3883   4306         for(p=pExpr->pLeft; p && res; p=p->pLeft){
  3884   4307           int nNear = p->pParent->nNear;
  3885   4308           Fts3Phrase *pPhrase = (
  3886   4309               p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase
  3887   4310           );
  3888         -        res = fts3EvalNearTrim2(nNear, aTmp, &aPoslist, &nToken, pPhrase);
         4311  +        res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase);
  3889   4312         }
  3890   4313       }
  3891   4314   
  3892   4315       sqlite3_free(aTmp);
  3893   4316     }
  3894   4317   
  3895   4318     return res;
  3896   4319   }
  3897   4320   
  3898   4321   /*
  3899         -** This macro is used by the fts3EvalNext() function. The two arguments are
  3900         -** 64-bit docid values. If the current query is "ORDER BY docid ASC", then
  3901         -** the macro returns (i1 - i2). Or if it is "ORDER BY docid DESC", then
  3902         -** it returns (i2 - i1). This allows the same code to be used for merging
  3903         -** doclists in ascending or descending order.
         4322  +** This function is a helper function for fts3EvalTestDeferredAndNear().
         4323  +** Assuming no error occurs or has occurred, It returns non-zero if the
         4324  +** expression passed as the second argument matches the row that pCsr 
         4325  +** currently points to, or zero if it does not.
         4326  +**
         4327  +** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
         4328  +** If an error occurs during execution of this function, *pRc is set to 
         4329  +** the appropriate SQLite error code. In this case the returned value is 
         4330  +** undefined.
  3904   4331   */
  3905         -#define DOCID_CMP(i1, i2) ((pCsr->bDesc?-1:1) * (i1-i2))
  3906         -
  3907         -static void fts3EvalNext(
  3908         -  Fts3Cursor *pCsr, 
  3909         -  Fts3Expr *pExpr, 
  3910         -  int *pRc
         4332  +static int fts3EvalTestExpr(
         4333  +  Fts3Cursor *pCsr,               /* FTS cursor handle */
         4334  +  Fts3Expr *pExpr,                /* Expr to test. May or may not be root. */
         4335  +  int *pRc                        /* IN/OUT: Error code */
  3911   4336   ){
  3912         -  if( *pRc==SQLITE_OK ){
  3913         -    assert( pExpr->bEof==0 );
  3914         -    pExpr->bStart = 1;
  3915         -
  3916         -    switch( pExpr->eType ){
  3917         -      case FTSQUERY_NEAR:
  3918         -      case FTSQUERY_AND: {
  3919         -        Fts3Expr *pLeft = pExpr->pLeft;
  3920         -        Fts3Expr *pRight = pExpr->pRight;
  3921         -        assert( !pLeft->bDeferred || !pRight->bDeferred );
  3922         -        if( pLeft->bDeferred ){
  3923         -          fts3EvalNext(pCsr, pRight, pRc);
  3924         -          pExpr->iDocid = pRight->iDocid;
  3925         -          pExpr->bEof = pRight->bEof;
  3926         -        }else if( pRight->bDeferred ){
  3927         -          fts3EvalNext(pCsr, pLeft, pRc);
  3928         -          pExpr->iDocid = pLeft->iDocid;
  3929         -          pExpr->bEof = pLeft->bEof;
  3930         -        }else{
  3931         -          fts3EvalNext(pCsr, pLeft, pRc);
  3932         -          fts3EvalNext(pCsr, pRight, pRc);
  3933         -
  3934         -          while( !pLeft->bEof && !pRight->bEof && *pRc==SQLITE_OK ){
  3935         -            sqlite3_int64 iDiff = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
  3936         -            if( iDiff==0 ) break;
  3937         -            if( iDiff<0 ){
  3938         -              fts3EvalNext(pCsr, pLeft, pRc);
  3939         -            }else{
  3940         -              fts3EvalNext(pCsr, pRight, pRc);
  3941         -            }
  3942         -          }
  3943         -
  3944         -          pExpr->iDocid = pLeft->iDocid;
  3945         -          pExpr->bEof = (pLeft->bEof || pRight->bEof);
  3946         -        }
  3947         -        break;
  3948         -      }
  3949         -  
  3950         -      case FTSQUERY_OR: {
  3951         -        Fts3Expr *pLeft = pExpr->pLeft;
  3952         -        Fts3Expr *pRight = pExpr->pRight;
  3953         -        sqlite3_int64 iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
  3954         -
  3955         -        assert( pLeft->bStart || pLeft->iDocid==pRight->iDocid );
  3956         -        assert( pRight->bStart || pLeft->iDocid==pRight->iDocid );
  3957         -
  3958         -        if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){
  3959         -          fts3EvalNext(pCsr, pLeft, pRc);
  3960         -        }else if( pLeft->bEof || (pRight->bEof==0 && iCmp>0) ){
  3961         -          fts3EvalNext(pCsr, pRight, pRc);
  3962         -        }else{
  3963         -          fts3EvalNext(pCsr, pLeft, pRc);
  3964         -          fts3EvalNext(pCsr, pRight, pRc);
  3965         -        }
  3966         -
  3967         -        pExpr->bEof = (pLeft->bEof && pRight->bEof);
  3968         -        iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
  3969         -        if( pRight->bEof || (pLeft->bEof==0 &&  iCmp<0) ){
  3970         -          pExpr->iDocid = pLeft->iDocid;
  3971         -        }else{
  3972         -          pExpr->iDocid = pRight->iDocid;
  3973         -        }
  3974         -
  3975         -        break;
  3976         -      }
  3977         -
  3978         -      case FTSQUERY_NOT: {
  3979         -        Fts3Expr *pLeft = pExpr->pLeft;
  3980         -        Fts3Expr *pRight = pExpr->pRight;
  3981         -
  3982         -        if( pRight->bStart==0 ){
  3983         -          fts3EvalNext(pCsr, pRight, pRc);
  3984         -          assert( *pRc!=SQLITE_OK || pRight->bStart );
  3985         -        }
  3986         -
  3987         -        fts3EvalNext(pCsr, pLeft, pRc);
  3988         -        if( pLeft->bEof==0 ){
  3989         -          while( !*pRc 
  3990         -              && !pRight->bEof 
  3991         -              && DOCID_CMP(pLeft->iDocid, pRight->iDocid)>0 
  3992         -          ){
  3993         -            fts3EvalNext(pCsr, pRight, pRc);
  3994         -          }
  3995         -        }
  3996         -        pExpr->iDocid = pLeft->iDocid;
  3997         -        pExpr->bEof = pLeft->bEof;
  3998         -        break;
  3999         -      }
  4000         -
  4001         -      default: {
  4002         -        Fts3Phrase *pPhrase = pExpr->pPhrase;
  4003         -        fts3EvalZeroPoslist(pPhrase);
  4004         -        *pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof);
  4005         -        pExpr->iDocid = pPhrase->doclist.iDocid;
  4006         -        break;
  4007         -      }
  4008         -    }
  4009         -  }
  4010         -}
  4011         -
  4012         -static int fts3EvalDeferredTest(Fts3Cursor *pCsr, Fts3Expr *pExpr, int *pRc){
  4013         -  int bHit = 1;
         4337  +  int bHit = 1;                   /* Return value */
  4014   4338     if( *pRc==SQLITE_OK ){
  4015   4339       switch( pExpr->eType ){
  4016   4340         case FTSQUERY_NEAR:
  4017   4341         case FTSQUERY_AND:
  4018   4342           bHit = (
  4019         -            fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc)
  4020         -         && fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc)
         4343  +            fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc)
         4344  +         && fts3EvalTestExpr(pCsr, pExpr->pRight, pRc)
  4021   4345            && fts3EvalNearTest(pExpr, pRc)
  4022   4346           );
  4023   4347   
  4024   4348           /* If the NEAR expression does not match any rows, zero the doclist for 
  4025   4349           ** all phrases involved in the NEAR. This is because the snippet(),
  4026   4350           ** offsets() and matchinfo() functions are not supposed to recognize 
  4027   4351           ** any instances of phrases that are part of unmatched NEAR queries. 
................................................................................
  4039   4363           if( bHit==0 
  4040   4364            && pExpr->eType==FTSQUERY_NEAR 
  4041   4365            && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR)
  4042   4366           ){
  4043   4367             Fts3Expr *p;
  4044   4368             for(p=pExpr; p->pPhrase==0; p=p->pLeft){
  4045   4369               if( p->pRight->iDocid==pCsr->iPrevId ){
  4046         -              fts3EvalZeroPoslist(p->pRight->pPhrase);
         4370  +              fts3EvalInvalidatePoslist(p->pRight->pPhrase);
  4047   4371               }
  4048   4372             }
  4049   4373             if( p->iDocid==pCsr->iPrevId ){
  4050         -            fts3EvalZeroPoslist(p->pPhrase);
         4374  +            fts3EvalInvalidatePoslist(p->pPhrase);
  4051   4375             }
  4052   4376           }
  4053   4377   
  4054   4378           break;
  4055   4379   
  4056   4380         case FTSQUERY_OR: {
  4057         -        int bHit1 = fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc);
  4058         -        int bHit2 = fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc);
         4381  +        int bHit1 = fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc);
         4382  +        int bHit2 = fts3EvalTestExpr(pCsr, pExpr->pRight, pRc);
  4059   4383           bHit = bHit1 || bHit2;
  4060   4384           break;
  4061   4385         }
  4062   4386   
  4063   4387         case FTSQUERY_NOT:
  4064   4388           bHit = (
  4065         -            fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc)
  4066         -         && !fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc)
         4389  +            fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc)
         4390  +         && !fts3EvalTestExpr(pCsr, pExpr->pRight, pRc)
  4067   4391           );
  4068   4392           break;
  4069   4393   
  4070   4394         default: {
  4071   4395           if( pCsr->pDeferred 
  4072   4396            && (pExpr->iDocid==pCsr->iPrevId || pExpr->bDeferred)
  4073   4397           ){
  4074   4398             Fts3Phrase *pPhrase = pExpr->pPhrase;
  4075   4399             assert( pExpr->bDeferred || pPhrase->doclist.bFreeList==0 );
  4076   4400             if( pExpr->bDeferred ){
  4077         -            fts3EvalZeroPoslist(pPhrase);
         4401  +            fts3EvalInvalidatePoslist(pPhrase);
  4078   4402             }
  4079   4403             *pRc = fts3EvalDeferredPhrase(pCsr, pPhrase);
  4080   4404             bHit = (pPhrase->doclist.pList!=0);
  4081   4405             pExpr->iDocid = pCsr->iPrevId;
  4082   4406           }else{
  4083   4407             bHit = (pExpr->bEof==0 && pExpr->iDocid==pCsr->iPrevId);
  4084   4408           }
................................................................................
  4086   4410         }
  4087   4411       }
  4088   4412     }
  4089   4413     return bHit;
  4090   4414   }
  4091   4415   
  4092   4416   /*
  4093         -** Return 1 if both of the following are true:
         4417  +** This function is called as the second part of each xNext operation when
         4418  +** iterating through the results of a full-text query. At this point the
         4419  +** cursor points to a row that matches the query expression, with the
         4420  +** following caveats:
         4421  +**
         4422  +**   * Up until this point, "NEAR" operators in the expression have been
         4423  +**     treated as "AND".
         4424  +**
         4425  +**   * Deferred tokens have not yet been considered.
         4426  +**
         4427  +** If *pRc is not SQLITE_OK when this function is called, it immediately
         4428  +** returns 0. Otherwise, it tests whether or not after considering NEAR
         4429  +** operators and deferred tokens the current row is still a match for the
         4430  +** expression. It returns 1 if both of the following are true:
  4094   4431   **
  4095   4432   **   1. *pRc is SQLITE_OK when this function returns, and
  4096   4433   **
  4097   4434   **   2. After scanning the current FTS table row for the deferred tokens,
  4098         -**      it is determined that the row does not match the query.
         4435  +**      it is determined that the row does *not* match the query.
  4099   4436   **
  4100   4437   ** Or, if no error occurs and it seems the current row does match the FTS
  4101   4438   ** query, return 0.
  4102   4439   */
  4103         -static int fts3EvalLoadDeferred(Fts3Cursor *pCsr, int *pRc){
         4440  +static int fts3EvalTestDeferredAndNear(Fts3Cursor *pCsr, int *pRc){
  4104   4441     int rc = *pRc;
  4105   4442     int bMiss = 0;
  4106   4443     if( rc==SQLITE_OK ){
         4444  +
         4445  +    /* If there are one or more deferred tokens, load the current row into
         4446  +    ** memory and scan it to determine the position list for each deferred
         4447  +    ** token. Then, see if this row is really a match, considering deferred
         4448  +    ** tokens and NEAR operators (neither of which were taken into account
         4449  +    ** earlier, by fts3EvalNextRow()). 
         4450  +    */
  4107   4451       if( pCsr->pDeferred ){
  4108   4452         rc = fts3CursorSeek(0, pCsr);
  4109   4453         if( rc==SQLITE_OK ){
  4110   4454           rc = sqlite3Fts3CacheDeferredDoclists(pCsr);
  4111   4455         }
  4112   4456       }
  4113         -    bMiss = (0==fts3EvalDeferredTest(pCsr, pCsr->pExpr, &rc));
         4457  +    bMiss = (0==fts3EvalTestExpr(pCsr, pCsr->pExpr, &rc));
         4458  +
         4459  +    /* Free the position-lists accumulated for each deferred token above. */
  4114   4460       sqlite3Fts3FreeDeferredDoclists(pCsr);
  4115   4461       *pRc = rc;
  4116   4462     }
  4117   4463     return (rc==SQLITE_OK && bMiss);
  4118   4464   }
  4119   4465   
  4120   4466   /*
  4121   4467   ** Advance to the next document that matches the FTS expression in
  4122   4468   ** Fts3Cursor.pExpr.
  4123   4469   */
  4124         -int sqlite3Fts3EvalNext(Fts3Cursor *pCsr){
         4470  +static int fts3EvalNext(Fts3Cursor *pCsr){
  4125   4471     int rc = SQLITE_OK;             /* Return Code */
  4126   4472     Fts3Expr *pExpr = pCsr->pExpr;
  4127   4473     assert( pCsr->isEof==0 );
  4128   4474     if( pExpr==0 ){
  4129   4475       pCsr->isEof = 1;
  4130   4476     }else{
  4131   4477       do {
  4132   4478         if( pCsr->isRequireSeek==0 ){
  4133   4479           sqlite3_reset(pCsr->pStmt);
  4134   4480         }
  4135   4481         assert( sqlite3_data_count(pCsr->pStmt)==0 );
  4136         -      fts3EvalNext(pCsr, pExpr, &rc);
         4482  +      fts3EvalNextRow(pCsr, pExpr, &rc);
  4137   4483         pCsr->isEof = pExpr->bEof;
  4138   4484         pCsr->isRequireSeek = 1;
  4139   4485         pCsr->isMatchinfoNeeded = 1;
  4140   4486         pCsr->iPrevId = pExpr->iDocid;
  4141         -    }while( pCsr->isEof==0 && fts3EvalLoadDeferred(pCsr, &rc) );
         4487  +    }while( pCsr->isEof==0 && fts3EvalTestDeferredAndNear(pCsr, &rc) );
  4142   4488     }
  4143   4489     return rc;
  4144   4490   }
  4145   4491   
  4146   4492   /*
  4147   4493   ** Restart interation for expression pExpr so that the next call to
  4148         -** sqlite3Fts3EvalNext() visits the first row. Do not allow incremental 
         4494  +** fts3EvalNext() visits the first row. Do not allow incremental 
  4149   4495   ** loading or merging of phrase doclists for this iteration.
  4150   4496   **
  4151   4497   ** If *pRc is other than SQLITE_OK when this function is called, it is
  4152   4498   ** a no-op. If an error occurs within this function, *pRc is set to an
  4153   4499   ** SQLite error code before returning.
  4154   4500   */
  4155   4501   static void fts3EvalRestart(
................................................................................
  4157   4503     Fts3Expr *pExpr,
  4158   4504     int *pRc
  4159   4505   ){
  4160   4506     if( pExpr && *pRc==SQLITE_OK ){
  4161   4507       Fts3Phrase *pPhrase = pExpr->pPhrase;
  4162   4508   
  4163   4509       if( pPhrase ){
  4164         -      fts3EvalZeroPoslist(pPhrase);
         4510  +      fts3EvalInvalidatePoslist(pPhrase);
  4165   4511         if( pPhrase->bIncr ){
  4166   4512           assert( pPhrase->nToken==1 );
  4167   4513           assert( pPhrase->aToken[0].pSegcsr );
  4168   4514           sqlite3Fts3MsrIncrRestart(pPhrase->aToken[0].pSegcsr);
  4169   4515           *pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase);
  4170   4516         }
  4171   4517   
................................................................................
  4273   4619   
  4274   4620         do {
  4275   4621           /* Ensure the %_content statement is reset. */
  4276   4622           if( pCsr->isRequireSeek==0 ) sqlite3_reset(pCsr->pStmt);
  4277   4623           assert( sqlite3_data_count(pCsr->pStmt)==0 );
  4278   4624   
  4279   4625           /* Advance to the next document */
  4280         -        fts3EvalNext(pCsr, pRoot, &rc);
         4626  +        fts3EvalNextRow(pCsr, pRoot, &rc);
  4281   4627           pCsr->isEof = pRoot->bEof;
  4282   4628           pCsr->isRequireSeek = 1;
  4283   4629           pCsr->isMatchinfoNeeded = 1;
  4284   4630           pCsr->iPrevId = pRoot->iDocid;
  4285   4631         }while( pCsr->isEof==0 
  4286   4632              && pRoot->eType==FTSQUERY_NEAR 
  4287         -           && fts3EvalLoadDeferred(pCsr, &rc) 
         4633  +           && fts3EvalTestDeferredAndNear(pCsr, &rc) 
  4288   4634         );
  4289   4635   
  4290   4636         if( rc==SQLITE_OK && pCsr->isEof==0 ){
  4291   4637           fts3EvalUpdateCounts(pRoot);
  4292   4638         }
  4293   4639       }
  4294   4640   
................................................................................
  4302   4648         ** order. For this reason, even though it seems more defensive, the 
  4303   4649         ** do loop can not be written:
  4304   4650         **
  4305   4651         **   do {...} while( pRoot->iDocid<iDocid && rc==SQLITE_OK );
  4306   4652         */
  4307   4653         fts3EvalRestart(pCsr, pRoot, &rc);
  4308   4654         do {
  4309         -        fts3EvalNext(pCsr, pRoot, &rc);
         4655  +        fts3EvalNextRow(pCsr, pRoot, &rc);
  4310   4656           assert( pRoot->bEof==0 );
  4311   4657         }while( pRoot->iDocid!=iDocid && rc==SQLITE_OK );
  4312         -      fts3EvalLoadDeferred(pCsr, &rc);
         4658  +      fts3EvalTestDeferredAndNear(pCsr, &rc);
  4313   4659       }
  4314   4660     }
  4315   4661     return rc;
  4316   4662   }
  4317   4663   
  4318   4664   /*
  4319   4665   ** This function is used by the matchinfo() module to query a phrase 
................................................................................
  4436   4782   **   * the contents of pPhrase->doclist, and
  4437   4783   **   * any Fts3MultiSegReader objects held by phrase tokens.
  4438   4784   */
  4439   4785   void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){
  4440   4786     if( pPhrase ){
  4441   4787       int i;
  4442   4788       sqlite3_free(pPhrase->doclist.aAll);
  4443         -    fts3EvalZeroPoslist(pPhrase);
         4789  +    fts3EvalInvalidatePoslist(pPhrase);
  4444   4790       memset(&pPhrase->doclist, 0, sizeof(Fts3Doclist));
  4445   4791       for(i=0; i<pPhrase->nToken; i++){
  4446   4792         fts3SegReaderCursorFree(pPhrase->aToken[i].pSegcsr);
  4447   4793         pPhrase->aToken[i].pSegcsr = 0;
  4448   4794       }
  4449   4795     }
  4450   4796   }
         4797  +
         4798  +#if !SQLITE_CORE
         4799  +/*
         4800  +** Initialize API pointer table, if required.
         4801  +*/
         4802  +int sqlite3_extension_init(
         4803  +  sqlite3 *db, 
         4804  +  char **pzErrMsg,
         4805  +  const sqlite3_api_routines *pApi
         4806  +){
         4807  +  SQLITE_EXTENSION_INIT2(pApi)
         4808  +  return sqlite3Fts3Init(db);
         4809  +}
         4810  +#endif
  4451   4811   
  4452   4812   #endif

Changes to ext/fts3/fts3Int.h.

   503    503     int nTerm,                      /* Size of zTerm in bytes */
   504    504     int isPrefix,                   /* True for a prefix search */
   505    505     Fts3MultiSegReader **ppSegcsr   /* OUT: Allocated seg-reader cursor */
   506    506   );
   507    507   
   508    508   void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *);
   509    509   
   510         -int sqlite3Fts3EvalStart(Fts3Cursor *, Fts3Expr *, int);
   511         -int sqlite3Fts3EvalNext(Fts3Cursor *pCsr);
   512         -
   513    510   int sqlite3Fts3MsrIncrStart(
   514    511       Fts3Table*, Fts3MultiSegReader*, int, const char*, int);
   515    512   int sqlite3Fts3MsrIncrNext(
   516    513       Fts3Table *, Fts3MultiSegReader *, sqlite3_int64 *, char **, int *);
   517    514   char *sqlite3Fts3EvalPhrasePoslist(Fts3Cursor *, Fts3Expr *, int iCol); 
   518    515   int sqlite3Fts3MsrOvfl(Fts3Cursor *, Fts3MultiSegReader *, int *);
   519    516   int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr);
   520    517   
   521    518   int sqlite3Fts3DeferredTokenList(Fts3DeferredToken *, char **, int *);
   522    519   
   523    520   #endif /* !SQLITE_CORE || SQLITE_ENABLE_FTS3 */
   524    521   #endif /* _FTSINT_H */

Changes to test/fts3auto.test.

   103    103   
   104    104     do_execsql_test $tn$title.4 "
   105    105       SELECT docid, mit(matchinfo($tbl, 'x')) FROM $tbl 
   106    106       WHERE $tbl MATCH '$match' ORDER BY docid ASC
   107    107     " $matchinfo_asc
   108    108   }
   109    109   
   110         -#    fts3_make_deferrable TABLE TOKEN
          110  +#    fts3_make_deferrable TABLE TOKEN ?NROW?
   111    111   #
   112         -proc fts3_make_deferrable {tbl token} {
          112  +proc fts3_make_deferrable {tbl token {nRow 0}} {
   113    113   
   114    114     set stmt [sqlite3_prepare db "SELECT * FROM $tbl" -1 dummy]
   115    115     set name [sqlite3_column_name $stmt 0]
   116    116     sqlite3_finalize $stmt
   117    117   
   118         -  set nRow [db one "SELECT count(*) FROM $tbl"]
          118  +  if {$nRow==0} {
          119  +    set nRow [db one "SELECT count(*) FROM $tbl"]
          120  +  }
   119    121     set pgsz [db one "PRAGMA page_size"]
   120    122     execsql BEGIN
   121    123     for {set i 0} {$i < ($nRow * $pgsz * 1.2)/100} {incr i} {
   122    124       set doc [string repeat "$token " 100]
   123    125       execsql "INSERT INTO $tbl ($name) VALUES(\$doc)"
   124    126     }
   125    127     execsql "INSERT INTO $tbl ($name) VALUES('aaaaaaa ${token}aaaaa')"
................................................................................
   648    650     do_fts3query_test 6.$tn.2 t1 {b:G AND c:I}
   649    651     do_fts3query_test 6.$tn.3 t1 {b:G NEAR c:I}
   650    652     do_fts3query_test 6.$tn.4 t1 {a:C OR b:G OR c:K OR d:C}
   651    653     do_fts3query_test 6.$tn.5 t1 {a:G OR b:G}
   652    654   
   653    655     catchsql { COMMIT }
   654    656   }
          657  +
          658  +foreach {tn create} {
          659  +  1    "fts4(x)"
          660  +  2    "fts4(x, order=DESC)"
          661  +} {
          662  +  execsql [subst {
          663  +    DROP TABLE IF EXISTS t1;
          664  +    CREATE VIRTUAL TABLE t1 USING $create;
          665  +  }]
          666  +
          667  +  foreach {x} {
          668  +    "F E N O T K X V A X I E X A P G Q V H U"
          669  +    "R V A E T C V Q N I E L O N U G J K L U"
          670  +    "U Y I G W M V F J L X I D C H F P J Q B"
          671  +    "S G D Z X R P G S S Y B K A S G A I L L"
          672  +    "L S I C H T Z S R Q P R N K J X L F M J"
          673  +    "C C C D P X B Z C M A D A C X S B T X V"
          674  +    "W Y J M D R G V R K B X S A W R I T N C"
          675  +    "P K L W T M S P O Y Y V V O E H Q A I R"
          676  +    "C D Y I C Z F H J C O Y A Q F L S B D K"
          677  +    "P G S C Y C Y V I M B D S Z D D Y W I E"
          678  +    "Z K Z U E E S F Y X T U A L W O U J C Q"
          679  +    "P A T Z S W L P L Q V Y Y I P W U X S S"
          680  +    "I U I H U O F Z F R H R F T N D X A G M"
          681  +    "N A B M S H K X S O Y D T X S B R Y H Z"
          682  +    "L U D A S K I L S V Z J P U B E B Y H M"
          683  +  } {
          684  +    execsql { INSERT INTO t1 VALUES($x) }
          685  +  }
          686  +
          687  +  # Add extra documents to the database such that token "B" will be considered
          688  +  # deferrable if considering the other tokens means that 2 or fewer documents
          689  +  # will be loaded into memory.
          690  +  #
          691  +  fts3_make_deferrable t1 B 2
          692  +
          693  +  # B is not deferred in either of the first two tests below, since filtering
          694  +  # on "M" or "D" returns 10 documents or so. But filtering on "M * D" only
          695  +  # returns 2, so B is deferred in this case.
          696  +  #
          697  +  do_fts3query_test 7.$tn.1             t1 {"M B"}
          698  +  do_fts3query_test 7.$tn.2             t1 {"B D"}
          699  +  do_fts3query_test 7.$tn.3 -deferred B t1 {"M B D"}
          700  +}
   655    701   
   656    702   set sqlite_fts3_enable_parentheses $sfep
   657    703   finish_test
   658    704