/ Check-in [51f7ee84]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add experimental implementation of FTS3 functions matchinfo() and snippet() (not enabled by default).
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 51f7ee844057086789dcfcdcba7daf45343cae62
User & Date: dan 2010-01-02 19:02:02
Context
2010-01-04
13:30
Version 3.6.22 Release Candidate 1 check-in: 9d8ab0f1 user: drh tags: trunk
2010-01-02
19:02
Add experimental implementation of FTS3 functions matchinfo() and snippet() (not enabled by default). check-in: 51f7ee84 user: dan tags: trunk
03:46
Fix a bug in the new sqlite3_test_control case of the previous check-in. check-in: 3b77701b user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

793
794
795
796
797
798
799

800
801
802
803
804
805
806
...
838
839
840
841
842
843
844

845
846
847
848
849
850
851
....
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
....
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062


2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099



2100
2101
2102

2103
2104


2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142



2143

2144
2145
2146
2147
2148

2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160




2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184

2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206

2207
2208
2209
2210
2211
2212
2213
2214
2215
2216

2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231

2232
2233
2234
2235
2236
2237
2238
....
2293
2294
2295
2296
2297
2298
2299









































2300
2301
2302
2303
2304
2305
2306
....
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
....
2431
2432
2433
2434
2435
2436
2437

2438
2439
2440
2441
2442
2443
2444
....
2580
2581
2582
2583
2584
2585
2586

2587
2588
2589
2590
2591
2592
2593
** on the xClose method of the virtual table interface.
*/
static int fulltextClose(sqlite3_vtab_cursor *pCursor){
  Fts3Cursor *pFts3Csr = (Fts3Cursor *)pCursor;

  /* Release everything the cursor owns before freeing the cursor itself:
  ** the prepared statement, the parsed query expression and the doclist
  ** buffer.
  */
  sqlite3_finalize(pFts3Csr->pStmt);
  sqlite3Fts3ExprFree(pFts3Csr->pExpr);
  sqlite3_free(pFts3Csr->aDoclist);

  sqlite3_free(pFts3Csr);
  return SQLITE_OK;
}

static int fts3CursorSeek(sqlite3_context *pContext, Fts3Cursor *pCsr){
  if( pCsr->isRequireSeek ){
    pCsr->isRequireSeek = 0;
................................................................................
    }
  }else if( pCsr->pNextId>=&pCsr->aDoclist[pCsr->nDoclist] ){
    pCsr->isEof = 1;
  }else{
    sqlite3_reset(pCsr->pStmt);
    fts3GetDeltaVarint(&pCsr->pNextId, &pCsr->iPrevId);
    pCsr->isRequireSeek = 1;

  }
  return rc;
}


/*
** The buffer pointed to by argument zNode (size nNode bytes) contains the
................................................................................
    memcpy(p, *ppPoslist, n);
    p += n;
    *pp = p;
  }
  *ppPoslist = pEnd;
}

/*
** Count the entries in a column-list (a delta-encoded list of term offsets
** within a single column of a single row).
**
** On entry *ppCollist points at the first byte of the column-list. On
** return it points at the terminator byte (0x00 or 0x01) that follows the
** list. The return value is the number of varints that were stepped over.
*/
static int fts3ColumnlistCount(char **ppCollist){
  char *pCur = *ppCollist;        /* Read position within the list */
  int nCount = 0;                 /* Number of complete varints seen */
  char inVarint = 0;              /* Non-zero if previous byte had 0x80 set */

  /* A column-list is terminated by either a 0x01 or 0x00. Such a byte only
  ** counts as a terminator when it is not the final byte of a multi-byte
  ** varint, which is what the inVarint term guards against.
  */
  for(;;){
    if( ((*pCur | inVarint) & 0xFE)==0 ) break;
    inVarint = *pCur & 0x80;
    pCur++;
    if( inVarint==0 ){
      /* The byte just consumed ended a varint. */
      nCount++;
    }
  }

  *ppCollist = pCur;
  return nCount;
}

/*
** Value used to signify the end of an offset-list. This is safe because
** it is not possible to have a document with 2^31 terms.
*/
#define OFFSET_LIST_END 0x7fffffff

/*
................................................................................
*/
static int fts3RollbackMethod(sqlite3_vtab *pVtab){
  Fts3Table *pTab = (Fts3Table *)pVtab;

  /* Discard the table's pending terms; this method always reports success. */
  sqlite3Fts3PendingTermsClear(pTab);
  return SQLITE_OK;
}

/*
** The following flags affect the format of the blob of unsigned int values
** returned by the matchinfo() function. The format is defined as follows:
**
**   Integer 0: Number of 'simple queries' that make up the FTS3 query.
**   Integer 1: Number of columns in queried table.
**
** followed by the data for (query 0, column 0), (query 0, column 1) ...
** (query 1, column 0) and so on.
**
** The first integer in each data is the number of hits that the simple
** query has in the current column.
**
** If the GLOBALCOUNT flag is set, then this is followed by the total
** number of hits the simple query has in the current column of *all*
** selected rows.
**
** If the PHRASELENGTH flag is set, this is followed by the number of
** tokens in the phrase.
**
** If the POSITIONLIST flag is set, then this is followed by <local-count>
** integers - the positions of each of the hits for the current column/query.
*/
/* Bit flags selected by the 'g', 'p' and 'n' characters (respectively) of
** the optional second argument to matchinfo(). */
#define FTS3_MATCHINFO_GLOBALCOUNT  0x00000001
#define FTS3_MATCHINFO_POSITIONLIST 0x00000002
#define FTS3_MATCHINFO_PHRASELENGTH 0x00000004



/*
** State shared by the routines that assemble the matchinfo() result: the
** query parameters, the first error encountered, and a growable array of
** unsigned integers that receives the output values.
*/
typedef struct MatchInfo MatchInfo;
struct MatchInfo {
  int rc;                         /* Return code. SQLITE_OK if no error */
  sqlite3_int64 iDocid;           /* Docid of entry to return data for */
  Fts3Table *pTab;                /* FTS3 Virtual table */
  int flags;                      /* Output flags (see above) */
  int nQuery;                     /* Number of simple queries */

  /* Malloced output buffer */
  unsigned int *aOut;             /* Output values; grown with sqlite3_realloc() */
  int nOut;                       /* Number of entries of aOut[] in use */
  int nAlloc;                     /* Number of entries aOut[] has room for */
};

/*
** Append the value iVal to the output array of pInfo.
**
** This is a no-op if pInfo->rc already holds an error code. If the array
** is full it is enlarged first; should that allocation fail, pInfo->rc is
** set to SQLITE_NOMEM and the existing array is left as it was.
*/
static void fts3MatchInfoAppend(MatchInfo *pInfo, unsigned int iVal){
  if( pInfo->rc==SQLITE_OK ){
    if( pInfo->nAlloc==pInfo->nOut ){
      /* Out of room: roughly double the capacity. */
      int nGrow = pInfo->nAlloc*2+100;
      unsigned int *aGrow;

      aGrow = (unsigned int *)sqlite3_realloc(
          pInfo->aOut, nGrow * sizeof(unsigned int)
      );
      if( aGrow==0 ){
        pInfo->rc = SQLITE_NOMEM;
        return;
      }
      pInfo->nAlloc = nGrow;
      pInfo->aOut = aGrow;
    }

    pInfo->aOut[pInfo->nOut] = iVal;
    pInfo->nOut++;
  }
}

/*
** Iterate through each simple query that makes up the query expression
** pExpr and append the matchinfo data for the row pInfo->iDocid to the
** output buffer of pInfo.
**
** Nodes other than phrases are handled by recursing into both children,
** left first. A node of type FTSQUERY_NOT is skipped entirely (neither of
** its operands is visited). For each phrase node, one group of values per
** table column is appended, laid out as selected by pInfo->flags, and
** pInfo->nQuery is incremented.
**
** Errors are reported by setting pInfo->rc. Once pInfo->rc is non-zero
** this function returns without doing further work. Parameter pCtx is
** only passed through to the recursive calls.
*/
static void fts3ExprMatchInfo(
  sqlite3_context *pCtx,
  Fts3Expr *pExpr,
  MatchInfo *pInfo
){
  int eType = pExpr->eType;
  if( eType==FTSQUERY_NOT || pInfo->rc ){
    return;
  }else if( eType!=FTSQUERY_PHRASE ){
    assert( pExpr->pLeft && pExpr->pRight );
    fts3ExprMatchInfo(pCtx, pExpr->pLeft, pInfo);
    if( pInfo->rc==SQLITE_OK ){
      fts3ExprMatchInfo(pCtx, pExpr->pRight, pInfo);
    }
  }else{
    int nPhrase = pExpr->pPhrase->nToken;
    Fts3Table *pTab = pInfo->pTab;

    /* If it is not loaded already, load the doclist for this simple query
    ** from the FTS3 full-text index. 
    */
    if( pExpr->isLoaded==0 ){
      pInfo->rc = evalFts3Expr(pTab,pExpr,&pExpr->aDoclist,&pExpr->nDoclist,1);
      if( pInfo->rc ) return;
      pExpr->isLoaded = 1;
    }

    /* If aDoclist is not NULL, search for the doclist entry in pExpr->aDoclist
    ** associated with the docid pInfo->iDocid.
    */
    if( pExpr->aDoclist ){
      char *pEnd = &pExpr->aDoclist[pExpr->nDoclist];
      sqlite3_int64 iSearch = pInfo->iDocid;

      /* First use of this doclist: position pCurrent just past the first
      ** docid.
      */
      if( pExpr->pCurrent==0 ){
        assert( pExpr->iDocid==0 );
        pExpr->pCurrent = pExpr->aDoclist;
        fts3GetDeltaVarint(&pExpr->pCurrent, &pExpr->iDocid);
      }

      while( pExpr->iDocid<iSearch && pExpr->pCurrent<pEnd ){
        /* Skip pCurrent to the start of the next doclist entry */
        fts3PoslistCopy(0, &pExpr->pCurrent);
        if( pExpr->pCurrent<pEnd ){
          fts3GetDeltaVarint(&pExpr->pCurrent, &pExpr->iDocid);
        }
      }

      if( pExpr->iDocid==iSearch ){
        int i;
        for(i=0; i<pTab->nColumn; i++){
          unsigned int iLocalOff;

          /* Add space for the "local-count" field. It is written as zero
          ** here and overwritten below if this column has any hits.
          */
          iLocalOff = pInfo->nOut;
          fts3MatchInfoAppend(pInfo, 0);
          if( pInfo->rc ) return;

          /* If the GLOBALCOUNT field is required, write the global-count
          ** value for this query/column to the output buffer.
          */
          if( pInfo->flags&FTS3_MATCHINFO_GLOBALCOUNT ){
            if( !pExpr->aHist ){
              char *pCsr = pExpr->aDoclist;

              /* Allocate a zeroed buffer to store the global-counts 
              ** corresponding to this simple query for each table column. 
              */
              int nByte = sizeof(unsigned int)*pTab->nColumn;
              pExpr->aHist = (unsigned int *)sqlite3_malloc(nByte);
              if( !pExpr->aHist ){
                pInfo->rc = SQLITE_NOMEM;
                return;
              }
              memset(pExpr->aHist, 0, nByte);

              /* Scan the entire doclist to populate Fts3Expr.aHist[]. */ 
              while( pCsr<pEnd ){
                /* Skip the docid varint. */
                while( *pCsr++ & 0x80 );
                while( *pCsr ){
                  sqlite3_int64 iCol = 0;
                  if( *pCsr==0x01 ){
                    /* Step over the 0x01 marker, then read the column
                    ** number that follows it. The increment and the read
                    ** are separate statements so that pCsr is never
                    ** modified twice within a single expression.
                    */
                    pCsr++;
                    pCsr += sqlite3Fts3GetVarint(pCsr, &iCol);
                  }
                  /* NOTE(review): iCol comes straight from the doclist and
                  ** is not range-checked against pTab->nColumn here.
                  */
                  pExpr->aHist[iCol] += fts3ColumnlistCount(&pCsr);
                }

                /* Step over the 0x00 that terminates the position-list. */
                pCsr++;
              }
            }

            fts3MatchInfoAppend(pInfo, pExpr->aHist[i]);
          }

          if( pInfo->flags&FTS3_MATCHINFO_PHRASELENGTH ){
            fts3MatchInfoAppend(pInfo, nPhrase);
          }

          /* Determine whether the position-list has an entry for column i.
          ** If not, leave the local-count at zero and move to the next
          ** column. Data for column 0 has no 0x01 prefix; data for any
          ** other column starts with 0x01 followed by the column number.
          */
          if( i==0 ){
            if( *pExpr->pCurrent==0x01 ) continue;
          }else{
            sqlite3_int64 iCol;
            char *pList = pExpr->pCurrent;
            if( *pList==0x00 ) continue;
            pList++;
            pList += sqlite3Fts3GetVarint(pList, &iCol);
            if( iCol!=i ) continue;
            pExpr->pCurrent = pList;
          }

          if( pInfo->flags&FTS3_MATCHINFO_POSITIONLIST ){
            int nLocal = 0;
            sqlite3_int64 iOffset = 0;
            char *pList = pExpr->pCurrent;
            while( *pList&0xFE ){
              fts3GetDeltaVarint(&pList, &iOffset);
              iOffset -= 2;
              fts3MatchInfoAppend(pInfo, (unsigned int)(iOffset+1-nPhrase));
              nLocal++;
            }
            pExpr->pCurrent = pList;
            pInfo->aOut[iLocalOff] = nLocal;
          }else{
            pInfo->aOut[iLocalOff] = fts3ColumnlistCount(&pExpr->pCurrent);
          }
        }

        /* Step over the position-list terminator and, if there is another
        ** entry, read its docid ready for the next call.
        */
        pExpr->pCurrent++;
        if( pExpr->pCurrent<pEnd ){
          fts3GetDeltaVarint(&pExpr->pCurrent, &pExpr->iDocid);
        }
      }
    }
    pInfo->nQuery++;
  }
}

/*
** Helper function used by the implementation of the overloaded snippet(),
** offsets() and optimize() SQL functions.
**
** If the value passed as the third argument is a blob of size
................................................................................
  }
  if( !zEllipsis || !zEnd || !zStart ){
    sqlite3_result_error_nomem(pContext);
  }else if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){
    sqlite3Fts3Snippet(pContext, pCsr, zStart, zEnd, zEllipsis);
  }
}










































/*
** Implementation of the offsets() function for FTS3
*/
static void fts3OffsetsFunc(
  sqlite3_context *pContext,      /* SQLite function call context */
  int nVal,                       /* Size of argument array */
................................................................................
*/
static void fts3MatchinfoFunc(
  sqlite3_context *pContext,      /* SQLite function call context */
  int nVal,                       /* Size of argument array */
  sqlite3_value **apVal           /* Array of arguments */
){
  Fts3Cursor *pCsr;               /* Cursor handle passed through apVal[0] */
  int flags = 0;                  /* Mask of FTS3_MATCHINFO_XXX values */

  /* The optional second argument is a string of flag characters: 'g', 'p'
  ** and 'n' select the GLOBALCOUNT, POSITIONLIST and PHRASELENGTH fields
  ** respectively. Any other character is an error.
  */
  if( nVal==2 ){
    int i;
    const unsigned char *zFlags = sqlite3_value_text(apVal[1]);

    /* sqlite3_value_text() may return NULL. Report this the same way the
    ** snippet functions in this file do, rather than dereferencing it.
    */
    if( !zFlags ){
      sqlite3_result_error_nomem(pContext);
      return;
    }

    for(i=0; zFlags[i]; i++){
      switch( zFlags[i] ){
        case 'g': flags |= FTS3_MATCHINFO_GLOBALCOUNT; break;
        case 'p': flags |= FTS3_MATCHINFO_POSITIONLIST; break;
        case 'n': flags |= FTS3_MATCHINFO_PHRASELENGTH; break;
        default: {
          /* Build a nul-terminated message with the offending character
          ** substituted for the '?' placeholder (third byte from the end
          ** of the array, counting the terminator).
          */
          char zErr[] = "Unknown flag: \"?\"";
          zErr[sizeof(zErr)-3] = (char)zFlags[i];
          sqlite3_result_error(pContext, zErr, -1);
          return;
        }
      }
    }
  }else if( nVal!=1 ){
    sqlite3_result_error(pContext,
        "wrong number of arguments to function matchinfo()", -1);
    return;
  }

  if( SQLITE_OK==fts3FunctionArg(pContext, "matchinfo", apVal[0], &pCsr) ){
    MatchInfo ctx;
    memset(&ctx, 0, sizeof(ctx));
    ctx.iDocid = pCsr->iPrevId;
    ctx.pTab = (Fts3Table *)pCsr->base.pVtab;
    ctx.flags = flags;

    /* Integer 0 is a placeholder for the number of simple queries; it is
    ** filled in below once the expression has been traversed. Integer 1 is
    ** the number of columns in the table.
    */
    fts3MatchInfoAppend(&ctx, 0);
    fts3MatchInfoAppend(&ctx, ctx.pTab->nColumn);

    /* Iterate through each of the 'simple' queries that make up the query
    ** expression. A 'simple' query is a phrase (including token and token 
    ** prefix) or NEAR query. 
    */
    fts3ExprMatchInfo(pContext, pCsr->pExpr, &ctx);
    if( ctx.rc ){
      sqlite3_free(ctx.aOut);
      sqlite3_result_error_code(pContext, ctx.rc);
    }else{
      /* Ownership of ctx.aOut passes to SQLite, which releases it with
      ** sqlite3_free().
      */
      int nByte = ctx.nOut*sizeof(unsigned int);
      ctx.aOut[0] = ctx.nQuery;
      sqlite3_result_blob(pContext, ctx.aOut, nByte, sqlite3_free);
    }
  }
}

/*
** This routine implements the xFindFunction method for the FTS3
** virtual table.
*/
................................................................................
  void **ppArg                    /* Unused */
){
  struct Overloaded {
    const char *zName;
    void (*xFunc)(sqlite3_context*,int,sqlite3_value**);
  } aOverload[] = {
    { "snippet", fts3SnippetFunc },

    { "offsets", fts3OffsetsFunc },
    { "optimize", fts3OptimizeFunc },
    { "matchinfo", fts3MatchinfoFunc },
  };
  int i;                          /* Iterator variable */

  UNUSED_PARAMETER(pVtab);
................................................................................
  /* Create the virtual table wrapper around the hash-table and overload 
  ** the two scalar functions. If this is successful, register the
  ** module with sqlite.
  */
  if( SQLITE_OK==rc 
   && SQLITE_OK==(rc = sqlite3Fts3InitHashTable(db, pHash, "fts3_tokenizer"))
   && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1))

   && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", 1))
   && SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", -1))
   && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", 1))
  ){
    return sqlite3_create_module_v2(
        db, "fts3", &fts3Module, (void *)pHash, hashDestroy
    );







>







 







>







 







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







 







|
|
|
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<

<
<
<
>
>
|
<
<
<
<
<
<
<

<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<

<
<
>
>
>

<
<
>
|
<
>
>

<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
|
|
<
<
|
<
<
<
|
<
<
<
>
>
>
|
>
|
<
<
<
<
>
|
|
<
<
<
<
<
<
<
<
<
<
>
>
>
>
|
<
<
<
<
<
<
<
<
<
<
<
|
|
<
<
<
<
<
|
<
<
|
<
>
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<

>
|
<
<
<
<
<
<
<
<
<
>
|
<
<
<
<
|
|
<
<
<
|
<
<
<
<
>







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







<

<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|






<
<
<
<
<
<
<
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<







 







>







 







>







793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
...
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
....
1002
1003
1004
1005
1006
1007
1008



















1009
1010
1011
1012
1013
1014
1015
....
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024

















2025



2026
2027
2028







2029

























2030


2031
2032
2033
2034


2035
2036

2037
2038
2039



















2040





2041
2042


2043



2044



2045
2046
2047
2048
2049
2050




2051
2052
2053










2054
2055
2056
2057
2058











2059
2060





2061


2062

2063
2064

















2065


2066
2067
2068









2069
2070




2071
2072



2073




2074
2075
2076
2077
2078
2079
2080
2081
....
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
....
2247
2248
2249
2250
2251
2252
2253

2254


















2255
2256
2257
2258
2259
2260
2261













2262








2263
2264
2265
2266
2267
2268
2269
....
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
....
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
** on the xClose method of the virtual table interface.
*/
static int fulltextClose(sqlite3_vtab_cursor *pCursor){
  Fts3Cursor *pFts3Csr = (Fts3Cursor *)pCursor;

  /* Release everything the cursor owns before freeing the cursor itself:
  ** the prepared statement, the parsed query expression, the doclist
  ** buffer and the matchinfo array.
  */
  sqlite3_finalize(pFts3Csr->pStmt);
  sqlite3Fts3ExprFree(pFts3Csr->pExpr);
  sqlite3_free(pFts3Csr->aDoclist);
  sqlite3_free(pFts3Csr->aMatchinfo);

  sqlite3_free(pFts3Csr);
  return SQLITE_OK;
}

static int fts3CursorSeek(sqlite3_context *pContext, Fts3Cursor *pCsr){
  if( pCsr->isRequireSeek ){
    pCsr->isRequireSeek = 0;
................................................................................
    }
  }else if( pCsr->pNextId>=&pCsr->aDoclist[pCsr->nDoclist] ){
    pCsr->isEof = 1;
  }else{
    sqlite3_reset(pCsr->pStmt);
    fts3GetDeltaVarint(&pCsr->pNextId, &pCsr->iPrevId);
    pCsr->isRequireSeek = 1;
    pCsr->isMatchinfoOk = 1;
  }
  return rc;
}


/*
** The buffer pointed to by argument zNode (size nNode bytes) contains the
................................................................................
    memcpy(p, *ppPoslist, n);
    p += n;
    *pp = p;
  }
  *ppPoslist = pEnd;
}




















/*
** Value used to signify the end of an offset-list. This is safe because
** it is not possible to have a document with 2^31 terms.
*/
#define OFFSET_LIST_END 0x7fffffff

/*
................................................................................
*/
static int fts3RollbackMethod(sqlite3_vtab *pVtab){
  Fts3Table *pTab = (Fts3Table *)pVtab;

  /* Discard the table's pending terms; this method always reports success. */
  sqlite3Fts3PendingTermsClear(pTab);
  return SQLITE_OK;
}

/*
** Load the doclist associated with expression pExpr to pExpr->aDoclist.
** The loaded doclist contains positions as well as the document ids.
** This is used by the matchinfo(), snippet() and offsets() auxiliary
** functions.

















*/



int sqlite3Fts3ExprLoadDoclist(Fts3Table *pTab, Fts3Expr *pExpr){
  int rc;                         /* Result of evaluating the expression */

  /* The doclist and its size are written directly into pExpr. The final
  ** argument is passed as 1 - presumably the request for position data;
  ** confirm against evalFts3Expr().
  */
  rc = evalFts3Expr(pTab, pExpr, &pExpr->aDoclist, &pExpr->nDoclist, 1);
  return rc;
}

































/*
** After ExprLoadDoclist() (see above) has been called, this function is
** used to iterate through the position lists that make up the doclist
** stored in pExpr->aDoclist.
**
** The search resumes from pExpr->pCurrent/pExpr->iCurrent and advances
** them past every entry whose docid is smaller than iDocid. Both fields
** must have been initialized before the first call (pCurrent is asserted
** to be non-NULL).
**
** Return value:
**
**   * If the doclist has an entry for iDocid and iCol is negative, a
**     pointer to the start of that entry's position-list.
**
**   * If the doclist has an entry for iDocid that includes data for column
**     iCol, a pointer to the start of the list of offsets for that column.
**
**   * Otherwise (no doclist, no entry for iDocid, or no data for iCol),
**     NULL.
*/
char *sqlite3Fts3FindPositions(
  Fts3Expr *pExpr,                /* Access this expressions doclist */
  sqlite3_int64 iDocid,           /* Docid associated with requested pos-list */
  int iCol                        /* Column of requested pos-list */
){
  assert( pExpr->isLoaded );
  if( pExpr->aDoclist ){
    char *pEnd = &pExpr->aDoclist[pExpr->nDoclist];
    char *pCsr = pExpr->pCurrent;

    assert( pCsr );
    while( pCsr<pEnd ){
      if( pExpr->iCurrent<iDocid ){
        /* Skip over the position-list of the current entry. Only read the
        ** next docid if there is another entry; after the final entry
        ** pCsr==pEnd and there is nothing left in the buffer to decode.
        */
        fts3PoslistCopy(0, &pCsr);
        if( pCsr<pEnd ){
          fts3GetDeltaVarint(&pCsr, &pExpr->iCurrent);
        }
        pExpr->pCurrent = pCsr;
      }else{
        if( pExpr->iCurrent==iDocid ){
          int iThis = 0;          /* Column that pCsr currently points into */
          if( iCol<0 ){
            /* If iCol is negative, return a pointer to the start of the
            ** position-list (instead of a pointer to the start of a list
            ** of offsets associated with a specific column).
            */
            return pCsr;
          }

          /* Walk forward one column-list at a time until reaching column
          ** iCol or beyond. A 0x00 byte after a column-list marks the end
          ** of the position-list; otherwise the byte is stepped over and
          ** the following varint is the number of the next column.
          */
          while( iThis<iCol ){
            fts3ColumnlistCopy(0, &pCsr);
            if( *pCsr==0x00 ) return 0;
            pCsr++;
            pCsr += sqlite3Fts3GetVarint32(pCsr, &iThis);
          }
          if( iCol==iThis ) return pCsr;
        }

        /* Either the doclist has no entry for iDocid, or that entry has no
        ** data for column iCol.
        */
        return 0;
      }
    }
  }

  return 0;
}

/*
** Helper function used by the implementation of the overloaded snippet(),
** offsets() and optimize() SQL functions.
**
** If the value passed as the third argument is a blob of size
................................................................................
  }
  if( !zEllipsis || !zEnd || !zStart ){
    sqlite3_result_error_nomem(pContext);
  }else if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){
    sqlite3Fts3Snippet(pContext, pCsr, zStart, zEnd, zEllipsis);
  }
}

/*
** Implementation of the snippet2() function for FTS3.
**
** apVal[0] is the cursor handle. The remaining arguments are optional and
** override the defaults shown below, in this order: start-of-match text,
** end-of-match text, ellipsis text, then the integer values handed to
** sqlite3Fts3Snippet2() as its iCol and nToken parameters.
*/
static void fts3Snippet2Func(
  sqlite3_context *pContext,      /* SQLite function call context */
  int nVal,                       /* Size of apVal[] array */
  sqlite3_value **apVal           /* Array of arguments */
){
  Fts3Cursor *pCsr;               /* Cursor handle passed through apVal[0] */
  const char *zStart = "<b>";
  const char *zEnd = "</b>";
  const char *zEllipsis = "<b>...</b>";
  int iCol = -1;
  int nToken = 10;

  /* There must be at least one argument passed to this function (otherwise
  ** the non-overloaded version would have been called instead of this one).
  */
  assert( nVal>=1 );

  if( nVal>6 ){
    sqlite3_result_error(pContext, 
        "wrong number of arguments to function snippet()", -1);
    return;
  }
  if( fts3FunctionArg(pContext, "snippet", apVal[0], &pCsr) ) return;

  /* Read whichever optional arguments were supplied, last argument first. */
  if( nVal>=6 ) nToken = sqlite3_value_int(apVal[5]);
  if( nVal>=5 ) iCol = sqlite3_value_int(apVal[4]);
  if( nVal>=4 ) zEllipsis = (const char*)sqlite3_value_text(apVal[3]);
  if( nVal>=3 ) zEnd = (const char*)sqlite3_value_text(apVal[2]);
  if( nVal>=2 ) zStart = (const char*)sqlite3_value_text(apVal[1]);

  if( zStart && zEnd && zEllipsis ){
    if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){
      sqlite3Fts3Snippet2(
          pContext, pCsr, zStart, zEnd, zEllipsis, iCol, nToken
      );
    }
  }else{
    /* A NULL text pointer is reported as an out-of-memory error. */
    sqlite3_result_error_nomem(pContext);
  }
}

/*
** Implementation of the offsets() function for FTS3
*/
static void fts3OffsetsFunc(
  sqlite3_context *pContext,      /* SQLite function call context */
  int nVal,                       /* Size of argument array */
................................................................................
*/
static void fts3MatchinfoFunc(
  sqlite3_context *pContext,      /* SQLite function call context */
  int nVal,                       /* Size of argument array */
  sqlite3_value **apVal           /* Array of arguments */
){
  Fts3Cursor *pCsr;               /* Cursor handle passed through apVal[0] */
  int rc;                         /* Result of extracting the cursor handle */

  /* Exactly one argument - the cursor handle - is accepted. */
  if( nVal!=1 ){
    sqlite3_result_error(pContext,
        "wrong number of arguments to function matchinfo()", -1);
    return;
  }

  /* Nothing further is done here if the cursor handle cannot be obtained. */
  rc = fts3FunctionArg(pContext, "matchinfo", apVal[0], &pCsr);
  if( rc!=SQLITE_OK ) return;

  sqlite3Fts3Matchinfo(pContext, pCsr);
}

/*
** This routine implements the xFindFunction method for the FTS3
** virtual table.
*/
................................................................................
  void **ppArg                    /* Unused */
){
  struct Overloaded {
    const char *zName;
    void (*xFunc)(sqlite3_context*,int,sqlite3_value**);
  } aOverload[] = {
    { "snippet", fts3SnippetFunc },
    { "snippet2", fts3Snippet2Func },
    { "offsets", fts3OffsetsFunc },
    { "optimize", fts3OptimizeFunc },
    { "matchinfo", fts3MatchinfoFunc },
  };
  int i;                          /* Iterator variable */

  UNUSED_PARAMETER(pVtab);
................................................................................
  /* Create the virtual table wrapper around the hash-table and overload 
  ** the two scalar functions. If this is successful, register the
  ** module with sqlite.
  */
  if( SQLITE_OK==rc 
   && SQLITE_OK==(rc = sqlite3Fts3InitHashTable(db, pHash, "fts3_tokenizer"))
   && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1))
   && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet2", -1))
   && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", 1))
   && SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", -1))
   && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", 1))
  ){
    return sqlite3_create_module_v2(
        db, "fts3", &fts3Module, (void *)pHash, hashDestroy
    );

Changes to ext/fts3/fts3Int.h.

66
67
68
69
70
71
72


73
74
75
76
77
78
79
...
142
143
144
145
146
147
148


149
150
151
152
153
154
155
...
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
...
269
270
271
272
273
274
275



276
277
278
279
280
281
282
283
284
285
286
287




288
289
290
291
292
293
294
295
296
297
298
# define ALWAYS(x) (x)
/* Expands to its argument unchanged, like ALWAYS() above. The body must
** spell the parameter exactly as declared (upper-case X). */
# define NEVER(X)  (X)
/*
** Internal types used by SQLite.
*/
typedef unsigned char u8;         /* 1-byte (or larger) unsigned integer */
typedef short int i16;            /* 2-byte (or larger) signed integer */


/*
** Macro used to suppress compiler warnings for unused parameters.
*/
#define UNUSED_PARAMETER(x) (void)(x)
#endif

typedef struct Fts3Table Fts3Table;
................................................................................
  u8 isRequireSeek;               /* True if must seek pStmt to %_content row */
  sqlite3_stmt *pStmt;            /* Prepared statement in use by the cursor */
  Fts3Expr *pExpr;                /* Parsed MATCH query string */
  sqlite3_int64 iPrevId;          /* Previous id read from aDoclist */
  char *pNextId;                  /* Pointer into the body of aDoclist */
  char *aDoclist;                 /* List of docids for full-text queries */
  int nDoclist;                   /* Size of buffer at aDoclist */


};

/*
** The Fts3Cursor.eSearch member is always set to one of the following.
** Actually, Fts3Cursor.eSearch can be greater than or equal to
** FTS3_FULLTEXT_SEARCH.  If so, then Fts3Cursor.eSearch - 2 is the index
** of the column to be searched.  For example, in
................................................................................
  int eType;                 /* One of the FTSQUERY_XXX values defined below */
  int nNear;                 /* Valid if eType==FTSQUERY_NEAR */
  Fts3Expr *pParent;         /* pParent->pLeft==this or pParent->pRight==this */
  Fts3Expr *pLeft;           /* Left operand */
  Fts3Expr *pRight;          /* Right operand */
  Fts3Phrase *pPhrase;       /* Valid if eType==FTSQUERY_PHRASE */

  int isLoaded;
  sqlite3_int64 iDocid;
  char *aDoclist;
  int nDoclist;
  char *pCurrent;
  unsigned int *aHist;
};

/*
** Candidate values for Fts3Query.eType. Note that the order of the first
** four values is in order of precedence when parsing expressions. For 
** example, the following:
**
................................................................................
/* fts3.c */
int sqlite3Fts3PutVarint(char *, sqlite3_int64);
int sqlite3Fts3GetVarint(const char *, sqlite_int64 *);
int sqlite3Fts3GetVarint32(const char *, int *);
int sqlite3Fts3VarintLen(sqlite3_uint64);
void sqlite3Fts3Dequote(char *);




/* fts3_tokenizer.c */
const char *sqlite3Fts3NextToken(const char *, int *);
int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *);
int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, 
  const char *, sqlite3_tokenizer **, const char **, char **
);

/* fts3_snippet.c */
void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*);
void sqlite3Fts3Snippet(sqlite3_context*, Fts3Cursor*, 
  const char *, const char *, const char *
);





/* fts3_expr.c */
int sqlite3Fts3ExprParse(sqlite3_tokenizer *, 
  char **, int, int, const char *, int, Fts3Expr **
);
void sqlite3Fts3ExprFree(Fts3Expr *);
#ifdef SQLITE_TEST
int sqlite3Fts3ExprInitTestInterface(sqlite3 *db);
#endif

#endif /* _FTSINT_H */







>
>







 







>
>







 







|
|
|
|
|
|







 







>
>
>












>
>
>
>











66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
...
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
...
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
...
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
# define ALWAYS(x) (x)
/* Expands to its argument unchanged, like ALWAYS() above. The body must
** spell the parameter exactly as declared (upper-case X). */
# define NEVER(X)  (X)
/*
** Internal types used by SQLite.
*/
typedef unsigned char u8;         /* 1-byte (or larger) unsigned integer */
typedef short int i16;            /* 2-byte (or larger) signed integer */
typedef unsigned int u32;         /* 4-byte unsigned integer */
typedef sqlite3_uint64 u64;       /* 8-byte unsigned integer */
/*
** Macro used to suppress compiler warnings for unused parameters.
*/
#define UNUSED_PARAMETER(x) (void)(x)
#endif

typedef struct Fts3Table Fts3Table;
................................................................................
  u8 isRequireSeek;               /* True if must seek pStmt to %_content row */
  sqlite3_stmt *pStmt;            /* Prepared statement in use by the cursor */
  Fts3Expr *pExpr;                /* Parsed MATCH query string */
  sqlite3_int64 iPrevId;          /* Previous id read from aDoclist */
  char *pNextId;                  /* Pointer into the body of aDoclist */
  char *aDoclist;                 /* List of docids for full-text queries */
  int nDoclist;                   /* Size of buffer at aDoclist */
  int isMatchinfoOk;              /* True when aMatchinfo[] matches iPrevId */
  u32 *aMatchinfo;
};

/*
** The Fts3Cursor.eSearch member is always set to one of the following.
** Actually, Fts3Cursor.eSearch can be greater than or equal to
** FTS3_FULLTEXT_SEARCH.  If so, then Fts3Cursor.eSearch - 2 is the index
** of the column to be searched.  For example, in
................................................................................
  int eType;                 /* One of the FTSQUERY_XXX values defined below */
  int nNear;                 /* Valid if eType==FTSQUERY_NEAR */
  Fts3Expr *pParent;         /* pParent->pLeft==this or pParent->pRight==this */
  Fts3Expr *pLeft;           /* Left operand */
  Fts3Expr *pRight;          /* Right operand */
  Fts3Phrase *pPhrase;       /* Valid if eType==FTSQUERY_PHRASE */

  int isLoaded;              /* True if aDoclist/nDoclist are initialized. */
  char *aDoclist;            /* Buffer containing doclist */
  int nDoclist;              /* Size of aDoclist in bytes */

  sqlite3_int64 iCurrent;
  char *pCurrent;
};

/*
** Candidate values for Fts3Query.eType. Note that the order of the first
** four values is in order of precedence when parsing expressions. For 
** example, the following:
**
................................................................................
/* fts3.c */
int sqlite3Fts3PutVarint(char *, sqlite3_int64);
int sqlite3Fts3GetVarint(const char *, sqlite_int64 *);
int sqlite3Fts3GetVarint32(const char *, int *);
int sqlite3Fts3VarintLen(sqlite3_uint64);
void sqlite3Fts3Dequote(char *);

char *sqlite3Fts3FindPositions(Fts3Expr *, sqlite3_int64, int);
int sqlite3Fts3ExprLoadDoclist(Fts3Table *, Fts3Expr *);

/* fts3_tokenizer.c */
const char *sqlite3Fts3NextToken(const char *, int *);
int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *);
int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, 
  const char *, sqlite3_tokenizer **, const char **, char **
);

/* fts3_snippet.c */
void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*);
void sqlite3Fts3Snippet(sqlite3_context*, Fts3Cursor*, 
  const char *, const char *, const char *
);
void sqlite3Fts3Snippet2(sqlite3_context *, Fts3Cursor *, const char *,
  const char *, const char *, int, int
);
void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *);

/* fts3_expr.c */
int sqlite3Fts3ExprParse(sqlite3_tokenizer *, 
  char **, int, int, const char *, int, Fts3Expr **
);
void sqlite3Fts3ExprFree(Fts3Expr *);
#ifdef SQLITE_TEST
int sqlite3Fts3ExprInitTestInterface(sqlite3 *db);
#endif

#endif /* _FTSINT_H */

Changes to ext/fts3/fts3_expr.c.

732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse().
*/
void sqlite3Fts3ExprFree(Fts3Expr *p){
  /* Freeing a NULL expression is a no-op. */
  if( p==0 ) return;

  /* Release both sub-trees first, then the buffers owned by this node,
  ** and finally the node itself. */
  sqlite3Fts3ExprFree(p->pLeft);
  sqlite3Fts3ExprFree(p->pRight);
  sqlite3_free(p->aDoclist);
  sqlite3_free(p->aHist);
  sqlite3_free(p);
}

/****************************************************************************
*****************************************************************************
** Everything after this point is just test code.







<







732
733
734
735
736
737
738

739
740
741
742
743
744
745
** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse().
*/
void sqlite3Fts3ExprFree(Fts3Expr *p){
  /* Freeing a NULL expression is a no-op. */
  if( p==0 ) return;

  /* Release both sub-trees first, then the doclist buffer owned by this
  ** node, and finally the node itself. */
  sqlite3Fts3ExprFree(p->pLeft);
  sqlite3Fts3ExprFree(p->pRight);
  sqlite3_free(p->aDoclist);
  sqlite3_free(p);
}

/****************************************************************************
*****************************************************************************
** Everything after this point is just test code.

Changes to ext/fts3/fts3_snippet.c.

726
727
728
729
730
731
732
733


































































































































































































































































































































































































































































































































































































































734
      sqlite3_result_error_nomem(pCtx);
    }
  }else{
    sqlite3_result_error_nomem(pCtx);
  }
  fts3SnippetFree(p);
}



































































































































































































































































































































































































































































































































































































































#endif








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
      sqlite3_result_error_nomem(pCtx);
    }
  }else{
    sqlite3_result_error_nomem(pCtx);
  }
  fts3SnippetFree(p);
}

/*************************************************************************
** Below this point is the alternative, experimental snippet() implementation.
*/

/*
** Geometry of the circular snippet buffer. The buffer is filled one chunk
** of SNIPPET_BUFFER_CHUNK entries at a time and holds four chunks in total.
** SNIPPET_BUFFER_MASK reduces a token offset to an index into the buffer.
**
** Each expansion is fully parenthesized so that the macros can be used
** safely inside larger expressions (e.g. x/SNIPPET_BUFFER_SIZE).
*/
#define SNIPPET_BUFFER_CHUNK  64
#define SNIPPET_BUFFER_SIZE   (SNIPPET_BUFFER_CHUNK*4)
#define SNIPPET_BUFFER_MASK   (SNIPPET_BUFFER_SIZE-1)

/*
** Decode one varint from the buffer at *pp and advance *pp past it. The
** decoded value, less 2, is added to *piPos.
*/
static void fts3GetDeltaPosition(char **pp, int *piPos){
  char *pRead = *pp;              /* Current read position */
  int iDelta;                     /* Raw varint value */
  int nStep;                      /* Amount to advance *piPos by */

  pRead += sqlite3Fts3GetVarint32(pRead, &iDelta);
  nStep = iDelta - 2;

  *pp = pRead;
  *piPos += nStep;
}

/*
** Iterate through all phrase nodes in an FTS3 query, except those that
** are part of a sub-tree that is the right-hand-side of a NOT operator.
** For each phrase node found, the supplied callback function is invoked.
**
** If the callback function returns anything other than SQLITE_OK, 
** the iteration is abandoned and the error code returned immediately.
** Otherwise, SQLITE_OK is returned after a callback has been made for
** all eligible phrase nodes.
**
** Phrases are visited left-to-right. For a NOT node only the left-hand
** sub-tree is descended into; the right-hand sub-tree is skipped.
*/
static int fts3ExprIterate(
  Fts3Expr *pExpr,                /* Expression to iterate phrases of */
  int (*x)(Fts3Expr *, void *),   /* Callback function to invoke for phrases */
  void *pCtx                      /* Second argument to pass to callback */
){
  int rc;
  int eType = pExpr->eType;
  if( eType!=FTSQUERY_PHRASE ){
    /* Every non-phrase node is a binary operator. */
    assert( pExpr->pLeft && pExpr->pRight );
    rc = fts3ExprIterate(pExpr->pLeft, x, pCtx);
    /* Only the right-hand side of a NOT is excluded from the iteration. */
    if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){
      rc = fts3ExprIterate(pExpr->pRight, x, pCtx);
    }
  }else{
    rc = x(pExpr, pCtx);
  }
  return rc;
}

/*
** Context object handed by fts3ExprLoadDoclists() to the
** fts3ExprLoadDoclistsCb() callback via fts3ExprIterate().
*/
typedef struct LoadDoclistCtx LoadDoclistCtx;
struct LoadDoclistCtx {
  Fts3Table *pTab;                /* FTS3 Table */
  int nPhrase;                    /* Number of phrases so far */
};

/*
** fts3ExprIterate() callback used by fts3ExprLoadDoclists(). Each call
** increments the phrase counter in the LoadDoclistCtx passed as ctx. If
** the doclist of pExpr is not yet marked as loaded, it is loaded and the
** node is marked as loaded. If the load succeeds and produces a doclist,
** the first varint is decoded into pExpr->iCurrent and pExpr->pCurrent is
** left pointing just past it.
*/
static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, void *ctx){
  LoadDoclistCtx *pLoad = (LoadDoclistCtx *)ctx;
  int rc;

  pLoad->nPhrase++;
  if( pExpr->isLoaded ){
    return SQLITE_OK;
  }

  rc = sqlite3Fts3ExprLoadDoclist(pLoad->pTab, pExpr);
  pExpr->isLoaded = 1;
  if( rc==SQLITE_OK && pExpr->aDoclist!=0 ){
    char *pFirst = pExpr->aDoclist;
    pFirst += sqlite3Fts3GetVarint(pFirst, &pExpr->iCurrent);
    pExpr->pCurrent = pFirst;
  }
  return rc;
}

/*
** Invoke fts3ExprLoadDoclistsCb() for each phrase visited by
** fts3ExprIterate() on the cursor's expression. The number of phrases
** visited is written to *pnPhrase. Returns the code from fts3ExprIterate().
*/
static int fts3ExprLoadDoclists(Fts3Cursor *pCsr, int *pnPhrase){
  LoadDoclistCtx sLoad;           /* Context passed to the callback */
  int rc;                         /* Return code */

  sLoad.pTab = (Fts3Table *)pCsr->base.pVtab;
  sLoad.nPhrase = 0;

  rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sLoad);
  *pnPhrase = sLoad.nPhrase;
  return rc;
}

/*
** Each call to this function populates a chunk of a snippet-buffer 
** SNIPPET_BUFFER_CHUNK bytes in size.
**
** The chunk covering token offsets iPos..iPos+SNIPPET_BUFFER_CHUNK-1 is
** first zeroed. Then, for each position list i, every position that falls
** inside the chunk has its buffer entry set to i+1. The read pointer
** apList[i] and the last position read aiPrev[i] are updated so that the
** next call resumes where this one stopped.
**
** Return true if the end of the data has been reached (and all subsequent
** calls to fts3LoadSnippetBuffer() with the same arguments will be no-ops), 
** or false otherwise.
*/
static int fts3LoadSnippetBuffer(
  int iPos,                       /* Document token offset to load data for */
  u8 *aBuffer,                    /* Circular snippet buffer to populate */
  int nList,                      /* Number of position lists in apList */
  char **apList,                  /* IN/OUT: nList position list pointers */
  int *aiPrev                     /* IN/OUT: Previous positions read */
){
  int i;
  int nFin = 0;                   /* Number of lists with nothing left to read */

  /* iPos must be a multiple of the chunk size. */
  assert( (iPos&(SNIPPET_BUFFER_CHUNK-1))==0 );

  memset(&aBuffer[iPos&SNIPPET_BUFFER_MASK], 0, SNIPPET_BUFFER_CHUNK);

  for(i=0; i<nList; i++){
    int iPrev = aiPrev[i];
    char *pList = apList[i];

    /* A NULL list pointer is counted as an exhausted list. */
    if( !pList ){
      nFin++;
      continue;
    }

    /* Consume positions until one at or beyond the end of this chunk has
    ** been read; that position is kept in aiPrev[i] for the next call. */
    while( iPrev<(iPos+SNIPPET_BUFFER_CHUNK) ){
      if( iPrev>=iPos ){
        aBuffer[iPrev&SNIPPET_BUFFER_MASK] = i+1;
      }
      /* A byte of 0x00 or 0x01 marks the end of this position list. */
      if( 0==((*pList)&0xFE) ){
        nFin++;
        break;
      }
      fts3GetDeltaPosition(&pList, &iPrev); 
    }

    aiPrev[i] = iPrev;
    apList[i] = pList;
  }

  /* True only when every list was found to be exhausted. */
  return (nFin==nList);
}

/*
** Context object passed to the fts3SnippetFindPositions() callback. The
** three arrays are indexed by phrase number and are filled in by the
** callback, one slot per phrase visited.
*/
typedef struct SnippetCtx SnippetCtx;
struct SnippetCtx {
  Fts3Cursor *pCsr;               /* Cursor; its iPrevId selects the row */
  int iCol;                       /* Column passed to sqlite3Fts3FindPositions() */
  int iPhrase;                    /* Index of the next phrase to be visited */
  int *aiPrev;                    /* OUT: First position of each phrase */
  int *anToken;                   /* OUT: Number of tokens in each phrase */
  char **apList;                  /* OUT: Position list pointer for each phrase */
};

/*
** fts3ExprIterate() callback used by fts3BestSnippet(). ctx points to a
** SnippetCtx. The next free phrase slot is claimed and the phrase's token
** count stored in anToken[]. If sqlite3Fts3FindPositions() returns a
** position list for the context's row and column, its first varint (less 2)
** is stored in aiPrev[] and a pointer to the remainder of the list in
** apList[]. Always returns SQLITE_OK.
*/
static int fts3SnippetFindPositions(Fts3Expr *pExpr, void *ctx){
  SnippetCtx *pSnip = (SnippetCtx *)ctx;
  int iSlot = pSnip->iPhrase;     /* Slot used for this phrase */
  char *pPos;                     /* Position list, or NULL */

  pSnip->iPhrase = iSlot + 1;
  pSnip->anToken[iSlot] = pExpr->pPhrase->nToken;

  pPos = sqlite3Fts3FindPositions(pExpr, pSnip->pCsr->iPrevId, pSnip->iCol);
  if( pPos!=0 ){
    int iFirst;
    pPos += sqlite3Fts3GetVarint32(pPos, &iFirst);
    pSnip->apList[iSlot] = pPos;
    pSnip->aiPrev[iSlot] = iFirst-2;
  }
  return SQLITE_OK;
}

/*
** Advance the sliding snippet window by one token so that it starts at
** token offset iIdx, updating the per-phrase counters anCnt[] and the
** highlight mask *pHlmask accordingly.
**
** anCnt[] is indexed by the values stored in aBuffer[] (0 for "no phrase",
** otherwise phrase index + 1). Each buffer entry is counted once while it
** is anywhere in the nSnippet-token window and a second time while it is
** between offsets nSnippet/3 and nSnippet*2/3 of the window.
**
** NOTE(review): the shift below is by (nSnippet-j); this presumably relies
** on nSnippet being no larger than 64 and no smaller than the longest
** phrase - confirm against callers.
*/
static void fts3SnippetCnt(
  int iIdx, 
  int nSnippet, 
  int *anCnt, 
  u8 *aBuffer,
  int *anToken,
  u64 *pHlmask
){
  /* Buffer slots of the entries leaving (Sub) and entering (Add) the full
  ** window and the inner sub-window respectively. */
  int iSub =  (iIdx-1)&SNIPPET_BUFFER_MASK;
  int iAdd =  (iIdx+nSnippet-1)&SNIPPET_BUFFER_MASK;
  int iSub2 = (iIdx+(nSnippet/3)-1)&SNIPPET_BUFFER_MASK;
  int iAdd2 = (iIdx+(nSnippet*2/3)-1)&SNIPPET_BUFFER_MASK;

  u64 h = *pHlmask;

  anCnt[ aBuffer[iSub]  ]--;
  anCnt[ aBuffer[iSub2] ]--;
  anCnt[ aBuffer[iAdd]  ]++;
  anCnt[ aBuffer[iAdd2] ]++;

  /* Bit 0 of the mask corresponds to the first token of the window, so
  ** moving the window forward shifts the mask right by one. */
  h = h >> 1;
  if( aBuffer[iAdd] ){
    /* The entering token belongs to a phrase: mark it and the
    ** (nToken-1) tokens before it as highlighted. */
    int j;
    for(j=anToken[aBuffer[iAdd]-1]; j>=1; j--){
      h |= (u64)1 << (nSnippet-j);
    }
  }
  *pHlmask = h;
}

/*
** Compute the score of a snippet from the counters anCnt[1]..anCnt[n].
** Every non-zero counter contributes its own value plus a bonus of 1000.
** anCnt[0] is ignored.
*/
static int fts3SnippetScore(int n, int *anCnt){
  int nTotal = 0;                 /* Accumulated score */
  int iPhrase = 1;                /* Index into anCnt[] */

  while( iPhrase<=n ){
    int nHit = anCnt[iPhrase++];
    if( nHit ){
      nTotal += nHit + 1000;
    }
  }
  return nTotal;
}

/*
** Find the best-scoring window of nSnippet tokens in column iCol of the
** current row of pCsr.
**
** On success SQLITE_OK is returned, *piPos is set to the token offset at
** which the best window starts and *pHlmask to its highlight mask (bit 0
** corresponds to the first token of the window). If an error occurs while
** loading doclists or allocating memory, an SQLite error code is returned
** and the output parameters are left untouched.
*/
static int fts3BestSnippet(
  int nSnippet,                   /* Desired snippet length */
  Fts3Cursor *pCsr,               /* Cursor to create snippet for */
  int iCol,                       /* Index of column to create snippet from */
  int *piPos,                     /* OUT: Starting token for best snippet */
  u64 *pHlmask                    /* OUT: Highlight mask for best snippet */
){
  int rc;                         /* Return Code */
  u8 aBuffer[SNIPPET_BUFFER_SIZE];/* Circular snippet buffer */
  int *aiPrev;                    /* Used by fts3LoadSnippetBuffer() */
  int *anToken;                   /* Number of tokens in each phrase */
  char **apList;                  /* Array of position lists */
  int *anCnt;                     /* Running totals of phrase occurrences */
  int nList;                      /* Number of phrases (position lists) */
  int nByte;                      /* Size of the combined allocation */

  int i;

  u64 hlmask = 0;                 /* Current mask of highlighted terms */
  u64 besthlmask = 0;             /* Mask of highlighted terms for iBestPos */
  int iBestPos = 0;               /* Starting position of 'best' snippet */
  int iBestScore = 0;             /* Score of best snippet higher->better */
  SnippetCtx sCtx;

  /* Iterate through the phrases in the expression to count them. The same
  ** callback makes sure the doclists are loaded for each phrase.
  */
  rc = fts3ExprLoadDoclists(pCsr, &nList);
  if( rc!=SQLITE_OK ){
    return rc;
  }

  /* Now that it is known how many phrases there are, allocate and zero
  ** the required arrays using malloc(). All four arrays live in a single
  ** allocation and ALL of them must be zeroed: anCnt[] holds running
  ** totals that are incremented and decremented before ever being set.
  */
  nByte = sizeof(char*)*nList +   /* apList */
          sizeof(int)*nList +     /* anToken */
          sizeof(int)*nList +     /* aiPrev */
          sizeof(int)*(nList+1);  /* anCnt */
  apList = sqlite3_malloc(nByte);
  if( !apList ){
    return SQLITE_NOMEM;
  }
  memset(apList, 0, nByte);
  anToken = (int *)&apList[nList];
  aiPrev = &anToken[nList];
  anCnt = &aiPrev[nList];

  /* Initialize the contents of the aiPrev and apList arrays. */
  sCtx.pCsr = pCsr;
  sCtx.iCol = iCol;
  sCtx.apList = apList;
  sCtx.aiPrev = aiPrev;
  sCtx.anToken = anToken;
  sCtx.iPhrase = 0;
  (void)fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void *)&sCtx);

  /* Load the first two chunks of data into the buffer. */
  memset(aBuffer, 0, SNIPPET_BUFFER_SIZE);
  fts3LoadSnippetBuffer(0, aBuffer, nList, apList, aiPrev);
  fts3LoadSnippetBuffer(SNIPPET_BUFFER_CHUNK, aBuffer, nList, apList, aiPrev);

  /* Set the initial contents of the highlight-mask and anCnt[] array. */
  for(i=1-nSnippet; i<=0; i++){
    fts3SnippetCnt(i, nSnippet, anCnt, aBuffer, anToken, &hlmask);
  }
  iBestScore = fts3SnippetScore(nList, anCnt);
  besthlmask = hlmask;
  iBestPos = 0;

  for(i=1; 1; i++){
    int iScore;

    /* At each chunk boundary load the chunk after next. The loop ends
    ** once fts3LoadSnippetBuffer() reports that all lists are exhausted. */
    if( 0==(i&(SNIPPET_BUFFER_CHUNK-1)) ){
      int iLoad = i + SNIPPET_BUFFER_CHUNK;
      if( fts3LoadSnippetBuffer(iLoad, aBuffer, nList, apList, aiPrev) ) break;
    }

    /* Figure out how highly a snippet starting at token offset i scores
    ** according to fts3SnippetScore(). If it is higher than any previously
    ** considered position, save the current position, score and hlmask as 
    ** the best snippet candidate found so far.
    */
    fts3SnippetCnt(i, nSnippet, anCnt, aBuffer, anToken, &hlmask);
    iScore = fts3SnippetScore(nList, anCnt);
    if( iScore>iBestScore ){
      iBestPos = i;
      iBestScore = iScore;
      besthlmask = hlmask;
    }
  }

  sqlite3_free(apList);
  *piPos = iBestPos;
  *pHlmask = besthlmask;
  return SQLITE_OK;
}

/*
** A growable, nul-terminated string buffer. Text is added to it with
** fts3StringAppend().
*/
typedef struct StrBuffer StrBuffer;
struct StrBuffer {
  char *z;                        /* Buffer, (re)allocated by sqlite3_realloc() */
  int n;                          /* Bytes of text in z, excluding terminator */
  int nAlloc;                     /* Allocated size of z in bytes */
};

/*
** Append nAppend bytes of text from zAppend to the buffer pStr, growing
** the buffer if required and keeping it nul-terminated. If nAppend is
** negative, strlen(zAppend) bytes are appended instead.
**
** Returns SQLITE_OK on success, or SQLITE_NOMEM if the buffer could not be
** enlarged (in which case pStr is left unmodified).
*/
static int fts3StringAppend(
  StrBuffer *pStr, 
  const char *zAppend, 
  int nAppend
){
  int nCopy = nAppend;            /* Number of bytes to append */

  if( nCopy<0 ) nCopy = strlen(zAppend);

  /* Grow the buffer unless the new text plus terminator fits with at
  ** least one byte to spare. */
  if( pStr->nAlloc<=pStr->n+nCopy+1 ){
    int nNew = pStr->nAlloc+nCopy+100;
    char *zGrown = sqlite3_realloc(pStr->z, nNew);
    if( zGrown==0 ) return SQLITE_NOMEM;
    pStr->z = zGrown;
    pStr->nAlloc = nNew;
  }

  memcpy(pStr->z + pStr->n, zAppend, nCopy);
  pStr->n += nCopy;
  pStr->z[pStr->n] = '\0';
  return SQLITE_OK;
}

/*
** Build the text of a snippet of document zDoc/nDoc that starts at token
** iPos and spans up to nSnippet tokens. Tokens whose bit is set in hlmask
** (bit 0 is token iPos) are wrapped in zOpen/zClose. zEllipsis is added
** at the start if the snippet does not begin at byte 0 of the document and
** at the end if further tokens follow the snippet.
**
** On success SQLITE_OK is returned and *pzSnippet is set to a buffer
** obtained from sqlite3_realloc(); the caller is responsible for releasing
** it with sqlite3_free(). On error an SQLite error code is returned and
** *pzSnippet is left untouched.
*/
static int fts3SnippetText(
  Fts3Cursor *pCsr,               /* FTS3 Cursor */
  const char *zDoc,               /* Document to extract snippet from */
  int nDoc,                       /* Size of zDoc in bytes */
  int nSnippet,                   /* Number of tokens in extracted snippet */
  int iPos,                       /* Index of first document token in snippet */
  u64 hlmask,                     /* Bitmask of terms to highlight in snippet */
  const char *zOpen,              /* String inserted before highlighted term */
  const char *zClose,             /* String inserted after highlighted term */
  const char *zEllipsis,          /* String marking omitted leading/trailing text */
  char **pzSnippet                /* OUT: Snippet text */
){
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  int rc;                         /* Return code */
  int iCurrent = 0;               /* Index of the token most recently read */
  int iStart = 0;                 /* Byte offset of text not yet copied */
  int iEnd;                       /* Byte offset of end of last snippet token */

  sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */
  sqlite3_tokenizer_cursor *pC;   /* Tokenizer cursor open on zDoc/nDoc */
  const char *ZDUMMY;             /* Dummy arguments used with tokenizer */
  int DUMMY1, DUMMY2, DUMMY3;     /* Dummy arguments used with tokenizer */

  StrBuffer res = {0, 0, 0};   /* Result string */

  /* Open a token cursor on the document. If this fails there is no cursor
  ** to close and nothing has been allocated, so return immediately. The
  ** tokenizer interface leaves it to the caller to fill in the pTokenizer
  ** member of a newly opened cursor.
  */
  pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
  rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
  if( rc!=SQLITE_OK ){
    return rc;
  }
  pC->pTokenizer = pTab->pTokenizer;

  /* Read all tokens up to and including token iPos (the first token of
  ** the snippet). Set variable iStart to the byte offset in zDoc of the
  ** start of token iPos.
  */
  while( rc==SQLITE_OK && iCurrent<iPos ){
    rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iStart, &DUMMY2, &iCurrent);
  }
  iEnd = iStart;

  if( rc==SQLITE_OK && iStart>0 ){
    rc = fts3StringAppend(&res, zEllipsis, -1);
  }

  /* Walk the tokens of the snippet. Text is copied lazily: whenever a
  ** highlighted token is found, everything between iStart and the token
  ** is flushed, followed by the token wrapped in zOpen/zClose.
  */
  while( rc==SQLITE_OK ){
    int iBegin;
    int iFin;
    rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);

    if( rc==SQLITE_OK ){
      if( iCurrent>=(iPos+nSnippet) ){
        /* This token lies beyond the snippet. */
        rc = SQLITE_DONE;
      }else{
        iEnd = iFin;
        if( hlmask & ((u64)1 << (iCurrent-iPos)) ){
          if( fts3StringAppend(&res, &zDoc[iStart], iBegin-iStart)
           || fts3StringAppend(&res, zOpen, -1)
           || fts3StringAppend(&res, &zDoc[iBegin], iEnd-iBegin)
           || fts3StringAppend(&res, zClose, -1)
          ){
            rc = SQLITE_NOMEM;
          }
          iStart = iEnd;
        }
      }
    }
  }
  assert( rc!=SQLITE_OK );
  if( rc==SQLITE_DONE ){
    /* Flush the remaining un-highlighted text of the snippet. Then, if
    ** the document has more tokens, append the ellipsis; otherwise append
    ** whatever text follows the final token.
    */
    rc = fts3StringAppend(&res, &zDoc[iStart], iEnd-iStart);
    if( rc==SQLITE_OK ){
      rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
      if( rc==SQLITE_OK ){
        rc = fts3StringAppend(&res, zEllipsis, -1);
      }else if( rc==SQLITE_DONE ){
        rc = fts3StringAppend(&res, &zDoc[iEnd], -1);
      }
    }
  }

  pMod->xClose(pC);
  if( rc!=SQLITE_OK ){
    sqlite3_free(res.z);
  }else{
    *pzSnippet = res.z;
  }
  return rc;
}


/*
** An instance of this structure is used to collect the matchinfo
** statistics. The 'global' part of the matchinfo array consists of the
** following:
**
**   1. The number of phrases in the query (nPhrase).
**
**   2. The number of columns in the FTS3 table (nCol).
**
**   3. A matrix of (nPhrase*nCol) integers containing the sum of the
**      number of hits for each phrase in each column across all rows
**      of the table.
**
** The total size of the global part, assuming the number of columns is N
** and the number of phrases is P is:
**
**   2 + P*N
**
** The number of hits for the 3rd phrase in the second column is found
** using the expression:
**
**   aGlobal[2 + N*2 + 1]
**
** The global part is followed in the same array by a second P*N matrix
** holding the hit counts for the current row only, so that the whole
** array allocated by fts3GetMatchinfo() has 2 + 2*P*N entries.
*/
typedef struct MatchInfo MatchInfo;
struct MatchInfo {
  Fts3Table *pTab;                /* FTS3 Table */
  Fts3Cursor *pCursor;            /* FTS3 Cursor */
  int iPhrase;                    /* Number of phrases so far */
  int nCol;                       /* Number of columns in table */
  u32 *aGlobal;                   /* Pre-allocated buffer */
};

/*
** Count the varints in the column-list that *ppCollist points to (a
** column-list being the delta-encoded list of term offsets within a single
** column of a single row). On return *ppCollist points at the byte that
** terminated the list.
*/
static int fts3ColumnlistCount(char **ppCollist){
  char *p = *ppCollist;           /* Read cursor */
  char isMidVarint = 0;           /* 0x80 bit of the previous byte read */
  int nVarint = 0;                /* Number of complete varints seen */

  /* The list ends at a 0x00 or 0x01 byte that is not the final byte of a
  ** multi-byte varint. */
  for(;;){
    if( (0xFE & (*p | isMidVarint))==0 ) break;
    isMidVarint = *p & 0x80;
    p++;
    if( isMidVarint==0 ){
      nVarint++;
    }
  }

  *ppCollist = p;
  return nVarint;
}

/*
** *pp points to a sequence of column-lists terminated by a 0x00 byte. For
** each column-list, add its number of entries to aOut[iCol], where iCol is
** 0 unless the list is introduced by a 0x01 byte followed by a varint
** column number. On return *pp points just past the terminating 0x00.
*/
static void fts3LoadColumnlistCounts(char **pp, u32 *aOut){
  char *p;                        /* Read cursor */

  for(p=*pp; *p!=0; ){
    sqlite3_int64 iColumn = 0;
    if( *p==0x01 ){
      p++;
      p += sqlite3Fts3GetVarint(p, &iColumn);
    }
    aOut[iColumn] += fts3ColumnlistCount(&p);
  }

  *pp = p + 1;
}

/*
** fts3ExprIterate() callback that accumulates the "global" matchinfo
** statistics of one phrase. The phrase's whole doclist is scanned and the
** per-column hit counts of every entry are added to the phrase's row of
** the global matrix in MatchInfo.aGlobal.
*/
static int fts3ExprGlobalMatchinfoCb(
  Fts3Expr *pExpr,                /* Phrase expression node */
  void *pCtx                      /* Pointer to MatchInfo structure */
){
  MatchInfo *pInfo = (MatchInfo *)pCtx;
  const int iRow = 2 + pInfo->nCol*pInfo->iPhrase;
  char *p;                        /* Read cursor into the doclist */
  char *pEof;                     /* One past the end of the doclist */

  assert( pExpr->isLoaded );

  /* Fill in the global hit count matrix row for this phrase. */
  p = pExpr->aDoclist;
  pEof = &pExpr->aDoclist[pExpr->nDoclist];
  while( p<pEof ){
    /* Step over the varint at the start of the entry. */
    char c;
    do{
      c = *p++;
    }while( c & 0x80 );
    fts3LoadColumnlistCounts(&p, &pInfo->aGlobal[iRow]);
  }

  pInfo->iPhrase++;
  return SQLITE_OK;
}

/*
** fts3ExprIterate() callback that fills in the per-row part of the
** matchinfo array for one phrase. If the phrase has a doclist, its row of
** the second matrix in MatchInfo.aGlobal is zeroed and then loaded with
** the per-column hit counts for the row identified by the cursor's iPrevId.
*/
static int fts3ExprLocalMatchinfoCb(
  Fts3Expr *pExpr,                /* Phrase expression node */
  void *pCtx                      /* Pointer to MatchInfo structure */
){
  MatchInfo *pInfo = (MatchInfo *)pCtx;
  int iPhrase = pInfo->iPhrase;   /* Index of this phrase */

  pInfo->iPhrase = iPhrase + 1;

  if( pExpr->aDoclist!=0 ){
    int iOffset = 2 + pInfo->nCol*(pInfo->aGlobal[0]+iPhrase);
    char *pList;

    memset(&pInfo->aGlobal[iOffset], 0, pInfo->nCol*sizeof(u32));
    pList = sqlite3Fts3FindPositions(pExpr, pInfo->pCursor->iPrevId, -1);
    if( pList!=0 ){
      fts3LoadColumnlistCounts(&pList, &pInfo->aGlobal[iOffset]);
    }
  }

  return SQLITE_OK;
}

/*
** Populate pCsr->aMatchinfo[] with data for the current row. The 'matchinfo'
** data is an array of 32-bit unsigned integers (C type u32).
**
** The array is allocated, and its global part computed, the first time
** this function is called for a cursor. The per-row part is recomputed
** only while pCsr->isMatchinfoOk is set, after which the flag is cleared.
*/
static int fts3GetMatchinfo(Fts3Cursor *pCsr){
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  MatchInfo sInfo;

  if( pCsr->aMatchinfo==0 ){
    int nPhrase;                  /* Number of phrases in the query */
    int nEntry;                   /* Number of u32 entries to allocate */
    u32 *aNew;                    /* Newly allocated matchinfo array */
    int rc;

    sInfo.pTab = pTab;
    sInfo.nCol = pTab->nColumn;
    sInfo.iPhrase = 0;
    rc = fts3ExprLoadDoclists(pCsr, &nPhrase);
    if( rc!=SQLITE_OK ) return rc;

    nEntry = 2 + 2*sInfo.nCol*nPhrase;
    aNew = (u32 *)sqlite3_malloc(sizeof(u32)*nEntry);
    if( aNew==0 ) return SQLITE_NOMEM;
    memset(aNew, 0, sizeof(u32)*nEntry);
    aNew[0] = nPhrase;
    aNew[1] = sInfo.nCol;

    sInfo.iPhrase = 0;
    sInfo.aGlobal = aNew;
    (void)fts3ExprIterate(
        pCsr->pExpr, fts3ExprGlobalMatchinfoCb, (void *)&sInfo
    );

    pCsr->aMatchinfo = aNew;
  }

  sInfo.pTab = pTab;
  sInfo.pCursor = pCsr;
  sInfo.nCol = pTab->nColumn;
  sInfo.iPhrase = 0;
  sInfo.aGlobal = pCsr->aMatchinfo;

  if( pCsr->isMatchinfoOk ){
    (void)fts3ExprIterate(
        pCsr->pExpr, fts3ExprLocalMatchinfoCb, (void *)&sInfo
    );
    pCsr->isMatchinfoOk = 0;
  }

  return SQLITE_OK;
}

/*
** Experimental snippet() implementation. The best window of nToken tokens
** in column iCol is located with fts3BestSnippet() and rendered with
** fts3SnippetText(). The resulting text is set as the result of the SQL
** function call pCtx; on failure the error code is set as the result.
*/
void sqlite3Fts3Snippet2(
  sqlite3_context *pCtx,          /* SQLite function call context */
  Fts3Cursor *pCsr,               /* Cursor object */
  const char *zStart,             /* Snippet start text - "<b>" */
  const char *zEnd,               /* Snippet end text - "</b>" */
  const char *zEllipsis,          /* Snippet ellipsis text - "<b>...</b>" */
  int iCol,                       /* Extract snippet from this column */
  int nToken                      /* Approximate number of tokens in snippet */
){
  u64 hlmask = 0;                 /* Highlight mask of the chosen snippet */
  int iPos = 0;                   /* First token of the chosen snippet */
  char *zSnippet = 0;             /* Rendered snippet text */
  const char *zDoc;               /* Text of column iCol */
  int nDoc;                       /* Size of zDoc in bytes */
  int rc;

  rc = fts3BestSnippet(nToken, pCsr, iCol, &iPos, &hlmask);

  /* Column 0 of the statement is not a user column, hence the iCol+1. */
  nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1);
  zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1);

  if( rc==SQLITE_OK ){
    rc = fts3SnippetText(
        pCsr, zDoc, nDoc, nToken, iPos, hlmask,
        zStart, zEnd, zEllipsis, &zSnippet
    );
  }

  if( rc==SQLITE_OK ){
    /* Ownership of zSnippet passes to SQLite. */
    sqlite3_result_text(pCtx, zSnippet, -1, sqlite3_free);
  }else{
    sqlite3_result_error_code(pCtx, rc);
  }
}

/*
** Implementation of matchinfo(). The matchinfo array for the current row
** of pCsr is returned as a blob of 2 + 2*nPhrase*nCol 32-bit integers, or
** an error code is set as the result if the array cannot be computed.
*/
void sqlite3Fts3Matchinfo(sqlite3_context *pContext, Fts3Cursor *pCsr){
  int rc = fts3GetMatchinfo(pCsr);
  if( rc==SQLITE_OK ){
    u32 *aInfo = pCsr->aMatchinfo;
    /* aInfo[0] is the phrase count and aInfo[1] the column count. */
    int nByte = sizeof(u32)*(2+aInfo[0]*aInfo[1]*2);
    sqlite3_result_blob(pContext, aInfo, nByte, SQLITE_TRANSIENT);
  }else{
    sqlite3_result_error_code(pContext, rc);
  }
}

#endif

Changes to test/fts3query.test.

95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
  binary scan $blob $scan($::tcl_platform(byteOrder)) r
  return $r
}
db func mit mit

do_test fts3query-3.3 {
  execsql { SELECT mit(matchinfo(foobar)) FROM foobar WHERE foobar MATCH 'the' }
} {{1 1 3}}
do_test fts3query-3.4 {
  execsql { 
    SELECT mit(matchinfo(foobar, 'g')) FROM foobar WHERE foobar MATCH 'the' 
  }
} {{1 1 3 3}}
do_test fts3query-3.5 {
  execsql { 
    SELECT mit(matchinfo(foobar, 'p')) FROM foobar WHERE foobar MATCH 'the' 
  }
} {{1 1 3 27 74 79}}
do_test fts3query-3.5 {
  execsql { 
    SELECT mit(matchinfo(foobar, 'pg')) FROM foobar WHERE foobar MATCH 'the' 
  }
} {{1 1 3 3 27 74 79}}

finish_test








<
<
<
<
<

<
<
<
<
<
<
<
<
<
<



95
96
97
98
99
100
101





102










103
104
105
  binary scan $blob $scan($::tcl_platform(byteOrder)) r
  return $r
}
db func mit mit

do_test fts3query-3.3 {
  execsql { SELECT mit(matchinfo(foobar)) FROM foobar WHERE foobar MATCH 'the' }





} {{1 1 3 3}}











finish_test

Changes to test/fts3rnd.test.

156
157
158
159
160
161
162




163
164
165
166
167


168
169
170
171
172
173
174
175
176
177
178
179
180
181
  }

  #lsort -uniq -integer $ret
  set ret
}

proc simple_token_matchinfo {zToken} {




  foreach key [lsort -integer [array names ::t1]] {
    set value $::t1($key)
    set cnt [list]
    foreach col $value {
      lappend cnt [llength [lsearch -all $col $zToken]]


    }
    if {[lindex [lsort $cnt] end]} {
      lappend ret $key [concat 1 3 $cnt]
    }
  }
  
  set ret
} 

proc simple_near {termlist nNear} {
  set ret [list]

  foreach {key value} [array get ::t1] {
    foreach v $value {







>
>
>
>



|
|
>
>


|



|







156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
  }

  #lsort -uniq -integer $ret
  set ret
}

# Build the expected matchinfo result for a query on the single token
# $zToken, using the rows stored in the global array ::t1.
#
# For every row (in ascending integer key order) the number of occurrences
# of $zToken in each of the three columns is counted and added to the
# per-column totals. Rows for which the last element of the sorted counts
# is non-zero contribute "key {1 3 XXX c0 c1 c2}" to the result; once all
# rows have been scanned the XXX placeholder is replaced by the three
# per-column totals.
#
# NOTE(review): ret is only created by the first lappend, so this
# presumably assumes at least one row matches - confirm against callers.
proc simple_token_matchinfo {zToken} {
  set total(0) 0
  set total(1) 0
  set total(2) 0

  foreach key [lsort -integer [array names ::t1]] {
    set value $::t1($key)
    set cnt [list]
    foreach i {0 1 2} col $value {
      set n [llength [lsearch -all $col $zToken]]
      lappend cnt $n
      incr total($i) $n
    }
    if {[lindex [lsort $cnt] end]} {
      lappend ret $key [concat 1 3 XXX $cnt]
    }
  }
  
  string map [list XXX "$total(0) $total(1) $total(2)"] $ret
} 

proc simple_near {termlist nNear} {
  set ret [list]

  foreach {key value} [array get ::t1] {
    foreach v $value {