/ Artifact Content
Login

Artifact 31a42dc3dcf1eb6127a0a1acf42ab4aa1d5e5c81:


/*
** 2015 Aug 04
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
** This file contains test code only, it is not included in release 
** versions of FTS5. It contains the implementation of an FTS5 auxiliary
** function very similar to the FTS4 function matchinfo():
**
**     https://www.sqlite.org/fts3.html#matchinfo
**
** Known differences are that:
**
**  1) this function uses the FTS5 definition of "matchable phrase", which
**     excludes any phrases that are part of an expression sub-tree that
**     does not match the current row. This comes up for MATCH queries 
**     such as:
**
**         "a OR (b AND c)"
**
**     In FTS4, if a single row contains instances of tokens "a" and "c", 
**     but not "b", all instances of "c" are considered matches. In FTS5,
**     they are not (as the "b AND c" sub-tree does not match the current
**     row.
**
**  2) For the values returned by 'x' that apply to all rows of the table, 
**     NEAR constraints are not considered. But for the number of hits in
**     the current row, they are.
**     
** This file exports a single function that may be called to register the
** matchinfo() implementation with a database handle:
**
**   int sqlite3Fts5TestRegisterMatchinfo(sqlite3 *db);
*/


#ifdef SQLITE_ENABLE_FTS5

#include "fts5.h"
#include <assert.h>
#include <string.h>

typedef struct Fts5MatchinfoCtx Fts5MatchinfoCtx;
typedef unsigned int u32;

struct Fts5MatchinfoCtx {
  int nCol;                       /* Number of cols in FTS5 table */
  int nPhrase;                    /* Number of phrases in FTS5 query */
  char *zArg;                     /* nul-term'd copy of 2nd arg */
  int nRet;                       /* Number of elements in aRet[] */
  u32 *aRet;                      /* Array of 32-bit unsigned ints to return */
};



/*
** Return a pointer to the fts5_api pointer for database connection db.
** If an error occurs, return NULL and leave an error in the database 
** handle (accessible using sqlite3_errcode()/errmsg()).
*/
static fts5_api *fts5_api_from_db(sqlite3 *db){
  fts5_api *pRet = 0;
  sqlite3_stmt *pStmt = 0;

  if( SQLITE_OK==sqlite3_prepare(db, "SELECT fts5()", -1, &pStmt, 0)
   && SQLITE_ROW==sqlite3_step(pStmt) 
   && sizeof(pRet)==sqlite3_column_bytes(pStmt, 0)
  ){
    memcpy(&pRet, sqlite3_column_blob(pStmt, 0), sizeof(pRet));
  }
  sqlite3_finalize(pStmt);
  return pRet;
}


/*
** Argument f should be a flag accepted by matchinfo() (a valid character
** in the string passed as the second argument). If it is not, -1 is 
** returned. Otherwise, if f is a valid matchinfo flag, the value returned
** is the number of 32-bit integers added to the output array if the
** table has nCol columns and the query nPhrase phrases.
*/
static int fts5MatchinfoFlagsize(int nCol, int nPhrase, char f){
  int ret = -1;
  switch( f ){
    case 'p': ret = 1; break;
    case 'c': ret = 1; break;
    case 'x': ret = 3 * nCol * nPhrase; break;
    case 'y': ret = nCol * nPhrase; break;
    case 'b': ret = ((nCol + 31) / 32) * nPhrase; break;
    case 'n': ret = 1; break;
    case 'a': ret = nCol; break;
    case 'l': ret = nCol; break;
    case 's': ret = nCol; break;
  }
  return ret;
}

static int fts5MatchinfoIter(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  Fts5MatchinfoCtx *p,
  int(*x)(const Fts5ExtensionApi*,Fts5Context*,Fts5MatchinfoCtx*,char,u32*)
){
  int i;
  int n = 0;
  int rc = SQLITE_OK;
  char f;
  for(i=0; (f = p->zArg[i]); i++){
    rc = x(pApi, pFts, p, f, &p->aRet[n]);
    if( rc!=SQLITE_OK ) break;
    n += fts5MatchinfoFlagsize(p->nCol, p->nPhrase, f);
  }
  return rc;
}

static int fts5MatchinfoXCb(
  const Fts5ExtensionApi *pApi,
  Fts5Context *pFts,
  void *pUserData
){
  Fts5PhraseIter iter;
  int iCol, iOff;
  u32 *aOut = (u32*)pUserData;
  int iPrev = -1;

  for(pApi->xPhraseFirst(pFts, 0, &iter, &iCol, &iOff); 
      iCol>=0; 
      pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
  ){
    aOut[iCol*3+1]++;
    if( iCol!=iPrev ) aOut[iCol*3 + 2]++;
    iPrev = iCol;
  }

  return SQLITE_OK;
}

static int fts5MatchinfoGlobalCb(
  const Fts5ExtensionApi *pApi,
  Fts5Context *pFts,
  Fts5MatchinfoCtx *p,
  char f,
  u32 *aOut
){
  int rc = SQLITE_OK;
  switch( f ){
    case 'p':
      aOut[0] = p->nPhrase; 
      break;

    case 'c':
      aOut[0] = p->nCol; 
      break;

    case 'x': {
      int i;
      for(i=0; i<p->nPhrase && rc==SQLITE_OK; i++){
        void *pPtr = (void*)&aOut[i * p->nCol * 3];
        rc = pApi->xQueryPhrase(pFts, i, pPtr, fts5MatchinfoXCb);
      }
      break;
    }

    case 'n': {
      sqlite3_int64 nRow;
      rc = pApi->xRowCount(pFts, &nRow);
      aOut[0] = (u32)nRow;
      break;
    }

    case 'a': {
      sqlite3_int64 nRow = 0;
      rc = pApi->xRowCount(pFts, &nRow);
      if( nRow==0 ){
        memset(aOut, 0, sizeof(u32) * p->nCol);
      }else{
        int i;
        for(i=0; rc==SQLITE_OK && i<p->nCol; i++){
          sqlite3_int64 nToken;
          rc = pApi->xColumnTotalSize(pFts, i, &nToken);
          if( rc==SQLITE_OK){
            aOut[i] = (u32)((2*nToken + nRow) / (2*nRow));
          }
        }
      }
      break;
    }

  }
  return rc;
}

static int fts5MatchinfoLocalCb(
  const Fts5ExtensionApi *pApi,
  Fts5Context *pFts,
  Fts5MatchinfoCtx *p,
  char f,
  u32 *aOut
){
  int i;
  int rc = SQLITE_OK;

  switch( f ){
    case 'b': {
      int iPhrase;
      int nInt = ((p->nCol + 31) / 32) * p->nPhrase;
      for(i=0; i<nInt; i++) aOut[i] = 0;

      for(iPhrase=0; iPhrase<p->nPhrase; iPhrase++){
        Fts5PhraseIter iter;
        int iCol;
        for(pApi->xPhraseFirstColumn(pFts, iPhrase, &iter, &iCol);
            iCol>=0; 
            pApi->xPhraseNextColumn(pFts, &iter, &iCol)
        ){
          aOut[iPhrase * ((p->nCol+31)/32) + iCol/32] |= ((u32)1 << iCol%32);
        }
      }

      break;
    }

    case 'x':
    case 'y': {
      int nMul = (f=='x' ? 3 : 1);
      int iPhrase;

      for(i=0; i<(p->nCol*p->nPhrase); i++) aOut[i*nMul] = 0;

      for(iPhrase=0; iPhrase<p->nPhrase; iPhrase++){
        Fts5PhraseIter iter;
        int iOff, iCol;
        for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff); 
            iOff>=0; 
            pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
        ){
          if( f=='b' ){
            aOut[iPhrase * ((p->nCol+31)/32) + iCol/32] |= ((u32)1 << iCol%32);
          }else{
            aOut[nMul * (iCol + iPhrase * p->nCol)]++;
          }
        }
      }

      break;
    }

    case 'l': {
      for(i=0; rc==SQLITE_OK && i<p->nCol; i++){
        int nToken;
        rc = pApi->xColumnSize(pFts, i, &nToken);
        aOut[i] = (u32)nToken;
      }
      break;
    }

    case 's': {
      int nInst;

      memset(aOut, 0, sizeof(u32) * p->nCol);

      rc = pApi->xInstCount(pFts, &nInst);
      for(i=0; rc==SQLITE_OK && i<nInst; i++){
        int iPhrase, iOff, iCol = 0;
        int iNextPhrase;
        int iNextOff;
        u32 nSeq = 1;
        int j;

        rc = pApi->xInst(pFts, i, &iPhrase, &iCol, &iOff);
        iNextPhrase = iPhrase+1;
        iNextOff = iOff+pApi->xPhraseSize(pFts, 0);
        for(j=i+1; rc==SQLITE_OK && j<nInst; j++){
          int ip, ic, io;
          rc = pApi->xInst(pFts, j, &ip, &ic, &io);
          if( ic!=iCol || io>iNextOff ) break;
          if( ip==iNextPhrase && io==iNextOff ){
            nSeq++;
            iNextPhrase = ip+1;
            iNextOff = io + pApi->xPhraseSize(pFts, ip);
          }
        }

        if( nSeq>aOut[iCol] ) aOut[iCol] = nSeq;
      }

      break;
    }
  }
  return rc;
}
 
static Fts5MatchinfoCtx *fts5MatchinfoNew(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning error message */
  const char *zArg                /* Matchinfo flag string */
){
  Fts5MatchinfoCtx *p;
  int nCol;
  int nPhrase;
  int i;
  int nInt;
  int nByte;
  int rc;

  nCol = pApi->xColumnCount(pFts);
  nPhrase = pApi->xPhraseCount(pFts);

  nInt = 0;
  for(i=0; zArg[i]; i++){
    int n = fts5MatchinfoFlagsize(nCol, nPhrase, zArg[i]);
    if( n<0 ){
      char *zErr = sqlite3_mprintf("unrecognized matchinfo flag: %c", zArg[i]);
      sqlite3_result_error(pCtx, zErr, -1);
      sqlite3_free(zErr);
      return 0;
    }
    nInt += n;
  }

  nByte = sizeof(Fts5MatchinfoCtx)          /* The struct itself */
         + sizeof(u32) * nInt               /* The p->aRet[] array */
         + (i+1);                           /* The p->zArg string */
  p = (Fts5MatchinfoCtx*)sqlite3_malloc(nByte);
  if( p==0 ){
    sqlite3_result_error_nomem(pCtx);
    return 0;
  }
  memset(p, 0, nByte);

  p->nCol = nCol;
  p->nPhrase = nPhrase;
  p->aRet = (u32*)&p[1];
  p->nRet = nInt;
  p->zArg = (char*)&p->aRet[nInt];
  memcpy(p->zArg, zArg, i);

  rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoGlobalCb);
  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx, rc);
    sqlite3_free(p);
    p = 0;
  }

  return p;
}

static void fts5MatchinfoFunc(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  const char *zArg;
  Fts5MatchinfoCtx *p;
  int rc = SQLITE_OK;

  if( nVal>0 ){
    zArg = (const char*)sqlite3_value_text(apVal[0]);
  }else{
    zArg = "pcx";
  }

  p = (Fts5MatchinfoCtx*)pApi->xGetAuxdata(pFts, 0);
  if( p==0 || sqlite3_stricmp(zArg, p->zArg) ){
    p = fts5MatchinfoNew(pApi, pFts, pCtx, zArg);
    if( p==0 ){
      rc = SQLITE_NOMEM;
    }else{
      rc = pApi->xSetAuxdata(pFts, p, sqlite3_free);
    }
  }

  if( rc==SQLITE_OK ){
    rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoLocalCb);
  }
  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx, rc);
  }else{
    /* No errors has occured, so return a copy of the array of integers. */
    int nByte = p->nRet * sizeof(u32);
    sqlite3_result_blob(pCtx, (void*)p->aRet, nByte, SQLITE_TRANSIENT);
  }
}

int sqlite3Fts5TestRegisterMatchinfo(sqlite3 *db){
  int rc;                         /* Return code */
  fts5_api *pApi;                 /* FTS5 API functions */

  /* Extract the FTS5 API pointer from the database handle. The 
  ** fts5_api_from_db() function above is copied verbatim from the 
  ** FTS5 documentation. Refer there for details. */
  pApi = fts5_api_from_db(db);

  /* If fts5_api_from_db() returns NULL, then either FTS5 is not registered
  ** with this database handle, or an error (OOM perhaps?) has occurred.
  **
  ** Also check that the fts5_api object is version 2 or newer.  
  */ 
  if( pApi==0 || pApi->iVersion<1 ){
    return SQLITE_ERROR;
  }

  /* Register the implementation of matchinfo() */
  rc = pApi->xCreateFunction(pApi, "matchinfo", 0, fts5MatchinfoFunc, 0);

  return rc;
}

#endif /* SQLITE_ENABLE_FTS5 */