/ Check-in [b9b77972]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add the fts3 matchinfo 'b' flag.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts3-matchinfo-y
Files: files | file ages | folders
SHA1: b9b77972d88171e4239b8194f308eb5d60b5d172
User & Date: dan 2015-05-05 20:39:53
Context
2015-05-06
08:43
Further optimizations for the 'y' and 'b' matchinfo operators. check-in: fbd038bb user: dan tags: fts3-matchinfo-y
2015-05-05
20:39
Add the fts3 matchinfo 'b' flag. check-in: b9b77972 user: dan tags: fts3-matchinfo-y
19:37
Optimizations for the matchinfo() function, particularly the 'y' flag. check-in: dddd7e18 user: dan tags: fts3-matchinfo-y
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3_snippet.c.

    24     24   #define FTS3_MATCHINFO_NCOL      'c'        /* 1 value */
    25     25   #define FTS3_MATCHINFO_NDOC      'n'        /* 1 value */
    26     26   #define FTS3_MATCHINFO_AVGLENGTH 'a'        /* nCol values */
    27     27   #define FTS3_MATCHINFO_LENGTH    'l'        /* nCol values */
    28     28   #define FTS3_MATCHINFO_LCS       's'        /* nCol values */
    29     29   #define FTS3_MATCHINFO_HITS      'x'        /* 3*nCol*nPhrase values */
    30     30   #define FTS3_MATCHINFO_LHITS     'y'        /* nCol*nPhrase values */
           31  +#define FTS3_MATCHINFO_LHITS_BM  'b'        /* nCol*nPhrase values */
    31     32   
    32     33   /*
    33     34   ** The default value for the second argument to matchinfo(). 
    34     35   */
    35     36   #define FTS3_MATCHINFO_DEFAULT   "pcx"
    36     37   
    37     38   
................................................................................
    85     86   */
    86     87   typedef struct MatchInfo MatchInfo;
    87     88   struct MatchInfo {
    88     89     Fts3Cursor *pCursor;            /* FTS3 Cursor */
    89     90     int nCol;                       /* Number of columns in table */
    90     91     int nPhrase;                    /* Number of matchable phrases in query */
    91     92     sqlite3_int64 nDoc;             /* Number of docs in database */
           93  +  char flag;
    92     94     u32 *aMatchinfo;                /* Pre-allocated buffer */
    93     95   };
    94     96   
    95     97   /*
    96     98   ** An instance of this structure is used to manage a pair of buffers, each
    97     99   ** (nElem * sizeof(u32)) bytes in size. See the MatchinfoBuffer code below
    98    100   ** for details.
................................................................................
   232    234   */
   233    235   static void fts3GetDeltaPosition(char **pp, int *piPos){
   234    236     int iVal;
   235    237     *pp += fts3GetVarint32(*pp, &iVal);
   236    238     *piPos += (iVal-2);
   237    239   }
   238    240   
   239         -static int fts3ExprLHitsCb(Fts3Expr*,int,void*);
   240         -
   241    241   /*
   242    242   ** Helper function for fts3ExprIterate() (see below).
   243    243   */
   244    244   static int fts3ExprIterate2(
   245    245     Fts3Expr *pExpr,                /* Expression to iterate phrases of */
          246  +  int bExcludeEof,
   246    247     int *piPhrase,                  /* Pointer to phrase counter */
   247    248     int (*x)(Fts3Expr*,int,void*),  /* Callback function to invoke for phrases */
   248    249     void *pCtx                      /* Second argument to pass to callback */
   249    250   ){
   250    251     int rc;                         /* Return code */
   251    252   
   252         -  if( x==fts3ExprLHitsCb && pExpr->bEof ){
          253  +  if( bExcludeEof && pExpr->bEof ){
   253    254       rc = SQLITE_OK;
   254    255     }else{
   255    256       int eType = pExpr->eType;     /* Type of expression node pExpr */
   256    257       if( eType!=FTSQUERY_PHRASE ){
   257    258         assert( pExpr->pLeft && pExpr->pRight );
   258         -      rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx);
          259  +      rc = fts3ExprIterate2(pExpr->pLeft, bExcludeEof, piPhrase, x, pCtx);
   259    260         if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){
   260         -        rc = fts3ExprIterate2(pExpr->pRight, piPhrase, x, pCtx);
          261  +        rc = fts3ExprIterate2(pExpr->pRight, bExcludeEof, piPhrase, x, pCtx);
   261    262         }
   262    263       }else{
   263    264         rc = x(pExpr, *piPhrase, pCtx);
   264    265         (*piPhrase)++;
   265    266       }
   266    267     }
   267    268     return rc;
................................................................................
   275    276   ** If the callback function returns anything other than SQLITE_OK, 
   276    277   ** the iteration is abandoned and the error code returned immediately.
   277    278   ** Otherwise, SQLITE_OK is returned after a callback has been made for
   278    279   ** all eligible phrase nodes.
   279    280   */
   280    281   static int fts3ExprIterate(
   281    282     Fts3Expr *pExpr,                /* Expression to iterate phrases of */
           283  +  int bExcludeEof,                /* True to skip nodes already at EOF */
   282    284     int (*x)(Fts3Expr*,int,void*),  /* Callback function to invoke for phrases */
   283    285     void *pCtx                      /* Second argument to pass to callback */
   284    286   ){
   285    287     int iPhrase = 0;                /* Variable used as the phrase counter */
   286         -  return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx);
          288  +  return fts3ExprIterate2(pExpr, bExcludeEof, &iPhrase, x, pCtx);
   287    289   }
   288    290   
   289    291   /*
   290    292   ** This is an fts3ExprIterate() callback used while loading the doclists
   291    293   ** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
   292    294   ** fts3ExprLoadDoclists().
   293    295   */
................................................................................
   318    320     Fts3Cursor *pCsr,               /* Fts3 cursor for current query */
   319    321     int *pnPhrase,                  /* OUT: Number of phrases in query */
   320    322     int *pnToken                    /* OUT: Number of tokens in query */
   321    323   ){
   322    324     int rc;                         /* Return Code */
   323    325     LoadDoclistCtx sCtx = {0,0,0};  /* Context for fts3ExprIterate() */
   324    326     sCtx.pCsr = pCsr;
   325         -  rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx);
          327  +  rc = fts3ExprIterate(pCsr->pExpr, 0, fts3ExprLoadDoclistsCb, (void *)&sCtx);
   326    328     if( pnPhrase ) *pnPhrase = sCtx.nPhrase;
   327    329     if( pnToken ) *pnToken = sCtx.nToken;
   328    330     return rc;
   329    331   }
   330    332   
   331    333   static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
   332    334     (*(int *)ctx)++;
   333    335     UNUSED_PARAMETER(pExpr);
   334    336     UNUSED_PARAMETER(iPhrase);
   335    337     return SQLITE_OK;
   336    338   }
   337    339   static int fts3ExprPhraseCount(Fts3Expr *pExpr){
   338    340     int nPhrase = 0;
   339         -  (void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase);
          341  +  (void)fts3ExprIterate(pExpr, 0, fts3ExprPhraseCountCb, (void *)&nPhrase);
   340    342     return nPhrase;
   341    343   }
   342    344   
   343    345   /*
   344    346   ** Advance the position list iterator specified by the first two 
   345    347   ** arguments so that it points to the first element with a value greater
   346    348   ** than or equal to parameter iNext.
................................................................................
   548    550     ** the set of phrases in the expression to populate the aPhrase[] array.
   549    551     */
   550    552     sIter.pCsr = pCsr;
   551    553     sIter.iCol = iCol;
   552    554     sIter.nSnippet = nSnippet;
   553    555     sIter.nPhrase = nList;
   554    556     sIter.iCurrent = -1;
   555         -  rc = fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void *)&sIter);
          557  +  rc = fts3ExprIterate(pCsr->pExpr, 0, fts3SnippetFindPositions, (void*)&sIter);
   556    558     if( rc==SQLITE_OK ){
   557    559   
   558    560       /* Set the *pmSeen output variable. */
   559    561       for(i=0; i<nList; i++){
   560    562         if( sIter.aPhrase[i].pHead ){
   561    563           *pmSeen |= (u64)1 << i;
   562    564         }
................................................................................
   932    934     MatchInfo *p = (MatchInfo *)pCtx;
   933    935     
   934    936     /* This must be a phrase */
   935    937     assert( pExpr->pPhrase );
   936    938   
   937    939     if( pExpr->iDocid==p->pCursor->iPrevId ){
   938    940       Fts3Table *pTab = (Fts3Table *)p->pCursor->base.pVtab;
   939         -    int iStart = iPhrase * p->nCol;
          941  +    int iStart;
   940    942       Fts3Phrase *pPhrase = pExpr->pPhrase;
   941    943       char *pIter = pPhrase->doclist.pList;
   942    944       int iCol = 0;
          945  +
          946  +    assert( p->flag==FTS3_MATCHINFO_LHITS_BM || p->flag==FTS3_MATCHINFO_LHITS );
          947  +    if( p->flag==FTS3_MATCHINFO_LHITS ){
          948  +      iStart = iPhrase * p->nCol;
          949  +    }else{
          950  +      iStart = iPhrase * ((p->nCol + 31) / 32);
          951  +    }
   943    952   
   944    953       while( 1 ){
   945    954         int nHit = fts3ColumnlistCount(&pIter);
   946    955         if( (pPhrase->iColumn>=pTab->nColumn || pPhrase->iColumn==iCol) ){
   947         -        p->aMatchinfo[iStart + iCol] = (u32)nHit;
          956  +        if( p->flag==FTS3_MATCHINFO_LHITS ){
          957  +          p->aMatchinfo[iStart + iCol] = (u32)nHit;
          958  +        }else if( nHit ){
          959  +          p->aMatchinfo[iStart + (iCol+1)/32] |= (1 << (iCol&0x1F));
          960  +        }
   948    961         }
   949    962         assert( *pIter==0x00 || *pIter==0x01 );
   950    963         if( *pIter!=0x01 ) break;
   951    964         pIter++;
   952    965         pIter += fts3GetVarint32(pIter, &iCol);
   953    966       }
   954    967     }
................................................................................
   965    978      || (cArg==FTS3_MATCHINFO_NCOL)
   966    979      || (cArg==FTS3_MATCHINFO_NDOC && pTab->bFts4)
   967    980      || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bFts4)
   968    981      || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize)
   969    982      || (cArg==FTS3_MATCHINFO_LCS)
   970    983      || (cArg==FTS3_MATCHINFO_HITS)
   971    984      || (cArg==FTS3_MATCHINFO_LHITS)
          985  +   || (cArg==FTS3_MATCHINFO_LHITS_BM)
   972    986     ){
   973    987       return SQLITE_OK;
   974    988     }
   975    989     sqlite3Fts3ErrMsg(pzErr, "unrecognized matchinfo request: %c", cArg);
   976    990     return SQLITE_ERROR;
   977    991   }
   978    992   
................................................................................
   991   1005       case FTS3_MATCHINFO_LCS:
   992   1006         nVal = pInfo->nCol;
   993   1007         break;
   994   1008   
   995   1009       case FTS3_MATCHINFO_LHITS:
   996   1010         nVal = pInfo->nCol * pInfo->nPhrase;
   997   1011         break;
         1012  +
         1013  +    case FTS3_MATCHINFO_LHITS_BM:
         1014  +      nVal = pInfo->nPhrase * ((pInfo->nCol + 31) / 32);
         1015  +      break;
   998   1016   
   999   1017       default:
  1000   1018         assert( cArg==FTS3_MATCHINFO_HITS );
  1001   1019         nVal = pInfo->nCol * pInfo->nPhrase * 3;
  1002   1020         break;
  1003   1021     }
  1004   1022   
................................................................................
  1102   1120   
  1103   1121     /* Allocate and populate the array of LcsIterator objects. The array
  1104   1122     ** contains one element for each matchable phrase in the query.
  1105   1123     **/
  1106   1124     aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase);
  1107   1125     if( !aIter ) return SQLITE_NOMEM;
  1108   1126     memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase);
  1109         -  (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter);
         1127  +  (void)fts3ExprIterate(pCsr->pExpr, 0, fts3MatchinfoLcsCb, (void*)aIter);
  1110   1128   
  1111   1129     for(i=0; i<pInfo->nPhrase; i++){
  1112   1130       LcsIterator *pIter = &aIter[i];
  1113   1131       nToken -= pIter->pExpr->pPhrase->nToken;
  1114   1132       pIter->iPosOffset = nToken;
  1115   1133     }
  1116   1134   
................................................................................
  1186   1204   ){
  1187   1205     int rc = SQLITE_OK;
  1188   1206     int i;
  1189   1207     Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  1190   1208     sqlite3_stmt *pSelect = 0;
  1191   1209   
  1192   1210     for(i=0; rc==SQLITE_OK && zArg[i]; i++){
  1193         -
         1211  +    pInfo->flag = zArg[i];
  1194   1212       switch( zArg[i] ){
  1195   1213         case FTS3_MATCHINFO_NPHRASE:
  1196   1214           if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase;
  1197   1215           break;
  1198   1216   
  1199   1217         case FTS3_MATCHINFO_NCOL:
  1200   1218           if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol;
................................................................................
  1246   1264         case FTS3_MATCHINFO_LCS:
  1247   1265           rc = fts3ExprLoadDoclists(pCsr, 0, 0);
  1248   1266           if( rc==SQLITE_OK ){
  1249   1267             rc = fts3MatchinfoLcs(pCsr, pInfo);
  1250   1268           }
  1251   1269           break;
  1252   1270   
         1271  +      case FTS3_MATCHINFO_LHITS_BM:
  1253   1272         case FTS3_MATCHINFO_LHITS: {
  1254         -        int nZero = fts3MatchinfoSize(pInfo, FTS3_MATCHINFO_LHITS)*sizeof(u32);
         1273  +        int nZero = fts3MatchinfoSize(pInfo, zArg[i]) * sizeof(u32);
  1255   1274           memset(pInfo->aMatchinfo, 0, nZero);
  1256         -        (void)fts3ExprIterate(pCsr->pExpr, fts3ExprLHitsCb, (void*)pInfo);
         1275  +        (void)fts3ExprIterate(pCsr->pExpr, 1, fts3ExprLHitsCb, (void*)pInfo);
  1257   1276           break;
  1258   1277         }
  1259   1278   
  1260   1279         default: {
  1261   1280           Fts3Expr *pExpr;
  1262   1281           assert( zArg[i]==FTS3_MATCHINFO_HITS );
  1263   1282           pExpr = pCsr->pExpr;
................................................................................
  1264   1283           rc = fts3ExprLoadDoclists(pCsr, 0, 0);
  1265   1284           if( rc!=SQLITE_OK ) break;
  1266   1285           if( bGlobal ){
  1267   1286             if( pCsr->pDeferred ){
  1268   1287               rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc, 0);
  1269   1288               if( rc!=SQLITE_OK ) break;
  1270   1289             }
  1271         -          rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo);
         1290  +          rc = fts3ExprIterate(pExpr, 0, fts3ExprGlobalHitsCb,(void*)pInfo);
  1272   1291             if( rc!=SQLITE_OK ) break;
  1273   1292           }
  1274         -        (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo);
         1293  +        (void)fts3ExprIterate(pExpr, 0, fts3ExprLocalHitsCb,(void*)pInfo);
  1275   1294           break;
  1276   1295         }
  1277   1296       }
  1278   1297   
  1279   1298       pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]);
  1280   1299     }
  1281   1300   
................................................................................
  1566   1585   
  1567   1586       /* Initialize the contents of sCtx.aTerm[] for column iCol. There is 
  1568   1587       ** no way that this operation can fail, so the return code from
  1569   1588       ** fts3ExprIterate() can be discarded.
  1570   1589       */
  1571   1590       sCtx.iCol = iCol;
  1572   1591       sCtx.iTerm = 0;
  1573         -    (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void *)&sCtx);
         1592  +    (void)fts3ExprIterate(pCsr->pExpr, 0, fts3ExprTermOffsetInit, (void*)&sCtx);
  1574   1593   
  1575   1594       /* Retrieve the text stored in column iCol. If an SQL NULL is stored 
  1576   1595       ** in column iCol, jump immediately to the next iteration of the loop.
  1577   1596       ** If an OOM occurs while retrieving the data (this can happen if SQLite
  1578   1597       ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM 
  1579   1598       ** to the caller. 
  1580   1599       */

Changes to test/fts3matchinfo.test.

   503    503     7 "a OR (a AND b)" {
   504    504         1 {1 2 1 2 0 1}   2 {1 0 1 0 1 0}   3 {0 1 0 1 1 2}   4 {1 0 1 0 0 1}   
   505    505         5 {1 0 1 0 0 1}   6 {1 0 1 0 2 2}   7 {2 1 0 0 0 0}   8 {1 2 1 2 2 1}   
   506    506         9 {1 1 1 1 1 3}  10 {1 3 0 0 0 0}
   507    507     }
   508    508   
   509    509   } {
   510         -  do_execsql_test 11.1.$tn  {
          510  +  do_execsql_test 11.1.$tn.1  {
   511    511       SELECT rowid, mit(matchinfo(tt, 'y')) FROM tt WHERE tt MATCH $expr
   512    512     } $res
          513  +
          514  +  set r2 [list]
          515  +  foreach {rowid L} $res {
          516  +    lappend r2 $rowid
          517  +    set M [list]
          518  +    foreach {a b} $L {
          519  +      lappend M [expr ($a ? 1 : 0) + ($b ? 2 : 0)]
          520  +    }
          521  +    lappend r2 $M
          522  +  }
          523  +
          524  +  do_execsql_test 11.1.$tn.2  {
          525  +    SELECT rowid, mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH $expr
          526  +  } $r2
          527  +  breakpoint
          528  +
          529  +  do_execsql_test 11.1.$tn.2  {
          530  +    SELECT rowid, mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH $expr
          531  +  } $r2
   513    532   }
   514    533   set sqlite_fts3_enable_parentheses 0
   515    534   
          535  +#---------------------------------------------------------------------------
          536  +# Test the 'b' matchinfo flag
          537  +#
          538  +set sqlite_fts3_enable_parentheses 1
          539  +reset_db
          540  +db func mit mit
          541  +
          542  +do_test 12.0 {
          543  +  set cols [list]
          544  +  for {set i 0} {$i < 50} {incr i} { lappend cols "c$i" }
          545  +  execsql "CREATE VIRTUAL TABLE tt USING fts3([join $cols ,])"
          546  +} {}
          547  +
          548  +do_execsql_test 12.1 {
          549  +  INSERT INTO tt (rowid, c4, c45) VALUES(1, 'abc', 'abc');
          550  +  SELECT mit(matchinfo(tt, 'b')) FROM tt WHERE tt MATCH 'abc';
          551  +} [list [list [expr 1<<4] [expr 1<<(45-32)]]]
          552  +
          553  +set sqlite_fts3_enable_parentheses 0
   516    554   finish_test
          555  +

Changes to test/fts3query.test.

   169    169   } {
   170    170     1 "SELECT matchinfo(content) FROM t2 WHERE t2 MATCH 'history'" matchinfo
   171    171     2 "SELECT offsets(content) FROM t2 WHERE t2 MATCH 'history'"   offsets
   172    172     3 "SELECT snippet(content) FROM t2 WHERE t2 MATCH 'history'"   snippet
   173    173     4 "SELECT optimize(content) FROM t2 WHERE t2 MATCH 'history'"  optimize
   174    174   }
   175    175   do_catchsql_test 5.5.1 {
   176         -  SELECT matchinfo(t2, 'abc') FROM t2 WHERE t2 MATCH 'history'
   177         -} {1 {unrecognized matchinfo request: b}}
          176  +  SELECT matchinfo(t2, 'abcd') FROM t2 WHERE t2 MATCH 'history'
          177  +} {1 {unrecognized matchinfo request: d}}
   178    178   
   179    179   do_execsql_test 5.5 { DROP TABLE t2 }
   180    180   
   181    181   
   182    182   # Test the snippet() function with 1 to 6 arguments.
   183    183   # 
   184    184   do_execsql_test 6.1 {