/ Check-in [aef1e8f4]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add the 'y' flag to the fts3/4 matchinfo() function.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: aef1e8f47123e2e865432a0abf194dea4f23447a
User & Date: dan 2015-05-04 12:29:50
Context
2015-05-04
13:25
Add the --lib option to the sqldiff.exe command-line utility. check-in: a117e8f6 user: drh tags: trunk
12:29
Add the 'y' flag to the fts3/4 matchinfo() function. check-in: aef1e8f4 user: dan tags: trunk
2015-05-02
19:54
Improvements to fuzzershell: Avoid excess memory allocations when loading many files. Show the total runtime on final output. Show individual filenames as they are processed even if they are single test-case files. check-in: 34a722a2 user: drh tags: trunk
09:44
Add the experimental matchinfo 'y' flag to fts3/4. Closed-Leaf check-in: 92941609 user: dan tags: fts3-matchinfo-y
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3_snippet.c.

23
24
25
26
27
28
29

30
31
32
33
34
35
36
...
804
805
806
807
808
809
810













































811
812
813
814
815
816
817
818
819
820
821
822
823

824
825
826
827
828
829
830
...
839
840
841
842
843
844
845




846
847
848
849
850
851
852
....
1093
1094
1095
1096
1097
1098
1099




1100
1101
1102
1103
1104
1105
1106
#define FTS3_MATCHINFO_NPHRASE   'p'        /* 1 value */
#define FTS3_MATCHINFO_NCOL      'c'        /* 1 value */
#define FTS3_MATCHINFO_NDOC      'n'        /* 1 value */
#define FTS3_MATCHINFO_AVGLENGTH 'a'        /* nCol values */
#define FTS3_MATCHINFO_LENGTH    'l'        /* nCol values */
#define FTS3_MATCHINFO_LCS       's'        /* nCol values */
#define FTS3_MATCHINFO_HITS      'x'        /* 3*nCol*nPhrase values */


/*
** The default value for the second argument to matchinfo(). 
*/
#define FTS3_MATCHINFO_DEFAULT   "pcx"


................................................................................
    }else{
      p->aMatchinfo[iStart+i*3] = 0;
    }
  }

  return rc;
}














































static int fts3MatchinfoCheck(
  Fts3Table *pTab, 
  char cArg,
  char **pzErr
){
  if( (cArg==FTS3_MATCHINFO_NPHRASE)
   || (cArg==FTS3_MATCHINFO_NCOL)
   || (cArg==FTS3_MATCHINFO_NDOC && pTab->bFts4)
   || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bFts4)
   || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize)
   || (cArg==FTS3_MATCHINFO_LCS)
   || (cArg==FTS3_MATCHINFO_HITS)

  ){
    return SQLITE_OK;
  }
  sqlite3Fts3ErrMsg(pzErr, "unrecognized matchinfo request: %c", cArg);
  return SQLITE_ERROR;
}

................................................................................
      break;

    case FTS3_MATCHINFO_AVGLENGTH:
    case FTS3_MATCHINFO_LENGTH:
    case FTS3_MATCHINFO_LCS:
      nVal = pInfo->nCol;
      break;





    default:
      assert( cArg==FTS3_MATCHINFO_HITS );
      nVal = pInfo->nCol * pInfo->nPhrase * 3;
      break;
  }

................................................................................

      case FTS3_MATCHINFO_LCS:
        rc = fts3ExprLoadDoclists(pCsr, 0, 0);
        if( rc==SQLITE_OK ){
          rc = fts3MatchinfoLcs(pCsr, pInfo);
        }
        break;





      default: {
        Fts3Expr *pExpr;
        assert( zArg[i]==FTS3_MATCHINFO_HITS );
        pExpr = pCsr->pExpr;
        rc = fts3ExprLoadDoclists(pCsr, 0, 0);
        if( rc!=SQLITE_OK ) break;







>







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>













>







 







>
>
>
>







 







>
>
>
>







23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
...
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
...
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
....
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
#define FTS3_MATCHINFO_NPHRASE   'p'        /* 1 value */
#define FTS3_MATCHINFO_NCOL      'c'        /* 1 value */
#define FTS3_MATCHINFO_NDOC      'n'        /* 1 value */
#define FTS3_MATCHINFO_AVGLENGTH 'a'        /* nCol values */
#define FTS3_MATCHINFO_LENGTH    'l'        /* nCol values */
#define FTS3_MATCHINFO_LCS       's'        /* nCol values */
#define FTS3_MATCHINFO_HITS      'x'        /* 3*nCol*nPhrase values */
#define FTS3_MATCHINFO_LHITS     'y'        /* nCol*nPhrase values */

/*
** The default value for the second argument to matchinfo(). 
*/
#define FTS3_MATCHINFO_DEFAULT   "pcx"


................................................................................
    }else{
      p->aMatchinfo[iStart+i*3] = 0;
    }
  }

  return rc;
}

/*
** fts3ExprIterate() callback used to gather information for the matchinfo
** directive 'y'.
*/
static int fts3ExprLHitsCb(
  Fts3Expr *pExpr,                /* Phrase expression node */
  int iPhrase,                    /* Phrase number */
  void *pCtx                      /* Pointer to MatchInfo structure */
){
  MatchInfo *p = (MatchInfo *)pCtx;
  Fts3Table *pTab = (Fts3Table *)p->pCursor->base.pVtab;
  int rc = SQLITE_OK;
  int iStart = iPhrase * p->nCol;
  Fts3Expr *pEof;                 /* Ancestor node already at EOF */
  
  /* This must be a phrase */
  assert( pExpr->pPhrase );

  /* Initialize all output integers to zero. */
  memset(&p->aMatchinfo[iStart], 0, sizeof(u32) * p->nCol);

  /* Check if this or any parent node is at EOF. If so, then all output
  ** values are zero.  */
  for(pEof=pExpr; pEof && pEof->bEof==0; pEof=pEof->pParent);

  if( pEof==0 && pExpr->iDocid==p->pCursor->iPrevId ){
    Fts3Phrase *pPhrase = pExpr->pPhrase;
    char *pIter = pPhrase->doclist.pList;
    int iCol = 0;

    while( 1 ){
      int nHit = fts3ColumnlistCount(&pIter);
      if( (pPhrase->iColumn>=pTab->nColumn || pPhrase->iColumn==iCol) ){
        p->aMatchinfo[iStart + iCol] = (u32)nHit;
      }
      assert( *pIter==0x00 || *pIter==0x01 );
      if( *pIter!=0x01 ) break;
      pIter++;
      pIter += fts3GetVarint32(pIter, &iCol);
    }
  }

  return rc;
}

static int fts3MatchinfoCheck(
  Fts3Table *pTab, 
  char cArg,
  char **pzErr
){
  if( (cArg==FTS3_MATCHINFO_NPHRASE)
   || (cArg==FTS3_MATCHINFO_NCOL)
   || (cArg==FTS3_MATCHINFO_NDOC && pTab->bFts4)
   || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bFts4)
   || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize)
   || (cArg==FTS3_MATCHINFO_LCS)
   || (cArg==FTS3_MATCHINFO_HITS)
   || (cArg==FTS3_MATCHINFO_LHITS)
  ){
    return SQLITE_OK;
  }
  sqlite3Fts3ErrMsg(pzErr, "unrecognized matchinfo request: %c", cArg);
  return SQLITE_ERROR;
}

................................................................................
      break;

    case FTS3_MATCHINFO_AVGLENGTH:
    case FTS3_MATCHINFO_LENGTH:
    case FTS3_MATCHINFO_LCS:
      nVal = pInfo->nCol;
      break;

    case FTS3_MATCHINFO_LHITS:
      nVal = pInfo->nCol * pInfo->nPhrase;
      break;

    default:
      assert( cArg==FTS3_MATCHINFO_HITS );
      nVal = pInfo->nCol * pInfo->nPhrase * 3;
      break;
  }

................................................................................

      case FTS3_MATCHINFO_LCS:
        rc = fts3ExprLoadDoclists(pCsr, 0, 0);
        if( rc==SQLITE_OK ){
          rc = fts3MatchinfoLcs(pCsr, pInfo);
        }
        break;

      case FTS3_MATCHINFO_LHITS:
        (void)fts3ExprIterate(pCsr->pExpr, fts3ExprLHitsCb, (void*)pInfo);
        break;

      default: {
        Fts3Expr *pExpr;
        assert( zArg[i]==FTS3_MATCHINFO_HITS );
        pExpr = pCsr->pExpr;
        rc = fts3ExprLoadDoclists(pCsr, 0, 0);
        if( rc!=SQLITE_OK ) break;

Changes to test/fts3matchinfo.test.

445
446
447
448
449
450
451


452





























































453
    JOIN (SELECT 1 AS idx UNION SELECT 2 UNION SELECT 3) AS x
   WHERE t10 MATCH x.idx
     AND matchinfo(t10) not null
   GROUP BY docId
   ORDER BY 1;
} {1 1 one 2 2 two 3 3 three}
  
































































finish_test







>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
    JOIN (SELECT 1 AS idx UNION SELECT 2 UNION SELECT 3) AS x
   WHERE t10 MATCH x.idx
     AND matchinfo(t10) not null
   GROUP BY docId
   ORDER BY 1;
} {1 1 one 2 2 two 3 3 three}
  
#---------------------------------------------------------------------------
# Test the 'y' matchinfo flag
#
set sqlite_fts3_enable_parentheses 1
reset_db
do_execsql_test 11.0 {
  CREATE VIRTUAL TABLE tt USING fts3(x, y);
  INSERT INTO tt VALUES('c d a c d d', 'e a g b d a');   -- 1
  INSERT INTO tt VALUES('c c g a e b', 'c g d g e c');   -- 2
  INSERT INTO tt VALUES('b e f d e g', 'b a c b c g');   -- 3
  INSERT INTO tt VALUES('a c f f g d', 'd b f d e g');   -- 4
  INSERT INTO tt VALUES('g a c f c f', 'd g g b c c');   -- 5
  INSERT INTO tt VALUES('g a c e b b', 'd b f b g g');   -- 6
  INSERT INTO tt VALUES('f d a a f c', 'e e a d c f');   -- 7
  INSERT INTO tt VALUES('a c b b g f', 'a b a e d f');   -- 8
  INSERT INTO tt VALUES('b a f e c c', 'f d b b a b');   -- 9
  INSERT INTO tt VALUES('f d c e a c', 'f a f a a f');   -- 10
}

db func mit mit
foreach {tn expr res} {
  1 "a" {
      1 {1 2}   2 {1 0}   3 {0 1}   4 {1 0}   5 {1 0}
      6 {1 0}   7 {2 1}   8 {1 2}   9 {1 1}  10 {1 3}
  }

  2 "b" {
      1 {0 1}   2 {1 0}   3 {1 2}   4 {0 1}   5 {0 1}
      6 {2 2}             8 {2 1}   9 {1 3}            
  }

  3 "y:a" {
      1 {0 2}             3 {0 1}                    
                7 {0 1}   8 {0 2}   9 {0 1}  10 {0 3}
  }

  4 "x:a" {
      1 {1 0}   2 {1 0}             4 {1 0}   5 {1 0}
      6 {1 0}   7 {2 0}   8 {1 0}   9 {1 0}  10 {1 0}
  }

  5 "a OR b" {
      1 {1 2 0 1}   2 {1 0 1 0}   3 {0 1 1 2}   4 {1 0 0 1}   5 {1 0 0 1}
      6 {1 0 2 2}   7 {2 1 0 0}   8 {1 2 2 1}   9 {1 1 1 3}  10 {1 3 0 0}
  }

  6 "a AND b" {
      1 {1 2 0 1}   2 {1 0 1 0}   3 {0 1 1 2}   4 {1 0 0 1}   5 {1 0 0 1}
      6 {1 0 2 2}                 8 {1 2 2 1}   9 {1 1 1 3}              
  }

  7 "a OR (a AND b)" {
      1 {1 2 1 2 0 1}   2 {1 0 1 0 1 0}   3 {0 1 0 1 1 2}   4 {1 0 1 0 0 1}   
      5 {1 0 1 0 0 1}   6 {1 0 1 0 2 2}   7 {2 1 0 0 0 0}   8 {1 2 1 2 2 1}   
      9 {1 1 1 1 1 3}  10 {1 3 0 0 0 0}
  }

} {
  do_execsql_test 11.1.$tn  {
    SELECT rowid, mit(matchinfo(tt, 'y')) FROM tt WHERE tt MATCH $expr
  } $res
}
set sqlite_fts3_enable_parentheses 0

finish_test