/ Check-in [ae40b34c]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Remove some unused code from fts3. Add tests to fts3matchinfo.test.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts3-experimental
Files: files | file ages | folders
SHA1: ae40b34cf7c24c9601bdfb5cbe5b20f05a376ea8
User & Date: dan 2010-11-24 11:51:56
Context
2010-11-24
15:02
Fix crashes that can occur when queries are run on an FTS4 table containing zero rows. check-in: ed61fd20 user: dan tags: fts3-experimental
11:51
Remove some unused code from fts3. Add tests to fts3matchinfo.test. check-in: ae40b34c user: dan tags: fts3-experimental
2010-11-23
19:16
Experimental changes to fts3 function matchinfo(). check-in: 9cf0f2b7 user: dan tags: fts3-experimental
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
....
3271
3272
3273
3274
3275
3276
3277


3278

3279
3280
3281
3282
3283
3284
3285
      }
    }
  }

  for(ii=0; ii<pPhrase->nToken; ii++){
    Fts3PhraseToken *pTok;        /* Token to find doclist for */
    int iTok;                     /* The token being queried this iteration */
    char *pList;                  /* Pointer to token doclist */
    int nList;                    /* Size of buffer at pList */

    /* Select a token to process. If this is an xFilter() call, then tokens 
    ** are processed in order from least to most costly. Otherwise, tokens 
    ** are processed in the order in which they occur in the phrase.
    */
    if( pCsr->eEvalmode==FTS3_EVAL_MATCHINFO ){
      assert( isReqPos );
................................................................................
  sqlite3_context *pContext,      /* SQLite function call context */
  int nVal,                       /* Size of argument array */
  sqlite3_value **apVal           /* Array of arguments */
){
  Fts3Cursor *pCsr;               /* Cursor handle passed through apVal[0] */
  assert( nVal==1 || nVal==2 );
  if( SQLITE_OK==fts3FunctionArg(pContext, "matchinfo", apVal[0], &pCsr) ){


    const char *zArg = (nVal>1 ? sqlite3_value_text(apVal[1]) : 0);

    sqlite3Fts3Matchinfo(pContext, pCsr, zArg);
  }
}

/*
** This routine implements the xFindFunction method for the FTS3
** virtual table.







|
|







 







>
>
|
>







2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
....
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
      }
    }
  }

  for(ii=0; ii<pPhrase->nToken; ii++){
    Fts3PhraseToken *pTok;        /* Token to find doclist for */
    int iTok;                     /* The token being queried this iteration */
    char *pList = 0;              /* Pointer to token doclist */
    int nList = 0;                /* Size of buffer at pList */

    /* Select a token to process. If this is an xFilter() call, then tokens 
    ** are processed in order from least to most costly. Otherwise, tokens 
    ** are processed in the order in which they occur in the phrase.
    */
    if( pCsr->eEvalmode==FTS3_EVAL_MATCHINFO ){
      assert( isReqPos );
................................................................................
  sqlite3_context *pContext,      /* SQLite function call context */
  int nVal,                       /* Size of argument array */
  sqlite3_value **apVal           /* Array of arguments */
){
  Fts3Cursor *pCsr;               /* Cursor handle passed through apVal[0] */
  assert( nVal==1 || nVal==2 );
  if( SQLITE_OK==fts3FunctionArg(pContext, "matchinfo", apVal[0], &pCsr) ){
    const char *zArg = 0;
    if( nVal>1 ){
      zArg = (const char *)sqlite3_value_text(apVal[1]);
    }
    sqlite3Fts3Matchinfo(pContext, pCsr, zArg);
  }
}

/*
** This routine implements the xFindFunction method for the FTS3
** virtual table.

Changes to ext/fts3/fts3Int.h.

287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
void sqlite3Fts3SegReaderFree(Fts3Table *, Fts3SegReader *);
int sqlite3Fts3SegReaderIterate(
  Fts3Table *, Fts3SegReader **, int, Fts3SegFilter *,
  int (*)(Fts3Table *, void *, char *, int, char *, int),  void *
);
int sqlite3Fts3SegReaderCost(Fts3Cursor *, Fts3SegReader *, int *);
int sqlite3Fts3AllSegdirs(Fts3Table*, sqlite3_stmt **);
int sqlite3Fts3MatchinfoDocsizeLocal(Fts3Cursor*, u32*);
int sqlite3Fts3MatchinfoDocsizeGlobal(Fts3Cursor*, u32*);
int sqlite3Fts3ReadLock(Fts3Table *);
int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*);

int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **);
int sqlite3Fts3SelectDocsize(Fts3Table *, sqlite3_int64, sqlite3_stmt **);

void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *);







<
<







287
288
289
290
291
292
293


294
295
296
297
298
299
300
void sqlite3Fts3SegReaderFree(Fts3Table *, Fts3SegReader *);
int sqlite3Fts3SegReaderIterate(
  Fts3Table *, Fts3SegReader **, int, Fts3SegFilter *,
  int (*)(Fts3Table *, void *, char *, int, char *, int),  void *
);
int sqlite3Fts3SegReaderCost(Fts3Cursor *, Fts3SegReader *, int *);
int sqlite3Fts3AllSegdirs(Fts3Table*, sqlite3_stmt **);


int sqlite3Fts3ReadLock(Fts3Table *);
int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*);

int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **);
int sqlite3Fts3SelectDocsize(Fts3Table *, sqlite3_int64, sqlite3_stmt **);

void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *);

Changes to ext/fts3/fts3_snippet.c.

815
816
817
818
819
820
821






822
823
824
825
826
827
828
...
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
....
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
**     aMatchinfo[3*iPhrase*nCol + 3*iCol + 1] = X;
**     aMatchinfo[3*iPhrase*nCol + 3*iCol + 2] = Y;
**   }
**
** where X is the number of matches for phrase iPhrase in column iCol of all
** rows of the table. Y is the number of rows for which column iCol contains
** at least one instance of phrase iPhrase.






*/
static int fts3ExprGlobalHitsCb(
  Fts3Expr *pExpr,                /* Phrase expression node */
  int iPhrase,                    /* Phrase number (numbered from zero) */
  void *pCtx                      /* Pointer to MatchInfo structure */
){
  MatchInfo *p = (MatchInfo *)pCtx;
................................................................................
}

/*
** Check whether the matchinfo() request character cArg can be serviced
** for table pTab.
**
** SQLITE_OK is returned if cArg equals one of the FTS3_MATCHINFO_* codes
** tested below and, for the NDOC, AVGLENGTH and LENGTH requests, the
** corresponding pTab->bHasStat / pTab->bHasDocsize flag is set.
**
** Otherwise *pzErr is pointed at a message built by sqlite3_mprintf() and
** SQLITE_ERROR is returned. NOTE(review): the message is presumably
** released by the caller with sqlite3_free() - confirm at the call site.
*/
static int fts3MatchinfoCheck(
  Fts3Table *pTab, 
  char cArg,
  char **pzErr
){
  /* Each alternative is parenthesized so that the "&&" conditions do not
  ** rely on operator precedence inside the "||" chain. */
  if( (cArg==FTS3_MATCHINFO_NPHRASE)
   || (cArg==FTS3_MATCHINFO_NCOL)
   || (cArg==FTS3_MATCHINFO_NDOC && pTab->bHasStat)
   || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bHasStat)
   || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize)
   || (cArg==FTS3_MATCHINFO_LCS)
   || (cArg==FTS3_MATCHINFO_HITS)
  ){
    return SQLITE_OK;
  }
  *pzErr = sqlite3_mprintf("unrecognized matchinfo request: %c", cArg);
  return SQLITE_ERROR;
}

................................................................................
      memset(pFragment, 0, sizeof(*pFragment));

      /* Loop through all columns of the table being considered for snippets.
      ** If the iCol argument to this function was negative, this means all
      ** columns of the FTS3 table. Otherwise, only column iCol is considered.
      */
      for(iRead=0; iRead<pTab->nColumn; iRead++){
        SnippetFragment sF;
        int iS;
        if( iCol>=0 && iRead!=iCol ) continue;

        /* Find the best snippet of nFToken tokens in column iRead. */
        rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS);
        if( rc!=SQLITE_OK ){
          goto snippet_out;







>
>
>
>
>
>







 







|
|
|
|
|
|
|







 







|







815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
...
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
....
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
**     aMatchinfo[3*iPhrase*nCol + 3*iCol + 1] = X;
**     aMatchinfo[3*iPhrase*nCol + 3*iCol + 2] = Y;
**   }
**
** where X is the number of matches for phrase iPhrase in column iCol of all
** rows of the table. Y is the number of rows for which column iCol contains
** at least one instance of phrase iPhrase.
**
** If the phrase pExpr consists entirely of deferred tokens, then all X and
** Y values are set to nDoc, where nDoc is the number of documents in the 
** file system. This is done because the full-text index doclist is required
** to calculate these values properly, and the full-text index doclist is
** not available for deferred tokens.
*/
static int fts3ExprGlobalHitsCb(
  Fts3Expr *pExpr,                /* Phrase expression node */
  int iPhrase,                    /* Phrase number (numbered from zero) */
  void *pCtx                      /* Pointer to MatchInfo structure */
){
  MatchInfo *p = (MatchInfo *)pCtx;
................................................................................
}

/*
** Validate one matchinfo() format character against table pTab.
**
** Returns SQLITE_OK when the request can be honoured. Requests for
** NPHRASE, NCOL, LCS and HITS are always accepted; NDOC and AVGLENGTH
** require pTab->bHasStat; LENGTH requires pTab->bHasDocsize.
**
** For any other character, or a request whose required flag is clear,
** *pzErr receives an sqlite3_mprintf() message and SQLITE_ERROR is
** returned.
*/
static int fts3MatchinfoCheck(
  Fts3Table *pTab,
  char cArg,
  char **pzErr
){
  /* Requests that need no auxiliary table. */
  if( cArg==FTS3_MATCHINFO_NPHRASE || cArg==FTS3_MATCHINFO_NCOL
   || cArg==FTS3_MATCHINFO_LCS || cArg==FTS3_MATCHINFO_HITS
  ){
    return SQLITE_OK;
  }

  /* Requests gated on the bHasStat flag. */
  if( pTab->bHasStat
   && (cArg==FTS3_MATCHINFO_NDOC || cArg==FTS3_MATCHINFO_AVGLENGTH)
  ){
    return SQLITE_OK;
  }

  /* Request gated on the bHasDocsize flag. */
  if( pTab->bHasDocsize && cArg==FTS3_MATCHINFO_LENGTH ){
    return SQLITE_OK;
  }

  *pzErr = sqlite3_mprintf("unrecognized matchinfo request: %c", cArg);
  return SQLITE_ERROR;
}

................................................................................
      memset(pFragment, 0, sizeof(*pFragment));

      /* Loop through all columns of the table being considered for snippets.
      ** If the iCol argument to this function was negative, this means all
      ** columns of the FTS3 table. Otherwise, only column iCol is considered.
      */
      for(iRead=0; iRead<pTab->nColumn; iRead++){
        SnippetFragment sF = {0, 0, 0, 0};
        int iS;
        if( iCol>=0 && iRead!=iCol ) continue;

        /* Find the best snippet of nFToken tokens in column iRead. */
        rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS);
        if( rc!=SQLITE_OK ){
          goto snippet_out;

Changes to ext/fts3/fts3_write.c.

2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
    sqlite3_int64 x;
    j += sqlite3Fts3GetVarint(&zBuf[j], &x);
    assert(j<=nBuf);
    a[i] = (u32)(x & 0xffffffff);
  }
}

/*
** Fill in the document size auxiliary information for the matchinfo
** structure.  The auxiliary information is:
**
**    N     Total number of documents in the full-text index
**    a0    Average length of column 0 over the whole index
**    n0    Length of column 0 on the matching row
**    ...
**    aM    Average length of column M over the whole index
**    nM    Length of column M on the matching row
**
** The sqlite3Fts3MatchinfoDocsizeLocal() routine fills in the nX values.
** The sqlite3Fts3MatchinfoDocsizeGlobal() routine fills in N and the aX
** values.
*/
/*
** Fill in the per-row column sizes of the matchinfo array a[].
**
** The SQL_SELECT_DOCSIZE statement is run with pCur->iPrevId bound to
** parameter 1. If it returns a row, column 0 is decoded as a sequence of
** varints and the i-th value (truncated to 32 bits) is stored in
** a[2+i*2], for at most p->nColumn values. If no row is returned, a[] is
** left untouched.
**
** Returns the error from fts3SqlStmt() if the statement cannot be
** obtained, otherwise the result of sqlite3_reset(), which is SQLITE_OK
** unless the preceding sqlite3_step() failed.
*/
int sqlite3Fts3MatchinfoDocsizeLocal(Fts3Cursor *pCur, u32 *a){
  const char *pBlob;       /* The BLOB holding %_docsize info */
  int nBlob;               /* Size of the BLOB */
  sqlite3_stmt *pStmt;     /* Statement for reading and writing */
  int i, j;                /* Column counter and byte offset into pBlob */
  sqlite3_int64 x;         /* Varint value */
  int rc;                  /* Result code from subfunctions */
  Fts3Table *p;            /* The FTS table */

  p = (Fts3Table*)pCur->base.pVtab;
  rc = fts3SqlStmt(p, SQL_SELECT_DOCSIZE, &pStmt, 0);
  if( rc ){
    return rc;
  }
  sqlite3_bind_int64(pStmt, 1, pCur->iPrevId);
  if( sqlite3_step(pStmt)==SQLITE_ROW ){
    nBlob = sqlite3_column_bytes(pStmt, 0);
    pBlob = (const char*)sqlite3_column_blob(pStmt, 0);
    for(i=j=0; i<p->nColumn && j<nBlob; i++){
      /* Advance the offset by the size of the varint just read. Assigning
      ** the size to j (instead of adding it) would make the third and
      ** later columns decode from the wrong position. */
      j += sqlite3Fts3GetVarint(&pBlob[j], &x);
      a[2+i*2] = (u32)(x & 0xffffffff);
    }
  }

  /* sqlite3_reset() reports any error raised by the sqlite3_step() call
  ** above, so a failed read is no longer reported as success. */
  return sqlite3_reset(pStmt);
}
/*
** Fill in the table-wide entries of the matchinfo array a[].
**
** The SQL_SELECT_DOCTOTAL statement is run. If it returns a row, column 0
** is decoded as a sequence of varints. The first value (truncated to 32
** bits) is the document count nDoc and is stored in a[0]. Each following
** value, for at most p->nColumn values, is divided by nDoc with rounding
** to nearest and stored in a[1+i*2]. If nDoc is zero the averages are
** stored as 0 rather than dividing by zero. If no row is returned, a[] is
** left untouched.
**
** Returns the error from fts3SqlStmt() if the statement cannot be
** obtained, otherwise the result of sqlite3_reset(), which is SQLITE_OK
** unless the preceding sqlite3_step() failed.
*/
int sqlite3Fts3MatchinfoDocsizeGlobal(Fts3Cursor *pCur, u32 *a){
  const char *pBlob;       /* The BLOB holding %_stat info */
  int nBlob;               /* Size of the BLOB */
  sqlite3_stmt *pStmt;     /* Statement for reading and writing */
  int i, j;                /* Column counter and byte offset into pBlob */
  sqlite3_int64 x;         /* Varint value */
  int nDoc;                /* Number of documents */
  int rc;                  /* Result code from subfunctions */
  Fts3Table *p;            /* The FTS table */

  p = (Fts3Table*)pCur->base.pVtab;
  rc = fts3SqlStmt(p, SQL_SELECT_DOCTOTAL, &pStmt, 0);
  if( rc ){
    return rc;
  }
  if( sqlite3_step(pStmt)==SQLITE_ROW ){
    nBlob = sqlite3_column_bytes(pStmt, 0);
    pBlob = (const char*)sqlite3_column_blob(pStmt, 0);
    j = sqlite3Fts3GetVarint(pBlob, &x);
    a[0] = nDoc = (u32)(x & 0xffffffff);
    for(i=0; i<p->nColumn && j<nBlob; i++){
      /* Accumulate the offset; overwriting j with the size of the last
      ** varint would re-read from the wrong position on later columns. */
      j += sqlite3Fts3GetVarint(&pBlob[j], &x);
      if( nDoc!=0 ){
        a[1+i*2] = ((u32)(x & 0xffffffff) + nDoc/2)/nDoc;
      }else{
        a[1+i*2] = 0;      /* No documents: avoid division by zero */
      }
    }
  }

  /* sqlite3_reset() reports any error raised by the sqlite3_step() call
  ** above, so a failed read is no longer reported as success. */
  return sqlite3_reset(pStmt);
}

/*
** Insert the sizes (in tokens) for each column of the document
** with docid equal to p->iPrevDocid.  The sizes are encoded as
** a blob of varints.
*/
static void fts3InsertDocsize(
  int *pRC,         /* Result code */







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







2484
2485
2486
2487
2488
2489
2490





































































2491
2492
2493
2494
2495
2496
2497
    sqlite3_int64 x;
    j += sqlite3Fts3GetVarint(&zBuf[j], &x);
    assert(j<=nBuf);
    a[i] = (u32)(x & 0xffffffff);
  }
}






































































/*
** Insert the sizes (in tokens) for each column of the document
** with docid equal to p->iPrevDocid.  The sizes are encoded as
** a blob of varints.
*/
static void fts3InsertDocsize(
  int *pRC,         /* Result code */

Changes to test/fts3matchinfo.test.

61
62
63
64
65
66
67
68









































































































































69
# Check that with fts3, the "=" character is permitted in column definitions.
# The table is declared with the single column definition "mtchinfo=fts3";
# the INSERT and SELECT below then address that column as "mtchinfo" and
# expect the inserted text to be returned unchanged.
#
do_execsql_test 3.1 {
  CREATE VIRTUAL TABLE t3 USING fts3(mtchinfo=fts3);
  INSERT INTO t3(mtchinfo) VALUES('Beside the lake, beneath the trees');
  SELECT mtchinfo FROM t3;
} {{Beside the lake, beneath the trees}}










































































































































finish_test








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# Check that with fts3, the "=" character is permitted in column definitions.
# The table is declared with the single column definition "mtchinfo=fts3";
# the INSERT and SELECT below then address that column as "mtchinfo" and
# expect the inserted text to be returned unchanged.
#
do_execsql_test 3.1 {
  CREATE VIRTUAL TABLE t3 USING fts3(mtchinfo=fts3);
  INSERT INTO t3(mtchinfo) VALUES('Beside the lake, beneath the trees');
  SELECT mtchinfo FROM t3;
} {{Beside the lake, beneath the trees}}


#--------------------------------------------------------------------------
# Proc [do_matchinfo_test] is used to test the FTSX matchinfo() function.
#
# The first argument - $tn - is a test identifier. This may be either a
# full identifier (i.e. "fts3matchinfo-1.1") or, if global var $testprefix
# is set, just the numeric component (i.e. "1.1").
#
# The second argument is the name of an FTSX table. The third is the 
# full text of a WHERE/MATCH expression to query the table for 
# (i.e. "t1 MATCH 'abc'"). The final argument - $results - should be a
# key-value list (serialized array) with matchinfo() format specifiers
# as keys, and the results of executing the statement:
#
#   SELECT matchinfo($tbl, '$key') FROM $tbl WHERE $expr
#
# For example:
#
#   CREATE VIRTUAL TABLE t1 USING fts4;
#   INSERT INTO t1 VALUES('abc');
#   INSERT INTO t1 VALUES('def');
#   INSERT INTO t1 VALUES('abc abc');
#
#   do_matchinfo_test 1.1 t1 "t1 MATCH 'abc'" {
#     n {3 3}
#     p {1 1}
#     c {1 1}
#     x {{1 3 2} {2 3 2}}
#   }
#
# If the $results list contains keys mapped to "-" instead of a matchinfo()
# result, then this command computes the expected results based on other
# mappings to test the matchinfo() function. For example, the command above
# could be changed to:
#
#   do_matchinfo_test 1.1 t1 "t1 MATCH 'abc'" {
#     n {3 3} p {1 1} c {1 1} x {{1 3 2} {2 3 2}}
#     pcx -
#   }
#
# And this command would compute the expected results for matchinfo(t1, 'pcx')
# based on the results of matchinfo(t1, 'p'), matchinfo(t1, 'c') and 
# matchinfo(t1, 'x') in order to test 'pcx'.
#
proc do_matchinfo_test {tn tbl expr results} {

  # Record every explicitly supplied expectation, keyed by its format
  # string. A value of "-" is a placeholder that is filled in further down.
  foreach {spec expected} $results {
    if {$expected != "-"} {
      set expect($spec) $expected
    }
  }

  # The number of rows is the length of the longest supplied result list.
  set nRow 0
  foreach spec [array names expect] {
    set len [llength $expect($spec)]
    if {$len>$nRow} { set nRow $len }
  }

  # Derive the expectation for each "-" placeholder: for every row,
  # concatenate the per-row results of each single character that makes
  # up the format string.
  foreach {spec expected} $results {
    if {$expected != "-"} continue
    set derived [list]
    for {set iRow 0} {$iRow<$nRow} {incr iRow} {
      set row [list]
      foreach ch [split $spec ""] {
        set row [concat $row [lindex $expect($ch) $iRow]]
      }
      lappend derived $row
    }
    set expect($spec) $derived
  }

  # Take one snapshot of the array so that every loop below visits the
  # formats in the same order.
  set pairs [array get expect]

  # Run one test per format, and collect the SQL expression for each format
  # so that they can all be evaluated together afterwards.
  set exprlist [list]
  foreach {spec expected} $pairs {
    set sql "SELECT mit(matchinfo($tbl, '$spec')) FROM $tbl WHERE $expr"
    do_execsql_test $tn.$spec $sql [normalize2 $expected]
    lappend exprlist "mit(matchinfo($tbl, '$spec'))"
  }

  # Combined test: a single SELECT invoking matchinfo() once per format.
  # The expected output is row-major: every format's result for row 0, then
  # every format's result for row 1, and so on.
  set allres [list]
  for {set iRow 0} {$iRow<$nRow} {incr iRow} {
    foreach {spec expected} $pairs {
      lappend allres [lindex $expected $iRow]
    }
  }
  set sql "SELECT [join $exprlist ,] FROM $tbl WHERE $expr"
  do_execsql_test $tn.multi $sql [normalize2 $allres]
}
# Return a copy of $list_of_lists in which every element has been rebuilt
# as a list, so that differences in whitespace between sub-list elements
# do not affect comparison against actual results.
proc normalize2 {list_of_lists} {
  set normalized [list]
  set nElem [llength $list_of_lists]
  for {set i 0} {$i<$nElem} {incr i} {
    lappend normalized [list {*}[lindex $list_of_lists $i]]
  }
  return $normalized
}


# Test group 4.1: a two-column fts4 table t4 holding two rows. The second
# row has the contents of columns x and y swapped relative to the first.
do_execsql_test 4.1.0 {
  CREATE VIRTUAL TABLE t4 USING fts4(x, y);
  INSERT INTO t4 VALUES('a b c d e', 'f g h i j');
  INSERT INTO t4 VALUES('f g h i j', 'a b c d e');
}
# Query with three separate terms. Each single-letter key lists one
# expected matchinfo() result per matching row (two rows here). The keys
# mapped to "-" on the last line are multi-letter format strings whose
# expected results do_matchinfo_test derives from the single-letter ones.
# NOTE(review): the meaning of the letters p, c, x, n, l, a is defined by
# the matchinfo() implementation, not visible here - confirm against the
# FTS documentation.
do_matchinfo_test 4.1.1 t4 {t4 MATCH 'a b c'} {
  p {3 3}
  c {2 2}
  x {
    {1 1 1   0 1 1   1 1 1   0 1 1   1 1 1   0 1 1}
    {0 1 1   1 1 1   0 1 1   1 1 1   0 1 1   1 1 1}
  }
  n {2 2}
  l {{5 5} {5 5}}
  a {{5 5} {5 5}}

  xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc -
}
# Same table, queried with a double-quoted MATCH string (presumably a
# single phrase, consistent with the expected "p" value of 1). The same
# derived multi-letter formats are exercised.
do_matchinfo_test 4.1.2 t4 {t4 MATCH '"g h i"'} {
  p {1 1}
  c {2 2}
  x {
    {0 1 1   1 1 1}
    {1 1 1   0 1 1}
  }
  n {2 2}
  l {{5 5} {5 5}}
  a {{5 5} {5 5}}

  xxxxxxxxxxxxxxxxxx - pcx - xpc - ccc - pppxpcpcx - laxnpc -
}


finish_test