/ Check-in [3b5ccd26]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fixes to problems in FTS3 snippet() function found by th3 tests.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 3b5ccd2682176929f4da8a3f39a7e8f58b179f18
User & Date: dan 2010-01-07 10:54:29
Context
2010-01-07
11:27
Changes to test code so that testfixture compiles when OMIT_SHARED_CACHE and OMIT_UTF16 are defined. check-in: d6ee5ff6 user: dan tags: trunk
10:54
Fixes to problems in FTS3 snippet() function found by th3 tests. check-in: 3b5ccd26 user: dan tags: trunk
03:53
Another attempt at fixing the table generator in lemon. Again, this does not affect the SQLite grammar. check-in: e22c090f user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

  2172   2172     sqlite3_value **apVal           /* Array of arguments */
  2173   2173   ){
  2174   2174     Fts3Cursor *pCsr;               /* Cursor handle passed through apVal[0] */
  2175   2175     const char *zStart = "<b>";
  2176   2176     const char *zEnd = "</b>";
  2177   2177     const char *zEllipsis = "<b>...</b>";
  2178   2178     int iCol = -1;
  2179         -  int nToken = 15;
         2179  +  int nToken = 15;                /* Default number of tokens in snippet */
  2180   2180   
  2181   2181     /* There must be at least one argument passed to this function (otherwise
  2182   2182     ** the non-overloaded version would have been called instead of this one).
  2183   2183     */
  2184   2184     assert( nVal>=1 );
  2185   2185   
  2186   2186     if( nVal>6 ){

Changes to ext/fts3/fts3_snippet.c.

    41     41   static int fts3ExprIterate(
    42     42     Fts3Expr *pExpr,                /* Expression to iterate phrases of */
    43     43     int (*x)(Fts3Expr *, void *),   /* Callback function to invoke for phrases */
    44     44     void *pCtx                      /* Second argument to pass to callback */
    45     45   ){
    46     46     int rc;
    47     47     int eType = pExpr->eType;
    48         -  if( eType==FTSQUERY_NOT ){
    49         -    rc = SQLITE_OK;
    50         -  }else if( eType!=FTSQUERY_PHRASE ){
           48  +  if( eType!=FTSQUERY_PHRASE ){
    51     49       assert( pExpr->pLeft && pExpr->pRight );
    52     50       rc = fts3ExprIterate(pExpr->pLeft, x, pCtx);
    53         -    if( rc==SQLITE_OK ){
           51  +    if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){
    54     52         rc = fts3ExprIterate(pExpr->pRight, x, pCtx);
    55     53       }
    56     54     }else{
    57     55       rc = x(pExpr, pCtx);
    58     56     }
    59     57     return rc;
    60     58   }
................................................................................
   104    102     p->nPhrase++;
   105    103     p->nToken += pExpr->pPhrase->nToken;
   106    104   
   107    105     if( pExpr->isLoaded==0 ){
   108    106       rc = sqlite3Fts3ExprLoadDoclist(p->pTab, pExpr);
   109    107       pExpr->isLoaded = 1;
   110    108       if( rc==SQLITE_OK ){
   111         -      fts3ExprNearTrim(pExpr);
          109  +      rc = fts3ExprNearTrim(pExpr);
   112    110       }
   113    111     }
   114    112   
   115    113     return rc;
   116    114   }
   117    115   
   118    116   static int fts3ExprLoadDoclistsCb2(Fts3Expr *pExpr, void *ctx){
................................................................................
   455    453         }
   456    454         pC->pTokenizer = pTab->pTokenizer;
   457    455         while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){
   458    456           const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3;
   459    457           rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
   460    458         }
   461    459         pMod->xClose(pC);
   462         -      if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){
   463         -        return rc;
   464         -      }
   465         -      nShift = iCurrent-nSnippet;
          460  +      if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; }
          461  +
          462  +      nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet;
          463  +      assert( nShift<=nDesired );
   466    464         if( nShift>0 ){
   467    465           *piPos += nShift;
   468    466           *pHlmask = hlmask >> nShift;
   469    467         }
   470    468       }
   471    469     }
   472    470     return SQLITE_OK;
   473    471   }
   474    472   
   475    473   static int fts3SnippetText(
   476    474     Fts3Cursor *pCsr,               /* FTS3 Cursor */
   477    475     SnippetFragment *pFragment,     /* Snippet to extract */
          476  +  int iFragment,                  /* Fragment number */
          477  +  int isLast,                     /* True for final fragment in snippet */
   478    478     int nSnippet,                   /* Number of tokens in extracted snippet */
   479    479     const char *zOpen,              /* String inserted before highlighted term */
   480    480     const char *zClose,             /* String inserted after highlighted term */
   481    481     const char *zEllipsis,
   482    482     StrBuffer *pOut
   483    483   ){
   484    484     Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
   485    485     int rc;                         /* Return code */
   486    486     const char *zDoc;               /* Document text to extract snippet from */
   487    487     int nDoc;                       /* Size of zDoc in bytes */
   488    488     int iCurrent = 0;               /* Current token number of document */
   489         -  int iStart = 0;                 /* Byte offset of current token */
   490    489     int iEnd = 0;                   /* Byte offset of end of current token */
   491    490     int isShiftDone = 0;
   492    491     int iPos = pFragment->iPos;
   493    492     u64 hlmask = pFragment->hlmask;
   494    493   
   495    494     sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */
   496    495     sqlite3_tokenizer_cursor *pC;   /* Tokenizer cursor open on zDoc/nDoc */
   497    496     const char *ZDUMMY;             /* Dummy arguments used with tokenizer */
   498         -  int DUMMY1, DUMMY2, DUMMY3;     /* Dummy arguments used with tokenizer */
          497  +  int DUMMY1;                     /* Dummy arguments used with tokenizer */
   499    498     
   500    499     zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, pFragment->iCol+1);
   501    500     if( zDoc==0 ){
   502    501       if( sqlite3_column_type(pCsr->pStmt, pFragment->iCol+1)!=SQLITE_NULL ){
   503    502         return SQLITE_NOMEM;
   504    503       }
   505    504       return SQLITE_OK;
   506    505     }
   507    506     nDoc = sqlite3_column_bytes(pCsr->pStmt, pFragment->iCol+1);
   508    507   
   509         -  /* Open a token cursor on the document. Read all tokens up to and 
   510         -  ** including token iPos (the first token of the snippet). Set variable
   511         -  ** iStart to the byte offset in zDoc of the start of token iPos.
   512         -  */
          508  +  /* Open a token cursor on the document. */
   513    509     pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
   514    510     rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
   515    511     if( rc!=SQLITE_OK ){
   516    512       return rc;
   517    513     }
   518    514     pC->pTokenizer = pTab->pTokenizer;
   519    515   
   520    516     while( rc==SQLITE_OK ){
   521         -    int iBegin;
   522         -    int iFin;
          517  +    int iBegin;                   /* Offset in zDoc of start of token */
          518  +    int iFin;                     /* Offset in zDoc of end of token */
          519  +    int isHighlight;
          520  +
   523    521       rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);
   524         -
   525         -    if( rc==SQLITE_OK ){
   526         -      if( iCurrent<iPos ) continue;
   527         -
   528         -      if( !isShiftDone ){
   529         -        int n = nDoc - iBegin;
   530         -        rc = fts3SnippetShift(pTab, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask);
   531         -        if( rc!=SQLITE_OK || iCurrent<iPos ) continue;
   532         -      }
   533         -      if( iCurrent==iPos ){
   534         -        iStart = iEnd = iBegin;
          522  +    if( rc!=SQLITE_OK ){
          523  +      if( rc==SQLITE_DONE ){
          524  +        /* Special case - the last token of the snippet is also the last token
          525  +        ** of the column. Append any punctuation that occurred between the end
          526  +        ** of the previous token and the end of the document to the output. 
          527  +        ** Then break out of the loop. */
          528  +        rc = fts3StringAppend(pOut, &zDoc[iEnd], -1);
   535    529         }
          530  +      break;
          531  +    }
          532  +    if( iCurrent<iPos ){ continue; }
   536    533   
   537         -      if( iCurrent>=(iPos+nSnippet) ){
   538         -        rc = SQLITE_DONE;
   539         -      }else{
   540         -        iEnd = iFin;
   541         -        if( hlmask & ((u64)1 << (iCurrent-iPos)) ){
   542         -          if( fts3StringAppend(pOut, &zDoc[iStart], iBegin-iStart)
   543         -           || fts3StringAppend(pOut, zOpen, -1)
   544         -           || fts3StringAppend(pOut, &zDoc[iBegin], iEnd-iBegin)
   545         -           || fts3StringAppend(pOut, zClose, -1)
   546         -          ){
   547         -            rc = SQLITE_NOMEM;
   548         -          }
   549         -          iStart = iEnd;
   550         -        }
          534  +    if( !isShiftDone ){
          535  +      int n = nDoc - iBegin;
          536  +      rc = fts3SnippetShift(pTab, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask);
          537  +      isShiftDone = 1;
          538  +
          539  +      /* Now that the shift has been done, check if the initial "..." are
          540  +      ** required. They are required if (a) this is not the first fragment,
          541  +      ** or (b) this fragment does not begin at position 0 of its column. 
          542  +      */
          543  +      if( rc==SQLITE_OK && (iPos>0 || iFragment>0) ){
          544  +        rc = fts3StringAppend(pOut, zEllipsis, -1);
   551    545         }
          546  +      if( rc!=SQLITE_OK || iCurrent<iPos ) continue;
   552    547       }
   553         -  }
   554         -  assert( rc!=SQLITE_OK );
   555         -  if( rc==SQLITE_DONE ){
   556         -    rc = fts3StringAppend(pOut, &zDoc[iStart], iEnd-iStart);
   557         -    if( rc==SQLITE_OK ){
   558         -      rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
   559         -      if( rc==SQLITE_DONE ){
   560         -        rc = fts3StringAppend(pOut, &zDoc[iEnd], -1);
   561         -      }else if( rc==SQLITE_OK && zEllipsis ){
          548  +
          549  +    if( iCurrent>=(iPos+nSnippet) ){
          550  +      if( isLast ){
   562    551           rc = fts3StringAppend(pOut, zEllipsis, -1);
   563    552         }
          553  +      break;
   564    554       }
          555  +
          556  +    /* Set isHighlight to true if this term should be highlighted. */
          557  +    isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0;
          558  +
          559  +    if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd);
          560  +    if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1);
          561  +    if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin);
          562  +    if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1);
          563  +
          564  +    iEnd = iFin;
   565    565     }
   566    566   
   567    567     pMod->xClose(pC);
   568    568     return rc;
   569    569   }
   570    570   
   571    571   
................................................................................
   799    799       assert( (mCovered&mSeen)==mCovered );
   800    800       if( mSeen==mCovered ) break;
   801    801     }while( nSnippet<SizeofArray(aSnippet) );
   802    802   
   803    803     assert( nFToken>0 );
   804    804   
   805    805     for(i=0; i<nSnippet && rc==SQLITE_OK; i++){
   806         -    SnippetFragment *p = &aSnippet[i];
   807         -    const char *zTail = ((i==nSnippet-1) ? zEllipsis : 0);
   808         -
   809         -    if( i>0 || p->iPos>0 ){
   810         -      fts3StringAppend(&res, zEllipsis, -1);
   811         -    }
   812         -    rc = fts3SnippetText(pCsr, p, nFToken, zStart, zEnd, zTail, &res);
          806  +    rc = fts3SnippetText(pCsr, &aSnippet[i], 
          807  +        i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res
          808  +    );
   813    809     }
   814    810   
   815    811    snippet_out:
   816    812     if( rc!=SQLITE_OK ){
   817    813       sqlite3_result_error_code(pCtx, rc);
   818    814       sqlite3_free(res.z);
   819    815     }else{
................................................................................
   947    943             rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
   948    944           }
   949    945           if( rc==SQLITE_OK ){
   950    946             char aBuffer[64];
   951    947             sqlite3_snprintf(sizeof(aBuffer), aBuffer, 
   952    948                 "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart
   953    949             );
   954         -          fts3StringAppend(&res, aBuffer, -1);
          950  +          rc = fts3StringAppend(&res, aBuffer, -1);
   955    951           }
   956    952         }
   957    953       }
   958    954       if( rc==SQLITE_DONE ){
   959    955         rc = SQLITE_ERROR;
   960    956       }
   961    957   

Changes to test/fts3snippet.test.

            1  +# 2010 January 07
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#*************************************************************************
           11  +#
     1     12   
     2     13   set testdir [file dirname $argv0]
     3     14   source $testdir/tester.tcl
     4     15   
     5         -# If SQLITE_ENABLE_FTS3 is defined, omit this file.
           16  +# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
     6     17   ifcapable !fts3 { finish_test ; return }
           18  +
           19  +# Transform the list $L to its "normal" form, so that it can be compared to
           20  +# another list with the same set of elements using [string compare].
           21  +#
           22  +proc normalize {L} {
           23  +  set ret [list]
           24  +  foreach l $L {lappend ret $l}
           25  +  return $ret
           26  +}
     7     27   
     8     28   do_test fts3snippet-1.1 {
     9     29     execsql {
    10     30       CREATE VIRTUAL TABLE ft USING fts3;
    11     31       INSERT INTO ft VALUES('xxx xxx xxx xxx');
    12     32     }
    13     33   } {}
    14     34   
    15         -proc normalize {L} {
    16         -  set ret [list]
    17         -  foreach l $L {lappend ret $l}
    18         -  return $ret
    19         -}
    20         -
    21     35   do_test fts3snippet-1.2 {
    22     36     execsql { SELECT offsets(ft) FROM ft WHERE ft MATCH 'xxx' }
    23     37   } {{0 0 0 3 0 0 4 3 0 0 8 3 0 0 12 3}}
    24     38   
    25     39   do_test fts3snippet-1.3 {
    26     40     execsql { SELECT offsets(ft) FROM ft WHERE ft MATCH '"xxx xxx"' }
    27     41   } [list [normalize {
................................................................................
    59     73       0 2  4 3 
    60     74       0 0  8 3 
    61     75       0 1  8 3 
    62     76       0 2  8 3 
    63     77       0 0 12 3
    64     78       0 2 12 3
    65     79   }]]
           80  +
           81  +do_test fts3snippet-2.1 {
           82  +  execsql {
           83  +    DROP TABLE IF EXISTS ft;
           84  +    CREATE VIRTUAL TABLE ft USING fts3;
           85  +    INSERT INTO ft VALUES('one two three four five six seven eight nine ten');
           86  +  }
           87  +} {}
           88  +foreach {tn expr res} {
           89  +   1 one       "[one] two three four five..."
           90  +   2 two       "one [two] three four five..."
           91  +   3 three     "one two [three] four five..."
           92  +   4 four      "...two three [four] five six..."
           93  +   5 five      "...three four [five] six seven..."
           94  +   6 six       "...four five [six] seven eight..."
           95  +   7 seven     "...five six [seven] eight nine..."
           96  +   8 eight     "...six seven [eight] nine ten"
           97  +   9 nine      "...six seven eight [nine] ten"
           98  +  10 ten       "...six seven eight nine [ten]"
           99  +} {
          100  +  do_test fts3snippet-2.2.$tn {
          101  +    execsql {
          102  +      SELECT snippet(ft, '[', ']', '...', 0, 5) FROM ft WHERE ft MATCH $expr
          103  +    }
          104  +  } [list $res]
          105  +}
    66    106   
    67    107   finish_test
    68    108