/ Check-in [d8180af2]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Have the fts5 snippet() function avoid favouring snippets that begin with sentences that do not contain search terms. Add an extra bias in favour of the first sentence in the document.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5-snippet-bias
Files: files | file ages | folders
SHA1: d8180af2adfc101dfce95a0498b5cd7b30643b30
User & Date: dan 2016-08-19 14:25:38
Context
2016-08-19
18:37
Register any built-in fts5 module before loading automatic extensions. This allows automatic extensions to register fts5 tokenizers and auxiliary functions. check-in: b10e31dc user: dan tags: fts5-snippet-bias
14:25
Have the fts5 snippet() function avoid favouring snippets that begin with sentences that do not contain search terms. Add an extra bias in favour of the first sentence in the document. check-in: d8180af2 user: dan tags: fts5-snippet-bias
2016-08-18
14:47
Adjust some tests to account for recent changes to the fts5 snippet function. check-in: 184ecbe9 user: dan tags: fts5-snippet-bias
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5_aux.c.

418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444

445


446
447
448
449
450
451
452
453
454
455
456
457






















458
459
460
461
462
463
464
      rc = pApi->xTokenize(pFts, 
          sFinder.zDoc, nDoc, (void*)&sFinder,fts5SentenceFinderCb
      );
      if( rc!=SQLITE_OK ) break;
      rc = pApi->xColumnSize(pFts, i, &nDocsize);
      if( rc!=SQLITE_OK ) break;

      for(ii=0; rc==SQLITE_OK && ii<sFinder.nFirst; ii++){
        int nScore;
        memset(aSeen, 0, nPhrase);
        rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i, 
            sFinder.aFirst[ii], nToken, &nScore, 0
        );

        /* Bonus of 100 points for starting at the start of a sentence */
        nScore += 100;            

        if( rc==SQLITE_OK && nScore>nBestScore ){
          nBestScore = nScore;
          iBestCol = i;
          iBestStart = sFinder.aFirst[ii];
          nColSize = nDocsize;
        }
      }

      for(ii=0; rc==SQLITE_OK && ii<nInst; ii++){
        int ip, ic, io;

        int nScore;


        rc = pApi->xInst(pFts, ii, &ip, &ic, &io);
        if( ic!=i || rc!=SQLITE_OK ) continue;
        memset(aSeen, 0, nPhrase);
        rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i,
            io, nToken, &nScore, &io
        );
        if( rc==SQLITE_OK && nScore>nBestScore ){
          nBestScore = nScore;
          iBestCol = i;
          iBestStart = io;
          nColSize = nDocsize;
        }






















      }
    }
  }

  if( rc==SQLITE_OK ){
    rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn);
  }







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<


>

>
>




|




|


>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







418
419
420
421
422
423
424


















425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
      rc = pApi->xTokenize(pFts, 
          sFinder.zDoc, nDoc, (void*)&sFinder,fts5SentenceFinderCb
      );
      if( rc!=SQLITE_OK ) break;
      rc = pApi->xColumnSize(pFts, i, &nDocsize);
      if( rc!=SQLITE_OK ) break;



















      for(ii=0; rc==SQLITE_OK && ii<nInst; ii++){
        int ip, ic, io;
        int iAdj;
        int nScore;
        int jj;

        rc = pApi->xInst(pFts, ii, &ip, &ic, &io);
        if( ic!=i || rc!=SQLITE_OK ) continue;
        memset(aSeen, 0, nPhrase);
        rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i,
            io, nToken, &nScore, &iAdj
        );
        if( rc==SQLITE_OK && nScore>nBestScore ){
          nBestScore = nScore;
          iBestCol = i;
          iBestStart = iAdj;
          nColSize = nDocsize;
        }

        if( rc==SQLITE_OK && sFinder.nFirst ){
          for(jj=0; jj<(sFinder.nFirst-1); jj++){
            if( sFinder.aFirst[jj+1]>io ) break;
          }

          if( sFinder.aFirst[jj]<io ){
            int nScore;
            memset(aSeen, 0, nPhrase);
            rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i, 
              sFinder.aFirst[jj], nToken, &nScore, 0
            );

            nScore += (sFinder.aFirst[jj]==0 ? 120 : 100);
            if( rc==SQLITE_OK && nScore>nBestScore ){
              nBestScore = nScore;
              iBestCol = i;
              iBestStart = sFinder.aFirst[jj];
              nColSize = nDocsize;
            }
          }
        }
      }
    }
  }

  if( rc==SQLITE_OK ){
    rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn);
  }