/ Check-in [2c4bbd90]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Changes to improve the selection of deferred tokens within phrases.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts3-changes
Files: files | file ages | folders
SHA1: 2c4bbd90e2fca593c186bf412b608aff8c9f9061
User & Date: dan 2011-06-27 11:15:53
Context
2011-06-27
19:12
Remove an unnecessary assignment from vdbeapi.c. check-in: 25e5b768 user: dan tags: fts3-changes
11:15
Changes to improve the selection of deferred tokens within phrases. check-in: 2c4bbd90 user: dan tags: fts3-changes
2011-06-23
17:09
Fix some of the code issues (missing comments etc.) in the new FTS code. check-in: 8230d831 user: dan tags: fts3-changes
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
....
3910
3911
3912
3913
3914
3915
3916
3917

3918
3919
3920


3921








3922
3923
3924
3925
3926
3927
3928



3929
3930
3931
3932
3933
3934
3935
3936
3937



3938
3939
3940
3941
3942
3943
3944
** in buffer aList[], size nList bytes.
**
** If the isPoslist argument is true, then it is assumed that the doclist
** contains a position-list following each docid. Otherwise, it is assumed
** that the doclist is simply a list of docids stored as delta encoded 
** varints.
*/
static int fts3DoclistCountDocids(int isPoslist, char *aList, int nList){
  int nDoc = 0;                   /* Return value */
  if( aList ){
    char *aEnd = &aList[nList];   /* Pointer to one byte after EOF */
    char *p = aList;              /* Cursor */
    if( !isPoslist ){
      /* The number of docids in the list is the same as the number of 
      ** varints. In FTS3 a varint consists of a single byte with the 0x80 
      ** bit cleared and zero or more bytes with the 0x80 bit set. So to
      ** count the varints in the buffer, just count the number of bytes
      ** with the 0x80 bit clear.  */
      while( p<aEnd ) nDoc += (((*p++)&0x80)==0);
    }else{
      while( p<aEnd ){
        nDoc++;
        while( (*p++)&0x80 );     /* Skip docid varint */
        fts3PoslistCopy(0, &p);   /* Skip over position list */
      }
    }
  }

  return nDoc;
}

/*
................................................................................

    /* Determine if token pTC should be deferred. If not, update nDocEst. 
    **
    ** TODO: If there are performance regressions involving deferred tokens,
    ** this (the logic that selects the tokens to be deferred) is probably
    ** the bit that needs to change.
    */
    if( ii==0 ){

      if( pTC->nOvfl ){
        nDocEst = (pTC->nOvfl * pTab->nPgsz + pTab->nPgsz) / 10;
      }else{


        Fts3PhraseToken *pToken = pTC->pToken;








        int nList = 0;
        char *pList = 0;
        rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList);
        assert( rc==SQLITE_OK || pList==0 );
        if( rc==SQLITE_OK ){
          nDocEst = fts3DoclistCountDocids(1, pList, nList);
          fts3EvalPhraseMergeToken(pTab, pTC->pPhrase, pTC->iToken,pList,nList);



        }
      }
    }else{
      if( pTC->nOvfl>=(nDocEst*nDocSize) ){
        Fts3PhraseToken *pToken = pTC->pToken;
        rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol);
        fts3SegReaderCursorFree(pToken->pSegcsr);
        pToken->pSegcsr = 0;
      }



      nDocEst = 1 + (nDocEst/4);
    }
    pTC->pToken = 0;
  }

  return rc;
}







|




<
<
<
<
<
<
<
<
|
|
|
|
<







 







<
>
|
|
|
>
>
|
>
>
>
>
>
>
>
>
|
|
|
|
|
<
|
>
>
>
|
<
|
<
<
<
<
<
<
>
>
>







2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560








2561
2562
2563
2564

2565
2566
2567
2568
2569
2570
2571
....
3901
3902
3903
3904
3905
3906
3907

3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927

3928
3929
3930
3931
3932

3933






3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
** in buffer aList[], size nList bytes.
**
** If the isPoslist argument is true, then it is assumed that the doclist
** contains a position-list following each docid. Otherwise, it is assumed
** that the doclist is simply a list of docids stored as delta encoded 
** varints.
*/
static int fts3DoclistCountDocids(char *aList, int nList){
  int nDoc = 0;                   /* Return value */
  if( aList ){
    char *aEnd = &aList[nList];   /* Pointer to one byte after EOF */
    char *p = aList;              /* Cursor */








    while( p<aEnd ){
      nDoc++;
      while( (*p++)&0x80 );     /* Skip docid varint */
      fts3PoslistCopy(0, &p);   /* Skip over position list */

    }
  }

  return nDoc;
}

/*
................................................................................

    /* Determine if token pTC should be deferred. If not, update nDocEst. 
    **
    ** TODO: If there are performance regressions involving deferred tokens,
    ** this (the logic that selects the tokens to be deferred) is probably
    ** the bit that needs to change.
    */


    if( ii && pTC->nOvfl>=(nDocEst*nDocSize) ){
      /* The number of overflow pages to load for this (and therefore all
      ** subsequent) tokens is greater than the estimated number of pages 
      ** that will be loaded if all subsequent tokens are deferred.
      */
      Fts3PhraseToken *pToken = pTC->pToken;
      rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol);
      fts3SegReaderCursorFree(pToken->pSegcsr);
      pToken->pSegcsr = 0;
    }else if( ii==0 || pTC->pPhrase->nToken>1 ){
      /* Either this is the cheapest token in the entire query, or it is
      ** part of a multi-token phrase. Either way, the entire doclist will
      ** (eventually) be loaded into memory. It may as well be now. */
      Fts3PhraseToken *pToken = pTC->pToken;
      int nList = 0;
      char *pList = 0;
      rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList);
      assert( rc==SQLITE_OK || pList==0 );
      if( rc==SQLITE_OK ){

        fts3EvalPhraseMergeToken(pTab, pTC->pPhrase, pTC->iToken,pList,nList);
        nDocEst = fts3DoclistCountDocids(
            pTC->pPhrase->doclist.aAll, pTC->pPhrase->doclist.nAll
        );
      }

    }else {






      /* This token will not be deferred. And it will not be loaded into
      ** memory at this point either. So assume that it filters out 75% of
      ** the currently estimated number of documents. */
      nDocEst = 1 + (nDocEst/4);
    }
    pTC->pToken = 0;
  }

  return rc;
}