Check-in [2c4bbd90]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Changes to improve the selection of deferred tokens within phrases.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts3-changes
Files: files | file ages | folders
SHA1: 2c4bbd90e2fca593c186bf412b608aff8c9f9061
User & Date: dan 2011-06-27 11:15:53
Context
2011-06-27
19:12
Remove an unnecessary assignment from vdbeapi.c. check-in: 25e5b768 user: dan tags: fts3-changes
11:15
Changes to improve the selection of deferred tokens within phrases. check-in: 2c4bbd90 user: dan tags: fts3-changes
2011-06-23
17:09
Fix some of the code issues (missing comments etc.) in the new FTS code. check-in: 8230d831 user: dan tags: fts3-changes
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

  2549   2549   ** in buffer aList[], size nList bytes.
  2550   2550   **
  2551   2551   ** If the isPoslist argument is true, then it is assumed that the doclist
  2552   2552   ** contains a position-list following each docid. Otherwise, it is assumed
  2553   2553   ** that the doclist is simply a list of docids stored as delta encoded 
  2554   2554   ** varints.
  2555   2555   */
  2556         -static int fts3DoclistCountDocids(int isPoslist, char *aList, int nList){
         2556  +static int fts3DoclistCountDocids(char *aList, int nList){
  2557   2557     int nDoc = 0;                   /* Return value */
  2558   2558     if( aList ){
  2559   2559       char *aEnd = &aList[nList];   /* Pointer to one byte after EOF */
  2560   2560       char *p = aList;              /* Cursor */
  2561         -    if( !isPoslist ){
  2562         -      /* The number of docids in the list is the same as the number of 
  2563         -      ** varints. In FTS3 a varint consists of a single byte with the 0x80 
  2564         -      ** bit cleared and zero or more bytes with the 0x80 bit set. So to
  2565         -      ** count the varints in the buffer, just count the number of bytes
  2566         -      ** with the 0x80 bit clear.  */
  2567         -      while( p<aEnd ) nDoc += (((*p++)&0x80)==0);
  2568         -    }else{
  2569         -      while( p<aEnd ){
  2570         -        nDoc++;
  2571         -        while( (*p++)&0x80 );     /* Skip docid varint */
  2572         -        fts3PoslistCopy(0, &p);   /* Skip over position list */
  2573         -      }
         2561  +    while( p<aEnd ){
         2562  +      nDoc++;
         2563  +      while( (*p++)&0x80 );     /* Skip docid varint */
         2564  +      fts3PoslistCopy(0, &p);   /* Skip over position list */
  2574   2565       }
  2575   2566     }
  2576   2567   
  2577   2568     return nDoc;
  2578   2569   }
  2579   2570   
  2580   2571   /*
................................................................................
  3910   3901   
  3911   3902       /* Determine if token pTC should be deferred. If not, update nDocEst. 
  3912   3903       **
  3913   3904       ** TODO: If there are performance regressions involving deferred tokens,
  3914   3905       ** this (the logic that selects the tokens to be deferred) is probably
  3915   3906       ** the bit that needs to change.
  3916   3907       */
  3917         -    if( ii==0 ){
  3918         -      if( pTC->nOvfl ){
  3919         -        nDocEst = (pTC->nOvfl * pTab->nPgsz + pTab->nPgsz) / 10;
  3920         -      }else{
  3921         -        Fts3PhraseToken *pToken = pTC->pToken;
  3922         -        int nList = 0;
  3923         -        char *pList = 0;
  3924         -        rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList);
  3925         -        assert( rc==SQLITE_OK || pList==0 );
  3926         -        if( rc==SQLITE_OK ){
  3927         -          nDocEst = fts3DoclistCountDocids(1, pList, nList);
  3928         -          fts3EvalPhraseMergeToken(pTab, pTC->pPhrase, pTC->iToken,pList,nList);
  3929         -        }
  3930         -      }
  3931         -    }else{
  3932         -      if( pTC->nOvfl>=(nDocEst*nDocSize) ){
  3933         -        Fts3PhraseToken *pToken = pTC->pToken;
  3934         -        rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol);
  3935         -        fts3SegReaderCursorFree(pToken->pSegcsr);
  3936         -        pToken->pSegcsr = 0;
  3937         -      }
         3908  +
         3909  +    if( ii && pTC->nOvfl>=(nDocEst*nDocSize) ){
         3910  +      /* The number of overflow pages to load for this (and therefore all
         3911  +      ** subsequent) tokens is greater than the estimated number of pages 
         3912  +      ** that will be loaded if all subsequent tokens are deferred.
         3913  +      */
         3914  +      Fts3PhraseToken *pToken = pTC->pToken;
         3915  +      rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol);
         3916  +      fts3SegReaderCursorFree(pToken->pSegcsr);
         3917  +      pToken->pSegcsr = 0;
         3918  +    }else if( ii==0 || pTC->pPhrase->nToken>1 ){
         3919  +      /* Either this is the cheapest token in the entire query, or it is
         3920  +      ** part of a multi-token phrase. Either way, the entire doclist will
         3921  +      ** (eventually) be loaded into memory. It may as well be now. */
         3922  +      Fts3PhraseToken *pToken = pTC->pToken;
         3923  +      int nList = 0;
         3924  +      char *pList = 0;
         3925  +      rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList);
         3926  +      assert( rc==SQLITE_OK || pList==0 );
         3927  +      if( rc==SQLITE_OK ){
         3928  +        fts3EvalPhraseMergeToken(pTab, pTC->pPhrase, pTC->iToken,pList,nList);
         3929  +        nDocEst = fts3DoclistCountDocids(
         3930  +            pTC->pPhrase->doclist.aAll, pTC->pPhrase->doclist.nAll
         3931  +        );
         3932  +      }
         3933  +    }else {
         3934  +      /* This token will not be deferred. And it will not be loaded into
         3935  +      ** memory at this point either. So assume that it filters out 75% of
         3936  +      ** the currently estimated number of documents. */
  3938   3937         nDocEst = 1 + (nDocEst/4);
  3939   3938       }
  3940   3939       pTC->pToken = 0;
  3941   3940     }
  3942   3941   
  3943   3942     return rc;
  3944   3943   }