/ Check-in [22165994]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Deploy heuristics (well-commented) to better estimate how much unindexed terms in the WHERE clause filter the number of output rows from a single table.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 221659945c3f78d3b6789bfe8fdeb8d3ee1fa038
User & Date: drh 2014-11-22 18:50:44
Context
2014-11-22
19:52
Fix an error in the comments from the previous check-in. check-in: 9660ce54 user: drh tags: trunk
18:50
Deploy heuristics (well-commented) to better estimate how much unindexed terms in the WHERE clause filter the number of output rows from a single table. check-in: 22165994 user: drh tags: trunk
12:22
Remove a redundant test case (probably a copy/paste error). Add an assert() to where.c to ensure that automatic indexes do not have there output row counts adjusted downward by supplementary constraints. check-in: eea47933 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/where.c.

  4287   4287     return SQLITE_OK;
  4288   4288   }
  4289   4289   
  4290   4290   /*
  4291   4291   ** Adjust the WhereLoop.nOut value downward to account for terms of the
  4292   4292   ** WHERE clause that reference the loop but which are not used by an
  4293   4293   ** index.
         4294  +*
         4295  +** For every WHERE clause term that is not used by the index
         4296  +** and which has a truth probability assigned by one of the likelihood(),
         4297  +** likely(), or unlikely() SQL functions, reduce the estimated number
         4298  +** of output rows by the probability specified.
  4294   4299   **
  4295         -** In the current implementation, the first extra WHERE clause term reduces
  4296         -** the number of output rows by a factor of 10 and each additional term
  4297         -** reduces the number of output rows by sqrt(2).
         4300  +** TUNING:  For every WHERE clause term that is not used by the index
         4301  +** and which does not have an assigned truth probability, heuristics
         4302  +** described below are used to try to estimate the truth probability.
         4303  +** TODO --> Perhaps this is something that could be improved by better
         4304  +** table statistics.
         4305  +**
         4306  +** Heuristic 1:  Estimate the truth probability as 6.25%.  The 6.25%
         4307  +** value corresponds to 1 in LogEst notation, so this means decrement
         4308  +** the WhereLoop.nOut field for every such WHERE clause term.
         4309  +**
         4310  +** Heuristic 2:  If there exists one or more WHERE clause terms of the
         4311  +** form "x==EXPR" and EXPR is not a constant 0 or 1, then make sure the
         4312  +** final output row estimate is no greater than 1/4 of the total number
         4313  +** of rows in the table.  In other words, assume that x==EXPR will filter
         4314  +** out at least 3 out of 4 rows.  If EXPR is -1 or 0 or 1, then maybe the
         4315  +** "x" column is boolean or else -1 or 0 or 1 is a common default value
         4316  +** on the "x" column and so in that case only cap the output row estimate
         4317  +** at 1/2 instead of 1/4.
  4298   4318   */
  4299   4319   static void whereLoopOutputAdjust(
  4300   4320     WhereClause *pWC,      /* The WHERE clause */
  4301   4321     WhereLoop *pLoop,      /* The loop to adjust downward */
  4302   4322     LogEst nRow            /* Number of rows in the entire table */
  4303   4323   ){
  4304   4324     WhereTerm *pTerm, *pX;
  4305   4325     Bitmask notAllowed = ~(pLoop->prereq|pLoop->maskSelf);
  4306         -  int i, j;
  4307         -  int nEq = 0;    /* Number of = constraints not within likely()/unlikely() */
         4326  +  int i, j, k;
         4327  +  LogEst iReduce = 0;    /* pLoop->nOut should not exceed nRow-iReduce */
  4308   4328   
  4309   4329     assert( (pLoop->wsFlags & WHERE_AUTO_INDEX)==0 );
  4310   4330     for(i=pWC->nTerm, pTerm=pWC->a; i>0; i--, pTerm++){
  4311   4331       if( (pTerm->wtFlags & TERM_VIRTUAL)!=0 ) break;
  4312   4332       if( (pTerm->prereqAll & pLoop->maskSelf)==0 ) continue;
  4313   4333       if( (pTerm->prereqAll & notAllowed)!=0 ) continue;
  4314   4334       for(j=pLoop->nLTerm-1; j>=0; j--){
................................................................................
  4315   4335         pX = pLoop->aLTerm[j];
  4316   4336         if( pX==0 ) continue;
  4317   4337         if( pX==pTerm ) break;
  4318   4338         if( pX->iParent>=0 && (&pWC->a[pX->iParent])==pTerm ) break;
  4319   4339       }
  4320   4340       if( j<0 ){
  4321   4341         if( pTerm->truthProb<=0 ){
         4342  +        /* If a truth probability is specified using the likelihood() hints,
         4343  +        ** then use the probability provided by the application. */
  4322   4344           pLoop->nOut += pTerm->truthProb;
  4323   4345         }else{
         4346  +        /* In the absence of explicit truth probabilities, use heuristics to
         4347  +        ** guess a reasonable truth probability. */
  4324   4348           pLoop->nOut--;
  4325         -        if( pTerm->eOperator&WO_EQ ) nEq++;
         4349  +        if( pTerm->eOperator&WO_EQ ){
         4350  +          Expr *pRight = pTerm->pExpr->pRight;
         4351  +          if( sqlite3ExprIsInteger(pRight, &k) && k>=(-1) && k<=1 ){
         4352  +            k = 10;
         4353  +          }else{
         4354  +            k = 20;
         4355  +          }
         4356  +          if( iReduce<k ) iReduce = k;
         4357  +        }
  4326   4358         }
  4327   4359       }
  4328   4360     }
  4329         -  /* TUNING:  If there is at least one equality constraint in the WHERE
  4330         -  ** clause that does not have a likelihood() explicitly assigned to it
  4331         -  ** then do not let the estimated number of output rows exceed half 
  4332         -  ** the number of rows in the table. */
  4333         -  if( nEq && pLoop->nOut>nRow-10 ){
  4334         -    pLoop->nOut = nRow - 10;
  4335         -  }
         4361  +  if( pLoop->nOut > nRow-iReduce )  pLoop->nOut = nRow - iReduce;
  4336   4362   }
  4337   4363   
  4338   4364   /*
  4339   4365   ** Adjust the cost C by the costMult facter T.  This only occurs if
  4340   4366   ** compiled with -DSQLITE_ENABLE_COSTMULT
  4341   4367   */
  4342   4368   #ifdef SQLITE_ENABLE_COSTMULT

Changes to test/scanstatus.test.

   264    264     PRAGMA foreign_keys=on;
   265    265   }
   266    266   do_execsql_test    4.2.1 { DELETE FROM p1 WHERE x=4 }
   267    267   do_scanstatus_test 4.2.2 { 
   268    268     nLoop 1 nVisit 1 nEst 1.0 zName sqlite_autoindex_p1_1 
   269    269     zExplain {SEARCH TABLE p1 USING INDEX sqlite_autoindex_p1_1 (x=?)}
   270    270   
   271         -  nLoop 1 nVisit 3 nEst 524288.0 zName c1 zExplain {SCAN TABLE c1}
          271  +  nLoop 1 nVisit 3 nEst 262144.0 zName c1 zExplain {SCAN TABLE c1}
   272    272   }
   273    273   
   274    274   #-------------------------------------------------------------------------
   275    275   # Further tests of different scan types.
   276    276   #
   277    277   reset_db
   278    278   proc tochar {i} {