Index: src/where.c ================================================================== --- src/where.c +++ src/where.c @@ -4289,24 +4289,44 @@ /* ** Adjust the WhereLoop.nOut value downward to account for terms of the ** WHERE clause that reference the loop but which are not used by an ** index. +* +** For every WHERE clause term that is not used by the index +** and which has a truth probability assigned by one of the likelihood(), +** likely(), or unlikely() SQL functions, reduce the estimated number +** of output rows by the probability specified. +** +** TUNING: For every WHERE clause term that is not used by the index +** and which does not have an assigned truth probability, heuristics +** described below are used to try to estimate the truth probability. +** TODO --> Perhaps this is something that could be improved by better +** table statistics. +** +** Heuristic 1: Estimate the truth probability as 6.25%. The 6.25% +** value corresponds to 1 in LogEst notation, so this means decrement +** the WhereLoop.nOut field for every such WHERE clause term. ** -** In the current implementation, the first extra WHERE clause term reduces -** the number of output rows by a factor of 10 and each additional term -** reduces the number of output rows by sqrt(2). +** Heuristic 2: If there exists one or more WHERE clause terms of the +** form "x==EXPR" and EXPR is not a constant 0 or 1, then make sure the +** final output row estimate is no greater than 1/4 of the total number +** of rows in the table. In other words, assume that x==EXPR will filter +** out at least 3 out of 4 rows. If EXPR is -1 or 0 or 1, then maybe the +** "x" column is boolean or else -1 or 0 or 1 is a common default value +** on the "x" column and so in that case only cap the output row estimate +** at 1/2 instead of 1/4. */ static void whereLoopOutputAdjust( WhereClause *pWC, /* The WHERE clause */ WhereLoop *pLoop, /* The loop to adjust downward */ LogEst nRow /* Number of rows in the entire table */ ){ WhereTerm *pTerm, *pX; Bitmask notAllowed = ~(pLoop->prereq|pLoop->maskSelf); - int i, j; - int nEq = 0; /* Number of = constraints not within likely()/unlikely() */ + int i, j, k; + LogEst iReduce = 0; /* pLoop->nOut should not exceed nRow-iReduce */ assert( (pLoop->wsFlags & WHERE_AUTO_INDEX)==0 ); for(i=pWC->nTerm, pTerm=pWC->a; i>0; i--, pTerm++){ if( (pTerm->wtFlags & TERM_VIRTUAL)!=0 ) break; if( (pTerm->prereqAll & pLoop->maskSelf)==0 ) continue; @@ -4317,24 +4337,30 @@ if( pX==pTerm ) break; if( pX->iParent>=0 && (&pWC->a[pX->iParent])==pTerm ) break; } if( j<0 ){ if( pTerm->truthProb<=0 ){ + /* If a truth probability is specified using the likelihood() hints, + ** then use the probability provided by the application. */ pLoop->nOut += pTerm->truthProb; }else{ + /* In the absence of explicit truth probabilities, use heuristics to + ** guess a reasonable truth probability. */ pLoop->nOut--; - if( pTerm->eOperator&WO_EQ ) nEq++; + if( pTerm->eOperator&WO_EQ ){ + Expr *pRight = pTerm->pExpr->pRight; + if( sqlite3ExprIsInteger(pRight, &k) && k>=(-1) && k<=1 ){ + k = 10; + }else{ + k = 20; + } + if( iReducenOut>nRow-10 ){ - pLoop->nOut = nRow - 10; - } + if( pLoop->nOut > nRow-iReduce ) pLoop->nOut = nRow - iReduce; } /* ** Adjust the cost C by the costMult facter T. This only occurs if ** compiled with -DSQLITE_ENABLE_COSTMULT Index: test/scanstatus.test ================================================================== --- test/scanstatus.test +++ test/scanstatus.test @@ -266,11 +266,11 @@ do_execsql_test 4.2.1 { DELETE FROM p1 WHERE x=4 } do_scanstatus_test 4.2.2 { nLoop 1 nVisit 1 nEst 1.0 zName sqlite_autoindex_p1_1 zExplain {SEARCH TABLE p1 USING INDEX sqlite_autoindex_p1_1 (x=?)} - nLoop 1 nVisit 3 nEst 524288.0 zName c1 zExplain {SCAN TABLE c1} + nLoop 1 nVisit 3 nEst 262144.0 zName c1 zExplain {SCAN TABLE c1} } #------------------------------------------------------------------------- # Further tests of different scan types. #