Index: src/where.c ================================================================== --- src/where.c +++ src/where.c @@ -1572,10 +1572,15 @@ #ifndef SQLITE_OMIT_OR_OPTIMIZATION const int iCur = pSrc->iCursor; /* The cursor of the table to be accessed */ const Bitmask maskSrc = getMask(pWC->pMaskSet, iCur); /* Bitmask for pSrc */ WhereTerm * const pWCEnd = &pWC->a[pWC->nTerm]; /* End of pWC->a[] */ WhereTerm *pTerm; /* A single term of the WHERE clause */ + + /* No OR-clause optimization allowed if the NOT INDEXED clause is used */ + if( pSrc->notIndexed ){ + return; + } /* Search the WHERE clause terms for a usable WO_OR term. */ for(pTerm=pWC->a; pTermeOperator==WO_OR && ((pTerm->prereqAll & ~maskSrc) & notReady)==0 @@ -1615,12 +1620,13 @@ } /* If there is an ORDER BY clause, increase the scan cost to account ** for the cost of the sort. */ if( pOrderBy!=0 ){ + WHERETRACE(("... sorting increases OR cost %.9g to %.9g\n", + rTotal, rTotal+nRow*estLog(nRow))); rTotal += nRow*estLog(nRow); - WHERETRACE(("... sorting increases OR cost to %.9g\n", rTotal)); } /* If the cost of scanning using this OR term for optimization is ** less than the current cost stored in pCost, replace the contents ** of pCost. */ @@ -2567,18 +2573,18 @@ ** ** bInEst: ** Set to true if there was at least one "x IN (SELECT ...)" term used ** in determining the value of nInMul. ** - ** nBound: + ** estBound: ** An estimate on the amount of the table that must be searched. A ** value of 100 means the entire table is searched. Range constraints ** might reduce this to a value less than 100 to indicate that only ** a fraction of the table needs searching. In the absence of ** sqlite_stat2 ANALYZE data, a single inequality reduces the search ** space to 1/3rd its original size. So an x>? constraint reduces - ** nBound to 33. Two constraints (x>? AND x? AND xprereqRight; } - /* Determine the value of nBound. */ + /* Determine the value of estBound. */ if( nEqnColumn ){ int j = pProbe->aiColumn[nEq]; if( findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE|WO_GT|WO_GE, pIdx) ){ WhereTerm *pTop = findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE, pIdx); WhereTerm *pBtm = findTerm(pWC, iCur, j, notReady, WO_GT|WO_GE, pIdx); - whereRangeScanEst(pParse, pProbe, nEq, pBtm, pTop, &nBound); + whereRangeScanEst(pParse, pProbe, nEq, pBtm, pTop, &estBound); if( pTop ){ + nBound = 1; wsFlags |= WHERE_TOP_LIMIT; used |= pTop->prereqRight; } if( pBtm ){ + nBound++; wsFlags |= WHERE_BTM_LIMIT; used |= pBtm->prereqRight; } wsFlags |= (WHERE_COLUMN_RANGE|WHERE_ROWID_RANGE); } @@ -2702,12 +2711,12 @@ cost = nRow + nInMul*estLog(aiRowEst[0]); /* Adjust the number of rows and the cost downward to reflect rows ** that are excluded by range constraints. */ - nRow = (nRow * (double)nBound) / (double)100; - cost = (cost * (double)nBound) / (double)100; + nRow = (nRow * (double)estBound) / (double)100; + cost = (cost * (double)estBound) / (double)100; /* Add in the estimated cost of sorting the result */ if( bSort ){ cost += cost*estLog(cost); @@ -2725,27 +2734,39 @@ /* If there are additional constraints on this table that cannot ** be used with the current index, but which might lower the number ** of output rows, adjust the nRow value accordingly. This only ** matters if the current index is the least costly, so do not bother ** with this step if we already know this index will not be chosen. + ** Also, never reduce the output row count below 2 using this step. */ if( nRow>2 && cost<=pCost->rCost ){ int k; - int nSkip = nEq; + int nSkipEq = nEq; + int nSkipRange = nBound; Bitmask thisTab = getMask(pWC->pMaskSet, iCur); for(pTerm=pWC->a, k=pWC->nTerm; nRow>2 && k; k--, pTerm++){ if( pTerm->wtFlags & TERM_VIRTUAL ) continue; if( (pTerm->prereqAll & notReady)!=thisTab ) continue; if( pTerm->eOperator & (WO_EQ|WO_IN|WO_ISNULL) ){ - if( nSkip ){ + if( nSkipEq ){ /* Ignore the first nEq equality matches since the index ** has already accounted for these */ - nSkip--; + nSkipEq--; }else{ /* Assume each additional equality match reduces the result ** set size by a factor of 10 */ nRow /= 10; + } + }else if( pTerm->eOperator & (WO_LT|WO_LE|WO_GT|WO_GE) ){ + if( nSkipRange ){ + /* Ignore the first nBound range constraints since the index + ** has already accounted for these */ + nSkipRange--; + }else{ + /* Assume each additional range constraint reduces the result + ** set size by a factor of 3 */ + nRow /= 3; } }else{ /* Any other expression lowers the output row count by half */ nRow /= 2; } @@ -2753,21 +2774,22 @@ if( nRow<2 ) nRow = 2; } WHERETRACE(( - "%s(%s): nEq=%d nInMul=%d nBound=%d bSort=%d bLookup=%d wsFlags=0x%x\n" + "%s(%s): nEq=%d nInMul=%d estBound=%d bSort=%d bLookup=%d wsFlags=0x%x\n" " notReady=0x%llx nRow=%.2f cost=%.2f used=0x%llx\n", pSrc->pTab->zName, (pIdx ? pIdx->zName : "ipk"), - nEq, nInMul, nBound, bSort, bLookup, wsFlags, notReady, nRow, cost, used + nEq, nInMul, estBound, bSort, bLookup, wsFlags, + notReady, nRow, cost, used )); /* If this index is the best we have seen so far, then record this ** index and its cost in the pCost structure. */ if( (!pIdx || wsFlags) - && (costrCost || (cost==pCost->rCost && nRownRow)) + && (costrCost || (cost<=pCost->rCost && nRownRow)) ){ pCost->rCost = cost; pCost->nRow = nRow; pCost->used = used; pCost->plan.wsFlags = (wsFlags&wsFlagMask); @@ -4013,24 +4035,29 @@ memset(&bestPlan, 0, sizeof(bestPlan)); bestPlan.rCost = SQLITE_BIG_DBL; /* Loop through the remaining entries in the FROM clause to find the - ** next nested loop. The FROM clause entries may be iterated through + ** next nested loop. The loop tests all FROM clause entries ** either once or twice. ** - ** The first iteration, which is always performed, searches for the - ** FROM clause entry that permits the lowest-cost, "optimal" scan. In + ** The first test is always performed if there are two or more entries + ** remaining and never performed if there is only one FROM clause entry + ** to choose from. The first test looks for an "optimal" scan. In ** this context an optimal scan is one that uses the same strategy ** for the given FROM clause entry as would be selected if the entry ** were used as the innermost nested loop. In other words, a table ** is chosen such that the cost of running that table cannot be reduced - ** by waiting for other tables to run first. + ** by waiting for other tables to run first. This "optimal" test works + ** by first assuming that the FROM clause is on the inner loop and finding + ** its query plan, then checking to see if that query plan uses any + ** other FROM clause terms that are notReady. If no notReady terms are + ** used then the "optimal" query plan works. ** - ** The second iteration is only performed if no optimal scan strategies - ** were found by the first. This iteration is used to search for the - ** lowest cost scan overall. + ** The second loop iteration is only performed if no optimal scan + ** strategies were found by the first loop. This 2nd iteration is used to + ** search for the lowest cost scan overall. ** ** Previous versions of SQLite performed only the second iteration - ** the next outermost loop was always that with the lowest overall ** cost. However, this meant that SQLite could select the wrong plan ** for scripts such as the following: @@ -4044,13 +4071,12 @@ ** However, since the cost of a linear scan through table t2 is the same ** as the cost of a linear scan through table t1, a simple greedy ** algorithm may choose to use t2 for the outer loop, which is a much ** costlier approach. */ - for(isOptimal=1; isOptimal>=0 && bestJ<0; isOptimal--){ - Bitmask mask = (isOptimal ? 0 : notReady); - assert( (nTabList-iFrom)>1 || isOptimal ); + for(isOptimal=(iFrom=0; isOptimal--){ + Bitmask mask; /* Mask of tables not yet ready */ for(j=iFrom, pTabItem=&pTabList->a[j]; jiCursor); if( (m & notReady)==0 ){ if( j==iFrom ) iFrom++; continue; } + mask = (isOptimal ? m : notReady); pOrderBy = ((i==0 && ppOrderBy )?*ppOrderBy:0); assert( pTabItem->pTab ); #ifndef SQLITE_OMIT_VIRTUALTABLE if( IsVirtual(pTabItem->pTab) ){ @@ -4074,13 +4101,15 @@ bestBtreeIndex(pParse, pWC, pTabItem, mask, pOrderBy, &sCost); } assert( isOptimal || (sCost.used¬Ready)==0 ); if( (sCost.used¬Ready)==0 - && (j==iFrom || sCost.rCost=10 OR c=4) } } {2 4 5 scan 0 sort 0} do_test where7-1.10 { -breakpoint count_steps { SELECT a FROM t1 WHERE (b=3 OR c>=10 OR c=4 OR b>10) } } {2 4 5 scan 0 sort 0} -breakpoint do_test where7-1.11 { count_steps { SELECT a FROM t1 WHERE (d=5 AND b=3) OR c==100 ORDER BY a; } } {2 5 scan 0 sort 1} @@ -104,17 +102,11 @@ do_test where7-1.12 { count_steps { SELECT a FROM t1 WHERE (b BETWEEN 2 AND 4) OR c=100 ORDER BY a } } {1 2 3 5 scan 0 sort 1} -do_test where7-1.13.1 { - count_steps { - SELECT a FROM t1 WHERE (b BETWEEN 0 AND 2) OR (c BETWEEN 9 AND 999) - ORDER BY a DESC - } -} {5 4 1 scan 4 sort 0} -do_test where7-1.13.2 { +do_test where7-1.13 { count_steps { SELECT a FROM t1 WHERE (b BETWEEN 0 AND 2) OR (c BETWEEN 9 AND 999) ORDER BY +a DESC } } {5 4 1 scan 0 sort 1} Index: test/where8.test ================================================================== --- test/where8.test +++ test/where8.test @@ -284,12 +284,13 @@ do_test where8-3.15 { execsql_status { SELECT c FROM t1, t2 WHERE a BETWEEN 1 AND 2 OR a = ( SELECT sum(e IS NULL) FROM t2 AS inner WHERE t2.d>inner.d ) + ORDER BY c } -} {I II I II I II I II I II I II III I II III I II III I II III I II III 9 0} +} {I I I I I I I I I I II II II II II II II II II II III III III III III 9 1} #----------------------------------------------------------------------- # The following tests - where8-4.* - verify that adding or removing # indexes does not change the results returned by various queries. #