/ Check-in [defaf0d9]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Further refinements to table order selection on join query planning.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: defaf0d99a807027f8883bf821b6482025f9f54e
User & Date: drh 2010-04-15 01:04:54
References
2010-08-04
18:43 New ticket [13f033c8] Performance regression. artifact: 07edd854 user: drh
Context
2010-04-15
13:29
The query planner fix of check-in [33b1f584ef] should have been on the trunk. check-in: f538d759 user: drh tags: trunk
02:37
Bring over the recent query planner enhancements from the trunk. check-in: 82969f27 user: drh tags: wal
01:04
Further refinements to table order selection on join query planning. check-in: defaf0d9 user: drh tags: trunk
2010-04-14
19:01
The query planner uses non-indexable WHERE clause terms to reduce the estimated number of output rows, then uses the estimated number of output rows as a tie-breaker when choosing table order. check-in: b87cb0c2 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/where.c.

  1570   1570     WhereCost *pCost            /* Lowest cost query plan */
  1571   1571   ){
  1572   1572   #ifndef SQLITE_OMIT_OR_OPTIMIZATION
  1573   1573     const int iCur = pSrc->iCursor;   /* The cursor of the table to be accessed */
  1574   1574     const Bitmask maskSrc = getMask(pWC->pMaskSet, iCur);  /* Bitmask for pSrc */
  1575   1575     WhereTerm * const pWCEnd = &pWC->a[pWC->nTerm];        /* End of pWC->a[] */
  1576   1576     WhereTerm *pTerm;                 /* A single term of the WHERE clause */
         1577  +
         1578  +  /* No OR-clause optimization allowed if the NOT INDEXED clause is used */
         1579  +  if( pSrc->notIndexed ){
         1580  +    return;
         1581  +  }
  1577   1582   
  1578   1583     /* Search the WHERE clause terms for a usable WO_OR term. */
  1579   1584     for(pTerm=pWC->a; pTerm<pWCEnd; pTerm++){
  1580   1585       if( pTerm->eOperator==WO_OR 
  1581   1586        && ((pTerm->prereqAll & ~maskSrc) & notReady)==0
  1582   1587        && (pTerm->u.pOrInfo->indexable & maskSrc)!=0 
  1583   1588       ){
................................................................................
  1613   1618           used |= sTermCost.used;
  1614   1619           if( rTotal>=pCost->rCost ) break;
  1615   1620         }
  1616   1621   
  1617   1622         /* If there is an ORDER BY clause, increase the scan cost to account 
  1618   1623         ** for the cost of the sort. */
  1619   1624         if( pOrderBy!=0 ){
         1625  +        WHERETRACE(("... sorting increases OR cost %.9g to %.9g\n",
         1626  +                    rTotal, rTotal+nRow*estLog(nRow)));
  1620   1627           rTotal += nRow*estLog(nRow);
  1621         -        WHERETRACE(("... sorting increases OR cost to %.9g\n", rTotal));
  1622   1628         }
  1623   1629   
  1624   1630         /* If the cost of scanning using this OR term for optimization is
  1625   1631         ** less than the current cost stored in pCost, replace the contents
  1626   1632         ** of pCost. */
  1627   1633         WHERETRACE(("... multi-index OR cost=%.9g nrow=%.9g\n", rTotal, nRow));
  1628   1634         if( rTotal<pCost->rCost ){
................................................................................
  2565   2571       **    the sub-select is assumed to return 25 rows for the purposes of 
  2566   2572       **    determining nInMul.
  2567   2573       **
  2568   2574       **  bInEst:  
  2569   2575       **    Set to true if there was at least one "x IN (SELECT ...)" term used 
  2570   2576       **    in determining the value of nInMul.
  2571   2577       **
  2572         -    **  nBound:
         2578  +    **  estBound:
  2573   2579       **    An estimate on the amount of the table that must be searched.  A
  2574   2580       **    value of 100 means the entire table is searched.  Range constraints
  2575   2581       **    might reduce this to a value less than 100 to indicate that only
  2576   2582       **    a fraction of the table needs searching.  In the absence of
  2577   2583       **    sqlite_stat2 ANALYZE data, a single inequality reduces the search
  2578   2584       **    space to 1/3rd its original size.  So an x>? constraint reduces
  2579         -    **    nBound to 33.  Two constraints (x>? AND x<?) reduce nBound to 11.
         2585  +    **    estBound to 33.  Two constraints (x>? AND x<?) reduce estBound to 11.
  2580   2586       **
  2581   2587       **  bSort:   
  2582   2588       **    Boolean. True if there is an ORDER BY clause that will require an 
  2583   2589       **    external sort (i.e. scanning the index being evaluated will not 
  2584   2590       **    correctly order records).
  2585   2591       **
  2586   2592       **  bLookup: 
................................................................................
  2594   2600       **
  2595   2601       **             SELECT a, b    FROM tbl WHERE a = 1;
  2596   2602       **             SELECT a, b, c FROM tbl WHERE a = 1;
  2597   2603       */
  2598   2604       int nEq;
  2599   2605       int bInEst = 0;
  2600   2606       int nInMul = 1;
  2601         -    int nBound = 100;
         2607  +    int estBound = 100;
         2608  +    int nBound = 0;             /* Number of range constraints seen */
  2602   2609       int bSort = 0;
  2603   2610       int bLookup = 0;
  2604   2611       WhereTerm *pTerm;           /* A single term of the WHERE clause */
  2605   2612   
  2606   2613       /* Determine the values of nEq and nInMul */
  2607   2614       for(nEq=0; nEq<pProbe->nColumn; nEq++){
  2608   2615         int j = pProbe->aiColumn[nEq];
................................................................................
  2620   2627           }
  2621   2628         }else if( pTerm->eOperator & WO_ISNULL ){
  2622   2629           wsFlags |= WHERE_COLUMN_NULL;
  2623   2630         }
  2624   2631         used |= pTerm->prereqRight;
  2625   2632       }
  2626   2633   
  2627         -    /* Determine the value of nBound. */
         2634  +    /* Determine the value of estBound. */
  2628   2635       if( nEq<pProbe->nColumn ){
  2629   2636         int j = pProbe->aiColumn[nEq];
  2630   2637         if( findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE|WO_GT|WO_GE, pIdx) ){
  2631   2638           WhereTerm *pTop = findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE, pIdx);
  2632   2639           WhereTerm *pBtm = findTerm(pWC, iCur, j, notReady, WO_GT|WO_GE, pIdx);
  2633         -        whereRangeScanEst(pParse, pProbe, nEq, pBtm, pTop, &nBound);
         2640  +        whereRangeScanEst(pParse, pProbe, nEq, pBtm, pTop, &estBound);
  2634   2641           if( pTop ){
         2642  +          nBound = 1;
  2635   2643             wsFlags |= WHERE_TOP_LIMIT;
  2636   2644             used |= pTop->prereqRight;
  2637   2645           }
  2638   2646           if( pBtm ){
         2647  +          nBound++;
  2639   2648             wsFlags |= WHERE_BTM_LIMIT;
  2640   2649             used |= pBtm->prereqRight;
  2641   2650           }
  2642   2651           wsFlags |= (WHERE_COLUMN_RANGE|WHERE_ROWID_RANGE);
  2643   2652         }
  2644   2653       }else if( pProbe->onError!=OE_None ){
  2645   2654         testcase( wsFlags & WHERE_COLUMN_IN );
................................................................................
  2700   2709       ** rows plus log2(table-size) times the number of binary searches.
  2701   2710       */
  2702   2711       cost = nRow + nInMul*estLog(aiRowEst[0]);
  2703   2712   
  2704   2713       /* Adjust the number of rows and the cost downward to reflect rows
  2705   2714       ** that are excluded by range constraints.
  2706   2715       */
  2707         -    nRow = (nRow * (double)nBound) / (double)100;
  2708         -    cost = (cost * (double)nBound) / (double)100;
         2716  +    nRow = (nRow * (double)estBound) / (double)100;
         2717  +    cost = (cost * (double)estBound) / (double)100;
  2709   2718   
  2710   2719       /* Add in the estimated cost of sorting the result
  2711   2720       */
  2712   2721       if( bSort ){
  2713   2722         cost += cost*estLog(cost);
  2714   2723       }
  2715   2724   
................................................................................
  2723   2732       /**** Cost of using this index has now been computed ****/
  2724   2733   
  2725   2734       /* If there are additional constraints on this table that cannot
  2726   2735       ** be used with the current index, but which might lower the number
  2727   2736       ** of output rows, adjust the nRow value accordingly.  This only 
  2728   2737       ** matters if the current index is the least costly, so do not bother
  2729   2738       ** with this step if we already know this index will not be chosen.
         2739  +    ** Also, never reduce the output row count below 2 using this step.
  2730   2740       */
  2731   2741       if( nRow>2 && cost<=pCost->rCost ){
  2732   2742         int k;
  2733         -      int nSkip = nEq;
         2743  +      int nSkipEq = nEq;
         2744  +      int nSkipRange = nBound;
  2734   2745         Bitmask thisTab = getMask(pWC->pMaskSet, iCur);
  2735   2746         for(pTerm=pWC->a, k=pWC->nTerm; nRow>2 && k; k--, pTerm++){
  2736   2747           if( pTerm->wtFlags & TERM_VIRTUAL ) continue;
  2737   2748           if( (pTerm->prereqAll & notReady)!=thisTab ) continue;
  2738   2749           if( pTerm->eOperator & (WO_EQ|WO_IN|WO_ISNULL) ){
  2739         -          if( nSkip ){
         2750  +          if( nSkipEq ){
  2740   2751               /* Ignore the first nEq equality matches since the index
  2741   2752               ** has already accounted for these */
  2742         -            nSkip--;
         2753  +            nSkipEq--;
  2743   2754             }else{
  2744   2755               /* Assume each additional equality match reduces the result
  2745   2756               ** set size by a factor of 10 */
  2746   2757               nRow /= 10;
         2758  +          }
         2759  +        }else if( pTerm->eOperator & (WO_LT|WO_LE|WO_GT|WO_GE) ){
         2760  +          if( nSkipRange ){
         2761  +            /* Ignore the first nBound range constraints since the index
         2762  +            ** has already accounted for these */
         2763  +            nSkipRange--;
         2764  +          }else{
         2765  +            /* Assume each additional range constraint reduces the result
         2766  +            ** set size by a factor of 3 */
         2767  +            nRow /= 3;
  2747   2768             }
  2748   2769           }else{
  2749   2770             /* Any other expression lowers the output row count by half */
  2750   2771             nRow /= 2;
  2751   2772           }
  2752   2773         }
  2753   2774         if( nRow<2 ) nRow = 2;
  2754   2775       }
  2755   2776   
  2756   2777   
  2757   2778       WHERETRACE((
  2758         -      "%s(%s): nEq=%d nInMul=%d nBound=%d bSort=%d bLookup=%d wsFlags=0x%x\n"
         2779  +      "%s(%s): nEq=%d nInMul=%d estBound=%d bSort=%d bLookup=%d wsFlags=0x%x\n"
  2759   2780         "         notReady=0x%llx nRow=%.2f cost=%.2f used=0x%llx\n",
  2760   2781         pSrc->pTab->zName, (pIdx ? pIdx->zName : "ipk"), 
  2761         -      nEq, nInMul, nBound, bSort, bLookup, wsFlags, notReady, nRow, cost, used
         2782  +      nEq, nInMul, estBound, bSort, bLookup, wsFlags,
         2783  +      notReady, nRow, cost, used
  2762   2784       ));
  2763   2785   
  2764   2786       /* If this index is the best we have seen so far, then record this
  2765   2787       ** index and its cost in the pCost structure.
  2766   2788       */
  2767   2789       if( (!pIdx || wsFlags)
  2768         -     && (cost<pCost->rCost || (cost==pCost->rCost && nRow<pCost->nRow))
         2790  +     && (cost<pCost->rCost || (cost<=pCost->rCost && nRow<pCost->nRow))
  2769   2791       ){
  2770   2792         pCost->rCost = cost;
  2771   2793         pCost->nRow = nRow;
  2772   2794         pCost->used = used;
  2773   2795         pCost->plan.wsFlags = (wsFlags&wsFlagMask);
  2774   2796         pCost->plan.nEq = nEq;
  2775   2797         pCost->plan.u.pIdx = pIdx;
................................................................................
  4011   4033       Bitmask m;                  /* Bitmask value for j or bestJ */
  4012   4034       int isOptimal;              /* Iterator for optimal/non-optimal search */
  4013   4035   
  4014   4036       memset(&bestPlan, 0, sizeof(bestPlan));
  4015   4037       bestPlan.rCost = SQLITE_BIG_DBL;
  4016   4038   
  4017   4039       /* Loop through the remaining entries in the FROM clause to find the
  4018         -    ** next nested loop. The FROM clause entries may be iterated through
         4040  +    ** next nested loop. The loop tests all FROM clause entries
  4019   4041       ** either once or twice. 
  4020   4042       **
  4021         -    ** The first iteration, which is always performed, searches for the
  4022         -    ** FROM clause entry that permits the lowest-cost, "optimal" scan. In
         4043  +    ** The first test is always performed if there are two or more entries
         4044  +    ** remaining and never performed if there is only one FROM clause entry
         4045  +    ** to choose from.  The first test looks for an "optimal" scan.  In
  4023   4046       ** this context an optimal scan is one that uses the same strategy
  4024   4047       ** for the given FROM clause entry as would be selected if the entry
  4025   4048       ** were used as the innermost nested loop.  In other words, a table
  4026   4049       ** is chosen such that the cost of running that table cannot be reduced
  4027         -    ** by waiting for other tables to run first.
         4050  +    ** by waiting for other tables to run first.  This "optimal" test works
         4051  +    ** by first assuming that the FROM clause is on the inner loop and finding
         4052  +    ** its query plan, then checking to see if that query plan uses any
         4053  +    ** other FROM clause terms that are notReady.  If no notReady terms are
         4054  +    ** used then the "optimal" query plan works.
  4028   4055       **
  4029         -    ** The second iteration is only performed if no optimal scan strategies
  4030         -    ** were found by the first. This iteration is used to search for the
  4031         -    ** lowest cost scan overall.
         4056  +    ** The second loop iteration is only performed if no optimal scan
         4057  +    ** strategies were found by the first loop. This 2nd iteration is used to
         4058  +    ** search for the lowest cost scan overall.
  4032   4059       **
  4033   4060       ** Previous versions of SQLite performed only the second iteration -
  4034   4061       ** the next outermost loop was always that with the lowest overall
  4035   4062       ** cost. However, this meant that SQLite could select the wrong plan
  4036   4063       ** for scripts such as the following:
  4037   4064       **   
  4038   4065       **   CREATE TABLE t1(a, b); 
................................................................................
  4042   4069       ** The best strategy is to iterate through table t1 first. However it
  4043   4070       ** is not possible to determine this with a simple greedy algorithm.
  4044   4071       ** However, since the cost of a linear scan through table t2 is the same 
  4045   4072       ** as the cost of a linear scan through table t1, a simple greedy 
  4046   4073       ** algorithm may choose to use t2 for the outer loop, which is a much
  4047   4074       ** costlier approach.
  4048   4075       */
  4049         -    for(isOptimal=1; isOptimal>=0 && bestJ<0; isOptimal--){
  4050         -      Bitmask mask = (isOptimal ? 0 : notReady);
  4051         -      assert( (nTabList-iFrom)>1 || isOptimal );
         4076  +    for(isOptimal=(iFrom<nTabList-1); isOptimal>=0; isOptimal--){
         4077  +      Bitmask mask;  /* Mask of tables not yet ready */
  4052   4078         for(j=iFrom, pTabItem=&pTabList->a[j]; j<nTabList; j++, pTabItem++){
  4053   4079           int doNotReorder;    /* True if this table should not be reordered */
  4054   4080           WhereCost sCost;     /* Cost information from best[Virtual]Index() */
  4055   4081           ExprList *pOrderBy;  /* ORDER BY clause for index to optimize */
  4056   4082     
  4057   4083           doNotReorder =  (pTabItem->jointype & (JT_LEFT|JT_CROSS))!=0;
  4058   4084           if( j!=iFrom && doNotReorder ) break;
  4059   4085           m = getMask(pMaskSet, pTabItem->iCursor);
  4060   4086           if( (m & notReady)==0 ){
  4061   4087             if( j==iFrom ) iFrom++;
  4062   4088             continue;
  4063   4089           }
         4090  +        mask = (isOptimal ? m : notReady);
  4064   4091           pOrderBy = ((i==0 && ppOrderBy )?*ppOrderBy:0);
  4065   4092     
  4066   4093           assert( pTabItem->pTab );
  4067   4094   #ifndef SQLITE_OMIT_VIRTUALTABLE
  4068   4095           if( IsVirtual(pTabItem->pTab) ){
  4069   4096             sqlite3_index_info **pp = &pWInfo->a[j].pIdxInfo;
  4070   4097             bestVirtualIndex(pParse, pWC, pTabItem, mask, pOrderBy, &sCost, pp);
................................................................................
  4072   4099   #endif
  4073   4100           {
  4074   4101             bestBtreeIndex(pParse, pWC, pTabItem, mask, pOrderBy, &sCost);
  4075   4102           }
  4076   4103           assert( isOptimal || (sCost.used&notReady)==0 );
  4077   4104   
  4078   4105           if( (sCost.used&notReady)==0
  4079         -         && (j==iFrom || sCost.rCost<bestPlan.rCost
  4080         -             || (sCost.rCost==bestPlan.rCost && sCost.nRow<bestPlan.nRow))
         4106  +         && (bestJ<0 || sCost.rCost<bestPlan.rCost
         4107  +             || (sCost.rCost<=bestPlan.rCost && sCost.nRow<bestPlan.nRow))
  4081   4108           ){
         4109  +          WHERETRACE(("... best so far with cost=%g and nRow=%g\n",
         4110  +                      sCost.rCost, sCost.nRow));
  4082   4111             bestPlan = sCost;
  4083   4112             bestJ = j;
  4084   4113           }
  4085   4114           if( doNotReorder ) break;
  4086   4115         }
  4087   4116       }
  4088   4117       assert( bestJ>=0 );

Changes to test/select2.test.

   149    149       INSERT INTO bb VALUES(4);
   150    150       SELECT * FROM aa, bb WHERE max(a,b)>2;
   151    151     }
   152    152   } {1 4 3 2 3 4}
   153    153   do_test select2-4.2 {
   154    154     execsql {
   155    155       INSERT INTO bb VALUES(0);
   156         -    SELECT * FROM aa, bb WHERE b;
          156  +    SELECT * FROM aa CROSS JOIN bb WHERE b;
   157    157     }
   158    158   } {1 2 1 4 3 2 3 4}
   159    159   do_test select2-4.3 {
   160    160     execsql {
   161         -    SELECT * FROM aa, bb WHERE NOT b;
          161  +    SELECT * FROM aa CROSS JOIN bb WHERE NOT b;
   162    162     }
   163    163   } {1 0 3 0}
   164    164   do_test select2-4.4 {
   165    165     execsql {
   166    166       SELECT * FROM aa, bb WHERE min(a,b);
   167    167     }
   168    168   } {1 2 1 4 3 2 3 4}

Changes to test/triggerA.test.

    73     73           UNION SELECT y FROM t1 WHERE x BETWEEN 3 and 5;
    74     74        SELECT * FROM v4 ORDER BY 1;
    75     75     }
    76     76   } {1 10 2 3 4 5 6 7 8 9 five four three}
    77     77   do_test triggerA-1.6 {
    78     78     db eval {
    79     79        CREATE VIEW v5 AS SELECT x, b FROM t1, t2 WHERE y=c;
    80         -     SELECT * FROM v5;
           80  +     SELECT * FROM v5 ORDER BY x DESC;
    81     81     }
    82     82   } {10 1003 9 904 8 805 7 705 6 603 5 504 4 404 3 305 2 203 1 103}
    83     83   
    84     84   # Create INSTEAD OF triggers on the views.  Run UPDATE and DELETE statements
    85     85   # using those triggers.  Verify correct operation.
    86     86   #
    87     87   do_test triggerA-2.1 {

Changes to test/where3.test.

   195    195   } {tC {} tA * tB * tD *}
   196    196   do_test where3-2.5 {
   197    197     queryplan {
   198    198       SELECT * FROM tA, tB, tC LEFT JOIN tD ON dpk=cx
   199    199        WHERE cpk=ax AND bpk=cx
   200    200     }
   201    201   } {tA {} tC * tB * tD *}
   202         -do_test where3-2.5 {
          202  +do_test where3-2.6 {
   203    203     queryplan {
   204    204       SELECT * FROM tA, tB, tC LEFT JOIN tD ON dpk=cx
   205    205        WHERE bpk=cx AND apk=bx
   206    206     }
   207    207   } {tC {} tB * tA * tD *}
   208         -do_test where3-2.6 {
          208  +do_test where3-2.7 {
   209    209     queryplan {
   210    210       SELECT * FROM tA, tB, tC LEFT JOIN tD ON dpk=cx
   211    211        WHERE cpk=bx AND apk=cx
   212    212     }
   213    213   } {tB {} tC * tA * tD *}
   214    214   
   215    215   
   216    216   finish_test

Changes to test/where7.test.

    86     86   } {2 4 5 scan 0 sort 0}
    87     87   do_test where7-1.9 {
    88     88     count_steps {
    89     89       SELECT a FROM t1 WHERE (b=3 OR c>=10 OR c=4)
    90     90     }
    91     91   } {2 4 5 scan 0 sort 0}
    92     92   do_test where7-1.10 {
    93         -breakpoint
    94     93     count_steps {
    95     94       SELECT a FROM t1 WHERE (b=3 OR c>=10 OR c=4 OR b>10)
    96     95     }
    97     96   } {2 4 5 scan 0 sort 0}
    98         -breakpoint
    99     97   do_test where7-1.11 {
   100     98     count_steps {
   101     99       SELECT a FROM t1 WHERE (d=5 AND b=3) OR c==100 ORDER BY a;
   102    100     }
   103    101   } {2 5 scan 0 sort 1}
   104    102   do_test where7-1.12 {
   105    103     count_steps {
   106    104       SELECT a FROM t1 WHERE (b BETWEEN 2 AND 4) OR c=100 ORDER BY a
   107    105     }
   108    106   } {1 2 3 5 scan 0 sort 1}
   109         -do_test where7-1.13.1 {
   110         -  count_steps {
   111         -    SELECT a FROM t1 WHERE (b BETWEEN 0 AND 2) OR (c BETWEEN 9 AND 999)
   112         -    ORDER BY a DESC
   113         -  }
   114         -} {5 4 1 scan 4 sort 0}
   115         -do_test where7-1.13.2 {
          107  +do_test where7-1.13 {
   116    108     count_steps {
   117    109       SELECT a FROM t1 WHERE (b BETWEEN 0 AND 2) OR (c BETWEEN 9 AND 999)
   118    110       ORDER BY +a DESC
   119    111     }
   120    112   } {5 4 1 scan 0 sort 1}
   121    113   
   122    114   do_test where7-1.14 {

Changes to test/where8.test.

   282    282   } {IV V 9 0}
   283    283   
   284    284   do_test where8-3.15 {
   285    285     execsql_status {
   286    286       SELECT c FROM t1, t2 WHERE a BETWEEN 1 AND 2 OR a = (
   287    287         SELECT sum(e IS NULL) FROM t2 AS inner WHERE t2.d>inner.d
   288    288       )
          289  +    ORDER BY c
   289    290     }
   290         -} {I II I II I II I II I II I II III I II III I II III I II III I II III 9 0}
          291  +} {I I I I I I I I I I II II II II II II II II II II III III III III III 9 1}
   291    292   
   292    293   #-----------------------------------------------------------------------
   293    294   # The following tests - where8-4.* - verify that adding or removing 
   294    295   # indexes does not change the results returned by various queries.
   295    296   #
   296    297   do_test where8-4.1 {
   297    298     execsql {