SQLite

Check-in [d5ebb78778]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Improvements to query planning for joins: Avoid unnecessary calls to "optimal scan" checks in cases where table reordering is not possible. Make sure optimal scan checks are carried out for CROSS JOINs and LEFT JOINs.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: d5ebb7877885839e93eee3b322624d4c4215c1c4
User & Date: drh 2013-01-16 00:46:09.175
Context
2013-01-16
20:33
Fix the activate_extensions pragma so that it is a no-op when the required argument is omitted. (check-in: 6195ebd833 user: drh tags: trunk)
17:08
Enhance the query planner to exploit transitivity of join constraints in a multi-way join. (check-in: 13171eb5dc user: drh tags: transitive-constraints)
00:46
Improvements to query planning for joins: Avoid unnecessary calls to "optimal scan" checks in cases where table reordering is not possible. Make sure optimal scan checks are carried out for CROSS JOINs and LEFT JOINs. (check-in: d5ebb78778 user: drh tags: trunk)
2013-01-15
18:49
Fix a missing word in a comment. Enhance the "wheretrace" debugging output to show the estimated cost of each table option while planning the join order. (check-in: ac4e119a87 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/where.c.
5068
5069
5070
5071
5072
5073
5074

5075
5076
5077
5078
5079
5080
5081
  for(sWBI.i=iFrom=0, pLevel=pWInfo->a; sWBI.i<nTabList; sWBI.i++, pLevel++){
    WhereCost bestPlan;         /* Most efficient plan seen so far */
    Index *pIdx;                /* Index for FROM table at pTabItem */
    int j;                      /* For looping over FROM tables */
    int bestJ = -1;             /* The value of j */
    Bitmask m;                  /* Bitmask value for j or bestJ */
    int isOptimal;              /* Iterator for optimal/non-optimal search */

    int nUnconstrained;         /* Number tables without INDEXED BY */
    Bitmask notIndexed;         /* Mask of tables that cannot use an index */

    memset(&bestPlan, 0, sizeof(bestPlan));
    bestPlan.rCost = SQLITE_BIG_DBL;
    WHERETRACE(("*** Begin search for loop %d ***\n", sWBI.i));








>







5068
5069
5070
5071
5072
5073
5074
5075
5076
5077
5078
5079
5080
5081
5082
  for(sWBI.i=iFrom=0, pLevel=pWInfo->a; sWBI.i<nTabList; sWBI.i++, pLevel++){
    WhereCost bestPlan;         /* Most efficient plan seen so far */
    Index *pIdx;                /* Index for FROM table at pTabItem */
    int j;                      /* For looping over FROM tables */
    int bestJ = -1;             /* The value of j */
    Bitmask m;                  /* Bitmask value for j or bestJ */
    int isOptimal;              /* Iterator for optimal/non-optimal search */
    int ckOptimal;              /* Do the optimal scan check */
    int nUnconstrained;         /* Number tables without INDEXED BY */
    Bitmask notIndexed;         /* Mask of tables that cannot use an index */

    memset(&bestPlan, 0, sizeof(bestPlan));
    bestPlan.rCost = SQLITE_BIG_DBL;
    WHERETRACE(("*** Begin search for loop %d ***\n", sWBI.i));

5118
5119
5120
5121
5122
5123
5124
5125






5126
5127
5128
5129
5130
5131
5132
5133
5134






















5135
5136
5137
5138
5139
5140
5141
    ** Since the cost of a linear scan through table t2 is the same 
    ** as the cost of a linear scan through table t1, a simple greedy 
    ** algorithm may choose to use t2 for the outer loop, which is a much
    ** costlier approach.
    */
    nUnconstrained = 0;
    notIndexed = 0;
    for(isOptimal=(iFrom<nTabList-1); isOptimal>=0 && bestJ<0; isOptimal--){






      for(j=iFrom, sWBI.pSrc=&pTabList->a[j]; j<nTabList; j++, sWBI.pSrc++){
        int doNotReorder;    /* True if this table should not be reordered */
  
        doNotReorder =  (sWBI.pSrc->jointype & (JT_LEFT|JT_CROSS))!=0;
        if( j!=iFrom && doNotReorder ) break;
        m = getMask(pMaskSet, sWBI.pSrc->iCursor);
        if( (m & sWBI.notValid)==0 ){
          if( j==iFrom ) iFrom++;
          continue;






















        }
        sWBI.notReady = (isOptimal ? m : sWBI.notValid);
        if( sWBI.pSrc->pIndex==0 ) nUnconstrained++;
  
        WHERETRACE(("   === trying table %d (%s) with isOptimal=%d ===\n",
                    j, sWBI.pSrc->pTab->zName, isOptimal));
        assert( sWBI.pSrc->pTab );







|
>
>
>
>
>
>

<
<
<
<




>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







5119
5120
5121
5122
5123
5124
5125
5126
5127
5128
5129
5130
5131
5132
5133




5134
5135
5136
5137
5138
5139
5140
5141
5142
5143
5144
5145
5146
5147
5148
5149
5150
5151
5152
5153
5154
5155
5156
5157
5158
5159
5160
5161
5162
5163
5164
5165
5166
    ** Since the cost of a linear scan through table t2 is the same 
    ** as the cost of a linear scan through table t1, a simple greedy 
    ** algorithm may choose to use t2 for the outer loop, which is a much
    ** costlier approach.
    */
    nUnconstrained = 0;
    notIndexed = 0;

    /* The optimal scan check only occurs if there are two or more tables
    ** available to be reordered */
    if( iFrom==nTabList-1 ){
      ckOptimal = 0;  /* Common case of just one table in the FROM clause */
    }else{
      ckOptimal = -1;
      for(j=iFrom, sWBI.pSrc=&pTabList->a[j]; j<nTabList; j++, sWBI.pSrc++){




        m = getMask(pMaskSet, sWBI.pSrc->iCursor);
        if( (m & sWBI.notValid)==0 ){
          if( j==iFrom ) iFrom++;
          continue;
        }
        if( j>iFrom && (sWBI.pSrc->jointype & (JT_LEFT|JT_CROSS))!=0 ) break;
        if( ++ckOptimal ) break;
        if( (sWBI.pSrc->jointype & JT_LEFT)!=0 ) break;
      }
    }
    assert( ckOptimal==0 || ckOptimal==1 );

    for(isOptimal=ckOptimal; isOptimal>=0 && bestJ<0; isOptimal--){
      for(j=iFrom, sWBI.pSrc=&pTabList->a[j]; j<nTabList; j++, sWBI.pSrc++){
        if( j>iFrom && (sWBI.pSrc->jointype & (JT_LEFT|JT_CROSS))!=0 ){
          /* This break and one like it in the ckOptimal computation loop
          ** above prevent table reordering across LEFT and CROSS JOINs.
          ** The LEFT JOIN case is necessary for correctness.  The prohibition
          ** against reordering across a CROSS JOIN is an SQLite feature that
          ** allows the developer to control table reordering */
          break;
        }
        m = getMask(pMaskSet, sWBI.pSrc->iCursor);
        if( (m & sWBI.notValid)==0 ){
          assert( j>iFrom );
          continue;
        }
        sWBI.notReady = (isOptimal ? m : sWBI.notValid);
        if( sWBI.pSrc->pIndex==0 ) nUnconstrained++;
  
        WHERETRACE(("   === trying table %d (%s) with isOptimal=%d ===\n",
                    j, sWBI.pSrc->pTab->zName, isOptimal));
        assert( sWBI.pSrc->pTab );
5157
5158
5159
5160
5161
5162
5163
5164
5165
5166
5167
5168
5169
5170
5171
                  || sWBI.cost.plan.u.pIdx==sWBI.pSrc->pIndex );

        if( isOptimal && (sWBI.cost.plan.wsFlags & WHERE_NOT_FULLSCAN)==0 ){
          notIndexed |= m;
        }
        if( isOptimal ){
          pWInfo->a[j].rOptCost = sWBI.cost.rCost;
        }else if( iFrom<nTabList-1 ){
          /* If two or more tables have nearly the same outer loop cost, but
          ** very different inner loop (optimal) cost, we want to choose
          ** for the outer loop that table which benefits the least from
          ** being in the inner loop.  The following code scales the 
          ** outer loop cost estimate to accomplish that. */
          WHERETRACE(("   scaling cost from %.1f to %.1f\n",
                      sWBI.cost.rCost,







|







5182
5183
5184
5185
5186
5187
5188
5189
5190
5191
5192
5193
5194
5195
5196
                  || sWBI.cost.plan.u.pIdx==sWBI.pSrc->pIndex );

        if( isOptimal && (sWBI.cost.plan.wsFlags & WHERE_NOT_FULLSCAN)==0 ){
          notIndexed |= m;
        }
        if( isOptimal ){
          pWInfo->a[j].rOptCost = sWBI.cost.rCost;
        }else if( ckOptimal ){
          /* If two or more tables have nearly the same outer loop cost, but
          ** very different inner loop (optimal) cost, we want to choose
          ** for the outer loop that table which benefits the least from
          ** being in the inner loop.  The following code scales the 
          ** outer loop cost estimate to accomplish that. */
          WHERETRACE(("   scaling cost from %.1f to %.1f\n",
                      sWBI.cost.rCost,
5203
5204
5205
5206
5207
5208
5209
5210




5211
5212
5213
5214




5215
5216
5217
5218
5219
5220
5221
                      "       cost=%.1f, nRow=%.1f, nOBSat=%d, wsFlags=%08x\n",
                      j, sWBI.pSrc->pTab->zName,
                      sWBI.cost.rCost, sWBI.cost.plan.nRow,
                      sWBI.cost.plan.nOBSat, sWBI.cost.plan.wsFlags));
          bestPlan = sWBI.cost;
          bestJ = j;
        }
        if( doNotReorder ) break;




      }
    }
    assert( bestJ>=0 );
    assert( sWBI.notValid & getMask(pMaskSet, pTabList->a[bestJ].iCursor) );




    WHERETRACE(("*** Optimizer selects table %d (%s) for loop %d with:\n"
                "    cost=%.1f, nRow=%.1f, nOBSat=%d, wsFlags=0x%08x\n",
                bestJ, pTabList->a[bestJ].pTab->zName,
                pLevel-pWInfo->a, bestPlan.rCost, bestPlan.plan.nRow,
                bestPlan.plan.nOBSat, bestPlan.plan.wsFlags));
    if( (bestPlan.plan.wsFlags & WHERE_DISTINCT)!=0 ){
      assert( pWInfo->eDistinct==0 );







|
>
>
>
>




>
>
>
>







5228
5229
5230
5231
5232
5233
5234
5235
5236
5237
5238
5239
5240
5241
5242
5243
5244
5245
5246
5247
5248
5249
5250
5251
5252
5253
5254
                      "       cost=%.1f, nRow=%.1f, nOBSat=%d, wsFlags=%08x\n",
                      j, sWBI.pSrc->pTab->zName,
                      sWBI.cost.rCost, sWBI.cost.plan.nRow,
                      sWBI.cost.plan.nOBSat, sWBI.cost.plan.wsFlags));
          bestPlan = sWBI.cost;
          bestJ = j;
        }

        /* In a join like "w JOIN x LEFT JOIN y JOIN z"  make sure that
        ** table y (and not table z) is always the next inner loop inside
        ** of table x. */
        if( (sWBI.pSrc->jointype & JT_LEFT)!=0 ) break;
      }
    }
    assert( bestJ>=0 );
    assert( sWBI.notValid & getMask(pMaskSet, pTabList->a[bestJ].iCursor) );
    assert( bestJ==iFrom || (pTabList->a[iFrom].jointype & JT_LEFT)==0 );
    testcase( bestJ>iFrom && (pTabList->a[iFrom].jointype & JT_CROSS)!=0 );
    testcase( bestJ>iFrom && bestJ<nTabList-1
                          && (pTabList->a[bestJ+1].jointype & JT_LEFT)!=0 );
    WHERETRACE(("*** Optimizer selects table %d (%s) for loop %d with:\n"
                "    cost=%.1f, nRow=%.1f, nOBSat=%d, wsFlags=0x%08x\n",
                bestJ, pTabList->a[bestJ].pTab->zName,
                pLevel-pWInfo->a, bestPlan.rCost, bestPlan.plan.nRow,
                bestPlan.plan.nOBSat, bestPlan.plan.wsFlags));
    if( (bestPlan.plan.wsFlags & WHERE_DISTINCT)!=0 ){
      assert( pWInfo->eDistinct==0 );