/ Check-in [6bfc5c69]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Use histogram data to improve the row-count estimates on equality constraints.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | stat2-enhancement
Files: files | file ages | folders
SHA1: 6bfc5c69eb22938972bbf4e60179952dc215f770
User & Date: drh 2011-01-20 16:52:09
Context
2011-01-20
20:36
Update ANALYZE test cases to check out the use of histograms for equality constraints. check-in: c7b59afa user: drh tags: stat2-enhancement
16:52
Use histogram data to improve the row-count estimates on equality constraints. check-in: 6bfc5c69 user: drh tags: stat2-enhancement
02:56
The first of a planned series of enhancements to the query planner that enable it to make better use of sqlite_stat2 histograms when the table has many repeated values. check-in: 2cd374cd user: drh tags: stat2-enhancement
Changes
Hide Diffs Unified Diffs Show Whitespace Changes Patch

Changes to src/where.c.

2457
2458
2459
2460
2461
2462
2463













































2464
2465
2466
2467
2468
2469
2470
....
2624
2625
2626
2627
2628
2629
2630



2631
2632
2633
2634
2635
2636
2637
....
2643
2644
2645
2646
2647
2648
2649





2650
2651
2652
2653
2654
2655
2656
....
2719
2720
2721
2722
2723
2724
2725











2726
2727
2728
2729
2730
2731
2732
    *piEst = 11;
  }else{
    *piEst = 33;
  }
  return rc;
}















































/*
** Find the query plan for accessing a particular table.  Write the
** best query plan and its cost into the WhereCost object supplied as the
** last parameter.
**
** The lowest cost plan wins.  The cost is an estimate of the amount of
................................................................................
    int bInEst = 0;
    int nInMul = 1;
    int estBound = 100;
    int nBound = 0;             /* Number of range constraints seen */
    int bSort = 0;
    int bLookup = 0;
    WhereTerm *pTerm;           /* A single term of the WHERE clause */




    /* Determine the values of nEq and nInMul */
    for(nEq=0; nEq<pProbe->nColumn; nEq++){
      int j = pProbe->aiColumn[nEq];
      pTerm = findTerm(pWC, iCur, j, notReady, eqTermMask, pIdx);
      if( pTerm==0 ) break;
      wsFlags |= (WHERE_COLUMN_EQ|WHERE_ROWID_EQ);
................................................................................
          bInEst = 1;
        }else if( ALWAYS(pExpr->x.pList) ){
          nInMul *= pExpr->x.pList->nExpr + 1;
        }
      }else if( pTerm->eOperator & WO_ISNULL ){
        wsFlags |= WHERE_COLUMN_NULL;
      }





      used |= pTerm->prereqRight;
    }

    /* Determine the value of estBound. */
    if( nEq<pProbe->nColumn ){
      int j = pProbe->aiColumn[nEq];
      if( findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE|WO_GT|WO_GE, pIdx) ){
................................................................................
    */
    nRow = (double)(aiRowEst[nEq] * nInMul);
    if( bInEst && nRow*2>aiRowEst[0] ){
      nRow = aiRowEst[0]/2;
      nInMul = (int)(nRow / aiRowEst[nEq]);
    }












    /* Assume constant cost to access a row and logarithmic cost to
    ** do a binary search.  Hence, the initial cost is the number of output
    ** rows plus log2(table-size) times the number of binary searches.
    */
    cost = nRow + nInMul*estLog(aiRowEst[0]);

    /* Adjust the number of rows and the cost downward to reflect rows







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>
>
>







 







>
>
>
>
>







 







>
>
>
>
>
>
>
>
>
>
>







2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
....
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
....
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
....
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
    *piEst = 11;
  }else{
    *piEst = 33;
  }
  return rc;
}

#ifdef SQLITE_ENABLE_STAT2
/*
** Estimate the number of rows that will be returned based on
** an equality constraint x=VALUE and where that VALUE occurs in
** the histogram data.  This only works when x is the left-most
** column of an index and sqlite_stat2 histogram data is available
** for that index.
**
** Write the estimated row count into *pnRow.  If unable to make
** an estimate, leave *pnRow unchanged.
*/
void whereEqScanEst(
  Parse *pParse,       /* Parsing & code generating context */
  Index *p,            /* The index whose left-most column is pTerm */
  WhereTerm *pTerm,    /* The x=VALUE constraint */
  double *pnRow        /* Write the revised row estimate here */
){
  sqlite3_value *pRhs = 0;  /* VALUE on right-hand side of pTerm */
  int iLower, iUpper;       /* Range of histogram regions containing pRhs */
  u8 aff;                   /* Column affinity */
  int rc;                   /* Subfunction return code */
  double nRowEst;           /* New estimate of the number of rows */

  assert( p->aSample!=0 );
  assert( pTerm->eOperator==WO_EQ );
  aff = p->pTable->aCol[p->aiColumn[0]].affinity;
  rc = valueFromExpr(pParse, pTerm->pExpr->pRight, aff, &pRhs);
  if( rc ) goto whereEqScanEst_cancel;
  rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower);
  if( rc ) goto whereEqScanEst_cancel;
  rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper);
  if( rc ) goto whereEqScanEst_cancel;
  if( iLower>=iUpper ){
    nRowEst = p->aiRowEst[0]/(SQLITE_INDEX_SAMPLES*2);
    if( nRowEst<*pnRow ) *pnRow = nRowEst;
  }else{
    nRowEst = (iUpper-iLower)*p->aiRowEst[0]/SQLITE_INDEX_SAMPLES;
    *pnRow = nRowEst;
  }

whereEqScanEst_cancel:
  sqlite3ValueFree(pRhs);
}
#endif /* defined(SQLITE_ENABLE_STAT2) */


/*
** Find the query plan for accessing a particular table.  Write the
** best query plan and its cost into the WhereCost object supplied as the
** last parameter.
**
** The lowest cost plan wins.  The cost is an estimate of the amount of
................................................................................
    int bInEst = 0;
    int nInMul = 1;
    int estBound = 100;
    int nBound = 0;               /* Number of range constraints seen */
    int bSort = 0;
    int bLookup = 0;
    WhereTerm *pTerm;             /* A single term of the WHERE clause */
#ifdef SQLITE_ENABLE_STAT2
    WhereTerm *pFirstEqTerm = 0;  /* First WO_EQ term */
#endif

    /* Determine the values of nEq and nInMul */
    for(nEq=0; nEq<pProbe->nColumn; nEq++){
      int j = pProbe->aiColumn[nEq];
      pTerm = findTerm(pWC, iCur, j, notReady, eqTermMask, pIdx);
      if( pTerm==0 ) break;
      wsFlags |= (WHERE_COLUMN_EQ|WHERE_ROWID_EQ);
................................................................................
          bInEst = 1;
        }else if( ALWAYS(pExpr->x.pList) ){
          nInMul *= pExpr->x.pList->nExpr + 1;
        }
      }else if( pTerm->eOperator & WO_ISNULL ){
        wsFlags |= WHERE_COLUMN_NULL;
      }
#ifdef SQLITE_ENABLE_STAT2
      else if( nEq==0 && pProbe->aSample ){
        pFirstEqTerm = pTerm;
      }
#endif
      used |= pTerm->prereqRight;
    }

    /* Determine the value of estBound. */
    if( nEq<pProbe->nColumn ){
      int j = pProbe->aiColumn[nEq];
      if( findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE|WO_GT|WO_GE, pIdx) ){
................................................................................
    */
    nRow = (double)(aiRowEst[nEq] * nInMul);
    if( bInEst && nRow*2>aiRowEst[0] ){
      nRow = aiRowEst[0]/2;
      nInMul = (int)(nRow / aiRowEst[nEq]);
    }

#ifdef SQLITE_ENABLE_STAT2
    /* If the constraint is of the form x=VALUE and histogram
    ** data is available for column x, then it might be possible
    ** to get a better estimate on the number of rows based on
    ** VALUE and how common that value is according to the histogram.
    */
    if( nRow>(double)1 && nEq==1 && pFirstEqTerm!=0 ){
      whereEqScanEst(pParse, pProbe, pFirstEqTerm, &nRow);
    }
#endif /* SQLITE_ENABLE_STAT2 */

    /* Assume constant cost to access a row and logarithmic cost to
    ** do a binary search.  Hence, the initial cost is the number of output
    ** rows plus log2(table-size) times the number of binary searches.
    */
    cost = nRow + nInMul*estLog(aiRowEst[0]);

    /* Adjust the number of rows and the cost downward to reflect rows

Changes to test/analyze5.test.

210
211
212
213
214
215
216








217
218
219
220
221
222
223
224
225
 16  {y>=0 AND y<=''}                 50
 17  {y>=0 AND y<='alpha'}           400
 18  {y>=0 AND y<'alpha'}             50
 19  {y>=0 AND y<='bravo'}           700
 20  {y>=0 AND y<'charlie'}          700
 21  {y>=0 AND y<='charlie'}         900
 22  {y>=0 AND y<'delta'}            900








} {
  do_test analyze5-3.$testid {
    eqp "SELECT * FROM t1 WHERE $where"
  } [format {0 0 0 {SEARCH TABLE t1 USING INDEX t1y (y>? AND y<?) (~%d rows)}} \
       $rows]
}


finish_test







>
>
>
>
>
>
>
>









210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
 16  {y>=0 AND y<=''}                 50
 17  {y>=0 AND y<='alpha'}           400
 18  {y>=0 AND y<'alpha'}             50
 19  {y>=0 AND y<='bravo'}           700
 20  {y>=0 AND y<'charlie'}          700
 21  {y>=0 AND y<='charlie'}         900
 22  {y>=0 AND y<'delta'}            900
 23  {y>'alpha' AND y<x'00'}         600
 24  {y>='bravo' AND y<x'00'}        600
 25  {y>'bravo' AND y<x'00'}         300
 26  {y>='charlie' AND y<x'00'}      300
 27  {y>'charlie' AND y<x'00'}       100
 28  {y>='delta' AND y<x'00'}        100
 29  {y>'delta' AND y<x'00'}          50
 30  {y>='echo' AND y<x'00'}          50
} {
  do_test analyze5-3.$testid {
    eqp "SELECT * FROM t1 WHERE $where"
  } [format {0 0 0 {SEARCH TABLE t1 USING INDEX t1y (y>? AND y<?) (~%d rows)}} \
       $rows]
}


finish_test