/ Check-in [6bfc5c69]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Use histogram data to improve the row-count estimates on equality constraints.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | stat2-enhancement
Files: files | file ages | folders
SHA1: 6bfc5c69eb22938972bbf4e60179952dc215f770
User & Date: drh 2011-01-20 16:52:09
Context
2011-01-20
20:36
Update ANALYZE test cases to check out the use of histograms for equality constraints. check-in: c7b59afa user: drh tags: stat2-enhancement
16:52
Use histogram data to improve the row-count estimates on equality constraints. check-in: 6bfc5c69 user: drh tags: stat2-enhancement
02:56
The first of a planned series of enhancements to the query planner that enable it to make better use of sqlite_stat2 histograms when the table has many repeated values. check-in: 2cd374cd user: drh tags: stat2-enhancement
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/where.c.

  2457   2457       *piEst = 11;
  2458   2458     }else{
  2459   2459       *piEst = 33;
  2460   2460     }
  2461   2461     return rc;
  2462   2462   }
  2463   2463   
         2464  +#ifdef SQLITE_ENABLE_STAT2
         2465  +/*
         2466  +** Estimate the number of rows that will be returned based on
         2467  +** an equality constraint x=VALUE and where that VALUE occurs in
         2468  +** the histogram data.  This only works when x is the left-most
         2469  +** column of an index and sqlite_stat2 histogram data is available
         2470  +** for that index.
         2471  +**
         2472  +** Write the estimated row count into *pnRow.  If unable to make
         2473  +** an estimate, leave *pnRow unchanged.
         2474  +*/
         2475  +void whereEqScanEst(
         2476  +  Parse *pParse,       /* Parsing & code generating context */
         2477  +  Index *p,            /* The index whose left-most column is pTerm */
         2478  +  WhereTerm *pTerm,    /* The x=VALUE constraint */
         2479  +  double *pnRow        /* Write the revised row estimate here */
         2480  +){
         2481  +  sqlite3_value *pRhs = 0;  /* VALUE on right-hand side of pTerm */
         2482  +  int iLower, iUpper;       /* Range of histogram regions containing pRhs */
         2483  +  u8 aff;                   /* Column affinity */
         2484  +  int rc;                   /* Subfunction return code */
         2485  +  double nRowEst;           /* New estimate of the number of rows */
         2486  +
         2487  +  assert( p->aSample!=0 );
         2488  +  assert( pTerm->eOperator==WO_EQ );
         2489  +  aff = p->pTable->aCol[p->aiColumn[0]].affinity;
         2490  +  rc = valueFromExpr(pParse, pTerm->pExpr->pRight, aff, &pRhs);
         2491  +  if( rc ) goto whereEqScanEst_cancel;
         2492  +  rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower);
         2493  +  if( rc ) goto whereEqScanEst_cancel;
         2494  +  rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper);
         2495  +  if( rc ) goto whereEqScanEst_cancel;
         2496  +  if( iLower>=iUpper ){
         2497  +    nRowEst = p->aiRowEst[0]/(SQLITE_INDEX_SAMPLES*2);
         2498  +    if( nRowEst<*pnRow ) *pnRow = nRowEst;
         2499  +  }else{
         2500  +    nRowEst = (iUpper-iLower)*p->aiRowEst[0]/SQLITE_INDEX_SAMPLES;
         2501  +    *pnRow = nRowEst;
         2502  +  }
         2503  +
         2504  +whereEqScanEst_cancel:
         2505  +  sqlite3ValueFree(pRhs);
         2506  +}
         2507  +#endif /* defined(SQLITE_ENABLE_STAT2) */
         2508  +
  2464   2509   
  2465   2510   /*
  2466   2511   ** Find the query plan for accessing a particular table.  Write the
  2467   2512   ** best query plan and its cost into the WhereCost object supplied as the
  2468   2513   ** last parameter.
  2469   2514   **
  2470   2515   ** The lowest cost plan wins.  The cost is an estimate of the amount of
................................................................................
  2620   2665       **             SELECT a, b    FROM tbl WHERE a = 1;
  2621   2666       **             SELECT a, b, c FROM tbl WHERE a = 1;
  2622   2667       */
  2623   2668       int nEq;
  2624   2669       int bInEst = 0;
  2625   2670       int nInMul = 1;
  2626   2671       int estBound = 100;
  2627         -    int nBound = 0;             /* Number of range constraints seen */
         2672  +    int nBound = 0;               /* Number of range constraints seen */
  2628   2673       int bSort = 0;
  2629   2674       int bLookup = 0;
  2630         -    WhereTerm *pTerm;           /* A single term of the WHERE clause */
         2675  +    WhereTerm *pTerm;             /* A single term of the WHERE clause */
         2676  +#ifdef SQLITE_ENABLE_STAT2
         2677  +    WhereTerm *pFirstEqTerm = 0;  /* First WO_EQ term */
         2678  +#endif
  2631   2679   
  2632   2680       /* Determine the values of nEq and nInMul */
  2633   2681       for(nEq=0; nEq<pProbe->nColumn; nEq++){
  2634   2682         int j = pProbe->aiColumn[nEq];
  2635   2683         pTerm = findTerm(pWC, iCur, j, notReady, eqTermMask, pIdx);
  2636   2684         if( pTerm==0 ) break;
  2637   2685         wsFlags |= (WHERE_COLUMN_EQ|WHERE_ROWID_EQ);
................................................................................
  2643   2691             bInEst = 1;
  2644   2692           }else if( ALWAYS(pExpr->x.pList) ){
  2645   2693             nInMul *= pExpr->x.pList->nExpr + 1;
  2646   2694           }
  2647   2695         }else if( pTerm->eOperator & WO_ISNULL ){
  2648   2696           wsFlags |= WHERE_COLUMN_NULL;
  2649   2697         }
         2698  +#ifdef SQLITE_ENABLE_STAT2
         2699  +      else if( nEq==0 && pProbe->aSample ){
         2700  +        pFirstEqTerm = pTerm;
         2701  +      }
         2702  +#endif
  2650   2703         used |= pTerm->prereqRight;
  2651   2704       }
  2652   2705   
  2653   2706       /* Determine the value of estBound. */
  2654   2707       if( nEq<pProbe->nColumn ){
  2655   2708         int j = pProbe->aiColumn[nEq];
  2656   2709         if( findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE|WO_GT|WO_GE, pIdx) ){
................................................................................
  2719   2772       */
  2720   2773       nRow = (double)(aiRowEst[nEq] * nInMul);
  2721   2774       if( bInEst && nRow*2>aiRowEst[0] ){
  2722   2775         nRow = aiRowEst[0]/2;
  2723   2776         nInMul = (int)(nRow / aiRowEst[nEq]);
  2724   2777       }
  2725   2778   
         2779  +#ifdef SQLITE_ENABLE_STAT2
         2780  +    /* If the constraint is of the form x=VALUE and histogram
         2781  +    ** data is available for column x, then it might be possible
         2782  +    ** to get a better estimate on the number of rows based on
         2783  +    ** VALUE and how common that value is according to the histogram.
         2784  +    */
         2785  +    if( nRow>(double)1 && nEq==1 && pFirstEqTerm!=0 ){
         2786  +      whereEqScanEst(pParse, pProbe, pFirstEqTerm, &nRow);
         2787  +    }
         2788  +#endif /* SQLITE_ENABLE_STAT2 */
         2789  +
  2726   2790       /* Assume constant cost to access a row and logarithmic cost to
  2727   2791       ** do a binary search.  Hence, the initial cost is the number of output
  2728   2792       ** rows plus log2(table-size) times the number of binary searches.
  2729   2793       */
  2730   2794       cost = nRow + nInMul*estLog(aiRowEst[0]);
  2731   2795   
  2732   2796       /* Adjust the number of rows and the cost downward to reflect rows

Changes to test/analyze5.test.

   210    210    16  {y>=0 AND y<=''}                 50
   211    211    17  {y>=0 AND y<='alpha'}           400
   212    212    18  {y>=0 AND y<'alpha'}             50
   213    213    19  {y>=0 AND y<='bravo'}           700
   214    214    20  {y>=0 AND y<'charlie'}          700
   215    215    21  {y>=0 AND y<='charlie'}         900
   216    216    22  {y>=0 AND y<'delta'}            900
          217  + 23  {y>'alpha' AND y<x'00'}         600
          218  + 24  {y>='bravo' AND y<x'00'}        600
          219  + 25  {y>'bravo' AND y<x'00'}         300
          220  + 26  {y>='charlie' AND y<x'00'}      300
          221  + 27  {y>'charlie' AND y<x'00'}       100
          222  + 28  {y>='delta' AND y<x'00'}        100
          223  + 29  {y>'delta' AND y<x'00'}          50
          224  + 30  {y>='echo' AND y<x'00'}          50
   217    225   } {
   218    226     do_test analyze5-3.$testid {
   219    227       eqp "SELECT * FROM t1 WHERE $where"
   220    228     } [format {0 0 0 {SEARCH TABLE t1 USING INDEX t1y (y>? AND y<?) (~%d rows)}} \
   221    229          $rows]
   222    230   }
   223    231   
   224    232   
   225    233   finish_test