/ Check-in [c82cb9c0]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Adjustments to the result row estimator for the IN operator so that it gives the same estimates as the equivalent OR operator. Test cases for the same.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | stat2-enhancement
Files: files | file ages | folders
SHA1: c82cb9c028b3ba5463ae50c30196dbf157a7a305
User & Date: drh 2011-01-21 18:18:13
Context
2011-01-22
00:10
Add the ability to use indices for constraints of the form "x IS NOT NULL" when sqlite_stat2 is available and most entries for column x are NULL. check-in: 5d5bddd2 user: drh tags: stat2-enhancement
2011-01-21
18:18
Adjustments to the result row estimator for the IN operator so that it gives the same estimates as the equivalent OR operator. Test cases for the same. check-in: c82cb9c0 user: drh tags: stat2-enhancement
16:27
Make use of histogram data to make better estimates for the number of rows that will be returned from "x IN (v1,v2,v3,...)" constraints. check-in: fd3977a2 user: drh tags: stat2-enhancement
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/where.c.

   113    113   #define TERM_DYNAMIC    0x01   /* Need to call sqlite3ExprDelete(db, pExpr) */
   114    114   #define TERM_VIRTUAL    0x02   /* Added by the optimizer.  Do not code */
   115    115   #define TERM_CODED      0x04   /* This term is already coded */
   116    116   #define TERM_COPIED     0x08   /* Has a child */
   117    117   #define TERM_ORINFO     0x10   /* Need to free the WhereTerm.u.pOrInfo object */
   118    118   #define TERM_ANDINFO    0x20   /* Need to free the WhereTerm.u.pAndInfo obj */
   119    119   #define TERM_OR_OK      0x40   /* Used during OR-clause processing */
          120  +#define TERM_NOHELP     0x80   /* This term does not reduce the search space */
   120    121   
   121    122   /*
   122    123   ** An instance of the following structure holds all information about a
   123    124   ** WHERE clause.  Mostly this is a container for one or more WhereTerms.
   124    125   */
   125    126   struct WhereClause {
   126    127     Parse *pParse;           /* The parser context */
................................................................................
  1056   1057           exprAnalyze(pSrc, pWC, idxNew);
  1057   1058           pTerm = &pWC->a[idxTerm];
  1058   1059           pWC->a[idxNew].iParent = idxTerm;
  1059   1060           pTerm->nChild = 1;
  1060   1061         }else{
  1061   1062           sqlite3ExprListDelete(db, pList);
  1062   1063         }
         1064  +      pTerm->wtFlags |= TERM_NOHELP;
  1063   1065         pTerm->eOperator = 0;  /* case 1 trumps case 2 */
  1064   1066       }
  1065   1067     }
  1066   1068   }
  1067   1069   #endif /* !SQLITE_OMIT_OR_OPTIMIZATION && !SQLITE_OMIT_SUBQUERY */
  1068   1070   
  1069   1071   
................................................................................
  2519   2521     return rc;
  2520   2522   }
  2521   2523   #endif /* defined(SQLITE_ENABLE_STAT2) */
  2522   2524   
  2523   2525   #ifdef SQLITE_ENABLE_STAT2
  2524   2526   /*
  2525   2527   ** Estimate the number of rows that will be returned based on
  2526         -** an IN constraint "x IN (V1,V2,V3,...)" where the right-hand side
  2527         -** of the IN operator is a list of values.
         2528  +** an IN constraint where the right-hand side of the IN operator
         2529  +** is a list of values.  Example:
         2530  +**
         2531  +**        WHERE x IN (1,2,3,4)
  2528   2532   **
  2529   2533   ** Write the estimated row count into *pnRow and return SQLITE_OK. 
  2530   2534   ** If unable to make an estimate, leave *pnRow unchanged and return
  2531   2535   ** non-zero.
  2532   2536   **
  2533   2537   ** This routine can fail if it is unable to load a collating sequence
  2534   2538   ** required for string comparison, or if unable to allocate memory
................................................................................
  2540   2544     Index *p,            /* The index whose left-most column is pTerm */
  2541   2545     ExprList *pList,     /* The value list on the RHS of "x IN (v1,v2,v3,...)" */
  2542   2546     double *pnRow        /* Write the revised row estimate here */
  2543   2547   ){
  2544   2548     sqlite3_value *pVal = 0;  /* One value from list */
  2545   2549     int iLower, iUpper;       /* Range of histogram regions containing pRhs */
  2546   2550     u8 aff;                   /* Column affinity */
  2547         -  int rc;                   /* Subfunction return code */
         2551  +  int rc = SQLITE_OK;       /* Subfunction return code */
  2548   2552     double nRowEst;           /* New estimate of the number of rows */
  2549         -  int nRegion = 0;          /* Number of histogram regions spanned */
  2550         -  int nSingle = 0;          /* Count of values contained within one region */
         2553  +  int nSpan = 0;            /* Number of histogram regions spanned */
         2554  +  int nSingle = 0;          /* Histogram regions hit by a single value */
  2551   2555     int nNotFound = 0;        /* Count of values that are not constants */
  2552         -  int i;                            /* Loop counter */
  2553         -  u8 aHit[SQLITE_INDEX_SAMPLES+1];  /* Histogram regions that are spanned */
         2556  +  int i;                               /* Loop counter */
         2557  +  u8 aSpan[SQLITE_INDEX_SAMPLES+1];    /* Histogram regions that are spanned */
         2558  +  u8 aSingle[SQLITE_INDEX_SAMPLES+1];  /* Histogram regions hit once */
  2554   2559   
  2555   2560     assert( p->aSample!=0 );
  2556   2561     aff = p->pTable->aCol[p->aiColumn[0]].affinity;
  2557         -  memset(aHit, 0, sizeof(aHit));
         2562  +  memset(aSpan, 0, sizeof(aSpan));
         2563  +  memset(aSingle, 0, sizeof(aSingle));
  2558   2564     for(i=0; i<pList->nExpr; i++){
  2559   2565       sqlite3ValueFree(pVal);
  2560   2566       rc = valueFromExpr(pParse, pList->a[i].pExpr, aff, &pVal);
  2561   2567       if( rc ) break;
  2562         -    if( pVal==0 ){
         2568  +    if( pVal==0 || sqlite3_value_type(pVal)==SQLITE_NULL ){
  2563   2569         nNotFound++;
  2564   2570         continue;
  2565   2571       }
  2566   2572       rc = whereRangeRegion(pParse, p, pVal, 0, &iLower);
  2567   2573       if( rc ) break;
  2568   2574       rc = whereRangeRegion(pParse, p, pVal, 1, &iUpper);
  2569   2575       if( rc ) break;
  2570   2576       if( iLower>=iUpper ){
  2571         -      nSingle++;
         2577  +      aSingle[iLower] = 1;
         2578  +    }else{
         2579  +      assert( iLower>=0 && iUpper<=SQLITE_INDEX_SAMPLES );
         2580  +      while( iLower<iUpper ) aSpan[iLower++] = 1;
  2572   2581       }
  2573         -    assert( iLower>=0 && iUpper<=SQLITE_INDEX_SAMPLES );
  2574         -    while( iLower<=iUpper ) aHit[iLower++] = 1;
  2575   2582     }
  2576   2583     if( rc==SQLITE_OK ){
  2577         -    for(i=nRegion=0; i<ArraySize(aHit); i++) nRegion += aHit[i];
  2578         -    nRowEst = nRegion*p->aiRowEst[0]/(SQLITE_INDEX_SAMPLES+1)
         2584  +    for(i=nSpan=0; i<=SQLITE_INDEX_SAMPLES; i++){
         2585  +      if( aSpan[i] ){
         2586  +        nSpan++;
         2587  +      }else if( aSingle[i] ){
         2588  +        nSingle++;
         2589  +      }
         2590  +    }
         2591  +    nRowEst = (nSpan*2+nSingle)*p->aiRowEst[0]/(2*SQLITE_INDEX_SAMPLES)
  2579   2592                  + nNotFound*p->aiRowEst[1];
  2580   2593       if( nRowEst > p->aiRowEst[0] ) nRowEst = p->aiRowEst[0];
  2581   2594       *pnRow = nRowEst;
  2582         -    WHERETRACE(("IN row estimate: nRegion=%d, nSingle=%d, nNotFound=%d\n",
  2583         -                 nRegion, nSingle, nNotFound));
         2595  +    WHERETRACE(("IN row estimate: nSpan=%d, nSingle=%d, nNotFound=%d, est=%g\n",
         2596  +                 nSpan, nSingle, nNotFound, nRowEst));
  2584   2597     }
  2585   2598     sqlite3ValueFree(pVal);
  2586   2599     return rc;
  2587   2600   }
  2588   2601   #endif /* defined(SQLITE_ENABLE_STAT2) */
  2589   2602   
  2590   2603   
................................................................................
  2919   2932         int k;                       /* Loop counter */
  2920   2933         int nSkipEq = nEq;           /* Number of == constraints to skip */
  2921   2934         int nSkipRange = nBound;     /* Number of < constraints to skip */
  2922   2935         Bitmask thisTab;             /* Bitmap for pSrc */
  2923   2936   
  2924   2937         thisTab = getMask(pWC->pMaskSet, iCur);
  2925   2938         for(pTerm=pWC->a, k=pWC->nTerm; nRow>2 && k; k--, pTerm++){
  2926         -        if( pTerm->wtFlags & TERM_VIRTUAL ) continue;
         2939  +        if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_NOHELP) ) continue;
  2927   2940           if( (pTerm->prereqAll & notValid)!=thisTab ) continue;
  2928   2941           if( pTerm->eOperator & (WO_EQ|WO_IN|WO_ISNULL) ){
  2929   2942             if( nSkipEq ){
  2930   2943               /* Ignore the first nEq equality matches since the index
  2931   2944               ** has already accounted for these */
  2932   2945               nSkipEq--;
  2933   2946             }else{
  2934   2947               /* Assume each additional equality match reduces the result
  2935   2948               ** set size by a factor of 10 */
  2936   2949               nRow /= 10;
  2937   2950             }
  2938   2951           }else if( pTerm->eOperator & (WO_LT|WO_LE|WO_GT|WO_GE) ){
  2939   2952             if( nSkipRange ){
  2940         -            /* Ignore the first nBound range constraints since the index
         2953  +            /* Ignore the first nSkipRange range constraints since the index
  2941   2954               ** has already accounted for these */
  2942   2955               nSkipRange--;
  2943   2956             }else{
  2944   2957               /* Assume each additional range constraint reduces the result
  2945   2958               ** set size by a factor of 3 */
  2946   2959               nRow /= 3;
  2947   2960             }

Changes to test/analyze5.test.

   114    114   } {
   115    115     do_test analyze5-1.$testid {
   116    116       eqp "SELECT * FROM t1 WHERE $where"
   117    117     } [format {0 0 0 {SEARCH TABLE t1 USING INDEX t1z (z>? AND z<?) (~%d rows)}} \
   118    118          $rows]
   119    119   }
   120    120   foreach {testid where rows} {
   121         -  101  {z=-1}           50
          121  +  101  {z=-1}            50
   122    122     102  {z=0}            400
   123    123     103  {z=1}            300
   124    124     104  {z=2}            200
   125    125     105  {z=3}            100
   126    126     106  {z=4}             50
   127    127     107  {z=-10.0}         50
   128    128     108  {z=0.0}          400
................................................................................
   133    133     113  {z=1.5}           50
   134    134     114  {z=2.5}           50
   135    135   } {
   136    136     do_test analyze5-1.$testid {
   137    137       eqp "SELECT * FROM t1 WHERE $where"
   138    138     } [format {0 0 0 {SEARCH TABLE t1 USING INDEX t1z (z=?) (~%d rows)}} $rows]
   139    139   }
          140  +
          141  +# for the next sequence of tests a value of rows<=0 means a full-table scan
          142  +# is used.
          143  +#
          144  +#set sqlite_where_trace 1
          145  +foreach {testid where rows} {
          146  +  201  {z IN (-1)}            50
          147  +  202  {z IN (0)}            400
          148  +  203  {z IN (1)}            300
          149  +  204  {z IN (2)}            200
          150  +  205  {z IN (3)}            100
          151  +  206  {z IN (4)}             50
          152  +  207  {z IN (0.5)}           50
          153  +  208  {z IN (0,1)}          700
          154  +  209  {z IN (0,1,2)}        900
          155  +  210  {z IN (0,1,2,3)}        0
          156  +  211  {z IN (0,1,2,3,4,5)}    0
          157  +  212  {z IN (1,2)}          500
          158  +  213  {z IN (2,3)}          300
          159  +  214  {z=3 OR z=2}          300
          160  +  215  {z IN (-1,3)}         150
          161  +  216  {z=-1 OR z=3}         150
          162  +} {
          163  +  if {$rows<=0} {
          164  +    set ans {SCAN TABLE t1 (~100 rows)}
          165  +  } else {
          166  +    set ans [format {SEARCH TABLE t1 USING INDEX t1z (z=?) (~%d rows)} $rows]
          167  +  }
          168  +  do_test analyze5-1.$testid {
          169  +    lindex [eqp "SELECT * FROM t1 WHERE $where"] 3
          170  +  } $ans
          171  +}
   140    172   
   141    173   # For the t1.y column, most entries are known to be zero.  So do a 
   142    174   # full table scan for y=0 but use the index for any other constraint on
   143    175   # y.
   144    176   #
   145    177   do_test analyze5-201 {
   146    178     eqp {SELECT * FROM t1 WHERE y=0}