SQLite

Check-in [c8ba8855d8]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Adjust the output row estimating logic when using STAT2.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | query-planner-tweaks
Files: files | file ages | folders
SHA1: c8ba8855d8e93cd1e7dbbc9398a5f850fed5a382
User & Date: drh 2011-08-03 01:07:25.236
Context
2011-08-05
01:09
Allow the sqlite_stat2 table to contain a number of samples other than the default of 10. (check-in: b9d41c3490 user: drh tags: query-planner-tweaks)
2011-08-03
01:07
Adjust the output row estimating logic when using STAT2. (check-in: c8ba8855d8 user: drh tags: query-planner-tweaks)
2011-08-02
20:14
Exclude the 8_3_names.test script from the inmemory_journal permutation. (check-in: 78fc94c8d1 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/sqliteInt.h.
77
78
79
80
81
82
83

84

85
86
87
88
89
90
91
#endif

/*
** The number of samples of an index that SQLite takes in order to 
** construct a histogram of the table content when running ANALYZE
** and with SQLITE_ENABLE_STAT2
*/

#define SQLITE_INDEX_SAMPLES 10


/*
** The following macros are used to cast pointers to integers and
** integers to pointers.  The way you do this varies from one compiler
** to the next, so we have developed the following set of #if statements
** to generate appropriate macros for a wide range of compilers.
**







>
|
>







77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#endif

/*
** The number of samples of an index that SQLite takes in order to 
** construct a histogram of the table content when running ANALYZE
** and with SQLITE_ENABLE_STAT2
*/
#ifndef SQLITE_INDEX_SAMPLES
# define SQLITE_INDEX_SAMPLES 10
#endif

/*
** The following macros are used to cast pointers to integers and
** integers to pointers.  The way you do this varies from one compiler
** to the next, so we have developed the following set of #if statements
** to generate appropriate macros for a wide range of compilers.
**
Changes to src/where.c.
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
  if( pRhs==0 ) return SQLITE_NOTFOUND;
  rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower);
  if( rc ) goto whereEqualScanEst_cancel;
  rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper);
  if( rc ) goto whereEqualScanEst_cancel;
  WHERETRACE(("equality scan regions: %d..%d\n", iLower, iUpper));
  if( iLower>=iUpper ){
    nRowEst = p->aiRowEst[0]/(SQLITE_INDEX_SAMPLES*2);
    if( nRowEst<*pnRow ) *pnRow = nRowEst;
  }else{
    nRowEst = (iUpper-iLower)*p->aiRowEst[0]/SQLITE_INDEX_SAMPLES;
    *pnRow = nRowEst;
  }

whereEqualScanEst_cancel:







|







2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
  if( pRhs==0 ) return SQLITE_NOTFOUND;
  rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower);
  if( rc ) goto whereEqualScanEst_cancel;
  rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper);
  if( rc ) goto whereEqualScanEst_cancel;
  WHERETRACE(("equality scan regions: %d..%d\n", iLower, iUpper));
  if( iLower>=iUpper ){
    nRowEst = p->aiRowEst[0]/(SQLITE_INDEX_SAMPLES*3);
    if( nRowEst<*pnRow ) *pnRow = nRowEst;
  }else{
    nRowEst = (iUpper-iLower)*p->aiRowEst[0]/SQLITE_INDEX_SAMPLES;
    *pnRow = nRowEst;
  }

whereEqualScanEst_cancel:
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
    for(i=nSpan=0; i<=SQLITE_INDEX_SAMPLES; i++){
      if( aSpan[i] ){
        nSpan++;
      }else if( aSingle[i] ){
        nSingle++;
      }
    }
    nRowEst = (nSpan*2+nSingle)*p->aiRowEst[0]/(2*SQLITE_INDEX_SAMPLES)
               + nNotFound*p->aiRowEst[1];
    if( nRowEst > p->aiRowEst[0] ) nRowEst = p->aiRowEst[0];
    *pnRow = nRowEst;
    WHERETRACE(("IN row estimate: nSpan=%d, nSingle=%d, nNotFound=%d, est=%g\n",
                 nSpan, nSingle, nNotFound, nRowEst));
  }
  sqlite3ValueFree(pVal);







|







2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
    for(i=nSpan=0; i<=SQLITE_INDEX_SAMPLES; i++){
      if( aSpan[i] ){
        nSpan++;
      }else if( aSingle[i] ){
        nSingle++;
      }
    }
    nRowEst = (nSpan*3+nSingle)*p->aiRowEst[0]/(3*SQLITE_INDEX_SAMPLES)
               + nNotFound*p->aiRowEst[1];
    if( nRowEst > p->aiRowEst[0] ) nRowEst = p->aiRowEst[0];
    *pnRow = nRowEst;
    WHERETRACE(("IN row estimate: nSpan=%d, nSingle=%d, nNotFound=%d, est=%g\n",
                 nSpan, nSingle, nNotFound, nRowEst));
  }
  sqlite3ValueFree(pVal);
Changes to test/analyze5.test.
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
   47  {z>=-100 AND z<=0.0}  t1z  400
   48  {z>=-100 AND z<0.0}   t1z   50
   49  {z>=-100 AND z<=1.0}  t1z  700
   50  {z>=-100 AND z<2.0}   t1z  700
   51  {z>=-100 AND z<=2.0}  t1z  900
   52  {z>=-100 AND z<3.0}   t1z  900
  
  101  {z=-1}                t1z   50
  102  {z=0}                 t1z  400
  103  {z=1}                 t1z  300
  104  {z=2}                 t1z  200
  105  {z=3}                 t1z  100
  106  {z=4}                 t1z   50
  107  {z=-10.0}             t1z   50
  108  {z=0.0}               t1z  400
  109  {z=1.0}               t1z  300
  110  {z=2.0}               t1z  200
  111  {z=3.0}               t1z  100
  112  {z=4.0}               t1z   50
  113  {z=1.5}               t1z   50
  114  {z=2.5}               t1z   50
  
  201  {z IN (-1)}           t1z   50
  202  {z IN (0)}            t1z  400
  203  {z IN (1)}            t1z  300
  204  {z IN (2)}            t1z  200
  205  {z IN (3)}            t1z  100
  206  {z IN (4)}            t1z   50
  207  {z IN (0.5)}          t1z   50
  208  {z IN (0,1)}          t1z  700
  209  {z IN (0,1,2)}        t1z  900
  210  {z IN (0,1,2,3)}      {}   100
  211  {z IN (0,1,2,3,4,5)}  {}   100
  212  {z IN (1,2)}          t1z  500
  213  {z IN (2,3)}          t1z  300
  214  {z=3 OR z=2}          t1z  300
  215  {z IN (-1,3)}         t1z  150
  216  {z=-1 OR z=3}         t1z  150

  300  {y=0}                 {}   100
  301  {y=1}                 t1y   50
  302  {y=0.1}               t1y   50

  400  {x IS NULL}           t1x  400

} {
  # Verify that the expected index is used with the expected row count
  do_test analyze5-1.${testid}a {
    set x [lindex [eqp "SELECT * FROM t1 WHERE $where"] 3]







|




|
|




|
|
|

|




|
|







|
|


|
|







124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
   47  {z>=-100 AND z<=0.0}  t1z  400
   48  {z>=-100 AND z<0.0}   t1z   50
   49  {z>=-100 AND z<=1.0}  t1z  700
   50  {z>=-100 AND z<2.0}   t1z  700
   51  {z>=-100 AND z<=2.0}  t1z  900
   52  {z>=-100 AND z<3.0}   t1z  900
  
  101  {z=-1}                t1z   33
  102  {z=0}                 t1z  400
  103  {z=1}                 t1z  300
  104  {z=2}                 t1z  200
  105  {z=3}                 t1z  100
  106  {z=4}                 t1z   33
  107  {z=-10.0}             t1z   33
  108  {z=0.0}               t1z  400
  109  {z=1.0}               t1z  300
  110  {z=2.0}               t1z  200
  111  {z=3.0}               t1z  100
  112  {z=4.0}               t1z   33
  113  {z=1.5}               t1z   33
  114  {z=2.5}               t1z   33
  
  201  {z IN (-1)}           t1z   33
  202  {z IN (0)}            t1z  400
  203  {z IN (1)}            t1z  300
  204  {z IN (2)}            t1z  200
  205  {z IN (3)}            t1z  100
  206  {z IN (4)}            t1z   33
  207  {z IN (0.5)}          t1z   33
  208  {z IN (0,1)}          t1z  700
  209  {z IN (0,1,2)}        t1z  900
  210  {z IN (0,1,2,3)}      {}   100
  211  {z IN (0,1,2,3,4,5)}  {}   100
  212  {z IN (1,2)}          t1z  500
  213  {z IN (2,3)}          t1z  300
  214  {z=3 OR z=2}          t1z  300
  215  {z IN (-1,3)}         t1z  133
  216  {z=-1 OR z=3}         t1z  133

  300  {y=0}                 {}   100
  301  {y=1}                 t1y   33
  302  {y=0.1}               t1y   33

  400  {x IS NULL}           t1x  400

} {
  # Verify that the expected index is used with the expected row count
  do_test analyze5-1.${testid}a {
    set x [lindex [eqp "SELECT * FROM t1 WHERE $where"] 3]
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
   ANALYZE;
}

# Verify that range queries generate the correct row count estimates
#
foreach {testid where index rows} {
  500  {x IS NULL AND u='charlie'}         t1u  20
  501  {x=1 AND u='charlie'}               t1x   5
  502  {x IS NULL}                          {} 100
  503  {x=1}                               t1x  50
  504  {x IS NOT NULL}                     t1x  25
  505  {+x IS NOT NULL}                     {} 500
  506  {upper(x) IS NOT NULL}               {} 500

} {
  # Verify that the expected index is used with the expected row count
  do_test analyze5-1.${testid}a {







|

|







201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
   ANALYZE;
}

# Verify that range queries generate the correct row count estimates
#
foreach {testid where index rows} {
  500  {x IS NULL AND u='charlie'}         t1u  20
  501  {x=1 AND u='charlie'}               t1x   3
  502  {x IS NULL}                          {} 100
  503  {x=1}                               t1x  33
  504  {x IS NOT NULL}                     t1x  25
  505  {+x IS NOT NULL}                     {} 500
  506  {upper(x) IS NOT NULL}               {} 500

} {
  # Verify that the expected index is used with the expected row count
  do_test analyze5-1.${testid}a {
Changes to test/analyze7.test.
95
96
97
98
99
100
101
102


103
104
105
106
107
108
109
110
    execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE c=2;}
  } {0 0 0 {SEARCH TABLE t1 USING INDEX t1cd (c=?) (~86 rows)}}
}
do_test analyze7-3.3 {
  execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE a=123 AND b=123}
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1a (a=?) (~1 rows)}}
do_test analyze7-3.4 {
  execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE c=123 AND b=123}


} {0 0 0 {SEARCH TABLE t1 USING INDEX t1b (b=?) (~2 rows)}}
do_test analyze7-3.5 {
  execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE a=123 AND c=123}
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1a (a=?) (~1 rows)}}
do_test analyze7-3.6 {
  execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE c=123 AND d=123 AND b=123}
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1cd (c=? AND d=?) (~1 rows)}}








|
>
>
|







95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
    execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE c=2;}
  } {0 0 0 {SEARCH TABLE t1 USING INDEX t1cd (c=?) (~86 rows)}}
}
do_test analyze7-3.3 {
  execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE a=123 AND b=123}
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1a (a=?) (~1 rows)}}
do_test analyze7-3.4 {
  set x [execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE c=123 AND b=123}]
  regsub -all {[bcd]+} $x {x} x
  set x
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1x (x=?) (~2 rows)}}
do_test analyze7-3.5 {
  execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE a=123 AND c=123}
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1a (a=?) (~1 rows)}}
do_test analyze7-3.6 {
  execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE c=123 AND d=123 AND b=123}
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1cd (c=? AND d=?) (~1 rows)}}