/ Check-in [9eff4702]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Backport test cases from trunk that seek to prove that the IS NOT NULL query optimization is working.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | branch-3.7.2
Files: files | file ages | folders
SHA1: 9eff470226c6f879bb1b24fb00b6786a816d8cfd
User & Date: drh 2011-04-08 23:36:09
Context
2011-04-09
03:30
Back port the unordered-index-hack to the 3.7.2 branch. check-in: 80353020 user: drh tags: branch-3.7.2
2011-04-08
23:36
Backport test cases from trunk that seek to prove that the IS NOT NULL query optimization is working. check-in: 9eff4702 user: drh tags: branch-3.7.2
23:05
Make sure the query planner is able to correctly analyze NULL value samples in the sqlite_stat2 table. This is a backport of changes from check-in [f73a167b434f] check-in: 1d637889 user: drh tags: branch-3.7.2
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Added test/analyze5.test.



































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
# 2011 January 19
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file implements tests for SQLite library.  The focus of the tests
# in this file is the use of the sqlite_stat2 histogram data on tables
# with many repeated values and only a few distinct values.
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl

ifcapable !stat2 {
  finish_test
  return
}

set testprefix analyze5

proc eqp {sql {db db}} {
  uplevel execsql [list "EXPLAIN QUERY PLAN $sql"] $db
}

unset -nocomplain i t u v w x y z
do_test analyze5-1.0 {
  db eval {CREATE TABLE t1(t,u,v TEXT COLLATE nocase,w,x,y,z)}
  for {set i 0} {$i < 1000} {incr i} {
    set y [expr {$i>=25 && $i<=50}]
    set z [expr {($i>=400) + ($i>=700) + ($i>=875)}]
    set x $z
    set w $z
    set t [expr {$z+0.5}]
    switch $z {
      0 {set u "alpha"; unset x}
      1 {set u "bravo"}
      2 {set u "charlie"}
      3 {set u "delta"; unset w}
    }
    if {$i%2} {set v $u} {set v [string toupper $u]}
    db eval {INSERT INTO t1 VALUES($t,$u,$v,$w,$x,$y,$z)}
  }
  db eval { 
    CREATE INDEX t1t ON t1(t);  -- 0.5, 1.5, 2.5, and 3.5
    CREATE INDEX t1u ON t1(u);  -- text
    CREATE INDEX t1v ON t1(v);  -- mixed case text
    CREATE INDEX t1w ON t1(w);  -- integers 0, 1, 2 and a few NULLs
    CREATE INDEX t1x ON t1(x);  -- integers 1, 2, 3 and many NULLs
    CREATE INDEX t1y ON t1(y);  -- integers 0 and very few 1s
    CREATE INDEX t1z ON t1(z);  -- integers 0, 1, 2, and 3
    ANALYZE;
    SELECT sample FROM sqlite_stat2 WHERE idx='t1u' ORDER BY sampleno;
  }
} {alpha alpha alpha alpha bravo bravo bravo charlie charlie delta}
do_test analyze5-1.1 {
  string tolower \
   [db eval {SELECT sample from sqlite_stat2 WHERE idx='t1v' ORDER BY sampleno}]
} {alpha alpha alpha alpha bravo bravo bravo charlie charlie delta}
do_test analyze5-1.2 {
  db eval {SELECT sample from sqlite_stat2 WHERE idx='t1w' ORDER BY sampleno}
} {{} 0 0 0 0 1 1 1 2 2}
do_test analyze5-1.3 {
  db eval {SELECT sample from sqlite_stat2 WHERE idx='t1x' ORDER BY sampleno}
} {{} {} {} {} 1 1 1 2 2 3}
do_test analyze5-1.4 {
  db eval {SELECT sample from sqlite_stat2 WHERE idx='t1y' ORDER BY sampleno}
} {0 0 0 0 0 0 0 0 0 0}
do_test analyze5-1.5 {
  db eval {SELECT sample from sqlite_stat2 WHERE idx='t1z' ORDER BY sampleno}
} {0 0 0 0 1 1 1 2 2 3}
do_test analyze5-1.6 {
  db eval {SELECT sample from sqlite_stat2 WHERE idx='t1t' ORDER BY sampleno}
} {0.5 0.5 0.5 0.5 1.5 1.5 1.5 2.5 2.5 3.5}


# Verify that range queries generate the correct row count estimates
#
foreach {testid where index rows} {
    1  {z>=0 AND z<=0}       t1z  400
    2  {z>=1 AND z<=1}       t1z  300
    3  {z>=2 AND z<=2}       t1z  200
    4  {z>=3 AND z<=3}       t1z  100
    5  {z>=4 AND z<=4}       t1z   50
    6  {z>=-1 AND z<=-1}     t1z   50
    7  {z>1 AND z<3}         t1z  200
    8  {z>0 AND z<100}       t1z  600
    9  {z>=1 AND z<100}      t1z  600
   10  {z>1 AND z<100}       t1z  300
   11  {z>=2 AND z<100}      t1z  300
   12  {z>2 AND z<100}       t1z  100
   13  {z>=3 AND z<100}      t1z  100
   14  {z>3 AND z<100}       t1z   50
   15  {z>=4 AND z<100}      t1z   50
   16  {z>=-100 AND z<=-1}   t1z   50
   17  {z>=-100 AND z<=0}    t1z  400
   18  {z>=-100 AND z<0}     t1z   50
   19  {z>=-100 AND z<=1}    t1z  700
   20  {z>=-100 AND z<2}     t1z  700
   21  {z>=-100 AND z<=2}    t1z  900
   22  {z>=-100 AND z<3}     t1z  900
  
   31  {z>=0.0 AND z<=0.0}   t1z  400
   32  {z>=1.0 AND z<=1.0}   t1z  300
   33  {z>=2.0 AND z<=2.0}   t1z  200
   34  {z>=3.0 AND z<=3.0}   t1z  100
   35  {z>=4.0 AND z<=4.0}   t1z   50
   36  {z>=-1.0 AND z<=-1.0} t1z   50
   37  {z>1.5 AND z<3.0}     t1z  200
   38  {z>0.5 AND z<100}     t1z  600
   39  {z>=1.0 AND z<100}    t1z  600
   40  {z>1.5 AND z<100}     t1z  300
   41  {z>=2.0 AND z<100}    t1z  300
   42  {z>2.1 AND z<100}     t1z  100
   43  {z>=3.0 AND z<100}    t1z  100
   44  {z>3.2 AND z<100}     t1z   50
   45  {z>=4.0 AND z<100}    t1z   50
   46  {z>=-100 AND z<=-1.0} t1z   50
   47  {z>=-100 AND z<=0.0}  t1z  400
   48  {z>=-100 AND z<0.0}   t1z   50
   49  {z>=-100 AND z<=1.0}  t1z  700
   50  {z>=-100 AND z<2.0}   t1z  700
   51  {z>=-100 AND z<=2.0}  t1z  900
   52  {z>=-100 AND z<3.0}   t1z  900
  
  101  {z=-1}                t1z   50
  102  {z=0}                 t1z  400
  103  {z=1}                 t1z  300
  104  {z=2}                 t1z  200
  105  {z=3}                 t1z  100
  106  {z=4}                 t1z   50
  107  {z=-10.0}             t1z   50
  108  {z=0.0}               t1z  400
  109  {z=1.0}               t1z  300
  110  {z=2.0}               t1z  200
  111  {z=3.0}               t1z  100
  112  {z=4.0}               t1z   50
  113  {z=1.5}               t1z   50
  114  {z=2.5}               t1z   50
  
  201  {z IN (-1)}           t1z   50
  202  {z IN (0)}            t1z  400
  203  {z IN (1)}            t1z  300
  204  {z IN (2)}            t1z  200
  205  {z IN (3)}            t1z  100
  206  {z IN (4)}            t1z   50
  207  {z IN (0.5)}          t1z   50
  208  {z IN (0,1)}          t1z  700
  209  {z IN (0,1,2)}        t1z  900
  210  {z IN (0,1,2,3)}      {}   100
  211  {z IN (0,1,2,3,4,5)}  {}   100
  212  {z IN (1,2)}          t1z  500
  213  {z IN (2,3)}          t1z  300
  214  {z=3 OR z=2}          t1z  300
  215  {z IN (-1,3)}         t1z  150
  216  {z=-1 OR z=3}         t1z  150

  300  {y=0}                 {}   100
  301  {y=1}                 t1y   50
  302  {y=0.1}               t1y   50

  400  {x IS NULL}           t1x  400

} {
  # Verify that the expected index is used with the expected row count
  do_test analyze5-1.${testid}a {
    set x [lindex [eqp "SELECT * FROM t1 WHERE $where"] 3]
    set idx {}
    regexp {INDEX (t1.) } $x all idx
    regexp {~([0-9]+) rows} $x all nrow
    list $idx $nrow
  } [list $index $rows]

  # Verify that the same result is achieved regardless of whether or not
  # the index is used
  do_test analyze5-1.${testid}b {
    set w2 [string map {y +y z +z} $where]
    set a1 [db eval "SELECT rowid FROM t1 NOT INDEXED WHERE $w2\
                     ORDER BY +rowid"]
    set a2 [db eval "SELECT rowid FROM t1 WHERE $where ORDER BY +rowid"]
    if {$a1==$a2} {
      set res ok
    } else {
      set res "a1=\[$a1\] a2=\[$a2\]"
    }
    set res
  } {ok}
}

# Increase the number of NULLs in column x
#
db eval {
   UPDATE t1 SET x=NULL;
   UPDATE t1 SET x=rowid
    WHERE rowid IN (SELECT rowid FROM t1 ORDER BY random() LIMIT 5);
   ANALYZE;
}

# Verify that range queries generate the correct row count estimates
#
foreach {testid where index rows} {
  500  {x IS NULL AND u='charlie'}         t1u  20
  501  {x=1 AND u='charlie'}               t1x   5
  502  {x IS NULL}                          {} 100
  503  {x=1}                               t1x  50
  504  {x IS NOT NULL}                     t1x  25
  505  {+x IS NOT NULL}                     {} 500
  506  {upper(x) IS NOT NULL}               {} 500

} {
  # Verify that the expected index is used with the expected row count
  do_test analyze5-1.${testid}a {
    set x [lindex [eqp "SELECT * FROM t1 WHERE $where"] 3]
    set idx {}
    regexp {INDEX (t1.) } $x all idx
    regexp {~([0-9]+) rows} $x all nrow
    list $idx $nrow
  } [list $index $rows]

  # Verify that the same result is achieved regardless of whether or not
  # the index is used
  do_test analyze5-1.${testid}b {
    set w2 [string map {y +y z +z} $where]
    set a1 [db eval "SELECT rowid FROM t1 NOT INDEXED WHERE $w2\
                     ORDER BY +rowid"]
    set a2 [db eval "SELECT rowid FROM t1 WHERE $where ORDER BY +rowid"]
    if {$a1==$a2} {
      set res ok
    } else {
      set res "a1=\[$a1\] a2=\[$a2\]"
    }
    set res
  } {ok}
}

finish_test