SQLite

Check-in [1fb4d9d6f2]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add tests for sqlite_stat4 sample selection. And a fix for the same.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | sqlite_stat4
Files: files | file ages | folders
SHA1: 1fb4d9d6f2675515feb8e3d971bbd54716372549
User & Date: dan 2013-08-15 14:39:09.873
Context
2013-08-15
16:15
Make sure the ANALYZE command allocates enough VDBE registers. (check-in: 46fec9b1a1 user: drh tags: sqlite_stat4)
14:39
Add tests for sqlite_stat4 sample selection. And a fix for the same. (check-in: 1fb4d9d6f2 user: dan tags: sqlite_stat4)
2013-08-14
19:54
Change the way ANALYZE works to use a single cursor when scanning indices. (check-in: bdce612b35 user: dan tags: sqlite_stat4)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/analyze.c.
407
408
409
410
411
412
413

414
415
416
417
418
419

420
421
422
423
424
425
426
427
428
429
430





431
432
433
434
435
436
437
  int i;
  i64 iSeq;
  i64 iPos;

  assert( IsStat4 || nEqZero==0 );

  if( pNew->isPSample==0 ){

    assert( pNew->anEq[pNew->iCol]>0 );

    /* This sample is being added because the prefix that ends in column 
    ** iCol occurs many times in the table. However, if we have already
    ** added a sample that shares this prefix, there is no need to add
    ** this one. Instead, upgrade the priority of the existing sample. */

    for(i=p->nSample-1; i>=0; i--){
      Stat4Sample *pOld = &p->a[i];
      if( pOld->anEq[pNew->iCol]==0 ){
        if( pOld->isPSample==0 ){
          assert( sampleIsBetter(pNew, pOld) );
          assert( pOld->iCol>pNew->iCol );
          pOld->iCol = pNew->iCol;
        }
        goto find_new_min;
      }
    }





  }

  /* If necessary, remove sample iMin to make room for the new sample. */
  if( p->nSample>=p->mxSample ){
    Stat4Sample *pMin = &p->a[p->iMin];
    tRowcnt *anEq = pMin->anEq;
    tRowcnt *anLt = pMin->anLt;







>





|
>



|
|
|
|

<


>
>
>
>
>







407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429

430
431
432
433
434
435
436
437
438
439
440
441
442
443
  int i;
  i64 iSeq;
  i64 iPos;

  assert( IsStat4 || nEqZero==0 );

  if( pNew->isPSample==0 ){
    Stat4Sample *pUpgrade = 0;
    assert( pNew->anEq[pNew->iCol]>0 );

    /* This sample is being added because the prefix that ends in column 
    ** iCol occurs many times in the table. However, if we have already
    ** added a sample that shares this prefix, there is no need to add
    ** this one. Instead, upgrade the priority of the highest priority
    ** existing sample that shares this prefix.  */
    for(i=p->nSample-1; i>=0; i--){
      Stat4Sample *pOld = &p->a[i];
      if( pOld->anEq[pNew->iCol]==0 ){
        if( pOld->isPSample ) return;
        assert( sampleIsBetter(pNew, pOld) );
        if( pUpgrade==0 || sampleIsBetter(pOld, pUpgrade) ){
          pUpgrade = pOld;
        }

      }
    }
    if( pUpgrade ){
      pUpgrade->iCol = pNew->iCol;
      pUpgrade->anEq[pUpgrade->iCol] = pNew->anEq[pUpgrade->iCol];
      goto find_new_min;
    }
  }

  /* If necessary, remove sample iMin to make room for the new sample. */
  if( p->nSample>=p->mxSample ){
    Stat4Sample *pMin = &p->a[p->iMin];
    tRowcnt *anEq = pMin->anEq;
    tRowcnt *anLt = pMin->anLt;
Changes to test/analyze9.test.
142
143
144
145
146
147
148













































































































149
150
151
  INSERT INTO t1 SELECT a+3,4,'four'||substr(c,4) FROM t1 WHERE c GLOB 'one-*';
  INSERT INTO t1 SELECT a+4,5,'five'||substr(c,4) FROM t1 WHERE c GLOB 'one-*';
  INSERT INTO t1 SELECT a+5,6,'six'||substr(c,4) FROM t1 WHERE c GLOB 'one-*';	
  CREATE INDEX t1b ON t1(b);
  ANALYZE;
  SELECT c FROM t1 WHERE b=3 AND a BETWEEN 30 AND 60;
} {three-d three-e three-f}














































































































finish_test








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
  INSERT INTO t1 SELECT a+3,4,'four'||substr(c,4) FROM t1 WHERE c GLOB 'one-*';
  INSERT INTO t1 SELECT a+4,5,'five'||substr(c,4) FROM t1 WHERE c GLOB 'one-*';
  INSERT INTO t1 SELECT a+5,6,'six'||substr(c,4) FROM t1 WHERE c GLOB 'one-*';	
  CREATE INDEX t1b ON t1(b);
  ANALYZE;
  SELECT c FROM t1 WHERE b=3 AND a BETWEEN 30 AND 60;
} {three-d three-e three-f}

#-------------------------------------------------------------------------
# These tests verify that the sample selection for stat4 appears to be 
# working as designed.
#

reset_db
db func lindex lindex
db func lrange lrange

do_execsql_test 4.0 {
  DROP TABLE IF EXISTS t1;
  CREATE TABLE t1(a, b, c);
  CREATE INDEX i1 ON t1(c, b, a);
}


proc insert_filler_rows_n {iStart args} {
  set A(-ncopy) 1
  set A(-nval) 1

  foreach {k v} $args {
    if {[info exists A($k)]==0} { error "no such option: $k" }
    set A($k) $v
  }
  if {[llength $args] % 2} {
    error "option requires an argument: [lindex $args end]"
  }

  for {set i 0} {$i < $A(-nval)} {incr i} {
    set iVal [expr $iStart+$i]
    for {set j 0} {$j < $A(-ncopy)} {incr j} {
      execsql { INSERT INTO t1 VALUES($iVal, $iVal, $iVal) }
    }
  }
}

do_test 4.1 {
  execsql { BEGIN }
  insert_filler_rows_n  0  -ncopy 10 -nval 19
  insert_filler_rows_n 20  -ncopy  1 -nval 100

  execsql {
    INSERT INTO t1(c, b, a) VALUES(200, 1, 'a');
    INSERT INTO t1(c, b, a) VALUES(200, 1, 'b');
    INSERT INTO t1(c, b, a) VALUES(200, 1, 'c');

    INSERT INTO t1(c, b, a) VALUES(200, 2, 'e');
    INSERT INTO t1(c, b, a) VALUES(200, 2, 'f');

    INSERT INTO t1(c, b, a) VALUES(201, 3, 'g');
    INSERT INTO t1(c, b, a) VALUES(201, 4, 'h');

    ANALYZE;
    SELECT count(*) FROM sqlite_stat4;
    SELECT count(*) FROM t1;
  }
} {24 297}

do_execsql_test 4.2 {
  SELECT 
    neq,
    lrange(nlt, 0, 2),
    lrange(ndlt, 0, 2),
    lrange(test_decode(sample), 0, 2)
    FROM sqlite_stat4
  ORDER BY rowid LIMIT 16;
} {
  {10 10 10 1} {0 0 0} {0 0 0} {0 0 0}
  {10 10 10 1} {10 10 10} {1 1 1} {1 1 1}
  {10 10 10 1} {20 20 20} {2 2 2} {2 2 2}
  {10 10 10 1} {30 30 30} {3 3 3} {3 3 3}
  {10 10 10 1} {40 40 40} {4 4 4} {4 4 4}
  {10 10 10 1} {50 50 50} {5 5 5} {5 5 5}
  {10 10 10 1} {60 60 60} {6 6 6} {6 6 6}
  {10 10 10 1} {70 70 70} {7 7 7} {7 7 7}
  {10 10 10 1} {80 80 80} {8 8 8} {8 8 8}
  {10 10 10 1} {90 90 90} {9 9 9} {9 9 9}
  {10 10 10 1} {100 100 100} {10 10 10} {10 10 10}
  {10 10 10 1} {110 110 110} {11 11 11} {11 11 11}
  {10 10 10 1} {120 120 120} {12 12 12} {12 12 12}
  {10 10 10 1} {130 130 130} {13 13 13} {13 13 13}
  {10 10 10 1} {140 140 140} {14 14 14} {14 14 14}
  {10 10 10 1} {150 150 150} {15 15 15} {15 15 15}
}

do_execsql_test 4.3 {
  SELECT 
    neq,
    lrange(nlt, 0, 2),
    lrange(ndlt, 0, 2),
    lrange(test_decode(sample), 0, 1)
    FROM sqlite_stat4
  ORDER BY rowid DESC LIMIT 2;
} {
  {2 1 1 1} {295 296 296} {120 122 125} {201 4} 
  {5 3 1 1} {290 290 292} {119 119 121} {200 1}
}

do_execsql_test 4.4 { SELECT count(DISTINCT c) FROM t1 WHERE c<201 } 120
do_execsql_test 4.5 { SELECT count(DISTINCT c) FROM t1 WHERE c<200 } 119

# Check that the perioidic samples are present.
do_execsql_test 4.6 {
  SELECT count(*) FROM sqlite_stat4
  WHERE lindex(test_decode(sample), 3) IN 
    ('34', '68', '102', '136', '170', '204', '238', '272')
} {8}


finish_test