SQLite

Check-in [2cd374cd23]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:The first of a planned series of enhancements to the query planner that enable it to make better use of sqlite_stat2 histograms when the table has many repeated values.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | stat2-enhancement
Files: files | file ages | folders
SHA1: 2cd374cd23fa2fd38f49090d6eeb9b1e521d51d5
User & Date: drh 2011-01-20 02:56:37.736
Context
2011-01-20
16:52
Use histogram data to improve the row-count estimates on equality constraints. (check-in: 6bfc5c69eb user: drh tags: stat2-enhancement)
02:56
The first of a planned series of enhancements to the query planner that enable it to make better use of sqlite_stat2 histograms when the table has many repeated values. (check-in: 2cd374cd23 user: drh tags: stat2-enhancement)
2011-01-19
21:58
Comment improvements in pcache1.c. No changes to code. (check-in: 9660a0a225 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/where.c.
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206


2207

2208




2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220

2221
2222

2223
2224
2225
2226
2227
2228
2229
2230
2231
2232





2233
2234
2235
2236
2237
2238
2239
  bestOrClauseIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, pCost);
}
#endif /* SQLITE_OMIT_VIRTUALTABLE */

/*
** Argument pIdx is a pointer to an index structure that has an array of
** SQLITE_INDEX_SAMPLES evenly spaced samples of the first indexed column
** stored in Index.aSample. The domain of values stored in said column
** may be thought of as divided into (SQLITE_INDEX_SAMPLES+1) regions.
** Region 0 contains all values smaller than the first sample value. Region


** 1 contains values larger than or equal to the value of the first sample,

** but smaller than the value of the second. And so on.




**
** If successful, this function determines which of the regions value 
** pVal lies in, sets *piRegion to the region index (a value between 0
** and SQLITE_INDEX_SAMPLES+1, inclusive) and returns SQLITE_OK.
** Or, if an OOM occurs while converting text values between encodings,
** SQLITE_NOMEM is returned and *piRegion is undefined.
*/
#ifdef SQLITE_ENABLE_STAT2
static int whereRangeRegion(
  Parse *pParse,              /* Database connection */
  Index *pIdx,                /* Index to consider domain of */
  sqlite3_value *pVal,        /* Value to consider */

  int *piRegion               /* OUT: Region of domain in which value lies */
){

  if( ALWAYS(pVal) ){
    IndexSample *aSample = pIdx->aSample;
    int i = 0;
    int eType = sqlite3_value_type(pVal);

    if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){
      double r = sqlite3_value_double(pVal);
      for(i=0; i<SQLITE_INDEX_SAMPLES; i++){
        if( aSample[i].eType==SQLITE_NULL ) continue;
        if( aSample[i].eType>=SQLITE_TEXT || aSample[i].u.r>r ) break;





      }
    }else{ 
      sqlite3 *db = pParse->db;
      CollSeq *pColl;
      const u8 *z;
      int n;








|
|
|
>
>
|
>
|
>
>
>
>












>


>









|
>
>
>
>
>







2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
  bestOrClauseIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, pCost);
}
#endif /* SQLITE_OMIT_VIRTUALTABLE */

/*
** Argument pIdx is a pointer to an index structure that has an array of
** SQLITE_INDEX_SAMPLES evenly spaced samples of the first indexed column
** stored in Index.aSample. These samples divide the domain of values stored
** the index into (SQLITE_INDEX_SAMPLES+1) regions.
** Region 0 contains all values less than the first sample value. Region
** 1 contains values between the first and second samples.  Region 2 contains
** values between samples 2 and 3.  And so on.  Region SQLITE_INDEX_SAMPLES
** contains values larger than the last sample.
**
** If the index contains many duplicates of a single value, then it is
** possible that two or more adjacent samples can hold the same value.
** When that is the case, the smallest possible region code is returned
** when roundUp is false and the largest possible region code is returned
** when roundUp is true.
**
** If successful, this function determines which of the regions value 
** pVal lies in, sets *piRegion to the region index (a value between 0
** and SQLITE_INDEX_SAMPLES+1, inclusive) and returns SQLITE_OK.
** Or, if an OOM occurs while converting text values between encodings,
** SQLITE_NOMEM is returned and *piRegion is undefined.
*/
#ifdef SQLITE_ENABLE_STAT2
static int whereRangeRegion(
  Parse *pParse,              /* Database connection */
  Index *pIdx,                /* Index to consider domain of */
  sqlite3_value *pVal,        /* Value to consider */
  int roundUp,                /* Return largest valid region if true */
  int *piRegion               /* OUT: Region of domain in which value lies */
){
  assert( roundUp==0 || roundUp==1 );
  if( ALWAYS(pVal) ){
    IndexSample *aSample = pIdx->aSample;
    int i = 0;
    int eType = sqlite3_value_type(pVal);

    if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){
      double r = sqlite3_value_double(pVal);
      for(i=0; i<SQLITE_INDEX_SAMPLES; i++){
        if( aSample[i].eType==SQLITE_NULL ) continue;
        if( aSample[i].eType>=SQLITE_TEXT ) break;
        if( roundUp ){
          if( aSample[i].u.r>r ) break;
        }else{
          if( aSample[i].u.r>=r ) break;
        }
      }
    }else{ 
      sqlite3 *db = pParse->db;
      CollSeq *pColl;
      const u8 *z;
      int n;

2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
          return SQLITE_NOMEM;
        }
        assert( z && pColl && pColl->xCmp );
      }
      n = sqlite3ValueBytes(pVal, pColl->enc);

      for(i=0; i<SQLITE_INDEX_SAMPLES; i++){
        int r;
        int eSampletype = aSample[i].eType;
        if( eSampletype==SQLITE_NULL || eSampletype<eType ) continue;
        if( (eSampletype!=eType) ) break;
#ifndef SQLITE_OMIT_UTF16
        if( pColl->enc!=SQLITE_UTF8 ){
          int nSample;
          char *zSample = sqlite3Utf8to16(
              db, pColl->enc, aSample[i].u.z, aSample[i].nByte, &nSample
          );
          if( !zSample ){
            assert( db->mallocFailed );
            return SQLITE_NOMEM;
          }
          r = pColl->xCmp(pColl->pUser, nSample, zSample, n, z);
          sqlite3DbFree(db, zSample);
        }else
#endif
        {
          r = pColl->xCmp(pColl->pUser, aSample[i].nByte, aSample[i].u.z, n, z);
        }
        if( r>0 ) break;
      }
    }

    assert( i>=0 && i<=SQLITE_INDEX_SAMPLES );
    *piRegion = i;
  }
  return SQLITE_OK;







|













|




|

|







2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
          return SQLITE_NOMEM;
        }
        assert( z && pColl && pColl->xCmp );
      }
      n = sqlite3ValueBytes(pVal, pColl->enc);

      for(i=0; i<SQLITE_INDEX_SAMPLES; i++){
        int c;
        int eSampletype = aSample[i].eType;
        if( eSampletype==SQLITE_NULL || eSampletype<eType ) continue;
        if( (eSampletype!=eType) ) break;
#ifndef SQLITE_OMIT_UTF16
        if( pColl->enc!=SQLITE_UTF8 ){
          int nSample;
          char *zSample = sqlite3Utf8to16(
              db, pColl->enc, aSample[i].u.z, aSample[i].nByte, &nSample
          );
          if( !zSample ){
            assert( db->mallocFailed );
            return SQLITE_NOMEM;
          }
          c = pColl->xCmp(pColl->pUser, nSample, zSample, n, z);
          sqlite3DbFree(db, zSample);
        }else
#endif
        {
          c = pColl->xCmp(pColl->pUser, aSample[i].nByte, aSample[i].u.z, n, z);
        }
        if( c-roundUp>=0 ) break;
      }
    }

    assert( i>=0 && i<=SQLITE_INDEX_SAMPLES );
    *piRegion = i;
  }
  return SQLITE_OK;
2382
2383
2384
2385
2386
2387
2388


2389
2390
2391
2392
2393


2394
2395
2396
2397


2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422

2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433

  if( nEq==0 && p->aSample ){
    sqlite3_value *pLowerVal = 0;
    sqlite3_value *pUpperVal = 0;
    int iEst;
    int iLower = 0;
    int iUpper = SQLITE_INDEX_SAMPLES;


    u8 aff = p->pTable->aCol[p->aiColumn[0]].affinity;

    if( pLower ){
      Expr *pExpr = pLower->pExpr->pRight;
      rc = valueFromExpr(pParse, pExpr, aff, &pLowerVal);


    }
    if( rc==SQLITE_OK && pUpper ){
      Expr *pExpr = pUpper->pExpr->pRight;
      rc = valueFromExpr(pParse, pExpr, aff, &pUpperVal);


    }

    if( rc!=SQLITE_OK || (pLowerVal==0 && pUpperVal==0) ){
      sqlite3ValueFree(pLowerVal);
      sqlite3ValueFree(pUpperVal);
      goto range_est_fallback;
    }else if( pLowerVal==0 ){
      rc = whereRangeRegion(pParse, p, pUpperVal, &iUpper);
      if( pLower ) iLower = iUpper/2;
    }else if( pUpperVal==0 ){
      rc = whereRangeRegion(pParse, p, pLowerVal, &iLower);
      if( pUpper ) iUpper = (iLower + SQLITE_INDEX_SAMPLES + 1)/2;
    }else{
      rc = whereRangeRegion(pParse, p, pUpperVal, &iUpper);
      if( rc==SQLITE_OK ){
        rc = whereRangeRegion(pParse, p, pLowerVal, &iLower);
      }
    }

    iEst = iUpper - iLower;
    testcase( iEst==SQLITE_INDEX_SAMPLES );
    assert( iEst<=SQLITE_INDEX_SAMPLES );
    if( iEst<1 ){
      iEst = 1;
    }


    sqlite3ValueFree(pLowerVal);
    sqlite3ValueFree(pUpperVal);
    *piEst = (iEst * 100)/SQLITE_INDEX_SAMPLES;
    return rc;
  }
range_est_fallback:
#else
  UNUSED_PARAMETER(pParse);
  UNUSED_PARAMETER(p);
  UNUSED_PARAMETER(nEq);







>
>





>
>




>
>







|


|


|

|







|
|
>
|


<







2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446

2447
2448
2449
2450
2451
2452
2453

  if( nEq==0 && p->aSample ){
    sqlite3_value *pLowerVal = 0;
    sqlite3_value *pUpperVal = 0;
    int iEst;
    int iLower = 0;
    int iUpper = SQLITE_INDEX_SAMPLES;
    int roundUpUpper;
    int roundUpLower;
    u8 aff = p->pTable->aCol[p->aiColumn[0]].affinity;

    if( pLower ){
      Expr *pExpr = pLower->pExpr->pRight;
      rc = valueFromExpr(pParse, pExpr, aff, &pLowerVal);
      assert( pLower->eOperator==WO_GT || pLower->eOperator==WO_GE );
      roundUpLower = (pLower->eOperator==WO_GT) ?1:0;
    }
    if( rc==SQLITE_OK && pUpper ){
      Expr *pExpr = pUpper->pExpr->pRight;
      rc = valueFromExpr(pParse, pExpr, aff, &pUpperVal);
      assert( pUpper->eOperator==WO_LT || pUpper->eOperator==WO_LE );
      roundUpUpper = (pUpper->eOperator==WO_LE) ?1:0;
    }

    if( rc!=SQLITE_OK || (pLowerVal==0 && pUpperVal==0) ){
      sqlite3ValueFree(pLowerVal);
      sqlite3ValueFree(pUpperVal);
      goto range_est_fallback;
    }else if( pLowerVal==0 ){
      rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper);
      if( pLower ) iLower = iUpper/2;
    }else if( pUpperVal==0 ){
      rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower);
      if( pUpper ) iUpper = (iLower + SQLITE_INDEX_SAMPLES + 1)/2;
    }else{
      rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper);
      if( rc==SQLITE_OK ){
        rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower);
      }
    }

    iEst = iUpper - iLower;
    testcase( iEst==SQLITE_INDEX_SAMPLES );
    assert( iEst<=SQLITE_INDEX_SAMPLES );
    if( iEst<1 ){
      *piEst = 50/SQLITE_INDEX_SAMPLES;
    }else{
      *piEst = (iEst*100)/SQLITE_INDEX_SAMPLES;
    }
    sqlite3ValueFree(pLowerVal);
    sqlite3ValueFree(pUpperVal);

    return rc;
  }
range_est_fallback:
#else
  UNUSED_PARAMETER(pParse);
  UNUSED_PARAMETER(p);
  UNUSED_PARAMETER(nEq);
Changes to test/analyze2.test.
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
  SELECT * FROM t1 WHERE x BETWEEN 100 AND 500 AND y BETWEEN 400 AND 700
} {
  0 0 0 {SEARCH TABLE t1 USING INDEX t1_y (y>? AND y<?) (~75 rows)}
}
do_eqp_test 2.7 {
  SELECT * FROM t1 WHERE x BETWEEN -400 AND -300 AND y BETWEEN 100 AND 300
} {
  0 0 0 {SEARCH TABLE t1 USING INDEX t1_x (x>? AND x<?) (~25 rows)}
}
do_eqp_test 2.8 {
  SELECT * FROM t1 WHERE x BETWEEN 100 AND 300 AND y BETWEEN -400 AND -300
} {
  0 0 0 {SEARCH TABLE t1 USING INDEX t1_y (y>? AND y<?) (~25 rows)}
}
do_eqp_test 2.9 {
  SELECT * FROM t1 WHERE x BETWEEN 500 AND 100 AND y BETWEEN 100 AND 300
} {
  0 0 0 {SEARCH TABLE t1 USING INDEX t1_x (x>? AND x<?) (~25 rows)}
}
do_eqp_test 2.10 {
  SELECT * FROM t1 WHERE x BETWEEN 100 AND 300 AND y BETWEEN 500 AND 100
} {
  0 0 0 {SEARCH TABLE t1 USING INDEX t1_y (y>? AND y<?) (~25 rows)}
}

do_test analyze2-3.1 {
  set alphabet [list a b c d e f g h i j]
  execsql BEGIN
  for {set i 0} {$i < 1000} {incr i} {
    set str    [lindex $alphabet [expr ($i/100)%10]] 







|




|




|




|







150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
  SELECT * FROM t1 WHERE x BETWEEN 100 AND 500 AND y BETWEEN 400 AND 700
} {
  0 0 0 {SEARCH TABLE t1 USING INDEX t1_y (y>? AND y<?) (~75 rows)}
}
do_eqp_test 2.7 {
  SELECT * FROM t1 WHERE x BETWEEN -400 AND -300 AND y BETWEEN 100 AND 300
} {
  0 0 0 {SEARCH TABLE t1 USING INDEX t1_x (x>? AND x<?) (~12 rows)}
}
do_eqp_test 2.8 {
  SELECT * FROM t1 WHERE x BETWEEN 100 AND 300 AND y BETWEEN -400 AND -300
} {
  0 0 0 {SEARCH TABLE t1 USING INDEX t1_y (y>? AND y<?) (~12 rows)}
}
do_eqp_test 2.9 {
  SELECT * FROM t1 WHERE x BETWEEN 500 AND 100 AND y BETWEEN 100 AND 300
} {
  0 0 0 {SEARCH TABLE t1 USING INDEX t1_x (x>? AND x<?) (~12 rows)}
}
do_eqp_test 2.10 {
  SELECT * FROM t1 WHERE x BETWEEN 100 AND 300 AND y BETWEEN 500 AND 100
} {
  0 0 0 {SEARCH TABLE t1 USING INDEX t1_y (y>? AND y<?) (~12 rows)}
}

do_test analyze2-3.1 {
  set alphabet [list a b c d e f g h i j]
  execsql BEGIN
  for {set i 0} {$i < 1000} {incr i} {
    set str    [lindex $alphabet [expr ($i/100)%10]] 
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
  SELECT * FROM t1 WHERE x BETWEEN 100 AND 500 AND y BETWEEN 'a' AND 'b'
} {
  0 0 0 {SEARCH TABLE t1 USING INDEX t1_y (y>? AND y<?) (~50 rows)}
}
do_eqp_test 3.4 {
  SELECT * FROM t1 WHERE x BETWEEN 100 AND 400 AND y BETWEEN 'a' AND 'h'
} {
  0 0 0 {SEARCH TABLE t1 USING INDEX t1_x (x>? AND x<?) (~50 rows)}
}
do_eqp_test 3.5 {
  SELECT * FROM t1 WHERE x<'a' AND y>'h'
} {
  0 0 0 {SEARCH TABLE t1 USING INDEX t1_y (y>?) (~66 rows)}
}
do_eqp_test 3.6 {







|







203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
  SELECT * FROM t1 WHERE x BETWEEN 100 AND 500 AND y BETWEEN 'a' AND 'b'
} {
  0 0 0 {SEARCH TABLE t1 USING INDEX t1_y (y>? AND y<?) (~50 rows)}
}
do_eqp_test 3.4 {
  SELECT * FROM t1 WHERE x BETWEEN 100 AND 400 AND y BETWEEN 'a' AND 'h'
} {
  0 0 0 {SEARCH TABLE t1 USING INDEX t1_x (x>? AND x<?) (~100 rows)}
}
do_eqp_test 3.5 {
  SELECT * FROM t1 WHERE x<'a' AND y>'h'
} {
  0 0 0 {SEARCH TABLE t1 USING INDEX t1_y (y>?) (~66 rows)}
}
do_eqp_test 3.6 {
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
do_test analyze2-6.2.2 {
  db cache flush
  execsql ANALYZE
  eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
        t5.a>1 AND t5.a<15 AND
        t6.a>1
  }
} {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~2 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
do_test analyze2-6.2.3 {
  sqlite3 db test.db
  eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
        t5.a>1 AND t5.a<15 AND
        t6.a>1
  }
} {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~2 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
do_test analyze2-6.2.4 {
  execsql { 
    PRAGMA writable_schema = 1;
    DELETE FROM sqlite_master WHERE tbl_name = 'sqlite_stat1';
  }
  sqlite3 db test.db
  eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 







|






|







412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
do_test analyze2-6.2.2 {
  db cache flush
  execsql ANALYZE
  eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
        t5.a>1 AND t5.a<15 AND
        t6.a>1
  }
} {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~1 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
do_test analyze2-6.2.3 {
  sqlite3 db test.db
  eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
        t5.a>1 AND t5.a<15 AND
        t6.a>1
  }
} {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~1 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
do_test analyze2-6.2.4 {
  execsql { 
    PRAGMA writable_schema = 1;
    DELETE FROM sqlite_master WHERE tbl_name = 'sqlite_stat1';
  }
  sqlite3 db test.db
  eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
  }
  sqlite3 db test.db
  execsql ANALYZE
  eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
        t5.a>1 AND t5.a<15 AND
        t6.a>1
  }
} {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~2 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}

#--------------------------------------------------------------------
# These tests, analyze2-7.*, test that the sqlite_stat2 functionality
# works in shared-cache mode. Note that these tests reuse the database
# created for the analyze2-6.* tests.
#
ifcapable shared_cache {







|







453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
  }
  sqlite3 db test.db
  execsql ANALYZE
  eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
        t5.a>1 AND t5.a<15 AND
        t6.a>1
  }
} {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~1 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}

#--------------------------------------------------------------------
# These tests, analyze2-7.*, test that the sqlite_stat2 functionality
# works in shared-cache mode. Note that these tests reuse the database
# created for the analyze2-6.* tests.
#
ifcapable shared_cache {
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
  } {20}

  do_test analyze2-7.5 {
    eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
          t5.a>1 AND t5.a<15 AND
          t6.a>1
    } db1
  } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~2 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
  do_test analyze2-7.6 {
    incr_schema_cookie test.db
    execsql { SELECT * FROM sqlite_master } db2
    eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
          t5.a>1 AND t5.a<15 AND
          t6.a>1
    } db2
  } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~2 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
  do_test analyze2-7.7 {
    incr_schema_cookie test.db
    execsql { SELECT * FROM sqlite_master } db1
    eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
          t5.a>1 AND t5.a<15 AND
          t6.a>1
    } db1
  } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~2 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}

  do_test analyze2-7.8 {
    execsql { DELETE FROM sqlite_stat2 } db2
    execsql { SELECT * FROM sqlite_master } db1
    eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
          t5.a>1 AND t5.a<15 AND
          t6.a>1
    } db1
  } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~2 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
  do_test analyze2-7.9 {
    execsql { SELECT * FROM sqlite_master } db2
    eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
          t5.a>1 AND t5.a<15 AND
          t6.a>1
    } db2
  } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~2 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}

  do_test analyze2-7.10 {
    incr_schema_cookie test.db
    execsql { SELECT * FROM sqlite_master } db1
    eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
          t5.a>1 AND t5.a<15 AND
          t6.a>1







|







|







|








|






|







497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
  } {20}

  do_test analyze2-7.5 {
    eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
          t5.a>1 AND t5.a<15 AND
          t6.a>1
    } db1
  } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~1 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
  do_test analyze2-7.6 {
    incr_schema_cookie test.db
    execsql { SELECT * FROM sqlite_master } db2
    eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
          t5.a>1 AND t5.a<15 AND
          t6.a>1
    } db2
  } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~1 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
  do_test analyze2-7.7 {
    incr_schema_cookie test.db
    execsql { SELECT * FROM sqlite_master } db1
    eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
          t5.a>1 AND t5.a<15 AND
          t6.a>1
    } db1
  } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~1 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}

  do_test analyze2-7.8 {
    execsql { DELETE FROM sqlite_stat2 } db2
    execsql { SELECT * FROM sqlite_master } db1
    eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
          t5.a>1 AND t5.a<15 AND
          t6.a>1
    } db1
  } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~1 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}
  do_test analyze2-7.9 {
    execsql { SELECT * FROM sqlite_master } db2
    eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
          t5.a>1 AND t5.a<15 AND
          t6.a>1
    } db2
  } {0 0 1 {SEARCH TABLE t6 USING COVERING INDEX t6i (a>?) (~1 rows)} 0 1 0 {SEARCH TABLE t5 USING INTEGER PRIMARY KEY (rowid=?) (~1 rows)}}

  do_test analyze2-7.10 {
    incr_schema_cookie test.db
    execsql { SELECT * FROM sqlite_master } db1
    eqp { SELECT * FROM t5,t6 WHERE t5.rowid=t6.rowid AND 
          t5.a>1 AND t5.a<15 AND
          t6.a>1
Added test/analyze5.test.


































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
# 2011 January 19
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file implements tests for SQLite library.  The focus of the tests
# in this file is the use of the sqlite_stat2 histogram data on tables
# with many repeated values and only a few distinct values.
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl

ifcapable !stat2 {
  finish_test
  return
}

set testprefix analyze5

proc eqp {sql {db db}} {
  uplevel execsql [list "EXPLAIN QUERY PLAN $sql"] $db
}

do_test analyze5-1.0 {
  execsql { CREATE TABLE t1(x INTEGER PRIMARY KEY, y, z) }
  for {set i 0} {$i < 1000} {incr i} {
    set j [expr {$i>=25 && $i<=50}]
    set k [expr {($i>=400) + ($i>=700) + ($i>=875)}]
    execsql { INSERT INTO t1 VALUES($i,$j,$k) }
  }
  execsql { 
    CREATE INDEX t1y ON t1(y);
    CREATE INDEX t1z ON t1(z);
    ANALYZE;
    SELECT * FROM sqlite_stat2 ORDER BY 1, 2, 3;
  }
} [list t1 t1y 0 0 \
        t1 t1y 1 0 \
        t1 t1y 2 0 \
        t1 t1y 3 0 \
        t1 t1y 4 0 \
        t1 t1y 5 0 \
        t1 t1y 6 0 \
        t1 t1y 7 0 \
        t1 t1y 8 0 \
        t1 t1y 9 0 \
        t1 t1z 0 0 \
        t1 t1z 1 0 \
        t1 t1z 2 0 \
        t1 t1z 3 0 \
        t1 t1z 4 1 \
        t1 t1z 5 1 \
        t1 t1z 6 1 \
        t1 t1z 7 2 \
        t1 t1z 8 2 \
        t1 t1z 9 3]

# Verify that range queries generate the correct row count estimates
#
foreach {testid where rows} {
  1  {z>=0 AND z<=0}     400
  2  {z>=1 AND z<=1}     300
  3  {z>=2 AND z<=2}     200
  4  {z>=3 AND z<=3}     100
  5  {z>=4 AND z<=4}      50
  6  {z>=-1 AND z<=-1}    50
  7  {z>1 AND z<3}       200
  8  {z>0 AND z<100}     600
  9  {z>=1 AND z<100}    600
 10  {z>1 AND z<100}     300
 11  {z>=2 AND z<100}    300
 12  {z>2 AND z<100}     100
 13  {z>=3 AND z<100}    100
 14  {z>3 AND z<100}      50
 15  {z>=4 AND z<100}     50
 16  {z>=-100 AND z<=-1}  50
 17  {z>=-100 AND z<=0}  400
 18  {z>=-100 AND z<0}    50
 19  {z>=-100 AND z<=1}  700
 20  {z>=-100 AND z<2}   700
 21  {z>=-100 AND z<=2}  900
 22  {z>=-100 AND z<3}   900

 31  {z>=0.0 AND z<=0.0}   400
 32  {z>=1.0 AND z<=1.0}   300
 33  {z>=2.0 AND z<=2.0}   200
 34  {z>=3.0 AND z<=3.0}   100
 35  {z>=4.0 AND z<=4.0}    50
 36  {z>=-1.0 AND z<=-1.0}  50
 37  {z>1.5 AND z<3.0}     200
 38  {z>0.5 AND z<100}     600
 39  {z>=1.0 AND z<100}    600
 40  {z>1.5 AND z<100}     300
 41  {z>=2.0 AND z<100}    300
 42  {z>2.1 AND z<100}     100
 43  {z>=3.0 AND z<100}    100
 44  {z>3.2 AND z<100}      50
 45  {z>=4.0 AND z<100}     50
 46  {z>=-100 AND z<=-1.0}  50
 47  {z>=-100 AND z<=0.0}  400
 48  {z>=-100 AND z<0.0}    50
 49  {z>=-100 AND z<=1.0}  700
 50  {z>=-100 AND z<2.0}   700
 51  {z>=-100 AND z<=2.0}  900
 52  {z>=-100 AND z<3.0}   900

} {
  do_test analyze5-1.$testid {
    eqp "SELECT * FROM t1 WHERE $where"
  } [format {0 0 0 {SEARCH TABLE t1 USING INDEX t1z (z>? AND z<?) (~%d rows)}} \
       $rows]
}

# Change the table values from integer to floating point and then
# repeat the same sequence of tests.  We should get the same results.
#
do_test analyze5-2.0 {
  db eval {
    UPDATE t1 SET z=z+0.0;
    ANALYZE;
    SELECT sample FROM sqlite_stat2 WHERE idx='t1z' ORDER BY sampleno;
  }
} {0.0 0.0 0.0 0.0 1.0 1.0 1.0 2.0 2.0 3.0}
foreach {testid where rows} {
  1  {z>=0 AND z<=0}     400
  2  {z>=1 AND z<=1}     300
  3  {z>=2 AND z<=2}     200
  4  {z>=3 AND z<=3}     100
  5  {z>=4 AND z<=4}      50
  6  {z>=-1 AND z<=-1}    50
  7  {z>1 AND z<3}       200
  8  {z>0 AND z<100}     600
  9  {z>=1 AND z<100}    600
 10  {z>1 AND z<100}     300
 11  {z>=2 AND z<100}    300
 12  {z>2 AND z<100}     100
 13  {z>=3 AND z<100}    100
 14  {z>3 AND z<100}      50
 15  {z>=4 AND z<100}     50
 16  {z>=-100 AND z<=-1}  50
 17  {z>=-100 AND z<=0}  400
 18  {z>=-100 AND z<0}    50
 19  {z>=-100 AND z<=1}  700
 20  {z>=-100 AND z<2}   700
 21  {z>=-100 AND z<=2}  900
 22  {z>=-100 AND z<3}   900

 31  {z>=0.0 AND z<=0.0}   400
 32  {z>=1.0 AND z<=1.0}   300
 33  {z>=2.0 AND z<=2.0}   200
 34  {z>=3.0 AND z<=3.0}   100
 35  {z>=4.0 AND z<=4.0}    50
 36  {z>=-1.0 AND z<=-1.0}  50
 37  {z>1.5 AND z<3.0}     200
 38  {z>0.5 AND z<100}     600
 39  {z>=1.0 AND z<100}    600
 40  {z>1.5 AND z<100}     300
 41  {z>=2.0 AND z<100}    300
 42  {z>2.1 AND z<100}     100
 43  {z>=3.0 AND z<100}    100
 44  {z>3.2 AND z<100}      50
 45  {z>=4.0 AND z<100}     50
 46  {z>=-100 AND z<=-1.0}  50
 47  {z>=-100 AND z<=0.0}  400
 48  {z>=-100 AND z<0.0}    50
 49  {z>=-100 AND z<=1.0}  700
 50  {z>=-100 AND z<2.0}   700
 51  {z>=-100 AND z<=2.0}  900
 52  {z>=-100 AND z<3.0}   900
} {
  do_test analyze5-2.$testid {
    eqp "SELECT * FROM t1 WHERE $where"
  } [format {0 0 0 {SEARCH TABLE t1 USING INDEX t1z (z>? AND z<?) (~%d rows)}} \
       $rows]
}

# Repeat the same range query tests using TEXT columns.
#
do_test analyze5-3.0 {
  db eval {
    UPDATE t1 SET y=CASE z WHEN 0 THEN 'alpha' WHEN 1 THEN 'bravo'
                           WHEN 2 THEN 'charlie' ELSE 'delta' END;
    ANALYZE;
    SELECT sample FROM sqlite_stat2 WHERE idx='t1y' ORDER BY sampleno;
  }
} {alpha alpha alpha alpha bravo bravo bravo charlie charlie delta}
foreach {testid where rows} {
  1  {y>='alpha' AND y<='alpha'}     400
  2  {y>='bravo' AND y<='bravo'}     300
  3  {y>='charlie' AND y<='charlie'} 200
  4  {y>='delta' AND y<='delta'}     100
  5  {y>='echo' AND y<='echo'}        50
  6  {y>='' AND y<=''}                50
  7  {y>'bravo' AND y<'delta'}       200
  8  {y>'alpha' AND y<'zzz'}         600
  9  {y>='bravo' AND y<'zzz'}        600
 10  {y>'bravo' AND y<'zzz'}         300
 11  {y>='charlie' AND y<'zzz'}      300
 12  {y>'charlie' AND y<'zzz'}       100
 13  {y>='delta' AND y<'zzz'}        100
 14  {y>'delta' AND y<'zzz'}          50
 15  {y>='echo' AND y<'zzz'}          50
 16  {y>=0 AND y<=''}                 50
 17  {y>=0 AND y<='alpha'}           400
 18  {y>=0 AND y<'alpha'}             50
 19  {y>=0 AND y<='bravo'}           700
 20  {y>=0 AND y<'charlie'}          700
 21  {y>=0 AND y<='charlie'}         900
 22  {y>=0 AND y<'delta'}            900
} {
  do_test analyze5-3.$testid {
    eqp "SELECT * FROM t1 WHERE $where"
  } [format {0 0 0 {SEARCH TABLE t1 USING INDEX t1y (y>? AND y<?) (~%d rows)}} \
       $rows]
}


finish_test