/ Check-in [89b2f708]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Further testing and bug fixing for sqlite_stat3. Added the Index.avgEq field to index statistics. Fixed several problems in the query planner associated with stat3.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | stat3-enhancement
Files: files | file ages | folders
SHA1: 89b2f70884cad0abdf4c66cb64ecddb2820ded74
User & Date: drh 2011-08-13 19:35:19
Context
2011-08-15
12:02
Fix a couple of typos in comments in analyze.c. check-in: ae31dc67 user: dan tags: stat3-enhancement
2011-08-13
19:35
Further testing and bug fixing for sqlite_stat3. Added the Index.avgEq field to index statistics. Fixed several problems in the query planner associated with stat3. check-in: 89b2f708 user: drh tags: stat3-enhancement
15:25
Add the sqlite_stat3.nDLT field. Use an linear congruence PRNG to choose which samples to select from among those with the same nEq field. check-in: 1dcd2428 user: drh tags: stat3-enhancement
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/analyze.c.

953
954
955
956
957
958
959

960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979


980
981
982
983
984
985
986
987
988
989
990
991
992

993
994







995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
    nSample = sqlite3_column_int(pStmt, 1);
    if( nSample>255 ) continue;
    pIdx = sqlite3FindIndex(db, zIndex, zDb);
    if( pIdx==0 ) continue;
    assert( pIdx->nSample==0 );
    pIdx->nSample = (u8)nSample;
    pIdx->aSample = sqlite3MallocZero( nSample*sizeof(IndexSample) );

    if( pIdx->aSample==0 ){
      db->mallocFailed = 1;
      sqlite3_finalize(pStmt);
      return SQLITE_NOMEM;
    }
  }
  sqlite3_finalize(pStmt);

  zSql = sqlite3MPrintf(db, 
      "SELECT idx,nlt,neq,sample FROM %Q.sqlite_stat3", zDb);
  if( !zSql ){
    return SQLITE_NOMEM;
  }
  rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
  sqlite3DbFree(db, zSql);
  if( rc ) return rc;

  while( sqlite3_step(pStmt)==SQLITE_ROW ){
    char *zIndex;   /* Index name */
    Index *pIdx;    /* Pointer to the index object */



    zIndex = (char *)sqlite3_column_text(pStmt, 0);
    if( zIndex==0 ) continue;
    pIdx = sqlite3FindIndex(db, zIndex, zDb);
    if( pIdx==0 ) continue;
    if( pIdx==pPrevIdx ){
      idx++;
    }else{
      pPrevIdx = pIdx;
      idx = 0;
    }
    assert( idx<pIdx->nSample );
    pSample = &pIdx->aSample[idx];

    pSample->nLt = (tRowcnt)sqlite3_column_int64(pStmt, 1);
    pSample->nEq = (tRowcnt)sqlite3_column_int64(pStmt, 2);







    eType = sqlite3_column_type(pStmt, 3);
    pSample->eType = (u8)eType;
    switch( eType ){
      case SQLITE_INTEGER: {
        pSample->u.i = sqlite3_column_int64(pStmt, 3);
        break;
      }
      case SQLITE_FLOAT: {
        pSample->u.r = sqlite3_column_double(pStmt, 3);
        break;
      }
      case SQLITE_NULL: {
        break;
      }
      default: assert( eType==SQLITE_TEXT || eType==SQLITE_BLOB ); {
        const char *z = (const char *)(
              (eType==SQLITE_BLOB) ?
              sqlite3_column_blob(pStmt, 3):
              sqlite3_column_text(pStmt, 3)
           );
        int n = sqlite3_column_bytes(pStmt, 2);
        if( n>0xffff ) n = 0xffff;
        pSample->nByte = (u16)n;
        if( n < 1){
          pSample->u.z = 0;
        }else{
          pSample->u.z = sqlite3Malloc(n);
          if( pSample->u.z==0 ){







>









|










>
>













>
|
|
>
>
>
>
>
>
>
|



|



|








|
|

|







953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
    nSample = sqlite3_column_int(pStmt, 1);
    if( nSample>255 ) continue;
    pIdx = sqlite3FindIndex(db, zIndex, zDb);
    if( pIdx==0 ) continue;
    assert( pIdx->nSample==0 );
    pIdx->nSample = (u8)nSample;
    pIdx->aSample = sqlite3MallocZero( nSample*sizeof(IndexSample) );
    pIdx->avgEq = pIdx->aiRowEst[1];
    if( pIdx->aSample==0 ){
      db->mallocFailed = 1;
      sqlite3_finalize(pStmt);
      return SQLITE_NOMEM;
    }
  }
  sqlite3_finalize(pStmt);

  zSql = sqlite3MPrintf(db, 
      "SELECT idx,neq,nlt,ndlt,sample FROM %Q.sqlite_stat3", zDb);
  if( !zSql ){
    return SQLITE_NOMEM;
  }
  rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
  sqlite3DbFree(db, zSql);
  if( rc ) return rc;

  while( sqlite3_step(pStmt)==SQLITE_ROW ){
    char *zIndex;   /* Index name */
    Index *pIdx;    /* Pointer to the index object */
    int i;          /* Loop counter */
    tRowcnt sumEq;  /* Sum of the nEq values */

    zIndex = (char *)sqlite3_column_text(pStmt, 0);
    if( zIndex==0 ) continue;
    pIdx = sqlite3FindIndex(db, zIndex, zDb);
    if( pIdx==0 ) continue;
    if( pIdx==pPrevIdx ){
      idx++;
    }else{
      pPrevIdx = pIdx;
      idx = 0;
    }
    assert( idx<pIdx->nSample );
    pSample = &pIdx->aSample[idx];
    pSample->nEq = (tRowcnt)sqlite3_column_int64(pStmt, 1);
    pSample->nLt = (tRowcnt)sqlite3_column_int64(pStmt, 2);
    pSample->nDLt = (tRowcnt)sqlite3_column_int64(pStmt, 3);
    if( idx==pIdx->nSample-1 ){
      if( pSample->nDLt>0 ){
        for(i=0, sumEq=0; i<=idx-1; i++) sumEq += pIdx->aSample[i].nEq;
        pIdx->avgEq = (pSample->nLt - sumEq)/pSample->nDLt;
      }
      if( pIdx->avgEq<=0 ) pIdx->avgEq = 1;
    }
    eType = sqlite3_column_type(pStmt, 4);
    pSample->eType = (u8)eType;
    switch( eType ){
      case SQLITE_INTEGER: {
        pSample->u.i = sqlite3_column_int64(pStmt, 4);
        break;
      }
      case SQLITE_FLOAT: {
        pSample->u.r = sqlite3_column_double(pStmt, 4);
        break;
      }
      case SQLITE_NULL: {
        break;
      }
      default: assert( eType==SQLITE_TEXT || eType==SQLITE_BLOB ); {
        const char *z = (const char *)(
              (eType==SQLITE_BLOB) ?
              sqlite3_column_blob(pStmt, 4):
              sqlite3_column_text(pStmt, 4)
           );
        int n = sqlite3_column_bytes(pStmt, 4);
        if( n>0xffff ) n = 0xffff;
        pSample->nByte = (u16)n;
        if( n < 1){
          pSample->u.z = 0;
        }else{
          pSample->u.z = sqlite3Malloc(n);
          if( pSample->u.z==0 ){

Changes to src/build.c.

2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
....
2170
2171
2172
2173
2174
2175
2176

2177
2178
2179
2180
2181
2182
2183
  ** dropped. Triggers are handled seperately because a trigger can be
  ** created in the temp database that refers to a table in another
  ** database.
  */
  sqlite3NestedParse(pParse, 
      "DELETE FROM %Q.%s WHERE tbl_name=%Q and type!='trigger'",
      pDb->zName, SCHEMA_TABLE(iDb), pTab->zName);
  sqlite3ClearStatTables(pParse, iDb, "tbl", pTab->zName);
  if( !isView && !IsVirtual(pTab) ){
    destroyTable(pParse, pTab);
  }

  /* Remove the table entry from SQLite's internal schema and modify
  ** the schema cookie.
  */
................................................................................
  ** on disk.
  */
  v = sqlite3GetVdbe(pParse);
  if( v ){
    sqlite3BeginWriteOperation(pParse, 1, iDb);
    sqlite3FkDropTable(pParse, pName, pTab);
    sqlite3CodeDropTable(pParse, pTab, iDb, isView);

  }

exit_drop_table:
  sqlite3SrcListDelete(db, pName);
}

/*







<







 







>







2058
2059
2060
2061
2062
2063
2064

2065
2066
2067
2068
2069
2070
2071
....
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
  ** dropped. Triggers are handled seperately because a trigger can be
  ** created in the temp database that refers to a table in another
  ** database.
  */
  sqlite3NestedParse(pParse, 
      "DELETE FROM %Q.%s WHERE tbl_name=%Q and type!='trigger'",
      pDb->zName, SCHEMA_TABLE(iDb), pTab->zName);

  if( !isView && !IsVirtual(pTab) ){
    destroyTable(pParse, pTab);
  }

  /* Remove the table entry from SQLite's internal schema and modify
  ** the schema cookie.
  */
................................................................................
  ** on disk.
  */
  v = sqlite3GetVdbe(pParse);
  if( v ){
    sqlite3BeginWriteOperation(pParse, 1, iDb);
    sqlite3FkDropTable(pParse, pName, pTab);
    sqlite3CodeDropTable(pParse, pTab, iDb, isView);
    sqlite3ClearStatTables(pParse, iDb, "tbl", pTab->zName);
  }

exit_drop_table:
  sqlite3SrcListDelete(db, pName);
}

/*

Changes to src/sqliteInt.h.

1498
1499
1500
1501
1502
1503
1504

1505
1506
1507
1508
1509
1510
1511
....
1516
1517
1518
1519
1520
1521
1522

1523
1524
1525
1526
1527
1528
1529
  u8 nSample;      /* Number of elements in aSample[] */
  char *zColAff;   /* String defining the affinity of each column */
  Index *pNext;    /* The next index associated with the same table */
  Schema *pSchema; /* Schema containing this index */
  u8 *aSortOrder;  /* Array of size Index.nColumn. True==DESC, False==ASC */
  char **azColl;   /* Array of collation sequence names for index */
#ifdef SQLITE_ENABLE_STAT3

  IndexSample *aSample;    /* Samples of the left-most key */
#endif
};

/*
** Each sample stored in the sqlite_stat2 table is represented in memory 
** using a structure of this type.
................................................................................
    double r;       /* Value if eType is SQLITE_FLOAT */
    i64 i;          /* Value if eType is SQLITE_INTEGER */
  } u;
  u8 eType;         /* SQLITE_NULL, SQLITE_INTEGER ... etc. */
  u16 nByte;        /* Size in byte of text or blob. */
  tRowcnt nEq;      /* Est. number of rows where the key equals this sample */
  tRowcnt nLt;      /* Est. number of rows where key is less than this sample */

};

/*
** Each token coming out of the lexer is an instance of
** this structure.  Tokens are also used as part of an expression.
**
** Note if Token.z==0 then Token.dyn and Token.n are undefined and







>







 







>







1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
....
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
  u8 nSample;      /* Number of elements in aSample[] */
  char *zColAff;   /* String defining the affinity of each column */
  Index *pNext;    /* The next index associated with the same table */
  Schema *pSchema; /* Schema containing this index */
  u8 *aSortOrder;  /* Array of size Index.nColumn. True==DESC, False==ASC */
  char **azColl;   /* Array of collation sequence names for index */
#ifdef SQLITE_ENABLE_STAT3
  tRowcnt avgEq;           /* Average nEq value for key values not in aSample */
  IndexSample *aSample;    /* Samples of the left-most key */
#endif
};

/*
** Each sample stored in the sqlite_stat2 table is represented in memory 
** using a structure of this type.
................................................................................
    double r;       /* Value if eType is SQLITE_FLOAT */
    i64 i;          /* Value if eType is SQLITE_INTEGER */
  } u;
  u8 eType;         /* SQLITE_NULL, SQLITE_INTEGER ... etc. */
  u16 nByte;        /* Size in byte of text or blob. */
  tRowcnt nEq;      /* Est. number of rows where the key equals this sample */
  tRowcnt nLt;      /* Est. number of rows where key is less than this sample */
  tRowcnt nDLt;     /* Est. number of distinct keys less than this sample */
};

/*
** Each token coming out of the lexer is an instance of
** this structure.  Tokens are also used as part of an expression.
**
** Note if Token.z==0 then Token.dyn and Token.n are undefined and

Changes to src/where.c.

2437
2438
2439
2440
2441
2442
2443


2444
2445
2446
2447
2448
2449
2450
2451
2452
2453

2454
2455
2456

2457
2458
2459







2460
2461
2462
2463
2464
2465
2466

2467

2468



2469
2470
2471
2472
2473
2474
2475
....
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552

2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
....
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
....
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
....
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
....
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
  int roundUp,                /* Round up if true.  Round down if false */
  tRowcnt *aStat              /* OUT: stats written here */
){
  tRowcnt n;
  IndexSample *aSample;
  int i, eType;
  int isEq = 0;



  assert( roundUp==0 || roundUp==1 );
  if( pVal==0 ) return SQLITE_ERROR;
  n = pIdx->aiRowEst[0];
  aSample = pIdx->aSample;
  i = 0;
  eType = sqlite3_value_type(pVal);

  if( eType==SQLITE_INTEGER ){
    i64 v = sqlite3_value_int64(pVal);

    for(i=0; i<pIdx->nSample; i++){
      if( aSample[i].eType==SQLITE_NULL ) continue;
      if( aSample[i].eType>=SQLITE_TEXT ) break;

      if( aSample[i].u.i>=v ){
        isEq = aSample[i].u.i==v;
        break;







      }
    }
  }else if( eType==SQLITE_FLOAT ){
    double r = sqlite3_value_double(pVal);
    for(i=0; i<pIdx->nSample; i++){
      if( aSample[i].eType==SQLITE_NULL ) continue;
      if( aSample[i].eType>=SQLITE_TEXT ) break;

      if( aSample[i].u.r>=r ){

        isEq = aSample[i].u.r==r;



        break;
      }
    }
  }else if( eType==SQLITE_NULL ){
    i = 0;
    if( pIdx->nSample>=1 && aSample[0].eType==SQLITE_NULL ) isEq = 1;
  }else{
................................................................................
    aStat[0] = aSample[i].nLt;
    aStat[1] = aSample[i].nEq;
  }else{
    tRowcnt iLower, iUpper, iGap;
    if( i==0 ){
      iLower = 0;
      iUpper = aSample[0].nLt;
    }else if( i>=pIdx->nSample ){
      iUpper = n;
      iLower = aSample[i].nEq + aSample[i].nLt;
    }else{

      iLower = aSample[i-1].nEq + aSample[i-1].nLt;
      iUpper = aSample[i].nLt;
    }
    aStat[1] = pIdx->aiRowEst[1];
    if( iLower>=iUpper ){
      iGap = 0;
    }else{
      iGap = iUpper - iLower;
      if( iGap>=aStat[1]/2 ) iGap -= aStat[1]/2;
    }
    if( roundUp ){
................................................................................
*/
static int whereRangeScanEst(
  Parse *pParse,       /* Parsing & code generating context */
  Index *p,            /* The index containing the range-compared column; "x" */
  int nEq,             /* index into p->aCol[] of the range-compared column */
  WhereTerm *pLower,   /* Lower bound on the range. ex: "x>123" Might be NULL */
  WhereTerm *pUpper,   /* Upper bound on the range. ex: "x<455" Might be NULL */
  tRowcnt *pRangeDiv   /* OUT: Reduce search space by this divisor */
){
  int rc = SQLITE_OK;

#ifdef SQLITE_ENABLE_STAT3

  if( nEq==0 && p->nSample ){
    sqlite3_value *pRangeVal;
................................................................................
      Expr *pExpr = pUpper->pExpr->pRight;
      rc = valueFromExpr(pParse, pExpr, aff, &pRangeVal);
      assert( pUpper->eOperator==WO_LT || pUpper->eOperator==WO_LE );
      if( rc==SQLITE_OK
       && whereKeyStats(pParse, p, pRangeVal, 1, a)==SQLITE_OK
      ){
        iUpper = a[0];
        if( pLower->eOperator==WO_LE ) iUpper += a[1];
      }
      sqlite3ValueFree(pRangeVal);
    }
    if( rc==SQLITE_OK ){
      if( iUpper<=iLower ){
        *pRangeDiv = p->aiRowEst[0];
      }else{
        *pRangeDiv = p->aiRowEst[0]/(iUpper - iLower);
      }
      WHERETRACE(("range scan regions: %u..%u  div=%u\n",
                  (u32)iLower, (u32)iUpper, (u32)*pRangeDiv));
      return SQLITE_OK;
    }
  }
#else
  UNUSED_PARAMETER(pParse);
  UNUSED_PARAMETER(p);
  UNUSED_PARAMETER(nEq);
#endif
  assert( pLower || pUpper );
  *pRangeDiv = 1;
  if( pLower && (pLower->wtFlags & TERM_VNULL)==0 ) *pRangeDiv *= 4;
  if( pUpper ) *pRangeDiv *= 4;
  return rc;
}

#ifdef SQLITE_ENABLE_STAT3
/*
** Estimate the number of rows that will be returned based on
** an equality constraint x=VALUE and where that VALUE occurs in
................................................................................
    **
    **             SELECT a, b    FROM tbl WHERE a = 1;
    **             SELECT a, b, c FROM tbl WHERE a = 1;
    */
    int nEq;                      /* Number of == or IN terms matching index */
    int bInEst = 0;               /* True if "x IN (SELECT...)" seen */
    int nInMul = 1;               /* Number of distinct equalities to lookup */
    tRowcnt rangeDiv = 1;         /* Estimated reduction in search space */
    int nBound = 0;               /* Number of range constraints seen */
    int bSort = !!pOrderBy;       /* True if external sort required */
    int bDist = !!pDistinct;      /* True if index cannot help with DISTINCT */
    int bLookup = 0;              /* True if not a covering index */
    WhereTerm *pTerm;             /* A single term of the WHERE clause */
#ifdef SQLITE_ENABLE_STAT3
    WhereTerm *pFirstTerm = 0;    /* First term matching the index */
................................................................................
      }
    }
#endif /* SQLITE_ENABLE_STAT3 */

    /* Adjust the number of output rows and downward to reflect rows
    ** that are excluded by range constraints.
    */
    nRow = nRow/(double)rangeDiv;
    if( nRow<1 ) nRow = 1;

    /* Experiments run on real SQLite databases show that the time needed
    ** to do a binary search to locate a row in a table or index is roughly
    ** log10(N) times the time to move from one row to the next row within
    ** a table or index.  The actual times can vary, with the size of
    ** records being an important factor.  Both moves and searches are







>
>









|
>



>
|
|
|
>
>
>
>
>
>
>



|



>
|
>
|
>
>
>







 







<
<
<

>

<

|







 







|







 







|





|

|

|
|









|
|
|







 







|







 







|







2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
....
2558
2559
2560
2561
2562
2563
2564



2565
2566
2567

2568
2569
2570
2571
2572
2573
2574
2575
2576
....
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
....
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
....
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
....
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
  int roundUp,                /* Round up if true.  Round down if false */
  tRowcnt *aStat              /* OUT: stats written here */
){
  tRowcnt n;
  IndexSample *aSample;
  int i, eType;
  int isEq = 0;
  i64 v;
  double r, rS;

  assert( roundUp==0 || roundUp==1 );
  if( pVal==0 ) return SQLITE_ERROR;
  n = pIdx->aiRowEst[0];
  aSample = pIdx->aSample;
  i = 0;
  eType = sqlite3_value_type(pVal);

  if( eType==SQLITE_INTEGER ){
    v = sqlite3_value_int64(pVal);
    r = (i64)v;
    for(i=0; i<pIdx->nSample; i++){
      if( aSample[i].eType==SQLITE_NULL ) continue;
      if( aSample[i].eType>=SQLITE_TEXT ) break;
      if( aSample[i].eType==SQLITE_INTEGER ){
        if( aSample[i].u.i>=v ){
          isEq = aSample[i].u.i==v;
          break;
        }
      }else{
        assert( aSample[i].eType==SQLITE_FLOAT );
        if( aSample[i].u.r>=r ){
          isEq = aSample[i].u.r==r;
          break;
        }
      }
    }
  }else if( eType==SQLITE_FLOAT ){
    r = sqlite3_value_double(pVal);
    for(i=0; i<pIdx->nSample; i++){
      if( aSample[i].eType==SQLITE_NULL ) continue;
      if( aSample[i].eType>=SQLITE_TEXT ) break;
      if( aSample[i].eType==SQLITE_FLOAT ){
        rS = aSample[i].u.r;
      }else{
        rS = aSample[i].u.i;
      }
      if( rS>=r ){
        isEq = rS==r;
        break;
      }
    }
  }else if( eType==SQLITE_NULL ){
    i = 0;
    if( pIdx->nSample>=1 && aSample[0].eType==SQLITE_NULL ) isEq = 1;
  }else{
................................................................................
    aStat[0] = aSample[i].nLt;
    aStat[1] = aSample[i].nEq;
  }else{
    tRowcnt iLower, iUpper, iGap;
    if( i==0 ){
      iLower = 0;
      iUpper = aSample[0].nLt;



    }else{
      iUpper = i>=pIdx->nSample ? n : aSample[i].nLt;
      iLower = aSample[i-1].nEq + aSample[i-1].nLt;

    }
    aStat[1] = pIdx->avgEq;
    if( iLower>=iUpper ){
      iGap = 0;
    }else{
      iGap = iUpper - iLower;
      if( iGap>=aStat[1]/2 ) iGap -= aStat[1]/2;
    }
    if( roundUp ){
................................................................................
*/
static int whereRangeScanEst(
  Parse *pParse,       /* Parsing & code generating context */
  Index *p,            /* The index containing the range-compared column; "x" */
  int nEq,             /* index into p->aCol[] of the range-compared column */
  WhereTerm *pLower,   /* Lower bound on the range. ex: "x>123" Might be NULL */
  WhereTerm *pUpper,   /* Upper bound on the range. ex: "x<455" Might be NULL */
  double *pRangeDiv   /* OUT: Reduce search space by this divisor */
){
  int rc = SQLITE_OK;

#ifdef SQLITE_ENABLE_STAT3

  if( nEq==0 && p->nSample ){
    sqlite3_value *pRangeVal;
................................................................................
      Expr *pExpr = pUpper->pExpr->pRight;
      rc = valueFromExpr(pParse, pExpr, aff, &pRangeVal);
      assert( pUpper->eOperator==WO_LT || pUpper->eOperator==WO_LE );
      if( rc==SQLITE_OK
       && whereKeyStats(pParse, p, pRangeVal, 1, a)==SQLITE_OK
      ){
        iUpper = a[0];
        if( pUpper->eOperator==WO_LE ) iUpper += a[1];
      }
      sqlite3ValueFree(pRangeVal);
    }
    if( rc==SQLITE_OK ){
      if( iUpper<=iLower ){
        *pRangeDiv = (double)p->aiRowEst[0];
      }else{
        *pRangeDiv = (double)p->aiRowEst[0]/(double)(iUpper - iLower);
      }
      WHERETRACE(("range scan regions: %u..%u  div=%g\n",
                  (u32)iLower, (u32)iUpper, *pRangeDiv));
      return SQLITE_OK;
    }
  }
#else
  UNUSED_PARAMETER(pParse);
  UNUSED_PARAMETER(p);
  UNUSED_PARAMETER(nEq);
#endif
  assert( pLower || pUpper );
  *pRangeDiv = (double)1;
  if( pLower && (pLower->wtFlags & TERM_VNULL)==0 ) *pRangeDiv *= (double)4;
  if( pUpper ) *pRangeDiv *= (double)4;
  return rc;
}

#ifdef SQLITE_ENABLE_STAT3
/*
** Estimate the number of rows that will be returned based on
** an equality constraint x=VALUE and where that VALUE occurs in
................................................................................
    **
    **             SELECT a, b    FROM tbl WHERE a = 1;
    **             SELECT a, b, c FROM tbl WHERE a = 1;
    */
    int nEq;                      /* Number of == or IN terms matching index */
    int bInEst = 0;               /* True if "x IN (SELECT...)" seen */
    int nInMul = 1;               /* Number of distinct equalities to lookup */
    double rangeDiv = (double)1;  /* Estimated reduction in search space */
    int nBound = 0;               /* Number of range constraints seen */
    int bSort = !!pOrderBy;       /* True if external sort required */
    int bDist = !!pDistinct;      /* True if index cannot help with DISTINCT */
    int bLookup = 0;              /* True if not a covering index */
    WhereTerm *pTerm;             /* A single term of the WHERE clause */
#ifdef SQLITE_ENABLE_STAT3
    WhereTerm *pFirstTerm = 0;    /* First term matching the index */
................................................................................
      }
    }
#endif /* SQLITE_ENABLE_STAT3 */

    /* Adjust the number of output rows and downward to reflect rows
    ** that are excluded by range constraints.
    */
    nRow = nRow/rangeDiv;
    if( nRow<1 ) nRow = 1;

    /* Experiments run on real SQLite databases show that the time needed
    ** to do a binary search to locate a row in a table or index is roughly
    ** log10(N) times the time to move from one row to the next row within
    ** a table or index.  The actual times can vary, with the size of
    ** records being an important factor.  Both moves and searches are

Changes to test/analyze.test.

284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
...
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
  sqlite3 db test.db
  execsql {
    SELECT * FROM t4 WHERE x=1234;
  }
} {}

# Verify that DROP TABLE and DROP INDEX remove entries from the 
# sqlite_stat1 and sqlite_stat2 tables.
#
do_test analyze-5.0 {
  execsql {
    DELETE FROM t3;
    DELETE FROM t4;
    INSERT INTO t3 VALUES(1,2,3,4);
    INSERT INTO t3 VALUES(5,6,7,8);
................................................................................
    INSERT INTO t3 SELECT a+64, b+64, c+64, d+64 FROM t3;
    INSERT INTO t4 SELECT a, b, c FROM t3;
    ANALYZE;
    SELECT DISTINCT idx FROM sqlite_stat1 ORDER BY 1;
    SELECT DISTINCT tbl FROM sqlite_stat1 ORDER BY 1;
  }
} {t3i1 t3i2 t3i3 t4i1 t4i2 t3 t4}
ifcapable stat2 {
  do_test analyze-5.1 {
    execsql {
      SELECT DISTINCT idx FROM sqlite_stat2 ORDER BY 1;
      SELECT DISTINCT tbl FROM sqlite_stat2 ORDER BY 1;
    }
  } {t3i1 t3i2 t3i3 t4i1 t4i2 t3 t4}
}
do_test analyze-5.2 {
  execsql {
    DROP INDEX t3i2;
    SELECT DISTINCT idx FROM sqlite_stat1 ORDER BY 1;
    SELECT DISTINCT tbl FROM sqlite_stat1 ORDER BY 1;
  }
} {t3i1 t3i3 t4i1 t4i2 t3 t4}
ifcapable stat2 {
  do_test analyze-5.3 {
    execsql {
      SELECT DISTINCT idx FROM sqlite_stat2 ORDER BY 1;
      SELECT DISTINCT tbl FROM sqlite_stat2 ORDER BY 1;
    }
  } {t3i1 t3i3 t4i1 t4i2 t3 t4}
}
do_test analyze-5.4 {
  execsql {
    DROP TABLE t3;
    SELECT DISTINCT idx FROM sqlite_stat1 ORDER BY 1;
    SELECT DISTINCT tbl FROM sqlite_stat1 ORDER BY 1;
  }
} {t4i1 t4i2 t4}
ifcapable stat2 {
  do_test analyze-5.5 {
    execsql {
      SELECT DISTINCT idx FROM sqlite_stat2 ORDER BY 1;
      SELECT DISTINCT tbl FROM sqlite_stat2 ORDER BY 1;
    }
  } {t4i1 t4i2 t4}
}

# This test corrupts the database file so it must be the last test
# in the series.
#







|







 







|


|
|










|


|
|










|


|
|







284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
...
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
  sqlite3 db test.db
  execsql {
    SELECT * FROM t4 WHERE x=1234;
  }
} {}

# Verify that DROP TABLE and DROP INDEX remove entries from the 
# sqlite_stat1 and sqlite_stat3 tables.
#
do_test analyze-5.0 {
  execsql {
    DELETE FROM t3;
    DELETE FROM t4;
    INSERT INTO t3 VALUES(1,2,3,4);
    INSERT INTO t3 VALUES(5,6,7,8);
................................................................................
    INSERT INTO t3 SELECT a+64, b+64, c+64, d+64 FROM t3;
    INSERT INTO t4 SELECT a, b, c FROM t3;
    ANALYZE;
    SELECT DISTINCT idx FROM sqlite_stat1 ORDER BY 1;
    SELECT DISTINCT tbl FROM sqlite_stat1 ORDER BY 1;
  }
} {t3i1 t3i2 t3i3 t4i1 t4i2 t3 t4}
ifcapable stat3 {
  do_test analyze-5.1 {
    execsql {
      SELECT DISTINCT idx FROM sqlite_stat3 ORDER BY 1;
      SELECT DISTINCT tbl FROM sqlite_stat3 ORDER BY 1;
    }
  } {t3i1 t3i2 t3i3 t4i1 t4i2 t3 t4}
}
do_test analyze-5.2 {
  execsql {
    DROP INDEX t3i2;
    SELECT DISTINCT idx FROM sqlite_stat1 ORDER BY 1;
    SELECT DISTINCT tbl FROM sqlite_stat1 ORDER BY 1;
  }
} {t3i1 t3i3 t4i1 t4i2 t3 t4}
ifcapable stat3 {
  do_test analyze-5.3 {
    execsql {
      SELECT DISTINCT idx FROM sqlite_stat3 ORDER BY 1;
      SELECT DISTINCT tbl FROM sqlite_stat3 ORDER BY 1;
    }
  } {t3i1 t3i3 t4i1 t4i2 t3 t4}
}
do_test analyze-5.4 {
  execsql {
    DROP TABLE t3;
    SELECT DISTINCT idx FROM sqlite_stat1 ORDER BY 1;
    SELECT DISTINCT tbl FROM sqlite_stat1 ORDER BY 1;
  }
} {t4i1 t4i2 t4}
ifcapable stat3 {
  do_test analyze-5.5 {
    execsql {
      SELECT DISTINCT idx FROM sqlite_stat3 ORDER BY 1;
      SELECT DISTINCT tbl FROM sqlite_stat3 ORDER BY 1;
    }
  } {t4i1 t4i2 t4}
}

# This test corrupts the database file so it must be the last test
# in the series.
#

Changes to test/analyze3.test.

93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
...
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
...
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
    COMMIT;
    ANALYZE;
  }
} {}

do_eqp_test analyze3-1.1.2 {
  SELECT sum(y) FROM t1 WHERE x>200 AND x<300
} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (x>? AND x<?) (~100 rows)}}
do_eqp_test analyze3-1.1.3 {
  SELECT sum(y) FROM t1 WHERE x>0 AND x<1100 
} {0 0 0 {SCAN TABLE t1 (~111 rows)}}

do_test analyze3-1.1.4 {
  sf_execsql { SELECT sum(y) FROM t1 WHERE x>200 AND x<300 }
} {199 0 14850}
do_test analyze3-1.1.5 {
  set l [string range "200" 0 end]
  set u [string range "300" 0 end]
................................................................................
      CREATE INDEX i3 ON t3(x);
    COMMIT;
    ANALYZE;
  }
} {}
do_eqp_test analyze3-1.3.2 {
  SELECT sum(y) FROM t3 WHERE x>200 AND x<300
} {0 0 0 {SEARCH TABLE t3 USING INDEX i3 (x>? AND x<?) (~100 rows)}}
do_eqp_test analyze3-1.3.3 {
  SELECT sum(y) FROM t3 WHERE x>0 AND x<1100
} {0 0 0 {SCAN TABLE t3 (~111 rows)}}

do_test analyze3-1.3.4 {
  sf_execsql { SELECT sum(y) FROM t3 WHERE x>200 AND x<300 }
} {199 0 14850}
................................................................................
    append t [lindex {a b c d e f g h i j} [expr ($i%10)]]
    execsql { INSERT INTO t1 VALUES($i, $t) }
  }
  execsql COMMIT
} {}
do_eqp_test analyze3-2.2 {
  SELECT count(a) FROM t1 WHERE b LIKE 'a%'
} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (b>? AND b<?) (~30000 rows)}}
do_eqp_test analyze3-2.3 {
  SELECT count(a) FROM t1 WHERE b LIKE '%a'
} {0 0 0 {SCAN TABLE t1 (~500000 rows)}}

do_test analyze3-2.4 {
  sf_execsql { SELECT count(*) FROM t1 WHERE b LIKE 'a%' }
} {101 0 100}







|


|







 







|







 







|







93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
...
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
...
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
    COMMIT;
    ANALYZE;
  }
} {}

do_eqp_test analyze3-1.1.2 {
  SELECT sum(y) FROM t1 WHERE x>200 AND x<300
} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (x>? AND x<?) (~179 rows)}}
do_eqp_test analyze3-1.1.3 {
  SELECT sum(y) FROM t1 WHERE x>0 AND x<1100 
} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (x>? and x<?) {~959 rows)}}

do_test analyze3-1.1.4 {
  sf_execsql { SELECT sum(y) FROM t1 WHERE x>200 AND x<300 }
} {199 0 14850}
do_test analyze3-1.1.5 {
  set l [string range "200" 0 end]
  set u [string range "300" 0 end]
................................................................................
      CREATE INDEX i3 ON t3(x);
    COMMIT;
    ANALYZE;
  }
} {}
do_eqp_test analyze3-1.3.2 {
  SELECT sum(y) FROM t3 WHERE x>200 AND x<300
} {0 0 0 {SEARCH TABLE t3 USING INDEX i3 (x>? AND x<?) (~166 rows)}}
do_eqp_test analyze3-1.3.3 {
  SELECT sum(y) FROM t3 WHERE x>0 AND x<1100
} {0 0 0 {SCAN TABLE t3 (~111 rows)}}

do_test analyze3-1.3.4 {
  sf_execsql { SELECT sum(y) FROM t3 WHERE x>200 AND x<300 }
} {199 0 14850}
................................................................................
    append t [lindex {a b c d e f g h i j} [expr ($i%10)]]
    execsql { INSERT INTO t1 VALUES($i, $t) }
  }
  execsql COMMIT
} {}
do_eqp_test analyze3-2.2 {
  SELECT count(a) FROM t1 WHERE b LIKE 'a%'
} {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (b>? AND b<?) (~31250 rows)}}
do_eqp_test analyze3-2.3 {
  SELECT count(a) FROM t1 WHERE b LIKE '%a'
} {0 0 0 {SCAN TABLE t1 (~500000 rows)}}

do_test analyze3-2.4 {
  sf_execsql { SELECT count(*) FROM t1 WHERE b LIKE 'a%' }
} {101 0 100}

Changes to test/analyze5.test.

6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
..
51
52
53
54
55
56
57
58
59
60

61
62
63
64

65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
...
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216

217
218
219
220
221
222
223

224
225
226
227
228
229
230
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file implements tests for SQLite library.  The focus of the tests
# in this file is the use of the sqlite_stat2 histogram data on tables
# with many repeated values and only a few distinct values.
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl

ifcapable !stat2 {
  finish_test
  return
}

set testprefix analyze5

proc eqp {sql {db db}} {
................................................................................
    CREATE INDEX t1u ON t1(u);  -- text
    CREATE INDEX t1v ON t1(v);  -- mixed case text
    CREATE INDEX t1w ON t1(w);  -- integers 0, 1, 2 and a few NULLs
    CREATE INDEX t1x ON t1(x);  -- integers 1, 2, 3 and many NULLs
    CREATE INDEX t1y ON t1(y);  -- integers 0 and very few 1s
    CREATE INDEX t1z ON t1(z);  -- integers 0, 1, 2, and 3
    ANALYZE;
    SELECT sample FROM sqlite_stat2 WHERE idx='t1u' ORDER BY sampleno;
  }
} {alpha alpha alpha alpha bravo bravo bravo charlie charlie delta}

do_test analyze5-1.1 {
  string tolower \
   [db eval {SELECT sample from sqlite_stat2 WHERE idx='t1v' ORDER BY sampleno}]
} {alpha alpha alpha alpha bravo bravo bravo charlie charlie delta}

do_test analyze5-1.2 {
  db eval {SELECT sample from sqlite_stat2 WHERE idx='t1w' ORDER BY sampleno}
} {{} 0 0 0 0 1 1 1 2 2}
do_test analyze5-1.3 {
  db eval {SELECT sample from sqlite_stat2 WHERE idx='t1x' ORDER BY sampleno}
} {{} {} {} {} 1 1 1 2 2 3}
do_test analyze5-1.4 {
  db eval {SELECT sample from sqlite_stat2 WHERE idx='t1y' ORDER BY sampleno}
} {0 0 0 0 0 0 0 0 0 0}
do_test analyze5-1.5 {
  db eval {SELECT sample from sqlite_stat2 WHERE idx='t1z' ORDER BY sampleno}
} {0 0 0 0 1 1 1 2 2 3}
do_test analyze5-1.6 {
  db eval {SELECT sample from sqlite_stat2 WHERE idx='t1t' ORDER BY sampleno}
} {0.5 0.5 0.5 0.5 1.5 1.5 1.5 2.5 2.5 3.5}


# Verify that range queries generate the correct row count estimates
#
foreach {testid where index rows} {
    1  {z>=0 AND z<=0}       t1z  400
    2  {z>=1 AND z<=1}       t1z  300
    3  {z>=2 AND z<=2}       t1z  200
    4  {z>=3 AND z<=3}       t1z  100
    5  {z>=4 AND z<=4}       t1z   50
    6  {z>=-1 AND z<=-1}     t1z   50
    7  {z>1 AND z<3}         t1z  200
    8  {z>0 AND z<100}       t1z  600
    9  {z>=1 AND z<100}      t1z  600
   10  {z>1 AND z<100}       t1z  300
   11  {z>=2 AND z<100}      t1z  300
   12  {z>2 AND z<100}       t1z  100
   13  {z>=3 AND z<100}      t1z  100
   14  {z>3 AND z<100}       t1z   50
   15  {z>=4 AND z<100}      t1z   50
   16  {z>=-100 AND z<=-1}   t1z   50
   17  {z>=-100 AND z<=0}    t1z  400
   18  {z>=-100 AND z<0}     t1z   50
   19  {z>=-100 AND z<=1}    t1z  700
   20  {z>=-100 AND z<2}     t1z  700
   21  {z>=-100 AND z<=2}    t1z  900
   22  {z>=-100 AND z<3}     t1z  900
  
   31  {z>=0.0 AND z<=0.0}   t1z  400
   32  {z>=1.0 AND z<=1.0}   t1z  300
   33  {z>=2.0 AND z<=2.0}   t1z  200
   34  {z>=3.0 AND z<=3.0}   t1z  100
   35  {z>=4.0 AND z<=4.0}   t1z   50
   36  {z>=-1.0 AND z<=-1.0} t1z   50
   37  {z>1.5 AND z<3.0}     t1z  200
   38  {z>0.5 AND z<100}     t1z  600
   39  {z>=1.0 AND z<100}    t1z  600
   40  {z>1.5 AND z<100}     t1z  300
   41  {z>=2.0 AND z<100}    t1z  300
   42  {z>2.1 AND z<100}     t1z  100
   43  {z>=3.0 AND z<100}    t1z  100
   44  {z>3.2 AND z<100}     t1z   50
   45  {z>=4.0 AND z<100}    t1z   50
   46  {z>=-100 AND z<=-1.0} t1z   50
   47  {z>=-100 AND z<=0.0}  t1z  400
   48  {z>=-100 AND z<0.0}   t1z   50
   49  {z>=-100 AND z<=1.0}  t1z  700
   50  {z>=-100 AND z<2.0}   t1z  700
   51  {z>=-100 AND z<=2.0}  t1z  900
   52  {z>=-100 AND z<3.0}   t1z  900
  
  101  {z=-1}                t1z   50
  102  {z=0}                 t1z  400
  103  {z=1}                 t1z  300
  104  {z=2}                 t1z  200
  105  {z=3}                 t1z  100
  106  {z=4}                 t1z   50
  107  {z=-10.0}             t1z   50
  108  {z=0.0}               t1z  400
  109  {z=1.0}               t1z  300
  110  {z=2.0}               t1z  200
  111  {z=3.0}               t1z  100
  112  {z=4.0}               t1z   50
  113  {z=1.5}               t1z   50
  114  {z=2.5}               t1z   50
  
  201  {z IN (-1)}           t1z   50
  202  {z IN (0)}            t1z  400
  203  {z IN (1)}            t1z  300
  204  {z IN (2)}            t1z  200
  205  {z IN (3)}            t1z  100
  206  {z IN (4)}            t1z   50
  207  {z IN (0.5)}          t1z   50
  208  {z IN (0,1)}          t1z  700
  209  {z IN (0,1,2)}        t1z  900
  210  {z IN (0,1,2,3)}      {}   100
  211  {z IN (0,1,2,3,4,5)}  {}   100
  212  {z IN (1,2)}          t1z  500
  213  {z IN (2,3)}          t1z  300
  214  {z=3 OR z=2}          t1z  300
  215  {z IN (-1,3)}         t1z  150
  216  {z=-1 OR z=3}         t1z  150

  300  {y=0}                 {}   100
  301  {y=1}                 t1y   50
  302  {y=0.1}               t1y   50

  400  {x IS NULL}           t1x  400

} {
  # Verify that the expected index is used with the expected row count
  do_test analyze5-1.${testid}a {
    set x [lindex [eqp "SELECT * FROM t1 WHERE $where"] 3]
................................................................................
    WHERE rowid IN (SELECT rowid FROM t1 ORDER BY random() LIMIT 5);
   ANALYZE;
}

# Verify that range queries generate the correct row count estimates
#
foreach {testid where index rows} {
  500  {x IS NULL AND u='charlie'}         t1u  20
  501  {x=1 AND u='charlie'}               t1x   5
  502  {x IS NULL}                          {} 100
  503  {x=1}                               t1x  50
  504  {x IS NOT NULL}                     t1x  25
  505  {+x IS NOT NULL}                     {} 500
  506  {upper(x) IS NOT NULL}               {} 500

} {
  # Verify that the expected index is used with the expected row count

  do_test analyze5-1.${testid}a {
    set x [lindex [eqp "SELECT * FROM t1 WHERE $where"] 3]
    set idx {}
    regexp {INDEX (t1.) } $x all idx
    regexp {~([0-9]+) rows} $x all nrow
    list $idx $nrow
  } [list $index $rows]


  # Verify that the same result is achieved regardless of whether or not
  # the index is used
  do_test analyze5-1.${testid}b {
    set w2 [string map {y +y z +z} $where]
    set a1 [db eval "SELECT rowid FROM t1 NOT INDEXED WHERE $w2\
                     ORDER BY +rowid"]







|






|







 







|

|
>

<
|
|
>

<
|
|
<
<
<
<
<
<
<
<
<
<
<
<






|
|
|
|
|




|
|
|
|
|

|


|
|



|
|
|
|
|
|

|

|
|
|
|
|

|


|
|

|


|
|
|
|


|
|
|
|
|

|


|
|
|
|

|


|


|
|

|
|
|







 







|
|
|
|
|





>







>







6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
..
51
52
53
54
55
56
57
58
59
60
61
62

63
64
65
66

67
68












69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
...
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file implements tests for SQLite library.  The focus of the tests
# in this file is the use of the sqlite_stat3 histogram data on tables
# with many repeated values and only a few distinct values.
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl

ifcapable !stat3 {
  finish_test
  return
}

set testprefix analyze5

proc eqp {sql {db db}} {
................................................................................
    CREATE INDEX t1u ON t1(u);  -- text
    CREATE INDEX t1v ON t1(v);  -- mixed case text
    CREATE INDEX t1w ON t1(w);  -- integers 0, 1, 2 and a few NULLs
    CREATE INDEX t1x ON t1(x);  -- integers 1, 2, 3 and many NULLs
    CREATE INDEX t1y ON t1(y);  -- integers 0 and very few 1s
    CREATE INDEX t1z ON t1(z);  -- integers 0, 1, 2, and 3
    ANALYZE;
    SELECT sample FROM sqlite_stat3 WHERE idx='t1u' ORDER BY nlt;
  }
} {alpha bravo charlie delta}

do_test analyze5-1.1 {

  db eval {SELECT DISTINCT lower(sample) FROM sqlite_stat3 WHERE idx='t1v'
             ORDER BY 1}
} {alpha bravo charlie delta}
do_test analyze5-1.2 {

  db eval {SELECT idx, count(*) FROM sqlite_stat3 GROUP BY 1 ORDER BY 1}
} {t1t 4 t1u 4 t1v 4 t1w 4 t1x 4 t1y 2 t1z 4}













# Verify that range queries generate the correct row count estimates
#
foreach {testid where index rows} {
    1  {z>=0 AND z<=0}       t1z  400
    2  {z>=1 AND z<=1}       t1z  300
    3  {z>=2 AND z<=2}       t1z  175
    4  {z>=3 AND z<=3}       t1z  125
    5  {z>=4 AND z<=4}       t1z    1
    6  {z>=-1 AND z<=-1}     t1z    1
    7  {z>1 AND z<3}         t1z  175
    8  {z>0 AND z<100}       t1z  600
    9  {z>=1 AND z<100}      t1z  600
   10  {z>1 AND z<100}       t1z  300
   11  {z>=2 AND z<100}      t1z  300
   12  {z>2 AND z<100}       t1z  125
   13  {z>=3 AND z<100}      t1z  125
   14  {z>3 AND z<100}       t1z    1
   15  {z>=4 AND z<100}      t1z    1
   16  {z>=-100 AND z<=-1}   t1z    1
   17  {z>=-100 AND z<=0}    t1z  400
   18  {z>=-100 AND z<0}     t1z    1
   19  {z>=-100 AND z<=1}    t1z  700
   20  {z>=-100 AND z<2}     t1z  700
   21  {z>=-100 AND z<=2}    t1z  875
   22  {z>=-100 AND z<3}     t1z  875
  
   31  {z>=0.0 AND z<=0.0}   t1z  400
   32  {z>=1.0 AND z<=1.0}   t1z  300
   33  {z>=2.0 AND z<=2.0}   t1z  175
   34  {z>=3.0 AND z<=3.0}   t1z  125
   35  {z>=4.0 AND z<=4.0}   t1z    1
   36  {z>=-1.0 AND z<=-1.0} t1z    1
   37  {z>1.5 AND z<3.0}     t1z  174
   38  {z>0.5 AND z<100}     t1z  599
   39  {z>=1.0 AND z<100}    t1z  600
   40  {z>1.5 AND z<100}     t1z  299
   41  {z>=2.0 AND z<100}    t1z  300
   42  {z>2.1 AND z<100}     t1z  124
   43  {z>=3.0 AND z<100}    t1z  125
   44  {z>3.2 AND z<100}     t1z    1
   45  {z>=4.0 AND z<100}    t1z    1
   46  {z>=-100 AND z<=-1.0} t1z    1
   47  {z>=-100 AND z<=0.0}  t1z  400
   48  {z>=-100 AND z<0.0}   t1z    1
   49  {z>=-100 AND z<=1.0}  t1z  700
   50  {z>=-100 AND z<2.0}   t1z  700
   51  {z>=-100 AND z<=2.0}  t1z  875
   52  {z>=-100 AND z<3.0}   t1z  875
  
  101  {z=-1}                t1z    1
  102  {z=0}                 t1z  400
  103  {z=1}                 t1z  300
  104  {z=2}                 t1z  175
  105  {z=3}                 t1z  125
  106  {z=4}                 t1z    1
  107  {z=-10.0}             t1z    1
  108  {z=0.0}               t1z  400
  109  {z=1.0}               t1z  300
  110  {z=2.0}               t1z  175
  111  {z=3.0}               t1z  125
  112  {z=4.0}               t1z    1
  113  {z=1.5}               t1z    1
  114  {z=2.5}               t1z    1
  
  201  {z IN (-1)}           t1z    1
  202  {z IN (0)}            t1z  400
  203  {z IN (1)}            t1z  300
  204  {z IN (2)}            t1z  175
  205  {z IN (3)}            t1z  125
  206  {z IN (4)}            t1z    1
  207  {z IN (0.5)}          t1z    1
  208  {z IN (0,1)}          t1z  700
  209  {z IN (0,1,2)}        t1z  875
  210  {z IN (0,1,2,3)}      {}   100
  211  {z IN (0,1,2,3,4,5)}  {}   100
  212  {z IN (1,2)}          t1z  475
  213  {z IN (2,3)}          t1z  300
  214  {z=3 OR z=2}          t1z  300
  215  {z IN (-1,3)}         t1z  126
  216  {z=-1 OR z=3}         t1z  126

  300  {y=0}                 t1y  974
  301  {y=1}                 t1y   26
  302  {y=0.1}               t1y    1

  400  {x IS NULL}           t1x  400

} {
  # Verify that the expected index is used with the expected row count
  do_test analyze5-1.${testid}a {
    set x [lindex [eqp "SELECT * FROM t1 WHERE $where"] 3]
................................................................................
    WHERE rowid IN (SELECT rowid FROM t1 ORDER BY random() LIMIT 5);
   ANALYZE;
}

# Verify that range queries generate the correct row count estimates
#
foreach {testid where index rows} {
  500  {x IS NULL AND u='charlie'}         t1u  17
  501  {x=1 AND u='charlie'}               t1x   1
  502  {x IS NULL}                         t1x 995
  503  {x=1}                               t1x   1
  504  {x IS NOT NULL}                     t1x   2
  505  {+x IS NOT NULL}                     {} 500
  506  {upper(x) IS NOT NULL}               {} 500

} {
  # Verify that the expected index is used with the expected row count
if {$testid==50299} {breakpoint; set sqlite_where_trace 1}
  do_test analyze5-1.${testid}a {
    set x [lindex [eqp "SELECT * FROM t1 WHERE $where"] 3]
    set idx {}
    regexp {INDEX (t1.) } $x all idx
    regexp {~([0-9]+) rows} $x all nrow
    list $idx $nrow
  } [list $index $rows]
if {$testid==50299} exit

  # Verify that the same result is achieved regardless of whether or not
  # the index is used
  do_test analyze5-1.${testid}b {
    set w2 [string map {y +y z +z} $where]
    set a1 [db eval "SELECT rowid FROM t1 NOT INDEXED WHERE $w2\
                     ORDER BY +rowid"]

Changes to test/analyze7.test.

78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100

101
102
103
104
105
106

107
108
109
110
111
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1a (a=?) (~1 rows)}}
do_test analyze7-3.1 {
  execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE b=123;}
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1b (b=?) (~10 rows)}}
do_test analyze7-3.2.1 {
  execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE c=?;}
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1cd (c=?) (~86 rows)}}
ifcapable stat2 {
  # If ENABLE_STAT2 is defined, SQLite comes up with a different estimated
  # row count for (c=2) than it does for (c=?).
  do_test analyze7-3.2.2 {
    execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE c=2;}
  } {0 0 0 {SEARCH TABLE t1 USING INDEX t1cd (c=?) (~51 rows)}}
} else {
  # If ENABLE_STAT2 is not defined, the expected row count for (c=2) is the
  # same as that for (c=?).
  do_test analyze7-3.2.3 {
    execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE c=2;}
  } {0 0 0 {SEARCH TABLE t1 USING INDEX t1cd (c=?) (~86 rows)}}
}
do_test analyze7-3.3 {
  execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE a=123 AND b=123}
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1a (a=?) (~1 rows)}}

do_test analyze7-3.4 {
  execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE c=123 AND b=123}
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1b (b=?) (~2 rows)}}
do_test analyze7-3.5 {
  execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE a=123 AND c=123}
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1a (a=?) (~1 rows)}}

do_test analyze7-3.6 {
  execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE c=123 AND d=123 AND b=123}
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1cd (c=? AND d=?) (~1 rows)}}

finish_test







|
|



|

|








>
|
|
|
|
|
|
>





78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1a (a=?) (~1 rows)}}
do_test analyze7-3.1 {
  execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE b=123;}
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1b (b=?) (~10 rows)}}
do_test analyze7-3.2.1 {
  execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE c=?;}
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1cd (c=?) (~86 rows)}}
ifcapable stat3 {
  # If ENABLE_STAT3 is defined, SQLite comes up with a different estimated
  # row count for (c=2) than it does for (c=?).
  do_test analyze7-3.2.2 {
    execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE c=2;}
  } {0 0 0 {SEARCH TABLE t1 USING INDEX t1cd (c=?) (~57 rows)}}
} else {
  # If ENABLE_STAT3 is not defined, the expected row count for (c=2) is the
  # same as that for (c=?).
  do_test analyze7-3.2.3 {
    execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE c=2;}
  } {0 0 0 {SEARCH TABLE t1 USING INDEX t1cd (c=?) (~86 rows)}}
}
do_test analyze7-3.3 {
  execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE a=123 AND b=123}
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1a (a=?) (~1 rows)}}
ifcapable {!stat3} {
  do_test analyze7-3.4 {
    execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE c=123 AND b=123}
  } {0 0 0 {SEARCH TABLE t1 USING INDEX t1b (b=?) (~2 rows)}}
  do_test analyze7-3.5 {
    execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE a=123 AND c=123}
  } {0 0 0 {SEARCH TABLE t1 USING INDEX t1a (a=?) (~1 rows)}}
}
do_test analyze7-3.6 {
  execsql {EXPLAIN QUERY PLAN SELECT * FROM t1 WHERE c=123 AND d=123 AND b=123}
} {0 0 0 {SEARCH TABLE t1 USING INDEX t1cd (c=? AND d=?) (~1 rows)}}

finish_test

Changes to test/dbstatus.test.

51
52
53
54
55
56
57






58
59
60
61
62
63
64
...
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
...
194
195
196
197
198
199
200



201

202
203
204
205
206
207
208


proc lookaside {db} {
  expr { $::lookaside_buffer_size *
    [lindex [sqlite3_db_status $db SQLITE_DBSTATUS_LOOKASIDE_USED 0] 1]
  }
}







#---------------------------------------------------------------------------
# Run the dbstatus-2 and dbstatus-3 tests with several of different
# lookaside buffer sizes.
#
foreach ::lookaside_buffer_size {0 64 120} {

................................................................................
      END;
    }
    5 {
      CREATE TABLE t1(a, b);
      CREATE TABLE t2(c, d);
      CREATE VIEW v1 AS SELECT * FROM t1 UNION SELECT * FROM t2;
    }
    6 {
      CREATE TABLE t1(a, b);
      CREATE INDEX i1 ON t1(a);
      CREATE INDEX i2 ON t1(a,b);
      CREATE INDEX i3 ON t1(b,b);
      INSERT INTO t1 VALUES(randomblob(20), randomblob(25));
      INSERT INTO t1 SELECT randomblob(20), randomblob(25) FROM t1;
      INSERT INTO t1 SELECT randomblob(20), randomblob(25) FROM t1;
................................................................................
    # for any reason is not counted as "schema memory".
    #
    # Additionally, in auto-vacuum mode, dropping tables and indexes causes
    # the page-cache to shrink. So the amount of memory freed is always
    # much greater than just that reported by DBSTATUS_SCHEMA_USED in this
    # case.
    #



    if {[string match *x $tn] || $AUTOVACUUM} {

      do_test dbstatus-2.$tn.ax { expr {($nSchema1-$nSchema2)<=$nFree} } 1
    } else {
      do_test dbstatus-2.$tn.a { expr {$nSchema1-$nSchema2} } $nFree
    }
  
    do_test dbstatus-2.$tn.b { list $nAlloc1 $nSchema1 } "$nAlloc3 $nSchema3"
    do_test dbstatus-2.$tn.c { list $nAlloc2 $nSchema2 } "$nAlloc4 $nSchema4"







>
>
>
>
>
>







 







|







 







>
>
>
|
>







51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
...
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
...
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218


proc lookaside {db} {
  expr { $::lookaside_buffer_size *
    [lindex [sqlite3_db_status $db SQLITE_DBSTATUS_LOOKASIDE_USED 0] 1]
  }
}

ifcapable stat3 {
  set STAT3 1
} else {
  set STAT3 0
}

#---------------------------------------------------------------------------
# Run the dbstatus-2 and dbstatus-3 tests with several of different
# lookaside buffer sizes.
#
foreach ::lookaside_buffer_size {0 64 120} {

................................................................................
      END;
    }
    5 {
      CREATE TABLE t1(a, b);
      CREATE TABLE t2(c, d);
      CREATE VIEW v1 AS SELECT * FROM t1 UNION SELECT * FROM t2;
    }
    6y {
      CREATE TABLE t1(a, b);
      CREATE INDEX i1 ON t1(a);
      CREATE INDEX i2 ON t1(a,b);
      CREATE INDEX i3 ON t1(b,b);
      INSERT INTO t1 VALUES(randomblob(20), randomblob(25));
      INSERT INTO t1 SELECT randomblob(20), randomblob(25) FROM t1;
      INSERT INTO t1 SELECT randomblob(20), randomblob(25) FROM t1;
................................................................................
    # for any reason is not counted as "schema memory".
    #
    # Additionally, in auto-vacuum mode, dropping tables and indexes causes
    # the page-cache to shrink. So the amount of memory freed is always
    # much greater than just that reported by DBSTATUS_SCHEMA_USED in this
    # case.
    #
    # Some of the memory used for sqlite_stat3 is unaccounted for by
    # dbstatus.
    #
    if {[string match *x $tn] || $AUTOVACUUM
         || ([string match *y $tn] && $STAT3)} {
      do_test dbstatus-2.$tn.ax { expr {($nSchema1-$nSchema2)<=$nFree} } 1
    } else {
      do_test dbstatus-2.$tn.a { expr {$nSchema1-$nSchema2} } $nFree
    }
  
    do_test dbstatus-2.$tn.b { list $nAlloc1 $nSchema1 } "$nAlloc3 $nSchema3"
    do_test dbstatus-2.$tn.c { list $nAlloc2 $nSchema2 } "$nAlloc4 $nSchema4"

Changes to test/stat3.test.

7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
..
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file implements regression tests for SQLite library. This file 
# implements tests for the extra functionality provided by the ANALYZE 
# command when the library is compiled with SQLITE_ENABLE_STAT2 defined.
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl

set testprefix stat3

................................................................................
# command will delete the sqlite_stat2 table.  Likewise, if not compiled
# with SQLITE_ENABLE_STAT3, the sqlite_stat3 table is deleted.
#
do_test 1.1 {
  db eval {
    PRAGMA writable_schema=ON;
    CREATE TABLE sqlite_stat2(tbl,idx,sampleno,sample);
    CREATE TABLE sqlite_stat3(tbl,idx,sampleno,sample,neq,nlt);
    SELECT name FROM sqlite_master ORDER BY 1;
  }
} {sqlite_stat2 sqlite_stat3}
do_test 1.2 {
  db close
  sqlite3 db test.db
  db eval {SELECT name FROM sqlite_master ORDER BY 1}







|







 







|







7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
..
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file implements regression tests for SQLite library. This file 
# implements tests for the extra functionality provided by the ANALYZE 
# command when the library is compiled with SQLITE_ENABLE_STAT3 defined.
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl

set testprefix stat3

................................................................................
# command will delete the sqlite_stat2 table.  Likewise, if not compiled
# with SQLITE_ENABLE_STAT3, the sqlite_stat3 table is deleted.
#
do_test 1.1 {
  db eval {
    PRAGMA writable_schema=ON;
    CREATE TABLE sqlite_stat2(tbl,idx,sampleno,sample);
    CREATE TABLE sqlite_stat3(tbl,idx,neq,nlt,ndlt,sample);
    SELECT name FROM sqlite_master ORDER BY 1;
  }
} {sqlite_stat2 sqlite_stat3}
do_test 1.2 {
  db close
  sqlite3 db test.db
  db eval {SELECT name FROM sqlite_master ORDER BY 1}

Changes to test/unordered.test.

27
28
29
30
31
32
33


34
35

36
37
38
39
40
41
42
43
44
45
  INSERT INTO t1 SELECT a+16, b FROM t1;
  INSERT INTO t1 SELECT a+32, b FROM t1;
  INSERT INTO t1 SELECT a+64, b FROM t1;
  ANALYZE;
} {}

foreach idxmode {ordered unordered} {


  if {$idxmode == "unordered"} {
    execsql { UPDATE sqlite_stat1 SET stat = stat || ' unordered' }

    db close
    sqlite3 db test.db
  }
  foreach {tn sql r(ordered) r(unordered)} {
    1   "SELECT * FROM t1 ORDER BY a"
        {0 0 0 {SCAN TABLE t1 USING INDEX i1 (~128 rows)}}
        {0 0 0 {SCAN TABLE t1 (~128 rows)} 0 0 0 {USE TEMP B-TREE FOR ORDER BY}}
    2   "SELECT * FROM t1 WHERE a >?"
        {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a>?) (~32 rows)}}
        {0 0 0 {SCAN TABLE t1 (~42 rows)}}







>
>


>
|
|
<







27
28
29
30
31
32
33
34
35
36
37
38
39
40

41
42
43
44
45
46
47
  INSERT INTO t1 SELECT a+16, b FROM t1;
  INSERT INTO t1 SELECT a+32, b FROM t1;
  INSERT INTO t1 SELECT a+64, b FROM t1;
  ANALYZE;
} {}

foreach idxmode {ordered unordered} {
  catchsql { DELETE FROM sqlite_stat2 }
  catchsql { DELETE FROM sqlite_stat3 }
  if {$idxmode == "unordered"} {
    execsql { UPDATE sqlite_stat1 SET stat = stat || ' unordered' }
  }
  db close
  sqlite3 db test.db

  foreach {tn sql r(ordered) r(unordered)} {
    1   "SELECT * FROM t1 ORDER BY a"
        {0 0 0 {SCAN TABLE t1 USING INDEX i1 (~128 rows)}}
        {0 0 0 {SCAN TABLE t1 (~128 rows)} 0 0 0 {USE TEMP B-TREE FOR ORDER BY}}
    2   "SELECT * FROM t1 WHERE a >?"
        {0 0 0 {SEARCH TABLE t1 USING INDEX i1 (a>?) (~32 rows)}}
        {0 0 0 {SCAN TABLE t1 (~42 rows)}}