SQLite

Changes On Branch improved-index-scan
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch improved-index-scan Excluding Merge-Ins

This is equivalent to a diff from 483994a5 to 50f8ea37

2016-07-27
19:30
Enhance the query planner cost estimation for index scans to take into account WHERE clause terms that can be computed using only the index and that do not require looking up rows in the original table. This fixes an obscure performance regression that arose when the ORDER BY LIMIT optimization was added by check-in [bf46179d44843]. (check-in: 9e2b2681 user: drh tags: trunk)
19:20
Add test cases and fix a comment. (Closed-Leaf check-in: 50f8ea37 user: drh tags: improved-index-scan)
18:27
When estimating the cost of an index scan, factor in the cost savings of being able to use the index to evaluate some WHERE clause terms without having to do a table lookup. (check-in: a59b5622 user: drh tags: improved-index-scan)
16:03
Initialize a variable in where.c to avoid a valgrind warning. (check-in: 4d59df02 user: dan tags: trunk)
2016-07-26
18:15
Merge latest trunk changes into this branch. (check-in: d4f3d52c user: dan tags: rowvalue)
15:17
Merge fixes to sqlite3_scrub_backup() from trunk. (check-in: 91e811f5 user: drh tags: apple-osx)
10:46
Ensure that the sqlite3_scrub_backup() extension creates a backup database at least as large as indicated by the database header, even if the last page of the input database is a free-list leaf. (check-in: 483994a5 user: dan tags: trunk)
04:49
Copy the cache_spill setting from the main database over to the vacuum_db transient database when running a VACUUM. (check-in: c0e7d98e user: drh tags: trunk)

Changes to src/expr.c.

3960
3961
3962
3963
3964
3965
3966























































3967
3968
3969
3970
3971
3972
3973
   && sqlite3ExprCompare(pE1->pLeft, pE2->pLeft, iTab)==0
   && (pE1->op!=TK_ISNULL && pE1->op!=TK_IS)
  ){
    return 1;
  }
  return 0;
}
























































/*
** An instance of the following structure is used by the tree walker
** to count references to table columns in the arguments of an 
** aggregate function, in order to implement the
** sqlite3FunctionThisSrc() routine.
*/







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
   && sqlite3ExprCompare(pE1->pLeft, pE2->pLeft, iTab)==0
   && (pE1->op!=TK_ISNULL && pE1->op!=TK_IS)
  ){
    return 1;
  }
  return 0;
}

/*
** An instance of the following structure is used by the tree walker
** to determine if an expression can be evaluated by reference to the
** index only, without having to do a search for the corresponding
** table entry.  The IdxCover.pIdx field is the index.  IdxCover.iCur
** is the cursor for the table.
*/
struct IdxCover {
  Index *pIdx;     /* The index to be tested for coverage */
  int iCur;        /* Cursor number for the table corresponding to the index */
};

/*
** Check to see if there are references to columns in table 
** pWalker->u.pIdxCover->iCur can be satisfied using the index
** pWalker->u.pIdxCover->pIdx.
*/
static int exprIdxCover(Walker *pWalker, Expr *pExpr){
  if( pExpr->op==TK_COLUMN
   && pExpr->iTable==pWalker->u.pIdxCover->iCur
   && sqlite3ColumnOfIndex(pWalker->u.pIdxCover->pIdx, pExpr->iColumn)<0
  ){
    pWalker->eCode = 1;
    return WRC_Abort;
  }
  return WRC_Continue;
}

/*
** Determine if an index pIdx on table with cursor iCur contains will
** the expression pExpr.  Return true if the index does cover the
** expression and false if the pExpr expression references table columns
** that are not found in the index pIdx.
**
** An index covering an expression means that the expression can be
** evaluated using only the index and without having to lookup the
** corresponding table entry.
*/
int sqlite3ExprCoveredByIndex(
  Expr *pExpr,        /* The index to be tested */
  int iCur,           /* The cursor number for the corresponding table */
  Index *pIdx         /* The index that might be used for coverage */
){
  Walker w;
  struct IdxCover xcov;
  memset(&w, 0, sizeof(w));
  xcov.iCur = iCur;
  xcov.pIdx = pIdx;
  w.xExprCallback = exprIdxCover;
  w.u.pIdxCover = &xcov;
  sqlite3WalkExpr(&w, pExpr);
  return !w.eCode;
}


/*
** An instance of the following structure is used by the tree walker
** to count references to table columns in the arguments of an 
** aggregate function, in order to implement the
** sqlite3FunctionThisSrc() routine.
*/

Changes to src/sqliteInt.h.

3253
3254
3255
3256
3257
3258
3259

3260
3261
3262
3263
3264
3265
3266
    NameContext *pNC;                          /* Naming context */
    int n;                                     /* A counter */
    int iCur;                                  /* A cursor number */
    SrcList *pSrcList;                         /* FROM clause */
    struct SrcCount *pSrcCount;                /* Counting column references */
    struct CCurHint *pCCurHint;                /* Used by codeCursorHint() */
    int *aiCol;                                /* array of column indexes */

  } u;
};

/* Forward declarations */
int sqlite3WalkExpr(Walker*, Expr*);
int sqlite3WalkExprList(Walker*, ExprList*);
int sqlite3WalkSelect(Walker*, Select*);







>







3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
    NameContext *pNC;                          /* Naming context */
    int n;                                     /* A counter */
    int iCur;                                  /* A cursor number */
    SrcList *pSrcList;                         /* FROM clause */
    struct SrcCount *pSrcCount;                /* Counting column references */
    struct CCurHint *pCCurHint;                /* Used by codeCursorHint() */
    int *aiCol;                                /* array of column indexes */
    struct IdxCover *pIdxCover;                /* Check for index coverage */
  } u;
};

/* Forward declarations */
int sqlite3WalkExpr(Walker*, Expr*);
int sqlite3WalkExprList(Walker*, ExprList*);
int sqlite3WalkSelect(Walker*, Select*);
3696
3697
3698
3699
3700
3701
3702

3703
3704
3705
3706
3707
3708
3709
int sqlite3RunVacuum(char**, sqlite3*);
char *sqlite3NameFromToken(sqlite3*, Token*);
int sqlite3ExprCompare(Expr*, Expr*, int);
int sqlite3ExprListCompare(ExprList*, ExprList*, int);
int sqlite3ExprImpliesExpr(Expr*, Expr*, int);
void sqlite3ExprAnalyzeAggregates(NameContext*, Expr*);
void sqlite3ExprAnalyzeAggList(NameContext*,ExprList*);

int sqlite3FunctionUsesThisSrc(Expr*, SrcList*);
Vdbe *sqlite3GetVdbe(Parse*);
#ifndef SQLITE_OMIT_BUILTIN_TEST
void sqlite3PrngSaveState(void);
void sqlite3PrngRestoreState(void);
#endif
void sqlite3RollbackAll(sqlite3*,int);







>







3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
int sqlite3RunVacuum(char**, sqlite3*);
char *sqlite3NameFromToken(sqlite3*, Token*);
int sqlite3ExprCompare(Expr*, Expr*, int);
int sqlite3ExprListCompare(ExprList*, ExprList*, int);
int sqlite3ExprImpliesExpr(Expr*, Expr*, int);
void sqlite3ExprAnalyzeAggregates(NameContext*, Expr*);
void sqlite3ExprAnalyzeAggList(NameContext*,ExprList*);
int sqlite3ExprCoveredByIndex(Expr*, int iCur, Index *pIdx);
int sqlite3FunctionUsesThisSrc(Expr*, SrcList*);
Vdbe *sqlite3GetVdbe(Parse*);
#ifndef SQLITE_OMIT_BUILTIN_TEST
void sqlite3PrngSaveState(void);
void sqlite3PrngRestoreState(void);
#endif
void sqlite3RollbackAll(sqlite3*,int);

Changes to src/where.c.

2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
   && (rc = whereLoopResize(db, pNew, pNew->nLTerm+1))==SQLITE_OK
  ){
    LogEst nIter;
    pNew->u.btree.nEq++;
    pNew->nSkip++;
    pNew->aLTerm[pNew->nLTerm++] = 0;
    pNew->wsFlags |= WHERE_SKIPSCAN;
    nIter = pProbe->aiRowLogEst[saved_nEq] - pProbe->aiRowLogEst[saved_nEq+1];
    pNew->nOut -= nIter;
    /* TUNING:  Because uncertainties in the estimates for skip-scan queries,
    ** add a 1.375 fudge factor to make skip-scan slightly less likely. */
    nIter += 5;
    whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, nIter + nInMul);
    pNew->nOut = saved_nOut;
    pNew->u.btree.nEq = saved_nEq;
    pNew->nSkip = saved_nSkip;
    pNew->wsFlags = saved_wsFlags;
  }








|



|







2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
   && (rc = whereLoopResize(db, pNew, pNew->nLTerm+1))==SQLITE_OK
  ){
    LogEst nIter;
    pNew->u.btree.nEq++;
    pNew->nSkip++;
    pNew->aLTerm[pNew->nLTerm++] = 0;
    pNew->wsFlags |= WHERE_SKIPSCAN;
    nIter = pProbe->aiRowLogEst[saved_nEq]+1 - pProbe->aiRowLogEst[saved_nEq+1];
    pNew->nOut -= nIter;
    /* TUNING:  Because uncertainties in the estimates for skip-scan queries,
    ** add a 1.375 fudge factor to make skip-scan slightly less likely. */
    nIter += 4;
    whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, nIter + nInMul);
    pNew->nOut = saved_nOut;
    pNew->u.btree.nEq = saved_nEq;
    pNew->nSkip = saved_nSkip;
    pNew->wsFlags = saved_wsFlags;
  }

2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
























2782
2783
2784
2785
2786
2787
2788
2789
         && OptimizationEnabled(pWInfo->pParse->db, SQLITE_CoverIdxScan)
          )
      ){
        pNew->iSortIdx = b ? iSortIdx : 0;

        /* The cost of visiting the index rows is N*K, where K is
        ** between 1.1 and 3.0, depending on the relative sizes of the
        ** index and table rows. If this is a non-covering index scan,
        ** also add the cost of visiting table rows (N*3.0).  */
        pNew->rRun = rSize + 1 + (15*pProbe->szIdxRow)/pTab->szTabRow;
        if( m!=0 ){
























          pNew->rRun = sqlite3LogEstAdd(pNew->rRun, rSize+16);
        }
        ApplyCostMultiplier(pNew->rRun, pTab->costMult);
        whereLoopOutputAdjust(pWC, pNew, rSize);
        rc = whereLoopInsert(pBuilder, pNew);
        pNew->nOut = rSize;
        if( rc ) break;
      }







|
<


>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|







2771
2772
2773
2774
2775
2776
2777
2778

2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
         && OptimizationEnabled(pWInfo->pParse->db, SQLITE_CoverIdxScan)
          )
      ){
        pNew->iSortIdx = b ? iSortIdx : 0;

        /* The cost of visiting the index rows is N*K, where K is
        ** between 1.1 and 3.0, depending on the relative sizes of the
        ** index and table rows. */

        pNew->rRun = rSize + 1 + (15*pProbe->szIdxRow)/pTab->szTabRow;
        if( m!=0 ){
          /* If this is a non-covering index scan, add in the cost of
          ** doing table lookups.  The cost will be 3x the number of
          ** lookups.  Take into account WHERE clause terms that can be
          ** satisfied using just the index, and that do not require a
          ** table lookup. */
          LogEst nLookup = rSize + 16;  /* Base cost:  N*3 */
          int ii;
          int iCur = pSrc->iCursor;
          WhereClause *pWC = &pWInfo->sWC;
          for(ii=0; ii<pWC->nTerm; ii++){
            WhereTerm *pTerm = &pWC->a[ii];
            if( !sqlite3ExprCoveredByIndex(pTerm->pExpr, iCur, pProbe) ){
              break;
            }
            /* pTerm can be evaluated using just the index.  So reduce
            ** the expected number of table lookups accordingly */
            if( pTerm->truthProb<=0 ){
              nLookup += pTerm->truthProb;
            }else{
              nLookup--;
              if( pTerm->eOperator & (WO_EQ|WO_IS) ) nLookup -= 19;
            }
          }
          
          pNew->rRun = sqlite3LogEstAdd(pNew->rRun, nLookup);
        }
        ApplyCostMultiplier(pNew->rRun, pTab->costMult);
        whereLoopOutputAdjust(pWC, pNew, rSize);
        rc = whereLoopInsert(pBuilder, pNew);
        pNew->nOut = rSize;
        if( rc ) break;
      }

Added test/index8.test.

























































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# 2016-07-27
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Test cases for ORDER BY and LIMIT on an index scan.
#


set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Performance regression reported at
# http://www.mail-archive.com/sqlite-users@mailinglists.sqlite.org/msg98615.html
#
# Caused by the ORDER BY LIMIT optionation for check-in
# https://sqlite.org/src/info/bf46179d44843769
#
# Fixed on approximately 2016-07-27 by changes that compute a better score
# for index scans by taking into account WHERE clause constraints that can
# be handled by the index and do not require a table lookup.
#
do_execsql_test 1.0 {
  CREATE TABLE t1(a,b,c,d);
  WITH RECURSIVE c(x) AS (VALUES(0) UNION ALL SELECT x+1 FROM c WHERE x<100)
  INSERT INTO t1(a,b,c,d)
     SELECT x/10, x%10, x%19, x FROM c;
  CREATE INDEX t1abc ON t1(a,b,c);
  SELECT * FROM t1 WHERE c=4 ORDER BY a, b LIMIT 2;
} {0 4 4 4 2 3 4 23}

# Prior to the fix, the following EQP would show a table scan and a sort
# rather than an index scan.
#
do_execsql_test 1.0eqp {
  EXPLAIN QUERY PLAN
  SELECT * FROM t1 WHERE c=4 ORDER BY a, b LIMIT 2;
} {/SCAN TABLE t1 USING INDEX t1abc/}

# If we change the index so that it no longer covers the WHERE clause,
# then we should (correctly) revert to using a table scan.
#
do_execsql_test 1.1 {
  DROP INDEX t1abc;
  CREATE INDEX t1abd ON t1(a,b,d);
  SELECT * FROM t1 WHERE c=4 ORDER BY a, b LIMIT 2;
} {0 4 4 4 2 3 4 23}
do_execsql_test 1.1eqp {
  EXPLAIN QUERY PLAN
  SELECT * FROM t1 WHERE c=4 ORDER BY a, b LIMIT 2;
} {~/USING INDEX/}


finish_test

Changes to test/scanstatus.test.

329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
do_eqp_test 5.3.1 {
  SELECT count(*) FROM t2 WHERE y = 'j';
} {0 0 0 {SEARCH TABLE t2 USING COVERING INDEX t2xy (ANY(x) AND y=?)}}
do_execsql_test 5.3.2 {
  SELECT count(*) FROM t2 WHERE y = 'j';
} {19}
do_scanstatus_test 5.3.3 { 
  nLoop 1 nVisit 19 nEst 56.0 zName t2xy zExplain
  {SEARCH TABLE t2 USING COVERING INDEX t2xy (ANY(x) AND y=?)}
}

do_eqp_test 5.4.1 {
  SELECT count(*) FROM t1, t2 WHERE y = c;
} {
  0 0 0 {SCAN TABLE t1 USING COVERING INDEX t1bc}
  0 1 1 {SEARCH TABLE t2 USING COVERING INDEX t2xy (ANY(x) AND y=?)}
}
do_execsql_test 5.4.2 {
  SELECT count(*) FROM t1, t2 WHERE y = c;
} {200}
do_scanstatus_test 5.4.3 { 
  nLoop 1 nVisit 10 nEst 10.0 zName t1bc 
  zExplain {SCAN TABLE t1 USING COVERING INDEX t1bc}
  nLoop 10 nVisit 200 nEst 56.0 zName t2xy 
  zExplain {SEARCH TABLE t2 USING COVERING INDEX t2xy (ANY(x) AND y=?)}
}

do_eqp_test 5.5.1 {
  SELECT count(*) FROM t1, t3 WHERE y = c;
} {
  0 0 1 {SCAN TABLE t3} 







|















|







329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
do_eqp_test 5.3.1 {
  SELECT count(*) FROM t2 WHERE y = 'j';
} {0 0 0 {SEARCH TABLE t2 USING COVERING INDEX t2xy (ANY(x) AND y=?)}}
do_execsql_test 5.3.2 {
  SELECT count(*) FROM t2 WHERE y = 'j';
} {19}
do_scanstatus_test 5.3.3 { 
  nLoop 1 nVisit 19 nEst 52.0 zName t2xy zExplain
  {SEARCH TABLE t2 USING COVERING INDEX t2xy (ANY(x) AND y=?)}
}

do_eqp_test 5.4.1 {
  SELECT count(*) FROM t1, t2 WHERE y = c;
} {
  0 0 0 {SCAN TABLE t1 USING COVERING INDEX t1bc}
  0 1 1 {SEARCH TABLE t2 USING COVERING INDEX t2xy (ANY(x) AND y=?)}
}
do_execsql_test 5.4.2 {
  SELECT count(*) FROM t1, t2 WHERE y = c;
} {200}
do_scanstatus_test 5.4.3 { 
  nLoop 1 nVisit 10 nEst 10.0 zName t1bc 
  zExplain {SCAN TABLE t1 USING COVERING INDEX t1bc}
  nLoop 10 nVisit 200 nEst 52.0 zName t2xy 
  zExplain {SEARCH TABLE t2 USING COVERING INDEX t2xy (ANY(x) AND y=?)}
}

do_eqp_test 5.5.1 {
  SELECT count(*) FROM t1, t3 WHERE y = c;
} {
  0 0 1 {SCAN TABLE t3}