SQLite

Check-in [67cf24b30e]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Optimize WHERE clauses that constain AND, BETWEEN, and LIKE terms as operands of an OR. (CVS 6068)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 67cf24b30e087796cfb0fccf47328e72ade5ecdc
User & Date: drh 2008-12-28 18:35:09.000
Context
2008-12-28
20:47
Multi-index OR optimizer response to ORDER BY rowid. But fix in sqlite3_stmt_status(): report a full table scan when "ORDER BY rowid" is used without constraints. (CVS 6069) (check-in: 3464d369d3 user: drh tags: trunk)
18:35
Optimize WHERE clauses that constain AND, BETWEEN, and LIKE terms as operands of an OR. (CVS 6068) (check-in: 67cf24b30e user: drh tags: trunk)
16:55
Simplify the VM code that implements WHERE claues. (CVS 6067) (check-in: fa95f843e1 user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/where.c.
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
** This module contains C code that generates VDBE code used to process
** the WHERE clause of SQL statements.  This module is responsible for
** generating the code that loops through a table looking for applicable
** rows.  Indices are selected and used to speed the search when doing
** so is applicable.  Because this module is responsible for selecting
** indices, you might also think of this module as the "query optimizer".
**
** $Id: where.c,v 1.345 2008/12/28 16:55:25 drh Exp $
*/
#include "sqliteInt.h"

/*
** Trace output macros
*/
#if defined(SQLITE_TEST) || defined(SQLITE_DEBUG)







|







12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
** This module contains C code that generates VDBE code used to process
** the WHERE clause of SQL statements.  This module is responsible for
** generating the code that loops through a table looking for applicable
** rows.  Indices are selected and used to speed the search when doing
** so is applicable.  Because this module is responsible for selecting
** indices, you might also think of this module as the "query optimizer".
**
** $Id: where.c,v 1.346 2008/12/28 18:35:09 drh Exp $
*/
#include "sqliteInt.h"

/*
** Trace output macros
*/
#if defined(SQLITE_TEST) || defined(SQLITE_DEBUG)
125
126
127
128
129
130
131

132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
/*
** An instance of the following structure holds all information about a
** WHERE clause.  Mostly this is a container for one or more WhereTerms.
*/
struct WhereClause {
  Parse *pParse;           /* The parser context */
  WhereMaskSet *pMaskSet;  /* Mapping of table cursor numbers to bitmasks */

  int nTerm;               /* Number of terms */
  int nSlot;               /* Number of entries in a[] */
  WhereTerm *a;            /* Each a[] describes a term of the WHERE cluase */
  WhereTerm aStatic[4];    /* Initial static space for a[] */
};

/*
** A WhereTerm with eOperator==WO_OR has its u.pOrInfo pointer set to
** a dynamically allocated instance of the following structure.
*/
struct WhereOrInfo {
  WhereClause wc;          /* Decomposition into subterms */
  Bitmask indexable;       /* Bitmask of all indexable tables in the clause */
};

/*
** A WhereTerm with eOperator==WO_AND has its u.pAndInfo pointer set to
** a dynamically allocated instance of the following structure.
*/
struct WhereAndInfo {
  WhereClause wc;          /* The OR subexpression broken out */
  Index *pIdx;             /* Index to use */
  double cost;             /* Cost of evaluating this OR subexpression */
};

/*
** An instance of the following structure keeps track of a mapping
** between VDBE cursor numbers and bits of the bitmasks in WhereTerm.
**
** The VDBE cursor numbers are small integers contained in 







>




















|
<
<







125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153


154
155
156
157
158
159
160
/*
** An instance of the following structure holds all information about a
** WHERE clause.  Mostly this is a container for one or more WhereTerms.
*/
struct WhereClause {
  Parse *pParse;           /* The parser context */
  WhereMaskSet *pMaskSet;  /* Mapping of table cursor numbers to bitmasks */
  u8 op;                   /* Split operator.  TK_AND or TK_OR */
  int nTerm;               /* Number of terms */
  int nSlot;               /* Number of entries in a[] */
  WhereTerm *a;            /* Each a[] describes a term of the WHERE cluase */
  WhereTerm aStatic[4];    /* Initial static space for a[] */
};

/*
** A WhereTerm with eOperator==WO_OR has its u.pOrInfo pointer set to
** a dynamically allocated instance of the following structure.
*/
struct WhereOrInfo {
  WhereClause wc;          /* Decomposition into subterms */
  Bitmask indexable;       /* Bitmask of all indexable tables in the clause */
};

/*
** A WhereTerm with eOperator==WO_AND has its u.pAndInfo pointer set to
** a dynamically allocated instance of the following structure.
*/
struct WhereAndInfo {
  WhereClause wc;          /* The subexpression broken out */


};

/*
** An instance of the following structure keeps track of a mapping
** between VDBE cursor numbers and bits of the bitmasks in WhereTerm.
**
** The VDBE cursor numbers are small integers contained in 
365
366
367
368
369
370
371

372
373
374
375
376
377
378
** does is make slot[] entries point to substructure within pExpr.
**
** In the previous sentence and in the diagram, "slot[]" refers to
** the WhereClause.a[] array.  The slot[] array grows as needed to contain
** all terms of the WHERE clause.
*/
static void whereSplit(WhereClause *pWC, Expr *pExpr, int op){

  if( pExpr==0 ) return;
  if( pExpr->op!=op ){
    whereClauseInsert(pWC, pExpr, 0);
  }else{
    whereSplit(pWC, pExpr->pLeft, op);
    whereSplit(pWC, pExpr->pRight, op);
  }







>







364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
** does is make slot[] entries point to substructure within pExpr.
**
** In the previous sentence and in the diagram, "slot[]" refers to
** the WhereClause.a[] array.  The slot[] array grows as needed to contain
** all terms of the WHERE clause.
*/
static void whereSplit(WhereClause *pWC, Expr *pExpr, int op){
  pWC->op = (u8)op;
  if( pExpr==0 ) return;
  if( pExpr->op!=op ){
    whereClauseInsert(pWC, pExpr, 0);
  }else{
    whereSplit(pWC, pExpr->pLeft, op);
    whereSplit(pWC, pExpr->pRight, op);
  }
831
832
833
834
835
836
837



838


















839

840
841
842
843
844
845
846

  /*
  ** Compute the set of tables that might satisfy cases 1 or 2.
  */
  indexable = chngToIN = ~(Bitmask)0;
  for(i=pOrWc->nTerm-1, pOrTerm=pOrWc->a; i>=0 && indexable; i--, pOrTerm++){
    if( (pOrTerm->eOperator & WO_SINGLE)==0 ){



      chngToIN = 0;


















      indexable = 0;   /***** FIX ME.  Some AND clauses are indexable. */

    }else if( pOrTerm->wtFlags & TERM_COPIED ){
      /* Skip this term for now.  We revisit it when we process the
      ** corresponding TERM_VIRTUAL term */
    }else{
      Bitmask b;
      b = getMask(pMaskSet, pOrTerm->leftCursor);
      if( pOrTerm->wtFlags & TERM_VIRTUAL ){







>
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>







831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868

  /*
  ** Compute the set of tables that might satisfy cases 1 or 2.
  */
  indexable = chngToIN = ~(Bitmask)0;
  for(i=pOrWc->nTerm-1, pOrTerm=pOrWc->a; i>=0 && indexable; i--, pOrTerm++){
    if( (pOrTerm->eOperator & WO_SINGLE)==0 ){
      WhereAndInfo *pAndInfo;
      assert( pOrTerm->eOperator==0 );
      assert( (pOrTerm->wtFlags & (TERM_ANDINFO|TERM_ORINFO))==0 );
      chngToIN = 0;
      pAndInfo = sqlite3DbMallocRaw(db, sizeof(*pAndInfo));
      if( pAndInfo ){
        WhereClause *pAndWC;
        WhereTerm *pAndTerm;
        int j;
        Bitmask b = 0;
        pOrTerm->u.pAndInfo = pAndInfo;
        pOrTerm->wtFlags |= TERM_ANDINFO;
        pOrTerm->eOperator = WO_AND;
        pAndWC = &pAndInfo->wc;
        whereClauseInit(pAndWC, pWC->pParse, pMaskSet);
        whereSplit(pAndWC, pOrTerm->pExpr, TK_AND);
        exprAnalyzeAll(pSrc, pAndWC);
        for(j=0, pAndTerm=pAndWC->a; j<pAndWC->nTerm; j++, pAndTerm++){
          if( pAndTerm->pExpr && allowedOp(pAndTerm->pExpr->op) ){
            b |= getMask(pMaskSet, pAndTerm->leftCursor);
          }
        }
        indexable &= b;
      }
    }else if( pOrTerm->wtFlags & TERM_COPIED ){
      /* Skip this term for now.  We revisit it when we process the
      ** corresponding TERM_VIRTUAL term */
    }else{
      Bitmask b;
      b = getMask(pMaskSet, pOrTerm->leftCursor);
      if( pOrTerm->wtFlags & TERM_VIRTUAL ){
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
  **
  ** The two new terms are added onto the end of the WhereClause object.
  ** The new terms are "dynamic" and are children of the original BETWEEN
  ** term.  That means that if the BETWEEN term is coded, the children are
  ** skipped.  Or, if the children are satisfied by an index, the original
  ** BETWEEN term is skipped.
  */
  else if( pExpr->op==TK_BETWEEN ){
    ExprList *pList = pExpr->pList;
    int i;
    static const u8 ops[] = {TK_GE, TK_LE};
    assert( pList!=0 );
    assert( pList->nExpr==2 );
    for(i=0; i<2; i++){
      Expr *pNewExpr;







|







1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
  **
  ** The two new terms are added onto the end of the WhereClause object.
  ** The new terms are "dynamic" and are children of the original BETWEEN
  ** term.  That means that if the BETWEEN term is coded, the children are
  ** skipped.  Or, if the children are satisfied by an index, the original
  ** BETWEEN term is skipped.
  */
  else if( pExpr->op==TK_BETWEEN && pWC->op==TK_AND ){
    ExprList *pList = pExpr->pList;
    int i;
    static const u8 ops[] = {TK_GE, TK_LE};
    assert( pList!=0 );
    assert( pList->nExpr==2 );
    for(i=0; i<2; i++){
      Expr *pNewExpr;
1104
1105
1106
1107
1108
1109
1110

1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126

1127
1128
1129
1130
1131
1132
1133
#endif /* SQLITE_OMIT_BETWEEN_OPTIMIZATION */

#if !defined(SQLITE_OMIT_OR_OPTIMIZATION) && !defined(SQLITE_OMIT_SUBQUERY)
  /* Analyze a term that is composed of two or more subterms connected by
  ** an OR operator.
  */
  else if( pExpr->op==TK_OR ){

    exprAnalyzeOrTerm(pSrc, pWC, idxTerm);
  }
#endif /* SQLITE_OMIT_OR_OPTIMIZATION */

#ifndef SQLITE_OMIT_LIKE_OPTIMIZATION
  /* Add constraints to reduce the search space on a LIKE or GLOB
  ** operator.
  **
  ** A like pattern of the form "x LIKE 'abc%'" is changed into constraints
  **
  **          x>='abc' AND x<'abd' AND x LIKE 'abc%'
  **
  ** The last character of the prefix "abc" is incremented to form the
  ** termination condition "abd".
  */
  if( isLikeOrGlob(pParse, pExpr, &nPattern, &isComplete, &noCase) ){

    Expr *pLeft, *pRight;
    Expr *pStr1, *pStr2;
    Expr *pNewExpr1, *pNewExpr2;
    int idxNew1, idxNew2;

    pLeft = pExpr->pList->a[1].pExpr;
    pRight = pExpr->pList->a[0].pExpr;







>















|
>







1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
#endif /* SQLITE_OMIT_BETWEEN_OPTIMIZATION */

#if !defined(SQLITE_OMIT_OR_OPTIMIZATION) && !defined(SQLITE_OMIT_SUBQUERY)
  /* Analyze a term that is composed of two or more subterms connected by
  ** an OR operator.
  */
  else if( pExpr->op==TK_OR ){
    assert( pWC->op==TK_AND );
    exprAnalyzeOrTerm(pSrc, pWC, idxTerm);
  }
#endif /* SQLITE_OMIT_OR_OPTIMIZATION */

#ifndef SQLITE_OMIT_LIKE_OPTIMIZATION
  /* Add constraints to reduce the search space on a LIKE or GLOB
  ** operator.
  **
  ** A like pattern of the form "x LIKE 'abc%'" is changed into constraints
  **
  **          x>='abc' AND x<'abd' AND x LIKE 'abc%'
  **
  ** The last character of the prefix "abc" is incremented to form the
  ** termination condition "abd".
  */
  if( isLikeOrGlob(pParse, pExpr, &nPattern, &isComplete, &noCase)
         && pWC->op==TK_AND ){
    Expr *pLeft, *pRight;
    Expr *pStr1, *pStr2;
    Expr *pNewExpr1, *pNewExpr2;
    int idxNew1, idxNew2;

    pLeft = pExpr->pList->a[1].pExpr;
    pRight = pExpr->pList->a[0].pExpr;
1828
1829
1830
1831
1832
1833
1834



1835
1836
1837



1838
1839
1840
1841
1842
1843
1844
      WhereClause *pOrWC = &pTerm->u.pOrInfo->wc;
      WhereTerm *pOrTerm;
      int j;
      double rTotal = 0;
      double nRow = 0;
      for(j=0, pOrTerm=pOrWC->a; j<pOrWC->nTerm; j++, pOrTerm++){
        WhereCost sTermCost;



        if( pOrTerm->leftCursor!=iCur ) continue;
        tempWC.a = pOrTerm;
        bestIndex(pParse, &tempWC, pSrc, notReady, 0, &sTermCost);



        if( sTermCost.plan.wsFlags==0 ){
          rTotal = pCost->rCost;
          break;
        }
        rTotal += sTermCost.rCost;
        nRow += sTermCost.nRow;
      }







>
>
>
|
|
|
>
>
>







1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
      WhereClause *pOrWC = &pTerm->u.pOrInfo->wc;
      WhereTerm *pOrTerm;
      int j;
      double rTotal = 0;
      double nRow = 0;
      for(j=0, pOrTerm=pOrWC->a; j<pOrWC->nTerm; j++, pOrTerm++){
        WhereCost sTermCost;
        if( pOrTerm->eOperator==WO_AND ){
          WhereClause *pAndWC = &pOrTerm->u.pAndInfo->wc;
          bestIndex(pParse, pAndWC, pSrc, notReady, 0, &sTermCost);
        }else if( pOrTerm->leftCursor==iCur ){
          tempWC.a = pOrTerm;
          bestIndex(pParse, &tempWC, pSrc, notReady, 0, &sTermCost);
        }else{
          continue;
        }
        if( sTermCost.plan.wsFlags==0 ){
          rTotal = pCost->rCost;
          break;
        }
        rTotal += sTermCost.rCost;
        nRow += sTermCost.nRow;
      }
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
      sqlite3VdbeAddOp2(v, OP_Null, 0, regOrRowset);
    }
    oneTab.nSrc = 1;
    oneTab.nAlloc = 1;
    oneTab.a[0] = *pTabItem;
    for(j=0, pOrTerm=pOrWc->a; j<pOrWc->nTerm; j++, pOrTerm++){
      WhereInfo *pSubWInfo;
      if( pOrTerm->leftCursor!=iCur ) continue;
      pSubWInfo = sqlite3WhereBegin(pParse, &oneTab, pOrTerm->pExpr, 0,
                        WHERE_FILL_ROWSET | WHERE_OMIT_OPEN | WHERE_OMIT_CLOSE,
                        regOrRowset);
      if( pSubWInfo ){
        sqlite3WhereEnd(pSubWInfo);
      }
    }







|







2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
      sqlite3VdbeAddOp2(v, OP_Null, 0, regOrRowset);
    }
    oneTab.nSrc = 1;
    oneTab.nAlloc = 1;
    oneTab.a[0] = *pTabItem;
    for(j=0, pOrTerm=pOrWc->a; j<pOrWc->nTerm; j++, pOrTerm++){
      WhereInfo *pSubWInfo;
      if( pOrTerm->leftCursor!=iCur && pOrTerm->eOperator!=WO_AND ) continue;
      pSubWInfo = sqlite3WhereBegin(pParse, &oneTab, pOrTerm->pExpr, 0,
                        WHERE_FILL_ROWSET | WHERE_OMIT_OPEN | WHERE_OMIT_CLOSE,
                        regOrRowset);
      if( pSubWInfo ){
        sqlite3WhereEnd(pSubWInfo);
      }
    }
Changes to test/where7.test.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# 2008 December 23
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this file is testing the multi-index OR clause optimizer.
#
# $Id: where7.test,v 1.1 2008/12/23 23:56:22 drh Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

ifcapable !or_opt {
  finish_test
  return













|







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# 2008 December 23
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this file is testing the multi-index OR clause optimizer.
#
# $Id: where7.test,v 1.2 2008/12/28 18:35:09 drh Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

ifcapable !or_opt {
  finish_test
  return
85
86
87
88
89
90
91











92
93
94
  }
} {2 4 5 scan 0}
do_test where7-1.10 {
  count_steps {
    SELECT a FROM t1 WHERE (b=3 OR c>=10 OR c=4 OR b>10)
  }
} {2 4 5 scan 0}













finish_test







>
>
>
>
>
>
>
>
>
>
>



85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
  }
} {2 4 5 scan 0}
do_test where7-1.10 {
  count_steps {
    SELECT a FROM t1 WHERE (b=3 OR c>=10 OR c=4 OR b>10)
  }
} {2 4 5 scan 0}
do_test where7-1.11 {
  count_steps {
    SELECT a FROM t1 WHERE (d=5 AND b=3) OR c==100;
  }
} {2 5 scan 0}
do_test where7-1.12 {
breakpoint
  count_steps {
    SELECT a FROM t1 WHERE (b BETWEEN 2 AND 4) OR c=100
  }
} {1 2 3 5 scan 0}


finish_test