SQLite

Check-in [207335fdbf]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Make sure that the optimizer realizes that an "x IS NULL" contraint does not necessarily give a single-row result even on a UNIQUE index. Ticket #3824. (CVS 6545)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 207335fdbf992a2f5bc5982b3163a38016ba1b21
User & Date: drh 2009-04-24 14:51:42.000
Context
2009-04-24
15:46
Get rid of the special RowSet processing in where.c and move that into clients. Added the WHERE_DUPLICATES_OK option to eliminate an unnecessary RowSet during DELETE with a WHERE clause containing ORs. (CVS 6546) (check-in: 98606bee9e user: drh tags: trunk)
14:51
Make sure that the optimizer realizes that an "x IS NULL" contraint does not necessarily give a single-row result even on a UNIQUE index. Ticket #3824. (CVS 6545) (check-in: 207335fdbf user: drh tags: trunk)
10:13
Make selecting the asynchronous IO file-locking mode a runtime operation. Still untested. (CVS 6544) (check-in: 577277e84a user: danielk1977 tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/where.c.
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
** This module contains C code that generates VDBE code used to process
** the WHERE clause of SQL statements.  This module is responsible for
** generating the code that loops through a table looking for applicable
** rows.  Indices are selected and used to speed the search when doing
** so is applicable.  Because this module is responsible for selecting
** indices, you might also think of this module as the "query optimizer".
**
** $Id: where.c,v 1.388 2009/04/23 13:22:44 drh Exp $
*/
#include "sqliteInt.h"

/*
** Trace output macros
*/
#if defined(SQLITE_TEST) || defined(SQLITE_DEBUG)







|







12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
** This module contains C code that generates VDBE code used to process
** the WHERE clause of SQL statements.  This module is responsible for
** generating the code that loops through a table looking for applicable
** rows.  Indices are selected and used to speed the search when doing
** so is applicable.  Because this module is responsible for selecting
** indices, you might also think of this module as the "query optimizer".
**
** $Id: where.c,v 1.389 2009/04/24 14:51:42 drh Exp $
*/
#include "sqliteInt.h"

/*
** Trace output macros
*/
#if defined(SQLITE_TEST) || defined(SQLITE_DEBUG)
222
223
224
225
226
227
228
229
230
231

232
233
234
235
236
237
238
239
240
** is set to WO_IN|WO_EQ.  The WhereLevel.wsFlags field can then be used as
** the "op" parameter to findTerm when we are resolving equality constraints.
** ISNULL constraints will then not be used on the right table of a left
** join.  Tickets #2177 and #2189.
*/
#define WHERE_ROWID_EQ     0x00001000  /* rowid=EXPR or rowid IN (...) */
#define WHERE_ROWID_RANGE  0x00002000  /* rowid<EXPR and/or rowid>EXPR */
#define WHERE_COLUMN_EQ    0x00010000  /* x=EXPR or x IN (...) */
#define WHERE_COLUMN_RANGE 0x00020000  /* x<EXPR and/or x>EXPR */
#define WHERE_COLUMN_IN    0x00040000  /* x IN (...) */

#define WHERE_INDEXED      0x00070000  /* Anything that uses an index */
#define WHERE_IN_ABLE      0x00071000  /* Able to support an IN operator */
#define WHERE_TOP_LIMIT    0x00100000  /* x<EXPR or x<=EXPR constraint */
#define WHERE_BTM_LIMIT    0x00200000  /* x>EXPR or x>=EXPR constraint */
#define WHERE_IDX_ONLY     0x00800000  /* Use index only - omit table */
#define WHERE_ORDERBY      0x01000000  /* Output will appear in correct order */
#define WHERE_REVERSE      0x02000000  /* Scan in reverse order */
#define WHERE_UNIQUE       0x04000000  /* Selects no more than one row */
#define WHERE_VIRTUALTABLE 0x08000000  /* Use virtual-table processing */







|


>
|
|







222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
** is set to WO_IN|WO_EQ.  The WhereLevel.wsFlags field can then be used as
** the "op" parameter to findTerm when we are resolving equality constraints.
** ISNULL constraints will then not be used on the right table of a left
** join.  Tickets #2177 and #2189.
*/
#define WHERE_ROWID_EQ     0x00001000  /* rowid=EXPR or rowid IN (...) */
#define WHERE_ROWID_RANGE  0x00002000  /* rowid<EXPR and/or rowid>EXPR */
#define WHERE_COLUMN_EQ    0x00010000  /* x=EXPR or x IN (...) or x IS NULL */
#define WHERE_COLUMN_RANGE 0x00020000  /* x<EXPR and/or x>EXPR */
#define WHERE_COLUMN_IN    0x00040000  /* x IN (...) */
#define WHERE_COLUMN_NULL  0x00080000  /* x IS NULL */
#define WHERE_INDEXED      0x000f0000  /* Anything that uses an index */
#define WHERE_IN_ABLE      0x000f1000  /* Able to support an IN operator */
#define WHERE_TOP_LIMIT    0x00100000  /* x<EXPR or x<=EXPR constraint */
#define WHERE_BTM_LIMIT    0x00200000  /* x>EXPR or x>=EXPR constraint */
#define WHERE_IDX_ONLY     0x00800000  /* Use index only - omit table */
#define WHERE_ORDERBY      0x01000000  /* Output will appear in correct order */
#define WHERE_REVERSE      0x02000000  /* Scan in reverse order */
#define WHERE_UNIQUE       0x04000000  /* Selects no more than one row */
#define WHERE_VIRTUALTABLE 0x08000000  /* Use virtual-table processing */
2026
2027
2028
2029
2030
2031
2032
2033

2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054


2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068


2069

2070
2071
2072
2073
2074
2075
2076
  for(; pProbe; pProbe=(pSrc->pIndex ? 0 : pProbe->pNext)){
    double inMultiplier = 1;  /* Number of equality look-ups needed */
    int inMultIsEst = 0;      /* True if inMultiplier is an estimate */

    WHERETRACE(("... index %s:\n", pProbe->zName));

    /* Count the number of columns in the index that are satisfied
    ** by x=EXPR constraints or x IN (...) constraints.  For a term

    ** of the form x=EXPR we only have to do a single binary search.
    ** But for x IN (...) we have to do a number of binary searched
    ** equal to the number of entries on the RHS of the IN operator.
    ** The inMultipler variable with try to estimate the number of
    ** binary searches needed.
    */
    wsFlags = 0;
    for(i=0; i<pProbe->nColumn; i++){
      int j = pProbe->aiColumn[i];
      pTerm = findTerm(pWC, iCur, j, notReady, eqTermMask, pProbe);
      if( pTerm==0 ) break;
      wsFlags |= WHERE_COLUMN_EQ;
      if( pTerm->eOperator & WO_IN ){
        Expr *pExpr = pTerm->pExpr;
        wsFlags |= WHERE_COLUMN_IN;
        if( ExprHasProperty(pExpr, EP_xIsSelect) ){
          inMultiplier *= 25;
          inMultIsEst = 1;
        }else if( pExpr->x.pList ){
          inMultiplier *= pExpr->x.pList->nExpr + 1;
        }


      }
    }
    nRow = pProbe->aiRowEst[i] * inMultiplier;
    /* If inMultiplier is an estimate and that estimate results in an
    ** nRow it that is more than half number of rows in the table,
    ** then reduce inMultipler */
    if( inMultIsEst && nRow*2 > pProbe->aiRowEst[0] ){
      nRow = pProbe->aiRowEst[0]/2;
      inMultiplier = nRow/pProbe->aiRowEst[i];
    }
    cost = nRow + inMultiplier*estLog(pProbe->aiRowEst[0]);
    nEq = i;
    if( pProbe->onError!=OE_None && (wsFlags & WHERE_COLUMN_IN)==0
         && nEq==pProbe->nColumn ){


      wsFlags |= WHERE_UNIQUE;

    }
    WHERETRACE(("...... nEq=%d inMult=%.9g nRow=%.9g cost=%.9g\n",
                nEq, inMultiplier, nRow, cost));

    /* Look for range constraints.  Assume that each range constraint
    ** makes the search space 1/3rd smaller.
    */







|
>
|
|



















>
>












|
|
>
>
|
>







2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
  for(; pProbe; pProbe=(pSrc->pIndex ? 0 : pProbe->pNext)){
    double inMultiplier = 1;  /* Number of equality look-ups needed */
    int inMultIsEst = 0;      /* True if inMultiplier is an estimate */

    WHERETRACE(("... index %s:\n", pProbe->zName));

    /* Count the number of columns in the index that are satisfied
    ** by x=EXPR or x IS NULL constraints or x IN (...) constraints.
    ** For a term of the form x=EXPR or x IS NULL we only have to do 
    ** a single binary search.  But for x IN (...) we have to do a
    ** number of binary searched
    ** equal to the number of entries on the RHS of the IN operator.
    ** The inMultipler variable with try to estimate the number of
    ** binary searches needed.
    */
    wsFlags = 0;
    for(i=0; i<pProbe->nColumn; i++){
      int j = pProbe->aiColumn[i];
      pTerm = findTerm(pWC, iCur, j, notReady, eqTermMask, pProbe);
      if( pTerm==0 ) break;
      wsFlags |= WHERE_COLUMN_EQ;
      if( pTerm->eOperator & WO_IN ){
        Expr *pExpr = pTerm->pExpr;
        wsFlags |= WHERE_COLUMN_IN;
        if( ExprHasProperty(pExpr, EP_xIsSelect) ){
          inMultiplier *= 25;
          inMultIsEst = 1;
        }else if( pExpr->x.pList ){
          inMultiplier *= pExpr->x.pList->nExpr + 1;
        }
      }else if( pTerm->eOperator & WO_ISNULL ){
        wsFlags |= WHERE_COLUMN_NULL;
      }
    }
    nRow = pProbe->aiRowEst[i] * inMultiplier;
    /* If inMultiplier is an estimate and that estimate results in an
    ** nRow it that is more than half number of rows in the table,
    ** then reduce inMultipler */
    if( inMultIsEst && nRow*2 > pProbe->aiRowEst[0] ){
      nRow = pProbe->aiRowEst[0]/2;
      inMultiplier = nRow/pProbe->aiRowEst[i];
    }
    cost = nRow + inMultiplier*estLog(pProbe->aiRowEst[0]);
    nEq = i;
    if( pProbe->onError!=OE_None && nEq==pProbe->nColumn ){
      testcase( wsFlags & WHERE_COLUMN_IN );
      testcase( wsFlags & WHERE_COLUMN_NULL );
      if( (wsFlags & (WHERE_COLUMN_IN|WHERE_COLUMN_NULL))==0 ){
        wsFlags |= WHERE_UNIQUE;
      }
    }
    WHERETRACE(("...... nEq=%d inMult=%.9g nRow=%.9g cost=%.9g\n",
                nEq, inMultiplier, nRow, cost));

    /* Look for range constraints.  Assume that each range constraint
    ** makes the search space 1/3rd smaller.
    */
2093
2094
2095
2096
2097
2098
2099
2100
2101

2102
2103
2104
2105
2106
2107
2108
                    nRow, cost));
      }
    }

    /* Add the additional cost of sorting if that is a factor.
    */
    if( pOrderBy ){
      if( (wsFlags & WHERE_COLUMN_IN)==0 &&
           isSortingIndex(pParse,pWC->pMaskSet,pProbe,iCur,pOrderBy,nEq,&rev) ){

        if( wsFlags==0 ){
          wsFlags = WHERE_COLUMN_RANGE;
        }
        wsFlags |= WHERE_ORDERBY;
        if( rev ){
          wsFlags |= WHERE_REVERSE;
        }







|
|
>







2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
                    nRow, cost));
      }
    }

    /* Add the additional cost of sorting if that is a factor.
    */
    if( pOrderBy ){
      if( (wsFlags & (WHERE_COLUMN_IN|WHERE_COLUMN_NULL))==0
       && isSortingIndex(pParse,pWC->pMaskSet,pProbe,iCur,pOrderBy,nEq,&rev)
      ){
        if( wsFlags==0 ){
          wsFlags = WHERE_COLUMN_RANGE;
        }
        wsFlags |= WHERE_ORDERBY;
        if( rev ){
          wsFlags |= WHERE_REVERSE;
        }
Added test/tkt3824.test.
















































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# 2009 April 24                                                                 
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Ticket #3824
#
# When you use an "IS NULL" constraint on a UNIQUE index, the result
# is not necessarily UNIQUE.  Make sure the optimizer does not assume
# uniqueness.
#
# $Id: tkt3824.test,v 1.1 2009/04/24 14:51:42 drh Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

proc execsql_status {sql {db db}} {
  set result [uplevel $db eval [list $sql]]
  if {[db status sort]} {
    concat $result sort
  } else {
    concat $result nosort
  }
}

do_test tkt3824-1.1 {
  db eval {
    CREATE TABLE t1(a,b);
    INSERT INTO t1 VALUES(1,NULL);
    INSERT INTO t1 VALUES(9,NULL);
    INSERT INTO t1 VALUES(5,NULL);
    INSERT INTO t1 VALUES(123,NULL);
    INSERT INTO t1 VALUES(-10,NULL);
    CREATE UNIQUE INDEX t1b ON t1(b);
  }
  execsql_status {
    SELECT a FROM t1 WHERE b IS NULL ORDER BY a;
  }
} {-10 1 5 9 123 sort}
do_test tkt3824-1.2 {
  execsql_status {
    SELECT a FROM t1 WHERE b IS NULL ORDER BY b, a;
  }
} {-10 1 5 9 123 sort}

do_test tkt3824-2.1 {
  db eval {
    CREATE TABLE t2(a,b,c);
    INSERT INTO t2 VALUES(1,1,NULL);
    INSERT INTO t2 VALUES(9,2,NULL);
    INSERT INTO t2 VALUES(5,2,NULL);
    INSERT INTO t2 VALUES(123,3,NULL);
    INSERT INTO t2 VALUES(-10,3,NULL);
    CREATE UNIQUE INDEX t2bc ON t2(b,c);
  }
  execsql_status {
    SELECT a FROM t2 WHERE b=2 AND c IS NULL ORDER BY a;
  }
} {5 9 sort}
do_test tkt3824-2.2 {
  execsql_status {
    SELECT a FROM t2 WHERE b=2 AND c IS NULL ORDER BY b, a;
  }
} {5 9 sort}
do_test tkt3824-2.3 {
  lsort [execsql_status {
    SELECT a FROM t2 WHERE b=2 AND c IS NULL ORDER BY b;
  }]
} {5 9 sort}

do_test tkt3824-3.1 {
  db eval {
    CREATE TABLE t3(x,y);
    INSERT INTO t3 SELECT a, b FROM t1;
    INSERT INTO t3 VALUES(234,567);
    CREATE UNIQUE INDEX t3y ON t3(y);
    DELETE FROM t3 WHERE y IS NULL;
    SELECT * FROM t3;
  }
} {234 567}

finish_test