/ Check-in [7b8548b1]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Do not consider a DISTINCT clause redundant unless a subset of the result-set is collectively subject to a UNIQUE constraint and it can be guaranteed that all columns of the subset are NOT NULL (either due to NOT NULL constraints WHERE clause terms). Fix for [385a5b56b9].
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 7b8548b1872cc1225355ba8311e93dd08d6526e2
User & Date: dan 2012-04-20 16:59:24
References
2012-04-20
17:44 Closed ticket [385a5b56]: A DISTINCT SELECT optimized using a UNIQUE index may allow duplicate NULL values. plus 3 other changes artifact: 40ce6655 user: dan
Context
2012-04-21
11:33
If terminating interactive input to the command-line shell with ^D, issue an extra \n to move the cursor to the next line before exiting. This check-in also accidently adds the test_spellfix.c file to the source tree. check-in: feff1ef0 user: drh tags: trunk
00:31
Merge the latest trunk changes into the WinRT branch (fixes for tickets [2a5629202f] and [385a5b56b9]). check-in: 25478dcf user: mistachkin tags: winrt
2012-04-20
16:59
Do not consider a DISTINCT clause redundant unless a subset of the result-set is collectively subject to a UNIQUE constraint and it can be guaranteed that all columns of the subset are NOT NULL (either due to NOT NULL constraints WHERE clause terms). Fix for [385a5b56b9]. check-in: 7b8548b1 user: dan tags: trunk
15:24
Fix for [2a5629202f]. When considering whether or not a UNIQUE index may be used to optimize an ORDER BY clause, do not assume that all index entries are distinct unless there is some reason to believe that the index contains no NULL values. check-in: 9870e4c4 user: dan tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/where.c.

  1558   1558     **
  1559   1559     **   1. The index is itself UNIQUE, and
  1560   1560     **
  1561   1561     **   2. All of the columns in the index are either part of the pDistinct
  1562   1562     **      list, or else the WHERE clause contains a term of the form "col=X",
  1563   1563     **      where X is a constant value. The collation sequences of the
  1564   1564     **      comparison and select-list expressions must match those of the index.
         1565  +  **
         1566  +  **   3. All of those index columns for which the WHERE clause does not
         1567  +  **      contain a "col=X" term are subject to a NOT NULL constraint.
  1565   1568     */
  1566   1569     for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
  1567   1570       if( pIdx->onError==OE_None ) continue;
  1568   1571       for(i=0; i<pIdx->nColumn; i++){
  1569   1572         int iCol = pIdx->aiColumn[i];
  1570         -      if( 0==findTerm(pWC, iBase, iCol, ~(Bitmask)0, WO_EQ, pIdx) 
  1571         -       && 0>findIndexCol(pParse, pDistinct, iBase, pIdx, i)
  1572         -      ){
  1573         -        break;
         1573  +      if( 0==findTerm(pWC, iBase, iCol, ~(Bitmask)0, WO_EQ, pIdx) ){
         1574  +        int iIdxCol = findIndexCol(pParse, pDistinct, iBase, pIdx, i);
         1575  +        if( iIdxCol<0 || pTab->aCol[pIdx->aiColumn[i]].notNull==0 ){
         1576  +          break;
         1577  +        }
  1574   1578         }
  1575   1579       }
  1576   1580       if( i==pIdx->nColumn ){
  1577   1581         /* This index implies that the DISTINCT qualifier is redundant. */
  1578   1582         return 1;
  1579   1583       }
  1580   1584     }
................................................................................
  1733   1737       ** NULL (since the corresponding "=" operator in the WHERE clause would 
  1734   1738       ** not be true). So if all remaining index columns have NOT NULL 
  1735   1739       ** constaints attached to them, we can be confident that the visited
  1736   1740       ** index entries are free of NULLs.  */
  1737   1741       for(i=nEqCol; i<pIdx->nColumn; i++){
  1738   1742         if( aCol[pIdx->aiColumn[i]].notNull==0 ) break;
  1739   1743       }
  1740         -    return (i>=pIdx->nColumn);
         1744  +    return (i==pIdx->nColumn);
  1741   1745     }
  1742   1746     return 0;
  1743   1747   }
  1744   1748   
  1745   1749   /*
  1746   1750   ** Prepare a crude estimate of the logarithm of the input value.
  1747   1751   ** The results need not be exact.  This is only used for estimating

Changes to test/distinct.test.

    73     73   do_execsql_test 1.0 {
    74     74     CREATE TABLE t1(a, b, c, d);
    75     75     CREATE UNIQUE INDEX i1 ON t1(b, c);
    76     76     CREATE UNIQUE INDEX i2 ON t1(d COLLATE nocase);
    77     77   
    78     78     CREATE TABLE t2(x INTEGER PRIMARY KEY, y);
    79     79   
    80         -  CREATE TABLE t3(c1 PRIMARY KEY, c2);
           80  +  CREATE TABLE t3(c1 PRIMARY KEY NOT NULL, c2 NOT NULL);
    81     81     CREATE INDEX i3 ON t3(c2);
           82  +
           83  +  CREATE TABLE t4(a, b NOT NULL, c NOT NULL, d NOT NULL);
           84  +  CREATE UNIQUE INDEX t4i1 ON t4(b, c);
           85  +  CREATE UNIQUE INDEX t4i2 ON t4(d COLLATE nocase);
    82     86   }
    83     87   foreach {tn noop sql} {
    84     88   
    85         -  1   1   "SELECT DISTINCT b, c FROM t1"
    86         -  2   1   "SELECT DISTINCT c FROM t1 WHERE b = ?"
           89  +  1.1 0   "SELECT DISTINCT b, c FROM t1"
           90  +  1.2 1   "SELECT DISTINCT b, c FROM t4"
           91  +  2.1 0   "SELECT DISTINCT c FROM t1 WHERE b = ?"
           92  +  2.2 1   "SELECT DISTINCT c FROM t4 WHERE b = ?"
    87     93     3   1   "SELECT DISTINCT rowid FROM t1"
    88     94     4   1   "SELECT DISTINCT rowid, a FROM t1"
    89     95     5   1   "SELECT DISTINCT x FROM t2"
    90     96     6   1   "SELECT DISTINCT * FROM t2"
    91     97     7   1   "SELECT DISTINCT * FROM (SELECT * FROM t2)"
    92     98   
    93         -  8   1   "SELECT DISTINCT * FROM t1"
           99  +  8.1 0   "SELECT DISTINCT * FROM t1"
          100  +  8.2 1   "SELECT DISTINCT * FROM t4"
    94    101   
    95    102     8   0   "SELECT DISTINCT a, b FROM t1"
    96    103   
    97    104     9   0   "SELECT DISTINCT c FROM t1 WHERE b IN (1,2)"
    98    105     10  0   "SELECT DISTINCT c FROM t1"
    99    106     11  0   "SELECT DISTINCT b FROM t1"
   100    107   
   101         -  12  0   "SELECT DISTINCT a, d FROM t1"
   102         -  13  0   "SELECT DISTINCT a, b, c COLLATE nocase FROM t1"
   103         -  14  1   "SELECT DISTINCT a, d COLLATE nocase FROM t1"
   104         -  15  0   "SELECT DISTINCT a, d COLLATE binary FROM t1"
   105         -  16  1   "SELECT DISTINCT a, b, c COLLATE binary FROM t1"
          108  +  12.1 0   "SELECT DISTINCT a, d FROM t1"
          109  +  12.2 0   "SELECT DISTINCT a, d FROM t4"
          110  +  13.1 0   "SELECT DISTINCT a, b, c COLLATE nocase FROM t1"
          111  +  13.2 0   "SELECT DISTINCT a, b, c COLLATE nocase FROM t4"
          112  +  14.1 0   "SELECT DISTINCT a, d COLLATE nocase FROM t1"
          113  +  14.2 1   "SELECT DISTINCT a, d COLLATE nocase FROM t4"
          114  +
          115  +  15   0   "SELECT DISTINCT a, d COLLATE binary FROM t1"
          116  +  16.1 0   "SELECT DISTINCT a, b, c COLLATE binary FROM t1"
          117  +  16.2 1   "SELECT DISTINCT a, b, c COLLATE binary FROM t4"
   106    118   
   107    119     16  0   "SELECT DISTINCT t1.rowid FROM t1, t2"
   108    120     17  0   { /* Technically, it would be possible to detect that DISTINCT
   109    121               ** is a no-op in cases like the following. But SQLite does not
   110    122               ** do so. */
   111    123               SELECT DISTINCT t1.rowid FROM t1, t2 WHERE t1.rowid=t2.rowid }
   112    124   
................................................................................
   116    128     21  0   "SELECT DISTINCT c2 FROM t3"
   117    129   
   118    130     22  0   "SELECT DISTINCT * FROM (SELECT 1, 2, 3 UNION SELECT 4, 5, 6)"
   119    131     23  1   "SELECT DISTINCT rowid FROM (SELECT 1, 2, 3 UNION SELECT 4, 5, 6)"
   120    132   
   121    133     24  0   "SELECT DISTINCT rowid/2 FROM t1"
   122    134     25  1   "SELECT DISTINCT rowid/2, rowid FROM t1"
   123         -  26  1   "SELECT DISTINCT rowid/2, b FROM t1 WHERE c = ?"
          135  +  26.1  0   "SELECT DISTINCT rowid/2, b FROM t1 WHERE c = ?"
          136  +  26.2  1   "SELECT DISTINCT rowid/2, b FROM t4 WHERE c = ?"
   124    137   } {
   125    138     if {$noop} {
   126    139       do_distinct_noop_test 1.$tn $sql
   127    140     } else {
   128    141       do_distinct_not_noop_test 1.$tn $sql
   129    142     }
   130    143   }

Added test/tkt-385a5b56b9.test.

            1  +# 2012 April 02
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#***********************************************************************
           11  +# The tests in this file were used while developing the SQLite 4 code. 
           12  +#
           13  +set testdir [file dirname $argv0]
           14  +source $testdir/tester.tcl
           15  +set testprefix tkt-385a5b56b9
           16  +
           17  +do_execsql_test 1.0 { 
           18  +  CREATE TABLE t1(x, y);
           19  +  INSERT INTO t1 VALUES(1, NULL);
           20  +  INSERT INTO t1 VALUES(2, NULL);
           21  +  INSERT INTO t1 VALUES(1, NULL);
           22  +}
           23  +
           24  +do_execsql_test 1.1 { SELECT DISTINCT x, y FROM t1 } {1 {} 2 {}}
           25  +do_execsql_test 1.2 { CREATE UNIQUE INDEX i1 ON t1(x, y) }
           26  +do_execsql_test 1.3 { SELECT DISTINCT x, y FROM t1 } {1 {} 2 {}}
           27  +
           28  +
           29  +#-------------------------------------------------------------------------
           30  +
           31  +do_execsql_test 2.0 {
           32  +  CREATE TABLE t2(x, y NOT NULL);
           33  +  CREATE UNIQUE INDEX t2x ON t2(x);
           34  +  CREATE UNIQUE INDEX t2y ON t2(y);
           35  +}
           36  +
           37  +do_eqp_test 2.1 { SELECT DISTINCT x FROM t2 } {
           38  +  0 0 0 {SCAN TABLE t2 USING COVERING INDEX t2x (~1000000 rows)}
           39  +}
           40  +
           41  +do_eqp_test 2.2 { SELECT DISTINCT y FROM t2 } {
           42  +  0 0 0 {SCAN TABLE t2 (~1000000 rows)}
           43  +}
           44  +
           45  +do_eqp_test 2.3 { SELECT DISTINCT x, y FROM t2 WHERE y=10 } {
           46  +  0 0 0 {SEARCH TABLE t2 USING INDEX t2y (y=?) (~1 rows)}
           47  +}
           48  +
           49  +do_eqp_test 2.4 { SELECT DISTINCT x, y FROM t2 WHERE x=10 } {
           50  +  0 0 0 {SEARCH TABLE t2 USING INDEX t2x (x=?) (~1 rows)}
           51  +}
           52  +
           53  +finish_test
           54  +