/ Check-in [7337293c]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Improvements and tests for detection of redundant DISTINCT qualifiers.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | experimental
Files: files | file ages | folders
SHA1:7337293c87fb563604dd6ad284f2d1e30c938b4c
User & Date: dan 2011-07-01 14:21:38
Context
2011-07-01
18:26
Improve use of indexes to optimize DISTINCT queries. check-in: 6c202ea0 user: dan tags: experimental
14:21
Improvements and tests for detection of redundant DISTINCT qualifiers. check-in: 7337293c user: dan tags: experimental
2011-06-30
20:17
Experimental changes to improve optimization of DISTINCT queries. check-in: f7ba0219 user: dan tags: experimental
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/select.c.

  3844   3844       }
  3845   3845       rc = multiSelect(pParse, p, pDest);
  3846   3846       explainSetInteger(pParse->iSelectId, iRestoreSelectId);
  3847   3847       return rc;
  3848   3848     }
  3849   3849   #endif
  3850   3850   
  3851         -  /* If possible, rewrite the query to use GROUP BY instead of DISTINCT.
  3852         -  ** GROUP BY might use an index, DISTINCT never does.
  3853         -  */
  3854         -#if 0
  3855         -  assert( p->pGroupBy==0 || (p->selFlags & SF_Aggregate)!=0 );
  3856         -  if( (p->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct ){
  3857         -    p->pGroupBy = sqlite3ExprListDup(db, p->pEList, 0);
  3858         -    pGroupBy = p->pGroupBy;
  3859         -    p->selFlags &= ~SF_Distinct;
  3860         -  }
  3861         -#endif
  3862         -
  3863   3851     /* If there is both a GROUP BY and an ORDER BY clause and they are
  3864   3852     ** identical, then disable the ORDER BY clause since the GROUP BY
  3865   3853     ** will cause elements to come out in the correct order.  This is
  3866   3854     ** an optimization - the correct answer should result regardless.
  3867   3855     ** Use the SQLITE_GroupByOrder flag with SQLITE_TESTCTRL_OPTIMIZER
  3868   3856     ** to disable this optimization for testing purposes.
  3869   3857     */
  3870   3858     if( sqlite3ExprListCompare(p->pGroupBy, pOrderBy)==0
  3871   3859            && (db->flags & SQLITE_GroupByOrder)==0 ){
  3872   3860       pOrderBy = 0;
  3873   3861     }
         3862  +
         3863  +  /* If the query is DISTINCT with an ORDER BY but is not an aggregate, and 
         3864  +  ** if the select-list is the same as the ORDER BY list, then this query
         3865  +  ** can be rewritten as a GROUP BY. In other words, this:
         3866  +  **
         3867  +  **     SELECT DISTINCT xyz FROM ... ORDER BY xyz
         3868  +  **
         3869  +  ** is transformed to:
         3870  +  **
         3871  +  **     SELECT xyz FROM ... GROUP BY xyz
         3872  +  **
         3873  +  ** The second form is preferred as a single index (or temp-table) may be 
         3874  +  ** used for both the ORDER BY and DISTINCT processing. As originally 
         3875  +  ** written the query must use a temp-table for at least one of the ORDER 
         3876  +  ** BY and DISTINCT, and an index or separate temp-table for the other.
         3877  +  */
         3878  +  if( (p->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct 
         3879  +   && sqlite3ExprListCompare(pOrderBy, p->pEList)==0
         3880  +  ){
         3881  +    p->selFlags &= ~SF_Distinct;
         3882  +    p->pGroupBy = sqlite3ExprListDup(db, p->pEList, 0);
         3883  +    pGroupBy = p->pGroupBy;
         3884  +    pOrderBy = 0;
         3885  +  }
  3874   3886   
  3875   3887     /* If there is an ORDER BY clause, then this sorting
  3876   3888     ** index might end up being unused if the data can be 
  3877   3889     ** extracted in pre-sorted order.  If that is the case, then the
  3878   3890     ** OP_OpenEphemeral instruction will be changed to an OP_Noop once
  3879   3891     ** we figure out that the sorting index is not needed.  The addrSortIndex
  3880   3892     ** variable is used to facilitate that change.
................................................................................
  3931   3943       */
  3932   3944       if( addrSortIndex>=0 && pOrderBy==0 ){
  3933   3945         sqlite3VdbeChangeToNoop(v, addrSortIndex, 1);
  3934   3946         p->addrOpenEphm[2] = -1;
  3935   3947       }
  3936   3948   
  3937   3949       if( pWInfo->eDistinct ){
         3950  +      VdbeOp *pOp;                /* No longer required OpenEphemeral instr. */
         3951  +     
         3952  +      pOp = sqlite3VdbeGetOp(v, addrDistinctIndex);
         3953  +
  3938   3954         assert( isDistinct );
  3939   3955         assert( pWInfo->eDistinct==WHERE_DISTINCT_ORDERED 
  3940   3956              || pWInfo->eDistinct==WHERE_DISTINCT_UNIQUE 
  3941   3957         );
  3942   3958         distinct = -1;
  3943   3959         if( pWInfo->eDistinct==WHERE_DISTINCT_ORDERED ){
  3944   3960           int iJump;
  3945   3961           int iExpr;
  3946   3962           int iFlag = ++pParse->nMem;
  3947   3963           int iBase = pParse->nMem+1;
  3948   3964           int iBase2 = iBase + pEList->nExpr;
  3949   3965           pParse->nMem += (pEList->nExpr*2);
  3950         -        VdbeOp *pOp;
  3951   3966   
  3952   3967           /* Change the OP_OpenEphemeral coded earlier to an OP_Integer. The
  3953   3968           ** OP_Integer initializes the "first row" flag.  */
  3954         -        pOp = sqlite3VdbeGetOp(v, addrDistinctIndex);
  3955   3969           pOp->opcode = OP_Integer;
  3956   3970           pOp->p1 = 1;
  3957   3971           pOp->p2 = iFlag;
  3958   3972   
  3959   3973           sqlite3ExprCodeExprList(pParse, pEList, iBase, 1);
  3960   3974           iJump = sqlite3VdbeCurrentAddr(v) + 1 + pEList->nExpr + 1 + 1;
  3961   3975           sqlite3VdbeAddOp2(v, OP_If, iFlag, iJump-1);
................................................................................
  3966   3980             sqlite3VdbeChangeP5(v, SQLITE_NULLEQ);
  3967   3981           }
  3968   3982           sqlite3VdbeAddOp2(v, OP_Goto, 0, pWInfo->iContinue);
  3969   3983   
  3970   3984           sqlite3VdbeAddOp2(v, OP_Integer, 0, iFlag);
  3971   3985           assert( sqlite3VdbeCurrentAddr(v)==iJump );
  3972   3986           sqlite3VdbeAddOp3(v, OP_Move, iBase, iBase2, pEList->nExpr);
         3987  +      }else{
         3988  +        pOp->opcode = OP_Noop;
  3973   3989         }
  3974   3990       }
  3975   3991   
  3976   3992       /* Use the standard inner loop. */
  3977   3993       selectInnerLoop(pParse, p, pEList, 0, 0, pOrderBy, distinct, pDest,
  3978   3994                       pWInfo->iContinue, pWInfo->iBreak);
  3979   3995   

Changes to src/where.c.

  4298   4298     Parse *pParse,
  4299   4299     SrcList *pTabList,
  4300   4300     WhereClause *pWC,
  4301   4301     ExprList *pDistinct
  4302   4302   ){
  4303   4303     Table *pTab;
  4304   4304     Index *pIdx;
  4305         -  int i;
         4305  +  int i;                          
  4306   4306     int iBase;
  4307   4307   
  4308   4308     /* If there is more than one table or sub-select in the FROM clause of
  4309   4309     ** this query, then it will not be possible to show that the DISTINCT 
  4310   4310     ** clause is redundant. */
  4311   4311     if( pTabList->nSrc!=1 ) return 0;
  4312   4312     iBase = pTabList->a[0].iCursor;
  4313   4313     pTab = pTabList->a[0].pTab;
  4314   4314   
  4315         -  /* Check if all the expressions in the ExprList are of type TK_COLUMN and
  4316         -  ** on the same table. If this is not the case, return early, since it will 
  4317         -  ** not be possible to prove that the DISTINCT qualifier is redundant. 
  4318         -  ** If any of the expressions is an IPK column, then return true.
  4319         -  */
         4315  +  /* If any of the expressions is an IPK column, then return true. */
  4320   4316     for(i=0; i<pDistinct->nExpr; i++){
  4321   4317       Expr *p = pDistinct->a[i].pExpr;
  4322         -    if( p->op!=TK_COLUMN || p->iTable!=iBase ) return 0;
  4323         -    if( p->iColumn<0 ) return 1;
         4318  +    assert( p->op!=TK_COLUMN || p->iTable==iBase );
         4319  +    if( p->op==TK_COLUMN && p->iColumn<0 ) return 1;
  4324   4320     }
  4325   4321   
  4326   4322     /* Loop through all indices on the table, checking each to see if it makes
  4327   4323     ** the DISTINCT qualifier redundant. It does so if:
  4328   4324     **
  4329   4325     **   1. The index is itself UNIQUE, and
  4330   4326     **
................................................................................
  4339   4335         int iCol = pIdx->aiColumn[i];
  4340   4336         const char *zColl = pIdx->azColl[i];
  4341   4337   
  4342   4338         if( 0==findTerm(pWC, iBase, iCol, ~(Bitmask)0, WO_EQ, pIdx) ){
  4343   4339           int j;
  4344   4340           for(j=0; j<pDistinct->nExpr; j++){
  4345   4341             Expr *p = pDistinct->a[j].pExpr;
  4346         -          if( p->iColumn==iCol ){
         4342  +          assert( p->op!=TK_COLUMN || p->iTable==iBase );
         4343  +          if( p->op==TK_COLUMN && p->iColumn==iCol ){
  4347   4344               CollSeq *pColl = sqlite3ExprCollSeq(pParse, p);
  4348         -            const char *zEColl = (pColl ? pColl : pParse->db->pDfltColl)->zName;
  4349         -            if( 0==sqlite3StrICmp(zColl, zEColl) ) break;
         4345  +            if( pColl && 0==sqlite3StrICmp(zColl, pColl->zName) ) break;
  4350   4346             }
  4351   4347           }
  4352   4348           if( j==pDistinct->nExpr ) break;
  4353   4349         }
  4354   4350       }
  4355   4351       if( i==pIdx->nColumn ){
  4356   4352         /* This index implies that the DISTINCT qualifier is redundant. */

Added test/distinct.test.

            1  +# 2011 July 1
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#***********************************************************************
           11  +# This file implements regression tests for SQLite library.  The
           12  +# focus of this script is the DISTINCT modifier.
           13  +#
           14  +
           15  +set testdir [file dirname $argv0]
           16  +source $testdir/tester.tcl
           17  +
           18  +set testprefix distinct
           19  +
           20  +
           21  +proc is_distinct_noop {sql} {
           22  +  set sql1 $sql
           23  +  set sql2 [string map {DISTINCT ""} $sql]
           24  +
           25  +  set program1 [list]
           26  +  set program2 [list]
           27  +  db eval "EXPLAIN $sql1" {
           28  +    if {$opcode != "Noop"} { lappend program1 $opcode }
           29  +  }
           30  +  db eval "EXPLAIN $sql2" {
           31  +    if {$opcode != "Noop"} { lappend program2 $opcode }
           32  +  }
           33  +
           34  +  return [expr {$program1==$program2}]
           35  +}
           36  +
           37  +proc do_distinct_noop_test {tn sql} {
           38  +  uplevel [list do_test $tn [list is_distinct_noop $sql] 1]
           39  +}
           40  +proc do_distinct_not_noop_test {tn sql} {
           41  +  uplevel [list do_test $tn [list is_distinct_noop $sql] 0]
           42  +}
           43  +
           44  +
           45  +#-------------------------------------------------------------------------
           46  +# The following tests - distinct-1.* - check that the planner correctly 
           47  +# detects cases where a UNIQUE index means that a DISTINCT clause is 
           48  +# redundant. Currently the planner only detects such cases when there
           49  +# is a single table in the FROM clause.
           50  +#
           51  +do_execsql_test 1.0 {
           52  +  CREATE TABLE t1(a, b, c, d);
           53  +  CREATE UNIQUE INDEX i1 ON t1(b, c);
           54  +  CREATE UNIQUE INDEX i2 ON t1(d COLLATE nocase);
           55  +
           56  +  CREATE TABLE t2(x INTEGER PRIMARY KEY, y);
           57  +
           58  +  CREATE TABLE t3(c1 PRIMARY KEY, c2);
           59  +  CREATE INDEX i3 ON t3(c2);
           60  +}
           61  +foreach {tn noop sql} {
           62  +
           63  +  1   1   "SELECT DISTINCT b, c FROM t1"
           64  +  2   1   "SELECT DISTINCT c FROM t1 WHERE b = ?"
           65  +  3   1   "SELECT DISTINCT rowid FROM t1"
           66  +  4   1   "SELECT DISTINCT rowid, a FROM t1"
           67  +  5   1   "SELECT DISTINCT x FROM t2"
           68  +  6   1   "SELECT DISTINCT * FROM t2"
           69  +  7   1   "SELECT DISTINCT * FROM (SELECT * FROM t2)"
           70  +
           71  +  8   1   "SELECT DISTINCT * FROM t1"
           72  +
           73  +  8   0   "SELECT DISTINCT a, b FROM t1"
           74  +
           75  +  9   0   "SELECT DISTINCT c FROM t1 WHERE b IN (1,2)"
           76  +  10  0   "SELECT DISTINCT c FROM t1"
           77  +  11  0   "SELECT DISTINCT b FROM t1"
           78  +
           79  +  12  0   "SELECT DISTINCT a, d FROM t1"
           80  +  13  0   "SELECT DISTINCT a, b, c COLLATE nocase FROM t1"
           81  +  14  1   "SELECT DISTINCT a, d COLLATE nocase FROM t1"
           82  +  15  0   "SELECT DISTINCT a, d COLLATE binary FROM t1"
           83  +  16  1   "SELECT DISTINCT a, b, c COLLATE binary FROM t1"
           84  +
           85  +  16  0   "SELECT DISTINCT t1.rowid FROM t1, t2"
           86  +  17  0   { /* Technically, it would be possible to detect that DISTINCT
           87  +            ** is a no-op in cases like the following. But SQLite does not
           88  +            ** do so. */
           89  +            SELECT DISTINCT t1.rowid FROM t1, t2 WHERE t1.rowid=t2.rowid }
           90  +
           91  +  18  1   "SELECT DISTINCT c1, c2 FROM t3"
           92  +  19  1   "SELECT DISTINCT c1 FROM t3"
           93  +  20  1   "SELECT DISTINCT * FROM t3"
           94  +  21  0   "SELECT DISTINCT c2 FROM t3"
           95  +
           96  +  22  0   "SELECT DISTINCT * FROM (SELECT 1, 2, 3 UNION SELECT 4, 5, 6)"
           97  +  23  1   "SELECT DISTINCT rowid FROM (SELECT 1, 2, 3 UNION SELECT 4, 5, 6)"
           98  +
           99  +  24  0   "SELECT DISTINCT rowid/2 FROM t1"
          100  +  25  1   "SELECT DISTINCT rowid/2, rowid FROM t1"
          101  +  26  1   "SELECT DISTINCT rowid/2, b FROM t1 WHERE c = ?"
          102  +} {
          103  +  if {$noop} {
          104  +    do_distinct_noop_test 1.$tn $sql
          105  +  } else {
          106  +    do_distinct_not_noop_test 1.$tn $sql
          107  +  }
          108  +}
          109  +
          110  +
          111  +
          112  +
          113  +
          114  +
          115  +finish_test