SQLite

Check-in [52e75594]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Generalize the min/max optimization so that if an appropriate index exists, the index it can be used by any aggregate query that contains only a single max() or min() and does not contain a GROUP BY clause.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 52e755943f87354febe214e5dc3b423a1e38ba80
User & Date: drh 2012-12-13 18:57:31
References
2019-08-07
21:30 Ticket [f8a7060e] Incorrect result for query that uses MIN() and a CAST on rowid status still Open with 7 other changes (artifact: 3bee902b user: drh)
Context
2012-12-14
17:54
Optimize IN operators in the WHERE clause of queries using virtual tables. (check-in: 3d65c703 user: drh tags: trunk)
15:54
Merge in all the trunk changes that have occurred since this branch was opened. (check-in: 6d507e4d user: drh tags: vtab-IN-opt)
2012-12-13
18:57
Generalize the min/max optimization so that if an appropriate index exists, the index it can be used by any aggregate query that contains only a single max() or min() and does not contain a GROUP BY clause. (check-in: 52e75594 user: drh tags: trunk)
18:51
Increase the version number to 3.7.16 in advance of adding new features for the next release. (check-in: 8bcf5f51 user: drh tags: trunk)
16:37
Attempt to further generalize the min/max optimization so that, if an appropriate index exists, it can be used by any aggregate query that contains only a single aggregate of the form max(colname) or min(colname) and does not contain a GROUP BY clause. (Closed-Leaf check-in: 7280e14c user: dan tags: minmax-opt)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/select.c.
3156
3157
3158
3159
3160
3161
3162

3163
3164
3165
3166
3167


3168
3169
3170
3171




3172
3173




3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186

3187
3188

3189




3190
3191
3192
3193
3194
3195
3196
3197
  sqlite3SelectDelete(db, pSub1);

  return 1;
}
#endif /* !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) */

/*

** Analyze the SELECT statement passed as an argument to see if it
** is a min() or max() query. Return WHERE_ORDERBY_MIN or WHERE_ORDERBY_MAX if 
** it is, or 0 otherwise. At present, a query is considered to be
** a min()/max() query if:
**


**   1. There is a single object in the FROM clause.
**
**   2. There is a single expression in the result set, and it is
**      either min(x) or max(x), where x is a column reference.




*/
static u8 minMaxQuery(Select *p){




  Expr *pExpr;
  ExprList *pEList = p->pEList;

  if( pEList->nExpr!=1 ) return WHERE_ORDERBY_NORMAL;
  pExpr = pEList->a[0].pExpr;
  if( pExpr->op!=TK_AGG_FUNCTION ) return 0;
  if( NEVER(ExprHasProperty(pExpr, EP_xIsSelect)) ) return 0;
  pEList = pExpr->x.pList;
  if( pEList==0 || pEList->nExpr!=1 ) return 0;
  if( pEList->a[0].pExpr->op!=TK_AGG_COLUMN ) return WHERE_ORDERBY_NORMAL;
  assert( !ExprHasProperty(pExpr, EP_IntValue) );
  if( sqlite3StrICmp(pExpr->u.zToken,"min")==0 ){
    return WHERE_ORDERBY_MIN;

  }else if( sqlite3StrICmp(pExpr->u.zToken,"max")==0 ){
    return WHERE_ORDERBY_MAX;

  }




  return WHERE_ORDERBY_NORMAL;
}

/*
** The select statement passed as the first argument is an aggregate query.
** The second argment is the associated aggregate-info object. This 
** function tests if the SELECT is of the form:
**







>
|
<
<
<

>
>
|

|
|
>
>
>
>

|
>
>
>
>
|
|

<
<
|
<
<
<
|
|
|
|
>
|
|
>
|
>
>
>
>
|







3156
3157
3158
3159
3160
3161
3162
3163
3164



3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184


3185



3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
  sqlite3SelectDelete(db, pSub1);

  return 1;
}
#endif /* !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) */

/*
** Based on the contents of the AggInfo structure indicated by the first
** argument, this function checks if the following are true:



**
**    * the query contains just a single aggregate function,
**    * the aggregate function is either min() or max(), and
**    * the argument to the aggregate function is a column value.
**
** If all of the above are true, then WHERE_ORDERBY_MIN or WHERE_ORDERBY_MAX
** is returned as appropriate. Also, *ppMinMax is set to point to the 
** list of arguments passed to the aggregate before returning.
**
** Or, if the conditions above are not met, *ppMinMax is set to 0 and
** WHERE_ORDERBY_NORMAL is returned.
*/
static u8 minMaxQuery(AggInfo *pAggInfo, ExprList **ppMinMax){
  int eRet = WHERE_ORDERBY_NORMAL;          /* Return value */

  *ppMinMax = 0;
  if( pAggInfo->nFunc==1 ){
    Expr *pExpr = pAggInfo->aFunc[0].pExpr; /* Aggregate function */
    ExprList *pEList = pExpr->x.pList;      /* Arguments to agg function */



    assert( pExpr->op==TK_AGG_FUNCTION );



    if( pEList && pEList->nExpr==1 && pEList->a[0].pExpr->op==TK_AGG_COLUMN ){
      const char *zFunc = pExpr->u.zToken;
      if( sqlite3StrICmp(zFunc, "min")==0 ){
        eRet = WHERE_ORDERBY_MIN;
        *ppMinMax = pEList;
      }else if( sqlite3StrICmp(zFunc, "max")==0 ){
        eRet = WHERE_ORDERBY_MAX;
        *ppMinMax = pEList;
      }
    }
  }

  assert( *ppMinMax==0 || (*ppMinMax)->nExpr==1 );
  return eRet;
}

/*
** The select statement passed as the first argument is an aggregate query.
** The second argment is the associated aggregate-info object. This 
** function tests if the SELECT is of the form:
**
4523
4524
4525
4526
4527
4528
4529





4530



4531
4532
4533
4534
4535
4536
4537
4538
4539
4540
4541
        **
        **   + The optimizer code in where.c (the thing that decides which
        **     index or indices to use) should place a different priority on 
        **     satisfying the 'ORDER BY' clause than it does in other cases.
        **     Refer to code and comments in where.c for details.
        */
        ExprList *pMinMax = 0;





        u8 flag = minMaxQuery(p);



        if( flag ){
          assert( !ExprHasProperty(p->pEList->a[0].pExpr, EP_xIsSelect) );
          assert( p->pEList->a[0].pExpr->x.pList->nExpr==1 );
          pMinMax = sqlite3ExprListDup(db, p->pEList->a[0].pExpr->x.pList,0);
          pDel = pMinMax;
          if( pMinMax && !db->mallocFailed ){
            pMinMax->a[0].sortOrder = flag!=WHERE_ORDERBY_MIN ?1:0;
            pMinMax->a[0].pExpr->op = TK_COLUMN;
          }
        }
  







>
>
>
>
>
|
>
>
>

<
<
|







4532
4533
4534
4535
4536
4537
4538
4539
4540
4541
4542
4543
4544
4545
4546
4547
4548


4549
4550
4551
4552
4553
4554
4555
4556
        **
        **   + The optimizer code in where.c (the thing that decides which
        **     index or indices to use) should place a different priority on 
        **     satisfying the 'ORDER BY' clause than it does in other cases.
        **     Refer to code and comments in where.c for details.
        */
        ExprList *pMinMax = 0;
        u8 flag = WHERE_ORDERBY_NORMAL;
        
        assert( p->pGroupBy==0 );
        assert( flag==0 );
        if( p->pHaving==0 ){
          flag = minMaxQuery(&sAggInfo, &pMinMax);
        }
        assert( flag==0 || (pMinMax!=0 && pMinMax->nExpr==1) );

        if( flag ){


          pMinMax = sqlite3ExprListDup(db, pMinMax, 0);
          pDel = pMinMax;
          if( pMinMax && !db->mallocFailed ){
            pMinMax->a[0].sortOrder = flag!=WHERE_ORDERBY_MIN ?1:0;
            pMinMax->a[0].pExpr->op = TK_COLUMN;
          }
        }
  
Changes to test/minmax.test.
13
14
15
16
17
18
19

20
21
22
23
24
25
26
# aggregate min() and max() functions and which are handled as
# as a special case.
#
# $Id: minmax.test,v 1.21 2008/07/08 18:05:26 drh Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl


do_test minmax-1.0 {
  execsql {
    BEGIN;
    CREATE TABLE t1(x, y);
    INSERT INTO t1 VALUES(1,1);
    INSERT INTO t1 VALUES(2,2);







>







13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# aggregate min() and max() functions and which are handled as
# as a special case.
#
# $Id: minmax.test,v 1.21 2008/07/08 18:05:26 drh Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl
set ::testprefix minmax

do_test minmax-1.0 {
  execsql {
    BEGIN;
    CREATE TABLE t1(x, y);
    INSERT INTO t1 VALUES(1,1);
    INSERT INTO t1 VALUES(2,2);
532
533
534
535
536
537
538


539























































































540
541
542
} {25}
do_test minmax-12.17 {
  execsql {
    SELECT max(rowid) FROM t7 WHERE a=3 AND b=5 AND c=15;
  }
} {5}





























































































finish_test







>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>



533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
} {25}
do_test minmax-12.17 {
  execsql {
    SELECT max(rowid) FROM t7 WHERE a=3 AND b=5 AND c=15;
  }
} {5}

#-------------------------------------------------------------------------
reset_db

proc do_test_13 {op name sql1 sql2 res} {
  set ::sqlite_search_count 0
  uplevel [list do_execsql_test $name.1 $sql1 $res]
  set a $::sqlite_search_count

  set ::sqlite_search_count 0
  uplevel [list do_execsql_test $name.2 $sql2 $res]
  set b $::sqlite_search_count

  uplevel [list do_test $name.3 [list expr "$a $op $b"] 1]
}

# Run a test named $name. Check that SQL statements $sql1 and $sql2 both
# return the same result, but that $sql2 increments the $sqlite_search_count
# variable more often (indicating that it is visiting more rows to determine
# the result).
#
proc do_test_13_opt {name sql1 sql2 res} {
  uplevel [list do_test_13 < $name $sql1 $sql2 $res]
}

# Like [do_test_13_noopt], except this time check that the $sqlite_search_count
# variable is incremented the same number of times by both SQL statements.
#
proc do_test_13_noopt {name sql1 sql2 res} {
  uplevel [list do_test_13 == $name $sql1 $sql2 $res]
}

do_execsql_test 13.1 {
  CREATE TABLE t1(a, b, c);
  INSERT INTO t1 VALUES('a', 1, 1);
  INSERT INTO t1 VALUES('b', 6, 6);
  INSERT INTO t1 VALUES('c', 5, 5);
  INSERT INTO t1 VALUES('a', 4, 4);
  INSERT INTO t1 VALUES('a', 5, 5);
  INSERT INTO t1 VALUES('c', 6, 6);
  INSERT INTO t1 VALUES('b', 4, 4);
  INSERT INTO t1 VALUES('c', 7, 7);
  INSERT INTO t1 VALUES('b', 2, 2);
  INSERT INTO t1 VALUES('b', 3, 3);
  INSERT INTO t1 VALUES('a', 3, 3);
  INSERT INTO t1 VALUES('b', 5, 5);
  INSERT INTO t1 VALUES('c', 4, 4);
  INSERT INTO t1 VALUES('c', 3, 3);
  INSERT INTO t1 VALUES('a', 2, 2);
  SELECT * FROM t1 ORDER BY a, b, c;
} {a 1 1 a 2 2 a 3 3 a 4 4 a 5 5
   b 2 2 b 3 3 b 4 4 b 5 5 b 6 6
   c 3 3 c 4 4 c 5 5 c 6 6 c 7 7
}
do_execsql_test 13.2 { CREATE INDEX i1 ON t1(a, b, c) }

do_test_13_opt 13.3 {
  SELECT min(b) FROM t1 WHERE a='b'
} {
  SELECT min(c) FROM t1 WHERE a='b'
} {2}

do_test_13_opt 13.4 {
  SELECT a, min(b) FROM t1 WHERE a='b'
} {
  SELECT a, min(c) FROM t1 WHERE a='b'
} {b 2}

do_test_13_opt 13.4 {
  SELECT a||c, max(b)+4 FROM t1 WHERE a='c'
} {
  SELECT a||c, max(c)+4 FROM t1 WHERE a='c'
} {c7 11}

do_test_13_noopt 13.5 {
  SELECT a||c, max(b+1) FROM t1 WHERE a='c'
} {
  SELECT a||c, max(c+1) FROM t1 WHERE a='c'
} {c7 8}

do_test_13_noopt 13.6 {
  SELECT count(b) FROM t1 WHERE a='c'
} {
  SELECT count(c) FROM t1 WHERE a='c'
} {5}

do_test_13_noopt 13.7 {
  SELECT min(b), count(b) FROM t1 WHERE a='a';
} {
  SELECT min(c), count(c) FROM t1 WHERE a='a';
} {1 5}


finish_test