SQLite

Check-in [ef4ac0dd]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Improvements to distinct aggregates such that they can sometimes avoid using an ephermeral table to test for duplicates if the column that is distinct is part of an index.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: ef4ac0ddd297bbd38351410c5a387e1628561b3f1bec9e4c2c9d76fbe29f955a
User & Date: drh 2021-03-24 17:28:11
References
2021-04-03
19:23
Fix a crash in handling queries of the form "SELECT aggregate(DISTINCT tbl.col) FROM ... LEFT JOIN tbl ...". Fixes a problem introduced by [ef4ac0ddd297bbd3]. (check-in: 0dcf808d user: dan tags: trunk)
Context
2021-03-24
19:44
Comment improvements to on the distinct-agg optimization. Show a line in the EQP output when using an ephemeral table to implement DISTINCT on an aggregate. (check-in: 037ca79e user: drh tags: trunk)
17:28
Improvements to distinct aggregates such that they can sometimes avoid using an ephermeral table to test for duplicates if the column that is distinct is part of an index. (check-in: ef4ac0dd user: drh tags: trunk)
17:04
Fix a harmless compiler warning. (check-in: 26b005a9 user: drh tags: trunk)
2021-03-13
18:23
Fix a memory leak in the new code on this branch. (Closed-Leaf check-in: 0817cf2e user: dan tags: distinct-agg-opt)
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/select.c.

737
738
739
740
741
742
743
744
745
746

747
748





749








750
















751
752
753

754
755






756





757





































































758
759
760
761
762
763


764
765
766
767
768




769
770
771
772
773
774
775
  if( iOffset>0 ){
    sqlite3VdbeAddOp3(v, OP_IfPos, iOffset, iContinue, 1); VdbeCoverage(v);
    VdbeComment((v, "OFFSET"));
  }
}

/*
** Add code that will check to make sure the N registers starting at iMem
** form a distinct entry.  iTab is a sorting index that holds previously
** seen combinations of the N values.  A new entry is made in iTab

** if the current N values are new.
**





** A jump to addrRepeat is made and the N+1 values are popped from the








** stack if the top N elements are not distinct.
















*/
static void codeDistinct(
  Parse *pParse,     /* Parsing and code generating context */

  int iTab,          /* A sorting index used to test for distinctness */
  int addrRepeat,    /* Jump to here if not distinct */






  int N,             /* Number of elements */





  int iMem           /* First element */





































































){
  Vdbe *v;
  int r1;

  v = pParse->pVdbe;
  r1 = sqlite3GetTempReg(pParse);


  sqlite3VdbeAddOp4Int(v, OP_Found, iTab, addrRepeat, iMem, N); VdbeCoverage(v);
  sqlite3VdbeAddOp3(v, OP_MakeRecord, iMem, N, r1);
  sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iTab, r1, iMem, N);
  sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT);
  sqlite3ReleaseTempReg(pParse, r1);




}

#ifdef SQLITE_ENABLE_SORTER_REFERENCES
/*
** This function is called as part of inner-loop generation for a SELECT
** statement with an ORDER BY that is not optimized by an index. It 
** determines the expressions, if any, that the sorter-reference 







|
|
|
>
|

>
>
>
>
>
|
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

|

>


>
>
>
>
>
>
|
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

<
<
|
|
|
>
>
|
|
|
|
|
>
>
>
>







737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869


870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
  if( iOffset>0 ){
    sqlite3VdbeAddOp3(v, OP_IfPos, iOffset, iContinue, 1); VdbeCoverage(v);
    VdbeComment((v, "OFFSET"));
  }
}

/*
** Add code that will check to make sure the array of registers starting at
** iMem form a distinct entry. This is used by both "SELECT DISTINCT ..." and
** distinct aggregates ("SELECT count(DISTINCT <expr>) ..."). Three strategies
** are available. Which is used depends on the value of parameter eTnctType,
** as follows:
**
**   WHERE_DISTINCT_UNORDERED/WHERE_DISTINCT_NOOP:
**     Parameter iTab is the cursor number of an ephemeral table that must
**     be opened before the VM code generated by this routine is executed.
**     The ephemeral cursor table is queried for a record identical to the
**     record formed by the current array of registers. If one is found,
**     jump to VM address addrRepeat. Otherwise, insert a new record into
**     the ephemeral cursor and proceed.
**
**     The returned value in this case is a copy of parameter iTab.
**
**   WHERE_DISTINCT_ORDERED:
**     In this case rows are being delivered sorted order sorted. The ephermal
**     table is not required in this case. Instead, the current set of 
**     registers are compared to previous row. If they match, the new row
**     is not distinct and control jumps to VM address addrRepeat. Otherwise,
**     the VM program proceeds with processing the new row.
**
**     The returned value in this case is the register number of the first
**     in an array of registers used to store the previous result row so that
**     it can be compared to the next. The caller must ensure that this cell
**     is initialized to NULL and has the "clear" flag set.
**
**   WHERE_DISTINCT_UNIQUE:
**     In this case it has already been determined that the rows are distinct.
**     No special action is required. The return value is always zero.
**
** Parameter pEList is the list of expressions used to generated the 
** contents of each row. It is used by this routine to determine (a) 
** how many elements there are in the array of registers and (b) the 
** collation sequences that should be used for the comparisons if 
** eTnctType is WHERE_DISTINCT_ORDERED.
*/
static int codeDistinct(
  Parse *pParse,     /* Parsing and code generating context */
  int eTnctType,     /* WHERE_DISTINCT_* value */
  int iTab,          /* A sorting index used to test for distinctness */
  int addrRepeat,    /* Jump to here if not distinct */
  ExprList *pEList,  /* Expression for each element */
  int regElem        /* First element */
){
  int iRet = 0;
  int nResultCol = pEList->nExpr;
  Vdbe *v = pParse->pVdbe;

  switch( eTnctType ){
    case WHERE_DISTINCT_ORDERED: {
      int i;
      int iJump;              /* Jump destination */
      int regPrev;            /* Previous row content */

      /* Allocate space for the previous row */
      iRet = regPrev = pParse->nMem+1;
      pParse->nMem += nResultCol;

      iJump = sqlite3VdbeCurrentAddr(v) + nResultCol;
      for(i=0; i<nResultCol; i++){
        CollSeq *pColl = sqlite3ExprCollSeq(pParse, pEList->a[i].pExpr);
        if( i<nResultCol-1 ){
          sqlite3VdbeAddOp3(v, OP_Ne, regElem+i, iJump, regPrev+i);
          VdbeCoverage(v);
        }else{
          sqlite3VdbeAddOp3(v, OP_Eq, regElem+i, addrRepeat, regPrev+i);
          VdbeCoverage(v);
         }
        sqlite3VdbeChangeP4(v, -1, (const char *)pColl, P4_COLLSEQ);
        sqlite3VdbeChangeP5(v, SQLITE_NULLEQ);
      }
      assert( sqlite3VdbeCurrentAddr(v)==iJump || pParse->db->mallocFailed );
      sqlite3VdbeAddOp3(v, OP_Copy, regElem, regPrev, nResultCol-1);
      break;
    }

    case WHERE_DISTINCT_UNIQUE: {
      /* nothing to do */
      break;
    }

    default: {
      int r1 = sqlite3GetTempReg(pParse);
      sqlite3VdbeAddOp4Int(v, OP_Found, iTab, addrRepeat, regElem, nResultCol);
      VdbeCoverage(v);
      sqlite3VdbeAddOp3(v, OP_MakeRecord, regElem, nResultCol, r1);
      sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iTab, r1, regElem, nResultCol);
      sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT);
      sqlite3ReleaseTempReg(pParse, r1);
      iRet = iTab;
      break;
    }
  }

  return iRet;
}

/*
** A call to this function must be made for each call to codeDistinct().
** Parameter is passed the value returned by that call to codeDistinct(),
** and iOpenEphAddr the address of the instruction used to open the
** ephemeral table (that may be) used by codeDistinct().
**
** Argument eTnctType is passed the strategy to be used for any DISTINCT
** operation, as returned by sqlite3WhereIsDistinct(). If the strategy
** is WHERE_DISTINCT_NOOP or WHERE_DISTINCT_UNORDERED, this function is
** a no-op. Otherwise:
**
**   WHERE_DISTINCT_UNIQUE:
**     In this case the ephemeral table is not required. So instruction
**     iOpenEphAddr is replaced by an OP_Noop.
**
**   WHERE_DISTINCT_ORDERED:
**     In this case the ephemeral table is not required. The instruction
**     iOpenEphAddr is replaced by an OP_Null instruction to set register
**     iVal to a NULL value with the "clear" flag set (see comments above
**     codeDistinct() for details).
*/
static void fixDistinctOpenEph(
  Parse *pParse,     /* Parsing and code generating context */
  int eTnctType,     /* WHERE_DISTINCT_* value */
  int iVal,          /* Value returned by codeDistinct() */
  int iOpenEphAddr   /* Address of OP_OpenEphemeral instruction for iTab */
){


  if( eTnctType==WHERE_DISTINCT_UNIQUE || eTnctType==WHERE_DISTINCT_ORDERED ){
    Vdbe *v = pParse->pVdbe;
    sqlite3VdbeChangeToNoop(v, iOpenEphAddr);
    if( eTnctType==WHERE_DISTINCT_ORDERED ){
      /* Change the OP_OpenEphemeral to an OP_Null that sets the MEM_Cleared 
      ** bit on the first register of the previous value.  This will cause the
      ** OP_Ne added in codeDistinct() to always fail on the first iteration of
      ** the loop even if the first row is all NULLs.  */
      VdbeOp *pOp = sqlite3VdbeGetOp(v, iOpenEphAddr);
      pOp->opcode = OP_Null;
      pOp->p1 = 1;
      pOp->p2 = iVal;
    }
  }
}

#ifdef SQLITE_ENABLE_SORTER_REFERENCES
/*
** This function is called as part of inner-loop generation for a SELECT
** statement with an ORDER BY that is not optimized by an index. It 
** determines the expressions, if any, that the sorter-reference 
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
  }

  /* If the DISTINCT keyword was present on the SELECT statement
  ** and this row has been seen before, then do not make this row
  ** part of the result.
  */
  if( hasDistinct ){
    switch( pDistinct->eTnctType ){
      case WHERE_DISTINCT_ORDERED: {
        VdbeOp *pOp;            /* No longer required OpenEphemeral instr. */
        int iJump;              /* Jump destination */
        int regPrev;            /* Previous row content */

        /* Allocate space for the previous row */
        regPrev = pParse->nMem+1;
        pParse->nMem += nResultCol;

        /* Change the OP_OpenEphemeral coded earlier to an OP_Null
        ** sets the MEM_Cleared bit on the first register of the
        ** previous value.  This will cause the OP_Ne below to always
        ** fail on the first iteration of the loop even if the first
        ** row is all NULLs.
        */
        sqlite3VdbeChangeToNoop(v, pDistinct->addrTnct);
        pOp = sqlite3VdbeGetOp(v, pDistinct->addrTnct);
        pOp->opcode = OP_Null;
        pOp->p1 = 1;
        pOp->p2 = regPrev;
        pOp = 0;  /* Ensure pOp is not used after sqlite3VdbeAddOp() */

        iJump = sqlite3VdbeCurrentAddr(v) + nResultCol;
        for(i=0; i<nResultCol; i++){
          CollSeq *pColl = sqlite3ExprCollSeq(pParse, p->pEList->a[i].pExpr);
          if( i<nResultCol-1 ){
            sqlite3VdbeAddOp3(v, OP_Ne, regResult+i, iJump, regPrev+i);
            VdbeCoverage(v);
          }else{
            sqlite3VdbeAddOp3(v, OP_Eq, regResult+i, iContinue, regPrev+i);
            VdbeCoverage(v);
           }
          sqlite3VdbeChangeP4(v, -1, (const char *)pColl, P4_COLLSEQ);
          sqlite3VdbeChangeP5(v, SQLITE_NULLEQ);
        }
        assert( sqlite3VdbeCurrentAddr(v)==iJump || pParse->db->mallocFailed );
        sqlite3VdbeAddOp3(v, OP_Copy, regResult, regPrev, nResultCol-1);
        break;
      }

      case WHERE_DISTINCT_UNIQUE: {
        sqlite3VdbeChangeToNoop(v, pDistinct->addrTnct);
        break;
      }

      default: {
        assert( pDistinct->eTnctType==WHERE_DISTINCT_UNORDERED );
        codeDistinct(pParse, pDistinct->tabTnct, iContinue, nResultCol,
                     regResult);
        break;
      }
    }
    if( pSort==0 ){
      codeOffset(v, p->iOffset, iContinue);
    }
  }

  switch( eDest ){
    /* In this mode, write each query result to the key of the temporary







|
<
<
<
<
|
<
<
|
|
<
<
<
<
<
<
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







1124
1125
1126
1127
1128
1129
1130
1131




1132


1133
1134






1135




































1136
1137
1138
1139
1140
1141
1142
  }

  /* If the DISTINCT keyword was present on the SELECT statement
  ** and this row has been seen before, then do not make this row
  ** part of the result.
  */
  if( hasDistinct ){
    int eType = pDistinct->eTnctType;




    int iTab = pDistinct->tabTnct;


    assert( nResultCol==p->pEList->nExpr );
    iTab = codeDistinct(pParse, eType, iTab, iContinue, p->pEList, regResult);






    fixDistinctOpenEph(pParse, eType, iTab, pDistinct->addrTnct);




































    if( pSort==0 ){
      codeOffset(v, p->iOffset, iContinue);
    }
  }

  switch( eDest ){
    /* In this mode, write each query result to the key of the temporary
5641
5642
5643
5644
5645
5646
5647
5648
5649
5650
5651
5652
5653
5654
5655
5656
      assert( !ExprHasProperty(pE, EP_xIsSelect) );
      if( pE->x.pList==0 || pE->x.pList->nExpr!=1 ){
        sqlite3ErrorMsg(pParse, "DISTINCT aggregates must have exactly one "
           "argument");
        pFunc->iDistinct = -1;
      }else{
        KeyInfo *pKeyInfo = sqlite3KeyInfoFromExprList(pParse, pE->x.pList,0,0);
        sqlite3VdbeAddOp4(v, OP_OpenEphemeral, pFunc->iDistinct, 0, 0,
                          (char*)pKeyInfo, P4_KEYINFO);
      }
    }
  }
}

/*
** Invoke the OP_AggFinalize opcode for every aggregate function







|
|







5708
5709
5710
5711
5712
5713
5714
5715
5716
5717
5718
5719
5720
5721
5722
5723
      assert( !ExprHasProperty(pE, EP_xIsSelect) );
      if( pE->x.pList==0 || pE->x.pList->nExpr!=1 ){
        sqlite3ErrorMsg(pParse, "DISTINCT aggregates must have exactly one "
           "argument");
        pFunc->iDistinct = -1;
      }else{
        KeyInfo *pKeyInfo = sqlite3KeyInfoFromExprList(pParse, pE->x.pList,0,0);
        pFunc->iDistAddr = sqlite3VdbeAddOp4(v, OP_OpenEphemeral, 
            pFunc->iDistinct, 0, 0, (char*)pKeyInfo, P4_KEYINFO);
      }
    }
  }
}

/*
** Invoke the OP_AggFinalize opcode for every aggregate function
5674
5675
5676
5677
5678
5679
5680
5681





5682
5683
5684
5685
5686
5687
5688
** the current cursor position.
**
** If regAcc is non-zero and there are no min() or max() aggregates
** in pAggInfo, then only populate the pAggInfo->nAccumulator accumulator
** registers if register regAcc contains 0. The caller will take care
** of setting and clearing regAcc.
*/
static void updateAccumulator(Parse *pParse, int regAcc, AggInfo *pAggInfo){





  Vdbe *v = pParse->pVdbe;
  int i;
  int regHit = 0;
  int addrHitTest = 0;
  struct AggInfo_func *pF;
  struct AggInfo_col *pC;








|
>
>
>
>
>







5741
5742
5743
5744
5745
5746
5747
5748
5749
5750
5751
5752
5753
5754
5755
5756
5757
5758
5759
5760
** the current cursor position.
**
** If regAcc is non-zero and there are no min() or max() aggregates
** in pAggInfo, then only populate the pAggInfo->nAccumulator accumulator
** registers if register regAcc contains 0. The caller will take care
** of setting and clearing regAcc.
*/
static void updateAccumulator(
  Parse *pParse, 
  int regAcc, 
  AggInfo *pAggInfo,
  int eDistinctType
){
  Vdbe *v = pParse->pVdbe;
  int i;
  int regHit = 0;
  int addrHitTest = 0;
  struct AggInfo_func *pF;
  struct AggInfo_col *pC;

5720
5721
5722
5723
5724
5725
5726
5727
5728
5729
5730
5731
5732

5733
5734
5735
5736
5737
5738
5739
5740
      nArg = pList->nExpr;
      regAgg = sqlite3GetTempRange(pParse, nArg);
      sqlite3ExprCodeExprList(pParse, pList, regAgg, 0, SQLITE_ECEL_DUP);
    }else{
      nArg = 0;
      regAgg = 0;
    }
    if( pF->iDistinct>=0 ){
      if( addrNext==0 ){ 
        addrNext = sqlite3VdbeMakeLabel(pParse);
      }
      testcase( nArg==0 );  /* Error condition */
      testcase( nArg>1 );   /* Also an error */

      codeDistinct(pParse, pF->iDistinct, addrNext, 1, regAgg);
    }
    if( pF->pFunc->funcFlags & SQLITE_FUNC_NEEDCOLL ){
      CollSeq *pColl = 0;
      struct ExprList_item *pItem;
      int j;
      assert( pList!=0 );  /* pList!=0 if pF->pFunc has NEEDCOLL */
      for(j=0, pItem=pList->a; !pColl && j<nArg; j++, pItem++){







|





>
|







5792
5793
5794
5795
5796
5797
5798
5799
5800
5801
5802
5803
5804
5805
5806
5807
5808
5809
5810
5811
5812
5813
      nArg = pList->nExpr;
      regAgg = sqlite3GetTempRange(pParse, nArg);
      sqlite3ExprCodeExprList(pParse, pList, regAgg, 0, SQLITE_ECEL_DUP);
    }else{
      nArg = 0;
      regAgg = 0;
    }
    if( pF->iDistinct>=0 && pList ){
      if( addrNext==0 ){ 
        addrNext = sqlite3VdbeMakeLabel(pParse);
      }
      testcase( nArg==0 );  /* Error condition */
      testcase( nArg>1 );   /* Also an error */
      pF->iDistinct = codeDistinct(pParse, eDistinctType, 
          pF->iDistinct, addrNext, pList, regAgg);
    }
    if( pF->pFunc->funcFlags & SQLITE_FUNC_NEEDCOLL ){
      CollSeq *pColl = 0;
      struct ExprList_item *pItem;
      int j;
      assert( pList!=0 );  /* pList!=0 if pF->pFunc has NEEDCOLL */
      for(j=0, pItem=pList->a; !pColl && j<nArg; j++, pItem++){
6758
6759
6760
6761
6762
6763
6764














6765
6766
6767
6768
6769
6770
6771
      int addrOutputRow;  /* Start of subroutine that outputs a result row */
      int regOutputRow;   /* Return address register for output subroutine */
      int addrSetAbort;   /* Set the abort flag and return */
      int addrTopOfLoop;  /* Top of the input loop */
      int addrSortingIdx; /* The OP_OpenEphemeral for the sorting index */
      int addrReset;      /* Subroutine for resetting the accumulator */
      int regReset;       /* Return address register for reset subroutine */















      /* If there is a GROUP BY clause we might need a sorting index to
      ** implement it.  Allocate that sorting index now.  If it turns out
      ** that we do not need it after all, the OP_SorterOpen instruction
      ** will be converted into a Noop.  
      */
      pAggInfo->sortingIdx = pParse->nTab++;







>
>
>
>
>
>
>
>
>
>
>
>
>
>







6831
6832
6833
6834
6835
6836
6837
6838
6839
6840
6841
6842
6843
6844
6845
6846
6847
6848
6849
6850
6851
6852
6853
6854
6855
6856
6857
6858
      int addrOutputRow;  /* Start of subroutine that outputs a result row */
      int regOutputRow;   /* Return address register for output subroutine */
      int addrSetAbort;   /* Set the abort flag and return */
      int addrTopOfLoop;  /* Top of the input loop */
      int addrSortingIdx; /* The OP_OpenEphemeral for the sorting index */
      int addrReset;      /* Subroutine for resetting the accumulator */
      int regReset;       /* Return address register for reset subroutine */
      ExprList *pDistinct = 0;
      u16 distFlag = 0;
      int eDist = WHERE_DISTINCT_NOOP;

      if( pAggInfo->nFunc==1 
       && pAggInfo->aFunc[0].iDistinct>=0
       && pAggInfo->aFunc[0].pFExpr->x.pList 
      ){
        Expr *pExpr = pAggInfo->aFunc[0].pFExpr->x.pList->a[0].pExpr;
        pExpr = sqlite3ExprDup(db, pExpr, 0);
        pDistinct = sqlite3ExprListDup(db, pGroupBy, 0);
        pDistinct = sqlite3ExprListAppend(pParse, pDistinct, pExpr);
        distFlag = pDistinct ? WHERE_WANT_DISTINCT : 0;
      }

      /* If there is a GROUP BY clause we might need a sorting index to
      ** implement it.  Allocate that sorting index now.  If it turns out
      ** that we do not need it after all, the OP_SorterOpen instruction
      ** will be converted into a Noop.  
      */
      pAggInfo->sortingIdx = pParse->nTab++;
6794
6795
6796
6797
6798
6799
6800
6801
6802
6803

6804

6805
6806
6807
6808
6809
6810
6811
      /* Begin a loop that will extract all source rows in GROUP BY order.
      ** This might involve two separate loops with an OP_Sort in between, or
      ** it might be a single loop that uses an index to extract information
      ** in the right order to begin with.
      */
      sqlite3VdbeAddOp2(v, OP_Gosub, regReset, addrReset);
      SELECTTRACE(1,pParse,p,("WhereBegin\n"));
      pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, pGroupBy, 0,
          WHERE_GROUPBY | (orderByGrp ? WHERE_SORTBYGROUP : 0), 0
      );

      if( pWInfo==0 ) goto select_end;

      SELECTTRACE(1,pParse,p,("WhereBegin returns\n"));
      if( sqlite3WhereIsOrdered(pWInfo)==pGroupBy->nExpr ){
        /* The optimizer is able to deliver rows in group by order so
        ** we do not have to sort.  The OP_OpenEphemeral table will be
        ** cancelled later because we still need to use the pKeyInfo
        */
        groupBySort = 0;







|
|

>

>







6881
6882
6883
6884
6885
6886
6887
6888
6889
6890
6891
6892
6893
6894
6895
6896
6897
6898
6899
6900
      /* Begin a loop that will extract all source rows in GROUP BY order.
      ** This might involve two separate loops with an OP_Sort in between, or
      ** it might be a single loop that uses an index to extract information
      ** in the right order to begin with.
      */
      sqlite3VdbeAddOp2(v, OP_Gosub, regReset, addrReset);
      SELECTTRACE(1,pParse,p,("WhereBegin\n"));
      pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, pGroupBy, pDistinct,
          WHERE_GROUPBY | (orderByGrp ? WHERE_SORTBYGROUP : 0) | distFlag, 0
      );
      sqlite3ExprListDelete(db, pDistinct);
      if( pWInfo==0 ) goto select_end;
      eDist = sqlite3WhereIsDistinct(pWInfo);
      SELECTTRACE(1,pParse,p,("WhereBegin returns\n"));
      if( sqlite3WhereIsOrdered(pWInfo)==pGroupBy->nExpr ){
        /* The optimizer is able to deliver rows in group by order so
        ** we do not have to sort.  The OP_OpenEphemeral table will be
        ** cancelled later because we still need to use the pKeyInfo
        */
        groupBySort = 0;
6915
6916
6917
6918
6919
6920
6921
6922
6923
6924
6925
6926
6927
6928
6929
      sqlite3VdbeAddOp2(v, OP_Gosub, regReset, addrReset);
      VdbeComment((v, "reset accumulator"));

      /* Update the aggregate accumulators based on the content of
      ** the current row
      */
      sqlite3VdbeJumpHere(v, addr1);
      updateAccumulator(pParse, iUseFlag, pAggInfo);
      sqlite3VdbeAddOp2(v, OP_Integer, 1, iUseFlag);
      VdbeComment((v, "indicate data in accumulator"));

      /* End of the loop
      */
      if( groupBySort ){
        sqlite3VdbeAddOp2(v, OP_SorterNext, pAggInfo->sortingIdx,addrTopOfLoop);







|







7004
7005
7006
7007
7008
7009
7010
7011
7012
7013
7014
7015
7016
7017
7018
      sqlite3VdbeAddOp2(v, OP_Gosub, regReset, addrReset);
      VdbeComment((v, "reset accumulator"));

      /* Update the aggregate accumulators based on the content of
      ** the current row
      */
      sqlite3VdbeJumpHere(v, addr1);
      updateAccumulator(pParse, iUseFlag, pAggInfo, eDist);
      sqlite3VdbeAddOp2(v, OP_Integer, 1, iUseFlag);
      VdbeComment((v, "indicate data in accumulator"));

      /* End of the loop
      */
      if( groupBySort ){
        sqlite3VdbeAddOp2(v, OP_SorterNext, pAggInfo->sortingIdx,addrTopOfLoop);
6971
6972
6973
6974
6975
6976
6977
6978




6979
6980
6981
6982
6983
6984
6985
      /* Generate a subroutine that will reset the group-by accumulator
      */
      sqlite3VdbeResolveLabel(v, addrReset);
      resetAccumulator(pParse, pAggInfo);
      sqlite3VdbeAddOp2(v, OP_Integer, 0, iUseFlag);
      VdbeComment((v, "indicate accumulator empty"));
      sqlite3VdbeAddOp1(v, OP_Return, regReset);
     




    } /* endif pGroupBy.  Begin aggregate queries without GROUP BY: */
    else {
      Table *pTab;
      if( (pTab = isSimpleCount(p, pAggInfo))!=0 ){
        /* If isSimpleCount() returns a pointer to a Table structure, then
        ** the SQL statement is of the form:
        **







|
>
>
>
>







7060
7061
7062
7063
7064
7065
7066
7067
7068
7069
7070
7071
7072
7073
7074
7075
7076
7077
7078
      /* Generate a subroutine that will reset the group-by accumulator
      */
      sqlite3VdbeResolveLabel(v, addrReset);
      resetAccumulator(pParse, pAggInfo);
      sqlite3VdbeAddOp2(v, OP_Integer, 0, iUseFlag);
      VdbeComment((v, "indicate accumulator empty"));
      sqlite3VdbeAddOp1(v, OP_Return, regReset);

      if( eDist!=WHERE_DISTINCT_NOOP ){
        struct AggInfo_func *pF = &pAggInfo->aFunc[0];
        fixDistinctOpenEph(pParse, eDist, pF->iDistinct, pF->iDistAddr);
      }
    } /* endif pGroupBy.  Begin aggregate queries without GROUP BY: */
    else {
      Table *pTab;
      if( (pTab = isSimpleCount(p, pAggInfo))!=0 ){
        /* If isSimpleCount() returns a pointer to a Table structure, then
        ** the SQL statement is of the form:
        **
7035
7036
7037
7038
7039
7040
7041



7042
7043
7044
7045
7046
7047
7048
          sqlite3VdbeChangeP4(v, -1, (char *)pKeyInfo, P4_KEYINFO);
        }
        sqlite3VdbeAddOp2(v, OP_Count, iCsr, pAggInfo->aFunc[0].iMem);
        sqlite3VdbeAddOp1(v, OP_Close, iCsr);
        explainSimpleCount(pParse, pTab, pBest);
      }else{
        int regAcc = 0;           /* "populate accumulators" flag */




        /* If there are accumulator registers but no min() or max() functions
        ** without FILTER clauses, allocate register regAcc. Register regAcc
        ** will contain 0 the first time the inner loop runs, and 1 thereafter.
        ** The code generated by updateAccumulator() uses this to ensure
        ** that the accumulator registers are (a) updated only once if
        ** there are no min() or max functions or (b) always updated for the







>
>
>







7128
7129
7130
7131
7132
7133
7134
7135
7136
7137
7138
7139
7140
7141
7142
7143
7144
          sqlite3VdbeChangeP4(v, -1, (char *)pKeyInfo, P4_KEYINFO);
        }
        sqlite3VdbeAddOp2(v, OP_Count, iCsr, pAggInfo->aFunc[0].iMem);
        sqlite3VdbeAddOp1(v, OP_Close, iCsr);
        explainSimpleCount(pParse, pTab, pBest);
      }else{
        int regAcc = 0;           /* "populate accumulators" flag */
        ExprList *pDistinct = 0;
        u16 distFlag = 0;
        int eDist;

        /* If there are accumulator registers but no min() or max() functions
        ** without FILTER clauses, allocate register regAcc. Register regAcc
        ** will contain 0 the first time the inner loop runs, and 1 thereafter.
        ** The code generated by updateAccumulator() uses this to ensure
        ** that the accumulator registers are (a) updated only once if
        ** there are no min() or max functions or (b) always updated for the
7058
7059
7060
7061
7062
7063
7064



7065
7066
7067
7068
7069
7070
7071
7072
7073
7074
7075
7076
7077
7078
7079
7080
7081
7082
7083
7084
7085
7086
7087
7088

7089





7090
7091
7092
7093
7094
7095
7096
              break;
            }
          }
          if( i==pAggInfo->nFunc ){
            regAcc = ++pParse->nMem;
            sqlite3VdbeAddOp2(v, OP_Integer, 0, regAcc);
          }



        }

        /* This case runs if the aggregate has no GROUP BY clause.  The
        ** processing is much simpler since there is only a single row
        ** of output.
        */
        assert( p->pGroupBy==0 );
        resetAccumulator(pParse, pAggInfo);

        /* If this query is a candidate for the min/max optimization, then
        ** minMaxFlag will have been previously set to either
        ** WHERE_ORDERBY_MIN or WHERE_ORDERBY_MAX and pMinMaxOrderBy will
        ** be an appropriate ORDER BY expression for the optimization.
        */
        assert( minMaxFlag==WHERE_ORDERBY_NORMAL || pMinMaxOrderBy!=0 );
        assert( pMinMaxOrderBy==0 || pMinMaxOrderBy->nExpr==1 );

        SELECTTRACE(1,pParse,p,("WhereBegin\n"));
        pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, pMinMaxOrderBy,
                                   0, minMaxFlag, 0);
        if( pWInfo==0 ){
          goto select_end;
        }
        SELECTTRACE(1,pParse,p,("WhereBegin returns\n"));

        updateAccumulator(pParse, regAcc, pAggInfo);





        if( regAcc ) sqlite3VdbeAddOp2(v, OP_Integer, 1, regAcc);
        if( minMaxFlag ){
          sqlite3WhereMinMaxOptEarlyOut(v, pWInfo);
        }
        SELECTTRACE(1,pParse,p,("WhereEnd\n"));
        sqlite3WhereEnd(pWInfo);
        finalizeAggFunctions(pParse, pAggInfo);







>
>
>



















|




>
|
>
>
>
>
>







7154
7155
7156
7157
7158
7159
7160
7161
7162
7163
7164
7165
7166
7167
7168
7169
7170
7171
7172
7173
7174
7175
7176
7177
7178
7179
7180
7181
7182
7183
7184
7185
7186
7187
7188
7189
7190
7191
7192
7193
7194
7195
7196
7197
7198
7199
7200
7201
              break;
            }
          }
          if( i==pAggInfo->nFunc ){
            regAcc = ++pParse->nMem;
            sqlite3VdbeAddOp2(v, OP_Integer, 0, regAcc);
          }
        }else if( pAggInfo->nFunc==1 && pAggInfo->aFunc[0].iDistinct>=0 ){
          pDistinct = pAggInfo->aFunc[0].pFExpr->x.pList;
          distFlag = pDistinct ? WHERE_WANT_DISTINCT : 0;
        }

        /* This case runs if the aggregate has no GROUP BY clause.  The
        ** processing is much simpler since there is only a single row
        ** of output.
        */
        assert( p->pGroupBy==0 );
        resetAccumulator(pParse, pAggInfo);

        /* If this query is a candidate for the min/max optimization, then
        ** minMaxFlag will have been previously set to either
        ** WHERE_ORDERBY_MIN or WHERE_ORDERBY_MAX and pMinMaxOrderBy will
        ** be an appropriate ORDER BY expression for the optimization.
        */
        assert( minMaxFlag==WHERE_ORDERBY_NORMAL || pMinMaxOrderBy!=0 );
        assert( pMinMaxOrderBy==0 || pMinMaxOrderBy->nExpr==1 );

        SELECTTRACE(1,pParse,p,("WhereBegin\n"));
        pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, pMinMaxOrderBy,
                                   pDistinct, minMaxFlag|distFlag, 0);
        if( pWInfo==0 ){
          goto select_end;
        }
        SELECTTRACE(1,pParse,p,("WhereBegin returns\n"));
        eDist = sqlite3WhereIsDistinct(pWInfo);
        updateAccumulator(pParse, regAcc, pAggInfo, eDist);
        if( eDist!=WHERE_DISTINCT_NOOP ){
          struct AggInfo_func *pF = &pAggInfo->aFunc[0];
          fixDistinctOpenEph(pParse, eDist, pF->iDistinct, pF->iDistAddr);
        }

        if( regAcc ) sqlite3VdbeAddOp2(v, OP_Integer, 1, regAcc);
        if( minMaxFlag ){
          sqlite3WhereMinMaxOptEarlyOut(v, pWInfo);
        }
        SELECTTRACE(1,pParse,p,("WhereEnd\n"));
        sqlite3WhereEnd(pWInfo);
        finalizeAggFunctions(pParse, pAggInfo);

Changes to src/sqliteInt.h.

2599
2600
2601
2602
2603
2604
2605

2606
2607
2608
2609
2610
2611
2612
                          ** Additional columns are used only as parameters to
                          ** aggregate functions */
  struct AggInfo_func {   /* For each aggregate function */
    Expr *pFExpr;            /* Expression encoding the function */
    FuncDef *pFunc;          /* The aggregate function implementation */
    int iMem;                /* Memory location that acts as accumulator */
    int iDistinct;           /* Ephemeral table used to enforce DISTINCT */

  } *aFunc;
  int nFunc;              /* Number of entries in aFunc[] */
  u32 selId;              /* Select to which this AggInfo belongs */
};

/*
** The datatype ynVar is a signed integer, either 16-bit or 32-bit.







>







2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
                          ** Additional columns are used only as parameters to
                          ** aggregate functions */
  struct AggInfo_func {   /* For each aggregate function */
    Expr *pFExpr;            /* Expression encoding the function */
    FuncDef *pFunc;          /* The aggregate function implementation */
    int iMem;                /* Memory location that acts as accumulator */
    int iDistinct;           /* Ephemeral table used to enforce DISTINCT */
    int iDistAddr;           /* Address of OP_OpenEphemeral */
  } *aFunc;
  int nFunc;              /* Number of entries in aFunc[] */
  u32 selId;              /* Select to which this AggInfo belongs */
};

/*
** The datatype ynVar is a signed integer, either 16-bit or 32-bit.

Changes to src/where.c.

488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
){
  int i;
  const char *zColl = pIdx->azColl[iCol];

  for(i=0; i<pList->nExpr; i++){
    Expr *p = sqlite3ExprSkipCollateAndLikely(pList->a[i].pExpr);
    if( ALWAYS(p!=0)
     && p->op==TK_COLUMN
     && p->iColumn==pIdx->aiColumn[iCol]
     && p->iTable==iBase
    ){
      CollSeq *pColl = sqlite3ExprNNCollSeq(pParse, pList->a[i].pExpr);
      if( 0==sqlite3StrICmp(pColl->zName, zColl) ){
        return i;
      }







|







488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
){
  int i;
  const char *zColl = pIdx->azColl[iCol];

  for(i=0; i<pList->nExpr; i++){
    Expr *p = sqlite3ExprSkipCollateAndLikely(pList->a[i].pExpr);
    if( ALWAYS(p!=0)
     && (p->op==TK_COLUMN || p->op==TK_AGG_COLUMN)
     && p->iColumn==pIdx->aiColumn[iCol]
     && p->iTable==iBase
    ){
      CollSeq *pColl = sqlite3ExprNNCollSeq(pParse, pList->a[i].pExpr);
      if( 0==sqlite3StrICmp(pColl->zName, zColl) ){
        return i;
      }
553
554
555
556
557
558
559

560
561
562
563
564
565
566
567
  /* If any of the expressions is an IPK column on table iBase, then return 
  ** true. Note: The (p->iTable==iBase) part of this test may be false if the
  ** current SELECT is a correlated sub-query.
  */
  for(i=0; i<pDistinct->nExpr; i++){
    Expr *p = sqlite3ExprSkipCollateAndLikely(pDistinct->a[i].pExpr);
    if( NEVER(p==0) ) continue;

    if( p->op==TK_COLUMN && p->iTable==iBase && p->iColumn<0 ) return 1;
  }

  /* Loop through all indices on the table, checking each to see if it makes
  ** the DISTINCT qualifier redundant. It does so if:
  **
  **   1. The index is itself UNIQUE, and
  **







>
|







553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
  /* If any of the expressions is an IPK column on table iBase, then return 
  ** true. Note: The (p->iTable==iBase) part of this test may be false if the
  ** current SELECT is a correlated sub-query.
  */
  for(i=0; i<pDistinct->nExpr; i++){
    Expr *p = sqlite3ExprSkipCollateAndLikely(pDistinct->a[i].pExpr);
    if( NEVER(p==0) ) continue;
    if( p->op!=TK_COLUMN && p->op!=TK_AGG_COLUMN ) continue;
    if( p->iTable==iBase && p->iColumn<0 ) return 1;
  }

  /* Loop through all indices on the table, checking each to see if it makes
  ** the DISTINCT qualifier redundant. It does so if:
  **
  **   1. The index is itself UNIQUE, and
  **
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
    ** clause of the form X IS NULL or X=? that reference only outer
    ** loops.
    */
    for(i=0; i<nOrderBy; i++){
      if( MASKBIT(i) & obSat ) continue;
      pOBExpr = sqlite3ExprSkipCollateAndLikely(pOrderBy->a[i].pExpr);
      if( NEVER(pOBExpr==0) ) continue;
      if( pOBExpr->op!=TK_COLUMN ) continue;
      if( pOBExpr->iTable!=iCur ) continue;
      pTerm = sqlite3WhereFindTerm(&pWInfo->sWC, iCur, pOBExpr->iColumn,
                       ~ready, eqOpMask, 0);
      if( pTerm==0 ) continue;
      if( pTerm->eOperator==WO_IN ){
        /* IN terms are only valid for sorting in the ORDER BY LIMIT 
        ** optimization, and then only if they are actually used







|







3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833
    ** clause of the form X IS NULL or X=? that reference only outer
    ** loops.
    */
    for(i=0; i<nOrderBy; i++){
      if( MASKBIT(i) & obSat ) continue;
      pOBExpr = sqlite3ExprSkipCollateAndLikely(pOrderBy->a[i].pExpr);
      if( NEVER(pOBExpr==0) ) continue;
      if( pOBExpr->op!=TK_COLUMN && pOBExpr->op!=TK_AGG_COLUMN ) continue;
      if( pOBExpr->iTable!=iCur ) continue;
      pTerm = sqlite3WhereFindTerm(&pWInfo->sWC, iCur, pOBExpr->iColumn,
                       ~ready, eqOpMask, 0);
      if( pTerm==0 ) continue;
      if( pTerm->eOperator==WO_IN ){
        /* IN terms are only valid for sorting in the ORDER BY LIMIT 
        ** optimization, and then only if they are actually used
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
          if( MASKBIT(i) & obSat ) continue;
          pOBExpr = sqlite3ExprSkipCollateAndLikely(pOrderBy->a[i].pExpr);
          testcase( wctrlFlags & WHERE_GROUPBY );
          testcase( wctrlFlags & WHERE_DISTINCTBY );
          if( NEVER(pOBExpr==0) ) continue;
          if( (wctrlFlags & (WHERE_GROUPBY|WHERE_DISTINCTBY))==0 ) bOnce = 0;
          if( iColumn>=XN_ROWID ){
            if( pOBExpr->op!=TK_COLUMN ) continue;
            if( pOBExpr->iTable!=iCur ) continue;
            if( pOBExpr->iColumn!=iColumn ) continue;
          }else{
            Expr *pIdxExpr = pIndex->aColExpr->a[j].pExpr;
            if( sqlite3ExprCompareSkip(pOBExpr, pIdxExpr, iCur) ){
              continue;
            }







|







3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
          if( MASKBIT(i) & obSat ) continue;
          pOBExpr = sqlite3ExprSkipCollateAndLikely(pOrderBy->a[i].pExpr);
          testcase( wctrlFlags & WHERE_GROUPBY );
          testcase( wctrlFlags & WHERE_DISTINCTBY );
          if( NEVER(pOBExpr==0) ) continue;
          if( (wctrlFlags & (WHERE_GROUPBY|WHERE_DISTINCTBY))==0 ) bOnce = 0;
          if( iColumn>=XN_ROWID ){
            if( pOBExpr->op!=TK_COLUMN && pOBExpr->op!=TK_AGG_COLUMN ) continue;
            if( pOBExpr->iTable!=iCur ) continue;
            if( pOBExpr->iColumn!=iColumn ) continue;
          }else{
            Expr *pIdxExpr = pIndex->aColExpr->a[j].pExpr;
            if( sqlite3ExprCompareSkip(pOBExpr, pIdxExpr, iCur) ){
              continue;
            }
5020
5021
5022
5023
5024
5025
5026
5027


5028
5029
5030
5031
5032
5033
5034
    }
  }
#endif

  /* Attempt to omit tables from the join that do not affect the result.
  ** For a table to not affect the result, the following must be true:
  **
  **   1) The query must not be an aggregate.


  **   2) The table must be the RHS of a LEFT JOIN.
  **   3) Either the query must be DISTINCT, or else the ON or USING clause
  **      must contain a constraint that limits the scan of the table to 
  **      at most a single row.
  **   4) The table must not be referenced by any part of the query apart
  **      from its own USING or ON clause.
  **







|
>
>







5021
5022
5023
5024
5025
5026
5027
5028
5029
5030
5031
5032
5033
5034
5035
5036
5037
    }
  }
#endif

  /* Attempt to omit tables from the join that do not affect the result.
  ** For a table to not affect the result, the following must be true:
  **
  **   1) The query must not be an aggregate. Or it must be an aggregate
  **      that contains only one aggregate function with the DISTINCT 
  **      qualifier. e.g. "SELECT count(DISTINCT ...) FROM".
  **   2) The table must be the RHS of a LEFT JOIN.
  **   3) Either the query must be DISTINCT, or else the ON or USING clause
  **      must contain a constraint that limits the scan of the table to 
  **      at most a single row.
  **   4) The table must not be referenced by any part of the query apart
  **      from its own USING or ON clause.
  **

Changes to test/distinctagg.test.

12
13
14
15
16
17
18

19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# focus of this script is the DISTINCT modifier on aggregate functions.
#
# $Id: distinctagg.test,v 1.3 2009/02/09 13:19:28 drh Exp $


set testdir [file dirname $argv0]
source $testdir/tester.tcl


do_test distinctagg-1.1 {
  execsql {
    CREATE TABLE t1(a,b,c);
    INSERT INTO t1 VALUES(1,2,3);
    INSERT INTO t1 VALUES(1,3,4);
    INSERT INTO t1 VALUES(1,3,5);
    SELECT count(distinct a),
           count(distinct b),
           count(distinct c),
           count(all a) FROM t1;
  }
} {1 2 3 3}
do_test distinctagg-1.2 {
  execsql {
    SELECT b, count(distinct c) FROM t1 GROUP BY b ORDER BY b
  }
} {2 1 3 2}
do_test distinctagg-1.3 {
  execsql {
    INSERT INTO t1 SELECT a+1, b+3, c+5 FROM t1;
    INSERT INTO t1 SELECT a+2, b+6, c+10 FROM t1;
    INSERT INTO t1 SELECT a+4, b+12, c+20 FROM t1;







>















|







12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# focus of this script is the DISTINCT modifier on aggregate functions.
#
# $Id: distinctagg.test,v 1.3 2009/02/09 13:19:28 drh Exp $


set testdir [file dirname $argv0]
source $testdir/tester.tcl
set testprefix distinctagg

do_test distinctagg-1.1 {
  execsql {
    CREATE TABLE t1(a,b,c);
    INSERT INTO t1 VALUES(1,2,3);
    INSERT INTO t1 VALUES(1,3,4);
    INSERT INTO t1 VALUES(1,3,5);
    SELECT count(distinct a),
           count(distinct b),
           count(distinct c),
           count(all a) FROM t1;
  }
} {1 2 3 3}
do_test distinctagg-1.2 {
  execsql {
    SELECT b, count(distinct c) FROM t1 GROUP BY b
  }
} {2 1 3 2}
do_test distinctagg-1.3 {
  execsql {
    INSERT INTO t1 SELECT a+1, b+3, c+5 FROM t1;
    INSERT INTO t1 SELECT a+2, b+6, c+10 FROM t1;
    INSERT INTO t1 SELECT a+4, b+12, c+20 FROM t1;
54
55
56
57
58
59
60
61


































































































































62

  }
} {1 {DISTINCT aggregates must have exactly one argument}}
do_test distinctagg-2.2 {
  catchsql {
    SELECT group_concat(distinct a,b) FROM t1;
  }
} {1 {DISTINCT aggregates must have exactly one argument}}



































































































































finish_test









>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

>
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
  }
} {1 {DISTINCT aggregates must have exactly one argument}}
do_test distinctagg-2.2 {
  catchsql {
    SELECT group_concat(distinct a,b) FROM t1;
  }
} {1 {DISTINCT aggregates must have exactly one argument}}

#--------------------------------------------------------------------------
reset_db
do_execsql_test 3.0 {
  CREATE TABLE t1(a, b, c);
  CREATE TABLE t2(d, e, f);

  INSERT INTO t1 VALUES (1, 1, 1);
  INSERT INTO t1 VALUES (2, 2, 2);
  INSERT INTO t1 VALUES (3, 3, 3);
  INSERT INTO t1 VALUES (4, 1, 4);
  INSERT INTO t1 VALUES (5, 2, 1);
  INSERT INTO t1 VALUES (5, 3, 2);
  INSERT INTO t1 VALUES (4, 1, 3);
  INSERT INTO t1 VALUES (3, 2, 4);
  INSERT INTO t1 VALUES (2, 3, 1);
  INSERT INTO t1 VALUES (1, 1, 2);

  INSERT INTO t2 VALUES('a', 'a', 'a');
  INSERT INTO t2 VALUES('b', 'b', 'b');
  INSERT INTO t2 VALUES('c', 'c', 'c');

  CREATE INDEX t1a ON t1(a);
  CREATE INDEX t1bc ON t1(b, c);
}

foreach {tn use_eph sql res} {
  1  0  "SELECT count(DISTINCT a) FROM t1"                5
  2  0  "SELECT count(DISTINCT b) FROM t1"                3
  3  1  "SELECT count(DISTINCT c) FROM t1"                4
  4  0  "SELECT count(DISTINCT c) FROM t1 WHERE b=3"      3
  5  0  "SELECT count(DISTINCT rowid) FROM t1"           10
  6  0  "SELECT count(DISTINCT a) FROM t1, t2"            5
  7  0  "SELECT count(DISTINCT a) FROM t2, t1"            5
  8  1  "SELECT count(DISTINCT a+b) FROM t1, t2, t2, t2"  6
  9  0  "SELECT count(DISTINCT c) FROM t1 WHERE c=2"      1
 10  1  "SELECT count(DISTINCT t1.rowid) FROM t1, t2"    10
} {
  do_test 3.$tn.1 {
    set prg [db eval "EXPLAIN $sql"]
    set idx [lsearch $prg OpenEphemeral]
    expr {$idx>=0}
  } $use_eph

  do_execsql_test 3.$tn.2 $sql $res
}

do_execsql_test 3.10 {
  SELECT a, count(DISTINCT b) FROM t1 GROUP BY a;
} {
  1 1  2 2  3 2  4 1  5 2
}

#--------------------------------------------------------------------------
reset_db
do_execsql_test 3.0 {
  CREATE TABLE t1(a, b, c);
  CREATE INDEX t1a ON t1(a);
  CREATE INDEX t1bc ON t1(b, c);

  INSERT INTO t1 VALUES(1, 'A', 1);
  INSERT INTO t1 VALUES(1, 'A', 1);
  INSERT INTO t1 VALUES(2, 'A', 2);
  INSERT INTO t1 VALUES(2, 'A', 2);
  INSERT INTO t1 VALUES(1, 'B', 1);
  INSERT INTO t1 VALUES(2, 'B', 2);
  INSERT INTO t1 VALUES(3, 'B', 3);
  INSERT INTO t1 VALUES(NULL, 'B', NULL);
  INSERT INTO t1 VALUES(NULL, 'C', NULL);
  INSERT INTO t1 VALUES('d', 'D', 'd');

  CREATE TABLE t2(d, e, f);
  CREATE INDEX t2def ON t2(d, e, f);

  INSERT INTO t2 VALUES(1, 1, 'a');
  INSERT INTO t2 VALUES(1, 1, 'a');
  INSERT INTO t2 VALUES(1, 2, 'a');
  INSERT INTO t2 VALUES(1, 2, 'a');
  INSERT INTO t2 VALUES(1, 2, 'b');
  INSERT INTO t2 VALUES(1, 3, 'b');
  INSERT INTO t2 VALUES(1, 3, 'a');
  INSERT INTO t2 VALUES(1, 3, 'b');
  INSERT INTO t2 VALUES(2, 3, 'x');
  INSERT INTO t2 VALUES(2, 3, 'y');
  INSERT INTO t2 VALUES(2, 3, 'z');

  CREATE TABLE t3(x, y, z);
  INSERT INTO t3 VALUES(1,1,1);
  INSERT INTO t3 VALUES(2,2,2);
}

foreach {tn use_eph sql res} {
  1 0  "SELECT count(DISTINCT c) FROM t1 GROUP BY b"   {2 3 0 1}
  2 1  "SELECT count(DISTINCT a) FROM t1 GROUP BY b"   {2 3 0 1}
  3 1  "SELECT count(DISTINCT a) FROM t1 GROUP BY b+c" {0 1 1 1 1}

  4 0  "SELECT count(DISTINCT f) FROM t2 GROUP BY d, e" {1 2 2 3}
  5 1  "SELECT count(DISTINCT f) FROM t2 GROUP BY d" {2 3}
  6 0  "SELECT count(DISTINCT f) FROM t2 WHERE d IS 1 GROUP BY e" {1 2 2}
} {
  do_test 4.$tn.1 {
    set prg [db eval "EXPLAIN $sql"]
    set idx [lsearch $prg OpenEphemeral]
    expr {$idx>=0}
  } $use_eph

  do_execsql_test 4.$tn.2 $sql $res
}


set t3root [db one {SELECT rootpage FROM sqlite_schema WHERE name='t3'}]
foreach {tn use_t3 sql res} {
  1 1 "SELECT count(*) FROM t3"   2
  2 0 "SELECT count(*) FROM t1"   10
  2 1 "SELECT count(DISTINCT a) FROM t1, t3" 4
  3 0 "SELECT count(DISTINCT a) FROM t1 LEFT JOIN t3" 4
  4 1 "SELECT count(DISTINCT a) FROM t1 LEFT JOIN t3 WHERE t3.x=1" 4
  5 1 "SELECT count(DISTINCT a) FROM t1 LEFT JOIN t3 WHERE t3.x=0" 0
  6 0 "SELECT count(DISTINCT a) FROM t1 LEFT JOIN t3 ON (t3.x=0)"  4
} {
  do_test 5.$tn.1 {
    set bUse 0
    db eval "EXPLAIN $sql" a {
      if {$a(opcode)=="OpenRead" && $a(p2)==$t3root} {set bUse 1}
    }
    set bUse
  } $use_t3

  do_execsql_test 5.$tn.2 $sql $res
}

finish_test