Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Improvements to distinct aggregates such that they can sometimes avoid using an ephermeral table to test for duplicates if the column that is distinct is part of an index. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: |
ef4ac0ddd297bbd38351410c5a387e16 |
User & Date: | drh 2021-03-24 17:28:11 |
References
2021-04-03
| ||
19:23 | Fix a crash in handling queries of the form "SELECT aggregate(DISTINCT tbl.col) FROM ... LEFT JOIN tbl ...". Fixes a problem introduced by [ef4ac0ddd297bbd3]. (check-in: 0dcf808d user: dan tags: trunk) | |
Context
2021-03-24
| ||
19:44 | Comment improvements to on the distinct-agg optimization. Show a line in the EQP output when using an ephemeral table to implement DISTINCT on an aggregate. (check-in: 037ca79e user: drh tags: trunk) | |
17:28 | Improvements to distinct aggregates such that they can sometimes avoid using an ephermeral table to test for duplicates if the column that is distinct is part of an index. (check-in: ef4ac0dd user: drh tags: trunk) | |
17:04 | Fix a harmless compiler warning. (check-in: 26b005a9 user: drh tags: trunk) | |
2021-03-13
| ||
18:23 | Fix a memory leak in the new code on this branch. (Closed-Leaf check-in: 0817cf2e user: dan tags: distinct-agg-opt) | |
Changes
Changes to src/select.c.
︙ | ︙ | |||
737 738 739 740 741 742 743 | if( iOffset>0 ){ sqlite3VdbeAddOp3(v, OP_IfPos, iOffset, iContinue, 1); VdbeCoverage(v); VdbeComment((v, "OFFSET")); } } /* | | | | > | > > > > > | > > > > > > > > | > > > > > > > > > > > > > > > > | > > > > > > > | > > > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > < < | | | > > | | | | | > > > > | 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 | if( iOffset>0 ){ sqlite3VdbeAddOp3(v, OP_IfPos, iOffset, iContinue, 1); VdbeCoverage(v); VdbeComment((v, "OFFSET")); } } /* ** Add code that will check to make sure the array of registers starting at ** iMem form a distinct entry. This is used by both "SELECT DISTINCT ..." and ** distinct aggregates ("SELECT count(DISTINCT <expr>) ..."). Three strategies ** are available. Which is used depends on the value of parameter eTnctType, ** as follows: ** ** WHERE_DISTINCT_UNORDERED/WHERE_DISTINCT_NOOP: ** Parameter iTab is the cursor number of an ephemeral table that must ** be opened before the VM code generated by this routine is executed. ** The ephemeral cursor table is queried for a record identical to the ** record formed by the current array of registers. If one is found, ** jump to VM address addrRepeat. Otherwise, insert a new record into ** the ephemeral cursor and proceed. ** ** The returned value in this case is a copy of parameter iTab. ** ** WHERE_DISTINCT_ORDERED: ** In this case rows are being delivered sorted order sorted. The ephermal ** table is not required in this case. Instead, the current set of ** registers are compared to previous row. If they match, the new row ** is not distinct and control jumps to VM address addrRepeat. Otherwise, ** the VM program proceeds with processing the new row. ** ** The returned value in this case is the register number of the first ** in an array of registers used to store the previous result row so that ** it can be compared to the next. The caller must ensure that this cell ** is initialized to NULL and has the "clear" flag set. ** ** WHERE_DISTINCT_UNIQUE: ** In this case it has already been determined that the rows are distinct. ** No special action is required. The return value is always zero. ** ** Parameter pEList is the list of expressions used to generated the ** contents of each row. It is used by this routine to determine (a) ** how many elements there are in the array of registers and (b) the ** collation sequences that should be used for the comparisons if ** eTnctType is WHERE_DISTINCT_ORDERED. */ static int codeDistinct( Parse *pParse, /* Parsing and code generating context */ int eTnctType, /* WHERE_DISTINCT_* value */ int iTab, /* A sorting index used to test for distinctness */ int addrRepeat, /* Jump to here if not distinct */ ExprList *pEList, /* Expression for each element */ int regElem /* First element */ ){ int iRet = 0; int nResultCol = pEList->nExpr; Vdbe *v = pParse->pVdbe; switch( eTnctType ){ case WHERE_DISTINCT_ORDERED: { int i; int iJump; /* Jump destination */ int regPrev; /* Previous row content */ /* Allocate space for the previous row */ iRet = regPrev = pParse->nMem+1; pParse->nMem += nResultCol; iJump = sqlite3VdbeCurrentAddr(v) + nResultCol; for(i=0; i<nResultCol; i++){ CollSeq *pColl = sqlite3ExprCollSeq(pParse, pEList->a[i].pExpr); if( i<nResultCol-1 ){ sqlite3VdbeAddOp3(v, OP_Ne, regElem+i, iJump, regPrev+i); VdbeCoverage(v); }else{ sqlite3VdbeAddOp3(v, OP_Eq, regElem+i, addrRepeat, regPrev+i); VdbeCoverage(v); } sqlite3VdbeChangeP4(v, -1, (const char *)pColl, P4_COLLSEQ); sqlite3VdbeChangeP5(v, SQLITE_NULLEQ); } assert( sqlite3VdbeCurrentAddr(v)==iJump || pParse->db->mallocFailed ); sqlite3VdbeAddOp3(v, OP_Copy, regElem, regPrev, nResultCol-1); break; } case WHERE_DISTINCT_UNIQUE: { /* nothing to do */ break; } default: { int r1 = sqlite3GetTempReg(pParse); sqlite3VdbeAddOp4Int(v, OP_Found, iTab, addrRepeat, regElem, nResultCol); VdbeCoverage(v); sqlite3VdbeAddOp3(v, OP_MakeRecord, regElem, nResultCol, r1); sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iTab, r1, regElem, nResultCol); sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT); sqlite3ReleaseTempReg(pParse, r1); iRet = iTab; break; } } return iRet; } /* ** A call to this function must be made for each call to codeDistinct(). ** Parameter is passed the value returned by that call to codeDistinct(), ** and iOpenEphAddr the address of the instruction used to open the ** ephemeral table (that may be) used by codeDistinct(). ** ** Argument eTnctType is passed the strategy to be used for any DISTINCT ** operation, as returned by sqlite3WhereIsDistinct(). If the strategy ** is WHERE_DISTINCT_NOOP or WHERE_DISTINCT_UNORDERED, this function is ** a no-op. Otherwise: ** ** WHERE_DISTINCT_UNIQUE: ** In this case the ephemeral table is not required. So instruction ** iOpenEphAddr is replaced by an OP_Noop. ** ** WHERE_DISTINCT_ORDERED: ** In this case the ephemeral table is not required. The instruction ** iOpenEphAddr is replaced by an OP_Null instruction to set register ** iVal to a NULL value with the "clear" flag set (see comments above ** codeDistinct() for details). */ static void fixDistinctOpenEph( Parse *pParse, /* Parsing and code generating context */ int eTnctType, /* WHERE_DISTINCT_* value */ int iVal, /* Value returned by codeDistinct() */ int iOpenEphAddr /* Address of OP_OpenEphemeral instruction for iTab */ ){ if( eTnctType==WHERE_DISTINCT_UNIQUE || eTnctType==WHERE_DISTINCT_ORDERED ){ Vdbe *v = pParse->pVdbe; sqlite3VdbeChangeToNoop(v, iOpenEphAddr); if( eTnctType==WHERE_DISTINCT_ORDERED ){ /* Change the OP_OpenEphemeral to an OP_Null that sets the MEM_Cleared ** bit on the first register of the previous value. This will cause the ** OP_Ne added in codeDistinct() to always fail on the first iteration of ** the loop even if the first row is all NULLs. */ VdbeOp *pOp = sqlite3VdbeGetOp(v, iOpenEphAddr); pOp->opcode = OP_Null; pOp->p1 = 1; pOp->p2 = iVal; } } } #ifdef SQLITE_ENABLE_SORTER_REFERENCES /* ** This function is called as part of inner-loop generation for a SELECT ** statement with an ORDER BY that is not optimized by an index. It ** determines the expressions, if any, that the sorter-reference |
︙ | ︙ | |||
1009 1010 1011 1012 1013 1014 1015 | } /* If the DISTINCT keyword was present on the SELECT statement ** and this row has been seen before, then do not make this row ** part of the result. */ if( hasDistinct ){ | | < < < < | < < | | < < < < < < | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 | } /* If the DISTINCT keyword was present on the SELECT statement ** and this row has been seen before, then do not make this row ** part of the result. */ if( hasDistinct ){ int eType = pDistinct->eTnctType; int iTab = pDistinct->tabTnct; assert( nResultCol==p->pEList->nExpr ); iTab = codeDistinct(pParse, eType, iTab, iContinue, p->pEList, regResult); fixDistinctOpenEph(pParse, eType, iTab, pDistinct->addrTnct); if( pSort==0 ){ codeOffset(v, p->iOffset, iContinue); } } switch( eDest ){ /* In this mode, write each query result to the key of the temporary |
︙ | ︙ | |||
5641 5642 5643 5644 5645 5646 5647 | assert( !ExprHasProperty(pE, EP_xIsSelect) ); if( pE->x.pList==0 || pE->x.pList->nExpr!=1 ){ sqlite3ErrorMsg(pParse, "DISTINCT aggregates must have exactly one " "argument"); pFunc->iDistinct = -1; }else{ KeyInfo *pKeyInfo = sqlite3KeyInfoFromExprList(pParse, pE->x.pList,0,0); | | | | 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 | assert( !ExprHasProperty(pE, EP_xIsSelect) ); if( pE->x.pList==0 || pE->x.pList->nExpr!=1 ){ sqlite3ErrorMsg(pParse, "DISTINCT aggregates must have exactly one " "argument"); pFunc->iDistinct = -1; }else{ KeyInfo *pKeyInfo = sqlite3KeyInfoFromExprList(pParse, pE->x.pList,0,0); pFunc->iDistAddr = sqlite3VdbeAddOp4(v, OP_OpenEphemeral, pFunc->iDistinct, 0, 0, (char*)pKeyInfo, P4_KEYINFO); } } } } /* ** Invoke the OP_AggFinalize opcode for every aggregate function |
︙ | ︙ | |||
5674 5675 5676 5677 5678 5679 5680 | ** the current cursor position. ** ** If regAcc is non-zero and there are no min() or max() aggregates ** in pAggInfo, then only populate the pAggInfo->nAccumulator accumulator ** registers if register regAcc contains 0. The caller will take care ** of setting and clearing regAcc. */ | | > > > > > | 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 | ** the current cursor position. ** ** If regAcc is non-zero and there are no min() or max() aggregates ** in pAggInfo, then only populate the pAggInfo->nAccumulator accumulator ** registers if register regAcc contains 0. The caller will take care ** of setting and clearing regAcc. */ static void updateAccumulator( Parse *pParse, int regAcc, AggInfo *pAggInfo, int eDistinctType ){ Vdbe *v = pParse->pVdbe; int i; int regHit = 0; int addrHitTest = 0; struct AggInfo_func *pF; struct AggInfo_col *pC; |
︙ | ︙ | |||
5720 5721 5722 5723 5724 5725 5726 | nArg = pList->nExpr; regAgg = sqlite3GetTempRange(pParse, nArg); sqlite3ExprCodeExprList(pParse, pList, regAgg, 0, SQLITE_ECEL_DUP); }else{ nArg = 0; regAgg = 0; } | | > | | 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 | nArg = pList->nExpr; regAgg = sqlite3GetTempRange(pParse, nArg); sqlite3ExprCodeExprList(pParse, pList, regAgg, 0, SQLITE_ECEL_DUP); }else{ nArg = 0; regAgg = 0; } if( pF->iDistinct>=0 && pList ){ if( addrNext==0 ){ addrNext = sqlite3VdbeMakeLabel(pParse); } testcase( nArg==0 ); /* Error condition */ testcase( nArg>1 ); /* Also an error */ pF->iDistinct = codeDistinct(pParse, eDistinctType, pF->iDistinct, addrNext, pList, regAgg); } if( pF->pFunc->funcFlags & SQLITE_FUNC_NEEDCOLL ){ CollSeq *pColl = 0; struct ExprList_item *pItem; int j; assert( pList!=0 ); /* pList!=0 if pF->pFunc has NEEDCOLL */ for(j=0, pItem=pList->a; !pColl && j<nArg; j++, pItem++){ |
︙ | ︙ | |||
6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 | int addrOutputRow; /* Start of subroutine that outputs a result row */ int regOutputRow; /* Return address register for output subroutine */ int addrSetAbort; /* Set the abort flag and return */ int addrTopOfLoop; /* Top of the input loop */ int addrSortingIdx; /* The OP_OpenEphemeral for the sorting index */ int addrReset; /* Subroutine for resetting the accumulator */ int regReset; /* Return address register for reset subroutine */ /* If there is a GROUP BY clause we might need a sorting index to ** implement it. Allocate that sorting index now. If it turns out ** that we do not need it after all, the OP_SorterOpen instruction ** will be converted into a Noop. */ pAggInfo->sortingIdx = pParse->nTab++; | > > > > > > > > > > > > > > | 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 | int addrOutputRow; /* Start of subroutine that outputs a result row */ int regOutputRow; /* Return address register for output subroutine */ int addrSetAbort; /* Set the abort flag and return */ int addrTopOfLoop; /* Top of the input loop */ int addrSortingIdx; /* The OP_OpenEphemeral for the sorting index */ int addrReset; /* Subroutine for resetting the accumulator */ int regReset; /* Return address register for reset subroutine */ ExprList *pDistinct = 0; u16 distFlag = 0; int eDist = WHERE_DISTINCT_NOOP; if( pAggInfo->nFunc==1 && pAggInfo->aFunc[0].iDistinct>=0 && pAggInfo->aFunc[0].pFExpr->x.pList ){ Expr *pExpr = pAggInfo->aFunc[0].pFExpr->x.pList->a[0].pExpr; pExpr = sqlite3ExprDup(db, pExpr, 0); pDistinct = sqlite3ExprListDup(db, pGroupBy, 0); pDistinct = sqlite3ExprListAppend(pParse, pDistinct, pExpr); distFlag = pDistinct ? WHERE_WANT_DISTINCT : 0; } /* If there is a GROUP BY clause we might need a sorting index to ** implement it. Allocate that sorting index now. If it turns out ** that we do not need it after all, the OP_SorterOpen instruction ** will be converted into a Noop. */ pAggInfo->sortingIdx = pParse->nTab++; |
︙ | ︙ | |||
6794 6795 6796 6797 6798 6799 6800 | /* Begin a loop that will extract all source rows in GROUP BY order. ** This might involve two separate loops with an OP_Sort in between, or ** it might be a single loop that uses an index to extract information ** in the right order to begin with. */ sqlite3VdbeAddOp2(v, OP_Gosub, regReset, addrReset); SELECTTRACE(1,pParse,p,("WhereBegin\n")); | | | > > | 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 | /* Begin a loop that will extract all source rows in GROUP BY order. ** This might involve two separate loops with an OP_Sort in between, or ** it might be a single loop that uses an index to extract information ** in the right order to begin with. */ sqlite3VdbeAddOp2(v, OP_Gosub, regReset, addrReset); SELECTTRACE(1,pParse,p,("WhereBegin\n")); pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, pGroupBy, pDistinct, WHERE_GROUPBY | (orderByGrp ? WHERE_SORTBYGROUP : 0) | distFlag, 0 ); sqlite3ExprListDelete(db, pDistinct); if( pWInfo==0 ) goto select_end; eDist = sqlite3WhereIsDistinct(pWInfo); SELECTTRACE(1,pParse,p,("WhereBegin returns\n")); if( sqlite3WhereIsOrdered(pWInfo)==pGroupBy->nExpr ){ /* The optimizer is able to deliver rows in group by order so ** we do not have to sort. The OP_OpenEphemeral table will be ** cancelled later because we still need to use the pKeyInfo */ groupBySort = 0; |
︙ | ︙ | |||
6915 6916 6917 6918 6919 6920 6921 | sqlite3VdbeAddOp2(v, OP_Gosub, regReset, addrReset); VdbeComment((v, "reset accumulator")); /* Update the aggregate accumulators based on the content of ** the current row */ sqlite3VdbeJumpHere(v, addr1); | | | 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 | sqlite3VdbeAddOp2(v, OP_Gosub, regReset, addrReset); VdbeComment((v, "reset accumulator")); /* Update the aggregate accumulators based on the content of ** the current row */ sqlite3VdbeJumpHere(v, addr1); updateAccumulator(pParse, iUseFlag, pAggInfo, eDist); sqlite3VdbeAddOp2(v, OP_Integer, 1, iUseFlag); VdbeComment((v, "indicate data in accumulator")); /* End of the loop */ if( groupBySort ){ sqlite3VdbeAddOp2(v, OP_SorterNext, pAggInfo->sortingIdx,addrTopOfLoop); |
︙ | ︙ | |||
6971 6972 6973 6974 6975 6976 6977 | /* Generate a subroutine that will reset the group-by accumulator */ sqlite3VdbeResolveLabel(v, addrReset); resetAccumulator(pParse, pAggInfo); sqlite3VdbeAddOp2(v, OP_Integer, 0, iUseFlag); VdbeComment((v, "indicate accumulator empty")); sqlite3VdbeAddOp1(v, OP_Return, regReset); | | > > > > | 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 | /* Generate a subroutine that will reset the group-by accumulator */ sqlite3VdbeResolveLabel(v, addrReset); resetAccumulator(pParse, pAggInfo); sqlite3VdbeAddOp2(v, OP_Integer, 0, iUseFlag); VdbeComment((v, "indicate accumulator empty")); sqlite3VdbeAddOp1(v, OP_Return, regReset); if( eDist!=WHERE_DISTINCT_NOOP ){ struct AggInfo_func *pF = &pAggInfo->aFunc[0]; fixDistinctOpenEph(pParse, eDist, pF->iDistinct, pF->iDistAddr); } } /* endif pGroupBy. Begin aggregate queries without GROUP BY: */ else { Table *pTab; if( (pTab = isSimpleCount(p, pAggInfo))!=0 ){ /* If isSimpleCount() returns a pointer to a Table structure, then ** the SQL statement is of the form: ** |
︙ | ︙ | |||
7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 | sqlite3VdbeChangeP4(v, -1, (char *)pKeyInfo, P4_KEYINFO); } sqlite3VdbeAddOp2(v, OP_Count, iCsr, pAggInfo->aFunc[0].iMem); sqlite3VdbeAddOp1(v, OP_Close, iCsr); explainSimpleCount(pParse, pTab, pBest); }else{ int regAcc = 0; /* "populate accumulators" flag */ /* If there are accumulator registers but no min() or max() functions ** without FILTER clauses, allocate register regAcc. Register regAcc ** will contain 0 the first time the inner loop runs, and 1 thereafter. ** The code generated by updateAccumulator() uses this to ensure ** that the accumulator registers are (a) updated only once if ** there are no min() or max functions or (b) always updated for the | > > > | 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 | sqlite3VdbeChangeP4(v, -1, (char *)pKeyInfo, P4_KEYINFO); } sqlite3VdbeAddOp2(v, OP_Count, iCsr, pAggInfo->aFunc[0].iMem); sqlite3VdbeAddOp1(v, OP_Close, iCsr); explainSimpleCount(pParse, pTab, pBest); }else{ int regAcc = 0; /* "populate accumulators" flag */ ExprList *pDistinct = 0; u16 distFlag = 0; int eDist; /* If there are accumulator registers but no min() or max() functions ** without FILTER clauses, allocate register regAcc. Register regAcc ** will contain 0 the first time the inner loop runs, and 1 thereafter. ** The code generated by updateAccumulator() uses this to ensure ** that the accumulator registers are (a) updated only once if ** there are no min() or max functions or (b) always updated for the |
︙ | ︙ | |||
7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 | break; } } if( i==pAggInfo->nFunc ){ regAcc = ++pParse->nMem; sqlite3VdbeAddOp2(v, OP_Integer, 0, regAcc); } } /* This case runs if the aggregate has no GROUP BY clause. The ** processing is much simpler since there is only a single row ** of output. */ assert( p->pGroupBy==0 ); resetAccumulator(pParse, pAggInfo); /* If this query is a candidate for the min/max optimization, then ** minMaxFlag will have been previously set to either ** WHERE_ORDERBY_MIN or WHERE_ORDERBY_MAX and pMinMaxOrderBy will ** be an appropriate ORDER BY expression for the optimization. */ assert( minMaxFlag==WHERE_ORDERBY_NORMAL || pMinMaxOrderBy!=0 ); assert( pMinMaxOrderBy==0 || pMinMaxOrderBy->nExpr==1 ); SELECTTRACE(1,pParse,p,("WhereBegin\n")); pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, pMinMaxOrderBy, | > > > | > | > > > > > | 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 | break; } } if( i==pAggInfo->nFunc ){ regAcc = ++pParse->nMem; sqlite3VdbeAddOp2(v, OP_Integer, 0, regAcc); } }else if( pAggInfo->nFunc==1 && pAggInfo->aFunc[0].iDistinct>=0 ){ pDistinct = pAggInfo->aFunc[0].pFExpr->x.pList; distFlag = pDistinct ? WHERE_WANT_DISTINCT : 0; } /* This case runs if the aggregate has no GROUP BY clause. The ** processing is much simpler since there is only a single row ** of output. */ assert( p->pGroupBy==0 ); resetAccumulator(pParse, pAggInfo); /* If this query is a candidate for the min/max optimization, then ** minMaxFlag will have been previously set to either ** WHERE_ORDERBY_MIN or WHERE_ORDERBY_MAX and pMinMaxOrderBy will ** be an appropriate ORDER BY expression for the optimization. */ assert( minMaxFlag==WHERE_ORDERBY_NORMAL || pMinMaxOrderBy!=0 ); assert( pMinMaxOrderBy==0 || pMinMaxOrderBy->nExpr==1 ); SELECTTRACE(1,pParse,p,("WhereBegin\n")); pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, pMinMaxOrderBy, pDistinct, minMaxFlag|distFlag, 0); if( pWInfo==0 ){ goto select_end; } SELECTTRACE(1,pParse,p,("WhereBegin returns\n")); eDist = sqlite3WhereIsDistinct(pWInfo); updateAccumulator(pParse, regAcc, pAggInfo, eDist); if( eDist!=WHERE_DISTINCT_NOOP ){ struct AggInfo_func *pF = &pAggInfo->aFunc[0]; fixDistinctOpenEph(pParse, eDist, pF->iDistinct, pF->iDistAddr); } if( regAcc ) sqlite3VdbeAddOp2(v, OP_Integer, 1, regAcc); if( minMaxFlag ){ sqlite3WhereMinMaxOptEarlyOut(v, pWInfo); } SELECTTRACE(1,pParse,p,("WhereEnd\n")); sqlite3WhereEnd(pWInfo); finalizeAggFunctions(pParse, pAggInfo); |
︙ | ︙ |
Changes to src/sqliteInt.h.
︙ | ︙ | |||
2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 | ** Additional columns are used only as parameters to ** aggregate functions */ struct AggInfo_func { /* For each aggregate function */ Expr *pFExpr; /* Expression encoding the function */ FuncDef *pFunc; /* The aggregate function implementation */ int iMem; /* Memory location that acts as accumulator */ int iDistinct; /* Ephemeral table used to enforce DISTINCT */ } *aFunc; int nFunc; /* Number of entries in aFunc[] */ u32 selId; /* Select to which this AggInfo belongs */ }; /* ** The datatype ynVar is a signed integer, either 16-bit or 32-bit. | > | 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 | ** Additional columns are used only as parameters to ** aggregate functions */ struct AggInfo_func { /* For each aggregate function */ Expr *pFExpr; /* Expression encoding the function */ FuncDef *pFunc; /* The aggregate function implementation */ int iMem; /* Memory location that acts as accumulator */ int iDistinct; /* Ephemeral table used to enforce DISTINCT */ int iDistAddr; /* Address of OP_OpenEphemeral */ } *aFunc; int nFunc; /* Number of entries in aFunc[] */ u32 selId; /* Select to which this AggInfo belongs */ }; /* ** The datatype ynVar is a signed integer, either 16-bit or 32-bit. |
︙ | ︙ |
Changes to src/where.c.
︙ | ︙ | |||
488 489 490 491 492 493 494 | ){ int i; const char *zColl = pIdx->azColl[iCol]; for(i=0; i<pList->nExpr; i++){ Expr *p = sqlite3ExprSkipCollateAndLikely(pList->a[i].pExpr); if( ALWAYS(p!=0) | | | 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 | ){ int i; const char *zColl = pIdx->azColl[iCol]; for(i=0; i<pList->nExpr; i++){ Expr *p = sqlite3ExprSkipCollateAndLikely(pList->a[i].pExpr); if( ALWAYS(p!=0) && (p->op==TK_COLUMN || p->op==TK_AGG_COLUMN) && p->iColumn==pIdx->aiColumn[iCol] && p->iTable==iBase ){ CollSeq *pColl = sqlite3ExprNNCollSeq(pParse, pList->a[i].pExpr); if( 0==sqlite3StrICmp(pColl->zName, zColl) ){ return i; } |
︙ | ︙ | |||
553 554 555 556 557 558 559 | /* If any of the expressions is an IPK column on table iBase, then return ** true. Note: The (p->iTable==iBase) part of this test may be false if the ** current SELECT is a correlated sub-query. */ for(i=0; i<pDistinct->nExpr; i++){ Expr *p = sqlite3ExprSkipCollateAndLikely(pDistinct->a[i].pExpr); if( NEVER(p==0) ) continue; | > | | 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 | /* If any of the expressions is an IPK column on table iBase, then return ** true. Note: The (p->iTable==iBase) part of this test may be false if the ** current SELECT is a correlated sub-query. */ for(i=0; i<pDistinct->nExpr; i++){ Expr *p = sqlite3ExprSkipCollateAndLikely(pDistinct->a[i].pExpr); if( NEVER(p==0) ) continue; if( p->op!=TK_COLUMN && p->op!=TK_AGG_COLUMN ) continue; if( p->iTable==iBase && p->iColumn<0 ) return 1; } /* Loop through all indices on the table, checking each to see if it makes ** the DISTINCT qualifier redundant. It does so if: ** ** 1. The index is itself UNIQUE, and ** |
︙ | ︙ | |||
3818 3819 3820 3821 3822 3823 3824 | ** clause of the form X IS NULL or X=? that reference only outer ** loops. */ for(i=0; i<nOrderBy; i++){ if( MASKBIT(i) & obSat ) continue; pOBExpr = sqlite3ExprSkipCollateAndLikely(pOrderBy->a[i].pExpr); if( NEVER(pOBExpr==0) ) continue; | | | 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 | ** clause of the form X IS NULL or X=? that reference only outer ** loops. */ for(i=0; i<nOrderBy; i++){ if( MASKBIT(i) & obSat ) continue; pOBExpr = sqlite3ExprSkipCollateAndLikely(pOrderBy->a[i].pExpr); if( NEVER(pOBExpr==0) ) continue; if( pOBExpr->op!=TK_COLUMN && pOBExpr->op!=TK_AGG_COLUMN ) continue; if( pOBExpr->iTable!=iCur ) continue; pTerm = sqlite3WhereFindTerm(&pWInfo->sWC, iCur, pOBExpr->iColumn, ~ready, eqOpMask, 0); if( pTerm==0 ) continue; if( pTerm->eOperator==WO_IN ){ /* IN terms are only valid for sorting in the ORDER BY LIMIT ** optimization, and then only if they are actually used |
︙ | ︙ | |||
3947 3948 3949 3950 3951 3952 3953 | if( MASKBIT(i) & obSat ) continue; pOBExpr = sqlite3ExprSkipCollateAndLikely(pOrderBy->a[i].pExpr); testcase( wctrlFlags & WHERE_GROUPBY ); testcase( wctrlFlags & WHERE_DISTINCTBY ); if( NEVER(pOBExpr==0) ) continue; if( (wctrlFlags & (WHERE_GROUPBY|WHERE_DISTINCTBY))==0 ) bOnce = 0; if( iColumn>=XN_ROWID ){ | | | 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 | if( MASKBIT(i) & obSat ) continue; pOBExpr = sqlite3ExprSkipCollateAndLikely(pOrderBy->a[i].pExpr); testcase( wctrlFlags & WHERE_GROUPBY ); testcase( wctrlFlags & WHERE_DISTINCTBY ); if( NEVER(pOBExpr==0) ) continue; if( (wctrlFlags & (WHERE_GROUPBY|WHERE_DISTINCTBY))==0 ) bOnce = 0; if( iColumn>=XN_ROWID ){ if( pOBExpr->op!=TK_COLUMN && pOBExpr->op!=TK_AGG_COLUMN ) continue; if( pOBExpr->iTable!=iCur ) continue; if( pOBExpr->iColumn!=iColumn ) continue; }else{ Expr *pIdxExpr = pIndex->aColExpr->a[j].pExpr; if( sqlite3ExprCompareSkip(pOBExpr, pIdxExpr, iCur) ){ continue; } |
︙ | ︙ | |||
5020 5021 5022 5023 5024 5025 5026 | } } #endif /* Attempt to omit tables from the join that do not affect the result. ** For a table to not affect the result, the following must be true: ** | | > > | 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 | } } #endif /* Attempt to omit tables from the join that do not affect the result. ** For a table to not affect the result, the following must be true: ** ** 1) The query must not be an aggregate. Or it must be an aggregate ** that contains only one aggregate function with the DISTINCT ** qualifier. e.g. "SELECT count(DISTINCT ...) FROM". ** 2) The table must be the RHS of a LEFT JOIN. ** 3) Either the query must be DISTINCT, or else the ON or USING clause ** must contain a constraint that limits the scan of the table to ** at most a single row. ** 4) The table must not be referenced by any part of the query apart ** from its own USING or ON clause. ** |
︙ | ︙ |
Changes to test/distinctagg.test.
︙ | ︙ | |||
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 | # focus of this script is the DISTINCT modifier on aggregate functions. # # $Id: distinctagg.test,v 1.3 2009/02/09 13:19:28 drh Exp $ set testdir [file dirname $argv0] source $testdir/tester.tcl do_test distinctagg-1.1 { execsql { CREATE TABLE t1(a,b,c); INSERT INTO t1 VALUES(1,2,3); INSERT INTO t1 VALUES(1,3,4); INSERT INTO t1 VALUES(1,3,5); SELECT count(distinct a), count(distinct b), count(distinct c), count(all a) FROM t1; } } {1 2 3 3} do_test distinctagg-1.2 { execsql { | > | | 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 | # focus of this script is the DISTINCT modifier on aggregate functions. # # $Id: distinctagg.test,v 1.3 2009/02/09 13:19:28 drh Exp $ set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix distinctagg do_test distinctagg-1.1 { execsql { CREATE TABLE t1(a,b,c); INSERT INTO t1 VALUES(1,2,3); INSERT INTO t1 VALUES(1,3,4); INSERT INTO t1 VALUES(1,3,5); SELECT count(distinct a), count(distinct b), count(distinct c), count(all a) FROM t1; } } {1 2 3 3} do_test distinctagg-1.2 { execsql { SELECT b, count(distinct c) FROM t1 GROUP BY b } } {2 1 3 2} do_test distinctagg-1.3 { execsql { INSERT INTO t1 SELECT a+1, b+3, c+5 FROM t1; INSERT INTO t1 SELECT a+2, b+6, c+10 FROM t1; INSERT INTO t1 SELECT a+4, b+12, c+20 FROM t1; |
︙ | ︙ | |||
54 55 56 57 58 59 60 61 62 | } } {1 {DISTINCT aggregates must have exactly one argument}} do_test distinctagg-2.2 { catchsql { SELECT group_concat(distinct a,b) FROM t1; } } {1 {DISTINCT aggregates must have exactly one argument}} finish_test | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 | } } {1 {DISTINCT aggregates must have exactly one argument}} do_test distinctagg-2.2 { catchsql { SELECT group_concat(distinct a,b) FROM t1; } } {1 {DISTINCT aggregates must have exactly one argument}} #-------------------------------------------------------------------------- reset_db do_execsql_test 3.0 { CREATE TABLE t1(a, b, c); CREATE TABLE t2(d, e, f); INSERT INTO t1 VALUES (1, 1, 1); INSERT INTO t1 VALUES (2, 2, 2); INSERT INTO t1 VALUES (3, 3, 3); INSERT INTO t1 VALUES (4, 1, 4); INSERT INTO t1 VALUES (5, 2, 1); INSERT INTO t1 VALUES (5, 3, 2); INSERT INTO t1 VALUES (4, 1, 3); INSERT INTO t1 VALUES (3, 2, 4); INSERT INTO t1 VALUES (2, 3, 1); INSERT INTO t1 VALUES (1, 1, 2); INSERT INTO t2 VALUES('a', 'a', 'a'); INSERT INTO t2 VALUES('b', 'b', 'b'); INSERT INTO t2 VALUES('c', 'c', 'c'); CREATE INDEX t1a ON t1(a); CREATE INDEX t1bc ON t1(b, c); } foreach {tn use_eph sql res} { 1 0 "SELECT count(DISTINCT a) FROM t1" 5 2 0 "SELECT count(DISTINCT b) FROM t1" 3 3 1 "SELECT count(DISTINCT c) FROM t1" 4 4 0 "SELECT count(DISTINCT c) FROM t1 WHERE b=3" 3 5 0 "SELECT count(DISTINCT rowid) FROM t1" 10 6 0 "SELECT count(DISTINCT a) FROM t1, t2" 5 7 0 "SELECT count(DISTINCT a) FROM t2, t1" 5 8 1 "SELECT count(DISTINCT a+b) FROM t1, t2, t2, t2" 6 9 0 "SELECT count(DISTINCT c) FROM t1 WHERE c=2" 1 10 1 "SELECT count(DISTINCT t1.rowid) FROM t1, t2" 10 } { do_test 3.$tn.1 { set prg [db eval "EXPLAIN $sql"] set idx [lsearch $prg OpenEphemeral] expr {$idx>=0} } $use_eph do_execsql_test 3.$tn.2 $sql $res } do_execsql_test 3.10 { SELECT a, count(DISTINCT b) FROM t1 GROUP BY a; } { 1 1 2 2 3 2 4 1 5 2 } #-------------------------------------------------------------------------- reset_db do_execsql_test 3.0 { CREATE TABLE t1(a, b, c); CREATE INDEX t1a ON t1(a); CREATE INDEX t1bc ON t1(b, c); INSERT INTO t1 VALUES(1, 'A', 1); INSERT INTO t1 VALUES(1, 'A', 1); INSERT INTO t1 VALUES(2, 'A', 2); INSERT INTO t1 VALUES(2, 'A', 2); INSERT INTO t1 VALUES(1, 'B', 1); INSERT INTO t1 VALUES(2, 'B', 2); INSERT INTO t1 VALUES(3, 'B', 3); INSERT INTO t1 VALUES(NULL, 'B', NULL); INSERT INTO t1 VALUES(NULL, 'C', NULL); INSERT INTO t1 VALUES('d', 'D', 'd'); CREATE TABLE t2(d, e, f); CREATE INDEX t2def ON t2(d, e, f); INSERT INTO t2 VALUES(1, 1, 'a'); INSERT INTO t2 VALUES(1, 1, 'a'); INSERT INTO t2 VALUES(1, 2, 'a'); INSERT INTO t2 VALUES(1, 2, 'a'); INSERT INTO t2 VALUES(1, 2, 'b'); INSERT INTO t2 VALUES(1, 3, 'b'); INSERT INTO t2 VALUES(1, 3, 'a'); INSERT INTO t2 VALUES(1, 3, 'b'); INSERT INTO t2 VALUES(2, 3, 'x'); INSERT INTO t2 VALUES(2, 3, 'y'); INSERT INTO t2 VALUES(2, 3, 'z'); CREATE TABLE t3(x, y, z); INSERT INTO t3 VALUES(1,1,1); INSERT INTO t3 VALUES(2,2,2); } foreach {tn use_eph sql res} { 1 0 "SELECT count(DISTINCT c) FROM t1 GROUP BY b" {2 3 0 1} 2 1 "SELECT count(DISTINCT a) FROM t1 GROUP BY b" {2 3 0 1} 3 1 "SELECT count(DISTINCT a) FROM t1 GROUP BY b+c" {0 1 1 1 1} 4 0 "SELECT count(DISTINCT f) FROM t2 GROUP BY d, e" {1 2 2 3} 5 1 "SELECT count(DISTINCT f) FROM t2 GROUP BY d" {2 3} 6 0 "SELECT count(DISTINCT f) FROM t2 WHERE d IS 1 GROUP BY e" {1 2 2} } { do_test 4.$tn.1 { set prg [db eval "EXPLAIN $sql"] set idx [lsearch $prg OpenEphemeral] expr {$idx>=0} } $use_eph do_execsql_test 4.$tn.2 $sql $res } set t3root [db one {SELECT rootpage FROM sqlite_schema WHERE name='t3'}] foreach {tn use_t3 sql res} { 1 1 "SELECT count(*) FROM t3" 2 2 0 "SELECT count(*) FROM t1" 10 2 1 "SELECT count(DISTINCT a) FROM t1, t3" 4 3 0 "SELECT count(DISTINCT a) FROM t1 LEFT JOIN t3" 4 4 1 "SELECT count(DISTINCT a) FROM t1 LEFT JOIN t3 WHERE t3.x=1" 4 5 1 "SELECT count(DISTINCT a) FROM t1 LEFT JOIN t3 WHERE t3.x=0" 0 6 0 "SELECT count(DISTINCT a) FROM t1 LEFT JOIN t3 ON (t3.x=0)" 4 } { do_test 5.$tn.1 { set bUse 0 db eval "EXPLAIN $sql" a { if {$a(opcode)=="OpenRead" && $a(p2)==$t3root} {set bUse 1} } set bUse } $use_t3 do_execsql_test 5.$tn.2 $sql $res } finish_test |