Index: src/delete.c ================================================================== --- src/delete.c +++ src/delete.c @@ -369,11 +369,13 @@ int regRowid; /* Actual register containing rowids */ /* Collect rowids of every row to be deleted. */ sqlite3VdbeAddOp2(v, OP_Null, 0, iRowSet); - pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere,0,WHERE_DUPLICATES_OK); + pWInfo = sqlite3WhereBegin( + pParse, pTabList, pWhere, 0, 0, WHERE_DUPLICATES_OK + ); if( pWInfo==0 ) goto delete_from_cleanup; regRowid = sqlite3ExprCodeGetColumn(pParse, pTab, -1, iCur, iRowid); sqlite3VdbeAddOp2(v, OP_RowSetAdd, iRowSet, regRowid); if( db->flags & SQLITE_CountRows ){ sqlite3VdbeAddOp2(v, OP_AddImm, memCnt, 1); Index: src/fkey.c ================================================================== --- src/fkey.c +++ src/fkey.c @@ -558,11 +558,11 @@ /* Create VDBE to loop through the entries in pSrc that match the WHERE ** clause. If the constraint is not deferred, throw an exception for ** each row found. Otherwise, for deferred constraints, increment the ** deferred constraint counter by nIncr for each row selected. */ - pWInfo = sqlite3WhereBegin(pParse, pSrc, pWhere, 0, 0); + pWInfo = sqlite3WhereBegin(pParse, pSrc, pWhere, 0, 0, 0); if( nIncr>0 && pFKey->isDeferred==0 ){ sqlite3ParseToplevel(pParse)->mayAbort = 1; } sqlite3VdbeAddOp2(v, OP_FkCounter, pFKey->isDeferred, nIncr); if( pWInfo ){ Index: src/select.c ================================================================== --- src/select.c +++ src/select.c @@ -3719,10 +3719,11 @@ Expr *pHaving; /* The HAVING clause. May be NULL */ int isDistinct; /* True if the DISTINCT keyword is present */ int distinct; /* Table to use for the distinct set */ int rc = 1; /* Value to return from this function */ int addrSortIndex; /* Address of an OP_OpenEphemeral instruction */ + int addrDistinctIndex; /* Address of an OP_OpenEphemeral instruction */ AggInfo sAggInfo; /* Information used by aggregate queries */ int iEnd; /* Address of the end of the query */ sqlite3 *db; /* The database connection */ #ifndef SQLITE_OMIT_EXPLAIN @@ -3845,20 +3846,10 @@ explainSetInteger(pParse->iSelectId, iRestoreSelectId); return rc; } #endif - /* If possible, rewrite the query to use GROUP BY instead of DISTINCT. - ** GROUP BY might use an index, DISTINCT never does. - */ - assert( p->pGroupBy==0 || (p->selFlags & SF_Aggregate)!=0 ); - if( (p->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct ){ - p->pGroupBy = sqlite3ExprListDup(db, p->pEList, 0); - pGroupBy = p->pGroupBy; - p->selFlags &= ~SF_Distinct; - } - /* If there is both a GROUP BY and an ORDER BY clause and they are ** identical, then disable the ORDER BY clause since the GROUP BY ** will cause elements to come out in the correct order. This is ** an optimization - the correct answer should result regardless. ** Use the SQLITE_GroupByOrder flag with SQLITE_TESTCTRL_OPTIMIZER @@ -3866,10 +3857,34 @@ */ if( sqlite3ExprListCompare(p->pGroupBy, pOrderBy)==0 && (db->flags & SQLITE_GroupByOrder)==0 ){ pOrderBy = 0; } + + /* If the query is DISTINCT with an ORDER BY but is not an aggregate, and + ** if the select-list is the same as the ORDER BY list, then this query + ** can be rewritten as a GROUP BY. In other words, this: + ** + ** SELECT DISTINCT xyz FROM ... ORDER BY xyz + ** + ** is transformed to: + ** + ** SELECT xyz FROM ... GROUP BY xyz + ** + ** The second form is preferred as a single index (or temp-table) may be + ** used for both the ORDER BY and DISTINCT processing. As originally + ** written the query must use a temp-table for at least one of the ORDER + ** BY and DISTINCT, and an index or separate temp-table for the other. + */ + if( (p->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct + && sqlite3ExprListCompare(pOrderBy, p->pEList)==0 + ){ + p->selFlags &= ~SF_Distinct; + p->pGroupBy = sqlite3ExprListDup(db, p->pEList, 0); + pGroupBy = p->pGroupBy; + pOrderBy = 0; + } /* If there is an ORDER BY clause, then this sorting ** index might end up being unused if the data can be ** extracted in pre-sorted order. If that is the case, then the ** OP_OpenEphemeral instruction will be changed to an OP_Noop once @@ -3902,26 +3917,25 @@ /* Open a virtual index to use for the distinct set. */ if( p->selFlags & SF_Distinct ){ KeyInfo *pKeyInfo; - assert( isAgg || pGroupBy ); distinct = pParse->nTab++; pKeyInfo = keyInfoFromExprList(pParse, p->pEList); - sqlite3VdbeAddOp4(v, OP_OpenEphemeral, distinct, 0, 0, - (char*)pKeyInfo, P4_KEYINFO_HANDOFF); + addrDistinctIndex = sqlite3VdbeAddOp4(v, OP_OpenEphemeral, distinct, 0, 0, + (char*)pKeyInfo, P4_KEYINFO_HANDOFF); sqlite3VdbeChangeP5(v, BTREE_UNORDERED); }else{ distinct = -1; } /* Aggregate and non-aggregate queries are handled differently */ if( !isAgg && pGroupBy==0 ){ - /* This case is for non-aggregate queries - ** Begin the database scan - */ - pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pOrderBy, 0); + ExprList *pDist = (isDistinct ? p->pEList : 0); + + /* Begin the database scan. */ + pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pOrderBy, pDist, 0); if( pWInfo==0 ) goto select_end; if( pWInfo->nRowOut < p->nSelectRow ) p->nSelectRow = pWInfo->nRowOut; /* If sorting index that was created by a prior OP_OpenEphemeral ** instruction ended up not being needed, then change the OP_OpenEphemeral @@ -3930,14 +3944,55 @@ if( addrSortIndex>=0 && pOrderBy==0 ){ sqlite3VdbeChangeToNoop(v, addrSortIndex, 1); p->addrOpenEphm[2] = -1; } - /* Use the standard inner loop - */ - assert(!isDistinct); - selectInnerLoop(pParse, p, pEList, 0, 0, pOrderBy, -1, pDest, + if( pWInfo->eDistinct ){ + VdbeOp *pOp; /* No longer required OpenEphemeral instr. */ + + pOp = sqlite3VdbeGetOp(v, addrDistinctIndex); + + assert( isDistinct ); + assert( pWInfo->eDistinct==WHERE_DISTINCT_ORDERED + || pWInfo->eDistinct==WHERE_DISTINCT_UNIQUE + ); + distinct = -1; + if( pWInfo->eDistinct==WHERE_DISTINCT_ORDERED ){ + int iJump; + int iExpr; + int iFlag = ++pParse->nMem; + int iBase = pParse->nMem+1; + int iBase2 = iBase + pEList->nExpr; + pParse->nMem += (pEList->nExpr*2); + + /* Change the OP_OpenEphemeral coded earlier to an OP_Integer. The + ** OP_Integer initializes the "first row" flag. */ + pOp->opcode = OP_Integer; + pOp->p1 = 1; + pOp->p2 = iFlag; + + sqlite3ExprCodeExprList(pParse, pEList, iBase, 1); + iJump = sqlite3VdbeCurrentAddr(v) + 1 + pEList->nExpr + 1 + 1; + sqlite3VdbeAddOp2(v, OP_If, iFlag, iJump-1); + for(iExpr=0; iExprnExpr; iExpr++){ + CollSeq *pColl = sqlite3ExprCollSeq(pParse, pEList->a[iExpr].pExpr); + sqlite3VdbeAddOp3(v, OP_Ne, iBase+iExpr, iJump, iBase2+iExpr); + sqlite3VdbeChangeP4(v, -1, (const char *)pColl, P4_COLLSEQ); + sqlite3VdbeChangeP5(v, SQLITE_NULLEQ); + } + sqlite3VdbeAddOp2(v, OP_Goto, 0, pWInfo->iContinue); + + sqlite3VdbeAddOp2(v, OP_Integer, 0, iFlag); + assert( sqlite3VdbeCurrentAddr(v)==iJump ); + sqlite3VdbeAddOp3(v, OP_Move, iBase, iBase2, pEList->nExpr); + }else{ + pOp->opcode = OP_Noop; + } + } + + /* Use the standard inner loop. */ + selectInnerLoop(pParse, p, pEList, 0, 0, pOrderBy, distinct, pDest, pWInfo->iContinue, pWInfo->iBreak); /* End the database scan loop. */ sqlite3WhereEnd(pWInfo); @@ -4043,11 +4098,11 @@ ** This might involve two separate loops with an OP_Sort in between, or ** it might be a single loop that uses an index to extract information ** in the right order to begin with. */ sqlite3VdbeAddOp2(v, OP_Gosub, regReset, addrReset); - pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pGroupBy, 0); + pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pGroupBy, 0, 0); if( pWInfo==0 ) goto select_end; if( pGroupBy==0 ){ /* The optimizer is able to deliver rows in group by order so ** we do not have to sort. The OP_OpenEphemeral table will be ** cancelled later because we still need to use the pKeyInfo @@ -4305,11 +4360,11 @@ /* This case runs if the aggregate has no GROUP BY clause. The ** processing is much simpler since there is only a single row ** of output. */ resetAccumulator(pParse, &sAggInfo); - pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pMinMax, flag); + pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pMinMax, 0, flag); if( pWInfo==0 ){ sqlite3ExprListDelete(db, pDel); goto select_end; } updateAccumulator(pParse, &sAggInfo); Index: src/sqliteInt.h ================================================================== --- src/sqliteInt.h +++ src/sqliteInt.h @@ -1964,10 +1964,11 @@ struct WhereInfo { Parse *pParse; /* Parsing and code generating context */ u16 wctrlFlags; /* Flags originally passed to sqlite3WhereBegin() */ u8 okOnePass; /* Ok to use one-pass algorithm for UPDATE or DELETE */ u8 untestedTerms; /* Not all WHERE terms resolved by outer loop */ + u8 eDistinct; SrcList *pTabList; /* List of tables in the join */ int iTop; /* The very beginning of the WHERE loop */ int iContinue; /* Jump here to continue with next record */ int iBreak; /* Jump here to break out of the loop */ int nLevel; /* Number of nested loop */ @@ -1975,10 +1976,13 @@ double savedNQueryLoop; /* pParse->nQueryLoop outside the WHERE loop */ double nRowOut; /* Estimated number of output rows */ WhereLevel a[1]; /* Information about each nest loop in WHERE */ }; +#define WHERE_DISTINCT_UNIQUE 1 +#define WHERE_DISTINCT_ORDERED 2 + /* ** A NameContext defines a context in which to resolve table and column ** names. The context consists of a list of tables (the pSrcList) field and ** a list of named expression (pEList). The named expression list may ** be NULL. The pSrc corresponds to the FROM clause of a SELECT or @@ -2736,11 +2740,11 @@ #if defined(SQLITE_ENABLE_UPDATE_DELETE_LIMIT) && !defined(SQLITE_OMIT_SUBQUERY) Expr *sqlite3LimitWhere(Parse *, SrcList *, Expr *, ExprList *, Expr *, Expr *, char *); #endif void sqlite3DeleteFrom(Parse*, SrcList*, Expr*); void sqlite3Update(Parse*, SrcList*, ExprList*, Expr*, int); -WhereInfo *sqlite3WhereBegin(Parse*, SrcList*, Expr*, ExprList**, u16); +WhereInfo *sqlite3WhereBegin(Parse*, SrcList*, Expr*, ExprList**,ExprList*,u16); void sqlite3WhereEnd(WhereInfo*); int sqlite3ExprCodeGetColumn(Parse*, Table*, int, int, int); void sqlite3ExprCodeGetColumnOfTable(Vdbe*, Table*, int, int, int); void sqlite3ExprCodeMove(Parse*, int, int, int); void sqlite3ExprCodeCopy(Parse*, int, int, int); Index: src/update.c ================================================================== --- src/update.c +++ src/update.c @@ -309,11 +309,13 @@ } /* Begin the database scan */ sqlite3VdbeAddOp2(v, OP_Null, 0, regOldRowid); - pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere,0, WHERE_ONEPASS_DESIRED); + pWInfo = sqlite3WhereBegin( + pParse, pTabList, pWhere, 0, 0, WHERE_ONEPASS_DESIRED + ); if( pWInfo==0 ) goto update_cleanup; okOnePass = pWInfo->okOnePass; /* Remember the rowid of every item to be updated. */ Index: src/where.c ================================================================== --- src/where.c +++ src/where.c @@ -251,10 +251,11 @@ #define WHERE_REVERSE 0x02000000 /* Scan in reverse order */ #define WHERE_UNIQUE 0x04000000 /* Selects no more than one row */ #define WHERE_VIRTUALTABLE 0x08000000 /* Use virtual-table processing */ #define WHERE_MULTI_OR 0x10000000 /* OR using multiple indices */ #define WHERE_TEMP_INDEX 0x20000000 /* Uses an ephemeral index */ +#define WHERE_DISTINCT 0x40000000 /* Correct order for DISTINCT */ /* ** Initialize a preallocated WhereClause structure. */ static void whereClauseInit( @@ -1395,10 +1396,162 @@ } } return 0; } +/* +** This function searches the expression list passed as the second argument +** for an expression of type TK_COLUMN that refers to the same column and +** uses the same collation sequence as the iCol'th column of index pIdx. +** Argument iBase is the cursor number used for the table that pIdx refers +** to. +** +** If such an expression is found, its index in pList->a[] is returned. If +** no expression is found, -1 is returned. +*/ +static int findIndexCol( + Parse *pParse, /* Parse context */ + ExprList *pList, /* Expression list to search */ + int iBase, /* Cursor for table associated with pIdx */ + Index *pIdx, /* Index to match column of */ + int iCol /* Column of index to match */ +){ + int i; + const char *zColl = pIdx->azColl[iCol]; + + for(i=0; inExpr; i++){ + Expr *p = pList->a[i].pExpr; + if( pIdx->aiColumn[iCol]==p->iColumn && iBase==p->iTable ){ + CollSeq *pColl = sqlite3ExprCollSeq(pParse, p); + if( pColl && 0==sqlite3StrICmp(pColl->zName, zColl) ){ + return i; + } + } + } + + return -1; +} + +/* +** This routine determines if pIdx can be used to assist in processing a +** DISTINCT qualifier. In other words, it tests whether or not using this +** index for the outer loop guarantees that rows with equal values for +** all expressions in the pDistinct list are delivered grouped together. +** +** For example, the query +** +** SELECT DISTINCT a, b, c FROM tbl WHERE a = ? +** +** can benefit from any index on columns "b" and "c". +*/ +static int isDistinctIndex( + Parse *pParse, /* Parsing context */ + WhereClause *pWC, /* The WHERE clause */ + Index *pIdx, /* The index being considered */ + int base, /* Cursor number for the table pIdx is on */ + ExprList *pDistinct, /* The DISTINCT expressions */ + int nEqCol /* Number of index columns with == */ +){ + Bitmask mask = 0; /* Mask of unaccounted for pDistinct exprs */ + int i; /* Iterator variable */ + + if( pIdx->zName==0 || pDistinct==0 || pDistinct->nExpr>=BMS ) return 0; + + /* Loop through all the expressions in the distinct list. If any of them + ** are not simple column references, return early. Otherwise, test if the + ** WHERE clause contains a "col=X" clause. If it does, the expression + ** can be ignored. If it does not, and the column does not belong to the + ** same table as index pIdx, return early. Finally, if there is no + ** matching "col=X" expression and the column is on the same table as pIdx, + ** set the corresponding bit in variable mask. + */ + for(i=0; inExpr; i++){ + WhereTerm *pTerm; + Expr *p = pDistinct->a[i].pExpr; + if( p->op!=TK_COLUMN ) return 0; + pTerm = findTerm(pWC, p->iTable, p->iColumn, ~(Bitmask)0, WO_EQ, 0); + if( pTerm ){ + Expr *pX = pTerm->pExpr; + CollSeq *p1 = sqlite3BinaryCompareCollSeq(pParse, pX->pLeft, pX->pRight); + CollSeq *p2 = sqlite3ExprCollSeq(pParse, p); + if( p1==p2 ) continue; + } + if( p->iTable!=base ) return 0; + mask |= (((Bitmask)1) << i); + } + + for(i=nEqCol; mask && inColumn; i++){ + int iExpr = findIndexCol(pParse, pDistinct, base, pIdx, i); + if( iExpr<0 ) break; + mask &= ~(((Bitmask)1) << iExpr); + } + + return (mask==0); +} + + +/* +** Return true if the DISTINCT expression-list passed as the third argument +** is redundant. A DISTINCT list is redundant if the database contains a +** UNIQUE index that guarantees that the result of the query will be distinct +** anyway. +*/ +static int isDistinctRedundant( + Parse *pParse, + SrcList *pTabList, + WhereClause *pWC, + ExprList *pDistinct +){ + Table *pTab; + Index *pIdx; + int i; + int iBase; + + /* If there is more than one table or sub-select in the FROM clause of + ** this query, then it will not be possible to show that the DISTINCT + ** clause is redundant. */ + if( pTabList->nSrc!=1 ) return 0; + iBase = pTabList->a[0].iCursor; + pTab = pTabList->a[0].pTab; + + /* If any of the expressions is an IPK column on table iBase, then return + ** true. Note: The (p->iTable==iBase) part of this test may be false if the + ** current SELECT is a correlated sub-query. + */ + for(i=0; inExpr; i++){ + Expr *p = pDistinct->a[i].pExpr; + if( p->op==TK_COLUMN && p->iTable==iBase && p->iColumn<0 ) return 1; + } + + /* Loop through all indices on the table, checking each to see if it makes + ** the DISTINCT qualifier redundant. It does so if: + ** + ** 1. The index is itself UNIQUE, and + ** + ** 2. All of the columns in the index are either part of the pDistinct + ** list, or else the WHERE clause contains a term of the form "col=X", + ** where X is a constant value. The collation sequences of the + ** comparison and select-list expressions must match those of the index. + */ + for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ + if( pIdx->onError==OE_None ) continue; + for(i=0; inColumn; i++){ + int iCol = pIdx->aiColumn[i]; + if( 0==findTerm(pWC, iBase, iCol, ~(Bitmask)0, WO_EQ, pIdx) + && 0>findIndexCol(pParse, pDistinct, iBase, pIdx, i) + ){ + break; + } + } + if( i==pIdx->nColumn ){ + /* This index implies that the DISTINCT qualifier is redundant. */ + return 1; + } + } + + return 0; +} /* ** This routine decides if pIdx can be used to satisfy the ORDER BY ** clause. If it can, it returns 1. If pIdx cannot satisfy the ** ORDER BY clause, this routine returns 0. @@ -1431,11 +1584,14 @@ int sortOrder = 0; /* XOR of index and ORDER BY sort direction */ int nTerm; /* Number of ORDER BY terms */ struct ExprList_item *pTerm; /* A term of the ORDER BY clause */ sqlite3 *db = pParse->db; - assert( pOrderBy!=0 ); + if( !pOrderBy ) return 0; + if( wsFlags & WHERE_COLUMN_IN ) return 0; + if( pIdx->bUnordered ) return 0; + nTerm = pOrderBy->nExpr; assert( nTerm>0 ); /* Argument pIdx must either point to a 'real' named index structure, ** or an index structure allocated on the stack by bestBtreeIndex() to @@ -1521,11 +1677,11 @@ */ j = nTerm; } } - *pbRev = sortOrder!=0; + if( pbRev ) *pbRev = sortOrder!=0; if( j>=nTerm ){ /* All terms of the ORDER BY clause are covered by this index so ** this index can be used for sorting. */ return 1; } @@ -2687,10 +2843,11 @@ WhereClause *pWC, /* The WHERE clause */ struct SrcList_item *pSrc, /* The FROM clause term to search */ Bitmask notReady, /* Mask of cursors not available for indexing */ Bitmask notValid, /* Cursors not available for any purpose */ ExprList *pOrderBy, /* The ORDER BY clause */ + ExprList *pDistinct, /* The select-list if query is DISTINCT */ WhereCost *pCost /* Lowest cost query plan */ ){ int iCur = pSrc->iCursor; /* The cursor of the table to be accessed */ Index *pProbe; /* An index we are evaluating */ Index *pIdx; /* Copy of pProbe, or zero for IPK index */ @@ -2827,11 +2984,12 @@ int nEq; /* Number of == or IN terms matching index */ int bInEst = 0; /* True if "x IN (SELECT...)" seen */ int nInMul = 1; /* Number of distinct equalities to lookup */ int estBound = 100; /* Estimated reduction in search space */ int nBound = 0; /* Number of range constraints seen */ - int bSort = 0; /* True if external sort required */ + int bSort = !!pOrderBy; /* True if external sort required */ + int bDist = !!pDistinct; /* True if index cannot help with DISTINCT */ int bLookup = 0; /* True if not a covering index */ WhereTerm *pTerm; /* A single term of the WHERE clause */ #ifdef SQLITE_ENABLE_STAT2 WhereTerm *pFirstTerm = 0; /* First term matching the index */ #endif @@ -2891,21 +3049,24 @@ /* If there is an ORDER BY clause and the index being considered will ** naturally scan rows in the required order, set the appropriate flags ** in wsFlags. Otherwise, if there is an ORDER BY clause but the index ** will scan rows in a different order, set the bSort variable. */ - if( pOrderBy ){ - if( (wsFlags & WHERE_COLUMN_IN)==0 - && pProbe->bUnordered==0 - && isSortingIndex(pParse, pWC->pMaskSet, pProbe, iCur, pOrderBy, - nEq, wsFlags, &rev) - ){ - wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_ORDERBY; - wsFlags |= (rev ? WHERE_REVERSE : 0); - }else{ - bSort = 1; - } + if( isSortingIndex( + pParse, pWC->pMaskSet, pProbe, iCur, pOrderBy, nEq, wsFlags, &rev) + ){ + bSort = 0; + wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_ORDERBY; + wsFlags |= (rev ? WHERE_REVERSE : 0); + } + + /* If there is a DISTINCT qualifier and this index will scan rows in + ** order of the DISTINCT expressions, clear bDist and set the appropriate + ** flags in wsFlags. */ + if( isDistinctIndex(pParse, pWC, pProbe, iCur, pDistinct, nEq) ){ + bDist = 0; + wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_DISTINCT; } /* If currently calculating the cost of using an index (not the IPK ** index), determine if all required column data may be obtained without ** using the main table (i.e. if the index is a covering @@ -3018,10 +3179,13 @@ ** difference and select C of 3.0. */ if( bSort ){ cost += nRow*estLog(nRow)*3; } + if( bDist ){ + cost += nRow*estLog(nRow)*3; + } /**** Cost of using this index has now been computed ****/ /* If there are additional constraints on this table that cannot ** be used with the current index, but which might lower the number @@ -3163,11 +3327,11 @@ } sqlite3DbFree(pParse->db, p); }else #endif { - bestBtreeIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, pCost); + bestBtreeIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, 0, pCost); } } /* ** Disable a term in the WHERE clause. Except, do not disable the term @@ -4125,11 +4289,11 @@ for(ii=0; iinTerm; ii++){ WhereTerm *pOrTerm = &pOrWc->a[ii]; if( pOrTerm->leftCursor==iCur || pOrTerm->eOperator==WO_AND ){ WhereInfo *pSubWInfo; /* Info for single OR-term scan */ /* Loop through table entries that match term pOrTerm. */ - pSubWInfo = sqlite3WhereBegin(pParse, pOrTab, pOrTerm->pExpr, 0, + pSubWInfo = sqlite3WhereBegin(pParse, pOrTab, pOrTerm->pExpr, 0, 0, WHERE_OMIT_OPEN | WHERE_OMIT_CLOSE | WHERE_FORCE_TABLE | WHERE_ONETABLE_ONLY); if( pSubWInfo ){ explainOneScan( pParse, pOrTab, &pSubWInfo->a[0], iLevel, pLevel->iFrom, 0 @@ -4366,10 +4530,11 @@ WhereInfo *sqlite3WhereBegin( Parse *pParse, /* The parser context */ SrcList *pTabList, /* A list of all tables to be scanned */ Expr *pWhere, /* The WHERE clause */ ExprList **ppOrderBy, /* An ORDER BY clause, or NULL */ + ExprList *pDistinct, /* The select-list for DISTINCT queries - or NULL */ u16 wctrlFlags /* One of the WHERE_* flags defined in sqliteInt.h */ ){ int i; /* Loop counter */ int nByteWInfo; /* Num. bytes allocated for WhereInfo struct */ int nTabList; /* Number of elements in pTabList */ @@ -4492,10 +4657,19 @@ */ exprAnalyzeAll(pTabList, pWC); if( db->mallocFailed ){ goto whereBeginError; } + + /* Check if the DISTINCT qualifier, if there is one, is redundant. + ** If it is, then set pDistinct to NULL and WhereInfo.eDistinct to + ** WHERE_DISTINCT_UNIQUE to tell the caller to ignore the DISTINCT. + */ + if( pDistinct && isDistinctRedundant(pParse, pTabList, pWC, pDistinct) ){ + pDistinct = 0; + pWInfo->eDistinct = WHERE_DISTINCT_UNIQUE; + } /* Chose the best index to use for each table in the FROM clause. ** ** This loop fills in the following fields: ** @@ -4576,10 +4750,11 @@ Bitmask mask; /* Mask of tables not yet ready */ for(j=iFrom, pTabItem=&pTabList->a[j]; jjointype & (JT_LEFT|JT_CROSS))!=0; if( j!=iFrom && doNotReorder ) break; m = getMask(pMaskSet, pTabItem->iCursor); if( (m & notReady)==0 ){ @@ -4586,10 +4761,11 @@ if( j==iFrom ) iFrom++; continue; } mask = (isOptimal ? m : notReady); pOrderBy = ((i==0 && ppOrderBy )?*ppOrderBy:0); + pDist = (i==0 ? pDistinct : 0); if( pTabItem->pIndex==0 ) nUnconstrained++; WHERETRACE(("=== trying table %d with isOptimal=%d ===\n", j, isOptimal)); assert( pTabItem->pTab ); @@ -4600,11 +4776,11 @@ &sCost, pp); }else #endif { bestBtreeIndex(pParse, pWC, pTabItem, mask, notReady, pOrderBy, - &sCost); + pDist, &sCost); } assert( isOptimal || (sCost.used¬Ready)==0 ); /* If an INDEXED BY clause is present, then the plan must use that ** index if it uses any index at all */ @@ -4660,10 +4836,14 @@ WHERETRACE(("*** Optimizer selects table %d for loop %d" " with cost=%g and nRow=%g\n", bestJ, pLevel-pWInfo->a, bestPlan.rCost, bestPlan.plan.nRow)); if( (bestPlan.plan.wsFlags & WHERE_ORDERBY)!=0 ){ *ppOrderBy = 0; + } + if( (bestPlan.plan.wsFlags & WHERE_DISTINCT)!=0 ){ + assert( pWInfo->eDistinct==0 ); + pWInfo->eDistinct = WHERE_DISTINCT_ORDERED; } andFlags &= bestPlan.plan.wsFlags; pLevel->plan = bestPlan.plan; testcase( bestPlan.plan.wsFlags & WHERE_INDEXED ); testcase( bestPlan.plan.wsFlags & WHERE_TEMP_INDEX ); Index: test/collate5.test ================================================================== --- test/collate5.test +++ test/collate5.test @@ -55,21 +55,21 @@ } {} do_test collate5-1.1 { execsql { SELECT DISTINCT a FROM collate5t1; } -} {A B N} +} {a b n} do_test collate5-1.2 { execsql { SELECT DISTINCT b FROM collate5t1; } -} {{} Apple apple banana} +} {apple Apple banana {}} do_test collate5-1.3 { execsql { SELECT DISTINCT a, b FROM collate5t1; } -} {A Apple a apple B banana N {}} +} {a apple A Apple b banana n {}} # Ticket #3376 # do_test collate5-1.11 { execsql { ADDED test/distinct.test Index: test/distinct.test ================================================================== --- /dev/null +++ test/distinct.test @@ -0,0 +1,166 @@ +# 2011 July 1 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this script is the DISTINCT modifier. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +set testprefix distinct + + +proc is_distinct_noop {sql} { + set sql1 $sql + set sql2 [string map {DISTINCT ""} $sql] + + set program1 [list] + set program2 [list] + db eval "EXPLAIN $sql1" { + if {$opcode != "Noop"} { lappend program1 $opcode } + } + db eval "EXPLAIN $sql2" { + if {$opcode != "Noop"} { lappend program2 $opcode } + } + + return [expr {$program1==$program2}] +} + +proc do_distinct_noop_test {tn sql} { + uplevel [list do_test $tn [list is_distinct_noop $sql] 1] +} +proc do_distinct_not_noop_test {tn sql} { + uplevel [list do_test $tn [list is_distinct_noop $sql] 0] +} + +proc do_temptables_test {tn sql temptables} { + uplevel [list do_test $tn [subst -novar { + set ret "" + db eval "EXPLAIN [set sql]" { + if {$opcode == "OpenEphemeral"} { + if {$p5 != "10" && $p5!="00"} { error "p5 = $p5" } + if {$p5 == "10"} { + lappend ret hash + } else { + lappend ret btree + } + } + } + set ret + }] $temptables] +} + + +#------------------------------------------------------------------------- +# The following tests - distinct-1.* - check that the planner correctly +# detects cases where a UNIQUE index means that a DISTINCT clause is +# redundant. Currently the planner only detects such cases when there +# is a single table in the FROM clause. +# +do_execsql_test 1.0 { + CREATE TABLE t1(a, b, c, d); + CREATE UNIQUE INDEX i1 ON t1(b, c); + CREATE UNIQUE INDEX i2 ON t1(d COLLATE nocase); + + CREATE TABLE t2(x INTEGER PRIMARY KEY, y); + + CREATE TABLE t3(c1 PRIMARY KEY, c2); + CREATE INDEX i3 ON t3(c2); +} +foreach {tn noop sql} { + + 1 1 "SELECT DISTINCT b, c FROM t1" + 2 1 "SELECT DISTINCT c FROM t1 WHERE b = ?" + 3 1 "SELECT DISTINCT rowid FROM t1" + 4 1 "SELECT DISTINCT rowid, a FROM t1" + 5 1 "SELECT DISTINCT x FROM t2" + 6 1 "SELECT DISTINCT * FROM t2" + 7 1 "SELECT DISTINCT * FROM (SELECT * FROM t2)" + + 8 1 "SELECT DISTINCT * FROM t1" + + 8 0 "SELECT DISTINCT a, b FROM t1" + + 9 0 "SELECT DISTINCT c FROM t1 WHERE b IN (1,2)" + 10 0 "SELECT DISTINCT c FROM t1" + 11 0 "SELECT DISTINCT b FROM t1" + + 12 0 "SELECT DISTINCT a, d FROM t1" + 13 0 "SELECT DISTINCT a, b, c COLLATE nocase FROM t1" + 14 1 "SELECT DISTINCT a, d COLLATE nocase FROM t1" + 15 0 "SELECT DISTINCT a, d COLLATE binary FROM t1" + 16 1 "SELECT DISTINCT a, b, c COLLATE binary FROM t1" + + 16 0 "SELECT DISTINCT t1.rowid FROM t1, t2" + 17 0 { /* Technically, it would be possible to detect that DISTINCT + ** is a no-op in cases like the following. But SQLite does not + ** do so. */ + SELECT DISTINCT t1.rowid FROM t1, t2 WHERE t1.rowid=t2.rowid } + + 18 1 "SELECT DISTINCT c1, c2 FROM t3" + 19 1 "SELECT DISTINCT c1 FROM t3" + 20 1 "SELECT DISTINCT * FROM t3" + 21 0 "SELECT DISTINCT c2 FROM t3" + + 22 0 "SELECT DISTINCT * FROM (SELECT 1, 2, 3 UNION SELECT 4, 5, 6)" + 23 1 "SELECT DISTINCT rowid FROM (SELECT 1, 2, 3 UNION SELECT 4, 5, 6)" + + 24 0 "SELECT DISTINCT rowid/2 FROM t1" + 25 1 "SELECT DISTINCT rowid/2, rowid FROM t1" + 26 1 "SELECT DISTINCT rowid/2, b FROM t1 WHERE c = ?" +} { + if {$noop} { + do_distinct_noop_test 1.$tn $sql + } else { + do_distinct_not_noop_test 1.$tn $sql + } +} + +#------------------------------------------------------------------------- +# The following tests - distinct-2.* - test cases where an index is +# used to deliver results in order of the DISTINCT expressions. +# +drop_all_tables +do_execsql_test 2.0 { + CREATE TABLE t1(a, b, c); + + CREATE INDEX i1 ON t1(a, b); + CREATE INDEX i2 ON t1(b COLLATE nocase, c COLLATE nocase); + + INSERT INTO t1 VALUES('a', 'b', 'c'); + INSERT INTO t1 VALUES('A', 'B', 'C'); + INSERT INTO t1 VALUES('a', 'b', 'c'); + INSERT INTO t1 VALUES('A', 'B', 'C'); +} + +foreach {tn sql temptables res} { + 1 "a, b FROM t1" {} {A B a b} + 2 "b, a FROM t1" {} {B A b a} + 3 "a, b, c FROM t1" {hash} {a b c A B C} + 4 "a, b, c FROM t1 ORDER BY a, b, c" {btree} {A B C a b c} + 5 "b FROM t1 WHERE a = 'a'" {} {b} + 6 "b FROM t1" {hash} {b B} + 7 "a FROM t1" {} {A a} + 8 "b COLLATE nocase FROM t1" {} {b} + 9 "b COLLATE nocase FROM t1 ORDER BY b COLLATE nocase" {} {B} +} { + do_execsql_test 2.$tn.1 "SELECT DISTINCT $sql" $res + do_temptables_test 2.$tn.2 "SELECT DISTINCT $sql" $temptables +} + +do_execsql_test 2.A { + SELECT (SELECT DISTINCT o.a FROM t1 AS i) FROM t1 AS o; +} {a A a A} + + + + +finish_test Index: test/e_select.test ================================================================== --- test/e_select.test +++ test/e_select.test @@ -1236,12 +1236,12 @@ 3.2 "SELECT ALL x FROM h1, h2 ON (x=b)" {One one Four four} 3.1 "SELECT x FROM h2" {One Two Three Four one two three four} 3.2 "SELECT x FROM h1, h2 ON (x=b)" {One one Four four} - 4.1 "SELECT DISTINCT x FROM h2" {four one three two} - 4.2 "SELECT DISTINCT x FROM h1, h2 ON (x=b)" {four one} + 4.1 "SELECT DISTINCT x FROM h2" {One Two Three Four} + 4.2 "SELECT DISTINCT x FROM h1, h2 ON (x=b)" {One Four} } # EVIDENCE-OF: R-02054-15343 For the purposes of detecting duplicate # rows, two NULL values are considered to be equal. # @@ -1251,15 +1251,15 @@ # EVIDENCE-OF: R-58359-52112 The normal rules for selecting a collation # sequence to compare text values with apply. # do_select_tests e_select-5.6 { - 1 "SELECT DISTINCT b FROM h1" {I IV four i iv one} - 2 "SELECT DISTINCT b COLLATE nocase FROM h1" {four i iv one} - 3 "SELECT DISTINCT x FROM h2" {four one three two} + 1 "SELECT DISTINCT b FROM h1" {one I i four IV iv} + 2 "SELECT DISTINCT b COLLATE nocase FROM h1" {one I four IV} + 3 "SELECT DISTINCT x FROM h2" {One Two Three Four} 4 "SELECT DISTINCT x COLLATE binary FROM h2" { - Four One Three Two four one three two + One Two Three Four one two three four } } #------------------------------------------------------------------------- # The following tests - e_select-7.* - test that statements made to do Index: test/fuzzer1.test ================================================================== --- test/fuzzer1.test +++ test/fuzzer1.test @@ -1374,9 +1374,9 @@ SELECT DISTINCT streetname.n FROM f2, streetname WHERE f2.word MATCH 'tayle' AND f2.distance<=200 AND streetname.n>=f2.word AND streetname.n<=(f2.word || x'F7BFBFBF') } -} {steelewood tallia tallu talwyn taymouth thelema trailer {tyler finley}} +} {{tyler finley} trailer taymouth steelewood tallia tallu talwyn thelema} finish_test Index: test/insert4.test ================================================================== --- test/insert4.test +++ test/insert4.test @@ -110,11 +110,11 @@ execsql { DELETE FROM t3; INSERT INTO t3 SELECT DISTINCT * FROM t2; SELECT * FROM t3; } -} {1 9 9 1} +} {9 1 1 9} xferopt_test insert4-2.4.2 0 do_test insert4-2.4.3 { catchsql { DELETE FROM t1; INSERT INTO t1 SELECT DISTINCT * FROM t2; Index: test/misc5.test ================================================================== --- test/misc5.test +++ test/misc5.test @@ -503,11 +503,11 @@ ) ) ) ORDER BY LOWER(artist) ASC; } - } {one} + } {two} } # Ticket #1370. Do not overwrite small files (less than 1024 bytes) # when trying to open them as a database. # Index: test/selectB.test ================================================================== --- test/selectB.test +++ test/selectB.test @@ -353,11 +353,11 @@ execsql { SELECT * FROM ( SELECT DISTINCT (a/10) FROM t1 UNION ALL SELECT DISTINCT(d%2) FROM t2 ) } - } {0 1 0 1} + } {0 1 1 0} do_test selectB-$ii.20 { execsql { SELECT DISTINCT * FROM ( SELECT DISTINCT (a/10) FROM t1 UNION ALL SELECT DISTINCT(d%2) FROM t2 Index: test/tester.tcl ================================================================== --- test/tester.tcl +++ test/tester.tcl @@ -18,11 +18,11 @@ # test cases are as follows: # # Commands to manipulate the db and the file-system at a high level: # # copy_file FROM TO -# drop_all_table ?DB? +# drop_all_tables ?DB? # forcedelete FILENAME # # Test the capability of the SQLite version built into the interpreter to # determine if a specific test can be run: #