/ Check-in [45e581bf]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Merge experimental changes improving optimization of DISTINCT queries with the trunk.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 45e581bff7a75db6c9a2c45b73d034d0b8a166d1
User & Date: dan 2011-07-02 09:46:52
References
2014-02-08
18:47 New ticket [fccbde53] DISTINCT thinks a zeroblob() and blob of all zeros are different. artifact: 74e2e659 user: drh
2012-04-20
17:27 Ticket [385a5b56] A DISTINCT SELECT optimized using a UNIQUE index may allow duplicate NULL values. status still Open with 1 other change artifact: 2a938c78 user: dan
2012-03-03
01:35 Ticket [3557ad65] Incorrect DISTINCT on an indexed query with IN status still Open with 3 other changes artifact: bf8e83ba user: drh
2011-07-02
13:34
Cherrypick [45e581bff7] into the 3.7.2 branch. check-in: c593792c user: dan tags: branch-3.7.2
Context
2011-07-02
15:32
Ensure that automatic indexes are only created in scenarios where they may be used more than once. check-in: 27c65d4d user: dan tags: trunk
09:46
Merge experimental changes improving optimization of DISTINCT queries with the trunk. check-in: 45e581bf user: dan tags: trunk
06:44
Fix a broken assert() in where.c. Closed-Leaf check-in: 090b2917 user: dan tags: experimental
2011-07-01
14:22
Test case for ticket [d6ddba6706353915ceed] check-in: 953e169e user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/delete.c.

   367    367       int iRowSet = ++pParse->nMem;   /* Register for rowset of rows to delete */
   368    368       int iRowid = ++pParse->nMem;    /* Used for storing rowid values. */
   369    369       int regRowid;                   /* Actual register containing rowids */
   370    370   
   371    371       /* Collect rowids of every row to be deleted.
   372    372       */
   373    373       sqlite3VdbeAddOp2(v, OP_Null, 0, iRowSet);
   374         -    pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere,0,WHERE_DUPLICATES_OK);
          374  +    pWInfo = sqlite3WhereBegin(
          375  +        pParse, pTabList, pWhere, 0, 0, WHERE_DUPLICATES_OK
          376  +    );
   375    377       if( pWInfo==0 ) goto delete_from_cleanup;
   376    378       regRowid = sqlite3ExprCodeGetColumn(pParse, pTab, -1, iCur, iRowid);
   377    379       sqlite3VdbeAddOp2(v, OP_RowSetAdd, iRowSet, regRowid);
   378    380       if( db->flags & SQLITE_CountRows ){
   379    381         sqlite3VdbeAddOp2(v, OP_AddImm, memCnt, 1);
   380    382       }
   381    383       sqlite3WhereEnd(pWInfo);

Changes to src/fkey.c.

   556    556     sNameContext.pParse = pParse;
   557    557     sqlite3ResolveExprNames(&sNameContext, pWhere);
   558    558   
   559    559     /* Create VDBE to loop through the entries in pSrc that match the WHERE
   560    560     ** clause. If the constraint is not deferred, throw an exception for
   561    561     ** each row found. Otherwise, for deferred constraints, increment the
   562    562     ** deferred constraint counter by nIncr for each row selected.  */
   563         -  pWInfo = sqlite3WhereBegin(pParse, pSrc, pWhere, 0, 0);
          563  +  pWInfo = sqlite3WhereBegin(pParse, pSrc, pWhere, 0, 0, 0);
   564    564     if( nIncr>0 && pFKey->isDeferred==0 ){
   565    565       sqlite3ParseToplevel(pParse)->mayAbort = 1;
   566    566     }
   567    567     sqlite3VdbeAddOp2(v, OP_FkCounter, pFKey->isDeferred, nIncr);
   568    568     if( pWInfo ){
   569    569       sqlite3WhereEnd(pWInfo);
   570    570     }

Changes to src/select.c.

  3717   3717     ExprList *pOrderBy;    /* The ORDER BY clause.  May be NULL */
  3718   3718     ExprList *pGroupBy;    /* The GROUP BY clause.  May be NULL */
  3719   3719     Expr *pHaving;         /* The HAVING clause.  May be NULL */
  3720   3720     int isDistinct;        /* True if the DISTINCT keyword is present */
  3721   3721     int distinct;          /* Table to use for the distinct set */
  3722   3722     int rc = 1;            /* Value to return from this function */
  3723   3723     int addrSortIndex;     /* Address of an OP_OpenEphemeral instruction */
         3724  +  int addrDistinctIndex; /* Address of an OP_OpenEphemeral instruction */
  3724   3725     AggInfo sAggInfo;      /* Information used by aggregate queries */
  3725   3726     int iEnd;              /* Address of the end of the query */
  3726   3727     sqlite3 *db;           /* The database connection */
  3727   3728   
  3728   3729   #ifndef SQLITE_OMIT_EXPLAIN
  3729   3730     int iRestoreSelectId = pParse->iSelectId;
  3730   3731     pParse->iSelectId = pParse->iNextSelectId++;
................................................................................
  3843   3844       }
  3844   3845       rc = multiSelect(pParse, p, pDest);
  3845   3846       explainSetInteger(pParse->iSelectId, iRestoreSelectId);
  3846   3847       return rc;
  3847   3848     }
  3848   3849   #endif
  3849   3850   
  3850         -  /* If possible, rewrite the query to use GROUP BY instead of DISTINCT.
  3851         -  ** GROUP BY might use an index, DISTINCT never does.
  3852         -  */
  3853         -  assert( p->pGroupBy==0 || (p->selFlags & SF_Aggregate)!=0 );
  3854         -  if( (p->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct ){
  3855         -    p->pGroupBy = sqlite3ExprListDup(db, p->pEList, 0);
  3856         -    pGroupBy = p->pGroupBy;
  3857         -    p->selFlags &= ~SF_Distinct;
  3858         -  }
  3859         -
  3860   3851     /* If there is both a GROUP BY and an ORDER BY clause and they are
  3861   3852     ** identical, then disable the ORDER BY clause since the GROUP BY
  3862   3853     ** will cause elements to come out in the correct order.  This is
  3863   3854     ** an optimization - the correct answer should result regardless.
  3864   3855     ** Use the SQLITE_GroupByOrder flag with SQLITE_TESTCTRL_OPTIMIZER
  3865   3856     ** to disable this optimization for testing purposes.
  3866   3857     */
  3867   3858     if( sqlite3ExprListCompare(p->pGroupBy, pOrderBy)==0
  3868   3859            && (db->flags & SQLITE_GroupByOrder)==0 ){
  3869   3860       pOrderBy = 0;
  3870   3861     }
         3862  +
         3863  +  /* If the query is DISTINCT with an ORDER BY but is not an aggregate, and 
         3864  +  ** if the select-list is the same as the ORDER BY list, then this query
         3865  +  ** can be rewritten as a GROUP BY. In other words, this:
         3866  +  **
         3867  +  **     SELECT DISTINCT xyz FROM ... ORDER BY xyz
         3868  +  **
         3869  +  ** is transformed to:
         3870  +  **
         3871  +  **     SELECT xyz FROM ... GROUP BY xyz
         3872  +  **
         3873  +  ** The second form is preferred as a single index (or temp-table) may be 
         3874  +  ** used for both the ORDER BY and DISTINCT processing. As originally 
         3875  +  ** written the query must use a temp-table for at least one of the ORDER 
         3876  +  ** BY and DISTINCT, and an index or separate temp-table for the other.
         3877  +  */
         3878  +  if( (p->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct 
         3879  +   && sqlite3ExprListCompare(pOrderBy, p->pEList)==0
         3880  +  ){
         3881  +    p->selFlags &= ~SF_Distinct;
         3882  +    p->pGroupBy = sqlite3ExprListDup(db, p->pEList, 0);
         3883  +    pGroupBy = p->pGroupBy;
         3884  +    pOrderBy = 0;
         3885  +  }
  3871   3886   
  3872   3887     /* If there is an ORDER BY clause, then this sorting
  3873   3888     ** index might end up being unused if the data can be 
  3874   3889     ** extracted in pre-sorted order.  If that is the case, then the
  3875   3890     ** OP_OpenEphemeral instruction will be changed to an OP_Noop once
  3876   3891     ** we figure out that the sorting index is not needed.  The addrSortIndex
  3877   3892     ** variable is used to facilitate that change.
................................................................................
  3900   3915     p->nSelectRow = (double)LARGEST_INT64;
  3901   3916     computeLimitRegisters(pParse, p, iEnd);
  3902   3917   
  3903   3918     /* Open a virtual index to use for the distinct set.
  3904   3919     */
  3905   3920     if( p->selFlags & SF_Distinct ){
  3906   3921       KeyInfo *pKeyInfo;
  3907         -    assert( isAgg || pGroupBy );
  3908   3922       distinct = pParse->nTab++;
  3909   3923       pKeyInfo = keyInfoFromExprList(pParse, p->pEList);
  3910         -    sqlite3VdbeAddOp4(v, OP_OpenEphemeral, distinct, 0, 0,
  3911         -                        (char*)pKeyInfo, P4_KEYINFO_HANDOFF);
         3924  +    addrDistinctIndex = sqlite3VdbeAddOp4(v, OP_OpenEphemeral, distinct, 0, 0,
         3925  +        (char*)pKeyInfo, P4_KEYINFO_HANDOFF);
  3912   3926       sqlite3VdbeChangeP5(v, BTREE_UNORDERED);
  3913   3927     }else{
  3914   3928       distinct = -1;
  3915   3929     }
  3916   3930   
  3917   3931     /* Aggregate and non-aggregate queries are handled differently */
  3918   3932     if( !isAgg && pGroupBy==0 ){
  3919         -    /* This case is for non-aggregate queries
  3920         -    ** Begin the database scan
  3921         -    */
  3922         -    pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pOrderBy, 0);
         3933  +    ExprList *pDist = (isDistinct ? p->pEList : 0);
         3934  +
         3935  +    /* Begin the database scan. */
         3936  +    pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pOrderBy, pDist, 0);
  3923   3937       if( pWInfo==0 ) goto select_end;
  3924   3938       if( pWInfo->nRowOut < p->nSelectRow ) p->nSelectRow = pWInfo->nRowOut;
  3925   3939   
  3926   3940       /* If sorting index that was created by a prior OP_OpenEphemeral 
  3927   3941       ** instruction ended up not being needed, then change the OP_OpenEphemeral
  3928   3942       ** into an OP_Noop.
  3929   3943       */
  3930   3944       if( addrSortIndex>=0 && pOrderBy==0 ){
  3931   3945         sqlite3VdbeChangeToNoop(v, addrSortIndex, 1);
  3932   3946         p->addrOpenEphm[2] = -1;
  3933   3947       }
  3934   3948   
  3935         -    /* Use the standard inner loop
  3936         -    */
  3937         -    assert(!isDistinct);
  3938         -    selectInnerLoop(pParse, p, pEList, 0, 0, pOrderBy, -1, pDest,
         3949  +    if( pWInfo->eDistinct ){
         3950  +      VdbeOp *pOp;                /* No longer required OpenEphemeral instr. */
         3951  +     
         3952  +      pOp = sqlite3VdbeGetOp(v, addrDistinctIndex);
         3953  +
         3954  +      assert( isDistinct );
         3955  +      assert( pWInfo->eDistinct==WHERE_DISTINCT_ORDERED 
         3956  +           || pWInfo->eDistinct==WHERE_DISTINCT_UNIQUE 
         3957  +      );
         3958  +      distinct = -1;
         3959  +      if( pWInfo->eDistinct==WHERE_DISTINCT_ORDERED ){
         3960  +        int iJump;
         3961  +        int iExpr;
         3962  +        int iFlag = ++pParse->nMem;
         3963  +        int iBase = pParse->nMem+1;
         3964  +        int iBase2 = iBase + pEList->nExpr;
         3965  +        pParse->nMem += (pEList->nExpr*2);
         3966  +
         3967  +        /* Change the OP_OpenEphemeral coded earlier to an OP_Integer. The
         3968  +        ** OP_Integer initializes the "first row" flag.  */
         3969  +        pOp->opcode = OP_Integer;
         3970  +        pOp->p1 = 1;
         3971  +        pOp->p2 = iFlag;
         3972  +
         3973  +        sqlite3ExprCodeExprList(pParse, pEList, iBase, 1);
         3974  +        iJump = sqlite3VdbeCurrentAddr(v) + 1 + pEList->nExpr + 1 + 1;
         3975  +        sqlite3VdbeAddOp2(v, OP_If, iFlag, iJump-1);
         3976  +        for(iExpr=0; iExpr<pEList->nExpr; iExpr++){
         3977  +          CollSeq *pColl = sqlite3ExprCollSeq(pParse, pEList->a[iExpr].pExpr);
         3978  +          sqlite3VdbeAddOp3(v, OP_Ne, iBase+iExpr, iJump, iBase2+iExpr);
         3979  +          sqlite3VdbeChangeP4(v, -1, (const char *)pColl, P4_COLLSEQ);
         3980  +          sqlite3VdbeChangeP5(v, SQLITE_NULLEQ);
         3981  +        }
         3982  +        sqlite3VdbeAddOp2(v, OP_Goto, 0, pWInfo->iContinue);
         3983  +
         3984  +        sqlite3VdbeAddOp2(v, OP_Integer, 0, iFlag);
         3985  +        assert( sqlite3VdbeCurrentAddr(v)==iJump );
         3986  +        sqlite3VdbeAddOp3(v, OP_Move, iBase, iBase2, pEList->nExpr);
         3987  +      }else{
         3988  +        pOp->opcode = OP_Noop;
         3989  +      }
         3990  +    }
         3991  +
         3992  +    /* Use the standard inner loop. */
         3993  +    selectInnerLoop(pParse, p, pEList, 0, 0, pOrderBy, distinct, pDest,
  3939   3994                       pWInfo->iContinue, pWInfo->iBreak);
  3940   3995   
  3941   3996       /* End the database scan loop.
  3942   3997       */
  3943   3998       sqlite3WhereEnd(pWInfo);
  3944   3999     }else{
  3945   4000       /* This is the processing for aggregate queries */
................................................................................
  4041   4096   
  4042   4097         /* Begin a loop that will extract all source rows in GROUP BY order.
  4043   4098         ** This might involve two separate loops with an OP_Sort in between, or
  4044   4099         ** it might be a single loop that uses an index to extract information
  4045   4100         ** in the right order to begin with.
  4046   4101         */
  4047   4102         sqlite3VdbeAddOp2(v, OP_Gosub, regReset, addrReset);
  4048         -      pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pGroupBy, 0);
         4103  +      pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pGroupBy, 0, 0);
  4049   4104         if( pWInfo==0 ) goto select_end;
  4050   4105         if( pGroupBy==0 ){
  4051   4106           /* The optimizer is able to deliver rows in group by order so
  4052   4107           ** we do not have to sort.  The OP_OpenEphemeral table will be
  4053   4108           ** cancelled later because we still need to use the pKeyInfo
  4054   4109           */
  4055   4110           pGroupBy = p->pGroupBy;
................................................................................
  4303   4358           }
  4304   4359     
  4305   4360           /* This case runs if the aggregate has no GROUP BY clause.  The
  4306   4361           ** processing is much simpler since there is only a single row
  4307   4362           ** of output.
  4308   4363           */
  4309   4364           resetAccumulator(pParse, &sAggInfo);
  4310         -        pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pMinMax, flag);
         4365  +        pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pMinMax, 0, flag);
  4311   4366           if( pWInfo==0 ){
  4312   4367             sqlite3ExprListDelete(db, pDel);
  4313   4368             goto select_end;
  4314   4369           }
  4315   4370           updateAccumulator(pParse, &sAggInfo);
  4316   4371           if( !pMinMax && flag ){
  4317   4372             sqlite3VdbeAddOp2(v, OP_Goto, 0, pWInfo->iBreak);

Changes to src/sqliteInt.h.

  1962   1962   ** into the second half to give some continuity.
  1963   1963   */
  1964   1964   struct WhereInfo {
  1965   1965     Parse *pParse;       /* Parsing and code generating context */
  1966   1966     u16 wctrlFlags;      /* Flags originally passed to sqlite3WhereBegin() */
  1967   1967     u8 okOnePass;        /* Ok to use one-pass algorithm for UPDATE or DELETE */
  1968   1968     u8 untestedTerms;    /* Not all WHERE terms resolved by outer loop */
         1969  +  u8 eDistinct;
  1969   1970     SrcList *pTabList;             /* List of tables in the join */
  1970   1971     int iTop;                      /* The very beginning of the WHERE loop */
  1971   1972     int iContinue;                 /* Jump here to continue with next record */
  1972   1973     int iBreak;                    /* Jump here to break out of the loop */
  1973   1974     int nLevel;                    /* Number of nested loop */
  1974   1975     struct WhereClause *pWC;       /* Decomposition of the WHERE clause */
  1975   1976     double savedNQueryLoop;        /* pParse->nQueryLoop outside the WHERE loop */
  1976   1977     double nRowOut;                /* Estimated number of output rows */
  1977   1978     WhereLevel a[1];               /* Information about each nest loop in WHERE */
  1978   1979   };
  1979   1980   
         1981  +#define WHERE_DISTINCT_UNIQUE 1
         1982  +#define WHERE_DISTINCT_ORDERED 2
         1983  +
  1980   1984   /*
  1981   1985   ** A NameContext defines a context in which to resolve table and column
  1982   1986   ** names.  The context consists of a list of tables (the pSrcList) field and
  1983   1987   ** a list of named expression (pEList).  The named expression list may
  1984   1988   ** be NULL.  The pSrc corresponds to the FROM clause of a SELECT or
  1985   1989   ** to the table being operated on by INSERT, UPDATE, or DELETE.  The
  1986   1990   ** pEList corresponds to the result set of a SELECT and is NULL for
................................................................................
  2734   2738   int sqlite3IsReadOnly(Parse*, Table*, int);
  2735   2739   void sqlite3OpenTable(Parse*, int iCur, int iDb, Table*, int);
  2736   2740   #if defined(SQLITE_ENABLE_UPDATE_DELETE_LIMIT) && !defined(SQLITE_OMIT_SUBQUERY)
  2737   2741   Expr *sqlite3LimitWhere(Parse *, SrcList *, Expr *, ExprList *, Expr *, Expr *, char *);
  2738   2742   #endif
  2739   2743   void sqlite3DeleteFrom(Parse*, SrcList*, Expr*);
  2740   2744   void sqlite3Update(Parse*, SrcList*, ExprList*, Expr*, int);
  2741         -WhereInfo *sqlite3WhereBegin(Parse*, SrcList*, Expr*, ExprList**, u16);
         2745  +WhereInfo *sqlite3WhereBegin(Parse*, SrcList*, Expr*, ExprList**,ExprList*,u16);
  2742   2746   void sqlite3WhereEnd(WhereInfo*);
  2743   2747   int sqlite3ExprCodeGetColumn(Parse*, Table*, int, int, int);
  2744   2748   void sqlite3ExprCodeGetColumnOfTable(Vdbe*, Table*, int, int, int);
  2745   2749   void sqlite3ExprCodeMove(Parse*, int, int, int);
  2746   2750   void sqlite3ExprCodeCopy(Parse*, int, int, int);
  2747   2751   void sqlite3ExprCacheStore(Parse*, int, int, int);
  2748   2752   void sqlite3ExprCachePush(Parse*);

Changes to src/update.c.

   307    307     if( sqlite3ResolveExprNames(&sNC, pWhere) ){
   308    308       goto update_cleanup;
   309    309     }
   310    310   
   311    311     /* Begin the database scan
   312    312     */
   313    313     sqlite3VdbeAddOp2(v, OP_Null, 0, regOldRowid);
   314         -  pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere,0, WHERE_ONEPASS_DESIRED);
          314  +  pWInfo = sqlite3WhereBegin(
          315  +      pParse, pTabList, pWhere, 0, 0, WHERE_ONEPASS_DESIRED
          316  +  );
   315    317     if( pWInfo==0 ) goto update_cleanup;
   316    318     okOnePass = pWInfo->okOnePass;
   317    319   
   318    320     /* Remember the rowid of every item to be updated.
   319    321     */
   320    322     sqlite3VdbeAddOp2(v, OP_Rowid, iCur, regOldRowid);
   321    323     if( !okOnePass ){

Changes to src/where.c.

   249    249   #define WHERE_IDX_ONLY     0x00800000  /* Use index only - omit table */
   250    250   #define WHERE_ORDERBY      0x01000000  /* Output will appear in correct order */
   251    251   #define WHERE_REVERSE      0x02000000  /* Scan in reverse order */
   252    252   #define WHERE_UNIQUE       0x04000000  /* Selects no more than one row */
   253    253   #define WHERE_VIRTUALTABLE 0x08000000  /* Use virtual-table processing */
   254    254   #define WHERE_MULTI_OR     0x10000000  /* OR using multiple indices */
   255    255   #define WHERE_TEMP_INDEX   0x20000000  /* Uses an ephemeral index */
          256  +#define WHERE_DISTINCT     0x40000000  /* Correct order for DISTINCT */
   256    257   
   257    258   /*
   258    259   ** Initialize a preallocated WhereClause structure.
   259    260   */
   260    261   static void whereClauseInit(
   261    262     WhereClause *pWC,        /* The WhereClause to be initialized */
   262    263     Parse *pParse,           /* The parsing context */
................................................................................
  1393   1394       if( (exprTableUsage(pMaskSet, pList->a[iFirst++].pExpr)&allowed)!=0 ){
  1394   1395         return 1;
  1395   1396       }
  1396   1397     }
  1397   1398     return 0;
  1398   1399   }
  1399   1400   
         1401  +/*
         1402  +** This function searches the expression list passed as the second argument
         1403  +** for an expression of type TK_COLUMN that refers to the same column and
         1404  +** uses the same collation sequence as the iCol'th column of index pIdx.
         1405  +** Argument iBase is the cursor number used for the table that pIdx refers
         1406  +** to.
         1407  +**
         1408  +** If such an expression is found, its index in pList->a[] is returned. If
         1409  +** no expression is found, -1 is returned.
         1410  +*/
         1411  +static int findIndexCol(
         1412  +  Parse *pParse,                  /* Parse context */
         1413  +  ExprList *pList,                /* Expression list to search */
         1414  +  int iBase,                      /* Cursor for table associated with pIdx */
         1415  +  Index *pIdx,                    /* Index to match column of */
         1416  +  int iCol                        /* Column of index to match */
         1417  +){
         1418  +  int i;
         1419  +  const char *zColl = pIdx->azColl[iCol];
         1420  +
         1421  +  for(i=0; i<pList->nExpr; i++){
         1422  +    Expr *p = pList->a[i].pExpr;
         1423  +    if( pIdx->aiColumn[iCol]==p->iColumn && iBase==p->iTable ){
         1424  +      CollSeq *pColl = sqlite3ExprCollSeq(pParse, p);
         1425  +      if( pColl && 0==sqlite3StrICmp(pColl->zName, zColl) ){
         1426  +        return i;
         1427  +      }
         1428  +    }
         1429  +  }
         1430  +
         1431  +  return -1;
         1432  +}
         1433  +
         1434  +/*
         1435  +** This routine determines if pIdx can be used to assist in processing a
         1436  +** DISTINCT qualifier. In other words, it tests whether or not using this
         1437  +** index for the outer loop guarantees that rows with equal values for
         1438  +** all expressions in the pDistinct list are delivered grouped together.
         1439  +**
         1440  +** For example, the query 
         1441  +**
         1442  +**   SELECT DISTINCT a, b, c FROM tbl WHERE a = ?
         1443  +**
         1444  +** can benefit from any index on columns "b" and "c".
         1445  +*/
         1446  +static int isDistinctIndex(
         1447  +  Parse *pParse,                  /* Parsing context */
         1448  +  WhereClause *pWC,               /* The WHERE clause */
         1449  +  Index *pIdx,                    /* The index being considered */
         1450  +  int base,                       /* Cursor number for the table pIdx is on */
         1451  +  ExprList *pDistinct,            /* The DISTINCT expressions */
         1452  +  int nEqCol                      /* Number of index columns with == */
         1453  +){
         1454  +  Bitmask mask = 0;               /* Mask of unaccounted for pDistinct exprs */
         1455  +  int i;                          /* Iterator variable */
         1456  +
         1457  +  if( pIdx->zName==0 || pDistinct==0 || pDistinct->nExpr>=BMS ) return 0;
         1458  +
         1459  +  /* Loop through all the expressions in the distinct list. If any of them
         1460  +  ** are not simple column references, return early. Otherwise, test if the
         1461  +  ** WHERE clause contains a "col=X" clause. If it does, the expression
         1462  +  ** can be ignored. If it does not, and the column does not belong to the
         1463  +  ** same table as index pIdx, return early. Finally, if there is no
         1464  +  ** matching "col=X" expression and the column is on the same table as pIdx,
         1465  +  ** set the corresponding bit in variable mask.
         1466  +  */
         1467  +  for(i=0; i<pDistinct->nExpr; i++){
         1468  +    WhereTerm *pTerm;
         1469  +    Expr *p = pDistinct->a[i].pExpr;
         1470  +    if( p->op!=TK_COLUMN ) return 0;
         1471  +    pTerm = findTerm(pWC, p->iTable, p->iColumn, ~(Bitmask)0, WO_EQ, 0);
         1472  +    if( pTerm ){
         1473  +      Expr *pX = pTerm->pExpr;
         1474  +      CollSeq *p1 = sqlite3BinaryCompareCollSeq(pParse, pX->pLeft, pX->pRight);
         1475  +      CollSeq *p2 = sqlite3ExprCollSeq(pParse, p);
         1476  +      if( p1==p2 ) continue;
         1477  +    }
         1478  +    if( p->iTable!=base ) return 0;
         1479  +    mask |= (((Bitmask)1) << i);
         1480  +  }
         1481  +
         1482  +  for(i=nEqCol; mask && i<pIdx->nColumn; i++){
         1483  +    int iExpr = findIndexCol(pParse, pDistinct, base, pIdx, i);
         1484  +    if( iExpr<0 ) break;
         1485  +    mask &= ~(((Bitmask)1) << iExpr);
         1486  +  }
         1487  +
         1488  +  return (mask==0);
         1489  +}
         1490  +
         1491  +
         1492  +/*
         1493  +** Return true if the DISTINCT expression-list passed as the third argument
         1494  +** is redundant. A DISTINCT list is redundant if the database contains a
         1495  +** UNIQUE index that guarantees that the result of the query will be distinct
         1496  +** anyway.
         1497  +*/
         1498  +static int isDistinctRedundant(
         1499  +  Parse *pParse,
         1500  +  SrcList *pTabList,
         1501  +  WhereClause *pWC,
         1502  +  ExprList *pDistinct
         1503  +){
         1504  +  Table *pTab;
         1505  +  Index *pIdx;
         1506  +  int i;                          
         1507  +  int iBase;
         1508  +
         1509  +  /* If there is more than one table or sub-select in the FROM clause of
         1510  +  ** this query, then it will not be possible to show that the DISTINCT 
         1511  +  ** clause is redundant. */
         1512  +  if( pTabList->nSrc!=1 ) return 0;
         1513  +  iBase = pTabList->a[0].iCursor;
         1514  +  pTab = pTabList->a[0].pTab;
         1515  +
         1516  +  /* If any of the expressions is an IPK column on table iBase, then return 
         1517  +  ** true. Note: The (p->iTable==iBase) part of this test may be false if the
         1518  +  ** current SELECT is a correlated sub-query.
         1519  +  */
         1520  +  for(i=0; i<pDistinct->nExpr; i++){
         1521  +    Expr *p = pDistinct->a[i].pExpr;
         1522  +    if( p->op==TK_COLUMN && p->iTable==iBase && p->iColumn<0 ) return 1;
         1523  +  }
         1524  +
         1525  +  /* Loop through all indices on the table, checking each to see if it makes
         1526  +  ** the DISTINCT qualifier redundant. It does so if:
         1527  +  **
         1528  +  **   1. The index is itself UNIQUE, and
         1529  +  **
         1530  +  **   2. All of the columns in the index are either part of the pDistinct
         1531  +  **      list, or else the WHERE clause contains a term of the form "col=X",
         1532  +  **      where X is a constant value. The collation sequences of the
         1533  +  **      comparison and select-list expressions must match those of the index.
         1534  +  */
         1535  +  for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
         1536  +    if( pIdx->onError==OE_None ) continue;
         1537  +    for(i=0; i<pIdx->nColumn; i++){
         1538  +      int iCol = pIdx->aiColumn[i];
         1539  +      if( 0==findTerm(pWC, iBase, iCol, ~(Bitmask)0, WO_EQ, pIdx) 
         1540  +       && 0>findIndexCol(pParse, pDistinct, iBase, pIdx, i)
         1541  +      ){
         1542  +        break;
         1543  +      }
         1544  +    }
         1545  +    if( i==pIdx->nColumn ){
         1546  +      /* This index implies that the DISTINCT qualifier is redundant. */
         1547  +      return 1;
         1548  +    }
         1549  +  }
         1550  +
         1551  +  return 0;
         1552  +}
  1400   1553   
  1401   1554   /*
  1402   1555   ** This routine decides if pIdx can be used to satisfy the ORDER BY
  1403   1556   ** clause.  If it can, it returns 1.  If pIdx cannot satisfy the
  1404   1557   ** ORDER BY clause, this routine returns 0.
  1405   1558   **
  1406   1559   ** pOrderBy is an ORDER BY clause from a SELECT statement.  pTab is the
................................................................................
  1429   1582   ){
  1430   1583     int i, j;                       /* Loop counters */
  1431   1584     int sortOrder = 0;              /* XOR of index and ORDER BY sort direction */
  1432   1585     int nTerm;                      /* Number of ORDER BY terms */
  1433   1586     struct ExprList_item *pTerm;    /* A term of the ORDER BY clause */
  1434   1587     sqlite3 *db = pParse->db;
  1435   1588   
  1436         -  assert( pOrderBy!=0 );
         1589  +  if( !pOrderBy ) return 0;
         1590  +  if( wsFlags & WHERE_COLUMN_IN ) return 0;
         1591  +  if( pIdx->bUnordered ) return 0;
         1592  +
  1437   1593     nTerm = pOrderBy->nExpr;
  1438   1594     assert( nTerm>0 );
  1439   1595   
  1440   1596     /* Argument pIdx must either point to a 'real' named index structure, 
  1441   1597     ** or an index structure allocated on the stack by bestBtreeIndex() to
  1442   1598     ** represent the rowid index that is part of every table.  */
  1443   1599     assert( pIdx->zName || (pIdx->nColumn==1 && pIdx->aiColumn[0]==-1) );
................................................................................
  1519   1675         ** to sort because the primary key is unique and so none of the other
  1520   1676         ** columns will make any difference
  1521   1677         */
  1522   1678         j = nTerm;
  1523   1679       }
  1524   1680     }
  1525   1681   
  1526         -  *pbRev = sortOrder!=0;
         1682  +  if( pbRev ) *pbRev = sortOrder!=0;
  1527   1683     if( j>=nTerm ){
  1528   1684       /* All terms of the ORDER BY clause are covered by this index so
  1529   1685       ** this index can be used for sorting. */
  1530   1686       return 1;
  1531   1687     }
  1532   1688     if( pIdx->onError!=OE_None && i==pIdx->nColumn
  1533   1689         && (wsFlags & WHERE_COLUMN_NULL)==0
................................................................................
  2685   2841   static void bestBtreeIndex(
  2686   2842     Parse *pParse,              /* The parsing context */
  2687   2843     WhereClause *pWC,           /* The WHERE clause */
  2688   2844     struct SrcList_item *pSrc,  /* The FROM clause term to search */
  2689   2845     Bitmask notReady,           /* Mask of cursors not available for indexing */
  2690   2846     Bitmask notValid,           /* Cursors not available for any purpose */
  2691   2847     ExprList *pOrderBy,         /* The ORDER BY clause */
         2848  +  ExprList *pDistinct,        /* The select-list if query is DISTINCT */
  2692   2849     WhereCost *pCost            /* Lowest cost query plan */
  2693   2850   ){
  2694   2851     int iCur = pSrc->iCursor;   /* The cursor of the table to be accessed */
  2695   2852     Index *pProbe;              /* An index we are evaluating */
  2696   2853     Index *pIdx;                /* Copy of pProbe, or zero for IPK index */
  2697   2854     int eqTermMask;             /* Current mask of valid equality operators */
  2698   2855     int idxEqTermMask;          /* Index mask of valid equality operators */
................................................................................
  2825   2982       **             SELECT a, b, c FROM tbl WHERE a = 1;
  2826   2983       */
  2827   2984       int nEq;                      /* Number of == or IN terms matching index */
  2828   2985       int bInEst = 0;               /* True if "x IN (SELECT...)" seen */
  2829   2986       int nInMul = 1;               /* Number of distinct equalities to lookup */
  2830   2987       int estBound = 100;           /* Estimated reduction in search space */
  2831   2988       int nBound = 0;               /* Number of range constraints seen */
  2832         -    int bSort = 0;                /* True if external sort required */
         2989  +    int bSort = !!pOrderBy;       /* True if external sort required */
         2990  +    int bDist = !!pDistinct;      /* True if index cannot help with DISTINCT */
  2833   2991       int bLookup = 0;              /* True if not a covering index */
  2834   2992       WhereTerm *pTerm;             /* A single term of the WHERE clause */
  2835   2993   #ifdef SQLITE_ENABLE_STAT2
  2836   2994       WhereTerm *pFirstTerm = 0;    /* First term matching the index */
  2837   2995   #endif
  2838   2996   
  2839   2997       /* Determine the values of nEq and nInMul */
................................................................................
  2889   3047         }
  2890   3048       }
  2891   3049   
  2892   3050       /* If there is an ORDER BY clause and the index being considered will
  2893   3051       ** naturally scan rows in the required order, set the appropriate flags
  2894   3052       ** in wsFlags. Otherwise, if there is an ORDER BY clause but the index
  2895   3053       ** will scan rows in a different order, set the bSort variable.  */
  2896         -    if( pOrderBy ){
  2897         -      if( (wsFlags & WHERE_COLUMN_IN)==0
  2898         -        && pProbe->bUnordered==0
  2899         -        && isSortingIndex(pParse, pWC->pMaskSet, pProbe, iCur, pOrderBy,
  2900         -                          nEq, wsFlags, &rev)
  2901         -      ){
  2902         -        wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_ORDERBY;
  2903         -        wsFlags |= (rev ? WHERE_REVERSE : 0);
  2904         -      }else{
  2905         -        bSort = 1;
  2906         -      }
         3054  +    if( isSortingIndex(
         3055  +          pParse, pWC->pMaskSet, pProbe, iCur, pOrderBy, nEq, wsFlags, &rev)
         3056  +    ){
         3057  +      bSort = 0;
         3058  +      wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_ORDERBY;
         3059  +      wsFlags |= (rev ? WHERE_REVERSE : 0);
         3060  +    }
         3061  +
         3062  +    /* If there is a DISTINCT qualifier and this index will scan rows in
         3063  +    ** order of the DISTINCT expressions, clear bDist and set the appropriate
         3064  +    ** flags in wsFlags. */
         3065  +    if( isDistinctIndex(pParse, pWC, pProbe, iCur, pDistinct, nEq) ){
         3066  +      bDist = 0;
         3067  +      wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_DISTINCT;
  2907   3068       }
  2908   3069   
  2909   3070       /* If currently calculating the cost of using an index (not the IPK
  2910   3071       ** index), determine if all required column data may be obtained without 
  2911   3072       ** using the main table (i.e. if the index is a covering
  2912   3073       ** index for this query). If it is, set the WHERE_IDX_ONLY flag in
  2913   3074       ** wsFlags. Otherwise, set the bLookup variable to true.  */
................................................................................
  3016   3177       ** adds C*N*log10(N) to the cost, where N is the number of rows to be 
  3017   3178       ** sorted and C is a factor between 1.95 and 4.3.  We will split the
  3018   3179       ** difference and select C of 3.0.
  3019   3180       */
  3020   3181       if( bSort ){
  3021   3182         cost += nRow*estLog(nRow)*3;
  3022   3183       }
         3184  +    if( bDist ){
         3185  +      cost += nRow*estLog(nRow)*3;
         3186  +    }
  3023   3187   
  3024   3188       /**** Cost of using this index has now been computed ****/
  3025   3189   
  3026   3190       /* If there are additional constraints on this table that cannot
  3027   3191       ** be used with the current index, but which might lower the number
  3028   3192       ** of output rows, adjust the nRow value accordingly.  This only 
  3029   3193       ** matters if the current index is the least costly, so do not bother
................................................................................
  3161   3325       if( p->needToFreeIdxStr ){
  3162   3326         sqlite3_free(p->idxStr);
  3163   3327       }
  3164   3328       sqlite3DbFree(pParse->db, p);
  3165   3329     }else
  3166   3330   #endif
  3167   3331     {
  3168         -    bestBtreeIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, pCost);
         3332  +    bestBtreeIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, 0, pCost);
  3169   3333     }
  3170   3334   }
  3171   3335   
  3172   3336   /*
  3173   3337   ** Disable a term in the WHERE clause.  Except, do not disable the term
  3174   3338   ** if it controls a LEFT OUTER JOIN and it did not originate in the ON
  3175   3339   ** or USING clause of that join.
................................................................................
  4123   4287       iRetInit = sqlite3VdbeAddOp2(v, OP_Integer, 0, regReturn);
  4124   4288   
  4125   4289       for(ii=0; ii<pOrWc->nTerm; ii++){
  4126   4290         WhereTerm *pOrTerm = &pOrWc->a[ii];
  4127   4291         if( pOrTerm->leftCursor==iCur || pOrTerm->eOperator==WO_AND ){
  4128   4292           WhereInfo *pSubWInfo;          /* Info for single OR-term scan */
  4129   4293           /* Loop through table entries that match term pOrTerm. */
  4130         -        pSubWInfo = sqlite3WhereBegin(pParse, pOrTab, pOrTerm->pExpr, 0,
         4294  +        pSubWInfo = sqlite3WhereBegin(pParse, pOrTab, pOrTerm->pExpr, 0, 0,
  4131   4295                           WHERE_OMIT_OPEN | WHERE_OMIT_CLOSE |
  4132   4296                           WHERE_FORCE_TABLE | WHERE_ONETABLE_ONLY);
  4133   4297           if( pSubWInfo ){
  4134   4298             explainOneScan(
  4135   4299                 pParse, pOrTab, &pSubWInfo->a[0], iLevel, pLevel->iFrom, 0
  4136   4300             );
  4137   4301             if( (wctrlFlags & WHERE_DUPLICATES_OK)==0 ){
................................................................................
  4364   4528   ** output order, then the *ppOrderBy is unchanged.
  4365   4529   */
  4366   4530   WhereInfo *sqlite3WhereBegin(
  4367   4531     Parse *pParse,        /* The parser context */
  4368   4532     SrcList *pTabList,    /* A list of all tables to be scanned */
  4369   4533     Expr *pWhere,         /* The WHERE clause */
  4370   4534     ExprList **ppOrderBy, /* An ORDER BY clause, or NULL */
         4535  +  ExprList *pDistinct,  /* The select-list for DISTINCT queries - or NULL */
  4371   4536     u16 wctrlFlags        /* One of the WHERE_* flags defined in sqliteInt.h */
  4372   4537   ){
  4373   4538     int i;                     /* Loop counter */
  4374   4539     int nByteWInfo;            /* Num. bytes allocated for WhereInfo struct */
  4375   4540     int nTabList;              /* Number of elements in pTabList */
  4376   4541     WhereInfo *pWInfo;         /* Will become the return value of this function */
  4377   4542     Vdbe *v = pParse->pVdbe;   /* The virtual database engine */
................................................................................
  4490   4655     ** want to analyze these virtual terms, so start analyzing at the end
  4491   4656     ** and work forward so that the added virtual terms are never processed.
  4492   4657     */
  4493   4658     exprAnalyzeAll(pTabList, pWC);
  4494   4659     if( db->mallocFailed ){
  4495   4660       goto whereBeginError;
  4496   4661     }
         4662  +
         4663  +  /* Check if the DISTINCT qualifier, if there is one, is redundant. 
         4664  +  ** If it is, then set pDistinct to NULL and WhereInfo.eDistinct to
         4665  +  ** WHERE_DISTINCT_UNIQUE to tell the caller to ignore the DISTINCT.
         4666  +  */
         4667  +  if( pDistinct && isDistinctRedundant(pParse, pTabList, pWC, pDistinct) ){
         4668  +    pDistinct = 0;
         4669  +    pWInfo->eDistinct = WHERE_DISTINCT_UNIQUE;
         4670  +  }
  4497   4671   
  4498   4672     /* Chose the best index to use for each table in the FROM clause.
  4499   4673     **
  4500   4674     ** This loop fills in the following fields:
  4501   4675     **
  4502   4676     **   pWInfo->a[].pIdx      The index to use for this level of the loop.
  4503   4677     **   pWInfo->a[].wsFlags   WHERE_xxx flags associated with pIdx
................................................................................
  4574   4748       notIndexed = 0;
  4575   4749       for(isOptimal=(iFrom<nTabList-1); isOptimal>=0 && bestJ<0; isOptimal--){
  4576   4750         Bitmask mask;             /* Mask of tables not yet ready */
  4577   4751         for(j=iFrom, pTabItem=&pTabList->a[j]; j<nTabList; j++, pTabItem++){
  4578   4752           int doNotReorder;    /* True if this table should not be reordered */
  4579   4753           WhereCost sCost;     /* Cost information from best[Virtual]Index() */
  4580   4754           ExprList *pOrderBy;  /* ORDER BY clause for index to optimize */
         4755  +        ExprList *pDist;     /* DISTINCT clause for index to optimize */
  4581   4756     
  4582   4757           doNotReorder =  (pTabItem->jointype & (JT_LEFT|JT_CROSS))!=0;
  4583   4758           if( j!=iFrom && doNotReorder ) break;
  4584   4759           m = getMask(pMaskSet, pTabItem->iCursor);
  4585   4760           if( (m & notReady)==0 ){
  4586   4761             if( j==iFrom ) iFrom++;
  4587   4762             continue;
  4588   4763           }
  4589   4764           mask = (isOptimal ? m : notReady);
  4590   4765           pOrderBy = ((i==0 && ppOrderBy )?*ppOrderBy:0);
         4766  +        pDist = (i==0 ? pDistinct : 0);
  4591   4767           if( pTabItem->pIndex==0 ) nUnconstrained++;
  4592   4768     
  4593   4769           WHERETRACE(("=== trying table %d with isOptimal=%d ===\n",
  4594   4770                       j, isOptimal));
  4595   4771           assert( pTabItem->pTab );
  4596   4772   #ifndef SQLITE_OMIT_VIRTUALTABLE
  4597   4773           if( IsVirtual(pTabItem->pTab) ){
................................................................................
  4598   4774             sqlite3_index_info **pp = &pWInfo->a[j].pIdxInfo;
  4599   4775             bestVirtualIndex(pParse, pWC, pTabItem, mask, notReady, pOrderBy,
  4600   4776                              &sCost, pp);
  4601   4777           }else 
  4602   4778   #endif
  4603   4779           {
  4604   4780             bestBtreeIndex(pParse, pWC, pTabItem, mask, notReady, pOrderBy,
  4605         -                         &sCost);
         4781  +              pDist, &sCost);
  4606   4782           }
  4607   4783           assert( isOptimal || (sCost.used&notReady)==0 );
  4608   4784   
  4609   4785           /* If an INDEXED BY clause is present, then the plan must use that
  4610   4786           ** index if it uses any index at all */
  4611   4787           assert( pTabItem->pIndex==0 
  4612   4788                     || (sCost.plan.wsFlags & WHERE_NOT_FULLSCAN)==0
................................................................................
  4658   4834       assert( bestJ>=0 );
  4659   4835       assert( notReady & getMask(pMaskSet, pTabList->a[bestJ].iCursor) );
  4660   4836       WHERETRACE(("*** Optimizer selects table %d for loop %d"
  4661   4837                   " with cost=%g and nRow=%g\n",
  4662   4838                   bestJ, pLevel-pWInfo->a, bestPlan.rCost, bestPlan.plan.nRow));
  4663   4839       if( (bestPlan.plan.wsFlags & WHERE_ORDERBY)!=0 ){
  4664   4840         *ppOrderBy = 0;
         4841  +    }
         4842  +    if( (bestPlan.plan.wsFlags & WHERE_DISTINCT)!=0 ){
         4843  +      assert( pWInfo->eDistinct==0 );
         4844  +      pWInfo->eDistinct = WHERE_DISTINCT_ORDERED;
  4665   4845       }
  4666   4846       andFlags &= bestPlan.plan.wsFlags;
  4667   4847       pLevel->plan = bestPlan.plan;
  4668   4848       testcase( bestPlan.plan.wsFlags & WHERE_INDEXED );
  4669   4849       testcase( bestPlan.plan.wsFlags & WHERE_TEMP_INDEX );
  4670   4850       if( bestPlan.plan.wsFlags & (WHERE_INDEXED|WHERE_TEMP_INDEX) ){
  4671   4851         pLevel->iIdxCur = pParse->nTab++;

Changes to test/collate5.test.

    53     53       INSERT INTO collate5t1 VALUES('N', NULL);
    54     54     } 
    55     55   } {}
    56     56   do_test collate5-1.1 {
    57     57     execsql {
    58     58       SELECT DISTINCT a FROM collate5t1;
    59     59     }
    60         -} {A B N}
           60  +} {a b n}
    61     61   do_test collate5-1.2 {
    62     62     execsql {
    63     63       SELECT DISTINCT b FROM collate5t1;
    64     64     }
    65         -} {{} Apple apple banana}
           65  +} {apple Apple banana {}}
    66     66   do_test collate5-1.3 {
    67     67     execsql {
    68     68       SELECT DISTINCT a, b FROM collate5t1;
    69     69     }
    70         -} {A Apple a apple B banana N {}}
           70  +} {a apple A Apple b banana n {}}
    71     71   
    72     72   # Ticket #3376
    73     73   #
    74     74   do_test collate5-1.11 {
    75     75     execsql {
    76     76       CREATE TABLE tkt3376(a COLLATE nocase PRIMARY KEY);
    77     77       INSERT INTO tkt3376 VALUES('abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz');

Added test/distinct.test.

            1  +# 2011 July 1
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#***********************************************************************
           11  +# This file implements regression tests for SQLite library.  The
           12  +# focus of this script is the DISTINCT modifier.
           13  +#
           14  +
           15  +set testdir [file dirname $argv0]
           16  +source $testdir/tester.tcl
           17  +
           18  +set testprefix distinct
           19  +
           20  +
           21  +proc is_distinct_noop {sql} {
           22  +  set sql1 $sql
           23  +  set sql2 [string map {DISTINCT ""} $sql]
           24  +
           25  +  set program1 [list]
           26  +  set program2 [list]
           27  +  db eval "EXPLAIN $sql1" {
           28  +    if {$opcode != "Noop"} { lappend program1 $opcode }
           29  +  }
           30  +  db eval "EXPLAIN $sql2" {
           31  +    if {$opcode != "Noop"} { lappend program2 $opcode }
           32  +  }
           33  +
           34  +  return [expr {$program1==$program2}]
           35  +}
           36  +
           37  +proc do_distinct_noop_test {tn sql} {
           38  +  uplevel [list do_test $tn [list is_distinct_noop $sql] 1]
           39  +}
           40  +proc do_distinct_not_noop_test {tn sql} {
           41  +  uplevel [list do_test $tn [list is_distinct_noop $sql] 0]
           42  +}
           43  +
           44  +proc do_temptables_test {tn sql temptables} {
           45  +  uplevel [list do_test $tn [subst -novar {
           46  +    set ret ""
           47  +    db eval "EXPLAIN [set sql]" {
           48  +      if {$opcode == "OpenEphemeral"} { 
           49  +        if {$p5 != "10" && $p5!="00"} { error "p5 = $p5" }
           50  +        if {$p5 == "10"} {
           51  +          lappend ret hash
           52  +        } else {
           53  +          lappend ret btree
           54  +        }
           55  +      }
           56  +    }
           57  +    set ret
           58  +  }] $temptables]
           59  +}
           60  +
           61  +
           62  +#-------------------------------------------------------------------------
           63  +# The following tests - distinct-1.* - check that the planner correctly 
           64  +# detects cases where a UNIQUE index means that a DISTINCT clause is 
           65  +# redundant. Currently the planner only detects such cases when there
           66  +# is a single table in the FROM clause.
           67  +#
           68  +do_execsql_test 1.0 {
           69  +  CREATE TABLE t1(a, b, c, d);
           70  +  CREATE UNIQUE INDEX i1 ON t1(b, c);
           71  +  CREATE UNIQUE INDEX i2 ON t1(d COLLATE nocase);
           72  +
           73  +  CREATE TABLE t2(x INTEGER PRIMARY KEY, y);
           74  +
           75  +  CREATE TABLE t3(c1 PRIMARY KEY, c2);
           76  +  CREATE INDEX i3 ON t3(c2);
           77  +}
           78  +foreach {tn noop sql} {
           79  +
           80  +  1   1   "SELECT DISTINCT b, c FROM t1"
           81  +  2   1   "SELECT DISTINCT c FROM t1 WHERE b = ?"
           82  +  3   1   "SELECT DISTINCT rowid FROM t1"
           83  +  4   1   "SELECT DISTINCT rowid, a FROM t1"
           84  +  5   1   "SELECT DISTINCT x FROM t2"
           85  +  6   1   "SELECT DISTINCT * FROM t2"
           86  +  7   1   "SELECT DISTINCT * FROM (SELECT * FROM t2)"
           87  +
           88  +  8   1   "SELECT DISTINCT * FROM t1"
           89  +
           90  +  8   0   "SELECT DISTINCT a, b FROM t1"
           91  +
           92  +  9   0   "SELECT DISTINCT c FROM t1 WHERE b IN (1,2)"
           93  +  10  0   "SELECT DISTINCT c FROM t1"
           94  +  11  0   "SELECT DISTINCT b FROM t1"
           95  +
           96  +  12  0   "SELECT DISTINCT a, d FROM t1"
           97  +  13  0   "SELECT DISTINCT a, b, c COLLATE nocase FROM t1"
           98  +  14  1   "SELECT DISTINCT a, d COLLATE nocase FROM t1"
           99  +  15  0   "SELECT DISTINCT a, d COLLATE binary FROM t1"
          100  +  16  1   "SELECT DISTINCT a, b, c COLLATE binary FROM t1"
          101  +
          102  +  16  0   "SELECT DISTINCT t1.rowid FROM t1, t2"
          103  +  17  0   { /* Technically, it would be possible to detect that DISTINCT
          104  +            ** is a no-op in cases like the following. But SQLite does not
          105  +            ** do so. */
          106  +            SELECT DISTINCT t1.rowid FROM t1, t2 WHERE t1.rowid=t2.rowid }
          107  +
          108  +  18  1   "SELECT DISTINCT c1, c2 FROM t3"
          109  +  19  1   "SELECT DISTINCT c1 FROM t3"
          110  +  20  1   "SELECT DISTINCT * FROM t3"
          111  +  21  0   "SELECT DISTINCT c2 FROM t3"
          112  +
          113  +  22  0   "SELECT DISTINCT * FROM (SELECT 1, 2, 3 UNION SELECT 4, 5, 6)"
          114  +  23  1   "SELECT DISTINCT rowid FROM (SELECT 1, 2, 3 UNION SELECT 4, 5, 6)"
          115  +
          116  +  24  0   "SELECT DISTINCT rowid/2 FROM t1"
          117  +  25  1   "SELECT DISTINCT rowid/2, rowid FROM t1"
          118  +  26  1   "SELECT DISTINCT rowid/2, b FROM t1 WHERE c = ?"
          119  +} {
          120  +  if {$noop} {
          121  +    do_distinct_noop_test 1.$tn $sql
          122  +  } else {
          123  +    do_distinct_not_noop_test 1.$tn $sql
          124  +  }
          125  +}
          126  +
          127  +#-------------------------------------------------------------------------
          128  +# The following tests - distinct-2.* - test cases where an index is
          129  +# used to deliver results in order of the DISTINCT expressions. 
          130  +#
          131  +drop_all_tables
          132  +do_execsql_test 2.0 {
          133  +  CREATE TABLE t1(a, b, c);
          134  +
          135  +  CREATE INDEX i1 ON t1(a, b);
          136  +  CREATE INDEX i2 ON t1(b COLLATE nocase, c COLLATE nocase);
          137  +
          138  +  INSERT INTO t1 VALUES('a', 'b', 'c');
          139  +  INSERT INTO t1 VALUES('A', 'B', 'C');
          140  +  INSERT INTO t1 VALUES('a', 'b', 'c');
          141  +  INSERT INTO t1 VALUES('A', 'B', 'C');
          142  +}
          143  +
          144  +foreach {tn sql temptables res} {
          145  +  1   "a, b FROM t1"                                       {}      {A B a b}
          146  +  2   "b, a FROM t1"                                       {}      {B A b a}
          147  +  3   "a, b, c FROM t1"                                    {hash}  {a b c A B C}
          148  +  4   "a, b, c FROM t1 ORDER BY a, b, c"                   {btree} {A B C a b c}
          149  +  5   "b FROM t1 WHERE a = 'a'"                            {}      {b}
          150  +  6   "b FROM t1"                                          {hash}  {b B}
          151  +  7   "a FROM t1"                                          {}      {A a}
          152  +  8   "b COLLATE nocase FROM t1"                           {}      {b}
          153  +  9   "b COLLATE nocase FROM t1 ORDER BY b COLLATE nocase" {}      {B}
          154  +} {
          155  +  do_execsql_test    2.$tn.1 "SELECT DISTINCT $sql" $res
          156  +  do_temptables_test 2.$tn.2 "SELECT DISTINCT $sql" $temptables
          157  +}
          158  +
          159  +do_execsql_test 2.A {
          160  +  SELECT (SELECT DISTINCT o.a FROM t1 AS i) FROM t1 AS o;
          161  +} {a A a A}
          162  +
          163  +
          164  +
          165  +
          166  +finish_test

Changes to test/e_select.test.

  1234   1234   do_select_tests e_select-5 {
  1235   1235     3.1 "SELECT ALL x FROM h2" {One Two Three Four one two three four}
  1236   1236     3.2 "SELECT ALL x FROM h1, h2 ON (x=b)" {One one Four four}
  1237   1237   
  1238   1238     3.1 "SELECT x FROM h2" {One Two Three Four one two three four}
  1239   1239     3.2 "SELECT x FROM h1, h2 ON (x=b)" {One one Four four}
  1240   1240   
  1241         -  4.1 "SELECT DISTINCT x FROM h2" {four one three two}
  1242         -  4.2 "SELECT DISTINCT x FROM h1, h2 ON (x=b)" {four one}
         1241  +  4.1 "SELECT DISTINCT x FROM h2" {One Two Three Four}
         1242  +  4.2 "SELECT DISTINCT x FROM h1, h2 ON (x=b)" {One Four}
  1243   1243   } 
  1244   1244   
  1245   1245   # EVIDENCE-OF: R-02054-15343 For the purposes of detecting duplicate
  1246   1246   # rows, two NULL values are considered to be equal.
  1247   1247   #
  1248   1248   do_select_tests e_select-5.5 {
  1249   1249     1  "SELECT DISTINCT d FROM h3" {{} 2 2,3 2,4 3}
  1250   1250   }
  1251   1251   
  1252   1252   # EVIDENCE-OF: R-58359-52112 The normal rules for selecting a collation
  1253   1253   # sequence to compare text values with apply.
  1254   1254   #
  1255   1255   do_select_tests e_select-5.6 {
  1256         -  1  "SELECT DISTINCT b FROM h1"                  {I IV four i iv one}
  1257         -  2  "SELECT DISTINCT b COLLATE nocase FROM h1"   {four i iv one}
  1258         -  3  "SELECT DISTINCT x FROM h2"                  {four one three two}
         1256  +  1  "SELECT DISTINCT b FROM h1"                  {one I i four IV iv}
         1257  +  2  "SELECT DISTINCT b COLLATE nocase FROM h1"   {one I four IV}
         1258  +  3  "SELECT DISTINCT x FROM h2"                  {One Two Three Four}
  1259   1259     4  "SELECT DISTINCT x COLLATE binary FROM h2"   {
  1260         -    Four One Three Two four one three two
         1260  +    One Two Three Four one two three four
  1261   1261     }
  1262   1262   }
  1263   1263   
  1264   1264   #-------------------------------------------------------------------------
  1265   1265   # The following tests - e_select-7.* - test that statements made to do
  1266   1266   # with compound SELECT statements are correct.
  1267   1267   #

Changes to test/fuzzer1.test.

  1372   1372   do_test fuzzer1-2.3 {
  1373   1373     execsql {
  1374   1374       SELECT DISTINCT streetname.n FROM f2, streetname
  1375   1375        WHERE f2.word MATCH 'tayle'
  1376   1376          AND f2.distance<=200
  1377   1377          AND streetname.n>=f2.word AND streetname.n<=(f2.word || x'F7BFBFBF')
  1378   1378     }
  1379         -} {steelewood tallia tallu talwyn taymouth thelema trailer {tyler finley}}
         1379  +} {{tyler finley} trailer taymouth steelewood tallia tallu talwyn thelema}
  1380   1380   
  1381   1381   
  1382   1382   finish_test

Changes to test/insert4.test.

   108    108   #
   109    109   do_test insert4-2.4.1 {
   110    110     execsql {
   111    111       DELETE FROM t3;
   112    112       INSERT INTO t3 SELECT DISTINCT * FROM t2;
   113    113       SELECT * FROM t3;
   114    114     }
   115         -} {1 9 9 1}
          115  +} {9 1 1 9}
   116    116   xferopt_test insert4-2.4.2 0
   117    117   do_test insert4-2.4.3 {
   118    118     catchsql {
   119    119       DELETE FROM t1;
   120    120       INSERT INTO t1 SELECT DISTINCT * FROM t2;
   121    121     }
   122    122   } {1 {constraint failed}}

Changes to test/misc5.test.

   501    501             )    
   502    502             WHERE artist <> '' 
   503    503           )  
   504    504          )       
   505    505         )  
   506    506         ORDER BY LOWER(artist) ASC;
   507    507       }
   508         -  } {one}
          508  +  } {two}
   509    509   }
   510    510   
   511    511   # Ticket #1370.  Do not overwrite small files (less than 1024 bytes)
   512    512   # when trying to open them as a database.
   513    513   #
   514    514   if {[permutation] == ""} {
   515    515     do_test misc5-4.1 {

Changes to test/selectB.test.

   351    351   
   352    352     do_test selectB-$ii.19 {
   353    353       execsql {
   354    354         SELECT * FROM (
   355    355           SELECT DISTINCT (a/10) FROM t1 UNION ALL SELECT DISTINCT(d%2) FROM t2
   356    356         )
   357    357       }
   358         -  } {0 1 0 1}
          358  +  } {0 1 1 0}
   359    359   
   360    360     do_test selectB-$ii.20 {
   361    361       execsql {
   362    362         SELECT DISTINCT * FROM (
   363    363           SELECT DISTINCT (a/10) FROM t1 UNION ALL SELECT DISTINCT(d%2) FROM t2
   364    364         )
   365    365       }

Changes to test/tester.tcl.

    16     16   #-------------------------------------------------------------------------
    17     17   # The commands provided by the code in this file to help with creating 
    18     18   # test cases are as follows:
    19     19   #
    20     20   # Commands to manipulate the db and the file-system at a high level:
    21     21   #
    22     22   #      copy_file              FROM TO
    23         -#      drop_all_table         ?DB?
           23  +#      drop_all_tables        ?DB?
    24     24   #      forcedelete            FILENAME
    25     25   #
    26     26   # Test the capability of the SQLite version built into the interpreter to
    27     27   # determine if a specific test can be run:
    28     28   #
    29     29   #      ifcapable              EXPR
    30     30   #