/ Check-in [e15e1006]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Avoid unnecessary loads of columns in an aggregate query that are not within an aggregate function and that are not part of the GROUP BY clause.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: e15e100660d290249ef235e7a8927b88296e56ec0f80ec626eecbd542adc7633
User & Date: drh 2018-06-05 23:21:11
Context
2018-06-05
23:51
Update the version number to 3.25.0 for the next development cycle. check-in: 7598236c user: drh tags: trunk
23:21
Avoid unnecessary loads of columns in an aggregate query that are not within an aggregate function and that are not part of the GROUP BY clause. check-in: e15e1006 user: drh tags: trunk
20:04
Calculate non-aggregate expressions in the SELECT list of an aggregate query that does not use min() or max() once per group, instead of once per row visited. Closed-Leaf check-in: dce2dfbe user: dan tags: exp-agg-opt
13:43
Update and correct the documentation on the OP_OpenRead, OP_OpenWrite, and OP_ReopenIdx opcodes. No code changes other than the addition of an assert(). check-in: 8a0b730d user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/select.c.

  5095   5095     for(i=0, pF=pAggInfo->aFunc; i<pAggInfo->nFunc; i++, pF++){
  5096   5096       ExprList *pList = pF->pExpr->x.pList;
  5097   5097       assert( !ExprHasProperty(pF->pExpr, EP_xIsSelect) );
  5098   5098       sqlite3VdbeAddOp2(v, OP_AggFinal, pF->iMem, pList ? pList->nExpr : 0);
  5099   5099       sqlite3VdbeAppendP4(v, pF->pFunc, P4_FUNCDEF);
  5100   5100     }
  5101   5101   }
         5102  +
  5102   5103   
  5103   5104   /*
  5104   5105   ** Update the accumulator memory cells for an aggregate based on
  5105   5106   ** the current cursor position.
         5107  +**
         5108  +** If regAcc is non-zero and there are no min() or max() aggregates
         5109  +** in pAggInfo, then only populate the pAggInfo->nAccumulator accumulator
         5110  +** registers i register regAcc contains 0. The caller will take care
         5111  +** of setting and clearing regAcc.
  5106   5112   */
  5107         -static void updateAccumulator(Parse *pParse, AggInfo *pAggInfo){
         5113  +static void updateAccumulator(Parse *pParse, int regAcc, AggInfo *pAggInfo){
  5108   5114     Vdbe *v = pParse->pVdbe;
  5109   5115     int i;
  5110   5116     int regHit = 0;
  5111   5117     int addrHitTest = 0;
  5112   5118     struct AggInfo_func *pF;
  5113   5119     struct AggInfo_col *pC;
  5114   5120   
................................................................................
  5164   5170     ** to pC->iMem. But by the time the value is used, the original register
  5165   5171     ** may have been used, invalidating the underlying buffer holding the
  5166   5172     ** text or blob value. See ticket [883034dcb5].
  5167   5173     **
  5168   5174     ** Another solution would be to change the OP_SCopy used to copy cached
  5169   5175     ** values to an OP_Copy.
  5170   5176     */
         5177  +  if( regHit==0 && pAggInfo->nAccumulator ){
         5178  +    regHit = regAcc;
         5179  +  }
  5171   5180     if( regHit ){
  5172   5181       addrHitTest = sqlite3VdbeAddOp1(v, OP_If, regHit); VdbeCoverage(v);
  5173   5182     }
  5174   5183     sqlite3ExprCacheClear(pParse);
  5175   5184     for(i=0, pC=pAggInfo->aCol; i<pAggInfo->nAccumulator; i++, pC++){
  5176   5185       sqlite3ExprCode(pParse, pC->pExpr, pC->iMem);
  5177   5186     }
................................................................................
  6017   6026         addrReset = sqlite3VdbeMakeLabel(v);
  6018   6027         iAMem = pParse->nMem + 1;
  6019   6028         pParse->nMem += pGroupBy->nExpr;
  6020   6029         iBMem = pParse->nMem + 1;
  6021   6030         pParse->nMem += pGroupBy->nExpr;
  6022   6031         sqlite3VdbeAddOp2(v, OP_Integer, 0, iAbortFlag);
  6023   6032         VdbeComment((v, "clear abort flag"));
  6024         -      sqlite3VdbeAddOp2(v, OP_Integer, 0, iUseFlag);
  6025         -      VdbeComment((v, "indicate accumulator empty"));
  6026   6033         sqlite3VdbeAddOp3(v, OP_Null, 0, iAMem, iAMem+pGroupBy->nExpr-1);
  6027   6034   
  6028   6035         /* Begin a loop that will extract all source rows in GROUP BY order.
  6029   6036         ** This might involve two separate loops with an OP_Sort in between, or
  6030   6037         ** it might be a single loop that uses an index to extract information
  6031   6038         ** in the right order to begin with.
  6032   6039         */
................................................................................
  6151   6158         sqlite3VdbeAddOp2(v, OP_Gosub, regReset, addrReset);
  6152   6159         VdbeComment((v, "reset accumulator"));
  6153   6160   
  6154   6161         /* Update the aggregate accumulators based on the content of
  6155   6162         ** the current row
  6156   6163         */
  6157   6164         sqlite3VdbeJumpHere(v, addr1);
  6158         -      updateAccumulator(pParse, &sAggInfo);
         6165  +      updateAccumulator(pParse, iUseFlag, &sAggInfo);
  6159   6166         sqlite3VdbeAddOp2(v, OP_Integer, 1, iUseFlag);
  6160   6167         VdbeComment((v, "indicate data in accumulator"));
  6161   6168   
  6162   6169         /* End of the loop
  6163   6170         */
  6164   6171         if( groupBySort ){
  6165   6172           sqlite3VdbeAddOp2(v, OP_SorterNext, sAggInfo.sortingIdx, addrTopOfLoop);
................................................................................
  6203   6210         sqlite3VdbeAddOp1(v, OP_Return, regOutputRow);
  6204   6211         VdbeComment((v, "end groupby result generator"));
  6205   6212   
  6206   6213         /* Generate a subroutine that will reset the group-by accumulator
  6207   6214         */
  6208   6215         sqlite3VdbeResolveLabel(v, addrReset);
  6209   6216         resetAccumulator(pParse, &sAggInfo);
         6217  +      sqlite3VdbeAddOp2(v, OP_Integer, 0, iUseFlag);
         6218  +      VdbeComment((v, "indicate accumulator empty"));
  6210   6219         sqlite3VdbeAddOp1(v, OP_Return, regReset);
  6211   6220        
  6212   6221       } /* endif pGroupBy.  Begin aggregate queries without GROUP BY: */
  6213   6222       else {
  6214   6223   #ifndef SQLITE_OMIT_BTREECOUNT
  6215   6224         Table *pTab;
  6216   6225         if( (pTab = isSimpleCount(p, &sAggInfo))!=0 ){
................................................................................
  6268   6277           }
  6269   6278           sqlite3VdbeAddOp2(v, OP_Count, iCsr, sAggInfo.aFunc[0].iMem);
  6270   6279           sqlite3VdbeAddOp1(v, OP_Close, iCsr);
  6271   6280           explainSimpleCount(pParse, pTab, pBest);
  6272   6281         }else
  6273   6282   #endif /* SQLITE_OMIT_BTREECOUNT */
  6274   6283         {
         6284  +        int regAcc = 0;           /* "populate accumulators" flag */
         6285  +
         6286  +        /* If there are accumulator registers but no min() or max() functions,
         6287  +        ** allocate register regAcc. Register regAcc will contain 0 the first
         6288  +        ** time the inner loop runs, and 1 thereafter. The code generated
         6289  +        ** by updateAccumulator() only updates the accumulator registers if
         6290  +        ** regAcc contains 0.  */
         6291  +        if( sAggInfo.nAccumulator ){
         6292  +          for(i=0; i<sAggInfo.nFunc; i++){
         6293  +            if( sAggInfo.aFunc[i].pFunc->funcFlags&SQLITE_FUNC_NEEDCOLL ) break;
         6294  +          }
         6295  +          if( i==sAggInfo.nFunc ){
         6296  +            regAcc = ++pParse->nMem;
         6297  +            sqlite3VdbeAddOp2(v, OP_Integer, 0, regAcc);
         6298  +          }
         6299  +        }
         6300  +
  6275   6301           /* This case runs if the aggregate has no GROUP BY clause.  The
  6276   6302           ** processing is much simpler since there is only a single row
  6277   6303           ** of output.
  6278   6304           */
  6279   6305           assert( p->pGroupBy==0 );
  6280   6306           resetAccumulator(pParse, &sAggInfo);
  6281   6307   
................................................................................
  6289   6315   
  6290   6316           SELECTTRACE(1,pParse,p,("WhereBegin\n"));
  6291   6317           pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, pMinMaxOrderBy,
  6292   6318                                      0, minMaxFlag, 0);
  6293   6319           if( pWInfo==0 ){
  6294   6320             goto select_end;
  6295   6321           }
  6296         -        updateAccumulator(pParse, &sAggInfo);
         6322  +        updateAccumulator(pParse, regAcc, &sAggInfo);
         6323  +        if( regAcc ) sqlite3VdbeAddOp2(v, OP_Integer, 1, regAcc);
  6297   6324           if( sqlite3WhereIsOrdered(pWInfo)>0 ){
  6298   6325             sqlite3VdbeGoto(v, sqlite3WhereBreakLabel(pWInfo));
  6299   6326             VdbeComment((v, "%s() by index",
  6300   6327                   (minMaxFlag==WHERE_ORDERBY_MIN?"min":"max")));
  6301   6328           }
  6302   6329           sqlite3WhereEnd(pWInfo);
  6303   6330           finalizeAggFunctions(pParse, &sAggInfo);

Changes to test/aggnested.test.

    61     61       NULL,B4 INTEGER NOT NULL,PRIMARY KEY(B1));
    62     62       REPLACE INTO t2 VALUES(1,88,888,8888);
    63     63       REPLACE INTO t2 VALUES(2,99,999,9999);
    64     64       SELECT (SELECT GROUP_CONCAT(CASE WHEN a1=1 THEN'A' ELSE 'B' END) FROM t2),
    65     65               t1.* 
    66     66       FROM t1;
    67     67     }
    68         -} {A,B,B 3 33 333 3333}
           68  +} {A,B,B 1 11 111 1111}
    69     69   db2 close
    70     70   
    71     71   ##################### Test cases for ticket [bfbf38e5e9956ac69f] ############
    72     72   #
    73     73   # This first test case is the original problem report:
    74     74   do_test aggnested-3.0 {
    75     75     db eval {

Changes to test/e_select.test.

   797    797   do_select_tests e_select-4.1 {
   798    798     1  "SELECT * FROM z1 LIMIT 1"             {51.65 -59.58 belfries}
   799    799     2  "SELECT * FROM z1,z2 LIMIT 1"          {51.65 -59.58 belfries {} 21}
   800    800     3  "SELECT z1.* FROM z1,z2 LIMIT 1"       {51.65 -59.58 belfries}
   801    801     4  "SELECT z2.* FROM z1,z2 LIMIT 1"       {{} 21}
   802    802     5  "SELECT z2.*, z1.* FROM z1,z2 LIMIT 1" {{} 21 51.65 -59.58 belfries}
   803    803   
   804         -  6  "SELECT count(*), * FROM z1"           {6 63 born -26}
          804  +  6  "SELECT count(*), * FROM z1"           {6 51.65 -59.58 belfries}
   805    805     7  "SELECT max(a), * FROM z1"             {63 63 born -26}
   806    806     8  "SELECT *, min(a) FROM z1"             {-5 {} 75 -5}
   807    807   
   808    808     9  "SELECT *,* FROM z1,z2 LIMIT 1" {        
   809    809        51.65 -59.58 belfries {} 21 51.65 -59.58 belfries {} 21
   810    810     }
   811    811     10 "SELECT z1.*,z1.* FROM z2,z1 LIMIT 1" {        
................................................................................
   935    935     CREATE TABLE a2(one PRIMARY KEY, three);
   936    936     INSERT INTO a2 VALUES(1, 1);
   937    937     INSERT INTO a2 VALUES(3, 2);
   938    938     INSERT INTO a2 VALUES(6, 3);
   939    939     INSERT INTO a2 VALUES(10, 4);
   940    940   } {}
   941    941   do_select_tests e_select-4.6 {
   942         -  1 "SELECT one, two, count(*) FROM a1"                        {4 10 4} 
   943         -  2 "SELECT one, two, count(*) FROM a1 WHERE one<3"            {2 3 2} 
          942  +  1 "SELECT one, two, count(*) FROM a1"                        {1 1 4}
          943  +  2 "SELECT one, two, count(*) FROM a1 WHERE one<3"            {1 1 2}
   944    944     3 "SELECT one, two, count(*) FROM a1 WHERE one>3"            {4 10 1} 
   945         -  4 "SELECT *, count(*) FROM a1 JOIN a2"                       {4 10 10 4 16} 
   946         -  5 "SELECT *, sum(three) FROM a1 NATURAL JOIN a2"             {3 6 2 3}
   947         -  6 "SELECT *, sum(three) FROM a1 NATURAL JOIN a2"             {3 6 2 3}
   948         -  7 "SELECT group_concat(three, ''), a1.* FROM a1 NATURAL JOIN a2" {12 3 6}
          945  +  4 "SELECT *, count(*) FROM a1 JOIN a2"                       {1 1 1 1 16}
          946  +  5 "SELECT *, sum(three) FROM a1 NATURAL JOIN a2"             {1 1 1 3}
          947  +  6 "SELECT *, sum(three) FROM a1 NATURAL JOIN a2"             {1 1 1 3}
          948  +  7 "SELECT group_concat(three, ''), a1.* FROM a1 NATURAL JOIN a2" {12 1 1}
   949    949   }
   950    950   
   951    951   # EVIDENCE-OF: R-04486-07266 Or, if the dataset contains zero rows, then
   952    952   # each non-aggregate expression is evaluated against a row consisting
   953    953   # entirely of NULL values.
   954    954   #
   955    955   do_select_tests e_select-4.7 {
................................................................................
  1124   1124     1.2  "SELECT up FROM c1 GROUP BY up HAVING sum(down)>16" {y}
  1125   1125     1.3  "SELECT up FROM c1 GROUP BY up HAVING sum(down)<16" {x}
  1126   1126     1.4  "SELECT up||down FROM c1 GROUP BY (down<5) HAVING max(down)<10" {x4}
  1127   1127   
  1128   1128     2.1  "SELECT up FROM c1 GROUP BY up HAVING down>10" {y}
  1129   1129     2.2  "SELECT up FROM c1 GROUP BY up HAVING up='y'"  {y}
  1130   1130   
  1131         -  2.3  "SELECT i, j FROM c2 GROUP BY i>4 HAVING i>6"  {9 36}
         1131  +  2.3  "SELECT i, j FROM c2 GROUP BY i>4 HAVING j>6"  {5 10}
  1132   1132   }
  1133   1133   
  1134   1134   # EVIDENCE-OF: R-23927-54081 Each expression in the result-set is then
  1135   1135   # evaluated once for each group of rows.
  1136   1136   #
  1137   1137   # EVIDENCE-OF: R-53735-47017 If the expression is an aggregate
  1138   1138   # expression, it is evaluated across all rows in the group.
................................................................................
  1150   1150   # arbitrarily chosen row from within the group.
  1151   1151   #
  1152   1152   # EVIDENCE-OF: R-53924-08809 If there is more than one non-aggregate
  1153   1153   # expression in the result-set, then all such expressions are evaluated
  1154   1154   # for the same row.
  1155   1155   #
  1156   1156   do_select_tests e_select-4.15 {
  1157         -  1  "SELECT i, j FROM c2 GROUP BY i%2"             {8 28   9 36}
  1158         -  2  "SELECT i, j FROM c2 GROUP BY i%2 HAVING j<30" {8 28}
  1159         -  3  "SELECT i, j FROM c2 GROUP BY i%2 HAVING j>30" {9 36}
  1160         -  4  "SELECT i, j FROM c2 GROUP BY i%2 HAVING j>30" {9 36}
         1157  +  1  "SELECT i, j FROM c2 GROUP BY i%2"             {2 1 1 0}
         1158  +  2  "SELECT i, j FROM c2 GROUP BY i%2 HAVING j<30" {2 1 1 0}
         1159  +  3  "SELECT i, j FROM c2 GROUP BY i%2 HAVING j>30" {}
         1160  +  4  "SELECT i, j FROM c2 GROUP BY i%2 HAVING j>30" {}
  1161   1161     5  "SELECT count(*), i, k FROM c2 NATURAL JOIN c3 GROUP BY substr(k, 1, 1)"
  1162         -        {2 5 boron   2 2 helium   1 3 lithium}
         1162  +        {2 4 beryllium 2 1 hydrogen 1 3 lithium}
  1163   1163   } 
  1164   1164   
  1165   1165   # EVIDENCE-OF: R-19334-12811 Each group of input dataset rows
  1166   1166   # contributes a single row to the set of result rows.
  1167   1167   #
  1168   1168   # EVIDENCE-OF: R-02223-49279 Subject to filtering associated with the
  1169   1169   # DISTINCT keyword, the number of rows returned by an aggregate query

Changes to test/select5.test.

   150    150       SELECT a, b FROM t2 GROUP BY a, b;
   151    151     } 
   152    152   } {1 2 1 4 6 4}
   153    153   do_test select5-5.5 {
   154    154     execsql {
   155    155       SELECT a, b FROM t2 GROUP BY a;
   156    156     } 
   157         -} {1 4 6 4}
          157  +} {1 2 6 4}
   158    158   
   159    159   # Test rendering of columns for the GROUP BY clause.
   160    160   #
   161    161   do_test select5-5.11 {
   162    162     execsql {
   163    163       SELECT max(c), b*a, b, a FROM t2 GROUP BY b*a, b, a
   164    164     }