/ Check-in [d1248165]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add code to actually use the sqlite_stat2.cnt field in the query planner. This changes some plans resulting in a few failures in analyze5.test.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | query-planner-tweaks
Files: files | file ages | folders
SHA1: d1248165e3e02aaf8a2a7872793918b4a9f102a8
User & Date: drh 2011-08-08 17:18:40
Original Comment: Add code to actually use the sqlite_stat2.cnt field in the query planner. This changes some plans resulting in a few failures in analyze5.test.
Context
2011-08-08
17:18
Add code to actually use the sqlite_stat2.cnt field in the query planner. This changes some plans resulting in a few failures in analyze5.test. Closed-Leaf check-in: d1248165 user: drh tags: query-planner-tweaks
2011-08-07
00:21
The ANALYZE command adds the sqlite_stat2.cnt column if it does not already exist. check-in: 794fde6f user: drh tags: query-planner-tweaks
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/analyze.c.

   835    835             pSample = allocIndexSample(db, &pIdx->comkey, pIdx->comkey.n);
   836    836           }else{
   837    837             pSample = allocIndexSample(db, &pIdx->sample, iSample);
   838    838           }
   839    839           if( pSample==0 ) break;
   840    840           eType = sqlite3_column_type(pStmt, 2);
   841    841           pSample->eType = (u8)eType;
   842         -        pSample->nCopy = sqlite3_column_int(pStmt, 4);
   843         -        if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){
          842  +        pSample->nCopy = sqlite3_column_int(pStmt, 3);
          843  +        if( eType==SQLITE_INTEGER ){
          844  +          pSample->u.i = sqlite3_column_int64(pStmt, 2);
          845  +        }else if( eType==SQLITE_FLOAT ){
   844    846             pSample->u.r = sqlite3_column_double(pStmt, 2);
   845    847           }else if( eType==SQLITE_TEXT || eType==SQLITE_BLOB ){
   846    848             const char *z = (const char *)(
   847    849                (eType==SQLITE_BLOB) ?
   848    850                 sqlite3_column_blob(pStmt, 2):
   849    851                 sqlite3_column_text(pStmt, 2)
   850    852             );

Changes to src/sqliteInt.h.

  1454   1454   /*
  1455   1455   ** Each sample stored in the sqlite_stat2 table is represented in memory 
  1456   1456   ** using a structure of this type.
  1457   1457   */
  1458   1458   struct IndexSample {
  1459   1459     union {
  1460   1460       char *z;        /* Value if eType is SQLITE_TEXT or SQLITE_BLOB */
  1461         -    double r;       /* Value if eType is SQLITE_FLOAT or SQLITE_INTEGER */
         1461  +    double r;       /* Value if eType is SQLITE_FLOAT */
         1462  +    i64 i;          /* Value if eType is SQLITE_INTEGER */
  1462   1463     } u;
  1463   1464     u8 eType;         /* SQLITE_NULL, SQLITE_INTEGER ... etc. */
  1464   1465     u8 nByte;         /* Size in byte of text or blob. */
  1465   1466     u32 nCopy;        /* How many copies of this sample are in the database */
  1466   1467   };
  1467   1468   
  1468   1469   /*

Changes to src/where.c.

  2418   2418     */
  2419   2419     bestOrClauseIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, pCost);
  2420   2420   }
  2421   2421   #endif /* SQLITE_OMIT_VIRTUALTABLE */
  2422   2422   
  2423   2423   /*
  2424   2424   ** Argument pIdx is a pointer to an index structure that has an array of
  2425         -** pIdx->sample.n evenly spaced samples of the first indexed column
  2426         -** stored in Index.sample. These samples divide the domain of values stored
  2427         -** the index into (pIdx->sample.n+1) regions.
  2428         -** Region 0 contains all values less than the first sample value. Region
  2429         -** 1 contains values between the first and second samples.  Region 2 contains
  2430         -** values between samples 2 and 3.  And so on.  Region pIdx->sample.n
  2431         -** contains values larger than the last sample.
         2425  +** pIdx->sample.n (hereafter "S") evenly spaced samples of the first indexed
         2426  +** column stored in Index.sample. These samples divide the domain of values
         2427  +** stored the index into S+1 regions.  Region 0 contains all values less than
         2428  +** the first sample value. Region 1 contains values between the first and
         2429  +** second samples.  Region 2 contains values between samples 2 and 3.  And so
         2430  +** on.  Region S contains values larger than the last sample.
         2431  +**
         2432  +** Note that samples are computed as being centered on S buckets where each
         2433  +** bucket contains the nearly same number of rows.  This routine takes samples
         2434  +** to be dividers between regions, though.  Hence, region 0 and region S
         2435  +** contain half as many rows as the interior regions.
  2432   2436   **
  2433   2437   ** If the index contains many duplicates of a single value, then it is
  2434   2438   ** possible that two or more adjacent samples can hold the same value.
  2435   2439   ** When that is the case, the smallest possible region code is returned
  2436   2440   ** when roundUp is false and the largest possible region code is returned
  2437   2441   ** when roundUp is true.
  2438   2442   **
  2439   2443   ** If successful, this function determines which of the regions value 
  2440   2444   ** pVal lies in, sets *piRegion to the region index (a value between 0
  2441         -** and S+1, inclusive) and returns SQLITE_OK.
         2445  +** and S, inclusive) and returns SQLITE_OK.
  2442   2446   ** Or, if an OOM occurs while converting text values between encodings,
  2443   2447   ** SQLITE_NOMEM is returned and *piRegion is undefined.
  2444   2448   */
  2445   2449   #ifdef SQLITE_ENABLE_STAT2
  2446   2450   static int whereRangeRegion(
  2447   2451     Parse *pParse,              /* Database connection */
  2448   2452     Index *pIdx,                /* Index to consider domain of */
  2449   2453     sqlite3_value *pVal,        /* Value to consider */
  2450   2454     int roundUp,                /* Return largest valid region if true */
  2451         -  int *piRegion               /* OUT: Region of domain in which value lies */
         2455  +  int *piRegion,              /* OUT: Region of domain in which value lies */
         2456  +  u32 *pnCopy                 /* OUT: Number of rows with pVal, or -1 if unk */
  2452   2457   ){
  2453   2458     assert( roundUp==0 || roundUp==1 );
  2454   2459     if( ALWAYS(pVal) ){
  2455   2460       IndexSample *aSample = pIdx->sample.a;
  2456   2461       int nSample = pIdx->sample.n;
  2457   2462       int i = 0;
  2458   2463       int eType = sqlite3_value_type(pVal);
  2459   2464   
  2460   2465       assert( nSample>0 );
  2461         -    if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){
         2466  +    if( eType==SQLITE_INTEGER ){
         2467  +      i64 x = sqlite3_value_int64(pVal);
         2468  +      for(i=0; i<nSample; i++){
         2469  +        if( aSample[i].eType==SQLITE_NULL ) continue;
         2470  +        if( aSample[i].eType>=SQLITE_TEXT ) break;
         2471  +        if( aSample[i].u.i==x ) *pnCopy = aSample[i].nCopy;
         2472  +        if( roundUp ){
         2473  +          if( aSample[i].u.i>x ) break;
         2474  +        }else{
         2475  +          if( aSample[i].u.i>=x ) break;
         2476  +        }
         2477  +      }
         2478  +    }else if( eType==SQLITE_FLOAT ){
  2462   2479         double r = sqlite3_value_double(pVal);
  2463   2480         for(i=0; i<nSample; i++){
  2464   2481           if( aSample[i].eType==SQLITE_NULL ) continue;
  2465   2482           if( aSample[i].eType>=SQLITE_TEXT ) break;
         2483  +        if( aSample[i].u.r==r ) *pnCopy = aSample[i].nCopy;
  2466   2484           if( roundUp ){
  2467   2485             if( aSample[i].u.r>r ) break;
  2468   2486           }else{
  2469   2487             if( aSample[i].u.r>=r ) break;
  2470   2488           }
  2471   2489         }
  2472   2490       }else if( eType==SQLITE_NULL ){
  2473   2491         i = 0;
         2492  +      if( aSample[0].eType==SQLITE_NULL ) *pnCopy = aSample[0].nCopy;
  2474   2493         if( roundUp ){
  2475   2494           while( i<nSample && aSample[i].eType==SQLITE_NULL ) i++;
  2476   2495         }
  2477   2496       }else{ 
  2478   2497         sqlite3 *db = pParse->db;
  2479   2498         CollSeq *pColl;
  2480   2499         const u8 *z;
  2481   2500         int n;
  2482   2501   
  2483         -      /* pVal comes from sqlite3ValueFromExpr() so the type cannot be NULL */
  2484   2502         assert( eType==SQLITE_TEXT || eType==SQLITE_BLOB );
  2485         -
  2486   2503         if( eType==SQLITE_BLOB ){
  2487   2504           z = (const u8 *)sqlite3_value_blob(pVal);
  2488   2505           pColl = db->pDfltColl;
  2489   2506           assert( pColl->enc==SQLITE_UTF8 );
  2490   2507         }else{
  2491   2508           pColl = sqlite3GetCollSeq(db, SQLITE_UTF8, 0, *pIdx->azColl);
  2492   2509           if( pColl==0 ){
................................................................................
  2520   2537             c = pColl->xCmp(pColl->pUser, nSample, zSample, n, z);
  2521   2538             sqlite3DbFree(db, zSample);
  2522   2539           }else
  2523   2540   #endif
  2524   2541           {
  2525   2542             c = pColl->xCmp(pColl->pUser, aSample[i].nByte, aSample[i].u.z, n, z);
  2526   2543           }
         2544  +        if( c==0 ) *pnCopy = aSample[i].nCopy;
  2527   2545           if( c-roundUp>=0 ) break;
  2528   2546         }
  2529   2547       }
  2530   2548   
  2531   2549       assert( i>=0 && i<=pIdx->sample.n );
  2532   2550       *piRegion = i;
  2533   2551     }
................................................................................
  2628   2646       sqlite3_value *pUpperVal = 0;
  2629   2647       int iEst;
  2630   2648       int iLower = 0;
  2631   2649       int nSample = p->sample.n;
  2632   2650       int iUpper = p->sample.n;
  2633   2651       int roundUpUpper = 0;
  2634   2652       int roundUpLower = 0;
         2653  +    u32 nC = 0;
  2635   2654       u8 aff = p->pTable->aCol[p->aiColumn[0]].affinity;
  2636   2655   
  2637   2656       if( pLower ){
  2638   2657         Expr *pExpr = pLower->pExpr->pRight;
  2639   2658         rc = valueFromExpr(pParse, pExpr, aff, &pLowerVal);
  2640   2659         assert( pLower->eOperator==WO_GT || pLower->eOperator==WO_GE );
  2641   2660         roundUpLower = (pLower->eOperator==WO_GT) ?1:0;
................................................................................
  2648   2667       }
  2649   2668   
  2650   2669       if( rc!=SQLITE_OK || (pLowerVal==0 && pUpperVal==0) ){
  2651   2670         sqlite3ValueFree(pLowerVal);
  2652   2671         sqlite3ValueFree(pUpperVal);
  2653   2672         goto range_est_fallback;
  2654   2673       }else if( pLowerVal==0 ){
  2655         -      rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper);
         2674  +      rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper, &nC);
  2656   2675         if( pLower ) iLower = iUpper/2;
  2657   2676       }else if( pUpperVal==0 ){
  2658         -      rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower);
         2677  +      rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower, &nC);
  2659   2678         if( pUpper ) iUpper = (iLower + p->sample.n + 1)/2;
  2660   2679       }else{
  2661         -      rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper);
         2680  +      rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper, &nC);
  2662   2681         if( rc==SQLITE_OK ){
  2663         -        rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower);
         2682  +        rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower, &nC);
  2664   2683         }
  2665   2684       }
  2666   2685       WHERETRACE(("range scan regions: %d..%d\n", iLower, iUpper));
  2667         -
  2668   2686       iEst = iUpper - iLower;
  2669   2687       testcase( iEst==nSample );
  2670   2688       assert( iEst<=nSample );
  2671   2689       assert( nSample>0 );
  2672   2690       if( iEst<1 ){
  2673   2691         *piEst = 50/nSample;
  2674   2692       }else{
................................................................................
  2716   2734     double *pnRow        /* Write the revised row estimate here */
  2717   2735   ){
  2718   2736     sqlite3_value *pRhs = 0;  /* VALUE on right-hand side of pTerm */
  2719   2737     int iLower, iUpper;       /* Range of histogram regions containing pRhs */
  2720   2738     u8 aff;                   /* Column affinity */
  2721   2739     int rc;                   /* Subfunction return code */
  2722   2740     double nRowEst;           /* New estimate of the number of rows */
         2741  +  u32 nC = 0;               /* Key copy count */
  2723   2742   
  2724   2743     assert( p->sample.a!=0 );
  2725   2744     assert( p->sample.n>0 );
  2726   2745     aff = p->pTable->aCol[p->aiColumn[0]].affinity;
  2727   2746     if( pExpr ){
  2728   2747       rc = valueFromExpr(pParse, pExpr, aff, &pRhs);
  2729   2748       if( rc ) goto whereEqualScanEst_cancel;
  2730   2749     }else{
  2731   2750       pRhs = sqlite3ValueNew(pParse->db);
  2732   2751     }
  2733   2752     if( pRhs==0 ) return SQLITE_NOTFOUND;
  2734         -  rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower);
         2753  +  rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower, &nC);
  2735   2754     if( rc ) goto whereEqualScanEst_cancel;
  2736         -  rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper);
  2737         -  if( rc ) goto whereEqualScanEst_cancel;
  2738         -  WHERETRACE(("equality scan regions: %d..%d\n", iLower, iUpper));
  2739         -  if( iLower>=iUpper ){
  2740         -    nRowEst = p->aiRowEst[0]/(p->sample.n*3);
  2741         -    if( nRowEst<*pnRow ) *pnRow = nRowEst;
         2755  +  if( nC==0 ){
         2756  +    rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper, &nC);
         2757  +    if( rc ) goto whereEqualScanEst_cancel;
         2758  +  }
         2759  +  if( nC ){
         2760  +    WHERETRACE(("equality scan count: %u\n", nC));
         2761  +    *pnRow = nC;
  2742   2762     }else{
  2743         -    nRowEst = (iUpper-iLower)*p->aiRowEst[0]/p->sample.n;
  2744         -    *pnRow = nRowEst;
         2763  +    WHERETRACE(("equality scan regions: %d..%d\n", iLower, iUpper));
         2764  +    if( iLower>=iUpper ){
         2765  +      nRowEst = p->aiRowEst[0]/(p->sample.n*3);
         2766  +      if( nRowEst<*pnRow ) *pnRow = nRowEst;
         2767  +    }else{
         2768  +      nRowEst = (iUpper-iLower)*p->aiRowEst[0]/p->sample.n;
         2769  +      *pnRow = nRowEst;
         2770  +    }
  2745   2771     }
  2746   2772   
  2747   2773   whereEqualScanEst_cancel:
  2748   2774     sqlite3ValueFree(pRhs);
  2749   2775     return rc;
  2750   2776   }
  2751   2777   #endif /* defined(SQLITE_ENABLE_STAT2) */
................................................................................
  2778   2804     u8 aff;                   /* Column affinity */
  2779   2805     int rc = SQLITE_OK;       /* Subfunction return code */
  2780   2806     double nRowEst;           /* New estimate of the number of rows */
  2781   2807     int nSpan = 0;            /* Number of histogram regions spanned */
  2782   2808     int nSingle = 0;          /* Histogram regions hit by a single value */
  2783   2809     int nNotFound = 0;        /* Count of values that are not constants */
  2784   2810     int i;                             /* Loop counter */
         2811  +  u32 nC;                            /* Exact count of rows for a key */
  2785   2812     int nSample = p->sample.n;         /* Number of samples */
  2786   2813     u8 aSpan[SQLITE_MAX_SAMPLES+1];    /* Histogram regions that are spanned */
  2787   2814     u8 aSingle[SQLITE_MAX_SAMPLES+1];  /* Histogram regions hit once */
  2788   2815   
  2789   2816     assert( p->sample.a!=0 );
  2790   2817     assert( nSample>0 );
  2791   2818     aff = p->pTable->aCol[p->aiColumn[0]].affinity;
................................................................................
  2795   2822       sqlite3ValueFree(pVal);
  2796   2823       rc = valueFromExpr(pParse, pList->a[i].pExpr, aff, &pVal);
  2797   2824       if( rc ) break;
  2798   2825       if( pVal==0 || sqlite3_value_type(pVal)==SQLITE_NULL ){
  2799   2826         nNotFound++;
  2800   2827         continue;
  2801   2828       }
  2802         -    rc = whereRangeRegion(pParse, p, pVal, 0, &iLower);
         2829  +    rc = whereRangeRegion(pParse, p, pVal, 0, &iLower, &nC);
  2803   2830       if( rc ) break;
  2804         -    rc = whereRangeRegion(pParse, p, pVal, 1, &iUpper);
         2831  +    rc = whereRangeRegion(pParse, p, pVal, 1, &iUpper, &nC);
  2805   2832       if( rc ) break;
  2806   2833       if( iLower>=iUpper ){
  2807   2834         aSingle[iLower] = 1;
  2808   2835       }else{
  2809   2836         assert( iLower>=0 && iUpper<=nSample );
  2810   2837         while( iLower<iUpper ) aSpan[iLower++] = 1;
  2811   2838       }