Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Allow the sqlite_stat2 table to contain a number of samples other than the default of 10. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | query-planner-tweaks |
Files: | files | file ages | folders |
SHA1: |
b9d41c3490bf325915227a32b612e677 |
User & Date: | drh 2011-08-05 01:09:49.877 |
Context
2011-08-06
| ||
02:03 | Merge together the fork in the query-planner-tweaks branch. (check-in: 2daab6bd42 user: drh tags: query-planner-tweaks) | |
2011-08-05
| ||
01:09 | Allow the sqlite_stat2 table to contain a number of samples other than the default of 10. (check-in: b9d41c3490 user: drh tags: query-planner-tweaks) | |
2011-08-03
| ||
01:07 | Adjust the output row estimating logic when using STAT2. (check-in: c8ba8855d8 user: drh tags: query-planner-tweaks) | |
Changes
Changes to src/analyze.c.
︙ | ︙ | |||
560 561 562 563 564 565 566 | ** If the Index.aSample variable is not NULL, delete the aSample[] array ** and its contents. */ void sqlite3DeleteIndexSamples(sqlite3 *db, Index *pIdx){ #ifdef SQLITE_ENABLE_STAT2 if( pIdx->aSample ){ int j; | | | 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 | ** If the Index.aSample variable is not NULL, delete the aSample[] array ** and its contents. */ void sqlite3DeleteIndexSamples(sqlite3 *db, Index *pIdx){ #ifdef SQLITE_ENABLE_STAT2 if( pIdx->aSample ){ int j; for(j=0; j<pIdx->nSample; j++){ IndexSample *p = &pIdx->aSample[j]; if( p->eType==SQLITE_TEXT || p->eType==SQLITE_BLOB ){ sqlite3DbFree(db, p->u.z); } } sqlite3DbFree(db, pIdx->aSample); } |
︙ | ︙ | |||
610 611 612 613 614 615 616 617 618 619 620 621 622 623 | /* Clear any prior statistics */ assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); for(i=sqliteHashFirst(&db->aDb[iDb].pSchema->idxHash);i;i=sqliteHashNext(i)){ Index *pIdx = sqliteHashData(i); sqlite3DefaultRowEst(pIdx); sqlite3DeleteIndexSamples(db, pIdx); pIdx->aSample = 0; } /* Check to make sure the sqlite_stat1 table exists */ sInfo.db = db; sInfo.zDatabase = db->aDb[iDb].zName; if( sqlite3FindTable(db, "sqlite_stat1", sInfo.zDatabase)==0 ){ return SQLITE_ERROR; | > | 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 | /* Clear any prior statistics */ assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); for(i=sqliteHashFirst(&db->aDb[iDb].pSchema->idxHash);i;i=sqliteHashNext(i)){ Index *pIdx = sqliteHashData(i); sqlite3DefaultRowEst(pIdx); sqlite3DeleteIndexSamples(db, pIdx); pIdx->aSample = 0; pIdx->nSample = 0; } /* Check to make sure the sqlite_stat1 table exists */ sInfo.db = db; sInfo.zDatabase = db->aDb[iDb].zName; if( sqlite3FindTable(db, "sqlite_stat1", sInfo.zDatabase)==0 ){ return SQLITE_ERROR; |
︙ | ︙ | |||
639 640 641 642 643 644 645 | if( rc==SQLITE_OK && !sqlite3FindTable(db, "sqlite_stat2", sInfo.zDatabase) ){ rc = SQLITE_ERROR; } if( rc==SQLITE_OK ){ sqlite3_stmt *pStmt = 0; zSql = sqlite3MPrintf(db, | | > > > > > | | | | < | | | | | | | | | > | | < < | | | | | | | | | | | | < < | | | | | | | | < < < | 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 | if( rc==SQLITE_OK && !sqlite3FindTable(db, "sqlite_stat2", sInfo.zDatabase) ){ rc = SQLITE_ERROR; } if( rc==SQLITE_OK ){ sqlite3_stmt *pStmt = 0; zSql = sqlite3MPrintf(db, "SELECT idx, sampleno, sample FROM %Q.sqlite_stat2" " ORDER BY rowid DESC", sInfo.zDatabase); if( !zSql ){ rc = SQLITE_NOMEM; }else{ rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0); sqlite3DbFree(db, zSql); } if( rc==SQLITE_OK ){ while( sqlite3_step(pStmt)==SQLITE_ROW ){ char *zIndex; /* Index name */ Index *pIdx; /* Pointer to the index object */ int iSample; int eType; IndexSample *pSample; zIndex = (char *)sqlite3_column_text(pStmt, 0); if( zIndex==0 ) continue; pIdx = sqlite3FindIndex(db, zIndex, sInfo.zDatabase); if( pIdx==0 ) continue; iSample = sqlite3_column_int(pStmt, 1); if( iSample>=SQLITE_MAX_SAMPLES || iSample<0 ) continue; if( pIdx->nSample<=iSample ){ IndexSample *pNew; int sz = sizeof(IndexSample)*(iSample+1); pNew = (IndexSample*)sqlite3Realloc(pIdx->aSample, sz); if( pNew==0 ){ db->mallocFailed = 1; break; } pIdx->aSample = pNew; pIdx->nSample = iSample+1; } eType = sqlite3_column_type(pStmt, 2); pSample = &pIdx->aSample[iSample]; pSample->eType = (u8)eType; if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){ pSample->u.r = sqlite3_column_double(pStmt, 2); }else if( eType==SQLITE_TEXT || eType==SQLITE_BLOB ){ const char *z = (const char *)( (eType==SQLITE_BLOB) ? sqlite3_column_blob(pStmt, 2): sqlite3_column_text(pStmt, 2) ); int n = sqlite3_column_bytes(pStmt, 2); if( n>24 ) n = 24; pSample->nByte = (u8)n; if( n < 1){ pSample->u.z = 0; }else{ pSample->u.z = sqlite3DbStrNDup(0, z, n); if( pSample->u.z==0 ){ db->mallocFailed = 1; break; } } } } rc = sqlite3_finalize(pStmt); } } |
︙ | ︙ |
Changes to src/sqliteInt.h.
︙ | ︙ | |||
80 81 82 83 84 85 86 87 88 89 90 91 92 93 | ** The number of samples of an index that SQLite takes in order to ** construct a histogram of the table content when running ANALYZE ** and with SQLITE_ENABLE_STAT2 */ #ifndef SQLITE_INDEX_SAMPLES # define SQLITE_INDEX_SAMPLES 10 #endif /* ** The following macros are used to cast pointers to integers and ** integers to pointers. The way you do this varies from one compiler ** to the next, so we have developed the following set of #if statements ** to generate appropriate macros for a wide range of compilers. ** | > | 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 | ** The number of samples of an index that SQLite takes in order to ** construct a histogram of the table content when running ANALYZE ** and with SQLITE_ENABLE_STAT2 */ #ifndef SQLITE_INDEX_SAMPLES # define SQLITE_INDEX_SAMPLES 10 #endif #define SQLITE_MAX_SAMPLES 100 /* ** The following macros are used to cast pointers to integers and ** integers to pointers. The way you do this varies from one compiler ** to the next, so we have developed the following set of #if statements ** to generate appropriate macros for a wide range of compilers. ** |
︙ | ︙ | |||
1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 | int *aiColumn; /* Which columns are used by this index. 1st is 0 */ unsigned *aiRowEst; /* Result of ANALYZE: Est. rows selected by each column */ Table *pTable; /* The SQL table being indexed */ int tnum; /* Page containing root of this index in database file */ u8 onError; /* OE_Abort, OE_Ignore, OE_Replace, or OE_None */ u8 autoIndex; /* True if is automatically created (ex: by UNIQUE) */ u8 bUnordered; /* Use this index for == or IN queries only */ char *zColAff; /* String defining the affinity of each column */ Index *pNext; /* The next index associated with the same table */ Schema *pSchema; /* Schema containing this index */ u8 *aSortOrder; /* Array of size Index.nColumn. True==DESC, False==ASC */ char **azColl; /* Array of collation sequence names for index */ IndexSample *aSample; /* Array of SQLITE_INDEX_SAMPLES samples */ }; | > | 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 | int *aiColumn; /* Which columns are used by this index. 1st is 0 */ unsigned *aiRowEst; /* Result of ANALYZE: Est. rows selected by each column */ Table *pTable; /* The SQL table being indexed */ int tnum; /* Page containing root of this index in database file */ u8 onError; /* OE_Abort, OE_Ignore, OE_Replace, or OE_None */ u8 autoIndex; /* True if is automatically created (ex: by UNIQUE) */ u8 bUnordered; /* Use this index for == or IN queries only */ u8 nSample; /* Number of slots in aSample[] */ char *zColAff; /* String defining the affinity of each column */ Index *pNext; /* The next index associated with the same table */ Schema *pSchema; /* Schema containing this index */ u8 *aSortOrder; /* Array of size Index.nColumn. True==DESC, False==ASC */ char **azColl; /* Array of collation sequence names for index */ IndexSample *aSample; /* Array of SQLITE_INDEX_SAMPLES samples */ }; |
︙ | ︙ |
Changes to src/where.c.
︙ | ︙ | |||
2418 2419 2420 2421 2422 2423 2424 | */ bestOrClauseIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, pCost); } #endif /* SQLITE_OMIT_VIRTUALTABLE */ /* ** Argument pIdx is a pointer to an index structure that has an array of | | | | | > > | | | 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 | */ bestOrClauseIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, pCost); } #endif /* SQLITE_OMIT_VIRTUALTABLE */ /* ** Argument pIdx is a pointer to an index structure that has an array of ** pIdx->nSample evenly spaced samples of the first indexed column ** stored in Index.aSample. These samples divide the domain of values stored ** the index into (pIdx->nSample+1) regions. ** Region 0 contains all values less than the first sample value. Region ** 1 contains values between the first and second samples. Region 2 contains ** values between samples 2 and 3. And so on. Region pIdx->nSample ** contains values larger than the last sample. ** ** If the index contains many duplicates of a single value, then it is ** possible that two or more adjacent samples can hold the same value. ** When that is the case, the smallest possible region code is returned ** when roundUp is false and the largest possible region code is returned ** when roundUp is true. ** ** If successful, this function determines which of the regions value ** pVal lies in, sets *piRegion to the region index (a value between 0 ** and S+1, inclusive) and returns SQLITE_OK. ** Or, if an OOM occurs while converting text values between encodings, ** SQLITE_NOMEM is returned and *piRegion is undefined. */ #ifdef SQLITE_ENABLE_STAT2 static int whereRangeRegion( Parse *pParse, /* Database connection */ Index *pIdx, /* Index to consider domain of */ sqlite3_value *pVal, /* Value to consider */ int roundUp, /* Return largest valid region if true */ int *piRegion /* OUT: Region of domain in which value lies */ ){ assert( roundUp==0 || roundUp==1 ); if( ALWAYS(pVal) ){ IndexSample *aSample = pIdx->aSample; int nSample = pIdx->nSample; int i = 0; int eType = sqlite3_value_type(pVal); assert( nSample>0 ); if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){ double r = sqlite3_value_double(pVal); for(i=0; i<nSample; i++){ if( aSample[i].eType==SQLITE_NULL ) continue; if( aSample[i].eType>=SQLITE_TEXT ) break; if( roundUp ){ if( aSample[i].u.r>r ) break; }else{ if( aSample[i].u.r>=r ) break; } } }else if( eType==SQLITE_NULL ){ i = 0; if( roundUp ){ while( i<nSample && aSample[i].eType==SQLITE_NULL ) i++; } }else{ sqlite3 *db = pParse->db; CollSeq *pColl; const u8 *z; int n; |
︙ | ︙ | |||
2496 2497 2498 2499 2500 2501 2502 | if( !z ){ return SQLITE_NOMEM; } assert( z && pColl && pColl->xCmp ); } n = sqlite3ValueBytes(pVal, pColl->enc); | | | 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 | if( !z ){ return SQLITE_NOMEM; } assert( z && pColl && pColl->xCmp ); } n = sqlite3ValueBytes(pVal, pColl->enc); for(i=0; i<nSample; i++){ int c; int eSampletype = aSample[i].eType; if( eSampletype==SQLITE_NULL || eSampletype<eType ) continue; if( (eSampletype!=eType) ) break; #ifndef SQLITE_OMIT_UTF16 if( pColl->enc!=SQLITE_UTF8 ){ int nSample; |
︙ | ︙ | |||
2522 2523 2524 2525 2526 2527 2528 | { c = pColl->xCmp(pColl->pUser, aSample[i].nByte, aSample[i].u.z, n, z); } if( c-roundUp>=0 ) break; } } | | | 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 | { c = pColl->xCmp(pColl->pUser, aSample[i].nByte, aSample[i].u.z, n, z); } if( c-roundUp>=0 ) break; } } assert( i>=0 && i<=pIdx->nSample ); *piRegion = i; } return SQLITE_OK; } #endif /* #ifdef SQLITE_ENABLE_STAT2 */ /* |
︙ | ︙ | |||
2622 2623 2624 2625 2626 2627 2628 | #ifdef SQLITE_ENABLE_STAT2 if( nEq==0 && p->aSample ){ sqlite3_value *pLowerVal = 0; sqlite3_value *pUpperVal = 0; int iEst; int iLower = 0; | > | | 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 | #ifdef SQLITE_ENABLE_STAT2 if( nEq==0 && p->aSample ){ sqlite3_value *pLowerVal = 0; sqlite3_value *pUpperVal = 0; int iEst; int iLower = 0; int nSample = p->nSample; int iUpper = p->nSample; int roundUpUpper = 0; int roundUpLower = 0; u8 aff = p->pTable->aCol[p->aiColumn[0]].affinity; if( pLower ){ Expr *pExpr = pLower->pExpr->pRight; rc = valueFromExpr(pParse, pExpr, aff, &pLowerVal); |
︙ | ︙ | |||
2649 2650 2651 2652 2653 2654 2655 | sqlite3ValueFree(pUpperVal); goto range_est_fallback; }else if( pLowerVal==0 ){ rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper); if( pLower ) iLower = iUpper/2; }else if( pUpperVal==0 ){ rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower); | | | | > | | | 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 | sqlite3ValueFree(pUpperVal); goto range_est_fallback; }else if( pLowerVal==0 ){ rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper); if( pLower ) iLower = iUpper/2; }else if( pUpperVal==0 ){ rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower); if( pUpper ) iUpper = (iLower + p->nSample + 1)/2; }else{ rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper); if( rc==SQLITE_OK ){ rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower); } } WHERETRACE(("range scan regions: %d..%d\n", iLower, iUpper)); iEst = iUpper - iLower; testcase( iEst==nSample ); assert( iEst<=nSample ); assert( nSample>0 ); if( iEst<1 ){ *piEst = 50/nSample; }else{ *piEst = (iEst*100)/nSample; } sqlite3ValueFree(pLowerVal); sqlite3ValueFree(pUpperVal); return rc; } range_est_fallback: #else |
︙ | ︙ | |||
2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 | sqlite3_value *pRhs = 0; /* VALUE on right-hand side of pTerm */ int iLower, iUpper; /* Range of histogram regions containing pRhs */ u8 aff; /* Column affinity */ int rc; /* Subfunction return code */ double nRowEst; /* New estimate of the number of rows */ assert( p->aSample!=0 ); aff = p->pTable->aCol[p->aiColumn[0]].affinity; if( pExpr ){ rc = valueFromExpr(pParse, pExpr, aff, &pRhs); if( rc ) goto whereEqualScanEst_cancel; }else{ pRhs = sqlite3ValueNew(pParse->db); } if( pRhs==0 ) return SQLITE_NOTFOUND; rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower); if( rc ) goto whereEqualScanEst_cancel; rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper); if( rc ) goto whereEqualScanEst_cancel; WHERETRACE(("equality scan regions: %d..%d\n", iLower, iUpper)); if( iLower>=iUpper ){ | > | | | 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 | sqlite3_value *pRhs = 0; /* VALUE on right-hand side of pTerm */ int iLower, iUpper; /* Range of histogram regions containing pRhs */ u8 aff; /* Column affinity */ int rc; /* Subfunction return code */ double nRowEst; /* New estimate of the number of rows */ assert( p->aSample!=0 ); assert( p->nSample>0 ); aff = p->pTable->aCol[p->aiColumn[0]].affinity; if( pExpr ){ rc = valueFromExpr(pParse, pExpr, aff, &pRhs); if( rc ) goto whereEqualScanEst_cancel; }else{ pRhs = sqlite3ValueNew(pParse->db); } if( pRhs==0 ) return SQLITE_NOTFOUND; rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower); if( rc ) goto whereEqualScanEst_cancel; rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper); if( rc ) goto whereEqualScanEst_cancel; WHERETRACE(("equality scan regions: %d..%d\n", iLower, iUpper)); if( iLower>=iUpper ){ nRowEst = p->aiRowEst[0]/(p->nSample*3); if( nRowEst<*pnRow ) *pnRow = nRowEst; }else{ nRowEst = (iUpper-iLower)*p->aiRowEst[0]/p->nSample; *pnRow = nRowEst; } whereEqualScanEst_cancel: sqlite3ValueFree(pRhs); return rc; } |
︙ | ︙ | |||
2772 2773 2774 2775 2776 2777 2778 | int iLower, iUpper; /* Range of histogram regions containing pRhs */ u8 aff; /* Column affinity */ int rc = SQLITE_OK; /* Subfunction return code */ double nRowEst; /* New estimate of the number of rows */ int nSpan = 0; /* Number of histogram regions spanned */ int nSingle = 0; /* Histogram regions hit by a single value */ int nNotFound = 0; /* Count of values that are not constants */ | | > | | > | | | | | | 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 | int iLower, iUpper; /* Range of histogram regions containing pRhs */ u8 aff; /* Column affinity */ int rc = SQLITE_OK; /* Subfunction return code */ double nRowEst; /* New estimate of the number of rows */ int nSpan = 0; /* Number of histogram regions spanned */ int nSingle = 0; /* Histogram regions hit by a single value */ int nNotFound = 0; /* Count of values that are not constants */ int i; /* Loop counter */ int nSample = p->nSample; /* Number of samples */ u8 aSpan[SQLITE_MAX_SAMPLES+1]; /* Histogram regions that are spanned */ u8 aSingle[SQLITE_MAX_SAMPLES+1]; /* Histogram regions hit once */ assert( p->aSample!=0 ); assert( nSample>0 ); aff = p->pTable->aCol[p->aiColumn[0]].affinity; memset(aSpan, 0, nSample+1); memset(aSingle, 0, nSample+1); for(i=0; i<pList->nExpr; i++){ sqlite3ValueFree(pVal); rc = valueFromExpr(pParse, pList->a[i].pExpr, aff, &pVal); if( rc ) break; if( pVal==0 || sqlite3_value_type(pVal)==SQLITE_NULL ){ nNotFound++; continue; } rc = whereRangeRegion(pParse, p, pVal, 0, &iLower); if( rc ) break; rc = whereRangeRegion(pParse, p, pVal, 1, &iUpper); if( rc ) break; if( iLower>=iUpper ){ aSingle[iLower] = 1; }else{ assert( iLower>=0 && iUpper<=nSample ); while( iLower<iUpper ) aSpan[iLower++] = 1; } } if( rc==SQLITE_OK ){ for(i=nSpan=0; i<=nSample; i++){ if( aSpan[i] ){ nSpan++; }else if( aSingle[i] ){ nSingle++; } } nRowEst = (nSpan*3+nSingle)*p->aiRowEst[0]/(3*nSample) + nNotFound*p->aiRowEst[1]; if( nRowEst > p->aiRowEst[0] ) nRowEst = p->aiRowEst[0]; *pnRow = nRowEst; WHERETRACE(("IN row estimate: nSpan=%d, nSingle=%d, nNotFound=%d, est=%g\n", nSpan, nSingle, nNotFound, nRowEst)); } sqlite3ValueFree(pVal); |
︙ | ︙ |