Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | The ANALYZE command automatically appends "noskipscan" to sqlite_stat1 entries that have large worst-case repeat estimates but small average repeat estimates. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | analyze-worst-case |
Files: | files | file ages | folders |
SHA1: |
6326ba5891fae9c6be0c0c51cebcbe44 |
User & Date: | drh 2016-02-29 21:27:16.923 |
Context
2016-02-29
| ||
23:02 | Improvements to the logic for adding the "noskipscan" flag to stat1 entries. (check-in: 421b5b544a user: drh tags: analyze-worst-case) | |
21:27 | The ANALYZE command automatically appends "noskipscan" to sqlite_stat1 entries that have large worst-case repeat estimates but small average repeat estimates. (check-in: 6326ba5891 user: drh tags: analyze-worst-case) | |
18:30 | Modify the ANALYZE command to store worst-case statistics in sqlite_stat1, rather thn average case. (check-in: 5a0143c94e user: drh tags: analyze-worst-case) | |
Changes
Changes to src/analyze.c.
︙ | ︙ | |||
416 417 418 419 420 421 422 | assert( nKeyCol<=nCol ); assert( nKeyCol>0 ); /* Allocate the space required for the Stat4Accum object */ n = sizeof(*p) + sizeof(tRowcnt)*nColUp /* Stat4Accum.anEq */ + sizeof(tRowcnt)*nColUp /* Stat4Accum.amxEq */ | < > > < | 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 | assert( nKeyCol<=nCol ); assert( nKeyCol>0 ); /* Allocate the space required for the Stat4Accum object */ n = sizeof(*p) + sizeof(tRowcnt)*nColUp /* Stat4Accum.anEq */ + sizeof(tRowcnt)*nColUp /* Stat4Accum.amxEq */ + sizeof(tRowcnt)*nColUp /* Stat4Accum.anDLt */ #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 + sizeof(tRowcnt)*nColUp /* Stat4Accum.anLt */ + sizeof(Stat4Sample)*(nCol+mxSample) /* Stat4Accum.aBest[], a[] */ + sizeof(tRowcnt)*3*nColUp*(nCol+mxSample) #endif ; db = sqlite3_context_db_handle(context); p = sqlite3DbMallocZero(db, n); if( p==0 ){ sqlite3_result_error_nomem(context); return; } p->db = db; p->nRow = 0; p->nCol = nCol; p->nKeyCol = nKeyCol; p->current.anEq = (tRowcnt*)&p[1]; p->current.amxEq = &p->current.anEq[nColUp]; p->current.anDLt = &p->current.amxEq[nColUp]; #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 { u8 *pSpace; /* Allocated space not yet assigned */ int i; /* Loop counter */ p->iGet = -1; p->mxSample = mxSample; p->nPSample = (tRowcnt)(sqlite3_value_int64(argv[2])/(mxSample/3+1) + 1); p->current.anLt = &p->current.anDLt[nColUp]; p->iPrn = 0x689e962d*(u32)nCol ^ 0xd0944565*(u32)sqlite3_value_int(argv[2]); /* Set up the Stat4Accum.a[] and aBest[] arrays */ p->a = (struct Stat4Sample*)&p->current.anLt[nColUp]; p->aBest = &p->a[mxSample]; pSpace = (u8*)(&p->a[mxSample+nCol]); |
︙ | ︙ | |||
735 736 737 738 739 740 741 | /* Update anDLt[], anLt[] and anEq[] to reflect the values that apply ** to the current row of the index. */ for(i=0; i<iChng; i++){ if( p->current.amxEq[i]==p->current.anEq[i] ) p->current.amxEq[i]++; p->current.anEq[i]++; } for(i=iChng; i<p->nCol; i++){ | < > | 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 | /* Update anDLt[], anLt[] and anEq[] to reflect the values that apply ** to the current row of the index. */ for(i=0; i<iChng; i++){ if( p->current.amxEq[i]==p->current.anEq[i] ) p->current.amxEq[i]++; p->current.anEq[i]++; } for(i=iChng; i<p->nCol; i++){ p->current.anDLt[i]++; #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 p->current.anLt[i] += p->current.anEq[i]; #endif p->current.anEq[i] = 1; } } p->nRow++; #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 |
︙ | ︙ | |||
841 842 843 844 845 846 847 | ** "100 10 2", then SQLite estimates that: ** ** * the index contains 100 rows, ** * "WHERE a=?" matches 10 rows, and ** * "WHERE a=? AND b=?" matches 2 rows. ** ** Use the worst-case estimate: the maximum number of repeated entries | | > > > > > > | > > > > | < | > | 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 | ** "100 10 2", then SQLite estimates that: ** ** * the index contains 100 rows, ** * "WHERE a=?" matches 10 rows, and ** * "WHERE a=? AND b=?" matches 2 rows. ** ** Use the worst-case estimate: the maximum number of repeated entries ** in the index. The worst-case estimate is best for picking indexes. ** But for skip-scan, we want an average case estimate. The worst-case ** estimate might be too high. To avoid undesirable skip-scans, if the ** worst-case estimate is above the WHERE_SKIPSCAN_ONSET but the average ** estimate is below, simply disable skipscans on this index by adding ** the "noskipscan" modifier onto the end of the stat line. */ char *z; int i; int noSkipScan = 0; char *zRet = sqlite3MallocZero( (p->nKeyCol+2)*25 ); if( zRet==0 ){ sqlite3_result_error_nomem(context); return; } sqlite3_snprintf(24, zRet, "%llu", (u64)p->nRow); z = zRet + sqlite3Strlen30(zRet); for(i=0; i<p->nKeyCol; i++){ u64 iVal = p->current.amxEq[i]; sqlite3_snprintf(24, z, " %llu", iVal); z += sqlite3Strlen30(z); assert( p->current.anEq[i] ); if( i>0 && iVal>=WHERE_SKIPSCAN_ONSET+5 ){ iVal = p->current.anDLt[i]; iVal = (p->nRow + iVal)/(iVal + 1); if( iVal<WHERE_SKIPSCAN_ONSET-5 ) noSkipScan = 1; } } if( noSkipScan ) sqlite3_snprintf(14, z, " noskipscan"); sqlite3_result_text(context, zRet, -1, sqlite3_free); } #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 else if( eCall==STAT_GET_ROWID ){ if( p->iGet<0 ){ samplePushPrevious(p, 0); p->iGet = 0; |
︙ | ︙ |
Changes to src/sqliteInt.h.
︙ | ︙ | |||
2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 | #define JT_CROSS 0x0002 /* Explicit use of the CROSS keyword */ #define JT_NATURAL 0x0004 /* True for a "natural" join */ #define JT_LEFT 0x0008 /* Left outer join */ #define JT_RIGHT 0x0010 /* Right outer join */ #define JT_OUTER 0x0020 /* The "OUTER" keyword is present */ #define JT_ERROR 0x0040 /* unknown or unsupported join type */ /* ** Flags appropriate for the wctrlFlags parameter of sqlite3WhereBegin() ** and the WhereInfo.wctrlFlags member. */ #define WHERE_ORDERBY_NORMAL 0x0000 /* No-op */ #define WHERE_ORDERBY_MIN 0x0001 /* ORDER BY processing for min() func */ | > > > > > > > > > | 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 | #define JT_CROSS 0x0002 /* Explicit use of the CROSS keyword */ #define JT_NATURAL 0x0004 /* True for a "natural" join */ #define JT_LEFT 0x0008 /* Left outer join */ #define JT_RIGHT 0x0010 /* Right outer join */ #define JT_OUTER 0x0020 /* The "OUTER" keyword is present */ #define JT_ERROR 0x0040 /* unknown or unsupported join type */ /* ** TUNING: The skip-scan optimization is only profitable if the average ** number of repeats of an entry in the index is greater than or equal to ** WHERE_SKIPSCAN_ONSET. If the average numbe of repeats is less ** than WHERE_SKIPSCAN_ONSET, then it is faster to do a full table ** scan. */ #define WHERE_SKIPSCAN_ONSET 18 #define WHERE_SKIPSCAN_ONSET_LOG 42 /* ** Flags appropriate for the wctrlFlags parameter of sqlite3WhereBegin() ** and the WhereInfo.wctrlFlags member. */ #define WHERE_ORDERBY_NORMAL 0x0000 /* No-op */ #define WHERE_ORDERBY_MIN 0x0001 /* ORDER BY processing for min() func */ |
︙ | ︙ |
Changes to src/where.c.
︙ | ︙ | |||
2407 2408 2409 2410 2411 2412 2413 | ** ** The magic number 18 is selected on the basis that scanning 17 rows ** is almost always quicker than an index seek (even though if the index ** contains fewer than 2^17 rows we assume otherwise in other parts of ** the code). And, even if it is not, it should not be too much slower. ** On the other hand, the extra seeks could end up being significantly ** more expensive. */ | | | | 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 | ** ** The magic number 18 is selected on the basis that scanning 17 rows ** is almost always quicker than an index seek (even though if the index ** contains fewer than 2^17 rows we assume otherwise in other parts of ** the code). And, even if it is not, it should not be too much slower. ** On the other hand, the extra seeks could end up being significantly ** more expensive. */ assert( WHERE_SKIPSCAN_ONSET_LOG==sqlite3LogEst(WHERE_SKIPSCAN_ONSET) ); if( saved_nEq==saved_nSkip && saved_nEq+1<pProbe->nKeyCol && pProbe->noSkipScan==0 && pProbe->aiRowLogEst[saved_nEq+1]>=WHERE_SKIPSCAN_ONSET_LOG && (rc = whereLoopResize(db, pNew, pNew->nLTerm+1))==SQLITE_OK ){ LogEst nIter; pNew->u.btree.nEq++; pNew->nSkip++; pNew->aLTerm[pNew->nLTerm++] = 0; pNew->wsFlags |= WHERE_SKIPSCAN; |
︙ | ︙ |