Index: src/analyze.c ================================================================== --- src/analyze.c +++ src/analyze.c @@ -264,10 +264,11 @@ typedef struct StatAccum StatAccum; typedef struct StatSample StatSample; struct StatSample { tRowcnt *anEq; /* sqlite_stat4.nEq */ tRowcnt *anDLt; /* sqlite_stat4.nDLt */ + tRowcnt *amxEq; /* Maximum length run of equal values */ #ifdef SQLITE_ENABLE_STAT4 tRowcnt *anLt; /* sqlite_stat4.nLt */ union { i64 iRowid; /* Rowid in main table of the key */ u8 *aRowid; /* Key for WITHOUT ROWID tables */ @@ -423,10 +424,11 @@ assert( nKeyCol>0 ); /* Allocate the space required for the StatAccum object */ n = sizeof(*p) + sizeof(tRowcnt)*nColUp /* StatAccum.anEq */ + + sizeof(tRowcnt)*nColUp /* StatAccum.amxEq */ + sizeof(tRowcnt)*nColUp; /* StatAccum.anDLt */ #ifdef SQLITE_ENABLE_STAT4 if( mxSample ){ n += sizeof(tRowcnt)*nColUp /* StatAccum.anLt */ + sizeof(StatSample)*(nCol+mxSample) /* StatAccum.aBest[], a[] */ @@ -445,11 +447,12 @@ p->nLimit = sqlite3_value_int64(argv[3]); p->nCol = nCol; p->nKeyCol = nKeyCol; p->nSkipAhead = 0; p->current.anDLt = (tRowcnt*)&p[1]; - p->current.anEq = &p->current.anDLt[nColUp]; + p->current.amxEq = &p->current.anDLt[nColUp]; + p->current.anEq = &p->current.amxEq[nColUp]; #ifdef SQLITE_ENABLE_STAT4 p->mxSample = p->nLimit==0 ? mxSample : 0; if( mxSample ){ u8 *pSpace; /* Allocated space not yet assigned */ @@ -714,11 +717,14 @@ assert( p->nCol>0 ); assert( iChngnCol ); if( p->nRow==0 ){ /* This is the first call to this function. Do initialization. */ - for(i=0; inCol; i++) p->current.anEq[i] = 1; + for(i=0; inCol; i++){ + p->current.anEq[i] = 1; + p->current.amxEq[i] = 1; + } }else{ /* Second and subsequent calls get processed here */ #ifdef SQLITE_ENABLE_STAT4 if( p->mxSample ) samplePushPrevious(p, iChng); #endif @@ -731,10 +737,13 @@ for(i=iChng; inCol; i++){ p->current.anDLt[i]++; #ifdef SQLITE_ENABLE_STAT4 if( p->mxSample ) p->current.anLt[i] += p->current.anEq[i]; #endif + if( p->current.amxEq[i]current.anEq[i] ){ + p->current.amxEq[i] = p->current.anEq[i]; + } p->current.anEq[i] = 1; } } p->nRow++; @@ -839,37 +848,70 @@ ** for each indexed column. This additional integer is an estimate of ** the number of rows matched by a equality query on the index using ** a key with the corresponding number of fields. In other words, ** if the index is on columns (a,b) and the sqlite_stat1 value is ** "100 10 2", then SQLite estimates that: - ** - ** * the index contains 100 rows, - ** * "WHERE a=?" matches 10 rows, and - ** * "WHERE a=? AND b=?" matches 2 rows. + ** | | | + ** | | `-- "WHERE a=? AND b=?" matches approximately 2 rows + ** | `---- "WHERE a=?" matches approximately 10 rows + ** `-------- There are approximately 100 rows in the index total ** ** If D is the count of distinct values and K is the total number of ** rows, then each estimate is usually computed as: ** ** I = (K+D-1)/D ** - ** In other words, I is K/D rounded up to the next whole integer. - ** However, if I is between 1.0 and 1.1 (in other words if I is - ** close to 1.0 but just a little larger) then do not round up but - ** instead keep the I value at 1.0. + ** Adjustments to the I value are made in some cases. See comments + ** in-line below. */ sqlite3_str sStat; /* Text of the constructed "stat" line */ int i; /* Loop counter */ + int bUneven = 0; /* True if the uneven=... argument is needed */ + u64 nRow; /* Number of rows in the index */ sqlite3StrAccumInit(&sStat, 0, 0, 0, (p->nKeyCol+1)*100); - sqlite3_str_appendf(&sStat, "%llu", - p->nSkipAhead ? (u64)p->nEst : (u64)p->nRow); + nRow = p->nSkipAhead ? p->nEst : p->nRow; + sqlite3_str_appendf(&sStat, "%llu", nRow); for(i=0; inKeyCol; i++){ u64 nDistinct = p->current.anDLt[i] + 1; u64 iVal = (p->nRow + nDistinct - 1) / nDistinct; - if( iVal==2 && p->nRow*10 <= nDistinct*11 ) iVal = 1; + u64 mx = p->current.amxEq[i]; + if( nDistinct==1 && p->nLimit>0 ){ + /* If we never saw more than a single value in a PRAGMA analysis_limit + ** search, then set the estimated number of matching rows to the + ** estimated number of rows in the index. */ + iVal = p->nEst; + }else if( iValnRow*10 <= nDistinct*11 ){ + /* If the value is less than or equal to 1.1, round it down to 1.0 */ + iVal = 1; + } sqlite3_str_appendf(&sStat, " %llu", iVal); assert( p->current.anEq[i] ); + + } + if( bUneven ){ + char cSep = '='; + sqlite3_str_appendf(&sStat, " var"); + for(i=0; inKeyCol; i++){ + u64 nDistinct = p->current.anDLt[i] + 1; + u64 iVal = (p->nRow + nDistinct - 1) / nDistinct; + u64 mx = p->current.amxEq[i]; + int iRatio = mx/iVal; + sqlite3_str_appendf(&sStat, "%c%d", cSep, iRatio); + cSep = ','; + } } sqlite3ResultStrAccum(context, &sStat); } #ifdef SQLITE_ENABLE_STAT4 else if( eCall==STAT_GET_ROWID ){ @@ -1512,11 +1554,11 @@ #ifdef SQLITE_ENABLE_STAT4 if( z==0 ) z = ""; #else assert( z!=0 ); #endif - for(i=0; *z && i='0' && c<='9' ){ v = v*10 + c - '0'; z++; } @@ -1536,28 +1578,74 @@ #else if( pIndex ){ #endif pIndex->bUnordered = 0; pIndex->noSkipScan = 0; + pIndex->bSlow = 0; + assert( aLog!=0 ); while( z[0] ){ if( sqlite3_strglob("unordered*", z)==0 ){ pIndex->bUnordered = 1; }else if( sqlite3_strglob("sz=[0-9]*", z)==0 ){ int sz = sqlite3Atoi(z+3); if( sz<2 ) sz = 2; pIndex->szIdxRow = sqlite3LogEst(sz); }else if( sqlite3_strglob("noskipscan*", z)==0 ){ pIndex->noSkipScan = 1; + }else if( sqlite3_strglob("var=[0-9]*", z)==0 ){ + /* An argument like "var=N1,N2,...NN" means that the maximum length + ** run of the same value is Nx times longer than the average for + ** the X-th column of the index. + ** + ** For this implementation, go through the iaRowLogEst[] array and + ** increase each value by 1/10th of the average value, to account + ** for the variability of the estimate. + ** + ** tag-20231231-02: The 1/10th value is tunable. See the tuning + ** comment in the body of the loop. The ANALYZE command only + ** inserts a var=... argument if one or more of the Nx values is + ** within the tuning range, so if changing the tuning factor here, + ** consider also changing it at tag-20232131-01. + ** + ** The stat column continue to hold the average run length for the + ** initial integers, for backwards compatibility. + */ + int jj = 1; + int kk = 4; + LogEst mx = aLog[0]; + for(jj=1; sqlite3Isdigit(z[kk]) && jj33 ){ + /* ^^----- TUNING --------------vv See tag 20231231-02 */ + LogEst adjusted = aLog[jj] + scale - 33; + if( adjusted>mx ) adjusted = mx; + aLog[jj] = adjusted; + } + if( z[kk]==',' ) kk++; + } } #ifdef SQLITE_ENABLE_COSTMULT else if( sqlite3_strglob("costmult=[0-9]*",z)==0 ){ pIndex->pTable->costMult = sqlite3LogEst(sqlite3Atoi(z+9)); } #endif while( z[0]!=0 && z[0]!=' ' ) z++; while( z[0]==' ' ) z++; } + + /* Set the bSlow flag if the peak number of rows obtained + ** from a full equality match is so large that a full table scan + ** seems likely to be faster than using the index. + */ + if( aLog[0] > 66 /* Index has more than 100 rows */ + && aLog[0] <= aLog[pIndex->nKeyCol] /* And only a single value seen */ + ){ + pIndex->bSlow = 1; + } } } /* ** This callback is invoked once for each index when reading the @@ -1615,10 +1703,11 @@ pTable->nRowLogEst = pIndex->aiRowLogEst[0]; pTable->tabFlags |= TF_HasStat1; } }else{ Index fakeIdx; + memset(&fakeIdx, 0, sizeof(fakeIdx)); fakeIdx.szIdxRow = pTable->szTabRow; #ifdef SQLITE_ENABLE_COSTMULT fakeIdx.pTable = pTable; #endif decodeIntArray((char*)z, 1, 0, &pTable->nRowLogEst, &fakeIdx); Index: src/pragma.c ================================================================== --- src/pragma.c +++ src/pragma.c @@ -1296,30 +1296,54 @@ } } break; #ifdef SQLITE_DEBUG + /* The output of this pragma is undocumented in the official documentation + ** because it is subject to change, and we don't want people coming to + ** rely on it. + ** + ** Columns: + ** tbl Name of a table that is being described + ** idx Name of an index (belonging to tbl) being described + ** wdth LogEst of the on-disk estimated bytes per row + ** hght LogEst of the estimated number of rows + ** flgs tabFlags for tables + ** est aiRowLogEst[] values for indexes + "slow" flag + */ case PragTyp_STATS: { Index *pIdx; HashElem *i; - pParse->nMem = 5; + sqlite3_str est; + sqlite3StrAccumInit(&est, 0, 0, 0, SQLITE_MAX_LENGTH); + pParse->nMem = 6; sqlite3CodeVerifySchema(pParse, iDb); for(i=sqliteHashFirst(&pDb->pSchema->tblHash); i; i=sqliteHashNext(i)){ Table *pTab = sqliteHashData(i); - sqlite3VdbeMultiLoad(v, 1, "ssiii", + sqlite3VdbeMultiLoad(v, 1, "ssiiis", sqlite3PreferredTableName(pTab->zName), 0, pTab->szTabRow, pTab->nRowLogEst, - pTab->tabFlags); + pTab->tabFlags, + 0); for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ - sqlite3VdbeMultiLoad(v, 2, "siiiX", + int j; + est.nChar = 0; + for(j=1; jnKeyCol+1; j++){ + if( j>1 ) sqlite3_str_append(&est, " ", 1); + sqlite3_str_appendf(&est, "%d", pIdx->aiRowLogEst[j]); + } + if( pIdx->bSlow ) sqlite3_str_append(&est, " slow", 5); + sqlite3VdbeMultiLoad(v, 2, "siiisX", pIdx->zName, pIdx->szIdxRow, pIdx->aiRowLogEst[0], - pIdx->hasStat1); - sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 5); + pIdx->hasStat1, + sqlite3_str_value(&est)); + sqlite3VdbeAddOp2(v, OP_ResultRow, 1, 6); + sqlite3_str_reset(&est); } } } break; #endif Index: src/pragma.h ================================================================== --- src/pragma.h +++ src/pragma.h @@ -87,49 +87,50 @@ /* 16 */ "name", /* 17 */ "type", /* 18 */ "ncol", /* 19 */ "wr", /* 20 */ "strict", - /* 21 */ "seqno", /* Used by: index_xinfo */ - /* 22 */ "cid", - /* 23 */ "name", - /* 24 */ "desc", - /* 25 */ "coll", - /* 26 */ "key", - /* 27 */ "name", /* Used by: function_list */ - /* 28 */ "builtin", - /* 29 */ "type", - /* 30 */ "enc", - /* 31 */ "narg", - /* 32 */ "flags", - /* 33 */ "tbl", /* Used by: stats */ - /* 34 */ "idx", - /* 35 */ "wdth", - /* 36 */ "hght", - /* 37 */ "flgs", - /* 38 */ "seq", /* Used by: index_list */ - /* 39 */ "name", - /* 40 */ "unique", - /* 41 */ "origin", - /* 42 */ "partial", - /* 43 */ "table", /* Used by: foreign_key_check */ - /* 44 */ "rowid", - /* 45 */ "parent", - /* 46 */ "fkid", - /* index_info reuses 21 */ - /* 47 */ "seq", /* Used by: database_list */ - /* 48 */ "name", - /* 49 */ "file", - /* 50 */ "busy", /* Used by: wal_checkpoint */ - /* 51 */ "log", - /* 52 */ "checkpointed", - /* collation_list reuses 38 */ - /* 53 */ "database", /* Used by: lock_status */ - /* 54 */ "status", - /* 55 */ "cache_size", /* Used by: default_cache_size */ + /* 21 */ "tbl", /* Used by: stats */ + /* 22 */ "idx", + /* 23 */ "wdth", + /* 24 */ "hght", + /* 25 */ "flgs", + /* 26 */ "est", + /* 27 */ "seqno", /* Used by: index_xinfo */ + /* 28 */ "cid", + /* 29 */ "name", + /* 30 */ "desc", + /* 31 */ "coll", + /* 32 */ "key", + /* 33 */ "name", /* Used by: function_list */ + /* 34 */ "builtin", + /* 35 */ "type", + /* 36 */ "enc", + /* 37 */ "narg", + /* 38 */ "flags", + /* 39 */ "seq", /* Used by: index_list */ + /* 40 */ "name", + /* 41 */ "unique", + /* 42 */ "origin", + /* 43 */ "partial", + /* 44 */ "table", /* Used by: foreign_key_check */ + /* 45 */ "rowid", + /* 46 */ "parent", + /* 47 */ "fkid", + /* index_info reuses 27 */ + /* 48 */ "seq", /* Used by: database_list */ + /* 49 */ "name", + /* 50 */ "file", + /* 51 */ "busy", /* Used by: wal_checkpoint */ + /* 52 */ "log", + /* 53 */ "checkpointed", + /* collation_list reuses 39 */ + /* 54 */ "database", /* Used by: lock_status */ + /* 55 */ "status", + /* 56 */ "cache_size", /* Used by: default_cache_size */ /* module_list pragma_list reuses 9 */ - /* 56 */ "timeout", /* Used by: busy_timeout */ + /* 57 */ "timeout", /* Used by: busy_timeout */ }; /* Definitions of all built-in pragmas */ typedef struct PragmaName { const char *const zName; /* Name of pragma */ @@ -176,11 +177,11 @@ #endif #endif {/* zName: */ "busy_timeout", /* ePragTyp: */ PragTyp_BUSY_TIMEOUT, /* ePragFlg: */ PragFlg_Result0, - /* ColNames: */ 56, 1, + /* ColNames: */ 57, 1, /* iArg: */ 0 }, #if !defined(SQLITE_OMIT_PAGER_PRAGMAS) {/* zName: */ "cache_size", /* ePragTyp: */ PragTyp_CACHE_SIZE, /* ePragFlg: */ PragFlg_NeedSchema|PragFlg_Result0|PragFlg_SchemaReq|PragFlg_NoColumns1, @@ -215,11 +216,11 @@ #endif #if !defined(SQLITE_OMIT_SCHEMA_PRAGMAS) {/* zName: */ "collation_list", /* ePragTyp: */ PragTyp_COLLATION_LIST, /* ePragFlg: */ PragFlg_Result0, - /* ColNames: */ 38, 2, + /* ColNames: */ 39, 2, /* iArg: */ 0 }, #endif #if !defined(SQLITE_OMIT_COMPILEOPTION_DIAGS) {/* zName: */ "compile_options", /* ePragTyp: */ PragTyp_COMPILE_OPTIONS, @@ -250,18 +251,18 @@ #endif #if !defined(SQLITE_OMIT_SCHEMA_PRAGMAS) {/* zName: */ "database_list", /* ePragTyp: */ PragTyp_DATABASE_LIST, /* ePragFlg: */ PragFlg_Result0, - /* ColNames: */ 47, 3, + /* ColNames: */ 48, 3, /* iArg: */ 0 }, #endif #if !defined(SQLITE_OMIT_PAGER_PRAGMAS) && !defined(SQLITE_OMIT_DEPRECATED) {/* zName: */ "default_cache_size", /* ePragTyp: */ PragTyp_DEFAULT_CACHE_SIZE, /* ePragFlg: */ PragFlg_NeedSchema|PragFlg_Result0|PragFlg_SchemaReq|PragFlg_NoColumns1, - /* ColNames: */ 55, 1, + /* ColNames: */ 56, 1, /* iArg: */ 0 }, #endif #if !defined(SQLITE_OMIT_FLAG_PRAGMAS) #if !defined(SQLITE_OMIT_FOREIGN_KEY) && !defined(SQLITE_OMIT_TRIGGER) {/* zName: */ "defer_foreign_keys", @@ -287,11 +288,11 @@ #endif #if !defined(SQLITE_OMIT_FOREIGN_KEY) && !defined(SQLITE_OMIT_TRIGGER) {/* zName: */ "foreign_key_check", /* ePragTyp: */ PragTyp_FOREIGN_KEY_CHECK, /* ePragFlg: */ PragFlg_NeedSchema|PragFlg_Result0|PragFlg_Result1|PragFlg_SchemaOpt, - /* ColNames: */ 43, 4, + /* ColNames: */ 44, 4, /* iArg: */ 0 }, #endif #if !defined(SQLITE_OMIT_FOREIGN_KEY) {/* zName: */ "foreign_key_list", /* ePragTyp: */ PragTyp_FOREIGN_KEY_LIST, @@ -330,11 +331,11 @@ #if !defined(SQLITE_OMIT_SCHEMA_PRAGMAS) #if !defined(SQLITE_OMIT_INTROSPECTION_PRAGMAS) {/* zName: */ "function_list", /* ePragTyp: */ PragTyp_FUNCTION_LIST, /* ePragFlg: */ PragFlg_Result0, - /* ColNames: */ 27, 6, + /* ColNames: */ 33, 6, /* iArg: */ 0 }, #endif #endif {/* zName: */ "hard_heap_limit", /* ePragTyp: */ PragTyp_HARD_HEAP_LIMIT, @@ -359,21 +360,21 @@ #endif #if !defined(SQLITE_OMIT_SCHEMA_PRAGMAS) {/* zName: */ "index_info", /* ePragTyp: */ PragTyp_INDEX_INFO, /* ePragFlg: */ PragFlg_NeedSchema|PragFlg_Result1|PragFlg_SchemaOpt, - /* ColNames: */ 21, 3, + /* ColNames: */ 27, 3, /* iArg: */ 0 }, {/* zName: */ "index_list", /* ePragTyp: */ PragTyp_INDEX_LIST, /* ePragFlg: */ PragFlg_NeedSchema|PragFlg_Result1|PragFlg_SchemaOpt, - /* ColNames: */ 38, 5, + /* ColNames: */ 39, 5, /* iArg: */ 0 }, {/* zName: */ "index_xinfo", /* ePragTyp: */ PragTyp_INDEX_INFO, /* ePragFlg: */ PragFlg_NeedSchema|PragFlg_Result1|PragFlg_SchemaOpt, - /* ColNames: */ 21, 6, + /* ColNames: */ 27, 6, /* iArg: */ 1 }, #endif #if !defined(SQLITE_OMIT_INTEGRITY_CHECK) {/* zName: */ "integrity_check", /* ePragTyp: */ PragTyp_INTEGRITY_CHECK, @@ -409,11 +410,11 @@ #endif #if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) {/* zName: */ "lock_status", /* ePragTyp: */ PragTyp_LOCK_STATUS, /* ePragFlg: */ PragFlg_Result0, - /* ColNames: */ 53, 2, + /* ColNames: */ 54, 2, /* iArg: */ 0 }, #endif #if !defined(SQLITE_OMIT_PAGER_PRAGMAS) {/* zName: */ "locking_mode", /* ePragTyp: */ PragTyp_LOCKING_MODE, @@ -548,11 +549,11 @@ #endif #if !defined(SQLITE_OMIT_SCHEMA_PRAGMAS) && defined(SQLITE_DEBUG) {/* zName: */ "stats", /* ePragTyp: */ PragTyp_STATS, /* ePragFlg: */ PragFlg_NeedSchema|PragFlg_Result0|PragFlg_SchemaReq, - /* ColNames: */ 33, 5, + /* ColNames: */ 21, 6, /* iArg: */ 0 }, #endif #if !defined(SQLITE_OMIT_PAGER_PRAGMAS) {/* zName: */ "synchronous", /* ePragTyp: */ PragTyp_SYNCHRONOUS, @@ -644,11 +645,11 @@ /* ColNames: */ 0, 0, /* iArg: */ 0 }, {/* zName: */ "wal_checkpoint", /* ePragTyp: */ PragTyp_WAL_CHECKPOINT, /* ePragFlg: */ PragFlg_NeedSchema, - /* ColNames: */ 50, 3, + /* ColNames: */ 51, 3, /* iArg: */ 0 }, #endif #if !defined(SQLITE_OMIT_FLAG_PRAGMAS) {/* zName: */ "writable_schema", /* ePragTyp: */ PragTyp_FLAG, Index: src/sqliteInt.h ================================================================== --- src/sqliteInt.h +++ src/sqliteInt.h @@ -2765,10 +2765,11 @@ unsigned isResized:1; /* True if resizeIndexObject() has been called */ unsigned isCovering:1; /* True if this is a covering index */ unsigned noSkipScan:1; /* Do not try to use skip-scan if true */ unsigned hasStat1:1; /* aiRowLogEst values come from sqlite_stat1 */ unsigned bNoQuery:1; /* Do not use this index to optimize queries */ + unsigned bSlow:1; /* This index is not good for equality lookups */ unsigned bAscKeyBug:1; /* True if the bba7b69f9849b5bf bug applies */ unsigned bHasVCol:1; /* Index references one or more VIRTUAL columns */ unsigned bHasExpr:1; /* Index contains an expression, either a literal ** expression, or a reference to a VIRTUAL column */ #ifdef SQLITE_ENABLE_STAT4 Index: src/where.c ================================================================== --- src/where.c +++ src/where.c @@ -2971,11 +2971,14 @@ opMask = WO_LT|WO_LE; }else{ assert( pNew->u.btree.nBtm==0 ); opMask = WO_EQ|WO_IN|WO_GT|WO_GE|WO_LT|WO_LE|WO_ISNULL|WO_IS; } - if( pProbe->bUnordered ) opMask &= ~(WO_GT|WO_GE|WO_LT|WO_LE); + if( pProbe->bUnordered || pProbe->bSlow ){ + if( pProbe->bUnordered ) opMask &= ~(WO_GT|WO_GE|WO_LT|WO_LE); + if( pProbe->bSlow ) opMask &= ~(WO_EQ|WO_IN|WO_IS); + } assert( pNew->u.btree.nEqnColumn ); assert( pNew->u.btree.nEqnKeyCol || pProbe->idxType!=SQLITE_IDXTYPE_PRIMARYKEY ); Index: tool/mkpragmatab.tcl ================================================================== --- tool/mkpragmatab.tcl +++ tool/mkpragmatab.tcl @@ -245,11 +245,11 @@ COLS: schema name type ncol wr strict IF: !defined(SQLITE_OMIT_SCHEMA_PRAGMAS) NAME: stats FLAG: NeedSchema Result0 SchemaReq - COLS: tbl idx wdth hght flgs + COLS: tbl idx wdth hght flgs est IF: !defined(SQLITE_OMIT_SCHEMA_PRAGMAS) && defined(SQLITE_DEBUG) NAME: index_info TYPE: INDEX_INFO ARG: 0