/ Check-in [e6f7f97d]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Improve the accuracy of the estimates used when searching an index for values not present in any stat4 samples under some circumstances.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | stat4-avgeq
Files: files | file ages | folders
SHA1: e6f7f97dbc677c9f01b23142928c3fa7307c2fba
User & Date: dan 2014-10-03 19:16:53
Context
2014-10-03
19:29
Fix a division-by-zero error that might occur if the sqlite_stat1 table is corrupt. check-in: f9c053b2 user: dan tags: stat4-avgeq
19:16
Improve the accuracy of the estimates used when searching an index for values not present in any stat4 samples under some circumstances. check-in: e6f7f97d user: dan tags: stat4-avgeq
16:00
Add requirements marks on the sqlite3_db_status() interface implementation. Fix a typo in the documentation. Fix the new sqlite3_result_text64() routine so that it works correctly with an encoding parameter of SQLITE_UTF16. check-in: d2fc3227 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/analyze.c.

1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
....
1512
1513
1514
1515
1516
1517
1518








1519
1520
1521
1522
1523
1524
1525
1526
1527
....
1572
1573
1574
1575
1576
1577
1578

1579
1580
1581
1582






1583




1584

1585
1586
1587
1588
1589

1590
1591

1592
1593
1594
1595
1596
1597



1598
1599
1600
1601
1602
1603
1604
....
1841
1842
1843
1844
1845
1846
1847





1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
    while( (c=z[0])>='0' && c<='9' ){
      v = v*10 + c - '0';
      z++;
    }
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
    if( aOut ){
      aOut[i] = v;
    }else
#else
    assert( aOut==0 );
    UNUSED_PARAMETER(aOut);
#endif
    {
      aLog[i] = sqlite3LogEst(v);
    }
    if( *z==' ' ) z++;
  }
#ifndef SQLITE_ENABLE_STAT3_OR_STAT4
  assert( pIndex!=0 );
#else
................................................................................
    pIndex = sqlite3PrimaryKeyIndex(pTable);
  }else{
    pIndex = sqlite3FindIndex(pInfo->db, argv[1], pInfo->zDatabase);
  }
  z = argv[2];

  if( pIndex ){








    pIndex->bUnordered = 0;
    decodeIntArray((char*)z, pIndex->nKeyCol+1, 0, pIndex->aiRowLogEst, pIndex);
    if( pIndex->pPartIdxWhere==0 ) pTable->nRowLogEst = pIndex->aiRowLogEst[0];
  }else{
    Index fakeIdx;
    fakeIdx.szIdxRow = pTable->szTabRow;
#ifdef SQLITE_ENABLE_COSTMULT
    fakeIdx.pTable = pTable;
#endif
................................................................................
      ** sample columns except the last. The last is always set to 1, as
      ** once the trailing PK fields are considered all index keys are
      ** unique.  */
      nCol = pIdx->nSampleCol-1;
      pIdx->aAvgEq[nCol] = 1;
    }
    for(iCol=0; iCol<nCol; iCol++){

      int i;                    /* Used to iterate through samples */
      tRowcnt sumEq = 0;        /* Sum of the nEq values */
      tRowcnt nSum = 0;         /* Number of terms contributing to sumEq */
      tRowcnt avgEq = 0;






      tRowcnt nDLt = pFinal->anDLt[iCol];






      /* Set nSum to the number of distinct (iCol+1) field prefixes that
      ** occur in the stat4 table for this index before pFinal. Set
      ** sumEq to the sum of the nEq values for column iCol for the same
      ** set (adding the value only once where there exist duplicate 
      ** prefixes).  */

      for(i=0; i<(pIdx->nSample-1); i++){
        if( aSample[i].anDLt[iCol]!=aSample[i+1].anDLt[iCol] ){

          sumEq += aSample[i].anEq[iCol];
          nSum++;
        }
      }
      if( nDLt>nSum ){
        avgEq = (pFinal->anLt[iCol] - sumEq)/(nDLt - nSum);



      }
      if( avgEq==0 ) avgEq = 1;
      pIdx->aAvgEq[iCol] = avgEq;
    }
  }
}

................................................................................
  /* Load the statistics from the sqlite_stat4 table. */
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  if( rc==SQLITE_OK ){
    int lookasideEnabled = db->lookaside.bEnabled;
    db->lookaside.bEnabled = 0;
    rc = loadStat4(db, sInfo.zDatabase);
    db->lookaside.bEnabled = lookasideEnabled;





  }
#endif

  if( rc==SQLITE_NOMEM ){
    db->mallocFailed = 1;
  }
  return rc;
}


#endif /* SQLITE_OMIT_ANALYZE */







|




|







 







>
>
>
>
>
>
>
>

|







 







>


<

>
>
>
>
>
>
|
>
>
>
>
|
>

|
|
|
<
>
|
|
>

|


<
<
>
>
>







 







>
>
>
>
>











1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
....
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
....
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589

1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607

1608
1609
1610
1611
1612
1613
1614
1615


1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
....
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
    while( (c=z[0])>='0' && c<='9' ){
      v = v*10 + c - '0';
      z++;
    }
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
    if( aOut ){
      aOut[i] = v;
    }
#else
    assert( aOut==0 );
    UNUSED_PARAMETER(aOut);
#endif
    if( aLog ){
      aLog[i] = sqlite3LogEst(v);
    }
    if( *z==' ' ) z++;
  }
#ifndef SQLITE_ENABLE_STAT3_OR_STAT4
  assert( pIndex!=0 );
#else
................................................................................
    pIndex = sqlite3PrimaryKeyIndex(pTable);
  }else{
    pIndex = sqlite3FindIndex(pInfo->db, argv[1], pInfo->zDatabase);
  }
  z = argv[2];

  if( pIndex ){
    int nCol = pIndex->nKeyCol+1;
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
    tRowcnt * const aiRowEst = pIndex->aiRowEst = (tRowcnt*)sqlite3DbMallocZero(
        pInfo->db, sizeof(tRowcnt) * nCol
    );
#else
    tRowcnt * const aiRowEst = 0;
#endif
    pIndex->bUnordered = 0;
    decodeIntArray((char*)z, nCol, aiRowEst, pIndex->aiRowLogEst, pIndex);
    if( pIndex->pPartIdxWhere==0 ) pTable->nRowLogEst = pIndex->aiRowLogEst[0];
  }else{
    Index fakeIdx;
    fakeIdx.szIdxRow = pTable->szTabRow;
#ifdef SQLITE_ENABLE_COSTMULT
    fakeIdx.pTable = pTable;
#endif
................................................................................
      ** sample columns except the last. The last is always set to 1, as
      ** once the trailing PK fields are considered all index keys are
      ** unique.  */
      nCol = pIdx->nSampleCol-1;
      pIdx->aAvgEq[nCol] = 1;
    }
    for(iCol=0; iCol<nCol; iCol++){
      int nSample = pIdx->nSample;
      int i;                    /* Used to iterate through samples */
      tRowcnt sumEq = 0;        /* Sum of the nEq values */

      tRowcnt avgEq = 0;
      tRowcnt nRow;             /* Number of rows in index */
      i64 nSum100 = 0;          /* Number of terms contributing to sumEq */
      i64 nDist100;             /* Number of distinct values in index */

      if( pIdx->aiRowEst==0 ){
        nRow = pFinal->anLt[iCol];
        nDist100 = (i64)100 * pFinal->anDLt[iCol];
        nSample--;
      }else{
        nRow = pIdx->aiRowEst[0];
        nDist100 = ((i64)100 * pIdx->aiRowEst[0]) / pIdx->aiRowEst[iCol+1];
      }

      /* Set nSum to the number of distinct (iCol+1) field prefixes that
      ** occur in the stat4 table for this index. Set sumEq to the sum of 
      ** the nEq values for column iCol for the same set (adding the value 
      ** only once where there exist duplicate prefixes).  */

      for(i=0; i<nSample; i++){
        if( i==(pIdx->nSample-1)
         || aSample[i].anDLt[iCol]!=aSample[i+1].anDLt[iCol] 
        ){
          sumEq += aSample[i].anEq[iCol];
          nSum100 += 100;
        }
      }



      if( nDist100>nSum100 ){
        avgEq = ((i64)100 * (nRow - sumEq))/(nDist100 - nSum100);
      }
      if( avgEq==0 ) avgEq = 1;
      pIdx->aAvgEq[iCol] = avgEq;
    }
  }
}

................................................................................
  /* Load the statistics from the sqlite_stat4 table. */
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  if( rc==SQLITE_OK ){
    int lookasideEnabled = db->lookaside.bEnabled;
    db->lookaside.bEnabled = 0;
    rc = loadStat4(db, sInfo.zDatabase);
    db->lookaside.bEnabled = lookasideEnabled;
  }
  for(i=sqliteHashFirst(&db->aDb[iDb].pSchema->idxHash);i;i=sqliteHashNext(i)){
    Index *pIdx = sqliteHashData(i);
    sqlite3DbFree(db, pIdx->aiRowEst);
    pIdx->aiRowEst = 0;
  }
#endif

  if( rc==SQLITE_NOMEM ){
    db->mallocFailed = 1;
  }
  return rc;
}


#endif /* SQLITE_OMIT_ANALYZE */

Changes to src/sqliteInt.h.

1797
1798
1799
1800
1801
1802
1803

1804
1805
1806
1807
1808
1809
1810
  unsigned isResized:1;    /* True if resizeIndexObject() has been called */
  unsigned isCovering:1;   /* True if this is a covering index */
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  int nSample;             /* Number of elements in aSample[] */
  int nSampleCol;          /* Size of IndexSample.anEq[] and so on */
  tRowcnt *aAvgEq;         /* Average nEq values for keys not in aSample */
  IndexSample *aSample;    /* Samples of the left-most key */

#endif
};

/*
** Allowed values for Index.idxType
*/
#define SQLITE_IDXTYPE_APPDEF      0   /* Created using CREATE INDEX */







>







1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
  unsigned isResized:1;    /* True if resizeIndexObject() has been called */
  unsigned isCovering:1;   /* True if this is a covering index */
#ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  int nSample;             /* Number of elements in aSample[] */
  int nSampleCol;          /* Size of IndexSample.anEq[] and so on */
  tRowcnt *aAvgEq;         /* Average nEq values for keys not in aSample */
  IndexSample *aSample;    /* Samples of the left-most key */
  tRowcnt *aiRowEst;       /* Non-logarithmic stat1 data for this table */
#endif
};

/*
** Allowed values for Index.idxType
*/
#define SQLITE_IDXTYPE_APPDEF      0   /* Created using CREATE INDEX */