Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Fix collation sequence handling in code to estimate the expense of a range scan. |
---|---|
Timelines: | family | ancestors | pager-refactor-1 |
Files: | files | file ages | folders |
SHA1: |
359e8cb31bf86a4ee0a3aa710250bc4a |
User & Date: | dan 2009-08-08 09:29:37.000 |
Context
2009-08-08
| ||
09:29 | Fix collation sequence handling in code to estimate the expense of a range scan. Leaf check-in: 359e8cb31b user: dan tags: pager-refactor-1 | |
2009-08-07
| ||
15:43 | Add experimental implementation of the sqlite_stat2 table. This table stores evenly spaced data samples read from an index b-tree. It can be used to optimize range queries on the index when the required range is available at compile time (ie. when it is specified using SQL literals, not bound variables or other expressions). check-in: 929ed8a4cc user: dan tags: pager-refactor-1 | |
Changes
Changes to src/sqliteInt.h.
︙ | ︙ | |||
2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 | const void *sqlite3ValueText(sqlite3_value*, u8); int sqlite3ValueBytes(sqlite3_value*, u8); void sqlite3ValueSetStr(sqlite3_value*, int, const void *,u8, void(*)(void*)); void sqlite3ValueFree(sqlite3_value*); sqlite3_value *sqlite3ValueNew(sqlite3 *); char *sqlite3Utf16to8(sqlite3 *, const void*, int); int sqlite3ValueFromExpr(sqlite3 *, Expr *, u8, u8, sqlite3_value **); void sqlite3ValueApplyAffinity(sqlite3_value *, u8, u8); #ifndef SQLITE_AMALGAMATION extern const unsigned char sqlite3UpperToLower[]; extern const unsigned char sqlite3CtypeMap[]; extern SQLITE_WSD struct Sqlite3Config sqlite3Config; extern SQLITE_WSD FuncDefHash sqlite3GlobalFunctions; | > | 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 | const void *sqlite3ValueText(sqlite3_value*, u8); int sqlite3ValueBytes(sqlite3_value*, u8); void sqlite3ValueSetStr(sqlite3_value*, int, const void *,u8, void(*)(void*)); void sqlite3ValueFree(sqlite3_value*); sqlite3_value *sqlite3ValueNew(sqlite3 *); char *sqlite3Utf16to8(sqlite3 *, const void*, int); char *sqlite3Utf8to16(sqlite3 *, int, char *, int, int *); int sqlite3ValueFromExpr(sqlite3 *, Expr *, u8, u8, sqlite3_value **); void sqlite3ValueApplyAffinity(sqlite3_value *, u8, u8); #ifndef SQLITE_AMALGAMATION extern const unsigned char sqlite3UpperToLower[]; extern const unsigned char sqlite3CtypeMap[]; extern SQLITE_WSD struct Sqlite3Config sqlite3Config; extern SQLITE_WSD FuncDefHash sqlite3GlobalFunctions; |
︙ | ︙ |
Changes to src/test1.c.
︙ | ︙ | |||
2305 2306 2307 2308 2309 2310 2311 2312 | case SQLITE_UTF16BE: Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj("UTF-16BE",-1)); break; default: assert(0); } pVal = sqlite3ValueNew(0); | > > | | | | | | | | | > > | 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 | case SQLITE_UTF16BE: Tcl_ListObjAppendElement(i,pX,Tcl_NewStringObj("UTF-16BE",-1)); break; default: assert(0); } sqlite3BeginBenignMalloc(); pVal = sqlite3ValueNew(0); if( pVal ){ sqlite3ValueSetStr(pVal, nA, zA, encin, SQLITE_STATIC); n = sqlite3_value_bytes(pVal); Tcl_ListObjAppendElement(i,pX, Tcl_NewStringObj((char*)sqlite3_value_text(pVal),n)); sqlite3ValueSetStr(pVal, nB, zB, encin, SQLITE_STATIC); n = sqlite3_value_bytes(pVal); Tcl_ListObjAppendElement(i,pX, Tcl_NewStringObj((char*)sqlite3_value_text(pVal),n)); sqlite3ValueFree(pVal); } sqlite3EndBenignMalloc(); Tcl_EvalObjEx(i, pX, 0); Tcl_DecrRefCount(pX); Tcl_GetIntFromObj(i, Tcl_GetObjResult(i), &res); return res; } static int test_collate( |
︙ | ︙ |
Changes to src/utf.c.
︙ | ︙ | |||
449 450 451 452 453 454 455 456 457 458 459 460 461 462 | sqlite3VdbeMemRelease(&m); m.z = 0; } assert( (m.flags & MEM_Term)!=0 || db->mallocFailed ); assert( (m.flags & MEM_Str)!=0 || db->mallocFailed ); return (m.flags & MEM_Dyn)!=0 ? m.z : sqlite3DbStrDup(db, m.z); } /* ** pZ is a UTF-16 encoded unicode string at least nChar characters long. ** Return the number of bytes in the first nChar unicode characters ** in pZ. nChar must be non-negative. */ int sqlite3Utf16ByteLen(const void *zIn, int nChar){ | > > > > > > > > > > > > > > > > > > > > > > > > | 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 | sqlite3VdbeMemRelease(&m); m.z = 0; } assert( (m.flags & MEM_Term)!=0 || db->mallocFailed ); assert( (m.flags & MEM_Str)!=0 || db->mallocFailed ); return (m.flags & MEM_Dyn)!=0 ? m.z : sqlite3DbStrDup(db, m.z); } /* ** Convert a UTF-8 string to the UTF-16 encoding specified by parameter ** enc. A pointer to the new string is returned, and the value of *pnOut ** is set to the length of the returned string in bytes. The call should ** arrange to call sqlite3DbFree() on the returned pointer when it is ** no longer required. ** ** If a malloc failure occurs, NULL is returned and the db.mallocFailed ** flag set. */ char *sqlite3Utf8to16(sqlite3 *db, int enc, char *z, int n, int *pnOut){ Mem m; memset(&m, 0, sizeof(m)); m.db = db; sqlite3VdbeMemSetStr(&m, z, n, SQLITE_UTF8, SQLITE_STATIC); if( sqlite3VdbeMemTranslate(&m, enc) ){ assert( db->mallocFailed ); return 0; } assert( m.z==m.zMalloc ); *pnOut = m.n; return m.z; } /* ** pZ is a UTF-16 encoded unicode string at least nChar characters long. ** Return the number of bytes in the first nChar unicode characters ** in pZ. nChar must be non-negative. */ int sqlite3Utf16ByteLen(const void *zIn, int nChar){ |
︙ | ︙ |
Changes to src/where.c.
︙ | ︙ | |||
1894 1895 1896 1897 1898 1899 1900 | /* Try to find a more efficient access pattern by using multiple indexes ** to optimize an OR expression within the WHERE clause. */ bestOrClauseIndex(pParse, pWC, pSrc, notReady, pOrderBy, pCost); } #endif /* SQLITE_OMIT_VIRTUALTABLE */ | < | | > | > > > > > | > > > | < | > > | > > > > > > > > > > > > > | 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 | /* Try to find a more efficient access pattern by using multiple indexes ** to optimize an OR expression within the WHERE clause. */ bestOrClauseIndex(pParse, pWC, pSrc, notReady, pOrderBy, pCost); } #endif /* SQLITE_OMIT_VIRTUALTABLE */ /* ** Argument pIdx is a pointer to an index structure that has an array of ** SQLITE_INDEX_SAMPLES evenly spaced samples of the first indexed column ** stored in Index.aSample. The domain of values stored in said column ** may be thought of as divided into (SQLITE_INDEX_SAMPLES+1) regions. ** Region 0 contains all values smaller than the first sample value. Region ** 1 contains values larger than or equal to the value of the first sample, ** but smaller than the value of the second. And so on. ** ** If successful, this function determines which of the regions value ** pVal lies in, sets *piRegion to the region index and returns SQLITE_OK. ** Or, if an OOM occurs while converting text values between encodings, ** SQLITE_NOMEM is returned. */ static int whereRangeRegion( Parse *pParse, /* Database connection */ Index *pIdx, /* Index to consider domain of */ sqlite3_value *pVal, /* Value to consider */ int *piRegion /* OUT: Region of domain in which value lies */ ){ if( pVal ){ IndexSample *aSample = pIdx->aSample; int i = 0; int eType = sqlite3_value_type(pVal); if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){ double r = sqlite3_value_double(pVal); for(i=0; i<SQLITE_INDEX_SAMPLES; i++){ if( aSample[i].eType==SQLITE_NULL ) continue; if( aSample[i].eType>=SQLITE_TEXT || aSample[i].u.r>r ) break; } }else if( eType==SQLITE_TEXT || eType==SQLITE_BLOB ){ sqlite3 *db = pParse->db; CollSeq *pColl; const u8 *z; int n; if( eType==SQLITE_BLOB ){ z = (const u8 *)sqlite3_value_blob(pVal); pColl = db->pDfltColl; assert( pColl->enc==SQLITE_UTF8 ); }else{ pColl = sqlite3FindCollSeq(db, SQLITE_UTF8, *pIdx->azColl, 0); if( sqlite3CheckCollSeq(pParse, pColl) ){ return SQLITE_ERROR; } z = (const u8 *)sqlite3ValueText(pVal, pColl->enc); if( !z ){ return SQLITE_NOMEM; } assert( z && pColl && pColl->xCmp ); } n = sqlite3ValueBytes(pVal, pColl->enc); for(i=0; i<SQLITE_INDEX_SAMPLES; i++){ int r; int eSampletype = aSample[i].eType; if( eSampletype==SQLITE_NULL || eSampletype<eType ) continue; if( (eSampletype!=eType) ) break; if( pColl->enc==SQLITE_UTF8 ){ r = pColl->xCmp(pColl->pUser, aSample[i].nByte, aSample[i].u.z, n, z); }else{ int nSample; char *zSample = sqlite3Utf8to16( db, pColl->enc, aSample[i].u.z, aSample[i].nByte, &nSample ); if( !zSample ){ assert( db->mallocFailed ); return SQLITE_NOMEM; } r = pColl->xCmp(pColl->pUser, nSample, zSample, n, z); sqlite3DbFree(db, zSample); } if( r>0 ) break; } } *piRegion = i; } return SQLITE_OK; } |
︙ | ︙ | |||
1990 1991 1992 1993 1994 1995 1996 | ** value of 1 indicates that the proposed range scan is expected to visit ** approximately 1/9 (11%) of the rows selected by the nEq equality constraints ** (if any). A return value of 9 indicates that it is expected that the ** range scan will visit 9/9 (100%) of the rows selected by the equality ** constraints. */ static int whereRangeScanEst( | | > | < | | > | | 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 | ** value of 1 indicates that the proposed range scan is expected to visit ** approximately 1/9 (11%) of the rows selected by the nEq equality constraints ** (if any). A return value of 9 indicates that it is expected that the ** range scan will visit 9/9 (100%) of the rows selected by the equality ** constraints. */ static int whereRangeScanEst( Parse *pParse, Index *p, int nEq, WhereTerm *pLower, WhereTerm *pUpper, int *piEst /* OUT: Return value */ ){ sqlite3 *db = pParse->db; sqlite3_value *pLowerVal = 0; sqlite3_value *pUpperVal = 0; int rc = SQLITE_OK; if( nEq==0 && p->aSample ){ int iEst; int iUpper = SQLITE_INDEX_SAMPLES; int iLower = 0; u8 aff = p->pTable->aCol[0].affinity; if( pLower ){ Expr *pExpr = pLower->pExpr->pRight; rc = sqlite3ValueFromExpr(db, pExpr, SQLITE_UTF8, aff, &pLowerVal); if( !pLowerVal ) goto fallback; } if( pUpper ){ Expr *pExpr = pUpper->pExpr->pRight; rc = sqlite3ValueFromExpr(db, pExpr, SQLITE_UTF8, aff, &pUpperVal); if( !pUpperVal ){ sqlite3ValueFree(pLowerVal); goto fallback; } } rc = whereRangeRegion(pParse, p, pUpperVal, &iUpper); if( rc==SQLITE_OK ){ rc = whereRangeRegion(pParse, p, pLowerVal, &iLower); } iEst = iUpper - iLower; if( iEst>=SQLITE_INDEX_SAMPLES ) iEst = SQLITE_INDEX_SAMPLES-1; else if( iEst<1 ) iEst = 1; sqlite3ValueFree(pLowerVal); sqlite3ValueFree(pUpperVal); *piEst = iEst; return rc; } fallback: assert( pLower || pUpper ); *piEst = (SQLITE_INDEX_SAMPLES-1) / ((pLower&&pUpper)?9:3); return rc; } /* ** Find the query plan for accessing a particular table. Write the ** best query plan and its cost into the WhereCost object supplied as the |
︙ | ︙ | |||
2290 2291 2292 2293 2294 2295 2296 | WhereTerm *pTop = findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE, pProbe); WhereTerm *pBtm = findTerm(pWC, iCur, j, notReady, WO_GT|WO_GE, pProbe); wsFlags |= WHERE_COLUMN_RANGE; if( pTop ) wsFlags |= WHERE_TOP_LIMIT; if( pBtm ) wsFlags |= WHERE_BTM_LIMIT; | | | 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 | WhereTerm *pTop = findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE, pProbe); WhereTerm *pBtm = findTerm(pWC, iCur, j, notReady, WO_GT|WO_GE, pProbe); wsFlags |= WHERE_COLUMN_RANGE; if( pTop ) wsFlags |= WHERE_TOP_LIMIT; if( pBtm ) wsFlags |= WHERE_BTM_LIMIT; whereRangeScanEst(pParse, pProbe, nEq, pBtm, pTop, &nRegion); cost = cost * nRegion / (SQLITE_INDEX_SAMPLES-1); nRow = nRow * nRegion / (SQLITE_INDEX_SAMPLES-1); } } /* Add the additional cost of sorting if that is a factor. */ |
︙ | ︙ |
Changes to test/analyze2.test.
1 2 3 4 5 6 7 8 9 10 | # 2009 August 06 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** | < < | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 | # 2009 August 06 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # $Id: analyze.test,v 1.9 2008/08/11 18:44:58 drh Exp $ set testdir [file dirname $argv0] source $testdir/tester.tcl do_test analyze2-1.1 { |
︙ | ︙ | |||
127 128 129 130 131 132 133 134 135 | } {0 0 {TABLE t1 WITH INDEX t1_y}} do_test analyze2-4.6 { eqp "SELECT * FROM t1 WHERE x<444 AND y>'h'" } {0 0 {TABLE t1 WITH INDEX t1_y}} do_test analyze2-4.7 { eqp "SELECT * FROM t1 WHERE x<221 AND y>'h'" } {0 0 {TABLE t1 WITH INDEX t1_x}} finish_test | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 | } {0 0 {TABLE t1 WITH INDEX t1_y}} do_test analyze2-4.6 { eqp "SELECT * FROM t1 WHERE x<444 AND y>'h'" } {0 0 {TABLE t1 WITH INDEX t1_y}} do_test analyze2-4.7 { eqp "SELECT * FROM t1 WHERE x<221 AND y>'h'" } {0 0 {TABLE t1 WITH INDEX t1_x}} do_test analyze2-5.1 { execsql { CREATE TABLE t3(a COLLATE nocase, b) } execsql { CREATE INDEX t3a ON t3(a) } execsql { CREATE INDEX t3b ON t3(b) } set alphabet [list A b C d E f G h I j] for {set i 0} {$i < 1000} {incr i} { set str [lindex $alphabet [expr ($i/100)%10]] append str [lindex $alphabet [expr ($i/ 10)%10]] append str [lindex $alphabet [expr ($i/ 1)%10]] execsql { INSERT INTO t3 VALUES($str, $str) } } execsql ANALYZE } {} do_test analyze2-5.2 { execsql "SELECT * FROM sqlite_stat2 WHERE idx='t3a'" } {t3 t3a AAA bbb CCC ddd EEE fff GGG hhh III jjj} do_test analyze2-5.3 { execsql "SELECT * FROM sqlite_stat2 WHERE idx='t3b'" } {t3 t3b AAA CCC EEE GGG III bbb ddd fff hhh jjj} do_test analyze2-5.4 { eqp "SELECT * FROM t3 WHERE a > 'A' AND a < 'C' AND b > 'A' AND b < 'C'" } {0 0 {TABLE t3 WITH INDEX t3b}} do_test analyze2-5.5 { eqp "SELECT * FROM t3 WHERE a > 'A' AND a < 'c' AND b > 'A' AND b < 'c'" } {0 0 {TABLE t3 WITH INDEX t3a}} proc test_collate {enc lhs rhs} { # puts $enc return [string compare $lhs $rhs] } do_test analyze2-6.1 { add_test_collate db 0 0 1 execsql { CREATE TABLE t4(x COLLATE test_collate) } execsql { CREATE INDEX t4x ON t4(x) } set alphabet [list a b c d e f g h i j] for {set i 0} {$i < 1000} {incr i} { set str [lindex $alphabet [expr ($i/100)%10]] append str [lindex $alphabet [expr ($i/ 10)%10]] append str [lindex $alphabet [expr ($i/ 1)%10]] execsql { INSERT INTO t4 VALUES($str) } } execsql ANALYZE } {} do_test analyze2-6.2 { execsql "SELECT * FROM sqlite_stat2 WHERE tbl='t4'" } {t4 t4x aaa bbb ccc ddd eee fff ggg hhh iii jjj} do_test analyze2-6.3 { eqp "SELECT * FROM t4 WHERE x>'ccc'" } {0 0 {TABLE t4 WITH INDEX t4x}} do_test analyze2-6.4 { eqp "SELECT * FROM t4 AS t41, t4 AS t42 WHERE t41.x>'ccc' AND t42.x>'ggg'" } {0 1 {TABLE t4 AS t42 WITH INDEX t4x} 1 0 {TABLE t4 AS t41 WITH INDEX t4x}} do_test analyze2-6.5 { eqp "SELECT * FROM t4 AS t41, t4 AS t42 WHERE t41.x>'ddd' AND t42.x>'ccc'" } {0 0 {TABLE t4 AS t41 WITH INDEX t4x} 1 1 {TABLE t4 AS t42 WITH INDEX t4x}} ifcapable memdebug { execsql { DELETE FROM t4 } db close source $testdir/malloc_common.tcl file copy -force test.db bak.db do_malloc_test analyze2-oom -tclprep { db close file copy -force bak.db test.db sqlite3 db test.db sqlite3_db_config_lookaside db 0 0 0 add_test_collate db 0 0 1 } -sqlbody { SELECT * FROM t4 AS t41, t4 AS t42 WHERE t41.x>'ddd' AND t42.x>'ccc' } } finish_test |