Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Optimize IN operators in the WHERE clause of queries using virtual tables. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
3d65c70343196b8f69c5293e77038398 |
User & Date: | drh 2012-12-14 17:54:38 |
References
2013-04-17
| ||
23:38 | • New ticket [f69b96e3] Over-aggressive optimization of ORDER BY using a virtual table with IN operator. (artifact: 2788bf16 user: drh) | |
2013-02-08
| ||
16:04 | Allow the "a=?1 OR a=?2" to "a IN (?1,?2)" transformation to work on virtual tables again. This was formerly restricted because virtual tables could not optimize IN terms. (See check-in [fad88e71cf195e].) But IN terms are now used by virtual tables (as of check-in [3d65c70343]) so the restriction can now be removed. (check-in: a917c1f0 user: drh tags: IN-with-ORDERBY) | |
Context
2012-12-18
| ||
11:59 | On atomic-write capable systems, if copying the contents of an in-memory journal to disk fails, close the (on disk) journal file before returning the error to the caller. This causes the subsequent rollback operation to use the in-memory journal. Fix for [df678d738adb]. (check-in: 8183d8d7 user: dan tags: trunk) | |
2012-12-17
| ||
16:46 | Prototype for PRAGMA that checks all foreign key constraints on a table. (check-in: 01c980e9 user: drh tags: foreign-key-check) | |
2012-12-14
| ||
17:54 | Optimize IN operators in the WHERE clause of queries using virtual tables. (check-in: 3d65c703 user: drh tags: trunk) | |
17:48 | Remove an unreachable branch. Improvements to comments. (Closed-Leaf check-in: d2fb7619 user: drh tags: vtab-IN-opt) | |
2012-12-13
| ||
18:57 | Generalize the min/max optimization so that if an appropriate index exists, the index it can be used by any aggregate query that contains only a single max() or min() and does not contain a GROUP BY clause. (check-in: 52e75594 user: drh tags: trunk) | |
Changes
Changes to src/where.c.
︙ | ︙ | |||
249 250 251 252 253 254 255 | #define WHERE_ROWID_RANGE 0x00002000 /* rowid<EXPR and/or rowid>EXPR */ #define WHERE_COLUMN_EQ 0x00010000 /* x=EXPR or x IN (...) or x IS NULL */ #define WHERE_COLUMN_RANGE 0x00020000 /* x<EXPR and/or x>EXPR */ #define WHERE_COLUMN_IN 0x00040000 /* x IN (...) */ #define WHERE_COLUMN_NULL 0x00080000 /* x IS NULL */ #define WHERE_INDEXED 0x000f0000 /* Anything that uses an index */ #define WHERE_NOT_FULLSCAN 0x100f3000 /* Does not do a full table scan */ | | | 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 | #define WHERE_ROWID_RANGE 0x00002000 /* rowid<EXPR and/or rowid>EXPR */ #define WHERE_COLUMN_EQ 0x00010000 /* x=EXPR or x IN (...) or x IS NULL */ #define WHERE_COLUMN_RANGE 0x00020000 /* x<EXPR and/or x>EXPR */ #define WHERE_COLUMN_IN 0x00040000 /* x IN (...) */ #define WHERE_COLUMN_NULL 0x00080000 /* x IS NULL */ #define WHERE_INDEXED 0x000f0000 /* Anything that uses an index */ #define WHERE_NOT_FULLSCAN 0x100f3000 /* Does not do a full table scan */ #define WHERE_IN_ABLE 0x080f1000 /* Able to support an IN operator */ #define WHERE_TOP_LIMIT 0x00100000 /* x<EXPR or x<=EXPR constraint */ #define WHERE_BTM_LIMIT 0x00200000 /* x>EXPR or x>=EXPR constraint */ #define WHERE_BOTH_LIMIT 0x00300000 /* Both x>EXPR and x<EXPR */ #define WHERE_IDX_ONLY 0x00400000 /* Use index only - omit table */ #define WHERE_ORDERED 0x00800000 /* Output will appear in correct order */ #define WHERE_REVERSE 0x01000000 /* Scan in reverse order */ #define WHERE_UNIQUE 0x02000000 /* Selects no more than one row */ |
︙ | ︙ | |||
2052 2053 2054 2055 2056 2057 2058 | /* Count the number of possible WHERE clause constraints referring ** to this virtual table */ for(i=nTerm=0, pTerm=pWC->a; i<pWC->nTerm; i++, pTerm++){ if( pTerm->leftCursor != pSrc->iCursor ) continue; assert( (pTerm->eOperator&(pTerm->eOperator-1))==0 ); testcase( pTerm->eOperator==WO_IN ); testcase( pTerm->eOperator==WO_ISNULL ); | | | 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 | /* Count the number of possible WHERE clause constraints referring ** to this virtual table */ for(i=nTerm=0, pTerm=pWC->a; i<pWC->nTerm; i++, pTerm++){ if( pTerm->leftCursor != pSrc->iCursor ) continue; assert( (pTerm->eOperator&(pTerm->eOperator-1))==0 ); testcase( pTerm->eOperator==WO_IN ); testcase( pTerm->eOperator==WO_ISNULL ); if( pTerm->eOperator & (WO_ISNULL) ) continue; if( pTerm->wtFlags & TERM_VNULL ) continue; nTerm++; } /* If the ORDER BY clause contains only columns in the current ** virtual table then allocate space for the aOrderBy part of ** the sqlite3_index_info structure. |
︙ | ︙ | |||
2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 | *(int*)&pIdxInfo->nOrderBy = nOrderBy; *(struct sqlite3_index_constraint**)&pIdxInfo->aConstraint = pIdxCons; *(struct sqlite3_index_orderby**)&pIdxInfo->aOrderBy = pIdxOrderBy; *(struct sqlite3_index_constraint_usage**)&pIdxInfo->aConstraintUsage = pUsage; for(i=j=0, pTerm=pWC->a; i<pWC->nTerm; i++, pTerm++){ if( pTerm->leftCursor != pSrc->iCursor ) continue; assert( (pTerm->eOperator&(pTerm->eOperator-1))==0 ); testcase( pTerm->eOperator==WO_IN ); testcase( pTerm->eOperator==WO_ISNULL ); | > | > > | | | 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 | *(int*)&pIdxInfo->nOrderBy = nOrderBy; *(struct sqlite3_index_constraint**)&pIdxInfo->aConstraint = pIdxCons; *(struct sqlite3_index_orderby**)&pIdxInfo->aOrderBy = pIdxOrderBy; *(struct sqlite3_index_constraint_usage**)&pIdxInfo->aConstraintUsage = pUsage; for(i=j=0, pTerm=pWC->a; i<pWC->nTerm; i++, pTerm++){ u8 op; if( pTerm->leftCursor != pSrc->iCursor ) continue; assert( (pTerm->eOperator&(pTerm->eOperator-1))==0 ); testcase( pTerm->eOperator==WO_IN ); testcase( pTerm->eOperator==WO_ISNULL ); if( pTerm->eOperator & (WO_ISNULL) ) continue; if( pTerm->wtFlags & TERM_VNULL ) continue; pIdxCons[j].iColumn = pTerm->u.leftColumn; pIdxCons[j].iTermOffset = i; op = (u8)pTerm->eOperator; if( op==WO_IN ) op = WO_EQ; pIdxCons[j].op = op; /* The direct assignment in the previous line is possible only because ** the WO_ and SQLITE_INDEX_CONSTRAINT_ codes are identical. The ** following asserts verify this fact. */ assert( WO_EQ==SQLITE_INDEX_CONSTRAINT_EQ ); assert( WO_LT==SQLITE_INDEX_CONSTRAINT_LT ); assert( WO_LE==SQLITE_INDEX_CONSTRAINT_LE ); assert( WO_GT==SQLITE_INDEX_CONSTRAINT_GT ); assert( WO_GE==SQLITE_INDEX_CONSTRAINT_GE ); assert( WO_MATCH==SQLITE_INDEX_CONSTRAINT_MATCH ); assert( pTerm->eOperator & (WO_IN|WO_EQ|WO_LT|WO_LE|WO_GT|WO_GE|WO_MATCH) ); j++; } for(i=0; i<nOrderBy; i++){ Expr *pExpr = pOrderBy->a[i].pExpr; pIdxOrderBy[i].iColumn = pExpr->iColumn; pIdxOrderBy[i].desc = pOrderBy->a[i].sortOrder; } |
︙ | ︙ | |||
2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 | Table *pTab = pSrc->pTab; sqlite3_index_info *pIdxInfo; struct sqlite3_index_constraint *pIdxCons; struct sqlite3_index_constraint_usage *pUsage; WhereTerm *pTerm; int i, j; int nOrderBy; double rCost; /* Make sure wsFlags is initialized to some sane value. Otherwise, if the ** malloc in allocateIndexInfo() fails and this function returns leaving ** wsFlags in an uninitialized state, the caller may behave unpredictably. */ memset(&p->cost, 0, sizeof(p->cost)); | > | 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 | Table *pTab = pSrc->pTab; sqlite3_index_info *pIdxInfo; struct sqlite3_index_constraint *pIdxCons; struct sqlite3_index_constraint_usage *pUsage; WhereTerm *pTerm; int i, j; int nOrderBy; int bAllowIN; /* Allow IN optimizations */ double rCost; /* Make sure wsFlags is initialized to some sane value. Otherwise, if the ** malloc in allocateIndexInfo() fails and this function returns leaving ** wsFlags in an uninitialized state, the caller may behave unpredictably. */ memset(&p->cost, 0, sizeof(p->cost)); |
︙ | ︙ | |||
2238 2239 2240 2241 2242 2243 2244 | /* The module name must be defined. Also, by this point there must ** be a pointer to an sqlite3_vtab structure. Otherwise ** sqlite3ViewGetColumnNames() would have picked up the error. */ assert( pTab->azModuleArg && pTab->azModuleArg[0] ); assert( sqlite3GetVTable(pParse->db, pTab) ); | > > | > | < | < < < < < < < < < | < < < < < > > > > > > > > > > > > > > > > > > > > > > > | | | | | > > > | > > > | | | | | | | | | | | | | | | | | | | | | | | > > | > > > > > > | | | > > > | 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 | /* The module name must be defined. Also, by this point there must ** be a pointer to an sqlite3_vtab structure. Otherwise ** sqlite3ViewGetColumnNames() would have picked up the error. */ assert( pTab->azModuleArg && pTab->azModuleArg[0] ); assert( sqlite3GetVTable(pParse->db, pTab) ); /* Try once or twice. On the first attempt, allow IN optimizations. ** If an IN optimization is accepted by the virtual table xBestIndex ** method, but the pInfo->aConstrainUsage.omit flag is not set, then ** the query will not work because it might allow duplicate rows in ** output. In that case, run the xBestIndex method a second time ** without the IN constraints. Usually this loop only runs once. ** The loop will exit using a "break" statement. */ for(bAllowIN=1; 1; bAllowIN--){ assert( bAllowIN==0 || bAllowIN==1 ); /* Set the aConstraint[].usable fields and initialize all ** output variables to zero. ** ** aConstraint[].usable is true for constraints where the right-hand ** side contains only references to tables to the left of the current ** table. In other words, if the constraint is of the form: ** ** column = expr ** ** and we are evaluating a join, then the constraint on column is ** only valid if all tables referenced in expr occur to the left ** of the table containing column. ** ** The aConstraints[] array contains entries for all constraints ** on the current table. That way we only have to compute it once ** even though we might try to pick the best index multiple times. ** For each attempt at picking an index, the order of tables in the ** join might be different so we have to recompute the usable flag ** each time. */ pIdxCons = *(struct sqlite3_index_constraint**)&pIdxInfo->aConstraint; pUsage = pIdxInfo->aConstraintUsage; for(i=0; i<pIdxInfo->nConstraint; i++, pIdxCons++){ j = pIdxCons->iTermOffset; pTerm = &pWC->a[j]; if( (pTerm->prereqRight&p->notReady)==0 && (bAllowIN || pTerm->eOperator!=WO_IN) ){ pIdxCons->usable = 1; }else{ pIdxCons->usable = 0; } } memset(pUsage, 0, sizeof(pUsage[0])*pIdxInfo->nConstraint); if( pIdxInfo->needToFreeIdxStr ){ sqlite3_free(pIdxInfo->idxStr); } pIdxInfo->idxStr = 0; pIdxInfo->idxNum = 0; pIdxInfo->needToFreeIdxStr = 0; pIdxInfo->orderByConsumed = 0; /* ((double)2) In case of SQLITE_OMIT_FLOATING_POINT... */ pIdxInfo->estimatedCost = SQLITE_BIG_DBL / ((double)2); nOrderBy = pIdxInfo->nOrderBy; if( !p->pOrderBy ){ pIdxInfo->nOrderBy = 0; } if( vtabBestIndex(pParse, pTab, pIdxInfo) ){ return; } pIdxCons = *(struct sqlite3_index_constraint**)&pIdxInfo->aConstraint; for(i=0; i<pIdxInfo->nConstraint; i++, pIdxCons++){ if( pUsage[i].argvIndex>0 ){ j = pIdxCons->iTermOffset; pTerm = &pWC->a[j]; p->cost.used |= pTerm->prereqRight; if( pTerm->eOperator==WO_IN && pUsage[i].omit==0 ){ /* Do not attempt to use an IN constraint if the virtual table ** says that the equivalent EQ constraint cannot be safely omitted. ** If we do attempt to use such a constraint, some rows might be ** repeated in the output. */ break; } } } if( i>=pIdxInfo->nConstraint ) break; } /* If there is an ORDER BY clause, and the selected virtual table index ** does not satisfy it, increase the cost of the scan accordingly. This ** matches the processing for non-virtual tables in bestBtreeIndex(). */ rCost = pIdxInfo->estimatedCost; if( p->pOrderBy && pIdxInfo->orderByConsumed==0 ){ rCost += estLog(rCost)*rCost; |
︙ | ︙ | |||
4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 | #ifndef SQLITE_OMIT_VIRTUALTABLE if( (pLevel->plan.wsFlags & WHERE_VIRTUALTABLE)!=0 ){ /* Case 0: The table is a virtual-table. Use the VFilter and VNext ** to access the data. */ int iReg; /* P3 Value for OP_VFilter */ sqlite3_index_info *pVtabIdx = pLevel->plan.u.pVtabIdx; int nConstraint = pVtabIdx->nConstraint; struct sqlite3_index_constraint_usage *aUsage = pVtabIdx->aConstraintUsage; const struct sqlite3_index_constraint *aConstraint = pVtabIdx->aConstraint; sqlite3ExprCachePush(pParse); iReg = sqlite3GetTempRange(pParse, nConstraint+2); for(j=1; j<=nConstraint; j++){ for(k=0; k<nConstraint; k++){ if( aUsage[k].argvIndex==j ){ | > > | > > > > > | > | | 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 | #ifndef SQLITE_OMIT_VIRTUALTABLE if( (pLevel->plan.wsFlags & WHERE_VIRTUALTABLE)!=0 ){ /* Case 0: The table is a virtual-table. Use the VFilter and VNext ** to access the data. */ int iReg; /* P3 Value for OP_VFilter */ int addrNotFound; sqlite3_index_info *pVtabIdx = pLevel->plan.u.pVtabIdx; int nConstraint = pVtabIdx->nConstraint; struct sqlite3_index_constraint_usage *aUsage = pVtabIdx->aConstraintUsage; const struct sqlite3_index_constraint *aConstraint = pVtabIdx->aConstraint; sqlite3ExprCachePush(pParse); iReg = sqlite3GetTempRange(pParse, nConstraint+2); addrNotFound = pLevel->addrBrk; for(j=1; j<=nConstraint; j++){ for(k=0; k<nConstraint; k++){ if( aUsage[k].argvIndex==j ){ WhereTerm *pTerm = &pWC->a[aConstraint[k].iTermOffset]; int iTarget = iReg+j+1; if( pTerm->eOperator & WO_IN ){ codeEqualityTerm(pParse, pTerm, pLevel, iTarget); addrNotFound = pLevel->addrNxt; }else{ sqlite3ExprCode(pParse, pTerm->pExpr->pRight, iTarget); } break; } } if( k==nConstraint ) break; } sqlite3VdbeAddOp2(v, OP_Integer, pVtabIdx->idxNum, iReg); sqlite3VdbeAddOp2(v, OP_Integer, j-1, iReg+1); sqlite3VdbeAddOp4(v, OP_VFilter, iCur, addrNotFound, iReg, pVtabIdx->idxStr, pVtabIdx->needToFreeIdxStr ? P4_MPRINTF : P4_STATIC); pVtabIdx->needToFreeIdxStr = 0; for(j=0; j<nConstraint; j++){ if( aUsage[j].omit ){ int iTerm = aConstraint[j].iTermOffset; disableTerm(pLevel, &pWC->a[iTerm]); } |
︙ | ︙ |
Changes to test/vtab1.test.
︙ | ︙ | |||
1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 | } {15 {} 16} do_test vtab1.13-3 { execsql { SELECT * FROM echo_c WHERE b IS NULL AND a = 15; } } {15 {} 16} do_test vtab1-14.1 { execsql { DELETE FROM c } set echo_module "" execsql { SELECT * FROM echo_c WHERE rowid IN (1, 2, 3) } set echo_module | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | | | 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 | } {15 {} 16} do_test vtab1.13-3 { execsql { SELECT * FROM echo_c WHERE b IS NULL AND a = 15; } } {15 {} 16} do_test vtab1-14.001 { execsql {SELECT rowid, * FROM echo_c WHERE +rowid IN (1,2,3)} } {1 3 G H 2 {} 15 16 3 15 {} 16} do_test vtab1-14.002 { execsql {SELECT rowid, * FROM echo_c WHERE rowid IN (1,2,3)} } {1 3 G H 2 {} 15 16 3 15 {} 16} do_test vtab1-14.003 { execsql {SELECT rowid, * FROM echo_c WHERE +rowid IN (0,1,5,2,'a',3,NULL)} } {1 3 G H 2 {} 15 16 3 15 {} 16} do_test vtab1-14.004 { execsql {SELECT rowid, * FROM echo_c WHERE rowid IN (0,1,5,'a',2,3,NULL)} } {1 3 G H 2 {} 15 16 3 15 {} 16} do_test vtab1-14.005 { execsql {SELECT rowid, * FROM echo_c WHERE rowid NOT IN (0,1,5,'a',2,3)} } {} do_test vtab1-14.006 { execsql {SELECT rowid, * FROM echo_c WHERE rowid NOT IN (0,5,'a',2,3)} } {1 3 G H} do_test vtab1-14.007 { execsql {SELECT rowid, * FROM echo_c WHERE +rowid NOT IN (0,5,'a',2,3,NULL)} } {} do_test vtab1-14.008 { execsql {SELECT rowid, * FROM echo_c WHERE rowid NOT IN (0,5,'a',2,3,NULL)} } {} do_test vtab1-14.011 { execsql {SELECT * FROM echo_c WHERE +a IN (1,3,8,'x',NULL,15,24)} } {3 G H 15 {} 16} do_test vtab1-14.012 { execsql {SELECT * FROM echo_c WHERE a IN (1,3,8,'x',NULL,15,24)} } {3 G H 15 {} 16} do_test vtab1-14.013 { execsql {SELECT * FROM echo_c WHERE a NOT IN (1,8,'x',15,24)} } {3 G H} do_test vtab1-14.014 { execsql {SELECT * FROM echo_c WHERE a NOT IN (1,8,'x',NULL,15,24)} } {} do_test vtab1-14.015 { execsql {SELECT * FROM echo_c WHERE +a NOT IN (1,8,'x',NULL,15,24)} } {} do_test vtab1-14.1 { execsql { DELETE FROM c } set echo_module "" execsql { SELECT * FROM echo_c WHERE rowid IN (1, 2, 3) } set echo_module } {/xBestIndex {SELECT rowid, . FROM 'c' WHERE rowid = .} xFilter {SELECT rowid, . FROM 'c' WHERE rowid = .} 1/} do_test vtab1-14.2 { set echo_module "" execsql { SELECT * FROM echo_c WHERE rowid = 1 } set echo_module } [list xBestIndex {SELECT rowid, * FROM 'c' WHERE rowid = ?} xFilter {SELECT rowid, * FROM 'c' WHERE rowid = ?} 1] do_test vtab1-14.3 { set echo_module "" execsql { SELECT * FROM echo_c WHERE a = 1 } set echo_module } [list xBestIndex {SELECT rowid, * FROM 'c' WHERE a = ?} xFilter {SELECT rowid, * FROM 'c' WHERE a = ?} 1] do_test vtab1-14.4 { set echo_module "" execsql { SELECT * FROM echo_c WHERE a IN (1, 2) } set echo_module } {/xBestIndex {SELECT rowid, . FROM 'c' WHERE a = .} xFilter {SELECT rowid, . FROM 'c' WHERE a = .} 1/} do_test vtab1-15.1 { execsql { CREATE TABLE t1(a, b, c); CREATE VIRTUAL TABLE echo_t1 USING echo(t1); } } {} |
︙ | ︙ |