Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | When constructing an ephermeral table to use as the right-hand side of an IN operator, also construct a Bloom filter to speed membership testing. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | bedrock-3.46 |
Files: | files | file ages | folders |
SHA3-256: |
557a14a24a2b4bb94fc0f8a59db19e79 |
User & Date: | drh 2024-07-05 10:03:29 |
Context
2024-07-05
| ||
10:08 | Improved reuse of subqueries associated with IN operators, especially when the IN operator is duplicated due to predicate push-down. (check-in: 2a07caad user: drh tags: bedrock-3.46) | |
10:03 | When constructing an ephermeral table to use as the right-hand side of an IN operator, also construct a Bloom filter to speed membership testing. (check-in: 557a14a2 user: drh tags: bedrock-3.46) | |
2024-07-03
| ||
20:19 | When constructing an ephermeral table to use as the right-hand side of an IN operator, also construct a Bloom filter to speed membership testing. (check-in: baa83b46 user: drh tags: trunk) | |
2024-05-28
| ||
18:41 | Fix another problem with the sqlite3_log() message identifying the table or index that a conflicting page belongs to. (check-in: 19d5fd8a user: dan tags: bedrock) | |
Changes
Changes to src/expr.c.
︙ | ︙ | |||
3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 | /* If the LHS and RHS of the IN operator do not match, that ** error will have been caught long before we reach this point. */ if( ALWAYS(pEList->nExpr==nVal) ){ Select *pCopy; SelectDest dest; int i; int rc; sqlite3SelectDestInit(&dest, SRT_Set, iTab); dest.zAffSdst = exprINAffinity(pParse, pExpr); pSelect->iLimit = 0; testcase( pSelect->selFlags & SF_Distinct ); testcase( pKeyInfo==0 ); /* Caused by OOM in sqlite3KeyInfoAlloc() */ pCopy = sqlite3SelectDup(pParse->db, pSelect, 0); rc = pParse->db->mallocFailed ? 1 :sqlite3Select(pParse, pCopy, &dest); sqlite3SelectDelete(pParse->db, pCopy); sqlite3DbFree(pParse->db, dest.zAffSdst); if( rc ){ sqlite3KeyInfoUnref(pKeyInfo); return; | > > > > > > > > > > > > > > > | | 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 | /* If the LHS and RHS of the IN operator do not match, that ** error will have been caught long before we reach this point. */ if( ALWAYS(pEList->nExpr==nVal) ){ Select *pCopy; SelectDest dest; int i; int rc; int addrBloom = 0; sqlite3SelectDestInit(&dest, SRT_Set, iTab); dest.zAffSdst = exprINAffinity(pParse, pExpr); pSelect->iLimit = 0; if( addrOnce && OptimizationEnabled(pParse->db, SQLITE_BloomFilter) ){ int regBloom = ++pParse->nMem; addrBloom = sqlite3VdbeAddOp2(v, OP_Blob, 10000, regBloom); VdbeComment((v, "Bloom filter")); dest.iSDParm2 = regBloom; } testcase( pSelect->selFlags & SF_Distinct ); testcase( pKeyInfo==0 ); /* Caused by OOM in sqlite3KeyInfoAlloc() */ pCopy = sqlite3SelectDup(pParse->db, pSelect, 0); rc = pParse->db->mallocFailed ? 1 :sqlite3Select(pParse, pCopy, &dest); sqlite3SelectDelete(pParse->db, pCopy); sqlite3DbFree(pParse->db, dest.zAffSdst); if( addrBloom ){ sqlite3VdbeGetOp(v, addrOnce)->p3 = dest.iSDParm2; if( dest.iSDParm2==0 ){ sqlite3VdbeChangeToNoop(v, addrBloom); }else{ sqlite3VdbeGetOp(v, addrOnce)->p3 = dest.iSDParm2; } } if( rc ){ sqlite3KeyInfoUnref(pKeyInfo); return; } assert( pKeyInfo!=0 ); /* OOM will cause exit after sqlite3Select() */ assert( pEList!=0 ); assert( pEList->nExpr>0 ); assert( sqlite3KeyInfoIsWriteable(pKeyInfo) ); for(i=0; i<nVal; i++){ Expr *p = sqlite3VectorFieldSubexpr(pLeft, i); pKeyInfo->aColl[i] = sqlite3BinaryCompareCollSeq( |
︙ | ︙ | |||
3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 | sqlite3VdbeAddOp3(v, OP_SeekRowid, iTab, destIfFalse, rLhs); VdbeCoverage(v); addrTruthOp = sqlite3VdbeAddOp0(v, OP_Goto); /* Return True */ }else{ sqlite3VdbeAddOp4(v, OP_Affinity, rLhs, nVector, 0, zAff, nVector); if( destIfFalse==destIfNull ){ /* Combine Step 3 and Step 5 into a single opcode */ sqlite3VdbeAddOp4Int(v, OP_NotFound, iTab, destIfFalse, rLhs, nVector); VdbeCoverage(v); goto sqlite3ExprCodeIN_finished; } /* Ordinary Step 3, for the case where FALSE and NULL are distinct */ addrTruthOp = sqlite3VdbeAddOp4Int(v, OP_Found, iTab, 0, rLhs, nVector); VdbeCoverage(v); | > > > > > > > > > | 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 | sqlite3VdbeAddOp3(v, OP_SeekRowid, iTab, destIfFalse, rLhs); VdbeCoverage(v); addrTruthOp = sqlite3VdbeAddOp0(v, OP_Goto); /* Return True */ }else{ sqlite3VdbeAddOp4(v, OP_Affinity, rLhs, nVector, 0, zAff, nVector); if( destIfFalse==destIfNull ){ /* Combine Step 3 and Step 5 into a single opcode */ if( ExprHasProperty(pExpr, EP_Subrtn) ){ const VdbeOp *pOp = sqlite3VdbeGetOp(v, pExpr->y.sub.iAddr); assert( pOp->opcode==OP_Once || pParse->nErr ); if( pOp->opcode==OP_Once && pOp->p3>0 ){ assert( OptimizationEnabled(pParse->db, SQLITE_BloomFilter) ); sqlite3VdbeAddOp4Int(v, OP_Filter, pOp->p3, destIfFalse, rLhs, nVector); VdbeCoverage(v); } } sqlite3VdbeAddOp4Int(v, OP_NotFound, iTab, destIfFalse, rLhs, nVector); VdbeCoverage(v); goto sqlite3ExprCodeIN_finished; } /* Ordinary Step 3, for the case where FALSE and NULL are distinct */ addrTruthOp = sqlite3VdbeAddOp4Int(v, OP_Found, iTab, 0, rLhs, nVector); VdbeCoverage(v); |
︙ | ︙ |
Changes to src/select.c.
︙ | ︙ | |||
1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 | if( pSort ){ /* At first glance you would think we could optimize out the ** ORDER BY in this case since the order of entries in the set ** does not matter. But there might be a LIMIT clause, in which ** case the order does matter */ pushOntoSorter( pParse, pSort, p, regResult, regOrig, nResultCol, nPrefixReg); }else{ int r1 = sqlite3GetTempReg(pParse); assert( sqlite3Strlen30(pDest->zAffSdst)==nResultCol ); sqlite3VdbeAddOp4(v, OP_MakeRecord, regResult, nResultCol, r1, pDest->zAffSdst, nResultCol); sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iParm, r1, regResult, nResultCol); sqlite3ReleaseTempReg(pParse, r1); } break; } /* If any row exist in the result set, record that fact and abort. | > > > > > > | 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 | if( pSort ){ /* At first glance you would think we could optimize out the ** ORDER BY in this case since the order of entries in the set ** does not matter. But there might be a LIMIT clause, in which ** case the order does matter */ pushOntoSorter( pParse, pSort, p, regResult, regOrig, nResultCol, nPrefixReg); pDest->iSDParm2 = 0; /* Signal that any Bloom filter is unpopulated */ }else{ int r1 = sqlite3GetTempReg(pParse); assert( sqlite3Strlen30(pDest->zAffSdst)==nResultCol ); sqlite3VdbeAddOp4(v, OP_MakeRecord, regResult, nResultCol, r1, pDest->zAffSdst, nResultCol); sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iParm, r1, regResult, nResultCol); if( pDest->iSDParm2 ){ sqlite3VdbeAddOp4Int(v, OP_FilterAdd, pDest->iSDParm2, 0, regResult, nResultCol); ExplainQueryPlan((pParse, 0, "CREATE BLOOM FILTER")); } sqlite3ReleaseTempReg(pParse, r1); } break; } /* If any row exist in the result set, record that fact and abort. |
︙ | ︙ | |||
3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 | int r1; testcase( pIn->nSdst>1 ); r1 = sqlite3GetTempReg(pParse); sqlite3VdbeAddOp4(v, OP_MakeRecord, pIn->iSdst, pIn->nSdst, r1, pDest->zAffSdst, pIn->nSdst); sqlite3VdbeAddOp4Int(v, OP_IdxInsert, pDest->iSDParm, r1, pIn->iSdst, pIn->nSdst); sqlite3ReleaseTempReg(pParse, r1); break; } /* If this is a scalar select that is part of an expression, then ** store the results in the appropriate memory cell and break out ** of the scan loop. Note that the select might return multiple columns | > > > > > | 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 | int r1; testcase( pIn->nSdst>1 ); r1 = sqlite3GetTempReg(pParse); sqlite3VdbeAddOp4(v, OP_MakeRecord, pIn->iSdst, pIn->nSdst, r1, pDest->zAffSdst, pIn->nSdst); sqlite3VdbeAddOp4Int(v, OP_IdxInsert, pDest->iSDParm, r1, pIn->iSdst, pIn->nSdst); if( pDest->iSDParm2>0 ){ sqlite3VdbeAddOp4Int(v, OP_FilterAdd, pDest->iSDParm2, 0, pIn->iSdst, pIn->nSdst); ExplainQueryPlan((pParse, 0, "CREATE BLOOM FILTER")); } sqlite3ReleaseTempReg(pParse, r1); break; } /* If this is a scalar select that is part of an expression, then ** store the results in the appropriate memory cell and break out ** of the scan loop. Note that the select might return multiple columns |
︙ | ︙ |
Changes to src/sqliteInt.h.
︙ | ︙ | |||
3640 3641 3642 3643 3644 3645 3646 | ** Store the first column of the first result row ** in register pDest->iSDParm then abandon the rest ** of the query. This destination implies "LIMIT 1". ** ** SRT_Set The result must be a single column. Store each ** row of result as the key in table pDest->iSDParm. ** Apply the affinity pDest->affSdst before storing | > > > > | | 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 | ** Store the first column of the first result row ** in register pDest->iSDParm then abandon the rest ** of the query. This destination implies "LIMIT 1". ** ** SRT_Set The result must be a single column. Store each ** row of result as the key in table pDest->iSDParm. ** Apply the affinity pDest->affSdst before storing ** results. if pDest->iSDParm2 is positive, then it is ** a regsiter holding a Bloom filter for the IN operator ** that should be populated in addition to the ** pDest->iSDParm table. This SRT is used to ** implement "IN (SELECT ...)". ** ** SRT_EphemTab Create an temporary table pDest->iSDParm and store ** the result there. The cursor is left open after ** returning. This is like SRT_Table except that ** this destination uses OP_OpenEphemeral to create ** the table first. ** |
︙ | ︙ |
Changes to src/vdbe.c.
︙ | ︙ | |||
5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 | assert( pC->isTable==0 ); r.nField = (u16)pOp->p4.i; if( r.nField>0 ){ /* Key values in an array of registers */ r.pKeyInfo = pC->pKeyInfo; r.default_rc = 0; #ifdef SQLITE_DEBUG for(ii=0; ii<r.nField; ii++){ assert( memIsValid(&r.aMem[ii]) ); assert( (r.aMem[ii].flags & MEM_Zero)==0 || r.aMem[ii].n==0 ); if( ii ) REGISTER_TRACE(pOp->p3+ii, &r.aMem[ii]); } #endif rc = sqlite3BtreeIndexMoveto(pC->uc.pCursor, &r, &pC->seekResult); | > | 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 | assert( pC->isTable==0 ); r.nField = (u16)pOp->p4.i; if( r.nField>0 ){ /* Key values in an array of registers */ r.pKeyInfo = pC->pKeyInfo; r.default_rc = 0; #ifdef SQLITE_DEBUG (void)sqlite3FaultSim(50); /* For use by --counter in TH3 */ for(ii=0; ii<r.nField; ii++){ assert( memIsValid(&r.aMem[ii]) ); assert( (r.aMem[ii].flags & MEM_Zero)==0 || r.aMem[ii].n==0 ); if( ii ) REGISTER_TRACE(pOp->p3+ii, &r.aMem[ii]); } #endif rc = sqlite3BtreeIndexMoveto(pC->uc.pCursor, &r, &pC->seekResult); |
︙ | ︙ |
Changes to test/autoindex1.test.
︙ | ︙ | |||
181 182 183 184 185 186 187 | do_eqp_test autoindex1-500.1 { SELECT b FROM t501 WHERE t501.a IN (SELECT x FROM t502 WHERE y=?); } { QUERY PLAN |--SEARCH t501 USING INTEGER PRIMARY KEY (rowid=?) `--LIST SUBQUERY xxxxxx | | > | 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 | do_eqp_test autoindex1-500.1 { SELECT b FROM t501 WHERE t501.a IN (SELECT x FROM t502 WHERE y=?); } { QUERY PLAN |--SEARCH t501 USING INTEGER PRIMARY KEY (rowid=?) `--LIST SUBQUERY xxxxxx |--SCAN t502 `--CREATE BLOOM FILTER } do_eqp_test autoindex1-501 { SELECT b FROM t501 WHERE t501.a IN (SELECT x FROM t502 WHERE y=t501.b); } { QUERY PLAN |--SCAN t501 |
︙ | ︙ |
Changes to test/eqp.test.
︙ | ︙ | |||
307 308 309 310 311 312 313 | det 3.3.1 { SELECT * FROM t1 WHERE y IN (SELECT y FROM t2) } { QUERY PLAN |--SCAN t1 `--LIST SUBQUERY xxxxxx | | > | 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 | det 3.3.1 { SELECT * FROM t1 WHERE y IN (SELECT y FROM t2) } { QUERY PLAN |--SCAN t1 `--LIST SUBQUERY xxxxxx |--SCAN t2 `--CREATE BLOOM FILTER } det 3.3.2 { SELECT * FROM t1 WHERE y IN (SELECT y FROM t2 WHERE t1.x!=t2.x) } { QUERY PLAN |--SCAN t1 `--CORRELATED LIST SUBQUERY xxxxxx |
︙ | ︙ |
Changes to test/pushdown.test.
︙ | ︙ | |||
271 272 273 274 275 276 277 | |--CO-ROUTINE t0 | `--COMPOUND QUERY | |--LEFT-MOST SUBQUERY | | |--SEARCH t01 USING INDEX t01x (w=? AND x=? AND y>? AND y<?) | | `--LIST SUBQUERY xxxxxx | | |--MATERIALIZE k | | | `--SCAN 3 CONSTANT ROWS | | > | > | > | > | > | > | > | > | > | 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 | |--CO-ROUTINE t0 | `--COMPOUND QUERY | |--LEFT-MOST SUBQUERY | | |--SEARCH t01 USING INDEX t01x (w=? AND x=? AND y>? AND y<?) | | `--LIST SUBQUERY xxxxxx | | |--MATERIALIZE k | | | `--SCAN 3 CONSTANT ROWS | | |--SCAN k | | `--CREATE BLOOM FILTER | `--UNION ALL | |--SEARCH t02 USING INDEX t02x (w=? AND x=? AND y>? AND y<?) | `--LIST SUBQUERY xxxxxx | |--SCAN k | `--CREATE BLOOM FILTER |--SEARCH t0 `--LIST SUBQUERY xxxxxx |--SCAN k `--CREATE BLOOM FILTER } # ^^^^--- The key feature above is that the SEARCH for each subquery # uses all three fields of the index w, x, and y. Prior to the push-down # of "expr IN table", only the w term of the index would be used. Similar # for the following tests: # do_eqp_test 6.2 { SELECT max(z) FROM t0 WHERE w=123 AND x IN v1 AND y BETWEEN 44 AND 55; } { QUERY PLAN |--CO-ROUTINE t0 | `--COMPOUND QUERY | |--LEFT-MOST SUBQUERY | | |--SEARCH t01 USING INDEX t01x (w=? AND x=? AND y>? AND y<?) | | `--LIST SUBQUERY xxxxxx | | |--CO-ROUTINE v1 | | | `--SCAN 3 CONSTANT ROWS | | |--SCAN v1 | | `--CREATE BLOOM FILTER | `--UNION ALL | |--SEARCH t02 USING INDEX t02x (w=? AND x=? AND y>? AND y<?) | `--LIST SUBQUERY xxxxxx | |--CO-ROUTINE v1 | | `--SCAN 3 CONSTANT ROWS | |--SCAN v1 | `--CREATE BLOOM FILTER |--SEARCH t0 `--LIST SUBQUERY xxxxxx |--CO-ROUTINE v1 | `--SCAN 3 CONSTANT ROWS |--SCAN v1 `--CREATE BLOOM FILTER } do_eqp_test 6.3 { SELECT max(z) FROM t0 WHERE w=123 AND x IN k1 AND y BETWEEN 44 AND 55; } { QUERY PLAN |--CO-ROUTINE t0 | `--COMPOUND QUERY | |--LEFT-MOST SUBQUERY | | |--SEARCH t01 USING INDEX t01x (w=? AND x=? AND y>? AND y<?) | | `--LIST SUBQUERY xxxxxx | | |--SCAN k1 | | `--CREATE BLOOM FILTER | `--UNION ALL | |--SEARCH t02 USING INDEX t02x (w=? AND x=? AND y>? AND y<?) | `--LIST SUBQUERY xxxxxx | |--SCAN k1 | `--CREATE BLOOM FILTER |--SEARCH t0 `--LIST SUBQUERY xxxxxx |--SCAN k1 `--CREATE BLOOM FILTER } finish_test |
Changes to test/rowvalue4.test.
︙ | ︙ | |||
232 233 234 235 236 237 238 | SELECT * FROM d2 WHERE (a, b) IN (SELECT x, y FROM d1) AND (c) IN (SELECT y FROM d1) } { QUERY PLAN |--SEARCH d2 USING INDEX d2ab (a=? AND b=?) |--LIST SUBQUERY xxxxxx | | > | > | 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 | SELECT * FROM d2 WHERE (a, b) IN (SELECT x, y FROM d1) AND (c) IN (SELECT y FROM d1) } { QUERY PLAN |--SEARCH d2 USING INDEX d2ab (a=? AND b=?) |--LIST SUBQUERY xxxxxx | |--SCAN d1 | `--CREATE BLOOM FILTER `--LIST SUBQUERY xxxxxx |--SCAN d1 `--CREATE BLOOM FILTER } do_execsql_test 6.0 { CREATE TABLE e1(a, b, c, d, e); CREATE INDEX e1ab ON e1(a, b); CREATE INDEX e1cde ON e1(c, d, e); } |
︙ | ︙ |