Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | When constructing an ephermeral table to use as the right-hand side of an IN operator, also construct a Bloom filter to speed membership testing. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: |
baa83b460c677c210c7fa3f20314d7e0 |
User & Date: | drh 2024-07-03 20:19:33.270 |
References
2025-04-30
| ||
12:48 | Fix an issue in Bloom filters on RHS subsqueries to IN operators. See forum post 792a09cb3d for a description of the problem. Also improve comments related to [baa83b460c677c21] which was origin of the problem. (check-in: cdef486e21 user: drh tags: trunk) | |
2025-03-18
| ||
19:21 | Fix a problem that could occur when the RHS of an IN operator was a compound SELECT featuring an ORDER BY on a subquery that was flattened into one of the component SELECTs introduced by [baa83b460c677c21]. Problem reported in forum post 1e17219c88. (check-in: 7101ccd533 user: dan tags: trunk) | |
Context
2024-07-05
| ||
10:03 | When constructing an ephermeral table to use as the right-hand side of an IN operator, also construct a Bloom filter to speed membership testing. (check-in: 557a14a24a user: drh tags: bedrock-3.46) | |
2024-07-04
| ||
09:45 | For shell completion, use pragma_table_xinfo instead of pragma_table_info, so that generated columns are handled, as reported in forum post f0735e05d8d7e857. (check-in: a204ffc06b user: stephan tags: trunk) | |
2024-07-03
| ||
20:30 | When constructing an ephermeral table to use as the right-hand side of an IN operator, also construct a Bloom filter to speed membership testing. (Leaf check-in: 0bb306eb70 user: dan tags: bedrock-3.45-in-bloom) | |
20:19 | When constructing an ephermeral table to use as the right-hand side of an IN operator, also construct a Bloom filter to speed membership testing. (check-in: baa83b460c user: drh tags: trunk) | |
20:10 | Show the creation of IN-operator Bloom filters in the EXPLAIN QUERY PLAN output. (Closed-Leaf check-in: c10a1b99d4 user: drh tags: in-bloom) | |
2024-07-02
| ||
13:54 | Add assert() statements to FTS5 to hush-up warnings from scan-build. (check-in: 77a76654e6 user: drh tags: trunk) | |
Changes
Changes to src/expr.c.
︙ | ︙ | |||
3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 | /* If the LHS and RHS of the IN operator do not match, that ** error will have been caught long before we reach this point. */ if( ALWAYS(pEList->nExpr==nVal) ){ Select *pCopy; SelectDest dest; int i; int rc; sqlite3SelectDestInit(&dest, SRT_Set, iTab); dest.zAffSdst = exprINAffinity(pParse, pExpr); pSelect->iLimit = 0; testcase( pSelect->selFlags & SF_Distinct ); testcase( pKeyInfo==0 ); /* Caused by OOM in sqlite3KeyInfoAlloc() */ pCopy = sqlite3SelectDup(pParse->db, pSelect, 0); rc = pParse->db->mallocFailed ? 1 :sqlite3Select(pParse, pCopy, &dest); sqlite3SelectDelete(pParse->db, pCopy); sqlite3DbFree(pParse->db, dest.zAffSdst); if( rc ){ sqlite3KeyInfoUnref(pKeyInfo); return; | > > > > > > > > > > > > > > > | | 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 | /* If the LHS and RHS of the IN operator do not match, that ** error will have been caught long before we reach this point. */ if( ALWAYS(pEList->nExpr==nVal) ){ Select *pCopy; SelectDest dest; int i; int rc; int addrBloom = 0; sqlite3SelectDestInit(&dest, SRT_Set, iTab); dest.zAffSdst = exprINAffinity(pParse, pExpr); pSelect->iLimit = 0; if( addrOnce && OptimizationEnabled(pParse->db, SQLITE_BloomFilter) ){ int regBloom = ++pParse->nMem; addrBloom = sqlite3VdbeAddOp2(v, OP_Blob, 10000, regBloom); VdbeComment((v, "Bloom filter")); dest.iSDParm2 = regBloom; } testcase( pSelect->selFlags & SF_Distinct ); testcase( pKeyInfo==0 ); /* Caused by OOM in sqlite3KeyInfoAlloc() */ pCopy = sqlite3SelectDup(pParse->db, pSelect, 0); rc = pParse->db->mallocFailed ? 1 :sqlite3Select(pParse, pCopy, &dest); sqlite3SelectDelete(pParse->db, pCopy); sqlite3DbFree(pParse->db, dest.zAffSdst); if( addrBloom ){ sqlite3VdbeGetOp(v, addrOnce)->p3 = dest.iSDParm2; if( dest.iSDParm2==0 ){ sqlite3VdbeChangeToNoop(v, addrBloom); }else{ sqlite3VdbeGetOp(v, addrOnce)->p3 = dest.iSDParm2; } } if( rc ){ sqlite3KeyInfoUnref(pKeyInfo); return; } assert( pKeyInfo!=0 ); /* OOM will cause exit after sqlite3Select() */ assert( pEList!=0 ); assert( pEList->nExpr>0 ); assert( sqlite3KeyInfoIsWriteable(pKeyInfo) ); for(i=0; i<nVal; i++){ Expr *p = sqlite3VectorFieldSubexpr(pLeft, i); pKeyInfo->aColl[i] = sqlite3BinaryCompareCollSeq( |
︙ | ︙ | |||
3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 | sqlite3VdbeAddOp3(v, OP_SeekRowid, iTab, destIfFalse, rLhs); VdbeCoverage(v); addrTruthOp = sqlite3VdbeAddOp0(v, OP_Goto); /* Return True */ }else{ sqlite3VdbeAddOp4(v, OP_Affinity, rLhs, nVector, 0, zAff, nVector); if( destIfFalse==destIfNull ){ /* Combine Step 3 and Step 5 into a single opcode */ sqlite3VdbeAddOp4Int(v, OP_NotFound, iTab, destIfFalse, rLhs, nVector); VdbeCoverage(v); goto sqlite3ExprCodeIN_finished; } /* Ordinary Step 3, for the case where FALSE and NULL are distinct */ addrTruthOp = sqlite3VdbeAddOp4Int(v, OP_Found, iTab, 0, rLhs, nVector); VdbeCoverage(v); | > > > > > > > > > | 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 | sqlite3VdbeAddOp3(v, OP_SeekRowid, iTab, destIfFalse, rLhs); VdbeCoverage(v); addrTruthOp = sqlite3VdbeAddOp0(v, OP_Goto); /* Return True */ }else{ sqlite3VdbeAddOp4(v, OP_Affinity, rLhs, nVector, 0, zAff, nVector); if( destIfFalse==destIfNull ){ /* Combine Step 3 and Step 5 into a single opcode */ if( ExprHasProperty(pExpr, EP_Subrtn) ){ const VdbeOp *pOp = sqlite3VdbeGetOp(v, pExpr->y.sub.iAddr); assert( pOp->opcode==OP_Once || pParse->nErr ); if( pOp->opcode==OP_Once && pOp->p3>0 ){ assert( OptimizationEnabled(pParse->db, SQLITE_BloomFilter) ); sqlite3VdbeAddOp4Int(v, OP_Filter, pOp->p3, destIfFalse, rLhs, nVector); VdbeCoverage(v); } } sqlite3VdbeAddOp4Int(v, OP_NotFound, iTab, destIfFalse, rLhs, nVector); VdbeCoverage(v); goto sqlite3ExprCodeIN_finished; } /* Ordinary Step 3, for the case where FALSE and NULL are distinct */ addrTruthOp = sqlite3VdbeAddOp4Int(v, OP_Found, iTab, 0, rLhs, nVector); VdbeCoverage(v); |
︙ | ︙ |
Changes to src/select.c.
︙ | ︙ | |||
1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 | if( pSort ){ /* At first glance you would think we could optimize out the ** ORDER BY in this case since the order of entries in the set ** does not matter. But there might be a LIMIT clause, in which ** case the order does matter */ pushOntoSorter( pParse, pSort, p, regResult, regOrig, nResultCol, nPrefixReg); }else{ int r1 = sqlite3GetTempReg(pParse); assert( sqlite3Strlen30(pDest->zAffSdst)==nResultCol ); sqlite3VdbeAddOp4(v, OP_MakeRecord, regResult, nResultCol, r1, pDest->zAffSdst, nResultCol); sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iParm, r1, regResult, nResultCol); sqlite3ReleaseTempReg(pParse, r1); } break; } /* If any row exist in the result set, record that fact and abort. | > > > > > > | 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 | if( pSort ){ /* At first glance you would think we could optimize out the ** ORDER BY in this case since the order of entries in the set ** does not matter. But there might be a LIMIT clause, in which ** case the order does matter */ pushOntoSorter( pParse, pSort, p, regResult, regOrig, nResultCol, nPrefixReg); pDest->iSDParm2 = 0; /* Signal that any Bloom filter is unpopulated */ }else{ int r1 = sqlite3GetTempReg(pParse); assert( sqlite3Strlen30(pDest->zAffSdst)==nResultCol ); sqlite3VdbeAddOp4(v, OP_MakeRecord, regResult, nResultCol, r1, pDest->zAffSdst, nResultCol); sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iParm, r1, regResult, nResultCol); if( pDest->iSDParm2 ){ sqlite3VdbeAddOp4Int(v, OP_FilterAdd, pDest->iSDParm2, 0, regResult, nResultCol); ExplainQueryPlan((pParse, 0, "CREATE BLOOM FILTER")); } sqlite3ReleaseTempReg(pParse, r1); } break; } /* If any row exist in the result set, record that fact and abort. |
︙ | ︙ | |||
3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 | int r1; testcase( pIn->nSdst>1 ); r1 = sqlite3GetTempReg(pParse); sqlite3VdbeAddOp4(v, OP_MakeRecord, pIn->iSdst, pIn->nSdst, r1, pDest->zAffSdst, pIn->nSdst); sqlite3VdbeAddOp4Int(v, OP_IdxInsert, pDest->iSDParm, r1, pIn->iSdst, pIn->nSdst); sqlite3ReleaseTempReg(pParse, r1); break; } /* If this is a scalar select that is part of an expression, then ** store the results in the appropriate memory cell and break out ** of the scan loop. Note that the select might return multiple columns | > > > > > | 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 | int r1; testcase( pIn->nSdst>1 ); r1 = sqlite3GetTempReg(pParse); sqlite3VdbeAddOp4(v, OP_MakeRecord, pIn->iSdst, pIn->nSdst, r1, pDest->zAffSdst, pIn->nSdst); sqlite3VdbeAddOp4Int(v, OP_IdxInsert, pDest->iSDParm, r1, pIn->iSdst, pIn->nSdst); if( pDest->iSDParm2>0 ){ sqlite3VdbeAddOp4Int(v, OP_FilterAdd, pDest->iSDParm2, 0, pIn->iSdst, pIn->nSdst); ExplainQueryPlan((pParse, 0, "CREATE BLOOM FILTER")); } sqlite3ReleaseTempReg(pParse, r1); break; } /* If this is a scalar select that is part of an expression, then ** store the results in the appropriate memory cell and break out ** of the scan loop. Note that the select might return multiple columns |
︙ | ︙ |
Changes to src/sqliteInt.h.
︙ | ︙ | |||
3618 3619 3620 3621 3622 3623 3624 | ** Store the first column of the first result row ** in register pDest->iSDParm then abandon the rest ** of the query. This destination implies "LIMIT 1". ** ** SRT_Set The result must be a single column. Store each ** row of result as the key in table pDest->iSDParm. ** Apply the affinity pDest->affSdst before storing | > > > > | | 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 | ** Store the first column of the first result row ** in register pDest->iSDParm then abandon the rest ** of the query. This destination implies "LIMIT 1". ** ** SRT_Set The result must be a single column. Store each ** row of result as the key in table pDest->iSDParm. ** Apply the affinity pDest->affSdst before storing ** results. if pDest->iSDParm2 is positive, then it is ** a regsiter holding a Bloom filter for the IN operator ** that should be populated in addition to the ** pDest->iSDParm table. This SRT is used to ** implement "IN (SELECT ...)". ** ** SRT_EphemTab Create an temporary table pDest->iSDParm and store ** the result there. The cursor is left open after ** returning. This is like SRT_Table except that ** this destination uses OP_OpenEphemeral to create ** the table first. ** |
︙ | ︙ |
Changes to src/vdbe.c.
︙ | ︙ | |||
5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 | assert( pC->isTable==0 ); r.nField = (u16)pOp->p4.i; if( r.nField>0 ){ /* Key values in an array of registers */ r.pKeyInfo = pC->pKeyInfo; r.default_rc = 0; #ifdef SQLITE_DEBUG for(ii=0; ii<r.nField; ii++){ assert( memIsValid(&r.aMem[ii]) ); assert( (r.aMem[ii].flags & MEM_Zero)==0 || r.aMem[ii].n==0 ); if( ii ) REGISTER_TRACE(pOp->p3+ii, &r.aMem[ii]); } #endif rc = sqlite3BtreeIndexMoveto(pC->uc.pCursor, &r, &pC->seekResult); | > | 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 | assert( pC->isTable==0 ); r.nField = (u16)pOp->p4.i; if( r.nField>0 ){ /* Key values in an array of registers */ r.pKeyInfo = pC->pKeyInfo; r.default_rc = 0; #ifdef SQLITE_DEBUG (void)sqlite3FaultSim(50); /* For use by --counter in TH3 */ for(ii=0; ii<r.nField; ii++){ assert( memIsValid(&r.aMem[ii]) ); assert( (r.aMem[ii].flags & MEM_Zero)==0 || r.aMem[ii].n==0 ); if( ii ) REGISTER_TRACE(pOp->p3+ii, &r.aMem[ii]); } #endif rc = sqlite3BtreeIndexMoveto(pC->uc.pCursor, &r, &pC->seekResult); |
︙ | ︙ |
Changes to test/autoindex1.test.
︙ | ︙ | |||
181 182 183 184 185 186 187 | do_eqp_test autoindex1-500.1 { SELECT b FROM t501 WHERE t501.a IN (SELECT x FROM t502 WHERE y=?); } { QUERY PLAN |--SEARCH t501 USING INTEGER PRIMARY KEY (rowid=?) `--LIST SUBQUERY xxxxxx | | > | 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 | do_eqp_test autoindex1-500.1 { SELECT b FROM t501 WHERE t501.a IN (SELECT x FROM t502 WHERE y=?); } { QUERY PLAN |--SEARCH t501 USING INTEGER PRIMARY KEY (rowid=?) `--LIST SUBQUERY xxxxxx |--SCAN t502 `--CREATE BLOOM FILTER } do_eqp_test autoindex1-501 { SELECT b FROM t501 WHERE t501.a IN (SELECT x FROM t502 WHERE y=t501.b); } { QUERY PLAN |--SCAN t501 |
︙ | ︙ |
Changes to test/eqp.test.
︙ | ︙ | |||
307 308 309 310 311 312 313 | det 3.3.1 { SELECT * FROM t1 WHERE y IN (SELECT y FROM t2) } { QUERY PLAN |--SCAN t1 `--LIST SUBQUERY xxxxxx | | > | 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 | det 3.3.1 { SELECT * FROM t1 WHERE y IN (SELECT y FROM t2) } { QUERY PLAN |--SCAN t1 `--LIST SUBQUERY xxxxxx |--SCAN t2 `--CREATE BLOOM FILTER } det 3.3.2 { SELECT * FROM t1 WHERE y IN (SELECT y FROM t2 WHERE t1.x!=t2.x) } { QUERY PLAN |--SCAN t1 `--CORRELATED LIST SUBQUERY xxxxxx |
︙ | ︙ |
Changes to test/pushdown.test.
︙ | ︙ | |||
271 272 273 274 275 276 277 | |--CO-ROUTINE t0 | `--COMPOUND QUERY | |--LEFT-MOST SUBQUERY | | |--SEARCH t01 USING INDEX t01x (w=? AND x=? AND y>? AND y<?) | | `--LIST SUBQUERY xxxxxx | | |--MATERIALIZE k | | | `--SCAN 3 CONSTANT ROWS | | > | > | > | > | > | > | > | > | > | 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 | |--CO-ROUTINE t0 | `--COMPOUND QUERY | |--LEFT-MOST SUBQUERY | | |--SEARCH t01 USING INDEX t01x (w=? AND x=? AND y>? AND y<?) | | `--LIST SUBQUERY xxxxxx | | |--MATERIALIZE k | | | `--SCAN 3 CONSTANT ROWS | | |--SCAN k | | `--CREATE BLOOM FILTER | `--UNION ALL | |--SEARCH t02 USING INDEX t02x (w=? AND x=? AND y>? AND y<?) | `--LIST SUBQUERY xxxxxx | |--SCAN k | `--CREATE BLOOM FILTER |--SEARCH t0 `--LIST SUBQUERY xxxxxx |--SCAN k `--CREATE BLOOM FILTER } # ^^^^--- The key feature above is that the SEARCH for each subquery # uses all three fields of the index w, x, and y. Prior to the push-down # of "expr IN table", only the w term of the index would be used. Similar # for the following tests: # do_eqp_test 6.2 { SELECT max(z) FROM t0 WHERE w=123 AND x IN v1 AND y BETWEEN 44 AND 55; } { QUERY PLAN |--CO-ROUTINE t0 | `--COMPOUND QUERY | |--LEFT-MOST SUBQUERY | | |--SEARCH t01 USING INDEX t01x (w=? AND x=? AND y>? AND y<?) | | `--LIST SUBQUERY xxxxxx | | |--CO-ROUTINE v1 | | | `--SCAN 3 CONSTANT ROWS | | |--SCAN v1 | | `--CREATE BLOOM FILTER | `--UNION ALL | |--SEARCH t02 USING INDEX t02x (w=? AND x=? AND y>? AND y<?) | `--LIST SUBQUERY xxxxxx | |--CO-ROUTINE v1 | | `--SCAN 3 CONSTANT ROWS | |--SCAN v1 | `--CREATE BLOOM FILTER |--SEARCH t0 `--LIST SUBQUERY xxxxxx |--CO-ROUTINE v1 | `--SCAN 3 CONSTANT ROWS |--SCAN v1 `--CREATE BLOOM FILTER } do_eqp_test 6.3 { SELECT max(z) FROM t0 WHERE w=123 AND x IN k1 AND y BETWEEN 44 AND 55; } { QUERY PLAN |--CO-ROUTINE t0 | `--COMPOUND QUERY | |--LEFT-MOST SUBQUERY | | |--SEARCH t01 USING INDEX t01x (w=? AND x=? AND y>? AND y<?) | | `--LIST SUBQUERY xxxxxx | | |--SCAN k1 | | `--CREATE BLOOM FILTER | `--UNION ALL | |--SEARCH t02 USING INDEX t02x (w=? AND x=? AND y>? AND y<?) | `--LIST SUBQUERY xxxxxx | |--SCAN k1 | `--CREATE BLOOM FILTER |--SEARCH t0 `--LIST SUBQUERY xxxxxx |--SCAN k1 `--CREATE BLOOM FILTER } finish_test |
Changes to test/rowvalue4.test.
︙ | ︙ | |||
232 233 234 235 236 237 238 | SELECT * FROM d2 WHERE (a, b) IN (SELECT x, y FROM d1) AND (c) IN (SELECT y FROM d1) } { QUERY PLAN |--SEARCH d2 USING INDEX d2ab (a=? AND b=?) |--LIST SUBQUERY xxxxxx | | > | > | 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 | SELECT * FROM d2 WHERE (a, b) IN (SELECT x, y FROM d1) AND (c) IN (SELECT y FROM d1) } { QUERY PLAN |--SEARCH d2 USING INDEX d2ab (a=? AND b=?) |--LIST SUBQUERY xxxxxx | |--SCAN d1 | `--CREATE BLOOM FILTER `--LIST SUBQUERY xxxxxx |--SCAN d1 `--CREATE BLOOM FILTER } do_execsql_test 6.0 { CREATE TABLE e1(a, b, c, d, e); CREATE INDEX e1ab ON e1(a, b); CREATE INDEX e1cde ON e1(c, d, e); } |
︙ | ︙ |