Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Use a Bloom filter to improve performance of IN operators when the RHS of the IN operator is a subquery. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | in-bloom |
Files: | files | file ages | folders |
SHA3-256: |
1933496539c19cbf429a39d6b0b1c6b1 |
User & Date: | drh 2024-07-03 17:51:48 |
Context
2024-07-03
| ||
18:56 | Add a new sqlite3FaultSim() call to OP_NotFound to use for testing purposes. (check-in: 84fd275b user: drh tags: in-bloom) | |
17:51 | Use a Bloom filter to improve performance of IN operators when the RHS of the IN operator is a subquery. (check-in: 19334965 user: drh tags: in-bloom) | |
2024-07-02
| ||
13:54 | Add assert() statements to FTS5 to hush-up warnings from scan-build. (check-in: 77a76654 user: drh tags: trunk) | |
Changes
Changes to src/expr.c.
︙ | ︙ | |||
3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 | /* If the LHS and RHS of the IN operator do not match, that ** error will have been caught long before we reach this point. */ if( ALWAYS(pEList->nExpr==nVal) ){ Select *pCopy; SelectDest dest; int i; int rc; sqlite3SelectDestInit(&dest, SRT_Set, iTab); dest.zAffSdst = exprINAffinity(pParse, pExpr); pSelect->iLimit = 0; testcase( pSelect->selFlags & SF_Distinct ); testcase( pKeyInfo==0 ); /* Caused by OOM in sqlite3KeyInfoAlloc() */ pCopy = sqlite3SelectDup(pParse->db, pSelect, 0); rc = pParse->db->mallocFailed ? 1 :sqlite3Select(pParse, pCopy, &dest); sqlite3SelectDelete(pParse->db, pCopy); sqlite3DbFree(pParse->db, dest.zAffSdst); if( rc ){ sqlite3KeyInfoUnref(pKeyInfo); return; | > > > > > > > > > > > > > > > | | 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 | /* If the LHS and RHS of the IN operator do not match, that ** error will have been caught long before we reach this point. */ if( ALWAYS(pEList->nExpr==nVal) ){ Select *pCopy; SelectDest dest; int i; int rc; int addrBloom = 0; sqlite3SelectDestInit(&dest, SRT_Set, iTab); dest.zAffSdst = exprINAffinity(pParse, pExpr); pSelect->iLimit = 0; if( addrOnce && OptimizationEnabled(pParse->db, SQLITE_BloomFilter) ){ int regBloom = ++pParse->nMem; addrBloom = sqlite3VdbeAddOp2(v, OP_Blob, 10000, regBloom); VdbeComment((v, "Bloom filter")); dest.iSDParm2 = regBloom; } testcase( pSelect->selFlags & SF_Distinct ); testcase( pKeyInfo==0 ); /* Caused by OOM in sqlite3KeyInfoAlloc() */ pCopy = sqlite3SelectDup(pParse->db, pSelect, 0); rc = pParse->db->mallocFailed ? 1 :sqlite3Select(pParse, pCopy, &dest); sqlite3SelectDelete(pParse->db, pCopy); sqlite3DbFree(pParse->db, dest.zAffSdst); if( addrBloom ){ sqlite3VdbeGetOp(v, addrOnce)->p3 = dest.iSDParm2; if( dest.iSDParm2==0 ){ sqlite3VdbeChangeToNoop(v, addrBloom); }else{ sqlite3VdbeGetOp(v, addrOnce)->p3 = dest.iSDParm2; } } if( rc ){ sqlite3KeyInfoUnref(pKeyInfo); return; } assert( pKeyInfo!=0 ); /* OOM will cause exit after sqlite3Select() */ assert( pEList!=0 ); assert( pEList->nExpr>0 ); assert( sqlite3KeyInfoIsWriteable(pKeyInfo) ); for(i=0; i<nVal; i++){ Expr *p = sqlite3VectorFieldSubexpr(pLeft, i); pKeyInfo->aColl[i] = sqlite3BinaryCompareCollSeq( |
︙ | ︙ | |||
3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 | sqlite3VdbeAddOp3(v, OP_SeekRowid, iTab, destIfFalse, rLhs); VdbeCoverage(v); addrTruthOp = sqlite3VdbeAddOp0(v, OP_Goto); /* Return True */ }else{ sqlite3VdbeAddOp4(v, OP_Affinity, rLhs, nVector, 0, zAff, nVector); if( destIfFalse==destIfNull ){ /* Combine Step 3 and Step 5 into a single opcode */ sqlite3VdbeAddOp4Int(v, OP_NotFound, iTab, destIfFalse, rLhs, nVector); VdbeCoverage(v); goto sqlite3ExprCodeIN_finished; } /* Ordinary Step 3, for the case where FALSE and NULL are distinct */ addrTruthOp = sqlite3VdbeAddOp4Int(v, OP_Found, iTab, 0, rLhs, nVector); VdbeCoverage(v); | > > > > > > > > > | 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 | sqlite3VdbeAddOp3(v, OP_SeekRowid, iTab, destIfFalse, rLhs); VdbeCoverage(v); addrTruthOp = sqlite3VdbeAddOp0(v, OP_Goto); /* Return True */ }else{ sqlite3VdbeAddOp4(v, OP_Affinity, rLhs, nVector, 0, zAff, nVector); if( destIfFalse==destIfNull ){ /* Combine Step 3 and Step 5 into a single opcode */ if( ExprHasProperty(pExpr, EP_Subrtn) ){ const VdbeOp *pOp = sqlite3VdbeGetOp(v, pExpr->y.sub.iAddr); assert( pOp->opcode==OP_Once || pParse->nErr ); if( pOp->opcode==OP_Once && pOp->p3>0 ){ assert( OptimizationEnabled(pParse->db, SQLITE_BloomFilter) ); sqlite3VdbeAddOp4Int(v, OP_Filter, pOp->p3, destIfFalse, rLhs, nVector); VdbeCoverage(v); } } sqlite3VdbeAddOp4Int(v, OP_NotFound, iTab, destIfFalse, rLhs, nVector); VdbeCoverage(v); goto sqlite3ExprCodeIN_finished; } /* Ordinary Step 3, for the case where FALSE and NULL are distinct */ addrTruthOp = sqlite3VdbeAddOp4Int(v, OP_Found, iTab, 0, rLhs, nVector); VdbeCoverage(v); |
︙ | ︙ |
Changes to src/select.c.
︙ | ︙ | |||
1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 | if( pSort ){ /* At first glance you would think we could optimize out the ** ORDER BY in this case since the order of entries in the set ** does not matter. But there might be a LIMIT clause, in which ** case the order does matter */ pushOntoSorter( pParse, pSort, p, regResult, regOrig, nResultCol, nPrefixReg); }else{ int r1 = sqlite3GetTempReg(pParse); assert( sqlite3Strlen30(pDest->zAffSdst)==nResultCol ); sqlite3VdbeAddOp4(v, OP_MakeRecord, regResult, nResultCol, r1, pDest->zAffSdst, nResultCol); sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iParm, r1, regResult, nResultCol); sqlite3ReleaseTempReg(pParse, r1); } break; } /* If any row exist in the result set, record that fact and abort. | > > > > > | 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 | if( pSort ){ /* At first glance you would think we could optimize out the ** ORDER BY in this case since the order of entries in the set ** does not matter. But there might be a LIMIT clause, in which ** case the order does matter */ pushOntoSorter( pParse, pSort, p, regResult, regOrig, nResultCol, nPrefixReg); pDest->iSDParm2 = 0; /* Signal that any Bloom filter is unpopulated */ }else{ int r1 = sqlite3GetTempReg(pParse); assert( sqlite3Strlen30(pDest->zAffSdst)==nResultCol ); sqlite3VdbeAddOp4(v, OP_MakeRecord, regResult, nResultCol, r1, pDest->zAffSdst, nResultCol); sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iParm, r1, regResult, nResultCol); if( pDest->iSDParm2 ){ sqlite3VdbeAddOp4Int(v, OP_FilterAdd, pDest->iSDParm2, 0, regResult, nResultCol); } sqlite3ReleaseTempReg(pParse, r1); } break; } /* If any row exist in the result set, record that fact and abort. |
︙ | ︙ | |||
3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 | int r1; testcase( pIn->nSdst>1 ); r1 = sqlite3GetTempReg(pParse); sqlite3VdbeAddOp4(v, OP_MakeRecord, pIn->iSdst, pIn->nSdst, r1, pDest->zAffSdst, pIn->nSdst); sqlite3VdbeAddOp4Int(v, OP_IdxInsert, pDest->iSDParm, r1, pIn->iSdst, pIn->nSdst); sqlite3ReleaseTempReg(pParse, r1); break; } /* If this is a scalar select that is part of an expression, then ** store the results in the appropriate memory cell and break out ** of the scan loop. Note that the select might return multiple columns | > > > > | 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 | int r1; testcase( pIn->nSdst>1 ); r1 = sqlite3GetTempReg(pParse); sqlite3VdbeAddOp4(v, OP_MakeRecord, pIn->iSdst, pIn->nSdst, r1, pDest->zAffSdst, pIn->nSdst); sqlite3VdbeAddOp4Int(v, OP_IdxInsert, pDest->iSDParm, r1, pIn->iSdst, pIn->nSdst); if( pDest->iSDParm2>0 ){ sqlite3VdbeAddOp4Int(v, OP_FilterAdd, pDest->iSDParm2, 0, pIn->iSdst, pIn->nSdst); } sqlite3ReleaseTempReg(pParse, r1); break; } /* If this is a scalar select that is part of an expression, then ** store the results in the appropriate memory cell and break out ** of the scan loop. Note that the select might return multiple columns |
︙ | ︙ |
Changes to src/sqliteInt.h.
︙ | ︙ | |||
3618 3619 3620 3621 3622 3623 3624 | ** Store the first column of the first result row ** in register pDest->iSDParm then abandon the rest ** of the query. This destination implies "LIMIT 1". ** ** SRT_Set The result must be a single column. Store each ** row of result as the key in table pDest->iSDParm. ** Apply the affinity pDest->affSdst before storing | > > > > | | 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 | ** Store the first column of the first result row ** in register pDest->iSDParm then abandon the rest ** of the query. This destination implies "LIMIT 1". ** ** SRT_Set The result must be a single column. Store each ** row of result as the key in table pDest->iSDParm. ** Apply the affinity pDest->affSdst before storing ** results. if pDest->iSDParm2 is positive, then it is ** a regsiter holding a Bloom filter for the IN operator ** that should be populated in addition to the ** pDest->iSDParm table. This SRT is used to ** implement "IN (SELECT ...)". ** ** SRT_EphemTab Create an temporary table pDest->iSDParm and store ** the result there. The cursor is left open after ** returning. This is like SRT_Table except that ** this destination uses OP_OpenEphemeral to create ** the table first. ** |
︙ | ︙ |