SQLite

Check-in [0bb306eb]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:When constructing an ephermeral table to use as the right-hand side of an IN operator, also construct a Bloom filter to speed membership testing.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | bedrock-3.45-in-bloom
Files: files | file ages | folders
SHA3-256: 0bb306eb70ef1df7734326d30359da7a15397171d3e25ab644633ef3ee1428ec
User & Date: dan 2024-07-03 20:30:31
Context
2024-07-03
20:30
When constructing an ephermeral table to use as the right-hand side of an IN operator, also construct a Bloom filter to speed membership testing. (Leaf check-in: 0bb306eb user: dan tags: bedrock-3.45-in-bloom)
20:19
When constructing an ephermeral table to use as the right-hand side of an IN operator, also construct a Bloom filter to speed membership testing. (check-in: baa83b46 user: drh tags: trunk)
2024-06-27
18:18
Avoid 32-bit overflow when calculating ncycle for ".scanstats vm". (Leaf check-in: 78022f90 user: dan tags: bedrock-3.45)
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/expr.c.

3389
3390
3391
3392
3393
3394
3395

3396
3397
3398






3399
3400
3401
3402
3403
3404








3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
    /* If the LHS and RHS of the IN operator do not match, that
    ** error will have been caught long before we reach this point. */
    if( ALWAYS(pEList->nExpr==nVal) ){
      Select *pCopy;
      SelectDest dest;
      int i;
      int rc;

      sqlite3SelectDestInit(&dest, SRT_Set, iTab);
      dest.zAffSdst = exprINAffinity(pParse, pExpr);
      pSelect->iLimit = 0;






      testcase( pSelect->selFlags & SF_Distinct );
      testcase( pKeyInfo==0 ); /* Caused by OOM in sqlite3KeyInfoAlloc() */
      pCopy = sqlite3SelectDup(pParse->db, pSelect, 0);
      rc = pParse->db->mallocFailed ? 1 :sqlite3Select(pParse, pCopy, &dest);
      sqlite3SelectDelete(pParse->db, pCopy);
      sqlite3DbFree(pParse->db, dest.zAffSdst);








      if( rc ){
        sqlite3KeyInfoUnref(pKeyInfo);
        return;
      }     
      assert( pKeyInfo!=0 ); /* OOM will cause exit after sqlite3Select() */
      assert( pEList!=0 );
      assert( pEList->nExpr>0 );
      assert( sqlite3KeyInfoIsWriteable(pKeyInfo) );
      for(i=0; i<nVal; i++){
        Expr *p = sqlite3VectorFieldSubexpr(pLeft, i);
        pKeyInfo->aColl[i] = sqlite3BinaryCompareCollSeq(







>



>
>
>
>
>
>






>
>
>
>
>
>
>
>



|







3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
    /* If the LHS and RHS of the IN operator do not match, that
    ** error will have been caught long before we reach this point. */
    if( ALWAYS(pEList->nExpr==nVal) ){
      Select *pCopy;
      SelectDest dest;
      int i;
      int rc;
      int addrBloom = 0;
      sqlite3SelectDestInit(&dest, SRT_Set, iTab);
      dest.zAffSdst = exprINAffinity(pParse, pExpr);
      pSelect->iLimit = 0;
      if( addrOnce && OptimizationEnabled(pParse->db, SQLITE_BloomFilter) ){
        int regBloom = ++pParse->nMem;
        addrBloom = sqlite3VdbeAddOp2(v, OP_Blob, 10000, regBloom);
        VdbeComment((v, "Bloom filter"));
        dest.iSDParm2 = regBloom;
      }
      testcase( pSelect->selFlags & SF_Distinct );
      testcase( pKeyInfo==0 ); /* Caused by OOM in sqlite3KeyInfoAlloc() */
      pCopy = sqlite3SelectDup(pParse->db, pSelect, 0);
      rc = pParse->db->mallocFailed ? 1 :sqlite3Select(pParse, pCopy, &dest);
      sqlite3SelectDelete(pParse->db, pCopy);
      sqlite3DbFree(pParse->db, dest.zAffSdst);
      if( addrBloom ){
        sqlite3VdbeGetOp(v, addrOnce)->p3 = dest.iSDParm2;
        if( dest.iSDParm2==0 ){
          sqlite3VdbeChangeToNoop(v, addrBloom);
        }else{
          sqlite3VdbeGetOp(v, addrOnce)->p3 = dest.iSDParm2;
        }
      }
      if( rc ){
        sqlite3KeyInfoUnref(pKeyInfo);
        return;
      }
      assert( pKeyInfo!=0 ); /* OOM will cause exit after sqlite3Select() */
      assert( pEList!=0 );
      assert( pEList->nExpr>0 );
      assert( sqlite3KeyInfoIsWriteable(pKeyInfo) );
      for(i=0; i<nVal; i++){
        Expr *p = sqlite3VectorFieldSubexpr(pLeft, i);
        pKeyInfo->aColl[i] = sqlite3BinaryCompareCollSeq(
3840
3841
3842
3843
3844
3845
3846









3847
3848
3849
3850
3851
3852
3853
    sqlite3VdbeAddOp3(v, OP_SeekRowid, iTab, destIfFalse, rLhs);
    VdbeCoverage(v);
    addrTruthOp = sqlite3VdbeAddOp0(v, OP_Goto);  /* Return True */
  }else{
    sqlite3VdbeAddOp4(v, OP_Affinity, rLhs, nVector, 0, zAff, nVector);
    if( destIfFalse==destIfNull ){
      /* Combine Step 3 and Step 5 into a single opcode */









      sqlite3VdbeAddOp4Int(v, OP_NotFound, iTab, destIfFalse,
                           rLhs, nVector); VdbeCoverage(v);
      goto sqlite3ExprCodeIN_finished;
    }
    /* Ordinary Step 3, for the case where FALSE and NULL are distinct */
    addrTruthOp = sqlite3VdbeAddOp4Int(v, OP_Found, iTab, 0,
                                      rLhs, nVector); VdbeCoverage(v);







>
>
>
>
>
>
>
>
>







3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
    sqlite3VdbeAddOp3(v, OP_SeekRowid, iTab, destIfFalse, rLhs);
    VdbeCoverage(v);
    addrTruthOp = sqlite3VdbeAddOp0(v, OP_Goto);  /* Return True */
  }else{
    sqlite3VdbeAddOp4(v, OP_Affinity, rLhs, nVector, 0, zAff, nVector);
    if( destIfFalse==destIfNull ){
      /* Combine Step 3 and Step 5 into a single opcode */
      if( ExprHasProperty(pExpr, EP_Subrtn) ){
        const VdbeOp *pOp = sqlite3VdbeGetOp(v, pExpr->y.sub.iAddr);
        assert( pOp->opcode==OP_Once || pParse->nErr );
        if( pOp->opcode==OP_Once && pOp->p3>0 ){
          assert( OptimizationEnabled(pParse->db, SQLITE_BloomFilter) );
          sqlite3VdbeAddOp4Int(v, OP_Filter, pOp->p3, destIfFalse,
                               rLhs, nVector); VdbeCoverage(v);
        }
      }
      sqlite3VdbeAddOp4Int(v, OP_NotFound, iTab, destIfFalse,
                           rLhs, nVector); VdbeCoverage(v);
      goto sqlite3ExprCodeIN_finished;
    }
    /* Ordinary Step 3, for the case where FALSE and NULL are distinct */
    addrTruthOp = sqlite3VdbeAddOp4Int(v, OP_Found, iTab, 0,
                                      rLhs, nVector); VdbeCoverage(v);

Changes to src/select.c.

1373
1374
1375
1376
1377
1378
1379

1380
1381
1382
1383
1384
1385





1386
1387
1388
1389
1390
1391
1392
      if( pSort ){
        /* At first glance you would think we could optimize out the
        ** ORDER BY in this case since the order of entries in the set
        ** does not matter.  But there might be a LIMIT clause, in which
        ** case the order does matter */
        pushOntoSorter(
            pParse, pSort, p, regResult, regOrig, nResultCol, nPrefixReg);

      }else{
        int r1 = sqlite3GetTempReg(pParse);
        assert( sqlite3Strlen30(pDest->zAffSdst)==nResultCol );
        sqlite3VdbeAddOp4(v, OP_MakeRecord, regResult, nResultCol,
            r1, pDest->zAffSdst, nResultCol);
        sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iParm, r1, regResult, nResultCol);





        sqlite3ReleaseTempReg(pParse, r1);
      }
      break;
    }


    /* If any row exist in the result set, record that fact and abort.







>






>
>
>
>
>







1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
      if( pSort ){
        /* At first glance you would think we could optimize out the
        ** ORDER BY in this case since the order of entries in the set
        ** does not matter.  But there might be a LIMIT clause, in which
        ** case the order does matter */
        pushOntoSorter(
            pParse, pSort, p, regResult, regOrig, nResultCol, nPrefixReg);
        pDest->iSDParm2 = 0; /* Signal that any Bloom filter is unpopulated */
      }else{
        int r1 = sqlite3GetTempReg(pParse);
        assert( sqlite3Strlen30(pDest->zAffSdst)==nResultCol );
        sqlite3VdbeAddOp4(v, OP_MakeRecord, regResult, nResultCol,
            r1, pDest->zAffSdst, nResultCol);
        sqlite3VdbeAddOp4Int(v, OP_IdxInsert, iParm, r1, regResult, nResultCol);
        if( pDest->iSDParm2 ){
          sqlite3VdbeAddOp4Int(v, OP_FilterAdd, pDest->iSDParm2, 0,
                               regResult, nResultCol);
          ExplainQueryPlan((pParse, 0, "CREATE BLOOM FILTER"));
        }
        sqlite3ReleaseTempReg(pParse, r1);
      }
      break;
    }


    /* If any row exist in the result set, record that fact and abort.
3306
3307
3308
3309
3310
3311
3312





3313
3314
3315
3316
3317
3318
3319
      int r1;
      testcase( pIn->nSdst>1 );
      r1 = sqlite3GetTempReg(pParse);
      sqlite3VdbeAddOp4(v, OP_MakeRecord, pIn->iSdst, pIn->nSdst,
          r1, pDest->zAffSdst, pIn->nSdst);
      sqlite3VdbeAddOp4Int(v, OP_IdxInsert, pDest->iSDParm, r1,
                           pIn->iSdst, pIn->nSdst);





      sqlite3ReleaseTempReg(pParse, r1);
      break;
    }

    /* If this is a scalar select that is part of an expression, then
    ** store the results in the appropriate memory cell and break out
    ** of the scan loop.  Note that the select might return multiple columns







>
>
>
>
>







3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
      int r1;
      testcase( pIn->nSdst>1 );
      r1 = sqlite3GetTempReg(pParse);
      sqlite3VdbeAddOp4(v, OP_MakeRecord, pIn->iSdst, pIn->nSdst,
          r1, pDest->zAffSdst, pIn->nSdst);
      sqlite3VdbeAddOp4Int(v, OP_IdxInsert, pDest->iSDParm, r1,
                           pIn->iSdst, pIn->nSdst);
      if( pDest->iSDParm2>0 ){
        sqlite3VdbeAddOp4Int(v, OP_FilterAdd, pDest->iSDParm2, 0,
                             pIn->iSdst, pIn->nSdst);
        ExplainQueryPlan((pParse, 0, "CREATE BLOOM FILTER"));
      }
      sqlite3ReleaseTempReg(pParse, r1);
      break;
    }

    /* If this is a scalar select that is part of an expression, then
    ** store the results in the appropriate memory cell and break out
    ** of the scan loop.  Note that the select might return multiple columns

Changes to src/sqliteInt.h.

3621
3622
3623
3624
3625
3626
3627




3628
3629
3630
3631
3632
3633
3634
3635
**                     Store the first column of the first result row
**                     in register pDest->iSDParm then abandon the rest
**                     of the query.  This destination implies "LIMIT 1".
**
**     SRT_Set         The result must be a single column.  Store each
**                     row of result as the key in table pDest->iSDParm.
**                     Apply the affinity pDest->affSdst before storing




**                     results.  Used to implement "IN (SELECT ...)".
**
**     SRT_EphemTab    Create an temporary table pDest->iSDParm and store
**                     the result there. The cursor is left open after
**                     returning.  This is like SRT_Table except that
**                     this destination uses OP_OpenEphemeral to create
**                     the table first.
**







>
>
>
>
|







3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
3637
3638
3639
**                     Store the first column of the first result row
**                     in register pDest->iSDParm then abandon the rest
**                     of the query.  This destination implies "LIMIT 1".
**
**     SRT_Set         The result must be a single column.  Store each
**                     row of result as the key in table pDest->iSDParm.
**                     Apply the affinity pDest->affSdst before storing
**                     results.  if pDest->iSDParm2 is positive, then it is
**                     a regsiter holding a Bloom filter for the IN operator
**                     that should be populated in addition to the 
**                     pDest->iSDParm table.  This SRT is used to
**                     implement "IN (SELECT ...)".
**
**     SRT_EphemTab    Create an temporary table pDest->iSDParm and store
**                     the result there. The cursor is left open after
**                     returning.  This is like SRT_Table except that
**                     this destination uses OP_OpenEphemeral to create
**                     the table first.
**

Changes to src/vdbe.c.

5324
5325
5326
5327
5328
5329
5330

5331
5332
5333
5334
5335
5336
5337
  assert( pC->isTable==0 );
  r.nField = (u16)pOp->p4.i;
  if( r.nField>0 ){
    /* Key values in an array of registers */
    r.pKeyInfo = pC->pKeyInfo;
    r.default_rc = 0;
#ifdef SQLITE_DEBUG

    for(ii=0; ii<r.nField; ii++){
      assert( memIsValid(&r.aMem[ii]) );
      assert( (r.aMem[ii].flags & MEM_Zero)==0 || r.aMem[ii].n==0 );
      if( ii ) REGISTER_TRACE(pOp->p3+ii, &r.aMem[ii]);
    }
#endif
    rc = sqlite3BtreeIndexMoveto(pC->uc.pCursor, &r, &pC->seekResult);







>







5324
5325
5326
5327
5328
5329
5330
5331
5332
5333
5334
5335
5336
5337
5338
  assert( pC->isTable==0 );
  r.nField = (u16)pOp->p4.i;
  if( r.nField>0 ){
    /* Key values in an array of registers */
    r.pKeyInfo = pC->pKeyInfo;
    r.default_rc = 0;
#ifdef SQLITE_DEBUG
    (void)sqlite3FaultSim(50);  /* For use by --counter in TH3 */
    for(ii=0; ii<r.nField; ii++){
      assert( memIsValid(&r.aMem[ii]) );
      assert( (r.aMem[ii].flags & MEM_Zero)==0 || r.aMem[ii].n==0 );
      if( ii ) REGISTER_TRACE(pOp->p3+ii, &r.aMem[ii]);
    }
#endif
    rc = sqlite3BtreeIndexMoveto(pC->uc.pCursor, &r, &pC->seekResult);

Changes to test/autoindex1.test.

181
182
183
184
185
186
187
188

189
190
191
192
193
194
195
do_eqp_test autoindex1-500.1 {
  SELECT b FROM t501
   WHERE t501.a IN (SELECT x FROM t502 WHERE y=?);
} {
  QUERY PLAN
  |--SEARCH t501 USING INTEGER PRIMARY KEY (rowid=?)
  `--LIST SUBQUERY xxxxxx
     `--SCAN t502

}
do_eqp_test autoindex1-501 {
  SELECT b FROM t501
   WHERE t501.a IN (SELECT x FROM t502 WHERE y=t501.b);
} {
  QUERY PLAN
  |--SCAN t501







|
>







181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
do_eqp_test autoindex1-500.1 {
  SELECT b FROM t501
   WHERE t501.a IN (SELECT x FROM t502 WHERE y=?);
} {
  QUERY PLAN
  |--SEARCH t501 USING INTEGER PRIMARY KEY (rowid=?)
  `--LIST SUBQUERY xxxxxx
     |--SCAN t502
     `--CREATE BLOOM FILTER
}
do_eqp_test autoindex1-501 {
  SELECT b FROM t501
   WHERE t501.a IN (SELECT x FROM t502 WHERE y=t501.b);
} {
  QUERY PLAN
  |--SCAN t501

Changes to test/eqp.test.

307
308
309
310
311
312
313
314

315
316
317
318
319
320
321

det 3.3.1 {
  SELECT * FROM t1 WHERE y IN (SELECT y FROM t2)
} {
  QUERY PLAN
  |--SCAN t1
  `--LIST SUBQUERY xxxxxx
     `--SCAN t2

}
det 3.3.2 {
  SELECT * FROM t1 WHERE y IN (SELECT y FROM t2 WHERE t1.x!=t2.x)
} {
  QUERY PLAN
  |--SCAN t1
  `--CORRELATED LIST SUBQUERY xxxxxx







|
>







307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322

det 3.3.1 {
  SELECT * FROM t1 WHERE y IN (SELECT y FROM t2)
} {
  QUERY PLAN
  |--SCAN t1
  `--LIST SUBQUERY xxxxxx
     |--SCAN t2
     `--CREATE BLOOM FILTER
}
det 3.3.2 {
  SELECT * FROM t1 WHERE y IN (SELECT y FROM t2 WHERE t1.x!=t2.x)
} {
  QUERY PLAN
  |--SCAN t1
  `--CORRELATED LIST SUBQUERY xxxxxx

Changes to test/pushdown.test.

222
223
224
225
226
227
228

229
230
  CREATE TABLE t4(d INT);  INSERT INTO t4 VALUES(4);
  CREATE TABLE t5(e INT);  INSERT INTO t5 VALUES(5);
  SELECT *
    FROM t1 JOIN t2 ON null RIGHT JOIN t3 ON true
          LEFT JOIN (t4 JOIN t5 ON d+1=e) ON d=4
   WHERE e>0;
} {- - 3 4 5}


finish_test







>


222
223
224
225
226
227
228
229
230
231
  CREATE TABLE t4(d INT);  INSERT INTO t4 VALUES(4);
  CREATE TABLE t5(e INT);  INSERT INTO t5 VALUES(5);
  SELECT *
    FROM t1 JOIN t2 ON null RIGHT JOIN t3 ON true
          LEFT JOIN (t4 JOIN t5 ON d+1=e) ON d=4
   WHERE e>0;
} {- - 3 4 5}


finish_test

Changes to test/rowvalue4.test.

232
233
234
235
236
237
238
239

240
241

242
243
244
245
246
247
248
  SELECT * FROM d2 WHERE 
    (a, b) IN (SELECT x, y FROM d1) AND
    (c) IN (SELECT y FROM d1)
} {
  QUERY PLAN
  |--SEARCH d2 USING INDEX d2ab (a=? AND b=?)
  |--LIST SUBQUERY xxxxxx
  |  `--SCAN d1

  `--LIST SUBQUERY xxxxxx
     `--SCAN d1

}

do_execsql_test 6.0 {
  CREATE TABLE e1(a, b, c, d, e);
  CREATE INDEX e1ab ON e1(a, b);
  CREATE INDEX e1cde ON e1(c, d, e);
}







|
>

|
>







232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
  SELECT * FROM d2 WHERE 
    (a, b) IN (SELECT x, y FROM d1) AND
    (c) IN (SELECT y FROM d1)
} {
  QUERY PLAN
  |--SEARCH d2 USING INDEX d2ab (a=? AND b=?)
  |--LIST SUBQUERY xxxxxx
  |  |--SCAN d1
  |  `--CREATE BLOOM FILTER
  `--LIST SUBQUERY xxxxxx
     |--SCAN d1
     `--CREATE BLOOM FILTER
}

do_execsql_test 6.0 {
  CREATE TABLE e1(a, b, c, d, e);
  CREATE INDEX e1ab ON e1(a, b);
  CREATE INDEX e1cde ON e1(c, d, e);
}