/ Check-in [41de1643]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Tune the query planner to be more aggressive about using automatic indexes on views and subqueries for which there is not opportunity to declare a persistent schema index.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 41de1643bfc9ae25e20790d707e2789b665baa2b
User & Date: drh 2014-09-22 14:30:51
Context
2014-09-22
19:51
Size reduction and substantial performance increase for cellSizePtr(). check-in: bc8bbf32 user: drh tags: trunk
14:30
Tune the query planner to be more aggressive about using automatic indexes on views and subqueries for which there is not opportunity to declare a persistent schema index. check-in: 41de1643 user: drh tags: trunk
03:22
Disable shared memory operations using the unix-nolock VFS. check-in: 10a6e510 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/where.c.

4714
4715
4716
4717
4718
4719
4720
4721
4722





4723



4724
4725
4726
4727
4728
4729
4730
      if( termCanDriveIndex(pTerm, pSrc, 0) ){
        pNew->u.btree.nEq = 1;
        pNew->u.btree.nSkip = 0;
        pNew->u.btree.pIndex = 0;
        pNew->nLTerm = 1;
        pNew->aLTerm[0] = pTerm;
        /* TUNING: One-time cost for computing the automatic index is
        ** approximately 7*N*log2(N) where N is the number of rows in
        ** the table being indexed. */





        pNew->rSetup = rLogSize + rSize + 28;  assert( 28==sqlite3LogEst(7) );



        ApplyCostMultiplier(pNew->rSetup, pTab->costMult);
        /* TUNING: Each index lookup yields 20 rows in the table.  This
        ** is more than the usual guess of 10 rows, since we have no way
        ** of knowing how selective the index will ultimately be.  It would
        ** not be unreasonable to make this value much larger. */
        pNew->nOut = 43;  assert( 43==sqlite3LogEst(20) );
        pNew->rRun = sqlite3LogEstAdd(rLogSize,pNew->nOut);







|
|
>
>
>
>
>
|
>
>
>







4714
4715
4716
4717
4718
4719
4720
4721
4722
4723
4724
4725
4726
4727
4728
4729
4730
4731
4732
4733
4734
4735
4736
4737
4738
      if( termCanDriveIndex(pTerm, pSrc, 0) ){
        pNew->u.btree.nEq = 1;
        pNew->u.btree.nSkip = 0;
        pNew->u.btree.pIndex = 0;
        pNew->nLTerm = 1;
        pNew->aLTerm[0] = pTerm;
        /* TUNING: One-time cost for computing the automatic index is
        ** estimated to be X*N*log2(N) where N is the number of rows in
        ** the table being indexed and where X is 7 (LogEst=28) for normal
        ** tables or 1.375 (LogEst=4) for views and subqueries.  The value
        ** of X is smaller for views and subqueries so that the query planner
        ** will be more aggressive about generating automatic indexes for
        ** those objects, since there is no opportunity to add schema
        ** indexes on subqueries and views. */
        pNew->rSetup = rLogSize + rSize + 4;
        if( pTab->pSelect==0 && (pTab->tabFlags & TF_Ephemeral)==0 ){
          pNew->rSetup += 24;
        }
        ApplyCostMultiplier(pNew->rSetup, pTab->costMult);
        /* TUNING: Each index lookup yields 20 rows in the table.  This
        ** is more than the usual guess of 10 rows, since we have no way
        ** of knowing how selective the index will ultimately be.  It would
        ** not be unreasonable to make this value much larger. */
        pNew->nOut = 43;  assert( 43==sqlite3LogEst(20) );
        pNew->rRun = sqlite3LogEstAdd(rLogSize,pNew->nOut);

Changes to test/autoindex1.test.

408
409
410
411
412
413
414
415

































































































416
  EXPLAIN QUERY PLAN
  SELECT * FROM 
        data JOIN mimetypes ON (data.mimetype_id=mimetypes._id) 
             JOIN raw_contacts ON (data.raw_contact_id=raw_contacts._id) 
             JOIN accounts ON (raw_contacts.account_id=accounts._id)
   WHERE mimetypes._id=10 AND data14 IS NOT NULL;
} {/SEARCH TABLE data .*SEARCH TABLE raw_contacts/}


































































































finish_test








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
  EXPLAIN QUERY PLAN
  SELECT * FROM 
        data JOIN mimetypes ON (data.mimetype_id=mimetypes._id) 
             JOIN raw_contacts ON (data.raw_contact_id=raw_contacts._id) 
             JOIN accounts ON (raw_contacts.account_id=accounts._id)
   WHERE mimetypes._id=10 AND data14 IS NOT NULL;
} {/SEARCH TABLE data .*SEARCH TABLE raw_contacts/}

# Another test case from an important user of SQLite.  The key feature of
# this test is that the "aggindex" subquery should make use of an
# automatic index.  If it does, the query is fast.  If it does not, the
# query is deathly slow.  It worked OK in 3.7.17 but started going slow
# with version 3.8.0.  The problem was fixed for 3.8.7 by reducing the
# cost estimate for automatic indexes on views and subqueries.
#
db close
forcedelete test.db
sqlite3 db test.db
do_execsql_test autoindex1-900 {
  CREATE TABLE messages (ROWID INTEGER PRIMARY KEY AUTOINCREMENT, message_id, document_id BLOB, in_reply_to, remote_id INTEGER, sender INTEGER, subject_prefix, subject INTEGER, date_sent INTEGER, date_received INTEGER, date_created INTEGER, date_last_viewed INTEGER, mailbox INTEGER, remote_mailbox INTEGER, original_mailbox INTEGER, flags INTEGER, read, flagged, size INTEGER, color, encoding, type INTEGER, pad, conversation_id INTEGER DEFAULT -1, snippet TEXT DEFAULT NULL, fuzzy_ancestor INTEGER DEFAULT NULL, automated_conversation INTEGER DEFAULT 0, root_status INTEGER DEFAULT -1, conversation_position INTEGER DEFAULT -1);
  CREATE INDEX date_index ON messages(date_received);
  CREATE INDEX date_last_viewed_index ON messages(date_last_viewed);
  CREATE INDEX date_created_index ON messages(date_created);
  CREATE INDEX message_message_id_mailbox_index ON messages(message_id, mailbox);
  CREATE INDEX message_document_id_index ON messages(document_id);
  CREATE INDEX message_read_index ON messages(read);
  CREATE INDEX message_flagged_index ON messages(flagged);
  CREATE INDEX message_mailbox_index ON messages(mailbox, date_received);
  CREATE INDEX message_remote_mailbox_index ON messages(remote_mailbox, remote_id);
  CREATE INDEX message_type_index ON messages(type);
  CREATE INDEX message_conversation_id_conversation_position_index ON messages(conversation_id, conversation_position);
  CREATE INDEX message_fuzzy_ancestor_index ON messages(fuzzy_ancestor);
  CREATE INDEX message_subject_fuzzy_ancestor_index ON messages(subject, fuzzy_ancestor);
  CREATE INDEX message_sender_subject_automated_conversation_index ON messages(sender, subject, automated_conversation);
  CREATE INDEX message_sender_index ON messages(sender);
  CREATE INDEX message_root_status ON messages(root_status);
  CREATE TABLE subjects (ROWID INTEGER PRIMARY KEY, subject COLLATE RTRIM, normalized_subject COLLATE RTRIM);
  CREATE INDEX subject_subject_index ON subjects(subject);
  CREATE INDEX subject_normalized_subject_index ON subjects(normalized_subject);
  CREATE TABLE addresses (ROWID INTEGER PRIMARY KEY, address COLLATE NOCASE, comment, UNIQUE(address, comment));
  CREATE INDEX addresses_address_index ON addresses(address);
  CREATE TABLE mailboxes (ROWID INTEGER PRIMARY KEY, url UNIQUE, total_count INTEGER DEFAULT 0, unread_count INTEGER DEFAULT 0, unseen_count INTEGER DEFAULT 0, deleted_count INTEGER DEFAULT 0, unread_count_adjusted_for_duplicates INTEGER DEFAULT 0, change_identifier, source INTEGER, alleged_change_identifier);
  CREATE INDEX mailboxes_source_index ON mailboxes(source);
  CREATE TABLE labels (ROWID INTEGER PRIMARY KEY, message_id INTEGER NOT NULL, mailbox_id INTEGER NOT NULL, UNIQUE(message_id, mailbox_id));
  CREATE INDEX labels_message_id_mailbox_id_index ON labels(message_id, mailbox_id);
  CREATE INDEX labels_mailbox_id_index ON labels(mailbox_id);
  
  explain query plan
  SELECT messages.ROWID,
         messages.message_id,
         messages.remote_id,
         messages.date_received,
         messages.date_sent,
         messages.flags,
         messages.size,
         messages.color,
         messages.date_last_viewed,
         messages.subject_prefix,
         subjects.subject,
         sender.comment,
         sender.address,
         NULL,
         messages.mailbox,
         messages.original_mailbox,
         NULL,
         NULL,
         messages.type,
         messages.document_id,
         sender,
         NULL,
         messages.conversation_id,
         messages.conversation_position,
         agglabels.labels
   FROM mailboxes AS mailbox
        JOIN messages ON mailbox.ROWID = messages.mailbox
        LEFT OUTER JOIN subjects ON messages.subject = subjects.ROWID
        LEFT OUTER JOIN addresses AS sender ON messages.sender = sender.ROWID
        LEFT OUTER JOIN (
               SELECT message_id, group_concat(mailbox_id) as labels
               FROM labels GROUP BY message_id
             ) AS agglabels ON messages.ROWID = agglabels.message_id
  WHERE (mailbox.url = 'imap://email.app@imap.gmail.com/%5BGmail%5D/All%20Mail')
    AND (messages.ROWID IN (
            SELECT labels.message_id
              FROM labels JOIN mailboxes ON labels.mailbox_id = mailboxes.ROWID
             WHERE mailboxes.url = 'imap://email.app@imap.gmail.com/INBOX'))
    AND messages.mailbox in (6,12,18,24,30,36,42,1,7,13,19,25,31,37,43,2,8,
                             14,20,26,32,38,3,9,15,21,27,33,39,4,10,16,22,28,
                             34,40,5,11,17,23,35,41)
   ORDER BY date_received DESC;
} {/agglabels USING AUTOMATIC COVERING INDEX/}

# A test case for VIEWs
#
do_execsql_test autoindex1-901 {
  CREATE TABLE t1(x INTEGER PRIMARY KEY, y, z);
  CREATE TABLE t2(a, b);
  CREATE VIEW agg2 AS SELECT a, sum(b) AS m FROM t2 GROUP BY a;
  EXPLAIN QUERY PLAN
  SELECT t1.z, agg2.m
    FROM t1 JOIN agg2 ON t1.y=agg2.m
   WHERE t1.x IN (1,2,3);
} {/USING AUTOMATIC COVERING INDEX/}


finish_test