/ Check-in [b29ac50a]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Optimizations for fts5 queries that match against a specific column.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: b29ac50af0491a780a5a4c0985d88d0e5e014ba3
User & Date: dan 2015-05-28 19:57:12
Context
2015-05-29
15:55
Add syntax to fts5 used to specify that a phrase or NEAR group should match a subset of columns. For example "[col1 col2 ...] : <phrase>". check-in: 0fc0ea20 user: dan tags: fts5
2015-05-28
19:57
Optimizations for fts5 queries that match against a specific column. check-in: b29ac50a user: dan tags: fts5
14:37
Remove some dead code from fts5. Add auxiliary function api tests to the same. check-in: 0f9df202 user: dan tags: fts5
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5_expr.c.

647
648
649
650
651
652
653







































654
655
656
657
658
659
660
...
670
671
672
673
674
675
676

677
678



679
680
681
682
683
684
685


686
687
688
689
690
691
692
693
694
695
696













697
698
699
700
701
702
703
...
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725

726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
      }
    }
  }while( bMatch==0 );

  pNode->iRowid = iLast;
  return rc;
}








































/*
** Argument pNode points to a NEAR node. All individual term iterators 
** point to valid entries (not EOF).
*
** This function tests if the term iterators currently all point to the
** same rowid, and if so, if the row matches the phrase and NEAR constraints. 
................................................................................
** EOF.
*/
static int fts5ExprNearNextMatch(
  Fts5Expr *pExpr,                /* Expression that pNear is a part of */
  Fts5ExprNode *pNode             /* The "NEAR" node (FTS5_STRING) */
){
  Fts5ExprNearset *pNear = pNode->pNear;


  if( pNear->nPhrase==1 



   && pNear->apPhrase[0]->nTerm==1 
   && pNear->iCol<0
  ){
    /* If this "NEAR" object is actually a single phrase that consists of
    ** a single term only, then the row that it currently points to must
    ** be a match. All that is required is to populate pPhrase->poslist
    ** with the position-list data for the only term.  */


    Fts5ExprPhrase *pPhrase = pNear->apPhrase[0];
    Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
    assert( pPhrase->poslist.nSpace==0 );
    return sqlite3Fts5IterPoslist(pIter, 
        (const u8**)&pPhrase->poslist.p, &pPhrase->poslist.n, &pNode->iRowid
    );
  }else{
    int rc = SQLITE_OK;

    while( 1 ){
      int i;














      /* Advance the iterators until they all point to the same rowid */
      rc = fts5ExprNearNextRowidMatch(pExpr, pNode);
      if( rc!=SQLITE_OK || pNode->bEof ) break;

      /* Check that each phrase in the nearset matches the current row.
      ** Populate the pPhrase->poslist buffers at the same time. If any
................................................................................
        if( pPhrase->nTerm>1 || pNear->iCol>=0 ){
          int bMatch = 0;
          rc = fts5ExprPhraseIsMatch(pExpr, pNear->iCol, pPhrase, &bMatch);
          if( bMatch==0 ) break;
        }else{
          rc = sqlite3Fts5IterPoslistBuffer(
              pPhrase->aTerm[0].pIter, &pPhrase->poslist
          );
        }
      }

      if( rc==SQLITE_OK && i==pNear->nPhrase ){
        int bMatch = 1;
        if( pNear->nPhrase>1 ){
          rc = fts5ExprNearIsMatch(pNear, &bMatch);
        }
        if( rc!=SQLITE_OK || bMatch ) break;
      }


      /* If control flows to here, then the current rowid is not a match.
      ** Advance all term iterators in all phrases to the next rowid. */
      if( rc==SQLITE_OK ){
        rc = fts5ExprNearAdvanceFirst(pExpr, pNode, 0, 0);
      }
      if( pNode->bEof || rc!=SQLITE_OK ) break;
    }

    return rc;
  }
}

/*
** Initialize all term iterators in the pNear object. If any term is found
** to match no documents at all, set *pbEof to true and return immediately,
** without initializing any further iterators.
*/







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>

<
>
>
>
|
<
<
|
|
<
|
>
>
|
|
|
|
|
|
<
<

|
|
>
>
>
>
>
>
>
>
>
>
>
>
>







 







|










|
>
|
|
|
|
|
|
|

|
<







647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
...
709
710
711
712
713
714
715
716
717

718
719
720
721


722
723

724
725
726
727
728
729
730
731
732


733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
...
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787

788
789
790
791
792
793
794
      }
    }
  }while( bMatch==0 );

  pNode->iRowid = iLast;
  return rc;
}

/*
** IN/OUT parameter (*pa) points to a position list n bytes in size. If
** the position list contains entries for column iCol, then (*pa) is set
** to point to the sub-position-list for that column and the number of
** bytes in it returned. Or, if the argument position list does not
** contain any entries for column iCol, return 0.
*/
static int fts5ExprExtractCol(
  const u8 **pa,                  /* IN/OUT: Pointer to poslist */
  int n,                          /* IN: Size of poslist in bytes */
  int iCol                        /* Column to extract from poslist */
){
  int ii;
  int iCurrent = 0;
  const u8 *p = *pa;
  const u8 *pEnd = &p[n];         /* One byte past end of position list */
  u8 prev = 0;

  while( iCol!=iCurrent ){
    /* Advance pointer p until it points to pEnd or an 0x01 byte that is
    ** not part of a varint */
    while( !(prev & 0x80) && *p!=0x01 ){
      prev = *p++;
      if( p==pEnd ) return 0;
    }
    *pa = p++;
    p += getVarint32(p, iCurrent);
  }

  /* Advance pointer p until it points to pEnd or an 0x01 byte that is
  ** not part of a varint */
  while( p<pEnd && !(prev & 0x80) && *p!=0x01 ){
    prev = *p++;
  }
  return p - (*pa);
}



/*
** Argument pNode points to a NEAR node. All individual term iterators 
** point to valid entries (not EOF).
*
** This function tests if the term iterators currently all point to the
** same rowid, and if so, if the row matches the phrase and NEAR constraints. 
................................................................................
** EOF.
*/
static int fts5ExprNearNextMatch(
  Fts5Expr *pExpr,                /* Expression that pNear is a part of */
  Fts5ExprNode *pNode             /* The "NEAR" node (FTS5_STRING) */
){
  Fts5ExprNearset *pNear = pNode->pNear;
  int rc = SQLITE_OK;


  while( 1 ){
    int i;

    if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 ){


      /* If this "NEAR" object is actually a single phrase that consists 
      ** of a single term only, then grab pointers into the poslist

      ** managed by the fts5_index.c iterator object. This is much faster 
      ** than synthesizing a new poslist the way we have to for more
      ** complicated phrase or NEAR expressions.  */
      Fts5ExprPhrase *pPhrase = pNear->apPhrase[0];
      Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
      assert( pPhrase->poslist.nSpace==0 );
      rc = sqlite3Fts5IterPoslist(pIter, 
          (const u8**)&pPhrase->poslist.p, &pPhrase->poslist.n, &pNode->iRowid
      );



      /* If the term may match any column, then this must be a match. 
      ** Return immediately in this case. Otherwise, try to find the
      ** part of the poslist that corresponds to the required column.
      ** If it can be found, return. If it cannot, the next iteration
      ** of the loop will test the next rowid in the database for this
      ** term.  */
      if( pNear->iCol<0 ) return rc;

      pPhrase->poslist.n = fts5ExprExtractCol(
          (const u8**)&pPhrase->poslist.p,
          pPhrase->poslist.n,
          pNear->iCol
      );
      if( pPhrase->poslist.n ) return rc;
    }else{

      /* Advance the iterators until they all point to the same rowid */
      rc = fts5ExprNearNextRowidMatch(pExpr, pNode);
      if( rc!=SQLITE_OK || pNode->bEof ) break;

      /* Check that each phrase in the nearset matches the current row.
      ** Populate the pPhrase->poslist buffers at the same time. If any
................................................................................
        if( pPhrase->nTerm>1 || pNear->iCol>=0 ){
          int bMatch = 0;
          rc = fts5ExprPhraseIsMatch(pExpr, pNear->iCol, pPhrase, &bMatch);
          if( bMatch==0 ) break;
        }else{
          rc = sqlite3Fts5IterPoslistBuffer(
              pPhrase->aTerm[0].pIter, &pPhrase->poslist
              );
        }
      }

      if( rc==SQLITE_OK && i==pNear->nPhrase ){
        int bMatch = 1;
        if( pNear->nPhrase>1 ){
          rc = fts5ExprNearIsMatch(pNear, &bMatch);
        }
        if( rc!=SQLITE_OK || bMatch ) break;
      }
    }

    /* If control flows to here, then the current rowid is not a match.
    ** Advance all term iterators in all phrases to the next rowid. */
    if( rc==SQLITE_OK ){
      rc = fts5ExprNearAdvanceFirst(pExpr, pNode, 0, 0);
    }
    if( pNode->bEof || rc!=SQLITE_OK ) break;
  }

  return rc;

}

/*
** Initialize all term iterators in the pNear object. If any term is found
** to match no documents at all, set *pbEof to true and return immediately,
** without initializing any further iterators.
*/

Changes to ext/fts5/test/fts5aa.test.

422
423
424
425
426
427
428






429
430
431
432
  set res [list]
  db eval { SELECT * FROM b2 ORDER BY rowid ASC } {
    lappend res [execsql { SELECT * FROM b2 ORDER BY rowid ASC }]
  }
  set res
} {{a b c} {a b c} {a b c}}








finish_test









>
>
>
>
>
>




422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
  set res [list]
  db eval { SELECT * FROM b2 ORDER BY rowid ASC } {
    lappend res [execsql { SELECT * FROM b2 ORDER BY rowid ASC }]
  }
  set res
} {{a b c} {a b c} {a b c}}

reset_db
do_execsql_test 18.1 {
  CREATE VIRTUAL TABLE c2 USING fts5(x, y);
  INSERT INTO c2 VALUES('x x x', 'x x x');
  SELECT rowid FROM c2 WHERE c2 MATCH 'y:x';
} {1}

finish_test