/ Check-in [5165de54]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Avoid making redundant copies of position-lists within the fts5 code.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: 5165de548b84825cb000d33e5d3de12b0ef112c0
User & Date: dan 2015-05-23 15:43:05
Context
2015-05-25
11:46
Avoid redundant loads from the %_data table in the fts5 code. check-in: 02069782 user: dan tags: fts5
2015-05-23
15:43
Avoid making redundant copies of position-lists within the fts5 code. check-in: 5165de54 user: dan tags: fts5
2015-05-22
07:44
Increase test coverage of fts5_vocab.c. check-in: 065ab83a user: dan tags: fts5
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts3/unicode/mkunicode.tcl.

483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
  }
  tl_print_table_footer toggle
  tl_print_ioff_table $liOff

  puts [subst -nocommands {
  int ret = c;

  assert( c>=0 );
  assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );

  if( c<128 ){
    if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
  }else if( c<65536 ){
    const struct TableEntry *p;
    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;







<







483
484
485
486
487
488
489

490
491
492
493
494
495
496
  }
  tl_print_table_footer toggle
  tl_print_ioff_table $liOff

  puts [subst -nocommands {
  int ret = c;


  assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );

  if( c<128 ){
    if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
  }else if( c<65536 ){
    const struct TableEntry *p;
    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;

Changes to ext/fts5/fts5Int.h.

281
282
283
284
285
286
287

288
289
290
291
292
293
294
** using sqlite3Fts5IndexQuery().
*/
int sqlite3Fts5IterEof(Fts5IndexIter*);
int sqlite3Fts5IterNext(Fts5IndexIter*);
int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch);
i64 sqlite3Fts5IterRowid(Fts5IndexIter*);
int sqlite3Fts5IterPoslist(Fts5IndexIter*, const u8 **pp, int *pn);


/*
** Close an iterator opened by sqlite3Fts5IndexQuery().
*/
void sqlite3Fts5IterClose(Fts5IndexIter*);

/*







>







281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
** using sqlite3Fts5IndexQuery().
*/
int sqlite3Fts5IterEof(Fts5IndexIter*);
int sqlite3Fts5IterNext(Fts5IndexIter*);
int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch);
i64 sqlite3Fts5IterRowid(Fts5IndexIter*);
int sqlite3Fts5IterPoslist(Fts5IndexIter*, const u8 **pp, int *pn);
int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf);

/*
** Close an iterator opened by sqlite3Fts5IndexQuery().
*/
void sqlite3Fts5IterClose(Fts5IndexIter*);

/*

Changes to ext/fts5/fts5_expr.c.

668
669
670
671
672
673
674
675
676



















677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697

698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717

718
719
720
721
722
723
724
....
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
** otherwise. It is not considered an error code if an iterator reaches
** EOF.
*/
static int fts5ExprNearNextMatch(
  Fts5Expr *pExpr,                /* Expression that pNear is a part of */
  Fts5ExprNode *pNode             /* The "NEAR" node (FTS5_STRING) */
){
  int rc = SQLITE_OK;
  Fts5ExprNearset *pNear = pNode->pNear;



















  while( 1 ){
    int i;

    /* Advance the iterators until they all point to the same rowid */
    rc = fts5ExprNearNextRowidMatch(pExpr, pNode);
    if( rc!=SQLITE_OK || pNode->bEof ) break;

    /* Check that each phrase in the nearset matches the current row.
    ** Populate the pPhrase->poslist buffers at the same time. If any
    ** phrase is not a match, break out of the loop early.  */
    for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
      Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
      if( pPhrase->nTerm>1 || pNear->iCol>=0 ){
        int bMatch = 0;
        rc = fts5ExprPhraseIsMatch(pExpr, pNear->iCol, pPhrase, &bMatch);
        if( bMatch==0 ) break;
      }else{
        int n;
        const u8 *a;
        rc = sqlite3Fts5IterPoslist(pPhrase->aTerm[0].pIter, &a, &n);
        fts5BufferSet(&rc, &pPhrase->poslist, n, a);

      }
    }

    if( rc==SQLITE_OK && i==pNear->nPhrase ){
      int bMatch = 1;
      if( pNear->nPhrase>1 ){
        rc = fts5ExprNearIsMatch(pNear, &bMatch);
      }
      if( rc!=SQLITE_OK || bMatch ) break;
    }

    /* If control flows to here, then the current rowid is not a match.
    ** Advance all term iterators in all phrases to the next rowid. */
    if( rc==SQLITE_OK ){
      rc = fts5ExprNearAdvanceFirst(pExpr, pNode, 0, 0);
    }
    if( pNode->bEof || rc!=SQLITE_OK ) break;
  }

  return rc;

}

/*
** Initialize all term iterators in the pNear object. If any term is found
** to match no documents at all, set *pbEof to true and return immediately,
** without initializing any further iterators.
*/
................................................................................
    for(i=0; i<pPhrase->nTerm; i++){
      Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
      sqlite3_free(pTerm->zTerm);
      if( pTerm->pIter ){
        sqlite3Fts5IterClose(pTerm->pIter);
      }
    }
    fts5BufferFree(&pPhrase->poslist);
    sqlite3_free(pPhrase);
  }
}

/*
** If argument pNear is NULL, then a new Fts5ExprNearset object is allocated
** and populated with pPhrase. Or, if pNear is not NULL, phrase pPhrase is







<

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|

|
|
|

|
|
|
|
|
|
|
|
|
|
<
<
|
|
>
|
|

|
|
|
|
|
|
|

|
|
|
|
|
|
|

|
>







 







|







668
669
670
671
672
673
674

675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711


712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
....
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
** otherwise. It is not considered an error code if an iterator reaches
** EOF.
*/
static int fts5ExprNearNextMatch(
  Fts5Expr *pExpr,                /* Expression that pNear is a part of */
  Fts5ExprNode *pNode             /* The "NEAR" node (FTS5_STRING) */
){

  Fts5ExprNearset *pNear = pNode->pNear;

  if( pNear->nPhrase==1 
   && pNear->apPhrase[0]->nTerm==1 
   && pNear->iCol<0
  ){
    /* If this "NEAR" object is actually a single phrase that consists of
    ** a single term only, then the row that it currently points to must
    ** be a match. All that is required is to populate pPhrase->poslist
    ** with the position-list data for the only term.  */
    Fts5ExprPhrase *pPhrase = pNear->apPhrase[0];
    Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
    assert( pPhrase->poslist.nSpace==0 );
    pNode->iRowid = sqlite3Fts5IterRowid(pIter);
    return sqlite3Fts5IterPoslist(pIter, 
        (const u8**)&pPhrase->poslist.p, &pPhrase->poslist.n
    );
  }else{
    int rc = SQLITE_OK;

    while( 1 ){
      int i;

      /* Advance the iterators until they all point to the same rowid */
      rc = fts5ExprNearNextRowidMatch(pExpr, pNode);
      if( rc!=SQLITE_OK || pNode->bEof ) break;

      /* Check that each phrase in the nearset matches the current row.
      ** Populate the pPhrase->poslist buffers at the same time. If any
      ** phrase is not a match, break out of the loop early.  */
      for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
        Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
        if( pPhrase->nTerm>1 || pNear->iCol>=0 ){
          int bMatch = 0;
          rc = fts5ExprPhraseIsMatch(pExpr, pNear->iCol, pPhrase, &bMatch);
          if( bMatch==0 ) break;
        }else{


          rc = sqlite3Fts5IterPoslistBuffer(
              pPhrase->aTerm[0].pIter, &pPhrase->poslist
          );
        }
      }

      if( rc==SQLITE_OK && i==pNear->nPhrase ){
        int bMatch = 1;
        if( pNear->nPhrase>1 ){
          rc = fts5ExprNearIsMatch(pNear, &bMatch);
        }
        if( rc!=SQLITE_OK || bMatch ) break;
      }

      /* If control flows to here, then the current rowid is not a match.
      ** Advance all term iterators in all phrases to the next rowid. */
      if( rc==SQLITE_OK ){
        rc = fts5ExprNearAdvanceFirst(pExpr, pNode, 0, 0);
      }
      if( pNode->bEof || rc!=SQLITE_OK ) break;
    }

    return rc;
  }
}

/*
** Initialize all term iterators in the pNear object. If any term is found
** to match no documents at all, set *pbEof to true and return immediately,
** without initializing any further iterators.
*/
................................................................................
    for(i=0; i<pPhrase->nTerm; i++){
      Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
      sqlite3_free(pTerm->zTerm);
      if( pTerm->pIter ){
        sqlite3Fts5IterClose(pTerm->pIter);
      }
    }
    if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist);
    sqlite3_free(pPhrase);
  }
}

/*
** If argument pNear is NULL, then a new Fts5ExprNearset object is allocated
** and populated with pPhrase. Or, if pNear is not NULL, phrase pPhrase is

Changes to ext/fts5/fts5_index.c.

4053
4054
4055
4056
4057
4058
4059





















4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092

4093
4094
4095
4096
4097
4098
4099
....
4682
4683
4684
4685
4686
4687
4688
4689





4690
4691

4692











4693








4694
4695
4696
4697
4698
4699
4700
4701
4702
4703
    fts5StructureWrite(p, pStruct);
  }
  fts5StructureRelease(pStruct);

  return fts5IndexReturn(p);
}























/*
** Iterator pMulti currently points to a valid entry (not EOF). This
** function appends a copy of the position-list of the entry pMulti 
** currently points to to buffer pBuf.
**
** If an error occurs, an error code is left in p->rc. It is assumed
** no error has already occurred when this function is called.
*/
static void fts5MultiIterPoslist(
  Fts5Index *p,
  Fts5MultiSegIter *pMulti,
  int bSz,
  Fts5Buffer *pBuf
){
  if( p->rc==SQLITE_OK ){
    Fts5ChunkIter iter;
    Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ];
    assert( fts5MultiIterEof(p, pMulti)==0 );

    fts5ChunkIterInit(p, pSeg, &iter);

    if( fts5ChunkIterEof(p, &iter)==0 ){
      if( bSz ){
        /* WRITEPOSLISTSIZE */
        fts5BufferAppendVarint(&p->rc, pBuf, iter.nRem * 2);
      }
      while( fts5ChunkIterEof(p, &iter)==0 ){
        fts5BufferAppendBlob(&p->rc, pBuf, iter.n, iter.p);
        fts5ChunkIterNext(p, &iter);
      }
    }
    fts5ChunkIterRelease(&iter);

  }
}

static void fts5DoclistIterNext(Fts5DoclistIter *pIter){
  if( pIter->i<pIter->n ){
    int bDummy;
    if( pIter->i ){
................................................................................
*/
int sqlite3Fts5IterPoslist(Fts5IndexIter *pIter, const u8 **pp, int *pn){
  assert( pIter->pIndex->rc==SQLITE_OK );
  if( pIter->pDoclist ){
    *pn = pIter->pDoclist->nPoslist;
    *pp = pIter->pDoclist->aPoslist;
  }else{
    Fts5Index *p = pIter->pIndex;





    fts5BufferZero(&pIter->poslist);
    fts5MultiIterPoslist(p, pIter->pMulti, 0, &pIter->poslist);

    *pn = pIter->poslist.n;











    *pp = pIter->poslist.p;








  }
  return fts5IndexReturn(pIter->pIndex);
}

/*
** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
*/
void sqlite3Fts5IterClose(Fts5IndexIter *pIter){
  if( pIter ){
    if( pIter->pDoclist ){







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>












|



<



<
<
<
|
|
|
|
<
<
<
<
<
<
>







 







|
>
>
>
>
>
|
<
>
|
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>

|








4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096

4097
4098
4099



4100
4101
4102
4103






4104
4105
4106
4107
4108
4109
4110
4111
....
4694
4695
4696
4697
4698
4699
4700
4701
4702
4703
4704
4705
4706
4707

4708
4709
4710
4711
4712
4713
4714
4715
4716
4717
4718
4719
4720
4721
4722
4723
4724
4725
4726
4727
4728
4729
4730
4731
4732
4733
4734
4735
4736
4737
4738
4739
    fts5StructureWrite(p, pStruct);
  }
  fts5StructureRelease(pStruct);

  return fts5IndexReturn(p);
}

/*
** Iterator pIter currently points to a valid entry (not EOF). This
** function appends the position list data for the current entry to
** buffer pBuf. It does not make a copy of the position-list size
** field.
*/
static void fts5SegiterPoslist(
  Fts5Index *p,
  Fts5SegIter *pSeg,
  Fts5Buffer *pBuf
){
  if( p->rc==SQLITE_OK ){
    Fts5ChunkIter iter;
    fts5ChunkIterInit(p, pSeg, &iter);
    while( fts5ChunkIterEof(p, &iter)==0 ){
      fts5BufferAppendBlob(&p->rc, pBuf, iter.n, iter.p);
      fts5ChunkIterNext(p, &iter);
    }
    fts5ChunkIterRelease(&iter);
  }
}

/*
** Iterator pMulti currently points to a valid entry (not EOF). This
** function appends a copy of the position-list of the entry pMulti 
** currently points to to buffer pBuf.
**
** If an error occurs, an error code is left in p->rc. It is assumed
** no error has already occurred when this function is called.
*/
static void fts5MultiIterPoslist(
  Fts5Index *p,
  Fts5MultiSegIter *pMulti,
  int bSz,                        /* Append a size field before the data */
  Fts5Buffer *pBuf
){
  if( p->rc==SQLITE_OK ){

    Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ];
    assert( fts5MultiIterEof(p, pMulti)==0 );




    if( bSz ){
      /* WRITEPOSLISTSIZE */
      fts5BufferAppendVarint(&p->rc, pBuf, pSeg->nPos*2);
    }






    fts5SegiterPoslist(p, pSeg, pBuf);
  }
}

static void fts5DoclistIterNext(Fts5DoclistIter *pIter){
  if( pIter->i<pIter->n ){
    int bDummy;
    if( pIter->i ){
................................................................................
*/
int sqlite3Fts5IterPoslist(Fts5IndexIter *pIter, const u8 **pp, int *pn){
  assert( pIter->pIndex->rc==SQLITE_OK );
  if( pIter->pDoclist ){
    *pn = pIter->pDoclist->nPoslist;
    *pp = pIter->pDoclist->aPoslist;
  }else{
    Fts5MultiSegIter *pMulti = pIter->pMulti;
    Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ];
    *pn = pSeg->nPos;
    if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->n ){
      *pp = &pSeg->pLeaf->p[pSeg->iLeafOffset];
    }else{
      fts5BufferZero(&pIter->poslist);

      fts5SegiterPoslist(pIter->pIndex, pSeg, &pIter->poslist);
      *pp = pIter->poslist.p;
    }
  }
  return fts5IndexReturn(pIter->pIndex);
}

/*
** This function is similar to sqlite3Fts5IterPoslist(), except that it
** copies the position list into the buffer supplied as the second 
** argument.
*/
int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf){
  Fts5Index *p = pIter->pIndex;
  Fts5DoclistIter *pDoclist = pIter->pDoclist;
  assert( p->rc==SQLITE_OK );
  if( pDoclist ){
    fts5BufferSet(&p->rc, pBuf, pDoclist->nPoslist, pDoclist->aPoslist);
  }else{
    Fts5MultiSegIter *pMulti = pIter->pMulti;
    fts5BufferZero(pBuf);
    fts5MultiIterPoslist(p, pMulti, 0, pBuf);
  }
  return fts5IndexReturn(p);
}

/*
** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
*/
void sqlite3Fts5IterClose(Fts5IndexIter *pIter){
  if( pIter ){
    if( pIter->pDoclist ){

Changes to ext/fts5/fts5_unicode2.c.

317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
   65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462, 
   65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511, 
   65514, 65521, 65527, 65528, 65529, 
  };

  int ret = c;

  assert( c>=0 );
  assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );

  if( c<128 ){
    if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
  }else if( c<65536 ){
    const struct TableEntry *p;
    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;







<







317
318
319
320
321
322
323

324
325
326
327
328
329
330
   65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462, 
   65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511, 
   65514, 65521, 65527, 65528, 65529, 
  };

  int ret = c;


  assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );

  if( c<128 ){
    if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
  }else if( c<65536 ){
    const struct TableEntry *p;
    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;

Changes to ext/fts5/tool/loadfts5.tcl.

105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
db func loadfile loadfile

db transaction {
  set pref ""
  if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" }
  catch {
    db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)"
    # db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);"
  }
  if {$O(automerge)>=0} {
    if {$O(vtab) == "fts5"} {
      db eval { INSERT INTO t1(t1, rank) VALUES('automerge', $O(automerge)) }
    } else {
      db eval { INSERT INTO t1(t1) VALUES('automerge=' || $O(automerge)) }
    }







|







105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
db func loadfile loadfile

db transaction {
  set pref ""
  if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" }
  catch {
    db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)"
    db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);"
  }
  if {$O(automerge)>=0} {
    if {$O(vtab) == "fts5"} {
      db eval { INSERT INTO t1(t1, rank) VALUES('automerge', $O(automerge)) }
    } else {
      db eval { INSERT INTO t1(t1) VALUES('automerge=' || $O(automerge)) }
    }