/ Check-in [4ea015ab]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Fix an fts5 problem in extracting columns from position lists containing large varints.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: 4ea015ab983300d420ef104cca550b22a6395866
User & Date: dan 2015-06-03 11:23:30
Context
2015-06-05
19:05
Make use of range constraints on the rowid field of an fts5 table in full-text queries. check-in: 32cbc0ed user: dan tags: fts5
2015-06-03
11:23
Fix an fts5 problem in extracting columns from position lists containing large varints. check-in: 4ea015ab user: dan tags: fts5
2015-06-02
19:38
Change the fts5 multi-column syntax to use parentheses instead of square brackets. check-in: ab85a6fc user: dan tags: fts5
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5_expr.c.

   664    664     const u8 *p = *pa;
   665    665     const u8 *pEnd = &p[n];         /* One byte past end of position list */
   666    666     u8 prev = 0;
   667    667   
   668    668     while( iCol!=iCurrent ){
   669    669       /* Advance pointer p until it points to pEnd or an 0x01 byte that is
   670    670       ** not part of a varint */
   671         -    while( !(prev & 0x80) && *p!=0x01 ){
          671  +    while( (prev & 0x80) || *p!=0x01 ){
   672    672         prev = *p++;
   673    673         if( p==pEnd ) return 0;
   674    674       }
   675    675       *pa = p++;
   676    676       p += fts5GetVarint32(p, iCurrent);
   677    677     }
   678    678   
   679    679     /* Advance pointer p until it points to pEnd or an 0x01 byte that is
   680    680     ** not part of a varint */
   681         -  while( p<pEnd && !(prev & 0x80) && *p!=0x01 ){
          681  +  assert( (prev & 0x80)==0 );
          682  +  while( p<pEnd && ((prev & 0x80) || *p!=0x01) ){
   682    683       prev = *p++;
   683    684     }
   684    685     return p - (*pa);
   685    686   }
   686    687   
   687    688   static int fts5ExprExtractColset (
   688    689     Fts5ExprColset *pColset,        /* Colset to filter on */
................................................................................
   706    707   static int fts5ExprNearTest(
   707    708     int *pRc,
   708    709     Fts5Expr *pExpr,                /* Expression that pNear is a part of */
   709    710     Fts5ExprNode *pNode             /* The "NEAR" node (FTS5_STRING) */
   710    711   ){
   711    712     Fts5ExprNearset *pNear = pNode->pNear;
   712    713     int rc = *pRc;
   713         -
   714         -  if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 ){
   715         -    /* If this "NEAR" object is actually a single phrase that consists 
   716         -    ** of a single term only, then grab pointers into the poslist
   717         -    ** managed by the fts5_index.c iterator object. This is much faster 
   718         -    ** than synthesizing a new poslist the way we have to for more
   719         -    ** complicated phrase or NEAR expressions.  */
   720         -    Fts5ExprPhrase *pPhrase = pNear->apPhrase[0];
   721         -    Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
   722         -    Fts5ExprColset *pColset = pNear->pColset;
   723         -    const u8 *pPos;
   724         -    int nPos;
   725         -
   726         -    if( rc!=SQLITE_OK ) return 0;
   727         -    rc = sqlite3Fts5IterPoslist(pIter, &pPos, &nPos, &pNode->iRowid);
   728         -
   729         -    /* If the term may match any column, then this must be a match. 
   730         -    ** Return immediately in this case. Otherwise, try to find the
   731         -    ** part of the poslist that corresponds to the required column.
   732         -    ** If it can be found, return. If it cannot, the next iteration
   733         -    ** of the loop will test the next rowid in the database for this
   734         -    ** term.  */
   735         -    if( pColset==0 ){
   736         -      assert( pPhrase->poslist.nSpace==0 );
   737         -      pPhrase->poslist.p = (u8*)pPos;
   738         -      pPhrase->poslist.n = nPos;
   739         -    }else if( pColset->nCol==1 ){
   740         -      assert( pPhrase->poslist.nSpace==0 );
   741         -      pPhrase->poslist.n = fts5ExprExtractCol(&pPos, nPos, pColset->aiCol[0]);
   742         -      pPhrase->poslist.p = (u8*)pPos;
   743         -    }else if( rc==SQLITE_OK ){
   744         -      rc = fts5ExprExtractColset(pColset, pPos, nPos, &pPhrase->poslist);
   745         -    }
   746         -
   747         -    *pRc = rc;
   748         -    return (pPhrase->poslist.n>0);
   749         -  }else{
   750         -    int i;
   751         -
   752         -    /* Check that each phrase in the nearset matches the current row.
   753         -    ** Populate the pPhrase->poslist buffers at the same time. If any
   754         -    ** phrase is not a match, break out of the loop early.  */
   755         -    for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
   756         -      Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
   757         -      if( pPhrase->nTerm>1 || pNear->pColset ){
   758         -        int bMatch = 0;
   759         -        rc = fts5ExprPhraseIsMatch(pExpr, pNear->pColset, pPhrase, &bMatch);
   760         -        if( bMatch==0 ) break;
   761         -      }else{
   762         -        rc = sqlite3Fts5IterPoslistBuffer(
   763         -            pPhrase->aTerm[0].pIter, &pPhrase->poslist
   764         -        );
   765         -      }
   766         -    }
   767         -
   768         -    *pRc = rc;
   769         -    if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){
   770         -      return 1;
   771         -    }
          714  +  int i;
          715  +
          716  +  /* Check that each phrase in the nearset matches the current row.
          717  +  ** Populate the pPhrase->poslist buffers at the same time. If any
          718  +  ** phrase is not a match, break out of the loop early.  */
          719  +  for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
          720  +    Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
          721  +    if( pPhrase->nTerm>1 || pNear->pColset ){
          722  +      int bMatch = 0;
          723  +      rc = fts5ExprPhraseIsMatch(pExpr, pNear->pColset, pPhrase, &bMatch);
          724  +      if( bMatch==0 ) break;
          725  +    }else{
          726  +      rc = sqlite3Fts5IterPoslistBuffer(
          727  +          pPhrase->aTerm[0].pIter, &pPhrase->poslist
          728  +      );
          729  +    }
          730  +  }
          731  +
          732  +  *pRc = rc;
          733  +  if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){
          734  +    return 1;
   772    735     }
   773    736   
   774    737     return 0;
   775    738   }
   776    739   
   777    740   static int fts5ExprTokenTest(
   778    741     Fts5Expr *pExpr,                /* Expression that pNear is a part of */
................................................................................
   935    898     }else{
   936    899       if( iLhs>iRhs ) return -1;
   937    900       return (iLhs < iRhs);
   938    901     }
   939    902   }
   940    903   
   941    904   static void fts5ExprSetEof(Fts5ExprNode *pNode){
   942         -  if( pNode ){
   943         -    int i;
   944         -    pNode->bEof = 1;
   945         -    for(i=0; i<pNode->nChild; i++){
   946         -      fts5ExprSetEof(pNode->apChild[i]);
   947         -    }
          905  +  int i;
          906  +  pNode->bEof = 1;
          907  +  for(i=0; i<pNode->nChild; i++){
          908  +    fts5ExprSetEof(pNode->apChild[i]);
   948    909     }
   949    910   }
   950    911   
   951    912   static void fts5ExprNodeZeroPoslist(Fts5ExprNode *pNode){
   952    913     if( pNode->eType==FTS5_STRING || pNode->eType==FTS5_TERM ){
   953    914       Fts5ExprNearset *pNear = pNode->pNear;
   954    915       int i;
................................................................................
  1558   1519   
  1559   1520   Fts5ExprColset *sqlite3Fts5ParseColset(
  1560   1521     Fts5Parse *pParse,              /* Store SQLITE_NOMEM here if required */
  1561   1522     Fts5ExprColset *pColset,        /* Existing colset object */
  1562   1523     Fts5Token *p
  1563   1524   ){
  1564   1525     Fts5ExprColset *pRet = 0;
         1526  +  int iCol;
         1527  +  char *z;                        /* Dequoted copy of token p */
  1565   1528   
         1529  +  z = sqlite3Fts5Strndup(&pParse->rc, p->p, p->n);
  1566   1530     if( pParse->rc==SQLITE_OK ){
  1567         -    int iCol;
  1568         -    char *z = 0;
  1569         -    int rc = fts5ParseStringFromToken(p, &z);
  1570         -    if( rc==SQLITE_OK ){
  1571         -      Fts5Config *pConfig = pParse->pConfig;
  1572         -      sqlite3Fts5Dequote(z);
  1573         -      for(iCol=0; iCol<pConfig->nCol; iCol++){
  1574         -        if( 0==sqlite3_stricmp(pConfig->azCol[iCol], z) ){
  1575         -          break;
  1576         -        }
  1577         -      }
  1578         -      if( iCol==pConfig->nCol ){
  1579         -        sqlite3Fts5ParseError(pParse, "no such column: %s", z);
  1580         -      }
  1581         -      sqlite3_free(z);
         1531  +    Fts5Config *pConfig = pParse->pConfig;
         1532  +    sqlite3Fts5Dequote(z);
         1533  +    for(iCol=0; iCol<pConfig->nCol; iCol++){
         1534  +      if( 0==sqlite3_stricmp(pConfig->azCol[iCol], z) ) break;
         1535  +    }
         1536  +    if( iCol==pConfig->nCol ){
         1537  +      sqlite3Fts5ParseError(pParse, "no such column: %s", z);
  1582   1538       }else{
  1583         -      pParse->rc = rc;
  1584         -    }
  1585         -
  1586         -    if( pParse->rc==SQLITE_OK ){
  1587   1539         pRet = fts5ParseColset(pParse, pColset, iCol);
  1588   1540       }
         1541  +    sqlite3_free(z);
  1589   1542     }
  1590   1543   
  1591         -  if( pParse->rc!=SQLITE_OK ){
  1592         -    assert( pRet==0 );
         1544  +  if( pRet==0 ){
         1545  +    assert( pParse->rc!=SQLITE_OK );
  1593   1546       sqlite3_free(pColset);
  1594   1547     }
  1595   1548   
  1596   1549     return pRet;
  1597   1550   }
  1598   1551   
  1599   1552   void sqlite3Fts5ParseSetColset(
................................................................................
  1767   1720           zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" ", zTerm);
  1768   1721         }
  1769   1722   
  1770   1723         if( zRet ) zRet = fts5PrintfAppend(zRet, "}");
  1771   1724         if( zRet==0 ) return 0;
  1772   1725       }
  1773   1726   
  1774         -    if( zRet==0 ) return 0;
  1775         -
  1776   1727     }else{
  1777   1728       char const *zOp = 0;
  1778   1729       int i;
  1779   1730       switch( pExpr->eType ){
  1780   1731         case FTS5_AND: zOp = "AND"; break;
  1781   1732         case FTS5_NOT: zOp = "NOT"; break;
  1782   1733         default: 

Changes to ext/fts5/test/fts5auto.test.

   228    228   
   229    229   do_execsql_test 1.0 {
   230    230     CREATE VIRTUAL TABLE tt USING fts5(a, b, c, d, e, f);
   231    231   } {}
   232    232   
   233    233   fts5_aux_test_functions db
   234    234   
   235         -proc matchdata {expr {order ASC}} {
   236         -  set tclexpr [db one {
          235  +proc matchdata {expr tbl collist {order ASC}} {
          236  +
          237  +  set cols ""
          238  +  foreach e $collist {
          239  +    append cols ", '$e'"
          240  +  }
          241  +
          242  +  set tclexpr [db one [subst -novar {
   237    243       SELECT fts5_expr_tcl(
   238         -      $expr, 'nearset $cols -pc ::pc', 'a','b','c','d','e','f'
          244  +      $expr, 'nearset $cols -pc ::pc' [set cols]
   239    245       )
   240         -  }]
          246  +  }]]
   241    247     set res [list]
   242    248   
   243         -  db eval "SELECT rowid, * FROM tt ORDER BY rowid $order" {
   244         -    set cols [list $a $b $c $d $e $f]
          249  +  db eval "SELECT rowid, * FROM $tbl ORDER BY rowid $order" x {
          250  +    set cols [list]
          251  +    foreach col $x(*) {
          252  +      if {$col != "rowid"} { lappend cols $x($col) }
          253  +    }
          254  +    # set cols [list $a $b $c $d $e $f]
   245    255       set ::pc 0
   246    256       set rowdata [eval $tclexpr]
   247         -    if {$rowdata != ""} { lappend res $rowid $rowdata }
          257  +    if {$rowdata != ""} { lappend res $x(rowid) $rowdata }
   248    258     }
   249    259   
   250    260     set res
   251    261   }
   252    262   
   253         -proc do_auto_test {tn expr} { 
          263  +proc do_auto_test {tn tbl cols expr} { 
   254    264     foreach order {asc desc} {
   255         -    set res [matchdata $expr $order]
   256         -    set testname "3.$tn.[string range $order 0 0].rows=[expr [llength $res]/2]"
          265  +    set res [matchdata $expr $tbl $cols $order]
          266  +    set testname "$tn.[string range $order 0 0].rows=[expr [llength $res]/2]"
   257    267   
   258    268       set ::autotest_expr $expr
   259    269       do_execsql_test $testname [subst -novar {
   260         -      SELECT rowid, fts5_test_poslist(tt) FROM tt 
   261         -      WHERE tt MATCH $::autotest_expr ORDER BY rowid [set order]
          270  +      SELECT rowid, fts5_test_poslist([set tbl]) FROM [set tbl] 
          271  +      WHERE [set tbl] MATCH $::autotest_expr ORDER BY rowid [set order]
   262    272       }] $res
   263    273     }
   264    274   
   265    275   
   266    276   }
   267    277   
   268    278   #-------------------------------------------------------------------------
................................................................................
   306    316       A.3 { {a b f} : x }
   307    317       A.4 { {f a b} : x }
   308    318       A.5 { {f a b} : x y }
   309    319       A.6 { {f a b} : x + y }
   310    320       A.7 { {c a b} : x + c }
   311    321       A.8 { {c d} : "l m" }
   312    322       A.9 { {c e} : "l m" }
          323  +    A.10 { {a b c a b c a b c f f e} : "l m" }
   313    324   
   314    325       B.1 { a NOT b }
   315    326       B.2 { a NOT a:b }
   316    327       B.3 { a OR (b AND c) }
   317    328       B.4 { a OR (b AND {a b c}:c) }
   318    329       B.5 { a OR "b c" }
   319    330       B.6 { a OR b OR c }
   320    331   
   321    332       C.1 { a OR (b AND "b c") }
   322    333       C.2 { a OR (b AND "z c") }
   323    334     } {
   324         -    do_auto_test 3.$fold.$tn $expr
          335  +    do_auto_test 3.$fold.$tn tt {a b c d e f} $expr
          336  +  }
          337  +}
          338  +
          339  +proc replace_elems {list args} {
          340  +  set ret $list
          341  +  foreach {idx elem} $args {
          342  +    set ret [lreplace $ret $idx $idx $elem]
          343  +  }
          344  +  set ret
          345  +}
          346  +
          347  +#-------------------------------------------------------------------------
          348  +#
          349  +set bigdoc [string trim [string repeat "a " 1000]]
          350  +do_test 4.0 {
          351  +  set a [replace_elems $bigdoc  50 x  950 x]
          352  +  set b [replace_elems $bigdoc  20 y   21 x  887 x 888 y]
          353  +  set c [replace_elems $bigdoc   1 z  444 z  789 z]
          354  +  execsql {
          355  +    CREATE VIRTUAL TABLE yy USING fts5(c1, c2, c3);
          356  +    INSERT INTO yy(rowid, c1, c2, c3) VALUES(-56789, $a, $b, $c);
          357  +    INSERT INTO yy(rowid, c1, c2, c3) VALUES(250, $a, $b, $c);
   325    358     }
          359  +} {}
          360  +
          361  +foreach {tn expr} {
          362  +  1 x    
          363  +  2 y    
          364  +  3 z
          365  +
          366  +  4 {c1 : x} 5 {c2 : x} 6 {c3 : x}
          367  +  7 {c1 : y} 8 {c2 : y} 9 {c3 : y}
          368  +  10 {c1 : z} 11 {c2 : z} 12 {c3 : z}
          369  +
          370  +
          371  +} {
          372  +breakpoint
          373  +  do_auto_test 4.$tn yy {c1 c2 c3} $expr
   326    374   }
          375  +
          376  +
   327    377   
   328    378   finish_test
   329    379   

Changes to ext/fts5/test/fts5fault4.test.

   366    366     faultsim_restore_and_reopen
   367    367     db eval { SELECT * FROM vv }
   368    368   } -body {
   369    369     db eval { SELECT * FROM vv }
   370    370   } -test {
   371    371     faultsim_test_result {0 {a 1 1 b 1 1}} 
   372    372   }
          373  +
          374  +#-------------------------------------------------------------------------
          375  +# OOM in multi-column token query.
          376  +#
          377  +reset_db
          378  +do_execsql_test 13.0 {
          379  +  CREATE VIRTUAL TABLE ft USING fts5(x, y, z);
          380  +  INSERT INTO ft(ft, rank) VALUES('pgsz', 32);
          381  +  INSERT INTO ft VALUES(
          382  +      'x x x x x x x x x x x x x x x x',
          383  +      'y y y y y y y y y y y y y y y y',
          384  +      'z z z z z z z z x x x x x x x x'
          385  +  );
          386  +  INSERT INTO ft SELECT * FROM ft;
          387  +  INSERT INTO ft SELECT * FROM ft;
          388  +  INSERT INTO ft SELECT * FROM ft;
          389  +  INSERT INTO ft SELECT * FROM ft;
          390  +}
          391  +faultsim_save_and_close
          392  +do_faultsim_test 13.1 -faults oom-t* -prep {
          393  +  faultsim_restore_and_reopen
          394  +  db eval { SELECT * FROM ft }
          395  +} -body {
          396  +  db eval { SELECT rowid FROM ft WHERE ft MATCH '{x z}: x' }
          397  +} -test {
          398  +  faultsim_test_result {0 {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16}}
          399  +}
          400  +
   373    401   
   374    402   finish_test
   375    403