SQLite

Check-in [0cdf5028]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix a problem with fts3 prefix terms within phrase queries on "order=DESC" tables with a mix of negative and positive rowids.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts3-prefix-query-fix
Files: files | file ages | folders
SHA1: 0cdf502885ea7e5805d7ba3719f055f5d48fc78d
User & Date: dan 2015-04-06 09:05:29
Context
2015-04-06
11:04
Fix a problem with fts3 prefix terms within phrase queries on "order=DESC" tables with a mix of negative and positive rowids. (check-in: 3ad829e5 user: dan tags: trunk)
09:05
Fix a problem with fts3 prefix terms within phrase queries on "order=DESC" tables with a mix of negative and positive rowids. (Closed-Leaf check-in: 0cdf5028 user: dan tags: fts3-prefix-query-fix)
2015-04-04
16:49
Fix a problem with resolving ORDER BY clauses that feature COLLATE clauses attached to compound SELECT statements. (check-in: edc1de2a user: dan tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts3/fts3.c.
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513

2514
2515
2516
2517
2518
2519
2520
2521
2522





2523
2524

2525
2526
2527
2528
2529
2530
2531
**
** If the docids in the input doclists are sorted in ascending order,
** parameter bDescDoclist should be false. If they are sorted in ascending 
** order, it should be passed a non-zero value.
**
** The right-hand input doclist is overwritten by this function.
*/
static void fts3DoclistPhraseMerge(
  int bDescDoclist,               /* True if arguments are desc */
  int nDist,                      /* Distance from left to right (1=adjacent) */
  char *aLeft, int nLeft,         /* Left doclist */
  char *aRight, int *pnRight      /* IN/OUT: Right/output doclist */
){
  sqlite3_int64 i1 = 0;
  sqlite3_int64 i2 = 0;
  sqlite3_int64 iPrev = 0;

  char *pEnd1 = &aLeft[nLeft];
  char *pEnd2 = &aRight[*pnRight];
  char *p1 = aLeft;
  char *p2 = aRight;
  char *p;
  int bFirstOut = 0;
  char *aOut = aRight;

  assert( nDist>0 );






  p = aOut;

  fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1);
  fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2);

  while( p1 && p2 ){
    sqlite3_int64 iDiff = DOCID_CMP(i1, i2);
    if( iDiff==0 ){
      char *pSave = p;







|



|




>






|


>
>
>
>
>
|

>







2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
**
** If the docids in the input doclists are sorted in ascending order,
** parameter bDescDoclist should be false. If they are sorted in ascending 
** order, it should be passed a non-zero value.
**
** The right-hand input doclist is overwritten by this function.
*/
static int fts3DoclistPhraseMerge(
  int bDescDoclist,               /* True if arguments are desc */
  int nDist,                      /* Distance from left to right (1=adjacent) */
  char *aLeft, int nLeft,         /* Left doclist */
  char **paRight, int *pnRight    /* IN/OUT: Right/output doclist */
){
  sqlite3_int64 i1 = 0;
  sqlite3_int64 i2 = 0;
  sqlite3_int64 iPrev = 0;
  char *aRight = *paRight;
  char *pEnd1 = &aLeft[nLeft];
  char *pEnd2 = &aRight[*pnRight];
  char *p1 = aLeft;
  char *p2 = aRight;
  char *p;
  int bFirstOut = 0;
  char *aOut;

  assert( nDist>0 );
  if( bDescDoclist ){
    aOut = sqlite3_malloc(*pnRight + FTS3_VARINT_MAX);
    if( aOut==0 ) return SQLITE_NOMEM;
  }else{
    aOut = aRight;
  }
  p = aOut;

  fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1);
  fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2);

  while( p1 && p2 ){
    sqlite3_int64 iDiff = DOCID_CMP(i1, i2);
    if( iDiff==0 ){
      char *pSave = p;
2546
2547
2548
2549
2550
2551
2552






2553
2554
2555
2556
2557
2558
2559
    }else{
      fts3PoslistCopy(0, &p2);
      fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
    }
  }

  *pnRight = (int)(p - aOut);






}

/*
** Argument pList points to a position list nList bytes in size. This
** function checks to see if the position list contains any entries for
** a token in position 0 (of any column). If so, it writes argument iDelta
** to the output buffer pOut, followed by a position list consisting only







>
>
>
>
>
>







2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
    }else{
      fts3PoslistCopy(0, &p2);
      fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
    }
  }

  *pnRight = (int)(p - aOut);
  if( bDescDoclist ){
    sqlite3_free(aRight);
    *paRight = aOut;
  }

  return SQLITE_OK;
}

/*
** Argument pList points to a position list nList bytes in size. This
** function checks to see if the position list contains any entries for
** a token in position 0 (of any column). If so, it writes argument iDelta
** to the output buffer pOut, followed by a position list consisting only
2670
2671
2672
2673
2674
2675
2676
2677














2678
2679
2680
2681
2682
2683
2684
2685
  Fts3Table *p,                   /* FTS table handle */
  TermSelect *pTS,                /* TermSelect object to merge into */
  char *aDoclist,                 /* Pointer to doclist */
  int nDoclist                    /* Size of aDoclist in bytes */
){
  if( pTS->aaOutput[0]==0 ){
    /* If this is the first term selected, copy the doclist to the output
    ** buffer using memcpy(). */














    pTS->aaOutput[0] = sqlite3_malloc(nDoclist);
    pTS->anOutput[0] = nDoclist;
    if( pTS->aaOutput[0] ){
      memcpy(pTS->aaOutput[0], aDoclist, nDoclist);
    }else{
      return SQLITE_NOMEM;
    }
  }else{







|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|







2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
  Fts3Table *p,                   /* FTS table handle */
  TermSelect *pTS,                /* TermSelect object to merge into */
  char *aDoclist,                 /* Pointer to doclist */
  int nDoclist                    /* Size of aDoclist in bytes */
){
  if( pTS->aaOutput[0]==0 ){
    /* If this is the first term selected, copy the doclist to the output
    ** buffer using memcpy(). 
    **
    ** Add FTS3_VARINT_MAX bytes of unused space to the end of the 
    ** allocation. This is so as to ensure that the buffer is big enough
    ** to hold the current doclist AND'd with any other doclist. If the
    ** doclists are stored in order=ASC order, this padding would not be
    ** required (since the size of [doclistA AND doclistB] is always less
    ** than or equal to the size of [doclistA] in that case). But this is
    ** not true for order=DESC. For example, a doclist containing (1, -1) 
    ** may be smaller than (-1), as in the first example the -1 may be stored
    ** as a single-byte delta, whereas in the second it must be stored as a
    ** FTS3_VARINT_MAX byte varint.
    **
    ** Similar padding is added in the fts3DoclistOrMerge() function.
    */
    pTS->aaOutput[0] = sqlite3_malloc(nDoclist + FTS3_VARINT_MAX + 1);
    pTS->anOutput[0] = nDoclist;
    if( pTS->aaOutput[0] ){
      memcpy(pTS->aaOutput[0], aDoclist, nDoclist);
    }else{
      return SQLITE_NOMEM;
    }
  }else{
3927
3928
3929
3930
3931
3932
3933


3934
3935
3936
3937
3938
3939
3940
3941

3942
3943
3944
3945
3946
3947
3948
/*
** Arguments pList/nList contain the doclist for token iToken of phrase p.
** It is merged into the main doclist stored in p->doclist.aAll/nAll.
**
** This function assumes that pList points to a buffer allocated using
** sqlite3_malloc(). This function takes responsibility for eventually
** freeing the buffer.


*/
static void fts3EvalPhraseMergeToken(
  Fts3Table *pTab,                /* FTS Table pointer */
  Fts3Phrase *p,                  /* Phrase to merge pList/nList into */
  int iToken,                     /* Token pList/nList corresponds to */
  char *pList,                    /* Pointer to doclist */
  int nList                       /* Number of bytes in pList */
){

  assert( iToken!=p->iDoclistToken );

  if( pList==0 ){
    sqlite3_free(p->doclist.aAll);
    p->doclist.aAll = 0;
    p->doclist.nAll = 0;
  }







>
>

|






>







3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
/*
** Arguments pList/nList contain the doclist for token iToken of phrase p.
** It is merged into the main doclist stored in p->doclist.aAll/nAll.
**
** This function assumes that pList points to a buffer allocated using
** sqlite3_malloc(). This function takes responsibility for eventually
** freeing the buffer.
**
** SQLITE_OK is returned if successful, or SQLITE_NOMEM if an error occurs.
*/
static int fts3EvalPhraseMergeToken(
  Fts3Table *pTab,                /* FTS Table pointer */
  Fts3Phrase *p,                  /* Phrase to merge pList/nList into */
  int iToken,                     /* Token pList/nList corresponds to */
  char *pList,                    /* Pointer to doclist */
  int nList                       /* Number of bytes in pList */
){
  int rc = SQLITE_OK;
  assert( iToken!=p->iDoclistToken );

  if( pList==0 ){
    sqlite3_free(p->doclist.aAll);
    p->doclist.aAll = 0;
    p->doclist.nAll = 0;
  }
3973
3974
3975
3976
3977
3978
3979

3980

3981
3982
3983
3984
3985
3986

3987
3988
3989
3990
3991
3992
3993
      pRight = p->doclist.aAll;
      nRight = p->doclist.nAll;
      pLeft = pList;
      nLeft = nList;
      nDiff = p->iDoclistToken - iToken;
    }


    fts3DoclistPhraseMerge(pTab->bDescIdx, nDiff, pLeft, nLeft, pRight,&nRight);

    sqlite3_free(pLeft);
    p->doclist.aAll = pRight;
    p->doclist.nAll = nRight;
  }

  if( iToken>p->iDoclistToken ) p->iDoclistToken = iToken;

}

/*
** Load the doclist for phrase p into p->doclist.aAll/nAll. The loaded doclist
** does not take deferred tokens into account.
**
** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.







>
|
>






>







4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
      pRight = p->doclist.aAll;
      nRight = p->doclist.nAll;
      pLeft = pList;
      nLeft = nList;
      nDiff = p->iDoclistToken - iToken;
    }

    rc = fts3DoclistPhraseMerge(
        pTab->bDescIdx, nDiff, pLeft, nLeft, &pRight, &nRight
    );
    sqlite3_free(pLeft);
    p->doclist.aAll = pRight;
    p->doclist.nAll = nRight;
  }

  if( iToken>p->iDoclistToken ) p->iDoclistToken = iToken;
  return rc;
}

/*
** Load the doclist for phrase p into p->doclist.aAll/nAll. The loaded doclist
** does not take deferred tokens into account.
**
** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
    assert( pToken->pDeferred==0 || pToken->pSegcsr==0 );

    if( pToken->pSegcsr ){
      int nThis = 0;
      char *pThis = 0;
      rc = fts3TermSelect(pTab, pToken, p->iColumn, &nThis, &pThis);
      if( rc==SQLITE_OK ){
        fts3EvalPhraseMergeToken(pTab, p, iToken, pThis, nThis);
      }
    }
    assert( pToken->pSegcsr==0 );
  }

  return rc;
}







|







4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
    assert( pToken->pDeferred==0 || pToken->pSegcsr==0 );

    if( pToken->pSegcsr ){
      int nThis = 0;
      char *pThis = 0;
      rc = fts3TermSelect(pTab, pToken, p->iColumn, &nThis, &pThis);
      if( rc==SQLITE_OK ){
        rc = fts3EvalPhraseMergeToken(pTab, p, iToken, pThis, nThis);
      }
    }
    assert( pToken->pSegcsr==0 );
  }

  return rc;
}
4807
4808
4809
4810
4811
4812
4813
4814





4815
4816
4817
4818
4819
4820
4821
4822
4823
        ** part of a multi-token phrase. Either way, the entire doclist will
        ** (eventually) be loaded into memory. It may as well be now. */
        Fts3PhraseToken *pToken = pTC->pToken;
        int nList = 0;
        char *pList = 0;
        rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList);
        assert( rc==SQLITE_OK || pList==0 );
        if( rc==SQLITE_OK ){





          int nCount;
          fts3EvalPhraseMergeToken(pTab, pTC->pPhrase, pTC->iToken,pList,nList);
          nCount = fts3DoclistCountDocids(
              pTC->pPhrase->doclist.aAll, pTC->pPhrase->doclist.nAll
          );
          if( ii==0 || nCount<nMinEst ) nMinEst = nCount;
        }
      }
    }








>
>
>
>
>

<







4840
4841
4842
4843
4844
4845
4846
4847
4848
4849
4850
4851
4852
4853

4854
4855
4856
4857
4858
4859
4860
        ** part of a multi-token phrase. Either way, the entire doclist will
        ** (eventually) be loaded into memory. It may as well be now. */
        Fts3PhraseToken *pToken = pTC->pToken;
        int nList = 0;
        char *pList = 0;
        rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList);
        assert( rc==SQLITE_OK || pList==0 );
        if( rc==SQLITE_OK ){
          rc = fts3EvalPhraseMergeToken(
              pTab, pTC->pPhrase, pTC->iToken,pList,nList
          );
        }
        if( rc==SQLITE_OK ){
          int nCount;

          nCount = fts3DoclistCountDocids(
              pTC->pPhrase->doclist.aAll, pTC->pPhrase->doclist.nAll
          );
          if( ii==0 || nCount<nMinEst ) nMinEst = nCount;
        }
      }
    }
Changes to test/fts3prefix.test.
269
270
271
272
273
274
275


















276
277
  CREATE VIRTUAL TABLE t2 USING fts4(prefix=);
  INSERT INTO t1 VALUES('He dressed himself in cycling clothes');
  INSERT INTO t2 VALUES('He dressed himself in cycling clothes');
} {}
do_execsql_test 6.5.2 {
  SELECT md5sum(quote(root)) FROM t1_segdir;
} [db eval {SELECT md5sum(quote(root)) FROM t2_segdir}]



















finish_test







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
  CREATE VIRTUAL TABLE t2 USING fts4(prefix=);
  INSERT INTO t1 VALUES('He dressed himself in cycling clothes');
  INSERT INTO t2 VALUES('He dressed himself in cycling clothes');
} {}
do_execsql_test 6.5.2 {
  SELECT md5sum(quote(root)) FROM t1_segdir;
} [db eval {SELECT md5sum(quote(root)) FROM t2_segdir}]


do_execsql_test 7.0 {
  CREATE VIRTUAL TABLE t6 USING fts4(x,order=DESC);
  INSERT INTO t6(docid, x) VALUES(-1,'a b');
  INSERT INTO t6(docid, x) VALUES(1, 'b');
}
do_execsql_test 7.1 {
  SELECT docid FROM t6 WHERE t6 MATCH '"a* b"';
} {-1}
do_execsql_test 7.2 {
  SELECT docid FROM t6 WHERE t6 MATCH 'a*';
} {-1}
do_execsql_test 7.3 {
  SELECT docid FROM t6 WHERE t6 MATCH 'a* b';
} {-1}



finish_test