/ Check-in [0e3c5454]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix bugs in fts5 synonym processing for detail=col and other modes.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5-offsets
Files: files | file ages | folders
SHA1: 0e3c545423246231ddac8fed2f103d71d556e17f
User & Date: dan 2016-01-11 17:30:28
Context
2016-01-11
18:23
Add the fts5speed.tcl script to ext/fts5/tool/. check-in: 2c0b9355 user: dan tags: fts5-offsets
17:30
Fix bugs in fts5 synonym processing for detail=col and other modes. check-in: 0e3c5454 user: dan tags: fts5-offsets
2016-01-08
17:21
Begin adding fts5 tests involving synonyms and detail=none/col tables. check-in: b3e6f15e user: dan tags: fts5-offsets
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5_expr.c.

36
37
38
39
40
41
42

43
44
45
46
47
48
49
...
231
232
233
234
235
236
237

238
239
240
241
242
243
244
...
295
296
297
298
299
300
301
302
303

304
305
306
307
308
309
310
...
315
316
317
318
319
320
321




322
323


324

325
326
327
328
329
330
331
...
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
...
753
754
755
756
757
758
759





















760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783

784
785
786
787
788
789
790
....
1602
1603
1604
1605
1606
1607
1608

1609
1610
1611
1612
1613
1614
1615
....
2449
2450
2451
2452
2453
2454
2455
2456
2457

2458
2459
2460
2461















2462

2463
2464
2465
2466

2467
2468
2469
#include <stdio.h>
void sqlite3Fts5ParserTrace(FILE*, char*);
#endif


struct Fts5Expr {
  Fts5Index *pIndex;

  Fts5ExprNode *pRoot;
  int bDesc;                      /* Iterate in descending rowid order */
  int nPhrase;                    /* Number of phrases in expression */
  Fts5ExprPhrase **apExprPhrase;  /* Pointers to phrase objects */
};

/*
................................................................................
    *ppNew = pNew = sqlite3_malloc(sizeof(Fts5Expr));
    if( pNew==0 ){
      sParse.rc = SQLITE_NOMEM;
      sqlite3Fts5ParseNodeFree(sParse.pExpr);
    }else{
      pNew->pRoot = sParse.pExpr;
      pNew->pIndex = 0;

      pNew->apExprPhrase = sParse.apPhrase;
      pNew->nPhrase = sParse.nPhrase;
      sParse.apPhrase = 0;
    }
  }

  sqlite3_free(sParse.apPhrase);
................................................................................
  if( pbEof && bRetValid==0 ) *pbEof = 1;
  return iRet;
}

/*
** Argument pTerm must be a synonym iterator.
*/
static int fts5ExprSynonymPoslist(
  Fts5ExprTerm *pTerm, 

  Fts5Colset *pColset,
  i64 iRowid,
  int *pbDel,                     /* OUT: Caller should sqlite3_free(*pa) */
  u8 **pa, int *pn
){
  Fts5PoslistReader aStatic[4];
  Fts5PoslistReader *aIter = aStatic;
................................................................................

  assert( pTerm->pSynonym );
  for(p=pTerm; p; p=p->pSynonym){
    Fts5IndexIter *pIter = p->pIter;
    if( sqlite3Fts5IterEof(pIter)==0 && sqlite3Fts5IterRowid(pIter)==iRowid ){
      const u8 *a;
      int n;




      i64 dummy;
      rc = sqlite3Fts5IterPoslist(pIter, pColset, &a, &n, &dummy);


      if( rc!=SQLITE_OK ) goto synonym_poslist_out;

      if( nIter==nAlloc ){
        int nByte = sizeof(Fts5PoslistReader) * nAlloc * 2;
        Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc(nByte);
        if( aNew==0 ){
          rc = SQLITE_NOMEM;
          goto synonym_poslist_out;
        }
................................................................................
  for(i=0; i<pPhrase->nTerm; i++){
    Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
    i64 dummy;
    int n = 0;
    int bFlag = 0;
    const u8 *a = 0;
    if( pTerm->pSynonym ){
      rc = fts5ExprSynonymPoslist(
          pTerm, pColset, pNode->iRowid, &bFlag, (u8**)&a, &n
      );
    }else{
      rc = sqlite3Fts5IterPoslist(pTerm->pIter, pColset, &a, &n, &dummy);
    }
    if( rc!=SQLITE_OK ) goto ismatch_out;
    sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]);
    aIter[i].bFlag = (u8)bFlag;
................................................................................
static int fts5ExprNearTest(
  int *pRc,
  Fts5Expr *pExpr,                /* Expression that pNear is a part of */
  Fts5ExprNode *pNode             /* The "NEAR" node (FTS5_STRING) */
){
  Fts5ExprNearset *pNear = pNode->pNear;
  int rc = *pRc;





















  int i;

  /* Check that each phrase in the nearset matches the current row.
  ** Populate the pPhrase->poslist buffers at the same time. If any
  ** phrase is not a match, break out of the loop early.  */
  for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
    Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
    if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym || pNear->pColset ){
      int bMatch = 0;
      rc = fts5ExprPhraseIsMatch(pNode, pNear->pColset, pPhrase, &bMatch);
      if( bMatch==0 ) break;
    }else{
      rc = sqlite3Fts5IterPoslistBuffer(
          pPhrase->aTerm[0].pIter, &pPhrase->poslist
      );
    }
  }

  *pRc = rc;
  if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){
    return 1;
  }

  return 0;

}

static int fts5ExprTokenTest(
  Fts5Expr *pExpr,                /* Expression that pNear is a part of */
  Fts5ExprNode *pNode             /* The "NEAR" node (FTS5_TERM) */
){
  /* As this "NEAR" object is actually a single phrase that consists 
................................................................................
      sCtx.pPhrase->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix;
    }
  }

  if( rc==SQLITE_OK ){
    /* All the allocations succeeded. Put the expression object together. */
    pNew->pIndex = pExpr->pIndex;

    pNew->nPhrase = 1;
    pNew->apExprPhrase[0] = sCtx.pPhrase;
    pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase;
    pNew->pRoot->pNear->nPhrase = 1;
    sCtx.pPhrase->pNode = pNew->pRoot;

    if( pOrig->nTerm==1 && pOrig->aTerm[0].pSynonym==0 ){
................................................................................
  Fts5Expr *pExpr, 
  int iPhrase, 
  const u8 **ppCollist, 
  int *pnCollist
){
  Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase];
  Fts5ExprNode *pNode = pPhrase->pNode;
  assert( iPhrase>=0 && iPhrase<pExpr->nPhrase );


  if( pNode->bEof==0 
   && pNode->iRowid==pExpr->pRoot->iRowid 
   && pPhrase->poslist.n>0
  ){















    sqlite3Fts5IterCollist(pPhrase->aTerm[0].pIter, ppCollist, pnCollist);

  }else{
    *ppCollist = 0;
    *pnCollist = 0;
  }

  return SQLITE_OK;
}








>







 







>







 







|

>







 







>
>
>
>
|
|
>
>

>







 







|
|







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|

|
|
|
|
|
|
|
|
|
|
|
|
|
|
|

|
|
|
|
<
|
>







 







>







 







|

>




>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>




>
|


36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
...
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
...
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
...
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
...
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
...
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812

813
814
815
816
817
818
819
820
821
....
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
....
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
#include <stdio.h>
void sqlite3Fts5ParserTrace(FILE*, char*);
#endif


struct Fts5Expr {
  Fts5Index *pIndex;
  Fts5Config *pConfig;
  Fts5ExprNode *pRoot;
  int bDesc;                      /* Iterate in descending rowid order */
  int nPhrase;                    /* Number of phrases in expression */
  Fts5ExprPhrase **apExprPhrase;  /* Pointers to phrase objects */
};

/*
................................................................................
    *ppNew = pNew = sqlite3_malloc(sizeof(Fts5Expr));
    if( pNew==0 ){
      sParse.rc = SQLITE_NOMEM;
      sqlite3Fts5ParseNodeFree(sParse.pExpr);
    }else{
      pNew->pRoot = sParse.pExpr;
      pNew->pIndex = 0;
      pNew->pConfig = pConfig;
      pNew->apExprPhrase = sParse.apPhrase;
      pNew->nPhrase = sParse.nPhrase;
      sParse.apPhrase = 0;
    }
  }

  sqlite3_free(sParse.apPhrase);
................................................................................
  if( pbEof && bRetValid==0 ) *pbEof = 1;
  return iRet;
}

/*
** Argument pTerm must be a synonym iterator.
*/
static int fts5ExprSynonymList(
  Fts5ExprTerm *pTerm, 
  int bCollist, 
  Fts5Colset *pColset,
  i64 iRowid,
  int *pbDel,                     /* OUT: Caller should sqlite3_free(*pa) */
  u8 **pa, int *pn
){
  Fts5PoslistReader aStatic[4];
  Fts5PoslistReader *aIter = aStatic;
................................................................................

  assert( pTerm->pSynonym );
  for(p=pTerm; p; p=p->pSynonym){
    Fts5IndexIter *pIter = p->pIter;
    if( sqlite3Fts5IterEof(pIter)==0 && sqlite3Fts5IterRowid(pIter)==iRowid ){
      const u8 *a;
      int n;

      if( bCollist ){
        rc = sqlite3Fts5IterCollist(pIter, &a, &n);
      }else{
        i64 dummy;
        rc = sqlite3Fts5IterPoslist(pIter, pColset, &a, &n, &dummy);
      }

      if( rc!=SQLITE_OK ) goto synonym_poslist_out;
      if( n==0 ) continue;
      if( nIter==nAlloc ){
        int nByte = sizeof(Fts5PoslistReader) * nAlloc * 2;
        Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc(nByte);
        if( aNew==0 ){
          rc = SQLITE_NOMEM;
          goto synonym_poslist_out;
        }
................................................................................
  for(i=0; i<pPhrase->nTerm; i++){
    Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
    i64 dummy;
    int n = 0;
    int bFlag = 0;
    const u8 *a = 0;
    if( pTerm->pSynonym ){
      rc = fts5ExprSynonymList(
          pTerm, 0, pColset, pNode->iRowid, &bFlag, (u8**)&a, &n
      );
    }else{
      rc = sqlite3Fts5IterPoslist(pTerm->pIter, pColset, &a, &n, &dummy);
    }
    if( rc!=SQLITE_OK ) goto ismatch_out;
    sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]);
    aIter[i].bFlag = (u8)bFlag;
................................................................................
static int fts5ExprNearTest(
  int *pRc,
  Fts5Expr *pExpr,                /* Expression that pNear is a part of */
  Fts5ExprNode *pNode             /* The "NEAR" node (FTS5_STRING) */
){
  Fts5ExprNearset *pNear = pNode->pNear;
  int rc = *pRc;

  if( pExpr->pConfig->eDetail!=FTS5_DETAIL_FULL ){
    Fts5ExprTerm *pTerm;
    Fts5ExprPhrase *pPhrase = pNear->apPhrase[0];
    pPhrase->poslist.n = 0;
    for(pTerm=&pPhrase->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){
      Fts5IndexIter *pIter = pTerm->pIter;
      if( sqlite3Fts5IterEof(pIter)==0 ){
        int n;
        i64 iRowid;
        rc = sqlite3Fts5IterPoslist(pIter, pNear->pColset, 0, &n, &iRowid);
        if( rc!=SQLITE_OK ){
          *pRc = rc;
          return 0;
        }else if( iRowid==pNode->iRowid && n>0 ){
          pPhrase->poslist.n = 1;
        }
      }
    }
    return pPhrase->poslist.n;
  }else{
    int i;

    /* Check that each phrase in the nearset matches the current row.
    ** Populate the pPhrase->poslist buffers at the same time. If any
    ** phrase is not a match, break out of the loop early.  */
    for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
      Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
      if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym || pNear->pColset ){
        int bMatch = 0;
        rc = fts5ExprPhraseIsMatch(pNode, pNear->pColset, pPhrase, &bMatch);
        if( bMatch==0 ) break;
      }else{
        rc = sqlite3Fts5IterPoslistBuffer(
            pPhrase->aTerm[0].pIter, &pPhrase->poslist
        );
      }
    }

    *pRc = rc;
    if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){
      return 1;
    }

    return 0;
  }
}

static int fts5ExprTokenTest(
  Fts5Expr *pExpr,                /* Expression that pNear is a part of */
  Fts5ExprNode *pNode             /* The "NEAR" node (FTS5_TERM) */
){
  /* As this "NEAR" object is actually a single phrase that consists 
................................................................................
      sCtx.pPhrase->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix;
    }
  }

  if( rc==SQLITE_OK ){
    /* All the allocations succeeded. Put the expression object together. */
    pNew->pIndex = pExpr->pIndex;
    pNew->pConfig = pExpr->pConfig;
    pNew->nPhrase = 1;
    pNew->apExprPhrase[0] = sCtx.pPhrase;
    pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase;
    pNew->pRoot->pNear->nPhrase = 1;
    sCtx.pPhrase->pNode = pNew->pRoot;

    if( pOrig->nTerm==1 && pOrig->aTerm[0].pSynonym==0 ){
................................................................................
  Fts5Expr *pExpr, 
  int iPhrase, 
  const u8 **ppCollist, 
  int *pnCollist
){
  Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase];
  Fts5ExprNode *pNode = pPhrase->pNode;
  int rc = SQLITE_OK;

  assert( iPhrase>=0 && iPhrase<pExpr->nPhrase );
  if( pNode->bEof==0 
   && pNode->iRowid==pExpr->pRoot->iRowid 
   && pPhrase->poslist.n>0
  ){
    Fts5ExprTerm *pTerm = &pPhrase->aTerm[0];
    if( pTerm->pSynonym ){
      int bDel = 0;
      u8 *a;
      rc = fts5ExprSynonymList(
          pTerm, 1, 0, pNode->iRowid, &bDel, &a, pnCollist
      );
      if( bDel ){
        sqlite3Fts5BufferSet(&rc, &pPhrase->poslist, *pnCollist, a);
        *ppCollist = pPhrase->poslist.p;
        sqlite3_free(a);
      }else{
        *ppCollist = a;
      }
    }else{
      sqlite3Fts5IterCollist(pPhrase->aTerm[0].pIter, ppCollist, pnCollist);
    }
  }else{
    *ppCollist = 0;
    *pnCollist = 0;
  }

  return rc;
}

Changes to ext/fts5/test/fts5synonym2.test.

9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
..
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
..
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
...
116
117
118
119
120
121
122
123















124
125
126
127
128
129
130

131
132
133
134
135
136
137
138
139
#
#***********************************************************************
#
# Tests focusing on custom tokenizers that support synonyms.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5synonym

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

................................................................................
  }
}

proc tcl_tokenize {tflags text} {
  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $w $iStart $iEnd
    if {$tflags == "query"} {
      foreach s $::SYNDICT($w)  { sqlite3_fts5_token -colo $s $iStart $iEnd }
    }
  }
}

proc tcl_create {args} {
  return "tcl_tokenize"
}
................................................................................
foreach_detail_mode $testprefix {

sqlite3_fts5_create_tokenizer db tcl tcl_create
fts5_aux_test_functions db

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE ss USING fts5(a, b, tokenize=tcl, detail=%DETAIL%);

  INSERT INTO ss VALUES('5 5 five seven 3 seven i', '2 1 5 0 two 1 i');
  INSERT INTO ss VALUES('six ix iii 7 i vii iii', 'one seven nine 4 9 1 vi');
  INSERT INTO ss VALUES('6 viii i five six zero seven', '5 v iii iv iv 3');
  INSERT INTO ss VALUES('9 ii six 8 1 6', 'six 4 iv iv 7');
  INSERT INTO ss VALUES('1 5 4 eight ii iv iii', 'nine 2 eight ix v vii');
  INSERT INTO ss VALUES('one 7 seven six 2 two', '1 2 four 7 4 3 4');
  INSERT INTO ss VALUES('eight iv 4 nine vii six 1', '5 6 v one zero 4');
................................................................................
  INSERT INTO ss VALUES('eight vii eight six 3', 'i vii 1 six 9 vii');
  INSERT INTO ss VALUES('9 0 viii viii five', 'i 1 viii ix 3 4');
  INSERT INTO ss VALUES('three nine 5 nine viii four zero', 'ii i 1 5 2 viii');
  INSERT INTO ss VALUES('5 vii three 9 four', 'three five one 7 2 eight one');
}

foreach {tn expr} {
  1 "eight"















} {
  if {[fts5_expr_ok $expr ss]==0} {
    do_test 1.$tn.OMITTED { list } [list]
    continue
  }

  set res [fts5_query_data $expr ss ASC ::SYNDICT]

  do_execsql_test 1.$tn.[llength $res].asc {
    SELECT rowid, fts5_test_poslist(ss), fts5_test_collist(ss) FROM ss($expr)
  } $res
}

}

finish_test








|







 







|







 







<







 







|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>






|
>









9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
..
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
..
63
64
65
66
67
68
69

70
71
72
73
74
75
76
...
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#
#***********************************************************************
#
# Tests focusing on custom tokenizers that support synonyms.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5synonym2

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

................................................................................
  }
}

proc tcl_tokenize {tflags text} {
  foreach {w iStart iEnd} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $w $iStart $iEnd
    if {$tflags == "query"} {
      foreach s $::syn($w)  { sqlite3_fts5_token -colo $s $iStart $iEnd }
    }
  }
}

proc tcl_create {args} {
  return "tcl_tokenize"
}
................................................................................
foreach_detail_mode $testprefix {

sqlite3_fts5_create_tokenizer db tcl tcl_create
fts5_aux_test_functions db

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE ss USING fts5(a, b, tokenize=tcl, detail=%DETAIL%);

  INSERT INTO ss VALUES('5 5 five seven 3 seven i', '2 1 5 0 two 1 i');
  INSERT INTO ss VALUES('six ix iii 7 i vii iii', 'one seven nine 4 9 1 vi');
  INSERT INTO ss VALUES('6 viii i five six zero seven', '5 v iii iv iv 3');
  INSERT INTO ss VALUES('9 ii six 8 1 6', 'six 4 iv iv 7');
  INSERT INTO ss VALUES('1 5 4 eight ii iv iii', 'nine 2 eight ix v vii');
  INSERT INTO ss VALUES('one 7 seven six 2 two', '1 2 four 7 4 3 4');
  INSERT INTO ss VALUES('eight iv 4 nine vii six 1', '5 6 v one zero 4');
................................................................................
  INSERT INTO ss VALUES('eight vii eight six 3', 'i vii 1 six 9 vii');
  INSERT INTO ss VALUES('9 0 viii viii five', 'i 1 viii ix 3 4');
  INSERT INTO ss VALUES('three nine 5 nine viii four zero', 'ii i 1 5 2 viii');
  INSERT INTO ss VALUES('5 vii three 9 four', 'three five one 7 2 eight one');
}

foreach {tn expr} {
  1.1 "one"   1.2 "two"   1.3 "three"   1.4 "four"
  1.5 "v"     1.6 "vi"    1.7 "vii"     1.8 "viii"
  1.9 "9"    1.10 "0"    1.11 "1"      1.12 "2"

  2.1 "one OR two OR three OR four"
  2.2 "(one AND two) OR (three AND four)"
  2.3 "(one AND two) OR (three AND four) NOT five"
  2.4 "(one AND two) NOT 6"

  3.1 "b:one AND a:two"
  3.2 "b:one OR a:two"
  3.3 "a:one OR b:1 OR {a b} : i"

  4.1 "NEAR(one two, 2)"
  4.2 "NEAR(one two three, 2)"
  4.3 "NEAR(eight nine, 1) OR NEAR(six seven, 1)"
} {
  if {[fts5_expr_ok $expr ss]==0} {
    do_test 1.$tn.OMITTED { list } [list]
    continue
  }

  set res [fts5_query_data $expr ss ASC ::syn]
  breakpoint
  do_execsql_test 1.$tn.[llength $res].asc {
    SELECT rowid, fts5_test_poslist(ss), fts5_test_collist(ss) FROM ss($expr)
  } $res
}

}

finish_test