SQLite

Check-in [528036c828]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix the FTS1 test cases and add new tests. Comments added to the FTS1 code. (CVS 3409)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 528036c828c93c78ca879bf89a52131b72e24067
User & Date: drh 2006-09-13 12:36:09.000
Context
2006-09-13
15:20
Module spec parser enhancements for FTS1. Now able to cope with column names in the spec that are SQL keywords or have special characters, etc. Also added support for additional control lines. Column names can be followed by a type specifier (which is ignored.) (CVS 3410) (check-in: adb780e0dc user: drh tags: trunk)
12:36
Fix the FTS1 test cases and add new tests. Comments added to the FTS1 code. (CVS 3409) (check-in: 528036c828 user: drh tags: trunk)
02:18
Allow virtual tables to contain multiple full-text-indexed columns. Added a magic column "_all" which can be used for querying all columns in a table at once.

For now, each posting list stores position/offset information for multiple columns. We may implement separate posting lists for separate columns at some future point. (CVS 3408) (check-in: 366a70b086 user: adamd tags: trunk)

Changes
Side-by-Side Diff Ignore Whitespace Patch
Changes to ext/fts1/fts1.c.
428
429
430
431
432
433
434
435

436
437
438
439


440
441
442
443
444
445
446
428
429
430
431
432
433
434

435
436
437


438
439
440
441
442
443
444
445
446







-
+


-
-
+
+







    if( docid==0 ){
      skipPositionList(&r);
      continue;
    }
    printf("%s%lld", zSep, docid);
    zSep =  ",";
    if( p->iType>=DL_POSITIONS ){
      int iPos;
      int iPos, iCol;
      const char *zDiv = "";
      printf("(");
      while( (iPos = readPosition(&r))>=0 ){
        printf("%s%d", zDiv, iPos);
      while( (iPos = readPosition(&r, &iCol))>=0 ){
        printf("%s%d:%d", zDiv, iCol, iPos);
        zDiv = ":";
      }
      printf(")");
    }
  }
  printf("\n");
  fflush(stdout);
606
607
608
609
610
611
612
613

614
615
616
617
618
619
620
606
607
608
609
610
611
612

613
614
615
616
617
618
619
620







-
+







      }
      if( pOut->iType>=DL_POSITIONS ){
        docListAddPos(pOut, iRightCol, iRightPos);
      }
      iLeftPos = readPosition(pLeft, &iLeftCol);
      iRightPos = readPosition(pRight, &iRightCol);
    }else if( iRightCol<iLeftCol ||
              iRightCol==iLeftCol && iRightPos<iLeftPos+1 ){
              (iRightCol==iLeftCol && iRightPos<iLeftPos+1) ){
      iRightPos = readPosition(pRight, &iRightCol);
    }else{
      iLeftPos = readPosition(pLeft, &iLeftCol);
    }
  }
  if( iLeftPos>=0 ) skipPositionList(pLeft);
  if( iRightPos>=0 ) skipPositionList(pRight);
1122
1123
1124
1125
1126
1127
1128






1129
1130
1131







1132
1133
1134
1135
1136
1137
1138
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135


1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149







+
+
+
+
+
+

-
-
+
+
+
+
+
+
+







  return rc==SQLITE_DONE ? SQLITE_ROW : rc;
}

/* Load the segment doclists for term pTerm and merge them in
** appropriate order into out.  Returns SQLITE_OK if successful.  If
** there are no segments for pTerm, successfully returns an empty
** doclist in out.
**
** Each document consists of 1 or more "columns".  The number of
** columns is v->nColumn.  If iColumn==v->nColumn, then return
** position information about all columns.  If iColumn<v->nColumn,
** then only return position information about the iColumn-th column
** (where the first column is 0).
*/
static int term_select_all(fulltext_vtab *v, int iColumn,
                           const char *pTerm, int nTerm, DocList *out){
static int term_select_all(
  fulltext_vtab *v,     /* The fulltext index we are querying against */
  int iColumn,          /* If <nColumn, only look at the iColumn-th column */
  const char *pTerm,    /* The term whose posting lists we want */
  int nTerm,            /* Number of bytes in pTerm */
  DocList *out          /* Write the resulting doclist here */
){
  DocList doclist;
  sqlite3_stmt *s;
  int rc = sql_get_statement(v, TERM_SELECT_ALL_STMT, &s);
  if( rc!=SQLITE_OK ) return rc;

  rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC);
  if( rc!=SQLITE_OK ) return rc;
1442
1443
1444
1445
1446
1447
1448
1449

1450
1451
1452
1453
1454
1455
1456
1453
1454
1455
1456
1457
1458
1459

1460
1461
1462
1463
1464
1465
1466
1467







-
+







  if( rc!=SQLITE_OK ) return rc;

  rc = connect(db, &spec, ppVTab, pzErr);
  destroyTableSpec(&spec);
  return rc;
}

  /* The %_content table holds the text of each full-text item, with
  /* The %_content table holds the text of each document, with
  ** the rowid used as the docid.
  **
  ** The %_term table maps each term to a document list blob
  ** containing elements sorted by ascending docid, each element
  ** encoded as:
  **
  **   docid varint-encoded
1641
1642
1643
1644
1645
1646
1647
1648

1649
1650
1651
1652
1653
1654
1655
1652
1653
1654
1655
1656
1657
1658

1659
1660
1661
1662
1663
1664
1665
1666







-
+







** is the first term of a phrase query, go ahead and evaluate the phrase
** query and return the doclist for the entire phrase query.
**
** The result is stored in pTerm->doclist.
*/
static int docListOfTerm(
  fulltext_vtab *v,     /* The full text index */
  int iColumn,           /* column to restrict to */
  int iColumn,          /* column to restrict to.  No restrition if >=nColumn */
  QueryTerm *pQTerm,    /* Term we are looking for, or 1st term of a phrase */
  DocList **ppResult    /* Write the result here */
){
  DocList *pLeft, *pRight, *pNew;
  int i, rc;

  pLeft = docListNew(DL_POSITIONS);
1885
1886
1887
1888
1889
1890
1891
















1892
1893
1894





1895
1896
1897
1898
1899
1900
1901
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918



1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
+
+
+
+
+







  }

  queryDestroy(&q);
  *pResult = pLeft;
  return rc;
}

/*
** This is the xFilter interface for the virtual table.  See
** the virtual table xFilter method documentation for additional
** information.
**
** If idxNum==QUERY_GENERIC then do a full table scan against
** the %_content table.
**
** If idxNum==QUERY_ROWID then do a rowid lookup for a single entry
** in the %_content table.
**
** If idxNum>=QUERY_FULLTEXT then use the full text index.  The
** column on the left-hand side of the MATCH operator is column
** number idxNum-QUERY_FULLTEXT, 0 indexed.  argv[0] is the right-hand
** side of the MATCH operator.
*/
static int fulltextFilter(sqlite3_vtab_cursor *pCursor,
                          int idxNum, const char *idxStr,
                          int argc, sqlite3_value **argv){
static int fulltextFilter(
  sqlite3_vtab_cursor *pCursor,     /* The cursor used for this query */
  int idxNum, const char *idxStr,   /* Which indexing scheme to use */
  int argc, sqlite3_value **argv    /* Arguments for the indexing scheme */
){
  fulltext_cursor *c = (fulltext_cursor *) pCursor;
  fulltext_vtab *v = cursor_vtab(c);
  int rc;
  StringBuffer sb;

  TRACE(("FTS1 Filter %p\n",pCursor));

Changes to test/fts1a.test.
1
2
3
4
5
6
7
8
9
10
11
12
13
14

15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31

32
33
34
35
36
37
38
1
2
3
4
5
6
7
8
9
10
11
12
13

14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30

31
32
33
34
35
36
37
38













-
+
















-
+







# 2006 September 9
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the FTS1 module.
#
# $Id: fts1a.test,v 1.1 2006/09/10 03:34:06 drh Exp $
# $Id: fts1a.test,v 1.2 2006/09/13 12:36:09 drh Exp $
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# If SQLITE_ENABLE_FTS1 is defined, omit this file.
ifcapable !fts1 {
  finish_test
  return
}

# Construct a full-text search table containing five keywords:
# one, two, three, four, and five, in various combinations.  The
# rowid for each will be a bitmask for the elements it contains.
#
db eval {
  CREATE VIRTUAL TABLE t1 USING fts1;
  CREATE VIRTUAL TABLE t1 USING fts1(content);
  INSERT INTO t1(content) VALUES('one');
  INSERT INTO t1(content) VALUES('two');
  INSERT INTO t1(content) VALUES('one two');
  INSERT INTO t1(content) VALUES('three');
  INSERT INTO t1(content) VALUES('one three');
  INSERT INTO t1(content) VALUES('two three');
  INSERT INTO t1(content) VALUES('one two three');
Added test/fts1b.test.




















































































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
# 2006 September 13
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
# This file implements regression tests for SQLite library.  The
# focus of this script is testing the FTS1 module.
#
# $Id: fts1b.test,v 1.1 2006/09/13 12:36:09 drh Exp $
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# If SQLITE_ENABLE_FTS1 is defined, omit this file.
ifcapable !fts1 {
  finish_test
  return
}

# Fill the full-text index "t1" with phrases in english, spanish,
# and german.  For the i-th row, fill in the names for the bits
# that are set in the value of i.  The least significant bit is
# 1.  For example,  the value 5 is 101 in binary which will be
# converted to "one three" in english.
#
proc fill_multilanguage_fulltext_t1 {} {
  set english {one two three four five}
  set spanish {un dos tres cuatro cinco}
  set german {eine zwei drei vier funf}
  
  for {set i 1} {$i<=31} {incr i} {
    set cmd "INSERT INTO t1 VALUES"
    set vset {}
    foreach lang {english spanish german} {
      set words {}
      for {set j 0; set k 1} {$j<5} {incr j; incr k $k} {
        if {$k&$i} {lappend words [lindex [set $lang] $j]}
      }
      lappend vset "'$words'"
    }
    set sql "INSERT INTO t1(english,spanish,german) VALUES([join $vset ,])"
    # puts $sql
    db eval $sql
  }
}

# Construct a full-text search table containing five keywords:
# one, two, three, four, and five, in various combinations.  The
# rowid for each will be a bitmask for the elements it contains.
#
db eval {
  CREATE VIRTUAL TABLE t1 USING fts1(english,spanish,german);
}
fill_multilanguage_fulltext_t1

do_test fts1b-1.1 {
  execsql {SELECT rowid FROM t1 WHERE english MATCH 'one'}
} {1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31}
do_test fts1b-1.2 {
  execsql {SELECT rowid FROM t1 WHERE spanish MATCH 'one'}
} {}
do_test fts1b-1.3 {
  execsql {SELECT rowid FROM t1 WHERE german MATCH 'one'}
} {}
do_test fts1b-1.4 {
  execsql {SELECT rowid FROM t1 WHERE _all MATCH 'one'}
} {1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31}
do_test fts1b-1.5 {
  execsql {SELECT rowid FROM t1 WHERE _all MATCH 'one dos drei'}
} {7 15 23 31}
do_test fts1b-1.6 {
  execsql {SELECT english, spanish, german FROM t1 WHERE rowid=1}
} {one un eine}
do_test fts1b-1.7 {
  execsql {SELECT rowid FROM t1 WHERE _all MATCH '"one un"'}
} {}

finish_test