/ Check-in [528036c8]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix the FTS1 test cases and add new tests. Comments added to the FTS1 code. (CVS 3409)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 528036c828c93c78ca879bf89a52131b72e24067
User & Date: drh 2006-09-13 12:36:09
Context
2006-09-13
15:20
Module spec parser enhancements for FTS1. Now able to cope with column names in the spec that are SQL keywords or have special characters, etc. Also added support for additional control lines. Column names can be followed by a type specifier (which is ignored.) (CVS 3410) check-in: adb780e0 user: drh tags: trunk
12:36
Fix the FTS1 test cases and add new tests. Comments added to the FTS1 code. (CVS 3409) check-in: 528036c8 user: drh tags: trunk
02:18
Allow virtual tables to contain multiple full-text-indexed columns. Added a magic column "_all" which can be used for querying all columns in a table at once.

For now, each posting list stores position/offset information for multiple columns. We may implement separate posting lists for separate columns at some future point. (CVS 3408) check-in: 366a70b0 user: adamd tags: trunk

Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts1/fts1.c.

   428    428       if( docid==0 ){
   429    429         skipPositionList(&r);
   430    430         continue;
   431    431       }
   432    432       printf("%s%lld", zSep, docid);
   433    433       zSep =  ",";
   434    434       if( p->iType>=DL_POSITIONS ){
   435         -      int iPos;
          435  +      int iPos, iCol;
   436    436         const char *zDiv = "";
   437    437         printf("(");
   438         -      while( (iPos = readPosition(&r))>=0 ){
   439         -        printf("%s%d", zDiv, iPos);
          438  +      while( (iPos = readPosition(&r, &iCol))>=0 ){
          439  +        printf("%s%d:%d", zDiv, iCol, iPos);
   440    440           zDiv = ":";
   441    441         }
   442    442         printf(")");
   443    443       }
   444    444     }
   445    445     printf("\n");
   446    446     fflush(stdout);
................................................................................
   606    606         }
   607    607         if( pOut->iType>=DL_POSITIONS ){
   608    608           docListAddPos(pOut, iRightCol, iRightPos);
   609    609         }
   610    610         iLeftPos = readPosition(pLeft, &iLeftCol);
   611    611         iRightPos = readPosition(pRight, &iRightCol);
   612    612       }else if( iRightCol<iLeftCol ||
   613         -              iRightCol==iLeftCol && iRightPos<iLeftPos+1 ){
          613  +              (iRightCol==iLeftCol && iRightPos<iLeftPos+1) ){
   614    614         iRightPos = readPosition(pRight, &iRightCol);
   615    615       }else{
   616    616         iLeftPos = readPosition(pLeft, &iLeftCol);
   617    617       }
   618    618     }
   619    619     if( iLeftPos>=0 ) skipPositionList(pLeft);
   620    620     if( iRightPos>=0 ) skipPositionList(pRight);
................................................................................
  1122   1122     return rc==SQLITE_DONE ? SQLITE_ROW : rc;
  1123   1123   }
  1124   1124   
  1125   1125   /* Load the segment doclists for term pTerm and merge them in
  1126   1126   ** appropriate order into out.  Returns SQLITE_OK if successful.  If
  1127   1127   ** there are no segments for pTerm, successfully returns an empty
  1128   1128   ** doclist in out.
         1129  +**
         1130  +** Each document consists of 1 or more "columns".  The number of
         1131  +** columns is v->nColumn.  If iColumn==v->nColumn, then return
         1132  +** position information about all columns.  If iColumn<v->nColumn,
         1133  +** then only return position information about the iColumn-th column
         1134  +** (where the first column is 0).
  1129   1135   */
  1130         -static int term_select_all(fulltext_vtab *v, int iColumn,
  1131         -                           const char *pTerm, int nTerm, DocList *out){
         1136  +static int term_select_all(
         1137  +  fulltext_vtab *v,     /* The fulltext index we are querying against */
         1138  +  int iColumn,          /* If <nColumn, only look at the iColumn-th column */
         1139  +  const char *pTerm,    /* The term whose posting lists we want */
         1140  +  int nTerm,            /* Number of bytes in pTerm */
         1141  +  DocList *out          /* Write the resulting doclist here */
         1142  +){
  1132   1143     DocList doclist;
  1133   1144     sqlite3_stmt *s;
  1134   1145     int rc = sql_get_statement(v, TERM_SELECT_ALL_STMT, &s);
  1135   1146     if( rc!=SQLITE_OK ) return rc;
  1136   1147   
  1137   1148     rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC);
  1138   1149     if( rc!=SQLITE_OK ) return rc;
................................................................................
  1442   1453     if( rc!=SQLITE_OK ) return rc;
  1443   1454   
  1444   1455     rc = connect(db, &spec, ppVTab, pzErr);
  1445   1456     destroyTableSpec(&spec);
  1446   1457     return rc;
  1447   1458   }
  1448   1459   
  1449         -  /* The %_content table holds the text of each full-text item, with
         1460  +  /* The %_content table holds the text of each document, with
  1450   1461     ** the rowid used as the docid.
  1451   1462     **
  1452   1463     ** The %_term table maps each term to a document list blob
  1453   1464     ** containing elements sorted by ascending docid, each element
  1454   1465     ** encoded as:
  1455   1466     **
  1456   1467     **   docid varint-encoded
................................................................................
  1641   1652   ** is the first term of a phrase query, go ahead and evaluate the phrase
  1642   1653   ** query and return the doclist for the entire phrase query.
  1643   1654   **
  1644   1655   ** The result is stored in pTerm->doclist.
  1645   1656   */
  1646   1657   static int docListOfTerm(
  1647   1658     fulltext_vtab *v,     /* The full text index */
  1648         -  int iColumn,           /* column to restrict to */
         1659  +  int iColumn,          /* column to restrict to.  No restrition if >=nColumn */
  1649   1660     QueryTerm *pQTerm,    /* Term we are looking for, or 1st term of a phrase */
  1650   1661     DocList **ppResult    /* Write the result here */
  1651   1662   ){
  1652   1663     DocList *pLeft, *pRight, *pNew;
  1653   1664     int i, rc;
  1654   1665   
  1655   1666     pLeft = docListNew(DL_POSITIONS);
................................................................................
  1885   1896     }
  1886   1897   
  1887   1898     queryDestroy(&q);
  1888   1899     *pResult = pLeft;
  1889   1900     return rc;
  1890   1901   }
  1891   1902   
  1892         -static int fulltextFilter(sqlite3_vtab_cursor *pCursor,
  1893         -                          int idxNum, const char *idxStr,
  1894         -                          int argc, sqlite3_value **argv){
         1903  +/*
         1904  +** This is the xFilter interface for the virtual table.  See
         1905  +** the virtual table xFilter method documentation for additional
         1906  +** information.
         1907  +**
         1908  +** If idxNum==QUERY_GENERIC then do a full table scan against
         1909  +** the %_content table.
         1910  +**
         1911  +** If idxNum==QUERY_ROWID then do a rowid lookup for a single entry
         1912  +** in the %_content table.
         1913  +**
         1914  +** If idxNum>=QUERY_FULLTEXT then use the full text index.  The
         1915  +** column on the left-hand side of the MATCH operator is column
         1916  +** number idxNum-QUERY_FULLTEXT, 0 indexed.  argv[0] is the right-hand
         1917  +** side of the MATCH operator.
         1918  +*/
         1919  +static int fulltextFilter(
         1920  +  sqlite3_vtab_cursor *pCursor,     /* The cursor used for this query */
         1921  +  int idxNum, const char *idxStr,   /* Which indexing scheme to use */
         1922  +  int argc, sqlite3_value **argv    /* Arguments for the indexing scheme */
         1923  +){
  1895   1924     fulltext_cursor *c = (fulltext_cursor *) pCursor;
  1896   1925     fulltext_vtab *v = cursor_vtab(c);
  1897   1926     int rc;
  1898   1927     StringBuffer sb;
  1899   1928   
  1900   1929     TRACE(("FTS1 Filter %p\n",pCursor));
  1901   1930   

Changes to test/fts1a.test.

     7      7   #    May you find forgiveness for yourself and forgive others.
     8      8   #    May you share freely, never taking more than you give.
     9      9   #
    10     10   #*************************************************************************
    11     11   # This file implements regression tests for SQLite library.  The
    12     12   # focus of this script is testing the FTS1 module.
    13     13   #
    14         -# $Id: fts1a.test,v 1.1 2006/09/10 03:34:06 drh Exp $
           14  +# $Id: fts1a.test,v 1.2 2006/09/13 12:36:09 drh Exp $
    15     15   #
    16     16   
    17     17   set testdir [file dirname $argv0]
    18     18   source $testdir/tester.tcl
    19     19   
    20     20   # If SQLITE_ENABLE_FTS1 is defined, omit this file.
    21     21   ifcapable !fts1 {
................................................................................
    24     24   }
    25     25   
    26     26   # Construct a full-text search table containing five keywords:
    27     27   # one, two, three, four, and five, in various combinations.  The
    28     28   # rowid for each will be a bitmask for the elements it contains.
    29     29   #
    30     30   db eval {
    31         -  CREATE VIRTUAL TABLE t1 USING fts1;
           31  +  CREATE VIRTUAL TABLE t1 USING fts1(content);
    32     32     INSERT INTO t1(content) VALUES('one');
    33     33     INSERT INTO t1(content) VALUES('two');
    34     34     INSERT INTO t1(content) VALUES('one two');
    35     35     INSERT INTO t1(content) VALUES('three');
    36     36     INSERT INTO t1(content) VALUES('one three');
    37     37     INSERT INTO t1(content) VALUES('two three');
    38     38     INSERT INTO t1(content) VALUES('one two three');

Added test/fts1b.test.

            1  +# 2006 September 13
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#*************************************************************************
           11  +# This file implements regression tests for SQLite library.  The
           12  +# focus of this script is testing the FTS1 module.
           13  +#
           14  +# $Id: fts1b.test,v 1.1 2006/09/13 12:36:09 drh Exp $
           15  +#
           16  +
           17  +set testdir [file dirname $argv0]
           18  +source $testdir/tester.tcl
           19  +
           20  +# If SQLITE_ENABLE_FTS1 is defined, omit this file.
           21  +ifcapable !fts1 {
           22  +  finish_test
           23  +  return
           24  +}
           25  +
           26  +# Fill the full-text index "t1" with phrases in english, spanish,
           27  +# and german.  For the i-th row, fill in the names for the bits
           28  +# that are set in the value of i.  The least significant bit is
           29  +# 1.  For example,  the value 5 is 101 in binary which will be
           30  +# converted to "one three" in english.
           31  +#
           32  +proc fill_multilanguage_fulltext_t1 {} {
           33  +  set english {one two three four five}
           34  +  set spanish {un dos tres cuatro cinco}
           35  +  set german {eine zwei drei vier funf}
           36  +  
           37  +  for {set i 1} {$i<=31} {incr i} {
           38  +    set cmd "INSERT INTO t1 VALUES"
           39  +    set vset {}
           40  +    foreach lang {english spanish german} {
           41  +      set words {}
           42  +      for {set j 0; set k 1} {$j<5} {incr j; incr k $k} {
           43  +        if {$k&$i} {lappend words [lindex [set $lang] $j]}
           44  +      }
           45  +      lappend vset "'$words'"
           46  +    }
           47  +    set sql "INSERT INTO t1(english,spanish,german) VALUES([join $vset ,])"
           48  +    # puts $sql
           49  +    db eval $sql
           50  +  }
           51  +}
           52  +
           53  +# Construct a full-text search table containing five keywords:
           54  +# one, two, three, four, and five, in various combinations.  The
           55  +# rowid for each will be a bitmask for the elements it contains.
           56  +#
           57  +db eval {
           58  +  CREATE VIRTUAL TABLE t1 USING fts1(english,spanish,german);
           59  +}
           60  +fill_multilanguage_fulltext_t1
           61  +
           62  +do_test fts1b-1.1 {
           63  +  execsql {SELECT rowid FROM t1 WHERE english MATCH 'one'}
           64  +} {1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31}
           65  +do_test fts1b-1.2 {
           66  +  execsql {SELECT rowid FROM t1 WHERE spanish MATCH 'one'}
           67  +} {}
           68  +do_test fts1b-1.3 {
           69  +  execsql {SELECT rowid FROM t1 WHERE german MATCH 'one'}
           70  +} {}
           71  +do_test fts1b-1.4 {
           72  +  execsql {SELECT rowid FROM t1 WHERE _all MATCH 'one'}
           73  +} {1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31}
           74  +do_test fts1b-1.5 {
           75  +  execsql {SELECT rowid FROM t1 WHERE _all MATCH 'one dos drei'}
           76  +} {7 15 23 31}
           77  +do_test fts1b-1.6 {
           78  +  execsql {SELECT english, spanish, german FROM t1 WHERE rowid=1}
           79  +} {one un eine}
           80  +do_test fts1b-1.7 {
           81  +  execsql {SELECT rowid FROM t1 WHERE _all MATCH '"one un"'}
           82  +} {}
           83  +
           84  +finish_test