SQLite

Artifact [ad3069f099]
Login

Artifact ad3069f099ff593a2196422244fa218f8942dfb9:


# 2015 Jan 13
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# This file containst tests focused on prefix indexes.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5prefix

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE xx USING fts5(x, prefix=1);
  INSERT INTO xx VALUES('one two three');
  INSERT INTO xx VALUES('four five six');
  INSERT INTO xx VALUES('seven eight nine ten');
}

do_execsql_test 1.1 {
  SELECT rowid FROM xx WHERE xx MATCH 't*'
} {1 3}


#-------------------------------------------------------------------------
# Check that prefix indexes really do index n-character prefixes, not 
# n-byte prefixes. Use the ascii tokenizer so as not to be confused by
# diacritic removal.
#
do_execsql_test 2.0 { 
  CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = ascii, prefix = 2) 
}

do_test 2.1 {
  foreach {rowid string} {
    1 "\xCA\xCB\xCC\xCD"
    2 "\u1234\u5678\u4321\u8765"
  } {
    execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $string) }
  }
} {}

do_execsql_test 2.2 {
  INSERT INTO t1(t1) VALUES('integrity-check');
}

foreach {tn q res} {
  1 "SELECT rowid FROM t1 WHERE t1 MATCH '\xCA\xCB*'" 1
  2 "SELECT rowid FROM t1 WHERE t1 MATCH '\u1234\u5678*'" 2
} {
  do_execsql_test 2.3.$tn $q $res
}

#-------------------------------------------------------------------------
# Check that prefix queries with:
#
#   * a column filter, and
#   * no prefix index.
#
# work Ok.
#
do_execsql_test 3.0 {
  CREATE VIRTUAL TABLE t3 USING fts5(a, b, c);
  INSERT INTO t3(t3, rank) VALUES('pgsz', 32);
  BEGIN;
    INSERT INTO t3 VALUES('acb ccc bba', 'cca bba bca', 'bbc ccc bca'); -- 1
    INSERT INTO t3 VALUES('cbb cac cab', 'abb aac bba', 'aab ccc cac'); -- 2
    INSERT INTO t3 VALUES('aac bcb aac', 'acb bcb caa', 'aca bab bca'); -- 3
    INSERT INTO t3 VALUES('aab ccb ccc', 'aca cba cca', 'aca aac cbb'); -- 4
    INSERT INTO t3 VALUES('bac aab bab', 'ccb bac cba', 'acb aba abb'); -- 5
    INSERT INTO t3 VALUES('bab abc ccb', 'acb cba abb', 'cbb aaa cab'); -- 6
    INSERT INTO t3 VALUES('cbb bbc baa', 'aab aca baa', 'bcc cca aca'); -- 7
    INSERT INTO t3 VALUES('abc bba abb', 'cac abc cba', 'acc aac cac'); -- 8
    INSERT INTO t3 VALUES('bbc bbc cab', 'bcb ccb cba', 'bcc cac acb'); -- 9
  COMMIT;
}

foreach {tn match res} {
  1 "a : c*" {1 2 4 6 7 9}
  2 "b : c*" {1 3 4 5 6 8 9}
  3 "c : c*" {1 2 4 6 7 8 9}
  4 "a : b*" {1 3 5 6 7 8 9}
  5 "b : b*" {1 2 3 5 7 9}
  6 "c : b*" {1 3 7 9}
  7 "a : a*" {1 3 4 5 6 8}
  8 "b : a*" {2 3 4 6 7 8}
  9 "c : a*" {2 3 4 5 6 7 8 9}
} {
  do_execsql_test 3.1.$tn {
    SELECT rowid FROM t3($match)
  } $res
}

do_test 3.2 {
  expr srand(0)
  execsql { DELETE FROM t3 }
  for {set i 0} {$i < 1000} {incr i} {
    set a [fts5_rnddoc 3]
    set b [fts5_rnddoc 8]
    set c [fts5_rnddoc 20]
    execsql { INSERT INTO t3 VALUES($a, $b, $c) }
  }
  execsql { INSERT INTO t3(t3) VALUES('integrity-check') }
} {}

proc gmatch {col pattern} {
  expr {[lsearch -glob $col $pattern]>=0}
}
db func gmatch gmatch

proc ghl {col pattern} {
  foreach t $col {
    if {[string match $pattern $t]} {
      lappend res "*$t*"
    } else {
      lappend res $t
    }
  }
  set res
}
db func ghl ghl

set COLS(a) 0
set COLS(b) 1
set COLS(c) 2

for {set x 0} {$x<2} {incr x} {
  foreach {tn pattern} {
    1  {xa*}
    2  {xb*}
    3  {xc*}
    4  {xd*}
    5  {xe*}
    6  {xf*}
    7  {xg*}
    8  {xh*}
    9  {xi*}
    10 {xj*}
  } {
    foreach col {a b c} {

      # Check that the list of returned rowids is correct.
      #
      set res [db eval "SELECT rowid FROM t3 WHERE gmatch($col, '$pattern')"]
      set query "$col : $pattern"
      do_execsql_test 3.3.$x.$tn.$col.rowid {
        SELECT rowid FROM t3($query);
      } $res

      # Check that the highlight() function works.
      #
      set res [db eval \
        "SELECT ghl($col, '$pattern') FROM t3 WHERE gmatch($col, '$pattern')"
      ]
      set idx $COLS($col)
      do_execsql_test 3.3.$x.$tn.$col.highlight {
        SELECT highlight(t3, $idx, '*', '*') FROM t3($query);
      } $res
    }

    foreach colset {{a b} {b c} {c a} {a c} {b a}} {
      # Check that the list of returned rowids is correct.
      #
      foreach {col1 col2} $colset {}
      set expr "gmatch($col1, '$pattern') OR gmatch($col2, '$pattern')"
      set res [db eval "SELECT rowid FROM t3 WHERE $expr"]
      set query "{$colset} : $pattern"
      do_execsql_test 3.3.$x.$tn.{$colset}.rowid {
        SELECT rowid FROM t3($query);
      } $res

      set resq    "SELECT ghl($col1, '$pattern'), ghl($col2, '$pattern')"
      append resq " FROM t3 WHERE $expr"
      set res [db eval $resq]
      set idx1 $COLS($col1)
      set idx2 $COLS($col2)
      do_execsql_test 3.3.$x.$tn.{$colset}.highlight {
        SELECT highlight(t3, $idx1, '*', '*'), highlight(t3, $idx2, '*', '*')
          FROM t3($query)
      } $res
    }
  }
  execsql { INSERT INTO t3(t3) VALUES('optimize') }
  execsql { INSERT INTO t3(t3) VALUES('integrity-check') }
}


finish_test