/ Artifact Content
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Artifact 1e5570df758f7e0b27ff0e087ee96f559d5cc9ade80afa9421537a8a20a7cfbf:

# 2014 Dec 20
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
# Tests focusing on the fts5 tokenizers

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5unicode

# If SQLITE_ENABLE_FTS5 is defined, omit this file.
ifcapable !fts5 {

proc tokenize_test {tn tokenizer input output} {
  uplevel [list do_test $tn [subst -nocommands {
    set ret {}
    foreach {z s e} [sqlite3_fts5_tokenize db {$tokenizer} {$input}] {
      lappend ret [set z]
    set ret
  }] [list {*}$output]]

foreach {tn t} {1 ascii 2 unicode61} {
  tokenize_test 1.$tn.0 $t {A B C D} {a b c d}
  tokenize_test 1.$tn.1 $t {May you share freely,} {may you share freely}
  tokenize_test 1.$tn.2 $t {..May...you.shAre.freely} {may you share freely}
  tokenize_test 1.$tn.3 $t {} {}

# Check that "unicode61" really is the default tokenizer.

do_execsql_test 2.0 "
  CREATE VIRTUAL TABLE t2 USING fts5(x, tokenize = unicode61);
  CREATE VIRTUAL TABLE t3 USING fts5(x, tokenize = ascii);
  INSERT INTO t1 VALUES('\xC0\xC8\xCC');
  INSERT INTO t2 VALUES('\xC0\xC8\xCC');
  INSERT INTO t3 VALUES('\xC0\xC8\xCC');
do_execsql_test 2.1 "
  SELECT 't1' FROM t1 WHERE t1 MATCH '\xE0\xE8\xEC';
  SELECT 't2' FROM t2 WHERE t2 MATCH '\xE0\xE8\xEC';
  SELECT 't3' FROM t3 WHERE t3 MATCH '\xE0\xE8\xEC';
" {t1 t2}