# 2012 May 25
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#*************************************************************************
#
# The tests in this file focus on testing the "unicode" FTS tokenizer.
#
set testdir [file dirname $argv0]
source $testdir/tester.tcl
ifcapable !fts3 { finish_test ; return }
set ::testprefix fts4unicode
proc do_unicode_token_test {tn input res} {
set input [string map {' ''} $input]
uplevel [list do_execsql_test $tn "
SELECT fts3_tokenizer_test('unicode', '$input');
" [list [list {*}$res]]]
}
do_unicode_token_test 1.0 {a B c D} {0 a a 1 b B 2 c c 3 d D}
do_unicode_token_test 1.1 {Ä Ö Ü} {0 ä Ä 1 ö Ö 2 ü Ü}
do_unicode_token_test 1.2 {xÄx xÖx xÜx} {0 xäx xÄx 1 xöx xÖx 2 xüx xÜx}
# 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s.
do_unicode_token_test 1.3 "\uDF" "0 \uDF \uDF"
do_unicode_token_test 1.4 "\u1E9E" "0 ß \u1E9E"
do_unicode_token_test 1.5 "\u1E9E" "0 \uDF \u1E9E"
do_unicode_token_test 1.6 "The quick brown fox" {
0 the The 1 quick quick 2 brown brown 3 fox fox
}
do_unicode_token_test 1.7 "The\u00bfquick\u224ebrown\u2263fox" {
0 the The 1 quick quick 2 brown brown 3 fox fox
}
finish_test