/ Check-in [e71495a8]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: If SQLITE_DISABLE_FTS3_UNICODE is defined, do not build the "unicode61" tokenizer.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts4-unicode
Files: files | file ages | folders
SHA1: e71495a817b479bc23c5403d99255e3f098eb054
User & Date: dan 2012-05-26 18:28:14
Context
2012-05-26
18:42
Merge fts4-unicode branch with trunk. check-in: 25ba1f84 user: dan tags: trunk
18:28
If SQLITE_DISABLE_FTS3_UNICODE is defined, do not build the "unicode61" tokenizer. Closed-Leaf check-in: e71495a8 user: dan tags: fts4-unicode
17:57
Change the format of the tables used by sqlite3FtsUnicodeTolower() to make them a little smaller. check-in: b89d3834 user: dan tags: fts4-unicode
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

3550
3551
3552
3553
3554
3555
3556

3557

3558
3559
3560
3561
3562
3563
3564
....
3566
3567
3568
3569
3570
3571
3572

3573

3574
3575
3576
3577
3578




3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603


3604

3605
3606
3607
3608
3609
3610
3611
**
** Calling sqlite3Fts3SimpleTokenizerModule() sets the value pointed
** to by the argument to point to the "simple" tokenizer implementation.
** And so on.
*/
void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);

void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const**ppModule);

#ifdef SQLITE_ENABLE_ICU
void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule);
#endif

/*
** Initialise the fts3 extension. If this extension is built as part
** of the sqlite library, then this function is called directly by
................................................................................
** function is called by the sqlite3_extension_init() entry point.
*/
int sqlite3Fts3Init(sqlite3 *db){
  int rc = SQLITE_OK;
  Fts3Hash *pHash = 0;
  const sqlite3_tokenizer_module *pSimple = 0;
  const sqlite3_tokenizer_module *pPorter = 0;

  const sqlite3_tokenizer_module *pUnicode = 0;


#ifdef SQLITE_ENABLE_ICU
  const sqlite3_tokenizer_module *pIcu = 0;
  sqlite3Fts3IcuTokenizerModule(&pIcu);
#endif





#ifdef SQLITE_TEST
  rc = sqlite3Fts3InitTerm(db);
  if( rc!=SQLITE_OK ) return rc;
#endif

  rc = sqlite3Fts3InitAux(db);
  if( rc!=SQLITE_OK ) return rc;

  sqlite3Fts3SimpleTokenizerModule(&pSimple);
  sqlite3Fts3PorterTokenizerModule(&pPorter);
  sqlite3Fts3UnicodeTokenizer(&pUnicode);

  /* Allocate and initialise the hash-table used to store tokenizers. */
  pHash = sqlite3_malloc(sizeof(Fts3Hash));
  if( !pHash ){
    rc = SQLITE_NOMEM;
  }else{
    sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1);
  }

  /* Load the built-in tokenizers into the hash table */
  if( rc==SQLITE_OK ){
    if( sqlite3Fts3HashInsert(pHash, "simple", 7, (void *)pSimple)
     || sqlite3Fts3HashInsert(pHash, "porter", 7, (void *)pPorter) 


     || sqlite3Fts3HashInsert(pHash, "unicode61", 10, (void *)pUnicode) 

#ifdef SQLITE_ENABLE_ICU
     || (pIcu && sqlite3Fts3HashInsert(pHash, "icu", 4, (void *)pIcu))
#endif
    ){
      rc = SQLITE_NOMEM;
    }
  }







>

>







 







>

>





>
>
>
>











<













>
>

>







3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
....
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597

3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
**
** Calling sqlite3Fts3SimpleTokenizerModule() sets the value pointed
** to by the argument to point to the "simple" tokenizer implementation.
** And so on.
*/
void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
#ifndef SQLITE_DISABLE_FTS3_UNICODE
void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const**ppModule);
#endif
#ifdef SQLITE_ENABLE_ICU
void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule);
#endif

/*
** Initialise the fts3 extension. If this extension is built as part
** of the sqlite library, then this function is called directly by
................................................................................
** function is called by the sqlite3_extension_init() entry point.
*/
int sqlite3Fts3Init(sqlite3 *db){
  int rc = SQLITE_OK;
  Fts3Hash *pHash = 0;
  const sqlite3_tokenizer_module *pSimple = 0;
  const sqlite3_tokenizer_module *pPorter = 0;
#ifndef SQLITE_DISABLE_FTS3_UNICODE
  const sqlite3_tokenizer_module *pUnicode = 0;
#endif

#ifdef SQLITE_ENABLE_ICU
  const sqlite3_tokenizer_module *pIcu = 0;
  sqlite3Fts3IcuTokenizerModule(&pIcu);
#endif

#ifndef SQLITE_DISABLE_FTS3_UNICODE
  sqlite3Fts3UnicodeTokenizer(&pUnicode);
#endif

#ifdef SQLITE_TEST
  rc = sqlite3Fts3InitTerm(db);
  if( rc!=SQLITE_OK ) return rc;
#endif

  rc = sqlite3Fts3InitAux(db);
  if( rc!=SQLITE_OK ) return rc;

  sqlite3Fts3SimpleTokenizerModule(&pSimple);
  sqlite3Fts3PorterTokenizerModule(&pPorter);


  /* Allocate and initialise the hash-table used to store tokenizers. */
  pHash = sqlite3_malloc(sizeof(Fts3Hash));
  if( !pHash ){
    rc = SQLITE_NOMEM;
  }else{
    sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1);
  }

  /* Load the built-in tokenizers into the hash table */
  if( rc==SQLITE_OK ){
    if( sqlite3Fts3HashInsert(pHash, "simple", 7, (void *)pSimple)
     || sqlite3Fts3HashInsert(pHash, "porter", 7, (void *)pPorter) 

#ifndef SQLITE_DISABLE_FTS3_UNICODE
     || sqlite3Fts3HashInsert(pHash, "unicode61", 10, (void *)pUnicode) 
#endif
#ifdef SQLITE_ENABLE_ICU
     || (pIcu && sqlite3Fts3HashInsert(pHash, "icu", 4, (void *)pIcu))
#endif
    ){
      rc = SQLITE_NOMEM;
    }
  }

Changes to ext/fts3/fts3_unicode.c.

9
10
11
12
13
14
15


16
17
18
19
20
21
22
...
237
238
239
240
241
242
243

**    May you share freely, never taking more than you give.
**
******************************************************************************
**
** Implementation of the "unicode" full-text-search tokenizer.
*/



#include "fts3Int.h"
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)

#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
................................................................................
    unicodeNext,
    0,
  };
  *ppModule = &module;
}

#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */








>
>







 







>
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
...
239
240
241
242
243
244
245
246
**    May you share freely, never taking more than you give.
**
******************************************************************************
**
** Implementation of the "unicode" full-text-search tokenizer.
*/

#ifndef SQLITE_DISABLE_FTS3_UNICODE

#include "fts3Int.h"
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)

#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
................................................................................
    unicodeNext,
    0,
  };
  *ppModule = &module;
}

#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
#endif /* ifndef SQLITE_DISABLE_FTS3_UNICODE */

Changes to ext/fts3/fts3_unicode2.c.

11
12
13
14
15
16
17


18
19
20
21
22
23
24
...
285
286
287
288
289
290
291

******************************************************************************
*/

/*
** DO NOT EDIT THIS MACHINE GENERATED FILE.
*/



#include <assert.h>

/*
** Return true if the argument corresponds to a unicode codepoint
** classified as either a letter or a number. Otherwise false.
**
** The results are undefined if the value passed to this function
................................................................................
  
  else if( c>=66560 && c<66600 ){
    ret = c + 40;
  }

  return ret;
}








>
>







 







>
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
...
287
288
289
290
291
292
293
294
******************************************************************************
*/

/*
** DO NOT EDIT THIS MACHINE GENERATED FILE.
*/

#ifndef SQLITE_DISABLE_FTS3_UNICODE

#include <assert.h>

/*
** Return true if the argument corresponds to a unicode codepoint
** classified as either a letter or a number. Otherwise false.
**
** The results are undefined if the value passed to this function
................................................................................
  
  else if( c>=66560 && c<66600 ){
    ret = c + 40;
  }

  return ret;
}
#endif /* ifndef SQLITE_DISABLE_FTS3_UNICODE */

Changes to ext/fts3/unicode/mkunicode.tcl.

503
504
505
506
507
508
509


510
511
512
513
514
515
516
...
562
563
564
565
566
567
568
569


*/

/*
** DO NOT EDIT THIS MACHINE GENERATED FILE.
*/
  }]
  puts ""


  puts "#include <assert.h>"
  puts ""
}

proc print_test_main {} {
  puts ""
  puts "#include <stdio.h>"
................................................................................
# was specified.
#
if {$::generate_test_code} {
  print_test_isalnum sqlite3FtsUnicodeIsalnum $lRange
  print_tolower_test sqlite3FtsUnicodeTolower 
  print_test_main 
}










>
>







 








>
>
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
...
564
565
566
567
568
569
570
571
572
573
*/

/*
** DO NOT EDIT THIS MACHINE GENERATED FILE.
*/
  }]
  puts ""
  puts "#ifndef SQLITE_DISABLE_FTS3_UNICODE"
  puts ""
  puts "#include <assert.h>"
  puts ""
}

proc print_test_main {} {
  puts ""
  puts "#include <stdio.h>"
................................................................................
# was specified.
#
if {$::generate_test_code} {
  print_test_isalnum sqlite3FtsUnicodeIsalnum $lRange
  print_tolower_test sqlite3FtsUnicodeTolower 
  print_test_main 
}

puts "#endif /* ifndef SQLITE_DISABLE_FTS3_UNICODE */"

Changes to src/test_config.c.

302
303
304
305
306
307
308






309
310
311
312
313
314
315
#endif

#ifdef SQLITE_ENABLE_FTS3
  Tcl_SetVar2(interp, "sqlite_options", "fts3", "1", TCL_GLOBAL_ONLY);
#else
  Tcl_SetVar2(interp, "sqlite_options", "fts3", "0", TCL_GLOBAL_ONLY);
#endif







#ifdef SQLITE_OMIT_GET_TABLE
  Tcl_SetVar2(interp, "sqlite_options", "gettable", "0", TCL_GLOBAL_ONLY);
#else
  Tcl_SetVar2(interp, "sqlite_options", "gettable", "1", TCL_GLOBAL_ONLY);
#endif








>
>
>
>
>
>







302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
#endif

#ifdef SQLITE_ENABLE_FTS3
  Tcl_SetVar2(interp, "sqlite_options", "fts3", "1", TCL_GLOBAL_ONLY);
#else
  Tcl_SetVar2(interp, "sqlite_options", "fts3", "0", TCL_GLOBAL_ONLY);
#endif

#if !defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_DISABLE_FTS3_UNICODE)
  Tcl_SetVar2(interp, "sqlite_options", "fts3_unicode", "0", TCL_GLOBAL_ONLY);
#else
  Tcl_SetVar2(interp, "sqlite_options", "fts3_unicode", "1", TCL_GLOBAL_ONLY);
#endif

#ifdef SQLITE_OMIT_GET_TABLE
  Tcl_SetVar2(interp, "sqlite_options", "gettable", "0", TCL_GLOBAL_ONLY);
#else
  Tcl_SetVar2(interp, "sqlite_options", "gettable", "1", TCL_GLOBAL_ONLY);
#endif

Changes to test/fts3fault2.test.

127
128
129
130
131
132
133

134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155

156
  db eval {SELECT * FROM sqlite_master}
} -body {
  execsql { INSERT INTO ft(ft) VALUES('rebuild') }
} -test {
  faultsim_test_result {0 {}}
}


do_test 5.0 {
  faultsim_delete_and_reopen
  execsql {
    CREATE VIRTUAL TABLE ft USING fts4(a, tokenize=unicode61);
  }
  faultsim_save_and_close
} {}

do_faultsim_test 5.1 -faults oom* -prep {
  faultsim_restore_and_reopen
  db eval {SELECT * FROM sqlite_master}
} -body {
  execsql { INSERT INTO ft VALUES('the quick brown fox'); }
  execsql { INSERT INTO ft VALUES(
      'theunusuallylongtokenthatjustdragsonandonandonandthendragsonsomemoreeof'
    );
  }
  execsql { SELECT docid FROM ft WHERE ft MATCH 'th*' }
} -test {
  faultsim_test_result {0 {1 2}}
}


finish_test







>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>

127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
  db eval {SELECT * FROM sqlite_master}
} -body {
  execsql { INSERT INTO ft(ft) VALUES('rebuild') }
} -test {
  faultsim_test_result {0 {}}
}

# Tests 5.* exercise an fts4 table declared with tokenize=unicode61, so they
# are wrapped in an fts3_unicode capability check.
# NOTE(review): ifcapable is defined by the test harness, not in this file;
# presumably it skips the body when the capability is reported as 0.
ifcapable fts3_unicode {
  # 5.0: set up the fixture. Creates the fts4 table using the unicode61
  # tokenizer, then saves the database so that 5.1 can restore it in its
  # -prep script.
  do_test 5.0 {
    faultsim_delete_and_reopen
    execsql {
      CREATE VIRTUAL TABLE ft USING fts4(a, tokenize=unicode61);
    }
    faultsim_save_and_close
  } {}
  
  # 5.1: fault-injection run using the oom* fault set. The -prep script
  # restores the database saved by 5.0 and reads sqlite_master. The -body
  # script inserts one short multi-word row and one row holding a single
  # very long token, then runs a prefix query for th*. The -test script
  # expects success with docids 1 and 2 returned.
  do_faultsim_test 5.1 -faults oom* -prep {
    faultsim_restore_and_reopen
    db eval {SELECT * FROM sqlite_master}
  } -body {
    execsql { INSERT INTO ft VALUES('the quick brown fox'); }
    execsql { INSERT INTO ft VALUES(
       'theunusuallylongtokenthatjustdragsonandonandonandthendragsonsomemoreeof'
      );
    }
    execsql { SELECT docid FROM ft WHERE ft MATCH 'th*' }
  } -test {
    faultsim_test_result {0 {1 2}}
  }
}

finish_test

Changes to test/fts4unicode.test.

10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
#*************************************************************************
#
# The tests in this file focus on testing the "unicode" FTS tokenizer.
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl
ifcapable !fts3 { finish_test ; return }
set ::testprefix fts4unicode

proc do_unicode_token_test {tn input res} {
  set input [string map {' ''} $input]
  uplevel [list do_execsql_test $tn "
    SELECT fts3_tokenizer_test('unicode61', '$input');
  " [list [list {*}$res]]]







|







10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
#*************************************************************************
#
# The tests in this file focus on testing the "unicode" FTS tokenizer.
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl
ifcapable !fts3_unicode { finish_test ; return }
set ::testprefix fts4unicode

proc do_unicode_token_test {tn input res} {
  set input [string map {' ''} $input]
  uplevel [list do_execsql_test $tn "
    SELECT fts3_tokenizer_test('unicode61', '$input');
  " [list [list {*}$res]]]