Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Add special fast paths to sqlite3FtsUnicodeTolower() and Isalnum() for codepoints in the ASCII range. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts4-unicode |
Files: | files | file ages | folders |
SHA1: |
cf7b25d47687635a04f4347d45f135c6 |
User & Date: | dan 2012-05-25 19:50:12.903 |
Context
2012-05-26
| ||
14:54 | Change the name of the "unicode" tokenizer to "unicode61" to emphasize that the case folding and separator-character identification routines are based on unicode version 6.1. (check-in: 8f3e60aa22 user: dan tags: fts4-unicode) | |
2012-05-25
| ||
19:50 | Add special fast paths to sqlite3FtsUnicodeTolower() and Isalnum() for codepoints in the ASCII range. (check-in: cf7b25d476 user: dan tags: fts4-unicode) | |
18:48 | Fix comments in generated file fts3_unicode2.c. (check-in: 3dc567ef47 user: dan tags: fts4-unicode) | |
Changes
Changes to ext/fts3/fts3_unicode.c.
︙ | ︙ | |||
197 198 199 200 201 202 203 204 205 206 207 208 209 210 | do { /* Grow the output buffer if required. */ if( (zOut-pCsr->zToken)>=(pCsr->nAlloc-4) ){ char *zNew = sqlite3_realloc(pCsr->zToken, pCsr->nAlloc+64); if( !zNew ) return SQLITE_NOMEM; zOut = &zNew[zOut - pCsr->zToken]; pCsr->zToken = zNew; } /* Write the folded case of the last character read to the output */ zEnd = z; WRITE_UTF8(zOut, sqlite3FtsUnicodeTolower(iCode)); /* If the cursor is not at EOF, read the next character */ | > | 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 | do { /* Grow the output buffer if required. */ if( (zOut-pCsr->zToken)>=(pCsr->nAlloc-4) ){ char *zNew = sqlite3_realloc(pCsr->zToken, pCsr->nAlloc+64); if( !zNew ) return SQLITE_NOMEM; zOut = &zNew[zOut - pCsr->zToken]; pCsr->zToken = zNew; pCsr->nAlloc += 64; } /* Write the folded case of the last character read to the output */ zEnd = z; WRITE_UTF8(zOut, sqlite3FtsUnicodeTolower(iCode)); /* If the cursor is not at EOF, read the next character */ |
︙ | ︙ |
Changes to ext/fts3/fts3_unicode2.c.
︙ | ︙ | |||
117 118 119 120 121 122 123 124 | 0x07C94002, 0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025, 0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6, 0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46, 0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060, 0x380400F0, 0x3C000001, 0x3FFFF401, 0x40000001, 0x43FFF401, }; | > > > > > | | 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 | 0x07C94002, 0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025, 0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6, 0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46, 0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060, 0x380400F0, 0x3C000001, 0x3FFFF401, 0x40000001, 0x43FFF401, }; static const unsigned int aAscii[4] = { 0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001, }; if( c<128 ){ return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 ); }else if( c<(1<<22) ){ unsigned int key = (((unsigned int)c)<<10) | 0x000003FF; int iRes; int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; int iLo = 0; while( iHi>=iLo ){ int iTest = (iHi + iLo) / 2; if( key >= aEntry[iTest] ){ |
︙ | ︙ | |||
232 233 234 235 236 237 238 | }; int ret = c; assert( c>=0 ); assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 ); | > > | | 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 | }; int ret = c; assert( c>=0 ); assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 ); if( c<128 ){ if( c>='A' && c<='Z' ) ret = c + ('a' - 'A'); }else if( c<65536 ){ int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; int iLo = 0; int iRes = -1; while( iHi>=iLo ){ int iTest = (iHi + iLo) / 2; int cmp = (c - aEntry[iTest].iCode); |
︙ | ︙ |
Changes to ext/fts3/unicode/mkunicode.tcl.
︙ | ︙ | |||
100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 | if {($i % 5)==0} {puts "" ; puts -nonewline " "} puts -nonewline " $u32," incr i } puts "" puts " \};" } proc print_isalnum {zFunc lRange} { puts "/*" puts "** Return true if the argument corresponds to a unicode codepoint" puts "** classified as either a letter or a number. Otherwise false." puts "**" puts "** The results are undefined if the value passed to this function" puts "** is less than zero." puts "*/" puts "int ${zFunc}\(int c)\{" an_print_range_array $lRange puts { | > > > > > > > > > > > > > > > > > > > > > > > > | | 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 | if {($i % 5)==0} {puts "" ; puts -nonewline " "} puts -nonewline " $u32," incr i } puts "" puts " \};" } proc an_print_ascii_bitmap {lRange} { foreach range $lRange { foreach {iFirst nRange} $range {} for {set i $iFirst} {$i < ($iFirst+$nRange)} {incr i} { if {$i<=127} { set a($i) 1 } } } set aAscii [list 0 0 0 0] foreach key [array names a] { set idx [expr $key >> 5] lset aAscii $idx [expr [lindex $aAscii $idx] | (1 << ($key&0x001F))] } puts " static const unsigned int aAscii\[4\] = \{" puts -nonewline " " foreach v $aAscii { puts -nonewline [format " 0x%08X," $v] } puts "" puts " \};" } proc print_isalnum {zFunc lRange} { puts "/*" puts "** Return true if the argument corresponds to a unicode codepoint" puts "** classified as either a letter or a number. Otherwise false." puts "**" puts "** The results are undefined if the value passed to this function" puts "** is less than zero." puts "*/" puts "int ${zFunc}\(int c)\{" an_print_range_array $lRange an_print_ascii_bitmap $lRange puts { if( c<128 ){ return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 ); }else if( c<(1<<22) ){ unsigned int key = (((unsigned int)c)<<10) | 0x000003FF; int iRes; int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; int iLo = 0; while( iHi>=iLo ){ int iTest = (iHi + iLo) / 2; if( key >= aEntry[iTest] ){ |
︙ | ︙ | |||
361 362 363 364 365 366 367 | tl_print_table_footer toggle puts { int ret = c; assert( c>=0 ); assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 ); | > > | | 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 | tl_print_table_footer toggle puts { int ret = c; assert( c>=0 ); assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 ); if( c<128 ){ if( c>='A' && c<='Z' ) ret = c + ('a' - 'A'); }else if( c<65536 ){ int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; int iLo = 0; int iRes = -1; while( iHi>=iLo ){ int iTest = (iHi + iLo) / 2; int cmp = (c - aEntry[iTest].iCode); |
︙ | ︙ |