Patch notes (free text ahead of the first "Index:" header).

  * fts3_unicode.c: after the token buffer is grown by 64 bytes with
    sqlite3_realloc(), nAlloc is now increased by the same amount so that
    the "room for 4 more bytes" test uses the real buffer size.
  * fts3_unicode2.c: codepoints below 128 take a fast path. For isalnum a
    4-word bitmap (aAscii) is consulted; a SET bit marks a codepoint that
    is NOT a letter or digit, hence the "==0" in the return expression.
    For tolower only 'A'..'Z' are folded.
  * mkunicode.tcl: the generator emits the aAscii bitmap and both fast
    paths, so regenerating fts3_unicode2.c reproduces the C hunks below.

  The bitmap test shifts an unsigned 1 because the shift count is 31 for
  c = 31, 63, 95 and 127, and shifting a signed 1 into the sign bit is
  undefined in C. The range test casts c to unsigned so that a negative
  argument cannot index aAscii[] out of bounds; such values fall through
  to the binary-search branch instead.

Index: ext/fts3/fts3_unicode.c
==================================================================
--- ext/fts3/fts3_unicode.c
+++ ext/fts3/fts3_unicode.c
@@ -199,10 +199,11 @@
     if( (zOut-pCsr->zToken)>=(pCsr->nAlloc-4) ){
       char *zNew = sqlite3_realloc(pCsr->zToken, pCsr->nAlloc+64);
       if( !zNew ) return SQLITE_NOMEM;
       zOut = &zNew[zOut - pCsr->zToken];
       pCsr->zToken = zNew;
+      pCsr->nAlloc += 64;
     }
 
     /* Write the folded case of the last character read to the output */
     zEnd = z;
     WRITE_UTF8(zOut, sqlite3FtsUnicodeTolower(iCode));

Index: ext/fts3/fts3_unicode2.c
==================================================================
--- ext/fts3/fts3_unicode2.c
+++ ext/fts3/fts3_unicode2.c
@@ -119,12 +119,17 @@
     0x07D108B6, 0x07D3E404, 0x07D4003E, 0x07D50004,
     0x07D54018, 0x07D7EC46, 0x07D9140B, 0x07DA0046,
     0x07DC0074, 0x38000401, 0x38008060, 0x380400F0,
     0x3C000001, 0x3FFFF401, 0x40000001, 0x43FFF401,
   };
+  static const unsigned int aAscii[4] = {
+    0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001,
+  };
 
-  if( c<(1<<22) ){
+  if( (unsigned int)c<128 ){
+    return ( (aAscii[c >> 5] & ((unsigned int)1 << (c & 0x001F)))==0 );
+  }else if( c<(1<<22) ){
     unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
     int iRes;
     int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
     int iLo = 0;
     while( iHi>=iLo ){
@@ -234,11 +239,13 @@
   int ret = c;
 
   assert( c>=0 );
   assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
 
-  if( c<65536 ){
+  if( c<128 ){
+    if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
+  }else if( c<65536 ){
     int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
     int iLo = 0;
     int iRes = -1;
 
     while( iHi>=iLo ){

Index: ext/fts3/unicode/mkunicode.tcl
==================================================================
--- ext/fts3/unicode/mkunicode.tcl
+++ ext/fts3/unicode/mkunicode.tcl
@@ -102,10 +102,31 @@
     incr i
   }
   puts ""
   puts "  \};"
 }
+
+proc an_print_ascii_bitmap {lRange} {
+  foreach range $lRange {
+    foreach {iFirst nRange} $range {}
+    for {set i $iFirst} {$i < ($iFirst+$nRange)} {incr i} {
+      if {$i<=127} { set a($i) 1 }
+    }
+  }
+
+  set aAscii [list 0 0 0 0]
+  foreach key [array names a] {
+    set idx [expr $key >> 5]
+    lset aAscii $idx [expr [lindex $aAscii $idx] | (1 << ($key&0x001F))]
+  }
+
+  puts "  static const unsigned int aAscii\[4\] = \{"
+  puts -nonewline "   "
+  foreach v $aAscii { puts -nonewline [format " 0x%08X," $v] }
+  puts ""
+  puts "  \};"
+}
 
 proc print_isalnum {zFunc lRange} {
   puts "/*"
   puts "** Return true if the argument corresponds to a unicode codepoint"
   puts "** classified as either a letter or a number. Otherwise false."
@@ -113,12 +134,15 @@
   puts "** The results are undefined if the value passed to this function"
   puts "** is less than zero."
   puts "*/"
   puts "int ${zFunc}\(int c)\{"
   an_print_range_array $lRange
+  an_print_ascii_bitmap $lRange
   puts {
-  if( c<(1<<22) ){
+  if( (unsigned int)c<128 ){
+    return ( (aAscii[c >> 5] & ((unsigned int)1 << (c & 0x001F)))==0 );
+  }else if( c<(1<<22) ){
     unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
     int iRes;
     int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
     int iLo = 0;
     while( iHi>=iLo ){
@@ -363,11 +387,13 @@
   int ret = c;
 
   assert( c>=0 );
   assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
 
-  if( c<65536 ){
+  if( c<128 ){
+    if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
+  }else if( c<65536 ){
     int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
     int iLo = 0;
     int iRes = -1;
 
     while( iHi>=iLo ){