Index: src/tokenize.c ================================================================== --- src/tokenize.c +++ src/tokenize.c @@ -16,16 +16,96 @@ ** parser for analysis. */ #include "sqliteInt.h" #include +/* Character classes for tokenizing +** +** In the sqlite3GetToken() function, a switch() on aiClass[c] is implemented +** using a lookup table, whereas a switch() directly on c uses a binary search. +** The lookup table is much faster. To maximize speed, and to ensure that +** a lookup table is used, all of the classes need to be small integers and +** all of them need to be used within the switch. +*/ +#define CC_X 0 /* The letter 'x' or 'X'. Start of x'01234fed' */ +#define CC_KYWD 1 /* Alphabetics or '_'. Usable in a keyword */ +#define CC_ID 2 /* unicode characters usable in IDs */ +#define CC_DIGIT 3 /* Digits */ +#define CC_DOLLAR 4 /* '$' */ +#define CC_VARALPHA 5 /* '@', '#', ':'. Alphabetic SQL variables */ +#define CC_VARNUM 6 /* '?'. Numeric SQL variables */ +#define CC_SPACE 7 /* Space characters */ +#define CC_QUOTE 8 /* '"', '\'', or '`'. String literals, quoted ids */ +#define CC_QUOTE2 9 /* '['. [...] style quoted ids */ +#define CC_PIPE 10 /* '|'. Bitwise OR or concatenate */ +#define CC_MINUS 11 /* '-'. Minus or SQL-style comment */ +#define CC_LT 12 /* '<'. Part of < or <= or <> */ +#define CC_GT 13 /* '>'. Part of > or >= */ +#define CC_EQ 14 /* '='. Part of = or == */ +#define CC_BANG 15 /* '!'. Part of != */ +#define CC_SLASH 16 /* '/'. / or c-style comment */ +#define CC_LP 17 /* '(' */ +#define CC_RP 18 /* ')' */ +#define CC_SEMI 19 /* ';' */ +#define CC_PLUS 20 /* '+' */ +#define CC_STAR 21 /* '*' */ +#define CC_PERCENT 22 /* '%' */ +#define CC_COMMA 23 /* ',' */ +#define CC_AND 24 /* '&' */ +#define CC_TILDA 25 /* '~' */ +#define CC_DOT 26 /* '.' */ +#define CC_ILLEGAL 27 /* Illegal character */ + +static const unsigned char aiClass[] = { +#ifdef SQLITE_ASCII +/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf */ +/* 0x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 7, 7, 27, 7, 7, 27, 27, +/* 1x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +/* 2x */ 7, 15, 8, 5, 4, 22, 24, 8, 17, 18, 21, 20, 23, 11, 26, 16, +/* 3x */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 19, 12, 14, 13, 6, +/* 4x */ 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 5x */ 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 9, 27, 27, 27, 1, +/* 6x */ 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +/* 7x */ 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 27, 10, 27, 25, 27, +/* 8x */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +/* 9x */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +/* Ax */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +/* Bx */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +/* Cx */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +/* Dx */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +/* Ex */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +/* Fx */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +#endif +#ifdef SQLITE_EBCDIC +/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf */ +/* 0x */ 27, 27, 27, 27, 27, 7, 27, 27, 27, 27, 27, 27, 7, 7, 27, 27, +/* 1x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +/* 2x */ 27, 27, 27, 27, 27, 7, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +/* 3x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +/* 4x */ 7, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 12, 17, 20, 10, +/* 5x */ 24, 27, 27, 27, 27, 27, 27, 27, 27, 27, 15, 4, 21, 18, 19, 27, +/* 6x */ 11, 16, 27, 27, 27, 27, 27, 27, 27, 27, 27, 23, 22, 1, 13, 7, +/* 7x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 8, 5, 5, 5, 8, 14, 8, +/* 8x */ 27, 1, 1, 1, 1, 1, 1, 1, 1, 1, 27, 27, 27, 27, 27, 27, +/* 9x */ 27, 1, 1, 1, 1, 1, 1, 1, 1, 1, 27, 27, 27, 27, 27, 27, +/* 9x */ 25, 1, 1, 1, 1, 1, 1, 0, 1, 1, 27, 27, 27, 27, 27, 27, +/* Bx */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 9, 27, 27, 27, 27, 27, +/* Cx */ 27, 1, 1, 1, 1, 1, 1, 1, 1, 1, 27, 27, 27, 27, 27, 27, +/* Dx */ 27, 1, 1, 1, 1, 1, 1, 1, 1, 1, 27, 27, 27, 27, 27, 27, +/* Ex */ 27, 27, 1, 1, 1, 1, 1, 0, 1, 1, 27, 27, 27, 27, 27, 27, +/* Fx */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 27, 27, 27, 27, 27, 27, +#endif +}; + /* -** The charMap() macro maps alphabetic characters into their +** The charMap() macro maps alphabetic characters (only) into their ** lower-case ASCII equivalent. On ASCII machines, this is just ** an upper-to-lower case map. On EBCDIC machines we also need -** to adjust the encoding. Only alphabetic characters and underscores -** need to be translated. +** to adjust the encoding. The mapping is only valid for alphabetics +** which are the only characters for which this feature is used. +** +** Used by keywordhash.h */ #ifdef SQLITE_ASCII # define charMap(X) sqlite3UpperToLower[(unsigned char)X] #endif #ifdef SQLITE_EBCDIC @@ -113,69 +193,69 @@ ** Return the length of the token that begins at z[0]. ** Store the token type in *tokenType before returning. */ int sqlite3GetToken(const unsigned char *z, int *tokenType){ int i, c; - switch( *z ){ - case ' ': case '\t': case '\n': case '\f': case '\r': { + switch( aiClass[*z] ){ + case CC_SPACE: { testcase( z[0]==' ' ); testcase( z[0]=='\t' ); testcase( z[0]=='\n' ); testcase( z[0]=='\f' ); testcase( z[0]=='\r' ); for(i=1; sqlite3Isspace(z[i]); i++){} *tokenType = TK_SPACE; return i; } - case '-': { + case CC_MINUS: { if( z[1]=='-' ){ for(i=2; (c=z[i])!=0 && c!='\n'; i++){} *tokenType = TK_SPACE; /* IMP: R-22934-25134 */ return i; } *tokenType = TK_MINUS; return 1; } - case '(': { + case CC_LP: { *tokenType = TK_LP; return 1; } - case ')': { + case CC_RP: { *tokenType = TK_RP; return 1; } - case ';': { + case CC_SEMI: { *tokenType = TK_SEMI; return 1; } - case '+': { + case CC_PLUS: { *tokenType = TK_PLUS; return 1; } - case '*': { + case CC_STAR: { *tokenType = TK_STAR; return 1; } - case '/': { + case CC_SLASH: { if( z[1]!='*' || z[2]==0 ){ *tokenType = TK_SLASH; return 1; } for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){} if( c ) i++; *tokenType = TK_SPACE; /* IMP: R-22934-25134 */ return i; } - case '%': { + case CC_PERCENT: { *tokenType = TK_REM; return 1; } - case '=': { + case CC_EQ: { *tokenType = TK_EQ; return 1 + (z[1]=='='); } - case '<': { + case CC_LT: { if( (c=z[1])=='=' ){ *tokenType = TK_LE; return 2; }else if( c=='>' ){ *tokenType = TK_NE; @@ -186,11 +266,11 @@ }else{ *tokenType = TK_LT; return 1; } } - case '>': { + case CC_GT: { if( (c=z[1])=='=' ){ *tokenType = TK_GE; return 2; }else if( c=='>' ){ *tokenType = TK_RSHIFT; @@ -198,43 +278,41 @@ }else{ *tokenType = TK_GT; return 1; } } - case '!': { + case CC_BANG: { if( z[1]!='=' ){ *tokenType = TK_ILLEGAL; return 2; }else{ *tokenType = TK_NE; return 2; } } - case '|': { + case CC_PIPE: { if( z[1]!='|' ){ *tokenType = TK_BITOR; return 1; }else{ *tokenType = TK_CONCAT; return 2; } } - case ',': { + case CC_COMMA: { *tokenType = TK_COMMA; return 1; } - case '&': { + case CC_AND: { *tokenType = TK_BITAND; return 1; } - case '~': { + case CC_TILDA: { *tokenType = TK_BITNOT; return 1; } - case '`': - case '\'': - case '"': { + case CC_QUOTE: { int delim = z[0]; testcase( delim=='`' ); testcase( delim=='\'' ); testcase( delim=='"' ); for(i=1; (c=z[i])!=0; i++){ @@ -255,11 +333,11 @@ }else{ *tokenType = TK_ILLEGAL; return i; } } - case '.': { + case CC_DOT: { #ifndef SQLITE_OMIT_FLOATING_POINT if( !sqlite3Isdigit(z[1]) ) #endif { *tokenType = TK_DOT; @@ -266,12 +344,11 @@ return 1; } /* If the next character is a digit, this is a floating point ** number that begins with ".". Fall thru into the next case */ } - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': { + case CC_DIGIT: { testcase( z[0]=='0' ); testcase( z[0]=='1' ); testcase( z[0]=='2' ); testcase( z[0]=='3' ); testcase( z[0]=='4' ); testcase( z[0]=='5' ); testcase( z[0]=='6' ); testcase( z[0]=='7' ); testcase( z[0]=='8' ); testcase( z[0]=='9' ); *tokenType = TK_INTEGER; @@ -302,26 +379,22 @@ *tokenType = TK_ILLEGAL; i++; } return i; } - case '[': { + case CC_QUOTE2: { for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){} *tokenType = c==']' ? TK_ID : TK_ILLEGAL; return i; } - case '?': { + case CC_VARNUM: { *tokenType = TK_VARIABLE; for(i=1; sqlite3Isdigit(z[i]); i++){} return i; } -#ifndef SQLITE_OMIT_TCL_VARIABLE - case '$': -#endif - case '@': /* For compatibility with MS SQL Server */ - case '#': - case ':': { + case CC_DOLLAR: + case CC_VARALPHA: { int n = 0; testcase( z[0]=='$' ); testcase( z[0]=='@' ); testcase( z[0]==':' ); testcase( z[0]=='#' ); *tokenType = TK_VARIABLE; for(i=1; (c=z[i])!=0; i++){ @@ -347,11 +420,11 @@ } if( n==0 ) *tokenType = TK_ILLEGAL; return i; } #ifndef SQLITE_OMIT_BLOB_LITERAL - case 'x': case 'X': { + case CC_X: { testcase( z[0]=='x' ); testcase( z[0]=='X' ); if( z[1]=='\'' ){ *tokenType = TK_BLOB; for(i=2; sqlite3Isxdigit(z[i]); i++){} if( z[i]!='\'' || i%2 ){ @@ -359,24 +432,32 @@ while( z[i] && z[i]!='\'' ){ i++; } } if( z[i] ) i++; return i; } - /* Otherwise fall through to the next case */ + i = 1; + break; } #endif - default: { - if( !IdChar(*z) ){ - break; - } - for(i=1; IdChar(z[i]); i++){} + case CC_KYWD: { + for(i=1; aiClass[z[i]]<=CC_KYWD; i++){} + if( IdChar(z[i]) ){ i++; break; } *tokenType = TK_ID; return keywordCode((char*)z, i, tokenType); } + case CC_ID: { + i = 1; + break; + } + default: { + *tokenType = TK_ILLEGAL; + return 1; + } } - *tokenType = TK_ILLEGAL; - return 1; + while( IdChar(z[i]) ){ i++; } + *tokenType = TK_ID; + return i; } /* ** Run the parser on the given SQL string. The parser structure is ** passed in. An SQLITE_ status code is returned. If an error occurs Index: tool/mkkeywordhash.c ================================================================== --- tool/mkkeywordhash.c +++ tool/mkkeywordhash.c @@ -275,11 +275,14 @@ }; /* Number of keywords */ static int nKeyword = (sizeof(aKeywordTable)/sizeof(aKeywordTable[0])); -/* Map all alphabetic characters into the same case */ +/* Map all alphabetic characters into lower-case for hashing. This is +** only valid for alphabetics. In particular it does not work for '_' +** and so the hash cannot be on a keyword position that might be an '_'. +*/ #define charMap(X) (0x20|(X)) /* ** Comparision function for two Keyword records */ @@ -563,24 +566,32 @@ j = 0; } } printf("%s };\n", j==0 ? "" : "\n"); - printf(" int h, i;\n"); + printf(" int i, j;\n"); + printf(" const char *zKW;\n"); printf(" if( n>=2 ){\n"); - printf(" h = ((charMap(z[0])*4) ^ (charMap(z[n-1])*3) ^ n) %% %d;\n", + printf(" i = ((charMap(z[0])*4) ^ (charMap(z[n-1])*3) ^ n) %% %d;\n", bestSize); - printf(" for(i=((int)aHash[h])-1; i>=0; i=((int)aNext[i])-1){\n"); - printf(" if( aLen[i]==n &&" - " sqlite3StrNICmp(&zText[aOffset[i]],z,n)==0 ){\n"); + printf(" for(i=((int)aHash[i])-1; i>=0; i=((int)aNext[i])-1){\n"); + printf(" if( aLen[i]!=n ) continue;\n"); + printf(" j = 0;\n"); + printf(" zKW = &zText[aOffset[i]];\n"); + printf("#ifdef SQLITE_ASCII\n"); + printf(" while( j