Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | In LEMON, fix a bug in the text formatter introduced by the previous commit. Also add the new "%token_class" directive for defining symbolic names that stand for any one of a collection of tokens. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | lemon-updates |
Files: | files | file ages | folders |
SHA1: |
da7890ca6b1d8e511377a46904712022 |
User & Date: | drh 2014-01-11 03:06:18.172 |
Context
2014-01-11
| ||
03:27 | Add the "%token_class" directive to the LEMON parser generator. This opens up the possibility of simplifying the parser. Also remove all calls to sprintf(), strcpy(), and strcat() from LEMON to avoid compiler warnings on OpenBSD. (Aside: It is this change to avoid harmless compiler warnings that was the cause of the recent spate of bugs.) (check-in: 8eb48c04bd user: drh tags: trunk) | |
03:13 | Add the "%token_class" directive to the LEMON parser generator. This opens up the possibility of simplifying the parser. Also remove all calls to sprintf(), strcpy(), and strcat() from LEMON to avoid compiler warnings on OpenBSD. (Closed-Leaf check-in: 4e4483b2d4 user: drh tags: buggy-lemon) | |
03:06 | In LEMON, fix a bug in the text formatter introduced by the previous commit. Also add the new "%token_class" directive for defining symbolic names that stand for any one of a collection of tokens. (Closed-Leaf check-in: da7890ca6b user: drh tags: lemon-updates) | |
2014-01-10
| ||
23:21 | Do not use sprintf(), strcpy() or strcat() in the implementation of the lemon parser generator tool, to avoid compiler warnings in OpenBSD. (check-in: e43c522dde user: drh tags: lemon-updates) | |
Changes
Changes to tool/lemon.c.
︙ | ︙ | |||
67 68 69 70 71 72 73 | ** %.*s ** */ static void lemon_addtext( char *zBuf, /* The buffer to which text is added */ int *pnUsed, /* Slots of the buffer used so far */ const char *zIn, /* Text to add */ | | > > > > | > > > > > > | | | | | | | | 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 | ** %.*s ** */ static void lemon_addtext( char *zBuf, /* The buffer to which text is added */ int *pnUsed, /* Slots of the buffer used so far */ const char *zIn, /* Text to add */ int nIn, /* Bytes of text to add. -1 to use strlen() */ int iWidth /* Field width. Negative to left justify */ ){ if( nIn<0 ) for(nIn=0; zIn[nIn]; nIn++){} while( iWidth>nIn ){ zBuf[*(pnUsed++)] = ' '; iWidth--; } if( nIn==0 ) return; memcpy(&zBuf[*pnUsed], zIn, nIn); *pnUsed += nIn; while( (-iWidth)>nIn ){ zBuf[*(pnUsed++)] = ' '; iWidth++; } zBuf[*pnUsed] = 0; } static int lemon_vsprintf(char *str, const char *zFormat, va_list ap){ int i, j, k, c, size; int nUsed = 0; const char *z; char zTemp[50]; str[0] = 0; for(i=j=0; (c = zFormat[i])!=0; i++){ if( c=='%' ){ int iWidth = 0; lemon_addtext(str, &nUsed, &zFormat[j], i-j, 0); c = zFormat[++i]; if( isdigit(c) || (c=='-' && isdigit(zFormat[i+1])) ){ if( c=='-' ) i++; while( isdigit(zFormat[i]) ) iWidth = iWidth*10 + zFormat[i++] - '0'; if( c=='-' ) iWidth = -iWidth; c = zFormat[i]; } if( c=='d' ){ int v = va_arg(ap, int); if( v<0 ){ lemon_addtext(str, &nUsed, "-", 1, iWidth); v = -v; }else if( v==0 ){ lemon_addtext(str, &nUsed, "0", 1, iWidth); } k = 0; while( v>0 ){ k++; zTemp[sizeof(zTemp)-k] = (v%10) + '0'; v /= 10; } lemon_addtext(str, &nUsed, &zTemp[sizeof(zTemp)-k], k, iWidth); }else if( c=='s' ){ z = va_arg(ap, const char*); lemon_addtext(str, &nUsed, z, -1, iWidth); }else if( c=='.' 
&& memcmp(&zFormat[i], ".*s", 3)==0 ){ i += 2; k = va_arg(ap, int); z = va_arg(ap, const char*); lemon_addtext(str, &nUsed, z, k, iWidth); }else if( c=='%' ){ lemon_addtext(str, &nUsed, "%", 1, 0); }else{ fprintf(stderr, "illegal format\n"); exit(1); } j = i+1; } } lemon_addtext(str, &nUsed, &zFormat[j], i-j, 0); return nUsed; } static int lemon_sprintf(char *str, const char *format, ...){ va_list ap; int rc; va_start(ap, format); rc = lemon_vsprintf(str, format, ap); |
︙ | ︙ | |||
1534 1535 1536 1537 1538 1539 1540 | if( lem.errorcnt ) exit(lem.errorcnt); if( lem.nrule==0 ){ fprintf(stderr,"Empty grammar.\n"); exit(1); } /* Count and index the symbols of the grammar */ | < > | | | > > > | 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 | if( lem.errorcnt ) exit(lem.errorcnt); if( lem.nrule==0 ){ fprintf(stderr,"Empty grammar.\n"); exit(1); } /* Count and index the symbols of the grammar */ Symbol_new("{default}"); lem.nsymbol = Symbol_count(); lem.symbols = Symbol_arrayof(); for(i=0; i<lem.nsymbol; i++) lem.symbols[i]->index = i; qsort(lem.symbols,lem.nsymbol,sizeof(struct symbol*), Symbolcmpp); for(i=0; i<lem.nsymbol; i++) lem.symbols[i]->index = i; while( lem.symbols[i-1]->type==MULTITERMINAL ){ i--; } assert( strcmp(lem.symbols[i-1]->name,"{default}")==0 ); lem.nsymbol = i - 1; for(i=1; isupper(lem.symbols[i]->name[0]); i++); lem.nterminal = i; /* Generate a reprint of the grammar, if requested on the command line */ if( rpflag ){ Reprint(&lem); }else{ |
︙ | ︙ | |||
2027 2028 2029 2030 2031 2032 2033 | PRECEDENCE_MARK_1, PRECEDENCE_MARK_2, RESYNC_AFTER_RULE_ERROR, RESYNC_AFTER_DECL_ERROR, WAITING_FOR_DESTRUCTOR_SYMBOL, WAITING_FOR_DATATYPE_SYMBOL, WAITING_FOR_FALLBACK_ID, | | > > > | 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 | PRECEDENCE_MARK_1, PRECEDENCE_MARK_2, RESYNC_AFTER_RULE_ERROR, RESYNC_AFTER_DECL_ERROR, WAITING_FOR_DESTRUCTOR_SYMBOL, WAITING_FOR_DATATYPE_SYMBOL, WAITING_FOR_FALLBACK_ID, WAITING_FOR_WILDCARD_ID, WAITING_FOR_CLASS_ID, WAITING_FOR_CLASS_TOKEN }; struct pstate { char *filename; /* Name of the input file */ int tokenlineno; /* Linenumber at which current token starts */ int errorcnt; /* Number of errors so far */ char *tokenstart; /* Text of current token */ struct lemon *gp; /* Global state vector */ enum e_state state; /* The state of the parser */ struct symbol *fallback; /* The fallback token */ struct symbol *tkclass; /* Token class symbol */ struct symbol *lhs; /* Left-hand side of current rule */ const char *lhsalias; /* Alias for the LHS */ int nrhs; /* Number of right-hand side symbols seen */ struct symbol *rhs[MAXRHS]; /* RHS symbols */ const char *alias[MAXRHS]; /* Aliases for each RHS symbol (or NULL) */ struct rule *prevrule; /* Previous rule parsed */ const char *declkeyword; /* Keyword of a declaration */ |
︙ | ︙ | |||
2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 | }else if( strcmp(x,"type")==0 ){ psp->state = WAITING_FOR_DATATYPE_SYMBOL; }else if( strcmp(x,"fallback")==0 ){ psp->fallback = 0; psp->state = WAITING_FOR_FALLBACK_ID; }else if( strcmp(x,"wildcard")==0 ){ psp->state = WAITING_FOR_WILDCARD_ID; }else{ ErrorMsg(psp->filename,psp->tokenlineno, "Unknown declaration keyword: \"%%%s\".",x); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; } }else{ | > > | 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 | }else if( strcmp(x,"type")==0 ){ psp->state = WAITING_FOR_DATATYPE_SYMBOL; }else if( strcmp(x,"fallback")==0 ){ psp->fallback = 0; psp->state = WAITING_FOR_FALLBACK_ID; }else if( strcmp(x,"wildcard")==0 ){ psp->state = WAITING_FOR_WILDCARD_ID; }else if( strcmp(x,"token_class")==0 ){ psp->state = WAITING_FOR_CLASS_ID; }else{ ErrorMsg(psp->filename,psp->tokenlineno, "Unknown declaration keyword: \"%%%s\".",x); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; } }else{ |
︙ | ︙ | |||
2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 | psp->gp->wildcard = sp; }else{ ErrorMsg(psp->filename, psp->tokenlineno, "Extra wildcard to token: %s", x); psp->errorcnt++; } } break; case RESYNC_AFTER_RULE_ERROR: /* if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; ** break; */ case RESYNC_AFTER_DECL_ERROR: if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; if( x[0]=='%' ) psp->state = WAITING_FOR_DECL_KEYWORD; | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 | psp->gp->wildcard = sp; }else{ ErrorMsg(psp->filename, psp->tokenlineno, "Extra wildcard to token: %s", x); psp->errorcnt++; } } break; case WAITING_FOR_CLASS_ID: if( !islower(x[0]) ){ ErrorMsg(psp->filename, psp->tokenlineno, "%%token_class must be followed by an identifier: ", x); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; }else if( Symbol_find(x) ){ ErrorMsg(psp->filename, psp->tokenlineno, "Symbol \"%s\" already used", x); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; }else{ psp->tkclass = Symbol_new(x); psp->tkclass->type = MULTITERMINAL; psp->state = WAITING_FOR_CLASS_TOKEN; } break; case WAITING_FOR_CLASS_TOKEN: if( x[0]=='.' ){ psp->state = WAITING_FOR_DECL_OR_RULE; }else if( isupper(x[0]) || ((x[0]=='|' || x[0]=='/') && isupper(x[1])) ){ struct symbol *msp = psp->tkclass; msp->nsubsym++; msp->subsym = (struct symbol **) realloc(msp->subsym, sizeof(struct symbol*)*msp->nsubsym); if( !isupper(x[0]) ) x++; msp->subsym[msp->nsubsym-1] = Symbol_new(x); }else{ ErrorMsg(psp->filename, psp->tokenlineno, "%%token_class argument \"%s\" should be a token", x); psp->errorcnt++; psp->state = RESYNC_AFTER_DECL_ERROR; } break; case RESYNC_AFTER_RULE_ERROR: /* if( x[0]=='.' 
) psp->state = WAITING_FOR_DECL_OR_RULE; ** break; */ case RESYNC_AFTER_DECL_ERROR: if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; if( x[0]=='%' ) psp->state = WAITING_FOR_DECL_KEYWORD; |
︙ | ︙ | |||
2863 2864 2865 2866 2867 2868 2869 | } for(rp=lemp->rule; rp; rp=rp->next){ printf("%s",rp->lhs->name); /* if( rp->lhsalias ) printf("(%s)",rp->lhsalias); */ printf(" ::="); for(i=0; i<rp->nrhs; i++){ sp = rp->rhs[i]; | < > > > | 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 | } for(rp=lemp->rule; rp; rp=rp->next){ printf("%s",rp->lhs->name); /* if( rp->lhsalias ) printf("(%s)",rp->lhsalias); */ printf(" ::="); for(i=0; i<rp->nrhs; i++){ sp = rp->rhs[i]; if( sp->type==MULTITERMINAL ){ printf(" %s", sp->subsym[0]->name); for(j=1; j<sp->nsubsym; j++){ printf("|%s", sp->subsym[j]->name); } }else{ printf(" %s", sp->name); } /* if( rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */ } printf("."); if( rp->precsym ) printf(" [%s]",rp->precsym->name); /* if( rp->code ) printf("\n %s",rp->code); */ printf("\n"); |
︙ | ︙ | |||
2889 2890 2891 2892 2893 2894 2895 | int i, j; rp = cfp->rp; fprintf(fp,"%s ::=",rp->lhs->name); for(i=0; i<=rp->nrhs; i++){ if( i==cfp->dot ) fprintf(fp," *"); if( i==rp->nrhs ) break; sp = rp->rhs[i]; | < > > > | 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 | int i, j; rp = cfp->rp; fprintf(fp,"%s ::=",rp->lhs->name); for(i=0; i<=rp->nrhs; i++){ if( i==cfp->dot ) fprintf(fp," *"); if( i==rp->nrhs ) break; sp = rp->rhs[i]; if( sp->type==MULTITERMINAL ){ fprintf(fp," %s", sp->subsym[0]->name); for(j=1; j<sp->nsubsym; j++){ fprintf(fp,"|%s",sp->subsym[j]->name); } }else{ fprintf(fp," %s", sp->name); } } } /* #define TEST */ #if 0 /* Print a set */ |
︙ | ︙ | |||
3640 3641 3642 3643 3644 3645 3646 3647 | ** Write text on "out" that describes the rule "rp". */ static void writeRuleText(FILE *out, struct rule *rp){ int j; fprintf(out,"%s ::=", rp->lhs->name); for(j=0; j<rp->nrhs; j++){ struct symbol *sp = rp->rhs[j]; fprintf(out," %s", sp->name); | > < > > | 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 | ** Write text on "out" that describes the rule "rp". */ static void writeRuleText(FILE *out, struct rule *rp){ int j; fprintf(out,"%s ::=", rp->lhs->name); for(j=0; j<rp->nrhs; j++){ struct symbol *sp = rp->rhs[j]; if( sp->type!=MULTITERMINAL ){ fprintf(out," %s", sp->name); }else{ int k; fprintf(out," %s", sp->subsym[0]->name); for(k=1; k<sp->nsubsym; k++){ fprintf(out,"|%s",sp->subsym[k]->name); } } } } |
︙ | ︙ | |||
4110 4111 4112 4113 4114 4115 4116 | if( lemp->tokenprefix ) prefix = lemp->tokenprefix; else prefix = ""; in = file_open(lemp,".h","rb"); if( in ){ int nextChar; for(i=1; i<lemp->nterminal && fgets(line,LINESIZE,in); i++){ | | > | | 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 | if( lemp->tokenprefix ) prefix = lemp->tokenprefix; else prefix = ""; in = file_open(lemp,".h","rb"); if( in ){ int nextChar; for(i=1; i<lemp->nterminal && fgets(line,LINESIZE,in); i++){ lemon_sprintf(pattern,"#define %s%-30s %3d\n", prefix,lemp->symbols[i]->name,i); if( strcmp(line,pattern) ) break; } nextChar = fgetc(in); fclose(in); if( i==lemp->nterminal && nextChar==EOF ){ /* No change in the file. Don't rewrite it. */ return; } } out = file_open(lemp,".h","wb"); if( out ){ for(i=1; i<lemp->nterminal; i++){ fprintf(out,"#define %s%-30s %3d\n",prefix,lemp->symbols[i]->name,i); } fclose(out); } return; } /* Reduce the size of the action tables, if possible, by making use |
︙ | ︙ | |||
4493 4494 4495 4496 4497 4498 4499 | sp->useCnt = 0; Symbol_insert(sp,sp->name); } sp->useCnt++; return sp; } | | > > > > | | | | | < | | 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 | sp->useCnt = 0; Symbol_insert(sp,sp->name); } sp->useCnt++; return sp; } /* Compare two symbols for sorting purposes. Return negative, ** zero, or positive if a is less then, equal to, or greater ** than b. ** ** Symbols that begin with upper case letters (terminals or tokens) ** must sort before symbols that begin with lower case letters ** (non-terminals). And MULTITERMINAL symbols (created using the ** %token_class directive) must sort at the very end. Other than ** that, the order does not matter. ** ** We find experimentally that leaving the symbols in their original ** order (the order they appeared in the grammar file) gives the ** smallest parser tables in SQLite. */ int Symbolcmpp(const void *_a, const void *_b) { const struct symbol *a = *(const struct symbol **) _a; const struct symbol *b = *(const struct symbol **) _b; int i1 = a->type==MULTITERMINAL ? 3 : a->name[0]>'Z' ? 2 : 1; int i2 = b->type==MULTITERMINAL ? 3 : b->name[0]>'Z' ? 2 : 1; return i1==i2 ? a->index - b->index : i1 - i2; } /* There is one instance of the following structure for each ** associative array of type "x2". */ struct s_x2 { int size; /* The number of available slots. */ |
︙ | ︙ |