Index: tool/lemon.c ================================================================== --- tool/lemon.c +++ tool/lemon.c @@ -411,10 +411,11 @@ char *filename; /* Name of the input file */ char *outname; /* Name of the current output file */ char *tokenprefix; /* A prefix added to token names in the .h file */ int nconflict; /* Number of parsing conflicts */ int nactiontab; /* Number of entries in the yy_action[] table */ + int nlookaheadtab; /* Number of entries in yy_lookahead[] */ int tablesize; /* Total table size of all tables in bytes */ int basisflag; /* Print only basis configurations */ int has_fallback; /* True if any %fallback is seen in the grammar */ int nolinenosflag; /* True if #line statements should not be printed */ char *argv0; /* Name of the program */ @@ -587,14 +588,16 @@ int mnLookahead; /* Minimum aLookahead[].lookahead */ int mnAction; /* Action associated with mnLookahead */ int mxLookahead; /* Maximum aLookahead[].lookahead */ int nLookahead; /* Used slots in aLookahead[] */ int nLookaheadAlloc; /* Slots allocated in aLookahead[] */ + int nterminal; /* Number of terminal symbols */ + int nsymbol; /* total number of symbols */ }; /* Return the number of entries in the yy_action table */ -#define acttab_size(X) ((X)->nAction) +#define acttab_lookahead_size(X) ((X)->nAction) /* The value for the N-th entry in yy_action */ #define acttab_yyaction(X,N) ((X)->aAction[N].action) /* The value for the N-th entry in yy_lookahead */ @@ -606,17 +609,19 @@ free( p->aLookahead ); free( p ); } /* Allocate a new acttab structure */ -acttab *acttab_alloc(void){ +acttab *acttab_alloc(int nsymbol, int nterminal){ acttab *p = (acttab *) calloc( 1, sizeof(*p) ); if( p==0 ){ fprintf(stderr,"Unable to allocate memory for a new acttab."); exit(1); } memset(p, 0, sizeof(*p)); + p->nsymbol = nsymbol; + p->nterminal = nterminal; return p; } /* Add a new action to the current transaction set. ** @@ -653,20 +658,28 @@ ** Add the transaction set built up with prior calls to acttab_action() ** into the current action table. Then reset the transaction set back ** to an empty set in preparation for a new round of acttab_action() calls. ** ** Return the offset into the action table of the new transaction. +** +** If the makeItSafe parameter is true, then the offset is chosen so that +** it is impossible to overread the yy_lookaside[] table regardless of +** the lookaside token. This is done for the terminal symbols, as they +** come from external inputs and can contain syntax errors. When makeItSafe +** is false, there is more flexibility in selecting offsets, resulting in +** a smaller table. For non-terminal symbols, which are never syntax errors, +** makeItSafe can be false. */ -int acttab_insert(acttab *p){ - int i, j, k, n; +int acttab_insert(acttab *p, int makeItSafe){ + int i, j, k, n, end; assert( p->nLookahead>0 ); /* Make sure we have enough space to hold the expanded action table ** in the worst case. The worst case occurs if the transaction set ** must be appended to the current action table */ - n = p->mxLookahead + 1; + n = p->nsymbol + 1; if( p->nAction + n >= p->nActionAlloc ){ int oldAlloc = p->nActionAlloc; p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20; p->aAction = (struct lookahead_action *) realloc( p->aAction, sizeof(p->aAction[0])*p->nActionAlloc); @@ -684,11 +697,12 @@ ** duplicate of the current transaction set. Fall out of the loop ** if and when the duplicate is found. ** ** i is the index in p->aAction[] where p->mnLookahead is inserted. */ - for(i=p->nAction-1; i>=0; i--){ + end = makeItSafe ? p->mnLookahead : 0; + for(i=p->nAction-1; i>=end; i--){ if( p->aAction[i].lookahead==p->mnLookahead ){ /* All lookaheads and actions in the aLookahead[] transaction ** must match against the candidate aAction[i] entry. */ if( p->aAction[i].action!=p->mnAction ) continue; for(j=0; jnLookahead; j++){ @@ -714,16 +728,17 @@ /* If no existing offsets exactly match the current transaction, find an ** an empty offset in the aAction[] table in which we can add the ** aLookahead[] transaction. */ - if( i<0 ){ + if( inAction, which means the ** transaction will be appended. */ - for(i=0; inActionAlloc - p->mxLookahead; i++){ + i = makeItSafe ? p->mnLookahead : 0; + for(; inActionAlloc - p->mxLookahead; i++){ if( p->aAction[i].lookahead<0 ){ for(j=0; jnLookahead; j++){ k = p->aLookahead[j].lookahead - p->mnLookahead + i; if( k<0 ) break; if( p->aAction[k].lookahead>=0 ) break; @@ -737,21 +752,39 @@ } } } } /* Insert transaction set at index i. */ +#if 0 + printf("Acttab:"); + for(j=0; jnLookahead; j++){ + printf(" %d", p->aLookahead[j].lookahead); + } + printf(" inserted at %d\n", i); +#endif for(j=0; jnLookahead; j++){ k = p->aLookahead[j].lookahead - p->mnLookahead + i; p->aAction[k] = p->aLookahead[j]; if( k>=p->nAction ) p->nAction = k+1; } + if( makeItSafe && i+p->nterminal>p->nAction ) p->nAction = i+p->nterminal; p->nLookahead = 0; /* Return the offset that is added to the lookahead in order to get the ** index into yy_action of the action */ return i - p->mnLookahead; } + +/* +** Return the size of the action table without the trailing syntax error +** entries. +*/ +int acttab_action_size(acttab *p){ + int n = p->nAction; + while( n>0 && p->aAction[n-1].lookahead<0 ){ n--; } + return n; +} /********************** From the file "build.c" *****************************/ /* ** Routines to construction the finite state machine for the LEMON ** parser generator. @@ -1722,10 +1755,11 @@ stats_line("total symbols", lem.nsymbol); stats_line("rules", lem.nrule); stats_line("states", lem.nxstate); stats_line("conflicts", lem.nconflict); stats_line("action table entries", lem.nactiontab); + stats_line("lookahead table entries", lem.nlookaheadtab); stats_line("total table size (bytes)", lem.tablesize); } if( lem.nconflict > 0 ){ fprintf(stderr,"%d parsing conflicts.\n",lem.nconflict); } @@ -4165,11 +4199,11 @@ mxNtOfst = mnNtOfst = 0; /* In an effort to minimize the action table size, use the heuristic ** of placing the largest action sets first */ for(i=0; inxstate*2; i++) ax[i].iOrder = i; qsort(ax, lemp->nxstate*2, sizeof(ax[0]), axset_compare); - pActtab = acttab_alloc(); + pActtab = acttab_alloc(lemp->nsymbol, lemp->nterminal); for(i=0; inxstate*2 && ax[i].nAction>0; i++){ stp = ax[i].stp; if( ax[i].isTkn ){ for(ap=stp->ap; ap; ap=ap->next){ int action; @@ -4176,11 +4210,11 @@ if( ap->sp->index>=lemp->nterminal ) continue; action = compute_action(lemp, ap); if( action<0 ) continue; acttab_action(pActtab, ap->sp->index, action); } - stp->iTknOfst = acttab_insert(pActtab); + stp->iTknOfst = acttab_insert(pActtab, 1); if( stp->iTknOfstiTknOfst; if( stp->iTknOfst>mxTknOfst ) mxTknOfst = stp->iTknOfst; }else{ for(ap=stp->ap; ap; ap=ap->next){ int action; @@ -4188,11 +4222,11 @@ if( ap->sp->index==lemp->nsymbol ) continue; action = compute_action(lemp, ap); if( action<0 ) continue; acttab_action(pActtab, ap->sp->index, action); } - stp->iNtOfst = acttab_insert(pActtab); + stp->iNtOfst = acttab_insert(pActtab, 0); if( stp->iNtOfstiNtOfst; if( stp->iNtOfst>mxNtOfst ) mxNtOfst = stp->iNtOfst; } #if 0 /* Uncomment for a trace of how the yy_action[] table fills out */ { int jj, nn; @@ -4247,11 +4281,11 @@ ** shifting non-terminals after a reduce. ** yy_default[] Default action for each state. */ /* Output the yy_action table */ - lemp->nactiontab = n = acttab_size(pActtab); + lemp->nactiontab = n = acttab_action_size(pActtab); lemp->tablesize += n*szActionType; fprintf(out,"#define YY_ACTTAB_COUNT (%d)\n", n); lineno++; fprintf(out,"static const YYACTIONTYPE yy_action[] = {\n"); lineno++; for(i=j=0; inlookaheadtab = n = acttab_lookahead_size(pActtab); lemp->tablesize += n*szCodeType; fprintf(out,"static const YYCODETYPE yy_lookahead[] = {\n"); lineno++; for(i=j=0; insymbol; @@ -4285,11 +4320,10 @@ fprintf(out, "};\n"); lineno++; /* Output the yy_shift_ofst[] table */ n = lemp->nxstate; while( n>0 && lemp->sorted[n-1]->iTknOfst==NO_OFFSET ) n--; - fprintf(out, "#define YY_SHIFT_USE_DFLT (%d)\n", lemp->nactiontab); lineno++; fprintf(out, "#define YY_SHIFT_COUNT (%d)\n", n-1); lineno++; fprintf(out, "#define YY_SHIFT_MIN (%d)\n", mnTknOfst); lineno++; fprintf(out, "#define YY_SHIFT_MAX (%d)\n", mxTknOfst); lineno++; fprintf(out, "static const %s yy_shift_ofst[] = {\n", minimum_size_type(mnTknOfst, lemp->nterminal+lemp->nactiontab, &sz)); @@ -4310,11 +4344,10 @@ } } fprintf(out, "};\n"); lineno++; /* Output the yy_reduce_ofst[] table */ - fprintf(out, "#define YY_REDUCE_USE_DFLT (%d)\n", mnNtOfst-1); lineno++; n = lemp->nxstate; while( n>0 && lemp->sorted[n-1]->iNtOfst==NO_OFFSET ) n--; fprintf(out, "#define YY_REDUCE_COUNT (%d)\n", n-1); lineno++; fprintf(out, "#define YY_REDUCE_MIN (%d)\n", mnNtOfst); lineno++; fprintf(out, "#define YY_REDUCE_MAX (%d)\n", mxNtOfst); lineno++; @@ -4380,14 +4413,12 @@ /* Generate a table containing the symbolic name of every symbol */ for(i=0; insymbol; i++){ lemon_sprintf(line,"\"%s\",",lemp->symbols[i]->name); - fprintf(out," %-15s",line); - if( (i&3)==3 ){ fprintf(out,"\n"); lineno++; } + fprintf(out," /* %4d */ \"%s\",\n",i, lemp->symbols[i]->name); lineno++; } - if( (i&3)!=0 ){ fprintf(out,"\n"); lineno++; } tplt_xfer(lemp->name,in,out,&lineno); /* Generate a table containing a text string that describes every ** rule in the rule set of the grammar. This information is used ** when tracing REDUCE actions. Index: tool/lempar.c ================================================================== --- tool/lempar.c +++ tool/lempar.c @@ -129,23 +129,17 @@ ** Given state S and lookahead X, the action is computed as either: ** ** (A) N = yy_action[ yy_shift_ofst[S] + X ] ** (B) N = yy_default[S] ** -** The (A) formula is preferred. The B formula is used instead if: -** (1) The yy_shift_ofst[S]+X value is out of range, or -** (2) yy_lookahead[yy_shift_ofst[S]+X] is not equal to X, or -** (3) yy_shift_ofst[S] equal YY_SHIFT_USE_DFLT. -** (Implementation note: YY_SHIFT_USE_DFLT is chosen so that -** YY_SHIFT_USE_DFLT+X will be out of range for all possible lookaheads X. -** Hence only tests (1) and (2) need to be evaluated.) +** The (A) formula is preferred. The B formula is used instead if +** yy_lookahead[yy_shift_ofst[S]+X] is not equal to X. ** ** The formulas above are for computing the action when the lookahead is ** a terminal symbol. If the lookahead is a non-terminal (as occurs after ** a reduce action) then the yy_reduce_ofst[] array is used in place of -** the yy_shift_ofst[] array and YY_REDUCE_USE_DFLT is used in place of -** YY_SHIFT_USE_DFLT. +** the yy_shift_ofst[] array. ** ** The following are the tables generated in this section: ** ** yy_action[] A single table containing all actions. ** yy_lookahead[] A table containing the lookahead for each entry in @@ -476,11 +470,12 @@ assert( stateno <= YY_SHIFT_COUNT ); do{ i = yy_shift_ofst[stateno]; assert( iLookAhead!=YYNOCODE ); i += iLookAhead; - if( i<0 || i>=YY_ACTTAB_COUNT || yy_lookahead[i]!=iLookAhead ){ + assert( i>=0 && i=YY_ACTTAB_COUNT || yy_lookahead[i]!=iLookAhead ){ return yy_default[stateno];