/ Check-in [7bfe7a36]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:In the LEMON-generated parser, rearrange the meanings of integer action codes so that reduce actions occur last. This means that the most common case (reduce actions) can be recognized with a single comparison operation, thus speeding up the main parser loop, slightly.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | lemon-improvements
Files: files | file ages | folders
SHA3-256: 7bfe7a360261ac7227840db49487c2f0fe338a2f1b868fcaada1e04a8d2b8f7a
User & Date: drh 2017-12-24 23:38:10
Context
2017-12-25
00:10
In the LEMON-generated parser, avoid unnecessary tests for the acceptance state. check-in: fdbb35c5 user: drh tags: lemon-improvements
2017-12-24
23:38
In the LEMON-generated parser, rearrange the meanings of integer action codes so that reduce actions occur last. This means that the most common case (reduce actions) can be recognized with a single comparison operation, thus speeding up the main parser loop, slightly. check-in: 7bfe7a36 user: drh tags: lemon-improvements
17:06
Improved parser tracing output. check-in: 25be5750 user: drh tags: lemon-improvements
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to tool/lemon.c.

   380    380     struct rule *rule;       /* List of all rules */
   381    381     struct rule *startRule;  /* First rule */
   382    382     int nstate;              /* Number of states */
   383    383     int nxstate;             /* nstate with tail degenerate states removed */
   384    384     int nrule;               /* Number of rules */
   385    385     int nsymbol;             /* Number of terminal and nonterminal symbols */
   386    386     int nterminal;           /* Number of terminal symbols */
          387  +  int minShiftReduce;      /* Minimum shift-reduce action value */
          388  +  int errAction;           /* Error action value */
          389  +  int accAction;           /* Accept action value */
          390  +  int noAction;            /* No-op action value */
          391  +  int minReduce;           /* Minimum reduce action */
          392  +  int maxAction;           /* Maximum action value of any kind */
   387    393     struct symbol **symbols; /* Sorted array of pointers to symbols */
   388    394     int errorcnt;            /* Number of errors */
   389    395     struct symbol *errsym;   /* The error symbol */
   390    396     struct symbol *wildcard; /* Token that matches anything */
   391    397     char *name;              /* Name of the generated parser */
   392    398     char *arg;               /* Declaration of the 3th argument to parser */
   393    399     char *tokentype;         /* Type of terminal symbols in the parser stack */
................................................................................
  3015   3021     if( fp==0 && *mode=='w' ){
  3016   3022       fprintf(stderr,"Can't open file \"%s\".\n",lemp->outname);
  3017   3023       lemp->errorcnt++;
  3018   3024       return 0;
  3019   3025     }
  3020   3026     return fp;
  3021   3027   }
         3028  +
         3029  +/* Print the text of a rule
         3030  +*/
         3031  +void rule_print(FILE *out, struct rule *rp){
         3032  +  int i, j;
         3033  +  fprintf(out, "%s",rp->lhs->name);
         3034  +  /*    if( rp->lhsalias ) fprintf(out,"(%s)",rp->lhsalias); */
         3035  +  fprintf(out," ::=");
         3036  +  for(i=0; i<rp->nrhs; i++){
         3037  +    struct symbol *sp = rp->rhs[i];
         3038  +    if( sp->type==MULTITERMINAL ){
         3039  +      fprintf(out," %s", sp->subsym[0]->name);
         3040  +      for(j=1; j<sp->nsubsym; j++){
         3041  +        fprintf(out,"|%s", sp->subsym[j]->name);
         3042  +      }
         3043  +    }else{
         3044  +      fprintf(out," %s", sp->name);
         3045  +    }
         3046  +    /* if( rp->rhsalias[i] ) fprintf(out,"(%s)",rp->rhsalias[i]); */
         3047  +  }
         3048  +}
  3022   3049   
  3023   3050   /* Duplicate the input file without comments and without actions
  3024   3051   ** on rules */
  3025   3052   void Reprint(struct lemon *lemp)
  3026   3053   {
  3027   3054     struct rule *rp;
  3028   3055     struct symbol *sp;
................................................................................
  3043   3070         sp = lemp->symbols[j];
  3044   3071         assert( sp->index==j );
  3045   3072         printf(" %3d %-*.*s",j,maxlen,maxlen,sp->name);
  3046   3073       }
  3047   3074       printf("\n");
  3048   3075     }
  3049   3076     for(rp=lemp->rule; rp; rp=rp->next){
  3050         -    printf("%s",rp->lhs->name);
  3051         -    /*    if( rp->lhsalias ) printf("(%s)",rp->lhsalias); */
  3052         -    printf(" ::=");
  3053         -    for(i=0; i<rp->nrhs; i++){
  3054         -      sp = rp->rhs[i];
  3055         -      if( sp->type==MULTITERMINAL ){
  3056         -        printf(" %s", sp->subsym[0]->name);
  3057         -        for(j=1; j<sp->nsubsym; j++){
  3058         -          printf("|%s", sp->subsym[j]->name);
  3059         -        }
  3060         -      }else{
  3061         -        printf(" %s", sp->name);
  3062         -      }
  3063         -      /* if( rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */
  3064         -    }
         3077  +    rule_print(stdout, rp);
  3065   3078       printf(".");
  3066   3079       if( rp->precsym ) printf(" [%s]",rp->precsym->name);
  3067   3080       /* if( rp->code ) printf("\n    %s",rp->code); */
  3068   3081       printf("\n");
  3069   3082     }
  3070   3083   }
  3071   3084   
................................................................................
  3317   3330   */
  3318   3331   PRIVATE int compute_action(struct lemon *lemp, struct action *ap)
  3319   3332   {
  3320   3333     int act;
  3321   3334     switch( ap->type ){
  3322   3335       case SHIFT:  act = ap->x.stp->statenum;                        break;
  3323   3336       case SHIFTREDUCE: {
  3324         -      act = ap->x.rp->iRule + lemp->nstate;
  3325   3337         /* Since a SHIFT is inherient after a prior REDUCE, convert any
  3326   3338         ** SHIFTREDUCE action with a nonterminal on the LHS into a simple
  3327   3339         ** REDUCE action: */
  3328         -      if( ap->sp->index>=lemp->nterminal ) act += lemp->nrule;
         3340  +      if( ap->sp->index>=lemp->nterminal ){
         3341  +        act = lemp->minReduce + ap->x.rp->iRule;
         3342  +      }else{
         3343  +        act = lemp->minShiftReduce + ap->x.rp->iRule;
         3344  +      }
  3329   3345         break;
  3330   3346       }
  3331         -    case REDUCE: act = ap->x.rp->iRule + lemp->nstate+lemp->nrule; break;
  3332         -    case ERROR:  act = lemp->nstate + lemp->nrule*2;               break;
  3333         -    case ACCEPT: act = lemp->nstate + lemp->nrule*2 + 1;           break;
         3347  +    case REDUCE: act = lemp->minReduce + ap->x.rp->iRule;          break;
         3348  +    case ERROR:  act = lemp->errAction;                            break;
         3349  +    case ACCEPT: act = lemp->accAction;                            break;
  3334   3350       default:     act = -1; break;
  3335   3351     }
  3336   3352     return act;
  3337   3353   }
  3338   3354   
  3339   3355   #define LINESIZE 1000
  3340   3356   /* The next cluster of routines are for reading the template file
................................................................................
  4034   4050     int szActionType;     /* sizeof(YYACTIONTYPE) */
  4035   4051     int szCodeType;       /* sizeof(YYCODETYPE)   */
  4036   4052     const char *name;
  4037   4053     int mnTknOfst, mxTknOfst;
  4038   4054     int mnNtOfst, mxNtOfst;
  4039   4055     struct axset *ax;
  4040   4056   
         4057  +  lemp->minShiftReduce = lemp->nstate;
         4058  +  lemp->errAction = lemp->minShiftReduce + lemp->nrule;
         4059  +  lemp->accAction = lemp->errAction + 1;
         4060  +  lemp->noAction = lemp->accAction + 1;
         4061  +  lemp->minReduce = lemp->noAction + 1;
         4062  +  lemp->maxAction = lemp->minReduce + lemp->nrule;
         4063  +
  4041   4064     in = tplt_open(lemp);
  4042   4065     if( in==0 ) return;
  4043   4066     out = file_open(lemp,".c","wb");
  4044   4067     if( out==0 ){
  4045   4068       fclose(in);
  4046   4069       return;
  4047   4070     }
................................................................................
  4072   4095     tplt_xfer(lemp->name,in,out,&lineno);
  4073   4096   
  4074   4097     /* Generate the defines */
  4075   4098     fprintf(out,"#define YYCODETYPE %s\n",
  4076   4099       minimum_size_type(0, lemp->nsymbol+1, &szCodeType)); lineno++;
  4077   4100     fprintf(out,"#define YYNOCODE %d\n",lemp->nsymbol+1);  lineno++;
  4078   4101     fprintf(out,"#define YYACTIONTYPE %s\n",
  4079         -    minimum_size_type(0,lemp->nstate+lemp->nrule*2+5,&szActionType)); lineno++;
         4102  +    minimum_size_type(0,lemp->maxAction,&szActionType)); lineno++;
  4080   4103     if( lemp->wildcard ){
  4081   4104       fprintf(out,"#define YYWILDCARD %d\n",
  4082   4105          lemp->wildcard->index); lineno++;
  4083   4106     }
  4084   4107     print_stack_union(out,lemp,&lineno,mhflag);
  4085   4108     fprintf(out, "#ifndef YYSTACKDEPTH\n"); lineno++;
  4086   4109     if( lemp->stacksize ){
................................................................................
  4197   4220     }
  4198   4221   
  4199   4222     /* Finish rendering the constants now that the action table has
  4200   4223     ** been computed */
  4201   4224     fprintf(out,"#define YYNSTATE             %d\n",lemp->nxstate);  lineno++;
  4202   4225     fprintf(out,"#define YYNRULE              %d\n",lemp->nrule);  lineno++;
  4203   4226     fprintf(out,"#define YY_MAX_SHIFT         %d\n",lemp->nxstate-1); lineno++;
  4204         -  fprintf(out,"#define YY_MIN_SHIFTREDUCE   %d\n",lemp->nstate); lineno++;
  4205         -  i = lemp->nstate + lemp->nrule;
         4227  +  i = lemp->minShiftReduce;
         4228  +  fprintf(out,"#define YY_MIN_SHIFTREDUCE   %d\n",i); lineno++;
         4229  +  i += lemp->nrule;
  4206   4230     fprintf(out,"#define YY_MAX_SHIFTREDUCE   %d\n", i-1); lineno++;
  4207         -  fprintf(out,"#define YY_MIN_REDUCE        %d\n", i); lineno++;
  4208         -  i = lemp->nstate + lemp->nrule*2;
         4231  +  fprintf(out,"#define YY_ERROR_ACTION      %d\n", lemp->errAction); lineno++;
         4232  +  fprintf(out,"#define YY_ACCEPT_ACTION     %d\n", lemp->accAction); lineno++;
         4233  +  fprintf(out,"#define YY_NO_ACTION         %d\n", lemp->noAction); lineno++;
         4234  +  fprintf(out,"#define YY_MIN_REDUCE        %d\n", lemp->minReduce); lineno++;
         4235  +  i = lemp->minReduce + lemp->nrule;
  4209   4236     fprintf(out,"#define YY_MAX_REDUCE        %d\n", i-1); lineno++;
  4210         -  fprintf(out,"#define YY_ERROR_ACTION      %d\n", i); lineno++;
  4211         -  fprintf(out,"#define YY_ACCEPT_ACTION     %d\n", i+1); lineno++;
  4212         -  fprintf(out,"#define YY_NO_ACTION         %d\n", i+2); lineno++;
  4213   4237     tplt_xfer(lemp->name,in,out,&lineno);
  4214   4238   
  4215   4239     /* Now output the action table and its associates:
  4216   4240     **
  4217   4241     **  yy_action[]        A single table containing all actions.
  4218   4242     **  yy_lookahead[]     A table containing the lookahead for each entry in
  4219   4243     **                     yy_action.  Used to detect hash collisions.
................................................................................
  4227   4251     /* Output the yy_action table */
  4228   4252     lemp->nactiontab = n = acttab_size(pActtab);
  4229   4253     lemp->tablesize += n*szActionType;
  4230   4254     fprintf(out,"#define YY_ACTTAB_COUNT (%d)\n", n); lineno++;
  4231   4255     fprintf(out,"static const YYACTIONTYPE yy_action[] = {\n"); lineno++;
  4232   4256     for(i=j=0; i<n; i++){
  4233   4257       int action = acttab_yyaction(pActtab, i);
  4234         -    if( action<0 ) action = lemp->nstate + lemp->nrule + 2;
         4258  +    if( action<0 ) action = lemp->noAction;
  4235   4259       if( j==0 ) fprintf(out," /* %5d */ ", i);
  4236   4260       fprintf(out, " %4d,", action);
  4237   4261       if( j==9 || i==n-1 ){
  4238   4262         fprintf(out, "\n"); lineno++;
  4239   4263         j = 0;
  4240   4264       }else{
  4241   4265         j++;
................................................................................
  4316   4340     /* Output the default action table */
  4317   4341     fprintf(out, "static const YYACTIONTYPE yy_default[] = {\n"); lineno++;
  4318   4342     n = lemp->nxstate;
  4319   4343     lemp->tablesize += n*szActionType;
  4320   4344     for(i=j=0; i<n; i++){
  4321   4345       stp = lemp->sorted[i];
  4322   4346       if( j==0 ) fprintf(out," /* %5d */ ", i);
  4323         -    fprintf(out, " %4d,", stp->iDfltReduce+lemp->nstate+lemp->nrule);
         4347  +    if( stp->iDfltReduce<0 ){
         4348  +      fprintf(out, " %4d,", lemp->errAction);
         4349  +    }else{
         4350  +      fprintf(out, " %4d,", stp->iDfltReduce + lemp->minReduce);
         4351  +    }
  4324   4352       if( j==9 || i==n-1 ){
  4325   4353         fprintf(out, "\n"); lineno++;
  4326   4354         j = 0;
  4327   4355       }else{
  4328   4356         j++;
  4329   4357       }
  4330   4358     }
................................................................................
  4397   4425       struct symbol *dflt_sp = 0;
  4398   4426       int once = 1;
  4399   4427       for(i=0; i<lemp->nsymbol; i++){
  4400   4428         struct symbol *sp = lemp->symbols[i];
  4401   4429         if( sp==0 || sp->type==TERMINAL ||
  4402   4430             sp->index<=0 || sp->destructor!=0 ) continue;
  4403   4431         if( once ){
  4404         -        fprintf(out, "      /* Default NON-TERMINAL Destructor */\n"); lineno++;
         4432  +        fprintf(out, "      /* Default NON-TERMINAL Destructor */\n");lineno++;
  4405   4433           once = 0;
  4406   4434         }
  4407   4435         fprintf(out,"    case %d: /* %s */\n", sp->index, sp->name); lineno++;
  4408   4436         dflt_sp = sp;
  4409   4437       }
  4410   4438       if( dflt_sp!=0 ){
  4411   4439         emit_destructor_code(out,dflt_sp,lemp,&lineno);
................................................................................
  4440   4468     tplt_xfer(lemp->name,in,out,&lineno);
  4441   4469   
  4442   4470     /* Generate the table of rule information
  4443   4471     **
  4444   4472     ** Note: This code depends on the fact that rules are number
  4445   4473     ** sequentually beginning with 0.
  4446   4474     */
  4447         -  for(rp=lemp->rule; rp; rp=rp->next){
  4448         -    fprintf(out,"  { %d, %d },\n",rp->lhs->index,-rp->nrhs); lineno++;
         4475  +  for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){
         4476  +    fprintf(out,"  { %4d, %4d }, /* (%d) ",rp->lhs->index,-rp->nrhs,i);
         4477  +    rule_print(out, rp);
         4478  +    fprintf(out," */\n"); lineno++;
  4449   4479     }
  4450   4480     tplt_xfer(lemp->name,in,out,&lineno);
  4451   4481   
  4452   4482     /* Generate code which execution during each REDUCE action */
  4453   4483     i = 0;
  4454   4484     for(rp=lemp->rule; rp; rp=rp->next){
  4455   4485       i += translate_code(lemp, rp);
................................................................................
  4707   4737     int i;
  4708   4738     struct state *stp;
  4709   4739     struct action *ap;
  4710   4740   
  4711   4741     for(i=0; i<lemp->nstate; i++){
  4712   4742       stp = lemp->sorted[i];
  4713   4743       stp->nTknAct = stp->nNtAct = 0;
  4714         -    stp->iDfltReduce = lemp->nrule;  /* Init dflt action to "syntax error" */
         4744  +    stp->iDfltReduce = -1; /* Init dflt action to "syntax error" */
  4715   4745       stp->iTknOfst = NO_OFFSET;
  4716   4746       stp->iNtOfst = NO_OFFSET;
  4717   4747       for(ap=stp->ap; ap; ap=ap->next){
  4718   4748         int iAction = compute_action(lemp,ap);
  4719   4749         if( iAction>=0 ){
  4720   4750           if( ap->sp->index<lemp->nterminal ){
  4721   4751             stp->nTknAct++;
  4722   4752           }else if( ap->sp->index<lemp->nsymbol ){
  4723   4753             stp->nNtAct++;
  4724   4754           }else{
  4725   4755             assert( stp->autoReduce==0 || stp->pDfltReduce==ap->x.rp );
  4726         -          stp->iDfltReduce = iAction - lemp->nstate - lemp->nrule;
         4756  +          stp->iDfltReduce = iAction;
  4727   4757           }
  4728   4758         }
  4729   4759       }
  4730   4760     }
  4731   4761     qsort(&lemp->sorted[1], lemp->nstate-1, sizeof(lemp->sorted[0]),
  4732   4762           stateResortCompare);
  4733   4763     for(i=0; i<lemp->nstate; i++){

Changes to tool/lempar.c.

    71     71   **    YYERRORSYMBOL      is the code number of the error symbol.  If not
    72     72   **                       defined, then do no error processing.
    73     73   **    YYNSTATE           the combined number of states.
    74     74   **    YYNRULE            the number of rules in the grammar
    75     75   **    YY_MAX_SHIFT       Maximum value for shift actions
    76     76   **    YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions
    77     77   **    YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions
    78         -**    YY_MIN_REDUCE      Minimum value for reduce actions
    79         -**    YY_MAX_REDUCE      Maximum value for reduce actions
    80     78   **    YY_ERROR_ACTION    The yy_action[] code for syntax error
    81     79   **    YY_ACCEPT_ACTION   The yy_action[] code for accept
    82     80   **    YY_NO_ACTION       The yy_action[] code for no-op
           81  +**    YY_MIN_REDUCE      Minimum value for reduce actions
           82  +**    YY_MAX_REDUCE      Maximum value for reduce actions
    83     83   */
    84     84   #ifndef INTERFACE
    85     85   # define INTERFACE 1
    86     86   #endif
    87     87   /************* Begin control #defines *****************************************/
    88     88   %%
    89     89   /************* End control #defines *******************************************/
................................................................................
   111    111   **
   112    112   **   0 <= N <= YY_MAX_SHIFT             Shift N.  That is, push the lookahead
   113    113   **                                      token onto the stack and goto state N.
   114    114   **
   115    115   **   N between YY_MIN_SHIFTREDUCE       Shift to an arbitrary state then
   116    116   **     and YY_MAX_SHIFTREDUCE           reduce by rule N-YY_MIN_SHIFTREDUCE.
   117    117   **
   118         -**   N between YY_MIN_REDUCE            Reduce by rule N-YY_MIN_REDUCE
   119         -**     and YY_MAX_REDUCE
   120         -**
   121    118   **   N == YY_ERROR_ACTION               A syntax error has occurred.
   122    119   **
   123    120   **   N == YY_ACCEPT_ACTION              The parser accepts its input.
   124    121   **
   125    122   **   N == YY_NO_ACTION                  No such action.  Denotes unused
   126    123   **                                      slots in the yy_action[] table.
          124  +**
          125  +**   N between YY_MIN_REDUCE            Reduce by rule N-YY_MIN_REDUCE
          126  +**     and YY_MAX_REDUCE
   127    127   **
   128    128   ** The action table is constructed as a single large table named yy_action[].
   129    129   ** Given state S and lookahead X, the action is computed as either:
   130    130   **
   131    131   **    (A)   N = yy_action[ yy_shift_ofst[S] + X ]
   132    132   **    (B)   N = yy_default[S]
   133    133   **
................................................................................
   864    864                 yyTracePrompt,yyTokenName[yymajor],stateno-YY_MIN_REDUCE);
   865    865       }
   866    866     }
   867    867   #endif
   868    868   
   869    869     do{
   870    870       yyact = yy_find_shift_action(yypParser,(YYCODETYPE)yymajor);
   871         -    if( yyact <= YY_MAX_SHIFTREDUCE ){
          871  +    if( yyact >= YY_MIN_REDUCE ){
          872  +      yy_reduce(yypParser,yyact-YY_MIN_REDUCE,yymajor,yyminor);
          873  +    }else if( yyact <= YY_MAX_SHIFTREDUCE ){
   872    874         yy_shift(yypParser,yyact,yymajor,yyminor);
   873    875   #ifndef YYNOERRORRECOVERY
   874    876         yypParser->yyerrcnt--;
   875    877   #endif
   876    878         yymajor = YYNOCODE;
   877         -    }else if( yyact <= YY_MAX_REDUCE ){
   878         -      yy_reduce(yypParser,yyact-YY_MIN_REDUCE,yymajor,yyminor);
   879    879       }else{
   880    880         assert( yyact == YY_ERROR_ACTION );
   881    881         yyminorunion.yy0 = yyminor;
   882    882   #ifdef YYERRORSYMBOL
   883    883         int yymx;
   884    884   #endif
   885    885   #ifndef NDEBUG