/ Check-in [7eb0198d]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Enhance LEMON so that it generates the action table in such a way that no range check is needed on the lookahead table to verify that the next input token is valid. This makes the lookahead table slightly larger (about 120 bytes) but helps the parser to run faster.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | lemon-improvements
Files: files | file ages | folders
SHA3-256: 7eb0198d0102e97e4b7ad9e359d95985e55e09c510ea4b360265ac8feb9ed814
User & Date: drh 2017-12-25 04:15:38
Context
2017-12-26
18:04
Add support for measuring and reporting coverage of the parser state machine using the SQLITE_TESTCTRL_PARSER_COVERAGE test-control. check-in: 1253a872 user: drh tags: lemon-improvements
2017-12-25
04:15
Enhance LEMON so that it generates the action table in such a way that no range check is needed on the lookahead table to verify that the next input token is valid. This makes the lookahead table slightly larger (about 120 bytes) but helps the parser to run faster. check-in: 7eb0198d user: drh tags: lemon-improvements
00:10
In the LEMON-generated parser, avoid unnecessary tests for the acceptance state. check-in: fdbb35c5 user: drh tags: lemon-improvements
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to tool/lemon.c.

   409    409     char *tokendest;         /* Code to execute to destroy token data */
   410    410     char *vardest;           /* Code for the default non-terminal destructor */
   411    411     char *filename;          /* Name of the input file */
   412    412     char *outname;           /* Name of the current output file */
   413    413     char *tokenprefix;       /* A prefix added to token names in the .h file */
   414    414     int nconflict;           /* Number of parsing conflicts */
   415    415     int nactiontab;          /* Number of entries in the yy_action[] table */
          416  +  int nlookaheadtab;       /* Number of entries in yy_lookahead[] */
   416    417     int tablesize;           /* Total table size of all tables in bytes */
   417    418     int basisflag;           /* Print only basis configurations */
   418    419     int has_fallback;        /* True if any %fallback is seen in the grammar */
   419    420     int nolinenosflag;       /* True if #line statements should not be printed */
   420    421     char *argv0;             /* Name of the program */
   421    422   };
   422    423   
................................................................................
   585    586       *aAction,                  /* The yy_action[] table under construction */
   586    587       *aLookahead;               /* A single new transaction set */
   587    588     int mnLookahead;             /* Minimum aLookahead[].lookahead */
   588    589     int mnAction;                /* Action associated with mnLookahead */
   589    590     int mxLookahead;             /* Maximum aLookahead[].lookahead */
   590    591     int nLookahead;              /* Used slots in aLookahead[] */
   591    592     int nLookaheadAlloc;         /* Slots allocated in aLookahead[] */
          593  +  int nterminal;               /* Number of terminal symbols */
          594  +  int nsymbol;                 /* total number of symbols */
   592    595   };
   593    596   
   594    597   /* Return the number of entries in the yy_action table */
   595         -#define acttab_size(X) ((X)->nAction)
          598  +#define acttab_lookahead_size(X) ((X)->nAction)
   596    599   
   597    600   /* The value for the N-th entry in yy_action */
   598    601   #define acttab_yyaction(X,N)  ((X)->aAction[N].action)
   599    602   
   600    603   /* The value for the N-th entry in yy_lookahead */
   601    604   #define acttab_yylookahead(X,N)  ((X)->aAction[N].lookahead)
   602    605   
................................................................................
   604    607   void acttab_free(acttab *p){
   605    608     free( p->aAction );
   606    609     free( p->aLookahead );
   607    610     free( p );
   608    611   }
   609    612   
   610    613   /* Allocate a new acttab structure */
   611         -acttab *acttab_alloc(void){
          614  +acttab *acttab_alloc(int nsymbol, int nterminal){
   612    615     acttab *p = (acttab *) calloc( 1, sizeof(*p) );
   613    616     if( p==0 ){
   614    617       fprintf(stderr,"Unable to allocate memory for a new acttab.");
   615    618       exit(1);
   616    619     }
   617    620     memset(p, 0, sizeof(*p));
          621  +  p->nsymbol = nsymbol;
          622  +  p->nterminal = nterminal;
   618    623     return p;
   619    624   }
   620    625   
   621    626   /* Add a new action to the current transaction set.
   622    627   **
   623    628   ** This routine is called once for each lookahead for a particular
   624    629   ** state.
................................................................................
   651    656   
   652    657   /*
   653    658   ** Add the transaction set built up with prior calls to acttab_action()
   654    659   ** into the current action table.  Then reset the transaction set back
   655    660   ** to an empty set in preparation for a new round of acttab_action() calls.
   656    661   **
   657    662   ** Return the offset into the action table of the new transaction.
          663  +**
          664  +** If the makeItSafe parameter is true, then the offset is chosen so that
          665  +** it is impossible to overread the yy_lookaside[] table regardless of
          666  +** the lookaside token.  This is done for the terminal symbols, as they
          667  +** come from external inputs and can contain syntax errors.  When makeItSafe
          668  +** is false, there is more flexibility in selecting offsets, resulting in
          669  +** a smaller table.  For non-terminal symbols, which are never syntax errors,
          670  +** makeItSafe can be false.
   658    671   */
   659         -int acttab_insert(acttab *p){
   660         -  int i, j, k, n;
          672  +int acttab_insert(acttab *p, int makeItSafe){
          673  +  int i, j, k, n, end;
   661    674     assert( p->nLookahead>0 );
   662    675   
   663    676     /* Make sure we have enough space to hold the expanded action table
   664    677     ** in the worst case.  The worst case occurs if the transaction set
   665    678     ** must be appended to the current action table
   666    679     */
   667         -  n = p->mxLookahead + 1;
          680  +  n = p->nsymbol + 1;
   668    681     if( p->nAction + n >= p->nActionAlloc ){
   669    682       int oldAlloc = p->nActionAlloc;
   670    683       p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20;
   671    684       p->aAction = (struct lookahead_action *) realloc( p->aAction,
   672    685                             sizeof(p->aAction[0])*p->nActionAlloc);
   673    686       if( p->aAction==0 ){
   674    687         fprintf(stderr,"malloc failed\n");
................................................................................
   682    695   
   683    696     /* Scan the existing action table looking for an offset that is a
   684    697     ** duplicate of the current transaction set.  Fall out of the loop
   685    698     ** if and when the duplicate is found.
   686    699     **
   687    700     ** i is the index in p->aAction[] where p->mnLookahead is inserted.
   688    701     */
   689         -  for(i=p->nAction-1; i>=0; i--){
          702  +  end = makeItSafe ? p->mnLookahead : 0;
          703  +  for(i=p->nAction-1; i>=end; i--){
   690    704       if( p->aAction[i].lookahead==p->mnLookahead ){
   691    705         /* All lookaheads and actions in the aLookahead[] transaction
   692    706         ** must match against the candidate aAction[i] entry. */
   693    707         if( p->aAction[i].action!=p->mnAction ) continue;
   694    708         for(j=0; j<p->nLookahead; j++){
   695    709           k = p->aLookahead[j].lookahead - p->mnLookahead + i;
   696    710           if( k<0 || k>=p->nAction ) break;
................................................................................
   712    726       }
   713    727     }
   714    728   
   715    729     /* If no existing offsets exactly match the current transaction, find an
   716    730     ** an empty offset in the aAction[] table in which we can add the
   717    731     ** aLookahead[] transaction.
   718    732     */
   719         -  if( i<0 ){
          733  +  if( i<end ){
   720    734       /* Look for holes in the aAction[] table that fit the current
   721    735       ** aLookahead[] transaction.  Leave i set to the offset of the hole.
   722    736       ** If no holes are found, i is left at p->nAction, which means the
   723    737       ** transaction will be appended. */
   724         -    for(i=0; i<p->nActionAlloc - p->mxLookahead; i++){
          738  +    i = makeItSafe ? p->mnLookahead : 0;
          739  +    for(; i<p->nActionAlloc - p->mxLookahead; i++){
   725    740         if( p->aAction[i].lookahead<0 ){
   726    741           for(j=0; j<p->nLookahead; j++){
   727    742             k = p->aLookahead[j].lookahead - p->mnLookahead + i;
   728    743             if( k<0 ) break;
   729    744             if( p->aAction[k].lookahead>=0 ) break;
   730    745           }
   731    746           if( j<p->nLookahead ) continue;
................................................................................
   735    750           if( j==p->nAction ){
   736    751             break;  /* Fits in empty slots */
   737    752           }
   738    753         }
   739    754       }
   740    755     }
   741    756     /* Insert transaction set at index i. */
          757  +#if 0
          758  +  printf("Acttab:");
          759  +  for(j=0; j<p->nLookahead; j++){
          760  +    printf(" %d", p->aLookahead[j].lookahead);
          761  +  }
          762  +  printf(" inserted at %d\n", i);
          763  +#endif
   742    764     for(j=0; j<p->nLookahead; j++){
   743    765       k = p->aLookahead[j].lookahead - p->mnLookahead + i;
   744    766       p->aAction[k] = p->aLookahead[j];
   745    767       if( k>=p->nAction ) p->nAction = k+1;
   746    768     }
          769  +  if( makeItSafe && i+p->nterminal>p->nAction ) p->nAction = i+p->nterminal;
   747    770     p->nLookahead = 0;
   748    771   
   749    772     /* Return the offset that is added to the lookahead in order to get the
   750    773     ** index into yy_action of the action */
   751    774     return i - p->mnLookahead;
   752    775   }
          776  +
          777  +/*
          778  +** Return the size of the action table without the trailing syntax error
          779  +** entries.
          780  +*/
          781  +int acttab_action_size(acttab *p){
          782  +  int n = p->nAction;
          783  +  while( n>0 && p->aAction[n-1].lookahead<0 ){ n--; }
          784  +  return n;
          785  +}
   753    786   
   754    787   /********************** From the file "build.c" *****************************/
   755    788   /*
   756    789   ** Routines to construction the finite state machine for the LEMON
   757    790   ** parser generator.
   758    791   */
   759    792   
................................................................................
  1720   1753       stats_line("terminal symbols", lem.nterminal);
  1721   1754       stats_line("non-terminal symbols", lem.nsymbol - lem.nterminal);
  1722   1755       stats_line("total symbols", lem.nsymbol);
  1723   1756       stats_line("rules", lem.nrule);
  1724   1757       stats_line("states", lem.nxstate);
  1725   1758       stats_line("conflicts", lem.nconflict);
  1726   1759       stats_line("action table entries", lem.nactiontab);
         1760  +    stats_line("lookahead table entries", lem.nlookaheadtab);
  1727   1761       stats_line("total table size (bytes)", lem.tablesize);
  1728   1762     }
  1729   1763     if( lem.nconflict > 0 ){
  1730   1764       fprintf(stderr,"%d parsing conflicts.\n",lem.nconflict);
  1731   1765     }
  1732   1766   
  1733   1767     /* return 0 on success, 1 on failure. */
................................................................................
  4163   4197     }
  4164   4198     mxTknOfst = mnTknOfst = 0;
  4165   4199     mxNtOfst = mnNtOfst = 0;
  4166   4200     /* In an effort to minimize the action table size, use the heuristic
  4167   4201     ** of placing the largest action sets first */
  4168   4202     for(i=0; i<lemp->nxstate*2; i++) ax[i].iOrder = i;
  4169   4203     qsort(ax, lemp->nxstate*2, sizeof(ax[0]), axset_compare);
  4170         -  pActtab = acttab_alloc();
         4204  +  pActtab = acttab_alloc(lemp->nsymbol, lemp->nterminal);
  4171   4205     for(i=0; i<lemp->nxstate*2 && ax[i].nAction>0; i++){
  4172   4206       stp = ax[i].stp;
  4173   4207       if( ax[i].isTkn ){
  4174   4208         for(ap=stp->ap; ap; ap=ap->next){
  4175   4209           int action;
  4176   4210           if( ap->sp->index>=lemp->nterminal ) continue;
  4177   4211           action = compute_action(lemp, ap);
  4178   4212           if( action<0 ) continue;
  4179   4213           acttab_action(pActtab, ap->sp->index, action);
  4180   4214         }
  4181         -      stp->iTknOfst = acttab_insert(pActtab);
         4215  +      stp->iTknOfst = acttab_insert(pActtab, 1);
  4182   4216         if( stp->iTknOfst<mnTknOfst ) mnTknOfst = stp->iTknOfst;
  4183   4217         if( stp->iTknOfst>mxTknOfst ) mxTknOfst = stp->iTknOfst;
  4184   4218       }else{
  4185   4219         for(ap=stp->ap; ap; ap=ap->next){
  4186   4220           int action;
  4187   4221           if( ap->sp->index<lemp->nterminal ) continue;
  4188   4222           if( ap->sp->index==lemp->nsymbol ) continue;
  4189   4223           action = compute_action(lemp, ap);
  4190   4224           if( action<0 ) continue;
  4191   4225           acttab_action(pActtab, ap->sp->index, action);
  4192   4226         }
  4193         -      stp->iNtOfst = acttab_insert(pActtab);
         4227  +      stp->iNtOfst = acttab_insert(pActtab, 0);
  4194   4228         if( stp->iNtOfst<mnNtOfst ) mnNtOfst = stp->iNtOfst;
  4195   4229         if( stp->iNtOfst>mxNtOfst ) mxNtOfst = stp->iNtOfst;
  4196   4230       }
  4197   4231   #if 0  /* Uncomment for a trace of how the yy_action[] table fills out */
  4198   4232       { int jj, nn;
  4199   4233         for(jj=nn=0; jj<pActtab->nAction; jj++){
  4200   4234           if( pActtab->aAction[jj].action<0 ) nn++;
................................................................................
  4245   4279     **                     shifting terminals.
  4246   4280     **  yy_reduce_ofst[]   For each state, the offset into yy_action for
  4247   4281     **                     shifting non-terminals after a reduce.
  4248   4282     **  yy_default[]       Default action for each state.
  4249   4283     */
  4250   4284   
  4251   4285     /* Output the yy_action table */
  4252         -  lemp->nactiontab = n = acttab_size(pActtab);
         4286  +  lemp->nactiontab = n = acttab_action_size(pActtab);
  4253   4287     lemp->tablesize += n*szActionType;
  4254   4288     fprintf(out,"#define YY_ACTTAB_COUNT (%d)\n", n); lineno++;
  4255   4289     fprintf(out,"static const YYACTIONTYPE yy_action[] = {\n"); lineno++;
  4256   4290     for(i=j=0; i<n; i++){
  4257   4291       int action = acttab_yyaction(pActtab, i);
  4258   4292       if( action<0 ) action = lemp->noAction;
  4259   4293       if( j==0 ) fprintf(out," /* %5d */ ", i);
................................................................................
  4264   4298       }else{
  4265   4299         j++;
  4266   4300       }
  4267   4301     }
  4268   4302     fprintf(out, "};\n"); lineno++;
  4269   4303   
  4270   4304     /* Output the yy_lookahead table */
         4305  +  lemp->nlookaheadtab = n = acttab_lookahead_size(pActtab);
  4271   4306     lemp->tablesize += n*szCodeType;
  4272   4307     fprintf(out,"static const YYCODETYPE yy_lookahead[] = {\n"); lineno++;
  4273   4308     for(i=j=0; i<n; i++){
  4274   4309       int la = acttab_yylookahead(pActtab, i);
  4275   4310       if( la<0 ) la = lemp->nsymbol;
  4276   4311       if( j==0 ) fprintf(out," /* %5d */ ", i);
  4277   4312       fprintf(out, " %4d,", la);
................................................................................
  4283   4318       }
  4284   4319     }
  4285   4320     fprintf(out, "};\n"); lineno++;
  4286   4321   
  4287   4322     /* Output the yy_shift_ofst[] table */
  4288   4323     n = lemp->nxstate;
  4289   4324     while( n>0 && lemp->sorted[n-1]->iTknOfst==NO_OFFSET ) n--;
  4290         -  fprintf(out, "#define YY_SHIFT_USE_DFLT (%d)\n", lemp->nactiontab); lineno++;
  4291   4325     fprintf(out, "#define YY_SHIFT_COUNT    (%d)\n", n-1); lineno++;
  4292   4326     fprintf(out, "#define YY_SHIFT_MIN      (%d)\n", mnTknOfst); lineno++;
  4293   4327     fprintf(out, "#define YY_SHIFT_MAX      (%d)\n", mxTknOfst); lineno++;
  4294   4328     fprintf(out, "static const %s yy_shift_ofst[] = {\n",
  4295   4329          minimum_size_type(mnTknOfst, lemp->nterminal+lemp->nactiontab, &sz));
  4296   4330          lineno++;
  4297   4331     lemp->tablesize += n*sz;
................................................................................
  4308   4342       }else{
  4309   4343         j++;
  4310   4344       }
  4311   4345     }
  4312   4346     fprintf(out, "};\n"); lineno++;
  4313   4347   
  4314   4348     /* Output the yy_reduce_ofst[] table */
  4315         -  fprintf(out, "#define YY_REDUCE_USE_DFLT (%d)\n", mnNtOfst-1); lineno++;
  4316   4349     n = lemp->nxstate;
  4317   4350     while( n>0 && lemp->sorted[n-1]->iNtOfst==NO_OFFSET ) n--;
  4318   4351     fprintf(out, "#define YY_REDUCE_COUNT (%d)\n", n-1); lineno++;
  4319   4352     fprintf(out, "#define YY_REDUCE_MIN   (%d)\n", mnNtOfst); lineno++;
  4320   4353     fprintf(out, "#define YY_REDUCE_MAX   (%d)\n", mxNtOfst); lineno++;
  4321   4354     fprintf(out, "static const %s yy_reduce_ofst[] = {\n",
  4322   4355             minimum_size_type(mnNtOfst-1, mxNtOfst, &sz)); lineno++;
................................................................................
  4378   4411     }
  4379   4412     tplt_xfer(lemp->name, in, out, &lineno);
  4380   4413   
  4381   4414     /* Generate a table containing the symbolic name of every symbol
  4382   4415     */
  4383   4416     for(i=0; i<lemp->nsymbol; i++){
  4384   4417       lemon_sprintf(line,"\"%s\",",lemp->symbols[i]->name);
  4385         -    fprintf(out,"  %-15s",line);
  4386         -    if( (i&3)==3 ){ fprintf(out,"\n"); lineno++; }
         4418  +    fprintf(out,"  /* %4d */ \"%s\",\n",i, lemp->symbols[i]->name); lineno++;
  4387   4419     }
  4388         -  if( (i&3)!=0 ){ fprintf(out,"\n"); lineno++; }
  4389   4420     tplt_xfer(lemp->name,in,out,&lineno);
  4390   4421   
  4391   4422     /* Generate a table containing a text string that describes every
  4392   4423     ** rule in the rule set of the grammar.  This information is used
  4393   4424     ** when tracing REDUCE actions.
  4394   4425     */
  4395   4426     for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){

Changes to tool/lempar.c.

   127    127   **
   128    128   ** The action table is constructed as a single large table named yy_action[].
   129    129   ** Given state S and lookahead X, the action is computed as either:
   130    130   **
   131    131   **    (A)   N = yy_action[ yy_shift_ofst[S] + X ]
   132    132   **    (B)   N = yy_default[S]
   133    133   **
   134         -** The (A) formula is preferred.  The B formula is used instead if:
   135         -**    (1)  The yy_shift_ofst[S]+X value is out of range, or
   136         -**    (2)  yy_lookahead[yy_shift_ofst[S]+X] is not equal to X, or
   137         -**    (3)  yy_shift_ofst[S] equal YY_SHIFT_USE_DFLT.
   138         -** (Implementation note: YY_SHIFT_USE_DFLT is chosen so that
   139         -** YY_SHIFT_USE_DFLT+X will be out of range for all possible lookaheads X.
   140         -** Hence only tests (1) and (2) need to be evaluated.)
          134  +** The (A) formula is preferred.  The B formula is used instead if
          135  +** yy_lookahead[yy_shift_ofst[S]+X] is not equal to X.
   141    136   **
   142    137   ** The formulas above are for computing the action when the lookahead is
   143    138   ** a terminal symbol.  If the lookahead is a non-terminal (as occurs after
   144    139   ** a reduce action) then the yy_reduce_ofst[] array is used in place of
   145         -** the yy_shift_ofst[] array and YY_REDUCE_USE_DFLT is used in place of
   146         -** YY_SHIFT_USE_DFLT.
          140  +** the yy_shift_ofst[] array.
   147    141   **
   148    142   ** The following are the tables generated in this section:
   149    143   **
   150    144   **  yy_action[]        A single table containing all actions.
   151    145   **  yy_lookahead[]     A table containing the lookahead for each entry in
   152    146   **                     yy_action.  Used to detect hash collisions.
   153    147   **  yy_shift_ofst[]    For each state, the offset into yy_action for
................................................................................
   474    468    
   475    469     if( stateno>YY_MAX_SHIFT ) return stateno;
   476    470     assert( stateno <= YY_SHIFT_COUNT );
   477    471     do{
   478    472       i = yy_shift_ofst[stateno];
   479    473       assert( iLookAhead!=YYNOCODE );
   480    474       i += iLookAhead;
   481         -    if( i<0 || i>=YY_ACTTAB_COUNT || yy_lookahead[i]!=iLookAhead ){
          475  +    assert( i>=0 && i<sizeof(yy_lookahead)/sizeof(yy_lookahead[0]) );
          476  +    if( yy_lookahead[i]!=iLookAhead ){
   482    477   #ifdef YYFALLBACK
   483    478         YYCODETYPE iFallback;            /* Fallback token */
   484    479         if( iLookAhead<sizeof(yyFallback)/sizeof(yyFallback[0])
   485    480                && (iFallback = yyFallback[iLookAhead])!=0 ){
   486    481   #ifndef NDEBUG
   487    482           if( yyTraceFILE ){
   488    483             fprintf(yyTraceFILE, "%sFALLBACK %s => %s\n",
................................................................................
   537    532     if( stateno>YY_REDUCE_COUNT ){
   538    533       return yy_default[stateno];
   539    534     }
   540    535   #else
   541    536     assert( stateno<=YY_REDUCE_COUNT );
   542    537   #endif
   543    538     i = yy_reduce_ofst[stateno];
   544         -  assert( i!=YY_REDUCE_USE_DFLT );
   545    539     assert( iLookAhead!=YYNOCODE );
   546    540     i += iLookAhead;
   547    541   #ifdef YYERRORSYMBOL
   548    542     if( i<0 || i>=YY_ACTTAB_COUNT || yy_lookahead[i]!=iLookAhead ){
   549    543       return yy_default[stateno];
   550    544     }
   551    545   #else