/ Check-in [f39974eb]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:About 0.5KiB of additional compression in the parser tables. (CVS 2764)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: f39974ebd81f274dc4cf6cf94e6e87ee7b4a0814
User & Date: drh 2005-11-06 04:06:59
Context
2005-11-14
11:51
Fix documentation typo. (CVS 2765) check-in: c9b413ea user: drh tags: trunk
2005-11-06
04:06
About 0.5KiB of additional compression in the parser tables. (CVS 2764) check-in: f39974eb user: drh tags: trunk
2005-11-05
15:11
Work around a bug in MSVC++. Ticket #1513. (CVS 2763) check-in: 6331860e user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/parse.y.

    10     10   **
    11     11   *************************************************************************
    12     12   ** This file contains SQLite's grammar for SQL.  Process this file
    13     13   ** using the lemon parser generator to generate C code that runs
    14     14   ** the parser.  Lemon will also generate a header file containing
    15     15   ** numeric codes for all of the tokens.
    16     16   **
    17         -** @(#) $Id: parse.y,v 1.182 2005/11/03 02:03:13 drh Exp $
           17  +** @(#) $Id: parse.y,v 1.183 2005/11/06 04:06:59 drh Exp $
    18     18   */
    19     19   
    20     20   // All token codes are small integers with #defines that begin with "TK_"
    21     21   %token_prefix TK_
    22     22   
    23     23   // The type of the data attached to each token is Token.  This is also the
    24     24   // default type for non-terminals.
................................................................................
   175    175     DATABASE DEFERRED DESC DETACH EACH END EXCLUSIVE EXPLAIN FAIL FOR
   176    176     IGNORE IMMEDIATE INITIALLY INSTEAD LIKE_KW MATCH PLAN QUERY KEY
   177    177     OF OFFSET PRAGMA RAISE REPLACE RESTRICT ROW STATEMENT
   178    178     TEMP TRIGGER VACUUM VIEW
   179    179   %ifdef SQLITE_OMIT_COMPOUND_SELECT
   180    180     EXCEPT INTERSECT UNION
   181    181   %endif
   182         -  REINDEX RENAME CTIME_KW ALTER
          182  +  REINDEX RENAME CTIME_KW
   183    183     .
   184    184   
   185    185   // Define operator precedence early so that this is the first occurance
   186    186   // of the operator tokens in the grammer.  Keeping the operators together
   187    187   // causes them to be assigned integer values that are close together,
   188    188   // which keeps parser tables smaller.
   189    189   //
................................................................................
   204    204   %left STAR SLASH REM.
   205    205   %left CONCAT.
   206    206   %right UMINUS UPLUS BITNOT.
   207    207   
   208    208   // And "ids" is an identifer-or-string.
   209    209   //
   210    210   %type ids {Token}
   211         -ids(A) ::= ID(X).        {A = X;}
   212         -ids(A) ::= STRING(X).    {A = X;}
          211  +ids(A) ::= ID|STRING(X).   {A = X;}
   213    212   
   214    213   // The name of a column or table can be any of the following:
   215    214   //
   216    215   %type nm {Token}
   217    216   nm(A) ::= ID(X).         {A = X;}
   218    217   nm(A) ::= STRING(X).     {A = X;}
   219    218   nm(A) ::= JOIN_KW(X).    {A = X;}
................................................................................
   377    376     if( Z ){
   378    377       Z->op = Y;
   379    378       Z->pPrior = X;
   380    379     }
   381    380     A = Z;
   382    381   }
   383    382   %type multiselect_op {int}
   384         -multiselect_op(A) ::= UNION(OP).      {A = @OP;}
   385         -multiselect_op(A) ::= UNION ALL.      {A = TK_ALL;}
   386         -multiselect_op(A) ::= INTERSECT(OP).  {A = @OP;}
   387         -multiselect_op(A) ::= EXCEPT(OP).     {A = @OP;}
          383  +multiselect_op(A) ::= UNION(OP).             {A = @OP;}
          384  +multiselect_op(A) ::= UNION ALL.             {A = TK_ALL;}
          385  +multiselect_op(A) ::= EXCEPT|INTERSECT(OP).  {A = @OP;}
   388    386   %endif // SQLITE_OMIT_COMPOUND_SELECT
   389    387   oneselect(A) ::= SELECT distinct(D) selcollist(W) from(X) where_opt(Y)
   390    388                    groupby_opt(P) having_opt(Q) orderby_opt(Z) limit_opt(L). {
   391    389     A = sqlite3SelectNew(W,X,Y,P,Q,Z,D,L.pLimit,L.pOffset);
   392    390   }
   393    391   
   394    392   // The "distinct" nonterminal is true (1) if the DISTINCT keyword is
................................................................................
   497    495   
   498    496   %type fullname {SrcList*}
   499    497   %destructor fullname {sqlite3SrcListDelete($$);}
   500    498   fullname(A) ::= nm(X) dbnm(Y).  {A = sqlite3SrcListAppend(0,&X,&Y);}
   501    499   
   502    500   %type joinop {int}
   503    501   %type joinop2 {int}
   504         -joinop(X) ::= COMMA.                   { X = JT_INNER; }
   505         -joinop(X) ::= JOIN.                    { X = JT_INNER; }
          502  +joinop(X) ::= COMMA|JOIN.              { X = JT_INNER; }
   506    503   joinop(X) ::= JOIN_KW(A) JOIN.         { X = sqlite3JoinType(pParse,&A,0,0); }
   507    504   joinop(X) ::= JOIN_KW(A) nm(B) JOIN.   { X = sqlite3JoinType(pParse,&A,&B,0); }
   508    505   joinop(X) ::= JOIN_KW(A) nm(B) nm(C) JOIN.
   509    506                                          { X = sqlite3JoinType(pParse,&A,&B,&C); }
   510    507   
   511    508   %type on_opt {Expr*}
   512    509   %destructor on_opt {sqlite3ExprDelete($$);}
................................................................................
   641    638   expr(A) ::= nm(X) DOT nm(Y) DOT nm(Z). {
   642    639     Expr *temp1 = sqlite3Expr(TK_ID, 0, 0, &X);
   643    640     Expr *temp2 = sqlite3Expr(TK_ID, 0, 0, &Y);
   644    641     Expr *temp3 = sqlite3Expr(TK_ID, 0, 0, &Z);
   645    642     Expr *temp4 = sqlite3Expr(TK_DOT, temp2, temp3, 0);
   646    643     A = sqlite3Expr(TK_DOT, temp1, temp4, 0);
   647    644   }
   648         -term(A) ::= INTEGER(X).      {A = sqlite3Expr(@X, 0, 0, &X);}
   649         -term(A) ::= FLOAT(X).        {A = sqlite3Expr(@X, 0, 0, &X);}
          645  +term(A) ::= INTEGER|FLOAT|BLOB(X).      {A = sqlite3Expr(@X, 0, 0, &X);}
   650    646   term(A) ::= STRING(X).       {A = sqlite3Expr(@X, 0, 0, &X);}
   651         -term(A) ::= BLOB(X).         {A = sqlite3Expr(@X, 0, 0, &X);}
   652    647   expr(A) ::= REGISTER(X).     {A = sqlite3RegisterExpr(pParse, &X);}
   653    648   expr(A) ::= VARIABLE(X).     {
   654    649     Token *pToken = &X;
   655    650     Expr *pExpr = A = sqlite3Expr(TK_VARIABLE, 0, 0, pToken);
   656    651     sqlite3ExprAssignVarNumber(pParse, pExpr);
   657    652   }
   658    653   %ifndef SQLITE_OMIT_CAST
................................................................................
   674    669   }
   675    670   term(A) ::= CTIME_KW(OP). {
   676    671     /* The CURRENT_TIME, CURRENT_DATE, and CURRENT_TIMESTAMP values are
   677    672     ** treated as functions that return constants */
   678    673     A = sqlite3ExprFunction(0,&OP);
   679    674     if( A ) A->op = TK_CONST_FUNC;  
   680    675   }
   681         -expr(A) ::= expr(X) AND(OP) expr(Y).    {A = sqlite3Expr(@OP, X, Y, 0);}
   682         -expr(A) ::= expr(X) OR(OP) expr(Y).     {A = sqlite3Expr(@OP, X, Y, 0);}
   683         -expr(A) ::= expr(X) LT(OP) expr(Y).     {A = sqlite3Expr(@OP, X, Y, 0);}
   684         -expr(A) ::= expr(X) GT(OP) expr(Y).     {A = sqlite3Expr(@OP, X, Y, 0);}
   685         -expr(A) ::= expr(X) LE(OP) expr(Y).     {A = sqlite3Expr(@OP, X, Y, 0);}
   686         -expr(A) ::= expr(X) GE(OP) expr(Y).     {A = sqlite3Expr(@OP, X, Y, 0);}
   687         -expr(A) ::= expr(X) NE(OP) expr(Y).     {A = sqlite3Expr(@OP, X, Y, 0);}
   688         -expr(A) ::= expr(X) EQ(OP) expr(Y).     {A = sqlite3Expr(@OP, X, Y, 0);}
   689         -expr(A) ::= expr(X) BITAND(OP) expr(Y). {A = sqlite3Expr(@OP, X, Y, 0);}
   690         -expr(A) ::= expr(X) BITOR(OP) expr(Y).  {A = sqlite3Expr(@OP, X, Y, 0);}
   691         -expr(A) ::= expr(X) LSHIFT(OP) expr(Y). {A = sqlite3Expr(@OP, X, Y, 0);}
   692         -expr(A) ::= expr(X) RSHIFT(OP) expr(Y). {A = sqlite3Expr(@OP, X, Y, 0);}
   693         -expr(A) ::= expr(X) PLUS(OP) expr(Y).   {A = sqlite3Expr(@OP, X, Y, 0);}
   694         -expr(A) ::= expr(X) MINUS(OP) expr(Y).  {A = sqlite3Expr(@OP, X, Y, 0);}
   695         -expr(A) ::= expr(X) STAR(OP) expr(Y).   {A = sqlite3Expr(@OP, X, Y, 0);}
   696         -expr(A) ::= expr(X) SLASH(OP) expr(Y).  {A = sqlite3Expr(@OP, X, Y, 0);}
   697         -expr(A) ::= expr(X) REM(OP) expr(Y).    {A = sqlite3Expr(@OP, X, Y, 0);}
   698         -expr(A) ::= expr(X) CONCAT(OP) expr(Y). {A = sqlite3Expr(@OP, X, Y, 0);}
          676  +expr(A) ::= expr(X) AND(OP) expr(Y).            {A = sqlite3Expr(@OP, X, Y, 0);}
          677  +expr(A) ::= expr(X) OR(OP) expr(Y).             {A = sqlite3Expr(@OP, X, Y, 0);}
          678  +expr(A) ::= expr(X) LT|GT|GE|LE(OP) expr(Y).    {A = sqlite3Expr(@OP, X, Y, 0);}
          679  +expr(A) ::= expr(X) EQ|NE(OP) expr(Y).          {A = sqlite3Expr(@OP, X, Y, 0);}
          680  +expr(A) ::= expr(X) BITAND|BITOR|LSHIFT|RSHIFT(OP) expr(Y).
          681  +                                                {A = sqlite3Expr(@OP, X, Y, 0);}
          682  +expr(A) ::= expr(X) PLUS|MINUS(OP) expr(Y).     {A = sqlite3Expr(@OP, X, Y, 0);}
          683  +expr(A) ::= expr(X) STAR|SLASH|REM(OP) expr(Y). {A = sqlite3Expr(@OP, X, Y, 0);}
          684  +expr(A) ::= expr(X) CONCAT(OP) expr(Y).         {A = sqlite3Expr(@OP, X, Y, 0);}
   699    685   %type likeop {struct LikeOp}
   700    686   likeop(A) ::= LIKE_KW(X).     {A.operator = X; A.not = 0;}
   701    687   likeop(A) ::= NOT LIKE_KW(X). {A.operator = X; A.not = 1;}
   702    688   %type escape {Expr*}
   703    689   escape(X) ::= ESCAPE expr(A). [ESCAPE] {X = A;}
   704    690   escape(X) ::= .               [ESCAPE] {X = 0;}
   705    691   expr(A) ::= expr(X) likeop(OP) expr(Y) escape(E).  [LIKE_KW]  {
................................................................................
   709    695       pList = sqlite3ExprListAppend(pList, E, 0);
   710    696     }
   711    697     A = sqlite3ExprFunction(pList, &OP.operator);
   712    698     if( OP.not ) A = sqlite3Expr(TK_NOT, A, 0, 0);
   713    699     sqlite3ExprSpan(A, &X->span, &Y->span);
   714    700   }
   715    701   
   716         -expr(A) ::= expr(X) ISNULL(E). {
   717         -  A = sqlite3Expr(TK_ISNULL, X, 0, 0);
          702  +expr(A) ::= expr(X) ISNULL|NOTNULL(E). {
          703  +  A = sqlite3Expr(@E, X, 0, 0);
   718    704     sqlite3ExprSpan(A,&X->span,&E);
   719    705   }
   720    706   expr(A) ::= expr(X) IS NULL(E). {
   721    707     A = sqlite3Expr(TK_ISNULL, X, 0, 0);
   722    708     sqlite3ExprSpan(A,&X->span,&E);
   723    709   }
   724         -expr(A) ::= expr(X) NOTNULL(E). {
   725         -  A = sqlite3Expr(TK_NOTNULL, X, 0, 0);
   726         -  sqlite3ExprSpan(A,&X->span,&E);
   727         -}
   728    710   expr(A) ::= expr(X) NOT NULL(E). {
   729    711     A = sqlite3Expr(TK_NOTNULL, X, 0, 0);
   730    712     sqlite3ExprSpan(A,&X->span,&E);
   731    713   }
   732    714   expr(A) ::= expr(X) IS NOT NULL(E). {
   733    715     A = sqlite3Expr(TK_NOTNULL, X, 0, 0);
   734    716     sqlite3ExprSpan(A,&X->span,&E);
   735    717   }
   736         -expr(A) ::= NOT(B) expr(X). {
   737         -  A = sqlite3Expr(@B, X, 0, 0);
   738         -  sqlite3ExprSpan(A,&B,&X->span);
   739         -}
   740         -expr(A) ::= BITNOT(B) expr(X). {
          718  +expr(A) ::= NOT|BITNOT(B) expr(X). {
   741    719     A = sqlite3Expr(@B, X, 0, 0);
   742    720     sqlite3ExprSpan(A,&B,&X->span);
   743    721   }
   744    722   expr(A) ::= MINUS(B) expr(X). [UMINUS] {
   745    723     A = sqlite3Expr(TK_UMINUS, X, 0, 0);
   746    724     sqlite3ExprSpan(A,&B,&X->span);
   747    725   }
................................................................................
   916    894     sqlite3Pragma(pParse,&X,&Z,&Y,1);
   917    895   }
   918    896   cmd ::= PRAGMA nm(X) dbnm(Z) LP nm(Y) RP. {sqlite3Pragma(pParse,&X,&Z,&Y,0);}
   919    897   cmd ::= PRAGMA nm(X) dbnm(Z).             {sqlite3Pragma(pParse,&X,&Z,0,0);}
   920    898   %endif // SQLITE_OMIT_PRAGMA
   921    899   plus_num(A) ::= plus_opt number(X).   {A = X;}
   922    900   minus_num(A) ::= MINUS number(X).     {A = X;}
   923         -number(A) ::= INTEGER(X).             {A = X;}
   924         -number(A) ::= FLOAT(X).               {A = X;}
          901  +number(A) ::= INTEGER|FLOAT(X).       {A = X;}
   925    902   plus_opt ::= PLUS.
   926    903   plus_opt ::= .
   927    904   
   928    905   //////////////////////////// The CREATE TRIGGER command /////////////////////
   929    906   
   930    907   %ifndef SQLITE_OMIT_TRIGGER
   931    908   
................................................................................
   947    924   trigger_time(A) ::= BEFORE.      { A = TK_BEFORE; }
   948    925   trigger_time(A) ::= AFTER.       { A = TK_AFTER;  }
   949    926   trigger_time(A) ::= INSTEAD OF.  { A = TK_INSTEAD;}
   950    927   trigger_time(A) ::= .            { A = TK_BEFORE; }
   951    928   
   952    929   %type trigger_event {struct TrigEvent}
   953    930   %destructor trigger_event {sqlite3IdListDelete($$.b);}
   954         -trigger_event(A) ::= DELETE(OP).              {A.a = @OP; A.b = 0;}
   955         -trigger_event(A) ::= INSERT(OP).              {A.a = @OP; A.b = 0;}
          931  +trigger_event(A) ::= DELETE|INSERT(OP).       {A.a = @OP; A.b = 0;}
   956    932   trigger_event(A) ::= UPDATE(OP).              {A.a = @OP; A.b = 0;}
   957    933   trigger_event(A) ::= UPDATE OF inscollist(X). {A.a = TK_UPDATE; A.b = X;}
   958    934   
   959    935   %type foreach_clause {int}
   960    936   foreach_clause(A) ::= .                   { A = TK_ROW; }
   961    937   foreach_clause(A) ::= FOR EACH ROW.       { A = TK_ROW; }
   962    938   foreach_clause(A) ::= FOR EACH STATEMENT. { A = TK_STATEMENT; }

Changes to tool/lemon.c.

   116    116   /* Symbols (terminals and nonterminals) of the grammar are stored
   117    117   ** in the following: */
   118    118   struct symbol {
   119    119     char *name;              /* Name of the symbol */
   120    120     int index;               /* Index number for this symbol */
   121    121     enum {
   122    122       TERMINAL,
   123         -    NONTERMINAL
          123  +    NONTERMINAL,
          124  +    MULTITERMINAL
   124    125     } type;                  /* Symbols are all either TERMINALS or NTs */
   125    126     struct rule *rule;       /* Linked list of rules of this (if an NT) */
   126    127     struct symbol *fallback; /* fallback token in case this token doesn't parse */
   127    128     int prec;                /* Precedence if defined (-1 otherwise) */
   128    129     enum e_assoc {
   129    130       LEFT,
   130    131       RIGHT,
................................................................................
   137    138                              ** popped from the stack during error processing */
   138    139     int destructorln;        /* Line number of destructor code */
   139    140     char *datatype;          /* The data type of information held by this
   140    141                              ** object. Only used if type==NONTERMINAL */
   141    142     int dtnum;               /* The data type number.  In the parser, the value
   142    143                              ** stack is a union.  The .yy%d element of this
   143    144                              ** union is the correct data type for this object */
          145  +  /* The following fields are used by MULTITERMINALs only */
          146  +  int nsubsym;             /* Number of constituent symbols in the MULTI */
          147  +  struct symbol **subsym;  /* Array of constituent symbols */
   144    148   };
   145    149   
   146    150   /* Each production rule in the grammar is stored in the following
   147    151   ** structure.  */
   148    152   struct rule {
   149    153     struct symbol *lhs;      /* Left-hand side of the rule */
   150    154     char *lhsalias;          /* Alias for the LHS (NULL if none) */
................................................................................
   581    585   */
   582    586   void FindRulePrecedences(xp)
   583    587   struct lemon *xp;
   584    588   {
   585    589     struct rule *rp;
   586    590     for(rp=xp->rule; rp; rp=rp->next){
   587    591       if( rp->precsym==0 ){
   588         -      int i;
   589         -      for(i=0; i<rp->nrhs; i++){
   590         -        if( rp->rhs[i]->prec>=0 ){
          592  +      int i, j;
          593  +      for(i=0; i<rp->nrhs && rp->precsym==0; i++){
          594  +        struct symbol *sp = rp->rhs[i];
          595  +        if( sp->type==MULTITERMINAL ){
          596  +          for(j=0; j<sp->nsubsym; j++){
          597  +            if( sp->subsym[j]->prec>=0 ){
          598  +              rp->precsym = sp->subsym[j];
          599  +              break;
          600  +            }
          601  +          }
          602  +        }else if( sp->prec>=0 ){
   591    603             rp->precsym = rp->rhs[i];
   592         -          break;
   593    604   	}
   594    605         }
   595    606       }
   596    607     }
   597    608     return;
   598    609   }
   599    610   
................................................................................
   601    612   ** Then go back and compute the first sets of every nonterminal.
   602    613   ** The first set is the set of all terminal symbols which can begin
   603    614   ** a string generated by that nonterminal.
   604    615   */
   605    616   void FindFirstSets(lemp)
   606    617   struct lemon *lemp;
   607    618   {
   608         -  int i;
          619  +  int i, j;
   609    620     struct rule *rp;
   610    621     int progress;
   611    622   
   612    623     for(i=0; i<lemp->nsymbol; i++){
   613    624       lemp->symbols[i]->lambda = B_FALSE;
   614    625     }
   615    626     for(i=lemp->nterminal; i<lemp->nsymbol; i++){
................................................................................
   618    629   
   619    630     /* First compute all lambdas */
   620    631     do{
   621    632       progress = 0;
   622    633       for(rp=lemp->rule; rp; rp=rp->next){
   623    634         if( rp->lhs->lambda ) continue;
   624    635         for(i=0; i<rp->nrhs; i++){
   625         -         if( rp->rhs[i]->lambda==B_FALSE ) break;
          636  +         struct symbol *sp = rp->rhs[i];
          637  +         if( sp->type!=TERMINAL || sp->lambda==B_FALSE ) break;
   626    638         }
   627    639         if( i==rp->nrhs ){
   628    640           rp->lhs->lambda = B_TRUE;
   629    641           progress = 1;
   630    642         }
   631    643       }
   632    644     }while( progress );
................................................................................
   637    649       progress = 0;
   638    650       for(rp=lemp->rule; rp; rp=rp->next){
   639    651         s1 = rp->lhs;
   640    652         for(i=0; i<rp->nrhs; i++){
   641    653           s2 = rp->rhs[i];
   642    654           if( s2->type==TERMINAL ){
   643    655             progress += SetAdd(s1->firstset,s2->index);
          656  +          break;
          657  +        }else if( s2->type==MULTITERMINAL ){
          658  +          for(j=0; j<s2->nsubsym; j++){
          659  +            progress += SetAdd(s1->firstset,s2->subsym[j]->index);
          660  +          }
   644    661             break;
   645    662   	}else if( s1==s2 ){
   646    663             if( s1->lambda==B_FALSE ) break;
   647    664   	}else{
   648    665             progress += SetUnion(s1->firstset,s2->firstset);
   649    666             if( s2->lambda==B_FALSE ) break;
   650    667   	}
................................................................................
   684    701   
   685    702     /* Make sure the start symbol doesn't occur on the right-hand side of
   686    703     ** any rule.  Report an error if it does.  (YACC would generate a new
   687    704     ** start symbol in this case.) */
   688    705     for(rp=lemp->rule; rp; rp=rp->next){
   689    706       int i;
   690    707       for(i=0; i<rp->nrhs; i++){
   691         -      if( rp->rhs[i]==sp ){
          708  +      if( rp->rhs[i]==sp ){   /* FIX ME:  Deal with multiterminals */
   692    709           ErrorMsg(lemp->filename,0,
   693    710   "The start symbol \"%s\" occurs on the \
   694    711   right-hand side of a rule. This will result in a parser which \
   695    712   does not work properly.",sp->name);
   696    713           lemp->errorcnt++;
   697    714         }
   698    715       }
................................................................................
   755    772       stp->statenum = lemp->nstate++; /* Every state gets a sequence number */
   756    773       stp->ap = 0;                 /* No actions, yet. */
   757    774       State_insert(stp,stp->bp);   /* Add to the state table */
   758    775       buildshifts(lemp,stp);       /* Recursively compute successor states */
   759    776     }
   760    777     return stp;
   761    778   }
          779  +
          780  +/*
          781  +** Return true if two symbols are the same.
          782  +*/
          783  +int same_symbol(a,b)
          784  +struct symbol *a;
          785  +struct symbol *b;
          786  +{
          787  +  int i;
          788  +  if( a==b ) return 1;
          789  +  if( a->type!=MULTITERMINAL ) return 0;
          790  +  if( b->type!=MULTITERMINAL ) return 0;
          791  +  if( a->nsubsym!=b->nsubsym ) return 0;
          792  +  for(i=0; i<a->nsubsym; i++){
          793  +    if( a->subsym[i]!=b->subsym[i] ) return 0;
          794  +  }
          795  +  return 1;
          796  +}
   762    797   
   763    798   /* Construct all successor states to the given state.  A "successor"
   764    799   ** state is any state which can be reached by a shift action.
   765    800   */
   766    801   PRIVATE void buildshifts(lemp,stp)
   767    802   struct lemon *lemp;
   768    803   struct state *stp;     /* The state from which successors are computed */
................................................................................
   788    823       /* For every configuration in the state "stp" which has the symbol "sp"
   789    824       ** following its dot, add the same configuration to the basis set under
   790    825       ** construction but with the dot shifted one symbol to the right. */
   791    826       for(bcfp=cfp; bcfp; bcfp=bcfp->next){
   792    827         if( bcfp->status==COMPLETE ) continue;    /* Already used */
   793    828         if( bcfp->dot>=bcfp->rp->nrhs ) continue; /* Can't shift this one */
   794    829         bsp = bcfp->rp->rhs[bcfp->dot];           /* Get symbol after dot */
   795         -      if( bsp!=sp ) continue;                   /* Must be same as for "cfp" */
          830  +      if( !same_symbol(bsp,sp) ) continue;      /* Must be same as for "cfp" */
   796    831         bcfp->status = COMPLETE;                  /* Mark this config as used */
   797    832         new = Configlist_addbasis(bcfp->rp,bcfp->dot+1);
   798    833         Plink_add(&new->bplp,bcfp);
   799    834       }
   800    835   
   801    836       /* Get a pointer to the state described by the basis configuration set
   802    837       ** constructed in the preceding loop */
   803    838       newstp = getstate(lemp);
   804    839   
   805    840       /* The state "newstp" is reached from the state "stp" by a shift action
   806    841       ** on the symbol "sp" */
   807         -    Action_add(&stp->ap,SHIFT,sp,(char *)newstp);
          842  +    if( sp->type==MULTITERMINAL ){
          843  +      int i;
          844  +      for(i=0; i<sp->nsubsym; i++){
          845  +        Action_add(&stp->ap,SHIFT,sp->subsym[i],(char*)newstp);
          846  +      }
          847  +    }else{
          848  +      Action_add(&stp->ap,SHIFT,sp,(char *)newstp);
          849  +    }
   808    850     }
   809    851   }
   810    852   
   811    853   /*
   812    854   ** Construct the propagation links
   813    855   */
   814    856   void FindLinks(lemp)
................................................................................
  1162   1204         }
  1163   1205         for(newrp=sp->rule; newrp; newrp=newrp->nextlhs){
  1164   1206           newcfp = Configlist_add(newrp,0);
  1165   1207           for(i=dot+1; i<rp->nrhs; i++){
  1166   1208             xsp = rp->rhs[i];
  1167   1209             if( xsp->type==TERMINAL ){
  1168   1210               SetAdd(newcfp->fws,xsp->index);
         1211  +            break;
         1212  +          }else if( xsp->type==MULTITERMINAL ){
         1213  +            int k;
         1214  +            for(k=0; k<xsp->nsubsym; k++){
         1215  +              SetAdd(newcfp->fws, xsp->subsym[k]->index);
         1216  +            }
  1169   1217               break;
  1170   1218   	  }else{
  1171   1219               SetUnion(newcfp->fws,xsp->firstset);
  1172   1220               if( xsp->lambda==B_FALSE ) break;
  1173   1221   	  }
  1174   1222   	}
  1175   1223           if( i==rp->nrhs ) Plink_add(&cfp->fplp,newcfp);
................................................................................
  2087   2135   	  }
  2088   2136             psp->prevrule = rp;
  2089   2137   	}
  2090   2138           psp->state = WAITING_FOR_DECL_OR_RULE;
  2091   2139         }else if( isalpha(x[0]) ){
  2092   2140           if( psp->nrhs>=MAXRHS ){
  2093   2141             ErrorMsg(psp->filename,psp->tokenlineno,
  2094         -            "Too many symbol on RHS or rule beginning at \"%s\".",
         2142  +            "Too many symbols on RHS or rule beginning at \"%s\".",
  2095   2143               x);
  2096   2144             psp->errorcnt++;
  2097   2145             psp->state = RESYNC_AFTER_RULE_ERROR;
  2098   2146   	}else{
  2099   2147             psp->rhs[psp->nrhs] = Symbol_new(x);
  2100   2148             psp->alias[psp->nrhs] = 0;
  2101   2149             psp->nrhs++;
  2102   2150   	}
         2151  +      }else if( (x[0]=='|' || x[0]=='/') && psp->nrhs>0 ){
         2152  +        struct symbol *msp = psp->rhs[psp->nrhs-1];
         2153  +        if( msp->type!=MULTITERMINAL ){
         2154  +          struct symbol *origsp = msp;
         2155  +          msp = malloc(sizeof(*msp));
         2156  +          memset(msp, 0, sizeof(*msp));
         2157  +          msp->type = MULTITERMINAL;
         2158  +          msp->nsubsym = 1;
         2159  +          msp->subsym = malloc(sizeof(struct symbol*));
         2160  +          msp->subsym[0] = origsp;
         2161  +          msp->name = origsp->name;
         2162  +          psp->rhs[psp->nrhs-1] = msp;
         2163  +        }
         2164  +        msp->nsubsym++;
         2165  +        msp->subsym = realloc(msp->subsym, sizeof(struct symbol*)*msp->nsubsym);
         2166  +        msp->subsym[msp->nsubsym-1] = Symbol_new(&x[1]);
         2167  +        if( islower(x[1]) || islower(msp->subsym[0]->name[0]) ){
         2168  +          ErrorMsg(psp->filename,psp->tokenlineno,
         2169  +            "Cannot form a compound containing a non-terminal");
         2170  +          psp->errorcnt++;
         2171  +        }
  2103   2172         }else if( x[0]=='(' && psp->nrhs>0 ){
  2104   2173           psp->state = RHS_ALIAS_1;
  2105   2174         }else{
  2106   2175           ErrorMsg(psp->filename,psp->tokenlineno,
  2107   2176             "Illegal character on RHS of rule: \"%s\".",x);
  2108   2177           psp->errorcnt++;
  2109   2178           psp->state = RESYNC_AFTER_RULE_ERROR;
................................................................................
  2483   2552         }
  2484   2553       }else if( isalnum(c) ){          /* Identifiers */
  2485   2554         while( (c= *cp)!=0 && (isalnum(c) || c=='_') ) cp++;
  2486   2555         nextcp = cp;
  2487   2556       }else if( c==':' && cp[1]==':' && cp[2]=='=' ){ /* The operator "::=" */
  2488   2557         cp += 3;
  2489   2558         nextcp = cp;
         2559  +    }else if( (c=='/' || c=='|') && isalpha(cp[1]) ){
         2560  +      cp += 2;
         2561  +      while( (c = *cp)!=0 && (isalnum(c) || c=='_') ) cp++;
         2562  +      nextcp = cp;
  2490   2563       }else{                          /* All other (one character) operators */
  2491   2564         cp++;
  2492   2565         nextcp = cp;
  2493   2566       }
  2494   2567       c = *cp;
  2495   2568       *cp = 0;                        /* Null terminate the token */
  2496   2569       parseonetoken(&ps);             /* Parse the token */
................................................................................
  2642   2715         assert( sp->index==j );
  2643   2716         printf(" %3d %-*.*s",j,maxlen,maxlen,sp->name);
  2644   2717       }
  2645   2718       printf("\n");
  2646   2719     }
  2647   2720     for(rp=lemp->rule; rp; rp=rp->next){
  2648   2721       printf("%s",rp->lhs->name);
  2649         -/*    if( rp->lhsalias ) printf("(%s)",rp->lhsalias); */
         2722  +    /*    if( rp->lhsalias ) printf("(%s)",rp->lhsalias); */
  2650   2723       printf(" ::=");
  2651   2724       for(i=0; i<rp->nrhs; i++){
  2652         -      printf(" %s",rp->rhs[i]->name);
  2653         -/*      if( rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */
         2725  +      sp = rp->rhs[i];
         2726  +      printf(" %s", sp->name);
         2727  +      if( sp->type==MULTITERMINAL ){
         2728  +        for(j=1; j<sp->nsubsym; j++){
         2729  +          printf("|%s", sp->subsym[j]->name);
         2730  +        }
         2731  +      }
         2732  +      /* if( rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */
  2654   2733       }
  2655   2734       printf(".");
  2656   2735       if( rp->precsym ) printf(" [%s]",rp->precsym->name);
  2657         -/*    if( rp->code ) printf("\n    %s",rp->code); */
         2736  +    /* if( rp->code ) printf("\n    %s",rp->code); */
  2658   2737       printf("\n");
  2659   2738     }
  2660   2739   }
  2661   2740   
  2662   2741   void ConfigPrint(fp,cfp)
  2663   2742   FILE *fp;
  2664   2743   struct config *cfp;
  2665   2744   {
  2666   2745     struct rule *rp;
  2667         -  int i;
         2746  +  struct symbol *sp;
         2747  +  int i, j;
  2668   2748     rp = cfp->rp;
  2669   2749     fprintf(fp,"%s ::=",rp->lhs->name);
  2670   2750     for(i=0; i<=rp->nrhs; i++){
  2671   2751       if( i==cfp->dot ) fprintf(fp," *");
  2672   2752       if( i==rp->nrhs ) break;
  2673         -    fprintf(fp," %s",rp->rhs[i]->name);
         2753  +    sp = rp->rhs[i];
         2754  +    fprintf(fp," %s", sp->name);
         2755  +    if( sp->type==MULTITERMINAL ){
         2756  +      for(j=1; j<sp->nsubsym; j++){
         2757  +        fprintf(fp,"|%s",sp->subsym[j]->name);
         2758  +      }
         2759  +    }
  2674   2760     }
  2675   2761   }
  2676   2762   
  2677   2763   /* #define TEST */
  2678         -#ifdef TEST
         2764  +#if 0
  2679   2765   /* Print a set */
  2680   2766   PRIVATE void SetPrint(out,set,lemp)
  2681   2767   FILE *out;
  2682   2768   char *set;
  2683   2769   struct lemon *lemp;
  2684   2770   {
  2685   2771     int i;
................................................................................
  2765   2851           sprintf(buf,"(%d)",cfp->rp->index);
  2766   2852           fprintf(fp,"    %5s ",buf);
  2767   2853         }else{
  2768   2854           fprintf(fp,"          ");
  2769   2855         }
  2770   2856         ConfigPrint(fp,cfp);
  2771   2857         fprintf(fp,"\n");
  2772         -#ifdef TEST
         2858  +#if 0
  2773   2859         SetPrint(fp,cfp->fws,lemp);
  2774   2860         PlinkPrint(fp,cfp->fplp,"To  ");
  2775   2861         PlinkPrint(fp,cfp->bplp,"From");
  2776   2862   #endif
  2777   2863         if( lemp->basisflag ) cfp=cfp->bp;
  2778   2864         else                  cfp=cfp->next;
  2779   2865       }
................................................................................
  3109   3195           for(i=0; i<rp->nrhs; i++){
  3110   3196             if( rp->rhsalias[i] && strcmp(cp,rp->rhsalias[i])==0 ){
  3111   3197               if( cp!=rp->code && cp[-1]=='@' ){
  3112   3198                 /* If the argument is of the form @X then substituted
  3113   3199                 ** the token number of X, not the value of X */
  3114   3200                 append_str("yymsp[%d].major",-1,i-rp->nrhs+1,0);
  3115   3201               }else{
  3116         -              append_str("yymsp[%d].minor.yy%d",0,
  3117         -                         i-rp->nrhs+1,rp->rhs[i]->dtnum);
         3202  +              struct symbol *sp = rp->rhs[i];
         3203  +              int dtnum;
         3204  +              if( sp->type==MULTITERMINAL ){
         3205  +                dtnum = sp->subsym[0]->dtnum;
         3206  +              }else{
         3207  +                dtnum = sp->dtnum;
         3208  +              }
         3209  +              append_str("yymsp[%d].minor.yy%d",0,i-rp->nrhs+1, dtnum);
  3118   3210               }
  3119   3211               cp = xp;
  3120   3212               used[i] = 1;
  3121   3213               break;
  3122   3214             }
  3123   3215           }
  3124   3216         }
................................................................................
  3632   3724     /* Generate a table containing a text string that describes every
  3633   3725     ** rule in the rule set of the grammer.  This information is used
  3634   3726     ** when tracing REDUCE actions.
  3635   3727     */
  3636   3728     for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){
  3637   3729       assert( rp->index==i );
  3638   3730       fprintf(out," /* %3d */ \"%s ::=", i, rp->lhs->name);
  3639         -    for(j=0; j<rp->nrhs; j++) fprintf(out," %s",rp->rhs[j]->name);
         3731  +    for(j=0; j<rp->nrhs; j++){
         3732  +      struct symbol *sp = rp->rhs[j];
         3733  +      fprintf(out," %s", sp->name);
         3734  +      if( sp->type==MULTITERMINAL ){
         3735  +        int k;
         3736  +        for(k=1; k<sp->nsubsym; k++){
         3737  +          fprintf(out,"|%s",sp->subsym[k]->name);
         3738  +        }
         3739  +      }
         3740  +    }
  3640   3741       fprintf(out,"\",\n"); lineno++;
  3641   3742     }
  3642   3743     tplt_xfer(lemp->name,in,out,&lineno);
  3643   3744   
  3644   3745     /* Generate code which executes every time a symbol is popped from
  3645   3746     ** the stack while processing errors or while destroying the parser. 
  3646   3747     ** (In other words, generate the %destructor actions)