/ Check-in [da7890ca]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:In LEMON, fix a bug in the text formatter introduced by the previous commit. Also add the new "%token_class" directive for defining symbolic names that stand for any one of a collection of tokens.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | lemon-updates
Files: files | file ages | folders
SHA1: da7890ca6b1d8e511377a469047120220e8c3b2d
User & Date: drh 2014-01-11 03:06:18
Context
2014-01-11
03:27
Add the "%token_class" directive to the LEMON parser generator. This opens up the possibility of simplifying the parser. Also remove all calls to sprintf(), strcpy(), and strcat() from LEMON to avoid compiler warnings on OpenBSD. (Aside: It is this change to avoid harmless compiler warnings that was the cause of the recent spate of bugs.) check-in: 8eb48c04 user: drh tags: trunk
03:13
Add the "%token_class" directive to the LEMON parser generator. This opens up the possibility of simplifying the parser. Also remove all calls to sprintf(), strcpy(), and strcat() from LEMON to avoid compiler warnings on OpenBSD. Closed-Leaf check-in: 4e4483b2 user: drh tags: buggy-lemon
03:06
In LEMON, fix a bug in the text formatter introduced by the previous commit. Also add the new "%token_class" directive for defining symbolic names that stand for any one of a collection of tokens. Closed-Leaf check-in: da7890ca user: drh tags: lemon-updates
2014-01-10
23:21
Do not use sprintf(), strcpy() or strcat() in the implementation of the lemon parser generator tool, to avoid compiler warnings in OpenBSD. check-in: e43c522d user: drh tags: lemon-updates
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to tool/lemon.c.

    67     67   **   %.*s
    68     68   **
    69     69   */
    70     70   static void lemon_addtext(
    71     71     char *zBuf,           /* The buffer to which text is added */
    72     72     int *pnUsed,          /* Slots of the buffer used so far */
    73     73     const char *zIn,      /* Text to add */
    74         -  int nIn               /* Bytes of text to add.  -1 to use strlen() */
           74  +  int nIn,              /* Bytes of text to add.  -1 to use strlen() */
           75  +  int iWidth            /* Field width.  Negative to left justify */
    75     76   ){
    76     77     if( nIn<0 ) for(nIn=0; zIn[nIn]; nIn++){}
           78  +  while( iWidth>nIn ){ zBuf[*(pnUsed++)] = ' '; iWidth--; }
    77     79     if( nIn==0 ) return;
    78     80     memcpy(&zBuf[*pnUsed], zIn, nIn);
    79     81     *pnUsed += nIn;
           82  +  while( (-iWidth)>nIn ){ zBuf[*(pnUsed++)] = ' '; iWidth++; }
    80     83     zBuf[*pnUsed] = 0;
    81     84   }
    82     85   static int lemon_vsprintf(char *str, const char *zFormat, va_list ap){
    83     86     int i, j, k, c, size;
    84     87     int nUsed = 0;
    85     88     const char *z;
    86     89     char zTemp[50];
    87     90     str[0] = 0;
    88     91     for(i=j=0; (c = zFormat[i])!=0; i++){
    89     92       if( c=='%' ){
    90         -      lemon_addtext(str, &nUsed, &zFormat[j], i-j);
           93  +      int iWidth = 0;
           94  +      lemon_addtext(str, &nUsed, &zFormat[j], i-j, 0);
    91     95         c = zFormat[++i];
           96  +      if( isdigit(c) || (c=='-' && isdigit(zFormat[i+1])) ){
           97  +        if( c=='-' ) i++;
           98  +        while( isdigit(zFormat[i]) ) iWidth = iWidth*10 + zFormat[i++] - '0';
           99  +        if( c=='-' ) iWidth = -iWidth;
          100  +        c = zFormat[i];
          101  +      }
    92    102         if( c=='d' ){
    93    103           int v = va_arg(ap, int);
    94    104           if( v<0 ){
    95         -          lemon_addtext(str, &nUsed, "-", 1);
          105  +          lemon_addtext(str, &nUsed, "-", 1, iWidth);
    96    106             v = -v;
    97    107           }else if( v==0 ){
    98         -          lemon_addtext(str, &nUsed, "0", 1);
          108  +          lemon_addtext(str, &nUsed, "0", 1, iWidth);
    99    109           }
   100    110           k = 0;
   101    111           while( v>0 ){
   102    112             k++;
   103    113             zTemp[sizeof(zTemp)-k] = (v%10) + '0';
   104    114             v /= 10;
   105    115           }
   106         -        lemon_addtext(str, &nUsed, &zTemp[sizeof(zTemp)-k], k);
          116  +        lemon_addtext(str, &nUsed, &zTemp[sizeof(zTemp)-k], k, iWidth);
   107    117         }else if( c=='s' ){
   108    118           z = va_arg(ap, const char*);
   109         -        lemon_addtext(str, &nUsed, z, -1);
          119  +        lemon_addtext(str, &nUsed, z, -1, iWidth);
   110    120         }else if( c=='.' && memcmp(&zFormat[i], ".*s", 3)==0 ){
   111    121           i += 2;
   112    122           k = va_arg(ap, int);
   113    123           z = va_arg(ap, const char*);
   114         -        lemon_addtext(str, &nUsed, z, k);
          124  +        lemon_addtext(str, &nUsed, z, k, iWidth);
   115    125         }else if( c=='%' ){
   116         -        lemon_addtext(str, &nUsed, "%", 1);
          126  +        lemon_addtext(str, &nUsed, "%", 1, 0);
   117    127         }else{
   118    128           fprintf(stderr, "illegal format\n");
   119    129           exit(1);
   120    130         }
   121    131         j = i+1;
   122    132       }
   123    133     }
   124         -  lemon_addtext(str, &nUsed, &zFormat[j], i-j);
          134  +  lemon_addtext(str, &nUsed, &zFormat[j], i-j, 0);
   125    135     return nUsed;
   126    136   }
   127    137   static int lemon_sprintf(char *str, const char *format, ...){
   128    138     va_list ap;
   129    139     int rc;
   130    140     va_start(ap, format);
   131    141     rc = lemon_vsprintf(str, format, ap);
................................................................................
  1534   1544     if( lem.errorcnt ) exit(lem.errorcnt);
  1535   1545     if( lem.nrule==0 ){
  1536   1546       fprintf(stderr,"Empty grammar.\n");
  1537   1547       exit(1);
  1538   1548     }
  1539   1549   
  1540   1550     /* Count and index the symbols of the grammar */
  1541         -  lem.nsymbol = Symbol_count();
  1542   1551     Symbol_new("{default}");
         1552  +  lem.nsymbol = Symbol_count();
  1543   1553     lem.symbols = Symbol_arrayof();
  1544         -  for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i;
  1545         -  qsort(lem.symbols,lem.nsymbol+1,sizeof(struct symbol*), Symbolcmpp);
  1546         -  for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i;
         1554  +  for(i=0; i<lem.nsymbol; i++) lem.symbols[i]->index = i;
         1555  +  qsort(lem.symbols,lem.nsymbol,sizeof(struct symbol*), Symbolcmpp);
         1556  +  for(i=0; i<lem.nsymbol; i++) lem.symbols[i]->index = i;
         1557  +  while( lem.symbols[i-1]->type==MULTITERMINAL ){ i--; }
         1558  +  assert( strcmp(lem.symbols[i-1]->name,"{default}")==0 );
         1559  +  lem.nsymbol = i - 1;
  1547   1560     for(i=1; isupper(lem.symbols[i]->name[0]); i++);
  1548   1561     lem.nterminal = i;
  1549   1562   
  1550   1563     /* Generate a reprint of the grammar, if requested on the command line */
  1551   1564     if( rpflag ){
  1552   1565       Reprint(&lem);
  1553   1566     }else{
................................................................................
  2027   2040     PRECEDENCE_MARK_1,
  2028   2041     PRECEDENCE_MARK_2,
  2029   2042     RESYNC_AFTER_RULE_ERROR,
  2030   2043     RESYNC_AFTER_DECL_ERROR,
  2031   2044     WAITING_FOR_DESTRUCTOR_SYMBOL,
  2032   2045     WAITING_FOR_DATATYPE_SYMBOL,
  2033   2046     WAITING_FOR_FALLBACK_ID,
  2034         -  WAITING_FOR_WILDCARD_ID
         2047  +  WAITING_FOR_WILDCARD_ID,
         2048  +  WAITING_FOR_CLASS_ID,
         2049  +  WAITING_FOR_CLASS_TOKEN
  2035   2050   };
  2036   2051   struct pstate {
  2037   2052     char *filename;       /* Name of the input file */
  2038   2053     int tokenlineno;      /* Linenumber at which current token starts */
  2039   2054     int errorcnt;         /* Number of errors so far */
  2040   2055     char *tokenstart;     /* Text of current token */
  2041   2056     struct lemon *gp;     /* Global state vector */
  2042   2057     enum e_state state;        /* The state of the parser */
  2043   2058     struct symbol *fallback;   /* The fallback token */
         2059  +  struct symbol *tkclass;    /* Token class symbol */
  2044   2060     struct symbol *lhs;        /* Left-hand side of current rule */
  2045   2061     const char *lhsalias;      /* Alias for the LHS */
  2046   2062     int nrhs;                  /* Number of right-hand side symbols seen */
  2047   2063     struct symbol *rhs[MAXRHS];  /* RHS symbols */
  2048   2064     const char *alias[MAXRHS]; /* Aliases for each RHS symbol (or NULL) */
  2049   2065     struct rule *prevrule;     /* Previous rule parsed */
  2050   2066     const char *declkeyword;   /* Keyword of a declaration */
................................................................................
  2341   2357           }else if( strcmp(x,"type")==0 ){
  2342   2358             psp->state = WAITING_FOR_DATATYPE_SYMBOL;
  2343   2359           }else if( strcmp(x,"fallback")==0 ){
  2344   2360             psp->fallback = 0;
  2345   2361             psp->state = WAITING_FOR_FALLBACK_ID;
  2346   2362           }else if( strcmp(x,"wildcard")==0 ){
  2347   2363             psp->state = WAITING_FOR_WILDCARD_ID;
         2364  +        }else if( strcmp(x,"token_class")==0 ){
         2365  +          psp->state = WAITING_FOR_CLASS_ID;
  2348   2366           }else{
  2349   2367             ErrorMsg(psp->filename,psp->tokenlineno,
  2350   2368               "Unknown declaration keyword: \"%%%s\".",x);
  2351   2369             psp->errorcnt++;
  2352   2370             psp->state = RESYNC_AFTER_DECL_ERROR;
  2353   2371           }
  2354   2372         }else{
................................................................................
  2508   2526             psp->gp->wildcard = sp;
  2509   2527           }else{
  2510   2528             ErrorMsg(psp->filename, psp->tokenlineno,
  2511   2529               "Extra wildcard to token: %s", x);
  2512   2530             psp->errorcnt++;
  2513   2531           }
  2514   2532         }
         2533  +      break;
         2534  +    case WAITING_FOR_CLASS_ID:
         2535  +      if( !islower(x[0]) ){
         2536  +        ErrorMsg(psp->filename, psp->tokenlineno,
         2537  +          "%%token_class must be followed by an identifier: ", x);
         2538  +        psp->errorcnt++;
         2539  +        psp->state = RESYNC_AFTER_DECL_ERROR;
         2540  +     }else if( Symbol_find(x) ){
         2541  +        ErrorMsg(psp->filename, psp->tokenlineno,
         2542  +          "Symbol \"%s\" already used", x);
         2543  +        psp->errorcnt++;
         2544  +        psp->state = RESYNC_AFTER_DECL_ERROR;
         2545  +      }else{
         2546  +        psp->tkclass = Symbol_new(x);
         2547  +        psp->tkclass->type = MULTITERMINAL;
         2548  +        psp->state = WAITING_FOR_CLASS_TOKEN;
         2549  +      }
         2550  +      break;
         2551  +    case WAITING_FOR_CLASS_TOKEN:
         2552  +      if( x[0]=='.' ){
         2553  +        psp->state = WAITING_FOR_DECL_OR_RULE;
         2554  +      }else if( isupper(x[0]) || ((x[0]=='|' || x[0]=='/') && isupper(x[1])) ){
         2555  +        struct symbol *msp = psp->tkclass;
         2556  +        msp->nsubsym++;
         2557  +        msp->subsym = (struct symbol **) realloc(msp->subsym,
         2558  +          sizeof(struct symbol*)*msp->nsubsym);
         2559  +        if( !isupper(x[0]) ) x++;
         2560  +        msp->subsym[msp->nsubsym-1] = Symbol_new(x);
         2561  +      }else{
         2562  +        ErrorMsg(psp->filename, psp->tokenlineno,
         2563  +          "%%token_class argument \"%s\" should be a token", x);
         2564  +        psp->errorcnt++;
         2565  +        psp->state = RESYNC_AFTER_DECL_ERROR;
         2566  +      }
  2515   2567         break;
  2516   2568       case RESYNC_AFTER_RULE_ERROR:
  2517   2569   /*      if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE;
  2518   2570   **      break; */
  2519   2571       case RESYNC_AFTER_DECL_ERROR:
  2520   2572         if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE;
  2521   2573         if( x[0]=='%' ) psp->state = WAITING_FOR_DECL_KEYWORD;
................................................................................
  2863   2915     }
  2864   2916     for(rp=lemp->rule; rp; rp=rp->next){
  2865   2917       printf("%s",rp->lhs->name);
  2866   2918       /*    if( rp->lhsalias ) printf("(%s)",rp->lhsalias); */
  2867   2919       printf(" ::=");
  2868   2920       for(i=0; i<rp->nrhs; i++){
  2869   2921         sp = rp->rhs[i];
  2870         -      printf(" %s", sp->name);
  2871   2922         if( sp->type==MULTITERMINAL ){
         2923  +        printf(" %s", sp->subsym[0]->name);
  2872   2924           for(j=1; j<sp->nsubsym; j++){
  2873   2925             printf("|%s", sp->subsym[j]->name);
  2874   2926           }
         2927  +      }else{
         2928  +        printf(" %s", sp->name);
  2875   2929         }
  2876   2930         /* if( rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */
  2877   2931       }
  2878   2932       printf(".");
  2879   2933       if( rp->precsym ) printf(" [%s]",rp->precsym->name);
  2880   2934       /* if( rp->code ) printf("\n    %s",rp->code); */
  2881   2935       printf("\n");
................................................................................
  2889   2943     int i, j;
  2890   2944     rp = cfp->rp;
  2891   2945     fprintf(fp,"%s ::=",rp->lhs->name);
  2892   2946     for(i=0; i<=rp->nrhs; i++){
  2893   2947       if( i==cfp->dot ) fprintf(fp," *");
  2894   2948       if( i==rp->nrhs ) break;
  2895   2949       sp = rp->rhs[i];
  2896         -    fprintf(fp," %s", sp->name);
  2897   2950       if( sp->type==MULTITERMINAL ){
         2951  +      fprintf(fp," %s", sp->subsym[0]->name);
  2898   2952         for(j=1; j<sp->nsubsym; j++){
  2899   2953           fprintf(fp,"|%s",sp->subsym[j]->name);
  2900   2954         }
         2955  +    }else{
         2956  +      fprintf(fp," %s", sp->name);
  2901   2957       }
  2902   2958     }
  2903   2959   }
  2904   2960   
  2905   2961   /* #define TEST */
  2906   2962   #if 0
  2907   2963   /* Print a set */
................................................................................
  3640   3696   ** Write text on "out" that describes the rule "rp".
  3641   3697   */
  3642   3698   static void writeRuleText(FILE *out, struct rule *rp){
  3643   3699     int j;
  3644   3700     fprintf(out,"%s ::=", rp->lhs->name);
  3645   3701     for(j=0; j<rp->nrhs; j++){
  3646   3702       struct symbol *sp = rp->rhs[j];
  3647         -    fprintf(out," %s", sp->name);
  3648         -    if( sp->type==MULTITERMINAL ){
         3703  +    if( sp->type!=MULTITERMINAL ){
         3704  +      fprintf(out," %s", sp->name);
         3705  +    }else{
  3649   3706         int k;
         3707  +      fprintf(out," %s", sp->subsym[0]->name);
  3650   3708         for(k=1; k<sp->nsubsym; k++){
  3651   3709           fprintf(out,"|%s",sp->subsym[k]->name);
  3652   3710         }
  3653   3711       }
  3654   3712     }
  3655   3713   }
  3656   3714   
................................................................................
  4110   4168   
  4111   4169     if( lemp->tokenprefix ) prefix = lemp->tokenprefix;
  4112   4170     else                    prefix = "";
  4113   4171     in = file_open(lemp,".h","rb");
  4114   4172     if( in ){
  4115   4173       int nextChar;
  4116   4174       for(i=1; i<lemp->nterminal && fgets(line,LINESIZE,in); i++){
  4117         -      lemon_sprintf(pattern,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i);
         4175  +      lemon_sprintf(pattern,"#define %s%-30s %3d\n",
         4176  +                    prefix,lemp->symbols[i]->name,i);
  4118   4177         if( strcmp(line,pattern) ) break;
  4119   4178       }
  4120   4179       nextChar = fgetc(in);
  4121   4180       fclose(in);
  4122   4181       if( i==lemp->nterminal && nextChar==EOF ){
  4123   4182         /* No change in the file.  Don't rewrite it. */
  4124   4183         return;
  4125   4184       }
  4126   4185     }
  4127   4186     out = file_open(lemp,".h","wb");
  4128   4187     if( out ){
  4129   4188       for(i=1; i<lemp->nterminal; i++){
  4130         -      fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i);
         4189  +      fprintf(out,"#define %s%-30s %3d\n",prefix,lemp->symbols[i]->name,i);
  4131   4190       }
  4132   4191       fclose(out);  
  4133   4192     }
  4134   4193     return;
  4135   4194   }
  4136   4195   
  4137   4196   /* Reduce the size of the action tables, if possible, by making use
................................................................................
  4493   4552       sp->useCnt = 0;
  4494   4553       Symbol_insert(sp,sp->name);
  4495   4554     }
  4496   4555     sp->useCnt++;
  4497   4556     return sp;
  4498   4557   }
  4499   4558   
  4500         -/* Compare two symbols for working purposes
         4559  +/* Compare two symbols for sorting purposes.  Return negative,
         4560  +** zero, or positive if a is less than, equal to, or greater
         4561  +** than b.
  4501   4562   **
  4502   4563   ** Symbols that begin with upper case letters (terminals or tokens)
  4503   4564   ** must sort before symbols that begin with lower case letters
  4504         -** (non-terminals).  Other than that, the order does not matter.
         4565  +** (non-terminals).  And MULTITERMINAL symbols (created using the
         4566  +** %token_class directive) must sort at the very end. Other than
         4567  +** that, the order does not matter.
  4505   4568   **
  4506   4569   ** We find experimentally that leaving the symbols in their original
  4507   4570   ** order (the order they appeared in the grammar file) gives the
  4508   4571   ** smallest parser tables in SQLite.
  4509   4572   */
  4510   4573   int Symbolcmpp(const void *_a, const void *_b)
  4511   4574   {
  4512         -  const struct symbol **a = (const struct symbol **) _a;
  4513         -  const struct symbol **b = (const struct symbol **) _b;
  4514         -  int i1 = (**a).index + 10000000*((**a).name[0]>'Z');
  4515         -  int i2 = (**b).index + 10000000*((**b).name[0]>'Z');
  4516         -  assert( i1!=i2 || strcmp((**a).name,(**b).name)==0 );
  4517         -  return i1-i2;
         4575  +  const struct symbol *a = *(const struct symbol **) _a;
         4576  +  const struct symbol *b = *(const struct symbol **) _b;
         4577  +  int i1 = a->type==MULTITERMINAL ? 3 : a->name[0]>'Z' ? 2 : 1;
         4578  +  int i2 = b->type==MULTITERMINAL ? 3 : b->name[0]>'Z' ? 2 : 1;
         4579  +  return i1==i2 ? a->index - b->index : i1 - i2;
  4518   4580   }
  4519   4581   
  4520   4582   /* There is one instance of the following structure for each
  4521   4583   ** associative array of type "x2".
  4522   4584   */
  4523   4585   struct s_x2 {
  4524   4586     int size;               /* The number of available slots. */