/ Check-in [cb5d1f83]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Optimizations to the SQL language grammar that result in a small size reduction and speed increase.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: cb5d1f83e0a33d546d4c0cb817ef1f8440d1f738
User & Date: drh 2014-01-11 03:54:05
Context
2014-01-11
12:52
In LEMON, limit the size of the grammar file to 100MB. This ensures that the program will never experience integer overflow. To be doubly sure, use calloc() instead of malloc() when allocating arrays. check-in: 29ba458d user: drh tags: trunk
03:54
Optimizations to the SQL language grammar that result in a small size reduction and speed increase. check-in: cb5d1f83 user: drh tags: trunk
03:27
Add the "%token_class" directive to the LEMON parser generator. This opens up the possibility of simplifying the parser. Also remove all calls to sprintf(), strcpy(), and strcat() from LEMON to avoid compiler warnings on OpenBSD. (Aside: It is this change to avoid harmless compiler warnings that was the cause of the reason spat of bugs.) check-in: 8eb48c04 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to addopcodes.awk.

26
27
28
29
30
31
32

33
  printf "#define TK_%-29s %4d\n", "UNCLOSED_STRING", ++max
  printf "#define TK_%-29s %4d\n", "FUNCTION",        ++max
  printf "#define TK_%-29s %4d\n", "COLUMN",          ++max
  printf "#define TK_%-29s %4d\n", "AGG_FUNCTION",    ++max
  printf "#define TK_%-29s %4d\n", "AGG_COLUMN",      ++max
  printf "#define TK_%-29s %4d\n", "UMINUS",          ++max
  printf "#define TK_%-29s %4d\n", "UPLUS",           ++max

}







>

26
27
28
29
30
31
32
33
34
  printf "#define TK_%-29s %4d\n", "UNCLOSED_STRING", ++max
  printf "#define TK_%-29s %4d\n", "FUNCTION",        ++max
  printf "#define TK_%-29s %4d\n", "COLUMN",          ++max
  printf "#define TK_%-29s %4d\n", "AGG_FUNCTION",    ++max
  printf "#define TK_%-29s %4d\n", "AGG_COLUMN",      ++max
  printf "#define TK_%-29s %4d\n", "UMINUS",          ++max
  printf "#define TK_%-29s %4d\n", "UPLUS",           ++max
  printf "#define TK_%-29s %4d\n", "REGISTER",        ++max
}

Changes to src/parse.y.

190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
...
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
...
772
773
774
775
776
777
778
779

780
781
782
783
784
785
786
787
788




789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
....
1161
1162
1163
1164
1165
1166
1167

1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
  pParse->constraintName.n = 0;
}


// An IDENTIFIER can be a generic identifier, or one of several
// keywords.  Any non-standard keyword can also be an identifier.
//
%type id {Token}
id(A) ::= ID(X).         {A = X;}
id(A) ::= INDEXED(X).    {A = X;}

// The following directive causes tokens ABORT, AFTER, ASC, etc. to
// fallback to ID if they will not parse as their original value.
// This obviates the need for the "id" nonterminal.
//
%fallback ID
  ABORT ACTION AFTER ANALYZE ASC ATTACH BEFORE BEGIN BY CASCADE CAST COLUMNKW
................................................................................
%left STAR SLASH REM.
%left CONCAT.
%left COLLATE.
%right BITNOT.

// And "ids" is an identifer-or-string.
//
%type ids {Token}
ids(A) ::= ID|STRING(X).   {A = X;}

// The name of a column or table can be any of the following:
//
%type nm {Token}
nm(A) ::= id(X).         {A = X;}
nm(A) ::= STRING(X).     {A = X;}
nm(A) ::= JOIN_KW(X).    {A = X;}
................................................................................
  Expr *temp3 = sqlite3PExpr(pParse, TK_ID, 0, 0, &Z);
  Expr *temp4 = sqlite3PExpr(pParse, TK_DOT, temp2, temp3, 0);
  A.pExpr = sqlite3PExpr(pParse, TK_DOT, temp1, temp4, 0);
  spanSet(&A,&X,&Z);
}
term(A) ::= INTEGER|FLOAT|BLOB(X).  {spanExpr(&A, pParse, @X, &X);}
term(A) ::= STRING(X).              {spanExpr(&A, pParse, @X, &X);}
expr(A) ::= REGISTER(X).     {

  /* When doing a nested parse, one can include terms in an expression
  ** that look like this:   #1 #2 ...  These terms refer to registers
  ** in the virtual machine.  #N is the N-th register. */
  if( pParse->nested==0 ){
    sqlite3ErrorMsg(pParse, "near \"%T\": syntax error", &X);
    A.pExpr = 0;
  }else{
    A.pExpr = sqlite3PExpr(pParse, TK_REGISTER, 0, 0, &X);
    if( A.pExpr ) sqlite3GetInt32(&X.z[1], &A.pExpr->iTable);




  }
  spanSet(&A, &X, &X);
}
expr(A) ::= VARIABLE(X).     {
  spanExpr(&A, pParse, TK_VARIABLE, &X);
  sqlite3ExprAssignVarNumber(pParse, A.pExpr);
  spanSet(&A, &X, &X);
}
expr(A) ::= expr(E) COLLATE ids(C). {
  A.pExpr = sqlite3ExprAddCollateToken(pParse, E.pExpr, &C);
  A.zStart = E.zStart;
  A.zEnd = &C.z[C.n];
}
%ifndef SQLITE_OMIT_CAST
expr(A) ::= CAST(X) LP expr(E) AS typetoken(T) RP(Y). {
  A.pExpr = sqlite3PExpr(pParse, TK_CAST, E.pExpr, 0, &T);
  spanSet(&A,&X,&Y);
}
%endif  SQLITE_OMIT_CAST
expr(A) ::= ID(X) LP distinct(D) exprlist(Y) RP(E). {
  if( Y && Y->nExpr>pParse->db->aLimit[SQLITE_LIMIT_FUNCTION_ARG] ){
    sqlite3ErrorMsg(pParse, "too many arguments on function %T", &X);
  }
  A.pExpr = sqlite3ExprFunction(pParse, Y, &X);
  spanSet(&A,&X,&E);
  if( D && A.pExpr ){
    A.pExpr->flags |= EP_Distinct;
  }
}
expr(A) ::= ID(X) LP STAR RP(E). {
  A.pExpr = sqlite3ExprFunction(pParse, 0, &X);
  spanSet(&A,&X,&E);
}
term(A) ::= CTIME_KW(OP). {
  A.pExpr = sqlite3ExprFunction(pParse, 0, &OP);
  spanSet(&A, &OP, &OP);
}
................................................................................

nmnum(A) ::= plus_num(X).             {A = X;}
nmnum(A) ::= nm(X).                   {A = X;}
nmnum(A) ::= ON(X).                   {A = X;}
nmnum(A) ::= DELETE(X).               {A = X;}
nmnum(A) ::= DEFAULT(X).              {A = X;}
%endif SQLITE_OMIT_PRAGMA

plus_num(A) ::= PLUS number(X).       {A = X;}
plus_num(A) ::= number(X).            {A = X;}
minus_num(A) ::= MINUS number(X).     {A = X;}
number(A) ::= INTEGER|FLOAT(X).       {A = X;}

//////////////////////////// The CREATE TRIGGER command /////////////////////

%ifndef SQLITE_OMIT_TRIGGER

cmd ::= createkw trigger_decl(A) BEGIN trigger_cmd_list(S) END(Z). {
  Token all;
  all.z = A.z;







|
<
<







 







|
<







 







|
>
|
|
|
|
|
|
|
|
|
>
>
>
>



<
<
<
<
<











|









|







 







>



<
<







190
191
192
193
194
195
196
197


198
199
200
201
202
203
204
...
235
236
237
238
239
240
241
242

243
244
245
246
247
248
249
...
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793





794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
....
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168


1169
1170
1171
1172
1173
1174
1175
  pParse->constraintName.n = 0;
}


// An IDENTIFIER can be a generic identifier, or one of several
// keywords.  Any non-standard keyword can also be an identifier.
//
%token_class id  ID|INDEXED.



// The following directive causes tokens ABORT, AFTER, ASC, etc. to
// fallback to ID if they will not parse as their original value.
// This obviates the need for the "id" nonterminal.
//
%fallback ID
  ABORT ACTION AFTER ANALYZE ASC ATTACH BEFORE BEGIN BY CASCADE CAST COLUMNKW
................................................................................
%left STAR SLASH REM.
%left CONCAT.
%left COLLATE.
%right BITNOT.

// And "ids" is an identifer-or-string.
//
%token_class ids  ID|STRING.


// The name of a column or table can be any of the following:
//
%type nm {Token}
nm(A) ::= id(X).         {A = X;}
nm(A) ::= STRING(X).     {A = X;}
nm(A) ::= JOIN_KW(X).    {A = X;}
................................................................................
  Expr *temp3 = sqlite3PExpr(pParse, TK_ID, 0, 0, &Z);
  Expr *temp4 = sqlite3PExpr(pParse, TK_DOT, temp2, temp3, 0);
  A.pExpr = sqlite3PExpr(pParse, TK_DOT, temp1, temp4, 0);
  spanSet(&A,&X,&Z);
}
term(A) ::= INTEGER|FLOAT|BLOB(X).  {spanExpr(&A, pParse, @X, &X);}
term(A) ::= STRING(X).              {spanExpr(&A, pParse, @X, &X);}
expr(A) ::= VARIABLE(X).     {
  if( X.n>=2 && X.z[0]=='#' && sqlite3Isdigit(X.z[1]) ){
    /* When doing a nested parse, one can include terms in an expression
    ** that look like this:   #1 #2 ...  These terms refer to registers
    ** in the virtual machine.  #N is the N-th register. */
    if( pParse->nested==0 ){
      sqlite3ErrorMsg(pParse, "near \"%T\": syntax error", &X);
      A.pExpr = 0;
    }else{
      A.pExpr = sqlite3PExpr(pParse, TK_REGISTER, 0, 0, &X);
      if( A.pExpr ) sqlite3GetInt32(&X.z[1], &A.pExpr->iTable);
    }
  }else{
    spanExpr(&A, pParse, TK_VARIABLE, &X);
    sqlite3ExprAssignVarNumber(pParse, A.pExpr);
  }
  spanSet(&A, &X, &X);
}





expr(A) ::= expr(E) COLLATE ids(C). {
  A.pExpr = sqlite3ExprAddCollateToken(pParse, E.pExpr, &C);
  A.zStart = E.zStart;
  A.zEnd = &C.z[C.n];
}
%ifndef SQLITE_OMIT_CAST
expr(A) ::= CAST(X) LP expr(E) AS typetoken(T) RP(Y). {
  A.pExpr = sqlite3PExpr(pParse, TK_CAST, E.pExpr, 0, &T);
  spanSet(&A,&X,&Y);
}
%endif  SQLITE_OMIT_CAST
expr(A) ::= id(X) LP distinct(D) exprlist(Y) RP(E). {
  if( Y && Y->nExpr>pParse->db->aLimit[SQLITE_LIMIT_FUNCTION_ARG] ){
    sqlite3ErrorMsg(pParse, "too many arguments on function %T", &X);
  }
  A.pExpr = sqlite3ExprFunction(pParse, Y, &X);
  spanSet(&A,&X,&E);
  if( D && A.pExpr ){
    A.pExpr->flags |= EP_Distinct;
  }
}
expr(A) ::= id(X) LP STAR RP(E). {
  A.pExpr = sqlite3ExprFunction(pParse, 0, &X);
  spanSet(&A,&X,&E);
}
term(A) ::= CTIME_KW(OP). {
  A.pExpr = sqlite3ExprFunction(pParse, 0, &OP);
  spanSet(&A, &OP, &OP);
}
................................................................................

nmnum(A) ::= plus_num(X).             {A = X;}
nmnum(A) ::= nm(X).                   {A = X;}
nmnum(A) ::= ON(X).                   {A = X;}
nmnum(A) ::= DELETE(X).               {A = X;}
nmnum(A) ::= DEFAULT(X).              {A = X;}
%endif SQLITE_OMIT_PRAGMA
%token_class number INTEGER|FLOAT.
plus_num(A) ::= PLUS number(X).       {A = X;}
plus_num(A) ::= number(X).            {A = X;}
minus_num(A) ::= MINUS number(X).     {A = X;}


//////////////////////////// The CREATE TRIGGER command /////////////////////

%ifndef SQLITE_OMIT_TRIGGER

cmd ::= createkw trigger_decl(A) BEGIN trigger_cmd_list(S) END(Z). {
  Token all;
  all.z = A.z;

Changes to src/tokenize.c.

299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320

321
322
323

324
325
326
327
328
329
330
      return i;
    }
    case '?': {
      *tokenType = TK_VARIABLE;
      for(i=1; sqlite3Isdigit(z[i]); i++){}
      return i;
    }
    case '#': {
      for(i=1; sqlite3Isdigit(z[i]); i++){}
      if( i>1 ){
        /* Parameters of the form #NNN (where NNN is a number) are used
        ** internally by sqlite3NestedParse.  */
        *tokenType = TK_REGISTER;
        return i;
      }
      /* Fall through into the next case if the '#' is not followed by
      ** a digit. Try to match #AAAA where AAAA is a parameter name. */
    }
#ifndef SQLITE_OMIT_TCL_VARIABLE
    case '$':
#endif
    case '@':  /* For compatibility with MS SQL Server */

    case ':': {
      int n = 0;
      testcase( z[0]=='$' );  testcase( z[0]=='@' );  testcase( z[0]==':' );

      *tokenType = TK_VARIABLE;
      for(i=1; (c=z[i])!=0; i++){
        if( IdChar(c) ){
          n++;
#ifndef SQLITE_OMIT_TCL_VARIABLE
        }else if( c=='(' && n>0 ){
          do{







<
<
<
<
<
<
<
<
<
<
<




>


|
>







299
300
301
302
303
304
305











306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
      return i;
    }
    case '?': {
      *tokenType = TK_VARIABLE;
      for(i=1; sqlite3Isdigit(z[i]); i++){}
      return i;
    }











#ifndef SQLITE_OMIT_TCL_VARIABLE
    case '$':
#endif
    case '@':  /* For compatibility with MS SQL Server */
    case '#':
    case ':': {
      int n = 0;
      testcase( z[0]=='$' );  testcase( z[0]=='@' );
      testcase( z[0]==':' );  testcase( z[0]=='#' );
      *tokenType = TK_VARIABLE;
      for(i=1; (c=z[i])!=0; i++){
        if( IdChar(c) ){
          n++;
#ifndef SQLITE_OMIT_TCL_VARIABLE
        }else if( c=='(' && n>0 ){
          do{