SQLite

Check-in [380f09c1]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Allow control characters in JSON5 string literals. Forum thread 05182119f69c3a92.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 380f09c194caff557640692d2f255f8cdc1dcfed5976711686466692f4d7a60d
User & Date: drh 2024-01-31 15:29:29
Context
2024-01-31
20:11
Replace a conditional assignment that was made obsolete by [d4c193f0b49f4950] with an assert(). The conditional was added by [d6fd512f50513ab7] as a fix for tickets [c36cdb4afd504dc1], [4051a7f931d9ba24], and [d6fd512f50513ab7], which means that [d4c193f0b49f4950] is now the correct fix for those tickets. (check-in: 44b5524d user: drh tags: trunk)
15:29
Allow control characters in JSON5 string literals. Forum thread 05182119f69c3a92. (check-in: 380f09c1 user: drh tags: trunk)
15:20
Add NEVER() and ALWAYS() macros for the JSON5-control-character change. Also fix an incorrect comparison used to determine if a buffer needed to be resized. (Closed-Leaf check-in: e3c0c0e6 user: drh tags: json5-ctrl-char)
12:15
Fix a memory allocation bug in the (debug-use-only) json_parse() SQL function. (check-in: 32ce7dac user: drh tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/json.c.
616
617
618
619
620
621
622


































623
624
625
626
627
628
629
/* Append a comma to the string under construction in p, unless the
** string is still empty or the character most recently appended is
** '[' or '{'.
*/
static void jsonAppendSeparator(JsonString *p){
  if( p->nUsed!=0 ){
    char cPrev = p->zBuf[p->nUsed-1];
    if( cPrev!='[' && cPrev!='{' ){
      jsonAppendChar(p, ',');
    }
  }
}



































/* Append the N-byte string in zIn to the end of the JsonString string
** under construction.  Enclose the string in double-quotes ("...") and
** escape any double-quotes or backslash characters contained within the
** string.
**
** This routine is a high-runner.  There is a measurable performance







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
/* Append a comma to the string under construction in p, unless the
** string is still empty or the character most recently appended is
** '[' or '{'.
*/
static void jsonAppendSeparator(JsonString *p){
  if( p->nUsed!=0 ){
    char cPrev = p->zBuf[p->nUsed-1];
    if( cPrev!='[' && cPrev!='{' ){
      jsonAppendChar(p, ',');
    }
  }
}

/* Write the canonical JSON escape sequence for control character c
** (a byte value less than 32) onto the end of p.  The five characters
** that have a two-byte escape (\b, \t, \n, \f, \r) use that form.  Every
** other value is written as a six-byte \u00XX escape using lower-case
** hexadecimal digits.
**
** This routine never enlarges the output buffer.  The caller must have
** already made room for the worst-case encoding plus a nul terminator,
** which is to say at least 7 bytes beyond p->nUsed.
*/
static void jsonAppendControlChar(JsonString *p, u8 c){
  static const char zHex[] = "0123456789abcdef";
  /* aSpecial[x] is the letter that follows the backslash in the two-byte
  ** escape for control character x, or zero if x has no two-byte escape.
  */
  static const char aSpecial[] = {
     /* 0x00 */  0,   0,   0,   0,   0,   0,   0,   0,
     /* 0x08 */ 'b', 't', 'n',  0,  'f', 'r',  0,   0,
     /* 0x10 */  0,   0,   0,   0,   0,   0,   0,   0,
     /* 0x18 */  0,   0,   0,   0,   0,   0,   0,   0
  };
  assert( sizeof(aSpecial)==32 );
  assert( aSpecial['\b']=='b' );
  assert( aSpecial['\t']=='t' );
  assert( aSpecial['\n']=='n' );
  assert( aSpecial['\f']=='f' );
  assert( aSpecial['\r']=='r' );
  assert( c>=0 && c<sizeof(aSpecial) );
  assert( p->nUsed+7 <= p->nAlloc );

  /* Both encodings begin with a backslash */
  p->zBuf[p->nUsed++] = '\\';
  if( aSpecial[c]!=0 ){
    /* Two-byte form:  \b  \t  \n  \f  \r */
    p->zBuf[p->nUsed++] = aSpecial[c];
    return;
  }
  /* Six-byte form:  \u00XX */
  p->zBuf[p->nUsed++] = 'u';
  p->zBuf[p->nUsed++] = '0';
  p->zBuf[p->nUsed++] = '0';
  p->zBuf[p->nUsed++] = zHex[c>>4];
  p->zBuf[p->nUsed++] = zHex[c&0xf];
}

/* Append the N-byte string in zIn to the end of the JsonString string
** under construction.  Enclose the string in double-quotes ("...") and
** escape any double-quotes or backslash characters contained within the
** string.
**
** This routine is a high-runner.  There is a measurable performance
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711

712
713
714
715
716
717
718
      memcpy(&p->zBuf[p->nUsed], z, k);
      p->nUsed += k;
      z += k;
      N -= k;
    }
    c = z[0];
    if( c=='"' || c=='\\' ){
      json_simple_escape:
      if( (p->nUsed+N+3 > p->nAlloc) && jsonStringGrow(p,N+3)!=0 ) return;
      p->zBuf[p->nUsed++] = '\\';
      p->zBuf[p->nUsed++] = c;
    }else if( c=='\'' ){
      p->zBuf[p->nUsed++] = c;
    }else{
      static const char aSpecial[] = {
         0, 0, 0, 0, 0, 0, 0, 0, 'b', 't', 'n', 0, 'f', 'r', 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0,   0,   0,   0, 0,   0,   0, 0, 0
      };
      assert( sizeof(aSpecial)==32 );
      assert( aSpecial['\b']=='b' );
      assert( aSpecial['\f']=='f' );
      assert( aSpecial['\n']=='n' );
      assert( aSpecial['\r']=='r' );
      assert( aSpecial['\t']=='t' );
      assert( c>=0 && c<sizeof(aSpecial) );
      if( aSpecial[c] ){
        c = aSpecial[c];
        goto json_simple_escape;
      }
      if( (p->nUsed+N+7 > p->nAlloc) && jsonStringGrow(p,N+7)!=0 ) return;
      p->zBuf[p->nUsed++] = '\\';
      p->zBuf[p->nUsed++] = 'u';
      p->zBuf[p->nUsed++] = '0';
      p->zBuf[p->nUsed++] = '0';
      p->zBuf[p->nUsed++] = "0123456789abcdef"[c>>4];
      p->zBuf[p->nUsed++] = "0123456789abcdef"[c&0xf];

    }
    z++;
    N--;
  }
  p->zBuf[p->nUsed++] = '"';
  assert( p->nUsed<p->nAlloc );
}







<






<
<
<
<
<
<
<
<
<
<
<
<
<
<
<

<
<
<
<
<
<
>







710
711
712
713
714
715
716

717
718
719
720
721
722















723






724
725
726
727
728
729
730
731
      memcpy(&p->zBuf[p->nUsed], z, k);
      p->nUsed += k;
      z += k;
      N -= k;
    }
    c = z[0];
    if( c=='"' || c=='\\' ){

      if( (p->nUsed+N+3 > p->nAlloc) && jsonStringGrow(p,N+3)!=0 ) return;
      p->zBuf[p->nUsed++] = '\\';
      p->zBuf[p->nUsed++] = c;
    }else if( c=='\'' ){
      p->zBuf[p->nUsed++] = c;
    }else{















      if( (p->nUsed+N+7 > p->nAlloc) && jsonStringGrow(p,N+7)!=0 ) return;






      jsonAppendControlChar(p, c);
    }
    z++;
    N--;
  }
  p->zBuf[p->nUsed++] = '"';
  assert( p->nUsed<p->nAlloc );
}
1405
1406
1407
1408
1409
1410
1411
1412



1413
1414
1415
1416
1417
1418
1419
    case JSONB_TEXT5: {
      j = i+n;
      k = j+sz;
      while( j<k ){
        if( !jsonIsOk[z[j]] && z[j]!='\'' ){
          if( z[j]=='"' ){
            if( x==JSONB_TEXTJ ) return j+1;
          }else if( z[j]!='\\' || j+1>=k ){



            return j+1;
          }else if( strchr("\"\\/bfnrt",z[j+1])!=0 ){
            j++;
          }else if( z[j+1]=='u' ){
            if( j+5>=k ) return j+1;
            if( !jsonIs4Hex((const char*)&z[j+2]) ) return j+1;
            j++;







|
>
>
>







1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
    case JSONB_TEXT5: {
      j = i+n;
      k = j+sz;
      while( j<k ){
        if( !jsonIsOk[z[j]] && z[j]!='\'' ){
          if( z[j]=='"' ){
            if( x==JSONB_TEXTJ ) return j+1;
          }else if( z[j]<=0x1f ){
            /* Control characters in JSON5 string literals are ok */
            if( x==JSONB_TEXTJ ) return j+1;
          }else if( NEVER(z[j]!='\\') || j+1>=k ){
            return j+1;
          }else if( strchr("\"\\/bfnrt",z[j+1])!=0 ){
            j++;
          }else if( z[j+1]=='u' ){
            if( j+5>=k ) return j+1;
            if( !jsonIs4Hex((const char*)&z[j+2]) ) return j+1;
            j++;
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708





1709
1710
1711
1712
1713
1714
1715
          opcode = JSONB_TEXT5;
          pParse->hasNonstd = 1;
        }else{
          pParse->iErr = j;
          return -1;
        }
      }else if( c<=0x1f ){
        /* Control characters are not allowed in strings */
        pParse->iErr = j;
        return -1;





      }else if( c=='"' ){
        opcode = JSONB_TEXT5;
      }
      j++;
    }
    jsonBlobAppendNode(pParse, opcode, j-1-i, &z[i+1]);
    return j+1;







|
|
|
>
>
>
>
>







1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
          opcode = JSONB_TEXT5;
          pParse->hasNonstd = 1;
        }else{
          pParse->iErr = j;
          return -1;
        }
      }else if( c<=0x1f ){
        if( c==0 ){
          pParse->iErr = j;
          return -1;
        }
        /* Control characters are not allowed in canonical JSON string
        ** literals, but are allowed in JSON5 string literals. */
        opcode = JSONB_TEXT5;
        pParse->hasNonstd = 1;
      }else if( c=='"' ){
        opcode = JSONB_TEXT5;
      }
      j++;
    }
    jsonBlobAppendNode(pParse, opcode, j-1-i, &z[i+1]);
    return j+1;
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200







2201
2202
2203
2204
2205
2206
2207
    case JSONB_TEXT5: {
      const char *zIn;
      u32 k;
      u32 sz2 = sz;
      zIn = (const char*)&pParse->aBlob[i+n];
      jsonAppendChar(pOut, '"');
      while( sz2>0 ){
        for(k=0; k<sz2 && zIn[k]!='\\' && zIn[k]!='"'; k++){}
        if( k>0 ){
          jsonAppendRawNZ(pOut, zIn, k);
          if( k>=sz2 ){
            break;
          }
          zIn += k;
          sz2 -= k;
        }
        if( zIn[0]=='"' ){
          jsonAppendRawNZ(pOut, "\\\"", 2);
          zIn++;







          sz2--;
          continue;
        }
        assert( zIn[0]=='\\' );
        assert( sz2>=1 );
        if( sz2<2 ){
          pOut->eErr |= JSTRING_MALFORMED;







|











>
>
>
>
>
>
>







2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
    case JSONB_TEXT5: {
      const char *zIn;
      u32 k;
      u32 sz2 = sz;
      zIn = (const char*)&pParse->aBlob[i+n];
      jsonAppendChar(pOut, '"');
      while( sz2>0 ){
        for(k=0; k<sz2 && (jsonIsOk[(u8)zIn[k]] || zIn[k]=='\''); k++){}
        if( k>0 ){
          jsonAppendRawNZ(pOut, zIn, k);
          if( k>=sz2 ){
            break;
          }
          zIn += k;
          sz2 -= k;
        }
        if( zIn[0]=='"' ){
          jsonAppendRawNZ(pOut, "\\\"", 2);
          zIn++;
          sz2--;
          continue;
        }
        if( zIn[0]<=0x1f ){
          if( pOut->nUsed+7>pOut->nAlloc && jsonStringGrow(pOut,7) ) break;
          jsonAppendControlChar(pOut, zIn[0]);
          zIn++;
          sz2--;
          continue;
        }
        assert( zIn[0]=='\\' );
        assert( sz2>=1 );
        if( sz2<2 ){
          pOut->eErr |= JSTRING_MALFORMED;
Changes to test/json501.test.
301
302
303
304
305
306
307



























308
309
} xyz

# 2023-11-08 forum/forumpost/ddcad3e884
#
# The input object has an unquoted label and a single-quoted string value
# that contains double-quote characters.  json() is expected to return the
# value as a double-quoted string in which each embedded double-quote is
# preceded by a backslash.
#
do_execsql_test 13.1 {
  SELECT json('{x:''a "b" c''}');
} {{{"x":"a \"b\" c"}}}




























finish_test







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>


301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
} xyz

# 2023-11-08 forum/forumpost/ddcad3e884
#
# The input object has an unquoted label and a single-quoted string value
# that contains double-quote characters.  json() is expected to return the
# value as a double-quoted string in which each embedded double-quote is
# preceded by a backslash.
#
do_execsql_test 13.1 {
  SELECT json('{x:''a "b" c''}');
} {{{"x":"a \"b\" c"}}}

# 2024-01-31
# Allow control characters within JSON5 string literals.
#
# The loop below runs four tests for every character code c from 1
# through 0x1f.  Code 0 is not covered by this loop.
#
#   14.c.1   json_valid() with one argument returns 0 for a string
#            literal that contains the raw control character
#   14.c.2   json_valid() with a second argument of 2 returns 1 for
#            the same literal
#   14.c.3   json() outputs the control character as an escape sequence
#   14.c.4   same check as 14.c.3, but the text is first converted by
#            jsonb() and then extracted with the -> operator
#
for {set c 1} {$c<=0x1f} {incr c} {
  do_execsql_test 14.$c.1 {
    SELECT json_valid('"abc' || char($c) || 'xyz"');
  } {0}
  do_execsql_test 14.$c.2 {
    SELECT json_valid('"abc' || char($c) || 'xyz"', 2);
  } {1}
  # Set e to the escape sequence expected in the output.  Codes 8, 9, 10,
  # 12, and 13 have a two-character escape.  Every other code is expected
  # as a backslash, the letter u, two zeros, and two lower-case hex digits.
  switch $c {
    8   {set e "\\b"}
    9   {set e "\\t"}
    10  {set e "\\n"}
    12  {set e "\\f"}
    13  {set e "\\r"}
    default {set e [format "\\u00%02x" $c]}
  }
  do_execsql_test 14.$c.3 {
    SELECT json('{label:"abc' || char($c) || 'xyz"}');
  } "{{\"label\":\"abc${e}xyz\"}}"
  do_execsql_test 14.$c.4 {
    SELECT jsonb('{label:"abc' || char($c) || 'xyz"}') -> '$';
  } "{{\"label\":\"abc${e}xyz\"}}"
}


finish_test