Many hyperlinks are disabled. Use anonymous login to enable hyperlinks.
Overview
Comment: | The ".import" command of the shell, and the csv virtual table extension both ignore a single UTF-8 BOM at the beginning of their input. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA3-256: | 7c15d762d99c2e3e534cd35dfe25ddcd |
User & Date: | drh 2017-06-26 18:42:23.729 |
Context
2017-06-26
| ||
21:08 | Add the -withoutnulls option to the "db eval" method in the TCL interface. (check-in: 18f0616e15 user: drh tags: trunk) | |
18:42 | The ".import" command of the shell, and the csv virtual table extension both ignore a single UTF-8 BOM at the beginning of their input. (check-in: 7c15d762d9 user: drh tags: trunk) | |
2017-06-24
| ||
19:21 | Disable shell tests for the .schema command if virtual tables are not available. (check-in: c8186874b3 user: drh tags: trunk) | |
Changes
Changes to ext/misc/csv.c.
︙ | ︙ | |||
73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 | typedef struct CsvReader CsvReader; struct CsvReader { FILE *in; /* Read the CSV text from this input stream */ char *z; /* Accumulated text for a field */ int n; /* Number of bytes in z */ int nAlloc; /* Space allocated for z[] */ int nLine; /* Current line number */ char cTerm; /* Character that terminated the most recent field */ size_t iIn; /* Next unread character in the input buffer */ size_t nIn; /* Number of characters in the input buffer */ char *zIn; /* The input buffer */ char zErr[CSV_MXERR]; /* Error message */ }; /* Initialize a CsvReader object */ static void csv_reader_init(CsvReader *p){ p->in = 0; p->z = 0; p->n = 0; p->nAlloc = 0; p->nLine = 0; p->nIn = 0; p->zIn = 0; p->zErr[0] = 0; } /* Close and reset a CsvReader object */ static void csv_reader_reset(CsvReader *p){ | > > | 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 | typedef struct CsvReader CsvReader; struct CsvReader { FILE *in; /* Read the CSV text from this input stream */ char *z; /* Accumulated text for a field */ int n; /* Number of bytes in z */ int nAlloc; /* Space allocated for z[] */ int nLine; /* Current line number */ int bNotFirst; /* True if prior text has been seen */ char cTerm; /* Character that terminated the most recent field */ size_t iIn; /* Next unread character in the input buffer */ size_t nIn; /* Number of characters in the input buffer */ char *zIn; /* The input buffer */ char zErr[CSV_MXERR]; /* Error message */ }; /* Initialize a CsvReader object */ static void csv_reader_init(CsvReader *p){ p->in = 0; p->z = 0; p->n = 0; p->nAlloc = 0; p->nLine = 0; p->bNotFirst = 0; p->nIn = 0; p->zIn = 0; p->zErr[0] = 0; } /* Close and reset a CsvReader object */ static void csv_reader_reset(CsvReader *p){ |
︙ | ︙ | |||
247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 | } } if( csv_append(p, (char)c) ) return 0; ppc = pc; pc = c; } }else{ while( c>',' || (c!=EOF && c!=',' && c!='\n') ){ if( csv_append(p, (char)c) ) return 0; c = csv_getc(p); } if( c=='\n' ){ p->nLine++; if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--; } p->cTerm = (char)c; } if( p->z ) p->z[p->n] = 0; return p->z; } /* Forward references to the various virtual table methods implemented ** in this file. */ static int csvtabCreate(sqlite3*, void*, int, const char*const*, | > > > > > > > > > > > > > > > > | 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 | } } if( csv_append(p, (char)c) ) return 0; ppc = pc; pc = c; } }else{ /* If this is the first field being parsed and it begins with the ** UTF-8 BOM (0xEF BB BF) then skip the BOM */ if( (c&0xff)==0xef && p->bNotFirst==0 ){ csv_append(p, c); c = csv_getc(p); if( (c&0xff)==0xbb ){ csv_append(p, c); c = csv_getc(p); if( (c&0xff)==0xbf ){ p->bNotFirst = 1; p->n = 0; return csv_read_one_field(p); } } } while( c>',' || (c!=EOF && c!=',' && c!='\n') ){ if( csv_append(p, (char)c) ) return 0; c = csv_getc(p); } if( c=='\n' ){ p->nLine++; if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--; } p->cTerm = (char)c; } if( p->z ) p->z[p->n] = 0; p->bNotFirst = 1; return p->z; } /* Forward references to the various virtual table methods implemented ** in this file. */ static int csvtabCreate(sqlite3*, void*, int, const char*const*, |
︙ | ︙ |
Changes to src/shell.c.
︙ | ︙ | |||
3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 | struct ImportCtx { const char *zFile; /* Name of the input file */ FILE *in; /* Read the CSV text from this input stream */ char *z; /* Accumulated text for a field */ int n; /* Number of bytes in z */ int nAlloc; /* Space allocated for z[] */ int nLine; /* Current line number */ int cTerm; /* Character that terminated the most recent field */ int cColSep; /* The column separator character. (Usually ",") */ int cRowSep; /* The row separator character. (Usually "\n") */ }; /* Append a single byte to z[] */ static void import_append_char(ImportCtx *p, int c){ | > | 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 | struct ImportCtx { const char *zFile; /* Name of the input file */ FILE *in; /* Read the CSV text from this input stream */ char *z; /* Accumulated text for a field */ int n; /* Number of bytes in z */ int nAlloc; /* Space allocated for z[] */ int nLine; /* Current line number */ int bNotFirst; /* True if one or more bytes already read */ int cTerm; /* Character that terminated the most recent field */ int cColSep; /* The column separator character. (Usually ",") */ int cRowSep; /* The row separator character. (Usually "\n") */ }; /* Append a single byte to z[] */ static void import_append_char(ImportCtx *p, int c){ |
︙ | ︙ | |||
3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 | break; } import_append_char(p, c); ppc = pc; pc = c; } }else{ while( c!=EOF && c!=cSep && c!=rSep ){ import_append_char(p, c); c = fgetc(p->in); } if( c==rSep ){ p->nLine++; if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--; } p->cTerm = c; } if( p->z ) p->z[p->n] = 0; return p->z; } /* Read a single field of ASCII delimited text. ** ** + Input comes from p->in. ** + Store results in p->z of length p->n. Space to hold p->z comes | > > > > > > > > > > > > > > > > | 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 | break; } import_append_char(p, c); ppc = pc; pc = c; } }else{ /* If this is the first field being parsed and it begins with the ** UTF-8 BOM (0xEF BB BF) then skip the BOM */ if( (c&0xff)==0xef && p->bNotFirst==0 ){ import_append_char(p, c); c = fgetc(p->in); if( (c&0xff)==0xbb ){ import_append_char(p, c); c = fgetc(p->in); if( (c&0xff)==0xbf ){ p->bNotFirst = 1; p->n = 0; return csv_read_one_field(p); } } } while( c!=EOF && c!=cSep && c!=rSep ){ import_append_char(p, c); c = fgetc(p->in); } if( c==rSep ){ p->nLine++; if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--; } p->cTerm = c; } if( p->z ) p->z[p->n] = 0; p->bNotFirst = 1; return p->z; } /* Read a single field of ASCII delimited text. ** ** + Input comes from p->in. ** + Store results in p->z of length p->n. Space to hold p->z comes |
︙ | ︙ |
Changes to test/shell5.test.
︙ | ︙ | |||
179 180 181 182 183 184 185 186 187 188 189 190 191 192 | set res [catchcmd "test.db" {.import shell5.csv t1 SELECT COUNT(*) FROM t1;}] } {0 7} do_test shell5-1.4.10.2 { catchcmd "test.db" {SELECT b FROM t1 WHERE a='7';} } {0 {Now is the time for all good men to come to the aid of their country.}} # check importing very long field do_test shell5-1.5.1 { set str [string repeat X 999] set in [open shell5.csv w] puts $in "8|$str" close $in | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 | set res [catchcmd "test.db" {.import shell5.csv t1 SELECT COUNT(*) FROM t1;}] } {0 7} do_test shell5-1.4.10.2 { catchcmd "test.db" {SELECT b FROM t1 WHERE a='7';} } {0 {Now is the time for all good men to come to the aid of their country.}} # import file with 2 rows, 2 columns and an initial BOM # do_test shell5-1.4.11 { set in [open shell5.csv wb] puts $in "\xef\xbb\xbf2|3" puts $in "4|5" close $in set res [catchcmd "test.db" {CREATE TABLE t2(x INT, y INT); .import shell5.csv t2 .mode quote .header on SELECT * FROM t2;}] string map {\n | \n\r |} $res } {0 {'x','y'|2,3|4,5}} # import file with 2 rows, 2 columns or text with an initial BOM # do_test shell5-1.4.12 { set in [open shell5.csv wb] puts $in "\xef\xbb\xbf\"two\"|3" puts $in "4|5" close $in set res [catchcmd "test.db" {DELETE FROM t2; .import shell5.csv t2 .mode quote .header on SELECT * FROM t2;}] string map {\n | \n\r |} $res } {0 {'x','y'|'two',3|4,5}} # check importing very long field do_test shell5-1.5.1 { set str [string repeat X 999] set in [open shell5.csv w] puts $in "8|$str" close $in |
︙ | ︙ | |||
206 207 208 209 210 211 212 | for {set i 1} {$i<$cols} {incr i} { append data "$i|" } append data "$cols" set in [open shell5.csv w] puts $in $data close $in | | > | 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 | for {set i 1} {$i<$cols} {incr i} { append data "$i|" } append data "$cols" set in [open shell5.csv w] puts $in $data close $in set res [catchcmd "test.db" {DROP TABLE IF EXISTS t2; .import shell5.csv t2 SELECT COUNT(*) FROM t2;}] } {0 1} # try importing a large number of rows set rows 9999 do_test shell5-1.7.1 { set in [open shell5.csv w] |
︙ | ︙ |