Index: ext/misc/csv.c
==================================================================
--- ext/misc/csv.c
+++ ext/misc/csv.c
@@ -75,10 +75,11 @@
   FILE *in;              /* Read the CSV text from this input stream */
   char *z;               /* Accumulated text for a field */
   int n;                 /* Number of bytes in z */
   int nAlloc;            /* Space allocated for z[] */
   int nLine;             /* Current line number */
+  int bNotFirst;         /* True if prior text has been seen */
   char cTerm;            /* Character that terminated the most recent field */
   size_t iIn;            /* Next unread character in the input buffer */
   size_t nIn;            /* Number of characters in the input buffer */
   char *zIn;             /* The input buffer */
   char zErr[CSV_MXERR];  /* Error message */
@@ -89,10 +90,11 @@
   p->in = 0;
   p->z = 0;
   p->n = 0;
   p->nAlloc = 0;
   p->nLine = 0;
+  p->bNotFirst = 0;
   p->nIn = 0;
   p->zIn = 0;
   p->zErr[0] = 0;
 }
 
@@ -249,10 +251,25 @@
       if( csv_append(p, (char)c) ) return 0;
       ppc = pc;
       pc = c;
     }
   }else{
+    /* If the very first field begins with the UTF-8 BOM (0xEF BB BF), drop
+    ** the BOM and restart the parse; a partial match is kept as field text */
+    if( (c&0xff)==0xef && p->bNotFirst==0 ){
+      csv_append(p, c);
+      c = csv_getc(p);
+      if( (c&0xff)==0xbb ){
+        csv_append(p, c);
+        c = csv_getc(p);
+        if( (c&0xff)==0xbf ){
+          p->bNotFirst = 1;
+          p->n = 0;
+          return csv_read_one_field(p);
+        }
+      }
+    }
     while( c>',' || (c!=EOF && c!=',' && c!='\n') ){
       if( csv_append(p, (char)c) ) return 0;
       c = csv_getc(p);
     }
     if( c=='\n' ){
@@ -260,10 +277,11 @@
       if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--;
     }
     p->cTerm = (char)c;
   }
   if( p->z ) p->z[p->n] = 0;
+  p->bNotFirst = 1;
   return p->z;
 }
 
 
 /* Forward references to the various virtual table methods implemented

Index: src/shell.c
==================================================================
--- src/shell.c
+++ src/shell.c
@@ -3820,10 +3820,11 @@
   FILE *in;          /* Read the CSV text from this input stream */
   char *z;           /* Accumulated text for a field */
   int n;             /* Number of bytes in z */
   int nAlloc;        /* Space allocated for z[] */
   int nLine;         /* Current line number */
+  int bNotFirst;     /* True if one or more bytes already read */
   int cTerm;         /* Character that terminated the most recent field */
   int cColSep;       /* The column separator character. (Usually ",") */
   int cRowSep;       /* The row separator character. (Usually "\n") */
 };
 
@@ -3899,10 +3900,25 @@
       import_append_char(p, c);
       ppc = pc;
       pc = c;
     }
   }else{
+    /* If the very first field begins with the UTF-8 BOM (0xEF BB BF), drop
+    ** the BOM and restart the parse; a partial match is kept as field text */
+    if( (c&0xff)==0xef && p->bNotFirst==0 ){
+      import_append_char(p, c);
+      c = fgetc(p->in);
+      if( (c&0xff)==0xbb ){
+        import_append_char(p, c);
+        c = fgetc(p->in);
+        if( (c&0xff)==0xbf ){
+          p->bNotFirst = 1;
+          p->n = 0;
+          return csv_read_one_field(p);
+        }
+      }
+    }
     while( c!=EOF && c!=cSep && c!=rSep ){
       import_append_char(p, c);
       c = fgetc(p->in);
     }
     if( c==rSep ){
@@ -3910,10 +3926,11 @@
       if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--;
     }
     p->cTerm = c;
   }
   if( p->z ) p->z[p->n] = 0;
+  p->bNotFirst = 1;
   return p->z;
 }
 
 /* Read a single field of ASCII delimited text.
 **

Index: test/shell5.test
==================================================================
--- test/shell5.test
+++ test/shell5.test
@@ -181,10 +181,40 @@
 } {0 7}
 
 do_test shell5-1.4.10.2 {
   catchcmd "test.db" {SELECT b FROM t1 WHERE a='7';}
 } {0 {Now is the time for all good men to come to the aid of their country.}}
+
+# import file with 2 rows, 2 columns and an initial BOM
+#
+do_test shell5-1.4.11 {
+  set in [open shell5.csv wb]
+  puts $in "\xef\xbb\xbf2|3"
+  puts $in "4|5"
+  close $in
+  set res [catchcmd "test.db" {CREATE TABLE t2(x INT, y INT);
+.import shell5.csv t2
+.mode quote
+.header on
+SELECT * FROM t2;}]
+  string map {\n | \n\r |} $res
+} {0 {'x','y'|2,3|4,5}}
+
+# import file with 2 rows, 2 columns; a quoted text field follows the initial BOM
+#
+do_test shell5-1.4.12 {
+  set in [open shell5.csv wb]
+  puts $in "\xef\xbb\xbf\"two\"|3"
+  puts $in "4|5"
+  close $in
+  set res [catchcmd "test.db" {DELETE FROM t2;
+.import shell5.csv t2
+.mode quote
+.header on
+SELECT * FROM t2;}]
+  string map {\n | \n\r |} $res
+} {0 {'x','y'|'two',3|4,5}}
 
 # check importing very long field
 do_test shell5-1.5.1 {
   set str [string repeat X 999]
   set in [open shell5.csv w]
@@ -208,11 +238,12 @@
   }
   append data "$cols"
   set in [open shell5.csv w]
   puts $in $data
   close $in
-  set res [catchcmd "test.db" {.import shell5.csv t2
+  set res [catchcmd "test.db" {DROP TABLE IF EXISTS t2;
+.import shell5.csv t2
 SELECT COUNT(*) FROM t2;}]
 } {0 1}
 
 # try importing a large number of rows
 set rows 9999