SQLite Forum

SQLite3.exe Command shell: Iterate over table of csv files?
Login

SQLite3.exe Command shell: Iterate over table of csv files?

(1) By Gavin Holt (GavinHolt) on 2021-06-30 20:27:56 [link] [source]

Hi,

I love minimal software solutions and am trying to use SQLite3.exe in a lightweight data processing workflow. The command shell is fantastic, I am especially impressed with the error messages when reading saved scripts.

My script processes UK NHS treatment waiting times from 50 large (90+MB) CSV files. I set up a table to hold extracted data, use Nalgeon's vsv library to load each file, extract data, then drop the file, finally dumping the extract. Below is part of my script:

    -- Saved SQLite3.exe script
    --  This returns too many rows for Excel 2000!
    .load vsv
    -- Create a table for waiting_times
    CREATE TABLE waiting(
    census,                         -- Year, fieldname/format varies from file to file!
    provider,                       -- [Provider Org Name]
    department,                     -- [Treatment Function Code]
    dataset,                        -- [RTT Part Description]
    months_sum int,                 -- Area under the curve
    upto52_count int,               -- [Total] to get average months
    unknown_count int,              -- [Patients with unknown clock start date
    over52_count int,               -- [Gt 52 Weeks SUM 1]
    case_count int,                 -- [Total All]
    mean_wait,
    UNIQUE(census,provider,department,dataset)
    );

    -- for file 201704.csv
    CREATE virtual TABLE temp.vsv
    USING vsv(filename="/MyProjects/#NHSWaitingTimes/201704.csv", header=yes );

    INSERT or IGNORE INTO waiting
    SELECT
    '2017-04-30' AS census,         -- [census]
    [Provider Org Name],            -- [provider]
    [Treatment Function Name],      -- [department]
    [RTT Part Description],         -- [dataset]
    sum(
    ([Gt 00 to 01 Weeks SUM 1]*1)+
    ([Gt 01 to 02 Weeks SUM 1]*2)+
    ([Gt 02 to 03 Weeks SUM 1]*3)+
    ([Gt 03 to 04 Weeks SUM 1]*4)+
    ([Gt 04 to 05 Weeks SUM 1]*5)+
    ([Gt 05 to 06 Weeks SUM 1]*6)+
    ([Gt 06 to 07 Weeks SUM 1]*7)+
    ([Gt 07 to 08 Weeks SUM 1]*8)+
    ([Gt 08 to 09 Weeks SUM 1]*9)+
    ([Gt 09 to 10 Weeks SUM 1]*10)+
    ([Gt 10 to 11 Weeks SUM 1]*11)+
    ([Gt 11 to 12 Weeks SUM 1]*12)+
    ([Gt 12 to 13 Weeks SUM 1]*13)+
    ([Gt 13 to 14 Weeks SUM 1]*14)+
    ([Gt 14 to 15 Weeks SUM 1]*15)+
    ([Gt 15 to 16 Weeks SUM 1]*16)+
    ([Gt 16 to 17 Weeks SUM 1]*17)+
    ([Gt 17 to 18 Weeks SUM 1]*18)+
    ([Gt 18 to 19 Weeks SUM 1]*19)+
    ([Gt 19 to 20 Weeks SUM 1]*20)+
    ([Gt 20 to 21 Weeks SUM 1]*21)+
    ([Gt 21 to 22 Weeks SUM 1]*22)+
    ([Gt 22 to 23 Weeks SUM 1]*23)+
    ([Gt 23 to 24 Weeks SUM 1]*24)+
    ([Gt 24 to 25 Weeks SUM 1]*25)+
    ([Gt 25 to 26 Weeks SUM 1]*26)+
    ([Gt 26 to 27 Weeks SUM 1]*27)+
    ([Gt 27 to 28 Weeks SUM 1]*28)+
    ([Gt 28 to 29 Weeks SUM 1]*29)+
    ([Gt 29 to 30 Weeks SUM 1]*30)+
    ([Gt 30 to 31 Weeks SUM 1]*31)+
    ([Gt 31 to 32 Weeks SUM 1]*32)+
    ([Gt 32 to 33 Weeks SUM 1]*33)+
    ([Gt 33 to 34 Weeks SUM 1]*34)+
    ([Gt 34 to 35 Weeks SUM 1]*35)+
    ([Gt 35 to 36 Weeks SUM 1]*36)+
    ([Gt 36 to 37 Weeks SUM 1]*37)+
    ([Gt 37 to 38 Weeks SUM 1]*38)+
    ([Gt 38 to 39 Weeks SUM 1]*39)+
    ([Gt 39 to 40 Weeks SUM 1]*40)+
    ([Gt 40 to 41 Weeks SUM 1]*41)+
    ([Gt 41 to 42 Weeks SUM 1]*42)+
    ([Gt 42 to 43 Weeks SUM 1]*43)+
    ([Gt 43 to 44 Weeks SUM 1]*44)+
    ([Gt 44 to 45 Weeks SUM 1]*45)+
    ([Gt 45 to 46 Weeks SUM 1]*46)+
    ([Gt 46 to 47 Weeks SUM 1]*47)+
    ([Gt 47 to 48 Weeks SUM 1]*48)+
    ([Gt 48 to 49 Weeks SUM 1]*49)+
    ([Gt 49 to 50 Weeks SUM 1]*50)+
    ([Gt 50 to 51 Weeks SUM 1]*51)+
    ([Gt 51 to 52 Weeks SUM 1]*52)+
    ([Gt 52 Weeks SUM 1]*52) ),     -- [months_sum]
    sum([Total]),                   -- [upto52_coubt]
    sum([Patients with unknown clock start date]), -- [unknown_count]
    sum([Gt 52 Weeks SUM 1]),       -- [over52_count]
    sum([Total All]),               -- [all_count]
    '=INT(IF(INDIRECT("F" & ROW())>0,INDIRECT("E" & ROW())/INDIRECT("F" & ROW()),INDIRECT("E" & ROW())/INDIRECT("I" & ROW())))' AS mean_wait -- This is an Excel formula
    FROM vsv
    WHERE [Treatment Function Name]<>'Total'
    GROUP BY [census],
    [Provider Org Name],
    [Treatment Function Name],
    [RTT Part Description];

    DROP TABLE vsv;

    -- !! Repeated for all 50 files !!

    .headers on
    .excel
    SELECT * FROM waiting ORDER BY [provider];

This runs successfully and Excel pops up with the summary data - brilliant! I am really impressed with what can be done with this single tool.

However, I am uncomfortable with the length of the script and the amount of repetitive code. I know SQLite is designed to be embedded and would rely upon the host language to drive logic/loops. In the context of a saved CLI script it would be really nice to have some mechanism for iteration based upon a table of inputs e.g.

    -- Create a table of files to process
    CREATE TABLE filelist(
    filename,                      -- UNIX path from USB root
    census,                        -- Date as 2021-10-31
    UNIQUE(filename,census)
    );

    INSERT INTO filelist (filename,census)
    VALUES
    ("/MyProjects/#NHSWaitingTimes/201704.csv","2017-04-30"),
    ("/MyProjects/#NHSWaitingTimes/201705.csv","2017-05-31"),
    ("/MyProjects/#NHSWaitingTimes/201706.csv","2017-06-30"),
    ("/MyProjects/#NHSWaitingTimes/201707.csv","2017-07-31"),
    ("/MyProjects/#NHSWaitingTimes/201708.csv","2017-08-31"),
    ("/MyProjects/#NHSWaitingTimes/201709.csv","2017-09-30"),
    ("/MyProjects/#NHSWaitingTimes/201710.csv","2017-10-31"),
    ("/MyProjects/#NHSWaitingTimes/201711.csv","2017-11-30"),
    ("/MyProjects/#NHSWaitingTimes/201712.csv","2017-12-31"),
    ("/MyProjects/#NHSWaitingTimes/201801.csv","2018-01-31"),
    ("/MyProjects/#NHSWaitingTimes/201802.csv","2018-02-27"),
    ("/MyProjects/#NHSWaitingTimes/201803.csv","2018-03-31"),
    ("/MyProjects/#NHSWaitingTimes/201804.csv","2018-04-30"),
    ("/MyProjects/#NHSWaitingTimes/201805.csv","2018-05-31"),
    ("/MyProjects/#NHSWaitingTimes/201806.csv","2018-06-30"),
    ("/MyProjects/#NHSWaitingTimes/201807.csv","2018-07-31"),
    ("/MyProjects/#NHSWaitingTimes/201808.csv","2018-08-31"),
    ("/MyProjects/#NHSWaitingTimes/201809.csv","2018-09-30"),
    ("/MyProjects/#NHSWaitingTimes/201810.csv","2018-10-31"),
    ("/MyProjects/#NHSWaitingTimes/201811.csv","2018-11-30"),
    ("/MyProjects/#NHSWaitingTimes/201812.csv","2018-12-31"),
    ("/MyProjects/#NHSWaitingTimes/201901.csv","2019-01-31"),
    ("/MyProjects/#NHSWaitingTimes/201902.csv","2019-02-27"),
    ("/MyProjects/#NHSWaitingTimes/201903.csv","2019-03-31"),
    ("/MyProjects/#NHSWaitingTimes/201904.csv","2019-04-30"),
    ("/MyProjects/#NHSWaitingTimes/201905.csv","2019-05-31"),
    ("/MyProjects/#NHSWaitingTimes/201906.csv","2019-06-30"),
    ("/MyProjects/#NHSWaitingTimes/201907.csv","2019-07-31"),
    ("/MyProjects/#NHSWaitingTimes/201908.csv","2019-08-31"),
    ("/MyProjects/#NHSWaitingTimes/201909.csv","2019-09-30"),
    ("/MyProjects/#NHSWaitingTimes/201910.csv","2019-10-31"),
    ("/MyProjects/#NHSWaitingTimes/201911.csv","2019-11-30"),
    ("/MyProjects/#NHSWaitingTimes/201912.csv","2019-12-31"),
    ("/MyProjects/#NHSWaitingTimes/202001.csv","2020-01-31"),
    ("/MyProjects/#NHSWaitingTimes/202002.csv","2020-02-27"),
    ("/MyProjects/#NHSWaitingTimes/202003.csv","2020-03-31"),
    ("/MyProjects/#NHSWaitingTimes/202004.csv","2020-04-30"),
    ("/MyProjects/#NHSWaitingTimes/202005.csv","2020-05-31"),
    ("/MyProjects/#NHSWaitingTimes/202006.csv","2020-06-30"),
    ("/MyProjects/#NHSWaitingTimes/202007.csv","2020-07-31"),
    ("/MyProjects/#NHSWaitingTimes/202008.csv","2020-08-31"),
    ("/MyProjects/#NHSWaitingTimes/202009.csv","2020-09-30"),
    ("/MyProjects/#NHSWaitingTimes/202010.csv","2020-10-31"),
    ("/MyProjects/#NHSWaitingTimes/202011.csv","2020-11-30"),
    ("/MyProjects/#NHSWaitingTimes/202012.csv","2020-12-31"),
    ("/MyProjects/#NHSWaitingTimes/202101.csv","2021-01-31"),
    ("/MyProjects/#NHSWaitingTimes/202102.csv","2021-02-27"),
    ("/MyProjects/#NHSWaitingTimes/202103.csv","2021-03-31")
    ;

Q1. Using only SQLite3.exe - is there any way to repeat my CREATE virtual TABLE statement and my INSERT statement with parameters for "filename" and  "census"?

I guess the processing of numerous files with SQLite is common, perhaps my request to do so without a host language is uncommon. The advantages of a single script file for maintenance/distribution are so attractive I would accept any solution even if a "quick" python/batch script seems more appropriate.

Any guidance welcome.

Kind Regards Gavin Holt

NB. Workflow Details

    Batch file to load my saved scripts:

        @start "sqlite" \MyProfile\sqlite\sqlite3.exe ""  ".read %1:\=/"

    The executable used is the 64bit version kindly supplied by Nalgeon, 
    I am also using his vsv.dll library to parse csv files:
        https://github.com/nalgeon/sqlite (Version 3.35.5 2021-04-19)
        https://github.com/nalgeon/sqlean

    Raw data was hand edited to remove top lines and renamed to consistent filenames:
        https://www.england.nhs.uk/statistics/statistical-work-areas/rtt-waiting-times/

(2) By Larry Brasfield (larrybr) on 2021-06-30 21:09:10 in reply to 1 [link] [source]

I, too, have found the CLI shell to be quite useful for a variety of processing scenarios. I have also, at times, thought it would be nice for it to have some sort of programmability. Yet, after considering the alternatives, I have never gotten to the point of seriously proposing such to the SQLite dev group.

There are several reasons for this reluctance. One is that it is exceedingly easy to write scripts, in any of dozens of languages, that simply produce commands to be piped into the shell. This serves most lesser needs. Another is that, for Perl, Python, TCL, PowerShell and many other programming/scripting tools, there are SQLite libraries available that are very easy to use. These tools have served all of my heavier-duty needs.

As you might have read, SQLite was initially created as a TCL library. It works very well, almost naturally in the "TCL way". It's almost like shell programming, but with fewer crazy-inducing features.

(3) By Keith Medcalf (kmedcalf) on 2021-06-30 23:46:49 in reply to 1 [link] [source]

I do that with a wee Python script that looks (in part) as follows:

sql_import = '''
drop table if exists rawdata;
create virtual table rawdata using vsv(filename="%s",skip=1,schema="create table rawdata(number,date,zone,gender,age,status,state)",columns=7);
 insert into cases
      select date,
             zone,
             gender,
             age,
             status,
             count(*) as cnt
        from rawdata
    group by date, zone, gender, age, status
 on conflict (date, zone, gender, age, status)
   do update set cnt = max(cnt, excluded.cnt);
'''

for file in reversed(sorted([x for x in os.listdir('.') if x.startswith('data') and x.endswith('.csv')])):
    db.executescript(sql_import % (file,))
    maxdate = db.execute('select max(date) from rawdata;').fetchone()[0].replace('-', '')
    db.execute('drop table if exists rawdata;')
    filename = 'data%s.csv' % maxdate
    print(file, filename, end=' ')
    if file != filename:
        if not os.path.exists(filename):
            os.rename(file, filename)
            print('* RENAMED *')
        else:
            print('* CANNOT RENAME *')
    else:
        print(file, 'OK')

which looks for all files called "data*.csv" in the current directory and processes them in reverse reverse sort order (assuming the names are dataYYYYMMDD.csv) that would be from oldest to newest) and any file with a badly formed name is renamed to the right name.

(4.1) By Gavin Holt (GavinHolt) on 2021-07-02 08:39:27 edited from 4.0 in reply to 1 [link] [source]

Hi,

Many thanks for the replies. If programmability is not available in the command shell then some additional "computing" is required. 

"I have never meta language I like more than Lua", so I will use Lua to meta-program sqlite scripts: 

    -- This Lua script generates a .sqlite file
    --  to run with SQLite3.exe "" ".read %1:\=/"

    Files = {
    {"/MyProjects/#NHSWaitingTimes/201704.csv","2017-04-30"},
    {"/MyProjects/#NHSWaitingTimes/201705.csv","2017-05-31"},
    {"/MyProjects/#NHSWaitingTimes/201706.csv","2017-06-30"},
    {"/MyProjects/#NHSWaitingTimes/201707.csv","2017-07-31"},
    {"/MyProjects/#NHSWaitingTimes/201708.csv","2017-08-31"},
    {"/MyProjects/#NHSWaitingTimes/201709.csv","2017-09-30"},
    {"/MyProjects/#NHSWaitingTimes/201710.csv","2017-10-31"},
    {"/MyProjects/#NHSWaitingTimes/201711.csv","2017-11-30"},
    {"/MyProjects/#NHSWaitingTimes/201712.csv","2017-12-31"},
    {"/MyProjects/#NHSWaitingTimes/201801.csv","2018-01-31"},
    {"/MyProjects/#NHSWaitingTimes/201802.csv","2018-02-27"},
    {"/MyProjects/#NHSWaitingTimes/201803.csv","2018-03-31"},
    {"/MyProjects/#NHSWaitingTimes/201804.csv","2018-04-30"},
    {"/MyProjects/#NHSWaitingTimes/201805.csv","2018-05-31"},
    {"/MyProjects/#NHSWaitingTimes/201806.csv","2018-06-30"},
    {"/MyProjects/#NHSWaitingTimes/201807.csv","2018-07-31"},
    {"/MyProjects/#NHSWaitingTimes/201808.csv","2018-08-31"},
    {"/MyProjects/#NHSWaitingTimes/201809.csv","2018-09-30"},
    {"/MyProjects/#NHSWaitingTimes/201810.csv","2018-10-31"},
    {"/MyProjects/#NHSWaitingTimes/201811.csv","2018-11-30"},
    {"/MyProjects/#NHSWaitingTimes/201812.csv","2018-12-31"},
    {"/MyProjects/#NHSWaitingTimes/201901.csv","2019-01-31"},
    {"/MyProjects/#NHSWaitingTimes/201902.csv","2019-02-27"},
    {"/MyProjects/#NHSWaitingTimes/201903.csv","2019-03-31"},
    {"/MyProjects/#NHSWaitingTimes/201904.csv","2019-04-30"},
    {"/MyProjects/#NHSWaitingTimes/201905.csv","2019-05-31"},
    {"/MyProjects/#NHSWaitingTimes/201906.csv","2019-06-30"},
    {"/MyProjects/#NHSWaitingTimes/201907.csv","2019-07-31"},
    {"/MyProjects/#NHSWaitingTimes/201908.csv","2019-08-31"},
    {"/MyProjects/#NHSWaitingTimes/201909.csv","2019-09-30"},
    {"/MyProjects/#NHSWaitingTimes/201910.csv","2019-10-31"},
    {"/MyProjects/#NHSWaitingTimes/201911.csv","2019-11-30"},
    {"/MyProjects/#NHSWaitingTimes/201912.csv","2019-12-31"},
    {"/MyProjects/#NHSWaitingTimes/202001.csv","2020-01-31"},
    {"/MyProjects/#NHSWaitingTimes/202002.csv","2020-02-27"},
    {"/MyProjects/#NHSWaitingTimes/202003.csv","2020-03-31"},
    {"/MyProjects/#NHSWaitingTimes/202004.csv","2020-04-30"},
    {"/MyProjects/#NHSWaitingTimes/202005.csv","2020-05-31"},
    {"/MyProjects/#NHSWaitingTimes/202006.csv","2020-06-30"},
    {"/MyProjects/#NHSWaitingTimes/202007.csv","2020-07-31"},
    {"/MyProjects/#NHSWaitingTimes/202008.csv","2020-08-31"},
    {"/MyProjects/#NHSWaitingTimes/202009.csv","2020-09-30"},
    {"/MyProjects/#NHSWaitingTimes/202010.csv","2020-10-31"},
    {"/MyProjects/#NHSWaitingTimes/202011.csv","2020-11-30"},
    {"/MyProjects/#NHSWaitingTimes/202012.csv","2020-12-31"},
    {"/MyProjects/#NHSWaitingTimes/202101.csv","2021-01-31"},
    {"/MyProjects/#NHSWaitingTimes/202102.csv","2021-02-27"},
    {"/MyProjects/#NHSWaitingTimes/202103.csv","2021-03-31"},
    }

    Text = {[[
    -- My large SQLite script
    --  This returns too many rows for excel 2000!
    .load vsv

    -- Create a table of waiting_times
    CREATE TABLE waiting(
    census,                         -- Year, format varies from file to file!
    provider,                       -- [Provider Org Name]
    department,                     -- [Treatment Function Code]
    dataset,                        -- [RTT Part Description]
    months_sum int,                 -- Area under the curve
    upto52_count int,               -- [Total] to get average months
    unknown_count int,              -- [Patients with unknown clock start date
    over52_count int,               -- [Gt 52 Weeks SUM 1]
    case_count int,                 -- [Total All]
    mean_wait,
    UNIQUE(census,provider,department,dataset)
    );

    ]]}

    Template = [[
    -- Process each file
    CREATE virtual TABLE temp.vsv
    USING vsv(filename="$Filename", header=yes );

    INSERT or IGNORE INTO waiting
    SELECT
    '$Census' AS census,         -- [census]
    [Provider Org Name],            -- [provider]
    [Treatment Function Name],      -- [department]
    [RTT Part Description],         -- [dataset]
    sum(
    ([Gt 00 to 01 Weeks SUM 1]*1)+
    ([Gt 01 to 02 Weeks SUM 1]*2)+
    ([Gt 02 to 03 Weeks SUM 1]*3)+
    ([Gt 03 to 04 Weeks SUM 1]*4)+
    ([Gt 04 to 05 Weeks SUM 1]*5)+
    ([Gt 05 to 06 Weeks SUM 1]*6)+
    ([Gt 06 to 07 Weeks SUM 1]*7)+
    ([Gt 07 to 08 Weeks SUM 1]*8)+
    ([Gt 08 to 09 Weeks SUM 1]*9)+
    ([Gt 09 to 10 Weeks SUM 1]*10)+
    ([Gt 10 to 11 Weeks SUM 1]*11)+
    ([Gt 11 to 12 Weeks SUM 1]*12)+
    ([Gt 12 to 13 Weeks SUM 1]*13)+
    ([Gt 13 to 14 Weeks SUM 1]*14)+
    ([Gt 14 to 15 Weeks SUM 1]*15)+
    ([Gt 15 to 16 Weeks SUM 1]*16)+
    ([Gt 16 to 17 Weeks SUM 1]*17)+
    ([Gt 17 to 18 Weeks SUM 1]*18)+
    ([Gt 18 to 19 Weeks SUM 1]*19)+
    ([Gt 19 to 20 Weeks SUM 1]*20)+
    ([Gt 20 to 21 Weeks SUM 1]*21)+
    ([Gt 21 to 22 Weeks SUM 1]*22)+
    ([Gt 22 to 23 Weeks SUM 1]*23)+
    ([Gt 23 to 24 Weeks SUM 1]*24)+
    ([Gt 24 to 25 Weeks SUM 1]*25)+
    ([Gt 25 to 26 Weeks SUM 1]*26)+
    ([Gt 26 to 27 Weeks SUM 1]*27)+
    ([Gt 27 to 28 Weeks SUM 1]*28)+
    ([Gt 28 to 29 Weeks SUM 1]*29)+
    ([Gt 29 to 30 Weeks SUM 1]*30)+
    ([Gt 30 to 31 Weeks SUM 1]*31)+
    ([Gt 31 to 32 Weeks SUM 1]*32)+
    ([Gt 32 to 33 Weeks SUM 1]*33)+
    ([Gt 33 to 34 Weeks SUM 1]*34)+
    ([Gt 34 to 35 Weeks SUM 1]*35)+
    ([Gt 35 to 36 Weeks SUM 1]*36)+
    ([Gt 36 to 37 Weeks SUM 1]*37)+
    ([Gt 37 to 38 Weeks SUM 1]*38)+
    ([Gt 38 to 39 Weeks SUM 1]*39)+
    ([Gt 39 to 40 Weeks SUM 1]*40)+
    ([Gt 40 to 41 Weeks SUM 1]*41)+
    ([Gt 41 to 42 Weeks SUM 1]*42)+
    ([Gt 42 to 43 Weeks SUM 1]*43)+
    ([Gt 43 to 44 Weeks SUM 1]*44)+
    ([Gt 44 to 45 Weeks SUM 1]*45)+
    ([Gt 45 to 46 Weeks SUM 1]*46)+
    ([Gt 46 to 47 Weeks SUM 1]*47)+
    ([Gt 47 to 48 Weeks SUM 1]*48)+
    ([Gt 48 to 49 Weeks SUM 1]*49)+
    ([Gt 49 to 50 Weeks SUM 1]*50)+
    ([Gt 50 to 51 Weeks SUM 1]*51)+
    ([Gt 51 to 52 Weeks SUM 1]*52)+
    ([Gt 52 Weeks SUM 1]*52)),      -- [months_sum]
    sum([Total]),                   -- [upto52_count]
    sum([Patients with unknown clock start date]), -- [unknown_count]
    sum([Gt 52 Weeks SUM 1]),       -- [over52_count]
    sum([Total All]),               -- [all_count]
    '=INT(IF(INDIRECT("F" & ROW())>0,INDIRECT("E" & ROW())/INDIRECT("F" & ROW()),INDIRECT("E" & ROW())/INDIRECT("I" & ROW())))' AS mean_wait
    FROM vsv
    WHERE [Treatment Function Name]<>'Total'
    GROUP BY [census],
    [Provider Org Name],
    [Treatment Function Name],
    [RTT Part Description];

    DROP TABLE vsv;

    ]]

    for i,v in pairs(Files) do
        local Filename  = v[1]
        local Census    = v[2]
        local Output    = Template:gsub("$Filename",Filename):gsub("$Census",Census)
        table.insert(Text,Output)
    end

    table.insert(Text,[[

    .headers on
    .excel
    SELECT * FROM waiting ORDER BY [provider];
    ]])

    io.open("/MyProjects/#NHSWaitingTimes/ALL.sqlite", "w"):write((table.concat(Text))):flush():close()

    os.execute([[start "SQLite" \MyProfile\sqlite\SQLite3.exe "" ".read /MyProjects/#NHSWaitingTimes/ALL.sqlite"]])


Kind Regards Gavin Holt

(5) By Warren Young (wyoung) on 2021-07-03 00:16:05 in reply to 4.1 [link] [source]

If programmability is not available in the command shell...

...then you're using a terribly weak excuse for a command shell. :)

Dr. Bourne taught the world that programmable command shells was a good idea in 1977. His shell for UNIX V7 had logical expressions and loops, which didn't arrive in cmd.exe until decades later. If you take the original Unix shell spirit, we also had the ability to do arithmetic on variables, allowing us to clean up all that manual "([Gt 24 to 25 Weeks SUM 1]*25)+" stuff in your Lua code via expr and bc.

I believe you can do that today via your original cmd.exe method by using its FOR feature, but I wouldn't bother. Instead, I recommend that you continue down your new path: use a better-designed programming language from the start rather than try to arm-twist cmd.exe to suit.

This Lua script generates a .sqlite file

Why ever for? Call SQLite directly from Lua instead. It'll not only save you the temporary output SQL file, it'll run faster.

If you don't like that wrapper, I count seven others. (Do note that lsqlite3 is far and away the most popular among them, though.)

And again, you really should generate that "Gt...Weeks SUM" stuff rather than write it manually.

(6) By Larry Brasfield (larrybr) on 2021-07-03 00:38:05 in reply to 5 [source]

Warren, I think if you look at Gavin's "Q1", within and in the context of his initial post, you will see that the "programmability" he mentions is a non-feature of the SQLite CLI shell.

I do not gainsay your disrecommendation of cmd.exe programming. (I would so advise for sake of sanity.) And I second your recommendation that he just use SQLite directly from a real programming environment.

(7.1) By Víctor (wzrlpy) on 2021-07-03 16:02:03 edited from 7.0 in reply to 4.1 [link] [source]

Hi Gavin,

You can script out your repetitive SQL staments by generating them dynamically via SQL statements, and invoking them via .read. The generate_series() table function helps to create "loops".

Something like this:

.cd /MyProjects/#NHSWaitingTimes/

create table nhs_filenames as select
strftime('%Y%m', start_date) || '.csv' as file_name,
date(start_date, '+1 months', '-1 days') as census
from
(select date('2017-04-01', '+' || value || ' months') as start_date 
from generate_series(0,48));

create table nhs_sum as select
'([Gt ' || printf('%02d', value) || ' to ' || printf('%02d', value+1) || ' Weeks SUM 1]*' || printf('%02d', value+1) || ')+' as sum_line
from generate_series(0,51);

.headers off
.mode tabs
.separator "\r\n"
.output load_csv.sql

select 
'drop table if exists temp.vsv;',
'.import ' || file_name || ' temp.vsv',
'INSERT or IGNORE INTO waiting',
'    SELECT',
'    census,         -- [census]',
'    [Provider Org Name],            -- [provider]',
'    [Treatment Function Name],      -- [department]',
'    [RTT Part Description],         -- [dataset]',
'    sum(',

(select group_concat(sum_line, char(13) || char(10)) from nhs_sum),

'    ([Gt 52 Weeks SUM 1]*52) ),     -- [months_sum]',
'    sum([Total]),                   -- [upto52_coubt]',
'    sum([Patients with unknown clock start date]), -- [unknown_count]',
'    sum([Gt 52 Weeks SUM 1]),       -- [over52_count]',
'    sum([Total All]),               -- [all_count]',
'    ''=INT(IF(INDIRECT("F" & ROW())>0,INDIRECT("E" & ROW())/INDIRECT("F" & ROW()),INDIRECT("E" & ROW())/INDIRECT("I" & ROW())))'' AS mean_wait -- This is an Excel formula',
'    FROM vsv',
'    WHERE [Treatment Function Name]<>''Total'' ',
'    GROUP BY [census],',
'    [Provider Org Name],',
'    [Treatment Function Name],',
'    [RTT Part Description];'

from nhs_filenames;

.headers on
.mode csv
.read load_csv.sql

(8) By Gavin Holt (GavinHolt) on 2021-07-12 17:04:41 in reply to 7.1 [link] [source]

Hi Víctor, 

Many thanks for your reply it is very clear and exceeds my request. Meta-programming "SQLite with SQLite" is curiously pleasing.

This has been my first question on the forum and I would like to thanks all the respondents - a delightful community.

Kind Regards Gavin Holt