/* ** This C program extracts all "words" from an input document and adds them ** to an SQLite database. A "word" is any contiguous sequence of alphabetic ** characters. All digits, punctuation, and whitespace characters are ** word separators. The database stores a single entry for each distinct ** word together with a count of the number of occurrences of that word. ** A fresh database is created automatically on each run. ** ** wordcount DATABASE INPUTFILE ** ** The INPUTFILE name can be omitted, in which case input it taken from ** standard input. ** ** Option: ** ** --without-rowid Use a WITHOUT ROWID table to store the words. ** --insert Use INSERT mode (the default) ** --replace Use REPLACE mode ** --select Use SELECT mode ** --update Use UPDATE mode ** --nocase Add the NOCASE collating sequence to the words. ** --trace Enable sqlite3_trace() output. ** ** Modes: ** ** Insert mode means: ** (1) INSERT OR IGNORE INTO wordcount VALUES($new,1) ** (2) UPDATE wordcount SET cnt=cnt+1 WHERE word=$new -- if (1) is a noop ** ** Update mode means: ** (1) INSERT OR IGNORE INTO wordcount VALUES($new,0) ** (2) UPDATE wordcount SET cnt=cnt+1 WHERE word=$new ** ** Replace mode means: ** (1) REPLACE INTO wordcount ** VALUES($new,ifnull((SELECT cnt FROM wordcount WHERE word=$new),0)+1); ** ** Select mode modes: ** (1) SELECT 1 FROM wordcount WHERE word=$newword ** (2) INSERT INTO wordcount VALUES($new,1) -- if (1) returns nothing ** (3) UPDATE wordcount SET cnt=cnt+1 WHERE word=$new --if (1) return TRUE ** ****************************************************************************** ** ** Compile as follows: ** ** gcc -I. wordcount.c sqlite3.c -ldl -lpthreads ** ** Or: ** ** gcc -I. -DSQLITE_THREADSAFE=0 -DSQLITE_OMIT_LOAD_EXTENSION \ ** wordcount.c sqlite3.c */ #include #include #include #include #include #include "sqlite3.h" /* Print an error message and exit */ static void fatal_error(const char *zMsg, ...){ va_list ap; va_start(ap, zMsg); vfprintf(stderr, zMsg, ap); va_end(ap); exit(1); } /* The sqlite3_trace() callback function */ static void traceCallback(void *NotUsed, const char *zSql){ printf("%s;\n", zSql); } /* Define operating modes */ #define MODE_INSERT 0 #define MODE_REPLACE 1 #define MODE_SELECT 2 #define MODE_UPDATE 3 int main(int argc, char **argv){ const char *zFileToRead = 0; /* Input file. NULL for stdin */ const char *zDbName = 0; /* Name of the database file to create */ int useWithoutRowid = 0; /* True for --without-rowid */ int iMode = MODE_INSERT; /* One of MODE_xxxxx */ int useNocase = 0; /* True for --nocase */ int doTrace = 0; /* True for --trace */ int i, j; /* Loop counters */ sqlite3 *db; /* The SQLite database connection */ char *zSql; /* Constructed SQL statement */ sqlite3_stmt *pInsert = 0; /* The INSERT statement */ sqlite3_stmt *pUpdate = 0; /* The UPDATE statement */ sqlite3_stmt *pSelect = 0; /* The SELECT statement */ FILE *in; /* The open input file */ int rc; /* Return code from an SQLite interface */ int iCur, iHiwtr; /* Statistics values, current and "highwater" */ char zInput[2000]; /* A single line of input */ /* Process command-line arguments */ for(i=1; i