diff options
Diffstat (limited to 'Plugins/DbSqliteWx/csv.c')
| -rw-r--r-- | Plugins/DbSqliteWx/csv.c | 1758 |
1 files changed, 888 insertions, 870 deletions
diff --git a/Plugins/DbSqliteWx/csv.c b/Plugins/DbSqliteWx/csv.c index 6804ce0..61ecc60 100644 --- a/Plugins/DbSqliteWx/csv.c +++ b/Plugins/DbSqliteWx/csv.c @@ -1,870 +1,888 @@ -/*
-** 2016-05-28
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-******************************************************************************
-**
-** This file contains the implementation of an SQLite virtual table for
-** reading CSV files.
-**
-** Usage:
-**
-** .load ./csv
-** CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME);
-** SELECT * FROM csv;
-**
-** The columns are named "c0", "c1", "c2", ... by default. But the
-** application can define its own CREATE TABLE statement as an additional
-** parameter. For example:
-**
-** CREATE VIRTUAL TABLE temp.csv2 USING csv(
-** filename = "../http.log",
-** schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)"
-** );
-**
-** Instead of specifying a file, the text of the CSV can be loaded using
-** the data= parameter.
-**
-** If the columns=N parameter is supplied, then the CSV file is assumed to have
-** N columns. If the columns parameter is omitted, the CSV file is opened
-** as soon as the virtual table is constructed and the first row of the CSV
-** is read in order to count the columns.
-**
-** Some extra debugging features (used for testing virtual tables) are available
-** if this module is compiled with -DSQLITE_TEST.
-*/
-#include "wx_sqlite3ext.h"
-SQLITE_EXTENSION_INIT1
-#include <string.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <stdarg.h>
-#include <ctype.h>
-#include <stdio.h>
-
-#ifndef SQLITE_OMIT_VIRTUALTABLE
-
-/*
-** A macro to hint to the compiler that a function should not be
-** inlined.
-*/
-#if defined(__GNUC__)
-# define CSV_NOINLINE __attribute__((noinline))
-#elif defined(_MSC_VER) && _MSC_VER>=1310
-# define CSV_NOINLINE __declspec(noinline)
-#else
-# define CSV_NOINLINE
-#endif
-
-
-/* Max size of the error message in a CsvReader */
-#define CSV_MXERR 200
-
-/* Size of the CsvReader input buffer */
-#define CSV_INBUFSZ 1024
-
-/* State used while reading CSV text, either from a stdio stream (in!=0)
-** or from an in-memory string (in==0, zIn points at the text).
-*/
-typedef struct CsvReader CsvReader;
-struct CsvReader {
-  FILE *in;              /* Read the CSV text from this input stream */
-  char *z;               /* Accumulated text for a field */
-  int n;                 /* Number of bytes in z */
-  int nAlloc;            /* Space allocated for z[] */
-  int nLine;             /* Current line number */
-  int cTerm;             /* Character that terminated the most recent field.
-                         ** Held as int because EOF is stored here and
-                         ** compared against; a plain char cannot represent
-                         ** EOF where char is unsigned. */
-  size_t iIn;            /* Next unread character in the input buffer */
-  size_t nIn;            /* Number of characters in the input buffer */
-  char *zIn;             /* The input buffer */
-  char zErr[CSV_MXERR];  /* Error message */
-};
-
-/* Put every field of a CsvReader into its empty state.  Nothing is freed
-** here; csv_reader_reset() releases resources before calling this.
-*/
-static void csv_reader_init(CsvReader *p){
-  p->in = 0;
-  p->z = 0;
-  p->n = 0;
-  p->nAlloc = 0;
-  p->nLine = 0;
-  p->cTerm = 0;
-  p->iIn = 0;      /* previously left stale: a reused reader could resume
-                   ** at an old buffer offset */
-  p->nIn = 0;
-  p->zIn = 0;
-  p->zErr[0] = 0;
-}
-
-/* Release what a CsvReader holds and return it to the initial state.
-** The input buffer zIn is freed only when a stream is open; when there is
-** no stream, csv_reader_open() has pointed zIn at caller-supplied text.
-*/
-static void csv_reader_reset(CsvReader *p){
-  FILE *pStream = p->in;
-  if( pStream!=0 ){
-    fclose(pStream);
-    wx_sqlite3_free(p->zIn);
-  }
-  wx_sqlite3_free(p->z);
-  csv_reader_init(p);
-}
-
-/* Format a printf-style message into p->zErr, limited to CSV_MXERR bytes. */
-static void csv_errmsg(CsvReader *p, const char *zFormat, ...){
-  char *zOut = p->zErr;
-  va_list args;
-  va_start(args, zFormat);
-  wx_sqlite3_vsnprintf(CSV_MXERR, zOut, zFormat, args);
-  va_end(args);
-}
-
-/* Attach an input source to a CsvReader.
-**
-** If zFilename is not NULL, a CSV_INBUFSZ-byte buffer is allocated and the
-** file is opened in binary mode.  Otherwise the reader is pointed directly
-** at the text in zData, which is not copied.
-**
-** Return 0 on success.  Return 1 on failure, with a message in p->zErr.
-*/
-static int csv_reader_open(
-  CsvReader *p,               /* The reader to open */
-  const char *zFilename,      /* Read from this filename */
-  const char *zData           /*  ... or use this data */
-){
-  if( zFilename ){
-    p->zIn = wx_sqlite3_malloc( CSV_INBUFSZ );
-    if( p->zIn==0 ){
-      csv_errmsg(p, "out of memory");
-      return 1;
-    }
-    p->in = fopen(zFilename, "rb");
-    if( p->in==0 ){
-      /* csv_reader_reset() frees zIn only when a stream is open, so the
-      ** buffer must be released here or it leaks. */
-      wx_sqlite3_free(p->zIn);
-      p->zIn = 0;
-      csv_reader_reset(p);
-      csv_errmsg(p, "cannot open '%s' for reading", zFilename);
-      return 1;
-    }
-  }else{
-    assert( p->in==0 );
-    p->zIn = (char*)zData;
-    p->nIn = strlen(zData);
-  }
-  return 0;
-}
-
-/* The input buffer is exhausted.  Refill it from the stream, then return
-** the first character of the new content, or EOF if nothing could be read.
-**
-** The byte is returned as an unsigned char value so that an input byte
-** 0xFF is not sign-extended to -1 and mistaken for EOF.
-*/
-static CSV_NOINLINE int csv_getc_refill(CsvReader *p){
-  size_t got;
-
-  assert( p->iIn>=p->nIn );  /* Only called on an empty input buffer */
-  assert( p->in!=0 );        /* Only called if reading from a file */
-
-  got = fread(p->zIn, 1, CSV_INBUFSZ, p->in);
-  if( got==0 ) return EOF;
-  p->nIn = got;
-  p->iIn = 1;
-  return (unsigned char)p->zIn[0];
-}
-
-/* Return the next character of input, or EOF at end of input.
-**
-** Characters are returned as unsigned char values (0..255) so that no
-** data byte can compare equal to EOF.
-*/
-static int csv_getc(CsvReader *p){
-  if( p->iIn >= p->nIn ){
-    if( p->in!=0 ) return csv_getc_refill(p);
-    return EOF;
-  }
-  return ((unsigned char*)p->zIn)[p->iIn++];
-}
-
-/* Grow p->z to nAlloc*2+100 bytes and append character c.
-** Return 0 on success.  Return non-zero, with "out of memory" in p->zErr,
-** if the buffer cannot be enlarged.
-*/
-static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){
-  char *zNew = 0;
-  /* Compute the new size in 64 bits: nAlloc*2+100 can exceed the range of
-  ** int, and nAlloc itself is an int. */
-  wx_sqlite3_int64 nNew = (wx_sqlite3_int64)p->nAlloc*2 + 100;
-  if( nNew<=0x7fffffff ){
-    zNew = wx_sqlite3_realloc64(p->z, nNew);
-  }
-  if( zNew==0 ){
-    csv_errmsg(p, "out of memory");
-    return 1;
-  }
-  p->z = zNew;
-  p->nAlloc = (int)nNew;
-  p->z[p->n++] = c;
-  return 0;
-}
-
-/* Add one character to the end of p->z[], enlarging the buffer when fewer
-** than two bytes remain free.  Return 0 on success, non-zero on OOM.
-*/
-static int csv_append(CsvReader *p, char c){
-  if( p->n < p->nAlloc-1 ){
-    p->z[p->n] = c;
-    p->n++;
-    return 0;
-  }
-  return csv_resize_and_append(p, c);
-}
-
-/* Read a single field of CSV text. Compatible with rfc4180 and extended
-** with the option of having a separator other than ",".
-**
-** + Input comes from p->in.
-** + Store results in p->z of length p->n. Space to hold p->z comes
-** from wx_sqlite3_malloc64().
-** + Keep track of the line number in p->nLine.
-** + Store the character that terminates the field in p->cTerm. Store
-** EOF on end-of-file.
-**
-** Return "" at EOF. Return 0 on an OOM error.
-**
-** Malformed quoting (an unescaped quote, or an unterminated quoted field)
-** leaves a message in p->zErr; the function still returns p->z in that
-** case rather than 0.
-*/
-static char *csv_read_one_field(CsvReader *p){
- int c;
- p->n = 0;
- c = csv_getc(p);
- if( c==EOF ){
- p->cTerm = EOF;
- return "";
- }
- if( c=='"' ){
- /* Quoted field. pc and ppc hold the previous and the one-before-previous
- ** characters appended to p->z. */
- int pc, ppc;
- int startLine = p->nLine;
- pc = ppc = 0;
- while( 1 ){
- c = csv_getc(p);
- /* Only characters whose value is at or below '"', or any character that
- ** follows a quote, need the special-case checks below. */
- if( c<='"' || pc=='"' ){
- if( c=='\n' ) p->nLine++;
- if( c=='"' ){
- if( pc=='"' ){
- /* Second quote of a doubled pair: the first one is already in p->z,
- ** so this one is not appended. */
- pc = 0;
- continue;
- }
- }
- if( (c==',' && pc=='"')
- || (c=='\n' && pc=='"')
- || (c=='\n' && pc=='\r' && ppc=='"')
- || (c==EOF && pc=='"')
- ){
- /* End of field: back up over the closing quote (and a \r, if any) */
- do{ p->n--; }while( p->z[p->n]!='"' );
- p->cTerm = (char)c;
- break;
- }
- if( pc=='"' && c!='\r' ){
- /* NOTE(review): p->cTerm is not updated on this path. */
- csv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"');
- break;
- }
- if( c==EOF ){
- csv_errmsg(p, "line %d: unterminated %c-quoted field\n",
- startLine, '"');
- p->cTerm = (char)c;
- break;
- }
- }
- if( csv_append(p, (char)c) ) return 0;
- ppc = pc;
- pc = c;
- }
- }else{
- /* Unquoted field: accumulate up to the next comma, newline or EOF */
- while( c>',' || (c!=EOF && c!=',' && c!='\n') ){
- if( csv_append(p, (char)c) ) return 0;
- c = csv_getc(p);
- }
- if( c=='\n' ){
- p->nLine++;
- /* Drop the \r of a \r\n line ending */
- if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--;
- }
- p->cTerm = (char)c;
- }
- /* NOTE(review): p->z is still NULL if nothing was ever appended (for
- ** example an empty first field), in which case NULL is returned. */
- if( p->z ) p->z[p->n] = 0;
- return p->z;
-}
-
-
-/* Forward references to the various virtual table methods implemented
-** in this file. */
-static int csvtabCreate(wx_sqlite3*, void*, int, const char*const*,
- wx_sqlite3_vtab**,char**);
-static int csvtabConnect(wx_sqlite3*, void*, int, const char*const*,
- wx_sqlite3_vtab**,char**);
-static int csvtabBestIndex(wx_sqlite3_vtab*,wx_sqlite3_index_info*);
-static int csvtabDisconnect(wx_sqlite3_vtab*);
-static int csvtabOpen(wx_sqlite3_vtab*, wx_sqlite3_vtab_cursor**);
-static int csvtabClose(wx_sqlite3_vtab_cursor*);
-static int csvtabFilter(wx_sqlite3_vtab_cursor*, int idxNum, const char *idxStr,
- int argc, wx_sqlite3_value **argv);
-static int csvtabNext(wx_sqlite3_vtab_cursor*);
-static int csvtabEof(wx_sqlite3_vtab_cursor*);
-static int csvtabColumn(wx_sqlite3_vtab_cursor*,wx_sqlite3_context*,int);
-static int csvtabRowid(wx_sqlite3_vtab_cursor*,wx_sqlite3_int64*);
-
-/* One CSV virtual table.  Exactly one of zFilename / zData is expected to
-** be set; both strings are released by csvtabDisconnect().
-*/
-typedef struct CsvTable CsvTable;
-struct CsvTable {
-  wx_sqlite3_vtab base;   /* Base class.  Must be first */
-  char *zFilename;        /* Name of the CSV file */
-  char *zData;            /* Raw CSV data in lieu of zFilename */
-  long iStart;            /* Offset to start of data in zFilename */
-  int nCol;               /* Number of columns in the CSV file */
-  unsigned int tstFlags;  /* Bit values used for testing */
-};
-
-/* Allowed values for tstFlags */
-#define CSVTEST_FIDX 0x0001 /* Pretend that constrained searchs cost less*/
-
-/* A scan position within a CSV virtual table.  csvtabOpen() places the
-** azVal[] and aLen[] arrays in the same allocation, directly after this
-** structure.
-*/
-typedef struct CsvCursor CsvCursor;
-struct CsvCursor {
-  wx_sqlite3_vtab_cursor base;  /* Base class.  Must be first */
-  CsvReader rdr;                /* The CsvReader object */
-  char **azVal;                 /* Value of the current row */
-  int *aLen;                    /* Length of each entry */
-  wx_sqlite3_int64 iRowid;      /* The current rowid.  Negative for EOF */
-};
-
-/* Replace the table's zErrMsg with a copy of the reader's error text.
-** Any previous message is freed first.
-*/
-static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr){
-  wx_sqlite3_vtab *pBase = &pTab->base;
-  char *zCopy;
-  wx_sqlite3_free(pBase->zErrMsg);
-  zCopy = wx_sqlite3_mprintf("%s", pRdr->zErr);
-  pBase->zErrMsg = zCopy;
-}
-
-/*
-** Destructor for a CsvTable object: frees the filename, the inline data
-** and the table structure itself.  Always returns SQLITE_OK.
-*/
-static int csvtabDisconnect(wx_sqlite3_vtab *pVtab){
-  CsvTable *pTab = (CsvTable*)pVtab;
-  char *zFile = pTab->zFilename;
-  char *zText = pTab->zData;
-  wx_sqlite3_free(zFile);
-  wx_sqlite3_free(zText);
-  wx_sqlite3_free(pTab);
-  return SQLITE_OK;
-}
-
-/* Return a pointer to the first character of z that isspace() does not
-** accept.  This is the zero terminator when z is all whitespace.
-*/
-static const char *csv_skip_whitespace(const char *z){
-  const char *zCur;
-  for(zCur=z; isspace((unsigned char)*zCur); zCur++){}
-  return zCur;
-}
-
-/* Remove trailing whitespace from the end of string z[], in place.
-**
-** The character examined is z[n-1], the last one still in the string.
-** Testing z[n] would look at the zero terminator, which is never
-** whitespace, so nothing would ever be trimmed.
-*/
-static void csv_trim_whitespace(char *z){
-  size_t n = strlen(z);
-  while( n>0 && isspace((unsigned char)z[n-1]) ) n--;
-  z[n] = 0;
-}
-
-/* Strip one level of quoting from z[], in place.
-**
-** Nothing happens unless z starts with ' or " and ends with the same
-** character.  Otherwise the outer quotes are removed and each doubled
-** quote character inside collapses to a single one.
-*/
-static void csv_dequote(char *z){
-  const char q = z[0];
-  size_t len;
-  size_t src;
-  int dst = 0;
-
-  if( q=='\'' || q=='"' ){
-    len = strlen(z);
-    if( len>=2 && z[len-1]==q ){
-      src = 1;
-      while( src<len-1 ){
-        if( z[src]==q && z[src+1]==q ) src++;
-        z[dst++] = z[src++];
-      }
-      z[dst] = 0;
-    }
-  }
-}
-
-/* Test whether z has the form "TAG = VALUE", with optional whitespace
-** before TAG and on either side of the "=".  zTag/nTag give the tag text
-** and its length.  Return a pointer to the first character of VALUE, or
-** NULL if z does not match.
-*/
-static const char *csv_parameter(const char *zTag, int nTag, const char *z){
-  const char *zCur = csv_skip_whitespace(z);
-  if( strncmp(zTag, zCur, nTag)==0 ){
-    zCur = csv_skip_whitespace(zCur+nTag);
-    if( zCur[0]=='=' ){
-      return csv_skip_whitespace(zCur+1);
-    }
-  }
-  return 0;
-}
-
-/* Try to decode zArg as "zParam = VALUE" where VALUE is a string that may
-** be quoted.
-**
-** Return 0 if zArg is not this parameter.  Otherwise return 1, including
-** when an error occurs.  On error a message is left in p->zErr; when
-** the parameter is accepted p->zErr[0]==0 and *pzVal holds a
-** trimmed, dequoted copy of VALUE.
-*/
-static int csv_string_parameter(
-  CsvReader *p,            /* Leave the error message here, if there is one */
-  const char *zParam,      /* Parameter we are checking for */
-  const char *zArg,        /* Raw text of the virtual table argument */
-  char **pzVal             /* Write the dequoted string value here */
-){
-  const char *zValue = csv_parameter(zParam, (int)strlen(zParam), zArg);
-  if( zValue==0 ) return 0;
-  p->zErr[0] = 0;
-  if( *pzVal==0 ){
-    char *zCopy = wx_sqlite3_mprintf("%s", zValue);
-    *pzVal = zCopy;
-    if( zCopy ){
-      csv_trim_whitespace(zCopy);
-      csv_dequote(zCopy);
-    }else{
-      csv_errmsg(p, "out of memory");
-    }
-  }else{
-    /* A value was already stored by an earlier argument */
-    csv_errmsg(p, "more than one '%s' parameter", zParam);
-  }
-  return 1;
-}
-
-
-/* Interpret z as a boolean.
-**
-** Return 1 for "yes", "on", "true" (compared with wx_sqlite3_stricmp) or
-** the exact string "1".  Return 0 for "no", "off", "false" or the exact
-** string "0".  Return -1 for anything else.
-*/
-static int csv_boolean(const char *z){
-  if( wx_sqlite3_stricmp("yes",z)==0
-   || wx_sqlite3_stricmp("on",z)==0
-   || wx_sqlite3_stricmp("true",z)==0
-   || (z[0]=='1' && z[1]==0)   /* was z[0]==0, which can never hold */
-  ){
-    return 1;
-  }
-  if( wx_sqlite3_stricmp("no",z)==0
-   || wx_sqlite3_stricmp("off",z)==0
-   || wx_sqlite3_stricmp("false",z)==0
-   || (z[0]=='0' && z[1]==0)
-  ){
-    return 0;
-  }
-  return -1;
-}
-
-
-/*
-** Construct a CsvTable from the arguments of CREATE VIRTUAL TABLE.
-**
-** Parameters:
-**    filename=FILENAME          Name of file containing CSV content
-**    data=TEXT                  Direct CSV content.
-**    schema=SCHEMA              Alternative CSV schema.
-**    header=YES|NO              First row of CSV defines the names of
-**                               columns if "yes".  Default "no".
-**    columns=N                  Assume the CSV file contains N columns.
-**
-** Only available if compiled with SQLITE_TEST:
-**
-**    testflags=N                Bitmask of test flags.  Optional
-**
-** If schema= is omitted, then the columns are named "c0", "c1", "c2",
-** and so forth.  If columns=N is omitted, then the input is opened and
-** the number of columns in the first row is counted to determine the
-** column count.  If header=YES, then the first row is skipped: the input
-** is read to the end of that row and the offset of the following
-** character is remembered in CsvTable.iStart.
-**
-** Returns SQLITE_OK on success.  On failure returns SQLITE_NOMEM for an
-** allocation failure, the wx_sqlite3_declare_vtab() result if that call
-** fails, or SQLITE_ERROR otherwise; any message is written to *pzErr.
-*/
-static int csvtabConnect(
-  wx_sqlite3 *db,
-  void *pAux,
-  int argc, const char *const*argv,
-  wx_sqlite3_vtab **ppVtab,
-  char **pzErr
-){
-  CsvTable *pNew = 0;        /* The CsvTable object to construct */
-  int bHeader = -1;          /* header= flags.  -1 means not seen yet */
-  int rc = SQLITE_OK;        /* Result code from this routine */
-  int i, j;                  /* Loop counters */
-#ifdef SQLITE_TEST
-  int tstFlags = 0;          /* Value for testflags=N parameter */
-#endif
-  int nCol = -99;            /* Value of the columns= parameter */
-  CsvReader sRdr;            /* A CSV file reader used to store an error
-                             ** message and/or to count the number of columns */
-  static const char *azParam[] = {
-     "filename", "data", "schema",
-  };
-  char *azPValue[3];         /* Parameter values */
-# define CSV_FILENAME (azPValue[0])
-# define CSV_DATA     (azPValue[1])
-# define CSV_SCHEMA   (azPValue[2])
-
-
-  assert( sizeof(azPValue)==sizeof(azParam) );
-  memset(&sRdr, 0, sizeof(sRdr));
-  memset(azPValue, 0, sizeof(azPValue));
-  for(i=3; i<argc; i++){
-    const char *z = argv[i];
-    const char *zValue;
-    for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){
-      if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break;
-    }
-    if( j<sizeof(azParam)/sizeof(azParam[0]) ){
-      if( sRdr.zErr[0] ) goto csvtab_connect_error;
-    }else
-    if( (zValue = csv_parameter("header",6,z))!=0 ){
-      int x;
-      if( bHeader>=0 ){
-        csv_errmsg(&sRdr, "more than one 'header' parameter");
-        goto csvtab_connect_error;
-      }
-      x = csv_boolean(zValue);
-      if( x==1 ){
-        bHeader = 1;
-      }else if( x==0 ){
-        bHeader = 0;
-      }else{
-        csv_errmsg(&sRdr, "unrecognized argument to 'header': %s", zValue);
-        goto csvtab_connect_error;
-      }
-    }else
-#ifdef SQLITE_TEST
-    if( (zValue = csv_parameter("testflags",9,z))!=0 ){
-      tstFlags = (unsigned int)atoi(zValue);
-    }else
-#endif
-    if( (zValue = csv_parameter("columns",7,z))!=0 ){
-      if( nCol>0 ){
-        csv_errmsg(&sRdr, "more than one 'columns' parameter");
-        goto csvtab_connect_error;
-      }
-      nCol = atoi(zValue);
-      if( nCol<=0 ){
-        csv_errmsg(&sRdr, "must have at least one column");
-        goto csvtab_connect_error;
-      }
-    }else
-    {
-      csv_errmsg(&sRdr, "unrecognized parameter '%s'", z);
-      goto csvtab_connect_error;
-    }
-  }
-  if( (CSV_FILENAME==0)==(CSV_DATA==0) ){
-    csv_errmsg(&sRdr, "must either filename= or data= but not both");
-    goto csvtab_connect_error;
-  }
-
-  /* The reader is needed to count columns, and also to find where the
-  ** header row ends.  Previously it was opened only for counting, so
-  ** header=yes combined with columns=N called ftell() on a NULL stream. */
-  if( (nCol<=0 || bHeader==1)
-   && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA)
-  ){
-    goto csvtab_connect_error;
-  }
-  pNew = wx_sqlite3_malloc( sizeof(*pNew) );
-  *ppVtab = (wx_sqlite3_vtab*)pNew;
-  if( pNew==0 ) goto csvtab_connect_oom;
-  memset(pNew, 0, sizeof(*pNew));
-  if( nCol>0 ){
-    pNew->nCol = nCol;
-  }
-  if( nCol<=0 || bHeader==1 ){
-    /* Read the whole first row, counting its fields */
-    int nField = 0;
-    do{
-      if( csv_read_one_field(&sRdr)==0 ) goto csvtab_connect_oom;
-      nField++;
-    }while( sRdr.cTerm==',' );
-    if( nCol<=0 ) pNew->nCol = nField;
-  }
-  if( bHeader!=1 ){
-    pNew->iStart = 0;
-  }else if( sRdr.in==0 ){
-    /* Inline data: iIn is the offset of the first unread character */
-    pNew->iStart = (long)sRdr.iIn;
-  }else{
-    /* File input: ftell() reports how far the stream has been read, which
-    ** includes buffered bytes not yet consumed.  Subtract that read-ahead
-    ** to get the offset of the first character after the header row. */
-    long iPos = ftell(sRdr.in);
-    if( iPos<0 ){
-      csv_errmsg(&sRdr, "cannot determine offset of first data row");
-      goto csvtab_connect_error;
-    }
-    pNew->iStart = iPos - (long)(sRdr.nIn - sRdr.iIn);
-  }
-  pNew->zFilename = CSV_FILENAME;  CSV_FILENAME = 0;
-  pNew->zData = CSV_DATA;          CSV_DATA = 0;
-#ifdef SQLITE_TEST
-  pNew->tstFlags = tstFlags;
-#endif
-  csv_reader_reset(&sRdr);
-  if( CSV_SCHEMA==0 ){
-    char *zSep = "";
-    CSV_SCHEMA = wx_sqlite3_mprintf("CREATE TABLE x(");
-    if( CSV_SCHEMA==0 ) goto csvtab_connect_oom;
-    for(i=0; i<pNew->nCol; i++){
-      CSV_SCHEMA = wx_sqlite3_mprintf("%z%sc%d TEXT",CSV_SCHEMA, zSep, i);
-      if( CSV_SCHEMA==0 ) goto csvtab_connect_oom;
-      zSep = ",";
-    }
-    CSV_SCHEMA = wx_sqlite3_mprintf("%z);", CSV_SCHEMA);
-    if( CSV_SCHEMA==0 ) goto csvtab_connect_oom;
-  }
-  rc = wx_sqlite3_declare_vtab(db, CSV_SCHEMA);
-  if( rc ) goto csvtab_connect_error;
-  for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
-    wx_sqlite3_free(azPValue[i]);
-  }
-  return SQLITE_OK;
-
-csvtab_connect_oom:
-  rc = SQLITE_NOMEM;
-  csv_errmsg(&sRdr, "out of memory");
-
-csvtab_connect_error:
-  if( pNew ) csvtabDisconnect(&pNew->base);
-  *ppVtab = 0;   /* do not hand back a pointer to the freed table */
-  for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
-    wx_sqlite3_free(azPValue[i]);
-  }
-  if( sRdr.zErr[0] ){
-    wx_sqlite3_free(*pzErr);
-    *pzErr = wx_sqlite3_mprintf("%s", sRdr.zErr);
-  }
-  csv_reader_reset(&sRdr);
-  if( rc==SQLITE_OK ) rc = SQLITE_ERROR;
-  return rc;
-}
-
-/*
-** Discard the row currently held by a CsvCursor: every column value is
-** freed and its pointer and recorded length are zeroed.
-*/
-static void csvtabCursorRowReset(CsvCursor *pCur){
-  const int nCol = ((CsvTable*)pCur->base.pVtab)->nCol;
-  int k = 0;
-  while( k<nCol ){
-    wx_sqlite3_free(pCur->azVal[k]);
-    pCur->azVal[k] = 0;
-    pCur->aLen[k] = 0;
-    k++;
-  }
-}
-
-/*
-** xCreate simply forwards to csvtabConnect().  It is kept as a separate
-** function so that the virtual table is not an eponymous virtual table.
-*/
-static int csvtabCreate(
-  wx_sqlite3 *db,
-  void *pAux,
-  int argc, const char *const*argv,
-  wx_sqlite3_vtab **ppVtab,
-  char **pzErr
-){
-  int rc = csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr);
-  return rc;
-}
-
-/*
-** Destructor for a CsvCursor: frees the current row, resets the reader
-** (closing its stream, if any) and frees the cursor.  Returns SQLITE_OK.
-*/
-static int csvtabClose(wx_sqlite3_vtab_cursor *cur){
-  CsvCursor *pCsr = (CsvCursor*)cur;
-  CsvReader *pRdr = &pCsr->rdr;
-  csvtabCursorRowReset(pCsr);
-  csv_reader_reset(pRdr);
-  wx_sqlite3_free(cur);
-  return SQLITE_OK;
-}
-
-/*
-** Constructor for a new CsvTable cursor object.
-**
-** The cursor, its azVal[] array and its aLen[] array share one allocation.
-** Returns SQLITE_OK with the cursor in *ppCursor, SQLITE_NOMEM if the
-** allocation fails, or SQLITE_ERROR (with the message copied into the
-** table's zErrMsg) if the input cannot be opened.  On failure *ppCursor
-** is NULL and nothing remains allocated by this function.
-*/
-static int csvtabOpen(wx_sqlite3_vtab *p, wx_sqlite3_vtab_cursor **ppCursor){
-  CsvTable *pTab = (CsvTable*)p;
-  CsvCursor *pCur;
-  size_t nByte;
-  *ppCursor = 0;
-  nByte = sizeof(*pCur) + (sizeof(char*)+sizeof(int))*pTab->nCol;
-  pCur = wx_sqlite3_malloc64( nByte );
-  if( pCur==0 ) return SQLITE_NOMEM;
-  memset(pCur, 0, nByte);
-  pCur->azVal = (char**)&pCur[1];
-  pCur->aLen = (int*)&pCur->azVal[pTab->nCol];
-  if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){
-    csv_xfer_error(pTab, &pCur->rdr);
-    /* The caller gets no cursor on failure, so release it here; it was
-    ** previously leaked. */
-    wx_sqlite3_free(pCur);
-    return SQLITE_ERROR;
-  }
-  *ppCursor = &pCur->base;
-  return SQLITE_OK;
-}
-
-
-/*
-** Advance a CsvCursor to its next row of input.
-** Set the EOF marker if we reach the end of input.
-**
-** Fields beyond the table's column count are read and discarded; columns
-** missing from a short row are left NULL. Always returns SQLITE_OK.
-** NOTE(review): an out-of-memory condition is reported only through the
-** table's zErrMsg and by marking the cursor as EOF, not through the
-** return code.
-*/
-static int csvtabNext(wx_sqlite3_vtab_cursor *cur){
- CsvCursor *pCur = (CsvCursor*)cur;
- CsvTable *pTab = (CsvTable*)cur->pVtab;
- int i = 0;
- char *z;
- do{
- z = csv_read_one_field(&pCur->rdr);
- if( z==0 ){
- csv_xfer_error(pTab, &pCur->rdr);
- break;
- }
- if( i<pTab->nCol ){
- /* Grow the per-column buffer only when the new value does not fit */
- if( pCur->aLen[i] < pCur->rdr.n+1 ){
- char *zNew = wx_sqlite3_realloc64(pCur->azVal[i], pCur->rdr.n+1);
- if( zNew==0 ){
- csv_errmsg(&pCur->rdr, "out of memory");
- csv_xfer_error(pTab, &pCur->rdr);
- break;
- }
- pCur->azVal[i] = zNew;
- pCur->aLen[i] = pCur->rdr.n+1;
- }
- /* Copy the field text together with its zero terminator */
- memcpy(pCur->azVal[i], z, pCur->rdr.n+1);
- i++;
- }
- }while( pCur->rdr.cTerm==',' );
- /* Clear any columns the row did not supply */
- while( i<pTab->nCol ){
- wx_sqlite3_free(pCur->azVal[i]);
- pCur->azVal[i] = 0;
- pCur->aLen[i] = 0;
- i++;
- }
- /* NOTE(review): a final row that ends at EOF without a newline has
- ** cTerm==EOF and is therefore treated as end-of-scan - confirm intended. */
- if( z==0 || pCur->rdr.cTerm==EOF ){
- pCur->iRowid = -1;
- }else{
- pCur->iRowid++;
- }
- return SQLITE_OK;
-}
-
-/*
-** Report the value of column i for the row the cursor points at.
-** Nothing is reported when i is out of range or the column holds no
-** value.  Always returns SQLITE_OK.
-*/
-static int csvtabColumn(
-  wx_sqlite3_vtab_cursor *cur,   /* The cursor */
-  wx_sqlite3_context *ctx,       /* First argument to wx_sqlite3_result_...() */
-  int i                          /* Which column to return */
-){
-  CsvCursor *pCsr = (CsvCursor*)cur;
-  const int nCol = ((CsvTable*)cur->pVtab)->nCol;
-  const char *zVal;
-
-  if( i<0 || i>=nCol ) return SQLITE_OK;
-  zVal = pCsr->azVal[i];
-  if( zVal==0 ) return SQLITE_OK;
-  wx_sqlite3_result_text(ctx, zVal, -1, SQLITE_STATIC);
-  return SQLITE_OK;
-}
-
-/*
-** Return the rowid for the current row.  The parameter type is spelled
-** wx_sqlite3_int64 to match the forward declaration of this function.
-*/
-static int csvtabRowid(wx_sqlite3_vtab_cursor *cur, wx_sqlite3_int64 *pRowid){
-  CsvCursor *pCur = (CsvCursor*)cur;
-  *pRowid = pCur->iRowid;
-  return SQLITE_OK;
-}
-
-/*
-** Return 1 once the cursor has moved past the last row, which is recorded
-** as a negative rowid, and 0 otherwise.
-*/
-static int csvtabEof(wx_sqlite3_vtab_cursor *cur){
-  wx_sqlite3_int64 iRow = ((CsvCursor*)cur)->iRowid;
-  if( iRow<0 ) return 1;
-  return 0;
-}
-
-/*
-** Only a full table scan is supported.  So xFilter simply rewinds to
-** the first data row (CsvTable.iStart) and loads it via csvtabNext().
-**
-** Returns SQLITE_ERROR, with a message in the table's zErrMsg, if the
-** input file cannot be repositioned; otherwise the csvtabNext() result.
-*/
-static int csvtabFilter(
-  wx_sqlite3_vtab_cursor *pVtabCursor,
-  int idxNum, const char *idxStr,
-  int argc, wx_sqlite3_value **argv
-){
-  CsvCursor *pCur = (CsvCursor*)pVtabCursor;
-  CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab;
-  pCur->iRowid = 0;
-  if( pCur->rdr.in==0 ){
-    assert( pCur->rdr.zIn==pTab->zData );
-    assert( pTab->iStart>=0 );
-    assert( (size_t)pTab->iStart<=pCur->rdr.nIn );
-    pCur->rdr.iIn = pTab->iStart;
-  }else{
-    /* A failed seek was previously ignored, which would continue the scan
-    ** from wherever the stream happened to be. */
-    if( fseek(pCur->rdr.in, pTab->iStart, SEEK_SET)!=0 ){
-      csv_errmsg(&pCur->rdr, "cannot seek to start of CSV data");
-      csv_xfer_error(pTab, &pCur->rdr);
-      pCur->iRowid = -1;
-      return SQLITE_ERROR;
-    }
-    pCur->rdr.iIn = 0;
-    pCur->rdr.nIn = 0;
-  }
-  return csvtabNext(pVtabCursor);
-}
-
-/*
-** Only a forward full table scan is supported. xBestIndex is mostly
-** a no-op. If CSVTEST_FIDX is set, then the presence of equality
-** constraints lowers the estimated cost, which is fiction, but is useful
-** for testing certain kinds of virtual table behavior.
-**
-** Always returns SQLITE_OK. Outside of SQLITE_TEST builds the only effect
-** is to set the estimated cost to 1000000.
-*/
-static int csvtabBestIndex(
- wx_sqlite3_vtab *tab,
- wx_sqlite3_index_info *pIdxInfo
-){
- pIdxInfo->estimatedCost = 1000000;
-#ifdef SQLITE_TEST
- if( (((CsvTable*)tab)->tstFlags & CSVTEST_FIDX)!=0 ){
- /* The usual (and sensible) case is to always do a full table scan.
- ** The code in this branch only runs when testflags=1. This code
- ** generates an artificial and unrealistic plan which is useful
- ** for testing virtual table logic but is not helpful to real applications.
- **
- ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual
- ** table (even though it is not) and the cost of running the virtual table
- ** is reduced from 1 million to just 10. The constraints are *not* marked
- ** as omittable, however, so the query planner should still generate a
- ** plan that gives a correct answer, even if the plan is not optimal.
- */
- int i;
- int nConst = 0;
- for(i=0; i<pIdxInfo->nConstraint; i++){
- unsigned char op;
- if( pIdxInfo->aConstraint[i].usable==0 ) continue;
- op = pIdxInfo->aConstraint[i].op;
- if( op==SQLITE_INDEX_CONSTRAINT_EQ
- || op==SQLITE_INDEX_CONSTRAINT_LIKE
- || op==SQLITE_INDEX_CONSTRAINT_GLOB
- ){
- pIdxInfo->estimatedCost = 10;
- /* NOTE(review): the usage entry is indexed by nConst, not by i, so the
- ** entry written may belong to a different constraint than the one just
- ** examined - confirm whether this is intended for the test plan. */
- pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1;
- nConst++;
- }
- }
- }
-#endif
- return SQLITE_OK;
-}
-
-
-/* Method table for the read-only CSV virtual table. The entries are
-** positional, so their order must not change. csvtabDisconnect serves as
-** both xDisconnect and xDestroy; no xUpdate, transaction, xFindMethod or
-** xRename method is provided.
-*/
-static wx_sqlite3_module CsvModule = {
- 0, /* iVersion */
- csvtabCreate, /* xCreate */
- csvtabConnect, /* xConnect */
- csvtabBestIndex, /* xBestIndex */
- csvtabDisconnect, /* xDisconnect */
- csvtabDisconnect, /* xDestroy */
- csvtabOpen, /* xOpen - open a cursor */
- csvtabClose, /* xClose - close a cursor */
- csvtabFilter, /* xFilter - configure scan constraints */
- csvtabNext, /* xNext - advance a cursor */
- csvtabEof, /* xEof - check for end of scan */
- csvtabColumn, /* xColumn - read data */
- csvtabRowid, /* xRowid - read data */
- 0, /* xUpdate */
- 0, /* xBegin */
- 0, /* xSync */
- 0, /* xCommit */
- 0, /* xRollback */
- 0, /* xFindMethod */
- 0, /* xRename */
-};
-
-#ifdef SQLITE_TEST
-/*
-** Test-only xUpdate method.  It lets a variant of the CSV virtual table
-** advertise an xUpdate function, but every call is refused with
-** SQLITE_READONLY since the CSV file is not really writable.
-*/
-static int csvtabUpdate(
-  wx_sqlite3_vtab *p,
-  int n,
-  wx_sqlite3_value**v,
-  wx_sqlite3_int64*x
-){
-  return SQLITE_READONLY;
-}
-/* Method table for the test-only variant of the CSV virtual table. It has
-** the same entries as CsvModule except that xUpdate is csvtabUpdate. The
-** entries are positional, so their order must not change.
-*/
-static wx_sqlite3_module CsvModuleFauxWrite = {
- 0, /* iVersion */
- csvtabCreate, /* xCreate */
- csvtabConnect, /* xConnect */
- csvtabBestIndex, /* xBestIndex */
- csvtabDisconnect, /* xDisconnect */
- csvtabDisconnect, /* xDestroy */
- csvtabOpen, /* xOpen - open a cursor */
- csvtabClose, /* xClose - close a cursor */
- csvtabFilter, /* xFilter - configure scan constraints */
- csvtabNext, /* xNext - advance a cursor */
- csvtabEof, /* xEof - check for end of scan */
- csvtabColumn, /* xColumn - read data */
- csvtabRowid, /* xRowid - read data */
- csvtabUpdate, /* xUpdate */
- 0, /* xBegin */
- 0, /* xSync */
- 0, /* xCommit */
- 0, /* xRollback */
- 0, /* xFindMethod */
- 0, /* xRename */
-};
-#endif /* SQLITE_TEST */
-
-#endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */
-
-
-#ifdef _WIN32
-__declspec(dllexport)
-#endif
-/*
-** This routine is called when the extension is loaded. The new
-** CSV virtual table module is registered with the calling database
-** connection.
-*/
-int wx_sqlite3_csv_init(
-  wx_sqlite3 *db,
-  char **pzErrMsg,
-  const wx_sqlite3_api_routines *pApi
-){
-#ifdef SQLITE_OMIT_VIRTUALTABLE
-  /* Virtual tables are compiled out: there is nothing to register */
-  return SQLITE_OK;
-#else
-  int rc;
-  SQLITE_EXTENSION_INIT2(pApi);
-  /* Register the read-only module under the name "csv" */
-  rc = wx_sqlite3_create_module(db, "csv", &CsvModule, 0);
-#ifdef SQLITE_TEST
-  /* Test builds also register "csv_wr", the variant with an xUpdate */
-  if( rc!=SQLITE_OK ) return rc;
-  rc = wx_sqlite3_create_module(db, "csv_wr", &CsvModuleFauxWrite, 0);
-#endif
-  return rc;
-#endif
-}
-
+/* +** 2016-05-28 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains the implementation of an SQLite virtual table for +** reading CSV files. +** +** Usage: +** +** .load ./csv +** CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME); +** SELECT * FROM csv; +** +** The columns are named "c1", "c2", "c3", ... by default. But the +** application can define its own CREATE TABLE statement as an additional +** parameter. For example: +** +** CREATE VIRTUAL TABLE temp.csv2 USING csv( +** filename = "../http.log", +** schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)" +** ); +** +** Instead of specifying a file, the text of the CSV can be loaded using +** the data= parameter. +** +** If the columns=N parameter is supplied, then the CSV file is assumed to have +** N columns. If the columns parameter is omitted, the CSV file is opened +** as soon as the virtual table is constructed and the first row of the CSV +** is read in order to count the tables. +** +** Some extra debugging features (used for testing virtual tables) are available +** if this module is compiled with -DSQLITE_TEST. +*/ +#include "wx_sqlite3ext.h" +SQLITE_EXTENSION_INIT1 +#include <string.h> +#include <stdlib.h> +#include <assert.h> +#include <stdarg.h> +#include <ctype.h> +#include <stdio.h> + +#ifndef SQLITE_OMIT_VIRTUALTABLE + +/* +** A macro to hint to the compiler that a function should not be +** inlined. 
+*/ +#if defined(__GNUC__) +# define CSV_NOINLINE __attribute__((noinline)) +#elif defined(_MSC_VER) && _MSC_VER>=1310 +# define CSV_NOINLINE __declspec(noinline) +#else +# define CSV_NOINLINE +#endif + + +/* Max size of the error message in a CsvReader */ +#define CSV_MXERR 200 + +/* Size of the CsvReader input buffer */ +#define CSV_INBUFSZ 1024 + +/* A context object used when read a CSV file. */ +typedef struct CsvReader CsvReader; +struct CsvReader { + FILE *in; /* Read the CSV text from this input stream */ + char *z; /* Accumulated text for a field */ + int n; /* Number of bytes in z */ + int nAlloc; /* Space allocated for z[] */ + int nLine; /* Current line number */ + int bNotFirst; /* True if prior text has been seen */ + int cTerm; /* Character that terminated the most recent field */ + size_t iIn; /* Next unread character in the input buffer */ + size_t nIn; /* Number of characters in the input buffer */ + char *zIn; /* The input buffer */ + char zErr[CSV_MXERR]; /* Error message */ +}; + +/* Initialize a CsvReader object */ +static void csv_reader_init(CsvReader *p){ + p->in = 0; + p->z = 0; + p->n = 0; + p->nAlloc = 0; + p->nLine = 0; + p->bNotFirst = 0; + p->nIn = 0; + p->zIn = 0; + p->zErr[0] = 0; +} + +/* Close and reset a CsvReader object */ +static void csv_reader_reset(CsvReader *p){ + if( p->in ){ + fclose(p->in); + wx_sqlite3_free(p->zIn); + } + wx_sqlite3_free(p->z); + csv_reader_init(p); +} + +/* Report an error on a CsvReader */ +static void csv_errmsg(CsvReader *p, const char *zFormat, ...){ + va_list ap; + va_start(ap, zFormat); + wx_sqlite3_vsnprintf(CSV_MXERR, p->zErr, zFormat, ap); + va_end(ap); +} + +/* Open the file associated with a CsvReader +** Return the number of errors. +*/ +static int csv_reader_open( + CsvReader *p, /* The reader to open */ + const char *zFilename, /* Read from this filename */ + const char *zData /* ... 
or use this data */ +){ + if( zFilename ){ + p->zIn = wx_sqlite3_malloc( CSV_INBUFSZ ); + if( p->zIn==0 ){ + csv_errmsg(p, "out of memory"); + return 1; + } + p->in = fopen(zFilename, "rb"); + if( p->in==0 ){ + wx_sqlite3_free(p->zIn); + csv_reader_reset(p); + csv_errmsg(p, "cannot open '%s' for reading", zFilename); + return 1; + } + }else{ + assert( p->in==0 ); + p->zIn = (char*)zData; + p->nIn = strlen(zData); + } + return 0; +} + +/* The input buffer has overflowed. Refill the input buffer, then +** return the next character +*/ +static CSV_NOINLINE int csv_getc_refill(CsvReader *p){ + size_t got; + + assert( p->iIn>=p->nIn ); /* Only called on an empty input buffer */ + assert( p->in!=0 ); /* Only called if reading froma file */ + + got = fread(p->zIn, 1, CSV_INBUFSZ, p->in); + if( got==0 ) return EOF; + p->nIn = got; + p->iIn = 1; + return p->zIn[0]; +} + +/* Return the next character of input. Return EOF at end of input. */ +static int csv_getc(CsvReader *p){ + if( p->iIn >= p->nIn ){ + if( p->in!=0 ) return csv_getc_refill(p); + return EOF; + } + return ((unsigned char*)p->zIn)[p->iIn++]; +} + +/* Increase the size of p->z and append character c to the end. +** Return 0 on success and non-zero if there is an OOM error */ +static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){ + char *zNew; + int nNew = p->nAlloc*2 + 100; + zNew = wx_sqlite3_realloc64(p->z, nNew); + if( zNew ){ + p->z = zNew; + p->nAlloc = nNew; + p->z[p->n++] = c; + return 0; + }else{ + csv_errmsg(p, "out of memory"); + return 1; + } +} + +/* Append a single character to the CsvReader.z[] array. +** Return 0 on success and non-zero if there is an OOM error */ +static int csv_append(CsvReader *p, char c){ + if( p->n>=p->nAlloc-1 ) return csv_resize_and_append(p, c); + p->z[p->n++] = c; + return 0; +} + +/* Read a single field of CSV text. Compatible with rfc4180 and extended +** with the option of having a separator other than ",". +** +** + Input comes from p->in. 
+** + Store results in p->z of length p->n. Space to hold p->z comes +** from wx_sqlite3_malloc64(). +** + Keep track of the line number in p->nLine. +** + Store the character that terminates the field in p->cTerm. Store +** EOF on end-of-file. +** +** Return 0 at EOF or on OOM. On EOF, the p->cTerm character will have +** been set to EOF. +*/ +static char *csv_read_one_field(CsvReader *p){ + int c; + p->n = 0; + c = csv_getc(p); + if( c==EOF ){ + p->cTerm = EOF; + return 0; + } + if( c=='"' ){ + int pc, ppc; + int startLine = p->nLine; + pc = ppc = 0; + while( 1 ){ + c = csv_getc(p); + if( c<='"' || pc=='"' ){ + if( c=='\n' ) p->nLine++; + if( c=='"' ){ + if( pc=='"' ){ + pc = 0; + continue; + } + } + if( (c==',' && pc=='"') + || (c=='\n' && pc=='"') + || (c=='\n' && pc=='\r' && ppc=='"') + || (c==EOF && pc=='"') + ){ + do{ p->n--; }while( p->z[p->n]!='"' ); + p->cTerm = (char)c; + break; + } + if( pc=='"' && c!='\r' ){ + csv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"'); + break; + } + if( c==EOF ){ + csv_errmsg(p, "line %d: unterminated %c-quoted field\n", + startLine, '"'); + p->cTerm = (char)c; + break; + } + } + if( csv_append(p, (char)c) ) return 0; + ppc = pc; + pc = c; + } + }else{ + /* If this is the first field being parsed and it begins with the + ** UTF-8 BOM (0xEF BB BF) then skip the BOM */ + if( (c&0xff)==0xef && p->bNotFirst==0 ){ + csv_append(p, (char)c); + c = csv_getc(p); + if( (c&0xff)==0xbb ){ + csv_append(p, (char)c); + c = csv_getc(p); + if( (c&0xff)==0xbf ){ + p->bNotFirst = 1; + p->n = 0; + return csv_read_one_field(p); + } + } + } + while( c>',' || (c!=EOF && c!=',' && c!='\n') ){ + if( csv_append(p, (char)c) ) return 0; + c = csv_getc(p); + } + if( c=='\n' ){ + p->nLine++; + if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--; + } + p->cTerm = (char)c; + } + if( p->z ) p->z[p->n] = 0; + p->bNotFirst = 1; + return p->z; +} + + +/* Forward references to the various virtual table methods implemented +** in this file. 
*/ +static int csvtabCreate(wx_sqlite3*, void*, int, const char*const*, + wx_sqlite3_vtab**,char**); +static int csvtabConnect(wx_sqlite3*, void*, int, const char*const*, + wx_sqlite3_vtab**,char**); +static int csvtabBestIndex(wx_sqlite3_vtab*,wx_sqlite3_index_info*); +static int csvtabDisconnect(wx_sqlite3_vtab*); +static int csvtabOpen(wx_sqlite3_vtab*, wx_sqlite3_vtab_cursor**); +static int csvtabClose(wx_sqlite3_vtab_cursor*); +static int csvtabFilter(wx_sqlite3_vtab_cursor*, int idxNum, const char *idxStr, + int argc, wx_sqlite3_value **argv); +static int csvtabNext(wx_sqlite3_vtab_cursor*); +static int csvtabEof(wx_sqlite3_vtab_cursor*); +static int csvtabColumn(wx_sqlite3_vtab_cursor*,wx_sqlite3_context*,int); +static int csvtabRowid(wx_sqlite3_vtab_cursor*,wx_sqlite3_int64*); + +/* An instance of the CSV virtual table */ +typedef struct CsvTable { + wx_sqlite3_vtab base; /* Base class. Must be first */ + char *zFilename; /* Name of the CSV file */ + char *zData; /* Raw CSV data in lieu of zFilename */ + long iStart; /* Offset to start of data in zFilename */ + int nCol; /* Number of columns in the CSV file */ + unsigned int tstFlags; /* Bit values used for testing */ +} CsvTable; + +/* Allowed values for tstFlags */ +#define CSVTEST_FIDX 0x0001 /* Pretend that constrained searchs cost less*/ + +/* A cursor for the CSV virtual table */ +typedef struct CsvCursor { + wx_sqlite3_vtab_cursor base; /* Base class. Must be first */ + CsvReader rdr; /* The CsvReader object */ + char **azVal; /* Value of the current row */ + int *aLen; /* Length of each entry */ + wx_sqlite3_int64 iRowid; /* The current rowid. Negative for EOF */ +} CsvCursor; + +/* Transfer error message text from a reader into a CsvTable */ +static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr){ + wx_sqlite3_free(pTab->base.zErrMsg); + pTab->base.zErrMsg = wx_sqlite3_mprintf("%s", pRdr->zErr); +} + +/* +** This method is the destructor fo a CsvTable object. 
+*/ +static int csvtabDisconnect(wx_sqlite3_vtab *pVtab){ + CsvTable *p = (CsvTable*)pVtab; + wx_sqlite3_free(p->zFilename); + wx_sqlite3_free(p->zData); + wx_sqlite3_free(p); + return SQLITE_OK; +} + +/* Skip leading whitespace. Return a pointer to the first non-whitespace +** character, or to the zero terminator if the string has only whitespace */ +static const char *csv_skip_whitespace(const char *z){ + while( isspace((unsigned char)z[0]) ) z++; + return z; +} + +/* Remove trailing whitespace from the end of string z[] */ +static void csv_trim_whitespace(char *z){ + size_t n = strlen(z); + while( n>0 && isspace((unsigned char)z[n]) ) n--; + z[n] = 0; +} + +/* Dequote the string */ +static void csv_dequote(char *z){ + int j; + char cQuote = z[0]; + size_t i, n; + + if( cQuote!='\'' && cQuote!='"' ) return; + n = strlen(z); + if( n<2 || z[n-1]!=z[0] ) return; + for(i=1, j=0; i<n-1; i++){ + if( z[i]==cQuote && z[i+1]==cQuote ) i++; + z[j++] = z[i]; + } + z[j] = 0; +} + +/* Check to see if the string is of the form: "TAG = VALUE" with optional +** whitespace before and around tokens. If it is, return a pointer to the +** first character of VALUE. If it is not, return NULL. +*/ +static const char *csv_parameter(const char *zTag, int nTag, const char *z){ + z = csv_skip_whitespace(z); + if( strncmp(zTag, z, nTag)!=0 ) return 0; + z = csv_skip_whitespace(z+nTag); + if( z[0]!='=' ) return 0; + return csv_skip_whitespace(z+1); +} + +/* Decode a parameter that requires a dequoted string. +** +** Return 1 if the parameter is seen, or 0 if not. 1 is returned +** even if there is an error. If an error occurs, then an error message +** is left in p->zErr. If there are no errors, p->zErr[0]==0. 
+*/ +static int csv_string_parameter( + CsvReader *p, /* Leave the error message here, if there is one */ + const char *zParam, /* Parameter we are checking for */ + const char *zArg, /* Raw text of the virtual table argment */ + char **pzVal /* Write the dequoted string value here */ +){ + const char *zValue; + zValue = csv_parameter(zParam,(int)strlen(zParam),zArg); + if( zValue==0 ) return 0; + p->zErr[0] = 0; + if( *pzVal ){ + csv_errmsg(p, "more than one '%s' parameter", zParam); + return 1; + } + *pzVal = wx_sqlite3_mprintf("%s", zValue); + if( *pzVal==0 ){ + csv_errmsg(p, "out of memory"); + return 1; + } + csv_trim_whitespace(*pzVal); + csv_dequote(*pzVal); + return 1; +} + + +/* Return 0 if the argument is false and 1 if it is true. Return -1 if +** we cannot really tell. +*/ +static int csv_boolean(const char *z){ + if( wx_sqlite3_stricmp("yes",z)==0 + || wx_sqlite3_stricmp("on",z)==0 + || wx_sqlite3_stricmp("true",z)==0 + || (z[0]=='1' && z[1]==0) + ){ + return 1; + } + if( wx_sqlite3_stricmp("no",z)==0 + || wx_sqlite3_stricmp("off",z)==0 + || wx_sqlite3_stricmp("false",z)==0 + || (z[0]=='0' && z[1]==0) + ){ + return 0; + } + return -1; +} + + +/* +** Parameters: +** filename=FILENAME Name of file containing CSV content +** data=TEXT Direct CSV content. +** schema=SCHEMA Alternative CSV schema. +** header=YES|NO First row of CSV defines the names of +** columns if "yes". Default "no". +** columns=N Assume the CSV file contains N columns. +** +** Only available if compiled with SQLITE_TEST: +** +** testflags=N Bitmask of test flags. Optional +** +** If schema= is omitted, then the columns are named "c0", "c1", "c2", +** and so forth. If columns=N is omitted, then the file is opened and +** the number of columns in the first row is counted to determine the +** column count. If header=YES, then the first row is skipped. 
+*/ +static int csvtabConnect( + wx_sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + wx_sqlite3_vtab **ppVtab, + char **pzErr +){ + CsvTable *pNew = 0; /* The CsvTable object to construct */ + int bHeader = -1; /* header= flags. -1 means not seen yet */ + int rc = SQLITE_OK; /* Result code from this routine */ + int i, j; /* Loop counters */ +#ifdef SQLITE_TEST + int tstFlags = 0; /* Value for testflags=N parameter */ +#endif + int nCol = -99; /* Value of the columns= parameter */ + CsvReader sRdr; /* A CSV file reader used to store an error + ** message and/or to count the number of columns */ + static const char *azParam[] = { + "filename", "data", "schema", + }; + char *azPValue[3]; /* Parameter values */ +# define CSV_FILENAME (azPValue[0]) +# define CSV_DATA (azPValue[1]) +# define CSV_SCHEMA (azPValue[2]) + + + assert( sizeof(azPValue)==sizeof(azParam) ); + memset(&sRdr, 0, sizeof(sRdr)); + memset(azPValue, 0, sizeof(azPValue)); + for(i=3; i<argc; i++){ + const char *z = argv[i]; + const char *zValue; + for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){ + if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break; + } + if( j<sizeof(azParam)/sizeof(azParam[0]) ){ + if( sRdr.zErr[0] ) goto csvtab_connect_error; + }else + if( (zValue = csv_parameter("header",6,z))!=0 ){ + int x; + if( bHeader>=0 ){ + csv_errmsg(&sRdr, "more than one 'header' parameter"); + goto csvtab_connect_error; + } + x = csv_boolean(zValue); + if( x==1 ){ + bHeader = 1; + }else if( x==0 ){ + bHeader = 0; + }else{ + csv_errmsg(&sRdr, "unrecognized argument to 'header': %s", zValue); + goto csvtab_connect_error; + } + }else +#ifdef SQLITE_TEST + if( (zValue = csv_parameter("testflags",9,z))!=0 ){ + tstFlags = (unsigned int)atoi(zValue); + }else +#endif + if( (zValue = csv_parameter("columns",7,z))!=0 ){ + if( nCol>0 ){ + csv_errmsg(&sRdr, "more than one 'columns' parameter"); + goto csvtab_connect_error; + } + nCol = atoi(zValue); + if( nCol<=0 ){ + 
csv_errmsg(&sRdr, "must have at least one column"); + goto csvtab_connect_error; + } + }else + { + csv_errmsg(&sRdr, "unrecognized parameter '%s'", z); + goto csvtab_connect_error; + } + } + if( (CSV_FILENAME==0)==(CSV_DATA==0) ){ + csv_errmsg(&sRdr, "must either filename= or data= but not both"); + goto csvtab_connect_error; + } + if( nCol<=0 && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA) ){ + goto csvtab_connect_error; + } + pNew = wx_sqlite3_malloc( sizeof(*pNew) ); + *ppVtab = (wx_sqlite3_vtab*)pNew; + if( pNew==0 ) goto csvtab_connect_oom; + memset(pNew, 0, sizeof(*pNew)); + if( nCol>0 ){ + pNew->nCol = nCol; + }else{ + do{ + csv_read_one_field(&sRdr); + pNew->nCol++; + }while( sRdr.cTerm==',' ); + } + pNew->zFilename = CSV_FILENAME; CSV_FILENAME = 0; + pNew->zData = CSV_DATA; CSV_DATA = 0; +#ifdef SQLITE_TEST + pNew->tstFlags = tstFlags; +#endif + pNew->iStart = bHeader==1 ? ftell(sRdr.in) : 0; + csv_reader_reset(&sRdr); + if( CSV_SCHEMA==0 ){ + char *zSep = ""; + CSV_SCHEMA = wx_sqlite3_mprintf("CREATE TABLE x("); + if( CSV_SCHEMA==0 ) goto csvtab_connect_oom; + for(i=0; i<pNew->nCol; i++){ + CSV_SCHEMA = wx_sqlite3_mprintf("%z%sc%d TEXT",CSV_SCHEMA, zSep, i); + zSep = ","; + } + CSV_SCHEMA = wx_sqlite3_mprintf("%z);", CSV_SCHEMA); + } + rc = wx_sqlite3_declare_vtab(db, CSV_SCHEMA); + if( rc ) goto csvtab_connect_error; + for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){ + wx_sqlite3_free(azPValue[i]); + } + return SQLITE_OK; + +csvtab_connect_oom: + rc = SQLITE_NOMEM; + csv_errmsg(&sRdr, "out of memory"); + +csvtab_connect_error: + if( pNew ) csvtabDisconnect(&pNew->base); + for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){ + wx_sqlite3_free(azPValue[i]); + } + if( sRdr.zErr[0] ){ + wx_sqlite3_free(*pzErr); + *pzErr = wx_sqlite3_mprintf("%s", sRdr.zErr); + } + csv_reader_reset(&sRdr); + if( rc==SQLITE_OK ) rc = SQLITE_ERROR; + return rc; +} + +/* +** Reset the current row content held by a CsvCursor. 
+*/ +static void csvtabCursorRowReset(CsvCursor *pCur){ + CsvTable *pTab = (CsvTable*)pCur->base.pVtab; + int i; + for(i=0; i<pTab->nCol; i++){ + wx_sqlite3_free(pCur->azVal[i]); + pCur->azVal[i] = 0; + pCur->aLen[i] = 0; + } +} + +/* +** The xConnect and xCreate methods do the same thing, but they must be +** different so that the virtual table is not an eponymous virtual table. +*/ +static int csvtabCreate( + wx_sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + wx_sqlite3_vtab **ppVtab, + char **pzErr +){ + return csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr); +} + +/* +** Destructor for a CsvCursor. +*/ +static int csvtabClose(wx_sqlite3_vtab_cursor *cur){ + CsvCursor *pCur = (CsvCursor*)cur; + csvtabCursorRowReset(pCur); + csv_reader_reset(&pCur->rdr); + wx_sqlite3_free(cur); + return SQLITE_OK; +} + +/* +** Constructor for a new CsvTable cursor object. +*/ +static int csvtabOpen(wx_sqlite3_vtab *p, wx_sqlite3_vtab_cursor **ppCursor){ + CsvTable *pTab = (CsvTable*)p; + CsvCursor *pCur; + size_t nByte; + nByte = sizeof(*pCur) + (sizeof(char*)+sizeof(int))*pTab->nCol; + pCur = wx_sqlite3_malloc64( nByte ); + if( pCur==0 ) return SQLITE_NOMEM; + memset(pCur, 0, nByte); + pCur->azVal = (char**)&pCur[1]; + pCur->aLen = (int*)&pCur->azVal[pTab->nCol]; + *ppCursor = &pCur->base; + if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){ + csv_xfer_error(pTab, &pCur->rdr); + return SQLITE_ERROR; + } + return SQLITE_OK; +} + + +/* +** Advance a CsvCursor to its next row of input. +** Set the EOF marker if we reach the end of input. 
+*/ +static int csvtabNext(wx_sqlite3_vtab_cursor *cur){ + CsvCursor *pCur = (CsvCursor*)cur; + CsvTable *pTab = (CsvTable*)cur->pVtab; + int i = 0; + char *z; + do{ + z = csv_read_one_field(&pCur->rdr); + if( z==0 ){ + break; + } + if( i<pTab->nCol ){ + if( pCur->aLen[i] < pCur->rdr.n+1 ){ + char *zNew = wx_sqlite3_realloc64(pCur->azVal[i], pCur->rdr.n+1); + if( zNew==0 ){ + csv_errmsg(&pCur->rdr, "out of memory"); + csv_xfer_error(pTab, &pCur->rdr); + break; + } + pCur->azVal[i] = zNew; + pCur->aLen[i] = pCur->rdr.n+1; + } + memcpy(pCur->azVal[i], z, pCur->rdr.n+1); + i++; + } + }while( pCur->rdr.cTerm==',' ); + if( z==0 || (pCur->rdr.cTerm==EOF && i<pTab->nCol) ){ + pCur->iRowid = -1; + }else{ + pCur->iRowid++; + while( i<pTab->nCol ){ + wx_sqlite3_free(pCur->azVal[i]); + pCur->azVal[i] = 0; + pCur->aLen[i] = 0; + i++; + } + } + return SQLITE_OK; +} + +/* +** Return values of columns for the row at which the CsvCursor +** is currently pointing. +*/ +static int csvtabColumn( + wx_sqlite3_vtab_cursor *cur, /* The cursor */ + wx_sqlite3_context *ctx, /* First argument to wx_sqlite3_result_...() */ + int i /* Which column to return */ +){ + CsvCursor *pCur = (CsvCursor*)cur; + CsvTable *pTab = (CsvTable*)cur->pVtab; + if( i>=0 && i<pTab->nCol && pCur->azVal[i]!=0 ){ + wx_sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_STATIC); + } + return SQLITE_OK; +} + +/* +** Return the rowid for the current row. +*/ +static int csvtabRowid(wx_sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ + CsvCursor *pCur = (CsvCursor*)cur; + *pRowid = pCur->iRowid; + return SQLITE_OK; +} + +/* +** Return TRUE if the cursor has been moved off of the last +** row of output. +*/ +static int csvtabEof(wx_sqlite3_vtab_cursor *cur){ + CsvCursor *pCur = (CsvCursor*)cur; + return pCur->iRowid<0; +} + +/* +** Only a full table scan is supported. So xFilter simply rewinds to +** the beginning. 
+*/ +static int csvtabFilter( + wx_sqlite3_vtab_cursor *pVtabCursor, + int idxNum, const char *idxStr, + int argc, wx_sqlite3_value **argv +){ + CsvCursor *pCur = (CsvCursor*)pVtabCursor; + CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab; + pCur->iRowid = 0; + if( pCur->rdr.in==0 ){ + assert( pCur->rdr.zIn==pTab->zData ); + assert( pTab->iStart>=0 ); + assert( (size_t)pTab->iStart<=pCur->rdr.nIn ); + pCur->rdr.iIn = pTab->iStart; + }else{ + fseek(pCur->rdr.in, pTab->iStart, SEEK_SET); + pCur->rdr.iIn = 0; + pCur->rdr.nIn = 0; + } + return csvtabNext(pVtabCursor); +} + +/* +** Only a forward full table scan is supported. xBestIndex is mostly +** a no-op. If CSVTEST_FIDX is set, then the presence of equality +** constraints lowers the estimated cost, which is fiction, but is useful +** for testing certain kinds of virtual table behavior. +*/ +static int csvtabBestIndex( + wx_sqlite3_vtab *tab, + wx_sqlite3_index_info *pIdxInfo +){ + pIdxInfo->estimatedCost = 1000000; +#ifdef SQLITE_TEST + if( (((CsvTable*)tab)->tstFlags & CSVTEST_FIDX)!=0 ){ + /* The usual (and sensible) case is to always do a full table scan. + ** The code in this branch only runs when testflags=1. This code + ** generates an artifical and unrealistic plan which is useful + ** for testing virtual table logic but is not helpful to real applications. + ** + ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual + ** table (even though it is not) and the cost of running the virtual table + ** is reduced from 1 million to just 10. The constraints are *not* marked + ** as omittable, however, so the query planner should still generate a + ** plan that gives a correct answer, even if they plan is not optimal. 
+ */ + int i; + int nConst = 0; + for(i=0; i<pIdxInfo->nConstraint; i++){ + unsigned char op; + if( pIdxInfo->aConstraint[i].usable==0 ) continue; + op = pIdxInfo->aConstraint[i].op; + if( op==SQLITE_INDEX_CONSTRAINT_EQ + || op==SQLITE_INDEX_CONSTRAINT_LIKE + || op==SQLITE_INDEX_CONSTRAINT_GLOB + ){ + pIdxInfo->estimatedCost = 10; + pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1; + nConst++; + } + } + } +#endif + return SQLITE_OK; +} + + +static wx_sqlite3_module CsvModule = { + 0, /* iVersion */ + csvtabCreate, /* xCreate */ + csvtabConnect, /* xConnect */ + csvtabBestIndex, /* xBestIndex */ + csvtabDisconnect, /* xDisconnect */ + csvtabDisconnect, /* xDestroy */ + csvtabOpen, /* xOpen - open a cursor */ + csvtabClose, /* xClose - close a cursor */ + csvtabFilter, /* xFilter - configure scan constraints */ + csvtabNext, /* xNext - advance a cursor */ + csvtabEof, /* xEof - check for end of scan */ + csvtabColumn, /* xColumn - read data */ + csvtabRowid, /* xRowid - read data */ + 0, /* xUpdate */ + 0, /* xBegin */ + 0, /* xSync */ + 0, /* xCommit */ + 0, /* xRollback */ + 0, /* xFindMethod */ + 0, /* xRename */ +}; + +#ifdef SQLITE_TEST +/* +** For virtual table testing, make a version of the CSV virtual table +** available that has an xUpdate function. But the xUpdate always returns +** SQLITE_READONLY since the CSV file is not really writable. 
+*/ +static int csvtabUpdate(wx_sqlite3_vtab *p,int n,wx_sqlite3_value**v,wx_sqlite3_int64*x){ + return SQLITE_READONLY; +} +static wx_sqlite3_module CsvModuleFauxWrite = { + 0, /* iVersion */ + csvtabCreate, /* xCreate */ + csvtabConnect, /* xConnect */ + csvtabBestIndex, /* xBestIndex */ + csvtabDisconnect, /* xDisconnect */ + csvtabDisconnect, /* xDestroy */ + csvtabOpen, /* xOpen - open a cursor */ + csvtabClose, /* xClose - close a cursor */ + csvtabFilter, /* xFilter - configure scan constraints */ + csvtabNext, /* xNext - advance a cursor */ + csvtabEof, /* xEof - check for end of scan */ + csvtabColumn, /* xColumn - read data */ + csvtabRowid, /* xRowid - read data */ + csvtabUpdate, /* xUpdate */ + 0, /* xBegin */ + 0, /* xSync */ + 0, /* xCommit */ + 0, /* xRollback */ + 0, /* xFindMethod */ + 0, /* xRename */ +}; +#endif /* SQLITE_TEST */ + +#endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */ + + +#ifdef _WIN32 +__declspec(dllexport) +#endif +/* +** This routine is called when the extension is loaded. The new +** CSV virtual table module is registered with the calling database +** connection. +*/ +int wx_sqlite3_csv_init( + wx_sqlite3 *db, + char **pzErrMsg, + const wx_sqlite3_api_routines *pApi +){ +#ifndef SQLITE_OMIT_VIRTUALTABLE + int rc; + SQLITE_EXTENSION_INIT2(pApi); + rc = wx_sqlite3_create_module(db, "csv", &CsvModule, 0); +#ifdef SQLITE_TEST + if( rc==SQLITE_OK ){ + rc = wx_sqlite3_create_module(db, "csv_wr", &CsvModuleFauxWrite, 0); + } +#endif + return rc; +#else + return SQLITE_OK; +#endif +} + |
