/* ** 2020-02-08 modified by Keith Medcalf who also disclaims all copyright ** on the modifications and hereby places this code in the public domain ** ** This file contains the implementation of an SQLite virtual table for ** reading VSV (Variably Separated Values), which are like CSV files, ** but subtly different. VSV supports a number of extensions to the ** CSV format as well as more processing options. ** ** http:\\www.dessus.com\files\vsv.c ** ** Usage: ** ** create virtual table temp.vsv using vsv(...); ** select * from vsv; ** ** The parameters to the vsv module (the vsv(...) part) are as follows: ** ** filename=STRING the filename, passed to the Operating System ** data=STRING alternative data ** schema=STRING Alternate Schema to use ** columns=N columns parsed from the VSV file ** header=BOOL whether or not a header row is present ** skip=N number of leading data rows to skip ** rsep=STRING record separator ** fsep=STRING field separator ** validatetext=BOOL validate UTF-8 encoding of text fields ** affinity=AFFINITY affinity to apply to each returned value ** nulls=BOOL empty fields are returned as NULL ** ** ** Defaults: ** ** filename / data nothing. You must provide one or the other ** it is an error to provide both or neither ** schema nothing. If not provided then one will be ** generated for you from the header, or if no ** header is available then autogenerated using ** field names manufactured as cX where X is the ** column number ** columns nothing. If not specified then the number of ** columns is determined by counting the fields ** in the first record of the VSV file (which ** will be the header row if header is specified), ** the number of columns is not parsed from the ** schema even if one is provided ** header=no no header row in the VSV file ** skip=0 do not skip any data rows in the VSV file ** fsep=',' default field separator is a comma ** rsep='\n' default record separator is a newline ** validatetext=no do not validate text field encoding ** affinity=none do not apply affinity to each returned value ** nulls=off empty fields returned as zero-length ** ** ** Parameter types: ** ** STRING means a quoted string ** N means a whole number not containing a sign ** BOOL means something that evaluates as true or false ** it is case insensitive ** yes, no, true, false, 1, 0 ** AFFINITY means an SQLite3 type specification ** it is case insensitive ** none, blob, text, integer, real, numeric ** ** STRING means a quoted string. The quote character may be either ** a single quote or a double quote. Two quote characters in a row ** will be replaced with one quote character. STRINGS do not ** need to be quoted if it is obvious where they begin and end ** (that is, they do not contain a comma or other character that the ** parser treats especially, such as : or \). Leading and trailing ** spaces will be trimmed from unquoted strings. ** ** filename =./this/filename.here, ... ** filename =./this/filename.here , ... ** filename = ./this/filename.here, ... ** filename = ./this/filename.here , ... ** filename = './this/filename.here', ... ** filename = "./this/filename.here", ... ** ** are all equivalent. ** ** BOOL defaults to true so the following specifications are all the ** same: ** ** header = true ** header = yes ** header = 1 ** header ** ** ** Specific Parameters: ** ** The platform/compiler/OS fopen call is responsible for interpreting ** the filename. It may contain anything recognized by the OS. ** ** The separator string containing exactly one character, or a valid ** escape sequence. Recognized escape sequences are: ** ** \t horizontal tab, ascii character 9 (0x09) ** \n linefeed, ascii character 10 (0x0a) ** \v vertical tab, ascii character 11 (0x0b) ** \f form feed, ascii character 12 (0x0c) ** \xhh specific byte where hh is hexadecimal ** ** The validatetext setting will cause the validity of the field ** encoding (not its contents) to be verified. It effects how ** fields that are supposed to contain text will be returned to ** the SQLite3 library in order to prevent invalid utf8 data from ** being stored or processed as if it were valid utf8 text. ** ** The nulls option will cause fields that do not contain anything ** to return NULL rather than an empty result. Two separators ** side-by-each with no intervening characters at all will be ** returned as NULL if nulls is true; if nulls is false or ** the contents are explicity empty ("") then a 0 length blob ** (if affinity=blob) or 0 length text string. ** ** For the affinity setting, the following processing is applied to ** each value returned by the VSV virtual table: ** ** none no affinity is applied, all fields will be ** returned as text just like in the original ** csv module, embedded nulls will terminate ** the text. if validatetext is in effect then ** an error will be thrown if the field does ** not contain validly encoded text or contains ** embedded nulls ** ** blob all fields will be returned as blobs ** validatetext has no effect ** ** text all fields will be returned as text just ** like in the original csv module, embedded ** nulls will terminate the text. ** if validatetext is in effect then a blob ** will be returned if the field does not ** contain validly encoded text or the field ** contains embedded nulls ** ** integer if the field data looks like an integer, ** (regex "^ *(\+|-)?\d+ *$"), ** then an integer will be returned as ** provided by the compiler and platform ** runtime strtoll function ** otherwise the field will be processed as ** text as defined above ** ** real if the field data looks like a number, ** (regex "^ *(\+|-)?(\d+\.?\d*|\d*\.?\d+)([eE](\+|-)?\d+)? *$") ** then a double will be returned as ** provided by the compiler and platform ** runtime strtold function otherwise the ** field will be processed as text as ** defined above ** ** numeric if the field looks like an integer ** (see integer above) that integer will be ** returned ** if the field looks like a number ** (see real above) then the number will ** returned as an integer if it has no ** fractional part and ** (a) your platform/compiler supports ** long double and the number will fit in ** a 64-bit integer; or, ** (b) your platform/compiler does not ** support long double (treats it as a double) ** then a 64-bit integer will only be returned ** for integers that can be represented in the ** 53 bit significand of a double ** ** The nulls option will cause fields that do not contain anything ** to return NULL rather than an empty result. Two separators ** side-by-each with no intervening characters at all will be ** returned as NULL if nulls is true; if nulls is false or ** the contents are explicity empty ("") then a 0 length blob ** (if affinity=blob) or 0 length text string will be returned. ** */ /* ** 2016-05-28 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** ** This file contains the implementation of an SQLite virtual table for ** reading CSV files. ** ** Usage: ** ** .load ./csv ** CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME); ** SELECT * FROM csv; ** ** The columns are named "c1", "c2", "c3", ... by default. Or the ** application can define its own CREATE TABLE statement using the ** schema= parameter, like this: ** ** CREATE VIRTUAL TABLE temp.csv2 USING csv( ** filename = "../http.log", ** schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)" ** ); ** ** Instead of specifying a file, the text of the CSV can be loaded using ** the data= parameter. ** ** If the columns=N parameter is supplied, then the CSV file is assumed to have ** N columns. If both the columns= and schema= parameters are omitted, then ** the number and names of the columns is determined by the first line of ** the CSV input. ** */ #include SQLITE_EXTENSION_INIT1 #include #include #include #include #include #include #include #include #ifdef SQLITE_HAVE_ZLIB #include #define fopen gzopen #define fclose gzclose #define fread gzfread #define fseek gzseek #define ftell gztell #endif #undef LONGDOUBLE_CONSTANT #undef LONGDOUBLE_TYPE #if defined(__GNUC__) && defined(_WIN64) #include #define LONGDOUBLE_TYPE __float128 #define LONGDOUBLE_CONSTANT(x) x##Q #define modfl modfq #define strtold strtoflt128 #else #define LONGDOUBLE_TYPE long double #define LONGDOUBLE_CONSTANT(x) x##L #endif #ifndef SQLITE_OMIT_VIRTUALTABLE /* ** A macro to hint to the compiler that a function should not be ** inlined. */ #if defined(__GNUC__) #define VSV_NOINLINE __attribute__((noinline)) #elif defined(_MSC_VER) && _MSC_VER>=1310 #define VSV_NOINLINE __declspec(noinline) #else #define VSV_NOINLINE #endif /* ** Max size of the error message in a VsvReader */ #define VSV_MXERR 200 /* ** Size of the VsvReader input buffer */ #define VSV_INBUFSZ 1024 /* ** A context object used when read a VSV file. */ typedef struct VsvReader VsvReader; struct VsvReader { #ifdef SQLITE_HAVE_ZLIB gzFile in; /* Read the VSV text from this compressed input stream */ #else FILE *in; /* Read the VSV text from this input stream */ #endif char *z; /* Accumulated text for a field */ int n; /* Number of bytes in z */ int nAlloc; /* Space allocated for z[] */ int nLine; /* Current line number */ int bNotFirst; /* True if prior text has been seen */ int cTerm; /* Character that terminated the most recent field */ int fsep; /* Field Seperator Character */ int rsep; /* Record Seperator Character */ int affinity; /* Perform Affinity Conversions */ int notNull; /* Have we seen data for field */ size_t iIn; /* Next unread character in the input buffer */ size_t nIn; /* Number of characters in the input buffer */ char *zIn; /* The input buffer */ char zErr[VSV_MXERR]; /* Error message */ }; /* ** Initialize a VsvReader object */ static void vsv_reader_init(VsvReader *p) { p->in = 0; p->z = 0; p->n = 0; p->nAlloc = 0; p->nLine = 0; p->bNotFirst = 0; p->nIn = 0; p->zIn = 0; p->notNull = 0; p->zErr[0] = 0; } /* ** Close and reset a VsvReader object */ static void vsv_reader_reset(VsvReader *p) { if (p->in) { fclose(p->in); sqlite3_free(p->zIn); } sqlite3_free(p->z); vsv_reader_init(p); } /* ** Report an error on a VsvReader */ static void vsv_errmsg(VsvReader *p, const char *zFormat, ...) { va_list ap; va_start(ap, zFormat); sqlite3_vsnprintf(VSV_MXERR, p->zErr, zFormat, ap); va_end(ap); } /* ** Open the file associated with a VsvReader ** Return the number of errors. */ static int vsv_reader_open( VsvReader *p, /* The reader to open */ const char *zFilename, /* Read from this filename */ const char *zData /* ... or use this data */ ) { if (zFilename) { p->zIn = sqlite3_malloc(VSV_INBUFSZ); if (p->zIn==0) { vsv_errmsg(p, "out of memory"); return 1; } p->in = fopen(zFilename, "rb"); if (p->in==0) { sqlite3_free(p->zIn); vsv_reader_reset(p); vsv_errmsg(p, "cannot open '%s' for reading", zFilename); return 1; } } else { assert( p->in==0 ); p->zIn = (char*)zData; p->nIn = strlen(zData); } return 0; } /* ** The input buffer has overflowed. Refill the input buffer, then ** return the next character */ static VSV_NOINLINE int vsv_getc_refill(VsvReader *p) { size_t got; assert( p->iIn>=p->nIn ); /* Only called on an empty input buffer */ assert( p->in!=0 ); /* Only called if reading from a file */ got = fread(p->zIn, 1, VSV_INBUFSZ, p->in); if (got==0) { return EOF; } p->nIn = got; p->iIn = 1; return p->zIn[0]; } /* ** Return the next character of input. Return EOF at end of input. */ static int vsv_getc(VsvReader *p) { if (p->iIn >= p->nIn) { if (p->in!=0) { return vsv_getc_refill(p); } return EOF; } return((unsigned char*)p->zIn)[p->iIn++]; } /* ** Increase the size of p->z and append character c to the end. ** Return 0 on success and non-zero if there is an OOM error */ static VSV_NOINLINE int vsv_resize_and_append(VsvReader *p, char c) { char *zNew; int nNew = p->nAlloc*2 + 100; zNew = sqlite3_realloc64(p->z, nNew); if (zNew) { p->z = zNew; p->nAlloc = nNew; p->z[p->n++] = c; return 0; } else { vsv_errmsg(p, "out of memory"); return 1; } } /* ** Append a single character to the VsvReader.z[] array. ** Return 0 on success and non-zero if there is an OOM error */ static int vsv_append(VsvReader *p, char c) { if (p->n>=p->nAlloc-1) { return vsv_resize_and_append(p, c); } p->z[p->n++] = c; return 0; } /* ** Read a single field of VSV text. Compatible with rfc4180 and extended ** with the option of having a separator other than ",". ** ** + Input comes from p->in. ** + Store results in p->z of length p->n. Space to hold p->z comes ** from sqlite3_malloc64(). ** + Keep track of the line number in p->nLine. ** + Store the character that terminates the field in p->cTerm. Store ** EOF on end-of-file. ** ** Return 0 at EOF or on OOM. On EOF, the p->cTerm character will have ** been set to EOF. */ static char *vsv_read_one_field(VsvReader *p) { int c; p->notNull = 0; p->n = 0; c = vsv_getc(p); if (c==EOF) { p->cTerm = EOF; return 0; } if (c=='"') { int pc, ppc; int startLine = p->nLine; p->notNull = 1; pc = ppc = 0; while (1) { c = vsv_getc(p); if (c=='\n') { p->nLine++; } if (c=='"' && pc=='"') { pc = ppc; ppc = 0; continue; } if ( (c==p->fsep && pc=='"') || (c==p->rsep && pc=='"') || (p->rsep=='\n' && c=='\n' && pc=='\r' && ppc=='"') || (c==EOF && pc=='"') ) { do { p->n--; } while (p->z[p->n]!='"'); p->cTerm = (char)c; break; } if (pc=='"' && p->rsep=='\n' && c!='\r') { vsv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"'); break; } if (c==EOF) { vsv_errmsg(p, "line %d: unterminated %c-quoted field\n", startLine, '"'); p->cTerm = (char)c; break; } if (vsv_append(p, (char)c)) { return 0; } ppc = pc; pc = c; } } else { /* ** If this is the first field being parsed and it begins with the ** UTF-8 BOM (0xEF BB BF) then skip the BOM */ if ((c&0xff)==0xef && p->bNotFirst==0) { vsv_append(p, (char)c); c = vsv_getc(p); if ((c&0xff)==0xbb) { vsv_append(p, (char)c); c = vsv_getc(p); if ((c&0xff)==0xbf) { p->bNotFirst = 1; p->n = 0; return vsv_read_one_field(p); } } } while (c!=EOF && c!=p->rsep && c!=p->fsep) { if (c=='\n') p->nLine++; if (!p->notNull) p->notNull = 1; if (vsv_append(p, (char)c)) return 0; c = vsv_getc(p); } if (c=='\n') { p->nLine++; } if (p->n>0 && (p->rsep=='\n' || p->fsep=='\n') && p->z[p->n-1]=='\r') { p->n--; if (p->n==0) { p->notNull = 0; } } p->cTerm = (char)c; } assert( p->z==0 || p->nnAlloc ); if (p->z) { p->z[p->n] = 0; } p->bNotFirst = 1; return p->z; } /* ** Forward references to the various virtual table methods implemented ** in this file. */ static int vsvtabCreate(sqlite3*, void*, int, const char*const*, sqlite3_vtab**,char**); static int vsvtabConnect(sqlite3*, void*, int, const char*const*, sqlite3_vtab**,char**); static int vsvtabBestIndex(sqlite3_vtab*,sqlite3_index_info*); static int vsvtabDisconnect(sqlite3_vtab*); static int vsvtabOpen(sqlite3_vtab*, sqlite3_vtab_cursor**); static int vsvtabClose(sqlite3_vtab_cursor*); static int vsvtabFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr, int argc, sqlite3_value **argv); static int vsvtabNext(sqlite3_vtab_cursor*); static int vsvtabEof(sqlite3_vtab_cursor*); static int vsvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int); static int vsvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*); /* ** An instance of the VSV virtual table */ typedef struct VsvTable { sqlite3_vtab base; /* Base class. Must be first */ char *zFilename; /* Name of the VSV file */ char *zData; /* Raw VSV data in lieu of zFilename */ long iStart; /* Offset to start of data in zFilename */ int nCol; /* Number of columns in the VSV file */ int fsep; /* The field seperator for this VSV file */ int rsep; /* The record seperator for this VSV file */ int affinity; /* Perform affinity conversions */ int nulls; /* Process NULLs */ int validateUTF8; /* Validate UTF8 */ unsigned int tstFlags; /* Bit values used for testing */ } VsvTable; /* ** Allowed values for tstFlags */ #define VSVTEST_FIDX 0x0001 /* Pretend that constrained searchs cost less*/ /* ** A cursor for the VSV virtual table */ typedef struct VsvCursor { sqlite3_vtab_cursor base; /* Base class. Must be first */ VsvReader rdr; /* The VsvReader object */ char **azVal; /* Value of the current row */ int *aLen; /* Allocation Length of each entry */ int *dLen; /* Data Length of each entry */ sqlite3_int64 iRowid; /* The current rowid. Negative for EOF */ } VsvCursor; /* ** Transfer error message text from a reader into a VsvTable */ static void vsv_xfer_error(VsvTable *pTab, VsvReader *pRdr) { sqlite3_free(pTab->base.zErrMsg); pTab->base.zErrMsg = sqlite3_mprintf("%s", pRdr->zErr); } /* ** This method is the destructor for a VsvTable object. */ static int vsvtabDisconnect(sqlite3_vtab *pVtab) { VsvTable *p = (VsvTable*)pVtab; sqlite3_free(p->zFilename); sqlite3_free(p->zData); sqlite3_free(p); return SQLITE_OK; } /* ** Skip leading whitespace. Return a pointer to the first non-whitespace ** character, or to the zero terminator if the string has only whitespace */ static const char *vsv_skip_whitespace(const char *z) { while (isspace((unsigned char)z[0])) { z++; } return z; } /* ** Remove trailing whitespace from the end of string z[] */ static void vsv_trim_whitespace(char *z) { size_t n = strlen(z); while (n>0 && isspace((unsigned char)z[n])) { n--; } z[n] = 0; } /* ** Dequote the string */ static void vsv_dequote(char *z) { int j; char cQuote = z[0]; size_t i, n; if (cQuote!='\'' && cQuote!='"') { return; } n = strlen(z); if (n<2 || z[n-1]!=z[0]) { return; } for (i=1, j=0; izErr. If there are no errors, p->zErr[0]==0. */ static int vsv_string_parameter( VsvReader *p, /* Leave the error message here, if there is one */ const char *zParam, /* Parameter we are checking for */ const char *zArg, /* Raw text of the virtual table argment */ char **pzVal /* Write the dequoted string value here */ ) { const char *zValue; zValue = vsv_parameter(zParam,(int)strlen(zParam),zArg); if (zValue==0) { return 0; } p->zErr[0] = 0; if (*pzVal) { vsv_errmsg(p, "more than one '%s' parameter", zParam); return 1; } *pzVal = sqlite3_mprintf("%s", zValue); if (*pzVal==0) { vsv_errmsg(p, "out of memory"); return 1; } vsv_trim_whitespace(*pzVal); vsv_dequote(*pzVal); return 1; } /* ** Return 0 if the argument is false and 1 if it is true. Return -1 if ** we cannot really tell. */ static int vsv_boolean(const char *z) { if (sqlite3_stricmp("yes",z)==0 || sqlite3_stricmp("on",z)==0 || sqlite3_stricmp("true",z)==0 || (z[0]=='1' && z[1]==0) ) { return 1; } if (sqlite3_stricmp("no",z)==0 || sqlite3_stricmp("off",z)==0 || sqlite3_stricmp("false",z)==0 || (z[0]=='0' && z[1]==0) ) { return 0; } return -1; } /* ** Check to see if the string is of the form: "TAG = BOOLEAN" or just "TAG". ** If it is, set *pValue to be the value of the boolean ("true" if there is ** not "= BOOLEAN" component) and return non-zero. If the input string ** does not begin with TAG, return zero. */ static int vsv_boolean_parameter( const char *zTag, /* Tag we are looking for */ int nTag, /* Size of the tag in bytes */ const char *z, /* Input parameter */ int *pValue /* Write boolean value here */ ) { int b; z = vsv_skip_whitespace(z); if (strncmp(zTag, z, nTag)!=0) { return 0; } z = vsv_skip_whitespace(z + nTag); if (z[0]==0) { *pValue = 1; return 1; } if (z[0]!='=') { return 0; } z = vsv_skip_whitespace(z+1); b = vsv_boolean(z); if (b>=0) { *pValue = b; return 1; } return 0; } /* ** Convert the seperator character specification into the character code ** Return 1 signifies error, 0 for no error ** ** Recognized inputs: ** any single character ** escaped characters \f \n \t \v ** escaped hex byte \x1e \x1f etc (RS and US respectively) ** */ static int vsv_parse_sep_char(char *in, int dflt, int *out) { if (!in) { *out = dflt; return 0; } switch (strlen(in)) { case 0: { *out = dflt; return 0; } case 1: { *out = in[0]; return 0; } case 2: { if (in[0] != '\\') { return 1; } switch (in[1]) { case 'f': { *out = 12; return 0; } case 'n': { *out = 10; return 0; } case 't': { *out = 9; return 0; } case 'v': { *out = 11; return 0; } } return 1; } case 4: { if (sqlite3_strnicmp(in, "\\x", 2) != 0) { return 1; } if (!isxdigit(in[2]) || !isxdigit(in[3])) { return 1; } *out = ((in[2] > '9' ? (in[2] & 0x0f) + 9 : in[2] & 0x0f) << 4) + (in[3] > '9' ? (in[3] & 0x0f) + 9 : in[3] & 0x0f); return 0; } } return 0; } /* ** Parameters: ** filename=FILENAME Name of file containing VSV content ** data=TEXT Direct VSV content. ** schema=SCHEMA Alternative VSV schema. ** header=YES|NO First row of VSV defines the names of ** columns if "yes". Default "no". ** columns=N Assume the VSV file contains N columns. ** fsep=FSET Field Seperator ** rsep=RSEP Record Seperator ** skip=N skip N records of file (default 0) ** affinity=AFF affinity to apply to ALL columns ** default: none ** none text integer real numeric ** ** Only available if compiled with SQLITE_TEST: ** ** testflags=N Bitmask of test flags. Optional ** ** If schema= is omitted, then the columns are named "c0", "c1", "c2", ** and so forth. If columns=N is omitted, then the file is opened and ** the number of columns in the first row is counted to determine the ** column count. If header=YES, then the first row is skipped. */ static int vsvtabConnect( sqlite3 *db, void *pAux, int argc, const char *const*argv, sqlite3_vtab **ppVtab, char **pzErr ) { VsvTable *pNew = 0; /* The VsvTable object to construct */ int affinity = -1; /* Affinity coercion */ int bHeader = -1; /* header= flags. -1 means not seen yet */ int validateUTF8 = -1; /* validateUTF8 flag */ int rc = SQLITE_OK; /* Result code from this routine */ int i, j; /* Loop counters */ #ifdef SQLITE_TEST int tstFlags = 0; /* Value for testflags=N parameter */ #endif int b; /* Value of a boolean parameter */ int nCol = -99; /* Value of the columns= parameter */ int nSkip = -1; /* Value of the skip= parameter */ int bNulls = -1; /* Process Nulls flag */ VsvReader sRdr; /* A VSV file reader used to store an error ** message and/or to count the number of columns */ static const char *azParam[] = { "filename", "data", "schema", "fsep", "rsep" }; char *azPValue[5]; /* Parameter values */ #define VSV_FILENAME (azPValue[0]) #define VSV_DATA (azPValue[1]) #define VSV_SCHEMA (azPValue[2]) #define VSV_FSEP (azPValue[3]) #define VSV_RSEP (azPValue[4]) assert( sizeof(azPValue)==sizeof(azParam) ); memset(&sRdr, 0, sizeof(sRdr)); memset(azPValue, 0, sizeof(azPValue)); for (i=3; i=0) { vsv_errmsg(&sRdr, "more than one 'header' parameter"); goto vsvtab_connect_error; } bHeader = b; } else if (vsv_boolean_parameter("validatetext",12,z,&b)) { if (validateUTF8>=0) { vsv_errmsg(&sRdr, "more than one 'validatetext' parameter"); goto vsvtab_connect_error; } validateUTF8 = b; } else if (vsv_boolean_parameter("nulls",5,z,&b)) { if (bNulls>=0) { vsv_errmsg(&sRdr, "more than one 'nulls' parameter"); goto vsvtab_connect_error; } bNulls = b; } else #ifdef SQLITE_TEST if ((zValue = vsv_parameter("testflags",9,z))!=0) { tstFlags = (unsigned int)atoi(zValue); } else #endif if ((zValue = vsv_parameter("columns",7,z))!=0) { if (nCol>0) { vsv_errmsg(&sRdr, "more than one 'columns' parameter"); goto vsvtab_connect_error; } nCol = atoi(zValue); if (nCol<=0) { vsv_errmsg(&sRdr, "column= value must be positive"); goto vsvtab_connect_error; } } else if ((zValue = vsv_parameter("skip",4,z))!=0) { if (nSkip>0) { vsv_errmsg(&sRdr, "more than one 'skip' parameter"); goto vsvtab_connect_error; } nSkip = atoi(zValue); if (nSkip<=0) { vsv_errmsg(&sRdr, "skip= value must be positive"); goto vsvtab_connect_error; } } else if ((zValue = vsv_parameter("affinity",8,z))!=0) { if (affinity>-1) { vsv_errmsg(&sRdr, "more than one 'affinity' parameter"); goto vsvtab_connect_error; } if (sqlite3_strnicmp(zValue,"none",4)==0) affinity=0; else if (sqlite3_strnicmp(zValue,"blob",4)==0) affinity=1; else if (sqlite3_strnicmp(zValue,"text",4)==0) affinity=2; else if (sqlite3_strnicmp(zValue,"integer",7)==0) affinity=3; else if (sqlite3_strnicmp(zValue,"real",4)==0) affinity=4; else if (sqlite3_strnicmp(zValue,"numeric",7)==0) affinity=5; else { vsv_errmsg(&sRdr, "unknown affinity: '%s'", zValue); goto vsvtab_connect_error; } } else { vsv_errmsg(&sRdr, "bad parameter: '%s'", z); goto vsvtab_connect_error; } } if (affinity==-1) { affinity = 0; } if (bNulls==-1) { bNulls = 0; } if (validateUTF8==-1) { validateUTF8 = 0; } if ((VSV_FILENAME==0)==(VSV_DATA==0)) { vsv_errmsg(&sRdr, "must specify either filename= or data= but not both"); goto vsvtab_connect_error; } if (vsv_parse_sep_char(VSV_FSEP, ',', &(sRdr.fsep))) { vsv_errmsg(&sRdr, "cannot parse fsep: '%s'", VSV_FSEP); goto vsvtab_connect_error; } if (vsv_parse_sep_char(VSV_RSEP, '\n', &(sRdr.rsep))) { vsv_errmsg(&sRdr, "cannot parse rsep: '%s'", VSV_RSEP); goto vsvtab_connect_error; } if ((nCol <= 0 || bHeader == 1) && vsv_reader_open(&sRdr, VSV_FILENAME, VSV_DATA)) { goto vsvtab_connect_error; } pNew = sqlite3_malloc( sizeof(*pNew) ); *ppVtab = (sqlite3_vtab*)pNew; if (pNew==0) { goto vsvtab_connect_oom; } memset(pNew, 0, sizeof(*pNew)); pNew->fsep = sRdr.fsep; pNew->rsep = sRdr.rsep; pNew->affinity = affinity; pNew->validateUTF8 = validateUTF8; pNew->nulls = bNulls; if (VSV_SCHEMA==0) { sqlite3_str *pStr = sqlite3_str_new(0); char *zSep = ""; int iCol = 0; sqlite3_str_appendf(pStr, "CREATE TABLE x("); if (nCol<0 && bHeader<1) { nCol = 0; do { vsv_read_one_field(&sRdr); nCol++; } while (sRdr.cTerm==sRdr.fsep); } if (nCol>0 && bHeader<1) { for (iCol=0; iCol0 && iColnCol = nCol; if (nSkip>0) { int tskip = nSkip + (bHeader==1); vsv_reader_reset(&sRdr); if (vsv_reader_open(&sRdr, VSV_FILENAME, VSV_DATA)) { goto vsvtab_connect_error; } do { do { if (!vsv_read_one_field(&sRdr)) goto vsvtab_connect_error; } while (sRdr.cTerm==sRdr.fsep); tskip--; } while (tskip>0 && sRdr.cTerm==sRdr.rsep); if (tskip>0) { vsv_errmsg(&sRdr, "premature end of file during skip"); goto vsvtab_connect_error; } } pNew->zFilename = VSV_FILENAME; VSV_FILENAME = 0; pNew->zData = VSV_DATA; VSV_DATA = 0; #ifdef SQLITE_TEST pNew->tstFlags = tstFlags; #endif if (bHeader!=1 && nSkip<1) { pNew->iStart = 0; } else if (pNew->zData) { pNew->iStart = (int)sRdr.iIn; } else { pNew->iStart = (int)(ftell(sRdr.in) - sRdr.nIn + sRdr.iIn); } vsv_reader_reset(&sRdr); rc = sqlite3_declare_vtab(db, VSV_SCHEMA); if (rc) { vsv_errmsg(&sRdr, "bad schema: '%s' - %s", VSV_SCHEMA, sqlite3_errmsg(db)); goto vsvtab_connect_error; } for (i=0; ibase); } for (i=0; ibase.pVtab; int i; for (i=0; inCol; i++) { sqlite3_free(pCur->azVal[i]); pCur->azVal[i] = 0; pCur->aLen[i] = 0; pCur->dLen[i] = -1; } } /* ** The xConnect and xCreate methods do the same thing, but they must be ** different so that the virtual table is not an eponymous virtual table. */ static int vsvtabCreate( sqlite3 *db, void *pAux, int argc, const char *const*argv, sqlite3_vtab **ppVtab, char **pzErr ) { return vsvtabConnect(db, pAux, argc, argv, ppVtab, pzErr); } /* ** Destructor for a VsvCursor. */ static int vsvtabClose(sqlite3_vtab_cursor *cur) { VsvCursor *pCur = (VsvCursor*)cur; vsvtabCursorRowReset(pCur); vsv_reader_reset(&pCur->rdr); sqlite3_free(cur); return SQLITE_OK; } /* ** Constructor for a new VsvTable cursor object. */ static int vsvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor) { VsvTable *pTab = (VsvTable*)p; VsvCursor *pCur; size_t nByte; nByte = sizeof(*pCur) + (sizeof(char*)+(2*sizeof(int)))*pTab->nCol; pCur = sqlite3_malloc64( nByte ); if (pCur==0) return SQLITE_NOMEM; memset(pCur, 0, nByte); pCur->azVal = (char**)&pCur[1]; pCur->aLen = (int*)&pCur->azVal[pTab->nCol]; pCur->dLen = (int*)&pCur->aLen[pTab->nCol]; pCur->rdr.fsep = pTab->fsep; pCur->rdr.rsep = pTab->rsep; pCur->rdr.affinity = pTab->affinity; *ppCursor = &pCur->base; if (vsv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData)) { vsv_xfer_error(pTab, &pCur->rdr); return SQLITE_ERROR; } return SQLITE_OK; } /* ** Advance a VsvCursor to its next row of input. ** Set the EOF marker if we reach the end of input. */ static int vsvtabNext(sqlite3_vtab_cursor *cur) { VsvCursor *pCur = (VsvCursor*)cur; VsvTable *pTab = (VsvTable*)cur->pVtab; int i = 0; char *z; do { z = vsv_read_one_field(&pCur->rdr); if (z==0) { if (inCol) pCur->dLen[i] = -1; } else if (inCol) { if (pCur->aLen[i] < pCur->rdr.n+1) { char *zNew = sqlite3_realloc64(pCur->azVal[i], pCur->rdr.n+1); if (zNew==0) { z = 0; vsv_errmsg(&pCur->rdr, "out of memory"); vsv_xfer_error(pTab, &pCur->rdr); break; } pCur->azVal[i] = zNew; pCur->aLen[i] = pCur->rdr.n+1; } if (!pCur->rdr.notNull && pTab->nulls) { pCur->dLen[i] = -1; } else { pCur->dLen[i] = pCur->rdr.n; memcpy(pCur->azVal[i], z, pCur->rdr.n+1); } i++; } } while (pCur->rdr.cTerm==pCur->rdr.fsep); if ((pCur->rdr.cTerm==EOF && i==0)) { pCur->iRowid = -1; } else { pCur->iRowid++; while (inCol) { pCur->dLen[i] = -1; i++; } } return SQLITE_OK; } /* ** ** Determine affinity of field ** ** ignore leading space ** then may have + or - ** then may have digits or . (if . found then type=real) ** then may have digits (if another . then not number) ** then may have e (if found then type=real) ** then may have + or - ** then may have digits ** then may have trailing space */ static int vsv_isValidNumber(char *arg) { char *start; char *stop; int isValid = 0; int hasDigit = 0; start = arg; stop = arg + strlen(arg) - 1; while (start <= stop && *start==' ') // strip spaces from begining { start++; } while (start <= stop && *stop==' ') // strip spaces from end { stop--; } if (start > stop) { goto vsv_end_isValidNumber; } if (start <= stop && (*start=='+' || *start=='-')) // may have + or - { start++; } if (start <= stop && isdigit(*start)) // must have a digit to be valid { hasDigit = 1; isValid = 1; } while (start <= stop && isdigit(*start)) // bunch of digits { start++; } if (start <= stop && *start=='.') // may have . { isValid = 2; start++; } if (start <= stop && isdigit(*start)) { hasDigit = 1; } while (start <= stop && isdigit(*start)) // bunch of digits { start++; } if (!hasDigit) // no digits then invalid { isValid = 0; goto vsv_end_isValidNumber; } if (start <= stop && (*start=='e' || *start=='E')) // may have 'e' or 'E' { isValid = 3; start++; } if (start <= stop && isValid == 3 && (*start == '+' || *start == '-')) { start++; } if (start <= stop && isValid == 3 && isdigit(*start)) { isValid = 2; } while (start <= stop && isdigit(*start)) // bunch of digits { start++; } if (isValid == 3) { isValid = 0; } vsv_end_isValidNumber: if (start <= stop) { isValid = 0; } return isValid; } /* ** Validate UTF-8 ** Return -1 if invalid else length */ static long long vsv_utf8IsValid(char *string) { long long length = 0; unsigned char *start; int trailing = 0; unsigned char c; start = (unsigned char *)string; while ((c = *start)) { if (trailing) { if ((c & 0xC0) == 0x80) { trailing--; start++; length++; continue; } else { length = -1; break; } } if ((c & 0x80) == 0) { start++; length++; continue; } if ((c & 0xE0) == 0xC0) { trailing = 1; start++; length++; continue; } if ((c & 0xF0) == 0xE0) { trailing = 2; start++; length++; continue; } if ((c & 0xF8) == 0xF0) { trailing = 3; start++; length++; continue; } #if 0 // UTF-8 does not encode sequences longer than 4 bytes (yet) if ((c & 0xFC) == 0xF8) { trailing = 4; start++; length++; continue; } if ((c & 0xFE) == 0xFC) { trailing = 5; start++; length++; continue; } if ((c & 0xFF) == 0xFE) { trailing = 6; start++; length++; continue; } if ((c & 0xFF) == 0xFF) { trailing = 7; start++; length++; continue; } #endif length = -1; break; } return length; } /* ** Return values of columns for the row at which the VsvCursor ** is currently pointing. */ static int vsvtabColumn( sqlite3_vtab_cursor *cur, /* The cursor */ sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ int i /* Which column to return */ ) { VsvCursor *pCur = (VsvCursor*)cur; VsvTable *pTab = (VsvTable*)cur->pVtab; long long dLen = pCur->dLen[i]; long long length = 0; static int hasExtended = 0; if (i>=0 && inCol && pCur->azVal[i]!=0 && dLen>-1) { switch (pTab->affinity) { case 0: { if (pTab->validateUTF8) { length = vsv_utf8IsValid(pCur->azVal[i]); if (length == dLen) { sqlite3_result_text(ctx, pCur->azVal[i], dLen, SQLITE_TRANSIENT); } else { sqlite3_result_error(ctx, "Invalid UTF8 Data", -1); } } else { sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_TRANSIENT); } break; } case 1: { sqlite3_result_blob(ctx, pCur->azVal[i], dLen, SQLITE_TRANSIENT); break; } case 2: { if (pTab->validateUTF8) { length = vsv_utf8IsValid(pCur->azVal[i]); if (length < dLen) { sqlite3_result_blob(ctx, pCur->azVal[i], dLen, SQLITE_TRANSIENT); } else { sqlite3_result_text(ctx, pCur->azVal[i], length, SQLITE_TRANSIENT); } } else { sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_TRANSIENT); } break; } case 3: { switch (vsv_isValidNumber(pCur->azVal[i])) { case 1: { sqlite3_result_int64(ctx, strtoll(pCur->azVal[i], 0, 10)); break; } default: { if (pTab->validateUTF8) { length = vsv_utf8IsValid(pCur->azVal[i]); if (length < dLen) { sqlite3_result_blob(ctx, pCur->azVal[i], dLen, SQLITE_TRANSIENT); } else { sqlite3_result_text(ctx, pCur->azVal[i], length, SQLITE_TRANSIENT); } } else { sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_TRANSIENT); } break; } } break; } case 4: { switch (vsv_isValidNumber(pCur->azVal[i])) { case 1: case 2: { sqlite3_result_double(ctx, strtod(pCur->azVal[i], 0)); break; } default: { if (pTab->validateUTF8) { length = vsv_utf8IsValid(pCur->azVal[i]); if (length < dLen) { sqlite3_result_blob(ctx, pCur->azVal[i], dLen, SQLITE_TRANSIENT); } else { sqlite3_result_text(ctx, pCur->azVal[i], length, SQLITE_TRANSIENT); } } else { sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_TRANSIENT); } break; } } break; } case 5: { switch (vsv_isValidNumber(pCur->azVal[i])) { case 1: { sqlite_int64 ival; ival = strtoll(pCur->azVal[i], 0, 10); if (ival > LLONG_MIN && ival < LLONG_MAX) { sqlite3_result_int64(ctx, ival); break; } } case 2: { LONGDOUBLE_TYPE dv, fp, ip; #if defined(__GNUC__) && defined(_WIN64) if (!hasExtended) hasExtended = 1; #else if (!hasExtended) { if (sizeof(long double) > sizeof(double)) { volatile unsigned long long i = ULLONG_MAX; volatile long double l; volatile double d; l = i; d = i; hasExtended = (d == l) ? -1 : 1; } else { hasExtended = -1; } } #endif dv = strtold(pCur->azVal[i], 0); fp = modfl(dv, &ip); if (hasExtended<0) { if (fp==0.0L && ip >= -9007199254740991LL && dv <= 9007199254740991LL) { sqlite3_result_int64(ctx, (long long)ip); } else { sqlite3_result_double(ctx, dv); } } else { if (fp==0.0L && ip >= LLONG_MIN && ip <= LLONG_MAX) { sqlite3_result_int64(ctx, (long long)ip); } else { sqlite3_result_double(ctx, dv); } } break; } default: { if (pTab->validateUTF8) { length = vsv_utf8IsValid(pCur->azVal[i]); if (length < dLen) { sqlite3_result_blob(ctx, pCur->azVal[i], dLen, SQLITE_TRANSIENT); } else { sqlite3_result_text(ctx, pCur->azVal[i], length, SQLITE_TRANSIENT); } } else { sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_TRANSIENT); } break; } } } } } return SQLITE_OK; } /* ** Return the rowid for the current row. */ static int vsvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid) { VsvCursor *pCur = (VsvCursor*)cur; *pRowid = pCur->iRowid; return SQLITE_OK; } /* ** Return TRUE if the cursor has been moved off of the last ** row of output. */ static int vsvtabEof(sqlite3_vtab_cursor *cur) { VsvCursor *pCur = (VsvCursor*)cur; return pCur->iRowid<0; } /* ** Only a full table scan is supported. So xFilter simply rewinds to ** the beginning. */ static int vsvtabFilter( sqlite3_vtab_cursor *pVtabCursor, int idxNum, const char *idxStr, int argc, sqlite3_value **argv ) { VsvCursor *pCur = (VsvCursor*)pVtabCursor; VsvTable *pTab = (VsvTable*)pVtabCursor->pVtab; pCur->iRowid = 0; /* Ensure the field buffer is always allocated. Otherwise, if the ** first field is zero bytes in size, this may be mistaken for an OOM ** error in csvtabNext(). */ if( vsv_append(&pCur->rdr, 0) ) return SQLITE_NOMEM; if (pCur->rdr.in==0) { assert( pCur->rdr.zIn==pTab->zData ); assert( pTab->iStart>=0 ); assert( (size_t)pTab->iStart<=pCur->rdr.nIn ); pCur->rdr.iIn = pTab->iStart; } else { fseek(pCur->rdr.in, pTab->iStart, SEEK_SET); pCur->rdr.iIn = 0; pCur->rdr.nIn = 0; } return vsvtabNext(pVtabCursor); } /* ** Only a forward full table scan is supported. xBestIndex is mostly ** a no-op. If VSVTEST_FIDX is set, then the presence of equality ** constraints lowers the estimated cost, which is fiction, but is useful ** for testing certain kinds of virtual table behavior. */ static int vsvtabBestIndex( sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo ) { pIdxInfo->estimatedCost = 1000000; #ifdef SQLITE_TEST if ((((VsvTable*)tab)->tstFlags & VSVTEST_FIDX)!=0) { /* The usual (and sensible) case is to always do a full table scan. ** The code in this branch only runs when testflags=1. This code ** generates an artifical and unrealistic plan which is useful ** for testing virtual table logic but is not helpful to real applications. ** ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual ** table (even though it is not) and the cost of running the virtual table ** is reduced from 1 million to just 10. The constraints are *not* marked ** as omittable, however, so the query planner should still generate a ** plan that gives a correct answer, even if they plan is not optimal. */ int i; int nConst = 0; for (i=0; inConstraint; i++) { unsigned char op; if (pIdxInfo->aConstraint[i].usable==0) continue; op = pIdxInfo->aConstraint[i].op; if (op==SQLITE_INDEX_CONSTRAINT_EQ || op==SQLITE_INDEX_CONSTRAINT_LIKE || op==SQLITE_INDEX_CONSTRAINT_GLOB ) { pIdxInfo->estimatedCost = 10; pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1; nConst++; } } } #endif return SQLITE_OK; } static sqlite3_module VsvModule = { 0, /* iVersion */ vsvtabCreate, /* xCreate */ vsvtabConnect, /* xConnect */ vsvtabBestIndex, /* xBestIndex */ vsvtabDisconnect, /* xDisconnect */ vsvtabDisconnect, /* xDestroy */ vsvtabOpen, /* xOpen - open a cursor */ vsvtabClose, /* xClose - close a cursor */ vsvtabFilter, /* xFilter - configure scan constraints */ vsvtabNext, /* xNext - advance a cursor */ vsvtabEof, /* xEof - check for end of scan */ vsvtabColumn, /* xColumn - read data */ vsvtabRowid, /* xRowid - read data */ 0, /* xUpdate */ 0, /* xBegin */ 0, /* xSync */ 0, /* xCommit */ 0, /* xRollback */ 0, /* xFindMethod */ 0, /* xRename */ }; #ifdef SQLITE_TEST /* ** For virtual table testing, make a version of the VSV virtual table ** available that has an xUpdate function. But the xUpdate always returns ** SQLITE_READONLY since the VSV file is not really writable. */ static int vsvtabUpdate(sqlite3_vtab *p,int n,sqlite3_value**v,sqlite3_int64*x) { return SQLITE_READONLY; } static sqlite3_module VsvModuleFauxWrite = { 0, /* iVersion */ vsvtabCreate, /* xCreate */ vsvtabConnect, /* xConnect */ vsvtabBestIndex, /* xBestIndex */ vsvtabDisconnect, /* xDisconnect */ vsvtabDisconnect, /* xDestroy */ vsvtabOpen, /* xOpen - open a cursor */ vsvtabClose, /* xClose - close a cursor */ vsvtabFilter, /* xFilter - configure scan constraints */ vsvtabNext, /* xNext - advance a cursor */ vsvtabEof, /* xEof - check for end of scan */ vsvtabColumn, /* xColumn - read data */ vsvtabRowid, /* xRowid - read data */ vsvtabUpdate, /* xUpdate */ 0, /* xBegin */ 0, /* xSync */ 0, /* xCommit */ 0, /* xRollback */ 0, /* xFindMethod */ 0, /* xRename */ }; #endif /* SQLITE_TEST */ #endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */ /* ** This routine is called when the extension is loaded. The new ** VSV virtual table module is registered with the calling database ** connection. */ #ifndef SQLITE_CORE #ifdef _WIN32 __declspec(dllexport) #endif #endif int sqlite3_vsv_init( sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi ) { #ifndef SQLITE_OMIT_VIRTUALTABLE int rc; SQLITE_EXTENSION_INIT2(pApi); rc = sqlite3_create_module(db, "vsv", &VsvModule, 0); #ifdef SQLITE_TEST if (rc==SQLITE_OK) { rc = sqlite3_create_module(db, "vsv_wr", &VsvModuleFauxWrite, 0); } #endif return rc; #else return SQLITE_OK; #endif } #undef modfl #undef strtold