Skip to content
Snippets Groups Projects
Commit f6d9c844 authored by Stephen R. van den Berg
Browse files

CSV: Added optional Regexp check for fieldnames.

parent 48d3708b
No related branches found
No related tags found
No related merge requests found
...@@ -13,23 +13,32 @@ ...@@ -13,23 +13,32 @@
inherit Parser.Tabular; inherit Parser.Tabular;
//! This function consumes the header-line preceding a typical comma, //! This function consumes the header-line preceding a typical comma,
//! semicolon or tab separated value list. //! semicolon or tab separated value list and autocompiles a format
//! description from that. After this function has
//! successfully parsed a header-line, you can proceed with
//! either @[fetchrecord()] or @[fetch()] to get the remaining records.
//! //!
//! @param delimiters //! @param delimiters
//! Explicitly specify a string containing all the characters that should //! Explicitly specify a string containing all the characters that should
//! be considered field delimiters. If not specified, the function will //! be considered field delimiters. If not specified or empty, the function
//! try to autodetect the single delimiter in use. //! will try to autodetect the single delimiter in use.
//!
//! @param matchfieldname
//! A string containing a regular expression, using @[Regexp.SimpleRegexp]
//! syntax, or an object providing a @[Regexp.SimpleRegexp.match()]
//! single string argument compatible method, that must match all the
//! individual fieldnames before the header will be considered valid.
//! //!
//! @returns //! @returns
//! It returns true if a CSV head has successfully been parsed. //! It returns true if a CSV head has successfully been parsed.
//! //!
//! @seealso //! @seealso
//! @[fetchrecord()], @[compile()] //! @[fetchrecord()], @[fetch()], @[compile()]
int parsehead(void|string delimiters) int parsehead(void|string delimiters,void|string|object matchfieldname)
{ if(skipemptylines()) { if(skipemptylines())
return 0; return 0;
string line=_in->gets(); { string line=_in->gets();
if(!delimiters) if(!delimiters||!sizeof(delimiters))
{ int countcomma,countsemicolon,counttab; { int countcomma,countsemicolon,counttab;
countcomma=countsemicolon=counttab=0; countcomma=countsemicolon=counttab=0;
foreach(line;;int c) foreach(line;;int c)
...@@ -45,14 +54,36 @@ int parsehead(void|string delimiters) ...@@ -45,14 +54,36 @@ int parsehead(void|string delimiters)
countsemicolon>counttab?";":"\t"; countsemicolon>counttab?";":"\t";
} }
_in->unread(line+"\n"); _in->unread(line+"\n");
}
multiset delim=(<>); multiset delim=(<>);
foreach(delimiters;;int c) foreach(delimiters;;int c)
delim+=(<c>); delim+=(<c>);
array res=({ (["single":1]),0 }); array res=({ (["single":1]),0 });
mapping m=(["delim":delim]); mapping m=(["delim":delim]);
if(!objectp(matchfieldname))
matchfieldname=Regexp(matchfieldname||"");
_eol=0; _eol=0;
do res+=({m+(["name":_getdelimword(m)])}); if(mixed err = catch
{ _checkpoint checkp=_checkpoint();
do
{ string field=_getdelimword(m);
res+=({ m+(["name":field]) });
if(String.width(field)>8)
field=string_to_utf8(field); // FIXME dumbing it down for Regexp()
if(!matchfieldname->match(field))
throw(1);
}
while(!_eol); while(!_eol);
})
switch(err)
{ default:
throw(err);
case 1:
return 0;
}
setformat( ({res}) ); setformat( ({res}) );
return 1; return 1;
} }
......
...@@ -112,7 +112,7 @@ private string gets(int n) ...@@ -112,7 +112,7 @@ private string gets(int n)
return s; return s;
} }
private class checkpoint class _checkpoint
{ private string oldalread; { private string oldalread;
void create() void create()
...@@ -428,12 +428,12 @@ mapping fetch(void|array|mapping format) ...@@ -428,12 +428,12 @@ mapping fetch(void|array|mapping format)
ret: ret:
{ if(arrayp(format)) { if(arrayp(format))
{ mixed err=catch { mixed err=catch
{ checkpoint checkp=checkpoint(); { _checkpoint checkp=_checkpoint();
foreach(format;;array|mapping fmt) foreach(format;;array|mapping fmt)
if(arrayp(fmt)) if(arrayp(fmt))
for(int found=0;;found=1) for(int found=0;;found=1)
{ mixed err=catch { mixed err=catch
{ checkpoint checkp=checkpoint(); { _checkpoint checkp=_checkpoint();
mapping rec=getrecord(fmt,found); mapping rec=getrecord(fmt,found);
foreach(rec;string name;mixed value) foreach(rec;string name;mixed value)
add2map(ret,name,value); add2map(ret,name,value);
...@@ -468,6 +468,8 @@ ret: ...@@ -468,6 +468,8 @@ ret:
{ int found; { int found;
do do
{ found=0; { found=0;
if(!mappingp(format))
error("Empty format definition\n");
foreach(format;string name;array|mapping subfmt) foreach(format;string name;array|mapping subfmt)
for(;;) for(;;)
{ if(verb<0) { if(verb<0)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment