diff --git a/lib/modules/Parser.pmod/CSV.pike b/lib/modules/Parser.pmod/CSV.pike index 1c32da7487bacc55d0aad6aa13144e55adba3c79..2aab1c18029990d3caee0a6f17d83b854d30146e 100644 --- a/lib/modules/Parser.pmod/CSV.pike +++ b/lib/modules/Parser.pmod/CSV.pike @@ -13,47 +13,78 @@ inherit Parser.Tabular; //! This function consumes the header-line preceding a typical comma, -//! semicolon or tab separated value list. +//! semicolon or tab separated value list and automatically compiles a format +//! description from it. After this function has +//! successfully parsed a header-line, you can proceed with +//! either @[fetchrecord()] or @[fetch()] to get the remaining records. //! //! @param delimiters //! Explicitly specify a string containing all the characters that should -//! be considered field delimiters. If not specified, the function will -//! try to autodetect the single delimiter in use. +//! be considered field delimiters. If not specified or empty, the function +//! will try to autodetect the single delimiter in use. +//! +//! @param matchfieldname +//! A string containing a regular expression, using @[Regexp.SimpleRegexp] +//! syntax, or an object providing a method compatible with +//! @[Regexp.SimpleRegexp.match()] (taking a single string argument). It must +//! match every individual field name before the header is considered valid. //! //! @returns //! It returns true if a CSV head has successfully been parsed. //! //! @seealso -//! @[fetchrecord()], @[compile()] -int parsehead(void|string delimiters) +//! 
@[fetchrecord()], @[fetch()], @[compile()] +int parsehead(void|string delimiters,void|string|object matchfieldname) /* Reads the header line, derives the delimiter set and the field names, and installs them through setformat(); returns 1 on success, 0 when skipemptylines() reports true or a field name is rejected. */ { if(skipemptylines()) return 0; - string line=_in->gets(); - if(!delimiters) - { int countcomma,countsemicolon,counttab; - countcomma=countsemicolon=counttab=0; - foreach(line;;int c) - switch(c) - { case ',':countcomma++; - break; - case ';':countsemicolon++; - break; - case '\t':counttab++; - break; - } - delimiters=countcomma>countsemicolon?countcomma>counttab?",":"\t": - countsemicolon>counttab?";":"\t"; + { string line=_in->gets(); /* peek at the header line; it is pushed back with unread() at the end of this inner block */ + if(!delimiters||!sizeof(delimiters)) /* unspecified or empty string: autodetect the delimiter */ + { int countcomma,countsemicolon,counttab; + countcomma=countsemicolon=counttab=0; + foreach(line;;int c) + switch(c) + { case ',':countcomma++; + break; + case ';':countsemicolon++; + break; + case '\t':counttab++; + break; + } + delimiters=countcomma>countsemicolon?countcomma>counttab?",":"\t": + countsemicolon>counttab?";":"\t"; /* the strictly most frequent candidate wins; a comma/semicolon tie that beats TAB yields semicolon, every other tie yields TAB */ + } + _in->unread(line+"\n"); } - _in->unread(line+"\n"); + multiset delim=(<>); foreach(delimiters;;int c) delim+=(<c>); + array res=({ (["single":1]),0 }); mapping m=(["delim":delim]); + + if(!objectp(matchfieldname)) + matchfieldname=Regexp(matchfieldname||""); /* a string pattern is compiled here; a missing pattern becomes the empty regexp */ _eol=0; - do res+=({m+(["name":_getdelimword(m)])}); - while(!_eol); - setformat(({res})); + if(mixed err = catch + { _checkpoint checkp=_checkpoint(); /* NOTE(review): presumably restores the consumed input when this block is left by a throw; confirm against Tabular._checkpoint */ + do + { string field=_getdelimword(m); + res+=({ m+(["name":field]) }); + if(String.width(field)>8) + field=string_to_utf8(field); // FIXME dumbing it down for Regexp() + if(!matchfieldname->match(field)) + throw(1); /* 1 is the private marker for a rejected field name; the switch below turns it into return 0 */ + } + while(!_eol); + }) + switch(err) + { default: + throw(err); + case 1: + return 0; + } + setformat( ({res}) ); return 1; } diff --git a/lib/modules/Parser.pmod/Tabular.pike b/lib/modules/Parser.pmod/Tabular.pike index 57fef9e8ea3e820b85e8dfab3305ca98fcf1d694..6fd279e147019e5faea75ba2dec1c9d89de8e395 100644 --- a/lib/modules/Parser.pmod/Tabular.pike +++ b/lib/modules/Parser.pmod/Tabular.pike @@ -112,7 +112,7 @@ private 
string gets(int n) return s; } -private class checkpoint +class _checkpoint { private string oldalread; void create() @@ -428,12 +428,12 @@ mapping fetch(void|array|mapping format) ret: { if(arrayp(format)) { mixed err=catch - { checkpoint checkp=checkpoint(); + { _checkpoint checkp=_checkpoint(); foreach(format;;array|mapping fmt) if(arrayp(fmt)) for(int found=0;;found=1) { mixed err=catch - { checkpoint checkp=checkpoint(); + { _checkpoint checkp=_checkpoint(); mapping rec=getrecord(fmt,found); foreach(rec;string name;mixed value) add2map(ret,name,value); @@ -468,6 +468,8 @@ ret: { int found; do { found=0; + if(!mappingp(format)) + error("Empty format definition\n"); foreach(format;string name;array|mapping subfmt) for(;;) { if(verb<0)