CSV: Added optional Regexp check for fieldnames.

f6d9c844 · Stephen R. van den Berg · 48d3708b · f6d9c844 · f6d9c844
Commit f6d9c844 authored 13 years ago by Stephen R. van den Berg
--- a/lib/modules/Parser.pmod/CSV.pike
+++ b/lib/modules/Parser.pmod/CSV.pike
@@ -13,23 +13,32 @@
 inherit Parser.Tabular;
 //! This function consumes the header-line preceding a typical comma,
-//! semicolon or tab separated value list.
+//! semicolon or tab separated value list and autocompiles a format
+//! description from that.  After this function has
+//! successfully parsed a header-line, you can proceed with
+//! either @[fetchrecord()] or @[fetch()] to get the remaining records.
 //!
 //! @param delimiters
 //! Explicitly specify a string containing all the characters that should
-//! be considered field delimiters.  If not specified, the function will
+//! be considered field delimiters.  If not specified or empty, the function
-//! try to autodetect the single delimiter in use.
+//! will try to autodetect the single delimiter in use.
+//!
+//! @param matchfieldname
+//! Either a string containing a regular expression in
+//! @[Regexp.SimpleRegexp] syntax, or an object providing a method
+//! compatible with the single-string form of @[Regexp.SimpleRegexp.match()].
+//! Every fieldname must match for the header to be considered valid.
 //!
 //! @returns
 //! It returns true if a CSV head has successfully been parsed.
 //!
 //! @seealso
-//!  @[fetchrecord()], @[compile()]
+//!  @[fetchrecord()], @[fetch()], @[compile()]
-int parsehead(void|string delimiters)
+int parsehead(void|string delimiters,void|string|object matchfieldname)
 { if(skipemptylines())
    return 0;
-  string line=_in->gets();
+  { string line=_in->gets();
-  if(!delimiters)
+    if(!delimiters||!sizeof(delimiters))
    { int countcomma,countsemicolon,counttab;
      countcomma=countsemicolon=counttab=0;
      foreach(line;;int c)
@@ -45,14 +54,36 @@ int parsehead(void|string delimiters)
       countsemicolon>counttab?";":"\t";
    }
    _in->unread(line+"\n");
+  }
  multiset delim=(<>);
  foreach(delimiters;;int c)
    delim+=(<c>);
  array res=({ (["single":1]),0 });
  mapping m=(["delim":delim]);
+  if(!objectp(matchfieldname))
+    matchfieldname=Regexp(matchfieldname||"");
  _eol=0;
-  do res+=({m+(["name":_getdelimword(m)])});
+  if(mixed err = catch
+    { _checkpoint checkp=_checkpoint();
+      do
+      { string field=_getdelimword(m);
+        res+=({ m+(["name":field]) });
+	if(String.width(field)>8)
+	  field=string_to_utf8(field);	  // FIXME dumbing it down for Regexp()
+        if(!matchfieldname->match(field))
+	  throw(1);
+      }
      while(!_eol);
+    })
+    switch(err)
+    { default:
+	throw(err);
+      case 1:
+	return 0;
+    }
  setformat( ({res}) );
  return 1;
 }

--- a/lib/modules/Parser.pmod/Tabular.pike
+++ b/lib/modules/Parser.pmod/Tabular.pike
@@ -112,7 +112,7 @@ private string gets(int n)
  return s;
 }
-private class checkpoint
+class _checkpoint
 { private string oldalread;
  void create()
@@ -428,12 +428,12 @@ mapping fetch(void|array|mapping format)
 ret:
  { if(arrayp(format))
    { mixed err=catch
-      { checkpoint checkp=checkpoint();
+      { _checkpoint checkp=_checkpoint();
        foreach(format;;array|mapping fmt)
          if(arrayp(fmt))
            for(int found=0;;found=1)
            { mixed err=catch
-              { checkpoint checkp=checkpoint();
+              { _checkpoint checkp=_checkpoint();
 		mapping rec=getrecord(fmt,found);
 	        foreach(rec;string name;mixed value)
                  add2map(ret,name,value);
@@ -468,6 +468,8 @@ ret:
    { int found;
      do
      { found=0;
+	if(!mappingp(format))
+	  error("Empty format definition\n");
        foreach(format;string name;array|mapping subfmt)
          for(;;)
 	  { if(verb<0)