From 4e331a687818d56979c456c25a7ece4a4128171e Mon Sep 17 00:00:00 2001
From: "Mirar (Pontus Hagland)" <pike@sort.mirar.org>
Date: Sat, 12 Jun 1999 21:13:08 +0200
Subject: [PATCH] it works, abit

Rev: src/modules/Parser/html.c:1.18
Rev: src/modules/Parser/module.pmod.in:1.2
---
 src/modules/Parser/html.c         | 150 ++++++++++++++++++++++++--
 src/modules/Parser/module.pmod.in | 169 +++++++++++++++++++++++++++++-
 2 files changed, 310 insertions(+), 9 deletions(-)

diff --git a/src/modules/Parser/html.c b/src/modules/Parser/html.c
index c3de86ac65..131d9b6431 100644
--- a/src/modules/Parser/html.c
+++ b/src/modules/Parser/html.c
@@ -19,8 +19,8 @@
 
 extern struct program *parser_html_program;
 
-#define DEBUG
 /*
+#define DEBUG
 #define SCAN_DEBUG
 */
 
@@ -33,7 +33,7 @@ extern struct program *parser_html_program;
 #define DEBUG_MARK_SPOT(TEXT,FEED,C) do; while(0)
 #endif
 
-#if 1
+#if 0
 #define free(X) fprintf(stderr,"free line %d: %p\n",__LINE__,X); free(X)
 #endif
 
@@ -465,13 +465,14 @@ static void html__set_entity_callback(INT32 args)
 **!	(Ie, do destructive editing of the args mapping if you 
 **!	want the next callback to read it - don't just return a new tag.)
 **!	</ul>
+**!
+**! see also: tags, containers, entities
 **!	
 */
 
 static void html_add_tag(INT32 args)
 {
-   check_all_args("add_tag",args,
-		  BIT_STRING,BIT_MIXED,0);
+   check_all_args("add_tag",args,BIT_STRING,BIT_MIXED,0);
    if (THIS->maptag->refs>1)
    {
       push_mapping(THIS->maptag);
@@ -486,8 +487,7 @@ static void html_add_tag(INT32 args)
 
 static void html_add_container(INT32 args)
 {
-   check_all_args("add_container",args,
-		  BIT_STRING,BIT_MIXED,0);
+   check_all_args("add_container",args,BIT_STRING,BIT_MIXED,0);
    if (THIS->mapcont->refs>1)
    {
       push_mapping(THIS->mapcont);
@@ -500,8 +500,7 @@ static void html_add_container(INT32 args)
 
 static void html_add_entity(INT32 args)
 {
-   check_all_args("parse_tag_args",args,
-		  BIT_STRING,BIT_MIXED,0);
+   check_all_args("add_entity",args,BIT_STRING,BIT_MIXED,0);
    if (THIS->mapentity->refs>1)
    {
       push_mapping(THIS->mapentity);
@@ -512,6 +511,81 @@ static void html_add_entity(INT32 args)
    pop_n_elems(args);
 }
 
+/* Registers every index/value pair of the mapping argument by calling
+   html_add_tag() once per pair. */
+static void html_add_tags(INT32 args)
+{
+   INT32 e;             /* not used directly; presumably MAPPING_LOOP's counter */
+   struct keypair *k;   /* current pair while inside MAPPING_LOOP */
+   check_all_args("add_tags",args,BIT_MAPPING,0);
+
+   MAPPING_LOOP(sp[-1].u.mapping)
+      {
+	 /* hand (index, value) to html_add_tag as its two arguments */
+	 push_svalue(&k->ind);
+	 push_svalue(&k->val);
+	 html_add_tag(2);
+      }
+
+   pop_n_elems(args);
+}
+
+/* Registers every index/value pair of the mapping argument by calling
+   html_add_container() once per pair. */
+static void html_add_containers(INT32 args)
+{
+   INT32 e;             /* not used directly; presumably MAPPING_LOOP's counter */
+   struct keypair *k;   /* current pair while inside MAPPING_LOOP */
+   check_all_args("add_containers",args,BIT_MAPPING,0);
+
+   MAPPING_LOOP(sp[-1].u.mapping)
+      {
+	 /* hand (index, value) to html_add_container as its two arguments */
+	 push_svalue(&k->ind);
+	 push_svalue(&k->val);
+	 html_add_container(2);
+      }
+
+   pop_n_elems(args);
+}
+
+/* Registers every index/value pair of the mapping argument by calling
+   html_add_entity() once per pair. */
+static void html_add_entities(INT32 args)
+{
+   INT32 e;             /* not used directly; presumably MAPPING_LOOP's counter */
+   struct keypair *k;   /* current pair while inside MAPPING_LOOP */
+   check_all_args("add_entities",args,BIT_MAPPING,0);
+
+   MAPPING_LOOP(sp[-1].u.mapping)
+      {
+	 /* hand (index, value) to html_add_entity as its two arguments */
+	 push_svalue(&k->ind);
+	 push_svalue(&k->val);
+	 html_add_entity(2);
+      }
+
+   pop_n_elems(args);
+}
+
+static void html_tags(INT32 args)
+{
+   pop_n_elems(args);                /* any arguments are discarded */
+   ref_push_mapping(THIS->maptag);   /* result: THIS->maptag itself, not a copy */
+}
+
+static void html_containers(INT32 args)
+{
+   pop_n_elems(args);                 /* any arguments are discarded */
+   ref_push_mapping(THIS->mapcont);   /* result: THIS->mapcont itself, not a copy */
+}
+
+static void html_entities(INT32 args)
+{
+   pop_n_elems(args);                   /* any arguments are discarded */
+   ref_push_mapping(THIS->mapentity);   /* result: THIS->mapentity itself, not a copy */
+}
+
 /****** try_feed - internal main ********************/
 
 /* ---------------------------------------- */
@@ -1922,6 +1996,43 @@ void html_write_out(INT32 args)
 
 /** query *******************************************/
 
+/* 
+**! method array(int) at();
+**! method int at_line();
+**! method int at_char();
+**! method int at_column();
+**!	Get the current position.
+**!	<ref>at</ref>() gives an array consisting of
+**!	({<i>line</i>,<i>char</i>,<i>column</i>}), in that order.
+*/
+
+static void html_at_line(INT32 args)
+{
+   pop_n_elems(args);                   /* any arguments are discarded */
+   push_int(THIS->stack->pos.lineno);   /* result: pos.lineno of THIS->stack */
+}
+
+static void html_at_char(INT32 args)
+{
+   pop_n_elems(args);                   /* any arguments are discarded */
+   push_int(THIS->stack->pos.byteno);   /* result: pos.byteno of THIS->stack */
+}
+
+static void html_at_column(INT32 args)
+{
+   pop_n_elems(args);   /* any arguments are discarded */
+   push_int(THIS->stack->pos.byteno-THIS->stack->pos.linestart); /* byteno minus linestart */
+}
+
+static void html_at(INT32 args)
+{
+   pop_n_elems(args);                   /* any arguments are discarded */
+   push_int(THIS->stack->pos.lineno);   /* element 0: line */
+   push_int(THIS->stack->pos.byteno);   /* element 1: char */
+   push_int(THIS->stack->pos.byteno-THIS->stack->pos.linestart); /* element 2: column */
+   f_aggregate(3);                      /* the three values above become one array */
+}
+
 /*
 **! method string current()
 **!	Gives the current range of data, ie the contents
@@ -2182,6 +2293,8 @@ void html__inspect(INT32 args)
    f_aggregate_mapping(n*2);
 }
 
+/** create, clone ***********************************/
+
 void html_create(INT32 args)
 {
    pop_n_elems(args);
@@ -2292,6 +2405,12 @@ void init_parser_html(void)
    /* query */
 
    ADD_FUNCTION("current",html_current,tFunc(,tStr),0);
+
+   ADD_FUNCTION("at",html_at,tFunc(,tArr(tInt)),0);
+   ADD_FUNCTION("at_line",html_at_line,tFunc(,tInt),0);
+   ADD_FUNCTION("at_char",html_at_char,tFunc(,tInt),0);
+   ADD_FUNCTION("at_column",html_at_column,tFunc(,tInt),0);
+
    ADD_FUNCTION("tag_name",html_tag_name,tFunc(,tStr),0);
    ADD_FUNCTION("tag_args",html_tag_args,tFunc(,tMapping),0);
 
@@ -2304,6 +2423,21 @@ void init_parser_html(void)
    ADD_FUNCTION("add_entity",html_add_entity,
 		tFunc(tStr tTodo(""),tVoid),0);
 
+   ADD_FUNCTION("add_tags",html_add_tags,
+		tFunc(tMap(tStr,tTodo( tTagargs )),tVoid),0);
+   ADD_FUNCTION("add_containers",html_add_containers,
+		tFunc(tMap(tStr,tTodo( tTagargs tStr )),tVoid),0);
+   ADD_FUNCTION("add_entities",html_add_entities,
+		tFunc(tMap(tStr,tTodo( "" )),tVoid),0);
+
+   ADD_FUNCTION("tags",html_tags,
+		tFunc(,tMap(tStr,tTodo( tTagargs ))),0);
+   ADD_FUNCTION("containers",html_containers,
+		tFunc(,tMap(tStr,tTodo( tTagargs tStr ))),0);
+   ADD_FUNCTION("entities",html_entities,
+		tFunc(,tMap(tStr,tTodo( "" ))),0);
+
+
    /* special callbacks */
 
    ADD_FUNCTION("_set_tag_callback",html__set_tag_callback,
diff --git a/src/modules/Parser/module.pmod.in b/src/modules/Parser/module.pmod.in
index 747882d115..3089d84fe4 100644
--- a/src/modules/Parser/module.pmod.in
+++ b/src/modules/Parser/module.pmod.in
@@ -1,8 +1,175 @@
 /*
- * $Id: module.pmod.in,v 1.1 1999/02/19 04:58:40 mirar Exp $
+ * $Id: module.pmod.in,v 1.2 1999/06/12 19:13:08 mirar Exp $
  *
  */
 
 inherit @module@;
 
+//! module Parser
+
+class SGML
+//! class SGML
+//!  	This is a handy simple parser of SGML-like
+//!	syntax like HTML. It doesn't do anything more advanced
+//!	than finding the corresponding end-tags.
+//!	
+//!	It's used like this:
+//!	<pre>array res=Parser.SGML()->feed(string)->finish();</pre>
+//!
+//!	The resulting structure is an array of atoms,
+//!	where the atom can be a string or a tag.
+//!	A tag contains a similar array, as data. 
+//!     
+//!	Example:
+//!	A string
+//!     <tt>"<gat>&nbsp;<gurka>&nbsp;</gurka>&nbsp;<banan>&nbsp;<kiwi>&nbsp;</gat>"</tt>
+//!     results in 
+//!	<pre>
+//!	({
+//!	    tag "gat" object with data:
+//!	    ({
+//!	        tag "gurka" object with data:
+//!		({
+//!                 " "
+//!             })
+//!	        tag "banan" object with data:
+//!		({
+//!                 " "
+//!	            tag "kiwi" object with data:
+//!		    ({
+//!                    " "
+//!                 })
+//!             })
+//!         })
+//!     })
+//!	</pre>
+//!             
+//!	ie, simple "tags" (not containers) are not detected,
+//!	but containers are ended implicitly by a surrounding
+//!	container _with_ an end tag.
+//!
+//! 	The 'tag' is an object with the following variables:
+//!	<pre>
+//!	 string name;           - name of tag
+//!	 mapping args;          - argument to tag
+//!	 int line,char,column;  - position of tag
+//!	 string file;           - filename (see <ref>create</ref>)
+//!	 array(SGMLatom) data;  - contained data
+//!     </pre>
+//!
+{
+   string file;
+
+   class SGMLatom
+   {
+      string name;            // name of the tag
+      mapping args;           // arguments of the tag
+      int line,char,column;   // position of the tag, as given by the parser's at()
+      string file;            // filename the enclosing SGML object was created with
+      array(SGMLatom) data;   // contents; copied from a res level, so it may hold strings too
+   }
+
+   static array(array(object(SGMLatom)|string)) res=({({})}); // data levels; res[0] is the innermost open one
+   static array(SGMLatom) tagstack=({});   // open tags, innermost first
+   static array(object) errors;            // not referenced in the visible code
+
+   array(object(SGMLatom)|string) data;    // final result; assigned by finish()
+
+   static private array(string) got_tag(object g)
+   {
+      string name=g->tag_name();
+
+      if (name!="" && name[0]=='/')   // end tag, "/name"
+      {
+	 int i=search(tagstack->name,name[1..]);   // innermost open tag with that name
+	 if (i!=-1) 
+	 {
+	    i++;
+	    while (i--)   // close that tag and every tag opened inside it
+	    {
+	       tagstack[0]->data=res[0];
+	       res=res[1..];
+	       tagstack=tagstack[1..];
+	    }
+	    return ({});
+	 }
+      }   // an end tag without a matching open tag falls through and is stored as a tag
+
+      object t=SGMLatom();
+      t->name=name;
+      t->args=g->tag_args();
+      [t->line,t->char,t->column]=g->at();
+      t->file=file;
+      res[0]+=({t});             // append the tag to the level it appears in
+      tagstack=({t})+tagstack;   // it becomes the innermost open tag
+      res=({({})})+res;          // start an empty level for its contents
+      
+      return ({}); // don't care
+   }
+
+   void debug(array|void arr,void|int level)
+   {
+      level+=2;             // two more columns of indentation per nesting level
+      if (!arr) arr=data;   // no array given: dump the stored result
+      foreach (arr,string|object(SGMLatom) t)
+	 if (stringp(t))
+	    write("%*s%-=*s\n",level,"",79-level,sprintf("%O",t));
+	 else
+	 {
+	    write("%*stag %O\n",level,"",t->name,);
+	    if (sizeof(t->args))
+	       write("%*s%-=*s\n",level+4,"",75-level,sprintf("%O",t->args));
+	    debug(t->data,level);   // NOTE(review): a t->data of 0 makes this restart from `data`
+	 }
+   }
+
+
+   private static object p=HTML();   // underlying parser; feed() and finish() delegate to it
+
+//! method void create()
+//! method void create(string filename)
+//!	This object is created with this filename.
+//!	It's passed to all created tags, for debug and trace purposes.
+//! note:
+//! 	No, it doesn't read the file itself. See <ref>feed</ref>.
+
+   void create(void|string _file)
+   {
+      file=_file;   // copied into every tag created by got_tag()
+
+      p->_set_tag_callback(got_tag);   // got_tag() is installed as the tag callback
+      p->_set_data_callback(lambda(object g,string data) 
+			    { if (data!="") res[0]+=({data}); return ({}); }); // keep non-empty text
+   }
+
+//! method object feed(string s)
+//! method array finish()
+//! method array result()
+//!	Feed new data to the object, or finish the stream.
+//!	No result can be used until finish() is called.
+//!
+//! 	Both finish() and result() return the computed data.
+//!
+//!	feed() returns the called object.
+
+   object feed(string s)
+   {
+      p->feed(s);             // pass the text on to the underlying parser
+      return this_object();   // returning the object allows feed(...)->finish()
+   }
+
+   array finish()
+   {
+      p->finish();
+      foreach (tagstack,object t) { t->data=res[0]; res=res[1..]; } // close tags still open, innermost first
+      data=res[0]; res=0; tagstack=({});   // after the unwind res[0] is the top level
+      return data;
+   }
+
+   array(object(SGMLatom)|string) result()
+   {
+      return data;   // only assigned by finish(); unset before that
+   }
+}
+
 
-- 
GitLab