From 4e331a687818d56979c456c25a7ece4a4128171e Mon Sep 17 00:00:00 2001 From: "Mirar (Pontus Hagland)" <pike@sort.mirar.org> Date: Sat, 12 Jun 1999 21:13:08 +0200 Subject: [PATCH] it works, abit Rev: src/modules/Parser/html.c:1.18 Rev: src/modules/Parser/module.pmod.in:1.2 --- src/modules/Parser/html.c | 150 ++++++++++++++++++++++++-- src/modules/Parser/module.pmod.in | 169 +++++++++++++++++++++++++++++- 2 files changed, 310 insertions(+), 9 deletions(-) diff --git a/src/modules/Parser/html.c b/src/modules/Parser/html.c index c3de86ac65..131d9b6431 100644 --- a/src/modules/Parser/html.c +++ b/src/modules/Parser/html.c @@ -19,8 +19,8 @@ extern struct program *parser_html_program; -#define DEBUG /* +#define DEBUG #define SCAN_DEBUG */ @@ -33,7 +33,7 @@ extern struct program *parser_html_program; #define DEBUG_MARK_SPOT(TEXT,FEED,C) do; while(0) #endif -#if 1 +#if 0 #define free(X) fprintf(stderr,"free line %d: %p\n",__LINE__,X); free(X) #endif @@ -465,13 +465,14 @@ static void html__set_entity_callback(INT32 args) **! (Ie, do destructive editing of the args mapping if you **! want the next callback to read it - don't just return a new tag.) **! </ul> +**! +**! see also: tags, containers, entities **! 
*/ static void html_add_tag(INT32 args) { - check_all_args("add_tag",args, - BIT_STRING,BIT_MIXED,0); + check_all_args("add_tag",args,BIT_STRING,BIT_MIXED,0); if (THIS->maptag->refs>1) { push_mapping(THIS->maptag); @@ -486,8 +487,7 @@ static void html_add_tag(INT32 args) static void html_add_container(INT32 args) { - check_all_args("add_container",args, - BIT_STRING,BIT_MIXED,0); + check_all_args("add_container",args,BIT_STRING,BIT_MIXED,0); if (THIS->mapcont->refs>1) { push_mapping(THIS->mapcont); @@ -500,8 +500,7 @@ static void html_add_container(INT32 args) static void html_add_entity(INT32 args) { - check_all_args("parse_tag_args",args, - BIT_STRING,BIT_MIXED,0); + check_all_args("add_entity",args,BIT_STRING,BIT_MIXED,0); if (THIS->mapentity->refs>1) { push_mapping(THIS->mapentity); @@ -512,6 +511,81 @@ static void html_add_entity(INT32 args) pop_n_elems(args); } +static void html_add_tags(INT32 args) +{ + int sz; + INT32 e; + struct keypair *k; + check_all_args("add_tags",args,BIT_MAPPING,0); + + sz=sp[-1].u.mapping->size; + + MAPPING_LOOP(sp[-1].u.mapping) + { + push_svalue(&k->ind); + push_svalue(&k->val); + html_add_tag(2); + } + + pop_n_elems(args); +} + +static void html_add_containers(INT32 args) +{ + int sz; + INT32 e; + struct keypair *k; + check_all_args("add_containers",args,BIT_MAPPING,0); + + sz=sp[-1].u.mapping->size; + + MAPPING_LOOP(sp[-1].u.mapping) + { + push_svalue(&k->ind); + push_svalue(&k->val); + html_add_container(2); + } + + pop_n_elems(args); +} + +static void html_add_entities(INT32 args) +{ + int sz; + INT32 e; + struct keypair *k; + check_all_args("add_entities",args,BIT_MAPPING,0); + + sz=sp[-1].u.mapping->size; + + MAPPING_LOOP(sp[-1].u.mapping) + { + push_svalue(&k->ind); + push_svalue(&k->val); + html_add_entity(2); + } + + pop_n_elems(args); +} + +static void html_tags(INT32 args) +{ + pop_n_elems(args); + ref_push_mapping(THIS->maptag); +} + +static void html_containers(INT32 args) +{ + pop_n_elems(args); + 
ref_push_mapping(THIS->mapcont); +} + +static void html_entities(INT32 args) +{ + pop_n_elems(args); + ref_push_mapping(THIS->mapentity); +} + /****** try_feed - internal main ********************/ /* ---------------------------------------- */ @@ -1922,6 +1996,43 @@ void html_write_out(INT32 args) /** query *******************************************/ +/* +**! method array(int) at(); +**! method int at_line(); +**! method int at_char(); +**! method int at_column(); +**! Get the current position. +**! <ref>at</ref>() gives an array consisting of +**! ({<i>line</i>,<i>char</i>,<i>column</i>}), in that order. +*/ + +static void html_at_line(INT32 args) +{ + pop_n_elems(args); + push_int(THIS->stack->pos.lineno); +} + +static void html_at_char(INT32 args) +{ + pop_n_elems(args); + push_int(THIS->stack->pos.byteno); +} + +static void html_at_column(INT32 args) +{ + pop_n_elems(args); + push_int(THIS->stack->pos.byteno-THIS->stack->pos.linestart); +} + +static void html_at(INT32 args) +{ + pop_n_elems(args); + push_int(THIS->stack->pos.lineno); + push_int(THIS->stack->pos.byteno); + push_int(THIS->stack->pos.byteno-THIS->stack->pos.linestart); + f_aggregate(3); +} + /* **! method string current() **! 
Gives the current range of data, ie the contents @@ -2182,6 +2293,8 @@ void html__inspect(INT32 args) f_aggregate_mapping(n*2); } +/** create, clone ***********************************/ + void html_create(INT32 args) { pop_n_elems(args); @@ -2292,6 +2405,12 @@ void init_parser_html(void) /* query */ ADD_FUNCTION("current",html_current,tFunc(,tStr),0); + + ADD_FUNCTION("at",html_at,tFunc(,tArr(tInt)),0); + ADD_FUNCTION("at_line",html_at_line,tFunc(,tInt),0); + ADD_FUNCTION("at_char",html_at_char,tFunc(,tInt),0); + ADD_FUNCTION("at_column",html_at_column,tFunc(,tInt),0); + ADD_FUNCTION("tag_name",html_tag_name,tFunc(,tStr),0); ADD_FUNCTION("tag_args",html_tag_args,tFunc(,tMapping),0); @@ -2304,6 +2423,21 @@ void init_parser_html(void) ADD_FUNCTION("add_entity",html_add_entity, tFunc(tStr tTodo(""),tVoid),0); + ADD_FUNCTION("add_tags",html_add_tags, + tFunc(tMap(tStr,tTodo( tTagargs )),tVoid),0); + ADD_FUNCTION("add_containers",html_add_containers, + tFunc(tMap(tStr,tTodo( tTagargs tStr )),tVoid),0); + ADD_FUNCTION("add_entities",html_add_entities, + tFunc(tMap(tStr,tTodo( "" )),tVoid),0); + + ADD_FUNCTION("tags",html_tags, + tFunc(,tMap(tStr,tTodo( tTagargs ))),0); + ADD_FUNCTION("containers",html_containers, + tFunc(,tMap(tStr,tTodo( tTagargs tStr ))),0); + ADD_FUNCTION("entities",html_entities, + tFunc(,tMap(tStr,tTodo( "" ))),0); + + /* special callbacks */ ADD_FUNCTION("_set_tag_callback",html__set_tag_callback, diff --git a/src/modules/Parser/module.pmod.in b/src/modules/Parser/module.pmod.in index 747882d115..3089d84fe4 100644 --- a/src/modules/Parser/module.pmod.in +++ b/src/modules/Parser/module.pmod.in @@ -1,8 +1,175 @@ /* - * $Id: module.pmod.in,v 1.1 1999/02/19 04:58:40 mirar Exp $ + * $Id: module.pmod.in,v 1.2 1999/06/12 19:13:08 mirar Exp $ * */ inherit @module@; +//! module Parser + +class SGML +//! class SGML +//! This is a handy simple parser of SGML-like +//! syntax like HTML. It doesn't do anything advanced, +//! 
beyond finding the corresponding end-tags. +//! +//! It's used like this: +//! <pre>array res=Parser.SGML()->feed(string)->finish()->result();</pre> +//! +//! The resulting structure is an array of atoms, +//! where the atom can be a string or a tag. +//! A tag contains a similar array, as data. +//! +//! Example: +//! A string +//! <tt>"<gat> <gurka> </gurka> <banan> <kiwi> </gat>"</tt> +//! results in +//! <pre> +//! ({ +//! tag "gat" object with data: +//! ({ +//! tag "gurka" object with data: +//! ({ +//! " " +//! }) +//! tag "banan" object with data: +//! ({ +//! " " +//! tag "kiwi" object with data: +//! ({ +//! " " +//! }) +//! }) +//! }) +//! }) +//! </pre> +//! +//! i.e., simple "tags" (not containers) are not detected, +//! but containers are ended implicitly by a surrounding +//! container _with_ an end tag. +//! +//! The 'tag' is an object with the following variables: +//! <pre> +//! string name; - name of tag +//! mapping args; - arguments to tag +//! int line,char,column; - position of tag +//! string file; - filename (see <ref>create</ref>) +//! array(SGMLatom) data; - contained data +//! </pre> +//! 
+{ + string file; + + class SGMLatom + { + string name; + mapping args; + int line,char,column; + string file; + array(SGMLatom) data; + } + + static array(array(object(SGMLatom)|string)) res=({({})}); + static array(SGMLatom) tagstack=({}); + static array(object) errors; + + array(object(SGMLatom)|string) data; + + static private array(string) got_tag(object g) + { + string name=g->tag_name(); + + if (name!="" && name[0]=='/') + { + int i=search(tagstack->name,name[1..]); + if (i!=-1) + { + i++; + while (i--) + { + tagstack[0]->data=res[0]; + res=res[1..]; + tagstack=tagstack[1..]; + } + return ({}); + } + } + + object t=SGMLatom(); + t->name=name; + t->args=g->tag_args(); + [t->line,t->char,t->column]=g->at(); + t->file=file; + res[0]+=({t}); + tagstack=({t})+tagstack; + res=({({})})+res; + + return ({}); // don't care + } + + void debug(array|void arr,void|int level) + { + level+=2; + if (!arr) arr=data; + foreach (arr,string|object(SGMLatom) t) + if (stringp(t)) + write("%*s%-=*s\n",level,"",79-level,sprintf("%O",t)); + else + { + write("%*stag %O\n",level,"",t->name,); + if (sizeof(t->args)) + write("%*s%-=*s\n",level+4,"",75-level,sprintf("%O",t->args)); + debug(t->data,level); + } + } + + + private static object p=HTML(); + +//! static void create() +//! static void create(string filename) +//! This object is created with this filename. +//! It's passed to all created tags, for debug and trace purposes. +//! note: +//! No, it doesn't read the file itself. See <ref>feed</ref>. + + void create(void|string _file) + { + file=_file; + + p->_set_tag_callback(got_tag); + p->_set_data_callback(lambda(object g,string data) + { if (data!="") res[0]+=({data}); return ({}); }); + } + +//! static object feed(string s) +//! static array finish() +//! static array result(string s) +//! Feed new data to the object, or finish the stream. +//! No result can be used until finish() is called. +//! +//! Both finish() and result() returns the computed data. +//! +//! 
feed() returns the called object. + + object feed(string s) + { + p->feed(s); + return this_object(); + } + + array finish() + { + p->finish(); + data=res[0]; + res=0; + return data; + } + + array(object(SGMLatom)|string) result() + { + return data; + } +} + -- GitLab