From 0533e2bfc689d534f24afde8402eee901a3f8d40 Mon Sep 17 00:00:00 2001 From: "Mirar (Pontus Hagland)" <pike@sort.mirar.org> Date: Fri, 19 Feb 1999 05:58:41 +0100 Subject: [PATCH] Parser Parser.HTML begun work Rev: src/modules/Parser/Makefile.in:1.1 Rev: src/modules/Parser/acconfig.h:1.1 Rev: src/modules/Parser/config.h.in:1.1 Rev: src/modules/Parser/configure.in:1.1 Rev: src/modules/Parser/html.c:1.1 Rev: src/modules/Parser/module.pmod.in:1.1 Rev: src/modules/Parser/parser.c:1.1 Rev: src/modules/Parser/parser.h:1.1 Rev: src/modules/Parser/testsuite.in:1.1 --- .gitattributes | 5 + src/modules/Parser/Makefile.in | 8 + src/modules/Parser/acconfig.h | 1 + src/modules/Parser/config.h.in | 6 + src/modules/Parser/configure.in | 9 + src/modules/Parser/html.c | 373 ++++++++++++++++++++++++++++++ src/modules/Parser/module.pmod.in | 8 + src/modules/Parser/parser.c | 43 ++++ src/modules/Parser/parser.h | 1 + src/modules/Parser/testsuite.in | 1 + 10 files changed, 455 insertions(+) create mode 100644 src/modules/Parser/Makefile.in create mode 100644 src/modules/Parser/acconfig.h create mode 100644 src/modules/Parser/config.h.in create mode 100644 src/modules/Parser/configure.in create mode 100644 src/modules/Parser/html.c create mode 100644 src/modules/Parser/module.pmod.in create mode 100644 src/modules/Parser/parser.c create mode 100644 src/modules/Parser/parser.h create mode 100644 src/modules/Parser/testsuite.in diff --git a/.gitattributes b/.gitattributes index ae498e7a08..dde0465829 100644 --- a/.gitattributes +++ b/.gitattributes @@ -222,6 +222,11 @@ testfont binary /src/modules/Oracle/acconfig.h foreign_ident /src/modules/Oracle/configure.in foreign_ident /src/modules/Oracle/oracle.c foreign_ident +/src/modules/Parser/Makefile.in foreign_ident +/src/modules/Parser/acconfig.h foreign_ident +/src/modules/Parser/configure.in foreign_ident +/src/modules/Parser/module.pmod.in foreign_ident +/src/modules/Parser/parser.c foreign_ident /src/modules/Perl/Makefile.in 
foreign_ident /src/modules/Perl/acconfig.h foreign_ident /src/modules/Perl/configure.in foreign_ident diff --git a/src/modules/Parser/Makefile.in b/src/modules/Parser/Makefile.in new file mode 100644 index 0000000000..0af97529e0 --- /dev/null +++ b/src/modules/Parser/Makefile.in @@ -0,0 +1,8 @@ +# $Id: Makefile.in,v 1.1 1999/02/19 04:58:37 mirar Exp $ +SRCDIR=@srcdir@ +VPATH=@srcdir@:@srcdir@/../..:../.. +OBJS=parser.o html.o +MODULE_LDFLAGS=@LDFLAGS@ @LIBS@ + +@dynamic_module_makefile@ +@dependencies@ diff --git a/src/modules/Parser/acconfig.h b/src/modules/Parser/acconfig.h new file mode 100644 index 0000000000..44643307a5 --- /dev/null +++ b/src/modules/Parser/acconfig.h @@ -0,0 +1 @@ +/* $Id: acconfig.h,v 1.1 1999/02/19 04:58:38 mirar Exp $ */ diff --git a/src/modules/Parser/config.h.in b/src/modules/Parser/config.h.in new file mode 100644 index 0000000000..3976f80e50 --- /dev/null +++ b/src/modules/Parser/config.h.in @@ -0,0 +1,6 @@ +/* config.h.in. Generated automatically from configure.in by autoheader. */ + +/* Define if type char is unsigned and you are not using gcc. 
*/
+#ifndef __CHAR_UNSIGNED__
+#undef __CHAR_UNSIGNED__
+#endif
diff --git a/src/modules/Parser/configure.in b/src/modules/Parser/configure.in
new file mode 100644
index 0000000000..58aa95abcd
--- /dev/null
+++ b/src/modules/Parser/configure.in
@@ -0,0 +1,9 @@
+# $Id: configure.in,v 1.1 1999/02/19 04:58:39 mirar Exp $
+AC_INIT(parser.c)
+AC_CONFIG_HEADER(config.h)
+
+AC_MODULE_INIT()
+
+AC_C_CHAR_UNSIGNED
+
+AC_OUTPUT(Makefile,echo FOO >stamp-h )
diff --git a/src/modules/Parser/html.c b/src/modules/Parser/html.c
new file mode 100644
index 0000000000..1bb47a67fb
--- /dev/null
+++ b/src/modules/Parser/html.c
@@ -0,0 +1,408 @@
+#include "global.h"
+#include "config.h"
+
+#include "pike_macros.h"
+#include "object.h"
+#include "constants.h"
+#include "interpret.h"
+#include "svalue.h"
+#include "threads.h"
+#include "array.h"
+#include "error.h"
+#include "operators.h"
+
+#include "parser.h"
+
+/* Position in the fed data. */
+struct location
+{
+   struct piece *p;
+
+   int byteno;    /* current byte, first=1 */
+   int lineno;    /* line number, first=1 */
+   int linestart; /* byte current line started at */
+};
+
+/* Node in a singly linked list of strings. */
+struct piece
+{
+   struct pike_string *s;
+   struct piece *next;
+};
+
+/* Entry in the parser stack, linked through prev. */
+struct feed_stack
+{
+   struct piece *start;
+   struct piece *location;
+
+   struct feed_stack *prev;
+};
+
+/* Per-object storage for Parser.HTML. */
+struct parser_html_storage
+{
+   /* fed data */
+   struct piece *feed,*feed_end;
+
+   /* resulting data */
+   struct piece *out,*out_end;
+
+   /* location */
+   struct location head; /* first byte in feed is.. */
+
+   /* parser stack */
+   struct feed_stack *stack;
+
+   struct svalue callback__tag;
+   struct svalue callback__data;
+   struct svalue callback__entity;
+};
+
+#ifdef THIS
+#undef THIS /* Needed for NT */
+#endif
+
+#define THIS ((struct parser_html_storage*)(fp->current_storage))
+#define THISOBJ (fp->current_object)
+
+/****** init & exit *********************************/
+
+/* Frees every node of a piece list together with the string it
+   holds. Always returns NULL, i.e. the empty list. */
+static struct piece *free_piece_list(struct piece *p)
+{
+   while (p)
+   {
+      struct piece *next=p->next;
+      free_string(p->s); /* the piece holds a reference to its string */
+      free(p);
+      p=next;
+   }
+   return NULL;
+}
+
+/* Throws away all buffered input and output, empties the parser
+   stack and resets the location counters to 1. */
+void _reset_feed(void)
+{
+   struct feed_stack *st;
+
+   /* kill feed */
+
+   THIS->feed=free_piece_list(THIS->feed);
+   THIS->feed_end=NULL;
+
+   /* kill out-feed */
+
+   THIS->out=free_piece_list(THIS->out);
+   THIS->out_end=NULL;
+
+   /* free stack */
+
+   while (THIS->stack)
+   {
+      st=THIS->stack;
+      THIS->stack=st->prev;
+      free(st);
+   }
+
+   THIS->head.p=NULL;
+   THIS->head.byteno=1;
+   THIS->head.lineno=1;
+   THIS->head.linestart=1;
+}
+
+/* Object init callback: empty lists, no callbacks set
+   (the callback svalues start out as integers). */
+static void init_html_struct(struct object *o)
+{
+   THIS->feed=NULL;
+   THIS->out=NULL;
+   THIS->stack=NULL;
+   _reset_feed();
+
+   THIS->callback__tag.type=T_INT;
+   THIS->callback__data.type=T_INT;
+   THIS->callback__entity.type=T_INT;
+}
+
+/* Object exit callback: releases buffered data and callbacks. */
+static void exit_html_struct(struct object *o)
+{
+   _reset_feed(); /* frees feed & out */
+
+   free_svalue(&(THIS->callback__tag));
+   free_svalue(&(THIS->callback__data));
+   free_svalue(&(THIS->callback__entity));
+}
+
+/****** setup callbacks *****************************/
+
+/* The three setters below store their first argument as the
+   corresponding callback, ignore any further arguments and
+   return 0. */
+
+static void html__set_tag_callback(INT32 args)
+{
+   if (!args) error("_set_tag_callback: too few arguments\n");
+   assign_svalue(&(THIS->callback__tag),sp-args);
+   pop_n_elems(args);
+   push_int(0);
+}
+
+static void html__set_data_callback(INT32 args)
+{
+   if (!args) error("_set_data_callback: too few arguments\n");
+   assign_svalue(&(THIS->callback__data),sp-args);
+   pop_n_elems(args);
+   push_int(0);
+}
+
+static void html__set_entity_callback(INT32 args)
+{
+   if (!args) error("_set_entity_callback: too few arguments\n");
+   assign_svalue(&(THIS->callback__entity),sp-args);
+   pop_n_elems(args);
+   push_int(0);
+}
+
+/****** try_feed - internal main ********************/
+
+/* Parsing is not implemented yet: whatever is in the feed list is
+   appended unchanged to the out list and the feed list is emptied.
+   The finished argument is not used so far. */
+static void try_feed(int finished)
+{
+   /*
+     o if tag_stack:
+         o pop & parse that
+         o ev put on stack
+   */
+
+   if (THIS->out_end)
+      THIS->out_end->next=THIS->feed;
+   else
+      THIS->out=THIS->out_end=THIS->feed;
+
+   /* move out_end to the last piece of the joined list */
+   while (THIS->out_end && THIS->out_end->next)
+      THIS->out_end=THIS->out_end->next;
+
+   THIS->feed=THIS->feed_end=NULL;
+}
+
+/****** feed ****************************************/
+
+/* feed(string): appends the string to the feed list, runs
+   try_feed() and returns this object. */
+static void html_feed(INT32 args)
+{
+   struct piece *f;
+
+   if (!args ||
+       sp[-args].type!=T_STRING)
+      error("feed: illegal arguments\n");
+
+   f=malloc(sizeof(struct piece));
+   if (!f)
+      error("feed: out of memory\n");
+   copy_shared_string(f->s,sp[-args].u.string);
+
+   pop_n_elems(args);
+
+   f->next=NULL;
+
+   if (THIS->feed_end==NULL)
+      THIS->feed=THIS->feed_end=f;
+   else
+   {
+      THIS->feed_end->next=f;
+      THIS->feed_end=f;
+   }
+
+   try_feed(0);
+
+   ref_push_object(THISOBJ);
+}
+
+/* Appends s to the out list. */
+static void put_out_feed(struct pike_string *s)
+{
+   struct piece *f=malloc(sizeof(struct piece));
+   if (!f)
+      error("Parser.HTML(): out of memory\n");
+   copy_shared_string(f->s,s);
+
+   f->next=NULL;
+
+   if (THIS->out_end==NULL)
+      THIS->out=THIS->out_end=f;
+   else
+   {
+      THIS->out_end->next=f;
+      THIS->out_end=f;
+   }
+}
+
+/* finish(): runs try_feed() with finished set and returns this
+   object. */
+static void html_finish(INT32 args)
+{
+   pop_n_elems(args);
+   try_feed(1);
+   ref_push_object(THISOBJ);
+}
+
+/* read(void|int n): removes up to n characters (everything when
+   called without arguments) from the front of the out list and
+   returns them as one string. A negative n is an error. */
+static void html_read(INT32 args)
+{
+   int n=0x7fffffff; /* a lot */
+   int m=0; /* strings on stack */
+
+   if (args)
+   {
+      /* a negative count would be passed on to string_slice()
+         as a negative length */
+      if (sp[-args].type!=T_INT ||
+          sp[-args].u.integer<0)
+         error("read: illegal argument\n");
+      n=sp[-args].u.integer;
+   }
+
+   pop_n_elems(args);
+
+   /* collect up to n characters */
+
+   while (THIS->out && n)
+   {
+      struct piece *z;
+
+      if (THIS->out->s->len>n)
+      {
+         /* only part of this piece is wanted: return the first n
+            characters and leave the rest in the piece */
+         struct pike_string *ps;
+         push_string(string_slice(THIS->out->s,0,n));
+         m++;
+         ps=string_slice(THIS->out->s,n,THIS->out->s->len-n);
+         free_string(THIS->out->s);
+         THIS->out->s=ps;
+         break;
+      }
+      n-=THIS->out->s->len;
+      push_string(THIS->out->s);
+      m++;
+      z=THIS->out;
+      THIS->out=THIS->out->next;
+      free(z);
+   }
+
+   if (!THIS->out)
+      THIS->out_end=NULL;
+
+   if (!m)
+      push_text("");
+   else
+      f_add(m);
+}
+
+/****** module init *********************************/
+
+/* Registers storage, init/exit callbacks and the methods of
+   Parser.HTML in the program being compiled. */
+void init_parser_html(void)
+{
+   ADD_STORAGE(struct parser_html_storage);
+
+   set_init_callback(init_html_struct);
+   set_exit_callback(exit_html_struct);
+
+#define CBRET "string|array(string)" /* 0|string|({string}) */
+
+   /* feed control */
+
+   add_function("feed",html_feed,
+                "function(string:object)",0);
+   add_function("finish",html_finish,
+                "function(:object)",0);
+   add_function("read",html_read,
+                "function(void|int:string)",0);
+
+   /* special callbacks */
+
+   add_function("_set_tag_callback",html__set_tag_callback,
+                "function(function(object,mixed ...:"CBRET"):void)",0);
+   add_function("_set_data_callback",html__set_data_callback,
+                "function(function(object,mixed ...:"CBRET"):void)",0);
+   add_function("_set_entity_callback",html__set_entity_callback,
+                "function(function(object,mixed ...:"CBRET"):void)",0);
+}
+
+
+/*
+
+class Parse_HTML
+{
+   void feed(string something); // more data in stream
+   void finish();               // stream ends here
+
+   string read(void|int chars); // read out-feed
+
+   void reset();                // reset stream
+
+   object clone();              // new object, fresh stream
+
+   // argument quote ( < ... foo="bar" foo='bar' ...> )
+   void set_quote(string start,string end);
+   void set_quote(string start,string end,
+                  string start2,string end2, ...); // tuples
+
+   // tag quote
+   void set_tag_quote(string start,string end); // "<" ">"
+
+   // call to_call(this,mapping(string:string) args,...extra)
+   void add_tag(string tag,function to_call);
+
+   // call to_call(this,mapping(string:string) args,string cont,...extra)
+   void add_container(string tag,function to_call);
+
+   // same as above, but tries globs (slower<tm>)
+   void add_glob_tag(string tag,function to_call);
+   void add_glob_container(string tag,function to_call);
+
+   // set extra args
+   void extra(mixed ...extra);
+
+   // query where we are now
+   string at_tag();    // tag name
+   string tag_data();  // tag string (< foo bar=z > -> " foo bar=z ")
+   int at_line();      // line number (first=1)
+   int at_char();      // char (first=1)
+   int at_column();    // column (first=1)
+
+   // low-level callbacks
+   // calls to_call(this,string data)
+   void _set_tag_callback(function to_call);
+   void _set_data_callback(function to_call);
+
+   // just useful
+   mapping parse_get_tag(string tag);
+   mapping parse_get_args(string tag);
+
+   // entity quote
+   void set_entity_quote(string start,string end); // "&",";"
+
+   int set_allow_open_entity(void|int yes); // &entfoo<bar> -> ent
+
+   // call to_call(this,string entity,...extra);
+   void add_entity(string entity,function to_call);
+   void add_glob_entity(string entity,function to_call);
+
+   // calls to_call(this,string data)
+   void _set_entity_callback(function to_call);
+}
+
+*/
diff --git a/src/modules/Parser/module.pmod.in b/src/modules/Parser/module.pmod.in
new file mode 100644
index 0000000000..747882d115
--- /dev/null
+++ b/src/modules/Parser/module.pmod.in
@@ -0,0 +1,8 @@
+/*
+ * $Id: module.pmod.in,v 1.1 1999/02/19 04:58:40 mirar Exp $
+ *
+ */
+
+inherit @module@;
+
+
diff --git a/src/modules/Parser/parser.c b/src/modules/Parser/parser.c
new file mode 100644
index 0000000000..c5caac4454
--- 
/dev/null
+++ b/src/modules/Parser/parser.c
@@ -0,0 +1,49 @@
+/*
+ * $Id: parser.c,v 1.1 1999/02/19 04:58:40 mirar Exp $
+ */
+
+#include "global.h"
+#include "config.h"
+
+#include "program.h"
+
+#include "parser.h"
+
+/*** module init & exit & stuff *****************************************/
+
+/* add other parsers here */
+
+/* Sub-module table: constant name and the function that fills in
+   the program being compiled. */
+static struct parser_class
+{
+   char *name;
+   void (*func)(void);
+} sub[] = {
+   {"HTML",init_parser_html},
+};
+
+/* Module exit hook; currently does nothing. */
+void pike_module_exit(void)
+{
+}
+
+/* Builds one program per entry in sub[] and adds it to the
+   module as a constant named after the entry. */
+void pike_module_init(void)
+{
+   int i;
+
+   for (i=0; i<(int)(sizeof(sub)/sizeof(sub[0])); i++)
+   {
+      struct program *p;
+
+      start_new_program();
+      sub[i].func();
+      p=end_program();
+      add_program_constant(sub[i].name,p,0);
+      /* release our own reference to p */
+      free_program(p);
+   }
+}
diff --git a/src/modules/Parser/parser.h b/src/modules/Parser/parser.h
new file mode 100644
index 0000000000..3557f1bc1e
--- /dev/null
+++ b/src/modules/Parser/parser.h
@@ -0,0 +1 @@
+void init_parser_html(void);
diff --git a/src/modules/Parser/testsuite.in b/src/modules/Parser/testsuite.in
new file mode 100644
index 0000000000..8b13789179
--- /dev/null
+++ b/src/modules/Parser/testsuite.in
@@ -0,0 +1 @@
+
-- 
GitLab