From 40a223b785bc73072e26cf69adcffdb61e9691d3 Mon Sep 17 00:00:00 2001 From: Per Hedbor <ph@opera.com> Date: Sat, 12 Aug 2000 08:17:22 +0200 Subject: [PATCH] Removed the streaming parser, and moved http_decode_string to Roxen. This is the start of the eradication of the spider module with its confusing global functions.. Rev: src/modules/spider/Makefile.in:1.20 Rev: src/modules/spider/configure.in:1.25 Rev: src/modules/spider/spider.c:1.98 Rev: src/modules/spider/streamed_parser.c:1.13(DEAD) Rev: src/modules/spider/streamed_parser.h:1.4(DEAD) --- .gitattributes | 2 - src/modules/spider/Makefile.in | 3 +- src/modules/spider/configure.in | 4 +- src/modules/spider/spider.c | 265 +------- src/modules/spider/streamed_parser.c | 862 --------------------------- src/modules/spider/streamed_parser.h | 21 - 6 files changed, 7 insertions(+), 1150 deletions(-) delete mode 100644 src/modules/spider/streamed_parser.c delete mode 100644 src/modules/spider/streamed_parser.h diff --git a/.gitattributes b/.gitattributes index ebd5a03ac8..fb3c607fc0 100644 --- a/.gitattributes +++ b/.gitattributes @@ -483,8 +483,6 @@ testfont binary /src/modules/spider/discdate.c foreign_ident /src/modules/spider/spider.c foreign_ident /src/modules/spider/stardate.c foreign_ident -/src/modules/spider/streamed_parser.c foreign_ident -/src/modules/spider/streamed_parser.h foreign_ident /src/modules/sprintf/Makefile.in foreign_ident /src/modules/sprintf/configure.in foreign_ident /src/modules/sprintf/sprintf.c foreign_ident diff --git a/src/modules/spider/Makefile.in b/src/modules/spider/Makefile.in index 54982a1eef..7230279fa3 100644 --- a/src/modules/spider/Makefile.in +++ b/src/modules/spider/Makefile.in @@ -1,9 +1,8 @@ -# $Id: Makefile.in,v 1.19 1999/11/18 08:03:08 hubbe Exp $ +# $Id: Makefile.in,v 1.20 2000/08/12 06:17:21 per Exp $ @make_variables@ VPATH=@srcdir@:@srcdir@/../..:../.. 
OBJS=spider.o discdate.o stardate.o xml.o MODULE_LDFLAGS=@LDFLAGS@ -# streamed_parser.o CONFIG_HEADERS=@CONFIG_HEADERS@ diff --git a/src/modules/spider/configure.in b/src/modules/spider/configure.in index 596e36051e..34a5ec55f6 100644 --- a/src/modules/spider/configure.in +++ b/src/modules/spider/configure.in @@ -1,11 +1,9 @@ -# $Id: configure.in,v 1.24 1998/09/20 08:33:50 hubbe Exp $ +# $Id: configure.in,v 1.25 2000/08/12 06:17:21 per Exp $ AC_INIT(spider.c) AC_CONFIG_HEADER(config.h) AC_MODULE_INIT() -AC_ARG_WITH(streamed_parser, [ --with-streamed-parser Enable the streamed-parser module.], [AC_DEFINE(ENABLE_STREAMED_PARSER)], []) - AC_CHECK_LIB(bind, __inet_ntoa) AC_CHECK_LIB(socket, socket) AC_CHECK_LIB(nsl, gethostbyname) diff --git a/src/modules/spider/spider.c b/src/modules/spider/spider.c index 2bd5c9eebc..49d843099b 100644 --- a/src/modules/spider/spider.c +++ b/src/modules/spider/spider.c @@ -43,7 +43,7 @@ #include "threads.h" #include "operators.h" -RCSID("$Id: spider.c,v 1.97 2000/08/10 09:51:55 per Exp $"); +RCSID("$Id: spider.c,v 1.98 2000/08/12 06:17:18 per Exp $"); #ifdef HAVE_PWD_H #include <pwd.h> @@ -103,52 +103,6 @@ void do_html_parse_lines(struct pike_string *ss, struct array *extra_args, int line); - -void f_nice(INT32 args) -{ -#ifdef HAVE_NICE - int ta = sp[-1].u.integer; - if(!args) error("You must supply an argument to nice(int)!\n"); - pop_n_elems(args); - push_int(nice(ta)); -#endif -} - -void f_http_decode_string(INT32 args) -{ - int proc; - char *foo,*bar,*end; - struct pike_string *newstr; - - if (!args || sp[-args].type != T_STRING) - error("Invalid argument to http_decode_string(STRING);\n"); - - foo=bar=sp[-args].u.string->str; - end=foo+sp[-args].u.string->len; - - /* count '%' characters */ - for (proc=0; foo<end; ) if (*foo=='%') { proc++; foo+=3; } else foo++; - - if (!proc) { pop_n_elems(args-1); return; } - - /* new string len is (foo-bar)-proc*2 */ - newstr=begin_shared_string((foo-bar)-proc*2); - foo=newstr->str; - for 
(proc=0; bar<end; foo++) - if (*bar=='%') - { - if (bar<end-2) - *foo=(((bar[1]<'A')?(bar[1]&15):((bar[1]+9)&15))<<4)| - ((bar[2]<'A')?(bar[2]&15):((bar[2]+9)&15)); - else - *foo=0; - bar+=3; - } - else { *foo=*(bar++); } - pop_n_elems(args); - push_string(end_shared_string(newstr)); -} - void f_parse_accessed_database(INT32 args) { ptrdiff_t cnum = 0, i; @@ -958,22 +912,6 @@ void do_html_parse_lines(struct pike_string *ss, } } -#ifndef HAVE_INT_TIMEZONE -int _tz; -#else -extern long int timezone; -#endif - -void f_timezone(INT32 args) -{ - pop_n_elems(args); -#ifndef HAVE_INT_TIMEZONE - push_int(_tz); -#else - push_int(timezone); -#endif -} - void f_get_all_active_fd(INT32 args) { int i,fds,q, ne; @@ -1076,151 +1014,6 @@ void f__dump_obj_table(INT32 args) #define MIN(A,B) ((A)<(B)?(A):(B)) #endif -#ifdef ENABLE_STREAMED_PARSER -#include "streamed_parser.h" - -static struct program *streamed_parser; - -#endif /* ENABLE_STREAMED_PARSER */ - -extern void init_udp(void); -extern void init_xml(void); -extern void exit_xml(void); - - -/* Hohum. Here we go. This is try number three for a more optimized Roxen. */ - -#ifdef _REENTRANT -#define BUFFER (8192) - -struct thread_args -{ - struct thread_args *next; - struct object *from; - struct object *to; - INT_TYPE to_fd, from_fd; - struct svalue cb; - struct svalue args; - INT_TYPE len; - INT_TYPE sent; - char buffer[BUFFER]; -}; - -MUTEX_T done_lock STATIC_MUTEX_INIT; -struct thread_args *done; - -/* WARNING! This function is running _without_ any stack etc. 
*/ - -#define MY_MIN(a,b) ((a)<(b)?(a):(b)) -void do_shuffle(void *_a) -{ - struct thread_args *a = (struct thread_args *)_a; - -#ifdef DIRECTIO_ON - if(a->len > (65536*2)) - directio(a->from_fd, DIRECTIO_ON); -#endif - - while(a->len) - { - int nread, written=0; - nread = fd_read(a->from_fd, a->buffer, MY_MIN(BUFFER,a->len)); - if(nread <= 0) { - if (!nread) - break; - if(errno == EINTR) - continue; - else - break; - } - - while(nread) - { - int nsent = fd_write(a->to_fd, a->buffer+written, nread); - if(nsent < 0) { - if(errno != EINTR) - goto end; - else - continue; - } - written += nsent; - a->sent += nsent; - nread -= nsent; - a->len -= nsent; - } - } - - /* We are done. It is up to the backend callback to call the - * finish function - */ - end: - mt_lock(&done_lock); - a->next = done; - done = a; - mt_unlock(&done_lock); - wake_up_backend(); -} - -static int num_shuffles = 0; -static struct callback *my_callback; - -void finished_p(struct callback *foo, void *b, void *c) -{ - while(done) - { - struct thread_args *d; - - mt_lock(&done_lock); - d = done; - done = d->next; - mt_unlock(&done_lock); - - num_shuffles--; - - push_int( d->sent ); - *(sp++) = d->args; - push_object( d->from ); - push_object( d->to ); - apply_svalue( &d->cb, 4 ); - free_svalue( &d->cb ); - pop_stack(); - free(d); - } - - if(!num_shuffles) - { - remove_callback( foo ); - my_callback = 0; - } -} - -void f_shuffle(INT32 args) -{ - struct thread_args *a = malloc(sizeof(struct thread_args)); - struct svalue *q, *w; - get_all_args("shuffle", args, "%o%o%*%*%d", &a->from, &a->to,&q,&w,&a->len); - a->sent = 0; - - num_shuffles++; - apply(a->to, "query_fd", 0); - apply(a->from, "query_fd", 0); - get_all_args("shuffle", 2, "%d%d", &a->to_fd, &a->from_fd); - - add_ref(a->from); - add_ref(a->to); - - assign_svalue_no_free(&a->cb, q); - assign_svalue_no_free(&a->args, w); - - th_farm(do_shuffle, (void *)a); - - if(!my_callback) - my_callback = add_backend_callback( finished_p, 0, 0 ); - - 
pop_n_elems(args+2); -} -#endif - void pike_module_init(void) { @@ -1228,17 +1021,8 @@ void pike_module_init(void) empty_string = sp[-1]; pop_stack(); - -#ifdef _REENTRANT - /* function(object,object,function,mixed,int:void) */ - ADD_FUNCTION("shuffle", f_shuffle,tFunc(tObj tObj tFunction tMix tInt,tVoid), 0); -#endif ADD_EFUN("_low_program_name", f__low_program_name,tFunc(tProgram,tStr),0); -/* function(string:string) */ - ADD_EFUN("http_decode_string",f_http_decode_string,tFunc(tStr,tStr), - OPT_TRY_OPTIMIZE); - /* function(int:int) */ ADD_EFUN("set_start_quote",f_set_start_quote,tFunc(tInt,tInt),OPT_EXTERNAL_DEPEND); @@ -1298,64 +1082,25 @@ void pike_module_init(void) /* function(int,void|int:int) */ ADD_EFUN("stardate", f_stardate,tFunc(tInt tOr(tVoid,tInt),tInt), 0); -/* function(:int) */ - ADD_EFUN("timezone", f_timezone,tFunc(tNone,tInt), 0); - /* function(:array(int)) */ ADD_EFUN("get_all_active_fd", f_get_all_active_fd,tFunc(tNone,tArr(tInt)), OPT_EXTERNAL_DEPEND); -/* function(int:int) */ - ADD_EFUN("nice", f_nice,tFunc(tInt,tInt), - OPT_EXTERNAL_DEPEND|OPT_SIDE_EFFECT); - /* function(int:string) */ ADD_EFUN("fd_info", f_fd_info,tFunc(tInt,tStr), OPT_EXTERNAL_DEPEND); - - /* timezone() needs */ { - time_t foo = (time_t)0; - struct tm *g; - - g = localtime(&foo); -#ifndef HAVE_INT_TIMEZONE - _tz = g->tm_gmtoff; -#endif + extern void init_xml(); + init_xml(); } - -#ifdef ENABLE_STREAMED_PARSER - start_new_program(); - add_storage( sizeof (struct streamed_parser) ); - /* function(mapping(string:function(string,mapping(string:string),mixed:mixed)),mapping(string:function(string,mapping(string:string),string,mixed:mixed)),mapping(string:function(string,mixed:mixed)):void) */ - ADD_FUNCTION( "init", streamed_parser_set_data,tFunc(tMap(tStr,tFunc(tStr tMap(tStr,tStr) tMix,tMix)) tMap(tStr,tFunc(tStr tMap(tStr,tStr) tStr tMix,tMix)) tMap(tStr,tFunc(tStr tMix,tMix)),tVoid), 0 ); - /* function(string,mixed:string) */ - ADD_FUNCTION( "parse", 
streamed_parser_parse,tFunc(tStr tMix,tStr), 0 ); - /* function(void:string) */ - ADD_FUNCTION( "finish", streamed_parser_finish,tFunc(tVoid,tStr), 0 ); - set_init_callback( streamed_parser_init ); - set_exit_callback( streamed_parser_destruct ); - - streamed_parser = end_program(); - add_program_constant("streamed_parser", streamed_parser,0); -#endif /* ENABLE_STREAMED_PARSER */ - - init_xml(); } void pike_module_exit(void) { int i; - free_string(empty_string.u.string); - -#ifdef ENABLE_STREAMED_PARSER - if(streamed_parser) { - free_program(streamed_parser); - streamed_parser=0; + extern void exit_xml(); + exit_xml(); } -#endif /* ENABLE_STREAMED_PARSER */ - - exit_xml(); } diff --git a/src/modules/spider/streamed_parser.c b/src/modules/spider/streamed_parser.c deleted file mode 100644 index 32938e1f49..0000000000 --- a/src/modules/spider/streamed_parser.c +++ /dev/null @@ -1,862 +0,0 @@ -/* - * $Id: streamed_parser.c,v 1.12 2000/07/28 07:15:49 hubbe Exp $ - */ - -#include "global.h" - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif /* HAVE_CONFIG_H */ - -#include "stralloc.h" -#include "pike_macros.h" -#include "object.h" -#include "constants.h" -#include "interpret.h" -#include "svalue.h" -#include "mapping.h" -#include "array.h" -#include "builtin_functions.h" -#include "error.h" -#include "module_support.h" -#include "multiset.h" -#include "operators.h" - -#include "streamed_parser.h" - -/* This must be included last! 
*/ -#include "module_magic.h" - -RCSID("$Id: streamed_parser.c,v 1.12 2000/07/28 07:15:49 hubbe Exp $"); - -#ifdef ENABLE_STREAMED_PARSER - -/* streamed SGML parser, by wing */ - -/* State machine for parsing - -notag < tag_start - notag - -tag_start / tag_end - WS tag_start - > notag - tag_name - -tag_end WS skip (pop-tag-stack) - > notag (error) (pop-tag-stack) - tag_end - -tag_end_name WS tag_end_name - > notag - tag_end_name - -skip > notag - " skip_fnutt_fnutt - ' skip_fnutt - skip - -skip_fnutt_fnutt " skip - skip_fnutt_fnutt - -skip_fnutt ' skip - skip_fnut - -tag_name WS skip (or) tag_ws - > notag (or) content - tag_name - -tag_ws WS tag_ws - > notag (check if something changed) (or) content - tag_arg_name - -tag_arg_name WS tag_post_arg_name - = tag_pre_arg_name - > notag (check if something changed) (or) content - tag_arg_name - -tag_post_arg_name WS tag_post_arg_name - = tag_pre_arg_value - > notag (check if something changed) (or) content - tag_arg_name - -tag_pre_arg_value WS tag_pre_arg_value - > notag (error) (check if something changed) - (or) content - " tag_arg_value_fnutt_fnutt - ' tag_arg_value_fnutt - tag_arg_value - -tag_arg_value_fnutt_fnutt " tag_ws - tag_arg_value_fnutt_fnutt - -tag_arg_value_fnutt ' tag_ws - tag_arg_value_fnutt - -tag_arg_value WS tag_ws - > notag (check if something changed) (or) content - tag_arg_value - -content < content_tag_start - content - -content_tag_start / content_tag_end - content_skip - -content_tag_end WS content_tag_end - > (error) content - content_tag_end_name - -content_tag_end_name WS content_tag_end_name - > content (or) notag - content_tag_end_name - -content_skip > content - " content_skip_fnutt_fnutt - ' content_skip_fnutt - content_skip - -content_skip_fnutt_fnutt " content_skip - content_skip_fnutt_fnutt - -content_skip_fnutt ' content_skip - content_skip_fnutt - -*/ - -#define NOTAG 0 -#define TAG_START 1 -#define TAG_END 2 -#define TAG_END_NAME 3 -#define SKIP 4 -#define SKIP_FNUTT_FNUTT 5 
-#define SKIP_FNUTT 6 -#define TAG_NAME 7 -#define TAG_WS 8 -#define TAG_ARG_NAME 9 -#define TAG_POST_ARG_NAME 10 -#define TAG_PRE_ARG_VALUE 11 -#define TAG_ARG_VALUE_FNUTT_FNUTT 12 -#define TAG_ARG_VALUE_FNUTT 13 -#define TAG_ARG_VALUE 14 -#define CONTENT 15 -#define CONTENT_TAG_START 16 -#define CONTENT_TAG_END 17 -#define CONTENT_TAG_END_NAME 18 -#define CONTENT_SKIP 19 -#define CONTENT_SKIP_FNUTT_FNUTT 20 -#define CONTENT_SKIP_FNUTT 21 - -#define ARG_TYPE_NONE 0 -#define ARG_TYPE_IN 1 -#define ARG_TYPE_OUT 2 - -#define WS ' ': case '\t': case '\n': case '\r' - -#define DATA ((struct streamed_parser *)(fp->current_storage)) - -void streamed_parser_init(void) -{ - DATA->last_buffer = 0; - DATA->last_buffer_size = 0; - DATA->start_tags = 0; - DATA->content_tags = 0; - DATA->end_tags = 0; -} - -void streamed_parser_destruct(void) -{ - if (DATA->last_buffer) - free( DATA->last_buffer ); -#if 0 /* Per, to clean up _some_ of the warnings */ - if (DATA->start_tags) - ; - if (DATA->end_tags) - ; -#endif -} - -void streamed_parser_set_data( INT32 args ) -{ - get_all_args("spider.streamed_parser->set_data", args, "%m%m%m", - &(DATA->start_tags), &(DATA->content_tags), &(DATA->end_tags)); - add_ref(DATA->start_tags); - add_ref(DATA->content_tags); - add_ref(DATA->end_tags); - pop_n_elems(args); -} - -#define SWAP \ - *sp = sp[-2]; \ - sp[-2] = sp[-1]; \ - sp[-1] = *sp - -static int handle_tag( struct svalue *data_arg ) -{ - struct svalue *fun; - - push_svalue(data_arg); - if(!(fun = low_mapping_lookup( DATA->start_tags, sp-3 ))) - error("Error in streamed parser.\n"); - - apply_svalue(fun, 3); - - if (sp[-1].type == T_STRING) - { - return 1; - } else { - pop_stack(); - return 0; - } -} - -static int handle_content_tag( struct svalue *data_arg ) -{ - struct svalue *fun; - - push_svalue(data_arg); - if(!(fun = low_mapping_lookup( DATA->content_tags, sp-3 ))) - error("Error in streamed parser.\n"); - - apply_svalue(fun, 3); - if (sp[-1].type == T_STRING) - return 1; - else - 
{ - pop_stack(); - return 0; - } -} - -static int handle_end_tag( struct svalue *data_arg ) -{ - struct svalue *fun; - - fun = low_mapping_lookup( DATA->start_tags, sp-1); - - /* NB: fun would not be valid if the value popped here is an object, - * fortunately it is not. */ - pop_stack(); - - push_svalue(data_arg); - if(fun) - { - apply_svalue(fun, 1); - if (sp[-1].type == T_STRING) - return 1; - else - { - pop_stack(); - return 0; - } - } - return 0; -} - -static void add_arg(void) -{ - mapping_insert( sp[-3].u.mapping, sp-2, sp-1 ); - pop_stack(); - pop_stack(); -} - -void streamed_parser_parse( INT32 args ) -{ - int c, length, state, begin, last, ind=0, ind2=0, ind3=0, ind4=0, ind5=0, content_tag=0; - char *str; - struct svalue *sp_save; - struct svalue *sp_tag_save; - struct svalue *data_arg; - struct pike_string *to_parse; - - get_all_args("spider.streamed_parser->parse", args, "%S", &to_parse); - - state = NOTAG; - begin = 0; - last = -1; - SWAP; - length = to_parse->len; - if (!(str = alloca( DATA->last_buffer_size + length ))) { - error("spider.streamed_parser->parse(): Out of memory\n"); - } - if (DATA->last_buffer_size > 0) - { - MEMCPY( str, DATA->last_buffer, DATA->last_buffer_size ); - MEMCPY( str + DATA->last_buffer_size, to_parse->str, length ); - length += DATA->last_buffer_size; - free( DATA->last_buffer ); - DATA->last_buffer = 0; - DATA->last_buffer_size = 0; - } else { - MEMCPY(str, to_parse->str, length); - } - pop_stack(); - - data_arg = sp-1; - sp_save = sp; - sp_tag_save = 0; - for (c=0; c < length; c++) - switch (state) - { - case NOTAG: - switch (str[c]) - { - case '<': - state = TAG_START; - break; - default: - last = c; - } - break; - - case TAG_START: - switch (str[c]) - { - case '/': - state = TAG_END; - break; - case WS: - state = TAG_START; - break; - case '>': - last = c; - state = NOTAG; - break; - default: - ind = c; - state = TAG_NAME; - break; - } - break; - - case TAG_END: - switch (str[c]) - { - case WS: - break; - case '>': 
/* error */ - last = c; - state = NOTAG; - break; - default: - ind = c; - ind2 = -1; - state = TAG_END_NAME; - break; - } - break; - - case TAG_END_NAME: - switch (str[c]) - { - case WS: - if (ind2 == -1) - ind2 = c-1; - break; - case '>': - if (ind2 == -1) - ind2 = c-1; - push_string( make_shared_binary_string( str + ind, ind2 - ind ) ); - f_lower_case( 1 ); - if (low_mapping_lookup( DATA->end_tags, sp-1 )) - { - if (handle_end_tag( data_arg )) - { - if (last >= begin) - { - push_string( make_shared_binary_string( str + begin, last - begin + 1 ) ); - SWAP; - } - begin = c+1; - } - } - else - pop_stack(); - last = c; - state = NOTAG; - break; - default: - break; - } - break; - - case SKIP: - switch (str[c]) - { - case '>': - last = c; - state = NOTAG; - break; - case '"': - state = SKIP_FNUTT_FNUTT; - break; - case '\'': - state = SKIP_FNUTT; - break; - } - break; - - case SKIP_FNUTT_FNUTT: - switch (str[c]) - { - case '"': - state = SKIP; - break; - } - break; - - case SKIP_FNUTT: - switch (str[c]) - { - case '\'': - state = SKIP; - break; - } - break; - - case TAG_NAME: - switch (str[c]) - { - case WS: - push_string( make_shared_binary_string( str + ind, c - ind ) ); - f_lower_case( 1 ); - if (low_mapping_lookup( DATA->start_tags, sp-1 )) - { - f_aggregate_mapping( 0 ); - state = TAG_WS; - sp_tag_save = sp-1; - content_tag = 0; - } - else if (low_mapping_lookup( DATA->content_tags, sp-1 )) - { - f_aggregate_mapping( 0 ); - state = TAG_WS; - sp_tag_save = sp-1; - content_tag = 1; - } - else - { - pop_stack(); - state = SKIP; - } - break; - case '>': - push_string( make_shared_binary_string( str + ind, c - ind ) ); - f_lower_case( 1 ); - if (low_mapping_lookup( DATA->start_tags, sp-1 )) - { - f_aggregate_mapping( 0 ); - if (handle_tag( data_arg )) - { - if (last >= begin) - { - push_string( make_shared_binary_string( str + begin, last - begin + 1 ) ); - SWAP; - } - begin = c+1; - } -#if 0 /* DITTO */ - else - ; -#endif - } - else if (low_mapping_lookup( 
DATA->content_tags, sp-1 )) - { - f_aggregate_mapping( 0 ); - ind2 = c+1; - state = CONTENT; - break; - } - else - pop_stack(); - last = c; - sp_tag_save = 0; - state = NOTAG; - break; - } - break; - - case TAG_WS: - switch (str[c]) - { - case WS: - break; - case '>': - if (content_tag) - { - ind2 = c+1; - state = CONTENT; - break; - } - if (handle_tag( data_arg )) - { - if (last >= begin) - { - push_string( make_shared_binary_string( str + begin, last - begin + 1 ) ); - SWAP; - } - begin = c+1; - } -#if 0 /* DITTO */ - else - ; -#endif - last = c; - sp_tag_save = 0; - state = NOTAG; - break; - default: - ind = c; - state = TAG_ARG_NAME; - break; - } - break; - - case TAG_ARG_NAME: - switch (str[c]) - { - case WS: - push_string( make_shared_binary_string( str + ind, c - ind ) ); - f_lower_case( 1 ); - state = TAG_POST_ARG_NAME; - break; - case '=': - push_string( make_shared_binary_string( str + ind, c - ind ) ); - f_lower_case( 1 ); - state = TAG_PRE_ARG_VALUE; - break; - case '>': - push_string( make_shared_binary_string( str + ind, c - ind ) ); - f_lower_case( 1 ); - push_text( "" ); - add_arg(); - if (content_tag) - { - ind2 = c+1; - state = CONTENT; - break; - } - if (handle_tag( data_arg )) - { - if (last >= begin) - { - push_string( make_shared_binary_string( str + begin, last - begin + 1 ) ); - SWAP; - } - begin = c+1; - } -#if 0 /* DITTO */ - else - ; -#endif - last = c; - sp_tag_save = 0; - state = NOTAG; - break; - } - break; - - case TAG_POST_ARG_NAME: - switch (str[c]) - { - case WS: - break; - case '=': - state = TAG_PRE_ARG_VALUE; - break; - case '>': - push_text( "" ); - add_arg(); - if (content_tag) - { - ind2 = c+1; - state = CONTENT; - break; - } - if (handle_tag( data_arg )) - { - if (last >= begin) - { - push_string( make_shared_binary_string( str + begin, last - begin + 1 ) ); - SWAP; - } - begin = c+1; - } -#if 0 /* DITTO */ - else - ; -#endif - last = c; - sp_tag_save = 0; - state = NOTAG; - break; - default: - push_text( "" ); - add_arg(); 
- ind = c; - state = TAG_ARG_NAME; - break; - } - break; - - case TAG_PRE_ARG_VALUE: - switch (str[c]) - { - case WS: - break; - case '>': - push_text( "" ); - add_arg(); - if (content_tag) - { - ind2 = c+1; - state = CONTENT; - break; - } - if (handle_tag( data_arg )) - { - if (last >= begin) - { - push_string( make_shared_binary_string( str + begin, last - begin + 1 ) ); - SWAP; - } - begin = c+1; - } -#if 0 /* DITTO */ - else - ; -#endif - last = c; - sp_tag_save = 0; - state = NOTAG; /* error */ - break; - case '"': - state = TAG_ARG_VALUE_FNUTT_FNUTT; - ind = c+1; - break; - case '\'': - state = TAG_ARG_VALUE_FNUTT; - ind = c+1; - break; - default: - state = TAG_ARG_VALUE; - ind = c; - break; - } - break; - - case TAG_ARG_VALUE_FNUTT_FNUTT: - switch (str[c]) - { - case '"': - push_string( make_shared_binary_string( str + ind, c - ind ) ); - add_arg(); - state = TAG_WS; - break; - } - - case TAG_ARG_VALUE_FNUTT: - switch (str[c]) - { - case '\'': - push_string( make_shared_binary_string( str + ind, c - ind ) ); - add_arg(); - state = TAG_WS; - break; - } - break; - - case TAG_ARG_VALUE: - switch (str[c]) - { - case WS: - push_string( make_shared_binary_string( str + ind, c - ind ) ); - add_arg(); - state = TAG_WS; - break; - case '>': - push_string( make_shared_binary_string( str + ind, c - ind ) ); - add_arg(); - state = TAG_WS; - if (content_tag) - { - ind2 = c+1; - state = CONTENT; - break; - } - if (handle_tag( data_arg )) - { - if (last >= begin) - { - push_string( make_shared_binary_string( str + begin, last - begin + 1 ) ); - SWAP; - } - begin = c+1; - } -#if 0 /* DITTO ½ */ - else - ; -#endif - last = c; - sp_tag_save = 0; - state = NOTAG; - break; - } - break; - - case CONTENT: - switch (str[c]) - { - case '<': - state = CONTENT_TAG_START; - ind3 = c-1; - break; - } - break; - - case CONTENT_TAG_START: - switch (str[c]) - { - case '/': - state = CONTENT_TAG_END; - break; - default: - state = CONTENT_SKIP; - break; - } - break; - - case CONTENT_TAG_END: 
- switch (str[c]) - { - case WS: - state = CONTENT_TAG_END; - break; - case '>': /* error */ - state = CONTENT; - break; - default: - ind4 = c; - ind5 = -1; - state = CONTENT_TAG_END_NAME; - break; - } - break; - - case CONTENT_TAG_END_NAME: - switch (str[c]) - { - case WS: - if (ind5 == -1) - ind5 = c-1; - break; - case '>': - if (ind5 == -1) - ind5 = c-1; - push_string( make_shared_binary_string( str + ind4, ind5 - ind4 ) ); - f_lower_case( 1 ); - if (is_same_string( sp[-1].u.string, sp[-3].u.string )) - { - pop_stack(); - push_string( make_shared_binary_string( str + ind2, ind2 - ind3 ) ); - if (handle_content_tag( data_arg )) - { - if (last >= begin) - { - push_string( make_shared_binary_string( str + begin, last - begin + 1 ) ); - SWAP; - } - begin = c+1; - } -#if 0 /* DITTO */ - else - ; -#endif - last = c; - sp_tag_save = 0; - state = NOTAG; - break; - } - else - { - pop_stack(); - state = CONTENT; - } - break; - default: - break; - } - break; - - case CONTENT_SKIP: - switch (str[c]) - { - case '>': - last = c; - state = CONTENT; - break; - case '"': - state = CONTENT_SKIP_FNUTT_FNUTT; - break; - case '\'': - state = CONTENT_SKIP_FNUTT; - break; - } - break; - - case CONTENT_SKIP_FNUTT_FNUTT: - switch (str[c]) - { - case '"': - state = CONTENT_SKIP; - break; - } - break; - - case CONTENT_SKIP_FNUTT: - switch (str[c]) - { - case '\'': - state = CONTENT_SKIP; - break; - } - break; -#if 0 - default: - /* Make HPCC happy */ -#endif - } - if (sp_tag_save) - while (sp_tag_save <= sp) - pop_stack(); - if (last >= begin) - push_string( make_shared_binary_string( str + begin, last - begin + 1 ) ); - if (sp - sp_save == 0) - push_text( "" ); - else if (sp - sp_save != 1) - f_add( sp - sp_save ); /* fix? 
this is what we return */ - SWAP; - pop_stack(); /* get rid of data_arg */ - if (last < length-1) - { - DATA->last_buffer = malloc( length - (last + 1) ); - MEMCPY( DATA->last_buffer, str + last + 1, length - (last + 1) ); - DATA->last_buffer_size = length - (last + 1); - } -} - -void streamed_parser_finish( INT32 args ) -{ - if(args) error("FOO!\n"); /* Per ... */ - push_string( make_shared_binary_string( (char *)(DATA->last_buffer), DATA->last_buffer_size ) ); - if (DATA->last_buffer) - free( DATA->last_buffer ); - DATA->last_buffer = 0; - DATA->last_buffer_size = 0; -} - -#else /* ENABLE_STREAMED_PARSER */ - -int streamed_parser_place_holder; /* Place holder */ - -#endif /* ENABLE_STREAMED_PARSER */ diff --git a/src/modules/spider/streamed_parser.h b/src/modules/spider/streamed_parser.h deleted file mode 100644 index a59ec4b826..0000000000 --- a/src/modules/spider/streamed_parser.h +++ /dev/null @@ -1,21 +0,0 @@ -/* $Id: streamed_parser.h,v 1.3 1998/03/28 13:49:44 grubba Exp $ */ -#ifndef STREAMED_PARSER_H -#define STREAMED_PARSER_H - -struct streamed_parser -{ - unsigned char *last_buffer; - int last_buffer_size; - struct mapping *start_tags; /* ([ tag : function_ptr ]) */ - struct mapping *content_tags; /* ([ tag : function_ptr ]) */ - struct mapping *end_tags; /* ([ tag : function_ptr ]) */ - struct svalue *digest; -}; - -void streamed_parser_init(void); -void streamed_parser_destruct(void); -void streamed_parser_set_data( INT32 args ); -void streamed_parser_parse( INT32 args ); -void streamed_parser_finish( INT32 args ); - -#endif -- GitLab