From 40a223b785bc73072e26cf69adcffdb61e9691d3 Mon Sep 17 00:00:00 2001
From: Per Hedbor <ph@opera.com>
Date: Sat, 12 Aug 2000 08:17:22 +0200
Subject: [PATCH] Removed the streaming parser, and moved http_decode_string to
 Roxen. This is the start of the eradication of the spider module with its
 confusing global functions.

Rev: src/modules/spider/Makefile.in:1.20
Rev: src/modules/spider/configure.in:1.25
Rev: src/modules/spider/spider.c:1.98
Rev: src/modules/spider/streamed_parser.c:1.13(DEAD)
Rev: src/modules/spider/streamed_parser.h:1.4(DEAD)
---
 .gitattributes                       |   2 -
 src/modules/spider/Makefile.in       |   3 +-
 src/modules/spider/configure.in      |   4 +-
 src/modules/spider/spider.c          | 265 +-------
 src/modules/spider/streamed_parser.c | 862 ---------------------------
 src/modules/spider/streamed_parser.h |  21 -
 6 files changed, 7 insertions(+), 1150 deletions(-)
 delete mode 100644 src/modules/spider/streamed_parser.c
 delete mode 100644 src/modules/spider/streamed_parser.h

diff --git a/.gitattributes b/.gitattributes
index ebd5a03ac8..fb3c607fc0 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -483,8 +483,6 @@ testfont binary
 /src/modules/spider/discdate.c foreign_ident
 /src/modules/spider/spider.c foreign_ident
 /src/modules/spider/stardate.c foreign_ident
-/src/modules/spider/streamed_parser.c foreign_ident
-/src/modules/spider/streamed_parser.h foreign_ident
 /src/modules/sprintf/Makefile.in foreign_ident
 /src/modules/sprintf/configure.in foreign_ident
 /src/modules/sprintf/sprintf.c foreign_ident
diff --git a/src/modules/spider/Makefile.in b/src/modules/spider/Makefile.in
index 54982a1eef..7230279fa3 100644
--- a/src/modules/spider/Makefile.in
+++ b/src/modules/spider/Makefile.in
@@ -1,9 +1,8 @@
-# $Id: Makefile.in,v 1.19 1999/11/18 08:03:08 hubbe Exp $
+# $Id: Makefile.in,v 1.20 2000/08/12 06:17:21 per Exp $
 @make_variables@
 VPATH=@srcdir@:@srcdir@/../..:../..
 OBJS=spider.o discdate.o stardate.o xml.o
 MODULE_LDFLAGS=@LDFLAGS@
-#  streamed_parser.o
 
 CONFIG_HEADERS=@CONFIG_HEADERS@
 
diff --git a/src/modules/spider/configure.in b/src/modules/spider/configure.in
index 596e36051e..34a5ec55f6 100644
--- a/src/modules/spider/configure.in
+++ b/src/modules/spider/configure.in
@@ -1,11 +1,9 @@
-# $Id: configure.in,v 1.24 1998/09/20 08:33:50 hubbe Exp $
+# $Id: configure.in,v 1.25 2000/08/12 06:17:21 per Exp $
 AC_INIT(spider.c)
 AC_CONFIG_HEADER(config.h)
 
 AC_MODULE_INIT()
 
-AC_ARG_WITH(streamed_parser, [ --with-streamed-parser	Enable the streamed-parser module.], [AC_DEFINE(ENABLE_STREAMED_PARSER)], [])
-
 AC_CHECK_LIB(bind, __inet_ntoa)
 AC_CHECK_LIB(socket, socket)
 AC_CHECK_LIB(nsl, gethostbyname)
diff --git a/src/modules/spider/spider.c b/src/modules/spider/spider.c
index 2bd5c9eebc..49d843099b 100644
--- a/src/modules/spider/spider.c
+++ b/src/modules/spider/spider.c
@@ -43,7 +43,7 @@
 #include "threads.h"
 #include "operators.h"
 
-RCSID("$Id: spider.c,v 1.97 2000/08/10 09:51:55 per Exp $");
+RCSID("$Id: spider.c,v 1.98 2000/08/12 06:17:18 per Exp $");
 
 #ifdef HAVE_PWD_H
 #include <pwd.h>
@@ -103,52 +103,6 @@ void do_html_parse_lines(struct pike_string *ss,
 			 struct array *extra_args,
 			 int line);
 
-
-void f_nice(INT32 args)
-{
-#ifdef HAVE_NICE
-  int ta = sp[-1].u.integer;
-  if(!args) error("You must supply an argument to nice(int)!\n");
-  pop_n_elems(args);
-  push_int(nice(ta));
-#endif
-}
-
-void f_http_decode_string(INT32 args)
-{
-   int proc;
-   char *foo,*bar,*end;
-   struct pike_string *newstr;
-
-   if (!args || sp[-args].type != T_STRING)
-     error("Invalid argument to http_decode_string(STRING);\n");
-
-   foo=bar=sp[-args].u.string->str;
-   end=foo+sp[-args].u.string->len;
-
-   /* count '%' characters */
-   for (proc=0; foo<end; ) if (*foo=='%') { proc++; foo+=3; } else foo++;
-
-   if (!proc) { pop_n_elems(args-1); return; }
-
-   /* new string len is (foo-bar)-proc*2 */
-   newstr=begin_shared_string((foo-bar)-proc*2);
-   foo=newstr->str;
-   for (proc=0; bar<end; foo++)
-      if (*bar=='%')
-      {
-        if (bar<end-2)
-          *foo=(((bar[1]<'A')?(bar[1]&15):((bar[1]+9)&15))<<4)|
-            ((bar[2]<'A')?(bar[2]&15):((bar[2]+9)&15));
-        else
-          *foo=0;
-        bar+=3;
-      }
-      else { *foo=*(bar++); }
-   pop_n_elems(args);
-   push_string(end_shared_string(newstr));
-}
-
 void f_parse_accessed_database(INT32 args)
 {
   ptrdiff_t cnum = 0, i;
@@ -958,22 +912,6 @@ void do_html_parse_lines(struct pike_string *ss,
   }
 }
 
-#ifndef HAVE_INT_TIMEZONE
-int _tz;
-#else
-extern long int timezone;
-#endif
-
-void f_timezone(INT32 args)
-{
-  pop_n_elems(args);
-#ifndef HAVE_INT_TIMEZONE
-  push_int(_tz);
-#else
-  push_int(timezone);
-#endif
-}
-
 void f_get_all_active_fd(INT32 args)
 {
   int i,fds,q, ne;
@@ -1076,151 +1014,6 @@ void f__dump_obj_table(INT32 args)
 #define MIN(A,B) ((A)<(B)?(A):(B))
 #endif
 
-#ifdef ENABLE_STREAMED_PARSER
-#include "streamed_parser.h"
-
-static struct program *streamed_parser;
-
-#endif /* ENABLE_STREAMED_PARSER */
-
-extern void init_udp(void);
-extern void init_xml(void);
-extern void exit_xml(void);
-
-
-/* Hohum. Here we go. This is try number three for a more optimized Roxen. */
-
-#ifdef _REENTRANT
-#define BUFFER (8192)
-
-struct thread_args
-{
-  struct thread_args *next;
-  struct object *from;
-  struct object *to;
-  INT_TYPE to_fd, from_fd;
-  struct svalue cb;
-  struct svalue args;
-  INT_TYPE len;
-  INT_TYPE sent;
-  char buffer[BUFFER];
-};
-
-MUTEX_T done_lock STATIC_MUTEX_INIT;
-struct thread_args *done;
-
-/* WARNING! This function is running _without_ any stack etc. */
-
-#define MY_MIN(a,b) ((a)<(b)?(a):(b))
-void do_shuffle(void *_a)
-{
-  struct thread_args *a = (struct thread_args *)_a;
-
-#ifdef DIRECTIO_ON
-  if(a->len > (65536*2))
-    directio(a->from_fd, DIRECTIO_ON);
-#endif
-
-  while(a->len)
-  {
-    int nread, written=0;
-    nread = fd_read(a->from_fd, a->buffer, MY_MIN(BUFFER,a->len));
-    if(nread <= 0) {
-      if (!nread)
-	break;
-      if(errno == EINTR)
-	continue;
-      else
-	break;
-    }
-
-    while(nread)
-    {
-      int nsent = fd_write(a->to_fd, a->buffer+written, nread);
-      if(nsent < 0) {
-	if(errno != EINTR)
-	  goto end;
-	else
-	  continue;
-      }
-      written += nsent;
-      a->sent += nsent;
-      nread -= nsent;
-      a->len -= nsent;
-    }
-  }
-
-  /* We are done. It is up to the backend callback to call the
-   * finish function
-   */
- end:
-  mt_lock(&done_lock);
-  a->next = done;
-  done = a;
-  mt_unlock(&done_lock);
-  wake_up_backend();
-}
-
-static int num_shuffles = 0;
-static struct callback *my_callback;
-
-void finished_p(struct callback *foo, void *b, void *c)
-{
-  while(done)
-  {
-    struct thread_args *d;
-
-    mt_lock(&done_lock);
-    d = done;
-    done = d->next;
-    mt_unlock(&done_lock);
-
-    num_shuffles--;
-
-    push_int( d->sent );
-    *(sp++) = d->args;
-    push_object( d->from );
-    push_object( d->to );
-    apply_svalue( &d->cb, 4 );
-    free_svalue( &d->cb );
-    pop_stack();
-    free(d);
-  }
-
-  if(!num_shuffles)
-  {
-    remove_callback( foo );
-    my_callback = 0;
-  }
-}
-
-void f_shuffle(INT32 args)
-{
-  struct thread_args *a = malloc(sizeof(struct thread_args));
-  struct svalue *q, *w;
-  get_all_args("shuffle", args, "%o%o%*%*%d", &a->from, &a->to,&q,&w,&a->len);
-  a->sent = 0;
-
-  num_shuffles++;
-  apply(a->to, "query_fd", 0);
-  apply(a->from, "query_fd", 0);
-  get_all_args("shuffle", 2, "%d%d", &a->to_fd, &a->from_fd);
-
-  add_ref(a->from);
-  add_ref(a->to);
-
-  assign_svalue_no_free(&a->cb, q);
-  assign_svalue_no_free(&a->args, w);
-
-  th_farm(do_shuffle, (void *)a);
-
-  if(!my_callback)
-    my_callback = add_backend_callback( finished_p, 0, 0 );
-
-  pop_n_elems(args+2);
-}
-#endif
-
 
 void pike_module_init(void)
 {
@@ -1228,17 +1021,8 @@ void pike_module_init(void)
   empty_string = sp[-1];
   pop_stack();
 
-
-#ifdef _REENTRANT
-  /* function(object,object,function,mixed,int:void) */
-  ADD_FUNCTION("shuffle", f_shuffle,tFunc(tObj tObj tFunction tMix tInt,tVoid), 0);
-#endif
   ADD_EFUN("_low_program_name", f__low_program_name,tFunc(tProgram,tStr),0);
 
-/* function(string:string) */
-  ADD_EFUN("http_decode_string",f_http_decode_string,tFunc(tStr,tStr),
-	   OPT_TRY_OPTIMIZE);
-
 
 /* function(int:int) */
   ADD_EFUN("set_start_quote",f_set_start_quote,tFunc(tInt,tInt),OPT_EXTERNAL_DEPEND);
@@ -1298,64 +1082,25 @@ void pike_module_init(void)
 /* function(int,void|int:int) */
   ADD_EFUN("stardate", f_stardate,tFunc(tInt tOr(tVoid,tInt),tInt), 0);
 
-/* function(:int) */
-  ADD_EFUN("timezone", f_timezone,tFunc(tNone,tInt), 0);
-
 /* function(:array(int)) */
   ADD_EFUN("get_all_active_fd", f_get_all_active_fd,tFunc(tNone,tArr(tInt)),
 	   OPT_EXTERNAL_DEPEND);
 
-/* function(int:int) */
-  ADD_EFUN("nice", f_nice,tFunc(tInt,tInt),
-	   OPT_EXTERNAL_DEPEND|OPT_SIDE_EFFECT);
-
 /* function(int:string) */
   ADD_EFUN("fd_info", f_fd_info,tFunc(tInt,tStr), OPT_EXTERNAL_DEPEND);
-
-  /* timezone() needs */
   {
-    time_t foo = (time_t)0;
-    struct tm *g;
-
-    g = localtime(&foo);
-#ifndef HAVE_INT_TIMEZONE
-    _tz = g->tm_gmtoff;
-#endif
+    extern void init_xml();
+    init_xml();
   }
-
-#ifdef ENABLE_STREAMED_PARSER
-  start_new_program();
-  add_storage( sizeof (struct streamed_parser) );
-  /* function(mapping(string:function(string,mapping(string:string),mixed:mixed)),mapping(string:function(string,mapping(string:string),string,mixed:mixed)),mapping(string:function(string,mixed:mixed)):void) */
-  ADD_FUNCTION( "init", streamed_parser_set_data,tFunc(tMap(tStr,tFunc(tStr tMap(tStr,tStr) tMix,tMix)) tMap(tStr,tFunc(tStr tMap(tStr,tStr) tStr tMix,tMix)) tMap(tStr,tFunc(tStr tMix,tMix)),tVoid), 0 );
-  /* function(string,mixed:string) */
-  ADD_FUNCTION( "parse", streamed_parser_parse,tFunc(tStr tMix,tStr), 0 );
-  /* function(void:string) */
-  ADD_FUNCTION( "finish", streamed_parser_finish,tFunc(tVoid,tStr), 0 );
-  set_init_callback( streamed_parser_init );
-  set_exit_callback( streamed_parser_destruct );
-
-  streamed_parser = end_program();
-  add_program_constant("streamed_parser", streamed_parser,0);
-#endif /* ENABLE_STREAMED_PARSER */
-
-  init_xml();
 }
 
 
 void pike_module_exit(void)
 {
   int i;
-
   free_string(empty_string.u.string);
-
-#ifdef ENABLE_STREAMED_PARSER
-  if(streamed_parser)
   {
-    free_program(streamed_parser);
-    streamed_parser=0;
+    extern void exit_xml();
+    exit_xml();
   }
-#endif /* ENABLE_STREAMED_PARSER */
-
-  exit_xml();
 }
diff --git a/src/modules/spider/streamed_parser.c b/src/modules/spider/streamed_parser.c
deleted file mode 100644
index 32938e1f49..0000000000
--- a/src/modules/spider/streamed_parser.c
+++ /dev/null
@@ -1,862 +0,0 @@
-/*
- * $Id: streamed_parser.c,v 1.12 2000/07/28 07:15:49 hubbe Exp $
- */
-
-#include "global.h"
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif /* HAVE_CONFIG_H */
-
-#include "stralloc.h"
-#include "pike_macros.h"
-#include "object.h"
-#include "constants.h"
-#include "interpret.h"
-#include "svalue.h"
-#include "mapping.h"
-#include "array.h"
-#include "builtin_functions.h"
-#include "error.h"
-#include "module_support.h"
-#include "multiset.h"
-#include "operators.h"
-
-#include "streamed_parser.h"
-
-/* This must be included last! */
-#include "module_magic.h"
-
-RCSID("$Id: streamed_parser.c,v 1.12 2000/07/28 07:15:49 hubbe Exp $");
-
-#ifdef ENABLE_STREAMED_PARSER
-
-/* streamed SGML parser, by wing */
-
-/* State machine for parsing
-
-notag		<	tag_start
-	notag
-
-tag_start	/	tag_end
-		WS	tag_start
-		>	notag
-	tag_name
-
-tag_end		WS	skip		(pop-tag-stack)
-		>	notag	(error)	(pop-tag-stack)
-	tag_end
-
-tag_end_name	WS	tag_end_name
-		>	notag
-	tag_end_name
-	
-skip		>	notag
-		"	skip_fnutt_fnutt
-		'	skip_fnutt
-	skip
-
-skip_fnutt_fnutt	"	skip
-	skip_fnutt_fnutt
-
-skip_fnutt	'	skip
-	skip_fnut
-
-tag_name	WS	skip (or) tag_ws
-		>	notag (or) content
-	tag_name
-
-tag_ws		WS	tag_ws
-		>	notag (check if something changed) (or) content
-	tag_arg_name
-
-tag_arg_name	WS	tag_post_arg_name
-		=	tag_pre_arg_name
-		>	notag (check if something changed) (or) content
-	tag_arg_name
-
-tag_post_arg_name	WS	tag_post_arg_name
-			=	tag_pre_arg_value
-			>	notag (check if something changed) (or) content
-	tag_arg_name
-
-tag_pre_arg_value	WS	tag_pre_arg_value
-			>	notag (error) (check if something changed)
-				  (or) content
-			"	tag_arg_value_fnutt_fnutt
-			'	tag_arg_value_fnutt
-	tag_arg_value
-
-tag_arg_value_fnutt_fnutt	"	tag_ws
-	tag_arg_value_fnutt_fnutt
-
-tag_arg_value_fnutt	'	tag_ws
-	tag_arg_value_fnutt
-
-tag_arg_value	WS	tag_ws
-		>	notag (check if something changed) (or) content
-	tag_arg_value
-
-content		<	content_tag_start
-       content
-
-content_tag_start	/	content_tag_end
-	content_skip
-
-content_tag_end		WS	content_tag_end
-			>	(error) content
-	content_tag_end_name
-
-content_tag_end_name	WS	content_tag_end_name
-			>	content (or) notag
-	content_tag_end_name
-
-content_skip	>	content
-		"	content_skip_fnutt_fnutt
-		'	content_skip_fnutt
-	content_skip
-
-content_skip_fnutt_fnutt	"	content_skip
-	content_skip_fnutt_fnutt
-
-content_skip_fnutt		'	content_skip
-	content_skip_fnutt
-
-*/
-
-#define NOTAG				 0
-#define TAG_START			 1
-#define TAG_END				 2
-#define TAG_END_NAME			 3
-#define SKIP				 4
-#define SKIP_FNUTT_FNUTT		 5
-#define SKIP_FNUTT			 6
-#define TAG_NAME			 7
-#define TAG_WS				 8
-#define TAG_ARG_NAME			 9
-#define TAG_POST_ARG_NAME		10
-#define TAG_PRE_ARG_VALUE		11
-#define TAG_ARG_VALUE_FNUTT_FNUTT	12
-#define TAG_ARG_VALUE_FNUTT		13
-#define TAG_ARG_VALUE			14
-#define CONTENT				15
-#define CONTENT_TAG_START		16
-#define CONTENT_TAG_END			17
-#define CONTENT_TAG_END_NAME		18
-#define CONTENT_SKIP			19
-#define CONTENT_SKIP_FNUTT_FNUTT	20
-#define CONTENT_SKIP_FNUTT		21
-
-#define ARG_TYPE_NONE			 0
-#define ARG_TYPE_IN			 1
-#define ARG_TYPE_OUT			 2
-
-#define WS ' ': case '\t': case '\n': case '\r'
-
-#define DATA ((struct streamed_parser *)(fp->current_storage))
-
-void streamed_parser_init(void)
-{
-  DATA->last_buffer = 0;
-  DATA->last_buffer_size = 0;
-  DATA->start_tags = 0;
-  DATA->content_tags = 0;
-  DATA->end_tags = 0;
-}
-
-void streamed_parser_destruct(void)
-{
-  if (DATA->last_buffer)
-    free( DATA->last_buffer );
-#if 0 /* Per, to clean up _some_ of the warnings */
-  if (DATA->start_tags)
-    ;
-  if (DATA->end_tags)
-    ;
-#endif
-}
-
-void streamed_parser_set_data( INT32 args )
-{
-  get_all_args("spider.streamed_parser->set_data", args, "%m%m%m",
-	       &(DATA->start_tags), &(DATA->content_tags), &(DATA->end_tags));
-  add_ref(DATA->start_tags);
-  add_ref(DATA->content_tags);
-  add_ref(DATA->end_tags);
-  pop_n_elems(args);
-}
-
-#define SWAP \
-	    *sp = sp[-2]; \
-	    sp[-2] = sp[-1]; \
-	    sp[-1] = *sp
-
-static int handle_tag( struct svalue *data_arg )
-{
-  struct svalue *fun;
-
-  push_svalue(data_arg);
-  if(!(fun = low_mapping_lookup( DATA->start_tags, sp-3 )))
-    error("Error in streamed parser.\n");
-
-  apply_svalue(fun, 3);
-
-  if (sp[-1].type == T_STRING)
-  {
-    return 1;
-  } else {
-    pop_stack();
-    return 0;
-  }
-}
-
-static int handle_content_tag( struct svalue *data_arg )
-{
-  struct svalue *fun;
-
-  push_svalue(data_arg);
-  if(!(fun = low_mapping_lookup( DATA->content_tags, sp-3 )))
-    error("Error in streamed parser.\n");
-
-  apply_svalue(fun, 3);
-  if (sp[-1].type == T_STRING)
-    return 1;
-  else
-  {
-    pop_stack();
-    return 0;
-  }
-}
-
-static int handle_end_tag( struct svalue *data_arg )
-{
-  struct svalue *fun;
-
-  fun = low_mapping_lookup( DATA->start_tags, sp-1);
-
-  /* NB: fun would not be valid if the value popped here is an object,
-   * fortunately it is not. */
-  pop_stack();
-
-  push_svalue(data_arg);
-  if(fun)
-  {
-    apply_svalue(fun, 1);
-    if (sp[-1].type == T_STRING)
-      return 1;
-    else
-    {
-      pop_stack();
-      return 0;
-    }
-  }
-  return 0;
-}
-
-static void add_arg(void)
-{
-  mapping_insert( sp[-3].u.mapping, sp-2, sp-1 );
-  pop_stack();
-  pop_stack();
-}
-
-void streamed_parser_parse( INT32 args )
-{
-  int c, length, state, begin, last, ind=0, ind2=0, ind3=0, ind4=0, ind5=0, content_tag=0;
-  char *str;
-  struct svalue *sp_save;
-  struct svalue *sp_tag_save;
-  struct svalue *data_arg;
-  struct pike_string *to_parse;
-
-  get_all_args("spider.streamed_parser->parse", args, "%S", &to_parse);
-  
-  state = NOTAG;
-  begin = 0; 
-  last = -1;
-  SWAP;
-  length = to_parse->len;
-  if (!(str = alloca( DATA->last_buffer_size + length ))) {
-    error("spider.streamed_parser->parse(): Out of memory\n");
-  }
-  if (DATA->last_buffer_size > 0)
-  {
-    MEMCPY( str, DATA->last_buffer, DATA->last_buffer_size );
-    MEMCPY( str + DATA->last_buffer_size, to_parse->str, length );
-    length += DATA->last_buffer_size;
-    free( DATA->last_buffer );
-    DATA->last_buffer = 0;
-    DATA->last_buffer_size = 0;
-  } else {
-    MEMCPY(str, to_parse->str, length);
-  }
-  pop_stack();
-
-  data_arg = sp-1;
-  sp_save = sp;
-  sp_tag_save = 0;
-  for (c=0; c < length; c++)
-    switch (state)
-    {
-     case NOTAG:
-      switch (str[c])
-      {
-       case '<':
-	state = TAG_START;
-	break;
-       default:
-	last = c;
-      }
-      break;
-
-     case TAG_START:
-      switch (str[c])
-      {
-       case '/':
-	state = TAG_END;
-	break;
-       case WS:
-	state = TAG_START;
-	break;
-       case '>':
-	last = c;
-	state = NOTAG;
-	break;
-       default:
-	ind = c;
-	state = TAG_NAME;
-	break;
-      }
-      break;
-
-     case TAG_END:
-      switch (str[c])
-      {
-       case WS:
-	break;
-       case '>': /* error */
-	last = c;
-	state = NOTAG;
-	break;
-       default:
-	ind = c;
-	ind2 = -1;
-	state = TAG_END_NAME;
-	break;
-      }
-      break;
-
-     case TAG_END_NAME:
-      switch (str[c])
-      {
-       case WS:
-	if (ind2 == -1)
-	  ind2 = c-1;
-	break;
-       case '>':
-	if (ind2 == -1)
-	  ind2 = c-1;
-	push_string( make_shared_binary_string( str + ind, ind2 - ind ) );
-	f_lower_case( 1 );
-	if (low_mapping_lookup( DATA->end_tags, sp-1 ))
-	{
-	  if (handle_end_tag( data_arg ))
-	  {
-	    if (last >= begin)
-	    {
-	      push_string( make_shared_binary_string( str + begin, last - begin + 1 ) );
-	      SWAP;
-	    }
-	    begin = c+1;
-	  }
-	}
-	else
-	  pop_stack();
-	last = c;
-	state = NOTAG;
-	break;
-       default:
-	break;
-      }
-      break;
-
-     case SKIP:
-      switch (str[c])
-      {
-       case '>':
-	last = c;
-	state = NOTAG;
-	break;
-       case '"':
-	state = SKIP_FNUTT_FNUTT;
-	break;
-       case '\'':
-	state = SKIP_FNUTT;
-	break;
-      }
-      break;
-
-     case SKIP_FNUTT_FNUTT:
-      switch (str[c])
-      {
-       case '"':
-	state = SKIP;
-	break;
-      }
-      break;
-
-     case SKIP_FNUTT:
-      switch (str[c])
-      {
-       case '\'':
-	state = SKIP;
-	break;
-      }
-      break;
-
-     case TAG_NAME:
-      switch (str[c])
-      {
-       case WS:
-	push_string( make_shared_binary_string( str + ind, c - ind ) );
-	f_lower_case( 1 );
-	if (low_mapping_lookup( DATA->start_tags, sp-1 ))
-	{
-	  f_aggregate_mapping( 0 );
-	  state = TAG_WS;
-	  sp_tag_save = sp-1;
-	  content_tag = 0;
-	}
-	else if (low_mapping_lookup( DATA->content_tags, sp-1 ))
-	{
-	  f_aggregate_mapping( 0 );
-	  state = TAG_WS;
-	  sp_tag_save = sp-1;
-	  content_tag = 1;
-	}
-	else
-	{
-	  pop_stack();
-	  state = SKIP;
-	}
-	break;
-       case '>':
-	push_string( make_shared_binary_string( str + ind, c - ind ) );
-	f_lower_case( 1 );
-	if (low_mapping_lookup( DATA->start_tags, sp-1 ))
-	{
-	  f_aggregate_mapping( 0 );
-	  if (handle_tag( data_arg ))
-	  {
-	    if (last >= begin)
-	    {
-	      push_string( make_shared_binary_string( str + begin, last - begin + 1 ) );
-	      SWAP;
-	    }
-	    begin = c+1;
-	  }
-#if 0 /* DITTO */
-	  else
-	    ;
-#endif
-	}
-	else if (low_mapping_lookup( DATA->content_tags, sp-1 ))
-	{
-	  f_aggregate_mapping( 0 );
-	  ind2 = c+1;
-	  state = CONTENT;
-	  break;
-	}
-	else
-	  pop_stack();
-	last = c;
-	sp_tag_save = 0;
-	state = NOTAG;
-	break;
-      }
-      break;
-
-     case TAG_WS:
-      switch (str[c])
-      {
-       case WS:
-	break;
-       case '>':
-	if (content_tag)
-	{
-	  ind2 = c+1;
-	  state = CONTENT;
-	  break;
-	}
-	if (handle_tag( data_arg ))
-	{
-	  if (last >= begin)
-	  {
-	    push_string( make_shared_binary_string( str + begin, last - begin + 1 ) );
-	    SWAP;
-	  }
-	  begin = c+1;
-	}
-#if 0 /* DITTO */
-	else
-	  ;
-#endif
-	last = c;
-	sp_tag_save = 0;
-	state = NOTAG;
-	break;
-       default:
-	ind = c;
-	state = TAG_ARG_NAME;
-	break;
-      }
-      break;
-
-     case TAG_ARG_NAME:
-      switch (str[c])
-      {
-       case WS:
-	push_string( make_shared_binary_string( str + ind, c - ind ) );
-	f_lower_case( 1 );
-	state = TAG_POST_ARG_NAME;
-	break;
-       case '=':
-	push_string( make_shared_binary_string( str + ind, c - ind ) );
-	f_lower_case( 1 );
-	state = TAG_PRE_ARG_VALUE;
-	break;
-       case '>':
-	push_string( make_shared_binary_string( str + ind, c - ind ) );
-	f_lower_case( 1 );
-	push_text( "" );
-	add_arg();
-	if (content_tag)
-	{
-	  ind2 = c+1;
-	  state = CONTENT;
-	  break;
-	}
-	if (handle_tag( data_arg ))
-	{
-	  if (last >= begin)
-	  {
-	    push_string( make_shared_binary_string( str + begin, last - begin + 1 ) );
-	    SWAP;
-	  }
-	  begin = c+1;
-	}
-#if 0 /* DITTO */
-	else
-	  ;
-#endif
-	last = c;
-	sp_tag_save = 0;
-	state = NOTAG;
-	break;	
-      }
-      break;
-
-     case TAG_POST_ARG_NAME:
-      switch (str[c])
-      {
-       case WS:
-	break;
-       case '=':
-	state = TAG_PRE_ARG_VALUE;
-	break;
-       case '>':
-	push_text( "" );
-	add_arg();
-	if (content_tag)
-	{
-	  ind2 = c+1;
-	  state = CONTENT;
-	  break;
-	}
-	if (handle_tag( data_arg ))
-	{
-	  if (last >= begin)
-	  {
-	    push_string( make_shared_binary_string( str + begin, last - begin + 1 ) );
-	    SWAP;
-	  }
-	  begin = c+1;
-	}
-#if 0 /* DITTO */
-	else
-	  ;
-#endif
-	last = c;
-	sp_tag_save = 0;
-	state = NOTAG;
-	break;
-       default:
-	push_text( "" );
-	add_arg();
-	ind = c;
-	state = TAG_ARG_NAME;
-	break;
-      }
-      break;
-
-     case TAG_PRE_ARG_VALUE:
-      switch (str[c])
-      {
-       case WS:
-	break;
-       case '>':
-	push_text( "" );
-	add_arg();
-	if (content_tag)
-	{
-	  ind2 = c+1;
-	  state = CONTENT;
-	  break;
-	}
-	if (handle_tag( data_arg ))
-	{
-	  if (last >= begin)
-	  {
-	    push_string( make_shared_binary_string( str + begin, last - begin + 1 ) );
-	    SWAP;
-	  }
-	  begin = c+1;
-	}
-#if 0 /* DITTO */
-	else
-	  ;
-#endif
-	last = c;
-	sp_tag_save = 0;
-	state = NOTAG; /* error */
-	break;
-       case '"':
-	state = TAG_ARG_VALUE_FNUTT_FNUTT;
-	ind = c+1;
-	break;
-       case '\'':
-	state = TAG_ARG_VALUE_FNUTT;
-	ind = c+1;
-	break;
-       default:
-	state = TAG_ARG_VALUE;
-	ind = c;
-	break;
-      }
-      break;
-
-     case TAG_ARG_VALUE_FNUTT_FNUTT:
-      switch (str[c])
-      {
-       case '"':
-	push_string( make_shared_binary_string( str + ind, c - ind ) );
-	add_arg();
-	state = TAG_WS;
-	break;
-      }
-
-     case TAG_ARG_VALUE_FNUTT:
-      switch (str[c])
-      {
-       case '\'':
-	push_string( make_shared_binary_string( str + ind, c - ind ) );
-	add_arg();
-	state = TAG_WS;
-	break;
-      }
-      break;
-
-     case TAG_ARG_VALUE:
-      switch (str[c])
-      {
-       case WS:
-	push_string( make_shared_binary_string( str + ind, c - ind ) );
-	add_arg();
-	state = TAG_WS;
-	break;
-       case '>':
-	push_string( make_shared_binary_string( str + ind, c - ind ) );
-	add_arg();
-	state = TAG_WS;
-	if (content_tag)
-	{
-	  ind2 = c+1;
-	  state = CONTENT;
-	  break;
-	}
-	if (handle_tag( data_arg ))
-	{
-	  if (last >= begin)
-	  {
-	    push_string( make_shared_binary_string( str + begin, last - begin + 1 ) );
-	    SWAP;
-	  }
-	  begin = c+1;
-	}
-#if 0 /* DITTO ½ */
-	else
-	  ;
-#endif
-	last = c;
-	sp_tag_save = 0;
-	state = NOTAG;
-	break;
-      }
-      break;
-
-     case CONTENT:
-      switch (str[c])
-      {
-       case '<':
-	state = CONTENT_TAG_START;
-	ind3 = c-1;
-	break;
-      }
-      break;
-
-     case CONTENT_TAG_START:
-      switch (str[c])
-      {
-       case '/':
-	state = CONTENT_TAG_END;
-	break;
-       default:
-	state = CONTENT_SKIP;
-	break;
-      }
-      break;
-      
-     case CONTENT_TAG_END:
-      switch (str[c])
-      {
-       case WS:
-	state = CONTENT_TAG_END;
-	break;
-       case '>': /* error */
-	state = CONTENT;
-	break;
-       default:
-	ind4 = c;
-	ind5 = -1;
-	state = CONTENT_TAG_END_NAME;
-	break;
-      }
-      break;
-      
-     case CONTENT_TAG_END_NAME:
-      switch (str[c])
-      {
-       case WS:
-	if (ind5 == -1)
-	  ind5 = c-1;
-	break;
-       case '>':
-	if (ind5 == -1)
-	  ind5 = c-1;
-	push_string( make_shared_binary_string( str + ind4, ind5 - ind4 ) );
-	f_lower_case( 1 );
-	if (is_same_string( sp[-1].u.string, sp[-3].u.string ))
-	{
-	  pop_stack();
-	  push_string( make_shared_binary_string( str + ind2, ind2 - ind3 ) );
-	  if (handle_content_tag( data_arg ))
-	  {
-	    if (last >= begin)
-	    {
-	      push_string( make_shared_binary_string( str + begin, last - begin + 1 ) );
-	      SWAP;
-	    }
-	    begin = c+1;
-	  }
-#if 0 /* DITTO */
-	  else
-	    ;
-#endif
-	  last = c;
-	  sp_tag_save = 0;
-	  state = NOTAG;
-	  break;
-	}
-	else
-	{
-	  pop_stack();
-	  state = CONTENT;
-	}
-	break;
-       default:
-	break;
-      }
-      break;
-      
-     case CONTENT_SKIP:
-      switch (str[c])
-      {
-       case '>':
-	last = c;
-	state = CONTENT;
-	break;
-       case '"':
-	state = CONTENT_SKIP_FNUTT_FNUTT;
-	break;
-       case '\'':
-	state = CONTENT_SKIP_FNUTT;
-	break;
-      }
-      break;
-
-     case CONTENT_SKIP_FNUTT_FNUTT:
-      switch (str[c])
-      {
-       case '"':
-	state = CONTENT_SKIP;
-	break;
-      }
-      break;
-
-     case CONTENT_SKIP_FNUTT:
-      switch (str[c])
-      {
-       case '\'':
-	state = CONTENT_SKIP;
-	break;
-      }
-      break;
-#if 0
-     default:
-      /*  Make HPCC happy */
-#endif
-    }
-  if (sp_tag_save)
-    while (sp_tag_save <= sp)
-      pop_stack();
-  if (last >= begin)
-    push_string( make_shared_binary_string( str + begin, last - begin + 1 ) );
-  if (sp - sp_save == 0)
-    push_text( "" );
-  else if (sp - sp_save != 1)
-    f_add( sp - sp_save ); /* fix? this is what we return */
-  SWAP;
-  pop_stack(); /* get rid of data_arg */
-  if (last < length-1)
-  {
-    DATA->last_buffer = malloc( length - (last + 1) );
-    MEMCPY( DATA->last_buffer, str + last + 1, length - (last + 1) );
-    DATA->last_buffer_size = length - (last + 1);
-  }
-}
-
-void streamed_parser_finish( INT32 args )
-{
-  if(args) error("FOO!\n"); /* Per ... */
-  push_string( make_shared_binary_string( (char *)(DATA->last_buffer), DATA->last_buffer_size ) );
-  if (DATA->last_buffer)
-    free( DATA->last_buffer );
-  DATA->last_buffer = 0;
-  DATA->last_buffer_size = 0;
-}
-
-#else /* ENABLE_STREAMED_PARSER */
-
-int streamed_parser_place_holder;	/* Place holder */
-
-#endif /* ENABLE_STREAMED_PARSER */
diff --git a/src/modules/spider/streamed_parser.h b/src/modules/spider/streamed_parser.h
deleted file mode 100644
index a59ec4b826..0000000000
--- a/src/modules/spider/streamed_parser.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* $Id: streamed_parser.h,v 1.3 1998/03/28 13:49:44 grubba Exp $ */
-#ifndef STREAMED_PARSER_H
-#define STREAMED_PARSER_H
-
-struct streamed_parser
-{
-  unsigned char *last_buffer;
-  int last_buffer_size;
-  struct mapping *start_tags; /* ([ tag : function_ptr ]) */
-  struct mapping *content_tags; /* ([ tag : function_ptr ]) */
-  struct mapping *end_tags; /* ([ tag : function_ptr ]) */
-  struct svalue *digest;
-};
-
-void streamed_parser_init(void);
-void streamed_parser_destruct(void);
-void streamed_parser_set_data( INT32 args );
-void streamed_parser_parse( INT32 args );
-void streamed_parser_finish( INT32 args );
-
-#endif
-- 
GitLab