From 7b1f7dc15a30cec59a59f6274f0e7b49e9266c08 Mon Sep 17 00:00:00 2001
From: "Mirar (Pontus Hagland)" <pike@sort.mirar.org>
Date: Thu, 11 Mar 1999 14:42:42 +0100
Subject: [PATCH] cool, it works (a bit)

Rev: src/modules/Parser/html.c:1.5
---
 src/modules/Parser/html.c | 246 +++++++++++++++++++++++++++++++-------
 1 file changed, 204 insertions(+), 42 deletions(-)

diff --git a/src/modules/Parser/html.c b/src/modules/Parser/html.c
index 5bde1ecd36..433d8e345e 100644
--- a/src/modules/Parser/html.c
+++ b/src/modules/Parser/html.c
@@ -18,13 +18,15 @@
 #ifdef DEBUG
 #undef DEBUG
 #endif
-#if 1
+#if 0
 #define DEBUG(X) fprintf X
 #else
 #define DEBUG(X) do; while(0)
 #endif
 
+#if 0
 #define free(X) fprintf(stderr,"free line %d: %p\n",__LINE__,X); free(X)
+#endif
 
 #define MAX_FEED_STACK_DEPTH 10
 
@@ -71,18 +73,30 @@ struct parser_html_storage
    /* parser stack */
    struct feed_stack *stack;
    int stack_count;
+   int max_stack_depth;
 
+   /* callback functions */
    struct svalue callback__tag;
    struct svalue callback__data;
    struct svalue callback__entity;
 
-   int max_stack_depth;
+   /* arg quote may have tag_end to end quote and tag */
+   int lazy_end_arg_quote; 
 
    p_wchar2 tag_start,tag_end;
    p_wchar2 entity_start,entity_end;
    int nargq;
 #define MAX_ARGQ 8
    p_wchar2 argq_start[MAX_ARGQ],argq_stop[MAX_ARGQ];
+
+   /* pre-calculated */
+   /* end of tag or start of arg quote */
+   p_wchar2 look_for_start[MAX_ARGQ+2];
+   int num_look_for_start;
+
+   /* end(s) of _this_ arg quote */
+   p_wchar2 look_for_end[MAX_ARGQ][MAX_ARGQ+2];
+   int num_look_for_end[MAX_ARGQ];
 };
 
 #ifdef THIS
@@ -94,57 +108,92 @@ struct parser_html_storage
 
 /****** init & exit *********************************/
 
-void _reset_feed()
+void reset_feed(struct parser_html_storage *this) /* frees feed, out-feed and stack of `this`, then installs a fresh stack head */
 {
    struct piece *f;
    struct feed_stack *st;
 
    /* kill feed */
 
-   while (THIS->feed)
+   while (this->feed)
    {
-      f=THIS->feed;
-      THIS->feed=f->next;
+      f=this->feed;
+      this->feed=f->next;
       free_string(f->s);
       free(f);
    }
-   THIS->feed_end=NULL;
+   this->feed_end=NULL;
 
    /* kill out-feed */
 
-   while (THIS->out)
+   while (this->out)
    {
-      f=THIS->out;
-      THIS->out=f->next;
+      f=this->out;
+      this->out=f->next;
       free_string(f->s);
       free(f);
    }
-   THIS->out_end=NULL;
+   this->out_end=NULL;
 
 
    /* free stack */
 
-   while (THIS->stack)
+   while (this->stack)
    {
-      st=THIS->stack;
-      THIS->stack=st->prev;
+      st=this->stack;
+      this->stack=st->prev;
       free(st);
    }
 
    /* new stack head */
 
-   THIS->stack=malloc(sizeof(struct feed_stack));
-   if (!THIS->stack)
+   this->stack=malloc(sizeof(struct feed_stack)); /* NOTE(review): also allocated when called from exit_html_struct -- confirm that path frees it */
+   if (!this->stack)
       error("out of memory\n");
-   THIS->stack->prev=NULL;
-   THIS->stack->local_feed=NULL;
-   THIS->stack->ignore_data=0;
-   THIS->stack->pos.byteno=1;
-   THIS->stack->pos.lineno=1;
-   THIS->stack->pos.linestart=1;
-   THIS->stack->c=0;
-
-   THIS->stack_count=0;
+   this->stack->prev=NULL;
+   this->stack->local_feed=NULL;
+   this->stack->ignore_data=0;
+   this->stack->pos.byteno=1; /* all three position counters start at 1 */
+   this->stack->pos.lineno=1;
+   this->stack->pos.linestart=1;
+   this->stack->c=0;
+
+   this->stack_count=0; /* only the head entry is left */
+}
+
+static void recalculate_argq(struct parser_html_storage *this)
+{
+   int n,i,j,k;
+   /* Rebuild the pre-calculated look_for_start / look_for_end tables from
+      tag_end, argq_start[], argq_stop[], nargq and lazy_end_arg_quote. */
+   this->look_for_start[0]=this->tag_end; /* end of tag always stops the scan */
+   n=1;
+   for (i=0; i<this->nargq; i++)
+   {
+      for (j=0; j<n; j++)
+	 if (this->look_for_start[j]==this->argq_start[i]) goto found_start;
+      this->look_for_start[n++]=this->argq_start[i];
+found_start: ; /* a label has to be followed by a statement */
+   }
+   this->num_look_for_start=n;
+   /* look_for_end[k]: distinct stop characters of every quote starting like k */
+   for (k=0; k<this->nargq; k++)
+   {
+      n=0;
+      for (i=0; i<this->nargq; i++)
+	 if (this->argq_start[k]==this->argq_start[i])
+	 {
+	    for (j=0; j<n; j++) /* only the n entries written so far are valid */
+	       if (this->look_for_end[k][j]==this->argq_stop[i])
+		  goto found_end;
+	    this->look_for_end[k][n++]=this->argq_stop[i];
+   found_end: ;
+	 }
+      /* lazy mode: the end of the tag terminates the quote as well */
+      if (this->lazy_end_arg_quote)
+	 this->look_for_end[k][n++]=this->tag_end;
+      this->num_look_for_end[k]=n;
+   }
+}
 
 static void init_html_struct(struct object *o)
@@ -162,11 +211,15 @@ static void init_html_struct(struct object *o)
    THIS->argq_start[1]='\'';
    THIS->argq_stop[1]='\'';
 
+   THIS->lazy_end_arg_quote=0;
+
+   recalculate_argq(THIS);
+
    /* initialize feed */
    THIS->feed=NULL;
    THIS->out=NULL;
    THIS->stack=NULL;
-   _reset_feed();
+   reset_feed(THIS);
    
    /* clear callbacks */
    THIS->callback__tag.type=T_INT;
@@ -181,7 +234,7 @@ static void exit_html_struct(struct object *o)
 {
    DEBUG((stderr,"exit_html_struct %p\n",THIS));
 
-   _reset_feed(); /* frees feed & out */
+   reset_feed(THIS); /* frees feed & out */
 
    free_svalue(&(THIS->callback__tag));
    free_svalue(&(THIS->callback__data));
@@ -302,11 +355,11 @@ static void push_feed_range(struct piece *head,
    {
       if (head==tail)
       {
-	 push_string(string_slice(head->s,c_head,c_tail-c_head));
+	 ref_push_string(string_slice(head->s,c_head,c_tail-c_head));
 	 n++;
 	 break;
       }
-      push_string(string_slice(head->s,c_head,head->s->len-c_head));
+      ref_push_string(string_slice(head->s,c_head,head->s->len-c_head));
       n++;
       if (n==10)
       {
@@ -424,7 +477,9 @@ static int scan_forward(struct piece *feed,
 			p_wchar2 *look_for,
 			int num_look_for)
 {
-   DEBUG((stderr,"scan_forward num_look_for=%d look_for=%d %d %d %d %d\n",
+   DEBUG((stderr,"scan_forward %p:%d "
+	  "num_look_for=%d look_for=%d %d %d %d %d\n",
+	  feed,c,
 	  num_look_for,
 	  (num_look_for>0?look_for[0]:-1),
 	  (num_look_for>1?look_for[1]:-1),
@@ -447,8 +502,8 @@ static int scan_forward(struct piece *feed,
 	 {
 	    int ce=feed->s->len-c;
 	    p_wchar2 f=*look_for;
-	    fprintf(stderr,"%p:%d .. %p:%d (%d)\n",
-		    feed,c,feed,feed->s->len,ce);
+/* 	    fprintf(stderr,"%p:%d .. %p:%d (%d)\n", */
+/* 		    feed,c,feed,feed->s->len,ce); */
 	    switch (feed->s->size_shift)
 	    {
 	       case 0:
@@ -570,6 +625,75 @@ found:
    return 1;
 }
 
+static int scan_for_end_of_tag(struct parser_html_storage *this,
+			       struct piece *feed,
+			       int c,
+			       struct piece **destp,
+			       int *d_p,
+			       int finished)
+{
+   p_wchar2 ch;
+   int res,i;
+   /* Scans from feed:c for the tag_end closing the tag, stepping over
+    * quoted arguments; scan_forward leaves its position in *destp:*d_p.
+    * Returns 1 at end of tag (or forced end if finished), 0 = need more. */
+   /* bla bla <tag foo 'bar' "gazonk" > */
+   /*          ^                      ^ */
+   /*       here now             scan here */
+
+   DEBUG((stderr,"scan for end of tag: %p:%d\n",feed,c));
+
+   for (;;)
+   {
+      /* scan for start of argument quote or end of tag */
+
+      res=scan_forward(feed,c,destp,d_p,
+		       this->look_for_start,this->num_look_for_start);
+      if (!res) 
+	 if (!finished) 
+	 {
+	    DEBUG((stderr,"scan for end of tag: wait at %p:%d\n",feed,c));
+	    return 0; /* not found - no end of tag, yet */
+	 }
+	 else
+	 {
+	    DEBUG((stderr,"scan for end of tag: forced end at %p:%d\n",
+		   destp[0],*d_p));
+	    return 1; /* end of tag, sure... */
+	 }
+      /* what stopped the scan: the tag end, or the start of a quoted argument? */
+      ch=index_shared_string(destp[0]->s,*d_p);
+      if (ch==this->tag_end)
+      {
+	 DEBUG((stderr,"scan for end of tag: end at %p:%d\n",destp[0],*d_p));
+	 return 1; /* end of tag here */
+      }
+
+      /* scan for (possible) end(s) of this argument quote */
+      /* i selects the end table; NOTE(review): i==nargq if ch matches no argq_start -- confirm that cannot happen */
+      for (i=0; i<this->nargq; i++)
+	 if (ch==this->argq_start[i]) break;
+      res=scan_forward(*destp,d_p[0]+1,destp,d_p,
+		       this->look_for_end[i],this->num_look_for_end[i]);
+      if (!res)
+	 if (!finished) 
+	 {
+	    DEBUG((stderr,"scan for end of tag: wait at %p:%d\n",
+		   destp[0],*d_p));
+	    return 0; /* not found - no end of tag, yet */
+	 }
+	 else
+	 {
+	    DEBUG((stderr,"scan for end of tag: forced end at %p:%d\n",
+		   feed,c));
+	    return 1; /* end of tag, sure... */
+	 }
+      /* resume the outer scan one character past the quote's end */
+      feed=*destp;
+      c=d_p[0]+1;
+   }
+}
+
 /* ---------------------------------------------------------------- */
 /* this is called to get data from callbacks and do the right thing */
 
@@ -661,7 +785,8 @@ static int do_try_feed(struct parser_html_storage *this,
 		       struct piece **feed,
 		       int finished)
 {
-   p_wchar2 look_for[MAX_ARGQ+2],ch;
+   p_wchar2 look_for[2 /* entity or tag */];
+   p_wchar2 ch;
    int n;
    struct piece *dst;
    int cdst;
@@ -671,7 +796,7 @@ static int do_try_feed(struct parser_html_storage *this,
 
    recheck_scan(this,&scan_entity,&scan_tag);
 
-   for (;;)
+   while (*feed)
    {
       DEBUG((stderr,"%*d do_try_feed scan loop "
 	     "scan_entity=%d scan_tag=%d ignore_data=%d\n",
@@ -759,7 +884,6 @@ static int do_try_feed(struct parser_html_storage *this,
 	    ref_push_object(thisobj);
 	    push_feed_range(*feed,st->c+1,dst,cdst);
 	    apply_svalue(&(this->callback__entity),2);
-	    st->ignore_data=1;
 
 	    if ((res=handle_result(this,st,
 				   feed,&(st->c),dst,cdst+1)))
@@ -767,6 +891,7 @@ static int do_try_feed(struct parser_html_storage *this,
 	       DEBUG((stderr,"%*d do_try_feed return %d %p:%d\n",
 		      this->stack_count,this->stack_count,
 		      res,*feed,st->c));
+	       st->ignore_data=1;
 	       return res;
 	    }
 	    recheck_scan(this,&scan_entity,&scan_tag);
@@ -787,12 +912,49 @@ static int do_try_feed(struct parser_html_storage *this,
 		this->stack_count,this->stack_count,
 		*feed,st->c));
 
-	 DEBUG((stderr,"%*d do_try_feed return 0 %p:%d\n",
-		this->stack_count,this->stack_count,
-		*feed,st->c));
-	 return 0;
+	 if (this->callback__tag.type!=T_INT)
+	 {
+	    res=scan_for_end_of_tag(this,*feed,st->c+1,&dst,&cdst,
+				    finished);
+	    if (!res) 
+	    {
+	       st->ignore_data=1;
+	       return 1; /* come again */
+	    }
+
+	    DEBUG((stderr,"%*d calling _tag callback %p:%d..%p:%d\n",
+		   this->stack_count,this->stack_count,
+		   *feed,st->c+1,dst,cdst));
+
+	    /* low-level tag call */
+	    ref_push_object(thisobj);
+	    push_feed_range(*feed,st->c+1,dst,cdst);
+	    apply_svalue(&(this->callback__tag),2);
+	    st->ignore_data=1;
+
+	    if ((res=handle_result(this,st,
+				   feed,&(st->c),dst,cdst+1)))
+	    {
+	       DEBUG((stderr,"%*d do_try_feed return %d %p:%d\n",
+		      this->stack_count,this->stack_count,
+		      res,*feed,st->c));
+	       st->ignore_data=1;
+	       return res;
+	    }
+	    recheck_scan(this,&scan_entity,&scan_tag);
+	 }
+	 else
+	 {
+	    res=scan_for_end_of_tag(this,*feed,st->c+1,&dst,&cdst,
+				    finished);
+	    if (!res) return 1; /* come again */
+
+	    put_out_feed_range(this,*feed,st->c,dst,cdst+1);
+	    skip_feed_range(st,feed,&(st->c),dst,cdst+1);
+	 }
       }
    }
+   return 0; /* done */
 }
 
 static void try_feed(int finished)
@@ -933,7 +1095,7 @@ static void html_finish(INT32 args)
 static void html_read(INT32 args)
 {
    int n;
-   int m=0; // strings on stack
+   int m=0; /* strings on stack */
 
    if (!args) 
       n=0x7fffffff; /* a lot */
@@ -953,7 +1115,7 @@ static void html_read(INT32 args)
       if (THIS->out->s->len>n)
       {
 	 struct pike_string *ps;
-	 push_string(string_slice(THIS->out->s,0,n));
+	 ref_push_string(string_slice(THIS->out->s,0,n));
 	 m++;
 	 ps=string_slice(THIS->out->s,n,THIS->out->s->len-n);
 	 free_string(THIS->out->s);
@@ -961,7 +1123,7 @@ static void html_read(INT32 args)
 	 break;
       }
       n-=THIS->out->s->len;
-      push_string(THIS->out->s);
+      ref_push_string(THIS->out->s);
       m++;
       z=THIS->out;
       THIS->out=THIS->out->next;
-- 
GitLab