From 43f4f994d1842b6519faa6d7506ed526ebfc8f66 Mon Sep 17 00:00:00 2001 From: "Mirar (Pontus Hagland)" <pike@sort.mirar.org> Date: Sun, 1 Aug 1999 00:05:55 +0200 Subject: [PATCH] added tag-quote matching (<..>) Rev: src/modules/Parser/html.c:1.30 --- src/modules/Parser/html.c | 186 ++++++++++++++++++++++++++------------ 1 file changed, 130 insertions(+), 56 deletions(-) diff --git a/src/modules/Parser/html.c b/src/modules/Parser/html.c index 8b627e29b8..851d0518a2 100644 --- a/src/modules/Parser/html.c +++ b/src/modules/Parser/html.c @@ -129,6 +129,9 @@ struct parser_html_storage /* flag: nonalphanum ends entity */ int lazy_entity_end; + /* flag: match '<' and '>' for in-tag-tags (<foo <bar>>) */ + int match_tag; + p_wchar2 tag_start,tag_end; p_wchar2 entity_start,entity_end; int nargq; @@ -149,7 +152,7 @@ struct parser_html_storage int n_ws_or_endarg_or_quote; /* end of tag, arg_eq or start of arg quote */ - p_wchar2 look_for_start[MAX_ARGQ+3]; + p_wchar2 look_for_start[MAX_ARGQ+4]; int num_look_for_start; /* end(s) of _this_ arg quote */ @@ -158,7 +161,7 @@ struct parser_html_storage }; /* P_WAIT was already used by MSVC++ :( /Hubbe */ -typedef enum { Pi_DONE=0, Pi_WAIT, Pi_REREAD } newstate; +typedef enum { STATE_DONE=0, STATE_WAIT, STATE_REREAD } newstate; #ifdef THIS #undef THIS /* Needed for NT */ @@ -265,7 +268,8 @@ static void recalculate_argq(struct parser_html_storage *this) /* prepare look for start of argument quote or end of tag */ this->look_for_start[0]=this->tag_end; this->look_for_start[1]=this->arg_eq; - n=2; + if (this->match_tag) this->look_for_start[2]=this->tag_start; + n=2+this->match_tag; for (i=0; i<this->nargq; i++) { for (j=0; j<n; j++) @@ -301,11 +305,14 @@ found_start: free(THIS->ws_or_endarg); THIS->ws_or_endarg=NULL; } - THIS->n_ws_or_endarg=THIS->n_ws+2; + THIS->n_ws_or_endarg=THIS->n_ws+2+THIS->match_tag; THIS->ws_or_endarg=(p_wchar2*)xalloc(sizeof(p_wchar2)*THIS->n_ws_or_endarg); - MEMCPY(THIS->ws_or_endarg+2,THIS->ws,THIS->n_ws*sizeof(p_wchar2)); + MEMCPY(THIS->ws_or_endarg+2+THIS->match_tag, + THIS->ws,THIS->n_ws*sizeof(p_wchar2)); THIS->ws_or_endarg[0]=THIS->arg_eq; THIS->ws_or_endarg[1]=THIS->tag_end; + if (THIS->match_tag) + THIS->ws_or_endarg[2]=THIS->tag_start; if (THIS->ws_or_endarg_or_quote) { @@ -355,6 +362,7 @@ static void init_html_struct(struct object *o) THIS->lazy_end_arg_quote=0; THIS->lazy_entity_end=0; + THIS->match_tag=1; THIS->extra_args=NULL; @@ -411,9 +419,9 @@ static void exit_html_struct(struct object *o) /****** setup callbacks *****************************/ /* -**! method void _set_tag_callback(function to_call) -**! method void _set_entity_callback(function to_call) -**! method void _set_data_callback(function to_call) +**! method object _set_tag_callback(function to_call) +**! method object _set_entity_callback(function to_call) +**! method object _set_data_callback(function to_call) **! This functions set up the parser object to **! call the given callbacks upon tags, entities **! and/or data. @@ -436,7 +444,7 @@ static void html__set_tag_callback(INT32 args) if (!args) error("_set_tag_callback: too few arguments\n"); assign_svalue(&(THIS->callback__tag),sp-args); pop_n_elems(args); - push_int(0); + ref_push_object(THISOBJ); } static void html__set_data_callback(INT32 args) @@ -444,7 +452,7 @@ static void html__set_data_callback(INT32 args) if (!args) error("_set_data_callback: too few arguments\n"); assign_svalue(&(THIS->callback__data),sp-args); pop_n_elems(args); - push_int(0); + ref_push_object(THISOBJ); } static void html__set_entity_callback(INT32 args) @@ -452,16 +460,16 @@ static void html__set_entity_callback(INT32 args) if (!args) error("_set_entity_callback: too few arguments\n"); assign_svalue(&(THIS->callback__entity),sp-args); pop_n_elems(args); - push_int(0); + ref_push_object(THISOBJ); } /* -**! method void add_tag(string name,mixed to_do) -**! method void add_container(string name,mixed to_do) -**! method void add_entity(string entity,mixed to_do) -**! method void add_tags(mapping(string:mixed)) -**! method void add_containers(mapping(string:mixed)) -**! method void add_entities(mapping(string:mixed)) +**! method object add_tag(string name,mixed to_do) +**! method object add_container(string name,mixed to_do) +**! method object add_entity(string entity,mixed to_do) +**! method object add_tags(mapping(string:mixed)) +**! method object add_containers(mapping(string:mixed)) +**! method object add_entities(mapping(string:mixed)) **! Upon **! **! <tt>to_do</tt> can be: @@ -511,6 +519,7 @@ static void html_add_tag(INT32 args) mapping_insert(THIS->maptag,sp-2,sp-1); pop_n_elems(args); + ref_push_object(THISOBJ); } static void html_add_container(INT32 args) @@ -524,6 +533,7 @@ static void html_add_container(INT32 args) } mapping_insert(THIS->mapcont,sp-2,sp-1); pop_n_elems(args); + ref_push_object(THISOBJ); } static void html_add_entity(INT32 args) @@ -537,6 +547,7 @@ static void html_add_entity(INT32 args) } mapping_insert(THIS->mapentity,sp-2,sp-1); pop_n_elems(args); + ref_push_object(THISOBJ); } static void html_add_tags(INT32 args) @@ -556,6 +567,7 @@ static void html_add_tags(INT32 args) } pop_n_elems(args); + ref_push_object(THISOBJ); } static void html_add_containers(INT32 args) @@ -575,6 +587,7 @@ static void html_add_containers(INT32 args) } pop_n_elems(args); + ref_push_object(THISOBJ); } static void html_add_entities(INT32 args) @@ -594,6 +607,7 @@ static void html_add_entities(INT32 args) } pop_n_elems(args); + ref_push_object(THISOBJ); } /* @@ -680,6 +694,10 @@ static void put_out_feed_range(struct parser_html_storage *this, struct piece *tail, int c_tail) { + DEBUG((stderr,"put out feed range %p:%d - %p:%d\n", + head,c_head,tail,c_tail)); + /* fit it in range (this allows other code to ignore eof stuff) */ + if (c_tail>tail->s->len) c_tail=tail->s->len; while (head) { struct pike_string *ps; @@ -1025,6 +1043,7 @@ static int scan_forward_arg(struct parser_html_storage *this, p_wchar2 ch; int res,i; int n=0; + int q=0; DEBUG_MARK_SPOT("scan_forward_arg: start",feed,c); @@ -1050,12 +1069,38 @@ static int scan_forward_arg(struct parser_html_storage *this, } ch=index_shared_string(destp[0]->s,*d_p); - if (ch==this->tag_end || ch==this->arg_eq) + + if (ch==this->arg_eq) { - DEBUG_MARK_SPOT("scan for end of arg: end by tag end",destp[0],*d_p); + DEBUG_MARK_SPOT("scan for end of arg: end by arg_eq", + destp[0],*d_p); break; } + if (ch==this->tag_end) + if (this->match_tag && q--) + { + DEBUG_MARK_SPOT("scan for end of arg: inner tag end", + destp[0],*d_p); + if (do_push) push_feed_range(*destp,*d_p,*destp,*d_p+1),n++; + goto next; + } + else + { + DEBUG_MARK_SPOT("scan for end of arg: end by tag end", + destp[0],*d_p); + break; + } + + if (ch==this->tag_start && this->match_tag) + { + DEBUG_MARK_SPOT("scan for end of arg: inner tag start", + destp[0],*d_p); + q++; + if (do_push) push_feed_range(*destp,*d_p,*destp,*d_p+1),n++; + goto next; + } + /* scan for (possible) end(s) of this argument quote */ for (i=0; i<this->nargq; i++) @@ -1079,6 +1124,8 @@ static int scan_forward_arg(struct parser_html_storage *this, break; } +next: + feed=*destp; c=d_p[0]+1; } @@ -1100,6 +1147,7 @@ static int scan_for_end_of_tag(struct parser_html_storage *this, { p_wchar2 ch; int res,i; + int q=0; /* maybe these should be cached */ @@ -1129,14 +1177,36 @@ static int scan_for_end_of_tag(struct parser_html_storage *this, } ch=index_shared_string(destp[0]->s,*d_p); - if (ch==this->tag_end) + if (ch==this->arg_eq) { - DEBUG((stderr,"scan for end of tag: end at %p:%d\n",destp[0],*d_p)); - return 1; /* end of tag here */ + DEBUG_MARK_SPOT("scan for end of tag: arg_eq", + destp[0],*d_p); + feed=*destp; + c=d_p[0]+1; + continue; } - else if (ch==this->arg_eq) + + if (ch==this->tag_end) + if (this->match_tag && q--) + { + DEBUG_MARK_SPOT("scan for end of tag: inner tag end", + destp[0],*d_p); + feed=*destp; + c=d_p[0]+1; + continue; // scan more + } + else + { + DEBUG_MARK_SPOT("scan for end of tag: end by tag end", + destp[0],*d_p); + return 1; + } + + if (ch==this->tag_start && this->match_tag) { - DEBUG((stderr,"scan for end of tag: arg_eq at %p:%d\n",destp[0],*d_p)); + DEBUG_MARK_SPOT("scan for end of arg: inner tag start", + destp[0],*d_p); + q++; feed=*destp; c=d_p[0]+1; continue; @@ -1214,7 +1284,7 @@ static newstate handle_result(struct parser_html_storage *this, st2->c=0; this->stack=st2; THIS->stack_count++; - return Pi_REREAD; /* please reread stack head */ + return STATE_REREAD; /* please reread stack head */ case T_INT: switch (sp[-1].u.integer) @@ -1223,11 +1293,13 @@ static newstate handle_result(struct parser_html_storage *this, /* just output range */ put_out_feed_range(this,*head,*c_head,tail,c_tail); skip_feed_range(st,head,c_head,tail,c_tail); - return Pi_DONE; /* continue */ + pop_stack(); + return STATE_DONE; /* continue */ case 1: /* wait: "incomplete" */ skip_feed_range(st,head,c_head,tail,c_tail); - return Pi_WAIT; /* continue */ + pop_stack(); + return STATE_WAIT; /* continue */ } error("Parse.HTML: illegal result from callback: %d, " "not 0 (skip) or 1 (wait)\n", @@ -1243,7 +1315,8 @@ static newstate handle_result(struct parser_html_storage *this, put_out_feed(this,sp[-1].u.array->item[i].u.string); } skip_feed_range(st,head,c_head,tail,c_tail); - return Pi_DONE; /* continue */ + pop_stack(); + return STATE_DONE; /* continue */ default: error("Parse.HTML: illegal result from callback: not 0, string or array(string)\n"); @@ -1271,7 +1344,7 @@ static void do_callback(struct parser_html_storage *this, if (this->extra_args) { - this->extra_args->refs++; + add_ref(this->extra_args); push_array_items(this->extra_args); DEBUG((stderr,"_-callback args=%d\n",2+this->extra_args->size)); @@ -1298,7 +1371,7 @@ static newstate entity_callback(struct parser_html_storage *this, { case T_STRING: push_svalue(v); - return Pi_DONE; + return STATE_DONE; case T_ARRAY: error("unimplemented"); @@ -1320,7 +1393,7 @@ static newstate entity_callback(struct parser_html_storage *this, if (this->extra_args) { - this->extra_args->refs++; + add_ref(this->extra_args); push_array_items(this->extra_args); DEBUG((stderr,"entity_callback args=%d\n",2+this->extra_args->size)); @@ -1349,7 +1422,7 @@ static newstate tag_callback(struct parser_html_storage *this, { case T_STRING: push_svalue(v); - return Pi_DONE; + return STATE_DONE; case T_ARRAY: error("unimplemented"); @@ -1372,7 +1445,7 @@ static newstate tag_callback(struct parser_html_storage *this, if (this->extra_args) { - this->extra_args->refs++; + add_ref(this->extra_args); push_array_items(this->extra_args); DEBUG((stderr,"tag_callback args=%d\n",3+this->extra_args->size)); @@ -1403,7 +1476,7 @@ static newstate container_callback(struct parser_html_storage *this, { case T_STRING: push_svalue(v); - return Pi_DONE; /* done */ + return STATE_DONE; /* done */ case T_ARRAY: error("unimplemented"); @@ -1427,7 +1500,7 @@ static newstate container_callback(struct parser_html_storage *this, if (this->extra_args) { - this->extra_args->refs++; + add_ref(this->extra_args); push_array_items(this->extra_args); DEBUG((stderr,"container_callback args=%d\n",4+this->extra_args->size)); @@ -1479,7 +1552,7 @@ static newstate find_end_of_container(struct parser_html_storage *this, { DEBUG_MARK_SPOT("find_end_of_cont : wait\n",s1,c1); free_svalue(endtagname); - return Pi_WAIT; /* please wait */ + return STATE_WAIT; /* please wait */ } else { @@ -1487,7 +1560,7 @@ static newstate find_end_of_container(struct parser_html_storage *this, *e1=*e2=s1; *ce1=*ce2=c1; free_svalue(endtagname); - return Pi_DONE; /* end of tag, sure... */ + return STATE_DONE; /* end of tag, sure... */ } } DEBUG_MARK_SPOT("find_end_of_container got tag",feed,c); @@ -1495,7 +1568,7 @@ static newstate find_end_of_container(struct parser_html_storage *this, { DEBUG_MARK_SPOT("find_end_of_cont : wait for end\n",s1,c1); free_svalue(endtagname); - return Pi_WAIT; + return STATE_WAIT; } tag_name(this,s1,c1+1); @@ -1528,7 +1601,7 @@ static newstate find_end_of_container(struct parser_html_storage *this, *e2=s2; *ce2=c2+1; - return Pi_DONE; + return STATE_DONE; } else { @@ -1603,7 +1676,7 @@ static int do_try_feed(struct parser_html_storage *this, DEBUG((stderr,"%*d do_try_feed return %d %p:%d\n", this->stack_count,this->stack_count, res,*feed,st->c)); - st->ignore_data=(res==Pi_WAIT); + st->ignore_data=(res==STATE_WAIT); return res; } recheck_scan(this,&scan_entity,&scan_tag); @@ -1664,7 +1737,7 @@ static int do_try_feed(struct parser_html_storage *this, DEBUG((stderr,"%*d tag callback return %d %p:%d\n", this->stack_count,this->stack_count, res,*feed,st->c)); - st->ignore_data=(res==Pi_WAIT); + st->ignore_data=(res==STATE_WAIT); return res; } @@ -1692,7 +1765,7 @@ static int do_try_feed(struct parser_html_storage *this, DEBUG((stderr,"%*d find end of cont return %d %p:%d\n", this->stack_count,this->stack_count, res,*feed,st->c)); - st->ignore_data=(res==Pi_WAIT); + st->ignore_data=(res==STATE_WAIT); pop_stack(); return res; } @@ -1707,7 +1780,7 @@ static int do_try_feed(struct parser_html_storage *this, DEBUG((stderr,"%*d container callback return %d %p:%d\n", this->stack_count,this->stack_count, res,*feed,st->c)); - st->ignore_data=(res==Pi_WAIT); + st->ignore_data=(res==STATE_WAIT); return res; } @@ -1790,7 +1863,7 @@ static int do_try_feed(struct parser_html_storage *this, DEBUG((stderr,"%*d entity callback return %d %p:%d\n", this->stack_count,this->stack_count, res,*feed,st->c)); - st->ignore_data=(res==Pi_WAIT); + st->ignore_data=(res==STATE_WAIT); return res; } @@ -1861,7 +1934,7 @@ static void try_feed(int finished) :&(THIS->feed), finished||(THIS->stack->prev!=NULL))) { - case Pi_DONE: /* done, pop stack */ + case STATE_DONE: /* done, pop stack */ if (!THIS->feed) THIS->feed_end=NULL; st=THIS->stack->prev; @@ -1879,10 +1952,10 @@ static void try_feed(int finished) THIS->stack_count--; break; - case Pi_WAIT: /* incomplete, call again */ + case STATE_WAIT: /* incomplete, call again */ return; - case Pi_REREAD: /* reread stack head */ + case STATE_REREAD: /* reread stack head */ if (THIS->stack_count>THIS->max_stack_depth) error("Parse.HTML: too deep recursion\n"); break; @@ -2487,6 +2560,7 @@ static void html_set_extra(INT32 args) f_aggregate(args); if (THIS->extra_args) free_array(THIS->extra_args); THIS->extra_args=sp[-1].u.array; + sp--; ref_push_object(THISOBJ); } @@ -2535,18 +2609,18 @@ void init_parser_html(void) /* callback setup */ ADD_FUNCTION("add_tag",html_add_tag, - tFunc(tStr tTodo(tTagargs),tVoid),0); + tFunc(tStr tTodo(tTagargs),tObj),0); ADD_FUNCTION("add_container",html_add_container, - tFunc(tStr tTodo(tTagargs tStr),tVoid),0); + tFunc(tStr tTodo(tTagargs tStr),tObj),0); ADD_FUNCTION("add_entity",html_add_entity, - tFunc(tStr tTodo(""),tVoid),0); + tFunc(tStr tTodo(""),tObj),0); ADD_FUNCTION("add_tags",html_add_tags, - tFunc(tMap(tStr,tTodo( tTagargs )),tVoid),0); + tFunc(tMap(tStr,tTodo( tTagargs )),tObj),0); ADD_FUNCTION("add_containers",html_add_containers, - tFunc(tMap(tStr,tTodo( tTagargs tStr )),tVoid),0); + tFunc(tMap(tStr,tTodo( tTagargs tStr )),tObj),0); ADD_FUNCTION("add_entities",html_add_entities, - tFunc(tMap(tStr,tTodo( "" )),tVoid),0); + tFunc(tMap(tStr,tTodo( "" )),tObj),0); ADD_FUNCTION("tags",html_tags, tFunc(tNone,tMap(tStr,tTodo( tTagargs ))),0); @@ -2563,11 +2637,11 @@ void init_parser_html(void) /* special callbacks */ ADD_FUNCTION("_set_tag_callback",html__set_tag_callback, - tFunc(tFuncV(tObj tStr,tMix,tCbret),tVoid),0); + tFunc(tFuncV(tObj tStr,tMix,tCbret),tObj),0); ADD_FUNCTION("_set_data_callback",html__set_data_callback, - tFunc(tFuncV(tObj tStr,tMix,tCbret),tVoid),0); + tFunc(tFuncV(tObj tStr,tMix,tCbret),tObj),0); ADD_FUNCTION("_set_entity_callback",html__set_entity_callback, - tFunc(tFuncV(tObj tStr,tMix,tCbret),tVoid),0); + tFunc(tFuncV(tObj tStr,tMix,tCbret),tObj),0); /* debug, whatever */ -- GitLab