From 0e4d6388d6b1a7562c4a3dfd8811aaafb7d09caa Mon Sep 17 00:00:00 2001
From: Marcus Comstedt <marcus@mc.pp.se>
Date: Mon, 16 Nov 1998 23:44:45 +0100
Subject: [PATCH] Encoding for all 8bit charsets.

Rev: src/modules/_Charset/charsetmod.c:1.6
---
Review notes (this section is ignored when the patch is applied):
 * f_rfc1345: the reverse table was filled at revtab[c-lo], using the
   charset table's first code (160 for MODE_96), while feed_std8e()
   reads revtab[c-s8->lo] with s8->lo == lowtrans (128).  Both fill
   loops now index with c-s8->lo.
 * Comments were added to the new encoder code; hunk headers and the
   diffstat are adjusted for the extra lines.  The blob ids on the
   index line still name the original revisions.
 * Line breaks and indentation were reconstructed from a flattened
   copy of the patch, so apply with --ignore-whitespace.

 src/modules/_Charset/charsetmod.c | 208 +++++++++++++++++++++++++++++-
 1 file changed, 202 insertions(+), 6 deletions(-)

diff --git a/src/modules/_Charset/charsetmod.c b/src/modules/_Charset/charsetmod.c
index 30b578c89c..33fa2d3056 100644
--- a/src/modules/_Charset/charsetmod.c
+++ b/src/modules/_Charset/charsetmod.c
@@ -3,7 +3,7 @@
 #endif /* HAVE_CONFIG_H */
 
 #include "global.h"
-RCSID("$Id: charsetmod.c,v 1.5 1998/11/16 21:39:34 marcus Exp $");
+RCSID("$Id: charsetmod.c,v 1.6 1998/11/16 22:44:45 marcus Exp $");
 #include "program.h"
 #include "interpret.h"
 #include "stralloc.h"
@@ -26,7 +26,7 @@ static struct program *utf7_program = NULL, *utf8_program = NULL;
 static struct program *utf7e_program = NULL, *utf8e_program = NULL;
 static struct program *std_94_program = NULL, *std_96_program = NULL;
 static struct program *std_9494_program = NULL, *std_9696_program = NULL;
-static struct program *std_8bit_program = NULL;
+static struct program *std_8bit_program = NULL, *std_8bite_program = NULL;
 
 struct std_cs_stor {
   struct string_builder strbuild;
@@ -49,6 +49,17 @@ struct utf7_stor {
 };
 static SIZE_T utf7_stor_offs = 0;
 
+/*
+ * Storage for the generic 8-bit encoder.  feed_std8e() copies characters
+ * below lowtrans unchanged and translates characters in [lo, hi) through
+ * revtab[c - lo]; a zero entry means the character has no encoding.
+ */
+struct std8e_stor {
+  p_wchar0 *revtab;
+  unsigned int lowtrans, lo, hi;
+};
+static SIZE_T std8e_stor_offs = 0;
+
 static SIGNED char rev64t['z'-'+'+1];
 static char fwd64t[64]=
 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
@@ -312,6 +323,31 @@ static void f_feed_utf7(INT32 args)
   f_std_feed(args, feed_utf7);
 }
 
+/*
+ * Clone std_8bite_program, handing `args' to clone_object(), and leave
+ * only the new object on the stack: when allargs exceeds args, the
+ * remaining (allargs - args) values are popped from beneath it.  The
+ * object gets a zeroed reverse table of (hi - lo) entries; its storage is
+ * returned with lo and hi set and lowtrans left at 0 for the caller.
+ */
+static struct std8e_stor *push_std_8bite(int args, int allargs, int lo, int hi)
+{
+  struct std8e_stor *s8;
+  push_object(clone_object(std_8bite_program, args));
+  if((allargs-=args)>0) {
+    struct object *o = sp[-1].u.object;
+    add_ref(o);
+    pop_n_elems(allargs+1);
+    push_object(o);
+  }
+  s8 = (struct std8e_stor *)(sp[-1].u.object->storage+std8e_stor_offs);
+  memset((s8->revtab = xalloc((hi-lo)*sizeof(p_wchar0))), 0,
+         (hi-lo)*sizeof(p_wchar0));
+  s8->lo = lo;
+  s8->hi = hi;
+  s8->lowtrans = 0;
+  return s8;
+}
 
 static void f_rfc1345(INT32 args)
 {
@@ -334,8 +370,38 @@ static void f_rfc1345(INT32 args)
       if((c = strcmp(STR0(str), charset_map[mid].name))==0) {
         struct program *p;
 
-        if(args>1 && sp[1-args].type == T_INT && sp[1-args].u.integer != 0)
-          error("No '%s' encoding today, sorry.\n", STR0(str));
+        if(args>1 && sp[1-args].type == T_INT && sp[1-args].u.integer != 0) {
+          struct std8e_stor *s8;
+          int lowtrans, i;
+          unsigned int c;
+
+          switch(charset_map[mid].mode) {
+          case MODE_94: lowtrans=lo=33; hi=126; break;
+          case MODE_96: lowtrans=128; lo=160; hi=255; break;
+          case MODE_9494:
+          case MODE_9696:
+            error("No '%s' encoding today, sorry.\n", STR0(str));
+          default:
+            fatal("Internal error in rfc1345\n");
+          }
+
+          s8 = push_std_8bite((args>2 && sp[2-args].type == T_STRING? args-2:0),
+                              args, lowtrans, 65536);
+
+          s8->lowtrans = lowtrans;
+          s8->lo = lowtrans;
+          s8->hi = lowtrans;
+
+          /* revtab is based at s8->lo (== lowtrans); for MODE_96 that is
+           * not the table's first code `lo', so index with s8->lo. */
+          for(i=lo; i<=hi; i++)
+            if((c=charset_map[mid].table[i-lo])!=0xfffd && c>=s8->lo) {
+              s8->revtab[c-s8->lo]=i;
+              if(c>=s8->hi)
+                s8->hi = c+1;
+            }
+          return;
+        }
 
         pop_n_elems(args);
         switch(charset_map[mid].mode) {
@@ -360,8 +426,24 @@ static void f_rfc1345(INT32 args)
   if(str->size_shift==0 &&
      (tabl = misc_charset_lookup(STR0(str), &lo, &hi))!=NULL) {
 
-    if(args>1 && sp[1-args].type == T_INT && sp[1-args].u.integer != 0)
-      error("No '%s' encoding today, sorry.\n", STR0(str));
+    if(args>1 && sp[1-args].type == T_INT && sp[1-args].u.integer != 0) {
+      struct std8e_stor *s8;
+      int i;
+      unsigned int c;
+
+      s8 = push_std_8bite((args>2 && sp[2-args].type == T_STRING? args-2:0),
+                          args, lo, 65536);
+      s8->lowtrans = lo;
+      s8->lo = lo;
+      s8->hi = lo;
+      for(i=lo; i<=hi; i++)
+        if((c=tabl[i-lo])!=0xfffd && c>=s8->lo) {
+          s8->revtab[c-s8->lo]=i;
+          if(c>=s8->hi)
+            s8->hi = c+1;
+        }
+      return;
+    }
 
     pop_n_elems(args);
     push_object(clone_object(std_8bit_program, 0));
@@ -717,6 +799,8 @@ static void feed_utf7e(struct utf7_stor *u7, struct string_builder *sb,
         }
       }
     break;
+  default:
+    fatal("Illegal shift size!\n");
   }
 
   u7->dat = dat;
@@ -756,6 +840,107 @@ static void f_drain_utf7e(INT32 args)
   f_drain(args);
 }
 
+/* Init callback: start with no reverse table and an empty [lo, hi) range. */
+static void std_8bite_init_stor(struct object *o)
+{
+  struct std8e_stor *s8 =
+    (struct std8e_stor *)(fp->current_storage+std8e_stor_offs);
+
+  s8->revtab = NULL;
+  s8->lowtrans = 32;
+  s8->lo = 0;
+  s8->hi = 0;
+}
+
+/* Exit callback: release the reverse table, if one was allocated. */
+static void std_8bite_exit_stor(struct object *o)
+{
+  struct std8e_stor *s8 =
+    (struct std8e_stor *)(fp->current_storage+std8e_stor_offs);
+
+  if(s8->revtab != NULL)
+    free(s8->revtab);
+}
+
+/*
+ * Encode str into sb.  Characters below lowtrans are copied unchanged,
+ * characters in [lo, hi) with a non-zero revtab entry are translated, and
+ * anything else is replaced by the encoding of rep.  With rep == NULL
+ * (which is also how rep itself is encoded) error() is called instead.
+ */
+static void feed_std8e(struct std8e_stor *s8, struct string_builder *sb,
+                       struct pike_string *str, struct pike_string *rep)
+{
+  INT32 l = str->len;
+  p_wchar0 *tab = s8->revtab;
+  unsigned int lowtrans = s8->lowtrans, lo = s8->lo, hi = s8->hi;
+  p_wchar0 ch;
+
+  switch(str->size_shift) {
+  case 0:
+    {
+      p_wchar0 c, *p = STR0(str);
+      while(l--)
+        if((c=*p++)<lowtrans)
+          string_builder_putchar(sb, c);
+        else if(c>=lo && c<hi && (ch=tab[c-lo])!=0)
+          string_builder_putchar(sb, ch);
+        else if(rep != NULL)
+          feed_std8e(s8, sb, rep, NULL);
+        else
+          error("Character unsupported by encoding.\n");
+    }
+    break;
+  case 1:
+    {
+      p_wchar1 c, *p = STR1(str);
+      while(l--)
+        if((c=*p++)<lowtrans)
+          string_builder_putchar(sb, c);
+        else if(c>=lo && c<hi && (ch=tab[c-lo])!=0)
+          string_builder_putchar(sb, ch);
+        else if(rep != NULL)
+          feed_std8e(s8, sb, rep, NULL);
+        else
+          error("Character unsupported by encoding.\n");
+    }
+    break;
+  case 2:
+    {
+      p_wchar2 c, *p = STR2(str);
+      while(l--)
+        if((c=*p++)<lowtrans)
+          string_builder_putchar(sb, c);
+        else if(c>=lo && c<hi && (ch=tab[c-lo])!=0)
+          string_builder_putchar(sb, ch);
+        else if(rep != NULL)
+          feed_std8e(s8, sb, rep, NULL);
+        else
+          error("Character unsupported by encoding.\n");
+    }
+    break;
+  default:
+    fatal("Illegal shift size!\n");
+  }
+}
+
+/* feed(string): append the encoded argument to the builder; returns this. */
+static void f_feed_std8e(INT32 args)
+{
+  struct pike_string *str;
+  struct std_cs_stor *cs = (struct std_cs_stor *)fp->current_storage;
+
+  get_all_args("feed()", args, "%W", &str);
+
+  feed_std8e((struct std8e_stor *)(((char*)fp->current_storage)+
+                                   std8e_stor_offs),
+             &cs->strbuild, str, cs->replace);
+
+  pop_n_elems(args);
+  push_object(this_object());
+}
+
+
 void pike_module_init(void)
 {
   int i;
@@ -808,6 +993,14 @@ void pike_module_init(void)
   add_function("feed", f_feed_utf8e, "function(string:object)", 0);
   add_program_constant("UTF8enc", utf8e_program = end_program(), ID_STATIC|ID_NOMASK);
 
+  start_new_program();
+  do_inherit(&prog, 0, NULL);
+  std8e_stor_offs = add_storage(sizeof(struct std8e_stor));
+  add_function("feed", f_feed_std8e, "function(string:object)", 0);
+  set_init_callback(std_8bite_init_stor);
+  set_exit_callback(std_8bite_exit_stor);
+  std_8bite_program = end_program();
+
   start_new_program();
   do_inherit(&prog, 0, NULL);
   std_rfc_stor_offs = add_storage(sizeof(struct std_rfc_stor));
@@ -876,6 +1069,9 @@ void pike_module_exit(void)
   if(std_8bit_program != NULL)
     free_program(std_8bit_program);
 
+  if(std_8bite_program != NULL)
+    free_program(std_8bite_program);
+
   if(std_rfc_program != NULL)
     free_program(std_rfc_program);
 
-- 
GitLab