From 770b6db014ca68c49ec4612a638a515a1c9e235f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Grubbstr=C3=B6m=20=28Grubba=29?= <grubba@grubba.org> Date: Tue, 23 Feb 1999 03:21:15 +0100 Subject: [PATCH] Added some code for wide string support. Rev: src/cpp.c:1.40 --- src/cpp.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 109 insertions(+), 18 deletions(-) diff --git a/src/cpp.c b/src/cpp.c index 8b2f58d583..2317480cd5 100644 --- a/src/cpp.c +++ b/src/cpp.c @@ -5,7 +5,7 @@ \*/ /* - * $Id: cpp.c,v 1.39 1999/02/23 00:49:37 grubba Exp $ + * $Id: cpp.c,v 1.40 1999/02/23 02:21:15 grubba Exp $ */ #include "global.h" #include "language.h" @@ -2083,8 +2083,12 @@ static int do_safe_index_call(struct pike_string *s) void f_cpp(INT32 args) { + struct pike_string *data; struct pike_predef_s *tmpf; + struct svalue *save_sp = sp - args; struct cpp this; + int auto_convert = 0; + if(args<1) error("Too few arguments to cpp()\n"); @@ -2096,11 +2100,20 @@ void f_cpp(INT32 args) if(sp[1-args].type != T_STRING) error("Bad argument 2 to cpp()\n"); copy_shared_string(this.current_file, sp[1-args].u.string); + + if (args > 2) { + if (sp[2-args].type != T_INT) { + error("Bad argument 3 to cpp()\n"); + } + auto_convert = sp[2-args].u.integer; + } }else{ this.current_file=make_shared_string("-"); } - if ((!sp[-args].u.string->size_shift) && (sp[-args].u.string->len > 1)) { + data = sp[-args].u.string; + + if (auto_convert && (!data->size_shift) && (data->len > 1)) { /* Try to determine if we need to recode the string */ /* Observations: @@ -2124,29 +2137,107 @@ void f_cpp(INT32 args) * 0x7b | 0x09 | EBCDIC-US ("#\t"). * --------+---------+------------------------------------------ * Other | Other | 8bit standard string. - */ - /* Notes on EBCDIC: * - * * EBCDIC conversion needs to first convert the first line - * according to EBCDIC-US, and then the rest of the string - * according to the encoding specified by the first line. 
+ * Note that the tests below are more lenient than the table above. + * This shouldn't matter, since the other cases would be erroneous + * anyway. * - * * It's an error for a program written in EBCDIC not to - * start with a #charset directive. + * Note: * - * Obfuscation note: - * - * * This still allows the rest of the file to be written in - * another encoding than EBCDIC. + * * The code below may leave some extra strings on the stack. */ + if ((!((unsigned char *)data->str)[0]) || + (((unsigned char *)data->str)[0] == 0xfe) || + (((unsigned char *)data->str)[0] == 0xff) || + (!((unsigned char *)data->str)[1])) { + /* Unicode */ + if ((!((unsigned char *)data->str)[0]) && + (!((unsigned char *)data->str)[1])) { + /* 32bit Unicode (UCS4) */ + } else { + /* 16bit Unicode */ + if ((!((unsigned char *)data->str)[1]) || + (((unsigned char *)data->str)[1] == 0xfe)) { + /* Reverse Byte-order */ + } + push_string(data); + f_unicode_to_string(1); + data = sp[-1].u.string; + } + } else if (((unsigned char *)data->str)[0] == 0x7b) { + /* EBCDIC */ + /* Notes on EBCDIC: + * + * * EBCDIC conversion needs to first convert the first line + * according to EBCDIC-US, and then the rest of the string + * according to the encoding specified by the first line. + * + * * It's an error for a program written in EBCDIC not to + * start with a #charset directive. + * + * Obfuscation note: + * + * * This still allows the rest of the file to be written in + * another encoding than EBCDIC. + */ + } } - if (sp[-args].u.string->size_shift) { + if (data->size_shift) { /* More notes: * * * Character 0xfeff (ZERO WIDTH NO-BREAK SPACE = BYTE ORDER MARK = BOM) * needs to be filtered away before processing continues. + * + * * The code below may leave some extra strings on the stack. 
*/ + int i; + int j = 0; + int len = data->len; + + init_string_builder(&this.buf, data->size_shift); + if (data->size_shift == 1) { + /* 16 bit string */ + p_wchar1 *ptr = STR1(data); + for(i = 0; i<len; i++) { + if (ptr[i] == 0xfeff) { + if (i != j) { + string_builder_append(&this.buf, MKPCHARP(ptr + j, 1), i - j); + j = i+1; + } + } + } + if ((j) && (i != j)) { + /* Add the trailing string */ + string_builder_append(&this.buf, MKPCHARP(ptr + j, 1), i - j); + push_string(finish_string_builder(&this.buf)); + data = sp[-1].u.string; + } else { + /* String didn't contain 0xfeff */ + free_string_builder(&this.buf); + } + } else { + /* 32 bit string */ + p_wchar2 *ptr = STR2(data); + for(i = 0; i<len; i++) { + if (ptr[i] == 0xfeff) { + if (i != j) { + string_builder_append(&this.buf, MKPCHARP(ptr + j, 2), i - j); + j = i+1; + } + } + } + if ((j) && (i != j)) { + /* Add the trailing string */ + string_builder_append(&this.buf, MKPCHARP(ptr + j, 2), i - j); + push_string(finish_string_builder(&this.buf)); + data = sp[-1].u.string; + } else { + /* String didn't contain 0xfeff */ + free_string_builder(&this.buf); + } + } } + init_string_builder(&this.buf, 0); this.current_line=1; this.compile_errors=0; @@ -2179,7 +2270,7 @@ void f_cpp(INT32 args) PUSH_STRING(this.current_file->str, this.current_file->len, &this.buf); string_builder_putchar(&this.buf, '\n'); - low_cpp(&this, sp[-args].u.string->str, sp[-args].u.string->len, 0); + low_cpp(&this, data->str, data->len, 0); if(this.defines) free_hashtable(this.defines, free_one_define); @@ -2190,7 +2281,7 @@ void f_cpp(INT32 args) free_string_builder(&this.buf); error("Cpp() failed\n"); }else{ - pop_n_elems(args); + pop_n_elems(sp - save_sp); push_string(finish_string_builder(&this.buf)); } } @@ -2206,8 +2297,8 @@ void init_cpp() constant_macro->args=1; -/* function(string,string|void:string) */ - ADD_EFUN("cpp",f_cpp,tFunc(tStr tOr(tStr,tVoid),tStr),OPT_EXTERNAL_DEPEND); +/* function(string,string|void,int|void:string) */ + 
ADD_EFUN("cpp",f_cpp,tFunc(tStr tOr(tStr,tVoid) tOr(tInt,tVoid),tStr),OPT_EXTERNAL_DEPEND); } -- GitLab