From 770b6db014ca68c49ec4612a638a515a1c9e235f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Grubbstr=C3=B6m=20=28Grubba=29?=
 <grubba@grubba.org>
Date: Tue, 23 Feb 1999 03:21:15 +0100
Subject: [PATCH] Added some code for wide string support.

Rev: src/cpp.c:1.40
---
 src/cpp.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 109 insertions(+), 18 deletions(-)

diff --git a/src/cpp.c b/src/cpp.c
index 8b2f58d583..2317480cd5 100644
--- a/src/cpp.c
+++ b/src/cpp.c
@@ -5,7 +5,7 @@
 \*/
 
 /*
- * $Id: cpp.c,v 1.39 1999/02/23 00:49:37 grubba Exp $
+ * $Id: cpp.c,v 1.40 1999/02/23 02:21:15 grubba Exp $
  */
 #include "global.h"
 #include "language.h"
@@ -2083,8 +2083,12 @@ static int do_safe_index_call(struct pike_string *s)
 
 void f_cpp(INT32 args)
 {
+  struct pike_string *data;
   struct pike_predef_s *tmpf;
+  struct svalue *save_sp = sp - args;
   struct cpp this;
+  int auto_convert = 0;
+
   if(args<1)
     error("Too few arguments to cpp()\n");
 
@@ -2096,11 +2100,20 @@ void f_cpp(INT32 args)
     if(sp[1-args].type != T_STRING)
       error("Bad argument 2 to cpp()\n");
     copy_shared_string(this.current_file, sp[1-args].u.string);
+
+    if (args > 2) {
+      if (sp[2-args].type != T_INT) {
+	error("Bad argument 3 to cpp()\n");
+      }
+      auto_convert = sp[2-args].u.integer;
+    }
   }else{
     this.current_file=make_shared_string("-");
   }
 
-  if ((!sp[-args].u.string->size_shift) && (sp[-args].u.string->len > 1)) {
+  data = sp[-args].u.string;
+
+  if (auto_convert && (!data->size_shift) && (data->len > 1)) {
     /* Try to determine if we need to recode the string */
 
     /* Observations:
@@ -2124,29 +2137,107 @@ void f_cpp(INT32 args)
      *    0x7b |    0x09 | EBCDIC-US ("#\t").
      * --------+---------+------------------------------------------
      *   Other |   Other | 8bit standard string.
-     */
-    /* Notes on EBCDIC:
      *
-     * * EBCDIC conversion needs to first convert the first line
-     *   according to EBCDIC-US, and then the rest of the string
-     *   according to the encoding specified by the first line.
+     * Note that the tests below are more lenient than the table above.
+     * This shouldn't matter, since the other cases would be erroneus
+     * anyway.
      *
-     * * It's an error for a program written in EBCDIC not to
-     *   start with a #charset directive.
+     * Note:
      *
-     * Obfuscation note:
-     *
-     * * This still allows the rest of the file to be written in
-     *   another encoding than EBCDIC.
+     * * The code below may leave some extra strings on the stack.
      */
+    if ((!((unsigned char *)data->str)[0]) ||
+	(((unsigned char *)data->str)[0] == 0xfe) ||
+	(((unsigned char *)data->str)[0] == 0xff) ||
+	(!((unsigned char *)data->str)[1])) {
+      /* Unicode */
+      if ((!((unsigned char *)data->str)[0]) &&
+	  (!((unsigned char *)data->str)[1])) {
+	/* 32bit Unicode (UCS4) */
+      } else {
+	/* 16bit Unicode */
+	if ((!((unsigned char *)data->str)[1]) ||
+	    (((unsigned char *)data->str)[1] == 0xfe)) {
+	  /* Reverse Byte-order */
+	}
+	push_string(data);
+	f_unicode_to_string(1);
+	data = sp[-1].u.string;
+      }
+    } else if (((unsigned char *)data->str)[0] == 0x7b) {
+      /* EBCDIC */
+      /* Notes on EBCDIC:
+       *
+       * * EBCDIC conversion needs to first convert the first line
+       *   according to EBCDIC-US, and then the rest of the string
+       *   according to the encoding specified by the first line.
+       *
+       * * It's an error for a program written in EBCDIC not to
+       *   start with a #charset directive.
+       *
+       * Obfuscation note:
+       *
+       * * This still allows the rest of the file to be written in
+       *   another encoding than EBCDIC.
+       */
+    }
   }
-  if (sp[-args].u.string->size_shift) {
+  if (data->size_shift) {
     /* More notes:
      *
      * * Character 0xfeff (ZERO WIDTH NO-BREAK SPACE = BYTE ORDER MARK = BOM)
      *   needs to be filtered away before processing continues.
+     *
+     * * The code below may leave some extra strings on the stack.
      */
+    int i;
+    int j = 0;
+    int len = data->len;
+
+    init_string_builder(&this.buf, data->size_shift);
+    if (data->size_shift == 1) {
+      /* 16 bit string */
+      p_wchar1 *ptr = STR1(data);
+      for(i = 0; i<len; i++) {
+	if (ptr[i] == 0xfeff) {
+	  if (i != j) {
+	    string_builder_append(&this.buf, MKPCHARP(ptr + j, 1), i - j);
+	    j = i+1;
+	  }
+	}
+      }
+      if ((j) && (i != j)) {
+	/* Add the trailing string */
+	string_builder_append(&this.buf, MKPCHARP(ptr + j, 1), i - j);
+	push_string(finish_string_builder(&this.buf));
+	data = sp[-1].u.string;
+      } else {
+	/* String didn't contain 0xfeff */
+	free_string_builder(&this.buf);
+      }
+    } else {
+      /* 32 bit string */
+      p_wchar2 *ptr = STR2(data);
+      for(i = 0; i<len; i++) {
+	if (ptr[i] == 0xfeff) {
+	  if (i != j) {
+	    string_builder_append(&this.buf, MKPCHARP(ptr + j, 2), i - j);
+	    j = i+1;
+	  }
+	}
+      }
+      if ((j) && (i != j)) {
+	/* Add the trailing string */
+	string_builder_append(&this.buf, MKPCHARP(ptr + j, 2), i - j);
+	push_string(finish_string_builder(&this.buf));
+	data = sp[-1].u.string;
+      } else {
+	/* String didn't contain 0xfeff */
+	free_string_builder(&this.buf);
+      }
+    }
   }
+
   init_string_builder(&this.buf, 0);
   this.current_line=1;
   this.compile_errors=0;
@@ -2179,7 +2270,7 @@ void f_cpp(INT32 args)
   PUSH_STRING(this.current_file->str, this.current_file->len, &this.buf);
   string_builder_putchar(&this.buf, '\n');
 
-  low_cpp(&this, sp[-args].u.string->str, sp[-args].u.string->len, 0);
+  low_cpp(&this, data->str, data->len, 0);
   if(this.defines)
     free_hashtable(this.defines, free_one_define);
 
@@ -2190,7 +2281,7 @@ void f_cpp(INT32 args)
     free_string_builder(&this.buf);
     error("Cpp() failed\n");
   }else{
-    pop_n_elems(args);
+    pop_n_elems(sp - save_sp);
     push_string(finish_string_builder(&this.buf));
   }
 }
@@ -2206,8 +2297,8 @@ void init_cpp()
   constant_macro->args=1;
 
   
-/* function(string,string|void:string) */
-  ADD_EFUN("cpp",f_cpp,tFunc(tStr tOr(tStr,tVoid),tStr),OPT_EXTERNAL_DEPEND);
+/* function(string,string|void,int|void:string) */
+  ADD_EFUN("cpp",f_cpp,tFunc(tStr tOr(tStr,tVoid) tOr(tInt,tVoid),tStr),OPT_EXTERNAL_DEPEND);
 }
 
 
-- 
GitLab