From 015f43286472fbdbd7ce32315740063f0ab3d605 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Grubbstr=C3=B6m=20=28Grubba=29?=
 <grubba@grubba.org>
Date: Thu, 1 Mar 2018 14:31:25 +0100
Subject: [PATCH] Unicode: Optimize normalize() in NFC mode on 8-bit strings.

Unicode.normalize() in NFC mode on 8-bit strings is a no-op,
so there's no need to scan through the string.

Fixes [PIKE-79].
---
 .gitattributes                       |  1 -
 src/post_modules/Unicode/normalize.c | 12 ++++++++++--
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/.gitattributes b/.gitattributes
index 84159033d0..d92ab66e9f 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -634,7 +634,6 @@ testfont binary
 /src/post_modules/Unicode/acconfig.h foreign_ident
 /src/post_modules/Unicode/buffer.c foreign_ident
 /src/post_modules/Unicode/configure.in foreign_ident
-/src/post_modules/Unicode/normalize.c foreign_ident
 /src/post_modules/Unicode/split.c foreign_ident
 /src/post_modules/Unicode/unicode_module.cmod foreign_ident
 /src/post_modules/configure.in foreign_ident
diff --git a/src/post_modules/Unicode/normalize.c b/src/post_modules/Unicode/normalize.c
index ccf36be33a..48c0b8e92a 100644
--- a/src/post_modules/Unicode/normalize.c
+++ b/src/post_modules/Unicode/normalize.c
@@ -1,7 +1,7 @@
 #include "global.h"
 #include "stralloc.h"
 #include "global.h"
-RCSID("$Id: normalize.c,v 1.7 2001/11/22 14:52:18 grubba Exp $");
+RCSID("$Id$");
 #include "pike_macros.h"
 #include "interpret.h"
 #include "program.h"
@@ -276,7 +276,14 @@ struct pike_string *unicode_normalize( struct pike_string *source,
     return source;
   }
   /* What, me lisp? */
-  if( how & COMPOSE_BIT )
+  if( how & COMPOSE_BIT ) {
+    if (!source->size_shift && !(how & COMPAT_BIT)) {
+      /* NB: There are 8-bit characters that are changed in
+       *     compat mode; eg NBSP (0xA0) and DIAERESIS (0xA8).
+       */
+      add_ref(source);
+      return source;
+    }
     return
       uc_buffer_to_pikestring(
 	unicode_compose_buffer(
@@ -286,6 +293,7 @@ struct pike_string *unicode_normalize( struct pike_string *source,
 	      source ),
 	    how ),
 	  how ) );
+  }
   return
     uc_buffer_to_pikestring(
       unicode_decompose_buffer(
-- 
GitLab