From b8ff3ca6a3b5d3a067e48b9f09a4f2491601fd58 Mon Sep 17 00:00:00 2001
From: Arne Goedeke <el@laramies.com>
Date: Sat, 11 Jan 2014 19:53:18 -0500
Subject: [PATCH] Unicode.normalize: use unsigned ints for hash value

hval % HSIZE for a negative hval will result in a negative htable index.
This is triggered by characters in 32-bit strings which are represented
by negative 32-bit signed integers.
---
 src/post_modules/Unicode/normalize.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/post_modules/Unicode/normalize.c b/src/post_modules/Unicode/normalize.c
index 6c6e7db345..3140773528 100644
--- a/src/post_modules/Unicode/normalize.c
+++ b/src/post_modules/Unicode/normalize.c
@@ -86,21 +86,21 @@ static void init_hashes()
 
   for( i = 0; i<sizeof(_d)/sizeof(_d[0]); i++ )
   {
-    int h = _d[i].c%HSIZE;
+    unsigned int h = (unsigned int)_d[i].c%HSIZE;
     decomp_h[i].v = _d+i;
     decomp_h[i].next = decomp_hash[h];
     decomp_hash[h] = decomp_h+i;
   }
   for( i = 0; i<sizeof(_c)/sizeof(_c[0]); i++ )
   {
-    int h = ((_c[i].c1<<16)|_c[i].c2)%HSIZE;
+    unsigned int h = (((unsigned int)_c[i].c1<<16)|_c[i].c2)%HSIZE;
     comp_h[i].v = _c+i;
     comp_h[i].next = comp_hash[h];
     comp_hash[h] = comp_h+i;
   }
   for( i = 0; i<sizeof(_ca)/sizeof(_ca[0]); i++ )
   {
-    int h = _ca[i].c % HSIZE;
+    unsigned int h = (unsigned int)_ca[i].c % HSIZE;
     canonic_h[i].v = _ca+i;
     canonic_h[i].next = canonic_hash[h];
     canonic_hash[h] = canonic_h+i;
@@ -115,7 +115,7 @@ void unicode_normalize_init()
 
 const struct decomp *get_decomposition( int c )
 {
-  int hv = c % HSIZE;
+  unsigned int hv = (unsigned int)c % HSIZE;
   const struct decomp_h *r = decomp_hash[hv];
   while( r )
   {
@@ -128,7 +128,7 @@ const struct decomp *get_decomposition( int c )
 
 int get_canonical_class( int c )
 {
-  int hv = c % HSIZE;
+  unsigned int hv = (unsigned int)c % HSIZE;
   const struct canonic_h *r = canonic_hash[hv];
   while( r )
   {
@@ -152,6 +152,8 @@ int get_canonical_class( int c )
 int get_compose_pair( int c1, int c2 )
 {
   const struct comp_h *r;
+  unsigned int hv;
+
   if( c1 >= LBase )
   {
     /* Perhaps hangul */
@@ -176,8 +178,10 @@ int get_compose_pair( int c1, int c2 )
     }
   }
 
+  hv = (unsigned int)c1 << 16 | (unsigned int)c2;
   /* Nope. Not hangul. */
-  for( r=comp_hash[ ((unsigned int)((c1<<16) | (c2))) % HSIZE ]; r; r=r->next )
+  for( r=comp_hash[ hv % HSIZE ];
+       r; r=r->next )
     if( (r->v->c1 == c1) && (r->v->c2 == c2) )
       return r->v->c;
 
-- 
GitLab