From b8ff3ca6a3b5d3a067e48b9f09a4f2491601fd58 Mon Sep 17 00:00:00 2001 From: Arne Goedeke <el@laramies.com> Date: Sat, 11 Jan 2014 19:53:18 -0500 Subject: [PATCH] Unicode.normalize: use unsigned ints for hash value hval % HSIZE for a negative hval will result in a negative htable index. This is triggered by characters in 32-bit strings which are represented by negative 32-bit signed integers. --- src/post_modules/Unicode/normalize.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/post_modules/Unicode/normalize.c b/src/post_modules/Unicode/normalize.c index 6c6e7db345..3140773528 100644 --- a/src/post_modules/Unicode/normalize.c +++ b/src/post_modules/Unicode/normalize.c @@ -86,21 +86,21 @@ static void init_hashes() for( i = 0; i<sizeof(_d)/sizeof(_d[0]); i++ ) { - int h = _d[i].c%HSIZE; + unsigned int h = (unsigned int)_d[i].c%HSIZE; decomp_h[i].v = _d+i; decomp_h[i].next = decomp_hash[h]; decomp_hash[h] = decomp_h+i; } for( i = 0; i<sizeof(_c)/sizeof(_c[0]); i++ ) { - int h = ((_c[i].c1<<16)|_c[i].c2)%HSIZE; + unsigned int h = (((unsigned int)_c[i].c1<<16)|_c[i].c2)%HSIZE; comp_h[i].v = _c+i; comp_h[i].next = comp_hash[h]; comp_hash[h] = comp_h+i; } for( i = 0; i<sizeof(_ca)/sizeof(_ca[0]); i++ ) { - int h = _ca[i].c % HSIZE; + unsigned int h = (unsigned int)_ca[i].c % HSIZE; canonic_h[i].v = _ca+i; canonic_h[i].next = canonic_hash[h]; canonic_hash[h] = canonic_h+i; @@ -115,7 +115,7 @@ void unicode_normalize_init() const struct decomp *get_decomposition( int c ) { - int hv = c % HSIZE; + unsigned int hv = (unsigned int)c % HSIZE; const struct decomp_h *r = decomp_hash[hv]; while( r ) { @@ -128,7 +128,7 @@ const struct decomp *get_decomposition( int c ) int get_canonical_class( int c ) { - int hv = c % HSIZE; + unsigned int hv = (unsigned int)c % HSIZE; const struct canonic_h *r = canonic_hash[hv]; while( r ) { @@ -152,6 +152,8 @@ int get_canonical_class( int c ) int get_compose_pair( int c1, int c2 ) { const struct comp_h *r; 
+ unsigned int hv; + if( c1 >= LBase ) { /* Perhaps hangul */ @@ -176,8 +178,10 @@ int get_compose_pair( int c1, int c2 ) } } + hv = (unsigned int)c1 << 16 | (unsigned int)c2; /* Nope. Not hangul. */ - for( r=comp_hash[ ((unsigned int)((c1<<16) | (c2))) % HSIZE ]; r; r=r->next ) + for( r=comp_hash[ hv % HSIZE ]; + r; r=r->next ) if( (r->v->c1 == c1) && (r->v->c2 == c2) ) return r->v->c; -- GitLab