diff --git a/src/main.c b/src/main.c index c9db3ad6ace9293e044cb21a7f9dfe741693bd5d..7a9e3a5180e0e0a1edd877921a5539480a25cded 100644 --- a/src/main.c +++ b/src/main.c @@ -249,6 +249,7 @@ void main(int argc, char **argv, char **env) void init_main_efuns() { + init_shared_string_table(); init_interpreter(); init_lex(); init_types(); diff --git a/src/stralloc.c b/src/stralloc.c index 548b8c803db0beff345acaba08c9da86c9208c89..fb1948627fb6b1939e5e366cd4a8aa028a272d90 100644 --- a/src/stralloc.c +++ b/src/stralloc.c @@ -11,73 +11,23 @@ #include "memory.h" #include "error.h" -static struct pike_string *base_table[HTABLE_SIZE]; +#define BEGIN_HASH_SIZE 997 +#define MAX_AVG_LINK_LENGTH 3 + +unsigned INT32 htable_size=0; +static struct pike_string **base_table=0; static unsigned INT32 full_hash_value; +unsigned INT32 num_strings=0; -/* - */ +/*** Main string hash function ***/ static unsigned int StrHash(const char *s,int len) { full_hash_value=hashmem((unsigned char *)s, len, 20); - return full_hash_value % HTABLE_SIZE; + return full_hash_value % htable_size; } -#ifdef DEBUG -void check_string(struct pike_string *s) -{ - StrHash(s->str, s->len); - if(full_hash_value != s->hval) - fatal("Hash value changed?\n"); - - if(debug_findstring(s) !=s) - fatal("Shared string not shared.\n"); - - if(s->str[s->len]) - fatal("Shared string is not zero terminated properly.\n"); -} - -void verify_shared_strings_tables() -{ - unsigned int e, h; - struct pike_string *s; - - for(e=0;e<HTABLE_SIZE;e++) - { - h=0; - for(s=base_table[e];s;s=s->next) - { - h++; - if(s->len < 0) - fatal("Shared string shorter than zero bytes.\n"); - - if(s->refs <= 0) - fatal("Shared string had too few references.\n"); - - if(s->str[s->len]) - fatal("Shared string didn't end with a zero.\n"); - - if(StrHash(s->str, s->len) != e) - fatal("Shared string hashed to wrong place.\n"); - - if(s->hval != full_hash_value) - fatal("Shared string hashed to other number.\n"); - - if(h>10000) - { - struct pike_string *s2; - for(s2=s;s2;s2=s2->next) - if(s2 == s) - fatal("Shared string table is cyclic.\n"); - h=0; - } - } - } -} -#endif -/* - * find a string in the shared string table. - */ +/*** find a string in the shared string table. ***/ static struct pike_string *internal_findstring(const char *s,int len,int h) { struct pike_string *curr,**prev, **base; @@ -137,50 +87,38 @@ static struct pike_string *propagate_shared_string(const struct pike_string *s,i return 0; /* not found */ } -#ifdef DEBUG -struct pike_string *debug_findstring(const struct pike_string *foo) +/*** rehash ***/ + +static void rehash_string_backwards(struct pike_string *s) { - struct pike_string *tmp; - tmp=propagate_shared_string(foo, foo->hval % HTABLE_SIZE); + int h; + if(!s) return; + rehash_string_backwards(s->next); + h=s->hval % htable_size; + s->next=base_table[h]; + base_table[h]=s; +} -#if 0 - if(!tmp) - { - int e; - struct pike_string *tmp2; - fprintf(stderr,"String %p %ld %ld %s\n", - foo, - (long)foo->hval, - (long)foo->len, - foo->str); - StrHash(foo->str,foo->len); - fprintf(stderr,"------ %p %ld\n", - base_table[foo->hval %HTABLE_SIZE], - (long)full_hash_value); - for(tmp2=base_table[foo->hval % HTABLE_SIZE];tmp2;tmp2=tmp2->next) - { - if(tmp2 == tmp) - fprintf(stderr,"!!%p!!->",tmp2); - else - fprintf(stderr,"%p->",tmp2); - } - fprintf(stderr,"0\n"); +static void rehash() +{ + int h,old; + struct pike_string **old_base; - for(e=0;e<HTABLE_SIZE;e++) - { - for(tmp2=base_table[e];tmp2;tmp2=tmp2->next) - { - if(tmp2 == tmp) - fprintf(stderr,"String found in hashbin %ld (not %ld)\n", - (long)e, - (long)(foo->hval % HTABLE_SIZE)); - } - } - } -#endif - return tmp; + old=htable_size; + old_base=base_table; + + htable_size=htable_size*2 +1; + base_table=(struct pike_string **)xalloc(sizeof(struct pike_string *)*htable_size); + MEMSET((char *)base_table,0,sizeof(struct pike_string *)*htable_size); + + for(h=0;h<old;h++) rehash_string_backwards(old_base[h]); + + if(old_base) + free((char *)old_base); } -#endif + + +/*** Make new strings ***/ /* note that begin_shared_string expects the _exact_ size of the string, * not the maximum size @@ -194,6 +132,17 @@ struct pike_string *begin_shared_string(int len) return t; } +static void link_pike_string(struct pike_string *s, int h) +{ + s->refs = 0; + s->next = base_table[h]; + base_table[h] = s; + s->hval=full_hash_value; + num_strings++; + if(num_strings > MAX_AVG_LINK_LENGTH * htable_size) + rehash(); +} + struct pike_string *end_shared_string(struct pike_string *s) { int len,h; @@ -208,10 +157,7 @@ struct pike_string *end_shared_string(struct pike_string *s) free((char *)s); s=s2; }else{ - s->refs = 0; - s->next = base_table[h]; - base_table[h] = s; - s->hval=full_hash_value; + link_pike_string(s, h); } s->refs++; @@ -228,11 +174,7 @@ struct pike_string * make_shared_binary_string(const char *str,int len) { s=begin_shared_string(len); MEMCPY(s->str, str, len); - s->str[len] = 0; - s->refs = 0; - s->next = base_table[h]; - base_table[h] = s; - s->hval=full_hash_value; + link_pike_string(s, h); } s->refs++; @@ -245,70 +187,7 @@ struct pike_string *make_shared_string(const char *str) return make_shared_binary_string(str, strlen(str)); } -/* does not take locale into account */ -int low_quick_binary_strcmp(char *a,INT32 alen, - char *b,INT32 blen) -{ - int tmp; - if(alen > blen) - { - tmp=MEMCMP(a, b, blen); - if(tmp) return tmp; - return 1; - }else if(alen < blen){ - tmp=MEMCMP(a, b, alen); - if(tmp) return tmp; - return -1; - }else{ - return MEMCMP(a, b, alen); - } -} - -#ifndef HAVE_STRCOLL -/* No locale function available */ -static int low_binary_strcmp(char *a,INT32 alen, - char *b,INT32 blen) -{ - low_quick_binary_strcmp(a,alen,b,blen); -} -#else - -/* takes locale into account */ -static int low_binary_strcmp(char *a,INT32 alen, - char *b,INT32 blen) -{ - INT32 tmp; - while(alen>0 && blen>0) - { - tmp=strcoll(a,b); - if(tmp) return (int)tmp; - tmp=strlen(a)+1; - a+=tmp; - b+=tmp; - alen-=tmp; - blen-=tmp; - } - if(alen==blen) return 0; - if(alen > blen) return 1; - return -1; -} -#endif - -/* Does not take locale into account */ -int my_quick_strcmp(struct pike_string *a,struct pike_string *b) -{ - if(a==b) return 0; - - return low_quick_binary_strcmp(a->str,a->len,b->str,b->len); -} - -/* Does take locale into account */ -int my_strcmp(struct pike_string *a,struct pike_string *b) -{ - if(a==b) return 0; - - return low_binary_strcmp(a->str,a->len,b->str,b->len); -} +/*** Free strings ***/ void unlink_pike_string(struct pike_string *s) { @@ -325,8 +204,9 @@ void really_free_string(struct pike_string *s) free((char *)s); } + /* - * + * String table status */ struct pike_string *add_string_status(int verbose) { @@ -341,9 +221,9 @@ struct pike_string *add_string_status(int verbose) int num_distinct_strings=0; int bytes_distinct_strings=0; int overhead_bytes=0; - int e; + unsigned INT32 e; struct pike_string *p; - for(e=0;e<HTABLE_SIZE;e++) + for(e=0;e<htable_size;e++) { for(p=base_table[e];p;p=p->next) { @@ -378,15 +258,183 @@ struct pike_string *add_string_status(int verbose) return free_buf(); } +/*** DEBUG ***/ +#ifdef DEBUG + +void check_string(struct pike_string *s) +{ + StrHash(s->str, s->len); + if(full_hash_value != s->hval) + fatal("Hash value changed?\n"); + + if(debug_findstring(s) !=s) + fatal("Shared string not shared.\n"); + + if(s->str[s->len]) + fatal("Shared string is not zero terminated properly.\n"); +} + +void verify_shared_strings_tables() +{ + unsigned INT32 e, h; + struct pike_string *s; + + for(e=0;e<htable_size;e++) + { + h=0; + for(s=base_table[e];s;s=s->next) + { + h++; + if(s->len < 0) + fatal("Shared string shorter than zero bytes.\n"); + + if(s->refs <= 0) + fatal("Shared string had too few references.\n"); + + if(s->str[s->len]) + fatal("Shared string didn't end with a zero.\n"); + + if(StrHash(s->str, s->len) != e) + fatal("Shared string hashed to wrong place.\n"); + + if(s->hval != full_hash_value) + fatal("Shared string hashed to other number.\n"); + + if(h>10000) + { + struct pike_string *s2; + for(s2=s;s2;s2=s2->next) + if(s2 == s) + fatal("Shared string table is cyclic.\n"); + h=0; + } + } + } +} + +struct pike_string *debug_findstring(const struct pike_string *foo) +{ + struct pike_string *tmp; + tmp=propagate_shared_string(foo, foo->hval % htable_size); + +#if 0 + if(!tmp) + { + unsigned INT32 e; + struct pike_string *tmp2; + fprintf(stderr,"String %p %ld %ld %s\n", + foo, + (long)foo->hval, + (long)foo->len, + foo->str); + StrHash(foo->str,foo->len); + fprintf(stderr,"------ %p %ld\n", + base_table[foo->hval %htable_size], + (long)full_hash_value); + for(tmp2=base_table[foo->hval % htable_size];tmp2;tmp2=tmp2->next) + { + if(tmp2 == tmp) + fprintf(stderr,"!!%p!!->",tmp2); + else + fprintf(stderr,"%p->",tmp2); + } + fprintf(stderr,"0\n"); + + for(e=0;e<htable_size;e++) + { + for(tmp2=base_table[e];tmp2;tmp2=tmp2->next) + { + if(tmp2 == tmp) + fprintf(stderr,"String found in hashbin %ld (not %ld)\n", + (long)e, + (long)(foo->hval % htable_size)); + } + } + } +#endif + return tmp; +} + void dump_stralloc_strings() { - int e; + unsigned INT32 e; struct pike_string *p; - for(e=0;e<HTABLE_SIZE;e++) + for(e=0;e<htable_size;e++) for(p=base_table[e];p;p=p->next) printf("%ld refs \"%s\"\n",(long)p->refs,p->str); } +#endif + + +/*** String compare functions ***/ + +/* does not take locale into account */ +int low_quick_binary_strcmp(char *a,INT32 alen, + char *b,INT32 blen) +{ + int tmp; + if(alen > blen) + { + tmp=MEMCMP(a, b, blen); + if(tmp) return tmp; + return 1; + }else if(alen < blen){ + tmp=MEMCMP(a, b, alen); + if(tmp) return tmp; + return -1; + }else{ + return MEMCMP(a, b, alen); + } +} + +#ifndef HAVE_STRCOLL +/* No locale function available */ +static int low_binary_strcmp(char *a,INT32 alen, + char *b,INT32 blen) +{ + low_quick_binary_strcmp(a,alen,b,blen); +} +#else + +/* takes locale into account */ +static int low_binary_strcmp(char *a,INT32 alen, + char *b,INT32 blen) +{ + INT32 tmp; + while(alen>0 && blen>0) + { + tmp=strcoll(a,b); + if(tmp) return (int)tmp; + tmp=strlen(a)+1; + a+=tmp; + b+=tmp; + alen-=tmp; + blen-=tmp; + } + if(alen==blen) return 0; + if(alen > blen) return 1; + return -1; +} +#endif + +/* Does not take locale into account */ +int my_quick_strcmp(struct pike_string *a,struct pike_string *b) +{ + if(a==b) return 0; + + return low_quick_binary_strcmp(a->str,a->len,b->str,b->len); +} + +/* Does take locale into account */ +int my_strcmp(struct pike_string *a,struct pike_string *b) +{ + if(a==b) return 0; + + return low_binary_strcmp(a->str,a->len,b->str,b->len); +} + +/*** Add strings ***/ struct pike_string *add_shared_strings(struct pike_string *a, struct pike_string *b) { @@ -405,6 +453,7 @@ struct pike_string *add_shared_strings(struct pike_string *a, return ret; } +/*** replace function ***/ struct pike_string *string_replace(struct pike_string *str, struct pike_string *del, struct pike_string *to) @@ -449,11 +498,19 @@ struct pike_string *string_replace(struct pike_string *str, return end_shared_string(ret); } +/*** init/exit memory ***/ +void init_shared_string_table() +{ + htable_size=BEGIN_HASH_SIZE; + base_table=(struct pike_string **)xalloc(sizeof(struct pike_string *)*htable_size); + MEMSET((char *)base_table,0,sizeof(struct pike_string *)*htable_size); +} + void cleanup_shared_string_table() { - int e; + unsigned INT32 e; struct pike_string *s,*next; - for(e=0;e<HTABLE_SIZE;e++) + for(e=0;e<htable_size;e++) { for(s=base_table[e];s;s=next) { diff --git a/src/stralloc.h b/src/stralloc.h index c9979ccf8766f1fb4288af925fc03fad8894bf61..a0201a1b5585a69648d1f8f0973810b1fd92e0ce 100644 --- a/src/stralloc.h +++ b/src/stralloc.h @@ -32,27 +32,29 @@ struct pike_string *debug_findstring(const struct pike_string *foo); #define copy_shared_string(to,s) ((to)=(s))->refs++ /* Prototypes begin here */ -void check_string(struct pike_string *s); -void verify_shared_strings_tables(); -struct pike_string *binary_findstring(const char *foo, INT32 len); +struct pike_string *binary_findstring(const char *foo, INT32 l); struct pike_string *findstring(const char *foo); -struct pike_string *debug_findstring(const struct pike_string *foo); struct pike_string *begin_shared_string(int len); struct pike_string *end_shared_string(struct pike_string *s); struct pike_string * make_shared_binary_string(const char *str,int len); struct pike_string *make_shared_string(const char *str); +void unlink_pike_string(struct pike_string *s); +void really_free_string(struct pike_string *s); +struct pike_string *add_string_status(int verbose); +void check_string(struct pike_string *s); +void verify_shared_strings_tables(); +struct pike_string *debug_findstring(const struct pike_string *foo); +void dump_stralloc_strings(); int low_quick_binary_strcmp(char *a,INT32 alen, char *b,INT32 blen); int my_quick_strcmp(struct pike_string *a,struct pike_string *b); int my_strcmp(struct pike_string *a,struct pike_string *b); -void really_free_string(struct pike_string *s); -struct pike_string *add_string_status(int verbose); -void dump_stralloc_strings(); struct pike_string *add_shared_strings(struct pike_string *a, struct pike_string *b); struct pike_string *string_replace(struct pike_string *str, struct pike_string *del, struct pike_string *to); +void init_shared_string_table(); void cleanup_shared_string_table(); /* Prototypes end here */