diff --git a/src/builtin_functions.c b/src/builtin_functions.c
index 3e3aa78a0061385ab7dd501b5804722226dac278..28451e5e5e27bf424672ed5a97e4490a1da4fadf 100644
--- a/src/builtin_functions.c
+++ b/src/builtin_functions.c
@@ -668,8 +668,12 @@ PMOD_EXPORT void f_lower_case(INT32 args)
     pop_n_elems(args-1);
     return;
   }
-  
+
   orig = Pike_sp[-args].u.string;
+  if( orig->flags & STRING_IS_LOWERCASE ) {
+    pop_n_elems(args-1); /* Already lower case: the argument is the result. */
+    return;
+  }
   ret = begin_wide_shared_string(orig->len, orig->size_shift);
 
   MEMCPY(ret->str, orig->str, orig->len << orig->size_shift);
@@ -700,8 +704,10 @@ PMOD_EXPORT void f_lower_case(INT32 args)
 #endif
   }
 
+  ret = end_shared_string(ret);
+  ret->flags |= STRING_IS_LOWERCASE;
   pop_n_elems(args);
-  push_string(end_shared_string(ret));
+  push_string(ret);
 }
 
 /*! @decl string upper_case(string s)
@@ -738,8 +744,13 @@ PMOD_EXPORT void f_upper_case(INT32 args)
     pop_n_elems(args-1);
     return;
   }
-  
+
   orig = Pike_sp[-args].u.string;
+  if( orig->flags & STRING_IS_UPPERCASE ) {
+    pop_n_elems(args-1); /* Already upper case: the argument is the result. */
+    return;
+  }
+
   ret=begin_wide_shared_string(orig->len,orig->size_shift);
 
   MEMCPY(ret->str, orig->str, orig->len << orig->size_shift);
@@ -800,7 +811,9 @@ PMOD_EXPORT void f_upper_case(INT32 args)
   }
 
   pop_n_elems(args);
-  push_string(end_shared_string(ret));
+  ret = end_shared_string(ret);
+  ret->flags |= STRING_IS_UPPERCASE;
+  push_string(ret);
 }
 
 /*!
@decl string random_string(int len)
@@ -954,7 +967,13 @@ PMOD_EXPORT void f_search(INT32 args)
     } else {
       val = index_shared_string(Pike_sp[1-args].u.string, 0);
     }
-    
+
+    if( !string_range_contains( haystack, val ) )
+    {
+      pop_n_elems(args);
+      push_int( -1 );
+      return;
+    }
     switch(Pike_sp[-args].u.string->size_shift) {
       case 0:
       {
diff --git a/src/operators.c b/src/operators.c
index 5f0c741df1049193daec5175c1178fb91b0191bd..1ebcb8cdf4af0710463af68ab37996870580ac20 100644
--- a/src/operators.c
+++ b/src/operators.c
@@ -1555,7 +1555,7 @@ PMOD_EXPORT void f_add(INT32 args)
       PCHARP buf;
       ptrdiff_t tmp;
       int max_shift=0;
-      
+      unsigned char tmp_flags, tmp_min, tmp_max;
       if(args==1) return;
 
       size=0;
@@ -1579,16 +1579,41 @@ PMOD_EXPORT void f_add(INT32 args)
       }
 
       tmp=sp[-args].u.string->len;
+      tmp_flags = sp[-args].u.string->flags;
+      tmp_min = sp[-args].u.string->min;
+      tmp_max = sp[-args].u.string->max;
+      /* min and max are cached in units that depend on size_shift, and an
+       * empty operand has no range to contribute, so the cached range of
+       * the first operand is only inherited if it is non-empty and is not
+       * widened. */
+      if( !tmp || sp[-args].u.string->size_shift != max_shift )
+        tmp_flags &= ~STRING_CONTENT_CHECKED;
+
       r=new_realloc_shared_string(sp[-args].u.string,size,max_shift);
+
+      CLEAR_STRING_CHECKED(r);
+      r->flags |= tmp_flags & ~15;
+      r->min = tmp_min;
+      r->max = tmp_max;
+
       mark_free_svalue (sp - args);
       buf=MKPCHARP_STR_OFF(r,tmp);
       for(e=-args+1;e<0;e++)
       {
-        pike_string_cpy(buf,sp[e].u.string);
-        INC_PCHARP(buf,sp[e].u.string->len);
+        if( sp[e].u.string->len )
+        {
+          /* Same unit problem as for the first operand. */
+          if( sp[e].u.string->size_shift != max_shift )
+            r->flags &= ~STRING_CONTENT_CHECKED;
+          update_flags_for_add( r, sp[e].u.string );
+          pike_string_cpy(buf,sp[e].u.string);
+          INC_PCHARP(buf,sp[e].u.string->len);
+        }
       }
       SET_SVAL(sp[-args], T_STRING, 0, string, low_end_shared_string(r));
-      for(e=-args+1;e<0;e++) free_string(sp[e].u.string);
+
+      for(e=-args+1;e<0;e++)
+        free_string(sp[e].u.string);
+
       sp-=args-1;
 
       break;
diff --git a/src/stralloc.c b/src/stralloc.c
index c4661d57cbb34974f0376897c08842b642c2f1dc..1583439528aaf45a9de356ec96d2fe4f57bd667d 100644
--- a/src/stralloc.c
+++ b/src/stralloc.c
@@ -107,6 +107,166 @@ PMOD_EXPORT struct pike_string *empty_pike_string = 0;
 #define low_do_hash(STR,LEN,SHIFT) low_hashmem( (STR), (LEN)<<(SHIFT), HASH_PREFIX<<(SHIFT), hashkey )
 #define do_hash(STR) low_do_hash(STR->str,STR->len,STR->size_shift)
 
+/* Returns true if str could contain the character n. The answer comes
+ * from the loose range of check_string_range(), so true means "maybe"
+ * and false means "definitely not". */
+PMOD_EXPORT int string_range_contains( struct pike_string *str, int n )
+{
+  INT32 min, max;
+  check_string_range( str, 1, &min, &max );
+  return n >= min && n <= max;
+}
+
+/* Returns true if str2 could be in str1, judging only by the loose
+ * character ranges (and the size_shifts) of the two strings. */
+PMOD_EXPORT int string_range_contains_string( struct pike_string *str1,
+                                              struct pike_string *str2 )
+{
+  INT32 max1, min1;
+  INT32 max2, min2;
+
+  /* The empty string is part of every string, whatever its range says. */
+  if( !str2->len ) return 1;
+
+  check_string_range( str1, 1, &min1, &max1 );
+  check_string_range( str2, 1, &min2, &max2 );
+  if( (min2 < min1) || (max2 > max1) )
+  {
+    if( (str1->flags & STRING_CONTENT_CHECKED) ==
+        (str2->flags & STRING_CONTENT_CHECKED) )
+      return 0;
+    /* Only one of the ranges comes from the cache, so they are not
+     * comparable: fall back to the simple size-shift check. */
+    return str1->size_shift >= str2->size_shift;
+  }
+  return 1;
+}
+
+/* Reports the range of the character values in str through *min and
+ * *max; either pointer may be NULL.
+ *
+ * If loose is true the string is never scanned: the result is the cached
+ * range when STRING_CONTENT_CHECKED is set, and otherwise the widest range
+ * possible for the size_shift. If loose is false the string is scanned and
+ * the result is cached in str->min/str->max, unless a cached range is
+ * already there and str is narrow or max is NULL.
+ *
+ * The cache has one byte per bound. For narrow strings that is the exact
+ * value; for wide strings it is the bound divided by 256 (size_shift 1)
+ * or 65536 (size_shift 2), rounded up and limited to 255, so a range read
+ * back from the cache is conservative rather than exact. The scan of a
+ * narrow string also sets the case flags when the content is 7-bit. */
+PMOD_EXPORT void check_string_range( struct pike_string *str,
+                                     int loose,
+                                     INT32 *min, INT32 *max )
+{
+  INT32 s_min = MAX_INT32;
+  INT32 s_max = MIN_INT32;
+  ssize_t i;
+
+  if( loose || ((str->flags & STRING_CONTENT_CHECKED ) && (!str->size_shift || !max)) )
+  {
+    if( str->flags & STRING_CONTENT_CHECKED )
+    {
+      s_min = str->min;
+      s_max = str->max;
+
+      if( str->size_shift )
+      {
+        /* Turn the rounded-up quotients back into bounds that are
+         * certain to enclose the real range. */
+        s_min <<= 8 * str->size_shift;
+        s_max <<= 8 * str->size_shift;
+        if( s_min )
+          s_min -= (1<<(8*str->size_shift))-1;
+        s_max += str->size_shift == 1 ? 255 : 65535;
+      }
+    }
+    else
+    {
+      switch( str->size_shift )
+      {
+        case 2: s_min = MIN_INT32; s_max=MAX_INT32; break;
+        case 1: s_min = 0; s_max = 65535; break;
+        case 0: s_min = 0; s_max = 255; break;
+      }
+    }
+  }
+  else
+  {
+    str->flags |= STRING_CONTENT_CHECKED;
+
+    switch( str->size_shift )
+    {
+      case 0:
+        {
+          p_wchar0 *p = (p_wchar0*)str->str;
+          int upper = 0, lower = 0;
+          for( i=0; i<str->len; i++,p++ )
+          {
+            /* For 7-bit strings it's easy to check for
+             * lower/uppercase, so do that here as well.
+             */
+            if( *p >= 'A' && *p <= 'Z') upper++;
+            if( *p >= 'a' && *p <= 'z') lower++;
+
+            if( *p > s_max ) s_max = *p;
+            if( *p < s_min ) s_min = *p;
+          }
+
+          if( s_max < 128 )
+          {
+            if( upper && !lower )
+              str->flags |= STRING_IS_UPPERCASE;
+            if( lower && !upper )
+              str->flags |= STRING_IS_LOWERCASE;
+            if( !lower && !upper )
+              str->flags |= STRING_IS_LOWERCASE|STRING_IS_UPPERCASE;
+          }
+        }
+        str->min = s_min;
+        str->max = s_max;
+        break;
+
+      case 1:
+        {
+          p_wchar1 *p = (p_wchar1*)str->str;
+          for( i=0; i<str->len; i++,p++ )
+          {
+            if( *p > s_max ) s_max = *p;
+            if( *p < s_min ) s_min = *p;
+          }
+        }
+        /* Rounding up can yield 256, which must not wrap to 0 in a byte. */
+        str->min = s_min > 0xff00 ? 255 : (s_min+255) >> 8;
+        str->max = s_max > 0xff00 ? 255 : (s_max+255) >> 8;
+        break;
+
+      case 2:
+        {
+          p_wchar2 *p = (p_wchar2*)str->str;
+          for( i=0; i<str->len; i++,p++ )
+          {
+            if( *p > s_max ) s_max = *p;
+            if( *p < s_min ) s_min = *p;
+          }
+        }
+        if( s_min < 0 || s_max > 0xffffff )
+        {
+          /* Not expressible in one byte per bound; leave it unchecked. */
+          str->flags &= ~STRING_CONTENT_CHECKED;
+        }
+        else
+        {
+          str->min = s_min > 0xff0000 ? 255 : (s_min+65535) >> 16;
+          str->max = s_max > 0xff0000 ? 255 : (s_max+65535) >> 16;
+        }
+        break;
+    }
+  }
+  if( min ) *min = s_min;
+  if( max ) *max = s_max;
+}
 
 static INLINE int find_magnitude1(const p_wchar1 *s, ptrdiff_t len)
 {
@@ -628,6 +788,7 @@ PMOD_EXPORT struct pike_string *debug_begin_shared_string(size_t len)
   add_ref(t); /* For DMALLOC */
   t->str[len]=0;
   t->len=len;
+/*  t->min = t->max = 0; */
   t->size_shift=0;
   DO_IF_DEBUG(t->next = NULL);
   return t;
@@ -776,6 +937,8 @@ PMOD_EXPORT struct pike_string *debug_begin_wide_shared_string(size_t len, int s
 PMOD_EXPORT void hash_string(struct pike_string *s)
 {
   if (!(s->flags & STRING_NOT_HASHED)) return;
+  /* if( s->len < HASH_PREFIX ) */
+  /*   check_string_range( s, 0, 0, 0 ); */
   s->hval=do_hash(s);
   s->flags &= ~STRING_NOT_HASHED;
 }
@@ -872,7 +1035,7 @@ PMOD_EXPORT struct pike_string *end_shared_string(struct pike_string *s)
       /* Fall though */
     }
     break;
-    
+
   case 1:
     if(!find_magnitude1(STR1(s),s->len))
     {
@@ -1295,7 +1458,7 @@ PMOD_EXPORT void check_string(struct pike_string *s)
     locate_problem(wrong_hash);
     Pike_fatal("Hash value changed?\n");
   }
-  
+
   if(debug_findstring(s) !=s)
     Pike_fatal("Shared string not shared.\n");
 
@@
-1728,6 +1862,7 @@ static struct pike_string *realloc_shared_string(struct pike_string *a,
   if(a->refs==1)
   {
     unlink_pike_string(a);
+    CLEAR_STRING_CHECKED(a);
     return realloc_unlinked_string(a, size);
   }else{
     r=begin_wide_shared_string(size,a->size_shift);
@@ -1875,6 +2010,7 @@ struct pike_string *modify_shared_string(struct pike_string *a,
 
     unlink_pike_string(a);
     low_set_index(a, index, c);
+    CLEAR_STRING_CHECKED(a);
     if((((unsigned int)index) >= HASH_PREFIX) && (index < a->len-8))
     {
       struct pike_string *old;
@@ -1906,9 +2042,77 @@ struct pike_string *modify_shared_string(struct pike_string *a,
   }
 }
 
+/* Sets the content flags and cached range of ret, which holds (or is
+ * about to hold) the concatenation of a string A and b. A is passed as
+ * its flags, min and max, since it may already have been reallocated
+ * into ret.
+ *
+ * min and max are cached in units that depend on size_shift, so aflags
+ * must not include STRING_CONTENT_CHECKED if A had another size_shift
+ * than ret; that cannot be detected here. */
+PMOD_EXPORT void set_flags_for_add( struct pike_string *ret,
+                                    unsigned char aflags,
+                                    unsigned char amin,
+                                    unsigned char amax,
+                                    struct pike_string *b)
+{
+  if( !b->len ) {
+    ret->flags |= aflags & ~15;
+    ret->min = amin;
+    ret->max = amax;
+    return;
+  }
+  if( (aflags & STRING_CONTENT_CHECKED) && (b->flags & STRING_CONTENT_CHECKED) &&
+      (b->size_shift == ret->size_shift) )
+  {
+    ret->min = MIN( amin, b->min );
+    ret->max = MAX( amax, b->max );
+    ret->flags |= STRING_CONTENT_CHECKED;
+  }
+  else
+    ret->flags &= ~STRING_CONTENT_CHECKED;
+
+  if( (aflags & STRING_IS_LOWERCASE) && (b->flags & STRING_IS_LOWERCASE) )
+    ret->flags |= STRING_IS_LOWERCASE;
+  else
+    ret->flags &= ~STRING_IS_LOWERCASE;
+
+  if( (aflags & STRING_IS_UPPERCASE) && (b->flags & STRING_IS_UPPERCASE) )
+    ret->flags |= STRING_IS_UPPERCASE;
+  else
+    ret->flags &= ~STRING_IS_UPPERCASE;
+}
+
+/* Adjusts the content flags and cached range of a for the contents of b
+ * being appended to it. Flags are only ever cleared here, and a is taken
+ * to have the size_shift of the finished string already. */
+PMOD_EXPORT void update_flags_for_add( struct pike_string *a, struct pike_string *b)
+{
+  if( !b->len ) return;
+  if( a->flags & STRING_CONTENT_CHECKED )
+  {
+    /* min and max are cached in units that depend on size_shift, so
+     * ranges of strings with different size_shifts cannot be merged. */
+    if( (b->flags & STRING_CONTENT_CHECKED) &&
+        (b->size_shift == a->size_shift) )
+    {
+      if( b->min < a->min ) a->min = b->min;
+      if( b->max > a->max ) a->max = b->max;
+    }
+    else
+      a->flags &= ~STRING_CONTENT_CHECKED;
+  }
+
+  if( (a->flags & STRING_IS_LOWERCASE) && !(b->flags & STRING_IS_LOWERCASE) )
+    a->flags &= ~STRING_IS_LOWERCASE;
+
+  if( (a->flags & STRING_IS_UPPERCASE) && !(b->flags & STRING_IS_UPPERCASE) )
+    a->flags &= ~STRING_IS_UPPERCASE;
+}
+
 /*** Add strings ***/
 PMOD_EXPORT struct pike_string *add_shared_strings(struct pike_string *a,
-					 struct pike_string *b)
+                                                   struct pike_string *b)
 {
   struct pike_string *ret;
   PCHARP tmp;
@@ -1919,16 +2123,26 @@ PMOD_EXPORT struct pike_string *add_shared_strings(struct pike_string *a,
   pike_string_cpy(tmp,a);
   INC_PCHARP(tmp,a->len);
   pike_string_cpy(tmp,b);
+  /* The cached range of a is only meaningful for ret if a was not
+   * widened (an empty a has no characters to misrepresent). */
+  set_flags_for_add( ret,
+                     (!a->len || a->size_shift == ret->size_shift) ?
+                     a->flags : (a->flags & ~STRING_CONTENT_CHECKED),
+                     a->min, a->max, b );
   return low_end_shared_string(ret);
 }
 
 PMOD_EXPORT struct pike_string *add_and_free_shared_strings(struct pike_string *a,
-						  struct pike_string *b)
+                                                            struct pike_string *b)
 {
   ptrdiff_t alen = a->len;
   if(a->size_shift == b->size_shift)
   {
-    a = realloc_shared_string(a,alen + b->len);
+    unsigned char aflags = a->flags;
+    unsigned char amin = a->min;
+    unsigned char amax = a->max;
+    a = realloc_shared_string(a, alen + b->len);
+    set_flags_for_add( a, aflags, amin, amax, b );
     MEMCPY(a->str+(alen<<a->size_shift),b->str,b->len<<b->size_shift);
     free_string(b);
     a->flags |= STRING_NOT_HASHED;
@@ -1949,8 +2163,12 @@ PMOD_EXPORT ptrdiff_t string_search(struct pike_string *haystack,
   SearchMojt mojt;
   char *r;
 
-  if(needle->size_shift > haystack->size_shift ||
-     start + needle->len > haystack->len)
+  /* An empty needle matches whatever the ranges say, so only non-empty
+   * needles are subject to the range check. */
+  if( needle->len && !string_range_contains_string( haystack, needle ) )
+    return -1;
+
+  if(start + needle->len > haystack->len)
     return -1;
 
   if(!needle->len) return start;
@@ -2147,6 +2365,11 @@ void init_shared_string_table(void)
   }
 #endif
   empty_pike_string = make_shared_string("");
+  /* Give the empty string an empty range (min > max), which is neutral
+   * when ranges are merged in set_flags_for_add()/update_flags_for_add(). */
+  empty_pike_string->flags |= STRING_CONTENT_CHECKED | STRING_IS_LOWERCASE | STRING_IS_UPPERCASE;
+  empty_pike_string->min = 255;
+  empty_pike_string->max = 0;
 }
 
 #ifdef DO_PIKE_CLEANUP
@@ -2357,6 +2580,7 @@ PMOD_EXPORT int init_string_builder_with_string (struct string_builder *s,
   if (str->refs == 1 && str->len > SHORT_STRING_THRESHOLD) {
     /* Unlink the string and use it as buffer directly.
*/
+    unlink_pike_string (str);
+    CLEAR_STRING_CHECKED(str); /* Unlike flags = 0, keeps STRING_CLEAR_ON_EXIT. */
     s->s = str;
     s->malloced = str->len;
     s->known_shift = str->size_shift;
diff --git a/src/stralloc.h b/src/stralloc.h
index 779c6772b569f99ac923353b79f8d2eb814d79a7..79681fe166a1ce01d66a241f3f25ef8d74135b18 100644
--- a/src/stralloc.h
+++ b/src/stralloc.h
@@ -21,19 +21,23 @@
 #define PIKE_STRING_CONTENTS \
   INT32 refs; \
   INT32 ref_type; \
-  INT16 flags; \
-  INT16 size_shift; /* 14 bit waste, but good for alignment... */ \
+  unsigned char flags; \
+  unsigned char size_shift; \
+  unsigned char min; /* Valid if STRING_CONTENT_CHECKED is set. */ \
+  unsigned char max; /* Valid if STRING_CONTENT_CHECKED is set. */ \
   ptrdiff_t len; /* Not counting terminating NUL. */ \
   size_t hval; \
-  struct pike_string *next 
+  struct pike_string *next
 #else /* !ATOMIC_SVALUE */
-#define PIKE_STRING_CONTENTS	\
-  INT32 refs;	\
-  INT16 flags;	\
-  INT16 size_shift; /* 14 bit waste, but good for alignment... */	\
-  ptrdiff_t len; /* Not counting terminating NUL. */	\
-  size_t hval;	\
-  struct pike_string *next 
+#define PIKE_STRING_CONTENTS \
+  INT32 refs; \
+  unsigned char flags; \
+  unsigned char size_shift; \
+  unsigned char min; /* Valid if STRING_CONTENT_CHECKED is set. */ \
+  unsigned char max; /* Valid if STRING_CONTENT_CHECKED is set. */ \
+  ptrdiff_t len; /* Not counting terminating NUL. */ \
+  size_t hval; \
+  struct pike_string *next
 #endif
 
 struct pike_string
@@ -50,10 +54,18 @@ struct string_builder
 };
 
 /* Flags used in pike_string->flags. */
-#define STRING_NOT_HASHED	1	/* Hash value is invalid. */
-#define STRING_NOT_SHARED	2	/* String not shared. */
-#define STRING_IS_SHORT		4	/* String is blockalloced. */
-#define STRING_CLEAR_ON_EXIT	8	/* Overwrite before free. */
+#define STRING_NOT_HASHED 1 /* Hash value is invalid. */
+#define STRING_NOT_SHARED 2 /* String not shared. */
+#define STRING_IS_SHORT 4 /* String is blockalloced. */
+#define STRING_CLEAR_ON_EXIT 8 /* Overwrite before free. */
+
+#define STRING_CONTENT_CHECKED 16 /* if true, min and max are valid */
+#define STRING_IS_LOWERCASE 32 /* lower_case() returns the string as is. */
+#define STRING_IS_UPPERCASE 64 /* upper_case() returns the string as is. */
+
+/* Forgets everything cached about the contents of X: clears all flags
+ * but the four lowest ones above. For strings about to be modified. */
+#define CLEAR_STRING_CHECKED(X) do{(X)->flags &= 15;}while(0)
 
 /* Flags used by string_builder_append_integer() */
 #define APPEND_SIGNED 1 /* Value is signed */
@@ -96,10 +108,6 @@ struct pike_string *debug_findstring(const struct pike_string *foo);
 #define my_hash_string(X) PTR_TO_INT(X)
 #define is_same_string(X,Y) ((X)==(Y))
 
-/* NB: This intentionally only works for narrow strings. */
-#define string_has_null(X) \
-  (STRNLEN((X)->str, (size_t)(X)->len) != (size_t)(X)->len)
-
 #ifdef PIKE_DEBUG
 #define STR0(X) ((p_wchar0 *)debug_check_size_shift((X),0)->str)
 #define STR1(X) ((p_wchar1 *)debug_check_size_shift((X),1)->str)
@@ -290,6 +298,17 @@ PMOD_EXPORT struct pike_string *debug_make_shared_string(const char *str);
 PMOD_EXPORT struct pike_string *debug_make_shared_string0(const p_wchar0 *str);
 PMOD_EXPORT struct pike_string *debug_make_shared_string1(const p_wchar1 *str);
 PMOD_EXPORT struct pike_string *debug_make_shared_string2(const p_wchar2 *str);
+/* Gives the range of the characters in str through *min and *max (either
+ * may be NULL). If loose is set the string is never scanned, and the
+ * range given may be wider than the real one. */
+PMOD_EXPORT void check_string_range( struct pike_string *str, int loose,
+                                     INT32 *min, INT32 *max );
+/* Returns true if str1 could contain str2. */
+PMOD_EXPORT int string_range_contains_string( struct pike_string *str1,
+                                              struct pike_string *str2 );
+/* Returns true if str could contain n. */
+PMOD_EXPORT int string_range_contains( struct pike_string *str, int n );
+
 PMOD_EXPORT void do_free_string(struct pike_string *s);
 PMOD_EXPORT void do_free_unlinked_pike_string(struct pike_string *s);
 PMOD_EXPORT void really_free_string(struct pike_string *s);
@@ -378,6 +397,13 @@ PMOD_EXPORT ptrdiff_t string_builder_quote_string(struct string_builder *buf,
                                                   ptrdiff_t start,
                                                   ptrdiff_t max_len,
                                                   int flags);
+/* Maintain the content flags and the cached range (min/max) of the
+ * result when strings are concatenated. */
+PMOD_EXPORT void update_flags_for_add( struct pike_string *a, struct pike_string *b);
+PMOD_EXPORT void set_flags_for_add( struct pike_string *ret,
+                                    unsigned char aflags, unsigned char amin,
+                                    unsigned char amax,
+                                    struct pike_string *b);
 PMOD_EXPORT void string_builder_append_integer(struct string_builder *s,
                                                LONGEST val,
                                                unsigned int base,
@@ -435,6 +461,19 @@ static INLINE void string_builder_binary_strcat(struct string_builder *s,
   string_builder_binary_strcat0 (s, (const p_wchar0 *) str, len);
 }
 
+/* Returns true if x contains a NUL character, judged by whether the
+ * lower bound from check_string_range() is <= 0. This may scan x and
+ * cache its range.
+ *
+ * Note: Does not work 100% correctly with shift==2 strings. */
+static INLINE int string_has_null( struct pike_string *x )
+{
+  INT32 min;
+  if( !x->len ) return 0;
+  check_string_range(x,0,&min,0);
+  return min <= 0;
+}
+
 #define ISCONSTSTR(X,Y) c_compare_string((X),Y,sizeof(Y)-sizeof(""))
 
 #define visit_string_ref(S, REF_TYPE) \