diff --git a/lib/modules/String.pmod/module.pmod b/lib/modules/String.pmod/module.pmod index 2696d935af9630f5e6992455f493ee9ebeb3986f..8fef01347ed1aaa198a39717fac10abe9b1e0f3c 100644 --- a/lib/modules/String.pmod/module.pmod +++ b/lib/modules/String.pmod/module.pmod @@ -6,6 +6,7 @@ constant Buffer = __builtin.Buffer; constant count=__builtin.string_count; constant width=__builtin.string_width; constant trim_whites = __builtin.string_trim_whites; +constant normalize_space = __builtin.string_normalize_space; constant trim_all_whites = __builtin.string_trim_all_whites; constant Iterator = __builtin.string_iterator; constant SplitIterator = __builtin.string_split_iterator; diff --git a/lib/modules/String.pmod/testsuite.in b/lib/modules/String.pmod/testsuite.in index 9e939c0c7dba205739d2d8a840f26e197fae778d..e5eb04661f359b1567acc8f0996e464beaf24136 100644 --- a/lib/modules/String.pmod/testsuite.in +++ b/lib/modules/String.pmod/testsuite.in @@ -1,5 +1,5 @@ START_MARKER -dnl $Id: testsuite.in,v 1.26 2008/06/29 18:03:03 nilsson Exp $ +dnl $Id: testsuite.in,v 1.27 2010/01/02 13:17:10 srb Exp $ test_eq([[ String.Buffer()->add("xxx") ]], 3) test_any([[ @@ -116,6 +116,25 @@ test_eq(String.trim_all_whites ("\200000"), "\200000") test_eq(String.trim_all_whites (" \t\n\r "), "") test_eq(String.trim_all_whites (""), "") +test_eq(String.normalize_space ("\v\f \t\n\r\0\v\f \t\n\r"), "\0") +test_eq(String.normalize_space ("\v\f \t\n\r\400\v\f \t\n\r"), "\400") +test_eq(String.normalize_space ("\v\f \t\n\r\200000\v\f \t\n\r"), "\200000") +test_eq(String.normalize_space ("\v\f \t\n\ra\400\v\f \t\n\ra\400\t \v\f"), + "a\400 a\400") +test_eq(String.normalize_space ("\0"), "\0") +test_eq(String.normalize_space ("\400"), "\400") +test_eq(String.normalize_space ("\200000"), "\200000") +test_eq(String.normalize_space (" \t\n\r\v\f "), "") +test_eq(String.normalize_space (""), "") +test_eq(String.normalize_space (" a bb ccc ddd \n eee f g\n"), + "a bb ccc ddd eee f g") +test_eq(String.normalize_space (" a bb ccc ddd \n eee f g\n"," \t"), + "a bb ccc ddd\neee f g\n") +test_eq(String.normalize_space (" a bb ccc ddd \n eee f g\n","\t "), + "a\tbb\tccc\tddd\neee\tf\tg\n") +test_eq(String.normalize_space (" a bb ccc ddd \n eee f g\n",""), + " a bb ccc ddd \n eee f g\n") + dnl MISSING TEST: String.Iterator test_any([[ diff --git a/src/builtin.cmod b/src/builtin.cmod index 194ef65ce9c59e10e436adb9a9e090528d9769d3..c2795e06a36b1a9412dc7212c8045bb6eec8c7bb 100644 --- a/src/builtin.cmod +++ b/src/builtin.cmod @@ -2,7 +2,7 @@ || This file is part of Pike. For copyright information see COPYRIGHT. || Pike is distributed under GPL, LGPL and MPL. See the file COPYING || for more information. -|| $Id: builtin.cmod,v 1.241 2009/11/19 23:45:22 mast Exp $ +|| $Id: builtin.cmod,v 1.242 2010/01/02 13:17:10 srb Exp $ */ #include "global.h" @@ -750,6 +750,106 @@ PIKEFUN string string_trim_whites (string s) RETURN string_slice (s, start, end + 1 - start); } +/*! @decl string normalize_space (string s, string|void whitespace) + *! @belongs String + *! + *! Returns @[s] with white space normalised. + *! White space is normalised by stripping leading and trailing white space + *! and replacing sequences of white space characters with a single space. + *! @[whitespace] is defined to be " \t\r\n\v\f" if omitted, the first + *! character denotes the replacement character for replacing sequences. + *! + *! Note that trailing and leading whitespace around \r and \n characters + *! is stripped as well. + */ +PMOD_EXPORT +PIKEFUN string string_normalize_space (string s, string|void whitespace) + errname String.normalize_space; + optflags OPT_TRY_OPTIMIZE; +{ size_t len = s->len; + void *src = s->str; + unsigned shift = s->size_shift; + const char *ws; + struct string_builder sb; + unsigned foundspace = 0; + + if(whitespace) + if(whitespace->size_shift>8) + Pike_error("Cannot use wide strings for whitespace\n"); + else if(!whitespace->len) + REF_RETURN s; + else + ws = whitespace->str; + else + ws = 0; + + init_string_builder_alloc (&sb, len, shift); + sb.known_shift = shift; +#define DO_IT_SPACECHECK(c) \ + ((c)==' '||(c)=='\t'||(c)=='\r'||(c)=='\n'||(c)=='\v'||(c)=='\f') + switch (shift) { +#define DO_IT(TYPE) \ + { TYPE *start = src, *end = start+len, *dst = (void*)sb.s->str; \ + for (; start < end; start++) { \ + unsigned chr = *start; \ + if (!ws) { \ + if (!DO_IT_SPACECHECK(chr)) \ + break; \ + } else { \ + char *p = ws; \ + do { \ + if (*p == chr) \ + goto lead##TYPE; \ + } while(*++p); \ + break; \ + } \ +lead##TYPE:; \ + } \ + for (; start < end; start++) { \ + unsigned chr = *start; \ + if (!ws) { \ + if (DO_IT_SPACECHECK(chr)) \ + if (foundspace) \ + continue; \ + else \ + foundspace=1,chr=' '; \ + else \ + foundspace=0; \ + } else { \ + char *p = ws; \ + do { \ + if (*p == chr) \ + if (foundspace) \ + goto skip##TYPE; \ + else { \ + foundspace=1;chr=*ws; \ + goto copy##TYPE; \ + } \ + } while(*++p); \ + if (foundspace && (chr=='\n' || chr=='\r')) { \ + dst[-1] = chr; foundspace=0; \ + goto lead##TYPE; \ + } \ + foundspace=0; \ + } \ +copy##TYPE: \ + *dst++ = chr; \ +skip##TYPE:; \ + } \ + len = dst - (TYPE*)sb.s->str; \ + } + case 0: DO_IT (p_wchar0); break; + case 1: DO_IT (p_wchar1); break; + case 2: DO_IT (p_wchar2); break; +#undef DO_IT +#undef DO_IT_SPACECHECK + } + if (foundspace) + len--; + sb.s->len = len; + RETURN finish_string_builder (&sb); +} + /*! @decl string trim_all_whites (string s) *! @belongs String *! diff --git a/src/builtin_functions.h b/src/builtin_functions.h index 36ef712705638d1e775a4db2c8f1cf19f23f747d..90e959732924659bbd1b5326a8ccd0d4cf8d579e 100644 --- a/src/builtin_functions.h +++ b/src/builtin_functions.h @@ -2,7 +2,7 @@ || This file is part of Pike. For copyright information see COPYRIGHT. || Pike is distributed under GPL, LGPL and MPL. See the file COPYING || for more information. -|| $Id: builtin_functions.h,v 1.40 2009/11/12 15:10:21 grubba Exp $ +|| $Id: builtin_functions.h,v 1.41 2010/01/02 13:17:10 srb Exp $ */ #ifndef BUILTIN_EFUNS_H @@ -181,6 +181,7 @@ PMOD_EXPORT void f_ctime(INT32 args); PMOD_EXPORT void f_mkmapping(INT32 args); PMOD_EXPORT void f_string_count(INT32 args); PMOD_EXPORT void f_string_trim_whites(INT32 args); +PMOD_EXPORT void f_string_normalize_space(INT32 args); PMOD_EXPORT void f_string_trim_all_whites(INT32 args); PMOD_EXPORT void f_program_implements(INT32 args); PMOD_EXPORT void f_program_inherits(INT32 args);