diff --git a/lib/modules/Sql.pmod/mysql.pike b/lib/modules/Sql.pmod/mysql.pike index d0947f880061f9a48b2098fe3517047689613608..f57922b83821594c3da27ce4b36804e89190784f 100644 --- a/lib/modules/Sql.pmod/mysql.pike +++ b/lib/modules/Sql.pmod/mysql.pike @@ -1,5 +1,5 @@ /* - * $Id: mysql.pike,v 1.24 2006/08/10 19:35:26 mast Exp $ + * $Id: mysql.pike,v 1.25 2006/08/12 02:57:55 mast Exp $ * * Glue for the Mysql-module */ @@ -13,161 +13,277 @@ inherit Mysql.mysql; -#define UTF8_DECODE_QUERY 1 -#define UTF8_ENCODE_QUERY 2 +#define UNICODE_DECODE_MODE 1 // Unicode decode mode +#define LATIN1_UNICODE_ENCODE_MODE 2 // Unicode encode mode with latin1 charset +#define UTF8_UNICODE_ENCODE_MODE 4 // Unicode encode mode with utf8 charset -//! Set to the above if the connection is in utf8-mode. +// Set to the above if the connection is in utf8-mode. Enable latin1 +// unicode encode mode by default; it should be compatible with +// earlier pike versions. static int utf8_mode; -//! The charset passed with the @expr{mysql_charset_name@} option. -static string initial_charset; +// The charset, either "latin1" or "utf8", currently assigned to +// character_set_client when unicode encode mode is enabled. Zero when +// the connection charset has been set to something else than "latin1" +// or "unicode". +static string send_charset; -//! Enter unicode encode/decode mode. +static void update_unicode_encode_mode_from_charset (string charset) +{ + switch (charset) { // Lowercase assumed. + case "latin1": + utf8_mode |= LATIN1_UNICODE_ENCODE_MODE; + utf8_mode &= ~UTF8_UNICODE_ENCODE_MODE; + send_charset = "latin1"; + break; + case "unicode": + utf8_mode |= UTF8_UNICODE_ENCODE_MODE; + utf8_mode &= ~LATIN1_UNICODE_ENCODE_MODE; + send_charset = "utf8"; + break; + default: + // Wrong charset - the mode can't be used. 
+ utf8_mode |= LATIN1_UNICODE_ENCODE_MODE|UTF8_UNICODE_ENCODE_MODE; + send_charset = 0; + break; + } + werror ("utf8_mode %x, send_charset %O\n", utf8_mode, send_charset); +} + +int(0..1) set_unicode_encode_mode (int enable) +//! Enables or disables unicode encode mode. //! -//! After this has been enabled, query-strings may be provided -//! as wide (Unicode) strings, and any non-binary data will be -//! decoded automatically according to UTF8. +//! In this mode, if the server supports UTF-8 and the connection +//! charset is @expr{latin1@} (the default) or @expr{unicode@} then +//! @[big_query] handles wide unicode queries. Enabled by default. //! -//! The statement "@expr{SET NAMES 'utf8'@}" is sent to enable UTF8 -//! mode for the connection. +//! Unicode encode mode works as follows: Eight bit strings are sent +//! as @expr{latin1@} and wide strings are sent using @expr{utf8@}. +//! @[big_query] sends @expr{SET character_set_client@} statements as +//! necessary to update the charset on the server side. If the server +//! doesn't support that then it fails, but the wide string query +//! would fail anyway. //! -//! @param force -//! If this optional flag is nonzero then the statement to enable -//! UTF8 mode is sent even if this mode already is enabled according -//! to the internal flags. +//! To make this transparent, string literals with introducers (e.g. +//! @expr{_binary 'foo'@}) are excluded from the UTF-8 encoding. This +//! means that @[big_query] needs to do some superficial parsing of +//! the query when it is a wide string. //! //! @returns -//! Returns @expr{1@} on success or @expr{0@} if the server doesn't -//! support unicode (i.e. if the statement to enable UTF8 mode -//! fails). +//! @int +//! @value 1 +//! Unicode encode mode is enabled. +//! @value 0 +//! Unicode encode mode couldn't be enabled because an +//! incompatible connection charset is set. You need to do +//! @expr{@[set_charset]("latin1")@} or +//! 
@expr{@[set_charset]("unicode")@} to enable it. +//! @endint //! //! @note -//! Literal strings prefixed by the keyword @tt{BINARY@} will not be -//! encoded using UTF8. +//! Note that this mode doesn't affect the MySQL system variable +//! @expr{character_set_connection@}, i.e. it will still be set to +//! @expr{latin1@} by default which means server functions like +//! @expr{UPPER()@} won't handle non-@expr{latin1@} characters +//! correctly in all cases. +//! +//! To fix that, do @expr{@[set_charset]("unicode")@}. That will +//! allow unicode encode mode to work while @expr{utf8@} is fully +//! enabled at the server side. +//! +//! Tip: If you enable @expr{utf8@} on the server side, you need to +//! send raw binary strings as @expr{_binary'...'@}. Otherwise they +//! will get UTF-8 encoded by the server. //! //! @note -//! Unicode support was added in MySQL 4.1. +//! When unicode encode mode is enabled and the connection charset +//! is @expr{latin1@}, the charset accepted by @[big_query] is not +//! quite Unicode since @expr{latin1@} is based on @expr{cp1252@}. +//! The differences are in the range @expr{0x80..0x9f@} where +//! Unicode have control chars. +//! +//! This small discrepancy is not present when the connection +//! charset is @expr{unicode@}. //! //! @seealso -//! @[enter_unicode_decode_mode()], @[leave_unicode_mode()] -int(0..1) enter_unicode_mode (void|int force) +//! @[set_unicode_decode_mode], @[set_charset] { - if (force || utf8_mode != UTF8_DECODE_QUERY|UTF8_ENCODE_QUERY) { - if (catch { - big_query("SET NAMES 'utf8'"); - }) { - return 0; - } - utf8_mode = UTF8_DECODE_QUERY|UTF8_ENCODE_QUERY; + if (enable) + update_unicode_encode_mode_from_charset (lower_case (get_charset())); + else { + utf8_mode &= ~(LATIN1_UNICODE_ENCODE_MODE|UTF8_UNICODE_ENCODE_MODE); + send_charset = 0; } - return 1; + return !!send_charset; } -//! Enter unicode decode mode. -//! -//! After this has been enabled, non-binary data from the database -//! 
will be decoded according to UTF8. +int get_unicode_encode_mode() +//! Returns nonzero if unicode encode mode is enabled, zero otherwise. //! -//! The statement "@expr{SET character_set_results = utf8@}" is sent -//! to enable UTF8 mode for the returned results. +//! @seealso +//! @[set_unicode_encode_mode] +{ + return !!send_charset; +} + +void set_unicode_decode_mode (int enable) +//! Enable or disable unicode decode mode. //! -//! @param force -//! If this optional flag is nonzero then the statement to enable -//! UTF8 encoding of results is sent even though this mode already -//! is enabled according to the internal flags. +//! In this mode, if the server supports UTF-8 then non-binary text +//! strings in results are automatically decoded to (possibly +//! wide) unicode strings. Not enabled by default. //! -//! @returns -//! Returns @expr{1@} on success or @expr{0@} if the server doesn't -//! support unicode (i.e. if the statement to enable UTF8 mode -//! fails). +//! The statement "@expr{SET character_set_results = utf8@}" is sent +//! to the server to enable the mode. When the mode is disabled, +//! "@expr{SET character_set_results = xxx@}" is sent, where +//! @expr{xxx@} is the connection charset that @[get_charset] returns. //! -//! @note -//! Any query encoding will need to be done by hand. +//! @param enable +//! Nonzero enables this feature, zero disables it. //! -//! @note -//! If the connection previously was in full unicode mode as set by -//! @[enter_unicode_mode] then the server will still expect queries -//! to be UTF8 encoded. I.e. the server system variable -//! @expr{character_set_client@} retains the value @expr{'utf8'@}. +//! @throws +//! Throws an exception if the server doesn't support this, i.e. if +//! the statement above fails. The MySQL system variable +//! @expr{character_set_results@} was added in MySQL 4.1.1. //! //! @note -//! The server system variable @expr{character_set_results@} was -//! added in MySQL 4.1.1. +//!
This mode is not compatible with earlier pike versions. You need +//! to run in compatibility mode <= 7.6 to have it disabled by +//! default. //! //! @seealso -//! @[enter_unicode_mode()], @[leave_unicode_mode()] -int(0..1) enter_unicode_decode_mode (void|int force) +//! @[set_unicode_encode_mode] { - if (force || utf8_mode != UTF8_DECODE_QUERY) { - if (catch { - big_query("SET character_set_results = utf8"); - }) { - return 0; - } - utf8_mode = UTF8_DECODE_QUERY; + if (enable) { + ::big_query ("SET character_set_results = utf8"); + utf8_mode |= UNICODE_DECODE_MODE; + } + else { // The local get_charset() may return "unicode", which isn't a MySQL charset. + ::big_query ("SET character_set_results = " + ::get_charset()); + utf8_mode &= ~UNICODE_DECODE_MODE; } - return 1; } -//! Leave unicode mode. +int get_unicode_decode_mode() +//! Returns nonzero if unicode decode mode is enabled, zero otherwise. //! -//! After this no automatic UTF8 conversion is done of queries and -//! results. +//! @seealso +//! @[set_unicode_decode_mode] +{ + return utf8_mode & UNICODE_DECODE_MODE; +} + +void set_charset (string charset) +//! Changes the connection charset. Works similar to sending the query +//! @expr{SET NAMES @[charset]@} but also records the charset on the +//! client side so that various client functions work correctly. //! -//! The statement "@expr{SET NAMES 'xxx'@}" is sent to the server, -//! where @expr{xxx@} is the charset that was passed with the -//! @expr{mysql_charset_name@} option when the connection was opened. -//! If that option wasn't specified then the charset @expr{latin1@} is -//! used, which is the default connection charset in MySQL. +//! @[charset] is a MySQL charset name or the special value +//! @expr{"unicode"@} (see below). You can use @expr{SHOW CHARACTER +//! SET@} to get a list of valid charsets. //! -//! @param force -//! If this optional flag is nonzero then the statement to reset the -//! connection charset is sent even though unicode mode already is -//! disabled according to the internal flags. +//!
Specifying @expr{"unicode"@} as charset is the same as +//! @expr{"utf8"@} except that unicode encode and decode modes are +//! enabled too. Briefly, this means that you can send queries as +//! unencoded unicode strings and will get back non-binary text +//! results as unencoded unicode strings. See +//! @[set_unicode_encode_mode] and @[set_unicode_decode_mode] for +//! further details. //! -//! @returns -//! Returns @expr{1@} on success or @expr{0@} if the server doesn't -//! support unicode (i.e. if the statement to reset the connection -//! charset fails). +//! @throws +//! Throws an exception if the server doesn't support this, i.e. if +//! the statement @expr{SET NAMES@} fails. Support for it was added +//! in MySQL 4.1.0. +//! +//! @note +//! If @[charset] is @expr{"latin1"@} and unicode encode mode is +//! enabled (the default) then @[big_query] can send wide unicode +//! queries transparently if the server supports UTF-8. See +//! @[set_unicode_encode_mode]. //! //! @note -//! Unicode support was added in MySQL 4.1. +//! If unicode decode mode is already enabled (see +//! @[set_unicode_decode_mode]) then this function won't affect the +//! result charset (i.e. the MySQL system variable +//! @expr{character_set_results@}). +//! +//! Actually, a query @expr{SET character_set_results = utf8@} will +//! be sent immediately after setting the charset as above if +//! unicode decode mode is enabled and @[charset] isn't +//! @expr{"utf8"@}. +//! +//! @note +//! You should always use either this function or the +//! @expr{"mysql_charset_name"@} option to @[create] to set the +//! connection charset, or more specifically the charset that the +//! server expects queries to have (i.e. the MySQL system variable +//! @expr{character_set_client@}). Otherwise @[big_query] might not +//! work correctly. +//! +//! Afterwards you may change the system variable +//! @expr{character_set_connection@}, and also +//! @expr{character_set_results@} if unicode decode mode isn't +//! 
enabled. +//! +//! @note +//! The MySQL @expr{latin1@} charset is close to Windows +//! @expr{cp1252@}. The difference from ISO-8859-1 is a bunch of +//! printable chars in the range @expr{0x80..0x9f@} (which contains +//! control chars in ISO-8859-1). For instance, the euro currency +//! sign is @expr{0x80@}. +//! +//! You can use the @expr{mysql-latin1@} encoding in the +//! @[Locale.Charset] module to do conversions, or just use the +//! special @expr{"unicode"@} charset instead. //! //! @seealso -//! @[enter_unicode_mode()], @[enter_unicode_decode_mode()] -int(0..1) leave_unicode_mode (void|int force) +//! @[get_charset], @[set_unicode_encode_mode], @[set_unicode_decode_mode] { - if (force || utf8_mode) { - if (catch { - big_query("SET NAMES '" + (initial_charset || "latin1") + "'"); - }) { - return 0; - } - utf8_mode = 0; - } - return 1; + charset = lower_case (charset); + + ::set_charset (charset == "unicode" ? "utf8" : charset); + + if (charset == "unicode" || + utf8_mode & (LATIN1_UNICODE_ENCODE_MODE|UTF8_UNICODE_ENCODE_MODE)) + update_unicode_encode_mode_from_charset (charset); + + if (charset == "unicode") + utf8_mode |= UNICODE_DECODE_MODE; + else if (utf8_mode & UNICODE_DECODE_MODE && charset != "utf8") + // This setting has been overridden by ::set_charset, so we need + // to reinstate it. + ::big_query ("SET character_set_results = utf8"); } -string query_unicode_mode() -//! Returns the current unicode mode status. +string get_charset() +//! Returns the MySQL name for the current connection charset. //! -//! @returns -//! @string -//! @value "full" -//! Full unicode mode as set by @[enter_unicode_mode] is -//! enabled. -//! @value "decode" -//! Decode unicode mode as set by @[enter_unicode_decode_mode] -//! is enabled. -//! @value 0 -//! Unicode mode is not enabled. C.f. @[leave_unicode_mode]. -//! @endstring +//! Returns @expr{"unicode"@} if unicode encode mode is enabled and +//! UTF-8 is used on the server side (i.e. in +//! 
@expr{character_set_connection@}). +//! +//! @note +//! In servers with full charset support (i.e. MySQL 4.1.0 or +//! later), this corresponds to the MySQL system variable +//! @expr{character_set_client@} (with one exception - see next +//! note) and thus controls the charset in which queries are sent. +//! The charset used for text strings in results might be something +//! else (and typically is if unicode decode mode is enabled; see +//! @[set_unicode_decode_mode]). +//! +//! @note +//! If the returned charset is @expr{latin1@} or @expr{unicode@} and +//! unicode encode mode is enabled (the default) then +//! @expr{character_set_client@} in the server might be either +//! @expr{latin1@} or @expr{utf8@}, depending on the last sent +//! query. See @[set_unicode_encode_mode] for more info. +//! +//! @seealso +//! @[set_charset] { - switch (utf8_mode) { - case UTF8_DECODE_QUERY|UTF8_ENCODE_QUERY: return "full"; - case UTF8_DECODE_QUERY: return "decode"; - default: return 0; - } + if (utf8_mode & UTF8_UNICODE_ENCODE_MODE && send_charset) + return "unicode"; + return ::get_charset(); } #if constant( Mysql.mysql.MYSQL_NO_ADD_DROP_DB ) @@ -194,28 +310,33 @@ string quote(string s) ({ "\\\\", "\\\"", "\\0", "\\\'", "\\n", "\\r" })); } -//! Encode the apropriate sections of the query according to UTF8. -//! ie Those sections that are not strings prefixed by BINARY. -string utf8_encode_query(string q) +string latin1_to_utf8 (string s) +//! Converts a string in MySQL @expr{latin1@} format to UTF-8. { - string uq = upper_case(q); - if (!has_value(uq, "BINARY")) return string_to_utf8(q); - // The following optimization is disabled since it causes more - // overhead in the case when q contains a large binary string (which - // is arguably the main reason for q getting really large). 
- //if ((q & ("\x7f" * sizeof(q))) == q) return q; + return string_to_utf8 (replace (s, ([ + "\x80": "\u20AC", /*"\x81": "\u0081",*/ "\x82": "\u201A", "\x83": "\u0192", + "\x84": "\u201E", "\x85": "\u2026", "\x86": "\u2020", "\x87": "\u2021", + "\x88": "\u02C6", "\x89": "\u2030", "\x8a": "\u0160", "\x8b": "\u2039", + "\x8c": "\u0152", /*"\x8d": "\u008D",*/ "\x8e": "\u017D", /*"\x8f": "\u008F",*/ + /*"\x90": "\u0090",*/ "\x91": "\u2018", "\x92": "\u2019", "\x93": "\u201C", + "\x94": "\u201D", "\x95": "\u2022", "\x96": "\u2013", "\x97": "\u2014", + "\x98": "\u02DC", "\x99": "\u2122", "\x9a": "\u0161", "\x9b": "\u203A", + "\x9c": "\u0153", /*"\x9d": "\u009D",*/ "\x9e": "\u017E", "\x9f": "\u0178", + ]))); +} +string utf8_encode_query (string q, function(string:string) encode_fn) +//! Encodes the appropriate sections of the query with @[encode_fn]. +//! Everything except strings prefixed by an introducer (i.e. +//! @expr{_something@} or @expr{N@}) is encoded. +{ // We need to find the segments that shouldn't be encoded. string e = ""; - while(has_value(uq, "BINARY")) { - string prefix = ""; - string suffix; - sscanf(q, "%[^\'\"]%s", prefix, suffix); - e += string_to_utf8(prefix); - if (!suffix || !sizeof(suffix)) { - q = uq = ""; - break; - } + while (1) { + sscanf(q, "%[^\'\"]%s", string prefix, string suffix); + e += encode_fn (prefix); + + if (suffix == "") break; string quote = suffix[..0]; int start = 1; @@ -242,30 +363,73 @@ string utf8_encode_query(string q) break; } - string uprefix = uq[..sizeof(prefix)-1]; - int is_binary; - // Common cases. - if (has_suffix(uprefix, "BINARY") || has_suffix(uprefix, "BINARY ")) { - // Binary string. - is_binary = 1; - } else { +#define IS_IDENTIFIER_CHAR(chr) (Unicode.is_wordchar (chr) || \ + (<'_', '$'>)[chr]) + + int intpos = -1; + + // Optimize the use of _binary. 
+ if (has_suffix (prefix, "_binary")) + intpos = sizeof (prefix) - sizeof ("_binary"); + else if (has_suffix (prefix, "_binary ")) + intpos = sizeof (prefix) - sizeof ("_binary "); + + else { // Find the white-space suffix of the prefix. - int i = sizeof(uprefix); + int i = sizeof(prefix); while (i--) { - if (!(< ' ', '\n', '\r', '\t' >)[uprefix[i]]) break; + if (!(< ' ', '\n', '\r', '\t' >)[prefix[i]]) break; + } + + if (i >= 0) { + if ((<'n', 'N'>)[prefix[i]]) + // Probably got a national charset string. + intpos = i; + else { + // The following assumes all possible charset names contain + // only [a-zA-Z0-9_$] and are max 32 chars (from + // MY_CS_NAME_SIZE in m_ctype.h). + sscanf (reverse (prefix[i - 33..i]), "%[a-zA-Z0-9_$]%s", + string rev_intro, string rest); + if (sizeof (rev_intro) && rev_intro[-1] == '_' && sizeof (rest)) + intpos = i - sizeof (rev_intro) + 1; + } } - is_binary = has_suffix(uprefix, "BINARY" + uprefix[i+1..]); } - if (is_binary) { - e += suffix[..end]; + + int got_introducer; + if (intpos == 0) + // The prefix begins with the introducer. + got_introducer = 1; + else if (intpos > 0) { + // Check that the introducer sequence we found isn't a suffix of + // some longer keyword or identifier. + int prechar = prefix[intpos - 1]; + if (!IS_IDENTIFIER_CHAR (prechar)) + got_introducer = 1; + } + + if (got_introducer) { + string s = suffix[..end]; + if (String.width (s) > 8) { + string encoding = prefix[intpos..]; + if (has_prefix (encoding, "_")) + sscanf (encoding[1..], "%[a-zA-Z0-9]", encoding); + else + encoding = "utf8"; // Gotta be "N". + s = s[1..sizeof (s) - 2]; + if (sizeof (s) > 40) s = sprintf ("%O...", s[..37]); + else s = sprintf ("%O", s); + predef::error ("A string in the query should be %s encoded " + "but it is wide: %s\n", encoding, s); + } + e += s; } else { - e += string_to_utf8(suffix[..end]); + e += encode_fn (suffix[..end]); } + q = suffix[end+1..]; - uq = uq[sizeof(uq)-sizeof(q)..]; } - // Encode the trailer. 
- e += string_to_utf8(q); return e; } @@ -376,26 +540,121 @@ int decode_datetime (string timestr) } } +#define QUERY_BODY(do_query) \ + if (bindings) \ + query = .sql_util.emulate_bindings(query,bindings,this); \ + \ + string restore_charset; \ + if (charset) { \ + restore_charset = send_charset || get_charset(); \ + if (charset != restore_charset) \ + ::big_query ("SET character_set_client=" + charset); \ + else \ + restore_charset = 0; \ + } \ + \ + else if (send_charset) { \ + string new_send_charset; \ + \ + if (utf8_mode & LATIN1_UNICODE_ENCODE_MODE) { \ + if (String.width (query) == 8) \ + new_send_charset = "latin1"; \ + else { \ + query = utf8_encode_query (query, latin1_to_utf8); \ + new_send_charset = "utf8"; \ + } \ + } \ + \ + else { /* utf8_mode & UTF8_UNICODE_ENCODE_MODE */ \ + if (_can_send_as_latin1 (query)) \ + new_send_charset = "latin1"; \ + else { \ + query = utf8_encode_query (query, string_to_utf8); \ + new_send_charset = "utf8"; \ + } \ + } \ + \ + if (new_send_charset != send_charset) { \ + if (mixed err = catch ( /* ::big_query throws on failure. */ \ + ::big_query ("SET character_set_client=" + new_send_charset))) { \ + if (new_send_charset == "utf8") \ + predef::error ("The query is a wide string " \ + "and the MySQL server doesn't support UTF-8: %s\n", \ + describe_error (err)); \ + else \ + throw (err); \ + } \ + send_charset = new_send_charset; \ + werror ("set charset %O\n", send_charset); \ + } \ + } \ + \ + int|object res = ::do_query(query); \ + \ + if (restore_charset) { \ + if (send_charset && (<"latin1", "utf8">)[charset]) \ + send_charset = charset; \ + else \ + ::big_query ("SET character_set_client=" + restore_charset); \ + } \ + \ + if (!objectp(res)) return res; \ + \ + if (utf8_mode & UNICODE_DECODE_MODE) { \ + return .sql_util.UnicodeWrapper(res); \ + } \ + return res; + +Mysql.mysql_result big_query (string query, + mapping(string|int:mixed)|void bindings, + void|string charset) +//! Sends a query to the server. +//! +//! @param query +//! The SQL query. +//! +//!
@param bindings +//! An optional bindings mapping. See @[Sql.query] for details about +//! this. +//! +//! @param charset +//! An optional charset that will be used temporarily while sending +//! @[query] to the server. If necessary, a query +//! @code +//! SET character_set_client=@[charset] +//! @endcode +//! is sent to the server first, then @[query] is sent as-is, and then +//! the connection charset is restored again (if necessary). +//! +//! Primarily useful with @[charset] set to @expr{"latin1"@} if +//! unicode encode mode (see @[set_unicode_encode_mode]) is enabled +//! (the default) and you have some large queries (typically blob +//! inserts) where you want to avoid the query parsing overhead. +//! +//! @returns +//! A @[Mysql.mysql_result] object is returned if the query is of a +//! kind that returns a result. Zero is returned otherwise. //! -int|object big_query(string q, mapping(string|int:mixed)|void bindings) +//! @seealso +//! @[Sql.big_query] { - if (bindings) - q = .sql_util.emulate_bindings(q,bindings,this); - if (utf8_mode & UTF8_ENCODE_QUERY) { - // Mysql's line protocol is stupid; we need to detect - // the binary strings in the query. - q = utf8_encode_query(q); - } - - int|object res = ::big_query(q); - if (!objectp(res)) return res; - - if (utf8_mode & UTF8_DECODE_QUERY) { - return .sql_util.UnicodeWrapper(res); - } - return ::big_query(q); + QUERY_BODY (big_query); } +Mysql.mysql_result streaming_query (string query, + mapping(string|int:mixed)|void bindings, + void|string charset) +//! Makes a streaming SQL query. +//! +//! This function sends the SQL query @[query] to the Mysql-server. +//! The result of the query is streamed through the returned +//! @[Mysql.mysql_result] object. Note that the involved database +//! tables are locked until all the results has been read. +//! +//! In all other respects, it behaves like @[big_query]. +{ + QUERY_BODY (streaming_query); +} int(0..1) is_keyword( string name ) //! 
Return 1 if the argument @[name] is a mysql keyword. @@ -447,14 +706,21 @@ static void create(string|void host, string|void database, mapping(string:string|int)|void options) { if (options) { + string charset = options->mysql_charset_name || "latin1"; + if (charset == "unicode") + options->mysql_charset_name = "utf8"; + ::create(host||"", database||"", user||"", password||"", options); - initial_charset = options->mysql_charset_name; - switch (options->unicode_mode) { - case "full": enter_unicode_mode(); break; - case "decode": enter_unicode_decode_mode(); break; - } + + update_unicode_encode_mode_from_charset (lower_case (charset)); + + if (options->unicode_decode_mode) + set_unicode_decode_mode (1); + } else { ::create(host||"", database||"", user||"", password||""); + + update_unicode_encode_mode_from_charset ("latin1"); } } diff --git a/src/modules/Mysql/configure.in b/src/modules/Mysql/configure.in index e4bf871d765c82fb69fb04b3d74314ebe90c09d6..df96c8b6637dd9a98db30bcdc6efbaeb64a4ad8d 100644 --- a/src/modules/Mysql/configure.in +++ b/src/modules/Mysql/configure.in @@ -1,5 +1,5 @@ # -# $Id: configure.in,v 1.50 2006/07/04 20:45:27 mast Exp $ +# $Id: configure.in,v 1.51 2006/08/12 02:57:55 mast Exp $ # # Configure script for the mysql-module # @@ -371,6 +371,7 @@ fi PIKE_CHECK_MYSQL_FUNC(mysql_real_query) PIKE_CHECK_MYSQL_FUNC(mysql_fetch_lengths) PIKE_CHECK_MYSQL_FUNC(mysql_options) + PIKE_CHECK_MYSQL_FUNC(mysql_set_character_set) AC_CHECK_LIB(mysqlclient, mysql_ssl_set, [ AC_DEFINE(HAVE_MYSQL_SSL) diff --git a/src/modules/Mysql/mysql.c b/src/modules/Mysql/mysql.c index e88428336cd5e88d59096c7981a5116609619743..bc646019857b712e00c0ebdec8bf625d68ed3168 100644 --- a/src/modules/Mysql/mysql.c +++ b/src/modules/Mysql/mysql.c @@ -2,7 +2,7 @@ || This file is part of Pike. For copyright information see COPYRIGHT. || Pike is distributed under GPL, LGPL and MPL. See the file COPYING || for more information. 
-|| $Id: mysql.c,v 1.97 2005/11/17 13:39:09 grubba Exp $ +|| $Id: mysql.c,v 1.98 2006/08/12 02:57:55 mast Exp $ */ /* @@ -213,6 +213,12 @@ static void exit_mysql_struct(struct object *o) free_string(PIKE_MYSQL->host); PIKE_MYSQL->host = NULL; } +#ifndef HAVE_MYSQL_SET_CHARACTER_SET + if (PIKE_MYSQL->conn_charset) { + free_string (PIKE_MYSQL->conn_charset); + PIKE_MYSQL->conn_charset = NULL; + } +#endif MYSQL_ALLOW(); @@ -397,6 +403,12 @@ static void pike_mysql_reconnect(void) options = (unsigned int)val->u.integer; } +#if !defined (HAVE_MYSQL_SET_CHARACTER_SET) && defined (HAVE_MYSQL_OPTIONS) && defined (HAVE_MYSQL_SET_CHARSET_NAME) + if (PIKE_MYSQL->conn_charset) + mysql_options (mysql, MYSQL_SET_CHARSET_NAME, + PIKE_MYSQL->conn_charset->str); +#endif + MYSQL_ALLOW(); #if defined(HAVE_MYSQL_PORT) || defined(HAVE_MYSQL_UNIX_PORT) @@ -537,7 +549,17 @@ static void pike_mysql_reconnect(void) *! Change charset directory. *! *! @member string "mysql_charset_name" - *! Change charset name. + *! Set connection charset - see @[set_charset] for details. The + *! default is @expr{"latin1"@}. As opposed to @[set_charset], + *! this way of specifying the connection charset doesn't + *! require MySQL 4.1.0. + *! + *! @member int "unicode_decode_mode" + *! Enable unicode decode mode for the connection if nonzero. In + *! this mode non-binary string results are automatically + *! converted to (possibly wide) unicode strings. An error is + *! thrown if the server doesn't support this. See + *! @[set_unicode_decode_mode]. *! *! @member string "ssl_key" *! Path to SSL-key for use in SSL-communication. 
@@ -620,6 +642,20 @@ static void f_create(INT32 args) pike_mysql_set_ssl(PIKE_MYSQL->options); pike_mysql_reconnect(); + +#ifndef HAVE_MYSQL_SET_CHARACTER_SET + { + const char *charset = mysql_character_set_name (PIKE_MYSQL->socket); + if (PIKE_MYSQL->conn_charset) + free_string (PIKE_MYSQL->conn_charset); + if (charset) + PIKE_MYSQL->conn_charset = make_shared_string (charset); + else + /* Just paranoia; mysql_character_set_name should always return + * a string. */ + PIKE_MYSQL->conn_charset = NULL; + } +#endif } /*! @decl string _sprintf(int type, void|mapping flags) @@ -960,39 +996,11 @@ static void low_query(INT32 args, char *name, int flags) } } -/*! @decl Mysql.mysql_result big_query(string query) - *! - *! Make an SQL query. - *! - *! This function sends the SQL query @[query] to the Mysql-server. The result - *! of the query is returned as a @[Mysql.mysql_result] object. - *! - *! Returns @expr{0@} (zero) if the query didn't return any result - *! (e.g. @tt{INSERT@} or similar). - *! - *! @seealso - *! @[Mysql.mysql_result] @[streaming_query] - */ static void f_big_query(INT32 args) { low_query(args, "big_query", PIKE_MYSQL_FLAG_STORE_RESULT); } -/*! @decl Mysql.mysql_result big_query(string query) - *! - *! Makes a streaming SQL query. - *! - *! This function sends the SQL query @[query] to the Mysql-server. - *! The result of the query is streamed through the returned - *! @[Mysql.mysql_result] object. Note that the involved database - *! tables are locked until all the results has been read. - *! - *! Returns @expr{0@} (zero) if the query didn't return any result - *! (e.g. @tt{INSERT@} or similar). - *! - *! @seealso - *! 
@[Mysql.mysql_result] - */ static void f_streaming_query(INT32 args) { low_query(args, "streaming_query", 0); @@ -1731,6 +1739,104 @@ static void f_binary_data(INT32 args) #endif /* HAVE_MYSQL_FETCH_LENGTHS */ } +static void f_set_charset (INT32 args) +{ + struct pike_string *charset; + get_all_args ("set_charset", args, "%n", &charset); + if (string_has_null (charset)) + SIMPLE_ARG_ERROR ("set_charset", 0, + "The charset name cannot contain a NUL character."); + +#ifdef HAVE_MYSQL_SET_CHARACTER_SET + { + int res; + MYSQL_ALLOW(); + res = mysql_set_character_set (PIKE_MYSQL->socket, charset->str); + MYSQL_DISALLOW(); + if (!res) { + const char *err; + MYSQL_ALLOW(); + err = mysql_error(PIKE_MYSQL->socket); + MYSQL_DISALLOW(); + Pike_error("Setting the charset failed: %s\n", err); + } + } + +#else /* !HAVE_MYSQL_SET_CHARACTER_SET */ + push_constant_text ("SET NAMES '"); + ref_push_string (charset); + push_constant_text ("'"); + f_add (3); + low_query (1, "set_charset", PIKE_MYSQL_FLAG_STORE_RESULT); + args++; + if (PIKE_MYSQL->conn_charset) + free_string (PIKE_MYSQL->conn_charset); + copy_shared_string (PIKE_MYSQL->conn_charset, charset); +#endif + + pop_n_elems (args); +} + +static void f_get_charset (INT32 args) +{ + pop_n_elems (args); +#ifdef HAVE_MYSQL_SET_CHARACTER_SET + { + const char *charset = mysql_character_set_name (PIKE_MYSQL->socket); + if (charset) + push_text (charset); + else + /* Just paranoia; mysql_character_set_name should always return + * a string. */ + push_constant_text ("latin1"); + } +#else + if (PIKE_MYSQL->conn_charset) + ref_push_string (PIKE_MYSQL->conn_charset); + else + push_constant_text ("latin1"); +#endif +} + +static void f__can_send_as_latin1 (INT32 args) +/* Helper function to detect if a string can be sent in the latin1 + * encoding. 
*/ +{ + struct pike_string *str; + ptrdiff_t i; + int res; + + if (args != 1) + SIMPLE_WRONG_NUM_ARGS_ERROR ("_can_send_as_latin1", 1); + if (Pike_sp[-1].type != T_STRING) + SIMPLE_ARG_TYPE_ERROR ("_can_send_as_latin1", 0, "string"); + str = Pike_sp[-1].u.string; + + if (str->size_shift) + res = 0; + + else { + /* Have to go through the string to check that it doesn't contain + * any of those pesky chars in the 0x80..0x9f range that MySQL has + * remapped in latin1. */ + /* This check could be made tighter by ignoring chars in strings + * with introducers. */ + res = 1; + for (i = str->len; i--;) { + int chr = STR0 (str)[i]; + if (chr >= 0x80 && chr <= 0x9f && + chr != 0x81 && chr != 0x8d && chr != 0x8f && + chr != 0x90 && chr != 0x9d) { + res = 0; + break; + } + } + } + + pop_stack(); + push_int (res); +} + /*! @endclass */ @@ -1800,6 +1906,11 @@ PIKE_MODULE_INIT /* function(void:int) */ ADD_FUNCTION("binary_data", f_binary_data,tFunc(tVoid,tInt), ID_PUBLIC); + ADD_FUNCTION ("set_charset", f_set_charset, tFunc(tStr,tVoid), ID_PUBLIC); + ADD_FUNCTION ("get_charset", f_get_charset, tFunc(tVoid,tStr), ID_PUBLIC); + ADD_FUNCTION ("_can_send_as_latin1", f__can_send_as_latin1, + tFunc(tStr,tInt01), ID_STATIC); + add_integer_constant( "CLIENT_COMPRESS", CLIENT_COMPRESS, 0); add_integer_constant( "CLIENT_FOUND_ROWS", CLIENT_FOUND_ROWS, 0); add_integer_constant( "CLIENT_IGNORE_SPACE", CLIENT_IGNORE_SPACE, 0); diff --git a/src/modules/Mysql/precompiled_mysql.h b/src/modules/Mysql/precompiled_mysql.h index d5319017f418ffeeaeba95ee7fea5696acd9f4d1..fe76fb27d6c7acdf409daf626fcc492f87ae1ba4 100644 --- a/src/modules/Mysql/precompiled_mysql.h +++ b/src/modules/Mysql/precompiled_mysql.h @@ -2,7 +2,7 @@ || This file is part of Pike. For copyright information see COPYRIGHT. || Pike is distributed under GPL, LGPL and MPL. See the file COPYING || for more information. 
-|| $Id: precompiled_mysql.h,v 1.17 2005/04/12 00:36:13 nilsson Exp $ +|| $Id: precompiled_mysql.h,v 1.18 2006/08/12 02:57:55 mast Exp $ */ /* @@ -59,6 +59,13 @@ struct precompiled_mysql { MYSQL *mysql, *socket; struct pike_string *host, *database, *user, *password; /* Reconnect */ struct mapping *options; +#ifndef HAVE_MYSQL_SET_CHARACTER_SET + /* Old libs (< 4.1.13) don't support changing the connection + * charset. We emulate it by storing the charset ourselves. Note + * that this doesn't work with mysql_real_escape_string, but that + * function isn't used. */ + struct pike_string *conn_charset; +#endif }; struct precompiled_mysql_result {