diff --git a/lib/modules/Sql.pmod/mysql.pike b/lib/modules/Sql.pmod/mysql.pike index 08e2fabb8423170778a69655d8ebb5453d7a9b28..3e3e440f2052dee4fbb6fcb3e3ff8753ea4a809f 100644 --- a/lib/modules/Sql.pmod/mysql.pike +++ b/lib/modules/Sql.pmod/mysql.pike @@ -1,5 +1,5 @@ /* - * $Id: mysql.pike,v 1.29 2006/08/15 14:50:39 grubba Exp $ + * $Id: mysql.pike,v 1.30 2006/08/22 11:27:08 grubba Exp $ * * Glue for the Mysql-module */ @@ -16,6 +16,13 @@ inherit Mysql.mysql; #define UNICODE_DECODE_MODE 1 // Unicode decode mode #define LATIN1_UNICODE_ENCODE_MODE 2 // Unicode encode mode with latin1 charset #define UTF8_UNICODE_ENCODE_MODE 4 // Unicode encode mode with utf8 charset +#define BINARY_LATIN1_MODE 8 // Don't special-case latin1 control chars + +#ifdef MYSQL_CHARSET_DEBUG +#define CH_DEBUG(X...) werror("Sql.mysql: " + X) +#else +#define CH_DEBUG(X...) +#endif // Set to the above if the connection is in utf8-mode. Enable latin1 // unicode encode mode by default; it should be compatible with @@ -23,9 +30,9 @@ inherit Mysql.mysql; static int utf8_mode; // The charset, either "latin1" or "utf8", currently assigned to -// character_set_client when unicode encode mode is enabled. Zero when -// the connection charset has been set to something else than "latin1" -// or "unicode". +// character_set_client and character_set_connection when unicode +// encode mode is enabled. Zero when the connection charset has been +// set to something else than "latin1" or "unicode". static string send_charset; static void update_unicode_encode_mode_from_charset (string charset) @@ -35,16 +42,19 @@ static void update_unicode_encode_mode_from_charset (string charset) utf8_mode |= LATIN1_UNICODE_ENCODE_MODE; utf8_mode &= ~UTF8_UNICODE_ENCODE_MODE; send_charset = "latin1"; + CH_DEBUG("Entering latin1 mode.\n"); break; case "unicode": utf8_mode |= UTF8_UNICODE_ENCODE_MODE; utf8_mode &= ~LATIN1_UNICODE_ENCODE_MODE; send_charset = "utf8"; + CH_DEBUG("Entering utf8 mode.\n"); break; default: // Wrong charset - the mode can't be used. utf8_mode |= LATIN1_UNICODE_ENCODE_MODE|UTF8_UNICODE_ENCODE_MODE; send_charset = 0; + CH_DEBUG("Entering other mode.\n"); break; } } @@ -58,10 +68,10 @@ int(0..1) set_unicode_encode_mode (int enable) //! //! Unicode encode mode works as follows: Eight bit strings are sent //! as @expr{latin1@} and wide strings are sent using @expr{utf8@}. -//! @[big_query] sends @expr{SET character_set_client@} statements as -//! necessary to update the charset on the server side. If the server -//! doesn't support that then it fails, but the wide string query -//! would fail anyway. +//! @[big_query] sends @expr{SET character_set_client@} and @expr{SET +//! character_set_connection@} statements as necessary to update the +//! charset on the server side. If the server doesn't support that +//! then it fails, but the wide string query would fail anyway. //! //! To make this transparent, string literals with introducers (e.g. //! @expr{_binary 'foo'@}) are excluded from the UTF-8 encoding. This @@ -95,6 +105,12 @@ int(0..1) set_unicode_encode_mode (int enable) //! will get UTF-8 encoded by the server. //! //! @note +//! When unicode encode mode is enabled, the connection charset +//! will mirror the client charset. This is necessary for unicode +//! characters to survive for wide queries, and for binary data +//! to survive for narrow queries in a transparent manner. +//! +//! @note //! When unicode encode mode is enabled and the connection charset //! is @expr{latin1@}, the charset accepted by @[big_query] is not //! quite Unicode since @expr{latin1@} is based on @expr{cp1252@}. @@ -107,11 +123,13 @@ int(0..1) set_unicode_encode_mode (int enable) //! @seealso //! @[set_unicode_decode_mode], @[set_charset] { - if (enable) + if (enable) { + CH_DEBUG("Enabling unicode encode mode.\n"); update_unicode_encode_mode_from_charset (lower_case (get_charset())); - else { + } else { utf8_mode &= ~(LATIN1_UNICODE_ENCODE_MODE|UTF8_UNICODE_ENCODE_MODE); send_charset = 0; + CH_DEBUG("Disabling unicode encode mode.\n"); } return !!send_charset; } @@ -154,10 +172,12 @@ void set_unicode_decode_mode (int enable) //! @[set_unicode_encode_mode] { if (enable) { + CH_DEBUG("Enabling unicode decode mode.\n"); ::big_query ("SET character_set_results = utf8"); utf8_mode |= UNICODE_DECODE_MODE; } else { + CH_DEBUG("Disabling unicode decode mode.\n"); ::big_query ("SET character_set_results = " + get_charset()); utf8_mode &= ~UNICODE_DECODE_MODE; } @@ -240,6 +260,8 @@ void set_charset (string charset) { charset = lower_case (charset); + CH_DEBUG("Setting charset to %O.\n", charset); + ::set_charset (charset == "unicode" ? "utf8" : charset); if (charset == "unicode" || @@ -312,6 +334,7 @@ string quote(string s) string latin1_to_utf8 (string s) //! Converts a string in MySQL @expr{latin1@} format to UTF-8. { + CH_DEBUG("Converting latin1 query to utf8.\n"); return string_to_utf8 (replace (s, ([ "\x80": "\u20AC", /*"\x81": "\u0081",*/ "\x82": "\u201A", "\x83": "\u0192", "\x84": "\u201E", "\x85": "\u2026", "\x86": "\u2020", "\x87": "\u2021", @@ -546,9 +569,11 @@ int decode_datetime (string timestr) string restore_charset; \ if (charset) { \ restore_charset = send_charset || get_charset(); \ - if (charset != restore_charset) \ - ::big_query ("SET character_set_client=" + charset); \ - else \ + CH_DEBUG("Restore charset is %O.\n", restore_charset); \ + if (charset != restore_charset) { \ + ::big_query("SET character_set_client=" + charset); \ + ::big_query("SET character_set_connection=" + charset); \ + } else \ restore_charset = 0; \ } \ \ @@ -572,14 +597,19 @@ int decode_datetime (string timestr) new_send_charset = "utf8"; \ } \ } \ + CH_DEBUG("New send charset is %O.\n", new_send_charset); \ \ if (new_send_charset != send_charset) { \ - if (mixed err = \ - ::big_query ("SET character_set_client=" + new_send_charset)) { \ + CH_DEBUG("Send charset was %O.\n", send_charset); \ + if (mixed err = catch { \ + ::big_query("SET character_set_client=" + new_send_charset); \ + ::big_query("SET character_set_connection=" + \ + new_send_charset); \ + }) { \ if (new_send_charset == "utf8") \ predef::error ("The query is a wide string " \ "and the MySQL server doesn't support UTF-8: %s\n", \ - describe_error (err)); \ + describe_error(err)); \ else \ throw(err); \ } \ @@ -587,18 +617,24 @@ int decode_datetime (string timestr) } \ } \ \ + CH_DEBUG("Sending query %O.\n", query); \ + \ int|object res = ::do_query(query); \ \ if (restore_charset) { \ if (send_charset && (<"latin1", "utf8">)[charset]) \ send_charset = charset; \ - else \ - ::big_query ("SET character_set_client=" + restore_charset); \ + else { \ + CH_DEBUG("Restoring charset to %O.\n", restore_charset); \ + ::big_query("SET character_set_client=" + restore_charset); \ + ::big_query("SET character_set_connection=" + restore_charset); \ + } \ } \ \ if (!objectp(res)) return res; \ \ if (utf8_mode & UNICODE_DECODE_MODE) { \ + CH_DEBUG("Adding UnicodeWrapper.\n"); \ return .sql_util.UnicodeWrapper(res); \ } \ return res;