From b3d136b0b137790ad5220551377d0676628de361 Mon Sep 17 00:00:00 2001 From: Martin Stjernholm <mast@lysator.liu.se> Date: Thu, 10 Aug 2006 21:35:26 +0200 Subject: [PATCH] Unicode mode improvements. It's not enabled per default since it isn't compatible wrt binary strings. Rev: lib/modules/Sql.pmod/mysql.pike:1.24 --- lib/modules/Sql.pmod/mysql.pike | 139 +++++++++++++++++++++++++++----- 1 file changed, 121 insertions(+), 18 deletions(-) diff --git a/lib/modules/Sql.pmod/mysql.pike b/lib/modules/Sql.pmod/mysql.pike index 056c791505..d0947f8800 100644 --- a/lib/modules/Sql.pmod/mysql.pike +++ b/lib/modules/Sql.pmod/mysql.pike @@ -1,5 +1,5 @@ /* - * $Id: mysql.pike,v 1.23 2006/08/10 14:24:04 grubba Exp $ + * $Id: mysql.pike,v 1.24 2006/08/10 19:35:26 mast Exp $ * * Glue for the Mysql-module */ @@ -19,31 +19,47 @@ inherit Mysql.mysql; //! Set to the above if the connection is in utf8-mode. static int utf8_mode; +//! The charset passed with the @expr{mysql_charset_name@} option. +static string initial_charset; + //! Enter unicode encode/decode mode. //! //! After this has been enabled, query-strings may be provided //! as wide (Unicode) strings, and any non-binary data will be //! decoded automatically according to UTF8. //! +//! The statement "@expr{SET NAMES 'utf8'@}" is sent to enable UTF8 +//! mode for the connection. +//! +//! @param force +//! If this optional flag is nonzero then the statement to enable +//! UTF8 mode is sent even if this mode already is enabled according +//! to the internal flags. +//! //! @returns -//! Returns @expr{1@} on success. +//! Returns @expr{1@} on success or @expr{0@} if the server doesn't +//! support unicode (i.e. if the statement to enable UTF8 mode +//! fails). +//! +//! @note +//! Literal strings prefixed by the keyword @tt{BINARY@} will not be +//! encoded using UTF8. //! //! @note -//! All strings not prefixed by the keyword @tt{BINARY@} -//! will be encoded according to UTF8. +//! Unicode support was added in MySQL 4.1. //! //! @seealso -//! @[enter_unicode_decode_mode()] -int(0..1) enter_unicode_mode() +//! @[enter_unicode_decode_mode()], @[leave_unicode_mode()] +int(0..1) enter_unicode_mode (void|int force) { - if (!utf8_mode) { + if (force || utf8_mode != UTF8_DECODE_QUERY|UTF8_ENCODE_QUERY) { if (catch { big_query("SET NAMES 'utf8'"); }) { return 0; } + utf8_mode = UTF8_DECODE_QUERY|UTF8_ENCODE_QUERY; } - utf8_mode = UTF8_DECODE_QUERY|UTF8_ENCODE_QUERY; return 1; } @@ -52,28 +68,107 @@ int(0..1) enter_unicode_mode() //! After this has been enabled, non-binary data from the database //! will be decoded according to UTF8. //! +//! The statement "@expr{SET character_set_results = utf8@}" is sent +//! to enable UTF8 mode for the returned results. +//! +//! @param force +//! If this optional flag is nonzero then the statement to enable +//! UTF8 encoding of results is sent even though this mode already +//! is enabled according to the internal flags. +//! //! @returns -//! Returns @expr{1@} on success. +//! Returns @expr{1@} on success or @expr{0@} if the server doesn't +//! support unicode (i.e. if the statement to enable UTF8 mode +//! fails). //! //! @note //! Any query encoding will need to be done by hand. //! +//! @note +//! If the connection previously was in full unicode mode as set by +//! @[enter_unicode_mode] then the server will still expect queries +//! to be UTF8 encoded. I.e. the server system variable +//! @expr{character_set_client@} retains the value @expr{'utf8'@}. +//! +//! @note +//! The server system variable @expr{character_set_results@} was +//! added in MySQL 4.1.1. +//! //! @seealso -//! @[enter_unicode_mode()] -int(0..1) enter_unicode_decode_mode() +//! @[enter_unicode_mode()], @[leave_unicode_mode()] +int(0..1) enter_unicode_decode_mode (void|int force) { - if (!utf8_mode) { + if (force || utf8_mode != UTF8_DECODE_QUERY) { if (catch { - big_query("SET NAMES 'utf8'"); + big_query("SET character_set_results = utf8"); + }) { + return 0; + } + utf8_mode = UTF8_DECODE_QUERY; + } + return 1; +} + +//! Leave unicode mode. +//! +//! After this no automatic UTF8 conversion is done of queries and +//! results. +//! +//! The statement "@expr{SET NAMES 'xxx'@}" is sent to the server, +//! where @expr{xxx@} is the charset that was passed with the +//! @expr{mysql_charset_name@} option when the connection was opened. +//! If that option wasn't specified then the charset @expr{latin1@} is +//! used, which is the default connection charset in MySQL. +//! +//! @param force +//! If this optional flag is nonzero then the statement to reset the +//! connection charset is sent even though unicode mode already is +//! disabled according to the internal flags. +//! +//! @returns +//! Returns @expr{1@} on success or @expr{0@} if the server doesn't +//! support unicode (i.e. if the statement to reset the connection +//! charset fails). +//! +//! @note +//! Unicode support was added in MySQL 4.1. +//! +//! @seealso +//! @[enter_unicode_mode()], @[enter_unicode_decode_mode()] +int(0..1) leave_unicode_mode (void|int force) +{ + if (force || utf8_mode) { + if (catch { + big_query("SET NAMES '" + (initial_charset || "latin1") + "'"); }) { return 0; } + utf8_mode = 0; } - utf8_mode = UTF8_DECODE_QUERY; return 1; } -// FIXME: Add a latin1 mode? +string query_unicode_mode() +//! Returns the current unicode mode status. +//! +//! @returns +//! @string +//! @value "full" +//! Full unicode mode as set by @[enter_unicode_mode] is +//! enabled. +//! @value "decode" +//! Decode unicode mode as set by @[enter_unicode_decode_mode] +//! is enabled. +//! @value 0 +//! Unicode mode is not enabled. C.f. @[leave_unicode_mode]. +//! @endstring +{ + switch (utf8_mode) { + case UTF8_DECODE_QUERY|UTF8_ENCODE_QUERY: return "full"; + case UTF8_DECODE_QUERY: return "decode"; + default: return 0; + } +} #if constant( Mysql.mysql.MYSQL_NO_ADD_DROP_DB ) // Documented in the C-file. @@ -105,7 +200,10 @@ string utf8_encode_query(string q) { string uq = upper_case(q); if (!has_value(uq, "BINARY")) return string_to_utf8(q); - if ((q & ("\x7f" * sizeof(q))) == q) return q; + // The following optimization is disabled since it causes more + // overhead in the case when q contains a large binary string (which + // is arguably the main reason for q getting really large). + //if ((q & ("\x7f" * sizeof(q))) == q) return q; // We need to find the segments that shouldn't be encoded. string e = ""; @@ -124,7 +222,7 @@ string utf8_encode_query(string q) int end; while ((end = search(suffix, quote, start)) >= 0) { if (suffix[end-1] == '\\') { - // Count the number of preceeding back-slashes. + // Count the number of preceding back-slashes. // if odd, continue searching after the quote. int i; for (i = 2; i < end; i++) { @@ -302,6 +400,7 @@ int|object big_query(string q, mapping(string|int:mixed)|void bindings) int(0..1) is_keyword( string name ) //! Return 1 if the argument @[name] is a mysql keyword. { + // FIXME: Document which version of MySQL this is up-to-date with. return (< "action", "add", "aggregate", "all", "alter", "after", "and", "as", "asc", "avg", "avg_row_length", "auto_increment", "between", "bigint", @@ -349,10 +448,14 @@ static void create(string|void host, string|void database, { if (options) { ::create(host||"", database||"", user||"", password||"", options); + initial_charset = options->mysql_charset_name; + switch (options->unicode_mode) { + case "full": enter_unicode_mode(); break; + case "decode": enter_unicode_decode_mode(); break; + } } else { ::create(host||"", database||"", user||"", password||""); } - enter_unicode_mode(); } #else -- GitLab