From edb742fee70b7e0e9486ca56bde8caf98f569045 Mon Sep 17 00:00:00 2001 From: Brian Lopez Date: Sat, 10 Jul 2010 23:15:54 -0700 Subject: [PATCH] check for and support field-level encodings; raise an exception for invalid encodings passed to Mysql2::Client#new --- ext/mysql2/mysql2_ext.c | 16 ++-- ext/mysql2/result.c | 21 ++++-- lib/mysql2/client.rb | 145 ++++++++++++++++++++++++++++++++++++- spec/mysql2/client_spec.rb | 8 ++ 4 files changed, 175 insertions(+), 15 deletions(-) diff --git a/ext/mysql2/mysql2_ext.c b/ext/mysql2/mysql2_ext.c index f252d9b..79d8d17 100644 --- a/ext/mysql2/mysql2_ext.c +++ b/ext/mysql2/mysql2_ext.c @@ -1,7 +1,7 @@ #include VALUE mMysql2, cMysql2Client; -VALUE cMysql2Error; +VALUE cMysql2Error, intern_encoding_from_charset; ID sym_id, sym_version, sym_async; #define REQUIRE_OPEN_DB(_ctxt) \ @@ -409,10 +409,14 @@ static VALUE set_charset_name(VALUE self, VALUE value) #ifdef HAVE_RUBY_ENCODING_H VALUE new_encoding, old_encoding; - new_encoding = rb_funcall(cMysql2Client, rb_intern("encoding_from_charset"), 1, value); - old_encoding = rb_iv_get(self, "@encoding"); - if (old_encoding == Qnil) { - rb_iv_set(self, "@encoding", new_encoding); + new_encoding = rb_funcall(cMysql2Client, intern_encoding_from_charset, 1, value); + if (new_encoding == Qnil) { + rb_raise(cMysql2Error, "Unsupported charset: '%s'", RSTRING_PTR(value)); + } else { + old_encoding = rb_iv_get(self, "@encoding"); + if (old_encoding == Qnil) { + rb_iv_set(self, "@encoding", new_encoding); + } } #endif @@ -487,4 +491,6 @@ void Init_mysql2() { sym_id = ID2SYM(rb_intern("id")); sym_version = ID2SYM(rb_intern("version")); sym_async = ID2SYM(rb_intern("async")); + + intern_encoding_from_charset = rb_intern("encoding_from_charset"); } diff --git a/ext/mysql2/result.c b/ext/mysql2/result.c index 2aff55d..5b12b87 100644 --- a/ext/mysql2/result.c +++ b/ext/mysql2/result.c @@ -5,10 +5,11 @@ rb_encoding *binaryEncoding; #endif ID sym_symbolize_keys; -ID intern_new, intern_utc; +ID
intern_new, intern_utc, intern_encoding_from_charset_code; VALUE cBigDecimal, cDate, cDateTime; VALUE cMysql2Result; +extern VALUE cMysql2Client; static void rb_mysql_result_mark(void * wrapper) { mysql2_result_wrapper * w = wrapper; @@ -198,13 +199,20 @@ static VALUE rb_mysql_result_fetch_row(int argc, VALUE * argv, VALUE self) { default: val = rb_str_new(row[i], fieldLengths[i]); #ifdef HAVE_RUBY_ENCODING_H - // rudimentary check for binary content - if ((fields[i].flags & BINARY_FLAG) || fields[i].charsetnr == 63) { + // if binary flag is set, respect its wishes + if (fields[i].flags & BINARY_FLAG) { rb_enc_associate(val, binaryEncoding); } else { - // TODO: we should probably lookup the encoding that was set on the field - // and either use that and/or fall back to the connection's encoding - rb_enc_associate(val, conn_enc); + // look up the encoding configured on this field + VALUE new_encoding = rb_funcall(cMysql2Client, intern_encoding_from_charset_code, 1, INT2NUM(fields[i].charsetnr)); + if (new_encoding != Qnil) { + // use the field encoding we were able to match + rb_encoding *enc = rb_to_encoding(new_encoding); + rb_enc_associate(val, enc); + } else { + // otherwise fall back to the connection's encoding + rb_enc_associate(val, conn_enc); + } if (default_internal_enc) { val = rb_str_export_to_enc(val, default_internal_enc); } @@ -324,6 +332,7 @@ void init_mysql2_result() sym_symbolize_keys = ID2SYM(rb_intern("symbolize_keys")); intern_new = rb_intern("new"); intern_utc = rb_intern("utc"); + intern_encoding_from_charset_code = rb_intern("encoding_from_charset_code"); #ifdef HAVE_RUBY_ENCODING_H binaryEncoding = rb_enc_find("binary"); diff --git a/lib/mysql2/client.rb b/lib/mysql2/client.rb index 9450f91..e49ed4b 100644 --- a/lib/mysql2/client.rb +++ b/lib/mysql2/client.rb @@ -63,11 +63,148 @@ module Mysql2 "utf8" => Encoding::UTF_8, } + MYSQL_CHARSET_MAP = { + 1 => {:name => "big5", :collation => "big5_chinese_ci"}, + 2 => {:name => "latin2", :collation
=> "latin2_czech_cs"}, + 3 => {:name => "dec8", :collation => "dec8_swedish_ci"}, + 4 => {:name => "cp850", :collation => "cp850_general_ci"}, + 5 => {:name => "latin1", :collation => "latin1_german1_ci"}, + 6 => {:name => "hp8", :collation => "hp8_english_ci"}, + 7 => {:name => "koi8r", :collation => "koi8r_general_ci"}, + 8 => {:name => "latin1", :collation => "latin1_swedish_ci"}, + 9 => {:name => "latin2", :collation => "latin2_general_ci"}, + 10 => {:name => "swe7", :collation => "swe7_swedish_ci"}, + 11 => {:name => "ascii", :collation => "ascii_general_ci"}, + 12 => {:name => "ujis", :collation => "ujis_japanese_ci"}, + 13 => {:name => "sjis", :collation => "sjis_japanese_ci"}, + 14 => {:name => "cp1251", :collation => "cp1251_bulgarian_ci"}, + 15 => {:name => "latin1", :collation => "latin1_danish_ci"}, + 16 => {:name => "hebrew", :collation => "hebrew_general_ci"}, + 17 => {:name => "filename", :collation => "filename"}, + 18 => {:name => "tis620", :collation => "tis620_thai_ci"}, + 19 => {:name => "euckr", :collation => "euckr_korean_ci"}, + 20 => {:name => "latin7", :collation => "latin7_estonian_cs"}, + 21 => {:name => "latin2", :collation => "latin2_hungarian_ci"}, + 22 => {:name => "koi8u", :collation => "koi8u_general_ci"}, + 23 => {:name => "cp1251", :collation => "cp1251_ukrainian_ci"}, + 24 => {:name => "gb2312", :collation => "gb2312_chinese_ci"}, + 25 => {:name => "greek", :collation => "greek_general_ci"}, + 26 => {:name => "cp1250", :collation => "cp1250_general_ci"}, + 27 => {:name => "latin2", :collation => "latin2_croatian_ci"}, + 28 => {:name => "gbk", :collation => "gbk_chinese_ci"}, + 29 => {:name => "cp1257", :collation => "cp1257_lithuanian_ci"}, + 30 => {:name => "latin5", :collation => "latin5_turkish_ci"}, + 31 => {:name => "latin1", :collation => "latin1_german2_ci"}, + 32 => {:name => "armscii8", :collation => "armscii8_general_ci"}, + 33 => {:name => "utf8", :collation => "utf8_general_ci"}, + 34 => {:name => "cp1250", :collation 
=> "cp1250_czech_cs"}, + 35 => {:name => "ucs2", :collation => "ucs2_general_ci"}, + 36 => {:name => "cp866", :collation => "cp866_general_ci"}, + 37 => {:name => "keybcs2", :collation => "keybcs2_general_ci"}, + 38 => {:name => "macce", :collation => "macce_general_ci"}, + 39 => {:name => "macroman", :collation => "macroman_general_ci"}, + 40 => {:name => "cp852", :collation => "cp852_general_ci"}, + 41 => {:name => "latin7", :collation => "latin7_general_ci"}, + 42 => {:name => "latin7", :collation => "latin7_general_cs"}, + 43 => {:name => "macce", :collation => "macce_bin"}, + 44 => {:name => "cp1250", :collation => "cp1250_croatian_ci"}, + 47 => {:name => "latin1", :collation => "latin1_bin"}, + 48 => {:name => "latin1", :collation => "latin1_general_ci"}, + 49 => {:name => "latin1", :collation => "latin1_general_cs"}, + 50 => {:name => "cp1251", :collation => "cp1251_bin"}, + 51 => {:name => "cp1251", :collation => "cp1251_general_ci"}, + 52 => {:name => "cp1251", :collation => "cp1251_general_cs"}, + 53 => {:name => "macroman", :collation => "macroman_bin"}, + 57 => {:name => "cp1256", :collation => "cp1256_general_ci"}, + 58 => {:name => "cp1257", :collation => "cp1257_bin"}, + 59 => {:name => "cp1257", :collation => "cp1257_general_ci"}, + 63 => {:name => "binary", :collation => "binary"}, + 64 => {:name => "armscii8", :collation => "armscii8_bin"}, + 65 => {:name => "ascii", :collation => "ascii_bin"}, + 66 => {:name => "cp1250", :collation => "cp1250_bin"}, + 67 => {:name => "cp1256", :collation => "cp1256_bin"}, + 68 => {:name => "cp866", :collation => "cp866_bin"}, + 69 => {:name => "dec8", :collation => "dec8_bin"}, + 70 => {:name => "greek", :collation => "greek_bin"}, + 71 => {:name => "hebrew", :collation => "hebrew_bin"}, + 72 => {:name => "hp8", :collation => "hp8_bin"}, + 73 => {:name => "keybcs2", :collation => "keybcs2_bin"}, + 74 => {:name => "koi8r", :collation => "koi8r_bin"}, + 75 => {:name => "koi8u", :collation => "koi8u_bin"}, + 77 => 
{:name => "latin2", :collation => "latin2_bin"}, + 78 => {:name => "latin5", :collation => "latin5_bin"}, + 79 => {:name => "latin7", :collation => "latin7_bin"}, + 80 => {:name => "cp850", :collation => "cp850_bin"}, + 81 => {:name => "cp852", :collation => "cp852_bin"}, + 82 => {:name => "swe7", :collation => "swe7_bin"}, + 83 => {:name => "utf8", :collation => "utf8_bin"}, + 84 => {:name => "big5", :collation => "big5_bin"}, + 85 => {:name => "euckr", :collation => "euckr_bin"}, + 86 => {:name => "gb2312", :collation => "gb2312_bin"}, + 87 => {:name => "gbk", :collation => "gbk_bin"}, + 88 => {:name => "sjis", :collation => "sjis_bin"}, + 89 => {:name => "tis620", :collation => "tis620_bin"}, + 90 => {:name => "ucs2", :collation => "ucs2_bin"}, + 91 => {:name => "ujis", :collation => "ujis_bin"}, + 92 => {:name => "geostd8", :collation => "geostd8_general_ci"}, + 93 => {:name => "geostd8", :collation => "geostd8_bin"}, + 94 => {:name => "latin1", :collation => "latin1_spanish_ci"}, + 95 => {:name => "cp932", :collation => "cp932_japanese_ci"}, + 96 => {:name => "cp932", :collation => "cp932_bin"}, + 97 => {:name => "eucjpms", :collation => "eucjpms_japanese_ci"}, + 98 => {:name => "eucjpms", :collation => "eucjpms_bin"}, + 99 => {:name => "cp1250", :collation => "cp1250_polish_ci"}, + 128 => {:name => "ucs2", :collation => "ucs2_unicode_ci"}, + 129 => {:name => "ucs2", :collation => "ucs2_icelandic_ci"}, + 130 => {:name => "ucs2", :collation => "ucs2_latvian_ci"}, + 131 => {:name => "ucs2", :collation => "ucs2_romanian_ci"}, + 132 => {:name => "ucs2", :collation => "ucs2_slovenian_ci"}, + 133 => {:name => "ucs2", :collation => "ucs2_polish_ci"}, + 134 => {:name => "ucs2", :collation => "ucs2_estonian_ci"}, + 135 => {:name => "ucs2", :collation => "ucs2_spanish_ci"}, + 136 => {:name => "ucs2", :collation => "ucs2_swedish_ci"}, + 137 => {:name => "ucs2", :collation => "ucs2_turkish_ci"}, + 138 => {:name => "ucs2", :collation => "ucs2_czech_ci"}, + 139 => {:name => 
"ucs2", :collation => "ucs2_danish_ci"}, + 140 => {:name => "ucs2", :collation => "ucs2_lithuanian_ci"}, + 141 => {:name => "ucs2", :collation => "ucs2_slovak_ci"}, + 142 => {:name => "ucs2", :collation => "ucs2_spanish2_ci"}, + 143 => {:name => "ucs2", :collation => "ucs2_roman_ci"}, + 144 => {:name => "ucs2", :collation => "ucs2_persian_ci"}, + 145 => {:name => "ucs2", :collation => "ucs2_esperanto_ci"}, + 146 => {:name => "ucs2", :collation => "ucs2_hungarian_ci"}, + 192 => {:name => "utf8", :collation => "utf8_unicode_ci"}, + 193 => {:name => "utf8", :collation => "utf8_icelandic_ci"}, + 194 => {:name => "utf8", :collation => "utf8_latvian_ci"}, + 195 => {:name => "utf8", :collation => "utf8_romanian_ci"}, + 196 => {:name => "utf8", :collation => "utf8_slovenian_ci"}, + 197 => {:name => "utf8", :collation => "utf8_polish_ci"}, + 198 => {:name => "utf8", :collation => "utf8_estonian_ci"}, + 199 => {:name => "utf8", :collation => "utf8_spanish_ci"}, + 200 => {:name => "utf8", :collation => "utf8_swedish_ci"}, + 201 => {:name => "utf8", :collation => "utf8_turkish_ci"}, + 202 => {:name => "utf8", :collation => "utf8_czech_ci"}, + 203 => {:name => "utf8", :collation => "utf8_danish_ci"}, + 204 => {:name => "utf8", :collation => "utf8_lithuanian_ci"}, + 205 => {:name => "utf8", :collation => "utf8_slovak_ci"}, + 206 => {:name => "utf8", :collation => "utf8_spanish2_ci"}, + 207 => {:name => "utf8", :collation => "utf8_roman_ci"}, + 208 => {:name => "utf8", :collation => "utf8_persian_ci"}, + 209 => {:name => "utf8", :collation => "utf8_esperanto_ci"}, + 210 => {:name => "utf8", :collation => "utf8_hungarian_ci"}, + 254 => {:name => "utf8", :collation => "utf8_general_cs"} + } + def self.encoding_from_charset(charset) - charset = charset.to_s.downcase - enc = CHARSET_MAP[charset] - raise Mysql2::Error, "unsupported charset: #{charset}" unless enc - enc + CHARSET_MAP[charset.to_s.downcase] + end + + def self.encoding_from_charset_code(code) + if mapping = 
MYSQL_CHARSET_MAP[code] + encoding_from_charset(mapping[:name]) + else + nil + end end end end diff --git a/spec/mysql2/client_spec.rb b/spec/mysql2/client_spec.rb index 88cf602..7165f48 100644 --- a/spec/mysql2/client_spec.rb +++ b/spec/mysql2/client_spec.rb @@ -6,6 +6,14 @@ describe Mysql2::Client do @client = Mysql2::Client.new end + if defined? Encoding + it "should raise an exception on create for invalid encodings" do + lambda { + c = Mysql2::Client.new(:encoding => "fake") + }.should raise_error(Mysql2::Error) + end + end + it "should be able to connect via SSL options" do pending("DON'T WORRY, THIS TEST PASSES :) - but is machine-specific. You need to have MySQL running with SSL configured and enabled. Then update the paths in this test to your needs and remove the pending state.") ssl_client = nil