check for and support field-level encodings

raise an exception for invalid encodings passed to Mysql2::Client#new
This commit is contained in:
Brian Lopez 2010-07-10 23:15:54 -07:00
parent d8f5f4de1d
commit edb742fee7
4 changed files with 175 additions and 15 deletions

View File

@ -1,7 +1,7 @@
#include <mysql2_ext.h> #include <mysql2_ext.h>
VALUE mMysql2, cMysql2Client; VALUE mMysql2, cMysql2Client;
VALUE cMysql2Error; VALUE cMysql2Error, intern_encoding_from_charset;
ID sym_id, sym_version, sym_async; ID sym_id, sym_version, sym_async;
#define REQUIRE_OPEN_DB(_ctxt) \ #define REQUIRE_OPEN_DB(_ctxt) \
@ -409,10 +409,14 @@ static VALUE set_charset_name(VALUE self, VALUE value)
#ifdef HAVE_RUBY_ENCODING_H #ifdef HAVE_RUBY_ENCODING_H
VALUE new_encoding, old_encoding; VALUE new_encoding, old_encoding;
new_encoding = rb_funcall(cMysql2Client, rb_intern("encoding_from_charset"), 1, value); new_encoding = rb_funcall(cMysql2Client, intern_encoding_from_charset, 1, value);
old_encoding = rb_iv_get(self, "@encoding"); if (new_encoding == Qnil) {
if (old_encoding == Qnil) { rb_raise(cMysql2Error, "Unsupported charset: '%s'", RSTRING_PTR(value));
rb_iv_set(self, "@encoding", new_encoding); } else {
old_encoding = rb_iv_get(self, "@encoding");
if (old_encoding == Qnil) {
rb_iv_set(self, "@encoding", new_encoding);
}
} }
#endif #endif
@ -487,4 +491,6 @@ void Init_mysql2() {
sym_id = ID2SYM(rb_intern("id")); sym_id = ID2SYM(rb_intern("id"));
sym_version = ID2SYM(rb_intern("version")); sym_version = ID2SYM(rb_intern("version"));
sym_async = ID2SYM(rb_intern("async")); sym_async = ID2SYM(rb_intern("async"));
intern_encoding_from_charset = rb_intern("encoding_from_charset");
} }

View File

@ -5,10 +5,11 @@ rb_encoding *binaryEncoding;
#endif #endif
ID sym_symbolize_keys; ID sym_symbolize_keys;
ID intern_new, intern_utc; ID intern_new, intern_utc, intern_encoding_from_charset_code;
VALUE cBigDecimal, cDate, cDateTime; VALUE cBigDecimal, cDate, cDateTime;
VALUE cMysql2Result; VALUE cMysql2Result;
extern VALUE cMysql2Client;
static void rb_mysql_result_mark(void * wrapper) { static void rb_mysql_result_mark(void * wrapper) {
mysql2_result_wrapper * w = wrapper; mysql2_result_wrapper * w = wrapper;
@ -198,13 +199,20 @@ static VALUE rb_mysql_result_fetch_row(int argc, VALUE * argv, VALUE self) {
default: default:
val = rb_str_new(row[i], fieldLengths[i]); val = rb_str_new(row[i], fieldLengths[i]);
#ifdef HAVE_RUBY_ENCODING_H #ifdef HAVE_RUBY_ENCODING_H
// rudimentary check for binary content // if binary flag is set, respect its wishes
if ((fields[i].flags & BINARY_FLAG) || fields[i].charsetnr == 63) { if (fields[i].flags & BINARY_FLAG) {
rb_enc_associate(val, binaryEncoding); rb_enc_associate(val, binaryEncoding);
} else { } else {
// TODO: we should probably lookup the encoding that was set on the field // lookup the encoding configured on this field
// and either use that and/or fall back to the connection's encoding VALUE new_encoding = rb_funcall(cMysql2Client, intern_encoding_from_charset_code, 1, INT2NUM(fields[i].charsetnr));
rb_enc_associate(val, conn_enc); if (new_encoding != Qnil) {
// use the field encoding we were able to match
rb_encoding *enc = rb_to_encoding(new_encoding);
rb_enc_associate(val, enc);
} else {
// otherwise fall-back to the connection's encoding
rb_enc_associate(val, conn_enc);
}
if (default_internal_enc) { if (default_internal_enc) {
val = rb_str_export_to_enc(val, default_internal_enc); val = rb_str_export_to_enc(val, default_internal_enc);
} }
@ -324,6 +332,7 @@ void init_mysql2_result()
sym_symbolize_keys = ID2SYM(rb_intern("symbolize_keys")); sym_symbolize_keys = ID2SYM(rb_intern("symbolize_keys"));
intern_new = rb_intern("new"); intern_new = rb_intern("new");
intern_utc = rb_intern("utc"); intern_utc = rb_intern("utc");
intern_encoding_from_charset_code = rb_intern("encoding_from_charset_code");
#ifdef HAVE_RUBY_ENCODING_H #ifdef HAVE_RUBY_ENCODING_H
binaryEncoding = rb_enc_find("binary"); binaryEncoding = rb_enc_find("binary");

View File

@ -63,11 +63,148 @@ module Mysql2
"utf8" => Encoding::UTF_8, "utf8" => Encoding::UTF_8,
} }
# Lookup table keyed by the numeric MySQL charset/collation id (the value the
# result code reads from a field's `charsetnr`). Each entry carries:
#   :name      - the MySQL charset name, which encoding_from_charset_code
#                passes on to encoding_from_charset
#   :collation - the MySQL collation name for that id
# Ids missing from the table simply return nil on lookup.
# The outer hash is frozen (shallowly) so this shared constant cannot gain,
# lose or rebind entries at runtime.
MYSQL_CHARSET_MAP = {
  1 => {:name => "big5", :collation => "big5_chinese_ci"},
  2 => {:name => "latin2", :collation => "latin2_czech_cs"},
  3 => {:name => "dec8", :collation => "dec8_swedish_ci"},
  4 => {:name => "cp850", :collation => "cp850_general_ci"},
  5 => {:name => "latin1", :collation => "latin1_german1_ci"},
  6 => {:name => "hp8", :collation => "hp8_english_ci"},
  7 => {:name => "koi8r", :collation => "koi8r_general_ci"},
  8 => {:name => "latin1", :collation => "latin1_swedish_ci"},
  9 => {:name => "latin2", :collation => "latin2_general_ci"},
  10 => {:name => "swe7", :collation => "swe7_swedish_ci"},
  11 => {:name => "ascii", :collation => "ascii_general_ci"},
  12 => {:name => "ujis", :collation => "ujis_japanese_ci"},
  13 => {:name => "sjis", :collation => "sjis_japanese_ci"},
  14 => {:name => "cp1251", :collation => "cp1251_bulgarian_ci"},
  15 => {:name => "latin1", :collation => "latin1_danish_ci"},
  16 => {:name => "hebrew", :collation => "hebrew_general_ci"},
  17 => {:name => "filename", :collation => "filename"},
  18 => {:name => "tis620", :collation => "tis620_thai_ci"},
  19 => {:name => "euckr", :collation => "euckr_korean_ci"},
  20 => {:name => "latin7", :collation => "latin7_estonian_cs"},
  21 => {:name => "latin2", :collation => "latin2_hungarian_ci"},
  22 => {:name => "koi8u", :collation => "koi8u_general_ci"},
  23 => {:name => "cp1251", :collation => "cp1251_ukrainian_ci"},
  24 => {:name => "gb2312", :collation => "gb2312_chinese_ci"},
  25 => {:name => "greek", :collation => "greek_general_ci"},
  26 => {:name => "cp1250", :collation => "cp1250_general_ci"},
  27 => {:name => "latin2", :collation => "latin2_croatian_ci"},
  28 => {:name => "gbk", :collation => "gbk_chinese_ci"},
  29 => {:name => "cp1257", :collation => "cp1257_lithuanian_ci"},
  30 => {:name => "latin5", :collation => "latin5_turkish_ci"},
  31 => {:name => "latin1", :collation => "latin1_german2_ci"},
  32 => {:name => "armscii8", :collation => "armscii8_general_ci"},
  33 => {:name => "utf8", :collation => "utf8_general_ci"},
  34 => {:name => "cp1250", :collation => "cp1250_czech_cs"},
  35 => {:name => "ucs2", :collation => "ucs2_general_ci"},
  36 => {:name => "cp866", :collation => "cp866_general_ci"},
  37 => {:name => "keybcs2", :collation => "keybcs2_general_ci"},
  38 => {:name => "macce", :collation => "macce_general_ci"},
  39 => {:name => "macroman", :collation => "macroman_general_ci"},
  40 => {:name => "cp852", :collation => "cp852_general_ci"},
  41 => {:name => "latin7", :collation => "latin7_general_ci"},
  42 => {:name => "latin7", :collation => "latin7_general_cs"},
  43 => {:name => "macce", :collation => "macce_bin"},
  44 => {:name => "cp1250", :collation => "cp1250_croatian_ci"},
  47 => {:name => "latin1", :collation => "latin1_bin"},
  48 => {:name => "latin1", :collation => "latin1_general_ci"},
  49 => {:name => "latin1", :collation => "latin1_general_cs"},
  50 => {:name => "cp1251", :collation => "cp1251_bin"},
  51 => {:name => "cp1251", :collation => "cp1251_general_ci"},
  52 => {:name => "cp1251", :collation => "cp1251_general_cs"},
  53 => {:name => "macroman", :collation => "macroman_bin"},
  57 => {:name => "cp1256", :collation => "cp1256_general_ci"},
  58 => {:name => "cp1257", :collation => "cp1257_bin"},
  59 => {:name => "cp1257", :collation => "cp1257_general_ci"},
  63 => {:name => "binary", :collation => "binary"},
  64 => {:name => "armscii8", :collation => "armscii8_bin"},
  65 => {:name => "ascii", :collation => "ascii_bin"},
  66 => {:name => "cp1250", :collation => "cp1250_bin"},
  67 => {:name => "cp1256", :collation => "cp1256_bin"},
  68 => {:name => "cp866", :collation => "cp866_bin"},
  69 => {:name => "dec8", :collation => "dec8_bin"},
  70 => {:name => "greek", :collation => "greek_bin"},
  71 => {:name => "hebrew", :collation => "hebrew_bin"},
  72 => {:name => "hp8", :collation => "hp8_bin"},
  73 => {:name => "keybcs2", :collation => "keybcs2_bin"},
  74 => {:name => "koi8r", :collation => "koi8r_bin"},
  75 => {:name => "koi8u", :collation => "koi8u_bin"},
  77 => {:name => "latin2", :collation => "latin2_bin"},
  78 => {:name => "latin5", :collation => "latin5_bin"},
  79 => {:name => "latin7", :collation => "latin7_bin"},
  80 => {:name => "cp850", :collation => "cp850_bin"},
  81 => {:name => "cp852", :collation => "cp852_bin"},
  82 => {:name => "swe7", :collation => "swe7_bin"},
  83 => {:name => "utf8", :collation => "utf8_bin"},
  84 => {:name => "big5", :collation => "big5_bin"},
  85 => {:name => "euckr", :collation => "euckr_bin"},
  86 => {:name => "gb2312", :collation => "gb2312_bin"},
  87 => {:name => "gbk", :collation => "gbk_bin"},
  88 => {:name => "sjis", :collation => "sjis_bin"},
  89 => {:name => "tis620", :collation => "tis620_bin"},
  90 => {:name => "ucs2", :collation => "ucs2_bin"},
  91 => {:name => "ujis", :collation => "ujis_bin"},
  92 => {:name => "geostd8", :collation => "geostd8_general_ci"},
  93 => {:name => "geostd8", :collation => "geostd8_bin"},
  94 => {:name => "latin1", :collation => "latin1_spanish_ci"},
  95 => {:name => "cp932", :collation => "cp932_japanese_ci"},
  96 => {:name => "cp932", :collation => "cp932_bin"},
  97 => {:name => "eucjpms", :collation => "eucjpms_japanese_ci"},
  98 => {:name => "eucjpms", :collation => "eucjpms_bin"},
  99 => {:name => "cp1250", :collation => "cp1250_polish_ci"},
  128 => {:name => "ucs2", :collation => "ucs2_unicode_ci"},
  129 => {:name => "ucs2", :collation => "ucs2_icelandic_ci"},
  130 => {:name => "ucs2", :collation => "ucs2_latvian_ci"},
  131 => {:name => "ucs2", :collation => "ucs2_romanian_ci"},
  132 => {:name => "ucs2", :collation => "ucs2_slovenian_ci"},
  133 => {:name => "ucs2", :collation => "ucs2_polish_ci"},
  134 => {:name => "ucs2", :collation => "ucs2_estonian_ci"},
  135 => {:name => "ucs2", :collation => "ucs2_spanish_ci"},
  136 => {:name => "ucs2", :collation => "ucs2_swedish_ci"},
  137 => {:name => "ucs2", :collation => "ucs2_turkish_ci"},
  138 => {:name => "ucs2", :collation => "ucs2_czech_ci"},
  139 => {:name => "ucs2", :collation => "ucs2_danish_ci"},
  140 => {:name => "ucs2", :collation => "ucs2_lithuanian_ci"},
  141 => {:name => "ucs2", :collation => "ucs2_slovak_ci"},
  142 => {:name => "ucs2", :collation => "ucs2_spanish2_ci"},
  143 => {:name => "ucs2", :collation => "ucs2_roman_ci"},
  144 => {:name => "ucs2", :collation => "ucs2_persian_ci"},
  145 => {:name => "ucs2", :collation => "ucs2_esperanto_ci"},
  146 => {:name => "ucs2", :collation => "ucs2_hungarian_ci"},
  192 => {:name => "utf8", :collation => "utf8_unicode_ci"},
  193 => {:name => "utf8", :collation => "utf8_icelandic_ci"},
  194 => {:name => "utf8", :collation => "utf8_latvian_ci"},
  195 => {:name => "utf8", :collation => "utf8_romanian_ci"},
  196 => {:name => "utf8", :collation => "utf8_slovenian_ci"},
  197 => {:name => "utf8", :collation => "utf8_polish_ci"},
  198 => {:name => "utf8", :collation => "utf8_estonian_ci"},
  199 => {:name => "utf8", :collation => "utf8_spanish_ci"},
  200 => {:name => "utf8", :collation => "utf8_swedish_ci"},
  201 => {:name => "utf8", :collation => "utf8_turkish_ci"},
  202 => {:name => "utf8", :collation => "utf8_czech_ci"},
  203 => {:name => "utf8", :collation => "utf8_danish_ci"},
  204 => {:name => "utf8", :collation => "utf8_lithuanian_ci"},
  205 => {:name => "utf8", :collation => "utf8_slovak_ci"},
  206 => {:name => "utf8", :collation => "utf8_spanish2_ci"},
  207 => {:name => "utf8", :collation => "utf8_roman_ci"},
  208 => {:name => "utf8", :collation => "utf8_persian_ci"},
  209 => {:name => "utf8", :collation => "utf8_esperanto_ci"},
  210 => {:name => "utf8", :collation => "utf8_hungarian_ci"},
  254 => {:name => "utf8", :collation => "utf8_general_cs"}
}.freeze
def self.encoding_from_charset(charset) def self.encoding_from_charset(charset)
charset = charset.to_s.downcase CHARSET_MAP[charset.to_s.downcase]
enc = CHARSET_MAP[charset] end
raise Mysql2::Error, "unsupported charset: #{charset}" unless enc
enc def self.encoding_from_charset_code(code)
if mapping = MYSQL_CHARSET_MAP[code]
encoding_from_charset(mapping[:name])
else
nil
end
end end
end end
end end

View File

@ -6,6 +6,14 @@ describe Mysql2::Client do
@client = Mysql2::Client.new @client = Mysql2::Client.new
end end
# Only define this example when the Encoding constant exists, since the
# charset validation under test is compiled in behind HAVE_RUBY_ENCODING_H.
if defined? Encoding
  it "should raise an exception on create for invalid encodings" do
    lambda {
      # The client is never used; constructing it is what must raise.
      Mysql2::Client.new(:encoding => "fake")
    }.should raise_error(Mysql2::Error)
  end
end
it "should be able to connect via SSL options" do it "should be able to connect via SSL options" do
pending("DON'T WORRY, THIS TEST PASSES :) - but is machine-specific. You need to have MySQL running with SSL configured and enabled. Then update the paths in this test to your needs and remove the pending state.") pending("DON'T WORRY, THIS TEST PASSES :) - but is machine-specific. You need to have MySQL running with SSL configured and enabled. Then update the paths in this test to your needs and remove the pending state.")
ssl_client = nil ssl_client = nil