check for and support field-level encodings

raise an exception for invalid encodings passed to Mysql2::Client#new
This commit is contained in:
Brian Lopez 2010-07-10 23:15:54 -07:00
parent d8f5f4de1d
commit edb742fee7
4 changed files with 175 additions and 15 deletions

View File

@ -1,7 +1,7 @@
#include <mysql2_ext.h>
/* Ruby object handles; cMysql2Client receives the encoding_from_charset call
 * and cMysql2Error is the class raised for an unsupported charset. */
VALUE mMysql2, cMysql2Client;
VALUE cMysql2Error;
/* NOTE(review): Init_mysql2 stores ID2SYM(rb_intern(...)) results in these,
 * which are Symbol VALUEs rather than IDs -- confirm against any extern
 * declarations before changing the declared type. */
ID sym_id, sym_version, sym_async;
/* Method ID for encoding_from_charset. It is filled by rb_intern() and passed
 * as the method argument of rb_funcall(), so it is declared as an ID rather
 * than a VALUE. */
ID intern_encoding_from_charset;
#define REQUIRE_OPEN_DB(_ctxt) \
@ -409,10 +409,14 @@ static VALUE set_charset_name(VALUE self, VALUE value)
#ifdef HAVE_RUBY_ENCODING_H
VALUE new_encoding, old_encoding;
new_encoding = rb_funcall(cMysql2Client, rb_intern("encoding_from_charset"), 1, value);
old_encoding = rb_iv_get(self, "@encoding");
if (old_encoding == Qnil) {
rb_iv_set(self, "@encoding", new_encoding);
new_encoding = rb_funcall(cMysql2Client, intern_encoding_from_charset, 1, value);
if (new_encoding == Qnil) {
rb_raise(cMysql2Error, "Unsupported charset: '%s'", RSTRING_PTR(value));
} else {
old_encoding = rb_iv_get(self, "@encoding");
if (old_encoding == Qnil) {
rb_iv_set(self, "@encoding", new_encoding);
}
}
#endif
@ -487,4 +491,6 @@ void Init_mysql2() {
sym_id = ID2SYM(rb_intern("id"));
sym_version = ID2SYM(rb_intern("version"));
sym_async = ID2SYM(rb_intern("async"));
intern_encoding_from_charset = rb_intern("encoding_from_charset");
}

View File

@ -5,10 +5,11 @@ rb_encoding *binaryEncoding;
#endif
ID sym_symbolize_keys;
ID intern_new, intern_utc;
ID intern_new, intern_utc, intern_encoding_from_charset_code;
VALUE cBigDecimal, cDate, cDateTime;
VALUE cMysql2Result;
extern VALUE cMysql2Client;
static void rb_mysql_result_mark(void * wrapper) {
mysql2_result_wrapper * w = wrapper;
@ -198,13 +199,20 @@ static VALUE rb_mysql_result_fetch_row(int argc, VALUE * argv, VALUE self) {
default:
val = rb_str_new(row[i], fieldLengths[i]);
#ifdef HAVE_RUBY_ENCODING_H
// rudimentary check for binary content
if ((fields[i].flags & BINARY_FLAG) || fields[i].charsetnr == 63) {
// if binary flag is set, respect its wishes
if (fields[i].flags & BINARY_FLAG) {
rb_enc_associate(val, binaryEncoding);
} else {
// TODO: we should probably lookup the encoding that was set on the field
// and either use that and/or fall back to the connection's encoding
rb_enc_associate(val, conn_enc);
// lookup the encoding configured on this field
VALUE new_encoding = rb_funcall(cMysql2Client, intern_encoding_from_charset_code, 1, INT2NUM(fields[i].charsetnr));
if (new_encoding != Qnil) {
// use the field encoding we were able to match
rb_encoding *enc = rb_to_encoding(new_encoding);
rb_enc_associate(val, enc);
} else {
// otherwise fall-back to the connection's encoding
rb_enc_associate(val, conn_enc);
}
if (default_internal_enc) {
val = rb_str_export_to_enc(val, default_internal_enc);
}
@ -324,6 +332,7 @@ void init_mysql2_result()
sym_symbolize_keys = ID2SYM(rb_intern("symbolize_keys"));
intern_new = rb_intern("new");
intern_utc = rb_intern("utc");
intern_encoding_from_charset_code = rb_intern("encoding_from_charset_code");
#ifdef HAVE_RUBY_ENCODING_H
binaryEncoding = rb_enc_find("binary");

View File

@ -63,11 +63,148 @@ module Mysql2
"utf8" => Encoding::UTF_8,
}
# Numeric MySQL charset codes mapped to the charset name and collation they
# identify. encoding_from_charset_code indexes this table by code and uses
# only the :name entry; :collation is not read anywhere in the visible code.
MYSQL_CHARSET_MAP = {
1 => {:name => "big5", :collation => "big5_chinese_ci"},
2 => {:name => "latin2", :collation => "latin2_czech_cs"},
3 => {:name => "dec8", :collation => "dec8_swedish_ci"},
4 => {:name => "cp850", :collation => "cp850_general_ci"},
5 => {:name => "latin1", :collation => "latin1_german1_ci"},
6 => {:name => "hp8", :collation => "hp8_english_ci"},
7 => {:name => "koi8r", :collation => "koi8r_general_ci"},
8 => {:name => "latin1", :collation => "latin1_swedish_ci"},
9 => {:name => "latin2", :collation => "latin2_general_ci"},
10 => {:name => "swe7", :collation => "swe7_swedish_ci"},
11 => {:name => "ascii", :collation => "ascii_general_ci"},
12 => {:name => "ujis", :collation => "ujis_japanese_ci"},
13 => {:name => "sjis", :collation => "sjis_japanese_ci"},
14 => {:name => "cp1251", :collation => "cp1251_bulgarian_ci"},
15 => {:name => "latin1", :collation => "latin1_danish_ci"},
16 => {:name => "hebrew", :collation => "hebrew_general_ci"},
17 => {:name => "filename", :collation => "filename"},
18 => {:name => "tis620", :collation => "tis620_thai_ci"},
19 => {:name => "euckr", :collation => "euckr_korean_ci"},
20 => {:name => "latin7", :collation => "latin7_estonian_cs"},
21 => {:name => "latin2", :collation => "latin2_hungarian_ci"},
22 => {:name => "koi8u", :collation => "koi8u_general_ci"},
23 => {:name => "cp1251", :collation => "cp1251_ukrainian_ci"},
24 => {:name => "gb2312", :collation => "gb2312_chinese_ci"},
25 => {:name => "greek", :collation => "greek_general_ci"},
26 => {:name => "cp1250", :collation => "cp1250_general_ci"},
27 => {:name => "latin2", :collation => "latin2_croatian_ci"},
28 => {:name => "gbk", :collation => "gbk_chinese_ci"},
29 => {:name => "cp1257", :collation => "cp1257_lithuanian_ci"},
30 => {:name => "latin5", :collation => "latin5_turkish_ci"},
31 => {:name => "latin1", :collation => "latin1_german2_ci"},
32 => {:name => "armscii8", :collation => "armscii8_general_ci"},
33 => {:name => "utf8", :collation => "utf8_general_ci"},
34 => {:name => "cp1250", :collation => "cp1250_czech_cs"},
35 => {:name => "ucs2", :collation => "ucs2_general_ci"},
36 => {:name => "cp866", :collation => "cp866_general_ci"},
37 => {:name => "keybcs2", :collation => "keybcs2_general_ci"},
38 => {:name => "macce", :collation => "macce_general_ci"},
39 => {:name => "macroman", :collation => "macroman_general_ci"},
40 => {:name => "cp852", :collation => "cp852_general_ci"},
41 => {:name => "latin7", :collation => "latin7_general_ci"},
42 => {:name => "latin7", :collation => "latin7_general_cs"},
43 => {:name => "macce", :collation => "macce_bin"},
44 => {:name => "cp1250", :collation => "cp1250_croatian_ci"},
47 => {:name => "latin1", :collation => "latin1_bin"},
48 => {:name => "latin1", :collation => "latin1_general_ci"},
49 => {:name => "latin1", :collation => "latin1_general_cs"},
50 => {:name => "cp1251", :collation => "cp1251_bin"},
51 => {:name => "cp1251", :collation => "cp1251_general_ci"},
52 => {:name => "cp1251", :collation => "cp1251_general_cs"},
53 => {:name => "macroman", :collation => "macroman_bin"},
57 => {:name => "cp1256", :collation => "cp1256_general_ci"},
58 => {:name => "cp1257", :collation => "cp1257_bin"},
59 => {:name => "cp1257", :collation => "cp1257_general_ci"},
63 => {:name => "binary", :collation => "binary"},
64 => {:name => "armscii8", :collation => "armscii8_bin"},
65 => {:name => "ascii", :collation => "ascii_bin"},
66 => {:name => "cp1250", :collation => "cp1250_bin"},
67 => {:name => "cp1256", :collation => "cp1256_bin"},
68 => {:name => "cp866", :collation => "cp866_bin"},
69 => {:name => "dec8", :collation => "dec8_bin"},
70 => {:name => "greek", :collation => "greek_bin"},
71 => {:name => "hebrew", :collation => "hebrew_bin"},
72 => {:name => "hp8", :collation => "hp8_bin"},
73 => {:name => "keybcs2", :collation => "keybcs2_bin"},
74 => {:name => "koi8r", :collation => "koi8r_bin"},
75 => {:name => "koi8u", :collation => "koi8u_bin"},
77 => {:name => "latin2", :collation => "latin2_bin"},
78 => {:name => "latin5", :collation => "latin5_bin"},
79 => {:name => "latin7", :collation => "latin7_bin"},
80 => {:name => "cp850", :collation => "cp850_bin"},
81 => {:name => "cp852", :collation => "cp852_bin"},
82 => {:name => "swe7", :collation => "swe7_bin"},
83 => {:name => "utf8", :collation => "utf8_bin"},
84 => {:name => "big5", :collation => "big5_bin"},
85 => {:name => "euckr", :collation => "euckr_bin"},
86 => {:name => "gb2312", :collation => "gb2312_bin"},
87 => {:name => "gbk", :collation => "gbk_bin"},
88 => {:name => "sjis", :collation => "sjis_bin"},
89 => {:name => "tis620", :collation => "tis620_bin"},
90 => {:name => "ucs2", :collation => "ucs2_bin"},
91 => {:name => "ujis", :collation => "ujis_bin"},
92 => {:name => "geostd8", :collation => "geostd8_general_ci"},
93 => {:name => "geostd8", :collation => "geostd8_bin"},
94 => {:name => "latin1", :collation => "latin1_spanish_ci"},
95 => {:name => "cp932", :collation => "cp932_japanese_ci"},
96 => {:name => "cp932", :collation => "cp932_bin"},
97 => {:name => "eucjpms", :collation => "eucjpms_japanese_ci"},
98 => {:name => "eucjpms", :collation => "eucjpms_bin"},
99 => {:name => "cp1250", :collation => "cp1250_polish_ci"},
128 => {:name => "ucs2", :collation => "ucs2_unicode_ci"},
129 => {:name => "ucs2", :collation => "ucs2_icelandic_ci"},
130 => {:name => "ucs2", :collation => "ucs2_latvian_ci"},
131 => {:name => "ucs2", :collation => "ucs2_romanian_ci"},
132 => {:name => "ucs2", :collation => "ucs2_slovenian_ci"},
133 => {:name => "ucs2", :collation => "ucs2_polish_ci"},
134 => {:name => "ucs2", :collation => "ucs2_estonian_ci"},
135 => {:name => "ucs2", :collation => "ucs2_spanish_ci"},
136 => {:name => "ucs2", :collation => "ucs2_swedish_ci"},
137 => {:name => "ucs2", :collation => "ucs2_turkish_ci"},
138 => {:name => "ucs2", :collation => "ucs2_czech_ci"},
139 => {:name => "ucs2", :collation => "ucs2_danish_ci"},
140 => {:name => "ucs2", :collation => "ucs2_lithuanian_ci"},
141 => {:name => "ucs2", :collation => "ucs2_slovak_ci"},
142 => {:name => "ucs2", :collation => "ucs2_spanish2_ci"},
143 => {:name => "ucs2", :collation => "ucs2_roman_ci"},
144 => {:name => "ucs2", :collation => "ucs2_persian_ci"},
145 => {:name => "ucs2", :collation => "ucs2_esperanto_ci"},
146 => {:name => "ucs2", :collation => "ucs2_hungarian_ci"},
192 => {:name => "utf8", :collation => "utf8_unicode_ci"},
193 => {:name => "utf8", :collation => "utf8_icelandic_ci"},
194 => {:name => "utf8", :collation => "utf8_latvian_ci"},
195 => {:name => "utf8", :collation => "utf8_romanian_ci"},
196 => {:name => "utf8", :collation => "utf8_slovenian_ci"},
197 => {:name => "utf8", :collation => "utf8_polish_ci"},
198 => {:name => "utf8", :collation => "utf8_estonian_ci"},
199 => {:name => "utf8", :collation => "utf8_spanish_ci"},
200 => {:name => "utf8", :collation => "utf8_swedish_ci"},
201 => {:name => "utf8", :collation => "utf8_turkish_ci"},
202 => {:name => "utf8", :collation => "utf8_czech_ci"},
203 => {:name => "utf8", :collation => "utf8_danish_ci"},
204 => {:name => "utf8", :collation => "utf8_lithuanian_ci"},
205 => {:name => "utf8", :collation => "utf8_slovak_ci"},
206 => {:name => "utf8", :collation => "utf8_spanish2_ci"},
207 => {:name => "utf8", :collation => "utf8_roman_ci"},
208 => {:name => "utf8", :collation => "utf8_persian_ci"},
209 => {:name => "utf8", :collation => "utf8_esperanto_ci"},
210 => {:name => "utf8", :collation => "utf8_hungarian_ci"},
254 => {:name => "utf8", :collation => "utf8_general_cs"}
}
# Looks up the entry registered in CHARSET_MAP for a MySQL charset name.
# The argument is converted with to_s and downcased before the lookup.
#
# NOTE(review): this span reads like the removed and the added body of a diff
# hunk shown together. Taken literally, an unmapped charset raises
# Mysql2::Error before the final lookup expression is reached, and a mapped
# charset is looked up twice -- confirm which body is intended.
def self.encoding_from_charset(charset)
charset = charset.to_s.downcase
enc = CHARSET_MAP[charset]
raise Mysql2::Error, "unsupported charset: #{charset}" unless enc
enc
CHARSET_MAP[charset.to_s.downcase]
end
# Resolves a numeric MySQL charset code through MYSQL_CHARSET_MAP.
#
# The matching entry's :name is handed to encoding_from_charset and that
# call's result is returned. Returns nil when the code has no entry.
def self.encoding_from_charset_code(code)
  entry = MYSQL_CHARSET_MAP[code]
  return nil unless entry

  encoding_from_charset(entry[:name])
end
end
end

View File

@ -6,6 +6,14 @@ describe Mysql2::Client do
@client = Mysql2::Client.new
end
if defined? Encoding
  # Only runs when the Encoding constant is defined: constructing a client
  # with an :encoding value of "fake" is expected to raise Mysql2::Error.
  it "should raise an exception on create for invalid encodings" do
    attempt = lambda { Mysql2::Client.new(:encoding => "fake") }
    attempt.should raise_error(Mysql2::Error)
  end
end
it "should be able to connect via SSL options" do
pending("DON'T WORRY, THIS TEST PASSES :) - but is machine-specific. You need to have MySQL running with SSL configured and enabled. Then update the paths in this test to your needs and remove the pending state.")
ssl_client = nil