check for and support field-level encodings
raise an exception for invalid encodings passed to Mysql2::Client#new
This commit is contained in:
parent
d8f5f4de1d
commit
edb742fee7
@ -1,7 +1,7 @@
|
||||
#include <mysql2_ext.h>
|
||||
|
||||
VALUE mMysql2, cMysql2Client;
|
||||
VALUE cMysql2Error;
|
||||
VALUE cMysql2Error, intern_encoding_from_charset;
|
||||
ID sym_id, sym_version, sym_async;
|
||||
|
||||
#define REQUIRE_OPEN_DB(_ctxt) \
|
||||
@ -409,10 +409,14 @@ static VALUE set_charset_name(VALUE self, VALUE value)
|
||||
|
||||
#ifdef HAVE_RUBY_ENCODING_H
|
||||
VALUE new_encoding, old_encoding;
|
||||
new_encoding = rb_funcall(cMysql2Client, rb_intern("encoding_from_charset"), 1, value);
|
||||
old_encoding = rb_iv_get(self, "@encoding");
|
||||
if (old_encoding == Qnil) {
|
||||
rb_iv_set(self, "@encoding", new_encoding);
|
||||
new_encoding = rb_funcall(cMysql2Client, intern_encoding_from_charset, 1, value);
|
||||
if (new_encoding == Qnil) {
|
||||
rb_raise(cMysql2Error, "Unsupported charset: '%s'", RSTRING_PTR(value));
|
||||
} else {
|
||||
old_encoding = rb_iv_get(self, "@encoding");
|
||||
if (old_encoding == Qnil) {
|
||||
rb_iv_set(self, "@encoding", new_encoding);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -487,4 +491,6 @@ void Init_mysql2() {
|
||||
sym_id = ID2SYM(rb_intern("id"));
|
||||
sym_version = ID2SYM(rb_intern("version"));
|
||||
sym_async = ID2SYM(rb_intern("async"));
|
||||
|
||||
intern_encoding_from_charset = rb_intern("encoding_from_charset");
|
||||
}
|
||||
|
@ -5,10 +5,11 @@ rb_encoding *binaryEncoding;
|
||||
#endif
|
||||
|
||||
ID sym_symbolize_keys;
|
||||
ID intern_new, intern_utc;
|
||||
ID intern_new, intern_utc, intern_encoding_from_charset_code;
|
||||
|
||||
VALUE cBigDecimal, cDate, cDateTime;
|
||||
VALUE cMysql2Result;
|
||||
extern VALUE cMysql2Client;
|
||||
|
||||
static void rb_mysql_result_mark(void * wrapper) {
|
||||
mysql2_result_wrapper * w = wrapper;
|
||||
@ -198,13 +199,20 @@ static VALUE rb_mysql_result_fetch_row(int argc, VALUE * argv, VALUE self) {
|
||||
default:
|
||||
val = rb_str_new(row[i], fieldLengths[i]);
|
||||
#ifdef HAVE_RUBY_ENCODING_H
|
||||
// rudimentary check for binary content
|
||||
if ((fields[i].flags & BINARY_FLAG) || fields[i].charsetnr == 63) {
|
||||
// if binary flag is set, respect its wishes
|
||||
if (fields[i].flags & BINARY_FLAG) {
|
||||
rb_enc_associate(val, binaryEncoding);
|
||||
} else {
|
||||
// TODO: we should probably lookup the encoding that was set on the field
|
||||
// and either use that and/or fall back to the connection's encoding
|
||||
rb_enc_associate(val, conn_enc);
|
||||
// lookup the encoding configured on this field
|
||||
VALUE new_encoding = rb_funcall(cMysql2Client, intern_encoding_from_charset_code, 1, INT2NUM(fields[i].charsetnr));
|
||||
if (new_encoding != Qnil) {
|
||||
// use the field encoding we were able to match
|
||||
rb_encoding *enc = rb_to_encoding(new_encoding);
|
||||
rb_enc_associate(val, enc);
|
||||
} else {
|
||||
// otherwise fall back to the connection's encoding
|
||||
rb_enc_associate(val, conn_enc);
|
||||
}
|
||||
if (default_internal_enc) {
|
||||
val = rb_str_export_to_enc(val, default_internal_enc);
|
||||
}
|
||||
@ -324,6 +332,7 @@ void init_mysql2_result()
|
||||
sym_symbolize_keys = ID2SYM(rb_intern("symbolize_keys"));
|
||||
intern_new = rb_intern("new");
|
||||
intern_utc = rb_intern("utc");
|
||||
intern_encoding_from_charset_code = rb_intern("encoding_from_charset_code");
|
||||
|
||||
#ifdef HAVE_RUBY_ENCODING_H
|
||||
binaryEncoding = rb_enc_find("binary");
|
||||
|
@ -63,11 +63,148 @@ module Mysql2
|
||||
"utf8" => Encoding::UTF_8,
|
||||
}
|
||||
|
||||
# Maps a numeric MySQL charset code (the value looked up by
# encoding_from_charset_code) to a hash holding the charset :name and the
# :collation associated with that code.
#
# Codes that are absent from this table (e.g. 45, 46, 54-56) simply have no
# entry, so a lookup for them yields nil.
#
# The table is frozen so that entries cannot be added or removed at runtime.
MYSQL_CHARSET_MAP = {
  1   => {:name => "big5",     :collation => "big5_chinese_ci"},
  2   => {:name => "latin2",   :collation => "latin2_czech_cs"},
  3   => {:name => "dec8",     :collation => "dec8_swedish_ci"},
  4   => {:name => "cp850",    :collation => "cp850_general_ci"},
  5   => {:name => "latin1",   :collation => "latin1_german1_ci"},
  6   => {:name => "hp8",      :collation => "hp8_english_ci"},
  7   => {:name => "koi8r",    :collation => "koi8r_general_ci"},
  8   => {:name => "latin1",   :collation => "latin1_swedish_ci"},
  9   => {:name => "latin2",   :collation => "latin2_general_ci"},
  10  => {:name => "swe7",     :collation => "swe7_swedish_ci"},
  11  => {:name => "ascii",    :collation => "ascii_general_ci"},
  12  => {:name => "ujis",     :collation => "ujis_japanese_ci"},
  13  => {:name => "sjis",     :collation => "sjis_japanese_ci"},
  14  => {:name => "cp1251",   :collation => "cp1251_bulgarian_ci"},
  15  => {:name => "latin1",   :collation => "latin1_danish_ci"},
  16  => {:name => "hebrew",   :collation => "hebrew_general_ci"},
  17  => {:name => "filename", :collation => "filename"},
  18  => {:name => "tis620",   :collation => "tis620_thai_ci"},
  19  => {:name => "euckr",    :collation => "euckr_korean_ci"},
  20  => {:name => "latin7",   :collation => "latin7_estonian_cs"},
  21  => {:name => "latin2",   :collation => "latin2_hungarian_ci"},
  22  => {:name => "koi8u",    :collation => "koi8u_general_ci"},
  23  => {:name => "cp1251",   :collation => "cp1251_ukrainian_ci"},
  24  => {:name => "gb2312",   :collation => "gb2312_chinese_ci"},
  25  => {:name => "greek",    :collation => "greek_general_ci"},
  26  => {:name => "cp1250",   :collation => "cp1250_general_ci"},
  27  => {:name => "latin2",   :collation => "latin2_croatian_ci"},
  28  => {:name => "gbk",      :collation => "gbk_chinese_ci"},
  29  => {:name => "cp1257",   :collation => "cp1257_lithuanian_ci"},
  30  => {:name => "latin5",   :collation => "latin5_turkish_ci"},
  31  => {:name => "latin1",   :collation => "latin1_german2_ci"},
  32  => {:name => "armscii8", :collation => "armscii8_general_ci"},
  33  => {:name => "utf8",     :collation => "utf8_general_ci"},
  34  => {:name => "cp1250",   :collation => "cp1250_czech_cs"},
  35  => {:name => "ucs2",     :collation => "ucs2_general_ci"},
  36  => {:name => "cp866",    :collation => "cp866_general_ci"},
  37  => {:name => "keybcs2",  :collation => "keybcs2_general_ci"},
  38  => {:name => "macce",    :collation => "macce_general_ci"},
  39  => {:name => "macroman", :collation => "macroman_general_ci"},
  40  => {:name => "cp852",    :collation => "cp852_general_ci"},
  41  => {:name => "latin7",   :collation => "latin7_general_ci"},
  42  => {:name => "latin7",   :collation => "latin7_general_cs"},
  43  => {:name => "macce",    :collation => "macce_bin"},
  44  => {:name => "cp1250",   :collation => "cp1250_croatian_ci"},
  47  => {:name => "latin1",   :collation => "latin1_bin"},
  48  => {:name => "latin1",   :collation => "latin1_general_ci"},
  49  => {:name => "latin1",   :collation => "latin1_general_cs"},
  50  => {:name => "cp1251",   :collation => "cp1251_bin"},
  51  => {:name => "cp1251",   :collation => "cp1251_general_ci"},
  52  => {:name => "cp1251",   :collation => "cp1251_general_cs"},
  53  => {:name => "macroman", :collation => "macroman_bin"},
  57  => {:name => "cp1256",   :collation => "cp1256_general_ci"},
  58  => {:name => "cp1257",   :collation => "cp1257_bin"},
  59  => {:name => "cp1257",   :collation => "cp1257_general_ci"},
  63  => {:name => "binary",   :collation => "binary"},
  64  => {:name => "armscii8", :collation => "armscii8_bin"},
  65  => {:name => "ascii",    :collation => "ascii_bin"},
  66  => {:name => "cp1250",   :collation => "cp1250_bin"},
  67  => {:name => "cp1256",   :collation => "cp1256_bin"},
  68  => {:name => "cp866",    :collation => "cp866_bin"},
  69  => {:name => "dec8",     :collation => "dec8_bin"},
  70  => {:name => "greek",    :collation => "greek_bin"},
  71  => {:name => "hebrew",   :collation => "hebrew_bin"},
  72  => {:name => "hp8",      :collation => "hp8_bin"},
  73  => {:name => "keybcs2",  :collation => "keybcs2_bin"},
  74  => {:name => "koi8r",    :collation => "koi8r_bin"},
  75  => {:name => "koi8u",    :collation => "koi8u_bin"},
  77  => {:name => "latin2",   :collation => "latin2_bin"},
  78  => {:name => "latin5",   :collation => "latin5_bin"},
  79  => {:name => "latin7",   :collation => "latin7_bin"},
  80  => {:name => "cp850",    :collation => "cp850_bin"},
  81  => {:name => "cp852",    :collation => "cp852_bin"},
  82  => {:name => "swe7",     :collation => "swe7_bin"},
  83  => {:name => "utf8",     :collation => "utf8_bin"},
  84  => {:name => "big5",     :collation => "big5_bin"},
  85  => {:name => "euckr",    :collation => "euckr_bin"},
  86  => {:name => "gb2312",   :collation => "gb2312_bin"},
  87  => {:name => "gbk",      :collation => "gbk_bin"},
  88  => {:name => "sjis",     :collation => "sjis_bin"},
  89  => {:name => "tis620",   :collation => "tis620_bin"},
  90  => {:name => "ucs2",     :collation => "ucs2_bin"},
  91  => {:name => "ujis",     :collation => "ujis_bin"},
  92  => {:name => "geostd8",  :collation => "geostd8_general_ci"},
  93  => {:name => "geostd8",  :collation => "geostd8_bin"},
  94  => {:name => "latin1",   :collation => "latin1_spanish_ci"},
  95  => {:name => "cp932",    :collation => "cp932_japanese_ci"},
  96  => {:name => "cp932",    :collation => "cp932_bin"},
  97  => {:name => "eucjpms",  :collation => "eucjpms_japanese_ci"},
  98  => {:name => "eucjpms",  :collation => "eucjpms_bin"},
  99  => {:name => "cp1250",   :collation => "cp1250_polish_ci"},
  128 => {:name => "ucs2",     :collation => "ucs2_unicode_ci"},
  129 => {:name => "ucs2",     :collation => "ucs2_icelandic_ci"},
  130 => {:name => "ucs2",     :collation => "ucs2_latvian_ci"},
  131 => {:name => "ucs2",     :collation => "ucs2_romanian_ci"},
  132 => {:name => "ucs2",     :collation => "ucs2_slovenian_ci"},
  133 => {:name => "ucs2",     :collation => "ucs2_polish_ci"},
  134 => {:name => "ucs2",     :collation => "ucs2_estonian_ci"},
  135 => {:name => "ucs2",     :collation => "ucs2_spanish_ci"},
  136 => {:name => "ucs2",     :collation => "ucs2_swedish_ci"},
  137 => {:name => "ucs2",     :collation => "ucs2_turkish_ci"},
  138 => {:name => "ucs2",     :collation => "ucs2_czech_ci"},
  139 => {:name => "ucs2",     :collation => "ucs2_danish_ci"},
  140 => {:name => "ucs2",     :collation => "ucs2_lithuanian_ci"},
  141 => {:name => "ucs2",     :collation => "ucs2_slovak_ci"},
  142 => {:name => "ucs2",     :collation => "ucs2_spanish2_ci"},
  143 => {:name => "ucs2",     :collation => "ucs2_roman_ci"},
  144 => {:name => "ucs2",     :collation => "ucs2_persian_ci"},
  145 => {:name => "ucs2",     :collation => "ucs2_esperanto_ci"},
  146 => {:name => "ucs2",     :collation => "ucs2_hungarian_ci"},
  192 => {:name => "utf8",     :collation => "utf8_unicode_ci"},
  193 => {:name => "utf8",     :collation => "utf8_icelandic_ci"},
  194 => {:name => "utf8",     :collation => "utf8_latvian_ci"},
  195 => {:name => "utf8",     :collation => "utf8_romanian_ci"},
  196 => {:name => "utf8",     :collation => "utf8_slovenian_ci"},
  197 => {:name => "utf8",     :collation => "utf8_polish_ci"},
  198 => {:name => "utf8",     :collation => "utf8_estonian_ci"},
  199 => {:name => "utf8",     :collation => "utf8_spanish_ci"},
  200 => {:name => "utf8",     :collation => "utf8_swedish_ci"},
  201 => {:name => "utf8",     :collation => "utf8_turkish_ci"},
  202 => {:name => "utf8",     :collation => "utf8_czech_ci"},
  203 => {:name => "utf8",     :collation => "utf8_danish_ci"},
  204 => {:name => "utf8",     :collation => "utf8_lithuanian_ci"},
  205 => {:name => "utf8",     :collation => "utf8_slovak_ci"},
  206 => {:name => "utf8",     :collation => "utf8_spanish2_ci"},
  207 => {:name => "utf8",     :collation => "utf8_roman_ci"},
  208 => {:name => "utf8",     :collation => "utf8_persian_ci"},
  209 => {:name => "utf8",     :collation => "utf8_esperanto_ci"},
  210 => {:name => "utf8",     :collation => "utf8_hungarian_ci"},
  254 => {:name => "utf8",     :collation => "utf8_general_cs"}
}.freeze
|
||||
|
||||
# Look up the entry registered in CHARSET_MAP for a MySQL charset name.
#
# charset - the charset name; it is converted with to_s and downcased before
#           the lookup, so symbols and mixed-case names are accepted.
#
# Returns the CHARSET_MAP value for that name, or nil when the name has no
# entry. This method does not raise for unknown names: its callers check for
# nil themselves and decide whether to raise or to fall back.
def self.encoding_from_charset(charset)
  CHARSET_MAP[charset.to_s.downcase]
end
|
||||
|
||||
# Resolve a numeric MySQL charset code through MYSQL_CHARSET_MAP.
#
# code - the key to look up in MYSQL_CHARSET_MAP.
#
# Returns nil when the code has no entry in MYSQL_CHARSET_MAP; otherwise
# returns whatever encoding_from_charset yields for the entry's :name.
def self.encoding_from_charset_code(code)
  mapping = MYSQL_CHARSET_MAP[code]
  return nil unless mapping

  encoding_from_charset(mapping[:name])
end
|
||||
end
|
||||
end
|
||||
|
@ -6,6 +6,14 @@ describe Mysql2::Client do
|
||||
@client = Mysql2::Client.new
|
||||
end
|
||||
|
||||
if defined? Encoding
|
||||
# Constructing a client with an :encoding value that is not a known charset
# is expected to raise Mysql2::Error.
it "should raise an exception on create for invalid encodings" do
  lambda {
    Mysql2::Client.new(:encoding => "fake")
  }.should raise_error(Mysql2::Error)
end
|
||||
end
|
||||
|
||||
it "should be able to connect via SSL options" do
|
||||
pending("DON'T WORRY, THIS TEST PASSES :) - but is machine-specific. You need to have MySQL running with SSL configured and enabled. Then update the paths in this test to your needs and remove the pending state.")
|
||||
ssl_client = nil
|
||||
|
Loading…
Reference in New Issue
Block a user