Check for and support field-level encodings
Raise an exception for invalid encodings passed to Mysql2::Client.new
This commit is contained in:
parent
d8f5f4de1d
commit
edb742fee7
@ -1,7 +1,7 @@
|
|||||||
#include <mysql2_ext.h>
|
#include <mysql2_ext.h>
|
||||||
|
|
||||||
VALUE mMysql2, cMysql2Client;
|
VALUE mMysql2, cMysql2Client;
|
||||||
VALUE cMysql2Error;
|
VALUE cMysql2Error, intern_encoding_from_charset;
|
||||||
ID sym_id, sym_version, sym_async;
|
ID sym_id, sym_version, sym_async;
|
||||||
|
|
||||||
#define REQUIRE_OPEN_DB(_ctxt) \
|
#define REQUIRE_OPEN_DB(_ctxt) \
|
||||||
@ -409,11 +409,15 @@ static VALUE set_charset_name(VALUE self, VALUE value)
|
|||||||
|
|
||||||
#ifdef HAVE_RUBY_ENCODING_H
|
#ifdef HAVE_RUBY_ENCODING_H
|
||||||
VALUE new_encoding, old_encoding;
|
VALUE new_encoding, old_encoding;
|
||||||
new_encoding = rb_funcall(cMysql2Client, rb_intern("encoding_from_charset"), 1, value);
|
new_encoding = rb_funcall(cMysql2Client, intern_encoding_from_charset, 1, value);
|
||||||
|
if (new_encoding == Qnil) {
|
||||||
|
rb_raise(cMysql2Error, "Unsupported charset: '%s'", RSTRING_PTR(value));
|
||||||
|
} else {
|
||||||
old_encoding = rb_iv_get(self, "@encoding");
|
old_encoding = rb_iv_get(self, "@encoding");
|
||||||
if (old_encoding == Qnil) {
|
if (old_encoding == Qnil) {
|
||||||
rb_iv_set(self, "@encoding", new_encoding);
|
rb_iv_set(self, "@encoding", new_encoding);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
charset_name = StringValuePtr(value);
|
charset_name = StringValuePtr(value);
|
||||||
@ -487,4 +491,6 @@ void Init_mysql2() {
|
|||||||
sym_id = ID2SYM(rb_intern("id"));
|
sym_id = ID2SYM(rb_intern("id"));
|
||||||
sym_version = ID2SYM(rb_intern("version"));
|
sym_version = ID2SYM(rb_intern("version"));
|
||||||
sym_async = ID2SYM(rb_intern("async"));
|
sym_async = ID2SYM(rb_intern("async"));
|
||||||
|
|
||||||
|
intern_encoding_from_charset = rb_intern("encoding_from_charset");
|
||||||
}
|
}
|
||||||
|
@ -5,10 +5,11 @@ rb_encoding *binaryEncoding;
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
ID sym_symbolize_keys;
|
ID sym_symbolize_keys;
|
||||||
ID intern_new, intern_utc;
|
ID intern_new, intern_utc, intern_encoding_from_charset_code;
|
||||||
|
|
||||||
VALUE cBigDecimal, cDate, cDateTime;
|
VALUE cBigDecimal, cDate, cDateTime;
|
||||||
VALUE cMysql2Result;
|
VALUE cMysql2Result;
|
||||||
|
extern VALUE cMysql2Client;
|
||||||
|
|
||||||
static void rb_mysql_result_mark(void * wrapper) {
|
static void rb_mysql_result_mark(void * wrapper) {
|
||||||
mysql2_result_wrapper * w = wrapper;
|
mysql2_result_wrapper * w = wrapper;
|
||||||
@ -198,13 +199,20 @@ static VALUE rb_mysql_result_fetch_row(int argc, VALUE * argv, VALUE self) {
|
|||||||
default:
|
default:
|
||||||
val = rb_str_new(row[i], fieldLengths[i]);
|
val = rb_str_new(row[i], fieldLengths[i]);
|
||||||
#ifdef HAVE_RUBY_ENCODING_H
|
#ifdef HAVE_RUBY_ENCODING_H
|
||||||
// rudimentary check for binary content
|
// if the binary flag is set, respect its wishes
|
||||||
if ((fields[i].flags & BINARY_FLAG) || fields[i].charsetnr == 63) {
|
if (fields[i].flags & BINARY_FLAG) {
|
||||||
rb_enc_associate(val, binaryEncoding);
|
rb_enc_associate(val, binaryEncoding);
|
||||||
} else {
|
} else {
|
||||||
// TODO: we should probably lookup the encoding that was set on the field
|
// lookup the encoding configured on this field
|
||||||
// and either use that and/or fall back to the connection's encoding
|
VALUE new_encoding = rb_funcall(cMysql2Client, intern_encoding_from_charset_code, 1, INT2NUM(fields[i].charsetnr));
|
||||||
|
if (new_encoding != Qnil) {
|
||||||
|
// use the field encoding we were able to match
|
||||||
|
rb_encoding *enc = rb_to_encoding(new_encoding);
|
||||||
|
rb_enc_associate(val, enc);
|
||||||
|
} else {
|
||||||
|
// otherwise fall back to the connection's encoding
|
||||||
rb_enc_associate(val, conn_enc);
|
rb_enc_associate(val, conn_enc);
|
||||||
|
}
|
||||||
if (default_internal_enc) {
|
if (default_internal_enc) {
|
||||||
val = rb_str_export_to_enc(val, default_internal_enc);
|
val = rb_str_export_to_enc(val, default_internal_enc);
|
||||||
}
|
}
|
||||||
@ -324,6 +332,7 @@ void init_mysql2_result()
|
|||||||
sym_symbolize_keys = ID2SYM(rb_intern("symbolize_keys"));
|
sym_symbolize_keys = ID2SYM(rb_intern("symbolize_keys"));
|
||||||
intern_new = rb_intern("new");
|
intern_new = rb_intern("new");
|
||||||
intern_utc = rb_intern("utc");
|
intern_utc = rb_intern("utc");
|
||||||
|
intern_encoding_from_charset_code = rb_intern("encoding_from_charset_code");
|
||||||
|
|
||||||
#ifdef HAVE_RUBY_ENCODING_H
|
#ifdef HAVE_RUBY_ENCODING_H
|
||||||
binaryEncoding = rb_enc_find("binary");
|
binaryEncoding = rb_enc_find("binary");
|
||||||
|
@ -63,11 +63,148 @@ module Mysql2
|
|||||||
"utf8" => Encoding::UTF_8,
|
"utf8" => Encoding::UTF_8,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MYSQL_CHARSET_MAP = {
|
||||||
|
1 => {:name => "big5", :collation => "big5_chinese_ci"},
|
||||||
|
2 => {:name => "latin2", :collation => "latin2_czech_cs"},
|
||||||
|
3 => {:name => "dec8", :collation => "dec8_swedish_ci"},
|
||||||
|
4 => {:name => "cp850", :collation => "cp850_general_ci"},
|
||||||
|
5 => {:name => "latin1", :collation => "latin1_german1_ci"},
|
||||||
|
6 => {:name => "hp8", :collation => "hp8_english_ci"},
|
||||||
|
7 => {:name => "koi8r", :collation => "koi8r_general_ci"},
|
||||||
|
8 => {:name => "latin1", :collation => "latin1_swedish_ci"},
|
||||||
|
9 => {:name => "latin2", :collation => "latin2_general_ci"},
|
||||||
|
10 => {:name => "swe7", :collation => "swe7_swedish_ci"},
|
||||||
|
11 => {:name => "ascii", :collation => "ascii_general_ci"},
|
||||||
|
12 => {:name => "ujis", :collation => "ujis_japanese_ci"},
|
||||||
|
13 => {:name => "sjis", :collation => "sjis_japanese_ci"},
|
||||||
|
14 => {:name => "cp1251", :collation => "cp1251_bulgarian_ci"},
|
||||||
|
15 => {:name => "latin1", :collation => "latin1_danish_ci"},
|
||||||
|
16 => {:name => "hebrew", :collation => "hebrew_general_ci"},
|
||||||
|
17 => {:name => "filename", :collation => "filename"},
|
||||||
|
18 => {:name => "tis620", :collation => "tis620_thai_ci"},
|
||||||
|
19 => {:name => "euckr", :collation => "euckr_korean_ci"},
|
||||||
|
20 => {:name => "latin7", :collation => "latin7_estonian_cs"},
|
||||||
|
21 => {:name => "latin2", :collation => "latin2_hungarian_ci"},
|
||||||
|
22 => {:name => "koi8u", :collation => "koi8u_general_ci"},
|
||||||
|
23 => {:name => "cp1251", :collation => "cp1251_ukrainian_ci"},
|
||||||
|
24 => {:name => "gb2312", :collation => "gb2312_chinese_ci"},
|
||||||
|
25 => {:name => "greek", :collation => "greek_general_ci"},
|
||||||
|
26 => {:name => "cp1250", :collation => "cp1250_general_ci"},
|
||||||
|
27 => {:name => "latin2", :collation => "latin2_croatian_ci"},
|
||||||
|
28 => {:name => "gbk", :collation => "gbk_chinese_ci"},
|
||||||
|
29 => {:name => "cp1257", :collation => "cp1257_lithuanian_ci"},
|
||||||
|
30 => {:name => "latin5", :collation => "latin5_turkish_ci"},
|
||||||
|
31 => {:name => "latin1", :collation => "latin1_german2_ci"},
|
||||||
|
32 => {:name => "armscii8", :collation => "armscii8_general_ci"},
|
||||||
|
33 => {:name => "utf8", :collation => "utf8_general_ci"},
|
||||||
|
34 => {:name => "cp1250", :collation => "cp1250_czech_cs"},
|
||||||
|
35 => {:name => "ucs2", :collation => "ucs2_general_ci"},
|
||||||
|
36 => {:name => "cp866", :collation => "cp866_general_ci"},
|
||||||
|
37 => {:name => "keybcs2", :collation => "keybcs2_general_ci"},
|
||||||
|
38 => {:name => "macce", :collation => "macce_general_ci"},
|
||||||
|
39 => {:name => "macroman", :collation => "macroman_general_ci"},
|
||||||
|
40 => {:name => "cp852", :collation => "cp852_general_ci"},
|
||||||
|
41 => {:name => "latin7", :collation => "latin7_general_ci"},
|
||||||
|
42 => {:name => "latin7", :collation => "latin7_general_cs"},
|
||||||
|
43 => {:name => "macce", :collation => "macce_bin"},
|
||||||
|
44 => {:name => "cp1250", :collation => "cp1250_croatian_ci"},
|
||||||
|
47 => {:name => "latin1", :collation => "latin1_bin"},
|
||||||
|
48 => {:name => "latin1", :collation => "latin1_general_ci"},
|
||||||
|
49 => {:name => "latin1", :collation => "latin1_general_cs"},
|
||||||
|
50 => {:name => "cp1251", :collation => "cp1251_bin"},
|
||||||
|
51 => {:name => "cp1251", :collation => "cp1251_general_ci"},
|
||||||
|
52 => {:name => "cp1251", :collation => "cp1251_general_cs"},
|
||||||
|
53 => {:name => "macroman", :collation => "macroman_bin"},
|
||||||
|
57 => {:name => "cp1256", :collation => "cp1256_general_ci"},
|
||||||
|
58 => {:name => "cp1257", :collation => "cp1257_bin"},
|
||||||
|
59 => {:name => "cp1257", :collation => "cp1257_general_ci"},
|
||||||
|
63 => {:name => "binary", :collation => "binary"},
|
||||||
|
64 => {:name => "armscii8", :collation => "armscii8_bin"},
|
||||||
|
65 => {:name => "ascii", :collation => "ascii_bin"},
|
||||||
|
66 => {:name => "cp1250", :collation => "cp1250_bin"},
|
||||||
|
67 => {:name => "cp1256", :collation => "cp1256_bin"},
|
||||||
|
68 => {:name => "cp866", :collation => "cp866_bin"},
|
||||||
|
69 => {:name => "dec8", :collation => "dec8_bin"},
|
||||||
|
70 => {:name => "greek", :collation => "greek_bin"},
|
||||||
|
71 => {:name => "hebrew", :collation => "hebrew_bin"},
|
||||||
|
72 => {:name => "hp8", :collation => "hp8_bin"},
|
||||||
|
73 => {:name => "keybcs2", :collation => "keybcs2_bin"},
|
||||||
|
74 => {:name => "koi8r", :collation => "koi8r_bin"},
|
||||||
|
75 => {:name => "koi8u", :collation => "koi8u_bin"},
|
||||||
|
77 => {:name => "latin2", :collation => "latin2_bin"},
|
||||||
|
78 => {:name => "latin5", :collation => "latin5_bin"},
|
||||||
|
79 => {:name => "latin7", :collation => "latin7_bin"},
|
||||||
|
80 => {:name => "cp850", :collation => "cp850_bin"},
|
||||||
|
81 => {:name => "cp852", :collation => "cp852_bin"},
|
||||||
|
82 => {:name => "swe7", :collation => "swe7_bin"},
|
||||||
|
83 => {:name => "utf8", :collation => "utf8_bin"},
|
||||||
|
84 => {:name => "big5", :collation => "big5_bin"},
|
||||||
|
85 => {:name => "euckr", :collation => "euckr_bin"},
|
||||||
|
86 => {:name => "gb2312", :collation => "gb2312_bin"},
|
||||||
|
87 => {:name => "gbk", :collation => "gbk_bin"},
|
||||||
|
88 => {:name => "sjis", :collation => "sjis_bin"},
|
||||||
|
89 => {:name => "tis620", :collation => "tis620_bin"},
|
||||||
|
90 => {:name => "ucs2", :collation => "ucs2_bin"},
|
||||||
|
91 => {:name => "ujis", :collation => "ujis_bin"},
|
||||||
|
92 => {:name => "geostd8", :collation => "geostd8_general_ci"},
|
||||||
|
93 => {:name => "geostd8", :collation => "geostd8_bin"},
|
||||||
|
94 => {:name => "latin1", :collation => "latin1_spanish_ci"},
|
||||||
|
95 => {:name => "cp932", :collation => "cp932_japanese_ci"},
|
||||||
|
96 => {:name => "cp932", :collation => "cp932_bin"},
|
||||||
|
97 => {:name => "eucjpms", :collation => "eucjpms_japanese_ci"},
|
||||||
|
98 => {:name => "eucjpms", :collation => "eucjpms_bin"},
|
||||||
|
99 => {:name => "cp1250", :collation => "cp1250_polish_ci"},
|
||||||
|
128 => {:name => "ucs2", :collation => "ucs2_unicode_ci"},
|
||||||
|
129 => {:name => "ucs2", :collation => "ucs2_icelandic_ci"},
|
||||||
|
130 => {:name => "ucs2", :collation => "ucs2_latvian_ci"},
|
||||||
|
131 => {:name => "ucs2", :collation => "ucs2_romanian_ci"},
|
||||||
|
132 => {:name => "ucs2", :collation => "ucs2_slovenian_ci"},
|
||||||
|
133 => {:name => "ucs2", :collation => "ucs2_polish_ci"},
|
||||||
|
134 => {:name => "ucs2", :collation => "ucs2_estonian_ci"},
|
||||||
|
135 => {:name => "ucs2", :collation => "ucs2_spanish_ci"},
|
||||||
|
136 => {:name => "ucs2", :collation => "ucs2_swedish_ci"},
|
||||||
|
137 => {:name => "ucs2", :collation => "ucs2_turkish_ci"},
|
||||||
|
138 => {:name => "ucs2", :collation => "ucs2_czech_ci"},
|
||||||
|
139 => {:name => "ucs2", :collation => "ucs2_danish_ci"},
|
||||||
|
140 => {:name => "ucs2", :collation => "ucs2_lithuanian_ci"},
|
||||||
|
141 => {:name => "ucs2", :collation => "ucs2_slovak_ci"},
|
||||||
|
142 => {:name => "ucs2", :collation => "ucs2_spanish2_ci"},
|
||||||
|
143 => {:name => "ucs2", :collation => "ucs2_roman_ci"},
|
||||||
|
144 => {:name => "ucs2", :collation => "ucs2_persian_ci"},
|
||||||
|
145 => {:name => "ucs2", :collation => "ucs2_esperanto_ci"},
|
||||||
|
146 => {:name => "ucs2", :collation => "ucs2_hungarian_ci"},
|
||||||
|
192 => {:name => "utf8", :collation => "utf8_unicode_ci"},
|
||||||
|
193 => {:name => "utf8", :collation => "utf8_icelandic_ci"},
|
||||||
|
194 => {:name => "utf8", :collation => "utf8_latvian_ci"},
|
||||||
|
195 => {:name => "utf8", :collation => "utf8_romanian_ci"},
|
||||||
|
196 => {:name => "utf8", :collation => "utf8_slovenian_ci"},
|
||||||
|
197 => {:name => "utf8", :collation => "utf8_polish_ci"},
|
||||||
|
198 => {:name => "utf8", :collation => "utf8_estonian_ci"},
|
||||||
|
199 => {:name => "utf8", :collation => "utf8_spanish_ci"},
|
||||||
|
200 => {:name => "utf8", :collation => "utf8_swedish_ci"},
|
||||||
|
201 => {:name => "utf8", :collation => "utf8_turkish_ci"},
|
||||||
|
202 => {:name => "utf8", :collation => "utf8_czech_ci"},
|
||||||
|
203 => {:name => "utf8", :collation => "utf8_danish_ci"},
|
||||||
|
204 => {:name => "utf8", :collation => "utf8_lithuanian_ci"},
|
||||||
|
205 => {:name => "utf8", :collation => "utf8_slovak_ci"},
|
||||||
|
206 => {:name => "utf8", :collation => "utf8_spanish2_ci"},
|
||||||
|
207 => {:name => "utf8", :collation => "utf8_roman_ci"},
|
||||||
|
208 => {:name => "utf8", :collation => "utf8_persian_ci"},
|
||||||
|
209 => {:name => "utf8", :collation => "utf8_esperanto_ci"},
|
||||||
|
210 => {:name => "utf8", :collation => "utf8_hungarian_ci"},
|
||||||
|
254 => {:name => "utf8", :collation => "utf8_general_cs"}
|
||||||
|
}
|
||||||
|
|
||||||
def self.encoding_from_charset(charset)
|
def self.encoding_from_charset(charset)
|
||||||
charset = charset.to_s.downcase
|
CHARSET_MAP[charset.to_s.downcase]
|
||||||
enc = CHARSET_MAP[charset]
|
end
|
||||||
raise Mysql2::Error, "unsupported charset: #{charset}" unless enc
|
|
||||||
enc
|
def self.encoding_from_charset_code(code)
|
||||||
|
if mapping = MYSQL_CHARSET_MAP[code]
|
||||||
|
encoding_from_charset(mapping[:name])
|
||||||
|
else
|
||||||
|
nil
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -6,6 +6,14 @@ describe Mysql2::Client do
|
|||||||
@client = Mysql2::Client.new
|
@client = Mysql2::Client.new
|
||||||
end
|
end
|
||||||
|
|
||||||
|
if defined? Encoding
|
||||||
|
it "should raise an exception on create for invalid encodings" do
|
||||||
|
lambda {
|
||||||
|
c = Mysql2::Client.new(:encoding => "fake")
|
||||||
|
}.should raise_error(Mysql2::Error)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
it "should be able to connect via SSL options" do
|
it "should be able to connect via SSL options" do
|
||||||
pending("DON'T WORRY, THIS TEST PASSES :) - but is machine-specific. You need to have MySQL running with SSL configured and enabled. Then update the paths in this test to your needs and remove the pending state.")
|
pending("DON'T WORRY, THIS TEST PASSES :) - but is machine-specific. You need to have MySQL running with SSL configured and enabled. Then update the paths in this test to your needs and remove the pending state.")
|
||||||
ssl_client = nil
|
ssl_client = nil
|
||||||
|
Loading…
Reference in New Issue
Block a user