respect Encoding.default_internal in BSON decoder

This commit is contained in:
Mike Dirolf 2010-05-07 17:00:33 +03:00
parent 2d160ac602
commit 2cf0f2c597
3 changed files with 39 additions and 13 deletions

View File

@ -88,7 +88,15 @@ static VALUE DigestMD5;
#if HAVE_RUBY_ENCODING_H #if HAVE_RUBY_ENCODING_H
#include "ruby/encoding.h" #include "ruby/encoding.h"
#define STR_NEW(p,n) rb_enc_str_new((p), (n), rb_utf8_encoding()) #define STR_NEW(p,n) \
({ \
VALUE _str = rb_enc_str_new((p), (n), rb_utf8_encoding()); \
rb_encoding* internal_encoding = rb_default_internal_encoding(); \
if (internal_encoding) { \
_str = rb_str_export_to_enc(_str, internal_encoding); \
} \
_str; \
})
/* MUST call TO_UTF8 before calling write_utf8. */ /* MUST call TO_UTF8 before calling write_utf8. */
#define TO_UTF8(string) rb_str_export_to_enc((string), rb_utf8_encoding()) #define TO_UTF8(string) rb_str_export_to_enc((string), rb_utf8_encoding())
static void write_utf8(buffer_t buffer, VALUE string, char check_null) { static void write_utf8(buffer_t buffer, VALUE string, char check_null) {

View File

@ -325,6 +325,16 @@ module BSON
Regexp.new(str, options) Regexp.new(str, options)
end end
# Marks +str+ as UTF-8 and, when Encoding.default_internal is set,
# transcodes it in place to that encoding. On Rubies older than 1.9 the
# string is returned untouched.
#
# The receiver is mutated (force_encoding / encode!) and the same object
# is returned.
def encoded_str(str)
  return str unless RUBY_VERSION >= '1.9'

  str.force_encoding("utf-8")
  internal = Encoding.default_internal
  str.encode!(internal) if internal
  str
end
def deserialize_string_data(buf) def deserialize_string_data(buf)
len = buf.get_int len = buf.get_int
bytes = buf.get(len) bytes = buf.get(len)
@ -332,10 +342,7 @@ module BSON
if str.respond_to? "pack" if str.respond_to? "pack"
str = str.pack("C*") str = str.pack("C*")
end end
if RUBY_VERSION >= '1.9' encoded_str(str)
str.force_encoding("utf-8")
end
str
end end
def deserialize_code_w_scope_data(buf) def deserialize_code_w_scope_data(buf)
@ -345,15 +352,12 @@ module BSON
if code.respond_to? "pack" if code.respond_to? "pack"
code = code.pack("C*") code = code.pack("C*")
end end
if RUBY_VERSION >= '1.9'
code.force_encoding("utf-8")
end
scope_size = buf.get_int scope_size = buf.get_int
buf.position -= 4 buf.position -= 4
scope = BSON_CODER.new().deserialize(buf.get(scope_size)) scope = BSON_CODER.new().deserialize(buf.get(scope_size))
Code.new(code, scope) Code.new(encoded_str(code), scope)
end end
def deserialize_oid_data(buf) def deserialize_oid_data(buf)
@ -536,10 +540,7 @@ module BSON
break if b == 0 break if b == 0
chars << b.chr chars << b.chr
end end
if RUBY_VERSION >= '1.9' encoded_str(chars)
chars.force_encoding("utf-8") # Mongo stores UTF-8
end
chars
end end
def bson_type(o) def bson_type(o)

View File

@ -90,6 +90,23 @@ class BSONTest < Test::Unit::TestCase
bson = BSON::BSON_CODER.serialize({'aé'.encode('iso-8859-1') => 'hello'}) bson = BSON::BSON_CODER.serialize({'aé'.encode('iso-8859-1') => 'hello'})
assert_equal 'hello', BSON::BSON_CODER.deserialize(bson)['aé'] assert_equal 'hello', BSON::BSON_CODER.deserialize(bson)['aé']
end end
# Based on a test from sqlite3-ruby
#
# Serializes a UTF-8 Japanese string, switches Encoding.default_internal
# to EUC-JP, and checks that the deserialized value comes back in that
# encoding with equivalent content. The previous default_internal is
# restored in the ensure clause regardless of the outcome.
def test_default_internal_is_honored
  saved_internal = Encoding.default_internal
  original = "壁に耳あり、障子に目あり"
  payload = BSON::BSON_CODER.serialize("x" => original)

  Encoding.default_internal = 'EUC-JP'
  decoded = BSON::BSON_CODER.deserialize(payload)["x"]

  assert_equal Encoding.default_internal, decoded.encoding
  assert_equal original.encode('EUC-JP'), decoded
  assert_equal original, decoded.encode(original.encoding)
ensure
  Encoding.default_internal = saved_internal
end
end end
def test_code def test_code