Do not re-encode strings to UTF-8 after checking that they are valid UTF-8.

Before this fix, an encoding operation could turn valid UTF-8 into
invalid UTF-8 if an encoding that requires conversion had been forced on
a valid string (for example, UTF-8 bytes labelled as ISO-8859-1).
This commit is contained in:
Tyler Brock 2012-06-01 13:36:34 -04:00
parent ad8a933cd4
commit f55fc95bf4
3 changed files with 8 additions and 4 deletions

View File

@ -109,10 +109,8 @@ static int max_bson_size;
} \
_str; \
})
#define TO_UTF8(string) rb_str_export_to_enc((string), rb_utf8_encoding())
#else
#define STR_NEW(p,n) rb_str_new((p), (n))
#define TO_UTF8(string) (string)
#endif
static void write_utf8(bson_buffer_t buffer, VALUE string, char check_null) {
@ -125,7 +123,6 @@ static void write_utf8(bson_buffer_t buffer, VALUE string, char check_null) {
bson_buffer_free(buffer);
rb_raise(InvalidStringEncoding, "String not valid UTF-8");
}
string = TO_UTF8(string);
SAFE_WRITE(buffer, RSTRING_PTR(string), (int)RSTRING_LEN(string));
}

View File

@ -62,7 +62,7 @@ module BSON
rescue
raise InvalidStringEncoding, "String not valid utf-8: #{str.inspect}"
end
str.encode(UTF8_ENCODING).force_encoding(BINARY_ENCODING)
str.dup.force_encoding(BINARY_ENCODING)
end
else
NULL_BYTE = "\0"

View File

@ -141,6 +141,13 @@ class BSONTest < Test::Unit::TestCase
end
end
# Regression test: a string holding the valid UTF-8 byte pair C3 B6 but
# labelled ISO-8859-1 via force_encoding is serialized and deserialized;
# once relabelled the same way, the result must equal the original string.
def test_forced_encoding_with_valid_utf8
  latin1 = "ISO-8859-1"
  original = {'doc' => "\xC3\xB6".force_encoding(latin1)}
  round_tripped = @encoder.deserialize(@encoder.serialize(original))
  # The deserialized value is relabelled in place before the comparison.
  assert_equal(original['doc'], round_tripped['doc'].force_encoding(latin1))
end
# Based on a test from sqlite3-ruby
def test_default_internal_is_honored
before_enc = Encoding.default_internal