From f55fc95bf42bd0d6baf30c64ba34639e8b330ffe Mon Sep 17 00:00:00 2001 From: Tyler Brock Date: Fri, 1 Jun 2012 13:36:34 -0400 Subject: [PATCH] RUBY-432 Do not encode strings to utf-8 after checking that utf-8 is valid. Before this fix, an encoding operation could turn valid utf-8 into invalid utf-8 if an encoding that requires conversion is forced on a valid string. --- ext/cbson/cbson.c | 3 --- lib/bson/bson_ruby.rb | 2 +- test/bson/bson_test.rb | 7 +++++++ 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/ext/cbson/cbson.c b/ext/cbson/cbson.c index 1285285..8634f21 100644 --- a/ext/cbson/cbson.c +++ b/ext/cbson/cbson.c @@ -109,10 +109,8 @@ static int max_bson_size; } \ _str; \ }) -#define TO_UTF8(string) rb_str_export_to_enc((string), rb_utf8_encoding()) #else #define STR_NEW(p,n) rb_str_new((p), (n)) -#define TO_UTF8(string) (string) #endif static void write_utf8(bson_buffer_t buffer, VALUE string, char check_null) { @@ -125,7 +123,6 @@ static void write_utf8(bson_buffer_t buffer, VALUE string, char check_null) { bson_buffer_free(buffer); rb_raise(InvalidStringEncoding, "String not valid UTF-8"); } - string = TO_UTF8(string); SAFE_WRITE(buffer, RSTRING_PTR(string), (int)RSTRING_LEN(string)); } diff --git a/lib/bson/bson_ruby.rb b/lib/bson/bson_ruby.rb index 9632432..45449e9 100644 --- a/lib/bson/bson_ruby.rb +++ b/lib/bson/bson_ruby.rb @@ -62,7 +62,7 @@ module BSON rescue raise InvalidStringEncoding, "String not valid utf-8: #{str.inspect}" end - str.encode(UTF8_ENCODING).force_encoding(BINARY_ENCODING) + str.dup.force_encoding(BINARY_ENCODING) end else NULL_BYTE = "\0" diff --git a/test/bson/bson_test.rb b/test/bson/bson_test.rb index 61e703e..6cb4de6 100644 --- a/test/bson/bson_test.rb +++ b/test/bson/bson_test.rb @@ -141,6 +141,13 @@ class BSONTest < Test::Unit::TestCase end end + def test_forced_encoding_with_valid_utf8 + doc = {'doc' => "\xC3\xB6".force_encoding("ISO-8859-1")} + serialized = @encoder.serialize(doc) + deserialized = @encoder.deserialize(serialized) + assert_equal(doc['doc'], deserialized['doc'].force_encoding("ISO-8859-1")) + end + # Based on a test from sqlite3-ruby def test_default_internal_is_honored before_enc = Encoding.default_internal