RUBY-253 fix UTF8 check for Ruby 1.9

2011-03-24 12:11:12 -04:00 · 2011-03-24 12:11:12 -04:00 · b48a2bd84f
commit b48a2bd84f
parent 1ab2f171c8
3 changed files with 22 additions and 21 deletions
--- a/ext/cbson/cbson.c
+++ b/ext/cbson/cbson.c
@ -107,21 +107,12 @@ static int max_bson_size;
        }                                                               \
        _str;                                                           \
    })
 /* MUST call TO_UTF8 before calling write_utf8. */
 #define TO_UTF8(string) rb_str_export_to_enc((string), rb_utf8_encoding())
 /*
  * Append the bytes of `string` to `buffer`.
  *
  * check_string() is called with 0 as its third argument and `check_null` as
  * its fourth; only a HAS_NULL result is acted on here, in which case the
  * buffer is freed and InvalidDocument is raised.
  * NOTE(review): the third argument presumably toggles UTF-8 validation (off
  * in this variant) -- confirm against check_string's definition.
  *
  * Per the note on TO_UTF8, callers must convert `string` with TO_UTF8 before
  * calling this function.
  */
 static void write_utf8(buffer_t buffer, VALUE string, char check_null) {
    result_t status = check_string(RSTRING_PTR(string), RSTRING_LENINT(string),
                                   0, check_null);
    if (status == HAS_NULL) {
        /* Release the buffer before raising; presumably because rb_raise does
         * not return to this frame -- confirm. */
        buffer_free(buffer);
        rb_raise(InvalidDocument, "Key names / regex patterns must not contain the NULL byte");
    }
    /* Only the string's own bytes are written; the callers visible in this
     * file write the trailing `zero` byte themselves. */
    SAFE_WRITE(buffer, RSTRING_PTR(string), RSTRING_LENINT(string));
 }
 #else
 #define STR_NEW(p,n) rb_str_new((p), (n))
 /* MUST call TO_UTF8 before calling write_utf8. */
 #define TO_UTF8(string) (string)
 #endif
 static void write_utf8(buffer_t buffer, VALUE string, char check_null) {
    result_t status = check_string(RSTRING_PTR(string), RSTRING_LEN(string),
                                   1, check_null);
@ -132,9 +123,9 @@ static void write_utf8(buffer_t buffer, VALUE string, char check_null) {
        buffer_free(buffer);
        rb_raise(InvalidStringEncoding, "String not valid UTF-8");
    }
    string = TO_UTF8(string);
    SAFE_WRITE(buffer, RSTRING_PTR(string), RSTRING_LEN(string));
 }
 #endif
 // this sucks. but for some reason these moved around between 1.8 and 1.9
 #ifdef ONIGURUMA_H
@ -211,7 +202,6 @@ static VALUE pack_extra(buffer_t buffer, VALUE check_keys) {
 /*
  * Write an element header to `buffer`: one byte holding `type`, then `name`
  * (converted with TO_UTF8 and written through write_utf8 with check_null
  * set to 1), then a single byte read from `zero`.
  * NOTE(review): `zero` is presumably the terminating NUL byte for the name --
  * confirm against its definition.
  */
 static void write_name_and_type(buffer_t buffer, VALUE name, char type) {
    /* The type tag comes first, as one byte. */
    SAFE_WRITE(buffer, &type, 1);
    /* write_utf8 requires its argument to have gone through TO_UTF8 first. */
    name = TO_UTF8(name);
    /* check_null = 1: write_utf8 raises if check_string reports HAS_NULL. */
    write_utf8(buffer, name, 1);
    SAFE_WRITE(buffer, &zero, 1);
 }
@ -340,7 +330,6 @@ static int write_element(VALUE key, VALUE value, VALUE extra, int allow_id) {
        {
            int length;
            write_name_and_type(buffer, key, 0x02);
            value = TO_UTF8(value);
            length = RSTRING_LENINT(value) + 1;
            SAFE_WRITE(buffer, (char*)&length, 4);
            write_utf8(buffer, value, 0);
@ -485,7 +474,6 @@ static int write_element(VALUE key, VALUE value, VALUE extra, int allow_id) {
            write_name_and_type(buffer, key, 0x0B);
            pattern = TO_UTF8(pattern);
            write_utf8(buffer, pattern, 1);
            SAFE_WRITE(buffer, &zero, 1);
--- a/lib/bson/bson_ruby.rb
+++ b/lib/bson/bson_ruby.rb
@ -57,6 +57,11 @@ module BSON
      BINARY_ENCODING = Encoding.find('binary')
      # Checks that +str+ decodes as UTF-8 and returns it re-tagged as binary.
      #
      # @param str [String] the string to validate and convert
      # @return [String] a new string produced by encoding +str+ to
      #   UTF8_ENCODING and then tagging the result with BINARY_ENCODING
      # @raise [InvalidStringEncoding] if decoding the bytes of +str+ as UTF-8
      #   code points fails
      def self.to_utf8_binary(str)
        begin
          # unpack("U*") decodes the raw bytes as UTF-8 and raises on a
          # malformed sequence; the decoded values are discarded because only
          # the validity check matters here.
          str.unpack("U*")
        rescue
          # The caught exception itself is not needed, so it is not bound to
          # a local (avoids an "assigned but unused variable" warning).
          raise InvalidStringEncoding, "String not valid utf-8: #{str.inspect}"
        end
        str.encode(UTF8_ENCODING).force_encoding(BINARY_ENCODING)
      end
    else
--- a/test/bson/bson_test.rb
+++ b/test/bson/bson_test.rb
@ -119,15 +119,23 @@ class BSONTest < Test::Unit::TestCase
    end
  else
    def test_non_utf8_string
-      bson = BSON::BSON_CODER.serialize({'str' => 'aé'.encode('iso-8859-1')})
+      assert_raise BSON::InvalidStringEncoding do
-      result = BSON::BSON_CODER.deserialize(bson)['str']
+        BSON::BSON_CODER.serialize({'str' => 'aé'.encode('iso-8859-1')})
-      assert_equal 'aé', result
+      end
-      assert_equal 'UTF-8', result.encoding.name
+    end
    # A string carrying a malformed UTF-8 byte sequence must be rejected by
    # the serializer with BSON::InvalidStringEncoding.
    def test_invalid_utf8_string
      malformed = "123\xD9"
      # Guard: make sure the fixture really is invalid in its own encoding.
      assert !malformed.valid_encoding?
      assert_raise(BSON::InvalidStringEncoding) { BSON::BSON_CODER.serialize({'str' => malformed}) }
    end
    def test_non_utf8_key
-      bson = BSON::BSON_CODER.serialize({'aé'.encode('iso-8859-1') => 'hello'})
+      assert_raise BSON::InvalidStringEncoding do
-      assert_equal 'hello', BSON::BSON_CODER.deserialize(bson)['aé']
+        BSON::BSON_CODER.serialize({'aé'.encode('iso-8859-1') => 'hello'})
      end
    end
    # Based on a test from sqlite3-ruby