RUBY-253 fix UTF8 check for Ruby 1.9

This commit is contained in:
Kyle Banker 2011-03-24 12:11:12 -04:00
parent 1ab2f171c8
commit b48a2bd84f
3 changed files with 22 additions and 21 deletions

View File

@ -107,21 +107,12 @@ static int max_bson_size;
} \
_str; \
})
/* MUST call TO_UTF8 before calling write_utf8. */
#define TO_UTF8(string) rb_str_export_to_enc((string), rb_utf8_encoding())
/*
 * Append the bytes of `string` to `buffer`.
 *
 * Before writing, the string's bytes are passed to check_string() with 0 as
 * the third argument and `check_null` as the fourth (presumably: skip the
 * UTF-8 validity check and optionally reject embedded NUL bytes -- confirm
 * against check_string()). If check_string() reports HAS_NULL, the buffer is
 * freed and InvalidDocument is raised.
 *
 * Callers are expected to have converted `string` with TO_UTF8 first.
 */
static void write_utf8(buffer_t buffer, VALUE string, char check_null) {
result_t status = check_string(RSTRING_PTR(string), RSTRING_LENINT(string),
0, check_null);
if (status == HAS_NULL) {
/* Release the buffer before raising; rb_raise does not return here. */
buffer_free(buffer);
rb_raise(InvalidDocument, "Key names / regex patterns must not contain the NULL byte");
}
SAFE_WRITE(buffer, RSTRING_PTR(string), RSTRING_LENINT(string));
}
#else
#define STR_NEW(p,n) rb_str_new((p), (n))
/* MUST call TO_UTF8 before calling write_utf8. */
#define TO_UTF8(string) (string)
#endif
static void write_utf8(buffer_t buffer, VALUE string, char check_null) {
result_t status = check_string(RSTRING_PTR(string), RSTRING_LEN(string),
1, check_null);
@ -132,9 +123,9 @@ static void write_utf8(buffer_t buffer, VALUE string, char check_null) {
buffer_free(buffer);
rb_raise(InvalidStringEncoding, "String not valid UTF-8");
}
string = TO_UTF8(string);
SAFE_WRITE(buffer, RSTRING_PTR(string), RSTRING_LEN(string));
}
#endif
// this sucks. but for some reason these moved around between 1.8 and 1.9
#ifdef ONIGURUMA_H
@ -211,7 +202,6 @@ static VALUE pack_extra(buffer_t buffer, VALUE check_keys) {
/*
 * Write an element header to `buffer`: the single `type` byte, then `name`,
 * then one byte taken from `zero` (presumably the NUL terminator of the
 * element name -- `zero` is defined outside this view, confirm).
 *
 * `name` is converted with TO_UTF8 and written through write_utf8 with its
 * check_null argument set to 1.
 */
static void write_name_and_type(buffer_t buffer, VALUE name, char type) {
SAFE_WRITE(buffer, &type, 1);
name = TO_UTF8(name);
write_utf8(buffer, name, 1);
SAFE_WRITE(buffer, &zero, 1);
}
@ -340,7 +330,6 @@ static int write_element(VALUE key, VALUE value, VALUE extra, int allow_id) {
{
int length;
write_name_and_type(buffer, key, 0x02);
value = TO_UTF8(value);
length = RSTRING_LENINT(value) + 1;
SAFE_WRITE(buffer, (char*)&length, 4);
write_utf8(buffer, value, 0);
@ -485,7 +474,6 @@ static int write_element(VALUE key, VALUE value, VALUE extra, int allow_id) {
write_name_and_type(buffer, key, 0x0B);
pattern = TO_UTF8(pattern);
write_utf8(buffer, pattern, 1);
SAFE_WRITE(buffer, &zero, 1);

View File

@ -57,6 +57,11 @@ module BSON
BINARY_ENCODING = Encoding.find('binary')
# Returns a copy of +str+ transcoded to UTF8_ENCODING and then tagged with
# BINARY_ENCODING. The receiver itself is not modified.
#
# The bytes of +str+ are first decoded with <tt>unpack("U*")</tt>, which
# interprets them as UTF-8 regardless of the string's declared encoding;
# any failure there is reported as InvalidStringEncoding.
#
# @param str [String] the string to convert
# @return [String] a new string with binary encoding
# @raise [InvalidStringEncoding] if the bytes of +str+ cannot be decoded as UTF-8
def self.to_utf8_binary(str)
  begin
    # Only the side effect matters here: unpack raises on malformed UTF-8.
    str.unpack("U*")
  rescue StandardError
    raise InvalidStringEncoding, "String not valid utf-8: #{str.inspect}"
  end

  utf8_copy = str.encode(UTF8_ENCODING)
  utf8_copy.force_encoding(BINARY_ENCODING)
end
else

View File

@ -119,15 +119,23 @@ class BSONTest < Test::Unit::TestCase
end
else
# Exercises serialization of a document whose value is 'aé' transcoded to
# ISO-8859-1.
# NOTE(review): the body first expects a successful round trip that yields
# 'aé' in UTF-8, and then expects the very same serialize call to raise
# BSON::InvalidStringEncoding. These look like the before/after expectations
# of one change shown together -- confirm which set is current.
def test_non_utf8_string
bson = BSON::BSON_CODER.serialize({'str' => 'aé'.encode('iso-8859-1')})
result = BSON::BSON_CODER.deserialize(bson)['str']
assert_equal 'aé', result
assert_equal 'UTF-8', result.encoding.name
assert_raise BSON::InvalidStringEncoding do
BSON::BSON_CODER.serialize({'str' => 'aé'.encode('iso-8859-1')})
end
end
# A string value whose bytes are not valid in its own encoding must be
# rejected by the coder with BSON::InvalidStringEncoding.
def test_invalid_utf8_string
  malformed = "123\xD9"
  # Sanity check: the fixture really is malformed before we serialize it.
  assert(!malformed.valid_encoding?)
  assert_raise(BSON::InvalidStringEncoding) do
    BSON::BSON_CODER.serialize({'str' => malformed})
  end
end
# Exercises serialization of a document whose key is 'aé' transcoded to
# ISO-8859-1.
# NOTE(review): the body first expects the document to serialize and be
# readable back under the key 'aé', and then expects the same serialize call
# to raise BSON::InvalidStringEncoding. These look like the before/after
# expectations of one change shown together -- confirm which set is current.
def test_non_utf8_key
bson = BSON::BSON_CODER.serialize({'aé'.encode('iso-8859-1') => 'hello'})
assert_equal 'hello', BSON::BSON_CODER.deserialize(bson)['aé']
assert_raise BSON::InvalidStringEncoding do
BSON::BSON_CODER.serialize({'aé'.encode('iso-8859-1') => 'hello'})
end
end
# Based on a test from sqlite3-ruby