RUBY-253 fix UTF8 check for Ruby 1.9
This commit is contained in:
parent
1ab2f171c8
commit
b48a2bd84f
|
@ -107,21 +107,12 @@ static int max_bson_size;
|
||||||
} \
|
} \
|
||||||
_str; \
|
_str; \
|
||||||
})
|
})
|
||||||
/* MUST call TO_UTF8 before calling write_utf8. */
|
|
||||||
#define TO_UTF8(string) rb_str_export_to_enc((string), rb_utf8_encoding())
|
#define TO_UTF8(string) rb_str_export_to_enc((string), rb_utf8_encoding())
|
||||||
static void write_utf8(buffer_t buffer, VALUE string, char check_null) {
|
|
||||||
result_t status = check_string(RSTRING_PTR(string), RSTRING_LENINT(string),
|
|
||||||
0, check_null);
|
|
||||||
if (status == HAS_NULL) {
|
|
||||||
buffer_free(buffer);
|
|
||||||
rb_raise(InvalidDocument, "Key names / regex patterns must not contain the NULL byte");
|
|
||||||
}
|
|
||||||
SAFE_WRITE(buffer, RSTRING_PTR(string), RSTRING_LENINT(string));
|
|
||||||
}
|
|
||||||
#else
|
#else
|
||||||
#define STR_NEW(p,n) rb_str_new((p), (n))
|
#define STR_NEW(p,n) rb_str_new((p), (n))
|
||||||
/* MUST call TO_UTF8 before calling write_utf8. */
|
|
||||||
#define TO_UTF8(string) (string)
|
#define TO_UTF8(string) (string)
|
||||||
|
#endif
|
||||||
|
|
||||||
static void write_utf8(buffer_t buffer, VALUE string, char check_null) {
|
static void write_utf8(buffer_t buffer, VALUE string, char check_null) {
|
||||||
result_t status = check_string(RSTRING_PTR(string), RSTRING_LEN(string),
|
result_t status = check_string(RSTRING_PTR(string), RSTRING_LEN(string),
|
||||||
1, check_null);
|
1, check_null);
|
||||||
|
@ -132,9 +123,9 @@ static void write_utf8(buffer_t buffer, VALUE string, char check_null) {
|
||||||
buffer_free(buffer);
|
buffer_free(buffer);
|
||||||
rb_raise(InvalidStringEncoding, "String not valid UTF-8");
|
rb_raise(InvalidStringEncoding, "String not valid UTF-8");
|
||||||
}
|
}
|
||||||
|
string = TO_UTF8(string);
|
||||||
SAFE_WRITE(buffer, RSTRING_PTR(string), RSTRING_LEN(string));
|
SAFE_WRITE(buffer, RSTRING_PTR(string), RSTRING_LEN(string));
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
// this sucks. but for some reason these moved around between 1.8 and 1.9
|
// this sucks. but for some reason these moved around between 1.8 and 1.9
|
||||||
#ifdef ONIGURUMA_H
|
#ifdef ONIGURUMA_H
|
||||||
|
@ -211,7 +202,6 @@ static VALUE pack_extra(buffer_t buffer, VALUE check_keys) {
|
||||||
|
|
||||||
static void write_name_and_type(buffer_t buffer, VALUE name, char type) {
|
static void write_name_and_type(buffer_t buffer, VALUE name, char type) {
|
||||||
SAFE_WRITE(buffer, &type, 1);
|
SAFE_WRITE(buffer, &type, 1);
|
||||||
name = TO_UTF8(name);
|
|
||||||
write_utf8(buffer, name, 1);
|
write_utf8(buffer, name, 1);
|
||||||
SAFE_WRITE(buffer, &zero, 1);
|
SAFE_WRITE(buffer, &zero, 1);
|
||||||
}
|
}
|
||||||
|
@ -340,7 +330,6 @@ static int write_element(VALUE key, VALUE value, VALUE extra, int allow_id) {
|
||||||
{
|
{
|
||||||
int length;
|
int length;
|
||||||
write_name_and_type(buffer, key, 0x02);
|
write_name_and_type(buffer, key, 0x02);
|
||||||
value = TO_UTF8(value);
|
|
||||||
length = RSTRING_LENINT(value) + 1;
|
length = RSTRING_LENINT(value) + 1;
|
||||||
SAFE_WRITE(buffer, (char*)&length, 4);
|
SAFE_WRITE(buffer, (char*)&length, 4);
|
||||||
write_utf8(buffer, value, 0);
|
write_utf8(buffer, value, 0);
|
||||||
|
@ -485,7 +474,6 @@ static int write_element(VALUE key, VALUE value, VALUE extra, int allow_id) {
|
||||||
|
|
||||||
write_name_and_type(buffer, key, 0x0B);
|
write_name_and_type(buffer, key, 0x0B);
|
||||||
|
|
||||||
pattern = TO_UTF8(pattern);
|
|
||||||
write_utf8(buffer, pattern, 1);
|
write_utf8(buffer, pattern, 1);
|
||||||
SAFE_WRITE(buffer, &zero, 1);
|
SAFE_WRITE(buffer, &zero, 1);
|
||||||
|
|
||||||
|
|
|
@ -57,6 +57,11 @@ module BSON
|
||||||
BINARY_ENCODING = Encoding.find('binary')
|
BINARY_ENCODING = Encoding.find('binary')
|
||||||
|
|
||||||
def self.to_utf8_binary(str)
|
def self.to_utf8_binary(str)
|
||||||
|
begin
|
||||||
|
str.unpack("U*")
|
||||||
|
rescue => ex
|
||||||
|
raise InvalidStringEncoding, "String not valid utf-8: #{str.inspect}"
|
||||||
|
end
|
||||||
str.encode(UTF8_ENCODING).force_encoding(BINARY_ENCODING)
|
str.encode(UTF8_ENCODING).force_encoding(BINARY_ENCODING)
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
|
|
|
@ -119,15 +119,23 @@ class BSONTest < Test::Unit::TestCase
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
def test_non_utf8_string
|
def test_non_utf8_string
|
||||||
bson = BSON::BSON_CODER.serialize({'str' => 'aé'.encode('iso-8859-1')})
|
assert_raise BSON::InvalidStringEncoding do
|
||||||
result = BSON::BSON_CODER.deserialize(bson)['str']
|
BSON::BSON_CODER.serialize({'str' => 'aé'.encode('iso-8859-1')})
|
||||||
assert_equal 'aé', result
|
end
|
||||||
assert_equal 'UTF-8', result.encoding.name
|
end
|
||||||
|
|
||||||
|
def test_invalid_utf8_string
|
||||||
|
str = "123\xD9"
|
||||||
|
assert !str.valid_encoding?
|
||||||
|
assert_raise BSON::InvalidStringEncoding do
|
||||||
|
BSON::BSON_CODER.serialize({'str' => str})
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_non_utf8_key
|
def test_non_utf8_key
|
||||||
bson = BSON::BSON_CODER.serialize({'aé'.encode('iso-8859-1') => 'hello'})
|
assert_raise BSON::InvalidStringEncoding do
|
||||||
assert_equal 'hello', BSON::BSON_CODER.deserialize(bson)['aé']
|
BSON::BSON_CODER.serialize({'aé'.encode('iso-8859-1') => 'hello'})
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Based on a test from sqlite3-ruby
|
# Based on a test from sqlite3-ruby
|
||||||
|
|
Loading…
Reference in New Issue