Decode strings as UTF-8 when using the C extension on Ruby 1.9
This commit is contained in:
parent
3d304b1a02
commit
902c3120c8
|
@ -50,6 +50,13 @@ static VALUE RegexpOfHolding;
|
|||
static VALUE OrderedHash;
|
||||
static VALUE InvalidName;
|
||||
|
||||
/*
 * STR_NEW(p, n): create a Ruby String from the n bytes starting at p.
 *
 * If HAVE_RUBY_ENCODING_H is defined (ruby/encoding.h is available), the
 * string is built with rb_enc_str_new and tagged with the UTF-8 encoding.
 * Otherwise it falls back to rb_str_new, which takes no encoding argument.
 *
 * #ifdef is used rather than #if so that builds where the macro is not
 * defined at all do not evaluate an undefined identifier (-Wundef).
 */
#ifdef HAVE_RUBY_ENCODING_H
#include "ruby/encoding.h"
#define STR_NEW(p,n) rb_enc_str_new((p), (n), rb_utf8_encoding())
#else
#define STR_NEW(p,n) rb_str_new((p), (n))
#endif
|
||||
|
||||
// This sucks, but for some reason these moved around between Ruby 1.8 and 1.9.
|
||||
#ifdef ONIGURUMA_H
|
||||
#define IGNORECASE ONIG_OPTION_IGNORECASE
|
||||
|
@ -495,7 +502,7 @@ static VALUE get_value(const char* buffer, int* position, int type) {
|
|||
int value_length;
|
||||
*position += 4;
|
||||
value_length = strlen(buffer + *position);
|
||||
value = rb_str_new(buffer+ *position, value_length);
|
||||
value = STR_NEW(buffer + *position, value_length);
|
||||
*position += value_length + 1;
|
||||
break;
|
||||
}
|
||||
|
@ -509,7 +516,7 @@ static VALUE get_value(const char* buffer, int* position, int type) {
|
|||
int collection_length = strlen(buffer + offset);
|
||||
char id_type;
|
||||
|
||||
argv[0] = rb_str_new(buffer + offset, collection_length);
|
||||
argv[0] = STR_NEW(buffer + offset, collection_length);
|
||||
offset += collection_length + 1;
|
||||
id_type = buffer[offset];
|
||||
offset += 5;
|
||||
|
@ -599,7 +606,7 @@ static VALUE get_value(const char* buffer, int* position, int type) {
|
|||
case 11:
|
||||
{
|
||||
int pattern_length = strlen(buffer + *position);
|
||||
VALUE pattern = rb_str_new(buffer + *position, pattern_length);
|
||||
VALUE pattern = STR_NEW(buffer + *position, pattern_length);
|
||||
int flags_length, flags = 0, i = 0;
|
||||
char extra[10];
|
||||
VALUE argv[3];
|
||||
|
@ -635,7 +642,7 @@ static VALUE get_value(const char* buffer, int* position, int type) {
|
|||
VALUE collection, str, oid, id, argv[2];
|
||||
*position += 4;
|
||||
collection_length = strlen(buffer + *position);
|
||||
collection = rb_str_new(buffer + *position, collection_length);
|
||||
collection = STR_NEW(buffer + *position, collection_length);
|
||||
*position += collection_length + 1;
|
||||
|
||||
str = rb_str_new(buffer + *position, 12);
|
||||
|
@ -662,7 +669,7 @@ static VALUE get_value(const char* buffer, int* position, int type) {
|
|||
VALUE code, scope, argv[2];
|
||||
*position += 8;
|
||||
code_length = strlen(buffer + *position);
|
||||
code = rb_str_new(buffer + *position, code_length);
|
||||
code = STR_NEW(buffer + *position, code_length);
|
||||
*position += code_length + 1;
|
||||
|
||||
memcpy(&scope_size, buffer + *position, 4);
|
||||
|
@ -715,7 +722,7 @@ static VALUE elements_to_hash(const char* buffer, int max) {
|
|||
while (position < max) {
|
||||
int type = (int)buffer[position++];
|
||||
int name_length = strlen(buffer + position);
|
||||
VALUE name = rb_str_new(buffer + position, name_length);
|
||||
VALUE name = STR_NEW(buffer + position, name_length);
|
||||
VALUE value;
|
||||
position += name_length + 1;
|
||||
value = get_value(buffer, &position, type);
|
||||
|
|
|
@ -2,6 +2,7 @@ require 'mkmf'
|
|||
|
||||
# Probe for the headers the extension may use. Each entry lists candidate
# paths in order of preference; probing stops at the first one that is found,
# so later candidates are only checked when earlier ones are missing.
[
  ["ruby/st.h", "st.h"],
  ["ruby/regex.h", "regex.h"],
  ["ruby/encoding.h"]
].each do |candidates|
  candidates.any? { |header| have_header(header) }
end

# Accept --with-cbson-* options, then write the Makefile for the extension.
dir_config('cbson')
create_makefile('mongo_ext/cbson')
|
||||
|
|
|
@ -809,4 +809,24 @@ class DBAPITest < Test::Unit::TestCase
|
|||
@@db.collection("test").find({}, :snapshot => true, :sort => 'a').to_a
|
||||
end
|
||||
end
|
||||
|
||||
# Saves one document holding the same text in three source encodings and
# checks that every string read back with find_one reports UTF-8.
# The body is skipped entirely when RUBY_VERSION is below '1.9'.
def test_encodings
  return unless RUBY_VERSION >= '1.9'

  # [document field, string to store, encoding name the string starts with]
  samples = [
    ["ascii",   "hello world",                      "US-ASCII"],
    ["utf8",    "hello world".encode("UTF-8"),      "UTF-8"],
    ["iso8859", "hello world".encode("ISO-8859-1"), "ISO-8859-1"]
  ]

  # Sanity-check the inputs before they go anywhere near the database.
  samples.each do |_field, text, encoding_name|
    assert_equal encoding_name, text.encoding.name
  end

  @@coll.clear
  document = {}
  samples.each { |field, text, _encoding_name| document[field] = text }
  @@coll.save(document)
  fetched = @@coll.find_one()

  # Iterate over the sample table rather than the saved hash, in case save
  # adds fields of its own to the hash it was given.
  samples.each do |field, _text, _encoding_name|
    assert_equal "UTF-8", fetched[field].encoding.name
  end
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue