Decode strings as UTF-8 when using the C extension on Ruby 1.9
This commit is contained in:
parent
3d304b1a02
commit
902c3120c8
|
@ -50,6 +50,13 @@ static VALUE RegexpOfHolding;
|
||||||
static VALUE OrderedHash;
|
static VALUE OrderedHash;
|
||||||
static VALUE InvalidName;
|
static VALUE InvalidName;
|
||||||
|
|
||||||
|
#if HAVE_RUBY_ENCODING_H
|
||||||
|
#include "ruby/encoding.h"
|
||||||
|
#define STR_NEW(p,n) rb_enc_str_new((p), (n), rb_utf8_encoding())
|
||||||
|
#else
|
||||||
|
#define STR_NEW(p,n) rb_str_new((p), (n))
|
||||||
|
#endif
|
||||||
|
|
||||||
// this sucks. but for some reason these moved around between 1.8 and 1.9
|
// this sucks. but for some reason these moved around between 1.8 and 1.9
|
||||||
#ifdef ONIGURUMA_H
|
#ifdef ONIGURUMA_H
|
||||||
#define IGNORECASE ONIG_OPTION_IGNORECASE
|
#define IGNORECASE ONIG_OPTION_IGNORECASE
|
||||||
|
@ -495,7 +502,7 @@ static VALUE get_value(const char* buffer, int* position, int type) {
|
||||||
int value_length;
|
int value_length;
|
||||||
*position += 4;
|
*position += 4;
|
||||||
value_length = strlen(buffer + *position);
|
value_length = strlen(buffer + *position);
|
||||||
value = rb_str_new(buffer+ *position, value_length);
|
value = STR_NEW(buffer + *position, value_length);
|
||||||
*position += value_length + 1;
|
*position += value_length + 1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -509,7 +516,7 @@ static VALUE get_value(const char* buffer, int* position, int type) {
|
||||||
int collection_length = strlen(buffer + offset);
|
int collection_length = strlen(buffer + offset);
|
||||||
char id_type;
|
char id_type;
|
||||||
|
|
||||||
argv[0] = rb_str_new(buffer + offset, collection_length);
|
argv[0] = STR_NEW(buffer + offset, collection_length);
|
||||||
offset += collection_length + 1;
|
offset += collection_length + 1;
|
||||||
id_type = buffer[offset];
|
id_type = buffer[offset];
|
||||||
offset += 5;
|
offset += 5;
|
||||||
|
@ -599,7 +606,7 @@ static VALUE get_value(const char* buffer, int* position, int type) {
|
||||||
case 11:
|
case 11:
|
||||||
{
|
{
|
||||||
int pattern_length = strlen(buffer + *position);
|
int pattern_length = strlen(buffer + *position);
|
||||||
VALUE pattern = rb_str_new(buffer + *position, pattern_length);
|
VALUE pattern = STR_NEW(buffer + *position, pattern_length);
|
||||||
int flags_length, flags = 0, i = 0;
|
int flags_length, flags = 0, i = 0;
|
||||||
char extra[10];
|
char extra[10];
|
||||||
VALUE argv[3];
|
VALUE argv[3];
|
||||||
|
@ -635,7 +642,7 @@ static VALUE get_value(const char* buffer, int* position, int type) {
|
||||||
VALUE collection, str, oid, id, argv[2];
|
VALUE collection, str, oid, id, argv[2];
|
||||||
*position += 4;
|
*position += 4;
|
||||||
collection_length = strlen(buffer + *position);
|
collection_length = strlen(buffer + *position);
|
||||||
collection = rb_str_new(buffer + *position, collection_length);
|
collection = STR_NEW(buffer + *position, collection_length);
|
||||||
*position += collection_length + 1;
|
*position += collection_length + 1;
|
||||||
|
|
||||||
str = rb_str_new(buffer + *position, 12);
|
str = rb_str_new(buffer + *position, 12);
|
||||||
|
@ -662,7 +669,7 @@ static VALUE get_value(const char* buffer, int* position, int type) {
|
||||||
VALUE code, scope, argv[2];
|
VALUE code, scope, argv[2];
|
||||||
*position += 8;
|
*position += 8;
|
||||||
code_length = strlen(buffer + *position);
|
code_length = strlen(buffer + *position);
|
||||||
code = rb_str_new(buffer + *position, code_length);
|
code = STR_NEW(buffer + *position, code_length);
|
||||||
*position += code_length + 1;
|
*position += code_length + 1;
|
||||||
|
|
||||||
memcpy(&scope_size, buffer + *position, 4);
|
memcpy(&scope_size, buffer + *position, 4);
|
||||||
|
@ -715,7 +722,7 @@ static VALUE elements_to_hash(const char* buffer, int max) {
|
||||||
while (position < max) {
|
while (position < max) {
|
||||||
int type = (int)buffer[position++];
|
int type = (int)buffer[position++];
|
||||||
int name_length = strlen(buffer + position);
|
int name_length = strlen(buffer + position);
|
||||||
VALUE name = rb_str_new(buffer + position, name_length);
|
VALUE name = STR_NEW(buffer + position, name_length);
|
||||||
VALUE value;
|
VALUE value;
|
||||||
position += name_length + 1;
|
position += name_length + 1;
|
||||||
value = get_value(buffer, &position, type);
|
value = get_value(buffer, &position, type);
|
||||||
|
|
|
@ -2,6 +2,7 @@ require 'mkmf'
|
||||||
|
|
||||||
have_header("ruby/st.h") || have_header("st.h")
|
have_header("ruby/st.h") || have_header("st.h")
|
||||||
have_header("ruby/regex.h") || have_header("regex.h")
|
have_header("ruby/regex.h") || have_header("regex.h")
|
||||||
|
have_header("ruby/encoding.h")
|
||||||
|
|
||||||
dir_config('cbson')
|
dir_config('cbson')
|
||||||
create_makefile('mongo_ext/cbson')
|
create_makefile('mongo_ext/cbson')
|
||||||
|
|
|
@ -809,4 +809,24 @@ class DBAPITest < Test::Unit::TestCase
|
||||||
@@db.collection("test").find({}, :snapshot => true, :sort => 'a').to_a
|
@@db.collection("test").find({}, :snapshot => true, :sort => 'a').to_a
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_encodings
|
||||||
|
if RUBY_VERSION >= '1.9'
|
||||||
|
ascii = "hello world"
|
||||||
|
utf8 = "hello world".encode("UTF-8")
|
||||||
|
iso8859 = "hello world".encode("ISO-8859-1")
|
||||||
|
|
||||||
|
assert_equal "US-ASCII", ascii.encoding.name
|
||||||
|
assert_equal "UTF-8", utf8.encoding.name
|
||||||
|
assert_equal "ISO-8859-1", iso8859.encoding.name
|
||||||
|
|
||||||
|
@@coll.clear
|
||||||
|
@@coll.save("ascii" => ascii, "utf8" => utf8, "iso8859" => iso8859)
|
||||||
|
doc = @@coll.find_one()
|
||||||
|
|
||||||
|
assert_equal "UTF-8", doc["ascii"].encoding.name
|
||||||
|
assert_equal "UTF-8", doc["utf8"].encoding.name
|
||||||
|
assert_equal "UTF-8", doc["iso8859"].encoding.name
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue