decode strings as UTF-8 when using C extension on 1.9

This commit is contained in:
Mike Dirolf 2009-09-04 11:02:04 -04:00
parent 3d304b1a02
commit 902c3120c8
3 changed files with 34 additions and 6 deletions

View File

@ -50,6 +50,13 @@ static VALUE RegexpOfHolding;
static VALUE OrderedHash;
static VALUE InvalidName;
// STR_NEW(p, n) creates a Ruby string from the n bytes starting at p.
// When HAVE_RUBY_ENCODING_H is set (presumably by the have_header check for
// "ruby/encoding.h" in extconf.rb -- confirm), the string is created through
// rb_enc_str_new with rb_utf8_encoding(); otherwise STR_NEW is just
// rb_str_new.
#if HAVE_RUBY_ENCODING_H
#include "ruby/encoding.h"
#define STR_NEW(p,n) rb_enc_str_new((p), (n), rb_utf8_encoding())
#else
#define STR_NEW(p,n) rb_str_new((p), (n))
#endif
// this sucks. but for some reason these moved around between 1.8 and 1.9
#ifdef ONIGURUMA_H
#define IGNORECASE ONIG_OPTION_IGNORECASE
@ -495,7 +502,7 @@ static VALUE get_value(const char* buffer, int* position, int type) {
int value_length;
*position += 4;
value_length = strlen(buffer + *position);
value = rb_str_new(buffer+ *position, value_length);
value = STR_NEW(buffer + *position, value_length);
*position += value_length + 1;
break;
}
@ -509,7 +516,7 @@ static VALUE get_value(const char* buffer, int* position, int type) {
int collection_length = strlen(buffer + offset);
char id_type;
argv[0] = rb_str_new(buffer + offset, collection_length);
argv[0] = STR_NEW(buffer + offset, collection_length);
offset += collection_length + 1;
id_type = buffer[offset];
offset += 5;
@ -599,7 +606,7 @@ static VALUE get_value(const char* buffer, int* position, int type) {
case 11:
{
int pattern_length = strlen(buffer + *position);
VALUE pattern = rb_str_new(buffer + *position, pattern_length);
VALUE pattern = STR_NEW(buffer + *position, pattern_length);
int flags_length, flags = 0, i = 0;
char extra[10];
VALUE argv[3];
@ -635,7 +642,7 @@ static VALUE get_value(const char* buffer, int* position, int type) {
VALUE collection, str, oid, id, argv[2];
*position += 4;
collection_length = strlen(buffer + *position);
collection = rb_str_new(buffer + *position, collection_length);
collection = STR_NEW(buffer + *position, collection_length);
*position += collection_length + 1;
str = rb_str_new(buffer + *position, 12);
@ -662,7 +669,7 @@ static VALUE get_value(const char* buffer, int* position, int type) {
VALUE code, scope, argv[2];
*position += 8;
code_length = strlen(buffer + *position);
code = rb_str_new(buffer + *position, code_length);
code = STR_NEW(buffer + *position, code_length);
*position += code_length + 1;
memcpy(&scope_size, buffer + *position, 4);
@ -715,7 +722,7 @@ static VALUE elements_to_hash(const char* buffer, int max) {
while (position < max) {
int type = (int)buffer[position++];
int name_length = strlen(buffer + position);
VALUE name = rb_str_new(buffer + position, name_length);
VALUE name = STR_NEW(buffer + position, name_length);
VALUE value;
position += name_length + 1;
value = get_value(buffer, &position, type);

View File

@ -2,6 +2,7 @@ require 'mkmf'
# For each header that may live under ruby/ or at the top level, probe the
# candidates in order and stop at the first one that is found.
[
  %w[ruby/st.h st.h],
  %w[ruby/regex.h regex.h]
].each do |candidates|
  candidates.any? { |header| have_header(header) }
end

# ruby/encoding.h has no fallback location; it is simply probed for.
have_header('ruby/encoding.h')

dir_config('cbson')
create_makefile('mongo_ext/cbson')

View File

@ -809,4 +809,24 @@ class DBAPITest < Test::Unit::TestCase
@@db.collection("test").find({}, :snapshot => true, :sort => 'a').to_a
end
end
# Checks that string values read back from the collection report UTF-8 as
# their encoding, whichever encoding they carried when they were saved.
#
# Does nothing on interpreters whose String has no #encoding method.
def test_encodings
  # Feature-detect rather than compare RUBY_VERSION strings lexically.
  return unless String.method_defined?(:encoding)

  # Pin every encoding explicitly. A bare literal takes the source file's
  # encoding, which is not US-ASCII by default on newer Rubies, so the
  # precondition below would otherwise fail before the database is touched.
  ascii = "hello world".encode("US-ASCII")
  utf8 = "hello world".encode("UTF-8")
  iso8859 = "hello world".encode("ISO-8859-1")

  # Preconditions: the three inputs really do differ in encoding.
  assert_equal "US-ASCII", ascii.encoding.name
  assert_equal "UTF-8", utf8.encoding.name
  assert_equal "ISO-8859-1", iso8859.encoding.name

  @@coll.clear
  @@coll.save("ascii" => ascii, "utf8" => utf8, "iso8859" => iso8859)
  doc = @@coll.find_one()

  # Every value should come back tagged as UTF-8.
  assert_equal "UTF-8", doc["ascii"].encoding.name
  assert_equal "UTF-8", doc["utf8"].encoding.name
  assert_equal "UTF-8", doc["iso8859"].encoding.name
end
end