2335108162
On OS X, Ruby Enterprise Edition allocates memory with tcmalloc which is not compatible with the system malloc. asprintf() allocates memory with the system malloc so we need to free its result with the system free() function instead of tcmalloc's free() function. REE's system_free() API call does that.
984 lines
34 KiB
C
984 lines
34 KiB
C
/*
|
|
* Copyright 2009-2010 10gen, Inc.
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
/*
|
|
* This file contains C implementations of some of the functions needed by the
|
|
* bson module. If possible, these implementations should be used to speed up
|
|
* BSON encoding and decoding.
|
|
*/
|
|
|
|
#include "ruby.h"
|
|
|
|
/* Ensure compatibility with early releases of Ruby 1.8.5 */
|
|
#ifndef RSTRING_PTR
|
|
# define RSTRING_PTR(v) RSTRING(v)->ptr
|
|
#endif
|
|
|
|
#ifndef RSTRING_LEN
|
|
# define RSTRING_LEN(v) RSTRING(v)->len
|
|
#endif
|
|
#ifndef RSTRING_LENINT
|
|
# define RSTRING_LENINT(v) (int)(RSTRING_LEN(v))
|
|
#endif
|
|
|
|
#ifndef RARRAY_LEN
|
|
# define RARRAY_LEN(v) RARRAY(v)->len
|
|
#endif
|
|
#ifndef RARRAY_LENINT
|
|
# define RARRAY_LENINT(v) (int)(RARRAY_LEN(v))
|
|
#endif
|
|
|
|
#if HAVE_RUBY_ST_H
|
|
#include "ruby/st.h"
|
|
#endif
|
|
#if HAVE_ST_H
|
|
#include "st.h"
|
|
#endif
|
|
|
|
#if HAVE_RUBY_REGEX_H
|
|
#include "ruby/regex.h"
|
|
#endif
|
|
#if HAVE_REGEX_H
|
|
#include "regex.h"
|
|
#endif
|
|
|
|
#include <string.h>
|
|
#include <math.h>
|
|
#include <unistd.h>
|
|
#include <time.h>
|
|
|
|
#include "version.h"
|
|
#include "buffer.h"
|
|
#include "encoding_helpers.h"
|
|
|
|
#define SAFE_WRITE(buffer, data, size) \
|
|
if (buffer_write((buffer), (data), (size)) != 0) \
|
|
rb_raise(rb_eNoMemError, "failed to allocate memory in buffer.c")
|
|
|
|
#define SAFE_WRITE_AT_POS(buffer, position, data, size) \
|
|
if (buffer_write_at_position((buffer), (position), (data), (size)) != 0) \
|
|
rb_raise(rb_eRuntimeError, "invalid write at position in buffer.c")
|
|
|
|
#define MAX_HOSTNAME_LENGTH 256
|
|
|
|
static ID element_assignment_method;
|
|
static ID unpack_method;
|
|
static ID utc_method;
|
|
static ID lt_operator;
|
|
static ID gt_operator;
|
|
|
|
static VALUE Binary;
|
|
static VALUE ObjectId;
|
|
static VALUE DBRef;
|
|
static VALUE Code;
|
|
static VALUE MinKey;
|
|
static VALUE MaxKey;
|
|
static VALUE Regexp;
|
|
static VALUE OrderedHash;
|
|
static VALUE InvalidKeyName;
|
|
static VALUE InvalidStringEncoding;
|
|
static VALUE InvalidDocument;
|
|
static VALUE DigestMD5;
|
|
static VALUE RB_HASH;
|
|
|
|
static int max_bson_size;
|
|
|
|
#if HAVE_RUBY_ENCODING_H
|
|
#include "ruby/encoding.h"
|
|
#define STR_NEW(p,n) \
|
|
({ \
|
|
VALUE _str = rb_enc_str_new((p), (n), rb_utf8_encoding()); \
|
|
rb_encoding* internal_encoding = rb_default_internal_encoding(); \
|
|
if (internal_encoding) { \
|
|
_str = rb_str_export_to_enc(_str, internal_encoding); \
|
|
} \
|
|
_str; \
|
|
})
|
|
/* MUST call TO_UTF8 before calling write_utf8. */
|
|
#define TO_UTF8(string) rb_str_export_to_enc((string), rb_utf8_encoding())
|
|
static void write_utf8(buffer_t buffer, VALUE string, char check_null) {
|
|
result_t status = check_string(RSTRING_PTR(string), RSTRING_LENINT(string),
|
|
0, check_null);
|
|
if (status == HAS_NULL) {
|
|
buffer_free(buffer);
|
|
rb_raise(InvalidDocument, "Key names / regex patterns must not contain the NULL byte");
|
|
}
|
|
SAFE_WRITE(buffer, RSTRING_PTR(string), RSTRING_LENINT(string));
|
|
}
|
|
#else
|
|
#define STR_NEW(p,n) rb_str_new((p), (n))
|
|
/* MUST call TO_UTF8 before calling write_utf8. */
|
|
#define TO_UTF8(string) (string)
|
|
static void write_utf8(buffer_t buffer, VALUE string, char check_null) {
|
|
result_t status = check_string(RSTRING_PTR(string), RSTRING_LEN(string),
|
|
1, check_null);
|
|
if (status == HAS_NULL) {
|
|
buffer_free(buffer);
|
|
rb_raise(InvalidDocument, "Key names / regex patterns must not contain the NULL byte");
|
|
} else if (status == NOT_UTF_8) {
|
|
buffer_free(buffer);
|
|
rb_raise(InvalidStringEncoding, "String not valid UTF-8");
|
|
}
|
|
SAFE_WRITE(buffer, RSTRING_PTR(string), RSTRING_LEN(string));
|
|
}
|
|
#endif
|
|
|
|
// this sucks. but for some reason these moved around between 1.8 and 1.9
|
|
#ifdef ONIGURUMA_H
|
|
#define IGNORECASE ONIG_OPTION_IGNORECASE
|
|
#define MULTILINE ONIG_OPTION_MULTILINE
|
|
#define EXTENDED ONIG_OPTION_EXTEND
|
|
#else
|
|
#define IGNORECASE RE_OPTION_IGNORECASE
|
|
#define MULTILINE RE_OPTION_MULTILINE
|
|
#define EXTENDED RE_OPTION_EXTENDED
|
|
#endif
|
|
|
|
/* TODO we ought to check that the malloc or asprintf was successful
|
|
* and raise an exception if not. */
|
|
/* TODO maybe we can use something more portable like vsnprintf instead
|
|
* of this hack. And share it with the Python extension ;) */
|
|
/* If we don't have ASPRINTF, there are two possibilities:
|
|
* either use _scprintf and _snprintf on for Windows or
|
|
* use snprintf for solaris. */
|
|
#ifndef HAVE_ASPRINTF
|
|
#ifdef _WIN32 || _MSC_VER
|
|
#define INT2STRING(buffer, i) \
|
|
{ \
|
|
int vslength = _scprintf("%d", i) + 1; \
|
|
*buffer = malloc(vslength); \
|
|
_snprintf(*buffer, vslength, "%d", i); \
|
|
}
|
|
#define FREE_INTSTRING(buffer) free(buffer)
|
|
#else
|
|
#define INT2STRING(buffer, i) \
|
|
{ \
|
|
int vslength = snprintf(NULL, 0, "%d", i) + 1; \
|
|
*buffer = malloc(vslength); \
|
|
snprintf(*buffer, vslength, "%d", i); \
|
|
}
|
|
#define FREE_INTSTRING(buffer) free(buffer)
|
|
#endif
|
|
#else
|
|
#define INT2STRING(buffer, i) asprintf(buffer, "%d", i);
|
|
#ifdef USING_SYSTEM_ALLOCATOR_LIBRARY /* Ruby Enterprise Edition with tcmalloc */
|
|
#define FREE_INTSTRING(buffer) system_free(buffer)
|
|
#else
|
|
#define FREE_INTSTRING(buffer) free(buffer)
|
|
#endif
|
|
#endif
|
|
|
|
#ifndef RREGEXP_SRC
|
|
#define RREGEXP_SRC(r) rb_str_new(RREGEXP((r))->str, RREGEXP((r))->len)
|
|
#endif
|
|
|
|
// rubinius compatibility
|
|
#ifndef RREGEXP_OPTIONS
|
|
#define RREGEXP_OPTIONS(r) RREGEXP(value)->ptr->options
|
|
#endif
|
|
|
|
static char zero = 0;
|
|
static char one = 1;
|
|
|
|
static char hostname_digest[17];
|
|
static unsigned int object_id_inc = 0;
|
|
|
|
static int cmp_char(const void* a, const void* b) {
|
|
return *(char*)a - *(char*)b;
|
|
}
|
|
|
|
static void write_doc(buffer_t buffer, VALUE hash, VALUE check_keys, VALUE move_id);
|
|
static int write_element_with_id(VALUE key, VALUE value, VALUE extra);
|
|
static int write_element_without_id(VALUE key, VALUE value, VALUE extra);
|
|
static VALUE elements_to_hash(const char* buffer, int max);
|
|
|
|
static VALUE pack_extra(buffer_t buffer, VALUE check_keys) {
|
|
return rb_ary_new3(2, LL2NUM((long long)buffer), check_keys);
|
|
}
|
|
|
|
static void write_name_and_type(buffer_t buffer, VALUE name, char type) {
|
|
SAFE_WRITE(buffer, &type, 1);
|
|
name = TO_UTF8(name);
|
|
write_utf8(buffer, name, 1);
|
|
SAFE_WRITE(buffer, &zero, 1);
|
|
}
|
|
|
|
static int write_element(VALUE key, VALUE value, VALUE extra, int allow_id) {
|
|
buffer_t buffer = (buffer_t)NUM2LL(rb_ary_entry(extra, 0));
|
|
VALUE check_keys = rb_ary_entry(extra, 1);
|
|
|
|
if (TYPE(key) == T_SYMBOL) {
|
|
// TODO better way to do this... ?
|
|
key = rb_str_new2(rb_id2name(SYM2ID(key)));
|
|
}
|
|
|
|
if (TYPE(key) != T_STRING) {
|
|
buffer_free(buffer);
|
|
rb_raise(rb_eTypeError, "keys must be strings or symbols");
|
|
}
|
|
|
|
if (allow_id == 0 && strcmp("_id", RSTRING_PTR(key)) == 0) {
|
|
return ST_CONTINUE;
|
|
}
|
|
|
|
if (check_keys == Qtrue) {
|
|
int i;
|
|
if (RSTRING_LEN(key) > 0 && RSTRING_PTR(key)[0] == '$') {
|
|
buffer_free(buffer);
|
|
rb_raise(InvalidKeyName, "%s - key must not start with '$'", RSTRING_PTR(key));
|
|
}
|
|
for (i = 0; i < RSTRING_LEN(key); i++) {
|
|
if (RSTRING_PTR(key)[i] == '.') {
|
|
buffer_free(buffer);
|
|
rb_raise(InvalidKeyName, "%s - key must not contain '.'", RSTRING_PTR(key));
|
|
}
|
|
}
|
|
}
|
|
|
|
switch(TYPE(value)) {
|
|
case T_BIGNUM:
|
|
{
|
|
if (rb_funcall(value, gt_operator, 1, LL2NUM(9223372036854775807LL)) == Qtrue ||
|
|
rb_funcall(value, lt_operator, 1, LL2NUM(-9223372036854775808ULL)) == Qtrue) {
|
|
buffer_free(buffer);
|
|
rb_raise(rb_eRangeError, "MongoDB can only handle 8-byte ints");
|
|
}
|
|
}
|
|
// NOTE: falls through to T_FIXNUM code
|
|
case T_FIXNUM:
|
|
{
|
|
long long ll_value;
|
|
ll_value = NUM2LL(value);
|
|
|
|
if (ll_value > 2147483647LL ||
|
|
ll_value < -2147483648LL) {
|
|
write_name_and_type(buffer, key, 0x12);
|
|
SAFE_WRITE(buffer, (char*)&ll_value, 8);
|
|
} else {
|
|
int int_value;
|
|
write_name_and_type(buffer, key, 0x10);
|
|
int_value = (int)ll_value;
|
|
SAFE_WRITE(buffer, (char*)&int_value, 4);
|
|
}
|
|
break;
|
|
}
|
|
case T_TRUE:
|
|
{
|
|
write_name_and_type(buffer, key, 0x08);
|
|
SAFE_WRITE(buffer, &one, 1);
|
|
break;
|
|
}
|
|
case T_FALSE:
|
|
{
|
|
write_name_and_type(buffer, key, 0x08);
|
|
SAFE_WRITE(buffer, &zero, 1);
|
|
break;
|
|
}
|
|
case T_FLOAT:
|
|
{
|
|
double d = NUM2DBL(value);
|
|
write_name_and_type(buffer, key, 0x01);
|
|
SAFE_WRITE(buffer, (char*)&d, 8);
|
|
break;
|
|
}
|
|
case T_NIL:
|
|
{
|
|
write_name_and_type(buffer, key, 0x0A);
|
|
break;
|
|
}
|
|
case T_HASH:
|
|
{
|
|
write_name_and_type(buffer, key, 0x03);
|
|
write_doc(buffer, value, check_keys, Qfalse);
|
|
break;
|
|
}
|
|
case T_ARRAY:
|
|
{
|
|
buffer_position length_location, start_position, obj_length;
|
|
int items, i;
|
|
VALUE* values;
|
|
|
|
write_name_and_type(buffer, key, 0x04);
|
|
start_position = buffer_get_position(buffer);
|
|
|
|
// save space for length
|
|
length_location = buffer_save_space(buffer, 4);
|
|
if (length_location == -1) {
|
|
rb_raise(rb_eNoMemError, "failed to allocate memory in buffer.c");
|
|
}
|
|
|
|
items = RARRAY_LENINT(value);
|
|
for(i = 0; i < items; i++) {
|
|
char* name;
|
|
VALUE key;
|
|
INT2STRING(&name, i);
|
|
key = rb_str_new2(name);
|
|
write_element_with_id(key, rb_ary_entry(value, i), pack_extra(buffer, check_keys));
|
|
FREE_INTSTRING(name);
|
|
}
|
|
|
|
// write null byte and fill in length
|
|
SAFE_WRITE(buffer, &zero, 1);
|
|
obj_length = buffer_get_position(buffer) - start_position;
|
|
SAFE_WRITE_AT_POS(buffer, length_location, (const char*)&obj_length, 4);
|
|
break;
|
|
}
|
|
case T_STRING:
|
|
{
|
|
int length;
|
|
write_name_and_type(buffer, key, 0x02);
|
|
value = TO_UTF8(value);
|
|
length = RSTRING_LENINT(value) + 1;
|
|
SAFE_WRITE(buffer, (char*)&length, 4);
|
|
write_utf8(buffer, value, 0);
|
|
SAFE_WRITE(buffer, &zero, 1);
|
|
break;
|
|
}
|
|
case T_SYMBOL:
|
|
{
|
|
const char* str_value = rb_id2name(SYM2ID(value));
|
|
int length = (int)strlen(str_value) + 1;
|
|
write_name_and_type(buffer, key, 0x0E);
|
|
SAFE_WRITE(buffer, (char*)&length, 4);
|
|
SAFE_WRITE(buffer, str_value, length);
|
|
break;
|
|
}
|
|
case T_OBJECT:
|
|
{
|
|
// TODO there has to be a better way to do these checks...
|
|
const char* cls = rb_obj_classname(value);
|
|
if (strcmp(cls, "BSON::Binary") == 0 ||
|
|
strcmp(cls, "ByteBuffer") == 0) {
|
|
const char subtype = strcmp(cls, "ByteBuffer") ?
|
|
(const char)FIX2INT(rb_funcall(value, rb_intern("subtype"), 0)) : 2;
|
|
VALUE string_data = rb_funcall(value, rb_intern("to_s"), 0);
|
|
int length = RSTRING_LENINT(string_data);
|
|
write_name_and_type(buffer, key, 0x05);
|
|
if (subtype == 2) {
|
|
const int other_length = length + 4;
|
|
SAFE_WRITE(buffer, (const char*)&other_length, 4);
|
|
SAFE_WRITE(buffer, &subtype, 1);
|
|
}
|
|
SAFE_WRITE(buffer, (const char*)&length, 4);
|
|
if (subtype != 2) {
|
|
SAFE_WRITE(buffer, &subtype, 1);
|
|
}
|
|
SAFE_WRITE(buffer, RSTRING_PTR(string_data), length);
|
|
break;
|
|
}
|
|
if (strcmp(cls, "BSON::ObjectId") == 0) {
|
|
VALUE as_array = rb_funcall(value, rb_intern("to_a"), 0);
|
|
int i;
|
|
write_name_and_type(buffer, key, 0x07);
|
|
for (i = 0; i < 12; i++) {
|
|
char byte = (char)FIX2INT(rb_ary_entry(as_array, i));
|
|
SAFE_WRITE(buffer, &byte, 1);
|
|
}
|
|
break;
|
|
}
|
|
if (strcmp(cls, "BSON::DBRef") == 0) {
|
|
buffer_position length_location, start_position, obj_length;
|
|
VALUE ns, oid;
|
|
write_name_and_type(buffer, key, 0x03);
|
|
|
|
start_position = buffer_get_position(buffer);
|
|
|
|
// save space for length
|
|
length_location = buffer_save_space(buffer, 4);
|
|
if (length_location == -1) {
|
|
rb_raise(rb_eNoMemError, "failed to allocate memory in buffer.c");
|
|
}
|
|
|
|
ns = rb_funcall(value, rb_intern("namespace"), 0);
|
|
write_element_with_id(rb_str_new2("$ref"), ns, pack_extra(buffer, Qfalse));
|
|
oid = rb_funcall(value, rb_intern("object_id"), 0);
|
|
write_element_with_id(rb_str_new2("$id"), oid, pack_extra(buffer, Qfalse));
|
|
|
|
// write null byte and fill in length
|
|
SAFE_WRITE(buffer, &zero, 1);
|
|
obj_length = buffer_get_position(buffer) - start_position;
|
|
SAFE_WRITE_AT_POS(buffer, length_location, (const char*)&obj_length, 4);
|
|
break;
|
|
}
|
|
if (strcmp(cls, "BSON::Code") == 0) {
|
|
buffer_position length_location, start_position, total_length;
|
|
int length;
|
|
VALUE code_str;
|
|
write_name_and_type(buffer, key, 0x0F);
|
|
|
|
start_position = buffer_get_position(buffer);
|
|
length_location = buffer_save_space(buffer, 4);
|
|
if (length_location == -1) {
|
|
rb_raise(rb_eNoMemError, "failed to allocate memory in buffer.c");
|
|
}
|
|
|
|
code_str = rb_funcall(value, rb_intern("code"), 0);
|
|
length = RSTRING_LENINT(code_str) + 1;
|
|
SAFE_WRITE(buffer, (char*)&length, 4);
|
|
SAFE_WRITE(buffer, RSTRING_PTR(code_str), length - 1);
|
|
SAFE_WRITE(buffer, &zero, 1);
|
|
write_doc(buffer, rb_funcall(value, rb_intern("scope"), 0), Qfalse, Qfalse);
|
|
|
|
total_length = buffer_get_position(buffer) - start_position;
|
|
SAFE_WRITE_AT_POS(buffer, length_location, (const char*)&total_length, 4);
|
|
break;
|
|
}
|
|
if (strcmp(cls, "BSON::MaxKey") == 0) {
|
|
write_name_and_type(buffer, key, 0x7f);
|
|
break;
|
|
}
|
|
if (strcmp(cls, "BSON::MinKey") == 0) {
|
|
write_name_and_type(buffer, key, 0xff);
|
|
break;
|
|
}
|
|
if (strcmp(cls, "DateTime") == 0 || strcmp(cls, "Date") == 0 || strcmp(cls, "ActiveSupport::TimeWithZone") == 0) {
|
|
buffer_free(buffer);
|
|
rb_raise(InvalidDocument, "%s is not currently supported; use a UTC Time instance instead.", cls);
|
|
break;
|
|
}
|
|
if(strcmp(cls, "Complex") == 0 || strcmp(cls, "Rational") == 0 || strcmp(cls, "BigDecimal") == 0) {
|
|
buffer_free(buffer);
|
|
rb_raise(InvalidDocument, "Cannot serialize the Numeric type %s as BSON; only Bignum, Fixnum, and Float are supported.", cls);
|
|
break;
|
|
}
|
|
buffer_free(buffer);
|
|
rb_raise(InvalidDocument, "Cannot serialize an object of class %s into BSON.", cls);
|
|
break;
|
|
}
|
|
case T_DATA:
|
|
{
|
|
const char* cls = rb_obj_classname(value);
|
|
if (strcmp(cls, "Time") == 0) {
|
|
double t = NUM2DBL(rb_funcall(value, rb_intern("to_f"), 0));
|
|
long long time_since_epoch = (long long)round(t * 1000);
|
|
write_name_and_type(buffer, key, 0x09);
|
|
SAFE_WRITE(buffer, (const char*)&time_since_epoch, 8);
|
|
break;
|
|
}
|
|
if(strcmp(cls, "BigDecimal") == 0) {
|
|
buffer_free(buffer);
|
|
rb_raise(InvalidDocument, "Cannot serialize the Numeric type %s as BSON; only Bignum, Fixnum, and Float are supported.", cls);
|
|
break;
|
|
}
|
|
buffer_free(buffer);
|
|
rb_raise(InvalidDocument, "Cannot serialize an object of class %s into BSON.", cls);
|
|
break;
|
|
}
|
|
case T_REGEXP:
|
|
{
|
|
VALUE pattern = RREGEXP_SRC(value);
|
|
long flags = RREGEXP_OPTIONS(value);
|
|
VALUE has_extra;
|
|
|
|
write_name_and_type(buffer, key, 0x0B);
|
|
|
|
pattern = TO_UTF8(pattern);
|
|
write_utf8(buffer, pattern, 1);
|
|
SAFE_WRITE(buffer, &zero, 1);
|
|
|
|
if (flags & IGNORECASE) {
|
|
char ignorecase = 'i';
|
|
SAFE_WRITE(buffer, &ignorecase, 1);
|
|
}
|
|
if (flags & MULTILINE) {
|
|
char multiline = 'm';
|
|
SAFE_WRITE(buffer, &multiline, 1);
|
|
}
|
|
if (flags & EXTENDED) {
|
|
char extended = 'x';
|
|
SAFE_WRITE(buffer, &extended, 1);
|
|
}
|
|
|
|
has_extra = rb_funcall(value, rb_intern("respond_to?"), 1, rb_str_new2("extra_options_str"));
|
|
if (TYPE(has_extra) == T_TRUE) {
|
|
VALUE extra = rb_funcall(value, rb_intern("extra_options_str"), 0);
|
|
buffer_position old_position = buffer_get_position(buffer);
|
|
SAFE_WRITE(buffer, RSTRING_PTR(extra), RSTRING_LENINT(extra));
|
|
qsort(buffer_get_buffer(buffer) + old_position, RSTRING_LEN(extra), sizeof(char), cmp_char);
|
|
}
|
|
SAFE_WRITE(buffer, &zero, 1);
|
|
|
|
break;
|
|
}
|
|
default:
|
|
{
|
|
const char* cls = rb_obj_classname(value);
|
|
buffer_free(buffer);
|
|
rb_raise(InvalidDocument, "Cannot serialize an object of class %s (type %d) into BSON.", cls, TYPE(value));
|
|
break;
|
|
}
|
|
}
|
|
return ST_CONTINUE;
|
|
}
|
|
|
|
static int write_element_without_id(VALUE key, VALUE value, VALUE extra) {
|
|
return write_element(key, value, extra, 0);
|
|
}
|
|
|
|
static int write_element_with_id(VALUE key, VALUE value, VALUE extra) {
|
|
return write_element(key, value, extra, 1);
|
|
}
|
|
|
|
static void write_doc(buffer_t buffer, VALUE hash, VALUE check_keys, VALUE move_id) {
|
|
buffer_position start_position = buffer_get_position(buffer);
|
|
buffer_position length_location = buffer_save_space(buffer, 4);
|
|
buffer_position length;
|
|
int allow_id;
|
|
int (*write_function)(VALUE, VALUE, VALUE) = NULL;
|
|
VALUE id_str = rb_str_new2("_id");
|
|
VALUE id_sym = ID2SYM(rb_intern("_id"));
|
|
|
|
if (length_location == -1) {
|
|
rb_raise(rb_eNoMemError, "failed to allocate memory in buffer.c");
|
|
}
|
|
|
|
// write '_id' first if move_id is true. then don't allow an id to be written.
|
|
if(move_id == Qtrue) {
|
|
allow_id = 0;
|
|
if (rb_funcall(hash, rb_intern("has_key?"), 1, id_str) == Qtrue) {
|
|
VALUE id = rb_hash_aref(hash, id_str);
|
|
write_element_with_id(id_str, id, pack_extra(buffer, check_keys));
|
|
} else if (rb_funcall(hash, rb_intern("has_key?"), 1, id_sym) == Qtrue) {
|
|
VALUE id = rb_hash_aref(hash, id_sym);
|
|
write_element_with_id(id_sym, id, pack_extra(buffer, check_keys));
|
|
}
|
|
}
|
|
else {
|
|
allow_id = 1;
|
|
// Ensure that hash doesn't contain both '_id' and :_id
|
|
if ((rb_obj_classname(hash), "Hash") == 0) {
|
|
if ((rb_funcall(hash, rb_intern("has_key?"), 1, id_str) == Qtrue) &&
|
|
(rb_funcall(hash, rb_intern("has_key?"), 1, id_sym) == Qtrue)) {
|
|
VALUE oid_sym = rb_hash_delete(hash, id_sym);
|
|
rb_funcall(hash, rb_intern("[]="), 2, id_str, oid_sym);
|
|
}
|
|
}
|
|
}
|
|
|
|
if(allow_id == 1) {
|
|
write_function = write_element_with_id;
|
|
}
|
|
else {
|
|
write_function = write_element_without_id;
|
|
}
|
|
|
|
// we have to check for an OrderedHash and handle that specially
|
|
if (strcmp(rb_obj_classname(hash), "BSON::OrderedHash") == 0) {
|
|
VALUE keys = rb_funcall(hash, rb_intern("keys"), 0);
|
|
int i;
|
|
for(i = 0; i < RARRAY_LEN(keys); i++) {
|
|
VALUE key = rb_ary_entry(keys, i);
|
|
VALUE value = rb_hash_aref(hash, key);
|
|
|
|
write_function(key, value, pack_extra(buffer, check_keys));
|
|
}
|
|
} else if (rb_obj_is_kind_of(hash, RB_HASH) == Qtrue) {
|
|
rb_hash_foreach(hash, write_function, pack_extra(buffer, check_keys));
|
|
} else {
|
|
buffer_free(buffer);
|
|
rb_raise(InvalidDocument, "BSON.serialize takes a Hash but got a %s", rb_obj_classname(hash));
|
|
}
|
|
|
|
// write null byte and fill in length
|
|
SAFE_WRITE(buffer, &zero, 1);
|
|
length = buffer_get_position(buffer) - start_position;
|
|
|
|
// make sure that length doesn't exceed 4MB
|
|
if (length > max_bson_size) {
|
|
buffer_free(buffer);
|
|
rb_raise(InvalidDocument, "Document too large: BSON documents are limited to %d bytes.", max_bson_size);
|
|
return;
|
|
}
|
|
SAFE_WRITE_AT_POS(buffer, length_location, (const char*)&length, 4);
|
|
}
|
|
|
|
static VALUE method_serialize(VALUE self, VALUE doc, VALUE check_keys, VALUE move_id) {
|
|
VALUE result;
|
|
buffer_t buffer = buffer_new();
|
|
if (buffer == NULL) {
|
|
rb_raise(rb_eNoMemError, "failed to allocate memory in buffer.c");
|
|
}
|
|
|
|
write_doc(buffer, doc, check_keys, move_id);
|
|
|
|
result = rb_str_new(buffer_get_buffer(buffer), buffer_get_position(buffer));
|
|
if (buffer_free(buffer) != 0) {
|
|
rb_raise(rb_eRuntimeError, "failed to free buffer");
|
|
}
|
|
return result;
|
|
}
|
|
|
|
static VALUE get_value(const char* buffer, int* position, int type) {
|
|
VALUE value;
|
|
switch (type) {
|
|
case -1:
|
|
{
|
|
value = rb_class_new_instance(0, NULL, MinKey);
|
|
break;
|
|
}
|
|
case 1:
|
|
{
|
|
double d;
|
|
memcpy(&d, buffer + *position, 8);
|
|
value = rb_float_new(d);
|
|
*position += 8;
|
|
break;
|
|
}
|
|
case 2:
|
|
case 13:
|
|
{
|
|
int value_length;
|
|
value_length = *(int*)(buffer + *position) - 1;
|
|
*position += 4;
|
|
value = STR_NEW(buffer + *position, value_length);
|
|
*position += value_length + 1;
|
|
break;
|
|
}
|
|
case 3:
|
|
{
|
|
int size;
|
|
memcpy(&size, buffer + *position, 4);
|
|
if (strcmp(buffer + *position + 5, "$ref") == 0) { // DBRef
|
|
int offset = *position + 10;
|
|
VALUE argv[2];
|
|
int collection_length = *(int*)(buffer + offset) - 1;
|
|
char id_type;
|
|
offset += 4;
|
|
|
|
argv[0] = STR_NEW(buffer + offset, collection_length);
|
|
offset += collection_length + 1;
|
|
id_type = buffer[offset];
|
|
offset += 5;
|
|
argv[1] = get_value(buffer, &offset, (int)id_type);
|
|
value = rb_class_new_instance(2, argv, DBRef);
|
|
} else {
|
|
value = elements_to_hash(buffer + *position + 4, size - 5);
|
|
}
|
|
*position += size;
|
|
break;
|
|
}
|
|
case 4:
|
|
{
|
|
int size, end;
|
|
memcpy(&size, buffer + *position, 4);
|
|
end = *position + size - 1;
|
|
*position += 4;
|
|
|
|
value = rb_ary_new();
|
|
while (*position < end) {
|
|
int type = (int)buffer[(*position)++];
|
|
int key_size = (int)strlen(buffer + *position);
|
|
VALUE to_append;
|
|
|
|
*position += key_size + 1; // just skip the key, they're in order.
|
|
to_append = get_value(buffer, position, type);
|
|
rb_ary_push(value, to_append);
|
|
}
|
|
(*position)++;
|
|
break;
|
|
}
|
|
case 5:
|
|
{
|
|
int length, subtype;
|
|
VALUE data, st;
|
|
VALUE argv[2];
|
|
memcpy(&length, buffer + *position, 4);
|
|
subtype = (unsigned char)buffer[*position + 4];
|
|
if (subtype == 2) {
|
|
data = rb_str_new(buffer + *position + 9, length - 4);
|
|
} else {
|
|
data = rb_str_new(buffer + *position + 5, length);
|
|
}
|
|
st = INT2FIX(subtype);
|
|
argv[0] = data;
|
|
argv[1] = st;
|
|
value = rb_class_new_instance(2, argv, Binary);
|
|
*position += length + 5;
|
|
break;
|
|
}
|
|
case 6:
|
|
{
|
|
value = Qnil;
|
|
break;
|
|
}
|
|
case 7:
|
|
{
|
|
VALUE str = rb_str_new(buffer + *position, 12);
|
|
VALUE oid = rb_funcall(str, unpack_method, 1, rb_str_new2("C*"));
|
|
value = rb_class_new_instance(1, &oid, ObjectId);
|
|
*position += 12;
|
|
break;
|
|
}
|
|
case 8:
|
|
{
|
|
value = buffer[(*position)++] ? Qtrue : Qfalse;
|
|
break;
|
|
}
|
|
case 9:
|
|
{
|
|
long long millis;
|
|
memcpy(&millis, buffer + *position, 8);
|
|
|
|
value = rb_time_new(millis / 1000, (millis % 1000) * 1000);
|
|
value = rb_funcall(value, utc_method, 0);
|
|
*position += 8;
|
|
break;
|
|
}
|
|
case 10:
|
|
{
|
|
value = Qnil;
|
|
break;
|
|
}
|
|
case 11:
|
|
{
|
|
int pattern_length = (int)strlen(buffer + *position);
|
|
VALUE pattern = STR_NEW(buffer + *position, pattern_length);
|
|
int flags_length, flags = 0, i = 0;
|
|
VALUE argv[3];
|
|
*position += pattern_length + 1;
|
|
|
|
flags_length = (int)strlen(buffer + *position);
|
|
for (i = 0; i < flags_length; i++) {
|
|
char flag = buffer[*position + i];
|
|
if (flag == 'i') {
|
|
flags |= IGNORECASE;
|
|
}
|
|
else if (flag == 'm') {
|
|
flags |= MULTILINE;
|
|
}
|
|
else if (flag == 'x') {
|
|
flags |= EXTENDED;
|
|
}
|
|
}
|
|
argv[0] = pattern;
|
|
argv[1] = INT2FIX(flags);
|
|
value = rb_class_new_instance(2, argv, Regexp);
|
|
*position += flags_length + 1;
|
|
break;
|
|
}
|
|
case 12:
|
|
{
|
|
int collection_length;
|
|
VALUE collection, str, oid, id, argv[2];
|
|
collection_length = *(int*)(buffer + *position) - 1;
|
|
*position += 4;
|
|
collection = STR_NEW(buffer + *position, collection_length);
|
|
*position += collection_length + 1;
|
|
|
|
str = rb_str_new(buffer + *position, 12);
|
|
oid = rb_funcall(str, unpack_method, 1, rb_str_new2("C*"));
|
|
id = rb_class_new_instance(1, &oid, ObjectId);
|
|
*position += 12;
|
|
|
|
argv[0] = collection;
|
|
argv[1] = id;
|
|
value = rb_class_new_instance(2, argv, DBRef);
|
|
break;
|
|
}
|
|
case 14:
|
|
{
|
|
int value_length;
|
|
memcpy(&value_length, buffer + *position, 4);
|
|
value = ID2SYM(rb_intern(buffer + *position + 4));
|
|
*position += value_length + 4;
|
|
break;
|
|
}
|
|
case 15:
|
|
{
|
|
int code_length, scope_size;
|
|
VALUE code, scope, argv[2];
|
|
*position += 4;
|
|
code_length = *(int*)(buffer + *position) - 1;
|
|
*position += 4;
|
|
code = STR_NEW(buffer + *position, code_length);
|
|
*position += code_length + 1;
|
|
|
|
memcpy(&scope_size, buffer + *position, 4);
|
|
scope = elements_to_hash(buffer + *position + 4, scope_size - 5);
|
|
*position += scope_size;
|
|
|
|
argv[0] = code;
|
|
argv[1] = scope;
|
|
value = rb_class_new_instance(2, argv, Code);
|
|
break;
|
|
}
|
|
case 16:
|
|
{
|
|
int i;
|
|
memcpy(&i, buffer + *position, 4);
|
|
value = LL2NUM(i);
|
|
*position += 4;
|
|
break;
|
|
}
|
|
case 17:
|
|
{
|
|
int i;
|
|
int j;
|
|
memcpy(&i, buffer + *position, 4);
|
|
memcpy(&j, buffer + *position + 4, 4);
|
|
value = rb_ary_new3(2, LL2NUM(i), LL2NUM(j));
|
|
*position += 8;
|
|
break;
|
|
}
|
|
case 18:
|
|
{
|
|
long long ll;
|
|
memcpy(&ll, buffer + *position, 8);
|
|
value = LL2NUM(ll);
|
|
*position += 8;
|
|
break;
|
|
}
|
|
case 127:
|
|
{
|
|
value = rb_class_new_instance(0, NULL, MaxKey);
|
|
break;
|
|
}
|
|
default:
|
|
{
|
|
rb_raise(rb_eTypeError, "no c decoder for this type yet (%d)", type);
|
|
break;
|
|
}
|
|
}
|
|
return value;
|
|
}
|
|
|
|
static VALUE elements_to_hash(const char* buffer, int max) {
|
|
VALUE hash = rb_class_new_instance(0, NULL, OrderedHash);
|
|
int position = 0;
|
|
while (position < max) {
|
|
int type = (int)buffer[position++];
|
|
int name_length = (int)strlen(buffer + position);
|
|
VALUE name = STR_NEW(buffer + position, name_length);
|
|
VALUE value;
|
|
position += name_length + 1;
|
|
value = get_value(buffer, &position, type);
|
|
rb_funcall(hash, element_assignment_method, 2, name, value);
|
|
}
|
|
return hash;
|
|
}
|
|
|
|
static VALUE method_deserialize(VALUE self, VALUE bson) {
|
|
const char* buffer = RSTRING_PTR(bson);
|
|
int remaining = RSTRING_LENINT(bson);
|
|
|
|
// NOTE we just swallow the size and end byte here
|
|
buffer += 4;
|
|
remaining -= 5;
|
|
|
|
return elements_to_hash(buffer, remaining);
|
|
}
|
|
|
|
static VALUE objectid_generate(VALUE self)
|
|
{
|
|
VALUE oid;
|
|
unsigned char oid_bytes[12];
|
|
unsigned long t, inc;
|
|
unsigned short pid;
|
|
int i;
|
|
|
|
t = htonl((int)time(NULL));
|
|
MEMCPY(&oid_bytes, &t, unsigned char, 4);
|
|
|
|
MEMCPY(&oid_bytes[4], hostname_digest, unsigned char, 3);
|
|
|
|
pid = htons(getpid());
|
|
MEMCPY(&oid_bytes[7], &pid, unsigned char, 2);
|
|
|
|
/* No need to synchronize modification of this counter between threads;
|
|
* MRI global interpreter lock guarantees serializaility.
|
|
*
|
|
* Compiler should optimize out impossible branch.
|
|
*/
|
|
if (sizeof(unsigned int) == 4) {
|
|
object_id_inc++;
|
|
} else {
|
|
object_id_inc = (object_id_inc + 1) % 0xFFFFFF;
|
|
}
|
|
inc = htonl(object_id_inc);
|
|
MEMCPY(&oid_bytes[9], ((unsigned char*)&inc + 1), unsigned char, 3);
|
|
|
|
oid = rb_ary_new2(12);
|
|
for(i = 0; i < 12; i++) {
|
|
rb_ary_store(oid, i, INT2FIX((unsigned int)oid_bytes[i]));
|
|
}
|
|
return oid;
|
|
}
|
|
|
|
static VALUE method_update_max_bson_size(VALUE self, VALUE connection) {
|
|
max_bson_size = FIX2INT(rb_funcall(connection, rb_intern("max_bson_size"), 0));
|
|
return INT2FIX(max_bson_size);
|
|
}
|
|
|
|
static VALUE method_max_bson_size(VALUE self) {
|
|
return INT2FIX(max_bson_size);
|
|
}
|
|
|
|
void Init_cbson() {
|
|
VALUE bson, CBson, Digest, ext_version, digest;
|
|
static char hostname[MAX_HOSTNAME_LENGTH];
|
|
|
|
element_assignment_method = rb_intern("[]=");
|
|
unpack_method = rb_intern("unpack");
|
|
utc_method = rb_intern("utc");
|
|
lt_operator = rb_intern("<");
|
|
gt_operator = rb_intern(">");
|
|
|
|
bson = rb_const_get(rb_cObject, rb_intern("BSON"));
|
|
rb_require("bson/types/binary");
|
|
Binary = rb_const_get(bson, rb_intern("Binary"));
|
|
rb_require("bson/types/object_id");
|
|
ObjectId = rb_const_get(bson, rb_intern("ObjectId"));
|
|
rb_require("bson/types/dbref");
|
|
DBRef = rb_const_get(bson, rb_intern("DBRef"));
|
|
rb_require("bson/types/code");
|
|
Code = rb_const_get(bson, rb_intern("Code"));
|
|
rb_require("bson/types/min_max_keys");
|
|
MinKey = rb_const_get(bson, rb_intern("MinKey"));
|
|
MaxKey = rb_const_get(bson, rb_intern("MaxKey"));
|
|
Regexp = rb_const_get(rb_cObject, rb_intern("Regexp"));
|
|
rb_require("bson/exceptions");
|
|
InvalidKeyName = rb_const_get(bson, rb_intern("InvalidKeyName"));
|
|
InvalidStringEncoding = rb_const_get(bson, rb_intern("InvalidStringEncoding"));
|
|
InvalidDocument = rb_const_get(bson, rb_intern("InvalidDocument"));
|
|
rb_require("bson/ordered_hash");
|
|
OrderedHash = rb_const_get(bson, rb_intern("OrderedHash"));
|
|
RB_HASH = rb_const_get(bson, rb_intern("Hash"));
|
|
|
|
CBson = rb_define_module("CBson");
|
|
ext_version = rb_str_new2(VERSION);
|
|
rb_define_const(CBson, "VERSION", ext_version);
|
|
rb_define_module_function(CBson, "serialize", method_serialize, 3);
|
|
rb_define_module_function(CBson, "deserialize", method_deserialize, 1);
|
|
rb_define_module_function(CBson, "max_bson_size", method_max_bson_size, 0);
|
|
rb_define_module_function(CBson, "update_max_bson_size", method_update_max_bson_size, 1);
|
|
|
|
rb_require("digest/md5");
|
|
Digest = rb_const_get(rb_cObject, rb_intern("Digest"));
|
|
DigestMD5 = rb_const_get(Digest, rb_intern("MD5"));
|
|
|
|
rb_define_method(ObjectId, "generate", objectid_generate, 0);
|
|
|
|
if (gethostname(hostname, MAX_HOSTNAME_LENGTH) != 0) {
|
|
rb_raise(rb_eRuntimeError, "failed to get hostname");
|
|
}
|
|
digest = rb_funcall(DigestMD5, rb_intern("digest"), 1,
|
|
rb_str_new2(hostname));
|
|
memcpy(hostname_digest, RSTRING_PTR(digest), 16);
|
|
hostname_digest[16] = '\0';
|
|
|
|
max_bson_size = 4 * 1024 * 1024;
|
|
}
|