mongo-ruby-driver/ext/cbson/cbson.c

354 lines
12 KiB
C
Raw Normal View History

2009-03-04 22:38:06 +00:00
/*
* Copyright 2009 10gen, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* This file contains C implementations of some of the functions needed by the
* bson module. If possible, these implementations should be used to speed up
* BSON encoding and decoding.
*/
2009-03-03 22:07:22 +00:00
#include "ruby.h"
2009-03-04 15:05:50 +00:00
#include "st.h"
2009-03-05 18:16:34 +00:00
#include "regex.h"
2009-03-03 22:07:22 +00:00
#include <assert.h>
#include <limits.h>
#include <stdio.h>
/* Capacity, in bytes, that buffer_new() allocates for a fresh buffer. */
#define INITIAL_BUFFER_SIZE 256
/*
 * Growable byte buffer that serialized BSON is appended to.
 */
typedef struct {
char* buffer; /* backing storage, reallocated by buffer_resize() as it grows */
int size; /* number of bytes currently allocated for `buffer` */
int position; /* offset at which the next write will land */
} bson_buffer;
2009-03-04 15:05:50 +00:00
/* A single 0 byte; its address is handed to buffer_write_bytes() for NUL
 * terminators and for the boolean "false" payload. */
static char zero = 0;
2009-03-04 16:07:22 +00:00
/* A single 1 byte; its address is handed to buffer_write_bytes() for the
 * boolean "true" payload. */
static char one = 1;
2009-03-05 18:16:34 +00:00
/*
 * qsort() comparator ordering two chars by their numeric value.
 * Returns a negative, zero or positive int when *a is less than, equal to
 * or greater than *b.
 */
static int cmp_char(const void* a, const void* b) {
    const char left = *(const char*)a;
    const char right = *(const char*)b;

    return left - right;
}
2009-03-04 16:07:22 +00:00
/* Forward declaration: write_element_allow_id() calls write_doc() for nested
 * hashes before write_doc() is defined. */
static void write_doc(bson_buffer* buffer, VALUE hash);
2009-03-05 20:48:40 +00:00
/* Forward declaration: the array encoder inside write_element_allow_id()
 * calls write_element() before it is defined. */
static int write_element(VALUE key, VALUE value, VALUE extra);
2009-03-04 15:05:50 +00:00
2009-03-03 22:07:22 +00:00
static bson_buffer* buffer_new(void) {
bson_buffer* buffer;
2009-03-05 14:48:12 +00:00
buffer = ALLOC(bson_buffer);
2009-03-03 22:07:22 +00:00
assert(buffer);
buffer->size = INITIAL_BUFFER_SIZE;
buffer->position = 0;
2009-03-05 14:48:12 +00:00
buffer->buffer = ALLOC_N(char, INITIAL_BUFFER_SIZE);
2009-03-03 22:07:22 +00:00
assert(buffer->buffer);
return buffer;
}
/*
 * Releases a buffer created by buffer_new(), including its backing storage.
 *
 * The memory comes from Ruby's ALLOC/ALLOC_N/REALLOC_N macros, so it is
 * returned through xfree() rather than the C library's free(): the two
 * allocators are not guaranteed to be interchangeable.
 * Passing NULL is a no-op.
 */
static void buffer_free(bson_buffer* buffer) {
    if (buffer == NULL) {
        return;
    }
    xfree(buffer->buffer);
    buffer->buffer = NULL;
    xfree(buffer);
}
/*
 * Grows the buffer's storage so that it holds at least min_length bytes.
 * Capacity is doubled until it is large enough; nothing happens when the
 * buffer is already big enough. Existing contents are preserved.
 *
 * REALLOC_N raises NoMemoryError itself on failure, so no NULL check is
 * needed afterwards.
 */
static void buffer_resize(bson_buffer* buffer, int min_length) {
    int new_size = buffer->size;

    if (new_size >= min_length) {
        return;
    }
    if (new_size < 1) {
        /* doubling zero would never terminate */
        new_size = 1;
    }
    while (new_size < min_length) {
        if (new_size > INT_MAX / 2) {
            /* doubling would overflow int: settle for an exact fit */
            new_size = min_length;
            break;
        }
        new_size *= 2;
    }

    /* REALLOC_N assigns the (possibly moved) block back to its first argument */
    REALLOC_N(buffer->buffer, char, new_size);
    buffer->size = new_size;
}
/*
 * Makes sure `size` more bytes can be written at the current position,
 * growing the buffer when necessary.
 *
 * Raises RangeError when `size` is negative or when position + size does
 * not fit in an int; previously either case overflowed silently and could
 * let a later memcpy() run past the allocation.
 */
static void buffer_assure_space(bson_buffer* buffer, int size) {
    if (size < 0 || size > INT_MAX - buffer->position) {
        rb_raise(rb_eRangeError, "BSON buffer size out of range");
    }
    /* subtraction form cannot overflow: 0 <= position <= size of buffer */
    if (size <= buffer->size - buffer->position) {
        return;
    }
    buffer_resize(buffer, buffer->position + size);
}
/*
 * Reserves `size` bytes at the current position without writing to them.
 * Returns the offset of the reserved region so the caller can fill it in
 * later (an offset, not a pointer, because the storage may be reallocated).
 */
static int buffer_save_bytes(bson_buffer* buffer, int size) {
    int reserved_at;

    buffer_assure_space(buffer, size);
    reserved_at = buffer->position;
    buffer->position = reserved_at + size;
    return reserved_at;
}
/*
 * Appends `size` bytes from `bytes` to the buffer, growing it first if
 * needed, and advances the write position past them.
 */
static void buffer_write_bytes(bson_buffer* buffer, const char* bytes, int size) {
    char* destination;

    buffer_assure_space(buffer, size);
    /* take the address only after growing: the storage may have moved */
    destination = buffer->buffer + buffer->position;
    memcpy(destination, bytes, size);
    buffer->position += size;
}
2009-03-04 15:05:50 +00:00
/*
 * Writes an element header: the one-byte type tag, the bytes of the Ruby
 * string `name`, and a terminating NUL.
 */
static void write_name_and_type(bson_buffer* buffer, VALUE name, char type) {
    const char* name_bytes = RSTRING(name)->ptr;
    const int name_length = RSTRING(name)->len;

    buffer_write_bytes(buffer, &type, 1);
    buffer_write_bytes(buffer, name_bytes, name_length);
    buffer_write_bytes(buffer, &zero, 1);
}
2009-03-05 20:48:40 +00:00
static int write_element_allow_id(VALUE key, VALUE value, VALUE extra, int allow_id) {
2009-03-04 15:05:50 +00:00
bson_buffer* buffer = (bson_buffer*)extra;
2009-03-05 15:27:19 +00:00
if (TYPE(key) == T_SYMBOL) {
// TODO better way to do this... ?
key = rb_str_new2(rb_id2name(SYM2ID(key)));
}
if (TYPE(key) != T_STRING) {
rb_raise(rb_eTypeError, "keys must be strings or symbols");
}
2009-03-05 20:48:40 +00:00
if (!allow_id && strcmp("_id", RSTRING(key)->ptr) == 0) {
return ST_CONTINUE;
}
2009-03-05 21:08:10 +00:00
// TODO do this somewhere else, not in the c code...
int is_code = !strcmp("$where", RSTRING(key)->ptr);
2009-03-04 15:05:50 +00:00
switch(TYPE(value)) {
2009-03-04 16:07:22 +00:00
case T_FIXNUM:
write_name_and_type(buffer, key, 0x10);
int int_value = FIX2INT(value);
buffer_write_bytes(buffer, (char*)&int_value, 4);
break;
case T_TRUE:
write_name_and_type(buffer, key, 0x08);
buffer_write_bytes(buffer, &one, 1);
break;
case T_FALSE:
write_name_and_type(buffer, key, 0x08);
buffer_write_bytes(buffer, &zero, 1);
break;
case T_FLOAT:
write_name_and_type(buffer, key, 0x01);
double d = NUM2DBL(value);
buffer_write_bytes(buffer, (char*)&d, 8);
break;
case T_NIL:
write_name_and_type(buffer, key, 0x0A);
break;
case T_HASH:
write_name_and_type(buffer, key, 0x03);
write_doc(buffer, value);
break;
case T_ARRAY:
write_name_and_type(buffer, key, 0x04);
int start_position = buffer->position;
// save space for length
int length_location = buffer_save_bytes(buffer, 4);
int items = RARRAY_LEN(value);
VALUE* values = RARRAY_PTR(value);
int i;
for(i = 0; i < items; i++) {
char* name;
asprintf(&name, "%d", i);
VALUE key = rb_str_new2(name);
write_element(key, values[i], (VALUE)buffer);
free(name);
}
// write null byte and fill in length
buffer_write_bytes(buffer, &zero, 1);
int obj_length = buffer->position - start_position;
memcpy(buffer->buffer + length_location, &obj_length, 4);
break;
2009-03-04 15:05:50 +00:00
case T_STRING:
2009-03-05 21:08:10 +00:00
if (is_code) {
write_name_and_type(buffer, key, 0x0D);
} else {
write_name_and_type(buffer, key, 0x02);
}
2009-03-04 15:05:50 +00:00
int length = RSTRING(value)->len + 1;
buffer_write_bytes(buffer, (char*)&length, 4);
buffer_write_bytes(buffer, RSTRING(value)->ptr, length - 1);
buffer_write_bytes(buffer, &zero, 1);
break;
2009-03-05 16:09:52 +00:00
case T_SYMBOL:
write_name_and_type(buffer, key, 0x0E);
const char* str_value = rb_id2name(SYM2ID(value));
2009-03-05 16:45:04 +00:00
length = strlen(str_value) + 1;
buffer_write_bytes(buffer, (char*)&length, 4);
buffer_write_bytes(buffer, str_value, length);
2009-03-05 16:09:52 +00:00
break;
2009-03-04 22:38:06 +00:00
case T_OBJECT:
{
// TODO there has to be a better way to do these checks...
const char* cls = rb_class2name(RBASIC(value)->klass);
2009-03-05 14:22:55 +00:00
if (strcmp(cls, "XGen::Mongo::Driver::Binary") == 0 ||
strcmp(cls, "ByteBuffer") == 0) {
2009-03-04 22:38:06 +00:00
write_name_and_type(buffer, key, 0x05);
2009-03-05 14:22:55 +00:00
const char subtype = strcmp(cls, "ByteBuffer") ?
(const char)FIX2INT(rb_funcall(value, rb_intern("subtype"), 0)) : 2;
2009-03-04 22:38:06 +00:00
VALUE string_data = rb_funcall(value, rb_intern("to_s"), 0);
int length = RSTRING(string_data)->len;
if (subtype == 2) {
const int other_length = length + 4;
buffer_write_bytes(buffer, (const char*)&other_length, 4);
buffer_write_bytes(buffer, &subtype, 1);
}
buffer_write_bytes(buffer, (const char*)&length, 4);
if (subtype != 2) {
buffer_write_bytes(buffer, &subtype, 1);
}
buffer_write_bytes(buffer, RSTRING(string_data)->ptr, length);
break;
}
2009-03-05 16:38:08 +00:00
if (strcmp(cls, "XGen::Mongo::Driver::ObjectID") == 0) {
write_name_and_type(buffer, key, 0x07);
VALUE as_array = rb_funcall(value, rb_intern("to_a"), 0);
int i;
for (i = 0; i < 12; i++) {
char byte = (char)FIX2INT(RARRAY(as_array)->ptr[i]);
buffer_write_bytes(buffer, &byte, 1);
}
break;
}
2009-03-05 16:45:04 +00:00
if (strcmp(cls, "XGen::Mongo::Driver::DBRef") == 0) {
write_name_and_type(buffer, key, 0x0C);
VALUE ns = rb_funcall(value, rb_intern("namespace"), 0);
int length = RSTRING(ns)->len + 1;
buffer_write_bytes(buffer, (char*)&length, 4);
buffer_write_bytes(buffer, RSTRING(ns)->ptr, length - 1);
buffer_write_bytes(buffer, &zero, 1);
VALUE oid_as_array = rb_funcall(rb_funcall(value, rb_intern("object_id"), 0),
rb_intern("to_a"), 0);
for (i = 0; i < 12; i++) {
char byte = (char)FIX2INT(RARRAY(oid_as_array)->ptr[i]);
buffer_write_bytes(buffer, &byte, 1);
}
break;
}
2009-03-05 16:12:44 +00:00
if (strcmp(cls, "XGen::Mongo::Driver::Undefined") == 0) {
write_name_and_type(buffer, key, 0x06);
break;
}
2009-03-04 22:38:06 +00:00
}
2009-03-05 15:58:56 +00:00
case T_DATA:
{
// TODO again, is this really the only way to do this?
const char* cls = rb_class2name(RBASIC(value)->klass);
if (strcmp(cls, "Time") == 0) {
write_name_and_type(buffer, key, 0x09);
double t = NUM2DBL(rb_funcall(value, rb_intern("to_f"), 0));
long long time_since_epoch = (long long)(t * 1000);
buffer_write_bytes(buffer, (const char*)&time_since_epoch, 8);
break;
}
}
2009-03-05 18:16:34 +00:00
case T_REGEXP:
write_name_and_type(buffer, key, 0x0B);
length = RREGEXP(value)->len;
char* pattern = RREGEXP(value)->str;
buffer_write_bytes(buffer, pattern, length);
buffer_write_bytes(buffer, &zero, 1);
long flags = RREGEXP(value)->ptr->options;
if (flags & RE_OPTION_IGNORECASE) {
char ignorecase = 'i';
buffer_write_bytes(buffer, &ignorecase, 1);
}
if (flags & RE_OPTION_MULTILINE) {
char multiline = 'm';
buffer_write_bytes(buffer, &multiline, 1);
}
if (flags & RE_OPTION_EXTENDED) {
char extended = 'x';
buffer_write_bytes(buffer, &extended, 1);
}
VALUE has_extra = rb_funcall(value, rb_intern("respond_to?"), 1, rb_str_new2("extra_options_str"));
if (TYPE(has_extra) == T_TRUE) {
VALUE extra = rb_funcall(value, rb_intern("extra_options_str"), 0);
int old_position = buffer->position;
buffer_write_bytes(buffer, RSTRING(extra)->ptr, RSTRING(extra)->len);
qsort(buffer->buffer + old_position, RSTRING(extra)->len, sizeof(char), cmp_char);
}
buffer_write_bytes(buffer, &zero, 1);
break;
2009-03-04 15:05:50 +00:00
default:
2009-03-05 18:16:34 +00:00
rb_raise(rb_eTypeError, "no c encoder for this type yet (%d)", TYPE(value));
2009-03-04 15:05:50 +00:00
break;
}
return ST_CONTINUE;
}
2009-03-05 20:48:40 +00:00
/*
 * Encodes one key/value pair into the bson_buffer passed through `extra`,
 * skipping any element named "_id". Used as the rb_hash_foreach() callback.
 */
static int write_element(VALUE key, VALUE value, VALUE extra) {
    const int allow_id = 0;

    return write_element_allow_id(key, value, extra, allow_id);
}
2009-03-04 15:05:50 +00:00
/*
 * Serializes `hash` as a BSON document: a 4-byte total length, the elements,
 * and a terminating NUL.
 *
 * An "_id" entry is written first. The String key "_id" is looked up before
 * the Symbol :_id, and only the first one holding a non-nil value is emitted,
 * so a hash containing both no longer yields two "_id" elements. The
 * remaining entries follow, in key order for an OrderedHash and in
 * rb_hash_foreach() order otherwise.
 */
static void write_doc(bson_buffer* buffer, VALUE hash) {
    int start_position = buffer->position;
    int length_location = buffer_save_bytes(buffer, 4);
    int length;
    VALUE id_key = rb_str_new2("_id");
    VALUE id = rb_hash_aref(hash, id_key);

    if (NIL_P(id)) {
        id_key = ID2SYM(rb_intern("_id"));
        id = rb_hash_aref(hash, id_key);
    }
    if (!NIL_P(id)) {
        write_element_allow_id(id_key, id, (VALUE)buffer, 1);
    }

    // we have to check for an OrderedHash and handle that specially
    if (strcmp(rb_class2name(RBASIC(hash)->klass), "OrderedHash") == 0) {
        VALUE keys = rb_funcall(hash, rb_intern("keys"), 0);
        long i;

        for (i = 0; i < RARRAY_LEN(keys); i++) {
            VALUE key = RARRAY_PTR(keys)[i];
            VALUE value = rb_hash_aref(hash, key);
            write_element(key, value, (VALUE)buffer);
        }
    } else {
        rb_hash_foreach(hash, write_element, (VALUE)buffer);
    }

    // write null byte and fill in length
    buffer_write_bytes(buffer, &zero, 1);
    length = buffer->position - start_position;
    /* copy through the offset: the storage may have been reallocated since it was reserved */
    memcpy(buffer->buffer + length_location, &length, 4);
}
2009-03-03 22:07:22 +00:00
/*
 * rb_ensure() body: encodes the document into the buffer and returns the
 * bytes as a Ruby String. `args` is a VALUE[2] holding the bson_buffer*
 * (cast to VALUE) and the document.
 */
static VALUE serialize_into_buffer(VALUE args) {
    VALUE* argv = (VALUE*)args;
    bson_buffer* buffer = (bson_buffer*)argv[0];

    write_doc(buffer, argv[1]);
    return rb_str_new(buffer->buffer, buffer->position);
}

/* rb_ensure() cleanup: frees the buffer whether or not encoding raised. */
static VALUE release_serialize_buffer(VALUE buffer) {
    buffer_free((bson_buffer*)buffer);
    return Qnil;
}

/*
 * CBson.serialize(doc) -> String
 *
 * Returns the BSON encoding of the Hash `doc`.
 * Raises TypeError when `doc` is not a Hash, and propagates any error raised
 * while encoding. The scratch buffer is released on every exit path; it was
 * previously leaked whenever encoding raised, because rb_raise() jumps past
 * the code that follows it.
 */
static VALUE method_serialize(VALUE self, VALUE doc) {
    VALUE args[2];
    bson_buffer* buffer;

    /* validate before allocating so a bad argument costs nothing */
    Check_Type(doc, T_HASH);

    buffer = buffer_new();
    args[0] = (VALUE)buffer;
    args[1] = doc;
    return rb_ensure(serialize_into_buffer, (VALUE)args,
                     release_serialize_buffer, (VALUE)buffer);
}
/*
 * Extension entry point: defines the CBson module and its `serialize`
 * module function, which takes one argument.
 */
void Init_cbson() {
    VALUE cbson_module = rb_define_module("CBson");

    rb_define_module_function(cbson_module, "serialize", method_serialize, 1);
}