# --
# Copyright (C) 2008-2009 10gen Inc.
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License, version 3, as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License
# for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# ++

require 'base64'
require 'mongo/util/byte_buffer'
require 'mongo/util/ordered_hash'
require 'mongo/types/binary'
require 'mongo/types/dbref'
require 'mongo/types/objectid'
require 'mongo/types/regexp_of_holding'
require 'mongo/types/undefined'

# A BSON serializer/deserializer.
#
# An instance owns a ByteBuffer (@buf). #serialize fills it from a
# Hash-like document; #deserialize reads a document back out of it (or out of
# a buffer passed in). When the optional C extension 'mongo_ext/cbson' can be
# loaded, #serialize and #deserialize delegate to it; otherwise the pure-Ruby
# implementations below are used.
class BSON

  include XGen::Mongo::Driver

  # Element type codes. Each serialized element starts with one of these
  # bytes, followed by the element name as a C string and then the value.
  MINKEY = -1
  EOO = 0
  NUMBER = 1
  STRING = 2
  OBJECT = 3
  ARRAY = 4
  BINARY = 5
  UNDEFINED = 6
  OID = 7
  BOOLEAN = 8
  DATE = 9
  NULL = 10
  REGEX = 11
  REF = 12
  CODE = 13
  SYMBOL = 14
  CODE_W_SCOPE = 15
  NUMBER_INT = 16
  TIMESTAMP = 17
  MAXKEY = 127

  # Returns +str+ as UTF-8. On Ruby 1.9+ the string is transcoded; on 1.8
  # (which has no string encodings) it is returned untouched.
  if RUBY_VERSION >= '1.9'
    def self.to_utf8(str)
      str.encode("utf-8")
    end
  else
    def self.to_utf8(str)
      str                       # TODO Ruby 1.8 punt for now
    end
  end

  # Appends +val+ (converted with to_s, then to UTF-8) to +buf+ as a
  # null-terminated sequence of bytes.
  def self.serialize_cstr(buf, val)
    buf.put_array(to_utf8(val.to_s).unpack("C*") + [0])
  end

  # Creates a serializer with an empty buffer.
  def initialize()
    @buf = ByteBuffer.new
  end

  # Returns the contents of the internal buffer as an array (of byte values,
  # as produced by ByteBuffer#to_a).
  def to_a
    @buf.to_a
  end

  begin
    require 'mongo_ext/cbson'

    # C-extension variant: serializes +obj+ via CBson.serialize and stores the
    # result in a fresh ByteBuffer, which is also the return value.
    #
    # check_keys:: passed through to CBson.serialize.
    def serialize(obj, check_keys=false)
      @buf = ByteBuffer.new(CBson.serialize(obj, check_keys))
    end
  rescue LoadError
    # Pure-Ruby variant: serializes the document +obj+ into the internal
    # buffer and returns +self+.
    #
    # obj::        a Hash-like object (must respond to has_key?, [] and each).
    # check_keys:: when true, keys starting with '$' or containing '.' raise
    #              a RuntimeError (see #serialize_key_value).
    #
    # Raises a RuntimeError ("Document is null") if +obj+ is nil/false.
    def serialize(obj, check_keys=false)
      raise "Document is null" unless obj

      @buf.rewind
      # put in a placeholder for the total size
      @buf.put_int(0)

      # Write key/value pairs. Always write _id first if it exists.
      if obj.has_key? '_id'
        serialize_key_value('_id', obj['_id'], check_keys)
      elsif obj.has_key? :_id
        serialize_key_value('_id', obj[:_id], check_keys)
      end

      # _id (string or symbol form) was handled above; skip it here.
      obj.each {|k, v| serialize_key_value(k, v, check_keys) unless k == '_id' || k == :_id }

      serialize_eoo_element(@buf)
      # Back-patch the placeholder at offset 0 with the real total size.
      @buf.put_int(@buf.size, 0)
      self
    end
  end

  # Serializes one key/value pair into @buf, dispatching on the BSON type
  # that #bson_type reports for +v+.
  #
  # k::          the key; converted with to_s.
  # v::          the value.
  # check_keys:: when true, raises RuntimeError for keys that start with '$'
  #              or contain '.'; also passed down to nested objects/arrays.
  #
  # Raises a RuntimeError for a type with no serializer.
  def serialize_key_value(k, v, check_keys)
    k = k.to_s
    if check_keys
      if k[0] == ?$
        raise RuntimeError.new("key #{k} must not start with '$'")
      end
      if k.include? ?.
        raise RuntimeError.new("key #{k} must not contain '.'")
      end
    end
    type = bson_type(v)
    case type
    when STRING, SYMBOL
      serialize_string_element(@buf, k, v, type)
    when NUMBER, NUMBER_INT
      serialize_number_element(@buf, k, v, type)
    when OBJECT
      serialize_object_element(@buf, k, v, check_keys)
    when OID
      serialize_oid_element(@buf, k, v)
    when ARRAY
      serialize_array_element(@buf, k, v, check_keys)
    when REGEX
      serialize_regex_element(@buf, k, v)
    when BOOLEAN
      serialize_boolean_element(@buf, k, v)
    when DATE
      serialize_date_element(@buf, k, v)
    when NULL
      serialize_null_element(@buf, k)
    when REF
      serialize_dbref_element(@buf, k, v)
    when BINARY
      serialize_binary_element(@buf, k, v)
    when UNDEFINED
      serialize_undefined_element(@buf, k)
    when CODE_W_SCOPE
      serialize_code_w_scope(@buf, k, v)
    else
      raise "unhandled type #{type}"
    end
  end

  begin
    require 'mongo_ext/cbson'

    # C-extension variant: loads +buf+ (a String, or anything responding to
    # to_a) into the internal buffer when given, then hands the buffer's
    # string form to CBson.deserialize and returns its result. With no
    # argument the current contents of @buf are deserialized.
    def deserialize(buf=nil)
      if buf.is_a? String
        @buf = ByteBuffer.new(buf)
      else
        @buf = ByteBuffer.new(buf.to_a) if buf
      end
      @buf.rewind
      CBson.deserialize(@buf.to_s)
    end
  rescue LoadError
    # Pure-Ruby variant: reads one document and returns it as an OrderedHash.
    #
    # buf:: a String, or anything responding to to_a, holding serialized
    #       BSON. If nil, @buf is used and is assumed to contain
    #       already-serialized BSON (this is only true during testing).
    #
    # Raises a RuntimeError on an unrecognized element type byte.
    def deserialize(buf=nil)
      if buf.is_a? String
        @buf = ByteBuffer.new(buf)
      else
        @buf = ByteBuffer.new(buf.to_a) if buf
      end
      @buf.rewind
      @buf.get_int                # eat message size
      doc = OrderedHash.new
      while @buf.more?
        type = @buf.get
        case type
        when STRING, CODE
          key = deserialize_cstr(@buf)
          doc[key] = deserialize_string_data(@buf)
        when SYMBOL
          key = deserialize_cstr(@buf)
          doc[key] = deserialize_string_data(@buf).intern
        when NUMBER
          key = deserialize_cstr(@buf)
          doc[key] = deserialize_number_data(@buf)
        when NUMBER_INT
          key = deserialize_cstr(@buf)
          doc[key] = deserialize_number_int_data(@buf)
        when OID
          key = deserialize_cstr(@buf)
          doc[key] = deserialize_oid_data(@buf)
        when ARRAY
          key = deserialize_cstr(@buf)
          doc[key] = deserialize_array_data(@buf)
        when REGEX
          key = deserialize_cstr(@buf)
          doc[key] = deserialize_regex_data(@buf)
        when OBJECT
          key = deserialize_cstr(@buf)
          doc[key] = deserialize_object_data(@buf)
        when BOOLEAN
          key = deserialize_cstr(@buf)
          doc[key] = deserialize_boolean_data(@buf)
        when DATE
          key = deserialize_cstr(@buf)
          doc[key] = deserialize_date_data(@buf)
        when NULL
          key = deserialize_cstr(@buf)
          doc[key] = nil
        when UNDEFINED
          key = deserialize_cstr(@buf)
          doc[key] = Undefined.new
        when REF
          key = deserialize_cstr(@buf)
          doc[key] = deserialize_dbref_data(@buf)
        when BINARY
          key = deserialize_cstr(@buf)
          doc[key] = deserialize_binary_data(@buf)
        when CODE_W_SCOPE
          key = deserialize_cstr(@buf)
          doc[key] = deserialize_code_w_scope_data(@buf)
        when TIMESTAMP
          # A timestamp is read as two consecutive 4-byte ints and returned
          # as a two-element array.
          key = deserialize_cstr(@buf)
          doc[key] = [deserialize_number_int_data(@buf),
                      deserialize_number_int_data(@buf)]
        when EOO
          break
        else
          # Note: +key+ here is whatever the previous element's key was (or
          # nil for the first element); no key has been read for this one.
          raise "Unknown type #{type}, key = #{key}"
        end
      end
      @buf.rewind
      doc
    end
  end

  # For debugging. Returns the buffer contents as a string with eight bytes
  # per row; each row starts with its decimal offset and each byte is shown
  # as two upper-case hex digits.
  def hex_dump
    str = ''
    @buf.to_a.each_with_index { |b,i|
      if (i % 8) == 0
        str << "\n" if i > 0
        str << '%4d: ' % i
      else
        str << ' '
      end
      str << '%02X' % b
    }
    str
  end

  # Reads a long holding milliseconds and returns the corresponding UTC Time.
  def deserialize_date_data(buf)
    millisecs = buf.get_long()
    Time.at(millisecs.to_f / 1000.0).utc # at() takes fractional seconds
  end

  # Reads one byte; returns true if it equals 1, false otherwise.
  def deserialize_boolean_data(buf)
    buf.get == 1
  end

  # Reads a double.
  def deserialize_number_data(buf)
    buf.get_double
  end

  # Reads a 4-byte int and returns it as a signed value. The buffer hands the
  # value back as unsigned (even though the same code packs it as signed), so
  # anything at or above 2**31 is folded back into the negative range.
  def deserialize_number_int_data(buf)
    unsigned = buf.get_int
    unsigned >= 2**32 / 2 ? unsigned - 2**32 : unsigned
  end

  # Reads an embedded document. If the document has a "$ref" key it is
  # returned as a DBRef built from its "$ref" and "$id" values; otherwise the
  # deserialized document itself is returned.
  def deserialize_object_data(buf)
    size = buf.get_int
    # The size prefix is part of the embedded document, so step back over it
    # and hand the whole thing to a fresh deserializer.
    buf.position -= 4
    object = BSON.new().deserialize(buf.get(size))
    if object.has_key? "$ref"
      DBRef.new(object["$ref"], object["$id"])
    else
      object
    end
  end

  # Reads an array, which is stored as an embedded document whose keys are
  # the element indices. Returns a Ruby Array.
  def deserialize_array_data(buf)
    h = deserialize_object_data(buf)
    a = []
    h.each { |k, v| a[k.to_i] = v }
    a
  end

  # Reads a regular expression (pattern C string, then options C string) and
  # returns a RegexpOfHolding. The 'i', 'm' and 'x' option characters are
  # turned into Regexp option bits; any remaining characters are passed along
  # as the third constructor argument.
  def deserialize_regex_data(buf)
    str = deserialize_cstr(buf)
    options_str = deserialize_cstr(buf)
    options = 0
    options |= Regexp::IGNORECASE if options_str.include?('i')
    options |= Regexp::MULTILINE if options_str.include?('m')
    options |= Regexp::EXTENDED if options_str.include?('x')
    options_str.gsub!(/[imx]/, '') # Now remove the three we understand
    RegexpOfHolding.new(str, options, options_str)
  end

  # Reads a length-prefixed string and returns it without its final
  # (terminator) byte. On Ruby 1.9+ the result is tagged as UTF-8.
  def deserialize_string_data(buf)
    len = buf.get_int
    bytes = buf.get(len)
    str = bytes[0..-2]
    # buf.get may return an array of byte values rather than a String.
    if str.respond_to? "pack"
      str = str.pack("C*")
    end
    if RUBY_VERSION >= '1.9'
      str.force_encoding("utf-8")
    end
    str
  end

  # Reads a code-with-scope value: a total size (ignored), a length-prefixed
  # code string, then the scope as an embedded document. Returns a Code.
  def deserialize_code_w_scope_data(buf)
    buf.get_int                 # total size; not needed
    len = buf.get_int
    code = buf.get(len)[0..-2]
    if code.respond_to? "pack"
      code = code.pack("C*")
    end
    if RUBY_VERSION >= '1.9'
      code.force_encoding("utf-8")
    end

    scope_size = buf.get_int
    buf.position -= 4           # the size prefix belongs to the scope document
    scope = BSON.new().deserialize(buf.get(scope_size))

    Code.new(code, scope)
  end

  # Reads 12 bytes and wraps them in an ObjectID.
  def deserialize_oid_data(buf)
    ObjectID.new(buf.get(12))
  end

  # Reads a namespace string followed by an object id; returns a DBRef.
  def deserialize_dbref_data(buf)
    ns = deserialize_string_data(buf)
    oid = deserialize_oid_data(buf)
    DBRef.new(ns, oid)
  end

  # Reads a binary value: length, subtype byte, then the data. For subtype
  # Binary::SUBTYPE_BYTES the data carries a second, inner length which
  # replaces the first. Returns a Binary.
  def deserialize_binary_data(buf)
    len = buf.get_int
    type = buf.get
    len = buf.get_int if type == Binary::SUBTYPE_BYTES
    Binary.new(buf.get(len), type)
  end

  # Writes the end-of-object marker byte.
  def serialize_eoo_element(buf)
    buf.put(EOO)
  end

  # Writes a null element: type byte and key only.
  def serialize_null_element(buf, key)
    buf.put(NULL)
    self.class.serialize_cstr(buf, key)
  end

  # Writes a DBRef as an embedded document with "$ref" and "$id" keys (in
  # that order). Key checking is disabled because of the '$' prefixes.
  def serialize_dbref_element(buf, key, val)
    oh = OrderedHash.new
    oh['$ref'] = val.namespace
    oh['$id'] = val.object_id
    serialize_object_element(buf, key, oh, false)
  end

  # Writes a binary element. +val+ must respond to to_a; its subtype is taken
  # from val.subtype when available, else Binary::SUBTYPE_BYTES. For
  # SUBTYPE_BYTES the payload is preceded by its own length, and the outer
  # length accounts for those four extra bytes.
  def serialize_binary_element(buf, key, val)
    buf.put(BINARY)
    self.class.serialize_cstr(buf, key)

    bytes = val.to_a
    num_bytes = bytes.length
    subtype = val.respond_to?(:subtype) ? val.subtype : Binary::SUBTYPE_BYTES
    if subtype == Binary::SUBTYPE_BYTES
      buf.put_int(num_bytes + 4)
      buf.put(subtype)
      buf.put_int(num_bytes)
      buf.put_array(bytes)
    else
      buf.put_int(num_bytes)
      buf.put(subtype)
      buf.put_array(bytes)
    end
  end

  # Writes an undefined element: type byte and key only.
  def serialize_undefined_element(buf, key)
    buf.put(UNDEFINED)
    self.class.serialize_cstr(buf, key)
  end

  # Writes a boolean element; the value byte is 1 for a truthy +val+, else 0.
  def serialize_boolean_element(buf, key, val)
    buf.put(BOOLEAN)
    self.class.serialize_cstr(buf, key)
    buf.put(val ? 1 : 0)
  end

  # Writes a date element as a long holding milliseconds, computed from
  # val.to_f (seconds) and truncated to an integer.
  def serialize_date_element(buf, key, val)
    buf.put(DATE)
    self.class.serialize_cstr(buf, key)
    millisecs = (val.to_f * 1000).to_i
    buf.put_long(millisecs)
  end

  # Writes a numeric element. +type+ is NUMBER (written as a double) or
  # NUMBER_INT (written as a 4-byte int).
  #
  # Raises RangeError if an integer does not fit in a signed 32-bit value.
  def serialize_number_element(buf, key, val, type)
    buf.put(type)
    self.class.serialize_cstr(buf, key)
    if type == NUMBER
      buf.put_double(val)
    else
      if val > 2**32 / 2 - 1 or val < -2**32 / 2
        raise RangeError.new("MongoDB can only handle 4-byte ints - try converting to a double before saving")
      end
      buf.put_int(val)
    end
  end

  # Writes an embedded document by serializing +val+ with a fresh BSON
  # instance and appending the resulting bytes.
  #
  # opcode:: the type byte to write; OBJECT by default, ARRAY when called
  #          from #serialize_array_element.
  def serialize_object_element(buf, key, val, check_keys, opcode=OBJECT)
    buf.put(opcode)
    self.class.serialize_cstr(buf, key)
    buf.put_array(BSON.new.serialize(val, check_keys).to_a)
  end

  # Writes an array as an embedded document keyed by element index.
  def serialize_array_element(buf, key, val, check_keys)
    # Turn array into hash with integer indices as keys
    h = OrderedHash.new
    i = 0
    val.each { |v| h[i] = v; i += 1 }

    serialize_object_element(buf, key, h, check_keys, ARRAY)
  end

  # Writes a regular expression element: the pattern as a C string followed
  # by the option characters as a C string.
  def serialize_regex_element(buf, key, val)
    buf.put(REGEX)
    self.class.serialize_cstr(buf, key)

    # Use the pattern text itself. Stripping the "(?flags:" wrapper off
    # Regexp#to_s is not equivalent: to_s folds a pattern that is entirely a
    # single (?flags:...) group into its own wrapper, so /(?i:abc)/ would be
    # written as "abc" and lose its inline flag.
    str = val.source
    self.class.serialize_cstr(buf, str)

    options = val.options
    options_str = ''
    options_str << 'i' if ((options & Regexp::IGNORECASE) != 0)
    options_str << 'm' if ((options & Regexp::MULTILINE) != 0)
    options_str << 'x' if ((options & Regexp::EXTENDED) != 0)
    options_str << val.extra_options_str if val.respond_to?(:extra_options_str)
    # Must store option chars in alphabetical order
    self.class.serialize_cstr(buf, options_str.split(//).sort.uniq.join)
  end

  # Writes an object id element; the id bytes come from val.to_a.
  def serialize_oid_element(buf, key, val)
    buf.put(OID)
    self.class.serialize_cstr(buf, key)

    buf.put_array(val.to_a)
  end

  # Writes a string-like element (+type+ is STRING or SYMBOL): a 4-byte
  # length followed by the null-terminated UTF-8 bytes. The length counts the
  # bytes actually written, terminator included.
  def serialize_string_element(buf, key, val, type)
    buf.put(type)
    self.class.serialize_cstr(buf, key)

    # Make a hole for the length
    len_pos = buf.position
    buf.put_int(0)

    # Save the string
    start_pos = buf.position
    self.class.serialize_cstr(buf, val)
    end_pos = buf.position

    # Put the string size in front
    buf.put_int(end_pos - start_pos, len_pos)

    # Go back to where we were
    buf.position = end_pos
  end

  # Writes a code-with-scope element: total size, length-prefixed code
  # string, then val.scope as an embedded document.
  def serialize_code_w_scope(buf, key, val)
    buf.put(CODE_W_SCOPE)
    self.class.serialize_cstr(buf, key)

    # Make a hole for the total length
    len_pos = buf.position
    buf.put_int(0)

    # Make a hole for the code string length. It is measured from buffer
    # positions after writing rather than taken from val.length, which counts
    # characters (not bytes) on Ruby 1.9+ and so is wrong for non-ASCII code.
    str_len_pos = buf.position
    buf.put_int(0)

    str_start_pos = buf.position
    self.class.serialize_cstr(buf, val)
    str_end_pos = buf.position

    buf.put_int(str_end_pos - str_start_pos, str_len_pos)
    # Return to the end of the string before appending the scope.
    buf.position = str_end_pos

    buf.put_array(BSON.new.serialize(val.scope).to_a)

    end_pos = buf.position
    buf.put_int(end_pos - len_pos, len_pos)
    buf.position = end_pos
  end

  # Reads bytes up to (and consuming) the next zero byte and returns them as
  # a String, tagged as UTF-8 on Ruby 1.9+.
  def deserialize_cstr(buf)
    chars = ""
    while true
      b = buf.get
      break if b == 0
      chars << b.chr
    end
    if RUBY_VERSION >= '1.9'
      chars.force_encoding("utf-8") # Mongo stores UTF-8
    end
    chars
  end

  # Returns the BSON type code used to serialize +o+.
  #
  # The order of the branches matters: Integer is tested before Numeric, and
  # Code before String, so the more specific match wins.
  #
  # Raises a RuntimeError for objects of an unsupported class.
  def bson_type(o)
    case o
    when nil
      NULL
    when Integer
      NUMBER_INT
    when Numeric
      NUMBER
    when ByteBuffer
      BINARY
    when Code
      CODE_W_SCOPE
    when String
      STRING
    when Array
      ARRAY
    when Regexp
      REGEX
    when ObjectID
      OID
    when DBRef
      REF
    when true, false
      BOOLEAN
    when Time
      DATE
    when Hash
      OBJECT
    when Symbol
      SYMBOL
    when Undefined
      UNDEFINED
    else
      raise "Unknown type of object: #{o.class.name}"
    end
  end

end