# -- # Copyright (C) 2008-2009 10gen Inc. # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU Affero General Public License, version 3, as # published by the Free Software Foundation. # # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License # for more details. # # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . # ++ require 'base64' require 'mongo/util/byte_buffer' require 'mongo/util/ordered_hash' require 'mongo/types/binary' require 'mongo/types/dbref' require 'mongo/types/objectid' require 'mongo/types/regexp_of_holding' require 'mongo/types/undefined' # A BSON seralizer/deserializer. class BSON include XGen::Mongo::Driver MINKEY = -1 EOO = 0 NUMBER = 1 STRING = 2 OBJECT = 3 ARRAY = 4 BINARY = 5 UNDEFINED = 6 OID = 7 BOOLEAN = 8 DATE = 9 NULL = 10 REGEX = 11 REF = 12 CODE = 13 SYMBOL = 14 CODE_W_SCOPE = 15 NUMBER_INT = 16 MAXKEY = 127 if RUBY_VERSION >= '1.9' def self.to_utf8(str) str.encode("utf-8") end else def self.to_utf8(str) str # TODO Ruby 1.8 punt for now end end def self.serialize_cstr(buf, val) buf.put_array(to_utf8(val.to_s).unpack("C*") + [0]) end def initialize(db=nil) # db is only needed during deserialization when the data contains a DBRef @db = db @buf = ByteBuffer.new end def to_a @buf.to_a end begin require 'mongo/ext/cbson' def serialize(obj) @buf = ByteBuffer.new(CBson.serialize(obj)) end rescue LoadError def serialize(obj) raise "Document is null" unless obj @buf.rewind # put in a placeholder for the total size @buf.put_int(0) # Write key/value pairs. Always write _id first if it exists. oid = obj['_id'] || obj[:_id] serialize_key_value('_id', oid) if oid obj.each {|k, v| serialize_key_value(k, v) unless k == '_id' || k == :_id } serialize_eoo_element(@buf) @buf.put_int(@buf.size, 0) self end end def serialize_key_value(k, v) type = bson_type(v, k) case type when STRING, CODE, SYMBOL serialize_string_element(@buf, k, v, type) when NUMBER, NUMBER_INT serialize_number_element(@buf, k, v, type) when OBJECT serialize_object_element(@buf, k, v) when OID serialize_oid_element(@buf, k, v) when ARRAY serialize_array_element(@buf, k, v) when REGEX serialize_regex_element(@buf, k, v) when BOOLEAN serialize_boolean_element(@buf, k, v) when DATE serialize_date_element(@buf, k, v) when NULL serialize_null_element(@buf, k) when REF serialize_dbref_element(@buf, k, v) when BINARY serialize_binary_element(@buf, k, v) when UNDEFINED serialize_undefined_element(@buf, k) when CODE_W_SCOPE # TODO CODE_W_SCOPE unimplemented; may be removed raise "unimplemented type #{type}" else raise "unhandled type #{type}" end end def deserialize(buf=nil, parent=nil) # If buf is nil, use @buf, assumed to contain already-serialized BSON. # This is only true during testing. @buf = ByteBuffer.new(buf.to_a) if buf @buf.rewind @buf.get_int # eat message size doc = OrderedHash.new while @buf.more? type = @buf.get case type when STRING, CODE key = deserialize_cstr(@buf) doc[key] = deserialize_string_data(@buf) when SYMBOL key = deserialize_cstr(@buf) doc[key] = deserialize_string_data(@buf).intern when NUMBER key = deserialize_cstr(@buf) doc[key] = deserialize_number_data(@buf) when NUMBER_INT key = deserialize_cstr(@buf) doc[key] = deserialize_number_int_data(@buf) when OID key = deserialize_cstr(@buf) doc[key] = deserialize_oid_data(@buf) when ARRAY key = deserialize_cstr(@buf) doc[key] = deserialize_array_data(@buf, doc) when REGEX key = deserialize_cstr(@buf) doc[key] = deserialize_regex_data(@buf) when OBJECT key = deserialize_cstr(@buf) doc[key] = deserialize_object_data(@buf, doc) when BOOLEAN key = deserialize_cstr(@buf) doc[key] = deserialize_boolean_data(@buf) when DATE key = deserialize_cstr(@buf) doc[key] = deserialize_date_data(@buf) when NULL key = deserialize_cstr(@buf) doc[key] = nil when UNDEFINED key = deserialize_cstr(@buf) doc[key] = Undefined.new when REF key = deserialize_cstr(@buf) doc[key] = deserialize_dbref_data(@buf, key, parent) when BINARY key = deserialize_cstr(@buf) doc[key] = deserialize_binary_data(@buf) when CODE_W_SCOPE # TODO CODE_W_SCOPE unimplemented; may be removed raise "unimplemented type #{type}" when EOO break else raise "Unknown type #{type}, key = #{key}" end end @buf.rewind doc end # For debugging. def hex_dump str = '' @buf.to_a.each_with_index { |b,i| if (i % 8) == 0 str << "\n" if i > 0 str << '%4d: ' % i else str << ' ' end str << '%02X' % b } str end def deserialize_date_data(buf) millisecs = buf.get_long() Time.at(millisecs.to_f / 1000.0) # at() takes fractional seconds end def deserialize_boolean_data(buf) buf.get == 1 end def deserialize_number_data(buf) buf.get_double end def deserialize_number_int_data(buf) buf.get_int end def deserialize_object_data(buf, parent) size = buf.get_int buf.position -= 4 BSON.new(@db).deserialize(buf.get(size), parent) end def deserialize_array_data(buf, parent) h = deserialize_object_data(buf, parent) a = [] h.each { |k, v| a[k.to_i] = v } a end def deserialize_regex_data(buf) str = deserialize_cstr(buf) options_str = deserialize_cstr(buf) options = 0 options |= Regexp::IGNORECASE if options_str.include?('i') options |= Regexp::MULTILINE if options_str.include?('m') options |= Regexp::EXTENDED if options_str.include?('x') options_str.gsub!(/[imx]/, '') # Now remove the three we understand RegexpOfHolding.new(str, options, options_str) end def deserialize_string_data(buf) len = buf.get_int bytes = buf.get(len) str = bytes[0..-2].pack("C*") if RUBY_VERSION >= '1.9' str.force_encoding("utf-8") end str end def deserialize_oid_data(buf) ObjectID.new(buf.get(12)) end def deserialize_dbref_data(buf, key, parent) ns = deserialize_string_data(buf) oid = deserialize_oid_data(buf) DBRef.new(parent, key, @db, ns, oid) end def deserialize_binary_data(buf) len = buf.get_int type = buf.get len = buf.get_int if type == Binary::SUBTYPE_BYTES Binary.new(buf.get(len), type) end def serialize_eoo_element(buf) buf.put(EOO) end def serialize_null_element(buf, key) buf.put(NULL) self.class.serialize_cstr(buf, key) end def serialize_dbref_element(buf, key, val) serialize_string_element(buf, key, val.namespace, REF) buf.put_array(val.object_id.to_a) end def serialize_binary_element(buf, key, val) buf.put(BINARY) self.class.serialize_cstr(buf, key) bytes = val.to_a num_bytes = bytes.length subtype = val.respond_to?(:subtype) ? val.subtype : Binary::SUBTYPE_BYTES if subtype == Binary::SUBTYPE_BYTES buf.put_int(num_bytes + 4) buf.put(subtype) buf.put_int(num_bytes) buf.put_array(bytes) else buf.put_int(num_bytes) buf.put(subtype) buf.put_array(bytes) end end def serialize_undefined_element(buf, key) buf.put(UNDEFINED) self.class.serialize_cstr(buf, key) end def serialize_boolean_element(buf, key, val) buf.put(BOOLEAN) self.class.serialize_cstr(buf, key) buf.put(val ? 1 : 0) end def serialize_date_element(buf, key, val) buf.put(DATE) self.class.serialize_cstr(buf, key) millisecs = (val.to_f * 1000).to_i buf.put_long(millisecs) end def serialize_number_element(buf, key, val, type) buf.put(type) self.class.serialize_cstr(buf, key) if type == NUMBER buf.put_double(val) else buf.put_int(val) end end def serialize_object_element(buf, key, val, opcode=OBJECT) buf.put(opcode) self.class.serialize_cstr(buf, key) buf.put_array(BSON.new.serialize(val).to_a) end def serialize_array_element(buf, key, val) # Turn array into hash with integer indices as keys h = OrderedHash.new i = 0 val.each { |v| h[i] = v; i += 1 } serialize_object_element(buf, key, h, ARRAY) end def serialize_regex_element(buf, key, val) buf.put(REGEX) self.class.serialize_cstr(buf, key) str = val.to_s.sub(/.*?:/, '')[0..-2] # Turn "(?xxx:yyy)" into "yyy" self.class.serialize_cstr(buf, str) options = val.options options_str = '' options_str << 'i' if ((options & Regexp::IGNORECASE) != 0) options_str << 'm' if ((options & Regexp::MULTILINE) != 0) options_str << 'x' if ((options & Regexp::EXTENDED) != 0) options_str << val.extra_options_str if val.respond_to?(:extra_options_str) # Must store option chars in alphabetical order self.class.serialize_cstr(buf, options_str.split(//).sort.uniq.join) end def serialize_oid_element(buf, key, val) buf.put(OID) self.class.serialize_cstr(buf, key) buf.put_array(val.to_a) end def serialize_string_element(buf, key, val, type) buf.put(type) self.class.serialize_cstr(buf, key) # Make a hole for the length len_pos = buf.position buf.put_int(0) # Save the string start_pos = buf.position self.class.serialize_cstr(buf, val) end_pos = buf.position # Put the string size in front buf.put_int(end_pos - start_pos, len_pos) # Go back to where we were buf.position = end_pos end def deserialize_cstr(buf) chars = "" while true b = buf.get break if b == 0 chars << b.chr end if RUBY_VERSION >= '1.9' chars.force_encoding("utf-8") # Mongo stores UTF-8 end chars end def bson_type(o, key) case o when nil NULL when Integer NUMBER_INT when Numeric NUMBER when ByteBuffer BINARY when String # magic awful stuff - the DB requires that a where clause is sent as CODE key == "$where" ? CODE : STRING when Array ARRAY when Regexp REGEX when ObjectID OID when DBRef REF when true, false BOOLEAN when Time DATE when Hash OBJECT when Symbol SYMBOL when Undefined UNDEFINED else raise "Unknown type of object: #{o.class.name}" end end end