mongo-ruby-driver/lib/mongo/util/bson.rb

550 lines
14 KiB
Ruby
Raw Normal View History

2008-12-17 16:49:06 +00:00
# --
2009-01-06 15:51:01 +00:00
# Copyright (C) 2008-2009 10gen Inc.
2008-12-17 16:43:08 +00:00
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License, version 3, as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License
# for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
2008-12-17 16:49:06 +00:00
# ++
2008-12-17 16:43:08 +00:00
require 'base64'
2008-11-22 01:00:51 +00:00
require 'mongo/util/byte_buffer'
2008-12-08 20:27:20 +00:00
require 'mongo/util/ordered_hash'
2009-01-13 15:23:20 +00:00
require 'mongo/types/binary'
require 'mongo/types/dbref'
require 'mongo/types/objectid'
require 'mongo/types/regexp_of_holding'
require 'mongo/types/undefined'
2008-11-22 01:00:51 +00:00
2008-12-17 16:43:08 +00:00
# A BSON serializer/deserializer.
2008-11-22 01:00:51 +00:00
class BSON
include Mongo
2009-02-02 15:14:06 +00:00
# BSON element type codes. Every serialized element starts with one of these
# bytes, followed by the element's key as a C string.
MINKEY = -1       # not referenced by the code visible in this file
EOO = 0           # end-of-object marker that terminates a document
NUMBER = 1        # written/read as a double
STRING = 2
OBJECT = 3        # embedded document
ARRAY = 4         # embedded document whose keys are the array indices
BINARY = 5
UNDEFINED = 6     # read back as nil
OID = 7
BOOLEAN = 8
DATE = 9          # milliseconds stored as a long
NULL = 10
REGEX = 11
REF = 12
CODE = 13         # read back as a plain string
SYMBOL = 14
CODE_W_SCOPE = 15
NUMBER_INT = 16   # 4-byte integer
TIMESTAMP = 17    # read back as a pair of 4-byte integers
NUMBER_LONG = 18  # 8-byte integer
MAXKEY = 127      # not referenced by the code visible in this file
2008-11-22 01:00:51 +00:00
2009-01-13 17:53:55 +00:00
# Prepares +str+ for writing into a BSON buffer.
#
# On Ruby 1.9 and later the string is transcoded to UTF-8 (String#encode
# raises if the conversion is impossible). On Ruby 1.8 strings carry no
# encoding, so the input is returned untouched.
if RUBY_VERSION >= '1.9'
  def self.to_utf8(str)
    str.encode("utf-8")
  end
else
  def self.to_utf8(str)
    str # TODO Ruby 1.8 punt for now
  end
end
2008-11-22 01:00:51 +00:00
# Appends +val+ to +buf+ as a C string: the bytes of <tt>val.to_s</tt> (after
# to_utf8) followed by a single terminating zero byte.
#
# buf:: object responding to +put_array+
# val:: anything responding to +to_s+ (keys may be symbols or integers)
def self.serialize_cstr(buf, val)
  bytes = to_utf8(val.to_s).unpack("C*")
  buf.put_array(bytes + [0])
end
# Creates a serializer/deserializer backed by a new, empty ByteBuffer.
def initialize
  @buf = ByteBuffer.new
end
# Returns the contents of the internal buffer, as produced by the buffer's
# own +to_a+.
def to_a
  buffer = @buf
  buffer.to_a
end
2009-03-03 22:07:22 +00:00
begin
  require 'mongo_ext/cbson'

  # Serializes +obj+ with the CBson C extension and stores the result in a
  # new internal buffer.
  #
  # obj::        the document to serialize
  # check_keys:: passed straight through to CBson.serialize
  #
  # NOTE: this variant returns the new ByteBuffer (the value of the
  # assignment), whereas the pure-Ruby fallback below returns +self+. Callers
  # in this file only call +to_a+ on the result, which both support.
  def serialize(obj, check_keys=false)
    @buf = ByteBuffer.new(CBson.serialize(obj, check_keys))
  end
rescue LoadError
  # Pure-Ruby fallback, used when the C extension cannot be loaded.
  #
  # Serializes the hash-like +obj+ into the internal buffer: a 4-byte total
  # size, the key/value elements, and a terminating EOO byte.
  #
  # obj::        document responding to +has_key?+, +[]+ and +each+
  # check_keys:: when true, keys starting with '$' or containing '.' are
  #              rejected (see serialize_key_value)
  #
  # Raises RuntimeError ("Document is null") if +obj+ is nil or false.
  # Returns +self+.
  def serialize(obj, check_keys=false)
    raise "Document is null" unless obj

    # Start from an empty buffer. Merely rewinding would keep the bytes of a
    # previous, longer document behind the new one and make the size written
    # below too large.
    @buf = ByteBuffer.new
    # put in a placeholder for the total size
    @buf.put_int(0)

    # Write key/value pairs. Always write _id first if it exists. If both
    # '_id' and :_id are present only the string-keyed value is written.
    if obj.has_key? '_id'
      serialize_key_value('_id', obj['_id'], check_keys)
    elsif obj.has_key? :_id
      serialize_key_value('_id', obj[:_id], check_keys)
    end

    obj.each { |k, v| serialize_key_value(k, v, check_keys) unless k == '_id' || k == :_id }

    serialize_eoo_element(@buf)
    # Back-patch the placeholder with the real document size.
    @buf.put_int(@buf.size, 0)
    self
  end
end
# Serializes one key/value pair into the internal buffer, dispatching on the
# BSON type that bson_type reports for +v+.
#
# k::          the key; converted with +to_s+
# v::          the value
# check_keys:: when true, raises InvalidName for keys that start with '$' or
#              contain '.'; also forwarded to nested objects and arrays
#
# Raises RuntimeError for a type code that has no serializer here.
def serialize_key_value(k, v, check_keys)
  k = k.to_s
  if check_keys
    # Character literals keep this working on 1.8 (Fixnum) and 1.9 (String).
    raise InvalidName.new("key #{k} must not start with '$'") if k[0] == ?$
    raise InvalidName.new("key #{k} must not contain '.'") if k.include? ?.
  end

  type = bson_type(v)
  case type
  when STRING, SYMBOL
    serialize_string_element(@buf, k, v, type)
  when NUMBER, NUMBER_INT
    serialize_number_element(@buf, k, v, type)
  when OBJECT
    serialize_object_element(@buf, k, v, check_keys)
  when OID
    serialize_oid_element(@buf, k, v)
  when ARRAY
    serialize_array_element(@buf, k, v, check_keys)
  when REGEX
    serialize_regex_element(@buf, k, v)
  when BOOLEAN
    serialize_boolean_element(@buf, k, v)
  when DATE
    serialize_date_element(@buf, k, v)
  when NULL, UNDEFINED
    # Undefined values are written as plain nulls.
    serialize_null_element(@buf, k)
  when REF
    serialize_dbref_element(@buf, k, v)
  when BINARY
    serialize_binary_element(@buf, k, v)
  when CODE_W_SCOPE
    serialize_code_w_scope(@buf, k, v)
  else
    raise "unhandled type #{type}"
  end
end
2009-03-10 14:23:42 +00:00
begin
  require 'mongo_ext/cbson'

  # Deserializes a BSON document with the CBson C extension.
  #
  # buf:: a String, or an object responding to +to_a+, holding serialized
  #       BSON. If nil, the internal buffer is used as-is.
  #
  # Returns whatever CBson.deserialize returns for the buffer's string form.
  def deserialize(buf=nil)
    @buf = ByteBuffer.new(buf.is_a?(String) ? buf : buf.to_a) if buf
    @buf.rewind
    CBson.deserialize(@buf.to_s)
  end
rescue LoadError
  # Pure-Ruby fallback, used when the C extension cannot be loaded.
  #
  # buf:: a String, or an object responding to +to_a+, holding serialized
  #       BSON. If buf is nil, @buf is used and assumed to contain
  #       already-serialized BSON; this is only true during testing.
  #
  # Returns an OrderedHash of the decoded elements. NULL and UNDEFINED decode
  # to nil, CODE to a plain string, SYMBOL to a Symbol and TIMESTAMP to a
  # two-element array of integers.
  #
  # Raises RuntimeError for any other type byte (MINKEY/MAXKEY included).
  def deserialize(buf=nil)
    @buf = ByteBuffer.new(buf.is_a?(String) ? buf : buf.to_a) if buf
    @buf.rewind
    @buf.get_int # eat message size
    doc = OrderedHash.new
    while @buf.more?
      type = @buf.get
      break if type == EOO

      # Every element is a type byte followed by its key, so read the key
      # once here. This also lets the error below name the offending key
      # rather than the previous element's.
      key = deserialize_cstr(@buf)
      doc[key] = case type
                 when STRING, CODE
                   deserialize_string_data(@buf)
                 when SYMBOL
                   deserialize_string_data(@buf).intern
                 when NUMBER
                   deserialize_number_data(@buf)
                 when NUMBER_INT
                   deserialize_number_int_data(@buf)
                 when NUMBER_LONG
                   deserialize_number_long_data(@buf)
                 when OID
                   deserialize_oid_data(@buf)
                 when ARRAY
                   deserialize_array_data(@buf)
                 when REGEX
                   deserialize_regex_data(@buf)
                 when OBJECT
                   deserialize_object_data(@buf)
                 when BOOLEAN
                   deserialize_boolean_data(@buf)
                 when DATE
                   deserialize_date_data(@buf)
                 when NULL, UNDEFINED
                   nil
                 when REF
                   deserialize_dbref_data(@buf)
                 when BINARY
                   deserialize_binary_data(@buf)
                 when CODE_W_SCOPE
                   deserialize_code_w_scope_data(@buf)
                 when TIMESTAMP
                   [deserialize_number_int_data(@buf),
                    deserialize_number_int_data(@buf)]
                 else
                   raise "Unknown type #{type}, key = #{key}"
                 end
    end
    @buf.rewind
    doc
  end
end
2009-01-13 17:53:55 +00:00
# For debugging.
2008-11-22 01:00:51 +00:00
# Renders the internal buffer as hex text, eight bytes per row. Each row
# starts with the decimal offset of its first byte, right-aligned in four
# columns and followed by ": "; bytes are two upper-case hex digits separated
# by single spaces. Returns an empty string for an empty buffer.
def hex_dump
  dump = ''
  @buf.to_a.each_with_index do |byte, idx|
    prefix = if idx % 8 != 0
               ' '
             elsif idx > 0
               "\n" + ('%4d: ' % idx)
             else
               '%4d: ' % idx
             end
    dump << prefix
    dump << '%02X' % byte
  end
  dump
end
# Reads an 8-byte millisecond value from +buf+ and returns it as a UTC Time.
#
# buf.get_long yields the value unsigned, so values with the top bit set are
# folded back into the negative range first (same approach as
# deserialize_number_long_data).
def deserialize_date_data(buf)
  unsigned = buf.get_long
  milliseconds = unsigned >= 2**63 ? unsigned - 2**64 : unsigned
  # Integer seconds plus microseconds keep the value exact; dividing as a
  # float can land just below the intended millisecond. Floor division and
  # modulo keep the microsecond part non-negative for negative values.
  Time.at(milliseconds / 1000, (milliseconds % 1000) * 1000).utc
end
# Reads one byte from +buf+ and returns true only when that byte equals 1.
def deserialize_boolean_data(buf)
  flag_byte = buf.get
  flag_byte == 1
end
# Reads a double from +buf+ and returns it unchanged.
def deserialize_number_data(buf)
  value = buf.get_double
  value
end
# Reads a 4-byte integer from +buf+ and returns it as a signed value.
#
# buf.get_int hands the value back unsigned (the same bytes pack as signed
# but unpack as unsigned), so anything with the top bit set is shifted down
# by 2**32.
def deserialize_number_int_data(buf)
  unsigned = buf.get_int
  unsigned >= 2**31 ? unsigned - 2**32 : unsigned
end
# Reads an 8-byte integer from +buf+ and returns it as a signed value. The
# buffer yields it unsigned, so values at or above 2**63 are shifted down by
# 2**64.
def deserialize_number_long_data(buf)
  raw = buf.get_long
  return raw if raw < 2**63
  raw - 2**64
end
# Reads an embedded document from +buf+ and deserializes it with a fresh
# BSON instance.
#
# Returns a DBRef built from the "$ref" and "$id" entries when the decoded
# document has a "$ref" key; otherwise returns the decoded document itself.
def deserialize_object_data(buf)
  size = buf.get_int
  # The size prefix belongs to the embedded document, so step back over it
  # before slicing out the document's bytes.
  buf.position -= 4
  object = BSON.new.deserialize(buf.get(size))
  return object unless object.has_key? "$ref"
  DBRef.new(object["$ref"], object["$id"])
end
# Reads an array element from +buf+. Arrays are stored as embedded documents
# keyed by index, so the document is decoded first and each value is placed
# at <tt>key.to_i</tt>. Indices missing from the document are left as nil.
def deserialize_array_data(buf)
  indexed = deserialize_object_data(buf)
  items = []
  indexed.each do |index, value|
    items[index.to_i] = value
  end
  items
end
2008-12-08 20:57:20 +00:00
# Reads a regular expression from +buf+: two consecutive C strings, the
# pattern and then its option characters.
#
# 'i', 'm' and 'x' map to Regexp::IGNORECASE, MULTILINE and EXTENDED. Any
# other option characters are handed to RegexpOfHolding untouched as its
# third argument.
def deserialize_regex_data(buf)
  str = deserialize_cstr(buf)
  options_str = deserialize_cstr(buf)
  options = 0
  options |= Regexp::IGNORECASE if options_str.include?('i')
  options |= Regexp::MULTILINE if options_str.include?('m')
  options |= Regexp::EXTENDED if options_str.include?('x')
  # Strip the three flags understood above; whatever is left is "extra".
  extra_options = options_str.delete('imx')
  RegexpOfHolding.new(str, options, extra_options)
end
2008-11-22 01:00:51 +00:00
# Reads a length-prefixed string from +buf+ and returns it without its final
# byte (the writer terminates strings with a zero byte).
#
# buf.get(len) may hand back an array of byte values or a string. Arrays are
# packed into a string; strings are used directly. On Ruby 1.9+ the result is
# tagged as UTF-8.
def deserialize_string_data(buf)
  len = buf.get_int
  str = buf.get(len)[0..-2]
  str = str.pack("C*") if str.respond_to? "pack"
  str.force_encoding("utf-8") if RUBY_VERSION >= '1.9'
  str
end
# Reads a code-with-scope element from +buf+ and returns it as a Code built
# from the source string and the decoded scope document.
#
# Layout consumed here: an overall size (ignored), a length-prefixed source
# string ending in a terminator byte, then the scope as an embedded document.
def deserialize_code_w_scope_data(buf)
  buf.get_int # overall element size; not needed
  source_len = buf.get_int
  raw = buf.get(source_len)
  source = raw[0..-2]
  source = source.pack("C*") if source.respond_to? "pack"
  source.force_encoding("utf-8") if RUBY_VERSION >= '1.9'

  # Peek at the scope's size, then step back so the size prefix is included
  # in the bytes given to the nested deserializer.
  scope_len = buf.get_int
  buf.position -= 4
  bindings = BSON.new().deserialize(buf.get(scope_len))
  Code.new(source, bindings)
end
# Reads the next 12 bytes from +buf+ and wraps them in an ObjectID.
def deserialize_oid_data(buf)
  oid_bytes = buf.get(12)
  ObjectID.new(oid_bytes)
end
# Reads a REF element from +buf+: a length-prefixed namespace string followed
# by an object id, in that order. Returns a DBRef built from the two.
def deserialize_dbref_data(buf)
  namespace = deserialize_string_data(buf)
  oid = deserialize_oid_data(buf)
  DBRef.new(namespace, oid)
end
2009-01-12 16:13:49 +00:00
# Reads a binary element from +buf+ and returns it as a Binary carrying the
# payload and its subtype byte.
#
# For Binary::SUBTYPE_BYTES the payload is preceded by a second, inner length
# (see serialize_binary_element), which replaces the outer one.
def deserialize_binary_data(buf)
  len = buf.get_int
  type = buf.get
  len = buf.get_int if type == Binary::SUBTYPE_BYTES
  Binary.new(buf.get(len), type)
end
2008-11-22 01:00:51 +00:00
# Writes the end-of-object marker byte (EOO) to +buf+.
def serialize_eoo_element(buf)
  terminator = EOO
  buf.put(terminator)
end
# Writes a null element to +buf+: the NULL type byte followed by +key+ as a
# C string. A null carries no value bytes.
def serialize_null_element(buf, key)
  buf.put(NULL)
  owner = self.class
  owner.serialize_cstr(buf, key)
end
2009-01-08 19:43:30 +00:00
# Writes a DBRef as an embedded document with the keys '$ref' (the namespace)
# and '$id', in that order. Key checking is disabled for the embedded
# document because both keys start with '$'.
#
# NOTE(review): '$id' is taken from +val.object_id+. This presumably relies
# on DBRef exposing the referenced id under that name rather than Ruby's
# built-in Object#object_id; confirm against the DBRef class.
def serialize_dbref_element(buf, key, val)
  ref_doc = OrderedHash.new
  ref_doc['$ref'] = val.namespace
  ref_doc['$id'] = val.object_id
  serialize_object_element(buf, key, ref_doc, false)
end
# Writes a binary element to +buf+.
#
# val:: object responding to +to_a+ (its bytes). If it also responds to
#       +subtype+ that value is used; otherwise Binary::SUBTYPE_BYTES.
#
# For SUBTYPE_BYTES the payload carries its own inner length, so the outer
# length is four bytes larger. Other subtypes write a single length.
def serialize_binary_element(buf, key, val)
  buf.put(BINARY)
  self.class.serialize_cstr(buf, key)

  bytes = val.to_a
  num_bytes = bytes.length
  subtype = val.respond_to?(:subtype) ? val.subtype : Binary::SUBTYPE_BYTES
  if subtype == Binary::SUBTYPE_BYTES
    buf.put_int(num_bytes + 4)
    buf.put(subtype)
    buf.put_int(num_bytes)
  else
    buf.put_int(num_bytes)
    buf.put(subtype)
  end
  buf.put_array(bytes)
end
2008-11-22 01:00:51 +00:00
# Writes a boolean element to +buf+: the BOOLEAN type byte, +key+ as a
# C string, then a single byte that is 1 for a truthy +val+ and 0 otherwise.
def serialize_boolean_element(buf, key, val)
  buf.put(BOOLEAN)
  self.class.serialize_cstr(buf, key)
  flag = if val then 1 else 0 end
  buf.put(flag)
end
# Writes a date element to +buf+: the DATE type byte, +key+ as a C string,
# then the time as whole milliseconds in an 8-byte long.
#
# val:: a Time (anything responding to +to_i+ and +usec+)
def serialize_date_element(buf, key, val)
  buf.put(DATE)
  self.class.serialize_cstr(buf, key)
  # Integer arithmetic keeps the millisecond exact. Going through to_f can
  # land a hair below the true value and truncate one millisecond low.
  millisecs = val.to_i * 1000 + val.usec / 1000
  buf.put_long(millisecs)
end
# Writes a numeric element to +buf+.
#
# type:: NUMBER writes +val+ as a double. Any other type is treated as an
#        integer: values that fit in a signed 32-bit int are written under
#        +type+ as a 4-byte int, larger ones under NUMBER_LONG as an 8-byte
#        long.
#
# Raises RangeError when an integer does not fit in a signed 64-bit long.
def serialize_number_element(buf, key, val, type)
  if type == NUMBER
    buf.put(type)
    self.class.serialize_cstr(buf, key)
    buf.put_double(val)
  else
    if val > 2**63 - 1 || val < -2**63
      raise RangeError.new("MongoDB can only handle 8-byte ints")
    end

    fits_int32 = val <= 2**31 - 1 && val >= -2**31
    buf.put(fits_int32 ? type : NUMBER_LONG)
    self.class.serialize_cstr(buf, key)
    if fits_int32
      buf.put_int(val)
    else
      buf.put_long(val)
    end
  end
end
# Writes an embedded-document element to +buf+: the type byte, +key+ as a
# C string, then +val+ serialized by a fresh BSON instance.
#
# check_keys:: forwarded to the nested serializer
# opcode::     type byte to write; defaults to OBJECT (serialize_array_element
#              passes ARRAY)
def serialize_object_element(buf, key, val, check_keys, opcode=OBJECT)
  buf.put(opcode)
  self.class.serialize_cstr(buf, key)
  document_bytes = BSON.new.serialize(val, check_keys).to_a
  buf.put_array(document_bytes)
end
# Writes an array element to +buf+. The array is turned into an OrderedHash
# with integer indices as keys and written as an embedded document tagged
# ARRAY.
def serialize_array_element(buf, key, val, check_keys)
  indexed = OrderedHash.new
  val.each_with_index { |item, index| indexed[index] = item }
  serialize_object_element(buf, key, indexed, check_keys, ARRAY)
end
2008-12-08 20:57:20 +00:00
# Writes a regular-expression element to +buf+: the REGEX type byte, +key+,
# the pattern source and the option characters, each as a C string.
#
# val:: a Regexp. If it responds to +extra_options_str+ those characters are
#       added to the 'i'/'m'/'x' flags derived from +val.options+.
def serialize_regex_element(buf, key, val)
  buf.put(REGEX)
  self.class.serialize_cstr(buf, key)

  # Use the pattern exactly as written. Peeling it out of Regexp#to_s
  # ("(?-mix:...)") is lossy, because to_s may fold a leading inline option
  # group into its own flags and escape forward slashes.
  self.class.serialize_cstr(buf, val.source)

  options = val.options
  options_str = ''
  options_str << 'i' if ((options & Regexp::IGNORECASE) != 0)
  options_str << 'm' if ((options & Regexp::MULTILINE) != 0)
  options_str << 'x' if ((options & Regexp::EXTENDED) != 0)
  options_str << val.extra_options_str if val.respond_to?(:extra_options_str)
  # Must store option chars in alphabetical order
  self.class.serialize_cstr(buf, options_str.split(//).sort.uniq.join)
end
2008-11-22 01:00:51 +00:00
# Writes an object-id element to +buf+: the OID type byte, +key+ as a
# C string, then the bytes returned by +val.to_a+.
def serialize_oid_element(buf, key, val)
  buf.put(OID)
  self.class.serialize_cstr(buf, key)
  oid_bytes = val.to_a
  buf.put_array(oid_bytes)
end
# Writes a string-like element to +buf+: the type byte, +key+ as a C string,
# a 4-byte length, then +val+ as a C string.
#
# type:: the type byte to write (serialize_key_value passes STRING or SYMBOL)
#
# The length counts the encoded bytes including the terminator. It is only
# known after the string has been written, so a placeholder is written first
# and patched afterwards.
def serialize_string_element(buf, key, val, type)
  buf.put(type)
  self.class.serialize_cstr(buf, key)

  # Make a hole for the length
  len_pos = buf.position
  buf.put_int(0)

  # Save the string
  start_pos = buf.position
  self.class.serialize_cstr(buf, val)
  end_pos = buf.position

  # Put the string size in front
  buf.put_int(end_pos - start_pos, len_pos)

  # Go back to where we were
  buf.position = end_pos
end
# Writes a code-with-scope element to +buf+: the CODE_W_SCOPE type byte,
# +key+ as a C string, a 4-byte total size, the code as a length-prefixed
# C string, then +val.scope+ serialized as an embedded document.
#
# val:: the code; must respond to +to_s+ and +scope+
#
# Both sizes are measured from buffer positions after writing, so they count
# encoded bytes. Using the string's character count would be too short for
# multibyte UTF-8 code on Ruby 1.9+.
def serialize_code_w_scope(buf, key, val)
  buf.put(CODE_W_SCOPE)
  self.class.serialize_cstr(buf, key)

  # Make a hole for the total size (counted from this position)
  len_pos = buf.position
  buf.put_int(0)

  # Make a hole for the code string's size, then write the string
  str_len_pos = buf.position
  buf.put_int(0)
  str_start = buf.position
  self.class.serialize_cstr(buf, val)
  str_end = buf.position
  buf.put_int(str_end - str_start, str_len_pos)
  buf.position = str_end

  buf.put_array(BSON.new.serialize(val.scope).to_a)

  end_pos = buf.position
  buf.put_int(end_pos - len_pos, len_pos)
  buf.position = end_pos
end
2009-01-09 16:56:30 +00:00
# Reads bytes from +buf+ up to and including the next zero byte and returns
# the bytes before the terminator as a string. On Ruby 1.9+ the result is
# tagged as UTF-8.
def deserialize_cstr(buf)
  bytes = []
  # Collect raw bytes and pack once, rather than appending a one-character
  # string per byte.
  until (b = buf.get) == 0
    bytes << b
  end
  chars = bytes.pack("C*")
  if RUBY_VERSION >= '1.9'
    chars.force_encoding("utf-8") # Mongo stores UTF-8
  end
  chars
end
# Maps a Ruby value to the BSON type code used to serialize it.
#
# Branch order matters where classes are related: Integer is tested before
# the more general Numeric, and Code before String. Undefined values map to
# NULL.
#
# Raises RuntimeError for objects of any other class.
def bson_type(o)
  case o
  when nil         then NULL
  when Integer     then NUMBER_INT
  when Numeric     then NUMBER
  when ByteBuffer  then BINARY
  when Code        then CODE_W_SCOPE
  when String      then STRING
  when Array       then ARRAY
  when Regexp      then REGEX
  when ObjectID    then OID
  when DBRef       then REF
  when true, false then BOOLEAN
  when Time        then DATE
  when Hash        then OBJECT
  when Symbol      then SYMBOL
  when Undefined   then NULL
  else
    raise "Unknown type of object: #{o.class.name}"
  end
end
end