mongo-ruby-driver/lib/bson/bson_ruby.rb

642 lines
17 KiB
Ruby
Raw Normal View History

# encoding: UTF-8
2010-04-05 18:09:06 +00:00
# --
2011-01-17 17:26:32 +00:00
# Copyright (C) 2008-2011 10gen Inc.
2010-04-05 18:09:06 +00:00
#
2010-05-26 02:24:29 +00:00
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
2010-04-05 18:09:06 +00:00
#
2010-05-26 02:24:29 +00:00
# http://www.apache.org/licenses/LICENSE-2.0
2010-04-05 18:09:06 +00:00
#
2010-05-26 02:24:29 +00:00
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
2010-04-05 18:09:06 +00:00
# ++
module BSON
# A BSON seralizer/deserializer in pure Ruby.
class BSON_RUBY
DEFAULT_MAX_BSON_SIZE = 4 * 1024 * 1024
@@max_bson_size = DEFAULT_MAX_BSON_SIZE
2010-04-05 18:09:06 +00:00
MINKEY = -1
EOO = 0
NUMBER = 1
STRING = 2
OBJECT = 3
ARRAY = 4
BINARY = 5
UNDEFINED = 6
OID = 7
BOOLEAN = 8
DATE = 9
NULL = 10
REGEX = 11
REF = 12
CODE = 13
SYMBOL = 14
CODE_W_SCOPE = 15
NUMBER_INT = 16
TIMESTAMP = 17
NUMBER_LONG = 18
MAXKEY = 127
def initialize(max_bson_size=BSON::DEFAULT_MAX_BSON_SIZE)
@buf = ByteBuffer.new('', max_bson_size)
2010-09-30 13:43:17 +00:00
@encoder = BSON_RUBY
2010-04-05 18:09:06 +00:00
end
if RUBY_VERSION >= '1.9'
NULL_BYTE = "\0".force_encoding('binary').freeze
UTF8_ENCODING = Encoding.find('utf-8')
BINARY_ENCODING = Encoding.find('binary')
def self.to_utf8_binary(str)
2011-03-24 16:11:12 +00:00
begin
str.unpack("U*")
rescue => ex
raise InvalidStringEncoding, "String not valid utf-8: #{str.inspect}"
end
str.encode(UTF8_ENCODING).force_encoding(BINARY_ENCODING)
2010-04-05 18:09:06 +00:00
end
else
NULL_BYTE = "\0"
def self.to_utf8_binary(str)
2010-04-05 18:09:06 +00:00
begin
str.unpack("U*")
rescue => ex
raise InvalidStringEncoding, "String not valid utf-8: #{str.inspect}"
2010-04-05 18:09:06 +00:00
end
str
end
end
def self.update_max_bson_size(connection)
warn "BSON::BSON_CODER.update_max_bson_size is deprecated and now a no-op. It will be removed in v2.0."
@@max_bson_size = connection.max_bson_size
end
def self.max_bson_size
warn "BSON::BSON_CODER.max_bson_size is deprecated and will be removed in v2.0."
@@max_bson_size
end
2010-04-05 18:09:06 +00:00
def self.serialize_cstr(buf, val)
buf.put_binary(to_utf8_binary(val.to_s))
buf.put_binary(NULL_BYTE)
2010-04-05 18:09:06 +00:00
end
def self.serialize_key(buf, key)
raise InvalidDocument, "Key names / regex patterns must not contain the NULL byte" if key.include? "\x00"
self.serialize_cstr(buf, key)
end
def to_a
@buf.to_a
end
def to_s
@buf.to_s
end
# Serializes an object.
# Implemented to ensure an API compatible with BSON extension.
def self.serialize(obj, check_keys=false, move_id=false, max_bson_size=BSON::DEFAULT_MAX_BSON_SIZE)
new(max_bson_size).serialize(obj, check_keys, move_id)
2010-04-05 18:09:06 +00:00
end
def self.deserialize(buf=nil)
new.deserialize(buf)
end
def serialize(obj, check_keys=false, move_id=false)
raise(InvalidDocument, "BSON.serialize takes a Hash but got a #{obj.class}") unless obj.is_a?(Hash)
2010-04-05 18:09:06 +00:00
raise "Document is null" unless obj
@buf.rewind
# put in a placeholder for the total size
@buf.put_int(0)
# Write key/value pairs. Always write _id first if it exists.
if move_id
if obj.has_key? '_id'
serialize_key_value('_id', obj['_id'], false)
elsif obj.has_key? :_id
serialize_key_value('_id', obj[:_id], false)
end
obj.each {|k, v| serialize_key_value(k, v, check_keys) unless k == '_id' || k == :_id }
else
if obj.has_key?('_id') && obj.has_key?(:_id)
obj['_id'] = obj.delete(:_id)
end
obj.each {|k, v| serialize_key_value(k, v, check_keys) }
end
serialize_eoo_element(@buf)
if @buf.size > @buf.max_size
raise InvalidDocument, "Document is too large (#{@buf.size}). " +
"This BSON documents is limited to #{@buf.max_size} bytes."
2010-04-05 18:09:06 +00:00
end
@buf.put_int(@buf.size, 0)
2010-04-12 16:01:50 +00:00
@buf
2010-04-05 18:09:06 +00:00
end
# Returns the array stored in the buffer.
# Implemented to ensure an API compatible with BSON extension.
def unpack(arg)
@buf.to_a
end
def serialize_key_value(k, v, check_keys)
k = k.to_s
if check_keys
if k[0] == ?$
2010-04-05 22:24:31 +00:00
raise InvalidKeyName.new("key #{k} must not start with '$'")
2010-04-05 18:09:06 +00:00
end
if k.include? ?.
2010-04-05 22:24:31 +00:00
raise InvalidKeyName.new("key #{k} must not contain '.'")
2010-04-05 18:09:06 +00:00
end
end
type = bson_type(v)
case type
when STRING, SYMBOL
serialize_string_element(@buf, k, v, type)
when NUMBER, NUMBER_INT
serialize_number_element(@buf, k, v, type)
when OBJECT
serialize_object_element(@buf, k, v, check_keys)
when OID
serialize_oid_element(@buf, k, v)
when ARRAY
serialize_array_element(@buf, k, v, check_keys)
when REGEX
serialize_regex_element(@buf, k, v)
when BOOLEAN
serialize_boolean_element(@buf, k, v)
when DATE
serialize_date_element(@buf, k, v)
when NULL
serialize_null_element(@buf, k)
when REF
serialize_dbref_element(@buf, k, v)
when BINARY
serialize_binary_element(@buf, k, v)
when UNDEFINED
serialize_null_element(@buf, k)
when CODE_W_SCOPE
serialize_code_w_scope(@buf, k, v)
when MAXKEY
serialize_max_key_element(@buf, k)
when MINKEY
serialize_min_key_element(@buf, k)
2011-03-28 18:36:49 +00:00
when TIMESTAMP
serialize_timestamp_element(@buf, k, v)
2010-04-05 18:09:06 +00:00
else
raise "unhandled type #{type}"
end
end
def deserialize(buf=nil)
# If buf is nil, use @buf, assumed to contain already-serialized BSON.
# This is only true during testing.
if buf.is_a? String
2010-06-01 03:11:02 +00:00
@buf = ByteBuffer.new(buf.unpack("C*")) if buf
2010-04-05 18:09:06 +00:00
else
@buf = ByteBuffer.new(buf.to_a) if buf
end
@buf.rewind
@buf.get_int # eat message size
2010-05-07 01:25:18 +00:00
doc = BSON::OrderedHash.new
2010-04-05 18:09:06 +00:00
while @buf.more?
type = @buf.get
case type
when STRING, CODE
key = deserialize_cstr(@buf)
doc[key] = deserialize_string_data(@buf)
when SYMBOL
key = deserialize_cstr(@buf)
doc[key] = deserialize_string_data(@buf).intern
when NUMBER
key = deserialize_cstr(@buf)
doc[key] = deserialize_number_data(@buf)
when NUMBER_INT
key = deserialize_cstr(@buf)
doc[key] = deserialize_number_int_data(@buf)
when NUMBER_LONG
key = deserialize_cstr(@buf)
doc[key] = deserialize_number_long_data(@buf)
when OID
key = deserialize_cstr(@buf)
doc[key] = deserialize_oid_data(@buf)
when ARRAY
key = deserialize_cstr(@buf)
doc[key] = deserialize_array_data(@buf)
when REGEX
key = deserialize_cstr(@buf)
doc[key] = deserialize_regex_data(@buf)
when OBJECT
key = deserialize_cstr(@buf)
doc[key] = deserialize_object_data(@buf)
when BOOLEAN
key = deserialize_cstr(@buf)
doc[key] = deserialize_boolean_data(@buf)
when DATE
key = deserialize_cstr(@buf)
doc[key] = deserialize_date_data(@buf)
when NULL
key = deserialize_cstr(@buf)
doc[key] = nil
when UNDEFINED
key = deserialize_cstr(@buf)
doc[key] = nil
when REF
key = deserialize_cstr(@buf)
doc[key] = deserialize_dbref_data(@buf)
when BINARY
key = deserialize_cstr(@buf)
doc[key] = deserialize_binary_data(@buf)
when CODE_W_SCOPE
key = deserialize_cstr(@buf)
doc[key] = deserialize_code_w_scope_data(@buf)
when TIMESTAMP
key = deserialize_cstr(@buf)
2011-03-28 18:36:49 +00:00
doc[key] = deserialize_timestamp_data(@buf)
2010-04-05 18:09:06 +00:00
when MAXKEY
key = deserialize_cstr(@buf)
doc[key] = MaxKey.new
when MINKEY, 255 # This is currently easier than unpack the type byte as an unsigned char.
key = deserialize_cstr(@buf)
doc[key] = MinKey.new
when EOO
break
else
raise "Unknown type #{type}, key = #{key}"
end
end
@buf.rewind
doc
end
# For debugging.
def hex_dump
str = ''
@buf.to_a.each_with_index { |b,i|
if (i % 8) == 0
str << "\n" if i > 0
str << '%4d: ' % i
else
str << ' '
end
str << '%02X' % b
}
str
end
def deserialize_date_data(buf)
unsigned = buf.get_long()
milliseconds = unsigned >= 2 ** 64 / 2 ? unsigned - 2**64 : unsigned
Time.at(milliseconds.to_f / 1000.0).utc # at() takes fractional seconds
end
def deserialize_boolean_data(buf)
buf.get == 1
end
def deserialize_number_data(buf)
buf.get_double
end
def deserialize_number_int_data(buf)
unsigned = buf.get_int
unsigned >= 2**32 / 2 ? unsigned - 2**32 : unsigned
end
def deserialize_number_long_data(buf)
unsigned = buf.get_long
unsigned >= 2 ** 64 / 2 ? unsigned - 2**64 : unsigned
end
def deserialize_object_data(buf)
size = buf.get_int
buf.position -= 4
2010-09-30 13:43:17 +00:00
object = @encoder.new().deserialize(buf.get(size))
2010-04-05 18:09:06 +00:00
if object.has_key? "$ref"
DBRef.new(object["$ref"], object["$id"])
else
object
end
end
def deserialize_array_data(buf)
h = deserialize_object_data(buf)
a = []
h.each { |k, v| a[k.to_i] = v }
a
end
def deserialize_regex_data(buf)
str = deserialize_cstr(buf)
options_str = deserialize_cstr(buf)
opts = 0
opts |= Regexp::IGNORECASE if options_str.include?('i')
opts |= Regexp::MULTILINE if options_str.include?('m')
opts |= Regexp::MULTILINE if options_str.include?('s')
opts |= Regexp::EXTENDED if options_str.include?('x')
Regexp.new(str, opts)
2010-04-05 18:09:06 +00:00
end
2011-03-28 18:36:49 +00:00
def deserialize_timestamp_data(buf)
increment = buf.get_int
seconds = buf.get_int
Timestamp.new(seconds, increment)
end
def encoded_str(str)
if RUBY_VERSION >= '1.9'
str.force_encoding("utf-8")
if Encoding.default_internal
str.encode!(Encoding.default_internal)
end
end
str
end
2010-04-05 18:09:06 +00:00
def deserialize_string_data(buf)
len = buf.get_int
bytes = buf.get(len)
str = bytes[0..-2]
if str.respond_to? "pack"
str = str.pack("C*")
end
encoded_str(str)
2010-04-05 18:09:06 +00:00
end
def deserialize_code_w_scope_data(buf)
buf.get_int
len = buf.get_int
code = buf.get(len)[0..-2]
if code.respond_to? "pack"
code = code.pack("C*")
end
scope_size = buf.get_int
buf.position -= 4
2010-09-30 13:43:17 +00:00
scope = @encoder.new().deserialize(buf.get(scope_size))
2010-04-05 18:09:06 +00:00
Code.new(encoded_str(code), scope)
2010-04-05 18:09:06 +00:00
end
def deserialize_oid_data(buf)
ObjectId.new(buf.get(12))
2010-04-05 18:09:06 +00:00
end
def deserialize_dbref_data(buf)
ns = deserialize_string_data(buf)
oid = deserialize_oid_data(buf)
DBRef.new(ns, oid)
end
def deserialize_binary_data(buf)
len = buf.get_int
type = buf.get
len = buf.get_int if type == Binary::SUBTYPE_BYTES
Binary.new(buf.get(len), type)
end
def serialize_eoo_element(buf)
buf.put(EOO)
end
def serialize_null_element(buf, key)
buf.put(NULL)
self.class.serialize_key(buf, key)
end
def serialize_dbref_element(buf, key, val)
2010-05-07 01:25:18 +00:00
oh = BSON::OrderedHash.new
2010-04-05 18:09:06 +00:00
oh['$ref'] = val.namespace
oh['$id'] = val.object_id
serialize_object_element(buf, key, oh, false)
end
def serialize_binary_element(buf, key, val)
buf.put(BINARY)
self.class.serialize_key(buf, key)
bytes = val.to_a
num_bytes = bytes.length
subtype = val.respond_to?(:subtype) ? val.subtype : Binary::SUBTYPE_BYTES
if subtype == Binary::SUBTYPE_BYTES
buf.put_int(num_bytes + 4)
buf.put(subtype)
buf.put_int(num_bytes)
buf.put_array(bytes)
else
buf.put_int(num_bytes)
buf.put(subtype)
buf.put_array(bytes)
end
end
def serialize_boolean_element(buf, key, val)
buf.put(BOOLEAN)
self.class.serialize_key(buf, key)
buf.put(val ? 1 : 0)
end
def serialize_date_element(buf, key, val)
buf.put(DATE)
self.class.serialize_key(buf, key)
millisecs = (val.to_f * 1000).to_i
buf.put_long(millisecs)
end
def serialize_number_element(buf, key, val, type)
if type == NUMBER
buf.put(type)
self.class.serialize_key(buf, key)
buf.put_double(val)
else
if val > 2**64 / 2 - 1 or val < -2**64 / 2
raise RangeError.new("MongoDB can only handle 8-byte ints")
end
if val > 2**32 / 2 - 1 or val < -2**32 / 2
buf.put(NUMBER_LONG)
self.class.serialize_key(buf, key)
buf.put_long(val)
else
buf.put(type)
self.class.serialize_key(buf, key)
buf.put_int(val)
end
end
end
def serialize_object_element(buf, key, val, check_keys, opcode=OBJECT)
buf.put(opcode)
self.class.serialize_key(buf, key)
2010-09-30 13:43:17 +00:00
buf.put_array(@encoder.new.serialize(val, check_keys).to_a)
2010-04-05 18:09:06 +00:00
end
def serialize_array_element(buf, key, val, check_keys)
# Turn array into hash with integer indices as keys
2010-05-07 01:25:18 +00:00
h = BSON::OrderedHash.new
2010-04-05 18:09:06 +00:00
i = 0
val.each { |v| h[i] = v; i += 1 }
serialize_object_element(buf, key, h, check_keys, ARRAY)
end
def serialize_regex_element(buf, key, val)
buf.put(REGEX)
self.class.serialize_key(buf, key)
str = val.source
# We use serialize_key here since regex patterns aren't prefixed with
# length (can't contain the NULL byte).
self.class.serialize_key(buf, str)
options = val.options
options_str = ''
options_str << 'i' if ((options & Regexp::IGNORECASE) != 0)
if ((options & Regexp::MULTILINE) != 0)
options_str << 'm'
options_str << 's'
end
2010-04-05 18:09:06 +00:00
options_str << 'x' if ((options & Regexp::EXTENDED) != 0)
options_str << val.extra_options_str if val.respond_to?(:extra_options_str)
# Must store option chars in alphabetical order
self.class.serialize_cstr(buf, options_str.split(//).sort.uniq.join)
end
def serialize_max_key_element(buf, key)
buf.put(MAXKEY)
self.class.serialize_key(buf, key)
end
def serialize_min_key_element(buf, key)
buf.put(MINKEY)
self.class.serialize_key(buf, key)
end
2011-03-28 18:36:49 +00:00
def serialize_timestamp_element(buf, key, val)
buf.put(TIMESTAMP)
self.class.serialize_key(buf, key)
buf.put_int(val.increment)
buf.put_int(val.seconds)
end
2010-04-05 18:09:06 +00:00
def serialize_oid_element(buf, key, val)
buf.put(OID)
self.class.serialize_key(buf, key)
buf.put_array(val.to_a)
end
def serialize_string_element(buf, key, val, type)
buf.put(type)
self.class.serialize_key(buf, key)
# Make a hole for the length
len_pos = buf.position
buf.put_int(0)
# Save the string
start_pos = buf.position
self.class.serialize_cstr(buf, val)
end_pos = buf.position
# Put the string size in front
buf.put_int(end_pos - start_pos, len_pos)
# Go back to where we were
buf.position = end_pos
end
def serialize_code_w_scope(buf, key, val)
buf.put(CODE_W_SCOPE)
self.class.serialize_key(buf, key)
# Make a hole for the length
len_pos = buf.position
buf.put_int(0)
2010-09-30 13:43:17 +00:00
buf.put_int(val.code.length + 1)
self.class.serialize_cstr(buf, val.code)
buf.put_array(@encoder.new.serialize(val.scope).to_a)
2010-04-05 18:09:06 +00:00
end_pos = buf.position
buf.put_int(end_pos - len_pos, len_pos)
buf.position = end_pos
end
def deserialize_cstr(buf)
chars = ""
while true
b = buf.get
break if b == 0
chars << b.chr
end
encoded_str(chars)
2010-04-05 18:09:06 +00:00
end
def bson_type(o)
case o
when nil
NULL
when Integer
NUMBER_INT
when Float
NUMBER
when ByteBuffer
BINARY
when Code
CODE_W_SCOPE
when String
STRING
when Array
ARRAY
when Regexp
REGEX
when ObjectId
OID
2010-04-05 18:09:06 +00:00
when DBRef
REF
when true, false
BOOLEAN
when Time
DATE
when Hash
OBJECT
when Symbol
SYMBOL
when MaxKey
MAXKEY
when MinKey
MINKEY
2011-03-28 18:36:49 +00:00
when Timestamp
TIMESTAMP
2010-04-05 18:09:06 +00:00
when Numeric
raise InvalidDocument, "Cannot serialize the Numeric type #{o.class} as BSON; only Fixum, Bignum, and Float are supported."
when Date, DateTime
raise InvalidDocument, "#{o.class} is not currently supported; " +
"use a UTC Time instance instead."
else
if defined?(ActiveSupport::TimeWithZone) && o.is_a?(ActiveSupport::TimeWithZone)
raise InvalidDocument, "ActiveSupport::TimeWithZone is not currently supported; " +
"use a UTC Time instance instead."
else
raise InvalidDocument, "Cannot serialize #{o.class} as a BSON type; it either isn't supported or won't translate to BSON."
end
end
end
end
end