When possible, have BSON::ByteBuffer store the underlying binary data as a binary String instead of an array.

This dramatically improves the performance of BSON::ByteBuffer.new(a_string).
On Ruby 1.9.2 it is about 130 times faster.
This commit is contained in:
Hongli Lai (Phusion) 2010-09-07 15:13:41 +02:00 committed by Kyle Banker
parent 7309d7e48b
commit b9de2eaa5c
3 changed files with 67 additions and 28 deletions

View File

@ -37,8 +37,16 @@ module BSON
attr_reader :order attr_reader :order
def initialize(initial_data=[]) def initialize(initial_data=[])
if initial_data.is_a?(String)
if initial_data.respond_to?(:force_encoding)
@str = initial_data.force_encoding('binary')
else
@str = initial_data
end
else
@buf = initial_data @buf = initial_data
@cursor = @buf.length end
@cursor = (@buf || @str).length
@order = :little_endian @order = :little_endian
@int_pack_order = 'V' @int_pack_order = 'V'
@double_pack_order = 'E' @double_pack_order = 'E'
@ -84,35 +92,38 @@ module BSON
def clear def clear
@buf = [] @buf = []
@str = nil
rewind rewind
end end
def size def size
@buf.size (@buf || @str).size
end end
alias_method :length, :size alias_method :length, :size
# Appends a second ByteBuffer object, +buffer+, to the current buffer. # Appends a second ByteBuffer object, +buffer+, to the current buffer.
def append!(buffer) def append!(buffer)
@buf = @buf + buffer.to_a self.buf = buf + buffer.to_a
self self
end end
# Prepends a second ByteBuffer object, +buffer+, to the current buffer. # Prepends a second ByteBuffer object, +buffer+, to the current buffer.
def prepend!(buffer) def prepend!(buffer)
@buf = buffer.to_a + @buf self.buf = buffer.to_a + buf
self self
end end
def put(byte, offset=nil) def put(byte, offset=nil)
@cursor = offset if offset @cursor = offset if offset
@buf[@cursor] = byte buf[@cursor] = byte
buf_modified!
@cursor += 1 @cursor += 1
end end
def put_array(array, offset=nil) def put_array(array, offset=nil)
@cursor = offset if offset @cursor = offset if offset
@buf[@cursor, array.length] = array buf[@cursor, array.length] = array
buf_modified!
@cursor += array.length @cursor += array.length
end end
@ -150,12 +161,12 @@ module BSON
start = @cursor start = @cursor
@cursor += len @cursor += len
if one_byte if one_byte
@buf[start] buf[start]
else else
if @buf.respond_to? "unpack" if buf.respond_to? "unpack"
@buf[start, len].unpack("C*") buf[start, len].unpack("C*")
else else
@buf[start, len] buf[start, len]
end end
end end
end end
@ -163,7 +174,7 @@ module BSON
def get_int def get_int
check_read_length(4) check_read_length(4)
vals = "" vals = ""
(@cursor..@cursor+3).each { |i| vals << @buf[i].chr } (@cursor..@cursor+3).each { |i| vals << buf[i].chr }
@cursor += 4 @cursor += 4
vals.unpack(@int_pack_order)[0] vals.unpack(@int_pack_order)[0]
end end
@ -181,20 +192,20 @@ module BSON
def get_double def get_double
check_read_length(8) check_read_length(8)
vals = "" vals = ""
(@cursor..@cursor+7).each { |i| vals << @buf[i].chr } (@cursor..@cursor+7).each { |i| vals << buf[i].chr }
@cursor += 8 @cursor += 8
vals.unpack(@double_pack_order)[0] vals.unpack(@double_pack_order)[0]
end end
def more? def more?
@cursor < @buf.size @cursor < buf.size
end end
def to_a def to_a
if @buf.respond_to? "unpack" if buf.respond_to? "unpack"
@buf.unpack("C*") buf.unpack("C*")
else else
@buf buf
end end
end end
@ -203,23 +214,42 @@ module BSON
end end
def to_s def to_s
if !@str
if @buf.respond_to? :fast_pack if @buf.respond_to? :fast_pack
@buf.fast_pack @str = @buf.fast_pack
elsif @buf.respond_to? "pack" elsif @buf.respond_to? "pack"
@buf.pack("C*") @str = @buf.pack("C*")
else else
@buf @str = @buf
end end
end end
@str
end
def dump def dump
@buf.each_with_index { |c, i| $stderr.puts "#{'%04d' % i}: #{'%02x' % c} #{'%03o' % c} #{'%s' % c.chr} #{'%3d' % c}" } buf.each_with_index { |c, i| $stderr.puts "#{'%04d' % i}: #{'%02x' % c} #{'%03o' % c} #{'%s' % c.chr} #{'%3d' % c}" }
end end
private private
def buf
if !@buf
@buf = @str.unpack("c*")
end
@buf
end
def buf=(buffer)
@buf = buffer
@str = nil
end
def buf_modified!
@str = nil
end
def check_read_length(len) def check_read_length(len)
raise "attempt to read past end of buffer" if @cursor + len > @buf.length raise "attempt to read past end of buffer" if @cursor + len > (@buf || @str).length
end end
end end

View File

@ -37,14 +37,13 @@ module BSON
# Create a buffer for storing binary data in MongoDB. # Create a buffer for storing binary data in MongoDB.
# #
# @param [Array, String] data to store as BSON binary. If a string is given, the value will be # @param [Array, String] data to store as BSON binary. If a string is given, then on
# converted to an array of bytes using String#unpack("c*"). # Ruby 1.9 it will be forced to the binary encoding.
# @param [Fixnum] one of four values specifying a BSON binary subtype. Possible values are # @param [Fixnum] one of four values specifying a BSON binary subtype. Possible values are
# SUBTYPE_BYTES, SUBTYPE_UUID, SUBTYPE_MD5, and SUBTYPE_USER_DEFINED. # SUBTYPE_BYTES, SUBTYPE_UUID, SUBTYPE_MD5, and SUBTYPE_USER_DEFINED.
# #
# @see http://www.mongodb.org/display/DOCS/BSON#BSON-noteondatabinary BSON binary subtypes. # @see http://www.mongodb.org/display/DOCS/BSON#BSON-noteondatabinary BSON binary subtypes.
def initialize(data=[], subtype=SUBTYPE_BYTES) def initialize(data=[], subtype=SUBTYPE_BYTES)
data = data.unpack("c*") if data.is_a?(String)
super(data) super(data)
@subtype = subtype @subtype = subtype
end end

View File

@ -79,4 +79,14 @@ class ByteBufferTest < Test::Unit::TestCase
assert_equal [4, 0, 0, 0, 5, 0, 0, 0], @buf.to_a assert_equal [4, 0, 0, 0, 5, 0, 0, 0], @buf.to_a
end end
def test_binary_string_input
str = "abcd"
str.force_encoding('binary') if str.respond_to?(:force_encoding)
@buf = ByteBuffer.new(str)
assert_equal "abcd", @buf.to_s
assert_equal [97, 98, 99, 100], @buf.to_a
@buf.put_int(0)
assert_equal [97, 98, 99, 100, 0, 0, 0, 0], @buf.to_a
end
end end