Optimize ByteBuffer: use binary string as underlying storage instead of array.

This commit is contained in:
Hongli Lai (Phusion) 2010-09-11 22:56:43 +02:00 committed by Kyle Banker
parent 3e77299ec1
commit bde53f1e20
3 changed files with 122 additions and 86 deletions

View File

@ -25,13 +25,7 @@ module BSON
end end
# Deserializes a BSON document through the CBson extension.
#
# +buf+ is wrapped in a ByteBuffer and handed to CBson as that buffer's
# string form.
#
# NOTE(review): the +nil+ default is forwarded to ByteBuffer.new unchanged;
# confirm that no caller relies on calling this without an argument.
def self.deserialize(buf=nil)
  CBson.deserialize(ByteBuffer.new(buf).to_s)
end
end end

View File

@ -20,23 +20,9 @@
module BSON module BSON
class ByteBuffer class ByteBuffer
# Commonly-used integers.
INT_LOOKUP = {
0 => [0, 0, 0, 0],
1 => [1, 0, 0, 0],
2 => [2, 0, 0, 0],
3 => [3, 0, 0, 0],
4 => [4, 0, 0, 0],
2001 => [209, 7, 0, 0],
2002 => [210, 7, 0, 0],
2004 => [212, 7, 0, 0],
2005 => [213, 7, 0, 0],
2006 => [214, 7, 0, 0]
}
attr_reader :order attr_reader :order
def initialize(initial_data=[]) def initialize(initial_data="")
if initial_data.is_a?(String) if initial_data.is_a?(String)
if initial_data.respond_to?(:force_encoding) if initial_data.respond_to?(:force_encoding)
@str = initial_data.force_encoding('binary') @str = initial_data.force_encoding('binary')
@ -44,31 +30,38 @@ module BSON
@str = initial_data @str = initial_data
end end
else else
@buf = initial_data @str = initial_data.pack('C*')
end end
@cursor = (@buf || @str).length @cursor = @str.length
@order = :little_endian @order = :little_endian
@int_pack_order = 'V' @int_pack_order = 'V'
@double_pack_order = 'E' @double_pack_order = 'E'
end end
if RUBY_VERSION >= '1.9'
  # Single zero byte, tagged as binary and frozen for reuse.
  NULL_BYTE = "\0".force_encoding('binary').freeze
  UTF8_ENCODING = Encoding.find('utf-8')
  BINARY_ENCODING = Encoding.find('binary')

  # Returns a copy of +str+ transcoded to UTF-8 and then re-tagged as
  # binary. The argument itself is not modified.
  def self.to_utf8_binary(str)
    str.encode(UTF8_ENCODING).force_encoding(BINARY_ENCODING)
  end
else
  # Single zero byte (strings carry no encoding on this Ruby).
  NULL_BYTE = "\0"

  # Returns +str+ unchanged after checking that it unpacks as UTF-8.
  #
  # Raises InvalidStringEncoding when the "U*" unpack fails.
  def self.to_utf8_binary(str)
    begin
      str.unpack("U*")
    rescue
      raise InvalidStringEncoding, "String not valid utf-8: #{str.inspect}"
    end
    str
  end
end
# Appends +val+ to +buf+ as a C string: the UTF-8 bytes of +val.to_s+
# followed by a single NULL_BYTE terminator.
#
# +buf+ must respond to +append!+. Returns whatever the final +append!+
# call returns.
def self.serialize_cstr(buf, val)
  buf.append!(to_utf8_binary(val.to_s))
  buf.append!(NULL_BYTE)
end
# +endianness+ should be :little_endian or :big_endian. Default is :little_endian # +endianness+ should be :little_endian or :big_endian. Default is :little_endian
@ -91,48 +84,59 @@ module BSON
end end
# Replaces the storage with a fresh empty binary string and then calls
# +rewind+.
def clear
  # String.new gives a fresh mutable string, so no string literal is ever
  # mutated by later writes.
  @str = String.new
  @str.force_encoding('binary') if @str.respond_to?(:force_encoding)
  rewind
end
# Number of bytes currently held in the buffer.
def size
  @str.size
end
alias_method :length, :size alias_method :length, :size
# Appends +buffer+ to the end of the current buffer. +buffer+ may be another
# ByteBuffer or any object whose +to_s+ yields the bytes to add (for
# example a String). The cursor is left untouched. Returns +self+.
def append!(buffer)
  @str << buffer.to_s
  self
end
# Prepends +buffer+ to the front of the current buffer. +buffer+ may be
# another ByteBuffer or any object whose +to_s+ yields the bytes to add.
# The storage is replaced by a new string; the cursor is left untouched.
# Returns +self+.
def prepend!(buffer)
  @str = buffer.to_s + @str
  self
end
# Writes a single +byte+ at the cursor and advances the cursor by one.
#
# When +offset+ is given the cursor is moved there first. If the cursor
# lies inside the existing data the byte at that position is overwritten;
# otherwise +ensure_length+ is called for the cursor position and the byte
# is appended.
#
# Returns the new cursor position.
def put(byte, offset=nil)
  @cursor = offset if offset
  if more?
    @str[@cursor] = chr(byte)
  else
    ensure_length(@cursor)
    @str << chr(byte)
  end
  @cursor += 1
end
# Writes the byte values in +array+ at the cursor and advances the cursor
# by +array.length+.
#
# When +offset+ is given the cursor is moved there first. If the cursor
# lies inside the existing data, up to +array.length+ existing bytes are
# replaced by the packed array; otherwise +ensure_length+ is called for the
# cursor position and the packed array is appended.
#
# Returns the new cursor position.
def put_array(array, offset=nil)
  @cursor = offset if offset
  packed = array.pack("C*")
  if more?
    @str[@cursor, array.length] = packed
  else
    ensure_length(@cursor)
    @str << packed
  end
  @cursor += array.length
end
# Writes +i+ as four bytes packed with @int_pack_order and advances the
# cursor by four.
#
# When +offset+ is given the cursor is moved there first. If the cursor
# lies inside the existing data, up to four existing bytes are replaced;
# otherwise +ensure_length+ is called for the cursor position and the
# packed bytes are appended.
#
# Returns the new cursor position.
def put_int(i, offset=nil)
  @cursor = offset if offset
  packed = [i].pack(@int_pack_order)
  if more?
    @str[@cursor, 4] = packed
  else
    ensure_length(@cursor)
    @str << packed
  end
  @cursor += 4
end
def put_long(i, offset=nil) def put_long(i, offset=nil)
@ -154,6 +158,7 @@ module BSON
# If +size+ == nil, returns one byte. Else returns array of bytes of length # If +size+ == nil, returns one byte. Else returns array of bytes of length
# # +size+. # # +size+.
if "x"[0].is_a?(Integer)
def get(len=nil) def get(len=nil)
one_byte = len.nil? one_byte = len.nil?
len ||= 1 len ||= 1
@ -161,19 +166,29 @@ module BSON
start = @cursor start = @cursor
@cursor += len @cursor += len
if one_byte if one_byte
buf[start] @str[start]
else else
if buf.respond_to? "unpack" @str[start, len].unpack("C*")
buf[start, len].unpack("C*") end
end
else else
buf[start, len] def get(len=nil)
one_byte = len.nil?
len ||= 1
check_read_length(len)
start = @cursor
@cursor += len
if one_byte
@str[start, 1].ord
else
@str[start, len].unpack("C*")
end end
end end
end end
# Reads four bytes at the cursor, advances the cursor by four and returns
# the bytes unpacked with @int_pack_order.
#
# +check_read_length+ is called first, so a read that would run past the
# end of the data is rejected before the cursor moves.
def get_int
  check_read_length(4)
  vals = @str[@cursor, 4]
  @cursor += 4
  vals.unpack(@int_pack_order)[0]
end
@ -190,22 +205,17 @@ module BSON
# Reads eight bytes at the cursor, advances the cursor by eight and returns
# the bytes unpacked with @double_pack_order.
#
# +check_read_length+ is called first, so a read that would run past the
# end of the data is rejected before the cursor moves.
def get_double
  check_read_length(8)
  vals = @str[@cursor, 8]
  @cursor += 8
  vals.unpack(@double_pack_order)[0]
end
# True while the cursor is positioned before the end of the stored bytes.
def more?
  @cursor < @str.size
end
# Returns the buffer contents as a new Array of unsigned byte values
# (unpacked with "C*").
def to_a
  @str.unpack("C*")
end
def unpack(args) def unpack(args)
@ -213,42 +223,35 @@ module BSON
end end
def to_s def to_s
if !@str
if @buf.respond_to? :fast_pack
@str = @buf.fast_pack
elsif @buf.respond_to? "pack"
@str = @buf.pack("C*")
else
@str = @buf
end
end
@str @str
end end
# Debugging aid: writes one line per byte to $stderr showing the byte's
# offset, then its value in hex, octal, as a character and in decimal.
def dump
  # each_byte yields only the byte, so the offset comes from each_with_index
  # over the unpacked bytes rather than from a second block parameter.
  @str.unpack("C*").each_with_index do |c, i|
    $stderr.puts "#{'%04d' % i}: #{'%02x' % c} #{'%03o' % c} #{'%s' % c.chr} #{'%3d' % c}"
  end
end
private private
# Pads the storage with zero bytes until it is at least +length+ bytes
# long. Does nothing when the storage is already that long.
def ensure_length(length)
  if @str.size < length
    # NULL_BYTE is the zero-byte constant this class defines.
    @str << NULL_BYTE * (length - @str.size)
  end
end
# Converts an integer byte value to a one-character string.
#
# Negative values are packed as a signed char ('c'), so -1 becomes the
# byte 0xFF; non-negative values go through Integer#chr.
def chr(byte)
  if byte < 0
    [byte].pack('c')
  else
    byte.chr
  end
end
# Guards a read of +len+ bytes from the cursor.
#
# Raises RuntimeError when the read would extend past the end of the
# stored data.
def check_read_length(len)
  raise "attempt to read past end of buffer" if @cursor + len > @str.length
end
end end

View File

@ -86,6 +86,45 @@ class ByteBufferTest < Test::Unit::TestCase
end end
end end
# A negative byte must read back as its unsigned value: -1 is stored as
# the single byte 0xFF and returned by get as 255.
def test_put_negative_byte
  @buf.put(-1)
  @buf.rewind
  assert_equal 255, @buf.get
  # Build the expected value with pack so it is a binary string. A "\xFF"
  # literal takes the source file's encoding, and a non-ASCII string in a
  # different encoding never compares equal to a binary string.
  assert_equal [255].pack("C"), @buf.to_s
end
# put with an explicit offset overwrites inside the data and zero-fills
# the gap when the offset lies past the end.
def test_put_with_offset
  writes = [[1, nil], [2, 0], [3, 3]]
  writes.each { |byte, offset| @buf.put(byte, offset) }

  expected = "\x02\x00\x00\x03"
  assert_equal expected, @buf.to_s
end
# put_array with an explicit offset overwrites inside the data and
# zero-fills the gap when the offset lies past the end.
def test_put_array_with_offset
  @buf.put(1)
  [[[2, 3], 0], [[4, 5], 4]].each do |bytes, offset|
    @buf.put_array(bytes, offset)
  end

  expected = "\x02\x03\x00\x00\x04\x05"
  assert_equal expected, @buf.to_s
end
# put_int with an explicit offset overwrites inside the data and
# zero-fills the gap when the offset lies past the end.
def test_put_int_with_offset
  @buf.put(1)
  [[2, 0], [3, 5]].each do |value, offset|
    @buf.put_int(value, offset)
  end

  expected = "\x02\x00\x00\x00\x00\x03\x00\x00\x00"
  assert_equal expected, @buf.to_s
end
# put_long with an explicit offset overwrites inside the data and
# zero-fills the gap when the offset lies past the end.
def test_put_long_with_offset
  @buf.put(1)
  [[2, 0], [3, 9]].each do |value, offset|
    @buf.put_long(value, offset)
  end

  first_long = "\x02\x00\x00\x00\x00\x00\x00\x00"
  gap = "\x00"
  second_long = "\x03\x00\x00\x00\x00\x00\x00\x00"
  assert_equal(first_long + gap + second_long, @buf.to_s)
end
def test_rewrite def test_rewrite
@buf.put_int(0) @buf.put_int(0)
@buf.rewind @buf.rewind