diff --git a/lib/mongo/gridfs/grid.rb b/lib/mongo/gridfs/grid.rb index 850b7b4..b60c9a5 100644 --- a/lib/mongo/gridfs/grid.rb +++ b/lib/mongo/gridfs/grid.rb @@ -18,13 +18,13 @@ module Mongo # WARNING: This class is part of a new, experimental GridFS API. Subject to change. class Grid - DEFAULT_BUCKET_NAME = 'fs' + DEFAULT_FS_NAME = 'fs' - def initialize(db, bucket_name=DEFAULT_BUCKET_NAME) + def initialize(db, fs_name=DEFAULT_FS_NAME) check_params(db) @db = db - @files = @db["#{bucket_name}.files"] - @chunks = @db["#{bucket_name}.chunks"] + @files = @db["#{fs_name}.files"] + @chunks = @db["#{fs_name}.chunks"] @chunks.create_index([['files_id', Mongo::ASCENDING], ['n', Mongo::ASCENDING]]) end diff --git a/lib/mongo/gridfs/grid_file_system.rb b/lib/mongo/gridfs/grid_file_system.rb index c38c91f..dcfa7f3 100644 --- a/lib/mongo/gridfs/grid_file_system.rb +++ b/lib/mongo/gridfs/grid_file_system.rb @@ -19,7 +19,7 @@ module Mongo # WARNING: This class is part of a new, experimental GridFS API. Subject to change. class GridFileSystem < Grid - def initialize(db, bucket_name=DEFAULT_BUCKET_NAME) + def initialize(db, fs_name=DEFAULT_FS_NAME) super @files.create_index([['filename', 1], ['uploadDate', -1]]) diff --git a/lib/mongo/gridfs/grid_io.rb b/lib/mongo/gridfs/grid_io.rb index 729b842..6fc16f4 100644 --- a/lib/mongo/gridfs/grid_io.rb +++ b/lib/mongo/gridfs/grid_io.rb @@ -48,20 +48,15 @@ module Mongo # @return [String] # the data in the file def read(length=nil) - return '' if length == 0 - return read_all if length.nil? && @file_position.zero? - buf = '' - while true - buf << @current_chunk['data'].to_s[@chunk_position..-1] - if buf.length >= length - return buf[0...length] - else - @current_chunk = get_chunk(@current_chunk['n'] + 1) - end + if length == 0 + return '' + elsif length.nil? && @file_position.zero? + read_all + else + read_length(length) end - buf end - alias :data :read + alias_method :data, :read # Write the given string (binary) data to the file. # @@ -70,26 +65,17 @@ module Mongo # # @return [Integer] # the number of bytes written. - def write(string) - raise GridError, "#{@filename} not opened for write" unless @mode[0] == ?w - # Since Ruby 1.9.1 doesn't necessarily store one character per byte. - if string.respond_to?(:force_encoding) - string.force_encoding("binary") - end - to_write = string.length - while (to_write > 0) do - if @current_chunk && @chunk_position == @chunk_size - next_chunk_number = @current_chunk['n'] + 1 - @current_chunk = create_chunk(next_chunk_number) + def write(io) + raise GridError, "file not opened for write" unless @mode[0] == ?w + if io.is_a? String + write_string(io) + else + length = 0 + while(string = io.read(@chunk_size)) + length += write_string(string) end - chunk_available = @chunk_size - @chunk_position - step_size = (to_write > chunk_available) ? chunk_available : to_write - @current_chunk['data'] = Binary.new((@current_chunk['data'].to_s << string[-to_write, step_size]).unpack("c*")) - @chunk_position += step_size - to_write -= step_size - save_chunk(@current_chunk) + length end - string.length - to_write end # Position the file pointer at the provided location. @@ -129,12 +115,12 @@ module Mongo @file_position end - # Creates or updates the document storing the chunks' metadata - # in the files collection. The file exists only after this method - # is called. + # Creates or updates the document from the files collection that + # stores the chunks' metadata. The file becomes available only after + # this method has been called. # - # This method will be invoked automatically - # on GridIO#open. Otherwise, it must be called manually. + # This method will be invoked automatically when + # on GridIO#open is passed a block. Otherwise, it must be called manually. # # @return [True] def close @@ -170,7 +156,12 @@ module Mongo def get_chunk(n) chunk = @chunks.find({'files_id' => @files_id, 'n' => n}).next_document @chunk_position = 0 - chunk || {} + chunk + end + + def get_chunk_for_read(n) + chunk = get_chunk(n) + return nil unless chunk end def last_chunk_number @@ -188,6 +179,58 @@ module Mongo buf end + def read_length(length) + cache_chunk_data + remaining = (@file_length - @file_position) + to_read = length > remaining ? remaining : length + return nil unless remaining > 0 + + buf = '' + while to_read > 0 + if @chunk_position == @chunk_data_length + @current_chunk = get_chunk(@current_chunk['n'] + 1) + cache_chunk_data + end + chunk_remainder = @chunk_data_length - @chunk_position + size = (to_read >= chunk_remainder) ? chunk_remainder : to_read + buf << @current_chunk_data[@chunk_position, size] + to_read -= size + @chunk_position += size + @file_position += size + end + buf + end + + def cache_chunk_data + @current_chunk_data = @current_chunk['data'].to_s + if @current_chunk_data.respond_to?(:force_encoding) + @current_chunk_data.force_encoding("binary") + end + @chunk_data_length = @current_chunk['data'].length + end + + def write_string(string) + # Since Ruby 1.9.1 doesn't necessarily store one character per byte. + if string.respond_to?(:force_encoding) + string.force_encoding("binary") + end + + to_write = string.length + while (to_write > 0) do + if @current_chunk && @chunk_position == @chunk_size + next_chunk_number = @current_chunk['n'] + 1 + @current_chunk = create_chunk(next_chunk_number) + end + chunk_available = @chunk_size - @chunk_position + step_size = (to_write > chunk_available) ? chunk_available : to_write + @current_chunk['data'] = Binary.new((@current_chunk['data'].to_s << string[-to_write, step_size]).unpack("c*")) + @chunk_position += step_size + to_write -= step_size + save_chunk(@current_chunk) + end + string.length - to_write + end + # Initialize based on whether the supplied file exists. def init_read(filesystem, opts) if filesystem diff --git a/test/data/sample_file.pdf b/test/data/sample_file.pdf new file mode 100644 index 0000000..4557c48 Binary files /dev/null and b/test/data/sample_file.pdf differ diff --git a/test/data/small_data.txt b/test/data/small_data.txt new file mode 100644 index 0000000..82736e2 --- /dev/null +++ b/test/data/small_data.txt @@ -0,0 +1 @@ +This is pretty small data diff --git a/test/test_grid.rb b/test/test_grid.rb index e27d6f2..5f87c38 100644 --- a/test/test_grid.rb +++ b/test/test_grid.rb @@ -5,8 +5,8 @@ class GridTest < Test::Unit::TestCase def setup @db ||= Connection.new(ENV['MONGO_RUBY_DRIVER_HOST'] || 'localhost', ENV['MONGO_RUBY_DRIVER_PORT'] || Connection::DEFAULT_PORT).db('ruby-mongo-test') - @files = @db.collection('test-bucket.files') - @chunks = @db.collection('test-bucket.chunks') + @files = @db.collection('test-fs.files') + @chunks = @db.collection('test-fs.chunks') end def teardown @@ -17,7 +17,7 @@ class GridTest < Test::Unit::TestCase context "A basic grid-stored file" do setup do @data = "GRIDDATA" * 50000 - @grid = Grid.new(@db, 'test-bucket') + @grid = Grid.new(@db, 'test-fs') @id = @grid.put(@data, 'sample', :metadata => {'app' => 'photos'}) end @@ -44,4 +44,38 @@ class GridTest < Test::Unit::TestCase end end + def read_and_write_stream(filename, read_length, opts={}) + io = File.open(File.join(File.dirname(__FILE__), 'data', filename), 'r') + id = @grid.put(io, filename + read_length.to_s, opts) + file = @grid.get(id) + io.rewind + data = io.read + read_data = "" + while(chunk = file.read(read_length)) + read_data << chunk + end + assert_equal data.length, read_data.length + end + + context "Streaming: " do + setup do + @grid = Grid.new(@db, 'test-fs') + end + + should "put and get a small io object with a small chunk size" do + read_and_write_stream('small_data.txt', 1, :chunk_size => 2) + end + + should "put and get a small io object" do + read_and_write_stream('small_data.txt', 1) + end + + should "put and get a large io object when reading smaller than the chunk size" do + read_and_write_stream('sample_file.pdf', 256 * 1024) + end + + should "put and get a large io object when reading larger than the chunk size" do + read_and_write_stream('sample_file.pdf', 300 * 1024) + end + end end diff --git a/test/test_grid_file_system.rb b/test/test_grid_file_system.rb index e042cfd..110cb91 100644 --- a/test/test_grid_file_system.rb +++ b/test/test_grid_file_system.rb @@ -143,12 +143,12 @@ class GridTest < Test::Unit::TestCase f.seek(0) f.seek(7, IO::SEEK_CUR) assert_equal 'w', f.read(1) - f.seek(-1, IO::SEEK_CUR) + f.seek(-2, IO::SEEK_CUR) assert_equal ' ', f.read(1) f.seek(-4, IO::SEEK_CUR) assert_equal 'l', f.read(1) f.seek(3, IO::SEEK_CUR) - assert_equal ',', f.read(1) + assert_equal 'w', f.read(1) end end