From 1d0cc7aae26599e4a14921fe6be468c2813c43ba Mon Sep 17 00:00:00 2001 From: Kyle Banker Date: Mon, 22 Feb 2010 18:06:59 -0500 Subject: [PATCH] added safe more for gridfs --- bin/gridstore_benchmark | 2 +- lib/mongo/gridfs/grid.rb | 9 ++----- lib/mongo/gridfs/grid_file_system.rb | 37 ++++++++++++-------------- lib/mongo/gridfs/grid_io.rb | 39 +++++++++++++++++++++++----- test/grid_file_system_test.rb | 23 ++++++++++++++++ test/grid_io_test.rb | 19 ++++++++++++++ 6 files changed, 94 insertions(+), 35 deletions(-) diff --git a/bin/gridstore_benchmark b/bin/gridstore_benchmark index 395e821..873f6bb 100755 --- a/bin/gridstore_benchmark +++ b/bin/gridstore_benchmark @@ -15,7 +15,7 @@ mb = length / 1048576.0 t1 = Time.now @grid = Grid.new(db) -@id = @grid.put(sample_data, 'mongodb-new.pdf') +@id = @grid.put(sample_data, 'mongodb-new.pdf', :safe => true) puts "Write: #{mb / (Time.now - t1)} mb/s" t1 = Time.now diff --git a/lib/mongo/gridfs/grid.rb b/lib/mongo/gridfs/grid.rb index 7b02be4..b3636b9 100644 --- a/lib/mongo/gridfs/grid.rb +++ b/lib/mongo/gridfs/grid.rb @@ -21,7 +21,8 @@ module Mongo DEFAULT_FS_NAME = 'fs' def initialize(db, fs_name=DEFAULT_FS_NAME) - check_params(db) + raise MongoArgumentError, "db must be a Mongo::DB." unless db.is_a?(Mongo::DB) + @db = db @files = @db["#{fs_name}.files"] @chunks = @db["#{fs_name}.chunks"] @@ -53,11 +54,5 @@ module Mongo def default_grid_io_opts {:fs_name => @fs_name} end - - def check_params(db) - if !db.is_a?(Mongo::DB) - raise MongoArgumentError, "db must be an instance of Mongo::DB." - end - end end end diff --git a/lib/mongo/gridfs/grid_file_system.rb b/lib/mongo/gridfs/grid_file_system.rb index 990d9ac..9929b98 100644 --- a/lib/mongo/gridfs/grid_file_system.rb +++ b/lib/mongo/gridfs/grid_file_system.rb @@ -19,8 +19,13 @@ module Mongo # WARNING: This class is part of a new, experimental GridFS API. Subject to change. 
class GridFileSystem < Grid - def initialize(db, fs_name=DEFAULT_FS_NAME) - super + def initialize(db, fs_name=Grid::DEFAULT_FS_NAME) + raise MongoArgumentError, "db must be a Mongo::DB." unless db.is_a?(Mongo::DB) + + @db = db + @files = @db["#{fs_name}.files"] + @chunks = @db["#{fs_name}.chunks"] + @fs_name = fs_name @files.create_index([['filename', 1], ['uploadDate', -1]]) @default_query_opts = {:sort => [['filename', 1], ['uploadDate', -1]], :limit => 1} @@ -39,26 +44,18 @@ module Mongo result end - def put(data, filename, opts={}) - opts.merge!(default_grid_io_opts(filename)) - file = GridIO.new(@files, @chunks, filename, 'w', opts) - file.write(data) - file.close - file.files_id - end - - def get(filename, opts={}) - opts.merge!(default_grid_io_opts(filename)) - GridIO.new(@files, @chunks, filename, 'r', opts) - end - - def delete(filename, opts={}) - ids = @files.find({'filename' => filename}, ['_id']) - ids.each do |id| - @files.remove({'_id' => id}) - @chunks.remove('files_id' => id) + def delete(filename) + files = @files.find({'filename' => filename}, :fields => ['_id']) + files.each do |file| + @files.remove({'_id' => file['_id']}) + @chunks.remove({'files_id' => file['_id']}) end end + alias_method :unlink, :delete + + def remove_previous_versions + ids = @files.find({'filename' => filename}, :sort => [['filename', 1]]) + end private diff --git a/lib/mongo/gridfs/grid_io.rb b/lib/mongo/gridfs/grid_io.rb index 2eb5c89..436ce32 100644 --- a/lib/mongo/gridfs/grid_io.rb +++ b/lib/mongo/gridfs/grid_io.rb @@ -14,6 +14,8 @@ # limitations under the License. # ++ +require 'digest/md5' + module Mongo # WARNING: This is part of a new, experimental GridFS API. Subject to change. 
@@ -21,7 +23,8 @@ module Mongo DEFAULT_CHUNK_SIZE = 256 * 1024 DEFAULT_CONTENT_TYPE = 'binary/octet-stream' - attr_reader :content_type, :chunk_size, :upload_date, :files_id, :filename, :metadata + attr_reader :content_type, :chunk_size, :upload_date, :files_id, :filename, + :metadata, :server_md5, :client_md5 def initialize(files, chunks, filename, mode, opts={}) @files = files @@ -31,6 +34,8 @@ module Mongo @query = opts[:query] || {} @query_opts = opts[:query_opts] || {} @fs_name = opts[:fs_name] || Grid::DEFAULT_FS_NAME + @safe = opts[:safe] || false + @local_md5 = Digest::MD5.new if @safe case @mode when 'r' then init_read(opts) @@ -68,11 +73,21 @@ module Mongo def write(io) raise GridError, "file not opened for write" unless @mode[0] == ?w if io.is_a? String + if @safe + @local_md5.update(io) + end write_string(io) else length = 0 - while(string = io.read(@chunk_size)) - length += write_string(string) + if @safe + while(string = io.read(@chunk_size)) + @local_md5.update(string) + length += write_string(string) + end + else + while(string = io.read(@chunk_size)) + length += write_string(string) + end end length end @@ -266,14 +281,24 @@ module Mongo h['uploadDate'] = @upload_date h['aliases'] = @aliases h['metadata'] = @metadata + h['md5'] = get_md5 + h + end - # Get a server-side md5. + # Get a server-side md5 and validate against the client if running in safe mode. 
+ def get_md5 md5_command = OrderedHash.new md5_command['filemd5'] = @files_id md5_command['root'] = @fs_name - h['md5'] = @files.db.command(md5_command)['md5'] - - h + @server_md5 = @files.db.command(md5_command)['md5'] + if @safe + @client_md5 = @local_md5.hexdigest + if @client_md5 != @server_md5 + raise GridError, "File on server failed MD5 check" + end + end + + @server_md5 end end end diff --git a/test/grid_file_system_test.rb b/test/grid_file_system_test.rb index 99628e8..dc276ba 100644 --- a/test/grid_file_system_test.rb +++ b/test/grid_file_system_test.rb @@ -105,6 +105,29 @@ class GridFileSystemTest < Test::Unit::TestCase should "contain the new data" do assert_equal @new_data, @new.read, "Expected DATA" end + + context "and on a second overwrite" do + setup do + sleep(2) + new_data = "NEW" * 1000 + @grid.open('sample', 'w') do |f| + f.write new_data + end + + @ids = @db['fs.files'].find({'filename' => 'sample'}).map {|file| file['_id']} + end + + should "write a third version of the file" do + assert_equal 3, @db['fs.files'].find({'filename' => 'sample'}).count + assert_equal 3, @db['fs.chunks'].find({'files_id' => {'$in' => @ids}}).count + end + + should "remove all versions and their data on delete" do + @grid.delete('sample') + assert_equal 0, @db['fs.files'].find({'filename' => 'sample'}).count + assert_equal 0, @db['fs.chunks'].find({'files_id' => {'$in' => @ids}}).count + end + end end end diff --git a/test/grid_io_test.rb b/test/grid_io_test.rb index dcf15df..ad307a7 100644 --- a/test/grid_io_test.rb +++ b/test/grid_io_test.rb @@ -9,6 +9,7 @@ class GridIOTest < Test::Unit::TestCase ENV['MONGO_RUBY_DRIVER_PORT'] || Connection::DEFAULT_PORT).db('ruby-mongo-test') @files = @db.collection('fs.files') @chunks = @db.collection('fs.chunks') + @chunks.create_index([['files_id', Mongo::ASCENDING], ['n', Mongo::ASCENDING]]) end teardown do @@ -32,6 +33,24 @@ class GridIOTest < Test::Unit::TestCase assert_equal 1000, 
file.chunk_size end end + + context "Grid MD5 check" do + + should "run in safe mode" do + file = GridIO.new(@files, @chunks, 'smallfile', 'w', :safe => true) + file.write("DATA" * 100) + assert file.close + assert_equal file.server_md5, file.client_md5 + end + + should "validate with a large file" do + io = File.open(File.join(File.dirname(__FILE__), 'data', 'sample_file.pdf'), 'r') + file = GridIO.new(@files, @chunks, 'bigfile', 'w', :safe => true) + file.write(io) + assert file.close + assert_equal file.server_md5, file.client_md5 + end + end end end