New GridFS Version 1.0 storage format.

This commit is contained in:
Jim Menard 2009-01-30 16:44:29 -05:00
parent b0612054f9
commit af2b8b61d0
5 changed files with 153 additions and 164 deletions

View File

@ -1,5 +1,3 @@
require 'mongo/types/binary'
require 'mongo/types/dbref'
require 'mongo/types/objectid' require 'mongo/types/objectid'
require 'mongo/util/byte_buffer' require 'mongo/util/byte_buffer'
require 'mongo/util/ordered_hash' require 'mongo/util/ordered_hash'
@ -19,10 +17,10 @@ module XGen
attr_reader :object_id, :chunk_number attr_reader :object_id, :chunk_number
attr_accessor :data attr_accessor :data
def initialize(db_collection, mongo_object={}) def initialize(file, mongo_object={})
@coll = db_collection @file = file
@object_id = mongo_object['_id'] || XGen::Mongo::Driver::ObjectID.new @object_id = mongo_object['_id'] || XGen::Mongo::Driver::ObjectID.new
@chunk_number = mongo_object['cn'] || 1 @chunk_number = mongo_object['n'] || 0
@data = ByteBuffer.new @data = ByteBuffer.new
case mongo_object['data'] case mongo_object['data']
@ -32,27 +30,11 @@ module XGen
@data.put_array(mongo_object['data'].to_a) @data.put_array(mongo_object['data'].to_a)
when Array when Array
@data.put_array(mongo_object['data']) @data.put_array(mongo_object['data'])
when nil
else
raise "illegal chunk format; data is #{mongo_object['data'] ? (' ' + mongo_object['data'].class.name) : 'nil'}"
end end
@data.rewind @data.rewind
@next_chunk_dbref = mongo_object['next']
end
def has_next?
@next_chunk_dbref
end
def next
return @next_chunk if @next_chunk
return nil unless @next_chunk_dbref
row = @coll.find({'_id' => @next_chunk_dbref.object_id}).next_object
@next_chunk = self.class.new(@coll, row) if row
@next_chunk
end
def next=(chunk)
@next_chunk = chunk
@next_chunk_dbref = XGen::Mongo::Driver::DBRef.new(nil, nil, @coll.db, '_chunks', chunk.object_id)
end end
def pos; @data.position; end def pos; @data.position; end
@ -62,14 +44,6 @@ module XGen
def size; @data.size; end def size; @data.size; end
alias_method :length, :size alias_method :length, :size
def empty?
@data.length == 0
end
def clear
@data.clear
end
# Erase all data after current position. # Erase all data after current position.
def truncate def truncate
if @data.position < @data.length if @data.position < @data.length
@ -88,16 +62,17 @@ module XGen
end end
# Writes this chunk to the owning file's chunk collection. A stored
# document with the same _id is removed first, so saving again replaces
# the earlier copy instead of adding a second one.
def save
  coll = @file.chunk_collection
  coll.remove({'_id' => @object_id}) if @object_id
  coll.insert(to_mongo_object)
end
# Builds the document stored for this chunk: its own _id, the id of the
# file it belongs to ('files_id'), its chunk number ('n') and its data.
def to_mongo_object
  h = OrderedHash.new
  h['_id'] = @object_id
  h['files_id'] = @file.files_id
  h['n'] = @chunk_number
  h['data'] = data
  h
end

View File

@ -1,4 +1,3 @@
require 'mongo/types/dbref'
require 'mongo/types/objectid' require 'mongo/types/objectid'
require 'mongo/util/ordered_hash' require 'mongo/util/ordered_hash'
require 'mongo/gridfs/chunk' require 'mongo/gridfs/chunk'
@ -41,7 +40,9 @@ module XGen
# Default is DEFAULT_CONTENT_TYPE # Default is DEFAULT_CONTENT_TYPE
attr_accessor :content_type attr_accessor :content_type
attr_reader :object_id attr_accessor :metadata
attr_reader :files_id
# Time that the file was first saved. # Time that the file was first saved.
attr_reader :upload_date attr_reader :upload_date
@ -52,8 +53,8 @@ module XGen
class << self class << self
# Returns true if a file document with the given filename exists in
# "<root_collection>.files" of +db+, false otherwise.
def exist?(db, name, root_collection=DEFAULT_ROOT_COLLECTION)
  !db.collection("#{root_collection}.files").find({'filename' => name}).next_object.nil?
end
def open(db, name, mode, options={}) def open(db, name, mode, options={})
@ -84,7 +85,7 @@ module XGen
names.each { |name| names.each { |name|
gs = GridStore.new(db, name) gs = GridStore.new(db, name)
gs.send(:delete_chunks) gs.send(:delete_chunks)
db.collection('_files').remove('_id' => gs.object_id) gs.collection.remove('_id' => gs.files_id)
} }
end end
alias_method :delete, :unlink alias_method :delete, :unlink
@ -97,62 +98,81 @@ module XGen
# Mode may only be 'r', 'w', or 'w+'.
#
# Options. Descriptions start with a list of the modes for which that
# option is legitimate.
#
# :root :: (r, w, w+) Name of root collection to use, instead of
#          DEFAULT_ROOT_COLLECTION.
#
# :metadata:: (w, w+) A hash containing any data you want persisted as
#             this file's metadata. See also metadata=
#
# :chunk_size :: (w) Sets chunk size for files opened for writing
#                See also chunk_size= which may only be called before
#                any data is written.
#
# :content_type :: (w) Default value is DEFAULT_CONTENT_TYPE. See
#                  also #content_type=
#
# Raises a RuntimeError ("error: illegal mode ...") for any other mode.
def initialize(db, name, mode='r', options={})
  @db, @filename, @mode = db, name, mode
  @root = options[:root] || DEFAULT_ROOT_COLLECTION

  # Load attributes from the stored file document when one exists;
  # otherwise start a fresh file with default settings.
  doc = collection.find({'filename' => @filename}).next_object
  if doc
    @files_id = doc['_id']
    @content_type = doc['contentType']
    @chunk_size = doc['chunkSize']
    @upload_date = doc['uploadDate']
    @aliases = doc['aliases']
    @length = doc['length']
    @metadata = doc['metadata']
  else
    @files_id = XGen::Mongo::Driver::ObjectID.new
    @content_type = DEFAULT_CONTENT_TYPE
    @chunk_size = Chunk::DEFAULT_CHUNK_SIZE
    @length = 0
  end

  case mode
  when 'r'
    @curr_chunk = nth_chunk(0)
    @position = 0
  when 'w'
    chunk_collection.create_index("chunk_index", ['files_id', 'n'])
    # Writing replaces the file: drop stored chunks and start at chunk 0.
    delete_chunks
    @curr_chunk = Chunk.new(self, 'n' => 0)
    @content_type = options[:content_type] if options[:content_type]
    @chunk_size = options[:chunk_size] if options[:chunk_size]
    @metadata = options[:metadata] if options[:metadata]
    @position = 0
  when 'w+'
    chunk_collection.create_index("chunk_index", ['files_id', 'n'])
    # Append: continue at the end of the last chunk (which might be empty).
    @curr_chunk = nth_chunk(last_chunk_number) || Chunk.new(self, 'n' => 0)
    @curr_chunk.pos = @curr_chunk.data.length
    @metadata = options[:metadata] if options[:metadata]
    @position = @length
  else
    raise "error: illegal mode #{mode}"
  end

  @lineno = 0
  @pushback_byte = nil
end
# Returns collection used for storing file documents. Depends on value
# of @root.
def collection
  @db.collection("#{@root}.files")
end
# Returns collection used for storing chunks. Depends on value of
# @root.
def chunk_collection
  @db.collection("#{@root}.chunks")
end
# Change chunk size. Can only change if the file is opened for write # Change chunk size. Can only change if the file is opened for write
# and the first chunk's size is zero. # and no data has yet been written.
def chunk_size=(size) def chunk_size=(size)
unless @mode[0] == ?w && @first_chunk == nil unless @mode[0] == ?w && @position == 0 && @upload_date == nil
raise "error: can only change chunk size if open for write and no data written." raise "error: can only change chunk size if open for write and no data written."
end end
@chunk_size = size @chunk_size = size
@ -166,13 +186,15 @@ module XGen
if @pushback_byte if @pushback_byte
byte = @pushback_byte byte = @pushback_byte
@pushback_byte = nil @pushback_byte = nil
@position += 1
byte byte
elsif eof? elsif eof?
nil nil
else else
if @curr_chunk.eof? if @curr_chunk.eof?
@curr_chunk = @curr_chunk.next @curr_chunk = nth_chunk(@curr_chunk.chunk_number + 1)
end end
@position += 1
@curr_chunk.getc @curr_chunk.getc
end end
end end
@ -237,6 +259,7 @@ module XGen
# Pushes +byte+ back so it can be read again, and moves the logical
# position back by one to match.
def ungetc(byte)
  @pushback_byte = byte
  @position -= 1
end
#--- #---
@ -244,15 +267,12 @@ module XGen
#+++ #+++
# Appends +byte+ to the current chunk. When the current chunk is full
# (its pos equals @chunk_size) it is saved and a new chunk with the next
# chunk number becomes current before the byte is written.
def putc(byte)
  if @curr_chunk.pos == @chunk_size
    prev_chunk_number = @curr_chunk.chunk_number
    @curr_chunk.save
    @curr_chunk = Chunk.new(self, 'n' => prev_chunk_number + 1)
  end
  @position += 1
  @curr_chunk.putc(byte)
end
@ -303,7 +323,7 @@ module XGen
# Returns true when the read position has reached (or passed) the file
# length. Raises IOError unless the store was opened for reading.
def eof
  raise IOError.new("stream not open for reading") unless @mode[0] == ?r
  @position >= @length
end
alias_method :eof?, :eof alias_method :eof?, :eof
@ -312,35 +332,41 @@ module XGen
#+++ #+++
# Moves back to the start of the file and resets the line counter.
#
# When the current chunk is not chunk 0: in a write mode the stored
# chunks are deleted and a fresh chunk 0 is started; otherwise chunk 0
# is loaded and made current.
def rewind
  if @curr_chunk.chunk_number != 0
    if @mode[0] == ?w
      delete_chunks
      @curr_chunk = Chunk.new(self, 'n' => 0)
    else
      # Assignment, not comparison: actually switch back to chunk 0.
      @curr_chunk = nth_chunk(0)
    end
  end
  @curr_chunk.pos = 0
  @lineno = 0
  @position = 0
end
# Moves the file position, following IO#seek conventions:
#
# IO::SEEK_SET :: +pos+ is an absolute position (the default).
# IO::SEEK_CUR :: +pos+ is relative to the current position.
# IO::SEEK_END :: +pos+ is relative to the end of the file, so pass a
#                 negative value to land before the end.
#
# Returns 0. Raises ArgumentError for any other +whence+.
def seek(pos, whence=IO::SEEK_SET)
  target_pos = case whence
               when IO::SEEK_CUR
                 @position + pos
               when IO::SEEK_END
                 @length + pos
               when IO::SEEK_SET
                 pos
               else
                 raise ArgumentError, "unknown whence: #{whence.inspect}"
               end

  new_chunk_number = (target_pos / @chunk_size).to_i
  if new_chunk_number != @curr_chunk.chunk_number
    # Leaving the current chunk: persist it first when writing.
    @curr_chunk.save if @mode[0] == ?w
    @curr_chunk = nth_chunk(new_chunk_number)
  end
  @position = target_pos
  @curr_chunk.pos = @position % @chunk_size
  0
end
# Returns the current position within the file, as tracked in @position.
def tell
  @position
end
#--- #---
@ -351,13 +377,12 @@ module XGen
if @mode[0] == ?w if @mode[0] == ?w
if @curr_chunk if @curr_chunk
@curr_chunk.truncate @curr_chunk.truncate
@curr_chunk.save @curr_chunk.save if @curr_chunk.pos > 0
end end
files = @db.collection('_files') files = collection
if @object_id if @upload_date
files.remove('_id' => @object_id) files.remove('_id' => @files_id)
else else
@object_id = XGen::Mongo::Driver::ObjectID.new
@upload_date = Time.now @upload_date = Time.now
end end
files.insert(to_mongo_object) files.insert(to_mongo_object)
@ -377,38 +402,29 @@ module XGen
# Builds the document stored for this file in the files collection.
# 'length' is derived from the current chunk: its (zero-based) chunk
# number times the chunk size, plus the position inside it; 0 when there
# is no current chunk.
def to_mongo_object
  h = OrderedHash.new
  h['_id'] = @files_id
  h['filename'] = @filename
  h['contentType'] = @content_type
  h['length'] = @curr_chunk ? @curr_chunk.chunk_number * @chunk_size + @curr_chunk.pos : 0
  h['chunkSize'] = @chunk_size
  h['uploadDate'] = @upload_date
  h['aliases'] = @aliases
  h['metadata'] = @metadata
  h
end
def find_last_chunk
chunk = @curr_chunk || @first_chunk
while chunk.has_next?
chunk = chunk.next
end
chunk
end
def save_chunk(chunk)
chunks = @db.collection('_chunks')
end
# Removes every stored chunk whose 'files_id' matches this file (when a
# files id is set) and clears the current chunk.
def delete_chunks
  chunk_collection.remove({'files_id' => @files_id}) if @files_id
  @curr_chunk = nil
end
# Returns chunk number +n+ of this file. When no such chunk is stored, a
# new empty chunk numbered +n+ is returned, so the caller's chunk
# arithmetic still refers to the requested chunk.
def nth_chunk(n)
  mongo_chunk = chunk_collection.find({'files_id' => @files_id, 'n' => n}).next_object
  Chunk.new(self, mongo_chunk || {'n' => n})
end
# Returns the number of the chunk that contains the end-of-file
# position, computed as @length divided by @chunk_size.
def last_chunk_number
  (@length / @chunk_size).to_i
end
end end

View File

@ -1,6 +1,6 @@
Gem::Specification.new do |s| Gem::Specification.new do |s|
s.name = 'mongo' s.name = 'mongo'
s.version = '0.4.1' s.version = '0.4.2'
s.platform = Gem::Platform::RUBY s.platform = Gem::Platform::RUBY
s.summary = 'Simple pure-Ruby driver for the 10gen Mongo DB' s.summary = 'Simple pure-Ruby driver for the 10gen Mongo DB'
s.description = 'A pure-Ruby driver for the 10gen Mongo DB. For more information about Mongo, see http://www.mongodb.org.' s.description = 'A pure-Ruby driver for the 10gen Mongo DB. For more information about Mongo, see http://www.mongodb.org.'

View File

@ -1,6 +1,5 @@
$LOAD_PATH[0,0] = File.join(File.dirname(__FILE__), '..', 'lib') $LOAD_PATH[0,0] = File.join(File.dirname(__FILE__), '..', 'lib')
require 'test/unit' require 'test/unit'
require 'rubygems'
require 'mongo' require 'mongo'
require 'mongo/gridfs' require 'mongo/gridfs'
@ -14,12 +13,13 @@ class ChunkTest < Test::Unit::TestCase
@port = ENV['MONGO_RUBY_DRIVER_PORT'] || Mongo::DEFAULT_PORT @port = ENV['MONGO_RUBY_DRIVER_PORT'] || Mongo::DEFAULT_PORT
@db = Mongo.new(@host, @port).db('ruby-mongo-utils-test') @db = Mongo.new(@host, @port).db('ruby-mongo-utils-test')
@files = @db.collection('_files') @files = @db.collection('gridfs.files')
@chunks = @db.collection('_chunks') @chunks = @db.collection('gridfs.chunks')
@chunks.clear @chunks.clear
@files.clear @files.clear
@c = Chunk.new(@chunks) @f = GridStore.new(@db, 'foobar', 'w')
@c = @f.instance_variable_get('@curr_chunk')
end end
def teardown def teardown
@ -30,35 +30,20 @@ class ChunkTest < Test::Unit::TestCase
end end
end end
def test_has_next
assert !@c.has_next?
@c.next = Chunk.new(@chunks)
assert @c.has_next?
end
def test_assign_next
assert !@c.has_next?
assert_nil @c.next
c2 = Chunk.new(@chunks)
@c.next = c2
assert_same c2, @c.next
end
# A fresh chunk starts at position 0 and is at eof; a chunk built from
# existing data is not at eof.
def test_pos
  assert_equal 0, @c.pos
  assert @c.eof? # since data is empty

  b = ByteBuffer.new
  3.times { |i| b.put(i) }
  c = Chunk.new(@f, 'data' => b)
  assert !c.eof?
end
def test_getc def test_getc
b = ByteBuffer.new b = ByteBuffer.new
3.times { |i| b.put(i) } 3.times { |i| b.put(i) }
c = Chunk.new(@chunks, 'data' => b) c = Chunk.new(@f, 'data' => b)
assert !c.eof? assert !c.eof?
assert_equal 0, c.getc assert_equal 0, c.getc
@ -82,12 +67,6 @@ class ChunkTest < Test::Unit::TestCase
assert @c.eof? assert @c.eof?
end end
def test_empty
assert @c.empty?
@c.putc(1)
assert !@c.empty?
end
def test_truncate def test_truncate
10.times { |i| @c.putc(i) } 10.times { |i| @c.putc(i) }
assert_equal 10, @c.size assert_equal 10, @c.size

View File

@ -1,6 +1,5 @@
$LOAD_PATH[0,0] = File.join(File.dirname(__FILE__), '..', 'lib') $LOAD_PATH[0,0] = File.join(File.dirname(__FILE__), '..', 'lib')
require 'test/unit' require 'test/unit'
require 'rubygems'
require 'mongo' require 'mongo'
require 'mongo/gridfs' require 'mongo/gridfs'
@ -14,8 +13,8 @@ class GridStoreTest < Test::Unit::TestCase
@port = ENV['MONGO_RUBY_DRIVER_PORT'] || Mongo::DEFAULT_PORT @port = ENV['MONGO_RUBY_DRIVER_PORT'] || Mongo::DEFAULT_PORT
@db = Mongo.new(@host, @port).db('ruby-mongo-utils-test') @db = Mongo.new(@host, @port).db('ruby-mongo-utils-test')
@files = @db.collection('_files') @files = @db.collection('gridfs.files')
@chunks = @db.collection('_chunks') @chunks = @db.collection('gridfs.chunks')
@chunks.clear @chunks.clear
@files.clear @files.clear
@ -30,17 +29,24 @@ class GridStoreTest < Test::Unit::TestCase
end end
end end
# exist? reports true for a stored filename, and false for an unknown
# filename or for a known filename looked up under a different root.
def test_exist
  assert GridStore.exist?(@db, 'foobar')
  [['does_not_exist'], ['foobar', 'another_root']].each do |args|
    assert !GridStore.exist?(@db, *args)
  end
end
# One file document is stored for 'foobar', its _id is an ObjectID, and
# exactly one chunk document refers to it through 'files_id'.
def test_small_write
  rows = @files.find({'filename' => 'foobar'}).to_a
  assert_not_nil rows
  assert_equal 1, rows.length
  row = rows[0]
  assert_not_nil row

  file_id = row['_id']
  assert_kind_of ObjectID, file_id
  rows = @chunks.find({'files_id' => file_id}).to_a
  assert_not_nil rows
  assert_equal 1, rows.length
end
def test_small_file def test_small_file
@ -61,11 +67,11 @@ class GridStoreTest < Test::Unit::TestCase
assert_equal "hello", GridStore.read(@db, 'foobar', 5) assert_equal "hello", GridStore.read(@db, 'foobar', 5)
end end
# Also tests seek
def test_read_with_offset
  assert_equal "world", GridStore.read(@db, 'foobar', 5, 7)
  assert_equal "world!", GridStore.read(@db, 'foobar', nil, 7)
end
def test_multi_chunk def test_multi_chunk
@chunks.clear @chunks.clear
@ -131,7 +137,7 @@ class GridStoreTest < Test::Unit::TestCase
# Opening a file for write with an empty block stores a file document
# but no chunk documents.
def test_save_empty_file
  @chunks.clear
  @files.clear
  GridStore.open(@db, 'empty', 'w') {} # re-write with zero bytes
  assert_equal 1, @files.count
  assert_equal 0, @chunks.count
end
@ -213,4 +219,17 @@ class GridStoreTest < Test::Unit::TestCase
assert_equal 'image/jpg', ct assert_equal 'image/jpg', ct
end end
# Opening with an unsupported mode must raise, with the mode named in
# the error message.
def test_unknown_mode
  begin
    GridStore.open(@db, 'foobar', 'x')
    fail 'should have seen "illegal mode" error raised'
  rescue => ex
    assert_equal "error: illegal mode x", ex.to_s
  end
end
# Metadata is nil until assigned; a value assigned while open in 'w+'
# mode is read back on the next open.
def test_metadata
  GridStore.open(@db, 'foobar', 'r') do |f|
    assert_nil f.metadata
  end
  GridStore.open(@db, 'foobar', 'w+') do |f|
    f.metadata = {'a' => 1}
  end
  GridStore.open(@db, 'foobar', 'r') do |f|
    assert_equal({'a' => 1}, f.metadata)
  end
end
end end