New GridFS Version 1.0 storage format.

This commit is contained in:
Jim Menard 2009-01-30 16:44:29 -05:00
parent b0612054f9
commit af2b8b61d0
5 changed files with 153 additions and 164 deletions

View File

@ -1,5 +1,3 @@
require 'mongo/types/binary'
require 'mongo/types/dbref'
require 'mongo/types/objectid'
require 'mongo/util/byte_buffer'
require 'mongo/util/ordered_hash'
@ -19,10 +17,10 @@ module XGen
attr_reader :object_id, :chunk_number
attr_accessor :data
def initialize(db_collection, mongo_object={})
@coll = db_collection
def initialize(file, mongo_object={})
@file = file
@object_id = mongo_object['_id'] || XGen::Mongo::Driver::ObjectID.new
@chunk_number = mongo_object['cn'] || 1
@chunk_number = mongo_object['n'] || 0
@data = ByteBuffer.new
case mongo_object['data']
@ -32,27 +30,11 @@ module XGen
@data.put_array(mongo_object['data'].to_a)
when Array
@data.put_array(mongo_object['data'])
when nil
else
raise "illegal chunk format; data is #{mongo_object['data'] ? (' ' + mongo_object['data'].class.name) : 'nil'}"
end
@data.rewind
@next_chunk_dbref = mongo_object['next']
end
def has_next?
@next_chunk_dbref
end
def next
return @next_chunk if @next_chunk
return nil unless @next_chunk_dbref
row = @coll.find({'_id' => @next_chunk_dbref.object_id}).next_object
@next_chunk = self.class.new(@coll, row) if row
@next_chunk
end
def next=(chunk)
@next_chunk = chunk
@next_chunk_dbref = XGen::Mongo::Driver::DBRef.new(nil, nil, @coll.db, '_chunks', chunk.object_id)
end
def pos; @data.position; end
@ -62,14 +44,6 @@ module XGen
def size; @data.size; end
alias_method :length, :size
def empty?
@data.length == 0
end
def clear
@data.clear
end
# Erase all data after current position.
def truncate
if @data.position < @data.length
@ -88,16 +62,17 @@ module XGen
end
def save
@coll.remove({'_id' => @object_id}) if @object_id
@coll.insert(to_mongo_object)
coll = @file.chunk_collection
coll.remove({'_id' => @object_id}) if @object_id
coll.insert(to_mongo_object)
end
def to_mongo_object
h = OrderedHash.new
h['_id'] = @object_id
h['cn'] = @chunk_number
h['files_id'] = @file.files_id
h['n'] = @chunk_number
h['data'] = data
h['next'] = @next_chunk_dbref
h
end

View File

@ -1,4 +1,3 @@
require 'mongo/types/dbref'
require 'mongo/types/objectid'
require 'mongo/util/ordered_hash'
require 'mongo/gridfs/chunk'
@ -41,7 +40,9 @@ module XGen
# Default is DEFAULT_CONTENT_TYPE
attr_accessor :content_type
attr_reader :object_id
attr_accessor :metadata
attr_reader :files_id
# Time that the file was first saved.
attr_reader :upload_date
@ -52,8 +53,8 @@ module XGen
class << self
def exist?(db, name)
db.collection('_files').find({'filename' => name}).next_object.nil?
def exist?(db, name, root_collection=DEFAULT_ROOT_COLLECTION)
db.collection("#{root_collection}.files").find({'filename' => name}).next_object != nil
end
def open(db, name, mode, options={})
@ -84,7 +85,7 @@ module XGen
names.each { |name|
gs = GridStore.new(db, name)
gs.send(:delete_chunks)
db.collection('_files').remove('_id' => gs.object_id)
gs.collection.remove('_id' => gs.files_id)
}
end
alias_method :delete, :unlink
@ -97,62 +98,81 @@ module XGen
# Mode may only be 'r', 'w', or 'w+'.
#
# Options:
# Options. Descriptions start with a list of the modes for which that
# option is legitimate.
#
# :chunk_size :: Ignored if mode is 'r' or 'w+'. Sets chunk size for
# files opened for writing ('w'). See also #chunk_size=
# which may only be called before any data is written.
# :root :: (r, w, w+) Name of root collection to use, instead of
# DEFAULT_ROOT_COLLECTION.
#
# :content_type :: Ignored if mode is 'r' or 'w+'. String. Default
# value is DEFAULT_CONTENT_TYPE. See also
# #content_type=
# :metadata :: (w, w+) A hash containing any data you want persisted as
# this file's metadata. See also #metadata=
#
# :chunk_size :: (w) Sets chunk size for files opened for writing.
# See also #chunk_size=, which may only be called before
# any data is written.
#
# :content_type :: (w) Default value is DEFAULT_CONTENT_TYPE. See
# also #content_type=
def initialize(db, name, mode='r', options={})
@db, @filename, @mode = db, name, mode
@root = options[:root] || DEFAULT_ROOT_COLLECTION
doc = @db.collection('_files').find({'filename' => @filename}).next_object
doc = collection.find({'filename' => @filename}).next_object
if doc
@object_id = doc['_id']
@files_id = doc['_id']
@content_type = doc['contentType']
@chunk_size = doc['chunkSize']
@upload_date = doc['uploadDate']
@aliases = doc['aliases']
@length = doc['length']
fc_id = doc['next']
if fc_id
coll = @db.collection('_chunks')
row = coll.find({'_id' => fc_id.object_id}).next_object
@first_chunk = row ? Chunk.new(coll, row) : nil
else
@first_chunk = nil
end
@metadata = doc['metadata']
else
@upload_date = Time.new
@chunk_size = Chunk::DEFAULT_CHUNK_SIZE
@files_id = XGen::Mongo::Driver::ObjectID.new
@content_type = DEFAULT_CONTENT_TYPE
@chunk_size = Chunk::DEFAULT_CHUNK_SIZE
@length = 0
end
case mode
when 'r'
@curr_chunk = @first_chunk
@curr_chunk = nth_chunk(0)
@position = 0
when 'w'
chunk_collection.create_index("chunk_index", ['files_id', 'n'])
delete_chunks
@first_chunk = @curr_chunk = nil
@curr_chunk = Chunk.new(self, 'n' => 0)
@content_type = options[:content_type] if options[:content_type]
@chunk_size = options[:chunk_size] if options[:chunk_size]
@metadata = options[:metadata] if options[:metadata]
@position = 0
when 'w+'
@curr_chunk = find_last_chunk
chunk_collection.create_index("chunk_index", ['files_id', 'n'])
@curr_chunk = nth_chunk(last_chunk_number) || Chunk.new(self, 'n' => 0) # might be empty
@curr_chunk.pos = @curr_chunk.data.length if @curr_chunk
@metadata = options[:metadata] if options[:metadata]
@position = @length
else
raise "error: illegal mode #{mode}"
end
@lineno = 0
@pushback_byte = nil
end
# Returns the collection holding this store's file documents. Its name is
# the value of @root followed by ".files".
def collection
  files_collection_name = "#{@root}.files"
  @db.collection(files_collection_name)
end
# Returns the collection in which chunk documents are stored. Its name is
# the value of @root followed by ".chunks".
def chunk_collection
  chunks_collection_name = "#{@root}.chunks"
  @db.collection(chunks_collection_name)
end
# Change chunk size. Can only change if the file is opened for write
# and the first chunk's size is zero.
# and no data has yet been written.
def chunk_size=(size)
unless @mode[0] == ?w && @first_chunk == nil
unless @mode[0] == ?w && @position == 0 && @upload_date == nil
raise "error: can only change chunk size if open for write and no data written."
end
@chunk_size = size
@ -166,13 +186,15 @@ module XGen
if @pushback_byte
byte = @pushback_byte
@pushback_byte = nil
@position += 1
byte
elsif eof?
nil
else
if @curr_chunk.eof?
@curr_chunk = @curr_chunk.next
@curr_chunk = nth_chunk(@curr_chunk.chunk_number + 1)
end
@position += 1
@curr_chunk.getc
end
end
@ -237,6 +259,7 @@ module XGen
# Stores +byte+ as the single pushed-back byte and moves the recorded
# position back by one. Returns the new position.
def ungetc(byte)
  @pushback_byte = byte
  @position = @position - 1
end
#---
@ -244,15 +267,12 @@ module XGen
#+++
def putc(byte)
chunks = @db.collection('_chunks')
if @curr_chunk == nil
@first_chunk = @curr_chunk = Chunk.new(chunks, 'cn' => 1)
elsif @curr_chunk.pos == @chunk_size
prev_chunk = @curr_chunk
@curr_chunk = Chunk.new(chunks, 'cn' => prev_chunk.chunk_number + 1)
prev_chunk.next = @curr_chunk
prev_chunk.save
if @curr_chunk.pos == @chunk_size
prev_chunk_number = @curr_chunk.chunk_number
@curr_chunk.save
@curr_chunk = Chunk.new(self, 'n' => prev_chunk_number + 1)
end
@position += 1
@curr_chunk.putc(byte)
end
@ -303,7 +323,7 @@ module XGen
def eof
raise IOError.new("stream not open for reading") unless @mode[0] == ?r
@curr_chunk == nil || (@curr_chunk.eof? && !@curr_chunk.has_next?)
@position >= @length
end
alias_method :eof?, :eof
@ -312,36 +332,42 @@ module XGen
#+++
def rewind
if @curr_chunk != @first_chunk
@curr_chunk.save unless @curr_chunk == nil || @curr_chunk.empty?
@curr_chunk == @first_chunk
if @curr_chunk.chunk_number != 0
if @mode[0] == ?w
delete_chunks
@curr_chunk = Chunk.new(self, 'n' => 0)
else
@curr_chunk == nth_chunk(0)
end
end
@curr_chunk.pos = 0
@lineno = 0
# TODO if writing, delete all other chunks on first write
@position = 0
end
# Computes a target byte offset from +pos+ and +whence+ (IO::SEEK_SET,
# IO::SEEK_CUR or IO::SEEK_END), then selects the chunk containing that
# offset and sets the position inside it. The final expression is 0.
#
# NOTE(review): this listing appears to interleave an earlier commented-out
# draft (plus its "not yet implemented" raise) with the working
# implementation — confirm which lines are live before relying on it.
def seek(pos, whence=IO::SEEK_SET)
# target_pos = case whence
# when IO::SEEK_CUR
# tell + pos
# when IO::SEEK_END
# NOTE(review): no else branch, so any other +whence+ leaves target_pos nil
# and the division below would fail — confirm whether that is intended.
target_pos = case whence
when IO::SEEK_CUR
@position + pos
when IO::SEEK_END
# NOTE(review): IO#seek adds the offset to the end of the stream for
# SEEK_END; this subtracts it, so callers must pass a positive distance
# from the end. Confirm which convention is wanted.
@length - pos
when IO::SEEK_SET
pos
end
# @curr_chunk.save if @curr_chunk
# target_chunk_num = ((pos / @chunk_size) + 1).to_i
# target_chunk_pos = pos % @chunk_size
# if @curr_chunk == nil || @curr_chunk.chunk_number != target_chunk_num
# end
# @curr_chunk.pos = target_chunk_pos
# 0
# TODO
# NOTE(review): as listed, this raise runs unconditionally and nothing below
# it is reached.
raise "not yet implemented"
# Chunks are numbered from 0, so the chunk number is offset / chunk size.
new_chunk_number = (target_pos / @chunk_size).to_i
if new_chunk_number != @curr_chunk.chunk_number
# When writing, save the current chunk before switching to another one.
@curr_chunk.save if @mode[0] == ?w
@curr_chunk = nth_chunk(new_chunk_number)
end
@position = target_pos
@curr_chunk.pos = @position % @chunk_size
0
end
def tell
return 0 unless @curr_chunk
@chunk_size * (@curr_chunk.chunk_number - 1) + @curr_chunk.pos
end
@position
end
#---
# ================ closing ================
@ -351,13 +377,12 @@ module XGen
if @mode[0] == ?w
if @curr_chunk
@curr_chunk.truncate
@curr_chunk.save
@curr_chunk.save if @curr_chunk.pos > 0
end
files = @db.collection('_files')
if @object_id
files.remove('_id' => @object_id)
files = collection
if @upload_date
files.remove('_id' => @files_id)
else
@object_id = XGen::Mongo::Driver::ObjectID.new
@upload_date = Time.now
end
files.insert(to_mongo_object)
@ -377,38 +402,29 @@ module XGen
def to_mongo_object
h = OrderedHash.new
h['_id'] = @object_id
h['_id'] = @files_id
h['filename'] = @filename
h['contentType'] = @content_type
h['length'] = @curr_chunk ? (@curr_chunk.chunk_number - 1) * @chunk_size + @curr_chunk.pos : 0
h['length'] = @curr_chunk ? @curr_chunk.chunk_number * @chunk_size + @curr_chunk.pos : 0
h['chunkSize'] = @chunk_size
h['uploadDate'] = @upload_date
h['aliases'] = @aliases
h['next'] = XGen::Mongo::Driver::DBRef.new(nil, nil, @db, '_chunks', @first_chunk.object_id) if @first_chunk
h['metadata'] = @metadata
h
end
def find_last_chunk
chunk = @curr_chunk || @first_chunk
while chunk.has_next?
chunk = chunk.next
end
chunk
end
def save_chunk(chunk)
chunks = @db.collection('_chunks')
end
def delete_chunks
chunk = @first_chunk
coll = @db.collection('_chunks')
while chunk
next_chunk = chunk.next
coll.remove({'_id' => chunk.object_id})
chunk = next_chunk
end
@first_chunk = @curr_chunk = nil
chunk_collection.remove({'files_id' => @files_id}) if @files_id
@curr_chunk = nil
end
# Returns the chunk numbered +n+ for this file.
#
# Looks up the chunk document whose 'files_id' is @files_id and whose 'n'
# is +n+ in chunk_collection. If one exists, a Chunk is built from it;
# otherwise a new, empty Chunk is returned. Never returns nil.
def nth_chunk(n)
  selector = {'files_id' => @files_id, 'n' => n}
  mongo_chunk = chunk_collection.find(selector).next_object
  # A chunk that is not stored yet must still carry the number that was
  # asked for. Built from an empty hash, the Chunk would default to number
  # 0, which breaks chunk numbering once it is written and saved.
  Chunk.new(self, mongo_chunk || {'n' => n})
end
# Returns @length divided by @chunk_size, converted to an integer.
def last_chunk_number
  quotient = @length / @chunk_size
  quotient.to_i
end
end

View File

@ -1,6 +1,6 @@
Gem::Specification.new do |s|
s.name = 'mongo'
s.version = '0.4.1'
s.version = '0.4.2'
s.platform = Gem::Platform::RUBY
s.summary = 'Simple pure-Ruby driver for the 10gen Mongo DB'
s.description = 'A pure-Ruby driver for the 10gen Mongo DB. For more information about Mongo, see http://www.mongodb.org.'

View File

@ -1,6 +1,5 @@
$LOAD_PATH[0,0] = File.join(File.dirname(__FILE__), '..', 'lib')
require 'test/unit'
require 'rubygems'
require 'mongo'
require 'mongo/gridfs'
@ -14,12 +13,13 @@ class ChunkTest < Test::Unit::TestCase
@port = ENV['MONGO_RUBY_DRIVER_PORT'] || Mongo::DEFAULT_PORT
@db = Mongo.new(@host, @port).db('ruby-mongo-utils-test')
@files = @db.collection('_files')
@chunks = @db.collection('_chunks')
@files = @db.collection('gridfs.files')
@chunks = @db.collection('gridfs.chunks')
@chunks.clear
@files.clear
@c = Chunk.new(@chunks)
@f = GridStore.new(@db, 'foobar', 'w')
@c = @f.instance_variable_get('@curr_chunk')
end
def teardown
@ -30,35 +30,20 @@ class ChunkTest < Test::Unit::TestCase
end
end
def test_has_next
assert !@c.has_next?
@c.next = Chunk.new(@chunks)
assert @c.has_next?
end
def test_assign_next
assert !@c.has_next?
assert_nil @c.next
c2 = Chunk.new(@chunks)
@c.next = c2
assert_same c2, @c.next
end
def test_pos
assert_equal 0, @c.pos
assert @c.eof? # since data is empty
b = ByteBuffer.new
3.times { |i| b.put(i) }
c = Chunk.new(@db, 'data' => b)
c = Chunk.new(@f, 'data' => b)
assert !c.eof?
end
def test_getc
b = ByteBuffer.new
3.times { |i| b.put(i) }
c = Chunk.new(@chunks, 'data' => b)
c = Chunk.new(@f, 'data' => b)
assert !c.eof?
assert_equal 0, c.getc
@ -82,12 +67,6 @@ class ChunkTest < Test::Unit::TestCase
assert @c.eof?
end
def test_empty
assert @c.empty?
@c.putc(1)
assert !@c.empty?
end
def test_truncate
10.times { |i| @c.putc(i) }
assert_equal 10, @c.size

View File

@ -1,6 +1,5 @@
$LOAD_PATH[0,0] = File.join(File.dirname(__FILE__), '..', 'lib')
require 'test/unit'
require 'rubygems'
require 'mongo'
require 'mongo/gridfs'
@ -14,8 +13,8 @@ class GridStoreTest < Test::Unit::TestCase
@port = ENV['MONGO_RUBY_DRIVER_PORT'] || Mongo::DEFAULT_PORT
@db = Mongo.new(@host, @port).db('ruby-mongo-utils-test')
@files = @db.collection('_files')
@chunks = @db.collection('_chunks')
@files = @db.collection('gridfs.files')
@chunks = @db.collection('gridfs.chunks')
@chunks.clear
@files.clear
@ -30,17 +29,24 @@ class GridStoreTest < Test::Unit::TestCase
end
end
# GridStore.exist? is true for the stored file 'foobar' under the default
# root, and false for an unknown name or a different root collection.
def test_exist
  stored = GridStore.exist?(@db, 'foobar')
  assert stored

  unknown_name = GridStore.exist?(@db, 'does_not_exist')
  assert !unknown_name

  other_root = GridStore.exist?(@db, 'foobar', 'another_root')
  assert !other_root
end
def test_small_write
rows = @files.find({'filename' => 'foobar'}).to_a
assert_not_nil rows
assert_equal 1, rows.length
assert_equal 1, rows.length
row = rows[0]
assert_not_nil row
assert_kind_of DBRef, row['next']
first_chunk_id = row['next'].object_id
first_chunk = @chunks.find({'_id' => first_chunk_id}).next_object
file_id = row['_id']
assert_kind_of ObjectID, file_id
rows = @chunks.find({'files_id' => file_id}).to_a
assert_not_nil rows
assert_equal 1, rows.length
end
def test_small_file
@ -61,11 +67,11 @@ class GridStoreTest < Test::Unit::TestCase
assert_equal "hello", GridStore.read(@db, 'foobar', 5)
end
# TODO seek not yet implemented
# def test_read_with_offset
# assert_equal "world", GridStore.read(@db, 'foobar', 5, 7)
# assert_equal "world!", GridStore.read(@db, 'foobar', nil, 7)
# end
# Reads from offset 7, first with a length of 5 and then with no length
# limit. Also exercises seek.
def test_read_with_offset
  [["world", 5], ["world!", nil]].each do |expected, length|
    assert_equal expected, GridStore.read(@db, 'foobar', length, 7)
  end
end
def test_multi_chunk
@chunks.clear
@ -131,7 +137,7 @@ class GridStoreTest < Test::Unit::TestCase
def test_save_empty_file
@chunks.clear
@files.clear
GridStore.open(@db, 'empty', 'w')
GridStore.open(@db, 'empty', 'w') {} # re-write with zero bytes
assert_equal 1, @files.count
assert_equal 0, @chunks.count
end
@ -213,4 +219,17 @@ class GridStoreTest < Test::Unit::TestCase
assert_equal 'image/jpg', ct
end
# Opening a GridStore with an unsupported mode must raise an error whose
# message names the offending mode.
def test_unknown_mode
  begin
    GridStore.open(@db, 'foobar', 'x')
    fail 'should have seen "illegal mode" error raised'
  rescue => ex
    assert_equal "error: illegal mode x", ex.to_s
  end
end
# Metadata is nil on the stored file, can be assigned when the file is
# opened with 'w+', and is readable afterwards.
def test_metadata
  GridStore.open(@db, 'foobar', 'r') do |f|
    assert_nil f.metadata
  end

  GridStore.open(@db, 'foobar', 'w+') do |f|
    f.metadata = {'a' => 1}
  end

  GridStore.open(@db, 'foobar', 'r') do |f|
    assert_equal({'a' => 1}, f.metadata)
  end
end
end