mongo-ruby-driver/lib/mongo/gridfs/grid_store.rb
2010-02-12 18:03:07 -05:00

567 lines
16 KiB
Ruby

# --
# Copyright (C) 2008-2009 10gen Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ++
require 'mongo/types/objectid'
require 'mongo/util/ordered_hash'
require 'mongo/gridfs/chunk'
module GridFS
# GridStore is an IO-like class that provides input and output for
# streams of data to MongoDB.
#
# @example
#
# include GridFS
#
# #Store the text "Hello, world!" in the grid store.
# GridStore.open(database, 'filename', 'w') do |f|
# f.puts "Hello, world!"
# end
#
# # Output "Hello, world!"
# GridStore.open(database, 'filename', 'r') do |f|
# puts f.read
# end
#
# # Add text to the grid store.
# GridStore.open(database, 'filename', 'w+') do |f|
# f.puts "But wait, there's more!"
# end
#
# # Retrieve everything, outputting "Hello, world!\nBut wait, there's more!\n"
# GridStore.open(database, 'filename', 'r') do |f|
# puts f.read
# end
#
# @core gridfs
class GridStore
include Enumerable
DEFAULT_ROOT_COLLECTION = 'fs'
DEFAULT_CONTENT_TYPE = 'text/plain'
attr_accessor :filename
# Array of strings; may be +nil+
attr_accessor :aliases
# Default is DEFAULT_CONTENT_TYPE
attr_accessor :content_type
# Size of file in bytes
attr_reader :length
attr_accessor :metadata
attr_reader :files_id
# Time that the file was first saved.
attr_reader :upload_date
attr_reader :chunk_size
attr_accessor :lineno
attr_reader :md5
def self.default_root_collection
@@default_root_collection ||= DEFAULT_ROOT_COLLECTION
end
def self.default_root_collection=(name)
@@default_root_collection = name
end
# Determine whether a given file exists in the GridStore.
#
# @param [Mongo::DB] a MongoDB database.
# @param [String] name the filename.
# @param [String] root_collection the name of the gridfs root collection.
#
# @return [Boolean]
def self.exist?(db, name, root_collection=GridStore.default_root_collection)
db.collection("#{root_collection}.files").find({'filename' => name}).next_document != nil
end
# Open a GridFS file for reading, writing, or appending. Note that
# this method must be used with a block.
#
# @param [Mongo::DB] a MongoDB database.
# @param [String] name the filename.
# @param [String] mode one of 'r', 'w', or 'w+' for reading, writing,
# and appending, respectively.
# @param [Hash] options any of the options available on
# GridStore initialization.
#
# @see GridStore#initialize.
# @see The various GridStore class methods, e.g., GridStore.open, GridStore.read etc.
def self.open(db, name, mode, options={})
gs = self.new(db, name, mode, options)
result = nil
begin
result = yield gs if block_given?
ensure
gs.close
end
result
end
# Read a file stored in GridFS.
#
# @param [Mongo::DB] db a MongoDB database.
# @param [String] name the name of the file.
# @param [Integer] length the number of bytes to read.
# @param [Integer] offset the number of bytes beyond the
# beginning of the file to start reading.
#
# @return [String] the file data
def self.read(db, name, length=nil, offset=nil)
GridStore.open(db, name, 'r') do |gs|
gs.seek(offset) if offset
gs.read(length)
end
end
# List the contents of all GridFS files stored in the given db and
# root collection.
#
# @param [Mongo::DB] db a MongoDB database.
# @param [String] root_collection the name of the root collection.
#
# @return [Array]
def self.list(db, root_collection=GridStore.default_root_collection)
db.collection("#{root_collection}.files").find().map do |f|
f['filename']
end
end
# Get each line of data from the specified file
# as an array of strings.
#
# @param [Mongo::DB] db a MongoDB database.
# @param [String] name the filename.
# @param [String, Reg] separator
#
# @return [Array]
def self.readlines(db, name, separator=$/)
GridStore.open(db, name, 'r') do |gs|
gs.readlines(separator)
end
end
# Remove one for more files from the given db.
#
# @param [Mongo::Database] db a MongoDB database.
# @param [Array<String>] names the filenames to remove
#
# @return [True]
def self.unlink(db, *names)
names.each do |name|
gs = GridStore.new(db, name)
gs.delete_chunks
gs.collection.remove('_id' => gs.files_id)
end
end
class << self
alias_method :delete, :unlink
end
# Rename a file in this collection. Note that this method uses
# Collection#update, which means that you will not be notified of the
# success of the operation.
#
# @param [Mongo::DB] a MongoDB database.
# @param [String] src the name of the source file.
# @param [String] dest the name of the destination file.
# @param [String] root_collection the name of the default root collection.
def self.mv(db, src, dest, root_collection=GridStore.default_root_collection)
db.collection("#{root_collection}.files").update({ :filename => src }, { '$set' => { :filename => dest } })
end
# Initialize a GridStore instance for reading, writing, or modifying a given file.
# Note that it's often easier to work with the various GridStore class methods (open, read, etc.).
#
# @param [Mongo::DB] db a MongoDB database.
# @param [String] name a filename.
# @param [String] mode either 'r', 'w', or 'w+' for reading, writing, or appending, respectively.
#
# @option options [String] :root DEFAULT_ROOT_COLLECTION ('r', 'w', 'w+') the name of the root collection to use.
#
# @option options [String] :metadata ({}) (w, w+) A hash containing any data you want persisted as
# this file's metadata.
#
# @option options [Integer] :chunk_size (Chunk::DEFAULT_CHUNK_SIZE) (w) Sets chunk size for files opened for writing.
# See also GridStore#chunk_size=.
#
# @option options [String] :content_type ('text/plain') Set the content type stored as the
# file's metadata. See also GridStore#content_type=.
def initialize(db, name, mode='r', options={})
@db, @filename, @mode = db, name, mode
@root = options[:root] || GridStore.default_root_collection
doc = collection.find({'filename' => @filename}).next_document
if doc
@files_id = doc['_id']
@content_type = doc['contentType']
@chunk_size = doc['chunkSize']
@upload_date = doc['uploadDate']
@aliases = doc['aliases']
@length = doc['length']
@metadata = doc['metadata']
@md5 = doc['md5']
else
@files_id = Mongo::ObjectID.new
@content_type = DEFAULT_CONTENT_TYPE
@chunk_size = Chunk::DEFAULT_CHUNK_SIZE
@length = 0
end
case mode
when 'r'
@curr_chunk = nth_chunk(0)
@position = 0
when 'w'
chunk_collection.create_index([['files_id', Mongo::ASCENDING], ['n', Mongo::ASCENDING]])
delete_chunks
@curr_chunk = Chunk.new(self, 'n' => 0)
@content_type = options[:content_type] if options[:content_type]
@chunk_size = options[:chunk_size] if options[:chunk_size]
@metadata = options[:metadata] if options[:metadata]
@position = 0
when 'w+'
chunk_collection.create_index([['files_id', Mongo::ASCENDING], ['n', Mongo::ASCENDING]])
@curr_chunk = nth_chunk(last_chunk_number) || Chunk.new(self, 'n' => 0) # might be empty
@curr_chunk.pos = @curr_chunk.data.length if @curr_chunk
@metadata = options[:metadata] if options[:metadata]
@position = @length
else
raise "error: illegal mode #{mode}"
end
@lineno = 0
@pushback_byte = nil
end
# Get the files collection referenced by this GridStore instance.
#
# @return [Mongo::Collection]
def collection
@db.collection("#{@root}.files")
end
# Get the chunk collection referenced by this GridStore.
#
# @return [Mongo::Collection]
def chunk_collection
@db.collection("#{@root}.chunks")
end
# Change the chunk size. This is permitted only when the file is opened for write
# and no data has yet been written.
#
# @param [Integer] size the new chunk size, in bytes.
#
# @return [Integer] the new chunk size.
def chunk_size=(size)
unless @mode[0] == ?w && @position == 0 && @upload_date == nil
raise "error: can only change chunk size if open for write and no data written."
end
@chunk_size = size
end
# ================ reading ================
def getc
if @pushback_byte
byte = @pushback_byte
@pushback_byte = nil
@position += 1
byte
elsif eof?
nil
else
if @curr_chunk.eof?
@curr_chunk = nth_chunk(@curr_chunk.chunk_number + 1)
end
@position += 1
@curr_chunk.getc
end
end
def gets(separator=$/)
str = ''
byte = self.getc
return nil if byte == nil # EOF
while byte != nil
s = byte.chr
str << s
break if s == separator
byte = self.getc
end
@lineno += 1
str
end
def read(len=nil, buf=nil)
if len
read_partial(len, buf)
else
read_all(buf)
end
end
def readchar
byte = self.getc
raise EOFError.new if byte == nil
byte
end
def readline(separator=$/)
line = gets
raise EOFError.new if line == nil
line
end
def readlines(separator=$/)
read.split(separator).collect { |line| "#{line}#{separator}" }
end
def each
line = gets
while line
yield line
line = gets
end
end
alias_method :each_line, :each
def each_byte
byte = self.getc
while byte
yield byte
byte = self.getc
end
end
def ungetc(byte)
@pushback_byte = byte
@position -= 1
end
# ================ writing ================
def putc(byte)
if @curr_chunk.pos == @chunk_size
prev_chunk_number = @curr_chunk.chunk_number
@curr_chunk.save
@curr_chunk = Chunk.new(self, 'n' => prev_chunk_number + 1)
end
@position += 1
@curr_chunk.putc(byte)
end
def print(*objs)
objs = [$_] if objs == nil || objs.empty?
objs.each { |obj|
str = obj.to_s
str.each_byte { |byte| self.putc(byte) }
}
nil
end
def puts(*objs)
if objs == nil || objs.empty?
self.putc(10)
else
print(*objs.collect{ |obj|
str = obj.to_s
str << "\n" unless str =~ /\n$/
str
})
end
nil
end
def <<(obj)
write(obj.to_s)
end
def write(string)
raise "#@filename not opened for write" unless @mode[0] == ?w
# Since Ruby 1.9.1 doesn't necessarily store one character per byte.
if string.respond_to?(:force_encoding)
string.force_encoding("binary")
end
to_write = string.length
while (to_write > 0) do
if @curr_chunk && @curr_chunk.data.position == @chunk_size
prev_chunk_number = @curr_chunk.chunk_number
@curr_chunk = GridFS::Chunk.new(self, 'n' => prev_chunk_number + 1)
end
chunk_available = @chunk_size - @curr_chunk.data.position
step_size = (to_write > chunk_available) ? chunk_available : to_write
@curr_chunk.data.put_array(ByteBuffer.new(string[-to_write,step_size]).to_a)
to_write -= step_size
@curr_chunk.save
end
string.length - to_write
end
# A no-op.
def flush
end
# ================ status ================
def eof
raise IOError.new("stream not open for reading") unless @mode[0] == ?r
@position >= @length
end
alias_method :eof?, :eof
# ================ positioning ================
def rewind
if @curr_chunk.chunk_number != 0
if @mode[0] == ?w
delete_chunks
@curr_chunk = Chunk.new(self, 'n' => 0)
else
@curr_chunk == nth_chunk(0)
end
end
@curr_chunk.pos = 0
@lineno = 0
@position = 0
end
def seek(pos, whence=IO::SEEK_SET)
target_pos = case whence
when IO::SEEK_CUR
@position + pos
when IO::SEEK_END
@length + pos
when IO::SEEK_SET
pos
end
new_chunk_number = (target_pos / @chunk_size).to_i
if new_chunk_number != @curr_chunk.chunk_number
@curr_chunk.save if @mode[0] == ?w
@curr_chunk = nth_chunk(new_chunk_number)
end
@position = target_pos
@curr_chunk.pos = @position % @chunk_size
0
end
def tell
@position
end
#---
# ================ closing ================
#+++
def close
if @mode[0] == ?w
if @curr_chunk
@curr_chunk.truncate
@curr_chunk.save if @curr_chunk.pos > 0
end
files = collection
if @upload_date
files.remove('_id' => @files_id)
else
@upload_date = Time.now
end
files.insert(to_mongo_object)
end
@db = nil
end
def closed?
@db == nil
end
def delete_chunks
chunk_collection.remove({'files_id' => @files_id}) if @files_id
@curr_chunk = nil
end
#---
# ================ protected ================
#+++
protected
def to_mongo_object
h = OrderedHash.new
h['_id'] = @files_id
h['filename'] = @filename
h['contentType'] = @content_type
h['length'] = @curr_chunk ? @curr_chunk.chunk_number * @chunk_size + @curr_chunk.pos : 0
h['chunkSize'] = @chunk_size
h['uploadDate'] = @upload_date
h['aliases'] = @aliases
h['metadata'] = @metadata
md5_command = OrderedHash.new
md5_command['filemd5'] = @files_id
md5_command['root'] = @root
h['md5'] = @db.command(md5_command)['md5']
h
end
def read_partial(len, buf=nil)
buf ||= ''
byte = self.getc
while byte != nil && (len == nil || len > 0)
buf << byte.chr
len -= 1 if len
byte = self.getc if (len == nil || len > 0)
end
buf
end
def read_all(buf=nil)
buf ||= ''
while true do
if (@curr_chunk.pos > 0)
data = @curr_chunk.data.to_s
buf += data[@position, data.length]
else
buf += @curr_chunk.data.to_s
end
break if @curr_chunk.chunk_number == last_chunk_number
@curr_chunk = nth_chunk(@curr_chunk.chunk_number + 1)
end
buf
end
def nth_chunk(n)
mongo_chunk = chunk_collection.find({'files_id' => @files_id, 'n' => n}).next_document
Chunk.new(self, mongo_chunk || {})
end
def last_chunk_number
(@length / @chunk_size).to_i
end
end
end