mongo-ruby-driver/lib/mongo/gridfs/grid_io.rb

# --
# Copyright (C) 2008-2009 10gen Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ++

module Mongo

  # WARNING: This is part of a new, experimental GridFS API. Subject to change.
  class GridIO
    DEFAULT_CHUNK_SIZE   = 256 * 1024
    DEFAULT_CONTENT_TYPE = 'binary/octet-stream'

    attr_reader :content_type, :chunk_size, :upload_date, :files_id, :filename, :metadata

    # Build a GridIO over the given files and chunks collections.
    #
    # Content type, chunk size and id are first seeded from +opts+ (falling
    # back to the class defaults); the mode-specific initializer then finishes
    # the setup.
    #
    # @param files the collection holding the file documents
    # @param chunks the collection holding the chunk documents
    # @param [String] filename
    # @param [String] mode 'r' to read, 'w' to write
    # @param filesystem passed to init_read; when truthy the file is looked up
    #   by filename, otherwise by opts[:_id]
    # @param [Hash] opts :content_type, :chunk_size and :_id are read here
    #
    # @raise [GridError] if mode is neither 'r' nor 'w'
    def initialize(files, chunks, filename, mode, filesystem, opts={})
      @files, @chunks  = files, chunks
      @filename, @mode = filename, mode

      @content_type = opts[:content_type] || DEFAULT_CONTENT_TYPE
      @chunk_size   = opts[:chunk_size] || DEFAULT_CHUNK_SIZE
      @files_id     = opts[:_id]

      case @mode
      when 'r'
        init_read(filesystem, opts)
      when 'w'
        init_write(opts)
      else
        raise GridError, "Invalid file mode #{@mode}. Valid options include 'r' and 'w'."
      end
    end

    # Read data from the file.
    #
    # A length of 0 yields an empty string straight away. With no length and
    # the file pointer at position 0 the request is handed to read_all; every
    # other combination is handed to read_length, which reads from the current
    # file position.
    #
    # @param [Integer, nil] length
    #   the number of bytes to read
    #
    # @return [String, nil]
    #   whatever read_all or read_length returns ('' for a length of 0)
    def read(length=nil)
      return '' if length == 0
      return read_all if length.nil? && @file_position.zero?

      read_length(length)
    end
    alias_method :data, :read

    # Write the given data to the file.
    #
    # A String is written directly. Any other object is treated as a stream:
    # it is read @chunk_size bytes at a time until its read method returns
    # nil or false, and every piece is written in turn.
    #
    # @param [String, #read] io
    #   the data to write, or an object to read that data from
    #
    # @raise [GridError] if the file was not opened in write mode
    #
    # @return [Integer]
    #   the number of bytes written.
    def write(io)
      raise GridError, "file not opened for write" unless @mode[0] == ?w
      return write_string(io) if io.is_a?(String)

      written = 0
      while (piece = io.read(@chunk_size))
        written += write_string(piece)
      end
      written
    end

    # Position the file pointer at the provided location.
    #
    # @param [Integer] pos
    #   the offset to apply. It is interpreted according to +whence+ and can
    #   be a negative number.
    # @param [Integer] whence
    #   one of IO::SEEK_CUR (offset from the current position), IO::SEEK_END
    #   (offset from the stored file length), or IO::SEEK_SET (absolute
    #   position)
    #
    # @raise [GridError] if the file is not opened in read mode, or if
    #   +whence+ is not one of the three supported constants
    #
    # @return [Integer] the new file position
    def seek(pos, whence=IO::SEEK_SET)
      raise GridError, "Seek is only allowed in read mode." unless @mode == 'r'
      target_pos = case whence
                   when IO::SEEK_CUR then @file_position + pos
                   when IO::SEEK_END then @file_length + pos
                   when IO::SEEK_SET then pos
                   else
                     # Without this branch target_pos would be nil and the
                     # arithmetic below would fail with a NoMethodError.
                     raise GridError, "Invalid whence #{whence.inspect}. Valid options include " +
                                      "IO::SEEK_CUR, IO::SEEK_END and IO::SEEK_SET."
                   end

      new_chunk_number = (target_pos / @chunk_size).to_i
      # Only hit the chunks collection when the target lies in another chunk,
      # or when no chunk is loaded at the moment.
      if @current_chunk.nil? || new_chunk_number != @current_chunk['n']
        @current_chunk = get_chunk(new_chunk_number)
      end
      @file_position  = target_pos
      # get_chunk resets @chunk_position, so it is set after the chunk switch.
      @chunk_position = @file_position % @chunk_size
      @file_position
    end

    # The current position of the file pointer.
    #
    # The value is the one kept in @file_position: it starts at 0, is set by
    # seek and is advanced by read_length.
    #
    # @return [Integer]
    def tell
      @file_position
    end

    # Finish the file. In write mode this stamps the upload date with the
    # current UTC time and inserts the document built by to_mongo_object into
    # the files collection; the file becomes available only after this method
    # has been called. In any other mode nothing is written.
    #
    # The original note on this method states that it is invoked
    # automatically when GridIO#open is passed a block. Otherwise, it must be
    # called manually.
    #
    # @return [True]
    def close
      return true unless @mode[0] == ?w

      @upload_date = Time.now.utc
      @files.insert(to_mongo_object)
      true
    end

    # Short human-readable description of this object: the literal "_id: "
    # followed by the file's id.
    #
    # @return [String]
    def inspect
      "_id: #{@files_id}"
    end

    private

    # Build a new, empty chunk document for this file and reset the position
    # inside the chunk to 0. The chunk is only built here, not saved.
    #
    # @param [Integer] n the chunk number
    # @return [OrderedHash] with the keys '_id', 'n', 'files_id' and 'data',
    #   in that order
    def create_chunk(n)
      fields = [
        ['_id',      Mongo::ObjectID.new],
        ['n',        n],
        ['files_id', @files_id],
        ['data',     '']
      ]

      fresh = OrderedHash.new
      fields.each { |key, value| fresh[key] = value }
      @chunk_position = 0
      fresh
    end

    # Store +chunk+ in the chunks collection: any document with the same _id
    # is removed first, then the chunk is inserted.
    #
    # TODO: Perhaps use an upsert here instead?
    #
    # @param chunk the chunk document to store; must carry an '_id' key
    def save_chunk(chunk)
      @chunks.remove('_id' => chunk['_id'])
      @chunks.insert(chunk)
    end

    # Look up chunk number +n+ of this file in the chunks collection and reset
    # the position inside the chunk to 0.
    #
    # @param [Integer] n the chunk number
    # @return the first document returned for the query, as reported by the
    #   cursor's next_document
    def get_chunk(n)
      selector = {'files_id' => @files_id, 'n' => n}
      found    = @chunks.find(selector).next_document
      @chunk_position = 0
      found
    end

    # Fetch chunk number +n+ for reading.
    #
    # @param [Integer] n the chunk number
    # @return the chunk found by get_chunk, or nil when there is none
    def get_chunk_for_read(n)
      chunk = get_chunk(n)
      return nil unless chunk
      # The chunk has to be the last expression: a bare
      # "return nil unless chunk" evaluates to nil when the chunk exists.
      chunk
    end

    # The number of the chunk that holds the last byte of the file, derived
    # from @file_length and @chunk_size.
    #
    # The last byte sits at offset @file_length - 1, so that offset is what
    # gets divided: dividing the length itself yields one chunk too many
    # whenever the length is an exact multiple of the chunk size.
    #
    # @return [Integer] the chunk number; 0 for an empty file
    def last_chunk_number
      return 0 unless @file_length > 0

      ((@file_length - 1) / @chunk_size).to_i
    end

    # Read a file in its entirety (optimized).
    #
    # Starting at @current_chunk, the data of each chunk is appended in order
    # until the chunk numbered last_chunk_number has been appended or
    # get_chunk finds no further chunk. @current_chunk is left on the last
    # chunk that was read.
    #
    # NOTE: @file_position is not changed by this method.
    #
    # @return [String] the concatenated chunk data; '' when no chunk is loaded
    def read_all
      buf   = ''
      chunk = @current_chunk
      while chunk
        @current_chunk = chunk
        buf << chunk['data'].to_s
        break if chunk['n'] == last_chunk_number
        # A nil here means no chunk with that number is stored; the loop
        # condition then ends the read instead of dereferencing nil.
        chunk = get_chunk(chunk['n'] + 1)
      end
      buf
    end

    # Read a file incrementally, starting at the current file position.
    #
    # The request is capped at the number of bytes left in the file. Both
    # @chunk_position and @file_position are advanced by the amount read.
    #
    # @param [Integer, nil] length
    #   the number of bytes to read; nil reads everything that is left
    #
    # @raise [GridError] if a chunk needed to satisfy the read is not stored
    #
    # @return [String, nil] the data read, or nil when nothing is left to read
    def read_length(length)
      remaining = @file_length - @file_position
      # Checked before touching the current chunk: at the end of the file (or
      # for an empty file) there may be no chunk loaded at all.
      return nil unless remaining > 0

      to_read = (length.nil? || length > remaining) ? remaining : length
      cache_chunk_data

      buf = ''
      while to_read > 0
        if @chunk_position == @chunk_data_length
          # Current chunk exhausted; move on to the next one.
          next_number = @current_chunk['n'] + 1
          next_chunk  = get_chunk(next_number)
          unless next_chunk
            raise GridError, "Could not find chunk #{next_number} of file with id #{@files_id}"
          end
          @current_chunk = next_chunk
          cache_chunk_data
        end
        chunk_remainder = @chunk_data_length - @chunk_position
        size = (to_read >= chunk_remainder) ? chunk_remainder : to_read
        buf << @current_chunk_data[@chunk_position, size]
        to_read         -= size
        @chunk_position += size
        @file_position  += size
      end
      buf
    end

    # Cache the current chunk's data as a string in @current_chunk_data
    # (forced to binary encoding where strings support encodings) and its
    # length in @chunk_data_length.
    #
    # The length is taken from the chunk's 'data' value itself, not from the
    # cached string.
    def cache_chunk_data
      payload = @current_chunk['data']
      @current_chunk_data = payload.to_s
      @current_chunk_data.force_encoding("binary") if @current_chunk_data.respond_to?(:force_encoding)
      @chunk_data_length = payload.length
    end

    # Append +string+ to the file, spreading it over chunks of at most
    # @chunk_size bytes.
    #
    # Each pass fills what is left of the current chunk, wraps the chunk's
    # data in a Binary and stores the chunk through save_chunk. A new chunk is
    # created whenever the current one is full and data is still pending.
    #
    # @param [String] string the data to append; it is left unmodified
    #
    # @return [Integer] the number of bytes written
    def write_string(string)
      # Since Ruby 1.9.1 doesn't necessarily store one character per byte,
      # lengths and offsets are taken from a binary view of the data. The view
      # is a copy: force_encoding changes its receiver, which would alter the
      # caller's string and raise on a frozen one.
      data = string
      if data.respond_to?(:force_encoding) && data.encoding != Encoding::BINARY
        data = data.dup.force_encoding("binary")
      end

      to_write = data.length
      while to_write > 0
        if @current_chunk && @chunk_position == @chunk_size
          next_chunk_number = @current_chunk['n'] + 1
          @current_chunk    = create_chunk(next_chunk_number)
        end
        chunk_available = @chunk_size - @chunk_position
        step_size = (to_write > chunk_available) ? chunk_available : to_write
        # A negative start index counts from the end of the data, so
        # -to_write is the first byte that has not been written yet.
        @current_chunk['data'] = Binary.new((@current_chunk['data'].to_s << data[-to_write, step_size]).unpack("c*"))
        @chunk_position += step_size
        to_write -= step_size
        save_chunk(@current_chunk)
      end
      data.length - to_write
    end

    # Load an existing file for reading.
    #
    # When +filesystem+ is truthy the file is looked up by @filename, taking
    # the document with the most recent uploadDate; otherwise it is looked up
    # by @files_id. The instance state is then filled from the stored
    # document, chunk 0 is loaded and the file position is set to 0.
    #
    # @param filesystem whether to look the file up by filename
    # @param [Hash] opts not used by this method
    #
    # @raise [GridError] if no matching file document is found
    def init_read(filesystem, opts)
      if filesystem
        newest_first = [["uploadDate", -1]]
        doc = @files.find({'filename' => @filename}, :sort => newest_first, :limit => 1).next_document
        raise GridError, "Could not open file with filename #{@filename}" unless doc
      else
        doc = @files.find({'_id' => @files_id}).next_document
        raise GridError, "Could not open file with id #{@files_id}" unless doc
      end

      # Identity of the stored file.
      @files_id = doc['_id']
      @filename = doc['filename']
      @aliases  = doc['aliases']

      # Layout and content description.
      @content_type = doc['contentType']
      @chunk_size   = doc['chunkSize']
      @file_length  = doc['length']
      @md5          = doc['md5']

      @upload_date = doc['uploadDate']
      @metadata    = doc['metadata']

      # get_chunk queries by @files_id, so it runs after the id is known.
      @current_chunk = get_chunk(0)
      @file_position = 0
    end

    # Set up the instance for writing a new file.
    #
    # The id, content type and chunk size come from +opts+ when given, then
    # from any value already set on the instance, then from a fresh ObjectID
    # or the class defaults. Metadata is only replaced when +opts+ supplies
    # it. An empty chunk 0 is created and the file position is set to 0.
    #
    # @param [Hash] opts :_id, :content_type, :chunk_size, :metadata
    def init_write(opts)
      @file_length  = 0
      @files_id     = opts[:_id] || Mongo::ObjectID.new
      @chunk_size   = opts[:chunk_size]   || @chunk_size || DEFAULT_CHUNK_SIZE
      @content_type = opts[:content_type] || @content_type || DEFAULT_CONTENT_TYPE

      supplied_metadata = opts[:metadata]
      @metadata = supplied_metadata if supplied_metadata

      # create_chunk stamps the chunk with @files_id, so the id is set first.
      @current_chunk = create_chunk(0)
      @file_position = 0
    end

    # Build the document describing this file for the files collection.
    #
    # The length is derived from the current chunk: its number times the chunk
    # size plus the position inside it, or 0 when there is no current chunk.
    # The md5 is obtained by running a filemd5 command (with root 'fs')
    # through the database of the files collection.
    #
    # @return [OrderedHash] with the keys '_id', 'filename', 'contentType',
    #   'length', 'chunkSize', 'uploadDate', 'aliases', 'metadata' and 'md5'
    def to_mongo_object
      length =
        if @current_chunk
          @current_chunk['n'] * @chunk_size + @chunk_position
        else
          0
        end

      doc = OrderedHash.new
      doc['_id']         = @files_id
      doc['filename']    = @filename
      doc['contentType'] = @content_type
      doc['length']      = length
      doc['chunkSize']   = @chunk_size
      doc['uploadDate']  = @upload_date
      doc['aliases']     = @aliases
      doc['metadata']    = @metadata

      # Get a server-side md5.
      digest_request            = OrderedHash.new
      digest_request['filemd5'] = @files_id
      digest_request['root']    = 'fs'
      doc['md5'] = @files.db.command(digest_request)['md5']

      doc
    end
  end
end
Initial GridFS refactoring 2010-02-12 23:03:07 +00:00			`# --`
			`# Copyright (C) 2008-2009 10gen Inc.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
			`# ++`

			`module Mongo`
More experimental GridFS improvements 2010-02-18 21:31:25 +00:00
			`# WARNING: This is part of a new, experimental GridFS API. Subject to change.`
Initial GridFS refactoring 2010-02-12 23:03:07 +00:00			`class GridIO`
			`DEFAULT_CHUNK_SIZE = 256 * 1024`
More experimental GridFS improvements 2010-02-18 21:31:25 +00:00			`DEFAULT_CONTENT_TYPE = 'binary/octet-stream'`
Initial GridFS refactoring 2010-02-12 23:03:07 +00:00
More experimental GridFS improvements 2010-02-18 21:31:25 +00:00			`attr_reader :content_type, :chunk_size, :upload_date, :files_id, :filename, :metadata`
Initial GridFS refactoring 2010-02-12 23:03:07 +00:00
More experimental GridFS improvements 2010-02-18 21:31:25 +00:00			`def initialize(files, chunks, filename, mode, filesystem, opts={})`
Initial GridFS refactoring 2010-02-12 23:03:07 +00:00			`@files = files`
			`@chunks = chunks`
			`@filename = filename`
			`@mode = mode`
			`@content_type = opts[:content_type] \|\| DEFAULT_CONTENT_TYPE`
			`@chunk_size = opts[:chunk_size] \|\| DEFAULT_CHUNK_SIZE`
More experimental GridFS improvements 2010-02-18 21:31:25 +00:00			`@files_id = opts[:_id]`
Initial GridFS refactoring 2010-02-12 23:03:07 +00:00
More experimental GridFS improvements 2010-02-18 21:31:25 +00:00			`case @mode`
			`when 'r' then init_read(filesystem, opts)`
			`when 'w' then init_write(opts)`
			`else`
			`raise GridError, "Invalid file mode #{@mode}. Valid options include 'r' and 'w'."`
			`end`
Initial GridFS refactoring 2010-02-12 23:03:07 +00:00			`end`

			`# Read the data from the file. If a length if specified, will read from the`
			`# current file position.`
			`#`
			`# @param [Integer] length`
			`#`
			`# @return [String]`
			`# the data in the file`
			`def read(length=nil)`
streaming for new gridfs api 2010-02-19 21:20:46 +00:00			`if length == 0`
			`return ''`
			`elsif length.nil? && @file_position.zero?`
			`read_all`
			`else`
			`read_length(length)`
Initial GridFS refactoring 2010-02-12 23:03:07 +00:00			`end`
			`end`
streaming for new gridfs api 2010-02-19 21:20:46 +00:00			`alias_method :data, :read`
Initial GridFS refactoring 2010-02-12 23:03:07 +00:00
			`# Write the given string (binary) data to the file.`
			`#`
			`# @param [String] string`
			`# the data to write`
			`#`
			`# @return [Integer]`
			`# the number of bytes written.`
streaming for new gridfs api 2010-02-19 21:20:46 +00:00			`def write(io)`
			`raise GridError, "file not opened for write" unless @mode[0] == ?w`
			`if io.is_a? String`
			`write_string(io)`
			`else`
			`length = 0`
			`while(string = io.read(@chunk_size))`
			`length += write_string(string)`
Initial GridFS refactoring 2010-02-12 23:03:07 +00:00			`end`
streaming for new gridfs api 2010-02-19 21:20:46 +00:00			`length`
Initial GridFS refactoring 2010-02-12 23:03:07 +00:00			`end`
			`end`

			`# Position the file pointer at the provided location.`
			`#`
			`# @param [Integer] pos`
			`# the number of bytes to advance the file pointer. this can be a negative`
			`# number.`
			`# @param [Integer] whence`
			`# one of IO::SEEK_CUR, IO::SEEK_END, or IO::SEEK_SET`
			`#`
			`# @return [Integer] the new file position`
			`def seek(pos, whence=IO::SEEK_SET)`
			`raise GridError, "Seek is only allowed in read mode." unless @mode == 'r'`
			`target_pos = case whence`
			`when IO::SEEK_CUR`
			`@file_position + pos`
			`when IO::SEEK_END`
			`@file_length + pos`
			`when IO::SEEK_SET`
			`pos`
			`end`

			`new_chunk_number = (target_pos / @chunk_size).to_i`
			`if new_chunk_number != @current_chunk['n']`
			`save_chunk(@current_chunk) if @mode[0] == ?w`
			`@current_chunk = get_chunk(new_chunk_number)`
			`end`
			`@file_position = target_pos`
			`@chunk_position = @file_position % @chunk_size`
			`@file_position`
			`end`

			`# The current position of the file.`
			`#`
			`# @return [Integer]`
			`def tell`
			`@file_position`
			`end`

streaming for new gridfs api 2010-02-19 21:20:46 +00:00			`# Creates or updates the document from the files collection that`
			`# stores the chunks' metadata. The file becomes available only after`
			`# this method has been called.`
Initial GridFS refactoring 2010-02-12 23:03:07 +00:00			`#`
streaming for new gridfs api 2010-02-19 21:20:46 +00:00			`# This method will be invoked automatically when`
			`# on GridIO#open is passed a block. Otherwise, it must be called manually.`
Initial GridFS refactoring 2010-02-12 23:03:07 +00:00			`#`
			`# @return [True]`
			`def close`
			`if @mode[0] == ?w`
More experimental GridFS improvements 2010-02-18 21:31:25 +00:00			`@upload_date = Time.now.utc`
Initial GridFS refactoring 2010-02-12 23:03:07 +00:00			`@files.insert(to_mongo_object)`
			`end`
			`true`
			`end`

More experimental GridFS improvements 2010-02-18 21:31:25 +00:00			`def inspect`
			`"_id: #{@files_id}"`
			`end`

Initial GridFS refactoring 2010-02-12 23:03:07 +00:00			`private`

			`def create_chunk(n)`
			`chunk = OrderedHash.new`
			`chunk['_id'] = Mongo::ObjectID.new`
			`chunk['n'] = n`
			`chunk['files_id'] = @files_id`
			`chunk['data'] = ''`
fixed bug on safe remove; style fixes 2010-02-17 21:48:23 +00:00			`@chunk_position = 0`
Initial GridFS refactoring 2010-02-12 23:03:07 +00:00			`chunk`
			`end`

fixed bug on safe remove; style fixes 2010-02-17 21:48:23 +00:00			`# TODO: Perhaps use an upsert here instead?`
Initial GridFS refactoring 2010-02-12 23:03:07 +00:00			`def save_chunk(chunk)`
			`@chunks.remove('_id' => chunk['_id'])`
			`@chunks.insert(chunk)`
			`end`

			`def get_chunk(n)`
			`chunk = @chunks.find({'files_id' => @files_id, 'n' => n}).next_document`
			`@chunk_position = 0`
streaming for new gridfs api 2010-02-19 21:20:46 +00:00			`chunk`
			`end`

			`def get_chunk_for_read(n)`
			`chunk = get_chunk(n)`
			`return nil unless chunk`
Initial GridFS refactoring 2010-02-12 23:03:07 +00:00			`end`

			`def last_chunk_number`
			`(@file_length / @chunk_size).to_i`
			`end`

minor: docs 2010-02-19 22:05:52 +00:00			`# Read a file in its entirety (optimized).`
Initial GridFS refactoring 2010-02-12 23:03:07 +00:00			`def read_all`
			`buf = ''`
			`while true`
			`buf << @current_chunk['data'].to_s`
			`break if @current_chunk['n'] == last_chunk_number`
			`@current_chunk = get_chunk(@current_chunk['n'] + 1)`
			`end`
			`buf`
			`end`

minor: docs 2010-02-19 22:05:52 +00:00			`# Read a file incrementally.`
streaming for new gridfs api 2010-02-19 21:20:46 +00:00			`def read_length(length)`
			`cache_chunk_data`
			`remaining = (@file_length - @file_position)`
			`to_read = length > remaining ? remaining : length`
			`return nil unless remaining > 0`

			`buf = ''`
			`while to_read > 0`
			`if @chunk_position == @chunk_data_length`
			`@current_chunk = get_chunk(@current_chunk['n'] + 1)`
			`cache_chunk_data`
			`end`
			`chunk_remainder = @chunk_data_length - @chunk_position`
			`size = (to_read >= chunk_remainder) ? chunk_remainder : to_read`
			`buf << @current_chunk_data[@chunk_position, size]`
			`to_read -= size`
			`@chunk_position += size`
			`@file_position += size`
			`end`
			`buf`
			`end`

			`def cache_chunk_data`
			`@current_chunk_data = @current_chunk['data'].to_s`
			`if @current_chunk_data.respond_to?(:force_encoding)`
			`@current_chunk_data.force_encoding("binary")`
			`end`
			`@chunk_data_length = @current_chunk['data'].length`
			`end`

			`def write_string(string)`
			`# Since Ruby 1.9.1 doesn't necessarily store one character per byte.`
			`if string.respond_to?(:force_encoding)`
			`string.force_encoding("binary")`
			`end`

			`to_write = string.length`
			`while (to_write > 0) do`
			`if @current_chunk && @chunk_position == @chunk_size`
			`next_chunk_number = @current_chunk['n'] + 1`
			`@current_chunk = create_chunk(next_chunk_number)`
			`end`
			`chunk_available = @chunk_size - @chunk_position`
			`step_size = (to_write > chunk_available) ? chunk_available : to_write`
			`@current_chunk['data'] = Binary.new((@current_chunk['data'].to_s << string[-to_write, step_size]).unpack("c*"))`
			`@chunk_position += step_size`
			`to_write -= step_size`
			`save_chunk(@current_chunk)`
			`end`
			`string.length - to_write`
			`end`

Initial GridFS refactoring 2010-02-12 23:03:07 +00:00			`# Initialize based on whether the supplied file exists.`
More experimental GridFS improvements 2010-02-18 21:31:25 +00:00			`def init_read(filesystem, opts)`
			`if filesystem`
			`doc = @files.find({'filename' => @filename}, :sort => [["uploadDate", -1]], :limit => 1).next_document`
			`raise GridError, "Could not open file with filename #{@filename}" unless doc`
Initial GridFS refactoring 2010-02-12 23:03:07 +00:00			`else`
More experimental GridFS improvements 2010-02-18 21:31:25 +00:00			`doc = @files.find({'_id' => @files_id}).next_document`
			`raise GridError, "Could not open file with id #{@files_id}" unless doc`
Initial GridFS refactoring 2010-02-12 23:03:07 +00:00			`end`
More experimental GridFS improvements 2010-02-18 21:31:25 +00:00
			`@files_id = doc['_id']`
			`@content_type = doc['contentType']`
			`@chunk_size = doc['chunkSize']`
			`@upload_date = doc['uploadDate']`
			`@aliases = doc['aliases']`
			`@file_length = doc['length']`
			`@metadata = doc['metadata']`
			`@md5 = doc['md5']`
			`@filename = doc['filename']`
			`@current_chunk = get_chunk(0)`
			`@file_position = 0`
Initial GridFS refactoring 2010-02-12 23:03:07 +00:00			`end`

			`# Validates and sets up the class for the given file mode.`
More experimental GridFS improvements 2010-02-18 21:31:25 +00:00			`def init_write(opts)`
			`@files_id = opts[:_id] \|\| Mongo::ObjectID.new`
			`@content_type = opts[:content_type] \|\| @content_type \|\| DEFAULT_CONTENT_TYPE`
			`@chunk_size = opts[:chunk_size] \|\| @chunk_size \|\| DEFAULT_CHUNK_SIZE`
			`@file_length = 0`
			`@metadata = opts[:metadata] if opts[:metadata]`

			`@current_chunk = create_chunk(0)`
			`@file_position = 0`
Initial GridFS refactoring 2010-02-12 23:03:07 +00:00			`end`

			`def to_mongo_object`
			`h = OrderedHash.new`
			`h['_id'] = @files_id`
			`h['filename'] = @filename`
			`h['contentType'] = @content_type`
			`h['length'] = @current_chunk ? @current_chunk['n'] * @chunk_size + @chunk_position : 0`
			`h['chunkSize'] = @chunk_size`
			`h['uploadDate'] = @upload_date`
			`h['aliases'] = @aliases`
			`h['metadata'] = @metadata`

			`# Get a server-side md5.`
			`md5_command = OrderedHash.new`
			`md5_command['filemd5'] = @files_id`
			`md5_command['root'] = 'fs'`
			`h['md5'] = @files.db.command(md5_command)['md5']`

			`h`
			`end`
			`end`
			`end`