diff --git a/bin/batch_size_benchmark b/bin/batch_size_benchmark
new file mode 100755
index 0000000..af96e50
--- /dev/null
+++ b/bin/batch_size_benchmark
@@ -0,0 +1,70 @@
+#!/usr/bin/env ruby
+#$LOAD_PATH[0,0] = File.join(File.dirname(__FILE__), '..', 'lib')
+#
+# Note: Ruby 1.9 is faster than 1.8, as expected.
+# This suite will be run against the installed version of ruby-mongo-driver.
+# The c-extension, bson_ext, will be used if installed.
+
+require 'rubygems'
+require 'mongo'
+
+large = {
+  'base_url' => 'http://www.example.com/test-me',
+  'total_word_count' => 6743,
+  'access_time' => Time.now,
+  'meta_tags' => {
+    'description' => 'i am a long description string',
+    'author' => 'Holly Man',
+    'dynamically_created_meta_tag' => 'who know\n what'
+  },
+  'page_structure' => {
+    'counted_tags' => 3450,
+    'no_of_js_attached' => 10,
+    'no_of_images' => 6
+  },
+  'harvested_words' => ['10gen','web','open','source','application','paas',
+                        'platform-as-a-service','technology','helps',
+                        'developers','focus','building','mongodb','mongo'] * 20
+}
+
+
+con = Mongo::Connection.new
+
+db  = con['benchmark']
+col = db['bigdocs']
+
+col.remove()
+
+puts "Inserting 100,000 large documents..."
+
+def get_batch(large)
+  batch = []
+  100.times do
+    batch << large.dup
+  end
+  batch
+end
+
+1000.times do
+  col.insert(get_batch(large))
+end
+GC.start
+
+t1 = Time.now
+cursor = col.find({}, :batch_size => 0)
+200.times do
+  cursor.next_document
+end
+t2 = Time.now
+puts "Database decides batch size: #{t2 - t1}"
+
+
+t1 = Time.now
+cursor = col.find({}, :batch_size => 100)
+200.times do
+  cursor.next_document
+end
+t2 = Time.now
+puts "Batch size of 100: #{t2 - t1}"
+
+
diff --git a/ext/cbson/version.h b/ext/cbson/version.h
index 047c60a..fbbaceb 100644
--- a/ext/cbson/version.h
+++ b/ext/cbson/version.h
@@ -14,4 +14,4 @@
  * limitations under the License.
  */
 
-#define VERSION "0.20.1"
+#define VERSION "0.20.2"
diff --git a/lib/bson.rb b/lib/bson.rb
index 8fc0bab..4c92f6e 100644
--- a/lib/bson.rb
+++ b/lib/bson.rb
@@ -1,7 +1,7 @@
 $:.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
 
 module BSON
-  VERSION = "0.20.1"
+  VERSION = "0.20.2"
   def self.serialize(obj, check_keys=false, move_id=false)
     BSON_CODER.serialize(obj, check_keys, move_id)
   end
diff --git a/lib/mongo.rb b/lib/mongo.rb
index dcb37f9..f20d090 100644
--- a/lib/mongo.rb
+++ b/lib/mongo.rb
@@ -1,7 +1,7 @@
 $:.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
 
 module Mongo
-  VERSION = "0.20.1"
+  VERSION = "0.20.2"
 end
 
 module Mongo
@@ -21,6 +21,8 @@ module Mongo
 
     OP_QUERY_SLAVE_OK = 4
     OP_QUERY_NO_CURSOR_TIMEOUT = 16
+
+    DEFAULT_BATCH_SIZE = 100
   end
 end
 
diff --git a/lib/mongo/collection.rb b/lib/mongo/collection.rb
index b1ff72d..71d1c50 100644
--- a/lib/mongo/collection.rb
+++ b/lib/mongo/collection.rb
@@ -120,6 +120,8 @@ module Mongo
     # @option opts [Boolean] :snapshot ('false') if true, snapshot mode will be used for this query.
     #   Snapshot mode assures no duplicates are returned, or objects missed, which were preset at both the start and
     #   end of the query's execution. For details see http://www.mongodb.org/display/DOCS/How+to+do+Snapshotting+in+the+Mongo+Database
+    # @option opts [Integer] :batch_size (100) the number of documents to return from the database per GETMORE operation. A value of 0
+    #   will let the database server decide how many results to return. This option can be ignored for most use cases.
     # @option opts [Boolean] :timeout ('true') when +true+, the returned cursor will be subject to
     #   the normal cursor timeout behavior of the mongod process. When +false+, the returned cursor will never timeout. Note
     #   that disabling timeout will only work when #find is invoked with a block. This is to prevent any inadvertant failure to
@@ -140,6 +142,7 @@ module Mongo
       sort = opts.delete(:sort)
       hint = opts.delete(:hint)
       snapshot = opts.delete(:snapshot)
+      batch_size = opts.delete(:batch_size)
       if opts[:timeout] == false && !block_given?
         raise ArgumentError, "Timeout can be set to false only when #find is invoked with a block."
       end
@@ -152,7 +155,7 @@ module Mongo
       raise RuntimeError, "Unknown options [#{opts.inspect}]" unless opts.empty?
 
       cursor = Cursor.new(self, :selector => selector, :fields => fields, :skip => skip, :limit => limit,
-        :order => sort, :hint => hint, :snapshot => snapshot, :timeout => timeout)
+        :order => sort, :hint => hint, :snapshot => snapshot, :timeout => timeout, :batch_size => batch_size)
       if block_given?
         yield cursor
         cursor.close()
diff --git a/lib/mongo/cursor.rb b/lib/mongo/cursor.rb
index ed9b417..6752fbf 100644
--- a/lib/mongo/cursor.rb
+++ b/lib/mongo/cursor.rb
@@ -47,6 +47,7 @@ module Mongo
       @timeout = options[:timeout] || false
       @explain = options[:explain]
       @socket = options[:socket]
+      @batch_size = options[:batch_size] || Mongo::Constants::DEFAULT_BATCH_SIZE
 
       @full_collection_name = "#{@collection.db.name}.#{@collection.name}"
       @cache = []
@@ -317,8 +318,8 @@ module Mongo
       db_name = @admin ? 'admin' : @db.name
       BSON::BSON_RUBY.serialize_cstr(message, "#{db_name}.#{@collection.name}")
 
-      # Number of results to return; db decides for now.
-      message.put_int(0)
+      # Number of results to return.
+      message.put_int(@batch_size)
 
       # Cursor id.
       message.put_long(@cursor_id)
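
A usage sketch (not part of the diff): the snippet below shows how application code could exercise the new :batch_size option, assuming the Collection#find block form shown in the patch above; Cursor#each is assumed from the existing driver API, and the 'benchmark'/'bigdocs' names simply mirror the benchmark script.

    require 'rubygems'
    require 'mongo'

    # Connect and grab the same collection the benchmark script populates.
    col = Mongo::Connection.new['benchmark']['bigdocs']

    # Fetch 500 documents per OP_GET_MORE round trip instead of the new
    # default of 100 (Mongo::Constants::DEFAULT_BATCH_SIZE).
    col.find({}, :batch_size => 500) do |cursor|
      # Cursor#each is assumed from the existing driver API, not added here.
      cursor.each { |doc| puts doc['base_url'] }
    end

    # A batch size of 0 restores the previous behavior and lets the server
    # decide how many results to send back per batch.
    col.find({}, :batch_size => 0) do |cursor|
      cursor.each { |doc| puts doc['total_word_count'] }
    end

Larger batches mean fewer OP_GET_MORE round trips when scanning many documents, which is the difference the bin/batch_size_benchmark script above is timing.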