From 135a9ca0ab28335d8f9eaaa78fed7cc502d062df Mon Sep 17 00:00:00 2001 From: Jim Menard Date: Tue, 9 Dec 2008 08:28:10 -0500 Subject: [PATCH] New object id generation that matches Babble. --- README | 15 -- lib/mongo/objectid.rb | 79 +++++++++-- lib/mongo/util/macaddr.rb | 71 ---------- lib/mongo/util/uuid.rb | 282 -------------------------------------- tests/test_objectid.rb | 54 ++++++++ 5 files changed, 118 insertions(+), 383 deletions(-) delete mode 100644 lib/mongo/util/macaddr.rb delete mode 100644 lib/mongo/util/uuid.rb create mode 100644 tests/test_objectid.rb diff --git a/README b/README index f275b3a..1d14de3 100644 --- a/README +++ b/README @@ -52,10 +52,6 @@ type = To Do -* Implement Babble's algorithm for ObjectID. Implement Comparable. - -* ObjectID equality. - * Capped collection support. * Implement order_by (right now, sort order is ignored) @@ -104,17 +100,6 @@ details. See http://www.gnu.org/licenses/ for a copy of the GNU Affero General Public License. -== UUID - -This driver uses the UUID generator by Assaf Arkin and Eric Hodel, which uses -the MIT license (see the top of the file lib/mongo/util/uuid.rb). - -== MAC Address Finder - -This driver uses the MAC address finder found at -http://codeforpeople.com/lib/ruby/macaddr/macaddr-1.0.0/lib/macaddr.rb, which -uses Ruby's license (see http://codeforpeople.com/lib/license.txt). - -- # Local Variables: # mode:rdoc diff --git a/lib/mongo/objectid.rb b/lib/mongo/objectid.rb index 596cf81..b7678dc 100644 --- a/lib/mongo/objectid.rb +++ b/lib/mongo/objectid.rb @@ -12,37 +12,86 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -require 'mongo/util/uuid' +require 'mutex_m' +require 'mongo/util/byte_buffer' module XGen module Mongo module Driver + # Implementation of the Babble OID. + # + # 12 bytes + # --- + # 0 time + # 1 + # 2 + # 3 + # 4 machine + # 5 + # 6 + # 7 pid + # 8 + # 9 inc + # 10 + # 11 class ObjectID - include Comparable + MACHINE = ( val = rand(0x1000000); [val & 0xff, (val >> 8) & 0xff, (val >> 16) & 0xff] ) + PID = ( val = rand(0x10000); [val & 0xff, (val >> 8) & 0xff]; ) - @@uuid_generator = UUID.new + LOCK = Object.new + LOCK.extend Mutex_m - # String UUID - attr_reader :uuid + @@index_time = Time.new.to_i + @@index = 0 - # +uuid+ is a string. If nil, a new UUID will be generated. - def initialize(uuid=nil) - # The Babble server expects 12-byte (24 hex character) keys, which - # is why we throw away part of the UUID. - @uuid ||= @@uuid_generator.generate(:compact).sub(/(.{12}).{8}(.{12})/, '\1\2') + # +data+ is an array of bytes. If nil, a new id will be generated. + # The time +t+ is only used for testing; leave it nil. + def initialize(data=nil, t=nil) + @data = data || generate_id(t) + end + + def eql?(other) + @data == other.to_a + end + alias_method :==, :eql? + + def to_a + @data.dup end def to_s - @uuid + @data.collect { |b| '%02x' % b }.join end - # This will make more sense when we implement the Babble algorithm for - # generating object ids. - def <=>(other) - to_s <=> other.to_s + def generate_id(t=nil) + t ||= Time.new.to_i + buf = ByteBuffer.new + buf.put_int(t & 0xffffffff) + buf.put_array(MACHINE) + buf.put_array(PID) + i = index_for_time(t) + buf.put(i & 0xff) + buf.put((i >> 8) & 0xff) + buf.put((i >> 16) & 0xff) + + buf.rewind + buf.to_a.dup end + + def index_for_time(t) + LOCK.mu_synchronize { + if t != @@index_time + @@index = 0 + @@index_time = t + end + retval = @@index + @@index += 1 + retval + } + end + end end end diff --git a/lib/mongo/util/macaddr.rb b/lib/mongo/util/macaddr.rb deleted file mode 100644 index 11a0fcd..0000000 --- a/lib/mongo/util/macaddr.rb +++ /dev/null @@ -1,71 +0,0 @@ -# http://codeforpeople.com/lib/ruby/macaddr/macaddr-1.0.0/lib/macaddr.rb -# -## -# Cross platform MAC address determination. Works for: -# * /sbin/ifconfig -# * /bin/ifconfig -# * ifconfig -# * ipconfig /all -# -# To return the first MAC address on the system: -# -# Mac.address -# -# To return an array of all MAC addresses: -# -# Mac.address.list - -module Mac - VERSION = '1.0.0' - - def Mac.version() ::Mac::VERSION end - - class << self - - ## - # Accessor for the system's first MAC address, requires a call to #address - # first - - attr_accessor "mac_address" - - ## - # Discovers and returns the system's MAC addresses. Returns the first - # MAC address, and includes an accessor #list for the remaining addresses: - # - # Mac.addr # => first address - # Mac.addr.list # => all addresses - - def address - return @mac_address if defined? @mac_address and @mac_address - re = %r/[^:\-](?:[0-9A-F][0-9A-F][:\-]){5}[0-9A-F][0-9A-F][^:\-]/io - cmds = '/sbin/ifconfig', '/bin/ifconfig', 'ifconfig', 'ipconfig /all' - - null = test(?e, '/dev/null') ? '/dev/null' : 'NUL' - - lines = nil - cmds.each do |cmd| - stdout = IO.popen("#{ cmd } 2> #{ null }"){|fd| fd.readlines} rescue next - next unless stdout and stdout.size > 0 - lines = stdout and break - end - raise "all of #{ cmds.join ' ' } failed" unless lines - - candidates = lines.select{|line| line =~ re} - raise 'no mac address candidates' unless candidates.first - candidates.map!{|c| c[re].strip} - - maddr = candidates.first - raise 'no mac address found' unless maddr - - maddr.strip! - maddr.instance_eval{ @list = candidates; def list() @list end } - - @mac_address = maddr - end - - ## - # Shorter alias for #address - - alias_method "addr", "address" - end -end diff --git a/lib/mongo/util/uuid.rb b/lib/mongo/util/uuid.rb deleted file mode 100644 index 89b5c8f..0000000 --- a/lib/mongo/util/uuid.rb +++ /dev/null @@ -1,282 +0,0 @@ -# From http://github.com/assaf/uuid/tree/master -# -# = uuid.rb - UUID generator -# -# Author:: Assaf Arkin assaf@labnotes.org -# Eric Hodel drbrain@segment7.net -# Copyright:: Copyright (c) 2005-2008 Assaf Arkin, Eric Hodel -# License:: MIT and/or Creative Commons Attribution-ShareAlike - -require 'fileutils' -require 'thread' -require 'tmpdir' - -# require 'rubygems' -require 'mongo/util/macaddr' - - -## -# = Generating UUIDs -# -# Call #generate to generate a new UUID. The method returns a string in one of -# three formats. The default format is 36 characters long, and contains the 32 -# hexadecimal octets and hyphens separating the various value parts. The -# :compact format omits the hyphens, while the :urn format -# adds the :urn:uuid prefix. -# -# For example: -# -# uuid = UUID.new -# -# 10.times do -# p uuid.generate -# end -# -# = UUIDs in Brief -# -# UUID (universally unique identifier) are guaranteed to be unique across time -# and space. -# -# A UUID is 128 bit long, and consists of a 60-bit time value, a 16-bit -# sequence number and a 48-bit node identifier. -# -# The time value is taken from the system clock, and is monotonically -# incrementing. However, since it is possible to set the system clock -# backward, a sequence number is added. The sequence number is incremented -# each time the UUID generator is started. The combination guarantees that -# identifiers created on the same machine are unique with a high degree of -# probability. -# -# Note that due to the structure of the UUID and the use of sequence number, -# there is no guarantee that UUID values themselves are monotonically -# incrementing. The UUID value cannot itself be used to sort based on order -# of creation. -# -# To guarantee that UUIDs are unique across all machines in the network, -# the IEEE 802 MAC address of the machine's network interface card is used as -# the node identifier. -# -# For more information see {RFC 4122}[http://www.ietf.org/rfc/rfc4122.txt]. - -class UUID - - VERSION = '2.0.1' - - ## - # Clock multiplier. Converts Time (resolution: seconds) to UUID clock - # (resolution: 10ns) - CLOCK_MULTIPLIER = 10000000 - - ## - # Clock gap is the number of ticks (resolution: 10ns) between two Ruby Time - # ticks. - CLOCK_GAPS = 100000 - - ## - # Version number stamped into the UUID to identify it as time-based. - VERSION_CLOCK = 0x0100 - - ## - # Formats supported by the UUID generator. - # - # :default:: Produces 36 characters, including hyphens separating - # the UUID value parts - # :compact:: Produces a 32 digits (hexadecimal) value with no - # hyphens - # :urn:: Adds the prefix urn:uuid: to the default format - FORMATS = { - :compact => '%08x%04x%04x%04x%012x', - :default => '%08x-%04x-%04x-%04x-%012x', - :urn => 'urn:uuid:%08x-%04x-%04x-%04x-%012x', - } - - ## - # MAC address (48 bits), sequence number and last clock - STATE_FILE_FORMAT = 'SLLQ' - - @state_file = nil - @mode = nil - @uuid = nil - - ## - # The access mode of the state file. Set it with state_file. - - def self.mode - @mode - end - - ## - # Generates a new UUID string using +format+. See FORMATS for a list of - # supported formats. - - def self.generate(format = :default) - @uuid ||= new - @uuid.generate format - end - - ## - # Creates an empty state file in /var/tmp/ruby-uuid or the windows common - # application data directory using mode 0644. Call with a different mode - # before creating a UUID generator if you want to open access beyond your - # user by default. - # - # If the default state dir is not writable, UUID falls back to ~/.ruby-uuid. - # - # State files are not portable across machines. - def self.state_file(mode = 0644) - return @state_file if @state_file - - @mode = mode - - begin - require 'Win32API' - - csidl_common_appdata = 0x0023 - path = 0.chr * 260 - get_folder_path = Win32API.new('shell32', 'SHGetFolderPath', 'LLLLP', 'L') - get_folder_path.call 0, csidl_common_appdata, 0, 1, path - - state_dir = File.join(path.strip) - rescue LoadError - state_dir = File.join('', 'var', 'tmp') - end - - if File.writable?(state_dir) then - @state_file = File.join(state_dir, 'ruby-uuid') - else - @state_file = File.expand_path(File.join('~', '.ruby-uuid')) - end - - @state_file - end - - ## - # Create a new UUID generator. You really only need to do this once. - def initialize - @drift = 0 - @last_clock = (Time.now.to_f * CLOCK_MULTIPLIER).to_i - @mutex = Mutex.new - - if File.exist?(self.class.state_file) then - next_sequence - else - @mac = Mac.addr.gsub(/:|-/, '').hex & 0x7FFFFFFFFFFF - fail "Cannot determine MAC address from any available interface, tried with #{Mac.addr}" if @mac == 0 - @sequence = rand 0x10000 - - open_lock 'w' do |io| - write_state io - end - end - end - - ## - # Generates a new UUID string using +format+. See FORMATS for a list of - # supported formats. - def generate(format = :default) - template = FORMATS[format] - - raise ArgumentError, "invalid UUID format #{format.inspect}" unless template - - # The clock must be monotonically increasing. The clock resolution is at - # best 100 ns (UUID spec), but practically may be lower (on my setup, - # around 1ms). If this method is called too fast, we don't have a - # monotonically increasing clock, so the solution is to just wait. - # - # It is possible for the clock to be adjusted backwards, in which case we - # would end up blocking for a long time. When backward clock is detected, - # we prevent duplicates by asking for a new sequence number and continue - # with the new clock. - - clock = @mutex.synchronize do - clock = (Time.new.to_f * CLOCK_MULTIPLIER).to_i & 0xFFFFFFFFFFFFFFF0 - - if clock > @last_clock then - @drift = 0 - @last_clock = clock - elsif clock == @last_clock then - drift = @drift += 1 - - if drift < 10000 then - @last_clock += 1 - else - Thread.pass - nil - end - else - next_sequence - @last_clock = clock - end - end until clock - - template % [ - clock & 0xFFFFFFFF, - (clock >> 32) & 0xFFFF, - ((clock >> 48) & 0xFFFF | VERSION_CLOCK), - @sequence & 0xFFFF, - @mac & 0xFFFFFFFFFFFF - ] - end - - ## - # Updates the state file with a new sequence number. - def next_sequence - open_lock 'r+' do |io| - @mac, @sequence, @last_clock = read_state(io) - - io.rewind - io.truncate 0 - - @sequence += 1 - - write_state io - end - rescue Errno::ENOENT - open_lock 'w' do |io| - write_state io - end - ensure - @last_clock = (Time.now.to_f * CLOCK_MULTIPLIER).to_i - @drift = 0 - end - - def inspect - mac = ("%012x" % @mac).scan(/[0-9a-f]{2}/).join(':') - "MAC: #{mac} Sequence: #{@sequence}" - end - -protected - - ## - # Open the state file with an exclusive lock and access mode +mode+. - def open_lock(mode) - File.open self.class.state_file, mode, self.class.mode do |io| - begin - io.flock File::LOCK_EX - yield io - ensure - io.flock File::LOCK_UN - end - end - end - - ## - # Read the state from +io+ - def read_state(io) - mac1, mac2, seq, last_clock = io.read(32).unpack(STATE_FILE_FORMAT) - mac = (mac1 << 32) + mac2 - - return mac, seq, last_clock - end - - - ## - # Write that state to +io+ - def write_state(io) - mac2 = @mac & 0xffffffff - mac1 = (@mac >> 32) & 0xffff - - io.write [mac1, mac2, @sequence, @last_clock].pack(STATE_FILE_FORMAT) - end - -end diff --git a/tests/test_objectid.rb b/tests/test_objectid.rb new file mode 100644 index 0000000..d32e319 --- /dev/null +++ b/tests/test_objectid.rb @@ -0,0 +1,54 @@ +$LOAD_PATH[0,0] = File.join(File.dirname(__FILE__), '..', 'lib') +require 'mongo' +require 'test/unit' + +class ObjectIDTest < Test::Unit::TestCase + + include XGen::Mongo::Driver + + def setup + @t = 42 + @o = ObjectID.new(nil, @t) + end + + def test_index_for_time + t = 99 + assert_equal 0, @o.index_for_time(t) + assert_equal 1, @o.index_for_time(t) + assert_equal 2, @o.index_for_time(t) + t = 100 + assert_equal 0, @o.index_for_time(t) + end + + def test_time_bytes + a = @o.to_a + assert_equal @t, a[0] + 3.times { |i| assert_equal 0, a[i+1] } + + t = 43 + o = ObjectID.new(nil, t) + a = o.to_a + assert_equal t, a[0] + 3.times { |i| assert_equal 0, a[i+1] } + assert_equal 1, o.index_for_time(t) # 0 was used for o + end + + def test_different + o2 = ObjectID.new(nil, @t) + assert @o.to_a != o2.to_a + end + + def test_eql? + o2 = ObjectID.new(@o.to_a) + assert @o.eql?(o2) + assert @o == o2 + end + + def test_to_s + s = @o.to_s + assert_equal 24, s.length + s =~ /^([0-9a-f]+)$/ + assert_equal 24, $1.length + end + +end