New object id generation that matches Babble.

This commit is contained in:
Jim Menard 2008-12-09 08:28:10 -05:00
parent 294fa97454
commit 135a9ca0ab
5 changed files with 118 additions and 383 deletions

15
README
View File

@ -52,10 +52,6 @@ type
= To Do
* Implement Babble's algorithm for ObjectID. Implement Comparable.
* ObjectID equality.
* Capped collection support.
* Implement order_by (right now, sort order is ignored)
@ -104,17 +100,6 @@ details.
See http://www.gnu.org/licenses/ for a copy of the GNU Affero General Public
License.
== UUID
This driver uses the UUID generator by Assaf Arkin and Eric Hodel, which uses
the MIT license (see the top of the file lib/mongo/util/uuid.rb).
== MAC Address Finder
This driver uses the MAC address finder found at
http://codeforpeople.com/lib/ruby/macaddr/macaddr-1.0.0/lib/macaddr.rb, which
uses Ruby's license (see http://codeforpeople.com/lib/license.txt).
--
# Local Variables:
# mode:rdoc

View File

@ -12,37 +12,86 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
require 'mongo/util/uuid'
require 'mutex_m'
require 'mongo/util/byte_buffer'
module XGen
module Mongo
module Driver
# Implementation of the Babble OID.
#
# 12 bytes
# ---
# 0 time
# 1
# 2
# 3
# 4 machine
# 5
# 6
# 7 pid
# 8
# 9 inc
# 10
# 11
class ObjectID
include Comparable
# Three bytes taken from a random 24-bit value (low byte first), picked once
# when this class body is evaluated. They fill the "machine" bytes of an id.
MACHINE = ( val = rand(0x1000000); [val & 0xff, (val >> 8) & 0xff, (val >> 16) & 0xff] )
# Two bytes taken from a random 16-bit value (low byte first). Despite the
# name, the value is random and is not read from the operating system.
PID = ( val = rand(0x10000); [val & 0xff, (val >> 8) & 0xff]; )
# NOTE(review): only the UUID-based initialize reads this generator; it looks
# like a leftover of the older id scheme -- confirm before relying on it.
@@uuid_generator = UUID.new
# Lock object guarding @@index_time and @@index; index_for_time wraps its
# work in LOCK.mu_synchronize.
LOCK = Object.new
LOCK.extend Mutex_m
# String UUID
attr_reader :uuid
# Time value that the counter @@index currently belongs to.
@@index_time = Time.new.to_i
# Next counter value to hand out for @@index_time.
@@index = 0
# +uuid+ is a string. If nil, a new UUID will be generated.
def initialize(uuid=nil)
# The Babble server expects 12-byte (24 hex character) keys, which
# is why we throw away part of the UUID.
@uuid ||= @@uuid_generator.generate(:compact).sub(/(.{12}).{8}(.{12})/, '\1\2')
# +data+ is an array of bytes. If nil, a new id will be generated.
# The time +t+ is only used for testing; leave it nil.
def initialize(data=nil, t=nil)
# A caller-supplied byte array is stored as-is (not copied). Without one, a
# fresh id is generated; generate_id uses the current time when t is nil.
@data = data || generate_id(t)
end
# Two ids are equal when their byte arrays match. +other+ may be anything
# that responds to to_a (another ObjectID, or a plain array of bytes);
# objects that cannot be converted are simply not equal, rather than
# raising NoMethodError.
def eql?(other)
  other.respond_to?(:to_a) && @data == other.to_a
end
alias_method :==, :eql?

# Hash on the byte array so that ids which are eql? also share a hash value
# and can therefore be used interchangeably as Hash keys.
def hash
  @data.hash
end
# Returns a (shallow) copy of the id's byte array, so that changes made by the
# caller to the returned array do not alter the array held by this object.
def to_a
@data.dup
end
# Returns the id as a lowercase hexadecimal string, two digits per byte
# (24 characters for a 12-byte id).
def to_s
  @data.map { |b| '%02x' % b }.join
end
# This will make more sense when we implement the Babble algorithm for
# generating object ids.
def <=>(other)
to_s <=> other.to_s
# Builds the byte array for a new id: the time +t+ (masked to 32 bits), then
# the MACHINE bytes, the PID bytes and finally the three low bytes of the
# per-time counter, least significant byte first. +t+ defaults to the current
# time in seconds. Returns a copy of the buffer's contents as an array.
def generate_id(t=nil)
  t ||= Time.now.to_i
  bytes = ByteBuffer.new
  bytes.put_int(t & 0xffffffff)
  [MACHINE, PID].each { |part| bytes.put_array(part) }
  counter = index_for_time(t)
  [0, 8, 16].each { |shift| bytes.put((counter >> shift) & 0xff) }
  bytes.rewind
  bytes.to_a.dup
end
# Hands out the next counter value for time +t+. The counter restarts at 0
# whenever +t+ differs from the time of the previous call; the whole
# read-and-increment step runs inside LOCK.mu_synchronize.
def index_for_time(t)
  LOCK.mu_synchronize do
    unless t == @@index_time
      @@index_time = t
      @@index = 0
    end
    current = @@index
    @@index = current + 1
    current
  end
end
end
end
end

View File

@ -1,71 +0,0 @@
# http://codeforpeople.com/lib/ruby/macaddr/macaddr-1.0.0/lib/macaddr.rb
#
##
# Cross platform MAC address determination. Works for:
# * /sbin/ifconfig
# * /bin/ifconfig
# * ifconfig
# * ipconfig /all
#
# To return the first MAC address on the system:
#
# Mac.address
#
# To return an array of all MAC addresses:
#
# Mac.address.list
##
# Finds the machine's MAC address(es) by running ifconfig / ipconfig and
# scanning the output for address-shaped text.
module Mac
  VERSION = '1.0.0'

  # Returns the version string of this module.
  def self.version
    VERSION
  end

  class << self
    ##
    # The cached first MAC address. Only set once #address has run (or the
    # value has been assigned directly).
    attr_accessor :mac_address

    ##
    # Discovers and returns the system's first MAC address. The returned
    # string also answers #list with every address that was found:
    #
    #   Mac.addr      # => first address
    #   Mac.addr.list # => all addresses
    #
    # The result is cached; later calls return the cached string. Raises a
    # RuntimeError when no command produces output or no address is found.
    def address
      return @mac_address if defined?(@mac_address) && @mac_address

      mac_pattern = %r/[^:\-](?:[0-9A-F][0-9A-F][:\-]){5}[0-9A-F][0-9A-F][^:\-]/io
      commands = ['/sbin/ifconfig', '/bin/ifconfig', 'ifconfig', 'ipconfig /all']
      sink = File.exist?('/dev/null') ? '/dev/null' : 'NUL'

      # Try each command in turn and keep the first non-empty output.
      output = nil
      commands.each do |command|
        captured =
          begin
            IO.popen("#{command} 2> #{sink}") { |pipe| pipe.readlines }
          rescue StandardError
            next
          end
        next if captured.nil? || captured.empty?
        output = captured
        break
      end
      raise "all of #{commands.join(' ')} failed" unless output

      found = output.grep(mac_pattern)
      raise 'no mac address candidates' if found.empty?

      # The pattern includes one boundary character on each side of the
      # address; strip removes it when it is whitespace.
      found.map! { |line| line[mac_pattern].strip }
      primary = found.first
      raise 'no mac address found' unless primary

      primary.strip!
      # Attach the full list to the returned string itself.
      primary.instance_variable_set(:@list, found)
      def primary.list() @list end
      @mac_address = primary
    end

    ##
    # Shorter alias for #address
    alias_method :addr, :address
  end
end

View File

@ -1,282 +0,0 @@
# From http://github.com/assaf/uuid/tree/master
#
# = uuid.rb - UUID generator
#
# Author:: Assaf Arkin assaf@labnotes.org
# Eric Hodel drbrain@segment7.net
# Copyright:: Copyright (c) 2005-2008 Assaf Arkin, Eric Hodel
# License:: MIT and/or Creative Commons Attribution-ShareAlike
require 'fileutils'
require 'thread'
require 'tmpdir'
# require 'rubygems'
require 'mongo/util/macaddr'
##
# = Generating UUIDs
#
# Call #generate to generate a new UUID. The method returns a string in one of
# three formats. The default format is 36 characters long, and contains the 32
# hexadecimal octets and hyphens separating the various value parts. The
# <tt>:compact</tt> format omits the hyphens, while the <tt>:urn</tt> format
# adds the <tt>urn:uuid:</tt> prefix.
#
# For example:
#
# uuid = UUID.new
#
# 10.times do
# p uuid.generate
# end
#
# = UUIDs in Brief
#
# UUIDs (universally unique identifiers) are guaranteed to be unique across
# time and space.
#
# A UUID is 128 bits long, and consists of a 60-bit time value, a 16-bit
# sequence number and a 48-bit node identifier.
#
# The time value is taken from the system clock, and is monotonically
# incrementing. However, since it is possible to set the system clock
# backward, a sequence number is added. The sequence number is incremented
# each time the UUID generator is started. The combination guarantees that
# identifiers created on the same machine are unique with a high degree of
# probability.
#
# Note that due to the structure of the UUID and the use of sequence number,
# there is no guarantee that UUID values themselves are monotonically
# incrementing. The UUID value cannot itself be used to sort based on order
# of creation.
#
# To guarantee that UUIDs are unique across all machines in the network,
# the IEEE 802 MAC address of the machine's network interface card is used as
# the node identifier.
#
# For more information see {RFC 4122}[http://www.ietf.org/rfc/rfc4122.txt].
class UUID
VERSION = '2.0.1'
##
# Clock multiplier. Converts Time (resolution: seconds) to UUID clock ticks.
# There are 10,000,000 ticks per second, i.e. one tick is 100ns.
CLOCK_MULTIPLIER = 10000000
##
# Clock gap is the number of UUID clock ticks between two Ruby Time ticks.
# Not referenced anywhere else in this class.
CLOCK_GAPS = 100000
##
# Version number stamped into the UUID to identify it as time-based. It is
# OR-ed into the third group of the formatted UUID (see #generate).
# NOTE(review): RFC 4122 keeps the version in the top four bits of that
# 16-bit group, which would be 0x1000 for version 1 -- confirm that 0x0100
# is intended.
VERSION_CLOCK = 0x0100
##
# Formats supported by the UUID generator.
#
# <tt>:default</tt>:: Produces 36 characters, including hyphens separating
# the UUID value parts
# <tt>:compact</tt>:: Produces a 32-digit hexadecimal value with no
# hyphens
# <tt>:urn</tt>:: Adds the prefix <tt>urn:uuid:</tt> to the default format
FORMATS = {
:compact => '%08x%04x%04x%04x%012x',
:default => '%08x-%04x-%04x-%04x-%012x',
:urn => 'urn:uuid:%08x-%04x-%04x-%04x-%012x',
}
##
# Array#pack / String#unpack layout of the state file. In field order (see
# #write_state): upper 16 bits of the MAC address (S), lower 32 bits of the
# MAC address (L), sequence number (L) and last clock (Q).
STATE_FILE_FORMAT = 'SLLQ'
# Class-level state: cached state file path, the file mode recorded by
# state_file, and the shared generator used by UUID.generate.
@state_file = nil
@mode = nil
@uuid = nil
##
# The access mode of the state file. Set it with state_file; nil until
# state_file has been called.
def self.mode
@mode
end
##
# Generates a new UUID string using +format+. See FORMATS for a list of
# supported formats. A single shared generator is created on first use.
def self.generate(format = :default)
@uuid ||= new
@uuid.generate format
end
##
# Returns the path of the state file and records +mode+ as the access mode
# to use when the file is opened. The path is ruby-uuid inside the Windows
# common application data directory when Win32API can be loaded, otherwise
# inside /var/tmp. Call with a different mode before creating a UUID
# generator if you want to open access beyond your user by default.
#
# If the default state dir is not writable, UUID falls back to ~/.ruby-uuid.
#
# The result is cached: only the first call computes the path and records
# +mode+; later calls return the cached path and ignore their argument.
# This method does not create the file itself; that happens in #initialize.
#
# State files are not portable across machines.
def self.state_file(mode = 0644)
return @state_file if @state_file
@mode = mode
begin
require 'Win32API'
csidl_common_appdata = 0x0023
# Buffer that SHGetFolderPath fills in with the folder path.
path = 0.chr * 260
get_folder_path = Win32API.new('shell32', 'SHGetFolderPath', 'LLLLP', 'L')
get_folder_path.call 0, csidl_common_appdata, 0, 1, path
state_dir = File.join(path.strip)
rescue LoadError
# Win32API is not available: use /var/tmp.
state_dir = File.join('', 'var', 'tmp')
end
if File.writable?(state_dir) then
@state_file = File.join(state_dir, 'ruby-uuid')
else
@state_file = File.expand_path(File.join('~', '.ruby-uuid'))
end
@state_file
end
##
# Create a new UUID generator. You really only need to do this once.
#
# When the state file already exists, the MAC address and sequence are
# loaded from it (and the sequence is advanced) by #next_sequence. Otherwise
# the MAC address comes from Mac.addr, the sequence starts at a random
# 16-bit value, and a new state file is written. Fails with a RuntimeError
# when the MAC address works out to 0.
def initialize
@drift = 0
@last_clock = (Time.now.to_f * CLOCK_MULTIPLIER).to_i
@mutex = Mutex.new
if File.exist?(self.class.state_file) then
next_sequence
else
# Separators are removed, the rest is read as hex and masked with
# 0x7FFFFFFFFFFF.
@mac = Mac.addr.gsub(/:|-/, '').hex & 0x7FFFFFFFFFFF
fail "Cannot determine MAC address from any available interface, tried with #{Mac.addr}" if @mac == 0
@sequence = rand 0x10000
open_lock 'w' do |io|
write_state io
end
end
end
##
# Generates a new UUID string using +format+. See FORMATS for a list of
# supported formats. Raises ArgumentError for an unknown format.
def generate(format = :default)
template = FORMATS[format]
raise ArgumentError, "invalid UUID format #{format.inspect}" unless template
# The clock must be monotonically increasing. The clock resolution is at
# best 100 ns (UUID spec), but practically may be lower (on my setup,
# around 1ms). If this method is called too fast, we don't have a
# monotonically increasing clock, so the solution is to just wait.
#
# It is possible for the clock to be adjusted backwards, in which case we
# would end up blocking for a long time. When backward clock is detected,
# we prevent duplicates by asking for a new sequence number and continue
# with the new clock.
#
# The synchronized block evaluates to the clock value to use, or to nil
# when the caller must try again; the loop repeats until it gets a value.
clock = @mutex.synchronize do
# Current time in clock ticks with the low four bits cleared.
clock = (Time.new.to_f * CLOCK_MULTIPLIER).to_i & 0xFFFFFFFFFFFFFFF0
if clock > @last_clock then
# Clock moved forward: use it and reset the drift counter.
@drift = 0
@last_clock = clock
elsif clock == @last_clock then
# Same tick as the previous call: nudge the last clock forward by one,
# up to 10000 times, then yield to other threads and retry.
drift = @drift += 1
if drift < 10000 then
@last_clock += 1
else
Thread.pass
nil
end
else
# Clock moved backwards: take a new sequence number and continue with
# the new clock value.
next_sequence
@last_clock = clock
end
end until clock
# Split the clock into the low 32 bits, the next 16 bits and the remaining
# bits (combined with the version marker); then the sequence and the MAC.
template % [
clock & 0xFFFFFFFF,
(clock >> 32) & 0xFFFF,
((clock >> 48) & 0xFFFF | VERSION_CLOCK),
@sequence & 0xFFFF,
@mac & 0xFFFFFFFFFFFF
]
end
##
# Updates the state file with a new sequence number: reads the stored state
# under an exclusive lock, increments the sequence and writes the state
# back. When the file does not exist, the current state is written to a new
# file instead. In every case the last clock is reset to the current time
# and the drift counter to 0 afterwards.
def next_sequence
open_lock 'r+' do |io|
@mac, @sequence, @last_clock = read_state(io)
io.rewind
io.truncate 0
@sequence += 1
write_state io
end
rescue Errno::ENOENT
open_lock 'w' do |io|
write_state io
end
ensure
@last_clock = (Time.now.to_f * CLOCK_MULTIPLIER).to_i
@drift = 0
end
##
# Short description of the generator: the MAC address as colon-separated
# hex pairs, and the current sequence number.
def inspect
mac = ("%012x" % @mac).scan(/[0-9a-f]{2}/).join(':')
"MAC: #{mac} Sequence: #{@sequence}"
end
protected
##
# Open the state file with an exclusive lock and access mode +mode+, yield
# the open file to the block, and release the lock when the block is done
# (also when it raises). The file is opened with the permission bits
# recorded by UUID.state_file.
def open_lock(mode)
File.open self.class.state_file, mode, self.class.mode do |io|
begin
io.flock File::LOCK_EX
yield io
ensure
io.flock File::LOCK_UN
end
end
end
##
# Read the state from +io+. Returns the MAC address, the sequence number and
# the last clock, in that order.
# NOTE(review): an empty or truncated state file would make the unpack /
# shift below fail -- confirm whether that case needs handling.
def read_state(io)
mac1, mac2, seq, last_clock = io.read(32).unpack(STATE_FILE_FORMAT)
# Recombine the two stored halves of the MAC address.
mac = (mac1 << 32) + mac2
return mac, seq, last_clock
end
##
# Write that state to +io+: the MAC address split into its upper 16 and
# lower 32 bits, followed by the sequence number and the last clock.
def write_state(io)
mac2 = @mac & 0xffffffff
mac1 = (@mac >> 32) & 0xffff
io.write [mac1, mac2, @sequence, @last_clock].pack(STATE_FILE_FORMAT)
end
end

54
tests/test_objectid.rb Normal file
View File

@ -0,0 +1,54 @@
$LOAD_PATH[0,0] = File.join(File.dirname(__FILE__), '..', 'lib')
require 'mongo'
require 'test/unit'
# Unit tests for ObjectID: counter handling, byte layout of the time field,
# uniqueness, equality and the hex string form.
class ObjectIDTest < Test::Unit::TestCase

  include XGen::Mongo::Driver

  # Every test starts with an id generated for the fixed time 42.
  def setup
    @t = 42
    @o = ObjectID.new(nil, @t)
  end

  # The counter counts up within one time value and restarts at 0 when the
  # time value changes.
  def test_index_for_time
    t = 99
    assert_equal 0, @o.index_for_time(t)
    assert_equal 1, @o.index_for_time(t)
    assert_equal 2, @o.index_for_time(t)
    t = 100
    assert_equal 0, @o.index_for_time(t)
  end

  # The first four bytes hold the time, low byte first.
  def test_time_bytes
    a = @o.to_a
    assert_equal @t, a[0]
    assert_equal [0, 0, 0], a[1, 3]

    t = 43
    o = ObjectID.new(nil, t)
    a = o.to_a
    assert_equal t, a[0]
    assert_equal [0, 0, 0], a[1, 3]
    assert_equal 1, o.index_for_time(t) # 0 was used for o
  end

  # Two ids generated for the same time must still differ.
  def test_different
    o2 = ObjectID.new(nil, @t)
    assert_not_equal @o.to_a, o2.to_a
  end

  # An id rebuilt from another id's bytes is equal to it.
  def test_eql?
    o2 = ObjectID.new(@o.to_a)
    assert @o.eql?(o2)
    assert @o == o2
  end

  # The string form is exactly 24 lowercase hex digits. assert_match reports
  # a mismatch as a test failure, and \A / \z anchor the whole string rather
  # than a single line.
  def test_to_s
    s = @o.to_s
    assert_equal 24, s.length
    assert_match(/\A[0-9a-f]{24}\z/, s)
  end

end