Rsync synchronization for remote workers

This commit is contained in:
Nick Gauthier 2010-02-09 15:40:35 -05:00
parent 5b53276af6
commit eee69c0096
9 changed files with 168 additions and 34 deletions

View File

@ -25,32 +25,93 @@ In your rakefile:
t.add_files 'test/integration/**/*_test.rb' t.add_files 'test/integration/**/*_test.rb'
end end
Then you can run 'rake hydra'. Run:
$ rake hydra
Hydra defaults to Single Core mode, so you may want to configure it
to use two (or more) of your cores if you have a multi-processing machine.
== Configuration == Configuration
Place the config file in the main project directory as Place the config file in the main project directory as
'hydra.yml' or 'config/hydra.yml'. 'hydra.yml' or 'config/hydra.yml'.
=== Examples
==== Dual Core
workers:
- type: local
runners: 2
==== Dual Core, with a remote Quad Core server
The -p3022 option tells SSH to connect on a non-default port (3022)
workers: workers:
- type: local - type: local
runners: 2 runners: 2
- type: ssh - type: ssh
connect: user@example.com -p3022 connect: user@example.com
ssh_opts: -p3022
directory: /absolute/path/to/project directory: /absolute/path/to/project
runners: 4 runners: 4
The "connect" option is passed to SSH. So if you've setup an ==== Two Remote Quad Cores with Synchronization
ssh config alias to a server, you can use that.
The "directory" option is the path for the project directory You can use the 'sync' configuration to allow rsync to synchronize
where the tests should be run. the local and remote directories every time you run hydra.
The "runners" option is how many processes will be running workers:
on the remote machine. It's best to pick the same number - type: ssh
connect: user@alpha.example.com
directory: /path/to/project/on/alpha/
runners: 4
- type: ssh
connect: user@beta.example.com
directory: /path/to/project/on/beta/
runners: 4
sync:
directory: /my/local/project/directory
exclude:
- tmp
- log
- doc
=== Workers Options
==== type
Either "local" or "ssh".
==== runners
The *runners* option is how many processes will be running
on the machine. It's best to pick the same number
as the number of cores on that machine (as well as your as the number of cores on that machine (as well as your
own). own).
=== SSH Options
==== connect
The *connect* option is passed to SSH. So if you've set up an
ssh config alias to a server, you can use that. It is also
used by rsync as the remote host, so it cannot contain SSH
command-line options (put those in ssh_opts instead).
==== ssh_opts
The *ssh_opts* option is passed to SSH and to Rsync's RSH so
that you can use the same ssh options for connecting and rsync.
Use ssh_opts to set the port or compression options.
==== directory
The *directory* option is the path for the project directory
where the tests should be run.
== Copyright == Copyright
Copyright (c) 2010 Nick Gauthier. See LICENSE for details. Copyright (c) 2010 Nick Gauthier. See LICENSE for details.

19
TODO
View File

@ -1,24 +1,5 @@
= Hydra TODO = Hydra TODO
== Rsync
Split SSH into:
connect: user@site.com
ssh_options: -p 3022
Then make an rsync section:
rsync:
source: /path/to/code/locally
exclude:
- tmp
- log
- config/test/database.yml
- db/*.sqlite3
etc...
options: -az
== Setup == Setup
Provide a hydra:setup task to override to be run remotely before testing Provide a hydra:setup task to override to be run remotely before testing

View File

@ -47,6 +47,7 @@ Gem::Specification.new do |s|
"test/fixtures/config.yml", "test/fixtures/config.yml",
"test/fixtures/hello_world.rb", "test/fixtures/hello_world.rb",
"test/fixtures/slow.rb", "test/fixtures/slow.rb",
"test/fixtures/sync_test.rb",
"test/fixtures/write_file.rb", "test/fixtures/write_file.rb",
"test/master_test.rb", "test/master_test.rb",
"test/message_test.rb", "test/message_test.rb",
@ -67,6 +68,7 @@ Gem::Specification.new do |s|
"test/ssh_test.rb", "test/ssh_test.rb",
"test/fixtures/write_file.rb", "test/fixtures/write_file.rb",
"test/fixtures/slow.rb", "test/fixtures/slow.rb",
"test/fixtures/sync_test.rb",
"test/fixtures/assert_true.rb", "test/fixtures/assert_true.rb",
"test/fixtures/hello_world.rb", "test/fixtures/hello_world.rb",
"test/master_test.rb", "test/master_test.rb",

View File

@ -1,10 +1,12 @@
require 'hydra/hash' require 'hydra/hash'
require 'open3'
module Hydra #:nodoc: module Hydra #:nodoc:
# Hydra class responsible for delegate work down to workers. # Hydra class responsible for delegate work down to workers.
# #
# The Master is run once for any given testing session. # The Master is run once for any given testing session.
class Master class Master
include Hydra::Messages::Master include Hydra::Messages::Master
include Open3
traceable('MASTER') traceable('MASTER')
# Create a new Master # Create a new Master
# #
@ -22,12 +24,15 @@ module Hydra #:nodoc:
if config_file if config_file
opts.merge!(YAML.load_file(config_file).stringify_keys!) opts.merge!(YAML.load_file(config_file).stringify_keys!)
end end
@files = opts.fetch('files') { [] } @files = Array(opts.fetch('files') { nil })
raise "No files, nothing to do" if @files.empty?
@files.sort!{|a,b| File.size(b) <=> File.size(a)} # dumb heuristic @files.sort!{|a,b| File.size(b) <=> File.size(a)} # dumb heuristic
@incomplete_files = @files.dup @incomplete_files = @files.dup
@workers = [] @workers = []
@listeners = [] @listeners = []
@verbose = opts.fetch('verbose') { false } @verbose = opts.fetch('verbose') { false }
@sync = opts.fetch('sync') { nil }
# default is one worker that is configured to use a pipe with one runner # default is one worker that is configured to use a pipe with one runner
worker_cfg = opts.fetch('workers') { [ { 'type' => 'local', 'runners' => 1} ] } worker_cfg = opts.fetch('workers') { [ { 'type' => 'local', 'runners' => 1} ] }
@ -42,10 +47,10 @@ module Hydra #:nodoc:
# Message handling # Message handling
# Send a file down to a worker. If there are no more files, this will shut the # Send a file down to a worker.
# worker down.
def send_file(worker) def send_file(worker)
f = @files.pop f = @files.pop
trace "Sending #{f.inspect}"
worker[:io].write(RunFile.new(:file => f)) if f worker[:io].write(RunFile.new(:file => f)) if f
end end
@ -101,7 +106,27 @@ module Hydra #:nodoc:
"ruby -e \"require 'rubygems'; require 'hydra'; Hydra::Worker.new(:io => Hydra::Stdio.new, :runners => #{runners}, :verbose => #{@verbose});\"" "ruby -e \"require 'rubygems'; require 'hydra'; Hydra::Worker.new(:io => Hydra::Stdio.new, :runners => #{runners}, :verbose => #{@verbose});\""
} }
trace "Synchronizing with #{connect} [NOT REALLY]" if @sync
@sync.stringify_keys!
trace "Synchronizing with #{connect}\n\t#{@sync.inspect}"
local_dir = @sync.fetch('directory') {
raise "You must specify a synchronization directory"
}
exclude_paths = @sync.fetch('exclude') { [] }
exclude_opts = exclude_paths.inject(''){|memo, path| memo += "--exclude=#{path} "}
rsync_command = [
'rsync',
'-avz',
'--delete',
exclude_opts,
File.expand_path(local_dir)+'/',
"-e \"ssh #{ssh_opts}\"",
"#{connect}:#{directory}"
].join(" ")
trace rsync_command
trace `#{rsync_command}`
end
trace "Booting SSH worker" trace "Booting SSH worker"
ssh = Hydra::SSH.new("#{ssh_opts} #{connect}", directory, command) ssh = Hydra::SSH.new("#{ssh_opts} #{connect}", directory, command)

View File

@ -13,6 +13,7 @@ module Hydra #:nodoc:
return nil unless message return nil unless message
return Message.build(eval(message.chomp)) return Message.build(eval(message.chomp))
rescue SyntaxError, NameError rescue SyntaxError, NameError
# uncomment to help catch remote errors by seeing all traffic
#$stderr.write "Not a message: [#{message.inspect}]\n" #$stderr.write "Not a message: [#{message.inspect}]\n"
return gets return gets
end end

View File

@ -33,7 +33,13 @@ module Hydra #:nodoc:
# Run a test file and report the results # Run a test file and report the results
def run_file(file) def run_file(file)
trace "Running file: #{file}" trace "Running file: #{file}"
begin
require file require file
rescue LoadError => ex
trace "#{file} does not exist [#{ex.to_s}]"
@io.write Results.new(:output => ex.to_s, :file => file)
return
end
output = [] output = []
@result = Test::Unit::TestResult.new @result = Test::Unit::TestResult.new
@result.add_listener(Test::Unit::TestResult::FAULT) do |value| @result.add_listener(Test::Unit::TestResult::FAULT) do |value|

View File

@ -91,7 +91,6 @@ module Hydra #:nodoc:
@listeners.each{|l| l.join } @listeners.each{|l| l.join }
@io.close @io.close
trace "Done processing messages" trace "Done processing messages"
exit(0) # avoids test summaries
end end
def process_messages_from_master def process_messages_from_master

8
test/fixtures/sync_test.rb vendored Normal file
View File

@ -0,0 +1,8 @@
require 'test/unit'
# Test fixture: a minimal Test::Unit case whose single test always passes.
# It carries no logic of its own; it only provides a valid, passing test file.
class SyncTest < Test::Unit::TestCase
# Trivial assertion that always succeeds.
def test_truth
assert true
end
end

View File

@ -57,7 +57,6 @@ class MasterTest < Test::Unit::TestCase
assert (finish-start) < 15, "took #{finish-start} seconds" assert (finish-start) < 15, "took #{finish-start} seconds"
end end
should "run a test via ssh" do should "run a test via ssh" do
Hydra::Master.new( Hydra::Master.new(
:files => [test_file], :files => [test_file],
@ -80,5 +79,57 @@ class MasterTest < Test::Unit::TestCase
assert File.exists?(target_file) assert File.exists?(target_file)
assert_equal "HYDRA", File.read(target_file) assert_equal "HYDRA", File.read(target_file)
end end
should "synchronize a test file over ssh with rsync" do
local = File.join(Dir.tmpdir, 'hydra', 'local')
remote = File.join(Dir.tmpdir, 'hydra', 'remote')
sync_test = File.join(File.dirname(__FILE__), 'fixtures', 'sync_test.rb')
[local, remote].each{|f| FileUtils.rm_rf f; FileUtils.mkdir_p f}
# setup the folders:
# local:
# - test_a
# - test_c
# remote:
# - test_b
#
# add test_c to excludes
FileUtils.cp(sync_test, File.join(local, 'test_a.rb'))
FileUtils.cp(sync_test, File.join(local, 'test_c.rb'))
FileUtils.cp(sync_test, File.join(remote, 'test_b.rb'))
# ensure a is not on remote
assert !File.exists?(File.join(remote, 'test_a.rb')), "A should not be on remote"
# ensure c is not on remote
assert !File.exists?(File.join(remote, 'test_c.rb')), "C should not be on remote"
# ensure b is on remote
assert File.exists?(File.join(remote, 'test_b.rb')), "B should be on remote"
# fake as if the test got run, so only the sync code is really being tested
fake_result = Hydra::Messages::Worker::Results.new(
:file => 'test_a.rb', :output => '.'
).serialize.inspect
Hydra::Master.new(
:files => ['test_a.rb'],
:workers => [{
:type => :ssh,
:connect => 'localhost',
:directory => remote,
:runners => 1,
:command => "ruby -e 'puts #{fake_result}' && exit"
}],
:sync => {
:directory => local,
:exclude => ['test_c.rb']
}
)
# ensure a is copied
assert File.exists?(File.join(remote, 'test_a.rb')), "A was not copied"
# ensure c is not copied
assert !File.exists?(File.join(remote, 'test_c.rb')), "C was copied, should be excluded"
# ensure b is deleted
assert !File.exists?(File.join(remote, 'test_b.rb')), "B was not deleted"
end
end end
end end