prevent catastrophic failures

This commit is contained in:
John Bintz 2011-08-31 17:24:43 -04:00
parent 95cbf09313
commit 16809a69dc
3 changed files with 28 additions and 0 deletions

View File

@ -201,6 +201,7 @@ module Hydra #:nodoc:
@workers << worker if worker @workers << worker if worker
end end
if worker if worker
dead_count = 0
while true while true
begin begin
message = worker[:io].gets message = worker[:io].gets
@ -209,6 +210,9 @@ module Hydra #:nodoc:
# SSH gives us back echoes, so we need to ignore our own messages # SSH gives us back echoes, so we need to ignore our own messages
if message and !message.class.to_s.index("Worker").nil? if message and !message.class.to_s.index("Worker").nil?
message.handle(self, worker) message.handle(self, worker)
else
dead_count += 1
raise IOError if dead_count > 100
end end
rescue IOError rescue IOError
trace "lost Worker [#{worker.inspect}]" trace "lost Worker [#{worker.inspect}]"

View File

@ -11,6 +11,8 @@ module Hydra #:nodoc:
traceable('RUNNER') traceable('RUNNER')
DEFAULT_LOG_FILE = 'hydra-runner.log' DEFAULT_LOG_FILE = 'hydra-runner.log'
# NOTE(review): not referenced in the visible part of this change; presumably a
# threshold on unanswered pings before the runner gives up -- confirm usage and units.
PING_COUNT_FAILURE_TIME = 5
# Duration handed to `sleep` right after the runner writes a Ping, so the
# loop pauses between pings instead of spinning.
WAIT_BETWEEN_PING = 0.1
# Boot up a runner. It takes an IO object (generally a pipe from its # Boot up a runner. It takes an IO object (generally a pipe from its
# parent) to send it messages on which files to execute. # parent) to send it messages on which files to execute.
@ -102,6 +104,7 @@ module Hydra #:nodoc:
message.handle(self) message.handle(self)
else else
@io.write Ping.new @io.write Ping.new
sleep WAIT_BETWEEN_PING
end end
rescue IOError => ex rescue IOError => ex
trace "Runner lost Worker" trace "Runner lost Worker"

View File

@ -273,6 +273,27 @@ class MasterTest < Test::Unit::TestCase
assert_file_exists target_file assert_file_exists target_file
end end
should "not die horribly when the host cannot be reached" do
  # The Master is booted in a forked child; the test completes once that
  # child has exited and been reaped by the parent.
  capture_stderr do # keep the child's stderr noise out of the test output
    @pid = Process.fork do
      files = [test_file]
      # SSH worker pointing at a nonsense host name (presumably unresolvable).
      unreachable_worker = {
        :type => :ssh,
        :connect => 'sdlsdkjfhadsfjsd',
        :directory => remote_dir_path,
        :runners => 1
      }
      Hydra::Master.new(
        :files => files,
        :autosort => false,
        :listeners => [@master_listener],
        :runner_listeners => [@runner_listener],
        :workers => [unreachable_worker],
        :verbose => false
      )
    end
  end
  # Block until the forked Master process has terminated.
  Process.waitpid(@pid)
end
end end
end end