prevent catastrophic failures
This commit is contained in:
parent
95cbf09313
commit
16809a69dc
@@ -201,6 +201,7 @@ module Hydra #:nodoc:
|
||||
@workers << worker if worker
|
||||
end
|
||||
if worker
|
||||
dead_count = 0
|
||||
while true
|
||||
begin
|
||||
message = worker[:io].gets
|
||||
@@ -209,6 +210,9 @@ module Hydra #:nodoc:
|
||||
# SSH gives us back echoes, so we need to ignore our own messages
|
||||
if message and !message.class.to_s.index("Worker").nil?
|
||||
message.handle(self, worker)
|
||||
else
|
||||
dead_count += 1
|
||||
raise IOError if dead_count > 100
|
||||
end
|
||||
rescue IOError
|
||||
trace "lost Worker [#{worker.inspect}]"
|
||||
|
@@ -11,6 +11,8 @@ module Hydra #:nodoc:
|
||||
traceable('RUNNER')
|
||||
|
||||
DEFAULT_LOG_FILE = 'hydra-runner.log'
|
||||
PING_COUNT_FAILURE_TIME = 5
|
||||
WAIT_BETWEEN_PING = 0.1
|
||||
|
||||
# Boot up a runner. It takes an IO object (generally a pipe from its
|
||||
# parent) to send it messages on which files to execute.
|
||||
@@ -102,6 +104,7 @@ module Hydra #:nodoc:
|
||||
message.handle(self)
|
||||
else
|
||||
@io.write Ping.new
|
||||
sleep WAIT_BETWEEN_PING
|
||||
end
|
||||
rescue IOError => ex
|
||||
trace "Runner lost Worker"
|
||||
|
@@ -273,6 +273,27 @@ class MasterTest < Test::Unit::TestCase
|
||||
|
||||
assert_file_exists target_file
|
||||
end
|
||||
|
||||
should "not die horribly when the host cannot be reached" do
|
||||
capture_stderr do # redirect stderr
|
||||
@pid = Process.fork do
|
||||
Hydra::Master.new(
|
||||
:files => [test_file],
|
||||
:autosort => false,
|
||||
:listeners => [@master_listener],
|
||||
:runner_listeners => [@runner_listener],
|
||||
:workers => [{
|
||||
:type => :ssh,
|
||||
:connect => 'sdlsdkjfhadsfjsd',
|
||||
:directory => remote_dir_path,
|
||||
:runners => 1
|
||||
}],
|
||||
:verbose => false
|
||||
)
|
||||
end
|
||||
end
|
||||
Process.waitpid @pid
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user