From 16809a69dca06190166552d4610dd8d0511c612e Mon Sep 17 00:00:00 2001 From: John Bintz Date: Wed, 31 Aug 2011 17:24:43 -0400 Subject: [PATCH] prevent catastrophic failures --- lib/hydra/master.rb | 4 ++++ lib/hydra/runner.rb | 3 +++ test/master_test.rb | 21 +++++++++++++++++++++ 3 files changed, 28 insertions(+) diff --git a/lib/hydra/master.rb b/lib/hydra/master.rb index b1525b8..2eda5c8 100644 --- a/lib/hydra/master.rb +++ b/lib/hydra/master.rb @@ -201,6 +201,7 @@ module Hydra #:nodoc: @workers << worker if worker end if worker + dead_count = 0 while true begin message = worker[:io].gets @@ -209,6 +210,9 @@ module Hydra #:nodoc: # SSH gives us back echoes, so we need to ignore our own messages if message and !message.class.to_s.index("Worker").nil? message.handle(self, worker) + else + dead_count += 1 + raise IOError if dead_count > 100 end rescue IOError trace "lost Worker [#{worker.inspect}]" diff --git a/lib/hydra/runner.rb b/lib/hydra/runner.rb index feb84de..11765d9 100644 --- a/lib/hydra/runner.rb +++ b/lib/hydra/runner.rb @@ -11,6 +11,8 @@ module Hydra #:nodoc: traceable('RUNNER') DEFAULT_LOG_FILE = 'hydra-runner.log' + PING_COUNT_FAILURE_TIME = 5 + WAIT_BETWEEN_PING = 0.1 # Boot up a runner. It takes an IO object (generally a pipe from its # parent) to send it messages on which files to execute. @@ -102,6 +104,7 @@ module Hydra #:nodoc: message.handle(self) else @io.write Ping.new + sleep WAIT_BETWEEN_PING end rescue IOError => ex trace "Runner lost Worker" diff --git a/test/master_test.rb b/test/master_test.rb index 0d9e174..7999f3c 100644 --- a/test/master_test.rb +++ b/test/master_test.rb @@ -273,6 +273,27 @@ class MasterTest < Test::Unit::TestCase assert_file_exists target_file end + + should "not die horribly when the host cannot be reached" do + capture_stderr do # redirect stderr + @pid = Process.fork do + Hydra::Master.new( + :files => [test_file], + :autosort => false, + :listeners => [@master_listener], + :runner_listeners => [@runner_listener], + :workers => [{ + :type => :ssh, + :connect => 'sdlsdkjfhadsfjsd', + :directory => remote_dir_path, + :runners => 1 + }], + :verbose => false + ) + end + end + Process.waitpid @pid + end end end