Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,25 @@ class Handler < Chef::Handler

class StartHandler < Handler
def report
# This is informational only, and gives us a fraction more
# information in /var/log/chef/client.log and in the default
# attributes (until next run) for debugging purposes.
# However, it will only take effect after the handler has been
# installed in /etc/chef/client.rb *and* chef-client daemon
# has subsequently been restarted; the
# reload_chef_client_config hack doesn't work with
# start_handlers since it reloads the config too late, after
# the start handlers have already been triggered.
start_mode = record_maintenance_mode_before_this_chef_run
Chef::Log.info("Pacemaker maintenance mode currently %s" %
[start_mode ? "on" : "off"])

if maintenance_mode_set_via_this_chef_run?
# Sanity check: this should never happen because we're using
# default attributes which get wiped for each chef-client run.
raise "BUG: Pacemaker maintenance mode was already set at the start of this run! (pid #$$)"
# Check we're not in maintenance mode. This could happen for two
# reasons:
#
# 1. A previous chef-client run failed, so we shouldn't
# risk compounding problems by trying again until the
# root cause is addressed.
#
# 2. Someone/something other than Chef set the node into
# maintenance mode. That should be rare, but when it
# happens, we shouldn't interfere.
#
# So in both cases, we should abort the run immediately with a
# helpful message.
if maintenance_mode?
raise \
"Pacemaker maintenance mode was already set on " \
"#{node.hostname}; aborting! Please diagnose why this was the " \
"case, fix the root cause, and then unset maintenance mode via " \
"HAWK or by running 'crm node ready' on the node."
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Style/MultilineOperationIndentation: Use 2 (not 0) spaces for indenting an expression spanning multiple lines.

end
end
end
Expand All @@ -51,8 +53,8 @@ def report
Chef::Log.info("Taking node out of Pacemaker maintenance mode")
system("crm --wait node ready")
else
# This shouldn't happen, and suggests that one of the recipes
# is interfering in a way it shouldn't.
# This shouldn't happen, and suggests that something is
# interfering in a way it shouldn't.
raise "Something took node out of maintenance mode during run!"
end
else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,45 @@ module CrowbarPacemaker
# A mixin for Chef::Pacemaker::Handler subclasses, and also for the
# Chef::Provider::PacemakerService LWRP.
module MaintenanceModeHelpers
def maintenance_mode?
# See https://bugzilla.suse.com/show_bug.cgi?id=870696
!! (%x(crm_attribute -G -N #{node.hostname} -n maintenance -q) =~ /^on$/)
# Probes the local CIB with `cibadmin -Ql` to decide whether the cluster
# is reachable from this node.
#
# Returns:
#   true  - cibadmin exited successfully
#   false - cibadmin's error output shows the cluster is down, or that
#           the cibadmin command could not be found
#   nil   - cibadmin failed for any other reason (state unknown)
def cluster_up?
  # Redirection order is deliberate: "2>&1" first points STDERR at the
  # pipe we capture, then ">/dev/null" discards STDOUT only.
  error_output = `cibadmin -Ql 2>&1 >/dev/null`
  exit_status = $?

  case error_output
  when /Connection refused/, /Transport endpoint is not connected/
    Chef::Log.warn("Cluster is down")
    false
  when /command not found/
    Chef::Log.warn("cibadmin not found; was pacemaker deinstalled?")
    false
  else
    if exit_status.success?
      Chef::Log.debug("Cluster is up")
      true
    else
      # Unrecognised failure: log what cibadmin said and report "unknown".
      Chef::Log.warn("cibadmin -Ql failed when checking Pacemaker maintenance mode!")
      Chef::Log.warn(error_output)
      nil
    end
  end
end

def record_maintenance_mode_before_this_chef_run
# Via Chef::Pacemaker::StartHandler we track whether anything
# has put the node into Pacemaker maintenance mode prior to this
# chef-client run. This may come in handy during debugging.
#
# We use a default attribute so that it will get reset at the
# beginning of each chef-client run.
node.default[:pacemaker][:maintenance_mode][$$][:at_start] = maintenance_mode?
# Reports whether this node should be treated as being in Pacemaker
# maintenance mode.
#
# Always returns a strict boolean:
#   true  - cluster_up? returned nil (state unknown), or crm_attribute
#           printed a line consisting solely of "on"
#   false - cluster_up? returned false, or crm_attribute printed
#           anything else
def maintenance_mode?
  case cluster_up?
  when nil # unknown
    Chef::Log.warn("Something wrong, so treating as if in maintenance " \
                   "mode; will need manual intervention.")
    return true
  when false
    # Cluster is not up, so let things proceed so that Chef can
    # start it if appropriate.
    Chef::Log.info("Cluster is down; not in maintenance mode")
    return false
  end

  Chef::Log.debug("Checking maintenance mode status")
  # See https://bugzilla.suse.com/show_bug.cgi?id=870696
  attribute_value = `crm_attribute -G -N #{node.hostname} -n maintenance -q`

  # =~ yields a match offset (Integer) or nil; convert it so this
  # predicate returns true/false like the early-return branches above.
  !(attribute_value =~ /^on$/).nil?
end

def set_maintenance_mode_via_this_chef_run
Expand All @@ -42,14 +68,14 @@ def set_maintenance_mode_via_this_chef_run
#
# We use a default attribute so that it will get reset at the
# beginning of each chef-client run.
node.default[:pacemaker][:maintenance_mode][$$][:via_chef] = true
node.default[:pacemaker][:maintenance_mode][$PID][:via_chef] = true
end

def maintenance_mode_set_via_this_chef_run?
# The "== true" is required because Chef::Node::Attribute does
# auto-vivification on read (!), so the value will be initialized
# to an empty Chef::Node::Attribute if not already set to true.
node.default[:pacemaker][:maintenance_mode][$$][:via_chef] == true
node.default[:pacemaker][:maintenance_mode][$PID][:via_chef] == true
end
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
if loaded
Chef::Log.debug("Pacemaker maintenance handlers already installed")
else
Chef::Log.info("Pacemaker maintenance handlers not installed; " +
Chef::Log.info("Pacemaker maintenance handlers not installed; " \
"scheduling Chef config reload")
ruby_block "reload_chef_client_config" do
block { Chef::Config.from_file("/etc/chef/client.rb") }
Expand Down