# chef/cookbooks/pacemaker/recipes/default.rb
#
# Author:: Robert Choi
# Cookbook Name:: pacemaker
# Recipe:: default
#
# Copyright 2013, Robert Choi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Install every package listed for this platform in the node attributes.
# A missing package list means the platform is unsupported, so abort the run.
platform_packages = node[:pacemaker][:platform][:packages]
Chef::Application.fatal!("FIXME: #{node.platform} platform not supported yet") if platform_packages.nil?

platform_packages.each { |pkg_name| package pkg_name }
# Manage /etc/sysconfig/pacemaker as a root-owned, world-readable file whose
# only content is the SYSTEMD_NO_WRAP=1 setting.
# NOTE(review): presumably this setting is read by the pacemaker service
# scripts on systemd hosts -- confirm against the packaged init scripts.
file "/etc/sysconfig/pacemaker" do
  action :create
  owner "root"
  mode "0644"
  content "SYSTEMD_NO_WRAP=1"
end
# corosync::default relies on search, which Chef Solo cannot provide.
# Outside of Solo we simply pull the recipe in; under Solo we abort, except
# when RSPEC_RUNNING is set in the environment, in which case corosync is
# skipped and the recipe carries on.
if !Chef::Config[:solo]
  include_recipe "corosync::default"
elsif !ENV["RSPEC_RUNNING"]
  Chef::Application.fatal! \
    "pacemaker::default needs corosync::default which uses search, " \
    "but Chef Solo does not support search."
  return
end
# The pacemaker service is only managed here on SUSE 12 or newer and on the
# RHEL family. On RHEL, an update of this resource immediately restarts clvm.
rhel_family = platform_family?("rhel")
suse12_or_newer = platform_family?("suse") && node.platform_version.to_f >= 12.0

if suse12_or_newer || rhel_family
  service "pacemaker" do
    action [:enable, :start]
    notifies :restart, "service[clvm]", :immediately if rhel_family
  end
end
# Expected cluster size: full cluster members plus remote nodes (the remote
# list may be absent from the attributes).
cluster_members = node[:pacemaker][:elements]["pacemaker-cluster-member"]
remote_members = node[:pacemaker][:elements]["pacemaker-remote"] || []
cluster_size = cluster_members.length + remote_members.length

# Names of the cluster members with everything from the first dot onwards
# removed.
nodes_names = cluster_members.map { |member| member.gsub(/\..*/, "") }
# When a newly added node is faster than the existing nodes, it can exhaust the
# default timeout below and continue its chef-client run before the cluster has
# been fully (re)configured. If it then reaches a syncmark, it can fail with a
# "Could not map name=<nodename> to a UUID" error.
# Waiting a bit longer gives the rest of the cluster time to recognize the new
# member. The timeout must not be extended unconditionally, though: that would
# deadlock with the "Waiting for cluster founder to be set up" loop in the
# crowbar-pacemaker cookbook.
node_was_added = node.fetch("crowbar_wall", {})[:cluster_node_added]
online_timeout = node_was_added ? 120 : 60

# True when this node's FQDN is the one recorded as the cluster founder.
is_founder = node[:pacemaker][:founder] == node[:fqdn]
# Poll the cluster until it looks online, for at most online_timeout seconds.
#
# The cluster is considered ready when 'crm_mon -1' reports an "Online:" line
# and, on non-founder nodes, additionally reports the expected number of
# configured nodes and 'crm_node -l' lists exactly the expected node names.
# Running into the timeout is not fatal: a warning is logged and the chef run
# continues.
ruby_block "wait for cluster to be online" do
  block do
    require "timeout"

    # Seconds to wait between two polls, including after a failed command.
    poll_interval = 5

    begin
      Timeout.timeout(online_timeout) do
        loop do
          # example of 'crm_node -l' output:
          # 1084813649 d52-54-77-77-01-02 member
          # 1084813652 d52-54-77-77-01-01 member
          crm_node_cmd = Mixlib::ShellOut.new("crm_node -l").run_command
          if crm_node_cmd.exitstatus != 0
            Chef::Log.warn("Problems when executing 'crm_node -l': #{crm_node_cmd.stderr}")
            # Pause before retrying so a persistently failing command does not
            # turn this loop into a busy spin that floods the log.
            sleep(poll_interval)
            next
          end
          # The node name is the second column; lines without a second column
          # are dropped so that sorting below never sees nil.
          crm_names = crm_node_cmd.stdout.split("\n").map { |l| l.split(" ")[1] }.compact

          crm_mon_cmd = Mixlib::ShellOut.new("crm_mon -1").run_command
          if crm_mon_cmd.exitstatus != 0
            Chef::Log.warn("Problems when executing 'crm_mon -1': #{crm_mon_cmd.stderr}")
            sleep(poll_interval)
            next
          end
          crm_mon = crm_mon_cmd.stdout

          ready = true
          # The word boundary keeps e.g. "12 nodes configured" from being
          # accepted when 2 nodes are expected.
          if !is_founder && crm_mon !~ /\b#{cluster_size} nodes configured/
            ready = false
            Chef::Log.warn("cluster doesn't have #{cluster_size} nodes configured yet")
          elsif !crm_mon.include?("Online:")
            ready = false
            Chef::Log.warn("cluster doesn't have nodes online yet")
          elsif !is_founder && crm_names.sort != nodes_names.sort
            ready = false
            Chef::Log.warn("crm_node -l listed nodes #{crm_names.sort}, " \
              "not #{nodes_names.sort} as expected")
          end

          break if ready

          Chef::Log.warn("cluster not online yet")
          sleep(poll_interval)
        end
      end
    rescue Timeout::Error
      message = "Pacemaker cluster not online yet; our first configuration changes might get lost (but will be reapplied on next chef run)."
      Chef::Log.warn(message)
    end
  end # block
end # ruby_block
# Only the founder node runs the setup recipe. The founder attribute holds the
# founder's FQDN (it is compared against node[:fqdn] earlier in this recipe),
# so a plain truthiness check would match every node once a founder is
# recorded; compare against this node's FQDN instead.
if node[:pacemaker][:founder] == node[:fqdn]
  include_recipe "pacemaker::setup"
end

# STONITH and notification configuration is included on every node.
include_recipe "pacemaker::stonith"
include_recipe "pacemaker::notifications"