crowbar/crowbar-openstack

View on GitHub
chef/cookbooks/nova/recipes/compute_ha.rb

Summary

Maintainability
B
6 hrs
Test Coverage
# Copyright 2016 SUSE
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

remote_nodes = CrowbarPacemakerHelper.remote_nodes(node)
return if remote_nodes.empty?

nova = remote_nodes.first
unless nova.roles.any? { |role| /^nova-compute-/ =~ role }
  Chef::Log.info("Skipping setup of HA for compute nodes as remote nodes don't have a nova compute role yet.")
  return
end

unless nova[:nova][:ha][:compute][:enabled]
  raise "HA for compute nodes is not enabled!"
end

unless nova[:nova][:ha][:compute][:setup]
  Chef::Log.info("Skipping setup of HA for compute nodes as compute nodes have not been prepared yet.")
  return
end

# ensure attributes are set
include_recipe "crowbar-pacemaker::attributes"
include_recipe "crowbar-pacemaker::remote_attributes"

keystone_settings = KeystoneHelper.keystone_settings(nova, @cookbook_name)
neutrons = node_search_with_cache("roles:neutron-server")
neutron = neutrons.first || \
  raise("Neutron instance '#{nova[:nova][:neutron_instance]}' for nova not found")

no_shared_storage = nova[:nova]["use_shared_instance_storage"] ? "0" : "1"

# Install basic nova package to have /var/log/nova (used by fence_compute) as
# well as nova user (to not have some weird permissions in /var/log/nova in
# case nova is installed later)
package "nova-common" do
  if %w(rhel suse).include?(node[:platform_family])
    package_name "openstack-nova"
  end
end

# We have to install libvirt and neutron packages on the corosync nodes so that
# pacemaker can know about the resource agents.
package "libvirt"

case neutron[:neutron][:networking_plugin]
when "ml2"
  ml2_mech_drivers = neutron[:neutron][:ml2_mechanism_drivers]
  case
  when ml2_mech_drivers.include?("openvswitch")
    neutron_agent = neutron[:neutron][:platform][:ovs_agent_name]
    neutron_agent_pkg = neutron[:neutron][:platform][:ovs_agent_pkg]
    neutron_agent_ra = neutron[:neutron][:ha][:network]["openvswitch_ra"]
  when ml2_mech_drivers.include?("linuxbridge")
    neutron_agent = neutron[:neutron][:platform][:lb_agent_name]
    neutron_agent_pkg = neutron[:neutron][:platform][:lb_agent_pkg]
    neutron_agent_ra = neutron[:neutron][:ha][:network]["linuxbridge_ra"]
  when ml2_mech_drivers.include?("cisco_apic_ml2") || ml2_mech_drivers.include?("apic_gbp")
    neutron_agent = ""
    neutron_agent_pkg = ""
    neutron_agent_ra = ""
  end

  package neutron_agent_pkg unless neutron_agent_ra.empty?
end

if neutron[:neutron][:use_dvr]
  package neutron[:neutron][:platform][:l3_agent_pkg]
  package neutron[:neutron][:platform][:metadata_agent_pkg]
end

# Wait for all nodes to reach this point so we know that all nodes will have
# all the required packages installed before we create the pacemaker
# resources.
# We use the revision of the nova node we found as the goal is to not require
# the corosync nodes to be part of the nova proposal.
crowbar_pacemaker_sync_mark "sync-nova_compute_before_ha" do
  revision nova[:nova]["crowbar-revision"]
end

# Avoid races when creating pacemaker resources
crowbar_pacemaker_sync_mark "wait-nova_compute_ha_resources" do
  revision nova[:nova]["crowbar-revision"]
end

compute_primitives_for_group = []
compute_primitives_to_clone = []
compute_transaction_objects = []

# virtlogd service exists since 12.2, make sure nodes have been upgraded already
# check the version of remote's :platform_version
old_remote_nodes = remote_nodes.select do |remote_node|
  remote_node[:platform_version].to_f == 12.1
end

if node[:platform_family] == "suse" && old_remote_nodes.empty?
  virtlogd_primitive = "virtlogd-compute"
  pacemaker_primitive virtlogd_primitive do
    agent "systemd:virtlogd"
    op nova[:nova][:ha][:op]
    action :update
    only_if { CrowbarPacemakerHelper.is_cluster_founder?(node) }
  end
  compute_primitives_for_group << virtlogd_primitive
  compute_transaction_objects << "pacemaker_primitive[#{virtlogd_primitive}]"
end

libvirtd_primitive = "libvirtd-compute"
pacemaker_primitive libvirtd_primitive do
  agent "systemd:libvirtd"
  op nova[:nova][:ha][:op]
  action :update
  only_if { CrowbarPacemakerHelper.is_cluster_founder?(node) }
end
compute_primitives_for_group << libvirtd_primitive
compute_transaction_objects << "pacemaker_primitive[#{libvirtd_primitive}]"

case neutron[:neutron][:networking_plugin]
when "ml2"
  # neutron_agent & neutron_agent_ra are empty for plugins like cisco_apic_ml2 and apic_gbp
  unless neutron_agent.empty? || neutron_agent_ra.empty?
    neutron_agent_primitive = "#{neutron_agent.sub(/^openstack-/, "")}-compute"
    pacemaker_primitive neutron_agent_primitive do
      agent neutron_agent_ra
      op neutron[:neutron][:ha][:network][:op]
      action :update
      only_if { CrowbarPacemakerHelper.is_cluster_founder?(node) }
    end
    compute_primitives_to_clone << neutron_agent_primitive
    compute_transaction_objects << "pacemaker_primitive[#{neutron_agent_primitive}]"
  end
end

if neutron[:neutron][:use_dvr]
  l3_agent_primitive = "neutron-l3-agent-compute"
  pacemaker_primitive l3_agent_primitive do
    agent neutron[:neutron][:ha][:network][:l3_ra]
    op neutron[:neutron][:ha][:network][:op]
    action :update
    only_if { CrowbarPacemakerHelper.is_cluster_founder?(node) }
  end
  compute_primitives_to_clone << l3_agent_primitive
  compute_transaction_objects << "pacemaker_primitive[#{l3_agent_primitive}]"

  metadata_agent_primitive = "neutron-metadata-agent-compute"
  pacemaker_primitive metadata_agent_primitive do
    agent neutron[:neutron][:ha][:network][:metadata_ra]
    op neutron[:neutron][:ha][:network][:op]
    action :update
    only_if { CrowbarPacemakerHelper.is_cluster_founder?(node) }
  end
  compute_primitives_to_clone << metadata_agent_primitive
  compute_transaction_objects << "pacemaker_primitive[#{metadata_agent_primitive}]"
end

nova_primitive = "nova-compute"
pacemaker_primitive nova_primitive do
  agent "ocf:openstack:NovaCompute"
  params ({
    "auth_url"       => keystone_settings["internal_auth_url"],
    # "region_name"    => keystone_settings["endpoint_region"],
    "endpoint_type"  => "internalURL",
    "username"       => keystone_settings["admin_user"],
    "password"       => keystone_settings["admin_password"],
    "tenant_name"    => keystone_settings["admin_tenant"],
    # "insecure"       => keystone_settings["insecure"] || nova[:nova][:ssl][:insecure],
    "domain"         => node[:domain],
    "no_shared_storage" => no_shared_storage
  })
  op nova[:nova][:ha][:compute][:compute][:op]
  action :update
  only_if { CrowbarPacemakerHelper.is_cluster_founder?(node) }
end
compute_primitives_for_group << nova_primitive
compute_transaction_objects << "pacemaker_primitive[#{nova_primitive}]"

compute_group_name = "g-#{nova_primitive}"
pacemaker_group compute_group_name do
  members compute_primitives_for_group
  action :update
  only_if { CrowbarPacemakerHelper.is_cluster_founder?(node) }
end
compute_primitives_to_clone << compute_group_name
compute_transaction_objects << "pacemaker_group[#{compute_group_name}]"

compute_primitives_to_clone.each do |compute_primitive_to_clone|
  clone_name = "cl-#{compute_primitive_to_clone}"
  pacemaker_clone clone_name do
    rsc compute_primitive_to_clone
    meta CrowbarPacemakerHelper.clone_meta(node, remote: true)
    action :update
    only_if { CrowbarPacemakerHelper.is_cluster_founder?(node) }
  end
  compute_transaction_objects << "pacemaker_clone[#{clone_name}]"

  location_name = "l-#{clone_name}-compute"
  definition = OpenStackHAHelper.compute_only_location(location_name, clone_name)
  pacemaker_location location_name do
    definition definition
    action :update
    only_if { CrowbarPacemakerHelper.is_cluster_founder?(node) }
  end
  compute_transaction_objects << "pacemaker_location[#{location_name}]"
end
compute_clone_name = "cl-#{compute_group_name}"

pacemaker_transaction "nova compute" do
  cib_objects compute_transaction_objects
  # note that this will also automatically start the resources
  action :commit_new
  only_if { CrowbarPacemakerHelper.is_cluster_founder?(node) }
end

controller_transaction_objects = []

evacuate_primitive = "nova-evacuate"
pacemaker_primitive evacuate_primitive do
  agent "ocf:openstack:NovaEvacuate"
  params ({
    "auth_url"       => keystone_settings["internal_auth_url"],
    # "region_name"    => keystone_settings["endpoint_region"],
    "endpoint_type"  => "internalURL",
    "username"       => keystone_settings["admin_user"],
    "password"       => keystone_settings["admin_password"],
    "tenant_name"    => keystone_settings["admin_tenant"],
    # "insecure"       => keystone_settings["insecure"] || nova[:nova][:ssl][:insecure],
    "domain"         => node[:domain],
    "no_shared_storage" => no_shared_storage
  })
  op nova[:nova][:ha][:compute][:evacuate][:op]
  action :update
  only_if { CrowbarPacemakerHelper.is_cluster_founder?(node) }
end
controller_transaction_objects << "pacemaker_primitive[#{evacuate_primitive}]"

controller_location_name = "l-#{evacuate_primitive}-controller"
pacemaker_location controller_location_name do
  definition OpenStackHAHelper.no_compute_location(controller_location_name, evacuate_primitive)
  action :update
  only_if { CrowbarPacemakerHelper.is_cluster_founder?(node) }
end
controller_transaction_objects << "pacemaker_location[#{controller_location_name}]"

order_name = "o-#{compute_clone_name}"
pacemaker_order order_name do
  score "Mandatory"
  ordering "#{compute_clone_name} #{evacuate_primitive}"
  action :update
  only_if { CrowbarPacemakerHelper.is_cluster_founder?(node) }
end
controller_transaction_objects << "pacemaker_order[#{order_name}]"

hostmap = remote_nodes.map do |remote_node|
  "remote-#{remote_node[:hostname]}:#{remote_node[:hostname]}"
end.sort.join(";")

fence_primitive = "fence-nova"
pacemaker_primitive fence_primitive do
  agent "stonith:fence_compute"
  params ({
    "pcmk_host_map"  => hostmap,
    "auth-url"       => keystone_settings["internal_auth_url"],
    # "region-name"    => keystone_settings["endpoint_region"],
    "endpoint-type"  => "internalURL",
    "login"          => keystone_settings["admin_user"],
    "passwd"         => keystone_settings["admin_password"],
    "tenant-name"    => keystone_settings["admin_tenant"],
    # "insecure"       => keystone_settings["insecure"] || nova[:nova][:ssl][:insecure],
    "domain"         => node[:domain],
    "no-shared-storage" => no_shared_storage,
    "record-only"    => "1",
    "verbose"        => "1",
    "debug"          => "/var/log/nova/fence_compute.log"
  })
  op nova[:nova][:ha][:compute][:fence][:op]
  action :update
  only_if { CrowbarPacemakerHelper.is_cluster_founder?(node) }
end
controller_transaction_objects << "pacemaker_primitive[#{fence_primitive}]"

pacemaker_transaction "nova compute (non-remote bits)" do
  cib_objects controller_transaction_objects
  # note that this will also automatically start the resources
  action :commit_new
  only_if { CrowbarPacemakerHelper.is_cluster_founder?(node) }
end

unless %w(disabled manual).include? node[:pacemaker][:stonith][:mode]
  case node[:pacemaker][:stonith][:mode]
  when "sbd"
    stonith_resource = "stonith-sbd"
  when "shared"
    stonith_resource = "stonith-shared"
  when "per_node"
    stonith_resource = nil
  else
    raise "Unknown STONITH mode: #{node[:pacemaker][:stonith][:mode]}."
  end

  topology = remote_nodes.map do |remote_node|
    remote_stonith = stonith_resource
    remote_stonith ||= "stonith-remote-#{remote_node[:hostname]}"
    "remote-#{remote_node[:hostname]}: #{remote_stonith},#{fence_primitive}"
  end

  # TODO: implement proper LWRP for this, and move this as part of the
  # transaction for controller bits
  bash "crm configure fencing_topology" do
    code "echo fencing_topology #{topology.sort.join(" ")} | crm configure load update -"
  end
end

rabbit_settings = fetch_rabbitmq_settings

crowbar_pacemaker_order_only_existing "o-#{evacuate_primitive}" do
  # We need services required to boot an instance; most of these services are
  # obviously required. Some additional notes:
  #  - swift is used in case it's the backend for glance
  #  - cinder is used in case of boot from volume
  #  - neutron agents are used even with DVR, if only to have a DHCP server for
  #    the instance to get an IP address
  ordering "( ms-galera #{rabbit_settings[:pacemaker_resource]} ) #{evacuate_primitive}"
  score "Mandatory"
  action :create
  only_if { CrowbarPacemakerHelper.is_cluster_founder?(node) }
end

crowbar_pacemaker_sync_mark "create-nova_compute_ha_resources" do
  revision nova[:nova]["crowbar-revision"]
end