crowbar_framework/app/models/service_object.rb
#
# Copyright 2011-2013, Dell
# Copyright 2013-2014, SUSE LINUX Products GmbH
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
require "pp"
require "chef"
require "json"
require "hash_only_merge"
require "securerandom"
require "timeout"
require "thwait"
class ServiceObject
include CrowbarPacemakerProxy
  FORBIDDEN_PROPOSAL_NAMES = ["template", "nodes", "commit", "status"]
attr_accessor :bc_name
attr_accessor :logger
attr_accessor :validation_errors
def initialize(thelogger = nil)
@bc_name = "unknown"
    # thelogger is still accepted for compatibility reasons, but Rails.logger
    # is always used.
    @logger = Rails.logger
@validation_errors = []
end
def self.get_service(name)
Kernel.const_get("#{name.camelize}Service")
end
# OVERRIDE AS NEEDED! true if barclamp can have multiple proposals
def self.allow_multiple_proposals?
false
end
# This provides the suggested name for new proposals.
# OVERRIDE AS NEEDED!
def self.suggested_proposal_name
I18n.t("proposal.items.default")
end
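  # Expose the class-level role constraints (defined below) on instances.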
def role_constraints
self.class.role_constraints
end
class << self
include CrowbarPacemakerProxy
    # This method should be overridden by subclassing service objects
    # and return the constraints related to this specific service.
def role_constraints
{}
end
end
  def validation_error(message)
Rails.logger.warn message
@validation_errors << message
end
def self.barclamp_catalog
BarclampCatalog.catalog
end
def self.bc_name
    self.name.underscore[/(.*)_service$/, 1]
end
# ordered list of barclamps from groups in the crowbar.yml files.
# Built at barclamp install time by the catalog step
def self.members
BarclampCatalog.members(bc_name)
end
def self.all
    # The catalog contains more than just barclamps - it also has barclamp
    # groups. So we filter out barclamps by attempting to create a proposal
# (which loads the barclamps JSON metadata). Only those that pass
# are valid barclamps.
BarclampCatalog.barclamps.map do |name, attrs|
Proposal.new(barclamp: name) rescue nil
end.compact.map do |prop|
[prop.barclamp, prop["description"]]
end.to_h
end
def self.run_order(bc, cat = nil)
BarclampCatalog.run_order(bc)
end
def run_order
BarclampCatalog.run_order(@bc_name)
end
def self.chef_order(bc, cat = nil)
BarclampCatalog.chef_order(bc)
end
def chef_order
BarclampCatalog.chef_order(@bc_name)
end
# Approach copied from libraries/secure_password.rb in the openssl cookbook
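  # Returns a string of random alphanumeric characters (base64 with "+", "/"
  # and "=" stripped), e.g. random_password(16) might yield something like
  # "q3VZb7Rk2mLs9TxW" (illustrative output).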
def random_password(size = 12)
pw = String.new
while pw.length < size
# SecureRandom actually wraps around
# OpenSSL::Random.random_bytes (falling back to /dev/urandom),
# but it ensures a random seed first.
# Note that we only accept (a subset of) ASCII characters; otherwise, we
# get unicode characters that chef cannot store.
pw << SecureRandom.base64(size).gsub(/[\+\/=]/, "")
end
pw[-size,size]
end
#
# Locking Routines
#
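  # new_lock builds a local blocking lock; acquire_lock acquires it
  # immediately, while with_lock runs the given block while holding the lock
  # and releases it afterwards.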
def new_lock(name)
Crowbar::Lock::LocalBlocking.new(name: name, logger: Rails.logger)
end
def acquire_lock(name)
new_lock(name).acquire
end
def with_lock(name)
new_lock(name).with_lock do
yield
end
end
#
# Helper routines for queuing
#
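  # Mark the given nodes as "applying" (under BA-LOCK) so that other
  # operations know they are busy; pre_cached_nodes avoids needless node
  # reloads.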
def set_to_applying(nodes, inst, pre_cached_nodes)
with_lock "BA-LOCK" do
nodes_to_save = []
nodes.each do |node_name|
node = pre_cached_nodes[node_name]
if node.nil?
node = Node.find_by_name(node_name)
end
next if node.nil?
node.crowbar["state"] = "applying"
node.crowbar["state_owner"] = "#{@bc_name}-#{inst}"
nodes_to_save.push node
end
save_nodes nodes_to_save
end
end
def restore_node_to_ready(node)
node.crowbar["state"] = "ready"
node.crowbar["state_owner"] = ""
end
def restore_to_ready(nodes)
with_lock "BA-LOCK" do
nodes_to_save = []
nodes.each do |node_name|
node = Node.find_by_name(node_name)
next if node.nil?
        # Nodes with 'crowbar_upgrade' state need to stay in that state
        # even after applying the relevant roles. They can be brought back
        # to ready only by an explicit user action.
if node.crowbar["state"] != "crowbar_upgrade"
restore_node_to_ready(node)
end
node["crowbar"]["applying_for"] = {}
nodes_to_save.push node
end
save_nodes nodes_to_save
end
end
def reset_proposal(inst, bc = @bc_name)
::Proposal.find_by(
barclamp: bc,
name: inst
).tap do |proposal|
if proposal.nil?
return [
404,
I18n.t("model.service.cannot_find")
]
end
unless proposal["deployment"][bc]["crowbar-committing"]
proposal["deployment"][bc]["crowbar-committing"] = false
unless proposal.save
return [
422,
I18n.t("proposal.failures.proposal_reset")
]
end
end
nodes = []
Node.find("roles:#{bc}-config-#{inst}").each do |node|
next if node.crowbar["state"] == "ready"
node.crowbar["state"] = "ready"
unless node.save
nodes.push(node.alias)
end
end
unless nodes.blank?
return [
422,
I18n.t("proposal.failures.nodes_reset", nodes: nodes.join(", "))
]
end
end
[
200,
""
]
rescue => e
[
500,
e.message
]
end
  #
  # Queuing routines:
  #   queue_proposal   - attempts to queue the proposal; returns a delay list
  #                      if it cannot run yet.
  #   dequeue_proposal - removes an item from the queue and cleans up.
  #   process_queue    - processes the queue to see what can be executed.
  #
def queue_proposal(inst, element_order, elements, deps, bc = @bc_name, pre_cached_nodes = {})
Crowbar::DeploymentQueue.new(logger: Rails.logger).queue_proposal(
bc, inst, elements, element_order, deps, pre_cached_nodes
)
end
def dequeue_proposal(inst, bc = @bc_name)
Crowbar::DeploymentQueue.new(logger: Rails.logger).dequeue_proposal(bc, inst)
end
def process_queue
Crowbar::DeploymentQueue.new(logger: Rails.logger).process_queue
end
#
# update proposal status information
#
def update_proposal_status(inst, status, message, bc = @bc_name)
Rails.logger.debug("update_proposal_status: enter #{inst} #{bc} #{status} #{message}")
prop = Proposal.where(barclamp: bc, name: inst).first
unless prop.nil?
prop["deployment"][bc]["crowbar-status"] = status
prop["deployment"][bc]["crowbar-failed"] = message
res = prop.save
else
res = true
end
Rails.logger.debug("update_proposal_status: exit #{inst} #{bc} #{status} #{message}")
res
end
#
# API Functions
#
def versions
[200, { versions: ["1.0"] }]
end
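  # Node state transition hook; the default implementation accepts any
  # transition and does nothing. Barclamps override this to react to node
  # state changes.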
def transition(inst, name, state)
[200, {}]
end
def list_active
roles = RoleObject.find_roles_by_name("#{@bc_name}-config-*")
    roles.map! { |r| r.name.gsub("#{@bc_name}-config-", "") } unless roles.empty?
[200, roles]
end
def show_active(inst)
inst = "#{@bc_name}-config-#{inst}"
role = RoleObject.find_role_by_name(inst)
if role.nil?
[404, "Active instance not found"]
else
[200, role]
end
end
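  # Strip Rails request artifacts (controller/action params, CSRF token,
  # etc.) from the proposal hash so that only barclamp data is kept.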
def clean_proposal(proposal)
Rails.logger.debug "clean_proposal"
proposal.delete("controller")
proposal.delete("action")
proposal.delete("barclamp")
proposal.delete("name")
proposal.delete("utf8")
proposal.delete("_method")
proposal.delete("authenticity_token")
end
def destroy_active(inst)
role_name = "#{@bc_name}-config-#{inst}"
Rails.logger.debug "Trying to deactivate role #{role_name}"
role = RoleObject.find_role_by_name(role_name)
return [404, {}] if role.nil?
reverse_deps = RoleObject.reverse_dependencies(role_name)
if !reverse_deps.empty?
raise(I18n.t("model.service.would_break_dependency", name: @bc_name, dependson: reverse_deps.to_sentence))
else
      # By emptying the elements, applying the role acts as a removal
dep = role.override_attributes
dep[@bc_name]["elements"] = {}
dep[@bc_name].delete("elements_expanded")
if dep[@bc_name]["config"].key?("crowbar-committing")
Rails.logger.debug "#{inst} proposal has a crowbar-committing key"
end
dep[@bc_name]["config"].delete("crowbar-committing")
dep[@bc_name]["config"].delete("crowbar-queued")
role.override_attributes = dep
answer = apply_role(role, inst, false)
role.destroy
answer
end
end
def elements
[200, Proposal.new(barclamp: @bc_name).all_elements]
end
def element_info(role = nil)
nodes = Node.find_all_nodes.map(&:name)
return [200, nodes] unless role
valid_roles = Proposal.new(barclamp: @bc_name).all_elements
return [404, "No role #{role} found for #{@bc_name}."] unless valid_roles.include?(role)
nodes.reject! do |node|
node_is_invalid_for_role(node, role.to_s)
end
[200, nodes]
end
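  # proposals_raw returns the Proposal objects for this barclamp;
  # proposals returns just the short instance names.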
def proposals_raw
Proposal.where(barclamp: @bc_name)
end
def proposals
props = proposals_raw
props = props.map { |p| p["id"].gsub("#{@bc_name}-", "") }
[200, props]
end
def proposal_template
template = proposal_schema_directory.join("template-#{@bc_name}.json")
if template.exist?
[
200,
JSON.load(template.read)
]
else
[
404,
I18n.t("model.service.template_missing", name: @bc_name)
]
end
end
def proposal_show(inst)
prop = Proposal.where(barclamp: @bc_name, name: inst).first
if prop.nil?
[404, I18n.t("model.service.cannot_find")]
else
[200, prop]
end
end
#
# Utility method to find instances for barclamps we depend on
#
  def find_dep_proposal(bc, optional = false)
begin
const_service = self.class.get_service(bc)
rescue
Rails.logger.info "Barclamp \"#{bc}\" is not available."
proposals = []
else
service = const_service.new Rails.logger
proposals = service.list_active[1]
proposals = service.proposals[1] if proposals.empty?
end
if proposals.empty? || proposals[0].blank?
if optional
Rails.logger.info(
"No optional \"#{bc}\" dependency proposal found for \"#{@bc_name}\" proposal."
)
else
raise(I18n.t("model.service.dependency_missing", name: @bc_name, dependson: bc))
end
end
# Return empty string instead of nil, because the attributes referring to
# proposals are generally required in the schema
proposals[0] || ""
end
def node_is_invalid_for_role(node, role)
elements = { role => [node] }
violates_admin_constraint?(elements, role) ||
violates_platform_constraint?(elements, role) ||
violates_exclude_platform_constraint?(elements, role) ||
violates_cluster_constraint?(elements, role) ||
violates_remotes_constraint?(elements, role)
end
# Helper to select nodes that make sense on proposal creation
def select_nodes_for_role(all_nodes, role, preferred_intended_role = nil)
# do not modify array given by caller
valid_nodes = all_nodes.dup
valid_nodes.delete_if { |n| n.nil? }
valid_nodes.reject! do |node|
node_is_invalid_for_role(node.name, role)
end
unless preferred_intended_role.nil?
preferred_all_nodes = valid_nodes.select { |n| n.intended_role == preferred_intended_role }
valid_nodes = preferred_all_nodes unless preferred_all_nodes.empty?
end
if role_constraints[role] && role_constraints[role].key?("count") && role_constraints[role]["count"] >= 0
valid_nodes = valid_nodes.take(role_constraints[role]["count"])
end
valid_nodes
end
#
# This can be overridden to provide a better creation proposal
#
def create_proposal
prop = Proposal.new(barclamp: @bc_name)
    raise(I18n.t("model.service.template_missing", name: @bc_name)) if prop.nil?
prop.raw_data
end
def proposal_create(params)
base_id = params["id"]
params["id"] = "#{@bc_name}-#{params["id"]}"
    if FORBIDDEN_PROPOSAL_NAMES.any? { |n| n == base_id }
      return [403, I18n.t("model.service.illegal_name", names: FORBIDDEN_PROPOSAL_NAMES.to_sentence)]
end
prop = Proposal.where(barclamp: @bc_name, name: base_id).first
return [400, I18n.t("model.service.name_exists")] unless prop.nil?
return [400, I18n.t("model.service.too_short")] if base_id.to_s.length == 0
return [400, I18n.t("model.service.illegal_chars")] if base_id =~ /[^A-Za-z0-9_]/
proposal = create_proposal
proposal["deployment"][@bc_name]["config"]["environment"] = "#{@bc_name}-config-#{base_id}"
    # The crowbar-deep-merge-template key should be removed in all cases, as
    # it should not end up in the proposal anyway; if the key is missing, we
    # default to false (and therefore to the old behavior)
if params.delete("crowbar-deep-merge-template")
HashOnlyMerge.hash_only_merge!(proposal, params)
else
proposal.merge!(params)
end
clean_proposal(proposal)
# When we create a proposal, it might be "invalid", as some roles might be missing
# This is OK, as the next step for the user is to add nodes to the roles
# But we need to skip the after_save validations in the _proposal_update
_proposal_update(@bc_name, base_id, proposal, false)
end
# Used when creating a proposal during the bootstrap process
def proposal_create_bootstrap(params)
proposal_create(params)
end
def proposal_edit(params)
base_id = params["id"] || params[:name]
params["id"] = "#{@bc_name}-#{base_id}"
proposal = {}.merge(params)
clean_proposal(proposal)
_proposal_update(@bc_name, base_id, proposal, true)
end
def proposal_delete(inst)
prop = Proposal.where(barclamp: @bc_name, name: inst).first
if prop.nil?
[404, I18n.t("model.service.cannot_find")]
else
prop.destroy
[200, {}]
end
end
  # Most of these checks could be validations on the model itself,
  # preferably refactored into Validator classes.
def save_proposal!(prop, options = {})
options.reverse_merge!(validate: true, validate_after_save: true)
clean_proposal(prop.raw_data)
validate_proposal(prop.raw_data) if options[:validate]
validate_proposal_elements(prop.elements) if options[:validate]
prop.latest_applied = false
prop.save
validate_proposal_after_save(prop.raw_data) if options[:validate_after_save]
end
  # This is where the proposal gets copied into a role, the scheduling /
  # operations order is computed (in apply_role), and chef-client gets called
  # on the nodes. Hopefully, this will get moved into a background job.
def proposal_commit(inst, options = {})
options.reverse_merge!(
in_queue: false,
validate: true,
validate_after_save: true,
bootstrap: false
)
prop = Proposal.where(barclamp: @bc_name, name: inst).first
if prop.nil?
[404, "#{I18n.t('.cannot_find', scope: 'model.service')}: #{@bc_name}.#{inst}"]
elsif prop["deployment"][@bc_name]["crowbar-committing"]
[402, "#{I18n.t('.already_commit', scope: 'model.service')}: #{@bc_name}.#{inst}"]
else
response = [500, "Internal Error: Something went wrong."]
begin
# Put mark on the wall
prop["deployment"][@bc_name]["crowbar-committing"] = true
save_proposal!(prop,
validate: options[:validate],
validate_after_save: options[:validate_after_save])
response = active_update(prop.raw_data, inst, options[:in_queue], options[:bootstrap])
rescue Chef::Exceptions::ValidationFailed => e
Rails.logger.error(([e.message] + e.backtrace).join("\n"))
response = [400, "Failed to validate proposal: #{e.message}"]
rescue StandardError => e
Rails.logger.error(([e.message] + e.backtrace).join("\n"))
response = [500, e.message]
ensure
# Make sure we unmark the wall
prop.reload
prop["deployment"][@bc_name]["crowbar-committing"] = false
prop.latest_applied = (response.first == 200)
prop.save
end
response
end
end
def display_name
@display_name ||= BarclampCatalog.display_name(@bc_name)
end
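  # True if at least one role in this barclamp accepts clusters as elements;
  # accept_remotes below performs the same check for remotes.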
def accept_clusters
accept = false
role_constraints.keys.each do |role|
accept ||= role_constraints[role]["cluster"]
end
accept
end
def accept_remotes
accept = false
role_constraints.keys.each do |role|
accept ||= role_constraints[role]["remotes"]
end
accept
end
#
# This can be overridden. Specific to node validation.
#
  def validate_proposal_elements(proposal_elements)
proposal_elements.each do |role_and_elements|
role, elements = role_and_elements
uniq_elements = elements.uniq
if uniq_elements.length != elements.length
raise I18n.t("proposal.failures.duplicate_elements_in_role") + " " + role
end
uniq_elements.each do |element|
if is_cluster? element
unless cluster_exists? element
raise I18n.t("proposal.failures.unknown_cluster") + " " + cluster_name(element)
end
elsif is_remotes? element
unless remotes_exists? element
raise I18n.t("proposal.failures.unknown_remotes") + " " + cluster_name(element)
end
elsif element.include? ":"
raise I18n.t("proposal.failures.unknown_node") + " " + element
else
nodes = Node.find_nodes_by_name element
if nodes.nil? || nodes.empty?
raise I18n.t("proposal.failures.unknown_node") + " " + element
end
end
end
end
end
def proposal_schema_directory
Rails.root.join("..", "chef", "data_bags", "crowbar").expand_path
end
#
# This can be overridden to get better validation if needed.
#
  def validate_proposal(proposal)
path = proposal_schema_directory
begin
validator = CrowbarValidator.new("#{path}/template-#{@bc_name}.schema")
rescue StandardError => e
Rails.logger.error("failed to load databag schema for #{@bc_name}: #{e.message}")
Rails.logger.debug e.backtrace.join("\n")
      raise Chef::Exceptions::ValidationFailed.new("failed to load databag schema for #{@bc_name}: #{e.message}")
end
Rails.logger.info "validating proposal #{@bc_name}"
errors = validator.validate(proposal)
@validation_errors = errors.map { |e| "#{e.path} #{e.message}" }
handle_validation_errors
end
#
# This does additional validation of the proposal, but after it has been
# saved. This should be used if the errors are easy to fix in the proposal.
#
  # This can be overridden to get better validation if needed. When
  # overriding, call this method after your own checks so that error handling
  # and constraint validation still run.
#
  def validate_proposal_after_save(proposal)
validate_proposal_constraints proposal
validate_postponed_nodes proposal
handle_validation_errors
end
#
  # Make sure that when the user wants to apply changes affecting postponed
  # nodes (i.e. nodes not yet upgraded while the rest of the cloud already
  # is), those nodes can be skipped.
#
def validate_postponed_nodes(proposal)
return unless upgrade_postponed?
skip_unready_nodes_enabled = Rails.application.config.crowbar.fetch(
"skip_unready_nodes", {}
).fetch("enabled", false)
skip_unready_nodes_roles = Rails.application.config.crowbar.fetch(
"skip_unready_nodes", {}
).fetch("roles", [])
postponed_nodes = []
proposal["deployment"][@bc_name]["elements"].each do |role, nodes|
next if skip_unready_nodes_enabled && skip_unready_nodes_roles.include?(role)
nodes.each do |n|
next if is_cluster? n
node = NodeObject.find_by_name(n)
postponed_nodes << n unless node.upgraded?
end
end
return if postponed_nodes.empty?
validation_error "The upgrade of some nodes has been postponed: " +
postponed_nodes.join(", ") + ". " \
"It is necessary to have 'skip_unready_nodes' feature enabled for the roles " \
"in this barclamp, so these nodes could be properly skipped when applying the barclamp."
end
def violates_count_constraint?(elements, role)
if role_constraints[role] && role_constraints[role].key?("count")
len = elements[role].length
max_count = role_constraints[role]["count"]
max_count >= 0 && len > max_count
else
false
end
end
def violates_uniqueness_constraint?(elements, role)
if role_constraints[role] && role_constraints[role]["unique"]
elements[role].each do |element|
elements.keys.each do |loop_role|
next if loop_role == role
return true if elements[loop_role].include? element
end
end
end
false
end
def violates_conflicts_constraint?(elements, role)
if role_constraints[role] && role_constraints[role]["conflicts_with"]
conflicts = role_constraints[role]["conflicts_with"].select do |conflicting_role|
elements[role].any? do |element|
elements[conflicting_role] && elements[conflicting_role].include?(element)
end
end
return true if conflicts.count > 0
end
false
end
def violates_admin_constraint?(elements, role, nodes_is_admin = {})
if role_constraints[role] && !role_constraints[role]["admin"]
elements[role].each do |element|
next if is_cluster?(element) || is_remotes?(element)
unless nodes_is_admin.key? element
node = Node.find_by_name(element)
nodes_is_admin[element] = (!node.nil? && node.admin?)
end
return true if nodes_is_admin[element]
end
end
false
end
def violates_platform_constraint?(elements, role)
if role_constraints[role] && role_constraints[role].key?("platform")
constraints = role_constraints[role]["platform"]
elements[role].each do |element|
next if is_cluster?(element) || is_remotes?(element)
node = Node.find_by_name(element)
return true if !constraints.any? do |platform, version|
PlatformRequirement.new(platform, version).satisfied_by?(node[:platform], node[:platform_version])
end
end
end
false
end
def violates_exclude_platform_constraint?(elements, role)
if role_constraints[role] && role_constraints[role].key?("exclude_platform")
constraints = role_constraints[role]["exclude_platform"]
elements[role].each do |element|
next if is_cluster?(element) || is_remotes?(element)
node = Node.find_by_name(element)
return true if constraints.any? do |platform, version|
PlatformRequirement.new(platform, version).satisfied_by?(node[:platform], node[:platform_version])
end
end
end
false
end
def violates_cluster_constraint?(elements, role)
if role_constraints[role] && !role_constraints[role]["cluster"]
clusters = elements[role].select { |e| is_cluster? e }
unless clusters.empty?
return true
end
end
false
end
def violates_remotes_constraint?(elements, role)
if role_constraints[role] && !role_constraints[role]["remotes"]
remotes = elements[role].select { |e| is_remotes? e }
unless remotes.empty?
return true
end
end
false
end
#
# Ensure that the proposal respects constraints defined for the roles
#
def validate_proposal_constraints(proposal)
elements = proposal["deployment"][@bc_name]["elements"]
nodes_is_admin = {}
role_constraints.keys.each do |role|
next unless elements.key?(role)
if violates_count_constraint?(elements, role)
validation_error("Role #{role} can accept up to #{role_constraints[role]["count"]} elements only.")
end
if violates_uniqueness_constraint?(elements, role)
validation_error("Elements assigned to #{role} cannot be assigned to another role.")
break
end
if violates_conflicts_constraint?(elements, role)
validation_error("Element cannot be assigned to both role #{role} and any of these roles: #{role_constraints[role]["conflicts_with"].join(", ")}")
break
end
if violates_admin_constraint?(elements, role, nodes_is_admin)
validation_error("Role #{role} does not accept admin nodes.")
break
end
if violates_platform_constraint?(elements, role)
platforms = role_constraints[role]["platform"].map { |k, v| [k, v].join(" ") }.join(", ")
validation_error("Role #{role} can be used only for #{platforms} platform(s).")
end
if violates_exclude_platform_constraint?(elements, role)
platforms = role_constraints[role]["exclude_platform"].map { |k, v| [k, v].join(" ") }.join(", ")
validation_error("Role #{role} can't be used for #{platforms} platform(s).")
end
if violates_cluster_constraint?(elements, role)
validation_error("Role #{role} does not accept clusters.")
end
if violates_remotes_constraint?(elements, role)
validation_error("Role #{role} does not accept remotes.")
end
end
end
#
  # Ensure that the proposal contains exactly one node for the role
#
def validate_one_for_role(proposal, role)
elements = proposal["deployment"][@bc_name]["elements"]
    if !elements.key?(role) || elements[role].length != 1
validation_error("Need one (and only one) #{role} node.")
end
end
#
  # Ensure that the proposal contains at least n nodes for the role
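  # e.g. validate_at_least_n_for_role(proposal, "controller", 2) records a
  # validation error when fewer than two nodes hold the (illustrative)
  # "controller" role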
#
def validate_at_least_n_for_role(proposal, role, n)
elements = proposal["deployment"][@bc_name]["elements"]
    if !elements.key?(role) || elements[role].length < n
validation_error("Need at least #{n} #{role} node#{"s" if n > 1}.")
end
end
#
# Ensure that the proposal contains at least 2 nodes for role or a cluster
#
def validate_multiple_for_role_or_cluster(proposal, role)
elements = proposal["deployment"][@bc_name]["elements"]
if !elements.key?(role) ||
(elements[role].length < 2 &&
elements[role].none? { |e| is_cluster? e })
validation_error("Need at least 2 #{role} nodes or a cluster.")
end
end
#
  # Ensure that the proposal contains an odd number of nodes for the role
#
def validate_count_as_odd_for_role(proposal, role)
elements = proposal["deployment"][@bc_name]["elements"]
    if !elements.key?(role) || elements[role].length.even?
validation_error("Need an odd number of #{role} nodes.")
end
end
def validate_dep_proposal_is_active(bc, proposal)
const_service = self.class.get_service(bc)
service = const_service.new Rails.logger
proposals = service.list_active[1].to_a
unless proposals.include?(proposal)
if const_service.allow_multiple_proposals?
validation_error("Proposal \"#{proposal}\" for #{service.display_name} is not active yet.")
else
validation_error("Proposal for #{service.display_name} is not active yet.")
end
end
end
def _proposal_update(bc_name, inst, proposal, validate_after_save = true)
prop = Proposal.where(barclamp: bc_name, name: inst).first_or_initialize(barclamp: bc_name, name: inst)
begin
prop.properties = proposal
save_proposal!(prop, validate_after_save: validate_after_save)
Rails.logger.info "saved proposal"
[200, {}]
rescue Net::HTTPServerException => e
Rails.logger.error(([e.message] + e.backtrace).join("\n"))
[e.response.code, I18n.t("model.service.unknown_error")]
rescue Chef::Exceptions::ValidationFailed => e2
Rails.logger.error(([e2.message] + e2.backtrace).join("\n"))
[400, "Failed to validate proposal: #{e2.message}"]
end
end
#
  # Build a RoleObject (Chef role) from a proposal JSON structure
#
def self.proposal_to_role(proposal, bc_name)
role = Chef::Role.new
role.name proposal["id"].gsub("#{bc_name}-", "#{bc_name}-config-")
role.description proposal["description"]
role.default_attributes proposal["attributes"]
role.override_attributes proposal["deployment"]
RoleObject.new role
end
#
  # After validation, this is where the role is applied to the system. The
  # old instance (if one exists) is compared with the new instance: roles are
  # removed (and _remove roles added, if they exist) for nodes leaving roles,
  # and roles are added for nodes joining roles. Finally, chef-client is
  # called on the nodes.
  #
  # This function can be overridden to define a barclamp-specific operation.
  # A callback is provided that receives the role and all string names of the
  # nodes before the chef-client call.
#
# The in_queue signifies if apply_role was called from deployment queue's
# process_queue, and prevents recursion.
#
# The bootstrap parameter tells if we're in bootstrapping mode, in which case
# we simply do not run chef.
def apply_role(role, inst, in_queue, bootstrap = false)
Rails.logger.debug "apply_role(#{role.name}, #{inst}, #{in_queue}, #{bootstrap})"
Rails.logger.progress("Starting to apply role #{role.name}")
# Variables used in the global ensure
apply_locks = []
applying_nodes = []
# Cache some node attributes to avoid useless node reloads
node_attr_cache = {}
skip_unready_nodes_enabled = Rails.application.config.crowbar.fetch(
"skip_unready_nodes", {}
).fetch("enabled", false)
skip_unchanged_nodes_enabled = Rails.application.config.crowbar.fetch(
"skip_unchanged_nodes", {}
).fetch("enabled", false)
# Part I: Looking up data & checks
#
# we look up the role in the database (if there is one), the new one is
# passed in as the role param.
#
# From both, we need 'elements', i.e. role -> nodes map and element_order
# -> an ordered list of roles, telling us in which order they should be
# applied. I.e., it gives dependency info within a barclamp.
#
# Any of the new role's elements can contain clusters, so we need to expand
# them to individual nodes. We store them in 'elements_expanded'. Keeping
# role's elements_expanded cache field fresh is handled by pacemaker
# barclamp.
#
# We also check that all nodes we'll require are in the ready state.
#
# Query for this role
old_role = RoleObject.find_role_by_name(role.name)
# Get the new elements list
new_deployment = role.override_attributes[@bc_name]
new_elements = new_deployment["elements"]
element_order = new_deployment["element_order"]
# Build a list of old elements.
    # elements_expanded on the old role is guaranteed to exist, as we already
    # ran through apply_role with the old_role. The cache is used for the case
# when pacemaker barclamp is deactivated. elements_expanded gets updated
# by pacemaker barclamp.
old_elements = {}
old_deployment = old_role.override_attributes[@bc_name] unless old_role.nil?
unless old_deployment.nil?
old_elements = old_deployment["elements_expanded"]
if old_elements.nil?
old_elements = old_deployment["elements"]
end
end
pre_cached_nodes = {}
cleaned_elements = nil
# When bootstrapping, we don't run chef, so there's no need for queuing
if bootstrap
# do not try to process the queue in any case
in_queue = true
else
# Attempt to queue the proposal. If delay is empty, then run it.
deps = proposal_dependencies(role)
cleaned_elements = new_elements.deep_dup
if skip_unchanged_nodes_enabled
cleaned_elements = skip_unchanged_nodes(cleaned_elements, old_role, role)
end
if skip_unready_nodes_enabled
cleaned_elements, pre_cached_nodes = skip_unready_nodes(
@bc_name, inst, cleaned_elements, old_elements
)
end
delay, pre_cached_nodes = queue_proposal(
inst, element_order, cleaned_elements, deps, @bc_name, pre_cached_nodes
)
unless delay.empty?
Rails.logger.progress("Queuing the application of role #{role.name}")
# force not processing the queue further
in_queue = true
# FIXME: this breaks the convention that we return a string; but really,
# we should return a hash everywhere, to avoid this...
return [202, delay]
end
Rails.logger.debug "delay empty - running proposal"
end
new_elements, failures, msg = expand_items_in_elements(new_deployment["elements"])
unless failures.nil?
Rails.logger.progress("apply_role: Failed to apply role #{role.name}")
update_proposal_status(inst, "failed", msg)
return [405, msg]
end
# save list of expanded elements, as this is needed when we look at the old
# role. See below the comments for old_elements.
if new_elements != new_deployment["elements"]
new_deployment["elements_expanded"] = new_elements
else
new_deployment.delete("elements_expanded")
end
unless cleaned_elements.nil?
      # If we have removed nodes from the list, make sure to expand them and
      # overwrite the new_elements var so we don't try to run chef-client on
      # those not-ready nodes
new_elements, failures, msg = expand_items_in_elements(cleaned_elements)
unless failures.nil?
Rails.logger.progress("apply_role: Failed to apply role #{role.name}")
update_proposal_status(inst, "failed", msg)
return [405, msg]
end
end
# use the same order as in the old deployment if the element order is not filled yet
    element_order = old_deployment["element_order"] if !old_deployment.nil? && element_order.nil?
Rails.logger.debug "old_deployment #{old_deployment.pretty_inspect}"
Rails.logger.debug "new_deployment #{new_deployment.pretty_inspect}"
# Part II. Creating add/remove changesets.
#
# For Role ordering
    runlist_priority_map = new_deployment["element_run_list_order"] || {}
    local_chef_order = chef_order
# List of all *new* nodes which will be changed (sans deleted ones)
all_nodes = new_elements.values.flatten
# deployment["element_order"] tells us which order the various
# roles should be applied, and deployment["elements"] tells us
# which nodes each role should be applied to. We need to "join
# the dots" between these two, to build lists of pending role
# addition/removal actions, which will allow us to perform the
# correct operations on the nodes' run lists, and then run
# chef-client in the correct order. So we build a
# pending_node_actions Hash which maps each node name to a Hash
# representing pending role addition/removal actions for that
# node, e.g.
#
# {
# :remove => [ role1_to_remove, ... ],
# :add => [ role1_to_add, ... ]
# }
pending_node_actions = {}
# We'll build an Array where each item represents a batch of work,
# and the batches must be performed sequentially in this order.
batches = []
# get proposal to remember potential removal of a role
proposal = Proposal.where(barclamp: @bc_name, name: inst).first
save_proposal = false
# recreate new_elements with all elements, in case some of the nodes
# were hit by the filtering of unready/unchanged nodes,
# as we need the full old/new deployment list to compare the role changes
new_elements_unfiltered, = expand_items_in_elements(new_deployment["elements"])
# element_order is an Array where each item represents a batch of roles and
# the batches must be applied sequentially in this order.
element_order.each do |roles|
# roles is an Array of names of Chef roles which can all be
# applied in parallel.
# A list of nodes changed when applying roles from this batch
nodes_in_batch = []
roles.each do |role_name|
# Ignore _remove roles in case they're listed here, as we automatically
# handle them
next if role_name =~ /_remove$/
old_nodes = old_elements[role_name] || []
new_nodes = new_elements_unfiltered[role_name] || []
Rails.logger.debug "Preparing role #{role_name} for batch:"
Rails.logger.debug " Nodes in old applied proposal for role: #{old_nodes.inspect}"
Rails.logger.debug " Nodes in new applied proposal for role: #{new_nodes.inspect}"
remove_role_name = "#{role_name}_remove"
        # Also act on nodes that were to be removed last time, but couldn't
        # be, possibly due to an error during the last application
old_nodes += (proposal.elements.delete(remove_role_name) || [])
# We already have nodes with old version of this role.
unless old_nodes.empty?
# Lookup remove-role.
tmprole = RoleObject.find_role_by_name remove_role_name
use_remove_role = !tmprole.nil?
old_nodes.each do |node_name|
pre_cached_nodes[node_name] ||= Node.find_by_name(node_name)
# Don't add deleted nodes to the run order, they clearly won't have
# the old role
if pre_cached_nodes[node_name].nil?
Rails.logger.debug "skipping deleted node #{node_name}"
next
end
# An old node that is not in the new deployment, drop it
unless new_nodes.include?(node_name)
pending_node_actions[node_name] ||= { remove: [], add: [] }
pending_node_actions[node_name][:remove] << role_name
# Remove roles are a way to "de-configure" things on the node
# when a role is not used anymore for that node. For instance,
# stopping a service, or removing packages.
# FIXME: it's not clear how/who should be responsible for
# removing them from the node records.
if use_remove_role
pending_node_actions[node_name][:add] << remove_role_name
# Save remove intention in #{@bc_name}-databag; we will remove
# the intention after a successful apply_role.
proposal.elements[remove_role_name] ||= []
proposal.elements[remove_role_name] << node_name
save_proposal ||= true
end
nodes_in_batch << node_name unless nodes_in_batch.include?(node_name)
end
end
end
# If new_nodes is empty, we are just removing the proposal.
unless new_nodes.empty?
new_nodes.each do |node_name|
            # Skip adding nodes to the batch unless they are really in the
            # list to be deployed. Do this before the Node load to avoid the
            # call when the node is not there, as we don't want to spend
            # cycles on extra calls that are not needed.
            # This also means we can't ensure that the node has all required
            # roles through the use of pending_node_actions.
            # It's a reasonable trade-off in the context of this specific
            # optimization, as the nodes should already have all roles, unless
            # the customer removes roles manually or the roles disappear from
            # the node magically (bugs)
next unless new_elements[role_name].include?(node_name)
pre_cached_nodes[node_name] ||= Node.find_by_name(node_name)
# Don't add deleted nodes to the run order
#
            # Q: Why don't we just bail out instead?
            # A: This got added for the barclamps where all nodes are used
            #    (for instance, provisioner, logging, dns, ntp), so that we
            #    don't fail too easily when a node got forgotten.
            #    It's kind of an ugly workaround for the fact that we don't
            #    properly handle forgotten nodes, and for the fact that we
            #    don't have some alias that can be used to assign all existing
            #    nodes to a role (which would be an improvement over the
            #    requirement to explicitly list all nodes).
if pre_cached_nodes[node_name].nil?
Rails.logger.debug "skipping deleted node #{node_name}"
next
end
pending_node_actions[node_name] ||= { remove: [], add: [] }
pending_node_actions[node_name][:add] << role_name
nodes_in_batch << node_name unless nodes_in_batch.include?(node_name)
end
end
end # roles.each
batches << [roles, nodes_in_batch] unless nodes_in_batch.empty?
end
Rails.logger.debug "batches: #{batches.inspect}"
# Cache attributes that are useful later on
pre_cached_nodes.each do |node_name, node|
if node.nil?
Rails.logger.debug "skipping deleted node #{node_name}"
next
end
node_attr_cache[node_name] = {
"alias" => node.alias,
"windows" => node[:platform_family] == "windows",
"admin" => node.admin?
}
end
# save databag with the role removal intention
proposal.save if save_proposal
unless bootstrap
applying_nodes = batches.map { |roles, nodes| nodes }.flatten.uniq.sort
      # Mark nodes as applying; beware that all_nodes does not contain nodes
      # that are actually being removed.
Rails.logger.progress("Marking #{applying_nodes.length} nodes as applying")
set_to_applying(applying_nodes, inst, pre_cached_nodes)
# Prevent any intervallic runs from running whilst we apply the
# proposal, in order to avoid the orchestration problems described
# in https://bugzilla.suse.com/show_bug.cgi?id=857375
#
# First we pause the chef-client daemons by ensuring a magic
# pause-file.lock exists which the daemons will honour due to a
# custom patch:
nodes_to_lock = applying_nodes.reject do |node_name|
node_attr_cache[node_name]["windows"] || node_attr_cache[node_name]["admin"]
end
if nodes_to_lock.any?
owner = "apply_role-#{role.name}-#{inst}-#{Process.pid}"
reason = "apply_role(#{role.name}, #{inst}, #{in_queue}) pid #{Process.pid}"
Rails.logger.progress("Acquiring chef-client locks on #{nodes_to_lock.length} nodes")
apply_locks, errors = lock_nodes(nodes_to_lock, owner, reason)
else
apply_locks = []
errors = []
end
unless errors.empty?
Rails.logger.progress("apply_role: Failed to apply role #{role.name}")
message = "Failed to apply the proposal:\n#{errors.values.join("\n")}"
update_proposal_status(inst, "failed", message)
return [409, message] # 409 is 'Conflict', which makes sense for locks
end
# Now that we've ensured no new intervallic runs can be started,
# wait for any which started before we paused the daemons.
Rails.logger.progress(
"Waiting for existing chef-client to complete on #{applying_nodes.length} nodes"
)
wait_for_chef_daemons(applying_nodes)
end
# By this point, no intervallic runs should be running, and no
# more will be able to start running until we release the locks
# after the proposal has finished applying.
# We save the role ("applied proposal") now, because we will reference it
# from the run lists of the nodes, and therefore it now really needs to
# exist.
# We explicitly don't want to save it earlier because apply_role could fail
# in code before here, and while the role contains the data from the
# proposal, it doesn't contain yet the data that could be changed in
# apply_role_pre_chef_client -- which is critical data, like "do we do HA?".
# Saving earlier would mean that a crash in apply_role could lead to the
# role containing invalid information ("no HA") that would be used in
# periodic chef-client runs.
# Ideally we would therefore only save the role just before/after
# apply_role_pre_chef_client, but as said above, we need to save it before
# we change the run lists.
role.save
    # Part III: Update run lists of nodes to reflect the new deployment, i.e.
    # write the deployment schedule from pending_node_actions through into
    # the nodes' run lists.
Rails.logger.progress("Updating the run_lists for #{pending_node_actions.inspect}")
pending_node_actions.each do |node_name, lists|
# pre_cached_nodes contains only new_nodes, we need to look up the
# old ones as well.
pre_cached_nodes[node_name] ||= Node.find_by_name(node_name)
node = pre_cached_nodes[node_name]
next if node.nil?
save_it = false
rlist = lists[:remove]
alist = lists[:add]
# Remove the roles being lost
rlist.each do |item|
save_it = node.delete_from_run_list(item) || save_it
end
# Add the roles being gained
alist.each do |item|
priority = runlist_priority_map[item] || local_chef_order
save_it = node.add_to_run_list(item, priority) || save_it
end
# Make sure the config role is on the nodes in this barclamp, otherwise
# remove it
if all_nodes.include?(node.name)
priority = runlist_priority_map[role.name] || local_chef_order
save_it = node.add_to_run_list(role.name, priority) || save_it
else
save_it = node.delete_from_run_list(role.name) || save_it
end
node.save if save_it
end
    # Part IV: Deployment. Chef clients are run as separate processes; each
    # independent batch is parallelized, and admin and non-admin nodes are
    # treated separately. Lastly, chef-client is executed manually on this
    # (admin) node, to make sure admin node changes are deployed.
# Deployment pre (and later post) callbacks.
# The barclamps override these.
Rails.logger.progress("Calling apply_role_pre_chef_call")
begin
apply_role_pre_chef_call(old_role, role, all_nodes)
rescue StandardError => e
Rails.logger.fatal("apply_role: Exception #{e.message} #{e.backtrace.join("\n")}")
Rails.logger.progress("Failed to apply role #{role.name} before calling chef")
message = "Failed to apply the proposal: exception before calling chef (#{e.message})"
update_proposal_status(inst, "failed", message)
return [405, message]
end
    # When bootstrapping, we don't want to run chef.
if bootstrap
batches = []
ran_admin = true
else
ran_admin = false
end
# Invalidate cache as apply_role_pre_chef_call can save nodes
pre_cached_nodes = {}
# Each batch is a list of nodes that can be done in parallel.
batches.each_with_index do |batch, index|
roles, node_names = batch
batch_progress_message = \
"batch #{index + 1}/#{batches.count}: " \
"#{node_names.join(", ")} for #{roles.join(", ")}"
Rails.logger.progress("Applying #{batch_progress_message}")
ran_admin = true if node_names.detect do |node_name|
node_attr_cache[node_name]["admin"]
end
nodes_to_run = node_names.reject do |node_name|
node_attr_cache[node_name]["windows"]
end
threads = remote_chef_client_threads(nodes_to_run, pre_cached_nodes,
roles)
# Invalidate cache as chef might have saved the nodes
pre_cached_nodes = {}
# wait for all running threads and collect the ones with a non-zero return value
bad_nodes = []
Rails.logger.progress("Waiting for #{threads.keys.length} threads to finish...")
ThreadsWait.all_waits(threads.keys) do |t|
Rails.logger.debug("Thread #{t} for node #{threads[t]} finished (return '#{t.value}')")
unless t.value == 0
bad_nodes << threads[t]
end
end
if bad_nodes.empty?
Rails.logger.progress("Applied #{batch_progress_message}")
next
end
message = "Failed to apply the proposal to:\n"
nodes_alias = []
bad_nodes.each do |node|
message += "#{node_attr_cache[node]["alias"]} (#{node}):\n"
nodes_alias.push(node_attr_cache[node]["alias"])
message += get_log_lines(node)
end
Rails.logger.progress("Failed to apply the role to #{nodes_alias.join(", ")}")
update_proposal_status(inst, "failed", message)
return [405, message]
end
# XXX: This should not be done this way. Something else should request this.
system("sudo", "-i", Rails.root.join("..", "bin", "single_chef_client.sh").expand_path.to_s) if !ran_admin
# Post deploy callback
Rails.logger.progress("Calling apply_role_post_chef_call")
begin
apply_role_post_chef_call(old_role, role, all_nodes)
rescue StandardError => e
Rails.logger.fatal("apply_role: Exception #{e.message} #{e.backtrace.join("\n")}")
Rails.logger.progress("Failed to apply role #{role.name} after calling chef")
message = "Failed to apply the proposal: exception after calling chef (#{e.message})"
update_proposal_status(inst, "failed", message)
return [405, message]
end
# Invalidate cache as apply_role_post_chef_call can save nodes
pre_cached_nodes = {}
    # Are there any roles to remove from the run list?
    # The @bc_name proposal's elements key will contain the removal intentions
# proposal.elements =>
# {
# "role1_remove" => ["node1"],
# "role2_remove" => ["node2", "node3"]
# }
roles_to_remove = proposal.elements.keys.select do |r|
r =~ /_remove$/
end
roles_to_remove.each do |role_to_remove|
# No need to remember the nodes with the role to remove, now that we've
# executed the role, hence the delete()
nodes_with_role_to_remove = proposal.elements.delete(role_to_remove)
nodes_with_role_to_remove.each do |node_name|
# Do not use pre_cached_nodes, as nodes might have been saved in
# apply_role_pre_chef_call
pre_cached_nodes[node_name] ||= Node.find_by_name(node_name)
node = pre_cached_nodes[node_name]
node.save if node.delete_from_run_list(role_to_remove)
end
end
# Save if we did a change
proposal.save unless roles_to_remove.empty?
update_proposal_status(inst, "success", "")
[200, {}]
rescue StandardError => e
Rails.logger.progress("Failed to apply proposal")
Rails.logger.fatal("apply_role: Uncaught exception #{e.message} #{e.backtrace.join("\n")}")
message = "Failed to apply the proposal: uncaught exception (#{e.message})"
update_proposal_status(inst, "failed", message)
[405, message]
ensure
if apply_locks.any?
Rails.logger.progress("Releasing chef-client locks on #{apply_locks.length} nodes")
release_chef_locks(apply_locks)
end
if applying_nodes.any?
Rails.logger.progress("Restoring #{applying_nodes.length} nodes as ready")
restore_to_ready(applying_nodes)
end
Rails.logger.progress("Done applying role #{role.name}")
process_queue unless in_queue
end
def apply_role_pre_chef_call(old_role, role, all_nodes)
# noop by default.
end
def apply_role_post_chef_call(old_role, role, all_nodes)
# noop by default.
end
  # This callback provides a hook into the schema migration procedure.
  # It is called from lib/schema_migration.rb after the schema migrations
  # for a proposal (and its role, if present) have been executed. Parameters
  # are the migrated proposal and role objects. The default implementation is
  # a noop (to be overridden in subclasses).
def post_schema_migration_callback(proposal, role)
# noop by default
end
#
  # Inputs: role = RoleObject of the proposal being applied/queued.
  # Returns: list of hashes { "barclamp" => bcname, "inst" => instname }
#
def proposal_dependencies(role)
# Default none
[]
end
def skip_unchanged_node?(node_name, old_role, role)
    # By default, don't skip anything
false
end
def expand_items_in_elements(elements)
# expand items in elements that are not nodes
expanded_new_elements = {}
elements.each do |role_name, nodes|
expanded_new_elements[role_name], failures = expand_nodes_for_all(nodes)
next if failures.nil? || failures.empty?
Rails.logger.fatal(
"apply_role: Failed to expand items #{failures.inspect} for role \"#{role_name}\""
)
msg = "Failed to apply the proposal: cannot expand list of nodes " \
"for role \"#{role_name}\", following items do not exist: #{failures.join(", ")}"
return [nil, failures, msg]
end
[expanded_new_elements, nil, nil]
end
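  # Add the given node to a role, both in the proposal and in the applied
  # role, and update the node's run list accordingly. Returns false if the
  # node cannot be found, true otherwise.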
def add_role_to_instance_and_node(barclamp, instance, name, prop, role, newrole)
node = Node.find_by_name(name)
if node.nil?
Rails.logger.debug("ARTOI: couldn't find node #{name}. bailing")
return false
end
runlist_priority_map = prop["deployment"][barclamp]["element_run_list_order"] rescue {}
runlist_priority_map ||= {}
local_chef_order = runlist_priority_map[newrole] || BarclampCatalog.chef_order(barclamp)
prop["deployment"][barclamp]["elements"][newrole] = [] if prop["deployment"][barclamp]["elements"][newrole].nil?
unless prop["deployment"][barclamp]["elements"][newrole].include?(node.name)
Rails.logger.debug("ARTOI: updating proposal with node #{node.name}, role #{newrole} " \
"for deployment of #{barclamp}")
prop["deployment"][barclamp]["elements"][newrole] << node.name
prop.save
else
Rails.logger.debug(
"ARTOI: node #{node.name} already in proposal: role #{newrole} for #{barclamp}"
)
end
role.override_attributes[barclamp]["elements"][newrole] = [] if role.override_attributes[barclamp]["elements"][newrole].nil?
unless role.override_attributes[barclamp]["elements"][newrole].include?(node.name)
Rails.logger.debug("ARTOI: updating role #{role.name} for node #{node.name} " \
"for barclamp: #{barclamp}/#{newrole}")
role.override_attributes[barclamp]["elements"][newrole] << node.name
role.save
else
Rails.logger.debug("ARTOI: role #{role.name} already has node #{node.name} " \
"for barclamp: #{barclamp}/#{newrole}")
end
save_it = false
save_it = node.add_to_run_list(newrole, local_chef_order) || save_it
save_it = node.add_to_run_list("#{barclamp}-config-#{instance}", local_chef_order) || save_it
if save_it
Rails.logger.debug("saving node")
node.save
end
true
end
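  # Spawn one thread per node that runs chef-client remotely; returns a hash
  # mapping each thread to its node name so callers can wait on them and
  # report failures.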
def remote_chef_client_threads(node_names, pre_cached_nodes, roles)
threads = {}
node_names.each do |node_name|
pre_cached_nodes[node_name] ||= Node.find_by_name(node_name)
node = pre_cached_nodes[node_name]
node["crowbar"]["applying_for"] = {}
node["crowbar"]["applying_for"][@bc_name] = roles
node.save
filename = "#{ENV['CROWBAR_LOG_DIR']}/chef-client/#{node_name}.log"
thread = run_remote_chef_client(node_name, "chef-client", filename)
threads[thread] = node_name
end
threads
end
  # Run the given command on the node in a thread. The thread returns 0
  # if the run was successful.
def run_remote_chef_client(node, command, logfile_name)
Thread.new do
# Exec command
# the -- tells sudo to stop interpreting options
ssh_cmd = ["sudo", "-u", "root", "--", "ssh", "-o", "TCPKeepAlive=no",
"-o", "ServerAliveInterval=15", "root@#{node}"]
ssh_cmd << command
# check if there are currently other chef-client runs on the node
wait_for_chef_clients(node, logger: false)
# check if the node is currently rebooting
wait_for_reboot(node)
# don't use a cached node object here, as there might have been some chef
# run we were blocking on in the wait_for_chef_clients call before
node_wall = Node.find_by_name(node)[:crowbar_wall]
old_reboot_time = node_wall[:wait_for_reboot_requesttime] || 0
ret = 0
open(logfile_name, "a") do |f|
success = system(*ssh_cmd, out: f, err: f)
# If reboot was requested (through the reboot handler), then the
# chef-client call might be interrupted and might fail; however,
# because the reboot occurs at the end of the chef run, we know that
# the run was actually successful.
# And of course, we need to reload the node object from chef to get the
# latest attributes.
node_wall = Node.find_by_name(node)[:crowbar_wall]
if success ||
(node_wall[:wait_for_reboot] &&
node_wall[:wait_for_reboot_requesttime] > old_reboot_time)
wait_for_reboot(node)
else
ret = 1
end
end
ret
end
end
private
THREAD_POOL_SIZE = 20
  # Is the upgrade of (some) compute nodes postponed?
def upgrade_postponed?
Dir.glob("/var/lib/crowbar/upgrade/*-upgrade-compute-nodes-postponed").any?
end
def wait_for_chef_daemons(node_list)
return if node_list.empty?
queue = Queue.new
node_list.each do |node_name|
node = Node.find_by_name(node_name)
queue.push node_name unless node[:platform_family] == "windows"
end
workers = (0...THREAD_POOL_SIZE).map do
Thread.new do
loop do
begin
node_name = queue.pop(true)
rescue ThreadError
break
end
wait_for_chef_clients(node_name, logger: true)
end
end
end
logger.debug "wait_for_chef_daemons: Waiting " \
"for #{workers.count} unlock threads to finish..."
workers.map(&:join)
logger.debug "wait_for_chef_daemons: Finished waiting for #{workers.count} lock threads"
end
def release_chef_locks(locks)
return if locks.empty?
queue = Queue.new
locks.each { |l| queue.push l }
workers = (0...THREAD_POOL_SIZE).map do
Thread.new do
loop do
begin
lock = queue.pop(true)
rescue ThreadError
break
end
lock.release
end
end
end
logger.debug "release_chef_locks: Waiting for #{workers.count} unlock threads to finish..."
workers.map(&:join)
logger.debug "release_chef_locks: Finished waiting for #{workers.count} lock threads"
end
def save_nodes(nodes)
return if nodes.empty?
queue = Queue.new
nodes.each { |n| queue.push n }
workers = (0...[THREAD_POOL_SIZE, nodes.count].min).map do
Thread.new do
loop do
begin
node = queue.pop(true)
rescue ThreadError
break
end
node.save
end
end
end
logger.debug "save_nodes: Waiting for #{workers.count} save threads to finish..."
workers.map(&:join)
logger.debug "save_nodes: Finished waiting for #{workers.count} save threads"
end
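  # Acquire the shared pause-file lock on each of the given nodes using a
  # small thread pool. Returns the list of acquired lock objects and a hash
  # of per-node error messages for nodes where locking failed.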
def lock_nodes(nodes, lock_owner, lock_reason)
locks = []
errors = {}
return [locks, errors] if nodes.empty?
locks_mutex = Mutex.new
errors_mutex = Mutex.new
queue = Queue.new
nodes.each { |n| queue.push n }
workers = (0...THREAD_POOL_SIZE).map do
Thread.new do
loop do
begin
node = queue.pop(true)
rescue ThreadError
break
end
begin
lock = Crowbar::Lock::SharedNonBlocking.new(
logger: @logger,
path: "/var/chef/cache/pause-file.lock",
node: node,
owner: lock_owner,
reason: lock_reason
).acquire
rescue Crowbar::Error::LockingFailure => e
errors_mutex.synchronize { errors[node] = e.message }
end
          # push under the mutex, as multiple worker threads append to locks
          locks_mutex.synchronize { locks.push(lock) } if lock
end
end
end
logger.debug "lock_nodes: Waiting for #{workers.count} lock threads to finish..."
workers.map(&:join)
logger.debug "lock_nodes: Finished waiting for #{workers.count} lock threads"
[locks, errors]
end
def wait_for_chef_clients(node_name, options = {})
    # only pass a logger on when the caller actually asked for one
    options = options[:logger] ? { logger: Rails.logger } : {}
Rails.logger.debug(
"wait_for_chef_clients: Waiting for already running chef-clients on #{node_name}."
)
unless RemoteNode.chef_ready?(node_name, 1200, 10, options)
Rails.logger.error("Waiting for already running chef-clients on #{node_name} failed.")
exit(1)
end
end
def wait_for_reboot(node)
nobj = Node.find_by_name(node)
if nobj[:crowbar_wall][:wait_for_reboot]
puts "Waiting for reboot of node #{node}"
if RemoteNode.ready?(node, 1200)
puts "Waiting for reboot of node #{node} done. Node is back"
# Check node state - crowbar_join's chef-client run should successfully finish
puts "Waiting to finish chef-client run on node #{node}"
begin
Timeout.timeout(600) do
loop do
nobj = Node.find_by_name(node)
case nobj[:state]
when "ready"
puts "Node state after reboot is: #{nobj[:state]}. Continue"
break
when "problem"
STDERR.puts "Node state after reboot is: #{nobj[:state]}. Exit"
exit(1)
else
puts "Node state after reboot is: #{nobj[:state]}. Waiting"
sleep(10)
end
end
end
rescue Timeout::Error
STDERR.puts "Node state never reached valid state. Exit"
exit(1)
end
else
STDERR.puts "Waiting for reboot of node #{node} failed"
exit(1)
end
end
end
def handle_validation_errors
if @validation_errors && @validation_errors.length > 0
Rails.logger.info "validation errors in proposal #{@bc_name}"
raise Chef::Exceptions::ValidationFailed.new("#{@validation_errors.join("\n")}\n")
end
end
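  # Extract the most recent chunk of the node's chef-client log (from the
  # last "=" * 80 delimiter line if one is close to the end, otherwise the
  # last 50 lines) for inclusion in failure messages.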
def get_log_lines(node)
begin
line_count = 0
last_delimiter_line = 0
f = File.open("/var/log/crowbar/chef-client/#{node}.log")
f.each do |line|
if line == "=" * 80
last_delimiter_line = line_count
end
line_count += 1
end
f.seek(0, IO::SEEK_SET)
starting_line =
# If we found a delimiter in the last (say) 10 lines, we don't need
# to show all of the last 50.
if (last_delimiter_line > 0) && (line_count - last_delimiter_line) < 50
last_delimiter_line - 3
else
line_count - 50
end
logged_lines = f.readlines[starting_line..line_count]
"Most recent logged lines from the Chef run: \n\n<pre>" +
logged_lines.join + "</pre>"
rescue
Rails.logger.error("Error reporting: Couldn't open /var/log/crowbar/chef-client/#{node}.log ")
raise "Error reporting: Couldn't open /var/log/crowbar/chef-client/#{node}.log"
end
end
#
# Proposal is a json structure (not a ProposalObject)
# Use to create or update an active instance
#
def active_update(proposal, inst, in_queue, bootstrap = false)
begin
role = ServiceObject.proposal_to_role(proposal, @bc_name)
apply_role(role, inst, in_queue, bootstrap)
rescue Net::HTTPServerException => e
Rails.logger.error(([e.message] + e.backtrace).join("\n"))
[e.response.code, {}]
rescue Chef::Exceptions::ValidationFailed => e2
Rails.logger.error(([e2.message] + e2.backtrace).join("\n"))
[400, e2.message]
end
end
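  # Filter out of the elements map all nodes for which skip_unchanged_node?
  # returns true (barclamps override that predicate; the default skips
  # nothing).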
def skip_unchanged_nodes(elements, old_role, role)
cleaned_elements = {}
elements.each_key do |r|
cleaned_elements[r] ||= []
elements[r].each do |node_name|
cleaned_elements[r] << node_name unless skip_unchanged_node?(node_name, old_role, role)
end
end
cleaned_elements
end
def skip_unready_nodes(bc, inst, new_elements, old_elements)
logger.debug("skip_unready_nodes: enter for #{bc}:#{inst}")
skip_unready_nodes_roles = Rails.application.config.crowbar.fetch(
"skip_unready_nodes", {}
).fetch("roles", [])
pre_cached_nodes = {}
cleaned_elements = new_elements.deep_dup
skip_unready_nodes_roles.each do |role|
# only do something if we have the same role on both old and new
next unless new_elements.key?(role) && old_elements.key?(role)
      # we can only skip nodes that are in both old and new, as we know those
      # old nodes had the roles applied and will eventually become consistent
      # with the deployment due to the periodic chef run
shared_elements = new_elements[role] & old_elements[role]
shared_elements.each do |n|
pre_cached_nodes[n] ||= Node.find_by_name(n)
node = pre_cached_nodes[n]
next if node.nil?
        # skip if the node is ready; we don't need to do anything
next if node.state == "ready"
        # crowbar_upgrade is not an error state, and some upgrade-specific
        # recipes are executed for nodes in this state, so we do not want to
        # skip it by default. The exception is when the upgrade of compute
        # nodes is postponed and the user wants to apply some proposal: in
        # that case we really want to skip such nodes, so the proposal is
        # applied only to the nodes that are already upgraded and ready.
next if node.state == "crowbar_upgrade" && !upgrade_postponed?
logger.warn(
"Node #{n} is skipped until next chef run for #{bc}:#{inst} with role #{role}"
)
cleaned_elements[role].delete(n)
end
end
logger.debug("skip_unready_nodes: exit for #{bc}:#{inst}")
[cleaned_elements, pre_cached_nodes]
end
# return true if the new attributes are different from the old ones
def node_changed_attributes?(node, old_role, new_role)
old_role.default_attributes[@bc_name] != new_role.default_attributes[@bc_name]
end
# return true if the node has changed roles
def node_changed_roles?(node, old_role, new_role)
roles_in_old = old_role.elements.keys.select { |r| old_role.elements[r].include?(node) }.sort
roles_in_new = new_role.elements.keys.select { |r| new_role.elements[r].include?(node) }.sort
roles_in_old != roles_in_new
end
  # return true if some attribute other than the ignored ones has changed,
  # but only if the node has no roles other than those listed in
  # only_for_roles
def relevant_attributes_changed_if_roles?(node, old_role, new_role, ignore_attr, only_for_roles)
    # if only_for_roles contains some roles, apply the role filter
unless only_for_roles.nil? || only_for_roles.empty?
# get roles for this node
roles_in_new = new_role.elements.keys.select { |r| new_role.elements[r].include?(node) }
      # return false if the node has roles other than the ones in the list
      # (only_for_roles) -- this test is not for us
return false unless Set.new(roles_in_new).subset?(Set.new(only_for_roles))
end
    # if ignore_attr has some elements, apply the filters before comparing
if ignore_attr.nil? || ignore_attr.empty?
old_role.default_attributes[@bc_name] != new_role.default_attributes[@bc_name]
else
# prepare a clone of default attributes of old and new roles
old_selected_attributes = old_role.default_attributes[@bc_name].deep_dup
new_selected_attributes = new_role.default_attributes[@bc_name].deep_dup
# function to remove all ignored attributes from a list
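      # e.g. an ignored path "ssl.certfile" walks to attributes["ssl"] and
      # deletes its "certfile" key (path illustrative)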
remove_ignored = lambda do |attributes, ignored|
ignored.each do |path|
iterator = attributes
path = path.split(".")
while path.length > 1
iterator = iterator[path[0]]
break if iterator.nil?
path.slice!(0)
end
iterator.delete(path[0]) unless iterator.nil?
end
end
# remove ignored attributes from old and new attributes
remove_ignored.call(old_selected_attributes, ignore_attr)
remove_ignored.call(new_selected_attributes, ignore_attr)
# return true if the attributes have changed, except for the ignored ones
old_selected_attributes != new_selected_attributes
end
end
end