crowbar/crowbar-core

View on GitHub
chef/cookbooks/provisioner/templates/default/crowbar_join.suse.sh.erb

Summary

Maintainability
Test Coverage
#!/bin/bash
# Copyright 2011, Dell
# Copyright 2012, SUSE Linux Products GmbH
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Make sure the log directory exists before anything is appended to it.
[[ -d /var/log/crowbar/crowbar_join ]] || mkdir -p /var/log/crowbar/crowbar_join/

# Write a banner for this invocation, then send everything that goes to
# stderr from here on (including the xtrace output enabled below) to the
# same errlog file.
echo "======== Running $0 $* ($(date '+%F %T %z')) ========" >>/var/log/crowbar/crowbar_join/errlog
exec 2>>/var/log/crowbar/crowbar_join/errlog

# Trace every command, prefixed with source file, line number and function.
export PS4='${BASH_SOURCE}@${LINENO}(${FUNCNAME[0]}): '
set -x


################## Functions ##################

# Print an error message on stdout.
#
# stdout is used on purpose: stderr is redirected to the errlog file near the
# top of this script, so a message sent to fd 2 would only end up in that file.
#
# Arguments: $@ - message words; they are joined with single spaces.
echo_error() {
    # "$*" is quoted so the message is printed verbatim instead of being
    # word-split and glob-expanded a second time.
    printf '%s\n' "$*"
}

# Print a message on stdout, but only when $VERBOSE is a non-zero integer.
#
# Arguments: $@ - message words; they are joined with single spaces.
# Returns:   non-zero when nothing was printed (the status of the failed
#            test), 0 otherwise.
echo_verbose() {
    # "$*" is quoted so the message is printed verbatim instead of being
    # word-split and glob-expanded a second time.
    [ "$VERBOSE" -ne 0 ] && printf '%s\n' "$*"
}

# Print a message on stdout, but only when $DEBUG is a non-zero integer.
#
# Arguments: $@ - message words; they are joined with single spaces.
# Returns:   non-zero when nothing was printed (the status of the failed
#            test), 0 otherwise.
echo_debug() {
    # "$*" is quoted so the message is printed verbatim instead of being
    # word-split and glob-expanded a second time.
    [ "$DEBUG" -ne 0 ] && printf '%s\n' "$*"
}

# Run a command and log its output.
#
# The command's stdout is appended to
# /var/log/crowbar/crowbar_join/<name>.log and its stderr to
# /var/log/crowbar/crowbar_join/<name>.err. A timestamped "Running ..."
# header and a "Done ..." trailer are appended to both files.
#
# Arguments: $1   - base name of the log files (without extension)
#            $2.. - the command to run, with its arguments
# Outputs:   on failure, two lines via echo_error pointing at the log files
# Returns:   the exit status of the command
log_to() {
    local __log="/var/log/crowbar/crowbar_join/$1"
    local __timestamp
    __timestamp="$(date '+%F %T %z')"
    shift

    printf "\n%s\n" "$__timestamp: Running $*" | \
        tee -a "$__log.err" >> "$__log.log"

    local _ret=0
    if "$@" 2>> "$__log.err" >>"$__log.log"; then
        _ret=0
    else
        _ret="$?"
        echo_error "$__timestamp: $* failed."
        echo_error "See $__log.log and $__log.err for more information."
    fi

    # The format needs a real %s conversion; without one the "Done" line
    # is silently dropped and only the separator is written.
    printf "\n%s\n--------\n" "$(date '+%F %T %z'): Done $*" | \
        tee -a "$__log.err" >> "$__log.log"

    return $_ret
}

# Print the Crowbar state of a node.
#
# Arguments: $1 - node name handed to "crowbarctl restricted show"
# Outputs:   the state on stdout, or "unknown" when the crowbarctl output
#            does not start with "state "
get_state() {
    # Both variables are local so that calling this function outside of a
    # command substitution cannot clobber a global $state.
    local output state
    # we expect something like "state ready"
    output=$(crowbarctl restricted show --plain --filter state "$1")
    state="${output##state }"
    if [ "$state" != "$output" ]; then
        echo "$state"
    else
        echo "unknown"
    fi
}

# Block until "ip addr" lists an "inet " address on a line that does not
# contain " lo". Polls once per second and never gives up.
wait_for_network() {
    until ip addr | grep -v " lo" | grep -q "inet "; do
        sleep 1
    done
}

# Wait until "hostname -f" succeeds.
#
# Globals:   HOSTNAME (written with the output of "hostname -f" whenever the
#            first check of an iteration failed and the call was repeated)
# Returns:   0 once "hostname -f" succeeds, 1 after ten failed attempts
#            (with a 5 second pause between attempts)
wait_for_hostname() {
    # Kept local so the counter cannot collide with counters of the same
    # name used elsewhere in this script.
    local tries_left=10
    local exit_code

    while ! hostname -f > /dev/null; do
        HOSTNAME=$(hostname -f)
        exit_code=$?

        echo_debug "$HOSTNAME"
        echo_debug "$exit_code"
        # The name may have become resolvable between the two calls.
        if [ "$exit_code" -eq 0 ]; then
            break
        fi

        tries_left=$((tries_left - 1))
        if [ "$tries_left" -eq 0 ]; then
            return 1
        fi

        echo_debug "sleeping 5 seconds"
        sleep 5
    done

    return 0
}

# Wait until the administration server answers a ping.
#
# Globals:   IP (read) - address that is pinged
# Returns:   0 as soon as one ping succeeds, 1 after 120 failed pings
#            (with a 1 second pause between attempts)
wait_for_admin_server() {
    # Kept local so the counter cannot collide with counters of the same
    # name used elsewhere in this script.
    local tries_left=120

    while ! ping -q -c1 "$IP" > /dev/null; do
        tries_left=$((tries_left - 1))
        if [ "$tries_left" -eq 0 ]; then
            return 1
        fi

        echo_debug "Waiting for administration server"
        sleep 1
    done

    return 0
}

# Synchronize the clock once with ntpdate against $VALID_NTP_SERVERS.
#
# Does nothing when $VALID_NTP_SERVERS is empty. Otherwise ntpdate is retried
# up to 120 times with a 1 second pause between attempts.
#
# Globals:   VALID_NTP_SERVERS (read; reset to "" when every attempt failed,
#            so that later calls skip the synchronization)
sync_time() {
    if [ -z "$VALID_NTP_SERVERS" ]; then
        echo_verbose "Skipping time synchronization..."
        return
    fi

    local tries_left=120

    # Arithmetic comparison: inside [[ ]] the ">" operator compares strings
    # lexicographically, which is not what a retry counter needs.
    while (( tries_left > 0 )); do
        # $VALID_NTP_SERVERS is left unquoted on purpose: it is a
        # space-separated list and every server must be its own argument.
        if /usr/sbin/ntpdate -u $VALID_NTP_SERVERS; then
            break
        fi
        tries_left=$((tries_left - 1))
        echo_verbose "Waiting for NTP server(s) $VALID_NTP_SERVERS"
        sleep 1
    done
    if [ "$tries_left" -eq 0 ]; then
        VALID_NTP_SERVERS=""
        echo_verbose "Giving up on time synchronization; will skip further attempts..."
    fi
}

# First-time setup of the Chef client on this node.
#
# Downloads validation.pem from the HTTP server, makes sure
# /etc/chef/client.rb points at the Chef server on $IP:4000 and enables the
# chef-client systemd unit.
#
# Globals:   HTTP_SERVER (read), IP (read)
do_setup() {
    local pem=/etc/chef/validation.pem
    local client_rb=/etc/chef/client.rb

    mkdir -p /etc/chef

    # Up to 30 download attempts, pausing one second longer after each one.
    # The loop ends as soon as the key file exists.
    for retry in $(seq 1 30); do
        curl -f --retry 2 -o $pem \
            --connect-timeout 60 -s -L \
            "http://$HTTP_SERVER/validation.pem"
        if [ -f $pem ]; then
            break
        fi
        sleep $retry
    done

    # Make sure that the client knows how to talk to the server: keep an
    # existing client.rb only when it already names the right server URL.
    if [ -f $client_rb ] && \
       grep -q "^\s*chef_server_url\s*[\"\']http://$IP:4000[\"\']" $client_rb; then
        :
    else
        # Preserve whatever was there before as client.rb.bak.
        if test -f $client_rb; then
            mv $client_rb $client_rb.bak
        fi
        echo "chef_server_url \"http://$IP:4000\"" >$client_rb
        echo "zypper_check_gpg true" >> $client_rb
    fi

    log_to chef systemctl enable chef-client
}

# Write CONTENT, followed by a newline, to FILE unless FILE already holds
# exactly that (compared by SHA-256 checksum).
# Arguments: $1 - file to write, $2 - wanted content
_crowbar_join_write_if_changed() {
    local target="$1" content="$2"
    local sum_current sum_wanted

    if [ -f "$target" ]; then
        sum_current=$(sha256sum "$target" | cut -d ' ' -f 1)
        sum_wanted=$(printf '%s\n' "$content" | sha256sum | cut -d ' ' -f 1)
        [ "$sum_current" = "$sum_wanted" ] && return 0
    fi
    printf '%s\n' "$content" > "$target"
}

# Append LINE to FILE unless FILE already contains exactly that line.
# Arguments: $1 - file to extend, $2 - line to ensure
_crowbar_join_ensure_line() {
    local file="$1" line="$2"
    grep -qxF -e "$line" "$file" || printf '%s\n' "$line" >> "$file"
}

# Make sure the Chef client configuration carries the settings this script
# relies on: reporting disabled, the reboot handlers installed and
# registered, and two workarounds. Every step only changes a file when its
# content is not already as wanted, so the function can be run repeatedly.
#
# Arguments: $1 - client configuration file (default /etc/chef/client.rb)
#            $2 - directory for the handler files
#                 (default /var/chef/handlers)
do_ensure_chef_configuration() {
    local client_rb="${1:-/etc/chef/client.rb}"
    local handler_dir="${2:-/var/chef/handlers}"
    local reboot_handler_content reboot_handler_reset_content line

    # Disable reporting, which only works with Enterprise Chef
    grep -q enable_reporting "$client_rb" || echo "enable_reporting false" >> "$client_rb"

    mkdir -p "$handler_dir"

    # create reboot handler
    # (read returns non-zero at the end of the here-document; that is
    # expected and the variable is filled regardless)
    read -r -d '' reboot_handler_content <<"EOF"
class RebootHandler < Chef::Handler
  def report
    if node.run_state[:reboot]
      # remember reboot so crowbar can catch the reboot and wait until reboot finished
      node.set[:crowbar_wall][:wait_for_reboot] = true
      node.set[:crowbar_wall][:wait_for_reboot_requesttime] = `date +%s`.to_i
      node.save
      Chef::Log.info("Reboot requested through node.run_state[:reboot]")
      system("/sbin/reboot")
    end
  end
end
EOF
    # only update reboot_handler.rb if something changed
    _crowbar_join_write_if_changed "$handler_dir/reboot_handler.rb" \
        "$reboot_handler_content"

    # create reboot handler reset
    read -r -d '' reboot_handler_reset_content <<"EOF"
class RebootHandlerReset < Chef::Handler
  def report
    if defined?(node[:crowbar_wall][:wait_for_reboot]) and node[:crowbar_wall][:wait_for_reboot]== true
      boottime =`echo $(($(date +%s) - $(cat /proc/uptime|cut -d " " -f 1|cut -d "." -f 1)))`.to_i
      if boottime > node[:crowbar_wall][:wait_for_reboot_requesttime]
        node.set[:crowbar_wall][:wait_for_reboot] = false
        node.set[:crowbar_wall][:wait_for_reboot_requesttime] = 0
        node.save
        Chef::Log.debug("node[:crowbar_wall][:wait_for_reboot] reset done")
      else
        Chef::Log.debug("No reset of wait_for_reboot flag. boottime #{boottime} still <= reboot requesttime #{node[:crowbar_wall][:wait_for_reboot_requesttime]}.")
      end
    end
  end
end
EOF
    # only update reboot_handler_reset.rb if something changed
    _crowbar_join_write_if_changed "$handler_dir/reboot_handler_reset.rb" \
        "$reboot_handler_reset_content"

    for line in \
        "require \"$handler_dir/reboot_handler\"" \
        'reboot_handler = RebootHandler.new' \
        'report_handlers << reboot_handler # these fire at the end of a successful run' \
        "require \"$handler_dir/reboot_handler_reset\"" \
        'reboot_handler_reset = RebootHandlerReset.new' \
        'start_handlers << reboot_handler_reset # these fire at the start of a run' \
        'Encoding.default_external = Encoding::UTF_8 if RUBY_VERSION > "1.9"' \
        'Ohai::Config[:disabled_plugins] << "passwd"'; do
        # Lines 1-3 add the reboot handler as report_handler, lines 4-6 add
        # the reboot handler reset as start_handler, line 7 works around
        # https://tickets.opscode.com/browse/CHEF-3304 and line 8 avoids
        # excessively large node attributes due to a large number of
        # accounts.
        _crowbar_join_ensure_line "$client_rb" "$line"
    done
}

# Extra chef-client run performed in setup mode only.
#
# Until the transition of the network away from DHCP is arranged somewhere
# else, the first run of chef-client will always die because the networking
# barclamp changes the IP address from DHCP to static. This run absorbs that
# failure so later runs can pick up and carry on.
#
# Globals:   CHEF_CLIENT_OPTIONS (read)
# Returns:   the status of the final wait_for_admin_server call
do_chef_client_after_setup() {
    # Split the option string into separate words once, up front.
    local -a chef_opts=($CHEF_CLIENT_OPTIONS)

    echo_verbose "Running Chef Client (after setup)"
    log_to chef chef-client "${chef_opts[@]}"

    # Make sure our interfaces are as up as we can get them
    echo_verbose "Ensuring that our network interfaces are up."
    log_to ifup /sbin/service network restart

    # Wait again for the admin server. Otherwise chef-client runs fail with
    # EHOSTUNREACH (bsc#1054191)
    echo_verbose "Checking connectivity with administration server after network restart..."
    wait_for_admin_server
}

# Run chef-client, with one recovery attempt if the first run fails.
#
# Sequence: a plain run first. If that fails the node is moved to the
# "recovering" state, the time is synchronized, the Chef cache is emptied and
# chef-client is run again; when that run succeeds the node goes back to
# "readying" and a final run is made. Only when the recovery attempt fails is
# the node marked as a problem.
#
# Globals:   CHEF_CLIENT_OPTIONS (read), HOSTNAME (read),
#            final_state (set to "problem" when all runs failed)
do_chef_client() {
    # Split the option string into separate words once, up front.
    local -a chef_opts=($CHEF_CLIENT_OPTIONS)

    # Note that we only transition to problem state if the second run fails.
    echo_verbose "Running Chef Client (try 1)"
    log_to chef chef-client "${chef_opts[@]}" && return

    # we didn't succeed with chef-client, so let's try running it again with
    # a state where some roles will not be active
    echo_debug "Failed to run chef-client, trying with state \"recovering\""
    crowbarctl restricted transition $HOSTNAME "recovering"

    echo_debug "Syncing Time"
    sync_time
    echo_debug "Removing Chef Cache"
    rm -rf /var/cache/chef/*

    echo_verbose "Running Chef Client (try 2, pass 1) - cache cleanup"
    if log_to chef chef-client "${chef_opts[@]}"; then
        # it worked, cool, let's try again with "readying" state
        crowbarctl restricted transition $HOSTNAME "readying"
        echo_verbose "Running Chef Client (try 2, pass 2) - cache cleanup"
        log_to chef chef-client "${chef_opts[@]}" && return
    fi

    echo_error "chef-client run failed too many times, giving up."
    printf "Our IP address is: %s\n" "$(ip addr show)" >&2
    final_state="problem"
}


################## Option handling ##################

# Name of this script without its directory, used in usage and getopt
# messages. "$0" is quoted so a path containing spaces stays one argument.
SCRIPT=$(basename -- "$0")

# Print a short usage summary on stdout.
# Globals:   SCRIPT (read) - name shown for this script
usage() {
    printf '%s\n' \
        "$SCRIPT -- Synchronize with Crowbar administration server" \
        "" \
        "Usage:" \
        "$SCRIPT [--setup|--start|--stop] [--debug] [--verbose]"
}

# Let getopt(1) normalize the command line. None of the options takes an
# argument, so the short option string carries no ":" (a trailing ":" after
# "v" would make getopt swallow the next word as the argument of -v).
if ! ARGS=$(getopt -o "hdv" -l "help,setup,start,stop,debug,verbose" -n "$SCRIPT" -- "$@"); then
    usage
    exit 1
fi

# Replace the positional parameters with getopt's normalized list.
eval set -- "$ARGS"

MODE=
DEBUG=0
VERBOSE=0

while true; do
    case "$1" in
        --)
            # End of options; anything after it is ignored.
            break
            ;;
        -h|--help)
            usage
            exit 0
            ;;
        --setup|--start|--stop)
            # Exactly one mode may be given.
            if test -n "$MODE"; then
                echo_error "Error: more than one mode specified."
                exit 1
            fi
            # The mode name is the option without its leading "--".
            MODE="${1#--}"
            shift
            ;;
        -d|--debug)
            DEBUG=1
            shift
            ;;
        -v|--verbose)
            VERBOSE=1
            shift
            ;;
        *)
            # Never loop forever on a word this case statement does not
            # know about.
            echo_error "Error: unexpected argument $1."
            exit 1
            ;;
    esac
done

if test -z "$MODE"; then
    echo_error "Error: no mode specified."
    exit 1
fi

# Defensive check; the loop above can only set one of these three values.
if [[ "$MODE" != "setup" && "$MODE" != "start" && "$MODE" != "stop" ]]; then
    echo_error "Error: invalid mode $MODE."
    exit 1
fi

################## Core ##################

# Exit status of the whole script; raised to 1 further down when the node
# does not end up in the "ready" state.
EXVAL=0

# HOME must be set for zypper: if the autoyast.xml had any <media_url>
# elements containing credentials, they get cached in
# /root/.zypp/credentials.cat, and subsequent zypper runs only reuse them
# when HOME points at /root.
export HOME=/root

# Values filled in when the template is rendered.
IP="<%= @admin_ip %>"
HTTP_SERVER="<%= @admin_ip %>:<%= @web_port %>"
NTP_SERVERS="<%= @ntp_servers_ips.join(" ") %>"
# Subset of NTP_SERVERS found to be reachable; filled in later.
VALID_NTP_SERVERS=""

# Extra chef-client arguments: debug logging when --debug was given.
CHEF_CLIENT_OPTIONS=
[ "$DEBUG" -ne 0 ] && CHEF_CLIENT_OPTIONS="-l debug"

if [[ "$MODE" == "setup" || "$MODE" == "start" ]]; then
    echo_verbose "Waiting on our network interface to come up..."
    wait_for_network

    # An unreachable administration server or chef-server is only fatal in
    # "start" mode; "setup" carries on regardless.
    echo_verbose "Checking connectivity with administration server..."
    if ! wait_for_admin_server && [[ "$MODE" == "start" ]]; then
        echo_error "Cannot ping administration server on $IP..."
        exit 1
    fi

    if ! netcat -z "$IP" 4000 && [[ "$MODE" == "start" ]]; then
        echo_error "Cannot connect to chef-server..."
        exit 1
    fi

    echo_verbose "Getting hostname..."
    if ! wait_for_hostname; then
        echo_error "Cannot get hostname..."
        exit 1
    fi

    if [[ ! -f /etc/chef/validation.pem && "$MODE" == "start" ]]; then
        echo_error "Please join Crowbar by running with --setup first"
        exit 1
    fi

    # Get our hostname
    HOSTNAME=$(hostname -f)

    # stop ntpd before we run ntpdate, and start it again afterwards.
    NTP_SERVICE=ntpd
    NTP_START=0
    if service "$NTP_SERVICE" status > /dev/null; then
        log_to time service "$NTP_SERVICE" stop
        NTP_START=1
    fi

    # We only run NTP-related bits if the NTP server(s) can be reached.
    # $NTP_SERVERS is a space-separated list and is split on purpose.
    for NTP_SERVER in $NTP_SERVERS; do
        if ping -q -c1 "$NTP_SERVER" > /dev/null && netcat -zu "$NTP_SERVER" 123; then
            VALID_NTP_SERVERS="$VALID_NTP_SERVERS $NTP_SERVER"
        fi
    done
    if [ -z "$VALID_NTP_SERVERS" ]; then
        echo_verbose "Cannot connect to NTP server(s), will skip time synchronization..."
    fi

    echo_verbose "Synchronizing time (pass 1)"
    sync_time

    # Mark us as readying, and get our cert.
    crowbarctl restricted transition "$HOSTNAME" "readying"
    # do_chef_client changes this to "problem" when every run fails.
    final_state="ready"

    [[ "$MODE" == "setup" ]] && do_setup

    do_ensure_chef_configuration

    # Run Chef
    echo_verbose "Synchronizing time (pass 2)"
    sync_time

    [[ "$MODE" == "setup" ]] && do_chef_client_after_setup
    do_chef_client

    # Transition to our final state
    crowbarctl restricted transition "$HOSTNAME" "$final_state"

    # make sure to keep hostname
    [[ "$MODE" == "setup" ]] && echo "$HOSTNAME" > /etc/HOSTNAME

    [ "$NTP_START" -eq 1 ] && log_to time service "$NTP_SERVICE" start
    [[ "$MODE" == "setup" ]] && log_to chef service chef-client start

    [[ "$final_state" == "ready" ]] || EXVAL=1
elif [[ "$MODE" == "stop" ]]; then
    # Resolve the fully qualified name first, so the state lookup uses the
    # same node name as the transition below and as the setup/start path.
    HOSTNAME=$(hostname -f)
    state=$(get_state "$HOSTNAME")

    case "$state" in
        "reset"|"reinstall"|"confupdate")
            # don't change the state, it has an impact on the PXE config
            final_state="$state"
            ;;
        *)
            # A queued start job for reboot.target means the machine is
            # rebooting rather than powering off.
            if systemctl --no-legend --full --no-pager list-jobs reboot.target | grep -q " start "; then
                final_state="reboot"
            else
                final_state="shutdown"
            fi
            crowbarctl restricted transition "$HOSTNAME" "$final_state"
            ;;
    esac

    echo_debug "Stopping with state=$final_state"
fi # mode

echo_verbose "Done"

exit "$EXVAL"