chef/cookbooks/provisioner/templates/default/crowbar_join.suse.sh.erb
#!/bin/bash
# Copyright 2011, Dell
# Copyright 2012, SUSE Linux Products GmbH
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Bootstrap logging: make sure the log directory exists, stamp the error log
# with a header for this run, then send the script's stderr to that log.
[[ -d /var/log/crowbar/crowbar_join ]] || mkdir -p /var/log/crowbar/crowbar_join/
echo "======== Running $0 $* ($(date '+%F %T %z')) ========" \
  >>/var/log/crowbar/crowbar_join/errlog
exec 2>>/var/log/crowbar/crowbar_join/errlog
# Trace every command, prefixed with its source file, line and function name.
# The trace is written to stderr, i.e. into the errlog opened above.
export PS4='${BASH_SOURCE}@${LINENO}(${FUNCNAME[0]}): '
set -x
################## Functions ##################
# Print an error message.
# Arguments: $* - message words; they are joined with the first character of
#                 IFS (a space by default).
# Outputs:   the message followed by a newline, on stdout.
echo_error() {
  # Deliberately stdout rather than fd 2: the script redirects stderr to a
  # log file, so a message written there would not be seen by the caller.
  # The expansion is quoted so the text is printed verbatim: no whitespace
  # collapsing and no glob expansion of characters such as '*'.
  printf '%s\n' "$*"
}
# Print a message on stdout when verbose output is enabled.
# Globals:   VERBOSE (read) - non-zero enables output; unset counts as 0.
# Arguments: $* - message words, joined with the first character of IFS.
# Returns:   non-zero when the message is suppressed.
echo_verbose() {
  # The default keeps the numeric test valid when VERBOSE was never set, and
  # the quoted expansion prints the message without splitting or globbing.
  [ "${VERBOSE:-0}" -ne 0 ] && printf '%s\n' "$*"
}
# Print a message on stdout when debug output is enabled.
# Globals:   DEBUG (read) - non-zero enables output; unset counts as 0.
# Arguments: $* - message words, joined with the first character of IFS.
# Returns:   non-zero when the message is suppressed.
echo_debug() {
  # The default keeps the numeric test valid when DEBUG was never set, and
  # the quoted expansion prints the message without splitting or globbing.
  [ "${DEBUG:-0}" -ne 0 ] && printf '%s\n' "$*"
}
# Run a command and log its output.
# The command's stdout is appended to <name>.log and its stderr to <name>.err
# under /var/log/crowbar/crowbar_join. Timestamped "Running ..." and
# "Done ..." marker lines are written to both files around the command.
# Arguments: $1  - base name of the log files
#            $2… - the command to run and its arguments
# Outputs:   on failure, two lines via echo_error naming the log files.
# Returns:   the exit status of the command.
log_to() {
  local __log="/var/log/crowbar/crowbar_join/$1"
  local __timestamp
  __timestamp="$(date '+%F %T %z')"
  shift
  printf "\n%s\n" "$__timestamp: Running $*" \
    | tee -a "$__log.err" >> "$__log.log"
  local _ret=0
  "$@" 2>> "$__log.err" >> "$__log.log" || _ret=$?
  if [ "$_ret" -ne 0 ]; then
    echo_error "$__timestamp: $* failed."
    echo_error "See $__log.log and $__log.err for more information."
  fi
  # The format needs a %s conversion here; without one printf ignores its
  # argument and the "Done" marker never reaches the logs.
  printf "\n%s\n--------\n" "$(date '+%F %T %z'): Done $*" \
    | tee -a "$__log.err" >> "$__log.log"
  return "$_ret"
}
# Query the state of a node through crowbarctl.
# Arguments: $1 - node name passed to "crowbarctl restricted show"
# Outputs:   the state on stdout, or "unknown" when the crowbarctl output
#            does not start with "state ".
get_state() {
  # Both variables are local so a caller's own "state" variable is untouched.
  local output state
  # we expect something like "state ready"
  output=$(crowbarctl restricted show --plain --filter state "$1")
  # Strip the leading "state "; if nothing was stripped the output was not
  # in the expected form.
  state="${output##state }"
  if [ "$state" != "$output" ]; then
    printf '%s\n' "$state"
  else
    echo "unknown"
  fi
}
# Block until "ip addr" reports an "inet " line that does not contain " lo",
# polling once per second. There is no timeout.
wait_for_network() {
  until ip addr | grep -v " lo" | grep -q "inet "; do
    sleep 1
  done
}
# Wait for "hostname -f" to succeed.
# Each round probes twice (loop condition, then a capturing retry) and sleeps
# 5 seconds before the next round; it gives up after 10 failed rounds.
# Globals:   HOSTNAME (written) - set to the output of the capturing retry,
#            which is empty when that retry fails.
# Returns:   0 once "hostname -f" succeeds, 1 after the last failed round.
wait_for_hostname() {
  # Counters are local so they do not leak into, or collide with, the
  # caller's variables.
  local tries_left=10
  local exit_code
  while ! hostname -f > /dev/null; do
    HOSTNAME=$(hostname -f)
    exit_code=$?
    echo_debug "$HOSTNAME"
    echo_debug "$exit_code"
    if [ "$exit_code" -eq 0 ]; then
      break
    fi
    tries_left=$((tries_left - 1))
    if [ "$tries_left" -eq 0 ]; then
      return 1
    fi
    echo_debug "sleeping 5 seconds"
    sleep 5
  done
  return 0
}
# Wait for the administration server to answer a ping.
# Sends one ping per attempt, sleeping 1 second between attempts, and gives
# up after 120 failed attempts.
# Globals:   IP (read) - address to ping
# Returns:   0 once a ping succeeds, 1 after the last failed attempt.
wait_for_admin_server() {
  # Local so the counter does not leak into the caller's scope.
  local tries_left=120
  while ! ping -q -c1 "$IP" > /dev/null; do
    tries_left=$((tries_left - 1))
    if [ "$tries_left" -eq 0 ]; then
      return 1
    fi
    echo_debug "Waiting for administration server"
    sleep 1
  done
  return 0
}
# Set the clock from the reachable NTP servers using ntpdate.
# Tries up to 120 times, sleeping 1 second after each failure. If every
# attempt fails, VALID_NTP_SERVERS is cleared so that later calls skip
# synchronization immediately.
# Globals:   VALID_NTP_SERVERS (read, written) - space-separated server list
# Returns:   status of the last command run; nothing in this script checks it.
sync_time() {
  if [ -z "$VALID_NTP_SERVERS" ]; then
    echo_verbose "Skipping time synchronization..."
    return
  fi
  local tries_left=120
  # Arithmetic comparison: inside [[ ]] the '>' operator compares strings.
  while (( tries_left > 0 )); do
    # Intentionally unquoted: each server must become its own argument.
    if /usr/sbin/ntpdate -u $VALID_NTP_SERVERS; then
      break
    fi
    tries_left=$((tries_left - 1))
    echo_verbose "Waiting for NTP server(s) $VALID_NTP_SERVERS"
    sleep 1
  done
  if (( tries_left == 0 )); then
    VALID_NTP_SERVERS=""
    echo_verbose "Giving up on time synchronization; will skip further attempts..."
  fi
}
# First-time setup: fetch the Chef validation key from the admin web server,
# make sure client.rb points at the Chef server, and enable chef-client.
# Globals:   HTTP_SERVER, IP (read); retry (written, loop counter)
# Returns:   status of the final log_to call.
do_setup() {
  mkdir -p /etc/chef

  # Up to 30 download attempts, backing off one second longer each time.
  for retry in {1..30}; do
    curl -f --retry 2 -o /etc/chef/validation.pem \
      --connect-timeout 60 -s -L \
      "http://$HTTP_SERVER/validation.pem"
    if [ -f /etc/chef/validation.pem ]; then
      break
    fi
    sleep $retry
  done

  # Make sure that the client knows how to talk to the server: rewrite
  # client.rb (keeping a .bak copy) unless it already names our server URL.
  local client_rb=/etc/chef/client.rb
  local url_re="^\s*chef_server_url\s*[\"\']http://$IP:4000[\"\']"
  if [ ! -f $client_rb ] || ! grep -q "$url_re" $client_rb; then
    if [ -f $client_rb ]; then
      mv $client_rb $client_rb.bak
    fi
    {
      echo "chef_server_url \"http://$IP:4000\""
      echo "zypper_check_gpg true"
    } >$client_rb
  fi

  log_to chef systemctl enable chef-client
}
# Bring the local Chef client configuration up to date:
#   - disable reporting in /etc/chef/client.rb,
#   - install the two reboot handler scripts under /var/chef/handlers
#     (rewritten only when missing or when their checksum differs),
#   - append the handler registration and workaround lines to client.rb
#     unless an identical line is already there.
# Globals:   reboot_handler_content, reboot_handler_reset_content, line
#            (all written)
do_ensure_chef_configuration() {
  local client_rb=/etc/chef/client.rb
  local handler_rb=/var/chef/handlers/reboot_handler.rb
  local reset_rb=/var/chef/handlers/reboot_handler_reset.rb

  # Disable reporting, which only works with Enterprise Chef
  if ! grep -q enable_reporting $client_rb; then
    echo "enable_reporting false" >> $client_rb
  fi

  mkdir -p /var/chef/handlers

  # Ruby source of the reboot handler. The quoted delimiter keeps the text
  # literal, and the payload stays at column 0 because it is written out
  # verbatim.
  read -d '' reboot_handler_content <<"EOF"
class RebootHandler < Chef::Handler
def report
if node.run_state[:reboot]
# remember reboot so crowbar can catch the reboot and wait until reboot finished
node.set[:crowbar_wall][:wait_for_reboot] = true
node.set[:crowbar_wall][:wait_for_reboot_requesttime] = `date +%s`.to_i
node.save
Chef::Log.info("Reboot requested through node.run_state[:reboot]")
system("/sbin/reboot")
end
end
end
EOF
  # Write the handler when it is missing or its checksum differs.
  if [ ! -f $handler_rb ] || \
     [ "$(sha256sum $handler_rb | cut -d ' ' -f 1)" != \
       "$(echo "$reboot_handler_content" | sha256sum | cut -d ' ' -f 1)" ]; then
    echo "$reboot_handler_content" > $handler_rb
  fi

  # Ruby source of the handler that resets the reboot flag.
  read -d '' reboot_handler_reset_content <<"EOF"
class RebootHandlerReset < Chef::Handler
def report
if defined?(node[:crowbar_wall][:wait_for_reboot]) and node[:crowbar_wall][:wait_for_reboot]== true
boottime =`echo $(($(date +%s) - $(cat /proc/uptime|cut -d " " -f 1|cut -d "." -f 1)))`.to_i
if boottime > node[:crowbar_wall][:wait_for_reboot_requesttime]
node.set[:crowbar_wall][:wait_for_reboot] = false
node.set[:crowbar_wall][:wait_for_reboot_requesttime] = 0
node.save
Chef::Log.debug("node[:crowbar_wall][:wait_for_reboot] reset done")
else
Chef::Log.debug("No reset of wait_for_reboot flag. boottime #{boottime} still <= reboot requesttime #{node[:crowbar_wall][:wait_for_reboot_requesttime]}.")
end
end
end
end
EOF
  # Same rule as above: write when missing or when the checksum differs.
  if [ ! -f $reset_rb ] || \
     [ "$(sha256sum $reset_rb | cut -d ' ' -f 1)" != \
       "$(echo "$reboot_handler_reset_content" | sha256sum | cut -d ' ' -f 1)" ]; then
    echo "$reboot_handler_reset_content" > $reset_rb
  fi

  # Lines client.rb must contain, in order:
  #   1-3  register the reboot handler as a report handler
  #   4-6  register the reset handler as a start handler
  #   7    work around https://tickets.opscode.com/browse/CHEF-3304
  #   8    avoid excessively large node attributes due to large number of
  #        accounts
  # A line is appended only when no whole line matches it exactly.
  for line in \
      'require "/var/chef/handlers/reboot_handler"' \
      'reboot_handler = RebootHandler.new' \
      'report_handlers << reboot_handler # these fire at the end of a successful run' \
      'require "/var/chef/handlers/reboot_handler_reset"' \
      'reboot_handler_reset = RebootHandlerReset.new' \
      'start_handlers << reboot_handler_reset # these fire at the start of a run' \
      'Encoding.default_external = Encoding::UTF_8 if RUBY_VERSION > "1.9"' \
      'Ohai::Config[:disabled_plugins] << "passwd"'; do
    grep -qxF -e "$line" $client_rb || echo "$line" >> $client_rb
  done
}
# Initial chef-client run right after setup, followed by a network restart.
# Globals:   CHEF_CLIENT_OPTIONS (read)
# Returns:   status of the final wait_for_admin_server call.
do_chef_client_after_setup() {
  # The first chef-client run is expected to die: the networking barclamp
  # switches the IP address from DHCP to static while it runs. Until that
  # transition is arranged somewhere else, we simply pick up and carry on.
  echo_verbose "Running Chef Client (after setup)"
  # Intentionally unquoted so that "-l debug" splits into two arguments.
  log_to chef chef-client $CHEF_CLIENT_OPTIONS

  # Bring the interfaces as far up as we can get them.
  echo_verbose "Ensuring that our network interfaces are up."
  log_to ifup /sbin/service network restart

  # Wait for the admin server again, otherwise the following chef-client
  # runs fail with EHOSTUNREACH (bsc#1054191).
  echo_verbose "Checking connectivity with administration server after network restart..."
  wait_for_admin_server
}
# Run chef-client, with one recovery cycle if the first run fails.
# Recovery: move the node to "recovering", resync time, wipe the Chef cache
# and run again; if that works, move to "readying" and run once more.
# When the recovery cycle fails too, final_state is set to "problem".
# Globals:   HOSTNAME, CHEF_CLIENT_OPTIONS (read); final_state (written)
do_chef_client() {
  # Note that we only transition to problem state if the second try fails.
  echo_verbose "Running Chef Client (try 1)"
  log_to chef chef-client $CHEF_CLIENT_OPTIONS && return

  # The run failed: retry in a state where some roles will not be active.
  echo_debug "Failed to run chef-client, trying with state \"recovering\""
  crowbarctl restricted transition $HOSTNAME "recovering"

  echo_debug "Syncing Time"
  sync_time

  echo_debug "Removing Chef Cache"
  rm -rf /var/cache/chef/*

  echo_verbose "Running Chef Client (try 2, pass 1) - cache cleanup"
  if log_to chef chef-client $CHEF_CLIENT_OPTIONS; then
    # That worked; repeat with the "readying" state.
    crowbarctl restricted transition $HOSTNAME "readying"
    echo_verbose "Running Chef Client (try 2, pass 2) - cache cleanup"
    log_to chef chef-client $CHEF_CLIENT_OPTIONS && return
  fi

  echo_error "chef-client run failed too many times, giving up."
  printf "Our IP address is: %s\n" "$(ip addr show)" >&2
  final_state="problem"
}
################## Option handling ##################
# Name this script was invoked as, without its directory; used in the usage
# text and as the program name in getopt diagnostics. "$0" is quoted and
# guarded with "--" so paths with spaces or a leading dash are handled.
SCRIPT=$(basename -- "$0")
# Print the command-line synopsis on stdout.
# Globals:   SCRIPT (read) - program name shown in the text
usage() {
  printf '%s\n' \
    "$SCRIPT -- Synchronize with Crowbar administration server" \
    "" \
    "Usage:" \
    "$SCRIPT [--setup|--start|--stop] [--debug] [--verbose]"
}
# Parse the command line.
# Exactly one mode (--setup, --start or --stop) is required; -d/--debug and
# -v/--verbose are optional flags. Results are left in MODE, DEBUG and
# VERBOSE. None of the options takes an argument, so the short-option string
# has no ':' suffixes.
if ! ARGS=$(getopt -o "hdv" -l "help,setup,start,stop,debug,verbose" -n "$SCRIPT" -- "$@"); then
  usage
  exit 1
fi
# getopt emits a shell-quoted, normalized argument list; eval re-reads it.
eval set -- "$ARGS"

MODE=
DEBUG=0
VERBOSE=0
while true; do
  case "$1" in
    --)
      break
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    --setup|--start|--stop)
      if test -n "$MODE"; then
        echo_error "Error: more than one mode specified."
        exit 1
      fi
      # Mode name is the option without its leading dashes.
      MODE="${1#--}"
      shift
      ;;
    -d|--debug)
      DEBUG=1
      shift
      ;;
    -v|--verbose)
      VERBOSE=1
      shift
      ;;
    *)
      # Without this arm an unmatched word is never shifted away and the
      # loop spins forever.
      echo_error "Error: unexpected argument '$1'."
      usage
      exit 1
      ;;
  esac
done

case "$MODE" in
  "")
    echo_error "Error: no mode specified."
    exit 1
    ;;
  setup|start|stop)
    ;;
  *)
    echo_error "Error: invalid mode $MODE."
    exit 1
    ;;
esac
################## Core ##################
# Exit status of the script; changed to 1 later if the node does not end up
# in the "ready" state.
EXVAL=0

# This is required if the autoyast.xml had any <media_url> elements
# containing credentials; if so, they get cached in
# /root/.zypp/credentials.cat and so subsequent zypper runs need HOME
# set in order to reuse them.
HOME=/root
export HOME

# Values filled in when the template is rendered.
IP="<%= @admin_ip %>"
HTTP_SERVER="<%= @admin_ip %>:<%= @web_port %>"
NTP_SERVERS="<%= @ntp_servers_ips.join(" ") %>"

# Subset of NTP_SERVERS found to be reachable; filled in later.
VALID_NTP_SERVERS=""

# Extra chef-client arguments: debug logging when --debug was given.
CHEF_CLIENT_OPTIONS=""
[ "$DEBUG" -ne 0 ] && CHEF_CLIENT_OPTIONS="-l debug"
# Main flow, selected by MODE.
#   setup/start: wait for network, admin server and hostname, sync time,
#                run chef-client and transition the node to its final state.
#   stop:        report "reboot" or "shutdown" to Crowbar, unless the node
#                is in a state that must be left untouched.
if [[ "$MODE" == "setup" || "$MODE" == "start" ]]; then
  echo_verbose "Waiting on our network interface to come up..."
  wait_for_network

  # Connectivity failures are fatal for --start only; --setup carries on.
  echo_verbose "Checking connectivity with administration server..."
  if ! wait_for_admin_server && [[ "$MODE" == "start" ]]; then
    echo_error "Cannot ping administration server on $IP..."
    exit 1
  fi
  if ! netcat -z "$IP" 4000 && [[ "$MODE" == "start" ]]; then
    echo_error "Cannot connect to chef-server..."
    exit 1
  fi

  echo_verbose "Getting hostname..."
  if ! wait_for_hostname; then
    echo_error "Cannot get hostname..."
    exit 1
  fi

  if [[ ! -f /etc/chef/validation.pem && "$MODE" == "start" ]]; then
    echo_error "Please join Crowbar by running with --setup first"
    exit 1
  fi

  # Get our hostname
  HOSTNAME=$(hostname -f)

  # stop ntpd before we run ntpdate, and start it again afterwards.
  NTP_SERVICE=ntpd
  NTP_START=0
  if service $NTP_SERVICE status > /dev/null; then
    log_to time service $NTP_SERVICE stop
    NTP_START=1
  fi

  # We only run NTP-related bits if the NTP server(s) can be reached.
  # NTP_SERVERS is intentionally unquoted: it is a space-separated list.
  for NTP_SERVER in $NTP_SERVERS; do
    if ping -q -c1 "$NTP_SERVER" > /dev/null && netcat -zu "$NTP_SERVER" 123; then
      VALID_NTP_SERVERS="$VALID_NTP_SERVERS $NTP_SERVER"
    fi
  done
  if [[ -z "$VALID_NTP_SERVERS" ]]; then
    echo_verbose "Cannot connect to NTP server(s), will skip time synchronization..."
  fi

  echo_verbose "Synchronizing time (pass 1)"
  sync_time

  # Mark us as readying, and get our cert.
  crowbarctl restricted transition "$HOSTNAME" "readying"
  final_state="ready"
  [[ "$MODE" == "setup" ]] && do_setup
  do_ensure_chef_configuration

  # Run Chef
  echo_verbose "Synchronizing time (pass 2)"
  sync_time
  [[ "$MODE" == "setup" ]] && do_chef_client_after_setup
  # do_chef_client changes final_state to "problem" when it gives up.
  do_chef_client

  # Transition to our final state
  crowbarctl restricted transition "$HOSTNAME" "$final_state"

  # make sure to keep hostname
  [[ "$MODE" == "setup" ]] && echo "$HOSTNAME" > /etc/HOSTNAME
  [[ "$NTP_START" -eq 1 ]] && log_to time service $NTP_SERVICE start
  [[ "$MODE" == "setup" ]] && log_to chef service chef-client start
  [[ "$final_state" == "ready" ]] || EXVAL=1
elif [[ "$MODE" == "stop" ]]; then
  # Resolve the FQDN first: the state lookup must use the same node name as
  # the transitions, not the HOSTNAME value bash sets on its own at startup.
  HOSTNAME=$(hostname -f)
  state=$(get_state "$HOSTNAME")
  case "$state" in
    "reset"|"reinstall"|"confupdate")
      # don't change the state, it has an impact on the PXE config
      final_state="$state"
      ;;
    *)
      # A queued start job for reboot.target means we are rebooting rather
      # than powering off.
      if systemctl --no-legend --full --no-pager list-jobs reboot.target | grep -q " start "; then
        final_state="reboot"
      else
        final_state="shutdown"
      fi
      crowbarctl restricted transition "$HOSTNAME" "$final_state"
      ;;
  esac
  echo_debug "Stopping with state=$final_state"
fi # mode
echo_verbose "Done"
exit "$EXVAL"