SUSE/saptune

View on GitHub
ospackage/bin/saptune_check

Summary

Maintainability
Test Coverage
#!/bin/bash
# ------------------------------------------------------------------------------
# Copyright (c) 2019 SUSE LLC
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of version 3 of the GNU General Public License as published by the
# Free Software Foundation.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, contact SUSE Linux GmbH.
#
# ------------------------------------------------------------------------------
# Author: Sören Schmidt <soeren.schmidt@suse.com>
#
# This tool checks if saptune is set up correctly. 
# It will not dig deeper to check if the tuning itself is working.
#
# exit codes:       0   No errors found. Saptune has been set up correctly (warnings may have been reported).
#                   1   Some errors occurred. Saptune will not work properly.
#                   2   Checks could not be run (OS is not SLES, saptune is not installed or its version is unknown).
#                   3   Wrong parameters given to the tool on commandline.
#
# Changelog:
#
# 08.01.2021  v0.1      First release. (Split of sapconf_saptune_check v1.2.1)
# 19.08.2021  v0.2      supports (only) saptune v3
#                       tests system status and lists failed services
# 26.08.2021  v0.2.1    added missing os_release to global arrays
# 09.11.2021  v0.2.2    degraded system is no longer considered an error
# 26.09.2022  v0.2.3    degraded systemd state gets explained in more detail (TEAM-6584)
# 26.09.2022  v0.3      reactivate unused function compile_filelists() as file_check() and add detection of sapconf remains (TEAM-6275)
# 03.04.2023  v0.3.1    Removed error regarding tuned sapconf profile (TEAM-7529)

version='0.3.1'

# These global associative arrays are used throughout the program:
#
# package_version     -  package version (string) per package
# os_version          -  OS release and service pack
# system_status       -  system status and failed units
# unit_state_active   -  systemd unit state as reported by "systemctl is-active"
# unit_state_enabled  -  systemd unit state as reported by "systemctl is-enabled"
# tool_profile        -  current profile (string) for each tool
declare -A package_version
declare -A os_version
declare -A system_status
declare -A unit_state_active
declare -A unit_state_enabled
declare -A tool_profile

# Colored output is the default; the print_*() functions turn it off when stdout is no terminal.
color=1

function header() {
    # Params:       TITLE
    # Output:       An empty line, TITLE and a line of '=' characters as long as TITLE.
    # Returncode:   -
    #
    # Prints TITLE as an underlined section heading.
    # An empty TITLE results in an empty underline.
    #
    # Requires:     -

    local title="${1}" underline

    # Pad an empty string to the width of the title and turn the padding into '='.
    # This needs no eval and yields no '=' at all for an empty title.
    printf -v underline '%*s' "${#title}" ''
    underline="${underline// /=}"

    echo -e "\n${title}"
    echo "${underline}"
}

function print_ok() {
    # Params:       MESSAGE
    # Output:       "[ OK ] MESSAGE" (the "OK" in green if colors are enabled)
    # Returncode:   -
    #
    # Prints a success message. If stdout is not a terminal, the global
    # variable "color" gets set to 0 and no escape sequences are written.
    #
    # Requires:     -

    local green='' reset=''

    # No colors if we run in a pipe.
    if ! [ -t 1 ] ; then
        color=0
    fi

    if [ ${color} -eq 1 ] ; then
        green="\033[0;32m"
        reset="\033[0m"
    fi

    echo -e "[ ${green}OK${reset} ] ${1}"
}

function print_fail() {
    # Params:       MESSAGE HINT
    # Output:       "[FAIL] MESSAGE<tab>-> HINT" (the "FAIL" in red and the hint in bold if colors are enabled)
    # Returncode:   -
    #
    # Prints an error message together with a hint. If stdout is not a terminal,
    # the global variable "color" gets set to 0 and no escape sequences are written.
    #
    # Requires:     -

    local red='' bold='' reset=''

    # No colors if we run in a pipe.
    if ! [ -t 1 ] ; then
        color=0
    fi

    if [ ${color} -eq 1 ] ; then
        red="\033[0;31m"
        bold="\033[1m"
        reset="\033[0m"
    fi

    echo -e "[${red}FAIL${reset}] ${1}${bold}\t-> ${2}${reset}"
}

function print_warn() {
    # Params:       MESSAGE HINT
    # Output:       "[WARN] MESSAGE<tab>-> HINT" (the "WARN" in yellow and the hint in bold if colors are enabled)
    # Returncode:   -
    #
    # Prints a warning together with a hint. If stdout is not a terminal,
    # the global variable "color" gets set to 0 and no escape sequences are written.
    #
    # Requires:     -

    local yellow='' bold='' reset=''

    # No colors if we run in a pipe.
    if ! [ -t 1 ] ; then
        color=0
    fi

    if [ ${color} -eq 1 ] ; then
        yellow="\033[0;33m"
        bold="\033[1m"
        reset="\033[0m"
    fi

    echo -e "[${yellow}WARN${reset}] ${1}${bold}\t-> ${2}${reset}"
}

function print_note() {
    # Params:       MESSAGE
    # Output:       "[NOTE] MESSAGE" (the "NOTE" colored if colors are enabled)
    # Returncode:   -
    #
    # Prints an informational message. If stdout is not a terminal, the global
    # variable "color" gets set to 0 and no escape sequences are written.
    #
    # Requires:     -

    local tint='' reset=''

    # No colors if we run in a pipe.
    if ! [ -t 1 ] ; then
        color=0
    fi

    if [ ${color} -eq 1 ] ; then
        tint="\033[0;37m"
        reset="\033[0m"
    fi

    echo -e "[${tint}NOTE${reset}] ${1}"
}

function is_in() {
    # Params:       NAME LIST
    # Output:       -
    # Returncode:   0 (true), 1 (false)
    #
    # Checks if NAME is in the whitespace-separated string LIST
    # and returns 0 (true) or 1 (false).
    # The words of LIST are compared literally (no pathname expansion).
    #
    # Requires:     -

    local name="${1}" elem
    local -a list=()

    # Split LIST into words. "read -d ''" consumes the complete string (including
    # newlines) and returns non-zero at end of input, which is expected here.
    read -r -d '' -a list <<< "${2}"

    for elem in "${list[@]}" ; do
        if [ "${name}" == "${elem}" ] ; then
            return 0
        fi
    done
    return 1
}

function get_os_version() {
    # Params:   -
    # Output:   -
    # Exitcode: -
    #
    # Reads VERSION_ID from /etc/os-release and stores its parts in the
    # associative array "os_version":
    #
    #   os_version['release']       VERSION_ID with the last ".suffix" removed
    #   os_version['servicepack']   VERSION_ID with the first "prefix." removed
    #
    # NOTE(review): If VERSION_ID contains no dot, both entries receive the
    #               complete VERSION_ID.
    #
    # Requires: -

    local VERSION_ID os_release_entry

    # The matching line is a shell assignment. Evaluating it sets the (local) VERSION_ID.
    os_release_entry="$(grep ^VERSION_ID= /etc/os-release)"
    eval "${os_release_entry}"

    os_version['servicepack']="${VERSION_ID#*.}"
    os_version['release']="${VERSION_ID%.*}"
}


function get_package_versions() {
    # Params:   PACKAGE...
    # Output:   -
    # Exitcode: -
    #
    # Queries the version of each PACKAGE with rpm and stores it in the
    # associative array "package_version" (key: package name).
    # If the rpm query fails (e.g. package not installed), an empty string is stored.
    #
    # Requires: -

    local pkg queried

    for pkg in "$@" ; do
        # rpm writes its complaints into the captured output, so drop the output on failure.
        queried=$(rpm -q --qf '%{version}' "${pkg}" 2>&1) || queried=''
        package_version["${pkg}"]="${queried}"
    done
}


function get_system_status() {
    # Params:   -
    # Output:   -
    # Exitcode: -
    #
    # Collects data about the system status and failed units.
    #
    # The function updates the associative array "system_status":
    #
    #   system_status['status']         output of "systemctl is-system-running"
    #   system_status['failed_units']   names of all failed units, each followed by a space
    #
    # Error messages of systemctl are suppressed.
    #
    # Requires: -

    system_status["status"]=$(systemctl is-system-running 2> /dev/null)

    # The redirection has to be attached to systemctl itself. At the end of the
    # pipeline it would only silence tr.
    # The unit name is the first column of the plain listing.
    system_status["failed_units"]=$(systemctl list-units --state=failed --plain --no-legend --no-pager 2> /dev/null | cut -d ' ' -f 1 | tr '\n' ' ')
}

function get_unit_states() {
    # Params:   UNIT...
    # Output:   -
    # Exitcode: -
    #
    # Asks systemctl for the "is-active" and "is-enabled" state of each UNIT and
    # stores the answers in the associative arrays "unit_state_active" and
    # "unit_state_enabled" (key: unit name).
    # If systemctl prints nothing, the state is recorded as "missing".
    #
    # Requires: -

    local svc active enabled

    for svc in "$@" ; do
        active=$(systemctl is-active "${svc}" 2> /dev/null)
        enabled=$(systemctl is-enabled "${svc}" 2> /dev/null)

        [ -n "${active}" ] || active='missing'
        [ -n "${enabled}" ] || enabled='missing'

        unit_state_active["${svc}"]="${active}"
        unit_state_enabled["${svc}"]="${enabled}"
    done
}

function get_tool_profiles() {
    # Params:   -
    # Output:   -
    # Exitcode: -
    #
    # Determines the current profile of tuned and saptune (profile==Notes/Solution).
    #
    #   tool_profile['tuned']     content of /etc/tuned/active_profile or "missing",
    #                             if the file does not exist or is empty
    #   tool_profile['saptune']   "solutions: ... notes: ..." built from TUNE_FOR_SOLUTIONS
    #                             and TUNE_FOR_NOTES in /etc/sysconfig/saptune (an empty
    #                             value is shown as "-") or "missing", if the file does
    #                             not exist or both values are empty
    #
    # The function updates the associative array "tool_profile".
    #
    # Requires: -

    local active_profile='' TUNE_FOR_NOTES TUNE_FOR_SOLUTIONS
    local saptune_config='/etc/sysconfig/saptune'

    [ -e /etc/tuned/active_profile ] && active_profile=$(< /etc/tuned/active_profile)
    tool_profile['tuned']="${active_profile:-missing}"

    # Start with "missing" and only overwrite it, if the configuration provides something.
    tool_profile['saptune']='missing'
    if [ -e "${saptune_config}" ] ; then
        # The matching lines are shell assignments. The command substitution is quoted,
        # so the values reach eval without word splitting or pathname expansion.
        eval "$(grep ^TUNE_FOR_NOTES= "${saptune_config}")"
        eval "$(grep ^TUNE_FOR_SOLUTIONS= "${saptune_config}")"
        if [ -n "${TUNE_FOR_NOTES}" ] || [ -n "${TUNE_FOR_SOLUTIONS}" ] ; then
            tool_profile['saptune']="solutions: ${TUNE_FOR_SOLUTIONS:--} notes: ${TUNE_FOR_NOTES:--}"
        fi
    fi
}

function configured_saptune_version() {
    # Params:   -
    # Output:   -
    # Exitcode: -
    #
    # Reads SAPTUNE_VERSION from /etc/sysconfig/saptune.
    # If the file does not exist or the value is empty, "missing" is reported.
    #
    # The function updates the global variable "configured_saptune_version".
    #
    # Requires: -

    local SAPTUNE_VERSION

    if [ -e /etc/sysconfig/saptune ] ; then
        # The matching line is a shell assignment. The command substitution is quoted,
        # so the value reaches eval without word splitting or pathname expansion.
        eval "$(grep ^SAPTUNE_VERSION= /etc/sysconfig/saptune)"
    fi
    configured_saptune_version="${SAPTUNE_VERSION:-missing}"
}

function collect_data() {
    # Params:   -
    # Output:   -
    # Exitcode: -
    #
    # Runs all data collectors one after another. Each of them stores its
    # results in global variables, which are evaluated later.
    #
    # Requires: get_os_version()
    #           get_package_versions()
    #           get_system_status()
    #           get_unit_states()
    #           get_tool_profiles()
    #           configured_saptune_version()

    # Packages and systemd services we want to know about.
    local -a packages=( sapconf saptune tuned )
    local -a services=( sapconf.service tuned.service saptune.service )

    get_os_version                          # OS version
    get_package_versions "${packages[@]}"   # package versions
    get_system_status                       # system status and failed services
    get_unit_states "${services[@]}"        # active/enabled state of the services
    get_tool_profiles                       # profiles of the various tools
    configured_saptune_version              # configured saptune version
}

function file_check() {
    # Params:   VERSIONTAG
    # Output:   warnings and fails with print_warn() and print_fail()
    # Exitcode: -
    #
    # Checks the existence of mandatory and invalid files for saptune
    # (depending on SLES release and VERSIONTAG) and prints warnings or fails.
    #
    # The following strings for VERSIONTAG are allowed: "saptune-3"
    #
    # Also for all mandatory and invalid files, we search for RPM leftovers (.rpmnew/.rpmsave).
    #
    # Finally it looks for files left behind by sapconf:
    #   - If sapconf is not installed (empty package_version['sapconf']), leftovers cause a warning.
    #   - If sapconf.service is "inactive" (unit_state_active['sapconf.service']), critical
    #     leftovers cause a fail.
    #
    # IMPORTANT:
    #   When adding new files every file must be listed in either of the arrays "mandatory_files"
    #   or "invalid_files" but in *each* SLES release and tag section!
    #
    # The function updates the variables "warnings" and "fails" of the caller (check_saptune()).
    #
    # Requires: print_warn() and print_fail()

    local VERSION_ID tag="${1}" file
    local -a mandatory_files=() invalid_files=() rpm_leftovers=() sapconf_leftovers critical_sapconf_leftovers

    # The matching line of /etc/os-release is a shell assignment, which sets the (local) VERSION_ID.
    eval "$(grep ^VERSION_ID= /etc/os-release)"
    case "${VERSION_ID}" in
        12*)
            case "${tag}" in
                saptune-3)
                    mandatory_files=( '/etc/sysconfig/saptune' )
                    invalid_files=( '/etc/saptune/extra/SAP_ASE-SAP_Adaptive_Server_Enterprise.conf' '/etc/saptune/extra/SAP_BOBJ-SAP_Business_OBJects.conf' '/etc/sysconfig/saptune-note-1275776' '/etc/sysconfig/saptune-note-1557506' '/etc/sysconfig/saptune-note-SUSE-GUIDE-01' '/etc/sysconfig/saptune-note-SUSE-GUIDE-02' '/etc/tuned/saptune' )
                    ;;
            esac
            ;;
        15*)
            case "${tag}" in
                saptune-3)
                    mandatory_files=( '/etc/sysconfig/saptune' )
                    invalid_files=( '/etc/saptune/extra/SAP_ASE-SAP_Adaptive_Server_Enterprise.conf' '/etc/saptune/extra/SAP_BOBJ-SAP_Business_OBJects.conf' '/etc/sysconfig/saptune-note-1275776' '/etc/sysconfig/saptune-note-1557506' '/etc/sysconfig/saptune-note-SUSE-GUIDE-01' '/etc/sysconfig/saptune-note-SUSE-GUIDE-02' '/etc/tuned/saptune' )
                    ;;
            esac
            ;;
    esac

    # Leftovers from a damaged sapconf update/removal, which do not prevent saptune from starting.
    sapconf_leftovers=( '/var/lib/sapconf' '/run/sapconf/active' '/run/sapconf_act_profile' )

    # Critical leftovers from a damaged sapconf update/removal, which prevent saptune from starting.
    critical_sapconf_leftovers=( '/var/lib/sapconf/act_profile' '/run/sapconf/active' )

    # Now check the existence of mandatory and invalid files and print warnings and fails.
    # On the way we collect the names of possible RPM leftovers of these files.
    for file in "${mandatory_files[@]}" ; do
        if [ ! -e "${file}" ] ; then
            print_fail "${file} is missing, but a mandatory file." "Check your installation!"
            ((fails++))
        fi
        rpm_leftovers+=( "${file}.rpmsave" "${file}.rpmnew" )
    done
    for file in "${invalid_files[@]}" ; do
        if [ -e "${file}" ] ; then
            print_warn "${file} is not used by this version. Maybe a leftover from an update?" "Check the content and remove it."
            ((warnings++))
        fi
        rpm_leftovers+=( "${file}.rpmsave" "${file}.rpmnew" )
    done

    # Print a warning if we have found RPM leftovers!
    for file in "${rpm_leftovers[@]}" ; do
        if [ -e "${file}" ] ; then
            print_warn "${file} found. This is a leftover from a package update!" "Check the content and remove it."
            ((warnings++))
        fi
    done

    # Print a warning and recommend a deletion, if sapconf is not installed and we found some files.
    # The hint has to name the file we have found (not an entry of the critical list).
    if [ -z "${package_version['sapconf']}" ] ; then
        for file in "${sapconf_leftovers[@]}" ; do
            if [ -e "${file}" ] ; then
                print_warn "${file} found. This is a leftover from a sapconf package upgrade or removal!" "Delete ${file}. If this happens regularly, please report a bug,"
                ((warnings++))
            fi
        done
    fi

    # Print a fail and recommend a deletion, if sapconf.service is stopped and we find these files.
    if [ "${unit_state_active['sapconf.service']}" == 'inactive' ] ; then
        for file in "${critical_sapconf_leftovers[@]}" ; do
            if [ -e "${file}" ] ; then
                print_fail "${file} found. This is an unintended leftover from sapconf!" "Delete ${file}. If this happens regularly, please report a bug,"
                ((fails++))
            fi
        done
    fi

}

function check_saptune() {
    # Params:       -
    # Output:       Results of all checks via header(), print_ok(), print_note(), print_warn()
    #               and print_fail() followed by a summary.
    # Returncode:   0   no errors found (warnings may have been printed)
    #               1   at least one error found
    #               2   saptune is not installed or has a version unknown to this script
    #
    # Checks if saptune is installed and set up correctly. The checks evaluate the
    # global arrays "package_version", "system_status", "unit_state_active",
    # "unit_state_enabled" and "tool_profile" as well as the global variable
    # "configured_saptune_version".
    #
    # Requires:     header(), print_ok(), print_note(), print_warn(), print_fail(), file_check()

    # "fails" and "warnings" are also incremented by file_check().
    local fails=0 warnings=0 version_tag

    # We can stop, if saptune is not installed.
    if [ -z "${package_version['saptune']}" ] ; then
        echo "saptune is not installed"
        return 2
    fi

    case "${package_version['saptune']}" in
        3.*)
            version_tag='saptune-3'
            ;;
        *)
            print_fail "The saptune version ${package_version['saptune']} is unknown to this script! Exiting."
            return 2
            ;;
    esac

    # Let's test.
    header "Checking saptune"
    print_note "saptune package has version ${package_version['saptune']}"

    # Check if leftover files still in place (for the version determined above).
    file_check "${version_tag}"

    # Checking if system is "running" and has no failed units.
    case "${system_status['status']}" in
        running)
            print_ok "systemd reports status \"running\""
            ;;
        degraded)
            print_warn "systemd reports status \"${system_status['status']}\". Failed units: ${system_status['failed_units']}"  "Check the cause and reset the state with 'systemctl reset-failed'!"
            print_note "A degraded systemd system status means, that one or more systemd units failed. The system is still operational! Tuning might not be affected, please run 'saptune verify' for detailed information."
            ((warnings++))
            ;;
        *)
            print_fail "systemd reports status \"${system_status['status']}\"."  "Check systemd to see what is wrong!"
            ((fails++))
            ;;
    esac

    # Checking if the correct version has been configured.
    case "${configured_saptune_version}" in
        3)
            print_ok "configured saptune version is 3"
            ;;
        *)
            print_fail "Configured saptune version is ${configured_saptune_version}" "Misconfiguration happened or an update went wrong! This needs to be investigated."
            ((fails++))
            ;;
    esac

    # Checking status of sapconf.service (only if sapconf is installed).
    if [ -n "${package_version['sapconf']}" ] ; then
        case "${unit_state_active['sapconf.service']}" in
            inactive)
                print_ok "sapconf.service is inactive"
                ;;
            *)
                print_fail "sapconf.service is ${unit_state_active['sapconf.service']}" "Run 'systemctl stop sapconf.service' or 'saptune service takeover'."
                ((fails++))
                ;;
        esac
        case "${unit_state_enabled['sapconf.service']}" in
            disabled)
                print_ok "sapconf.service is disabled"
                ;;
            *)
                print_fail "sapconf.service is ${unit_state_enabled['sapconf.service']}" "Run 'systemctl disable sapconf.service' or 'saptune service takeover'."
                ((fails++))
                ;;
        esac
    fi

    # Checking if saptune.service is enabled and started.
    case "${unit_state_active['saptune.service']}" in
        active)
            print_ok "saptune.service is active"
            ;;
        *)
            print_fail "saptune.service is ${unit_state_active['saptune.service']}" "Run 'systemctl start saptune.service', 'saptune service start' or 'saptune service takeover'."
            ((fails++))
            ;;
    esac
    case "${unit_state_enabled['saptune.service']}" in
        enabled)
            print_ok "saptune.service is enabled"
            ;;
        *)
            print_fail "saptune.service is ${unit_state_enabled['saptune.service']}" "Run 'systemctl enable saptune.service', 'saptune service enable' or 'saptune service takeover'."
            ((fails++))
            ;;
    esac

    # Checking status of tuned.service and the profile (only if tuned is installed).
    if [ -n "${package_version['tuned']}" ] ; then
        case "${tool_profile['tuned']}" in
            saptune)
                print_fail "tuned.service is ${unit_state_active['tuned.service']}/${unit_state_enabled['tuned.service']} with profile ('${tool_profile['tuned']}')" "This profile should not exist anymore! This needs to be investigated."
                ((fails++))
                ;;
            *)
                print_note "tuned profile is '${tool_profile['tuned']}'"
                case "${unit_state_active['tuned.service']}" in
                    inactive)
                        print_ok "tuned.service is inactive"
                        ;;
                    *)
                        print_warn "tuned.service is ${unit_state_active['tuned.service']}" "Verify that tuning does not conflict with saptune or run 'systemctl stop tuned.service'!"
                        ((warnings++))
                        ;;
                esac
                case "${unit_state_enabled['tuned.service']}" in
                    disabled)
                        print_ok "tuned.service is disabled"
                        ;;
                    *)
                        print_warn "tuned.service is ${unit_state_enabled['tuned.service']}" "Verify that tuning does not conflict with saptune or run 'systemctl disable tuned.service'!"
                        ((warnings++))
                        ;;
                esac
                ;;
        esac
    fi

    # Summary.
    echo
    [ "${warnings}" -gt 0 ] && echo "${warnings} warning(s) have been found."
    [ "${fails}" -gt 0 ] && echo "${fails} error(s) have been found."
    if [ "${fails}" -gt 0 ] ; then
        echo "Saptune will not work properly!"
        return 1
    fi
    if [ "${warnings}" -gt 0 ] ; then
        echo "Saptune should work properly, but better investigate!"
    else
        echo "Saptune is set up correctly."
    fi
    return 0
}


# --- MAIN ---

# Tell the user who we are and what to expect.
echo -e "\nThis is ${0##*/} v${version}.\n"
echo -e "It verifies if saptune is set up correctly."
echo -e "Please keep in mind:"
echo -e " - This tool does not check, if the tuning itself works correctly."
echo -e " - Follow the hints from top to down to minimize side effects.\n"

# We only support SLES. The ID= line of /etc/os-release is a shell assignment,
# evaluating it sets the variable ID.
eval $(grep ^ID= /etc/os-release)
if [ "${ID}" != "sles" ] ; then
    echo "Only SLES is supported! Your OS ID is ${ID}! Exiting."
    exit 2
fi

# The tool does not take any parameters.
[ -z "${1}" ] || {
    echo "Usage: ${0##*/}"
    exit 3
}

# Collect the data first, then evaluate it.
collect_data
check_saptune

# Bye. The return code of check_saptune() becomes our exit code.
exit $?