src/collectors/charts.d.plugin/charts.d.plugin.in
#!/usr/bin/env bash
# SPDX-License-Identifier: GPL-3.0-or-later
# netdata
# real-time performance and health monitoring, done right!
# (C) 2017 Costa Tsaousis <costa@tsaousis.gr>
# GPL v3+
#
# charts.d.plugin allows easy development of BASH plugins
#
# if you need to run parallel charts.d processes, link this file to a different name
# in the same directory, with a .plugin suffix and netdata will start both of them,
# each will have a different config file and modules configuration directory.
#
# make sure the usual system binary directories are searched as well
PATH="${PATH}:/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin:@sbindir_POST@"
export PATH

PROGRAM_FILE="${0}"
MODULE_NAME="main"

# -----------------------------------------------------------------------------
# logging

PROGRAM_NAME="$(basename "${0}")"
SHORT_PROGRAM_NAME="${PROGRAM_NAME/.plugin/}"

# log priorities - the numbers must stay the same as the syslog() priorities
NDLP_EMERG=0  # system is unusable
NDLP_ALERT=1  # action must be taken immediately
NDLP_CRIT=2   # critical conditions
NDLP_ERR=3    # error conditions
NDLP_WARN=4   # warning conditions
NDLP_NOTICE=5 # normal but significant condition
NDLP_INFO=6   # informational
NDLP_DEBUG=7  # debug-level messages

# messages with a priority numerically above this one are not logged
LOG_LEVEL="${NDLP_INFO}"
# Translate the (case-insensitive) level name found in NETDATA_LOG_LEVEL
# into the matching NDLP_* number and store it in LOG_LEVEL.
# Names that are not recognized (including an empty value) leave LOG_LEVEL
# untouched.
set_log_min_priority() {
  local wanted="${NETDATA_LOG_LEVEL,,}"

  case "${wanted}" in
    emerg | emergency) LOG_LEVEL="${NDLP_EMERG}" ;;
    alert) LOG_LEVEL="${NDLP_ALERT}" ;;
    crit | critical) LOG_LEVEL="${NDLP_CRIT}" ;;
    err | error) LOG_LEVEL="${NDLP_ERR}" ;;
    warn | warning) LOG_LEVEL="${NDLP_WARN}" ;;
    notice) LOG_LEVEL="${NDLP_NOTICE}" ;;
    info) LOG_LEVEL="${NDLP_INFO}" ;;
    debug) LOG_LEVEL="${NDLP_DEBUG}" ;;
  esac
}

# apply the level requested through the environment right away
set_log_min_priority
# log LEVEL MESSAGE...
#
# Send one log entry to systemd-cat-native, as a set of FIELD=value lines.
# LEVEL is one of the NDLP_* numbers; entries whose LEVEL is numerically
# above LOG_LEVEL are dropped without calling systemd-cat-native.
# The message is prefixed with the current MODULE_NAME.
log() {
  local level="${1}"
  shift 1

  # less important than the configured threshold - nothing to do
  [[ -n "$level" && -n "$LOG_LEVEL" && "$level" -gt "$LOG_LEVEL" ]] && return

  # The here-document is deliberately not indented and must end with an
  # empty line (see the note after its terminator); presumably this is the
  # entry terminator systemd-cat-native expects - confirm against its docs.
  systemd-cat-native --log-as-netdata --newline="--NEWLINE--" <<EOFLOG
INVOCATION_ID=${NETDATA_INVOCATION_ID}
SYSLOG_IDENTIFIER=${PROGRAM_NAME}
PRIORITY=${level}
THREAD_TAG=charts.d.plugin
ND_LOG_SOURCE=collector
MESSAGE=${MODULE_NAME}: ${*//\\n/--NEWLINE--}

EOFLOG
  # AN EMPTY LINE IS NEEDED ABOVE
}
# info MESSAGE... - log MESSAGE with the NDLP_INFO priority
info() {
  log "${NDLP_INFO}" "$@"
}
# warning MESSAGE... - log MESSAGE with the NDLP_WARN priority
warning() {
  log "${NDLP_WARN}" "$@"
}
# error MESSAGE... - log MESSAGE with the NDLP_ERR priority
error() {
  log "${NDLP_ERR}" "$@"
}
# fatal MESSAGE... - log MESSAGE with the NDLP_ALERT priority, print the
# word DISABLE on stdout and terminate the script with exit code 1.
fatal() {
  log "${NDLP_ALERT}" "$@"
  echo "DISABLE"
  exit 1
}
# debug MESSAGE... - log MESSAGE with the NDLP_DEBUG priority, but only
# while the global variable 'debug' is "1"; otherwise it returns non-zero
# without logging.
debug() {
  [[ "${debug}" == "1" ]] && log "${NDLP_DEBUG}" "$@"
}
# -----------------------------------------------------------------------------
# check for BASH v4+ (required for associative arrays)
if ((BASH_VERSINFO[0] < 4)); then
  # too old to continue - complain on stderr and stop right here
  echo "BASH version 4 or later is required (this is ${BASH_VERSION})." >&2
  exit 1
fi
# -----------------------------------------------------------------------------
# create temp dir
debug=0
TMP_DIR=

# Remove the temporary directory (when one has been created), print the
# word EXIT on stdout and terminate the script with exit code 0.
# All the signals it is installed for are ignored first, so that the
# 'exit' at the end does not run the handler a second time.
chartsd_cleanup() {
  trap '' EXIT QUIT HUP INT TERM

  if [[ -n "${TMP_DIR}" && -d "${TMP_DIR}" ]]; then
    # string comparison, like debug() does, so that an unset or empty
    # 'debug' is simply treated as "off"
    [ "${debug}" = "1" ] && echo >&2 "$PROGRAM_NAME: cleaning up temporary directory $TMP_DIR ..."
    rm -rf -- "${TMP_DIR}"
  fi

  echo "EXIT"
  exit 0
}
# run the cleanup on normal exit and on the usual termination signals
trap chartsd_cleanup EXIT QUIT HUP INT TERM

# root gets its scratch directory under /var/run, any other user under /tmp
case "${UID}" in
  0) TMP_DIR="$(mktemp -d /var/run/netdata-${PROGRAM_NAME}-XXXXXXXXXX)" ;;
  *) TMP_DIR="$(mktemp -d /tmp/.netdata-${PROGRAM_NAME}-XXXXXXXXXX)" ;;
esac
# Print the current local date and time as "YYYY-MM-DD HH:MM:SS".
logdate() {
  local stamp_format="+%Y-%m-%d %H:%M:%S"

  date "${stamp_format}"
}
# -----------------------------------------------------------------------------
# check a few commands
# require_cmd NAME
#
# Locate the executable NAME (using 'which', falling back to 'command -v')
# and store its path in the global variable <NAME in upper case>_CMD.
#
# Returns 0 when an executable has been found. Otherwise it logs a warning,
# sets the variable to the empty string and returns 1.
require_cmd() {
  local x varname="${1^^}_CMD"

  x="$(which "${1}" 2>/dev/null || command -v "${1}" 2>/dev/null)"

  if [[ -z "${x}" || ! -x "${x}" ]]; then
    warning "command '${1}' is not found in ${PATH}."
    printf -v "${varname}" '%s' ""
    return 1
  fi

  # printf -v stores the path verbatim; nothing in it (like '$', quotes or
  # backticks) is expanded again, as it would be with eval
  printf -v "${varname}" '%s' "${x}"
  return 0
}
# every one of these commands must be available, otherwise we cannot run
for required_cmd in date sed basename dirname cat grep egrep mktemp awk timeout curl; do
  require_cmd "${required_cmd}" || exit 1
done
unset required_cmd
# -----------------------------------------------------------------------------
# same requirement as the early check, this time reported through fatal()
if ((BASH_VERSINFO[0] < 4)); then
  fatal "BASH version 4 or later is required, but found version: ${BASH_VERSION}. Please upgrade."
fi

info "started from '$PROGRAM_FILE' with options: $*"
# -----------------------------------------------------------------------------
# internal defaults
# netdata exposes a few environment variables for us
# use what netdata exported to us, or fall back to the built-in locations
: "${NETDATA_PLUGINS_DIR:=$(dirname "${0}")}"
: "${NETDATA_USER_CONFIG_DIR:=@configdir_POST@}"
: "${NETDATA_STOCK_CONFIG_DIR:=@libconfigdir_POST@}"

pluginsd="${NETDATA_PLUGINS_DIR}"
stockconfd="${NETDATA_STOCK_CONFIG_DIR}/${SHORT_PROGRAM_NAME}"
userconfd="${NETDATA_USER_CONFIG_DIR}/${SHORT_PROGRAM_NAME}"
olduserconfd="${NETDATA_USER_CONFIG_DIR}"
chartsd="${pluginsd}/../charts.d"

minimum_update_frequency="${NETDATA_UPDATE_EVERY-1}"
update_every="${minimum_update_frequency}" # the command line may overwrite this

# workaround for non BASH shells:
# the suffixes of the functions every module has to provide
charts_create="_create"
charts_update="_update"
charts_check="_check"
charts_underscore="_"

# charts.d can iterate more frequently than update_every, so that
# modules do not miss iterations.
# time_divisor is the length of an iteration, as a percentage of
# update_every (minimum 10%, maximum 100%).
# Example: with update_every = 1 second and time_divisor = 50,
# charts.d iterates every 500ms.
# A module is asked to collect data only when the time passed since
# its last collection is equal to or above its own update_every.
time_divisor=50

# how many seconds to run before exiting;
# netdata starts charts.d.plugin again after it exits
restart_timeout=$((3600 * 4))

# check whether modules use global variables (they should not);
# disabled, because the check does not support BASH v4 arrays
dryrunner=0

# set to 1 to verify that the timeout command exists
check_for_timeout=1

# whether modules are enabled when the configuration says nothing about them
enable_all_charts="yes"
# -----------------------------------------------------------------------------
# parse parameters
check=0
chart_only=

# Accepted parameters, checked in this order:
#   check        - only report which modules would run
#   debug | all  - enable debug logging
#   MODULE       - debug just this module (with or without .chart.sh)
#   NUMBER       - the data collection frequency, in seconds
# Anything else is a fatal error. Parsing stops at the first empty parameter.
while [ -n "$1" ]; do
  if [ "$1" = "check" ]; then
    check=1

  elif [ "$1" = "debug" ] || [ "$1" = "all" ]; then
    debug=1
    LOG_LEVEL=$NDLP_DEBUG

  elif [ -f "$chartsd/$1.chart.sh" ]; then
    # a module given by its name
    debug=1
    LOG_LEVEL=$NDLP_DEBUG
    chart_only="$(echo $1.chart.sh | sed "s/\.chart\.sh$//g")"

  elif [ -f "$chartsd/$1" ]; then
    # a module given by its filename
    debug=1
    LOG_LEVEL=$NDLP_DEBUG
    chart_only="$(echo $1 | sed "s/\.chart\.sh$//g")"

  else
    # it has to be a number then: it must survive arithmetic evaluation
    # unchanged
    n="$1"
    x=$((n))
    [ "$x" = "$n" ] || fatal "Cannot understand parameter $1. Aborting."

    update_every=$x
    [ $update_every -lt $minimum_update_frequency ] && update_every=$minimum_update_frequency
  fi

  shift
done
# -----------------------------------------------------------------------------
# loop control
# default sleep function
# 1 when a high resolution timer is in use
LOOPSLEEPMS_HIGHRES=0

# the functions that tell the time leave their result here
now_ms=

# Fallback clock: store the current unix time, in milliseconds, in now_ms.
# It only has a resolution of one second.
current_time_ms_default() {
  now_ms="$(date '+%s')000"
}

# the name of the function to call to get the time, its accuracy
# and the command to sleep with
current_time_ms="current_time_ms_default"
current_time_ms_accuracy=1
mysleep="sleep"
# if found and included, this file overwrites loopsleepms()
# and current_time_ms() with a high resolution timer function
# for precise looping.
source "$pluginsd/loopsleepms.sh.inc" ||
  error "Failed to load '$pluginsd/loopsleepms.sh.inc'."
# -----------------------------------------------------------------------------
# load my configuration
# the stock configuration is loaded first, so that the user one overrides it
for myconfig in "${NETDATA_STOCK_CONFIG_DIR}/${SHORT_PROGRAM_NAME}.conf" "${NETDATA_USER_CONFIG_DIR}/${SHORT_PROGRAM_NAME}.conf"; do
  if [ ! -f "$myconfig" ]; then
    warning "Configuration file '$myconfig' not found."
    continue
  fi

  source "$myconfig"
  case $? in
    0) info "Configuration file '$myconfig' loaded." ;;
    *) error "Config file '$myconfig' loaded with errors." ;;
  esac
done
# make sure time_divisor is right
# force time_divisor to be a number and keep it between 10 and 100
time_divisor=$((time_divisor))
if ((time_divisor < 10)); then
  time_divisor=10
elif ((time_divisor > 100)); then
  time_divisor=100
fi

# The timeout command is checked only now, after the configuration
# has been loaded, so that users may replace it with a function of
# their own that can be called like this:
# > timeout SECONDS command ...
[ $check_for_timeout -eq 1 ] && {
  require_cmd timeout || exit 1
}
# -----------------------------------------------------------------------------
# internal checks
# netdata passes the requested update frequency as the first argument
# arithmetic evaluation turns anything that is not a number into one
update_every=$((update_every + 1 - 1))

# zero is not a usable frequency - use 1 instead
((update_every == 0)) && update_every=1

# without the directory of the modules there is nothing to run
[ -d "$chartsd" ] || fatal "cannot find charts directory '$chartsd'"
# -----------------------------------------------------------------------------
# library functions
# fixid TEXT...
#
# Print TEXT converted to an identifier: every run of characters other than
# ASCII letters and digits becomes a single underscore, leading and trailing
# underscores are removed and letters are converted to lower case.
fixid() {
  # printf is used instead of echo, so that a TEXT like "-n" is not taken
  # as an option. The tr sets are plain ranges: in "[A-Z]" the brackets are
  # members of the set too, which would let '[' and ']' pass through.
  # The newline printf adds is converted to an underscore like any other
  # character and stripped by sed.
  printf '%s\n' "$*" |
    LC_ALL=C tr -cs 'A-Za-z0-9' '_' |
    sed -e 's|^_||' -e 's|_$||' |
    LC_ALL=C tr 'A-Z' 'a-z'
}
# isvarset VALUE
#
# Succeeds (returns 0) when VALUE carries real information; returns 1 when
# it is empty or one of the placeholders "unknown" and "none".
isvarset() {
  case "${1}" in
    "" | "unknown" | "none") return 1 ;;
    *) return 0 ;;
  esac
}
# Print the id of the operating system: NETDATA_CONTAINER_OS_ID when it is
# set (as decided by isvarset), NETDATA_SYSTEM_OS_ID otherwise.
getosid() {
  local os_id="${NETDATA_SYSTEM_OS_ID}"

  isvarset "${NETDATA_CONTAINER_OS_ID}" && os_id="${NETDATA_CONTAINER_OS_ID}"

  echo "${os_id}"
}
# run [-t SECONDS] COMMAND [ARGS...]
#
# Run COMMAND and return its exit code. With '-t SECONDS' the command is
# run under 'timeout SECONDS'; '-t 0' means no time limit.
#
# stderr of the command is collected in a file under TMP_DIR. When the
# command fails, a report with the quoted command line, the exit code and
# the collected stderr is written to our stderr; when the command succeeds
# its stderr is discarded.
run() {
  local ret pid="${BASHPID}" t="0"

  # always consume the option, so that '-t 0' does not become the command
  if [[ "${1}" == "-t" ]]; then
    t="${2}"
    shift 2
  fi

  if [[ "${t}" != "0" ]]; then
    timeout "${t}" "${@}" 2>"${TMP_DIR}/run.${pid}"
    ret=$?
  else
    "${@}" 2>"${TMP_DIR}/run.${pid}"
    ret=$?
  fi

  if [ ${ret} -ne 0 ]; then
    {
      # all the values are passed as arguments, so that a '%' or a '\' in
      # them cannot alter the format; 'status' is not set anywhere in this
      # file, so it defaults to ERROR
      printf "%s: %s: %s: %s: command '" "$(logdate)" "${PROGRAM_NAME}" "${status:-ERROR}" "${MODULE_NAME}"
      printf "%q " "${@}"
      printf "' failed with code %s:\n --- BEGIN TRACE ---\n" "${ret}"
      cat "${TMP_DIR}/run.${pid}"
      printf " --- END TRACE ---\n"
    } >&2
  fi

  rm -f "${TMP_DIR}/run.${pid}"
  return ${ret}
}
# float2int NUMBER MULTIPLIER
#
# Multiply the decimal NUMBER by MULTIPLIER and store the result, as an
# integer, in ${FLOAT2INT_RESULT} (a variable is used instead of stdout, so
# that callers do not need to fork).
#
# MULTIPLIER must be a power of 10 (1, 10, 100, ...). Its number of zeros
# is the number of decimal digits that are kept; the rest are truncated.
# Only the first word of NUMBER is used. NUMBER may be signed, may lack
# the integer or the decimal part, and may be in scientific notation.
float2int() {
  local f m="$2" a b l r sign="" v=($1)
  local z="00000000000000000000000"
  f=${v[0]}

  # the length of the multiplier - 1
  # i.e. the number of decimal digits to keep
  l=$((${#m} - 1))

  # check if the number is in scientific notation
  if [[ ${f} =~ ^[[:space:]]*[-+]?[0-9.]+[eE][-+]?[0-9]+ ]]; then
    # convert it to decimal
    # unfortunately, this fork cannot be avoided
    # if you know of a way to avoid it, please let me know
    f=$(printf "%0.${l}f" ${f})
  fi

  # take the sign out, so that it is applied to the whole result
  # and not just to the integer part (-1.5 x 10 is -15, not -10 + 5)
  case "${f}" in
    -*)
      sign="-"
      f="${f#-}"
      ;;
    +*)
      f="${f#+}"
      ;;
  esac

  # split the floating point number
  # in integer (a) and decimal (b)
  if [[ "${f}" == *.* ]]; then
    a="${f%%.*}"
    b="${f##*.}"
  else
    # no decimal point, so there is no decimal part
    a="${f}"
    b=""
  fi

  # if the integer part is missing
  # set it to zero
  [ -z "${a}" ] && a="0"

  # strip leading zeros from the integer part
  # base 10 conversion
  a=$((10#$a))

  # check the length of the decimal part
  # against the length of the multiplier
  if [ ${#b} -gt ${l} ]; then
    # too many digits - take the most significant
    b=${b:0:l}
  elif [ ${#b} -lt ${l} ]; then
    # too few digits - pad with zero on the right
    r=$((l - ${#b}))
    b="${b}${z:0:r}"
  fi

  # with a multiplier of 1 no decimal digits are left
  [ -z "${b}" ] && b="0"

  # strip leading zeros from the decimal part
  # base 10 conversion
  b=$((10#$b))

  # store the result
  FLOAT2INT_RESULT=$(((a * m) + b))
  if [ -n "${sign}" ]; then
    FLOAT2INT_RESULT=$((-FLOAT2INT_RESULT))
  fi
}
# -----------------------------------------------------------------------------
# charts check functions
# Print the names of all the modules found in the charts.d directory, one
# per line: the filenames matching *.chart.sh, without that suffix.
#
# It changes the current directory to $chartsd, so it is meant to be
# called in a subshell, like $(all_charts).
# Returns 1, after logging an error, when $chartsd cannot be entered.
all_charts() {
  local module_file

  # the return must not depend on the exit code of error()
  if ! cd "$chartsd"; then
    error "cannot cd to $chartsd"
    return 1
  fi

  for module_file in *.chart.sh; do
    # when nothing matches, the pattern itself is what we get
    [ -e "${module_file}" ] || continue
    printf '%s\n' "${module_file%.chart.sh}"
  done
}
# The value a module must be set to in the configuration to be enabled.
# Modules not listed here are enabled with "yes".
declare -A charts_enable_keyword=()
charts_enable_keyword['apache']="force"
charts_enable_keyword['cpu_apps']="force"
charts_enable_keyword['cpufreq']="force"
charts_enable_keyword['example']="force"
charts_enable_keyword['exim']="force"
charts_enable_keyword['hddtemp']="force"
charts_enable_keyword['load_average']="force"
charts_enable_keyword['mem_apps']="force"
charts_enable_keyword['mysql']="force"
charts_enable_keyword['nginx']="force"
charts_enable_keyword['phpfpm']="force"
charts_enable_keyword['postfix']="force"
charts_enable_keyword['sensors']="force"
charts_enable_keyword['squid']="force"
charts_enable_keyword['tomcat']="force"
# Modules that are always skipped, together with what is reported as
# their replacement.
declare -A obsolete_charts=()
obsolete_charts['apache']="python.d.plugin module"
obsolete_charts['cpu_apps']="apps.plugin"
obsolete_charts['cpufreq']="proc plugin"
obsolete_charts['exim']="python.d.plugin module"
obsolete_charts['hddtemp']="python.d.plugin module"
obsolete_charts['load_average']="proc plugin"
obsolete_charts['mem_apps']="proc plugin"
obsolete_charts['mysql']="python.d.plugin module"
obsolete_charts['nginx']="python.d.plugin module"
obsolete_charts['phpfpm']="python.d.plugin module"
obsolete_charts['postfix']="python.d.plugin module"
obsolete_charts['squid']="python.d.plugin module"
obsolete_charts['tomcat']="python.d.plugin module"
# Print, on one line separated by spaces, the names of the modules that
# should be loaded.
#
# A module found by all_charts() is selected when:
#  - it is not listed in obsolete_charts,
#  - the variable named after it (or enable_all_charts, when that variable
#    is empty) is equal to its keyword in charts_enable_keyword ("yes" for
#    modules without one), and
#  - its file defines the <module>_check(), <module>_create() and
#    <module>_update() functions at the beginning of a line (leading
#    spaces are allowed).
#
# MODULE_NAME is set to each module while it is examined, so that the log
# lines are attributed to it, and is reset to "main" afterwards.
all_enabled_charts() {
  local chart enabled required module_file fn missing
  local -a candidates=() verified=()

  # find all enabled charts
  for chart in $(all_charts); do
    MODULE_NAME="${chart}"

    if [ -n "${obsolete_charts[$chart]}" ]; then
      debug "is replaced by ${obsolete_charts[$chart]}, skipping it."
      continue
    fi

    # NOTE(review): eval on a name taken from a filename in charts.d;
    # a filename with shell meta-characters would be executed here
    eval "enabled=\$$chart"
    [ -z "${enabled}" ] && enabled="${enable_all_charts}"

    required="${charts_enable_keyword[$chart]}"
    [ -z "${required}" ] && required="yes"

    if [ "${enabled}" != "${required}" ]; then
      # point the user to the configuration file that is actually loaded
      info "is disabled. Add a line with $chart=$required in '${NETDATA_USER_CONFIG_DIR}/${SHORT_PROGRAM_NAME}.conf' to enable it (or remove the line that disables it)."
    else
      debug "is enabled for auto-detection."
      candidates+=("$chart")
    fi
  done
  MODULE_NAME="main"

  # check the enabled charts
  for chart in "${candidates[@]}"; do
    MODULE_NAME="${chart}"
    module_file="$chartsd/$chart.chart.sh"

    # only the first missing function is reported
    missing=
    for fn in "$chart$charts_check" "$chart$charts_create" "$chart$charts_update"; do
      if ! grep -q "^ *${fn}()" "${module_file}"; then
        error "module '$chart' does not seem to have a ${fn}() function. Disabling it."
        missing=1
        break
      fi
    done
    [ -n "${missing}" ] && continue

    # NOTE: the validation of the module configuration and the dry run
    # check (see 'dryrunner') that used to follow here are disabled

    verified+=("$chart")
  done
  MODULE_NAME="main"

  echo "${verified[*]}"
  debug "enabled charts: ${verified[*]}"
}
# -----------------------------------------------------------------------------
# load the charts
# Load every module selected by all_enabled_charts(), load its configuration
# and run its <module>_check() function. The modules that pass the check are
# accumulated, separated by spaces, in active_charts.
# suffixes of the per-module settings: <module>_retries, <module>_update_every
suffix_retries="_retries"
suffix_update_every="_update_every"
active_charts=
for chart in $(all_enabled_charts); do
# log lines are attributed to the module being loaded
MODULE_NAME="${chart}"
debug "loading module: '$chartsd/$chart.chart.sh'"
# the module is sourced into this shell; a failure is only reported
source "$chartsd/$chart.chart.sh"
[ $? -ne 0 ] && warning "Module '$chartsd/$chart.chart.sh' loaded with errors."
# first load the stock config
if [ -f "$stockconfd/$chart.conf" ]; then
debug "loading module configuration: '$stockconfd/$chart.conf'"
source "$stockconfd/$chart.conf"
[ $? -ne 0 ] && warning "Config file '$stockconfd/$chart.conf' loaded with errors."
else
debug "not found module configuration: '$stockconfd/$chart.conf'"
fi
# then load the user config (it overwrites the stock)
if [ -f "$userconfd/$chart.conf" ]; then
debug "loading module configuration: '$userconfd/$chart.conf'"
source "$userconfd/$chart.conf"
[ $? -ne 0 ] && warning "Config file '$userconfd/$chart.conf' loaded with errors."
else
debug "not found module configuration: '$userconfd/$chart.conf'"
# the obsolete location is consulted only when the user config is missing
if [ -f "$olduserconfd/$chart.conf" ]; then
# support for very old netdata that had the charts.d module configs in /etc/netdata
info "loading module configuration from obsolete location: '$olduserconfd/$chart.conf'"
source "$olduserconfd/$chart.conf"
[ $? -ne 0 ] && warning "Config file '$olduserconfd/$chart.conf' loaded with errors."
fi
fi
# read <module>_update_every; when it is lower than the global
# update_every (or unset, which evaluates to zero), raise it to update_every
eval "dt=\$$chart$suffix_update_every"
dt=$((dt + 1 - 1)) # make sure it is a number
if [ $dt -lt $update_every ]; then
eval "$chart$suffix_update_every=$update_every"
fi
# call <module>_check(); a zero exit code activates the module
$chart$charts_check
if [ $? -eq 0 ]; then
debug "module '$chart' activated"
active_charts="$active_charts $chart"
else
error "module's '$chart' check() function reports failure."
fi
done
MODULE_NAME="main"
debug "activated modules: $active_charts"
# -----------------------------------------------------------------------------
# check overwrites
# enable work time reporting
debug_time=
[ ${debug} -eq 1 ] && debug_time="tellwork"

# when a single module has been requested, keep it (provided it has been
# activated) and drop all the others
if [ -n "${chart_only}" ]; then
  debug "requested to run only for: '${chart_only}'"

  check_charts=
  for chart in $active_charts; do
    [ "$chart" = "$chart_only" ] || continue
    check_charts="$chart"
    break
  done
  active_charts="$check_charts"
fi
debug "activated charts: $active_charts"

# a pre-check only reports what would run and stops here
[ ${check} -eq 1 ] && {
  info "CHECK RESULT"
  info "Will run the charts: $active_charts"
  exit 0
}
# -----------------------------------------------------------------------------
# from now on, work inside our temporary directory
cd "${TMP_DIR}" || exit 1

# -----------------------------------------------------------------------------
# create charts

# call <module>_create() of every active module; the modules that succeed
# are collected, separated by spaces, in run_charts
run_charts=
for chart in $active_charts; do
  MODULE_NAME="${chart}"

  debug "calling '$chart$charts_create()'..."
  $chart$charts_create
  case $? in
    0)
      run_charts="$run_charts $chart"
      debug "'$chart' initialized."
      ;;
    *)
      error "module's '$chart' function '$chart$charts_create()' reports failure."
      ;;
  esac
done
MODULE_NAME="main"
debug "run_charts='$run_charts'"

# -----------------------------------------------------------------------------
# update dimensions

if [ -z "$run_charts" ]; then
  fatal "No charts to collect data from."
fi
# Write an empty line to stdout, unless stdout is a terminal.
# When the write fails, clean up and exit through chartsd_cleanup().
keepalive() {
  # nothing to do on a terminal
  [ -t 1 ] && return

  printf "\n" && return

  chartsd_cleanup
}
# per-module state of the main loop, keyed by module name; all the times
# are in milliseconds, as returned in now_ms by ${current_time_ms}
declare -A charts_last_update=() charts_update_every=() charts_retries=() charts_next_update=() charts_run_counter=() charts_serial_failures=()
# The data collection loop.
#
# It registers an execution time chart for every module in run_charts and
# then keeps calling <module>_update() of each module when its next update
# time has been reached, sleeping between the iterations.
# A module is dropped from the loop when its update function has failed
# more times in a row than its <module>_retries (10 when not set).
# The script exits with code 0 once restart_timeout seconds have passed,
# and through fatal() when no modules are left to run.
global_update() {
local exit_at \
c=0 dt ret last_ms exec_start_ms exec_end_ms \
chart now_charts=() next_charts=($run_charts) \
next_ms x seconds millis
# return the current time in ms in $now_ms
${current_time_ms}
# the time (in ms) after which the script exits
exit_at=$((now_ms + (restart_timeout * 1000)))
# initialize the state of every module and emit the definition of
# its execution time chart
for chart in $run_charts; do
eval "charts_update_every[$chart]=\$$chart$suffix_update_every"
test -z "${charts_update_every[$chart]}" && charts_update_every[$chart]=$update_every
eval "charts_retries[$chart]=\$$chart$suffix_retries"
test -z "${charts_retries[$chart]}" && charts_retries[$chart]=10
# align the last update to a multiple of the module's update_every
charts_last_update[$chart]=$((now_ms - (now_ms % (charts_update_every[$chart] * 1000))))
charts_next_update[$chart]=$((charts_last_update[$chart] + (charts_update_every[$chart] * 1000)))
charts_run_counter[$chart]=0
charts_serial_failures[$chart]=0
echo "CHART netdata.plugin_chartsd_$chart '' 'Execution time for $chart plugin' 'milliseconds / run' charts.d netdata.plugin_charts area 145000 ${charts_update_every[$chart]} '' '' '$chart'"
echo "DIMENSION run_time 'run time' absolute 1 1"
done
# the main loop
# it runs for as long as there are modules left in next_charts
while [ "${#next_charts[@]}" -gt 0 ]; do
keepalive
c=$((c + 1))
# the modules of this iteration; next_charts is rebuilt with the
# ones that remain active
now_charts=("${next_charts[@]}")
next_charts=()
# return the current time in ms in $now_ms
${current_time_ms}
for chart in "${now_charts[@]}"; do
MODULE_NAME="${chart}"
# is it time to collect data for this module?
if [ ${now_ms} -ge ${charts_next_update[$chart]} ]; then
last_ms=${charts_last_update[$chart]}
dt=$((now_ms - last_ms))
charts_last_update[$chart]=${now_ms}
# schedule the next update, skipping any slots already missed
while [ ${charts_next_update[$chart]} -lt ${now_ms} ]; do
charts_next_update[$chart]=$((charts_next_update[$chart] + (charts_update_every[$chart] * 1000)))
done
# the first call should not give a duration
# so that netdata calibrates to current time
# dt is in ms at this point; it is multiplied by 1000 before it is
# passed on (presumably microseconds are expected - confirm)
dt=$((dt * 1000))
charts_run_counter[$chart]=$((charts_run_counter[$chart] + 1))
if [ ${charts_run_counter[$chart]} -eq 1 ]; then
dt=
fi
exec_start_ms=$now_ms
# call <module>_update() with the time since its last run
$chart$charts_update $dt
ret=$?
# return the current time in ms in $now_ms
${current_time_ms}
exec_end_ms=$now_ms
# report how long the update function took, in ms
echo "BEGIN netdata.plugin_chartsd_$chart $dt"
echo "SET run_time = $((exec_end_ms - exec_start_ms))"
echo "END"
if [ $ret -eq 0 ]; then
charts_serial_failures[$chart]=0
next_charts+=($chart)
else
# count consecutive failures; the module is kept until they
# exceed its retries
charts_serial_failures[$chart]=$((charts_serial_failures[$chart] + 1))
if [ ${charts_serial_failures[$chart]} -gt ${charts_retries[$chart]} ]; then
error "module's '$chart' update() function reported failure ${charts_serial_failures[$chart]} times. Disabling it."
else
error "module's '$chart' update() function reports failure. Will keep trying for a while."
next_charts+=($chart)
fi
fi
else
# not yet time for this module, keep it for the next iteration
next_charts+=($chart)
fi
done
# NOTE(review): this keeps the name of the last module examined, while
# the rest of the file resets MODULE_NAME to "main" after such loops -
# confirm it is intended
MODULE_NAME="${chart}"
# wait the time you are required to
# start from a time far in the future and keep the earliest next update
# NOTE(review): charts_next_update also holds the entries of the
# modules that have been disabled - confirm this is intended
next_ms=$((now_ms + (update_every * 1000 * 100)))
for x in "${charts_next_update[@]}"; do [ ${x} -lt ${next_ms} ] && next_ms=${x}; done
next_ms=$((next_ms - now_ms))
# with a high resolution timer sleep exactly until the next update,
# otherwise sleep for update_every seconds
if [ ${LOOPSLEEPMS_HIGHRES} -eq 1 -a ${next_ms} -gt 0 ]; then
next_ms=$((next_ms + current_time_ms_accuracy))
seconds=$((next_ms / 1000))
millis=$((next_ms % 1000))
# zero pad the milliseconds to 3 digits
if [ ${millis} -lt 10 ]; then
millis="00${millis}"
elif [ ${millis} -lt 100 ]; then
millis="0${millis}"
fi
debug "sleeping for ${seconds}.${millis} seconds."
${mysleep} ${seconds}.${millis}
else
debug "sleeping for ${update_every} seconds."
${mysleep} $update_every
fi
# now_ms is the time measured before the sleep
test ${now_ms} -ge ${exit_at} && exit 0
done
fatal "nothing left to do, exiting..."
}
# start collecting data
global_update