dpn-admin/dpn-sync

View on GitHub
bin/check_sidekiq_queue

Summary

Maintainability
Test Coverage
#!/bin/bash
# ========================================================================================
# Sidekiq Queue Size Nagios Check
#
# (c) Wanelo Inc, Distributed under Apache License
#
# Usage:
# To check a regular queue:
#        ./check_sidekiq_queue [ -h <host> ] [ -a <password> ] [ -q <queue> ] [ -n <namespace> ] [ -d <redis-db> ] [ -w <warn_size> ] [ -c <critical_size> ]
# Eg:    ./check_sidekiq_queue -w 500 -c 2000   # warning at a queue size of 500 or more, critical at 2000 or more
#
# To check schedule or retry (system) queue:
#        ./check_sidekiq_queue [ -h <host> ] [ -a <password> ] [ -s <schedule|retry> ] [ -n <namespace> ] [ -d <redis-db> ] [ -w <warn_size> ] [ -c <critical_size> ]
#
# ========================================================================================

# Exit codes understood by Nagios, one per service state.
STATE_OK="0"
STATE_WARNING="1"
STATE_CRITICAL="2"
STATE_UNKNOWN="3"

# Redis connection defaults (overridable with -h, -a and -d).
HOST="127.0.0.1"
PASS=""
DB="0"

# What to measure: a named queue (-q) or a system set (-s), optionally
# under a key namespace (-n).
QUEUE="default"
SYSTEM=""
NAMESPACE=""

# Queue sizes at which the check turns WARNING / CRITICAL (-w, -c).
WARNING_THRESHOLD="500"
CRITICAL_THRESHOLD="1000"

# Parse parameters

#######################################
# Abort when an option is not followed by a value.
# Arguments: the remaining command-line words, option first.
# Outputs:   a one-line message on stdout when the value is missing.
# Returns:   0 when a value is present; otherwise exits with STATE_UNKNOWN.
#######################################
_require_value() {
  if [ $# -lt 2 ]; then
    echo "Missing value for argument: $1"
    exit "${STATE_UNKNOWN:-3}"
  fi
}

#######################################
# Read the command-line options into the configuration globals.
# Every option takes exactly one value.
# Globals:   DB, HOST, PASS, QUEUE, NAMESPACE, SYSTEM,
#            WARNING_THRESHOLD, CRITICAL_THRESHOLD (written)
# Arguments: the script's command-line arguments.
# Outputs:   a one-line message on stdout for an unknown option or a
#            missing value.
# Returns:   0 on success; exits with STATE_UNKNOWN on a usage error.
#######################################
parse_args() {
  while [ $# -gt 0 ]; do
    case "$1" in
      -d | --db)
        _require_value "$@"
        DB=$2
        ;;
      -h | --hostname)
        _require_value "$@"
        HOST=$2
        ;;
      -a | --password)
        _require_value "$@"
        PASS=$2
        ;;
      -q | --queue)
        _require_value "$@"
        QUEUE=$2
        ;;
      -n | --namespace)
        _require_value "$@"
        NAMESPACE=$2
        ;;
      -s | --system)
        _require_value "$@"
        SYSTEM=$2
        ;;
      -w | --warning)
        _require_value "$@"
        WARNING_THRESHOLD=$2
        ;;
      -c | --critical)
        _require_value "$@"
        CRITICAL_THRESHOLD=$2
        ;;
      *)
        echo "Unknown argument: $1"
        exit "${STATE_UNKNOWN:-3}"
        ;;
    esac
    # Drop the option and its value together.
    shift 2
  done
}

parse_args "$@"

# Search /opt/local/bin ahead of the inherited PATH
# (presumably where redis-cli is installed on the monitored hosts — confirm).
PATH=/opt/local/bin:$PATH
# Host name shown in the plugin output.
NODENAME=$HOSTNAME

# Scratch file that captures redis-cli's stderr. It is created with mktemp
# so its name cannot be predicted (a fixed /tmp/<name>.<pid> path can be
# pre-created or symlinked by another local user), and it starts out empty.
ERR=$(mktemp "${TMPDIR:-/tmp}/redis-cli.error.XXXXXX") || {
  echo "SIDEKIQ UNKNOWN : unable to create temporary file"
  exit "${STATE_UNKNOWN:-3}"
}
# Remove the scratch file on every exit path, including early failures.
trap 'rm -f -- "$ERR"' EXIT

#######################################
# Print the Nagios status line, remove the stderr scratch file and
# terminate the plugin.
# Globals:   NODENAME, QUEUE, QUEUE_SIZE, WARNING_THRESHOLD,
#            CRITICAL_THRESHOLD, ERR (read)
# Arguments: $1 - status text placed after "SIDEKIQ"
#            $2 - exit code to terminate with
# Outputs:   one line on stdout
# Returns:   does not return; exits with $2
#######################################
function result {
  local description=$1
  local status=$2
  local line="SIDEKIQ ${description} : ${NODENAME} ${QUEUE_SIZE} on ${QUEUE}"

  # Performance data is only meaningful for a numeric queue size. On error
  # paths QUEUE_SIZE holds an error message (or is unset), which would yield
  # malformed perfdata, so the perfdata section is left out in that case.
  if [[ "${QUEUE_SIZE}" =~ ^[0-9]+$ ]]; then
    line+="|sidekiq_queue_${QUEUE}=${QUEUE_SIZE};${WARNING_THRESHOLD};${CRITICAL_THRESHOLD}"
  fi

  printf '%s\n' "$line"
  rm -f -- "$ERR"
  exit "$status"
}

# ----------------------------------------------------------------------------
# Main check: validate the options, ask Redis for the queue size and report
# the matching Nagios state through `result` (which prints and exits).
# ----------------------------------------------------------------------------

# Succeeds when $1 is a decimal integer (an optional leading minus is allowed).
_is_integer() {
  [[ "$1" =~ ^-?[0-9]+$ ]]
}

# Prints OK, WARNING or CRITICAL for queue size $1, given the warning
# threshold $2 and the critical threshold $3. CRITICAL wins when both apply.
_queue_state() {
  local size=$1 warn=$2 crit=$3
  if [ "$size" -ge "$crit" ]; then
    echo "CRITICAL"
  elif [ "$size" -ge "$warn" ]; then
    echo "WARNING"
  else
    echo "OK"
  fi
}

if [ "$QUEUE" != "default" ] && [ -n "$SYSTEM" ]; then
  result "CRITICAL invalid usage: pass -q or -s but not both" "$STATE_CRITICAL"
fi

if [ -n "$SYSTEM" ] && [ "$SYSTEM" != "schedule" ] && [ "$SYSTEM" != "retry" ]; then
  result "CRITICAL invalid usage: -s expect one of schedule or retry" "$STATE_CRITICAL"
fi

# A non-integer threshold would make the numeric comparisons fail and the
# check fall through to OK, so reject it up front.
if ! _is_integer "$WARNING_THRESHOLD" || ! _is_integer "$CRITICAL_THRESHOLD"; then
  result "CRITICAL invalid usage: -w and -c expect integer queue sizes" "$STATE_CRITICAL"
fi

# Build the redis-cli arguments as an array so that a password or queue name
# containing spaces or glob characters reaches redis-cli as a single word.
redis_args=(-h "$HOST")
if [ -n "$PASS" ]; then
  redis_args+=(-a "$PASS")
fi
redis_args+=(-n "$DB")

key_prefix=""
if [ -n "$NAMESPACE" ]; then
  key_prefix="${NAMESPACE}:"
fi

if [ -n "$SYSTEM" ]; then
  # The schedule and retry sets are sized with ZCARD.
  redis_query=(zcard "${key_prefix}${SYSTEM}")
  QUEUE=$SYSTEM
else
  # Regular queues are sized with LLEN on "<namespace>queue:<name>".
  redis_query=(llen "${key_prefix}queue:${QUEUE}")
fi

redis_reply=$(redis-cli "${redis_args[@]}" "${redis_query[@]}" 2>"$ERR")
QUEUE_SIZE=$(printf '%s\n' "$redis_reply" | cut -d ' ' -f 1)

# Anything redis-cli wrote to stderr is a failure, except the notice newer
# redis-cli releases print whenever -a is used ("Warning: Using a password
# with '-a' ..."), which would otherwise turn every authenticated check
# CRITICAL.
redis_errors=$(grep -v '^Warning: Using a password' -- "$ERR" 2>/dev/null)
if [ -n "$redis_errors" ]; then
  QUEUE_SIZE=$redis_errors
  result "CRITICAL" "$STATE_CRITICAL"
fi

# redis-cli can also report a problem on stdout (for example an error reply
# from the server). A reply that is not a plain number must not be compared
# numerically: the comparison would fail and the check would report OK.
if ! [[ "$QUEUE_SIZE" =~ ^[0-9]+$ ]]; then
  QUEUE_SIZE=${redis_reply:-no reply from redis-cli}
  result "CRITICAL" "$STATE_CRITICAL"
fi

case "$(_queue_state "$QUEUE_SIZE" "$WARNING_THRESHOLD" "$CRITICAL_THRESHOLD")" in
  CRITICAL) result "CRITICAL" "$STATE_CRITICAL" ;;
  WARNING) result "WARNING" "$STATE_WARNING" ;;
  *) result "OK" "$STATE_OK" ;;
esac