Snippets

Edel SM CPE check_sip watchdog

Created by Edelberto Mania last modified
#!/bin/bash
# 2-sec sip watchdog
# monitors sip response, restart if output is not 200 or return code non-zero
# actual sip checking is via Sam Bashton tool (check_sip)
# http://bashton.com/content/nagiosplugins
# Edelberto Mania <ed@zenoradio.com>
# 20160120 - initial
# 20160212 - add websocket test
# 20160407 - add killing of defunct and old check_sip_vX.sh processes
# 20160429 - add livesupport@zenoradio.com and app-support@zenoradio.com as recipients

## ---------------------------------------------------------------------------
## Watchdog configuration
## ---------------------------------------------------------------------------

# Pause between two consecutive SIP probes.
interval=2 # seconds

# Alert e-mail recipients: To, Cc (comma-separated list) and Bcc.
rcpt="monitoring@zenoradio.com"
cc="ed@zenoradio.com,tier1@zenoradio.com,sysadmin@zenoradio.com,app-support@zenoradio.com"
bcc="livesupport@zenoradio.com"

# Probe command line. It is stored as a single string because the main loop
# expands it unquoted so that it word-splits into command + arguments.
# 'timeout 1' bounds a probe that would otherwise hang.
sip_tool='timeout 1 /usr/bin/perl /opt/tools/check_sip -u sip:11@localhost'

# NOTE: pid_name is intentionally NOT computed here. It is derived in the
# "check and kill previous script instance" section, which first verifies that
# /tmp/check_sip exists. Reading the pid file at this point made grep fail with
# a usage error whenever the file was missing, and the value was overwritten
# before its first use anyway.

## websocket
# Endpoint parts referenced only by the websocket check in the main loop,
# which is currently commented out.
scheme=http
host=localhost
ws_port=5066

# PID of this script instance; used to avoid killing ourselves and written to
# the pid file.
this_pid=$$

## kill defunct processes - for now ALL
## NOTE: field 3 of 'ps -ef' is the PPID column in the standard -f layout, so
## SIGKILL is sent to the *parent* of every defunct entry.
## The grep pattern is quoted so the shell cannot glob-expand it against a
## file named "defunct" in the current directory; the [t] keeps grep from
## matching its own command line.
for d_pid in $(ps -ef | grep 'defunc[t]' | awk '{print $3}'); do
        kill -9 "${d_pid}" 2>/dev/null
done

## kill old, lingering check_sip_v2.sh processes, except this one
for i in $(ps ax | grep 'check_sip_v2.s[h]' | awk '{print $1}'); do
        if [ "${i}" -ne "${this_pid}" ]; then
                kill -9 "${i}" 2>/dev/null
        fi
done

## check and kill previous script instance
## pid_name ends up as the last word of the command line of the process whose
## PID is stored in /tmp/check_sip, or stays 'invalid' when there is no usable
## pid file.
pid_name='invalid'
_pid=''
if [ -f /tmp/check_sip ]; then
        _pid=$(cat /tmp/check_sip)
        # Accept only a purely numeric PID that is not our own (a recycled PID
        # must never make us kill ourselves), and query exactly that PID.
        # Grepping the number out of the full 'ps axu' listing could match it
        # in any column and return several lines.
        if [[ "${_pid}" =~ ^[0-9]+$ ]] && [ "${_pid}" -ne "${this_pid}" ]; then
                pid_name=$(ps -o args= -p "${_pid}" | awk '{print $NF}')
        fi
fi

# Only kill the recorded process when it was started under the same name as
# this script.
if [ "${pid_name}" == "$0" ]; then
        echo "Killing previous process with PID ${_pid}..."
        kill -9 "${_pid}"
fi


#######################################
# Force-restart asterisk and its companion watchdog services, writing one log
# line before and one after the restart.
# Globals:   return (read) - only echoed into the log lines
# Arguments: $1 - label of the check that failed (echoed into the log)
#            $2 - timestamp placed at the start of the first log line
# Outputs:   appends two lines to ~emania/sip-restart.log
#######################################
function restart_asterisk() {
        local ast_pids wd_pids w

        echo "$2: asterisk ($1) [${return}] responded with an issue." >> ~emania/sip-restart.log

        # Skip the kill entirely when no asterisk process is running instead of
        # invoking 'kill -9' without any PID.
        ast_pids=$(pidof asterisk)
        if [ -n "${ast_pids}" ]; then
                # Unquoted on purpose: pidof prints a space-separated PID list.
                timeout 2 kill -9 ${ast_pids} 2>/dev/null
        fi
        sleep 2
        /usr/sbin/asterisk -Fqp

        for w in media.connection now_session phone_broadcast; do
                # Pattern is quoted so the shell cannot glob-expand the [w]
                # bracket against a matching file in the current directory.
                wd_pids=$(ps ax | grep "[w]atchdog.${w}.pl" | awk '{print $1}')
                if [ -n "${wd_pids}" ]; then
                        # Unquoted on purpose: one PID per line, split into arguments.
                        timeout 2 kill -9 ${wd_pids} 2>/dev/null
                fi
                # Relaunch the watchdog in the background.
                /usr/local/zenoradio/cpe/bin/service.watchdog.${w}.pl &
        done

        echo "$(date +%s%3N): asterisk ($1) restarted [${return}]." >> ~emania/sip-restart.log
}

#######################################
# E-mail a restart notification through mutt.
# Globals:   return (read) - echoed into the message body
#            rcpt, cc, bcc (read) - To / Cc / Bcc addresses
# Arguments: $1 - label of the check that failed (echoed into the body)
# Outputs:   pipes a one-line message body into /usr/bin/mutt
#######################################
function send_alert() {
        local host_name
        host_name=$(hostname)

        # Address variables are quoted so an empty value cannot shift mutt's
        # option parsing (e.g. '-c' swallowing '-b' as its argument).
        echo "$(date +%Y%m%dT%H%M%S): asterisk restarted [${return}] due to failed $1 response." | /usr/bin/mutt \
                -e 'unmy_hdr from; my_hdr From:  sysadmin@zenoradio.com' \
                -e "set realname=${host_name}" \
                -s "NOTIFICATION: ${host_name} asterisk restart" \
                -c "${cc}" -b "${bcc}" "${rcpt}"
}

## write pid in a file
echo "${this_pid}" >/tmp/check_sip

## Remove the pid file when this instance exits, but only while it still holds
## our own PID, so a newer instance's pid file is never deleted. The loop
## below never terminates on its own, so a plain 'rm' placed after it would be
## unreachable. SIGKILL cannot be trapped; in that case the file stays behind.
function remove_pid_file() {
        if [ -f /tmp/check_sip ] && [ "$(cat /tmp/check_sip 2>/dev/null)" == "${this_pid}" ]; then
                rm -f /tmp/check_sip
        fi
}
trap remove_pid_file EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

## probe forever: restart asterisk and send an alert whenever the SIP probe
## fails
while true; do
        # ${sip_tool} is expanded unquoted on purpose so it splits into a
        # command line. The 'timeout 1' inside it keeps a hung check_sip from
        # blocking this shell forever.
        output=$($sip_tool)
        return=$?
        # ${output} is left unquoted on purpose: echo then flattens the probe
        # output onto one line before the third word is extracted, and the
        # parsing relies on that.
        status=$(echo ${output}|grep STATUS|awk '{print $3}')
        if [[ "${status}" != "200" || "${return}" -ne 0 ]]; then
                ## failed sip, restart
                s=sip
                restart_asterisk "${s}" "$(date +%s%3N)"
                send_alert "${s}"
        #else
        # temp disabling websocket check
        #       ## sip is okay, let's check websocket
        #       s=ws
        #       timeout 1 curl -H "Connection: Upgrade" \
        #               -H "Upgrade: websocket" \
        #               -H "Host: ${host}:${ws_port}" \
        #               -H "Origin: ${scheme}://${host}:${ws_port}" ${scheme}://${host}:${ws_port}/ws
        #       ws_return=$?
        #       if [ "${ws_return}" -ne 0 ]; then
        #               restart_asterisk ${s} $(date +%s%3N)
        #               send_alert ${s}
        #       fi
        fi
        sleep "${interval}"
done

Comments (0)

HTTPS SSH

You can clone a snippet to your computer for local editing. Learn more.