#!/bin/bash
# Created by: Edelberto Mania
# Last modified: (not recorded)
# 2-sec sip watchdog
# monitors sip response, restart if output is not 200 or return code non-zero
# actual sip checking is via Sam Bashton tool (check_sip)
# http://bashton.com/content/nagiosplugins
# Edelberto Mania <ed@zenoradio.com>
# 20160120 - initial
# 20160212 - add websocket test
# 20160407 - add killing of defunct and old check_sip_vX.sh processes
# 20160429 - add livesupport@zenoradio.com and app-support@zenoradio.com as recipients
## ---- settings --------------------------------------------------------------
# Pause between two consecutive SIP probes.
interval=2 # seconds

# Alert mail recipients; cc holds a comma-separated list.
rcpt="monitoring@zenoradio.com"
cc="ed@zenoradio.com,tier1@zenoradio.com,sysadmin@zenoradio.com,app-support@zenoradio.com"
bcc="livesupport@zenoradio.com"

# Probe command line. It is expanded unquoted where it is run so that it
# splits into words; 'timeout 1' bounds how long a single probe may take.
sip_tool='timeout 1 /usr/bin/perl /opt/tools/check_sip -u sip:11@localhost'

# NOTE: no PID-file lookup is done here. Reading /tmp/check_sip at this point
# fails noisily when the file does not exist yet, and pid_name is resolved
# further down anyway, once the file is known to be present.

## websocket
# In the visible part of the script these are only referenced by the
# commented-out websocket probe.
scheme=http
host=localhost
ws_port=5066

# PID of this watchdog instance.
this_pid=$$
## kill defunct processes - for now ALL
# A zombie cannot be signalled itself, so the PARENT of every process in
# state Z is killed. Selection is done on the process state column rather
# than by grepping whole ps lines for the word "defunct", so a process that
# merely has "defunct" in its command line does not get its parent killed.
# NOTE(review): this is deliberately system-wide and uses SIGKILL.
for d_pid in $(ps -eo ppid=,stat= | awk '$2 ~ /^Z/ && !seen[$1]++ {print $1}'); do
  # Never signal this watchdog itself.
  if [[ "${d_pid}" != "${this_pid}" ]]; then
    kill -9 "${d_pid}" 2>/dev/null
  fi
done

## kill old, lingering check_sip processes except this one
# The pattern is quoted so the shell cannot glob-expand it against a file in
# the current directory; the escaped dot and the [h] bracket keep the grep
# process from matching its own command line.
for i in $(ps ax | grep 'check_sip_v2\.s[h]' | awk '{print $1}'); do
  if [[ "${i}" != "${this_pid}" ]]; then
    kill -9 "${i}" 2>/dev/null
  fi
done
## check and kill previous script instance
# /tmp/check_sip holds the PID written by an earlier run. That process is
# killed only when the last word of its command line equals "$0", i.e. it
# looks like another run of this same script.
pid_name='invalid'
_pid=''
if [[ -f /tmp/check_sip ]]; then
  _pid=$(cat /tmp/check_sip)
  # Accept only a plain number that is not our own PID: an empty or garbled
  # file must not reach ps/kill, and a recycled PID equal to ours would make
  # the script kill itself.
  if [[ "${_pid}" =~ ^[0-9]+$ && "${_pid}" != "$$" ]]; then
    # Query exactly that PID instead of word-grepping the full process list,
    # where the number could also match other columns or other processes.
    pid_name=$(ps -o args= -p "${_pid}" 2>/dev/null | awk '{print $NF}')
  fi
fi
if [[ "${pid_name}" == "$0" ]]; then
  echo "Killing previous process with PID ${_pid}..."
  kill -9 "${_pid}"
fi
#######################################
# Force-restart asterisk and the three service watchdogs, logging the failure
# before and the restart after.
# Globals:   return (read) - exit status of the failed probe, set by the caller
# Arguments: $1 - name of the check that failed (e.g. "sip")
#            $2 - timestamp label placed at the start of the first log line
# Outputs:   appends two lines to ~emania/sip-restart.log
#######################################
function restart_asterisk() {
  local w pids

  echo "$2: asterisk ($1) [${return}] responded with an issue." >> ~emania/sip-restart.log

  # Only call kill when there is something to kill; a bare 'kill -9' is just
  # a usage error.
  pids=$(pidof asterisk)
  if [[ -n "${pids}" ]]; then
    # shellcheck disable=SC2086 — one PID per word is intended
    timeout 2 kill -9 ${pids} 2>/dev/null
  fi
  sleep 2
  /usr/sbin/asterisk -Fqp

  for w in media.connection now_session phone_broadcast; do
    # Quoted so the shell cannot glob-expand the pattern; the [w] bracket
    # keeps grep from matching its own command line.
    pids=$(ps ax | grep "[w]atchdog.${w}.pl" | awk '{print $1}')
    if [[ -n "${pids}" ]]; then
      # shellcheck disable=SC2086 — one PID per word is intended
      timeout 2 kill -9 ${pids} 2>/dev/null
    fi
    "/usr/local/zenoradio/cpe/bin/service.watchdog.${w}.pl" &
  done

  echo "$(date +%s%3N): asterisk ($1) restarted [${return}]." >> ~emania/sip-restart.log
}
#######################################
# Mail a restart notification through mutt.
# Globals:   return (read) - exit status of the failed probe
#            rcpt, cc, bcc (read) - To / Cc / Bcc addresses
# Arguments: $1 - name of the check whose response failed (e.g. "sip")
#######################################
function send_alert() {
  local this_host message
  local -a mutt_opts

  this_host=$(hostname)
  message="$(date +%Y%m%dT%H%M%S): asterisk restarted [${return}] due to failed $1 response."

  mutt_opts=(
    -e 'unmy_hdr from; my_hdr From: sysadmin@zenoradio.com'
    -e "set realname=${this_host}"
    -s "NOTIFICATION: ${this_host} asterisk restart"
  )
  # Add Cc/Bcc only when configured; an empty value would otherwise make
  # mutt take the next option as the address.
  if [[ -n "${cc}" ]]; then
    mutt_opts+=(-c "${cc}")
  fi
  if [[ -n "${bcc}" ]]; then
    mutt_opts+=(-b "${bcc}")
  fi

  echo "${message}" | /usr/bin/mutt "${mutt_opts[@]}" "${rcpt}"
}
## write pid in a file
echo "${this_pid}" >/tmp/check_sip

# Remove the PID file when this instance exits. The loop below never ends on
# its own, so a cleanup placed after it would never run. The file is removed
# only while it still holds our PID, so a newer instance's file is untouched.
cleanup_pid_file() {
  if [[ -f /tmp/check_sip && "$(cat /tmp/check_sip 2>/dev/null)" == "${this_pid}" ]]; then
    rm -f /tmp/check_sip
  fi
}
trap cleanup_pid_file EXIT
# Turn the usual termination signals into a normal exit so the EXIT trap runs.
trap 'exit 1' HUP INT TERM

## probe forever: one SIP check every ${interval} seconds
while true; do
  # If check_sip hangs, this shell would wait on it forever; the 'timeout 1'
  # prefix inside ${sip_tool} is what bounds it.
  # shellcheck disable=SC2086 — sip_tool is a command line, splitting intended
  output=$(${sip_tool})
  return=$?
  # ${output} is left unquoted on purpose: echo then emits it as a single
  # line, so $3 is the third word of the whole output whenever it contains
  # STATUS. NOTE(review): presumably that word is the SIP status code —
  # confirm against check_sip's output format.
  # shellcheck disable=SC2086
  status=$(echo ${output} | grep STATUS | awk '{print $3}')
  if [[ "${status}" != "200" || "${return}" -ne 0 ]]; then
    ## failed sip, restart
    s=sip
    restart_asterisk "${s}" "$(date +%s%3N)"
    send_alert "${s}"
  #else
  # temp disabling websocket check
  # ## sip is okay, let's check websocket
  # s=ws
  # timeout 1 curl -H "Connection: Upgrade" \
  # -H "Upgrade: websocket" \
  # -H "Host: ${host}:${ws_port}" \
  # -H "Origin: ${scheme}://${host}:${ws_port}" ${scheme}://${host}:${ws_port}/ws
  # ws_return=$?
  # if [ "${ws_return}" -ne 0 ]; then
  # restart_asterisk ${s} $(date +%s%3N)
  # send_alert ${s}
  # fi
  fi
  sleep "${interval}"
done
# end of script