Commits

Lars Hansson committed b6df1a6 Draft

Support for passive checks.

Also move signal handling inside get_status.
Move option handling to a function.

  • Participants
  • Parent commits 971a217

Comments (0)

Files changed (1)

File check_monit_service.py

 #!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # Copyright 2012  Lars Hansson romabysen@gmail.com
 #
 # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import signal
+import time
 import urllib2
 import httplib
 import socket
 
 def alarm_handler(signum, frame):
     """Handle alarm timeouts"""
+    signal.alarm(0)
     msg = 'Connection timeout, received SIGALRM!'
-    signal.alarm(0)
     do_exit(1, msg)
 
 
-def get_status(url, username=None, password=None):
+def submit_alarm_handler(signum, frame):
+    """SIGALRM handler used while passive results are submitted.
+
+    Cancels any pending alarm, then passes exit code 1 and a timeout
+    message to do_exit.  signum and frame are the usual signal handler
+    arguments and are not used.
+    """
+    signal.alarm(0)
+    do_exit(1, 'Timeout submitting passive service checks, received SIGALRM!')
+
+
+def submit_results(results, opts):
+    """Submit passive check results through the Nagios command file.
+
+    results maps a service description to a dict with the keys 'code',
+    'output' and 'perf_data' (perf_data may be None).  opts supplies
+    host_name, cmdfile and timeout.
+
+    One PROCESS_SERVICE_CHECK_RESULT line is built per entry and written
+    to opts.cmdfile.  The writes are guarded by a SIGALRM alarm of
+    opts.timeout seconds handled by submit_alarm_handler.  An IOError
+    cancels the alarm and is reported through do_exit with code 1.
+    """
+    lines = []
+    for desc, result in results.items():
+        status = nagios_exit_codes[result['code']]
+        if result['perf_data'] is None:
+            output = '%s: %s' % (status, result['output'])
+        else:
+            output = '%s: %s|%s' % (status, result['output'], result['perf_data'])
+        lines.append('[%s] PROCESS_SERVICE_CHECK_RESULT;%s;%s;%s;%s\n' % (
+            int(time.time()), opts.host_name, desc, result['code'], output))
+    signal.signal(signal.SIGALRM, submit_alarm_handler)
+    signal.alarm(opts.timeout)
+    for line in lines:
+        try:
+            # The command file is re-opened for every line, as before.
+            f = open(opts.cmdfile, 'w')
+            try:
+                f.write(line)
+            finally:
+                # Close the handle even when the write fails.
+                f.close()
+        except IOError, e:
+            # Disarm the alarm so it cannot fire while the error is reported.
+            signal.alarm(0)
+            do_exit(1, "IOError writing to command pipe: %s" % (e.strerror or e))
+    signal.alarm(0)
+
+
+def get_status(url, username=None, password=None, timeout=10):
     """Get XML status document from monit server"""
     if username is not None and password is not None:
         passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
         authhandler = urllib2.HTTPBasicAuthHandler(passman)
         opener = urllib2.build_opener(authhandler)
         urllib2.install_opener(opener)
-    pagehandle = urllib2.urlopen(url)
-    return pagehandle.read()
+    signal.signal(signal.SIGALRM, alarm_handler)
+    signal.alarm(timeout)
+    try:
+        pagehandle = urllib2.urlopen(url)
+        doc = pagehandle.read() 
+    except urllib2.HTTPError, e:
+        do_exit(1, 'HTTP error: %s (%s).' % (e.code, e.msg))
+    except urllib2.URLError, e:
+        do_exit(1, '%s.' % (e.reason))
+    except httplib.BadStatusLine:
+        do_exit(1, 'unknown HTTP status code.')
+    except socket.error, e:
+        do_exit(1, 'socket error: %s' % (e))
+    else:
+        signal.alarm(0)
+    return doc
 
 
 def make_data(element, prefix=None, uom=''):
     return (1, 'service %s not found' % svc, None)
 
 
-def main():
-    """Main loop"""
+def process_status_all(status):
+    """Collect a check result for every service in the XML status document.
+
+    status is the XML text.  The returned dict maps '<Type>: <name>' to a
+    dict with the keys 'code', 'output' and 'perf_data', filled from the
+    tuple returned by process_service for that service element.
+    """
+    results = {}
+    root = xml.etree.ElementTree.fromstring(status)
+    for svc_elem in root.findall('service'):
+        type_id = int(svc_elem.get('type'))
+        type_label = monit_service_types[type_id].capitalize()
+        # Type 5 entries get a fixed label instead of their own name.
+        if type_id != 5:
+            label = svc_elem.find('name').text
+        else:
+            label = 'CPU and Memory'
+        code, output, perf_data = process_service(svc_elem)
+        results['%s: %s' % (type_label, label)] = {
+            'code': code,
+            'output': output,
+            'perf_data': perf_data,
+        }
+    return results
+
+
+def handle_options():
     usage = "Usage: %prog <-H hostname> [options] service"
     version = '%prog 0.1'
     parser = OptionParser(usage=usage, version=version)
                         help="Socket timeout in seconds (Default: %default)")
     parser.add_option("-u", dest="username", default=None, help="Username")
     parser.add_option("-p", dest="password", default=None, help="Password")
+    parser.add_option("-a", dest="checkall", action="store_true", default=False, help="Check all services and submit passive results")
+    parser.add_option("-f", dest="cmdfile", default=None, help="Nagios command file")
+    parser.add_option("-n", dest="host_name", default=None, help="Nagios host_name")
     (opts, args) = parser.parse_args()
 
-    if len(args) < 1 or not opts.host:
+    if (not opts.host) or (opts.checkall and opts.cmdfile is None):
         parser.print_usage()
         sys.exit(3)
+    return (opts, args)
 
+
+def main():
+    """Main loop"""
+    (opts, args) = handle_options()
     socket.setdefaulttimeout(opts.timeout)
-    svc = args[0]
+    if not opts.checkall:
+        svc = args[0]
     proto = 'http'
     if opts.ssl:
         proto = 'https'
     url = '%s://%s:%s/_status?format=xml' % (proto, opts.host, opts.port)
     perf_data = None
 
-    # Set up signal handler for SIGALRM
-    signal.signal(signal.SIGALRM, alarm_handler)
-
-    try:
-        signal.alarm(opts.timeout + 5)
-        statusdoc = get_status(url, opts.username, opts.password)
-        signal.alarm(0)
-    except urllib2.HTTPError, e:
-        signal.alarm(0)
-        code = 1
-        msg = 'HTTP error: %s (%s).' % (e.code, e.msg)
-    except urllib2.URLError, e:
-        signal.alarm(0)
-        code = 1
-        msg = '%s.' % e.reason
-    except httplib.BadStatusLine:
-        signal.alarm(0)
-        code = 1
-        msg = 'unknown HTTP status code.'
-    except socket.error, e:
-        signal.alarm(0)
-        code = 1
-        msg = 'socket error: %s' % (e)
+    statusdoc = get_status(url, opts.username, opts.password, opts.timeout)
+    statusdoc = statusdoc.replace("&", "&amp;")
+    if opts.checkall:
+        r = process_status_all(statusdoc)
+        submit_results(r, opts)
+        code = 0
+        msg = 'Data collected from monit daemon.'
     else:
         (code, msg, perf_data) = process_status(svc, statusdoc)
     do_exit(code, msg, perf_data)