Commits

Anonymous committed 6fd295d

Initial commit


Files changed (10)

+Biawak is a simple, extensible, agent-based monitoring solution.
+
+To install, unzip to a directory of your choosing and edit monitor.cfg.
+
+It currently does not ship with any monitoring modules; these are left as an exercise for the reader...
+
+
+For questions/comments please email john@begeman.us
+
+* Implement pure-Python solutions for the abstract monitors
+* Implement win32-compatible abstract monitors
+* Implement a Python-based snmptrap sender
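
Judging from how monitor.py drives modules (get_modules() imports monitors.<name>, then reads module-level OID and SPECIFIC and instantiates Monitor), a module is a file in the monitors/ package along these lines. A minimal sketch; the module name, OID, file path, and size threshold are made-up placeholders:

    # monitors/example_disk.py -- hypothetical module
    from monitors.abstract.filesizemonitor import FileSizeMonitor

    OID = '.1.3.6.1.4.1.99999.1'   # placeholder enterprise OID
    SPECIFIC = 1                   # placeholder specific-trap number

    class Monitor(FileSizeMonitor):
        def __init__(self):
            # Alarm when /var/log/messages exceeds ~100 MB (example values).
            FileSizeMonitor.__init__(self, '/var/log/messages', 100 * 1024 * 1024)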

File source/monitor.cfg

+[trapping]
+source = localhost
+destination = localhost
+community_string = public
+command = /opt/monitor/snmptrap
+
+[settings]
+
+dir = ./logs/
+debug_log = debug.log
+error_log = error.log
+logging_level = 0
+monitor_dir = monitors/
+
+[monitors]
+# Example:
+# 1: monitor1
+# 2: monitor2
+# 3: etc...
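
The keys in [monitors] are arbitrary; read_config() collects the values and get_modules() imports each one as monitors.<value>. Enabling the hypothetical example_disk module sketched above would look like:

    [monitors]
    1: example_disk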

File source/monitor.py

+#!/usr/bin/python
+from ConfigParser import ConfigParser
+import datetime
+import logging, logging.handlers
+import sys
+import os
+
+CONFIG_DIR = ''
+CONFIG_FILE = 'monitor.cfg'
+MONITOR_LIST = []
+
+log = logging.getLogger('fault_monitor')
+log.setLevel(logging.DEBUG)  # default; the config's logging_level is not yet applied
+
+
+def read_config(config=CONFIG_FILE, path=CONFIG_DIR):
+    c = ConfigParser()
+    c.read(os.path.join(path, config))
+
+    def _read_settings():
+        # Settings are read but not yet acted upon.
+        log_level = c.get('settings','logging_level')
+        monitors = c.get('settings', 'monitor_dir')
+        #datalog = c.get('settings','datalogs')
+
+        #_setSettings(profile,datalog,log,startdate)
+
+    def _read_monitors():
+        global MONITOR_LIST
+        for monitor in c.items('monitors'):
+            MONITOR_LIST.append(monitor[1])
+
+
+    _read_settings()
+    _read_monitors()
+    return c
+
+def logging_setup():
+    formatter1 = logging.Formatter('[%(asctime)-22s] %(name)-10s - %(levelname)s - %(message)s', '%m/%d/%y %H:%M:%S %Z')
+    formatter2 = logging.Formatter('[%(asctime)-22s] %(name)-10s: %(levelname)s - %(message)s', '%m/%d/%y %H:%M:%S %Z')
+
+    filehandler = logging.handlers.RotatingFileHandler('monitor.log', 'a', 1000000, 10)
+    filehandler.setFormatter(formatter1)
+    filehandler.setLevel(logging.INFO)
+    log.addHandler(filehandler)
+
+    console = logging.StreamHandler()
+    console.setFormatter(formatter2)
+    console.setLevel(logging.CRITICAL)
+    log.addHandler(console)
+
+def get_modules(monitor_list=MONITOR_LIST, prefix='monitors.'):
+    '''Import each configured monitor module and return the module objects.'''
+    classes = []
+    for mod in monitor_list:
+        mod_string = prefix + mod
+        __import__(mod_string)
+        classes.append(sys.modules[mod_string])
+    return classes
+
+
+class TrapSender(object):
+
+    def __init__(self, config):
+        self.snmptrap_path = config.get('trapping', 'command')
+        self.destination = config.get('trapping', 'destination')
+        self.source = config.get('trapping', 'source')
+        self.version = '1'
+        self.community_string = config.get('trapping', 'community_string')
+
+    def send(self, oid, specific_trap, varbind=None):
+        snmp_cmd = "%(trap_path)s -v %(version)s -c %(community)s %(destination)s %(oid)s %(source)s 6 %(specific_trap)s '' 1 s %(varbind)s" % \
+                 {'trap_path': self.snmptrap_path,
+                  'version': self.version,
+                  'community': self.community_string,
+                  'destination': self.destination,
+                  'oid': oid,
+                  'source': self.source,
+                  'specific_trap': specific_trap,
+                  'varbind': varbind,
+                  }
+
+        results = os.popen(snmp_cmd).read()
+
+        return results
+
+
+if __name__ == '__main__':
+
+    logging_setup()
+    config = read_config()
+
+    monitor_list = get_modules()
+
+    trap_sender = TrapSender(config)
+
+    now_tuple = datetime.datetime.utcnow().timetuple()[0:5]
+    run_timestamp = datetime.datetime(*now_tuple)
+
+    for monitor in monitor_list:
+        outage = True
+
+        try:
+            j = monitor.Monitor()
+            outage = j.in_outage()
+        except Exception, e:
+            # A monitor that fails to run is treated as an outage.
+            outage = True
+
+        # Send 1 for an outage trap, 0 for a recovery/ok trap.
+        try:
+            r = trap_sender.send(monitor.OID, monitor.SPECIFIC, 1 if outage else 0)
+        except Exception, e:
+            log.critical('Error sending trap for monitor ' + str(monitor) + ': ' + str(e))
+
+        log.debug(str(monitor) + ' in_outage was: ' + str(outage))
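
For reference, with the defaults in monitor.cfg and the placeholder OID/SPECIFIC values from the README sketch, the command TrapSender.send() builds comes out as:

    /opt/monitor/snmptrap -v 1 -c public localhost .1.3.6.1.4.1.99999.1 localhost 6 1 '' 1 s 1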

File source/monitors/__init__.py

Empty file added.

File source/monitors/abstract/__init__.py

Empty file added.

File source/monitors/abstract/filesizemonitor.py

+import os.path
+
+
+class FileSizeMonitor(object):
+    '''Alarm when a file grows beyond max_size bytes.'''
+
+    def __init__(self, filename, max_size):
+        self.filename = filename
+        self.max_size = int(max_size)
+        self.file_size = os.path.getsize(filename)
+
+    def in_outage(self, filename=None, max_size=None):
+        if filename is None:
+            filename = self.filename
+
+        if max_size is None:
+            max_size = self.max_size
+
+        size = os.path.getsize(filename)
+        if int(size) > int(max_size):
+            return True
+
+        return False
+
+    def is_recovered(self, result_set):
+        # No previous event, so we cannot be recovering from an outage.
+        if not result_set:
+            return False
+
+        last_event = result_set[-1]
+
+        # If the last event was an outage and we are no longer in outage,
+        # we have recovered.
+        if last_event.in_outage == 1:
+            return not self.in_outage()
+
+        return False
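
is_recovered() only assumes each record in result_set exposes an in_outage attribute; Event below is a hypothetical stand-in for whatever record type a datastore would supply:

    class Event(object):
        def __init__(self, in_outage):
            self.in_outage = in_outage   # 1 if that event was an outage

    m = FileSizeMonitor('/var/log/messages', 100 * 1024 * 1024)
    history = [Event(1)]             # last recorded event was an outage
    print m.is_recovered(history)    # True once the file is back under max_size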

File source/monitors/abstract/logmonitor.py

+import re
+import mmap
+import os
+
+class LogMonitor:
+    '''Base class for log file grepping. Passed a regex, in_outage() will
+    return True if the file matches. You'll want to define your own recovery
+    methods when you subclass this.'''
+
+    def mapfile(self, filename):
+        file = open(filename, "r+")
+        size = os.path.getsize(filename)
+        return mmap.mmap(file.fileno(), size)
+
+    def __init__(self, filename, reg_match):
+        self.filename = filename
+
+        _re = re.compile(reg_match)
+
+        self.logs = self.mapfile(filename)
+        self.matches = _re.findall(self.logs)
+
+    def in_outage(self):
+        # findall() returns a list, so any match at all means an outage.
+        return bool(self.matches)
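
As the docstring suggests, subclasses supply their own recovery logic. A hypothetical subclass; the pattern and the re-scan policy are examples only:

    import re
    from monitors.abstract.logmonitor import LogMonitor

    class ErrorLogMonitor(LogMonitor):
        PATTERN = 'ERROR|CRITICAL'

        def __init__(self, filename):
            LogMonitor.__init__(self, filename, self.PATTERN)

        def is_recovered(self):
            # Re-scan the file; recovery means the pattern no longer matches.
            self.matches = re.findall(self.PATTERN, self.mapfile(self.filename))
            return not self.in_outage()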

File source/monitors/abstract/processmonitor.py

+import os
+import re
+import datetime
+
+class Process(object):
+    '''Passed a process id, this class will parse the corresponding /proc/<pid> directory and return an object with process details'''
+    HZ = os.sysconf('SC_CLK_TCK')
+
+    def __init__(self, pid, cmd=None):
+        self.pid = pid
+        self.is_running()
+
+        self.status = self._parse_readable()
+        self._parse_uid()
+        self._parse_gid()
+
+    def _read_pid(self, key):
+        '''Return the contents of /proc/<pid>/<key>.'''
+        filename = '/proc/%s/%s' % (self.pid, key)
+        pid_file = open(filename).read()
+
+        return pid_file
+
+
+    def _parse_readable(self, key='status'):
+        '''Read in /proc/<pid>/status and return its attributes as a dict'''
+
+        f = self._read_pid(key)
+        status_re = re.compile('(.*):\t*(.*)')
+        status = {}
+
+        for (k,v) in status_re.findall(f):
+            status[k.lower()] = v.lower()
+        return status
+
+    def _parse_uid(self):
+        uids = self.status['uid'].split('\t')
+
+        # There is probably a more efficient way to do this
+        self.ruid = uids[0]
+        self.euid = uids[1]
+        self.ssuid = uids[2]
+        self.fsuid = uids[3]
+
+    def _parse_gid(self):
+        gids = self.status['gid'].split('\t')
+
+        self.rgid = gids[0]
+        self.egid = gids[1]
+        self.ssgid = gids[2]
+        self.fsgid = gids[3]
+    def get_time(self):
+        stat = self._read_pid('stat').split()
+
+        # /proc/<pid>/stat: utime is field 14, stime field 15 and starttime
+        # field 22 (1-indexed), i.e. list indices 13, 14 and 21. Values are
+        # in clock ticks (jiffies).
+        utime = int(stat[13])
+        stime = int(stat[14])
+        start = int(stat[21])
+        ttime = utime + stime
+
+        times = {'utime': utime,
+                 'stime': stime,
+                 'total': ttime,
+                 'start': start,
+                 'recorded_at': datetime.datetime.now()
+                 }
+        return times
+
+    def mem_usage(self):
+        # this needs to be cleaned up
+        # First line of /proc/meminfo: 'MemTotal:  <n> kB'
+        meminfo = open('/proc/meminfo').readline()
+        meminfo = meminfo.replace(' ','')
+        meminfo = meminfo.replace('kB','')
+        total_mem = int(meminfo.split(':')[1])
+
+        status = self._parse_readable()
+
+        p_usage = int(status['vmrss'].replace('kb','').replace(' ',''))
+
+        mem_usage = int( (float(p_usage) / float(total_mem) ) * 100)
+        return mem_usage
+
+
+    def cpu_usage(self, previous_times):
+        times = self.get_time()
+        t = times['total']
+        y = previous_times['total']
+
+        delta_ttime = float(t) - float(y)
+        delta_start = times['recorded_at'] - previous_times['recorded_at']
+
+        # Ticks used, converted to seconds, over wall-clock seconds elapsed.
+        cpu_seconds = delta_ttime / self.HZ
+        seconds_elapsed = (delta_start.days * 86400 + delta_start.seconds
+                           + delta_start.microseconds / 1e6)
+
+        cpu_percentage = cpu_seconds / seconds_elapsed * 100
+        return cpu_percentage
+
+
+    def convert_to_ms(self, tdelta):
+        milliseconds = int(tdelta.microseconds / 1000)
+        milliseconds += int(tdelta.seconds * 1000)
+        return milliseconds
+
+    def is_running(self):
+        # Signal 0 performs error checking only; no signal is actually sent.
+        try:
+            os.kill(int(self.pid), 0)
+            return True
+
+        except OSError, e:
+            return False
+
+class Service(object):
+    '''Interface for monitoring a chkconfig-managed service. Passed a service
+    name, which must be known to /sbin/service, and an optional port. Port is
+    used to ensure the service is listening on a given port.'''
+
+    def __init__(self, service_name, port=None):
+        self.port = port
+        self.service_name = service_name
+
+        self.processes = self.pid_lookup()
+
+    def pid_lookup(self):
+        '''Look up the pids for this service name'''
+
+        if not self.service_name:
+            raise ValueError('service_name is required')
+
+        service_cmd = '/sbin/service %s status' % (self.service_name)
+        service_out = os.popen(service_cmd).read()
+        # Example output: 'ibmslapd (pid 4671) is running...\n'
+        pid_info = re.match('(?P<service_name>.*) \(pid\s(?P<pid_list>.*)\) is running', service_out)
+
+        pids = pid_info.group('pid_list').split()
+        processes = []
+
+        for pid in pids:
+            processes += [Process(pid)]
+
+        return processes
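
A usage sketch: cpu_usage() compares two samples, so record get_time(), wait, then call it (the pid and interval here are arbitrary):

    import time
    from monitors.abstract.processmonitor import Process

    p = Process(1)                 # pid chosen arbitrarily
    baseline = p.get_time()
    time.sleep(5)
    print p.cpu_usage(baseline)    # percent CPU over the interval
    print p.mem_usage()            # VmRSS as a percent of total memory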

File source/monitors/abstract/systemmonitor.py

+import re
+import datetime
+import os
+
+
+class System(object):
+    HZ = os.sysconf('SC_CLK_TCK')
+
+    def __init__(self):
+        self.initial_cpu_times = self.get_cpu_times()
+        self.initialized_at = datetime.datetime.now()
+
+    def get_cpu_times(self, cpu='cpu'):
+        proc_stat = '/proc/stat'
+
+        stats = open(proc_stat).read()
+
+        # /proc/stat counters, in jiffies: user nice system idle iowait irq ...
+        stat_re = re.compile('^' + cpu + '''\s+(?P<user_mode>\d+)\s+(?P<nice>\d+)\s+(?P<system_mode>\d+)\s+(?P<idle>\d+)\s+(?P<iowait>\d+)\s+(?P<irq>\d+).*''', re.MULTILINE)
+        cpu_usage = stat_re.search(stats).groupdict()
+
+        # Counters come back as strings; convert for arithmetic.
+        for key in cpu_usage:
+            cpu_usage[key] = int(cpu_usage[key])
+        cpu_usage['recorded_at'] = datetime.datetime.now()
+
+        return cpu_usage
+
+    def cpu_percentage(self, old_time=None, new_time=None):
+        # Default arguments cannot reference self, so resolve them here.
+        if old_time is None:
+            old_time = self.initial_cpu_times
+        if new_time is None:
+            new_time = self.get_cpu_times()
+
+        delta_times = {}
+        for key in new_time.keys():
+            if key == 'recorded_at':
+                continue
+            delta_times[key] = new_time[key] - old_time[key]
+
+        elapsed = new_time['recorded_at'] - old_time['recorded_at']
+        jiffies_elapsed = (elapsed.days * 86400 + elapsed.seconds
+                           + elapsed.microseconds / 1e6) * self.HZ
+
+        # Busy percentage: all counters except idle time.
+        busy = sum(v for (k, v) in delta_times.items() if k != 'idle')
+        return float(busy) / jiffies_elapsed * 100
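
Similarly for System, which snapshots /proc/stat at construction; cpu_percentage() with no arguments compares that snapshot to a fresh one (the interval is arbitrary):

    import time
    from monitors.abstract.systemmonitor import System

    s = System()
    time.sleep(5)
    print s.cpu_percentage()       # percent of CPU busy since construction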