Commits

Alexis Lê-Quôc committed ed2a05d

Starting to pretty up the apache log tailer

Comments (0)

Files changed (2)

src/ApacheLogtailer.py

 import time
 import threading
 import re
+import logging
+logger = logging.getLogger("ganglia_logtailer")
 
 # local dependencies
 from ganglia_logtailer_helper import GangliaMetricObject
 class ApacheLogtailer(object):
     # only used in daemon mode
     period = 30
-    def __init__(self):
+    
+    @staticmethod
+    def translate_log_format(log_format):
+        """Takes an apache log format, returns a compiled regex
+        """
+        xlate = {   "%a": r"(?P<local_ip>\S+)",
+                    "%A": r"(?P<remote_ip>\S+)",
+                    "%u": r"(?P<user>\S+)",
+                    "%v": r"(?P<server>\S+)",
+                    "%B": r"(?P<size>\d+)",
+                    "%b": r"(?P<clf_size>\S+)",
+                    "%D": r"(?P<req_time>\d+)",
+                    "%P": r"(?P<pid>\d+)",
+                    "%s": r"(?P<init_retcode>\d{3})",
+                    "%>s": r"(?P<final_retcode>\d{3})",
+                    "%h": r"(?P<remote_host>\S+)",
+                    "%l": r"(?P<remote_logname>\S+)",
+                    "%t": r"(?P<req_time_std>\[\w+ [+-]\d+\])",
+                    '"%r"': r'"(?P<request>[^"]+)"',
+                    '"%{Referer}i"': r'"(?P<referer>[^"]+)"',
+                    '"%{User-agent}i"': r'"(?P<user_agent>[^"]+)"',
+                }
+        regex = r"^"
+        for directive in log_format.split():
+            logger.debug("directive: %s" % (directive,))
+            rg = xlate.get(directive, None)
+            logger.debug("regex: %s" % (rg,))
+            if rg is not None:
+                if len(regex) > 1:
+                    regex += " "
+                regex += rg
+        logger.debug("Log directive: %s\nRegex: %s" % (log_format, regex))
+        return re.compile(regex)
+    
+    def __init__(self, log_format='%h %l %u %t "%r" %>s %b "%{Referer}i" "%{User-agent}i"'):
         '''This function should initialize any data structures or variables
-        needed for the internal state of the line parser.'''
+        needed for the internal state of the line parser.
+        log_format: apache log format string
+        
+        Source: http://httpd.apache.org/docs/current/logs.html
+                http://httpd.apache.org/docs/current/mod/mod_log_config.html#formats
+        '''
         self.reset_state()
         self.lock = threading.RLock()
+        self.log_format = log_format
         # this is what will match the apache lines
-        # apache log format string:
-        # %v %A %a %u %{%Y-%m-%dT%H:%M:%S}t %c %s %>s %B %D %{cookie}n \"%{Referer}i\" \"%r\" \"%{User-Agent}i\" %P
-        # host.com 127.0.0.1 127.0.0.1 - 2008-05-08T07:34:44 - 200 200 371 103918 - "-" "GET /path HTTP/1.0" "-" 23794
-        # match keys: server_name, local_ip, remote_ip, date, conn_status, init_retcode, final_retcode, size,
-        #               req_time, cookie, referrer, request, user_agent, pid
-        self.reg = re.compile('^(?P<local_ip>[^ ]+) (?P<remote_ip>[^ ]+) (?P<user>[^ ]+) (?P<date>[^ ]+) (?P<conn_status>[^ ]+) (?P<init_retcode>[^ ]+) (?P<final_retcode>[^ ]+) (?P<size>[^ ]+) (?P<req_time>[^ ]+) (?P<cookie>[^ ]+) "(?P<referrer>[^"]+)" "(?P<request>[^"]+)" "(?P<user_agent>[^"]+)" (?P<pid>[^ ]+)')
 
+        self.reg = ApacheLogtailer.translate_log_format(self.log_format)
+        
         # assume we're in daemon mode unless set_check_duration gets called
         self.dur_override = False
 
-
     # example function for parse line
     # takes one argument (text) line to be parsed
     # returns nothing
             regMatch = self.reg.match(line)
             if regMatch:
                 linebits = regMatch.groupdict()
+                # NOTE(review): assert is stripped under "python -O", so this
+                # is not a real guard against a missing "request" group.
+                assert "request" in linebits
                 # capture GETs
-                if( 'GET' in linebits['request'] ):
+                # NOTE(review): linebits.get("request", None) returns None when
+                # the group is absent, and "'GET' in None" raises TypeError --
+                # prefer linebits['request'] or a "" default here.
+                if( 'GET' in linebits.get("request", None) ):
                     self.num_gets+=1
                 # capture HTTP response code
                 rescode = float(linebits['init_retcode'])
                 # store for 90th % calculation
                 self.ninetieth.append(dur)
             else:
-                raise LogtailerParsingException, "regmatch failed to match"
+                # NOTE(review): logger.warn is a deprecated alias of
+                # logger.warning.
+                logger.warn("regmatch %s failed to match: %s" % (line, self.log_format))
         except Exception, e:
             self.lock.release()
+            logger.exception(e)
+            raise LogtailerParsingException(e)
         self.lock.release()
+        
+        
     # example function for deep copy
     # takes no arguments
     # returns one object

src/ganglia-logtailer

 
 # System Libraries
 import os
+import os.path
 import sys
 import threading
 import time
 import optparse
 import stat
 # Logging module
+import logging
 import logging.handlers
 import fcntl
 
 # Local dependencies
-sys.path.append("/usr/share/ganglia-logtailer")
 from tailnostate import LogTail
 from ganglia_logtailer_helper import LogtailerParsingException, LogtailerStateException, LockingError
 
-## globals
-# gmetric = '/usr/bin/gmetric'
 gmetric = '/bin/echo'
+# NOTE(review): gmetric is stubbed out to /bin/echo (debug aid); restore the
+# real gmetric binary path before this ships.
 logtail_statedir = "/tmp/ganglia-logtailer"
+# NOTE(review): a fixed, world-readable /tmp path is predictable (symlink
+# risk) and does not survive reboots -- confirm this is intentional.
 # FIXME exception handling...
+# NOTE(review): if logtail is not on $PATH this yields an empty string, and
+# the shell_tail command later built from it is malformed.
 logtail = os.popen("which logtail").read().strip()
 
 ## set up logging infrastructure for use throughout the script
-logDir = '/var/log/ganglia'
-if(not os.path.isdir(logDir)):
-    os.mkdir(logDir)
 logger = logging.getLogger('ganglia_logtailer')
 # open the log file for append, rotate at 1GB, keep 10 of them
-#hdlr = logging.handlers.RotatingFileHandler('%s/ganglia_logtailer.log' % logDir, 'a', 10 * 1024 * 1024, 10)
+#hdlr = logging.handlers.RotatingFileHandler(os.path.join(logDir, "ganglia_logtailer.log"), 'a', 10 * 1024 * 1024, 10)
 hdlr = logging.StreamHandler()
 formatter = logging.Formatter('%(asctime)s %(levelname)-8s %(message)s')
 hdlr.setFormatter(formatter)
 logger.addHandler(hdlr)
 logger.setLevel(logging.DEBUG)
 
-
-## This provides a lineno() function to make it easy to grab the line
-## number that we're on (for logging)
-## Danny Yoo (dyoo@hkn.eecs.berkeley.edu)
-## taken from http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/145297
-import inspect
-def lineno():
-    """Returns the current line number in our program."""
-    return inspect.currentframe().f_back.f_lineno
-
 def submit_stats( parser, metric_prefix, gmetric_options, duration=None ):
     if( duration != None ):
         # this only happens in cron mode
             os.system("%s %s --name %s --value %s --type %s --units %s" %
                 (gmetric, gmetric_options, m.name, m.value, m.type, m.units) )
     except LogtailerStateException, e:
-        logger.warning( "State exception caught (line %s): %s" % (lineno(), e) )
-
-
+        logger.exception(e)
+        
 # function gmetric_manager
 # takes a parser object - class instance
 def gmetric_manager( parser, metric_prefix, gmetric_options ):
         runtime = finish - start
         sleep_time = period - runtime
         while( sleep_time <= 0 ):
-            logger.info( "manager: calculation time is longer than period.  doubling period to %s." % (period * 2) )
+            logger.info( "manager: calculation time is longer than period.  Doubling period to %s." % (period * 2) )
             sleep_time += period
             period *= 2
             # tell the logtailer class that we're slowing period
 
     logger.debug("Unlocking sucessful")
 
-    return
-
-
-
-
 def main():
-
+    global logtail_statedir
+    
     cmdline = optparse.OptionParser()
     cmdline.add_option('--classname', '-c', action='store', help='The name of the plugin to use to parse the log file')
     cmdline.add_option('--log_file', '-l', action='store', help='The path to the file to tail and parse')
                        help='The state dir is used in cron mode, and is where to store the logtail state file.  Default location %s' % logtail_statedir)
 
     options, arguments = cmdline.parse_args()
-    print ('classname = %s, log_file = %s, mode = %s, state_file = %s' % (options.classname, options.log_file, options.mode, options.state_dir) )
 
     class_name = options.classname
     log_file = options.log_file
     metric_prefix = options.metric_prefix
     gmetric_options = options.gmetric_options
     state_dir = options.state_dir
+
     dirsafe_logfile = log_file.replace('/','-')
+
     logtail_state_file = '%s/logtail-%s%s.state' % (state_dir, class_name, dirsafe_logfile)
     logtail_lock_file = '%s/logtail-%s%s.lock' % (state_dir, class_name, dirsafe_logfile)
 
     # only used in cron mode
     shell_tail = '%s -f %s -o %s' % (logtail, log_file, logtail_state_file)
 
-    logger.debug( "ganglia-logtailer started with class %s, log file %s, mode %s" % (class_name, log_file, mode))
+    logger.debug("ganglia-logtailer started with class %s, log file %s, mode %s" % (class_name, log_file, mode))
 
     # import and instantiate the class from the module passed in.  Files and Class names must be the same.
     try:
         module = __import__(class_name)
         parser = getattr(module, class_name)()
     except Exception, e:
-        print "Failed to instantiate parser (line %s): %s" % (lineno(), e)
+        logging.exception("Failed to instantiate parser: %s" % (e,))
         sys.exit(1)
 
     # check for lock file so we don't run multiple copies of the same parser simultaneously
     try:
         lockfile = start_locking(logtail_lock_file)
     except LockingError, e:
-        print "Failed to get lock.  Is another instance of ganglia-logtailer running?  Exiting."
+        logging.exception("Failed to get lock.  Is another instance of ganglia-logtailer running?  Exiting.")
         sys.exit(1)
     # we now have a lock that we must clear anywhere we exit.
 
     if ( mode == 'daemon' ):
         # open the log file for tailing
         try:
-            input = LogTail(log_file)
+            _input = LogTail(log_file)
+            #launch gmetric caller thread
+            submitter = threading.Thread(target=gmetric_manager, args=[parser, metric_prefix, gmetric_options])
+            # the process should die when the main thread dies
+            submitter.setDaemon(True)
+            submitter.start()
         except Exception, e:
-            print "Failed to instantiate LogTail instance (line %s): %s" % (lineno(), e)
+            logging.exception("Failed to instantiate LogTail instance: %s" % (e,))
             end_locking(lockfile, logtail_lock_file)
             sys.exit(1)
     elif ( mode == 'cron' ):
                                    (shell_tail, retval))
                 end_locking(lockfile, logtail_lock_file)
                 sys.exit(0)
-            input = os.popen(shell_tail)
+            _input = os.popen(shell_tail)
         except SystemExit, e:
             raise
         except Exception, e:
     else:
         raise Exception, "mode (%s) misunderstood" % mode
 
-    # if we're a daemon, launch the other thread (cron mode runs after the parsing)
-    if ( mode == 'daemon' ):
-        #launch gmetric caller thread
-        submitter = threading.Thread(target=gmetric_manager, args=[parser, metric_prefix, gmetric_options])
-        # the process should die when the main thread dies
-        submitter.setDaemon( True )
-        submitter.start()
-
     # parse each line in turn
     try:
-        for line in input:
+        for line in _input:
             # this will never end in daemon mode, but will in cron mode
             try:
                 # if in daemon mode, die if our submitter thread has failed
                 if( mode == 'daemon' and not submitter.isAlive() ):
                     raise Exception, "submitter thread died"
 
-#                logger.info( "parsing line")
+                logger.debug("parsing line")
                 parser.parse_line(line)  # crunch each line in turn
 
             except LogtailerParsingException, e:
                 # this should only catch recoverable exceptions (of which there aren't any at the moment)
-                logger.warning( "Parsing exception caught at %s: %s" % (lineno(), e))
+                logging.exception( "Parsing exception: %s" % (e,))
     except Exception, e:
-        print "Exception caught at %s: %s" % (lineno(), e)
+        logging.exception("Exception caught at: %s" % (e,))
         end_locking(lockfile, logtail_lock_file)
         sys.exit(1)
 
         pass
 
 if __name__ == '__main__':
-    main()
-
-
+    main()