Anonymous committed f352111

adding locking to ganglia-logtailer to prevent multiple simultaneous instances with the same logfile/module combination.

Comments (0)

Files changed (4)

 	install -d ${DESTDIR}/var/lib/ganglia-logtailer
 	install -d ${DESTDIR}/var/log/ganglia-logtailer
-	install -d ${DESTDIR}/usr/bin
-	install -m 0755 ${SCRIPTS} ${DESTDIR}/usr/bin
+	install -d ${DESTDIR}/usr/sbin
+	install -m 0755 ${SCRIPTS} ${DESTDIR}/usr/sbin
 	install -d ${DESTDIR}/usr/share/ganglia-logtailer
-	install -m 0644 ${MODULES} ${DESTDIR}/usr/share/ganglia-logtailer	
+	install -m 0644 ${MODULES} ${DESTDIR}/usr/share/ganglia-logtailer
 	debuild -uc -us -i -S
+	debuild clean
+ganglia-logtailer (1.2-1) stable; urgency=low
+  * Changed the install target from /usr/bin to /usr/sbin, to match the
+    filesystem hierarchy standard.
+ -- Ben Hartshorne <>  Wed, 28 Oct 2009 11:16:37 -0700
 ganglia-logtailer (1.1-2) stable; urgency=low
   * added comment to ganglia-logtailer where logic to include a lockfile would


 import stat
 # Logging module
 import logging.handlers
+import fcntl
 # Local dependencies
 from tailnostate import LogTail
-from ganglia_logtailer_helper import LogtailerParsingException, LogtailerStateException
+from ganglia_logtailer_helper import LogtailerParsingException, LogtailerStateException, LockingError
 ## globals
 gmetric = '/usr/bin/gmetric'
# function start_locking
def start_locking(lockfile_name):
    """ Acquire an exclusive lock via a provided lockfile filename.

    Opens (creating or truncating) lockfile_name, takes a non-blocking
    exclusive flock() on it, and records the holder's PID in the file so
    stale lockfiles (a PID that is no longer running) can be detected.

    Returns the open file object; the lock is held for as long as this
    object stays open.  Raises LockingError if another process already
    holds the lock.
    """
    f = open(lockfile_name, 'w')
    try:
        # LOCK_NB: fail immediately rather than block if another
        # logfile/module instance already holds the lock.
        fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        raise LockingError("Cannot acquire ganglia-logtailer lock (%s)" % lockfile_name)
    # Record our PID in the lockfile so a stale lock (holder no longer
    # running) can be identified by inspection.
    f.write('%d\n' % os.getpid())
    f.flush()
    logger.debug("Locking successful")
    return f
# function end_locking
def end_locking(lockfile_fd, lockfile_name):
    """ Release a lock and remove its lockfile.

    lockfile_fd   -- the open file object returned by start_locking()
    lockfile_name -- path of the lockfile to unlink

    Raises LockingError if the lock cannot be released or the lockfile
    cannot be unlinked.
    """
    try:
        # LOCK_UN releases the lock; unlocking never blocks, so no
        # LOCK_NB flag is needed here.
        fcntl.flock(lockfile_fd, fcntl.LOCK_UN)
    except IOError:
        raise LockingError("Cannot release ganglia-logtailer lock (%s)" % lockfile_name)
    try:
        os.unlink(lockfile_name)
    except OSError:
        raise LockingError("Cannot unlink %s" % lockfile_name)
    logger.debug("Unlocking successful")
    return
 def main():
     cmdline = optparse.OptionParser()
     state_dir = options.state_dir
     dirsafe_logfile = log_file.replace('/','-')
     logtail_state_file = '%s/logtail-%s%s.state' % (state_dir, class_name, dirsafe_logfile)
+    logtail_lock_file = '%s/logtail-%s%s.lock' % (state_dir, class_name, dirsafe_logfile)
     # only used in cron mode
     shell_tail = '%s -f %s -o %s' % (logtail, log_file, logtail_state_file)
     # check for lock file so we don't run multiple copies of the same parser simultaneously
     # this will happen if the log parsing takes more time than the cron period
     # which is likely on first run when the logfile is huge
-    # FIXME I need to actually add this.
+    try:
+        lockfile = start_locking(logtail_lock_file)
+    except LockingError, e:
+        print "Failed to get lock.  Is another instance of ganglia-logtailer running?  Exiting."
+        sys.exit(1)
+    # we now have a lock that we must clear anywhere we exit.
     # get input to parse
             input = LogTail(log_file)
         except Exception, e:
             print "Failed to instantiate LogTail instance (line %s): %s" % (lineno(), e)
+            end_locking(lockfile, logtail_lock_file)
     elif ( mode == 'cron' ):
                 if( retval != 256 ):
                     logger.warning('%s returned bad exit code %s' %
                                    (shell_tail, retval))
+                end_locking(lockfile, logtail_lock_file)
             input = os.popen(shell_tail)
         except Exception, e:
             # I don't know when this exception will ever actually be triggered.
             print ("Failed to run %s to get log data (line %s): %s" %
                    (shell_tail, lineno(), e))
+            end_locking(lockfile, logtail_lock_file)
         raise Exception, "mode (%s) misunderstood" % mode
                 logger.warning( "Parsing exception caught at %s: %s" % (lineno(), e))
     except Exception, e:
         print "Exception caught at %s: %s" % (lineno(), e)
+        end_locking(lockfile, logtail_lock_file)
     # if we're called from cron, crunch the stats
             logger.warning('duration (%s) less than 45s, despite being called from cron.  Shouldn\'t happen. (line: %s)' % (duration, lineno()))
         #print 'metric measure with duration: %s' % duration
         submit_stats(parser, duration=duration)
+        end_locking(lockfile, logtail_lock_file)
+    # try and remove the lockfile one last time, but it's a valid state that it's already been removed.
+    try:
+        end_locking(lockfile, logtail_lock_file)
+    except Exception, e:
+        pass
 if __name__ == '__main__':


        return), but reset_state() should have been called so that the metrics
        are valid next time."""
class LockingError(Exception):
    """ Exception raised for errors creating or destroying lockfiles. """
    def __init__(self, message):
        # Forward the message to the Exception base class so that
        # str(e) and printed tracebacks actually show it.
        Exception.__init__(self, message)
        # Kept for callers that read e.message directly.
        self.message = message
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.