Source

weblog / multiple.py

Full commit
#!/usr/bin/env python

'''
Multiple Web Logfile combining class.

Contents:
- Combiner: multiple logfile combining class
  ag_log = weblog.multiple.Combiner(WebLog_Class, [list of filehandles])
  methods:
	- ag_log.getlogent()
  read-only variables:
  	- ag_log.num_error
	- ag_log.num_processed
  	
  This class is useful when you need to combine logfiles from separate servers
  that log the same content; for instance, if you use a round-robin DNS or
  other load distribution system.

  The Combiner needs to be fed a prototype of the class of parser you want
  it to use (NOT an instance), and a list of filehandles to the logs. For
  instance:
  
  import weblog.common, weblog.multiple
  log1 = open('log1_log')
  log2 = open('log2_log')
  mult_log = weblog.multiple.Combiner(weblog.common.Parser, [log1, log2])
  [...]

- test: test function
'''


# (c) 1998 Copyright Mark Nottingham
# <mnot@pobox.com>
#
# This software may be freely distributed, modified and used,
# provided that this copyright notice remain intact.
#
# This software is provided 'as is' without warranty of any kind.


__version__ = '1.0'




class Combiner:
	''' 
	Class to combine multiple logfiles of the same format. 
	(chronologically correct)

	One parser of class `logtype` is created per filehandle, and each
	parser holds its next unreturned entry. Every getlogent() call picks
	the pending entry with the smallest utime (the first log wins a tie)
	and makes its parser the current `target`; attributes that the
	Combiner does not define itself are looked up on that target.

	read-only variables:
	- num_processed: sum of num_processed over all parsers
	- num_error: sum of num_error over all parsers
	'''

	def __init__(self, logtype, fds):
		'''
		logtype: parser class (NOT an instance); called once per filehandle
		fds: sequence of filehandles, one per logfile (may be empty)
		'''
		# logs, _queue, _proc and _err are parallel lists: index i of each
		# describes the same parser, so they must grow and shrink together.
		self.logs = []		# parsers that still hold a pending entry
		self.processed_lines = 0	# never updated in this class; kept for compatibility
		self.error_lines = 0		# never updated in this class; kept for compatibility
		self._queue = []	# utime of each parser's pending entry
		self._proc = []		# last seen num_processed of each live parser
		self._err = []		# last seen num_error of each live parser
		self._done_proc = 0	# num_processed total of exhausted parsers
		self._done_err = 0	# num_error total of exhausted parsers
		for fd in fds:
			log = logtype(fd)
			if log.getlogent():
				self.logs.append(log)
				self._queue.append(log.utime)
				self._proc.append(log.num_processed)
				self._err.append(log.num_error)
			else:
				# A log with no entry at all must not be queued: it would
				# otherwise be selected first and handed out as an entry.
				# Its counters still belong in the totals.
				self._done_proc = self._done_proc + log.num_processed
				self._done_err = self._done_err + log.num_error
		if self._queue:
			first = min(self._queue)
			self._q_pos = self._queue.index(first)
		else:
			first = 0
			self._q_pos = 0
		self.target = Dummy(first)	# placeholder until the first getlogent()


	def __getattr__(self, attr):
		'''
		Compute the combined counters; delegate anything else to the
		current target. Raises AttributeError(attr) if the target lacks it.
		'''
		if attr == 'target':
			# Only reached when `target` is not set yet; delegating would
			# look up self.target again and recurse without end.
			raise AttributeError(attr)
		if attr == 'num_processed':
			return sum(self._proc, self._done_proc)
		if attr == 'num_error':
			return sum(self._err, self._done_err)
		try:
			return getattr(self.target, attr)
		except AttributeError:
			raise AttributeError(attr)


	def getlogent(self):
		'''
		Increment position in the log and populate requested attributes

		Returns 1 when an entry is available through the Combiner's
		attributes, 0 once every log is exhausted (and on any later call).
		'''

		if not self._queue:
			return 0
		pos = self._q_pos
		current = self.logs[pos]
		# On the very first call the target is still the placeholder, which
		# holds no entry to step past and whose zero counters must not
		# replace the real ones; only advance a parser that was handed out.
		if self.target is current:
			if current.getlogent():		# fetch next line from last log
				self._queue[pos] = current.utime	# update candidates
				self._proc[pos] = current.num_processed
				self._err[pos] = current.num_error
			else:
				# Done with this log: keep its final counters in the
				# totals and drop it from ALL parallel lists so the
				# remaining indices stay aligned.
				self._done_proc = self._done_proc + current.num_processed
				self._done_err = self._done_err + current.num_error
				del self.logs[pos]
				del self._queue[pos]
				del self._proc[pos]
				del self._err[pos]
				if not self._queue:
					return 0
		self._q_pos = self._queue.index(min(self._queue))	# find our next one
		self.target = self.logs[self._q_pos]	# make it available
		return 1


class Dummy:
	'''
	Stand-in target that gives a Combiner something to start with.

	It exposes the same names a Combiner reads from a target: utime,
	num_processed, num_error and getlogent().
	'''

	def __init__(self, first):
		''' first: value exposed as utime; both counters start at zero '''
		(self.utime, self.num_processed, self.num_error) = (first, 0, 0)

	def getlogent(self):
		''' Always report success (1); nothing is read or changed '''
		return 1
	


def test():
	'''
	basic test suite- modify at will to test full functionality

	Opens every file named on the command line, merges them through a
	Combiner built on weblog.combined.Parser, prints client, utime and
	url of each entry, then the processed and error totals.
	'''

	import sys
	from weblog import combined

	logs = []
	try:
		for arg in sys.argv[1:]:
			logs.append(open(arg))

		log = Combiner(combined.Parser, logs)

		# Single-argument print(...) emits the same text whether print is
		# a statement or a function.
		while log.getlogent():
			print("%s %s %s" % (log.client, log.utime, log.url))
		print("processed- %s" % (log.num_processed,))
		print("error- %s" % (log.num_error,))
	finally:
		# Close whatever was opened, even if a later open() or the
		# parsing itself failed.
		for fd in logs:
			fd.close()


# Run the basic test suite when this module is executed as a script.
if __name__ == "__main__":
	test()