Source

weblog / resolve.py

#!/usr/bin/env python

'''
Web Log Resolving class.

Contents:
- SimpleResolver: logfile resolver class
  res_log = weblog.resolve.SimpleResolver(log object)
  methods:
	- res_log.getlogent()
  booleans:
	- res_log.lookup_host [puts hostname in res_log.host]
	- res_log.lookup_ip [puts ip address in res_log.ip]
  variables:
	- res_log.set_client ('host' | 'ip')
	- res_log.cache_size [limits size of ip and host caches, in entries]
  attributes:
	- res_log.host [FQDN, if requested]
	- res_log.ip [IP address, if requested]
	- all attributes of the log object are available as well.

  If lookup_host cannot resolve the client, the unresolved client string is
  stored in the host attribute; if lookup_ip cannot resolve it, None is stored
  in the ip attribute. If set_client is set, the client attribute is replaced
  with the requested value (for 'ip', only when the lookup succeeded).

- test: test function
'''


# (c) 1998 Copyright Mark Nottingham
# <mnot@pobox.com>
#
# This software may be freely distributed, modified and used,
# provided that this copyright notice remain intact.
#
# This software is provided 'as is' without warranty of any kind.


__version__ = '1.0'


import socket

# Module-level aliases for the socket resolver calls used by SimpleResolver:
# _gethost is indexed with [0] for the host name, _getip returns the address.
_gethost = socket.gethostbyaddr
_getip = socket.gethostbyname

class SimpleResolver:
	'''
	Wrap a log object and add host name / IP address lookups to each entry.

	Attributes not found on the resolver are looked up on the wrapped log
	object, so the resolver can be used wherever the log object is used.

	Settings (plain attributes, change them after construction):
	- lookup_host: if true, getlogent() stores the resolved name in .host
	- lookup_ip:   if true, getlogent() stores the resolved address in .ip
	- set_client:  'host' or 'ip' to override .client with that value
	- cache_size:  entry count above which a lookup cache is emptied
	'''

	def __init__(self, log):
		''' Wrap log; all lookups are disabled until the settings are changed '''
		self.log = log
		self._namecache = {}		# client string -> host name (or client)
		self._ipcache = {}			# client string -> ip address (or None)
		self.cache_size = 100000
		self.lookup_host = 0
		self.lookup_ip = 0
		self.set_client = ''

	def __getattr__(self, attr):
		''' Delegate attributes missing on the resolver to the log object '''
		# If 'log' itself is missing (instance created without __init__, e.g.
		# while being copied or unpickled), self.log would re-enter this
		# method forever; fail cleanly instead.
		if attr == 'log':
			raise AttributeError(attr)
		try:
			return getattr(self.log, attr)
		except AttributeError:
			raise AttributeError(attr)

	def _resolve_host(self, client):
		''' Return the cached or freshly resolved host name for client '''
		if len(self._namecache) > self.cache_size:
			self._namecache = {}
		try:
			return self._namecache[client]
		except KeyError:
			pass
		try:
			host = _gethost(client)[0]
		except (socket.error, UnicodeError):
			# Best effort: an unresolvable (or malformed) client is kept
			# as-is so that .host is always usable.
			host = client
		self._namecache[client] = host
		return host

	def _resolve_ip(self, client):
		''' Return the cached or freshly resolved address, None on failure '''
		if len(self._ipcache) > self.cache_size:
			self._ipcache = {}
		try:
			return self._ipcache[client]
		except KeyError:
			pass
		try:
			ip = _getip(client)
		except (socket.error, UnicodeError):
			ip = None
		self._ipcache[client] = ip
		return ip

	def getlogent(self):
		'''
		Increment position in the log and populate requested attributes.

		Returns 1 when the log produced another entry, 0 otherwise.
		'''
		if not self.log.getlogent():
			return 0

		client = self.log.client

		### hostname lookup
		if self.lookup_host or self.set_client == 'host':
			self.host = self._resolve_host(client)

		### ip lookup
		if self.lookup_ip or self.set_client == 'ip':
			self.ip = self._resolve_ip(client)

		### client override
		if self.set_client == 'host':
			self.client = self.host
		elif self.set_client == 'ip' and self.ip:
			self.client = self.ip
		else:
			# No override applies to this entry: drop any override left from
			# an earlier entry so .client falls through to the log's current
			# client instead of reporting a stale value.
			self.__dict__.pop('client', None)

		return 1






def test():
	'''
	basic test suite- modify at will to test full functionality

	Parses standard input with weblog.combined.Parser, wraps the parser in a
	SimpleResolver with set_client = 'host', and prints the client and url
	of every entry.
	'''

	import sys
	from weblog import combined

	log = combined.Parser(sys.stdin)

	res_log = SimpleResolver(log)
	res_log.set_client = 'host'

	while res_log.getlogent():
		# Parenthesised single-argument form prints the same text under the
		# print statement and the print() function.
		print("%s %s" % (res_log.client, res_log.url))


# Run the test routine when this file is executed as a script.
if __name__ == '__main__':
	test()