1. Peter Nixon
  2. weblog


weblog / referer.py

#!/usr/bin/env python

'''
Web Log Referer Typing class.

- Typer: logfile referer typing class
  ref_log = weblog.referer.Typer(log object)
	- ref_log.getlogent()
	- ref_log.siteurl [list of local site urls]
	- ref_log.referer_type (MANUAL|LOCAL|OFFSITE|FILE)
	- all attributes available from the log object are available as well.

- test: test function
'''

# (c) 1998 Copyright Mark Nottingham
# <mnot@pobox.com>
# This software may be freely distributed, modified and used,
# provided that this copyright notice remain intact.
# This software is provided 'as is' without warranty of any kind.

# Web Log Referer Typing
# referer.Typer will determine the type of referer associated with a hit, 
# based on the site URLS that you give it. The possible referer types are:
# - LOCAL -  on one of the sites given
# - MANUAL - a '-' hit
# - FILE - a URL beginning with 'file://'
# - OFFSITE - anything else: not on one of the sites given, and neither a
#   file nor a manual hit
# the referer attribute of LOCAL hits will be truncated to exclude the 
# scheme and host, and the corresponding attributes will be erased, if
# present.
# Make sure the .siteurl attribute is fed a list, even if it's only one item. 
# This class MUST be fed a log object that has passed through both a Web
# logfile parsing module that produces a referer attribute and the
# weblog.url.Parser class, which populates the individual URL components
# that Typer needs to operate.
# See the test() for an example.

__version__ = '1.0'

from urlparse import urlunparse
from string import lower
import socket

class Typer:
	''' Wrap a parsed log object and classify the referer of each entry.

	Attributes:
	- log: the wrapped log object; it must provide getlogent() and the
	  referer, ref_scheme, ref_host and ref_path attributes.
	- siteurl: list of local site hosts; normalised on assignment.
	- referer_type: 'MANUAL', 'LOCAL', 'FILE' or 'OFFSITE' for the current
	  entry ('' before the first call to getlogent()).

	Any attribute not set on the Typer itself is read from the wrapped log.
	'''

	def __init__(self, log):
		''' Store the wrapped log and start with no local sites. '''
		self.log = log
		self.siteurl = []
		self.referer_type = ''

	def __getattr__(self, attr):
		''' Fall back to the wrapped log for attributes not set here.

		Raises AttributeError (carrying only the attribute name) when the
		wrapped log does not have the attribute either.
		'''
		# __getattr__ only runs when normal lookup fails.  If 'log' itself is
		# missing (e.g. on an instance made without __init__), reading
		# self.log below would re-enter this method without end.
		if attr == 'log':
			raise AttributeError(attr)
		try:
			return getattr(self.log, attr)
		except AttributeError:
			raise AttributeError(attr)

	def __setattr__(self, attr, value):
		''' Store an attribute, normalising 'siteurl' on the way in.

		Every host assigned to siteurl is lowercased; when it resolves, its
		address is stored as well so that referers naming the site by
		address also match.  Hosts that fail to resolve are kept by name only.
		'''
		if attr == 'siteurl':
			# NOTE(review): the loop body was missing from the reviewed
			# source; only the 'except socket.error' clause survived.  The
			# lowercase-and-resolve behaviour below is a reconstruction --
			# confirm against the upstream weblog package.
			siteurls = []
			for item in value:
				host = item.lower()
				if host not in siteurls:
					siteurls.append(host)
				try:
					address = socket.gethostbyname(host)
				except socket.error:
					# unresolvable host: keep the name only
					continue
				if address not in siteurls:
					siteurls.append(address)
			value = siteurls
		# write straight into __dict__ so this method is not re-entered
		self.__dict__[attr] = value

	def getlogent(self):
		''' Increment position in the log and populate requested attributes

		Returns 1 when an entry was read and referer_type has been set for
		it, 0 when the wrapped log has no more entries.
		'''
		if not self.log.getlogent():
			return 0

		### clear attributes if last logent was a LOCAL, so the values of
		### the new entry are read from the wrapped log again
		if self.referer_type == 'LOCAL':
			for name in ('referer', 'ref_scheme', 'ref_host'):
				self.__dict__.pop(name, None)

		if self.referer == '-':
			kind = 'MANUAL'
		elif self.ref_host in self.siteurl:
			kind = 'LOCAL'
			# Rebuild the referer without scheme and host *before* masking
			# ref_scheme/ref_host on this object.
			# NOTE(review): the tail of this call was missing from the
			# reviewed source.  Only ref_path is known to exist; the other
			# component names are assumed and default to '' -- confirm
			# against weblog.url.
			local_referer = urlunparse((
				'',
				'',
				self.ref_path,
				getattr(self, 'ref_parameters', ''),
				getattr(self, 'ref_query', ''),
				getattr(self, 'ref_fragment', ''),
			))
			self.__dict__['ref_scheme'] = ''
			self.__dict__['ref_host'] = ''
			self.__dict__['referer'] = local_referer
		elif self.ref_scheme == 'file':
			kind = 'FILE'
		else:
			kind = 'OFFSITE'

		self.__dict__['referer_type'] = kind
		return 1

def test():
	''' basic test suite- modify at will to test full functionality

	Reads a combined-format log from standard input, takes the local site
	hosts from the command-line arguments, and prints the referer, referer
	path and referer type of every entry.
	'''

	import sys
	from weblog import combined, url

	# parse raw log lines, then split URLs into components, then type referers
	log = combined.Parser(sys.stdin)
	p_log = url.Parser(log)
	ref_log = Typer(p_log)
	ref_log.siteurl = sys.argv[1:]

	while ref_log.getlogent():
		# single parenthesised argument: prints the same on Python 2 and 3
		print("%20s	%20s	%s" % (
			ref_log.referer[:20],
			ref_log.ref_path[:20],
			ref_log.referer_type,
		))

if __name__ == '__main__':