Commits

Josh VanderLinden  committed 9c72f9f

Added some caching so there are fewer hits to the database on each request. Made better use of Django's built-in GIS utilities.

  • Participants
  • Parent commits b960559

Comments (0)

Files changed (10)

 syntax: glob
+dist
+build
+*.egg*
 *.pyc
 *.swp
-MANIFEST
+*.log
+MANIFEST
 django-tracking changes
 =======================
 
+0.3.5
+-----
+
+- Using Django's GIS utilities a little better
+- Added caching in the middleware to reduce hits to the database
+- Added some logging
+
+0.3.3
+-----
+
+- Improved the setup.py script
+
+0.3.2
+-----
+
+- Merged changes from ramusus to better deal with the unicode problems
+
+0.3.1
+-----
+
+- Trying to handle some unicode problems
+- Added a sample base.html template
+- Code cleaning
+
+0.3.0
+-----
+
+- Fixed several bugs dealing with performance
+- Improved stability
+- Added some German translations
+- Removed dependency on GeoIP C API and Python API in favor of Django's
+  built-in GIS utilities
+- Tweaked the active users map
+
 0.2.5
 -----
 
 * ``GOOGLE_MAPS_KEY``: Your very own Google Maps API key
 * ``TRACKING_USE_GEOIP``: set this to ``True`` if you want to see markers on
   the map
-* ``GEOIP_DATA_FILE``: set this to the absolute path on the filesystem of your
+* ``GEOIP_PATH``: set this to the absolute path on the filesystem of your
   ``GeoIP.dat`` or ``GeoIPCity.dat`` or whatever file.  It's usually something
-  like ``/usr/local/share/GeoIP.dat`` or ``/usr/share/GeoIP/GeoIP.dat``.  You
-  can try leaving this blank if you want; the code will look in the default
-  location if possible.
+  like ``/usr/local/share/GeoIP.dat`` or ``/usr/share/GeoIP/GeoIP.dat``.
+* ``GEOIP_CACHE_TYPE``: The type of caching to use when dealing with GeoIP data:
+
+    * ``0`` (``GEOIP_STANDARD``): read database from filesystem, uses least
+      memory.
+    * ``1`` (``GEOIP_MEMORY_CACHE``): load database into memory, faster
+      performance but uses more memory.
+    * ``2`` (``GEOIP_CHECK_CACHE``): check for updated database.  If database
+      has been updated, reload filehandle and/or memory cache.
+    * ``4`` (``GEOIP_INDEX_CACHE``): just cache the most frequently accessed
+      index portion of the database, resulting in faster lookups than
+      ``GEOIP_STANDARD``, but less memory usage than ``GEOIP_MEMORY_CACHE`` -
+      useful for larger databases such as GeoIP Organization and GeoIP City.
+      Note, for GeoIP Country, Region and Netspeed databases,
+      ``GEOIP_INDEX_CACHE`` is equivalent to ``GEOIP_MEMORY_CACHE``.
+      *default*
+
 * ``DEFAULT_TRACKING_TEMPLATE``: The template to use when generating the
   visitor map.  Defaults to ``tracking/visitor_map.html``.
 

File tracking/__init__.py

-VERSION = (0, 3, 3)
+import listeners
+
+VERSION = (0, 3, 5)
 
 def get_version():
     "Returns the version as a human-format string."

File tracking/listeners.py

+import logging
+
+from django.core.cache import cache
+from django.db.models.signals import post_save, post_delete
+from tracking.models import UntrackedUserAgent, BannedIP
+
+log = logging.getLogger('tracking.listeners')
+
+def refresh_untracked_user_agents(sender, instance, created=False, **kwargs):
+    """Updates the cache of user agents that we don't track"""
+
+    log.debug('Updating untracked user agents cache')
+    cache.set('_tracking_untracked_uas',
+        UntrackedUserAgent.objects.all(),
+        3600)
+
+def refresh_banned_ips(sender, instance, created=False, **kwargs):
+    """Updates the cache of banned IP addresses"""
+
+    log.debug('Updating banned IP cache')
+    cache.set('_tracking_banned_ips',
+        [b.ip_address for b in BannedIP.objects.all()],
+        3600)
+
+post_save.connect(refresh_untracked_user_agents, sender=UntrackedUserAgent)
+post_delete.connect(refresh_untracked_user_agents, sender=UntrackedUserAgent)
+
+post_save.connect(refresh_banned_ips, sender=BannedIP)
+post_delete.connect(refresh_banned_ips, sender=BannedIP)

File tracking/middleware.py

 from datetime import datetime, timedelta
+import logging
 import random
+import re
 import time
-import re
+import traceback
 import urllib, urllib2
 
 from django.conf import settings
 from django.contrib.auth.models import AnonymousUser
+from django.core.cache import cache
 from django.db.utils import DatabaseError
 from django.http import Http404
 from tracking import utils
 from tracking.models import Visitor, UntrackedUserAgent, BannedIP
 
 title_re = re.compile('<title>(.*?)</title>')
+log = logging.getLogger('tracking.middleware')
 
 class VisitorTrackingMiddleware:
     """
         ip_address = utils.get_ip(request)
         user_agent = request.META.get('HTTP_USER_AGENT', '')[:255]
 
+        # retrieve untracked user agents from cache
+        ua_key = '_tracking_untracked_uas'
+        untracked = cache.get(ua_key)
+        if untracked is None:
+            log.info('Updating untracked user agent cache')
+            untracked = UntrackedUserAgent.objects.all()
+            cache.set(ua_key, untracked, 3600)
+
         # see if the user agent is not supposed to be tracked
-        for ua in UntrackedUserAgent.objects.all():
+        for ua in untracked:
             # if the keyword is found in the user agent, stop tracking
             if unicode(user_agent, errors='ignore').find(ua.keyword) != -1:
+                log.debug('Not tracking UA "%s" because of keyword: %s' % (user_agent, ua.keyword))
                 return
 
         if hasattr(request, 'session'):
         # ensure that the request.path does not begin with any of the prefixes
         for prefix in prefixes:
             if request.path.startswith(prefix):
+                log.debug('Not tracking request to: %s' % request.path)
                 return
 
         # if we get here, the URL needs to be tracked
         try:
             visitor = Visitor.objects.get(**attrs)
         except Visitor.DoesNotExist:
-            # see if there's a visitor with the same IP and session key
+            # see if there's a visitor with the same IP and user agent
             # within the last 5 minutes
             cutoff = now - timedelta(minutes=5)
             visitors = Visitor.objects.filter(
             if len(visitors):
                 visitor = visitors[0]
                 visitor.session_key = session_key
+                log.debug('Using existing visitor for IP %s / UA %s: %s' % (ip_address, user_agent, visitor.id))
             else:
                 # it's probably safe to assume that the visitor is brand new
                 visitor = Visitor(**attrs)
+                log.debug('Created a new visitor: %s' % attrs)
         except:
             return
 
         try:
             visitor.save()
         except DatabaseError:
-            pass
+            log.error('There was a problem saving visitor information:\n%s\n\n%s' % (traceback.format_exc(), locals()))
 
 class VisitorCleanUpMiddleware:
-    """
-    Clean up old visitor tracking records in the database
-    """
+    """Clean up old visitor tracking records in the database"""
+
     def process_request(self, request):
-        timeout = datetime.now() - timedelta(hours=utils.get_cleanup_timeout())
-        Visitor.objects.filter(last_update__lte=timeout).delete()
+        timeout = utils.get_cleanup_timeout()
+
+        if str(timeout).isdigit():
+            log.debug('Cleaning up visitors older than %s hours' % timeout)
+            timeout = datetime.now() - timedelta(hours=int(timeout))
+            Visitor.objects.filter(last_update__lte=timeout).delete()
 
 class BannedIPMiddleware:
     """
     The banned users do not actually receive the 404 error--instead they get
     an "Internal Server Error", effectively eliminating any access to the site.
     """
+
     def process_request(self, request):
-        # compile a list of all banned IP addresses
-        try:
+        key = '_tracking_banned_ips'
+        ips = cache.get(key)
+        if ips is None:
+            # compile a list of all banned IP addresses
+            log.info('Updating banned IPs cache')
             ips = [b.ip_address for b in BannedIP.objects.all()]
-        except:
-            # in case we don't have the database setup yet
-            ips = []
+            cache.set(key, ips, 3600)
 
         # check to see if the current user's IP address is in that list
         if utils.get_ip(request) in ips:
             raise Http404
-
-class GoogleAnalyticsMiddleware:
-    """
-    This is a server-side version of the Google Analytics tracking.  It should
-    be able to track things like requests to RSS feeds and whatnot, but it does
-    tend to lose some information, such as the IP the request is coming from.
-
-    ******* THIS IS NON-OPERATIONAL FOR THE TIME BEING *******
-    """
-    def process_response(self, request, response):
-        # get the title from the response if possible
-        try:
-            title = title_re.search(response.content).group(1)
-        except:
-            title = ''
-
-        host = request.META.get('HTTP_HOST', '')
-        path = request.META.get('PATH_INFO', '/')
-
-        cookie = random.randint(10000000, 99999999)
-        rand = random.randint(1000000000, 3188019257283953000)
-        today = int(time.mktime(datetime.now().timetuple()))
-        small = random.randint(3, 50)
-
-        utmcc = '__utma=%(r1)i.%(r2)i.%(r3)i.%(r3)i.%(r3)i.%(r4)i;+__utmz=%(r1)i.%(r3)i.%(r4)i.%(r4)i.utmscr=%(host)s|utmccn=(referral)|utmcmd=referral|utmcct=%(path)s;' % {
-            'r1': cookie,       'r2': rand,         'r3': today,
-            'r4': small,        'host': host,       'path': path
-        }
-
-        # setup a dictionary of values for use in the query string
-        info = {
-            'utmwv': 4.3,
-            'utmac': settings.GOOGLE_ANALYTICS_ID,
-            'utmhn': host,
-            'utmp': path,
-            'utmr': request.META.get('HTTP_REFERER', '-'),
-            'utmcc': utmcc,
-            'utmn': random.randint(1000000000, 9999999999),
-            'utmcs': 'UTF-8',
-            'utmsr': '800x600',                                     # resolution
-            'utmsc': '16-bit',                                      # color-depth
-            'utmul': 'en-us',                                       # language
-            'utmje': '0',                                           # java
-            'utmfl': '9.0  r115',                                   # flash
-            'utmdt': title,                                         # title
-        }
-
-        # put all of the info values where they belong
-        url = 'http://www.google-analytics.com/__utm.gif'
-        data = '&'.join(['%s=%s' % (k, urllib.quote(str(info[k]))) for k in info])
-
-        # talk to Google Analytics
-        conn = urllib2.urlopen('%s?%s' % (url, data))
-        conn.read()
-
-        # send the response back to the client
-        return response

File tracking/models.py

 from datetime import datetime, timedelta
+import logging
+import traceback
+
 from django.conf import settings
 from django.contrib.auth.models import User
-from django.contrib.gis.utils import GeoIP
+from django.contrib.gis.utils import GeoIP, GeoIPException, HAS_GEOIP
 from django.db import models
 from django.utils.translation import ugettext, ugettext_lazy as _
 from tracking import utils
-import os
+
+USE_GEOIP = getattr(settings, 'TRACKING_USE_GEOIP', False)
+CACHE_TYPE = getattr(settings, 'GEOIP_CACHE_TYPE', 4)
+
+log = logging.getLogger('tracking.models')
 
 class VisitorManager(models.Manager):
     def active(self, timeout=None):
         """
         Attempts to retrieve MaxMind GeoIP data based upon the visitor's IP
         """
-        if getattr(settings, 'TRACKING_USE_GEOIP', False) and GeoIP:
-            geoip_data_file = getattr(settings, 'GEOIP_DATA_FILE', None)
 
-            if geoip_data_file and os.access(geoip_data_file, os.R_OK):
-                gip = GeoIP.open(geoip_data_file, GeoIP.GEOIP_MEMORY_CACHE)
+        if not HAS_GEOIP or not USE_GEOIP:
+            # go no further when we don't need to
+            log.debug('Bailing out.  HAS_GEOIP: %s; TRACKING_USE_GEOIP: %s' % (HAS_GEOIP, USE_GEOIP))
+            return None
 
+        if not hasattr(self, '_geoip_data'):
+            self._geoip_data = None
             try:
-                return gip.record_by_addr(self.ip_address)
-            except SystemError:
-                # if we get here, chances are that we didn't get a result for
-                # the IP
-                pass
+                gip = GeoIP(cache=CACHE_TYPE)
+                self._geoip_data = gip.city(self.ip_address)
+            except GeoIPException:
+                # don't even bother...
+                log.error('Error getting GeoIP data for IP "%s": %s' % (self.ip_address, traceback.format_exc()))
 
-        return None
+        return self._geoip_data
 
     geoip_data = property(_get_geoip_data)
 

File tracking/templates/base.html

 <script type="text/javascript" src="{{ MEDIA_URL }}js/jquery-1.4.4.min.js"></script>
 <style type="text/css">
 #active-users-map {
-    height: 600px;
-    width: 500px;
+    height: 400px;
+    width: 600px;
 }
 </style>
 {% block extra-head %}{% endblock %}

File tracking/templates/tracking/_active_users.js

                     var listHtml = '<div id="au-' + user.id + '" ' +
                         'class="active-user location-info"><h3>' +
                         user.geoip.city + '</h3><div>' + img +
-                        user.geoip.region_name + ', ' +
+                        user.geoip.region + ', ' +
                         user.geoip.country_name + '</div>' +
                         '<div><strong>Viewing</strong> <span id="auu-' + user.id +'">' +
                         url + '</span></div>' +
     marker.value = user.id;
 
     var myHtml = '<div class="mapOverlay"><h3>' + user.geoip.city + '</h3>';
-    myHtml += '<div>' + img + user.geoip.region_name;
+    myHtml += '<div>' + img + user.geoip.region;
     myHtml += ', ' + user.geoip.country_name + '</div></div>';
 
     // Add a listener to pop up an info box when the mouse goes over a marker

File tracking/views.py

                 seconds
         ) % {'seconds': seconds })
 
-    return friendly_time
+    return friendly_time or 0
 
 def display_map(request, template_name=DEFAULT_TRACKING_TEMPLATE,
         extends_template='base.html'):