Commits

Virgil Dupras committed 911c106

Added visit expiration, and made a few cosmetic changes. That's pretty much done for 0.1 (which has been revised a little bit).

Comments (0)

Files changed (7)

 
 WEBGURU 0.1
 
-A main window with a button to load a single log file. When it's loaded, it shows a list of all visitors in the file, along with their last hit, the number of hits, visit duration, referer, last agent.
+A main window with a button to load a single log file. When it's loaded, it shows a list of all visitors in the file, along with their last hit, the number of hits, referrer, last agent.
 
 Visit expiration is hardcoded at 20 minutes.
-
-All the structural stuff is going to be there (build/package scripts and all that stuff).

core/model/visit.py

 # http://www.hardcoded.net/licenses/bsd_license
 
 from collections import Sequence, namedtuple
+from datetime import timedelta
+
+VISIT_EXPIRATION_DELAY = timedelta(minutes=20)
 
 Hit = namedtuple('Hit', 'ip ident user time method url version response size referrer agent')
 
         self.last_url = basehit.url
         self.referrer = basehit.referrer
         self.last_time = basehit.time
+        self.last_agent = basehit.agent
     
     def add_hit(self, hit):
         assert hit.ip == self.ip
         self.hitcount += 1
         self.last_url = hit.url
         self.last_time = hit.time
+        self.last_agent = hit.agent
+    
+    @property
+    def expiration_time(self):
+        return self.last_time + VISIT_EXPIRATION_DELAY
     
 
 class VisitList(Sequence):
     
     #--- Public
     def add_hit(self, hit):
-        if hit.ip in self._ip2visit:
-            visit = self._ip2visit[hit.ip]
+        visit = self._ip2visit.get(hit.ip, None)
+        if visit is not None and hit.time < visit.expiration_time:
             visit.add_hit(hit)
         else:
             visit = Visit(hit)

core/reader/apache.py

 # http://www.hardcoded.net/licenses/bsd_license
 
 import re
+from datetime import datetime
 
 from hsutil.files import FileOrPath
 
 from ..model.visit import Hit
 
 RE_LOGLINE = re.compile(r'(\d+\.\d+\.\d+\.\d+) ([^ ]*) ([^ ]*) \[([^ ]*) \+\d{4}\] "([^"]*)" (\d+) ([^ ]*) "([^"]*)" "([^"]*)"')
+DATETIME_FMT = '%d/%b/%Y:%H:%M:%S'
 
 def parse(infile):
     with FileOrPath(infile, 'rt') as fp:
             if m is None:
                 continue
             ip, ident, user, time, request, response, size, referrer, agent = m.groups()
+            time = datetime.strptime(time, DATETIME_FMT)
             try:
                 method, url, version = request.split()
             except ValueError:

gui/visit_table.py

 
 from hsgui.table import GUITable, Row
 
+DATETIME_FMT = '%d/%b/%Y:%H:%M:%S'
+
 class VisitTableRow(Row):
     def __init__(self, visit):
         self.ip = visit.ip
         self.last_url = visit.last_url
         self.referrer = visit.referrer
-        self.last_time = visit.last_time
+        self.last_time = visit.last_time.strftime(DATETIME_FMT)
+        self.last_agent = visit.last_agent
         self.hitcount = visit.hitcount
     
 

qt/controller/main_window.py

         
     def _setupUi(self):
         self.setWindowTitle(QCoreApplication.instance().applicationName())
+        self.resize(640, 480)
         self.centralwidget = QWidget(self)
         self.verticalLayout = QVBoxLayout(self.centralwidget)
         self.visitTableView = QTableView(self.centralwidget)

qt/controller/visit_table.py

         Column('last_url', 'Last Url', 150),
         Column('referrer', 'Referrer', 150),
         Column('last_time', 'Last Time', 80),
+        Column('last_agent', 'Agent', 100),
+        Column('hitcount', '# Hits', 50),
     ]
     
     def __init__(self, mainwindow, view):
         model = VisitTableModel(view=self, mainwindow=mainwindow.model)
         Table.__init__(self, model, view)
+        self.setColumnsWidth(None)
     

tests/main_test.py

     eq_(row.referrer, '-')
     eq_(row.hitcount, 1)
     eq_(row.last_time, '14/Nov/2010:06:35:00')
+    eq_(row.last_agent, 'AdvancedInstaller')
     # Now, let's check a multi-hit visit
     row = app.vtable[1]
     eq_(row.ip, '213.245.230.133')
     eq_(row.referrer, '-')
     eq_(row.hitcount, 4)
     eq_(row.last_time, '14/Nov/2010:06:33:36')
+    eq_(row.last_agent, 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; fr-fr) AppleWebKit/533.'\
+        '18.1 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5')
 
 def test_bogus_request_field_doesnt_crash():
     # Sometimes, requests are empty (instead of having three parts). Don't crash on this.
     lines = ['109.86.196.64 - - [14/Nov/2010:07:01:15 +0000] "-" 400 0 "-" "-"']
     app.load_lines(lines) # no crash
     eq_(len(app.vtable), 1)
-    eq_(app.vtable[0].last_url, '')
+    eq_(app.vtable[0].last_url, '')
+
+def test_hit_from_expired_ip_creates_a_new_visit():
+    # When a hit comes from an IP that is already in a visit, but arrives at or after that
+    # visit's expiration time (last hit + 20 minutes by default), create a new visit.
+    app = TestApp()
+    lines = [
+        '42.42.42.42 - - [15/Nov/2010:04:20:00 +0000] "-" 400 0 "-" "-"',
+        # 5 minutes later, same visit
+        '42.42.42.42 - - [15/Nov/2010:04:25:00 +0000] "-" 400 0 "-" "-"',
+        # 15 minutes later, more than 20 min after the first hit, but *not* 20 min after the last
+        # hit. Still the same visit.
+        '42.42.42.42 - - [15/Nov/2010:04:40:00 +0000] "-" 400 0 "-" "-"',
+        # 20 minutes later, new visit.
+        '42.42.42.42 - - [15/Nov/2010:05:00:00 +0000] "-" 400 0 "-" "-"',
+    ]
+    app.load_lines(lines)
+    eq_(len(app.vtable), 2)