Commits

Chris Perl  committed 86d97b9

WIP. Working toward making unit tests more representative of actual usage

  • Participants
  • Parent commits adae9df

Comments (0)

Files changed (2)

File loganalyze.py

 
 class CountAggr(Aggregator):
     def _aggregate(self, datetimeobj, url, ms, ip):
+        # short circuit if the re_filter does not match the given self._key
+        if self._re_filter and not self._re_filter.search(self._key):
+            return
         hits = self.aggr_data.setdefault(self._key, 0)
         self.aggr_data[self._key] = hits + 1
 
-    def display(self, num, format_str):
-        sorted_keys = sorted(self.aggr_data, lambda a,b: cmp(data[a], data[b]))
+    def display(self, display_obj, num, format_str):
+        sorted_keys = sorted(self.aggr_data, lambda a,b: cmp(self.aggr_data[a], self.aggr_data[b]))
         for key in sorted_keys[-num:]:
-            print format_str % (key, data[key])
+            display_obj.write(format_str % (key, self.aggr_data[key]))
 
 class HitsPerUrl(CountAggr):
     # __call__ requires start, end keyword args
         self._key = url
         super(HitsPerUrl, self)._aggregate(datetimeobj, url, ms, ip)
 
-    def display(self, num):
-        super(HitsPerUrl, self).display(self, num, "%-100s %10d\n")
+    def display(self, display_obj, num):
+        super(HitsPerUrl, self).display(display_obj, num, "%-100s %10d\n")
 
 class HitsPerIP(CountAggr):
     # __call__ requires start, end keyword args
         self._key = ip
         super(HitsPerIP, self)._aggregate(datetimeobj, url, ms, ip)
 
-    def display(self, num):
-        super(HitsPerIP, self).display(self, num, "%-20s %10d\n")
+    def display(self, display_obj, num):
+        super(HitsPerIP, self).display(display_obj, num, "%-20s %10d\n")
 
 class PerUnitTimeAggr(Aggregator):
     def _aggregate(self, datetimeobj, url, ms, ip):
         if self._re_filter and not self._re_filter.search(self._key):
             return
         sub_aggr = self.aggr_data.setdefault(self._key, {'hits': 0})
-        sub_aggr['hits'] + 1
+        sub_aggr['hits'] += 1
         # Note that we are guaranteed that datetimeobj is between start and end
         seconds_offset_into_range = datetimeobj - self._start
         seconds_offset_into_range = self._total_seconds(seconds_offset_into_range)
         hits_for_bucket = sub_aggr.setdefault((bucket_start, bucket_end), 0)
         sub_aggr[(bucket_start, bucket_end)] = hits_for_bucket + 1
 
-    def display(self):
+    def display(self, display_obj):
         sorted_keys = sorted(self.aggr_data, lambda a,b: cmp(self.aggr_data[a]['hits'], self.aggr_data[b]['hits']))
         for key in sorted_keys:
-            print "%-100s %10d" % (key, self.aggr_data[key]['hits'])
-            time_period_keys = [ x for x in self.aggr_data[key] ]
+            display_obj.write("%-100s %10d\n" % (key, self.aggr_data[key]['hits']))
+            time_period_keys = [ x for x in self.aggr_data[key] if x != 'hits' ]
             sorted_time_period_keys = sorted(time_period_keys, lambda a,b: cmp(a[0], b[0]))
             for bucket_bottom, bucket_top in sorted_time_period_keys:
-                print "\t%s - %s %20d" % (bucket_bottom, bucket_top, 
-                                          self.aggr_data[key][(bucket_bottom, bucket_top)])
-            print ""
+                display_obj.write("\t%s - %s %20d\n" % (bucket_bottom, bucket_top, 
+                                                        self.aggr_data[key][(bucket_bottom, bucket_top)]))
+            display_obj.write("\n")
 
 class HitsPerUrlPerUnitTime(PerUnitTimeAggr):
     # __call__ requires start, end, step, re_filter keyword args
         self._key = ip
         return super(HitsPerIPPerUnitTime, self)._aggregate(datetimeobj, url, ms, ip)
 
-    def display(self):
-        sorted_keys = sorted(self.aggr_data, lambda a,b: cmp(self.aggr_data[a]['hits'], self.aggr_data[b]['hits']))
-        for key in sorted_keys:
-            print "%-100s %10d" % (key, self.aggr_data[key]['hits'])
-            ms_period_keys = [ x for x in self.aggr_data[key] ]
-            sorted_ms_period_keys = sorted(ms_period_keys, lambda a,b: cmp(a[0], b[0]))
-            for bucket_bottom, bucket_top in sorted_ms_period_keys:
-                    print "\t%4d ms - %4d ms %10d" % (bucket_bottom, bucket_top,
-                                                  self.aggr_data[key][(bucket_bottom, bucket_top)])
-            print ""
-
 class TimeToRenderLinearAggr(Aggregator):
     def _aggregate(self, datetimeobj, url, ms, ip):
         # short circuit if the re_filter does not match the given self._key
         hits_for_bucket = sub_aggr.setdefault((bucket_start, bucket_end), 0)
         sub_aggr[(bucket_start, bucket_end)] = hits_for_bucket + 1
 
+    def display(self, display_obj):
+        sorted_keys = sorted(self.aggr_data, lambda a,b: cmp(self.aggr_data[a]['hits'], self.aggr_data[b]['hits']))
+        for key in sorted_keys:
+            display_obj.write("%-100s %10d\n" % (key, self.aggr_data[key]['hits']))
+            ms_period_keys = [ x for x in self.aggr_data[key] if x != 'hits' ]
+            sorted_ms_period_keys = sorted(ms_period_keys, lambda a,b: cmp(a[0], b[0]))
+            for bucket_bottom, bucket_top in sorted_ms_period_keys:
+                    display_obj.write("\t%4d ms - %4d ms %10d\n" % (bucket_bottom, bucket_top,
+                                                                    self.aggr_data[key][(bucket_bottom, bucket_top)]))
+            display_obj.write("\n")
+
 class HitsPerUrlByTimeToRenderLinear(TimeToRenderLinearAggr):
     # __call__ requires start, end, ms_step, re_filter keyword args
     def _aggregate(self, datetimeobj, url, ms, ip):
         "Dec": 12
     }
 
-    def __init__(self):
+    def __init__(self, display_obj=sys.stdout):
         self.log_data = []
         self.start = None
         self.end = None
+
         self._saved_time_periods = {}
+        self._display_obj = display_obj
 
         self._setup_parser_do_set()
         self._setup_parser_do_topurls()
                 traceback.print_exc()
                 continue
 
-    def do_cmd(self, line, testing=False):
+    def do_cmd(self, line):
         components = shlex.split(line)
         cmd = components[0]
         args = components[1:]
             method_for_cmd = method_for_cmd.replace('-', '_')
             if method_for_cmd == method:
                 getattr(self, method)(*args)
-                if not testing:
-                    display_method_for_cmd = method_for_cmd.replace("_do", "_display")
                 break
         else:
-            print "Unknown Command: %s" % cmd
+            self._display_obj.write("Unknown Command: %s\n" % cmd)
 
     def parse_logfile(self, path):
         cmds = []
 
     def _setup_parser_do_topurls(self):
         self._parser_do_topurls = NonExitingOptionParser(usage="topurls [number of urls to display]")
+        self._parser_do_topurls.add_option('-f', '--filter', action='store', type='str', dest='re_filter')
 
     def _setup_parser_do_url_hits_per_time(self):
         self._parser_do_url_hits_per_time = NonExitingOptionParser(usage="""url-hits-per-time --step STEP --filter FILTER""")
 
     def _setup_parser_do_toptalkers(self):
         self._parser_do_toptalkers = NonExitingOptionParser(usage="toptalkers [number of IPs to display]")
+        self._parser_do_toptalkers.add_option('-f', '--filter', action='store', type='str', dest='re_filter')
 
     def _setup_parser_do_ip_hits_per_time(self):
         self._parser_do_ip_hits_per_time = NonExitingOptionParser(usage="""ip-hits-per-time --step STEP --filter FILTER""")
         options, args = self._parser_do_topurls.parse_args(list(cmd_args))
         num_urls = self._do_single_optional_numerical_positional_arg_check('topurls', args, 10)
 
-        self._hpu(start=self.start, end=self.end)
+        self._hpu(start=self.start, end=self.end, re_filter=options.re_filter)
+        self._hpu.display(self._display_obj, num_urls)
 
     def _do_toptalkers(self, *cmd_args):
         options, args = self._parser_do_toptalkers.parse_args(list(cmd_args))
-        num_talkers = self._do_single_optional_numerical_positional_arg_check('toptalkers', args, 10)
+        num_ips = self._do_single_optional_numerical_positional_arg_check('toptalkers', args, 10)
 
-        self._hpi(start=self.start, end=self.end)
+        self._hpi(start=self.start, end=self.end, re_filter=options.re_filter)
+        self._hpi.display(self._display_obj, num_ips)
 
     def _do_url_hits_per_time(self, *cmd_args):
         options, args = self._parser_do_url_hits_per_time.parse_args(list(cmd_args))
             step = options.step
         
         self._hpuput(start=self.start, end=self.end, step=step, re_filter=options.re_filter)
+        self._hpuput.display(self._display_obj)
 
     def _do_ip_hits_per_time(self, *cmd_args):
         options, args = self._parser_do_ip_hits_per_time.parse_args(list(cmd_args))
             step = options.step
 
         self._hpiput(start=self.start, end=self.end, step=step, re_filter=options.re_filter)
+        self._hpiput.display(self._display_obj)
 
     def _do_url_time_to_render(self, *cmd_args):
         options, args = self._parser_do_url_time_to_render.parse_args(list(cmd_args))
             step = options.step
 
         self._hpubttrl(start=self.start, end=self.end, ms_step=step, re_filter=options.re_filter)
+        self._hpubttrl.display(self._display_obj)
 
     def _do_ip_time_to_render(self, *cmd_args):
         options, args = self._parser_do_ip_time_to_render.parse_args(list(cmd_args))
             step = options.step
 
         self._hpibttrl(start=self.start, end=self.end, ms_step=step, re_filter=options.re_filter)
+        self._hpibttrl.display(self._display_obj)
 
 def main():
     log_analyzer = LogAnalyzer()

File test/tests.py

 
 class LogFileTester(unittest.TestCase):
     def setUp(self):
-        self.log_analyzer = LogAnalyzer()
+        self._devnull = open(os.devnull, 'w')
+        self.log_analyzer = LogAnalyzer(display_obj=self._devnull)
         self._setup_log_paths()
         for path in self.logs:
             self.log_analyzer.parse_logfile(path)
 
-        self.HitsPerUrl = HitsPerUrl(self.log_analyzer)
-        self.HitsPerIP = HitsPerIP(self.log_analyzer)
-        self.HitsPerUrlPerUnitTime = HitsPerUrlPerUnitTime(self.log_analyzer)
-        self.HitsPerIPPerUnitTime = HitsPerIPPerUnitTime(self.log_analyzer)
-        self.HitsPerUrlByTimeToRenderLinear = HitsPerUrlByTimeToRenderLinear(self.log_analyzer)
-        self.HitsPerIPByTimeToRenderLinear = HitsPerIPByTimeToRenderLinear(self.log_analyzer)
-
-    def _test_HitsPerX(self, class_name, expected_data):
-        instance = getattr(self, class_name)
-        instance(start=self.log_analyzer.start, end=self.log_analyzer.end)
-        for key, count in expected_data:
-            self.assertEqual(instance.aggr_data[key], count)
+    def tearDown(self):
+        self._devnull.close()
 
     def _test_HitsPerXPerUnitTime(self, class_name, expected_data, step, re_filter, expected_keys):
         instance = getattr(self, class_name)
         self.assertEqual(len(self.log_analyzer.log_data), 245686)
 
     def test_HitsPerUrl(self):
-        expected_data = (
-            ('www.nhl.com/ice/m_scores.htm', 21967),
-            ('www.nhl.com/ice/m_home.htm', 20645),
-            ('www.nhl.com/ice/m_menu.htm', 3792),
-            ('www.nhl.com/ice/m_standings.htm', 1826),
-        )
-        self._test_HitsPerX('HitsPerUrl', expected_data)
+        expected_data = {
+            'www.nhl.com/ice/m_draft.htm':  17,
+            'www.nhl.com/ice/m_awards.htm': 17,
+            'www.nhl.com/ice/m_events.htm': 50,
+            'www.nhl.com/ice/m_photos.htm': 58,
+            'www.nhl.com/ice/m_stats.htm':  828,
+            'www.nhl.com/ice/m_teams.htm':  833,
+            'www.nhl.com/ice/m_scores.htm': 21967,
+        }
+        self.log_analyzer.do_cmd("topurls --filter '/ice/m_......?\.htm$'")
+        self.assertEqual(self.log_analyzer._hpu.aggr_data, expected_data)
 
     def test_HitsPerIP(self):
-        expected_data = (
-            ('207.46.193.83', 2231),
-            ('204.101.237.139', 2630),
-            ('31.170.160.104', 2685),
-            ('67.195.112.115', 2795),
-            ('68.171.231.80', 2816),
-            ('66.249.72.196', 3231),
-        )
-        self._test_HitsPerX('HitsPerIP', expected_data)
+        expected_data = {
+            '208.92.134.5':   1,
+            '208.92.17.243':  1,
+            '208.92.19.165':  1,
+            '208.92.136.50':  1,
+            '208.92.241.25':  1,
+            '208.92.228.62':  1,
+            '208.92.18.107':  1,
+            '208.92.59.110':  1,
+            '208.92.59.206':  1,
+            '208.92.139.246': 1,
+            '208.92.224.27':  1,
+            '208.92.240.21':  1,
+            '208.92.19.114':  1,
+            '208.92.36.12':   36,
+            '208.92.36.18':   1793,
+        }
+        self.log_analyzer.do_cmd("toptalkers --filter '^208\.92\.'")
+        self.assertEqual(self.log_analyzer._hpi.aggr_data, expected_data)
 
-    def test_HitsPerUrlPerUnitTime(self):
-        expected_data = (
-            ('www.nhl.com/ice/index.htm', ('2011-10-24:00:00:00', '2011-10-24:00:59:59'), 25),
-            ('www.nhl.com/ice/index.htm', ('2011-10-24:01:00:00', '2011-10-24:01:59:59'), 32),
-            ('www.nhl.com/ice/index.htm', ('2011-10-24:02:00:00', '2011-10-24:02:59:59'), 28),
-            ('www.nhl.com/ice/index.htm', ('2011-10-24:03:00:00', '2011-10-24:03:59:59'), 32),
-            ('www.nhl.com/ice/index.htm', ('2011-10-24:04:00:00', '2011-10-24:04:59:59'), 26),
-            ('www.nhl.com/ice/index.htm', ('2011-10-24:05:00:00', '2011-10-24:05:59:59'), 26),
-            ('www.nhl.com/ice/index.htm', ('2011-10-24:06:00:00', '2011-10-24:06:59:59'), 24),
-            ('www.nhl.com/ice/index.htm', ('2011-10-24:07:00:00', '2011-10-24:07:59:59'), 32),
-            ('www.nhl.com/ice/index.htm', ('2011-10-24:08:00:00', '2011-10-24:08:59:59'), 23),
-            ('www.nhl.com/ice/index.htm', ('2011-10-24:09:00:00', '2011-10-24:09:59:59'), 28),
-            ('www.nhl.com/ice/index.htm', ('2011-10-24:10:00:00', '2011-10-24:10:59:59'), 33),
-            ('www.nhl.com/ice/index.htm', ('2011-10-24:11:00:00', '2011-10-24:11:59:59'), 26),
-            ('www.nhl.com/ice/index.htm', ('2011-10-24:12:00:00', '2011-10-24:12:59:59'), 25),
-            ('www.nhl.com/ice/index.htm', ('2011-10-24:13:00:00', '2011-10-24:13:59:59'), 28),
-            ('www.nhl.com/ice/index.htm', ('2011-10-24:14:00:00', '2011-10-24:14:59:59'), 29),
-            ('www.nhl.com/ice/index.htm', ('2011-10-24:15:00:00', '2011-10-24:15:59:59'), 27),
-            ('www.nhl.com/ice/index.htm', ('2011-10-24:16:00:00', '2011-10-24:16:59:59'), 26),
-            ('www.nhl.com/ice/index.htm', ('2011-10-24:17:00:00', '2011-10-24:17:59:59'), 37),
-            ('www.nhl.com/ice/index.htm', ('2011-10-24:18:00:00', '2011-10-24:18:59:59'), 35),
-            ('www.nhl.com/ice/index.htm', ('2011-10-24:19:00:00', '2011-10-24:19:59:59'), 35),
-            ('www.nhl.com/ice/index.htm', ('2011-10-24:20:00:00', '2011-10-24:20:59:59'), 24),
-            ('www.nhl.com/ice/index.htm', ('2011-10-24:21:00:00', '2011-10-24:21:59:59'), 18),
-            ('www.nhl.com/ice/index.htm', ('2011-10-24:22:00:00', '2011-10-24:22:59:59'), 29),
-            ('www.nhl.com/ice/index.htm', ('2011-10-24:23:00:00', '2011-10-24:23:59:59'), 30),
-        )
-        self._test_HitsPerXPerUnitTime('HitsPerUrlPerUnitTime', expected_data, step=3600, 
-                                       re_filter='^www.nhl.com/ice/index.htm$', expected_keys=1)
+    def _test_HitsPerUrlPerUnitTime(self):
+        expected_data = { 
+            'www.nhl.com/ice/m_home.htm': 20645,
+            ('2011-10-24 00:00:00', '2011-10-24 00:59:59'):                 1775,
+            ('2011-10-24 01:00:00', '2011-10-24 01:59:59'):                 1236,
+            ('2011-10-24 02:00:00', '2011-10-24 02:59:59'):                  355,
+            ('2011-10-24 03:00:00', '2011-10-24 03:59:59'):                  212,
+            ('2011-10-24 04:00:00', '2011-10-24 04:59:59'):                  239,
+            ('2011-10-24 05:00:00', '2011-10-24 05:59:59'):                  254,
+            ('2011-10-24 06:00:00', '2011-10-24 06:59:59'):                  297,
+            ('2011-10-24 07:00:00', '2011-10-24 07:59:59'):                  327,
+            ('2011-10-24 08:00:00', '2011-10-24 08:59:59'):                  383,
+            ('2011-10-24 09:00:00', '2011-10-24 09:59:59'):                  463,
+            ('2011-10-24 10:00:00', '2011-10-24 10:59:59'):                  484,
+            ('2011-10-24 11:00:00', '2011-10-24 11:59:59'):                  514,
+            ('2011-10-24 12:00:00', '2011-10-24 12:59:59'):                  585,
+            ('2011-10-24 13:00:00', '2011-10-24 13:59:59'):                  494,
+            ('2011-10-24 14:00:00', '2011-10-24 14:59:59'):                  582,
+            ('2011-10-24 15:00:00', '2011-10-24 15:59:59'):                  559,
+            ('2011-10-24 16:00:00', '2011-10-24 16:59:59'):                  585,
+            ('2011-10-24 17:00:00', '2011-10-24 17:59:59'):                  709,
+            ('2011-10-24 18:00:00', '2011-10-24 18:59:59'):                  815,
+            ('2011-10-24 19:00:00', '2011-10-24 19:59:59'):                 1882,
+            ('2011-10-24 20:00:00', '2011-10-24 20:59:59'):                 2292,
+            ('2011-10-24 21:00:00', '2011-10-24 21:59:59'):                 2695,
+            ('2011-10-24 22:00:00', '2011-10-24 22:59:59'):                 1858,
+            ('2011-10-24 23:00:00', '2011-10-24 23:59:59'):                 1050,
+        }
 
-        expected_data = (
-            ('www.nhl.com/ice/m_scores.htm', ('2011-10-24:00:00:00', '2011-10-24:02:14:59'), 607),
-            ('www.nhl.com/ice/m_scores.htm', ('2011-10-24:02:15:00', '2011-10-24:04:29:59'), 323),
-            ('www.nhl.com/ice/m_scores.htm', ('2011-10-24:04:30:00', '2011-10-24:06:44:59'), 236),
-            ('www.nhl.com/ice/m_scores.htm', ('2011-10-24:06:45:00', '2011-10-24:08:59:59'), 377),
-            ('www.nhl.com/ice/m_scores.htm', ('2011-10-24:09:00:00', '2011-10-24:11:14:59'), 401),
-            ('www.nhl.com/ice/m_scores.htm', ('2011-10-24:11:15:00', '2011-10-24:13:29:59'), 463),
-            ('www.nhl.com/ice/m_scores.htm', ('2011-10-24:13:30:00', '2011-10-24:15:44:59'), 467),
-            ('www.nhl.com/ice/m_scores.htm', ('2011-10-24:15:45:00', '2011-10-24:17:59:59'), 598),
-            ('www.nhl.com/ice/m_scores.htm', ('2011-10-24:18:00:00', '2011-10-24:20:14:59'), 4505),
-            ('www.nhl.com/ice/m_scores.htm', ('2011-10-24:20:15:00', '2011-10-24:22:29:59'), 11659),
-            ('www.nhl.com/ice/m_scores.htm', ('2011-10-24:22:30:00', '2011-10-24:23:59:59'), 2331),
-        )
-        self._test_HitsPerXPerUnitTime('HitsPerUrlPerUnitTime', expected_data, step=8100, 
-                                       re_filter='^www.nhl.com/ice/m_scores.htm$', expected_keys=1)
 
     def test_HitsPerIPPerUnitTime(self):
         expected_data = (