Chris Perl committed 15faa32

Starting to wire up some of the plumbing. Still have to get tests going
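The plumbing here includes a dir()-based command dispatch in cmd_loop: an input line such as "topurls 5" gets routed to a _do_topurls method if one exists. A minimal standalone sketch of that pattern (the Dispatcher class name is illustrative, not taken from the diff):

class Dispatcher(object):
    def _do_topurls(self, *cmd_args):
        # stand-in for the real aggregator call
        print "topurls called with %r" % (cmd_args,)

    def dispatch(self, line):
        # like the TODO in the diff, this assumes the line contains a space
        cmd, args = line.split(' ', 1)
        method_for_cmd = "_do_%s" % cmd.lower()
        for method in [x for x in dir(self) if x.startswith('_do_')]:
            if method == method_for_cmd:
                getattr(self, method)(*args.split(' '))
                break
        else:
            print "Unknown Command: %s" % line

Dispatcher().dispatch("topurls 5")   # prints: topurls called with ('5',)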


Files changed (1)

 import math
 
 from datetime import datetime, timedelta
-from optparse import OptionParser, OptionValueError
+from optparse import OptionParser, OptParseError, OptionValueError
 
 # XXX: Debug
 import pdb
         sub_aggr[(bucket_start, bucket_end)] = hits_for_bucket + 1
         return aggr
 
-class HitsByIPPerByTimeToRenderLinear(IpAggregator):
+class HitsByIPByTimeToRenderLinear(IpAggregator):
     # __call__ requires start, end, ms_step keyword args
     def _aggregate(self, aggr, datetimeobj, url, ms, ip):
         sub_aggr = aggr.setdefault(ip, {'total_hits': 0})
         sub_aggr[(bucket_start, bucket_end)] = hits_for_bucket + 1
         return aggr
 
-class LinearQuantizeSpeed(UrlAggregator):
-    def __init__(self, log_analyzer):
-        self._log_analyzer = log_analyzer
-        self._start = None
-        self._end = None
-
-    def __call__(self, start, end, ms_step):
-        self._ms_step = int(ms_step)
-
-        if self._start == start and self._end == end:
-            self.output()
-        else:
-            self.clear()
-            self._start = start
-            self._end = end
-            for datetimeobj, url, ms, ip in self._log_analyzer.log_data:
-                if start <= datetimeobj <= end:
-                    self.update(datetimeobj, url, ms, ip)
-            self.output()
-
-    def update(self, datetimeobj, url, ms, ip):
-        url_dict = self.setdefault(url, {'count': 0, 'buckets': []})
-        url_dict['count'] += 1
-
-        bucket_idx = ms / self._ms_step
-        num_buckets = bucket_idx + 1
-        bucket_list_len = len(url_dict['buckets'])
-        if bucket_list_len < num_buckets:
-            url_dict['buckets'].extend([0 for x in range(num_buckets-bucket_list_len)])
-
-        url_dict['buckets'][bucket_idx] += 1
-
-    def output(self):
-        url_list = self._sort()
-        if url_list:
-            # only want distributions for the top 5 urls
-            for url in url_list[-5:]:
-                print "%-60s\n" % (url)
-                url_dict = self[url]
-                for bucket_idx in range(len(url_dict['buckets'])):
-                    print "%5d ms: %d" % ((bucket_idx+1)*self._ms_step, url_dict['buckets'][bucket_idx])
-                print
-                
-class QuantizeSpeed(UrlAggregator):
-    def __init__(self):
-        pass
-
-    def add(self, datetimeobj, url, ip, ms):
-        pass
-
-    def output(self):
-        pass
-
 class LogAnalyzer(object):
 
     month_map = {
         self.log_data = []
         self.start = None
         self.end = None
-
-        self.aggr_map = {
-        }
+        self._setup_parser_do_set()
+        self._setup_parser_do_topurls()
+        self._setup_parser_do_toptalkers()
 
     def cmd_loop(self):
         while True:
                     continue
 
                 # TODO: This splitting is broken, i.e. nocmd causes exception
-                func, args = line.split(' ', 1)
-                if func.startswith('\\'):
-                        # internal command, not for an aggregator
-                        # i.e. setting start and end times
-                        func = func[1:]
-                        method = "_%s" % func
-                        if hasattr(self, method):
-                            getattr(self, method)(*args.split(' '))
-                            continue
-
-                for cmd in self.aggr_map.keys():
-                    if func.lower() == cmd:
-                        self.aggr_map[cmd](self.start, self.end, *args.split(' '))
+                cmd, args = line.split(' ', 1)
+
+                for method in [ x for x in dir(self) if x.startswith('_do_')]:
+                    method_for_cmd = "_do_%s" % cmd.lower()
+                    if method_for_cmd == method:
+                        # TODO: figure out the kwargs stuff
+                        getattr(self, method)(*args.split(' '))
                         break
                 else:
                     print "Unknown Command: %s" % line
                 print ""
                 break
 
+            except OptParseError, e:
+                print e
+
             except StandardError, e:
                 traceback.print_exc()
                 continue
         line = re.sub('&_=\d+', '', line)
         return line
 
-    def _set(self, what, arg):
-        if what == "start":
-            self.start = datetime(*[ int(x) for x in re.split('[-:]', arg)])
-        elif what == "end":
-            self.end = datetime(*[int(x) for x in re.split('[-:]', arg)])
-        else:
-            pass
+    def _setup_parser_do_set(self):
+        self._parser_do_set = OptionParser(usage="set [start|end] YYYY-mm-dd:HH:MM:SS")
+
+    def _setup_parser_do_topurls(self):
+        self._parser_do_topurls = OptionParser(usage="topurls [number of urls to display]")
+
+    def _setup_parser_do_toptalkers(self):
+        self._parser_do_toptalkers = OptionParser(usage="toptalkers [number of IPs to display]")
+
+    def _do_set(self, *cmd_args):
+        options, args = self._parser_do_set.parse_args(list(cmd_args))
+        if len(args) != 2:
+            raise OptionValueError("Wrong number of arguments for set command")
+        what = args[0]
+        if what not in ("start", "end"):
+            raise OptionValueError("First argument to 'set' must be 'start' or 'end'")
+        datetime_string = args[1]
+        if not re.match('\d{4}-\d{2}-\d{2}:\d{2}:\d{2}:\d{2}', datetime_string):
+            raise OptionValueError("Datetime must be specified as YYYY-mm-dd:HH:MM:SS")
+
+        setattr(self, what, datetime(*[int(x) for x in re.split('[-:]', datetime_string)]))
+
+    def _do_topurls_toptalkers_arg_checks(self, cmd, args):
+        if len(args) != 1:
+            raise OptionValueError("First and only argument to '%s' must be a number" % cmd)
+        num = args[0]
+        if not re.match(r'\d+$', num):
+            raise OptionValueError("First and only argument to '%s' must be a number" % cmd)
+        return int(num)
+
+    def _do_topurls(self, *cmd_args):
+        options, args = self._parser_do_topurls.parse_args(list(cmd_args))
+        num_urls = self._do_topurls_toptalkers_arg_checks('topurls', args)
+
+        # setup a singleton of this aggregator for use
+        if not hasattr(self, '_HitsPerUrl'):
+            self._HitsPerUrl = HitsPerUrl(self)
+        data = self._HitsPerUrl(start=self.start, end=self.end)
+        self._display_topurls(data, num_urls)
+
+    def _display_topurls(self, data, num_urls):
+        sorted_keys = sorted(data.keys(), lambda a,b: cmp(data[a], data[b]))
+        for url in sorted_keys[-num_urls:]:
+            print "%-100s %10d" % (url, data[url])
+
+    def _do_toptalkers(self, *cmd_args):
+        options, args = self._parser_do_toptalkers.parse_args(list(cmd_args))
+        num_talkers = self._do_topurls_toptalkers_arg_checks('toptalkers', args)
+
+        # setup a singleton of this aggregator for use
+        if not hasattr(self, '_HitsByIP'):
+            self._HitsByIP = HitsByIP(self)
+        data = self._HitsByIP(start=self.start, end=self.end)
+        self._display_toptalkers(data, num_talkers)
+
+    def _display_toptalkers(self, data, num_talkers):
+        sorted_keys = sorted(data.keys(), lambda a,b: cmp(data[a], data[b]))
+        for ip in sorted_keys[-num_talkers:]:
+            print "%-16s %10d" % (ip, data[ip])
+
         
 
 def main():
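The commit message notes that tests still have to be wired up. A minimal sketch of one such test for the new _do_set validation, assuming LogAnalyzer() can be constructed with no arguments and is importable from the module under test (neither the constructor signature nor the file name is shown in this diff):

import unittest
from datetime import datetime
from optparse import OptionValueError

class TestDoSet(unittest.TestCase):
    def setUp(self):
        # assumption: no-argument constructor; adjust if __init__ takes a log file
        self.analyzer = LogAnalyzer()

    def test_set_start(self):
        # a well-formed value should populate self.start as a datetime
        self.analyzer._do_set('start', '2012-01-15:00:00:00')
        self.assertEqual(self.analyzer.start, datetime(2012, 1, 15, 0, 0, 0))

    def test_rejects_unknown_field(self):
        # anything other than 'start' or 'end' should raise OptionValueError
        self.assertRaises(OptionValueError, self.analyzer._do_set,
                          'middle', '2012-01-15:00:00:00')

if __name__ == '__main__':
    unittest.main()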