Commits

Anonymous committed a21355c

new HTML stats, some optimizations
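
With this change, `dump_stats()` emits an HTML report directly when the target
filename ends in '.html' (see src/engine/SCons/Heapmonitor.py below). A minimal
usage sketch, assuming object tracking has already been set up and snapshots
taken; the filenames 'heap.html' and 'heap.pickle' are illustrative:

    import SCons.Heapmonitor

    # A '.html' suffix routes the dump through HtmlStats.create_html(), which
    # writes the summary page plus charts into a 'heap_files' directory.
    SCons.Heapmonitor.dump_stats('heap.html')

    # Any other target still produces a cPickle dump via MemStats.dump_stats().
    SCons.Heapmonitor.dump_stats('heap.pickle')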


Files changed (2)

src/engine/SCons/Heapmonitor.py

 import sys
 import time
 
-import weakref
-import new
-import inspect
+from weakref     import ref as weakref_ref
+from new         import instancemethod
+from inspect     import stack, getmembers
+
 import cPickle
 import gc
 
     # Therefore, the additional arguments have 'magic' names to make it less
     # likely that an argument name clash occurs.
     _observers[klass] = _ClassObserver(ki, name, resolution_level, keep, trace)
-    klass.__init__ = new.instancemethod(
+    klass.__init__ = instancemethod(
         lambda *args, **kwds: f(_observers[klass], *args, **kwds), None, klass)
 
 def _restore_constructor(klass):
     """
     Get a friendly timestamp represented as a string.
     """
+    if t is None: 
+        return ''
     h, m, s = int(t / 3600), int(t / 60 % 60), t % 60
     return "%02d:%02d:%05.2f" % (h, m, s)
 
     Stores size and lifetime information of a tracked object. A weak reference is
     attached to monitor the object without preventing its deletion.
     """
+    __slots__ = ("ref", "id", "repr", "name", "birth", "death", "trace",
+        "footprint", "_resolution_level", "__dict__")
 
     def __init__(self, instance, resolution_level=0, trace=0):
         """
         callback). The size of the object is recorded in 'footprint' as 
         (timestamp, size) tuples.
         """
-        self.ref = weakref.ref(instance, self.finalize)
+        self.ref = weakref_ref(instance, self.finalize)
         self.id = id(instance)
         self.repr = ''
         self.name = str(instance.__class__)
         if trace:
             self._save_trace()
 
-        #initial_size = SCons.asizeof.basicsize(instance)
         initial_size = SCons.asizeof.basicsize(instance) or 0
         so = SCons.asizeof.Asized(initial_size, initial_size)
         self.footprint = [(self.birth, so)]
 
     def __getstate__(self):
         """
-        Make the object serializable for dump_stats. 
-        Weakrefs cannot be serialized. Return all members but the weak reference
-        in a dictionary.
+        Make the object serializable for dump_stats. Read the available slots
+        and store their values in a dictionary. Derived values (stored in the
+        instance dict) are not pickled, as they can be reconstructed from the
+        other data. Weak references cannot be serialized, so 'ref' is skipped
+        as well.
         """
-        state = self.__dict__.copy()
-        del state['ref']
-        return state        
+        state = {}
+        for name in getattr(TrackedObject, '__slots__', ()):
+            if hasattr(self, name) and name not in ['ref', '__dict__']:
+                state[name] = getattr(self, name)
+        return state
+
+    def __setstate__(self, state):
+        """
+        Restore the state from pickled data. Needed because __getstate__
+        returns a plain dictionary of slot values for this slotted class.
+        """
+        for key, value in state.items():
+            setattr(self, key, value)        
 
     def _print_refs(self, file, refs, total, prefix='    ', level=1, 
         minsize=0, minpct=0.1):
         """
         Save current stack trace as formatted string.
         """
-        st = inspect.stack()
+        st = stack()
         try:
             self.trace = []
             for f in st[5:]: # eliminate our own overhead
         fp.desc = str(description)
 
         # Compute overhead of all structures, use sizer to exclude tracked objects(!)
+        fp.overhead = 0
         if fp.tracked_total:
             fp.overhead = sizer.asizeof(tracked_index, tracked_objects, footprint)
             fp.asizeof_total -= fp.overhead
         """
         if isinstance(file, type('')):
             file = open(file, 'w')
-        cPickle.dump(tracked_index, file)
-        cPickle.dump(footprint, file)
+        cPickle.dump(tracked_index, file, protocol=cPickle.HIGHEST_PROTOCOL)
+        cPickle.dump(footprint, file, protocol=cPickle.HIGHEST_PROTOCOL)
         if close:
             file.close()
 
 
     def diff_stats(self, stats):
         raise NotImplementedError
+
+    def annotate_snapshot(self, snapshot):
+        """
+        Store additional statistical data in the snapshot.
+        """
+        if hasattr(snapshot, 'classes'):
+            return
+
+        snapshot.classes = {}
+
+        for classname in self.tracked_index.iterkeys():
+            sum = 0
+            active = 0
+            for to in self.tracked_index[classname]:
+                sum += to.get_size_at_time(snapshot.timestamp)
+                if to.birth < snapshot.timestamp and (to.death is None or 
+                   to.death > snapshot.timestamp):
+                    active += 1
+            try:
+                pct = sum * 100.0 / snapshot.asizeof_total
+            except ZeroDivisionError:
+                pct = 0
+            try:
+                avg = sum / active
+            except ZeroDivisionError:
+                avg = 0
+
+            snapshot.classes[classname] = {'sum': sum, 'avg': avg, 'pct': pct,
+                'active': active}
         
     def print_stats(self, filter=None, limit=1.0):
         """
 
         file.write('---- SUMMARY '+'-'*66+'\n')
         for fp in self.footprint:
+            self.annotate_snapshot(fp)
             file.write('%-35s %11s %12s %12s %5s\n' % \
                 (_trunc(fp.desc, 35), 'active', _pp(fp.asizeof_total), 
                  'average', 'pct'))
             for classname in classlist:
-                sum = 0
-                active = 0
-                for to in self.tracked_index[classname]:
-                    sum += to.get_size_at_time(fp.timestamp)
-                    if to.birth < fp.timestamp and (to.death is None or 
-                       to.death > fp.timestamp):
-                        active += 1
-                try:
-                    pct = sum * 100 / fp.asizeof_total
-                except ZeroDivisionError:
-                    pct = 0
-                try:
-                    avg = sum / active
-                except ZeroDivisionError:
-                    avg = 0
+                info = fp.classes[classname]
+                sum, avg, pct, active = info['sum'], info['avg'], info['pct'], info['active']
                 file.write('  %-33s %11d %12s %12s %4d%%\n' % \
                     (_trunc(classname, 33), active, _pp(sum), _pp(avg), pct))
         file.write('-'*79+'\n')
 
+class HtmlStats(MemStats):
+    """
+    Output the Heapmonitor statistics as HTML pages and graphs.
+    """
+
+    style          = """<style type="text/css">
+        table { width:100%; border:1px solid #000; border-spacing:0px; }
+        td, th { border:0px; }
+        #nb { border:0px; }
+        #tl { margin-top:5mm; margin-bottom:5mm; }
+        #p1 { padding-left: 5px; }
+        #p2 { padding-left: 50px; }
+        #p3 { padding-left: 100px; }
+        #p4 { padding-left: 150px; }
+        #p5 { padding-left: 200px; }
+        #p6 { padding-left: 210px; }
+        #p7 { padding-left: 220px; }
+        #hl { background-color:#FFFFCC; }
+        #r1 { background-color:#BBBBBB; }
+        #r2 { background-color:#CCCCCC; }
+        #r3 { background-color:#DDDDDD; }
+        #r4 { background-color:#EEEEEE; }
+        #r5,#r6,#r7 { background-color:#FFFFFF; }
+        #num { text-align:right; }
+    </style>
+    """
+
+    nopylab_msg    = """<div>Could not generate %s chart!
+    Install <a href="http://matplotlib.sourceforge.net/">Matplotlib</a>
+    to generate charts.</div>\n"""
+
+    chart_tag      = '<img src="%s">\n'
+    header         = "<html><head><title>%s</title>%s</head><body>\n"
+    tableheader    = '<table border="1">\n'
+    tablefooter    = '</table>\n'
+    footer         = '</body></html>\n'
+
+    refrow = """<tr id="r%(level)d">
+        <td id="p%(level)d">%(name)s</td>
+        <td id="num">%(size)s</td>
+        <td id="num">%(pct)3.1f%%</td></tr>"""
+
+    def _print_refs(self, file, refs, total, level=1, minsize=0, minpct=0.1):
+        """
+        Print individual referents recursively.
+        """
+        lcmp = lambda i, j: (i.size > j.size) and -1 or (i.size < j.size) and 1 or 0
+        lrefs = list(refs)
+        lrefs.sort(lcmp)
+        if level == 1:
+            file.write('<table>\n')
+        for r in lrefs:
+            if r.size > minsize and (r.size*100.0/total) > minpct:
+                data = {'level': level, 'name': _trunc(str(r.name),128),
+                    'size': _pp(r.size), 'pct': r.size*100.0/total }
+                file.write(self.refrow % data)
+                self._print_refs(file, r.refs, total, level=level+1)
+        if level == 1:
+            file.write("</table>\n")
+
+    class_summary = """<p>%(cnt)d instances of %(cls)s were registered. The
+    average size is %(avg)s, the minimum size is %(min)s, the maximum size is
+    %(max)s.</p>\n"""
+
+    def print_class_details(self, fname, classname):
+        """
+        Print detailed statistics and instances for the class `classname`. All
+        data will be written to the file `fname`.
+        """
+        file = open(fname, "w")
+        file.write(self.header % (classname, self.style))
+
+        file.write("<h1>%s</h1>\n" % (classname))
+
+        sizes = [to.get_max_size() for to in self.tracked_index[classname]]
+        sum = reduce(lambda s, x: s + x, sizes)
+        data = {'cnt': len(self.tracked_index[classname]), 'cls': classname}
+        data['avg'] = _pp(sum / len(sizes))
+        data['max'] = _pp(max(sizes))
+        data['min'] = _pp(min(sizes))
+        file.write(self.class_summary % data)
+
+        file.write(self.charts[classname])
+
+        file.write("<h2>Instances</h2>\n")
+        for to in self.tracked_index[classname]:
+            file.write('<table id="tl" width="100%" rules="rows">\n')
+            file.write('<tr><td id="hl" width="140px">Instance</td><td id="hl">%s at 0x%08x</td></tr>\n' % (to.name, to.id))
+            if to.repr:
+                file.write("<tr><td>Representation</td><td>%s&nbsp;</td></tr>\n" % to.repr)
+            file.write("<tr><td>Lifetime</td><td>%s - %s</td></tr>\n" % (_get_timestamp(to.birth), _get_timestamp(to.death)))
+            if hasattr(to, 'trace'):
+                trace = "<pre>%s</pre>" % (''.join(to.trace))                
+                file.write("<tr><td>Instantiation</td><td>%s</td></tr>\n" % trace)
+            for (ts, size) in to.footprint:
+                file.write("<tr><td>%s</td>" % _get_timestamp(ts))
+                if not size.refs:
+                    file.write("<td>%s</td></tr>\n" % _pp(size.size))
+                else:
+                    file.write("<td>%s" % _pp(size.size))
+                    self._print_refs(file, size.refs, size.size)
+                    file.write("</td></tr>\n")
+            file.write("</table>\n")
+
+        file.write(self.footer)    
+        file.close()
+    
+    snapshot_cls_header = """<tr>
+        <th id="hl">Class</th>
+        <th id="hl" align="right">Instance #</th>
+        <th id="hl" align="right">Total</th>
+        <th id="hl" align="right">Average size</th>
+        <th id="hl" align="right">Share</th></tr>\n"""
+
+    snapshot_cls = """<tr>
+        <td>%(cls)s</td>
+        <td align="right">%(active)d</td>
+        <td align="right">%(sum)s</td>
+        <td align="right">%(avg)s</td>
+        <td align="right">%(pct)3.2f%%</td></tr>\n"""
+
+    snapshot_summary = """<p>Total virtual memory assigned to SCons at that time
+        was %(sys)s, which includes %(overhead)s profiling overhead. The
+        Heapmonitor tracked %(tracked)s in total. The measurable objects,
+        including code objects but excluding overhead, have a total size of
+        %(asizeof)s.</p>\n"""
+
+    def print_summary(self, filename, title=''):
+        """
+        Output the summary page with per-snapshot statistics and charts.
+        """
+        file = open(filename, "w")
+        file.write(self.header % (title, self.style))
+
+        file.write("<h1>%s</h1>\n" % title)
+        file.write("<h2>Memory distribution over time</h2>\n")
+        file.write(self.charts['snapshots'])
+
+        file.write("<h2>Snapshots statistics</h2>\n")
+        file.write('<table id="nb">\n')
+
+        classlist = self.tracked_index.keys()
+        classlist.sort()
+
+        for fp in self.footprint:
+            file.write('<tr><td>\n')
+            file.write('<table id="tl" rules="rows">\n')
+            self.annotate_snapshot(fp)
+            file.write("<h3>%s snapshot at %s</h3>\n" % (fp.desc or 'Untitled',\
+                _get_timestamp(fp.timestamp)))
+
+            data = {}
+            data['sys']      = _pp(fp.system_total)
+            data['tracked']  = _pp(fp.tracked_total)
+            data['asizeof']  = _pp(fp.asizeof_total)
+            data['overhead'] = _pp(getattr(fp, 'overhead', 0))
+
+            file.write(self.snapshot_summary % data)
+
+            if fp.tracked_total:
+                file.write(self.snapshot_cls_header)
+                for classname in classlist:
+                    data = fp.classes[classname].copy()
+                    data['cls'] = "<a href='%s'>%s</a>" % (self.links[classname], classname)
+                    data['sum'] = _pp(data['sum'])
+                    data['avg'] = _pp(data['avg'])
+                    file.write(self.snapshot_cls % data)
+            file.write('</table>')
+            file.write('</td><td>\n')
+            if fp.tracked_total:
+                file.write(self.charts[fp])
+            file.write('</td></tr>\n')
+
+        file.write("</table>\n")
+        file.write(self.footer)    
+        file.close()
+
+    def create_lifetime_chart(self, classname, filename=''):
+        """
+        Create a chart that depicts the lifetimes of the instances registered
+        for `classname`. The output is written to `filename`.
+        """
+        try:
+            from pylab import figure, title, xlabel, ylabel, plot, savefig
+        except ImportError:
+            return HtmlStats.nopylab_msg % (classname+" lifetime")
+
+        cnt = []
+        for to in self.tracked_index[classname]:
+            cnt.append([to.birth, 1])
+            if to.death:
+                cnt.append([to.death, -1])
+        cnt.sort()
+        for i in xrange(1, len(cnt)):
+            cnt[i][1] += cnt[i-1][1]
+            #if cnt[i][0] == cnt[i-1][0]:
+            #    del cnt[i-1]
+
+        x = [t for [t,c] in cnt]
+        y = [c for [t,c] in cnt]
+
+        figure()
+        xlabel("Execution time [s]")
+        ylabel("Instance #")
+        title("%s instances" % classname)
+        plot(x, y, 'o')
+        savefig(filename)
+
+        return HtmlStats.chart_tag % (filename)
+    
+    def create_snapshot_chart(self, filename=''):
+        """
+        Create a chart that depicts memory allocation over time, apportioned to
+        the tracked classes. The output is written to `filename`.
+        """
+        try:
+            from pylab import figure, title, xlabel, ylabel, plot, fill, legend, savefig
+            import matplotlib.mlab as mlab
+        except ImportError:
+            return self.nopylab_msg % ("memory allocation")
+
+        for fp in self.footprint:
+            self.annotate_snapshot(fp)
+
+        classlist = self.tracked_index.keys()
+        classlist.sort()
+
+        x = [fp.timestamp for fp in self.footprint]
+        base = [0] * len(self.footprint)
+        poly_labels = []
+        polys = []
+        for cn in classlist:
+            pct = [fp.classes[cn]['pct'] for fp in self.footprint]
+            if max(pct) > 3.0:
+                sz = [float(fp.classes[cn]['sum'])/(1024*1024) for fp in self.footprint]
+                sz = map( lambda x, y: x+y, base, sz )
+                xp, yp = mlab.poly_between(x, base, sz)
+                polys.append( ((xp, yp), {'label': cn}) )
+                poly_labels.append(cn)
+                base = sz
+       
+        figure()
+        title("Snapshot Memory")
+        xlabel("Execution Time [s]")
+        ylabel("Virtual Memory [MiB]")
+
+        y = [float(fp.asizeof_total)/(1024*1024) for fp in self.footprint]
+        plot(x, y, 'r--', label='Total')
+        y = [float(fp.tracked_total)/(1024*1024) for fp in self.footprint]
+        plot(x, y, 'b--', label='Tracked total')
+        
+        for (args, kwds) in polys:
+            fill(*args, **kwds)
+        legend(loc=2)
+        savefig(filename)
+
+        return self.chart_tag % (filename)
+
+    def create_pie_chart(self, snapshot, filename=''):
+        """
+        Create a pie chart that depicts the distribution of the allocated memory
+        for a given `snapshot`. The chart is saved to `filename`.
+        """
+        try:
+            from pylab import figure, title, pie, axes, savefig, sum
+        except ImportError:
+            return self.nopylab_msg % ("pie")
+
+        # Don't bother illustrating a pie without pieces.
+        if not snapshot.tracked_total:
+            return ''
+
+        self.annotate_snapshot(snapshot)
+        classlist = []
+        sizelist = []
+        for k, v in snapshot.classes.items():
+            if v['pct'] > 3.0:
+                classlist.append(k)
+                sizelist.append(v['sum'])
+        sizelist.insert(0, snapshot.asizeof_total - sum(sizelist))
+        classlist.insert(0, 'Other')
+        #sizelist = [x*0.01 for x in sizelist]
+
+        title("Snapshot (%s) Memory Distribution" % (snapshot.desc))
+        figure(figsize=(8,8))
+        axes([0.1, 0.1, 0.8, 0.8])
+        pie(sizelist, labels=classlist)
+        savefig(filename, dpi=50)
+
+        return self.chart_tag % (filename)
+
+    def create_html(self, fname, title="Heapmonitor Statistics"):
+        """
+        Create HTML page `fname` and additional files in a directory derived
+        from `fname`.
+        """
+        from os import path, mkdir
+
+        # Create a folder to store the charts and additional HTML files.
+        self.filesdir = path.splitext(fname)[0] + '_files'
+        if not path.isdir(self.filesdir):
+            mkdir(self.filesdir)
+        self.filesdir = path.abspath(self.filesdir)
+        self.links = {}
+
+        # Create charts. The tags to show the images are returned and stored in
+        # the self.charts dictionary. This allows alternative text to be shown
+        # if the chart creation framework is not available.
+        self.charts = {}
+        fn = path.join(self.filesdir, 'timespace.png')
+        self.charts['snapshots'] = self.create_snapshot_chart(fn)
+
+        for fp, idx in map(None, self.footprint, range(len(self.footprint))):
+            fn = path.join(self.filesdir, 'fp%d.png' % (idx))
+            self.charts[fp] = self.create_pie_chart(fp, fn)
+
+        for cn in self.tracked_index.iterkeys():
+            fn = path.join(self.filesdir, cn.replace('.', '_')+'-lt.png')
+            self.charts[cn] = self.create_lifetime_chart(cn, fn)
+
+        # Create HTML pages first for each class and then the index page.
+        for cn in self.tracked_index.iterkeys():
+            fn = path.join(self.filesdir, cn.replace('.', '_')+'.html')
+            self.links[cn]  = fn
+            self.print_class_details(fn, cn)
+
+        self.print_summary(fname, title=title)
+    
+
 def dump_stats(file, close=1):
     """
-    Dump the logged data to a file.
+    Dump the logged data to `file`. Stop asynchronous snapshots to prevent the
+    data from changing while it is dumped. A side effect is lag, especially
+    when a long snapshot period has been set. Because dumping and loading are
+    time-intensive operations for large amounts of data, an HTML report can be
+    created directly if a filename with a '.html' suffix is passed.
     """
-    stats = MemStats(tracked_index=tracked_index, footprint=footprint)
-    stats.dump_stats(file, close)
+    stop_periodic_snapshots()
+    if isinstance(file, type('')) and file[-5:] == '.html':
+        stats = HtmlStats(tracked_index=tracked_index, footprint=footprint)
+        stats.create_html(file)
+    else:
+        stats = MemStats(tracked_index=tracked_index, footprint=footprint)
+        stats.dump_stats(file, close)
 
 def print_stats(file=sys.stdout):
     """
-    Write tracked objects by class to stdout.
+    Write a summary of the tracked objects, grouped by class, to `file`
+    (stdout by default). Stop asynchronous snapshots to prevent the data from
+    changing while it is printed. A side effect is lag, especially when a long
+    snapshot period has been set.
     """
+    stop_periodic_snapshots()
     stats = MemStats(stream=file, tracked_index=tracked_index, footprint=footprint)
     stats.print_stats()
     stats.print_summary()
             refset = set([id(x) for x in get_referents(n)])
             for ref in refset.intersection(idset):
                 label = ''
-                for (k, v) in inspect.getmembers(n):
+                for (k, v) in getmembers(n):
                     if id(v) == ref:
                         label = k
                         break

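The `__getstate__`/`__setstate__` pair added to `TrackedObject` above is needed
because the class now declares `__slots__`: without an instance `__dict__`,
pickle cannot collect the attributes on its own. A standalone sketch of the
same pattern under Python 2 (the class and attribute names are illustrative,
not taken from the module):

    import cPickle

    class Slotted(object):
        # A slotted class has no per-instance __dict__, so pickling needs
        # explicit __getstate__/__setstate__ helpers.
        __slots__ = ('size', 'name', 'ref')

        def __init__(self, size, name):
            self.size = size
            self.name = name
            self.ref = lambda: None      # stand-in for an unpicklable member

        def __getstate__(self):
            # Collect all slot values except the unpicklable one.
            return dict((s, getattr(self, s)) for s in self.__slots__
                        if s != 'ref' and hasattr(self, s))

        def __setstate__(self, state):
            for key, value in state.items():
                setattr(self, key, value)

    data = cPickle.dumps(Slotted(42, 'demo'), cPickle.HIGHEST_PROTOCOL)
    restored = cPickle.loads(data)
    assert restored.size == 42 and restored.name == 'demo'
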
src/engine/SCons/Script/Main.py

         pass
     def do_print(self):
         if self.profile:
-            SCons.Heapmonitor.dump_stats(open(self.profile, 'w'))
+            SCons.Heapmonitor.dump_stats(self.profile)
         else:
             SCons.Heapmonitor.print_stats(self.outfp)
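
For reference, passing the path string (rather than an open file object) is
what lets the '.html' check in dump_stats() take effect. With an illustrative
target of 'heap.html', create_html() derives its output locations roughly as
below; 'SCons.Node.FS.File' is just an example of a tracked class name:

    from os import path

    fname = 'heap.html'
    filesdir = path.splitext(fname)[0] + '_files'              # 'heap_files'
    cn = 'SCons.Node.FS.File'

    print path.join(filesdir, 'timespace.png')                 # memory-over-time chart
    print path.join(filesdir, 'fp%d.png' % 0)                  # pie chart per snapshot
    print path.join(filesdir, cn.replace('.', '_') + '-lt.png')  # lifetime chart
    print path.join(filesdir, cn.replace('.', '_') + '.html')    # per-class page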