# Source: flickzeug / flickzeug / leakfinder / leak.py (full commit)
# -*- coding: utf-8 -*-
"""
    flickzeug.leakfinder.leak
    ~~~~~~~~~~~~~~~~~~~~~~~~~

    WSGI application memory usage debugger.

    Example usage::

        from flickzeug.leakfinder.leak import LeakFinder
        app = LeakFinder(app)

    To count ("leak find") only your own package's code::

        from flickzeug.leakfinder.leak import LeakFinder
        app = LeakFinder(app, filter=lambda v: not v.startswith("myproj."))

    :copyright: (c) 2009 by the Flickzeug Team, see AUTHORS for more details.
    :license: BSD, see LICENSE for more details.
"""

import cgi
import gc
import json
import sys
import threading
import time
from datetime import datetime, timedelta
from types import FrameType, ModuleType

from werkzeug.routing import Rule
from werkzeug.utils import escape
from werkzeug.wrappers import Response

from flickzeug.leakfinder import reftree
from flickzeug.utils import render_response, render_template, WebMiddleware
from flickzeug.utils.repr import json_debug_repr

def get_repr(obj, limit=200):
    """Return an HTML snippet wrapping the reftree representation of *obj*.

    The snippet is a ``<div>`` containing a ``<span>`` that carries the
    object's ``id()`` in its ``obj_id`` attribute and its dotted
    ``module.TypeName`` in its ``obj_type`` attribute.

    :param obj: the object to describe.
    :param limit: passed through unchanged to ``reftree.get_repr``
                  (presumably a maximum length for the representation --
                  verify against ``reftree``).
    """
    obj_type = type(obj)
    qualified_name = '.'.join([obj_type.__module__, obj_type.__name__])
    template = ('<div><span obj_id="%s" obj_type="%s" class="attr extendable">'
                '%s</span></div>')
    return template % (id(obj), qualified_name, reftree.get_repr(obj, limit))

class _(object):
    """Throwaway class used only to obtain interpreter-internal types."""

    def _method(self):
        pass


# Type of a class ``__dict__`` ('dictproxy' on Python 2).
dictproxy = type(_.__dict__)

# Types of method-like attributes; ``LeakFinder.trace_one`` leaves attributes
# of these types out of the attribute table.  The probes only use builtins
# and the helper class above, so they do not depend on deprecated or removed
# stdlib names (``sys.getcheckinterval``, the ``cgi`` module).
method_types = [type(tuple.__le__),                # 'wrapper_descriptor'
                type([1].__le__),                  # 'method-wrapper'
                type(len),                         # 'builtin_function_or_method'
                type(_()._method),                 # 'instancemethod'
                ]


class LeakFinder(WebMiddleware):
    """LeakFinder middleware.  Wraps a WSGI application and counts objects at
    regular intervals.  This allows you to get a notion of possible memory
    leaks.

    :param app: the WSGI application to search for leaks.
    :param app_path: the url path where the stats will be served from.
    :param period: Interval (in seconds) at which object count will occur.
    :param maxhistory: Maximum amount of history data stored.
    :param async_ajax: The types list page has quite some objects shown. For
                       each object an ajax request is made. If asynchronous, the
                       requests will be made almost at the same time which might
                       get your browser to complain or even lock. If not
                       asynchronous, each ajax request waits for the previous to
                       end, which is a bit easier for the browser. Pass it
                       True or False. (Default: False)
    :param filter: A callable which takes a single argument, a type name.
                   Types for which it returns a true value are left out of
                   the history.
    """

    def __init__(self, app, app_path='/_leakfinder', period=5, maxhistory=300,
                 async_ajax=False, filter=None):
        """Store the configuration and start the background sampling thread."""
        WebMiddleware.__init__(self, app, app_path)
        self.period = period
        self.maxhistory = maxhistory
        self.async_ajax = async_ajax
        self.filter = filter
        self.running = False
        # Maps "module.TypeName" -> list of instance counts, one per sample.
        self.history = {}
        # Number of samples currently held in every history list.
        self.samples = 0
        # Also set in start(); initialised here so the attribute exists even
        # if it is read before the sampling thread has begun running.
        self.start_time = datetime.utcnow()

        self.handlers = {
            'index':     self.index,
            'chart':     self.chart,
            'trace':     self.trace,
            'tree':      self.tree,
            'json-repr': self.json_repr
        }

        self.runthread = threading.Thread(target=self.start)
        try:
            # Python <= 2.6.2
            self.runthread.setDaemon(True)
        except AttributeError:
            # Python > 2.6 ?
            self.runthread.daemon = True
        self.runthread.start()

    @staticmethod
    def _type_name(objtype):
        """Return the dotted ``module.Name`` string for the type *objtype*."""
        return objtype.__module__ + "." + objtype.__name__

    @staticmethod
    def _find_by_id(objects, objid):
        """Return the object in *objects* whose ``id()`` equals *objid*.

        Returns ``None`` when there is no such object.
        """
        for obj in objects:
            if id(obj) == objid:
                return obj
        return None

    def get_url_rules(self):
        """Return the routing rules mapping URLs to the handler endpoints."""
        return [Rule('/', endpoint='index', defaults={'floor': 0}),
                Rule('/<int:floor>', endpoint='index'),
                Rule('/chart/<typename>', endpoint='chart'),
                Rule('/trace/<typename>', endpoint='trace'),
                Rule('/trace/<typename>/<int:objid>', endpoint='trace'),
                Rule('/tree/<typename>', endpoint='tree'),
                Rule('/tree/<typename>/<int:objid>', endpoint='tree'),
                Rule('/json-repr', endpoint='json-repr'),
                Rule('/json-repr/<typename>/<int:objid>', endpoint='json-repr')]

    def start(self):
        """Sampling loop: call :meth:`tick` every ``period`` seconds until
        :meth:`stop` is called.  Runs in the thread created by ``__init__``.
        """
        self.start_time = datetime.utcnow()
        self.running = True
        while self.running:
            self.tick()
            time.sleep(self.period)

    def stop(self):
        """Ask the sampling loop to exit after its current iteration."""
        self.running = False

    def tick(self):
        """Take one sample: record how many gc-tracked objects of each type
        currently exist, keeping at most ``maxhistory`` samples per type.
        """
        gc.collect()

        typecounts = {}
        for obj in gc.get_objects():
            objtype = type(obj)
            typecounts[objtype] = typecounts.get(objtype, 0) + 1

        # Merge by dotted name: distinct type objects can share one name, and
        # each history list must receive exactly one entry per sample.
        namecounts = {}
        for objtype, count in typecounts.items():
            typename = self._type_name(objtype)
            namecounts[typename] = namecounts.get(typename, 0) + count

        for typename, count in namecounts.items():
            if typename not in self.history:
                if self.filter and self.filter(typename):
                    continue
                # Back-fill zeros for the samples taken before this type
                # first appeared.
                self.history[typename] = [0] * self.samples
            self.history[typename].append(count)

        samples = self.samples + 1

        # Add dummy entries for any types which no longer exist
        for hist in self.history.values():
            diff = samples - len(hist)
            if diff > 0:
                hist.extend([0] * diff)

        # Truncate history to self.maxhistory
        if samples > self.maxhistory:
            for hist in self.history.values():
                hist.pop(0)
        else:
            self.samples = samples

    def __call__(self, environ, start_response):
        """WSGI entry point.

        Refuses multi-process environments.  The check is an ``assert``, so
        it is skipped when Python runs with ``-O``.
        """
        assert not environ['wsgi.multiprocess'], ("LeakFinder middleware is not"
                                                  " usable in a multi-process "
                                                  "environment")
        return WebMiddleware.__call__(self, environ, start_response)

    def index(self, request, floor=0):
        """Render the list of tracked types with their histories."""
        # Elapsed-time labels spaced by the configured sampling period.
        # NOTE(review): the number of labels follows the number of tracked
        # types (len(self.history)), not the number of samples -- confirm
        # against the template.
        ticks = [str(timedelta(seconds=n * self.period)).split('.')[0]
                 for n in range(len(self.history) + 5)]
        return render_response('leakfinder/list_types.html',
                               url_for=self.url_for, floor=floor,
                               history=self.history, period=self.period,
                               typenames=sorted(self.history.keys()),
                               async_ajax=self.async_ajax, ticks=ticks)

    def chart(self, request, typename=None):
        """Return the history of *typename* as a JSON list of
        ``[sample_index, count]`` pairs (an empty list for unknown types).
        """
        points = [[index, count] for index, count
                  in enumerate(self.history.get(typename, ()))]
        # Serialise explicitly: a WSGI response body must consist of strings,
        # not nested lists.
        return Response(json.dumps(points),
                        content_type="application/json")

    def trace(self, request, typename=None, objid=None):
        """Render either every live object of *typename* or, when *objid* is
        given, the details of that single object.
        """
        gc.collect()

        if objid is None:
            rows = self.trace_all(request, typename)
        else:
            rows = self.trace_one(request, typename, objid)

        return render_response('leakfinder/leakfinder.html', rows=rows,
                               title='Trace', objid=str(objid or ''),
                               typename=escape(typename), url_for=self.url_for)

    def trace_all(self, request, typename):
        """Return one HTML row per gc-tracked object whose dotted type name
        equals *typename*, or a single "not found" row.
        """
        rows = []
        for obj in gc.get_objects():
            if self._type_name(type(obj)) == typename:
                rows.append("<p class='obj'>%s</p>"
                            % ReferrerTree(obj, request,
                                           url_for=self.url_for).get_repr(obj))
        if not rows:
            rows = ["<h3>The type you requested was not found.</h3>"]
        return rows

    def trace_one(self, request, typename, objid):
        """Return HTML rows describing the object with id *objid*: its
        attributes, its direct referrers and its referents.

        Returns a single message row when the object is not found or is not
        of type *typename*.
        """
        all_objs = gc.get_objects()
        obj = self._find_by_id(all_objs, objid)
        if obj is None:
            return ["<h3>The object you requested was not found.</h3>"]
        if self._type_name(type(obj)) != typename:
            return ["<h3>The object you requested is no longer "
                    "of the correct type.</h3>"]

        rows = []

        # Attributes
        rows.append('<div class="obj"><h3>Attributes</h3>')
        rows.append('<table class="attr" border="0" padding="0">')
        for k in dir(obj):
            try:
                v = getattr(obj, k)
            except Exception:
                # dir() can list names whose lookup fails (e.g. a property
                # that raises); skip them rather than failing the whole page.
                continue
            if type(v) not in method_types:
                rows.append(
                    '<tr><th>%s:</th><td>%s</td></tr>' %
                            (k, get_repr(v)))
            del v
        rows.append('</table></div>')

        # Referrers
        referrers = []
        tree = ReferrerTree(obj, request, url_for=self.url_for)
        tree.ignore(all_objs)
        for depth, parentid, parentrepr in tree.walk(maxdepth=1):
            if parentid:
                referrers.append(
                    "<p class='obj'>%s</p>" % parentrepr
                )
        if referrers:
            rows.append(
                '<div class="refs"><h3>Referrers (Parents)</h3>'
            )
            rows.append('<p class="desc"><a href="%s">Show the '
                        'entire tree</a> of reachable objects</p>'
                        % self.url_for('tree', typename=typename,
                                       objid=objid))
            rows.extend(referrers)
            rows.append('</div>')

        # Referents
        # Only objects present in the gc snapshot can be looked up again by
        # id, so link exactly those.  Membership is tested by identity; an
        # equality-based ``in`` on the list would be O(n) per child and would
        # call arbitrary ``__eq__`` implementations.
        tracked_ids = set(map(id, all_objs))
        referents = []
        for child in gc.get_referents(obj):
            show_link = id(child) in tracked_ids
            referents.append("<p class='obj'>%s</p>" %
                        tree.get_repr(child, show_link=show_link))
        if referents:
            rows.append(
                '<div class="refs"><h3>Referents (Children)</h3>'
            )
            rows.extend(referents)
            rows.append('</div>')

        return rows

    def tree(self, request, typename=None, objid=None):
        """Render the referrer tree of the object with id *objid*, limited to
        1000 results.
        """
        all_objs = gc.get_objects()
        obj = self._find_by_id(all_objs, objid)
        if obj is None:
            rows = ["<h3>The object you requested was not found. It may have "
                    "been garbage collected.</h3>"]
        elif self._type_name(type(obj)) != typename:
            rows = ["<h3>The object you requested is no longer "
                    "of the correct type.</h3>"]
        else:
            rows = ['<div class="obj">']

            tree = ReferrerTree(obj, request, url_for=self.url_for)
            tree.ignore(all_objs)
            for depth, parentid, parentrepr in tree.walk(maxresults=1000):
                rows.append(parentrepr)

            rows.append('</div>')

        return render_response('leakfinder/leakfinder.html', rows=rows,
                               objid=objid, title='Tree',
                               typename=typename, url_for=self.url_for)

    def json_repr(self, request, typename=None, objid=None):
        """Return ``json_debug_repr`` of the object with id *objid* as a JSON
        response, or a JSON "not found" message.
        """
        # The bare '/json-repr' rule supplies no objid; treat it as not found
        # instead of failing on int(None).
        if objid is not None:
            obj = self._find_by_id(gc.get_objects(), int(objid))
            if obj is not None:
                return Response(json_debug_repr(obj),
                                mimetype='application/json')
        return Response('"Sorry, object not found."',
                        mimetype='application/json')


class ReferrerTree(reftree.Tree):
    """Tree of the objects referring to a given object, rendered as HTML.

    Relies on attributes expected to be provided by ``reftree.Tree``
    (``maxdepth``, ``seen``, ``_ignore``, ``filename``, ``url_for``, and the
    ``ignore`` method).
    """

    # When true, the walk does not continue past module objects.
    ignore_modules = True

    def _gen(self, obj, depth=0):
        """Yield ``(depth, id, html)`` tuples for the referrers of *obj*,
        recursing into each referrer not seen before.

        Markup-only entries (branch open/close, the max-depth notice) are
        yielded with an id of ``0``.
        """
        # Plain ``return`` ends the generator; ``raise StopIteration`` inside
        # a generator is turned into RuntimeError by PEP 479.
        if self.maxdepth and depth >= self.maxdepth:
            yield depth, 0, "---- Max depth reached ----"
            return

        if isinstance(obj, ModuleType) and self.ignore_modules:
            return

        refs = gc.get_referrers(obj)
        refiter = iter(refs)
        # The list and iterator created here refer to the referrers
        # themselves; keep them out of the results.
        self.ignore(refs, refiter)
        thisfile = sys._getframe().f_code.co_filename
        for ref in refiter:
            # Exclude all frames that are from this module or reftree.
            if (isinstance(ref, FrameType)
                and ref.f_code.co_filename in (thisfile, self.filename)):
                continue

            # Exclude all functions and classes from this module or reftree.
            mod = getattr(ref, "__module__", "")
            if mod is None:
                continue
            if "leakfinder" in mod or "reftree" in mod or mod == '__main__':
                continue

            # Exclude all parents in our ignore list.
            if id(ref) in self._ignore:
                continue

            # Yield the (depth, id, repr) of our object.
            yield depth, 0, '%s<div class="branch">' % (" " * depth)
            if id(ref) in self.seen:
                yield (depth, id(ref),
                       'see <a href="#%(ref)s">%(ref)s</a> above' %
                        {'ref': id(ref)})
            else:
                self.seen[id(ref)] = None
                yield depth, id(ref), self.get_repr(ref, obj)

                for parent in self._gen(ref, depth + 1):
                    yield parent
            yield depth, 0, '%s</div>' % (" " * depth)

    def get_repr(self, obj, referent=None, show_link=True):
        """Return an HTML tree block describing the given object.

        :param obj: the object to describe.
        :param referent: optional object referred to by *obj*; when given,
                         the key or attribute through which *obj* refers to
                         it is included.
        :param show_link: passed to the template unchanged.
        """
        objtype = type(obj)
        typename = objtype.__module__ + "." + objtype.__name__
        prettytype = typename.replace("__builtin__.", "")

        name = getattr(obj, "__name__", "")
        if name:
            prettytype = "%s %r" % (prettytype, name)

        key = ""
        # Identity test, not truthiness: an empty list or dict is a perfectly
        # valid referent, and truth-testing would call arbitrary user code.
        if referent is not None:
            key = self.get_refkey(obj, referent)
        return render_template('leakfinder/obj_repr.html', obj=obj, key=key,
                               prettytype=prettytype, typename=typename,
                               obj_repr=get_repr(obj, 150), show_link=show_link,
                               url_for=self.url_for)

    def get_refkey(self, obj, referent):
        """Return the dict key or attribute name of obj which refers to
        referent, as an HTML fragment, or ``""`` when none is found."""
        if isinstance(obj, dict):
            for k, v in obj.items():
                if v is referent:
                    return " (via its <tt>%s</tt> key)" % repr(k)

        for k in dir(obj) + ['__dict__']:
            try:
                value = getattr(obj, k, None)
            except Exception:
                # The getattr default only covers AttributeError; a property
                # raising anything else must not abort rendering.
                continue
            if value is referent:
                return " (via its <tt>%s</tt> attribute)" % repr(k)
        return ""