Commits

Mike Bayer  committed a84385b Draft

- a new approach to profiling where we attempt to strip out
parts of the pstats that are idiosyncratic to different platforms.
the goal is no per-version assertions on tests, version differences
in theory would go into the list of profiling exceptions.

  • Participants
  • Parent commits 9262a98

Comments (0)

Files changed (8)

File test/aaa_profiling/test_compiler.py

 
         cls.dialect = default.DefaultDialect()
 
-    @profiling.function_call_count(versions={'2.7':62, '2.6':62,
-                                            '3':68})
+    @profiling.function_call_count(62)
     def test_insert(self):
         t1.insert().compile(dialect=self.dialect)
 
-    @profiling.function_call_count(versions={'2.6':56, '2.7':56})
+    @profiling.function_call_count(56)
     def test_update(self):
         t1.update().compile(dialect=self.dialect)
 
-    @profiling.function_call_count(versions={'2.6':117, '2.7':117, '3':124})
+    @profiling.function_call_count(110)
     def test_update_whereclause(self):
         t1.update().where(t1.c.c2==12).compile(dialect=self.dialect)
 
-    @profiling.function_call_count(versions={'2.7':148, '2.6':148,
-                                                '3':161})
+    @profiling.function_call_count(148)
     def test_select(self):
         s = select([t1], t1.c.c2==t2.c.c1)
         s.compile(dialect=self.dialect)

File test/aaa_profiling/test_orm.py

 
     @classmethod
     def define_tables(cls, metadata):
-        parent = Table('parent', metadata, Column('id', Integer,
+        Table('parent', metadata, Column('id', Integer,
                        primary_key=True,
                        test_needs_autoincrement=True), Column('data',
                        String(20)))
-        child = Table('child', metadata, Column('id', Integer,
+        Table('child', metadata, Column('id', Integer,
                       primary_key=True, test_needs_autoincrement=True),
                       Column('data', String(20)), Column('parent_id',
                       Integer, ForeignKey('parent.id'), nullable=False))
                                 cls.tables.parent,
                                 cls.tables.child)
 
-        mapper(Parent, parent, properties={'children'
-               : relationship(Child, backref='parent')})
+        mapper(Parent, parent, properties={'children':
+                        relationship(Child, backref='parent')})
         mapper(Child, child)
 
     @classmethod
         parent, child = cls.tables.parent, cls.tables.child
 
         parent.insert().execute({'id': 1, 'data': 'p1'})
-        child.insert().execute({'id': 1, 'data': 'p1c1', 'parent_id'
-                               : 1})
+        child.insert().execute({'id': 1, 'data': 'p1c1', 'parent_id': 1})
 
     def test_merge_no_load(self):
         Parent = self.classes.Parent
         # down from 185 on this this is a small slice of a usually
         # bigger operation so using a small variance
 
-        @profiling.function_call_count(variance=0.10,
-                versions={'2.7':96, '2.6':96, '2.5':96, '3': 100})
-        def go():
+        @profiling.function_call_count(96, {}, variance=0.10)
+        def go1():
             return sess2.merge(p1, load=False)
-        p2 = go()
+        p2 = go1()
 
         # third call, merge object already present. almost no calls.
 
-        @profiling.function_call_count(variance=0.10,
-                versions={'2.7':16, '2.6':16, '2.5':16, '3': 16})
-        def go():
+        @profiling.function_call_count(16, {}, variance=0.10)
+        def go2():
             return sess2.merge(p2, load=False)
-        p3 = go()
+        go2()
 
     @testing.only_on('sqlite', 'Call counts tailored to pysqlite')
     def test_merge_load(self):
         # using sqlite3 the C extension took it back up to approx. 1257
         # (py2.6)
 
-        @profiling.function_call_count(variance=0.10,
-                                versions={'2.7': 1128,
-                                        '3': 1150}
-                            )
+        @profiling.function_call_count(1128, {}, variance=0.10)
         def go():
             p2 = sess2.merge(p1)
         go()
 
     @classmethod
     def define_tables(cls, metadata):
-        parent = Table('parent', metadata,
+        Table('parent', metadata,
                         Column('id', Integer, primary_key=True),
                        Column('data', String(20)),
                        Column('child_id', Integer, ForeignKey('child.id'))
                        )
 
-        child = Table('child', metadata,
-                    Column('id', Integer,primary_key=True),
+        Table('child', metadata,
+                    Column('id', Integer, primary_key=True),
                   Column('data', String(20))
                  )
 
         parents = sess.query(Parent).all()
 
 
-        @profiling.function_call_count(108019, variance=.2)
+        @profiling.function_call_count(110761, variance=.2)
         def go():
             for p in parents:
                 p.child
         parents = sess.query(Parent).all()
         children = sess.query(Child).all()
 
-        @profiling.function_call_count(17987, {'3':18987})
+        @profiling.function_call_count(16988)
         def go():
             for p in parents:
                 p.child
     def setup_mappers(cls):
         A, B, C, D = cls.classes.A, cls.classes.B, \
                     cls.classes.C, cls.classes.D
-        a, b, c, d= cls.tables.a, cls.tables.b, \
+        a, b, c, d = cls.tables.a, cls.tables.b, \
                     cls.tables.c, cls.tables.d
         mapper(A, a, properties={
-            'bs':relationship(B, backref='a'),
-            'c':relationship(C, backref='as'),
-            'ds':relationship(D, backref='a'),
+            'bs': relationship(B, backref='a'),
+            'c': relationship(C, backref='as'),
+            'ds': relationship(D, backref='a'),
         })
         mapper(B, b)
         mapper(C, c)

File test/aaa_profiling/test_pool.py

     # probably
     # due to the event mechanics being established
     # or not already...
-    @profiling.function_call_count(72, {'2.4': 68, '2.7':75,
-                                            '2.7+cextension':75,
-                                            '3':62},
-                                            variance=.15)
+    @profiling.function_call_count(72, variance=.15)
     def test_first_connect(self):
         conn = pool.connect()
 
         conn = pool.connect()
         conn.close()
 
-        @profiling.function_call_count(32, {'2.4': 21, '2.7':29,
-                                            '3.2':25,
-                                            '2.7+cextension':29},
-                                            variance=.10)
+        @profiling.function_call_count(29, variance=.10)
         def go():
             conn2 = pool.connect()
             return conn2
     def test_second_samethread_connect(self):
         conn = pool.connect()
 
-        @profiling.function_call_count(6, {'2.4': 4, '3':7})
+        @profiling.function_call_count(6)
         def go():
             return pool.connect()
         c2 = go()

File test/aaa_profiling/test_resultset.py

 
 
 class ResultSetTest(fixtures.TestBase, AssertsExecutionResults):
-    __requires__ = 'cpython',
+    __requires__ = 'cpython', 'cextensions',
     __only_on__ = 'sqlite'
 
     @classmethod
     def teardown(self):
         metadata.drop_all()
 
-    @profiling.function_call_count(versions={
-                                    '2.4': 13214,
-                                    '2.6':14416,
-                                    '2.7':14416,
-                                   '2.6+cextension': 354,
-                                   '2.7+cextension':354})
+    @profiling.function_call_count(354)
     def test_string(self):
         [tuple(row) for row in t.select().execute().fetchall()]
 
-    # sqlite3 returns native unicode.  so shouldn't be an increase here.
-
-    @profiling.function_call_count(versions={
-                                    '2.7':14396,
-                                    '2.6':14396,
-                                   '2.6+cextension': 354,
-                                   '2.7+cextension':354})
+    @profiling.function_call_count(354)
     def test_unicode(self):
         [tuple(row) for row in t2.select().execute().fetchall()]
 
         # ensure initial connect activities complete
         c.execute("select 1")
 
-        @profiling.function_call_count(versions={'2.7':40, '2.6':40, '2.5':35,
-                                                    '2.4':21, '3':40},
-                                            variance=.10)
+        @profiling.function_call_count(40, variance=.10)
         def go():
             c.execute("select 1")
         go()
         # ensure initial connect activities complete
         e.execute("select 1")
 
-        @profiling.function_call_count(versions={'2.4':41, '2.5':65,
-                                                    '2.6':65, '3':61,
-                                                    '2.7':65,
-                                                    '2.6+cextension':65},
-                                            variance=.05)
+        @profiling.function_call_count(65, variance=.05)
         def go():
             e.execute("select 1")
         go()
             return value
         value1, value2 = "x", "y"
         row = self._rowproxy_fixture(
-            [(col1, "a"),(col2, "b")],
+            [(col1, "a"), (col2, "b")],
             [proc1, None],
             seq_factory([value1, value2])
         )

File test/aaa_profiling/test_zoomark.py

         metadata = MetaData(engine)
         engine.connect()
 
-    @profiling.function_call_count(3896, {'2.4': 1711})
+    @profiling.function_call_count(3896)
     def test_profile_1_create_tables(self):
         self.test_baseline_1_create_tables()
 
-    @profiling.function_call_count(5045, {'2.6':5099, '2.4': 3650, '3.2':4699})
+    @profiling.function_call_count(4798)
     def test_profile_1a_populate(self):
         self.test_baseline_1a_populate()
 
-    @profiling.function_call_count(245, {'2.4': 172})
+    @profiling.function_call_count(245)
     def test_profile_2_insert(self):
         self.test_baseline_2_insert()
 
-    @profiling.function_call_count(3118, {'2.7':3333,
-                                        '2.7+cextension':3109, '2.6':3109})
+    @profiling.function_call_count(3118)
     def test_profile_3_properties(self):
         self.test_baseline_3_properties()
 
-    @profiling.function_call_count(11624, {'2.4': 7963, '2.6+cextension'
-                                   : 10736, '2.7+cextension': 10736},
-                                   variance=0.10)
+    @profiling.function_call_count(9700, variance=0.10)
     def test_profile_4_expressions(self):
         self.test_baseline_4_expressions()
 
-    @profiling.function_call_count(1059, {'2.4': 904, '2.6+cextension'
-                                   : 1027, '2.6.4':1167, '2.7+cextension': 1027},
-                                   variance=0.10)
+    @profiling.function_call_count(1059, variance=0.10)
     def test_profile_5_aggregates(self):
         self.test_baseline_5_aggregates()
 
-    @profiling.function_call_count(1788, {'2.4': 1118, '3.2':1647,
-                                        '2.7+cextension':1698})
+    @profiling.function_call_count(1606)
     def test_profile_6_editing(self):
         self.test_baseline_6_editing()
 
-    @profiling.function_call_count(2252, {'2.4': 1673,
-                                            '2.6':2412,
-                                            '2.7':2412,
-                                            '3.2':2396,
-                                            '2.7+cextension':2110,
-                                            '2.6+cextension': 2252})
+    @profiling.function_call_count(2095)
     def test_profile_7_multiview(self):
         self.test_baseline_7_multiview()
 

File test/aaa_profiling/test_zoomark_orm.py

         session = sessionmaker(engine)()
         engine.connect()
 
-    @profiling.function_call_count(5600, {"3.2":5928}, variance=0.25)
+    @profiling.function_call_count(5600, variance=0.25)
     def test_profile_1_create_tables(self):
         self.test_baseline_1_create_tables()
 
-    @profiling.function_call_count(5786, {'2.7+cextension':5683,
-                                            '2.6+cextension':5992})
+    @profiling.function_call_count(5786)
     def test_profile_1a_populate(self):
         self.test_baseline_1a_populate()
 
-    @profiling.function_call_count(388, {'3.2':388})
+    @profiling.function_call_count(388)
     def test_profile_2_insert(self):
         self.test_baseline_2_insert()
 
-    # this number...
-
-    @profiling.function_call_count(6783, {
-        '2.6': 6058,
-        '2.7': 5922,
-        '2.7+cextension': 5714,
-        '2.6+cextension': 5714,
-        '3.2':5787,
-        })
+    @profiling.function_call_count(5702)
     def test_profile_3_properties(self):
         self.test_baseline_3_properties()
 
-    # and this number go down slightly when using the C extensions
-
-    @profiling.function_call_count(17698, {'2.7+cextension':17698, '2.6': 18943, '2.7':19110, '3.2':19264})
+    @profiling.function_call_count(17698)
     def test_profile_4_expressions(self):
         self.test_baseline_4_expressions()
 
-    @profiling.function_call_count(1172, {'2.6+cextension': 1090,
-                                   '2.7+cextension': 1086},
+    @profiling.function_call_count(1065,
                                    variance=0.1)
     def test_profile_5_aggregates(self):
         self.test_baseline_5_aggregates()

File test/lib/profiling.py

 
 """
 
-import os, sys
+import os
+import sys
 from test.lib.util import gc_collect, decorator
 from nose import SkipTest
-
-__all__ = 'profiled', 'function_call_count', 'conditional_call_count'
-
-all_targets = set()
-profile_config = { 'targets': set(),
-                   'report': True,
-                   'print_callers':False,
-                   'print_callees':False,
-                   'graphic':False,
-                   'sort': ('time', 'calls'),
-                   'limit': None }
-profiler = None
+import pstats
+import time
+import collections
+try:
+    import cProfile
+except ImportError:
+    cProfile = None
 
 def profiled(target=None, **target_opts):
-    """Optional function profiling.
+    """Function profiling.
 
     @profiled('label')
     or
     configuration and command-line options.
     """
 
-    # manual or automatic namespacing by module would remove conflict issues
+    profile_config = {'targets': set(),
+                       'report': True,
+                       'print_callers': False,
+                       'print_callees': False,
+                       'graphic': False,
+                       'sort': ('time', 'calls'),
+                       'limit': None}
     if target is None:
         target = 'anonymous_target'
-    elif target in all_targets:
-        print "Warning: redefining profile target '%s'" % target
-    all_targets.add(target)
 
     filename = "%s.prof" % target
 
     @decorator
     def decorate(fn, *args, **kw):
-        if (target not in profile_config['targets'] and
-            not target_opts.get('always', None)):
-            return fn(*args, **kw)
-
         elapsed, load_stats, result = _profile(
             filename, fn, *args, **kw)
 
             if report:
                 sort_ = target_opts.get('sort', profile_config['sort'])
                 limit = target_opts.get('limit', profile_config['limit'])
-                print "Profile report for target '%s' (%s)" % (
+                print ("Profile report for target '%s' (%s)" % (
                     target, filename)
+                    )
 
                 stats = load_stats()
                 stats.sort_stats(*sort_)
     return decorate
 
 def function_call_count(count=None, versions={}, variance=0.05):
-    """Assert a target for a test case's function call count.
+    """Assert a target for a test case's function call count."""
 
-    count
-      Optional, general target function call count.
-
-    versions
-      Optional, a dictionary of Python version strings to counts,
-      for example::
-
-        { '2.5.1': 110,
-          '2.5': 100,
-          '2.4': 150 }
-
-      The best match for the current running python will be used.
-      If none match, 'count' will be used as the fallback.
-
-    variance
-      An +/- deviation percentage, defaults to 5%.
-    """
-
-    # this could easily dump the profile report if --verbose is in effect
-
-    version_info = list(sys.version_info)
     py_version = '.'.join([str(v) for v in sys.version_info])
-    try:
-        from sqlalchemy.cprocessors import to_float
-        cextension = True
-    except ImportError:
-        cextension = False
-
-    while version_info:
-        version = '.'.join([str(v) for v in version_info])
-        if cextension and (version + "+cextension") in versions:
-            version += "+cextension"
-            count = versions[version]
-            break
-        elif version in versions:
-            count = versions[version]
-            break
-
-        version_info.pop()
-
-    if count is None:
-        print "Warning: no function call count specified for version: '%s'" % py_version
-        return lambda fn: fn
 
     @decorator
     def decorate(fn, *args, **kw):
-        try:
-            filename = "%s.prof" % fn.__name__
+        if cProfile is None:
+            raise SkipTest("cProfile is not installed")
+        if count is None:
+            raise SkipTest("no count installed")
+        gc_collect()
 
-            elapsed, stat_loader, result = _profile(
-                filename, fn, *args, **kw)
+        stats_total_calls, callcount, fn_result = _count_calls(
+            {"exclude": _exclude},
+            fn, *args, **kw
+        )
+        print("Pstats calls: %d Adjusted callcount %d  Expected %d" % (
+                stats_total_calls,
+                callcount,
+                count
+            ))
+        deviance = int(callcount * variance)
+        if abs(callcount - count) > deviance:
+            raise AssertionError(
+                "Adjusted function call count %s not within %s%% "
+                "of expected %s. (cProfile reported %s "
+                    "calls, Python version %s)" % (
+                callcount, (variance * 100),
+                count, stats_total_calls, py_version))
+        return fn_result
 
-            stats = stat_loader()
-            calls = stats.total_calls
-
-            stats.sort_stats('calls', 'cumulative')
-            stats.print_stats()
-            #stats.print_callers()
-            deviance = int(count * variance)
-            if (calls < (count - deviance) or
-                calls > (count + deviance)):
-                raise AssertionError(
-                    "Function call count %s not within %s%% "
-                    "of expected %s. (Python version %s)" % (
-                    calls, (variance * 100), count, py_version))
-
-            return result
-        finally:
-            if os.path.exists(filename):
-                os.unlink(filename)
     return decorate
 
-def conditional_call_count(discriminator, categories):
-    """Apply a function call count conditionally at runtime.
+py3k = sys.version_info >= (3,)
 
-    Takes two arguments, a callable that returns a key value, and a dict
-    mapping key values to a tuple of arguments to function_call_count.
+def _paths(key, stats, cache, seen=None):
+    if seen is None:
+        seen = collections.defaultdict(int)
+    fname, lineno, fn_name = key
+    if seen.get(key):
+        return
 
-    The callable is not evaluated until the decorated function is actually
-    invoked.  If the `discriminator` returns a key not present in the
-    `categories` dictionary, no call count assertion is applied.
+    if key in cache:
+        for item in cache[key]:
+            yield item
+    else:
+        seen[key] += 1
+        try:
+            paths_to_yield = []
+            (cc, nc, tt, ct, callers) = stats[key]
+            if not callers:
+                paths_to_yield.append(())
+                yield ()
 
-    Useful for integration tests, where running a named test in isolation may
-    have a function count penalty not seen in the full suite, due to lazy
-    initialization in the DB-API, SA, etc.
-    """
-    @decorator
-    def decorate(fn, *args, **kw):
-        criteria = categories.get(discriminator(), None)
-        if criteria is None:
-            return fn(*args, **kw)
+            for subkey in callers:
+                sub_cc, sub_nc, sub_tt, sub_ct = callers[subkey]
+                parent_iterator = list(_paths(subkey, stats, cache, seen))
+                path_element = (fname, lineno, fn_name)
+                for parent in parent_iterator:
+                    paths_to_yield.append(parent + (path_element,))
+                    yield parent + (path_element,)
+            cache[key] = paths_to_yield
+        finally:
+            seen[key] -= 1
 
-        rewrapped = function_call_count(*criteria)(fn)
-        return rewrapped(*args, **kw)
-    return decorate
+def _exclude(path):
 
+    for pfname, plineno, pfuncname in path:
+        if "compat" in pfname or \
+            "processors" in pfname or \
+            "cutils" in pfname:
+            return True
+    if path[-1][2] in (
+            "<built-in method exec>",
+            "<listcomp>"
+            ):
+        return True
 
-def _profile(filename, fn, *args, **kw):
-    global profiler
-    if not profiler:
-        if sys.version_info > (2, 5):
-            try:
-                import cProfile
-                profiler = 'cProfile'
-            except ImportError:
-                pass
-        if not profiler:
-            try:
-                import hotshot
-                profiler = 'hotshot'
-            except ImportError:
-                profiler = 'skip'
+    return False
 
-    if profiler == 'skip':
-        raise SkipTest('Profiling not supported on this platform')
-    elif profiler == 'cProfile':
-        return _profile_cProfile(filename, fn, *args, **kw)
-    else:
-        return _profile_hotshot(filename, fn, *args, **kw)
+def _count_calls(options, fn, *args, **kw):
+    total_time, stats_loader, fn_result = _profile(fn, *args, **kw)
+    exclude_fn = options.get("exclude", None)
 
-def _profile_cProfile(filename, fn, *args, **kw):
-    import cProfile, pstats, time
+    stats = stats_loader()
 
-    load_stats = lambda: pstats.Stats(filename)
-    gc_collect()
+    callcount = 0
+    report = []
+    path_cache = {}
+    for (fname, lineno, fn_name), (cc, nc, tt, ct, callers) \
+                in stats.stats.items():
+        exclude = 0
+        paths = list(_paths((fname, lineno, fn_name), stats.stats, path_cache))
+        for path in paths:
+            if not path:
+                continue
+            if exclude_fn and exclude_fn(path):
+                exclude += 1
+
+        adjusted_cc = cc
+        if exclude:
+            adjust = (float(exclude) / len(paths)) * cc
+            adjusted_cc -= adjust
+        dirname, fname = os.path.split(fname)
+        #print(" ".join([str(x) for x in [fname, lineno, fn_name, cc, adjusted_cc]]))
+        report.append(" ".join([str(x) for x in [fname, lineno, fn_name, cc, adjusted_cc]]))
+        callcount += adjusted_cc
+
+    #stats.sort_stats('calls', 'cumulative')
+    #stats.print_stats()
+    report.sort()
+    print "\n".join(report)
+    return stats.total_calls, callcount, fn_result
+
+def _profile(fn, *args, **kw):
+    filename = "%s.prof" % fn.__name__
+
+    def load_stats():
+        st = pstats.Stats(filename)
+        os.unlink(filename)
+        return st
 
     began = time.time()
     cProfile.runctx('result = fn(*args, **kw)', globals(), locals(),
     ended = time.time()
 
     return ended - began, load_stats, locals()['result']
-
-def _profile_hotshot(filename, fn, *args, **kw):
-    import hotshot, hotshot.stats, time
-    load_stats = lambda: hotshot.stats.load(filename)
-
-    gc_collect()
-    prof = hotshot.Profile(filename)
-    began = time.time()
-    prof.start()
-    try:
-        result = fn(*args, **kw)
-    finally:
-        prof.stop()
-        ended = time.time()
-        prof.close()
-
-    return ended - began, load_stats, result
-

File test/lib/requires.py

         skip_if(lambda: not _has_cextensions(), "C extensions not installed")
     )
 
+
 def dbapi_lastrowid(fn):
     if util.pypy:
         return _chain_decorators_on(