Commits

Robert Kern committed 582df34

ENH: Use a pickled LineStats object instead of marshalled objects. When the code cannot be found, print the timings anyway.

Comments (0)

Files changed (4)

 increment and decrement a counter and only actually enable or disable the
 profiler when the count transitions from or to 0.
 
-After profiling, the `dump_stats(filename)` method will marshal the results out
+After profiling, the `dump_stats(filename)` method will pickle the results out
 to the given file. `print_stats([stream])` will print the formatted results to
 sys.stdout or whatever stream you specify. `get_stats()` will return a 2-tuple:
 a dictionary containing the results and the timer unit.
     hotshot can do line-by-line timings, too. However, it is deprecated and may
     disappear from the standard library. Also, it can take a long time to
     process the results while I want quick turnaround in my workflows. hotshot
-    pays for this processing time by making itself minimally intrusive to the
+    pays this processing time in order to make itself minimally intrusive to the
     code it is profiling. Code that does network operations, for example, may
     even go down different code paths if profiling slows down execution too
     much. For my use cases, and I think those of many other people, their
     I am open to suggestions on how to make this more robust. Or simple
     admonitions against trying to be clever.
 
+* Why do my list comprehensions have so many hits when I use the LineProfiler?
+
+    LineProfiler records the line with the list comprehension once for each
+    iteration of the list comprehension.
+
 * Why is kernprof distributed with line_profiler? It works with just cProfile,
   right?
 
     Both line_profiler and kernprof have been tested with Python 2.4 and Python
     2.5. It might work with Python 2.3, and will probably work with Python 2.6.
 
+
 To Do
 -----
 

_line_profiler.pyx

         return '<LineTiming for %r\n  lineno: %r\n  nhits: %r\n  total_time: %r>' % (self.code, self.lineno, self.nhits, <long>self.total_time)
 
 
+# Note: this is a regular Python class to allow easy pickling.
+class LineStats(object):
+    """ Object to encapsulate line-profile statistics.
+
+    Attributes
+    ----------
+    timings : dict
+        Mapping from (filename, first_lineno, function_name) of the profiled
+        function to a list of (lineno, nhits, total_time) tuples for each
+        profiled line. total_time is an integer in the native units of the
+        timer.
+    unit : float
+        The number of seconds per timer unit.
+    """
+    def __init__(self, timings, unit):
+        self.timings = timings
+        self.unit = unit
+
+
 cdef class LineProfiler:
     """ Time the execution of lines of Python code.
     """
         PyEval_SetTrace(NULL, <object>NULL)
 
     def get_stats(self):
-        """ Return a serializable dictionary of the profiling data along with
-        the timer unit.
-
-        Returns
-        -------
-        stats : dict
-            Mapping from (filename, first_lineno, function_name) of the profiled
-            function to a list of (lineno, nhits, total_time) tuples for each
-            profiled line. total_time is an integer in the native units of the
-            timer.
-        timer_unit : float
-            The number of seconds per timer unit.
+        """ Return a LineStats object containing the timings.
         """
         stats = {}
         for code in self.code_map:
             key = label(code)
             stats[key] = [e.astuple() for e in entries]
             stats[key].sort()
-        return stats, self.timer_unit
+        return LineStats(stats, self.timer_unit)
 
 
 cdef class LastTime:
 #!/usr/bin/env python
 # -*- coding: UTF-8 -*-
-""" Script to conveniently run the profiler on code in a variety of
-circumstances.
+""" Script to conveniently run profilers on code in a variety of circumstances.
 """
 
 import optparse
 
 
 # Guard the import of cProfile such that 2.4 people without lsprof can still use
-# this script with line_profiler.
+# this script.
 try:
     from cProfile import Profile
 except ImportError:
 #!/usr/bin/env python
 # -*- coding: UTF-8 -*-
 
+import cPickle
 from cStringIO import StringIO
 import inspect
 import linecache
-import marshal
 import optparse
 import os
 import sys
         return f
 
     def dump_stats(self, filename):
-        """ Dump a representation of the data to a file as a marshalled
-        dictionary from `get_stats()`.
+        """ Dump a representation of the data to a file as a pickled LineStats
+        object from `get_stats()`.
         """
-        stats, unit = self.get_stats()
+        lstats= self.get_stats()
         f = open(filename, 'wb')
         try:
-            marshal.dump((stats, unit), f)
+            cPickle.dump(lstats, f, cPickle.HIGHEST_PROTOCOL)
         finally:
             f.close()
 
     def print_stats(self, stream=None):
         """ Show the gathered statistics.
         """
-        stats, unit = self.get_stats()
-        show_text(stats, unit, stream=stream)
+        lstats = self.get_stats()
+        show_text(lstats.timings, lstats.unit, stream=stream)
 
     def run(self, cmd):
         """ Profile a single executable statement in the main namespace.
     """
     if stream is None:
         stream = sys.stdout
-    if not os.path.exists(filename):
-        print >>stream, 'Could not find file %s' % filename
-        print >>stream, 'Are you sure you are running this program from the same directory'
-        print >>stream, 'that you ran the profiler from?'
-        return
-    print >>stream, 'File: %s' % filename
-    print >>stream, 'Function: %s at line %s' % (func_name, start_lineno)
-    all_lines = linecache.getlines(filename)
-    sublines = inspect.getblock(all_lines[start_lineno-1:])
+    print >>stream, "File: %s" % filename
+    print >>stream, "Function: %s at line %s" % (func_name, start_lineno)
     template = '%6s %9s %12s %8s %8s  %-s'
     d = {}
     total_time = 0.0
+    linenos = []
     for lineno, nhits, time in timings:
         total_time += time
-    print >>stream, 'Total time: %g s' % (total_time * unit)
+        linenos.append(lineno)
+    print >>stream, "Total time: %g s" % (total_time * unit)
+    if not os.path.exists(filename):
+        print >>stream, ""
+        print >>stream, "Could not find file %s" % filename
+        print >>stream, "Are you sure you are running this program from the same directory"
+        print >>stream, "that you ran the profiler from?"
+        print >>stream, "Continuing without the function's contents."
+        # Fake empty lines so we can see the timings, if not the code.
+        nlines = max(linenos) - min(min(linenos), start_lineno) + 1
+        sublines = [''] * nlines
+    else:
+        all_lines = linecache.getlines(filename)
+        sublines = inspect.getblock(all_lines[start_lineno-1:])
     for lineno, nhits, time in timings:
         d[lineno] = (nhits, time, '%5.1f' % (float(time) / nhits),
             '%5.1f' % (100*time / total_time))
     linenos = range(start_lineno, start_lineno + len(sublines))
     empty = ('', '', '', '')
-    header = template % ('Line #', 'Hits', 'Time', 'Per Hit', '% Time', 'Line Contents')
-    print >>stream, ''
+    header = template % ('Line #', 'Hits', 'Time', 'Per Hit', '% Time', 
+        'Line Contents')
+    print >>stream, ""
     print >>stream, header
     print >>stream, '=' * len(header)
     for lineno, line in zip(linenos, sublines):
         nhits, time, per_hit, percent = d.get(lineno, empty)
-        print >>stream, template % (lineno, nhits, time, per_hit, percent, line.rstrip('\n').rstrip('\r'))
-    print >>stream, ''
+        print >>stream, template % (lineno, nhits, time, per_hit, percent,
+            line.rstrip('\n').rstrip('\r'))
+    print >>stream, ""
 
 def show_text(stats, unit, stream=None):
     """ Show text for the given timings.
 
     One or more -f options are required to get any useful results.
 
-    -D <filename>: dump the raw statistics out to a marshal file on disk. The
+    -D <filename>: dump the raw statistics out to a pickle file on disk. The
     usual extension for this is ".lprof". These statistics may be viewed later
     by running line_profiler.py as a script.
 
     dump_file = opts.D[0]
     if dump_file:
         profile.dump_stats(dump_file)
-        print '\n*** Profile stats marshalled to file',\
+        print '\n*** Profile stats pickled to file',\
               `dump_file`+'.',message
 
     text_file = opts.T[0]
 
     return return_value
 
+def load_stats(filename):
+    """ Utility function to load a pickled LineStats object from a given
+    filename.
+    """
+    f = open(filename, 'rb')
+    try:
+        lstats = cPickle.load(f)
+    finally:
+        f.close()
+    return lstats
+
 
 def main():
     usage = "usage: %prog profile.lprof"
     options, args = parser.parse_args()
     if len(args) != 1:
         parser.error("Must provide a filename.")
-    f = open(args[0], 'rb')
-    stats, unit = marshal.load(f)
-    f.close()
-    show_text(stats, unit)
+    lstats = load_stats(args[0])
+    show_text(lstats.timings, lstats.unit)
 
 if __name__ == '__main__':
     main()
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.