Commits

Ned Batchelder  committed 6c51d1b

A lab directory for experiments in progress.

  • Participants
  • Parent commits ecd825e

Comments (0)

Files changed (5)

File lab/hack_pyc.py

+""" Wicked hack to get .pyc files to do bytecode tracing instead of
+    line tracing.
+"""
+
+import marshal, new, opcode, sys, types
+
+from lnotab import lnotab_numbers, lnotab_string
+
+class PycFile:
+    def read(self, f):
+        if isinstance(f, basestring):
+            f = open(f, "rb")
+        self.magic = f.read(4)
+        self.modtime = f.read(4)
+        self.code = marshal.load(f)
+    
+    def write(self, f):
+        if isinstance(f, basestring):
+            f = open(f, "wb")
+        f.write(self.magic)
+        f.write(self.modtime)
+        marshal.dump(self.code, f)
+
+    def hack_line_numbers(self):
+        self.code = hack_line_numbers(self.code)
+        
+def hack_line_numbers(code):
+    """ Replace a code object's line number information to claim that every
+        byte of the bytecode is a new source line.  Returns a new code
+        object.  Also recurses to hack the line numbers in nested code objects.
+    """
+    
+    # Create a new lnotab table.  Each opcode is claimed to be at
+    # 1000*lineno + (opcode number within line), so for example, the opcodes on
+    # source line 12 will be given new line numbers 12000, 12001, 12002, etc.
+    old_num = list(lnotab_numbers(code.co_lnotab, code.co_firstlineno))
+    n_bytes = len(code.co_code)
+    new_num = []
+    line = 0
+    opnum_in_line = 0
+    i_byte = 0
+    while i_byte < n_bytes:
+        if old_num and i_byte == old_num[0][0]:
+            line = old_num.pop(0)[1]
+            opnum_in_line = 0
+        new_num.append((i_byte, 100000000 + 1000*line + opnum_in_line))
+        if ord(code.co_code[i_byte]) >= opcode.HAVE_ARGUMENT:
+            i_byte += 3
+        else:
+            i_byte += 1
+        opnum_in_line += 1
+    
+    # new_num is a list of pairs, (byteoff, lineoff).  Turn it into an lnotab.
+    new_firstlineno = new_num[0][1]-1
+    new_lnotab = lnotab_string(new_num, new_firstlineno)
+
+    # Recurse into code constants in this code object.
+    new_consts = []
+    for const in code.co_consts:
+        if type(const) == types.CodeType:
+            new_consts.append(hack_line_numbers(const))
+        else:
+            new_consts.append(const)
+
+    # Create a new code object, just like the old one, except with new
+    # line numbers.
+    new_code = new.code(
+        code.co_argcount, code.co_nlocals, code.co_stacksize, code.co_flags,
+        code.co_code, tuple(new_consts), code.co_names, code.co_varnames,
+        code.co_filename, code.co_name, new_firstlineno, new_lnotab
+        )
+
+    return new_code
+
+def hack_file(f):
+    pyc = PycFile()
+    pyc.read(f)
+    pyc.hack_line_numbers()
+    pyc.write(f)
+
+# When run as a script, hack the file named by the first command-line argument.
+if __name__ == '__main__':
+    hack_file(sys.argv[1])

File lab/lnotab.py

+# Comment copied from Python/compile.c:
+#
+# All about a_lnotab.
+# 
+# c_lnotab is an array of unsigned bytes disguised as a Python string.
+# It is used to map bytecode offsets to source code line #s (when needed
+# for tracebacks).
+# 
+# The array is conceptually a list of
+#     (bytecode offset increment, line number increment)
+# pairs. The details are important and delicate, best illustrated by example:
+# 
+#     byte code offset   source code line number
+#        0                   1
+#        6                   2
+#       50                   7
+#      350                 307
+#      361                 308
+# 
+# The first trick is that these numbers aren't stored, only the increments
+# from one row to the next (this doesn't really work, but it's a start):
+# 
+#     0, 1,  6, 1,  44, 5,  300, 300,  11, 1
+# 
+# The second trick is that an unsigned byte can't hold negative values, or
+# values larger than 255, so (a) there's a deep assumption that byte code
+# offsets and their corresponding line #s both increase monotonically, and (b)
+# if at least one column jumps by more than 255 from one row to the next, more
+# than one pair is written to the table. In case #b, there's no way to know
+# from looking at the table later how many were written.  That's the delicate
+# part.  A user of c_lnotab desiring to find the source line number
+# corresponding to a bytecode address A should do something like this
+# 
+#     lineno = addr = 0
+#     for addr_incr, line_incr in c_lnotab:
+#         addr += addr_incr
+#         if addr > A:
+#             return lineno
+#         lineno += line_incr
+# 
+# In order for this to work, when the addr field increments by more than 255,
+# the line # increment in each pair generated must be 0 until the remaining addr
+# increment is < 256.  So, in the example above, assemble_lnotab (it used
+# to be called com_set_lineno) should not (as was actually done until 2.2)
+# expand 300, 300 to 255, 255, 45, 45, 
+#             but to 255,   0, 45, 255, 0, 45.
+# 
+
+def lnotab(pairs, first_lineno=0):
+    """Yields byte integers representing the pairs of integers passed in."""
+    assert first_lineno <= pairs[0][1]
+    cur_byte, cur_line = 0, first_lineno
+    for byte_off, line_off in pairs:
+        byte_delta = byte_off - cur_byte
+        line_delta = line_off - cur_line
+        assert byte_delta >= 0
+        assert line_delta >= 0
+        while byte_delta > 255:
+            yield 255 # byte
+            yield 0   # line
+            byte_delta -= 255
+        yield byte_delta
+        while line_delta > 255:
+            yield 255 # line
+            yield 0   # byte
+            line_delta -= 255
+        yield line_delta
+        cur_byte, cur_line = byte_off, line_off
+
+def lnotab_string(pairs, first_lineno=0):
+    return "".join(chr(b) for b in lnotab(pairs, first_lineno))
+
+def byte_pairs(lnotab):
+    """Yield pairs of integers from a string."""
+    for i in range(0, len(lnotab), 2):
+        yield ord(lnotab[i]), ord(lnotab[i+1])
+        
+def lnotab_numbers(lnotab, first_lineno=0):
+    """Yields the byte, line offset pairs from a packed lnotab string."""
+
+    last_line = None
+    cur_byte, cur_line = 0, first_lineno
+    for byte_delta, line_delta in byte_pairs(lnotab):
+        if byte_delta:
+            if cur_line != last_line:
+                yield cur_byte, cur_line
+                last_line = cur_line
+            cur_byte += byte_delta
+        cur_line += line_delta
+    if cur_line != last_line:        
+        yield cur_byte, cur_line
+    
+
+## Tests
+
+def same_list(a, b):
+    a = list(a)
+    assert a == b
+    
+def test_simple():
+    same_list(lnotab([(0,1)]), [0, 1])
+    same_list(lnotab([(0,1), (6, 2)]), [0, 1,  6, 1])
+
+def test_starting_above_one():
+    same_list(lnotab([(0,100), (6,101)]), [0, 100,  6, 1])
+    same_list(lnotab([(0,100), (6,101)], 50), [0, 50,  6, 1])
+    
+def test_large_gaps():
+    same_list(lnotab([(0,1), (300, 300)]), [0, 1,  255, 0,  45, 255,  0, 44])
+    same_list(lnotab([(0,1), (255, 300)]), [0, 1,  255, 255,  0, 44])
+    same_list(lnotab([(0,1), (255, 256)]), [0, 1,  255, 255])
+    
+def test_strings():
+    assert lnotab_string([(0,1), (6, 2)]) == "\x00\x01\x06\x01"
+    assert lnotab_string([(0,1), (300, 300)]) == "\x00\x01\xff\x00\x2d\xff\x00\x2c"
+
+def test_numbers():
+    same_list(lnotab_numbers("\x00\x01\x06\x01"), [(0,1), (6,2)])
+    same_list(lnotab_numbers("\x00\x01\xff\x00\x2d\xff\x00\x2c"), [(0,1), (300, 300)])
+
+def test_numbers_firstlineno():
+    same_list(lnotab_numbers("\x00\x01\xff\x00\x2d\xff\x00\x2c", 10), [(0,11), (300, 310)])

File lab/sample.py

+a, b = 1, 0
+if a or b or fn():  # fn is not defined here; a is truthy, so `or` stops before calling it
+    # Hey
+    a = 3
+d = 4

File lab/show_pyc.py

+import dis, marshal, struct, sys, time, types
+
+def show_pyc_file(fname):
+    f = open(fname, "rb")
+    magic = f.read(4)
+    moddate = f.read(4)
+    modtime = time.asctime(time.localtime(struct.unpack('L', moddate)[0]))
+    print "magic %s" % (magic.encode('hex'))
+    print "moddate %s (%s)" % (moddate.encode('hex'), modtime)
+    code = marshal.load(f)
+    show_code(code)
+
+def show_py_file(fname):
+    text = open(fname).read().replace('\r\n', '\n')
+    code = compile(text, fname, "exec")
+    show_code(code)
+
+def show_code(code, indent=''):
+    print "%scode" % indent
+    indent += '   '
+    print "%sargcount %d" % (indent, code.co_argcount)
+    print "%snlocals %d" % (indent, code.co_nlocals)
+    print "%sstacksize %d" % (indent, code.co_stacksize)
+    print "%sflags %04x" % (indent, code.co_flags)
+    show_hex("code", code.co_code, indent=indent)
+    dis.disassemble(code)
+    print "%sconsts" % indent
+    for const in code.co_consts:
+        if type(const) == types.CodeType:
+            show_code(const, indent+'   ')
+        else:
+            print "   %s%r" % (indent, const)
+    print "%snames %r" % (indent, code.co_names)
+    print "%svarnames %r" % (indent, code.co_varnames)
+    print "%sfreevars %r" % (indent, code.co_freevars)
+    print "%scellvars %r" % (indent, code.co_cellvars)
+    print "%sfilename %r" % (indent, code.co_filename)
+    print "%sname %r" % (indent, code.co_name)
+    print "%sfirstlineno %d" % (indent, code.co_firstlineno)
+    show_hex("lnotab", code.co_lnotab, indent=indent)
+    
+def show_hex(label, h, indent):
+    h = h.encode('hex')
+    if len(h) < 60:
+        print "%s%s %s" % (indent, label, h)
+    else:
+        print "%s%s" % (indent, label)
+        for i in range(0, len(h), 60):
+            print "%s   %s" % (indent, h[i:i+60])
+
+def show_file(fname):
+    if fname.endswith('pyc'):
+        show_pyc_file(fname)
+    elif fname.endswith('py'):
+        show_py_file(fname)
+    else:
+        print "Odd file:", fname
+        
+def main(args):
+    for a in args:
+        show_file(a)
+        
+# When run as a script, show each file named on the command line.
+if __name__ == '__main__':
+    main(sys.argv[1:])

File lab/trace_sample.py

+import os, sys
+
+# Current call-nesting depth; trace() uses it to indent its output.
+# (A `global` statement at module level has no effect.)
+global nest
+nest = 0
+
+def trace(frame, event, arg):
+    #if event == 'line':
+    global nest
+    
+    print "%s%s %s %d (%r)" % (
+        "   " * nest,
+        event,
+        os.path.basename(frame.f_code.co_filename),
+        frame.f_lineno,
+        arg
+        )
+    
+    if event == 'call':
+        nest += 1
+    if event == 'return':
+        nest -= 1
+        
+    return trace
+
+# Install the tracer first, so that importing the sample module is traced.
+sys.settrace(trace)
+
+import sample
+#import littleclass