# Source: research-python / jip.py

#!/usr/bin/env python
# encoding: utf-8
"""
jip.py

This provides an interface to the summary files provided by the
Java Interactive Profiler (JIP).
Created by Vinay Augustine on 2007-08-21.
Copyright (c) 2007 Vinay Augustine. All rights reserved.
"""

# import dateutil.parser as dateutil
import csv
import numpy
import re
import sys
from collections import defaultdict
from os import listdir, path
from UserDict import UserDict

profile_regex = re.compile(r'\+\-+')
def read_jip_file(jip_file):
    """Parse a single JIP summary file into per-location profile stats.

    The file is split on its '+---' separator rules; the fifth section
    holds the interactions table. The table's first six lines are
    headers; each data line is: count time percent location.

    Returns a defaultdict mapping location -> {'count': int,
    'time': float, 'percent': float}. Missing locations default to
    zeroed stats -- callers rely on this when aligning profiles against
    a shared column set.
    """
    # 'with' guarantees the handle is closed even if parsing raises.
    with open(jip_file) as f:
        content = f.read()

    lines = profile_regex.split(content)[4].strip().splitlines()
    data = defaultdict(lambda: {'count': 0, 'time': 0.0, 'percent': 0.0})
    for line in lines[6:]:
        count, time, percent, location = line.split()
        data[location] = {'count': int(count),
                          'time': float(time),
                          'percent': float(percent)}
    return data

def get_locations(output_dir, jip_ext='.profile'):
    """Count, for every profiled location, how many JIP files mention it.

    Scans output_dir for files ending in jip_ext and tallies each
    location's number of appearances across those profiles.
    """
    tally = defaultdict(int)
    for entry in listdir(output_dir):
        if not entry.endswith(jip_ext):
            continue
        profile = read_jip_file(path.join(output_dir, entry))
        for location in profile:
            tally[location] += 1
    return tally

def read_failure_file(failure_file):
    """Parse a JIP failure-count file.

    The first two lines are headers; each remaining line has the form
    '<count> <title>'. Returns a dict mapping title -> int(count).
    """
    # 'with' guarantees the handle is closed even if parsing raises.
    with open(failure_file) as f:
        lines = f.readlines()

    failures = {}
    for line in lines[2:]:
        count, title = line.split()
        failures[title] = int(count)
    return failures


def read_profile(input_name, jip_ext='.profile', failure_ext='.failures'):
    """Load the profile data and failure counts sharing a base name.

    Reads input_name + jip_ext and input_name + failure_ext and returns
    the pair (profile_data, failure_counts).
    """
    profile_data = read_jip_file(input_name + jip_ext)
    failure_counts = read_failure_file(input_name + failure_ext)
    return (profile_data, failure_counts)

def read_profiles(output_dir, jip_ext='.profile', failure_ext='.failures', colnames=None):
    """Assemble a dataset from every JIP profile/failure pair in a directory.

    For each base name that has both a profile (jip_ext) and a failures
    (failure_ext) file, builds one row of per-location call counts and
    one row of per-failure-type counts.

    Returns (rownames, colnames, dataset, failure_types, failureset)
    where dataset is len(rownames) x len(colnames) and failureset is
    len(rownames) x len(failure_types), both integer arrays. A row whose
    profile or failure file lacks an expected key is reported to stderr
    and left as zeros.
    """
    if not colnames:
        # Bug fix: honour the caller's jip_ext (was hard-wired to the
        # default extension via get_locations(output_dir)).
        colnames = list(get_locations(output_dir, jip_ext).keys())
    print('found %d features.' % len(colnames))

    # Keep only base names that have a matching failures file.
    rownames = []
    for profile in (f for f in listdir(output_dir) if f.endswith(jip_ext)):
        rowname = profile[:profile.index(jip_ext)]
        if path.exists(path.join(output_dir, rowname + failure_ext)):
            rownames.append(rowname)
    print('found %d profiles.' % len(rownames))

    # Failure types come from the first row's failure file, sorted so
    # the column order is deterministic.
    testfile = path.join(output_dir, rownames[0] + failure_ext)
    failure_types = sorted(read_failure_file(testfile))
    print('found %d failures.' % len(failure_types))

    # numpy.int was a deprecated (now removed) alias of builtin int.
    dataset = numpy.zeros((len(rownames), len(colnames)), dtype=int)
    failureset = numpy.zeros((len(rownames), len(failure_types)), dtype=int)

    for i, rowname in enumerate(rownames):
        # Bug fix: forward the extensions (read_profile previously fell
        # back to its defaults regardless of jip_ext/failure_ext).
        (profile, failures) = read_profile(path.join(output_dir, rowname),
                                           jip_ext, failure_ext)
        try:
            data_row = numpy.array([profile[k]['count'] for k in colnames])
            failure_row = numpy.array([failures[k] for k in failure_types])

            dataset[i] = data_row
            failureset[i] = failure_row
        except KeyError:
            # Missing failure type in this row's file: leave the row zeroed.
            sys.stderr.write('skipping %s.\n' % rowname)

        if not i % 500:
            print('parsed %d profiles.' % i)

    return (rownames, colnames, dataset, failure_types, failureset)

def read_names(filename):
    """Read a file of names, one per line, with whitespace stripped.

    Returns a list of stripped lines (blank lines become '').
    """
    # 'with' closes the file even if reading raises; iterating the
    # handle avoids materializing readlines() first.
    with open(filename) as f:
        return [line.strip() for line in f]

def read_data(filename, splitchar=','):
    """Read delimiter-separated integer rows from a file.

    Each line is split on splitchar and every field parsed as int.
    Returns a list of tuples, one per line. Raises ValueError on a
    non-integer field (same as the original behavior).
    """
    # 'with' closes the file even if int() raises mid-parse.
    with open(filename) as f:
        return [tuple(int(field) for field in line.split(splitchar))
                for line in f]

def consolidate_failures(data, failure_names, out=sys.stdout):
    """Collapse per-outcome failure vectors into named outcome classes.

    Each outcome in data is a sequence of truthy flags parallel to
    failure_names; its class name joins the flagged failure names with
    '+', or is 'SUCCESS' when no flag is set. For every outcome, the
    index of its class within the evolving names list is written to
    out, one integer per line.

    Returns (names, counts): the ordered list of class names (starting
    with 'SUCCESS' and the individual failure names, with combined
    classes appended as first seen) and a defaultdict of occurrence
    counts keyed by class name.
    """
    counts = defaultdict(int, SUCCESS=0)
    length = len(failure_names)
    table_of_contents = dict()

    names = ['SUCCESS']
    names.extend(failure_names)

    for outcome in data:
        # Build the combined class name from the set flags.
        flagged = [failure_names[i] for i in range(length) if outcome[i]]
        name = '+'.join(flagged) if flagged else 'SUCCESS'
        counts[name] += 1
        if name not in names:
            names.append(name)
        if outcome not in table_of_contents:
            table_of_contents[outcome] = names.index(name)
        # Version-agnostic replacement for py2-only 'print >>out, ...';
        # emits identical bytes (the index followed by a newline).
        out.write('%d\n' % table_of_contents[outcome])

    return names, counts