# Ported from a Java benchmark whose history is:
#  This is adapted from a benchmark written by John Ellis and Pete Kovac
#  of Post Communications.
#  It was modified by Hans Boehm of Silicon Graphics.
# 
#       This is no substitute for real applications.  No actual application
#       is likely to behave in exactly this way.  However, this benchmark was
#       designed to be more representative of real applications than other
#       Java GC benchmarks of which we are aware.
#       It attempts to model those properties of allocation requests that
#       are important to current GC techniques.
#       It is designed to be used either to obtain a single overall performance
#       number, or to give a more detailed estimate of how collector
#       performance varies with object lifetimes.  It prints the time
#       required to allocate and collect balanced binary trees of various
#       sizes.  Smaller trees result in shorter object lifetimes.  Each cycle
#       allocates roughly the same amount of memory.
#       Two data structures are kept around during the entire process, so
#       that the measured performance is representative of applications
#       that maintain some live in-memory data.  One of these is a tree
#       containing many pointers.  The other is a large array containing
#       double precision floating point numbers.  Both should be of comparable
#       size.
#
#       The results are only really meaningful together with a specification
#       of how much memory was used.  It is possible to trade memory for
#       better time performance.  This benchmark should be run in a 32 MB
#       heap, though we don't currently know how to enforce that uniformly.
#
#       Unlike the original Ellis and Kovac benchmark, we do not attempt to
#       measure pause times.  This facility should eventually be added back
#       in.  There are several reasons for omitting it for now.  The original
#       implementation depended on assumptions about the thread scheduler
#       that don't hold uniformly.  The results really measure both the
#       scheduler and GC.  Pause time measurements tend to not fit well with
#       current benchmark suites.  As far as we know, none of the current
#       commercial Java implementations seriously attempt to minimize GC pause
#       times.
#
#       Known deficiencies:
#               - No way to check on memory use
#               - No cyclic data structures
#               - No attempt to measure variation with object size
#               - Results are sensitive to locking cost, but we don't
#                 check for proper locking
import os, time

USAGE = """gcbench [num_repetitions] [--depths=N,N,N..] [--threads=N]"""
ENABLE_THREADS = True
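
# Example invocations (the options are parsed in entry_point() below):
#   gcbench.py                       run once with the default depths
#   gcbench.py 3                     repeat the whole benchmark three times
#   gcbench.py --depths=4,8,12       time only the listed tree depths
#   gcbench.py --threads=4           run the construction loops in 4 parallel threads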


class Node(object):

    def __init__(self, l=None, r=None):
        self.left = l
        self.right = r

kStretchTreeDepth    = 18  # about 16Mb (for Java)
kLongLivedTreeDepth  = 16  # about 4Mb (for Java)
kArraySize  = 500000   # about 4Mb
kMinTreeDepth = 4
kMaxTreeDepth = 16

def tree_size(i):
    "Nodes used by a tree of a given size"
    return (1 << (i + 1)) - 1

def num_iters(i):
    "Number of iterations to use for a given tree depth"
    return 2 * tree_size(kStretchTreeDepth) // tree_size(i)

def populate(depth, node):
    "Build tree top down, assigning to older objects."
    if depth <= 0:
        return
    else:
        depth -= 1
        node.left = Node()
        node.right = Node()
        populate(depth, node.left)
        populate(depth, node.right)

def make_tree(depth):
    "Build tree bottom-up"
    if depth <= 0:
        return Node()
    else:
        return Node(make_tree(depth-1), make_tree(depth-1))

def print_diagnostics():
    "ought to print free/total memory"
    pass

def time_construction(depth):
    niters = num_iters(depth)
    print "Creating %d trees of depth %d" % (niters, depth)
    t_start = time.time()
    for i in range(niters):
        temp_tree = Node()
        populate(depth, temp_tree)
        temp_tree = None   # drop the reference so the whole tree becomes garbage
    t_finish = time.time()
    print "\tTop down constrution took %f ms" % ((t_finish-t_start)*1000.)
    t_start = time.time()
    for i in range(niters):
        temp_tree = make_tree(depth)
        temp_tree = None   # again, let the collector reclaim the tree
    t_finish = time.time()
    print "\tBottom up constrution took %f ms" % ((t_finish-t_start)*1000.)

DEFAULT_DEPTHS = range(kMinTreeDepth, kMaxTreeDepth+1, 2)   # 4, 6, 8, ..., 16

def time_constructions(depths):
    for d in depths:
        time_construction(d)

def time_parallel_constructions(depths, nthreads):
    import threading
    threadlist = []
    print "Starting %d parallel threads..." % (nthreads,)
    for n in range(nthreads):
        t = threading.Thread(target=time_constructions, args=(depths,))
        t.start()
        threadlist.append(t)
    for t in threadlist:
        t.join()
    print "All %d threads finished" % (nthreads,)

def main(depths=DEFAULT_DEPTHS, threads=0):
    print "Garbage Collector Test"
    print " Stretching memory with a binary tree of depth %d" % kStretchTreeDepth
    print_diagnostics()
    t_start = time.time()
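    # The stretch tree is dropped as soon as it is built; its only purpose is
    # to "stretch" the heap up to the benchmark's working size.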
    temp_tree = make_tree(kStretchTreeDepth)
    temp_tree = None

    # Create a long lived object
    print " Creating a long-lived binary tree of depth %d" % kLongLivedTreeDepth
    long_lived_tree = Node()
    populate(kLongLivedTreeDepth, long_lived_tree)

    # Create long-lived array, filling half of it
    print " Creating a long-lived array of %d doubles" % kArraySize
    array = [0.0] * kArraySize
    i = 1
    while i < kArraySize/2:
        array[i] = 1.0/i
        i += 1
    print_diagnostics()

    if threads:
        time_parallel_constructions(depths, threads)
    else:
        time_constructions(depths)

    if long_lived_tree is None or array[1024] != 1.0/1024:
        raise Failed

    t_finish = time.time()
    print_diagnostics()
    print "Completed in %f ms." % ((t_finish-t_start)*1000.)

class Failed(Exception):
    pass


def argerror():
    print "Usage:"
    print "   ", USAGE
    return 2

def entry_point(argv):
    depths = DEFAULT_DEPTHS
    threads = 0
    repeatcount = 1
    for arg in argv[1:]:
        if arg.startswith('--threads='):
            arg = arg[len('--threads='):]
            if not ENABLE_THREADS:
                print "threads disabled (they cannot be translated)"
                return 1
            try:
                threads = int(arg)
            except ValueError:
                return argerror()
        elif arg.startswith('--depths='):
            arg = arg[len('--depths='):].split(',')
            try:
                depths = [int(s) for s in arg]
            except ValueError:
                return argerror()
        else:
            try:
                repeatcount = int(arg)
            except ValueError:
                return argerror()
    for i in range(repeatcount):
        main(depths, threads)
    return 0


if __name__ == '__main__':
    import sys
    sys.exit(entry_point(sys.argv))