1. Matthew Turk
  2. yt.opencl

Source

yt.opencl / grid_sum.py

import matplotlib;matplotlib.use("Agg");import pylab
from yt.config import ytcfg
from yt.mods import *
import pyopencl as cl
import pyopencl.array as cl_array
from contextlib import contextmanager

times = {}

@contextmanager
def time_func(name):
    t1 = time.time()
    yield
    t2 = time.time()
    print "%s took %0.3e seconds" % (name, t2-t1)
    times[name] = t2-t1

pf = load("JHK-DD0030/galaxy0030")
#pf = load("DD0087/DD0087")

# This part is specific to Hex
platforms = cl.get_platforms()
GPU = platforms[0]
CPU = platforms[1]
devices = GPU.get_devices()

context = cl.Context(devices)
command_queue = cl.CommandQueue(context)

prg = cl.Program(context, open("grid_sum.cl").read()).build()
ss = prg.grid_sum
#ss.set_scalar_arg_dtypes((None, None, na.uint32))

with time_func("Load from disk"):
    for g in pf.h.grids: g["Density"]

GPUARR = {}
OUTIMG = {}
CPUIMG = {}

gformat = cl.ImageFormat(cl.channel_order.R,
                         cl.channel_type.FLOAT)

with time_func("Copy to GPU"):
    for g in pf.h.grids:
        GPUARR[g.id] = cl.Image(context, cl.mem_flags.READ_ONLY,
                                gformat, hostbuf = g["Density"])
    cl.enqueue_barrier(command_queue)

with time_func("Create output images on GPU"):
    for g in pf.h.grids:
        CPUIMG[g.id] = na.zeros((g.ActiveDimensions[1], g.ActiveDimensions[0]), dtype='float32')
        OUTIMG[g.id] = cl.Image(context, cl.mem_flags.READ_ONLY,
                                gformat, hostbuf = CPUIMG[g.id])
    cl.enqueue_barrier(command_queue)

with time_func("Grid sum into images"):
    ad = na.empty(1, dtype='uint32')
    for g in pf.h.grids:
        ad[0] = g.ActiveDimensions[2]
        ss(command_queue, 
            (g.ActiveDimensions[0], g.ActiveDimensions[1]),
            (g.ActiveDimensions[0], g.ActiveDimensions[1]),
            GPUARR[g.id], OUTIMG[g.id], na.uint32(ad[0]))
    cl.enqueue_barrier(command_queue)

with time_func("Copy images back"):
    for g in pf.h.grids:
        cl.enqueue_copy(command_queue, CPUIMG[g.id], OUTIMG[g.id],
                (0,0), (g.ActiveDimensions[1], g.ActiveDimensions[0]))
    cl.enqueue_barrier(command_queue)

#val_gpu = val_gpu.get()[0]
#print "On GPU calculated a sum of: %0.9e" % (val_gpu)

with time_func("De-allocate on GPU"):
    GPUARR.clear()
    OUTIMG.clear()

with time_func("Sum with NumPy"):
    val_cpu = dict([(g.id, g["Density"].sum(axis=2)) for g in pf.h.grids])

max_delta = 0.0
for g in pf.h.grids:
    i1 = val_cpu[g.id]
    i2 = CPUIMG[g.id].transpose()
    max_delta = max(max_delta, (na.abs(i1 - i2)/(i1 + i2)).max())
print "Max delta: %0.3e" % (max_delta)