Source

yt.opencl / grid_sum.cl

Full commit
/*
 * grid_sum: collapse a 3D image along z into a 2D image.
 *
 * Each work-item handles one (x, y) position taken from global ids 0 and 1.
 * It adds up the texels read at (x, y, z) for z = 0 .. nz-1 and stores the
 * x component of that sum at (x, y) in the output image.
 *
 * input_grid   3D image to read from (unnormalized integer coordinates,
 *              clamp addressing, nearest filtering).
 * output_plane 2D image that receives one summed value per (x, y).
 * nz           number of z slices to accumulate.
 *
 * Work-items whose (x, y) lies outside output_plane do nothing, so the
 * global work size may be padded beyond the image extent.
 */
__kernel void grid_sum(
    __read_only image3d_t input_grid,
    __write_only image2d_t output_plane,
    uint nz
)
{
    const sampler_t grid_reader =
        CLK_NORMALIZED_COORDS_FALSE |
        CLK_ADDRESS_CLAMP |
        CLK_FILTER_NEAREST;

    const int xi = (int)get_global_id(0);
    const int yi = (int)get_global_id(1);

    /* Writing outside the image is undefined, so bail out early. */
    if (xi >= get_image_width(output_plane) ||
        yi >= get_image_height(output_plane)) {
        return;
    }

    float4 sum = (float4)(0.0f, 0.0f, 0.0f, 0.0f);

    /*
     * The counter must be private to the work-item: a __local counter is
     * shared by the whole work-group and would be raced on by every
     * work-item. It is unsigned to match nz.
     */
    for (uint zi = 0; zi < nz; ++zi) {
        sum += read_imagef(input_grid, grid_reader,
                           (int4)(xi, yi, (int)zi, 0));
    }

    /* write_imagef takes a float4 colour; replicate the summed x channel. */
    write_imagef(output_plane, (int2)(xi, yi), (float4)(sum.x));
}