clover: Clean up the kernel and program object interface.

[ Tom Stellard: Make sure to bind global arguments before retrieving handles. ]
Tested-by: Tom Stellard <thomas.stellard@amd.com>
This commit is contained in:
Francisco Jerez
2013-09-16 21:50:40 -07:00
parent 10284b1d2d
commit 7a9bbff7d6
8 changed files with 139 additions and 136 deletions
@@ -33,15 +33,12 @@ clCreateKernel(cl_program d_prog, const char *name, cl_int *r_errcode) try {
if (!name)
throw error(CL_INVALID_VALUE);
if (prog.binaries().empty())
throw error(CL_INVALID_PROGRAM_EXECUTABLE);
auto sym = prog.binaries().begin()->second.sym(name);
auto &sym = find(name_equals(name), prog.symbols());
ret_error(r_errcode, CL_SUCCESS);
return new kernel(prog, name, range(sym.args));
} catch (module::noent_error &e) {
} catch (std::out_of_range &e) {
ret_error(r_errcode, CL_INVALID_KERNEL_NAME);
return NULL;
@@ -54,11 +51,7 @@ PUBLIC cl_int
clCreateKernelsInProgram(cl_program d_prog, cl_uint count,
cl_kernel *rd_kerns, cl_uint *r_count) try {
auto &prog = obj(d_prog);
if (prog.binaries().empty())
throw error(CL_INVALID_PROGRAM_EXECUTABLE);
auto &syms = prog.binaries().begin()->second.syms;
auto &syms = prog.symbols();
if (rd_kerns && count < syms.size())
throw error(CL_INVALID_VALUE);
@@ -102,15 +95,12 @@ clReleaseKernel(cl_kernel d_kern) try {
PUBLIC cl_int
clSetKernelArg(cl_kernel d_kern, cl_uint idx, size_t size,
const void *value) try {
auto &kern = obj(d_kern);
if (idx >= kern.args.size())
throw error(CL_INVALID_ARG_INDEX);
kern.args[idx]->set(size, value);
obj(d_kern).args().at(idx).set(size, value);
return CL_SUCCESS;
} catch (std::out_of_range &e) {
return CL_INVALID_ARG_INDEX;
} catch (error &e) {
return e.get();
}
@@ -127,7 +117,7 @@ clGetKernelInfo(cl_kernel d_kern, cl_kernel_info param,
break;
case CL_KERNEL_NUM_ARGS:
buf.as_scalar<cl_uint>() = kern.args.size();
buf.as_scalar<cl_uint>() = kern.args().size();
break;
case CL_KERNEL_REFERENCE_COUNT:
@@ -160,8 +150,8 @@ clGetKernelWorkGroupInfo(cl_kernel d_kern, cl_device_id d_dev,
auto &kern = obj(d_kern);
auto pdev = pobj(d_dev);
if ((!pdev && kern.prog.binaries().size() != 1) ||
(pdev && !kern.prog.binaries().count(pdev)))
if ((!pdev && kern.prog.devices().size() != 1) ||
(pdev && !count(*pdev, kern.prog.devices())))
throw error(CL_INVALID_DEVICE);
switch (param) {
@@ -210,10 +200,10 @@ namespace {
if (any_of([](kernel::argument &arg) {
return !arg.set();
}, map(derefs(), kern.args)))
}, kern.args()))
throw error(CL_INVALID_KERNEL_ARGS);
if (!kern.prog.binaries().count(&q.dev))
if (!count(q.dev, kern.prog.devices()))
throw error(CL_INVALID_PROGRAM_EXECUTABLE);
}
@@ -173,11 +173,11 @@ clGetProgramInfo(cl_program d_prog, cl_program_info param,
break;
case CL_PROGRAM_NUM_DEVICES:
buf.as_scalar<cl_uint>() = prog.binaries().size();
buf.as_scalar<cl_uint>() = prog.devices().size();
break;
case CL_PROGRAM_DEVICES:
buf.as_vector<cl_device_id>() = map(keys(), prog.binaries());
buf.as_vector<cl_device_id>() = descs(prog.devices());
break;
case CL_PROGRAM_SOURCE:
@@ -185,25 +185,23 @@ clGetProgramInfo(cl_program d_prog, cl_program_info param,
break;
case CL_PROGRAM_BINARY_SIZES:
buf.as_vector<size_t>() =
map([](const std::pair<device *, module> &ent) {
compat::ostream::buffer_t bin;
compat::ostream s(bin);
ent.second.serialize(s);
return bin.size();
},
prog.binaries());
buf.as_vector<size_t>() = map([&](const device &dev) {
compat::ostream::buffer_t bin;
compat::ostream s(bin);
prog.binary(dev).serialize(s);
return bin.size();
},
prog.devices());
break;
case CL_PROGRAM_BINARIES:
buf.as_matrix<unsigned char>() =
map([](const std::pair<device *, module> &ent) {
compat::ostream::buffer_t bin;
compat::ostream s(bin);
ent.second.serialize(s);
return bin;
},
prog.binaries());
buf.as_matrix<unsigned char>() = map([&](const device &dev) {
compat::ostream::buffer_t bin;
compat::ostream s(bin);
prog.binary(dev).serialize(s);
return bin;
},
prog.devices());
break;
default:
@@ -224,7 +222,7 @@ clGetProgramBuildInfo(cl_program d_prog, cl_device_id d_dev,
auto &prog = obj(d_prog);
auto &dev = obj(d_dev);
if (!count(dev, prog.ctx.devs()))
if (!count(dev, prog.devices()))
return CL_INVALID_DEVICE;
switch (param) {
@@ -27,36 +27,35 @@
using namespace clover;
kernel::kernel(program &prog,
const std::string &name,
kernel::kernel(program &prog, const std::string &name,
const std::vector<module::argument> &margs) :
prog(prog), _name(name), exec(*this) {
for (auto marg : margs) {
for (auto &marg : margs) {
if (marg.type == module::argument::scalar)
args.emplace_back(new scalar_argument(marg.size));
_args.emplace_back(new scalar_argument(marg.size));
else if (marg.type == module::argument::global)
args.emplace_back(new global_argument);
_args.emplace_back(new global_argument);
else if (marg.type == module::argument::local)
args.emplace_back(new local_argument);
_args.emplace_back(new local_argument);
else if (marg.type == module::argument::constant)
args.emplace_back(new constant_argument);
_args.emplace_back(new constant_argument);
else if (marg.type == module::argument::image2d_rd ||
marg.type == module::argument::image3d_rd)
args.emplace_back(new image_rd_argument);
_args.emplace_back(new image_rd_argument);
else if (marg.type == module::argument::image2d_wr ||
marg.type == module::argument::image3d_wr)
args.emplace_back(new image_wr_argument);
_args.emplace_back(new image_wr_argument);
else if (marg.type == module::argument::sampler)
args.emplace_back(new sampler_argument);
_args.emplace_back(new sampler_argument);
else
throw error(CL_INVALID_KERNEL_DEFINITION);
}
}
template<typename T, typename V>
static inline std::vector<T>
pad_vector(command_queue &q, const V &v, T x) {
std::vector<T> w { v.begin(), v.end() };
template<typename V>
static inline std::vector<uint>
pad_vector(command_queue &q, const V &v, uint x) {
std::vector<uint> w { v.begin(), v.end() };
w.resize(q.dev.max_block_size().size(), x);
return w;
}
@@ -66,7 +65,13 @@ kernel::launch(command_queue &q,
const std::vector<size_t> &grid_offset,
const std::vector<size_t> &grid_size,
const std::vector<size_t> &block_size) {
const auto m = prog.binary(q.dev);
const auto reduced_grid_size =
map(divides(), grid_size, block_size);
void *st = exec.bind(&q);
// The handles are created during exec_context::bind(), so we need make
// sure to call exec_context::bind() before retrieving them.
std::vector<uint32_t *> g_handles = map([&](size_t h) {
return (uint32_t *)&exec.input[h];
}, exec.g_handles);
@@ -84,9 +89,9 @@ kernel::launch(command_queue &q,
exec.g_buffers.data(), g_handles.data());
q.pipe->launch_grid(q.pipe,
pad_vector<uint>(q, block_size, 1).data(),
pad_vector<uint>(q, grid_size, 1).data(),
module(q).sym(_name).offset,
pad_vector(q, block_size, 1).data(),
pad_vector(q, reduced_grid_size, 1).data(),
find(name_equals(_name), m.syms).offset,
exec.input.data());
q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), NULL, NULL);
@@ -101,9 +106,9 @@ size_t
kernel::mem_local() const {
size_t sz = 0;
for (auto &arg : args) {
if (dynamic_cast<local_argument *>(arg.get()))
sz += arg->storage();
for (auto &arg : args()) {
if (dynamic_cast<local_argument *>(&arg))
sz += arg.storage();
}
return sz;
@@ -129,13 +134,23 @@ kernel::block_size() const {
return { 0, 0, 0 };
}
kernel::argument_range
kernel::args() {
return map(derefs(), _args);
}
kernel::const_argument_range
kernel::args() const {
return map(derefs(), _args);
}
const module &
kernel::module(const command_queue &q) const {
return prog.binaries().find(&q.dev)->second;
return prog.binary(q.dev);
}
kernel::exec_context::exec_context(kernel &kern) :
kern(kern), q(NULL), mem_local(0), st(NULL) {
kern(kern), q(NULL), mem_local(0), st(NULL), cs() {
}
kernel::exec_context::~exec_context() {
@@ -148,11 +163,13 @@ kernel::exec_context::bind(command_queue *_q) {
std::swap(q, _q);
// Bind kernel arguments.
auto margs = kern.module(*q).sym(kern.name()).args;
for_each([=](std::unique_ptr<kernel::argument> &karg,
const module::argument &marg) {
karg->bind(*this, marg);
}, kern.args, margs);
auto &m = kern.prog.binary(q->dev);
auto margs = find(name_equals(kern.name()), m.syms).args;
auto msec = find(type_equals(module::section::text), m.secs);
for_each([=](kernel::argument &karg, const module::argument &marg) {
karg.bind(*this, marg);
}, kern.args(), margs);
// Create a new compute state if anything changed.
if (!st || q != _q ||
@@ -161,7 +178,7 @@ kernel::exec_context::bind(command_queue *_q) {
if (st)
_q->pipe->delete_compute_state(_q->pipe, st);
cs.prog = kern.module(*q).sec(module::section::text).data.begin();
cs.prog = msec.data.begin();
cs.req_local_mem = mem_local;
cs.req_input_mem = input.size();
st = q->pipe->create_compute_state(q->pipe, &cs);
@@ -172,8 +189,8 @@ kernel::exec_context::bind(command_queue *_q) {
void
kernel::exec_context::unbind() {
for (auto &arg : kern.args)
arg->unbind(*this);
for (auto &arg : kern.args())
arg.unbind(*this);
input.clear();
samplers.clear();
@@ -95,9 +95,18 @@ namespace clover {
bool _set;
};
kernel(program &prog,
const std::string &name,
const std::vector<module::argument> &margs);
private:
typedef adaptor_range<
derefs, std::vector<std::unique_ptr<argument>> &
> argument_range;
typedef adaptor_range<
derefs, const std::vector<std::unique_ptr<argument>> &
> const_argument_range;
public:
kernel(program &prog, const std::string &name,
const std::vector<clover::module::argument> &margs);
kernel(const kernel &kern) = delete;
kernel &
@@ -115,12 +124,13 @@ namespace clover {
const std::string &name() const;
std::vector<size_t> block_size() const;
argument_range args();
const_argument_range args() const;
program &prog;
std::vector<std::unique_ptr<argument>> args;
private:
const clover::module &
module(const command_queue &q) const;
const clover::module &module(const command_queue &q) const;
class scalar_argument : public argument {
public:
@@ -208,6 +218,7 @@ namespace clover {
void *st;
};
std::vector<std::unique_ptr<argument>> _args;
std::string _name;
exec_context exec;
};
@@ -21,7 +21,6 @@
//
#include <type_traits>
#include <algorithm>
#include "core/module.hpp"
@@ -147,28 +146,4 @@ namespace clover {
module::deserialize(compat::istream &is) {
return _proc<module>(is);
}
const module::symbol &
module::sym(compat::string name) const {
auto it = std::find_if(syms.begin(), syms.end(), [&](const symbol &x) {
return compat::string(x.name) == name;
});
if (it == syms.end())
throw noent_error();
return *it;
}
const module::section &
module::sec(typename section::type type) const {
auto it = std::find_if(secs.begin(), secs.end(), [&](const section &x) {
return x.type == type;
});
if (it == secs.end())
throw noent_error();
return *it;
}
}
@@ -27,11 +27,6 @@
namespace clover {
struct module {
class noent_error {
public:
virtual ~noent_error() {}
};
typedef uint32_t resource_id;
typedef uint32_t size_t;
@@ -45,14 +40,14 @@ namespace clover {
};
section(resource_id id, enum type type, size_t size,
const clover::compat::vector<char> &data) :
const compat::vector<char> &data) :
id(id), type(type), size(size), data(data) { }
section() : id(0), type(text), size(0), data() { }
resource_id id;
type type;
size_t size;
clover::compat::vector<char> data;
compat::vector<char> data;
};
struct argument {
@@ -97,30 +92,22 @@ namespace clover {
};
struct symbol {
symbol(const clover::compat::vector<char> &name, resource_id section,
size_t offset, const clover::compat::vector<argument> &args) :
symbol(const compat::vector<char> &name, resource_id section,
size_t offset, const compat::vector<argument> &args) :
name(name), section(section), offset(offset), args(args) { }
symbol() : name(), section(0), offset(0), args() { }
clover::compat::vector<char> name;
compat::vector<char> name;
resource_id section;
size_t offset;
clover::compat::vector<argument> args;
compat::vector<argument> args;
};
void serialize(compat::ostream &os) const;
static module deserialize(compat::istream &is);
/// Look up a symbol by name. Throws module::noent_error if not
/// found.
const symbol &sym(compat::string name) const;
/// Look up a section by type. Throws module::noent_error if not
/// found.
const section &sec(typename section::type type) const;
clover::compat::vector<symbol> syms;
clover::compat::vector<section> secs;
compat::vector<symbol> syms;
compat::vector<section> secs;
};
}
@@ -67,22 +67,38 @@ program::source() const {
return _source;
}
const std::map<device *, module> &
program::binaries() const {
return _binaries;
program::device_range
program::devices() const {
return map(derefs(), map(keys(), _binaries));
}
const module &
program::binary(const device &dev) const {
return _binaries.find(const_cast<device *>(&dev))->second;
}
cl_build_status
program::build_status(device &dev) const {
return _binaries.count(&dev) ? CL_BUILD_SUCCESS : CL_BUILD_NONE;
program::build_status(const device &dev) const {
if (_binaries.count(const_cast<device *>(&dev)))
return CL_BUILD_SUCCESS;
else
return CL_BUILD_NONE;
}
std::string
program::build_opts(device &dev) const {
program::build_opts(const device &dev) const {
return _opts.count(&dev) ? _opts.find(&dev)->second : "";
}
std::string
program::build_log(device &dev) const {
program::build_log(const device &dev) const {
return _logs.count(&dev) ? _logs.find(&dev)->second : "";
}
const compat::vector<module::symbol> &
program::symbols() const {
if (_binaries.empty())
throw error(CL_INVALID_PROGRAM_EXECUTABLE);
return _binaries.begin()->second.syms;
}
@@ -31,6 +31,11 @@
namespace clover {
class program : public ref_counter, public _cl_program {
private:
typedef adaptor_range<
derefs, adaptor_range<
keys, const std::map<device *, module> &>> device_range;
public:
program(context &ctx,
const std::string &source);
@@ -45,18 +50,22 @@ namespace clover {
void build(const ref_vector<device> &devs, const char *opts);
const std::string &source() const;
const std::map<device *, module> &binaries() const;
cl_build_status build_status(device &dev) const;
std::string build_opts(device &dev) const;
std::string build_log(device &dev) const;
device_range devices() const;
const module &binary(const device &dev) const;
cl_build_status build_status(const device &dev) const;
std::string build_opts(const device &dev) const;
std::string build_log(const device &dev) const;
const compat::vector<module::symbol> &symbols() const;
context &ctx;
private:
std::map<device *, module> _binaries;
std::map<device *, std::string> _logs;
std::map<device *, std::string> _opts;
std::map<const device *, std::string> _logs;
std::map<const device *, std::string> _opts;
std::string _source;
};
}