clover: implement CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE
Work-group size should always be aligned to the subgroup size; this is a basic requirement, as otherwise some work-items will be no-ops. It might make sense to refine the value according to a kernel's resource usage, but that's a possible optimization for the future.

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
This commit is contained in:
@@ -169,7 +169,7 @@ clGetKernelWorkGroupInfo(cl_kernel d_kern, cl_device_id d_dev,
|
||||
break;
|
||||
|
||||
case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
|
||||
buf.as_scalar<size_t>() = 1;
|
||||
buf.as_scalar<size_t>() = dev.subgroup_size();
|
||||
break;
|
||||
|
||||
case CL_KERNEL_PRIVATE_MEM_SIZE:
|
||||
|
||||
@@ -185,6 +185,11 @@ device::max_block_size() const {
|
||||
return { v.begin(), v.end() };
|
||||
}
|
||||
|
||||
// Returns the device's subgroup size: the first element of the
// PIPE_COMPUTE_CAP_SUBGROUP_SIZE compute parameter queried through `pipe`.
// Reported to applications as CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE.
cl_uint
|
||||
device::subgroup_size() const {
|
||||
// NOTE(review): indexes element [0] without checking that the query
// returned anything — confirm get_compute_param always yields at least one
// value for this cap on every driver.
return get_compute_param<uint32_t>(pipe, PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0];
|
||||
}
|
||||
|
||||
std::string
|
||||
device::device_name() const {
|
||||
return pipe->get_name(pipe);
|
||||
|
||||
@@ -67,6 +67,7 @@ namespace clover {
|
||||
bool has_doubles() const;
|
||||
|
||||
std::vector<size_t> max_block_size() const;
|
||||
cl_uint subgroup_size() const;
|
||||
std::string device_name() const;
|
||||
std::string vendor_name() const;
|
||||
enum pipe_shader_ir ir_format() const;
|
||||
|
||||
Reference in New Issue
Block a user