nir: add load_global_size intrinsic

There is no need to compute it in the shader as the result is known at
runtime already.

Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Reviewed-by: Christian Gmeiner <cgmeiner@igalia.com>
Tested-by: Christian Gmeiner <cgmeiner@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30467>
This commit is contained in:
Karol Herbst
2024-08-01 12:42:12 +02:00
committed by Marge Bot
parent ef58f2408f
commit 14ea102175
5 changed files with 15 additions and 4 deletions
+4
View File
@@ -2260,6 +2260,8 @@ nir_intrinsic_from_system_value(gl_system_value val)
return nir_intrinsic_load_base_global_invocation_id;
case SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX:
return nir_intrinsic_load_global_invocation_index;
case SYSTEM_VALUE_GLOBAL_GROUP_SIZE:
return nir_intrinsic_load_global_size;
case SYSTEM_VALUE_WORK_DIM:
return nir_intrinsic_load_work_dim;
case SYSTEM_VALUE_USER_DATA_AMD:
@@ -2424,6 +2426,8 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
return SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID;
case nir_intrinsic_load_global_invocation_index:
return SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX;
case nir_intrinsic_load_global_size:
return SYSTEM_VALUE_GLOBAL_GROUP_SIZE;
case nir_intrinsic_load_work_dim:
return SYSTEM_VALUE_WORK_DIM;
case nir_intrinsic_load_user_data_amd:
+1
View File
@@ -5971,6 +5971,7 @@ nir_build_lowered_load_helper_invocation(struct nir_builder *b);
typedef struct nir_lower_compute_system_values_options {
bool has_base_global_invocation_id : 1;
bool has_base_workgroup_id : 1;
bool has_global_size : 1;
bool shuffle_local_ids_for_quad_derivatives : 1;
bool lower_local_invocation_index : 1;
bool lower_cs_local_id_to_index : 1;
+1
View File
@@ -678,6 +678,7 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader,
case nir_intrinsic_load_global_invocation_id:
case nir_intrinsic_load_base_global_invocation_id:
case nir_intrinsic_load_global_invocation_index:
case nir_intrinsic_load_global_size:
case nir_intrinsic_load_workgroup_id:
case nir_intrinsic_load_base_workgroup_id:
case nir_intrinsic_load_workgroup_index:
+2
View File
@@ -939,6 +939,8 @@ system_value("global_invocation_id", 3, bit_sizes=[32, 64])
# e.g. global_work_offset of clEnqueueNDRangeKernel
system_value("base_global_invocation_id", 3, bit_sizes=[32, 64])
system_value("global_invocation_index", 1, bit_sizes=[32, 64])
# threads per dimension in an invocation
system_value("global_size", 3, bit_sizes=[32, 64])
system_value("work_dim", 1)
system_value("line_width", 1)
system_value("aa_line_width", 1)
+7 -4
View File
@@ -232,9 +232,6 @@ lower_system_value_instr(nir_builder *b, nir_instr *instr, void *_state)
return nir_imm_int(b, 0);
break;
case SYSTEM_VALUE_GLOBAL_GROUP_SIZE:
return build_global_group_size(b, bit_size);
case SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL:
return nir_load_barycentric(b, nir_intrinsic_load_barycentric_pixel,
INTERP_MODE_NOPERSPECTIVE);
@@ -717,7 +714,7 @@ lower_compute_system_value_instr(nir_builder *b,
/* OpenCL's global_linear_id explicitly ignores the global offset */
assert(b->shader->info.stage == MESA_SHADER_KERNEL);
nir_def *global_id = nir_load_global_invocation_id(b, bit_size);
nir_def *global_size = build_global_group_size(b, bit_size);
nir_def *global_size = nir_load_global_size(b, bit_size);
/* index = id.x + ((id.y + (id.z * size.y)) * size.x) */
nir_def *index;
@@ -729,6 +726,12 @@ lower_compute_system_value_instr(nir_builder *b,
return index;
}
case nir_intrinsic_load_global_size: {
if (options && !options->has_global_size)
return build_global_group_size(b, bit_size);
return NULL;
}
case nir_intrinsic_load_workgroup_id: {
if (options && options->lower_workgroup_id_to_index) {
nir_def *wg_idx = nir_load_workgroup_index(b);