From 14ea1021751125fce3cf806eda7dfa441cc96041 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Thu, 1 Aug 2024 12:42:12 +0200 Subject: [PATCH] nir: add load_global_size intrinsic There is no need to compute it in the shader as the result is known at runtime already. Reviewed-by: Faith Ekstrand Reviewed-by: Christian Gmeiner Tested-by: Christian Gmeiner Part-of: --- src/compiler/nir/nir.c | 4 ++++ src/compiler/nir/nir.h | 1 + src/compiler/nir/nir_gather_info.c | 1 + src/compiler/nir/nir_intrinsics.py | 2 ++ src/compiler/nir/nir_lower_system_values.c | 11 +++++++---- 5 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index 5c4cbda04e2..29d1f5003fb 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -2260,6 +2260,8 @@ nir_intrinsic_from_system_value(gl_system_value val) return nir_intrinsic_load_base_global_invocation_id; case SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX: return nir_intrinsic_load_global_invocation_index; + case SYSTEM_VALUE_GLOBAL_GROUP_SIZE: + return nir_intrinsic_load_global_size; case SYSTEM_VALUE_WORK_DIM: return nir_intrinsic_load_work_dim; case SYSTEM_VALUE_USER_DATA_AMD: @@ -2424,6 +2426,8 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin) return SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID; case nir_intrinsic_load_global_invocation_index: return SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX; + case nir_intrinsic_load_global_size: + return SYSTEM_VALUE_GLOBAL_GROUP_SIZE; case nir_intrinsic_load_work_dim: return SYSTEM_VALUE_WORK_DIM; case nir_intrinsic_load_user_data_amd: diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 5e3c1f1bd48..118b144960d 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -5971,6 +5971,7 @@ nir_build_lowered_load_helper_invocation(struct nir_builder *b); typedef struct nir_lower_compute_system_values_options { bool has_base_global_invocation_id : 1; bool has_base_workgroup_id : 1; + bool has_global_size : 1; bool shuffle_local_ids_for_quad_derivatives : 1; bool lower_local_invocation_index : 1; bool lower_cs_local_id_to_index : 1; diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c index 0425745b7c3..4f07282f681 100644 --- a/src/compiler/nir/nir_gather_info.c +++ b/src/compiler/nir/nir_gather_info.c @@ -678,6 +678,7 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader, case nir_intrinsic_load_global_invocation_id: case nir_intrinsic_load_base_global_invocation_id: case nir_intrinsic_load_global_invocation_index: + case nir_intrinsic_load_global_size: case nir_intrinsic_load_workgroup_id: case nir_intrinsic_load_base_workgroup_id: case nir_intrinsic_load_workgroup_index: diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 4a06f5d5819..7f4db541f85 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -939,6 +939,8 @@ system_value("global_invocation_id", 3, bit_sizes=[32, 64]) # e.g. global_work_offset of clEnqueueNDRangeKernel system_value("base_global_invocation_id", 3, bit_sizes=[32, 64]) system_value("global_invocation_index", 1, bit_sizes=[32, 64]) +# threads per dimension in an invocation +system_value("global_size", 3, bit_sizes=[32, 64]) system_value("work_dim", 1) system_value("line_width", 1) system_value("aa_line_width", 1) diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c index f33fe3ba0e9..a3833b233f0 100644 --- a/src/compiler/nir/nir_lower_system_values.c +++ b/src/compiler/nir/nir_lower_system_values.c @@ -232,9 +232,6 @@ lower_system_value_instr(nir_builder *b, nir_instr *instr, void *_state) return nir_imm_int(b, 0); break; - case SYSTEM_VALUE_GLOBAL_GROUP_SIZE: - return build_global_group_size(b, bit_size); - case SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL: return nir_load_barycentric(b, nir_intrinsic_load_barycentric_pixel, INTERP_MODE_NOPERSPECTIVE); @@ -717,7 +714,7 @@ lower_compute_system_value_instr(nir_builder *b, /* OpenCL's global_linear_id explicitly ignores the global offset */ assert(b->shader->info.stage == MESA_SHADER_KERNEL); nir_def *global_id = nir_load_global_invocation_id(b, bit_size); - nir_def *global_size = build_global_group_size(b, bit_size); + nir_def *global_size = nir_load_global_size(b, bit_size); /* index = id.x + ((id.y + (id.z * size.y)) * size.x) */ nir_def *index; @@ -729,6 +726,12 @@ lower_compute_system_value_instr(nir_builder *b, return index; } + case nir_intrinsic_load_global_size: { + if (options && !options->has_global_size) + return build_global_group_size(b, bit_size); + return NULL; + } + case nir_intrinsic_load_workgroup_id: { if (options && options->lower_workgroup_id_to_index) { nir_def *wg_idx = nir_load_workgroup_index(b);