From 0d7b8bfce5c7e46e66bd80751f6025648511b6df Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig
Date: Fri, 28 Jul 2023 21:24:20 -0400
Subject: [PATCH] agx: Don't lower load_local_invocation_index

We have an SR for it, which can save a bit of math. This came up while working
on the spiller.

total instructions in shared programs: 1778396 -> 1778376 (<.01%)
instructions in affected programs: 3036 -> 3016 (-0.66%)
helped: 10
HURT: 3
Instructions are helped.

total bytes in shared programs: 12185182 -> 12185018 (<.01%)
bytes in affected programs: 38640 -> 38476 (-0.42%)
helped: 18
HURT: 2
Bytes are helped.

total halfregs in shared programs: 531218 -> 531174 (<.01%)
halfregs in affected programs: 471 -> 427 (-9.34%)
helped: 6
HURT: 0
Halfregs are helped.

total threads in shared programs: 18909056 -> 18909184 (<.01%)
threads in affected programs: 1280 -> 1408 (10.00%)
helped: 2
HURT: 0

Signed-off-by: Alyssa Rosenzweig
Part-of:
---
 src/asahi/compiler/agx_compile.c | 3 +++
 src/asahi/compiler/agx_compile.h | 1 -
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c
index 4fcb1a1bbb2..50807f9d626 100644
--- a/src/asahi/compiler/agx_compile.c
+++ b/src/asahi/compiler/agx_compile.c
@@ -1066,6 +1066,9 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr)
       return agx_load_compute_dimension(
          b, dst, instr, AGX_SR_THREAD_POSITION_IN_THREADGROUP_X);
 
+   case nir_intrinsic_load_local_invocation_index:
+      return agx_get_sr_to(b, dst, AGX_SR_THREAD_INDEX_IN_THREADGROUP);
+
    case nir_intrinsic_barrier: {
       assert(!b->shader->is_preamble && "invalid");
 
diff --git a/src/asahi/compiler/agx_compile.h b/src/asahi/compiler/agx_compile.h
index 9f3b00d2332..416e94f9f0a 100644
--- a/src/asahi/compiler/agx_compile.h
+++ b/src/asahi/compiler/agx_compile.h
@@ -246,7 +246,6 @@ static const nir_shader_compiler_options agx_nir_options = {
    .lower_extract_byte = true,
    .lower_insert_byte = true,
    .lower_insert_word = true,
-   .lower_cs_local_index_to_id = true,
    .has_cs_global_id = true,
    .vectorize_io = true,
    .use_interpolated_input_intrinsics = true,