From 74b807cf58710424328e48417a08876d5a2f7c9b Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Mon, 28 Jul 2025 11:54:08 +0100 Subject: [PATCH] aco: only workaround load tearing for atomic loads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For non-atomic loads, this situation would require a data race. Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Reviewed-by: Georg Lehmann Part-of: --- .../aco_select_nir_intrinsics.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp index 6cc116fc14a..375c4f59b2f 100644 --- a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp +++ b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp @@ -2958,9 +2958,10 @@ visit_load_shared(isel_context* ctx, nir_intrinsic_instr* instr) * addresses, if another wave writes LDS in between. Use v_readfirstlane instead of * p_as_uniform in order to avoid copy-propagation. */ - bool readfirstlane_for_uniform = ctx->options->gfx_level >= GFX10 && - ctx->program->wave_size == 64 && - ctx->program->workgroup_size > 64; + bool atomic = nir_intrinsic_access(instr) & ACCESS_ATOMIC; + bool readfirstlane_for_uniform = + ctx->options->gfx_level >= GFX10 && ctx->program->wave_size == 64 && + ctx->program->workgroup_size > 64 && (atomic || !ctx->shader->info.assume_no_data_races); emit_vector_as_uniform(ctx, def.getTemp(), dst, readfirstlane_for_uniform); } @@ -3222,9 +3223,11 @@ visit_access_shared2_amd(isel_context* ctx, nir_intrinsic_instr* instr) Temp dst = get_ssa_temp(ctx, &instr->def); if (dst.type() == RegType::sgpr) { /* Similar to load_shared. */ + bool atomic = nir_intrinsic_access(instr) & ACCESS_ATOMIC; bool readfirstlane_for_uniform = ctx->options->gfx_level >= GFX10 && ctx->program->wave_size == 64 && - ctx->program->workgroup_size > 64; + ctx->program->workgroup_size > 64 && + (atomic || !ctx->shader->info.assume_no_data_races); emit_split_vector(ctx, ds->definitions[0].getTemp(), dst.size()); Temp comp[4];