aco: only workaround load tearing for atomic loads

For non-atomic loads, this situation would require a data race.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36602>
This commit is contained in:
Rhys Perry
2025-07-28 11:54:08 +01:00
committed by Marge Bot
parent 8fba196164
commit 74b807cf58

View File

@@ -2958,9 +2958,10 @@ visit_load_shared(isel_context* ctx, nir_intrinsic_instr* instr)
* addresses, if another wave writes LDS in between. Use v_readfirstlane instead of
* p_as_uniform in order to avoid copy-propagation.
*/
bool readfirstlane_for_uniform = ctx->options->gfx_level >= GFX10 &&
ctx->program->wave_size == 64 &&
ctx->program->workgroup_size > 64;
bool atomic = nir_intrinsic_access(instr) & ACCESS_ATOMIC;
bool readfirstlane_for_uniform =
ctx->options->gfx_level >= GFX10 && ctx->program->wave_size == 64 &&
ctx->program->workgroup_size > 64 && (atomic || !ctx->shader->info.assume_no_data_races);
emit_vector_as_uniform(ctx, def.getTemp(), dst, readfirstlane_for_uniform);
}
@@ -3222,9 +3223,11 @@ visit_access_shared2_amd(isel_context* ctx, nir_intrinsic_instr* instr)
Temp dst = get_ssa_temp(ctx, &instr->def);
if (dst.type() == RegType::sgpr) {
/* Similar to load_shared. */
bool atomic = nir_intrinsic_access(instr) & ACCESS_ATOMIC;
bool readfirstlane_for_uniform = ctx->options->gfx_level >= GFX10 &&
ctx->program->wave_size == 64 &&
ctx->program->workgroup_size > 64;
ctx->program->workgroup_size > 64 &&
(atomic || !ctx->shader->info.assume_no_data_races);
emit_split_vector(ctx, ds->definitions[0].getTemp(), dst.size());
Temp comp[4];