diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 20df7a9bf11..3cbe59b1931 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -936,7 +936,8 @@ static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_ * allow launching waves out-of-order. (same as Vulkan) * Not available in gfx940. */ - S_00B800_ORDER_MODE(sctx->gfx_level >= GFX7 && + S_00B800_ORDER_MODE(!sctx->cs_shader_state.program->sel.info.uses_atomic_ordered_add && + sctx->gfx_level >= GFX7 && (sctx->family < CHIP_GFX940 || sctx->screen->info.has_graphics)) | S_00B800_CS_W32_EN(sctx->cs_shader_state.program->shader.wave_size == 32); @@ -972,7 +973,8 @@ static void si_emit_dispatch_packets(struct si_context *sctx, const struct pipe_ /* Set PING_PONG_EN for every other dispatch. * Only allowed on a gfx queue, and PARTIAL_TG_EN and USE_THREAD_DIMENSIONS must be 0. */ - if (sctx->has_graphics && !partial_block_en) { + if (sctx->has_graphics && !partial_block_en && + !sctx->cs_shader_state.program->sel.info.uses_atomic_ordered_add) { dispatch_initiator |= S_00B800_PING_PONG_EN(sctx->compute_ping_pong_launch); sctx->compute_ping_pong_launch ^= 1; } diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index b7392a16fd4..dd621fc43ea 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -497,6 +497,7 @@ struct si_shader_info { bool uses_variable_block_size; bool uses_grid_size; bool uses_tg_size; + bool uses_atomic_ordered_add; bool writes_position; bool writes_psize; bool writes_clipvertex; diff --git a/src/gallium/drivers/radeonsi/si_shader_info.c b/src/gallium/drivers/radeonsi/si_shader_info.c index c9f5c319d94..61885fc2294 100644 --- a/src/gallium/drivers/radeonsi/si_shader_info.c +++ b/src/gallium/drivers/radeonsi/si_shader_info.c @@ -501,6 +501,11 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info !nir_src_is_const(intr->src[0])) info->uses_indirect_descriptor = true; + if (nir_intrinsic_has_atomic_op(intr)) { + if (nir_intrinsic_atomic_op(intr) == nir_atomic_op_ordered_add_gfx12_amd) + info->uses_atomic_ordered_add = true; + } + switch (intr->intrinsic) { case nir_intrinsic_store_ssbo: if (!nir_src_is_const(intr->src[1])) @@ -609,6 +614,9 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info case nir_intrinsic_interp_deref_at_offset: unreachable("these opcodes should have been lowered"); break; + case nir_intrinsic_ordered_add_loop_gfx12_amd: + info->uses_atomic_ordered_add = true; + break; default: break; }