diff --git a/src/amd/vulkan/radv_nir_lower_abi.c b/src/amd/vulkan/radv_nir_lower_abi.c index 1f950d909c6..1b9fa4cfb6c 100644 --- a/src/amd/vulkan/radv_nir_lower_abi.c +++ b/src/amd/vulkan/radv_nir_lower_abi.c @@ -370,6 +370,9 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state) nir_imm_int(b, RADV_NGG_QUERY_PRIM_XFB_OFFSET(nir_intrinsic_stream_id(intrin))), nir_imm_int(b, 0x100)); break; + case nir_intrinsic_atomic_add_gs_invocation_count_amd: + /* TODO: add gs invocation query emulation. */ + break; case nir_intrinsic_load_streamout_config_amd: replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.streamout_config); diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 695fb16ea92..8c8ac673dce 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -1538,6 +1538,10 @@ intrinsic("atomic_add_gs_emit_prim_count_amd", [1]) intrinsic("atomic_add_gen_prim_count_amd", [1], indices=[STREAM_ID]) intrinsic("atomic_add_xfb_prim_count_amd", [1], indices=[STREAM_ID]) +# Atomically add current wave's invocation count to query result +# src[] = { invocation_count }. +intrinsic("atomic_add_gs_invocation_count_amd", [1]) + # LDS offset for scratch section in NGG shader system_value("lds_ngg_scratch_base_amd", 1) # LDS offset for NGG GS shader vertex emit diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c index 3f0f0080b6f..146b9041fc4 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c @@ -262,20 +262,26 @@ static bool lower_abi_instr(nir_builder *b, nir_instr *instr, struct lower_abi_s break; } case nir_intrinsic_atomic_add_gs_emit_prim_count_amd: + case nir_intrinsic_atomic_add_gs_invocation_count_amd: { + nir_ssa_def *buf = load_internal_binding(b, args, SI_GS_QUERY_EMULATED_COUNTERS_BUF); + + enum pipe_statistics_query_index index = + intrin->intrinsic == nir_intrinsic_atomic_add_gs_emit_prim_count_amd ? + PIPE_STAT_QUERY_GS_PRIMITIVES : PIPE_STAT_QUERY_GS_INVOCATIONS; + unsigned offset = si_query_pipestat_end_dw_offset(sel->screen, index) * 4; + + nir_ssa_def *count = intrin->src[0].ssa; + nir_buffer_atomic_add_amd(b, 32, buf, count, .base = offset); + break; + } case nir_intrinsic_atomic_add_gen_prim_count_amd: case nir_intrinsic_atomic_add_xfb_prim_count_amd: { - unsigned offset; - nir_ssa_def *buf; - if (intrin->intrinsic == nir_intrinsic_atomic_add_gs_emit_prim_count_amd) { - buf = load_internal_binding(b, args, SI_GS_QUERY_EMULATED_COUNTERS_BUF); - offset = si_query_pipestat_end_dw_offset(sel->screen, PIPE_STAT_QUERY_GS_PRIMITIVES) * 4; - } else { - unsigned stream = nir_intrinsic_stream_id(intrin); - buf = load_internal_binding(b, args, SI_GS_QUERY_BUF); - offset = intrin->intrinsic == nir_intrinsic_atomic_add_gen_prim_count_amd ? - offsetof(struct gfx10_sh_query_buffer_mem, stream[stream].generated_primitives) : - offsetof(struct gfx10_sh_query_buffer_mem, stream[stream].emitted_primitives); - } + nir_ssa_def *buf = load_internal_binding(b, args, SI_GS_QUERY_BUF); + + unsigned stream = nir_intrinsic_stream_id(intrin); + unsigned offset = intrin->intrinsic == nir_intrinsic_atomic_add_gen_prim_count_amd ? + offsetof(struct gfx10_sh_query_buffer_mem, stream[stream].generated_primitives) : + offsetof(struct gfx10_sh_query_buffer_mem, stream[stream].emitted_primitives); nir_ssa_def *prim_count = intrin->src[0].ssa; nir_buffer_atomic_add_amd(b, 32, buf, prim_count, .base = offset);