From f8e744f07f1aa5dd2463b5912dc0a6eb83da4d8b Mon Sep 17 00:00:00 2001 From: Vitaliy Triang3l Kuzmin Date: Mon, 3 Apr 2023 21:22:02 +0300 Subject: [PATCH] aco: Add Primitive Ordered Pixel Shading pseudo-instructions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Timur Kristóf Signed-off-by: Vitaliy Triang3l Kuzmin Part-of: --- src/amd/compiler/aco_lower_to_hw_instr.cpp | 5 ++++ src/amd/compiler/aco_opcodes.py | 28 ++++++++++++++++++++ src/amd/compiler/aco_opt_value_numbering.cpp | 4 ++- 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index 8ddb2c75f69..851b467ffd3 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -2406,6 +2406,11 @@ lower_to_hw_instr(Program* program) } break; } + case aco_opcode::p_pops_gfx9_add_exiting_wave_id: { + bld.sop2(aco_opcode::s_add_i32, instr->definitions[0], instr->definitions[1], + Operand(pops_exiting_wave_id, s1), instr->operands[0]); + break; + } case aco_opcode::p_bpermute_gfx6: { emit_gfx6_bpermute(program, instr, bld); break; diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py index d3bcf078ec6..04b2645b61a 100644 --- a/src/amd/compiler/aco_opcodes.py +++ b/src/amd/compiler/aco_opcodes.py @@ -293,6 +293,34 @@ opcode("p_cbranch_nz", format=Format.PSEUDO_BRANCH) opcode("p_barrier", format=Format.PSEUDO_BARRIER) +# Primitive Ordered Pixel Shading pseudo-instructions. + +# For querying whether the current wave can enter the ordered section on GFX9-10.3, doing +# s_add_i32(pops_exiting_wave_id, op0), but in a way that it's different from a usual SALU +# instruction so that it's easier to maintain the volatility of pops_exiting_wave_id and to handle +# the polling specially in scheduling. +# Definitions: +# - Result SGPR; +# - Clobbered SCC. 
+# Operands: +# - s1 value to add, usually -(current_wave_ID + 1) (or ~current_wave_ID) to remap the exiting wave +# ID from wrapping [0, 0x3FF] to monotonic [0, 0xFFFFFFFF]. +opcode("p_pops_gfx9_add_exiting_wave_id") + +# Indicates that the wait for the completion of the ordered section in overlapped waves has been +# finished on GFX9-10.3. Not lowered to any hardware instructions. +opcode("p_pops_gfx9_overlapped_wave_wait_done") + +# Indicates that a POPS ordered section has ended and hints that overlapping waves can possibly +# continue execution. However, the overlapping waves may actually be resumed by this instruction or +# anywhere later, especially since there can be multiple ordered sections in a wave (for instance, +# if one is chosen in divergent control flow in the source shader), and thus multiple +# p_pops_gfx9_ordered_section_done instructions. Nevertheless, at least one must be present in the +# program if POPS is used; otherwise the location of the end of the ordered section will be +# undefined. Only needed on GFX9-10.3 (on GFX11+, the ordered section lasts until the last export +# and can't be exited early). Not lowered to any hardware instructions. +opcode("p_pops_gfx9_ordered_section_done") + opcode("p_spill") opcode("p_reload") diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp index ff936f1b833..f7619d35a59 100644 --- a/src/amd/compiler/aco_opt_value_numbering.cpp +++ b/src/amd/compiler/aco_opt_value_numbering.cpp @@ -357,7 +357,9 @@ can_eliminate(aco_ptr& instr) } if (instr->definitions.empty() || instr->opcode == aco_opcode::p_phi || - instr->opcode == aco_opcode::p_linear_phi || instr->definitions[0].isNoCSE()) + instr->opcode == aco_opcode::p_linear_phi || + instr->opcode == aco_opcode::p_pops_gfx9_add_exiting_wave_id || + instr->definitions[0].isNoCSE()) return false; return true;