diff --git a/src/amd/compiler/aco_dead_code_analysis.cpp b/src/amd/compiler/aco_dead_code_analysis.cpp
index f56718f0479..443ba188c18 100644
--- a/src/amd/compiler/aco_dead_code_analysis.cpp
+++ b/src/amd/compiler/aco_dead_code_analysis.cpp
@@ -57,11 +57,7 @@ void process_block(dce_ctx& ctx, Block& block)
          continue;
 
       aco_ptr<Instruction>& instr = block.instructions[idx];
-      const bool is_live = instr->definitions.empty() ||
-                           std::any_of(instr->definitions.begin(), instr->definitions.end(),
-                              [&ctx] (const Definition& def) { return !def.isTemp() || ctx.uses[def.tempId()];});
-
-      if (is_live) {
+      if (!is_dead(ctx.uses, instr.get())) {
          for (const Operand& op : instr->operands) {
             if (op.isTemp()) {
                if (ctx.uses[op.tempId()] == 0)
@@ -81,6 +77,19 @@ void process_block(dce_ctx& ctx, Block& block)
 
 } /* end namespace */
 
+/* Returns true if instr can be removed as dead code: it has at least one
+ * definition, every definition is an unused temporary, and the opcode is not
+ * an atomic (an atomic must be kept even when its result is unused). */
+bool is_dead(const std::vector<uint16_t>& uses, Instruction *instr)
+{
+   if (instr->definitions.empty())
+      return false;
+   if (std::any_of(instr->definitions.begin(), instr->definitions.end(),
+          [&uses] (const Definition& def) { return !def.isTemp() || uses[def.tempId()];}))
+      return false;
+   return !instr_info.is_atomic[static_cast<int>(instr->opcode)];
+}
+
 std::vector<uint16_t> dead_code_analysis(Program *program)
 {
    dce_ctx ctx(program);
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 1ee31d23702..5426dc05fb1 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -981,6 +981,8 @@ constexpr barrier_interaction get_barrier_interaction(Instruction* instr)
    }
 }
 
+bool is_dead(const std::vector<uint16_t>& uses, Instruction *instr);
+
 enum block_kind {
    /* uniform indicates that leaving this block,
     * all actives lanes stay active */
@@ -1267,6 +1269,7 @@ typedef struct {
    const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)];
    const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers;
    const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_output_modifiers;
+   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> is_atomic;
    const char *name[static_cast<int>(aco_opcode::num_opcodes)];
    const aco::Format format[static_cast<int>(aco_opcode::num_opcodes)];
 } Info;
diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py
index 65e739b0644..c471b92740e 100644
--- a/src/amd/compiler/aco_opcodes.py
+++ b/src/amd/compiler/aco_opcodes.py
@@ -155,7 +155,7 @@ class Opcode(object):
    """Class that represents all the information we have about the opcode
    NOTE: this must be kept in sync with aco_op_info
    """
-   def __init__(self, name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod):
+   def __init__(self, name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod, is_atomic):
       """Parameters:
 
       - name is the name of the opcode (prepend nir_op_ for the enum name)
@@ -180,15 +180,16 @@ class Opcode(object):
       self.opcode_gfx10 = opcode_gfx10
       self.input_mod = "1" if input_mod else "0"
       self.output_mod = "1" if output_mod else "0"
+      self.is_atomic = "1" if is_atomic else "0"
       self.format = format
 
 
 # global dictionary of opcodes
 opcodes = {}
 
-def opcode(name, opcode_gfx7 = -1, opcode_gfx9 = -1, opcode_gfx10 = -1, format = Format.PSEUDO, input_mod = False, output_mod = False):
+def opcode(name, opcode_gfx7 = -1, opcode_gfx9 = -1, opcode_gfx10 = -1, format = Format.PSEUDO, input_mod = False, output_mod = False, is_atomic = False):
     assert name not in opcodes
-    opcodes[name] = Opcode(name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod)
+    opcodes[name] = Opcode(name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod, is_atomic)
 
 opcode("exp", 0, 0, 0, format = Format.EXP)
 opcode("p_parallelcopy")
@@ -584,7 +585,7 @@ SMEM = {
    ( -1, -1, -1, 0xac, 0xac, "s_atomic_dec_x2"),
 }
 for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SMEM:
-    opcode(name, gfx7, gfx9, gfx10, Format.SMEM)
+    opcode(name, gfx7, gfx9, gfx10, Format.SMEM, is_atomic = "atomic" in name)
 
 
 # VOP2 instructions: 2 inputs, 1 output (+ optional vcc)
@@ -1263,7 +1264,7 @@ MUBUF = {
    ( -1, -1, -1, -1, 0x72, "buffer_gl1_inv"),
 }
 for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in MUBUF:
-    opcode(name, gfx7, gfx9, gfx10, Format.MUBUF)
+    opcode(name, gfx7, gfx9, gfx10, Format.MUBUF, is_atomic = "atomic" in name)
 
 MTBUF = {
    (0x00, 0x00, 0x00, 0x00, 0x00, "tbuffer_load_format_x"),
@@ -1327,7 +1328,7 @@ IMAGE_ATOMIC = {
 # (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (gfx6, gfx7, gfx89, gfx89, ???, name)
 # gfx7 and gfx10 opcodes are the same here
 for (gfx6, gfx7, gfx89, name) in IMAGE_ATOMIC:
-    opcode(name, gfx7, gfx89, gfx7, Format.MIMG)
+    opcode(name, gfx7, gfx89, gfx7, Format.MIMG, is_atomic = True)
 
 IMAGE_SAMPLE = {
    (0x20, "image_sample"),
@@ -1467,7 +1468,7 @@ FLAT = {
    (0x60, -1, 0x60, "flat_atomic_fmax_x2"),
 }
 for (gfx7, gfx8, gfx10, name) in FLAT:
-    opcode(name, gfx7, gfx8, gfx10, Format.FLAT)
+    opcode(name, gfx7, gfx8, gfx10, Format.FLAT, is_atomic = "atomic" in name)
 
 GLOBAL = {
    #GFX8_9, GFX10
@@ -1527,7 +1528,7 @@ GLOBAL = {
    ( -1, 0x60, "global_atomic_fmax_x2"),
 }
 for (gfx8, gfx10, name) in GLOBAL:
-    opcode(name, -1, gfx8, gfx10, Format.GLOBAL)
+    opcode(name, -1, gfx8, gfx10, Format.GLOBAL, is_atomic = "atomic" in name)
 
 SCRATCH = {
    #GFX8_9, GFX10
diff --git a/src/amd/compiler/aco_opcodes_cpp.py b/src/amd/compiler/aco_opcodes_cpp.py
index 834da904b88..8309dee18c2 100644
--- a/src/amd/compiler/aco_opcodes_cpp.py
+++ b/src/amd/compiler/aco_opcodes_cpp.py
@@ -33,6 +33,7 @@ namespace aco {
 opcode_names = sorted(opcodes.keys())
 can_use_input_modifiers = "".join([opcodes[name].input_mod for name in reversed(opcode_names)])
 can_use_output_modifiers = "".join([opcodes[name].output_mod for name in reversed(opcode_names)])
+is_atomic = "".join([opcodes[name].is_atomic for name in reversed(opcode_names)])
 %>
 
 extern const aco::Info instr_info = {
@@ -53,6 +54,7 @@ extern const aco::Info instr_info = {
    },
    .can_use_input_modifiers = std::bitset<${len(opcode_names)}>("${can_use_input_modifiers}"),
    .can_use_output_modifiers = std::bitset<${len(opcode_names)}>("${can_use_output_modifiers}"),
+   .is_atomic = std::bitset<${len(opcode_names)}>("${is_atomic}"),
    .name = {
    % for name in opcode_names:
       "${name}",
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 68831a653a3..e224d1a3a5f 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -2265,12 +2265,7 @@ void select_instruction(opt_ctx &ctx, aco_ptr<Instruction>& instr)
 {
    const uint32_t threshold = 4;
 
-   /* Dead Code Elimination:
-    * We remove instructions if they define temporaries which all are unused */
-   const bool is_used = instr->definitions.empty() ||
-                        std::any_of(instr->definitions.begin(), instr->definitions.end(),
-                                    [&ctx](const Definition& def) { return ctx.uses[def.tempId()]; });
-   if (!is_used) {
+   if (is_dead(ctx.uses, instr.get())) {
       instr.reset();
       return;
    }