From d9b019b68323a5505d64f90c3b67ddcc06dd28fc Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 14 Nov 2024 10:12:25 -0800 Subject: [PATCH] brw/copy: Don't try to be clever about ADD3 constant propagation Always propagate into any source. Let commute_immedates and constant combining sort out the mess. It's literally their job. No shader-db changes on any Intel platform. The fossil-db changes just appear to be subtle changes in register allocation if the immediate source changes from src0 to src2. v2: Update the comment in commute_immediates. Suggested by Caio. fossil-db: Lunar Lake, Meteor Lake, and DG2 had similar results. (Lunar Lake shown) Totals: Cycle count: 31610720510 -> 31610720660 (+0.00%); split: -0.00%, +0.00% Totals from 8 (0.00% of 702433) affected shaders: Cycle count: 5522382 -> 5522532 (+0.00%); split: -0.00%, +0.00% Reviewed-by: Caio Oliveira Reviewed-by: Matt Turner Part-of: --- .../compiler/brw_fs_combine_constants.cpp | 7 ++- .../compiler/brw_fs_copy_propagation.cpp | 47 +++++++------------ 2 files changed, 21 insertions(+), 33 deletions(-) diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp b/src/intel/compiler/brw_fs_combine_constants.cpp index 8e8becc9417..9cba09f9edb 100644 --- a/src/intel/compiler/brw_fs_combine_constants.cpp +++ b/src/intel/compiler/brw_fs_combine_constants.cpp @@ -1022,11 +1022,10 @@ can_promote_src_as_imm(const struct intel_device_info *devinfo, fs_inst *inst, bool can_promote = false; /* Experiment shows that we can only support src0 as immediate for MAD on - * Gfx12. ADD3 can use src0 or src2 in Gfx12.5, but constant propagation - * only propagates into src0. It's possible that src2 works for W or UW MAD - * on Gfx12.5. + * Gfx12. ADD3 can use src0 or src2 in Gfx12.5. It's possible that src2 + * works for W or UW MAD on Gfx12.5. */ - if (inst->opcode == BRW_OPCODE_BFE) { + if (inst->opcode == BRW_OPCODE_BFE || inst->opcode == BRW_OPCODE_ADD3) { if (src_idx == 1) return false; } else { diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index 41922f2fcc4..be9ae9ee5d6 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -1070,30 +1070,6 @@ try_constant_propagate_value(brw_reg val, brw_reg_type dst_type, } break; - case BRW_OPCODE_ADD3: - /* add3 can have a single imm16 source. Proceed if the source type is - * already W or UW or the value can be coerced to one of those types. - */ - if (val.type == BRW_TYPE_W || val.type == BRW_TYPE_UW) - ; /* Nothing to do. */ - else if (val.ud <= 0xffff) - val = brw_imm_uw(val.ud); - else if (val.d >= -0x8000 && val.d <= 0x7fff) - val = brw_imm_w(val.d); - else - break; - - if (arg == 2) { - inst->src[arg] = val; - progress = true; - } else if (inst->src[2].file != IMM) { - inst->src[arg] = inst->src[2]; - inst->src[2] = val; - progress = true; - } - - break; - case BRW_OPCODE_CMP: if (arg == 1) { inst->src[arg] = val; @@ -1178,6 +1154,7 @@ try_constant_propagate_value(brw_reg val, brw_reg_type dst_type, case SHADER_OPCODE_INT_QUOTIENT: case SHADER_OPCODE_INT_REMAINDER: + case BRW_OPCODE_ADD3: case BRW_OPCODE_AND: case BRW_OPCODE_ASR: case BRW_OPCODE_BFE: @@ -1282,15 +1259,27 @@ can_propagate_from(fs_inst *inst) is_identity_payload(FIXED_GRF, inst); } +static void +swap_srcs(fs_inst *inst, unsigned a, unsigned b) +{ + const auto tmp = inst->src[a]; + inst->src[a] = inst->src[b]; + inst->src[b] = tmp; +} + static void commute_immediates(fs_inst *inst) { - /* ADD3 can only have the immediate as src0. */ + /* ADD3 can have the immediate as src0 or src2. Using one or the other + * consistently makes assembly dumps more readable, so we arbitrarily + * prefer src0. + */ if (inst->opcode == BRW_OPCODE_ADD3) { - if (inst->src[2].file == IMM) { - const auto src0 = inst->src[0]; - inst->src[0] = inst->src[2]; - inst->src[2] = src0; + if (inst->src[1].file == IMM) { + if (inst->src[0].file != IMM) + swap_srcs(inst, 0, 1); + else if (inst->src[2].file != IMM) + swap_srcs(inst, 1, 2); } }