brw/copy: Don't try to be clever about ADD3 constant propagation

Always propagate into any source. Let commute_immediates and constant
combining sort out the mess. It's literally their job.

No shader-db changes on any Intel platform. The fossil-db changes just
appear to be subtle changes in register allocation if the immediate
source changes from src0 to src2.

v2: Update the comment in commute_immediates. Suggested by Caio.

fossil-db:

Lunar Lake, Meteor Lake, and DG2 had similar results. (Lunar Lake shown)
Totals:
Cycle count: 31610720510 -> 31610720660 (+0.00%); split: -0.00%, +0.00%

Totals from 8 (0.00% of 702433) affected shaders:
Cycle count: 5522382 -> 5522532 (+0.00%); split: -0.00%, +0.00%

Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32436>
This commit is contained in:
Ian Romanick
2024-11-14 10:12:25 -08:00
committed by Marge Bot
parent a84e3a0f55
commit d9b019b683
2 changed files with 21 additions and 33 deletions
@@ -1022,11 +1022,10 @@ can_promote_src_as_imm(const struct intel_device_info *devinfo, fs_inst *inst,
bool can_promote = false;
/* Experiment shows that we can only support src0 as immediate for MAD on
* Gfx12. ADD3 can use src0 or src2 in Gfx12.5, but constant propagation
* only propagates into src0. It's possible that src2 works for W or UW MAD
* on Gfx12.5.
* Gfx12. ADD3 can use src0 or src2 in Gfx12.5. It's possible that src2
* works for W or UW MAD on Gfx12.5.
*/
if (inst->opcode == BRW_OPCODE_BFE) {
if (inst->opcode == BRW_OPCODE_BFE || inst->opcode == BRW_OPCODE_ADD3) {
if (src_idx == 1)
return false;
} else {
+18 -29
View File
@@ -1070,30 +1070,6 @@ try_constant_propagate_value(brw_reg val, brw_reg_type dst_type,
}
break;
case BRW_OPCODE_ADD3:
/* add3 can have a single imm16 source. Proceed if the source type is
* already W or UW or the value can be coerced to one of those types.
*/
if (val.type == BRW_TYPE_W || val.type == BRW_TYPE_UW)
; /* Nothing to do. */
else if (val.ud <= 0xffff)
val = brw_imm_uw(val.ud);
else if (val.d >= -0x8000 && val.d <= 0x7fff)
val = brw_imm_w(val.d);
else
break;
if (arg == 2) {
inst->src[arg] = val;
progress = true;
} else if (inst->src[2].file != IMM) {
inst->src[arg] = inst->src[2];
inst->src[2] = val;
progress = true;
}
break;
case BRW_OPCODE_CMP:
if (arg == 1) {
inst->src[arg] = val;
@@ -1178,6 +1154,7 @@ try_constant_propagate_value(brw_reg val, brw_reg_type dst_type,
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_INT_REMAINDER:
case BRW_OPCODE_ADD3:
case BRW_OPCODE_AND:
case BRW_OPCODE_ASR:
case BRW_OPCODE_BFE:
@@ -1282,15 +1259,27 @@ can_propagate_from(fs_inst *inst)
is_identity_payload(FIXED_GRF, inst);
}
/**
 * Exchange two source operands of an instruction in place.
 *
 * \param inst  Instruction whose \c src array is modified.
 * \param a     Index of the first source to exchange.
 * \param b     Index of the second source to exchange.
 *
 * No bounds checking is done here; both indices are used directly to
 * index \c inst->src.
 */
static void
swap_srcs(fs_inst *inst, unsigned a, unsigned b)
{
   /* Hold on to the operand at b while it is overwritten by the one at a. */
   const auto saved = inst->src[b];

   inst->src[b] = inst->src[a];
   inst->src[a] = saved;
}
static void
commute_immediates(fs_inst *inst)
{
/* ADD3 can only have the immediate as src0. */
/* ADD3 can have the immediate as src0 or src2. Using one or the other
* consistently makes assembly dumps more readable, so we arbitrarily
* prefer src0.
*/
if (inst->opcode == BRW_OPCODE_ADD3) {
if (inst->src[2].file == IMM) {
const auto src0 = inst->src[0];
inst->src[0] = inst->src[2];
inst->src[2] = src0;
if (inst->src[1].file == IMM) {
if (inst->src[0].file != IMM)
swap_srcs(inst, 0, 1);
else if (inst->src[2].file != IMM)
swap_srcs(inst, 1, 2);
}
}