brw/copy: Don't try to be clever about ADD3 constant propagation
Always propagate into any source. Let commute_immedates and constant combining sort out the mess. It's literally their job. No shader-db changes on any Intel platform. The fossil-db changes just appear to be subtle changes in register allocation if the immediate source changes from src0 to src2. v2: Update the comment in commute_immediates. Suggested by Caio. fossil-db: Lunar Lake, Meteor Lake, and DG2 had similar results. (Lunar Lake shown) Totals: Cycle count: 31610720510 -> 31610720660 (+0.00%); split: -0.00%, +0.00% Totals from 8 (0.00% of 702433) affected shaders: Cycle count: 5522382 -> 5522532 (+0.00%); split: -0.00%, +0.00% Reviewed-by: Caio Oliveira <caio.oliveira@intel.com> Reviewed-by: Matt Turner <mattst88@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32436>
This commit is contained in:
@@ -1022,11 +1022,10 @@ can_promote_src_as_imm(const struct intel_device_info *devinfo, fs_inst *inst,
|
||||
bool can_promote = false;
|
||||
|
||||
/* Experiment shows that we can only support src0 as immediate for MAD on
|
||||
* Gfx12. ADD3 can use src0 or src2 in Gfx12.5, but constant propagation
|
||||
* only propagates into src0. It's possible that src2 works for W or UW MAD
|
||||
* on Gfx12.5.
|
||||
* Gfx12. ADD3 can use src0 or src2 in Gfx12.5. It's possible that src2
|
||||
* works for W or UW MAD on Gfx12.5.
|
||||
*/
|
||||
if (inst->opcode == BRW_OPCODE_BFE) {
|
||||
if (inst->opcode == BRW_OPCODE_BFE || inst->opcode == BRW_OPCODE_ADD3) {
|
||||
if (src_idx == 1)
|
||||
return false;
|
||||
} else {
|
||||
|
||||
@@ -1070,30 +1070,6 @@ try_constant_propagate_value(brw_reg val, brw_reg_type dst_type,
|
||||
}
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_ADD3:
|
||||
/* add3 can have a single imm16 source. Proceed if the source type is
|
||||
* already W or UW or the value can be coerced to one of those types.
|
||||
*/
|
||||
if (val.type == BRW_TYPE_W || val.type == BRW_TYPE_UW)
|
||||
; /* Nothing to do. */
|
||||
else if (val.ud <= 0xffff)
|
||||
val = brw_imm_uw(val.ud);
|
||||
else if (val.d >= -0x8000 && val.d <= 0x7fff)
|
||||
val = brw_imm_w(val.d);
|
||||
else
|
||||
break;
|
||||
|
||||
if (arg == 2) {
|
||||
inst->src[arg] = val;
|
||||
progress = true;
|
||||
} else if (inst->src[2].file != IMM) {
|
||||
inst->src[arg] = inst->src[2];
|
||||
inst->src[2] = val;
|
||||
progress = true;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_CMP:
|
||||
if (arg == 1) {
|
||||
inst->src[arg] = val;
|
||||
@@ -1178,6 +1154,7 @@ try_constant_propagate_value(brw_reg val, brw_reg_type dst_type,
|
||||
|
||||
case SHADER_OPCODE_INT_QUOTIENT:
|
||||
case SHADER_OPCODE_INT_REMAINDER:
|
||||
case BRW_OPCODE_ADD3:
|
||||
case BRW_OPCODE_AND:
|
||||
case BRW_OPCODE_ASR:
|
||||
case BRW_OPCODE_BFE:
|
||||
@@ -1282,15 +1259,27 @@ can_propagate_from(fs_inst *inst)
|
||||
is_identity_payload(FIXED_GRF, inst);
|
||||
}
|
||||
|
||||
static void
|
||||
swap_srcs(fs_inst *inst, unsigned a, unsigned b)
|
||||
{
|
||||
const auto tmp = inst->src[a];
|
||||
inst->src[a] = inst->src[b];
|
||||
inst->src[b] = tmp;
|
||||
}
|
||||
|
||||
static void
|
||||
commute_immediates(fs_inst *inst)
|
||||
{
|
||||
/* ADD3 can only have the immediate as src0. */
|
||||
/* ADD3 can have the immediate as src0 or src2. Using one or the other
|
||||
* consistently makes assembly dumps more readable, so we arbitrarily
|
||||
* prefer src0.
|
||||
*/
|
||||
if (inst->opcode == BRW_OPCODE_ADD3) {
|
||||
if (inst->src[2].file == IMM) {
|
||||
const auto src0 = inst->src[0];
|
||||
inst->src[0] = inst->src[2];
|
||||
inst->src[2] = src0;
|
||||
if (inst->src[1].file == IMM) {
|
||||
if (inst->src[0].file != IMM)
|
||||
swap_srcs(inst, 0, 1);
|
||||
else if (inst->src[2].file != IMM)
|
||||
swap_srcs(inst, 1, 2);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user