From 49141ad5f2baa9cc673278a201ebfccc80edf2a6 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 9 Jun 2025 15:54:03 -0700 Subject: [PATCH] brw: Strategically place flags initialization to help cmod prop v2: Rebase on ac2b0723121 ("brw: Add more specific brw_builder helpers"), and fix a bug that caused the new instruction to possibly be put in the wrong place. No shader-db changes on any Intel platform. fossil-db: All Intel platforms had similar results. (Lunar Lake shown) Totals: Instrs: 233675305 -> 233641585 (-0.01%) Cycle count: 32593658094 -> 32591467794 (-0.01%); split: -0.01%, +0.00% Totals from 33513 (4.25% of 789264) affected shaders: Instrs: 5200332 -> 5166612 (-0.65%) Cycle count: 1499831128 -> 1497640828 (-0.15%); split: -0.15%, +0.00% Reviewed-by: Caio Oliveira Part-of: --- src/intel/compiler/brw_lower_subgroup_ops.cpp | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/intel/compiler/brw_lower_subgroup_ops.cpp b/src/intel/compiler/brw_lower_subgroup_ops.cpp index d1684ebb153..44ebf33f5ea 100644 --- a/src/intel/compiler/brw_lower_subgroup_ops.cpp +++ b/src/intel/compiler/brw_lower_subgroup_ops.cpp @@ -389,6 +389,26 @@ brw_fill_flag(const brw_builder &bld, unsigned v, brw_inst *inst) } } + /* Performing the initialization before an instruction that writes to + * inst->src[0] might help cmod propagation. The expectation is that + * inst->src[0] will be used to generate flags. If scan_inst could be used + * to generate the flags instead, an instruction and temporary register + * might be saved. + */ + foreach_inst_in_block_reverse_starting_from(brw_inst, scan_inst, inst) { + if (scan_inst->flags_read(devinfo)) + break; + + if (regions_overlap(scan_inst->dst, scan_inst->size_written, + inst->src[0], inst->size_read(devinfo, 0))) { + ubld1.before(scan_inst).MOV(flag, value); + return flag; + } + + if (scan_inst->flags_written(devinfo)) + break; + } + ubld1.MOV(flag, value); return flag;