From 501b1cbc2c32465f337c1d586faa4e39206d541a Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 16 Jul 2025 15:39:27 -0700 Subject: [PATCH] intel/brw: Fix behavior of scheduler around flag register writes. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We were treating explicit flag writes and reads as a full scheduler barrier, which is unnecessary since the tracking we already do handles explicit flag access correctly, so there is no reason for taking a possibly large performance hit from add_barrier_deps(). Found by inspection while trying to understand the poor scheduling of some fragment shaders. Improves performance by a small but statistically significant amount (4 iterations, 5% significance) for the following Traci tests in combination with a subsequent commit that makes the pre-RA scheduler sensitive to instruction latencies: SpaceEngineers-trace-dx11-2160p-high: 0.66% ±0.30% MountAndBlade2-trace-dx11-1440p-veryhigh: 0.62% ±0.23% Reviewed-by: Lionel Landwerlin Part-of: --- src/intel/compiler/brw_schedule_instructions.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index 02831d02adb..5c51893d4fb 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -1141,6 +1141,9 @@ brw_instruction_scheduler::register_needs_barrier(const brw_reg &reg) if (reg.file != ARF || reg.is_null()) return false; + if (reg.nr >= BRW_ARF_FLAG && reg.nr < BRW_ARF_MASK) + return false; + /* If you look at SR register layout, there is nothing in there that * depends on other instructions. This is just fixed dispatch information. *