From fc2360167c48bd51f2cb536efadba4a5b846142d Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 28 Feb 2024 17:34:19 -0800 Subject: [PATCH] intel/brw: Avoid optimize_extract_to_float when it will just be undone later v2: Add bspec quotation. Suggested by Caio. With a better understanding of the restriction, only apply on DG2 and newer platforms. shader-db: DG2 and Meteor Lake had similar results. (DG2 shown) total instructions in shared programs: 19659363 -> 19659360 (<.01%) instructions in affected programs: 2484 -> 2481 (-0.12%) helped: 6 / HURT: 1 total cycles in shared programs: 823445738 -> 823432524 (<.01%) cycles in affected programs: 2619836 -> 2606622 (-0.50%) helped: 48 / HURT: 63 fossil-db: DG2 and Meteor Lake had similar results. (DG2 shown) Totals: Instrs: 154015863 -> 153987806 (-0.02%); split: -0.02%, +0.00% Cycle count: 17552172994 -> 17562047866 (+0.06%); split: -0.13%, +0.19% Spill count: 142124 -> 141544 (-0.41%); split: -0.54%, +0.13% Fill count: 266803 -> 266046 (-0.28%); split: -0.38%, +0.09% Scratch Memory Size: 10266624 -> 10271744 (+0.05%); split: -0.02%, +0.07% Max live registers: 32592428 -> 32592393 (-0.00%); split: -0.00%, +0.00% Max dispatch width: 5535944 -> 5535912 (-0.00%); split: +0.00%, -0.00% Totals from 41887 (6.63% of 631367) affected shaders: Instrs: 32971032 -> 32942975 (-0.09%); split: -0.10%, +0.01% Cycle count: 3892086217 -> 3901961089 (+0.25%); split: -0.60%, +0.85% Spill count: 105669 -> 105089 (-0.55%); split: -0.72%, +0.18% Fill count: 206459 -> 205702 (-0.37%); split: -0.49%, +0.12% Scratch Memory Size: 7766016 -> 7771136 (+0.07%); split: -0.03%, +0.09% Max live registers: 3230515 -> 3230480 (-0.00%); split: -0.00%, +0.00% Max dispatch width: 337232 -> 337200 (-0.01%); split: +0.00%, -0.01% No shader-db or fossil-db changes on any earlier Intel platforms. 
Reviewed-by: Caio Oliveira Part-of: --- src/intel/compiler/brw_fs_nir.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 61c938d309a..d1f7dabd103 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -566,6 +566,22 @@ optimize_extract_to_float(nir_to_brw_state &ntb, nir_alu_instr *instr, nir_src_bit_size(src0->src[0].src))); op0 = offset(op0, bld, src0->src[0].swizzle[0]); + /* Bspec "Register Region Restrictions" for Xe says: + * + * "In case of all float point data types used in destination + * + * 1. Register Regioning patterns where register data bit location of + * the LSB of the channels are changed between source and destination + * are not supported on Src0 and Src1 except for broadcast of a + * scalar." + * + * This restriction is enforced in brw_fs_lower_regioning. There is no + * reason to generate an optimized instruction that brw_fs_lower_regioning + * will have to break up later. + */ + if (devinfo->verx10 >= 125 && element != 0 && !is_uniform(op0)) + return false; + bld.MOV(result, subscript(op0, type, element)); return true; }