From 37d77a12e90b0a8922b26869e61376bf082f763d Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Tue, 12 Nov 2024 14:48:12 +0000 Subject: [PATCH] nir/opt_move_discards_to_top: add more intrinsics to add_src_to_worklist fossil-db (navi21): Totals from 115 (0.14% of 79395) affected shaders: MaxWaves: 2882 -> 2886 (+0.14%); split: +0.62%, -0.49% Instrs: 71640 -> 71686 (+0.06%); split: -0.21%, +0.28% CodeSize: 395820 -> 395084 (-0.19%); split: -0.39%, +0.20% VGPRs: 5224 -> 5256 (+0.61%); split: -0.61%, +1.23% Latency: 1114025 -> 1145891 (+2.86%); split: -0.12%, +2.98% InvThroughput: 239149 -> 239028 (-0.05%); split: -0.07%, +0.02% VClause: 1289 -> 1291 (+0.16%); split: -0.62%, +0.78% SClause: 2267 -> 2203 (-2.82%); split: -5.38%, +2.56% Copies: 4359 -> 4372 (+0.30%); split: -2.18%, +2.48% Branches: 1215 -> 1225 (+0.82%) PreSGPRs: 4225 -> 4265 (+0.95%); split: -1.35%, +2.30% PreVGPRs: 4166 -> 4189 (+0.55%); split: -0.96%, +1.51% VALU: 53590 -> 53614 (+0.04%); split: -0.10%, +0.14% SALU: 6527 -> 6539 (+0.18%); split: -0.84%, +1.03% SMEM: 4120 -> 4117 (-0.07%) Signed-off-by: Rhys Perry Reviewed-by: Alyssa Rosenzweig Reviewed-by: Georg Lehmann Part-of: --- .../nir/nir_opt_move_discards_to_top.c | 28 +++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/src/compiler/nir/nir_opt_move_discards_to_top.c b/src/compiler/nir/nir_opt_move_discards_to_top.c index 5950bb86711..525022fddd5 100644 --- a/src/compiler/nir/nir_opt_move_discards_to_top.c +++ b/src/compiler/nir/nir_opt_move_discards_to_top.c @@ -49,8 +49,32 @@ add_src_to_worklist(nir_src *src, void *worklist) if (instr->type == nir_instr_type_intrinsic) { nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - if (!nir_intrinsic_can_reorder(intrin)) - return false; + switch (intrin->intrinsic) { + /* Increasing the set of active invocations is safe for these intrinsics, which is + * all that moving it to the top does. This is because the read from inactive + * invocations is undefined. + */ + case nir_intrinsic_quad_swizzle_amd: + /* If FI=0, then these intrinsics return 0 for inactive invocations. */ + if (!nir_intrinsic_fetch_inactive(intrin)) + return false; + FALLTHROUGH; + case nir_intrinsic_ddx: + case nir_intrinsic_ddy: + case nir_intrinsic_ddx_fine: + case nir_intrinsic_ddy_fine: + case nir_intrinsic_ddx_coarse: + case nir_intrinsic_ddy_coarse: + case nir_intrinsic_quad_broadcast: + case nir_intrinsic_quad_swap_horizontal: + case nir_intrinsic_quad_swap_vertical: + case nir_intrinsic_quad_swap_diagonal: + break; + default: + if (!nir_intrinsic_can_reorder(intrin)) + return false; + break; + } } /* Set pass_flags and remember the instruction to add it's own sources and for potential