From a93bc6afc46aca0f8124af4d77646e9788c697c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavel=20Ondra=C4=8Dka?= Date: Wed, 7 Dec 2022 16:10:01 +0100 Subject: [PATCH] nir: check for x - ffract(x) patterns when lowering f2i32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already skip emitting ftrunc in nir_lower_int_to_float when there is ffloor, fround or any other integer-making opcode preceding f2i32. However if lower_ffloor is set for a driver that doesn't support integers, the lowered x - ffract(x) patterns would not be recognized and an extra ftrunc would be emitted, doing unnecessary rounding. This optimization only works if there is no non-trivial swizzling used for the fadd, fneg and ffract involved, which seems to be 99% of the cases according to my testing. This is needed to enable nir ffloor lowering on the r300 driver without regressions. I'm not sure if this helps anybody else; the only hardware which sets lower_ffloor and converts ints to floats (and can't do trunc) are some old etnaviv cards, so maybe it will help there a bit. Signed-off-by: Pavel Ondračka Reviewed-by: Emma Anholt Part-of: --- src/compiler/nir/nir_lower_int_to_float.c | 50 +++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/src/compiler/nir/nir_lower_int_to_float.c b/src/compiler/nir/nir_lower_int_to_float.c index 98633beca65..9dd2ae9aaf8 100644 --- a/src/compiler/nir/nir_lower_int_to_float.c +++ b/src/compiler/nir/nir_lower_int_to_float.c @@ -33,6 +33,51 @@ assert_ssa_def_is_not_int(nir_ssa_def *def, void *arg) return true; } +static bool +instr_has_only_trivial_swizzles(nir_alu_instr *alu) +{ + const nir_op_info *info = &nir_op_infos[alu->op]; + + for (unsigned i = 0; i < info->num_inputs; i++) { + for (unsigned chan = 0; chan < alu->dest.dest.ssa.num_components; chan++) { + if (alu->src[i].swizzle[chan] != chan) + return false; + } + } + return true; +} + +/* Recognize the y = x - ffract(x) patterns from lowered ffloor. 
+ * It only works for the simple case when no swizzling is involved. + */ +static bool +check_for_lowered_ffloor(nir_alu_instr *fadd) +{ + if (!instr_has_only_trivial_swizzles(fadd)) + return false; + + nir_alu_instr *fneg = NULL; + nir_src x; + for (unsigned i = 0; i < 2; i++) { + nir_alu_instr *fadd_src_alu = nir_src_as_alu_instr(fadd->src[i].src); + if (fadd_src_alu && fadd_src_alu->op == nir_op_fneg) { + fneg = fadd_src_alu; + x = fadd->src[1 - i].src; + } + } + + if (!fneg || !instr_has_only_trivial_swizzles(fneg)) + return false; + + nir_alu_instr *ffract = nir_src_as_alu_instr(fneg->src[0].src); + if (ffract && ffract->op == nir_op_ffract && + nir_srcs_equal(ffract->src[0].src, x) && + instr_has_only_trivial_swizzles(ffract)) + return true; + + return false; +} + static bool lower_alu_instr(nir_builder *b, nir_alu_instr *alu) { @@ -75,6 +120,11 @@ lower_alu_instr(nir_builder *b, nir_alu_instr *alu) nir_alu_instr *src_alu = nir_src_as_alu_instr(alu->src[0].src); if (src_alu) { switch (src_alu->op) { + /* Check for the y = x - ffract(x) patterns from lowered ffloor. */ + case nir_op_fadd: + if (check_for_lowered_ffloor(src_alu)) + alu->op = nir_op_mov; + break; case nir_op_fround_even: case nir_op_fceil: case nir_op_ftrunc: