nir: check for x - ffract(x) patterns when lowering f2i32

We already skip emitting ftrunc in nir_lower_int_to_float when there is
ffloor, fround or any other integer-making opcode preceding f2i32. However,
if lower_ffloor is set for a driver that doesn't support integers, the lowered
x - ffract(x) patterns would not be recognized and an extra ftrunc would be
emitted, doing unnecessary rounding.

This optimization only works if there is no non-trivial swizzling used for
the fadd, fneg and ffract involved, which seems to be 99% of the cases according
to my testing.

This is needed to enable nir ffloor lowering on r300 driver without regressions.

I'm not sure if this helps anybody else; the only hardware which sets
lower_ffloor and converts ints to floats (and can't do trunc) is some old
etnaviv cards, so maybe it will help there a bit.

Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com>
Reviewed-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20208>
This commit is contained in:
Pavel Ondračka
2022-12-07 16:10:01 +01:00
committed by Marge Bot
parent b111e8aa49
commit a93bc6afc4

View File

@@ -33,6 +33,51 @@ assert_ssa_def_is_not_int(nir_ssa_def *def, void *arg)
return true;
}
/* Returns true when, for every destination channel N of the instruction,
 * each of its sources reads channel N of its operand (identity swizzle).
 * Returns false as soon as any source/channel pair deviates.
 */
static bool
instr_has_only_trivial_swizzles(nir_alu_instr *alu)
{
   const unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
   const unsigned num_chans = alu->dest.dest.ssa.num_components;

   for (unsigned chan = 0; chan < num_chans; chan++) {
      for (unsigned src = 0; src < num_srcs; src++) {
         if (alu->src[src].swizzle[chan] != chan)
            return false;
      }
   }

   return true;
}
/* Recognize the y = x - ffract(x) patterns from lowered ffloor, i.e.
 *
 *    y = fadd(x, fneg(ffract(x)))   or   y = fadd(fneg(ffract(x)), x)
 *
 * It only works for the simple case when no swizzling is involved: the fadd,
 * the fneg and the ffract must all use trivial swizzles.
 *
 * Each fadd operand is tried as the fneg(ffract(x)) candidate on its own, so
 * the pattern is still found when x itself happens to be produced by an fneg
 * (in which case both operands of the fadd are fneg instructions).
 *
 * Returns true if fadd matches the pattern, false otherwise.
 */
static bool
check_for_lowered_ffloor(nir_alu_instr *fadd)
{
   if (!instr_has_only_trivial_swizzles(fadd))
      return false;

   for (unsigned i = 0; i < 2; i++) {
      /* Candidate for the negated ffract operand. */
      nir_alu_instr *fneg = nir_src_as_alu_instr(fadd->src[i].src);
      if (!fneg || fneg->op != nir_op_fneg ||
          !instr_has_only_trivial_swizzles(fneg))
         continue;

      nir_alu_instr *ffract = nir_src_as_alu_instr(fneg->src[0].src);
      if (!ffract || ffract->op != nir_op_ffract ||
          !instr_has_only_trivial_swizzles(ffract))
         continue;

      /* The other fadd operand must be the same x that ffract reads. */
      if (nir_srcs_equal(ffract->src[0].src, fadd->src[1 - i].src))
         return true;
   }

   return false;
}
static bool
lower_alu_instr(nir_builder *b, nir_alu_instr *alu)
{
@@ -75,6 +120,11 @@ lower_alu_instr(nir_builder *b, nir_alu_instr *alu)
nir_alu_instr *src_alu = nir_src_as_alu_instr(alu->src[0].src);
if (src_alu) {
switch (src_alu->op) {
/* Check for the y = x - ffract(x) patterns from lowered ffloor. */
case nir_op_fadd:
if (check_for_lowered_ffloor(src_alu))
alu->op = nir_op_mov;
break;
case nir_op_fround_even:
case nir_op_fceil:
case nir_op_ftrunc: