diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index 328e88323e4..6d9a4d337a6 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -582,9 +582,7 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu) alu->op != nir_op_sdot_4x8_iadd && alu->op != nir_op_sdot_4x8_iadd_sat && alu->op != nir_op_sudot_4x8_iadd && - alu->op != nir_op_sudot_4x8_iadd_sat && - /* not supported in HW, we have to fall back to normal registers */ - alu->op != nir_op_ffma; + alu->op != nir_op_sudot_4x8_iadd_sat; struct ir3_instruction **def = ir3_get_def(ctx, &alu->def, dst_sz); @@ -721,7 +719,22 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu) dst = ir3_ADD_F_rpt(b, dst_sz, src[0], 0, src[1], IR3_REG_FNEG); break; case nir_op_ffma: - dst = ir3_MAD_F32_rpt(b, dst_sz, src[0], 0, src[1], 0, src[2], 0); + /* The scalar ALU doesn't support mad, so expand to mul+add so that we + * don't unnecessarily fall back to non-earlypreamble. This is safe + * because at least on a6xx+ mad is unfused. + */ + if (use_shared) { + struct ir3_instruction_rpt mul01 = + ir3_MUL_F_rpt(b, dst_sz, src[0], 0, src[1], 0); + + if (is_half(src[0].rpts[0])) { + set_dst_flags(mul01.rpts, dst_sz, IR3_REG_HALF); + } + + dst = ir3_ADD_F_rpt(b, dst_sz, mul01, 0, src[2], 0); + } else { + dst = ir3_MAD_F32_rpt(b, dst_sz, src[0], 0, src[1], 0, src[2], 0); + } break; case nir_op_flt: dst = ir3_CMPS_F_rpt(b, dst_sz, src[0], 0, src[1], 0);