From 9bc1fb4c07fa3ea50a71c434b5f8161bc0954bb4 Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Mon, 17 Apr 2023 18:01:09 +0800 Subject: [PATCH] ac/llvm,radeonsi: lower nir_fpow for aco and llvm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit aco does not implement fpow, so nir needs to lower it first. llvm would lower it by itself in the same way, so we always lower fpow in nir now. Remove the llvm fpow implementation that has special handling for the multiplication. It's not used any more and does not match the GLSL spec: fpow(0,0) should be NaN, but here we get 0. There are some pixel changes for gl-radeonsi-stoney: ror-default 2 (no tolerance), 0 (1% tol.) Reviewed-by: Marek Olšák Signed-off-by: Qiang Yu Part-of: --- src/amd/ci/traces-amd.yml | 2 +- src/amd/llvm/ac_nir_to_llvm.c | 24 ------------------------ src/gallium/drivers/radeonsi/si_get.c | 1 + 3 files changed, 2 insertions(+), 25 deletions(-) diff --git a/src/amd/ci/traces-amd.yml b/src/amd/ci/traces-amd.yml index 55372054a1b..962829dab3d 100644 --- a/src/amd/ci/traces-amd.yml +++ b/src/amd/ci/traces-amd.yml @@ -104,7 +104,7 @@ traces: checksum: 60f74020451e9beaf586b4551541b763 ror/ror-default.trace: gl-radeonsi-stoney: - checksum: ea53f93df31703bf7a07a4efb922608f + checksum: a37b58424c4289a6de77e61d599b6fab nheko/nheko-colors.trace: gl-radeonsi-stoney: checksum: d3234cd6ccc2ab5d3ceab7db79300c69 diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index f1a7efc12fe..2a5d4b93960 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -844,30 +844,6 @@ static bool visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) src[0] = ac_to_float(&ctx->ac, src[0]); result = ac_build_frexp_mant(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size); break; - case nir_op_fpow: - if (instr->dest.dest.ssa.bit_size != 32) { - /* 16 and 64 bits */ - result = emit_intrin_1f_param(&ctx->ac, "llvm.log2", - ac_to_float_type(&ctx->ac, def_type), src[0]); - 
result = LLVMBuildFMul(ctx->ac.builder, result, ac_to_float(&ctx->ac, src[1]), ""); - result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2", - ac_to_float_type(&ctx->ac, def_type), result); - break; - } - if (LLVM_VERSION_MAJOR >= 12) { - result = emit_intrin_1f_param(&ctx->ac, "llvm.log2", - ac_to_float_type(&ctx->ac, def_type), src[0]); - result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.fmul.legacy", ctx->ac.f32, - (LLVMValueRef[]){result, ac_to_float(&ctx->ac, src[1])}, - 2, 0); - result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2", - ac_to_float_type(&ctx->ac, def_type), result); - break; - } - /* Older LLVM doesn't have fmul.legacy. */ - result = emit_intrin_2f_param(&ctx->ac, "llvm.pow", ac_to_float_type(&ctx->ac, def_type), - src[0], src[1]); - break; case nir_op_fmax: result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum", ac_to_float_type(&ctx->ac, def_type), src[0], src[1]); diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index e2b6fe5a944..9672ef40153 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -1280,6 +1280,7 @@ void si_init_screen_get_functions(struct si_screen *sscreen) .fuse_ffma32 = use_fma32, .fuse_ffma64 = true, .lower_fmod = true, + .lower_fpow = true, .lower_ineg = true, .lower_pack_snorm_4x8 = true, .lower_pack_unorm_4x8 = true,