From 9bc1fb4c07fa3ea50a71c434b5f8161bc0954bb4 Mon Sep 17 00:00:00 2001
From: Qiang Yu <yuq825@gmail.com>
Date: Mon, 17 Apr 2023 18:01:09 +0800
Subject: [PATCH] ac/llvm,radeonsi: lower nir_fpow for aco and llvm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

aco does not implement fpow, so nir needs to lower it
first. llvm would lower it by itself in the same way,
so we always lower fpow in nir now.

Remove the llvm fpow implementation that has special
handling for the multiplication. It's not used any
more and does not match the GLSL spec, as fpow(0,0)=NaN
but here we get 0.

There are some pixel changes for gl-radeonsi-stoney:
  ror-default 2 (no tolerance), 0 (1% tol.)

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22573>
---
 src/amd/ci/traces-amd.yml             |  2 +-
 src/amd/llvm/ac_nir_to_llvm.c         | 24 ------------------------
 src/gallium/drivers/radeonsi/si_get.c |  1 +
 3 files changed, 2 insertions(+), 25 deletions(-)

diff --git a/src/amd/ci/traces-amd.yml b/src/amd/ci/traces-amd.yml
index 55372054a1b..962829dab3d 100644
--- a/src/amd/ci/traces-amd.yml
+++ b/src/amd/ci/traces-amd.yml
@@ -104,7 +104,7 @@ traces:
       checksum: 60f74020451e9beaf586b4551541b763
   ror/ror-default.trace:
     gl-radeonsi-stoney:
-      checksum: ea53f93df31703bf7a07a4efb922608f
+      checksum: a37b58424c4289a6de77e61d599b6fab
   nheko/nheko-colors.trace:
     gl-radeonsi-stoney:
       checksum: d3234cd6ccc2ab5d3ceab7db79300c69
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index f1a7efc12fe..2a5d4b93960 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -844,30 +844,6 @@ static bool visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
       src[0] = ac_to_float(&ctx->ac, src[0]);
       result = ac_build_frexp_mant(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
       break;
-   case nir_op_fpow:
-      if (instr->dest.dest.ssa.bit_size != 32) {
-         /* 16 and 64 bits */
-         result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
-                                       ac_to_float_type(&ctx->ac, def_type), src[0]);
-         result = LLVMBuildFMul(ctx->ac.builder, result, ac_to_float(&ctx->ac, src[1]), "");
-         result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
-                                       ac_to_float_type(&ctx->ac, def_type), result);
-         break;
-      }
-      if (LLVM_VERSION_MAJOR >= 12) {
-         result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
-                                       ac_to_float_type(&ctx->ac, def_type), src[0]);
-         result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.fmul.legacy", ctx->ac.f32,
-                                     (LLVMValueRef[]){result, ac_to_float(&ctx->ac, src[1])},
-                                     2, 0);
-         result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
-                                       ac_to_float_type(&ctx->ac, def_type), result);
-         break;
-      }
-      /* Older LLVM doesn't have fmul.legacy. */
-      result = emit_intrin_2f_param(&ctx->ac, "llvm.pow", ac_to_float_type(&ctx->ac, def_type),
-                                    src[0], src[1]);
-      break;
    case nir_op_fmax:
       result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum", ac_to_float_type(&ctx->ac, def_type),
                                     src[0], src[1]);
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index e2b6fe5a944..9672ef40153 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -1280,6 +1280,7 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
       .fuse_ffma32 = use_fma32,
       .fuse_ffma64 = true,
       .lower_fmod = true,
+      .lower_fpow = true,
       .lower_ineg = true,
       .lower_pack_snorm_4x8 = true,
       .lower_pack_unorm_4x8 = true,