ac/llvm,radeonsi: lower nir_fpow for aco and llvm
aco does not implement fpow, so nir needs to lower it first. llvm would lower it by itself in the same way, so we now always lower fpow in nir. Remove the llvm fpow implementation that has special handling for the multiplication. It's not used any more and does not match the GLSL spec, as fpow(0,0)=NaN but here we get 0. There are some pixel changes for gl-radeonsi-stoney: ror-default 2 (no tolerance), 0 (1% tol.) Reviewed-by: Marek Olšák <marek.olsak@amd.com> Signed-off-by: Qiang Yu <yuq825@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22573>
This commit is contained in:
@@ -104,7 +104,7 @@ traces:
|
||||
checksum: 60f74020451e9beaf586b4551541b763
|
||||
ror/ror-default.trace:
|
||||
gl-radeonsi-stoney:
|
||||
checksum: ea53f93df31703bf7a07a4efb922608f
|
||||
checksum: a37b58424c4289a6de77e61d599b6fab
|
||||
nheko/nheko-colors.trace:
|
||||
gl-radeonsi-stoney:
|
||||
checksum: d3234cd6ccc2ab5d3ceab7db79300c69
|
||||
|
||||
@@ -844,30 +844,6 @@ static bool visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
|
||||
src[0] = ac_to_float(&ctx->ac, src[0]);
|
||||
result = ac_build_frexp_mant(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
|
||||
break;
|
||||
case nir_op_fpow:
|
||||
if (instr->dest.dest.ssa.bit_size != 32) {
|
||||
/* 16 and 64 bits */
|
||||
result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
|
||||
ac_to_float_type(&ctx->ac, def_type), src[0]);
|
||||
result = LLVMBuildFMul(ctx->ac.builder, result, ac_to_float(&ctx->ac, src[1]), "");
|
||||
result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
|
||||
ac_to_float_type(&ctx->ac, def_type), result);
|
||||
break;
|
||||
}
|
||||
if (LLVM_VERSION_MAJOR >= 12) {
|
||||
result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
|
||||
ac_to_float_type(&ctx->ac, def_type), src[0]);
|
||||
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.fmul.legacy", ctx->ac.f32,
|
||||
(LLVMValueRef[]){result, ac_to_float(&ctx->ac, src[1])},
|
||||
2, 0);
|
||||
result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
|
||||
ac_to_float_type(&ctx->ac, def_type), result);
|
||||
break;
|
||||
}
|
||||
/* Older LLVM doesn't have fmul.legacy. */
|
||||
result = emit_intrin_2f_param(&ctx->ac, "llvm.pow", ac_to_float_type(&ctx->ac, def_type),
|
||||
src[0], src[1]);
|
||||
break;
|
||||
case nir_op_fmax:
|
||||
result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum", ac_to_float_type(&ctx->ac, def_type),
|
||||
src[0], src[1]);
|
||||
|
||||
@@ -1280,6 +1280,7 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
|
||||
.fuse_ffma32 = use_fma32,
|
||||
.fuse_ffma64 = true,
|
||||
.lower_fmod = true,
|
||||
.lower_fpow = true,
|
||||
.lower_ineg = true,
|
||||
.lower_pack_snorm_4x8 = true,
|
||||
.lower_pack_unorm_4x8 = true,
|
||||
|
||||
Reference in New Issue
Block a user