ac/llvm: use mul24 intrinsics
With the current code in clpeak, LLVM ended up generating v_mad_u64_u32 instructions; with this change we get nice v_mad_u32_s24 ones instead and a 4x performance increase in the int24 benchmark. Suggested-by: Marek Olšák <marek.olsak@amd.com> Reviewed-by: Marek Olšák <marek.olsak@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34630>
This commit is contained in:
@@ -612,9 +612,13 @@ static bool visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
|
||||
else
|
||||
result = LLVMBuildSub(ctx->ac.builder, src[0], src[1], "");
|
||||
break;
|
||||
case nir_op_imul:
|
||||
case nir_op_imul24_relaxed:
|
||||
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.mul.i24", ctx->ac.i32, src, 2, 0);
|
||||
break;
|
||||
case nir_op_umul24_relaxed:
|
||||
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.mul.u24", ctx->ac.i32, src, 2, 0);
|
||||
break;
|
||||
case nir_op_imul:
|
||||
if (instr->no_unsigned_wrap)
|
||||
result = LLVMBuildNUWMul(ctx->ac.builder, src[0], src[1], "");
|
||||
else if (instr->no_signed_wrap)
|
||||
|
||||
Reference in New Issue
Block a user