diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 0226663b63f..1b73bd58110 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -2933,7 +2933,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) /* We emit s_round_mode/s_setreg_imm32 in lower_to_hw_instr to * keep value numbering and the scheduler simpler. */ - bld.vop1(aco_opcode::p_cvt_f16_f32_rtne, Definition(dst), src); + bld.vop1(aco_opcode::p_v_cvt_f16_f32_rtne, Definition(dst), src); else bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src); break; @@ -3484,7 +3484,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) Temp src = get_alu_src(ctx, instr->src[0]); Temp f16; if (ctx->block->fp_mode.round16_64 != fp_round_ne) - f16 = bld.vop1(aco_opcode::p_cvt_f16_f32_rtne, bld.def(v2b), src); + f16 = bld.vop1(aco_opcode::p_v_cvt_f16_f32_rtne, bld.def(v2b), src); else f16 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), src); Temp f32, cmp_res; diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index b13c961c756..9eb62b2b097 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -626,7 +626,7 @@ instr_is_16bit(amd_gfx_level gfx_level, aco_opcode op) case aco_opcode::v_fmaak_f16: /* VOP1 */ case aco_opcode::v_cvt_f16_f32: - case aco_opcode::p_cvt_f16_f32_rtne: + case aco_opcode::p_v_cvt_f16_f32_rtne: case aco_opcode::v_cvt_f16_u16: case aco_opcode::v_cvt_f16_i16: case aco_opcode::v_rcp_f16: diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index 566361cfb5d..1640e9b00fe 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -2956,14 +2956,18 @@ lower_to_hw_instr(Program* program) } else if (emit_s_barrier) { bld.sopp(aco_opcode::s_barrier); } - } else if (instr->opcode == aco_opcode::p_cvt_f16_f32_rtne) { + } else if (instr->opcode == aco_opcode::p_v_cvt_f16_f32_rtne || + instr->opcode == aco_opcode::p_s_cvt_f16_f32_rtne) { float_mode new_mode = block->fp_mode; new_mode.round16_64 = fp_round_ne; bool set_round = new_mode.round != block->fp_mode.round; emit_set_mode(bld, new_mode, set_round, false); - instr->opcode = aco_opcode::v_cvt_f16_f32; + if (instr->opcode == aco_opcode::p_v_cvt_f16_f32_rtne) + instr->opcode = aco_opcode::v_cvt_f16_f32; + else + instr->opcode = aco_opcode::s_cvt_f16_f32; ctx.instructions.emplace_back(std::move(instr)); emit_set_mode(bld, block->fp_mode, set_round, false); diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py index 816c59464dd..7a9cf4ddfbb 100644 --- a/src/amd/compiler/aco_opcodes.py +++ b/src/amd/compiler/aco_opcodes.py @@ -639,6 +639,7 @@ SOP1 = { ("s_cvt_i32_f32", dst(1), src(1), op(gfx11=0x66), InstrClass.SFPU), ("s_cvt_u32_f32", dst(1), src(1), op(gfx11=0x67), InstrClass.SFPU), ("s_cvt_f16_f32", dst(1), src(1), op(gfx11=0x68), InstrClass.SFPU), + ("p_s_cvt_f16_f32_rtne", dst(1), src(1), op(-1), InstrClass.SFPU), ("s_cvt_f32_f16", dst(1), src(1), op(gfx11=0x69), InstrClass.SFPU), ("s_cvt_hi_f32_f16", dst(1), src(1), op(gfx11=0x6a), InstrClass.SFPU), ("s_ceil_f16", dst(1), src(1), op(gfx11=0x6b), InstrClass.SFPU), @@ -986,7 +987,7 @@ VOP1 = { ("v_cvt_u32_f32", True, False, dst(1), src(1), op(0x07)), ("v_cvt_i32_f32", True, False, dst(1), src(1), op(0x08)), ("v_cvt_f16_f32", True, True, dst(1), src(1), op(0x0a)), - ("p_cvt_f16_f32_rtne", True, True, dst(1), src(1), op(-1)), + ("p_v_cvt_f16_f32_rtne", True, True, dst(1), src(1), op(-1)), ("v_cvt_f32_f16", True, True, dst(1), src(1), op(0x0b)), ("v_cvt_rpi_i32_f32", True, False, dst(1), src(1), op(0x0c)), #v_cvt_nearest_i32_f32 in GFX11 ("v_cvt_flr_i32_f32", True, False, dst(1), src(1), op(0x0d)),#v_cvt_floor_i32_f32 in GFX11