aco: add aco_opcode::p_s_cvt_f16_f32_rtne
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29245>
This commit is contained in:
@@ -2933,7 +2933,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
||||
/* We emit s_round_mode/s_setreg_imm32 in lower_to_hw_instr to
|
||||
* keep value numbering and the scheduler simpler.
|
||||
*/
|
||||
bld.vop1(aco_opcode::p_cvt_f16_f32_rtne, Definition(dst), src);
|
||||
bld.vop1(aco_opcode::p_v_cvt_f16_f32_rtne, Definition(dst), src);
|
||||
else
|
||||
bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src);
|
||||
break;
|
||||
@@ -3484,7 +3484,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
||||
Temp src = get_alu_src(ctx, instr->src[0]);
|
||||
Temp f16;
|
||||
if (ctx->block->fp_mode.round16_64 != fp_round_ne)
|
||||
f16 = bld.vop1(aco_opcode::p_cvt_f16_f32_rtne, bld.def(v2b), src);
|
||||
f16 = bld.vop1(aco_opcode::p_v_cvt_f16_f32_rtne, bld.def(v2b), src);
|
||||
else
|
||||
f16 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), src);
|
||||
Temp f32, cmp_res;
|
||||
|
||||
@@ -626,7 +626,7 @@ instr_is_16bit(amd_gfx_level gfx_level, aco_opcode op)
|
||||
case aco_opcode::v_fmaak_f16:
|
||||
/* VOP1 */
|
||||
case aco_opcode::v_cvt_f16_f32:
|
||||
case aco_opcode::p_cvt_f16_f32_rtne:
|
||||
case aco_opcode::p_v_cvt_f16_f32_rtne:
|
||||
case aco_opcode::v_cvt_f16_u16:
|
||||
case aco_opcode::v_cvt_f16_i16:
|
||||
case aco_opcode::v_rcp_f16:
|
||||
|
||||
@@ -2956,14 +2956,18 @@ lower_to_hw_instr(Program* program)
|
||||
} else if (emit_s_barrier) {
|
||||
bld.sopp(aco_opcode::s_barrier);
|
||||
}
|
||||
} else if (instr->opcode == aco_opcode::p_cvt_f16_f32_rtne) {
|
||||
} else if (instr->opcode == aco_opcode::p_v_cvt_f16_f32_rtne ||
|
||||
instr->opcode == aco_opcode::p_s_cvt_f16_f32_rtne) {
|
||||
float_mode new_mode = block->fp_mode;
|
||||
new_mode.round16_64 = fp_round_ne;
|
||||
bool set_round = new_mode.round != block->fp_mode.round;
|
||||
|
||||
emit_set_mode(bld, new_mode, set_round, false);
|
||||
|
||||
instr->opcode = aco_opcode::v_cvt_f16_f32;
|
||||
if (instr->opcode == aco_opcode::p_v_cvt_f16_f32_rtne)
|
||||
instr->opcode = aco_opcode::v_cvt_f16_f32;
|
||||
else
|
||||
instr->opcode = aco_opcode::s_cvt_f16_f32;
|
||||
ctx.instructions.emplace_back(std::move(instr));
|
||||
|
||||
emit_set_mode(bld, block->fp_mode, set_round, false);
|
||||
|
||||
@@ -639,6 +639,7 @@ SOP1 = {
|
||||
("s_cvt_i32_f32", dst(1), src(1), op(gfx11=0x66), InstrClass.SFPU),
|
||||
("s_cvt_u32_f32", dst(1), src(1), op(gfx11=0x67), InstrClass.SFPU),
|
||||
("s_cvt_f16_f32", dst(1), src(1), op(gfx11=0x68), InstrClass.SFPU),
|
||||
("p_s_cvt_f16_f32_rtne", dst(1), src(1), op(-1), InstrClass.SFPU),
|
||||
("s_cvt_f32_f16", dst(1), src(1), op(gfx11=0x69), InstrClass.SFPU),
|
||||
("s_cvt_hi_f32_f16", dst(1), src(1), op(gfx11=0x6a), InstrClass.SFPU),
|
||||
("s_ceil_f16", dst(1), src(1), op(gfx11=0x6b), InstrClass.SFPU),
|
||||
@@ -986,7 +987,7 @@ VOP1 = {
|
||||
("v_cvt_u32_f32", True, False, dst(1), src(1), op(0x07)),
|
||||
("v_cvt_i32_f32", True, False, dst(1), src(1), op(0x08)),
|
||||
("v_cvt_f16_f32", True, True, dst(1), src(1), op(0x0a)),
|
||||
("p_cvt_f16_f32_rtne", True, True, dst(1), src(1), op(-1)),
|
||||
("p_v_cvt_f16_f32_rtne", True, True, dst(1), src(1), op(-1)),
|
||||
("v_cvt_f32_f16", True, True, dst(1), src(1), op(0x0b)),
|
||||
("v_cvt_rpi_i32_f32", True, False, dst(1), src(1), op(0x0c)), #v_cvt_nearest_i32_f32 in GFX11
|
||||
("v_cvt_flr_i32_f32", True, False, dst(1), src(1), op(0x0d)),#v_cvt_floor_i32_f32 in GFX11
|
||||
|
||||
Reference in New Issue
Block a user