aco: add aco_opcode::p_s_cvt_f16_f32_rtne

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29245>
This commit is contained in:
Georg Lehmann
2023-09-21 20:04:31 +02:00
committed by Marge Bot
parent 1efb7754fc
commit 4399c7bac3
4 changed files with 11 additions and 6 deletions
@@ -2933,7 +2933,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
/* We emit s_round_mode/s_setreg_imm32 in lower_to_hw_instr to
* keep value numbering and the scheduler simpler.
*/
-bld.vop1(aco_opcode::p_cvt_f16_f32_rtne, Definition(dst), src);
+bld.vop1(aco_opcode::p_v_cvt_f16_f32_rtne, Definition(dst), src);
else
bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src);
break;
@@ -3484,7 +3484,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
Temp src = get_alu_src(ctx, instr->src[0]);
Temp f16;
if (ctx->block->fp_mode.round16_64 != fp_round_ne)
-f16 = bld.vop1(aco_opcode::p_cvt_f16_f32_rtne, bld.def(v2b), src);
+f16 = bld.vop1(aco_opcode::p_v_cvt_f16_f32_rtne, bld.def(v2b), src);
else
f16 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), src);
Temp f32, cmp_res;
+1 -1
View File
@@ -626,7 +626,7 @@ instr_is_16bit(amd_gfx_level gfx_level, aco_opcode op)
case aco_opcode::v_fmaak_f16:
/* VOP1 */
case aco_opcode::v_cvt_f16_f32:
-case aco_opcode::p_cvt_f16_f32_rtne:
+case aco_opcode::p_v_cvt_f16_f32_rtne:
case aco_opcode::v_cvt_f16_u16:
case aco_opcode::v_cvt_f16_i16:
case aco_opcode::v_rcp_f16:
+6 -2
View File
@@ -2956,14 +2956,18 @@ lower_to_hw_instr(Program* program)
} else if (emit_s_barrier) {
bld.sopp(aco_opcode::s_barrier);
}
-} else if (instr->opcode == aco_opcode::p_cvt_f16_f32_rtne) {
+} else if (instr->opcode == aco_opcode::p_v_cvt_f16_f32_rtne ||
+instr->opcode == aco_opcode::p_s_cvt_f16_f32_rtne) {
float_mode new_mode = block->fp_mode;
new_mode.round16_64 = fp_round_ne;
bool set_round = new_mode.round != block->fp_mode.round;
emit_set_mode(bld, new_mode, set_round, false);
-instr->opcode = aco_opcode::v_cvt_f16_f32;
+if (instr->opcode == aco_opcode::p_v_cvt_f16_f32_rtne)
+instr->opcode = aco_opcode::v_cvt_f16_f32;
+else
+instr->opcode = aco_opcode::s_cvt_f16_f32;
ctx.instructions.emplace_back(std::move(instr));
emit_set_mode(bld, block->fp_mode, set_round, false);
+2 -1
View File
@@ -639,6 +639,7 @@ SOP1 = {
("s_cvt_i32_f32", dst(1), src(1), op(gfx11=0x66), InstrClass.SFPU),
("s_cvt_u32_f32", dst(1), src(1), op(gfx11=0x67), InstrClass.SFPU),
("s_cvt_f16_f32", dst(1), src(1), op(gfx11=0x68), InstrClass.SFPU),
+("p_s_cvt_f16_f32_rtne", dst(1), src(1), op(-1), InstrClass.SFPU),
("s_cvt_f32_f16", dst(1), src(1), op(gfx11=0x69), InstrClass.SFPU),
("s_cvt_hi_f32_f16", dst(1), src(1), op(gfx11=0x6a), InstrClass.SFPU),
("s_ceil_f16", dst(1), src(1), op(gfx11=0x6b), InstrClass.SFPU),
@@ -986,7 +987,7 @@ VOP1 = {
("v_cvt_u32_f32", True, False, dst(1), src(1), op(0x07)),
("v_cvt_i32_f32", True, False, dst(1), src(1), op(0x08)),
("v_cvt_f16_f32", True, True, dst(1), src(1), op(0x0a)),
-("p_cvt_f16_f32_rtne", True, True, dst(1), src(1), op(-1)),
+("p_v_cvt_f16_f32_rtne", True, True, dst(1), src(1), op(-1)),
("v_cvt_f32_f16", True, True, dst(1), src(1), op(0x0b)),
("v_cvt_rpi_i32_f32", True, False, dst(1), src(1), op(0x0c)), #v_cvt_nearest_i32_f32 in GFX11
("v_cvt_flr_i32_f32", True, False, dst(1), src(1), op(0x0d)),#v_cvt_floor_i32_f32 in GFX11