nir: remove unpack_half_flush_to_zero

It doesn't make sense to have two sets of opcodes for this when all backends
that support the flush_to_zero variant just rely on the global floating point
mode anyway.

Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29433>
This commit is contained in:
Georg Lehmann
2024-05-28 16:13:51 +02:00
committed by Marge Bot
parent a1ea0956b4
commit dcab408a6c
8 changed files with 24 additions and 61 deletions
@@ -3378,21 +3378,17 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
bld.vop3(opcode, Definition(dst), src0, src1);
break;
}
case nir_op_unpack_half_2x16_split_x_flush_to_zero:
case nir_op_unpack_half_2x16_split_x: {
Temp src = get_alu_src(ctx, instr->src[0]);
if (src.regClass() == v1)
src = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), src);
if (dst.regClass() == v1) {
assert(ctx->block->fp_mode.must_flush_denorms16_64 ==
(instr->op == nir_op_unpack_half_2x16_split_x_flush_to_zero));
bld.vop1(aco_opcode::v_cvt_f32_f16, Definition(dst), src);
} else {
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
case nir_op_unpack_half_2x16_split_y_flush_to_zero:
case nir_op_unpack_half_2x16_split_y: {
Temp src = get_alu_src(ctx, instr->src[0]);
if (src.regClass() == s1)
@@ -3402,8 +3398,6 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
src =
bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), src).def(1).getTemp();
if (dst.regClass() == v1) {
assert(ctx->block->fp_mode.must_flush_denorms16_64 ==
(instr->op == nir_op_unpack_half_2x16_split_y_flush_to_zero));
bld.vop1(aco_opcode::v_cvt_f32_f16, Definition(dst), src);
} else {
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
+5 -14
View File
@@ -263,22 +263,13 @@ pack_half_1x16_rtz(float x)
* Evaluate one component of unpackHalf2x16.
*/
static float
unpack_half_1x16_flush_to_zero(uint16_t u)
unpack_half_1x16(uint16_t u, bool ftz)
{
if (0 == (u & 0x7c00))
if (0 == (u & 0x7c00) && ftz)
u &= 0x8000;
return _mesa_half_to_float(u);
}
/**
* Evaluate one component of unpackHalf2x16.
*/
static float
unpack_half_1x16(uint16_t u)
{
return _mesa_half_to_float(u);
}
/* Broadcom v3d specific instructions */
/**
* Packs 2 2x16 floating split into a r11g11b10f:
@@ -291,9 +282,9 @@ static uint32_t pack_32_to_r11g11b10_v3d(const uint32_t src0,
const uint32_t src1)
{
float rgb[3] = {
unpack_half_1x16((src0 & 0xffff)),
unpack_half_1x16((src0 >> 16)),
unpack_half_1x16((src1 & 0xffff)),
unpack_half_1x16((src0 & 0xffff), false),
unpack_half_1x16((src0 >> 16), false),
unpack_half_1x16((src1 & 0xffff), false),
};
return float3_to_r11g11b10f(rgb);
+3 -12
View File
@@ -241,23 +241,14 @@ lower_alu_instr_width(nir_builder *b, nir_instr *instr, void *_data)
*/
return NULL;
case nir_op_unpack_half_2x16_flush_to_zero:
case nir_op_unpack_half_2x16: {
if (!b->shader->options->lower_unpack_half_2x16)
return NULL;
nir_def *packed = nir_ssa_for_alu_src(b, alu, 0);
if (alu->op == nir_op_unpack_half_2x16_flush_to_zero) {
return nir_vec2(b,
nir_unpack_half_2x16_split_x_flush_to_zero(b,
packed),
nir_unpack_half_2x16_split_y_flush_to_zero(b,
packed));
} else {
return nir_vec2(b,
nir_unpack_half_2x16_split_x(b, packed),
nir_unpack_half_2x16_split_y(b, packed));
}
return nir_vec2(b,
nir_unpack_half_2x16_split_x(b, packed),
nir_unpack_half_2x16_split_y(b, packed));
}
case nir_op_pack_uvec2_to_uint: {
+5 -10
View File
@@ -401,7 +401,6 @@ unpack_2x16("snorm")
unpack_4x8("snorm")
unpack_2x16("unorm")
unpack_4x8("unorm")
unpack_2x16("half")
unop_horiz("pack_uint_2x16", 1, tuint32, 2, tuint32, """
dst.x = _mesa_unsigned_to_unsigned(src0.x, 16);
@@ -452,22 +451,18 @@ unop_horiz("unpack_32_2x16", 2, tuint16, 1, tuint32,
unop_horiz("unpack_32_4x8", 4, tuint8, 1, tuint32,
"dst.x = src0.x; dst.y = src0.x >> 8; dst.z = src0.x >> 16; dst.w = src0.x >> 24;")
unop_horiz("unpack_half_2x16_flush_to_zero", 2, tfloat32, 1, tuint32, """
dst.x = unpack_half_1x16_flush_to_zero((uint16_t)(src0.x & 0xffff));
dst.y = unpack_half_1x16_flush_to_zero((uint16_t)(src0.x << 16));
unop_horiz("unpack_half_2x16", 2, tfloat32, 1, tuint32, """
dst.x = unpack_half_1x16((uint16_t)(src0.x & 0xffff), nir_is_denorm_flush_to_zero(execution_mode, 16));
dst.y = unpack_half_1x16((uint16_t)(src0.x >> 16), nir_is_denorm_flush_to_zero(execution_mode, 16));
""")
# Lowered floating point unpacking operations.
unop_convert("unpack_half_2x16_split_x", tfloat32, tuint32,
"unpack_half_1x16((uint16_t)(src0 & 0xffff))")
"unpack_half_1x16((uint16_t)(src0 & 0xffff), nir_is_denorm_flush_to_zero(execution_mode, 16))")
unop_convert("unpack_half_2x16_split_y", tfloat32, tuint32,
"unpack_half_1x16((uint16_t)(src0 >> 16))")
"unpack_half_1x16((uint16_t)(src0 >> 16), nir_is_denorm_flush_to_zero(execution_mode, 16))")
unop_convert("unpack_half_2x16_split_x_flush_to_zero", tfloat32, tuint32,
"unpack_half_1x16_flush_to_zero((uint16_t)(src0 & 0xffff))")
unop_convert("unpack_half_2x16_split_y_flush_to_zero", tfloat32, tuint32,
"unpack_half_1x16_flush_to_zero((uint16_t)(src0 >> 16))")
unop_convert("unpack_32_2x16_split_x", tuint16, tuint32, "src0")
unop_convert("unpack_32_2x16_split_y", tuint16, tuint32, "src0 >> 16")
+10 -2
View File
@@ -2286,11 +2286,19 @@ optimizations.extend([
(('unpack_half_2x16_split_x', 'a@32'),
('f2f32', ('u2u16', a)),
'options->lower_pack_split'),
'options->lower_pack_split && !nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 16)'),
(('unpack_half_2x16_split_x', 'a@32'),
('f2f32', ('fmul', 1.0, ('u2u16', a))),
'options->lower_pack_split && nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 16)'),
(('unpack_half_2x16_split_y', 'a@32'),
('f2f32', ('u2u16', ('ushr', a, 16))),
'options->lower_pack_split'),
'options->lower_pack_split && !nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 16)'),
(('unpack_half_2x16_split_y', 'a@32'),
('f2f32', ('fmul', 1.0, ('u2u16', ('ushr', a, 16)))),
'options->lower_pack_split && nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 16)'),
(('isign', a), ('imin', ('imax', a, -1), 1), 'options->lower_isign'),
(('imin', ('imax', a, -1), 1), ('isign', a), '!options->lower_isign'),
+1 -5
View File
@@ -275,11 +275,7 @@ vtn_nir_alu_op_for_spirv_glsl_opcode(struct vtn_builder *b,
case GLSLstd450UnpackUnorm4x8: return nir_op_unpack_unorm_4x8;
case GLSLstd450UnpackSnorm2x16: return nir_op_unpack_snorm_2x16;
case GLSLstd450UnpackUnorm2x16: return nir_op_unpack_unorm_2x16;
case GLSLstd450UnpackHalf2x16:
if (execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16)
return nir_op_unpack_half_2x16_flush_to_zero;
else
return nir_op_unpack_half_2x16;
case GLSLstd450UnpackHalf2x16: return nir_op_unpack_half_2x16;
case GLSLstd450UnpackDouble2x32: return nir_op_unpack_64_2x32;
default:
-6
View File
@@ -1519,16 +1519,10 @@ fs_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr,
case nir_op_pack_half_2x16:
unreachable("not reached: should be handled by lower_packing_builtins");
case nir_op_unpack_half_2x16_split_x_flush_to_zero:
assert(FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16 & execution_mode);
FALLTHROUGH;
case nir_op_unpack_half_2x16_split_x:
bld.MOV(result, subscript(op[0], BRW_TYPE_HF, 0));
break;
case nir_op_unpack_half_2x16_split_y_flush_to_zero:
assert(FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16 & execution_mode);
FALLTHROUGH;
case nir_op_unpack_half_2x16_split_y:
bld.MOV(result, subscript(op[0], BRW_TYPE_HF, 1));
break;
-6
View File
@@ -1610,16 +1610,10 @@ fs_nir_emit_alu(nir_to_elk_state &ntb, nir_alu_instr *instr,
case nir_op_pack_half_2x16:
unreachable("not reached: should be handled by lower_packing_builtins");
case nir_op_unpack_half_2x16_split_x_flush_to_zero:
assert(FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16 & execution_mode);
FALLTHROUGH;
case nir_op_unpack_half_2x16_split_x:
inst = bld.F16TO32(result, subscript(op[0], ELK_REGISTER_TYPE_HF, 0));
break;
case nir_op_unpack_half_2x16_split_y_flush_to_zero:
assert(FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16 & execution_mode);
FALLTHROUGH;
case nir_op_unpack_half_2x16_split_y:
inst = bld.F16TO32(result, subscript(op[0], ELK_REGISTER_TYPE_HF, 1));
break;