nir: remove unpack_half_flush_to_zero
It doesn't make sense to have two sets of opcodes for this when all backends
that support the flush_to_zero variant just rely on the global floating point
mode anyway.

Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29433>
This commit is contained in:
@@ -3378,21 +3378,17 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
||||
bld.vop3(opcode, Definition(dst), src0, src1);
|
||||
break;
|
||||
}
|
||||
case nir_op_unpack_half_2x16_split_x_flush_to_zero:
|
||||
case nir_op_unpack_half_2x16_split_x: {
|
||||
Temp src = get_alu_src(ctx, instr->src[0]);
|
||||
if (src.regClass() == v1)
|
||||
src = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), src);
|
||||
if (dst.regClass() == v1) {
|
||||
assert(ctx->block->fp_mode.must_flush_denorms16_64 ==
|
||||
(instr->op == nir_op_unpack_half_2x16_split_x_flush_to_zero));
|
||||
bld.vop1(aco_opcode::v_cvt_f32_f16, Definition(dst), src);
|
||||
} else {
|
||||
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
|
||||
}
|
||||
break;
|
||||
}
|
||||
case nir_op_unpack_half_2x16_split_y_flush_to_zero:
|
||||
case nir_op_unpack_half_2x16_split_y: {
|
||||
Temp src = get_alu_src(ctx, instr->src[0]);
|
||||
if (src.regClass() == s1)
|
||||
@@ -3402,8 +3398,6 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
||||
src =
|
||||
bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), src).def(1).getTemp();
|
||||
if (dst.regClass() == v1) {
|
||||
assert(ctx->block->fp_mode.must_flush_denorms16_64 ==
|
||||
(instr->op == nir_op_unpack_half_2x16_split_y_flush_to_zero));
|
||||
bld.vop1(aco_opcode::v_cvt_f32_f16, Definition(dst), src);
|
||||
} else {
|
||||
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
|
||||
|
||||
@@ -263,22 +263,13 @@ pack_half_1x16_rtz(float x)
|
||||
* Evaluate one component of unpackHalf2x16.
|
||||
*/
|
||||
static float
|
||||
unpack_half_1x16_flush_to_zero(uint16_t u)
|
||||
unpack_half_1x16(uint16_t u, bool ftz)
|
||||
{
|
||||
if (0 == (u & 0x7c00))
|
||||
if (0 == (u & 0x7c00) && ftz)
|
||||
u &= 0x8000;
|
||||
return _mesa_half_to_float(u);
|
||||
}
|
||||
|
||||
/**
|
||||
* Evaluate one component of unpackHalf2x16.
|
||||
*/
|
||||
static float
|
||||
unpack_half_1x16(uint16_t u)
|
||||
{
|
||||
return _mesa_half_to_float(u);
|
||||
}
|
||||
|
||||
/* Broadcom v3d specific instructions */
|
||||
/**
|
||||
* Packs 2 2x16 floating split into a r11g11b10f:
|
||||
@@ -291,9 +282,9 @@ static uint32_t pack_32_to_r11g11b10_v3d(const uint32_t src0,
|
||||
const uint32_t src1)
|
||||
{
|
||||
float rgb[3] = {
|
||||
unpack_half_1x16((src0 & 0xffff)),
|
||||
unpack_half_1x16((src0 >> 16)),
|
||||
unpack_half_1x16((src1 & 0xffff)),
|
||||
unpack_half_1x16((src0 & 0xffff), false),
|
||||
unpack_half_1x16((src0 >> 16), false),
|
||||
unpack_half_1x16((src1 & 0xffff), false),
|
||||
};
|
||||
|
||||
return float3_to_r11g11b10f(rgb);
|
||||
|
||||
@@ -241,23 +241,14 @@ lower_alu_instr_width(nir_builder *b, nir_instr *instr, void *_data)
|
||||
*/
|
||||
return NULL;
|
||||
|
||||
case nir_op_unpack_half_2x16_flush_to_zero:
|
||||
case nir_op_unpack_half_2x16: {
|
||||
if (!b->shader->options->lower_unpack_half_2x16)
|
||||
return NULL;
|
||||
|
||||
nir_def *packed = nir_ssa_for_alu_src(b, alu, 0);
|
||||
if (alu->op == nir_op_unpack_half_2x16_flush_to_zero) {
|
||||
return nir_vec2(b,
|
||||
nir_unpack_half_2x16_split_x_flush_to_zero(b,
|
||||
packed),
|
||||
nir_unpack_half_2x16_split_y_flush_to_zero(b,
|
||||
packed));
|
||||
} else {
|
||||
return nir_vec2(b,
|
||||
nir_unpack_half_2x16_split_x(b, packed),
|
||||
nir_unpack_half_2x16_split_y(b, packed));
|
||||
}
|
||||
return nir_vec2(b,
|
||||
nir_unpack_half_2x16_split_x(b, packed),
|
||||
nir_unpack_half_2x16_split_y(b, packed));
|
||||
}
|
||||
|
||||
case nir_op_pack_uvec2_to_uint: {
|
||||
|
||||
@@ -401,7 +401,6 @@ unpack_2x16("snorm")
|
||||
unpack_4x8("snorm")
|
||||
unpack_2x16("unorm")
|
||||
unpack_4x8("unorm")
|
||||
unpack_2x16("half")
|
||||
|
||||
unop_horiz("pack_uint_2x16", 1, tuint32, 2, tuint32, """
|
||||
dst.x = _mesa_unsigned_to_unsigned(src0.x, 16);
|
||||
@@ -452,22 +451,18 @@ unop_horiz("unpack_32_2x16", 2, tuint16, 1, tuint32,
|
||||
unop_horiz("unpack_32_4x8", 4, tuint8, 1, tuint32,
|
||||
"dst.x = src0.x; dst.y = src0.x >> 8; dst.z = src0.x >> 16; dst.w = src0.x >> 24;")
|
||||
|
||||
unop_horiz("unpack_half_2x16_flush_to_zero", 2, tfloat32, 1, tuint32, """
|
||||
dst.x = unpack_half_1x16_flush_to_zero((uint16_t)(src0.x & 0xffff));
|
||||
dst.y = unpack_half_1x16_flush_to_zero((uint16_t)(src0.x << 16));
|
||||
unop_horiz("unpack_half_2x16", 2, tfloat32, 1, tuint32, """
|
||||
dst.x = unpack_half_1x16((uint16_t)(src0.x & 0xffff), nir_is_denorm_flush_to_zero(execution_mode, 16));
|
||||
dst.y = unpack_half_1x16((uint16_t)(src0.x >> 16), nir_is_denorm_flush_to_zero(execution_mode, 16));
|
||||
""")
|
||||
|
||||
# Lowered floating point unpacking operations.
|
||||
|
||||
unop_convert("unpack_half_2x16_split_x", tfloat32, tuint32,
|
||||
"unpack_half_1x16((uint16_t)(src0 & 0xffff))")
|
||||
"unpack_half_1x16((uint16_t)(src0 & 0xffff), nir_is_denorm_flush_to_zero(execution_mode, 16))")
|
||||
unop_convert("unpack_half_2x16_split_y", tfloat32, tuint32,
|
||||
"unpack_half_1x16((uint16_t)(src0 >> 16))")
|
||||
"unpack_half_1x16((uint16_t)(src0 >> 16), nir_is_denorm_flush_to_zero(execution_mode, 16))")
|
||||
|
||||
unop_convert("unpack_half_2x16_split_x_flush_to_zero", tfloat32, tuint32,
|
||||
"unpack_half_1x16_flush_to_zero((uint16_t)(src0 & 0xffff))")
|
||||
unop_convert("unpack_half_2x16_split_y_flush_to_zero", tfloat32, tuint32,
|
||||
"unpack_half_1x16_flush_to_zero((uint16_t)(src0 >> 16))")
|
||||
|
||||
unop_convert("unpack_32_2x16_split_x", tuint16, tuint32, "src0")
|
||||
unop_convert("unpack_32_2x16_split_y", tuint16, tuint32, "src0 >> 16")
|
||||
|
||||
@@ -2286,11 +2286,19 @@ optimizations.extend([
|
||||
|
||||
(('unpack_half_2x16_split_x', 'a@32'),
|
||||
('f2f32', ('u2u16', a)),
|
||||
'options->lower_pack_split'),
|
||||
'options->lower_pack_split && !nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 16)'),
|
||||
|
||||
(('unpack_half_2x16_split_x', 'a@32'),
|
||||
('f2f32', ('fmul', 1.0, ('u2u16', a))),
|
||||
'options->lower_pack_split && nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 16)'),
|
||||
|
||||
(('unpack_half_2x16_split_y', 'a@32'),
|
||||
('f2f32', ('u2u16', ('ushr', a, 16))),
|
||||
'options->lower_pack_split'),
|
||||
'options->lower_pack_split && !nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 16)'),
|
||||
|
||||
(('unpack_half_2x16_split_y', 'a@32'),
|
||||
('f2f32', ('fmul', 1.0, ('u2u16', ('ushr', a, 16)))),
|
||||
'options->lower_pack_split && nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 16)'),
|
||||
|
||||
(('isign', a), ('imin', ('imax', a, -1), 1), 'options->lower_isign'),
|
||||
(('imin', ('imax', a, -1), 1), ('isign', a), '!options->lower_isign'),
|
||||
|
||||
@@ -275,11 +275,7 @@ vtn_nir_alu_op_for_spirv_glsl_opcode(struct vtn_builder *b,
|
||||
case GLSLstd450UnpackUnorm4x8: return nir_op_unpack_unorm_4x8;
|
||||
case GLSLstd450UnpackSnorm2x16: return nir_op_unpack_snorm_2x16;
|
||||
case GLSLstd450UnpackUnorm2x16: return nir_op_unpack_unorm_2x16;
|
||||
case GLSLstd450UnpackHalf2x16:
|
||||
if (execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16)
|
||||
return nir_op_unpack_half_2x16_flush_to_zero;
|
||||
else
|
||||
return nir_op_unpack_half_2x16;
|
||||
case GLSLstd450UnpackHalf2x16: return nir_op_unpack_half_2x16;
|
||||
case GLSLstd450UnpackDouble2x32: return nir_op_unpack_64_2x32;
|
||||
|
||||
default:
|
||||
|
||||
@@ -1519,16 +1519,10 @@ fs_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr,
|
||||
case nir_op_pack_half_2x16:
|
||||
unreachable("not reached: should be handled by lower_packing_builtins");
|
||||
|
||||
case nir_op_unpack_half_2x16_split_x_flush_to_zero:
|
||||
assert(FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16 & execution_mode);
|
||||
FALLTHROUGH;
|
||||
case nir_op_unpack_half_2x16_split_x:
|
||||
bld.MOV(result, subscript(op[0], BRW_TYPE_HF, 0));
|
||||
break;
|
||||
|
||||
case nir_op_unpack_half_2x16_split_y_flush_to_zero:
|
||||
assert(FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16 & execution_mode);
|
||||
FALLTHROUGH;
|
||||
case nir_op_unpack_half_2x16_split_y:
|
||||
bld.MOV(result, subscript(op[0], BRW_TYPE_HF, 1));
|
||||
break;
|
||||
|
||||
@@ -1610,16 +1610,10 @@ fs_nir_emit_alu(nir_to_elk_state &ntb, nir_alu_instr *instr,
|
||||
case nir_op_pack_half_2x16:
|
||||
unreachable("not reached: should be handled by lower_packing_builtins");
|
||||
|
||||
case nir_op_unpack_half_2x16_split_x_flush_to_zero:
|
||||
assert(FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16 & execution_mode);
|
||||
FALLTHROUGH;
|
||||
case nir_op_unpack_half_2x16_split_x:
|
||||
inst = bld.F16TO32(result, subscript(op[0], ELK_REGISTER_TYPE_HF, 0));
|
||||
break;
|
||||
|
||||
case nir_op_unpack_half_2x16_split_y_flush_to_zero:
|
||||
assert(FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16 & execution_mode);
|
||||
FALLTHROUGH;
|
||||
case nir_op_unpack_half_2x16_split_y:
|
||||
inst = bld.F16TO32(result, subscript(op[0], ELK_REGISTER_TYPE_HF, 1));
|
||||
break;
|
||||
|
||||
Reference in New Issue
Block a user