diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 2f03fcb0c6a..0edf39b2f4f 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -3378,21 +3378,17 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) bld.vop3(opcode, Definition(dst), src0, src1); break; } - case nir_op_unpack_half_2x16_split_x_flush_to_zero: case nir_op_unpack_half_2x16_split_x: { Temp src = get_alu_src(ctx, instr->src[0]); if (src.regClass() == v1) src = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), src); if (dst.regClass() == v1) { - assert(ctx->block->fp_mode.must_flush_denorms16_64 == - (instr->op == nir_op_unpack_half_2x16_split_x_flush_to_zero)); bld.vop1(aco_opcode::v_cvt_f32_f16, Definition(dst), src); } else { isel_err(&instr->instr, "Unimplemented NIR instr bit size"); } break; } - case nir_op_unpack_half_2x16_split_y_flush_to_zero: case nir_op_unpack_half_2x16_split_y: { Temp src = get_alu_src(ctx, instr->src[0]); if (src.regClass() == s1) @@ -3402,8 +3398,6 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) src = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), src).def(1).getTemp(); if (dst.regClass() == v1) { - assert(ctx->block->fp_mode.must_flush_denorms16_64 == - (instr->op == nir_op_unpack_half_2x16_split_y_flush_to_zero)); bld.vop1(aco_opcode::v_cvt_f32_f16, Definition(dst), src); } else { isel_err(&instr->instr, "Unimplemented NIR instr bit size"); diff --git a/src/compiler/nir/nir_constant_expressions.py b/src/compiler/nir/nir_constant_expressions.py index 50ea8756da6..b9f4701a54c 100644 --- a/src/compiler/nir/nir_constant_expressions.py +++ b/src/compiler/nir/nir_constant_expressions.py @@ -263,22 +263,13 @@ pack_half_1x16_rtz(float x) * Evaluate one component of unpackHalf2x16. */ static float -unpack_half_1x16_flush_to_zero(uint16_t u) +unpack_half_1x16(uint16_t u, bool ftz) { - if (0 == (u & 0x7c00)) + if (0 == (u & 0x7c00) && ftz) u &= 0x8000; return _mesa_half_to_float(u); } -/** - * Evaluate one component of unpackHalf2x16. - */ -static float -unpack_half_1x16(uint16_t u) -{ - return _mesa_half_to_float(u); -} - /* Broadcom v3d specific instructions */ /** * Packs 2 2x16 floating split into a r11g11b10f: @@ -291,9 +282,9 @@ static uint32_t pack_32_to_r11g11b10_v3d(const uint32_t src0, const uint32_t src1) { float rgb[3] = { - unpack_half_1x16((src0 & 0xffff)), - unpack_half_1x16((src0 >> 16)), - unpack_half_1x16((src1 & 0xffff)), + unpack_half_1x16((src0 & 0xffff), false), + unpack_half_1x16((src0 >> 16), false), + unpack_half_1x16((src1 & 0xffff), false), }; return float3_to_r11g11b10f(rgb); diff --git a/src/compiler/nir/nir_lower_alu_width.c b/src/compiler/nir/nir_lower_alu_width.c index a28c4fe69ce..3bc6b187df5 100644 --- a/src/compiler/nir/nir_lower_alu_width.c +++ b/src/compiler/nir/nir_lower_alu_width.c @@ -241,23 +241,14 @@ lower_alu_instr_width(nir_builder *b, nir_instr *instr, void *_data) */ return NULL; - case nir_op_unpack_half_2x16_flush_to_zero: case nir_op_unpack_half_2x16: { if (!b->shader->options->lower_unpack_half_2x16) return NULL; nir_def *packed = nir_ssa_for_alu_src(b, alu, 0); - if (alu->op == nir_op_unpack_half_2x16_flush_to_zero) { - return nir_vec2(b, - nir_unpack_half_2x16_split_x_flush_to_zero(b, - packed), - nir_unpack_half_2x16_split_y_flush_to_zero(b, - packed)); - } else { - return nir_vec2(b, - nir_unpack_half_2x16_split_x(b, packed), - nir_unpack_half_2x16_split_y(b, packed)); - } + return nir_vec2(b, + nir_unpack_half_2x16_split_x(b, packed), + nir_unpack_half_2x16_split_y(b, packed)); } case nir_op_pack_uvec2_to_uint: { diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py index b04c2f363ef..0f946741680 100644 --- a/src/compiler/nir/nir_opcodes.py +++ b/src/compiler/nir/nir_opcodes.py @@ -401,7 +401,6 @@ unpack_2x16("snorm") unpack_4x8("snorm") unpack_2x16("unorm") unpack_4x8("unorm") -unpack_2x16("half") unop_horiz("pack_uint_2x16", 1, tuint32, 2, tuint32, """ dst.x = _mesa_unsigned_to_unsigned(src0.x, 16); @@ -452,22 +451,18 @@ unop_horiz("unpack_32_2x16", 2, tuint16, 1, tuint32, unop_horiz("unpack_32_4x8", 4, tuint8, 1, tuint32, "dst.x = src0.x; dst.y = src0.x >> 8; dst.z = src0.x >> 16; dst.w = src0.x >> 24;") -unop_horiz("unpack_half_2x16_flush_to_zero", 2, tfloat32, 1, tuint32, """ -dst.x = unpack_half_1x16_flush_to_zero((uint16_t)(src0.x & 0xffff)); -dst.y = unpack_half_1x16_flush_to_zero((uint16_t)(src0.x << 16)); +unop_horiz("unpack_half_2x16", 2, tfloat32, 1, tuint32, """ +dst.x = unpack_half_1x16((uint16_t)(src0.x & 0xffff), nir_is_denorm_flush_to_zero(execution_mode, 16)); +dst.y = unpack_half_1x16((uint16_t)(src0.x >> 16), nir_is_denorm_flush_to_zero(execution_mode, 16)); """) # Lowered floating point unpacking operations. unop_convert("unpack_half_2x16_split_x", tfloat32, tuint32, - "unpack_half_1x16((uint16_t)(src0 & 0xffff))") + "unpack_half_1x16((uint16_t)(src0 & 0xffff), nir_is_denorm_flush_to_zero(execution_mode, 16))") unop_convert("unpack_half_2x16_split_y", tfloat32, tuint32, - "unpack_half_1x16((uint16_t)(src0 >> 16))") + "unpack_half_1x16((uint16_t)(src0 >> 16), nir_is_denorm_flush_to_zero(execution_mode, 16))") -unop_convert("unpack_half_2x16_split_x_flush_to_zero", tfloat32, tuint32, - "unpack_half_1x16_flush_to_zero((uint16_t)(src0 & 0xffff))") -unop_convert("unpack_half_2x16_split_y_flush_to_zero", tfloat32, tuint32, - "unpack_half_1x16_flush_to_zero((uint16_t)(src0 >> 16))") unop_convert("unpack_32_2x16_split_x", tuint16, tuint32, "src0") unop_convert("unpack_32_2x16_split_y", tuint16, tuint32, "src0 >> 16") diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index dfee4ea3b93..a532ee8b5d0 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -2286,11 +2286,19 @@ optimizations.extend([ (('unpack_half_2x16_split_x', 'a@32'), ('f2f32', ('u2u16', a)), - 'options->lower_pack_split'), + 'options->lower_pack_split && !nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 16)'), + + (('unpack_half_2x16_split_x', 'a@32'), + ('f2f32', ('fmul', 1.0, ('u2u16', a))), + 'options->lower_pack_split && nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 16)'), (('unpack_half_2x16_split_y', 'a@32'), ('f2f32', ('u2u16', ('ushr', a, 16))), - 'options->lower_pack_split'), + 'options->lower_pack_split && !nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 16)'), + + (('unpack_half_2x16_split_y', 'a@32'), + ('f2f32', ('fmul', 1.0, ('u2u16', ('ushr', a, 16)))), + 'options->lower_pack_split && nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 16)'), (('isign', a), ('imin', ('imax', a, -1), 1), 'options->lower_isign'), (('imin', ('imax', a, -1), 1), ('isign', a), '!options->lower_isign'), diff --git a/src/compiler/spirv/vtn_glsl450.c b/src/compiler/spirv/vtn_glsl450.c index 59809b5de56..9bf62d9c143 100644 --- a/src/compiler/spirv/vtn_glsl450.c +++ b/src/compiler/spirv/vtn_glsl450.c @@ -275,11 +275,7 @@ vtn_nir_alu_op_for_spirv_glsl_opcode(struct vtn_builder *b, case GLSLstd450UnpackUnorm4x8: return nir_op_unpack_unorm_4x8; case GLSLstd450UnpackSnorm2x16: return nir_op_unpack_snorm_2x16; case GLSLstd450UnpackUnorm2x16: return nir_op_unpack_unorm_2x16; - case GLSLstd450UnpackHalf2x16: - if (execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16) - return nir_op_unpack_half_2x16_flush_to_zero; - else - return nir_op_unpack_half_2x16; + case GLSLstd450UnpackHalf2x16: return nir_op_unpack_half_2x16; case GLSLstd450UnpackDouble2x32: return nir_op_unpack_64_2x32; default: diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 5505787af40..324dc85404f 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -1519,16 +1519,10 @@ fs_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr, case nir_op_pack_half_2x16: unreachable("not reached: should be handled by lower_packing_builtins"); - case nir_op_unpack_half_2x16_split_x_flush_to_zero: - assert(FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16 & execution_mode); - FALLTHROUGH; case nir_op_unpack_half_2x16_split_x: bld.MOV(result, subscript(op[0], BRW_TYPE_HF, 0)); break; - case nir_op_unpack_half_2x16_split_y_flush_to_zero: - assert(FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16 & execution_mode); - FALLTHROUGH; case nir_op_unpack_half_2x16_split_y: bld.MOV(result, subscript(op[0], BRW_TYPE_HF, 1)); break; diff --git a/src/intel/compiler/elk/elk_fs_nir.cpp b/src/intel/compiler/elk/elk_fs_nir.cpp index 5bb1a3b1684..cbdbe6e40b3 100644 --- a/src/intel/compiler/elk/elk_fs_nir.cpp +++ b/src/intel/compiler/elk/elk_fs_nir.cpp @@ -1610,16 +1610,10 @@ fs_nir_emit_alu(nir_to_elk_state &ntb, nir_alu_instr *instr, case nir_op_pack_half_2x16: unreachable("not reached: should be handled by lower_packing_builtins"); - case nir_op_unpack_half_2x16_split_x_flush_to_zero: - assert(FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16 & execution_mode); - FALLTHROUGH; case nir_op_unpack_half_2x16_split_x: inst = bld.F16TO32(result, subscript(op[0], ELK_REGISTER_TYPE_HF, 0)); break; - case nir_op_unpack_half_2x16_split_y_flush_to_zero: - assert(FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16 & execution_mode); - FALLTHROUGH; case nir_op_unpack_half_2x16_split_y: inst = bld.F16TO32(result, subscript(op[0], ELK_REGISTER_TYPE_HF, 1)); break;