From 7ef45e661f4072382be7cc70f417f86a1ab039cb Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Fri, 19 May 2023 09:56:42 -0700 Subject: [PATCH] intel/fs: Add constant propagation for ADD3 v2: Require that the constant value be representable as either uint16_t or int16_t. Suggested by Matt. v3: Remove redundant patterns. Noticed by Matt. shader-db: DG2 total instructions in shared programs: 23103767 -> 23103577 (<.01%) instructions in affected programs: 51822 -> 51632 (-0.37%) helped: 98 / HURT: 15 total cycles in shared programs: 842347714 -> 842380017 (<.01%) cycles in affected programs: 1942595 -> 1974898 (1.66%) helped: 97 / HURT: 32 Nearly all of the affected shaders (around 9,900) are shaders in Cyberpunk 2077. It's about an even split between vertex and fragment shaders. The majority of the remaining affected shaders (3,600) are from Strange Brigade. This was also a nearly even split between fragment and vertex. All but two of the lost shaders are SIMD32 fragment shaders in Cyberpunk 2077. The other two are SIMD32 fragment shaders in Dota2. fossil-db: DG2 Instructions in all programs: 196379107 -> 196248608 (-0.1%) helped: 13467 / HURT: 1210 Cycles in all programs: 13931355281 -> 13929955971 (-0.0%) helped: 11801 / HURT: 2922 Lost: 90 Reviewed-by: Matt Turner Part-of: --- src/compiler/nir/nir_opt_algebraic.py | 7 +++- src/compiler/nir/nir_search_helpers.h | 21 ++++++++++++ .../compiler/brw_fs_copy_propagation.cpp | 33 +++++++++++++++++++ 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index afb0fd0895b..f0cba7467af 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -2861,9 +2861,14 @@ late_optimizations.extend([ (('iadd', a, ('ineg', 'b')), ('isub', 'a', 'b'), 'options->has_isub || options->lower_ineg'), (('ineg', a), ('isub', 0, a), 'options->lower_ineg'), (('iabs', a), ('imax', a, ('ineg', a)), 'options->lower_iabs'), - + # On Intel GPUs, the constant field for an ADD3 instruction must be either + # int16_t or uint16_t. (('iadd', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), 'c(is_not_const)'), ('iadd3', a, b, c), 'options->has_iadd3'), + (('iadd', ('iadd(is_used_once)', '#a(is_16_bits)', 'b(is_not_const)'), 'c(is_not_const)'), ('iadd3', a, b, c), 'options->has_iadd3'), + (('iadd', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c(is_16_bits)'), ('iadd3', a, b, c), 'options->has_iadd3'), (('iadd', ('ineg', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)')), 'c(is_not_const)'), ('iadd3', ('ineg', a), ('ineg', b), c), 'options->has_iadd3'), + (('iadd', ('ineg', ('iadd(is_used_once)', '#a(is_16_bits)', 'b(is_not_const)')), 'c(is_not_const)'), ('iadd3', ('ineg', a), ('ineg', b), c), 'options->has_iadd3'), + (('iadd', ('ineg', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)')), '#c(is_16_bits)'), ('iadd3', ('ineg', a), ('ineg', b), c), 'options->has_iadd3'), # fneg_lo / fneg_hi (('vec2(is_only_used_as_float)', ('fneg@16', a), b), ('fmul', ('vec2', a, b), ('vec2', -1.0, 1.0)), 'options->vectorize_vec2_16bit'), diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h index 0ba0965e8fb..06abe924391 100644 --- a/src/compiler/nir/nir_search_helpers.h +++ b/src/compiler/nir/nir_search_helpers.h @@ -284,6 +284,27 @@ is_first_5_bits_uge_2(UNUSED struct hash_table *ht, const nir_alu_instr *instr, return true; } +/** Is this a constant that could be either int16_t or uint16_t? */ +static inline bool +is_16_bits(UNUSED struct hash_table *ht, const nir_alu_instr *instr, + unsigned src, unsigned num_components, + const uint8_t *swizzle) +{ + /* only constant srcs: */ + if (!nir_src_is_const(instr->src[src].src)) + return false; + + for (unsigned i = 0; i < num_components; i++) { + const int64_t val = + nir_src_comp_as_int(instr->src[src].src, swizzle[i]); + + if (val > 0xffff || val < -0x8000) + return false; + } + + return true; +} + static inline bool is_not_const(UNUSED struct hash_table *ht, const nir_alu_instr *instr, unsigned src, UNUSED unsigned num_components, diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index 1cd1a2fab4f..56e852a41fc 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -983,6 +983,30 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) } break; + case BRW_OPCODE_ADD3: + /* add3 can have a single imm16 source. Proceed if the source type is + * already W or UW or the value can be coerced to one of those types. + */ + if (val.type == BRW_REGISTER_TYPE_W || val.type == BRW_REGISTER_TYPE_UW) + ; /* Nothing to do. */ + else if (val.ud <= 0xffff) + val = brw_imm_uw(val.ud); + else if (val.d >= -0x8000 && val.d <= 0x7fff) + val = brw_imm_w(val.d); + else + break; + + if (i == 2) { + inst->src[i] = val; + progress = true; + } else if (inst->src[2].file != IMM) { + inst->src[i] = inst->src[2]; + inst->src[2] = val; + progress = true; + } + + break; + case BRW_OPCODE_CMP: case BRW_OPCODE_IF: if (i == 1) { @@ -1088,6 +1112,15 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) } } + /* ADD3 can only have the immediate as src0. */ + if (progress && inst->opcode == BRW_OPCODE_ADD3) { + if (inst->src[2].file == IMM) { + const auto src0 = inst->src[0]; + inst->src[0] = inst->src[2]; + inst->src[2] = src0; + } + } + return progress; }