nir, intel/compiler: do not form 64-bit iadd3

Adds a nir_lower_iadd3_64 bit to nir_lower_int64_options.

The late algebraic rules that fuse iadd(iadd(a, b), c) into iadd3 are now
generated once per bit size (8, 16, 32, 64) using sized "iadd@N" patterns.
All sizes still require options->has_iadd3; the 64-bit rules additionally
require that nir_lower_iadd3_64 is NOT set in options->lower_int64_options.

On the Intel side, nir_lower_iadd3_64 is OR'ed into an option mask in
brw_compiler_create(), and fs_nir_emit_alu() asserts that a nir_op_iadd3
reaching the backend has a destination narrower than 64 bits.

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index ddfa9b84d2d..fb096bdee26 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3602,6 +3602,7 @@ typedef enum {
    nir_lower_find_lsb64 = (1 << 22),
    nir_lower_conv64 = (1 << 23),
    nir_lower_uadd_sat64 = (1 << 24),
+   nir_lower_iadd3_64 = (1 << 25),
 } nir_lower_int64_options;
 
 typedef enum {
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index a532ee8b5d0..f204da23eff 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -3081,15 +3081,27 @@ late_optimizations.extend([
     (('iadd', a, ('ineg', 'b')), ('isub', 'a', 'b'), 'options->has_isub || options->lower_ineg'),
     (('ineg', a), ('isub', 0, a), 'options->lower_ineg'),
     (('iabs', a), ('imax', a, ('ineg', a)), 'options->lower_iabs'),
+])
+
+for s in [8, 16, 32, 64]:
+    cond = 'options->has_iadd3'
+    if s == 64:
+        cond += ' && !(options->lower_int64_options & nir_lower_iadd3_64)'
+
+    iadd = "iadd@{}".format(s)
+
     # On Intel GPUs, the constant field for an ADD3 instruction must be either
     # int16_t or uint16_t.
-    (('iadd', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), 'c(is_not_const)'), ('iadd3', a, b, c), 'options->has_iadd3'),
-    (('iadd', ('iadd(is_used_once)', '#a(is_16_bits)', 'b(is_not_const)'), 'c(is_not_const)'), ('iadd3', a, b, c), 'options->has_iadd3'),
-    (('iadd', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c(is_16_bits)'), ('iadd3', a, b, c), 'options->has_iadd3'),
-    (('iadd', ('ineg', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)')), 'c(is_not_const)'), ('iadd3', ('ineg', a), ('ineg', b), c), 'options->has_iadd3'),
-    (('iadd', ('ineg', ('iadd(is_used_once)', '#a(is_16_bits)', 'b(is_not_const)')), 'c(is_not_const)'), ('iadd3', ('ineg', a), ('ineg', b), c), 'options->has_iadd3'),
-    (('iadd', ('ineg', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)')), '#c(is_16_bits)'), ('iadd3', ('ineg', a), ('ineg', b), c), 'options->has_iadd3'),
+    late_optimizations.extend([
+        ((iadd, ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), 'c(is_not_const)'), ('iadd3', a, b, c), cond),
+        ((iadd, ('iadd(is_used_once)', '#a(is_16_bits)', 'b(is_not_const)'), 'c(is_not_const)'), ('iadd3', a, b, c), cond),
+        ((iadd, ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c(is_16_bits)'), ('iadd3', a, b, c), cond),
+        ((iadd, ('ineg', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)')), 'c(is_not_const)'), ('iadd3', ('ineg', a), ('ineg', b), c), cond),
+        ((iadd, ('ineg', ('iadd(is_used_once)', '#a(is_16_bits)', 'b(is_not_const)')), 'c(is_not_const)'), ('iadd3', ('ineg', a), ('ineg', b), c), cond),
+        ((iadd, ('ineg', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)')), '#c(is_16_bits)'), ('iadd3', ('ineg', a), ('ineg', b), c), cond),
+    ])
 
+late_optimizations.extend([
     # fneg_lo / fneg_hi
     (('vec2(is_only_used_as_float)', ('fneg@16', a), b), ('fmul', ('vec2', a, b), ('vec2', -1.0, 1.0)), 'options->vectorize_vec2_16bit'),
     (('vec2(is_only_used_as_float)', a, ('fneg@16', b)), ('fmul', ('vec2', a, b), ('vec2', 1.0, -1.0)), 'options->vectorize_vec2_16bit'),
diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c
index 3a61af00211..5ebab79d9fd 100644
--- a/src/intel/compiler/brw_compiler.c
+++ b/src/intel/compiler/brw_compiler.c
@@ -112,7 +112,8 @@ brw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo)
       nir_lower_imul_high64 |
       nir_lower_find_lsb64 |
       nir_lower_ufind_msb64 |
-      nir_lower_bit_count64;
+      nir_lower_bit_count64 |
+      nir_lower_iadd3_64;
    nir_lower_doubles_options fp64_options =
       nir_lower_drcp |
       nir_lower_dsqrt |
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 324dc85404f..a099e8ec34c 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1154,6 +1154,7 @@ fs_nir_emit_alu(nir_to_brw_state &ntb, nir_alu_instr *instr,
       break;
 
    case nir_op_iadd3:
+      assert(instr->def.bit_size < 64);
       bld.ADD3(result, op[0], op[1], op[2]);
       break;
 