diff --git a/src/intel/compiler/brw/brw_opt_combine_constants.cpp b/src/intel/compiler/brw/brw_opt_combine_constants.cpp index 4f2da2c4aae..29a4e9a9105 100644 --- a/src/intel/compiler/brw/brw_opt_combine_constants.cpp +++ b/src/intel/compiler/brw/brw_opt_combine_constants.cpp @@ -990,7 +990,8 @@ supports_src_as_imm(const struct intel_device_info *devinfo, const brw_inst *ins { switch (inst->opcode) { case BRW_OPCODE_ADD3: - /* ADD3 can use src0 or src2 in Gfx12.5. */ + case BRW_OPCODE_BFN: + /* ADD3 and BFN can use src0 or src2 in Gfx12.5. */ return src_idx != 1; case BRW_OPCODE_BFE: @@ -1052,6 +1053,12 @@ can_promote_src_as_imm(const struct intel_device_info *devinfo, brw_inst *inst, { bool can_promote = false; + /* src[3] of BFN is special. It must be immediate. Don't mess with it. */ + if (inst->opcode == BRW_OPCODE_BFN && src_idx == 3) { + assert(inst->src[src_idx].type == BRW_TYPE_UD); + return true; + } + if (!supports_src_as_imm(devinfo, inst, src_idx)) return false; @@ -1067,10 +1074,11 @@ can_promote_src_as_imm(const struct intel_device_info *devinfo, brw_inst *inst, case BRW_TYPE_D: case BRW_TYPE_UD: { /* ADD3, CSEL, and MAD can mix signed and unsiged types. Only BFE - * cannot. + * cannot. BFN only supports unsigned types. */ if (inst->src[src_idx].type == BRW_TYPE_D || - inst->opcode != BRW_OPCODE_BFE) { + (inst->opcode != BRW_OPCODE_BFE && + inst->opcode != BRW_OPCODE_BFN)) { int16_t w; if (representable_as_w(inst->src[src_idx].d, &w)) { inst->src[src_idx] = brw_imm_w(w); @@ -1079,6 +1087,11 @@ can_promote_src_as_imm(const struct intel_device_info *devinfo, brw_inst *inst, } } + /* FINISHME: BFN handling could be better. If the bit-wise compliment of + * the constant is representable as UW, the constant and the function + * control value could be changed. This would probably make a good + * algebraic optimization. + */ if (inst->src[src_idx].type == BRW_TYPE_UD || inst->opcode != BRW_OPCODE_BFE) { uint16_t uw; @@ -1330,6 +1343,7 @@ brw_opt_combine_constants(brw_shader &s) */ case BRW_OPCODE_BFE: case BRW_OPCODE_ADD3: + case BRW_OPCODE_BFN: case BRW_OPCODE_CSEL: case BRW_OPCODE_MAD: { if (inst->opcode == BRW_OPCODE_MAD && diff --git a/src/intel/compiler/brw/brw_opt_copy_propagation.cpp b/src/intel/compiler/brw/brw_opt_copy_propagation.cpp index eac3936375f..4c6aaed6703 100644 --- a/src/intel/compiler/brw/brw_opt_copy_propagation.cpp +++ b/src/intel/compiler/brw/brw_opt_copy_propagation.cpp @@ -35,6 +35,7 @@ #include "util/bitset.h" #include "util/u_math.h" #include "util/rb_tree.h" +#include "util/lut.h" #include "brw_shader.h" #include "brw_analysis.h" #include "brw_cfg.h" @@ -1255,6 +1256,7 @@ try_constant_propagate_value(const intel_device_info *devinfo, case BRW_OPCODE_BFE: case BRW_OPCODE_BFI1: case BRW_OPCODE_BFI2: + case BRW_OPCODE_BFN: case BRW_OPCODE_ROL: case BRW_OPCODE_ROR: case BRW_OPCODE_SHL: @@ -1362,6 +1364,24 @@ commute_immediates(brw_inst *inst) } } + /* Like ADD3, BFN can have the immediate as src0 or src2. Using one or the + * other consistently makes assembly dumps more readable, so we arbitrarily + * prefer src0. + */ + if (inst->opcode == BRW_OPCODE_BFN) { + if (inst->src[1].file == IMM) { + const unsigned bfc = inst->src[3].ud; + + if (inst->src[0].file != IMM) { + swap_srcs(inst, 0, 1); + inst->src[3] = brw_imm_ud(util_lut3_swap_sources(bfc, 0, 1)); + } else if (inst->src[2].file != IMM) { + swap_srcs(inst, 1, 2); + inst->src[3] = brw_imm_ud(util_lut3_swap_sources(bfc, 1, 2)); + } + } + } + /* MAD can only have mutliplicand immediate in src2. */ if (inst->opcode == BRW_OPCODE_MAD) { if (inst->src[1].file == IMM && inst->src[2].file != IMM)