diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c
index 374d254b4c5..3b6ed6a1518 100644
--- a/src/intel/compiler/brw_eu_validate.c
+++ b/src/intel/compiler/brw_eu_validate.c
@@ -2103,8 +2103,10 @@ instruction_restrictions(const struct brw_isa_info *isa,
          ERROR_IF(dst_type != BRW_TYPE_F &&
                   dst_type != BRW_TYPE_HF &&
                   dst_type != BRW_TYPE_D &&
-                  dst_type != BRW_TYPE_W,
-                  "CSEL destination type must be F, HF, D, or W");
+                  dst_type != BRW_TYPE_W &&
+                  dst_type != BRW_TYPE_UD &&
+                  dst_type != BRW_TYPE_UW,
+                  "CSEL destination type must be F, HF, *D, or *W");
       }
 
       for (unsigned s = 0; s < 3; s++) {
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 3f5d217ca80..ef9bc09ec5e 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -668,6 +668,7 @@ bool brw_fs_lower_regioning(fs_visitor &s);
 bool brw_fs_lower_scoreboard(fs_visitor &s);
 bool brw_fs_lower_sends_overlapping_payload(fs_visitor &s);
 bool brw_fs_lower_simd_width(fs_visitor &s);
+bool brw_fs_lower_csel(fs_visitor &s);
 bool brw_fs_lower_sub_sat(fs_visitor &s);
 bool brw_fs_lower_uniform_pull_constant_loads(fs_visitor &s);
 void brw_fs_lower_vgrfs_to_fixed_grfs(fs_visitor &s);
diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h
index 8c95afaeb7c..89f3a203af1 100644
--- a/src/intel/compiler/brw_fs_builder.h
+++ b/src/intel/compiler/brw_fs_builder.h
@@ -756,17 +756,11 @@ namespace brw {
       CSEL(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1,
            const fs_reg &src2, brw_conditional_mod condition) const
       {
-         /* CSEL only operates on floats, so we can't do integer </<=/>=/>
-          * comparisons. Zero/non-zero (== and !=) comparisons almost work.
-          * 0x80000000 fails because it is -0.0, and -0.0 == 0.0.
-          */
-         assert(src2.type == BRW_TYPE_F);
-
          return set_condmod(condition,
                             emit(BRW_OPCODE_CSEL,
-                                 retype(dst, BRW_TYPE_F),
-                                 retype(src0, BRW_TYPE_F),
-                                 retype(src1, BRW_TYPE_F),
+                                 retype(dst, src2.type),
+                                 retype(src0, src2.type),
+                                 retype(src1, src2.type),
                                  src2));
       }
 
diff --git a/src/intel/compiler/brw_fs_lower.cpp b/src/intel/compiler/brw_fs_lower.cpp
index d6d4da8e3fc..7cfeadd39be 100644
--- a/src/intel/compiler/brw_fs_lower.cpp
+++ b/src/intel/compiler/brw_fs_lower.cpp
@@ -130,6 +130,87 @@ brw_fs_lower_load_payload(fs_visitor &s)
    return progress;
 }
 
+/**
+ * Lower CSEL with unsupported types to CMP+SEL.
+ *
+ * Or, for unsigned ==/!= comparisons, simply change the types.
+ */
+bool
+brw_fs_lower_csel(fs_visitor &s)
+{
+   const intel_device_info *devinfo = s.devinfo;
+   bool progress = false;
+
+   foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) {
+      if (inst->opcode != BRW_OPCODE_CSEL)
+         continue;
+
+      bool supported = false;
+      enum brw_reg_type orig_type = inst->src[2].type;
+      enum brw_reg_type new_type = orig_type;
+
+      switch (orig_type) {
+      case BRW_TYPE_F:
+         /* Gfx9 CSEL can only do F */
+         supported = true;
+         break;
+      case BRW_TYPE_HF:
+      case BRW_TYPE_W:
+      case BRW_TYPE_D:
+         /* Gfx11+ CSEL can do HF, W, and D. Note that we can't simply
+          * retype integer ==/!= comparisons as float on earlier hardware
+          * because it breaks for 0x80000000 and 0 (-0.0 == 0.0).
+          */
+         supported = devinfo->ver >= 11;
+         break;
+      case BRW_TYPE_UW:
+      case BRW_TYPE_UD:
+         /* CSEL doesn't support UW/UD but we can simply retype to use the
+          * signed types when comparing with == or !=.
+          */
+         supported = devinfo->ver >= 11 &&
+                     (inst->conditional_mod == BRW_CONDITIONAL_EQ ||
+                      inst->conditional_mod == BRW_CONDITIONAL_NEQ);
+
+         /* Bspec 47408, Gfx125+ CSEL supports both signed and unsigned
+          * integer types, so the retype is only needed on older hardware.
+          */
+         if (devinfo->verx10 < 125) {
+            new_type = inst->src[2].type == BRW_TYPE_UD ?
+                       BRW_TYPE_D : BRW_TYPE_W;
+         }
+         break;
+      default:
+         break;
+      }
+
+      if (!supported) {
+         const fs_builder ibld(&s, block, inst);
+
+         /* CSEL: dst = (src2 cmod 0) ? src0 : src1 */
+         fs_reg zero = brw_imm_reg(orig_type);
+         ibld.CMP(retype(brw_null_reg(), orig_type),
+                  inst->src[2], zero, inst->conditional_mod);
+
+         inst->opcode = BRW_OPCODE_SEL;
+         inst->predicate = BRW_PREDICATE_NORMAL;
+         inst->conditional_mod = BRW_CONDITIONAL_NONE;
+         inst->resize_sources(2);
+         progress = true;
+      } else if (new_type != orig_type) {
+         inst->src[0].type = new_type;
+         inst->src[1].type = new_type;
+         inst->src[2].type = new_type;
+         progress = true;
+      }
+   }
+
+   if (progress)
+      s.invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
+
+   return progress;
+}
+
 bool
 brw_fs_lower_sub_sat(fs_visitor &s)
 {
diff --git a/src/intel/compiler/brw_fs_opt.cpp b/src/intel/compiler/brw_fs_opt.cpp
index a4bc71d1317..7705595f3b5 100644
--- a/src/intel/compiler/brw_fs_opt.cpp
+++ b/src/intel/compiler/brw_fs_opt.cpp
@@ -85,6 +85,7 @@ brw_fs_optimize(fs_visitor &s)
       OPT(brw_fs_opt_dead_code_eliminate);
    }
 
+   OPT(brw_fs_lower_csel);
    OPT(brw_fs_lower_simd_width);
    OPT(brw_fs_lower_barycentrics);
    OPT(brw_fs_lower_logical_sends);