intel/brw: Add a lower_csel pass and allow building it for all types
We can do CSEL on F, HF, *W, and *D on Gfx11+. Gfx9 can only do F. We can lower unsupported types to CMP+SEL, allowing us to use CSEL in the IR and not worry about the limitations. Rework: (Sagar) - Update validation pass for CSEL Reviewed-by: Caio Oliveira <caio.oliveira@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29316>
This commit is contained in:
committed by
Marge Bot
parent
cb7b1a8d23
commit
1e69ec3b8d
@@ -2103,8 +2103,10 @@ instruction_restrictions(const struct brw_isa_info *isa,
|
||||
ERROR_IF(dst_type != BRW_TYPE_F &&
|
||||
dst_type != BRW_TYPE_HF &&
|
||||
dst_type != BRW_TYPE_D &&
|
||||
dst_type != BRW_TYPE_W,
|
||||
"CSEL destination type must be F, HF, D, or W");
|
||||
dst_type != BRW_TYPE_W &&
|
||||
dst_type != BRW_TYPE_UD &&
|
||||
dst_type != BRW_TYPE_UW,
|
||||
"CSEL destination type must be F, HF, *D, or *W");
|
||||
}
|
||||
|
||||
for (unsigned s = 0; s < 3; s++) {
|
||||
|
||||
@@ -668,6 +668,7 @@ bool brw_fs_lower_regioning(fs_visitor &s);
|
||||
bool brw_fs_lower_scoreboard(fs_visitor &s);
|
||||
bool brw_fs_lower_sends_overlapping_payload(fs_visitor &s);
|
||||
bool brw_fs_lower_simd_width(fs_visitor &s);
|
||||
bool brw_fs_lower_csel(fs_visitor &s);
|
||||
bool brw_fs_lower_sub_sat(fs_visitor &s);
|
||||
bool brw_fs_lower_uniform_pull_constant_loads(fs_visitor &s);
|
||||
void brw_fs_lower_vgrfs_to_fixed_grfs(fs_visitor &s);
|
||||
|
||||
@@ -756,17 +756,11 @@ namespace brw {
|
||||
CSEL(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1,
|
||||
const fs_reg &src2, brw_conditional_mod condition) const
|
||||
{
|
||||
/* CSEL only operates on floats, so we can't do integer </<=/>=/>
|
||||
* comparisons. Zero/non-zero (== and !=) comparisons almost work.
|
||||
* 0x80000000 fails because it is -0.0, and -0.0 == 0.0.
|
||||
*/
|
||||
assert(src2.type == BRW_TYPE_F);
|
||||
|
||||
return set_condmod(condition,
|
||||
emit(BRW_OPCODE_CSEL,
|
||||
retype(dst, BRW_TYPE_F),
|
||||
retype(src0, BRW_TYPE_F),
|
||||
retype(src1, BRW_TYPE_F),
|
||||
retype(dst, src2.type),
|
||||
retype(src0, src2.type),
|
||||
retype(src1, src2.type),
|
||||
src2));
|
||||
}
|
||||
|
||||
|
||||
@@ -130,6 +130,87 @@ brw_fs_lower_load_payload(fs_visitor &s)
|
||||
return progress;
|
||||
}
|
||||
|
||||
/**
|
||||
* Lower CSEL with unsupported types to CMP+SEL.
|
||||
*
|
||||
* Or, for unsigned ==/!= comparisons, simply change the types.
|
||||
*/
|
||||
bool
|
||||
brw_fs_lower_csel(fs_visitor &s)
|
||||
{
|
||||
const intel_device_info *devinfo = s.devinfo;
|
||||
bool progress = false;
|
||||
|
||||
foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) {
|
||||
if (inst->opcode != BRW_OPCODE_CSEL)
|
||||
continue;
|
||||
|
||||
bool supported = false;
|
||||
enum brw_reg_type orig_type = inst->src[2].type;
|
||||
enum brw_reg_type new_type = orig_type;
|
||||
|
||||
switch (orig_type) {
|
||||
case BRW_TYPE_F:
|
||||
/* Gfx9 CSEL can only do F */
|
||||
supported = true;
|
||||
break;
|
||||
case BRW_TYPE_HF:
|
||||
case BRW_TYPE_W:
|
||||
case BRW_TYPE_D:
|
||||
/* Gfx11+ CSEL can do HF, W, and D. Note that we can't simply
|
||||
* retype integer ==/!= comparisons as float on earlier hardware
|
||||
* because it breaks for 0x8000000 and 0 (-0.0 == 0.0).
|
||||
*/
|
||||
supported = devinfo->ver >= 11;
|
||||
break;
|
||||
case BRW_TYPE_UW:
|
||||
case BRW_TYPE_UD:
|
||||
/* CSEL doesn't support UW/UD but we can simply retype to use the
|
||||
* signed types when comparing with == or !=.
|
||||
*/
|
||||
supported = devinfo->ver >= 11 &&
|
||||
(inst->conditional_mod == BRW_CONDITIONAL_EQ ||
|
||||
inst->conditional_mod == BRW_CONDITIONAL_NEQ);
|
||||
|
||||
/* Bspec 47408, Gfx125+ CSEL does support the both signed and unsigned
|
||||
* integer types.
|
||||
*/
|
||||
if (devinfo->verx10 < 125) {
|
||||
new_type = inst->src[2].type == BRW_TYPE_UD ?
|
||||
BRW_TYPE_D : BRW_TYPE_W;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (!supported) {
|
||||
const fs_builder ibld(&s, block, inst);
|
||||
|
||||
/* CSEL: dst = src2 <op> 0 ? src0 : src1 */
|
||||
fs_reg zero = brw_imm_reg(orig_type);
|
||||
ibld.CMP(retype(brw_null_reg(), orig_type),
|
||||
inst->src[2], zero, inst->conditional_mod);
|
||||
|
||||
inst->opcode = BRW_OPCODE_SEL;
|
||||
inst->predicate = BRW_PREDICATE_NORMAL;
|
||||
inst->conditional_mod = BRW_CONDITIONAL_NONE;
|
||||
inst->resize_sources(2);
|
||||
progress = true;
|
||||
} else if (new_type != orig_type) {
|
||||
inst->src[0].type = new_type;
|
||||
inst->src[1].type = new_type;
|
||||
inst->src[2].type = new_type;
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (progress)
|
||||
s.invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
bool
|
||||
brw_fs_lower_sub_sat(fs_visitor &s)
|
||||
{
|
||||
|
||||
@@ -85,6 +85,7 @@ brw_fs_optimize(fs_visitor &s)
|
||||
OPT(brw_fs_opt_dead_code_eliminate);
|
||||
}
|
||||
|
||||
OPT(brw_fs_lower_csel);
|
||||
OPT(brw_fs_lower_simd_width);
|
||||
OPT(brw_fs_lower_barycentrics);
|
||||
OPT(brw_fs_lower_logical_sends);
|
||||
|
||||
Reference in New Issue
Block a user