intel/brw: Add a lower_csel pass and allow building it for all types

We can do CSEL on F, HF, *W, and *D on Gfx11+.  Gfx9 can only do F.

We can lower unsupported types to CMP+SEL, allowing us to use CSEL
in the IR and not worry about the limitations.

Rework: (Sagar)
- Update validation pass for CSEL

Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29316>
This commit is contained in:
Kenneth Graunke
2024-05-21 13:40:34 -07:00
committed by Marge Bot
parent cb7b1a8d23
commit 1e69ec3b8d
5 changed files with 90 additions and 11 deletions
+4 -2
View File
@@ -2103,8 +2103,10 @@ instruction_restrictions(const struct brw_isa_info *isa,
ERROR_IF(dst_type != BRW_TYPE_F &&
dst_type != BRW_TYPE_HF &&
dst_type != BRW_TYPE_D &&
dst_type != BRW_TYPE_W,
"CSEL destination type must be F, HF, D, or W");
dst_type != BRW_TYPE_W &&
dst_type != BRW_TYPE_UD &&
dst_type != BRW_TYPE_UW,
"CSEL destination type must be F, HF, *D, or *W");
}
for (unsigned s = 0; s < 3; s++) {
+1
View File
@@ -668,6 +668,7 @@ bool brw_fs_lower_regioning(fs_visitor &s);
bool brw_fs_lower_scoreboard(fs_visitor &s);
bool brw_fs_lower_sends_overlapping_payload(fs_visitor &s);
bool brw_fs_lower_simd_width(fs_visitor &s);
bool brw_fs_lower_csel(fs_visitor &s);
bool brw_fs_lower_sub_sat(fs_visitor &s);
bool brw_fs_lower_uniform_pull_constant_loads(fs_visitor &s);
void brw_fs_lower_vgrfs_to_fixed_grfs(fs_visitor &s);
+3 -9
View File
@@ -756,17 +756,11 @@ namespace brw {
CSEL(const fs_reg &dst, const fs_reg &src0, const fs_reg &src1,
const fs_reg &src2, brw_conditional_mod condition) const
{
/* CSEL only operates on floats, so we can't do integer </<=/>=/>
* comparisons. Zero/non-zero (== and !=) comparisons almost work.
* 0x80000000 fails because it is -0.0, and -0.0 == 0.0.
*/
assert(src2.type == BRW_TYPE_F);
return set_condmod(condition,
emit(BRW_OPCODE_CSEL,
retype(dst, BRW_TYPE_F),
retype(src0, BRW_TYPE_F),
retype(src1, BRW_TYPE_F),
retype(dst, src2.type),
retype(src0, src2.type),
retype(src1, src2.type),
src2));
}
+81
View File
@@ -130,6 +130,87 @@ brw_fs_lower_load_payload(fs_visitor &s)
return progress;
}
/**
 * Legalize CSEL instructions for the current hardware generation.
 *
 * CSEL computes "dst = (src2 <cond> 0) ? src0 : src1".  The type of src2
 * decides what happens to each CSEL found in the shader:
 *
 *  - If the type/condition combination is treated as supported, the
 *    instruction is kept; unsigned sources may be retyped to the matching
 *    signed type (only done for ==/!= comparisons, and only before Gfx12.5).
 *
 *  - Otherwise the instruction is rewritten as a CMP against zero followed
 *    by a predicated SEL of the first two sources.
 *
 * Returns true if any instruction was changed.
 */
bool
brw_fs_lower_csel(fs_visitor &s)
{
   const intel_device_info *devinfo = s.devinfo;
   bool progress = false;

   foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) {
      if (inst->opcode != BRW_OPCODE_CSEL)
         continue;

      const enum brw_reg_type orig_type = inst->src[2].type;
      enum brw_reg_type new_type = orig_type;
      bool supported;

      if (orig_type == BRW_TYPE_F) {
         /* F is the one type even Gfx9 CSEL can handle. */
         supported = true;
      } else if (orig_type == BRW_TYPE_HF ||
                 orig_type == BRW_TYPE_W ||
                 orig_type == BRW_TYPE_D) {
         /* HF, W and D need Gfx11+.  Retyping an integer ==/!= comparison
          * as float on older hardware is not an option: 0x80000000 is -0.0,
          * and -0.0 == 0.0, so it would compare equal to zero.
          */
         supported = devinfo->ver >= 11;
      } else if (orig_type == BRW_TYPE_UW ||
                 orig_type == BRW_TYPE_UD) {
         /* Signedness does not matter when testing against zero with ==
          * or !=, so those are the only unsigned comparisons kept as CSEL.
          *
          * NOTE(review): this means ordered unsigned comparisons are still
          * lowered to CMP+SEL on Gfx12.5+, even though the hardware is
          * documented to accept unsigned types there.
          */
         const bool is_equality_test =
            inst->conditional_mod == BRW_CONDITIONAL_EQ ||
            inst->conditional_mod == BRW_CONDITIONAL_NEQ;

         supported = devinfo->ver >= 11 && is_equality_test;

         /* Bspec 47408: Gfx12.5+ CSEL supports both signed and unsigned
          * integer types, so the switch to a signed type is only needed
          * on earlier hardware.
          */
         if (devinfo->verx10 < 125)
            new_type = orig_type == BRW_TYPE_UD ? BRW_TYPE_D : BRW_TYPE_W;
      } else {
         /* Every other type is lowered. */
         supported = false;
      }

      if (supported) {
         /* Nothing to rewrite unless a signed replacement type was picked. */
         if (new_type == orig_type)
            continue;

         for (unsigned i = 0; i < 3; i++)
            inst->src[i].type = new_type;

         progress = true;
         continue;
      }

      /* Unsupported: emit "CMP.<cond> null, src2, 0" ahead of the CSEL,
       * using the CSEL's own condition, then turn the CSEL itself into a
       * SEL predicated on the resulting flag.
       */
      const fs_builder ibld(&s, block, inst);
      fs_reg imm_zero = brw_imm_reg(orig_type);
      ibld.CMP(retype(brw_null_reg(), orig_type),
               inst->src[2], imm_zero, inst->conditional_mod);

      inst->opcode = BRW_OPCODE_SEL;
      inst->predicate = BRW_PREDICATE_NORMAL;
      inst->conditional_mod = BRW_CONDITIONAL_NONE;
      /* Drop src2; SEL only takes the two values being chosen between. */
      inst->resize_sources(2);
      progress = true;
   }

   if (progress)
      s.invalidate_analysis(DEPENDENCY_INSTRUCTIONS);

   return progress;
}
bool
brw_fs_lower_sub_sat(fs_visitor &s)
{
+1
View File
@@ -85,6 +85,7 @@ brw_fs_optimize(fs_visitor &s)
OPT(brw_fs_opt_dead_code_eliminate);
}
OPT(brw_fs_lower_csel);
OPT(brw_fs_lower_simd_width);
OPT(brw_fs_lower_barycentrics);
OPT(brw_fs_lower_logical_sends);