brw: Constant propagation and constant combining support for BFN
v2: Commute immediate values out of src[1]. Reviewed-by: Matt Turner <mattst88@gmail.com> [v1] Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37186>
This commit is contained in:
@@ -990,7 +990,8 @@ supports_src_as_imm(const struct intel_device_info *devinfo, const brw_inst *ins
|
||||
{
|
||||
switch (inst->opcode) {
|
||||
case BRW_OPCODE_ADD3:
|
||||
/* ADD3 can use src0 or src2 in Gfx12.5. */
|
||||
case BRW_OPCODE_BFN:
|
||||
/* ADD3 and BFN can use src0 or src2 in Gfx12.5. */
|
||||
return src_idx != 1;
|
||||
|
||||
case BRW_OPCODE_BFE:
|
||||
@@ -1052,6 +1053,12 @@ can_promote_src_as_imm(const struct intel_device_info *devinfo, brw_inst *inst,
|
||||
{
|
||||
bool can_promote = false;
|
||||
|
||||
/* src[3] of BFN is special. It must be immediate. Don't mess with it. */
|
||||
if (inst->opcode == BRW_OPCODE_BFN && src_idx == 3) {
|
||||
assert(inst->src[src_idx].type == BRW_TYPE_UD);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!supports_src_as_imm(devinfo, inst, src_idx))
|
||||
return false;
|
||||
|
||||
@@ -1067,10 +1074,11 @@ can_promote_src_as_imm(const struct intel_device_info *devinfo, brw_inst *inst,
|
||||
case BRW_TYPE_D:
|
||||
case BRW_TYPE_UD: {
|
||||
/* ADD3, CSEL, and MAD can mix signed and unsigned types. Only BFE
|
||||
* cannot.
|
||||
* cannot. BFN only supports unsigned types.
|
||||
*/
|
||||
if (inst->src[src_idx].type == BRW_TYPE_D ||
|
||||
inst->opcode != BRW_OPCODE_BFE) {
|
||||
(inst->opcode != BRW_OPCODE_BFE &&
|
||||
inst->opcode != BRW_OPCODE_BFN)) {
|
||||
int16_t w;
|
||||
if (representable_as_w(inst->src[src_idx].d, &w)) {
|
||||
inst->src[src_idx] = brw_imm_w(w);
|
||||
@@ -1079,6 +1087,11 @@ can_promote_src_as_imm(const struct intel_device_info *devinfo, brw_inst *inst,
|
||||
}
|
||||
}
|
||||
|
||||
/* FINISHME: BFN handling could be better. If the bit-wise complement of
|
||||
* the constant is representable as UW, the constant and the function
|
||||
* control value could be changed. This would probably make a good
|
||||
* algebraic optimization.
|
||||
*/
|
||||
if (inst->src[src_idx].type == BRW_TYPE_UD ||
|
||||
inst->opcode != BRW_OPCODE_BFE) {
|
||||
uint16_t uw;
|
||||
@@ -1330,6 +1343,7 @@ brw_opt_combine_constants(brw_shader &s)
|
||||
*/
|
||||
case BRW_OPCODE_BFE:
|
||||
case BRW_OPCODE_ADD3:
|
||||
case BRW_OPCODE_BFN:
|
||||
case BRW_OPCODE_CSEL:
|
||||
case BRW_OPCODE_MAD: {
|
||||
if (inst->opcode == BRW_OPCODE_MAD &&
|
||||
|
||||
@@ -35,6 +35,7 @@
|
||||
#include "util/bitset.h"
|
||||
#include "util/u_math.h"
|
||||
#include "util/rb_tree.h"
|
||||
#include "util/lut.h"
|
||||
#include "brw_shader.h"
|
||||
#include "brw_analysis.h"
|
||||
#include "brw_cfg.h"
|
||||
@@ -1255,6 +1256,7 @@ try_constant_propagate_value(const intel_device_info *devinfo,
|
||||
case BRW_OPCODE_BFE:
|
||||
case BRW_OPCODE_BFI1:
|
||||
case BRW_OPCODE_BFI2:
|
||||
case BRW_OPCODE_BFN:
|
||||
case BRW_OPCODE_ROL:
|
||||
case BRW_OPCODE_ROR:
|
||||
case BRW_OPCODE_SHL:
|
||||
@@ -1362,6 +1364,24 @@ commute_immediates(brw_inst *inst)
|
||||
}
|
||||
}
|
||||
|
||||
/* Like ADD3, BFN can have the immediate as src0 or src2. Using one or the
|
||||
* other consistently makes assembly dumps more readable, so we arbitrarily
|
||||
* prefer src0.
|
||||
*/
|
||||
if (inst->opcode == BRW_OPCODE_BFN) {
|
||||
if (inst->src[1].file == IMM) {
|
||||
const unsigned bfc = inst->src[3].ud;
|
||||
|
||||
if (inst->src[0].file != IMM) {
|
||||
swap_srcs(inst, 0, 1);
|
||||
inst->src[3] = brw_imm_ud(util_lut3_swap_sources(bfc, 0, 1));
|
||||
} else if (inst->src[2].file != IMM) {
|
||||
swap_srcs(inst, 1, 2);
|
||||
inst->src[3] = brw_imm_ud(util_lut3_swap_sources(bfc, 1, 2));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* MAD can only have multiplicand immediate in src2. */
|
||||
if (inst->opcode == BRW_OPCODE_MAD) {
|
||||
if (inst->src[1].file == IMM && inst->src[2].file != IMM)
|
||||
|
||||
Reference in New Issue
Block a user