aco: fix dual source blending on GFX11

The generated assembly looks similar to what LLVM emits.

Cc: 22.3 mesa-stable
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19643>
This commit is contained in:
Samuel Pitoiset
2022-11-16 15:19:32 +01:00
committed by Marge Bot
parent bb90d29660
commit fb781bfb0a
5 changed files with 93 additions and 10 deletions
+85 -10
View File
@@ -11188,9 +11188,25 @@ struct mrt_color_export {
bool enable_mrt_output_nan_fixup;
};
/* Arguments of a single color export, stored so that the export instruction
 * can be emitted separately from the code that computes the values.
 */
struct aco_export_mrt {
/* The four operands handed to the export instruction. */
Operand out[4];
/* Bitmask forwarded as the export's enabled-channels argument; a value of 0
 * is treated by create_fs_exports() as "this MRT was not exported".
 */
unsigned enabled_channels;
/* Forwarded as the export's target argument. */
unsigned target;
/* Forwarded as the export's last argument.
 * NOTE(review): presumably selects a compressed export - confirm.
 */
bool compr;
};
/* Emit the export instruction described by `mrt` at the end of the block
 * currently being built in `ctx`.
 */
static void
export_mrt(isel_context* ctx, const struct aco_export_mrt* mrt)
{
   const Operand* values = mrt->out;

   Builder bld(ctx->program, ctx->block);
   bld.exp(aco_opcode::exp, values[0], values[1], values[2], values[3], mrt->enabled_channels,
           mrt->target, mrt->compr);
}
static bool
export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export *out,
bool is_ps_epilog)
export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out, bool is_ps_epilog,
struct aco_export_mrt* mrt)
{
Builder bld(ctx->program, ctx->block);
Operand values[4];
@@ -11358,8 +11374,12 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export *out,
compr = false;
}
bld.exp(aco_opcode::exp, values[0], values[1], values[2], values[3], enabled_channels, target,
compr);
for (unsigned i = 0; i < 4; i++)
mrt->out[i] = values[i];
mrt->target = target;
mrt->enabled_channels = enabled_channels;
mrt->compr = compr;
return true;
}
@@ -11427,6 +11447,31 @@ create_fs_jump_to_epilog(isel_context* ctx)
ctx->block->instructions.emplace_back(std::move(jump));
}
/* Append a p_dual_src_export_gfx11 pseudo instruction to the current block.
 *
 * mrt0/mrt1 describe the two color exports used for dual source blending.
 * Either pointer may be NULL when the corresponding MRT was not exported; its
 * four operands are then left undefined.
 */
static void
create_fs_dual_src_export_gfx11(isel_context* ctx, const struct aco_export_mrt* mrt0,
                                const struct aco_export_mrt* mrt1)
{
   Builder bld(ctx->program, ctx->block);

   aco_ptr<Pseudo_instruction> exp{create_instruction<Pseudo_instruction>(
      aco_opcode::p_dual_src_export_gfx11, Format::PSEUDO, 8, 6)};

   /* Operands 0-3 carry MRT0, operands 4-7 carry MRT1. */
   for (unsigned i = 0; i < 4; i++) {
      exp->operands[i] = mrt0 ? mrt0->out[i] : Operand(v1);
      exp->operands[i].setLateKill(true);
      exp->operands[i + 4] = mrt1 ? mrt1->out[i] : Operand(v1);
      exp->operands[i + 4].setLateKill(true);
   }

   /* Size the temporaries from whichever MRT is present. The callers pass NULL
    * for an MRT that was not exported, so mrt0 must not be dereferenced
    * unconditionally. If neither is available, fall back to the four channels
    * a single export can carry.
    */
   const struct aco_export_mrt* sized_by = mrt0 ? mrt0 : mrt1;
   const unsigned num_channels = sized_by ? util_bitcount(sized_by->enabled_channels) : 4;
   RegClass type = RegClass(RegType::vgpr, num_channels);

   exp->definitions[0] = bld.def(type); /* mrt0 */
   exp->definitions[1] = bld.def(type); /* mrt1 */
   exp->definitions[2] = bld.def(v1);
   exp->definitions[3] = bld.def(bld.lm);
   exp->definitions[4] = bld.def(bld.lm, vcc);
   exp->definitions[5] = bld.def(s1, scc);
   ctx->block->instructions.emplace_back(std::move(exp));
}
static void
create_fs_exports(isel_context* ctx)
{
@@ -11441,10 +11486,15 @@ create_fs_exports(isel_context* ctx)
if (ctx->program->info.ps.has_epilog) {
create_fs_jump_to_epilog(ctx);
} else {
struct aco_export_mrt mrts[8];
unsigned compacted_mrt_index = 0;
/* Export all color render targets. */
for (unsigned i = FRAG_RESULT_DATA0; i < FRAG_RESULT_DATA7 + 1; ++i) {
unsigned idx = i - FRAG_RESULT_DATA0;
mrts[idx].enabled_channels = 0;
if (!ctx->outputs.mask[i])
continue;
@@ -11452,7 +11502,7 @@ create_fs_exports(isel_context* ctx)
out.slot = compacted_mrt_index;
out.write_mask = ctx->outputs.mask[i];
out.col_format = (ctx->options->key.ps.col_format >> (4 * (i - FRAG_RESULT_DATA0))) & 0xf;
out.col_format = (ctx->options->key.ps.col_format >> (4 * idx)) & 0xf;
for (unsigned c = 0; c < 4; ++c) {
if (out.write_mask & (1 << c)) {
@@ -11462,14 +11512,25 @@ create_fs_exports(isel_context* ctx)
}
}
if (export_fs_mrt_color(ctx, &out, false)) {
if (export_fs_mrt_color(ctx, &out, false, &mrts[compacted_mrt_index])) {
compacted_mrt_index++;
exported = true;
}
}
if (!exported)
if (exported) {
if (ctx->options->gfx_level >= GFX11 && ctx->options->key.ps.mrt0_is_dual_src) {
struct aco_export_mrt* mrt0 = mrts[0].enabled_channels ? &mrts[0] : NULL;
struct aco_export_mrt* mrt1 = mrts[1].enabled_channels ? &mrts[1] : NULL;
create_fs_dual_src_export_gfx11(ctx, mrt0, mrt1);
} else {
for (unsigned i = 0; i < compacted_mrt_index; i++) {
export_mrt(ctx, &mrts[i]);
}
}
} else {
create_fs_null_export(ctx);
}
}
ctx->block->kind |= block_kind_export_end;
@@ -12582,7 +12643,8 @@ select_ps_epilog(Program* program, const struct aco_ps_epilog_key* key, ac_shade
Builder bld(ctx.program, ctx.block);
/* Export all color render targets */
bool exported = false;
struct aco_export_mrt mrts[8];
uint8_t exported_mrts = 0;
for (unsigned i = 0; i < 8; i++) {
unsigned col_format = (key->spi_shader_col_format >> (i * 4)) & 0xf;
@@ -12604,11 +12666,24 @@ select_ps_epilog(Program* program, const struct aco_ps_epilog_key* key, ac_shade
out.values[c] = Operand(emit_extract_vector(&ctx, inputs, c, v1));
}
exported |= export_fs_mrt_color(&ctx, &out, true);
if (export_fs_mrt_color(&ctx, &out, true, &mrts[i])) {
exported_mrts |= 1 << i;
}
}
if (!exported)
if (exported_mrts) {
if (ctx.options->gfx_level >= GFX11 && key->mrt0_is_dual_src) {
struct aco_export_mrt* mrt0 = (exported_mrts & BITFIELD_BIT(0)) ? &mrts[0] : NULL;
struct aco_export_mrt* mrt1 = (exported_mrts & BITFIELD_BIT(1)) ? &mrts[1] : NULL;
create_fs_dual_src_export_gfx11(&ctx, mrt0, mrt1);
} else {
u_foreach_bit (i, exported_mrts) {
export_mrt(&ctx, &mrts[i]);
}
}
} else {
create_fs_null_export(&ctx);
}
program->config->float_mode = program->blocks[0].fp_mode.val;
+3
View File
@@ -67,6 +67,8 @@ struct aco_ps_epilog_key {
uint8_t color_is_int8;
uint8_t color_is_int10;
uint8_t enable_mrt_output_nan_fixup;
bool mrt0_is_dual_src;
};
struct aco_vp_output_info {
@@ -173,6 +175,7 @@ struct aco_stage_input {
/* Used to export alpha through MRTZ for alpha-to-coverage (GFX11+). */
bool alpha_to_coverage_via_mrtz;
bool mrt0_is_dual_src;
} ps;
};
+2
View File
@@ -128,6 +128,7 @@ radv_aco_convert_ps_epilog_key(struct aco_ps_epilog_key *aco_info,
ASSIGN_FIELD(color_is_int8);
ASSIGN_FIELD(color_is_int10);
ASSIGN_FIELD(enable_mrt_output_nan_fixup);
ASSIGN_FIELD(mrt0_is_dual_src);
}
static inline void
@@ -146,6 +147,7 @@ radv_aco_convert_pipe_key(struct aco_stage_input *aco_info,
ASSIGN_FIELD(tcs.tess_input_vertices);
ASSIGN_FIELD(ps.col_format);
ASSIGN_FIELD(ps.alpha_to_coverage_via_mrtz);
ASSIGN_FIELD(ps.mrt0_is_dual_src);
}
static inline void
+2
View File
@@ -3994,6 +3994,7 @@ radv_pipeline_create_ps_epilog(struct radv_graphics_pipeline *pipeline,
.color_is_int8 = pipeline_key->ps.is_int8,
.color_is_int10 = pipeline_key->ps.is_int10,
.enable_mrt_output_nan_fixup = pipeline_key->ps.enable_mrt_output_nan_fixup,
.mrt0_is_dual_src = pipeline_key->ps.mrt0_is_dual_src,
};
pipeline->ps_epilog = radv_create_ps_epilog(device, &epilog_key);
@@ -5918,6 +5919,7 @@ radv_graphics_lib_pipeline_init(struct radv_graphics_lib_pipeline *pipeline,
.color_is_int8 = blend.col_format_is_int8,
.color_is_int10 = blend.col_format_is_int10,
.enable_mrt_output_nan_fixup = key.ps.enable_mrt_output_nan_fixup,
.mrt0_is_dual_src = blend.mrt0_is_dual_src,
};
pipeline->base.ps_epilog = radv_create_ps_epilog(device, &epilog_key);
+1
View File
@@ -420,6 +420,7 @@ struct radv_ps_epilog_key {
uint8_t color_is_int10;
uint8_t enable_mrt_output_nan_fixup;
bool mrt0_is_dual_src;
bool wave32;
};