From fb781bfb0a5a401b5210d613479bbdfb90e94790 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset
Date: Wed, 16 Nov 2022 15:19:32 +0100
Subject: [PATCH] aco: fix dual source blending on GFX11

Assembly looks similar to LLVM.

Cc: 22.3 mesa-stable
Signed-off-by: Samuel Pitoiset
Reviewed-by: Rhys Perry
Part-of:
---
Review notes (this section is not part of the commit message):
 - Color exports are recorded into aco_export_mrt by export_fs_mrt_color()
   and emitted afterwards, either one by one through export_mrt() or, on
   GFX11 with dual source blending, through p_dual_src_export_gfx11.
 - create_fs_dual_src_export_gfx11() accepts NULL for mrt0/mrt1 when it
   fills the operands, so it must not read mrt0->enabled_channels
   unconditionally; the channel count now falls back to mrt1.
 - The <Pseudo_instruction> template arguments and the line structure of
   the diff were restored; hunk ranges and the diffstat account for the
   added comments. The index hashes below still refer to the original
   commit.

 .../compiler/aco_instruction_selection.cpp    | 110 +++++++++++++++---
 src/amd/compiler/aco_shader_info.h            |   3 +
 src/amd/vulkan/radv_aco_shader_info.h         |   2 +
 src/amd/vulkan/radv_pipeline.c                |   2 +
 src/amd/vulkan/radv_shader.h                  |   1 +
 5 files changed, 108 insertions(+), 10 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 3464bab4eb8..60b006539f1 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -11188,9 +11188,28 @@ struct mrt_color_export {
    bool enable_mrt_output_nan_fixup;
 };
 
+/* Arguments of one color export, filled in by export_fs_mrt_color() so that the
+ * export instruction itself can be emitted later. */
+struct aco_export_mrt {
+   Operand out[4];
+   unsigned enabled_channels;
+   unsigned target;
+   bool compr;
+};
+
+/* Emit the export described by mrt. */
+static void
+export_mrt(isel_context* ctx, const struct aco_export_mrt* mrt)
+{
+   Builder bld(ctx->program, ctx->block);
+
+   bld.exp(aco_opcode::exp, mrt->out[0], mrt->out[1], mrt->out[2], mrt->out[3],
+           mrt->enabled_channels, mrt->target, mrt->compr);
+}
+
 static bool
-export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export *out,
-                    bool is_ps_epilog)
+export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out, bool is_ps_epilog,
+                    struct aco_export_mrt* mrt)
 {
    Builder bld(ctx->program, ctx->block);
    Operand values[4];
@@ -11358,8 +11377,12 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export *out,
       compr = false;
    }
 
-   bld.exp(aco_opcode::exp, values[0], values[1], values[2], values[3], enabled_channels, target,
-           compr);
+   for (unsigned i = 0; i < 4; i++)
+      mrt->out[i] = values[i];
+   mrt->target = target;
+   mrt->enabled_channels = enabled_channels;
+   mrt->compr = compr;
+
    return true;
 }
 
@@ -11427,6 +11450,43 @@ create_fs_jump_to_epilog(isel_context* ctx)
    ctx->block->instructions.emplace_back(std::move(jump));
 }
 
+/* Emit a p_dual_src_export_gfx11 pseudo instruction for the two dual source
+ * blending exports.
+ *
+ * mrt0 and/or mrt1 may be NULL when the corresponding export is missing; its
+ * operands are then left undefined. At least one of them must be provided.
+ */
+static void
+create_fs_dual_src_export_gfx11(isel_context* ctx, const struct aco_export_mrt* mrt0,
+                                const struct aco_export_mrt* mrt1)
+{
+   assert(mrt0 || mrt1);
+
+   Builder bld(ctx->program, ctx->block);
+
+   aco_ptr<Pseudo_instruction> exp{create_instruction<Pseudo_instruction>(
+      aco_opcode::p_dual_src_export_gfx11, Format::PSEUDO, 8, 6)};
+   for (unsigned i = 0; i < 4; i++) {
+      exp->operands[i] = mrt0 ? mrt0->out[i] : Operand(v1);
+      exp->operands[i].setLateKill(true);
+      exp->operands[i + 4] = mrt1 ? mrt1->out[i] : Operand(v1);
+      exp->operands[i + 4].setLateKill(true);
+   }
+
+   /* mrt0 may be NULL (see the operands above), so fall back to mrt1 for the
+    * channel count instead of dereferencing mrt0 unconditionally.
+    */
+   const struct aco_export_mrt* sized_by = mrt0 ? mrt0 : mrt1;
+   RegClass type = RegClass(RegType::vgpr, util_bitcount(sized_by->enabled_channels));
+   exp->definitions[0] = bld.def(type); /* mrt0 */
+   exp->definitions[1] = bld.def(type); /* mrt1 */
+   exp->definitions[2] = bld.def(v1);
+   exp->definitions[3] = bld.def(bld.lm);
+   exp->definitions[4] = bld.def(bld.lm, vcc);
+   exp->definitions[5] = bld.def(s1, scc);
+   ctx->block->instructions.emplace_back(std::move(exp));
+}
+
 static void
 create_fs_exports(isel_context* ctx)
 {
@@ -11441,10 +11501,15 @@ create_fs_exports(isel_context* ctx)
    if (ctx->program->info.ps.has_epilog) {
       create_fs_jump_to_epilog(ctx);
    } else {
+      struct aco_export_mrt mrts[8];
       unsigned compacted_mrt_index = 0;
 
       /* Export all color render targets. */
       for (unsigned i = FRAG_RESULT_DATA0; i < FRAG_RESULT_DATA7 + 1; ++i) {
+         unsigned idx = i - FRAG_RESULT_DATA0;
+
+         mrts[idx].enabled_channels = 0;
+
          if (!ctx->outputs.mask[i])
             continue;
 
@@ -11452,7 +11517,7 @@ create_fs_exports(isel_context* ctx)
 
          out.slot = compacted_mrt_index;
          out.write_mask = ctx->outputs.mask[i];
-         out.col_format = (ctx->options->key.ps.col_format >> (4 * (i - FRAG_RESULT_DATA0))) & 0xf;
+         out.col_format = (ctx->options->key.ps.col_format >> (4 * idx)) & 0xf;
 
          for (unsigned c = 0; c < 4; ++c) {
             if (out.write_mask & (1 << c)) {
@@ -11462,14 +11527,25 @@ create_fs_exports(isel_context* ctx)
             }
          }
 
-         if (export_fs_mrt_color(ctx, &out, false)) {
+         if (export_fs_mrt_color(ctx, &out, false, &mrts[compacted_mrt_index])) {
             compacted_mrt_index++;
             exported = true;
          }
       }
 
-      if (!exported)
+      if (exported) {
+         if (ctx->options->gfx_level >= GFX11 && ctx->options->key.ps.mrt0_is_dual_src) {
+            struct aco_export_mrt* mrt0 = mrts[0].enabled_channels ? &mrts[0] : NULL;
+            struct aco_export_mrt* mrt1 = mrts[1].enabled_channels ? &mrts[1] : NULL;
+            create_fs_dual_src_export_gfx11(ctx, mrt0, mrt1);
+         } else {
+            for (unsigned i = 0; i < compacted_mrt_index; i++) {
+               export_mrt(ctx, &mrts[i]);
+            }
+         }
+      } else {
          create_fs_null_export(ctx);
+      }
    }
 
    ctx->block->kind |= block_kind_export_end;
@@ -12582,7 +12658,8 @@ select_ps_epilog(Program* program, const struct aco_ps_epilog_key* key, ac_shade
    Builder bld(ctx.program, ctx.block);
 
    /* Export all color render targets */
-   bool exported = false;
+   struct aco_export_mrt mrts[8];
+   uint8_t exported_mrts = 0;
 
    for (unsigned i = 0; i < 8; i++) {
       unsigned col_format = (key->spi_shader_col_format >> (i * 4)) & 0xf;
@@ -12604,11 +12681,24 @@ select_ps_epilog(Program* program, const struct aco_ps_epilog_key* key, ac_shade
          out.values[c] = Operand(emit_extract_vector(&ctx, inputs, c, v1));
       }
 
-      exported |= export_fs_mrt_color(&ctx, &out, true);
+      if (export_fs_mrt_color(&ctx, &out, true, &mrts[i])) {
+         exported_mrts |= 1 << i;
+      }
    }
 
-   if (!exported)
+   if (exported_mrts) {
+      if (ctx.options->gfx_level >= GFX11 && key->mrt0_is_dual_src) {
+         struct aco_export_mrt* mrt0 = (exported_mrts & BITFIELD_BIT(0)) ? &mrts[0] : NULL;
+         struct aco_export_mrt* mrt1 = (exported_mrts & BITFIELD_BIT(1)) ? &mrts[1] : NULL;
+         create_fs_dual_src_export_gfx11(&ctx, mrt0, mrt1);
+      } else {
+         u_foreach_bit (i, exported_mrts) {
+            export_mrt(&ctx, &mrts[i]);
+         }
+      }
+   } else {
       create_fs_null_export(&ctx);
+   }
 
    program->config->float_mode = program->blocks[0].fp_mode.val;
 
diff --git a/src/amd/compiler/aco_shader_info.h b/src/amd/compiler/aco_shader_info.h
index 8fba17aedc9..a36e204315c 100644
--- a/src/amd/compiler/aco_shader_info.h
+++ b/src/amd/compiler/aco_shader_info.h
@@ -67,6 +67,8 @@ struct aco_ps_epilog_key {
    uint8_t color_is_int8;
    uint8_t color_is_int10;
    uint8_t enable_mrt_output_nan_fixup;
+
+   bool mrt0_is_dual_src;
 };
 
 struct aco_vp_output_info {
@@ -173,6 +175,7 @@ struct aco_stage_input {
 
       /* Used to export alpha through MRTZ for alpha-to-coverage (GFX11+). */
       bool alpha_to_coverage_via_mrtz;
+      bool mrt0_is_dual_src;
    } ps;
 };
 
diff --git a/src/amd/vulkan/radv_aco_shader_info.h b/src/amd/vulkan/radv_aco_shader_info.h
index 175a2ab53f1..79597389885 100644
--- a/src/amd/vulkan/radv_aco_shader_info.h
+++ b/src/amd/vulkan/radv_aco_shader_info.h
@@ -128,6 +128,7 @@ radv_aco_convert_ps_epilog_key(struct aco_ps_epilog_key *aco_info,
    ASSIGN_FIELD(color_is_int8);
    ASSIGN_FIELD(color_is_int10);
    ASSIGN_FIELD(enable_mrt_output_nan_fixup);
+   ASSIGN_FIELD(mrt0_is_dual_src);
 }
 
 static inline void
@@ -146,6 +147,7 @@ radv_aco_convert_pipe_key(struct aco_stage_input *aco_info,
    ASSIGN_FIELD(tcs.tess_input_vertices);
    ASSIGN_FIELD(ps.col_format);
    ASSIGN_FIELD(ps.alpha_to_coverage_via_mrtz);
+   ASSIGN_FIELD(ps.mrt0_is_dual_src);
 }
 
 static inline void
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 603afd384cf..86b5c77ec06 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3994,6 +3994,7 @@ radv_pipeline_create_ps_epilog(struct radv_graphics_pipeline *pipeline,
       .color_is_int8 = pipeline_key->ps.is_int8,
       .color_is_int10 = pipeline_key->ps.is_int10,
       .enable_mrt_output_nan_fixup = pipeline_key->ps.enable_mrt_output_nan_fixup,
+      .mrt0_is_dual_src = pipeline_key->ps.mrt0_is_dual_src,
    };
 
    pipeline->ps_epilog = radv_create_ps_epilog(device, &epilog_key);
@@ -5918,6 +5919,7 @@ radv_graphics_lib_pipeline_init(struct radv_graphics_lib_pipeline *pipeline,
          .color_is_int8 = blend.col_format_is_int8,
          .color_is_int10 = blend.col_format_is_int10,
          .enable_mrt_output_nan_fixup = key.ps.enable_mrt_output_nan_fixup,
+         .mrt0_is_dual_src = blend.mrt0_is_dual_src,
       };
 
       pipeline->base.ps_epilog = radv_create_ps_epilog(device, &epilog_key);
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index e3f84c19505..ae27d0d12f9 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -420,6 +420,7 @@ struct radv_ps_epilog_key {
    uint8_t color_is_int10;
    uint8_t enable_mrt_output_nan_fixup;
 
+   bool mrt0_is_dual_src;
    bool wave32;
 };
 