aco: implement upcasting 16-bit types for 32-bit color buffers in PS epilog

This upcasting was missed in ACO when the change was implemented for LLVM.

Fixes: fbbf029529 - radeonsi: enable 16-bit mediump IO for PS outputs only, and VS->PS with env var

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36112>
This commit is contained in:
Marek Olšák
2025-07-14 09:20:41 -04:00
committed by Marge Bot
parent 004abdc767
commit d12bc87dda
@@ -83,7 +83,7 @@ export_mrt(isel_context* ctx, const struct aco_export_mrt* mrt)
bool
export_fs_mrt_color(isel_context* ctx, const struct aco_ps_epilog_info* info, Temp colors[4],
unsigned slot, struct aco_export_mrt* mrt)
unsigned slot, unsigned color_type, struct aco_export_mrt* mrt)
{
unsigned col_format = (info->spi_shader_col_format >> (slot * 4)) & 0xf;
@@ -101,6 +101,7 @@ export_fs_mrt_color(isel_context* ctx, const struct aco_ps_epilog_info* info, Te
aco_opcode compr_op = aco_opcode::num_opcodes;
bool compr = false;
bool is_16bit = colors[0].regClass() == v2b;
assert(is_16bit == (color_type != ACO_TYPE_ANY32));
bool is_int8 = (info->color_is_int8 >> slot) & 1;
bool is_int10 = (info->color_is_int10 >> slot) & 1;
bool enable_mrt_output_nan_fixup = (ctx->options->enable_mrt_output_nan_fixup >> slot) & 1;
@@ -119,11 +120,41 @@ export_fs_mrt_color(isel_context* ctx, const struct aco_ps_epilog_info* info, Te
}
switch (col_format) {
case V_028714_SPI_SHADER_32_R: enabled_channels = 1; break;
case V_028714_SPI_SHADER_32_R:
if (color_type == ACO_TYPE_FLOAT16)
values[0] = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), values[0]);
else if (color_type == ACO_TYPE_INT16 || color_type == ACO_TYPE_UINT16)
values[0] = Operand(
convert_int(ctx, bld, values[0].getTemp(), 16, 32, color_type == ACO_TYPE_INT16));
enabled_channels = 1;
break;
case V_028714_SPI_SHADER_32_GR: enabled_channels = 0x3; break;
case V_028714_SPI_SHADER_32_GR:
if (color_type == ACO_TYPE_FLOAT16) {
for (unsigned i = 0; i < 2; i++)
values[i] = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), values[i]);
} else if (color_type == ACO_TYPE_INT16 || color_type == ACO_TYPE_UINT16) {
for (unsigned i = 0; i < 2; i++)
values[i] = Operand(
convert_int(ctx, bld, values[i].getTemp(), 16, 32, color_type == ACO_TYPE_INT16));
}
enabled_channels = 0x3;
break;
case V_028714_SPI_SHADER_32_AR:
if (color_type == ACO_TYPE_FLOAT16) {
for (unsigned i = 0; i < 2; i++) {
unsigned idx = i ? 3 : 0;
values[idx] = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), values[idx]);
}
} else if (color_type == ACO_TYPE_INT16 || color_type == ACO_TYPE_UINT16) {
for (unsigned i = 0; i < 2; i++) {
unsigned idx = i ? 3 : 0;
values[idx] = Operand(
convert_int(ctx, bld, values[idx].getTemp(), 16, 32, color_type == ACO_TYPE_INT16));
}
}
if (ctx->options->gfx_level >= GFX10) {
/* Special case: on GFX10, the outputs are different for 32_AR */
enabled_channels = 0x3;
@@ -210,7 +241,17 @@ export_fs_mrt_color(isel_context* ctx, const struct aco_ps_epilog_info* info, Te
}
break;
case V_028714_SPI_SHADER_32_ABGR: enabled_channels = 0xF; break;
case V_028714_SPI_SHADER_32_ABGR:
enabled_channels = 0xF;
if (color_type == ACO_TYPE_FLOAT16) {
for (unsigned i = 0; i < 4; i++)
values[i] = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), values[i]);
} else if (color_type == ACO_TYPE_INT16 || color_type == ACO_TYPE_UINT16) {
for (unsigned i = 0; i < 4; i++)
values[i] = Operand(
convert_int(ctx, bld, values[i].getTemp(), 16, 32, color_type == ACO_TYPE_INT16));
}
break;
case V_028714_SPI_SHADER_ZERO:
default: return false;
@@ -358,11 +399,11 @@ select_ps_epilog(Program* program, void* pinfo, ac_shader_config* config,
continue;
Temp color = get_arg(&ctx, einfo->colors[i]);
unsigned col_types = (einfo->color_types >> (i * 2)) & 0x3;
unsigned col_type = (einfo->color_types >> (i * 2)) & 0x3;
emit_split_vector(&ctx, color, col_types == ACO_TYPE_ANY32 ? 4 : 8);
emit_split_vector(&ctx, color, col_type == ACO_TYPE_ANY32 ? 4 : 8);
for (unsigned c = 0; c < 4; ++c) {
colors[i][c] = emit_extract_vector(&ctx, color, c, col_types == ACO_TYPE_ANY32 ? v1 : v2b);
colors[i][c] = emit_extract_vector(&ctx, color, c, col_type == ACO_TYPE_ANY32 ? v1 : v2b);
}
/* Store MRTZ.a before applying alpha-to-one if enabled. */
@@ -395,18 +436,21 @@ select_ps_epilog(Program* program, void* pinfo, ac_shader_config* config,
*/
for (unsigned i = 0; i < 8; i++) {
struct aco_export_mrt* mrt = &mrts[mrt_num];
if (export_fs_mrt_color(&ctx, einfo, colors[0], i, mrt))
unsigned col_type = einfo->color_types & 0x3;
if (export_fs_mrt_color(&ctx, einfo, colors[0], i, col_type, mrt))
mrt->target += mrt_num++;
}
} else {
for (unsigned i = 0; i < MAX_DRAW_BUFFERS; i++) {
struct aco_export_mrt* mrt = &mrts[mrt_num];
const uint8_t cb_idx = einfo->color_map[i];
unsigned col_type = (einfo->color_types >> (cb_idx * 2)) & 0x3;
if (cb_idx == 0xff || !einfo->colors[cb_idx].used)
continue;
if (export_fs_mrt_color(&ctx, einfo, colors[cb_idx], i, mrt)) {
if (export_fs_mrt_color(&ctx, einfo, colors[cb_idx], i, col_type, mrt)) {
mrt->target += mrt_num++;
}
}