ac/llvm: allocate LLVM PS output variables on demand

This stops relying on si_shader_info, allowing further cleanup of
si_shader_info.

radv_load_output was unused, so it is removed.

Reviewed-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35392>
This commit is contained in:
Marek Olšák
2025-06-01 18:47:05 -04:00
committed by Marge Bot
parent 6b2331d5f7
commit 447d744833
5 changed files with 44 additions and 57 deletions

View File

@@ -2047,10 +2047,18 @@ static void visit_store_output(struct ac_nir_context *ctx, nir_intrinsic_instr *
continue;
LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);
LLVMValueRef output_addr = ctx->abi->outputs[base * 4 + chan];
LLVMTypeRef val_type = LLVMTypeOf(value);
assert(val_type == ctx->ac.f32 || val_type == ctx->ac.f16);
LLVMTypeRef output_type = ctx->stage == MESA_SHADER_FRAGMENT ? val_type : ctx->ac.f32;
LLVMValueRef *output_addr = &ctx->abi->outputs[base * 4 + chan];
if (!ctx->abi->is_16bit[base * 4 + chan] &&
LLVMTypeOf(value) == ctx->ac.f16) {
/* Allocate the output variable on demand. */
if (!*output_addr) {
*output_addr = ac_build_alloca_undef(&ctx->ac, output_type, "");
ctx->abi->is_16bit[base * 4 + chan] = output_type == ctx->ac.f16;
}
if (val_type == ctx->ac.f16 && output_type == ctx->ac.f32) {
LLVMValueRef output, index;
/* Insert the 16-bit value into the low or high bits of the 32-bit output
@@ -2058,11 +2066,11 @@ static void visit_store_output(struct ac_nir_context *ctx, nir_intrinsic_instr *
*/
index = LLVMConstInt(ctx->ac.i32, nir_intrinsic_io_semantics(instr).high_16bits, 0);
output = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.v2f16, output_addr, "");
output = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.v2f16, *output_addr, "");
output = LLVMBuildInsertElement(ctx->ac.builder, output, value, index, "");
value = LLVMBuildBitCast(ctx->ac.builder, output, ctx->ac.f32, "");
}
LLVMBuildStore(ctx->ac.builder, value, output_addr);
LLVMBuildStore(ctx->ac.builder, value, *output_addr);
}
}

View File

@@ -183,15 +183,6 @@ radv_get_sampler_desc(struct ac_shader_abi *abi, LLVMValueRef index, enum ac_des
return radv_load_rsrc(ctx, index, v4 ? ctx->ac.v4i32 : ctx->ac.v8i32);
}
/* Load the current value of one shader output channel.
 *
 * ctx:   shader context; supplies the output slot table (abi.outputs), the
 *        per-slot 16-bit flags (abi.is_16bit) and the LLVM builder.
 * index: output index, combined with chan by ac_llvm_reg_index_soa().
 * chan:  channel within the output.
 *
 * Returns the value produced by an LLVM load from the selected slot, typed
 * f16 when the slot is flagged 16-bit and f32 otherwise.
 *
 * NOTE(review): the slot pointer is not checked for NULL before the load, so
 * this presumably requires the slot to have been allocated already — confirm
 * against callers.
 */
static LLVMValueRef
radv_load_output(struct radv_shader_context *ctx, unsigned index, unsigned chan)
{
/* Flat slot number for (index, chan); presumably index * 4 + chan — TODO confirm. */
int idx = ac_llvm_reg_index_soa(index, chan);
LLVMValueRef output = ctx->abi.outputs[idx];
/* The load type must follow the per-slot 16-bit flag. */
LLVMTypeRef type = ctx->abi.is_16bit[idx] ? ctx->ac.f16 : ctx->ac.f32;
return LLVMBuildLoad2(ctx->ac.builder, type, output, "");
}
static void
ac_llvm_finalize_module(struct radv_shader_context *ctx, struct ac_midend_optimizer *meo)
{

View File

@@ -684,28 +684,6 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade
info->options & SI_PROFILE_CLAMP_DIV_BY_ZERO;
ctx->abi.disable_aniso_single_level = true;
bool ls_need_output =
ctx->stage == MESA_SHADER_VERTEX && shader->key.ge.as_ls &&
shader->key.ge.opt.same_patch_vertices;
bool ps_need_output = ctx->stage == MESA_SHADER_FRAGMENT;
if (ls_need_output || ps_need_output) {
for (unsigned i = 0; i < info->num_outputs; i++) {
LLVMTypeRef type = ctx->ac.f32;
/* Only FS uses unpacked f16. Other stages pack 16-bit outputs into low and high bits of f32. */
if (nir->info.stage == MESA_SHADER_FRAGMENT &&
nir_alu_type_get_type_size(ctx->shader->selector->info.output_type[i]) == 16)
type = ctx->ac.f16;
for (unsigned j = 0; j < 4; j++) {
ctx->abi.outputs[i * 4 + j] = ac_build_alloca_undef(&ctx->ac, type, "");
ctx->abi.is_16bit[i * 4 + j] = type == ctx->ac.f16;
}
}
}
if (!ac_nir_translate(&ctx->ac, &ctx->abi, &ctx->args->ac, nir))
return false;

View File

@@ -353,35 +353,38 @@ void si_llvm_ps_build_end(struct si_shader_context *ctx)
struct si_shader_info *info = &shader->selector->info;
LLVMBuilderRef builder = ctx->ac.builder;
unsigned i, j, vgpr;
LLVMValueRef *addrs = ctx->abi.outputs;
LLVMValueRef color[8][4] = {};
uint8_t color_output_mask = 0, is_16bit_mask = 0;
LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
LLVMValueRef ret;
/* Read the output values. */
for (i = 0; i < info->num_outputs; i++) {
unsigned semantic = info->output_semantic[i];
LLVMTypeRef type = ctx->abi.is_16bit[4 * i] ? ctx->ac.f16 : ctx->ac.f32;
switch (semantic) {
case FRAG_RESULT_DEPTH:
depth = LLVMBuildLoad2(builder, type, addrs[4 * i + 0], "");
depth = LLVMBuildLoad2(builder, ctx->ac.f32, ctx->abi.outputs[4 * i + 0], "");
break;
case FRAG_RESULT_STENCIL:
stencil = LLVMBuildLoad2(builder, type, addrs[4 * i + 0], "");
stencil = LLVMBuildLoad2(builder, ctx->ac.f32, ctx->abi.outputs[4 * i + 0], "");
break;
case FRAG_RESULT_SAMPLE_MASK:
samplemask = LLVMBuildLoad2(builder, type, addrs[4 * i + 0], "");
samplemask = LLVMBuildLoad2(builder, ctx->ac.f32, ctx->abi.outputs[4 * i + 0], "");
break;
default:
if (semantic >= FRAG_RESULT_DATA0 && semantic <= FRAG_RESULT_DATA7) {
unsigned index = semantic - FRAG_RESULT_DATA0;
for (j = 0; j < 4; j++) {
LLVMValueRef ptr = addrs[4 * i + j];
type = ctx->abi.is_16bit[4 * i + j] ? ctx->ac.f16 : ctx->ac.f32;
LLVMValueRef result = LLVMBuildLoad2(builder, type, ptr, "");
if (!ctx->abi.outputs[4 * i + j])
continue;
color_output_mask |= BITFIELD_BIT(index);
is_16bit_mask |= ctx->abi.is_16bit[4 * i + j] ? BITFIELD_BIT(index) : 0;
LLVMTypeRef type = ctx->abi.is_16bit[4 * i + j] ? ctx->ac.f16 : ctx->ac.f32;
LLVMValueRef result = LLVMBuildLoad2(builder, type, ctx->abi.outputs[4 * i + j], "");
color[index][j] = result;
}
} else {
@@ -401,20 +404,28 @@ void si_llvm_ps_build_end(struct si_shader_context *ctx)
/* Set VGPRs */
vgpr = SI_SGPR_ALPHA_REF + 1;
for (i = 0; i < ARRAY_SIZE(color); i++) {
if (!color[i][0])
continue;
if (LLVMTypeOf(color[i][0]) == ctx->ac.f16) {
u_foreach_bit(i, color_output_mask) {
if (is_16bit_mask & BITFIELD_BIT(i)) {
for (j = 0; j < 2; j++) {
LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, &color[i][j * 2], 2);
tmp = LLVMBuildBitCast(builder, tmp, ctx->ac.f32, "");
ret = LLVMBuildInsertValue(builder, ret, tmp, vgpr++, "");
if (color[i][j * 2] || color[i][j * 2 + 1]) {
for (unsigned k = 0; k < 2; k++) {
if (!color[i][j * 2 + k])
color[i][j * 2 + k] = LLVMGetUndef(ctx->ac.f16);
}
LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, &color[i][j * 2], 2);
tmp = LLVMBuildBitCast(builder, tmp, ctx->ac.f32, "");
ret = LLVMBuildInsertValue(builder, ret, tmp, vgpr, "");
}
vgpr++;
}
vgpr += 2;
} else {
for (j = 0; j < 4; j++)
ret = LLVMBuildInsertValue(builder, ret, color[i][j], vgpr++, "");
for (j = 0; j < 4; j++) {
if (color[i][j])
ret = LLVMBuildInsertValue(builder, ret, color[i][j], vgpr, "");
vgpr++;
}
}
}
if (depth)

View File

@@ -74,7 +74,6 @@ void si_llvm_ls_build_end(struct si_shader_context *ctx)
assert(shader->is_monolithic);
struct si_shader_info *info = &shader->selector->info;
LLVMValueRef *addrs = ctx->abi.outputs;
for (unsigned i = 0; i < info->num_outputs; i++) {
unsigned semantic = info->output_semantic[i];
@@ -84,11 +83,11 @@ void si_llvm_ls_build_end(struct si_shader_context *ctx)
continue;
for (unsigned chan = 0; chan < 4; chan++) {
if (!(info->output_usagemask[i] & (1 << chan)))
if (!ctx->abi.outputs[4 * i + chan])
continue;
LLVMValueRef value = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32, addrs[4 * i + chan], "");
LLVMValueRef value = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32,
ctx->abi.outputs[4 * i + chan], "");
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, value, vgpr + param * 4 + chan, "");
}
}