ac/llvm: allocate LLVM PS output variables on demand
This stops relying on si_shader_info for allocating the output variables, which allows further cleanup of si_shader_info.

radv_load_output was unused and is removed.

Reviewed-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35392>
This commit is contained in:
@@ -2047,10 +2047,18 @@ static void visit_store_output(struct ac_nir_context *ctx, nir_intrinsic_instr *
|
||||
continue;
|
||||
|
||||
LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);
|
||||
LLVMValueRef output_addr = ctx->abi->outputs[base * 4 + chan];
|
||||
LLVMTypeRef val_type = LLVMTypeOf(value);
|
||||
assert(val_type == ctx->ac.f32 || val_type == ctx->ac.f16);
|
||||
LLVMTypeRef output_type = ctx->stage == MESA_SHADER_FRAGMENT ? val_type : ctx->ac.f32;
|
||||
LLVMValueRef *output_addr = &ctx->abi->outputs[base * 4 + chan];
|
||||
|
||||
if (!ctx->abi->is_16bit[base * 4 + chan] &&
|
||||
LLVMTypeOf(value) == ctx->ac.f16) {
|
||||
/* Allocate the output variable on demand. */
|
||||
if (!*output_addr) {
|
||||
*output_addr = ac_build_alloca_undef(&ctx->ac, output_type, "");
|
||||
ctx->abi->is_16bit[base * 4 + chan] = output_type == ctx->ac.f16;
|
||||
}
|
||||
|
||||
if (val_type == ctx->ac.f16 && output_type == ctx->ac.f32) {
|
||||
LLVMValueRef output, index;
|
||||
|
||||
/* Insert the 16-bit value into the low or high bits of the 32-bit output
|
||||
@@ -2058,11 +2066,11 @@ static void visit_store_output(struct ac_nir_context *ctx, nir_intrinsic_instr *
|
||||
*/
|
||||
index = LLVMConstInt(ctx->ac.i32, nir_intrinsic_io_semantics(instr).high_16bits, 0);
|
||||
|
||||
output = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.v2f16, output_addr, "");
|
||||
output = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.v2f16, *output_addr, "");
|
||||
output = LLVMBuildInsertElement(ctx->ac.builder, output, value, index, "");
|
||||
value = LLVMBuildBitCast(ctx->ac.builder, output, ctx->ac.f32, "");
|
||||
}
|
||||
LLVMBuildStore(ctx->ac.builder, value, output_addr);
|
||||
LLVMBuildStore(ctx->ac.builder, value, *output_addr);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -183,15 +183,6 @@ radv_get_sampler_desc(struct ac_shader_abi *abi, LLVMValueRef index, enum ac_des
|
||||
return radv_load_rsrc(ctx, index, v4 ? ctx->ac.v4i32 : ctx->ac.v8i32);
|
||||
}
|
||||
|
||||
static LLVMValueRef
|
||||
radv_load_output(struct radv_shader_context *ctx, unsigned index, unsigned chan)
|
||||
{
|
||||
int idx = ac_llvm_reg_index_soa(index, chan);
|
||||
LLVMValueRef output = ctx->abi.outputs[idx];
|
||||
LLVMTypeRef type = ctx->abi.is_16bit[idx] ? ctx->ac.f16 : ctx->ac.f32;
|
||||
return LLVMBuildLoad2(ctx->ac.builder, type, output, "");
|
||||
}
|
||||
|
||||
static void
|
||||
ac_llvm_finalize_module(struct radv_shader_context *ctx, struct ac_midend_optimizer *meo)
|
||||
{
|
||||
|
||||
@@ -684,28 +684,6 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade
|
||||
info->options & SI_PROFILE_CLAMP_DIV_BY_ZERO;
|
||||
ctx->abi.disable_aniso_single_level = true;
|
||||
|
||||
bool ls_need_output =
|
||||
ctx->stage == MESA_SHADER_VERTEX && shader->key.ge.as_ls &&
|
||||
shader->key.ge.opt.same_patch_vertices;
|
||||
|
||||
bool ps_need_output = ctx->stage == MESA_SHADER_FRAGMENT;
|
||||
|
||||
if (ls_need_output || ps_need_output) {
|
||||
for (unsigned i = 0; i < info->num_outputs; i++) {
|
||||
LLVMTypeRef type = ctx->ac.f32;
|
||||
|
||||
/* Only FS uses unpacked f16. Other stages pack 16-bit outputs into low and high bits of f32. */
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT &&
|
||||
nir_alu_type_get_type_size(ctx->shader->selector->info.output_type[i]) == 16)
|
||||
type = ctx->ac.f16;
|
||||
|
||||
for (unsigned j = 0; j < 4; j++) {
|
||||
ctx->abi.outputs[i * 4 + j] = ac_build_alloca_undef(&ctx->ac, type, "");
|
||||
ctx->abi.is_16bit[i * 4 + j] = type == ctx->ac.f16;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!ac_nir_translate(&ctx->ac, &ctx->abi, &ctx->args->ac, nir))
|
||||
return false;
|
||||
|
||||
|
||||
@@ -353,35 +353,38 @@ void si_llvm_ps_build_end(struct si_shader_context *ctx)
|
||||
struct si_shader_info *info = &shader->selector->info;
|
||||
LLVMBuilderRef builder = ctx->ac.builder;
|
||||
unsigned i, j, vgpr;
|
||||
LLVMValueRef *addrs = ctx->abi.outputs;
|
||||
|
||||
LLVMValueRef color[8][4] = {};
|
||||
uint8_t color_output_mask = 0, is_16bit_mask = 0;
|
||||
LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
|
||||
LLVMValueRef ret;
|
||||
|
||||
/* Read the output values. */
|
||||
for (i = 0; i < info->num_outputs; i++) {
|
||||
unsigned semantic = info->output_semantic[i];
|
||||
LLVMTypeRef type = ctx->abi.is_16bit[4 * i] ? ctx->ac.f16 : ctx->ac.f32;
|
||||
|
||||
switch (semantic) {
|
||||
case FRAG_RESULT_DEPTH:
|
||||
depth = LLVMBuildLoad2(builder, type, addrs[4 * i + 0], "");
|
||||
depth = LLVMBuildLoad2(builder, ctx->ac.f32, ctx->abi.outputs[4 * i + 0], "");
|
||||
break;
|
||||
case FRAG_RESULT_STENCIL:
|
||||
stencil = LLVMBuildLoad2(builder, type, addrs[4 * i + 0], "");
|
||||
stencil = LLVMBuildLoad2(builder, ctx->ac.f32, ctx->abi.outputs[4 * i + 0], "");
|
||||
break;
|
||||
case FRAG_RESULT_SAMPLE_MASK:
|
||||
samplemask = LLVMBuildLoad2(builder, type, addrs[4 * i + 0], "");
|
||||
samplemask = LLVMBuildLoad2(builder, ctx->ac.f32, ctx->abi.outputs[4 * i + 0], "");
|
||||
break;
|
||||
default:
|
||||
if (semantic >= FRAG_RESULT_DATA0 && semantic <= FRAG_RESULT_DATA7) {
|
||||
unsigned index = semantic - FRAG_RESULT_DATA0;
|
||||
|
||||
for (j = 0; j < 4; j++) {
|
||||
LLVMValueRef ptr = addrs[4 * i + j];
|
||||
type = ctx->abi.is_16bit[4 * i + j] ? ctx->ac.f16 : ctx->ac.f32;
|
||||
LLVMValueRef result = LLVMBuildLoad2(builder, type, ptr, "");
|
||||
if (!ctx->abi.outputs[4 * i + j])
|
||||
continue;
|
||||
|
||||
color_output_mask |= BITFIELD_BIT(index);
|
||||
is_16bit_mask |= ctx->abi.is_16bit[4 * i + j] ? BITFIELD_BIT(index) : 0;
|
||||
LLVMTypeRef type = ctx->abi.is_16bit[4 * i + j] ? ctx->ac.f16 : ctx->ac.f32;
|
||||
LLVMValueRef result = LLVMBuildLoad2(builder, type, ctx->abi.outputs[4 * i + j], "");
|
||||
color[index][j] = result;
|
||||
}
|
||||
} else {
|
||||
@@ -401,20 +404,28 @@ void si_llvm_ps_build_end(struct si_shader_context *ctx)
|
||||
|
||||
/* Set VGPRs */
|
||||
vgpr = SI_SGPR_ALPHA_REF + 1;
|
||||
for (i = 0; i < ARRAY_SIZE(color); i++) {
|
||||
if (!color[i][0])
|
||||
continue;
|
||||
|
||||
if (LLVMTypeOf(color[i][0]) == ctx->ac.f16) {
|
||||
u_foreach_bit(i, color_output_mask) {
|
||||
if (is_16bit_mask & BITFIELD_BIT(i)) {
|
||||
for (j = 0; j < 2; j++) {
|
||||
LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, &color[i][j * 2], 2);
|
||||
tmp = LLVMBuildBitCast(builder, tmp, ctx->ac.f32, "");
|
||||
ret = LLVMBuildInsertValue(builder, ret, tmp, vgpr++, "");
|
||||
if (color[i][j * 2] || color[i][j * 2 + 1]) {
|
||||
for (unsigned k = 0; k < 2; k++) {
|
||||
if (!color[i][j * 2 + k])
|
||||
color[i][j * 2 + k] = LLVMGetUndef(ctx->ac.f16);
|
||||
}
|
||||
LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, &color[i][j * 2], 2);
|
||||
tmp = LLVMBuildBitCast(builder, tmp, ctx->ac.f32, "");
|
||||
ret = LLVMBuildInsertValue(builder, ret, tmp, vgpr, "");
|
||||
}
|
||||
vgpr++;
|
||||
}
|
||||
vgpr += 2;
|
||||
} else {
|
||||
for (j = 0; j < 4; j++)
|
||||
ret = LLVMBuildInsertValue(builder, ret, color[i][j], vgpr++, "");
|
||||
for (j = 0; j < 4; j++) {
|
||||
if (color[i][j])
|
||||
ret = LLVMBuildInsertValue(builder, ret, color[i][j], vgpr, "");
|
||||
vgpr++;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (depth)
|
||||
|
||||
@@ -74,7 +74,6 @@ void si_llvm_ls_build_end(struct si_shader_context *ctx)
|
||||
assert(shader->is_monolithic);
|
||||
|
||||
struct si_shader_info *info = &shader->selector->info;
|
||||
LLVMValueRef *addrs = ctx->abi.outputs;
|
||||
|
||||
for (unsigned i = 0; i < info->num_outputs; i++) {
|
||||
unsigned semantic = info->output_semantic[i];
|
||||
@@ -84,11 +83,11 @@ void si_llvm_ls_build_end(struct si_shader_context *ctx)
|
||||
continue;
|
||||
|
||||
for (unsigned chan = 0; chan < 4; chan++) {
|
||||
if (!(info->output_usagemask[i] & (1 << chan)))
|
||||
if (!ctx->abi.outputs[4 * i + chan])
|
||||
continue;
|
||||
|
||||
LLVMValueRef value = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32, addrs[4 * i + chan], "");
|
||||
|
||||
LLVMValueRef value = LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32,
|
||||
ctx->abi.outputs[4 * i + chan], "");
|
||||
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, value, vgpr + param * 4 + chan, "");
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user