radeonsi: count and report temp arrays in scratch separately

v2: only do this if debug output or shader dumping is enabled

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com> (v1)
This commit is contained in:
Marek Olšák
2016-11-26 22:52:12 +01:00
parent a91add9369
commit 274fb601c2
2 changed files with 40 additions and 4 deletions
+39 -4
View File
@@ -5348,6 +5348,9 @@ static unsigned llvm_get_type_size(LLVMTypeRef type)
case LLVMVectorTypeKind:
return LLVMGetVectorSize(type) *
llvm_get_type_size(LLVMGetElementType(type));
case LLVMArrayTypeKind:
return LLVMGetArrayLength(type) *
llvm_get_type_size(LLVMGetElementType(type));
default:
assert(0);
return 0;
@@ -5996,13 +5999,15 @@ static void si_shader_dump_stats(struct si_screen *sscreen,
"VGPRS: %d\n"
"Spilled SGPRs: %d\n"
"Spilled VGPRs: %d\n"
"Private memory VGPRs: %d\n"
"Code Size: %d bytes\n"
"LDS: %d blocks\n"
"Scratch: %d bytes per wave\n"
"Max Waves: %d\n"
"********************\n\n\n",
conf->num_sgprs, conf->num_vgprs,
conf->spilled_sgprs, conf->spilled_vgprs, code_size,
conf->spilled_sgprs, conf->spilled_vgprs,
conf->private_mem_vgprs, code_size,
conf->lds_size, conf->scratch_bytes_per_wave,
max_simd_waves);
}
@@ -6010,11 +6015,11 @@ static void si_shader_dump_stats(struct si_screen *sscreen,
pipe_debug_message(debug, SHADER_INFO,
"Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d "
"LDS: %d Scratch: %d Max Waves: %d Spilled SGPRs: %d "
"Spilled VGPRs: %d",
"Spilled VGPRs: %d PrivMem VGPRs: %d",
conf->num_sgprs, conf->num_vgprs, code_size,
conf->lds_size, conf->scratch_bytes_per_wave,
max_simd_waves, conf->spilled_sgprs,
conf->spilled_vgprs);
conf->spilled_vgprs, conf->private_mem_vgprs);
}
static const char *si_get_shader_name(struct si_shader *shader,
@@ -6571,6 +6576,32 @@ static void si_eliminate_const_vs_outputs(struct si_shader_context *ctx)
}
}
/**
 * Count the private (scratch) memory used by allocas in the main function.
 *
 * Resets ctx->shader->config.private_mem_vgprs to zero, then visits every
 * instruction of every basic block in ctx->main_fn and, for each alloca,
 * adds the size of the allocated type in dwords, rounded up with align()
 * to the value reported by LLVMGetAlignment().
 *
 * \param ctx  shader context; ctx->main_fn is read and
 *             ctx->shader->config.private_mem_vgprs is overwritten.
 */
static void si_count_scratch_private_memory(struct si_shader_context *ctx)
{
	unsigned *total_dw = &ctx->shader->config.private_mem_vgprs;
	LLVMBasicBlockRef block;
	LLVMValueRef inst;

	*total_dw = 0;

	/* Nothing is removed while walking, so plain forward iteration is safe. */
	for (block = LLVMGetFirstBasicBlock(ctx->main_fn); block;
	     block = LLVMGetNextBasicBlock(block)) {
		for (inst = LLVMGetFirstInstruction(block); inst;
		     inst = LLVMGetNextInstruction(inst)) {
			if (LLVMGetInstructionOpcode(inst) == LLVMAlloca) {
				/* An alloca yields a pointer; its element type is
				 * what actually gets allocated.
				 */
				LLVMTypeRef alloc_type =
					LLVMGetElementType(LLVMTypeOf(inst));

				/* It is unclear why LLVM aligns allocas to 4 elements.
				 * NOTE(review): the alignment value is applied to a
				 * dword count as-is - confirm the units match and that
				 * it is never 0.
				 */
				unsigned alloca_align = LLVMGetAlignment(inst);
				unsigned size_dw = llvm_get_type_size(alloc_type) / 4;

				*total_dw += align(size_dw, alloca_align);
			}
		}
	}
}
static bool si_compile_tgsi_main(struct si_shader_context *ctx,
struct si_shader *shader)
{
@@ -7227,9 +7258,13 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
si_llvm_finalize_module(&ctx,
r600_extra_shader_checks(&sscreen->b, ctx.type));
/* Post-optimization transformations. */
/* Post-optimization transformations and analysis. */
si_eliminate_const_vs_outputs(&ctx);
if ((debug && debug->debug_message) ||
r600_can_dump_shader(&sscreen->b, ctx.type))
si_count_scratch_private_memory(&ctx);
/* Compile to bytecode. */
r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm,
mod, debug, ctx.type, "TGSI shader");
+1
View File
@@ -448,6 +448,7 @@ struct si_shader_config {
unsigned num_vgprs;
unsigned spilled_sgprs;
unsigned spilled_vgprs;
unsigned private_mem_vgprs;
unsigned lds_size;
unsigned spi_ps_input_ena;
unsigned spi_ps_input_addr;