From c11e4798521e73de4f7f07105802c91f2c6c155d Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Wed, 5 May 2021 11:26:13 +0200 Subject: [PATCH] broadcom/compiler: specify maximum thread count in compile strategies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Once we have exhausted compile strategies at 4 threads and we start enabling lower thread counts, there is no point in starting compiles with 4 threads for them: we know these will fail, so let's start at 2 in these cases. This also has another nice implication: if the driver compiles at 4 threads and fails to register allocate, we were allowing it to try with 2 threads, but this would only retry the register allocation process and would not really recompile the shader with 2 threads. This is not optimal, because at 2 threads we have more TMU FIFO space for each thread and we can do more TMU pipelining, so we were missing that opportunity. This improves performance in Sponza by ~1.5% and also seems to help UE4 slightly. 
Reviewed-by: Alejandro Piñeiro Part-of: --- src/broadcom/compiler/vir.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 7fc799ac705..a6f8d845923 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -525,6 +525,7 @@ vir_compile_init(const struct v3d_compiler *compiler, void *debug_output_data), void *debug_output_data, int program_id, int variant_id, + uint32_t max_threads, uint32_t min_threads_for_reg_alloc, bool tmu_spilling_allowed, bool disable_loop_unrolling, @@ -539,7 +540,7 @@ vir_compile_init(const struct v3d_compiler *compiler, c->key = key; c->program_id = program_id; c->variant_id = variant_id; - c->threads = 4; + c->threads = max_threads; c->debug_output = debug_output; c->debug_output_data = debug_output_data; c->compilation_result = V3D_COMPILATION_SUCCEEDED; @@ -1525,21 +1526,22 @@ int v3d_shaderdb_dump(struct v3d_compile *c, */ struct v3d_compiler_strategy { const char *name; - uint32_t min_threads_for_reg_alloc; + uint32_t max_threads; + uint32_t min_threads; bool disable_loop_unrolling; bool disable_ubo_load_sorting; bool disable_tmu_pipelining; bool tmu_spilling_allowed; } static const strategies[] = { - /*0*/ { "default", 4, false, false, false, false }, - /*1*/ { "disable loop unrolling", 4, true, false, false, false }, - /*2*/ { "disable UBO load sorting", 4, true, true, false, false }, - /*3*/ { "disable TMU pipelining", 4, true, true, true, false }, - /*4*/ { "lower thread count", 1, false, false, false, false }, - /*5*/ { "disable loop unrolling (ltc)", 1, true, false, false, false }, - /*6*/ { "disable UBO load sorting (ltc)", 1, true, true, false, false }, - /*7*/ { "disable TMU pipelining (ltc)", 1, true, true, true, true }, - /*8*/ { "fallback scheduler", 1, true, true, true, true } + /*0*/ { "default", 4, 4, false, false, false, false }, + /*1*/ { "disable loop unrolling", 4, 4, true, false, false, 
false }, + /*2*/ { "disable UBO load sorting", 4, 4, true, true, false, false }, + /*3*/ { "disable TMU pipelining", 4, 4, true, true, true, false }, + /*4*/ { "lower thread count", 2, 1, false, false, false, false }, + /*5*/ { "disable loop unrolling (ltc)", 2, 1, true, false, false, false }, + /*6*/ { "disable UBO load sorting (ltc)", 2, 1, true, true, false, false }, + /*7*/ { "disable TMU pipelining (ltc)", 2, 1, true, true, true, true }, + /*8*/ { "fallback scheduler", 2, 1, true, true, true, true } }; /** @@ -1623,7 +1625,8 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler, c = vir_compile_init(compiler, key, s, debug_output, debug_output_data, program_id, variant_id, - strategies[i].min_threads_for_reg_alloc, + strategies[i].max_threads, + strategies[i].min_threads, strategies[i].tmu_spilling_allowed, strategies[i].disable_loop_unrolling, strategies[i].disable_ubo_load_sorting,