From 10beddf6595499f17c6c23ec0409160fe1e952df Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Thu, 12 Nov 2020 22:25:52 -0500
Subject: [PATCH] radeonsi: don't leave more than 8 unoccupied lanes in HS

Previously it was 16 and bigger patches would always trim the patch count
needlessly.

There are 2 variables to consider:
- lane occupancy
- LDS usage (limiting wave occupancy)

If LDS size is 32 KB (max limit per CU) for 3 waves and we can't maximize
occupancy, it's better to leave some lanes unoccupied because using 2 waves
would decrease the LDS size to 21 KB, which is not enough to fit another
workgroup on the CU.

Acked-by: Pierre-Eric Pelloux-Prayer
Part-of:
---
 src/gallium/drivers/radeonsi/si_state_draw.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index ef5eae2cc47..07c068a9557 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -176,7 +176,8 @@ static void si_emit_derived_tess_state(struct si_context *sctx, const struct pip
    unsigned temp_verts_per_tg = *num_patches * max_verts_per_patch;
    unsigned wave_size = sctx->screen->ge_wave_size;
 
-   if (temp_verts_per_tg > wave_size && temp_verts_per_tg % wave_size < wave_size * 3 / 4)
+   if (temp_verts_per_tg > wave_size &&
+       (wave_size - temp_verts_per_tg % wave_size >= MAX2(max_verts_per_patch, 8)))
       *num_patches = (temp_verts_per_tg & ~(wave_size - 1)) / max_verts_per_patch;
 
    if (sctx->chip_class == GFX6) {