From 5e487dbc49e5be57a1da255ae31fe642bd42f815 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 17 Apr 2025 14:04:17 -0400 Subject: [PATCH] amd: stop using CLEAR_STATE on gfx11 It's not allowed with user queues, so this will make it simpler to support user queues. There are 2 groups of registers: - those that are never set by radv and radeonsi - those are now set in the shared preamble - those that are set by radv but not radeonsi - those are now set in the radeonsi preamble Reviewed-by: Samuel Pitoiset Part-of: --- src/amd/common/ac_cmdbuf.c | 31 ++++++++++++++++++++++--- src/amd/common/ac_gpu_info.c | 9 ++++--- src/gallium/drivers/radeonsi/si_state.c | 21 +++++++++++------ 3 files changed, 46 insertions(+), 15 deletions(-) diff --git a/src/amd/common/ac_cmdbuf.c b/src/amd/common/ac_cmdbuf.c index 57d5ae1cfa5..14c7f39b7e2 100644 --- a/src/amd/common/ac_cmdbuf.c +++ b/src/amd/common/ac_cmdbuf.c @@ -453,6 +453,29 @@ gfx10_init_graphics_preamble_state(const struct ac_preamble_state *state, S_00B524_MEM_BASE(info->address32_hi >> 8)); /* Context registers. */ + if (info->gfx_level >= GFX11) { + /* These are set by CLEAR_STATE on gfx10. We don't use CLEAR_STATE on gfx11. */ + ac_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0); + ac_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1)); + ac_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR, S_028244_BR_X(16384) | S_028244_BR_Y(16384)); + ac_pm4_set_reg(pm4, R_02835C_PA_SC_TILE_STEERING_OVERRIDE, info->pa_sc_tile_steering_override); + ac_pm4_set_reg(pm4, R_0283E4_PA_SC_VRS_RATE_CACHE_CNTL, 0); + ac_pm4_set_reg(pm4, R_028428_CB_COVERAGE_OUT_CONTROL, 0); + ac_pm4_set_reg(pm4, R_0286DC_SPI_BARYC_SSAA_CNTL, 0); + ac_pm4_set_reg(pm4, R_0287D4_PA_CL_POINT_X_RAD, 0); + ac_pm4_set_reg(pm4, R_0287D8_PA_CL_POINT_Y_RAD, 0); + ac_pm4_set_reg(pm4, R_0287DC_PA_CL_POINT_SIZE, 0); + ac_pm4_set_reg(pm4, R_0287E0_PA_CL_POINT_CULL_RAD, 0); + ac_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0); + ac_pm4_set_reg(pm4, R_028824_PA_SU_LINE_STIPPLE_CNTL, 0); + ac_pm4_set_reg(pm4, R_02883C_PA_SU_OVER_RASTERIZATION_CNTL, 0); + ac_pm4_set_reg(pm4, R_028840_PA_STEREO_CNTL, 0); + ac_pm4_set_reg(pm4, R_028A50_VGT_ENHANCE, 0); + ac_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0); + ac_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0); + ac_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0); + } + if (info->gfx_level < GFX11) { ac_pm4_set_reg(pm4, R_028038_DB_DFSM_CONTROL, S_028038_PUNCHOUT_MODE(V_028038_FORCE_OFF)); } @@ -491,6 +514,8 @@ gfx10_init_graphics_preamble_state(const struct ac_preamble_state *state, S_028830_SMALL_PRIM_FILTER_ENABLE(1)); ac_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); + if (info->gfx_level >= GFX11) /* cleared by CLEAR_STATE on gfx10 */ + ac_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); ac_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE, 1); ac_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, info->gfx_level >= GFX11 ? @@ -511,10 +536,10 @@ gfx10_init_graphics_preamble_state(const struct ac_preamble_state *state, ac_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1, S_028C48_MAX_ALLOC_COUNT(info->pbb_max_alloc_count - gfx10_one) | S_028C48_MAX_PRIM_PER_BATCH(1023)); - - if (info->gfx_level >= GFX11_5) + if (info->gfx_level >= GFX11) { ac_pm4_set_reg(pm4, R_028C54_PA_SC_BINNER_CNTL_2, - S_028C54_ENABLE_PING_PONG_BIN_ORDER(1)); + S_028C54_ENABLE_PING_PONG_BIN_ORDER(info->gfx_level >= GFX11_5)); + } /* Break up a pixel wave if it contains deallocs for more than * half the parameter cache. diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index 50f386ae2ac..5bd9f430a75 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -1143,12 +1143,11 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info, info->lds_encode_granularity = info->gfx_level >= GFX7 ? 128 * 4 : 64 * 4; info->lds_alloc_granularity = info->gfx_level >= GFX10_3 ? 256 * 4 : info->lds_encode_granularity; - /* The mere presence of CLEAR_STATE in the IB causes random GPU hangs - * on GFX6. Some CLEAR_STATE cause asic hang on radeon kernel, etc. - * SPI_VS_OUT_CONFIG. So only enable GFX7 CLEAR_STATE on amdgpu kernel. + /* The mere presence of CLEAR_STATE in the IB causes random GPU hangs on GFX6. CLEAR_STATE + * causes GPU hangs with the radeon kernel driver, so only enable GFX7 CLEAR_STATE on amdgpu. + * GFX11+ supports CLEAR_STATE, but we have decided not to use it. */ - info->has_clear_state = info->gfx_level >= GFX7 && info->gfx_level < GFX12 && - !(info->userq_ip_mask & BITFIELD_BIT(AMD_IP_GFX)); + info->has_clear_state = info->gfx_level >= GFX7 && info->gfx_level < GFX11; info->has_distributed_tess = info->gfx_level >= GFX10 || (info->gfx_level >= GFX8 && info->max_se >= 2); diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 81373d34300..bcc5c71f014 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -5123,15 +5123,9 @@ static void gfx10_init_gfx_preamble_state(struct si_context *sctx) ac_pm4_cmd_add(&pm4->base, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); } - if (sscreen->info.has_clear_state) { + if (sctx->gfx_level < GFX11) { ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_CLEAR_STATE, 0, 0)); ac_pm4_cmd_add(&pm4->base, 0); - } else { - /* PA_SC_TILE_STEERING_OVERRIDE needs to be written else observing corruption in - * gfx11 with userq. - */ - ac_pm4_set_reg(&pm4->base, R_02835C_PA_SC_TILE_STEERING_OVERRIDE, - sscreen->info.pa_sc_tile_steering_override); } } @@ -5161,6 +5155,19 @@ static void gfx10_init_gfx_preamble_state(struct si_context *sctx) S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_SC_VRS_COMB_MODE_OVERRIDE)); } + if (sctx->gfx_level >= GFX11) { + /* These are set by CLEAR_STATE on gfx10. We don't use CLEAR_STATE on gfx11. */ + ac_pm4_set_reg(&pm4->base, R_028034_PA_SC_SCREEN_SCISSOR_BR, + S_028034_BR_X(16384) | S_028034_BR_Y(16384)); + ac_pm4_set_reg(&pm4->base, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); + ac_pm4_set_reg(&pm4->base, R_0286E0_SPI_BARYC_CNTL, 0); + ac_pm4_set_reg(&pm4->base, R_028828_PA_SU_LINE_STIPPLE_SCALE, 0); + ac_pm4_set_reg(&pm4->base, R_028A98_VGT_DRAW_PAYLOAD_CNTL, 0); + ac_pm4_set_reg(&pm4->base, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); + ac_pm4_set_reg(&pm4->base, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, + S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1)); + } + done: ac_pm4_finalize(&pm4->base); sctx->cs_preamble_state = pm4;