amd: stop using CLEAR_STATE on gfx11

CLEAR_STATE is not allowed with user queues, so dropping it will make it
simpler to support user queues.

There are 2 groups of registers:
- those that are set by neither radv nor radeonsi: those are now set
  in the shared preamble
- those that are set by radv but not by radeonsi: those are now set
  in the radeonsi preamble

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34589>
This commit is contained in:
Marek Olšák
2025-04-17 14:04:17 -04:00
committed by Marge Bot
parent cf9b0dd589
commit 5e487dbc49
3 changed files with 46 additions and 15 deletions
+28 -3
View File
@@ -453,6 +453,29 @@ gfx10_init_graphics_preamble_state(const struct ac_preamble_state *state,
S_00B524_MEM_BASE(info->address32_hi >> 8));
/* Context registers. */
if (info->gfx_level >= GFX11) {
/* These are set by CLEAR_STATE on gfx10. We don't use CLEAR_STATE on gfx11. */
ac_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
ac_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
ac_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR, S_028244_BR_X(16384) | S_028244_BR_Y(16384));
ac_pm4_set_reg(pm4, R_02835C_PA_SC_TILE_STEERING_OVERRIDE, info->pa_sc_tile_steering_override);
ac_pm4_set_reg(pm4, R_0283E4_PA_SC_VRS_RATE_CACHE_CNTL, 0);
ac_pm4_set_reg(pm4, R_028428_CB_COVERAGE_OUT_CONTROL, 0);
ac_pm4_set_reg(pm4, R_0286DC_SPI_BARYC_SSAA_CNTL, 0);
ac_pm4_set_reg(pm4, R_0287D4_PA_CL_POINT_X_RAD, 0);
ac_pm4_set_reg(pm4, R_0287D8_PA_CL_POINT_Y_RAD, 0);
ac_pm4_set_reg(pm4, R_0287DC_PA_CL_POINT_SIZE, 0);
ac_pm4_set_reg(pm4, R_0287E0_PA_CL_POINT_CULL_RAD, 0);
ac_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
ac_pm4_set_reg(pm4, R_028824_PA_SU_LINE_STIPPLE_CNTL, 0);
ac_pm4_set_reg(pm4, R_02883C_PA_SU_OVER_RASTERIZATION_CNTL, 0);
ac_pm4_set_reg(pm4, R_028840_PA_STEREO_CNTL, 0);
ac_pm4_set_reg(pm4, R_028A50_VGT_ENHANCE, 0);
ac_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0);
ac_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0);
ac_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0);
}
if (info->gfx_level < GFX11) {
ac_pm4_set_reg(pm4, R_028038_DB_DFSM_CONTROL, S_028038_PUNCHOUT_MODE(V_028038_FORCE_OFF));
}
@@ -491,6 +514,8 @@ gfx10_init_graphics_preamble_state(const struct ac_preamble_state *state,
S_028830_SMALL_PRIM_FILTER_ENABLE(1));
ac_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
if (info->gfx_level >= GFX11) /* cleared by CLEAR_STATE on gfx10 */
ac_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
ac_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE, 1);
ac_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION,
info->gfx_level >= GFX11 ?
@@ -511,10 +536,10 @@ gfx10_init_graphics_preamble_state(const struct ac_preamble_state *state,
ac_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1,
S_028C48_MAX_ALLOC_COUNT(info->pbb_max_alloc_count - gfx10_one) |
S_028C48_MAX_PRIM_PER_BATCH(1023));
if (info->gfx_level >= GFX11_5)
if (info->gfx_level >= GFX11) {
ac_pm4_set_reg(pm4, R_028C54_PA_SC_BINNER_CNTL_2,
S_028C54_ENABLE_PING_PONG_BIN_ORDER(1));
S_028C54_ENABLE_PING_PONG_BIN_ORDER(info->gfx_level >= GFX11_5));
}
/* Break up a pixel wave if it contains deallocs for more than
* half the parameter cache.
+4 -5
View File
@@ -1143,12 +1143,11 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
info->lds_encode_granularity = info->gfx_level >= GFX7 ? 128 * 4 : 64 * 4;
info->lds_alloc_granularity = info->gfx_level >= GFX10_3 ? 256 * 4 : info->lds_encode_granularity;
/* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
* on GFX6. Some CLEAR_STATE cause asic hang on radeon kernel, etc.
* SPI_VS_OUT_CONFIG. So only enable GFX7 CLEAR_STATE on amdgpu kernel.
/* The mere presence of CLEAR_STATE in the IB causes random GPU hangs on GFX6. CLEAR_STATE
* causes GPU hangs with the radeon kernel driver, so only enable GFX7 CLEAR_STATE on amdgpu.
* GFX11+ supports CLEAR_STATE, but we have decided not to use it.
*/
info->has_clear_state = info->gfx_level >= GFX7 && info->gfx_level < GFX12 &&
!(info->userq_ip_mask & BITFIELD_BIT(AMD_IP_GFX));
info->has_clear_state = info->gfx_level >= GFX7 && info->gfx_level < GFX11;
info->has_distributed_tess =
info->gfx_level >= GFX10 || (info->gfx_level >= GFX8 && info->max_se >= 2);
+14 -7
View File
@@ -5123,15 +5123,9 @@ static void gfx10_init_gfx_preamble_state(struct si_context *sctx)
ac_pm4_cmd_add(&pm4->base, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
}
if (sscreen->info.has_clear_state) {
if (sctx->gfx_level < GFX11) {
ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_CLEAR_STATE, 0, 0));
ac_pm4_cmd_add(&pm4->base, 0);
} else {
/* PA_SC_TILE_STEERING_OVERRIDE needs to be written else observing corruption in
* gfx11 with userq.
*/
ac_pm4_set_reg(&pm4->base, R_02835C_PA_SC_TILE_STEERING_OVERRIDE,
sscreen->info.pa_sc_tile_steering_override);
}
}
@@ -5161,6 +5155,19 @@ static void gfx10_init_gfx_preamble_state(struct si_context *sctx)
S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_SC_VRS_COMB_MODE_OVERRIDE));
}
if (sctx->gfx_level >= GFX11) {
/* These are set by CLEAR_STATE on gfx10. We don't use CLEAR_STATE on gfx11. */
ac_pm4_set_reg(&pm4->base, R_028034_PA_SC_SCREEN_SCISSOR_BR,
S_028034_BR_X(16384) | S_028034_BR_Y(16384));
ac_pm4_set_reg(&pm4->base, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
ac_pm4_set_reg(&pm4->base, R_0286E0_SPI_BARYC_CNTL, 0);
ac_pm4_set_reg(&pm4->base, R_028828_PA_SU_LINE_STIPPLE_SCALE, 0);
ac_pm4_set_reg(&pm4->base, R_028A98_VGT_DRAW_PAYLOAD_CNTL, 0);
ac_pm4_set_reg(&pm4->base, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
ac_pm4_set_reg(&pm4->base, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1));
}
done:
ac_pm4_finalize(&pm4->base);
sctx->cs_preamble_state = pm4;