radeonsi: determine si_pm4_state::reg_va_low_idx automatically

The existing code doesn't work with the packed SET packets, so si_pm4_state
needs to find reg_va_low_idx after the whole packet is built.

Remove si_pm4_set_reg_va and also determine reg_va_low_idx at finalize time
for plain SET_SH_REG packets.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23517>
This commit is contained in:
Marek Olšák
2023-06-07 13:43:31 -04:00
committed by Marge Bot
parent 22f3bcfb5a
commit ccb856fbaa
3 changed files with 47 additions and 20 deletions
+34 -6
View File
@@ -8,6 +8,7 @@
#include "si_build_pm4.h"
#include "sid.h"
#include "util/u_memory.h"
#include "ac_debug.h"
static void si_pm4_set_reg_custom(struct si_pm4_state *state, unsigned reg, uint32_t val,
unsigned opcode, unsigned idx);
@@ -123,6 +124,25 @@ void si_pm4_finalize(struct si_pm4_state *state)
state->ndw = state->last_pm4 + 2 + reg_count;
state->last_opcode = PKT3_SET_SH_REG;
} else {
/* Set reg_va_low_idx to where the shader address is stored in the pm4 state. */
if (state->screen->debug_flags & DBG(SQTT) &&
(state->last_opcode == PKT3_SET_SH_REG_PAIRS_PACKED ||
state->last_opcode == PKT3_SET_SH_REG_PAIRS_PACKED_N)) {
if (state->packed_is_padded)
reg_count++; /* Add this back because we only need to record the last write. */
for (int i = reg_count - 1; i >= 0; i--) {
unsigned reg_offset = SI_SH_REG_OFFSET + get_packed_reg_dw_offsetN(state, i) * 4;
if (strstr(ac_get_register_name(state->screen->info.gfx_level,
state->screen->info.family, reg_offset),
"SPI_SHADER_PGM_LO_")) {
state->reg_va_low_idx = get_packed_reg_valueN_idx(state, i);
break;
}
}
}
/* All SET_*_PAIRS* packets on the gfx queue must set RESET_FILTER_CAM. */
if (!state->is_compute_queue)
state->pm4[state->last_pm4] |= PKT3_RESET_FILTER_CAM_S(1);
@@ -132,7 +152,21 @@ void si_pm4_finalize(struct si_pm4_state *state)
state->pm4[state->last_pm4] &= PKT3_IT_OPCODE_C;
state->pm4[state->last_pm4] |= PKT3_IT_OPCODE_S(PKT3_SET_SH_REG_PAIRS_PACKED_N);
}
}
}
if (state->screen->debug_flags & DBG(SQTT) && state->last_opcode == PKT3_SET_SH_REG) {
/* Set reg_va_low_idx to where the shader address is stored in the pm4 state. */
unsigned reg_count = PKT_COUNT_G(state->pm4[state->last_pm4]);
unsigned reg_base_offset = SI_SH_REG_OFFSET + state->pm4[state->last_pm4 + 1] * 4;
for (unsigned i = 0; i < reg_count; i++) {
if (strstr(ac_get_register_name(state->screen->info.gfx_level,
state->screen->info.family, reg_base_offset + i * 4),
"SPI_SHADER_PGM_LO_")) {
state->reg_va_low_idx = state->last_pm4 + 2 + i;
break;
}
}
}
}
@@ -262,12 +296,6 @@ void si_pm4_set_reg_idx3(struct si_pm4_state *state, unsigned reg, uint32_t val)
}
}
/**
 * Set a register via si_pm4_set_reg() and record where its value lives in the
 * pm4 state by storing (state->ndw - 1) into state->reg_va_low_idx.
 *
 * NOTE(review): this presumably assumes the value was just appended as the
 * last dword of the state, so ndw - 1 is its index — confirm against
 * si_pm4_set_reg(). Per the commit message, this approach does not work with
 * the packed SET packets, which is why this function is removed in favor of
 * finding reg_va_low_idx after the whole packet is built.
 *
 * \param state  pm4 state being built
 * \param reg    register offset passed through to si_pm4_set_reg()
 * \param val    register value passed through to si_pm4_set_reg()
 */
void si_pm4_set_reg_va(struct si_pm4_state *state, unsigned reg, uint32_t val)
{
si_pm4_set_reg(state, reg, val);
/* Remember the index of the most recently written dword. */
state->reg_va_low_idx = state->ndw - 1;
}
void si_pm4_clear_state(struct si_pm4_state *state, struct si_screen *sscreen,
bool is_compute_queue)
{
-1
View File
@@ -52,7 +52,6 @@ struct si_pm4_state {
void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw);
void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val);
void si_pm4_set_reg_va(struct si_pm4_state *state, unsigned reg, uint32_t val);
void si_pm4_set_reg_idx3(struct si_pm4_state *state, unsigned reg, uint32_t val);
void si_pm4_finalize(struct si_pm4_state *state);
@@ -685,7 +685,7 @@ static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader)
return;
va = shader->bo->gpu_address;
si_pm4_set_reg_va(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
shader->config.rsrc1 = S_00B528_VGPRS(si_shader_encode_vgprs(shader)) |
S_00B528_SGPRS(si_shader_encode_sgprs(shader)) |
@@ -714,13 +714,13 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
S_00B404_CU_EN(0xffff),
C_00B404_CU_EN, 16, &sscreen->info));
si_pm4_set_reg_va(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
} else if (sscreen->info.gfx_level >= GFX10) {
si_pm4_set_reg_va(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
} else if (sscreen->info.gfx_level >= GFX9) {
si_pm4_set_reg_va(pm4, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8);
si_pm4_set_reg(pm4, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8);
} else {
si_pm4_set_reg_va(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS,
S_00B424_MEM_BASE(sscreen->info.address32_hi >> 8));
}
@@ -797,7 +797,7 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
oc_lds_en = shader->selector->stage == MESA_SHADER_TESS_EVAL ? 1 : 0;
si_pm4_set_reg_va(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES,
S_00B324_MEM_BASE(sscreen->info.address32_hi >> 8));
si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES,
@@ -1062,9 +1062,9 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
num_user_sgprs = GFX9_GS_NUM_USER_SGPR;
if (sscreen->info.gfx_level >= GFX10) {
si_pm4_set_reg_va(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
} else {
si_pm4_set_reg_va(pm4, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8);
si_pm4_set_reg(pm4, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8);
}
uint32_t rsrc1 = S_00B228_VGPRS(si_shader_encode_vgprs(shader)) |
@@ -1115,7 +1115,7 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
S_00B21C_WAVE_LIMIT(0x3F),
C_00B21C_CU_EN, 0, &sscreen->info);
si_pm4_set_reg_va(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS,
S_00B224_MEM_BASE(sscreen->info.address32_hi >> 8));
@@ -1357,7 +1357,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
else
gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */
si_pm4_set_reg_va(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
S_00B228_VGPRS(si_shader_encode_vgprs(shader)) |
S_00B228_FLOAT_MODE(shader->config.float_mode) |
@@ -1669,7 +1669,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(late_alloc_wave64));
}
si_pm4_set_reg_va(pm4, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8);
si_pm4_set_reg(pm4, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8);
si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS,
S_00B124_MEM_BASE(sscreen->info.address32_hi >> 8));
@@ -1943,7 +1943,7 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader)
}
uint64_t va = shader->bo->gpu_address;
si_pm4_set_reg_va(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);
si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);
si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS,
S_00B024_MEM_BASE(sscreen->info.address32_hi >> 8));
@@ -2002,7 +2002,7 @@ static void si_shader_init_pm4_state(struct si_screen *sscreen, struct si_shader
assert(0);
}
assert(shader->pm4.reg_va_low_idx != 0);
assert(!(sscreen->debug_flags & DBG(SQTT)) || shader->pm4.reg_va_low_idx != 0);
}
static void si_clear_vs_key_inputs(struct si_context *sctx, union si_shader_key *key,