r300: set PVS_LAST_VTX_SRC_INST properly to last input read
From docs: "The PVS Instruction which uses the Input Vertex Memory for the last time. This value is used to free up the Input Vertex Slots ASAP. This field must be set to a valid instruction." Right now it is set to the last instruction. When the last read is inside a loop, set it on the outermost ENDLOOP. This could in theory help performance, but none of my usual benchmarks including GLmark, Unigine Sanctuary or Lightsmark show any measurable performance difference. Suggested in: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6045 Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com> Reviewed-by: Emma Anholt <emma@anholt.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15252>
This commit is contained in:
committed by
Marge Bot
parent
43c3f4386b
commit
19db6b760a
@@ -371,10 +371,12 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
|
||||
|
||||
unsigned loops[R500_PVS_MAX_LOOP_DEPTH] = {};
|
||||
unsigned loop_depth = 0;
|
||||
bool last_input_read_at_loop_end = false;
|
||||
|
||||
compiler->code->pos_end = 0; /* Not supported yet */
|
||||
compiler->code->length = 0;
|
||||
compiler->code->num_temporaries = 0;
|
||||
compiler->code->last_input_read = 0;
|
||||
|
||||
compiler->SetHwInputOutput(compiler);
|
||||
|
||||
@@ -448,6 +450,11 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
|
||||
unsigned int last_addr;
|
||||
unsigned int ret_addr;
|
||||
|
||||
if (loop_depth == 1 && last_input_read_at_loop_end) {
|
||||
compiler->code->last_input_read = compiler->code->length / 4;
|
||||
last_input_read_at_loop_end = false;
|
||||
}
|
||||
|
||||
ret_addr = loops[--loop_depth];
|
||||
act_addr = ret_addr - 1;
|
||||
last_addr = (compiler->code->length / 4) - 1;
|
||||
@@ -536,10 +543,19 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
|
||||
vpi->DstReg.Index >= compiler->code->num_temporaries)
|
||||
compiler->code->num_temporaries = vpi->DstReg.Index + 1;
|
||||
|
||||
for (unsigned i = 0; i < info->NumSrcRegs; i++)
|
||||
for (unsigned i = 0; i < info->NumSrcRegs; i++) {
|
||||
if (vpi->SrcReg[i].File == RC_FILE_TEMPORARY &&
|
||||
vpi->SrcReg[i].Index >= compiler->code->num_temporaries)
|
||||
compiler->code->num_temporaries = vpi->SrcReg[i].Index + 1;
|
||||
if (vpi->SrcReg[i].File == RC_FILE_INPUT) {
|
||||
if (loop_depth == 0)
|
||||
compiler->code->last_input_read = compiler->code->length / 4;
|
||||
else
|
||||
last_input_read_at_loop_end = true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
if (compiler->code->num_temporaries > compiler->Base.max_temp_regs) {
|
||||
rc_error(&compiler->Base, "Too many temporaries.\n");
|
||||
|
||||
@@ -270,6 +270,7 @@ struct r300_vertex_program_code {
|
||||
int num_temporaries; /* Number of temp vars used by program */
|
||||
int inputs[VSF_MAX_INPUTS];
|
||||
int outputs[VSF_MAX_OUTPUTS];
|
||||
unsigned last_input_read;
|
||||
|
||||
struct rc_constant_list constants;
|
||||
unsigned *constants_remap_table;
|
||||
|
||||
@@ -1128,7 +1128,7 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state)
|
||||
OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_0, R300_PVS_FIRST_INST(0) |
|
||||
R300_PVS_XYZW_VALID_INST(instruction_count - 1) |
|
||||
R300_PVS_LAST_INST(instruction_count - 1));
|
||||
OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_1, instruction_count - 1);
|
||||
OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_1, code->last_input_read);
|
||||
|
||||
OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0);
|
||||
OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, code->length);
|
||||
|
||||
Reference in New Issue
Block a user