aco/insert_delay_alu: consider more implicit waits
Foz-DB Navi31: Totals from 37961 (47.81% of 79395) affected shaders: Instrs: 34175286 -> 33978599 (-0.58%) CodeSize: 180059352 -> 179190076 (-0.48%); split: -0.48%, +0.00% Latency: 259826196 -> 259798474 (-0.01%); split: -0.01%, +0.00% InvThroughput: 42792700 -> 42789298 (-0.01%); split: -0.01%, +0.00% Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31132>
This commit is contained in:
@@ -154,16 +154,41 @@ update_alu(delay_ctx& ctx, bool is_valu, bool is_trans, int cycles)
|
||||
void
|
||||
kill_alu(alu_delay_info& delay, Instruction* instr, delay_ctx& ctx)
|
||||
{
|
||||
if (parse_depctr_wait(instr).va_vdst == 0) {
|
||||
/* Consider frontend waits first. These are automatically done by the hardware,
|
||||
* so we don't need to insert s_delay_alu.
|
||||
* They are also lower granularity, waiting for accesses of a counter instead
|
||||
* of only the real per register dependencies.
|
||||
*/
|
||||
depctr_wait wait = parse_depctr_wait(instr);
|
||||
|
||||
int8_t implict_cycles = 0;
|
||||
if (!wait.va_vdst || !wait.va_sdst || !wait.va_vcc || !wait.sa_sdst || !wait.sa_exec ||
|
||||
!wait.va_exec) {
|
||||
std::map<PhysReg, alu_delay_info>::iterator it = ctx.gpr_map.begin();
|
||||
while (it != ctx.gpr_map.end()) {
|
||||
alu_delay_info& entry = it->second;
|
||||
entry.valu_instrs = alu_delay_info::valu_nop;
|
||||
entry.trans_instrs = alu_delay_info::trans_nop;
|
||||
bool wait_valu = !wait.va_vdst || (it->first < vcc && !wait.va_sdst) ||
|
||||
(it->first >= vcc && it->first <= vcc_hi && !wait.va_vcc) ||
|
||||
(it->first >= exec && it->first <= exec_hi && !wait.va_exec);
|
||||
if (wait_valu) {
|
||||
implict_cycles = MAX3(implict_cycles, entry.valu_cycles, entry.trans_cycles);
|
||||
entry.valu_cycles = 0;
|
||||
entry.trans_cycles = 0;
|
||||
}
|
||||
bool wait_salu = ((it->first <= vcc_hi || it->first == scc) && !wait.sa_sdst) ||
|
||||
(it->first >= exec && it->first <= exec_hi && !wait.sa_exec);
|
||||
if (wait_salu) {
|
||||
implict_cycles = MAX2(implict_cycles, entry.salu_cycles);
|
||||
entry.salu_cycles = 0;
|
||||
}
|
||||
it = it->second.fixup() ? ctx.gpr_map.erase(it) : std::next(it);
|
||||
}
|
||||
}
|
||||
|
||||
/* Previous alu progresses as usual while the frontend waits. */
|
||||
if (implict_cycles != 0)
|
||||
update_alu(ctx, false, false, implict_cycles);
|
||||
|
||||
if (instr->isVALU() || instr->isSALU())
|
||||
check_alu(ctx, delay, instr);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user