aco/gfx10: skip waitcnts or use vm_vsrc(0) for workgroup lds barriers
fossil-db (navi21):
Totals from 36594 (45.84% of 79825) affected shaders:
Instrs: 19922581 -> 19922563 (-0.00%)
CodeSize: 103616980 -> 103616956 (-0.00%)
Latency: 69862064 -> 69053273 (-1.16%)
InvThroughput: 14607708 -> 14606308 (-0.01%); split: -0.01%, +0.00%

fossil-db (navi31):
Totals from 1641 (2.06% of 79825) affected shaders:
Instrs: 1247591 -> 1247875 (+0.02%); split: -0.00%, +0.03%
CodeSize: 6259516 -> 6260612 (+0.02%); split: -0.00%, +0.02%
Latency: 7657224 -> 7577299 (-1.04%); split: -1.05%, +0.00%
InvThroughput: 1150669 -> 1148171 (-0.22%); split: -0.22%, +0.00%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36491>
This commit is contained in:
@@ -487,6 +487,7 @@ finish_barrier_internal(wait_ctx& ctx, wait_imm& imm, depctr_wait& depctr, Instr
|
||||
|
||||
if (info->scope[storage_idx] <= scope_workgroup) {
|
||||
bool is_vmem = instr->isVMEM() || (instr->isFlatLike() && !instr->flatlike().may_use_lds);
|
||||
bool is_lds = instr->isDS() && !instr->ds().gds;
|
||||
bool is_barrier = instr->isBarrier(); /* This is only called for control barriers. */
|
||||
|
||||
/* In non-WGP, the L1 (L0 on GFX10+) cache keeps all memory operations in-order for the same
|
||||
@@ -496,6 +497,13 @@ finish_barrier_internal(wait_ctx& ctx, wait_imm& imm, depctr_wait& depctr, Instr
|
||||
events &= ~(event_vmem | event_vmem_store);
|
||||
vm_vsrc |= is_barrier && ctx.gfx_level >= GFX10;
|
||||
}
|
||||
|
||||
/* Similar for LDS. */
|
||||
if ((events & event_lds) &&
|
||||
(is_lds || (is_barrier && ctx.gfx_level >= GFX10 && !ctx.program->wgp_mode))) {
|
||||
events &= ~event_lds;
|
||||
vm_vsrc |= is_barrier;
|
||||
}
|
||||
}
|
||||
|
||||
if (events)
|
||||
|
||||
Reference in New Issue
Block a user