radv: Add L2 writeback.
Signed-off-by: Bas Nieuwenhuizen <basni@google.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
@@ -587,16 +587,18 @@ enum radv_cmd_flush_bits {
|
||||
RADV_CMD_FLAG_INV_VMEM_L1 = 1 << 2,
|
||||
/* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC L2 */
|
||||
RADV_CMD_FLAG_INV_GLOBAL_L2 = 1 << 3,
|
||||
/* Same as above, but only writes back and doesn't invalidate */
|
||||
RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2 = 1 << 4,
|
||||
/* Framebuffer caches */
|
||||
RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 4,
|
||||
RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 5,
|
||||
RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 6,
|
||||
RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 7,
|
||||
RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 5,
|
||||
RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 6,
|
||||
RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 7,
|
||||
RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 8,
|
||||
/* Engine synchronization. */
|
||||
RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 8,
|
||||
RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 9,
|
||||
RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 10,
|
||||
RADV_CMD_FLAG_VGT_FLUSH = 1 << 11,
|
||||
RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 9,
|
||||
RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 10,
|
||||
RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 11,
|
||||
RADV_CMD_FLAG_VGT_FLUSH = 1 << 12,
|
||||
|
||||
RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER = (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
|
||||
RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
|
||||
|
||||
@@ -689,6 +689,30 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
si_emit_acquire_mem(struct radeon_winsys_cs *cs,
|
||||
bool is_mec,
|
||||
unsigned cp_coher_cntl)
|
||||
{
|
||||
if (is_mec) {
|
||||
radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0) |
|
||||
PKT3_SHADER_TYPE_S(1));
|
||||
radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
|
||||
radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
|
||||
radeon_emit(cs, 0xff); /* CP_COHER_SIZE_HI */
|
||||
radeon_emit(cs, 0); /* CP_COHER_BASE */
|
||||
radeon_emit(cs, 0); /* CP_COHER_BASE_HI */
|
||||
radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
|
||||
} else {
|
||||
/* ACQUIRE_MEM is only required on a compute ring. */
|
||||
radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0));
|
||||
radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
|
||||
radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
|
||||
radeon_emit(cs, 0); /* CP_COHER_BASE */
|
||||
radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
|
||||
enum chip_class chip_class,
|
||||
@@ -701,13 +725,6 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
|
||||
cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
|
||||
if (flush_bits & RADV_CMD_FLAG_INV_SMEM_L1)
|
||||
cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
|
||||
if (flush_bits & RADV_CMD_FLAG_INV_VMEM_L1)
|
||||
cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
|
||||
if (flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) {
|
||||
cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
|
||||
if (chip_class >= VI)
|
||||
cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1);
|
||||
}
|
||||
|
||||
if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) {
|
||||
cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
|
||||
@@ -778,28 +795,29 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
|
||||
radeon_emit(cs, 0);
|
||||
}
|
||||
|
||||
if ((flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) ||
|
||||
(chip_class <= CIK && (flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) {
|
||||
cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
|
||||
if (chip_class >= VI)
|
||||
cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1);
|
||||
} else if(flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2) {
|
||||
cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1) |
|
||||
S_0301F0_TC_NC_ACTION_ENA(1);
|
||||
|
||||
/* L2 writeback doesn't combine with L1 invalidate */
|
||||
si_emit_acquire_mem(cs, is_mec, cp_coher_cntl);
|
||||
|
||||
cp_coher_cntl = 0;
|
||||
}
|
||||
|
||||
if (flush_bits & RADV_CMD_FLAG_INV_VMEM_L1)
|
||||
cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
|
||||
|
||||
/* When one of the DEST_BASE flags is set, SURFACE_SYNC waits for idle.
|
||||
* Therefore, it should be last. Done in PFP.
|
||||
*/
|
||||
if (cp_coher_cntl) {
|
||||
if (is_mec) {
|
||||
radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0) |
|
||||
PKT3_SHADER_TYPE_S(1));
|
||||
radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
|
||||
radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
|
||||
radeon_emit(cs, 0xff); /* CP_COHER_SIZE_HI */
|
||||
radeon_emit(cs, 0); /* CP_COHER_BASE */
|
||||
radeon_emit(cs, 0); /* CP_COHER_BASE_HI */
|
||||
radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
|
||||
} else {
|
||||
/* ACQUIRE_MEM is only required on a compute ring. */
|
||||
radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0));
|
||||
radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
|
||||
radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
|
||||
radeon_emit(cs, 0); /* CP_COHER_BASE */
|
||||
radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
|
||||
}
|
||||
}
|
||||
if (cp_coher_cntl)
|
||||
si_emit_acquire_mem(cs, is_mec, cp_coher_cntl);
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
Reference in New Issue
Block a user