radv: Add L2 writeback.

Signed-off-by: Bas Nieuwenhuizen <basni@google.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
Bas Nieuwenhuizen
2017-03-06 01:28:53 +01:00
parent 6b657cecd5
commit 66e12d4073
2 changed files with 54 additions and 34 deletions
+10 -8
View File
@@ -587,16 +587,18 @@ enum radv_cmd_flush_bits {
RADV_CMD_FLAG_INV_VMEM_L1 = 1 << 2,
/* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC L2 */
RADV_CMD_FLAG_INV_GLOBAL_L2 = 1 << 3,
/* Same as above, but only writes back and doesn't invalidate */
RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2 = 1 << 4,
/* Framebuffer caches */
RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 4,
RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 5,
RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 6,
RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 7,
RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 5,
RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 6,
RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 7,
RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 8,
/* Engine synchronization. */
RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 8,
RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 9,
RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 10,
RADV_CMD_FLAG_VGT_FLUSH = 1 << 11,
RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 9,
RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 10,
RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 11,
RADV_CMD_FLAG_VGT_FLUSH = 1 << 12,
RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER = (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
+44 -26
View File
@@ -689,6 +689,30 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
}
static void
si_emit_acquire_mem(struct radeon_winsys_cs *cs,
bool is_mec,
unsigned cp_coher_cntl)
{
if (is_mec) {
radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0) |
PKT3_SHADER_TYPE_S(1));
radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
radeon_emit(cs, 0xff); /* CP_COHER_SIZE_HI */
radeon_emit(cs, 0); /* CP_COHER_BASE */
radeon_emit(cs, 0); /* CP_COHER_BASE_HI */
radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
} else {
/* ACQUIRE_MEM is only required on a compute ring. */
radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0));
radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
radeon_emit(cs, 0); /* CP_COHER_BASE */
radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
}
}
void
si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
enum chip_class chip_class,
@@ -701,13 +725,6 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
if (flush_bits & RADV_CMD_FLAG_INV_SMEM_L1)
cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
if (flush_bits & RADV_CMD_FLAG_INV_VMEM_L1)
cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
if (flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) {
cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
if (chip_class >= VI)
cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1);
}
if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) {
cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
@@ -778,28 +795,29 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
radeon_emit(cs, 0);
}
if ((flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) ||
(chip_class <= CIK && (flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) {
cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
if (chip_class >= VI)
cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1);
} else if(flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2) {
cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1) |
S_0301F0_TC_NC_ACTION_ENA(1);
/* L2 writeback doesn't combine with L1 invalidate */
si_emit_acquire_mem(cs, is_mec, cp_coher_cntl);
cp_coher_cntl = 0;
}
if (flush_bits & RADV_CMD_FLAG_INV_VMEM_L1)
cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
/* When one of the DEST_BASE flags is set, SURFACE_SYNC waits for idle.
* Therefore, it should be last. Done in PFP.
*/
if (cp_coher_cntl) {
if (is_mec) {
radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0) |
PKT3_SHADER_TYPE_S(1));
radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
radeon_emit(cs, 0xff); /* CP_COHER_SIZE_HI */
radeon_emit(cs, 0); /* CP_COHER_BASE */
radeon_emit(cs, 0); /* CP_COHER_BASE_HI */
radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
} else {
/* ACQUIRE_MEM is only required on a compute ring. */
radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0));
radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
radeon_emit(cs, 0); /* CP_COHER_BASE */
radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
}
}
if (cp_coher_cntl)
si_emit_acquire_mem(cs, is_mec, cp_coher_cntl);
}
void