radeonsi: add TC L2 prefetch for shaders and VBO descriptors
Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
@@ -386,6 +386,18 @@ void si_copy_buffer(struct si_context *sctx,
|
||||
sctx->b.num_cp_dma_calls++;
|
||||
}
|
||||
|
||||
void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf,
|
||||
uint64_t offset, unsigned size)
|
||||
{
|
||||
assert(sctx->b.chip_class >= CIK);
|
||||
|
||||
si_copy_buffer(sctx, buf, buf, offset, offset, size,
|
||||
SI_CPDMA_SKIP_CHECK_CS_SPACE |
|
||||
SI_CPDMA_SKIP_SYNC_AFTER |
|
||||
SI_CPDMA_SKIP_SYNC_BEFORE |
|
||||
SI_CPDMA_SKIP_GFX_SYNC);
|
||||
}
|
||||
|
||||
void si_init_cp_dma_functions(struct si_context *sctx)
|
||||
{
|
||||
sctx->b.clear_buffer = si_clear_buffer;
|
||||
|
||||
@@ -381,6 +381,8 @@ void si_copy_buffer(struct si_context *sctx,
|
||||
struct pipe_resource *dst, struct pipe_resource *src,
|
||||
uint64_t dst_offset, uint64_t src_offset, unsigned size,
|
||||
unsigned user_flags);
|
||||
void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf,
|
||||
uint64_t offset, unsigned size);
|
||||
void si_init_cp_dma_functions(struct si_context *sctx);
|
||||
|
||||
/* si_debug.c */
|
||||
|
||||
@@ -937,6 +937,17 @@ void si_ce_post_draw_synchronization(struct si_context *sctx)
|
||||
}
|
||||
}
|
||||
|
||||
static void cik_prefetch_shader_async(struct si_context *sctx,
|
||||
struct si_pm4_state *state)
|
||||
{
|
||||
if (state) {
|
||||
struct pipe_resource *bo = &state->bo[0]->b.b;
|
||||
assert(state->nbo == 1);
|
||||
|
||||
cik_prefetch_TC_L2_async(sctx, bo, 0, bo->width0);
|
||||
}
|
||||
}
|
||||
|
||||
void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
|
||||
{
|
||||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
@@ -1114,10 +1125,34 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
|
||||
if (!si_upload_vertex_buffer_descriptors(sctx))
|
||||
return;
|
||||
|
||||
/* Flushed caches prior to emitting states. */
|
||||
/* Flushed caches prior to prefetching shaders. */
|
||||
if (sctx->b.flags)
|
||||
si_emit_cache_flush(sctx);
|
||||
|
||||
/* Prefetch shaders and VBO descriptors to TC L2. */
|
||||
if (sctx->b.chip_class >= CIK) {
|
||||
if (si_pm4_state_changed(sctx, ls))
|
||||
cik_prefetch_shader_async(sctx, sctx->queued.named.ls);
|
||||
if (si_pm4_state_changed(sctx, hs))
|
||||
cik_prefetch_shader_async(sctx, sctx->queued.named.hs);
|
||||
if (si_pm4_state_changed(sctx, es))
|
||||
cik_prefetch_shader_async(sctx, sctx->queued.named.es);
|
||||
if (si_pm4_state_changed(sctx, gs))
|
||||
cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
|
||||
if (si_pm4_state_changed(sctx, vs))
|
||||
cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
|
||||
|
||||
/* Vertex buffer descriptors are uploaded uncached, so prefetch
|
||||
* them right after the VS binary. */
|
||||
if (sctx->vertex_buffers.pointer_dirty) {
|
||||
cik_prefetch_TC_L2_async(sctx, &sctx->vertex_buffers.buffer->b.b,
|
||||
sctx->vertex_buffers.buffer_offset,
|
||||
sctx->vertex_elements->count * 16);
|
||||
}
|
||||
if (si_pm4_state_changed(sctx, ps))
|
||||
cik_prefetch_shader_async(sctx, sctx->queued.named.ps);
|
||||
}
|
||||
|
||||
/* Emit states. */
|
||||
mask = sctx->dirty_atoms;
|
||||
while (mask) {
|
||||
|
||||
Reference in New Issue
Block a user