panfrost: Only link varyings once in good conditions
If the varying descriptors will always be the same for a given shader variant (certainly true if none of separable shaders, transform feedback, or point sprites are used), we only need to link once. Now that pan_pool supports both owned and unowned modes, we have the flexibility to reuse the code path for both allocation strategies. Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10954>
This commit is contained in:
committed by
Marge Bot
parent
c746747cb8
commit
af78f52493
@@ -2066,17 +2066,6 @@ panfrost_emit_varying(const struct panfrost_device *dev,
|
||||
/* Links varyings and uploads ATTRIBUTE descriptors. Can execute at link time,
|
||||
* rather than draw time (under good conditions). */
|
||||
|
||||
struct pan_linkage {
|
||||
/* Uploaded attribute descriptors */
|
||||
mali_ptr producer, consumer;
|
||||
|
||||
/* Varyings buffers required */
|
||||
uint32_t present;
|
||||
|
||||
/* Per-vertex stride for general varying buffer */
|
||||
uint32_t stride;
|
||||
};
|
||||
|
||||
static void
|
||||
panfrost_emit_varying_descs(
|
||||
struct pan_pool *pool,
|
||||
@@ -2100,6 +2089,12 @@ panfrost_emit_varying_descs(
|
||||
struct panfrost_ptr T = panfrost_pool_alloc_desc_array(pool,
|
||||
producer_count + consumer_count, ATTRIBUTE);
|
||||
|
||||
/* Take a reference if we're being put on the CSO */
|
||||
if (!pool->owned) {
|
||||
out->bo = pool->transient_bo;
|
||||
panfrost_bo_reference(out->bo);
|
||||
}
|
||||
|
||||
struct mali_attribute_packed *descs = T.cpu;
|
||||
out->producer = producer_count ? T.gpu : 0;
|
||||
out->consumer = consumer_count ? T.gpu +
|
||||
@@ -2177,7 +2172,6 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
|
||||
struct panfrost_context *ctx = batch->ctx;
|
||||
struct panfrost_device *dev = pan_device(ctx->base.screen);
|
||||
struct panfrost_shader_state *vs, *fs;
|
||||
struct pan_linkage linkage;
|
||||
|
||||
vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
|
||||
fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
|
||||
@@ -2188,11 +2182,28 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
|
||||
if (!point_coord_replace || pan_is_bifrost(dev))
|
||||
point_coord_mask = 0;
|
||||
|
||||
/* Emit ATTRIBUTE descriptors */
|
||||
panfrost_emit_varying_descs(&batch->pool, vs, fs, &ctx->streamout, point_coord_mask, &linkage);
|
||||
/* In good conditions, we only need to link varyings once */
|
||||
bool prelink =
|
||||
(point_coord_mask == 0) &&
|
||||
(ctx->streamout.num_targets == 0) &&
|
||||
!vs->info.separable &&
|
||||
!fs->info.separable;
|
||||
|
||||
/* Try to reduce copies */
|
||||
struct pan_linkage _linkage;
|
||||
struct pan_linkage *linkage = prelink ? &vs->linkage : &_linkage;
|
||||
|
||||
/* Emit ATTRIBUTE descriptors if needed */
|
||||
if (!prelink || vs->linkage.bo == NULL) {
|
||||
struct pan_pool *pool =
|
||||
prelink ? &ctx->descs : &batch->pool;
|
||||
|
||||
panfrost_emit_varying_descs(pool, vs, fs, &ctx->streamout, point_coord_mask, linkage);
|
||||
}
|
||||
|
||||
struct pipe_stream_output_info *so = &vs->stream_output;
|
||||
unsigned xfb_base = pan_xfb_base(linkage.present);
|
||||
unsigned present = linkage->present, stride = linkage->stride;
|
||||
unsigned xfb_base = pan_xfb_base(present);
|
||||
struct panfrost_ptr T =
|
||||
panfrost_pool_alloc_desc_array(&batch->pool,
|
||||
xfb_base +
|
||||
@@ -2220,30 +2231,30 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
|
||||
}
|
||||
|
||||
panfrost_emit_varyings(batch,
|
||||
&varyings[pan_varying_index(linkage.present, PAN_VARY_GENERAL)],
|
||||
linkage.stride, vertex_count);
|
||||
&varyings[pan_varying_index(present, PAN_VARY_GENERAL)],
|
||||
stride, vertex_count);
|
||||
|
||||
/* fp32 vec4 gl_Position */
|
||||
*position = panfrost_emit_varyings(batch,
|
||||
&varyings[pan_varying_index(linkage.present, PAN_VARY_POSITION)],
|
||||
&varyings[pan_varying_index(present, PAN_VARY_POSITION)],
|
||||
sizeof(float) * 4, vertex_count);
|
||||
|
||||
if (linkage.present & BITFIELD_BIT(PAN_VARY_PSIZ)) {
|
||||
if (present & BITFIELD_BIT(PAN_VARY_PSIZ)) {
|
||||
*psiz = panfrost_emit_varyings(batch,
|
||||
&varyings[pan_varying_index(linkage.present, PAN_VARY_PSIZ)],
|
||||
&varyings[pan_varying_index(present, PAN_VARY_PSIZ)],
|
||||
2, vertex_count);
|
||||
}
|
||||
|
||||
pan_emit_special_input(varyings, linkage.present,
|
||||
pan_emit_special_input(varyings, present,
|
||||
PAN_VARY_PNTCOORD, MALI_ATTRIBUTE_SPECIAL_POINT_COORD);
|
||||
pan_emit_special_input(varyings, linkage.present, PAN_VARY_FACE,
|
||||
pan_emit_special_input(varyings, present, PAN_VARY_FACE,
|
||||
MALI_ATTRIBUTE_SPECIAL_FRONT_FACING);
|
||||
pan_emit_special_input(varyings, linkage.present, PAN_VARY_FRAGCOORD,
|
||||
pan_emit_special_input(varyings, present, PAN_VARY_FRAGCOORD,
|
||||
MALI_ATTRIBUTE_SPECIAL_FRAG_COORD);
|
||||
|
||||
*buffers = T.gpu;
|
||||
*vs_attribs = linkage.producer;
|
||||
*fs_attribs = linkage.consumer;
|
||||
*vs_attribs = linkage->producer;
|
||||
*fs_attribs = linkage->consumer;
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
@@ -977,6 +977,7 @@ panfrost_delete_shader_state(
|
||||
struct panfrost_shader_state *shader_state = &cso->variants[i];
|
||||
panfrost_bo_unreference(shader_state->bin.bo);
|
||||
panfrost_bo_unreference(shader_state->state.bo);
|
||||
panfrost_bo_unreference(shader_state->linkage.bo);
|
||||
}
|
||||
|
||||
free(cso->variants);
|
||||
|
||||
@@ -235,6 +235,23 @@ struct panfrost_rasterizer {
|
||||
struct mali_stencil_mask_misc_packed stencil_misc;
|
||||
};
|
||||
|
||||
/* Linked varyings */
|
||||
struct pan_linkage {
|
||||
/* If the upload is owned by the CSO instead
|
||||
* of the pool, the referenced BO. Else,
|
||||
* NULL. */
|
||||
struct panfrost_bo *bo;
|
||||
|
||||
/* Uploaded attribute descriptors */
|
||||
mali_ptr producer, consumer;
|
||||
|
||||
/* Varyings buffers required */
|
||||
uint32_t present;
|
||||
|
||||
/* Per-vertex stride for general varying buffer */
|
||||
uint32_t stride;
|
||||
};
|
||||
|
||||
/* Variants bundle together to form the backing CSO, bundling multiple
|
||||
* shaders with varying emulated features baked in */
|
||||
|
||||
@@ -251,6 +268,9 @@ struct panfrost_shader_state {
|
||||
|
||||
struct pan_shader_info info;
|
||||
|
||||
/* Linked varyings, for non-separable programs */
|
||||
struct pan_linkage linkage;
|
||||
|
||||
struct pipe_stream_output_info stream_output;
|
||||
uint64_t so_mask;
|
||||
|
||||
|
||||
@@ -77,7 +77,8 @@ struct pan_pool_ref {
|
||||
static inline struct pan_pool_ref
|
||||
pan_take_ref(struct pan_pool *pool, mali_ptr ptr)
|
||||
{
|
||||
panfrost_bo_reference(pool->transient_bo);
|
||||
if (!pool->owned)
|
||||
panfrost_bo_reference(pool->transient_bo);
|
||||
|
||||
return (struct pan_pool_ref) {
|
||||
.gpu = ptr,
|
||||
|
||||
@@ -174,6 +174,7 @@ pan_shader_compile(const struct panfrost_device *dev,
|
||||
info->stage = s->info.stage;
|
||||
info->contains_barrier = s->info.uses_memory_barrier ||
|
||||
s->info.uses_control_barrier;
|
||||
info->separable = s->info.separate_shader;
|
||||
|
||||
switch (info->stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
|
||||
@@ -190,6 +190,7 @@ struct pan_shader_info {
|
||||
} vs;
|
||||
};
|
||||
|
||||
bool separable;
|
||||
bool contains_barrier;
|
||||
bool writes_global;
|
||||
uint64_t outputs_written;
|
||||
|
||||
Reference in New Issue
Block a user