zink: use feedback loop layout to correctly handle implicit feedback loops

an implicit feedback loop occurs when an app happens to bind the same image
as both a framebuffer attachment and a sampler for the same draw

an explicit feedback loop occurs when an app uses fbfetch to read data back
from the framebuffer using input attachments

fbfetch is already handled, but implicit feedback loops require more work:
* detecting them happens on-the-fly
* pipeline variants are required

this handles implicit feedback loops by detecting them at draw time during
barrier updates and then flagging a pipeline state change to trigger variant creation.
the feedback loop bits (the per-attachment bits on the context and the flag in the
gfx pipeline state) are then unset when the framebuffer/sampler binds are removed

fixes #7309

fixes (tu):
KHR-GL46.texture_barrier.disjoint-texels
KHR-GL46.texture_barrier.overlapping-texels
KHR-GL46.texture_barrier.same-texel-rw-multipass
KHR-GL46.texture_barrier_ARB.disjoint-texels
KHR-GL46.texture_barrier_ARB.overlapping-texels
KHR-GL46.texture_barrier_ARB.same-texel-rw-multipass

Reviewed-by: Adam Jackson <ajax@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18787>
This commit is contained in:
Mike Blumenkrantz
2022-09-22 13:02:17 -04:00
committed by Marge Bot
parent 902f64f906
commit 97c7eaf17f
8 changed files with 82 additions and 5 deletions
+30
View File
@@ -1777,6 +1777,22 @@ unbind_samplerview(struct zink_context *ctx, gl_shader_stage stage, unsigned slo
return;
struct zink_resource *res = zink_resource(sv->base.texture);
res->sampler_bind_count[stage == MESA_SHADER_COMPUTE]--;
if (stage != MESA_SHADER_COMPUTE && !res->sampler_bind_count[0] && res->fb_bind_count) {
unsigned feedback_loops = ctx->feedback_loops;
u_foreach_bit(idx, res->fb_binds) {
if (ctx->feedback_loops & BITFIELD_BIT(idx)) {
ctx->dynamic_fb.attachments[idx].imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
ctx->rp_layout_changed = true;
}
ctx->feedback_loops &= ~BITFIELD_BIT(idx);
}
if (feedback_loops && !ctx->feedback_loops) {
/* unset feedback loop bits */
if (ctx->gfx_pipeline_state.feedback_loop)
ctx->gfx_pipeline_state.dirty = true;
ctx->gfx_pipeline_state.feedback_loop = false;
}
}
update_res_bind_count(ctx, res, stage == MESA_SHADER_COMPUTE, true);
res->sampler_binds[stage] &= ~BITFIELD_BIT(slot);
if (res->obj->is_buffer) {
@@ -2803,6 +2819,18 @@ unbind_fb_surface(struct zink_context *ctx, struct pipe_surface *surf, unsigned
ctx->rp_changed = true;
}
res->fb_bind_count--;
unsigned feedback_loops = ctx->feedback_loops;
if (ctx->feedback_loops & BITFIELD_BIT(idx)) {
ctx->dynamic_fb.attachments[idx].imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
ctx->rp_layout_changed = true;
}
ctx->feedback_loops &= ~BITFIELD_BIT(idx);
if (feedback_loops && !ctx->feedback_loops) {
/* unset feedback loop bits */
if (ctx->gfx_pipeline_state.feedback_loop)
ctx->gfx_pipeline_state.dirty = true;
ctx->gfx_pipeline_state.feedback_loop = false;
}
res->fb_binds &= ~BITFIELD_BIT(idx);
if (!res->fb_bind_count) {
check_resource_for_batch_ref(ctx, res);
@@ -3047,6 +3075,7 @@ access_src_flags(VkImageLayout layout)
return VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
case VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT:
return VK_ACCESS_COLOR_ATTACHMENT_READ_BIT;
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
return VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
@@ -3083,6 +3112,7 @@ access_dst_flags(VkImageLayout layout)
return VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
case VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT:
return VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
return VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+8 -2
View File
@@ -271,9 +271,15 @@ zink_descriptor_util_image_layout_eval(const struct zink_context *ctx, const str
if (!is_compute && res->fb_bind_count &&
ctx->gfx_pipeline_state.render_pass && ctx->gfx_pipeline_state.render_pass->state.rts[ctx->fb_state.nr_cbufs].mixed_zs)
return VK_IMAGE_LAYOUT_GENERAL;
if (res->obj->vkusage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)
return VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL;
}
if (!is_compute && res->fb_bind_count && res->sampler_bind_count[0]) {
/* feedback loop */
if (zink_screen(ctx->base.screen)->info.have_EXT_attachment_feedback_loop_layout)
return VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT;
return VK_IMAGE_LAYOUT_GENERAL;
}
if (res->obj->vkusage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)
return VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL;
return VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
}
@@ -106,6 +106,9 @@ EXTENSIONS = [
properties=True),
Extension("VK_EXT_memory_budget"),
Extension("VK_KHR_draw_indirect_count"),
Extension("VK_EXT_attachment_feedback_loop_layout",
alias="feedback_loop",
features=True),
Extension("VK_EXT_fragment_shader_interlock",
alias="interlock",
features=True,
+12
View File
@@ -295,6 +295,18 @@ update_barriers(struct zink_context *ctx, bool is_compute,
zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, res->barrier_access[is_compute], pipeline);
else {
VkImageLayout layout = zink_descriptor_util_image_layout_eval(ctx, res, is_compute);
if (!is_compute) {
if (res->fb_bind_count && res->sampler_bind_count[0] && (!(ctx->feedback_loops & res->fb_binds))) {
/* new feedback loop detected */
if (!ctx->gfx_pipeline_state.feedback_loop)
ctx->gfx_pipeline_state.dirty = true;
ctx->gfx_pipeline_state.feedback_loop = true;
ctx->rp_layout_changed = true;
ctx->feedback_loops |= res->fb_binds;
u_foreach_bit(idx, res->fb_binds)
ctx->dynamic_fb.attachments[idx].imageLayout = VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT;
}
}
if (layout != res->layout)
zink_screen(ctx->base.screen)->image_barrier(ctx, res, layout, res->barrier_access[is_compute], pipeline);
}
+16
View File
@@ -323,6 +323,14 @@ zink_create_gfx_pipeline(struct zink_screen *screen,
VkGraphicsPipelineCreateInfo pci = {0};
pci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
if (state->feedback_loop) {
if (screen->info.have_EXT_attachment_feedback_loop_layout)
pci.flags = VK_PIPELINE_CREATE_COLOR_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT;
else {
static bool warn = false;
warn_missing_feature(warn, "EXT_attachment_feedback_loop_layout");
}
}
pci.layout = prog->base.layout;
if (state->render_pass)
pci.renderPass = state->render_pass->render_pass;
@@ -486,6 +494,14 @@ zink_create_gfx_pipeline_output(struct zink_screen *screen, struct zink_gfx_pipe
pci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
pci.pNext = &gplci;
pci.flags = VK_PIPELINE_CREATE_LIBRARY_BIT_KHR;
if (state->feedback_loop) {
if (screen->info.have_EXT_attachment_feedback_loop_layout)
pci.flags = VK_PIPELINE_CREATE_COLOR_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT;
else {
static bool warn = false;
warn_missing_feature(warn, "EXT_attachment_feedback_loop_layout");
}
}
pci.pColorBlendState = &blend_state;
pci.pMultisampleState = &ms_state;
pci.pDynamicState = &pipelineDynamicStateCreateInfo;
+5 -1
View File
@@ -49,13 +49,15 @@ get_rt_loadop(const struct zink_rt_attrib *rt, bool clear)
/* Select the Vulkan image layout for a color render target based on the
 * flags in its zink_rt_attrib.
 *
 * NOTE(review): two return statements appear back to back below. This looks
 * like the removed/added line pair of a diff whose +/- markers were lost,
 * not real control flow -- confirm against the actual source file. Read
 * literally, only the first return can execute.
 */
static VkImageLayout
get_color_rt_layout(const struct zink_rt_attrib *rt)
{
/* fbfetch targets use GENERAL; everything else uses COLOR_ATTACHMENT_OPTIMAL */
return rt->fbfetch ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
/* same choice, but rt->feedback_loop is checked first and maps to ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT */
return rt->feedback_loop ? VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT : rt->fbfetch ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
}
static VkImageLayout
get_zs_rt_layout(const struct zink_rt_attrib *rt)
{
bool has_clear = rt->clear_color || rt->clear_stencil;
if (rt->feedback_loop)
return VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT;
if (rt->mixed_zs)
return VK_IMAGE_LAYOUT_GENERAL;
return rt->needs_write || has_clear ? VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL;
@@ -357,6 +359,7 @@ zink_init_zs_attachment(struct zink_context *ctx, struct zink_rt_attrib *rt)
rt->mixed_zs = needs_write_z && zsbuf->bind_count[0];
rt->needs_write = needs_write_z | needs_write_s;
rt->invalid = !zsbuf->valid;
rt->feedback_loop = (ctx->feedback_loops & BITFIELD_BIT(PIPE_MAX_COLOR_BUFS)) > 0;
}
void
@@ -372,6 +375,7 @@ zink_init_color_attachment(struct zink_context *ctx, unsigned i, struct zink_rt_
rt->clear_color = zink_fb_clear_enabled(ctx, i) && !zink_fb_clear_first_needs_explicit(&ctx->fb_clears[i]);
rt->invalid = !zink_resource(psurf->texture)->valid;
rt->fbfetch = (ctx->fbfetch_outputs & BITFIELD_BIT(i)) > 0;
rt->feedback_loop = (ctx->feedback_loops & BITFIELD_BIT(i)) > 0;
} else {
memset(rt, 0, sizeof(struct zink_rt_attrib));
rt->format = VK_FORMAT_R8G8B8A8_UNORM;
+2
View File
@@ -287,6 +287,7 @@ get_image_usage_for_feats(struct zink_screen *screen, VkFormatFeatureFlags feats
usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
if ((bind & (PIPE_BIND_LINEAR | PIPE_BIND_SHARED)) != (PIPE_BIND_LINEAR | PIPE_BIND_SHARED))
usage |= VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
usage |= VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT;
} else {
/* trust that gallium isn't going to give us anything wild */
*need_extended = true;
@@ -306,6 +307,7 @@ get_image_usage_for_feats(struct zink_screen *screen, VkFormatFeatureFlags feats
usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
else
return 0;
usage |= VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT;
/* this is unlikely to occur and has been included for completeness */
} else if (bind & PIPE_BIND_SAMPLER_VIEW && !(usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT)) {
if (feats & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)
+6 -2
View File
@@ -646,7 +646,8 @@ struct zink_gfx_pipeline_state {
uint32_t force_persample_interp:1; //duplicated for gpl hashing
/* order matches zink_gfx_output_key: uint16_t offset */
uint32_t rast_samples:8; // 2 extra bits (can be used for new members)
uint32_t min_samples:8; // 2 extra bits (can be used for new members)
uint32_t min_samples:7; // 1 extra bit (can be used for new members)
uint32_t feedback_loop : 1;
VkSampleMask sample_mask;
unsigned rp_state;
uint32_t blend_id;
@@ -799,7 +800,8 @@ struct zink_gfx_output_key {
uint32_t _pad:15;
uint32_t force_persample_interp:1;
uint32_t rast_samples:8; // 2 extra bits (can be used for new members)
uint32_t min_samples:8; // 2 extra bits (can be used for new members)
uint32_t min_samples:7; // 1 extra bit (can be used for new members)
uint32_t feedback_loop : 1;
VkSampleMask sample_mask;
unsigned rp_state;
@@ -870,6 +872,7 @@ struct zink_rt_attrib {
bool needs_write;
bool resolve;
bool mixed_zs;
bool feedback_loop;
};
struct zink_render_pass_state {
@@ -1449,6 +1452,7 @@ struct zink_context {
uint16_t rp_clears_enabled;
uint16_t void_clears;
uint16_t fbfetch_outputs;
uint16_t feedback_loops;
struct zink_resource *needs_present;
struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];