radv: fix waiting for occlusion queries on GFX6-8
Occlusion query writes don't go through L2 on GFX6-8, and waiting on them
properly in shaders is more complicated to implement. Use the previous
WAIT_REG_MEM logic on these GPUs instead.
This fixes flickering in many games on GFX8.
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8954
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9415
Fixes: d44651bfc3 ("radv: wait for occlusion queries in the resolve query shader")
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28796>
This commit is contained in:
Committed by: Marge Bot
Parent: fe1aa98ab9
Commit: e18cc3b39b
Changes: +46 -18
@@ -48,6 +48,16 @@ radv_get_pipelinestat_query_size(struct radv_device *device)
|
||||
return num_results * 8;
|
||||
}
|
||||
|
||||
static bool
|
||||
radv_occlusion_query_use_l2(const struct radv_physical_device *pdev)
|
||||
{
|
||||
/* Occlusion query writes don't go through L2 on GFX6-8 which means the driver would need to
|
||||
* flush caches before every read in shaders or use MTYPE=3 (ie. uncached) in the buffer
|
||||
* descriptor to bypass L2. Use the WAIT_REG_MEM logic instead which is easier to implement.
|
||||
*/
|
||||
return pdev->info.gfx_level >= GFX9;
|
||||
}
|
||||
|
||||
static void
|
||||
radv_store_availability(nir_builder *b, nir_def *flags, nir_def *dst_buf, nir_def *offset, nir_def *value32)
|
||||
{
|
||||
@@ -136,29 +146,31 @@ build_occlusion_query_shader(struct radv_device *device)
|
||||
nir_store_var(&b, outer_counter, nir_imm_int(&b, 0), 0x1);
|
||||
nir_store_var(&b, available, nir_imm_true(&b), 0x1);
|
||||
|
||||
nir_def *query_result_wait = nir_test_mask(&b, flags, VK_QUERY_RESULT_WAIT_BIT);
|
||||
nir_push_if(&b, query_result_wait);
|
||||
{
|
||||
/* Wait on the upper word of the last DB entry. */
|
||||
nir_push_loop(&b);
|
||||
if (radv_occlusion_query_use_l2(pdev)) {
|
||||
nir_def *query_result_wait = nir_test_mask(&b, flags, VK_QUERY_RESULT_WAIT_BIT);
|
||||
nir_push_if(&b, query_result_wait);
|
||||
{
|
||||
const uint32_t rb_avail_offset = 16 * util_last_bit64(enabled_rb_mask) - 4;
|
||||
|
||||
/* Prevent the SSBO load to be moved out of the loop. */
|
||||
nir_scoped_memory_barrier(&b, SCOPE_INVOCATION, NIR_MEMORY_ACQUIRE, nir_var_mem_ssbo);
|
||||
|
||||
nir_def *load_offset = nir_iadd_imm(&b, input_base, rb_avail_offset);
|
||||
nir_def *load = nir_load_ssbo(&b, 1, 32, src_buf, load_offset, .align_mul = 4, .access = ACCESS_COHERENT);
|
||||
|
||||
nir_push_if(&b, nir_ige_imm(&b, load, 0x80000000));
|
||||
/* Wait on the upper word of the last DB entry. */
|
||||
nir_push_loop(&b);
|
||||
{
|
||||
nir_jump(&b, nir_jump_break);
|
||||
const uint32_t rb_avail_offset = 16 * util_last_bit64(enabled_rb_mask) - 4;
|
||||
|
||||
/* Prevent the SSBO load to be moved out of the loop. */
|
||||
nir_scoped_memory_barrier(&b, SCOPE_INVOCATION, NIR_MEMORY_ACQUIRE, nir_var_mem_ssbo);
|
||||
|
||||
nir_def *load_offset = nir_iadd_imm(&b, input_base, rb_avail_offset);
|
||||
nir_def *load = nir_load_ssbo(&b, 1, 32, src_buf, load_offset, .align_mul = 4, .access = ACCESS_COHERENT);
|
||||
|
||||
nir_push_if(&b, nir_ige_imm(&b, load, 0x80000000));
|
||||
{
|
||||
nir_jump(&b, nir_jump_break);
|
||||
}
|
||||
nir_pop_if(&b, NULL);
|
||||
}
|
||||
nir_pop_if(&b, NULL);
|
||||
nir_pop_loop(&b, NULL);
|
||||
}
|
||||
nir_pop_loop(&b, NULL);
|
||||
nir_pop_if(&b, NULL);
|
||||
}
|
||||
nir_pop_if(&b, NULL);
|
||||
|
||||
nir_push_loop(&b);
|
||||
|
||||
@@ -1797,6 +1809,22 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
|
||||
|
||||
switch (pool->vk.query_type) {
|
||||
case VK_QUERY_TYPE_OCCLUSION:
|
||||
if (!radv_occlusion_query_use_l2(pdev)) {
|
||||
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
|
||||
uint64_t enabled_rb_mask = pdev->info.enabled_rb_mask;
|
||||
uint32_t rb_avail_offset = 16 * util_last_bit64(enabled_rb_mask) - 4;
|
||||
for (unsigned i = 0; i < queryCount; ++i, dest_va += stride) {
|
||||
unsigned query = firstQuery + i;
|
||||
uint64_t src_va = va + query * pool->stride + rb_avail_offset;
|
||||
|
||||
radeon_check_space(device->ws, cs, 7);
|
||||
|
||||
/* Waits on the upper word of the last DB entry */
|
||||
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va, 0x80000000, 0xffffffff);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
radv_query_shader(cmd_buffer, &device->meta_state.query.occlusion_query_pipeline, pool->bo, dst_buffer->bo,
|
||||
firstQuery * pool->stride, dst_buffer->offset + dstOffset, pool->stride, stride, dst_size,
|
||||
queryCount, flags, 0, 0, false);
|
||||
|
||||
@@ -15,8 +15,6 @@ spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail
|
||||
|
||||
spec@arb_bindless_texture@compiler@samplers@arith-bound-sampler-texture2d.frag,Crash
|
||||
|
||||
spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query,Fail
|
||||
spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query@MS8,Fail
|
||||
spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail
|
||||
spec@arb_gpu_shader_fp64@execution@conversion@frag-conversion-explicit-dmat2-mat2,Fail
|
||||
spec@arb_gpu_shader_fp64@execution@conversion@frag-conversion-explicit-dmat2x3-mat2x3,Fail
|
||||
@@ -308,67 +306,9 @@ dEQP-GLES3.functional.texture.specification.texsubimage2d_depth.depth24_stencil8
|
||||
dEQP-GLES3.functional.texture.specification.texsubimage2d_depth.depth32f_stencil8,Fail
|
||||
dEQP-GLES3.functional.texture.specification.texsubimage3d_depth.depth24_stencil8_2d_array,Fail
|
||||
dEQP-GLES3.functional.texture.specification.texsubimage3d_depth.depth32f_stencil8_2d_array,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.maximums.all,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.maximums.height,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.maximums.samples,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.maximums.size,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.maximums.width,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.multisample.samples0,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.multisample.samples1,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.multisample.samples2,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.multisample.samples3,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.multisample.samples4,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.npot_size.1023x1023,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.npot_size.1025x1025,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.npot_size.127x127,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.npot_size.127x15,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.npot_size.129x127,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.npot_size.129x129,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.npot_size.15x15,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.npot_size.15x511,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.npot_size.1x1,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.npot_size.2047x1025,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.npot_size.2047x2047,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.npot_size.255x255,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.npot_size.257x257,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.npot_size.33x33,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.npot_size.3x3,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.npot_size.511x127,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.npot_size.511x511,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.npot_size.513x513,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.npot_size.63x63,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.npot_size.65x65,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.random.0,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.random.1,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.random.10,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.random.11,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.random.12,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.random.13,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.random.14,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.random.15,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.random.2,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.random.3,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.random.4,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.random.5,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.random.6,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.random.7,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.random.8,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.random.9,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.size.1024x1024,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.size.1024x16,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.size.1024x256,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.size.1024x64,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.size.16x1024,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.size.16x256,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.size.16x64,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.size.256x1024,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.size.256x16,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.size.256x256,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.size.256x64,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.size.64x1024,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.size.64x16,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.size.64x256,Fail
|
||||
dEQP-GLES31.functional.fbo.no_attachments.size.64x64,Fail
|
||||
dEQP-GLES31.functional.texture.specification.texstorage3d.format.depth24_stencil8_cube_array,Fail
|
||||
dEQP-GLES31.functional.texture.specification.texstorage3d.format.depth32f_stencil8_cube_array,Fail
|
||||
dEQP-GLES31.functional.texture.specification.texsubimage3d_depth.depth24_stencil8_cube_array,Fail
|
||||
@@ -414,10 +354,6 @@ spec@arb_depth_buffer_float@fbo-clear-formats,Fail
|
||||
spec@arb_depth_buffer_float@fbo-clear-formats@GL_DEPTH32F_STENCIL8,Fail
|
||||
spec@arb_es2_compatibility@texwrap formats bordercolor-swizzled,Fail
|
||||
spec@arb_es2_compatibility@texwrap formats bordercolor-swizzled@GL_RGB565- swizzled- border color only,Fail
|
||||
spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query@Basic,Fail
|
||||
spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query@discard,Fail
|
||||
spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query@fb resize,Fail
|
||||
spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query@glViewport,Fail
|
||||
spec@arb_sample_locations@test,Fail
|
||||
spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 0- Grid: false,Fail
|
||||
spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 0- Grid: true,Fail
|
||||
|
||||
Reference in New Issue
Block a user