radv: fix waiting for occlusion queries on GFX6-8

Occlusion query writes don't go through L2 on GFX6-8, so waiting on them
properly in shaders is more complicated to implement. Go back to the
previous WAIT_REG_MEM logic on these GPUs to fix this.

This fixes flickering in many games on GFX8.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8954
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9415
Fixes: d44651bfc3 ("radv: wait for occlusion queries in the resolve query shader")
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28796>
This commit is contained in:
Samuel Pitoiset
2024-04-18 08:26:22 +02:00
committed by Marge Bot
parent fe1aa98ab9
commit e18cc3b39b
2 changed files with 46 additions and 82 deletions
+46 -18
View File
@@ -48,6 +48,16 @@ radv_get_pipelinestat_query_size(struct radv_device *device)
return num_results * 8;
}
/* Tells whether the occlusion query wait is done against L2, which is only
 * the case when the GPU is GFX9 or newer.
 *
 * On GFX6-8, occlusion query writes bypass L2. Reading them from a shader
 * would require either flushing caches before every read or using
 * MTYPE=3 (ie. uncached) in the buffer descriptor to bypass L2. The
 * WAIT_REG_MEM logic is easier to implement, so it is used on those GPUs
 * instead.
 */
static bool
radv_occlusion_query_use_l2(const struct radv_physical_device *pdev)
{
   const bool is_gfx9_or_newer = pdev->info.gfx_level >= GFX9;

   return is_gfx9_or_newer;
}
static void
radv_store_availability(nir_builder *b, nir_def *flags, nir_def *dst_buf, nir_def *offset, nir_def *value32)
{
@@ -136,29 +146,31 @@ build_occlusion_query_shader(struct radv_device *device)
nir_store_var(&b, outer_counter, nir_imm_int(&b, 0), 0x1);
nir_store_var(&b, available, nir_imm_true(&b), 0x1);
nir_def *query_result_wait = nir_test_mask(&b, flags, VK_QUERY_RESULT_WAIT_BIT);
nir_push_if(&b, query_result_wait);
{
/* Wait on the upper word of the last DB entry. */
nir_push_loop(&b);
if (radv_occlusion_query_use_l2(pdev)) {
nir_def *query_result_wait = nir_test_mask(&b, flags, VK_QUERY_RESULT_WAIT_BIT);
nir_push_if(&b, query_result_wait);
{
const uint32_t rb_avail_offset = 16 * util_last_bit64(enabled_rb_mask) - 4;
/* Prevent the SSBO load to be moved out of the loop. */
nir_scoped_memory_barrier(&b, SCOPE_INVOCATION, NIR_MEMORY_ACQUIRE, nir_var_mem_ssbo);
nir_def *load_offset = nir_iadd_imm(&b, input_base, rb_avail_offset);
nir_def *load = nir_load_ssbo(&b, 1, 32, src_buf, load_offset, .align_mul = 4, .access = ACCESS_COHERENT);
nir_push_if(&b, nir_ige_imm(&b, load, 0x80000000));
/* Wait on the upper word of the last DB entry. */
nir_push_loop(&b);
{
nir_jump(&b, nir_jump_break);
const uint32_t rb_avail_offset = 16 * util_last_bit64(enabled_rb_mask) - 4;
/* Prevent the SSBO load to be moved out of the loop. */
nir_scoped_memory_barrier(&b, SCOPE_INVOCATION, NIR_MEMORY_ACQUIRE, nir_var_mem_ssbo);
nir_def *load_offset = nir_iadd_imm(&b, input_base, rb_avail_offset);
nir_def *load = nir_load_ssbo(&b, 1, 32, src_buf, load_offset, .align_mul = 4, .access = ACCESS_COHERENT);
nir_push_if(&b, nir_ige_imm(&b, load, 0x80000000));
{
nir_jump(&b, nir_jump_break);
}
nir_pop_if(&b, NULL);
}
nir_pop_if(&b, NULL);
nir_pop_loop(&b, NULL);
}
nir_pop_loop(&b, NULL);
nir_pop_if(&b, NULL);
}
nir_pop_if(&b, NULL);
nir_push_loop(&b);
@@ -1797,6 +1809,22 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
switch (pool->vk.query_type) {
case VK_QUERY_TYPE_OCCLUSION:
if (!radv_occlusion_query_use_l2(pdev)) {
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
uint64_t enabled_rb_mask = pdev->info.enabled_rb_mask;
uint32_t rb_avail_offset = 16 * util_last_bit64(enabled_rb_mask) - 4;
for (unsigned i = 0; i < queryCount; ++i, dest_va += stride) {
unsigned query = firstQuery + i;
uint64_t src_va = va + query * pool->stride + rb_avail_offset;
radeon_check_space(device->ws, cs, 7);
/* Waits on the upper word of the last DB entry */
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va, 0x80000000, 0xffffffff);
}
}
}
radv_query_shader(cmd_buffer, &device->meta_state.query.occlusion_query_pipeline, pool->bo, dst_buffer->bo,
firstQuery * pool->stride, dst_buffer->offset + dstOffset, pool->stride, stride, dst_size,
queryCount, flags, 0, 0, false);
@@ -15,8 +15,6 @@ spec@arb_framebuffer_object@fbo-blit-scaled-linear,Fail
spec@arb_bindless_texture@compiler@samplers@arith-bound-sampler-texture2d.frag,Crash
spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query,Fail
spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query@MS8,Fail
spec@arb_gpu_shader_fp64@execution@arb_gpu_shader_fp64-tf-separate,Fail
spec@arb_gpu_shader_fp64@execution@conversion@frag-conversion-explicit-dmat2-mat2,Fail
spec@arb_gpu_shader_fp64@execution@conversion@frag-conversion-explicit-dmat2x3-mat2x3,Fail
@@ -308,67 +306,9 @@ dEQP-GLES3.functional.texture.specification.texsubimage2d_depth.depth24_stencil8
dEQP-GLES3.functional.texture.specification.texsubimage2d_depth.depth32f_stencil8,Fail
dEQP-GLES3.functional.texture.specification.texsubimage3d_depth.depth24_stencil8_2d_array,Fail
dEQP-GLES3.functional.texture.specification.texsubimage3d_depth.depth32f_stencil8_2d_array,Fail
dEQP-GLES31.functional.fbo.no_attachments.maximums.all,Fail
dEQP-GLES31.functional.fbo.no_attachments.maximums.height,Fail
dEQP-GLES31.functional.fbo.no_attachments.maximums.samples,Fail
dEQP-GLES31.functional.fbo.no_attachments.maximums.size,Fail
dEQP-GLES31.functional.fbo.no_attachments.maximums.width,Fail
dEQP-GLES31.functional.fbo.no_attachments.multisample.samples0,Fail
dEQP-GLES31.functional.fbo.no_attachments.multisample.samples1,Fail
dEQP-GLES31.functional.fbo.no_attachments.multisample.samples2,Fail
dEQP-GLES31.functional.fbo.no_attachments.multisample.samples3,Fail
dEQP-GLES31.functional.fbo.no_attachments.multisample.samples4,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.1023x1023,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.1025x1025,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.127x127,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.127x15,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.129x127,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.129x129,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.15x15,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.15x511,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.1x1,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.2047x1025,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.2047x2047,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.255x255,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.257x257,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.33x33,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.3x3,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.511x127,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.511x511,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.513x513,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.63x63,Fail
dEQP-GLES31.functional.fbo.no_attachments.npot_size.65x65,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.0,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.1,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.10,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.11,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.12,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.13,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.14,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.15,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.2,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.3,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.4,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.5,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.6,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.7,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.8,Fail
dEQP-GLES31.functional.fbo.no_attachments.random.9,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.1024x1024,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.1024x16,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.1024x256,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.1024x64,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.16x1024,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.16x256,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.16x64,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.256x1024,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.256x16,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.256x256,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.256x64,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.64x1024,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.64x16,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.64x256,Fail
dEQP-GLES31.functional.fbo.no_attachments.size.64x64,Fail
dEQP-GLES31.functional.texture.specification.texstorage3d.format.depth24_stencil8_cube_array,Fail
dEQP-GLES31.functional.texture.specification.texstorage3d.format.depth32f_stencil8_cube_array,Fail
dEQP-GLES31.functional.texture.specification.texsubimage3d_depth.depth24_stencil8_cube_array,Fail
@@ -414,10 +354,6 @@ spec@arb_depth_buffer_float@fbo-clear-formats,Fail
spec@arb_depth_buffer_float@fbo-clear-formats@GL_DEPTH32F_STENCIL8,Fail
spec@arb_es2_compatibility@texwrap formats bordercolor-swizzled,Fail
spec@arb_es2_compatibility@texwrap formats bordercolor-swizzled@GL_RGB565- swizzled- border color only,Fail
spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query@Basic,Fail
spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query@discard,Fail
spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query@fb resize,Fail
spec@arb_framebuffer_no_attachments@arb_framebuffer_no_attachments-query@glViewport,Fail
spec@arb_sample_locations@test,Fail
spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 0- Grid: false,Fail
spec@arb_sample_locations@test@MSAA: 1- X: 0- Y: 0- Grid: true,Fail