From 59c501ca353f8ec9d2717c98af2bfa1a1dbf4d75 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Sat, 10 Apr 2021 03:24:05 +0200 Subject: [PATCH] radv: Format. Using find ./src/amd/vulkan -regex '.*/.*\.\(c\|h\|cpp\)' | xargs -P8 -n1 clang-format --style=file -i Acked-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/layers/radv_sqtt_layer.c | 1524 +- src/amd/vulkan/radv_android.c | 1157 +- src/amd/vulkan/radv_cmd_buffer.c | 10915 +++++++------- src/amd/vulkan/radv_constants.h | 61 +- src/amd/vulkan/radv_cs.h | 216 +- src/amd/vulkan/radv_debug.c | 1327 +- src/amd/vulkan/radv_debug.h | 104 +- src/amd/vulkan/radv_descriptor_set.c | 2589 ++-- src/amd/vulkan/radv_descriptor_set.h | 27 +- src/amd/vulkan/radv_device.c | 12532 ++++++++-------- src/amd/vulkan/radv_formats.c | 3242 ++-- src/amd/vulkan/radv_image.c | 3526 +++-- src/amd/vulkan/radv_llvm_helper.cpp | 186 +- src/amd/vulkan/radv_meta.c | 877 +- src/amd/vulkan/radv_meta.h | 275 +- src/amd/vulkan/radv_meta_blit.c | 1953 ++- src/amd/vulkan/radv_meta_blit2d.c | 2215 ++- src/amd/vulkan/radv_meta_buffer.c | 847 +- src/amd/vulkan/radv_meta_bufimage.c | 3007 ++-- src/amd/vulkan/radv_meta_clear.c | 3540 ++--- src/amd/vulkan/radv_meta_copy.c | 828 +- src/amd/vulkan/radv_meta_dcc_retile.c | 448 +- src/amd/vulkan/radv_meta_decompress.c | 850 +- src/amd/vulkan/radv_meta_fast_clear.c | 1472 +- src/amd/vulkan/radv_meta_fmask_expand.c | 414 +- src/amd/vulkan/radv_meta_resolve.c | 1495 +- src/amd/vulkan/radv_meta_resolve_cs.c | 1545 +- src/amd/vulkan/radv_meta_resolve_fs.c | 1954 ++- .../vulkan/radv_nir_lower_ycbcr_textures.c | 440 +- src/amd/vulkan/radv_nir_to_llvm.c | 5574 ++++--- src/amd/vulkan/radv_pass.c | 803 +- src/amd/vulkan/radv_pipeline.c | 9659 ++++++------ src/amd/vulkan/radv_pipeline_cache.c | 883 +- src/amd/vulkan/radv_private.h | 3380 ++--- src/amd/vulkan/radv_query.c | 2872 ++-- src/amd/vulkan/radv_radeon_winsys.h | 428 +- src/amd/vulkan/radv_shader.c | 2873 ++-- src/amd/vulkan/radv_shader.h | 822 +- src/amd/vulkan/radv_shader_args.c | 1079 +- src/amd/vulkan/radv_shader_args.h | 38 +- src/amd/vulkan/radv_shader_helper.h | 16 +- src/amd/vulkan/radv_shader_info.c | 1135 +- src/amd/vulkan/radv_sqtt.c | 910 +- src/amd/vulkan/radv_util.c | 111 +- src/amd/vulkan/radv_wsi.c | 378 +- src/amd/vulkan/radv_wsi_display.c | 366 +- src/amd/vulkan/radv_wsi_wayland.c | 18 +- src/amd/vulkan/radv_wsi_x11.c | 59 +- src/amd/vulkan/si_cmd_buffer.c | 3185 ++-- src/amd/vulkan/vk_format.h | 328 +- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c | 1549 +- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h | 61 +- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 2734 ++-- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h | 29 +- .../winsys/amdgpu/radv_amdgpu_surface.c | 106 +- .../winsys/amdgpu/radv_amdgpu_surface.h | 2 + .../vulkan/winsys/amdgpu/radv_amdgpu_winsys.c | 369 +- .../vulkan/winsys/amdgpu/radv_amdgpu_winsys.h | 68 +- .../winsys/amdgpu/radv_amdgpu_winsys_public.h | 2 +- src/amd/vulkan/winsys/null/radv_null_bo.c | 54 +- src/amd/vulkan/winsys/null/radv_null_bo.h | 12 +- src/amd/vulkan/winsys/null/radv_null_cs.c | 82 +- src/amd/vulkan/winsys/null/radv_null_cs.h | 6 +- src/amd/vulkan/winsys/null/radv_null_winsys.c | 204 +- src/amd/vulkan/winsys/null/radv_null_winsys.h | 8 +- 65 files changed, 47908 insertions(+), 51861 deletions(-) diff --git a/src/amd/vulkan/layers/radv_sqtt_layer.c b/src/amd/vulkan/layers/radv_sqtt_layer.c index 30cd453b8e5..fc68d188dab 100644 --- a/src/amd/vulkan/layers/radv_sqtt_layer.c +++ b/src/amd/vulkan/layers/radv_sqtt_layer.c @@ -29,1215 +29,1071 @@ static void radv_write_begin_general_api_marker(struct radv_cmd_buffer *cmd_buffer, - enum rgp_sqtt_marker_general_api_type api_type) + enum rgp_sqtt_marker_general_api_type api_type) { - struct rgp_sqtt_marker_general_api marker = {0}; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct rgp_sqtt_marker_general_api marker = {0}; + struct radeon_cmdbuf *cs = cmd_buffer->cs; - marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API; - marker.api_type = api_type; + marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API; + marker.api_type = api_type; - radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4); + radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4); } static void radv_write_end_general_api_marker(struct radv_cmd_buffer *cmd_buffer, - enum rgp_sqtt_marker_general_api_type api_type) + enum rgp_sqtt_marker_general_api_type api_type) { - struct rgp_sqtt_marker_general_api marker = {0}; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct rgp_sqtt_marker_general_api marker = {0}; + struct radeon_cmdbuf *cs = cmd_buffer->cs; - marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API; - marker.api_type = api_type; - marker.is_end = 1; + marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API; + marker.api_type = api_type; + marker.is_end = 1; - radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4); + radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4); } static void radv_write_event_marker(struct radv_cmd_buffer *cmd_buffer, - enum rgp_sqtt_marker_event_type api_type, - uint32_t vertex_offset_user_data, - uint32_t instance_offset_user_data, - uint32_t draw_index_user_data) + enum rgp_sqtt_marker_event_type api_type, uint32_t vertex_offset_user_data, + uint32_t instance_offset_user_data, uint32_t draw_index_user_data) { - struct rgp_sqtt_marker_event marker = {0}; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct rgp_sqtt_marker_event marker = {0}; + struct radeon_cmdbuf *cs = cmd_buffer->cs; - marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT; - marker.api_type = api_type; - marker.cmd_id = cmd_buffer->state.num_events++; - marker.cb_id = 0; + marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT; + marker.api_type = api_type; + marker.cmd_id = cmd_buffer->state.num_events++; + marker.cb_id = 0; - if (vertex_offset_user_data == UINT_MAX || - instance_offset_user_data == UINT_MAX) { - vertex_offset_user_data = 0; - instance_offset_user_data = 0; - } + if (vertex_offset_user_data == UINT_MAX || instance_offset_user_data == UINT_MAX) { + vertex_offset_user_data = 0; + instance_offset_user_data = 0; + } - if (draw_index_user_data == UINT_MAX) - draw_index_user_data = vertex_offset_user_data; + if (draw_index_user_data == UINT_MAX) + draw_index_user_data = vertex_offset_user_data; - marker.vertex_offset_reg_idx = vertex_offset_user_data; - marker.instance_offset_reg_idx = instance_offset_user_data; - marker.draw_index_reg_idx = draw_index_user_data; + marker.vertex_offset_reg_idx = vertex_offset_user_data; + marker.instance_offset_reg_idx = instance_offset_user_data; + marker.draw_index_reg_idx = draw_index_user_data; - radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4); + radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4); } static void radv_write_event_with_dims_marker(struct radv_cmd_buffer *cmd_buffer, - enum rgp_sqtt_marker_event_type api_type, - uint32_t x, uint32_t y, uint32_t z) + enum rgp_sqtt_marker_event_type api_type, uint32_t x, uint32_t y, + uint32_t z) { - struct rgp_sqtt_marker_event_with_dims marker = {0}; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct rgp_sqtt_marker_event_with_dims marker = {0}; + struct radeon_cmdbuf *cs = cmd_buffer->cs; - marker.event.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT; - marker.event.api_type = api_type; - marker.event.cmd_id = cmd_buffer->state.num_events++; - marker.event.cb_id = 0; - marker.event.has_thread_dims = 1; + marker.event.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT; + marker.event.api_type = api_type; + marker.event.cmd_id = cmd_buffer->state.num_events++; + marker.event.cb_id = 0; + marker.event.has_thread_dims = 1; - marker.thread_x = x; - marker.thread_y = y; - marker.thread_z = z; + marker.thread_x = x; + marker.thread_y = y; + marker.thread_z = z; - radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4); + radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4); } static void radv_write_user_event_marker(struct radv_cmd_buffer *cmd_buffer, - enum rgp_sqtt_marker_user_event_type type, - const char *str) + enum rgp_sqtt_marker_user_event_type type, const char *str) { - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radeon_cmdbuf *cs = cmd_buffer->cs; - if (type == UserEventPop) { - assert (str == NULL); - struct rgp_sqtt_marker_user_event marker = { 0 }; - marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT; - marker.data_type = type; + if (type == UserEventPop) { + assert(str == NULL); + struct rgp_sqtt_marker_user_event marker = {0}; + marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT; + marker.data_type = type; - radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4); - } else { - assert (str != NULL); - unsigned len = strlen(str); - struct rgp_sqtt_marker_user_event_with_length marker = { 0 }; - marker.user_event.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT; - marker.user_event.data_type = type; - marker.length = align(len, 4); + radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4); + } else { + assert(str != NULL); + unsigned len = strlen(str); + struct rgp_sqtt_marker_user_event_with_length marker = {0}; + marker.user_event.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT; + marker.user_event.data_type = type; + marker.length = align(len, 4); - uint8_t *buffer = alloca(sizeof(marker) + marker.length); - memset(buffer, 0, sizeof(marker) + marker.length); - memcpy(buffer, &marker, sizeof(marker)); - memcpy(buffer + sizeof(marker), str, len); + uint8_t *buffer = alloca(sizeof(marker) + marker.length); + memset(buffer, 0, sizeof(marker) + marker.length); + memcpy(buffer, &marker, sizeof(marker)); + memcpy(buffer + sizeof(marker), str, len); - radv_emit_thread_trace_userdata(cmd_buffer->device, cs, buffer, sizeof(marker) / 4 + marker.length / 4); - } + radv_emit_thread_trace_userdata(cmd_buffer->device, cs, buffer, + sizeof(marker) / 4 + marker.length / 4); + } } void radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer) { - uint64_t device_id = (uintptr_t)cmd_buffer->device; - struct rgp_sqtt_marker_cb_start marker = {0}; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + uint64_t device_id = (uintptr_t)cmd_buffer->device; + struct rgp_sqtt_marker_cb_start marker = {0}; + struct radeon_cmdbuf *cs = cmd_buffer->cs; - if (likely(!cmd_buffer->device->thread_trace.bo)) - return; + if (likely(!cmd_buffer->device->thread_trace.bo)) + return; - marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_START; - marker.cb_id = 0; - marker.device_id_low = device_id; - marker.device_id_high = device_id >> 32; - marker.queue = cmd_buffer->queue_family_index; - marker.queue_flags = VK_QUEUE_COMPUTE_BIT | - VK_QUEUE_TRANSFER_BIT | - VK_QUEUE_SPARSE_BINDING_BIT; + marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_START; + marker.cb_id = 0; + marker.device_id_low = device_id; + marker.device_id_high = device_id >> 32; + marker.queue = cmd_buffer->queue_family_index; + marker.queue_flags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT; - if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) - marker.queue_flags |= VK_QUEUE_GRAPHICS_BIT; + if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) + marker.queue_flags |= VK_QUEUE_GRAPHICS_BIT; - radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4); + radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4); } void radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer) { - uint64_t device_id = (uintptr_t)cmd_buffer->device; - struct rgp_sqtt_marker_cb_end marker = {0}; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + uint64_t device_id = (uintptr_t)cmd_buffer->device; + struct rgp_sqtt_marker_cb_end marker = {0}; + struct radeon_cmdbuf *cs = cmd_buffer->cs; - if (likely(!cmd_buffer->device->thread_trace.bo)) - return; + if (likely(!cmd_buffer->device->thread_trace.bo)) + return; - marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_END; - marker.cb_id = 0; - marker.device_id_low = device_id; - marker.device_id_high = device_id >> 32; + marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_END; + marker.cb_id = 0; + marker.device_id_low = device_id; + marker.device_id_high = device_id >> 32; - radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4); + radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4); } void radv_describe_draw(struct radv_cmd_buffer *cmd_buffer) { - if (likely(!cmd_buffer->device->thread_trace.bo)) - return; + if (likely(!cmd_buffer->device->thread_trace.bo)) + return; - radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type, - UINT_MAX, UINT_MAX, UINT_MAX); + radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type, UINT_MAX, UINT_MAX, + UINT_MAX); } void radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, int x, int y, int z) { - if (likely(!cmd_buffer->device->thread_trace.bo)) - return; + if (likely(!cmd_buffer->device->thread_trace.bo)) + return; - radv_write_event_with_dims_marker(cmd_buffer, - cmd_buffer->state.current_event_type, - x, y, z); + radv_write_event_with_dims_marker(cmd_buffer, cmd_buffer->state.current_event_type, x, y, z); } void radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer, - VkImageAspectFlagBits aspects) + VkImageAspectFlagBits aspects) { - cmd_buffer->state.current_event_type = (aspects & VK_IMAGE_ASPECT_COLOR_BIT) ? - EventRenderPassColorClear : EventRenderPassDepthStencilClear; + cmd_buffer->state.current_event_type = (aspects & VK_IMAGE_ASPECT_COLOR_BIT) + ? EventRenderPassColorClear + : EventRenderPassDepthStencilClear; } void radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer) { - cmd_buffer->state.current_event_type = EventInternalUnknown; + cmd_buffer->state.current_event_type = EventInternalUnknown; } void radv_describe_begin_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer) { - cmd_buffer->state.current_event_type = EventRenderPassResolve; + cmd_buffer->state.current_event_type = EventRenderPassResolve; } void radv_describe_end_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer) { - cmd_buffer->state.current_event_type = EventInternalUnknown; + cmd_buffer->state.current_event_type = EventInternalUnknown; } void radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer) { - struct rgp_sqtt_marker_barrier_end marker = {0}; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct rgp_sqtt_marker_barrier_end marker = {0}; + struct radeon_cmdbuf *cs = cmd_buffer->cs; - if (likely(!cmd_buffer->device->thread_trace.bo) || - !cmd_buffer->state.pending_sqtt_barrier_end) - return; + if (likely(!cmd_buffer->device->thread_trace.bo) || !cmd_buffer->state.pending_sqtt_barrier_end) + return; - cmd_buffer->state.pending_sqtt_barrier_end = false; + cmd_buffer->state.pending_sqtt_barrier_end = false; - marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END; - marker.cb_id = 0; + marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END; + marker.cb_id = 0; - marker.num_layout_transitions = cmd_buffer->state.num_layout_transitions; + marker.num_layout_transitions = cmd_buffer->state.num_layout_transitions; - if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_WAIT_ON_EOP_TS) - marker.wait_on_eop_ts = true; - if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_VS_PARTIAL_FLUSH) - marker.vs_partial_flush = true; - if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PS_PARTIAL_FLUSH) - marker.ps_partial_flush = true; - if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_CS_PARTIAL_FLUSH) - marker.cs_partial_flush = true; - if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PFP_SYNC_ME) - marker.pfp_sync_me = true; - if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_SYNC_CP_DMA) - marker.sync_cp_dma = true; - if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_VMEM_L0) - marker.inval_tcp = true; - if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_ICACHE) - marker.inval_sqI = true; - if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_SMEM_L0) - marker.inval_sqK = true; - if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_L2) - marker.flush_tcc = true; - if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L2) - marker.inval_tcc = true; - if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_CB) - marker.flush_cb = true; - if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_CB) - marker.inval_cb = true; - if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_DB) - marker.flush_db = true; - if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_DB) - marker.inval_db = true; - if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L1) - marker.inval_gl1 = true; + if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_WAIT_ON_EOP_TS) + marker.wait_on_eop_ts = true; + if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_VS_PARTIAL_FLUSH) + marker.vs_partial_flush = true; + if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PS_PARTIAL_FLUSH) + marker.ps_partial_flush = true; + if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_CS_PARTIAL_FLUSH) + marker.cs_partial_flush = true; + if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PFP_SYNC_ME) + marker.pfp_sync_me = true; + if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_SYNC_CP_DMA) + marker.sync_cp_dma = true; + if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_VMEM_L0) + marker.inval_tcp = true; + if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_ICACHE) + marker.inval_sqI = true; + if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_SMEM_L0) + marker.inval_sqK = true; + if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_L2) + marker.flush_tcc = true; + if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L2) + marker.inval_tcc = true; + if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_CB) + marker.flush_cb = true; + if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_CB) + marker.inval_cb = true; + if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_DB) + marker.flush_db = true; + if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_DB) + marker.inval_db = true; + if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L1) + marker.inval_gl1 = true; - radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4); + radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4); - cmd_buffer->state.num_layout_transitions = 0; + cmd_buffer->state.num_layout_transitions = 0; } void -radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer, - enum rgp_barrier_reason reason) +radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer, enum rgp_barrier_reason reason) { - struct rgp_sqtt_marker_barrier_start marker = {0}; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct rgp_sqtt_marker_barrier_start marker = {0}; + struct radeon_cmdbuf *cs = cmd_buffer->cs; - if (likely(!cmd_buffer->device->thread_trace.bo)) - return; + if (likely(!cmd_buffer->device->thread_trace.bo)) + return; - radv_describe_barrier_end_delayed(cmd_buffer); - cmd_buffer->state.sqtt_flush_bits = 0; + radv_describe_barrier_end_delayed(cmd_buffer); + cmd_buffer->state.sqtt_flush_bits = 0; - marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START; - marker.cb_id = 0; - marker.dword02 = reason; + marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START; + marker.cb_id = 0; + marker.dword02 = reason; - radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4); + radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4); } void radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer) { - cmd_buffer->state.pending_sqtt_barrier_end = true; + cmd_buffer->state.pending_sqtt_barrier_end = true; } void radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer, - const struct radv_barrier_data *barrier) + const struct radv_barrier_data *barrier) { - struct rgp_sqtt_marker_layout_transition marker = {0}; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct rgp_sqtt_marker_layout_transition marker = {0}; + struct radeon_cmdbuf *cs = cmd_buffer->cs; - if (likely(!cmd_buffer->device->thread_trace.bo)) - return; + if (likely(!cmd_buffer->device->thread_trace.bo)) + return; - marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION; - marker.depth_stencil_expand = barrier->layout_transitions.depth_stencil_expand; - marker.htile_hiz_range_expand = barrier->layout_transitions.htile_hiz_range_expand; - marker.depth_stencil_resummarize = barrier->layout_transitions.depth_stencil_resummarize; - marker.dcc_decompress = barrier->layout_transitions.dcc_decompress; - marker.fmask_decompress = barrier->layout_transitions.fmask_decompress; - marker.fast_clear_eliminate = barrier->layout_transitions.fast_clear_eliminate; - marker.fmask_color_expand = barrier->layout_transitions.fmask_color_expand; - marker.init_mask_ram = barrier->layout_transitions.init_mask_ram; + marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION; + marker.depth_stencil_expand = barrier->layout_transitions.depth_stencil_expand; + marker.htile_hiz_range_expand = barrier->layout_transitions.htile_hiz_range_expand; + marker.depth_stencil_resummarize = barrier->layout_transitions.depth_stencil_resummarize; + marker.dcc_decompress = barrier->layout_transitions.dcc_decompress; + marker.fmask_decompress = barrier->layout_transitions.fmask_decompress; + marker.fast_clear_eliminate = barrier->layout_transitions.fast_clear_eliminate; + marker.fmask_color_expand = barrier->layout_transitions.fmask_color_expand; + marker.init_mask_ram = barrier->layout_transitions.init_mask_ram; - radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4); + radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4); - cmd_buffer->state.num_layout_transitions++; + cmd_buffer->state.num_layout_transitions++; } static void radv_describe_pipeline_bind(struct radv_cmd_buffer *cmd_buffer, - VkPipelineBindPoint pipelineBindPoint, - struct radv_pipeline *pipeline) + VkPipelineBindPoint pipelineBindPoint, struct radv_pipeline *pipeline) { - struct rgp_sqtt_marker_pipeline_bind marker = {0}; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct rgp_sqtt_marker_pipeline_bind marker = {0}; + struct radeon_cmdbuf *cs = cmd_buffer->cs; - if (likely(!cmd_buffer->device->thread_trace.bo)) - return; + if (likely(!cmd_buffer->device->thread_trace.bo)) + return; - marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE; - marker.cb_id = 0; - marker.bind_point = pipelineBindPoint; - marker.api_pso_hash[0] = pipeline->pipeline_hash; - marker.api_pso_hash[1] = pipeline->pipeline_hash >> 32; + marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE; + marker.cb_id = 0; + marker.bind_point = pipelineBindPoint; + marker.api_pso_hash[0] = pipeline->pipeline_hash; + marker.api_pso_hash[1] = pipeline->pipeline_hash >> 32; - radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4); + radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4); } /* TODO: Improve the way to trigger capture (overlay, etc). */ static void radv_handle_thread_trace(VkQueue _queue) { - RADV_FROM_HANDLE(radv_queue, queue, _queue); - static bool thread_trace_enabled = false; - static uint64_t num_frames = 0; - bool resize_trigger = false; + RADV_FROM_HANDLE(radv_queue, queue, _queue); + static bool thread_trace_enabled = false; + static uint64_t num_frames = 0; + bool resize_trigger = false; - if (thread_trace_enabled) { - struct ac_thread_trace thread_trace = {0}; + if (thread_trace_enabled) { + struct ac_thread_trace thread_trace = {0}; - radv_end_thread_trace(queue); - thread_trace_enabled = false; + radv_end_thread_trace(queue); + thread_trace_enabled = false; - /* TODO: Do something better than this whole sync. */ - radv_QueueWaitIdle(_queue); + /* TODO: Do something better than this whole sync. */ + radv_QueueWaitIdle(_queue); - if (radv_get_thread_trace(queue, &thread_trace)) { - ac_dump_thread_trace(&queue->device->physical_device->rad_info, - &thread_trace, - &queue->device->thread_trace); - } else { - /* Trigger a new capture if the driver failed to get - * the trace because the buffer was too small. - */ - resize_trigger = true; - } - } + if (radv_get_thread_trace(queue, &thread_trace)) { + ac_dump_thread_trace(&queue->device->physical_device->rad_info, &thread_trace, + &queue->device->thread_trace); + } else { + /* Trigger a new capture if the driver failed to get + * the trace because the buffer was too small. + */ + resize_trigger = true; + } + } - if (!thread_trace_enabled) { - bool frame_trigger = num_frames == queue->device->thread_trace.start_frame; - bool file_trigger = false; + if (!thread_trace_enabled) { + bool frame_trigger = num_frames == queue->device->thread_trace.start_frame; + bool file_trigger = false; #ifndef _WIN32 - if (queue->device->thread_trace.trigger_file && - access(queue->device->thread_trace.trigger_file, W_OK) == 0) { - if (unlink(queue->device->thread_trace.trigger_file) == 0) { - file_trigger = true; - } else { - /* Do not enable tracing if we cannot remove the file, - * because by then we'll trace every frame ... */ - fprintf(stderr, "RADV: could not remove thread trace trigger file, ignoring\n"); - } - } + if (queue->device->thread_trace.trigger_file && + access(queue->device->thread_trace.trigger_file, W_OK) == 0) { + if (unlink(queue->device->thread_trace.trigger_file) == 0) { + file_trigger = true; + } else { + /* Do not enable tracing if we cannot remove the file, + * because by then we'll trace every frame ... */ + fprintf(stderr, "RADV: could not remove thread trace trigger file, ignoring\n"); + } + } #endif - if (frame_trigger || file_trigger || resize_trigger) { - /* FIXME: SQTT on compute hangs. */ - if (queue->queue_family_index == RADV_QUEUE_COMPUTE) { - fprintf(stderr, "RADV: Capturing a SQTT trace on the compute " - "queue is currently broken and might hang! " - "Please, disable presenting on compute if " - "you can.\n"); - return; - } + if (frame_trigger || file_trigger || resize_trigger) { + /* FIXME: SQTT on compute hangs. */ + if (queue->queue_family_index == RADV_QUEUE_COMPUTE) { + fprintf(stderr, "RADV: Capturing a SQTT trace on the compute " + "queue is currently broken and might hang! " + "Please, disable presenting on compute if " + "you can.\n"); + return; + } - radv_begin_thread_trace(queue); - assert(!thread_trace_enabled); - thread_trace_enabled = true; - } - } - num_frames++; + radv_begin_thread_trace(queue); + assert(!thread_trace_enabled); + thread_trace_enabled = true; + } + } + num_frames++; } -VkResult sqtt_QueuePresentKHR( - VkQueue _queue, - const VkPresentInfoKHR* pPresentInfo) +VkResult +sqtt_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo) { - VkResult result; + VkResult result; - result = radv_QueuePresentKHR(_queue, pPresentInfo); - if (result != VK_SUCCESS) - return result; + result = radv_QueuePresentKHR(_queue, pPresentInfo); + if (result != VK_SUCCESS) + return result; - radv_handle_thread_trace(_queue); + radv_handle_thread_trace(_queue); - return VK_SUCCESS; + return VK_SUCCESS; } -#define EVENT_MARKER_ALIAS(cmd_name, api_name, ...) \ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); \ - radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name); \ - cmd_buffer->state.current_event_type = EventCmd##api_name; \ - radv_Cmd##cmd_name(__VA_ARGS__); \ - cmd_buffer->state.current_event_type = EventInternalUnknown; \ - radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name); +#define EVENT_MARKER_ALIAS(cmd_name, api_name, ...) \ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); \ + radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name); \ + cmd_buffer->state.current_event_type = EventCmd##api_name; \ + radv_Cmd##cmd_name(__VA_ARGS__); \ + cmd_buffer->state.current_event_type = EventInternalUnknown; \ + radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name); -#define EVENT_MARKER(cmd_name, ...) \ - EVENT_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__); +#define EVENT_MARKER(cmd_name, ...) EVENT_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__); -void sqtt_CmdDraw( - VkCommandBuffer commandBuffer, - uint32_t vertexCount, - uint32_t instanceCount, - uint32_t firstVertex, - uint32_t firstInstance) +void +sqtt_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, + uint32_t firstVertex, uint32_t firstInstance) { - EVENT_MARKER(Draw, commandBuffer, vertexCount, instanceCount, - firstVertex, firstInstance); + EVENT_MARKER(Draw, commandBuffer, vertexCount, instanceCount, firstVertex, firstInstance); } -void sqtt_CmdDrawIndexed( - VkCommandBuffer commandBuffer, - uint32_t indexCount, - uint32_t instanceCount, - uint32_t firstIndex, - int32_t vertexOffset, - uint32_t firstInstance) +void +sqtt_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, + uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance) { - EVENT_MARKER(DrawIndexed, commandBuffer, indexCount, instanceCount, - firstIndex, vertexOffset, firstInstance); + EVENT_MARKER(DrawIndexed, commandBuffer, indexCount, instanceCount, firstIndex, vertexOffset, + firstInstance); } -void sqtt_CmdDrawIndirect( - VkCommandBuffer commandBuffer, - VkBuffer buffer, - VkDeviceSize offset, - uint32_t drawCount, - uint32_t stride) +void +sqtt_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, + uint32_t drawCount, uint32_t stride) { - EVENT_MARKER(DrawIndirect, commandBuffer, buffer, offset, drawCount, - stride); + EVENT_MARKER(DrawIndirect, commandBuffer, buffer, offset, drawCount, stride); } -void sqtt_CmdDrawIndexedIndirect( - VkCommandBuffer commandBuffer, - VkBuffer buffer, - VkDeviceSize offset, - uint32_t drawCount, - uint32_t stride) +void +sqtt_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, + uint32_t drawCount, uint32_t stride) { - EVENT_MARKER(DrawIndexedIndirect, commandBuffer, buffer, offset, - drawCount, stride); + EVENT_MARKER(DrawIndexedIndirect, commandBuffer, buffer, offset, drawCount, stride); } -void sqtt_CmdDrawIndirectCount( - VkCommandBuffer commandBuffer, - VkBuffer buffer, - VkDeviceSize offset, - VkBuffer countBuffer, - VkDeviceSize countBufferOffset, - uint32_t maxDrawCount, - uint32_t stride) +void +sqtt_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, + VkBuffer countBuffer, VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, uint32_t stride) { - EVENT_MARKER(DrawIndirectCount,commandBuffer, buffer, offset, - countBuffer, countBufferOffset, maxDrawCount, stride); + EVENT_MARKER(DrawIndirectCount, commandBuffer, buffer, offset, countBuffer, countBufferOffset, + maxDrawCount, stride); } -void sqtt_CmdDrawIndexedIndirectCount( - VkCommandBuffer commandBuffer, - VkBuffer buffer, - VkDeviceSize offset, - VkBuffer countBuffer, - VkDeviceSize countBufferOffset, - uint32_t maxDrawCount, - uint32_t stride) +void +sqtt_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, + VkDeviceSize offset, VkBuffer countBuffer, + VkDeviceSize countBufferOffset, uint32_t maxDrawCount, + uint32_t stride) { - EVENT_MARKER(DrawIndexedIndirectCount, commandBuffer, buffer, offset, - countBuffer, countBufferOffset, maxDrawCount, stride); + EVENT_MARKER(DrawIndexedIndirectCount, commandBuffer, buffer, offset, countBuffer, + countBufferOffset, maxDrawCount, stride); } -void sqtt_CmdDispatch( - VkCommandBuffer commandBuffer, - uint32_t x, - uint32_t y, - uint32_t z) +void +sqtt_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z) { - EVENT_MARKER(Dispatch, commandBuffer, x, y, z); + EVENT_MARKER(Dispatch, commandBuffer, x, y, z); } -void sqtt_CmdDispatchIndirect( - VkCommandBuffer commandBuffer, - VkBuffer buffer, - VkDeviceSize offset) +void +sqtt_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset) { - EVENT_MARKER(DispatchIndirect, commandBuffer, buffer, offset); + EVENT_MARKER(DispatchIndirect, commandBuffer, buffer, offset); } -void sqtt_CmdCopyBuffer2KHR( - VkCommandBuffer commandBuffer, - const VkCopyBufferInfo2KHR* pCopyBufferInfo) +void +sqtt_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer, const VkCopyBufferInfo2KHR *pCopyBufferInfo) { - EVENT_MARKER_ALIAS(CopyBuffer2KHR, CopyBuffer, commandBuffer, - pCopyBufferInfo); + EVENT_MARKER_ALIAS(CopyBuffer2KHR, CopyBuffer, commandBuffer, pCopyBufferInfo); } -void sqtt_CmdFillBuffer( - VkCommandBuffer commandBuffer, - VkBuffer dstBuffer, - VkDeviceSize dstOffset, - VkDeviceSize fillSize, - uint32_t data) +void +sqtt_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, + VkDeviceSize fillSize, uint32_t data) { - EVENT_MARKER(FillBuffer, commandBuffer, dstBuffer, dstOffset, fillSize, - data); + EVENT_MARKER(FillBuffer, commandBuffer, dstBuffer, dstOffset, fillSize, data); } -void sqtt_CmdUpdateBuffer( - VkCommandBuffer commandBuffer, - VkBuffer dstBuffer, - VkDeviceSize dstOffset, - VkDeviceSize dataSize, - const void* pData) +void +sqtt_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, + VkDeviceSize dataSize, const void *pData) { - EVENT_MARKER(UpdateBuffer, commandBuffer, dstBuffer, dstOffset, - dataSize, pData); + EVENT_MARKER(UpdateBuffer, commandBuffer, dstBuffer, dstOffset, dataSize, pData); } -void sqtt_CmdCopyImage2KHR( - VkCommandBuffer commandBuffer, - const VkCopyImageInfo2KHR* pCopyImageInfo) +void +sqtt_CmdCopyImage2KHR(VkCommandBuffer commandBuffer, const VkCopyImageInfo2KHR *pCopyImageInfo) { - EVENT_MARKER_ALIAS(CopyImage2KHR, CopyImage, commandBuffer, - pCopyImageInfo); + EVENT_MARKER_ALIAS(CopyImage2KHR, CopyImage, commandBuffer, pCopyImageInfo); } -void sqtt_CmdCopyBufferToImage2KHR( - VkCommandBuffer commandBuffer, - const VkCopyBufferToImageInfo2KHR* pCopyBufferToImageInfo) +void +sqtt_CmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2KHR *pCopyBufferToImageInfo) { - EVENT_MARKER_ALIAS(CopyBufferToImage2KHR, CopyBufferToImage, - commandBuffer, pCopyBufferToImageInfo); + EVENT_MARKER_ALIAS(CopyBufferToImage2KHR, CopyBufferToImage, commandBuffer, + pCopyBufferToImageInfo); } -void sqtt_CmdCopyImageToBuffer2KHR( - VkCommandBuffer commandBuffer, - const VkCopyImageToBufferInfo2KHR* pCopyImageToBufferInfo) +void +sqtt_CmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2KHR *pCopyImageToBufferInfo) { - EVENT_MARKER_ALIAS(CopyImageToBuffer2KHR, CopyImageToBuffer, - commandBuffer, pCopyImageToBufferInfo); + EVENT_MARKER_ALIAS(CopyImageToBuffer2KHR, CopyImageToBuffer, commandBuffer, + pCopyImageToBufferInfo); } -void sqtt_CmdBlitImage2KHR( - VkCommandBuffer commandBuffer, - const VkBlitImageInfo2KHR* pBlitImageInfo) +void +sqtt_CmdBlitImage2KHR(VkCommandBuffer commandBuffer, const VkBlitImageInfo2KHR *pBlitImageInfo) { - EVENT_MARKER_ALIAS(BlitImage2KHR, BlitImage, commandBuffer, - pBlitImageInfo); + EVENT_MARKER_ALIAS(BlitImage2KHR, BlitImage, commandBuffer, pBlitImageInfo); } -void sqtt_CmdClearColorImage( - VkCommandBuffer commandBuffer, - VkImage image_h, - VkImageLayout imageLayout, - const VkClearColorValue* pColor, - uint32_t rangeCount, - const VkImageSubresourceRange* pRanges) +void +sqtt_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout, + const VkClearColorValue *pColor, uint32_t rangeCount, + const VkImageSubresourceRange *pRanges) { - EVENT_MARKER(ClearColorImage, commandBuffer, image_h, imageLayout, - pColor, rangeCount, pRanges); + EVENT_MARKER(ClearColorImage, commandBuffer, image_h, imageLayout, pColor, rangeCount, pRanges); } -void sqtt_CmdClearDepthStencilImage( - VkCommandBuffer commandBuffer, - VkImage image_h, - VkImageLayout imageLayout, - const VkClearDepthStencilValue* pDepthStencil, - uint32_t rangeCount, - const VkImageSubresourceRange* pRanges) +void +sqtt_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h, + VkImageLayout imageLayout, + const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount, + const VkImageSubresourceRange *pRanges) { - EVENT_MARKER(ClearDepthStencilImage, commandBuffer, image_h, - imageLayout, pDepthStencil, rangeCount, pRanges); + EVENT_MARKER(ClearDepthStencilImage, commandBuffer, image_h, imageLayout, pDepthStencil, + rangeCount, pRanges); } -void sqtt_CmdClearAttachments( - VkCommandBuffer commandBuffer, - uint32_t attachmentCount, - const VkClearAttachment* pAttachments, - uint32_t rectCount, - const VkClearRect* pRects) +void +sqtt_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount, + const VkClearAttachment *pAttachments, uint32_t rectCount, + const VkClearRect *pRects) { - EVENT_MARKER(ClearAttachments, commandBuffer, attachmentCount, - pAttachments, rectCount, pRects); + EVENT_MARKER(ClearAttachments, commandBuffer, attachmentCount, pAttachments, rectCount, pRects); } -void sqtt_CmdResolveImage2KHR( - VkCommandBuffer commandBuffer, - const VkResolveImageInfo2KHR* pResolveImageInfo) +void +sqtt_CmdResolveImage2KHR(VkCommandBuffer commandBuffer, + const VkResolveImageInfo2KHR *pResolveImageInfo) { - EVENT_MARKER_ALIAS(ResolveImage2KHR, ResolveImage, commandBuffer, - pResolveImageInfo); + EVENT_MARKER_ALIAS(ResolveImage2KHR, ResolveImage, commandBuffer, pResolveImageInfo); } -void sqtt_CmdWaitEvents(VkCommandBuffer commandBuffer, - uint32_t eventCount, - const VkEvent* pEvents, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - uint32_t memoryBarrierCount, - const VkMemoryBarrier* pMemoryBarriers, - uint32_t bufferMemoryBarrierCount, - const VkBufferMemoryBarrier* pBufferMemoryBarriers, - uint32_t imageMemoryBarrierCount, - const VkImageMemoryBarrier* pImageMemoryBarriers) +void +sqtt_CmdWaitEvents(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents, + VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, + uint32_t memoryBarrierCount, const VkMemoryBarrier *pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier *pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier *pImageMemoryBarriers) { - EVENT_MARKER(WaitEvents, commandBuffer, eventCount, pEvents, - srcStageMask, dstStageMask, memoryBarrierCount, - pMemoryBarriers, bufferMemoryBarrierCount, - pBufferMemoryBarriers, imageMemoryBarrierCount, - pImageMemoryBarriers); + EVENT_MARKER(WaitEvents, commandBuffer, eventCount, pEvents, srcStageMask, dstStageMask, + memoryBarrierCount, pMemoryBarriers, bufferMemoryBarrierCount, + pBufferMemoryBarriers, imageMemoryBarrierCount, pImageMemoryBarriers); } -void sqtt_CmdPipelineBarrier( - VkCommandBuffer commandBuffer, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags destStageMask, - VkBool32 byRegion, - uint32_t memoryBarrierCount, - const VkMemoryBarrier* pMemoryBarriers, - uint32_t bufferMemoryBarrierCount, - const VkBufferMemoryBarrier* pBufferMemoryBarriers, - uint32_t imageMemoryBarrierCount, - const VkImageMemoryBarrier* pImageMemoryBarriers) +void +sqtt_CmdPipelineBarrier(VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, VkBool32 byRegion, + uint32_t memoryBarrierCount, const VkMemoryBarrier *pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier *pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier *pImageMemoryBarriers) { - EVENT_MARKER(PipelineBarrier, commandBuffer, srcStageMask, - destStageMask, byRegion, memoryBarrierCount, - pMemoryBarriers, bufferMemoryBarrierCount, - pBufferMemoryBarriers, imageMemoryBarrierCount, - pImageMemoryBarriers); + EVENT_MARKER(PipelineBarrier, commandBuffer, srcStageMask, destStageMask, byRegion, + memoryBarrierCount, pMemoryBarriers, bufferMemoryBarrierCount, + pBufferMemoryBarriers, imageMemoryBarrierCount, pImageMemoryBarriers); } -void sqtt_CmdResetQueryPool( - VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t firstQuery, - uint32_t queryCount) +void +sqtt_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, + uint32_t queryCount) { - EVENT_MARKER(ResetQueryPool, commandBuffer, queryPool, firstQuery, - queryCount); + EVENT_MARKER(ResetQueryPool, commandBuffer, queryPool, firstQuery, queryCount); } -void sqtt_CmdCopyQueryPoolResults( - VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t firstQuery, - uint32_t queryCount, - VkBuffer dstBuffer, - VkDeviceSize dstOffset, - VkDeviceSize stride, - VkQueryResultFlags flags) +void +sqtt_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool, + uint32_t firstQuery, uint32_t queryCount, VkBuffer dstBuffer, + VkDeviceSize dstOffset, VkDeviceSize stride, VkQueryResultFlags flags) { - EVENT_MARKER(CopyQueryPoolResults, commandBuffer, queryPool, firstQuery, - queryCount, dstBuffer, dstOffset, stride, - flags); + EVENT_MARKER(CopyQueryPoolResults, commandBuffer, queryPool, firstQuery, queryCount, dstBuffer, + dstOffset, stride, flags); } #undef EVENT_MARKER -#define API_MARKER_ALIAS(cmd_name, api_name, ...) \ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); \ - radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name); \ - radv_Cmd##cmd_name(__VA_ARGS__); \ - radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name); +#define API_MARKER_ALIAS(cmd_name, api_name, ...) \ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); \ + radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name); \ + radv_Cmd##cmd_name(__VA_ARGS__); \ + radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name); -#define API_MARKER(cmd_name, ...) \ - API_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__); +#define API_MARKER(cmd_name, ...) API_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__); static bool radv_sqtt_dump_pipeline() { - return getenv("RADV_THREAD_TRACE_PIPELINE"); + return getenv("RADV_THREAD_TRACE_PIPELINE"); } -void sqtt_CmdBindPipeline( - VkCommandBuffer commandBuffer, - VkPipelineBindPoint pipelineBindPoint, - VkPipeline _pipeline) +void +sqtt_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, + VkPipeline _pipeline) { - RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline); + RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline); - API_MARKER(BindPipeline, commandBuffer, pipelineBindPoint, _pipeline); + API_MARKER(BindPipeline, commandBuffer, pipelineBindPoint, _pipeline); - if (radv_sqtt_dump_pipeline()) - radv_describe_pipeline_bind(cmd_buffer, pipelineBindPoint, pipeline); + if (radv_sqtt_dump_pipeline()) + radv_describe_pipeline_bind(cmd_buffer, pipelineBindPoint, pipeline); } -void sqtt_CmdBindDescriptorSets( - VkCommandBuffer commandBuffer, - VkPipelineBindPoint pipelineBindPoint, - VkPipelineLayout layout, - uint32_t firstSet, - uint32_t descriptorSetCount, - const VkDescriptorSet* pDescriptorSets, - uint32_t dynamicOffsetCount, - const uint32_t* pDynamicOffsets) +void +sqtt_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout layout, uint32_t firstSet, uint32_t descriptorSetCount, + const VkDescriptorSet *pDescriptorSets, uint32_t dynamicOffsetCount, + const uint32_t *pDynamicOffsets) { - API_MARKER(BindDescriptorSets, commandBuffer, pipelineBindPoint, - layout, firstSet, descriptorSetCount, - pDescriptorSets, dynamicOffsetCount, pDynamicOffsets); + API_MARKER(BindDescriptorSets, commandBuffer, pipelineBindPoint, layout, firstSet, + descriptorSetCount, pDescriptorSets, dynamicOffsetCount, pDynamicOffsets); } -void sqtt_CmdBindIndexBuffer( - VkCommandBuffer commandBuffer, - VkBuffer buffer, - VkDeviceSize offset, - VkIndexType indexType) +void +sqtt_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, + VkIndexType indexType) { - API_MARKER(BindIndexBuffer, commandBuffer, buffer, offset, indexType); + API_MARKER(BindIndexBuffer, commandBuffer, buffer, offset, indexType); } -void sqtt_CmdBindVertexBuffers( - VkCommandBuffer commandBuffer, - uint32_t firstBinding, - uint32_t bindingCount, - const VkBuffer* pBuffers, - const VkDeviceSize* pOffsets) +void +sqtt_CmdBindVertexBuffers(VkCommandBuffer commandBuffer, uint32_t firstBinding, + uint32_t bindingCount, const VkBuffer *pBuffers, + const VkDeviceSize *pOffsets) { - API_MARKER(BindVertexBuffers, commandBuffer, firstBinding, bindingCount, - pBuffers, pOffsets); + API_MARKER(BindVertexBuffers, commandBuffer, firstBinding, bindingCount, pBuffers, pOffsets); } -void sqtt_CmdBeginQuery( - VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t query, - VkQueryControlFlags flags) +void +sqtt_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query, + VkQueryControlFlags flags) { - API_MARKER(BeginQuery, commandBuffer, queryPool, query, flags); + API_MARKER(BeginQuery, commandBuffer, queryPool, query, flags); } -void sqtt_CmdEndQuery( - VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t query) +void +sqtt_CmdEndQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query) { - API_MARKER(EndQuery, commandBuffer, queryPool, query); + API_MARKER(EndQuery, commandBuffer, queryPool, query); } -void sqtt_CmdWriteTimestamp( - VkCommandBuffer commandBuffer, - VkPipelineStageFlagBits pipelineStage, - VkQueryPool queryPool, - uint32_t flags) +void +sqtt_CmdWriteTimestamp(VkCommandBuffer commandBuffer, VkPipelineStageFlagBits pipelineStage, + VkQueryPool queryPool, uint32_t flags) { - API_MARKER(WriteTimestamp, commandBuffer, pipelineStage, queryPool, flags); + API_MARKER(WriteTimestamp, commandBuffer, pipelineStage, queryPool, flags); } -void sqtt_CmdPushConstants( - VkCommandBuffer commandBuffer, - VkPipelineLayout layout, - VkShaderStageFlags stageFlags, - uint32_t offset, - uint32_t size, - const void* pValues) +void +sqtt_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout, + VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size, + const void *pValues) { - API_MARKER(PushConstants, commandBuffer, layout, stageFlags, offset, - size, pValues); + API_MARKER(PushConstants, commandBuffer, layout, stageFlags, offset, size, pValues); } -void sqtt_CmdBeginRenderPass2( - VkCommandBuffer commandBuffer, - const VkRenderPassBeginInfo* pRenderPassBeginInfo, - const VkSubpassBeginInfo* pSubpassBeginInfo) +void +sqtt_CmdBeginRenderPass2(VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo *pRenderPassBeginInfo, + const VkSubpassBeginInfo *pSubpassBeginInfo) { - API_MARKER_ALIAS(BeginRenderPass2, BeginRenderPass, commandBuffer, - pRenderPassBeginInfo, pSubpassBeginInfo); + API_MARKER_ALIAS(BeginRenderPass2, BeginRenderPass, commandBuffer, pRenderPassBeginInfo, + pSubpassBeginInfo); } -void sqtt_CmdNextSubpass2( - VkCommandBuffer commandBuffer, - const VkSubpassBeginInfo* pSubpassBeginInfo, - const VkSubpassEndInfo* pSubpassEndInfo) +void +sqtt_CmdNextSubpass2(VkCommandBuffer commandBuffer, const VkSubpassBeginInfo *pSubpassBeginInfo, + const VkSubpassEndInfo *pSubpassEndInfo) { - API_MARKER_ALIAS(NextSubpass2, NextSubpass, commandBuffer, - pSubpassBeginInfo, pSubpassEndInfo); + API_MARKER_ALIAS(NextSubpass2, NextSubpass, commandBuffer, pSubpassBeginInfo, pSubpassEndInfo); } -void sqtt_CmdEndRenderPass2( - VkCommandBuffer commandBuffer, - const VkSubpassEndInfo* pSubpassEndInfo) +void +sqtt_CmdEndRenderPass2(VkCommandBuffer commandBuffer, const VkSubpassEndInfo *pSubpassEndInfo) { - API_MARKER_ALIAS(EndRenderPass2, EndRenderPass, commandBuffer, - pSubpassEndInfo); + API_MARKER_ALIAS(EndRenderPass2, EndRenderPass, commandBuffer, pSubpassEndInfo); } -void sqtt_CmdExecuteCommands( - VkCommandBuffer commandBuffer, - uint32_t commandBufferCount, - const VkCommandBuffer* pCmdBuffers) +void +sqtt_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount, + const VkCommandBuffer *pCmdBuffers) { - API_MARKER(ExecuteCommands, commandBuffer, commandBufferCount, - pCmdBuffers); + API_MARKER(ExecuteCommands, commandBuffer, commandBufferCount, pCmdBuffers); } -void sqtt_CmdSetViewport( - VkCommandBuffer commandBuffer, - uint32_t firstViewport, - uint32_t viewportCount, - const VkViewport* pViewports) +void +sqtt_CmdSetViewport(VkCommandBuffer commandBuffer, uint32_t firstViewport, uint32_t viewportCount, + const VkViewport *pViewports) { - API_MARKER(SetViewport, commandBuffer, firstViewport, viewportCount, - pViewports); + API_MARKER(SetViewport, commandBuffer, firstViewport, viewportCount, pViewports); } -void sqtt_CmdSetScissor( - VkCommandBuffer commandBuffer, - uint32_t firstScissor, - uint32_t scissorCount, - const VkRect2D* pScissors) +void +sqtt_CmdSetScissor(VkCommandBuffer commandBuffer, uint32_t firstScissor, uint32_t scissorCount, + const VkRect2D *pScissors) { - API_MARKER(SetScissor, commandBuffer, firstScissor, scissorCount, - pScissors); + API_MARKER(SetScissor, commandBuffer, firstScissor, scissorCount, pScissors); } -void sqtt_CmdSetLineWidth( - VkCommandBuffer commandBuffer, - float lineWidth) +void +sqtt_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth) { - API_MARKER(SetLineWidth, commandBuffer, lineWidth); + API_MARKER(SetLineWidth, commandBuffer, lineWidth); } -void sqtt_CmdSetDepthBias( - VkCommandBuffer commandBuffer, - float depthBiasConstantFactor, - float depthBiasClamp, - float depthBiasSlopeFactor) +void +sqtt_CmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasConstantFactor, + float depthBiasClamp, float depthBiasSlopeFactor) { - API_MARKER(SetDepthBias, commandBuffer, depthBiasConstantFactor, - depthBiasClamp, depthBiasSlopeFactor); + API_MARKER(SetDepthBias, commandBuffer, depthBiasConstantFactor, depthBiasClamp, + depthBiasSlopeFactor); } -void sqtt_CmdSetBlendConstants( - VkCommandBuffer commandBuffer, - const float blendConstants[4]) +void +sqtt_CmdSetBlendConstants(VkCommandBuffer commandBuffer, const float blendConstants[4]) { - API_MARKER(SetBlendConstants, commandBuffer, blendConstants); + API_MARKER(SetBlendConstants, commandBuffer, blendConstants); } -void sqtt_CmdSetDepthBounds( - VkCommandBuffer commandBuffer, - float minDepthBounds, - float maxDepthBounds) +void +sqtt_CmdSetDepthBounds(VkCommandBuffer commandBuffer, float minDepthBounds, float maxDepthBounds) { - API_MARKER(SetDepthBounds, commandBuffer, minDepthBounds, - maxDepthBounds); + API_MARKER(SetDepthBounds, commandBuffer, minDepthBounds, maxDepthBounds); } -void sqtt_CmdSetStencilCompareMask( - VkCommandBuffer commandBuffer, - VkStencilFaceFlags faceMask, - uint32_t compareMask) +void +sqtt_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, + uint32_t compareMask) { - API_MARKER(SetStencilCompareMask, commandBuffer, faceMask, compareMask); + API_MARKER(SetStencilCompareMask, commandBuffer, faceMask, compareMask); } -void sqtt_CmdSetStencilWriteMask( - VkCommandBuffer commandBuffer, - VkStencilFaceFlags faceMask, - uint32_t writeMask) +void +sqtt_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, + uint32_t writeMask) { - API_MARKER(SetStencilWriteMask, commandBuffer, faceMask, writeMask); + API_MARKER(SetStencilWriteMask, commandBuffer, faceMask, writeMask); } -void sqtt_CmdSetStencilReference( - VkCommandBuffer commandBuffer, - VkStencilFaceFlags faceMask, - uint32_t reference) +void +sqtt_CmdSetStencilReference(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, + uint32_t reference) { - API_MARKER(SetStencilReference, commandBuffer, faceMask, reference); + API_MARKER(SetStencilReference, commandBuffer, faceMask, reference); } /* VK_EXT_debug_marker */ -void sqtt_CmdDebugMarkerBeginEXT( - VkCommandBuffer commandBuffer, - const VkDebugMarkerMarkerInfoEXT* pMarkerInfo) +void +sqtt_CmdDebugMarkerBeginEXT(VkCommandBuffer commandBuffer, + const VkDebugMarkerMarkerInfoEXT *pMarkerInfo) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - radv_write_user_event_marker(cmd_buffer, UserEventPush, - pMarkerInfo->pMarkerName); + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + radv_write_user_event_marker(cmd_buffer, UserEventPush, pMarkerInfo->pMarkerName); } -void sqtt_CmdDebugMarkerEndEXT( - VkCommandBuffer commandBuffer) +void +sqtt_CmdDebugMarkerEndEXT(VkCommandBuffer commandBuffer) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL); + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL); } -void sqtt_CmdDebugMarkerInsertEXT( - VkCommandBuffer commandBuffer, - const VkDebugMarkerMarkerInfoEXT* pMarkerInfo) +void +sqtt_CmdDebugMarkerInsertEXT(VkCommandBuffer commandBuffer, + const VkDebugMarkerMarkerInfoEXT *pMarkerInfo) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - radv_write_user_event_marker(cmd_buffer, UserEventTrigger, - pMarkerInfo->pMarkerName); + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + radv_write_user_event_marker(cmd_buffer, UserEventTrigger, pMarkerInfo->pMarkerName); } -VkResult sqtt_DebugMarkerSetObjectNameEXT( - VkDevice device, - const VkDebugMarkerObjectNameInfoEXT* pNameInfo) +VkResult +sqtt_DebugMarkerSetObjectNameEXT(VkDevice device, const VkDebugMarkerObjectNameInfoEXT *pNameInfo) { - /* no-op */ - return VK_SUCCESS; + /* no-op */ + return VK_SUCCESS; } -VkResult sqtt_DebugMarkerSetObjectTagEXT( - VkDevice device, - const VkDebugMarkerObjectTagInfoEXT* pTagInfo) +VkResult +sqtt_DebugMarkerSetObjectTagEXT(VkDevice device, const VkDebugMarkerObjectTagInfoEXT *pTagInfo) { - /* no-op */ - return VK_SUCCESS; + /* no-op */ + return VK_SUCCESS; } /* Pipelines */ static enum rgp_hardware_stages -radv_mesa_to_rgp_shader_stage(struct radv_pipeline *pipeline, - gl_shader_stage stage) +radv_mesa_to_rgp_shader_stage(struct radv_pipeline *pipeline, gl_shader_stage stage) { - struct radv_shader_variant *shader = pipeline->shaders[stage]; + struct radv_shader_variant *shader = pipeline->shaders[stage]; - switch (stage) { - case MESA_SHADER_VERTEX: - if (shader->info.vs.as_ls) - return RGP_HW_STAGE_LS; - else if (shader->info.vs.as_es) - return RGP_HW_STAGE_ES; - else if (shader->info.is_ngg) - return RGP_HW_STAGE_GS; - else - return RGP_HW_STAGE_VS; - case MESA_SHADER_TESS_CTRL: - return RGP_HW_STAGE_HS; - case MESA_SHADER_TESS_EVAL: - if (shader->info.tes.as_es) - return RGP_HW_STAGE_ES; - else if (shader->info.is_ngg) - return RGP_HW_STAGE_GS; - else - return RGP_HW_STAGE_VS; - case MESA_SHADER_GEOMETRY: - return RGP_HW_STAGE_GS; - case MESA_SHADER_FRAGMENT: - return RGP_HW_STAGE_PS; - case MESA_SHADER_COMPUTE: - return RGP_HW_STAGE_CS; - default: - unreachable("invalid mesa shader stage"); - } + switch (stage) { + case MESA_SHADER_VERTEX: + if (shader->info.vs.as_ls) + return RGP_HW_STAGE_LS; + else if (shader->info.vs.as_es) + return RGP_HW_STAGE_ES; + else if (shader->info.is_ngg) + return RGP_HW_STAGE_GS; + else + return RGP_HW_STAGE_VS; + case MESA_SHADER_TESS_CTRL: + return RGP_HW_STAGE_HS; + case MESA_SHADER_TESS_EVAL: + if (shader->info.tes.as_es) + return RGP_HW_STAGE_ES; + else if (shader->info.is_ngg) + return RGP_HW_STAGE_GS; + else + return RGP_HW_STAGE_VS; + case MESA_SHADER_GEOMETRY: + return RGP_HW_STAGE_GS; + case MESA_SHADER_FRAGMENT: + return RGP_HW_STAGE_PS; + case MESA_SHADER_COMPUTE: + return RGP_HW_STAGE_CS; + default: + unreachable("invalid mesa shader stage"); + } } static VkResult -radv_add_code_object(struct radv_device *device, - struct radv_pipeline *pipeline) +radv_add_code_object(struct radv_device *device, struct radv_pipeline *pipeline) { - struct ac_thread_trace_data *thread_trace_data = &device->thread_trace; - struct rgp_code_object *code_object = &thread_trace_data->rgp_code_object; - struct rgp_code_object_record *record; + struct ac_thread_trace_data *thread_trace_data = &device->thread_trace; + struct rgp_code_object *code_object = &thread_trace_data->rgp_code_object; + struct rgp_code_object_record *record; - record = malloc(sizeof(struct rgp_code_object_record)); - if (!record) - return VK_ERROR_OUT_OF_HOST_MEMORY; + record = malloc(sizeof(struct rgp_code_object_record)); + if (!record) + return VK_ERROR_OUT_OF_HOST_MEMORY; - record->shader_stages_mask = 0; - record->num_shaders_combined = 0; - record->pipeline_hash[0] = pipeline->pipeline_hash; - record->pipeline_hash[1] = pipeline->pipeline_hash; + record->shader_stages_mask = 0; + record->num_shaders_combined = 0; + record->pipeline_hash[0] = pipeline->pipeline_hash; + record->pipeline_hash[1] = pipeline->pipeline_hash; - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - struct radv_shader_variant *shader = pipeline->shaders[i]; - uint8_t *code; - uint64_t va; + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + struct radv_shader_variant *shader = pipeline->shaders[i]; + uint8_t *code; + uint64_t va; - if (!shader) - continue; + if (!shader) + continue; - code = malloc(shader->code_size); - if (!code) { - free(record); - return VK_ERROR_OUT_OF_HOST_MEMORY; - } - memcpy(code, shader->code_ptr, shader->code_size); + code = malloc(shader->code_size); + if (!code) { + free(record); + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + memcpy(code, shader->code_ptr, shader->code_size); - va = radv_buffer_get_va(shader->bo) + shader->bo_offset; + va = radv_buffer_get_va(shader->bo) + shader->bo_offset; - record->shader_data[i].hash[0] = (uint64_t)(uintptr_t)shader; - record->shader_data[i].hash[1] = (uint64_t)(uintptr_t)shader >> 32; - record->shader_data[i].code_size = shader->code_size; - record->shader_data[i].code = code; - record->shader_data[i].vgpr_count = shader->config.num_vgprs; - record->shader_data[i].sgpr_count = shader->config.num_sgprs; - record->shader_data[i].base_address = va & 0xffffffffffff; - record->shader_data[i].elf_symbol_offset = 0; - record->shader_data[i].hw_stage = radv_mesa_to_rgp_shader_stage(pipeline, i); - record->shader_data[i].is_combined = false; + record->shader_data[i].hash[0] = (uint64_t)(uintptr_t)shader; + record->shader_data[i].hash[1] = (uint64_t)(uintptr_t)shader >> 32; + record->shader_data[i].code_size = shader->code_size; + record->shader_data[i].code = code; + record->shader_data[i].vgpr_count = shader->config.num_vgprs; + record->shader_data[i].sgpr_count = shader->config.num_sgprs; + record->shader_data[i].base_address = va & 0xffffffffffff; + record->shader_data[i].elf_symbol_offset = 0; + record->shader_data[i].hw_stage = radv_mesa_to_rgp_shader_stage(pipeline, i); + record->shader_data[i].is_combined = false; - record->shader_stages_mask |= (1 << i); - record->num_shaders_combined++; - } + record->shader_stages_mask |= (1 << i); + record->num_shaders_combined++; + } - simple_mtx_lock(&code_object->lock); - list_addtail(&record->list, &code_object->record); - code_object->record_count++; - simple_mtx_unlock(&code_object->lock); + simple_mtx_lock(&code_object->lock); + list_addtail(&record->list, &code_object->record); + code_object->record_count++; + simple_mtx_unlock(&code_object->lock); - return VK_SUCCESS; + return VK_SUCCESS; } static VkResult -radv_register_pipeline(struct radv_device *device, - struct radv_pipeline *pipeline) +radv_register_pipeline(struct radv_device *device, struct radv_pipeline *pipeline) { - bool result; - uint64_t base_va = ~0; + bool result; + uint64_t base_va = ~0; - result = ac_sqtt_add_pso_correlation(&device->thread_trace, pipeline->pipeline_hash); - if (!result) - return VK_ERROR_OUT_OF_HOST_MEMORY; + result = ac_sqtt_add_pso_correlation(&device->thread_trace, pipeline->pipeline_hash); + if (!result) + return VK_ERROR_OUT_OF_HOST_MEMORY; - /* Find the lowest shader BO VA. */ - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - struct radv_shader_variant *shader = pipeline->shaders[i]; - uint64_t va; + /* Find the lowest shader BO VA. */ + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + struct radv_shader_variant *shader = pipeline->shaders[i]; + uint64_t va; - if (!shader) - continue; + if (!shader) + continue; - va = radv_buffer_get_va(shader->bo) + shader->bo_offset; - base_va = MIN2(base_va, va); - } + va = radv_buffer_get_va(shader->bo) + shader->bo_offset; + base_va = MIN2(base_va, va); + } - result = ac_sqtt_add_code_object_loader_event(&device->thread_trace, - pipeline->pipeline_hash, - base_va); - if (!result) - return VK_ERROR_OUT_OF_HOST_MEMORY; + result = + ac_sqtt_add_code_object_loader_event(&device->thread_trace, pipeline->pipeline_hash, base_va); + if (!result) + return VK_ERROR_OUT_OF_HOST_MEMORY; - result = radv_add_code_object(device, pipeline); - if (result != VK_SUCCESS) - return result; + result = radv_add_code_object(device, pipeline); + if (result != VK_SUCCESS) + return result; - return VK_SUCCESS; + return VK_SUCCESS; } static void -radv_unregister_pipeline(struct radv_device *device, - struct radv_pipeline *pipeline) +radv_unregister_pipeline(struct radv_device *device, struct radv_pipeline *pipeline) { - struct ac_thread_trace_data *thread_trace_data = &device->thread_trace; - struct rgp_pso_correlation *pso_correlation = &thread_trace_data->rgp_pso_correlation; - struct rgp_loader_events *loader_events = &thread_trace_data->rgp_loader_events; - struct rgp_code_object *code_object = &thread_trace_data->rgp_code_object; + struct ac_thread_trace_data *thread_trace_data = &device->thread_trace; + struct rgp_pso_correlation *pso_correlation = &thread_trace_data->rgp_pso_correlation; + struct rgp_loader_events *loader_events = &thread_trace_data->rgp_loader_events; + struct rgp_code_object *code_object = &thread_trace_data->rgp_code_object; - /* Destroy the PSO correlation record. */ - simple_mtx_lock(&pso_correlation->lock); - list_for_each_entry_safe(struct rgp_pso_correlation_record, record, - &pso_correlation->record, list) { - if (record->pipeline_hash[0] == pipeline->pipeline_hash) { - pso_correlation->record_count--; - list_del(&record->list); - free(record); - break; - } - } - simple_mtx_unlock(&pso_correlation->lock); + /* Destroy the PSO correlation record. */ + simple_mtx_lock(&pso_correlation->lock); + list_for_each_entry_safe(struct rgp_pso_correlation_record, record, &pso_correlation->record, + list) + { + if (record->pipeline_hash[0] == pipeline->pipeline_hash) { + pso_correlation->record_count--; + list_del(&record->list); + free(record); + break; + } + } + simple_mtx_unlock(&pso_correlation->lock); - /* Destroy the code object loader record. */ - simple_mtx_lock(&loader_events->lock); - list_for_each_entry_safe(struct rgp_loader_events_record, record, - &loader_events->record, list) { - if (record->code_object_hash[0] == pipeline->pipeline_hash) { - loader_events->record_count--; - list_del(&record->list); - free(record); - break; - } - } - simple_mtx_unlock(&loader_events->lock); + /* Destroy the code object loader record. */ + simple_mtx_lock(&loader_events->lock); + list_for_each_entry_safe(struct rgp_loader_events_record, record, &loader_events->record, list) + { + if (record->code_object_hash[0] == pipeline->pipeline_hash) { + loader_events->record_count--; + list_del(&record->list); + free(record); + break; + } + } + simple_mtx_unlock(&loader_events->lock); - /* Destroy the code object record. */ - simple_mtx_lock(&code_object->lock); - list_for_each_entry_safe(struct rgp_code_object_record, record, - &code_object->record, list) { - if (record->pipeline_hash[0] == pipeline->pipeline_hash) { - uint32_t mask = record->shader_stages_mask; - int i; + /* Destroy the code object record. */ + simple_mtx_lock(&code_object->lock); + list_for_each_entry_safe(struct rgp_code_object_record, record, &code_object->record, list) + { + if (record->pipeline_hash[0] == pipeline->pipeline_hash) { + uint32_t mask = record->shader_stages_mask; + int i; - /* Free the disassembly. */ - while (mask) { - i = u_bit_scan(&mask); - free(record->shader_data[i].code); - } + /* Free the disassembly. */ + while (mask) { + i = u_bit_scan(&mask); + free(record->shader_data[i].code); + } - code_object->record_count--; - list_del(&record->list); - free(record); - break; - } - } - simple_mtx_unlock(&code_object->lock); + code_object->record_count--; + list_del(&record->list); + free(record); + break; + } + } + simple_mtx_unlock(&code_object->lock); } -VkResult sqtt_CreateGraphicsPipelines( - VkDevice _device, - VkPipelineCache pipelineCache, - uint32_t count, - const VkGraphicsPipelineCreateInfo* pCreateInfos, - const VkAllocationCallbacks* pAllocator, - VkPipeline* pPipelines) +VkResult +sqtt_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count, + const VkGraphicsPipelineCreateInfo *pCreateInfos, + const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines) { - RADV_FROM_HANDLE(radv_device, device, _device); - VkResult result; + RADV_FROM_HANDLE(radv_device, device, _device); + VkResult result; - result = radv_CreateGraphicsPipelines(_device, pipelineCache, count, - pCreateInfos, pAllocator, - pPipelines); - if (result != VK_SUCCESS) - return result; + result = radv_CreateGraphicsPipelines(_device, pipelineCache, count, pCreateInfos, pAllocator, + pPipelines); + if (result != VK_SUCCESS) + return result; - if (radv_sqtt_dump_pipeline()) { - for (unsigned i = 0; i < count; i++) { - RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]); + if (radv_sqtt_dump_pipeline()) { + for (unsigned i = 0; i < count; i++) { + RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]); - if (!pipeline) - continue; + if (!pipeline) + continue; - result = radv_register_pipeline(device, pipeline); - if (result != VK_SUCCESS) - goto fail; - } - } + result = radv_register_pipeline(device, pipeline); + if (result != VK_SUCCESS) + goto fail; + } + } - return VK_SUCCESS; + return VK_SUCCESS; fail: - for (unsigned i = 0; i < count; i++) { - sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator); - pPipelines[i] = VK_NULL_HANDLE; - } - return result; + for (unsigned i = 0; i < count; i++) { + sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator); + pPipelines[i] = VK_NULL_HANDLE; + } + return result; } -VkResult sqtt_CreateComputePipelines( - VkDevice _device, - VkPipelineCache pipelineCache, - uint32_t count, - const VkComputePipelineCreateInfo* pCreateInfos, - const VkAllocationCallbacks* pAllocator, - VkPipeline* pPipelines) +VkResult +sqtt_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count, + const VkComputePipelineCreateInfo *pCreateInfos, + const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines) { - RADV_FROM_HANDLE(radv_device, device, _device); - VkResult result; + RADV_FROM_HANDLE(radv_device, device, _device); + VkResult result; - result = radv_CreateComputePipelines(_device, pipelineCache, count, - pCreateInfos, pAllocator, - pPipelines); - if (result != VK_SUCCESS) - return result; + result = radv_CreateComputePipelines(_device, pipelineCache, count, pCreateInfos, pAllocator, + pPipelines); + if (result != VK_SUCCESS) + return result; - if (radv_sqtt_dump_pipeline()) { - for (unsigned i = 0; i < count; i++) { - RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]); + if (radv_sqtt_dump_pipeline()) { + for (unsigned i = 0; i < count; i++) { + RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]); - if (!pipeline) - continue; + if (!pipeline) + continue; - result = radv_register_pipeline(device, pipeline); - if (result != VK_SUCCESS) - goto fail; - } - } + result = radv_register_pipeline(device, pipeline); + if (result != VK_SUCCESS) + goto fail; + } + } - return VK_SUCCESS; + return VK_SUCCESS; fail: - for (unsigned i = 0; i < count; i++) { - sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator); - pPipelines[i] = VK_NULL_HANDLE; - } - return result; + for (unsigned i = 0; i < count; i++) { + sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator); + pPipelines[i] = VK_NULL_HANDLE; + } + return result; } -void sqtt_DestroyPipeline( - VkDevice _device, - VkPipeline _pipeline, - const VkAllocationCallbacks* pAllocator) +void +sqtt_DestroyPipeline(VkDevice _device, VkPipeline _pipeline, + const VkAllocationCallbacks *pAllocator) { - RADV_FROM_HANDLE(radv_device, device, _device); - RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline); + RADV_FROM_HANDLE(radv_device, device, _device); + RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline); - if (!_pipeline) - return; + if (!_pipeline) + return; - if (radv_sqtt_dump_pipeline()) - radv_unregister_pipeline(device, pipeline); + if (radv_sqtt_dump_pipeline()) + radv_unregister_pipeline(device, pipeline); - radv_DestroyPipeline(_device, _pipeline, pAllocator); + radv_DestroyPipeline(_device, _pipeline, pAllocator); } #undef API_MARKER diff --git a/src/amd/vulkan/radv_android.c b/src/amd/vulkan/radv_android.c index 76ab39ca480..f8064c011d9 100644 --- a/src/amd/vulkan/radv_android.c +++ b/src/amd/vulkan/radv_android.c @@ -22,12 +22,12 @@ */ #ifdef ANDROID +#include #include #include #include #include #include -#include #if ANDROID_API_LEVEL >= 26 #include @@ -42,433 +42,421 @@ #ifdef ANDROID -static int radv_hal_open(const struct hw_module_t* mod, const char* id, struct hw_device_t** dev); +static int radv_hal_open(const struct hw_module_t *mod, const char *id, struct hw_device_t **dev); static int radv_hal_close(struct hw_device_t *dev); static void UNUSED static_asserts(void) { - STATIC_ASSERT(HWVULKAN_DISPATCH_MAGIC == ICD_LOADER_MAGIC); + STATIC_ASSERT(HWVULKAN_DISPATCH_MAGIC == ICD_LOADER_MAGIC); } PUBLIC struct hwvulkan_module_t HAL_MODULE_INFO_SYM = { - .common = { - .tag = HARDWARE_MODULE_TAG, - .module_api_version = HWVULKAN_MODULE_API_VERSION_0_1, - .hal_api_version = HARDWARE_MAKE_API_VERSION(1, 0), - .id = HWVULKAN_HARDWARE_MODULE_ID, - .name = "AMD Vulkan HAL", - .author = "Google", - .methods = &(hw_module_methods_t) { - .open = radv_hal_open, - }, - }, + .common = + { + .tag = HARDWARE_MODULE_TAG, + .module_api_version = HWVULKAN_MODULE_API_VERSION_0_1, + .hal_api_version = HARDWARE_MAKE_API_VERSION(1, 0), + .id = HWVULKAN_HARDWARE_MODULE_ID, + .name = "AMD Vulkan HAL", + .author = "Google", + .methods = + &(hw_module_methods_t){ + .open = radv_hal_open, + }, + }, }; /* If any bits in test_mask are set, then unset them and return true. */ static inline bool unmask32(uint32_t *inout_mask, uint32_t test_mask) { - uint32_t orig_mask = *inout_mask; - *inout_mask &= ~test_mask; - return *inout_mask != orig_mask; + uint32_t orig_mask = *inout_mask; + *inout_mask &= ~test_mask; + return *inout_mask != orig_mask; } static int -radv_hal_open(const struct hw_module_t* mod, const char* id, - struct hw_device_t** dev) +radv_hal_open(const struct hw_module_t *mod, const char *id, struct hw_device_t **dev) { - assert(mod == &HAL_MODULE_INFO_SYM.common); - assert(strcmp(id, HWVULKAN_DEVICE_0) == 0); + assert(mod == &HAL_MODULE_INFO_SYM.common); + assert(strcmp(id, HWVULKAN_DEVICE_0) == 0); - hwvulkan_device_t *hal_dev = malloc(sizeof(*hal_dev)); - if (!hal_dev) - return -1; + hwvulkan_device_t *hal_dev = malloc(sizeof(*hal_dev)); + if (!hal_dev) + return -1; - *hal_dev = (hwvulkan_device_t) { - .common = { - .tag = HARDWARE_DEVICE_TAG, - .version = HWVULKAN_DEVICE_API_VERSION_0_1, - .module = &HAL_MODULE_INFO_SYM.common, - .close = radv_hal_close, - }, - .EnumerateInstanceExtensionProperties = radv_EnumerateInstanceExtensionProperties, - .CreateInstance = radv_CreateInstance, - .GetInstanceProcAddr = radv_GetInstanceProcAddr, - }; + *hal_dev = (hwvulkan_device_t){ + .common = + { + .tag = HARDWARE_DEVICE_TAG, + .version = HWVULKAN_DEVICE_API_VERSION_0_1, + .module = &HAL_MODULE_INFO_SYM.common, + .close = radv_hal_close, + }, + .EnumerateInstanceExtensionProperties = radv_EnumerateInstanceExtensionProperties, + .CreateInstance = radv_CreateInstance, + .GetInstanceProcAddr = radv_GetInstanceProcAddr, + }; - *dev = &hal_dev->common; - return 0; + *dev = &hal_dev->common; + return 0; } static int radv_hal_close(struct hw_device_t *dev) { - /* hwvulkan.h claims that hw_device_t::close() is never called. */ - return -1; + /* hwvulkan.h claims that hw_device_t::close() is never called. */ + return -1; } VkResult -radv_image_from_gralloc(VkDevice device_h, - const VkImageCreateInfo *base_info, - const VkNativeBufferANDROID *gralloc_info, - const VkAllocationCallbacks *alloc, - VkImage *out_image_h) +radv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info, + const VkNativeBufferANDROID *gralloc_info, + const VkAllocationCallbacks *alloc, VkImage *out_image_h) { - RADV_FROM_HANDLE(radv_device, device, device_h); - VkImage image_h = VK_NULL_HANDLE; - struct radv_image *image = NULL; - VkResult result; + RADV_FROM_HANDLE(radv_device, device, device_h); + VkImage image_h = VK_NULL_HANDLE; + struct radv_image *image = NULL; + VkResult result; - if (gralloc_info->handle->numFds != 1) { - return vk_errorf(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE, - "VkNativeBufferANDROID::handle::numFds is %d, " - "expected 1", gralloc_info->handle->numFds); - } + if (gralloc_info->handle->numFds != 1) { + return vk_errorf(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE, + "VkNativeBufferANDROID::handle::numFds is %d, " + "expected 1", + gralloc_info->handle->numFds); + } - /* Do not close the gralloc handle's dma_buf. The lifetime of the dma_buf - * must exceed that of the gralloc handle, and we do not own the gralloc - * handle. - */ - int dma_buf = gralloc_info->handle->data[0]; + /* Do not close the gralloc handle's dma_buf. The lifetime of the dma_buf + * must exceed that of the gralloc handle, and we do not own the gralloc + * handle. + */ + int dma_buf = gralloc_info->handle->data[0]; - VkDeviceMemory memory_h; + VkDeviceMemory memory_h; - const VkImportMemoryFdInfoKHR import_info = { - .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR, - .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, - .fd = os_dupfd_cloexec(dma_buf), - }; + const VkImportMemoryFdInfoKHR import_info = { + .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR, + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, + .fd = os_dupfd_cloexec(dma_buf), + }; - /* Find the first VRAM memory type, or GART for PRIME images. */ - int memory_type_index = -1; - for (int i = 0; i < device->physical_device->memory_properties.memoryTypeCount; ++i) { - bool is_local = !!(device->physical_device->memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); - if (is_local) { - memory_type_index = i; - break; - } - } + /* Find the first VRAM memory type, or GART for PRIME images. */ + int memory_type_index = -1; + for (int i = 0; i < device->physical_device->memory_properties.memoryTypeCount; ++i) { + bool is_local = !!(device->physical_device->memory_properties.memoryTypes[i].propertyFlags & + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + if (is_local) { + memory_type_index = i; + break; + } + } - /* fallback */ - if (memory_type_index == -1) - memory_type_index = 0; + /* fallback */ + if (memory_type_index == -1) + memory_type_index = 0; - result = radv_AllocateMemory(device_h, - &(VkMemoryAllocateInfo) { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .pNext = &import_info, - /* Max buffer size, unused for imports */ - .allocationSize = 0x7FFFFFFF, - .memoryTypeIndex = memory_type_index, - }, - alloc, - &memory_h); - if (result != VK_SUCCESS) - return result; + result = radv_AllocateMemory(device_h, + &(VkMemoryAllocateInfo){ + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = &import_info, + /* Max buffer size, unused for imports */ + .allocationSize = 0x7FFFFFFF, + .memoryTypeIndex = memory_type_index, + }, + alloc, &memory_h); + if (result != VK_SUCCESS) + return result; - struct radeon_bo_metadata md; - device->ws->buffer_get_metadata(device->ws, radv_device_memory_from_handle(memory_h)->bo, &md); + struct radeon_bo_metadata md; + device->ws->buffer_get_metadata(device->ws, radv_device_memory_from_handle(memory_h)->bo, &md); - VkImageCreateInfo updated_base_info = *base_info; + VkImageCreateInfo updated_base_info = *base_info; - VkExternalMemoryImageCreateInfo external_memory_info = { - .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, - .pNext = updated_base_info.pNext, - .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, - }; + VkExternalMemoryImageCreateInfo external_memory_info = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, + .pNext = updated_base_info.pNext, + .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, + }; - updated_base_info.pNext = &external_memory_info; + updated_base_info.pNext = &external_memory_info; - result = radv_image_create(device_h, - &(struct radv_image_create_info) { - .vk_info = &updated_base_info, - .no_metadata_planes = true, - .bo_metadata = &md, - }, - alloc, - &image_h); + result = radv_image_create(device_h, + &(struct radv_image_create_info){ + .vk_info = &updated_base_info, + .no_metadata_planes = true, + .bo_metadata = &md, + }, + alloc, &image_h); - if (result != VK_SUCCESS) - goto fail_create_image; + if (result != VK_SUCCESS) + goto fail_create_image; - image = radv_image_from_handle(image_h); + image = radv_image_from_handle(image_h); - radv_image_override_offset_stride(device, image, 0, gralloc_info->stride); + radv_image_override_offset_stride(device, image, 0, gralloc_info->stride); - radv_BindImageMemory(device_h, image_h, memory_h, 0); + radv_BindImageMemory(device_h, image_h, memory_h, 0); - image->owned_memory = memory_h; - /* Don't clobber the out-parameter until success is certain. */ - *out_image_h = image_h; + image->owned_memory = memory_h; + /* Don't clobber the out-parameter until success is certain. */ + *out_image_h = image_h; - return VK_SUCCESS; + return VK_SUCCESS; fail_create_image: - radv_FreeMemory(device_h, memory_h, alloc); - return result; + radv_FreeMemory(device_h, memory_h, alloc); + return result; } -VkResult radv_GetSwapchainGrallocUsageANDROID( - VkDevice device_h, - VkFormat format, - VkImageUsageFlags imageUsage, - int* grallocUsage) +VkResult +radv_GetSwapchainGrallocUsageANDROID(VkDevice device_h, VkFormat format, + VkImageUsageFlags imageUsage, int *grallocUsage) { - RADV_FROM_HANDLE(radv_device, device, device_h); - struct radv_physical_device *phys_dev = device->physical_device; - VkPhysicalDevice phys_dev_h = radv_physical_device_to_handle(phys_dev); - VkResult result; + RADV_FROM_HANDLE(radv_device, device, device_h); + struct radv_physical_device *phys_dev = device->physical_device; + VkPhysicalDevice phys_dev_h = radv_physical_device_to_handle(phys_dev); + VkResult result; - *grallocUsage = 0; + *grallocUsage = 0; - /* WARNING: Android Nougat's libvulkan.so hardcodes the VkImageUsageFlags - * returned to applications via VkSurfaceCapabilitiesKHR::supportedUsageFlags. - * The relevant code in libvulkan/swapchain.cpp contains this fun comment: - * - * TODO(jessehall): I think these are right, but haven't thought hard - * about it. Do we need to query the driver for support of any of - * these? - * - * Any disagreement between this function and the hardcoded - * VkSurfaceCapabilitiesKHR:supportedUsageFlags causes tests - * dEQP-VK.wsi.android.swapchain.*.image_usage to fail. - */ + /* WARNING: Android Nougat's libvulkan.so hardcodes the VkImageUsageFlags + * returned to applications via VkSurfaceCapabilitiesKHR::supportedUsageFlags. + * The relevant code in libvulkan/swapchain.cpp contains this fun comment: + * + * TODO(jessehall): I think these are right, but haven't thought hard + * about it. Do we need to query the driver for support of any of + * these? + * + * Any disagreement between this function and the hardcoded + * VkSurfaceCapabilitiesKHR:supportedUsageFlags causes tests + * dEQP-VK.wsi.android.swapchain.*.image_usage to fail. + */ - const VkPhysicalDeviceImageFormatInfo2 image_format_info = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, - .format = format, - .type = VK_IMAGE_TYPE_2D, - .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = imageUsage, - }; + const VkPhysicalDeviceImageFormatInfo2 image_format_info = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, + .format = format, + .type = VK_IMAGE_TYPE_2D, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = imageUsage, + }; - VkImageFormatProperties2 image_format_props = { - .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2, - }; + VkImageFormatProperties2 image_format_props = { + .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2, + }; - /* Check that requested format and usage are supported. */ - result = radv_GetPhysicalDeviceImageFormatProperties2(phys_dev_h, - &image_format_info, &image_format_props); - if (result != VK_SUCCESS) { - return vk_errorf(device->instance, result, - "radv_GetPhysicalDeviceImageFormatProperties2 failed " - "inside %s", __func__); - } + /* Check that requested format and usage are supported. */ + result = radv_GetPhysicalDeviceImageFormatProperties2(phys_dev_h, &image_format_info, + &image_format_props); + if (result != VK_SUCCESS) { + return vk_errorf(device->instance, result, + "radv_GetPhysicalDeviceImageFormatProperties2 failed " + "inside %s", + __func__); + } - if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) - *grallocUsage |= GRALLOC_USAGE_HW_RENDER; + if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) + *grallocUsage |= GRALLOC_USAGE_HW_RENDER; - if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_STORAGE_BIT | - VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) - *grallocUsage |= GRALLOC_USAGE_HW_TEXTURE; + if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) + *grallocUsage |= GRALLOC_USAGE_HW_TEXTURE; - /* All VkImageUsageFlags not explicitly checked here are unsupported for - * gralloc swapchains. - */ - if (imageUsage != 0) { - return vk_errorf(device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED, - "unsupported VkImageUsageFlags(0x%x) for gralloc " - "swapchain", imageUsage); - } + /* All VkImageUsageFlags not explicitly checked here are unsupported for + * gralloc swapchains. + */ + if (imageUsage != 0) { + return vk_errorf(device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED, + "unsupported VkImageUsageFlags(0x%x) for gralloc " + "swapchain", + imageUsage); + } - /* - * FINISHME: Advertise all display-supported formats. Mostly - * DRM_FORMAT_ARGB2101010 and DRM_FORMAT_ABGR2101010, but need to check - * what we need for 30-bit colors. - */ - if (format == VK_FORMAT_B8G8R8A8_UNORM || - format == VK_FORMAT_B5G6R5_UNORM_PACK16) { - *grallocUsage |= GRALLOC_USAGE_HW_FB | - GRALLOC_USAGE_HW_COMPOSER | - GRALLOC_USAGE_EXTERNAL_DISP; - } + /* + * FINISHME: Advertise all display-supported formats. Mostly + * DRM_FORMAT_ARGB2101010 and DRM_FORMAT_ABGR2101010, but need to check + * what we need for 30-bit colors. + */ + if (format == VK_FORMAT_B8G8R8A8_UNORM || format == VK_FORMAT_B5G6R5_UNORM_PACK16) { + *grallocUsage |= + GRALLOC_USAGE_HW_FB | GRALLOC_USAGE_HW_COMPOSER | GRALLOC_USAGE_EXTERNAL_DISP; + } - if (*grallocUsage == 0) - return VK_ERROR_FORMAT_NOT_SUPPORTED; + if (*grallocUsage == 0) + return VK_ERROR_FORMAT_NOT_SUPPORTED; - return VK_SUCCESS; + return VK_SUCCESS; } -VkResult radv_GetSwapchainGrallocUsage2ANDROID( - VkDevice device_h, - VkFormat format, - VkImageUsageFlags imageUsage, - VkSwapchainImageUsageFlagsANDROID swapchainImageUsage, - uint64_t* grallocConsumerUsage, - uint64_t* grallocProducerUsage) +VkResult +radv_GetSwapchainGrallocUsage2ANDROID(VkDevice device_h, VkFormat format, + VkImageUsageFlags imageUsage, + VkSwapchainImageUsageFlagsANDROID swapchainImageUsage, + uint64_t *grallocConsumerUsage, + uint64_t *grallocProducerUsage) { - /* Before level 26 (Android 8.0/Oreo) the loader uses - * vkGetSwapchainGrallocUsageANDROID. */ + /* Before level 26 (Android 8.0/Oreo) the loader uses + * vkGetSwapchainGrallocUsageANDROID. */ #if ANDROID_API_LEVEL >= 26 - RADV_FROM_HANDLE(radv_device, device, device_h); - struct radv_physical_device *phys_dev = device->physical_device; - VkPhysicalDevice phys_dev_h = radv_physical_device_to_handle(phys_dev); - VkResult result; + RADV_FROM_HANDLE(radv_device, device, device_h); + struct radv_physical_device *phys_dev = device->physical_device; + VkPhysicalDevice phys_dev_h = radv_physical_device_to_handle(phys_dev); + VkResult result; - *grallocConsumerUsage = 0; - *grallocProducerUsage = 0; + *grallocConsumerUsage = 0; + *grallocProducerUsage = 0; - if (swapchainImageUsage & VK_SWAPCHAIN_IMAGE_USAGE_SHARED_BIT_ANDROID) - return vk_errorf(device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED, - "The Vulkan loader tried to query shared presentable image support"); + if (swapchainImageUsage & VK_SWAPCHAIN_IMAGE_USAGE_SHARED_BIT_ANDROID) + return vk_errorf(device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED, + "The Vulkan loader tried to query shared presentable image support"); - const VkPhysicalDeviceImageFormatInfo2 image_format_info = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, - .format = format, - .type = VK_IMAGE_TYPE_2D, - .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = imageUsage, - }; + const VkPhysicalDeviceImageFormatInfo2 image_format_info = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, + .format = format, + .type = VK_IMAGE_TYPE_2D, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = imageUsage, + }; - VkImageFormatProperties2 image_format_props = { - .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2, - }; + VkImageFormatProperties2 image_format_props = { + .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2, + }; - /* Check that requested format and usage are supported. */ - result = radv_GetPhysicalDeviceImageFormatProperties2(phys_dev_h, - &image_format_info, &image_format_props); - if (result != VK_SUCCESS) { - return vk_errorf(device->instance, result, - "radv_GetPhysicalDeviceImageFormatProperties2 failed " - "inside %s", __func__); - } + /* Check that requested format and usage are supported. */ + result = radv_GetPhysicalDeviceImageFormatProperties2(phys_dev_h, &image_format_info, + &image_format_props); + if (result != VK_SUCCESS) { + return vk_errorf(device->instance, result, + "radv_GetPhysicalDeviceImageFormatProperties2 failed " + "inside %s", + __func__); + } - if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) { - *grallocProducerUsage |= GRALLOC1_PRODUCER_USAGE_GPU_RENDER_TARGET; - *grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_CLIENT_TARGET; - } + if (unmask32(&imageUsage, + VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) { + *grallocProducerUsage |= GRALLOC1_PRODUCER_USAGE_GPU_RENDER_TARGET; + *grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_CLIENT_TARGET; + } - if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_STORAGE_BIT | - VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) { - *grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_GPU_TEXTURE; - } + if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) { + *grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_GPU_TEXTURE; + } - if (imageUsage != 0) { - return vk_errorf(device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED, - "unsupported VkImageUsageFlags(0x%x) for gralloc " - "swapchain", imageUsage); - } + if (imageUsage != 0) { + return vk_errorf(device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED, + "unsupported VkImageUsageFlags(0x%x) for gralloc " + "swapchain", + imageUsage); + } - /* - * FINISHME: Advertise all display-supported formats. Mostly - * DRM_FORMAT_ARGB2101010 and DRM_FORMAT_ABGR2101010, but need to check - * what we need for 30-bit colors. - */ - if (format == VK_FORMAT_B8G8R8A8_UNORM || - format == VK_FORMAT_B5G6R5_UNORM_PACK16) { - *grallocProducerUsage |= GRALLOC1_PRODUCER_USAGE_GPU_RENDER_TARGET; - *grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_HWCOMPOSER; - } + /* + * FINISHME: Advertise all display-supported formats. Mostly + * DRM_FORMAT_ARGB2101010 and DRM_FORMAT_ABGR2101010, but need to check + * what we need for 30-bit colors. + */ + if (format == VK_FORMAT_B8G8R8A8_UNORM || format == VK_FORMAT_B5G6R5_UNORM_PACK16) { + *grallocProducerUsage |= GRALLOC1_PRODUCER_USAGE_GPU_RENDER_TARGET; + *grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_HWCOMPOSER; + } - if (!*grallocProducerUsage && !*grallocConsumerUsage) - return VK_ERROR_FORMAT_NOT_SUPPORTED; + if (!*grallocProducerUsage && !*grallocConsumerUsage) + return VK_ERROR_FORMAT_NOT_SUPPORTED; - return VK_SUCCESS; + return VK_SUCCESS; #else - *grallocConsumerUsage = 0; - *grallocProducerUsage = 0; - return VK_ERROR_FORMAT_NOT_SUPPORTED; + *grallocConsumerUsage = 0; + *grallocProducerUsage = 0; + return VK_ERROR_FORMAT_NOT_SUPPORTED; #endif } VkResult -radv_AcquireImageANDROID( - VkDevice device, - VkImage image_h, - int nativeFenceFd, - VkSemaphore semaphore, - VkFence fence) +radv_AcquireImageANDROID(VkDevice device, VkImage image_h, int nativeFenceFd, VkSemaphore semaphore, + VkFence fence) { - VkResult semaphore_result = VK_SUCCESS, fence_result = VK_SUCCESS; + VkResult semaphore_result = VK_SUCCESS, fence_result = VK_SUCCESS; - if (semaphore != VK_NULL_HANDLE) { - int semaphore_fd = nativeFenceFd >= 0 ? os_dupfd_cloexec(nativeFenceFd) : nativeFenceFd; - semaphore_result = radv_ImportSemaphoreFdKHR(device, - &(VkImportSemaphoreFdInfoKHR) { - .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR, - .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT, - .fd = semaphore_fd, - .semaphore = semaphore, - .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, - }); - } + if (semaphore != VK_NULL_HANDLE) { + int semaphore_fd = nativeFenceFd >= 0 ? os_dupfd_cloexec(nativeFenceFd) : nativeFenceFd; + semaphore_result = radv_ImportSemaphoreFdKHR( + device, &(VkImportSemaphoreFdInfoKHR){ + .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR, + .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT, + .fd = semaphore_fd, + .semaphore = semaphore, + .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, + }); + } - if (fence != VK_NULL_HANDLE) { - int fence_fd = nativeFenceFd >= 0 ? os_dupfd_cloexec(nativeFenceFd) : nativeFenceFd; - fence_result = radv_ImportFenceFdKHR(device, - &(VkImportFenceFdInfoKHR) { - .sType = VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR, - .flags = VK_FENCE_IMPORT_TEMPORARY_BIT, - .fd = fence_fd, - .fence = fence, - .handleType = VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT, - }); - } + if (fence != VK_NULL_HANDLE) { + int fence_fd = nativeFenceFd >= 0 ? os_dupfd_cloexec(nativeFenceFd) : nativeFenceFd; + fence_result = + radv_ImportFenceFdKHR(device, &(VkImportFenceFdInfoKHR){ + .sType = VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR, + .flags = VK_FENCE_IMPORT_TEMPORARY_BIT, + .fd = fence_fd, + .fence = fence, + .handleType = VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT, + }); + } - close(nativeFenceFd); + close(nativeFenceFd); - if (semaphore_result != VK_SUCCESS) - return semaphore_result; - return fence_result; + if (semaphore_result != VK_SUCCESS) + return semaphore_result; + return fence_result; } VkResult -radv_QueueSignalReleaseImageANDROID( - VkQueue _queue, - uint32_t waitSemaphoreCount, - const VkSemaphore* pWaitSemaphores, - VkImage image, - int* pNativeFenceFd) +radv_QueueSignalReleaseImageANDROID(VkQueue _queue, uint32_t waitSemaphoreCount, + const VkSemaphore *pWaitSemaphores, VkImage image, + int *pNativeFenceFd) { - RADV_FROM_HANDLE(radv_queue, queue, _queue); - VkResult result = VK_SUCCESS; + RADV_FROM_HANDLE(radv_queue, queue, _queue); + VkResult result = VK_SUCCESS; - if (waitSemaphoreCount == 0) { - if (pNativeFenceFd) - *pNativeFenceFd = -1; - return VK_SUCCESS; - } + if (waitSemaphoreCount == 0) { + if (pNativeFenceFd) + *pNativeFenceFd = -1; + return VK_SUCCESS; + } - int fd = -1; + int fd = -1; - for (uint32_t i = 0; i < waitSemaphoreCount; ++i) { - int tmp_fd; - result = radv_GetSemaphoreFdKHR(radv_device_to_handle(queue->device), - &(VkSemaphoreGetFdInfoKHR) { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR, - .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, - .semaphore = pWaitSemaphores[i], - }, &tmp_fd); - if (result != VK_SUCCESS) { - if (fd >= 0) - close (fd); - return result; - } + for (uint32_t i = 0; i < waitSemaphoreCount; ++i) { + int tmp_fd; + result = + radv_GetSemaphoreFdKHR(radv_device_to_handle(queue->device), + &(VkSemaphoreGetFdInfoKHR){ + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR, + .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, + .semaphore = pWaitSemaphores[i], + }, + &tmp_fd); + if (result != VK_SUCCESS) { + if (fd >= 0) + close(fd); + return result; + } - if (fd < 0) - fd = tmp_fd; - else if (tmp_fd >= 0) { - sync_accumulate("radv", &fd, tmp_fd); - close(tmp_fd); - } - } + if (fd < 0) + fd = tmp_fd; + else if (tmp_fd >= 0) { + sync_accumulate("radv", &fd, tmp_fd); + close(tmp_fd); + } + } - if (pNativeFenceFd) { - *pNativeFenceFd = fd; - } else if (fd >= 0) { - close(fd); - /* We still need to do the exports, to reset the semaphores, but - * otherwise we don't wait on them. */ - } - return VK_SUCCESS; + if (pNativeFenceFd) { + *pNativeFenceFd = fd; + } else if (fd >= 0) { + close(fd); + /* We still need to do the exports, to reset the semaphores, but + * otherwise we don't wait on them. */ + } + return VK_SUCCESS; } #endif @@ -482,29 +470,29 @@ enum { static inline VkFormat vk_format_from_android(unsigned android_format, unsigned android_usage) { - switch (android_format) { - case AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM: - case AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM: - return VK_FORMAT_R8G8B8A8_UNORM; - case AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM: - return VK_FORMAT_R8G8B8_UNORM; - case AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM: - return VK_FORMAT_R5G6B5_UNORM_PACK16; - case AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT: - return VK_FORMAT_R16G16B16A16_SFLOAT; - case AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM: - return VK_FORMAT_A2B10G10R10_UNORM_PACK32; - case AHARDWAREBUFFER_FORMAT_Y8Cb8Cr8_420: - return VK_FORMAT_G8_B8R8_2PLANE_420_UNORM; - case AHARDWAREBUFFER_FORMAT_IMPLEMENTATION_DEFINED: - if (android_usage & AHARDWAREBUFFER_USAGE_CAMERA_MASK) - return VK_FORMAT_G8_B8R8_2PLANE_420_UNORM; - else - return VK_FORMAT_R8G8B8_UNORM; - case AHARDWAREBUFFER_FORMAT_BLOB: - default: - return VK_FORMAT_UNDEFINED; - } + switch (android_format) { + case AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM: + case AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM: + return VK_FORMAT_R8G8B8A8_UNORM; + case AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM: + return VK_FORMAT_R8G8B8_UNORM; + case AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM: + return VK_FORMAT_R5G6B5_UNORM_PACK16; + case AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT: + return VK_FORMAT_R16G16B16A16_SFLOAT; + case AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM: + return VK_FORMAT_A2B10G10R10_UNORM_PACK32; + case AHARDWAREBUFFER_FORMAT_Y8Cb8Cr8_420: + return VK_FORMAT_G8_B8R8_2PLANE_420_UNORM; + case AHARDWAREBUFFER_FORMAT_IMPLEMENTATION_DEFINED: + if (android_usage & AHARDWAREBUFFER_USAGE_CAMERA_MASK) + return VK_FORMAT_G8_B8R8_2PLANE_420_UNORM; + else + return VK_FORMAT_R8G8B8_UNORM; + case AHARDWAREBUFFER_FORMAT_BLOB: + default: + return VK_FORMAT_UNDEFINED; + } } static inline unsigned @@ -529,8 +517,7 @@ android_format_from_vk(unsigned vk_format) } uint64_t -radv_ahb_usage_from_vk_usage(const VkImageCreateFlags vk_create, - const VkImageUsageFlags vk_usage) +radv_ahb_usage_from_vk_usage(const VkImageCreateFlags vk_create, const VkImageUsageFlags vk_usage) { uint64_t ahb_usage = 0; if (vk_usage & VK_IMAGE_USAGE_SAMPLED_BIT) @@ -555,145 +542,137 @@ radv_ahb_usage_from_vk_usage(const VkImageCreateFlags vk_create, } static VkResult -get_ahb_buffer_format_properties( - VkDevice device_h, - const struct AHardwareBuffer *buffer, - VkAndroidHardwareBufferFormatPropertiesANDROID *pProperties) +get_ahb_buffer_format_properties(VkDevice device_h, const struct AHardwareBuffer *buffer, + VkAndroidHardwareBufferFormatPropertiesANDROID *pProperties) { - RADV_FROM_HANDLE(radv_device, device, device_h); + RADV_FROM_HANDLE(radv_device, device, device_h); - /* Get a description of buffer contents . */ - AHardwareBuffer_Desc desc; - AHardwareBuffer_describe(buffer, &desc); + /* Get a description of buffer contents . */ + AHardwareBuffer_Desc desc; + AHardwareBuffer_describe(buffer, &desc); - /* Verify description. */ - const uint64_t gpu_usage = - AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE | - AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT | - AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER; + /* Verify description. */ + const uint64_t gpu_usage = AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE | + AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT | + AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER; - /* "Buffer must be a valid Android hardware buffer object with at least - * one of the AHARDWAREBUFFER_USAGE_GPU_* usage flags." - */ - if (!(desc.usage & (gpu_usage))) - return VK_ERROR_INVALID_EXTERNAL_HANDLE; + /* "Buffer must be a valid Android hardware buffer object with at least + * one of the AHARDWAREBUFFER_USAGE_GPU_* usage flags." + */ + if (!(desc.usage & (gpu_usage))) + return VK_ERROR_INVALID_EXTERNAL_HANDLE; - /* Fill properties fields based on description. */ - VkAndroidHardwareBufferFormatPropertiesANDROID *p = pProperties; + /* Fill properties fields based on description. */ + VkAndroidHardwareBufferFormatPropertiesANDROID *p = pProperties; - p->format = vk_format_from_android(desc.format, desc.usage); - p->externalFormat = (uint64_t) (uintptr_t) p->format; + p->format = vk_format_from_android(desc.format, desc.usage); + p->externalFormat = (uint64_t)(uintptr_t)p->format; - VkFormatProperties format_properties; - radv_GetPhysicalDeviceFormatProperties( - radv_physical_device_to_handle(device->physical_device), - p->format, &format_properties); + VkFormatProperties format_properties; + radv_GetPhysicalDeviceFormatProperties(radv_physical_device_to_handle(device->physical_device), + p->format, &format_properties); - if (desc.usage & AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER) - p->formatFeatures = format_properties.linearTilingFeatures; - else - p->formatFeatures = format_properties.optimalTilingFeatures; + if (desc.usage & AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER) + p->formatFeatures = format_properties.linearTilingFeatures; + else + p->formatFeatures = format_properties.optimalTilingFeatures; - /* "Images can be created with an external format even if the Android hardware - * buffer has a format which has an equivalent Vulkan format to enable - * consistent handling of images from sources that might use either category - * of format. However, all images created with an external format are subject - * to the valid usage requirements associated with external formats, even if - * the Android hardware buffer’s format has a Vulkan equivalent." - * - * "The formatFeatures member *must* include - * VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT and at least one of - * VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT or - * VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT" - */ - assert(p->formatFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT); + /* "Images can be created with an external format even if the Android hardware + * buffer has a format which has an equivalent Vulkan format to enable + * consistent handling of images from sources that might use either category + * of format. However, all images created with an external format are subject + * to the valid usage requirements associated with external formats, even if + * the Android hardware buffer’s format has a Vulkan equivalent." + * + * "The formatFeatures member *must* include + * VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT and at least one of + * VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT or + * VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT" + */ + assert(p->formatFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT); - p->formatFeatures |= VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT; + p->formatFeatures |= VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT; - /* "Implementations may not always be able to determine the color model, - * numerical range, or chroma offsets of the image contents, so the values - * in VkAndroidHardwareBufferFormatPropertiesANDROID are only suggestions. - * Applications should treat these values as sensible defaults to use in - * the absence of more reliable information obtained through some other - * means." - */ - p->samplerYcbcrConversionComponents.r = VK_COMPONENT_SWIZZLE_IDENTITY; - p->samplerYcbcrConversionComponents.g = VK_COMPONENT_SWIZZLE_IDENTITY; - p->samplerYcbcrConversionComponents.b = VK_COMPONENT_SWIZZLE_IDENTITY; - p->samplerYcbcrConversionComponents.a = VK_COMPONENT_SWIZZLE_IDENTITY; + /* "Implementations may not always be able to determine the color model, + * numerical range, or chroma offsets of the image contents, so the values + * in VkAndroidHardwareBufferFormatPropertiesANDROID are only suggestions. + * Applications should treat these values as sensible defaults to use in + * the absence of more reliable information obtained through some other + * means." + */ + p->samplerYcbcrConversionComponents.r = VK_COMPONENT_SWIZZLE_IDENTITY; + p->samplerYcbcrConversionComponents.g = VK_COMPONENT_SWIZZLE_IDENTITY; + p->samplerYcbcrConversionComponents.b = VK_COMPONENT_SWIZZLE_IDENTITY; + p->samplerYcbcrConversionComponents.a = VK_COMPONENT_SWIZZLE_IDENTITY; - p->suggestedYcbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601; - p->suggestedYcbcrRange = VK_SAMPLER_YCBCR_RANGE_ITU_FULL; + p->suggestedYcbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601; + p->suggestedYcbcrRange = VK_SAMPLER_YCBCR_RANGE_ITU_FULL; - p->suggestedXChromaOffset = VK_CHROMA_LOCATION_MIDPOINT; - p->suggestedYChromaOffset = VK_CHROMA_LOCATION_MIDPOINT; + p->suggestedXChromaOffset = VK_CHROMA_LOCATION_MIDPOINT; + p->suggestedYChromaOffset = VK_CHROMA_LOCATION_MIDPOINT; - return VK_SUCCESS; + return VK_SUCCESS; } VkResult -radv_GetAndroidHardwareBufferPropertiesANDROID( - VkDevice device_h, - const struct AHardwareBuffer *buffer, - VkAndroidHardwareBufferPropertiesANDROID *pProperties) +radv_GetAndroidHardwareBufferPropertiesANDROID(VkDevice device_h, + const struct AHardwareBuffer *buffer, + VkAndroidHardwareBufferPropertiesANDROID *pProperties) { - RADV_FROM_HANDLE(radv_device, dev, device_h); - struct radv_physical_device *pdevice = dev->physical_device; + RADV_FROM_HANDLE(radv_device, dev, device_h); + struct radv_physical_device *pdevice = dev->physical_device; - VkAndroidHardwareBufferFormatPropertiesANDROID *format_prop = - vk_find_struct(pProperties->pNext, - ANDROID_HARDWARE_BUFFER_FORMAT_PROPERTIES_ANDROID); + VkAndroidHardwareBufferFormatPropertiesANDROID *format_prop = + vk_find_struct(pProperties->pNext, ANDROID_HARDWARE_BUFFER_FORMAT_PROPERTIES_ANDROID); - /* Fill format properties of an Android hardware buffer. */ - if (format_prop) - get_ahb_buffer_format_properties(device_h, buffer, format_prop); + /* Fill format properties of an Android hardware buffer. */ + if (format_prop) + get_ahb_buffer_format_properties(device_h, buffer, format_prop); - /* NOTE - We support buffers with only one handle but do not error on - * multiple handle case. Reason is that we want to support YUV formats - * where we have many logical planes but they all point to the same - * buffer, like is the case with VK_FORMAT_G8_B8R8_2PLANE_420_UNORM. - */ - const native_handle_t *handle = - AHardwareBuffer_getNativeHandle(buffer); - int dma_buf = (handle && handle->numFds) ? handle->data[0] : -1; - if (dma_buf < 0) - return VK_ERROR_INVALID_EXTERNAL_HANDLE; + /* NOTE - We support buffers with only one handle but do not error on + * multiple handle case. Reason is that we want to support YUV formats + * where we have many logical planes but they all point to the same + * buffer, like is the case with VK_FORMAT_G8_B8R8_2PLANE_420_UNORM. + */ + const native_handle_t *handle = AHardwareBuffer_getNativeHandle(buffer); + int dma_buf = (handle && handle->numFds) ? handle->data[0] : -1; + if (dma_buf < 0) + return VK_ERROR_INVALID_EXTERNAL_HANDLE; - /* All memory types. */ - uint32_t memory_types = (1u << pdevice->memory_properties.memoryTypeCount) - 1; + /* All memory types. */ + uint32_t memory_types = (1u << pdevice->memory_properties.memoryTypeCount) - 1; - pProperties->allocationSize = lseek(dma_buf, 0, SEEK_END); - pProperties->memoryTypeBits = memory_types; + pProperties->allocationSize = lseek(dma_buf, 0, SEEK_END); + pProperties->memoryTypeBits = memory_types; - return VK_SUCCESS; + return VK_SUCCESS; } VkResult -radv_GetMemoryAndroidHardwareBufferANDROID( - VkDevice device_h, - const VkMemoryGetAndroidHardwareBufferInfoANDROID *pInfo, - struct AHardwareBuffer **pBuffer) +radv_GetMemoryAndroidHardwareBufferANDROID(VkDevice device_h, + const VkMemoryGetAndroidHardwareBufferInfoANDROID *pInfo, + struct AHardwareBuffer **pBuffer) { - RADV_FROM_HANDLE(radv_device_memory, mem, pInfo->memory); + RADV_FROM_HANDLE(radv_device_memory, mem, pInfo->memory); - /* This should always be set due to the export handle types being set on - * allocation. */ - assert(mem->android_hardware_buffer); + /* This should always be set due to the export handle types being set on + * allocation. */ + assert(mem->android_hardware_buffer); - /* Some quotes from Vulkan spec: - * - * "If the device memory was created by importing an Android hardware - * buffer, vkGetMemoryAndroidHardwareBufferANDROID must return that same - * Android hardware buffer object." - * - * "VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID must - * have been included in VkExportMemoryAllocateInfo::handleTypes when - * memory was created." - */ - *pBuffer = mem->android_hardware_buffer; - /* Increase refcount. */ - AHardwareBuffer_acquire(mem->android_hardware_buffer); - return VK_SUCCESS; + /* Some quotes from Vulkan spec: + * + * "If the device memory was created by importing an Android hardware + * buffer, vkGetMemoryAndroidHardwareBufferANDROID must return that same + * Android hardware buffer object." + * + * "VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID must + * have been included in VkExportMemoryAllocateInfo::handleTypes when + * memory was created." + */ + *pBuffer = mem->android_hardware_buffer; + /* Increase refcount. */ + AHardwareBuffer_acquire(mem->android_hardware_buffer); + return VK_SUCCESS; } #endif @@ -702,172 +681,162 @@ VkFormat radv_select_android_external_format(const void *next, VkFormat default_format) { #if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER - const VkExternalFormatANDROID *android_format = - vk_find_struct_const(next, EXTERNAL_FORMAT_ANDROID); + const VkExternalFormatANDROID *android_format = + vk_find_struct_const(next, EXTERNAL_FORMAT_ANDROID); - if (android_format && android_format->externalFormat) { - return (VkFormat)android_format->externalFormat; - } + if (android_format && android_format->externalFormat) { + return (VkFormat)android_format->externalFormat; + } #endif - return default_format; + return default_format; } - VkResult -radv_import_ahb_memory(struct radv_device *device, - struct radv_device_memory *mem, - unsigned priority, - const VkImportAndroidHardwareBufferInfoANDROID *info) +radv_import_ahb_memory(struct radv_device *device, struct radv_device_memory *mem, + unsigned priority, const VkImportAndroidHardwareBufferInfoANDROID *info) { #if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER - /* Import from AHardwareBuffer to radv_device_memory. */ - const native_handle_t *handle = - AHardwareBuffer_getNativeHandle(info->buffer); + /* Import from AHardwareBuffer to radv_device_memory. */ + const native_handle_t *handle = AHardwareBuffer_getNativeHandle(info->buffer); - /* NOTE - We support buffers with only one handle but do not error on - * multiple handle case. Reason is that we want to support YUV formats - * where we have many logical planes but they all point to the same - * buffer, like is the case with VK_FORMAT_G8_B8R8_2PLANE_420_UNORM. - */ - int dma_buf = (handle && handle->numFds) ? handle->data[0] : -1; - if (dma_buf < 0) - return VK_ERROR_INVALID_EXTERNAL_HANDLE; + /* NOTE - We support buffers with only one handle but do not error on + * multiple handle case. Reason is that we want to support YUV formats + * where we have many logical planes but they all point to the same + * buffer, like is the case with VK_FORMAT_G8_B8R8_2PLANE_420_UNORM. + */ + int dma_buf = (handle && handle->numFds) ? handle->data[0] : -1; + if (dma_buf < 0) + return VK_ERROR_INVALID_EXTERNAL_HANDLE; - uint64_t alloc_size = 0; - mem->bo = device->ws->buffer_from_fd(device->ws, dma_buf, - priority, &alloc_size); - if (!mem->bo) - return VK_ERROR_OUT_OF_HOST_MEMORY; + uint64_t alloc_size = 0; + mem->bo = device->ws->buffer_from_fd(device->ws, dma_buf, priority, &alloc_size); + if (!mem->bo) + return VK_ERROR_OUT_OF_HOST_MEMORY; - if (mem->image) { - struct radeon_bo_metadata metadata; - device->ws->buffer_get_metadata(device->ws, mem->bo, &metadata); + if (mem->image) { + struct radeon_bo_metadata metadata; + device->ws->buffer_get_metadata(device->ws, mem->bo, &metadata); - struct radv_image_create_info create_info = { - .no_metadata_planes = true, - .bo_metadata = &metadata - }; + struct radv_image_create_info create_info = {.no_metadata_planes = true, + .bo_metadata = &metadata}; - VkResult result = radv_image_create_layout(device, create_info, NULL, mem->image); - if (result != VK_SUCCESS) { - device->ws->buffer_destroy(device->ws, mem->bo); - mem->bo = NULL; - return result; - } + VkResult result = radv_image_create_layout(device, create_info, NULL, mem->image); + if (result != VK_SUCCESS) { + device->ws->buffer_destroy(device->ws, mem->bo); + mem->bo = NULL; + return result; + } - if (alloc_size < mem->image->size) { - device->ws->buffer_destroy(device->ws, mem->bo); - mem->bo = NULL; - return VK_ERROR_INVALID_EXTERNAL_HANDLE; - } - } else if (mem->buffer) { - if (alloc_size < mem->buffer->size) { - device->ws->buffer_destroy(device->ws, mem->bo); - mem->bo = NULL; - return VK_ERROR_INVALID_EXTERNAL_HANDLE; - } - } + if (alloc_size < mem->image->size) { + device->ws->buffer_destroy(device->ws, mem->bo); + mem->bo = NULL; + return VK_ERROR_INVALID_EXTERNAL_HANDLE; + } + } else if (mem->buffer) { + if (alloc_size < mem->buffer->size) { + device->ws->buffer_destroy(device->ws, mem->bo); + mem->bo = NULL; + return VK_ERROR_INVALID_EXTERNAL_HANDLE; + } + } - /* "If the vkAllocateMemory command succeeds, the implementation must - * acquire a reference to the imported hardware buffer, which it must - * release when the device memory object is freed. If the command fails, - * the implementation must not retain a reference." - */ - AHardwareBuffer_acquire(info->buffer); - mem->android_hardware_buffer = info->buffer; + /* "If the vkAllocateMemory command succeeds, the implementation must + * acquire a reference to the imported hardware buffer, which it must + * release when the device memory object is freed. If the command fails, + * the implementation must not retain a reference." + */ + AHardwareBuffer_acquire(info->buffer); + mem->android_hardware_buffer = info->buffer; - return VK_SUCCESS; + return VK_SUCCESS; #else /* RADV_SUPPORT_ANDROID_HARDWARE_BUFFER */ - return VK_ERROR_EXTENSION_NOT_PRESENT; + return VK_ERROR_EXTENSION_NOT_PRESENT; #endif } VkResult -radv_create_ahb_memory(struct radv_device *device, - struct radv_device_memory *mem, - unsigned priority, - const VkMemoryAllocateInfo *pAllocateInfo) +radv_create_ahb_memory(struct radv_device *device, struct radv_device_memory *mem, + unsigned priority, const VkMemoryAllocateInfo *pAllocateInfo) { #if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER - const VkMemoryDedicatedAllocateInfo *dedicated_info = - vk_find_struct_const(pAllocateInfo->pNext, - MEMORY_DEDICATED_ALLOCATE_INFO); + const VkMemoryDedicatedAllocateInfo *dedicated_info = + vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO); - uint32_t w = 0; - uint32_t h = 1; - uint32_t layers = 1; - uint32_t format = 0; - uint64_t usage = 0; + uint32_t w = 0; + uint32_t h = 1; + uint32_t layers = 1; + uint32_t format = 0; + uint64_t usage = 0; - /* If caller passed dedicated information. */ - if (dedicated_info && dedicated_info->image) { - RADV_FROM_HANDLE(radv_image, image, dedicated_info->image); - w = image->info.width; - h = image->info.height; - layers = image->info.array_size; - format = android_format_from_vk(image->vk_format); - usage = radv_ahb_usage_from_vk_usage(image->flags, image->usage); - } else if (dedicated_info && dedicated_info->buffer) { - RADV_FROM_HANDLE(radv_buffer, buffer, dedicated_info->buffer); - w = buffer->size; - format = AHARDWAREBUFFER_FORMAT_BLOB; - usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | - AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN; - } else { - w = pAllocateInfo->allocationSize; - format = AHARDWAREBUFFER_FORMAT_BLOB; - usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | - AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN; - } + /* If caller passed dedicated information. */ + if (dedicated_info && dedicated_info->image) { + RADV_FROM_HANDLE(radv_image, image, dedicated_info->image); + w = image->info.width; + h = image->info.height; + layers = image->info.array_size; + format = android_format_from_vk(image->vk_format); + usage = radv_ahb_usage_from_vk_usage(image->flags, image->usage); + } else if (dedicated_info && dedicated_info->buffer) { + RADV_FROM_HANDLE(radv_buffer, buffer, dedicated_info->buffer); + w = buffer->size; + format = AHARDWAREBUFFER_FORMAT_BLOB; + usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN; + } else { + w = pAllocateInfo->allocationSize; + format = AHARDWAREBUFFER_FORMAT_BLOB; + usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN; + } - struct AHardwareBuffer *android_hardware_buffer = NULL; - struct AHardwareBuffer_Desc desc = { - .width = w, - .height = h, - .layers = layers, - .format = format, - .usage = usage, - }; + struct AHardwareBuffer *android_hardware_buffer = NULL; + struct AHardwareBuffer_Desc desc = { + .width = w, + .height = h, + .layers = layers, + .format = format, + .usage = usage, + }; - if (AHardwareBuffer_allocate(&desc, &android_hardware_buffer) != 0) - return VK_ERROR_OUT_OF_HOST_MEMORY; + if (AHardwareBuffer_allocate(&desc, &android_hardware_buffer) != 0) + return VK_ERROR_OUT_OF_HOST_MEMORY; - mem->android_hardware_buffer = android_hardware_buffer; + mem->android_hardware_buffer = android_hardware_buffer; - const struct VkImportAndroidHardwareBufferInfoANDROID import_info = { - .buffer = mem->android_hardware_buffer, - }; + const struct VkImportAndroidHardwareBufferInfoANDROID import_info = { + .buffer = mem->android_hardware_buffer, + }; - VkResult result = radv_import_ahb_memory(device, mem, priority, &import_info); - if (result != VK_SUCCESS) - AHardwareBuffer_release(mem->android_hardware_buffer); - return result; + VkResult result = radv_import_ahb_memory(device, mem, priority, &import_info); + if (result != VK_SUCCESS) + AHardwareBuffer_release(mem->android_hardware_buffer); + return result; #else /* RADV_SUPPORT_ANDROID_HARDWARE_BUFFER */ - return VK_ERROR_EXTENSION_NOT_PRESENT; + return VK_ERROR_EXTENSION_NOT_PRESENT; #endif } -bool radv_android_gralloc_supports_format(VkFormat format, VkImageUsageFlagBits usage) { +bool +radv_android_gralloc_supports_format(VkFormat format, VkImageUsageFlagBits usage) +{ #if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER - /* Ideally we check Gralloc for what it supports and then merge that with the radv - format support, but there is no easy gralloc query besides just creating an image. - That seems a bit on the expensive side, so just hardcode for now. */ - /* TODO: Add multi-plane formats after confirming everything works between radeonsi - and radv. */ - switch(format) { - case VK_FORMAT_R8G8B8A8_UNORM: - case VK_FORMAT_R5G6B5_UNORM_PACK16: - return true; - case VK_FORMAT_R8_UNORM: - case VK_FORMAT_R8G8_UNORM: - return !(usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); - default: - return false; - } + /* Ideally we check Gralloc for what it supports and then merge that with the radv + format support, but there is no easy gralloc query besides just creating an image. + That seems a bit on the expensive side, so just hardcode for now. */ + /* TODO: Add multi-plane formats after confirming everything works between radeonsi + and radv. */ + switch (format) { + case VK_FORMAT_R8G8B8A8_UNORM: + case VK_FORMAT_R5G6B5_UNORM_PACK16: + return true; + case VK_FORMAT_R8_UNORM: + case VK_FORMAT_R8G8_UNORM: + return !(usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); + default: + return false; + } #else - (void)format; - (void)usage; - return false; + (void)format; + (void)usage; + return false; #endif } diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index f1751cbfc17..4c015b98d57 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -25,777 +25,736 @@ * IN THE SOFTWARE. */ +#include "radv_cs.h" +#include "radv_debug.h" +#include "radv_meta.h" #include "radv_private.h" #include "radv_radeon_winsys.h" #include "radv_shader.h" -#include "radv_cs.h" #include "sid.h" #include "vk_format.h" #include "vk_util.h" -#include "radv_debug.h" -#include "radv_meta.h" #include "ac_debug.h" enum { - RADV_PREFETCH_VBO_DESCRIPTORS = (1 << 0), - RADV_PREFETCH_VS = (1 << 1), - RADV_PREFETCH_TCS = (1 << 2), - RADV_PREFETCH_TES = (1 << 3), - RADV_PREFETCH_GS = (1 << 4), - RADV_PREFETCH_PS = (1 << 5), - RADV_PREFETCH_SHADERS = (RADV_PREFETCH_VS | - RADV_PREFETCH_TCS | - RADV_PREFETCH_TES | - RADV_PREFETCH_GS | - RADV_PREFETCH_PS) + RADV_PREFETCH_VBO_DESCRIPTORS = (1 << 0), + RADV_PREFETCH_VS = (1 << 1), + RADV_PREFETCH_TCS = (1 << 2), + RADV_PREFETCH_TES = (1 << 3), + RADV_PREFETCH_GS = (1 << 4), + RADV_PREFETCH_PS = (1 << 5), + RADV_PREFETCH_SHADERS = (RADV_PREFETCH_VS | RADV_PREFETCH_TCS | RADV_PREFETCH_TES | + RADV_PREFETCH_GS | RADV_PREFETCH_PS) }; static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - VkImageLayout src_layout, - bool src_render_loop, - VkImageLayout dst_layout, - bool dst_render_loop, - uint32_t src_family, - uint32_t dst_family, - const VkImageSubresourceRange *range, - struct radv_sample_locations_state *sample_locs); + struct radv_image *image, VkImageLayout src_layout, + bool src_render_loop, VkImageLayout dst_layout, + bool dst_render_loop, uint32_t src_family, + uint32_t dst_family, const VkImageSubresourceRange *range, + struct radv_sample_locations_state *sample_locs); const struct radv_dynamic_state default_dynamic_state = { - .viewport = { - .count = 0, - }, - .scissor = { - .count = 0, - }, - .line_width = 1.0f, - .depth_bias = { - .bias = 0.0f, - .clamp = 0.0f, - .slope = 0.0f, - }, - .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f }, - .depth_bounds = { - .min = 0.0f, - .max = 1.0f, - }, - .stencil_compare_mask = { - .front = ~0u, - .back = ~0u, - }, - .stencil_write_mask = { - .front = ~0u, - .back = ~0u, - }, - .stencil_reference = { - .front = 0u, - .back = 0u, - }, - .line_stipple = { - .factor = 0u, - .pattern = 0u, - }, - .cull_mode = 0u, - .front_face = 0u, - .primitive_topology = 0u, - .fragment_shading_rate = { - .size = { 1u, 1u }, - .combiner_ops = { VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR, - VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR - }, - }, + .viewport = + { + .count = 0, + }, + .scissor = + { + .count = 0, + }, + .line_width = 1.0f, + .depth_bias = + { + .bias = 0.0f, + .clamp = 0.0f, + .slope = 0.0f, + }, + .blend_constants = {0.0f, 0.0f, 0.0f, 0.0f}, + .depth_bounds = + { + .min = 0.0f, + .max = 1.0f, + }, + .stencil_compare_mask = + { + .front = ~0u, + .back = ~0u, + }, + .stencil_write_mask = + { + .front = ~0u, + .back = ~0u, + }, + .stencil_reference = + { + .front = 0u, + .back = 0u, + }, + .line_stipple = + { + .factor = 0u, + .pattern = 0u, + }, + .cull_mode = 0u, + .front_face = 0u, + .primitive_topology = 0u, + .fragment_shading_rate = + { + .size = {1u, 1u}, + .combiner_ops = {VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR, + VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR}, + }, }; static void -radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, - const struct radv_dynamic_state *src) +radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_dynamic_state *src) { - struct radv_dynamic_state *dest = &cmd_buffer->state.dynamic; - uint64_t copy_mask = src->mask; - uint64_t dest_mask = 0; + struct radv_dynamic_state *dest = &cmd_buffer->state.dynamic; + uint64_t copy_mask = src->mask; + uint64_t dest_mask = 0; - dest->discard_rectangle.count = src->discard_rectangle.count; - dest->sample_location.count = src->sample_location.count; + dest->discard_rectangle.count = src->discard_rectangle.count; + dest->sample_location.count = src->sample_location.count; - if (copy_mask & RADV_DYNAMIC_VIEWPORT) { - if (dest->viewport.count != src->viewport.count) { - dest->viewport.count = src->viewport.count; - dest_mask |= RADV_DYNAMIC_VIEWPORT; - } + if (copy_mask & RADV_DYNAMIC_VIEWPORT) { + if (dest->viewport.count != src->viewport.count) { + dest->viewport.count = src->viewport.count; + dest_mask |= RADV_DYNAMIC_VIEWPORT; + } - if (memcmp(&dest->viewport.viewports, &src->viewport.viewports, - src->viewport.count * sizeof(VkViewport))) { - typed_memcpy(dest->viewport.viewports, - src->viewport.viewports, - src->viewport.count); - dest_mask |= RADV_DYNAMIC_VIEWPORT; - } - } + if (memcmp(&dest->viewport.viewports, &src->viewport.viewports, + src->viewport.count * sizeof(VkViewport))) { + typed_memcpy(dest->viewport.viewports, src->viewport.viewports, src->viewport.count); + dest_mask |= RADV_DYNAMIC_VIEWPORT; + } + } - if (copy_mask & RADV_DYNAMIC_SCISSOR) { - if (dest->scissor.count != src->scissor.count) { - dest->scissor.count = src->scissor.count; - dest_mask |= RADV_DYNAMIC_SCISSOR; - } + if (copy_mask & RADV_DYNAMIC_SCISSOR) { + if (dest->scissor.count != src->scissor.count) { + dest->scissor.count = src->scissor.count; + dest_mask |= RADV_DYNAMIC_SCISSOR; + } - if (memcmp(&dest->scissor.scissors, &src->scissor.scissors, - src->scissor.count * sizeof(VkRect2D))) { - typed_memcpy(dest->scissor.scissors, - src->scissor.scissors, src->scissor.count); - dest_mask |= RADV_DYNAMIC_SCISSOR; - } - } + if (memcmp(&dest->scissor.scissors, &src->scissor.scissors, + src->scissor.count * sizeof(VkRect2D))) { + typed_memcpy(dest->scissor.scissors, src->scissor.scissors, src->scissor.count); + dest_mask |= RADV_DYNAMIC_SCISSOR; + } + } - if (copy_mask & RADV_DYNAMIC_LINE_WIDTH) { - if (dest->line_width != src->line_width) { - dest->line_width = src->line_width; - dest_mask |= RADV_DYNAMIC_LINE_WIDTH; - } - } + if (copy_mask & RADV_DYNAMIC_LINE_WIDTH) { + if (dest->line_width != src->line_width) { + dest->line_width = src->line_width; + dest_mask |= RADV_DYNAMIC_LINE_WIDTH; + } + } - if (copy_mask & RADV_DYNAMIC_DEPTH_BIAS) { - if (memcmp(&dest->depth_bias, &src->depth_bias, - sizeof(src->depth_bias))) { - dest->depth_bias = src->depth_bias; - dest_mask |= RADV_DYNAMIC_DEPTH_BIAS; - } - } + if (copy_mask & RADV_DYNAMIC_DEPTH_BIAS) { + if (memcmp(&dest->depth_bias, &src->depth_bias, sizeof(src->depth_bias))) { + dest->depth_bias = src->depth_bias; + dest_mask |= RADV_DYNAMIC_DEPTH_BIAS; + } + } - if (copy_mask & RADV_DYNAMIC_BLEND_CONSTANTS) { - if (memcmp(&dest->blend_constants, &src->blend_constants, - sizeof(src->blend_constants))) { - typed_memcpy(dest->blend_constants, - src->blend_constants, 4); - dest_mask |= RADV_DYNAMIC_BLEND_CONSTANTS; - } - } + if (copy_mask & RADV_DYNAMIC_BLEND_CONSTANTS) { + if (memcmp(&dest->blend_constants, &src->blend_constants, sizeof(src->blend_constants))) { + typed_memcpy(dest->blend_constants, src->blend_constants, 4); + dest_mask |= RADV_DYNAMIC_BLEND_CONSTANTS; + } + } - if (copy_mask & RADV_DYNAMIC_DEPTH_BOUNDS) { - if (memcmp(&dest->depth_bounds, &src->depth_bounds, - sizeof(src->depth_bounds))) { - dest->depth_bounds = src->depth_bounds; - dest_mask |= RADV_DYNAMIC_DEPTH_BOUNDS; - } - } + if (copy_mask & RADV_DYNAMIC_DEPTH_BOUNDS) { + if (memcmp(&dest->depth_bounds, &src->depth_bounds, sizeof(src->depth_bounds))) { + dest->depth_bounds = src->depth_bounds; + dest_mask |= RADV_DYNAMIC_DEPTH_BOUNDS; + } + } - if (copy_mask & RADV_DYNAMIC_STENCIL_COMPARE_MASK) { - if (memcmp(&dest->stencil_compare_mask, - &src->stencil_compare_mask, - sizeof(src->stencil_compare_mask))) { - dest->stencil_compare_mask = src->stencil_compare_mask; - dest_mask |= RADV_DYNAMIC_STENCIL_COMPARE_MASK; - } - } + if (copy_mask & RADV_DYNAMIC_STENCIL_COMPARE_MASK) { + if (memcmp(&dest->stencil_compare_mask, &src->stencil_compare_mask, + sizeof(src->stencil_compare_mask))) { + dest->stencil_compare_mask = src->stencil_compare_mask; + dest_mask |= RADV_DYNAMIC_STENCIL_COMPARE_MASK; + } + } - if (copy_mask & RADV_DYNAMIC_STENCIL_WRITE_MASK) { - if (memcmp(&dest->stencil_write_mask, &src->stencil_write_mask, - sizeof(src->stencil_write_mask))) { - dest->stencil_write_mask = src->stencil_write_mask; - dest_mask |= RADV_DYNAMIC_STENCIL_WRITE_MASK; - } - } + if (copy_mask & RADV_DYNAMIC_STENCIL_WRITE_MASK) { + if (memcmp(&dest->stencil_write_mask, &src->stencil_write_mask, + sizeof(src->stencil_write_mask))) { + dest->stencil_write_mask = src->stencil_write_mask; + dest_mask |= RADV_DYNAMIC_STENCIL_WRITE_MASK; + } + } - if (copy_mask & RADV_DYNAMIC_STENCIL_REFERENCE) { - if (memcmp(&dest->stencil_reference, &src->stencil_reference, - sizeof(src->stencil_reference))) { - dest->stencil_reference = src->stencil_reference; - dest_mask |= RADV_DYNAMIC_STENCIL_REFERENCE; - } - } + if (copy_mask & RADV_DYNAMIC_STENCIL_REFERENCE) { + if (memcmp(&dest->stencil_reference, &src->stencil_reference, + sizeof(src->stencil_reference))) { + dest->stencil_reference = src->stencil_reference; + dest_mask |= RADV_DYNAMIC_STENCIL_REFERENCE; + } + } - if (copy_mask & RADV_DYNAMIC_DISCARD_RECTANGLE) { - if (memcmp(&dest->discard_rectangle.rectangles, &src->discard_rectangle.rectangles, - src->discard_rectangle.count * sizeof(VkRect2D))) { - typed_memcpy(dest->discard_rectangle.rectangles, - src->discard_rectangle.rectangles, - src->discard_rectangle.count); - dest_mask |= RADV_DYNAMIC_DISCARD_RECTANGLE; - } - } + if (copy_mask & RADV_DYNAMIC_DISCARD_RECTANGLE) { + if (memcmp(&dest->discard_rectangle.rectangles, &src->discard_rectangle.rectangles, + src->discard_rectangle.count * sizeof(VkRect2D))) { + typed_memcpy(dest->discard_rectangle.rectangles, src->discard_rectangle.rectangles, + src->discard_rectangle.count); + dest_mask |= RADV_DYNAMIC_DISCARD_RECTANGLE; + } + } - if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) { - if (dest->sample_location.per_pixel != src->sample_location.per_pixel || - dest->sample_location.grid_size.width != src->sample_location.grid_size.width || - dest->sample_location.grid_size.height != src->sample_location.grid_size.height || - memcmp(&dest->sample_location.locations, - &src->sample_location.locations, - src->sample_location.count * sizeof(VkSampleLocationEXT))) { - dest->sample_location.per_pixel = src->sample_location.per_pixel; - dest->sample_location.grid_size = src->sample_location.grid_size; - typed_memcpy(dest->sample_location.locations, - src->sample_location.locations, - src->sample_location.count); - dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS; - } - } + if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) { + if (dest->sample_location.per_pixel != src->sample_location.per_pixel || + dest->sample_location.grid_size.width != src->sample_location.grid_size.width || + dest->sample_location.grid_size.height != src->sample_location.grid_size.height || + memcmp(&dest->sample_location.locations, &src->sample_location.locations, + src->sample_location.count * sizeof(VkSampleLocationEXT))) { + dest->sample_location.per_pixel = src->sample_location.per_pixel; + dest->sample_location.grid_size = src->sample_location.grid_size; + typed_memcpy(dest->sample_location.locations, src->sample_location.locations, + src->sample_location.count); + dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS; + } + } - if (copy_mask & RADV_DYNAMIC_LINE_STIPPLE) { - if (memcmp(&dest->line_stipple, &src->line_stipple, - sizeof(src->line_stipple))) { - dest->line_stipple = src->line_stipple; - dest_mask |= RADV_DYNAMIC_LINE_STIPPLE; - } - } + if (copy_mask & RADV_DYNAMIC_LINE_STIPPLE) { + if (memcmp(&dest->line_stipple, &src->line_stipple, sizeof(src->line_stipple))) { + dest->line_stipple = src->line_stipple; + dest_mask |= RADV_DYNAMIC_LINE_STIPPLE; + } + } - if (copy_mask & RADV_DYNAMIC_CULL_MODE) { - if (dest->cull_mode != src->cull_mode) { - dest->cull_mode = src->cull_mode; - dest_mask |= RADV_DYNAMIC_CULL_MODE; - } - } + if (copy_mask & RADV_DYNAMIC_CULL_MODE) { + if (dest->cull_mode != src->cull_mode) { + dest->cull_mode = src->cull_mode; + dest_mask |= RADV_DYNAMIC_CULL_MODE; + } + } - if (copy_mask & RADV_DYNAMIC_FRONT_FACE) { - if (dest->front_face != src->front_face) { - dest->front_face = src->front_face; - dest_mask |= RADV_DYNAMIC_FRONT_FACE; - } - } + if (copy_mask & RADV_DYNAMIC_FRONT_FACE) { + if (dest->front_face != src->front_face) { + dest->front_face = src->front_face; + dest_mask |= RADV_DYNAMIC_FRONT_FACE; + } + } - if (copy_mask & RADV_DYNAMIC_PRIMITIVE_TOPOLOGY) { - if (dest->primitive_topology != src->primitive_topology) { - dest->primitive_topology = src->primitive_topology; - dest_mask |= RADV_DYNAMIC_PRIMITIVE_TOPOLOGY; - } - } + if (copy_mask & RADV_DYNAMIC_PRIMITIVE_TOPOLOGY) { + if (dest->primitive_topology != src->primitive_topology) { + dest->primitive_topology = src->primitive_topology; + dest_mask |= RADV_DYNAMIC_PRIMITIVE_TOPOLOGY; + } + } - if (copy_mask & RADV_DYNAMIC_DEPTH_TEST_ENABLE) { - if (dest->depth_test_enable != src->depth_test_enable) { - dest->depth_test_enable = src->depth_test_enable; - dest_mask |= RADV_DYNAMIC_DEPTH_TEST_ENABLE; - } - } + if (copy_mask & RADV_DYNAMIC_DEPTH_TEST_ENABLE) { + if (dest->depth_test_enable != src->depth_test_enable) { + dest->depth_test_enable = src->depth_test_enable; + dest_mask |= RADV_DYNAMIC_DEPTH_TEST_ENABLE; + } + } - if (copy_mask & RADV_DYNAMIC_DEPTH_WRITE_ENABLE) { - if (dest->depth_write_enable != src->depth_write_enable) { - dest->depth_write_enable = src->depth_write_enable; - dest_mask |= RADV_DYNAMIC_DEPTH_WRITE_ENABLE; - } - } + if (copy_mask & RADV_DYNAMIC_DEPTH_WRITE_ENABLE) { + if (dest->depth_write_enable != src->depth_write_enable) { + dest->depth_write_enable = src->depth_write_enable; + dest_mask |= RADV_DYNAMIC_DEPTH_WRITE_ENABLE; + } + } - if (copy_mask & RADV_DYNAMIC_DEPTH_COMPARE_OP) { - if (dest->depth_compare_op != src->depth_compare_op) { - dest->depth_compare_op = src->depth_compare_op; - dest_mask |= RADV_DYNAMIC_DEPTH_COMPARE_OP; - } - } + if (copy_mask & RADV_DYNAMIC_DEPTH_COMPARE_OP) { + if (dest->depth_compare_op != src->depth_compare_op) { + dest->depth_compare_op = src->depth_compare_op; + dest_mask |= RADV_DYNAMIC_DEPTH_COMPARE_OP; + } + } - if (copy_mask & RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) { - if (dest->depth_bounds_test_enable != src->depth_bounds_test_enable) { - dest->depth_bounds_test_enable = src->depth_bounds_test_enable; - dest_mask |= RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE; - } - } + if (copy_mask & RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) { + if (dest->depth_bounds_test_enable != src->depth_bounds_test_enable) { + dest->depth_bounds_test_enable = src->depth_bounds_test_enable; + dest_mask |= RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE; + } + } - if (copy_mask & RADV_DYNAMIC_STENCIL_TEST_ENABLE) { - if (dest->stencil_test_enable != src->stencil_test_enable) { - dest->stencil_test_enable = src->stencil_test_enable; - dest_mask |= RADV_DYNAMIC_STENCIL_TEST_ENABLE; - } - } + if (copy_mask & RADV_DYNAMIC_STENCIL_TEST_ENABLE) { + if (dest->stencil_test_enable != src->stencil_test_enable) { + dest->stencil_test_enable = src->stencil_test_enable; + dest_mask |= RADV_DYNAMIC_STENCIL_TEST_ENABLE; + } + } - if (copy_mask & RADV_DYNAMIC_STENCIL_OP) { - if (memcmp(&dest->stencil_op, &src->stencil_op, - sizeof(src->stencil_op))) { - dest->stencil_op = src->stencil_op; - dest_mask |= RADV_DYNAMIC_STENCIL_OP; - } - } + if (copy_mask & RADV_DYNAMIC_STENCIL_OP) { + if (memcmp(&dest->stencil_op, &src->stencil_op, sizeof(src->stencil_op))) { + dest->stencil_op = src->stencil_op; + dest_mask |= RADV_DYNAMIC_STENCIL_OP; + } + } - if (copy_mask & RADV_DYNAMIC_FRAGMENT_SHADING_RATE) { - if (memcmp(&dest->fragment_shading_rate, - &src->fragment_shading_rate, - sizeof(src->fragment_shading_rate))) { - dest->fragment_shading_rate = src->fragment_shading_rate; - dest_mask |= RADV_DYNAMIC_FRAGMENT_SHADING_RATE; - } - } + if (copy_mask & RADV_DYNAMIC_FRAGMENT_SHADING_RATE) { + if (memcmp(&dest->fragment_shading_rate, &src->fragment_shading_rate, + sizeof(src->fragment_shading_rate))) { + dest->fragment_shading_rate = src->fragment_shading_rate; + dest_mask |= RADV_DYNAMIC_FRAGMENT_SHADING_RATE; + } + } - cmd_buffer->state.dirty |= dest_mask; + cmd_buffer->state.dirty |= dest_mask; } static void -radv_bind_streamout_state(struct radv_cmd_buffer *cmd_buffer, - struct radv_pipeline *pipeline) +radv_bind_streamout_state(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline) { - struct radv_streamout_state *so = &cmd_buffer->state.streamout; - struct radv_shader_info *info; + struct radv_streamout_state *so = &cmd_buffer->state.streamout; + struct radv_shader_info *info; - if (!pipeline->streamout_shader || - cmd_buffer->device->physical_device->use_ngg_streamout) - return; + if (!pipeline->streamout_shader || cmd_buffer->device->physical_device->use_ngg_streamout) + return; - info = &pipeline->streamout_shader->info; - for (int i = 0; i < MAX_SO_BUFFERS; i++) - so->stride_in_dw[i] = info->so.strides[i]; + info = &pipeline->streamout_shader->info; + for (int i = 0; i < MAX_SO_BUFFERS; i++) + so->stride_in_dw[i] = info->so.strides[i]; - so->enabled_stream_buffers_mask = info->so.enabled_stream_buffers_mask; + so->enabled_stream_buffers_mask = info->so.enabled_stream_buffers_mask; } -bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer) +bool +radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer) { - return cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE && - cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7; + return cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE && + cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7; } -enum ring_type radv_queue_family_to_ring(int f) { - switch (f) { - case RADV_QUEUE_GENERAL: - return RING_GFX; - case RADV_QUEUE_COMPUTE: - return RING_COMPUTE; - case RADV_QUEUE_TRANSFER: - return RING_DMA; - default: - unreachable("Unknown queue family"); - } +enum ring_type +radv_queue_family_to_ring(int f) +{ + switch (f) { + case RADV_QUEUE_GENERAL: + return RING_GFX; + case RADV_QUEUE_COMPUTE: + return RING_COMPUTE; + case RADV_QUEUE_TRANSFER: + return RING_DMA; + default: + unreachable("Unknown queue family"); + } } static void radv_destroy_cmd_buffer(struct radv_cmd_buffer *cmd_buffer) { - list_del(&cmd_buffer->pool_link); + list_del(&cmd_buffer->pool_link); - list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, - &cmd_buffer->upload.list, list) { - cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, up->upload_bo); - list_del(&up->list); - free(up); - } + list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, &cmd_buffer->upload.list, list) + { + cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, up->upload_bo); + list_del(&up->list); + free(up); + } - if (cmd_buffer->upload.upload_bo) - cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, cmd_buffer->upload.upload_bo); + if (cmd_buffer->upload.upload_bo) + cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, cmd_buffer->upload.upload_bo); - if (cmd_buffer->cs) - cmd_buffer->device->ws->cs_destroy(cmd_buffer->cs); + if (cmd_buffer->cs) + cmd_buffer->device->ws->cs_destroy(cmd_buffer->cs); - for (unsigned i = 0; i < MAX_BIND_POINTS; i++) - free(cmd_buffer->descriptors[i].push_set.set.mapped_ptr); + for (unsigned i = 0; i < MAX_BIND_POINTS; i++) + free(cmd_buffer->descriptors[i].push_set.set.mapped_ptr); - vk_object_base_finish(&cmd_buffer->base); - vk_free(&cmd_buffer->pool->alloc, cmd_buffer); + vk_object_base_finish(&cmd_buffer->base); + vk_free(&cmd_buffer->pool->alloc, cmd_buffer); } -static VkResult radv_create_cmd_buffer( - struct radv_device * device, - struct radv_cmd_pool * pool, - VkCommandBufferLevel level, - VkCommandBuffer* pCommandBuffer) +static VkResult +radv_create_cmd_buffer(struct radv_device *device, struct radv_cmd_pool *pool, + VkCommandBufferLevel level, VkCommandBuffer *pCommandBuffer) { - struct radv_cmd_buffer *cmd_buffer; - unsigned ring; - cmd_buffer = vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (cmd_buffer == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + struct radv_cmd_buffer *cmd_buffer; + unsigned ring; + cmd_buffer = vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (cmd_buffer == NULL) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - vk_object_base_init(&device->vk, &cmd_buffer->base, - VK_OBJECT_TYPE_COMMAND_BUFFER); + vk_object_base_init(&device->vk, &cmd_buffer->base, VK_OBJECT_TYPE_COMMAND_BUFFER); - cmd_buffer->device = device; - cmd_buffer->pool = pool; - cmd_buffer->level = level; + cmd_buffer->device = device; + cmd_buffer->pool = pool; + cmd_buffer->level = level; - list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); - cmd_buffer->queue_family_index = pool->queue_family_index; + list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); + cmd_buffer->queue_family_index = pool->queue_family_index; - ring = radv_queue_family_to_ring(cmd_buffer->queue_family_index); + ring = radv_queue_family_to_ring(cmd_buffer->queue_family_index); - cmd_buffer->cs = device->ws->cs_create(device->ws, ring); - if (!cmd_buffer->cs) { - radv_destroy_cmd_buffer(cmd_buffer); - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - } + cmd_buffer->cs = device->ws->cs_create(device->ws, ring); + if (!cmd_buffer->cs) { + radv_destroy_cmd_buffer(cmd_buffer); + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + } - *pCommandBuffer = radv_cmd_buffer_to_handle(cmd_buffer); + *pCommandBuffer = radv_cmd_buffer_to_handle(cmd_buffer); - list_inithead(&cmd_buffer->upload.list); + list_inithead(&cmd_buffer->upload.list); - return VK_SUCCESS; + return VK_SUCCESS; } static VkResult radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer) { - cmd_buffer->device->ws->cs_reset(cmd_buffer->cs); + cmd_buffer->device->ws->cs_reset(cmd_buffer->cs); - list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, - &cmd_buffer->upload.list, list) { - cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, up->upload_bo); - list_del(&up->list); - free(up); - } + list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, &cmd_buffer->upload.list, list) + { + cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, up->upload_bo); + list_del(&up->list); + free(up); + } - cmd_buffer->push_constant_stages = 0; - cmd_buffer->scratch_size_per_wave_needed = 0; - cmd_buffer->scratch_waves_wanted = 0; - cmd_buffer->compute_scratch_size_per_wave_needed = 0; - cmd_buffer->compute_scratch_waves_wanted = 0; - cmd_buffer->esgs_ring_size_needed = 0; - cmd_buffer->gsvs_ring_size_needed = 0; - cmd_buffer->tess_rings_needed = false; - cmd_buffer->gds_needed = false; - cmd_buffer->gds_oa_needed = false; - cmd_buffer->sample_positions_needed = false; + cmd_buffer->push_constant_stages = 0; + cmd_buffer->scratch_size_per_wave_needed = 0; + cmd_buffer->scratch_waves_wanted = 0; + cmd_buffer->compute_scratch_size_per_wave_needed = 0; + cmd_buffer->compute_scratch_waves_wanted = 0; + cmd_buffer->esgs_ring_size_needed = 0; + cmd_buffer->gsvs_ring_size_needed = 0; + cmd_buffer->tess_rings_needed = false; + cmd_buffer->gds_needed = false; + cmd_buffer->gds_oa_needed = false; + cmd_buffer->sample_positions_needed = false; - if (cmd_buffer->upload.upload_bo) - radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, - cmd_buffer->upload.upload_bo); - cmd_buffer->upload.offset = 0; + if (cmd_buffer->upload.upload_bo) + radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->upload.upload_bo); + cmd_buffer->upload.offset = 0; - cmd_buffer->record_result = VK_SUCCESS; + cmd_buffer->record_result = VK_SUCCESS; - memset(cmd_buffer->vertex_bindings, 0, sizeof(cmd_buffer->vertex_bindings)); + memset(cmd_buffer->vertex_bindings, 0, sizeof(cmd_buffer->vertex_bindings)); - for (unsigned i = 0; i < MAX_BIND_POINTS; i++) { - cmd_buffer->descriptors[i].dirty = 0; - cmd_buffer->descriptors[i].valid = 0; - cmd_buffer->descriptors[i].push_dirty = false; - } + for (unsigned i = 0; i < MAX_BIND_POINTS; i++) { + cmd_buffer->descriptors[i].dirty = 0; + cmd_buffer->descriptors[i].valid = 0; + cmd_buffer->descriptors[i].push_dirty = false; + } - if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 && - cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) { - unsigned num_db = cmd_buffer->device->physical_device->rad_info.max_render_backends; - unsigned fence_offset, eop_bug_offset; - void *fence_ptr; + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 && + cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) { + unsigned num_db = cmd_buffer->device->physical_device->rad_info.max_render_backends; + unsigned fence_offset, eop_bug_offset; + void *fence_ptr; - radv_cmd_buffer_upload_alloc(cmd_buffer, 8, &fence_offset, - &fence_ptr); - memset(fence_ptr, 0, 8); + radv_cmd_buffer_upload_alloc(cmd_buffer, 8, &fence_offset, &fence_ptr); + memset(fence_ptr, 0, 8); - cmd_buffer->gfx9_fence_va = - radv_buffer_get_va(cmd_buffer->upload.upload_bo); - cmd_buffer->gfx9_fence_va += fence_offset; + cmd_buffer->gfx9_fence_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); + cmd_buffer->gfx9_fence_va += fence_offset; - if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) { - /* Allocate a buffer for the EOP bug on GFX9. */ - radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, - &eop_bug_offset, &fence_ptr); - memset(fence_ptr, 0, 16 * num_db); - cmd_buffer->gfx9_eop_bug_va = - radv_buffer_get_va(cmd_buffer->upload.upload_bo); - cmd_buffer->gfx9_eop_bug_va += eop_bug_offset; - } - } + if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) { + /* Allocate a buffer for the EOP bug on GFX9. */ + radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, &eop_bug_offset, &fence_ptr); + memset(fence_ptr, 0, 16 * num_db); + cmd_buffer->gfx9_eop_bug_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); + cmd_buffer->gfx9_eop_bug_va += eop_bug_offset; + } + } - cmd_buffer->status = RADV_CMD_BUFFER_STATUS_INITIAL; + cmd_buffer->status = RADV_CMD_BUFFER_STATUS_INITIAL; - return cmd_buffer->record_result; + return cmd_buffer->record_result; } enum radeon_bo_domain radv_cmdbuffer_domain(const struct radeon_info *info, uint32_t perftest) { - bool use_sam = (info->all_vram_visible && info->has_dedicated_vram && - !(perftest & RADV_PERFTEST_NO_SAM)) || - (perftest & RADV_PERFTEST_SAM); - return use_sam ? RADEON_DOMAIN_VRAM : RADEON_DOMAIN_GTT; + bool use_sam = + (info->all_vram_visible && info->has_dedicated_vram && !(perftest & RADV_PERFTEST_NO_SAM)) || + (perftest & RADV_PERFTEST_SAM); + return use_sam ? RADEON_DOMAIN_VRAM : RADEON_DOMAIN_GTT; } static bool -radv_cmd_buffer_resize_upload_buf(struct radv_cmd_buffer *cmd_buffer, - uint64_t min_needed) +radv_cmd_buffer_resize_upload_buf(struct radv_cmd_buffer *cmd_buffer, uint64_t min_needed) { - uint64_t new_size; - struct radeon_winsys_bo *bo; - struct radv_cmd_buffer_upload *upload; - struct radv_device *device = cmd_buffer->device; + uint64_t new_size; + struct radeon_winsys_bo *bo; + struct radv_cmd_buffer_upload *upload; + struct radv_device *device = cmd_buffer->device; - new_size = MAX2(min_needed, 16 * 1024); - new_size = MAX2(new_size, 2 * cmd_buffer->upload.size); + new_size = MAX2(min_needed, 16 * 1024); + new_size = MAX2(new_size, 2 * cmd_buffer->upload.size); - bo = device->ws->buffer_create(device->ws, - new_size, 4096, - radv_cmdbuffer_domain(&device->physical_device->rad_info, - device->instance->perftest_flags), - RADEON_FLAG_CPU_ACCESS| - RADEON_FLAG_NO_INTERPROCESS_SHARING | - RADEON_FLAG_32BIT | - RADEON_FLAG_GTT_WC, - RADV_BO_PRIORITY_UPLOAD_BUFFER); + bo = device->ws->buffer_create( + device->ws, new_size, 4096, + radv_cmdbuffer_domain(&device->physical_device->rad_info, device->instance->perftest_flags), + RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_32BIT | + RADEON_FLAG_GTT_WC, + RADV_BO_PRIORITY_UPLOAD_BUFFER); - if (!bo) { - cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY; - return false; - } + if (!bo) { + cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY; + return false; + } - radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo); - if (cmd_buffer->upload.upload_bo) { - upload = malloc(sizeof(*upload)); + radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo); + if (cmd_buffer->upload.upload_bo) { + upload = malloc(sizeof(*upload)); - if (!upload) { - cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY; - device->ws->buffer_destroy(device->ws, bo); - return false; - } + if (!upload) { + cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY; + device->ws->buffer_destroy(device->ws, bo); + return false; + } - memcpy(upload, &cmd_buffer->upload, sizeof(*upload)); - list_add(&upload->list, &cmd_buffer->upload.list); - } + memcpy(upload, &cmd_buffer->upload, sizeof(*upload)); + list_add(&upload->list, &cmd_buffer->upload.list); + } - cmd_buffer->upload.upload_bo = bo; - cmd_buffer->upload.size = new_size; - cmd_buffer->upload.offset = 0; - cmd_buffer->upload.map = device->ws->buffer_map(cmd_buffer->upload.upload_bo); + cmd_buffer->upload.upload_bo = bo; + cmd_buffer->upload.size = new_size; + cmd_buffer->upload.offset = 0; + cmd_buffer->upload.map = device->ws->buffer_map(cmd_buffer->upload.upload_bo); - if (!cmd_buffer->upload.map) { - cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY; - return false; - } + if (!cmd_buffer->upload.map) { + cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY; + return false; + } - return true; + return true; } bool -radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, - unsigned size, unsigned *out_offset, void **ptr) +radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size, + unsigned *out_offset, void **ptr) { - assert(size % 4 == 0); + assert(size % 4 == 0); - struct radeon_info *rad_info = &cmd_buffer->device->physical_device->rad_info; + struct radeon_info *rad_info = &cmd_buffer->device->physical_device->rad_info; - /* Align to the scalar cache line size if it results in this allocation - * being placed in less of them. - */ - unsigned offset = cmd_buffer->upload.offset; - unsigned line_size = rad_info->chip_class >= GFX10 ? 64 : 32; - unsigned gap = align(offset, line_size) - offset; - if ((size & (line_size - 1)) > gap) - offset = align(offset, line_size); + /* Align to the scalar cache line size if it results in this allocation + * being placed in less of them. + */ + unsigned offset = cmd_buffer->upload.offset; + unsigned line_size = rad_info->chip_class >= GFX10 ? 64 : 32; + unsigned gap = align(offset, line_size) - offset; + if ((size & (line_size - 1)) > gap) + offset = align(offset, line_size); - if (offset + size > cmd_buffer->upload.size) { - if (!radv_cmd_buffer_resize_upload_buf(cmd_buffer, size)) - return false; - offset = 0; - } + if (offset + size > cmd_buffer->upload.size) { + if (!radv_cmd_buffer_resize_upload_buf(cmd_buffer, size)) + return false; + offset = 0; + } - *out_offset = offset; - *ptr = cmd_buffer->upload.map + offset; + *out_offset = offset; + *ptr = cmd_buffer->upload.map + offset; - cmd_buffer->upload.offset = offset + size; - return true; + cmd_buffer->upload.offset = offset + size; + return true; } bool -radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, - unsigned size, const void *data, unsigned *out_offset) +radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size, const void *data, + unsigned *out_offset) { - uint8_t *ptr; + uint8_t *ptr; - if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, out_offset, (void **)&ptr)) - return false; + if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, out_offset, (void **)&ptr)) + return false; - if (ptr) - memcpy(ptr, data, size); + if (ptr) + memcpy(ptr, data, size); - return true; + return true; } static void -radv_emit_write_data_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t va, - unsigned count, const uint32_t *data) +radv_emit_write_data_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned count, + const uint32_t *data) { - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radeon_cmdbuf *cs = cmd_buffer->cs; - radeon_check_space(cmd_buffer->device->ws, cs, 4 + count); + radeon_check_space(cmd_buffer->device->ws, cs, 4 + count); - radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0)); - radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | - S_370_WR_CONFIRM(1) | - S_370_ENGINE_SEL(V_370_ME)); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit_array(cs, data, count); + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0)); + radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit_array(cs, data, count); } -void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer) +void +radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer) { - struct radv_device *device = cmd_buffer->device; - struct radeon_cmdbuf *cs = cmd_buffer->cs; - uint64_t va; + struct radv_device *device = cmd_buffer->device; + struct radeon_cmdbuf *cs = cmd_buffer->cs; + uint64_t va; - va = radv_buffer_get_va(device->trace_bo); - if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) - va += 4; + va = radv_buffer_get_va(device->trace_bo); + if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) + va += 4; - ++cmd_buffer->state.trace_id; - radv_emit_write_data_packet(cmd_buffer, va, 1, - &cmd_buffer->state.trace_id); + ++cmd_buffer->state.trace_id; + radv_emit_write_data_packet(cmd_buffer, va, 1, &cmd_buffer->state.trace_id); - radeon_check_space(cmd_buffer->device->ws, cs, 2); + radeon_check_space(cmd_buffer->device->ws, cs, 2); - radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); - radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id)); + radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); + radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id)); } static void -radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer, - enum radv_cmd_flush_bits flags) +radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer, enum radv_cmd_flush_bits flags) { - if (unlikely(cmd_buffer->device->thread_trace.bo)) { - radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); - radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_THREAD_TRACE_MARKER) | EVENT_INDEX(0)); - } + if (unlikely(cmd_buffer->device->thread_trace.bo)) { + radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); + radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_THREAD_TRACE_MARKER) | EVENT_INDEX(0)); + } - if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_SYNC_SHADERS) { - enum rgp_flush_bits sqtt_flush_bits = 0; - assert(flags & (RADV_CMD_FLAG_PS_PARTIAL_FLUSH | - RADV_CMD_FLAG_CS_PARTIAL_FLUSH)); + if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_SYNC_SHADERS) { + enum rgp_flush_bits sqtt_flush_bits = 0; + assert(flags & (RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH)); - radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4); + radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4); - /* Force wait for graphics or compute engines to be idle. */ - si_cs_emit_cache_flush(cmd_buffer->cs, - cmd_buffer->device->physical_device->rad_info.chip_class, - &cmd_buffer->gfx9_fence_idx, - cmd_buffer->gfx9_fence_va, - radv_cmd_buffer_uses_mec(cmd_buffer), - flags, &sqtt_flush_bits, cmd_buffer->gfx9_eop_bug_va); - } + /* Force wait for graphics or compute engines to be idle. */ + si_cs_emit_cache_flush(cmd_buffer->cs, + cmd_buffer->device->physical_device->rad_info.chip_class, + &cmd_buffer->gfx9_fence_idx, cmd_buffer->gfx9_fence_va, + radv_cmd_buffer_uses_mec(cmd_buffer), flags, &sqtt_flush_bits, + cmd_buffer->gfx9_eop_bug_va); + } - if (unlikely(cmd_buffer->device->trace_bo)) - radv_cmd_buffer_trace_emit(cmd_buffer); + if (unlikely(cmd_buffer->device->trace_bo)) + radv_cmd_buffer_trace_emit(cmd_buffer); } static void -radv_save_pipeline(struct radv_cmd_buffer *cmd_buffer, - struct radv_pipeline *pipeline) +radv_save_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline) { - struct radv_device *device = cmd_buffer->device; - enum ring_type ring; - uint32_t data[2]; - uint64_t va; + struct radv_device *device = cmd_buffer->device; + enum ring_type ring; + uint32_t data[2]; + uint64_t va; - va = radv_buffer_get_va(device->trace_bo); + va = radv_buffer_get_va(device->trace_bo); - ring = radv_queue_family_to_ring(cmd_buffer->queue_family_index); + ring = radv_queue_family_to_ring(cmd_buffer->queue_family_index); - switch (ring) { - case RING_GFX: - va += 8; - break; - case RING_COMPUTE: - va += 16; - break; - default: - assert(!"invalid ring type"); - } + switch (ring) { + case RING_GFX: + va += 8; + break; + case RING_COMPUTE: + va += 16; + break; + default: + assert(!"invalid ring type"); + } - uint64_t pipeline_address = (uintptr_t)pipeline; - data[0] = pipeline_address; - data[1] = pipeline_address >> 32; + uint64_t pipeline_address = (uintptr_t)pipeline; + data[0] = pipeline_address; + data[1] = pipeline_address >> 32; - radv_emit_write_data_packet(cmd_buffer, va, 2, data); + radv_emit_write_data_packet(cmd_buffer, va, 2, data); } static void -radv_save_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer, - uint64_t vb_ptr) +radv_save_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer, uint64_t vb_ptr) { - struct radv_device *device = cmd_buffer->device; - uint32_t data[2]; - uint64_t va; + struct radv_device *device = cmd_buffer->device; + uint32_t data[2]; + uint64_t va; - va = radv_buffer_get_va(device->trace_bo); - va += 24; + va = radv_buffer_get_va(device->trace_bo); + va += 24; - data[0] = vb_ptr; - data[1] = vb_ptr >> 32; + data[0] = vb_ptr; + data[1] = vb_ptr >> 32; - radv_emit_write_data_packet(cmd_buffer, va, 2, data); + radv_emit_write_data_packet(cmd_buffer, va, 2, data); } -void radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, - VkPipelineBindPoint bind_point, - struct radv_descriptor_set *set, - unsigned idx) +void +radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point, + struct radv_descriptor_set *set, unsigned idx) { - struct radv_descriptor_state *descriptors_state = - radv_get_descriptors_state(cmd_buffer, bind_point); + struct radv_descriptor_state *descriptors_state = + radv_get_descriptors_state(cmd_buffer, bind_point); - descriptors_state->sets[idx] = set; + descriptors_state->sets[idx] = set; - descriptors_state->valid |= (1u << idx); /* active descriptors */ - descriptors_state->dirty |= (1u << idx); + descriptors_state->valid |= (1u << idx); /* active descriptors */ + descriptors_state->dirty |= (1u << idx); } static void -radv_save_descriptors(struct radv_cmd_buffer *cmd_buffer, - VkPipelineBindPoint bind_point) +radv_save_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point) { - struct radv_descriptor_state *descriptors_state = - radv_get_descriptors_state(cmd_buffer, bind_point); - struct radv_device *device = cmd_buffer->device; - uint32_t data[MAX_SETS * 2] = {0}; - uint64_t va; - va = radv_buffer_get_va(device->trace_bo) + 32; + struct radv_descriptor_state *descriptors_state = + radv_get_descriptors_state(cmd_buffer, bind_point); + struct radv_device *device = cmd_buffer->device; + uint32_t data[MAX_SETS * 2] = {0}; + uint64_t va; + va = radv_buffer_get_va(device->trace_bo) + 32; - u_foreach_bit(i, descriptors_state->valid) { - struct radv_descriptor_set *set = descriptors_state->sets[i]; - data[i * 2] = (uint64_t)(uintptr_t)set; - data[i * 2 + 1] = (uint64_t)(uintptr_t)set >> 32; - } + u_foreach_bit(i, descriptors_state->valid) + { + struct radv_descriptor_set *set = descriptors_state->sets[i]; + data[i * 2] = (uint64_t)(uintptr_t)set; + data[i * 2 + 1] = (uint64_t)(uintptr_t)set >> 32; + } - radv_emit_write_data_packet(cmd_buffer, va, MAX_SETS * 2, data); + radv_emit_write_data_packet(cmd_buffer, va, MAX_SETS * 2, data); } struct radv_userdata_info * -radv_lookup_user_sgpr(struct radv_pipeline *pipeline, - gl_shader_stage stage, - int idx) +radv_lookup_user_sgpr(struct radv_pipeline *pipeline, gl_shader_stage stage, int idx) { - struct radv_shader_variant *shader = radv_get_shader(pipeline, stage); - return &shader->info.user_sgprs_locs.shader_data[idx]; + struct radv_shader_variant *shader = radv_get_shader(pipeline, stage); + return &shader->info.user_sgprs_locs.shader_data[idx]; } static void -radv_emit_userdata_address(struct radv_cmd_buffer *cmd_buffer, - struct radv_pipeline *pipeline, - gl_shader_stage stage, - int idx, uint64_t va) +radv_emit_userdata_address(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline, + gl_shader_stage stage, int idx, uint64_t va) { - struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx); - uint32_t base_reg = pipeline->user_data_0[stage]; - if (loc->sgpr_idx == -1) - return; + struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx); + uint32_t base_reg = pipeline->user_data_0[stage]; + if (loc->sgpr_idx == -1) + return; - assert(loc->num_sgprs == 1); + assert(loc->num_sgprs == 1); - radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, - base_reg + loc->sgpr_idx * 4, va, false); + radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, va, + false); } static void -radv_emit_descriptor_pointers(struct radv_cmd_buffer *cmd_buffer, - struct radv_pipeline *pipeline, - struct radv_descriptor_state *descriptors_state, - gl_shader_stage stage) +radv_emit_descriptor_pointers(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline, + struct radv_descriptor_state *descriptors_state, + gl_shader_stage stage) { - struct radv_device *device = cmd_buffer->device; - struct radeon_cmdbuf *cs = cmd_buffer->cs; - uint32_t sh_base = pipeline->user_data_0[stage]; - struct radv_userdata_locations *locs = - &pipeline->shaders[stage]->info.user_sgprs_locs; - unsigned mask = locs->descriptor_sets_enabled; + struct radv_device *device = cmd_buffer->device; + struct radeon_cmdbuf *cs = cmd_buffer->cs; + uint32_t sh_base = pipeline->user_data_0[stage]; + struct radv_userdata_locations *locs = &pipeline->shaders[stage]->info.user_sgprs_locs; + unsigned mask = locs->descriptor_sets_enabled; - mask &= descriptors_state->dirty & descriptors_state->valid; + mask &= descriptors_state->dirty & descriptors_state->valid; - while (mask) { - int start, count; + while (mask) { + int start, count; - u_bit_scan_consecutive_range(&mask, &start, &count); + u_bit_scan_consecutive_range(&mask, &start, &count); - struct radv_userdata_info *loc = &locs->descriptor_sets[start]; - unsigned sh_offset = sh_base + loc->sgpr_idx * 4; + struct radv_userdata_info *loc = &locs->descriptor_sets[start]; + unsigned sh_offset = sh_base + loc->sgpr_idx * 4; - radv_emit_shader_pointer_head(cs, sh_offset, count, true); - for (int i = 0; i < count; i++) { - struct radv_descriptor_set *set = - descriptors_state->sets[start + i]; + radv_emit_shader_pointer_head(cs, sh_offset, count, true); + for (int i = 0; i < count; i++) { + struct radv_descriptor_set *set = descriptors_state->sets[start + i]; - radv_emit_shader_pointer_body(device, cs, set->header.va, true); - } - } + radv_emit_shader_pointer_body(device, cs, set->header.va, true); + } + } } /** @@ -803,30 +762,30 @@ radv_emit_descriptor_pointers(struct radv_cmd_buffer *cmd_buffer, * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*). */ static void -radv_convert_user_sample_locs(struct radv_sample_locations_state *state, - uint32_t x, uint32_t y, VkOffset2D *sample_locs) +radv_convert_user_sample_locs(struct radv_sample_locations_state *state, uint32_t x, uint32_t y, + VkOffset2D *sample_locs) { - uint32_t x_offset = x % state->grid_size.width; - uint32_t y_offset = y % state->grid_size.height; - uint32_t num_samples = (uint32_t)state->per_pixel; - VkSampleLocationEXT *user_locs; - uint32_t pixel_offset; + uint32_t x_offset = x % state->grid_size.width; + uint32_t y_offset = y % state->grid_size.height; + uint32_t num_samples = (uint32_t)state->per_pixel; + VkSampleLocationEXT *user_locs; + uint32_t pixel_offset; - pixel_offset = (x_offset + y_offset * state->grid_size.width) * num_samples; + pixel_offset = (x_offset + y_offset * state->grid_size.width) * num_samples; - assert(pixel_offset <= MAX_SAMPLE_LOCATIONS); - user_locs = &state->locations[pixel_offset]; + assert(pixel_offset <= MAX_SAMPLE_LOCATIONS); + user_locs = &state->locations[pixel_offset]; - for (uint32_t i = 0; i < num_samples; i++) { - float shifted_pos_x = user_locs[i].x - 0.5; - float shifted_pos_y = user_locs[i].y - 0.5; + for (uint32_t i = 0; i < num_samples; i++) { + float shifted_pos_x = user_locs[i].x - 0.5; + float shifted_pos_y = user_locs[i].y - 0.5; - int32_t scaled_pos_x = floorf(shifted_pos_x * 16); - int32_t scaled_pos_y = floorf(shifted_pos_y * 16); + int32_t scaled_pos_x = floorf(shifted_pos_x * 16); + int32_t scaled_pos_y = floorf(shifted_pos_y * 16); - sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7); - sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7); - } + sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7); + sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7); + } } /** @@ -835,20 +794,20 @@ radv_convert_user_sample_locs(struct radv_sample_locations_state *state, */ static void radv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D *sample_locs, - uint32_t *sample_locs_pixel) + uint32_t *sample_locs_pixel) { - for (uint32_t i = 0; i < num_samples; i++) { - uint32_t sample_reg_idx = i / 4; - uint32_t sample_loc_idx = i % 4; - int32_t pos_x = sample_locs[i].x; - int32_t pos_y = sample_locs[i].y; + for (uint32_t i = 0; i < num_samples; i++) { + uint32_t sample_reg_idx = i / 4; + uint32_t sample_loc_idx = i % 4; + int32_t pos_x = sample_locs[i].x; + int32_t pos_y = sample_locs[i].y; - uint32_t shift_x = 8 * sample_loc_idx; - uint32_t shift_y = shift_x + 4; + uint32_t shift_x = 8 * sample_loc_idx; + uint32_t shift_y = shift_x + 4; - sample_locs_pixel[sample_reg_idx] |= (pos_x & 0xf) << shift_x; - sample_locs_pixel[sample_reg_idx] |= (pos_y & 0xf) << shift_y; - } + sample_locs_pixel[sample_reg_idx] |= (pos_x & 0xf) << shift_x; + sample_locs_pixel[sample_reg_idx] |= (pos_y & 0xf) << shift_y; + } } /** @@ -856,41 +815,38 @@ radv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D *sample_locs, * sample locations. */ static uint64_t -radv_compute_centroid_priority(struct radv_cmd_buffer *cmd_buffer, - VkOffset2D *sample_locs, - uint32_t num_samples) +radv_compute_centroid_priority(struct radv_cmd_buffer *cmd_buffer, VkOffset2D *sample_locs, + uint32_t num_samples) { - uint32_t *centroid_priorities = alloca(num_samples * sizeof(*centroid_priorities)); - uint32_t sample_mask = num_samples - 1; - uint32_t *distances = alloca(num_samples * sizeof(*distances)); - uint64_t centroid_priority = 0; + uint32_t *centroid_priorities = alloca(num_samples * sizeof(*centroid_priorities)); + uint32_t sample_mask = num_samples - 1; + uint32_t *distances = alloca(num_samples * sizeof(*distances)); + uint64_t centroid_priority = 0; - /* Compute the distances from center for each sample. */ - for (int i = 0; i < num_samples; i++) { - distances[i] = (sample_locs[i].x * sample_locs[i].x) + - (sample_locs[i].y * sample_locs[i].y); - } + /* Compute the distances from center for each sample. */ + for (int i = 0; i < num_samples; i++) { + distances[i] = (sample_locs[i].x * sample_locs[i].x) + (sample_locs[i].y * sample_locs[i].y); + } - /* Compute the centroid priorities by looking at the distances array. */ - for (int i = 0; i < num_samples; i++) { - uint32_t min_idx = 0; + /* Compute the centroid priorities by looking at the distances array. */ + for (int i = 0; i < num_samples; i++) { + uint32_t min_idx = 0; - for (int j = 1; j < num_samples; j++) { - if (distances[j] < distances[min_idx]) - min_idx = j; - } + for (int j = 1; j < num_samples; j++) { + if (distances[j] < distances[min_idx]) + min_idx = j; + } - centroid_priorities[i] = min_idx; - distances[min_idx] = 0xffffffff; - } + centroid_priorities[i] = min_idx; + distances[min_idx] = 0xffffffff; + } - /* Compute the final centroid priority. */ - for (int i = 0; i < 8; i++) { - centroid_priority |= - centroid_priorities[i & sample_mask] << (i * 4); - } + /* Compute the final centroid priority. */ + for (int i = 0; i < 8; i++) { + centroid_priority |= centroid_priorities[i & sample_mask] << (i * 4); + } - return centroid_priority << 32 | centroid_priority; + return centroid_priority << 32 | centroid_priority; } /** @@ -899,972 +855,934 @@ radv_compute_centroid_priority(struct radv_cmd_buffer *cmd_buffer, static void radv_emit_sample_locations(struct radv_cmd_buffer *cmd_buffer) { - struct radv_sample_locations_state *sample_location = - &cmd_buffer->state.dynamic.sample_location; - uint32_t num_samples = (uint32_t)sample_location->per_pixel; - struct radeon_cmdbuf *cs = cmd_buffer->cs; - uint32_t sample_locs_pixel[4][2] = {0}; - VkOffset2D sample_locs[4][8]; /* 8 is the max. sample count supported */ - uint32_t max_sample_dist = 0; - uint64_t centroid_priority; + struct radv_sample_locations_state *sample_location = &cmd_buffer->state.dynamic.sample_location; + uint32_t num_samples = (uint32_t)sample_location->per_pixel; + struct radeon_cmdbuf *cs = cmd_buffer->cs; + uint32_t sample_locs_pixel[4][2] = {0}; + VkOffset2D sample_locs[4][8]; /* 8 is the max. sample count supported */ + uint32_t max_sample_dist = 0; + uint64_t centroid_priority; - if (!cmd_buffer->state.dynamic.sample_location.count) - return; + if (!cmd_buffer->state.dynamic.sample_location.count) + return; - /* Convert the user sample locations to hardware sample locations. */ - radv_convert_user_sample_locs(sample_location, 0, 0, sample_locs[0]); - radv_convert_user_sample_locs(sample_location, 1, 0, sample_locs[1]); - radv_convert_user_sample_locs(sample_location, 0, 1, sample_locs[2]); - radv_convert_user_sample_locs(sample_location, 1, 1, sample_locs[3]); + /* Convert the user sample locations to hardware sample locations. */ + radv_convert_user_sample_locs(sample_location, 0, 0, sample_locs[0]); + radv_convert_user_sample_locs(sample_location, 1, 0, sample_locs[1]); + radv_convert_user_sample_locs(sample_location, 0, 1, sample_locs[2]); + radv_convert_user_sample_locs(sample_location, 1, 1, sample_locs[3]); - /* Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask. */ - for (uint32_t i = 0; i < 4; i++) { - radv_compute_sample_locs_pixel(num_samples, sample_locs[i], - sample_locs_pixel[i]); - } + /* Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask. */ + for (uint32_t i = 0; i < 4; i++) { + radv_compute_sample_locs_pixel(num_samples, sample_locs[i], sample_locs_pixel[i]); + } - /* Compute the PA_SC_CENTROID_PRIORITY_* mask. */ - centroid_priority = - radv_compute_centroid_priority(cmd_buffer, sample_locs[0], - num_samples); + /* Compute the PA_SC_CENTROID_PRIORITY_* mask. */ + centroid_priority = radv_compute_centroid_priority(cmd_buffer, sample_locs[0], num_samples); - /* Compute the maximum sample distance from the specified locations. */ - for (unsigned i = 0; i < 4; ++i) { - for (uint32_t j = 0; j < num_samples; j++) { - VkOffset2D offset = sample_locs[i][j]; - max_sample_dist = MAX2(max_sample_dist, - MAX2(abs(offset.x), abs(offset.y))); - } - } + /* Compute the maximum sample distance from the specified locations. */ + for (unsigned i = 0; i < 4; ++i) { + for (uint32_t j = 0; j < num_samples; j++) { + VkOffset2D offset = sample_locs[i][j]; + max_sample_dist = MAX2(max_sample_dist, MAX2(abs(offset.x), abs(offset.y))); + } + } - /* Emit the specified user sample locations. */ - switch (num_samples) { - case 2: - case 4: - radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_pixel[0][0]); - radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_pixel[1][0]); - radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_pixel[2][0]); - radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_pixel[3][0]); - break; - case 8: - radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_pixel[0][0]); - radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_pixel[1][0]); - radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_pixel[2][0]); - radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_pixel[3][0]); - radeon_set_context_reg(cs, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, sample_locs_pixel[0][1]); - radeon_set_context_reg(cs, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, sample_locs_pixel[1][1]); - radeon_set_context_reg(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, sample_locs_pixel[2][1]); - radeon_set_context_reg(cs, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, sample_locs_pixel[3][1]); - break; - default: - unreachable("invalid number of samples"); - } + /* Emit the specified user sample locations. */ + switch (num_samples) { + case 2: + case 4: + radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, + sample_locs_pixel[0][0]); + radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, + sample_locs_pixel[1][0]); + radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, + sample_locs_pixel[2][0]); + radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, + sample_locs_pixel[3][0]); + break; + case 8: + radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, + sample_locs_pixel[0][0]); + radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, + sample_locs_pixel[1][0]); + radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, + sample_locs_pixel[2][0]); + radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, + sample_locs_pixel[3][0]); + radeon_set_context_reg(cs, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, + sample_locs_pixel[0][1]); + radeon_set_context_reg(cs, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, + sample_locs_pixel[1][1]); + radeon_set_context_reg(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, + sample_locs_pixel[2][1]); + radeon_set_context_reg(cs, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, + sample_locs_pixel[3][1]); + break; + default: + unreachable("invalid number of samples"); + } - /* Emit the maximum sample distance and the centroid priority. */ - radeon_set_context_reg_rmw(cs, R_028BE0_PA_SC_AA_CONFIG, - S_028BE0_MAX_SAMPLE_DIST(max_sample_dist), - ~C_028BE0_MAX_SAMPLE_DIST); + /* Emit the maximum sample distance and the centroid priority. */ + radeon_set_context_reg_rmw(cs, R_028BE0_PA_SC_AA_CONFIG, + S_028BE0_MAX_SAMPLE_DIST(max_sample_dist), ~C_028BE0_MAX_SAMPLE_DIST); - radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2); - radeon_emit(cs, centroid_priority); - radeon_emit(cs, centroid_priority >> 32); + radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2); + radeon_emit(cs, centroid_priority); + radeon_emit(cs, centroid_priority >> 32); - /* GFX9: Flush DFSM when the AA mode changes. */ - if (cmd_buffer->device->dfsm_allowed) { - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); - radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0)); - } + /* GFX9: Flush DFSM when the AA mode changes. */ + if (cmd_buffer->device->dfsm_allowed) { + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); + radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0)); + } - cmd_buffer->state.context_roll_without_scissor_emitted = true; + cmd_buffer->state.context_roll_without_scissor_emitted = true; } static void -radv_emit_inline_push_consts(struct radv_cmd_buffer *cmd_buffer, - struct radv_pipeline *pipeline, - gl_shader_stage stage, - int idx, int count, uint32_t *values) +radv_emit_inline_push_consts(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline, + gl_shader_stage stage, int idx, int count, uint32_t *values) { - struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx); - uint32_t base_reg = pipeline->user_data_0[stage]; - if (loc->sgpr_idx == -1) - return; + struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx); + uint32_t base_reg = pipeline->user_data_0[stage]; + if (loc->sgpr_idx == -1) + return; - assert(loc->num_sgprs == count); + assert(loc->num_sgprs == count); - radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, count); - radeon_emit_array(cmd_buffer->cs, values, count); + radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, count); + radeon_emit_array(cmd_buffer->cs, values, count); } static void -radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer, - struct radv_pipeline *pipeline) +radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline) { - int num_samples = pipeline->graphics.ms.num_samples; - struct radv_pipeline *old_pipeline = cmd_buffer->state.emitted_pipeline; + int num_samples = pipeline->graphics.ms.num_samples; + struct radv_pipeline *old_pipeline = cmd_buffer->state.emitted_pipeline; - if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.needs_sample_positions) - cmd_buffer->sample_positions_needed = true; + if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.needs_sample_positions) + cmd_buffer->sample_positions_needed = true; - if (old_pipeline && num_samples == old_pipeline->graphics.ms.num_samples) - return; + if (old_pipeline && num_samples == old_pipeline->graphics.ms.num_samples) + return; - radv_emit_default_sample_locations(cmd_buffer->cs, num_samples); + radv_emit_default_sample_locations(cmd_buffer->cs, num_samples); - cmd_buffer->state.context_roll_without_scissor_emitted = true; + cmd_buffer->state.context_roll_without_scissor_emitted = true; } static void -radv_update_binning_state(struct radv_cmd_buffer *cmd_buffer, - struct radv_pipeline *pipeline) +radv_update_binning_state(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline) { - const struct radv_pipeline *old_pipeline = cmd_buffer->state.emitted_pipeline; + const struct radv_pipeline *old_pipeline = cmd_buffer->state.emitted_pipeline; + if (pipeline->device->physical_device->rad_info.chip_class < GFX9) + return; - if (pipeline->device->physical_device->rad_info.chip_class < GFX9) - return; + if (old_pipeline && + old_pipeline->graphics.binning.pa_sc_binner_cntl_0 == + pipeline->graphics.binning.pa_sc_binner_cntl_0 && + old_pipeline->graphics.binning.db_dfsm_control == pipeline->graphics.binning.db_dfsm_control) + return; - if (old_pipeline && - old_pipeline->graphics.binning.pa_sc_binner_cntl_0 == pipeline->graphics.binning.pa_sc_binner_cntl_0 && - old_pipeline->graphics.binning.db_dfsm_control == pipeline->graphics.binning.db_dfsm_control) - return; + bool binning_flush = false; + if (cmd_buffer->device->physical_device->rad_info.family == CHIP_VEGA12 || + cmd_buffer->device->physical_device->rad_info.family == CHIP_VEGA20 || + cmd_buffer->device->physical_device->rad_info.family == CHIP_RAVEN2 || + cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) { + binning_flush = !old_pipeline || + G_028C44_BINNING_MODE(old_pipeline->graphics.binning.pa_sc_binner_cntl_0) != + G_028C44_BINNING_MODE(pipeline->graphics.binning.pa_sc_binner_cntl_0); + } - bool binning_flush = false; - if (cmd_buffer->device->physical_device->rad_info.family == CHIP_VEGA12 || - cmd_buffer->device->physical_device->rad_info.family == CHIP_VEGA20 || - cmd_buffer->device->physical_device->rad_info.family == CHIP_RAVEN2 || - cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) { - binning_flush = !old_pipeline || - G_028C44_BINNING_MODE(old_pipeline->graphics.binning.pa_sc_binner_cntl_0) != - G_028C44_BINNING_MODE(pipeline->graphics.binning.pa_sc_binner_cntl_0); - } + radeon_set_context_reg(cmd_buffer->cs, R_028C44_PA_SC_BINNER_CNTL_0, + pipeline->graphics.binning.pa_sc_binner_cntl_0 | + S_028C44_FLUSH_ON_BINNING_TRANSITION(!!binning_flush)); - radeon_set_context_reg(cmd_buffer->cs, R_028C44_PA_SC_BINNER_CNTL_0, - pipeline->graphics.binning.pa_sc_binner_cntl_0 | - S_028C44_FLUSH_ON_BINNING_TRANSITION(!!binning_flush)); + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) { + radeon_set_context_reg(cmd_buffer->cs, R_028038_DB_DFSM_CONTROL, + pipeline->graphics.binning.db_dfsm_control); + } else { + radeon_set_context_reg(cmd_buffer->cs, R_028060_DB_DFSM_CONTROL, + pipeline->graphics.binning.db_dfsm_control); + } - if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) { - radeon_set_context_reg(cmd_buffer->cs, R_028038_DB_DFSM_CONTROL, - pipeline->graphics.binning.db_dfsm_control); - } else { - radeon_set_context_reg(cmd_buffer->cs, R_028060_DB_DFSM_CONTROL, - pipeline->graphics.binning.db_dfsm_control); - } - - cmd_buffer->state.context_roll_without_scissor_emitted = true; -} - - -static void -radv_emit_shader_prefetch(struct radv_cmd_buffer *cmd_buffer, - struct radv_shader_variant *shader) -{ - uint64_t va; - - if (!shader) - return; - - va = radv_buffer_get_va(shader->bo) + shader->bo_offset; - - si_cp_dma_prefetch(cmd_buffer, va, shader->code_size); + cmd_buffer->state.context_roll_without_scissor_emitted = true; } static void -radv_emit_prefetch_L2(struct radv_cmd_buffer *cmd_buffer, - struct radv_pipeline *pipeline, - bool vertex_stage_only) +radv_emit_shader_prefetch(struct radv_cmd_buffer *cmd_buffer, struct radv_shader_variant *shader) { - struct radv_cmd_state *state = &cmd_buffer->state; - uint32_t mask = state->prefetch_L2_mask; + uint64_t va; - if (vertex_stage_only) { - /* Fast prefetch path for starting draws as soon as possible. - */ - mask = state->prefetch_L2_mask & (RADV_PREFETCH_VS | - RADV_PREFETCH_VBO_DESCRIPTORS); - } + if (!shader) + return; - if (mask & RADV_PREFETCH_VS) - radv_emit_shader_prefetch(cmd_buffer, - pipeline->shaders[MESA_SHADER_VERTEX]); + va = radv_buffer_get_va(shader->bo) + shader->bo_offset; - if (mask & RADV_PREFETCH_VBO_DESCRIPTORS) - si_cp_dma_prefetch(cmd_buffer, state->vb_va, state->vb_size); + si_cp_dma_prefetch(cmd_buffer, va, shader->code_size); +} - if (mask & RADV_PREFETCH_TCS) - radv_emit_shader_prefetch(cmd_buffer, - pipeline->shaders[MESA_SHADER_TESS_CTRL]); +static void +radv_emit_prefetch_L2(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline, + bool vertex_stage_only) +{ + struct radv_cmd_state *state = &cmd_buffer->state; + uint32_t mask = state->prefetch_L2_mask; - if (mask & RADV_PREFETCH_TES) - radv_emit_shader_prefetch(cmd_buffer, - pipeline->shaders[MESA_SHADER_TESS_EVAL]); + if (vertex_stage_only) { + /* Fast prefetch path for starting draws as soon as possible. + */ + mask = state->prefetch_L2_mask & (RADV_PREFETCH_VS | RADV_PREFETCH_VBO_DESCRIPTORS); + } - if (mask & RADV_PREFETCH_GS) { - radv_emit_shader_prefetch(cmd_buffer, - pipeline->shaders[MESA_SHADER_GEOMETRY]); - if (radv_pipeline_has_gs_copy_shader(pipeline)) - radv_emit_shader_prefetch(cmd_buffer, pipeline->gs_copy_shader); - } + if (mask & RADV_PREFETCH_VS) + radv_emit_shader_prefetch(cmd_buffer, pipeline->shaders[MESA_SHADER_VERTEX]); - if (mask & RADV_PREFETCH_PS) - radv_emit_shader_prefetch(cmd_buffer, - pipeline->shaders[MESA_SHADER_FRAGMENT]); + if (mask & RADV_PREFETCH_VBO_DESCRIPTORS) + si_cp_dma_prefetch(cmd_buffer, state->vb_va, state->vb_size); - state->prefetch_L2_mask &= ~mask; + if (mask & RADV_PREFETCH_TCS) + radv_emit_shader_prefetch(cmd_buffer, pipeline->shaders[MESA_SHADER_TESS_CTRL]); + + if (mask & RADV_PREFETCH_TES) + radv_emit_shader_prefetch(cmd_buffer, pipeline->shaders[MESA_SHADER_TESS_EVAL]); + + if (mask & RADV_PREFETCH_GS) { + radv_emit_shader_prefetch(cmd_buffer, pipeline->shaders[MESA_SHADER_GEOMETRY]); + if (radv_pipeline_has_gs_copy_shader(pipeline)) + radv_emit_shader_prefetch(cmd_buffer, pipeline->gs_copy_shader); + } + + if (mask & RADV_PREFETCH_PS) + radv_emit_shader_prefetch(cmd_buffer, pipeline->shaders[MESA_SHADER_FRAGMENT]); + + state->prefetch_L2_mask &= ~mask; } static void radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer) { - if (!cmd_buffer->device->physical_device->rad_info.rbplus_allowed) - return; + if (!cmd_buffer->device->physical_device->rad_info.rbplus_allowed) + return; - struct radv_pipeline *pipeline = cmd_buffer->state.pipeline; - const struct radv_subpass *subpass = cmd_buffer->state.subpass; + struct radv_pipeline *pipeline = cmd_buffer->state.pipeline; + const struct radv_subpass *subpass = cmd_buffer->state.subpass; - unsigned sx_ps_downconvert = 0; - unsigned sx_blend_opt_epsilon = 0; - unsigned sx_blend_opt_control = 0; + unsigned sx_ps_downconvert = 0; + unsigned sx_blend_opt_epsilon = 0; + unsigned sx_blend_opt_control = 0; - if (!cmd_buffer->state.attachments || !subpass) - return; + if (!cmd_buffer->state.attachments || !subpass) + return; - for (unsigned i = 0; i < subpass->color_count; ++i) { - if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) { - /* We don't set the DISABLE bits, because the HW can't have holes, - * so the SPI color format is set to 32-bit 1-component. */ - sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4); - continue; - } + for (unsigned i = 0; i < subpass->color_count; ++i) { + if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) { + /* We don't set the DISABLE bits, because the HW can't have holes, + * so the SPI color format is set to 32-bit 1-component. */ + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4); + continue; + } - int idx = subpass->color_attachments[i].attachment; - struct radv_color_buffer_info *cb = &cmd_buffer->state.attachments[idx].cb; + int idx = subpass->color_attachments[i].attachment; + struct radv_color_buffer_info *cb = &cmd_buffer->state.attachments[idx].cb; - unsigned format = G_028C70_FORMAT(cb->cb_color_info); - unsigned swap = G_028C70_COMP_SWAP(cb->cb_color_info); - uint32_t spi_format = (pipeline->graphics.col_format >> (i * 4)) & 0xf; - uint32_t colormask = (pipeline->graphics.cb_target_mask >> (i * 4)) & 0xf; + unsigned format = G_028C70_FORMAT(cb->cb_color_info); + unsigned swap = G_028C70_COMP_SWAP(cb->cb_color_info); + uint32_t spi_format = (pipeline->graphics.col_format >> (i * 4)) & 0xf; + uint32_t colormask = (pipeline->graphics.cb_target_mask >> (i * 4)) & 0xf; - bool has_alpha, has_rgb; + bool has_alpha, has_rgb; - /* Set if RGB and A are present. */ - has_alpha = !G_028C74_FORCE_DST_ALPHA_1(cb->cb_color_attrib); + /* Set if RGB and A are present. */ + has_alpha = !G_028C74_FORCE_DST_ALPHA_1(cb->cb_color_attrib); - if (format == V_028C70_COLOR_8 || - format == V_028C70_COLOR_16 || - format == V_028C70_COLOR_32) - has_rgb = !has_alpha; - else - has_rgb = true; + if (format == V_028C70_COLOR_8 || format == V_028C70_COLOR_16 || format == V_028C70_COLOR_32) + has_rgb = !has_alpha; + else + has_rgb = true; - /* Check the colormask and export format. */ - if (!(colormask & 0x7)) - has_rgb = false; - if (!(colormask & 0x8)) - has_alpha = false; + /* Check the colormask and export format. */ + if (!(colormask & 0x7)) + has_rgb = false; + if (!(colormask & 0x8)) + has_alpha = false; - if (spi_format == V_028714_SPI_SHADER_ZERO) { - has_rgb = false; - has_alpha = false; - } + if (spi_format == V_028714_SPI_SHADER_ZERO) { + has_rgb = false; + has_alpha = false; + } - /* The HW doesn't quite blend correctly with rgb9e5 if we disable the alpha - * optimization, even though it has no alpha. */ - if (has_rgb && format == V_028C70_COLOR_5_9_9_9) - has_alpha = true; + /* The HW doesn't quite blend correctly with rgb9e5 if we disable the alpha + * optimization, even though it has no alpha. */ + if (has_rgb && format == V_028C70_COLOR_5_9_9_9) + has_alpha = true; - /* Disable value checking for disabled channels. */ - if (!has_rgb) - sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4); - if (!has_alpha) - sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4); + /* Disable value checking for disabled channels. */ + if (!has_rgb) + sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4); + if (!has_alpha) + sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4); - /* Enable down-conversion for 32bpp and smaller formats. */ - switch (format) { - case V_028C70_COLOR_8: - case V_028C70_COLOR_8_8: - case V_028C70_COLOR_8_8_8_8: - /* For 1 and 2-channel formats, use the superset thereof. */ - if (spi_format == V_028714_SPI_SHADER_FP16_ABGR || - spi_format == V_028714_SPI_SHADER_UINT16_ABGR || - spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { - sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4); - sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4); - } - break; + /* Enable down-conversion for 32bpp and smaller formats. */ + switch (format) { + case V_028C70_COLOR_8: + case V_028C70_COLOR_8_8: + case V_028C70_COLOR_8_8_8_8: + /* For 1 and 2-channel formats, use the superset thereof. */ + if (spi_format == V_028714_SPI_SHADER_FP16_ABGR || + spi_format == V_028714_SPI_SHADER_UINT16_ABGR || + spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4); + sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4); + } + break; - case V_028C70_COLOR_5_6_5: - if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { - sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4); - sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4); - } - break; + case V_028C70_COLOR_5_6_5: + if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4); + sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4); + } + break; - case V_028C70_COLOR_1_5_5_5: - if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { - sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4); - sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4); - } - break; + case V_028C70_COLOR_1_5_5_5: + if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4); + sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4); + } + break; - case V_028C70_COLOR_4_4_4_4: - if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { - sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4); - sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4); - } - break; + case V_028C70_COLOR_4_4_4_4: + if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4); + sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4); + } + break; - case V_028C70_COLOR_32: - if (swap == V_028C70_SWAP_STD && - spi_format == V_028714_SPI_SHADER_32_R) - sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4); - else if (swap == V_028C70_SWAP_ALT_REV && - spi_format == V_028714_SPI_SHADER_32_AR) - sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4); - break; + case V_028C70_COLOR_32: + if (swap == V_028C70_SWAP_STD && spi_format == V_028714_SPI_SHADER_32_R) + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4); + else if (swap == V_028C70_SWAP_ALT_REV && spi_format == V_028714_SPI_SHADER_32_AR) + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4); + break; - case V_028C70_COLOR_16: - case V_028C70_COLOR_16_16: - /* For 1-channel formats, use the superset thereof. */ - if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR || - spi_format == V_028714_SPI_SHADER_SNORM16_ABGR || - spi_format == V_028714_SPI_SHADER_UINT16_ABGR || - spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { - if (swap == V_028C70_SWAP_STD || - swap == V_028C70_SWAP_STD_REV) - sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4); - else - sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4); - } - break; + case V_028C70_COLOR_16: + case V_028C70_COLOR_16_16: + /* For 1-channel formats, use the superset thereof. */ + if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR || + spi_format == V_028714_SPI_SHADER_SNORM16_ABGR || + spi_format == V_028714_SPI_SHADER_UINT16_ABGR || + spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { + if (swap == V_028C70_SWAP_STD || swap == V_028C70_SWAP_STD_REV) + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4); + else + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4); + } + break; - case V_028C70_COLOR_10_11_11: - if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) - sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4); - break; + case V_028C70_COLOR_10_11_11: + if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4); + break; - case V_028C70_COLOR_2_10_10_10: - if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { - sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4); - sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4); - } - break; - case V_028C70_COLOR_5_9_9_9: - if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) - sx_ps_downconvert |= V_028754_SX_RT_EXPORT_9_9_9_E5 << (i * 4); - break; - } - } + case V_028C70_COLOR_2_10_10_10: + if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4); + sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4); + } + break; + case V_028C70_COLOR_5_9_9_9: + if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_9_9_9_E5 << (i * 4); + break; + } + } - /* Do not set the DISABLE bits for the unused attachments, as that - * breaks dual source blending in SkQP and does not seem to improve - * performance. */ + /* Do not set the DISABLE bits for the unused attachments, as that + * breaks dual source blending in SkQP and does not seem to improve + * performance. */ - if (sx_ps_downconvert == cmd_buffer->state.last_sx_ps_downconvert && - sx_blend_opt_epsilon == cmd_buffer->state.last_sx_blend_opt_epsilon && - sx_blend_opt_control == cmd_buffer->state.last_sx_blend_opt_control) - return; + if (sx_ps_downconvert == cmd_buffer->state.last_sx_ps_downconvert && + sx_blend_opt_epsilon == cmd_buffer->state.last_sx_blend_opt_epsilon && + sx_blend_opt_control == cmd_buffer->state.last_sx_blend_opt_control) + return; - radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3); - radeon_emit(cmd_buffer->cs, sx_ps_downconvert); - radeon_emit(cmd_buffer->cs, sx_blend_opt_epsilon); - radeon_emit(cmd_buffer->cs, sx_blend_opt_control); + radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3); + radeon_emit(cmd_buffer->cs, sx_ps_downconvert); + radeon_emit(cmd_buffer->cs, sx_blend_opt_epsilon); + radeon_emit(cmd_buffer->cs, sx_blend_opt_control); - cmd_buffer->state.context_roll_without_scissor_emitted = true; + cmd_buffer->state.context_roll_without_scissor_emitted = true; - cmd_buffer->state.last_sx_ps_downconvert = sx_ps_downconvert; - cmd_buffer->state.last_sx_blend_opt_epsilon = sx_blend_opt_epsilon; - cmd_buffer->state.last_sx_blend_opt_control = sx_blend_opt_control; + cmd_buffer->state.last_sx_ps_downconvert = sx_ps_downconvert; + cmd_buffer->state.last_sx_blend_opt_epsilon = sx_blend_opt_epsilon; + cmd_buffer->state.last_sx_blend_opt_control = sx_blend_opt_control; } static void radv_emit_batch_break_on_new_ps(struct radv_cmd_buffer *cmd_buffer) { - if (!cmd_buffer->device->pbb_allowed) - return; + if (!cmd_buffer->device->pbb_allowed) + return; - struct radv_binning_settings settings = - radv_get_binning_settings(cmd_buffer->device->physical_device); - bool break_for_new_ps = - (!cmd_buffer->state.emitted_pipeline || - cmd_buffer->state.emitted_pipeline->shaders[MESA_SHADER_FRAGMENT] != - cmd_buffer->state.pipeline->shaders[MESA_SHADER_FRAGMENT]) && - (settings.context_states_per_bin > 1 || - settings.persistent_states_per_bin > 1); - bool break_for_new_cb_target_mask = - (!cmd_buffer->state.emitted_pipeline || - cmd_buffer->state.emitted_pipeline->graphics.cb_target_mask != - cmd_buffer->state.pipeline->graphics.cb_target_mask) && - settings.context_states_per_bin > 1; + struct radv_binning_settings settings = + radv_get_binning_settings(cmd_buffer->device->physical_device); + bool break_for_new_ps = + (!cmd_buffer->state.emitted_pipeline || + cmd_buffer->state.emitted_pipeline->shaders[MESA_SHADER_FRAGMENT] != + cmd_buffer->state.pipeline->shaders[MESA_SHADER_FRAGMENT]) && + (settings.context_states_per_bin > 1 || settings.persistent_states_per_bin > 1); + bool break_for_new_cb_target_mask = + (!cmd_buffer->state.emitted_pipeline || + cmd_buffer->state.emitted_pipeline->graphics.cb_target_mask != + cmd_buffer->state.pipeline->graphics.cb_target_mask) && + settings.context_states_per_bin > 1; - if (!break_for_new_ps && !break_for_new_cb_target_mask) - return; + if (!break_for_new_ps && !break_for_new_cb_target_mask) + return; - radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); - radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); + radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); + radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); } static void radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer) { - struct radv_pipeline *pipeline = cmd_buffer->state.pipeline; + struct radv_pipeline *pipeline = cmd_buffer->state.pipeline; - if (!pipeline || cmd_buffer->state.emitted_pipeline == pipeline) - return; + if (!pipeline || cmd_buffer->state.emitted_pipeline == pipeline) + return; - radv_update_multisample_state(cmd_buffer, pipeline); - radv_update_binning_state(cmd_buffer, pipeline); + radv_update_multisample_state(cmd_buffer, pipeline); + radv_update_binning_state(cmd_buffer, pipeline); - cmd_buffer->scratch_size_per_wave_needed = MAX2(cmd_buffer->scratch_size_per_wave_needed, - pipeline->scratch_bytes_per_wave); - cmd_buffer->scratch_waves_wanted = MAX2(cmd_buffer->scratch_waves_wanted, - pipeline->max_waves); + cmd_buffer->scratch_size_per_wave_needed = + MAX2(cmd_buffer->scratch_size_per_wave_needed, pipeline->scratch_bytes_per_wave); + cmd_buffer->scratch_waves_wanted = MAX2(cmd_buffer->scratch_waves_wanted, pipeline->max_waves); - if (!cmd_buffer->state.emitted_pipeline || - cmd_buffer->state.emitted_pipeline->graphics.can_use_guardband != - pipeline->graphics.can_use_guardband) - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR; + if (!cmd_buffer->state.emitted_pipeline || + cmd_buffer->state.emitted_pipeline->graphics.can_use_guardband != + pipeline->graphics.can_use_guardband) + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR; - if (!cmd_buffer->state.emitted_pipeline || - cmd_buffer->state.emitted_pipeline->graphics.pa_su_sc_mode_cntl != - pipeline->graphics.pa_su_sc_mode_cntl) - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_CULL_MODE | - RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE; + if (!cmd_buffer->state.emitted_pipeline || + cmd_buffer->state.emitted_pipeline->graphics.pa_su_sc_mode_cntl != + pipeline->graphics.pa_su_sc_mode_cntl) + cmd_buffer->state.dirty |= + RADV_CMD_DIRTY_DYNAMIC_CULL_MODE | RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE; - if (!cmd_buffer->state.emitted_pipeline) - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY; + if (!cmd_buffer->state.emitted_pipeline) + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY; - if (!cmd_buffer->state.emitted_pipeline || - cmd_buffer->state.emitted_pipeline->graphics.db_depth_control != - pipeline->graphics.db_depth_control) - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE | - RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE | - RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP | - RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE | - RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE | - RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP; + if (!cmd_buffer->state.emitted_pipeline || + cmd_buffer->state.emitted_pipeline->graphics.db_depth_control != + pipeline->graphics.db_depth_control) + cmd_buffer->state.dirty |= + RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE | + RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP | RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE | + RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP; - if (!cmd_buffer->state.emitted_pipeline) - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP; + if (!cmd_buffer->state.emitted_pipeline) + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP; - radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw); + radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw); - if (!cmd_buffer->state.emitted_pipeline || - cmd_buffer->state.emitted_pipeline->ctx_cs.cdw != pipeline->ctx_cs.cdw || - cmd_buffer->state.emitted_pipeline->ctx_cs_hash != pipeline->ctx_cs_hash || - memcmp(cmd_buffer->state.emitted_pipeline->ctx_cs.buf, - pipeline->ctx_cs.buf, pipeline->ctx_cs.cdw * 4)) { - radeon_emit_array(cmd_buffer->cs, pipeline->ctx_cs.buf, pipeline->ctx_cs.cdw); - cmd_buffer->state.context_roll_without_scissor_emitted = true; - } + if (!cmd_buffer->state.emitted_pipeline || + cmd_buffer->state.emitted_pipeline->ctx_cs.cdw != pipeline->ctx_cs.cdw || + cmd_buffer->state.emitted_pipeline->ctx_cs_hash != pipeline->ctx_cs_hash || + memcmp(cmd_buffer->state.emitted_pipeline->ctx_cs.buf, pipeline->ctx_cs.buf, + pipeline->ctx_cs.cdw * 4)) { + radeon_emit_array(cmd_buffer->cs, pipeline->ctx_cs.buf, pipeline->ctx_cs.cdw); + cmd_buffer->state.context_roll_without_scissor_emitted = true; + } - radv_emit_batch_break_on_new_ps(cmd_buffer); + radv_emit_batch_break_on_new_ps(cmd_buffer); - for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) { - if (!pipeline->shaders[i]) - continue; + for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) { + if (!pipeline->shaders[i]) + continue; - radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, - pipeline->shaders[i]->bo); - } + radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->shaders[i]->bo); + } - if (radv_pipeline_has_gs_copy_shader(pipeline)) - radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, - pipeline->gs_copy_shader->bo); + if (radv_pipeline_has_gs_copy_shader(pipeline)) + radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->gs_copy_shader->bo); - if (unlikely(cmd_buffer->device->trace_bo)) - radv_save_pipeline(cmd_buffer, pipeline); + if (unlikely(cmd_buffer->device->trace_bo)) + radv_save_pipeline(cmd_buffer, pipeline); - cmd_buffer->state.emitted_pipeline = pipeline; + cmd_buffer->state.emitted_pipeline = pipeline; - cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_PIPELINE; + cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_PIPELINE; } static void radv_emit_viewport(struct radv_cmd_buffer *cmd_buffer) { - si_write_viewport(cmd_buffer->cs, 0, cmd_buffer->state.dynamic.viewport.count, - cmd_buffer->state.dynamic.viewport.viewports); + si_write_viewport(cmd_buffer->cs, 0, cmd_buffer->state.dynamic.viewport.count, + cmd_buffer->state.dynamic.viewport.viewports); } static void radv_emit_scissor(struct radv_cmd_buffer *cmd_buffer) { - uint32_t count = cmd_buffer->state.dynamic.scissor.count; + uint32_t count = cmd_buffer->state.dynamic.scissor.count; - si_write_scissors(cmd_buffer->cs, 0, count, - cmd_buffer->state.dynamic.scissor.scissors, - cmd_buffer->state.dynamic.viewport.viewports, - cmd_buffer->state.emitted_pipeline->graphics.can_use_guardband); + si_write_scissors(cmd_buffer->cs, 0, count, cmd_buffer->state.dynamic.scissor.scissors, + cmd_buffer->state.dynamic.viewport.viewports, + cmd_buffer->state.emitted_pipeline->graphics.can_use_guardband); - cmd_buffer->state.context_roll_without_scissor_emitted = false; + cmd_buffer->state.context_roll_without_scissor_emitted = false; } static void radv_emit_discard_rectangle(struct radv_cmd_buffer *cmd_buffer) { - if (!cmd_buffer->state.dynamic.discard_rectangle.count) - return; + if (!cmd_buffer->state.dynamic.discard_rectangle.count) + return; - radeon_set_context_reg_seq(cmd_buffer->cs, R_028210_PA_SC_CLIPRECT_0_TL, - cmd_buffer->state.dynamic.discard_rectangle.count * 2); - for (unsigned i = 0; i < cmd_buffer->state.dynamic.discard_rectangle.count; ++i) { - VkRect2D rect = cmd_buffer->state.dynamic.discard_rectangle.rectangles[i]; - radeon_emit(cmd_buffer->cs, S_028210_TL_X(rect.offset.x) | S_028210_TL_Y(rect.offset.y)); - radeon_emit(cmd_buffer->cs, S_028214_BR_X(rect.offset.x + rect.extent.width) | - S_028214_BR_Y(rect.offset.y + rect.extent.height)); - } + radeon_set_context_reg_seq(cmd_buffer->cs, R_028210_PA_SC_CLIPRECT_0_TL, + cmd_buffer->state.dynamic.discard_rectangle.count * 2); + for (unsigned i = 0; i < cmd_buffer->state.dynamic.discard_rectangle.count; ++i) { + VkRect2D rect = cmd_buffer->state.dynamic.discard_rectangle.rectangles[i]; + radeon_emit(cmd_buffer->cs, S_028210_TL_X(rect.offset.x) | S_028210_TL_Y(rect.offset.y)); + radeon_emit(cmd_buffer->cs, S_028214_BR_X(rect.offset.x + rect.extent.width) | + S_028214_BR_Y(rect.offset.y + rect.extent.height)); + } } static void radv_emit_line_width(struct radv_cmd_buffer *cmd_buffer) { - unsigned width = cmd_buffer->state.dynamic.line_width * 8; + unsigned width = cmd_buffer->state.dynamic.line_width * 8; - radeon_set_context_reg(cmd_buffer->cs, R_028A08_PA_SU_LINE_CNTL, - S_028A08_WIDTH(CLAMP(width, 0, 0xFFFF))); + radeon_set_context_reg(cmd_buffer->cs, R_028A08_PA_SU_LINE_CNTL, + S_028A08_WIDTH(CLAMP(width, 0, 0xFFFF))); } static void radv_emit_blend_constants(struct radv_cmd_buffer *cmd_buffer) { - struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; + struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; - radeon_set_context_reg_seq(cmd_buffer->cs, R_028414_CB_BLEND_RED, 4); - radeon_emit_array(cmd_buffer->cs, (uint32_t *)d->blend_constants, 4); + radeon_set_context_reg_seq(cmd_buffer->cs, R_028414_CB_BLEND_RED, 4); + radeon_emit_array(cmd_buffer->cs, (uint32_t *)d->blend_constants, 4); } static void radv_emit_stencil(struct radv_cmd_buffer *cmd_buffer) { - struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; + struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; - radeon_set_context_reg_seq(cmd_buffer->cs, - R_028430_DB_STENCILREFMASK, 2); - radeon_emit(cmd_buffer->cs, - S_028430_STENCILTESTVAL(d->stencil_reference.front) | - S_028430_STENCILMASK(d->stencil_compare_mask.front) | - S_028430_STENCILWRITEMASK(d->stencil_write_mask.front) | - S_028430_STENCILOPVAL(1)); - radeon_emit(cmd_buffer->cs, - S_028434_STENCILTESTVAL_BF(d->stencil_reference.back) | - S_028434_STENCILMASK_BF(d->stencil_compare_mask.back) | - S_028434_STENCILWRITEMASK_BF(d->stencil_write_mask.back) | - S_028434_STENCILOPVAL_BF(1)); + radeon_set_context_reg_seq(cmd_buffer->cs, R_028430_DB_STENCILREFMASK, 2); + radeon_emit(cmd_buffer->cs, S_028430_STENCILTESTVAL(d->stencil_reference.front) | + S_028430_STENCILMASK(d->stencil_compare_mask.front) | + S_028430_STENCILWRITEMASK(d->stencil_write_mask.front) | + S_028430_STENCILOPVAL(1)); + radeon_emit(cmd_buffer->cs, S_028434_STENCILTESTVAL_BF(d->stencil_reference.back) | + S_028434_STENCILMASK_BF(d->stencil_compare_mask.back) | + S_028434_STENCILWRITEMASK_BF(d->stencil_write_mask.back) | + S_028434_STENCILOPVAL_BF(1)); } static void radv_emit_depth_bounds(struct radv_cmd_buffer *cmd_buffer) { - struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; + struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; - radeon_set_context_reg(cmd_buffer->cs, R_028020_DB_DEPTH_BOUNDS_MIN, - fui(d->depth_bounds.min)); - radeon_set_context_reg(cmd_buffer->cs, R_028024_DB_DEPTH_BOUNDS_MAX, - fui(d->depth_bounds.max)); + radeon_set_context_reg(cmd_buffer->cs, R_028020_DB_DEPTH_BOUNDS_MIN, fui(d->depth_bounds.min)); + radeon_set_context_reg(cmd_buffer->cs, R_028024_DB_DEPTH_BOUNDS_MAX, fui(d->depth_bounds.max)); } static void radv_emit_depth_bias(struct radv_cmd_buffer *cmd_buffer) { - struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; - unsigned slope = fui(d->depth_bias.slope * 16.0f); + struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; + unsigned slope = fui(d->depth_bias.slope * 16.0f); - radeon_set_context_reg_seq(cmd_buffer->cs, - R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 5); - radeon_emit(cmd_buffer->cs, fui(d->depth_bias.clamp)); /* CLAMP */ - radeon_emit(cmd_buffer->cs, slope); /* FRONT SCALE */ - radeon_emit(cmd_buffer->cs, fui(d->depth_bias.bias)); /* FRONT OFFSET */ - radeon_emit(cmd_buffer->cs, slope); /* BACK SCALE */ - radeon_emit(cmd_buffer->cs, fui(d->depth_bias.bias)); /* BACK OFFSET */ + radeon_set_context_reg_seq(cmd_buffer->cs, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 5); + radeon_emit(cmd_buffer->cs, fui(d->depth_bias.clamp)); /* CLAMP */ + radeon_emit(cmd_buffer->cs, slope); /* FRONT SCALE */ + radeon_emit(cmd_buffer->cs, fui(d->depth_bias.bias)); /* FRONT OFFSET */ + radeon_emit(cmd_buffer->cs, slope); /* BACK SCALE */ + radeon_emit(cmd_buffer->cs, fui(d->depth_bias.bias)); /* BACK OFFSET */ } static void radv_emit_line_stipple(struct radv_cmd_buffer *cmd_buffer) { - struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; - uint32_t auto_reset_cntl = 1; + struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; + uint32_t auto_reset_cntl = 1; - if (d->primitive_topology == V_008958_DI_PT_LINESTRIP) - auto_reset_cntl = 2; + if (d->primitive_topology == V_008958_DI_PT_LINESTRIP) + auto_reset_cntl = 2; - radeon_set_context_reg(cmd_buffer->cs, R_028A0C_PA_SC_LINE_STIPPLE, - S_028A0C_LINE_PATTERN(d->line_stipple.pattern) | - S_028A0C_REPEAT_COUNT(d->line_stipple.factor - 1) | - S_028A0C_AUTO_RESET_CNTL(auto_reset_cntl)); + radeon_set_context_reg(cmd_buffer->cs, R_028A0C_PA_SC_LINE_STIPPLE, + S_028A0C_LINE_PATTERN(d->line_stipple.pattern) | + S_028A0C_REPEAT_COUNT(d->line_stipple.factor - 1) | + S_028A0C_AUTO_RESET_CNTL(auto_reset_cntl)); } static void radv_emit_culling(struct radv_cmd_buffer *cmd_buffer, uint32_t states) { - unsigned pa_su_sc_mode_cntl = cmd_buffer->state.pipeline->graphics.pa_su_sc_mode_cntl; - struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; + unsigned pa_su_sc_mode_cntl = cmd_buffer->state.pipeline->graphics.pa_su_sc_mode_cntl; + struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; - if (states & RADV_CMD_DIRTY_DYNAMIC_CULL_MODE) { - pa_su_sc_mode_cntl &= C_028814_CULL_FRONT; - pa_su_sc_mode_cntl |= S_028814_CULL_FRONT(!!(d->cull_mode & VK_CULL_MODE_FRONT_BIT)); + if (states & RADV_CMD_DIRTY_DYNAMIC_CULL_MODE) { + pa_su_sc_mode_cntl &= C_028814_CULL_FRONT; + pa_su_sc_mode_cntl |= S_028814_CULL_FRONT(!!(d->cull_mode & VK_CULL_MODE_FRONT_BIT)); - pa_su_sc_mode_cntl &= C_028814_CULL_BACK; - pa_su_sc_mode_cntl |= S_028814_CULL_BACK(!!(d->cull_mode & VK_CULL_MODE_BACK_BIT)); - } + pa_su_sc_mode_cntl &= C_028814_CULL_BACK; + pa_su_sc_mode_cntl |= S_028814_CULL_BACK(!!(d->cull_mode & VK_CULL_MODE_BACK_BIT)); + } - if (states & RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE) { - pa_su_sc_mode_cntl &= C_028814_FACE; - pa_su_sc_mode_cntl |= S_028814_FACE(d->front_face); - } + if (states & RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE) { + pa_su_sc_mode_cntl &= C_028814_FACE; + pa_su_sc_mode_cntl |= S_028814_FACE(d->front_face); + } - radeon_set_context_reg(cmd_buffer->cs, R_028814_PA_SU_SC_MODE_CNTL, - pa_su_sc_mode_cntl); + radeon_set_context_reg(cmd_buffer->cs, R_028814_PA_SU_SC_MODE_CNTL, pa_su_sc_mode_cntl); } static void radv_emit_primitive_topology(struct radv_cmd_buffer *cmd_buffer) { - struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; + struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; - if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) { - radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, - cmd_buffer->cs, - R_030908_VGT_PRIMITIVE_TYPE, 1, - d->primitive_topology); - } else { - radeon_set_config_reg(cmd_buffer->cs, - R_008958_VGT_PRIMITIVE_TYPE, - d->primitive_topology); - } + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) { + radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cmd_buffer->cs, + R_030908_VGT_PRIMITIVE_TYPE, 1, d->primitive_topology); + } else { + radeon_set_config_reg(cmd_buffer->cs, R_008958_VGT_PRIMITIVE_TYPE, d->primitive_topology); + } } static void radv_emit_depth_control(struct radv_cmd_buffer *cmd_buffer, uint32_t states) { - unsigned db_depth_control = cmd_buffer->state.pipeline->graphics.db_depth_control; - struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; + unsigned db_depth_control = cmd_buffer->state.pipeline->graphics.db_depth_control; + struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; - if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE) { - db_depth_control &= C_028800_Z_ENABLE; - db_depth_control |= S_028800_Z_ENABLE(d->depth_test_enable ? 1 : 0); - } + if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE) { + db_depth_control &= C_028800_Z_ENABLE; + db_depth_control |= S_028800_Z_ENABLE(d->depth_test_enable ? 1 : 0); + } - if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE) { - db_depth_control &= C_028800_Z_WRITE_ENABLE; - db_depth_control |= S_028800_Z_WRITE_ENABLE(d->depth_write_enable ? 1 : 0); - } + if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE) { + db_depth_control &= C_028800_Z_WRITE_ENABLE; + db_depth_control |= S_028800_Z_WRITE_ENABLE(d->depth_write_enable ? 1 : 0); + } - if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP) { - db_depth_control &= C_028800_ZFUNC; - db_depth_control |= S_028800_ZFUNC(d->depth_compare_op); - } + if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP) { + db_depth_control &= C_028800_ZFUNC; + db_depth_control |= S_028800_ZFUNC(d->depth_compare_op); + } - if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) { - db_depth_control &= C_028800_DEPTH_BOUNDS_ENABLE; - db_depth_control |= S_028800_DEPTH_BOUNDS_ENABLE(d->depth_bounds_test_enable ? 1 : 0); - } + if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) { + db_depth_control &= C_028800_DEPTH_BOUNDS_ENABLE; + db_depth_control |= S_028800_DEPTH_BOUNDS_ENABLE(d->depth_bounds_test_enable ? 1 : 0); + } - if (states & RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE) { - db_depth_control &= C_028800_STENCIL_ENABLE; - db_depth_control |= S_028800_STENCIL_ENABLE(d->stencil_test_enable ? 1 : 0); + if (states & RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE) { + db_depth_control &= C_028800_STENCIL_ENABLE; + db_depth_control |= S_028800_STENCIL_ENABLE(d->stencil_test_enable ? 1 : 0); - db_depth_control &= C_028800_BACKFACE_ENABLE; - db_depth_control |= S_028800_BACKFACE_ENABLE(d->stencil_test_enable ? 1 : 0); - } + db_depth_control &= C_028800_BACKFACE_ENABLE; + db_depth_control |= S_028800_BACKFACE_ENABLE(d->stencil_test_enable ? 1 : 0); + } - if (states & RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP) { - db_depth_control &= C_028800_STENCILFUNC; - db_depth_control |= S_028800_STENCILFUNC(d->stencil_op.front.compare_op); + if (states & RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP) { + db_depth_control &= C_028800_STENCILFUNC; + db_depth_control |= S_028800_STENCILFUNC(d->stencil_op.front.compare_op); - db_depth_control &= C_028800_STENCILFUNC_BF; - db_depth_control |= S_028800_STENCILFUNC_BF(d->stencil_op.back.compare_op); - } + db_depth_control &= C_028800_STENCILFUNC_BF; + db_depth_control |= S_028800_STENCILFUNC_BF(d->stencil_op.back.compare_op); + } - radeon_set_context_reg(cmd_buffer->cs, R_028800_DB_DEPTH_CONTROL, - db_depth_control); + radeon_set_context_reg(cmd_buffer->cs, R_028800_DB_DEPTH_CONTROL, db_depth_control); } static void radv_emit_stencil_control(struct radv_cmd_buffer *cmd_buffer) { - struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; + struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; - radeon_set_context_reg(cmd_buffer->cs, R_02842C_DB_STENCIL_CONTROL, - S_02842C_STENCILFAIL(si_translate_stencil_op(d->stencil_op.front.fail_op)) | - S_02842C_STENCILZPASS(si_translate_stencil_op(d->stencil_op.front.pass_op)) | - S_02842C_STENCILZFAIL(si_translate_stencil_op(d->stencil_op.front.depth_fail_op)) | - S_02842C_STENCILFAIL_BF(si_translate_stencil_op(d->stencil_op.back.fail_op)) | - S_02842C_STENCILZPASS_BF(si_translate_stencil_op(d->stencil_op.back.pass_op)) | - S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(d->stencil_op.back.depth_fail_op))); + radeon_set_context_reg( + cmd_buffer->cs, R_02842C_DB_STENCIL_CONTROL, + S_02842C_STENCILFAIL(si_translate_stencil_op(d->stencil_op.front.fail_op)) | + S_02842C_STENCILZPASS(si_translate_stencil_op(d->stencil_op.front.pass_op)) | + S_02842C_STENCILZFAIL(si_translate_stencil_op(d->stencil_op.front.depth_fail_op)) | + S_02842C_STENCILFAIL_BF(si_translate_stencil_op(d->stencil_op.back.fail_op)) | + S_02842C_STENCILZPASS_BF(si_translate_stencil_op(d->stencil_op.back.pass_op)) | + S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(d->stencil_op.back.depth_fail_op))); } static void radv_emit_fragment_shading_rate(struct radv_cmd_buffer *cmd_buffer) { - struct radv_pipeline *pipeline = cmd_buffer->state.pipeline; - struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; - uint32_t rate_x = MIN2(2, d->fragment_shading_rate.size.width) - 1; - uint32_t rate_y = MIN2(2, d->fragment_shading_rate.size.height) - 1; - uint32_t pa_cl_vrs_cntl = pipeline->graphics.vrs.pa_cl_vrs_cntl; + struct radv_pipeline *pipeline = cmd_buffer->state.pipeline; + struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; + uint32_t rate_x = MIN2(2, d->fragment_shading_rate.size.width) - 1; + uint32_t rate_y = MIN2(2, d->fragment_shading_rate.size.height) - 1; + uint32_t pa_cl_vrs_cntl = pipeline->graphics.vrs.pa_cl_vrs_cntl; - /* Emit per-draw VRS rate which is the first combiner. */ - radeon_set_uconfig_reg(cmd_buffer->cs, R_03098C_GE_VRS_RATE, - S_03098C_RATE_X(rate_x) | - S_03098C_RATE_Y(rate_y)); + /* Emit per-draw VRS rate which is the first combiner. */ + radeon_set_uconfig_reg(cmd_buffer->cs, R_03098C_GE_VRS_RATE, + S_03098C_RATE_X(rate_x) | S_03098C_RATE_Y(rate_y)); - /* VERTEX_RATE_COMBINER_MODE controls the combiner mode between the - * draw rate and the vertex rate. - */ - pa_cl_vrs_cntl |= S_028848_VERTEX_RATE_COMBINER_MODE(d->fragment_shading_rate.combiner_ops[0]); + /* VERTEX_RATE_COMBINER_MODE controls the combiner mode between the + * draw rate and the vertex rate. + */ + pa_cl_vrs_cntl |= S_028848_VERTEX_RATE_COMBINER_MODE(d->fragment_shading_rate.combiner_ops[0]); - radeon_set_context_reg(cmd_buffer->cs, R_028848_PA_CL_VRS_CNTL, pa_cl_vrs_cntl); + radeon_set_context_reg(cmd_buffer->cs, R_028848_PA_CL_VRS_CNTL, pa_cl_vrs_cntl); } static void -radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, - int index, - struct radv_color_buffer_info *cb, - struct radv_image_view *iview, - VkImageLayout layout, - bool in_render_loop, - bool disable_dcc) +radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index, + struct radv_color_buffer_info *cb, struct radv_image_view *iview, + VkImageLayout layout, bool in_render_loop, bool disable_dcc) { - bool is_vi = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX8; - uint32_t cb_color_info = cb->cb_color_info; - struct radv_image *image = iview->image; + bool is_vi = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX8; + uint32_t cb_color_info = cb->cb_color_info; + struct radv_image *image = iview->image; - if (!radv_layout_dcc_compressed(cmd_buffer->device, image, layout, in_render_loop, - radv_image_queue_family_mask(image, - cmd_buffer->queue_family_index, - cmd_buffer->queue_family_index)) || - disable_dcc) { - cb_color_info &= C_028C70_DCC_ENABLE; - } + if (!radv_layout_dcc_compressed( + cmd_buffer->device, image, layout, in_render_loop, + radv_image_queue_family_mask(image, cmd_buffer->queue_family_index, + cmd_buffer->queue_family_index)) || + disable_dcc) { + cb_color_info &= C_028C70_DCC_ENABLE; + } - if (!radv_layout_fmask_compressed(cmd_buffer->device, image, layout, - radv_image_queue_family_mask(image, - cmd_buffer->queue_family_index, - cmd_buffer->queue_family_index))) { - cb_color_info &= C_028C70_COMPRESSION; - } + if (!radv_layout_fmask_compressed( + cmd_buffer->device, image, layout, + radv_image_queue_family_mask(image, cmd_buffer->queue_family_index, + cmd_buffer->queue_family_index))) { + cb_color_info &= C_028C70_COMPRESSION; + } - if (radv_image_is_tc_compat_cmask(image) && - (radv_is_fmask_decompress_pipeline(cmd_buffer) || - radv_is_dcc_decompress_pipeline(cmd_buffer))) { - /* If this bit is set, the FMASK decompression operation - * doesn't occur (DCC_COMPRESS also implies FMASK_DECOMPRESS). - */ - cb_color_info &= C_028C70_FMASK_COMPRESS_1FRAG_ONLY; - } + if (radv_image_is_tc_compat_cmask(image) && (radv_is_fmask_decompress_pipeline(cmd_buffer) || + radv_is_dcc_decompress_pipeline(cmd_buffer))) { + /* If this bit is set, the FMASK decompression operation + * doesn't occur (DCC_COMPRESS also implies FMASK_DECOMPRESS). + */ + cb_color_info &= C_028C70_FMASK_COMPRESS_1FRAG_ONLY; + } - if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) { - radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11); - radeon_emit(cmd_buffer->cs, cb->cb_color_base); - radeon_emit(cmd_buffer->cs, 0); - radeon_emit(cmd_buffer->cs, 0); - radeon_emit(cmd_buffer->cs, cb->cb_color_view); - radeon_emit(cmd_buffer->cs, cb_color_info); - radeon_emit(cmd_buffer->cs, cb->cb_color_attrib); - radeon_emit(cmd_buffer->cs, cb->cb_dcc_control); - radeon_emit(cmd_buffer->cs, cb->cb_color_cmask); - radeon_emit(cmd_buffer->cs, 0); - radeon_emit(cmd_buffer->cs, cb->cb_color_fmask); - radeon_emit(cmd_buffer->cs, 0); + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) { + radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11); + radeon_emit(cmd_buffer->cs, cb->cb_color_base); + radeon_emit(cmd_buffer->cs, 0); + radeon_emit(cmd_buffer->cs, 0); + radeon_emit(cmd_buffer->cs, cb->cb_color_view); + radeon_emit(cmd_buffer->cs, cb_color_info); + radeon_emit(cmd_buffer->cs, cb->cb_color_attrib); + radeon_emit(cmd_buffer->cs, cb->cb_dcc_control); + radeon_emit(cmd_buffer->cs, cb->cb_color_cmask); + radeon_emit(cmd_buffer->cs, 0); + radeon_emit(cmd_buffer->cs, cb->cb_color_fmask); + radeon_emit(cmd_buffer->cs, 0); - radeon_set_context_reg_seq(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, 1); - radeon_emit(cmd_buffer->cs, cb->cb_dcc_base); + radeon_set_context_reg_seq(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, 1); + radeon_emit(cmd_buffer->cs, cb->cb_dcc_base); - radeon_set_context_reg(cmd_buffer->cs, R_028E40_CB_COLOR0_BASE_EXT + index * 4, - cb->cb_color_base >> 32); - radeon_set_context_reg(cmd_buffer->cs, R_028E60_CB_COLOR0_CMASK_BASE_EXT + index * 4, - cb->cb_color_cmask >> 32); - radeon_set_context_reg(cmd_buffer->cs, R_028E80_CB_COLOR0_FMASK_BASE_EXT + index * 4, - cb->cb_color_fmask >> 32); - radeon_set_context_reg(cmd_buffer->cs, R_028EA0_CB_COLOR0_DCC_BASE_EXT + index * 4, - cb->cb_dcc_base >> 32); - radeon_set_context_reg(cmd_buffer->cs, R_028EC0_CB_COLOR0_ATTRIB2 + index * 4, - cb->cb_color_attrib2); - radeon_set_context_reg(cmd_buffer->cs, R_028EE0_CB_COLOR0_ATTRIB3 + index * 4, - cb->cb_color_attrib3); - } else if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) { - radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11); - radeon_emit(cmd_buffer->cs, cb->cb_color_base); - radeon_emit(cmd_buffer->cs, S_028C64_BASE_256B(cb->cb_color_base >> 32)); - radeon_emit(cmd_buffer->cs, cb->cb_color_attrib2); - radeon_emit(cmd_buffer->cs, cb->cb_color_view); - radeon_emit(cmd_buffer->cs, cb_color_info); - radeon_emit(cmd_buffer->cs, cb->cb_color_attrib); - radeon_emit(cmd_buffer->cs, cb->cb_dcc_control); - radeon_emit(cmd_buffer->cs, cb->cb_color_cmask); - radeon_emit(cmd_buffer->cs, S_028C80_BASE_256B(cb->cb_color_cmask >> 32)); - radeon_emit(cmd_buffer->cs, cb->cb_color_fmask); - radeon_emit(cmd_buffer->cs, S_028C88_BASE_256B(cb->cb_color_fmask >> 32)); + radeon_set_context_reg(cmd_buffer->cs, R_028E40_CB_COLOR0_BASE_EXT + index * 4, + cb->cb_color_base >> 32); + radeon_set_context_reg(cmd_buffer->cs, R_028E60_CB_COLOR0_CMASK_BASE_EXT + index * 4, + cb->cb_color_cmask >> 32); + radeon_set_context_reg(cmd_buffer->cs, R_028E80_CB_COLOR0_FMASK_BASE_EXT + index * 4, + cb->cb_color_fmask >> 32); + radeon_set_context_reg(cmd_buffer->cs, R_028EA0_CB_COLOR0_DCC_BASE_EXT + index * 4, + cb->cb_dcc_base >> 32); + radeon_set_context_reg(cmd_buffer->cs, R_028EC0_CB_COLOR0_ATTRIB2 + index * 4, + cb->cb_color_attrib2); + radeon_set_context_reg(cmd_buffer->cs, R_028EE0_CB_COLOR0_ATTRIB3 + index * 4, + cb->cb_color_attrib3); + } else if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) { + radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11); + radeon_emit(cmd_buffer->cs, cb->cb_color_base); + radeon_emit(cmd_buffer->cs, S_028C64_BASE_256B(cb->cb_color_base >> 32)); + radeon_emit(cmd_buffer->cs, cb->cb_color_attrib2); + radeon_emit(cmd_buffer->cs, cb->cb_color_view); + radeon_emit(cmd_buffer->cs, cb_color_info); + radeon_emit(cmd_buffer->cs, cb->cb_color_attrib); + radeon_emit(cmd_buffer->cs, cb->cb_dcc_control); + radeon_emit(cmd_buffer->cs, cb->cb_color_cmask); + radeon_emit(cmd_buffer->cs, S_028C80_BASE_256B(cb->cb_color_cmask >> 32)); + radeon_emit(cmd_buffer->cs, cb->cb_color_fmask); + radeon_emit(cmd_buffer->cs, S_028C88_BASE_256B(cb->cb_color_fmask >> 32)); - radeon_set_context_reg_seq(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, 2); - radeon_emit(cmd_buffer->cs, cb->cb_dcc_base); - radeon_emit(cmd_buffer->cs, S_028C98_BASE_256B(cb->cb_dcc_base >> 32)); + radeon_set_context_reg_seq(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, 2); + radeon_emit(cmd_buffer->cs, cb->cb_dcc_base); + radeon_emit(cmd_buffer->cs, S_028C98_BASE_256B(cb->cb_dcc_base >> 32)); - radeon_set_context_reg(cmd_buffer->cs, R_0287A0_CB_MRT0_EPITCH + index * 4, - cb->cb_mrt_epitch); - } else { - radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11); - radeon_emit(cmd_buffer->cs, cb->cb_color_base); - radeon_emit(cmd_buffer->cs, cb->cb_color_pitch); - radeon_emit(cmd_buffer->cs, cb->cb_color_slice); - radeon_emit(cmd_buffer->cs, cb->cb_color_view); - radeon_emit(cmd_buffer->cs, cb_color_info); - radeon_emit(cmd_buffer->cs, cb->cb_color_attrib); - radeon_emit(cmd_buffer->cs, cb->cb_dcc_control); - radeon_emit(cmd_buffer->cs, cb->cb_color_cmask); - radeon_emit(cmd_buffer->cs, cb->cb_color_cmask_slice); - radeon_emit(cmd_buffer->cs, cb->cb_color_fmask); - radeon_emit(cmd_buffer->cs, cb->cb_color_fmask_slice); + radeon_set_context_reg(cmd_buffer->cs, R_0287A0_CB_MRT0_EPITCH + index * 4, + cb->cb_mrt_epitch); + } else { + radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11); + radeon_emit(cmd_buffer->cs, cb->cb_color_base); + radeon_emit(cmd_buffer->cs, cb->cb_color_pitch); + radeon_emit(cmd_buffer->cs, cb->cb_color_slice); + radeon_emit(cmd_buffer->cs, cb->cb_color_view); + radeon_emit(cmd_buffer->cs, cb_color_info); + radeon_emit(cmd_buffer->cs, cb->cb_color_attrib); + radeon_emit(cmd_buffer->cs, cb->cb_dcc_control); + radeon_emit(cmd_buffer->cs, cb->cb_color_cmask); + radeon_emit(cmd_buffer->cs, cb->cb_color_cmask_slice); + radeon_emit(cmd_buffer->cs, cb->cb_color_fmask); + radeon_emit(cmd_buffer->cs, cb->cb_color_fmask_slice); - if (is_vi) { /* DCC BASE */ - radeon_set_context_reg(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, cb->cb_dcc_base); - } - } + if (is_vi) { /* DCC BASE */ + radeon_set_context_reg(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, + cb->cb_dcc_base); + } + } - if (radv_dcc_enabled(image, iview->base_mip)) { - /* Drawing with DCC enabled also compresses colorbuffers. */ - VkImageSubresourceRange range = { - .aspectMask = iview->aspect_mask, - .baseMipLevel = iview->base_mip, - .levelCount = iview->level_count, - .baseArrayLayer = iview->base_layer, - .layerCount = iview->layer_count, - }; + if (radv_dcc_enabled(image, iview->base_mip)) { + /* Drawing with DCC enabled also compresses colorbuffers. */ + VkImageSubresourceRange range = { + .aspectMask = iview->aspect_mask, + .baseMipLevel = iview->base_mip, + .levelCount = iview->level_count, + .baseArrayLayer = iview->base_layer, + .layerCount = iview->layer_count, + }; - radv_update_dcc_metadata(cmd_buffer, image, &range, true); - } + radv_update_dcc_metadata(cmd_buffer, image, &range, true); + } } static void -radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer, - struct radv_ds_buffer_info *ds, - const struct radv_image_view *iview, - VkImageLayout layout, - bool in_render_loop, bool requires_cond_exec) +radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_info *ds, + const struct radv_image_view *iview, VkImageLayout layout, + bool in_render_loop, bool requires_cond_exec) { - const struct radv_image *image = iview->image; - uint32_t db_z_info = ds->db_z_info; - uint32_t db_z_info_reg; + const struct radv_image *image = iview->image; + uint32_t db_z_info = ds->db_z_info; + uint32_t db_z_info_reg; - if (!cmd_buffer->device->physical_device->rad_info.has_tc_compat_zrange_bug || - !radv_image_is_tc_compat_htile(image)) - return; + if (!cmd_buffer->device->physical_device->rad_info.has_tc_compat_zrange_bug || + !radv_image_is_tc_compat_htile(image)) + return; - if (!radv_layout_is_htile_compressed(cmd_buffer->device, image, layout, in_render_loop, - radv_image_queue_family_mask(image, - cmd_buffer->queue_family_index, - cmd_buffer->queue_family_index))) { - db_z_info &= C_028040_TILE_SURFACE_ENABLE; - } + if (!radv_layout_is_htile_compressed( + cmd_buffer->device, image, layout, in_render_loop, + radv_image_queue_family_mask(image, cmd_buffer->queue_family_index, + cmd_buffer->queue_family_index))) { + db_z_info &= C_028040_TILE_SURFACE_ENABLE; + } - db_z_info &= C_028040_ZRANGE_PRECISION; + db_z_info &= C_028040_ZRANGE_PRECISION; - if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) { - db_z_info_reg = R_028038_DB_Z_INFO; - } else { - db_z_info_reg = R_028040_DB_Z_INFO; - } + if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) { + db_z_info_reg = R_028038_DB_Z_INFO; + } else { + db_z_info_reg = R_028040_DB_Z_INFO; + } - /* When we don't know the last fast clear value we need to emit a - * conditional packet that will eventually skip the following - * SET_CONTEXT_REG packet. - */ - if (requires_cond_exec) { - uint64_t va = radv_get_tc_compat_zrange_va(image, iview->base_mip); + /* When we don't know the last fast clear value we need to emit a + * conditional packet that will eventually skip the following + * SET_CONTEXT_REG packet. + */ + if (requires_cond_exec) { + uint64_t va = radv_get_tc_compat_zrange_va(image, iview->base_mip); - radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_EXEC, 3, 0)); - radeon_emit(cmd_buffer->cs, va); - radeon_emit(cmd_buffer->cs, va >> 32); - radeon_emit(cmd_buffer->cs, 0); - radeon_emit(cmd_buffer->cs, 3); /* SET_CONTEXT_REG size */ - } + radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_EXEC, 3, 0)); + radeon_emit(cmd_buffer->cs, va); + radeon_emit(cmd_buffer->cs, va >> 32); + radeon_emit(cmd_buffer->cs, 0); + radeon_emit(cmd_buffer->cs, 3); /* SET_CONTEXT_REG size */ + } - radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info); + radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info); } static void -radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, - struct radv_ds_buffer_info *ds, - struct radv_image_view *iview, - VkImageLayout layout, - bool in_render_loop) +radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_info *ds, + struct radv_image_view *iview, VkImageLayout layout, bool in_render_loop) { - const struct radv_image *image = iview->image; - uint32_t db_z_info = ds->db_z_info; - uint32_t db_stencil_info = ds->db_stencil_info; + const struct radv_image *image = iview->image; + uint32_t db_z_info = ds->db_z_info; + uint32_t db_stencil_info = ds->db_stencil_info; - if (!radv_layout_is_htile_compressed(cmd_buffer->device, image, layout, in_render_loop, - radv_image_queue_family_mask(image, - cmd_buffer->queue_family_index, - cmd_buffer->queue_family_index))) { - db_z_info &= C_028040_TILE_SURFACE_ENABLE; - db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1); - } + if (!radv_layout_is_htile_compressed( + cmd_buffer->device, image, layout, in_render_loop, + radv_image_queue_family_mask(image, cmd_buffer->queue_family_index, + cmd_buffer->queue_family_index))) { + db_z_info &= C_028040_TILE_SURFACE_ENABLE; + db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1); + } - radeon_set_context_reg(cmd_buffer->cs, R_028008_DB_DEPTH_VIEW, ds->db_depth_view); - radeon_set_context_reg(cmd_buffer->cs, R_028ABC_DB_HTILE_SURFACE, ds->db_htile_surface); + radeon_set_context_reg(cmd_buffer->cs, R_028008_DB_DEPTH_VIEW, ds->db_depth_view); + radeon_set_context_reg(cmd_buffer->cs, R_028ABC_DB_HTILE_SURFACE, ds->db_htile_surface); - if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) { - radeon_set_context_reg(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, ds->db_htile_data_base); - radeon_set_context_reg(cmd_buffer->cs, R_02801C_DB_DEPTH_SIZE_XY, ds->db_depth_size); + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) { + radeon_set_context_reg(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, ds->db_htile_data_base); + radeon_set_context_reg(cmd_buffer->cs, R_02801C_DB_DEPTH_SIZE_XY, ds->db_depth_size); - radeon_set_context_reg_seq(cmd_buffer->cs, R_02803C_DB_DEPTH_INFO, 7); - radeon_emit(cmd_buffer->cs, S_02803C_RESOURCE_LEVEL(1)); - radeon_emit(cmd_buffer->cs, db_z_info); - radeon_emit(cmd_buffer->cs, db_stencil_info); - radeon_emit(cmd_buffer->cs, ds->db_z_read_base); - radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); - radeon_emit(cmd_buffer->cs, ds->db_z_read_base); - radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); + radeon_set_context_reg_seq(cmd_buffer->cs, R_02803C_DB_DEPTH_INFO, 7); + radeon_emit(cmd_buffer->cs, S_02803C_RESOURCE_LEVEL(1)); + radeon_emit(cmd_buffer->cs, db_z_info); + radeon_emit(cmd_buffer->cs, db_stencil_info); + radeon_emit(cmd_buffer->cs, ds->db_z_read_base); + radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); + radeon_emit(cmd_buffer->cs, ds->db_z_read_base); + radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); - radeon_set_context_reg_seq(cmd_buffer->cs, R_028068_DB_Z_READ_BASE_HI, 5); - radeon_emit(cmd_buffer->cs, ds->db_z_read_base >> 32); - radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base >> 32); - radeon_emit(cmd_buffer->cs, ds->db_z_read_base >> 32); - radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base >> 32); - radeon_emit(cmd_buffer->cs, ds->db_htile_data_base >> 32); - } else if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) { - radeon_set_context_reg_seq(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, 3); - radeon_emit(cmd_buffer->cs, ds->db_htile_data_base); - radeon_emit(cmd_buffer->cs, S_028018_BASE_HI(ds->db_htile_data_base >> 32)); - radeon_emit(cmd_buffer->cs, ds->db_depth_size); + radeon_set_context_reg_seq(cmd_buffer->cs, R_028068_DB_Z_READ_BASE_HI, 5); + radeon_emit(cmd_buffer->cs, ds->db_z_read_base >> 32); + radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base >> 32); + radeon_emit(cmd_buffer->cs, ds->db_z_read_base >> 32); + radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base >> 32); + radeon_emit(cmd_buffer->cs, ds->db_htile_data_base >> 32); + } else if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) { + radeon_set_context_reg_seq(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, 3); + radeon_emit(cmd_buffer->cs, ds->db_htile_data_base); + radeon_emit(cmd_buffer->cs, S_028018_BASE_HI(ds->db_htile_data_base >> 32)); + radeon_emit(cmd_buffer->cs, ds->db_depth_size); - radeon_set_context_reg_seq(cmd_buffer->cs, R_028038_DB_Z_INFO, 10); - radeon_emit(cmd_buffer->cs, db_z_info); /* DB_Z_INFO */ - radeon_emit(cmd_buffer->cs, db_stencil_info); /* DB_STENCIL_INFO */ - radeon_emit(cmd_buffer->cs, ds->db_z_read_base); /* DB_Z_READ_BASE */ - radeon_emit(cmd_buffer->cs, S_028044_BASE_HI(ds->db_z_read_base >> 32)); /* DB_Z_READ_BASE_HI */ - radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); /* DB_STENCIL_READ_BASE */ - radeon_emit(cmd_buffer->cs, S_02804C_BASE_HI(ds->db_stencil_read_base >> 32)); /* DB_STENCIL_READ_BASE_HI */ - radeon_emit(cmd_buffer->cs, ds->db_z_write_base); /* DB_Z_WRITE_BASE */ - radeon_emit(cmd_buffer->cs, S_028054_BASE_HI(ds->db_z_write_base >> 32)); /* DB_Z_WRITE_BASE_HI */ - radeon_emit(cmd_buffer->cs, ds->db_stencil_write_base); /* DB_STENCIL_WRITE_BASE */ - radeon_emit(cmd_buffer->cs, S_02805C_BASE_HI(ds->db_stencil_write_base >> 32)); /* DB_STENCIL_WRITE_BASE_HI */ + radeon_set_context_reg_seq(cmd_buffer->cs, R_028038_DB_Z_INFO, 10); + radeon_emit(cmd_buffer->cs, db_z_info); /* DB_Z_INFO */ + radeon_emit(cmd_buffer->cs, db_stencil_info); /* DB_STENCIL_INFO */ + radeon_emit(cmd_buffer->cs, ds->db_z_read_base); /* DB_Z_READ_BASE */ + radeon_emit(cmd_buffer->cs, + S_028044_BASE_HI(ds->db_z_read_base >> 32)); /* DB_Z_READ_BASE_HI */ + radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); /* DB_STENCIL_READ_BASE */ + radeon_emit(cmd_buffer->cs, + S_02804C_BASE_HI(ds->db_stencil_read_base >> 32)); /* DB_STENCIL_READ_BASE_HI */ + radeon_emit(cmd_buffer->cs, ds->db_z_write_base); /* DB_Z_WRITE_BASE */ + radeon_emit(cmd_buffer->cs, + S_028054_BASE_HI(ds->db_z_write_base >> 32)); /* DB_Z_WRITE_BASE_HI */ + radeon_emit(cmd_buffer->cs, ds->db_stencil_write_base); /* DB_STENCIL_WRITE_BASE */ + radeon_emit(cmd_buffer->cs, + S_02805C_BASE_HI(ds->db_stencil_write_base >> 32)); /* DB_STENCIL_WRITE_BASE_HI */ - radeon_set_context_reg_seq(cmd_buffer->cs, R_028068_DB_Z_INFO2, 2); - radeon_emit(cmd_buffer->cs, ds->db_z_info2); - radeon_emit(cmd_buffer->cs, ds->db_stencil_info2); - } else { - radeon_set_context_reg(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, ds->db_htile_data_base); + radeon_set_context_reg_seq(cmd_buffer->cs, R_028068_DB_Z_INFO2, 2); + radeon_emit(cmd_buffer->cs, ds->db_z_info2); + radeon_emit(cmd_buffer->cs, ds->db_stencil_info2); + } else { + radeon_set_context_reg(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, ds->db_htile_data_base); - radeon_set_context_reg_seq(cmd_buffer->cs, R_02803C_DB_DEPTH_INFO, 9); - radeon_emit(cmd_buffer->cs, ds->db_depth_info); /* R_02803C_DB_DEPTH_INFO */ - radeon_emit(cmd_buffer->cs, db_z_info); /* R_028040_DB_Z_INFO */ - radeon_emit(cmd_buffer->cs, db_stencil_info); /* R_028044_DB_STENCIL_INFO */ - radeon_emit(cmd_buffer->cs, ds->db_z_read_base); /* R_028048_DB_Z_READ_BASE */ - radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); /* R_02804C_DB_STENCIL_READ_BASE */ - radeon_emit(cmd_buffer->cs, ds->db_z_write_base); /* R_028050_DB_Z_WRITE_BASE */ - radeon_emit(cmd_buffer->cs, ds->db_stencil_write_base); /* R_028054_DB_STENCIL_WRITE_BASE */ - radeon_emit(cmd_buffer->cs, ds->db_depth_size); /* R_028058_DB_DEPTH_SIZE */ - radeon_emit(cmd_buffer->cs, ds->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */ + radeon_set_context_reg_seq(cmd_buffer->cs, R_02803C_DB_DEPTH_INFO, 9); + radeon_emit(cmd_buffer->cs, ds->db_depth_info); /* R_02803C_DB_DEPTH_INFO */ + radeon_emit(cmd_buffer->cs, db_z_info); /* R_028040_DB_Z_INFO */ + radeon_emit(cmd_buffer->cs, db_stencil_info); /* R_028044_DB_STENCIL_INFO */ + radeon_emit(cmd_buffer->cs, ds->db_z_read_base); /* R_028048_DB_Z_READ_BASE */ + radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); /* R_02804C_DB_STENCIL_READ_BASE */ + radeon_emit(cmd_buffer->cs, ds->db_z_write_base); /* R_028050_DB_Z_WRITE_BASE */ + radeon_emit(cmd_buffer->cs, ds->db_stencil_write_base); /* R_028054_DB_STENCIL_WRITE_BASE */ + radeon_emit(cmd_buffer->cs, ds->db_depth_size); /* R_028058_DB_DEPTH_SIZE */ + radeon_emit(cmd_buffer->cs, ds->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */ + } - } + /* Update the ZRANGE_PRECISION value for the TC-compat bug. */ + radv_update_zrange_precision(cmd_buffer, ds, iview, layout, in_render_loop, true); - /* Update the ZRANGE_PRECISION value for the TC-compat bug. */ - radv_update_zrange_precision(cmd_buffer, ds, iview, layout, - in_render_loop, true); - - radeon_set_context_reg(cmd_buffer->cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, - ds->pa_su_poly_offset_db_fmt_cntl); + radeon_set_context_reg(cmd_buffer->cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, + ds->pa_su_poly_offset_db_fmt_cntl); } /** @@ -1873,157 +1791,143 @@ radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, */ static void radv_update_bound_fast_clear_ds(struct radv_cmd_buffer *cmd_buffer, - const struct radv_image_view *iview, - VkClearDepthStencilValue ds_clear_value, - VkImageAspectFlags aspects) + const struct radv_image_view *iview, + VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects) { - const struct radv_subpass *subpass = cmd_buffer->state.subpass; - const struct radv_image *image = iview->image; - struct radeon_cmdbuf *cs = cmd_buffer->cs; - uint32_t att_idx; + const struct radv_subpass *subpass = cmd_buffer->state.subpass; + const struct radv_image *image = iview->image; + struct radeon_cmdbuf *cs = cmd_buffer->cs; + uint32_t att_idx; - if (!cmd_buffer->state.attachments || !subpass) - return; + if (!cmd_buffer->state.attachments || !subpass) + return; - if (!subpass->depth_stencil_attachment) - return; + if (!subpass->depth_stencil_attachment) + return; - att_idx = subpass->depth_stencil_attachment->attachment; - if (cmd_buffer->state.attachments[att_idx].iview->image != image) - return; + att_idx = subpass->depth_stencil_attachment->attachment; + if (cmd_buffer->state.attachments[att_idx].iview->image != image) + return; - if (aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | - VK_IMAGE_ASPECT_STENCIL_BIT)) { - radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2); - radeon_emit(cs, ds_clear_value.stencil); - radeon_emit(cs, fui(ds_clear_value.depth)); - } else if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) { - radeon_set_context_reg_seq(cs, R_02802C_DB_DEPTH_CLEAR, 1); - radeon_emit(cs, fui(ds_clear_value.depth)); - } else { - assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT); - radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 1); - radeon_emit(cs, ds_clear_value.stencil); - } + if (aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2); + radeon_emit(cs, ds_clear_value.stencil); + radeon_emit(cs, fui(ds_clear_value.depth)); + } else if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) { + radeon_set_context_reg_seq(cs, R_02802C_DB_DEPTH_CLEAR, 1); + radeon_emit(cs, fui(ds_clear_value.depth)); + } else { + assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT); + radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 1); + radeon_emit(cs, ds_clear_value.stencil); + } - /* Update the ZRANGE_PRECISION value for the TC-compat bug. This is - * only needed when clearing Z to 0.0. - */ - if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && - ds_clear_value.depth == 0.0) { - VkImageLayout layout = subpass->depth_stencil_attachment->layout; - bool in_render_loop = subpass->depth_stencil_attachment->in_render_loop; + /* Update the ZRANGE_PRECISION value for the TC-compat bug. This is + * only needed when clearing Z to 0.0. + */ + if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && ds_clear_value.depth == 0.0) { + VkImageLayout layout = subpass->depth_stencil_attachment->layout; + bool in_render_loop = subpass->depth_stencil_attachment->in_render_loop; - radv_update_zrange_precision(cmd_buffer, &cmd_buffer->state.attachments[att_idx].ds, - iview, layout, in_render_loop, false); - } + radv_update_zrange_precision(cmd_buffer, &cmd_buffer->state.attachments[att_idx].ds, iview, + layout, in_render_loop, false); + } - cmd_buffer->state.context_roll_without_scissor_emitted = true; + cmd_buffer->state.context_roll_without_scissor_emitted = true; } /** * Set the clear depth/stencil values to the image's metadata. */ static void -radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - const VkImageSubresourceRange *range, - VkClearDepthStencilValue ds_clear_value, - VkImageAspectFlags aspects) +radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, + const VkImageSubresourceRange *range, + VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects) { - struct radeon_cmdbuf *cs = cmd_buffer->cs; - uint32_t level_count = radv_get_levelCount(image, range); + struct radeon_cmdbuf *cs = cmd_buffer->cs; + uint32_t level_count = radv_get_levelCount(image, range); - if (aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | - VK_IMAGE_ASPECT_STENCIL_BIT)) { - uint64_t va = radv_get_ds_clear_value_va(image, range->baseMipLevel); + if (aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + uint64_t va = radv_get_ds_clear_value_va(image, range->baseMipLevel); - /* Use the fastest way when both aspects are used. */ - radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + 2 * level_count, cmd_buffer->state.predicating)); - radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | - S_370_WR_CONFIRM(1) | - S_370_ENGINE_SEL(V_370_PFP)); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); + /* Use the fastest way when both aspects are used. */ + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + 2 * level_count, cmd_buffer->state.predicating)); + radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); - for (uint32_t l = 0; l < level_count; l++) { - radeon_emit(cs, ds_clear_value.stencil); - radeon_emit(cs, fui(ds_clear_value.depth)); - } - } else { - /* Otherwise we need one WRITE_DATA packet per level. */ - for (uint32_t l = 0; l < level_count; l++) { - uint64_t va = radv_get_ds_clear_value_va(image, range->baseMipLevel + l); - unsigned value; + for (uint32_t l = 0; l < level_count; l++) { + radeon_emit(cs, ds_clear_value.stencil); + radeon_emit(cs, fui(ds_clear_value.depth)); + } + } else { + /* Otherwise we need one WRITE_DATA packet per level. */ + for (uint32_t l = 0; l < level_count; l++) { + uint64_t va = radv_get_ds_clear_value_va(image, range->baseMipLevel + l); + unsigned value; - if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) { - value = fui(ds_clear_value.depth); - va += 4; - } else { - assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT); - value = ds_clear_value.stencil; - } + if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) { + value = fui(ds_clear_value.depth); + va += 4; + } else { + assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT); + value = ds_clear_value.stencil; + } - radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, cmd_buffer->state.predicating)); - radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | - S_370_WR_CONFIRM(1) | - S_370_ENGINE_SEL(V_370_PFP)); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit(cs, value); - } - } + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, cmd_buffer->state.predicating)); + radeon_emit(cs, + S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, value); + } + } } /** * Update the TC-compat metadata value for this image. */ static void -radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - const VkImageSubresourceRange *range, - uint32_t value) +radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, + const VkImageSubresourceRange *range, uint32_t value) { - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radeon_cmdbuf *cs = cmd_buffer->cs; - if (!cmd_buffer->device->physical_device->rad_info.has_tc_compat_zrange_bug) - return; + if (!cmd_buffer->device->physical_device->rad_info.has_tc_compat_zrange_bug) + return; - uint64_t va = radv_get_tc_compat_zrange_va(image, range->baseMipLevel); - uint32_t level_count = radv_get_levelCount(image, range); + uint64_t va = radv_get_tc_compat_zrange_va(image, range->baseMipLevel); + uint32_t level_count = radv_get_levelCount(image, range); - radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + level_count, cmd_buffer->state.predicating)); - radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | - S_370_WR_CONFIRM(1) | - S_370_ENGINE_SEL(V_370_PFP)); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + level_count, cmd_buffer->state.predicating)); + radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); - for (uint32_t l = 0; l < level_count; l++) - radeon_emit(cs, value); + for (uint32_t l = 0; l < level_count; l++) + radeon_emit(cs, value); } static void radv_update_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, - const struct radv_image_view *iview, - VkClearDepthStencilValue ds_clear_value) + const struct radv_image_view *iview, + VkClearDepthStencilValue ds_clear_value) { - VkImageSubresourceRange range = { - .aspectMask = iview->aspect_mask, - .baseMipLevel = iview->base_mip, - .levelCount = iview->level_count, - .baseArrayLayer = iview->base_layer, - .layerCount = iview->layer_count, - }; - uint32_t cond_val; + VkImageSubresourceRange range = { + .aspectMask = iview->aspect_mask, + .baseMipLevel = iview->base_mip, + .levelCount = iview->level_count, + .baseArrayLayer = iview->base_layer, + .layerCount = iview->layer_count, + }; + uint32_t cond_val; - /* Conditionally set DB_Z_INFO.ZRANGE_PRECISION to 0 when the last - * depth clear value is 0.0f. - */ - cond_val = ds_clear_value.depth == 0.0f ? UINT_MAX : 0; + /* Conditionally set DB_Z_INFO.ZRANGE_PRECISION to 0 when the last + * depth clear value is 0.0f. + */ + cond_val = ds_clear_value.depth == 0.0f ? UINT_MAX : 0; - radv_set_tc_compat_zrange_metadata(cmd_buffer, iview->image, &range, - cond_val); + radv_set_tc_compat_zrange_metadata(cmd_buffer, iview->image, &range, cond_val); } /** @@ -2031,79 +1935,72 @@ radv_update_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, */ void radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, - const struct radv_image_view *iview, - VkClearDepthStencilValue ds_clear_value, - VkImageAspectFlags aspects) + const struct radv_image_view *iview, + VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects) { - VkImageSubresourceRange range = { - .aspectMask = iview->aspect_mask, - .baseMipLevel = iview->base_mip, - .levelCount = iview->level_count, - .baseArrayLayer = iview->base_layer, - .layerCount = iview->layer_count, - }; - struct radv_image *image = iview->image; + VkImageSubresourceRange range = { + .aspectMask = iview->aspect_mask, + .baseMipLevel = iview->base_mip, + .levelCount = iview->level_count, + .baseArrayLayer = iview->base_layer, + .layerCount = iview->layer_count, + }; + struct radv_image *image = iview->image; - assert(radv_htile_enabled(image, range.baseMipLevel)); + assert(radv_htile_enabled(image, range.baseMipLevel)); - radv_set_ds_clear_metadata(cmd_buffer, iview->image, &range, - ds_clear_value, aspects); + radv_set_ds_clear_metadata(cmd_buffer, iview->image, &range, ds_clear_value, aspects); - if (radv_image_is_tc_compat_htile(image) && - (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) { - radv_update_tc_compat_zrange_metadata(cmd_buffer, iview, - ds_clear_value); - } + if (radv_image_is_tc_compat_htile(image) && (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) { + radv_update_tc_compat_zrange_metadata(cmd_buffer, iview, ds_clear_value); + } - radv_update_bound_fast_clear_ds(cmd_buffer, iview, ds_clear_value, - aspects); + radv_update_bound_fast_clear_ds(cmd_buffer, iview, ds_clear_value, aspects); } /** * Load the clear depth/stencil values from the image's metadata. */ static void -radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, - const struct radv_image_view *iview) +radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview) { - struct radeon_cmdbuf *cs = cmd_buffer->cs; - const struct radv_image *image = iview->image; - VkImageAspectFlags aspects = vk_format_aspects(image->vk_format); - uint64_t va = radv_get_ds_clear_value_va(image, iview->base_mip); - unsigned reg_offset = 0, reg_count = 0; + struct radeon_cmdbuf *cs = cmd_buffer->cs; + const struct radv_image *image = iview->image; + VkImageAspectFlags aspects = vk_format_aspects(image->vk_format); + uint64_t va = radv_get_ds_clear_value_va(image, iview->base_mip); + unsigned reg_offset = 0, reg_count = 0; - assert(radv_image_has_htile(image)); + assert(radv_image_has_htile(image)); - if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { - ++reg_count; - } else { - ++reg_offset; - va += 4; - } - if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) - ++reg_count; + if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + ++reg_count; + } else { + ++reg_offset; + va += 4; + } + if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) + ++reg_count; - uint32_t reg = R_028028_DB_STENCIL_CLEAR + 4 * reg_offset; + uint32_t reg = R_028028_DB_STENCIL_CLEAR + 4 * reg_offset; - if (cmd_buffer->device->physical_device->rad_info.has_load_ctx_reg_pkt) { - radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, 0)); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2); - radeon_emit(cs, reg_count); - } else { - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | - COPY_DATA_DST_SEL(COPY_DATA_REG) | - (reg_count == 2 ? COPY_DATA_COUNT_SEL : 0)); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit(cs, reg >> 2); - radeon_emit(cs, 0); + if (cmd_buffer->device->physical_device->rad_info.has_load_ctx_reg_pkt) { + radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, 0)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2); + radeon_emit(cs, reg_count); + } else { + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | + (reg_count == 2 ? COPY_DATA_COUNT_SEL : 0)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, reg >> 2); + radeon_emit(cs, 0); - radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); - radeon_emit(cs, 0); - } + radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); + radeon_emit(cs, 0); + } } /* @@ -2112,126 +2009,115 @@ radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, * cmask eliminate is required. */ void -radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - const VkImageSubresourceRange *range, bool value) +radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, + const VkImageSubresourceRange *range, bool value) { - if (!image->fce_pred_offset) - return; + if (!image->fce_pred_offset) + return; - uint64_t pred_val = value; - uint64_t va = radv_image_get_fce_pred_va(image, range->baseMipLevel); - uint32_t level_count = radv_get_levelCount(image, range); - uint32_t count = 2 * level_count; + uint64_t pred_val = value; + uint64_t va = radv_image_get_fce_pred_va(image, range->baseMipLevel); + uint32_t level_count = radv_get_levelCount(image, range); + uint32_t count = 2 * level_count; - radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0)); - radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM) | - S_370_WR_CONFIRM(1) | - S_370_ENGINE_SEL(V_370_PFP)); - radeon_emit(cmd_buffer->cs, va); - radeon_emit(cmd_buffer->cs, va >> 32); + radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0)); + radeon_emit(cmd_buffer->cs, + S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); + radeon_emit(cmd_buffer->cs, va); + radeon_emit(cmd_buffer->cs, va >> 32); - for (uint32_t l = 0; l < level_count; l++) { - radeon_emit(cmd_buffer->cs, pred_val); - radeon_emit(cmd_buffer->cs, pred_val >> 32); - } + for (uint32_t l = 0; l < level_count; l++) { + radeon_emit(cmd_buffer->cs, pred_val); + radeon_emit(cmd_buffer->cs, pred_val >> 32); + } } /** * Update the DCC predicate to reflect the compression state. */ void -radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - const VkImageSubresourceRange *range, bool value) +radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, + const VkImageSubresourceRange *range, bool value) { - if (image->dcc_pred_offset == 0) - return; + if (image->dcc_pred_offset == 0) + return; - uint64_t pred_val = value; - uint64_t va = radv_image_get_dcc_pred_va(image, range->baseMipLevel); - uint32_t level_count = radv_get_levelCount(image, range); - uint32_t count = 2 * level_count; + uint64_t pred_val = value; + uint64_t va = radv_image_get_dcc_pred_va(image, range->baseMipLevel); + uint32_t level_count = radv_get_levelCount(image, range); + uint32_t count = 2 * level_count; - assert(radv_dcc_enabled(image, range->baseMipLevel)); + assert(radv_dcc_enabled(image, range->baseMipLevel)); - radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0)); - radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM) | - S_370_WR_CONFIRM(1) | - S_370_ENGINE_SEL(V_370_PFP)); - radeon_emit(cmd_buffer->cs, va); - radeon_emit(cmd_buffer->cs, va >> 32); + radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0)); + radeon_emit(cmd_buffer->cs, + S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); + radeon_emit(cmd_buffer->cs, va); + radeon_emit(cmd_buffer->cs, va >> 32); - for (uint32_t l = 0; l < level_count; l++) { - radeon_emit(cmd_buffer->cs, pred_val); - radeon_emit(cmd_buffer->cs, pred_val >> 32); - } + for (uint32_t l = 0; l < level_count; l++) { + radeon_emit(cmd_buffer->cs, pred_val); + radeon_emit(cmd_buffer->cs, pred_val >> 32); + } } /** * Update the fast clear color values if the image is bound as a color buffer. */ static void -radv_update_bound_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - int cb_idx, - uint32_t color_values[2]) +radv_update_bound_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, + int cb_idx, uint32_t color_values[2]) { - const struct radv_subpass *subpass = cmd_buffer->state.subpass; - struct radeon_cmdbuf *cs = cmd_buffer->cs; - uint32_t att_idx; + const struct radv_subpass *subpass = cmd_buffer->state.subpass; + struct radeon_cmdbuf *cs = cmd_buffer->cs; + uint32_t att_idx; - if (!cmd_buffer->state.attachments || !subpass) - return; + if (!cmd_buffer->state.attachments || !subpass) + return; - att_idx = subpass->color_attachments[cb_idx].attachment; - if (att_idx == VK_ATTACHMENT_UNUSED) - return; + att_idx = subpass->color_attachments[cb_idx].attachment; + if (att_idx == VK_ATTACHMENT_UNUSED) + return; - if (cmd_buffer->state.attachments[att_idx].iview->image != image) - return; + if (cmd_buffer->state.attachments[att_idx].iview->image != image) + return; - radeon_set_context_reg_seq(cs, R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c, 2); - radeon_emit(cs, color_values[0]); - radeon_emit(cs, color_values[1]); + radeon_set_context_reg_seq(cs, R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c, 2); + radeon_emit(cs, color_values[0]); + radeon_emit(cs, color_values[1]); - cmd_buffer->state.context_roll_without_scissor_emitted = true; + cmd_buffer->state.context_roll_without_scissor_emitted = true; } /** * Set the clear color values to the image's metadata. */ static void -radv_set_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - const VkImageSubresourceRange *range, - uint32_t color_values[2]) +radv_set_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, + const VkImageSubresourceRange *range, uint32_t color_values[2]) { - struct radeon_cmdbuf *cs = cmd_buffer->cs; - uint32_t level_count = radv_get_levelCount(image, range); - uint32_t count = 2 * level_count; + struct radeon_cmdbuf *cs = cmd_buffer->cs; + uint32_t level_count = radv_get_levelCount(image, range); + uint32_t count = 2 * level_count; - assert(radv_image_has_cmask(image) || - radv_dcc_enabled(image, range->baseMipLevel)); + assert(radv_image_has_cmask(image) || radv_dcc_enabled(image, range->baseMipLevel)); - if (radv_image_has_clear_value(image)) { - uint64_t va = radv_image_get_fast_clear_va(image, range->baseMipLevel); + if (radv_image_has_clear_value(image)) { + uint64_t va = radv_image_get_fast_clear_va(image, range->baseMipLevel); - radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, cmd_buffer->state.predicating)); - radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | - S_370_WR_CONFIRM(1) | - S_370_ENGINE_SEL(V_370_PFP)); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, cmd_buffer->state.predicating)); + radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); - for (uint32_t l = 0; l < level_count; l++) { - radeon_emit(cs, color_values[0]); - radeon_emit(cs, color_values[1]); - } - } else { - /* Some default value we can set in the update. */ - assert(color_values[0] == 0 && color_values[1] == 0); - } + for (uint32_t l = 0; l < level_count; l++) { + radeon_emit(cs, color_values[0]); + radeon_emit(cs, color_values[1]); + } + } else { + /* Some default value we can set in the update. */ + assert(color_values[0] == 0 && color_values[1] == 0); + } } /** @@ -2239,72 +2125,65 @@ radv_set_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, */ void radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, - const struct radv_image_view *iview, - int cb_idx, - uint32_t color_values[2]) + const struct radv_image_view *iview, int cb_idx, + uint32_t color_values[2]) { - struct radv_image *image = iview->image; - VkImageSubresourceRange range = { - .aspectMask = iview->aspect_mask, - .baseMipLevel = iview->base_mip, - .levelCount = iview->level_count, - .baseArrayLayer = iview->base_layer, - .layerCount = iview->layer_count, - }; + struct radv_image *image = iview->image; + VkImageSubresourceRange range = { + .aspectMask = iview->aspect_mask, + .baseMipLevel = iview->base_mip, + .levelCount = iview->level_count, + .baseArrayLayer = iview->base_layer, + .layerCount = iview->layer_count, + }; - assert(radv_image_has_cmask(image) || - radv_dcc_enabled(image, iview->base_mip)); + assert(radv_image_has_cmask(image) || radv_dcc_enabled(image, iview->base_mip)); - radv_set_color_clear_metadata(cmd_buffer, image, &range, color_values); + radv_set_color_clear_metadata(cmd_buffer, image, &range, color_values); - radv_update_bound_fast_clear_color(cmd_buffer, image, cb_idx, - color_values); + radv_update_bound_fast_clear_color(cmd_buffer, image, cb_idx, color_values); } /** * Load the clear color values from the image's metadata. */ static void -radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, - struct radv_image_view *iview, - int cb_idx) +radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *iview, + int cb_idx) { - struct radeon_cmdbuf *cs = cmd_buffer->cs; - struct radv_image *image = iview->image; + struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_image *image = iview->image; - if (!radv_image_has_cmask(image) && - !radv_dcc_enabled(image, iview->base_mip)) - return; + if (!radv_image_has_cmask(image) && !radv_dcc_enabled(image, iview->base_mip)) + return; - if (!radv_image_has_clear_value(image)) { - uint32_t color_values[2] = {0, 0}; - radv_update_bound_fast_clear_color(cmd_buffer, image, cb_idx, - color_values); - return; - } + if (!radv_image_has_clear_value(image)) { + uint32_t color_values[2] = {0, 0}; + radv_update_bound_fast_clear_color(cmd_buffer, image, cb_idx, color_values); + return; + } - uint64_t va = radv_image_get_fast_clear_va(image, iview->base_mip); - uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c; + uint64_t va = radv_image_get_fast_clear_va(image, iview->base_mip); + uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c; - if (cmd_buffer->device->physical_device->rad_info.has_load_ctx_reg_pkt) { - radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, cmd_buffer->state.predicating)); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2); - radeon_emit(cs, 2); - } else { - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | - COPY_DATA_DST_SEL(COPY_DATA_REG) | - COPY_DATA_COUNT_SEL); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit(cs, reg >> 2); - radeon_emit(cs, 0); + if (cmd_buffer->device->physical_device->rad_info.has_load_ctx_reg_pkt) { + radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, cmd_buffer->state.predicating)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2); + radeon_emit(cs, 2); + } else { + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | + COPY_DATA_COUNT_SEL); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, reg >> 2); + radeon_emit(cs, 0); - radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating)); - radeon_emit(cs, 0); - } + radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating)); + radeon_emit(cs, 0); + } } /* GFX9+ metadata cache flushing workaround. metadata cache coherency is @@ -2316,36 +2195,35 @@ radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, static void radv_emit_fb_mip_change_flush(struct radv_cmd_buffer *cmd_buffer) { - struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer; - const struct radv_subpass *subpass = cmd_buffer->state.subpass; - bool color_mip_changed = false; + struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer; + const struct radv_subpass *subpass = cmd_buffer->state.subpass; + bool color_mip_changed = false; - /* Entire workaround is not applicable before GFX9 */ - if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9) - return; + /* Entire workaround is not applicable before GFX9 */ + if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9) + return; - if (!framebuffer) - return; + if (!framebuffer) + return; - for (int i = 0; i < subpass->color_count; ++i) { - int idx = subpass->color_attachments[i].attachment; - if (idx == VK_ATTACHMENT_UNUSED) - continue; + for (int i = 0; i < subpass->color_count; ++i) { + int idx = subpass->color_attachments[i].attachment; + if (idx == VK_ATTACHMENT_UNUSED) + continue; - struct radv_image_view *iview = cmd_buffer->state.attachments[idx].iview; + struct radv_image_view *iview = cmd_buffer->state.attachments[idx].iview; - if ((radv_image_has_CB_metadata(iview->image) || - radv_image_has_dcc(iview->image)) && - cmd_buffer->state.cb_mip[i] != iview->base_mip) - color_mip_changed = true; + if ((radv_image_has_CB_metadata(iview->image) || radv_image_has_dcc(iview->image)) && + cmd_buffer->state.cb_mip[i] != iview->base_mip) + color_mip_changed = true; - cmd_buffer->state.cb_mip[i] = iview->base_mip; - } + cmd_buffer->state.cb_mip[i] = iview->base_mip; + } - if (color_mip_changed) { - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | - RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; - } + if (color_mip_changed) { + cmd_buffer->state.flush_bits |= + RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; + } } /* This function does the flushes for mip changes if the levels are not zero for @@ -2355,2924 +2233,2739 @@ radv_emit_fb_mip_change_flush(struct radv_cmd_buffer *cmd_buffer) static void radv_emit_mip_change_flush_default(struct radv_cmd_buffer *cmd_buffer) { - /* Entire workaround is not applicable before GFX9 */ - if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9) - return; + /* Entire workaround is not applicable before GFX9 */ + if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9) + return; - bool need_color_mip_flush = false; - for (unsigned i = 0; i < 8; ++i) { - if (cmd_buffer->state.cb_mip[i]) { - need_color_mip_flush = true; - break; - } - } + bool need_color_mip_flush = false; + for (unsigned i = 0; i < 8; ++i) { + if (cmd_buffer->state.cb_mip[i]) { + need_color_mip_flush = true; + break; + } + } - if (need_color_mip_flush) { - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | - RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; - } + if (need_color_mip_flush) { + cmd_buffer->state.flush_bits |= + RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; + } - memset(cmd_buffer->state.cb_mip, 0, sizeof(cmd_buffer->state.cb_mip)); + memset(cmd_buffer->state.cb_mip, 0, sizeof(cmd_buffer->state.cb_mip)); } static void radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer) { - int i; - struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer; - const struct radv_subpass *subpass = cmd_buffer->state.subpass; + int i; + struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer; + const struct radv_subpass *subpass = cmd_buffer->state.subpass; - /* this may happen for inherited secondary recording */ - if (!framebuffer) - return; + /* this may happen for inherited secondary recording */ + if (!framebuffer) + return; - for (i = 0; i < 8; ++i) { - if (i >= subpass->color_count || subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) { - radeon_set_context_reg(cmd_buffer->cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, - S_028C70_FORMAT(V_028C70_COLOR_INVALID)); - continue; - } + for (i = 0; i < 8; ++i) { + if (i >= subpass->color_count || + subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) { + radeon_set_context_reg(cmd_buffer->cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, + S_028C70_FORMAT(V_028C70_COLOR_INVALID)); + continue; + } - int idx = subpass->color_attachments[i].attachment; - struct radv_image_view *iview = cmd_buffer->state.attachments[idx].iview; - VkImageLayout layout = subpass->color_attachments[i].layout; - bool in_render_loop = subpass->color_attachments[i].in_render_loop; + int idx = subpass->color_attachments[i].attachment; + struct radv_image_view *iview = cmd_buffer->state.attachments[idx].iview; + VkImageLayout layout = subpass->color_attachments[i].layout; + bool in_render_loop = subpass->color_attachments[i].in_render_loop; - radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, iview->bo); + radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, iview->bo); - assert(iview->aspect_mask & (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_PLANE_0_BIT | - VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT)); - radv_emit_fb_color_state(cmd_buffer, i, &cmd_buffer->state.attachments[idx].cb, iview, layout, - in_render_loop, cmd_buffer->state.attachments[idx].disable_dcc); + assert(iview->aspect_mask & (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_PLANE_0_BIT | + VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT)); + radv_emit_fb_color_state(cmd_buffer, i, &cmd_buffer->state.attachments[idx].cb, iview, layout, + in_render_loop, cmd_buffer->state.attachments[idx].disable_dcc); - radv_load_color_clear_metadata(cmd_buffer, iview, i); - } + radv_load_color_clear_metadata(cmd_buffer, iview, i); + } - if (subpass->depth_stencil_attachment) { - int idx = subpass->depth_stencil_attachment->attachment; - VkImageLayout layout = subpass->depth_stencil_attachment->layout; - bool in_render_loop = subpass->depth_stencil_attachment->in_render_loop; - struct radv_image_view *iview = cmd_buffer->state.attachments[idx].iview; - radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->state.attachments[idx].iview->bo); + if (subpass->depth_stencil_attachment) { + int idx = subpass->depth_stencil_attachment->attachment; + VkImageLayout layout = subpass->depth_stencil_attachment->layout; + bool in_render_loop = subpass->depth_stencil_attachment->in_render_loop; + struct radv_image_view *iview = cmd_buffer->state.attachments[idx].iview; + radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, + cmd_buffer->state.attachments[idx].iview->bo); - radv_emit_fb_ds_state(cmd_buffer, &cmd_buffer->state.attachments[idx].ds, iview, layout, in_render_loop); + radv_emit_fb_ds_state(cmd_buffer, &cmd_buffer->state.attachments[idx].ds, iview, layout, + in_render_loop); - if (radv_layout_is_htile_compressed(cmd_buffer->device, iview->image, layout, in_render_loop, - radv_image_queue_family_mask(iview->image, - cmd_buffer->queue_family_index, - cmd_buffer->queue_family_index))) { - /* Only load the depth/stencil fast clear values when - * compressed rendering is enabled. - */ - radv_load_ds_clear_metadata(cmd_buffer, iview); - } - } else { - if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) - radeon_set_context_reg_seq(cmd_buffer->cs, R_028038_DB_Z_INFO, 2); - else - radeon_set_context_reg_seq(cmd_buffer->cs, R_028040_DB_Z_INFO, 2); + if (radv_layout_is_htile_compressed( + cmd_buffer->device, iview->image, layout, in_render_loop, + radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index, + cmd_buffer->queue_family_index))) { + /* Only load the depth/stencil fast clear values when + * compressed rendering is enabled. + */ + radv_load_ds_clear_metadata(cmd_buffer, iview); + } + } else { + if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) + radeon_set_context_reg_seq(cmd_buffer->cs, R_028038_DB_Z_INFO, 2); + else + radeon_set_context_reg_seq(cmd_buffer->cs, R_028040_DB_Z_INFO, 2); - radeon_emit(cmd_buffer->cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* DB_Z_INFO */ - radeon_emit(cmd_buffer->cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* DB_STENCIL_INFO */ - } - radeon_set_context_reg(cmd_buffer->cs, R_028208_PA_SC_WINDOW_SCISSOR_BR, - S_028208_BR_X(framebuffer->width) | - S_028208_BR_Y(framebuffer->height)); + radeon_emit(cmd_buffer->cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* DB_Z_INFO */ + radeon_emit(cmd_buffer->cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* DB_STENCIL_INFO */ + } + radeon_set_context_reg(cmd_buffer->cs, R_028208_PA_SC_WINDOW_SCISSOR_BR, + S_028208_BR_X(framebuffer->width) | S_028208_BR_Y(framebuffer->height)); - if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX8) { - bool disable_constant_encode = - cmd_buffer->device->physical_device->rad_info.has_dcc_constant_encode; - enum chip_class chip_class = - cmd_buffer->device->physical_device->rad_info.chip_class; - uint8_t watermark = chip_class >= GFX10 ? 6 : 4; + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX8) { + bool disable_constant_encode = + cmd_buffer->device->physical_device->rad_info.has_dcc_constant_encode; + enum chip_class chip_class = cmd_buffer->device->physical_device->rad_info.chip_class; + uint8_t watermark = chip_class >= GFX10 ? 6 : 4; - radeon_set_context_reg(cmd_buffer->cs, R_028424_CB_DCC_CONTROL, - S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(chip_class <= GFX9) | - S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) | - S_028424_DISABLE_CONSTANT_ENCODE_REG(disable_constant_encode)); - } + radeon_set_context_reg(cmd_buffer->cs, R_028424_CB_DCC_CONTROL, + S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(chip_class <= GFX9) | + S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) | + S_028424_DISABLE_CONSTANT_ENCODE_REG(disable_constant_encode)); + } - if (cmd_buffer->device->dfsm_allowed) { - radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); - radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); - } + if (cmd_buffer->device->dfsm_allowed) { + radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); + radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); + } - cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_FRAMEBUFFER; + cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_FRAMEBUFFER; } static void radv_emit_index_buffer(struct radv_cmd_buffer *cmd_buffer, bool indirect) { - struct radeon_cmdbuf *cs = cmd_buffer->cs; - struct radv_cmd_state *state = &cmd_buffer->state; + struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_cmd_state *state = &cmd_buffer->state; - if (state->index_type != state->last_index_type) { - if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) { - radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, - cs, R_03090C_VGT_INDEX_TYPE, - 2, state->index_type); - } else { - radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0)); - radeon_emit(cs, state->index_type); - } + if (state->index_type != state->last_index_type) { + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) { + radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cs, + R_03090C_VGT_INDEX_TYPE, 2, state->index_type); + } else { + radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0)); + radeon_emit(cs, state->index_type); + } - state->last_index_type = state->index_type; - } + state->last_index_type = state->index_type; + } - /* For the direct indexed draws we use DRAW_INDEX_2, which includes - * the index_va and max_index_count already. */ - if (!indirect) - return; + /* For the direct indexed draws we use DRAW_INDEX_2, which includes + * the index_va and max_index_count already. */ + if (!indirect) + return; - radeon_emit(cs, PKT3(PKT3_INDEX_BASE, 1, 0)); - radeon_emit(cs, state->index_va); - radeon_emit(cs, state->index_va >> 32); + radeon_emit(cs, PKT3(PKT3_INDEX_BASE, 1, 0)); + radeon_emit(cs, state->index_va); + radeon_emit(cs, state->index_va >> 32); - radeon_emit(cs, PKT3(PKT3_INDEX_BUFFER_SIZE, 0, 0)); - radeon_emit(cs, state->max_index_count); + radeon_emit(cs, PKT3(PKT3_INDEX_BUFFER_SIZE, 0, 0)); + radeon_emit(cs, state->max_index_count); - cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_INDEX_BUFFER; + cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_INDEX_BUFFER; } -void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer) +void +radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer) { - bool has_perfect_queries = cmd_buffer->state.perfect_occlusion_queries_enabled; - struct radv_pipeline *pipeline = cmd_buffer->state.pipeline; - uint32_t pa_sc_mode_cntl_1 = - pipeline ? pipeline->graphics.ms.pa_sc_mode_cntl_1 : 0; - uint32_t db_count_control; + bool has_perfect_queries = cmd_buffer->state.perfect_occlusion_queries_enabled; + struct radv_pipeline *pipeline = cmd_buffer->state.pipeline; + uint32_t pa_sc_mode_cntl_1 = pipeline ? pipeline->graphics.ms.pa_sc_mode_cntl_1 : 0; + uint32_t db_count_control; - if(!cmd_buffer->state.active_occlusion_queries) { - if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) { - if (G_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(pa_sc_mode_cntl_1) && - pipeline->graphics.disable_out_of_order_rast_for_occlusion && - has_perfect_queries) { - /* Re-enable out-of-order rasterization if the - * bound pipeline supports it and if it's has - * been disabled before starting any perfect - * occlusion queries. - */ - radeon_set_context_reg(cmd_buffer->cs, - R_028A4C_PA_SC_MODE_CNTL_1, - pa_sc_mode_cntl_1); - } - } - db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1); - } else { - const struct radv_subpass *subpass = cmd_buffer->state.subpass; - uint32_t sample_rate = subpass ? util_logbase2(subpass->max_sample_count) : 0; - bool gfx10_perfect = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10 && has_perfect_queries; + if (!cmd_buffer->state.active_occlusion_queries) { + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) { + if (G_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(pa_sc_mode_cntl_1) && + pipeline->graphics.disable_out_of_order_rast_for_occlusion && has_perfect_queries) { + /* Re-enable out-of-order rasterization if the + * bound pipeline supports it and if it's has + * been disabled before starting any perfect + * occlusion queries. + */ + radeon_set_context_reg(cmd_buffer->cs, R_028A4C_PA_SC_MODE_CNTL_1, pa_sc_mode_cntl_1); + } + } + db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1); + } else { + const struct radv_subpass *subpass = cmd_buffer->state.subpass; + uint32_t sample_rate = subpass ? util_logbase2(subpass->max_sample_count) : 0; + bool gfx10_perfect = + cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10 && has_perfect_queries; - if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) { - /* Always enable PERFECT_ZPASS_COUNTS due to issues with partially - * covered tiles, discards, and early depth testing. For more details, - * see https://gitlab.freedesktop.org/mesa/mesa/-/issues/3218 */ - db_count_control = - S_028004_PERFECT_ZPASS_COUNTS(1) | - S_028004_DISABLE_CONSERVATIVE_ZPASS_COUNTS(gfx10_perfect) | - S_028004_SAMPLE_RATE(sample_rate) | - S_028004_ZPASS_ENABLE(1) | - S_028004_SLICE_EVEN_ENABLE(1) | - S_028004_SLICE_ODD_ENABLE(1); + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) { + /* Always enable PERFECT_ZPASS_COUNTS due to issues with partially + * covered tiles, discards, and early depth testing. For more details, + * see https://gitlab.freedesktop.org/mesa/mesa/-/issues/3218 */ + db_count_control = S_028004_PERFECT_ZPASS_COUNTS(1) | + S_028004_DISABLE_CONSERVATIVE_ZPASS_COUNTS(gfx10_perfect) | + S_028004_SAMPLE_RATE(sample_rate) | S_028004_ZPASS_ENABLE(1) | + S_028004_SLICE_EVEN_ENABLE(1) | S_028004_SLICE_ODD_ENABLE(1); - if (G_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(pa_sc_mode_cntl_1) && - pipeline->graphics.disable_out_of_order_rast_for_occlusion && - has_perfect_queries) { - /* If the bound pipeline has enabled - * out-of-order rasterization, we should - * disable it before starting any perfect - * occlusion queries. - */ - pa_sc_mode_cntl_1 &= C_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE; + if (G_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(pa_sc_mode_cntl_1) && + pipeline->graphics.disable_out_of_order_rast_for_occlusion && has_perfect_queries) { + /* If the bound pipeline has enabled + * out-of-order rasterization, we should + * disable it before starting any perfect + * occlusion queries. + */ + pa_sc_mode_cntl_1 &= C_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE; - radeon_set_context_reg(cmd_buffer->cs, - R_028A4C_PA_SC_MODE_CNTL_1, - pa_sc_mode_cntl_1); - } - } else { - db_count_control = S_028004_PERFECT_ZPASS_COUNTS(1) | - S_028004_SAMPLE_RATE(sample_rate); - } - } + radeon_set_context_reg(cmd_buffer->cs, R_028A4C_PA_SC_MODE_CNTL_1, pa_sc_mode_cntl_1); + } + } else { + db_count_control = S_028004_PERFECT_ZPASS_COUNTS(1) | S_028004_SAMPLE_RATE(sample_rate); + } + } - radeon_set_context_reg(cmd_buffer->cs, R_028004_DB_COUNT_CONTROL, db_count_control); + radeon_set_context_reg(cmd_buffer->cs, R_028004_DB_COUNT_CONTROL, db_count_control); - cmd_buffer->state.context_roll_without_scissor_emitted = true; + cmd_buffer->state.context_roll_without_scissor_emitted = true; } static void radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer) { - uint32_t states = cmd_buffer->state.dirty & cmd_buffer->state.emitted_pipeline->graphics.needed_dynamic_state; + uint32_t states = + cmd_buffer->state.dirty & cmd_buffer->state.emitted_pipeline->graphics.needed_dynamic_state; - if (states & (RADV_CMD_DIRTY_DYNAMIC_VIEWPORT)) - radv_emit_viewport(cmd_buffer); + if (states & (RADV_CMD_DIRTY_DYNAMIC_VIEWPORT)) + radv_emit_viewport(cmd_buffer); - if (states & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT) && - !cmd_buffer->device->physical_device->rad_info.has_gfx9_scissor_bug) - radv_emit_scissor(cmd_buffer); + if (states & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT) && + !cmd_buffer->device->physical_device->rad_info.has_gfx9_scissor_bug) + radv_emit_scissor(cmd_buffer); - if (states & RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH) - radv_emit_line_width(cmd_buffer); + if (states & RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH) + radv_emit_line_width(cmd_buffer); - if (states & RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) - radv_emit_blend_constants(cmd_buffer); + if (states & RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) + radv_emit_blend_constants(cmd_buffer); - if (states & (RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE | - RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK | - RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK)) - radv_emit_stencil(cmd_buffer); + if (states & + (RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE | RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK | + RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK)) + radv_emit_stencil(cmd_buffer); - if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS) - radv_emit_depth_bounds(cmd_buffer); + if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS) + radv_emit_depth_bounds(cmd_buffer); - if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS) - radv_emit_depth_bias(cmd_buffer); + if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS) + radv_emit_depth_bias(cmd_buffer); - if (states & RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE) - radv_emit_discard_rectangle(cmd_buffer); + if (states & RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE) + radv_emit_discard_rectangle(cmd_buffer); - if (states & RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) - radv_emit_sample_locations(cmd_buffer); + if (states & RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) + radv_emit_sample_locations(cmd_buffer); - if (states & RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE) - radv_emit_line_stipple(cmd_buffer); + if (states & RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE) + radv_emit_line_stipple(cmd_buffer); - if (states & (RADV_CMD_DIRTY_DYNAMIC_CULL_MODE | - RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE)) - radv_emit_culling(cmd_buffer, states); + if (states & (RADV_CMD_DIRTY_DYNAMIC_CULL_MODE | RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE)) + radv_emit_culling(cmd_buffer, states); - if (states & RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY) - radv_emit_primitive_topology(cmd_buffer); + if (states & RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY) + radv_emit_primitive_topology(cmd_buffer); - if (states & (RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE | - RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE | - RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP | - RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE | - RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE | - RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP)) - radv_emit_depth_control(cmd_buffer, states); + if (states & + (RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE | + RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP | RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE | + RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP)) + radv_emit_depth_control(cmd_buffer, states); - if (states & RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP) - radv_emit_stencil_control(cmd_buffer); + if (states & RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP) + radv_emit_stencil_control(cmd_buffer); - if (states & RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE) - radv_emit_fragment_shading_rate(cmd_buffer); + if (states & RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE) + radv_emit_fragment_shading_rate(cmd_buffer); - cmd_buffer->state.dirty &= ~states; + cmd_buffer->state.dirty &= ~states; } static void -radv_flush_push_descriptors(struct radv_cmd_buffer *cmd_buffer, - VkPipelineBindPoint bind_point) +radv_flush_push_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point) { - struct radv_descriptor_state *descriptors_state = - radv_get_descriptors_state(cmd_buffer, bind_point); - struct radv_descriptor_set *set = - (struct radv_descriptor_set *)&descriptors_state->push_set.set; - unsigned bo_offset; + struct radv_descriptor_state *descriptors_state = + radv_get_descriptors_state(cmd_buffer, bind_point); + struct radv_descriptor_set *set = (struct radv_descriptor_set *)&descriptors_state->push_set.set; + unsigned bo_offset; - if (!radv_cmd_buffer_upload_data(cmd_buffer, set->header.size, - set->header.mapped_ptr, - &bo_offset)) - return; + if (!radv_cmd_buffer_upload_data(cmd_buffer, set->header.size, set->header.mapped_ptr, + &bo_offset)) + return; - set->header.va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); - set->header.va += bo_offset; + set->header.va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); + set->header.va += bo_offset; } static void radv_flush_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer, - VkPipelineBindPoint bind_point) + VkPipelineBindPoint bind_point) { - struct radv_descriptor_state *descriptors_state = - radv_get_descriptors_state(cmd_buffer, bind_point); - uint32_t size = MAX_SETS * 4; - uint32_t offset; - void *ptr; + struct radv_descriptor_state *descriptors_state = + radv_get_descriptors_state(cmd_buffer, bind_point); + uint32_t size = MAX_SETS * 4; + uint32_t offset; + void *ptr; - if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, &offset, &ptr)) - return; + if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, &offset, &ptr)) + return; - for (unsigned i = 0; i < MAX_SETS; i++) { - uint32_t *uptr = ((uint32_t *)ptr) + i; - uint64_t set_va = 0; - struct radv_descriptor_set *set = descriptors_state->sets[i]; - if (descriptors_state->valid & (1u << i)) - set_va = set->header.va; - uptr[0] = set_va & 0xffffffff; - } + for (unsigned i = 0; i < MAX_SETS; i++) { + uint32_t *uptr = ((uint32_t *)ptr) + i; + uint64_t set_va = 0; + struct radv_descriptor_set *set = descriptors_state->sets[i]; + if (descriptors_state->valid & (1u << i)) + set_va = set->header.va; + uptr[0] = set_va & 0xffffffff; + } - uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); - va += offset; + uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); + va += offset; - if (cmd_buffer->state.pipeline) { - if (cmd_buffer->state.pipeline->shaders[MESA_SHADER_VERTEX]) - radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_VERTEX, - AC_UD_INDIRECT_DESCRIPTOR_SETS, va); + if (cmd_buffer->state.pipeline) { + if (cmd_buffer->state.pipeline->shaders[MESA_SHADER_VERTEX]) + radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_VERTEX, + AC_UD_INDIRECT_DESCRIPTOR_SETS, va); - if (cmd_buffer->state.pipeline->shaders[MESA_SHADER_FRAGMENT]) - radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_FRAGMENT, - AC_UD_INDIRECT_DESCRIPTOR_SETS, va); + if (cmd_buffer->state.pipeline->shaders[MESA_SHADER_FRAGMENT]) + radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_FRAGMENT, + AC_UD_INDIRECT_DESCRIPTOR_SETS, va); - if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) - radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY, - AC_UD_INDIRECT_DESCRIPTOR_SETS, va); + if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) + radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY, + AC_UD_INDIRECT_DESCRIPTOR_SETS, va); - if (radv_pipeline_has_tess(cmd_buffer->state.pipeline)) - radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_TESS_CTRL, - AC_UD_INDIRECT_DESCRIPTOR_SETS, va); + if (radv_pipeline_has_tess(cmd_buffer->state.pipeline)) + radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_TESS_CTRL, + AC_UD_INDIRECT_DESCRIPTOR_SETS, va); - if (radv_pipeline_has_tess(cmd_buffer->state.pipeline)) - radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_TESS_EVAL, - AC_UD_INDIRECT_DESCRIPTOR_SETS, va); - } + if (radv_pipeline_has_tess(cmd_buffer->state.pipeline)) + radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_TESS_EVAL, + AC_UD_INDIRECT_DESCRIPTOR_SETS, va); + } - if (cmd_buffer->state.compute_pipeline) - radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.compute_pipeline, MESA_SHADER_COMPUTE, - AC_UD_INDIRECT_DESCRIPTOR_SETS, va); + if (cmd_buffer->state.compute_pipeline) + radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.compute_pipeline, + MESA_SHADER_COMPUTE, AC_UD_INDIRECT_DESCRIPTOR_SETS, va); } static void -radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer, - VkShaderStageFlags stages) +radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stages) { - VkPipelineBindPoint bind_point = stages & VK_SHADER_STAGE_COMPUTE_BIT ? - VK_PIPELINE_BIND_POINT_COMPUTE : - VK_PIPELINE_BIND_POINT_GRAPHICS; - struct radv_descriptor_state *descriptors_state = - radv_get_descriptors_state(cmd_buffer, bind_point); - struct radv_cmd_state *state = &cmd_buffer->state; - bool flush_indirect_descriptors; + VkPipelineBindPoint bind_point = stages & VK_SHADER_STAGE_COMPUTE_BIT + ? VK_PIPELINE_BIND_POINT_COMPUTE + : VK_PIPELINE_BIND_POINT_GRAPHICS; + struct radv_descriptor_state *descriptors_state = + radv_get_descriptors_state(cmd_buffer, bind_point); + struct radv_cmd_state *state = &cmd_buffer->state; + bool flush_indirect_descriptors; - if (!descriptors_state->dirty) - return; + if (!descriptors_state->dirty) + return; - if (descriptors_state->push_dirty) - radv_flush_push_descriptors(cmd_buffer, bind_point); + if (descriptors_state->push_dirty) + radv_flush_push_descriptors(cmd_buffer, bind_point); - flush_indirect_descriptors = - (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS && - state->pipeline && state->pipeline->need_indirect_descriptor_sets) || - (bind_point == VK_PIPELINE_BIND_POINT_COMPUTE && - state->compute_pipeline && state->compute_pipeline->need_indirect_descriptor_sets); + flush_indirect_descriptors = + (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS && state->pipeline && + state->pipeline->need_indirect_descriptor_sets) || + (bind_point == VK_PIPELINE_BIND_POINT_COMPUTE && state->compute_pipeline && + state->compute_pipeline->need_indirect_descriptor_sets); - if (flush_indirect_descriptors) - radv_flush_indirect_descriptor_sets(cmd_buffer, bind_point); + if (flush_indirect_descriptors) + radv_flush_indirect_descriptor_sets(cmd_buffer, bind_point); - ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, - cmd_buffer->cs, - MAX_SETS * MESA_SHADER_STAGES * 4); + ASSERTED unsigned cdw_max = + radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, MAX_SETS * MESA_SHADER_STAGES * 4); - if (cmd_buffer->state.pipeline) { - radv_foreach_stage(stage, stages) { - if (!cmd_buffer->state.pipeline->shaders[stage]) - continue; + if (cmd_buffer->state.pipeline) { + radv_foreach_stage(stage, stages) + { + if (!cmd_buffer->state.pipeline->shaders[stage]) + continue; - radv_emit_descriptor_pointers(cmd_buffer, - cmd_buffer->state.pipeline, - descriptors_state, stage); - } - } + radv_emit_descriptor_pointers(cmd_buffer, cmd_buffer->state.pipeline, descriptors_state, + stage); + } + } - if (cmd_buffer->state.compute_pipeline && - (stages & VK_SHADER_STAGE_COMPUTE_BIT)) { - radv_emit_descriptor_pointers(cmd_buffer, - cmd_buffer->state.compute_pipeline, - descriptors_state, - MESA_SHADER_COMPUTE); - } + if (cmd_buffer->state.compute_pipeline && (stages & VK_SHADER_STAGE_COMPUTE_BIT)) { + radv_emit_descriptor_pointers(cmd_buffer, cmd_buffer->state.compute_pipeline, + descriptors_state, MESA_SHADER_COMPUTE); + } - descriptors_state->dirty = 0; - descriptors_state->push_dirty = false; + descriptors_state->dirty = 0; + descriptors_state->push_dirty = false; - assert(cmd_buffer->cs->cdw <= cdw_max); + assert(cmd_buffer->cs->cdw <= cdw_max); - if (unlikely(cmd_buffer->device->trace_bo)) - radv_save_descriptors(cmd_buffer, bind_point); + if (unlikely(cmd_buffer->device->trace_bo)) + radv_save_descriptors(cmd_buffer, bind_point); } static void -radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, - VkShaderStageFlags stages) +radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stages) { - struct radv_pipeline *pipeline = stages & VK_SHADER_STAGE_COMPUTE_BIT - ? cmd_buffer->state.compute_pipeline - : cmd_buffer->state.pipeline; - VkPipelineBindPoint bind_point = stages & VK_SHADER_STAGE_COMPUTE_BIT ? - VK_PIPELINE_BIND_POINT_COMPUTE : - VK_PIPELINE_BIND_POINT_GRAPHICS; - struct radv_descriptor_state *descriptors_state = - radv_get_descriptors_state(cmd_buffer, bind_point); - struct radv_pipeline_layout *layout = pipeline->layout; - struct radv_shader_variant *shader, *prev_shader; - bool need_push_constants = false; - unsigned offset; - void *ptr; - uint64_t va; + struct radv_pipeline *pipeline = stages & VK_SHADER_STAGE_COMPUTE_BIT + ? cmd_buffer->state.compute_pipeline + : cmd_buffer->state.pipeline; + VkPipelineBindPoint bind_point = stages & VK_SHADER_STAGE_COMPUTE_BIT + ? VK_PIPELINE_BIND_POINT_COMPUTE + : VK_PIPELINE_BIND_POINT_GRAPHICS; + struct radv_descriptor_state *descriptors_state = + radv_get_descriptors_state(cmd_buffer, bind_point); + struct radv_pipeline_layout *layout = pipeline->layout; + struct radv_shader_variant *shader, *prev_shader; + bool need_push_constants = false; + unsigned offset; + void *ptr; + uint64_t va; - stages &= cmd_buffer->push_constant_stages; - if (!stages || - (!layout->push_constant_size && !layout->dynamic_offset_count)) - return; + stages &= cmd_buffer->push_constant_stages; + if (!stages || (!layout->push_constant_size && !layout->dynamic_offset_count)) + return; - radv_foreach_stage(stage, stages) { - shader = radv_get_shader(pipeline, stage); - if (!shader) - continue; + radv_foreach_stage(stage, stages) + { + shader = radv_get_shader(pipeline, stage); + if (!shader) + continue; - need_push_constants |= shader->info.loads_push_constants; - need_push_constants |= shader->info.loads_dynamic_offsets; + need_push_constants |= shader->info.loads_push_constants; + need_push_constants |= shader->info.loads_dynamic_offsets; - uint8_t base = shader->info.base_inline_push_consts; - uint8_t count = shader->info.num_inline_push_consts; + uint8_t base = shader->info.base_inline_push_consts; + uint8_t count = shader->info.num_inline_push_consts; - radv_emit_inline_push_consts(cmd_buffer, pipeline, stage, - AC_UD_INLINE_PUSH_CONSTANTS, - count, - (uint32_t *)&cmd_buffer->push_constants[base * 4]); - } + radv_emit_inline_push_consts(cmd_buffer, pipeline, stage, AC_UD_INLINE_PUSH_CONSTANTS, count, + (uint32_t *)&cmd_buffer->push_constants[base * 4]); + } - if (need_push_constants) { - if (!radv_cmd_buffer_upload_alloc(cmd_buffer, layout->push_constant_size + - 16 * layout->dynamic_offset_count, &offset, &ptr)) - return; + if (need_push_constants) { + if (!radv_cmd_buffer_upload_alloc( + cmd_buffer, layout->push_constant_size + 16 * layout->dynamic_offset_count, &offset, + &ptr)) + return; - memcpy(ptr, cmd_buffer->push_constants, layout->push_constant_size); - memcpy((char*)ptr + layout->push_constant_size, - descriptors_state->dynamic_buffers, - 16 * layout->dynamic_offset_count); + memcpy(ptr, cmd_buffer->push_constants, layout->push_constant_size); + memcpy((char *)ptr + layout->push_constant_size, descriptors_state->dynamic_buffers, + 16 * layout->dynamic_offset_count); - va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); - va += offset; + va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); + va += offset; - ASSERTED unsigned cdw_max = - radeon_check_space(cmd_buffer->device->ws, - cmd_buffer->cs, MESA_SHADER_STAGES * 4); + ASSERTED unsigned cdw_max = + radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, MESA_SHADER_STAGES * 4); - prev_shader = NULL; - radv_foreach_stage(stage, stages) { - shader = radv_get_shader(pipeline, stage); + prev_shader = NULL; + radv_foreach_stage(stage, stages) + { + shader = radv_get_shader(pipeline, stage); - /* Avoid redundantly emitting the address for merged stages. */ - if (shader && shader != prev_shader) { - radv_emit_userdata_address(cmd_buffer, pipeline, stage, - AC_UD_PUSH_CONSTANTS, va); + /* Avoid redundantly emitting the address for merged stages. */ + if (shader && shader != prev_shader) { + radv_emit_userdata_address(cmd_buffer, pipeline, stage, AC_UD_PUSH_CONSTANTS, va); - prev_shader = shader; - } - } - assert(cmd_buffer->cs->cdw <= cdw_max); - } + prev_shader = shader; + } + } + assert(cmd_buffer->cs->cdw <= cdw_max); + } - cmd_buffer->push_constant_stages &= ~stages; + cmd_buffer->push_constant_stages &= ~stages; } static void -radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer, - bool pipeline_is_dirty) +radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer, bool pipeline_is_dirty) { - if ((pipeline_is_dirty || - (cmd_buffer->state.dirty & RADV_CMD_DIRTY_VERTEX_BUFFER)) && - cmd_buffer->state.pipeline->num_vertex_bindings && - radv_get_shader(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX)->info.vs.has_vertex_buffers) { - unsigned vb_offset; - void *vb_ptr; - uint32_t i = 0; - uint32_t count = cmd_buffer->state.pipeline->num_vertex_bindings; - uint64_t va; + if ((pipeline_is_dirty || (cmd_buffer->state.dirty & RADV_CMD_DIRTY_VERTEX_BUFFER)) && + cmd_buffer->state.pipeline->num_vertex_bindings && + radv_get_shader(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX)->info.vs.has_vertex_buffers) { + unsigned vb_offset; + void *vb_ptr; + uint32_t i = 0; + uint32_t count = cmd_buffer->state.pipeline->num_vertex_bindings; + uint64_t va; - /* allocate some descriptor state for vertex buffers */ - if (!radv_cmd_buffer_upload_alloc(cmd_buffer, count * 16, - &vb_offset, &vb_ptr)) - return; + /* allocate some descriptor state for vertex buffers */ + if (!radv_cmd_buffer_upload_alloc(cmd_buffer, count * 16, &vb_offset, &vb_ptr)) + return; - for (i = 0; i < count; i++) { - uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4]; - uint32_t offset; - struct radv_buffer *buffer = cmd_buffer->vertex_bindings[i].buffer; - unsigned num_records; - unsigned stride; + for (i = 0; i < count; i++) { + uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4]; + uint32_t offset; + struct radv_buffer *buffer = cmd_buffer->vertex_bindings[i].buffer; + unsigned num_records; + unsigned stride; - if (!buffer) { - memset(desc, 0, 4 * 4); - continue; - } + if (!buffer) { + memset(desc, 0, 4 * 4); + continue; + } - va = radv_buffer_get_va(buffer->bo); + va = radv_buffer_get_va(buffer->bo); - offset = cmd_buffer->vertex_bindings[i].offset; - va += offset + buffer->offset; + offset = cmd_buffer->vertex_bindings[i].offset; + va += offset + buffer->offset; - if (cmd_buffer->vertex_bindings[i].size) { - num_records = cmd_buffer->vertex_bindings[i].size; - } else { - num_records = buffer->size - offset; - } + if (cmd_buffer->vertex_bindings[i].size) { + num_records = cmd_buffer->vertex_bindings[i].size; + } else { + num_records = buffer->size - offset; + } - if (cmd_buffer->state.pipeline->graphics.uses_dynamic_stride) { - stride = cmd_buffer->vertex_bindings[i].stride; - } else { - stride = cmd_buffer->state.pipeline->binding_stride[i]; - } + if (cmd_buffer->state.pipeline->graphics.uses_dynamic_stride) { + stride = cmd_buffer->vertex_bindings[i].stride; + } else { + stride = cmd_buffer->state.pipeline->binding_stride[i]; + } - if (cmd_buffer->device->physical_device->rad_info.chip_class != GFX8 && stride) - num_records = DIV_ROUND_UP(num_records, stride); + if (cmd_buffer->device->physical_device->rad_info.chip_class != GFX8 && stride) + num_records = DIV_ROUND_UP(num_records, stride); - uint32_t rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); + uint32_t rsrc_word3 = + S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); - if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) { - /* OOB_SELECT chooses the out-of-bounds check: - * - 1: index >= NUM_RECORDS (Structured) - * - 3: offset >= NUM_RECORDS (Raw) - */ - int oob_select = stride ? V_008F0C_OOB_SELECT_STRUCTURED : V_008F0C_OOB_SELECT_RAW; + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) { + /* OOB_SELECT chooses the out-of-bounds check: + * - 1: index >= NUM_RECORDS (Structured) + * - 3: offset >= NUM_RECORDS (Raw) + */ + int oob_select = stride ? V_008F0C_OOB_SELECT_STRUCTURED : V_008F0C_OOB_SELECT_RAW; - rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_UINT) | - S_008F0C_OOB_SELECT(oob_select) | - S_008F0C_RESOURCE_LEVEL(1); - } else { - rsrc_word3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); - } + rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_UINT) | + S_008F0C_OOB_SELECT(oob_select) | S_008F0C_RESOURCE_LEVEL(1); + } else { + rsrc_word3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); + } - desc[0] = va; - desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride); - desc[2] = num_records; - desc[3] = rsrc_word3; - } + desc[0] = va; + desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride); + desc[2] = num_records; + desc[3] = rsrc_word3; + } - va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); - va += vb_offset; + va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); + va += vb_offset; - radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_VERTEX, - AC_UD_VS_VERTEX_BUFFERS, va); + radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_VERTEX, + AC_UD_VS_VERTEX_BUFFERS, va); - cmd_buffer->state.vb_va = va; - cmd_buffer->state.vb_size = count * 16; - cmd_buffer->state.prefetch_L2_mask |= RADV_PREFETCH_VBO_DESCRIPTORS; + cmd_buffer->state.vb_va = va; + cmd_buffer->state.vb_size = count * 16; + cmd_buffer->state.prefetch_L2_mask |= RADV_PREFETCH_VBO_DESCRIPTORS; - if (unlikely(cmd_buffer->device->trace_bo)) - radv_save_vertex_descriptors(cmd_buffer, (uintptr_t)vb_ptr); - } - cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_VERTEX_BUFFER; + if (unlikely(cmd_buffer->device->trace_bo)) + radv_save_vertex_descriptors(cmd_buffer, (uintptr_t)vb_ptr); + } + cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_VERTEX_BUFFER; } static void radv_emit_streamout_buffers(struct radv_cmd_buffer *cmd_buffer, uint64_t va) { - struct radv_pipeline *pipeline = cmd_buffer->state.pipeline; - struct radv_userdata_info *loc; - uint32_t base_reg; + struct radv_pipeline *pipeline = cmd_buffer->state.pipeline; + struct radv_userdata_info *loc; + uint32_t base_reg; - for (unsigned stage = 0; stage < MESA_SHADER_STAGES; ++stage) { - if (!radv_get_shader(pipeline, stage)) - continue; + for (unsigned stage = 0; stage < MESA_SHADER_STAGES; ++stage) { + if (!radv_get_shader(pipeline, stage)) + continue; - loc = radv_lookup_user_sgpr(pipeline, stage, - AC_UD_STREAMOUT_BUFFERS); - if (loc->sgpr_idx == -1) - continue; + loc = radv_lookup_user_sgpr(pipeline, stage, AC_UD_STREAMOUT_BUFFERS); + if (loc->sgpr_idx == -1) + continue; - base_reg = pipeline->user_data_0[stage]; + base_reg = pipeline->user_data_0[stage]; - radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, - base_reg + loc->sgpr_idx * 4, va, false); - } + radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, va, + false); + } - if (radv_pipeline_has_gs_copy_shader(pipeline)) { - loc = &pipeline->gs_copy_shader->info.user_sgprs_locs.shader_data[AC_UD_STREAMOUT_BUFFERS]; - if (loc->sgpr_idx != -1) { - base_reg = R_00B130_SPI_SHADER_USER_DATA_VS_0; + if (radv_pipeline_has_gs_copy_shader(pipeline)) { + loc = &pipeline->gs_copy_shader->info.user_sgprs_locs.shader_data[AC_UD_STREAMOUT_BUFFERS]; + if (loc->sgpr_idx != -1) { + base_reg = R_00B130_SPI_SHADER_USER_DATA_VS_0; - radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, - base_reg + loc->sgpr_idx * 4, va, false); - } - } + radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, + va, false); + } + } } static void radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer) { - if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_STREAMOUT_BUFFER) { - struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings; - struct radv_streamout_state *so = &cmd_buffer->state.streamout; - unsigned so_offset; - void *so_ptr; - uint64_t va; + if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_STREAMOUT_BUFFER) { + struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings; + struct radv_streamout_state *so = &cmd_buffer->state.streamout; + unsigned so_offset; + void *so_ptr; + uint64_t va; - /* Allocate some descriptor state for streamout buffers. */ - if (!radv_cmd_buffer_upload_alloc(cmd_buffer, - MAX_SO_BUFFERS * 16, - &so_offset, &so_ptr)) - return; + /* Allocate some descriptor state for streamout buffers. */ + if (!radv_cmd_buffer_upload_alloc(cmd_buffer, MAX_SO_BUFFERS * 16, &so_offset, &so_ptr)) + return; - for (uint32_t i = 0; i < MAX_SO_BUFFERS; i++) { - struct radv_buffer *buffer = sb[i].buffer; - uint32_t *desc = &((uint32_t *)so_ptr)[i * 4]; + for (uint32_t i = 0; i < MAX_SO_BUFFERS; i++) { + struct radv_buffer *buffer = sb[i].buffer; + uint32_t *desc = &((uint32_t *)so_ptr)[i * 4]; - if (!(so->enabled_mask & (1 << i))) - continue; + if (!(so->enabled_mask & (1 << i))) + continue; - va = radv_buffer_get_va(buffer->bo) + buffer->offset; + va = radv_buffer_get_va(buffer->bo) + buffer->offset; - va += sb[i].offset; + va += sb[i].offset; - /* Set the descriptor. - * - * On GFX8, the format must be non-INVALID, otherwise - * the buffer will be considered not bound and store - * instructions will be no-ops. - */ - uint32_t size = 0xffffffff; + /* Set the descriptor. + * + * On GFX8, the format must be non-INVALID, otherwise + * the buffer will be considered not bound and store + * instructions will be no-ops. + */ + uint32_t size = 0xffffffff; - /* Compute the correct buffer size for NGG streamout - * because it's used to determine the max emit per - * buffer. - */ - if (cmd_buffer->device->physical_device->use_ngg_streamout) - size = buffer->size - sb[i].offset; + /* Compute the correct buffer size for NGG streamout + * because it's used to determine the max emit per + * buffer. + */ + if (cmd_buffer->device->physical_device->use_ngg_streamout) + size = buffer->size - sb[i].offset; - uint32_t rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); + uint32_t rsrc_word3 = + S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); - if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) { - rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) | - S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | - S_008F0C_RESOURCE_LEVEL(1); - } else { - rsrc_word3 |= S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); - } + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) { + rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) | + S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1); + } else { + rsrc_word3 |= S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); + } - desc[0] = va; - desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32); - desc[2] = size; - desc[3] = rsrc_word3; - } + desc[0] = va; + desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32); + desc[2] = size; + desc[3] = rsrc_word3; + } - va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); - va += so_offset; + va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); + va += so_offset; - radv_emit_streamout_buffers(cmd_buffer, va); - } + radv_emit_streamout_buffers(cmd_buffer, va); + } - cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_STREAMOUT_BUFFER; + cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_STREAMOUT_BUFFER; } static void radv_flush_ngg_gs_state(struct radv_cmd_buffer *cmd_buffer) { - struct radv_pipeline *pipeline = cmd_buffer->state.pipeline; - struct radv_userdata_info *loc; - uint32_t ngg_gs_state = 0; - uint32_t base_reg; + struct radv_pipeline *pipeline = cmd_buffer->state.pipeline; + struct radv_userdata_info *loc; + uint32_t ngg_gs_state = 0; + uint32_t base_reg; - if (!radv_pipeline_has_gs(pipeline) || - !radv_pipeline_has_ngg(pipeline)) - return; + if (!radv_pipeline_has_gs(pipeline) || !radv_pipeline_has_ngg(pipeline)) + return; - /* By default NGG GS queries are disabled but they are enabled if the - * command buffer has active GDS queries or if it's a secondary command - * buffer that inherits the number of generated primitives. - */ - if (cmd_buffer->state.active_pipeline_gds_queries || - (cmd_buffer->state.inherited_pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT)) - ngg_gs_state = 1; + /* By default NGG GS queries are disabled but they are enabled if the + * command buffer has active GDS queries or if it's a secondary command + * buffer that inherits the number of generated primitives. + */ + if (cmd_buffer->state.active_pipeline_gds_queries || + (cmd_buffer->state.inherited_pipeline_statistics & + VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT)) + ngg_gs_state = 1; - loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_GEOMETRY, - AC_UD_NGG_GS_STATE); - base_reg = pipeline->user_data_0[MESA_SHADER_GEOMETRY]; - assert(loc->sgpr_idx != -1); + loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_GEOMETRY, AC_UD_NGG_GS_STATE); + base_reg = pipeline->user_data_0[MESA_SHADER_GEOMETRY]; + assert(loc->sgpr_idx != -1); - radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, - ngg_gs_state); + radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, ngg_gs_state); } static void radv_upload_graphics_shader_descriptors(struct radv_cmd_buffer *cmd_buffer, bool pipeline_is_dirty) { - radv_flush_vertex_descriptors(cmd_buffer, pipeline_is_dirty); - radv_flush_streamout_descriptors(cmd_buffer); - radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS); - radv_flush_constants(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS); - radv_flush_ngg_gs_state(cmd_buffer); + radv_flush_vertex_descriptors(cmd_buffer, pipeline_is_dirty); + radv_flush_streamout_descriptors(cmd_buffer); + radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS); + radv_flush_constants(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS); + radv_flush_ngg_gs_state(cmd_buffer); } struct radv_draw_info { - /** - * Number of vertices. - */ - uint32_t count; + /** + * Number of vertices. + */ + uint32_t count; - /** - * First instance id. - */ - uint32_t first_instance; + /** + * First instance id. + */ + uint32_t first_instance; - /** - * Number of instances. - */ - uint32_t instance_count; + /** + * Number of instances. + */ + uint32_t instance_count; - /** - * First index (indexed draws only). - */ - uint32_t first_index; + /** + * First index (indexed draws only). + */ + uint32_t first_index; - /** - * Whether it's an indexed draw. - */ - bool indexed; + /** + * Whether it's an indexed draw. + */ + bool indexed; - /** - * Indirect draw parameters resource. - */ - struct radv_buffer *indirect; - uint64_t indirect_offset; - uint32_t stride; + /** + * Indirect draw parameters resource. + */ + struct radv_buffer *indirect; + uint64_t indirect_offset; + uint32_t stride; - /** - * Draw count parameters resource. - */ - struct radv_buffer *count_buffer; - uint64_t count_buffer_offset; + /** + * Draw count parameters resource. + */ + struct radv_buffer *count_buffer; + uint64_t count_buffer_offset; - /** - * Stream output parameters resource. - */ - struct radv_buffer *strmout_buffer; - uint64_t strmout_buffer_offset; + /** + * Stream output parameters resource. + */ + struct radv_buffer *strmout_buffer; + uint64_t strmout_buffer_offset; }; static uint32_t radv_get_primitive_reset_index(struct radv_cmd_buffer *cmd_buffer) { - switch (cmd_buffer->state.index_type) { - case V_028A7C_VGT_INDEX_8: - return 0xffu; - case V_028A7C_VGT_INDEX_16: - return 0xffffu; - case V_028A7C_VGT_INDEX_32: - return 0xffffffffu; - default: - unreachable("invalid index type"); - } + switch (cmd_buffer->state.index_type) { + case V_028A7C_VGT_INDEX_8: + return 0xffu; + case V_028A7C_VGT_INDEX_16: + return 0xffffu; + case V_028A7C_VGT_INDEX_32: + return 0xffffffffu; + default: + unreachable("invalid index type"); + } } static void -si_emit_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, - bool instanced_draw, bool indirect_draw, - bool count_from_stream_output, - uint32_t draw_vertex_count) +si_emit_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw, + bool indirect_draw, bool count_from_stream_output, + uint32_t draw_vertex_count) { - struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info; - struct radv_cmd_state *state = &cmd_buffer->state; - unsigned topology = state->dynamic.primitive_topology; - struct radeon_cmdbuf *cs = cmd_buffer->cs; - unsigned ia_multi_vgt_param; + struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info; + struct radv_cmd_state *state = &cmd_buffer->state; + unsigned topology = state->dynamic.primitive_topology; + struct radeon_cmdbuf *cs = cmd_buffer->cs; + unsigned ia_multi_vgt_param; - ia_multi_vgt_param = - si_get_ia_multi_vgt_param(cmd_buffer, instanced_draw, - indirect_draw, - count_from_stream_output, - draw_vertex_count, - topology); + ia_multi_vgt_param = + si_get_ia_multi_vgt_param(cmd_buffer, instanced_draw, indirect_draw, count_from_stream_output, + draw_vertex_count, topology); - if (state->last_ia_multi_vgt_param != ia_multi_vgt_param) { - if (info->chip_class == GFX9) { - radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, - cs, - R_030960_IA_MULTI_VGT_PARAM, - 4, ia_multi_vgt_param); - } else if (info->chip_class >= GFX7) { - radeon_set_context_reg_idx(cs, - R_028AA8_IA_MULTI_VGT_PARAM, - 1, ia_multi_vgt_param); - } else { - radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, - ia_multi_vgt_param); - } - state->last_ia_multi_vgt_param = ia_multi_vgt_param; - } + if (state->last_ia_multi_vgt_param != ia_multi_vgt_param) { + if (info->chip_class == GFX9) { + radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cs, + R_030960_IA_MULTI_VGT_PARAM, 4, ia_multi_vgt_param); + } else if (info->chip_class >= GFX7) { + radeon_set_context_reg_idx(cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param); + } else { + radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param); + } + state->last_ia_multi_vgt_param = ia_multi_vgt_param; + } } static void -radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, - const struct radv_draw_info *draw_info) +radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *draw_info) { - struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info; - struct radv_cmd_state *state = &cmd_buffer->state; - struct radeon_cmdbuf *cs = cmd_buffer->cs; - int32_t primitive_reset_en; + struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info; + struct radv_cmd_state *state = &cmd_buffer->state; + struct radeon_cmdbuf *cs = cmd_buffer->cs; + int32_t primitive_reset_en; - /* Draw state. */ - if (info->chip_class < GFX10) { - si_emit_ia_multi_vgt_param(cmd_buffer, draw_info->instance_count > 1, - draw_info->indirect, - !!draw_info->strmout_buffer, - draw_info->indirect ? 0 : draw_info->count); - } + /* Draw state. */ + if (info->chip_class < GFX10) { + si_emit_ia_multi_vgt_param(cmd_buffer, draw_info->instance_count > 1, draw_info->indirect, + !!draw_info->strmout_buffer, + draw_info->indirect ? 0 : draw_info->count); + } - /* Primitive restart. */ - primitive_reset_en = - draw_info->indexed && state->pipeline->graphics.prim_restart_enable; + /* Primitive restart. */ + primitive_reset_en = draw_info->indexed && state->pipeline->graphics.prim_restart_enable; - if (primitive_reset_en != state->last_primitive_reset_en) { - state->last_primitive_reset_en = primitive_reset_en; - if (info->chip_class >= GFX9) { - radeon_set_uconfig_reg(cs, - R_03092C_VGT_MULTI_PRIM_IB_RESET_EN, - primitive_reset_en); - } else { - radeon_set_context_reg(cs, - R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, - primitive_reset_en); - } - } + if (primitive_reset_en != state->last_primitive_reset_en) { + state->last_primitive_reset_en = primitive_reset_en; + if (info->chip_class >= GFX9) { + radeon_set_uconfig_reg(cs, R_03092C_VGT_MULTI_PRIM_IB_RESET_EN, primitive_reset_en); + } else { + radeon_set_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, primitive_reset_en); + } + } - if (primitive_reset_en) { - uint32_t primitive_reset_index = - radv_get_primitive_reset_index(cmd_buffer); + if (primitive_reset_en) { + uint32_t primitive_reset_index = radv_get_primitive_reset_index(cmd_buffer); - if (primitive_reset_index != state->last_primitive_reset_index) { - radeon_set_context_reg(cs, - R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, - primitive_reset_index); - state->last_primitive_reset_index = primitive_reset_index; - } - } + if (primitive_reset_index != state->last_primitive_reset_index) { + radeon_set_context_reg(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, primitive_reset_index); + state->last_primitive_reset_index = primitive_reset_index; + } + } - if (draw_info->strmout_buffer) { - uint64_t va = radv_buffer_get_va(draw_info->strmout_buffer->bo); + if (draw_info->strmout_buffer) { + uint64_t va = radv_buffer_get_va(draw_info->strmout_buffer->bo); - va += draw_info->strmout_buffer->offset + - draw_info->strmout_buffer_offset; + va += draw_info->strmout_buffer->offset + draw_info->strmout_buffer_offset; - radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, - draw_info->stride); + radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, draw_info->stride); - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | - COPY_DATA_DST_SEL(COPY_DATA_REG) | - COPY_DATA_WR_CONFIRM); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2); - radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | + COPY_DATA_WR_CONFIRM); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2); + radeon_emit(cs, 0); /* unused */ - radv_cs_add_buffer(cmd_buffer->device->ws, cs, draw_info->strmout_buffer->bo); - } + radv_cs_add_buffer(cmd_buffer->device->ws, cs, draw_info->strmout_buffer->bo); + } } -static void radv_stage_flush(struct radv_cmd_buffer *cmd_buffer, - VkPipelineStageFlags src_stage_mask) +static void +radv_stage_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags src_stage_mask) { - if (src_stage_mask & (VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | - VK_PIPELINE_STAGE_TRANSFER_BIT | - VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT | - VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)) { - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH; - } + if (src_stage_mask & + (VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT | + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)) { + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH; + } - if (src_stage_mask & (VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | - VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | - VK_PIPELINE_STAGE_TRANSFER_BIT | - VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT | - VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT | - VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)) { - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH; - } else if (src_stage_mask & (VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | - VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | - VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | - VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | - VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT)) { - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VS_PARTIAL_FLUSH; - } + if (src_stage_mask & + (VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | + VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT | + VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)) { + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH; + } else if (src_stage_mask & + (VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | + VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | + VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | + VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT)) { + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VS_PARTIAL_FLUSH; + } } /* Determine if the image is affected by the pipe misaligned metadata issue * which requires to invalidate L2. */ static bool -radv_image_is_pipe_misaligned(const struct radv_device *device, - const struct radv_image *image) +radv_image_is_pipe_misaligned(const struct radv_device *device, const struct radv_image *image) { - struct radeon_info *rad_info = &device->physical_device->rad_info; - unsigned log2_samples = util_logbase2(image->info.samples); + struct radeon_info *rad_info = &device->physical_device->rad_info; + unsigned log2_samples = util_logbase2(image->info.samples); - assert(rad_info->chip_class >= GFX10); + assert(rad_info->chip_class >= GFX10); - for (unsigned i = 0; i < image->plane_count; ++i) { - VkFormat fmt = vk_format_get_plane_format(image->vk_format, i); - unsigned log2_bpp = util_logbase2(vk_format_get_blocksize(fmt)); - unsigned log2_bpp_and_samples; + for (unsigned i = 0; i < image->plane_count; ++i) { + VkFormat fmt = vk_format_get_plane_format(image->vk_format, i); + unsigned log2_bpp = util_logbase2(vk_format_get_blocksize(fmt)); + unsigned log2_bpp_and_samples; - if (rad_info->chip_class >= GFX10_3) { - log2_bpp_and_samples = log2_bpp + log2_samples; - } else { - if (vk_format_has_depth(image->vk_format) && - image->info.array_size >= 8) { - log2_bpp = 2; - } + if (rad_info->chip_class >= GFX10_3) { + log2_bpp_and_samples = log2_bpp + log2_samples; + } else { + if (vk_format_has_depth(image->vk_format) && image->info.array_size >= 8) { + log2_bpp = 2; + } - log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples); - } + log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples); + } - unsigned num_pipes = G_0098F8_NUM_PIPES(rad_info->gb_addr_config); - int overlap = MAX2(0, log2_bpp_and_samples + num_pipes - 8); + unsigned num_pipes = G_0098F8_NUM_PIPES(rad_info->gb_addr_config); + int overlap = MAX2(0, log2_bpp_and_samples + num_pipes - 8); - if (vk_format_has_depth(image->vk_format)) { - if (radv_image_is_tc_compat_htile(image) && overlap) { - return true; - } - } else { - unsigned max_compressed_frags = G_0098F8_MAX_COMPRESSED_FRAGS(rad_info->gb_addr_config); - int log2_samples_frag_diff = MAX2(0, log2_samples - max_compressed_frags); - int samples_overlap = MIN2(log2_samples, overlap); + if (vk_format_has_depth(image->vk_format)) { + if (radv_image_is_tc_compat_htile(image) && overlap) { + return true; + } + } else { + unsigned max_compressed_frags = G_0098F8_MAX_COMPRESSED_FRAGS(rad_info->gb_addr_config); + int log2_samples_frag_diff = MAX2(0, log2_samples - max_compressed_frags); + int samples_overlap = MIN2(log2_samples, overlap); - /* TODO: It shouldn't be necessary if the image has DCC but - * not readable by shader. - */ - if ((radv_image_has_dcc(image) || - radv_image_is_tc_compat_cmask(image)) && - (samples_overlap > log2_samples_frag_diff)) { - return true; - } - } - } + /* TODO: It shouldn't be necessary if the image has DCC but + * not readable by shader. + */ + if ((radv_image_has_dcc(image) || radv_image_is_tc_compat_cmask(image)) && + (samples_overlap > log2_samples_frag_diff)) { + return true; + } + } + } - return false; + return false; } static bool radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image) { - if (device->physical_device->rad_info.chip_class >= GFX10) { - return !device->physical_device->rad_info.tcc_rb_non_coherent && - (image && !radv_image_is_pipe_misaligned(device, image)); - } else if (device->physical_device->rad_info.chip_class == GFX9 && image) { - if (image->info.samples == 1 && - (image->usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | - VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) && - !vk_format_has_stencil(image->vk_format)) { - /* Single-sample color and single-sample depth - * (not stencil) are coherent with shaders on - * GFX9. - */ - return true; - } - } + if (device->physical_device->rad_info.chip_class >= GFX10) { + return !device->physical_device->rad_info.tcc_rb_non_coherent && + (image && !radv_image_is_pipe_misaligned(device, image)); + } else if (device->physical_device->rad_info.chip_class == GFX9 && image) { + if (image->info.samples == 1 && + (image->usage & + (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) && + !vk_format_has_stencil(image->vk_format)) { + /* Single-sample color and single-sample depth + * (not stencil) are coherent with shaders on + * GFX9. + */ + return true; + } + } - return false; + return false; } enum radv_cmd_flush_bits -radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, - VkAccessFlags src_flags, - const struct radv_image *image) -{ - bool has_CB_meta = true, has_DB_meta = true; - bool image_is_coherent = radv_image_is_l2_coherent(cmd_buffer->device, image); - enum radv_cmd_flush_bits flush_bits = 0; - - if (image) { - if (!radv_image_has_CB_metadata(image)) - has_CB_meta = false; - if (!radv_image_has_htile(image)) - has_DB_meta = false; - } - - u_foreach_bit(b, src_flags) { - switch ((VkAccessFlagBits)(1 << b)) { - case VK_ACCESS_SHADER_WRITE_BIT: - /* since the STORAGE bit isn't set we know that this is a meta operation. - * on the dst flush side we skip CB/DB flushes without the STORAGE bit, so - * set it here. */ - if (image && !(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) { - if (vk_format_is_depth_or_stencil(image->vk_format)) { - flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB; - } else { - flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB; - } - } - if (!image_is_coherent) - flush_bits |= RADV_CMD_FLAG_WB_L2; - break; - case VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT: - case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT: - if (!image_is_coherent) - flush_bits |= RADV_CMD_FLAG_WB_L2; - break; - case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT: - flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB; - if (has_CB_meta) - flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; - break; - case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: - flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB; - if (has_DB_meta) - flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; - break; - case VK_ACCESS_TRANSFER_WRITE_BIT: - flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | - RADV_CMD_FLAG_FLUSH_AND_INV_DB; - - if (!image_is_coherent) - flush_bits |= RADV_CMD_FLAG_INV_L2; - if (has_CB_meta) - flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; - if (has_DB_meta) - flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; - break; - case VK_ACCESS_MEMORY_WRITE_BIT: - flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | - RADV_CMD_FLAG_FLUSH_AND_INV_DB; - - if (!image_is_coherent) - flush_bits |= RADV_CMD_FLAG_INV_L2; - if (has_CB_meta) - flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; - if (has_DB_meta) - flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; - break; - default: - break; - } - } - return flush_bits; -} - -enum radv_cmd_flush_bits -radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, - VkAccessFlags dst_flags, +radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags src_flags, const struct radv_image *image) { - bool has_CB_meta = true, has_DB_meta = true; - enum radv_cmd_flush_bits flush_bits = 0; - bool flush_CB = true, flush_DB = true; - bool image_is_coherent = radv_image_is_l2_coherent(cmd_buffer->device, image); + bool has_CB_meta = true, has_DB_meta = true; + bool image_is_coherent = radv_image_is_l2_coherent(cmd_buffer->device, image); + enum radv_cmd_flush_bits flush_bits = 0; - if (image) { - if (!(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) { - flush_CB = false; - flush_DB = false; - } + if (image) { + if (!radv_image_has_CB_metadata(image)) + has_CB_meta = false; + if (!radv_image_has_htile(image)) + has_DB_meta = false; + } - if (!radv_image_has_CB_metadata(image)) - has_CB_meta = false; - if (!radv_image_has_htile(image)) - has_DB_meta = false; - } + u_foreach_bit(b, src_flags) + { + switch ((VkAccessFlagBits)(1 << b)) { + case VK_ACCESS_SHADER_WRITE_BIT: + /* since the STORAGE bit isn't set we know that this is a meta operation. + * on the dst flush side we skip CB/DB flushes without the STORAGE bit, so + * set it here. */ + if (image && !(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) { + if (vk_format_is_depth_or_stencil(image->vk_format)) { + flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB; + } else { + flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB; + } + } + if (!image_is_coherent) + flush_bits |= RADV_CMD_FLAG_WB_L2; + break; + case VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT: + case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT: + if (!image_is_coherent) + flush_bits |= RADV_CMD_FLAG_WB_L2; + break; + case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT: + flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB; + if (has_CB_meta) + flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; + break; + case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: + flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB; + if (has_DB_meta) + flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; + break; + case VK_ACCESS_TRANSFER_WRITE_BIT: + flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB; - u_foreach_bit(b, dst_flags) { - switch ((VkAccessFlagBits)(1 << b)) { - case VK_ACCESS_INDIRECT_COMMAND_READ_BIT: - case VK_ACCESS_INDEX_READ_BIT: - case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT: - break; - case VK_ACCESS_UNIFORM_READ_BIT: - flush_bits |= RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_SCACHE; - break; - case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT: - case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT: - case VK_ACCESS_TRANSFER_READ_BIT: - case VK_ACCESS_TRANSFER_WRITE_BIT: - flush_bits |= RADV_CMD_FLAG_INV_VCACHE; + if (!image_is_coherent) + flush_bits |= RADV_CMD_FLAG_INV_L2; + if (has_CB_meta) + flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; + if (has_DB_meta) + flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; + break; + case VK_ACCESS_MEMORY_WRITE_BIT: + flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB; - if (has_CB_meta || has_DB_meta) - flush_bits |= RADV_CMD_FLAG_INV_L2_METADATA; - if (!image_is_coherent) - flush_bits |= RADV_CMD_FLAG_INV_L2; - break; - case VK_ACCESS_SHADER_READ_BIT: - flush_bits |= RADV_CMD_FLAG_INV_VCACHE; - /* Unlike LLVM, ACO uses SMEM for SSBOs and we have to - * invalidate the scalar cache. */ - if (!cmd_buffer->device->physical_device->use_llvm && !image) - flush_bits |= RADV_CMD_FLAG_INV_SCACHE; - - if (has_CB_meta || has_DB_meta) - flush_bits |= RADV_CMD_FLAG_INV_L2_METADATA; - if (!image_is_coherent) - flush_bits |= RADV_CMD_FLAG_INV_L2; - break; - case VK_ACCESS_SHADER_WRITE_BIT: - break; - case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT: - case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT: - if (flush_CB) - flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB; - if (has_CB_meta) - flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; - break; - case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT: - case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: - if (flush_DB) - flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB; - if (has_DB_meta) - flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; - break; - case VK_ACCESS_MEMORY_READ_BIT: - case VK_ACCESS_MEMORY_WRITE_BIT: - flush_bits |= RADV_CMD_FLAG_INV_VCACHE | - RADV_CMD_FLAG_INV_SCACHE; - if (!image_is_coherent) - flush_bits |= RADV_CMD_FLAG_INV_L2; - if (flush_CB) - flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB; - if (has_CB_meta) - flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; - if (flush_DB) - flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB; - if (has_DB_meta) - flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; - break; - default: - break; - } - } - return flush_bits; + if (!image_is_coherent) + flush_bits |= RADV_CMD_FLAG_INV_L2; + if (has_CB_meta) + flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; + if (has_DB_meta) + flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; + break; + default: + break; + } + } + return flush_bits; } -void radv_subpass_barrier(struct radv_cmd_buffer *cmd_buffer, - const struct radv_subpass_barrier *barrier) +enum radv_cmd_flush_bits +radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags dst_flags, + const struct radv_image *image) { - struct radv_framebuffer *fb = cmd_buffer->state.framebuffer; - if (fb && !fb->imageless) { - for (int i = 0; i < fb->attachment_count; ++i) { - cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, barrier->src_access_mask, - fb->attachments[i]->image); - } - } else { - cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, barrier->src_access_mask, - NULL); - } + bool has_CB_meta = true, has_DB_meta = true; + enum radv_cmd_flush_bits flush_bits = 0; + bool flush_CB = true, flush_DB = true; + bool image_is_coherent = radv_image_is_l2_coherent(cmd_buffer->device, image); - radv_stage_flush(cmd_buffer, barrier->src_stage_mask); + if (image) { + if (!(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) { + flush_CB = false; + flush_DB = false; + } - if (fb && !fb->imageless) { - for (int i = 0; i < fb->attachment_count; ++i) { - cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask, - fb->attachments[i]->image); - } - } else { - cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask, - NULL); - } + if (!radv_image_has_CB_metadata(image)) + has_CB_meta = false; + if (!radv_image_has_htile(image)) + has_DB_meta = false; + } + + u_foreach_bit(b, dst_flags) + { + switch ((VkAccessFlagBits)(1 << b)) { + case VK_ACCESS_INDIRECT_COMMAND_READ_BIT: + case VK_ACCESS_INDEX_READ_BIT: + case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT: + break; + case VK_ACCESS_UNIFORM_READ_BIT: + flush_bits |= RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_SCACHE; + break; + case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT: + case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT: + case VK_ACCESS_TRANSFER_READ_BIT: + case VK_ACCESS_TRANSFER_WRITE_BIT: + flush_bits |= RADV_CMD_FLAG_INV_VCACHE; + + if (has_CB_meta || has_DB_meta) + flush_bits |= RADV_CMD_FLAG_INV_L2_METADATA; + if (!image_is_coherent) + flush_bits |= RADV_CMD_FLAG_INV_L2; + break; + case VK_ACCESS_SHADER_READ_BIT: + flush_bits |= RADV_CMD_FLAG_INV_VCACHE; + /* Unlike LLVM, ACO uses SMEM for SSBOs and we have to + * invalidate the scalar cache. */ + if (!cmd_buffer->device->physical_device->use_llvm && !image) + flush_bits |= RADV_CMD_FLAG_INV_SCACHE; + + if (has_CB_meta || has_DB_meta) + flush_bits |= RADV_CMD_FLAG_INV_L2_METADATA; + if (!image_is_coherent) + flush_bits |= RADV_CMD_FLAG_INV_L2; + break; + case VK_ACCESS_SHADER_WRITE_BIT: + break; + case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT: + case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT: + if (flush_CB) + flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB; + if (has_CB_meta) + flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; + break; + case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT: + case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: + if (flush_DB) + flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB; + if (has_DB_meta) + flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; + break; + case VK_ACCESS_MEMORY_READ_BIT: + case VK_ACCESS_MEMORY_WRITE_BIT: + flush_bits |= RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_SCACHE; + if (!image_is_coherent) + flush_bits |= RADV_CMD_FLAG_INV_L2; + if (flush_CB) + flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB; + if (has_CB_meta) + flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; + if (flush_DB) + flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB; + if (has_DB_meta) + flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; + break; + default: + break; + } + } + return flush_bits; +} + +void +radv_subpass_barrier(struct radv_cmd_buffer *cmd_buffer, const struct radv_subpass_barrier *barrier) +{ + struct radv_framebuffer *fb = cmd_buffer->state.framebuffer; + if (fb && !fb->imageless) { + for (int i = 0; i < fb->attachment_count; ++i) { + cmd_buffer->state.flush_bits |= + radv_src_access_flush(cmd_buffer, barrier->src_access_mask, fb->attachments[i]->image); + } + } else { + cmd_buffer->state.flush_bits |= + radv_src_access_flush(cmd_buffer, barrier->src_access_mask, NULL); + } + + radv_stage_flush(cmd_buffer, barrier->src_stage_mask); + + if (fb && !fb->imageless) { + for (int i = 0; i < fb->attachment_count; ++i) { + cmd_buffer->state.flush_bits |= + radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask, fb->attachments[i]->image); + } + } else { + cmd_buffer->state.flush_bits |= + radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask, NULL); + } } uint32_t radv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer) { - struct radv_cmd_state *state = &cmd_buffer->state; - uint32_t subpass_id = state->subpass - state->pass->subpasses; + struct radv_cmd_state *state = &cmd_buffer->state; + uint32_t subpass_id = state->subpass - state->pass->subpasses; - /* The id of this subpass shouldn't exceed the number of subpasses in - * this render pass minus 1. - */ - assert(subpass_id < state->pass->subpass_count); - return subpass_id; + /* The id of this subpass shouldn't exceed the number of subpasses in + * this render pass minus 1. + */ + assert(subpass_id < state->pass->subpass_count); + return subpass_id; } static struct radv_sample_locations_state * -radv_get_attachment_sample_locations(struct radv_cmd_buffer *cmd_buffer, - uint32_t att_idx, - bool begin_subpass) +radv_get_attachment_sample_locations(struct radv_cmd_buffer *cmd_buffer, uint32_t att_idx, + bool begin_subpass) { - struct radv_cmd_state *state = &cmd_buffer->state; - uint32_t subpass_id = radv_get_subpass_id(cmd_buffer); - struct radv_image_view *view = state->attachments[att_idx].iview; + struct radv_cmd_state *state = &cmd_buffer->state; + uint32_t subpass_id = radv_get_subpass_id(cmd_buffer); + struct radv_image_view *view = state->attachments[att_idx].iview; - if (view->image->info.samples == 1) - return NULL; + if (view->image->info.samples == 1) + return NULL; - if (state->pass->attachments[att_idx].first_subpass_idx == subpass_id) { - /* Return the initial sample locations if this is the initial - * layout transition of the given subpass attachemnt. - */ - if (state->attachments[att_idx].sample_location.count > 0) - return &state->attachments[att_idx].sample_location; - } else { - /* Otherwise return the subpass sample locations if defined. */ - if (state->subpass_sample_locs) { - /* Because the driver sets the current subpass before - * initial layout transitions, we should use the sample - * locations from the previous subpass to avoid an - * off-by-one problem. Otherwise, use the sample - * locations for the current subpass for final layout - * transitions. - */ - if (begin_subpass) - subpass_id--; + if (state->pass->attachments[att_idx].first_subpass_idx == subpass_id) { + /* Return the initial sample locations if this is the initial + * layout transition of the given subpass attachemnt. + */ + if (state->attachments[att_idx].sample_location.count > 0) + return &state->attachments[att_idx].sample_location; + } else { + /* Otherwise return the subpass sample locations if defined. */ + if (state->subpass_sample_locs) { + /* Because the driver sets the current subpass before + * initial layout transitions, we should use the sample + * locations from the previous subpass to avoid an + * off-by-one problem. Otherwise, use the sample + * locations for the current subpass for final layout + * transitions. + */ + if (begin_subpass) + subpass_id--; - for (uint32_t i = 0; i < state->num_subpass_sample_locs; i++) { - if (state->subpass_sample_locs[i].subpass_idx == subpass_id) - return &state->subpass_sample_locs[i].sample_location; - } - } - } + for (uint32_t i = 0; i < state->num_subpass_sample_locs; i++) { + if (state->subpass_sample_locs[i].subpass_idx == subpass_id) + return &state->subpass_sample_locs[i].sample_location; + } + } + } - return NULL; + return NULL; } -static void radv_handle_subpass_image_transition(struct radv_cmd_buffer *cmd_buffer, - struct radv_subpass_attachment att, - bool begin_subpass) +static void +radv_handle_subpass_image_transition(struct radv_cmd_buffer *cmd_buffer, + struct radv_subpass_attachment att, bool begin_subpass) { - unsigned idx = att.attachment; - struct radv_image_view *view = cmd_buffer->state.attachments[idx].iview; - struct radv_sample_locations_state *sample_locs; - VkImageSubresourceRange range; - range.aspectMask = view->aspect_mask; - range.baseMipLevel = view->base_mip; - range.levelCount = 1; - range.baseArrayLayer = view->base_layer; - range.layerCount = cmd_buffer->state.framebuffer->layers; + unsigned idx = att.attachment; + struct radv_image_view *view = cmd_buffer->state.attachments[idx].iview; + struct radv_sample_locations_state *sample_locs; + VkImageSubresourceRange range; + range.aspectMask = view->aspect_mask; + range.baseMipLevel = view->base_mip; + range.levelCount = 1; + range.baseArrayLayer = view->base_layer; + range.layerCount = cmd_buffer->state.framebuffer->layers; - if (cmd_buffer->state.subpass->view_mask) { - /* If the current subpass uses multiview, the driver might have - * performed a fast color/depth clear to the whole image - * (including all layers). To make sure the driver will - * decompress the image correctly (if needed), we have to - * account for the "real" number of layers. If the view mask is - * sparse, this will decompress more layers than needed. - */ - range.layerCount = util_last_bit(cmd_buffer->state.subpass->view_mask); - } + if (cmd_buffer->state.subpass->view_mask) { + /* If the current subpass uses multiview, the driver might have + * performed a fast color/depth clear to the whole image + * (including all layers). To make sure the driver will + * decompress the image correctly (if needed), we have to + * account for the "real" number of layers. If the view mask is + * sparse, this will decompress more layers than needed. + */ + range.layerCount = util_last_bit(cmd_buffer->state.subpass->view_mask); + } - /* Get the subpass sample locations for the given attachment, if NULL - * is returned the driver will use the default HW locations. - */ - sample_locs = radv_get_attachment_sample_locations(cmd_buffer, idx, - begin_subpass); + /* Get the subpass sample locations for the given attachment, if NULL + * is returned the driver will use the default HW locations. + */ + sample_locs = radv_get_attachment_sample_locations(cmd_buffer, idx, begin_subpass); - /* Determine if the subpass uses separate depth/stencil layouts. */ - bool uses_separate_depth_stencil_layouts = false; - if ((cmd_buffer->state.attachments[idx].current_layout != - cmd_buffer->state.attachments[idx].current_stencil_layout) || - (att.layout != att.stencil_layout)) { - uses_separate_depth_stencil_layouts = true; - } + /* Determine if the subpass uses separate depth/stencil layouts. */ + bool uses_separate_depth_stencil_layouts = false; + if ((cmd_buffer->state.attachments[idx].current_layout != + cmd_buffer->state.attachments[idx].current_stencil_layout) || + (att.layout != att.stencil_layout)) { + uses_separate_depth_stencil_layouts = true; + } - /* For separate layouts, perform depth and stencil transitions - * separately. - */ - if (uses_separate_depth_stencil_layouts && - (range.aspectMask == (VK_IMAGE_ASPECT_DEPTH_BIT | - VK_IMAGE_ASPECT_STENCIL_BIT))) { - /* Depth-only transitions. */ - range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; - radv_handle_image_transition(cmd_buffer, - view->image, - cmd_buffer->state.attachments[idx].current_layout, - cmd_buffer->state.attachments[idx].current_in_render_loop, - att.layout, att.in_render_loop, - 0, 0, &range, sample_locs); - - /* Stencil-only transitions. */ - range.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT; - radv_handle_image_transition(cmd_buffer, - view->image, - cmd_buffer->state.attachments[idx].current_stencil_layout, - cmd_buffer->state.attachments[idx].current_in_render_loop, - att.stencil_layout, att.in_render_loop, - 0, 0, &range, sample_locs); - } else { - radv_handle_image_transition(cmd_buffer, - view->image, - cmd_buffer->state.attachments[idx].current_layout, - cmd_buffer->state.attachments[idx].current_in_render_loop, - att.layout, att.in_render_loop, - 0, 0, &range, sample_locs); - } - - cmd_buffer->state.attachments[idx].current_layout = att.layout; - cmd_buffer->state.attachments[idx].current_stencil_layout = att.stencil_layout; - cmd_buffer->state.attachments[idx].current_in_render_loop = att.in_render_loop; + /* For separate layouts, perform depth and stencil transitions + * separately. + */ + if (uses_separate_depth_stencil_layouts && + (range.aspectMask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) { + /* Depth-only transitions. */ + range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; + radv_handle_image_transition(cmd_buffer, view->image, + cmd_buffer->state.attachments[idx].current_layout, + cmd_buffer->state.attachments[idx].current_in_render_loop, + att.layout, att.in_render_loop, 0, 0, &range, sample_locs); + /* Stencil-only transitions. */ + range.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT; + radv_handle_image_transition( + cmd_buffer, view->image, cmd_buffer->state.attachments[idx].current_stencil_layout, + cmd_buffer->state.attachments[idx].current_in_render_loop, att.stencil_layout, + att.in_render_loop, 0, 0, &range, sample_locs); + } else { + radv_handle_image_transition(cmd_buffer, view->image, + cmd_buffer->state.attachments[idx].current_layout, + cmd_buffer->state.attachments[idx].current_in_render_loop, + att.layout, att.in_render_loop, 0, 0, &range, sample_locs); + } + cmd_buffer->state.attachments[idx].current_layout = att.layout; + cmd_buffer->state.attachments[idx].current_stencil_layout = att.stencil_layout; + cmd_buffer->state.attachments[idx].current_in_render_loop = att.in_render_loop; } void -radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer, - const struct radv_subpass *subpass) +radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer, const struct radv_subpass *subpass) { - cmd_buffer->state.subpass = subpass; + cmd_buffer->state.subpass = subpass; - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER; + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER; } static VkResult radv_cmd_state_setup_sample_locations(struct radv_cmd_buffer *cmd_buffer, - struct radv_render_pass *pass, - const VkRenderPassBeginInfo *info) + struct radv_render_pass *pass, + const VkRenderPassBeginInfo *info) { - const struct VkRenderPassSampleLocationsBeginInfoEXT *sample_locs = - vk_find_struct_const(info->pNext, - RENDER_PASS_SAMPLE_LOCATIONS_BEGIN_INFO_EXT); - struct radv_cmd_state *state = &cmd_buffer->state; + const struct VkRenderPassSampleLocationsBeginInfoEXT *sample_locs = + vk_find_struct_const(info->pNext, RENDER_PASS_SAMPLE_LOCATIONS_BEGIN_INFO_EXT); + struct radv_cmd_state *state = &cmd_buffer->state; - if (!sample_locs) { - state->subpass_sample_locs = NULL; - return VK_SUCCESS; - } + if (!sample_locs) { + state->subpass_sample_locs = NULL; + return VK_SUCCESS; + } - for (uint32_t i = 0; i < sample_locs->attachmentInitialSampleLocationsCount; i++) { - const VkAttachmentSampleLocationsEXT *att_sample_locs = - &sample_locs->pAttachmentInitialSampleLocations[i]; - uint32_t att_idx = att_sample_locs->attachmentIndex; - struct radv_image *image = cmd_buffer->state.attachments[att_idx].iview->image; + for (uint32_t i = 0; i < sample_locs->attachmentInitialSampleLocationsCount; i++) { + const VkAttachmentSampleLocationsEXT *att_sample_locs = + &sample_locs->pAttachmentInitialSampleLocations[i]; + uint32_t att_idx = att_sample_locs->attachmentIndex; + struct radv_image *image = cmd_buffer->state.attachments[att_idx].iview->image; - assert(vk_format_is_depth_or_stencil(image->vk_format)); + assert(vk_format_is_depth_or_stencil(image->vk_format)); - /* From the Vulkan spec 1.1.108: - * - * "If the image referenced by the framebuffer attachment at - * index attachmentIndex was not created with - * VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT - * then the values specified in sampleLocationsInfo are - * ignored." - */ - if (!(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT)) - continue; + /* From the Vulkan spec 1.1.108: + * + * "If the image referenced by the framebuffer attachment at + * index attachmentIndex was not created with + * VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT + * then the values specified in sampleLocationsInfo are + * ignored." + */ + if (!(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT)) + continue; - const VkSampleLocationsInfoEXT *sample_locs_info = - &att_sample_locs->sampleLocationsInfo; + const VkSampleLocationsInfoEXT *sample_locs_info = &att_sample_locs->sampleLocationsInfo; - state->attachments[att_idx].sample_location.per_pixel = - sample_locs_info->sampleLocationsPerPixel; - state->attachments[att_idx].sample_location.grid_size = - sample_locs_info->sampleLocationGridSize; - state->attachments[att_idx].sample_location.count = - sample_locs_info->sampleLocationsCount; - typed_memcpy(&state->attachments[att_idx].sample_location.locations[0], - sample_locs_info->pSampleLocations, - sample_locs_info->sampleLocationsCount); - } + state->attachments[att_idx].sample_location.per_pixel = + sample_locs_info->sampleLocationsPerPixel; + state->attachments[att_idx].sample_location.grid_size = + sample_locs_info->sampleLocationGridSize; + state->attachments[att_idx].sample_location.count = sample_locs_info->sampleLocationsCount; + typed_memcpy(&state->attachments[att_idx].sample_location.locations[0], + sample_locs_info->pSampleLocations, sample_locs_info->sampleLocationsCount); + } - state->subpass_sample_locs = vk_alloc(&cmd_buffer->pool->alloc, - sample_locs->postSubpassSampleLocationsCount * - sizeof(state->subpass_sample_locs[0]), - 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (state->subpass_sample_locs == NULL) { - cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY; - return cmd_buffer->record_result; - } + state->subpass_sample_locs = + vk_alloc(&cmd_buffer->pool->alloc, + sample_locs->postSubpassSampleLocationsCount * sizeof(state->subpass_sample_locs[0]), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (state->subpass_sample_locs == NULL) { + cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY; + return cmd_buffer->record_result; + } - state->num_subpass_sample_locs = sample_locs->postSubpassSampleLocationsCount; + state->num_subpass_sample_locs = sample_locs->postSubpassSampleLocationsCount; - for (uint32_t i = 0; i < sample_locs->postSubpassSampleLocationsCount; i++) { - const VkSubpassSampleLocationsEXT *subpass_sample_locs_info = - &sample_locs->pPostSubpassSampleLocations[i]; - const VkSampleLocationsInfoEXT *sample_locs_info = - &subpass_sample_locs_info->sampleLocationsInfo; + for (uint32_t i = 0; i < sample_locs->postSubpassSampleLocationsCount; i++) { + const VkSubpassSampleLocationsEXT *subpass_sample_locs_info = + &sample_locs->pPostSubpassSampleLocations[i]; + const VkSampleLocationsInfoEXT *sample_locs_info = + &subpass_sample_locs_info->sampleLocationsInfo; - state->subpass_sample_locs[i].subpass_idx = - subpass_sample_locs_info->subpassIndex; - state->subpass_sample_locs[i].sample_location.per_pixel = - sample_locs_info->sampleLocationsPerPixel; - state->subpass_sample_locs[i].sample_location.grid_size = - sample_locs_info->sampleLocationGridSize; - state->subpass_sample_locs[i].sample_location.count = - sample_locs_info->sampleLocationsCount; - typed_memcpy(&state->subpass_sample_locs[i].sample_location.locations[0], - sample_locs_info->pSampleLocations, - sample_locs_info->sampleLocationsCount); - } + state->subpass_sample_locs[i].subpass_idx = subpass_sample_locs_info->subpassIndex; + state->subpass_sample_locs[i].sample_location.per_pixel = + sample_locs_info->sampleLocationsPerPixel; + state->subpass_sample_locs[i].sample_location.grid_size = + sample_locs_info->sampleLocationGridSize; + state->subpass_sample_locs[i].sample_location.count = sample_locs_info->sampleLocationsCount; + typed_memcpy(&state->subpass_sample_locs[i].sample_location.locations[0], + sample_locs_info->pSampleLocations, sample_locs_info->sampleLocationsCount); + } - return VK_SUCCESS; + return VK_SUCCESS; } static VkResult -radv_cmd_state_setup_attachments(struct radv_cmd_buffer *cmd_buffer, - struct radv_render_pass *pass, - const VkRenderPassBeginInfo *info, - const struct radv_extra_render_pass_begin_info *extra) +radv_cmd_state_setup_attachments(struct radv_cmd_buffer *cmd_buffer, struct radv_render_pass *pass, + const VkRenderPassBeginInfo *info, + const struct radv_extra_render_pass_begin_info *extra) { - struct radv_cmd_state *state = &cmd_buffer->state; - const struct VkRenderPassAttachmentBeginInfo *attachment_info = NULL; + struct radv_cmd_state *state = &cmd_buffer->state; + const struct VkRenderPassAttachmentBeginInfo *attachment_info = NULL; - if (info) { - attachment_info = vk_find_struct_const(info->pNext, - RENDER_PASS_ATTACHMENT_BEGIN_INFO); - } + if (info) { + attachment_info = vk_find_struct_const(info->pNext, RENDER_PASS_ATTACHMENT_BEGIN_INFO); + } + if (pass->attachment_count == 0) { + state->attachments = NULL; + return VK_SUCCESS; + } - if (pass->attachment_count == 0) { - state->attachments = NULL; - return VK_SUCCESS; - } + state->attachments = + vk_alloc(&cmd_buffer->pool->alloc, pass->attachment_count * sizeof(state->attachments[0]), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (state->attachments == NULL) { + cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY; + return cmd_buffer->record_result; + } - state->attachments = vk_alloc(&cmd_buffer->pool->alloc, - pass->attachment_count * - sizeof(state->attachments[0]), - 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (state->attachments == NULL) { - cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY; - return cmd_buffer->record_result; - } + for (uint32_t i = 0; i < pass->attachment_count; ++i) { + struct radv_render_pass_attachment *att = &pass->attachments[i]; + VkImageAspectFlags att_aspects = vk_format_aspects(att->format); + VkImageAspectFlags clear_aspects = 0; - for (uint32_t i = 0; i < pass->attachment_count; ++i) { - struct radv_render_pass_attachment *att = &pass->attachments[i]; - VkImageAspectFlags att_aspects = vk_format_aspects(att->format); - VkImageAspectFlags clear_aspects = 0; + if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) { + /* color attachment */ + if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT; + } + } else { + /* depthstencil attachment */ + if ((att_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && + att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; + if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && + att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE) + clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; + } + if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && + att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; + } + } - if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) { - /* color attachment */ - if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT; - } - } else { - /* depthstencil attachment */ - if ((att_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && - att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; - if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && - att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE) - clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; - } - if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && - att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; - } - } + state->attachments[i].pending_clear_aspects = clear_aspects; + state->attachments[i].cleared_views = 0; + if (clear_aspects && info) { + assert(info->clearValueCount > i); + state->attachments[i].clear_value = info->pClearValues[i]; + } - state->attachments[i].pending_clear_aspects = clear_aspects; - state->attachments[i].cleared_views = 0; - if (clear_aspects && info) { - assert(info->clearValueCount > i); - state->attachments[i].clear_value = info->pClearValues[i]; - } + state->attachments[i].current_layout = att->initial_layout; + state->attachments[i].current_in_render_loop = false; + state->attachments[i].current_stencil_layout = att->stencil_initial_layout; + state->attachments[i].disable_dcc = extra && extra->disable_dcc; + state->attachments[i].sample_location.count = 0; - state->attachments[i].current_layout = att->initial_layout; - state->attachments[i].current_in_render_loop = false; - state->attachments[i].current_stencil_layout = att->stencil_initial_layout; - state->attachments[i].disable_dcc = extra && extra->disable_dcc; - state->attachments[i].sample_location.count = 0; + struct radv_image_view *iview; + if (attachment_info && attachment_info->attachmentCount > i) { + iview = radv_image_view_from_handle(attachment_info->pAttachments[i]); + } else { + iview = state->framebuffer->attachments[i]; + } - struct radv_image_view *iview; - if (attachment_info && attachment_info->attachmentCount > i) { - iview = radv_image_view_from_handle(attachment_info->pAttachments[i]); - } else { - iview = state->framebuffer->attachments[i]; - } + state->attachments[i].iview = iview; + if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + radv_initialise_ds_surface(cmd_buffer->device, &state->attachments[i].ds, iview); + } else { + radv_initialise_color_surface(cmd_buffer->device, &state->attachments[i].cb, iview); + } + } - state->attachments[i].iview = iview; - if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { - radv_initialise_ds_surface(cmd_buffer->device, &state->attachments[i].ds, iview); - } else { - radv_initialise_color_surface(cmd_buffer->device, &state->attachments[i].cb, iview); - } - } - - return VK_SUCCESS; + return VK_SUCCESS; } -VkResult radv_AllocateCommandBuffers( - VkDevice _device, - const VkCommandBufferAllocateInfo *pAllocateInfo, - VkCommandBuffer *pCommandBuffers) +VkResult +radv_AllocateCommandBuffers(VkDevice _device, const VkCommandBufferAllocateInfo *pAllocateInfo, + VkCommandBuffer *pCommandBuffers) { - RADV_FROM_HANDLE(radv_device, device, _device); - RADV_FROM_HANDLE(radv_cmd_pool, pool, pAllocateInfo->commandPool); + RADV_FROM_HANDLE(radv_device, device, _device); + RADV_FROM_HANDLE(radv_cmd_pool, pool, pAllocateInfo->commandPool); - VkResult result = VK_SUCCESS; - uint32_t i; + VkResult result = VK_SUCCESS; + uint32_t i; - for (i = 0; i < pAllocateInfo->commandBufferCount; i++) { + for (i = 0; i < pAllocateInfo->commandBufferCount; i++) { - if (!list_is_empty(&pool->free_cmd_buffers)) { - struct radv_cmd_buffer *cmd_buffer = list_first_entry(&pool->free_cmd_buffers, struct radv_cmd_buffer, pool_link); + if (!list_is_empty(&pool->free_cmd_buffers)) { + struct radv_cmd_buffer *cmd_buffer = + list_first_entry(&pool->free_cmd_buffers, struct radv_cmd_buffer, pool_link); - list_del(&cmd_buffer->pool_link); - list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); + list_del(&cmd_buffer->pool_link); + list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); - result = radv_reset_cmd_buffer(cmd_buffer); - cmd_buffer->level = pAllocateInfo->level; - vk_object_base_reset(&cmd_buffer->base); + result = radv_reset_cmd_buffer(cmd_buffer); + cmd_buffer->level = pAllocateInfo->level; + vk_object_base_reset(&cmd_buffer->base); - pCommandBuffers[i] = radv_cmd_buffer_to_handle(cmd_buffer); - } else { - result = radv_create_cmd_buffer(device, pool, pAllocateInfo->level, - &pCommandBuffers[i]); - } - if (result != VK_SUCCESS) - break; - } + pCommandBuffers[i] = radv_cmd_buffer_to_handle(cmd_buffer); + } else { + result = radv_create_cmd_buffer(device, pool, pAllocateInfo->level, &pCommandBuffers[i]); + } + if (result != VK_SUCCESS) + break; + } - if (result != VK_SUCCESS) { - radv_FreeCommandBuffers(_device, pAllocateInfo->commandPool, - i, pCommandBuffers); + if (result != VK_SUCCESS) { + radv_FreeCommandBuffers(_device, pAllocateInfo->commandPool, i, pCommandBuffers); - /* From the Vulkan 1.0.66 spec: - * - * "vkAllocateCommandBuffers can be used to create multiple - * command buffers. If the creation of any of those command - * buffers fails, the implementation must destroy all - * successfully created command buffer objects from this - * command, set all entries of the pCommandBuffers array to - * NULL and return the error." - */ - memset(pCommandBuffers, 0, - sizeof(*pCommandBuffers) * pAllocateInfo->commandBufferCount); - } + /* From the Vulkan 1.0.66 spec: + * + * "vkAllocateCommandBuffers can be used to create multiple + * command buffers. If the creation of any of those command + * buffers fails, the implementation must destroy all + * successfully created command buffer objects from this + * command, set all entries of the pCommandBuffers array to + * NULL and return the error." + */ + memset(pCommandBuffers, 0, sizeof(*pCommandBuffers) * pAllocateInfo->commandBufferCount); + } - return result; + return result; } -void radv_FreeCommandBuffers( - VkDevice device, - VkCommandPool commandPool, - uint32_t commandBufferCount, - const VkCommandBuffer *pCommandBuffers) +void +radv_FreeCommandBuffers(VkDevice device, VkCommandPool commandPool, uint32_t commandBufferCount, + const VkCommandBuffer *pCommandBuffers) { - for (uint32_t i = 0; i < commandBufferCount; i++) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, pCommandBuffers[i]); + for (uint32_t i = 0; i < commandBufferCount; i++) { + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, pCommandBuffers[i]); - if (cmd_buffer) { - if (cmd_buffer->pool) { - list_del(&cmd_buffer->pool_link); - list_addtail(&cmd_buffer->pool_link, &cmd_buffer->pool->free_cmd_buffers); - } else - radv_destroy_cmd_buffer(cmd_buffer); - - } - } + if (cmd_buffer) { + if (cmd_buffer->pool) { + list_del(&cmd_buffer->pool_link); + list_addtail(&cmd_buffer->pool_link, &cmd_buffer->pool->free_cmd_buffers); + } else + radv_destroy_cmd_buffer(cmd_buffer); + } + } } -VkResult radv_ResetCommandBuffer( - VkCommandBuffer commandBuffer, - VkCommandBufferResetFlags flags) +VkResult +radv_ResetCommandBuffer(VkCommandBuffer commandBuffer, VkCommandBufferResetFlags flags) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - return radv_reset_cmd_buffer(cmd_buffer); + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + return radv_reset_cmd_buffer(cmd_buffer); } -VkResult radv_BeginCommandBuffer( - VkCommandBuffer commandBuffer, - const VkCommandBufferBeginInfo *pBeginInfo) +VkResult +radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBeginInfo *pBeginInfo) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - VkResult result = VK_SUCCESS; + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + VkResult result = VK_SUCCESS; - if (cmd_buffer->status != RADV_CMD_BUFFER_STATUS_INITIAL) { - /* If the command buffer has already been resetted with - * vkResetCommandBuffer, no need to do it again. - */ - result = radv_reset_cmd_buffer(cmd_buffer); - if (result != VK_SUCCESS) - return result; - } + if (cmd_buffer->status != RADV_CMD_BUFFER_STATUS_INITIAL) { + /* If the command buffer has already been resetted with + * vkResetCommandBuffer, no need to do it again. + */ + result = radv_reset_cmd_buffer(cmd_buffer); + if (result != VK_SUCCESS) + return result; + } - memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state)); - cmd_buffer->state.last_primitive_reset_en = -1; - cmd_buffer->state.last_index_type = -1; - cmd_buffer->state.last_num_instances = -1; - cmd_buffer->state.last_vertex_offset = -1; - cmd_buffer->state.last_first_instance = -1; - cmd_buffer->state.last_drawid = -1; - cmd_buffer->state.predication_type = -1; - cmd_buffer->state.last_sx_ps_downconvert = -1; - cmd_buffer->state.last_sx_blend_opt_epsilon = -1; - cmd_buffer->state.last_sx_blend_opt_control = -1; - cmd_buffer->usage_flags = pBeginInfo->flags; + memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state)); + cmd_buffer->state.last_primitive_reset_en = -1; + cmd_buffer->state.last_index_type = -1; + cmd_buffer->state.last_num_instances = -1; + cmd_buffer->state.last_vertex_offset = -1; + cmd_buffer->state.last_first_instance = -1; + cmd_buffer->state.last_drawid = -1; + cmd_buffer->state.predication_type = -1; + cmd_buffer->state.last_sx_ps_downconvert = -1; + cmd_buffer->state.last_sx_blend_opt_epsilon = -1; + cmd_buffer->state.last_sx_blend_opt_control = -1; + cmd_buffer->usage_flags = pBeginInfo->flags; - if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY && - (pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)) { - assert(pBeginInfo->pInheritanceInfo); - cmd_buffer->state.framebuffer = radv_framebuffer_from_handle(pBeginInfo->pInheritanceInfo->framebuffer); - cmd_buffer->state.pass = radv_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass); + if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY && + (pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)) { + assert(pBeginInfo->pInheritanceInfo); + cmd_buffer->state.framebuffer = + radv_framebuffer_from_handle(pBeginInfo->pInheritanceInfo->framebuffer); + cmd_buffer->state.pass = + radv_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass); - struct radv_subpass *subpass = - &cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass]; + struct radv_subpass *subpass = + &cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass]; - if (cmd_buffer->state.framebuffer) { - result = radv_cmd_state_setup_attachments(cmd_buffer, cmd_buffer->state.pass, NULL, NULL); - if (result != VK_SUCCESS) - return result; - } + if (cmd_buffer->state.framebuffer) { + result = radv_cmd_state_setup_attachments(cmd_buffer, cmd_buffer->state.pass, NULL, NULL); + if (result != VK_SUCCESS) + return result; + } - cmd_buffer->state.inherited_pipeline_statistics = - pBeginInfo->pInheritanceInfo->pipelineStatistics; + cmd_buffer->state.inherited_pipeline_statistics = + pBeginInfo->pInheritanceInfo->pipelineStatistics; - radv_cmd_buffer_set_subpass(cmd_buffer, subpass); - } + radv_cmd_buffer_set_subpass(cmd_buffer, subpass); + } - if (unlikely(cmd_buffer->device->trace_bo)) - radv_cmd_buffer_trace_emit(cmd_buffer); + if (unlikely(cmd_buffer->device->trace_bo)) + radv_cmd_buffer_trace_emit(cmd_buffer); - radv_describe_begin_cmd_buffer(cmd_buffer); + radv_describe_begin_cmd_buffer(cmd_buffer); - cmd_buffer->status = RADV_CMD_BUFFER_STATUS_RECORDING; + cmd_buffer->status = RADV_CMD_BUFFER_STATUS_RECORDING; - return result; + return result; } -void radv_CmdBindVertexBuffers( - VkCommandBuffer commandBuffer, - uint32_t firstBinding, - uint32_t bindingCount, - const VkBuffer* pBuffers, - const VkDeviceSize* pOffsets) +void +radv_CmdBindVertexBuffers(VkCommandBuffer commandBuffer, uint32_t firstBinding, + uint32_t bindingCount, const VkBuffer *pBuffers, + const VkDeviceSize *pOffsets) { - radv_CmdBindVertexBuffers2EXT(commandBuffer, firstBinding, - bindingCount, pBuffers, pOffsets, - NULL, NULL); + radv_CmdBindVertexBuffers2EXT(commandBuffer, firstBinding, bindingCount, pBuffers, pOffsets, + NULL, NULL); } -void radv_CmdBindVertexBuffers2EXT( - VkCommandBuffer commandBuffer, - uint32_t firstBinding, - uint32_t bindingCount, - const VkBuffer* pBuffers, - const VkDeviceSize* pOffsets, - const VkDeviceSize* pSizes, - const VkDeviceSize* pStrides) +void +radv_CmdBindVertexBuffers2EXT(VkCommandBuffer commandBuffer, uint32_t firstBinding, + uint32_t bindingCount, const VkBuffer *pBuffers, + const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes, + const VkDeviceSize *pStrides) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_vertex_binding *vb = cmd_buffer->vertex_bindings; - bool changed = false; + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_vertex_binding *vb = cmd_buffer->vertex_bindings; + bool changed = false; - /* We have to defer setting up vertex buffer since we need the buffer - * stride from the pipeline. */ + /* We have to defer setting up vertex buffer since we need the buffer + * stride from the pipeline. */ - assert(firstBinding + bindingCount <= MAX_VBS); - for (uint32_t i = 0; i < bindingCount; i++) { - RADV_FROM_HANDLE(radv_buffer, buffer, pBuffers[i]); - uint32_t idx = firstBinding + i; - VkDeviceSize size = pSizes ? pSizes[i] : 0; - VkDeviceSize stride = pStrides ? pStrides[i] : 0; + assert(firstBinding + bindingCount <= MAX_VBS); + for (uint32_t i = 0; i < bindingCount; i++) { + RADV_FROM_HANDLE(radv_buffer, buffer, pBuffers[i]); + uint32_t idx = firstBinding + i; + VkDeviceSize size = pSizes ? pSizes[i] : 0; + VkDeviceSize stride = pStrides ? pStrides[i] : 0; - /* pSizes and pStrides are optional. */ - if (!changed && - (vb[idx].buffer != buffer || - vb[idx].offset != pOffsets[i] || - vb[idx].size != size || - vb[idx].stride != stride)) { - changed = true; - } + /* pSizes and pStrides are optional. */ + if (!changed && (vb[idx].buffer != buffer || vb[idx].offset != pOffsets[i] || + vb[idx].size != size || vb[idx].stride != stride)) { + changed = true; + } - vb[idx].buffer = buffer; - vb[idx].offset = pOffsets[i]; - vb[idx].size = size; - vb[idx].stride = stride; + vb[idx].buffer = buffer; + vb[idx].offset = pOffsets[i]; + vb[idx].size = size; + vb[idx].stride = stride; - if (buffer) { - radv_cs_add_buffer(cmd_buffer->device->ws, - cmd_buffer->cs, vb[idx].buffer->bo); - } - } + if (buffer) { + radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, vb[idx].buffer->bo); + } + } - if (!changed) { - /* No state changes. */ - return; - } + if (!changed) { + /* No state changes. */ + return; + } - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_VERTEX_BUFFER; + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_VERTEX_BUFFER; } static uint32_t vk_to_index_type(VkIndexType type) { - switch (type) { - case VK_INDEX_TYPE_UINT8_EXT: - return V_028A7C_VGT_INDEX_8; - case VK_INDEX_TYPE_UINT16: - return V_028A7C_VGT_INDEX_16; - case VK_INDEX_TYPE_UINT32: - return V_028A7C_VGT_INDEX_32; - default: - unreachable("invalid index type"); - } + switch (type) { + case VK_INDEX_TYPE_UINT8_EXT: + return V_028A7C_VGT_INDEX_8; + case VK_INDEX_TYPE_UINT16: + return V_028A7C_VGT_INDEX_16; + case VK_INDEX_TYPE_UINT32: + return V_028A7C_VGT_INDEX_32; + default: + unreachable("invalid index type"); + } } static uint32_t radv_get_vgt_index_size(uint32_t type) { - switch (type) { - case V_028A7C_VGT_INDEX_8: - return 1; - case V_028A7C_VGT_INDEX_16: - return 2; - case V_028A7C_VGT_INDEX_32: - return 4; - default: - unreachable("invalid index type"); - } + switch (type) { + case V_028A7C_VGT_INDEX_8: + return 1; + case V_028A7C_VGT_INDEX_16: + return 2; + case V_028A7C_VGT_INDEX_32: + return 4; + default: + unreachable("invalid index type"); + } } -void radv_CmdBindIndexBuffer( - VkCommandBuffer commandBuffer, - VkBuffer buffer, - VkDeviceSize offset, - VkIndexType indexType) +void +radv_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, + VkIndexType indexType) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - RADV_FROM_HANDLE(radv_buffer, index_buffer, buffer); + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + RADV_FROM_HANDLE(radv_buffer, index_buffer, buffer); - if (cmd_buffer->state.index_buffer == index_buffer && - cmd_buffer->state.index_offset == offset && - cmd_buffer->state.index_type == indexType) { - /* No state changes. */ - return; - } + if (cmd_buffer->state.index_buffer == index_buffer && cmd_buffer->state.index_offset == offset && + cmd_buffer->state.index_type == indexType) { + /* No state changes. */ + return; + } - cmd_buffer->state.index_buffer = index_buffer; - cmd_buffer->state.index_offset = offset; - cmd_buffer->state.index_type = vk_to_index_type(indexType); - cmd_buffer->state.index_va = radv_buffer_get_va(index_buffer->bo); - cmd_buffer->state.index_va += index_buffer->offset + offset; + cmd_buffer->state.index_buffer = index_buffer; + cmd_buffer->state.index_offset = offset; + cmd_buffer->state.index_type = vk_to_index_type(indexType); + cmd_buffer->state.index_va = radv_buffer_get_va(index_buffer->bo); + cmd_buffer->state.index_va += index_buffer->offset + offset; - int index_size = radv_get_vgt_index_size(vk_to_index_type(indexType)); - cmd_buffer->state.max_index_count = (index_buffer->size - offset) / index_size; - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER; - radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, index_buffer->bo); + int index_size = radv_get_vgt_index_size(vk_to_index_type(indexType)); + cmd_buffer->state.max_index_count = (index_buffer->size - offset) / index_size; + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER; + radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, index_buffer->bo); } - static void -radv_bind_descriptor_set(struct radv_cmd_buffer *cmd_buffer, - VkPipelineBindPoint bind_point, - struct radv_descriptor_set *set, unsigned idx) +radv_bind_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point, + struct radv_descriptor_set *set, unsigned idx) { - struct radeon_winsys *ws = cmd_buffer->device->ws; + struct radeon_winsys *ws = cmd_buffer->device->ws; - radv_set_descriptor_set(cmd_buffer, bind_point, set, idx); + radv_set_descriptor_set(cmd_buffer, bind_point, set, idx); - assert(set); + assert(set); - if (!cmd_buffer->device->use_global_bo_list) { - for (unsigned j = 0; j < set->header.buffer_count; ++j) - if (set->descriptors[j]) - radv_cs_add_buffer(ws, cmd_buffer->cs, set->descriptors[j]); - } + if (!cmd_buffer->device->use_global_bo_list) { + for (unsigned j = 0; j < set->header.buffer_count; ++j) + if (set->descriptors[j]) + radv_cs_add_buffer(ws, cmd_buffer->cs, set->descriptors[j]); + } - if(set->header.bo) - radv_cs_add_buffer(ws, cmd_buffer->cs, set->header.bo); + if (set->header.bo) + radv_cs_add_buffer(ws, cmd_buffer->cs, set->header.bo); } -void radv_CmdBindDescriptorSets( - VkCommandBuffer commandBuffer, - VkPipelineBindPoint pipelineBindPoint, - VkPipelineLayout _layout, - uint32_t firstSet, - uint32_t descriptorSetCount, - const VkDescriptorSet* pDescriptorSets, - uint32_t dynamicOffsetCount, - const uint32_t* pDynamicOffsets) +void +radv_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout _layout, uint32_t firstSet, uint32_t descriptorSetCount, + const VkDescriptorSet *pDescriptorSets, uint32_t dynamicOffsetCount, + const uint32_t *pDynamicOffsets) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout); - unsigned dyn_idx = 0; + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout); + unsigned dyn_idx = 0; - const bool no_dynamic_bounds = cmd_buffer->device->instance->debug_flags & RADV_DEBUG_NO_DYNAMIC_BOUNDS; - struct radv_descriptor_state *descriptors_state = - radv_get_descriptors_state(cmd_buffer, pipelineBindPoint); + const bool no_dynamic_bounds = + cmd_buffer->device->instance->debug_flags & RADV_DEBUG_NO_DYNAMIC_BOUNDS; + struct radv_descriptor_state *descriptors_state = + radv_get_descriptors_state(cmd_buffer, pipelineBindPoint); - for (unsigned i = 0; i < descriptorSetCount; ++i) { - unsigned set_idx = i + firstSet; - RADV_FROM_HANDLE(radv_descriptor_set, set, pDescriptorSets[i]); + for (unsigned i = 0; i < descriptorSetCount; ++i) { + unsigned set_idx = i + firstSet; + RADV_FROM_HANDLE(radv_descriptor_set, set, pDescriptorSets[i]); - /* If the set is already bound we only need to update the - * (potentially changed) dynamic offsets. */ - if (descriptors_state->sets[set_idx] != set || - !(descriptors_state->valid & (1u << set_idx))) { - radv_bind_descriptor_set(cmd_buffer, pipelineBindPoint, set, set_idx); - } + /* If the set is already bound we only need to update the + * (potentially changed) dynamic offsets. */ + if (descriptors_state->sets[set_idx] != set || + !(descriptors_state->valid & (1u << set_idx))) { + radv_bind_descriptor_set(cmd_buffer, pipelineBindPoint, set, set_idx); + } - for(unsigned j = 0; j < layout->set[set_idx].dynamic_offset_count; ++j, ++dyn_idx) { - unsigned idx = j + layout->set[i + firstSet].dynamic_offset_start; - uint32_t *dst = descriptors_state->dynamic_buffers + idx * 4; - assert(dyn_idx < dynamicOffsetCount); + for (unsigned j = 0; j < layout->set[set_idx].dynamic_offset_count; ++j, ++dyn_idx) { + unsigned idx = j + layout->set[i + firstSet].dynamic_offset_start; + uint32_t *dst = descriptors_state->dynamic_buffers + idx * 4; + assert(dyn_idx < dynamicOffsetCount); - struct radv_descriptor_range *range = set->header.dynamic_descriptors + j; + struct radv_descriptor_range *range = set->header.dynamic_descriptors + j; - if (!range->va) { - memset(dst, 0, 4 * 4); - } else { - uint64_t va = range->va + pDynamicOffsets[dyn_idx]; - dst[0] = va; - dst[1] = S_008F04_BASE_ADDRESS_HI(va >> 32); - dst[2] = no_dynamic_bounds ? 0xffffffffu : range->size; - dst[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); + if (!range->va) { + memset(dst, 0, 4 * 4); + } else { + uint64_t va = range->va + pDynamicOffsets[dyn_idx]; + dst[0] = va; + dst[1] = S_008F04_BASE_ADDRESS_HI(va >> 32); + dst[2] = no_dynamic_bounds ? 0xffffffffu : range->size; + dst[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); - if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) { - dst[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) | - S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | - S_008F0C_RESOURCE_LEVEL(1); - } else { - dst[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); - } - } + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) { + dst[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) | + S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1); + } else { + dst[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); + } + } - cmd_buffer->push_constant_stages |= layout->set[set_idx].dynamic_offset_stages; - } - } + cmd_buffer->push_constant_stages |= layout->set[set_idx].dynamic_offset_stages; + } + } } -static bool radv_init_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, - struct radv_descriptor_set *set, - struct radv_descriptor_set_layout *layout, - VkPipelineBindPoint bind_point) +static bool +radv_init_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, struct radv_descriptor_set *set, + struct radv_descriptor_set_layout *layout, + VkPipelineBindPoint bind_point) { - struct radv_descriptor_state *descriptors_state = - radv_get_descriptors_state(cmd_buffer, bind_point); - set->header.size = layout->size; - set->header.layout = layout; + struct radv_descriptor_state *descriptors_state = + radv_get_descriptors_state(cmd_buffer, bind_point); + set->header.size = layout->size; + set->header.layout = layout; - if (descriptors_state->push_set.capacity < set->header.size) { - size_t new_size = MAX2(set->header.size, 1024); - new_size = MAX2(new_size, 2 * descriptors_state->push_set.capacity); - new_size = MIN2(new_size, 96 * MAX_PUSH_DESCRIPTORS); + if (descriptors_state->push_set.capacity < set->header.size) { + size_t new_size = MAX2(set->header.size, 1024); + new_size = MAX2(new_size, 2 * descriptors_state->push_set.capacity); + new_size = MIN2(new_size, 96 * MAX_PUSH_DESCRIPTORS); - free(set->header.mapped_ptr); - set->header.mapped_ptr = malloc(new_size); + free(set->header.mapped_ptr); + set->header.mapped_ptr = malloc(new_size); - if (!set->header.mapped_ptr) { - descriptors_state->push_set.capacity = 0; - cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY; - return false; - } + if (!set->header.mapped_ptr) { + descriptors_state->push_set.capacity = 0; + cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY; + return false; + } - descriptors_state->push_set.capacity = new_size; - } + descriptors_state->push_set.capacity = new_size; + } - return true; + return true; } -void radv_meta_push_descriptor_set( - struct radv_cmd_buffer* cmd_buffer, - VkPipelineBindPoint pipelineBindPoint, - VkPipelineLayout _layout, - uint32_t set, - uint32_t descriptorWriteCount, - const VkWriteDescriptorSet* pDescriptorWrites) +void +radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, + VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout _layout, + uint32_t set, uint32_t descriptorWriteCount, + const VkWriteDescriptorSet *pDescriptorWrites) { - RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout); - struct radv_descriptor_set *push_set = - (struct radv_descriptor_set *)&cmd_buffer->meta_push_descriptors; - unsigned bo_offset; + RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout); + struct radv_descriptor_set *push_set = + (struct radv_descriptor_set *)&cmd_buffer->meta_push_descriptors; + unsigned bo_offset; - assert(set == 0); - assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR); + assert(set == 0); + assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR); - push_set->header.size = layout->set[set].layout->size; - push_set->header.layout = layout->set[set].layout; + push_set->header.size = layout->set[set].layout->size; + push_set->header.layout = layout->set[set].layout; - if (!radv_cmd_buffer_upload_alloc(cmd_buffer, push_set->header.size, - &bo_offset, (void**) &push_set->header.mapped_ptr)) - return; + if (!radv_cmd_buffer_upload_alloc(cmd_buffer, push_set->header.size, &bo_offset, + (void **)&push_set->header.mapped_ptr)) + return; - push_set->header.va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); - push_set->header.va += bo_offset; + push_set->header.va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); + push_set->header.va += bo_offset; - radv_update_descriptor_sets(cmd_buffer->device, cmd_buffer, - radv_descriptor_set_to_handle(push_set), - descriptorWriteCount, pDescriptorWrites, 0, NULL); + radv_update_descriptor_sets(cmd_buffer->device, cmd_buffer, + radv_descriptor_set_to_handle(push_set), descriptorWriteCount, + pDescriptorWrites, 0, NULL); - radv_set_descriptor_set(cmd_buffer, pipelineBindPoint, push_set, set); + radv_set_descriptor_set(cmd_buffer, pipelineBindPoint, push_set, set); } -void radv_CmdPushDescriptorSetKHR( - VkCommandBuffer commandBuffer, - VkPipelineBindPoint pipelineBindPoint, - VkPipelineLayout _layout, - uint32_t set, - uint32_t descriptorWriteCount, - const VkWriteDescriptorSet* pDescriptorWrites) +void +radv_CmdPushDescriptorSetKHR(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout _layout, uint32_t set, uint32_t descriptorWriteCount, + const VkWriteDescriptorSet *pDescriptorWrites) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout); - struct radv_descriptor_state *descriptors_state = - radv_get_descriptors_state(cmd_buffer, pipelineBindPoint); - struct radv_descriptor_set *push_set = - (struct radv_descriptor_set *)&descriptors_state->push_set.set; + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout); + struct radv_descriptor_state *descriptors_state = + radv_get_descriptors_state(cmd_buffer, pipelineBindPoint); + struct radv_descriptor_set *push_set = + (struct radv_descriptor_set *)&descriptors_state->push_set.set; - assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR); + assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR); - if (!radv_init_push_descriptor_set(cmd_buffer, push_set, - layout->set[set].layout, - pipelineBindPoint)) - return; + if (!radv_init_push_descriptor_set(cmd_buffer, push_set, layout->set[set].layout, + pipelineBindPoint)) + return; - /* Check that there are no inline uniform block updates when calling vkCmdPushDescriptorSetKHR() - * because it is invalid, according to Vulkan spec. - */ - for (int i = 0; i < descriptorWriteCount; i++) { - ASSERTED const VkWriteDescriptorSet *writeset = &pDescriptorWrites[i]; - assert(writeset->descriptorType != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT); - } + /* Check that there are no inline uniform block updates when calling vkCmdPushDescriptorSetKHR() + * because it is invalid, according to Vulkan spec. + */ + for (int i = 0; i < descriptorWriteCount; i++) { + ASSERTED const VkWriteDescriptorSet *writeset = &pDescriptorWrites[i]; + assert(writeset->descriptorType != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT); + } - radv_update_descriptor_sets(cmd_buffer->device, cmd_buffer, - radv_descriptor_set_to_handle(push_set), - descriptorWriteCount, pDescriptorWrites, 0, NULL); + radv_update_descriptor_sets(cmd_buffer->device, cmd_buffer, + radv_descriptor_set_to_handle(push_set), descriptorWriteCount, + pDescriptorWrites, 0, NULL); - radv_set_descriptor_set(cmd_buffer, pipelineBindPoint, push_set, set); - descriptors_state->push_dirty = true; + radv_set_descriptor_set(cmd_buffer, pipelineBindPoint, push_set, set); + descriptors_state->push_dirty = true; } -void radv_CmdPushDescriptorSetWithTemplateKHR( - VkCommandBuffer commandBuffer, - VkDescriptorUpdateTemplate descriptorUpdateTemplate, - VkPipelineLayout _layout, - uint32_t set, - const void* pData) +void +radv_CmdPushDescriptorSetWithTemplateKHR(VkCommandBuffer commandBuffer, + VkDescriptorUpdateTemplate descriptorUpdateTemplate, + VkPipelineLayout _layout, uint32_t set, const void *pData) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout); - RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate); - struct radv_descriptor_state *descriptors_state = - radv_get_descriptors_state(cmd_buffer, templ->bind_point); - struct radv_descriptor_set *push_set = - (struct radv_descriptor_set *)&descriptors_state->push_set.set; + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout); + RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate); + struct radv_descriptor_state *descriptors_state = + radv_get_descriptors_state(cmd_buffer, templ->bind_point); + struct radv_descriptor_set *push_set = + (struct radv_descriptor_set *)&descriptors_state->push_set.set; - assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR); + assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR); - if (!radv_init_push_descriptor_set(cmd_buffer, push_set, - layout->set[set].layout, - templ->bind_point)) - return; + if (!radv_init_push_descriptor_set(cmd_buffer, push_set, layout->set[set].layout, + templ->bind_point)) + return; - radv_update_descriptor_set_with_template(cmd_buffer->device, cmd_buffer, push_set, - descriptorUpdateTemplate, pData); + radv_update_descriptor_set_with_template(cmd_buffer->device, cmd_buffer, push_set, + descriptorUpdateTemplate, pData); - radv_set_descriptor_set(cmd_buffer, templ->bind_point, push_set, set); - descriptors_state->push_dirty = true; + radv_set_descriptor_set(cmd_buffer, templ->bind_point, push_set, set); + descriptors_state->push_dirty = true; } -void radv_CmdPushConstants(VkCommandBuffer commandBuffer, - VkPipelineLayout layout, - VkShaderStageFlags stageFlags, - uint32_t offset, - uint32_t size, - const void* pValues) +void +radv_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout, + VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size, + const void *pValues) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - memcpy(cmd_buffer->push_constants + offset, pValues, size); - cmd_buffer->push_constant_stages |= stageFlags; + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + memcpy(cmd_buffer->push_constants + offset, pValues, size); + cmd_buffer->push_constant_stages |= stageFlags; } -VkResult radv_EndCommandBuffer( - VkCommandBuffer commandBuffer) +VkResult +radv_EndCommandBuffer(VkCommandBuffer commandBuffer) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - radv_emit_mip_change_flush_default(cmd_buffer); + radv_emit_mip_change_flush_default(cmd_buffer); - if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER) { - if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX6) - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WB_L2; + if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER) { + if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX6) + cmd_buffer->state.flush_bits |= + RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WB_L2; - /* Make sure to sync all pending active queries at the end of - * command buffer. - */ - cmd_buffer->state.flush_bits |= cmd_buffer->active_query_flush_bits; + /* Make sure to sync all pending active queries at the end of + * command buffer. + */ + cmd_buffer->state.flush_bits |= cmd_buffer->active_query_flush_bits; - /* Since NGG streamout uses GDS, we need to make GDS idle when - * we leave the IB, otherwise another process might overwrite - * it while our shaders are busy. - */ - if (cmd_buffer->gds_needed) - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH; + /* Since NGG streamout uses GDS, we need to make GDS idle when + * we leave the IB, otherwise another process might overwrite + * it while our shaders are busy. + */ + if (cmd_buffer->gds_needed) + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH; - si_emit_cache_flush(cmd_buffer); - } + si_emit_cache_flush(cmd_buffer); + } - /* Make sure CP DMA is idle at the end of IBs because the kernel - * doesn't wait for it. - */ - si_cp_dma_wait_for_idle(cmd_buffer); + /* Make sure CP DMA is idle at the end of IBs because the kernel + * doesn't wait for it. + */ + si_cp_dma_wait_for_idle(cmd_buffer); - radv_describe_end_cmd_buffer(cmd_buffer); + radv_describe_end_cmd_buffer(cmd_buffer); - vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments); - vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.subpass_sample_locs); + vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments); + vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.subpass_sample_locs); - VkResult result = cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs); - if (result != VK_SUCCESS) - return vk_error(cmd_buffer->device->instance, result); + VkResult result = cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs); + if (result != VK_SUCCESS) + return vk_error(cmd_buffer->device->instance, result); - cmd_buffer->status = RADV_CMD_BUFFER_STATUS_EXECUTABLE; + cmd_buffer->status = RADV_CMD_BUFFER_STATUS_EXECUTABLE; - return cmd_buffer->record_result; + return cmd_buffer->record_result; } static void radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer) { - struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - if (!pipeline || pipeline == cmd_buffer->state.emitted_compute_pipeline) - return; + if (!pipeline || pipeline == cmd_buffer->state.emitted_compute_pipeline) + return; - assert(!pipeline->ctx_cs.cdw); + assert(!pipeline->ctx_cs.cdw); - cmd_buffer->state.emitted_compute_pipeline = pipeline; + cmd_buffer->state.emitted_compute_pipeline = pipeline; - radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->cs.cdw); - radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw); + radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->cs.cdw); + radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw); - cmd_buffer->compute_scratch_size_per_wave_needed = MAX2(cmd_buffer->compute_scratch_size_per_wave_needed, - pipeline->scratch_bytes_per_wave); - cmd_buffer->compute_scratch_waves_wanted = MAX2(cmd_buffer->compute_scratch_waves_wanted, - pipeline->max_waves); + cmd_buffer->compute_scratch_size_per_wave_needed = + MAX2(cmd_buffer->compute_scratch_size_per_wave_needed, pipeline->scratch_bytes_per_wave); + cmd_buffer->compute_scratch_waves_wanted = + MAX2(cmd_buffer->compute_scratch_waves_wanted, pipeline->max_waves); - radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, - pipeline->shaders[MESA_SHADER_COMPUTE]->bo); + radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, + pipeline->shaders[MESA_SHADER_COMPUTE]->bo); - if (unlikely(cmd_buffer->device->trace_bo)) - radv_save_pipeline(cmd_buffer, pipeline); -} - -static void radv_mark_descriptor_sets_dirty(struct radv_cmd_buffer *cmd_buffer, - VkPipelineBindPoint bind_point) -{ - struct radv_descriptor_state *descriptors_state = - radv_get_descriptors_state(cmd_buffer, bind_point); - - descriptors_state->dirty |= descriptors_state->valid; -} - -void radv_CmdBindPipeline( - VkCommandBuffer commandBuffer, - VkPipelineBindPoint pipelineBindPoint, - VkPipeline _pipeline) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline); - - switch (pipelineBindPoint) { - case VK_PIPELINE_BIND_POINT_COMPUTE: - if (cmd_buffer->state.compute_pipeline == pipeline) - return; - radv_mark_descriptor_sets_dirty(cmd_buffer, pipelineBindPoint); - - cmd_buffer->state.compute_pipeline = pipeline; - cmd_buffer->push_constant_stages |= VK_SHADER_STAGE_COMPUTE_BIT; - break; - case VK_PIPELINE_BIND_POINT_GRAPHICS: - if (cmd_buffer->state.pipeline == pipeline) - return; - radv_mark_descriptor_sets_dirty(cmd_buffer, pipelineBindPoint); - - bool vtx_emit_count_changed = !pipeline || - !cmd_buffer->state.pipeline || - cmd_buffer->state.pipeline->graphics.vtx_emit_num != - pipeline->graphics.vtx_emit_num || - cmd_buffer->state.pipeline->graphics.vtx_base_sgpr != - pipeline->graphics.vtx_base_sgpr; - cmd_buffer->state.pipeline = pipeline; - if (!pipeline) - break; - - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_PIPELINE; - cmd_buffer->push_constant_stages |= pipeline->active_stages; - - /* the new vertex shader might not have the same user regs */ - if (vtx_emit_count_changed) { - cmd_buffer->state.last_first_instance = -1; - cmd_buffer->state.last_vertex_offset = -1; - cmd_buffer->state.last_drawid = -1; - } - - /* Prefetch all pipeline shaders at first draw time. */ - cmd_buffer->state.prefetch_L2_mask |= RADV_PREFETCH_SHADERS; - - if ((cmd_buffer->device->physical_device->rad_info.chip_class == GFX10 || - cmd_buffer->device->physical_device->rad_info.family == CHIP_SIENNA_CICHLID) && - cmd_buffer->state.emitted_pipeline && - radv_pipeline_has_ngg(cmd_buffer->state.emitted_pipeline) && - !radv_pipeline_has_ngg(cmd_buffer->state.pipeline)) { - /* Transitioning from NGG to legacy GS requires - * VGT_FLUSH on GFX10 and Sienna Cichlid. VGT_FLUSH - * is also emitted at the beginning of IBs when legacy - * GS ring pointers are set. - */ - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_FLUSH; - } - - radv_bind_dynamic_state(cmd_buffer, &pipeline->dynamic_state); - radv_bind_streamout_state(cmd_buffer, pipeline); - - if (pipeline->graphics.esgs_ring_size > cmd_buffer->esgs_ring_size_needed) - cmd_buffer->esgs_ring_size_needed = pipeline->graphics.esgs_ring_size; - if (pipeline->graphics.gsvs_ring_size > cmd_buffer->gsvs_ring_size_needed) - cmd_buffer->gsvs_ring_size_needed = pipeline->graphics.gsvs_ring_size; - - if (radv_pipeline_has_tess(pipeline)) - cmd_buffer->tess_rings_needed = true; - break; - default: - assert(!"invalid bind point"); - break; - } -} - -void radv_CmdSetViewport( - VkCommandBuffer commandBuffer, - uint32_t firstViewport, - uint32_t viewportCount, - const VkViewport* pViewports) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_cmd_state *state = &cmd_buffer->state; - ASSERTED const uint32_t total_count = firstViewport + viewportCount; - - assert(firstViewport < MAX_VIEWPORTS); - assert(total_count >= 1 && total_count <= MAX_VIEWPORTS); - - if (total_count <= state->dynamic.viewport.count && - !memcmp(state->dynamic.viewport.viewports + firstViewport, - pViewports, viewportCount * sizeof(*pViewports))) { - return; - } - - if (state->dynamic.viewport.count < total_count) - state->dynamic.viewport.count = total_count; - - memcpy(state->dynamic.viewport.viewports + firstViewport, pViewports, - viewportCount * sizeof(*pViewports)); - - state->dirty |= RADV_CMD_DIRTY_DYNAMIC_VIEWPORT; -} - -void radv_CmdSetScissor( - VkCommandBuffer commandBuffer, - uint32_t firstScissor, - uint32_t scissorCount, - const VkRect2D* pScissors) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_cmd_state *state = &cmd_buffer->state; - ASSERTED const uint32_t total_count = firstScissor + scissorCount; - - assert(firstScissor < MAX_SCISSORS); - assert(total_count >= 1 && total_count <= MAX_SCISSORS); - - if (total_count <= state->dynamic.scissor.count && - !memcmp(state->dynamic.scissor.scissors + firstScissor, pScissors, - scissorCount * sizeof(*pScissors))) { - return; - } - - if (state->dynamic.scissor.count < total_count) - state->dynamic.scissor.count = total_count; - - memcpy(state->dynamic.scissor.scissors + firstScissor, pScissors, - scissorCount * sizeof(*pScissors)); - - state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR; -} - -void radv_CmdSetLineWidth( - VkCommandBuffer commandBuffer, - float lineWidth) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - - if (cmd_buffer->state.dynamic.line_width == lineWidth) - return; - - cmd_buffer->state.dynamic.line_width = lineWidth; - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH; -} - -void radv_CmdSetDepthBias( - VkCommandBuffer commandBuffer, - float depthBiasConstantFactor, - float depthBiasClamp, - float depthBiasSlopeFactor) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_cmd_state *state = &cmd_buffer->state; - - if (state->dynamic.depth_bias.bias == depthBiasConstantFactor && - state->dynamic.depth_bias.clamp == depthBiasClamp && - state->dynamic.depth_bias.slope == depthBiasSlopeFactor) { - return; - } - - state->dynamic.depth_bias.bias = depthBiasConstantFactor; - state->dynamic.depth_bias.clamp = depthBiasClamp; - state->dynamic.depth_bias.slope = depthBiasSlopeFactor; - - state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS; -} - -void radv_CmdSetBlendConstants( - VkCommandBuffer commandBuffer, - const float blendConstants[4]) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_cmd_state *state = &cmd_buffer->state; - - if (!memcmp(state->dynamic.blend_constants, blendConstants, sizeof(float) * 4)) - return; - - memcpy(state->dynamic.blend_constants, blendConstants, sizeof(float) * 4); - - state->dirty |= RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS; -} - -void radv_CmdSetDepthBounds( - VkCommandBuffer commandBuffer, - float minDepthBounds, - float maxDepthBounds) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_cmd_state *state = &cmd_buffer->state; - - if (state->dynamic.depth_bounds.min == minDepthBounds && - state->dynamic.depth_bounds.max == maxDepthBounds) { - return; - } - - state->dynamic.depth_bounds.min = minDepthBounds; - state->dynamic.depth_bounds.max = maxDepthBounds; - - state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS; -} - -void radv_CmdSetStencilCompareMask( - VkCommandBuffer commandBuffer, - VkStencilFaceFlags faceMask, - uint32_t compareMask) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_cmd_state *state = &cmd_buffer->state; - bool front_same = state->dynamic.stencil_compare_mask.front == compareMask; - bool back_same = state->dynamic.stencil_compare_mask.back == compareMask; - - if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) && - (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) { - return; - } - - if (faceMask & VK_STENCIL_FACE_FRONT_BIT) - state->dynamic.stencil_compare_mask.front = compareMask; - if (faceMask & VK_STENCIL_FACE_BACK_BIT) - state->dynamic.stencil_compare_mask.back = compareMask; - - state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK; -} - -void radv_CmdSetStencilWriteMask( - VkCommandBuffer commandBuffer, - VkStencilFaceFlags faceMask, - uint32_t writeMask) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_cmd_state *state = &cmd_buffer->state; - bool front_same = state->dynamic.stencil_write_mask.front == writeMask; - bool back_same = state->dynamic.stencil_write_mask.back == writeMask; - - if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) && - (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) { - return; - } - - if (faceMask & VK_STENCIL_FACE_FRONT_BIT) - state->dynamic.stencil_write_mask.front = writeMask; - if (faceMask & VK_STENCIL_FACE_BACK_BIT) - state->dynamic.stencil_write_mask.back = writeMask; - - state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK; -} - -void radv_CmdSetStencilReference( - VkCommandBuffer commandBuffer, - VkStencilFaceFlags faceMask, - uint32_t reference) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_cmd_state *state = &cmd_buffer->state; - bool front_same = state->dynamic.stencil_reference.front == reference; - bool back_same = state->dynamic.stencil_reference.back == reference; - - if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) && - (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) { - return; - } - - if (faceMask & VK_STENCIL_FACE_FRONT_BIT) - cmd_buffer->state.dynamic.stencil_reference.front = reference; - if (faceMask & VK_STENCIL_FACE_BACK_BIT) - cmd_buffer->state.dynamic.stencil_reference.back = reference; - - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE; -} - -void radv_CmdSetDiscardRectangleEXT( - VkCommandBuffer commandBuffer, - uint32_t firstDiscardRectangle, - uint32_t discardRectangleCount, - const VkRect2D* pDiscardRectangles) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_cmd_state *state = &cmd_buffer->state; - ASSERTED const uint32_t total_count = firstDiscardRectangle + discardRectangleCount; - - assert(firstDiscardRectangle < MAX_DISCARD_RECTANGLES); - assert(total_count >= 1 && total_count <= MAX_DISCARD_RECTANGLES); - - if (!memcmp(state->dynamic.discard_rectangle.rectangles + firstDiscardRectangle, - pDiscardRectangles, discardRectangleCount * sizeof(*pDiscardRectangles))) { - return; - } - - typed_memcpy(&state->dynamic.discard_rectangle.rectangles[firstDiscardRectangle], - pDiscardRectangles, discardRectangleCount); - - state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE; -} - -void radv_CmdSetSampleLocationsEXT( - VkCommandBuffer commandBuffer, - const VkSampleLocationsInfoEXT* pSampleLocationsInfo) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_cmd_state *state = &cmd_buffer->state; - - assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS); - - state->dynamic.sample_location.per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel; - state->dynamic.sample_location.grid_size = pSampleLocationsInfo->sampleLocationGridSize; - state->dynamic.sample_location.count = pSampleLocationsInfo->sampleLocationsCount; - typed_memcpy(&state->dynamic.sample_location.locations[0], - pSampleLocationsInfo->pSampleLocations, - pSampleLocationsInfo->sampleLocationsCount); - - state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS; -} - -void radv_CmdSetLineStippleEXT( - VkCommandBuffer commandBuffer, - uint32_t lineStippleFactor, - uint16_t lineStipplePattern) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_cmd_state *state = &cmd_buffer->state; - - if (state->dynamic.line_stipple.factor == lineStippleFactor && - state->dynamic.line_stipple.pattern == lineStipplePattern) - return; - - state->dynamic.line_stipple.factor = lineStippleFactor; - state->dynamic.line_stipple.pattern = lineStipplePattern; - - state->dirty |= RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE; -} - -void radv_CmdSetCullModeEXT( - VkCommandBuffer commandBuffer, - VkCullModeFlags cullMode) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_cmd_state *state = &cmd_buffer->state; - - if (state->dynamic.cull_mode == cullMode) - return; - - state->dynamic.cull_mode = cullMode; - - state->dirty |= RADV_CMD_DIRTY_DYNAMIC_CULL_MODE; -} - -void radv_CmdSetFrontFaceEXT( - VkCommandBuffer commandBuffer, - VkFrontFace frontFace) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_cmd_state *state = &cmd_buffer->state; - - if (state->dynamic.front_face == frontFace) - return; - - state->dynamic.front_face = frontFace; - - state->dirty |= RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE; -} - -void radv_CmdSetPrimitiveTopologyEXT( - VkCommandBuffer commandBuffer, - VkPrimitiveTopology primitiveTopology) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_cmd_state *state = &cmd_buffer->state; - unsigned primitive_topology = si_translate_prim(primitiveTopology); - - if (state->dynamic.primitive_topology == primitive_topology) - return; - - state->dynamic.primitive_topology = primitive_topology; - - state->dirty |= RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY; -} - -void radv_CmdSetViewportWithCountEXT( - VkCommandBuffer commandBuffer, - uint32_t viewportCount, - const VkViewport* pViewports) -{ - radv_CmdSetViewport(commandBuffer, 0, viewportCount, pViewports); -} - -void radv_CmdSetScissorWithCountEXT( - VkCommandBuffer commandBuffer, - uint32_t scissorCount, - const VkRect2D* pScissors) -{ - radv_CmdSetScissor(commandBuffer, 0, scissorCount, pScissors); -} - -void radv_CmdSetDepthTestEnableEXT( - VkCommandBuffer commandBuffer, - VkBool32 depthTestEnable) - -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_cmd_state *state = &cmd_buffer->state; - - if (state->dynamic.depth_test_enable == depthTestEnable) - return; - - state->dynamic.depth_test_enable = depthTestEnable; - - state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE; -} - -void radv_CmdSetDepthWriteEnableEXT( - VkCommandBuffer commandBuffer, - VkBool32 depthWriteEnable) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_cmd_state *state = &cmd_buffer->state; - - if (state->dynamic.depth_write_enable == depthWriteEnable) - return; - - state->dynamic.depth_write_enable = depthWriteEnable; - - state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE; -} - -void radv_CmdSetDepthCompareOpEXT( - VkCommandBuffer commandBuffer, - VkCompareOp depthCompareOp) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_cmd_state *state = &cmd_buffer->state; - - if (state->dynamic.depth_compare_op == depthCompareOp) - return; - - state->dynamic.depth_compare_op = depthCompareOp; - - state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP; -} - -void radv_CmdSetDepthBoundsTestEnableEXT( - VkCommandBuffer commandBuffer, - VkBool32 depthBoundsTestEnable) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_cmd_state *state = &cmd_buffer->state; - - if (state->dynamic.depth_bounds_test_enable == depthBoundsTestEnable) - return; - - state->dynamic.depth_bounds_test_enable = depthBoundsTestEnable; - - state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE; -} - -void radv_CmdSetStencilTestEnableEXT( - VkCommandBuffer commandBuffer, - VkBool32 stencilTestEnable) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_cmd_state *state = &cmd_buffer->state; - - if (state->dynamic.stencil_test_enable == stencilTestEnable) - return; - - state->dynamic.stencil_test_enable = stencilTestEnable; - - state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE; -} - -void radv_CmdSetStencilOpEXT( - VkCommandBuffer commandBuffer, - VkStencilFaceFlags faceMask, - VkStencilOp failOp, - VkStencilOp passOp, - VkStencilOp depthFailOp, - VkCompareOp compareOp) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_cmd_state *state = &cmd_buffer->state; - bool front_same = - state->dynamic.stencil_op.front.fail_op == failOp && - state->dynamic.stencil_op.front.pass_op == passOp && - state->dynamic.stencil_op.front.depth_fail_op == depthFailOp && - state->dynamic.stencil_op.front.compare_op == compareOp; - bool back_same = - state->dynamic.stencil_op.back.fail_op == failOp && - state->dynamic.stencil_op.back.pass_op == passOp && - state->dynamic.stencil_op.back.depth_fail_op == depthFailOp && - state->dynamic.stencil_op.back.compare_op == compareOp; - - if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) && - (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) - return; - - if (faceMask & VK_STENCIL_FACE_FRONT_BIT) { - state->dynamic.stencil_op.front.fail_op = failOp; - state->dynamic.stencil_op.front.pass_op = passOp; - state->dynamic.stencil_op.front.depth_fail_op = depthFailOp; - state->dynamic.stencil_op.front.compare_op = compareOp; - } - - if (faceMask & VK_STENCIL_FACE_BACK_BIT) { - state->dynamic.stencil_op.back.fail_op = failOp; - state->dynamic.stencil_op.back.pass_op = passOp; - state->dynamic.stencil_op.back.depth_fail_op = depthFailOp; - state->dynamic.stencil_op.back.compare_op = compareOp; - } - - state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP; -} - -void radv_CmdSetFragmentShadingRateKHR( - VkCommandBuffer commandBuffer, - const VkExtent2D* pFragmentSize, - const VkFragmentShadingRateCombinerOpKHR combinerOps[2]) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_cmd_state *state = &cmd_buffer->state; - - if (state->dynamic.fragment_shading_rate.size.width == pFragmentSize->width && - state->dynamic.fragment_shading_rate.size.height == pFragmentSize->height && - state->dynamic.fragment_shading_rate.combiner_ops[0] == combinerOps[0] && - state->dynamic.fragment_shading_rate.combiner_ops[1] == combinerOps[1]) - return; - - state->dynamic.fragment_shading_rate.size = *pFragmentSize; - for (unsigned i = 0; i < 2; i++) - state->dynamic.fragment_shading_rate.combiner_ops[i] = combinerOps[i]; - - state->dirty |= RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE; -} - -void radv_CmdExecuteCommands( - VkCommandBuffer commandBuffer, - uint32_t commandBufferCount, - const VkCommandBuffer* pCmdBuffers) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, primary, commandBuffer); - - assert(commandBufferCount > 0); - - radv_emit_mip_change_flush_default(primary); - - /* Emit pending flushes on primary prior to executing secondary */ - si_emit_cache_flush(primary); - - for (uint32_t i = 0; i < commandBufferCount; i++) { - RADV_FROM_HANDLE(radv_cmd_buffer, secondary, pCmdBuffers[i]); - - primary->scratch_size_per_wave_needed = MAX2(primary->scratch_size_per_wave_needed, - secondary->scratch_size_per_wave_needed); - primary->scratch_waves_wanted = MAX2(primary->scratch_waves_wanted, - secondary->scratch_waves_wanted); - primary->compute_scratch_size_per_wave_needed = MAX2(primary->compute_scratch_size_per_wave_needed, - secondary->compute_scratch_size_per_wave_needed); - primary->compute_scratch_waves_wanted = MAX2(primary->compute_scratch_waves_wanted, - secondary->compute_scratch_waves_wanted); - - if (secondary->esgs_ring_size_needed > primary->esgs_ring_size_needed) - primary->esgs_ring_size_needed = secondary->esgs_ring_size_needed; - if (secondary->gsvs_ring_size_needed > primary->gsvs_ring_size_needed) - primary->gsvs_ring_size_needed = secondary->gsvs_ring_size_needed; - if (secondary->tess_rings_needed) - primary->tess_rings_needed = true; - if (secondary->sample_positions_needed) - primary->sample_positions_needed = true; - if (secondary->gds_needed) - primary->gds_needed = true; - - if (!secondary->state.framebuffer && - (primary->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)) { - /* Emit the framebuffer state from primary if secondary - * has been recorded without a framebuffer, otherwise - * fast color/depth clears can't work. - */ - radv_emit_fb_mip_change_flush(primary); - radv_emit_framebuffer_state(primary); - } - - primary->device->ws->cs_execute_secondary(primary->cs, secondary->cs); - - - /* When the secondary command buffer is compute only we don't - * need to re-emit the current graphics pipeline. - */ - if (secondary->state.emitted_pipeline) { - primary->state.emitted_pipeline = - secondary->state.emitted_pipeline; - } - - /* When the secondary command buffer is graphics only we don't - * need to re-emit the current compute pipeline. - */ - if (secondary->state.emitted_compute_pipeline) { - primary->state.emitted_compute_pipeline = - secondary->state.emitted_compute_pipeline; - } - - /* Only re-emit the draw packets when needed. */ - if (secondary->state.last_primitive_reset_en != -1) { - primary->state.last_primitive_reset_en = - secondary->state.last_primitive_reset_en; - } - - if (secondary->state.last_primitive_reset_index) { - primary->state.last_primitive_reset_index = - secondary->state.last_primitive_reset_index; - } - - if (secondary->state.last_ia_multi_vgt_param) { - primary->state.last_ia_multi_vgt_param = - secondary->state.last_ia_multi_vgt_param; - } - - primary->state.last_first_instance = secondary->state.last_first_instance; - primary->state.last_num_instances = secondary->state.last_num_instances; - primary->state.last_drawid = secondary->state.last_drawid; - primary->state.last_vertex_offset = secondary->state.last_vertex_offset; - primary->state.last_sx_ps_downconvert = secondary->state.last_sx_ps_downconvert; - primary->state.last_sx_blend_opt_epsilon = secondary->state.last_sx_blend_opt_epsilon; - primary->state.last_sx_blend_opt_control = secondary->state.last_sx_blend_opt_control; - - if (secondary->state.last_index_type != -1) { - primary->state.last_index_type = - secondary->state.last_index_type; - } - } - - /* After executing commands from secondary buffers we have to dirty - * some states. - */ - primary->state.dirty |= RADV_CMD_DIRTY_PIPELINE | - RADV_CMD_DIRTY_INDEX_BUFFER | - RADV_CMD_DIRTY_DYNAMIC_ALL; - radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_GRAPHICS); - radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_COMPUTE); -} - -VkResult radv_CreateCommandPool( - VkDevice _device, - const VkCommandPoolCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkCommandPool* pCmdPool) -{ - RADV_FROM_HANDLE(radv_device, device, _device); - struct radv_cmd_pool *pool; - - pool = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*pool), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (pool == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - vk_object_base_init(&device->vk, &pool->base, - VK_OBJECT_TYPE_COMMAND_POOL); - - if (pAllocator) - pool->alloc = *pAllocator; - else - pool->alloc = device->vk.alloc; - - list_inithead(&pool->cmd_buffers); - list_inithead(&pool->free_cmd_buffers); - - pool->queue_family_index = pCreateInfo->queueFamilyIndex; - - *pCmdPool = radv_cmd_pool_to_handle(pool); - - return VK_SUCCESS; - -} - -void radv_DestroyCommandPool( - VkDevice _device, - VkCommandPool commandPool, - const VkAllocationCallbacks* pAllocator) -{ - RADV_FROM_HANDLE(radv_device, device, _device); - RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool); - - if (!pool) - return; - - list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer, - &pool->cmd_buffers, pool_link) { - radv_destroy_cmd_buffer(cmd_buffer); - } - - list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer, - &pool->free_cmd_buffers, pool_link) { - radv_destroy_cmd_buffer(cmd_buffer); - } - - vk_object_base_finish(&pool->base); - vk_free2(&device->vk.alloc, pAllocator, pool); -} - -VkResult radv_ResetCommandPool( - VkDevice device, - VkCommandPool commandPool, - VkCommandPoolResetFlags flags) -{ - RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool); - VkResult result; - - list_for_each_entry(struct radv_cmd_buffer, cmd_buffer, - &pool->cmd_buffers, pool_link) { - result = radv_reset_cmd_buffer(cmd_buffer); - if (result != VK_SUCCESS) - return result; - } - - return VK_SUCCESS; -} - -void radv_TrimCommandPool( - VkDevice device, - VkCommandPool commandPool, - VkCommandPoolTrimFlags flags) -{ - RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool); - - if (!pool) - return; - - list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer, - &pool->free_cmd_buffers, pool_link) { - radv_destroy_cmd_buffer(cmd_buffer); - } + if (unlikely(cmd_buffer->device->trace_bo)) + radv_save_pipeline(cmd_buffer, pipeline); } static void -radv_cmd_buffer_begin_subpass(struct radv_cmd_buffer *cmd_buffer, - uint32_t subpass_id) +radv_mark_descriptor_sets_dirty(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point) { - struct radv_cmd_state *state = &cmd_buffer->state; - struct radv_subpass *subpass = &state->pass->subpasses[subpass_id]; + struct radv_descriptor_state *descriptors_state = + radv_get_descriptors_state(cmd_buffer, bind_point); - ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, - cmd_buffer->cs, 4096); + descriptors_state->dirty |= descriptors_state->valid; +} - radv_subpass_barrier(cmd_buffer, &subpass->start_barrier); +void +radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, + VkPipeline _pipeline) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline); - radv_cmd_buffer_set_subpass(cmd_buffer, subpass); + switch (pipelineBindPoint) { + case VK_PIPELINE_BIND_POINT_COMPUTE: + if (cmd_buffer->state.compute_pipeline == pipeline) + return; + radv_mark_descriptor_sets_dirty(cmd_buffer, pipelineBindPoint); - radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC); + cmd_buffer->state.compute_pipeline = pipeline; + cmd_buffer->push_constant_stages |= VK_SHADER_STAGE_COMPUTE_BIT; + break; + case VK_PIPELINE_BIND_POINT_GRAPHICS: + if (cmd_buffer->state.pipeline == pipeline) + return; + radv_mark_descriptor_sets_dirty(cmd_buffer, pipelineBindPoint); - for (uint32_t i = 0; i < subpass->attachment_count; ++i) { - const uint32_t a = subpass->attachments[i].attachment; - if (a == VK_ATTACHMENT_UNUSED) - continue; + bool vtx_emit_count_changed = + !pipeline || !cmd_buffer->state.pipeline || + cmd_buffer->state.pipeline->graphics.vtx_emit_num != pipeline->graphics.vtx_emit_num || + cmd_buffer->state.pipeline->graphics.vtx_base_sgpr != pipeline->graphics.vtx_base_sgpr; + cmd_buffer->state.pipeline = pipeline; + if (!pipeline) + break; - radv_handle_subpass_image_transition(cmd_buffer, - subpass->attachments[i], - true); - } + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_PIPELINE; + cmd_buffer->push_constant_stages |= pipeline->active_stages; - radv_describe_barrier_end(cmd_buffer); + /* the new vertex shader might not have the same user regs */ + if (vtx_emit_count_changed) { + cmd_buffer->state.last_first_instance = -1; + cmd_buffer->state.last_vertex_offset = -1; + cmd_buffer->state.last_drawid = -1; + } - radv_cmd_buffer_clear_subpass(cmd_buffer); + /* Prefetch all pipeline shaders at first draw time. */ + cmd_buffer->state.prefetch_L2_mask |= RADV_PREFETCH_SHADERS; - assert(cmd_buffer->cs->cdw <= cdw_max); + if ((cmd_buffer->device->physical_device->rad_info.chip_class == GFX10 || + cmd_buffer->device->physical_device->rad_info.family == CHIP_SIENNA_CICHLID) && + cmd_buffer->state.emitted_pipeline && + radv_pipeline_has_ngg(cmd_buffer->state.emitted_pipeline) && + !radv_pipeline_has_ngg(cmd_buffer->state.pipeline)) { + /* Transitioning from NGG to legacy GS requires + * VGT_FLUSH on GFX10 and Sienna Cichlid. VGT_FLUSH + * is also emitted at the beginning of IBs when legacy + * GS ring pointers are set. + */ + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_FLUSH; + } + + radv_bind_dynamic_state(cmd_buffer, &pipeline->dynamic_state); + radv_bind_streamout_state(cmd_buffer, pipeline); + + if (pipeline->graphics.esgs_ring_size > cmd_buffer->esgs_ring_size_needed) + cmd_buffer->esgs_ring_size_needed = pipeline->graphics.esgs_ring_size; + if (pipeline->graphics.gsvs_ring_size > cmd_buffer->gsvs_ring_size_needed) + cmd_buffer->gsvs_ring_size_needed = pipeline->graphics.gsvs_ring_size; + + if (radv_pipeline_has_tess(pipeline)) + cmd_buffer->tess_rings_needed = true; + break; + default: + assert(!"invalid bind point"); + break; + } +} + +void +radv_CmdSetViewport(VkCommandBuffer commandBuffer, uint32_t firstViewport, uint32_t viewportCount, + const VkViewport *pViewports) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + ASSERTED const uint32_t total_count = firstViewport + viewportCount; + + assert(firstViewport < MAX_VIEWPORTS); + assert(total_count >= 1 && total_count <= MAX_VIEWPORTS); + + if (total_count <= state->dynamic.viewport.count && + !memcmp(state->dynamic.viewport.viewports + firstViewport, pViewports, + viewportCount * sizeof(*pViewports))) { + return; + } + + if (state->dynamic.viewport.count < total_count) + state->dynamic.viewport.count = total_count; + + memcpy(state->dynamic.viewport.viewports + firstViewport, pViewports, + viewportCount * sizeof(*pViewports)); + + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_VIEWPORT; +} + +void +radv_CmdSetScissor(VkCommandBuffer commandBuffer, uint32_t firstScissor, uint32_t scissorCount, + const VkRect2D *pScissors) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + ASSERTED const uint32_t total_count = firstScissor + scissorCount; + + assert(firstScissor < MAX_SCISSORS); + assert(total_count >= 1 && total_count <= MAX_SCISSORS); + + if (total_count <= state->dynamic.scissor.count && + !memcmp(state->dynamic.scissor.scissors + firstScissor, pScissors, + scissorCount * sizeof(*pScissors))) { + return; + } + + if (state->dynamic.scissor.count < total_count) + state->dynamic.scissor.count = total_count; + + memcpy(state->dynamic.scissor.scissors + firstScissor, pScissors, + scissorCount * sizeof(*pScissors)); + + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR; +} + +void +radv_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + + if (cmd_buffer->state.dynamic.line_width == lineWidth) + return; + + cmd_buffer->state.dynamic.line_width = lineWidth; + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH; +} + +void +radv_CmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasConstantFactor, + float depthBiasClamp, float depthBiasSlopeFactor) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + + if (state->dynamic.depth_bias.bias == depthBiasConstantFactor && + state->dynamic.depth_bias.clamp == depthBiasClamp && + state->dynamic.depth_bias.slope == depthBiasSlopeFactor) { + return; + } + + state->dynamic.depth_bias.bias = depthBiasConstantFactor; + state->dynamic.depth_bias.clamp = depthBiasClamp; + state->dynamic.depth_bias.slope = depthBiasSlopeFactor; + + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS; +} + +void +radv_CmdSetBlendConstants(VkCommandBuffer commandBuffer, const float blendConstants[4]) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + + if (!memcmp(state->dynamic.blend_constants, blendConstants, sizeof(float) * 4)) + return; + + memcpy(state->dynamic.blend_constants, blendConstants, sizeof(float) * 4); + + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS; +} + +void +radv_CmdSetDepthBounds(VkCommandBuffer commandBuffer, float minDepthBounds, float maxDepthBounds) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + + if (state->dynamic.depth_bounds.min == minDepthBounds && + state->dynamic.depth_bounds.max == maxDepthBounds) { + return; + } + + state->dynamic.depth_bounds.min = minDepthBounds; + state->dynamic.depth_bounds.max = maxDepthBounds; + + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS; +} + +void +radv_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, + uint32_t compareMask) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + bool front_same = state->dynamic.stencil_compare_mask.front == compareMask; + bool back_same = state->dynamic.stencil_compare_mask.back == compareMask; + + if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) && + (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) { + return; + } + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + state->dynamic.stencil_compare_mask.front = compareMask; + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + state->dynamic.stencil_compare_mask.back = compareMask; + + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK; +} + +void +radv_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, + uint32_t writeMask) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + bool front_same = state->dynamic.stencil_write_mask.front == writeMask; + bool back_same = state->dynamic.stencil_write_mask.back == writeMask; + + if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) && + (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) { + return; + } + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + state->dynamic.stencil_write_mask.front = writeMask; + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + state->dynamic.stencil_write_mask.back = writeMask; + + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK; +} + +void +radv_CmdSetStencilReference(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, + uint32_t reference) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + bool front_same = state->dynamic.stencil_reference.front == reference; + bool back_same = state->dynamic.stencil_reference.back == reference; + + if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) && + (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) { + return; + } + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) + cmd_buffer->state.dynamic.stencil_reference.front = reference; + if (faceMask & VK_STENCIL_FACE_BACK_BIT) + cmd_buffer->state.dynamic.stencil_reference.back = reference; + + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE; +} + +void +radv_CmdSetDiscardRectangleEXT(VkCommandBuffer commandBuffer, uint32_t firstDiscardRectangle, + uint32_t discardRectangleCount, const VkRect2D *pDiscardRectangles) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + ASSERTED const uint32_t total_count = firstDiscardRectangle + discardRectangleCount; + + assert(firstDiscardRectangle < MAX_DISCARD_RECTANGLES); + assert(total_count >= 1 && total_count <= MAX_DISCARD_RECTANGLES); + + if (!memcmp(state->dynamic.discard_rectangle.rectangles + firstDiscardRectangle, + pDiscardRectangles, discardRectangleCount * sizeof(*pDiscardRectangles))) { + return; + } + + typed_memcpy(&state->dynamic.discard_rectangle.rectangles[firstDiscardRectangle], + pDiscardRectangles, discardRectangleCount); + + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE; +} + +void +radv_CmdSetSampleLocationsEXT(VkCommandBuffer commandBuffer, + const VkSampleLocationsInfoEXT *pSampleLocationsInfo) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + + assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS); + + state->dynamic.sample_location.per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel; + state->dynamic.sample_location.grid_size = pSampleLocationsInfo->sampleLocationGridSize; + state->dynamic.sample_location.count = pSampleLocationsInfo->sampleLocationsCount; + typed_memcpy(&state->dynamic.sample_location.locations[0], + pSampleLocationsInfo->pSampleLocations, pSampleLocationsInfo->sampleLocationsCount); + + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS; +} + +void +radv_CmdSetLineStippleEXT(VkCommandBuffer commandBuffer, uint32_t lineStippleFactor, + uint16_t lineStipplePattern) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + + if (state->dynamic.line_stipple.factor == lineStippleFactor && + state->dynamic.line_stipple.pattern == lineStipplePattern) + return; + + state->dynamic.line_stipple.factor = lineStippleFactor; + state->dynamic.line_stipple.pattern = lineStipplePattern; + + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE; +} + +void +radv_CmdSetCullModeEXT(VkCommandBuffer commandBuffer, VkCullModeFlags cullMode) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + + if (state->dynamic.cull_mode == cullMode) + return; + + state->dynamic.cull_mode = cullMode; + + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_CULL_MODE; +} + +void +radv_CmdSetFrontFaceEXT(VkCommandBuffer commandBuffer, VkFrontFace frontFace) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + + if (state->dynamic.front_face == frontFace) + return; + + state->dynamic.front_face = frontFace; + + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE; +} + +void +radv_CmdSetPrimitiveTopologyEXT(VkCommandBuffer commandBuffer, + VkPrimitiveTopology primitiveTopology) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + unsigned primitive_topology = si_translate_prim(primitiveTopology); + + if (state->dynamic.primitive_topology == primitive_topology) + return; + + state->dynamic.primitive_topology = primitive_topology; + + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY; +} + +void +radv_CmdSetViewportWithCountEXT(VkCommandBuffer commandBuffer, uint32_t viewportCount, + const VkViewport *pViewports) +{ + radv_CmdSetViewport(commandBuffer, 0, viewportCount, pViewports); +} + +void +radv_CmdSetScissorWithCountEXT(VkCommandBuffer commandBuffer, uint32_t scissorCount, + const VkRect2D *pScissors) +{ + radv_CmdSetScissor(commandBuffer, 0, scissorCount, pScissors); +} + +void +radv_CmdSetDepthTestEnableEXT(VkCommandBuffer commandBuffer, VkBool32 depthTestEnable) + +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + + if (state->dynamic.depth_test_enable == depthTestEnable) + return; + + state->dynamic.depth_test_enable = depthTestEnable; + + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE; +} + +void +radv_CmdSetDepthWriteEnableEXT(VkCommandBuffer commandBuffer, VkBool32 depthWriteEnable) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + + if (state->dynamic.depth_write_enable == depthWriteEnable) + return; + + state->dynamic.depth_write_enable = depthWriteEnable; + + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE; +} + +void +radv_CmdSetDepthCompareOpEXT(VkCommandBuffer commandBuffer, VkCompareOp depthCompareOp) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + + if (state->dynamic.depth_compare_op == depthCompareOp) + return; + + state->dynamic.depth_compare_op = depthCompareOp; + + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP; +} + +void +radv_CmdSetDepthBoundsTestEnableEXT(VkCommandBuffer commandBuffer, VkBool32 depthBoundsTestEnable) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + + if (state->dynamic.depth_bounds_test_enable == depthBoundsTestEnable) + return; + + state->dynamic.depth_bounds_test_enable = depthBoundsTestEnable; + + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE; +} + +void +radv_CmdSetStencilTestEnableEXT(VkCommandBuffer commandBuffer, VkBool32 stencilTestEnable) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + + if (state->dynamic.stencil_test_enable == stencilTestEnable) + return; + + state->dynamic.stencil_test_enable = stencilTestEnable; + + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE; +} + +void +radv_CmdSetStencilOpEXT(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, + VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp, + VkCompareOp compareOp) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + bool front_same = state->dynamic.stencil_op.front.fail_op == failOp && + state->dynamic.stencil_op.front.pass_op == passOp && + state->dynamic.stencil_op.front.depth_fail_op == depthFailOp && + state->dynamic.stencil_op.front.compare_op == compareOp; + bool back_same = state->dynamic.stencil_op.back.fail_op == failOp && + state->dynamic.stencil_op.back.pass_op == passOp && + state->dynamic.stencil_op.back.depth_fail_op == depthFailOp && + state->dynamic.stencil_op.back.compare_op == compareOp; + + if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) && + (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) + return; + + if (faceMask & VK_STENCIL_FACE_FRONT_BIT) { + state->dynamic.stencil_op.front.fail_op = failOp; + state->dynamic.stencil_op.front.pass_op = passOp; + state->dynamic.stencil_op.front.depth_fail_op = depthFailOp; + state->dynamic.stencil_op.front.compare_op = compareOp; + } + + if (faceMask & VK_STENCIL_FACE_BACK_BIT) { + state->dynamic.stencil_op.back.fail_op = failOp; + state->dynamic.stencil_op.back.pass_op = passOp; + state->dynamic.stencil_op.back.depth_fail_op = depthFailOp; + state->dynamic.stencil_op.back.compare_op = compareOp; + } + + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP; +} + +void +radv_CmdSetFragmentShadingRateKHR(VkCommandBuffer commandBuffer, const VkExtent2D *pFragmentSize, + const VkFragmentShadingRateCombinerOpKHR combinerOps[2]) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_cmd_state *state = &cmd_buffer->state; + + if (state->dynamic.fragment_shading_rate.size.width == pFragmentSize->width && + state->dynamic.fragment_shading_rate.size.height == pFragmentSize->height && + state->dynamic.fragment_shading_rate.combiner_ops[0] == combinerOps[0] && + state->dynamic.fragment_shading_rate.combiner_ops[1] == combinerOps[1]) + return; + + state->dynamic.fragment_shading_rate.size = *pFragmentSize; + for (unsigned i = 0; i < 2; i++) + state->dynamic.fragment_shading_rate.combiner_ops[i] = combinerOps[i]; + + state->dirty |= RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE; +} + +void +radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount, + const VkCommandBuffer *pCmdBuffers) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, primary, commandBuffer); + + assert(commandBufferCount > 0); + + radv_emit_mip_change_flush_default(primary); + + /* Emit pending flushes on primary prior to executing secondary */ + si_emit_cache_flush(primary); + + for (uint32_t i = 0; i < commandBufferCount; i++) { + RADV_FROM_HANDLE(radv_cmd_buffer, secondary, pCmdBuffers[i]); + + primary->scratch_size_per_wave_needed = + MAX2(primary->scratch_size_per_wave_needed, secondary->scratch_size_per_wave_needed); + primary->scratch_waves_wanted = + MAX2(primary->scratch_waves_wanted, secondary->scratch_waves_wanted); + primary->compute_scratch_size_per_wave_needed = + MAX2(primary->compute_scratch_size_per_wave_needed, + secondary->compute_scratch_size_per_wave_needed); + primary->compute_scratch_waves_wanted = + MAX2(primary->compute_scratch_waves_wanted, secondary->compute_scratch_waves_wanted); + + if (secondary->esgs_ring_size_needed > primary->esgs_ring_size_needed) + primary->esgs_ring_size_needed = secondary->esgs_ring_size_needed; + if (secondary->gsvs_ring_size_needed > primary->gsvs_ring_size_needed) + primary->gsvs_ring_size_needed = secondary->gsvs_ring_size_needed; + if (secondary->tess_rings_needed) + primary->tess_rings_needed = true; + if (secondary->sample_positions_needed) + primary->sample_positions_needed = true; + if (secondary->gds_needed) + primary->gds_needed = true; + + if (!secondary->state.framebuffer && (primary->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)) { + /* Emit the framebuffer state from primary if secondary + * has been recorded without a framebuffer, otherwise + * fast color/depth clears can't work. + */ + radv_emit_fb_mip_change_flush(primary); + radv_emit_framebuffer_state(primary); + } + + primary->device->ws->cs_execute_secondary(primary->cs, secondary->cs); + + /* When the secondary command buffer is compute only we don't + * need to re-emit the current graphics pipeline. + */ + if (secondary->state.emitted_pipeline) { + primary->state.emitted_pipeline = secondary->state.emitted_pipeline; + } + + /* When the secondary command buffer is graphics only we don't + * need to re-emit the current compute pipeline. + */ + if (secondary->state.emitted_compute_pipeline) { + primary->state.emitted_compute_pipeline = secondary->state.emitted_compute_pipeline; + } + + /* Only re-emit the draw packets when needed. */ + if (secondary->state.last_primitive_reset_en != -1) { + primary->state.last_primitive_reset_en = secondary->state.last_primitive_reset_en; + } + + if (secondary->state.last_primitive_reset_index) { + primary->state.last_primitive_reset_index = secondary->state.last_primitive_reset_index; + } + + if (secondary->state.last_ia_multi_vgt_param) { + primary->state.last_ia_multi_vgt_param = secondary->state.last_ia_multi_vgt_param; + } + + primary->state.last_first_instance = secondary->state.last_first_instance; + primary->state.last_num_instances = secondary->state.last_num_instances; + primary->state.last_drawid = secondary->state.last_drawid; + primary->state.last_vertex_offset = secondary->state.last_vertex_offset; + primary->state.last_sx_ps_downconvert = secondary->state.last_sx_ps_downconvert; + primary->state.last_sx_blend_opt_epsilon = secondary->state.last_sx_blend_opt_epsilon; + primary->state.last_sx_blend_opt_control = secondary->state.last_sx_blend_opt_control; + + if (secondary->state.last_index_type != -1) { + primary->state.last_index_type = secondary->state.last_index_type; + } + } + + /* After executing commands from secondary buffers we have to dirty + * some states. + */ + primary->state.dirty |= + RADV_CMD_DIRTY_PIPELINE | RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_DYNAMIC_ALL; + radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_GRAPHICS); + radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_COMPUTE); +} + +VkResult +radv_CreateCommandPool(VkDevice _device, const VkCommandPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, VkCommandPool *pCmdPool) +{ + RADV_FROM_HANDLE(radv_device, device, _device); + struct radv_cmd_pool *pool; + + pool = + vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*pool), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pool == NULL) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + vk_object_base_init(&device->vk, &pool->base, VK_OBJECT_TYPE_COMMAND_POOL); + + if (pAllocator) + pool->alloc = *pAllocator; + else + pool->alloc = device->vk.alloc; + + list_inithead(&pool->cmd_buffers); + list_inithead(&pool->free_cmd_buffers); + + pool->queue_family_index = pCreateInfo->queueFamilyIndex; + + *pCmdPool = radv_cmd_pool_to_handle(pool); + + return VK_SUCCESS; +} + +void +radv_DestroyCommandPool(VkDevice _device, VkCommandPool commandPool, + const VkAllocationCallbacks *pAllocator) +{ + RADV_FROM_HANDLE(radv_device, device, _device); + RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool); + + if (!pool) + return; + + list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer, &pool->cmd_buffers, pool_link) + { + radv_destroy_cmd_buffer(cmd_buffer); + } + + list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer, &pool->free_cmd_buffers, pool_link) + { + radv_destroy_cmd_buffer(cmd_buffer); + } + + vk_object_base_finish(&pool->base); + vk_free2(&device->vk.alloc, pAllocator, pool); +} + +VkResult +radv_ResetCommandPool(VkDevice device, VkCommandPool commandPool, VkCommandPoolResetFlags flags) +{ + RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool); + VkResult result; + + list_for_each_entry(struct radv_cmd_buffer, cmd_buffer, &pool->cmd_buffers, pool_link) + { + result = radv_reset_cmd_buffer(cmd_buffer); + if (result != VK_SUCCESS) + return result; + } + + return VK_SUCCESS; +} + +void +radv_TrimCommandPool(VkDevice device, VkCommandPool commandPool, VkCommandPoolTrimFlags flags) +{ + RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool); + + if (!pool) + return; + + list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer, &pool->free_cmd_buffers, pool_link) + { + radv_destroy_cmd_buffer(cmd_buffer); + } +} + +static void +radv_cmd_buffer_begin_subpass(struct radv_cmd_buffer *cmd_buffer, uint32_t subpass_id) +{ + struct radv_cmd_state *state = &cmd_buffer->state; + struct radv_subpass *subpass = &state->pass->subpasses[subpass_id]; + + ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4096); + + radv_subpass_barrier(cmd_buffer, &subpass->start_barrier); + + radv_cmd_buffer_set_subpass(cmd_buffer, subpass); + + radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC); + + for (uint32_t i = 0; i < subpass->attachment_count; ++i) { + const uint32_t a = subpass->attachments[i].attachment; + if (a == VK_ATTACHMENT_UNUSED) + continue; + + radv_handle_subpass_image_transition(cmd_buffer, subpass->attachments[i], true); + } + + radv_describe_barrier_end(cmd_buffer); + + radv_cmd_buffer_clear_subpass(cmd_buffer); + + assert(cmd_buffer->cs->cdw <= cdw_max); } static void radv_cmd_buffer_end_subpass(struct radv_cmd_buffer *cmd_buffer) { - struct radv_cmd_state *state = &cmd_buffer->state; - const struct radv_subpass *subpass = state->subpass; - uint32_t subpass_id = radv_get_subpass_id(cmd_buffer); + struct radv_cmd_state *state = &cmd_buffer->state; + const struct radv_subpass *subpass = state->subpass; + uint32_t subpass_id = radv_get_subpass_id(cmd_buffer); - radv_cmd_buffer_resolve_subpass(cmd_buffer); + radv_cmd_buffer_resolve_subpass(cmd_buffer); - radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC); + radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC); - for (uint32_t i = 0; i < subpass->attachment_count; ++i) { - const uint32_t a = subpass->attachments[i].attachment; - if (a == VK_ATTACHMENT_UNUSED) - continue; + for (uint32_t i = 0; i < subpass->attachment_count; ++i) { + const uint32_t a = subpass->attachments[i].attachment; + if (a == VK_ATTACHMENT_UNUSED) + continue; - if (state->pass->attachments[a].last_subpass_idx != subpass_id) - continue; + if (state->pass->attachments[a].last_subpass_idx != subpass_id) + continue; - VkImageLayout layout = state->pass->attachments[a].final_layout; - VkImageLayout stencil_layout = state->pass->attachments[a].stencil_final_layout; - struct radv_subpass_attachment att = { a, layout, stencil_layout }; - radv_handle_subpass_image_transition(cmd_buffer, att, false); - } + VkImageLayout layout = state->pass->attachments[a].final_layout; + VkImageLayout stencil_layout = state->pass->attachments[a].stencil_final_layout; + struct radv_subpass_attachment att = {a, layout, stencil_layout}; + radv_handle_subpass_image_transition(cmd_buffer, att, false); + } - radv_describe_barrier_end(cmd_buffer); + radv_describe_barrier_end(cmd_buffer); } void radv_cmd_buffer_begin_render_pass(struct radv_cmd_buffer *cmd_buffer, - const VkRenderPassBeginInfo *pRenderPassBegin, - const struct radv_extra_render_pass_begin_info *extra_info) + const VkRenderPassBeginInfo *pRenderPassBegin, + const struct radv_extra_render_pass_begin_info *extra_info) { - RADV_FROM_HANDLE(radv_render_pass, pass, pRenderPassBegin->renderPass); - RADV_FROM_HANDLE(radv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); - VkResult result; + RADV_FROM_HANDLE(radv_render_pass, pass, pRenderPassBegin->renderPass); + RADV_FROM_HANDLE(radv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); + VkResult result; - cmd_buffer->state.framebuffer = framebuffer; - cmd_buffer->state.pass = pass; - cmd_buffer->state.render_area = pRenderPassBegin->renderArea; + cmd_buffer->state.framebuffer = framebuffer; + cmd_buffer->state.pass = pass; + cmd_buffer->state.render_area = pRenderPassBegin->renderArea; - result = radv_cmd_state_setup_attachments(cmd_buffer, pass, pRenderPassBegin, extra_info); - if (result != VK_SUCCESS) - return; + result = radv_cmd_state_setup_attachments(cmd_buffer, pass, pRenderPassBegin, extra_info); + if (result != VK_SUCCESS) + return; - result = radv_cmd_state_setup_sample_locations(cmd_buffer, pass, pRenderPassBegin); - if (result != VK_SUCCESS) - return; + result = radv_cmd_state_setup_sample_locations(cmd_buffer, pass, pRenderPassBegin); + if (result != VK_SUCCESS) + return; } -void radv_CmdBeginRenderPass2( - VkCommandBuffer commandBuffer, - const VkRenderPassBeginInfo* pRenderPassBeginInfo, - const VkSubpassBeginInfo* pSubpassBeginInfo) +void +radv_CmdBeginRenderPass2(VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo *pRenderPassBeginInfo, + const VkSubpassBeginInfo *pSubpassBeginInfo) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - radv_cmd_buffer_begin_render_pass(cmd_buffer, pRenderPassBeginInfo, NULL); + radv_cmd_buffer_begin_render_pass(cmd_buffer, pRenderPassBeginInfo, NULL); - radv_cmd_buffer_begin_subpass(cmd_buffer, 0); + radv_cmd_buffer_begin_subpass(cmd_buffer, 0); } -void radv_CmdNextSubpass2( - VkCommandBuffer commandBuffer, - const VkSubpassBeginInfo* pSubpassBeginInfo, - const VkSubpassEndInfo* pSubpassEndInfo) +void +radv_CmdNextSubpass2(VkCommandBuffer commandBuffer, const VkSubpassBeginInfo *pSubpassBeginInfo, + const VkSubpassEndInfo *pSubpassEndInfo) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - uint32_t prev_subpass = radv_get_subpass_id(cmd_buffer); - radv_cmd_buffer_end_subpass(cmd_buffer); - radv_cmd_buffer_begin_subpass(cmd_buffer, prev_subpass + 1); -} - -static void radv_emit_view_index(struct radv_cmd_buffer *cmd_buffer, unsigned index) -{ - struct radv_pipeline *pipeline = cmd_buffer->state.pipeline; - for (unsigned stage = 0; stage < MESA_SHADER_STAGES; ++stage) { - if (!radv_get_shader(pipeline, stage)) - continue; - - struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, AC_UD_VIEW_INDEX); - if (loc->sgpr_idx == -1) - continue; - uint32_t base_reg = pipeline->user_data_0[stage]; - radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, index); - - } - if (radv_pipeline_has_gs_copy_shader(pipeline)) { - struct radv_userdata_info *loc = &pipeline->gs_copy_shader->info.user_sgprs_locs.shader_data[AC_UD_VIEW_INDEX]; - if (loc->sgpr_idx != -1) { - uint32_t base_reg = R_00B130_SPI_SHADER_USER_DATA_VS_0; - radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, index); - } - } + uint32_t prev_subpass = radv_get_subpass_id(cmd_buffer); + radv_cmd_buffer_end_subpass(cmd_buffer); + radv_cmd_buffer_begin_subpass(cmd_buffer, prev_subpass + 1); } static void -radv_cs_emit_draw_packet(struct radv_cmd_buffer *cmd_buffer, - uint32_t vertex_count, - uint32_t use_opaque) +radv_emit_view_index(struct radv_cmd_buffer *cmd_buffer, unsigned index) { - radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, cmd_buffer->state.predicating)); - radeon_emit(cmd_buffer->cs, vertex_count); - radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque); + struct radv_pipeline *pipeline = cmd_buffer->state.pipeline; + for (unsigned stage = 0; stage < MESA_SHADER_STAGES; ++stage) { + if (!radv_get_shader(pipeline, stage)) + continue; + + struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, AC_UD_VIEW_INDEX); + if (loc->sgpr_idx == -1) + continue; + uint32_t base_reg = pipeline->user_data_0[stage]; + radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, index); + } + if (radv_pipeline_has_gs_copy_shader(pipeline)) { + struct radv_userdata_info *loc = + &pipeline->gs_copy_shader->info.user_sgprs_locs.shader_data[AC_UD_VIEW_INDEX]; + if (loc->sgpr_idx != -1) { + uint32_t base_reg = R_00B130_SPI_SHADER_USER_DATA_VS_0; + radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, index); + } + } +} + +static void +radv_cs_emit_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t vertex_count, + uint32_t use_opaque) +{ + radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, cmd_buffer->state.predicating)); + radeon_emit(cmd_buffer->cs, vertex_count); + radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque); } /** @@ -5283,216 +4976,194 @@ radv_cs_emit_draw_packet(struct radv_cmd_buffer *cmd_buffer, * Hardware uses this information to return 0 for out-of-bounds reads. */ static void -radv_cs_emit_draw_indexed_packet(struct radv_cmd_buffer *cmd_buffer, - uint64_t index_va, - uint32_t max_index_count, - uint32_t index_count) +radv_cs_emit_draw_indexed_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t index_va, + uint32_t max_index_count, uint32_t index_count) { - radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_2, 4, cmd_buffer->state.predicating)); - radeon_emit(cmd_buffer->cs, max_index_count); - radeon_emit(cmd_buffer->cs, index_va); - radeon_emit(cmd_buffer->cs, index_va >> 32); - radeon_emit(cmd_buffer->cs, index_count); - radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_DMA); + radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_2, 4, cmd_buffer->state.predicating)); + radeon_emit(cmd_buffer->cs, max_index_count); + radeon_emit(cmd_buffer->cs, index_va); + radeon_emit(cmd_buffer->cs, index_va >> 32); + radeon_emit(cmd_buffer->cs, index_count); + radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_DMA); } /* MUST inline this function to avoid massive perf loss in drawoverhead */ ALWAYS_INLINE static void -radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, - bool indexed, - uint32_t draw_count, - uint64_t count_va, - uint32_t stride) +radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, bool indexed, + uint32_t draw_count, uint64_t count_va, uint32_t stride) { - struct radeon_cmdbuf *cs = cmd_buffer->cs; - const unsigned di_src_sel = indexed ? V_0287F0_DI_SRC_SEL_DMA : V_0287F0_DI_SRC_SEL_AUTO_INDEX; - bool draw_id_enable = cmd_buffer->state.pipeline->graphics.uses_drawid; - uint32_t base_reg = cmd_buffer->state.pipeline->graphics.vtx_base_sgpr; - uint32_t vertex_offset_reg, start_instance_reg = 0, draw_id_reg = 0; - bool predicating = cmd_buffer->state.predicating; - assert(base_reg); + struct radeon_cmdbuf *cs = cmd_buffer->cs; + const unsigned di_src_sel = indexed ? V_0287F0_DI_SRC_SEL_DMA : V_0287F0_DI_SRC_SEL_AUTO_INDEX; + bool draw_id_enable = cmd_buffer->state.pipeline->graphics.uses_drawid; + uint32_t base_reg = cmd_buffer->state.pipeline->graphics.vtx_base_sgpr; + uint32_t vertex_offset_reg, start_instance_reg = 0, draw_id_reg = 0; + bool predicating = cmd_buffer->state.predicating; + assert(base_reg); - /* just reset draw state for vertex data */ - cmd_buffer->state.last_first_instance = -1; - cmd_buffer->state.last_num_instances = -1; - cmd_buffer->state.last_drawid = -1; - cmd_buffer->state.last_vertex_offset = -1; + /* just reset draw state for vertex data */ + cmd_buffer->state.last_first_instance = -1; + cmd_buffer->state.last_num_instances = -1; + cmd_buffer->state.last_drawid = -1; + cmd_buffer->state.last_vertex_offset = -1; - vertex_offset_reg = (base_reg - SI_SH_REG_OFFSET) >> 2; - if (cmd_buffer->state.pipeline->graphics.uses_baseinstance) - start_instance_reg = ((base_reg + (draw_id_enable ? 8 : 4)) - SI_SH_REG_OFFSET) >> 2; - if (draw_id_enable) - draw_id_reg = ((base_reg + 4) - SI_SH_REG_OFFSET) >> 2; + vertex_offset_reg = (base_reg - SI_SH_REG_OFFSET) >> 2; + if (cmd_buffer->state.pipeline->graphics.uses_baseinstance) + start_instance_reg = ((base_reg + (draw_id_enable ? 8 : 4)) - SI_SH_REG_OFFSET) >> 2; + if (draw_id_enable) + draw_id_reg = ((base_reg + 4) - SI_SH_REG_OFFSET) >> 2; - if (draw_count == 1 && !count_va && !draw_id_enable) { - radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT : - PKT3_DRAW_INDIRECT, 3, predicating)); - radeon_emit(cs, 0); - radeon_emit(cs, vertex_offset_reg); - radeon_emit(cs, start_instance_reg); - radeon_emit(cs, di_src_sel); - } else { - radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT_MULTI : - PKT3_DRAW_INDIRECT_MULTI, - 8, predicating)); - radeon_emit(cs, 0); - radeon_emit(cs, vertex_offset_reg); - radeon_emit(cs, start_instance_reg); - radeon_emit(cs, draw_id_reg | - S_2C3_DRAW_INDEX_ENABLE(draw_id_enable) | - S_2C3_COUNT_INDIRECT_ENABLE(!!count_va)); - radeon_emit(cs, draw_count); /* count */ - radeon_emit(cs, count_va); /* count_addr */ - radeon_emit(cs, count_va >> 32); - radeon_emit(cs, stride); /* stride */ - radeon_emit(cs, di_src_sel); - } + if (draw_count == 1 && !count_va && !draw_id_enable) { + radeon_emit(cs, + PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT : PKT3_DRAW_INDIRECT, 3, predicating)); + radeon_emit(cs, 0); + radeon_emit(cs, vertex_offset_reg); + radeon_emit(cs, start_instance_reg); + radeon_emit(cs, di_src_sel); + } else { + radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT_MULTI : PKT3_DRAW_INDIRECT_MULTI, 8, + predicating)); + radeon_emit(cs, 0); + radeon_emit(cs, vertex_offset_reg); + radeon_emit(cs, start_instance_reg); + radeon_emit(cs, draw_id_reg | S_2C3_DRAW_INDEX_ENABLE(draw_id_enable) | + S_2C3_COUNT_INDIRECT_ENABLE(!!count_va)); + radeon_emit(cs, draw_count); /* count */ + radeon_emit(cs, count_va); /* count_addr */ + radeon_emit(cs, count_va >> 32); + radeon_emit(cs, stride); /* stride */ + radeon_emit(cs, di_src_sel); + } } static inline void radv_emit_userdata_vertex_internal(struct radv_cmd_buffer *cmd_buffer, - const struct radv_draw_info *info, - const uint32_t vertex_offset) + const struct radv_draw_info *info, const uint32_t vertex_offset) { - struct radv_cmd_state *state = &cmd_buffer->state; - struct radeon_cmdbuf *cs = cmd_buffer->cs; - const bool uses_baseinstance = state->pipeline->graphics.uses_baseinstance; - const bool uses_drawid = state->pipeline->graphics.uses_drawid; - radeon_set_sh_reg_seq(cs, state->pipeline->graphics.vtx_base_sgpr, - state->pipeline->graphics.vtx_emit_num); + struct radv_cmd_state *state = &cmd_buffer->state; + struct radeon_cmdbuf *cs = cmd_buffer->cs; + const bool uses_baseinstance = state->pipeline->graphics.uses_baseinstance; + const bool uses_drawid = state->pipeline->graphics.uses_drawid; + radeon_set_sh_reg_seq(cs, state->pipeline->graphics.vtx_base_sgpr, + state->pipeline->graphics.vtx_emit_num); - radeon_emit(cs, vertex_offset); - state->last_vertex_offset = vertex_offset; - if (uses_drawid) { - radeon_emit(cs, 0); - state->last_drawid = 0; - } - if (uses_baseinstance) { - radeon_emit(cs, info->first_instance); - state->last_first_instance = info->first_instance; - } + radeon_emit(cs, vertex_offset); + state->last_vertex_offset = vertex_offset; + if (uses_drawid) { + radeon_emit(cs, 0); + state->last_drawid = 0; + } + if (uses_baseinstance) { + radeon_emit(cs, info->first_instance); + state->last_first_instance = info->first_instance; + } } static inline void -radv_emit_userdata_vertex(struct radv_cmd_buffer *cmd_buffer, - const struct radv_draw_info *info, - const uint32_t vertex_offset) +radv_emit_userdata_vertex(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info, + const uint32_t vertex_offset) { - const struct radv_cmd_state *state = &cmd_buffer->state; - const bool uses_baseinstance = state->pipeline->graphics.uses_baseinstance; - const bool uses_drawid = state->pipeline->graphics.uses_drawid; + const struct radv_cmd_state *state = &cmd_buffer->state; + const bool uses_baseinstance = state->pipeline->graphics.uses_baseinstance; + const bool uses_drawid = state->pipeline->graphics.uses_drawid; - /* this looks very dumb, but it allows the compiler to optimize better and yields - * ~3-4% perf increase in drawoverhead - */ - if (vertex_offset != state->last_vertex_offset) { - radv_emit_userdata_vertex_internal(cmd_buffer, info, vertex_offset); - } else if (uses_drawid && 0 != state->last_drawid) { - radv_emit_userdata_vertex_internal(cmd_buffer, info, vertex_offset); - } else if (uses_baseinstance && info->first_instance != state->last_first_instance) { - radv_emit_userdata_vertex_internal(cmd_buffer, info, vertex_offset); - } + /* this looks very dumb, but it allows the compiler to optimize better and yields + * ~3-4% perf increase in drawoverhead + */ + if (vertex_offset != state->last_vertex_offset) { + radv_emit_userdata_vertex_internal(cmd_buffer, info, vertex_offset); + } else if (uses_drawid && 0 != state->last_drawid) { + radv_emit_userdata_vertex_internal(cmd_buffer, info, vertex_offset); + } else if (uses_baseinstance && info->first_instance != state->last_first_instance) { + radv_emit_userdata_vertex_internal(cmd_buffer, info, vertex_offset); + } } ALWAYS_INLINE static void radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer, - const struct radv_draw_info *info, - uint32_t count, - uint32_t first_index) + const struct radv_draw_info *info, uint32_t count, + uint32_t first_index) { - const struct radv_cmd_state *state = &cmd_buffer->state; - const int index_size = radv_get_vgt_index_size(state->index_type); - uint64_t index_va; + const struct radv_cmd_state *state = &cmd_buffer->state; + const int index_size = radv_get_vgt_index_size(state->index_type); + uint64_t index_va; - uint32_t remaining_indexes = cmd_buffer->state.max_index_count; - remaining_indexes = MAX2(remaining_indexes, info->first_index) - info->first_index; + uint32_t remaining_indexes = cmd_buffer->state.max_index_count; + remaining_indexes = MAX2(remaining_indexes, info->first_index) - info->first_index; - /* Skip draw calls with 0-sized index buffers if the GPU can't handle them */ - if (!remaining_indexes && - cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug) - return; + /* Skip draw calls with 0-sized index buffers if the GPU can't handle them */ + if (!remaining_indexes && + cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug) + return; - index_va = state->index_va; - index_va += first_index * index_size; + index_va = state->index_va; + index_va += first_index * index_size; - if (!state->subpass->view_mask) { - radv_cs_emit_draw_indexed_packet(cmd_buffer, - index_va, - remaining_indexes, - count); - } else { - u_foreach_bit(i, state->subpass->view_mask) { - radv_emit_view_index(cmd_buffer, i); + if (!state->subpass->view_mask) { + radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, count); + } else { + u_foreach_bit(i, state->subpass->view_mask) + { + radv_emit_view_index(cmd_buffer, i); - radv_cs_emit_draw_indexed_packet(cmd_buffer, - index_va, - remaining_indexes, - count); - } - } + radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, count); + } + } } ALWAYS_INLINE static void -radv_emit_direct_draw_packets(struct radv_cmd_buffer *cmd_buffer, - const struct radv_draw_info *info, - uint32_t count, - uint32_t use_opaque) +radv_emit_direct_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info, + uint32_t count, uint32_t use_opaque) { - const struct radv_cmd_state *state = &cmd_buffer->state; - if (!state->subpass->view_mask) { - radv_cs_emit_draw_packet(cmd_buffer, - count, - use_opaque); - } else { - u_foreach_bit(i, state->subpass->view_mask) { - radv_emit_view_index(cmd_buffer, i); + const struct radv_cmd_state *state = &cmd_buffer->state; + if (!state->subpass->view_mask) { + radv_cs_emit_draw_packet(cmd_buffer, count, use_opaque); + } else { + u_foreach_bit(i, state->subpass->view_mask) + { + radv_emit_view_index(cmd_buffer, i); - radv_cs_emit_draw_packet(cmd_buffer, - count, - use_opaque); - } - } + radv_cs_emit_draw_packet(cmd_buffer, count, use_opaque); + } + } } static void radv_emit_indirect_draw_packets(struct radv_cmd_buffer *cmd_buffer, - const struct radv_draw_info *info) + const struct radv_draw_info *info) { - const struct radv_cmd_state *state = &cmd_buffer->state; - struct radeon_winsys *ws = cmd_buffer->device->ws; - struct radeon_cmdbuf *cs = cmd_buffer->cs; - const uint64_t va = radv_buffer_get_va(info->indirect->bo) + info->indirect->offset + info->indirect_offset; - const uint64_t count_va = info->count_buffer ? radv_buffer_get_va(info->count_buffer->bo) + info->count_buffer->offset + - info->count_buffer_offset : 0; + const struct radv_cmd_state *state = &cmd_buffer->state; + struct radeon_winsys *ws = cmd_buffer->device->ws; + struct radeon_cmdbuf *cs = cmd_buffer->cs; + const uint64_t va = + radv_buffer_get_va(info->indirect->bo) + info->indirect->offset + info->indirect_offset; + const uint64_t count_va = info->count_buffer + ? radv_buffer_get_va(info->count_buffer->bo) + + info->count_buffer->offset + info->count_buffer_offset + : 0; - radv_cs_add_buffer(ws, cs, info->indirect->bo); + radv_cs_add_buffer(ws, cs, info->indirect->bo); - radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0)); - radeon_emit(cs, 1); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); + radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0)); + radeon_emit(cs, 1); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); - if (info->count_buffer) { - radv_cs_add_buffer(ws, cs, info->count_buffer->bo); - } + if (info->count_buffer) { + radv_cs_add_buffer(ws, cs, info->count_buffer->bo); + } - if (!state->subpass->view_mask) { - radv_cs_emit_indirect_draw_packet(cmd_buffer, - info->indexed, - info->count, - count_va, - info->stride); - } else { - u_foreach_bit(i, state->subpass->view_mask) { - radv_emit_view_index(cmd_buffer, i); + if (!state->subpass->view_mask) { + radv_cs_emit_indirect_draw_packet(cmd_buffer, info->indexed, info->count, count_va, + info->stride); + } else { + u_foreach_bit(i, state->subpass->view_mask) + { + radv_emit_view_index(cmd_buffer, i); - radv_cs_emit_indirect_draw_packet(cmd_buffer, - info->indexed, - info->count, - count_va, - info->stride); - } - } + radv_cs_emit_indirect_draw_packet(cmd_buffer, info->indexed, info->count, count_va, + info->stride); + } + } } /* @@ -5511,671 +5182,602 @@ radv_emit_indirect_draw_packets(struct radv_cmd_buffer *cmd_buffer, * return true on affected HW if radv_emit_all_graphics_states sets * any context registers. */ -static bool radv_need_late_scissor_emission(struct radv_cmd_buffer *cmd_buffer, - const struct radv_draw_info *info) +static bool +radv_need_late_scissor_emission(struct radv_cmd_buffer *cmd_buffer, + const struct radv_draw_info *info) { - struct radv_cmd_state *state = &cmd_buffer->state; + struct radv_cmd_state *state = &cmd_buffer->state; - if (!cmd_buffer->device->physical_device->rad_info.has_gfx9_scissor_bug) - return false; + if (!cmd_buffer->device->physical_device->rad_info.has_gfx9_scissor_bug) + return false; - if (cmd_buffer->state.context_roll_without_scissor_emitted || info->strmout_buffer) - return true; + if (cmd_buffer->state.context_roll_without_scissor_emitted || info->strmout_buffer) + return true; - uint64_t used_states = cmd_buffer->state.pipeline->graphics.needed_dynamic_state | ~RADV_CMD_DIRTY_DYNAMIC_ALL; + uint64_t used_states = + cmd_buffer->state.pipeline->graphics.needed_dynamic_state | ~RADV_CMD_DIRTY_DYNAMIC_ALL; - /* Index, vertex and streamout buffers don't change context regs, and - * pipeline is already handled. - */ - used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER | - RADV_CMD_DIRTY_VERTEX_BUFFER | - RADV_CMD_DIRTY_STREAMOUT_BUFFER | - RADV_CMD_DIRTY_PIPELINE); + /* Index, vertex and streamout buffers don't change context regs, and + * pipeline is already handled. + */ + used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_VERTEX_BUFFER | + RADV_CMD_DIRTY_STREAMOUT_BUFFER | RADV_CMD_DIRTY_PIPELINE); - if (cmd_buffer->state.dirty & used_states) - return true; + if (cmd_buffer->state.dirty & used_states) + return true; - uint32_t primitive_reset_index = - radv_get_primitive_reset_index(cmd_buffer); + uint32_t primitive_reset_index = radv_get_primitive_reset_index(cmd_buffer); - if (info->indexed && state->pipeline->graphics.prim_restart_enable && - primitive_reset_index != state->last_primitive_reset_index) - return true; + if (info->indexed && state->pipeline->graphics.prim_restart_enable && + primitive_reset_index != state->last_primitive_reset_index) + return true; - return false; + return false; } static void -radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, - const struct radv_draw_info *info) +radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info) { - bool late_scissor_emission; + bool late_scissor_emission; - if ((cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) || - cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline) - radv_emit_rbplus_state(cmd_buffer); + if ((cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) || + cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline) + radv_emit_rbplus_state(cmd_buffer); - if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) - radv_emit_graphics_pipeline(cmd_buffer); + if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) + radv_emit_graphics_pipeline(cmd_buffer); - /* This should be before the cmd_buffer->state.dirty is cleared - * (excluding RADV_CMD_DIRTY_PIPELINE) and after - * cmd_buffer->state.context_roll_without_scissor_emitted is set. */ - late_scissor_emission = - radv_need_late_scissor_emission(cmd_buffer, info); + /* This should be before the cmd_buffer->state.dirty is cleared + * (excluding RADV_CMD_DIRTY_PIPELINE) and after + * cmd_buffer->state.context_roll_without_scissor_emitted is set. */ + late_scissor_emission = radv_need_late_scissor_emission(cmd_buffer, info); - if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) - radv_emit_framebuffer_state(cmd_buffer); + if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) + radv_emit_framebuffer_state(cmd_buffer); - if (info->indexed) { - if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_INDEX_BUFFER) - radv_emit_index_buffer(cmd_buffer, info->indirect); - } else { - /* On GFX7 and later, non-indexed draws overwrite VGT_INDEX_TYPE, - * so the state must be re-emitted before the next indexed - * draw. - */ - if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) { - cmd_buffer->state.last_index_type = -1; - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER; - } - } + if (info->indexed) { + if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_INDEX_BUFFER) + radv_emit_index_buffer(cmd_buffer, info->indirect); + } else { + /* On GFX7 and later, non-indexed draws overwrite VGT_INDEX_TYPE, + * so the state must be re-emitted before the next indexed + * draw. + */ + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) { + cmd_buffer->state.last_index_type = -1; + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER; + } + } - radv_cmd_buffer_flush_dynamic_state(cmd_buffer); + radv_cmd_buffer_flush_dynamic_state(cmd_buffer); - radv_emit_draw_registers(cmd_buffer, info); + radv_emit_draw_registers(cmd_buffer, info); - if (late_scissor_emission) - radv_emit_scissor(cmd_buffer); + if (late_scissor_emission) + radv_emit_scissor(cmd_buffer); } /* MUST inline this function to avoid massive perf loss in drawoverhead */ ALWAYS_INLINE static bool -radv_before_draw(struct radv_cmd_buffer *cmd_buffer, - const struct radv_draw_info *info, - uint32_t vertex_offset) +radv_before_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info, + uint32_t vertex_offset) { - const bool has_prefetch = - cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7; - const bool pipeline_is_dirty = - (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) && - cmd_buffer->state.pipeline != cmd_buffer->state.emitted_pipeline; + const bool has_prefetch = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7; + const bool pipeline_is_dirty = (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) && + cmd_buffer->state.pipeline != cmd_buffer->state.emitted_pipeline; - ASSERTED const unsigned cdw_max = - radeon_check_space(cmd_buffer->device->ws, - cmd_buffer->cs, 4096); + ASSERTED const unsigned cdw_max = + radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4096); - if (likely(!info->indirect)) { - /* GFX6-GFX7 treat instance_count==0 as instance_count==1. There is - * no workaround for indirect draws, but we can at least skip - * direct draws. - */ - if (unlikely(!info->instance_count)) - return false; + if (likely(!info->indirect)) { + /* GFX6-GFX7 treat instance_count==0 as instance_count==1. There is + * no workaround for indirect draws, but we can at least skip + * direct draws. + */ + if (unlikely(!info->instance_count)) + return false; - /* Handle count == 0. */ - if (unlikely(!info->count && !info->strmout_buffer)) - return false; - } + /* Handle count == 0. */ + if (unlikely(!info->count && !info->strmout_buffer)) + return false; + } - /* Need to apply this workaround early as it can set flush flags. */ - if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) - radv_emit_fb_mip_change_flush(cmd_buffer); + /* Need to apply this workaround early as it can set flush flags. */ + if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) + radv_emit_fb_mip_change_flush(cmd_buffer); - /* Use optimal packet order based on whether we need to sync the - * pipeline. - */ - if (cmd_buffer->state.flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | - RADV_CMD_FLAG_FLUSH_AND_INV_DB | - RADV_CMD_FLAG_PS_PARTIAL_FLUSH | - RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) { - /* If we have to wait for idle, set all states first, so that - * all SET packets are processed in parallel with previous draw - * calls. Then upload descriptors, set shader pointers, and - * draw, and prefetch at the end. This ensures that the time - * the CUs are idle is very short. (there are only SET_SH - * packets between the wait and the draw) - */ - radv_emit_all_graphics_states(cmd_buffer, info); - si_emit_cache_flush(cmd_buffer); - /* <-- CUs are idle here --> */ + /* Use optimal packet order based on whether we need to sync the + * pipeline. + */ + if (cmd_buffer->state.flush_bits & + (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB | + RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) { + /* If we have to wait for idle, set all states first, so that + * all SET packets are processed in parallel with previous draw + * calls. Then upload descriptors, set shader pointers, and + * draw, and prefetch at the end. This ensures that the time + * the CUs are idle is very short. (there are only SET_SH + * packets between the wait and the draw) + */ + radv_emit_all_graphics_states(cmd_buffer, info); + si_emit_cache_flush(cmd_buffer); + /* <-- CUs are idle here --> */ - radv_upload_graphics_shader_descriptors(cmd_buffer, pipeline_is_dirty); - } else { - /* If we don't wait for idle, start prefetches first, then set - * states, and draw at the end. - */ - si_emit_cache_flush(cmd_buffer); + radv_upload_graphics_shader_descriptors(cmd_buffer, pipeline_is_dirty); + } else { + /* If we don't wait for idle, start prefetches first, then set + * states, and draw at the end. + */ + si_emit_cache_flush(cmd_buffer); - if (has_prefetch && cmd_buffer->state.prefetch_L2_mask) { - /* Only prefetch the vertex shader and VBO descriptors - * in order to start the draw as soon as possible. - */ - radv_emit_prefetch_L2(cmd_buffer, - cmd_buffer->state.pipeline, true); - } + if (has_prefetch && cmd_buffer->state.prefetch_L2_mask) { + /* Only prefetch the vertex shader and VBO descriptors + * in order to start the draw as soon as possible. + */ + radv_emit_prefetch_L2(cmd_buffer, cmd_buffer->state.pipeline, true); + } - radv_upload_graphics_shader_descriptors(cmd_buffer, pipeline_is_dirty); + radv_upload_graphics_shader_descriptors(cmd_buffer, pipeline_is_dirty); - radv_emit_all_graphics_states(cmd_buffer, info); - } + radv_emit_all_graphics_states(cmd_buffer, info); + } - radv_describe_draw(cmd_buffer); - if (likely(!info->indirect)) { - struct radv_cmd_state *state = &cmd_buffer->state; - struct radeon_cmdbuf *cs = cmd_buffer->cs; - assert(state->pipeline->graphics.vtx_base_sgpr); - if (state->last_num_instances != info->instance_count) { - radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, false)); - radeon_emit(cs, info->instance_count); - state->last_num_instances = info->instance_count; - } - radv_emit_userdata_vertex(cmd_buffer, info, vertex_offset); - } - assert(cmd_buffer->cs->cdw <= cdw_max); + radv_describe_draw(cmd_buffer); + if (likely(!info->indirect)) { + struct radv_cmd_state *state = &cmd_buffer->state; + struct radeon_cmdbuf *cs = cmd_buffer->cs; + assert(state->pipeline->graphics.vtx_base_sgpr); + if (state->last_num_instances != info->instance_count) { + radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, false)); + radeon_emit(cs, info->instance_count); + state->last_num_instances = info->instance_count; + } + radv_emit_userdata_vertex(cmd_buffer, info, vertex_offset); + } + assert(cmd_buffer->cs->cdw <= cdw_max); - return true; + return true; } static void radv_after_draw(struct radv_cmd_buffer *cmd_buffer) { - const struct radeon_info *rad_info = - &cmd_buffer->device->physical_device->rad_info; - bool has_prefetch = - cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7; - /* Start prefetches after the draw has been started. Both will - * run in parallel, but starting the draw first is more - * important. - */ - if (has_prefetch && cmd_buffer->state.prefetch_L2_mask) { - radv_emit_prefetch_L2(cmd_buffer, - cmd_buffer->state.pipeline, false); - } + const struct radeon_info *rad_info = &cmd_buffer->device->physical_device->rad_info; + bool has_prefetch = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7; + /* Start prefetches after the draw has been started. Both will + * run in parallel, but starting the draw first is more + * important. + */ + if (has_prefetch && cmd_buffer->state.prefetch_L2_mask) { + radv_emit_prefetch_L2(cmd_buffer, cmd_buffer->state.pipeline, false); + } - /* Workaround for a VGT hang when streamout is enabled. - * It must be done after drawing. - */ - if (cmd_buffer->state.streamout.streamout_enabled && - (rad_info->family == CHIP_HAWAII || - rad_info->family == CHIP_TONGA || - rad_info->family == CHIP_FIJI)) { - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_STREAMOUT_SYNC; - } + /* Workaround for a VGT hang when streamout is enabled. + * It must be done after drawing. + */ + if (cmd_buffer->state.streamout.streamout_enabled && + (rad_info->family == CHIP_HAWAII || rad_info->family == CHIP_TONGA || + rad_info->family == CHIP_FIJI)) { + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_STREAMOUT_SYNC; + } - radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_PS_PARTIAL_FLUSH); + radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_PS_PARTIAL_FLUSH); } -void radv_CmdDraw( - VkCommandBuffer commandBuffer, - uint32_t vertexCount, - uint32_t instanceCount, - uint32_t firstVertex, - uint32_t firstInstance) +void +radv_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, + uint32_t firstVertex, uint32_t firstInstance) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_draw_info info; + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_draw_info info; - info.count = vertexCount; - info.instance_count = instanceCount; - info.first_instance = firstInstance; - info.strmout_buffer = NULL; - info.indirect = NULL; - info.indexed = false; + info.count = vertexCount; + info.instance_count = instanceCount; + info.first_instance = firstInstance; + info.strmout_buffer = NULL; + info.indirect = NULL; + info.indexed = false; - if (!radv_before_draw(cmd_buffer, &info, firstVertex)) - return; - radv_emit_direct_draw_packets(cmd_buffer, &info, - vertexCount, 0); - radv_after_draw(cmd_buffer); + if (!radv_before_draw(cmd_buffer, &info, firstVertex)) + return; + radv_emit_direct_draw_packets(cmd_buffer, &info, vertexCount, 0); + radv_after_draw(cmd_buffer); } -void radv_CmdDrawIndexed( - VkCommandBuffer commandBuffer, - uint32_t indexCount, - uint32_t instanceCount, - uint32_t firstIndex, - int32_t vertexOffset, - uint32_t firstInstance) +void +radv_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, + uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_draw_info info; + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_draw_info info; - info.indexed = true; - info.count = indexCount; - info.instance_count = instanceCount; - info.first_index = firstIndex; - info.first_instance = firstInstance; - info.strmout_buffer = NULL; - info.indirect = NULL; + info.indexed = true; + info.count = indexCount; + info.instance_count = instanceCount; + info.first_index = firstIndex; + info.first_instance = firstInstance; + info.strmout_buffer = NULL; + info.indirect = NULL; - if (!radv_before_draw(cmd_buffer, &info, vertexOffset)) - return; - radv_emit_draw_packets_indexed(cmd_buffer, &info, - indexCount, firstIndex); - radv_after_draw(cmd_buffer); + if (!radv_before_draw(cmd_buffer, &info, vertexOffset)) + return; + radv_emit_draw_packets_indexed(cmd_buffer, &info, indexCount, firstIndex); + radv_after_draw(cmd_buffer); } -void radv_CmdDrawIndirect( - VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - uint32_t drawCount, - uint32_t stride) +void +radv_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, + uint32_t drawCount, uint32_t stride) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); - struct radv_draw_info info; + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); + struct radv_draw_info info; - info.count = drawCount; - info.indirect = buffer; - info.indirect_offset = offset; - info.stride = stride; - info.strmout_buffer = NULL; - info.count_buffer = NULL; - info.indexed = false; + info.count = drawCount; + info.indirect = buffer; + info.indirect_offset = offset; + info.stride = stride; + info.strmout_buffer = NULL; + info.count_buffer = NULL; + info.indexed = false; - if (!radv_before_draw(cmd_buffer, &info, 0)) - return; - radv_emit_indirect_draw_packets(cmd_buffer, &info); - radv_after_draw(cmd_buffer); + if (!radv_before_draw(cmd_buffer, &info, 0)) + return; + radv_emit_indirect_draw_packets(cmd_buffer, &info); + radv_after_draw(cmd_buffer); } -void radv_CmdDrawIndexedIndirect( - VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - uint32_t drawCount, - uint32_t stride) +void +radv_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, + uint32_t drawCount, uint32_t stride) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); - struct radv_draw_info info; + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); + struct radv_draw_info info; - info.indexed = true; - info.count = drawCount; - info.indirect = buffer; - info.indirect_offset = offset; - info.stride = stride; - info.count_buffer = NULL; - info.strmout_buffer = NULL; + info.indexed = true; + info.count = drawCount; + info.indirect = buffer; + info.indirect_offset = offset; + info.stride = stride; + info.count_buffer = NULL; + info.strmout_buffer = NULL; - if (!radv_before_draw(cmd_buffer, &info, 0)) - return; - radv_emit_indirect_draw_packets(cmd_buffer, &info); - radv_after_draw(cmd_buffer); + if (!radv_before_draw(cmd_buffer, &info, 0)) + return; + radv_emit_indirect_draw_packets(cmd_buffer, &info); + radv_after_draw(cmd_buffer); } -void radv_CmdDrawIndirectCount( - VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - VkBuffer _countBuffer, - VkDeviceSize countBufferOffset, - uint32_t maxDrawCount, - uint32_t stride) +void +radv_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset, + VkBuffer _countBuffer, VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, uint32_t stride) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); - RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer); - struct radv_draw_info info; + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); + RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer); + struct radv_draw_info info; - info.count = maxDrawCount; - info.indirect = buffer; - info.indirect_offset = offset; - info.count_buffer = count_buffer; - info.count_buffer_offset = countBufferOffset; - info.stride = stride; - info.strmout_buffer = NULL; - info.indexed = false; + info.count = maxDrawCount; + info.indirect = buffer; + info.indirect_offset = offset; + info.count_buffer = count_buffer; + info.count_buffer_offset = countBufferOffset; + info.stride = stride; + info.strmout_buffer = NULL; + info.indexed = false; - if (!radv_before_draw(cmd_buffer, &info, 0)) - return; - radv_emit_indirect_draw_packets(cmd_buffer, &info); - radv_after_draw(cmd_buffer); + if (!radv_before_draw(cmd_buffer, &info, 0)) + return; + radv_emit_indirect_draw_packets(cmd_buffer, &info); + radv_after_draw(cmd_buffer); } -void radv_CmdDrawIndexedIndirectCount( - VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - VkBuffer _countBuffer, - VkDeviceSize countBufferOffset, - uint32_t maxDrawCount, - uint32_t stride) +void +radv_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer, + VkDeviceSize offset, VkBuffer _countBuffer, + VkDeviceSize countBufferOffset, uint32_t maxDrawCount, + uint32_t stride) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); - RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer); - struct radv_draw_info info; + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); + RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer); + struct radv_draw_info info; - info.indexed = true; - info.count = maxDrawCount; - info.indirect = buffer; - info.indirect_offset = offset; - info.count_buffer = count_buffer; - info.count_buffer_offset = countBufferOffset; - info.stride = stride; - info.strmout_buffer = NULL; + info.indexed = true; + info.count = maxDrawCount; + info.indirect = buffer; + info.indirect_offset = offset; + info.count_buffer = count_buffer; + info.count_buffer_offset = countBufferOffset; + info.stride = stride; + info.strmout_buffer = NULL; - if (!radv_before_draw(cmd_buffer, &info, 0)) - return; - radv_emit_indirect_draw_packets(cmd_buffer, &info); - radv_after_draw(cmd_buffer); + if (!radv_before_draw(cmd_buffer, &info, 0)) + return; + radv_emit_indirect_draw_packets(cmd_buffer, &info); + radv_after_draw(cmd_buffer); } struct radv_dispatch_info { - /** - * Determine the layout of the grid (in block units) to be used. - */ - uint32_t blocks[3]; + /** + * Determine the layout of the grid (in block units) to be used. + */ + uint32_t blocks[3]; - /** - * A starting offset for the grid. If unaligned is set, the offset - * must still be aligned. - */ - uint32_t offsets[3]; - /** - * Whether it's an unaligned compute dispatch. - */ - bool unaligned; + /** + * A starting offset for the grid. If unaligned is set, the offset + * must still be aligned. + */ + uint32_t offsets[3]; + /** + * Whether it's an unaligned compute dispatch. + */ + bool unaligned; - /** - * Indirect compute parameters resource. - */ - struct radv_buffer *indirect; - uint64_t indirect_offset; + /** + * Indirect compute parameters resource. + */ + struct radv_buffer *indirect; + uint64_t indirect_offset; }; static void radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, - const struct radv_dispatch_info *info) + const struct radv_dispatch_info *info) { - struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - struct radv_shader_variant *compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE]; - unsigned dispatch_initiator = cmd_buffer->device->dispatch_initiator; - struct radeon_winsys *ws = cmd_buffer->device->ws; - bool predicating = cmd_buffer->state.predicating; - struct radeon_cmdbuf *cs = cmd_buffer->cs; - struct radv_userdata_info *loc; + struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + struct radv_shader_variant *compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE]; + unsigned dispatch_initiator = cmd_buffer->device->dispatch_initiator; + struct radeon_winsys *ws = cmd_buffer->device->ws; + bool predicating = cmd_buffer->state.predicating; + struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_userdata_info *loc; - radv_describe_dispatch(cmd_buffer, info->blocks[0], info->blocks[1], - info->blocks[2]); + radv_describe_dispatch(cmd_buffer, info->blocks[0], info->blocks[1], info->blocks[2]); - loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_COMPUTE, - AC_UD_CS_GRID_SIZE); + loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE); - ASSERTED unsigned cdw_max = radeon_check_space(ws, cs, 25); + ASSERTED unsigned cdw_max = radeon_check_space(ws, cs, 25); - if (compute_shader->info.wave_size == 32) { - assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10); - dispatch_initiator |= S_00B800_CS_W32_EN(1); - } + if (compute_shader->info.wave_size == 32) { + assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10); + dispatch_initiator |= S_00B800_CS_W32_EN(1); + } - if (info->indirect) { - uint64_t va = radv_buffer_get_va(info->indirect->bo); + if (info->indirect) { + uint64_t va = radv_buffer_get_va(info->indirect->bo); - va += info->indirect->offset + info->indirect_offset; + va += info->indirect->offset + info->indirect_offset; - radv_cs_add_buffer(ws, cs, info->indirect->bo); + radv_cs_add_buffer(ws, cs, info->indirect->bo); - if (loc->sgpr_idx != -1) { - for (unsigned i = 0; i < 3; ++i) { - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | - COPY_DATA_DST_SEL(COPY_DATA_REG)); - radeon_emit(cs, (va + 4 * i)); - radeon_emit(cs, (va + 4 * i) >> 32); - radeon_emit(cs, ((R_00B900_COMPUTE_USER_DATA_0 - + loc->sgpr_idx * 4) >> 2) + i); - radeon_emit(cs, 0); - } - } + if (loc->sgpr_idx != -1) { + for (unsigned i = 0; i < 3; ++i) { + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, + COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG)); + radeon_emit(cs, (va + 4 * i)); + radeon_emit(cs, (va + 4 * i) >> 32); + radeon_emit(cs, ((R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4) >> 2) + i); + radeon_emit(cs, 0); + } + } - if (radv_cmd_buffer_uses_mec(cmd_buffer)) { - radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, predicating) | - PKT3_SHADER_TYPE_S(1)); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit(cs, dispatch_initiator); - } else { - radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0) | - PKT3_SHADER_TYPE_S(1)); - radeon_emit(cs, 1); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); + if (radv_cmd_buffer_uses_mec(cmd_buffer)) { + radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, predicating) | PKT3_SHADER_TYPE_S(1)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, dispatch_initiator); + } else { + radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0) | PKT3_SHADER_TYPE_S(1)); + radeon_emit(cs, 1); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); - radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 1, predicating) | - PKT3_SHADER_TYPE_S(1)); - radeon_emit(cs, 0); - radeon_emit(cs, dispatch_initiator); - } - } else { - unsigned blocks[3] = { info->blocks[0], info->blocks[1], info->blocks[2] }; - unsigned offsets[3] = { info->offsets[0], info->offsets[1], info->offsets[2] }; + radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 1, predicating) | PKT3_SHADER_TYPE_S(1)); + radeon_emit(cs, 0); + radeon_emit(cs, dispatch_initiator); + } + } else { + unsigned blocks[3] = {info->blocks[0], info->blocks[1], info->blocks[2]}; + unsigned offsets[3] = {info->offsets[0], info->offsets[1], info->offsets[2]}; - if (info->unaligned) { - unsigned *cs_block_size = compute_shader->info.cs.block_size; - unsigned remainder[3]; + if (info->unaligned) { + unsigned *cs_block_size = compute_shader->info.cs.block_size; + unsigned remainder[3]; - /* If aligned, these should be an entire block size, - * not 0. - */ - remainder[0] = blocks[0] + cs_block_size[0] - - align_u32_npot(blocks[0], cs_block_size[0]); - remainder[1] = blocks[1] + cs_block_size[1] - - align_u32_npot(blocks[1], cs_block_size[1]); - remainder[2] = blocks[2] + cs_block_size[2] - - align_u32_npot(blocks[2], cs_block_size[2]); + /* If aligned, these should be an entire block size, + * not 0. + */ + remainder[0] = blocks[0] + cs_block_size[0] - align_u32_npot(blocks[0], cs_block_size[0]); + remainder[1] = blocks[1] + cs_block_size[1] - align_u32_npot(blocks[1], cs_block_size[1]); + remainder[2] = blocks[2] + cs_block_size[2] - align_u32_npot(blocks[2], cs_block_size[2]); - blocks[0] = round_up_u32(blocks[0], cs_block_size[0]); - blocks[1] = round_up_u32(blocks[1], cs_block_size[1]); - blocks[2] = round_up_u32(blocks[2], cs_block_size[2]); + blocks[0] = round_up_u32(blocks[0], cs_block_size[0]); + blocks[1] = round_up_u32(blocks[1], cs_block_size[1]); + blocks[2] = round_up_u32(blocks[2], cs_block_size[2]); - for(unsigned i = 0; i < 3; ++i) { - assert(offsets[i] % cs_block_size[i] == 0); - offsets[i] /= cs_block_size[i]; - } + for (unsigned i = 0; i < 3; ++i) { + assert(offsets[i] % cs_block_size[i] == 0); + offsets[i] /= cs_block_size[i]; + } - radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); - radeon_emit(cs, - S_00B81C_NUM_THREAD_FULL(cs_block_size[0]) | - S_00B81C_NUM_THREAD_PARTIAL(remainder[0])); - radeon_emit(cs, - S_00B81C_NUM_THREAD_FULL(cs_block_size[1]) | - S_00B81C_NUM_THREAD_PARTIAL(remainder[1])); - radeon_emit(cs, - S_00B81C_NUM_THREAD_FULL(cs_block_size[2]) | - S_00B81C_NUM_THREAD_PARTIAL(remainder[2])); + radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); + radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(cs_block_size[0]) | + S_00B81C_NUM_THREAD_PARTIAL(remainder[0])); + radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(cs_block_size[1]) | + S_00B81C_NUM_THREAD_PARTIAL(remainder[1])); + radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(cs_block_size[2]) | + S_00B81C_NUM_THREAD_PARTIAL(remainder[2])); - dispatch_initiator |= S_00B800_PARTIAL_TG_EN(1); - } + dispatch_initiator |= S_00B800_PARTIAL_TG_EN(1); + } - if (loc->sgpr_idx != -1) { - assert(loc->num_sgprs == 3); + if (loc->sgpr_idx != -1) { + assert(loc->num_sgprs == 3); - radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 + - loc->sgpr_idx * 4, 3); - radeon_emit(cs, blocks[0]); - radeon_emit(cs, blocks[1]); - radeon_emit(cs, blocks[2]); - } + radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, 3); + radeon_emit(cs, blocks[0]); + radeon_emit(cs, blocks[1]); + radeon_emit(cs, blocks[2]); + } - if (offsets[0] || offsets[1] || offsets[2]) { - radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3); - radeon_emit(cs, offsets[0]); - radeon_emit(cs, offsets[1]); - radeon_emit(cs, offsets[2]); + if (offsets[0] || offsets[1] || offsets[2]) { + radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3); + radeon_emit(cs, offsets[0]); + radeon_emit(cs, offsets[1]); + radeon_emit(cs, offsets[2]); - /* The blocks in the packet are not counts but end values. */ - for (unsigned i = 0; i < 3; ++i) - blocks[i] += offsets[i]; - } else { - dispatch_initiator |= S_00B800_FORCE_START_AT_000(1); - } + /* The blocks in the packet are not counts but end values. */ + for (unsigned i = 0; i < 3; ++i) + blocks[i] += offsets[i]; + } else { + dispatch_initiator |= S_00B800_FORCE_START_AT_000(1); + } - radeon_emit(cs, PKT3(PKT3_DISPATCH_DIRECT, 3, predicating) | - PKT3_SHADER_TYPE_S(1)); - radeon_emit(cs, blocks[0]); - radeon_emit(cs, blocks[1]); - radeon_emit(cs, blocks[2]); - radeon_emit(cs, dispatch_initiator); - } + radeon_emit(cs, PKT3(PKT3_DISPATCH_DIRECT, 3, predicating) | PKT3_SHADER_TYPE_S(1)); + radeon_emit(cs, blocks[0]); + radeon_emit(cs, blocks[1]); + radeon_emit(cs, blocks[2]); + radeon_emit(cs, dispatch_initiator); + } - assert(cmd_buffer->cs->cdw <= cdw_max); + assert(cmd_buffer->cs->cdw <= cdw_max); } static void radv_upload_compute_shader_descriptors(struct radv_cmd_buffer *cmd_buffer) { - radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT); - radv_flush_constants(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT); + radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT); + radv_flush_constants(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT); } static void -radv_dispatch(struct radv_cmd_buffer *cmd_buffer, - const struct radv_dispatch_info *info) +radv_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_info *info) { - struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; - bool has_prefetch = - cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7; - bool pipeline_is_dirty = pipeline && - pipeline != cmd_buffer->state.emitted_compute_pipeline; + struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; + bool has_prefetch = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7; + bool pipeline_is_dirty = pipeline && pipeline != cmd_buffer->state.emitted_compute_pipeline; - if (cmd_buffer->state.flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | - RADV_CMD_FLAG_FLUSH_AND_INV_DB | - RADV_CMD_FLAG_PS_PARTIAL_FLUSH | - RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) { - /* If we have to wait for idle, set all states first, so that - * all SET packets are processed in parallel with previous draw - * calls. Then upload descriptors, set shader pointers, and - * dispatch, and prefetch at the end. This ensures that the - * time the CUs are idle is very short. (there are only SET_SH - * packets between the wait and the draw) - */ - radv_emit_compute_pipeline(cmd_buffer); - si_emit_cache_flush(cmd_buffer); - /* <-- CUs are idle here --> */ + if (cmd_buffer->state.flush_bits & + (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB | + RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) { + /* If we have to wait for idle, set all states first, so that + * all SET packets are processed in parallel with previous draw + * calls. Then upload descriptors, set shader pointers, and + * dispatch, and prefetch at the end. This ensures that the + * time the CUs are idle is very short. (there are only SET_SH + * packets between the wait and the draw) + */ + radv_emit_compute_pipeline(cmd_buffer); + si_emit_cache_flush(cmd_buffer); + /* <-- CUs are idle here --> */ - radv_upload_compute_shader_descriptors(cmd_buffer); + radv_upload_compute_shader_descriptors(cmd_buffer); - radv_emit_dispatch_packets(cmd_buffer, info); - /* <-- CUs are busy here --> */ + radv_emit_dispatch_packets(cmd_buffer, info); + /* <-- CUs are busy here --> */ - /* Start prefetches after the dispatch has been started. Both - * will run in parallel, but starting the dispatch first is - * more important. - */ - if (has_prefetch && pipeline_is_dirty) { - radv_emit_shader_prefetch(cmd_buffer, - pipeline->shaders[MESA_SHADER_COMPUTE]); - } - } else { - /* If we don't wait for idle, start prefetches first, then set - * states, and dispatch at the end. - */ - si_emit_cache_flush(cmd_buffer); + /* Start prefetches after the dispatch has been started. Both + * will run in parallel, but starting the dispatch first is + * more important. + */ + if (has_prefetch && pipeline_is_dirty) { + radv_emit_shader_prefetch(cmd_buffer, pipeline->shaders[MESA_SHADER_COMPUTE]); + } + } else { + /* If we don't wait for idle, start prefetches first, then set + * states, and dispatch at the end. + */ + si_emit_cache_flush(cmd_buffer); - if (has_prefetch && pipeline_is_dirty) { - radv_emit_shader_prefetch(cmd_buffer, - pipeline->shaders[MESA_SHADER_COMPUTE]); - } + if (has_prefetch && pipeline_is_dirty) { + radv_emit_shader_prefetch(cmd_buffer, pipeline->shaders[MESA_SHADER_COMPUTE]); + } - radv_upload_compute_shader_descriptors(cmd_buffer); + radv_upload_compute_shader_descriptors(cmd_buffer); - radv_emit_compute_pipeline(cmd_buffer); - radv_emit_dispatch_packets(cmd_buffer, info); - } + radv_emit_compute_pipeline(cmd_buffer); + radv_emit_dispatch_packets(cmd_buffer, info); + } - radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_CS_PARTIAL_FLUSH); + radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_CS_PARTIAL_FLUSH); } -void radv_CmdDispatchBase( - VkCommandBuffer commandBuffer, - uint32_t base_x, - uint32_t base_y, - uint32_t base_z, - uint32_t x, - uint32_t y, - uint32_t z) +void +radv_CmdDispatchBase(VkCommandBuffer commandBuffer, uint32_t base_x, uint32_t base_y, + uint32_t base_z, uint32_t x, uint32_t y, uint32_t z) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_dispatch_info info = {0}; + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_dispatch_info info = {0}; - info.blocks[0] = x; - info.blocks[1] = y; - info.blocks[2] = z; + info.blocks[0] = x; + info.blocks[1] = y; + info.blocks[2] = z; - info.offsets[0] = base_x; - info.offsets[1] = base_y; - info.offsets[2] = base_z; - radv_dispatch(cmd_buffer, &info); + info.offsets[0] = base_x; + info.offsets[1] = base_y; + info.offsets[2] = base_z; + radv_dispatch(cmd_buffer, &info); } -void radv_CmdDispatch( - VkCommandBuffer commandBuffer, - uint32_t x, - uint32_t y, - uint32_t z) +void +radv_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z) { - radv_CmdDispatchBase(commandBuffer, 0, 0, 0, x, y, z); + radv_CmdDispatchBase(commandBuffer, 0, 0, 0, x, y, z); } -void radv_CmdDispatchIndirect( - VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset) +void +radv_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); - struct radv_dispatch_info info = {0}; + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); + struct radv_dispatch_info info = {0}; - info.indirect = buffer; - info.indirect_offset = offset; + info.indirect = buffer; + info.indirect_offset = offset; - radv_dispatch(cmd_buffer, &info); + radv_dispatch(cmd_buffer, &info); } -void radv_unaligned_dispatch( - struct radv_cmd_buffer *cmd_buffer, - uint32_t x, - uint32_t y, - uint32_t z) +void +radv_unaligned_dispatch(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, uint32_t z) { - struct radv_dispatch_info info = {0}; + struct radv_dispatch_info info = {0}; - info.blocks[0] = x; - info.blocks[1] = y; - info.blocks[2] = z; - info.unaligned = 1; + info.blocks[0] = x; + info.blocks[1] = y; + info.blocks[2] = z; + info.unaligned = 1; - radv_dispatch(cmd_buffer, &info); + radv_dispatch(cmd_buffer, &info); } void radv_cmd_buffer_end_render_pass(struct radv_cmd_buffer *cmd_buffer) { - vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments); - vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.subpass_sample_locs); + vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments); + vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.subpass_sample_locs); - cmd_buffer->state.pass = NULL; - cmd_buffer->state.subpass = NULL; - cmd_buffer->state.attachments = NULL; - cmd_buffer->state.framebuffer = NULL; - cmd_buffer->state.subpass_sample_locs = NULL; + cmd_buffer->state.pass = NULL; + cmd_buffer->state.subpass = NULL; + cmd_buffer->state.attachments = NULL; + cmd_buffer->state.framebuffer = NULL; + cmd_buffer->state.subpass_sample_locs = NULL; } -void radv_CmdEndRenderPass2( - VkCommandBuffer commandBuffer, - const VkSubpassEndInfo* pSubpassEndInfo) +void +radv_CmdEndRenderPass2(VkCommandBuffer commandBuffer, const VkSubpassEndInfo *pSubpassEndInfo) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - radv_subpass_barrier(cmd_buffer, &cmd_buffer->state.pass->end_barrier); + radv_subpass_barrier(cmd_buffer, &cmd_buffer->state.pass->end_barrier); - radv_cmd_buffer_end_subpass(cmd_buffer); + radv_cmd_buffer_end_subpass(cmd_buffer); - radv_cmd_buffer_end_render_pass(cmd_buffer); + radv_cmd_buffer_end_render_pass(cmd_buffer); } /* @@ -6185,1194 +5787,1073 @@ void radv_CmdEndRenderPass2( * 0xfffffff0: Clear depth to 1.0 * 0x00000000: Clear depth to 0.0 */ -static void radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - const VkImageSubresourceRange *range) +static void +radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, + const VkImageSubresourceRange *range) { - VkImageAspectFlags aspects = VK_IMAGE_ASPECT_DEPTH_BIT; - struct radv_cmd_state *state = &cmd_buffer->state; - uint32_t htile_value = radv_get_htile_initial_value(cmd_buffer->device, image); - VkClearDepthStencilValue value = {0}; - struct radv_barrier_data barrier = {0}; + VkImageAspectFlags aspects = VK_IMAGE_ASPECT_DEPTH_BIT; + struct radv_cmd_state *state = &cmd_buffer->state; + uint32_t htile_value = radv_get_htile_initial_value(cmd_buffer->device, image); + VkClearDepthStencilValue value = {0}; + struct radv_barrier_data barrier = {0}; - barrier.layout_transitions.init_mask_ram = 1; - radv_describe_layout_transition(cmd_buffer, &barrier); + barrier.layout_transitions.init_mask_ram = 1; + radv_describe_layout_transition(cmd_buffer, &barrier); - /* Transitioning from LAYOUT_UNDEFINED layout not everyone is consistent - * in considering previous rendering work for WAW hazards. */ - state->flush_bits |= radv_src_access_flush(cmd_buffer, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, image); + /* Transitioning from LAYOUT_UNDEFINED layout not everyone is consistent + * in considering previous rendering work for WAW hazards. */ + state->flush_bits |= + radv_src_access_flush(cmd_buffer, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, image); - state->flush_bits |= radv_clear_htile(cmd_buffer, image, range, htile_value); + state->flush_bits |= radv_clear_htile(cmd_buffer, image, range, htile_value); - if (vk_format_has_stencil(image->vk_format)) - aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; + if (vk_format_has_stencil(image->vk_format)) + aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; - radv_set_ds_clear_metadata(cmd_buffer, image, range, value, aspects); + radv_set_ds_clear_metadata(cmd_buffer, image, range, value, aspects); - if (radv_image_is_tc_compat_htile(image)) { - /* Initialize the TC-compat metada value to 0 because by - * default DB_Z_INFO.RANGE_PRECISION is set to 1, and we only - * need have to conditionally update its value when performing - * a fast depth clear. - */ - radv_set_tc_compat_zrange_metadata(cmd_buffer, image, range, 0); - } + if (radv_image_is_tc_compat_htile(image)) { + /* Initialize the TC-compat metada value to 0 because by + * default DB_Z_INFO.RANGE_PRECISION is set to 1, and we only + * need have to conditionally update its value when performing + * a fast depth clear. + */ + radv_set_tc_compat_zrange_metadata(cmd_buffer, image, range, 0); + } } -static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - VkImageLayout src_layout, - bool src_render_loop, - VkImageLayout dst_layout, - bool dst_render_loop, - unsigned src_queue_mask, - unsigned dst_queue_mask, - const VkImageSubresourceRange *range, - struct radv_sample_locations_state *sample_locs) +static void +radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, + VkImageLayout src_layout, bool src_render_loop, + VkImageLayout dst_layout, bool dst_render_loop, + unsigned src_queue_mask, unsigned dst_queue_mask, + const VkImageSubresourceRange *range, + struct radv_sample_locations_state *sample_locs) { - struct radv_device *device = cmd_buffer->device; + struct radv_device *device = cmd_buffer->device; - if (!radv_htile_enabled(image, range->baseMipLevel)) - return; + if (!radv_htile_enabled(image, range->baseMipLevel)) + return; - if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) { - radv_initialize_htile(cmd_buffer, image, range); - } else if (!radv_layout_is_htile_compressed(device, image, src_layout, src_render_loop, src_queue_mask) && - radv_layout_is_htile_compressed(device, image, dst_layout, dst_render_loop, dst_queue_mask)) { - radv_initialize_htile(cmd_buffer, image, range); - } else if (radv_layout_is_htile_compressed(device, image, src_layout, src_render_loop, src_queue_mask) && - !radv_layout_is_htile_compressed(device, image, dst_layout, dst_render_loop, dst_queue_mask)) { - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB | - RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; + if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) { + radv_initialize_htile(cmd_buffer, image, range); + } else if (!radv_layout_is_htile_compressed(device, image, src_layout, src_render_loop, + src_queue_mask) && + radv_layout_is_htile_compressed(device, image, dst_layout, dst_render_loop, + dst_queue_mask)) { + radv_initialize_htile(cmd_buffer, image, range); + } else if (radv_layout_is_htile_compressed(device, image, src_layout, src_render_loop, + src_queue_mask) && + !radv_layout_is_htile_compressed(device, image, dst_layout, dst_render_loop, + dst_queue_mask)) { + cmd_buffer->state.flush_bits |= + RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; - radv_decompress_depth_stencil(cmd_buffer, image, range, - sample_locs); + radv_decompress_depth_stencil(cmd_buffer, image, range, sample_locs); - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB | - RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; - } + cmd_buffer->state.flush_bits |= + RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; + } } -static uint32_t radv_init_cmask(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - const VkImageSubresourceRange *range) +static uint32_t +radv_init_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, + const VkImageSubresourceRange *range) { - static const uint32_t cmask_clear_values[4] = { - 0xffffffff, - 0xdddddddd, - 0xeeeeeeee, - 0xffffffff - }; - uint32_t log2_samples = util_logbase2(image->info.samples); - uint32_t value = cmask_clear_values[log2_samples]; - struct radv_barrier_data barrier = {0}; + static const uint32_t cmask_clear_values[4] = {0xffffffff, 0xdddddddd, 0xeeeeeeee, 0xffffffff}; + uint32_t log2_samples = util_logbase2(image->info.samples); + uint32_t value = cmask_clear_values[log2_samples]; + struct radv_barrier_data barrier = {0}; - barrier.layout_transitions.init_mask_ram = 1; - radv_describe_layout_transition(cmd_buffer, &barrier); + barrier.layout_transitions.init_mask_ram = 1; + radv_describe_layout_transition(cmd_buffer, &barrier); - return radv_clear_cmask(cmd_buffer, image, range, value); + return radv_clear_cmask(cmd_buffer, image, range, value); } -uint32_t radv_init_fmask(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - const VkImageSubresourceRange *range) +uint32_t +radv_init_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, + const VkImageSubresourceRange *range) { - static const uint32_t fmask_clear_values[4] = { - 0x00000000, - 0x02020202, - 0xE4E4E4E4, - 0x76543210 - }; - uint32_t log2_samples = util_logbase2(image->info.samples); - uint32_t value = fmask_clear_values[log2_samples]; - struct radv_barrier_data barrier = {0}; + static const uint32_t fmask_clear_values[4] = {0x00000000, 0x02020202, 0xE4E4E4E4, 0x76543210}; + uint32_t log2_samples = util_logbase2(image->info.samples); + uint32_t value = fmask_clear_values[log2_samples]; + struct radv_barrier_data barrier = {0}; - barrier.layout_transitions.init_mask_ram = 1; - radv_describe_layout_transition(cmd_buffer, &barrier); + barrier.layout_transitions.init_mask_ram = 1; + radv_describe_layout_transition(cmd_buffer, &barrier); - return radv_clear_fmask(cmd_buffer, image, range, value); + return radv_clear_fmask(cmd_buffer, image, range, value); } -uint32_t radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - const VkImageSubresourceRange *range, - uint32_t value) +uint32_t +radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, + const VkImageSubresourceRange *range, uint32_t value) { - struct radv_barrier_data barrier = {0}; - uint32_t flush_bits = 0; - unsigned size = 0; + struct radv_barrier_data barrier = {0}; + uint32_t flush_bits = 0; + unsigned size = 0; - barrier.layout_transitions.init_mask_ram = 1; - radv_describe_layout_transition(cmd_buffer, &barrier); + barrier.layout_transitions.init_mask_ram = 1; + radv_describe_layout_transition(cmd_buffer, &barrier); - flush_bits |= radv_clear_dcc(cmd_buffer, image, range, value); + flush_bits |= radv_clear_dcc(cmd_buffer, image, range, value); - if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX8) { - /* When DCC is enabled with mipmaps, some levels might not - * support fast clears and we have to initialize them as "fully - * expanded". - */ - /* Compute the size of all fast clearable DCC levels. */ - for (unsigned i = 0; i < image->planes[0].surface.num_dcc_levels; i++) { - struct legacy_surf_level *surf_level = - &image->planes[0].surface.u.legacy.level[i]; - unsigned dcc_fast_clear_size = - surf_level->dcc_slice_fast_clear_size * image->info.array_size; + if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX8) { + /* When DCC is enabled with mipmaps, some levels might not + * support fast clears and we have to initialize them as "fully + * expanded". + */ + /* Compute the size of all fast clearable DCC levels. */ + for (unsigned i = 0; i < image->planes[0].surface.num_dcc_levels; i++) { + struct legacy_surf_level *surf_level = &image->planes[0].surface.u.legacy.level[i]; + unsigned dcc_fast_clear_size = + surf_level->dcc_slice_fast_clear_size * image->info.array_size; - if (!dcc_fast_clear_size) - break; + if (!dcc_fast_clear_size) + break; - size = surf_level->dcc_offset + dcc_fast_clear_size; - } + size = surf_level->dcc_offset + dcc_fast_clear_size; + } - /* Initialize the mipmap levels without DCC. */ - if (size != image->planes[0].surface.dcc_size) { - flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bo, - image->offset + image->planes[0].surface.dcc_offset + size, - image->planes[0].surface.dcc_size - size, - 0xffffffff); - } - } + /* Initialize the mipmap levels without DCC. */ + if (size != image->planes[0].surface.dcc_size) { + flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bo, + image->offset + image->planes[0].surface.dcc_offset + size, + image->planes[0].surface.dcc_size - size, 0xffffffff); + } + } - return flush_bits; + return flush_bits; } /** * Initialize DCC/FMASK/CMASK metadata for a color image. */ -static void radv_init_color_image_metadata(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - VkImageLayout src_layout, - bool src_render_loop, - VkImageLayout dst_layout, - bool dst_render_loop, - unsigned src_queue_mask, - unsigned dst_queue_mask, - const VkImageSubresourceRange *range) +static void +radv_init_color_image_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, + VkImageLayout src_layout, bool src_render_loop, + VkImageLayout dst_layout, bool dst_render_loop, + unsigned src_queue_mask, unsigned dst_queue_mask, + const VkImageSubresourceRange *range) { - uint32_t flush_bits = 0; + uint32_t flush_bits = 0; - /* Transitioning from LAYOUT_UNDEFINED layout not everyone is - * consistent in considering previous rendering work for WAW hazards. - */ - cmd_buffer->state.flush_bits |= - radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, image); + /* Transitioning from LAYOUT_UNDEFINED layout not everyone is + * consistent in considering previous rendering work for WAW hazards. + */ + cmd_buffer->state.flush_bits |= + radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, image); - if (radv_image_has_cmask(image)) { - flush_bits |= radv_init_cmask(cmd_buffer, image, range); - } + if (radv_image_has_cmask(image)) { + flush_bits |= radv_init_cmask(cmd_buffer, image, range); + } - if (radv_image_has_fmask(image)) { - flush_bits |= radv_init_fmask(cmd_buffer, image, range); - } + if (radv_image_has_fmask(image)) { + flush_bits |= radv_init_fmask(cmd_buffer, image, range); + } - if (radv_dcc_enabled(image, range->baseMipLevel)) { - uint32_t value = 0xffffffffu; /* Fully expanded mode. */ + if (radv_dcc_enabled(image, range->baseMipLevel)) { + uint32_t value = 0xffffffffu; /* Fully expanded mode. */ - if (radv_layout_dcc_compressed(cmd_buffer->device, image, dst_layout, - dst_render_loop, - dst_queue_mask)) { - value = 0u; - } + if (radv_layout_dcc_compressed(cmd_buffer->device, image, dst_layout, dst_render_loop, + dst_queue_mask)) { + value = 0u; + } - flush_bits |= radv_init_dcc(cmd_buffer, image, range, value); - } + flush_bits |= radv_init_dcc(cmd_buffer, image, range, value); + } - if (radv_image_has_cmask(image) || - radv_dcc_enabled(image, range->baseMipLevel)) { - radv_update_fce_metadata(cmd_buffer, image, range, false); + if (radv_image_has_cmask(image) || radv_dcc_enabled(image, range->baseMipLevel)) { + radv_update_fce_metadata(cmd_buffer, image, range, false); - uint32_t color_values[2] = {0}; - radv_set_color_clear_metadata(cmd_buffer, image, range, - color_values); - } + uint32_t color_values[2] = {0}; + radv_set_color_clear_metadata(cmd_buffer, image, range, color_values); + } - cmd_buffer->state.flush_bits |= flush_bits; + cmd_buffer->state.flush_bits |= flush_bits; } -static void radv_retile_transition(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - VkImageLayout src_layout, - VkImageLayout dst_layout, - unsigned dst_queue_mask) +static void +radv_retile_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, + VkImageLayout src_layout, VkImageLayout dst_layout, unsigned dst_queue_mask) { - if (src_layout != VK_IMAGE_LAYOUT_PRESENT_SRC_KHR && - (dst_layout == VK_IMAGE_LAYOUT_PRESENT_SRC_KHR || - (dst_queue_mask & (1u << RADV_QUEUE_FOREIGN)))) - radv_retile_dcc(cmd_buffer, image); + if (src_layout != VK_IMAGE_LAYOUT_PRESENT_SRC_KHR && + (dst_layout == VK_IMAGE_LAYOUT_PRESENT_SRC_KHR || + (dst_queue_mask & (1u << RADV_QUEUE_FOREIGN)))) + radv_retile_dcc(cmd_buffer, image); } /** * Handle color image transitions for DCC/FMASK/CMASK. */ -static void radv_handle_color_image_transition(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - VkImageLayout src_layout, - bool src_render_loop, - VkImageLayout dst_layout, - bool dst_render_loop, - unsigned src_queue_mask, - unsigned dst_queue_mask, - const VkImageSubresourceRange *range) +static void +radv_handle_color_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, + VkImageLayout src_layout, bool src_render_loop, + VkImageLayout dst_layout, bool dst_render_loop, + unsigned src_queue_mask, unsigned dst_queue_mask, + const VkImageSubresourceRange *range) { - bool dcc_decompressed = false, fast_clear_flushed = false; + bool dcc_decompressed = false, fast_clear_flushed = false; - if (!radv_image_has_cmask(image) && - !radv_image_has_fmask(image) && - !radv_dcc_enabled(image, range->baseMipLevel)) - return; + if (!radv_image_has_cmask(image) && !radv_image_has_fmask(image) && + !radv_dcc_enabled(image, range->baseMipLevel)) + return; - if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) { - radv_init_color_image_metadata(cmd_buffer, image, - src_layout, src_render_loop, - dst_layout, dst_render_loop, - src_queue_mask, dst_queue_mask, - range); + if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) { + radv_init_color_image_metadata(cmd_buffer, image, src_layout, src_render_loop, dst_layout, + dst_render_loop, src_queue_mask, dst_queue_mask, range); - if (image->retile_map) - radv_retile_transition(cmd_buffer, image, src_layout, dst_layout, dst_queue_mask); - return; - } + if (image->retile_map) + radv_retile_transition(cmd_buffer, image, src_layout, dst_layout, dst_queue_mask); + return; + } - if (radv_dcc_enabled(image, range->baseMipLevel)) { - if (src_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) { - cmd_buffer->state.flush_bits |= - radv_init_dcc(cmd_buffer, image, range, 0xffffffffu); - } else if (radv_layout_dcc_compressed(cmd_buffer->device, image, src_layout, src_render_loop, src_queue_mask) && - !radv_layout_dcc_compressed(cmd_buffer->device, image, dst_layout, dst_render_loop, dst_queue_mask)) { - radv_decompress_dcc(cmd_buffer, image, range); - dcc_decompressed = true; - } else if (radv_layout_can_fast_clear(cmd_buffer->device, image, src_layout, - src_render_loop, src_queue_mask) && - !radv_layout_can_fast_clear(cmd_buffer->device, image, dst_layout, - dst_render_loop, dst_queue_mask)) { - radv_fast_clear_flush_image_inplace(cmd_buffer, image, range); - fast_clear_flushed = true; - } + if (radv_dcc_enabled(image, range->baseMipLevel)) { + if (src_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) { + cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, image, range, 0xffffffffu); + } else if (radv_layout_dcc_compressed(cmd_buffer->device, image, src_layout, src_render_loop, + src_queue_mask) && + !radv_layout_dcc_compressed(cmd_buffer->device, image, dst_layout, dst_render_loop, + dst_queue_mask)) { + radv_decompress_dcc(cmd_buffer, image, range); + dcc_decompressed = true; + } else if (radv_layout_can_fast_clear(cmd_buffer->device, image, src_layout, src_render_loop, + src_queue_mask) && + !radv_layout_can_fast_clear(cmd_buffer->device, image, dst_layout, dst_render_loop, + dst_queue_mask)) { + radv_fast_clear_flush_image_inplace(cmd_buffer, image, range); + fast_clear_flushed = true; + } - if (image->retile_map) - radv_retile_transition(cmd_buffer, image, src_layout, dst_layout, dst_queue_mask); - } else if (radv_image_has_cmask(image) || radv_image_has_fmask(image)) { - if (radv_layout_can_fast_clear(cmd_buffer->device, image, src_layout, - src_render_loop, src_queue_mask) && - !radv_layout_can_fast_clear(cmd_buffer->device, image, dst_layout, - dst_render_loop, dst_queue_mask)) { - radv_fast_clear_flush_image_inplace(cmd_buffer, image, range); - fast_clear_flushed = true; - } - } + if (image->retile_map) + radv_retile_transition(cmd_buffer, image, src_layout, dst_layout, dst_queue_mask); + } else if (radv_image_has_cmask(image) || radv_image_has_fmask(image)) { + if (radv_layout_can_fast_clear(cmd_buffer->device, image, src_layout, src_render_loop, + src_queue_mask) && + !radv_layout_can_fast_clear(cmd_buffer->device, image, dst_layout, dst_render_loop, + dst_queue_mask)) { + radv_fast_clear_flush_image_inplace(cmd_buffer, image, range); + fast_clear_flushed = true; + } + } - /* MSAA color decompress. */ - if (radv_image_has_fmask(image) && - (image->usage & (VK_IMAGE_USAGE_STORAGE_BIT | - VK_IMAGE_USAGE_TRANSFER_DST_BIT)) && - radv_layout_fmask_compressed(cmd_buffer->device, image, - src_layout, src_queue_mask) && - !radv_layout_fmask_compressed(cmd_buffer->device, image, - dst_layout, dst_queue_mask)) { - if (radv_dcc_enabled(image, range->baseMipLevel) && - !radv_image_use_dcc_image_stores(cmd_buffer->device, image) && - !dcc_decompressed) { - /* A DCC decompress is required before expanding FMASK - * when DCC stores aren't supported to avoid being in - * a state where DCC is compressed and the main - * surface is uncompressed. - */ - radv_decompress_dcc(cmd_buffer, image, range); - } else if (!fast_clear_flushed) { - /* A FMASK decompress is required before expanding - * FMASK. - */ - radv_fast_clear_flush_image_inplace(cmd_buffer, image, range); - } + /* MSAA color decompress. */ + if (radv_image_has_fmask(image) && + (image->usage & (VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT)) && + radv_layout_fmask_compressed(cmd_buffer->device, image, src_layout, src_queue_mask) && + !radv_layout_fmask_compressed(cmd_buffer->device, image, dst_layout, dst_queue_mask)) { + if (radv_dcc_enabled(image, range->baseMipLevel) && + !radv_image_use_dcc_image_stores(cmd_buffer->device, image) && !dcc_decompressed) { + /* A DCC decompress is required before expanding FMASK + * when DCC stores aren't supported to avoid being in + * a state where DCC is compressed and the main + * surface is uncompressed. + */ + radv_decompress_dcc(cmd_buffer, image, range); + } else if (!fast_clear_flushed) { + /* A FMASK decompress is required before expanding + * FMASK. + */ + radv_fast_clear_flush_image_inplace(cmd_buffer, image, range); + } - struct radv_barrier_data barrier = {0}; - barrier.layout_transitions.fmask_color_expand = 1; - radv_describe_layout_transition(cmd_buffer, &barrier); + struct radv_barrier_data barrier = {0}; + barrier.layout_transitions.fmask_color_expand = 1; + radv_describe_layout_transition(cmd_buffer, &barrier); - radv_expand_fmask_image_inplace(cmd_buffer, image, range); - } + radv_expand_fmask_image_inplace(cmd_buffer, image, range); + } } -static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, - struct radv_image *image, - VkImageLayout src_layout, - bool src_render_loop, - VkImageLayout dst_layout, - bool dst_render_loop, - uint32_t src_family, - uint32_t dst_family, - const VkImageSubresourceRange *range, - struct radv_sample_locations_state *sample_locs) +static void +radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, + VkImageLayout src_layout, bool src_render_loop, + VkImageLayout dst_layout, bool dst_render_loop, uint32_t src_family, + uint32_t dst_family, const VkImageSubresourceRange *range, + struct radv_sample_locations_state *sample_locs) { - if (image->exclusive && src_family != dst_family) { - /* This is an acquire or a release operation and there will be - * a corresponding release/acquire. Do the transition in the - * most flexible queue. */ + if (image->exclusive && src_family != dst_family) { + /* This is an acquire or a release operation and there will be + * a corresponding release/acquire. Do the transition in the + * most flexible queue. */ - assert(src_family == cmd_buffer->queue_family_index || - dst_family == cmd_buffer->queue_family_index); + assert(src_family == cmd_buffer->queue_family_index || + dst_family == cmd_buffer->queue_family_index); - if (src_family == VK_QUEUE_FAMILY_EXTERNAL || - src_family == VK_QUEUE_FAMILY_FOREIGN_EXT) - return; + if (src_family == VK_QUEUE_FAMILY_EXTERNAL || src_family == VK_QUEUE_FAMILY_FOREIGN_EXT) + return; - if (cmd_buffer->queue_family_index == RADV_QUEUE_TRANSFER) - return; + if (cmd_buffer->queue_family_index == RADV_QUEUE_TRANSFER) + return; - if (cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE && - (src_family == RADV_QUEUE_GENERAL || - dst_family == RADV_QUEUE_GENERAL)) - return; - } + if (cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE && + (src_family == RADV_QUEUE_GENERAL || dst_family == RADV_QUEUE_GENERAL)) + return; + } - if (src_layout == dst_layout && src_render_loop == dst_render_loop) - return; + if (src_layout == dst_layout && src_render_loop == dst_render_loop) + return; - unsigned src_queue_mask = - radv_image_queue_family_mask(image, src_family, - cmd_buffer->queue_family_index); - unsigned dst_queue_mask = - radv_image_queue_family_mask(image, dst_family, - cmd_buffer->queue_family_index); + unsigned src_queue_mask = + radv_image_queue_family_mask(image, src_family, cmd_buffer->queue_family_index); + unsigned dst_queue_mask = + radv_image_queue_family_mask(image, dst_family, cmd_buffer->queue_family_index); - if (vk_format_has_depth(image->vk_format)) { - radv_handle_depth_image_transition(cmd_buffer, image, - src_layout, src_render_loop, - dst_layout, dst_render_loop, - src_queue_mask, dst_queue_mask, - range, sample_locs); - } else { - radv_handle_color_image_transition(cmd_buffer, image, - src_layout, src_render_loop, - dst_layout, dst_render_loop, - src_queue_mask, dst_queue_mask, - range); - } + if (vk_format_has_depth(image->vk_format)) { + radv_handle_depth_image_transition(cmd_buffer, image, src_layout, src_render_loop, dst_layout, + dst_render_loop, src_queue_mask, dst_queue_mask, range, + sample_locs); + } else { + radv_handle_color_image_transition(cmd_buffer, image, src_layout, src_render_loop, dst_layout, + dst_render_loop, src_queue_mask, dst_queue_mask, range); + } } struct radv_barrier_info { - enum rgp_barrier_reason reason; - uint32_t eventCount; - const VkEvent *pEvents; - VkPipelineStageFlags srcStageMask; - VkPipelineStageFlags dstStageMask; + enum rgp_barrier_reason reason; + uint32_t eventCount; + const VkEvent *pEvents; + VkPipelineStageFlags srcStageMask; + VkPipelineStageFlags dstStageMask; }; static void -radv_barrier(struct radv_cmd_buffer *cmd_buffer, - uint32_t memoryBarrierCount, - const VkMemoryBarrier *pMemoryBarriers, - uint32_t bufferMemoryBarrierCount, - const VkBufferMemoryBarrier *pBufferMemoryBarriers, - uint32_t imageMemoryBarrierCount, - const VkImageMemoryBarrier *pImageMemoryBarriers, - const struct radv_barrier_info *info) +radv_barrier(struct radv_cmd_buffer *cmd_buffer, uint32_t memoryBarrierCount, + const VkMemoryBarrier *pMemoryBarriers, uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier *pBufferMemoryBarriers, uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier *pImageMemoryBarriers, const struct radv_barrier_info *info) { - struct radeon_cmdbuf *cs = cmd_buffer->cs; - enum radv_cmd_flush_bits src_flush_bits = 0; - enum radv_cmd_flush_bits dst_flush_bits = 0; + struct radeon_cmdbuf *cs = cmd_buffer->cs; + enum radv_cmd_flush_bits src_flush_bits = 0; + enum radv_cmd_flush_bits dst_flush_bits = 0; - radv_describe_barrier_start(cmd_buffer, info->reason); + radv_describe_barrier_start(cmd_buffer, info->reason); - for (unsigned i = 0; i < info->eventCount; ++i) { - RADV_FROM_HANDLE(radv_event, event, info->pEvents[i]); - uint64_t va = radv_buffer_get_va(event->bo); + for (unsigned i = 0; i < info->eventCount; ++i) { + RADV_FROM_HANDLE(radv_event, event, info->pEvents[i]); + uint64_t va = radv_buffer_get_va(event->bo); - radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo); + radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo); - ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 7); + ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 7); - radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, va, 1, 0xffffffff); - assert(cmd_buffer->cs->cdw <= cdw_max); - } + radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, va, 1, 0xffffffff); + assert(cmd_buffer->cs->cdw <= cdw_max); + } - for (uint32_t i = 0; i < memoryBarrierCount; i++) { - src_flush_bits |= radv_src_access_flush(cmd_buffer, pMemoryBarriers[i].srcAccessMask, - NULL); - dst_flush_bits |= radv_dst_access_flush(cmd_buffer, pMemoryBarriers[i].dstAccessMask, - NULL); - } + for (uint32_t i = 0; i < memoryBarrierCount; i++) { + src_flush_bits |= radv_src_access_flush(cmd_buffer, pMemoryBarriers[i].srcAccessMask, NULL); + dst_flush_bits |= radv_dst_access_flush(cmd_buffer, pMemoryBarriers[i].dstAccessMask, NULL); + } - for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) { - src_flush_bits |= radv_src_access_flush(cmd_buffer, pBufferMemoryBarriers[i].srcAccessMask, - NULL); - dst_flush_bits |= radv_dst_access_flush(cmd_buffer, pBufferMemoryBarriers[i].dstAccessMask, - NULL); - } + for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) { + src_flush_bits |= + radv_src_access_flush(cmd_buffer, pBufferMemoryBarriers[i].srcAccessMask, NULL); + dst_flush_bits |= + radv_dst_access_flush(cmd_buffer, pBufferMemoryBarriers[i].dstAccessMask, NULL); + } - for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) { - RADV_FROM_HANDLE(radv_image, image, pImageMemoryBarriers[i].image); + for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) { + RADV_FROM_HANDLE(radv_image, image, pImageMemoryBarriers[i].image); - src_flush_bits |= radv_src_access_flush(cmd_buffer, pImageMemoryBarriers[i].srcAccessMask, - image); - dst_flush_bits |= radv_dst_access_flush(cmd_buffer, pImageMemoryBarriers[i].dstAccessMask, - image); - } + src_flush_bits |= + radv_src_access_flush(cmd_buffer, pImageMemoryBarriers[i].srcAccessMask, image); + dst_flush_bits |= + radv_dst_access_flush(cmd_buffer, pImageMemoryBarriers[i].dstAccessMask, image); + } - /* The Vulkan spec 1.1.98 says: - * - * "An execution dependency with only - * VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT in the destination stage mask - * will only prevent that stage from executing in subsequently - * submitted commands. As this stage does not perform any actual - * execution, this is not observable - in effect, it does not delay - * processing of subsequent commands. Similarly an execution dependency - * with only VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT in the source stage mask - * will effectively not wait for any prior commands to complete." - */ - if (info->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT) - radv_stage_flush(cmd_buffer, info->srcStageMask); - cmd_buffer->state.flush_bits |= src_flush_bits; + /* The Vulkan spec 1.1.98 says: + * + * "An execution dependency with only + * VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT in the destination stage mask + * will only prevent that stage from executing in subsequently + * submitted commands. As this stage does not perform any actual + * execution, this is not observable - in effect, it does not delay + * processing of subsequent commands. Similarly an execution dependency + * with only VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT in the source stage mask + * will effectively not wait for any prior commands to complete." + */ + if (info->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT) + radv_stage_flush(cmd_buffer, info->srcStageMask); + cmd_buffer->state.flush_bits |= src_flush_bits; - for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) { - RADV_FROM_HANDLE(radv_image, image, pImageMemoryBarriers[i].image); + for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) { + RADV_FROM_HANDLE(radv_image, image, pImageMemoryBarriers[i].image); - const struct VkSampleLocationsInfoEXT *sample_locs_info = - vk_find_struct_const(pImageMemoryBarriers[i].pNext, - SAMPLE_LOCATIONS_INFO_EXT); - struct radv_sample_locations_state sample_locations = {0}; + const struct VkSampleLocationsInfoEXT *sample_locs_info = + vk_find_struct_const(pImageMemoryBarriers[i].pNext, SAMPLE_LOCATIONS_INFO_EXT); + struct radv_sample_locations_state sample_locations = {0}; - if (sample_locs_info) { - assert(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT); - sample_locations.per_pixel = sample_locs_info->sampleLocationsPerPixel; - sample_locations.grid_size = sample_locs_info->sampleLocationGridSize; - sample_locations.count = sample_locs_info->sampleLocationsCount; - typed_memcpy(&sample_locations.locations[0], - sample_locs_info->pSampleLocations, - sample_locs_info->sampleLocationsCount); - } + if (sample_locs_info) { + assert(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT); + sample_locations.per_pixel = sample_locs_info->sampleLocationsPerPixel; + sample_locations.grid_size = sample_locs_info->sampleLocationGridSize; + sample_locations.count = sample_locs_info->sampleLocationsCount; + typed_memcpy(&sample_locations.locations[0], sample_locs_info->pSampleLocations, + sample_locs_info->sampleLocationsCount); + } - radv_handle_image_transition(cmd_buffer, image, - pImageMemoryBarriers[i].oldLayout, - false, /* Outside of a renderpass we are never in a renderloop */ - pImageMemoryBarriers[i].newLayout, - false, /* Outside of a renderpass we are never in a renderloop */ - pImageMemoryBarriers[i].srcQueueFamilyIndex, - pImageMemoryBarriers[i].dstQueueFamilyIndex, - &pImageMemoryBarriers[i].subresourceRange, - sample_locs_info ? &sample_locations : NULL); - } + radv_handle_image_transition( + cmd_buffer, image, pImageMemoryBarriers[i].oldLayout, + false, /* Outside of a renderpass we are never in a renderloop */ + pImageMemoryBarriers[i].newLayout, + false, /* Outside of a renderpass we are never in a renderloop */ + pImageMemoryBarriers[i].srcQueueFamilyIndex, pImageMemoryBarriers[i].dstQueueFamilyIndex, + &pImageMemoryBarriers[i].subresourceRange, sample_locs_info ? &sample_locations : NULL); + } - /* Make sure CP DMA is idle because the driver might have performed a - * DMA operation for copying or filling buffers/images. - */ - if (info->srcStageMask & (VK_PIPELINE_STAGE_TRANSFER_BIT | - VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)) - si_cp_dma_wait_for_idle(cmd_buffer); + /* Make sure CP DMA is idle because the driver might have performed a + * DMA operation for copying or filling buffers/images. + */ + if (info->srcStageMask & (VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)) + si_cp_dma_wait_for_idle(cmd_buffer); - cmd_buffer->state.flush_bits |= dst_flush_bits; + cmd_buffer->state.flush_bits |= dst_flush_bits; - radv_describe_barrier_end(cmd_buffer); + radv_describe_barrier_end(cmd_buffer); } -void radv_CmdPipelineBarrier( - VkCommandBuffer commandBuffer, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags destStageMask, - VkBool32 byRegion, - uint32_t memoryBarrierCount, - const VkMemoryBarrier* pMemoryBarriers, - uint32_t bufferMemoryBarrierCount, - const VkBufferMemoryBarrier* pBufferMemoryBarriers, - uint32_t imageMemoryBarrierCount, - const VkImageMemoryBarrier* pImageMemoryBarriers) +void +radv_CmdPipelineBarrier(VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, VkBool32 byRegion, + uint32_t memoryBarrierCount, const VkMemoryBarrier *pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier *pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier *pImageMemoryBarriers) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_barrier_info info; + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_barrier_info info; - info.reason = RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER; - info.eventCount = 0; - info.pEvents = NULL; - info.srcStageMask = srcStageMask; - info.dstStageMask = destStageMask; + info.reason = RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER; + info.eventCount = 0; + info.pEvents = NULL; + info.srcStageMask = srcStageMask; + info.dstStageMask = destStageMask; - radv_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers, - bufferMemoryBarrierCount, pBufferMemoryBarriers, - imageMemoryBarrierCount, pImageMemoryBarriers, &info); + radv_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers, bufferMemoryBarrierCount, + pBufferMemoryBarriers, imageMemoryBarrierCount, pImageMemoryBarriers, &info); } - -static void write_event(struct radv_cmd_buffer *cmd_buffer, - struct radv_event *event, - VkPipelineStageFlags stageMask, - unsigned value) +static void +write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *event, + VkPipelineStageFlags stageMask, unsigned value) { - struct radeon_cmdbuf *cs = cmd_buffer->cs; - uint64_t va = radv_buffer_get_va(event->bo); + struct radeon_cmdbuf *cs = cmd_buffer->cs; + uint64_t va = radv_buffer_get_va(event->bo); - si_emit_cache_flush(cmd_buffer); + si_emit_cache_flush(cmd_buffer); - radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo); + radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo); - ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 28); + ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 28); - /* Flags that only require a top-of-pipe event. */ - VkPipelineStageFlags top_of_pipe_flags = - VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + /* Flags that only require a top-of-pipe event. */ + VkPipelineStageFlags top_of_pipe_flags = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; - /* Flags that only require a post-index-fetch event. */ - VkPipelineStageFlags post_index_fetch_flags = - top_of_pipe_flags | - VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + /* Flags that only require a post-index-fetch event. */ + VkPipelineStageFlags post_index_fetch_flags = + top_of_pipe_flags | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; - /* Flags that only require signaling post PS. */ - VkPipelineStageFlags post_ps_flags = - post_index_fetch_flags | - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | - VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | - VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | - VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | - VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT | - VK_PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR | - VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + /* Flags that only require signaling post PS. */ + VkPipelineStageFlags post_ps_flags = + post_index_fetch_flags | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | + VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | + VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT | + VK_PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR | + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - /* Flags that only require signaling post CS. */ - VkPipelineStageFlags post_cs_flags = - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + /* Flags that only require signaling post CS. */ + VkPipelineStageFlags post_cs_flags = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; - /* Make sure CP DMA is idle because the driver might have performed a - * DMA operation for copying or filling buffers/images. - */ - if (stageMask & (VK_PIPELINE_STAGE_TRANSFER_BIT | - VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)) - si_cp_dma_wait_for_idle(cmd_buffer); + /* Make sure CP DMA is idle because the driver might have performed a + * DMA operation for copying or filling buffers/images. + */ + if (stageMask & (VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)) + si_cp_dma_wait_for_idle(cmd_buffer); - if (!(stageMask & ~top_of_pipe_flags)) { - /* Just need to sync the PFP engine. */ - radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); - radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | - S_370_WR_CONFIRM(1) | - S_370_ENGINE_SEL(V_370_PFP)); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit(cs, value); - } else if (!(stageMask & ~post_index_fetch_flags)) { - /* Sync ME because PFP reads index and indirect buffers. */ - radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); - radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | - S_370_WR_CONFIRM(1) | - S_370_ENGINE_SEL(V_370_ME)); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit(cs, value); - } else { - unsigned event_type; + if (!(stageMask & ~top_of_pipe_flags)) { + /* Just need to sync the PFP engine. */ + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); + radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, value); + } else if (!(stageMask & ~post_index_fetch_flags)) { + /* Sync ME because PFP reads index and indirect buffers. */ + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); + radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, value); + } else { + unsigned event_type; - if (!(stageMask & ~post_ps_flags)) { - /* Sync previous fragment shaders. */ - event_type = V_028A90_PS_DONE; - } else if (!(stageMask & ~post_cs_flags)) { - /* Sync previous compute shaders. */ - event_type = V_028A90_CS_DONE; - } else { - /* Otherwise, sync all prior GPU work. */ - event_type = V_028A90_BOTTOM_OF_PIPE_TS; - } + if (!(stageMask & ~post_ps_flags)) { + /* Sync previous fragment shaders. */ + event_type = V_028A90_PS_DONE; + } else if (!(stageMask & ~post_cs_flags)) { + /* Sync previous compute shaders. */ + event_type = V_028A90_CS_DONE; + } else { + /* Otherwise, sync all prior GPU work. */ + event_type = V_028A90_BOTTOM_OF_PIPE_TS; + } - si_cs_emit_write_event_eop(cs, - cmd_buffer->device->physical_device->rad_info.chip_class, - radv_cmd_buffer_uses_mec(cmd_buffer), - event_type, 0, EOP_DST_SEL_MEM, - EOP_DATA_SEL_VALUE_32BIT, va, value, - cmd_buffer->gfx9_eop_bug_va); - } + si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.chip_class, + radv_cmd_buffer_uses_mec(cmd_buffer), event_type, 0, + EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, va, value, + cmd_buffer->gfx9_eop_bug_va); + } - assert(cmd_buffer->cs->cdw <= cdw_max); + assert(cmd_buffer->cs->cdw <= cdw_max); } -void radv_CmdSetEvent(VkCommandBuffer commandBuffer, - VkEvent _event, - VkPipelineStageFlags stageMask) +void +radv_CmdSetEvent(VkCommandBuffer commandBuffer, VkEvent _event, VkPipelineStageFlags stageMask) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - RADV_FROM_HANDLE(radv_event, event, _event); + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + RADV_FROM_HANDLE(radv_event, event, _event); - write_event(cmd_buffer, event, stageMask, 1); + write_event(cmd_buffer, event, stageMask, 1); } -void radv_CmdResetEvent(VkCommandBuffer commandBuffer, - VkEvent _event, - VkPipelineStageFlags stageMask) +void +radv_CmdResetEvent(VkCommandBuffer commandBuffer, VkEvent _event, VkPipelineStageFlags stageMask) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - RADV_FROM_HANDLE(radv_event, event, _event); + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + RADV_FROM_HANDLE(radv_event, event, _event); - write_event(cmd_buffer, event, stageMask, 0); + write_event(cmd_buffer, event, stageMask, 0); } -void radv_CmdWaitEvents(VkCommandBuffer commandBuffer, - uint32_t eventCount, - const VkEvent* pEvents, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - uint32_t memoryBarrierCount, - const VkMemoryBarrier* pMemoryBarriers, - uint32_t bufferMemoryBarrierCount, - const VkBufferMemoryBarrier* pBufferMemoryBarriers, - uint32_t imageMemoryBarrierCount, - const VkImageMemoryBarrier* pImageMemoryBarriers) +void +radv_CmdWaitEvents(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents, + VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, + uint32_t memoryBarrierCount, const VkMemoryBarrier *pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier *pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier *pImageMemoryBarriers) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_barrier_info info; + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_barrier_info info; - info.reason = RGP_BARRIER_EXTERNAL_CMD_WAIT_EVENTS; - info.eventCount = eventCount; - info.pEvents = pEvents; - info.srcStageMask = 0; + info.reason = RGP_BARRIER_EXTERNAL_CMD_WAIT_EVENTS; + info.eventCount = eventCount; + info.pEvents = pEvents; + info.srcStageMask = 0; - radv_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers, - bufferMemoryBarrierCount, pBufferMemoryBarriers, - imageMemoryBarrierCount, pImageMemoryBarriers, &info); + radv_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers, bufferMemoryBarrierCount, + pBufferMemoryBarriers, imageMemoryBarrierCount, pImageMemoryBarriers, &info); } - -void radv_CmdSetDeviceMask(VkCommandBuffer commandBuffer, - uint32_t deviceMask) +void +radv_CmdSetDeviceMask(VkCommandBuffer commandBuffer, uint32_t deviceMask) { /* No-op */ } /* VK_EXT_conditional_rendering */ -void radv_CmdBeginConditionalRenderingEXT( - VkCommandBuffer commandBuffer, - const VkConditionalRenderingBeginInfoEXT* pConditionalRenderingBegin) +void +radv_CmdBeginConditionalRenderingEXT( + VkCommandBuffer commandBuffer, + const VkConditionalRenderingBeginInfoEXT *pConditionalRenderingBegin) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - RADV_FROM_HANDLE(radv_buffer, buffer, pConditionalRenderingBegin->buffer); - struct radeon_cmdbuf *cs = cmd_buffer->cs; - unsigned pred_op = PREDICATION_OP_BOOL32; - bool draw_visible = true; - uint64_t va; + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + RADV_FROM_HANDLE(radv_buffer, buffer, pConditionalRenderingBegin->buffer); + struct radeon_cmdbuf *cs = cmd_buffer->cs; + unsigned pred_op = PREDICATION_OP_BOOL32; + bool draw_visible = true; + uint64_t va; - va = radv_buffer_get_va(buffer->bo) + pConditionalRenderingBegin->offset; + va = radv_buffer_get_va(buffer->bo) + pConditionalRenderingBegin->offset; - /* By default, if the 32-bit value at offset in buffer memory is zero, - * then the rendering commands are discarded, otherwise they are - * executed as normal. If the inverted flag is set, all commands are - * discarded if the value is non zero. - */ - if (pConditionalRenderingBegin->flags & - VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT) { - draw_visible = false; - } + /* By default, if the 32-bit value at offset in buffer memory is zero, + * then the rendering commands are discarded, otherwise they are + * executed as normal. If the inverted flag is set, all commands are + * discarded if the value is non zero. + */ + if (pConditionalRenderingBegin->flags & VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT) { + draw_visible = false; + } - si_emit_cache_flush(cmd_buffer); + si_emit_cache_flush(cmd_buffer); - if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL && - !cmd_buffer->device->physical_device->rad_info.has_32bit_predication) { - uint64_t pred_value = 0, pred_va; - unsigned pred_offset; + if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL && + !cmd_buffer->device->physical_device->rad_info.has_32bit_predication) { + uint64_t pred_value = 0, pred_va; + unsigned pred_offset; - /* From the Vulkan spec 1.1.107: - * - * "If the 32-bit value at offset in buffer memory is zero, - * then the rendering commands are discarded, otherwise they - * are executed as normal. If the value of the predicate in - * buffer memory changes while conditional rendering is - * active, the rendering commands may be discarded in an - * implementation-dependent way. Some implementations may - * latch the value of the predicate upon beginning conditional - * rendering while others may read it before every rendering - * command." - * - * But, the AMD hardware treats the predicate as a 64-bit - * value which means we need a workaround in the driver. - * Luckily, it's not required to support if the value changes - * when predication is active. - * - * The workaround is as follows: - * 1) allocate a 64-value in the upload BO and initialize it - * to 0 - * 2) copy the 32-bit predicate value to the upload BO - * 3) use the new allocated VA address for predication - * - * Based on the conditionalrender demo, it's faster to do the - * COPY_DATA in ME (+ sync PFP) instead of PFP. - */ - radv_cmd_buffer_upload_data(cmd_buffer, 8, &pred_value, &pred_offset); + /* From the Vulkan spec 1.1.107: + * + * "If the 32-bit value at offset in buffer memory is zero, + * then the rendering commands are discarded, otherwise they + * are executed as normal. If the value of the predicate in + * buffer memory changes while conditional rendering is + * active, the rendering commands may be discarded in an + * implementation-dependent way. Some implementations may + * latch the value of the predicate upon beginning conditional + * rendering while others may read it before every rendering + * command." + * + * But, the AMD hardware treats the predicate as a 64-bit + * value which means we need a workaround in the driver. + * Luckily, it's not required to support if the value changes + * when predication is active. + * + * The workaround is as follows: + * 1) allocate a 64-value in the upload BO and initialize it + * to 0 + * 2) copy the 32-bit predicate value to the upload BO + * 3) use the new allocated VA address for predication + * + * Based on the conditionalrender demo, it's faster to do the + * COPY_DATA in ME (+ sync PFP) instead of PFP. + */ + radv_cmd_buffer_upload_data(cmd_buffer, 8, &pred_value, &pred_offset); - pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset; + pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset; - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | - COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | - COPY_DATA_WR_CONFIRM); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit(cs, pred_va); - radeon_emit(cs, pred_va >> 32); + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | + COPY_DATA_WR_CONFIRM); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, pred_va); + radeon_emit(cs, pred_va >> 32); - radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); - radeon_emit(cs, 0); + radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); + radeon_emit(cs, 0); - va = pred_va; - pred_op = PREDICATION_OP_BOOL64; - } + va = pred_va; + pred_op = PREDICATION_OP_BOOL64; + } - /* Enable predication for this command buffer. */ - si_emit_set_predication_state(cmd_buffer, draw_visible, pred_op, va); - cmd_buffer->state.predicating = true; + /* Enable predication for this command buffer. */ + si_emit_set_predication_state(cmd_buffer, draw_visible, pred_op, va); + cmd_buffer->state.predicating = true; - /* Store conditional rendering user info. */ - cmd_buffer->state.predication_type = draw_visible; - cmd_buffer->state.predication_op = pred_op; - cmd_buffer->state.predication_va = va; + /* Store conditional rendering user info. */ + cmd_buffer->state.predication_type = draw_visible; + cmd_buffer->state.predication_op = pred_op; + cmd_buffer->state.predication_va = va; } -void radv_CmdEndConditionalRenderingEXT( - VkCommandBuffer commandBuffer) +void +radv_CmdEndConditionalRenderingEXT(VkCommandBuffer commandBuffer) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - /* Disable predication for this command buffer. */ - si_emit_set_predication_state(cmd_buffer, false, 0, 0); - cmd_buffer->state.predicating = false; + /* Disable predication for this command buffer. */ + si_emit_set_predication_state(cmd_buffer, false, 0, 0); + cmd_buffer->state.predicating = false; - /* Reset conditional rendering user info. */ - cmd_buffer->state.predication_type = -1; - cmd_buffer->state.predication_op = 0; - cmd_buffer->state.predication_va = 0; + /* Reset conditional rendering user info. */ + cmd_buffer->state.predication_type = -1; + cmd_buffer->state.predication_op = 0; + cmd_buffer->state.predication_va = 0; } /* VK_EXT_transform_feedback */ -void radv_CmdBindTransformFeedbackBuffersEXT( - VkCommandBuffer commandBuffer, - uint32_t firstBinding, - uint32_t bindingCount, - const VkBuffer* pBuffers, - const VkDeviceSize* pOffsets, - const VkDeviceSize* pSizes) +void +radv_CmdBindTransformFeedbackBuffersEXT(VkCommandBuffer commandBuffer, uint32_t firstBinding, + uint32_t bindingCount, const VkBuffer *pBuffers, + const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings; - uint8_t enabled_mask = 0; + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings; + uint8_t enabled_mask = 0; - assert(firstBinding + bindingCount <= MAX_SO_BUFFERS); - for (uint32_t i = 0; i < bindingCount; i++) { - uint32_t idx = firstBinding + i; + assert(firstBinding + bindingCount <= MAX_SO_BUFFERS); + for (uint32_t i = 0; i < bindingCount; i++) { + uint32_t idx = firstBinding + i; - sb[idx].buffer = radv_buffer_from_handle(pBuffers[i]); - sb[idx].offset = pOffsets[i]; + sb[idx].buffer = radv_buffer_from_handle(pBuffers[i]); + sb[idx].offset = pOffsets[i]; - if (!pSizes || pSizes[i] == VK_WHOLE_SIZE) { - sb[idx].size = sb[idx].buffer->size - sb[idx].offset; - } else { - sb[idx].size = pSizes[i]; - } + if (!pSizes || pSizes[i] == VK_WHOLE_SIZE) { + sb[idx].size = sb[idx].buffer->size - sb[idx].offset; + } else { + sb[idx].size = pSizes[i]; + } - radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, - sb[idx].buffer->bo); + radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, sb[idx].buffer->bo); - enabled_mask |= 1 << idx; - } + enabled_mask |= 1 << idx; + } - cmd_buffer->state.streamout.enabled_mask |= enabled_mask; + cmd_buffer->state.streamout.enabled_mask |= enabled_mask; - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER; + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER; } static void radv_emit_streamout_enable(struct radv_cmd_buffer *cmd_buffer) { - struct radv_streamout_state *so = &cmd_buffer->state.streamout; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_streamout_state *so = &cmd_buffer->state.streamout; + struct radeon_cmdbuf *cs = cmd_buffer->cs; - radeon_set_context_reg_seq(cs, R_028B94_VGT_STRMOUT_CONFIG, 2); - radeon_emit(cs, - S_028B94_STREAMOUT_0_EN(so->streamout_enabled) | - S_028B94_RAST_STREAM(0) | - S_028B94_STREAMOUT_1_EN(so->streamout_enabled) | - S_028B94_STREAMOUT_2_EN(so->streamout_enabled) | - S_028B94_STREAMOUT_3_EN(so->streamout_enabled)); - radeon_emit(cs, so->hw_enabled_mask & - so->enabled_stream_buffers_mask); + radeon_set_context_reg_seq(cs, R_028B94_VGT_STRMOUT_CONFIG, 2); + radeon_emit(cs, S_028B94_STREAMOUT_0_EN(so->streamout_enabled) | S_028B94_RAST_STREAM(0) | + S_028B94_STREAMOUT_1_EN(so->streamout_enabled) | + S_028B94_STREAMOUT_2_EN(so->streamout_enabled) | + S_028B94_STREAMOUT_3_EN(so->streamout_enabled)); + radeon_emit(cs, so->hw_enabled_mask & so->enabled_stream_buffers_mask); - cmd_buffer->state.context_roll_without_scissor_emitted = true; + cmd_buffer->state.context_roll_without_scissor_emitted = true; } static void radv_set_streamout_enable(struct radv_cmd_buffer *cmd_buffer, bool enable) { - struct radv_streamout_state *so = &cmd_buffer->state.streamout; - bool old_streamout_enabled = so->streamout_enabled; - uint32_t old_hw_enabled_mask = so->hw_enabled_mask; + struct radv_streamout_state *so = &cmd_buffer->state.streamout; + bool old_streamout_enabled = so->streamout_enabled; + uint32_t old_hw_enabled_mask = so->hw_enabled_mask; - so->streamout_enabled = enable; + so->streamout_enabled = enable; - so->hw_enabled_mask = so->enabled_mask | - (so->enabled_mask << 4) | - (so->enabled_mask << 8) | - (so->enabled_mask << 12); + so->hw_enabled_mask = so->enabled_mask | (so->enabled_mask << 4) | (so->enabled_mask << 8) | + (so->enabled_mask << 12); - if (!cmd_buffer->device->physical_device->use_ngg_streamout && - ((old_streamout_enabled != so->streamout_enabled) || - (old_hw_enabled_mask != so->hw_enabled_mask))) - radv_emit_streamout_enable(cmd_buffer); + if (!cmd_buffer->device->physical_device->use_ngg_streamout && + ((old_streamout_enabled != so->streamout_enabled) || + (old_hw_enabled_mask != so->hw_enabled_mask))) + radv_emit_streamout_enable(cmd_buffer); - if (cmd_buffer->device->physical_device->use_ngg_streamout) { - cmd_buffer->gds_needed = true; - cmd_buffer->gds_oa_needed = true; - } -} - -static void radv_flush_vgt_streamout(struct radv_cmd_buffer *cmd_buffer) -{ - struct radeon_cmdbuf *cs = cmd_buffer->cs; - unsigned reg_strmout_cntl; - - /* The register is at different places on different ASICs. */ - if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) { - reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL; - radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0); - } else { - reg_strmout_cntl = R_0084FC_CP_STRMOUT_CNTL; - radeon_set_config_reg(cs, reg_strmout_cntl, 0); - } - - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); - radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0)); - - radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); - radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */ - radeon_emit(cs, reg_strmout_cntl >> 2); /* register */ - radeon_emit(cs, 0); - radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* reference value */ - radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* mask */ - radeon_emit(cs, 4); /* poll interval */ + if (cmd_buffer->device->physical_device->use_ngg_streamout) { + cmd_buffer->gds_needed = true; + cmd_buffer->gds_oa_needed = true; + } } static void -radv_emit_streamout_begin(struct radv_cmd_buffer *cmd_buffer, - uint32_t firstCounterBuffer, - uint32_t counterBufferCount, - const VkBuffer *pCounterBuffers, - const VkDeviceSize *pCounterBufferOffsets) - +radv_flush_vgt_streamout(struct radv_cmd_buffer *cmd_buffer) { - struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings; - struct radv_streamout_state *so = &cmd_buffer->state.streamout; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radeon_cmdbuf *cs = cmd_buffer->cs; + unsigned reg_strmout_cntl; - radv_flush_vgt_streamout(cmd_buffer); + /* The register is at different places on different ASICs. */ + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) { + reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL; + radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0); + } else { + reg_strmout_cntl = R_0084FC_CP_STRMOUT_CNTL; + radeon_set_config_reg(cs, reg_strmout_cntl, 0); + } - assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); - u_foreach_bit(i, so->enabled_mask) { - int32_t counter_buffer_idx = i - firstCounterBuffer; - if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount) - counter_buffer_idx = -1; + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); + radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0)); - /* AMD GCN binds streamout buffers as shader resources. - * VGT only counts primitives and tells the shader through - * SGPRs what to do. - */ - radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2); - radeon_emit(cs, sb[i].size >> 2); /* BUFFER_SIZE (in DW) */ - radeon_emit(cs, so->stride_in_dw[i]); /* VTX_STRIDE (in DW) */ - - cmd_buffer->state.context_roll_without_scissor_emitted = true; - - if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) { - /* The array of counter buffers is optional. */ - RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]); - uint64_t va = radv_buffer_get_va(buffer->bo); - uint64_t counter_buffer_offset = 0; - - if (pCounterBufferOffsets) - counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx]; - - va += buffer->offset + counter_buffer_offset; - - /* Append */ - radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); - radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | - STRMOUT_DATA_TYPE(1) | /* offset in bytes */ - STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM)); /* control */ - radeon_emit(cs, 0); /* unused */ - radeon_emit(cs, 0); /* unused */ - radeon_emit(cs, va); /* src address lo */ - radeon_emit(cs, va >> 32); /* src address hi */ - - radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo); - } else { - /* Start from the beginning. */ - radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); - radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | - STRMOUT_DATA_TYPE(1) | /* offset in bytes */ - STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */ - radeon_emit(cs, 0); /* unused */ - radeon_emit(cs, 0); /* unused */ - radeon_emit(cs, 0); /* unused */ - radeon_emit(cs, 0); /* unused */ - } - } - - radv_set_streamout_enable(cmd_buffer, true); + radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); + radeon_emit(cs, + WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */ + radeon_emit(cs, reg_strmout_cntl >> 2); /* register */ + radeon_emit(cs, 0); + radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* reference value */ + radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* mask */ + radeon_emit(cs, 4); /* poll interval */ } static void -gfx10_emit_streamout_begin(struct radv_cmd_buffer *cmd_buffer, - uint32_t firstCounterBuffer, - uint32_t counterBufferCount, - const VkBuffer *pCounterBuffers, - const VkDeviceSize *pCounterBufferOffsets) +radv_emit_streamout_begin(struct radv_cmd_buffer *cmd_buffer, uint32_t firstCounterBuffer, + uint32_t counterBufferCount, const VkBuffer *pCounterBuffers, + const VkDeviceSize *pCounterBufferOffsets) + { - struct radv_streamout_state *so = &cmd_buffer->state.streamout; - unsigned last_target = util_last_bit(so->enabled_mask) - 1; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings; + struct radv_streamout_state *so = &cmd_buffer->state.streamout; + struct radeon_cmdbuf *cs = cmd_buffer->cs; - assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10); - assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); + radv_flush_vgt_streamout(cmd_buffer); - /* Sync because the next streamout operation will overwrite GDS and we - * have to make sure it's idle. - * TODO: Improve by tracking if there is a streamout operation in - * flight. - */ - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VS_PARTIAL_FLUSH; - si_emit_cache_flush(cmd_buffer); + assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); + u_foreach_bit(i, so->enabled_mask) + { + int32_t counter_buffer_idx = i - firstCounterBuffer; + if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount) + counter_buffer_idx = -1; - u_foreach_bit(i, so->enabled_mask) { - int32_t counter_buffer_idx = i - firstCounterBuffer; - if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount) - counter_buffer_idx = -1; + /* AMD GCN binds streamout buffers as shader resources. + * VGT only counts primitives and tells the shader through + * SGPRs what to do. + */ + radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16 * i, 2); + radeon_emit(cs, sb[i].size >> 2); /* BUFFER_SIZE (in DW) */ + radeon_emit(cs, so->stride_in_dw[i]); /* VTX_STRIDE (in DW) */ - bool append = counter_buffer_idx >= 0 && - pCounterBuffers && pCounterBuffers[counter_buffer_idx]; - uint64_t va = 0; + cmd_buffer->state.context_roll_without_scissor_emitted = true; - if (append) { - RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]); - uint64_t counter_buffer_offset = 0; + if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) { + /* The array of counter buffers is optional. */ + RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]); + uint64_t va = radv_buffer_get_va(buffer->bo); + uint64_t counter_buffer_offset = 0; - if (pCounterBufferOffsets) - counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx]; + if (pCounterBufferOffsets) + counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx]; - va += radv_buffer_get_va(buffer->bo); - va += buffer->offset + counter_buffer_offset; + va += buffer->offset + counter_buffer_offset; - radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo); - } + /* Append */ + radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); + radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | STRMOUT_DATA_TYPE(1) | /* offset in bytes */ + STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM)); /* control */ + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, va); /* src address lo */ + radeon_emit(cs, va >> 32); /* src address hi */ - radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0)); - radeon_emit(cs, S_411_SRC_SEL(append ? V_411_SRC_ADDR_TC_L2 : V_411_DATA) | - S_411_DST_SEL(V_411_GDS) | - S_411_CP_SYNC(i == last_target)); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit(cs, 4 * i); /* destination in GDS */ - radeon_emit(cs, 0); - radeon_emit(cs, S_415_BYTE_COUNT_GFX9(4) | - S_415_DISABLE_WR_CONFIRM_GFX9(i != last_target)); - } + radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo); + } else { + /* Start from the beginning. */ + radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); + radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | STRMOUT_DATA_TYPE(1) | /* offset in bytes */ + STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */ + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, 0); /* unused */ + } + } - radv_set_streamout_enable(cmd_buffer, true); -} - -void radv_CmdBeginTransformFeedbackEXT( - VkCommandBuffer commandBuffer, - uint32_t firstCounterBuffer, - uint32_t counterBufferCount, - const VkBuffer* pCounterBuffers, - const VkDeviceSize* pCounterBufferOffsets) -{ - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - - if (cmd_buffer->device->physical_device->use_ngg_streamout) { - gfx10_emit_streamout_begin(cmd_buffer, - firstCounterBuffer, counterBufferCount, - pCounterBuffers, pCounterBufferOffsets); - } else { - radv_emit_streamout_begin(cmd_buffer, - firstCounterBuffer, counterBufferCount, - pCounterBuffers, pCounterBufferOffsets); - } + radv_set_streamout_enable(cmd_buffer, true); } static void -radv_emit_streamout_end(struct radv_cmd_buffer *cmd_buffer, - uint32_t firstCounterBuffer, - uint32_t counterBufferCount, - const VkBuffer *pCounterBuffers, - const VkDeviceSize *pCounterBufferOffsets) +gfx10_emit_streamout_begin(struct radv_cmd_buffer *cmd_buffer, uint32_t firstCounterBuffer, + uint32_t counterBufferCount, const VkBuffer *pCounterBuffers, + const VkDeviceSize *pCounterBufferOffsets) { - struct radv_streamout_state *so = &cmd_buffer->state.streamout; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_streamout_state *so = &cmd_buffer->state.streamout; + unsigned last_target = util_last_bit(so->enabled_mask) - 1; + struct radeon_cmdbuf *cs = cmd_buffer->cs; - radv_flush_vgt_streamout(cmd_buffer); + assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10); + assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); - assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); - u_foreach_bit(i, so->enabled_mask) { - int32_t counter_buffer_idx = i - firstCounterBuffer; - if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount) - counter_buffer_idx = -1; + /* Sync because the next streamout operation will overwrite GDS and we + * have to make sure it's idle. + * TODO: Improve by tracking if there is a streamout operation in + * flight. + */ + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VS_PARTIAL_FLUSH; + si_emit_cache_flush(cmd_buffer); - if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) { - /* The array of counters buffer is optional. */ - RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]); - uint64_t va = radv_buffer_get_va(buffer->bo); - uint64_t counter_buffer_offset = 0; + u_foreach_bit(i, so->enabled_mask) + { + int32_t counter_buffer_idx = i - firstCounterBuffer; + if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount) + counter_buffer_idx = -1; - if (pCounterBufferOffsets) - counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx]; + bool append = + counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]; + uint64_t va = 0; - va += buffer->offset + counter_buffer_offset; + if (append) { + RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]); + uint64_t counter_buffer_offset = 0; - radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); - radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | - STRMOUT_DATA_TYPE(1) | /* offset in bytes */ - STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) | - STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */ - radeon_emit(cs, va); /* dst address lo */ - radeon_emit(cs, va >> 32); /* dst address hi */ - radeon_emit(cs, 0); /* unused */ - radeon_emit(cs, 0); /* unused */ + if (pCounterBufferOffsets) + counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx]; - radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo); - } + va += radv_buffer_get_va(buffer->bo); + va += buffer->offset + counter_buffer_offset; - /* Deactivate transform feedback by zeroing the buffer size. - * The counters (primitives generated, primitives emitted) may - * be enabled even if there is not buffer bound. This ensures - * that the primitives-emitted query won't increment. - */ - radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0); + radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo); + } - cmd_buffer->state.context_roll_without_scissor_emitted = true; - } + radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0)); + radeon_emit(cs, S_411_SRC_SEL(append ? V_411_SRC_ADDR_TC_L2 : V_411_DATA) | + S_411_DST_SEL(V_411_GDS) | S_411_CP_SYNC(i == last_target)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, 4 * i); /* destination in GDS */ + radeon_emit(cs, 0); + radeon_emit(cs, S_415_BYTE_COUNT_GFX9(4) | S_415_DISABLE_WR_CONFIRM_GFX9(i != last_target)); + } - radv_set_streamout_enable(cmd_buffer, false); + radv_set_streamout_enable(cmd_buffer, true); +} + +void +radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer, + uint32_t counterBufferCount, const VkBuffer *pCounterBuffers, + const VkDeviceSize *pCounterBufferOffsets) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + + if (cmd_buffer->device->physical_device->use_ngg_streamout) { + gfx10_emit_streamout_begin(cmd_buffer, firstCounterBuffer, counterBufferCount, + pCounterBuffers, pCounterBufferOffsets); + } else { + radv_emit_streamout_begin(cmd_buffer, firstCounterBuffer, counterBufferCount, pCounterBuffers, + pCounterBufferOffsets); + } } static void -gfx10_emit_streamout_end(struct radv_cmd_buffer *cmd_buffer, - uint32_t firstCounterBuffer, - uint32_t counterBufferCount, - const VkBuffer *pCounterBuffers, - const VkDeviceSize *pCounterBufferOffsets) +radv_emit_streamout_end(struct radv_cmd_buffer *cmd_buffer, uint32_t firstCounterBuffer, + uint32_t counterBufferCount, const VkBuffer *pCounterBuffers, + const VkDeviceSize *pCounterBufferOffsets) { - struct radv_streamout_state *so = &cmd_buffer->state.streamout; - struct radeon_cmdbuf *cs = cmd_buffer->cs; + struct radv_streamout_state *so = &cmd_buffer->state.streamout; + struct radeon_cmdbuf *cs = cmd_buffer->cs; - assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10); - assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); + radv_flush_vgt_streamout(cmd_buffer); - u_foreach_bit(i, so->enabled_mask) { - int32_t counter_buffer_idx = i - firstCounterBuffer; - if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount) - counter_buffer_idx = -1; + assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); + u_foreach_bit(i, so->enabled_mask) + { + int32_t counter_buffer_idx = i - firstCounterBuffer; + if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount) + counter_buffer_idx = -1; - if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) { - /* The array of counters buffer is optional. */ - RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]); - uint64_t va = radv_buffer_get_va(buffer->bo); - uint64_t counter_buffer_offset = 0; + if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) { + /* The array of counters buffer is optional. */ + RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]); + uint64_t va = radv_buffer_get_va(buffer->bo); + uint64_t counter_buffer_offset = 0; - if (pCounterBufferOffsets) - counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx]; + if (pCounterBufferOffsets) + counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx]; - va += buffer->offset + counter_buffer_offset; + va += buffer->offset + counter_buffer_offset; - si_cs_emit_write_event_eop(cs, - cmd_buffer->device->physical_device->rad_info.chip_class, - radv_cmd_buffer_uses_mec(cmd_buffer), - V_028A90_PS_DONE, 0, - EOP_DST_SEL_TC_L2, - EOP_DATA_SEL_GDS, - va, EOP_DATA_GDS(i, 1), 0); + radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); + radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | STRMOUT_DATA_TYPE(1) | /* offset in bytes */ + STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) | + STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */ + radeon_emit(cs, va); /* dst address lo */ + radeon_emit(cs, va >> 32); /* dst address hi */ + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, 0); /* unused */ - radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo); - } - } + radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo); + } - radv_set_streamout_enable(cmd_buffer, false); + /* Deactivate transform feedback by zeroing the buffer size. + * The counters (primitives generated, primitives emitted) may + * be enabled even if there is not buffer bound. This ensures + * that the primitives-emitted query won't increment. + */ + radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16 * i, 0); + + cmd_buffer->state.context_roll_without_scissor_emitted = true; + } + + radv_set_streamout_enable(cmd_buffer, false); } -void radv_CmdEndTransformFeedbackEXT( - VkCommandBuffer commandBuffer, - uint32_t firstCounterBuffer, - uint32_t counterBufferCount, - const VkBuffer* pCounterBuffers, - const VkDeviceSize* pCounterBufferOffsets) +static void +gfx10_emit_streamout_end(struct radv_cmd_buffer *cmd_buffer, uint32_t firstCounterBuffer, + uint32_t counterBufferCount, const VkBuffer *pCounterBuffers, + const VkDeviceSize *pCounterBufferOffsets) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_streamout_state *so = &cmd_buffer->state.streamout; + struct radeon_cmdbuf *cs = cmd_buffer->cs; - if (cmd_buffer->device->physical_device->use_ngg_streamout) { - gfx10_emit_streamout_end(cmd_buffer, - firstCounterBuffer, counterBufferCount, - pCounterBuffers, pCounterBufferOffsets); - } else { - radv_emit_streamout_end(cmd_buffer, - firstCounterBuffer, counterBufferCount, - pCounterBuffers, pCounterBufferOffsets); - } + assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10); + assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); + + u_foreach_bit(i, so->enabled_mask) + { + int32_t counter_buffer_idx = i - firstCounterBuffer; + if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount) + counter_buffer_idx = -1; + + if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) { + /* The array of counters buffer is optional. */ + RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]); + uint64_t va = radv_buffer_get_va(buffer->bo); + uint64_t counter_buffer_offset = 0; + + if (pCounterBufferOffsets) + counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx]; + + va += buffer->offset + counter_buffer_offset; + + si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.chip_class, + radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_PS_DONE, 0, + EOP_DST_SEL_TC_L2, EOP_DATA_SEL_GDS, va, EOP_DATA_GDS(i, 1), 0); + + radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo); + } + } + + radv_set_streamout_enable(cmd_buffer, false); } -void radv_CmdDrawIndirectByteCountEXT( - VkCommandBuffer commandBuffer, - uint32_t instanceCount, - uint32_t firstInstance, - VkBuffer _counterBuffer, - VkDeviceSize counterBufferOffset, - uint32_t counterOffset, - uint32_t vertexStride) +void +radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer, + uint32_t counterBufferCount, const VkBuffer *pCounterBuffers, + const VkDeviceSize *pCounterBufferOffsets) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - RADV_FROM_HANDLE(radv_buffer, counterBuffer, _counterBuffer); - struct radv_draw_info info; + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - info.count = 0; - info.instance_count = instanceCount; - info.first_instance = firstInstance; - info.strmout_buffer = counterBuffer; - info.strmout_buffer_offset = counterBufferOffset; - info.stride = vertexStride; - info.indexed = false; - info.indirect = NULL; + if (cmd_buffer->device->physical_device->use_ngg_streamout) { + gfx10_emit_streamout_end(cmd_buffer, firstCounterBuffer, counterBufferCount, pCounterBuffers, + pCounterBufferOffsets); + } else { + radv_emit_streamout_end(cmd_buffer, firstCounterBuffer, counterBufferCount, pCounterBuffers, + pCounterBufferOffsets); + } +} - if (!radv_before_draw(cmd_buffer, &info, 0)) - return; - radv_emit_direct_draw_packets(cmd_buffer, &info, 0, S_0287F0_USE_OPAQUE(1)); - radv_after_draw(cmd_buffer); +void +radv_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer, uint32_t instanceCount, + uint32_t firstInstance, VkBuffer _counterBuffer, + VkDeviceSize counterBufferOffset, uint32_t counterOffset, + uint32_t vertexStride) +{ + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + RADV_FROM_HANDLE(radv_buffer, counterBuffer, _counterBuffer); + struct radv_draw_info info; + + info.count = 0; + info.instance_count = instanceCount; + info.first_instance = firstInstance; + info.strmout_buffer = counterBuffer; + info.strmout_buffer_offset = counterBufferOffset; + info.stride = vertexStride; + info.indexed = false; + info.indirect = NULL; + + if (!radv_before_draw(cmd_buffer, &info, 0)) + return; + radv_emit_direct_draw_packets(cmd_buffer, &info, 0, S_0287F0_USE_OPAQUE(1)); + radv_after_draw(cmd_buffer); } /* VK_AMD_buffer_marker */ -void radv_CmdWriteBufferMarkerAMD( - VkCommandBuffer commandBuffer, - VkPipelineStageFlagBits pipelineStage, - VkBuffer dstBuffer, - VkDeviceSize dstOffset, - uint32_t marker) +void +radv_CmdWriteBufferMarkerAMD(VkCommandBuffer commandBuffer, VkPipelineStageFlagBits pipelineStage, + VkBuffer dstBuffer, VkDeviceSize dstOffset, uint32_t marker) { - RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - RADV_FROM_HANDLE(radv_buffer, buffer, dstBuffer); - struct radeon_cmdbuf *cs = cmd_buffer->cs; - uint64_t va = radv_buffer_get_va(buffer->bo) + dstOffset; + RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + RADV_FROM_HANDLE(radv_buffer, buffer, dstBuffer); + struct radeon_cmdbuf *cs = cmd_buffer->cs; + uint64_t va = radv_buffer_get_va(buffer->bo) + dstOffset; - si_emit_cache_flush(cmd_buffer); + si_emit_cache_flush(cmd_buffer); - ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 12); + ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 12); - if (!(pipelineStage & ~VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) { - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | - COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | - COPY_DATA_WR_CONFIRM); - radeon_emit(cs, marker); - radeon_emit(cs, 0); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - } else { - si_cs_emit_write_event_eop(cs, - cmd_buffer->device->physical_device->rad_info.chip_class, - radv_cmd_buffer_uses_mec(cmd_buffer), - V_028A90_BOTTOM_OF_PIPE_TS, 0, - EOP_DST_SEL_MEM, - EOP_DATA_SEL_VALUE_32BIT, - va, marker, - cmd_buffer->gfx9_eop_bug_va); - } + if (!(pipelineStage & ~VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) { + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | + COPY_DATA_WR_CONFIRM); + radeon_emit(cs, marker); + radeon_emit(cs, 0); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + } else { + si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.chip_class, + radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, + 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, va, marker, + cmd_buffer->gfx9_eop_bug_va); + } - assert(cmd_buffer->cs->cdw <= cdw_max); + assert(cmd_buffer->cs->cdw <= cdw_max); } diff --git a/src/amd/vulkan/radv_constants.h b/src/amd/vulkan/radv_constants.h index 0b0d6714d25..bceedac3da6 100644 --- a/src/amd/vulkan/radv_constants.h +++ b/src/amd/vulkan/radv_constants.h @@ -30,30 +30,30 @@ #define ATI_VENDOR_ID 0x1002 -#define MAX_VBS 32 -#define MAX_VERTEX_ATTRIBS 32 -#define MAX_RTS 8 -#define MAX_VIEWPORTS 16 -#define MAX_SCISSORS 16 -#define MAX_DISCARD_RECTANGLES 4 -#define MAX_SAMPLE_LOCATIONS 32 -#define MAX_PUSH_CONSTANTS_SIZE 128 -#define MAX_PUSH_DESCRIPTORS 32 -#define MAX_DYNAMIC_UNIFORM_BUFFERS 16 -#define MAX_DYNAMIC_STORAGE_BUFFERS 8 -#define MAX_DYNAMIC_BUFFERS (MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS) -#define MAX_SAMPLES_LOG2 4 -#define NUM_META_FS_KEYS 12 -#define RADV_MAX_DRM_DEVICES 8 -#define MAX_VIEWS 8 -#define MAX_SO_STREAMS 4 -#define MAX_SO_BUFFERS 4 -#define MAX_SO_OUTPUTS 64 -#define MAX_INLINE_UNIFORM_BLOCK_SIZE (4ull * 1024 * 1024) +#define MAX_VBS 32 +#define MAX_VERTEX_ATTRIBS 32 +#define MAX_RTS 8 +#define MAX_VIEWPORTS 16 +#define MAX_SCISSORS 16 +#define MAX_DISCARD_RECTANGLES 4 +#define MAX_SAMPLE_LOCATIONS 32 +#define MAX_PUSH_CONSTANTS_SIZE 128 +#define MAX_PUSH_DESCRIPTORS 32 +#define MAX_DYNAMIC_UNIFORM_BUFFERS 16 +#define MAX_DYNAMIC_STORAGE_BUFFERS 8 +#define MAX_DYNAMIC_BUFFERS (MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS) +#define MAX_SAMPLES_LOG2 4 +#define NUM_META_FS_KEYS 12 +#define RADV_MAX_DRM_DEVICES 8 +#define MAX_VIEWS 8 +#define MAX_SO_STREAMS 4 +#define MAX_SO_BUFFERS 4 +#define MAX_SO_OUTPUTS 64 +#define MAX_INLINE_UNIFORM_BLOCK_SIZE (4ull * 1024 * 1024) #define MAX_INLINE_UNIFORM_BLOCK_COUNT 64 -#define MAX_BIND_POINTS 2 /* compute + graphics */ +#define MAX_BIND_POINTS 2 /* compute + graphics */ -#define NUM_DEPTH_CLEAR_PIPELINES 3 +#define NUM_DEPTH_CLEAR_PIPELINES 3 #define NUM_DEPTH_DECOMPRESS_PIPELINES 3 /* @@ -65,13 +65,13 @@ #define RADV_BUFFER_UPDATE_THRESHOLD 1024 /* descriptor index into scratch ring offsets */ -#define RING_SCRATCH 0 -#define RING_ESGS_VS 1 -#define RING_ESGS_GS 2 -#define RING_GSVS_VS 3 -#define RING_GSVS_GS 4 -#define RING_HS_TESS_FACTOR 5 -#define RING_HS_TESS_OFFCHIP 6 +#define RING_SCRATCH 0 +#define RING_ESGS_VS 1 +#define RING_ESGS_GS 2 +#define RING_GSVS_VS 3 +#define RING_GSVS_GS 4 +#define RING_HS_TESS_FACTOR 5 +#define RING_HS_TESS_OFFCHIP 6 #define RING_PS_SAMPLE_POSITIONS 7 /* max number of descriptor sets */ @@ -80,7 +80,7 @@ /* Make sure everything is addressable by a signed 32-bit int, and * our largest descriptors are 96 bytes. */ -#define RADV_MAX_PER_SET_DESCRIPTORS ((1ull << 31 ) / 96) +#define RADV_MAX_PER_SET_DESCRIPTORS ((1ull << 31) / 96) /* Our buffer size fields allow only 2**32 - 1. We round that down to a multiple * of 4 bytes so we can align buffer sizes up. @@ -91,4 +91,3 @@ #define RADV_SUBGROUP_SIZE 64 #endif /* RADV_CONSTANTS_H */ - diff --git a/src/amd/vulkan/radv_cs.h b/src/amd/vulkan/radv_cs.h index c6f9e4e4342..277c77b5967 100644 --- a/src/amd/vulkan/radv_cs.h +++ b/src/amd/vulkan/radv_cs.h @@ -25,166 +25,166 @@ #ifndef RADV_CS_H #define RADV_CS_H -#include -#include #include +#include +#include #include "radv_private.h" #include "sid.h" -static inline unsigned radeon_check_space(struct radeon_winsys *ws, - struct radeon_cmdbuf *cs, - unsigned needed) +static inline unsigned +radeon_check_space(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, unsigned needed) { - if (cs->max_dw - cs->cdw < needed) - ws->cs_grow(cs, needed); - return cs->cdw + needed; + if (cs->max_dw - cs->cdw < needed) + ws->cs_grow(cs, needed); + return cs->cdw + needed; } -static inline void radeon_set_config_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num) +static inline void +radeon_set_config_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num) { - assert(reg >= SI_CONFIG_REG_OFFSET && reg < SI_CONFIG_REG_END); - assert(cs->cdw + 2 + num <= cs->max_dw); - assert(num); - radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0)); - radeon_emit(cs, (reg - SI_CONFIG_REG_OFFSET) >> 2); + assert(reg >= SI_CONFIG_REG_OFFSET && reg < SI_CONFIG_REG_END); + assert(cs->cdw + 2 + num <= cs->max_dw); + assert(num); + radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0)); + radeon_emit(cs, (reg - SI_CONFIG_REG_OFFSET) >> 2); } -static inline void radeon_set_config_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value) +static inline void +radeon_set_config_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value) { - radeon_set_config_reg_seq(cs, reg, 1); - radeon_emit(cs, value); + radeon_set_config_reg_seq(cs, reg, 1); + radeon_emit(cs, value); } -static inline void radeon_set_context_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num) +static inline void +radeon_set_context_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num) { - assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END); - assert(cs->cdw + 2 + num <= cs->max_dw); - assert(num); - radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0)); - radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2); + assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END); + assert(cs->cdw + 2 + num <= cs->max_dw); + assert(num); + radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0)); + radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2); } -static inline void radeon_set_context_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value) +static inline void +radeon_set_context_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value) { - radeon_set_context_reg_seq(cs, reg, 1); - radeon_emit(cs, value); + radeon_set_context_reg_seq(cs, reg, 1); + radeon_emit(cs, value); } - -static inline void radeon_set_context_reg_idx(struct radeon_cmdbuf *cs, - unsigned reg, unsigned idx, - unsigned value) +static inline void +radeon_set_context_reg_idx(struct radeon_cmdbuf *cs, unsigned reg, unsigned idx, unsigned value) { - assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END); - assert(cs->cdw + 3 <= cs->max_dw); - radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0)); - radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2 | (idx << 28)); - radeon_emit(cs, value); + assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END); + assert(cs->cdw + 3 <= cs->max_dw); + radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0)); + radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2 | (idx << 28)); + radeon_emit(cs, value); } -static inline void radeon_set_context_reg_rmw(struct radeon_cmdbuf *cs, - unsigned reg, unsigned value, - unsigned mask) +static inline void +radeon_set_context_reg_rmw(struct radeon_cmdbuf *cs, unsigned reg, unsigned value, unsigned mask) { - assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END); - assert(cs->cdw + 4 <= cs->max_dw); - radeon_emit(cs, PKT3(PKT3_CONTEXT_REG_RMW, 2, 0)); - radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2); - radeon_emit(cs, mask); - radeon_emit(cs, value); + assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END); + assert(cs->cdw + 4 <= cs->max_dw); + radeon_emit(cs, PKT3(PKT3_CONTEXT_REG_RMW, 2, 0)); + radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2); + radeon_emit(cs, mask); + radeon_emit(cs, value); } -static inline void radeon_set_sh_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num) +static inline void +radeon_set_sh_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num) { - assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END); - assert(cs->cdw + 2 + num <= cs->max_dw); - assert(num); - radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0)); - radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2); + assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END); + assert(cs->cdw + 2 + num <= cs->max_dw); + assert(num); + radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0)); + radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2); } -static inline void radeon_set_sh_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value) +static inline void +radeon_set_sh_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value) { - radeon_set_sh_reg_seq(cs, reg, 1); - radeon_emit(cs, value); + radeon_set_sh_reg_seq(cs, reg, 1); + radeon_emit(cs, value); } -static inline void radeon_set_sh_reg_idx(const struct radv_physical_device *pdevice, - struct radeon_cmdbuf *cs, - unsigned reg, unsigned idx, - unsigned value) +static inline void +radeon_set_sh_reg_idx(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs, + unsigned reg, unsigned idx, unsigned value) { - assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END); - assert(cs->cdw + 3 <= cs->max_dw); - assert(idx); + assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END); + assert(cs->cdw + 3 <= cs->max_dw); + assert(idx); - unsigned opcode = PKT3_SET_SH_REG_INDEX; - if (pdevice->rad_info.chip_class < GFX10) - opcode = PKT3_SET_SH_REG; + unsigned opcode = PKT3_SET_SH_REG_INDEX; + if (pdevice->rad_info.chip_class < GFX10) + opcode = PKT3_SET_SH_REG; - radeon_emit(cs, PKT3(opcode, 1, 0)); - radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2 | (idx << 28)); - radeon_emit(cs, value); + radeon_emit(cs, PKT3(opcode, 1, 0)); + radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2 | (idx << 28)); + radeon_emit(cs, value); } -static inline void radeon_set_uconfig_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num) +static inline void +radeon_set_uconfig_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num) { - assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END); - assert(cs->cdw + 2 + num <= cs->max_dw); - assert(num); - radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0)); - radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2); + assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END); + assert(cs->cdw + 2 + num <= cs->max_dw); + assert(num); + radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0)); + radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2); } -static inline void radeon_set_uconfig_reg_seq_perfctr(struct radeon_cmdbuf *cs, - unsigned reg, unsigned num) +static inline void +radeon_set_uconfig_reg_seq_perfctr(struct radeon_cmdbuf *cs, unsigned reg, unsigned num) { - assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END); - assert(cs->cdw + 2 + num <= cs->max_dw); - assert(num); - radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 1)); - radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2); + assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END); + assert(cs->cdw + 2 + num <= cs->max_dw); + assert(num); + radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 1)); + radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2); } -static inline void radeon_set_uconfig_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value) +static inline void +radeon_set_uconfig_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value) { - radeon_set_uconfig_reg_seq(cs, reg, 1); - radeon_emit(cs, value); + radeon_set_uconfig_reg_seq(cs, reg, 1); + radeon_emit(cs, value); } -static inline void radeon_set_uconfig_reg_idx(const struct radv_physical_device *pdevice, - struct radeon_cmdbuf *cs, - unsigned reg, unsigned idx, - unsigned value) +static inline void +radeon_set_uconfig_reg_idx(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs, + unsigned reg, unsigned idx, unsigned value) { - assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END); - assert(cs->cdw + 3 <= cs->max_dw); - assert(idx); + assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END); + assert(cs->cdw + 3 <= cs->max_dw); + assert(idx); - unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX; - if (pdevice->rad_info.chip_class < GFX9 || - (pdevice->rad_info.chip_class == GFX9 && pdevice->rad_info.me_fw_version < 26)) - opcode = PKT3_SET_UCONFIG_REG; + unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX; + if (pdevice->rad_info.chip_class < GFX9 || + (pdevice->rad_info.chip_class == GFX9 && pdevice->rad_info.me_fw_version < 26)) + opcode = PKT3_SET_UCONFIG_REG; - radeon_emit(cs, PKT3(opcode, 1, 0)); - radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2 | (idx << 28)); - radeon_emit(cs, value); + radeon_emit(cs, PKT3(opcode, 1, 0)); + radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2 | (idx << 28)); + radeon_emit(cs, value); } -static inline void radeon_set_privileged_config_reg(struct radeon_cmdbuf *cs, - unsigned reg, - unsigned value) +static inline void +radeon_set_privileged_config_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value) { - assert(reg < CIK_UCONFIG_REG_OFFSET); - assert(cs->cdw + 6 <= cs->max_dw); + assert(reg < CIK_UCONFIG_REG_OFFSET); + assert(cs->cdw + 6 <= cs->max_dw); - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | - COPY_DATA_DST_SEL(COPY_DATA_PERF)); - radeon_emit(cs, value); - radeon_emit(cs, 0); /* unused */ - radeon_emit(cs, reg >> 2); - radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_PERF)); + radeon_emit(cs, value); + radeon_emit(cs, 0); /* unused */ + radeon_emit(cs, reg >> 2); + radeon_emit(cs, 0); /* unused */ } #endif /* RADV_CS_H */ diff --git a/src/amd/vulkan/radv_debug.c b/src/amd/vulkan/radv_debug.c index 9ba853ee9f4..ca0c3389538 100644 --- a/src/amd/vulkan/radv_debug.c +++ b/src/amd/vulkan/radv_debug.c @@ -25,27 +25,27 @@ * IN THE SOFTWARE. */ -#include #include +#include #ifndef _WIN32 #include #endif #include #include "util/mesa-sha1.h" -#include "sid.h" #include "ac_debug.h" #include "radv_debug.h" #include "radv_shader.h" +#include "sid.h" #define TRACE_BO_SIZE 4096 -#define TMA_BO_SIZE 4096 +#define TMA_BO_SIZE 4096 -#define COLOR_RESET "\033[0m" -#define COLOR_RED "\033[31m" -#define COLOR_GREEN "\033[1;32m" -#define COLOR_YELLOW "\033[1;33m" -#define COLOR_CYAN "\033[1;36m" +#define COLOR_RESET "\033[0m" +#define COLOR_RED "\033[31m" +#define COLOR_GREEN "\033[1;32m" +#define COLOR_YELLOW "\033[1;33m" +#define COLOR_CYAN "\033[1;36m" #define RADV_DUMP_DIR "radv_dumps" @@ -64,967 +64,934 @@ bool radv_init_trace(struct radv_device *device) { - struct radeon_winsys *ws = device->ws; - VkResult result; + struct radeon_winsys *ws = device->ws; + VkResult result; - device->trace_bo = ws->buffer_create(ws, TRACE_BO_SIZE, 8, - RADEON_DOMAIN_VRAM, - RADEON_FLAG_CPU_ACCESS| - RADEON_FLAG_NO_INTERPROCESS_SHARING | - RADEON_FLAG_ZERO_VRAM, - RADV_BO_PRIORITY_UPLOAD_BUFFER); - if (!device->trace_bo) - return false; + device->trace_bo = ws->buffer_create( + ws, TRACE_BO_SIZE, 8, RADEON_DOMAIN_VRAM, + RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM, + RADV_BO_PRIORITY_UPLOAD_BUFFER); + if (!device->trace_bo) + return false; - result = ws->buffer_make_resident(ws, device->trace_bo, true); - if (result != VK_SUCCESS) - return false; + result = ws->buffer_make_resident(ws, device->trace_bo, true); + if (result != VK_SUCCESS) + return false; - device->trace_id_ptr = ws->buffer_map(device->trace_bo); - if (!device->trace_id_ptr) - return false; + device->trace_id_ptr = ws->buffer_map(device->trace_bo); + if (!device->trace_id_ptr) + return false; - ac_vm_fault_occured(device->physical_device->rad_info.chip_class, - &device->dmesg_timestamp, NULL); + ac_vm_fault_occured(device->physical_device->rad_info.chip_class, &device->dmesg_timestamp, + NULL); - return true; + return true; } void radv_finish_trace(struct radv_device *device) { - struct radeon_winsys *ws = device->ws; + struct radeon_winsys *ws = device->ws; - if (unlikely(device->trace_bo)) { - ws->buffer_make_resident(ws, device->trace_bo, false); - ws->buffer_destroy(ws, device->trace_bo); - } + if (unlikely(device->trace_bo)) { + ws->buffer_make_resident(ws, device->trace_bo, false); + ws->buffer_destroy(ws, device->trace_bo); + } } static void radv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs, FILE *f) { - fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr); - device->ws->cs_dump(cs, f, (const int*)device->trace_id_ptr, 2); + fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr); + device->ws->cs_dump(cs, f, (const int *)device->trace_id_ptr, 2); } static void radv_dump_mmapped_reg(struct radv_device *device, FILE *f, unsigned offset) { - struct radeon_winsys *ws = device->ws; - uint32_t value; + struct radeon_winsys *ws = device->ws; + uint32_t value; - if (ws->read_registers(ws, offset, 1, &value)) - ac_dump_reg(f, device->physical_device->rad_info.chip_class, - offset, value, ~0); + if (ws->read_registers(ws, offset, 1, &value)) + ac_dump_reg(f, device->physical_device->rad_info.chip_class, offset, value, ~0); } static void radv_dump_debug_registers(struct radv_device *device, FILE *f) { - struct radeon_info *info = &device->physical_device->rad_info; + struct radeon_info *info = &device->physical_device->rad_info; - fprintf(f, "Memory-mapped registers:\n"); - radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS); + fprintf(f, "Memory-mapped registers:\n"); + radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS); - radv_dump_mmapped_reg(device, f, R_008008_GRBM_STATUS2); - radv_dump_mmapped_reg(device, f, R_008014_GRBM_STATUS_SE0); - radv_dump_mmapped_reg(device, f, R_008018_GRBM_STATUS_SE1); - radv_dump_mmapped_reg(device, f, R_008038_GRBM_STATUS_SE2); - radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3); - radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG); - radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG); - if (info->chip_class <= GFX8) { - radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS); - radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2); - radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3); - } - radv_dump_mmapped_reg(device, f, R_008680_CP_STAT); - radv_dump_mmapped_reg(device, f, R_008674_CP_STALLED_STAT1); - radv_dump_mmapped_reg(device, f, R_008678_CP_STALLED_STAT2); - radv_dump_mmapped_reg(device, f, R_008670_CP_STALLED_STAT3); - radv_dump_mmapped_reg(device, f, R_008210_CP_CPC_STATUS); - radv_dump_mmapped_reg(device, f, R_008214_CP_CPC_BUSY_STAT); - radv_dump_mmapped_reg(device, f, R_008218_CP_CPC_STALLED_STAT1); - radv_dump_mmapped_reg(device, f, R_00821C_CP_CPF_STATUS); - radv_dump_mmapped_reg(device, f, R_008220_CP_CPF_BUSY_STAT); - radv_dump_mmapped_reg(device, f, R_008224_CP_CPF_STALLED_STAT1); - fprintf(f, "\n"); + radv_dump_mmapped_reg(device, f, R_008008_GRBM_STATUS2); + radv_dump_mmapped_reg(device, f, R_008014_GRBM_STATUS_SE0); + radv_dump_mmapped_reg(device, f, R_008018_GRBM_STATUS_SE1); + radv_dump_mmapped_reg(device, f, R_008038_GRBM_STATUS_SE2); + radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3); + radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG); + radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG); + if (info->chip_class <= GFX8) { + radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS); + radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2); + radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3); + } + radv_dump_mmapped_reg(device, f, R_008680_CP_STAT); + radv_dump_mmapped_reg(device, f, R_008674_CP_STALLED_STAT1); + radv_dump_mmapped_reg(device, f, R_008678_CP_STALLED_STAT2); + radv_dump_mmapped_reg(device, f, R_008670_CP_STALLED_STAT3); + radv_dump_mmapped_reg(device, f, R_008210_CP_CPC_STATUS); + radv_dump_mmapped_reg(device, f, R_008214_CP_CPC_BUSY_STAT); + radv_dump_mmapped_reg(device, f, R_008218_CP_CPC_STALLED_STAT1); + radv_dump_mmapped_reg(device, f, R_00821C_CP_CPF_STATUS); + radv_dump_mmapped_reg(device, f, R_008220_CP_CPF_BUSY_STAT); + radv_dump_mmapped_reg(device, f, R_008224_CP_CPF_STALLED_STAT1); + fprintf(f, "\n"); } static void -radv_dump_buffer_descriptor(enum chip_class chip_class, const uint32_t *desc, - FILE *f) +radv_dump_buffer_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f) { - fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n"); - for (unsigned j = 0; j < 4; j++) - ac_dump_reg(f, chip_class, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, - desc[j], 0xffffffff); + fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n"); + for (unsigned j = 0; j < 4; j++) + ac_dump_reg(f, chip_class, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, desc[j], 0xffffffff); } static void -radv_dump_image_descriptor(enum chip_class chip_class, const uint32_t *desc, - FILE *f) +radv_dump_image_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f) { - unsigned sq_img_rsrc_word0 = chip_class >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 - : R_008F10_SQ_IMG_RSRC_WORD0; + unsigned sq_img_rsrc_word0 = + chip_class >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0; - fprintf(f, COLOR_CYAN " Image:" COLOR_RESET "\n"); - for (unsigned j = 0; j < 8; j++) - ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4, - desc[j], 0xffffffff); + fprintf(f, COLOR_CYAN " Image:" COLOR_RESET "\n"); + for (unsigned j = 0; j < 8; j++) + ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4, desc[j], 0xffffffff); - fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n"); - for (unsigned j = 0; j < 8; j++) - ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4, - desc[8 + j], 0xffffffff); + fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n"); + for (unsigned j = 0; j < 8; j++) + ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4, desc[8 + j], 0xffffffff); } static void -radv_dump_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc, - FILE *f) +radv_dump_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f) { - fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n"); - for (unsigned j = 0; j < 4; j++) { - ac_dump_reg(f, chip_class, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4, - desc[j], 0xffffffff); - } + fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n"); + for (unsigned j = 0; j < 4; j++) { + ac_dump_reg(f, chip_class, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4, desc[j], 0xffffffff); + } } static void -radv_dump_combined_image_sampler_descriptor(enum chip_class chip_class, - const uint32_t *desc, FILE *f) +radv_dump_combined_image_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc, + FILE *f) { - radv_dump_image_descriptor(chip_class, desc, f); - radv_dump_sampler_descriptor(chip_class, desc + 16, f); + radv_dump_image_descriptor(chip_class, desc, f); + radv_dump_sampler_descriptor(chip_class, desc + 16, f); } static void -radv_dump_descriptor_set(struct radv_device *device, - struct radv_descriptor_set *set, unsigned id, FILE *f) +radv_dump_descriptor_set(struct radv_device *device, struct radv_descriptor_set *set, unsigned id, + FILE *f) { - enum chip_class chip_class = device->physical_device->rad_info.chip_class; - const struct radv_descriptor_set_layout *layout; - int i; + enum chip_class chip_class = device->physical_device->rad_info.chip_class; + const struct radv_descriptor_set_layout *layout; + int i; - if (!set) - return; - layout = set->header.layout; + if (!set) + return; + layout = set->header.layout; - for (i = 0; i < set->header.layout->binding_count; i++) { - uint32_t *desc = - set->header.mapped_ptr + layout->binding[i].offset / 4; + for (i = 0; i < set->header.layout->binding_count; i++) { + uint32_t *desc = set->header.mapped_ptr + layout->binding[i].offset / 4; - switch (layout->binding[i].type) { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - radv_dump_buffer_descriptor(chip_class, desc, f); - break; - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - radv_dump_image_descriptor(chip_class, desc, f); - break; - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - radv_dump_combined_image_sampler_descriptor(chip_class, desc, f); - break; - case VK_DESCRIPTOR_TYPE_SAMPLER: - radv_dump_sampler_descriptor(chip_class, desc, f); - break; - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - /* todo */ - break; - default: - assert(!"unknown descriptor type"); - break; - } - fprintf(f, "\n"); - } - fprintf(f, "\n\n"); + switch (layout->binding[i].type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + radv_dump_buffer_descriptor(chip_class, desc, f); + break; + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + radv_dump_image_descriptor(chip_class, desc, f); + break; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + radv_dump_combined_image_sampler_descriptor(chip_class, desc, f); + break; + case VK_DESCRIPTOR_TYPE_SAMPLER: + radv_dump_sampler_descriptor(chip_class, desc, f); + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + /* todo */ + break; + default: + assert(!"unknown descriptor type"); + break; + } + fprintf(f, "\n"); + } + fprintf(f, "\n\n"); } static void radv_dump_descriptors(struct radv_device *device, FILE *f) { - uint64_t *ptr = (uint64_t *)device->trace_id_ptr; - int i; + uint64_t *ptr = (uint64_t *)device->trace_id_ptr; + int i; - fprintf(f, "Descriptors:\n"); - for (i = 0; i < MAX_SETS; i++) { - struct radv_descriptor_set *set = - *(struct radv_descriptor_set **)(ptr + i + 4); + fprintf(f, "Descriptors:\n"); + for (i = 0; i < MAX_SETS; i++) { + struct radv_descriptor_set *set = *(struct radv_descriptor_set **)(ptr + i + 4); - radv_dump_descriptor_set(device, set, i, f); - } + radv_dump_descriptor_set(device, set, i, f); + } } struct radv_shader_inst { - char text[160]; /* one disasm line */ - unsigned offset; /* instruction offset */ - unsigned size; /* instruction size = 4 or 8 */ + char text[160]; /* one disasm line */ + unsigned offset; /* instruction offset */ + unsigned size; /* instruction size = 4 or 8 */ }; /* Split a disassembly string into lines and add them to the array pointed * to by "instructions". */ -static void si_add_split_disasm(const char *disasm, - uint64_t start_addr, - unsigned *num, - struct radv_shader_inst *instructions) +static void +si_add_split_disasm(const char *disasm, uint64_t start_addr, unsigned *num, + struct radv_shader_inst *instructions) { - struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL; - char *next; + struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL; + char *next; - while ((next = strchr(disasm, '\n'))) { - struct radv_shader_inst *inst = &instructions[*num]; - unsigned len = next - disasm; + while ((next = strchr(disasm, '\n'))) { + struct radv_shader_inst *inst = &instructions[*num]; + unsigned len = next - disasm; - if (!memchr(disasm, ';', len)) { - /* Ignore everything that is not an instruction. */ - disasm = next + 1; - continue; - } + if (!memchr(disasm, ';', len)) { + /* Ignore everything that is not an instruction. */ + disasm = next + 1; + continue; + } - assert(len < ARRAY_SIZE(inst->text)); - memcpy(inst->text, disasm, len); - inst->text[len] = 0; - inst->offset = last_inst ? last_inst->offset + last_inst->size : 0; + assert(len < ARRAY_SIZE(inst->text)); + memcpy(inst->text, disasm, len); + inst->text[len] = 0; + inst->offset = last_inst ? last_inst->offset + last_inst->size : 0; - const char *semicolon = strchr(disasm, ';'); - assert(semicolon); - /* More than 16 chars after ";" means the instruction is 8 bytes long. */ - inst->size = next - semicolon > 16 ? 8 : 4; + const char *semicolon = strchr(disasm, ';'); + assert(semicolon); + /* More than 16 chars after ";" means the instruction is 8 bytes long. */ + inst->size = next - semicolon > 16 ? 8 : 4; - snprintf(inst->text + len, ARRAY_SIZE(inst->text) - len, - " [PC=0x%"PRIx64", off=%u, size=%u]", - start_addr + inst->offset, inst->offset, inst->size); + snprintf(inst->text + len, ARRAY_SIZE(inst->text) - len, + " [PC=0x%" PRIx64 ", off=%u, size=%u]", start_addr + inst->offset, inst->offset, + inst->size); - last_inst = inst; - (*num)++; - disasm = next + 1; - } + last_inst = inst; + (*num)++; + disasm = next + 1; + } } static void -radv_dump_annotated_shader(struct radv_shader_variant *shader, - gl_shader_stage stage, struct ac_wave_info *waves, - unsigned num_waves, FILE *f) +radv_dump_annotated_shader(struct radv_shader_variant *shader, gl_shader_stage stage, + struct ac_wave_info *waves, unsigned num_waves, FILE *f) { - uint64_t start_addr, end_addr; - unsigned i; + uint64_t start_addr, end_addr; + unsigned i; - if (!shader) - return; + if (!shader) + return; - start_addr = radv_buffer_get_va(shader->bo) + shader->bo_offset; - end_addr = start_addr + shader->code_size; + start_addr = radv_buffer_get_va(shader->bo) + shader->bo_offset; + end_addr = start_addr + shader->code_size; - /* See if any wave executes the shader. */ - for (i = 0; i < num_waves; i++) { - if (start_addr <= waves[i].pc && waves[i].pc <= end_addr) - break; - } + /* See if any wave executes the shader. */ + for (i = 0; i < num_waves; i++) { + if (start_addr <= waves[i].pc && waves[i].pc <= end_addr) + break; + } - if (i == num_waves) - return; /* the shader is not being executed */ + if (i == num_waves) + return; /* the shader is not being executed */ - /* Remember the first found wave. The waves are sorted according to PC. */ - waves = &waves[i]; - num_waves -= i; + /* Remember the first found wave. The waves are sorted according to PC. */ + waves = &waves[i]; + num_waves -= i; - /* Get the list of instructions. - * Buffer size / 4 is the upper bound of the instruction count. - */ - unsigned num_inst = 0; - struct radv_shader_inst *instructions = - calloc(shader->code_size / 4, sizeof(struct radv_shader_inst)); + /* Get the list of instructions. + * Buffer size / 4 is the upper bound of the instruction count. + */ + unsigned num_inst = 0; + struct radv_shader_inst *instructions = + calloc(shader->code_size / 4, sizeof(struct radv_shader_inst)); - si_add_split_disasm(shader->disasm_string, - start_addr, &num_inst, instructions); + si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions); - fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n", - radv_get_shader_name(&shader->info, stage)); + fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n", + radv_get_shader_name(&shader->info, stage)); - /* Print instructions with annotations. */ - for (i = 0; i < num_inst; i++) { - struct radv_shader_inst *inst = &instructions[i]; + /* Print instructions with annotations. */ + for (i = 0; i < num_inst; i++) { + struct radv_shader_inst *inst = &instructions[i]; - fprintf(f, "%s\n", inst->text); + fprintf(f, "%s\n", inst->text); - /* Print which waves execute the instruction right now. */ - while (num_waves && start_addr + inst->offset == waves->pc) { - fprintf(f, - " " COLOR_GREEN "^ SE%u SH%u CU%u " - "SIMD%u WAVE%u EXEC=%016"PRIx64 " ", - waves->se, waves->sh, waves->cu, waves->simd, - waves->wave, waves->exec); + /* Print which waves execute the instruction right now. */ + while (num_waves && start_addr + inst->offset == waves->pc) { + fprintf(f, + " " COLOR_GREEN "^ SE%u SH%u CU%u " + "SIMD%u WAVE%u EXEC=%016" PRIx64 " ", + waves->se, waves->sh, waves->cu, waves->simd, waves->wave, waves->exec); - if (inst->size == 4) { - fprintf(f, "INST32=%08X" COLOR_RESET "\n", - waves->inst_dw0); - } else { - fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n", - waves->inst_dw0, waves->inst_dw1); - } + if (inst->size == 4) { + fprintf(f, "INST32=%08X" COLOR_RESET "\n", waves->inst_dw0); + } else { + fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n", waves->inst_dw0, waves->inst_dw1); + } - waves->matched = true; - waves = &waves[1]; - num_waves--; - } - } + waves->matched = true; + waves = &waves[1]; + num_waves--; + } + } - fprintf(f, "\n\n"); - free(instructions); + fprintf(f, "\n\n"); + free(instructions); } static void -radv_dump_annotated_shaders(struct radv_pipeline *pipeline, - VkShaderStageFlagBits active_stages, FILE *f) +radv_dump_annotated_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages, + FILE *f) { - struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP]; - enum chip_class chip_class = pipeline->device->physical_device->rad_info.chip_class; - unsigned num_waves = ac_get_wave_info(chip_class, waves); + struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP]; + enum chip_class chip_class = pipeline->device->physical_device->rad_info.chip_class; + unsigned num_waves = ac_get_wave_info(chip_class, waves); - fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET - "\n\n", num_waves); + fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves); - /* Dump annotated active graphics shaders. */ - unsigned stages = active_stages; - while (stages) { - int stage = u_bit_scan(&stages); + /* Dump annotated active graphics shaders. */ + unsigned stages = active_stages; + while (stages) { + int stage = u_bit_scan(&stages); - radv_dump_annotated_shader(pipeline->shaders[stage], - stage, waves, num_waves, f); - } + radv_dump_annotated_shader(pipeline->shaders[stage], stage, waves, num_waves, f); + } - /* Print waves executing shaders that are not currently bound. */ - unsigned i; - bool found = false; - for (i = 0; i < num_waves; i++) { - if (waves[i].matched) - continue; + /* Print waves executing shaders that are not currently bound. */ + unsigned i; + bool found = false; + for (i = 0; i < num_waves; i++) { + if (waves[i].matched) + continue; - if (!found) { - fprintf(f, COLOR_CYAN - "Waves not executing currently-bound shaders:" - COLOR_RESET "\n"); - found = true; - } - fprintf(f, " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016"PRIx64 - " INST=%08X %08X PC=%"PRIx64"\n", - waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, - waves[i].wave, waves[i].exec, waves[i].inst_dw0, - waves[i].inst_dw1, waves[i].pc); - } - if (found) - fprintf(f, "\n\n"); + if (!found) { + fprintf(f, COLOR_CYAN "Waves not executing currently-bound shaders:" COLOR_RESET "\n"); + found = true; + } + fprintf(f, + " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016" PRIx64 " INST=%08X %08X PC=%" PRIx64 + "\n", + waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, waves[i].wave, waves[i].exec, + waves[i].inst_dw0, waves[i].inst_dw1, waves[i].pc); + } + if (found) + fprintf(f, "\n\n"); } static void -radv_dump_shader(struct radv_pipeline *pipeline, - struct radv_shader_variant *shader, gl_shader_stage stage, - FILE *f) +radv_dump_shader(struct radv_pipeline *pipeline, struct radv_shader_variant *shader, + gl_shader_stage stage, FILE *f) { - if (!shader) - return; + if (!shader) + return; - fprintf(f, "%s:\n\n", radv_get_shader_name(&shader->info, stage)); + fprintf(f, "%s:\n\n", radv_get_shader_name(&shader->info, stage)); - if (shader->spirv) { - unsigned char sha1[21]; - char sha1buf[41]; + if (shader->spirv) { + unsigned char sha1[21]; + char sha1buf[41]; - _mesa_sha1_compute(shader->spirv, shader->spirv_size, sha1); - _mesa_sha1_format(sha1buf, sha1); + _mesa_sha1_compute(shader->spirv, shader->spirv_size, sha1); + _mesa_sha1_format(sha1buf, sha1); - fprintf(f, "SPIRV (sha1: %s):\n", sha1buf); - radv_print_spirv(shader->spirv, shader->spirv_size, f); - } + fprintf(f, "SPIRV (sha1: %s):\n", sha1buf); + radv_print_spirv(shader->spirv, shader->spirv_size, f); + } - if (shader->nir_string) { - fprintf(f, "NIR:\n%s\n", shader->nir_string); - } + if (shader->nir_string) { + fprintf(f, "NIR:\n%s\n", shader->nir_string); + } - fprintf(f, "%s IR:\n%s\n", - pipeline->device->physical_device->use_llvm ? "LLVM" : "ACO", - shader->ir_string); - fprintf(f, "DISASM:\n%s\n", shader->disasm_string); + fprintf(f, "%s IR:\n%s\n", pipeline->device->physical_device->use_llvm ? "LLVM" : "ACO", + shader->ir_string); + fprintf(f, "DISASM:\n%s\n", shader->disasm_string); - radv_dump_shader_stats(pipeline->device, pipeline, stage, f); + radv_dump_shader_stats(pipeline->device, pipeline, stage, f); } static void -radv_dump_shaders(struct radv_pipeline *pipeline, - VkShaderStageFlagBits active_stages, FILE *f) +radv_dump_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages, FILE *f) { - /* Dump active graphics shaders. */ - unsigned stages = active_stages; - while (stages) { - int stage = u_bit_scan(&stages); + /* Dump active graphics shaders. */ + unsigned stages = active_stages; + while (stages) { + int stage = u_bit_scan(&stages); - radv_dump_shader(pipeline, pipeline->shaders[stage], stage, f); - } + radv_dump_shader(pipeline, pipeline->shaders[stage], stage, f); + } } static void radv_dump_vertex_descriptors(struct radv_pipeline *pipeline, FILE *f) { - void *ptr = (uint64_t *)pipeline->device->trace_id_ptr; - uint32_t count = pipeline->num_vertex_bindings; - uint32_t *vb_ptr = &((uint32_t *)ptr)[3]; + void *ptr = (uint64_t *)pipeline->device->trace_id_ptr; + uint32_t count = pipeline->num_vertex_bindings; + uint32_t *vb_ptr = &((uint32_t *)ptr)[3]; - if (!count) - return; + if (!count) + return; - fprintf(f, "Num vertex bindings: %d\n", count); - for (uint32_t i = 0; i < count; i++) { - uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4]; - uint64_t va = 0; + fprintf(f, "Num vertex bindings: %d\n", count); + for (uint32_t i = 0; i < count; i++) { + uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4]; + uint64_t va = 0; - va |= desc[0]; - va |= (uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32; + va |= desc[0]; + va |= (uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32; - fprintf(f, "VBO#%d:\n", i); - fprintf(f, "\tVA: 0x%"PRIx64"\n", va); - fprintf(f, "\tStride: %d\n", G_008F04_STRIDE(desc[1])); - fprintf(f, "\tNum records: %d (0x%x)\n", desc[2], desc[2]); - } + fprintf(f, "VBO#%d:\n", i); + fprintf(f, "\tVA: 0x%" PRIx64 "\n", va); + fprintf(f, "\tStride: %d\n", G_008F04_STRIDE(desc[1])); + fprintf(f, "\tNum records: %d (0x%x)\n", desc[2], desc[2]); + } } static struct radv_pipeline * radv_get_saved_pipeline(struct radv_device *device, enum ring_type ring) { - uint64_t *ptr = (uint64_t *)device->trace_id_ptr; - int offset = ring == RING_GFX ? 1 : 2; + uint64_t *ptr = (uint64_t *)device->trace_id_ptr; + int offset = ring == RING_GFX ? 1 : 2; - return *(struct radv_pipeline **)(ptr + offset); + return *(struct radv_pipeline **)(ptr + offset); } static void radv_dump_queue_state(struct radv_queue *queue, FILE *f) { - enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index); - struct radv_pipeline *pipeline; + enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index); + struct radv_pipeline *pipeline; - fprintf(f, "RING_%s:\n", ring == RING_GFX ? "GFX" : "COMPUTE"); + fprintf(f, "RING_%s:\n", ring == RING_GFX ? "GFX" : "COMPUTE"); - pipeline = radv_get_saved_pipeline(queue->device, ring); - if (pipeline) { - radv_dump_shaders(pipeline, pipeline->active_stages, f); - if (!(queue->device->instance->debug_flags & RADV_DEBUG_NO_UMR)) - radv_dump_annotated_shaders(pipeline, pipeline->active_stages, f); - radv_dump_vertex_descriptors(pipeline, f); - radv_dump_descriptors(queue->device, f); - } + pipeline = radv_get_saved_pipeline(queue->device, ring); + if (pipeline) { + radv_dump_shaders(pipeline, pipeline->active_stages, f); + if (!(queue->device->instance->debug_flags & RADV_DEBUG_NO_UMR)) + radv_dump_annotated_shaders(pipeline, pipeline->active_stages, f); + radv_dump_vertex_descriptors(pipeline, f); + radv_dump_descriptors(queue->device, f); + } } static void radv_dump_cmd(const char *cmd, FILE *f) { #ifndef _WIN32 - char line[2048]; - FILE *p; + char line[2048]; + FILE *p; - p = popen(cmd, "r"); - if (p) { - while (fgets(line, sizeof(line), p)) - fputs(line, f); - fprintf(f, "\n"); - pclose(p); - } + p = popen(cmd, "r"); + if (p) { + while (fgets(line, sizeof(line), p)) + fputs(line, f); + fprintf(f, "\n"); + pclose(p); + } #endif } static void radv_dump_dmesg(FILE *f) { - fprintf(f, "\nLast 60 lines of dmesg:\n\n"); - radv_dump_cmd("dmesg | tail -n60", f); + fprintf(f, "\nLast 60 lines of dmesg:\n\n"); + radv_dump_cmd("dmesg | tail -n60", f); } void radv_dump_enabled_options(struct radv_device *device, FILE *f) { - uint64_t mask; + uint64_t mask; - if (device->instance->debug_flags) { - fprintf(f, "Enabled debug options: "); + if (device->instance->debug_flags) { + fprintf(f, "Enabled debug options: "); - mask = device->instance->debug_flags; - while (mask) { - int i = u_bit_scan64(&mask); - fprintf(f, "%s, ", radv_get_debug_option_name(i)); - } - fprintf(f, "\n"); - } + mask = device->instance->debug_flags; + while (mask) { + int i = u_bit_scan64(&mask); + fprintf(f, "%s, ", radv_get_debug_option_name(i)); + } + fprintf(f, "\n"); + } - if (device->instance->perftest_flags) { - fprintf(f, "Enabled perftest options: "); + if (device->instance->perftest_flags) { + fprintf(f, "Enabled perftest options: "); - mask = device->instance->perftest_flags; - while (mask) { - int i = u_bit_scan64(&mask); - fprintf(f, "%s, ", radv_get_perftest_option_name(i)); - } - fprintf(f, "\n"); - } + mask = device->instance->perftest_flags; + while (mask) { + int i = u_bit_scan64(&mask); + fprintf(f, "%s, ", radv_get_perftest_option_name(i)); + } + fprintf(f, "\n"); + } } static void radv_dump_app_info(struct radv_device *device, FILE *f) { - struct radv_instance *instance = device->instance; + struct radv_instance *instance = device->instance; - fprintf(f, "Application name: %s\n", instance->vk.app_info.app_name); - fprintf(f, "Application version: %d\n", instance->vk.app_info.app_version); - fprintf(f, "Engine name: %s\n", instance->vk.app_info.engine_name); - fprintf(f, "Engine version: %d\n", instance->vk.app_info.engine_version); - fprintf(f, "API version: %d.%d.%d\n", - VK_VERSION_MAJOR(instance->vk.app_info.api_version), - VK_VERSION_MINOR(instance->vk.app_info.api_version), - VK_VERSION_PATCH(instance->vk.app_info.api_version)); + fprintf(f, "Application name: %s\n", instance->vk.app_info.app_name); + fprintf(f, "Application version: %d\n", instance->vk.app_info.app_version); + fprintf(f, "Engine name: %s\n", instance->vk.app_info.engine_name); + fprintf(f, "Engine version: %d\n", instance->vk.app_info.engine_version); + fprintf(f, "API version: %d.%d.%d\n", VK_VERSION_MAJOR(instance->vk.app_info.api_version), + VK_VERSION_MINOR(instance->vk.app_info.api_version), + VK_VERSION_PATCH(instance->vk.app_info.api_version)); - radv_dump_enabled_options(device, f); + radv_dump_enabled_options(device, f); } static void radv_dump_device_name(struct radv_device *device, FILE *f) { - struct radeon_info *info = &device->physical_device->rad_info; + struct radeon_info *info = &device->physical_device->rad_info; #ifndef _WIN32 - char kernel_version[128] = {0}; - struct utsname uname_data; + char kernel_version[128] = {0}; + struct utsname uname_data; #endif - const char *chip_name; + const char *chip_name; - chip_name = device->ws->get_chip_name(device->ws); + chip_name = device->ws->get_chip_name(device->ws); #ifdef _WIN32 - fprintf(f, "Device name: %s (%s / DRM %i.%i.%i)\n\n", - chip_name, device->physical_device->name, - info->drm_major, info->drm_minor, info->drm_patchlevel); + fprintf(f, "Device name: %s (%s / DRM %i.%i.%i)\n\n", chip_name, device->physical_device->name, + info->drm_major, info->drm_minor, info->drm_patchlevel); #else - if (uname(&uname_data) == 0) - snprintf(kernel_version, sizeof(kernel_version), - " / %s", uname_data.release); + if (uname(&uname_data) == 0) + snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release); - fprintf(f, "Device name: %s (%s / DRM %i.%i.%i%s)\n\n", - chip_name, device->physical_device->name, - info->drm_major, info->drm_minor, info->drm_patchlevel, - kernel_version); + fprintf(f, "Device name: %s (%s / DRM %i.%i.%i%s)\n\n", chip_name, device->physical_device->name, + info->drm_major, info->drm_minor, info->drm_patchlevel, kernel_version); #endif } static void radv_dump_umr_ring(struct radv_queue *queue, FILE *f) { - enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index); - struct radv_device *device = queue->device; - char cmd[128]; + enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index); + struct radv_device *device = queue->device; + char cmd[128]; - /* TODO: Dump compute ring. */ - if (ring != RING_GFX) - return; + /* TODO: Dump compute ring. */ + if (ring != RING_GFX) + return; - sprintf(cmd, "umr -R %s 2>&1", - device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx"); + sprintf(cmd, "umr -R %s 2>&1", + device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx"); - fprintf(f, "\nUMR GFX ring:\n\n"); - radv_dump_cmd(cmd, f); + fprintf(f, "\nUMR GFX ring:\n\n"); + radv_dump_cmd(cmd, f); } static void radv_dump_umr_waves(struct radv_queue *queue, FILE *f) { - enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index); - struct radv_device *device = queue->device; - char cmd[128]; + enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index); + struct radv_device *device = queue->device; + char cmd[128]; - /* TODO: Dump compute ring. */ - if (ring != RING_GFX) - return; + /* TODO: Dump compute ring. */ + if (ring != RING_GFX) + return; - sprintf(cmd, "umr -O bits,halt_waves -wa %s 2>&1", - device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx"); + sprintf(cmd, "umr -O bits,halt_waves -wa %s 2>&1", + device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx"); - fprintf(f, "\nUMR GFX waves:\n\n"); - radv_dump_cmd(cmd, f); + fprintf(f, "\nUMR GFX waves:\n\n"); + radv_dump_cmd(cmd, f); } static bool radv_gpu_hang_occured(struct radv_queue *queue, enum ring_type ring) { - struct radeon_winsys *ws = queue->device->ws; + struct radeon_winsys *ws = queue->device->ws; - if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->queue_idx)) - return true; + if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->queue_idx)) + return true; - return false; + return false; } void radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs) { - struct radv_device *device = queue->device; - char dump_dir[256], dump_path[512]; - enum ring_type ring; - uint64_t addr; - FILE *f; + struct radv_device *device = queue->device; + char dump_dir[256], dump_path[512]; + enum ring_type ring; + uint64_t addr; + FILE *f; - ring = radv_queue_family_to_ring(queue->queue_family_index); + ring = radv_queue_family_to_ring(queue->queue_family_index); - bool hang_occurred = radv_gpu_hang_occured(queue, ring); - bool vm_fault_occurred = false; - if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS) - vm_fault_occurred = ac_vm_fault_occured(device->physical_device->rad_info.chip_class, - &device->dmesg_timestamp, &addr); - if (!hang_occurred && !vm_fault_occurred) - return; + bool hang_occurred = radv_gpu_hang_occured(queue, ring); + bool vm_fault_occurred = false; + if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS) + vm_fault_occurred = ac_vm_fault_occured(device->physical_device->rad_info.chip_class, + &device->dmesg_timestamp, &addr); + if (!hang_occurred && !vm_fault_occurred) + return; - fprintf(stderr, "radv: GPU hang detected...\n"); + fprintf(stderr, "radv: GPU hang detected...\n"); - /* Create a directory into $HOME/radv_dumps__