From 52becd39a590cc8cac7bbe38282b27fc0a8ebbbf Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Wed, 9 Sep 2020 09:26:59 -0400 Subject: [PATCH] turnip: rework vertex buffers draw state handling This exploits a HW optimization for when only the size of a draw state is changed, to make things simpler and more optimal (assuming a well-behaved user which doesn't unnecessarily call CmdBindVertexBuffers many times) Signed-off-by: Jonathan Marek Part-of: --- src/freedreno/vulkan/tu_cmd_buffer.c | 66 ++++++++++------------------ src/freedreno/vulkan/tu_pipeline.c | 14 +++--- src/freedreno/vulkan/tu_private.h | 21 +++------ 3 files changed, 35 insertions(+), 66 deletions(-) diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index 2e144d10e14..447a438af4d 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -1638,10 +1638,6 @@ tu_BeginCommandBuffer(VkCommandBuffer commandBuffer, return VK_SUCCESS; } -/* Sets vertex buffers to HW binding points. We emit VBs in SDS (so that bin - * rendering can skip over unused state), so we need to collect all the - * bindings together into a single state emit at draw time. 
- */ void tu_CmdBindVertexBuffers(VkCommandBuffer commandBuffer, uint32_t firstBinding, @@ -1650,18 +1646,25 @@ tu_CmdBindVertexBuffers(VkCommandBuffer commandBuffer, const VkDeviceSize *pOffsets) { TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - - assert(firstBinding + bindingCount <= MAX_VBS); + struct tu_cs cs; + /* TODO: track a "max_vb" value for the cmdbuf to save a bit of memory */ + cmd->state.vertex_buffers.iova = tu_cs_draw_state(&cmd->sub_cs, &cs, 4 * MAX_VBS).iova; for (uint32_t i = 0; i < bindingCount; i++) { struct tu_buffer *buf = tu_buffer_from_handle(pBuffers[i]); - cmd->state.vb.buffers[firstBinding + i] = buf; - cmd->state.vb.offsets[firstBinding + i] = pOffsets[i]; - + cmd->state.vb[firstBinding + i].base = tu_buffer_iova(buf) + pOffsets[i]; + cmd->state.vb[firstBinding + i].size = buf->size - pOffsets[i]; tu_bo_list_add(&cmd->bo_list, buf->bo, MSM_SUBMIT_BO_READ); } + for (uint32_t i = 0; i < MAX_VBS; i++) { + tu_cs_emit_regs(&cs, + A6XX_VFD_FETCH_BASE_LO(i, cmd->state.vb[i].base), + A6XX_VFD_FETCH_BASE_HI(i, cmd->state.vb[i].base >> 32), + A6XX_VFD_FETCH_SIZE(i, cmd->state.vb[i].size)); + } + cmd->state.dirty |= TU_CMD_DIRTY_VERTEX_BUFFERS; } @@ -2114,13 +2117,6 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer, for_each_bit(i, mask) tu_cs_emit_draw_state(cs, TU_DRAW_STATE_DYNAMIC + i, pipeline->dynamic_state[i]); - /* If the new pipeline requires more VBs than we had previously set up, we - * need to re-emit them in SDS. If it requires the same set or fewer, we - * can just re-use the old SDS. 
- */ - if (pipeline->vi.bindings_used & ~cmd->vertex_bindings_set) - cmd->state.dirty |= TU_CMD_DIRTY_VERTEX_BUFFERS; - /* dynamic linewidth state depends pipeline state's gras_su_cntl * so the dynamic state ib must be updated when pipeline changes */ @@ -2132,6 +2128,17 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer, tu_cs_emit_regs(&cs, A6XX_GRAS_SU_CNTL(.dword = cmd->state.dynamic_gras_su_cntl)); } + + /* the vertex_buffers draw state always contains all the currently + * bound vertex buffers. update its size to only emit the vbs which + * are actually used by the pipeline + * note there is a HW optimization which makes it so the draw state + * is not re-executed completely when only the size changes + */ + if (cmd->state.vertex_buffers.size != pipeline->num_vbs * 4) { + cmd->state.vertex_buffers.size = pipeline->num_vbs * 4; + cmd->state.dirty |= TU_CMD_DIRTY_VERTEX_BUFFERS; + } } void @@ -2905,30 +2912,6 @@ tu6_emit_consts(struct tu_cmd_buffer *cmd, return tu_cs_end_draw_state(&cmd->sub_cs, &cs); } -static struct tu_draw_state -tu6_emit_vertex_buffers(struct tu_cmd_buffer *cmd, - const struct tu_pipeline *pipeline) -{ - struct tu_cs cs; - tu_cs_begin_sub_stream(&cmd->sub_cs, 4 * MAX_VBS, &cs); - - int binding; - for_each_bit(binding, pipeline->vi.bindings_used) { - const struct tu_buffer *buf = cmd->state.vb.buffers[binding]; - const VkDeviceSize offset = buf->bo_offset + - cmd->state.vb.offsets[binding]; - - tu_cs_emit_regs(&cs, - A6XX_VFD_FETCH_BASE(binding, .bo = buf->bo, .bo_offset = offset), - A6XX_VFD_FETCH_SIZE(binding, buf->size - offset)); - - } - - cmd->vertex_bindings_set = pipeline->vi.bindings_used; - - return tu_cs_end_draw_state(&cmd->sub_cs, &cs); -} - static uint64_t get_tess_param_bo_size(const struct tu_pipeline *pipeline, uint32_t draw_count) @@ -3068,9 +3051,6 @@ tu6_draw_common(struct tu_cmd_buffer *cmd, tu6_emit_consts(cmd, pipeline, descriptors_state, MESA_SHADER_FRAGMENT); } - if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) - 
cmd->state.vertex_buffers = tu6_emit_vertex_buffers(cmd, pipeline); - bool has_tess = pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; struct tu_draw_state tess_consts = {}; diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index 36c29879b73..4b2be417576 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -1475,8 +1475,7 @@ tu6_emit_program(struct tu_cs *cs, static void tu6_emit_vertex_input(struct tu_cs *cs, const struct ir3_shader_variant *vs, - const VkPipelineVertexInputStateCreateInfo *info, - uint32_t *bindings_used) + const VkPipelineVertexInputStateCreateInfo *info) { uint32_t vfd_decode_idx = 0; uint32_t binding_instanced = 0; /* bitmask of instanced bindings */ @@ -1492,7 +1491,6 @@ tu6_emit_vertex_input(struct tu_cs *cs, if (binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE) binding_instanced |= 1 << binding->binding; - *bindings_used |= 1 << binding->binding; step_rate[binding->binding] = 1; } @@ -1513,8 +1511,6 @@ tu6_emit_vertex_input(struct tu_cs *cs, &info->pVertexAttributeDescriptions[i]; uint32_t input_idx; - assert(*bindings_used & BIT(attr->binding)); - for (input_idx = 0; input_idx < vs->inputs_count; input_idx++) { if ((vs->inputs[input_idx].slot - VERT_ATTRIB_GENERIC0) == attr->location) break; @@ -2173,18 +2169,18 @@ tu_pipeline_builder_parse_vertex_input(struct tu_pipeline_builder *builder, const struct ir3_shader_variant *vs = builder->variants[MESA_SHADER_VERTEX]; const struct ir3_shader_variant *bs = builder->binning_variant; + pipeline->num_vbs = vi_info->vertexBindingDescriptionCount; + struct tu_cs vi_cs; tu_cs_begin_sub_stream(&pipeline->cs, MAX_VERTEX_ATTRIBS * 7 + 2, &vi_cs); - tu6_emit_vertex_input(&vi_cs, vs, vi_info, - &pipeline->vi.bindings_used); + tu6_emit_vertex_input(&vi_cs, vs, vi_info); pipeline->vi.state = tu_cs_end_draw_state(&pipeline->cs, &vi_cs); if (bs) { tu_cs_begin_sub_stream(&pipeline->cs, MAX_VERTEX_ATTRIBS * 7 + 2, 
&vi_cs); - tu6_emit_vertex_input( - &vi_cs, bs, vi_info, &pipeline->vi.bindings_used); + tu6_emit_vertex_input(&vi_cs, bs, vi_info); pipeline->vi.binning_state = tu_cs_end_draw_state(&pipeline->cs, &vi_cs); } diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index 439a08d7cd2..0a70b58fbdc 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -676,12 +676,6 @@ tu_buffer_iova(struct tu_buffer *buffer) return buffer->bo->iova + buffer->bo_offset; } -struct tu_vertex_binding -{ - struct tu_buffer *buffer; - VkDeviceSize offset; -}; - const char * tu_get_debug_option_name(int id); @@ -861,11 +855,10 @@ struct tu_cmd_state struct tu_pipeline *compute_pipeline; /* Vertex buffers */ - struct - { - struct tu_buffer *buffers[MAX_VBS]; - VkDeviceSize offsets[MAX_VBS]; - } vb; + struct { + uint64_t base; + uint32_t size; + } vb[MAX_VBS]; /* for dynamic states that can't be emitted directly */ uint32_t dynamic_stencil_mask; @@ -983,8 +976,6 @@ struct tu_cmd_buffer enum tu_cmd_buffer_status status; struct tu_cmd_state state; - struct tu_vertex_binding vertex_bindings[MAX_VBS]; - uint32_t vertex_bindings_set; uint32_t queue_family_index; uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4]; @@ -1115,6 +1106,9 @@ struct tu_pipeline /* draw states for the pipeline */ struct tu_draw_state load_state, rast_state, ds_state, blend_state; + /* for vertex buffers state */ + uint32_t num_vbs; + struct { struct tu_draw_state state; @@ -1127,7 +1121,6 @@ struct tu_pipeline { struct tu_draw_state state; struct tu_draw_state binning_state; - uint32_t bindings_used; } vi; struct