From f58ece08da1d48eddb92678d8241d481a2efcf50 Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Tue, 10 Nov 2020 17:33:54 +0100
Subject: [PATCH] tu: Handle robust UBO behavior for pushed UBO ranges

If we push a UBO range but then find out at draw-time that part of the
pushed range is out of range of the UBO descriptor, then we have to fill
in the rest of the range with 0's to mimic the bounds-checking that ldc
would've done.

Part-of:
---
/* Reviewer notes (documentation only; no code in the hunks below is changed).
 *
 * tu6_user_consts_size() (new): returns a dword count built from
 *   - 4 + 4 * push_consts.count when push_consts.count > 0, and
 *   - for each enabled UBO range that is bindless and whose clamped size is
 *     non-zero: 4 + zero_size / 4 when the descriptor covers less than the
 *     range, plus 4 when any descriptor-backed bytes remain.
 *   The per-range terms line up with the pkt7 payload sizes used in the
 *   tu6_emit_user_consts() hunk (3 + zero_size / 4 and 3); the extra dword
 *   is presumably the packet header - confirm against tu_cs_emit_pkt7().
 *   The push-constant term cannot be checked here because that emit code is
 *   outside this diff.
 *
 * tu6_emit_user_consts() (modified): when desc_size < size it first emits an
 *   SS6_DIRECT load of zero_size / 4 zero dwords at offset + desc_size, then
 *   shrinks size to desc_size; the SS6_INDIRECT load from va + offset is
 *   only emitted (and va only asserted) when size > 0.
 *
 * tu6_emit_consts() (modified): sizes the sub-stream with
 *   tu6_user_consts_size() instead of the fixed 512 and returns an empty
 *   tu_draw_state when that size is 0.
 *
 * NOTE(review): the descriptor lookup and desc_size computation are
 *   duplicated in the size and emit functions and must stay in sync.
 * NOTE(review): the size function skips non-bindless ranges; how the emit
 *   function treats them is not visible in this diff - verify they agree.
 */
 src/freedreno/vulkan/tu_cmd_buffer.c | 96 +++++++++++++++++++++++++---
 1 file changed, 87 insertions(+), 9 deletions(-)

diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c
index 2a7ff040005..090ff45ae89 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -3037,6 +3037,55 @@ tu_CmdNextSubpass2(VkCommandBuffer commandBuffer,
    tu_set_input_attachments(cmd, cmd->state.subpass);
 }
 
+static uint32_t
+tu6_user_consts_size(const struct tu_pipeline *pipeline,
+                     struct tu_descriptor_state *descriptors_state,
+                     gl_shader_stage type)
+{
+   const struct tu_program_descriptor_linkage *link =
+      &pipeline->program.link[type];
+   const struct ir3_ubo_analysis_state *state = &link->const_state.ubo_state;
+   uint32_t dwords = 0; /* running total returned to the caller */
+
+   if (link->push_consts.count > 0) {
+      unsigned num_units = link->push_consts.count;
+      dwords += 4 + num_units * 4; /* NOTE(review): emit side not in this diff */
+   }
+
+   for (uint32_t i = 0; i < state->num_enabled; i++) {
+      uint32_t size = state->range[i].end - state->range[i].start;
+
+      size = MIN2(size, (16 * link->constlen) - state->range[i].offset);
+
+      if (size == 0)
+         continue;
+
+      if (!state->range[i].ubo.bindless) /* only bindless ranges are counted */
+         continue;
+
+      uint32_t *base = state->range[i].ubo.bindless_base == MAX_SETS ?
+         descriptors_state->dynamic_descriptors :
+         descriptors_state->sets[state->range[i].ubo.bindless_base]->mapped_ptr;
+      unsigned block = state->range[i].ubo.block;
+      uint32_t *desc = base + block * A6XX_TEX_CONST_DWORDS;
+      uint32_t desc_size = (desc[1] >> A6XX_UBO_1_SIZE__SHIFT) * 16;
+      desc_size = desc_size > state->range[i].start ?
+         desc_size - state->range[i].start : 0; /* clamped at 0 */
+
+      if (desc_size < size) {
+         uint32_t zero_size = size - desc_size;
+         dwords += 4 + zero_size / 4; /* zero-fill packet with inline zeros */
+         size = desc_size;
+      }
+
+      if (size > 0) {
+         dwords += 4; /* indirect load packet */
+      }
+   }
+
+   return dwords;
+}
+
 static void
 tu6_emit_user_consts(struct tu_cs *cs, const struct tu_pipeline *pipeline,
                      struct tu_descriptor_state *descriptors_state,
@@ -3095,15 +3144,40 @@ tu6_emit_user_consts(struct tu_cs *cs, const struct tu_pipeline *pipeline,
       unsigned block = state->range[i].ubo.block;
       uint32_t *desc = base + block * A6XX_TEX_CONST_DWORDS;
       uint64_t va = desc[0] | ((uint64_t)(desc[1] & A6XX_UBO_1_BASE_HI__MASK) << 32);
-      assert(va);
+      uint32_t desc_size = (desc[1] >> A6XX_UBO_1_SIZE__SHIFT) * 16;
+      desc_size = desc_size > state->range[i].start ?
+         desc_size - state->range[i].start : 0; /* clamped at 0 */
 
-      tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3);
-      tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(state->range[i].offset / 16) |
-            CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
-            CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
-            CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
-            CP_LOAD_STATE6_0_NUM_UNIT(size / 16));
-      tu_cs_emit_qw(cs, va + offset);
+      /* Handle null UBO descriptors and out-of-range UBO reads by filling the
+       * rest with 0, simulating what reading with ldc would do. This behavior
+       * is required by VK_EXT_robustness2.
+       */
+      if (desc_size < size) {
+         uint32_t zero_size = size - desc_size;
+         uint32_t zero_offset = state->range[i].offset + desc_size;
+         tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3 + zero_size / 4);
+         tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(zero_offset / 16) |
+               CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
+               CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
+               CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
+               CP_LOAD_STATE6_0_NUM_UNIT(zero_size / 16));
+         tu_cs_emit_qw(cs, 0); /* presumably unused for SS6_DIRECT - confirm */
+         for (unsigned i = 0; i < zero_size / 4; i++) { /* NOTE(review): shadows outer i */
+            tu_cs_emit(cs, 0);
+         }
+         size = desc_size; /* only the descriptor-backed part is loaded below */
+      }
+
+      if (size > 0) {
+         assert(va);
+         tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3);
+         tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(state->range[i].offset / 16) |
+               CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
+               CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
+               CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
+               CP_LOAD_STATE6_0_NUM_UNIT(size / 16));
+         tu_cs_emit_qw(cs, va + offset);
+      }
    }
 }
 
@@ -3113,8 +3187,12 @@ tu6_emit_consts(struct tu_cmd_buffer *cmd,
                 struct tu_descriptor_state *descriptors_state,
                 gl_shader_stage type)
 {
+   uint32_t dwords = tu6_user_consts_size(pipeline, descriptors_state, type);
+   if (dwords == 0) /* nothing to emit: return before opening a sub-stream */
+      return (struct tu_draw_state) {};
+
    struct tu_cs cs;
-   tu_cs_begin_sub_stream(&cmd->sub_cs, 512, &cs); /* TODO: maximum size? */
+   tu_cs_begin_sub_stream(&cmd->sub_cs, dwords, &cs);
 
    tu6_emit_user_consts(&cs, pipeline, descriptors_state, type, cmd->push_constants);
 