diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c
index 2a7ff040005..090ff45ae89 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -3037,6 +3037,60 @@ tu_CmdNextSubpass2(VkCommandBuffer commandBuffer,
    tu_set_input_attachments(cmd, cmd->state.subpass);
 }
 
+/* Returns the number of dwords needed by tu6_emit_user_consts() for the
+ * given stage, so that the caller can size the sub-stream exactly. Every
+ * constant-load packet costs 4 dwords, plus its inline payload when the data
+ * is embedded in the packet. Keep this in sync with tu6_emit_user_consts().
+ */
+static uint32_t
+tu6_user_consts_size(const struct tu_pipeline *pipeline,
+                     struct tu_descriptor_state *descriptors_state,
+                     gl_shader_stage type)
+{
+   const struct tu_program_descriptor_linkage *link =
+      &pipeline->program.link[type];
+   const struct ir3_ubo_analysis_state *state = &link->const_state.ubo_state;
+   uint32_t dwords = 0;
+
+   if (link->push_consts.count > 0) {
+      unsigned num_units = link->push_consts.count;
+      dwords += 4 + num_units * 4;
+   }
+
+   for (uint32_t i = 0; i < state->num_enabled; i++) {
+      uint32_t size = state->range[i].end - state->range[i].start;
+
+      size = MIN2(size, (16 * link->constlen) - state->range[i].offset);
+
+      if (size == 0)
+         continue;
+
+      if (!state->range[i].ubo.bindless)
+         continue;
+
+      uint32_t *base = state->range[i].ubo.bindless_base == MAX_SETS ?
+         descriptors_state->dynamic_descriptors :
+         descriptors_state->sets[state->range[i].ubo.bindless_base]->mapped_ptr;
+      unsigned block = state->range[i].ubo.block;
+      uint32_t *desc = base + block * A6XX_TEX_CONST_DWORDS;
+      uint32_t desc_size = (desc[1] >> A6XX_UBO_1_SIZE__SHIFT) * 16;
+      desc_size = desc_size > state->range[i].start ?
+         desc_size - state->range[i].start : 0;
+
+      if (desc_size < size) {
+         uint32_t zero_size = size - desc_size;
+         dwords += 4 + zero_size / 4;
+         size = desc_size;
+      }
+
+      if (size > 0) {
+         dwords += 4;
+      }
+   }
+
+   return dwords;
+}
+
 static void
 tu6_emit_user_consts(struct tu_cs *cs, const struct tu_pipeline *pipeline,
                      struct tu_descriptor_state *descriptors_state,
@@ -3095,15 +3149,40 @@ tu6_emit_user_consts(struct tu_cs *cs, const struct tu_pipeline *pipeline,
       unsigned block = state->range[i].ubo.block;
       uint32_t *desc = base + block * A6XX_TEX_CONST_DWORDS;
       uint64_t va = desc[0] | ((uint64_t)(desc[1] & A6XX_UBO_1_BASE_HI__MASK) << 32);
-      assert(va);
+      uint32_t desc_size = (desc[1] >> A6XX_UBO_1_SIZE__SHIFT) * 16;
+      desc_size = desc_size > state->range[i].start ?
+         desc_size - state->range[i].start : 0;
 
-      tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3);
-      tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(state->range[i].offset / 16) |
-            CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
-            CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
-            CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
-            CP_LOAD_STATE6_0_NUM_UNIT(size / 16));
-      tu_cs_emit_qw(cs, va + offset);
+      /* Handle null UBO descriptors and out-of-range UBO reads by filling the
+       * rest with 0, simulating what reading with ldc would do. This behavior
+       * is required by VK_EXT_robustness2.
+       */
+      if (desc_size < size) {
+         uint32_t zero_size = size - desc_size;
+         uint32_t zero_offset = state->range[i].offset + desc_size;
+         tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3 + zero_size / 4);
+         tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(zero_offset / 16) |
+               CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
+               CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
+               CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
+               CP_LOAD_STATE6_0_NUM_UNIT(zero_size / 16));
+         tu_cs_emit_qw(cs, 0);
+         for (unsigned j = 0; j < zero_size / 4; j++) {
+            tu_cs_emit(cs, 0);
+         }
+         size = desc_size;
+      }
+
+      if (size > 0) {
+         assert(va);
+         tu_cs_emit_pkt7(cs, tu6_stage2opcode(type), 3);
+         tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(state->range[i].offset / 16) |
+               CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
+               CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
+               CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(type)) |
+               CP_LOAD_STATE6_0_NUM_UNIT(size / 16));
+         tu_cs_emit_qw(cs, va + offset);
+      }
    }
 }
 
@@ -3113,8 +3192,13 @@ tu6_emit_consts(struct tu_cmd_buffer *cmd,
                 struct tu_descriptor_state *descriptors_state,
                 gl_shader_stage type)
 {
+   /* Nothing to emit for this stage: return before allocating a sub-stream. */
+   uint32_t dwords = tu6_user_consts_size(pipeline, descriptors_state, type);
+   if (dwords == 0)
+      return (struct tu_draw_state) {};
+
    struct tu_cs cs;
-   tu_cs_begin_sub_stream(&cmd->sub_cs, 512, &cs); /* TODO: maximum size? */
+   tu_cs_begin_sub_stream(&cmd->sub_cs, dwords, &cs);
 
    tu6_emit_user_consts(&cs, pipeline, descriptors_state, type, cmd->push_constants);
 